Bayesian statistics and Bayesian analysis have become increasingly popular tools for both machine learning and more traditional regression applications. But they are not new. In fact, the mathematics and philosophy of Bayesian analysis go back 300 years to the minister and amateur mathematician Thomas Bayes, who gave his name to the powerful Bayes' Theorem.
Though 300 years old, Bayes' theorem is still a powerful, if sometimes tricky, tool for analysis and even daily decision making.
Here is a probability problem from recent real life:
Around Christmas 2021, my son and I met my son's cousins together with my brother-in-law. The next day, I got a phone call from my brother-in-law, who told me that his oldest daughter had woken up with a cough and had tested positive for corona with a home quick test ("antigen test"). They would take her to get a more reliable PCR test, but it would take a day or two for the results to come in. If the result was confirmed, then we would be considered close contacts and would have to spend 7 days in quarantine over Christmas. Bummer.
But I knew that these tests were not perfect: some people who do not have corona test positive, and some who actually do have corona test negative. So the probability question I had was: what is the probability that my niece actually had corona, given that she had tested positive with a quick test? Let's write this mathematically:
$$P(Corona=1|Test=1)$$

Above, we have written a conditional probability, which we can read as "the probability of corona, given that the test was positive."
So first, I needed some basic information. I searched a bit to find studies of how accurate these quick tests are, and found some information from Cochrane, a non-profit organisation with the goal of providing well-researched and financially independent medical and health information.
From the Cochrane review, a study finds that among people confirmed to have corona, 72% of quick tests were positive (given that the person had symptoms). So is this our answer? Not quite. We can write this probability as:
$$P(Test=1|Corona=1)=.72$$

That is, the probability of a test being positive, given that a person has corona, is .72. It is the mirror image of what we want to find out.
We can still estimate our wished-for probability, but first we need some more information.
We first need information on the general prevalence of corona in the population: before seeing the test result, what is the overall chance that my niece had corona, rather than just a common cold, the flu or a light cough?
At the time, the number of registered corona cases in Norway was relatively low - around 4,000-5,000 registered new cases per day. So it seems fairly conservative to say that at any given time, 1 in 100 people had the disease. But considering that more children were getting sick in that period, and the fact that my niece had been showing some symptoms, perhaps the probability should be closer to 1 in 10. Since we are a bit uncertain, we can create a range of values and say that the prior probability of corona is between:
$$P(Corona=1) = .01$$

and

$$P(Corona=1) = .1$$

That is, before the information we have on the test, we might guess that her chance of having corona is between 1/100 and 1/10.
A final piece of information we need is the rate of false positives: the probability that someone who does not have corona still tests positive. This is related to the "specificity" of the test, which measures the probability that someone who is negative actually tests negative. The data seem to indicate a very high specificity - close to 100 percent for most tests. To be conservative, we can assume a specificity of .99:
$$P(Test=0|Corona=0) = .99$$

Thus, we can find the rate of false positives as:

$$P(Test=1 | Corona=0) = 1-P(Test=0|Corona=0) = .01$$

The total probability of testing positive is then a weighted sum: the probability of testing positive if a person actually has corona (weighted by the proportion of people who have corona), plus the probability of testing positive if a person does not have corona (weighted by the proportion of people who do not). So in the scenario where we think the prior probability of having corona is 1/10:

$$P(Test=1) = P(Test=1 | Corona=0)*P(Corona=0) + P(Test=1|Corona=1)*P(Corona=1) = .01*(9/10) + .72*(1/10)$$

We can calculate both the high and low scenarios:
PT_low = .01*(99/100) + .72*(1/100) #P(Test=1) with prior P(Corona=1)=1/100
PT_high = .01*(9/10) + .72*(1/10) #P(Test=1) with prior P(Corona=1)=1/10
print(PT_low, PT_high)
0.0171 0.08099999999999999
We now have enough information to calculate our desired probability: $P(Corona=1|Test=1)$. Bayesian analysis has sometimes been called inverse probability, since we are deducing the inverse of the information we have, $P(Test=1|Corona=1)$. To find the inverse probability, we use the formula from Thomas Bayes, now called Bayes' Formula or Bayes' Theorem:
$$P(A|B) = \frac{P(A)*P(B|A)}{P(B)}$$

Or specifically, in our case:

$$P(Corona=1|Test=1) = \frac{P(Corona=1)*P(Test=1|Corona=1)}{P(Test=1)}$$

We can then create a low and high scenario:

$$P(Corona=1|Test=1) = \frac{.01*.72}{.0171}$$

$$P(Corona=1|Test=1) = \frac{.1*.72}{.081}$$

PCgT_low = (.01*.72)/PT_low
PCgT_high = (.1*.72)/PT_high
print(PCgT_low, PCgT_high)
0.42105263157894735 0.888888888888889
So depending on what prior information we include, our probability of actually having corona could be as low as 42% or as high as 89%.
One of the key features and advantages of Bayesian analysis is the ability to include prior information - in this case, the information on the probability of having corona before the test result. The result, especially under the low scenario, may seem surprising: a positive test may give us a less-than-50% probability of actually having corona. But we could have arrived at this probability simply by counting out the possible scenarios.
Suppose 1000 people take a test that day, and 990 of them (99%) don't actually have corona. Given the false-positive rate, 990*.01 = 9.9 of those people will test positive (falsely).
Of the 10/1000 people who have corona, .72*10 = 7.2 will test positive.
So out of about 17 positive tests, only 7.2/17 ≈ .42 actually come from someone who has corona.
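We can also check this counting logic with a quick simulation. Below is a minimal sketch (for the 1/100 prior scenario) that randomly assigns corona status and test results according to the probabilities above, and then computes the share of positive tests that come from people who actually have corona:
import numpy as np
rng = np.random.default_rng(1)
n = 1_000_000 #simulated test-takers
has_corona = rng.random(n) < 0.01 #prior P(Corona=1) = 1/100
test_pos = np.where(has_corona,
                    rng.random(n) < 0.72, #sensitivity: P(Test=1|Corona=1)
                    rng.random(n) < 0.01) #false-positive rate: P(Test=1|Corona=0)
print(has_corona[test_pos].mean()) #share of positive tests with corona, ~.42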
This shows the power of being able to include prior information into an analysis (the chance of having corona prior to taking the test), as well as the intuitive power of Bayes.
Yes, the PCR test came out positive and I spent 7 days in quarantine, but I did not get Corona (then).
One way we could interpret the scenario above is that we start with our initial information - my niece wakes up with a cough - and an initial probability of between 1/100 and 1/10 that she has corona.
We then get some new data - the test result - which updates our probability. The updated probability is what we refer to as our posterior probability.
We could continue in this way. In the discussion above, my niece took a PCR test, which of course is also subject to uncertainty. So we could have treated our previous posterior probability as our new prior, and then updated our probability again with the PCR result.
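To sketch what that sequential updating might look like in code: suppose, purely for illustration, that the PCR test has a sensitivity of .95 and a specificity of .99 (made-up numbers, not taken from any study). We reuse Bayes' formula, feeding the quick-test posterior in as the new prior:
def bayes_update(prior, sensitivity, specificity):
    #posterior probability of corona after one positive test result
    p_pos = sensitivity*prior + (1 - specificity)*(1 - prior)
    return sensitivity*prior/p_pos

post1 = bayes_update(0.01, 0.72, 0.99) #quick test, low-prior scenario
post2 = bayes_update(post1, 0.95, 0.99) #hypothetical PCR test
print(post1, post2) #roughly 0.42, then 0.99
So a second, independent positive test would push the probability from around 42% up toward near certainty.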
We'll consider one more simple, yet devilishly unintuitive problem called the Monty Hall problem. Here is a description of the problem from Downey:
The Monty Hall problem is based on a game show called Let's Make a Deal. If you are a contestant on the show, here's how the game works: The host, Monty Hall, shows you three closed doors -- numbered 1, 2, and 3 -- and tells you that there is a prize behind each door. One prize is valuable (traditionally a car), the other two are less valuable (traditionally goats). The object of the game is to guess which door has the car. If you guess right, you get to keep the car. Suppose you pick Door 1. Before opening the door you chose, Monty opens Door 3 and reveals a goat. Then Monty offers you the option to stick with your original choice or switch to the remaining unopened door.
To maximize your chance of winning the car, should you stick with Door 1 or switch to Door 2?
To answer this question, we have to make some assumptions about the behavior of the host:
Monty always opens a door and offers you the option to switch.
He never opens the door you picked or the door with the car.
If you choose the door with the car, he chooses one of the other doors at random.
The intuitive answer for many people is that you still have a 50/50 chance and there is no advantage to switching. But this is wrong! Enumerating the possibilities - making sure to include the information the host uses in choosing which door to open - will convince you that you should switch.
You could try a decision tree to convince yourself of this, or, as below, we will follow Downey in creating a Bayes table.
Below we create a table in the form of a Pandas data frame:
import pandas as pd
from fractions import Fraction
#so that we can use actual fractions (1/3) rather than floating point approximations (.33...)
We start by looking at our three options and the prior probability that the prize is behind each door.
MHtable = pd.DataFrame(index=['Door 1', 'Door 2', 'Door 3'])
MHtable['prior'] = Fraction(1, 3)
MHtable
| | prior |
|---|---|
Door 1 | 1/3 |
Door 2 | 1/3 |
Door 3 | 1/3 |
Now we get the new "data", which is that Monty opens Door 3 to reveal a goat.
So now we consider the likelihood of this data under each hypothesis - the probability that Monty opens Door 3, given where the car actually is. If the car is behind Door 1 (our pick), Monty chooses between Door 2 and Door 3 at random, so the likelihood is 1/2. If the car is behind Door 2, Monty must open Door 3, so the likelihood is 1. And if the car is behind Door 3, Monty will never open it, so the likelihood is 0:
MHtable['likelihood'] = Fraction(1, 2), 1, 0
MHtable
| | prior | likelihood |
|---|---|---|
Door 1 | 1/3 | 1/2 |
Door 2 | 1/3 | 1 |
Door 3 | 1/3 | 0 |
So now the last step is to update our Bayes table according to Bayes' rule:

$$P(A|B) = \frac{P(A)*P(B|A)}{P(B)}$$

So we could write the probability that the car is behind Door 1, given that Door 3 is opened, as:

$$P(Door1|Door3opened) = \frac{P(Door1)*P(Door3opened|Door1)}{P(Door3opened)}$$

$$P(Door1|Door3opened) = \frac{(1/3)*(1/2)}{(1/3)*(1/2) + (1/3)*1} = 1/3$$

The following function will do the Bayesian updating in our table automatically:
def update(table):
    """Compute the posterior probabilities."""
    table['unnorm'] = table['prior'] * table['likelihood']
    prob_data = table['unnorm'].sum()
    table['posterior'] = table['unnorm'] / prob_data
    return prob_data
update(MHtable)
MHtable
| | prior | likelihood | unnorm | posterior |
|---|---|---|---|---|
Door 1 | 1/3 | 1/2 | 1/6 | 1/3 |
Door 2 | 1/3 | 1 | 1/3 | 2/3 |
Door 3 | 1/3 | 0 | 0 | 0 |

The posterior probability that the car is behind Door 2 is 2/3 - twice the probability for Door 1 - so you should switch.
Bayesian statistics and Bayesian regression are, in the simplest sense, the further application of the probabilistic thinking and enumeration we went through in the above examples to statistical analysis and regression.
This means:
We can include prior information in our estimation
We treat parameters as uncertain, which in most modern applications we estimate through simulation
We can easily update our estimates when new data become available
We can propagate uncertainty to derivative values - like predictions or, for example, measures of costs or benefits.
Compare this to traditional frequentist methodology, which is what is mostly taught in introductory courses:
It is not possible to make use of outside, or prior information - all relevant information is assumed to be in the data
Estimated coefficients are assumed to be fixed - they are treated simply as calculated values from the sample. They are compared to a null hypothesis and/or null model, from which we judge whether these values are "statistically significant".
It can be difficult to calculate uncertainty in derivative values, like predictions, and we must often use forms of simulation here as well, such as bootstrapping (a small sketch follows below).
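As a small illustration of that last point, here is a minimal bootstrap sketch with made-up data: we resample the rows of the data with replacement, re-estimate the regression, and recompute the derived value (here, a prediction at x = 1.5) each time:
import numpy as np
rng = np.random.default_rng(0)
x = rng.normal(size=200)
y = 1 + 2*x + rng.normal(size=200) #made-up data
preds = []
for _ in range(1000):
    idx = rng.integers(0, len(x), len(x)) #resample rows with replacement
    slope, intercept = np.polyfit(x[idx], y[idx], 1) #re-estimate by least squares
    preds.append(intercept + slope*1.5) #derived value: prediction at x = 1.5
print(np.percentile(preds, [2.5, 97.5])) #bootstrap 95% interval for the prediction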
Bayesian analysis and statistics is a big and growing field, and several excellent introductory texts exist for those interested in learning more; see the references at the end of this lab.
One important application of Bayesian analysis is within what is called decision analysis. The basic idea of decision analysis is to propagate the uncertainty from an estimation (like a regression) into some meaningful function that affects a decision, strategy or operation.
Here we will show an example using electricity data and a made-up decision on whether an electricity monopolist should invest in more wind power.
We won't use actual Bayesian estimation, instead relying on OLS. But we will make use of the simulated posterior uncertainty.
The data is real, but the model is very simplified.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as spt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import seaborn as sns
from cycler import cycler
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams["axes.labelsize"]= 12
plt.rcParams["figure.facecolor"] = "#f2f2f2"
#plt.rcParams['figure.savefig.dpi'] = 100
plt.rcParams['savefig.edgecolor'] = "#f2f2f2"
plt.rcParams['savefig.facecolor'] ="#f2f2f2"
plt.rcParams["figure.figsize"] = [16,10]
plt.rcParams['savefig.bbox'] = "tight"
plt.rcParams['font.size'] = 14
greens = ['#66c2a4','#41ae76','#238b45','#006d2c','#00441b']
multi =['#66c2a4','#1f78b4','#a6cee3','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f']
plt.rcParams["axes.prop_cycle"] = cycler(color=multi)
We'll load in data on the Nordic electricity market at an hourly frequency:
elDF = pd.read_csv("http://jmaurit.github.io/analytics/labs/data/wt_data2.csv")
elDF
| | date | time | hour | wind_SE1 | wind_SE2 | wind_SE3 | wind_SE4 | wind_DK1 | wind_DK2 | SE_nx | ... | PLAtoPL_cap | PLtoPLA_cap | SE4toLT_cap | LTtoSE4_cap | SYSEurMW | SE4EurMW | DK1EurMW | DK2EurMW | hour_ind | month |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-01-01 00:00:00 | 2016-01-01 00:00:00 | 00 - 01 | 420.0 | 1247.0 | 432.0 | 208.0 | 1314.0 | 113.0 | -2621.0 | ... | 0.0 | 3600.0 | 0.0 | 0.0 | 1639.0 | 1639.0 | 1639.0 | 1639.0 | 0 | m1 |
1 | 2016-01-01 00:00:00 | 2016-01-01 01:00:00 | 01 - 02 | 416.0 | 1214.0 | 419.0 | 187.0 | 1267.0 | 98.0 | -2274.0 | ... | 0.0 | 3900.0 | 0.0 | 0.0 | 1604.0 | 1604.0 | 1604.0 | 1604.0 | 1 | m1 |
2 | 2016-01-01 00:00:00 | 2016-01-01 02:00:00 | 02 - 03 | 417.0 | 1227.0 | 378.0 | 177.0 | 1159.0 | 74.0 | -2370.0 | ... | 0.0 | 3900.0 | 0.0 | 0.0 | 1574.0 | 1574.0 | 1574.0 | 1574.0 | 2 | m1 |
3 | 2016-01-01 00:00:00 | 2016-01-01 03:00:00 | 03 - 04 | 424.0 | 1232.0 | 357.0 | 173.0 | 1160.0 | 61.0 | -2541.0 | ... | 0.0 | 4000.0 | 0.0 | 0.0 | 1557.0 | 1557.0 | 1557.0 | 1557.0 | 3 | m1 |
4 | 2016-01-01 00:00:00 | 2016-01-01 04:00:00 | 04 - 05 | 412.0 | 1245.0 | 326.0 | 161.0 | 1069.0 | 47.0 | -2616.0 | ... | 0.0 | 4100.0 | 0.0 | 0.0 | 1547.0 | 1547.0 | 1547.0 | 1547.0 | 4 | m1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
26299 | 2018-12-31 00:00:00 | 2018-12-31 19:00:00 | 19 - 20 | NaN | NaN | NaN | NaN | 3335.0 | 800.0 | -5452.0 | ... | 2019.0 | 1011.0 | 700.0 | 281.0 | 4883.0 | 4751.0 | 4751.0 | 4751.0 | 19 | m12 |
26300 | 2018-12-31 00:00:00 | 2018-12-31 20:00:00 | 20 - 21 | NaN | NaN | NaN | NaN | 3147.0 | 865.0 | -4919.0 | ... | 976.0 | 1551.0 | 700.0 | 220.0 | 4723.0 | 4582.0 | 4582.0 | 4582.0 | 20 | m12 |
26301 | 2018-12-31 00:00:00 | 2018-12-31 21:00:00 | 21 - 22 | NaN | NaN | NaN | NaN | 2859.0 | 902.0 | -4493.0 | ... | 208.0 | 2229.0 | 700.0 | 204.0 | 4602.0 | 4303.0 | 4303.0 | 4303.0 | 21 | m12 |
26302 | 2018-12-31 00:00:00 | 2018-12-31 22:00:00 | 22 - 23 | NaN | NaN | NaN | NaN | 2708.0 | 792.0 | -4165.0 | ... | 0.0 | 2800.0 | 700.0 | 148.0 | 4555.0 | 3854.0 | 3854.0 | 3854.0 | 22 | m12 |
26303 | 2018-12-31 00:00:00 | 2018-12-31 23:00:00 | 23 - 00 | NaN | NaN | NaN | NaN | 2719.0 | 785.0 | -3915.0 | ... | 0.0 | 3127.0 | 700.0 | 119.0 | 4269.0 | 2570.0 | 2570.0 | 2570.0 | 23 | m12 |
26304 rows × 90 columns
Let's consider the simplest of regressions between the price of wholesale electricity in one of the Danish price areas (DK1) and the amount of wind power production.
(By the way, I would not recommend the below as a good regression model for this data.)
mod1wind = smf.ols("DK1EurMW ~ wind_DK1", data=elDF).fit()
mod1wind.summary()
Dep. Variable: | DK1EurMW | R-squared: | 0.101 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.101 |
Method: | Least Squares | F-statistic: | 2949. |
Date: | Thu, 11 May 2023 | Prob (F-statistic): | 0.00 |
Time: | 13:52:57 | Log-Likelihood: | -2.2683e+05 |
No. Observations: | 26301 | AIC: | 4.537e+05 |
Df Residuals: | 26299 | BIC: | 4.537e+05 |
Df Model: | 1 | ||
Covariance Type: | nonrobust |
| | coef | std err | t | P>|t| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
Intercept | 3958.6249 | 13.804 | 286.771 | 0.000 | 3931.568 | 3985.682 |
wind_DK1 | -0.5077 | 0.009 | -54.305 | 0.000 | -0.526 | -0.489 |
Omnibus: | 2763.096 | Durbin-Watson: | 0.096 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 6128.193 |
Skew: | 0.652 | Prob(JB): | 0.00 |
Kurtosis: | 4.973 | Cond. No. | 2.45e+03 |
So from the regression model we see a negative coefficient on the wind power variable. On average, an extra megawatt-hour (MWh) (a standard unit of electricity production) of wind power will lower prices by .51 Euro/MWh.
Let us say that a Danish monopolist electricity utility is deciding whether to invest in a new large off-shore wind park.
We want to run an analysis to determine whether the monopoly should make the investment (we assume they are profit maximizing).
(All these numbers are completely unrealistic.)
We'll start by simulating from the "posterior" of our regression model ("posterior" is in quotes because, formally, when we refer to the posterior of a model we should have a Bayesian model, which we don't).
What the below block of code does is take the point estimates and variance-covariance matrix from the regression, and produce a simulation of a posterior distribution assuming a normal distribution. You can look through the code, but don't worry if you don't understand all the steps.
def regSim(regMod):
    #extract values from regression model
    nmk = regMod.df_resid #n-k
    sigma_hat = np.sqrt(regMod.mse_resid)
    bs_vcov = regMod.cov_params(scale=1)
    bs = regMod.params
    #create simulated values
    sigma_sim = sigma_hat*np.sqrt((nmk/np.random.chisquare(nmk,1)))
    V_sim = np.array(bs_vcov) * sigma_sim**2
    bs_sim = np.random.multivariate_normal(bs, V_sim, 1)
    return([bs_sim.flatten(), sigma_sim])
Now we simulate parameters from our regression model 1000 times, similar to what we have done in earlier labs:
sigma_hat = np.sqrt(mod1wind.mse_resid) #estimate of sigma, sigma_hat
nsim = 1000
bs_sims = []
sigma_sims = []
for s in range(nsim):
    param_sim = regSim(mod1wind)
    bs_sims.append(param_sim[0])
    sigma_sims.append(param_sim[1])
simsDF = pd.DataFrame(bs_sims)
simsDF
| | 0 | 1 |
|---|---|---|
0 | 3971.254744 | -0.514325 |
1 | 3970.117303 | -0.506004 |
2 | 3956.200387 | -0.510918 |
3 | 3950.656899 | -0.505535 |
4 | 3941.609104 | -0.493350 |
... | ... | ... |
995 | 3975.594169 | -0.514547 |
996 | 3951.050708 | -0.492996 |
997 | 3965.325493 | -0.505766 |
998 | 3966.999980 | -0.512246 |
999 | 3962.317763 | -0.510511 |
1000 rows × 2 columns
We can visualise the predictive distribution of our model, as we have also done previously:
x_line = np.arange(0,4000,1)
fig, ax = plt.subplots()
elDF.plot.scatter(x="wind_DK1", y="DK1EurMW", ax=ax, alpha=.2)
for i, row in simsDF.iloc[:100,:].iterrows():
    y_line = row[0] + row[1]*x_line + np.random.normal(0,sigma_hat)
    ax.plot(x_line, y_line, color="grey", alpha=.5)
Now we'll use our simulations to create distributions for our expected prices with and without the investment.
For the no-investment scenario, we will fix the wind power variable at its mean value for simplicity.
For the investment scenario, we add an expected .5*400 = 200 MWh of wind production (400 MW of new capacity at a 50% capacity factor).
meanWind = np.mean(elDF.wind_DK1)
meanWind #mean amount of wind
newWind = .5*400
expectedWind = meanWind + newWind
simsDF["simPrices"] = simsDF.iloc[:,0] + simsDF.iloc[:,1]*meanWind + np.random.normal(0,sigma_hat, nsim)
simsDF["simPricesHighWind"] = simsDF.iloc[:,0] + simsDF.iloc[:,1]*expectedWind + np.random.normal(0,sigma_hat, nsim)
We can now look at the histograms of the expected prices for the two scenarios:
fig, ax = plt.subplots()
simsDF.simPrices.hist(bins=100, ax=ax, label="No investment", alpha=.5)
simsDF.simPricesHighWind.hist(bins=100, ax=ax, label="investment", alpha=.5)
ax.set_xlabel("Predicted prices")
ax.legend()
<matplotlib.legend.Legend at 0x7fc0c2125e50>
Now we define our profit function, which will take the simulated prices and output an expected profit for the two scenarios.
First we define some parameters
InvestmentCost = 300000000 #investment cost
ExistingWindCap = 500 #How much existing wind capacity the company owns
NewWind = 400 #Size in capacity of new investment
cfWind = .5 #capacity factor of wind power
#no running/marginal cost for wind
thermalCap = 1000 #How much thermal capacity the company owns
mcThermal = 2000 #The marginal cost of the thermal capacity owned by the company
n = 1000 #1000 periods, relevant economic period to evaluate profitability.
#only run thermal if price>mc
Now we are ready to create a profit function.
The function takes as input a row of the simulation data frame.
We calculate a variable for operating profit for both scenarios (operProfit, operProfitHighWind). The operating profit is market price times the expected amount of wind power produced. In addition, if the price is above the marginal cost of the thermal generation, then the company also gets operating profit from the difference between the market price and the marginal cost.
The total profit for the no-investment scenario is simply the operating profit multiplied by the number of periods that we consider (n=1000).
For the investment scenario, we must also subtract the investment cost.
The function returns the row with two new columns representing the expected profit under each scenario:
def profitFunction(row):
    #calculate profit without new investment
    operProfit = row.simPrices*ExistingWindCap*cfWind
    if row.simPrices > mcThermal:
        operProfit += (row.simPrices-mcThermal)*thermalCap
    row["profit"] = operProfit*n #multiply predicted operating profit by number of periods
    #calculate profit with new investment
    operProfitHighWind = row.simPricesHighWind*(ExistingWindCap+newWind)*cfWind
    if row.simPricesHighWind > mcThermal:
        operProfitHighWind += (row.simPricesHighWind-mcThermal)*thermalCap
    row["profitHighWind"] = operProfitHighWind*n - InvestmentCost
    return(row)
newSimsDF = simsDF.apply(profitFunction, axis=1)
newSimsDF
| | 0 | 1 | simPrices | simPricesHighWind | profit | profitHighWind |
|---|---|---|---|---|---|---|
0 | 3952.788759 | -0.493896 | 5012.876829 | 3142.103835 | 4.266096e+09 | 1.941840e+09 |
1 | 3957.635557 | -0.503402 | 3282.681386 | 712.747128 | 2.103352e+09 | -5.053851e+07 |
2 | 3957.711510 | -0.503402 | 1146.672129 | 2941.187571 | 2.866680e+08 | 1.670603e+09 |
3 | 3933.209222 | -0.491248 | 3878.110936 | 2215.606619 | 2.847639e+09 | 6.910689e+08 |
4 | 3969.419491 | -0.504066 | 1409.517923 | 4814.874590 | 3.523795e+08 | 4.200081e+09 |
... | ... | ... | ... | ... | ... | ... |
995 | 3956.120951 | -0.507384 | 3659.349398 | 3047.835291 | 2.574187e+09 | 1.814578e+09 |
996 | 3966.880672 | -0.515162 | 4529.915873 | 3013.219574 | 3.662395e+09 | 1.767846e+09 |
997 | 3989.846916 | -0.523470 | 3797.144854 | 3008.258164 | 2.746431e+09 | 1.761149e+09 |
998 | 3979.523098 | -0.521184 | 3466.670350 | 3837.460831 | 2.333338e+09 | 2.880572e+09 |
999 | 3959.795794 | -0.511533 | 2441.090415 | 2737.520674 | 1.051363e+09 | 1.395653e+09 |
1000 rows × 6 columns
We can now look at the distributions of potential profit scenarios
fig, ax = plt.subplots()
newSimsDF.profit.hist(bins=100, ax=ax, label="No investment", alpha=.5)
newSimsDF.profitHighWind.hist(bins=100, ax=ax, label="investment",alpha=.5)
ax.set_xlabel("Predicted profit")
ax.legend()
<matplotlib.legend.Legend at 0x7fc0c1c27e50>
Now that we have a distribution of possible profits for the two scenarios, we need to devise a criterion for making our decision.
A straightforward criterion would be to compare the expected (mean) profit and see which is higher.
expectedProfit = newSimsDF.profit.mean()
expectedProfitHighWind = newSimsDF.profitHighWind.mean()
print("Expected Profit, no investment", expectedProfit)
print("Proift, investment",expectedProfitHighWind)
Expected Profit, no investment 2436272596.907039 Proift, investment 2286089024.467252
expectedProfitHighWind>expectedProfit
False
Under this rule, we would not make the investment. But this is by no means the only viable decision rule. We could, for example, also look at which decision leads to the smallest probability of loss.
nLoss = np.sum(newSimsDF.profit<0)/nsim
nLossHighWind = np.sum(newSimsDF.profitHighWind<0)/nsim
print("Probability of loss, no investment", nLoss*100)
print("Probability of loss, investment", nLossHighWind*100)
Probability of loss, no investment 0.2 Probability of loss, investment 3.4000000000000004
Here the probability of a loss is higher if we invest.
Now imagine a scenario where an independent power producer wanted to come into the market and invest in the wind park. Then we could analyse the profitability of the wind park itself, without taking into account the effect on the other generators.
We do this below
windParkNetProfits = newSimsDF.simPricesHighWind*NewWind*cfWind*n-InvestmentCost
fig, ax = plt.subplots()
windParkNetProfits.hist(bins=50, ax=ax)
ax.set_ylabel("Net profit of wind park independently")
Text(0, 0.5, 'Net profit of wind park independently')
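We could then apply the same decision criteria as before to the stand-alone wind park, reusing the windParkNetProfits series computed above:
print("Expected net profit of wind park:", windParkNetProfits.mean())
print("Probability of loss:", (windParkNetProfits<0).mean()*100, "%")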
In the above example, we ran a regression using OLS and then simulated from the results, giving a Bayesian interpretation to a frequentist estimation. This is a limited way of doing a Bayesian(-like) analysis. In order to estimate truly Bayesian models, where we can incorporate prior information and propagate uncertainty, we need to make use of a simulation methodology called Markov chain Monte Carlo (MCMC). The details are beyond the scope of this course, but we can still make use of tools for Bayesian analysis using MCMC in Python.
The main package for Bayesian analysis and regression using MCMC in Python is called PyMC.
PyMC, like other MCMC simulation tools, makes use of a C++ compiler to run code and models. This means there is a system requirement to have a C++ compiler installed and correctly set up on your PC/Mac. My experience is that this can be a source of many technical problems, which can be difficult to resolve.
We'll go through some installation instructions, but if you can't get PyMC installed and working, I suggest simply skipping this portion of the lab. There is no expectation to include Bayesian analysis with MCMC simulation in a final project.
We'll follow the instructions on the PyMC website for installing.
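At the time of writing, the PyMC website recommends creating a dedicated conda environment with commands roughly like the following (check the website for the current command and version number):
conda create -c conda-forge -n pymc_env "pymc>=5"
conda activate pymc_env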
The above assumes you have installed Anaconda, which you should have if you have been following the course.
The environment is basically a workspace with a different configuration of Python. This means that every time you want to use PyMC, you first need to activate the environment by typing conda activate pymc_env (or whatever you called it) in your terminal. And when you want to quit that environment (and go back to your standard environment), you would type conda deactivate.
If you have a mac, you may also need to install Xcode (you can find it in the mac app store). Once you have installed Xcode, you may also need to install Command Line tools. This you can do by the following command in the terminal:
xcode-select --install
We'll first go through the Bayesian regression model as discussed in this tutorial to get a handle on using PyMC. You should read through the entire documentation.
Before we start, you may need to close your jupyter notebook and terminal. Then do the following steps:
Open a new terminal window
Activate your pymc environment by writing
conda activate pymc_env
You may also need to re-install jupyter notebooks for your pymc environment
conda install jupyter
Navigate to the file where you have your notebook, then open your notebook.
Then we should be ready to go
(Except if you get an error message about not being able to run iPython kernel in the pymc environment. You may then need to run your code directly in the terminal.)
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import xarray as xr
from pymc import HalfCauchy, Model, Normal, sample
print(f"Running on PyMC v{pm.__version__}")
RANDOM_SEED = 8927
rng = np.random.default_rng(RANDOM_SEED)
%config InlineBackend.figure_format = 'retina'
az.style.use("arviz-darkgrid")
Below, a simple "fake" dataset is generated:
size = 200
true_intercept = 1
true_slope = 2
x = np.linspace(0, 1, size)
# y = a + b*x
true_regression_line = true_intercept + true_slope * x
# add noise
y = true_regression_line + rng.normal(scale=0.5, size=size)
data = pd.DataFrame(dict(x=x, y=y))
Here is a visualization of the data together with the "true" regression line
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, xlabel="x", ylabel="y", title="Generated data and underlying model")
ax.plot(x, y, "x", label="sampled data")
ax.plot(x, true_regression_line, label="true regression line", lw=2.0)
plt.legend(loc=0);
Below is the standard PyMC setup of a Bayesian model.
It starts by defining prior distributions for the model: for the model uncertainty sigma, for the intercept term, and for the coefficient on the x term. These priors are designed to be fairly "weakly informative" or "semi-informative"; that is to say, we are not imposing much prior knowledge on the distributions, only some "regularization" that automatically discounts outlier data points. The Bayesian estimates will therefore probably be similar to maximum likelihood or OLS.
Here we are writing that we consider the y-variable to be normally distributed with mean values equal to the regression equation:
$$Y \sim N(\alpha + \beta x, \sigma)$$

Notice what this means: we are modelling each Y-value as having its own probability distribution, whereas with our typical modelling we give each Y-value a single "fitted value".
Finally, we tell our MCMC algorithm (called a "sampler" in the sense that we are randomly sampling data from the probability model we have defined), to run 3000 simulations. Hopefully the below code will run for you:
with Model() as model:  # model specifications in PyMC are wrapped in a with-statement
    # Define priors
    sigma = HalfCauchy("sigma", beta=10)
    intercept = Normal("Intercept", 0, sigma=20)
    slope = Normal("slope", 0, sigma=20)
    # Define likelihood
    likelihood = Normal("y", mu=intercept + slope * x, sigma=sigma, observed=y)
    # Inference!
    # draw 3000 posterior samples using NUTS sampling
    idata = sample(3000)
At a practical level, a big difference between Bayesian analysis and the tools we have been using is that we are not doing a form of optimization - that is, finding parameters that maximize the likelihood (maximum likelihood) or minimize squared errors (OLS).
Instead, we are setting up a probability model (prior, likelihood/data etc) and then "sampling" from the resulting posterior distribution.
So instead of getting as output single point estimates for our coefficients, we get sampled values for each coefficient. Basically, we get as output distributions for each parameter.
We can use the package arviz (az) to plot one of the distributions of our three parameters (the intercept, the coefficient on our x variable, and sigma):
az.plot_trace(idata, var_names="slope")
array([[<AxesSubplot: title={'center': 'slope'}>, <AxesSubplot: title={'center': 'slope'}>]], dtype=object)
If we wanted to generate a point estimate we could, for example, take the mean value of our distribution for a given parameter - for example, the coefficient on our slope variable:
idata.posterior.slope
<xarray.DataArray 'slope' (chain: 4, draw: 3000)> array([[2.07844938, 2.00946818, 1.98094884, ..., 1.92221807, 2.09151181, 2.222756 ], [1.96990623, 2.05698143, 2.0651126 , ..., 2.09183843, 2.08851394, 2.1722263 ], [2.25853188, 2.16465264, 1.72929357, ..., 1.82192198, 1.97196397, 2.13588937], [2.13479654, 2.02128419, 2.03796066, ..., 2.0216993 , 1.86120875, 2.24262356]]) Coordinates: * chain (chain) int64 0 1 2 3 * draw (draw) int64 0 1 2 3 4 5 6 7 ... 2993 2994 2995 2996 2997 2998 2999
np.mean(idata.posterior.slope)
<xarray.DataArray 'slope' ()> array(2.05917285)
And we could generate a standard error by simply calculating the standard deviation of the posterior samples:
np.std(idata.posterior.slope)
<xarray.DataArray 'slope' ()> array(0.12981255)
We can also generate a table of summary statistics
az.summary(idata.posterior, kind="stats")
| | mean | sd | hdi_3% | hdi_97% |
|---|---|---|---|---|
Intercept | 1.046 | 0.075 | 0.910 | 1.187 |
slope | 2.059 | 0.130 | 1.820 | 2.310 |
sigma | 0.524 | 0.026 | 0.474 | 0.574 |
We can then plot a posterior predictive chart:
with model:
    pm.sample_posterior_predictive(idata, extend_inferencedata=True, random_seed=rng)
az.plot_ppc(idata, num_pp_samples=100)
Sampling: [y]
<AxesSubplot: xlabel='y / y'>
The posterior predictive check compares the distribution of the y-variable in the actual data with distributions of simulated data generated from the estimated model. If the two do not align well, then we probably have a big problem.
Here we will run a regression comparing earnings and height, taken from Regression and Other Stories (ROS), using Bayesian MCMC.
We start by downloading the dataset from the ROS website
earnings = pd.read_csv("https://raw.githubusercontent.com/avehtari/ROS-Examples/master/Earnings/data/earnings.csv")
earnings
| | height | weight | male | earn | earnk | ethnicity | education | mother_education | father_education | walk | exercise | smokenow | tense | angry | age |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 74 | 210.0 | 1 | 50000.0 | 50.0 | White | 16.0 | 16.0 | 16.0 | 3 | 3 | 2.0 | 0.0 | 0.0 | 45 |
1 | 66 | 125.0 | 0 | 60000.0 | 60.0 | White | 16.0 | 16.0 | 16.0 | 6 | 5 | 1.0 | 0.0 | 0.0 | 58 |
2 | 64 | 126.0 | 0 | 30000.0 | 30.0 | White | 16.0 | 16.0 | 16.0 | 8 | 1 | 2.0 | 1.0 | 1.0 | 29 |
3 | 65 | 200.0 | 0 | 25000.0 | 25.0 | White | 17.0 | 17.0 | NaN | 8 | 1 | 2.0 | 0.0 | 0.0 | 57 |
4 | 63 | 110.0 | 0 | 50000.0 | 50.0 | Other | 16.0 | 16.0 | 16.0 | 5 | 6 | 2.0 | 0.0 | 0.0 | 91 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1811 | 61 | 120.0 | 0 | 15000.0 | 15.0 | White | 18.0 | 18.0 | 18.0 | 6 | 1 | 2.0 | 0.0 | 0.0 | 82 |
1812 | 64 | 130.0 | 0 | 8000.0 | 8.0 | White | 12.0 | 12.0 | 12.0 | 1 | 1 | 1.0 | 7.0 | 7.0 | 33 |
1813 | 72 | 194.0 | 1 | 60000.0 | 60.0 | White | 12.0 | 12.0 | 12.0 | 2 | 1 | 2.0 | 0.0 | 0.0 | 50 |
1814 | 63 | 155.0 | 0 | 15000.0 | 15.0 | Other | 14.0 | 14.0 | 14.0 | 6 | 1 | 2.0 | 2.0 | 2.0 | 69 |
1815 | 68 | 150.0 | 1 | 6000.0 | 6.0 | White | 12.0 | 12.0 | 12.0 | 1 | 6 | 1.0 | 2.0 | 2.0 | 27 |
1816 rows × 15 columns
Let's start by transforming the variables in our analysis to be standardized: we subtract the mean and divide by the standard deviation.
earn_s = (earnings.earn - np.mean(earnings.earn))/np.std(earnings.earn)
height_s = (earnings.height - np.mean(earnings.height))/np.std(earnings.height)
male = earnings.male
We start with an analysis with weakly informative priors:
with Model() as earnings_mod:  # model specifications in PyMC are wrapped in a with-statement
    # Define our data
    earnings = pm.MutableData("earnings", earn_s)
    isMale = pm.MutableData("isMale", male)
    height = pm.MutableData("height", height_s)
    # Define priors
    sigma = HalfCauchy("sigma", beta=10)
    intercept = Normal("Intercept", 0, sigma=10)
    beta_height = Normal("beta_height", 0, sigma=10)
    beta_male = Normal("beta_male", 0, sigma=10)
    # Define likelihood
    likelihood = Normal("y", mu=intercept + beta_height * height + beta_male*isMale, sigma=sigma, observed=earnings)
    # Inference!
    # draw 3000 posterior samples using NUTS sampling
    earnings_trace = sample(3000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [sigma, Intercept, beta_height, beta_male]
Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 21 seconds.
az.plot_trace(earnings_trace, var_names=["beta_height", "beta_male"])
array([[<AxesSubplot: title={'center': 'beta_height'}>, <AxesSubplot: title={'center': 'beta_height'}>], [<AxesSubplot: title={'center': 'beta_male'}>, <AxesSubplot: title={'center': 'beta_male'}>]], dtype=object)
Controlling for gender, the mean value of the height coefficient can be estimated as:
np.mean(earnings_trace.posterior.beta_height)
<xarray.DataArray 'beta_height' ()> array(0.10952402)
That is, about .11, which we can interpret to mean that a 1 standard deviation change in height is associated with a .11 standard deviation increase in earnings.
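If we want the coefficient back on the original scale (dollars of earnings per inch of height), we can undo the standardization by multiplying by the ratio of the standard deviations. A quick sketch - note that we reload the raw data, since the name earnings was reused inside the model block above:
raw = pd.read_csv("https://raw.githubusercontent.com/avehtari/ROS-Examples/master/Earnings/data/earnings.csv")
beta_std = float(np.mean(earnings_trace.posterior.beta_height))
beta_orig = beta_std*np.std(raw.earn)/np.std(raw.height) #dollars of earnings per inch of height
print(beta_orig)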
We can also calculate a 95% credible interval using the hdi (highest density interval) function in arViz:
az.hdi(earnings_trace.posterior.beta_height,hdi_prob=.95)
<xarray.Dataset> Dimensions: (hdi: 2) Coordinates: * hdi (hdi) <U6 'lower' 'higher' Data variables: beta_height (hdi) float64 0.04898 0.1704
We can also easily generate a summary table:
az.summary(earnings_trace)
| | mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat |
|---|---|---|---|---|---|---|---|---|---|
Intercept | -0.176 | 0.033 | -0.237 | -0.114 | 0.000 | 0.000 | 7191.0 | 8287.0 | 1.0 |
beta_height | 0.110 | 0.031 | 0.051 | 0.168 | 0.000 | 0.000 | 7159.0 | 7589.0 | 1.0 |
beta_male | 0.473 | 0.065 | 0.350 | 0.595 | 0.001 | 0.001 | 6665.0 | 7616.0 | 1.0 |
sigma | 0.950 | 0.016 | 0.921 | 0.980 | 0.000 | 0.000 | 7901.0 | 7768.0 | 1.0 |
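As a quick cross-check of these results, we can fit the same model with OLS and compare; with weakly informative priors like these, the posterior means should land close to the least-squares estimates. A sketch reusing earn_s, height_s and male from above:
import statsmodels.formula.api as smf
olsDF = pd.DataFrame({"earn_s": earn_s, "height_s": height_s, "male": male})
ols_mod = smf.ols("earn_s ~ height_s + male", data=olsDF).fit()
print(ols_mod.params) #compare with the posterior means in the table above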
In practice, for many modelling tasks, Bayesian estimation will give similar results to maximum likelihood or OLS. But there are multiple applications where Bayesian analysis works particularly well and where it would be difficult or impossible to do the same analysis with maximum likelihood or OLS estimation. We go quickly through a few of these applications:
The most direct, and sometimes controversial, advantage of Bayesian analysis is the ability to integrate outside information through informative priors. As we have seen, it is a misconception that you need to have prior information. But if you have information outside of the dataset you are analysing that you want to integrate into the analysis, you can do so through an informative prior. ROS discusses some examples of including informative priors in an analysis in 9.4 and 9.5.
One of the less well understood advantages of Bayesian analysis is the use of weakly informative priors to regularize and create robust inference. This essentially means having priors that do not impose any strong assumptions on the data, but which will automatically discount outlier values and keep inference stable. ROS discusses this in 9.5.
Because of the ease with which we can propagate uncertainty in an estimation or regression, Bayesian statistics is often used in decision analysis. That is, we can take the inference from, for example, a regression and then use it to make some decision. Above, I gave a simple example with made-up data. A more fully worked example is provided by Thomas Wiecki.
This is one of the main modern applications of Bayesian methods. In fact, you have probably seen a hierarchical model in practice: the election forecasting models used by the New York Times, The Economist and 538.com (among others) are Bayesian hierarchical models. Here you have multiple sources of information over different geographic and other groupings. You have many surveys of voter intentions - in the case of the US, for different states or even counties. You also have national surveys. How do you combine all that information to make a single forecast for who will win a presidential election? You need a model that takes state-level data for each state and aggregates upward. But you also want to use information from national surveys to inform the state-level inference. You can do this with Bayesian multilevel models. ROS Ch. 22 gives an introduction to multilevel models, but we do not cover them further in this course.
Modern time series analyses are often formulated as "state-space" models - basically meaning that you analyse a time series as a process of transitions between various unobserved states (like no pandemic to pandemic, or peace in Europe to war in Europe). The flexibility of Bayesian analysis is useful for estimating the parameters of such models. We discuss time series models at the end of this course, but we do not take an explicitly Bayesian approach.
Bayesian methods and Bayesian thinking are common within machine learning and artificial intelligence. You may, for example, encounter naive Bayes classification or Bayesian networks. We leave such methods for courses specializing in machine learning and AI.
(You do not need to use PyMC in order to complete the following problems.)
Suppose you have two coins in a box. One is a normal coin with heads on one side and tails on the other, and one is a trick coin with heads on both sides. You choose a coin at random and see that one of the sides is heads. What is the probability that you chose the trick coin?
(From Think Bayes by Allen Downey)
There are many variations of the Monty Hall problem. For example, suppose Monty always chooses Door 2 if he can, and only chooses Door 3 if he has to (because the car is behind Door 2). If you choose Door 1 and Monty opens Door 2, what is the probability the car is behind Door 3? If you choose Door 1 and Monty opens Door 3, what is the probability the car is behind Door 2?
(From Think Bayes by Allen Downey)
Simulation for decision analysis: An experiment is performed to measure the efficacy of a television advertising program. The result is an estimate that each minute spent on a national advertising program will increase sales by € 500 000, and this estimate has a standard error of €200 000. Assume the uncertainty in the treatment effect can be approximated by a normal distribution. Suppose ads cost €300 000 per minute. What is the expected net gain for purchasing 20 minutes of ads? What is the probability that the net gain is negative?
Based on a regression you have completed earlier or a new regression, construct a decision analysis problem and find a solution.
Downey, Allen. Think Bayes 2. http://allendowney.github.io/ThinkBayes2/
Salvatier J., Wiecki T.V., Fonnesbeck C. (2016) Probabilistic programming in Python using PyMC3. PeerJ Computer Science 2:e55 DOI: 10.7717/peerj-cs.55.