import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import cm
plt.rcParams["figure.figsize"] = (11, 5)
import pandas as pd
bnpGrowth = pd.read_csv("https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1138&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=A191RP1Q027SBEA&scale=left&cosd=1947-04-01&coed=2023-01-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Quarterly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2023-05-21&revision_date=2023-05-21&nd=1947-04-01")
bnpGrowth
DATE | A191RP1Q027SBEA | |
---|---|---|
0 | 1947-04-01 | 4.7 |
1 | 1947-07-01 | 6.0 |
2 | 1947-10-01 | 17.3 |
3 | 1948-01-01 | 9.6 |
4 | 1948-04-01 | 10.7 |
... | ... | ... |
299 | 2022-01-01 | 6.6 |
300 | 2022-04-01 | 8.5 |
301 | 2022-07-01 | 7.7 |
302 | 2022-10-01 | 6.6 |
303 | 2023-01-01 | 5.1 |
304 rows × 2 columns
bnpGrowth.columns = ['date', 'BNP_growth']
bnpGrowth.set_index("date").plot()
<Axes: xlabel='date'>
Create lags
bnpGrowth["BNP_growth_l1"] = bnpGrowth["BNP_growth"].shift(1)
bnpGrowth["BNP_growth_l2"] = bnpGrowth["BNP_growth"].shift(2)
bnpGrowth["BNP_growth_l3"] = bnpGrowth["BNP_growth"].shift(3)
bnpGrowth["BNP_growth_l4"] = bnpGrowth["BNP_growth"].shift(4)
bnpGrowth["const"] = 1
y = np.array(bnpGrowth.loc[:,'BNP_growth'])
X = np.array(bnpGrowth.loc[:,['const', 'BNP_growth_l1', 'BNP_growth_l2', 'BNP_growth_l3', 'BNP_growth_l4']])
X
array([[ 1. , nan, nan, nan, nan], [ 1. , 4.7, nan, nan, nan], [ 1. , 6. , 4.7, nan, nan], ..., [ 1. , 8.5, 6.6, 14.3, 9. ], [ 1. , 7.7, 8.5, 6.6, 14.3], [ 1. , 6.6, 7.7, 8.5, 6.6]])
T = bnpGrowth.shape[0]
T
304
#we set our initial values and restrict our X and y
# matrices to only include rows where we have all the data
y_3 = y[0]
y_2 = y[1]
y_1 = y[2]
y0 = y[3]
y = y[4:] #drop the first 4 rows
X = X[4:,:]
We recall our equation for getting the estimated betas through ols: $$\mathbf{\hat{\beta}} = \mathbf{(X^TX)^{-1}X^Ty}$$
But recall that solving using the inverse can often be computationally inefficient
Instead we can rewrite our equation
$$\mathbf{(X^TX)\hat{\beta}} = \mathbf{X^Ty}$$which is then in our standard format
$$\mathbf{Ax=b}$$where
$$\mathbf{A=(X^TX)}$$$$\mathbf{x}=\hat{\beta}$$#first solving using our standard formula
betas = np.dot((np.dot(np.linalg.inv(np.dot(X.T,X)),X.T)),y)
print(betas)
[ 3.91227308 0.18780641 0.2413185 0.0159126 -0.04334535]
#Then solving using our linear algebra solver
betas_sol = np.linalg.solve(np.dot(X.T,X), np.dot(X.T,y))
betas_sol
array([ 3.91227308, 0.18780641, 0.2413185 , 0.0159126 , -0.04334535])
#calculate the standard deviation of the residuals
fitted = np.dot(X,betas_sol)
resid = y-fitted
sigmasq_hat = np.var(resid)
sigma_hat = np.sqrt(sigmasq_hat)
sigma_hat
5.133942958889211
# set the parameters and create our A and b matrices
a0 = betas_sol[0]
a1 = betas_sol[1]
a2 = betas_sol[2]
a3 = betas_sol[3]
a4 = betas_sol[4]
A = np.identity(T) # The T x T identity matrix
for t in range(T):
if t-1 >= 0:
A[t, t-1] = -a1
if t-2 >= 0:
A[t, t-2] = -a2
if t-3 >= 0:
A[t, t-3] = -a3
if t-4 >= 0:
A[t, t-4] = -a4
b = np.full(T, a0) #fills an array of size T with zeros
#sets initial two values of b
b[0] = a0 + a1 * y0 + a2 * y_1 + a3*y_2 + a4*y_3
b[1] = a0 + a2 * y0 + a3 * y_1 + a4*y_2
b[2] = a0 + a3 * y0 + a4 * y_1
b[3] = a0 + a4 * y0
sigma_u = 2
u = np.random.normal(0, sigma_hat, T)
y_sim = np.linalg.solve(A, (b+u))
#Here is one run of our simulation
plt.plot(np.arange(T)+1, y_sim)
plt.xlabel('t')
plt.ylabel('y_sim')
plt.show()
#Now we will do 100 runs of our simulation and plot them together with the actual series
N = 100
for i in range(N):
col = cm.Greens(np.random.rand()) # Choose a random color from greens
#https://matplotlib.org/stable/tutorials/colors/colormaps.html
u = np.random.normal(0, sigma_hat, size=T)
y_sim = np.linalg.solve(A,(b + u))
plt.plot(np.arange(T)+1, y_sim, lw=0.5, color=col, alpha=.2)
plt.plot(np.arange(T)+1, bnpGrowth["BNP_growth"] , lw=1, color="black")
plt.xlabel('t')
plt.ylabel('y')
plt.show()
What we can say is that the model appears to model the mean value fairly well. Perhaps more importantly the model appears to give a fairly good representation of the variance and movements in the GDP. The one large outlier is the huge jumps related to the Covid-19 epidemic.