#Import the required modules for vector and matrix operations, data generation:
import numpy as np
#Import the required modules for plot creation:
import matplotlib.pyplot as plt
#Import the required modules for time series data generation:
import statsmodels.api as sm
#Import the required modules for test statistic calculation:
import statsmodels.stats as sm_stat
#Import the required modules for model estimation:
import statsmodels.tsa as smt
We will simulate the following time series processes:

(A) $Y_t = 1 + \epsilon_t + 0.5 \epsilon_{t-1}$
(B) $Y_t = 1 + \epsilon_t + 1.3 \epsilon_{t-1} - 0.4 \epsilon_{t-2}$
(C) $Y_t = 1 + 1.1 Y_{t-1} + \epsilon_t$
(D) $Y_t = 1 + 1.1 Y_{t-1} - 0.2 Y_{t-2} + \epsilon_t$
(E) $Y_t = 1.1 Y_{t-1} - 0.2 Y_{t-2} + \epsilon_t + 1.3 \epsilon_{t-1}$

We will assume that if $t \leq 0$, then $Y_t = \epsilon_t = 0$, and that $\epsilon_t \sim \mathcal{N}(0, 0.5^2)$ for $t \geq 1$.
We will begin by rewriting the processes in their expanded forms.
From their functional forms, we see that we have the following processes: (A) is an $\rm MA(1)$, (B) is an $\rm MA(2)$, (C) is an $\rm AR(1)$, (D) is an $\rm AR(2)$ and (E) is an $\rm ARMA(2, 1)$ process.
To recap the theory: an $\rm ARMA$ process is stationary if all the roots of its autoregressive lag polynomial $\Phi(z)$ lie outside the unit circle, and it is invertible if all the roots of its moving-average lag polynomial $\Theta(x)$ lie outside the unit circle.
This translates to checking the following: solve $\Phi(z) = 0$ (and, for invertibility, $\Theta(x) = 0$) and verify that the absolute value of every root is greater than one.
We begin by checking the stationarity of (C), (D) and (E).
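As a quick manual check (this should agree with the Python output below): for (C), $\Phi(z) = 1 - 1.1 z$ has the single root $z = 1/1.1 \approx 0.91$, which is inside the unit circle, so (C) is not stationary. For (D) and (E), $\Phi(z) = 1 - 1.1 z + 0.2 z^2$ has roots $z = \dfrac{1.1 \pm \sqrt{1.1^2 - 4 \cdot 0.2}}{2 \cdot 0.2}$, i.e. $z_1 \approx 1.15$ and $z_2 \approx 4.35$, both outside the unit circle, so the autoregressive parts of (D) and (E) are stationary.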
We can also do it via Python using poly1d. From the documentation, poly1d([1, 2, 3]) represents $x^2 + 2x + 3$, so:
print("(C)" + str(np.poly1d([-1.1, 1], variable = 'z')))
print("Roots: " + str(np.poly1d([-1.1, 1], variable = 'z').roots))
print("(D) and (E)\n" + str(np.poly1d([0.2, -1.1, 1], variable = 'z')))
print("Roots: " + str(np.poly1d([0.2, -1.1, 1], variable = 'z').roots))
The invertibility concept concerns only moving-average and ARMA processes, but it is checked in a similar way as before, only now we need to find the roots of the moving-average lag polynomial $\Theta(x)$:
We will not repeat the manual calculations and instead immediately look at the results via Python:
print("(A)" + str(np.poly1d([0.5, 1], variable = 'x')))
print("Roots: " + str(np.poly1d([0.5, 1], variable = 'x').roots))
print("(B)\n" + str(np.poly1d([-0.4, 1.3, 1], variable = 'x')))
print("Roots: " + str(np.poly1d([-0.4, 1.3, 1], variable = 'x').roots))
print("(E)\n" + str(np.poly1d([1.3, 1], variable = 'x')))
print("Roots: " + str(np.poly1d([1.3, 1], variable = 'x').roots))
We see that for (A), the process is invertible (the roots are outside the unit circle), while in (B) it is not since one of its roots is inside the unit circle. Finally, (E) is also non-invertible.
So, to recap: (A) is invertible, while (B) and (E) are non-invertible; (C) is non-stationary, while (D) and (E) are stationary.
Recall that an MA model is invertible if it is equivalent to a converging infinite-order AR model. By converging, we mean that the AR coefficients decrease to 0 as we move back in time.
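As a worked illustration using the coefficients of our processes: for an $\rm MA(1)$ process $Y_t = \mu + \epsilon_t + \theta \epsilon_{t-1}$, repeatedly substituting $\epsilon_{t-1} = Y_{t-1} - \mu - \theta \epsilon_{t-2}$ gives $Y_t = \dfrac{\mu}{1 + \theta} + \sum_{j = 1}^\infty (-1)^{j+1} \theta^j Y_{t-j} + \epsilon_t$. For (A), $\theta = 0.5$ and the AR coefficients $0.5, -0.25, 0.125, \ldots$ shrink to zero, so the representation converges; for the moving-average part of (E), $\theta = 1.3$ and the coefficients $1.3, -1.69, 2.197, \ldots$ grow without bound, so no converging $AR(\infty)$ representation exists.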
The logic behind generating process (A) is as follows: since $\epsilon_t = 0$ for $t \leq 0$, we have $Y_1 = 1 + \epsilon_1$, and $Y_t = 1 + \epsilon_t + 0.5 \epsilon_{t-1}$ for $t \geq 2$. We can carry out these calculations in Python as follows:
np.random.seed(123)
# Sample size
N = 150
# Innovations
epsilon = np.random.normal(loc = 0, scale = 0.5, size = N)
# The first observation: Y_1 = 1 + epsilon_1
A_data = 1 + epsilon[0]
for j in range(1, N):
    Y_temp = 1 + epsilon[j] + 0.5 * epsilon[j - 1]
    A_data = np.append(A_data, Y_temp)
The logic behind generating process (B) is as follows: $Y_1 = 1 + \epsilon_1$, $Y_2 = 1 + \epsilon_2 + 1.3 \epsilon_1$, and $Y_t = 1 + \epsilon_t + 1.3 \epsilon_{t-1} - 0.4 \epsilon_{t-2}$ for $t \geq 3$. We can carry out these calculations in Python as follows:
np.random.seed(123)
# Sample size
N = 150
# Innovations
epsilon = np.random.normal(loc = 0, scale = 0.5, size = N)
# The first two observations have a different formula, since epsilon_t = 0 for t <= 0
B_data = 1 + epsilon[0]
B_data = np.append(B_data, 1 + epsilon[1] + 1.3 * epsilon[0])
for j in range(2, N):
    Y_temp = 1 + epsilon[j] + 1.3 * epsilon[j - 1] - 0.4 * epsilon[j - 2]
    B_data = np.append(B_data, Y_temp)
We now start the loop from 2 instead of 1, since the first two elements involve $\epsilon_t$ at values $t \leq 0$, so they have a different formula.
plt.figure(figsize = (15, 5))
plt.plot(A_data, label = "$Y_t = 1 + \\epsilon_t + 0.5 \\epsilon_{t-1}$ (invertible)")
plt.plot(B_data, label = "$Y_t = 1 + \\epsilon_t + 1.3 \\epsilon_{t-1} - 0.4 \\epsilon_{t-2}$ (non-invertible)")
plt.title("Moving-Average Processes")
plt.legend()
plt.show()
Notice that the plots do not exhibit large differences - you may not be able to tell whether a process is invertible or not from the time series plots alone.
The logic behind generating process (C) is as follows: $Y_1 = 1 + \epsilon_1$, and $Y_t = 1 + 1.1 Y_{t-1} + \epsilon_t$ for $t \geq 2$. We can carry out these calculations in Python as follows:
np.random.seed(123)
# Sample size
N = 150
# Innovations
epsilon = np.random.normal(loc = 0, scale = 0.5, size = N)
# The first observation: Y_1 = 1 + epsilon_1
C_data = np.array(1 + epsilon[0], ndmin = 1)
for j in range(1, N):
    Y_temp = 1 + 1.1 * C_data[j - 1] + epsilon[j]
    C_data = np.append(C_data, Y_temp)
The logic behind generating process (D) is as follows: $Y_1 = 1 + \epsilon_1$, $Y_2 = 1 + 1.1 Y_1 + \epsilon_2$, and $Y_t = 1 + 1.1 Y_{t-1} - 0.2 Y_{t-2} + \epsilon_t$ for $t \geq 3$. We can carry out these calculations in Python as follows:
np.random.seed(123)
# Sample size
N = 150
# Innovations
epsilon = np.random.normal(loc = 0, scale = 0.5, size = N)
# The first two observations have a different formula, since Y_t = 0 for t <= 0
D_data = np.array(1 + epsilon[0], ndmin = 1)
D_data = np.append(D_data, 1 + 1.1 * D_data[0] + epsilon[1])
for j in range(2, N):
    Y_temp = 1 + 1.1 * D_data[j - 1] - 0.2 * D_data[j - 2] + epsilon[j]
    D_data = np.append(D_data, Y_temp)
We start the loop from 2 instead of 1, since the first two elements involve past values of $Y_t$ at values $t \leq 0$, so they have a different formula.
fig = plt.figure(figsize = (15, 5))
fig.add_subplot(1, 2, 1).plot(C_data)
plt.title("$Y_t = 1 + 1.1 Y_{t-1} + \\epsilon_t$ (non-stationary)")
fig.add_subplot(1, 2, 2).plot(D_data)
plt.title("$Y_t = 1 + 1.1 Y_{t-1} - 0.2 Y_{t-2} + \\epsilon_t$ (stationary)")
plt.show()
The logic behind generating process (E) is as follows: $Y_1 = \epsilon_1$, $Y_2 = 1.1 Y_1 + \epsilon_2 + 1.3 \epsilon_1$, and $Y_t = 1.1 Y_{t-1} - 0.2 Y_{t-2} + \epsilon_t + 1.3 \epsilon_{t-1}$ for $t \geq 3$. We can carry out these calculations in Python as follows:
np.random.seed(123)
# Sample size
N = 150
# Innovations
epsilon = np.random.normal(loc = 0, scale = 0.5, size = N)
# The first two observations have a different formula, since Y_t = epsilon_t = 0 for t <= 0
E_data = np.array(epsilon[0], ndmin = 1)
E_data = np.append(E_data, 1.1 * E_data[0] + epsilon[1] + 1.3 * epsilon[0])
for j in range(2, N):
    Y_temp = 1.1 * E_data[j - 1] - 0.2 * E_data[j - 2] + epsilon[j] + 1.3 * epsilon[j - 1]
    E_data = np.append(E_data, Y_temp)
plt.figure(figsize = (15, 5))
plt.plot(E_data, label = "$Y_t = 1.1 Y_{t-1} - 0.2 Y_{t-2} + \\epsilon_t + 1.3 \\epsilon_{t-1}$ (stationary non-invertible)")
plt.plot(D_data, label = "$Y_t = 1 + 1.1 Y_{t-1} - 0.2 Y_{t-2} + \\epsilon_t$ (stationary)")
plt.title("ARMA(2, 1) Process vs AR(2) process")
plt.legend()
plt.show()
Note the difference in the mean - our $ARMA$ process does not have an intercept term.
Alternatively, we can use the built-in functions to simulate the models. We need to specify the $\Phi(z)$ and $\Theta(x)$ functions.
np.random.seed(123)
# The 'ar' argument takes the coefficients of Phi(z) = 1 - 1.1z + 0.2z^2 (note the signs),
# and the 'ma' argument the coefficients of Theta(x) = 1 + 1.3x
E_auto_spec = smt.arima_process.ArmaProcess(ar = np.array([1, -1.1, 0.2]), ma = [1, 1.3])
E_autogenr = E_auto_spec.generate_sample(N, scale = 0.5)
Note that for pure $\rm AR$ models $\Theta(x) = 1$ and for pure $\rm MA$ models $\Phi(z) = 1$.
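For example, a minimal sketch of specifying a pure $\rm AR(1)$ and a pure $\rm MA(1)$ process with this function (the coefficient values and variable names here are arbitrary, chosen only for illustration):
# Pure AR(1) with coefficient 0.5: Theta(x) = 1, so ma = [1]
AR1_spec = smt.arima_process.ArmaProcess(ar = [1, -0.5], ma = [1])
# Pure MA(1) with coefficient 0.5: Phi(z) = 1, so ar = [1]
MA1_spec = smt.arima_process.ArmaProcess(ar = [1], ma = [1, 0.5])
print("AR(1) is stationary: ", AR1_spec.isstationary)
print("MA(1) is invertible: ", MA1_spec.isinvertible)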
The nice thing about this function is that we can examine these lag polynomials:
E_auto_spec.arpoly
E_auto_spec.mapoly
as well as get their calculated roots:
E_auto_spec.arroots
E_auto_spec.maroots
and check the stationarity/invertibility automatically:
print("Stationarity: " + str(E_auto_spec.isstationary))
print("Invertibility: " + str(E_auto_spec.isinvertible))
plt.figure(figsize = (20, 8))
plt.plot(E_data, label = "$Y_t = 1.1 Y_{t-1} - 0.2 Y_{t-2} + \\epsilon_t + 1.3 \\epsilon_{t-1}$ (stationary non-invertible)", color = "blue")
plt.plot(E_autogenr, label = "autogenerated ARMA(2, 1) process with the same parameters", linestyle = "-.", color = "red")
plt.title("ARMA(2, 1) Process vs AR(2) process")
plt.legend()
plt.show()
Note: we used the same innovations to generate processes (A) - (E), while the automatically generated series draws its own innovations, simulated via the same specified seed.
fig = plt.figure(figsize=(15, 5))
sm.tsa.graphics.plot_acf(E_autogenr, lags = 20, ax = fig.add_subplot(121))
plt.ylabel('ACF')
sm.tsa.graphics.plot_pacf(E_autogenr, lags = 20, ax = fig.add_subplot(122), method = "ywm")
plt.ylabel('PACF')
plt.tight_layout()
NOTE on a possible PACF error: we need to specify either method = "ywm", or method = "ols".
We remember that $\epsilon_t \sim \mathcal{N}(0, 0.5^2)$.
print(A_data.mean())
print(B_data.mean())
Assuming that $Y_t = 0$ for $t \leq 0$, the mean of the $AR(1)$ process (C) can be expressed as $\mathbb{E} (Y_t) = \sum_{j = 0}^{t-1} (1.1)^j = \dfrac{1.1^{t} - 1}{1.1 - 1}$. So, for $t = 150$ this is roughly
tmp_sum = 0
for i in range(0, N):
    tmp_sum = tmp_sum + (1.1)**i
print(tmp_sum)
print(C_data.mean())
An example of a non-stationary process - a non-constant mean.
print(D_data.mean())
print(E_data.mean())
fig = plt.figure(figsize=(15, 5))
sm.tsa.graphics.plot_acf(A_data, lags = 20, zero = False, ax = fig.add_subplot(121))
plt.ylabel('ACF')
sm.tsa.graphics.plot_pacf(A_data, lags = 20, zero = False, ax = fig.add_subplot(122), method = "ywm")
plt.ylabel('PACF')
plt.tight_layout()
We see that the ACF is abruptly cut off after the first lag. Regarding the PACF - it appears to be somewhat declining, though it is difficult to say. We would expect it to be an MA(1) process.
fig = plt.figure(figsize=(15, 5))
sm.tsa.graphics.plot_acf(B_data, lags = 20, zero = False, ax = fig.add_subplot(121))
plt.ylabel('ACF')
sm.tsa.graphics.plot_pacf(B_data, lags = 20, zero = False, ax = fig.add_subplot(122), method = "ywm")
plt.ylabel('PACF')
plt.tight_layout()
We see that the ACF is abruptly cut off after the first lag. Regarding the PACF - it appears to be somewhat declining, though it is difficult to say. We would expect an MA(1) process (though in reality we know that it is an MA(2) process).
fig = plt.figure(figsize=(15, 5))
sm.tsa.graphics.plot_acf(C_data, lags = 20, zero = False, ax = fig.add_subplot(121))
plt.ylabel('ACF')
sm.tsa.graphics.plot_pacf(C_data, lags = 20, zero = False, ax = fig.add_subplot(122), method = "ywm")
plt.ylabel('PACF')
plt.tight_layout()
The ACF appears to decline, while the PACF is abruptly cut off after the first lag. We would expect an AR(1) process.
fig = plt.figure(figsize=(15, 5))
sm.tsa.graphics.plot_acf(D_data, lags = 20, zero = False, ax = fig.add_subplot(121))
plt.ylabel('ACF')
sm.tsa.graphics.plot_pacf(D_data, lags = 20, zero = False, ax = fig.add_subplot(122), method = "ywm")
plt.ylabel('PACF')
plt.tight_layout()
The ACF appears to decline, while the PACF is abruptly cut off after the first lag. We would expect an AR(1) process (though in reality it is an AR(2) process).
(Try increasing the sample size and re-examining the ACF/PACF plots - are they closer to the true theoretical cases?)
fig = plt.figure(figsize=(15, 5))
sm.tsa.graphics.plot_acf(E_data, lags = 20, zero = False, ax = fig.add_subplot(121))
plt.ylabel('ACF')
sm.tsa.graphics.plot_pacf(E_data, lags = 20, zero = False, ax = fig.add_subplot(122), method = "ywm")
plt.ylabel('PACF')
plt.tight_layout()
We see that both the ACF and the PACF appear to be declining - this indicates an ARMA process. We cannot identify its order from these plots alone.
Note that if we have automatically generated the data using smt.arima_process.ArmaProcess, we can examine the theoretical ACF/PACF plots:
fig = plt.figure(figsize = (15, 10))
ax1 = fig.add_subplot(211)
ax1.stem(E_auto_spec.acf(lags = 20), label = "Theoretical ACF", basefmt = "white", linefmt = "r-", markerfmt = "ro-")
ax1.stem(smt.stattools.acf(E_autogenr, nlags = 19), label = "Empirical ACF", basefmt = "white", linefmt = "g-", markerfmt = "go-")
ax1.axhline(y = 0, color = "black")
plt.legend()
ax2 = fig.add_subplot(212)
ax2.stem(E_auto_spec.pacf(lags = 20), label = "Theoretical PACF", basefmt = "white", linefmt = "r-", markerfmt = "ro-")
ax2.stem(smt.stattools.pacf(E_autogenr, nlags = 19, method = "ywm"), label = "Empirical PACF", basefmt = "white", linefmt = "g-", markerfmt = "go-")
ax2.axhline(y = 0, color = "black")
plt.legend()
plt.show()
As an example, we will re-generate the process, but with a larger sample size of $N = 1500$:
np.random.seed(123)
E_autogenr_large = E_auto_spec.generate_sample(1500, scale = 0.5)
fig = plt.figure(figsize = (15, 10))
ax1 = fig.add_subplot(211)
ax1.stem(E_auto_spec.acf(lags = 20), label = "Theoretical ACF", basefmt = "white", linefmt = "r-", markerfmt = "ro-")
ax1.stem(smt.stattools.acf(E_autogenr_large, nlags = 19), label = "Empirical ACF, sample size N = 1500", basefmt = "white", linefmt = "g-", markerfmt = "go-")
ax1.axhline(y = 0, color = "black")
plt.legend()
ax2 = fig.add_subplot(212)
ax2.stem(E_auto_spec.pacf(lags = 20), label = "Theoretical PACF", basefmt = "white", linefmt = "r-", markerfmt = "ro-")
ax2.stem(smt.stattools.pacf(E_autogenr_large, nlags = 19, method = "ywm"), label = "Empirical PACF, sample size N = 1500", basefmt = "white", linefmt = "g-", markerfmt = "go-")
ax2.axhline(y = 0, color = "black")
plt.legend()
plt.show()
The following result is important to note: the larger the sample size, the closer the estimates are to the true values.
We will estimate the models with the exact order that we used to generate the data:
A_spec_1 = smt.arima_model.ARMA(A_data, order = (0, 1))
A_mdl_1 = A_spec_1.fit(trend = 'c')
print(A_mdl_1.summary())
B_spec_1 = smt.arima_model.ARMA(B_data, order = (0, 2))
B_mdl_1 = B_spec_1.fit(trend = 'c')
print(B_mdl_1.summary())
Note that the coefficients are not close to the true values (except for the constant and the overall coefficient signs). This is because our true process is not invertible. Hence, the software attempts to fit an invertible model, which results in different coefficients.
C_spec_1 = smt.arima_model.ARMA(C_data, order = (1, 0))
C_mdl_1 = C_spec_1.fit(trend = 'c')
print(C_mdl_1.summary())
Note the coefficient values for the non-stationary process.
D_spec_1 = smt.arima_model.ARMA(D_data, order = (2, 0))
D_mdl_1 = D_spec_1.fit(trend = 'c')
print(D_mdl_1.summary())
Note that the const coefficient of the model is close to the process mean - it is not the intercept!
E_spec_1 = smt.arima_model.ARMA(E_data, order = (2, 1))
E_mdl_1 = E_spec_1.fit(trend = 'nc')
print(E_mdl_1.summary())
Note that we use trend to decide whether to include a constant or not: 'c' includes a constant, 'nc' - no constant.
We can also see the roots for stationarity and invertibility. Again, the fitted model attempts to fit a stationary and invertible process, hence the MA coefficient is different for this process (which is generated from a non-invertible data generating process).
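For example (assuming the fitted result object exposes the arroots and maroots attributes, as the ARMA results objects of this statsmodels version do), a small sketch of inspecting the roots of the fitted model:
# Roots of the fitted AR and MA lag polynomials - for a stationary and invertible fit
# they should lie outside the unit circle
print("Fitted AR roots: ", E_mdl_1.arroots)
print("Fitted MA roots: ", E_mdl_1.maroots)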
We can also extract specific information about the model:
print("Included trend (this is actually the constant): ", E_mdl_1.k_trend)
print("Included constat: ", E_mdl_1.k_constant)
print("Number of AR parameters: ", E_mdl_1.k_ar)
print("Number of MA parameters" , E_mdl_1.k_ma)
print("Included exogenous variables: ", E_mdl_1.k_exog)
as well as specific parameters:
print("All of the parameters: ", E_mdl_1.params)
print("All of the p-values: ", E_mdl_1.pvalues)
print("Only AR parameters: ", E_mdl_1.arparams)
print("Only MA parameters: ", E_mdl_1.maparams)
Interestingly, on the larger sample size the autoregressive parameters are indeed close to the true values:
print(smt.arima_model.ARMA(E_autogenr_large, order = (2, 1)).fit(trend = 'nc').summary())
However, the moving-average parameters are not, since the process is non-invertible.
The estimated model is:
$ \widehat{Y}_t = 1.1010 \cdot \widehat{Y}_{t-1} - 0.2045 \cdot \widehat{Y}_{t-2} + e_t + 0.7826 \cdot e_{t-1} $
where $e_t = Y_t - 1.1010 \cdot \widehat{Y}_{t-1} + 0.2045 \cdot \widehat{Y}_{t-2} - 0.7826 \cdot e_{t-1}$ and $e_0 = 0$.
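This is roughly what we would expect from theory: when a non-invertible MA root is "flipped" to its invertible counterpart, the MA(1)-type coefficient $\theta = 1.3$ is replaced by $1/\theta = 1/1.3 \approx 0.77$ (with a correspondingly rescaled innovation variance), which is close to the estimated $0.7826$.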
We will see more on model fitting in the last task.
We will automatically estimate the unknown parameters as well as the lag order. Note the documentation:
This method can be used to tentatively identify the order of an ARMA process, provided that the time series is stationary and invertible.
A_est_auto = sm.tsa.stattools.arma_order_select_ic(A_data, ic='aic', trend='c')
print("(A) process order: ", A_est_auto.aic_min_order)
The automatically selected model is:
A_mdl_auto = smt.arima_model.ARMA(A_data, order = A_est_auto.aic_min_order).fit(trend = 'c')
print(A_mdl_auto.summary().tables[1])
If we wanted, we could restrict the models by specifying the maximum order of the $MA$ or the $AR$ (or both) parts:
A_est_auto = sm.tsa.stattools.arma_order_select_ic(A_data, ic='aic', trend='c', max_ar = 0)
print("(A) process order: ", A_est_auto.aic_min_order)
B_est_auto = sm.tsa.stattools.arma_order_select_ic(B_data, ic='aic', trend='c')
print("(B) process order: ", B_est_auto.aic_min_order)
The automatically selected model is:
B_mdl_auto = smt.arima_model.ARMA(B_data, order = B_est_auto.aic_min_order).fit(trend = 'c')
print(B_mdl_auto.summary().tables[1])
C_est_auto = sm.tsa.stattools.arma_order_select_ic(C_data, ic='aic', trend='c')
print("(C) process order: ", C_est_auto.aic_min_order)
Since our process is non-stationary, we will get an error when estimating. To make the error more compact, we will wrap the function call in a try/except block:
try:
    C_mdl_auto = smt.arima_model.ARMA(C_data, order = C_est_auto.aic_min_order).fit(trend = 'c')
    print(C_mdl_auto.summary().tables[1])
except ValueError as err:
    print('Handling run-time error:', err)
So our suggestions are to either (a) induce stationarity, or (b) choose a different model order. Since we know that the process is non-stationary, we may try to specify an $\rm ARMA(2, 2)$ model and hope that the estimated $\rm AR$ coefficients correspond to an approximately stationary process, or fit a pure $\rm MA$ model. We will go with a pure $\rm MA$ process, since such processes are always stationary (though they may not be the best fit for the data). We opt to choose the order automatically.
Note that for larger orders the calculations take a while, so we restrict to $q_\max = 6$ (default max is 2 for $\rm MA$ and $p_\max = 4$ for $\rm AR$).
C_mdl_auto = sm.tsa.stattools.arma_order_select_ic(C_data, ic='aic', trend='c', max_ar = 0, max_ma = 6)
print("(C) process order: ", C_mdl_auto.aic_min_order)
C_mdl_auto = smt.arima_model.ARMA(C_data, order = C_mdl_auto.aic_min_order).fit(trend = 'c')
print(C_mdl_auto.summary().tables[1])
Note again that the non-stationarity of the process is quite a problem even when looking at the results - some standard errors are nan.
D_est_auto = sm.tsa.stattools.arma_order_select_ic(D_data, ic='aic', trend='c')
print("(D) process order: ", D_est_auto.aic_min_order)
The automatically selected model is:
D_mdl_auto = smt.arima_model.ARMA(D_data, order = D_est_auto.aic_min_order).fit(trend = 'c')
print(D_mdl_auto.summary().tables[1])
E_est_auto = sm.tsa.stattools.arma_order_select_ic(E_data, ic='aic', trend='nc')
print("(E) process order: ", E_est_auto.aic_min_order)
Note that we would expect to have problems with (B) and (C), since the function requires stationarity and invertibility.
Note that in the case of (A), specifying max_ar = 0 returns a result without any warnings. This is a current drawback of this method, as it does not automatically distinguish when an order of 0 is appropriate for either the MA or the AR component. Update: after a package update, some warnings may have disappeared.
Either way, in all cases the final suggested models are ARMA models, instead of pure AR or pure MA models.
The automatically selected model is:
E_mdl_auto = smt.arima_model.ARMA(E_data, order = E_est_auto.aic_min_order).fit(trend = 'nc')
print(E_mdl_auto.summary().tables[1])
For the larger sample size:
print("(E) process with larger sample order: ", sm.tsa.stattools.arma_order_select_ic(E_autogenr_large, ic='aic', trend='nc').aic_min_order)
The suggested order is actually the same as the order of the true underlying process. Note that, because of the non-invertibility of the process, this may not always be the case.
If the process were both stationary and invertible, then it is very likely that, for a larger sample, the automated order selection would suggest a model order close to that of the true process, as long as the true underlying process is an $\rm ARMA(p, q)$ process.
We will show with (A) since it is both stationary and invertible.
Rewriting (A) as an $AR(\infty)$ process yields $Y_t = \alpha + \sum_{j = 1}^\infty (-1)^{j+1} \theta^j Y_{t-j} + \epsilon_t$, with $\theta = 0.5$, i.e. with coefficients:
theta_coef = 0.5
for j in range(2, 10):
    theta_coef = np.append(theta_coef, (-1)**(j + 1) * 0.5**j)
print(theta_coef)
A manually specified arbitrary approximation could be, for example, an $\rm AR(10)$ process:
A_spec_2 = smt.arima_model.ARMA(A_data, order = (10, 0))
A_mdl_2 = A_spec_2.fit(trend = 'c')
print(A_mdl_2.summary().tables[0])
print(A_mdl_2.summary().tables[1])
print(A_mdl_2.summary().tables[2])
We can also try to automatically fit the best AR model:
A_est_auto = sm.tsa.stattools.arma_order_select_ic(A_data, ic='aic', trend='c', max_ma = 0)
print("(A) process order: ", A_est_auto.aic_min_order)
A_spec_2 = smt.arima_model.ARMA(A_data, order = A_est_auto.aic_min_order)
A_mdl_2 = A_spec_2.fit(trend = 'c')
print(A_mdl_2.summary2())
Note the different coefficients. This is because the lower order model is a sort of approximation of the higher order model.
We leave the remaining cases as exercises.
From our automatically specified process, we can obtain the coefficients of its pure AR or pure MA representation up to a chosen order:
E_auto_spec.arma2ma(lags = 5)
E_auto_spec.arma2ar(lags = 5)
Remember that our underlying assumption is that $\epsilon_t \sim WN$. In other words, the residuals should have zero mean, constant variance and be serially uncorrelated.
The zero-mean property is guaranteed by the estimation method (the proof is similar to that of the OLS estimation method, which minimizes the sum of squared residuals in the univariate cross-sectional regression case). We can examine the variance by checking for ARCH effects. For the purpose of these exercises, we will examine the variance only visually.
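Although we stick to visual inspection here, a minimal sketch of a formal check for ARCH effects (using Engle's LM test from statsmodels, applied, for example, to the residuals of the fitted model for (A)) could look as follows:
# Engle's LM test for ARCH effects:
# the returned tuple contains the LM statistic, its p-value, the F statistic and its p-value
arch_test = sm_stat.diagnostic.het_arch(A_mdl_1.resid)
print("ARCH LM statistic: ", arch_test[0])
print("ARCH LM p-value: ", arch_test[1])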
We note that, unlike the forecast package in R, the statsmodels package in Python does not have a tsdisplay function. Fortunately, we can always create one ourselves:
import pandas as pd

def tsdisplay(y, figsize = (14, 8), title = "", lags = 20):
    tmp_data = pd.Series(y)
    fig = plt.figure(figsize = figsize)
    # Plot the time series
    tmp_data.plot(ax = fig.add_subplot(311), title = "$Time\ Series\ " + title + "$", legend = False)
    # Plot the ACF:
    sm.graphics.tsa.plot_acf(tmp_data, lags = lags, zero = False, ax = fig.add_subplot(323))
    plt.xticks(np.arange(1, lags + 1, 1.0))
    # Plot the PACF:
    sm.graphics.tsa.plot_pacf(tmp_data, lags = lags, zero = False, ax = fig.add_subplot(324))
    plt.xticks(np.arange(1, lags + 1, 1.0))
    # Plot the QQ plot of the data:
    sm.qqplot(tmp_data, line = 's', ax = fig.add_subplot(325))
    plt.title("QQ Plot")
    # Plot the residual histogram:
    fig.add_subplot(326).hist(tmp_data, bins = 40, density = True)
    plt.title("Histogram")
    # Fix the layout of the plots:
    plt.tight_layout()
    plt.show()
Furthermore, the tsdiag function, available in R, does not have an analog in statsmodels. Nevertheless, we can create our own:
def tsdiag(y, figsize = (14, 8), title = "", lags = 30):
    tmp_data = pd.Series(y)
    tmp_data.index += 1
    tmp_acor = list(sm_stat.diagnostic.acorr_ljungbox(tmp_data, lags = lags, boxpierce = True))
    # Plot Ljung-Box and Box-Pierce statistic p-values:
    plt.plot(range(1, len(tmp_acor[0]) + 1), tmp_acor[1], 'bo', label = "Ljung-Box p-values")
    plt.plot(range(1, len(tmp_acor[0]) + 1), tmp_acor[3], 'go', label = "Box-Pierce p-values")
    plt.xticks(np.arange(1, len(tmp_acor[0]) + 1, 1.0))
    plt.axhline(y = 0.05, color = "red", label = "5% significance level")
    plt.title("$Time\ Series\ " + title + "$")
    plt.legend()
    plt.show()
    # Return the statistics:
    col_index = ["Ljung-Box: X-squared", "Ljung-Box: p-value", "Box-Pierce: X-squared", "Box-Pierce: p-value"]
    return pd.DataFrame(tmp_acor, index = col_index, columns = range(1, len(tmp_acor[0]) + 1))
We now examine the residuals of our models:
The residuals of the true model:
tsdisplay(A_mdl_1.resid, title = ":\ Residuals\ Of\ Y_t = 1 + \epsilon_t + 0.5 \epsilon_{t-1}")
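We can also apply the tsdiag helper defined above to the same residuals (a usage sketch; the lag count is arbitrary):
# Ljung-Box and Box-Pierce p-values for the residuals of the fitted model (A)
print(tsdiag(A_mdl_1.resid, lags = 20))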