# Set plot options in Jupyter Lab
options(repr.plot.width = 20)
options(repr.plot.height = 6)
We will revisit some of the examples for time series decomposition and seasonal ARMA model estimation.
?fma::airpass
airpass <- fma::airpass
We can examine the data plot:
plot.ts(airpass)
Note the total number of years in the series:
total_years <- length(unique(floor(time(airpass))))
print(total_years)
We can also create a seasonal plot:
forecast::seasonplot(airpass, year.labels = TRUE, year.labels.left = TRUE, col = 1:total_years)
Looking at the $\rm ACF$ and $\rm PACF$ plots:
forecast::tsdisplay(airpass)
and note that the ACF declines slowly and in waves, which appear to repeat every 12 lags - this indicates seasonality. The time series chart indicates an increasing trend.
The variation appears to be increasing, which also indicates that the series may be multiplicative: $Y_t = T_t \cdot S_t \cdot E_t$.
We want to take logarithms of the data, to get an additive form: $\log(Y_t) = \widetilde{Y}_t$ so that $\widetilde{Y}_t = \widetilde{T}_t + \widetilde{S}_t + \widetilde{E}_t$
log_passengers = log(airpass)
forecast::tsdisplay(log_passengers)
Now the series appears to be additive. We can move on to decomposition of the series.
# 2x12 moving average weights, based on the seasonal period (12 months)
ma_weights <- c(1/24, rep(1/12, frequency(log_passengers) - 1), 1/24)
one_sided_trd <- stats::filter(log_passengers, filter = ma_weights, sides = 1)
two_sided_trd <- stats::filter(log_passengers, filter = ma_weights, sides = 2)
plot.ts(log_passengers, main = "Log of Airpassengers")
lines(one_sided_trd, col = "red")
lines(two_sided_trd, col = "blue")
legend(x = "topleft", legend = c("Actual", "One-sided MA Trend", "Two-sided MA Trend"), lty = 1, col = c("black", "red", "blue"))
Next, we move on to decomposing the seasonality:
#seasonal component of one-sided averaging
one_sided_season <- log_passengers - one_sided_trd
one_sided_season <- matrix(one_sided_season, ncol = 12, byrow = TRUE)
one_sided_season <- apply(one_sided_season, 2, mean, na.rm = TRUE)
#seasonal component of two-sided averaging
two_sided_season <- log_passengers - two_sided_trd
two_sided_season <- matrix(two_sided_season, ncol = 12, byrow = TRUE)
two_sided_season <- apply(two_sided_season, 2, mean, na.rm = TRUE)
We then scale the seasons to sum up to 0:
one_sided_season <- one_sided_season - sum(one_sided_season) / length(one_sided_season)
two_sided_season <- two_sided_season - sum(two_sided_season) / length(two_sided_season)
par(mfrow = c(1, 2))
plot.ts(one_sided_season, main = "Seasonality from One-sided trend decomposition")
plot.ts(two_sided_season, main = "Seasonality from Two-sided trend decomposition")
Using $S_t = S_{t+d}$, create the series for the full years:
one_sided_season <- rep(one_sided_season, times = total_years)
one_sided_season <- ts(one_sided_season, start = start(log_passengers), freq = frequency(log_passengers))
#
two_sided_season <- rep(two_sided_season, times = total_years)
two_sided_season <- ts(two_sided_season, start = start(log_passengers), freq = frequency(log_passengers))
Next, we can examine the decomposition visually
par(mfrow = c(1, 2))
#
plot(log_passengers, lwd = 2, main = "One-sided averaging decomposition")
lines(one_sided_trd, col = "blue", lwd = 2)
lines(one_sided_trd + one_sided_season, col = "red", lwd = 2)
#
plot(log_passengers, lwd = 2, main = "Two-sided averaging decomposition")
lines(two_sided_trd, col = "blue", lwd = 2)
lines(two_sided_trd + two_sided_season, col = "red", lwd = 2)
We can also use the decompose(...) function to do all of this:
two_sided_decomp <- decompose(log_passengers, type = "additive")
Compared with the filter() function:
two_sided_trd - two_sided_decomp$trend
The results are the same. The upside is that we automatically get the seasonal component as well, which is the same as our manually calculated one:
two_sided_season - two_sided_decomp$seasonal
We can also plot the series:
plot.ts(log_passengers, main = "Log of Airpassengers")
lines(two_sided_decomp$trend, col = "blue", lwd = 2)
lines(two_sided_decomp$trend + two_sided_decomp$seasonal, col = "red", lwd = 2)
and examine the residuals:
forecast::tsdisplay(two_sided_decomp$random)
Looking at the $\rm ACF$, the residuals still appear to have some seasonality.
Next, we apply exponential smoothing to the log series, starting with simple exponential smoothing:
s_es <- forecast::ses(log_passengers)
par(mfrow = c(1, 2))
#
plot.ts(log_passengers)#, main = "Simple Exponential smoothing")
lines(fitted.values(s_es), col = "blue", lty = 2, lwd = 2)
lines(fitted.values(forecast::ses(log_passengers, alpha = 0.1)), col = "darkorange", lty = 2, lwd = 2)
#
plot(forecast::forecast(s_es, h = 10))
Next, double exponential smoothing (Holt's method):
d_es <- forecast::holt(log_passengers)
par(mfrow = c(1, 2))
#
plot.ts(log_passengers)#, main = "Double Exponential smoothing")
lines(fitted.values(d_es), col = "blue", lty = 2, lwd = 2)
lines(fitted.values(forecast::holt(log_passengers, alpha = 0.1, beta = 0.09)), col = "darkorange", lty = 2, lwd = 2)
#
plot(forecast::forecast(d_es, h = 10))
Finally, triple exponential smoothing (the Holt-Winters method):
t_es <- forecast::hw(log_passengers)
par(mfrow = c(1, 2))
#
plot.ts(log_passengers)#, main = "Triple Exponential smoothing")
lines(fitted.values(t_es), col = "blue", lty = 2, lwd = 2)
#
plot(forecast::forecast(t_es, h = 10))
R allows us to use an automated procedure to choose the smoothing parameters - the ets function selects the best variant automatically (by the minimum of the AIC) and estimates its parameters:
ets_es <- forecast::ets(log_passengers)
par(mfrow = c(1, 2))
#
plot.ts(log_passengers)
lines(fitted.values(ets_es), col = "blue", lty = 2, lwd = 2)
#
plot(forecast::forecast(ets_es, h = 10))
Note the help file:
In all cases, "N"=none, "A"=additive, "M"=multiplicative and "Z"=automatically selected. So, for example, "ANN" is simple exponential smoothing with additive errors, "MAM" is multiplicative Holt-Winters' method with multiplicative errors, and so on.
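For example, we can check which variant was selected for our data - the fitted object stores the selected specification as a character code (a quick check using the ets_es object estimated above):
# The selected ETS specification, e.g. something like "ETS(M,A,M)"
ets_es$method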
forecast::tsdisplay(ets_es$residuals)
The residuals appear to have a slight autocorrelation.
Next, we estimate a seasonal ARMA model with a deterministic (quadratic) trend. We first create the trend regressors:
tt_dt <- data.frame(tt = 1:length(log_passengers))
tt_dt$tt_sq <- tt_dt$tt^2
#
mdl_auto = forecast::auto.arima(airpass, max.d = 0, max.D = 0, xreg = tt_dt)
mdl_auto
plot(airpass, main = "SARMA with trend")
lines(mdl_auto$fitted, col = "red")
We can examine the residuals
forecast::tsdisplay(mdl_auto$residuals)
Note that there appears to be a large autocorrelation at every 12-th lag. So the restriction of no differencing causes auto.arima to fail to find a suitable model.
For comparison, we can specify a $\rm SARIMA(1, 0, 1)\times(1, 0, 1)_{12}$ model manually. If we use the default specification, we may get a warning (or an error) that the series is not stationary:
tryCatch({
forecast::Arima(airpass, order = c(1, 0, 1), seasonal = c(1, 0, 1), xreg = tt_dt)
}, warning = function(w) {
# warning-handler-code
}, error = function(e) {
print(e)
}, finally = {
# cleanup-code
})
If we are sure that the process is stationary, we can specify a different parameter estimation method:
mdl_manual = forecast::Arima(airpass, order = c(1, 0, 1), seasonal = c(1, 0, 1), xreg = tt_dt, method = "ML")
mdl_manual
plot(airpass, main = "SARMA with trend")
lines(mdl_manual$fitted, col = "red")
forecast::tsdisplay(mdl_manual$residuals)
The residuals look much better compared to the auto.arima results with the restriction that the data not be differenced.
Finally, we can forecast the process. We do need some future values of the trend. Thankfully, we have opted to use the standard indexing of $t = 1,2,...$ for the trend.
tt_forc <- data.frame(tt = tail(tt_dt$tt, 1) + 1:20)
tt_forc$tt_sq <- tt_forc$tt^2
plot(forecast::forecast(mdl_manual, xreg = tt_forc))
lines(mdl_manual$fitted, col = "red")
# Set plot options in Jupyter Lab
options(repr.plot.width = 15)
options(repr.plot.height = 4)
suppressPackageStartupMessages({library(forecast)})
nyse <- read.table(url("http://uosis.mif.vu.lt/~rlapinskas/(data%20R&GRETL)/nyse.txt"), header = TRUE)
nyse <- ts(nyse, start = 1952, frequency = 12)
head(nyse, 36)
par(mfrow = c(1, 3))
plot(nyse, main = bquote(Y[t]))
plot(log(nyse), main = bquote(log(Y[t])))
plot(diff(log(nyse)), main = bquote(Delta~log(Y[t])))
Note that the growth appears to be exponential - it is more convenient to take logarithms to linearize the series.
lnyse <- log(nyse)
par(mfrow = c(1, 2))
forecast::Acf(lnyse)
forecast::Pacf(lnyse)
The slow ACF decay is an indication of non-stationarity and/or a drift and/or a trend. We need to test whether the data is TS (trend stationary) or DS (difference stationary). To do this, we carry out unit root testing.
We can always load the relevant libraries to avoid using the :: notation.
library(forecast)
library(dynlm)
library(urca)
Unit root testing consists of selecting the lag order for the augmented regression, assessing the significance of the deterministic components, and comparing the test statistic with the appropriate critical values.
We will examine three cases, each starting from a different maximum lag order:
mod.1 <- dynlm(d(lnyse) ~ L(lnyse) + L(d(lnyse), 1:4) + time(lnyse))
summary(mod.1)
The last lag coefficient, L(d(lnyse), 1:4)4, is insignificant since its $p$-value $= 0.4070 > 0.05$. We will reduce the maximum lag and continue the procedure.
mod.1 <- dynlm(d(lnyse) ~ L(lnyse) + L(d(lnyse), 1:3) + time(lnyse))
print(round(coef(summary(mod.1)), 4))
The $p$-value of L(d(lnyse), 1:3)3 is $> 0.05$.
mod.1 <- dynlm(d(lnyse) ~ L(lnyse) + L(d(lnyse), 1:2) + time(lnyse))
print(round(coef(summary(mod.1)), 4))
The $p$-value of L(d(lnyse), 1:2)2 is $> 0.05$.
mod.1 <- dynlm(d(lnyse) ~ L(lnyse) + L(d(lnyse), 1) + time(lnyse))
print(round(coef(summary(mod.1)), 4))
The $p$-value of L(d(lnyse), 1) is $> 0.05$.
mod.1 <- dynlm(d(lnyse) ~ L(lnyse) + time(lnyse))
print(round(coef(summary(mod.1)), 4))
The final model contains no lags of $\Delta Y_{t-j}$ - so there is no autoregressive coefficient for the difference model.
mod.2 <- dynlm(d(lnyse) ~ L(lnyse) + L(d(lnyse), 1:5) + time(lnyse))
print(round(coef(summary(mod.2)), 4))
L(d(lnyse), 1:5)5 has a $p$-value $< 0.05$, so the final lag order is 5.
The maximum lag via the rule-of-thumb is:
floor(12 * (length(lnyse) / 100)^(1/4))
mod.3 <- dynlm(d(lnyse) ~ L(lnyse) + L(d(lnyse), 1:18) + time(lnyse))
print(round(coef(summary(mod.3)), 4))
L(d(lnyse), 1:18)18 is insignificant. We will continue reducing the lag order until the last lag is significant. We omit the intermediate model output with insignificant $p_\max$ coefficients. The final model
mod.3 <- dynlm(d(lnyse) ~ L(lnyse) + L(d(lnyse), 1:14) + time(lnyse))
print(round(coef(summary(mod.3)), 4))
has a $p_\max = 14$ lag order.
If we compare the $\rm AIC$ and $\rm BIC$ values of the three models:
a <- data.frame('mdl_1 (p = 0)' = c(AIC(mod.1), BIC(mod.1)),
'mdl_2 (p = 5)' = c(AIC(mod.2), BIC(mod.2)),
'mdl_3 (p = 14)' = c(AIC(mod.3), BIC(mod.3)),
check.names = FALSE)
rownames(a) <- c("AIC", "BIC")
a
The $\rm AIC$ and $\rm BIC$ values are smaller for the model where we started with an arbitrary value of $p = 4$. Having determined the lag order, we assess the trend significance.
Note: we will examine the final models for each case. In reality, we would need to select an arbitrary lag ourselves, if we opt to manually test for a unit root.
For the 1st model:
print(round(coef(summary(mod.1)), 4))
The trend is insignificant, so we remove it.
mod.1 <- dynlm(d(lnyse) ~ L(lnyse))
print(round(coef(summary(mod.1)), 4))
For the 2nd model:
mod.2 <- dynlm(d(lnyse) ~ L(lnyse) + L(d(lnyse), 1:5) + time(lnyse))
print(round(coef(summary(mod.2)), 4))
The trend is significant, so we leave it.
For the 3rd model:
print(round(coef(summary(mod.3)), 4))
The trend is insignificant, so we remove it
mod.3 <- dynlm(d(lnyse) ~ L(lnyse) + L(d(lnyse), 1:14))
print(round(coef(summary(mod.3)), 4))
So, we now have a final specification for each of the three cases and can carry out the unit root test itself.
The $\rm ADF$ test tests the null hypothesis: $$ H_0: \rho = 0 \text{ (i.e. the series contains a unit root)} $$
Depending on whether a trend is included, the $5\%$ critical values are approximately $-3.45$ (regression with a trend) and $-2.89$ (regression with a constant only).
Looking at the 1st model:
print(round(coef(summary(mod.1)), 4))
The coefficient $\widehat{\rho}$ t-value is L(lnyse) = -0.0720 > -2.89. So we have no grounds to reject the null hypothesis and conclude that the series contains a unit root.
Since we have determined that L(lnyse) is insignificant, we could remove it from the model, if we wanted to consider this model as a contender for the best model for this series:
mod.1 <- dynlm(d(lnyse) ~ 1)
print(round(coef(summary(mod.1)), 4))
Then the estimated model for $\Delta Y_t$ is: $\Delta Y_t = 0.0069 + \epsilon_t$, which we can write as a model for $Y_t$: $Y_t = 0.0069 + Y_{t-1} + \epsilon_t$ - a random walk with drift (an $\rm AR(1)$ model for $Y_t$ with a unit coefficient).
Looking at the 2nd model:
print(round(coef(summary(mod.2)), 4))
The coefficient $\widehat{\rho}$ t-value is L(lnyse) = -2.2920 > -3.45. So we have no grounds to reject the null hypothesis and conclude that the series contains a unit root.
Since we have determined that L(lnyse) is insignificant, we could remove it from the model, if we wanted to consider this model as a contender for the best model for this series:
mod.2 <- dynlm(d(lnyse) ~ L(d(lnyse), 1:5) + time(lnyse))
print(round(coef(summary(mod.2)), 5))
Since we needed the trend for unit root testing, having removed the insignificant $\rho$ coefficient, we could also try to remove the insignificant trend and improve our model. But we will assume that this model is acceptable as is.
Then the estimated model for $\Delta Y_t$ is:
$\Delta Y_t = -0.07357 + 0.00004 \cdot t + 0.04398 \Delta Y_{t-1} -0.03932 \Delta Y_{t-2} + 0.01035 \Delta Y_{t-3} + 0.02288 \Delta Y_{t-4} + 0.09225 \Delta Y_{t-5} + \epsilon_t$,
which we can re-write as a model for $Y_t$.
We use the property $\gamma_j = -[\phi_{j+1} + ... + \phi_p]$, $j = 1, ..., p - 1$ and the expression of $\rho$ to get the coefficients:
$ \begin{aligned} \gamma_5 &= - \phi_6 &&\Longrightarrow \phi_6 = - \gamma_5 = -0.09225\\ \gamma_4 &= - [\phi_5 + \phi_6] &&\Longrightarrow \phi_5 = - \gamma_4 - \phi_6 = -0.02288+0.09225 = 0.06937\\ \gamma_3 &= - [\phi_4 + \phi_5 + \phi_6] &&\Longrightarrow \phi_4 = - \gamma_3 - [\phi_5 + \phi_6] = - \gamma_3 + \gamma_4 = -0.01035+0.02288 = 0.01253\\ \gamma_2 &= - [\phi_3 + \phi_4 + \phi_5 + \phi_6] &&\Longrightarrow \phi_3 = - \gamma_2 + \gamma_3 = 0.03932 + 0.01035 = 0.04967\\ \gamma_1 &= - [\phi_2 + \phi_3 + \phi_4 + \phi_5 + \phi_6] &&\Longrightarrow \phi_2 = - \gamma_1 + \gamma_2 = -0.04398 -0.03932 = -0.0833\\ \rho &= \phi_1 + \phi_2 + \phi_3 + \phi_4 + \phi_5 + \phi_6 - 1,\text{ since } \rho = 0 &&\Longrightarrow \phi_1 = 1 - (\phi_2 + \phi_3 + \phi_4 + \phi_5 + \phi_6) = 1 + \gamma_1= 1 + 0.04398 = 1.04398 \end{aligned} $
So, the equivalent model for $Y_t$ is:
$Y_t = -0.07357 + 0.00004 \cdot t + 1.04398 Y_{t-1} -0.0833 Y_{t-2} + 0.04967 Y_{t-3} + 0.01253 Y_{t-4} + 0.06937 Y_{t-5} -0.09225 Y_{t-6} + \epsilon_t$
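As a quick sanity check, the same conversion can be done programmatically. This is a minimal sketch that recomputes the $\phi_j$ values from the rounded $\gamma_j$ estimates above:
# gamma_1, ..., gamma_5 from the final mod.2 output (rounded)
gamma <- c(0.04398, -0.03932, 0.01035, 0.02288, 0.09225)
p <- length(gamma) + 1
phi <- numeric(p)
phi[p] <- -gamma[p - 1]
for (j in (p - 1):2) {
  # gamma_{j-1} = -(phi_j + ... + phi_p)  =>  phi_j = -gamma_{j-1} - (phi_{j+1} + ... + phi_p)
  phi[j] <- -gamma[j - 1] - sum(phi[(j + 1):p])
}
# rho = phi_1 + ... + phi_p - 1 = 0  =>  phi_1 = 1 - (phi_2 + ... + phi_p)
phi[1] <- 1 - sum(phi[2:p])
round(phi, 5)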
To verify that this is indeed the correct transformation, we can try to simulate a few observations with the same residuals:
set.seed(123)
N <- 200
eps <- rnorm(mean = 0, sd = 1, n = N)
tt <- 1:length(eps)
First, we will simulate $Y_t$:
Y <- NULL
Y[1] <- -0.07357 + 0.00004 * tt[1] + eps[1]
Y[2] <- -0.07357 + 0.00004 * tt[2] + 1.04398 * Y[1] + eps[2]
Y[3] <- -0.07357 + 0.00004 * tt[3] + 1.04398 * Y[2] - 0.0833 * Y[1] + eps[3]
Y[4] <- -0.07357 + 0.00004 * tt[4] + 1.04398 * Y[3] - 0.0833 * Y[2] + 0.04967 * Y[1] + eps[4]
Y[5] <- -0.07357 + 0.00004 * tt[5] + 1.04398 * Y[4] - 0.0833 * Y[3] + 0.04967 * Y[2] + 0.01253 * Y[1] + eps[5]
Y[6] <- -0.07357 + 0.00004 * tt[6] + 1.04398 * Y[5] - 0.0833 * Y[4] + 0.04967 * Y[3] + 0.01253 * Y[2] + 0.06937 * Y[1] + eps[6]
for(j in 7:N){
  Y[j] <- -0.07357 + 0.00004 * tt[j] + 1.04398 * Y[j-1] - 0.0833 * Y[j-2] + 0.04967 * Y[j - 3] + 0.01253 * Y[j - 4] + 0.06937 * Y[j - 5] - 0.09225 * Y[j - 6] + eps[j]
}
Secondly, we will simulate $\Delta Y_t$:
dY <- NULL
dY[1] <- -0.07357 + 0.00004 * tt[1] + eps[1]
dY[2] <- -0.07357 + 0.00004 * tt[2] + 0.04398 * dY[1] + eps[2]
dY[3] <- -0.07357 + 0.00004 * tt[3] + 0.04398 * dY[2] - 0.03932 * dY[1] + eps[3]
dY[4] <- -0.07357 + 0.00004 * tt[4] + 0.04398 * dY[3] - 0.03932 * dY[2] + 0.01035 * dY[1] + eps[4]
dY[5] <- -0.07357 + 0.00004 * tt[5] + 0.04398 * dY[4] - 0.03932 * dY[3] + 0.01035 * dY[2] + 0.02288 * dY[1] + eps[5]
for(j in 6:N){
  dY[j] <- -0.07357 + 0.00004 * tt[j] + 0.04398 * dY[j-1] - 0.03932 * dY[j-2] + 0.01035 * dY[j - 3] + 0.02288 * dY[j - 4] + 0.09225 * dY[j - 5] + eps[j]
}
Finally, we will assume that the first 100 or so observations of $Y_t$ are burn-in values (since the first couple of values assume that $Y_j = \epsilon_j = 0$, for $j \leq 0$). This means that we treat the first 101 values of $\Delta Y_t$ as burn-in values as well and drop them:
Y <- Y[-c(1:100)]
dY <- dY[-c(1:101)]
We can compare $\Delta Y_t$ with $Y_t - Y_{t-1}$:
dt_compare <- data.frame(diff(Y), dY)
dt_compare$DIFFERENCE <- round(diff(Y) - dY, 10)
head(dt_compare)
We see that there is no difference between the simulated $Y_t - Y_{t-1}$ (diff.Y.), which was calculated from the simulated $Y_t$ series, and $\Delta Y_t$ (dY).
We can also visually compare the charts:
plot.ts(ts(diff(Y)), col = "red", lwd = 2)
lines(ts(dY), col = "blue", lty = 2, lwd = 2)
So, the two models are equivalent. For simulation and forecasting we may be more interested in the model for $Y_t$, as it is the original series of interest.
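If needed, the level series can also be recovered from the differences by cumulative summation. A small sketch using the truncated simulated series from above:
# Rebuild Y_t from the initial level and the simulated differences; should match Y (up to rounding)
Y_rebuilt <- Y[1] + cumsum(dY)
head(round(Y_rebuilt - Y[-1], 10))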
The final, 3rd model:
print(round(coef(summary(mod.3)), 4))
The coefficient $\widehat{\rho}$ t-value is L(lnyse) = -0.0471 > -2.89. So we have no grounds to reject the null hypothesis and conclude that the series contains a unit root.
We will not re-write this model's equations, as they can be determined by applying the same methods as before.
Overall, regardless of the lag order we see that the series has a unit root.
It is important to note that removing the trend increased the t-statistic of $\widehat{\rho}$, so if we incorrectly remove the trend, we may arrive at an incorrect test conclusion.
We will carry out different tests from different packages. For completeness sake, we will provide the same test results from more than one package.
The $\rm ADF$ test tests the null hypothesis: $H_0: \rho = 0 \text{ (i.e. the series contains a unit root)}$
We can set the maximum lag order, and choose the criterion for the order selection:
tst_adf_1 <- urca::ur.df(lnyse, type = "trend", lags = 18, selectlags = "BIC")
print(tst_adf_1)
We can examine the estimated test regression:
tst_adf_1@testreg
The critical values can be extracted via @cval:
print(tst_adf_1@cval)
To carry out the unit root test, we examine the t-statistic of z.lag.1 - it is -2.096 (note that this is the same as the first value in the output The value of the test statistic is: -2.0965 6.0649 2.2091). The relevant critical value is tau, which is -3.41 at the $5\%$ significance level. Since the test statistic -2.096 > -3.41, we do not reject the null hypothesis of a unit root.
The second way to carry out an $\rm ADF$ test:
tst_adf_2 <- tseries::adf.test(lnyse, alternative = "stationary")
We can either manually specify the lag order, or let the function select the maximum lag order itself.
tst_adf_2
This output is a bit easier to read - the $p-value = 0.6198 > 0.05$, so we have no grounds to reject the null hypothesis of a unit root.
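If we wanted to set the lag order ourselves instead of relying on the default, adf.test accepts it via the k argument (here, for example, the rule-of-thumb maximum lag from before):
tseries::adf.test(lnyse, alternative = "stationary", k = 18)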
The $\rm KPSS$ test tests the null hypothesis: $H_0: \text{ the data is stationary}$
The test involves specifying and estimating the following equation with a random-walk component: $$ Y_t = r_t + \delta \cdot t + \epsilon_t,\quad r_t = r_{t-1} + u_t $$
The null hypothesis of stationarity is equivalent to the hypothesis: $H_0: \sigma^2_u = 0$, which would mean that $r_t = r_{t-1} = ... = r_0 = \rm const$ (i.e. a constant intercept).
We can set the number of lags via a rule of thumb: lags = "long" is equivalent to $12 \cdot (n/100)^{1/4}$, while lags = "short" is equivalent to $4 \cdot (n/100)^{1/4}$ (truncated to an integer). The test type specifies the deterministic component: either a constant (mu), or a constant with a linear trend (tau).
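As a quick check, these are the maximum lag orders implied for this sample (a sketch, assuming the truncation rule described above):
# Implied maximum lag orders for lags = "long" and lags = "short"
n <- length(lnyse)
c(long = floor(12 * (n / 100)^(1/4)), short = floor(4 * (n / 100)^(1/4)))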
tst_kpss_1 <- urca::ur.kpss(lnyse, type = "tau", lags = "long")
tst_kpss_1
We see that the test statistic is 0.3909. The critical values are:
tst_kpss_1@cval
Since 0.3909 > 0.146, we reject the null hypothesis of stationarity and conclude that the series is not stationary.
The second way to carry out the KPSS test (where lshort = FALSE indicates using the larger, rule-of-thumb lag selection):
tst_kpss_2 <- tseries::kpss.test(lnyse, null = "Trend", lshort = FALSE)
print(tst_kpss_2)
Since the p-value = 0.01 < 0.05, we reject the null hypothesis of stationarity and conclude that the series is not stationary.
The $\rm PP$ test builds on the ADF test and tests the null hypothesis: $H_0: \text{the series contains a unit root}$.
We begin with the built-in function:
tst_pp_0 <- stats::PP.test(lnyse, lshort = FALSE)
tst_pp_0
The p-value = 0.6039 > 0.05, so we have no grounds to reject the null hypothesis of a unit root.
The urca package also contains the $\rm PP$ test:
tst_pp_1 <- urca::ur.pp(lnyse, model = "trend", type = "Z-tau", lags = "long")
tst_pp_1
Note: use type = "Z-tau", otherwise the critical values will be missing!
If we are interested, we can examine the specified regression, which was used for this test:
tst_pp_1@testreg
Although the test statistic is calculated differently than in the $\rm ADF$ test, the critical values of the PP test are the same as those of the DF test.
The critical values are:
tst_pp_1@cval
Since the value of the test statistic, -1.9393, is greater than the $5\%$ critical value of -3.420314, we have no grounds to reject the null hypothesis of a unit root.
The tseries package also has the $\rm PP$ test:
tst_pp_2 <- tseries::pp.test(lnyse, alternative = "stationary", type = "Z(t_alpha)", lshort = FALSE)
tst_pp_2
The p-value = 0.6039 > 0.05, so we have no grounds to reject the null hypothesis and conclude that the series has a unit root.
So, we can conclude that all tests indicate that the series has a unit root.
# Set plot options in Jupyter Lab
options(repr.plot.width = 10)
options(repr.plot.height = 4)
Since we have determined that the series has a unit root (and we see no indication of seasonality), we can take the differences of the series:
dlnyse <- diff(lnyse)
If we examine the series:
forecast::tsdisplay(dlnyse)
The series appears to be stationary. We will verify this by repeating the unit root tests on the differenced data. For simplicity, we will use the tests from the tseries package, which report p-values:
tseries::adf.test(dlnyse, alternative = "stationary")
$p-value = 0.01 < 0.05$, so we reject the null hypothesis of a unit root.
tseries::kpss.test(dlnyse, null = "Level", lshort = FALSE)
$p-value = 0.1 > 0.05$, so we have no grounds to reject the null hypothesis of stationarity.
tseries::pp.test(dlnyse, alternative = "stationary", type = "Z(t_alpha)", lshort = FALSE)
$p-value = 0.01 < 0.05$, so we reject the null hypothesis of a unit root.
In other words, the differences, $\Delta Y_t$, are stationary.
To make it interesting, we will specify three different models:
- a model selected by auto.arima, without any restrictions;
- the model selected via the ADF test procedure - we re-estimate the mod.2 specification (note that this is a different parametrization than the one used by dynlm);
- a model specified from the ACF and PACF of the differenced series.

For the ADF-based model we will only be able to estimate the drift component, since if $Y_t$ contains a constant $\alpha$ and a trend $\delta \cdot t$, then $\Delta Y_t$ contains only $\delta$. In this specification $\alpha$ is not identifiable.
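To see why, note that differencing removes the constant: $\Delta Y_t = \Delta(\alpha + \delta \cdot t + u_t) = \delta + \Delta u_t$, so only the drift $\delta$ enters the differenced specification.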
mdl_4_1 <- forecast::Arima(lnyse, order = c(5, 1, 0), xreg = data.frame(tt = 1:length(lnyse)))
print(mdl_4_1)
The residuals of this model:
forecast::tsdisplay(mdl_4_1$residuals)
They appear to be white noise.
We will now expand upon what the different parametrization entails for models specified with dynlm versus ones specified with Arima().
forecast::Arima (or stats::arima) specifies the process as a linear trend whose deviations from the trend follow an $\rm AR(1)$ process. For example, if we want to specify a stationary $\rm AR(1)$ model with a trend using forecast::Arima (or stats::arima), the specification is as follows:
$ \begin{cases} Y_t &= \alpha + \delta \cdot t + u_t,\\ u_t &= \phi u_{t-1} + \epsilon_t \end{cases} $
mod.a1 <- stats::arima(lnyse, order = c(1,0,0), xreg=time(lnyse))
mod.a1
mod.a2 <- forecast::Arima(lnyse, order = c(1,0,0), xreg=time(lnyse))
mod.a2
The estimated model is:
$ \begin{cases} Y_t &= -144.2528 + 0.0763 \cdot t + u_t,\\ u_t &= 0.9832 u_{t-1} + \epsilon_t \end{cases} $
In this case, we would want to test whether the residuals exhibit a unit root by testing whether $\phi = 1$.
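As a rough illustration (this is not a formal unit root test - under $H_0: \phi = 1$ the statistic does not follow the usual $t$ distribution), we can compare the estimated AR(1) coefficient with 1:
# Naive comparison of the AR(1) estimate with unity, using its estimated standard error
phi_hat <- coef(mod.a2)["ar1"]
phi_se <- sqrt(diag(mod.a2$var.coef))["ar1"]
unname((phi_hat - 1) / phi_se)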
dynlm::dynlm, on the other hand, specifies an autoregressive process with a linear trend. From the expression above we have that:
$ \begin{cases} u_t &= Y_t - \alpha - \delta \cdot t,\\ u_t &= \phi u_{t-1} + \epsilon_t \end{cases} \Longrightarrow Y_t - \alpha - \delta \cdot t = \phi \left(Y_{t-1} - \alpha - \delta \cdot (t-1) \right) + \epsilon_t $
which means that an alternative specification involves a reparametrization of the previous process as:
$ \begin{aligned} Y_t &= \tilde{\alpha} + \tilde{\delta} \cdot t + \phi Y_{t-1} + \epsilon_t = \left[ (1 - \phi) \alpha + \phi \delta\right] + \left[ (1 - \phi)\delta \right] \cdot t + \phi Y_{t-1} + \epsilon_t \end{aligned} $
which is as an autoregressive process with a linear trend.
This is the specification that dynlm::dynlm(...) uses:
mod.b1 <- dynlm::dynlm(lnyse ~ time(lnyse) + L(lnyse))
summary(mod.b1)
The estimated model is:
$ \begin{aligned} Y_t &= -2.169524 + 0.001152 \cdot t + 0.984518 Y_{t-1} + \epsilon_t \end{aligned} $
The models are equivalent, but due to the parametrization $\alpha \neq \tilde \alpha$ and $\delta \neq \tilde{\delta}$. The estimates of the autoregressive coefficient $\widehat{\phi}$ also differ due to the different parameter estimation methods (which may further differ across parametrization options). However, they are fairly close: ar1 = 0.9832 versus L(lnyse) = 0.984518.
If we use the parametrization on the estimated coefficients from the first model, we would have that:
$ \begin{aligned} \tilde{\alpha} &= \left[ (1 - \phi) \alpha + \phi \delta\right]\\ \tilde{\delta} &=\left[ (1 - \phi)\delta \right] \end{aligned} $
# tilde{alpha}
(1 - 0.9832) * (-144.2528) + 0.9832 * 0.0763
# tilde{delta}
(1 - 0.9832) * 0.0763
We see that they are fairly close to (Intercept) = -2.169524 and time(lnyse) = 0.001152, respectively.
par(mfrow = c(1, 2))
forecast::Acf(diff(lnyse))
forecast::Pacf(diff(lnyse))
We can test the lag significance via the $Ljung-Box$ test. The null hypothesis is:
$$
\begin{aligned}
H_0&: \rho(1) = ... = \rho(k) = 0\\
H_1&: \exists j: \rho(j) \neq 0
\end{aligned}
$$
We will test whether the first 20 lags have significant autocorrelation, using different values of $k$:
for(i in 1:20){
aa <- Box.test(diff(lnyse), lag = i, type = "Ljung-Box")
print(paste0("lag = ", i, " p-value = ", round(aa$p.value, 5)))
}
We see that in all cases the $p-value > 0.05$, so we have no grounds to reject the null hypothesis. This indicates that the differences, $\Delta Y_t$, are white noise. We can specify the model as:
mdl_4_2 <- forecast::Arima(lnyse, order = c(0, 1, 0), include.drift = TRUE)
print(mdl_4_2)
The residuals
forecast::tsdisplay(mdl_4_2$residuals)
for(i in (length(coef(mdl_4_2)) + 1):