Granger Causality

The file stockpab.xls contains 133 monthly observations of the most important stock price indices in Countries A and B (the indices are logged) and the respective stock market returns:

  • pchA stock returns in Country A
  • pchB stock returns in Country B

Let's assume that both indices have unit roots but are not cointegrated. The logarithmic differences, i.e., the returns, are, on the other hand, stationary. We are interested in whether the stock returns of Country A Granger-cause the returns in Country B.

suppressPackageStartupMessages({
  library(readxl)
  library(dynlm)
  library(forecast)
  library(urca)
  library(vars)
})
txt1 <- "http://uosis.mif.vu.lt/~rlapinskas/(data%20R&GRETL/"
txt2 <- "stockpab.xls"
tmp = tempfile(fileext = ".xls")
download.file(url = paste0(txt1, txt2), destfile = tmp, mode = "wb")
stock <- data.frame(read_excel(path = tmp))
# drop the first row and keep the two return columns (pchA, pchB)
stock <- ts(stock[-1, 3:4], frequency = 4, start = c(1947, 1))
plot.ts(stock)
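
As a quick check of the stationarity assumption for the returns, one could apply an ADF test (a sketch; the urca package is already loaded):

# ADF test with drift for Country A returns; lag length chosen by BIC.
# A test statistic below the 5% critical value supports stationarity.
summary(ur.df(stock[, "pchA"], type = "drift", lags = 4, selectlags = "BIC"))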

We have already learned the sequential procedure for choosing p and q in such models. In the Granger causality setting, however, this choice is not of first importance (and we usually assume p = q).
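
Should one nevertheless want a data-driven check of the common lag order, the VARselect function from the vars package can be applied to the pair of return series (a small sketch):

# Information criteria for VAR orders 1..4 on (pchA, pchB):
VARselect(stock, lag.max = 4, type = "both")$selection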

The restricted model:

We will begin by estimating the restricted model: \[Y_t = \alpha + \delta t + \phi_1 Y_{t-1} + ... + \phi_p Y_{t-p} + \epsilon_t\]

mod1R=dynlm(pchA~time(stock)+L(pchA,1:2),data=stock)
summary(mod1R)
## 
## Time series regression with "ts" data:
## Start = 1947(3), End = 1979(4)
## 
## Call:
## dynlm(formula = pchA ~ time(stock) + L(pchA, 1:2), data = stock)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.1491  -1.9509  -0.3877   1.8186  21.6793 
## 
## Coefficients:
##                 Estimate Std. Error t value        Pr(>|t|)    
## (Intercept)   -273.12037   99.28645  -2.751         0.00682 ** 
## time(stock)      0.14002    0.05065   2.764         0.00656 ** 
## L(pchA, 1:2)1    0.65583    0.08872   7.392 0.0000000000176 ***
## L(pchA, 1:2)2   -0.07830    0.08876  -0.882         0.37936    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.547 on 126 degrees of freedom
## Multiple R-squared:  0.5257, Adjusted R-squared:  0.5144 
## F-statistic: 46.55 on 3 and 126 DF,  p-value: < 0.00000000000000022

The unrestricted model:

Now we estimate the unrestricted model: \[Y_t = \alpha + \delta t + \phi_1 Y_{t-1} + ... + \phi_p Y_{t-p} + \beta_1 X_{t-1} + ... + \beta_q X_{t-q} + \epsilon_t\]

mod1NR=dynlm(pchA~time(stock)+L(pchA,1:2)+L(pchB,1:2),data=stock)
summary(mod1NR)
## 
## Time series regression with "ts" data:
## Start = 1947(3), End = 1979(4)
## 
## Call:
## dynlm(formula = pchA ~ time(stock) + L(pchA, 1:2) + L(pchB, 1:2), 
##     data = stock)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.7168  -2.1826  -0.2584   1.9523  19.7466 
## 
## Coefficients:
##                   Estimate   Std. Error t value  Pr(>|t|)    
## (Intercept)   -290.8565848   93.6062093  -3.107   0.00234 ** 
## time(stock)      0.1492898    0.0477631   3.126   0.00221 ** 
## L(pchA, 1:2)1    0.0489767    0.1659134   0.295   0.76834    
## L(pchA, 1:2)2   -0.0007376    0.1656757  -0.004   0.99646    
## L(pchB, 1:2)1    0.8607950    0.1977275   4.353 0.0000277 ***
## L(pchB, 1:2)2   -0.2339125    0.2026224  -1.154   0.25055    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.268 on 124 degrees of freedom
## Multiple R-squared:  0.5887, Adjusted R-squared:  0.5721 
## F-statistic:  35.5 on 5 and 124 DF,  p-value: < 0.00000000000000022

Model Comparison

To compare these two models by their SSR, we can use the F statistic and the anova function:

anova(mod1NR, mod1R)
## Analysis of Variance Table
## 
## Model 1: pchA ~ time(stock) + L(pchA, 1:2) + L(pchB, 1:2)
## Model 2: pchA ~ time(stock) + L(pchA, 1:2)
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    124 2258.8                                  
## 2    126 2604.9 -2   -346.13 9.5005 0.0001449 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Since the p-value = 0.0001449 < 0.05, we reject the null hypothesis that the coefficients of the lagged pchB terms are jointly zero: the information in pchB notably improves the forecast of pchA, i.e., pchB Granger-causes pchA. On the other hand, reversing the procedure shows that pchA does not Granger-cause pchB:

mod2NR=dynlm(pchB~time(stock)+L(pchB,1:2)+L(pchA,1:2),data=stock)
summary(mod2NR)
## 
## Time series regression with "ts" data:
## Start = 1947(3), End = 1979(4)
## 
## Call:
## dynlm(formula = pchB ~ time(stock) + L(pchB, 1:2) + L(pchA, 1:2), 
##     data = stock)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.8898  -1.5177  -0.2520   0.8356  12.1289 
## 
## Coefficients:
##                 Estimate Std. Error t value   Pr(>|t|)    
## (Intercept)   -185.63874   78.88663  -2.353     0.0202 *  
## time(stock)      0.09513    0.04025   2.363     0.0197 *  
## L(pchB, 1:2)1    0.82706    0.16663   4.963 0.00000224 ***
## L(pchB, 1:2)2   -0.05817    0.17076  -0.341     0.7339    
## L(pchA, 1:2)1   -0.01806    0.13982  -0.129     0.8975    
## L(pchA, 1:2)2   -0.09376    0.13962  -0.672     0.5031    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.597 on 124 degrees of freedom
## Multiple R-squared:  0.6066, Adjusted R-squared:  0.5907 
## F-statistic: 38.24 on 5 and 124 DF,  p-value: < 0.00000000000000022
mod2R=dynlm(pchB~time(stock)+L(pchB,1:2),data=stock)
summary(mod2R)
## 
## Time series regression with "ts" data:
## Start = 1947(3), End = 1979(4)
## 
## Call:
## dynlm(formula = pchB ~ time(stock) + L(pchB, 1:2), data = stock)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.0611  -1.6162  -0.3550   0.8346  12.1037 
## 
## Coefficients:
##                 Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)   -173.19420   75.63878  -2.290               0.0237 *  
## time(stock)      0.08872    0.03857   2.300               0.0231 *  
## L(pchB, 1:2)1    0.80997    0.08760   9.246 0.000000000000000746 ***
## L(pchB, 1:2)2   -0.15727    0.08770  -1.793               0.0753 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.575 on 126 degrees of freedom
## Multiple R-squared:  0.605,  Adjusted R-squared:  0.5956 
## F-statistic: 64.34 on 3 and 126 DF,  p-value: < 0.00000000000000022
anova(mod2NR, mod2R)
## Analysis of Variance Table
## 
## Model 1: pchB ~ time(stock) + L(pchB, 1:2) + L(pchA, 1:2)
## Model 2: pchB ~ time(stock) + L(pchB, 1:2)
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    124 1604.3                           
## 2    126 1610.5 -2   -6.2165 0.2402 0.7868

With p-value = 0.7868 > 0.05, we do not reject the null hypothesis that the restricted and unrestricted models fit equally well (\(SSR_{R} \approx SSR_{UR}\)); hence pchA does not Granger-cause pchB.
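
The same F statistic can be reproduced by hand from the two sums of squared residuals (a small sketch):

# F = ((SSR_R - SSR_UR)/q) / (SSR_UR/df_UR), with q = 2 restrictions:
ssr_R  <- sum(residuals(mod2R)^2)   # 1610.5 in the anova table
ssr_UR <- sum(residuals(mod2NR)^2)  # 1604.3
q      <- 2                         # two lags of pchA are excluded
df_UR  <- df.residual(mod2NR)       # 124
F_stat <- ((ssr_R - ssr_UR) / q) / (ssr_UR / df_UR)
c(F = F_stat, p.value = pf(F_stat, q, df_UR, lower.tail = FALSE))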

Note that we can shorten the whole procedure with:

#'both' means that the model includes both a constant and a trend
var.2c <- VAR(stock, p=2, type="both")
causality(var.2c, cause = "pchA")
## $Granger
## 
##  Granger causality H0: pchA do not Granger-cause pchB
## 
## data:  VAR object var.2c
## F-Test = 0.24025, df1 = 2, df2 = 248, p-value = 0.7866
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: pchA and pchB
## 
## data:  VAR object var.2c
## Chi-squared = 54.14, df = 1, p-value = 0.0000000000001866
causality(var.2c, cause = "pchB")
## $Granger
## 
##  Granger causality H0: pchB do not Granger-cause pchA
## 
## data:  VAR object var.2c
## F-Test = 9.5005, df1 = 2, df2 = 248, p-value = 0.0001058
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: pchB and pchA
## 
## data:  VAR object var.2c
## Chi-squared = 54.14, df = 1, p-value = 0.0000000000001866

With p-value = 0.7866, we do not reject H0: pchA does not Granger-cause pchB. With p-value = 0.0001058, we reject H0: pchB does not Granger-cause pchA.
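
As a complementary diagnostic, one may check the VAR residuals for autocorrelation before relying on these tests; a sketch using the serial.test function from vars (a Portmanteau test):

# Portmanteau test for residual autocorrelation in the VAR(2);
# a large p-value indicates the lag order is adequate.
serial.test(var.2c, lags.pt = 16, type = "PT.asymptotic")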

VAR: Estimation and Forecasting

Economists often work with such important macroeconomic variables as R = the interest rate, M = the money supply, P = the price level, and Y = real GDP. Because of the symbols used, models built on these variables are sometimes informally referred to as RMPY models.

The file rmpy.xls contains quarterly data on the variables for the US from 1947:1 through 1992:4. To be precise:

  • r three-month Treasury bill rate
  • m money supply (M2) measured in billions of dollars
  • p price level measured by the GDP deflator (a price index with 1987 = 1.00)
  • y GDP measured in billions of 1987 dollars

Before carrying out an analysis using time series data, you must conduct unit root tests. Remember that, if unit roots are present but the series are not cointegrated, the spurious regression problem arises; in this case, you should work with differenced data. Alternatively, if unit roots exist and cointegration does occur, then you have the important economic information that the series trend together, and you should use an ECM.

suppressPackageStartupMessages({
  library(readxl)
  library(dynlm)
  library(forecast)
  library(urca)
  library(vars)
})
txt1 <- "http://uosis.mif.vu.lt/~rlapinskas/(data%20R&GRETL/"
txt2 <- "RMPY.xls"
tmp = tempfile(fileext = ".xls")
download.file(url = paste0(txt1, txt2), destfile = tmp, mode = "wb")
rmpy <- data.frame(read_excel(path = tmp))
rmpy <- ts(rmpy, frequency = 4, start = c(1947, 1))
plot.ts(rmpy)

tsdisplay(rmpy[, "Y"])

tsdisplay(rmpy[, "P"])

tsdisplay(rmpy[, "R"])

tsdisplay(rmpy[, "M"])

We will test for a unit root as we’ve done previously:

model.Y <- dynlm(d(Y) ~ L(Y) + L(d(Y),1) + time(Y), data = rmpy)
round(summary(model.Y)$coefficients, 4)
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5752.1701  2498.4603 -2.3023   0.0225
## L(Y)           -0.0327     0.0151 -2.1577   0.0323
## L(d(Y), 1)      0.3565     0.0702  5.0794   0.0000
## time(Y)         2.9753     1.2905  2.3055   0.0223

t-statistic = -2.1577 > -3.45 (the 5% critical value of the ADF test with a trend) => we do not reject the null hypothesis that Y has a unit root.

model.P <- dynlm(d(P) ~ L(P) + L(d(P),1:3) + time(P), data = rmpy)
round(summary(model.P)$coefficients, 4)
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)    -0.1870     0.0683 -2.7360   0.0069
## L(P)           -0.0023     0.0013 -1.7249   0.0863
## L(d(P), 1:3)1   0.3535     0.0737  4.7963   0.0000
## L(d(P), 1:3)2   0.2931     0.0775  3.7833   0.0002
## L(d(P), 1:3)3   0.2055     0.0755  2.7218   0.0072
## time(P)         0.0001     0.0000  2.7388   0.0068

t-statistic = -1.7249 > -3.45 => we do not reject the null hypothesis that P has a unit root.

model.R <- dynlm(d(R) ~ L(R) + L(d(R),1:3), data = rmpy)
round(summary(model.R)$coefficients, 4)
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)     0.1993     0.1032  1.9314   0.0550
## L(R)           -0.0374     0.0174 -2.1482   0.0331
## L(d(R), 1:3)1   0.3299     0.0727  4.5353   0.0000
## L(d(R), 1:3)2  -0.3395     0.0717 -4.7353   0.0000
## L(d(R), 1:3)3   0.2603     0.0727  3.5811   0.0004

t-statistic = -2.1482 > -2.89 (the 5% critical value of the ADF test without a trend) => we do not reject the null hypothesis that R has a unit root.

model.M <- dynlm(d(M) ~ L(M) + L(d(M),1) + time(M), data = rmpy)
round(summary(model.M)$coefficients, 4)
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -893.7814   274.0292 -3.2616   0.0013
## L(M)          -0.0008     0.0016 -0.5070   0.6128
## L(d(M), 1)     0.6210     0.0594 10.4545   0.0000
## time(M)        0.4576     0.1401  3.2677   0.0013

t-statistic = -0.5070 > -3.45 => we do not reject the null hypothesis that M has a unit root.
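
These dynlm-based tests can be cross-checked with the ur.df function from urca (a sketch for Y; type = "trend" matches the specification with a linear trend):

# ADF test for Y with constant, trend and one lagged difference (as above);
# compare the test statistic with the tau3 critical values it reports.
summary(ur.df(rmpy[, "Y"], type = "trend", lags = 1))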

It is also possible to verify that they are not cointegrated (for the sake of this example, assume that they are not cointegrated). In order to avoid the spurious regression problem, we work with differenced data. In particular, we take logs of each series, then take differences of these logged series, then multiply them by 100. This implies that we are working with percentage changes in each variable (e.g., a value of 1 implies a 1% change). Thus:

  • dr percentage change in the interest rate.
  • dm percentage change in the money supply.
  • dp percentage change in the price level (i.e., inflation).
  • dy percentage change in GDP (i.e., GDP growth).
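
These transformed series already sit in the data file; as a check, they could be reproduced from the levels (a sketch, assuming the level columns are named Y, P, R and M as above):

# diff(log(x)) * 100 approximates the percentage change of x:
dy <- 100 * diff(log(rmpy[, "Y"]))
dp <- 100 * diff(log(rmpy[, "P"]))
dr <- 100 * diff(log(rmpy[, "R"]))
dm <- 100 * diff(log(rmpy[, "M"]))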

We choose somewhat arbitrarily a VAR(1) model with a linear trend:

# columns 6:9 hold dY, dP, dR, dM; the first row is NA after differencing
var.1t <- VAR(rmpy[-1, 6:9], p = 1, type = "both")
summary(var.1t)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: dY, dP, dR, dM 
## Deterministic variables: both 
## Sample size: 182 
## Log Likelihood: -1252.745 
## Roots of the characteristic polynomial:
## 0.6508 0.5593 0.5593 0.0872
## Call:
## VAR(y = rmpy[-1, 6:9], p = 1, type = "both")
## 
## 
## Estimation results for equation dY: 
## =================================== 
## dY = dY.l1 + dP.l1 + dR.l1 + dM.l1 + const + trend 
## 
##         Estimate Std. Error t value  Pr(>|t|)    
## dY.l1  0.3085537  0.0757405   4.074 0.0000699 ***
## dP.l1 -0.1168849  0.0995695  -1.174   0.24202    
## dR.l1  0.0003808  0.0050367   0.076   0.93982    
## dM.l1  0.2830971  0.0840506   3.368   0.00093 ***
## const  0.4986157  0.1758513   2.835   0.00511 ** 
## trend -0.0031337  0.0014759  -2.123   0.03513 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.9207 on 176 degrees of freedom
## Multiple R-Squared: 0.2026,  Adjusted R-squared: 0.1799 
## F-statistic: 8.942 on 5 and 176 DF,  p-value: 0.000000138 
## 
## 
## Estimation results for equation dP: 
## =================================== 
## dP = dY.l1 + dP.l1 + dR.l1 + dM.l1 + const + trend 
## 
##         Estimate Std. Error t value           Pr(>|t|)    
## dY.l1 -0.0387799  0.0466774  -0.831            0.40721    
## dP.l1  0.5190143  0.0613627   8.458 0.0000000000000102 ***
## dR.l1  0.0099351  0.0031040   3.201            0.00163 ** 
## dM.l1  0.1206121  0.0517987   2.328            0.02102 *  
## const  0.1588937  0.1083737   1.466            0.14439    
## trend  0.0018117  0.0009096   1.992            0.04793 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.5674 on 176 degrees of freedom
## Multiple R-Squared: 0.4533,  Adjusted R-squared: 0.4378 
## F-statistic: 29.19 on 5 and 176 DF,  p-value: < 0.00000000000000022 
## 
## 
## Estimation results for equation dR: 
## =================================== 
## dR = dY.l1 + dP.l1 + dR.l1 + dM.l1 + const + trend 
## 
##       Estimate Std. Error t value Pr(>|t|)   
## dY.l1  3.22423    1.11748   2.885  0.00440 **
## dP.l1  1.77875    1.46905   1.211  0.22759   
## dR.l1  0.22188    0.07431   2.986  0.00323 **
## dM.l1  3.39062    1.24008   2.734  0.00689 **
## const -3.57467    2.59451  -1.378  0.17002   
## trend -0.05618    0.02178  -2.580  0.01070 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 13.58 on 176 degrees of freedom
## Multiple R-Squared: 0.2277,  Adjusted R-squared: 0.2057 
## F-statistic: 10.38 on 5 and 176 DF,  p-value: 0.000000009757 
## 
## 
## Estimation results for equation dM: 
## =================================== 
## dM = dY.l1 + dP.l1 + dR.l1 + dM.l1 + const + trend 
## 
##         Estimate Std. Error t value             Pr(>|t|)    
## dY.l1 -0.0315759  0.0446037  -0.708              0.47993    
## dP.l1  0.0606118  0.0586367   1.034              0.30270    
## dR.l1 -0.0129930  0.0029661  -4.380            0.0000203 ***
## dM.l1  0.7494549  0.0494975  15.141 < 0.0000000000000002 ***
## const  0.3351330  0.1035592   3.236              0.00145 ** 
## trend  0.0003412  0.0008692   0.393              0.69515    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.5422 on 176 degrees of freedom
## Multiple R-Squared: 0.6574,  Adjusted R-squared: 0.6477 
## F-statistic: 67.55 on 5 and 176 DF,  p-value: < 0.00000000000000022 
## 
## 
## 
## Covariance matrix of residuals:
##           dY        dP      dR       dM
## dY  0.847780 -0.008047   3.323  0.02177
## dP -0.008047  0.321988   0.260  0.03532
## dR  3.323312  0.260025 184.545 -1.52641
## dM  0.021771  0.035319  -1.526  0.29401
## 
## Correlation matrix of residuals:
##          dY       dP       dR       dM
## dY  1.00000 -0.01540  0.26569  0.04361
## dP -0.01540  1.00000  0.03373  0.11479
## dR  0.26569  0.03373  1.00000 -0.20722
## dM  0.04361  0.11479 -0.20722  1.00000

To compare the original series with the fitted data, one can use:

plot(var.1t, names = "dY")

plot(var.1t, names = "dP")

plot(var.1t, names = "dR")

plot(var.1t, names = "dM")

A forecast 12 quarters (3 years) ahead can be produced with:

var.1t.prd <- predict(var.1t, n.ahead = 12, ci = 0.95)
plot(var.1t.prd)

We chose the VAR(1) model with a constant and trend without any deeper consideration. Alternatively, one can automate the selection with the VARselect function, which uses different information criteria to choose the order:

#the first row of the data is NA
VARselect(rmpy[-1, 6:9], lag.max = 5, type="both")
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      3      2      1      3 
## 
## $criteria
##                1         2         3         4         5
## AIC(n)  2.507590  2.335251  2.333008  2.382359  2.346143
## HQ(n)   2.681563  2.625206  2.738945  2.904277  2.984044
## SC(n)   2.936595  3.050259  3.334019  3.669372  3.919160
## FPE(n) 12.276570 10.336949 10.322340 10.860509 10.498417

(thus the most conservative SC criterion suggests the 1st order).
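
The stability of the fitted VAR(1) can be verified directly: all roots of the companion matrix must lie inside the unit circle (compare with the "Roots of the characteristic polynomial" line in the summary above):

# Moduli of the companion-matrix eigenvalues; all < 1 implies a stable VAR:
roots(var.1t)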

To plot the impulse-response graphs we use the plot(irf(…)) function. Recall that the (orthogonalized) responses generally depend on the ordering of the variables.

plot(irf(VAR(rmpy[-1, 6:9], p = 1, type = "both"),impulse="dY",boot=FALSE))

In this case, we created the VAR(1) model using the original order of variables in the rmpy matrix and analyzed how all the variables reacted to the unit dY impulse.

Now we will change the order of columns in the rmpy matrix:

plot(irf(VAR(rmpy[-1,9:6], p = 1, type = "both"),impulse="dY",boot=FALSE))

The VAR model itself is the same, but the reaction to the unit dY impulse is different (although the differences are not big).

Using:

plot(irf(var.1t))

will plot all the responses to all the impulses together with the confidence intervals obtained via the bootstrapping procedure.

VECM: Example 1

We shall model three different two-dimensional VAR(1) processes with not only integrated but also cointegrated components:

\[ \begin{pmatrix} Y_{1t}\\ Y_{2t} \end{pmatrix} = \vec{\mu} + \Theta_1 \vec{Y}_{t-1} + \vec{\epsilon}_t = \begin{pmatrix} \mu_1 \\ \mu_2 \end{pmatrix} + \begin{pmatrix} 0 & 1\\ 0 & 1 \end{pmatrix} \begin{pmatrix} Y_{1,t-1}\\ Y_{2,t-1} \end{pmatrix} + \begin{pmatrix} \epsilon_{1,t}\\ \epsilon_{2,t} \end{pmatrix} \]

or, in a VECM form:

\[ \Delta \vec{Y}_t = \vec{\mu} + \left( \begin{pmatrix} 0 & 1 \\ 0 & 1 \end{pmatrix} - \begin{pmatrix} 1 & 0\\ 0 & 1 \end{pmatrix} \right) \begin{pmatrix} Y_{1,t-1}\\ Y_{2,t-1} \end{pmatrix} + \vec{\epsilon}_t = \vec{\mu} + \begin{pmatrix} -1 & 1 \\ 0 & 0 \end{pmatrix} \begin{pmatrix} Y_{1,t-1}\\ Y_{2,t-1} \end{pmatrix} + \vec{\epsilon}_t \]

or, in a specific system form:

\[ \begin{aligned} \begin{cases} \Delta Y_{1t} &= \mu_1 + (-1) \cdot (Y_{1,t-1} + (-1)\cdot Y_{2,t-1}) + \epsilon_{1t}\\ \Delta Y_{2t} &= \mu_2 + 0 \cdot (Y_{1,t-1} + (-1)\cdot Y_{2,t-1}) + \epsilon_{2t} \end{cases} \end{aligned} \]

The rank of the matrix \(\Pi = \begin{pmatrix} -1 & 1 \\ 0 & 0\end{pmatrix}\) equals 1; since the rank is less than 2, \(\det(\Pi) = 0\), and there exists exactly one cointegration relationship.

Since \(\Pi = \alpha \beta^T = \begin{pmatrix} -1 \\ 0 \end{pmatrix} \begin{pmatrix} 1 & -1 \end{pmatrix}\), the cointegration relationship (normalized with respect to \(Y_{1t}\)) is \[1 \cdot Y_{1,t-1} + (-1) \cdot Y_{2,t-1}.\]
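
This factorization is easy to verify numerically:

# alpha %*% t(beta) reproduces Pi = matrix(c(-1, 0, 1, 0), 2):
alpha <- matrix(c(-1, 0), ncol = 1)
beta  <- matrix(c(1, -1), ncol = 1)
alpha %*% t(beta)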

We shall consider three cases where \(\vec{\mu}\) is constant but, depending on its particular value, we shall simulate:

  • “no constant” (\(\vec{\mu} = 0\)) case;
  • “restricted constant” (\(\vec{\mu}\) is proportional to \(\alpha\)) case;
  • “unrestricted constant” (\(\vec{\mu} \neq 0\)) case.

The processes can be modelled in several ways.

Firstly, we shall use the tsDyn package:

suppressPackageStartupMessages({
  library(tsDyn)
})
set.seed(1)
  • Both components are without drift; the distance between them is constant and equal to zero:
B.nc <- matrix(c(0, 0, 1, 1), 2) # no constant
B.nc
##      [,1] [,2]
## [1,]    0    1
## [2,]    0    1
var.nc <- VAR.sim(B = B.nc, n = 100, include = "none")
  • Both components are without drift; the distance between them is constant and not equal to zero:
B.rc <- matrix(c(15.5, 0, 0, 0, 1, 1), 2) # restricted constant
B.rc
##      [,1] [,2] [,3]
## [1,] 15.5    0    1
## [2,]  0.0    0    1
var.rc <- VAR.sim(B = B.rc, n = 100, include = "const")
  • Both components have a drift; the (average) distance between them is constant but not equal to zero:
B.ur <- matrix(c(15.5, 0.5, 0, 0, 1, 1), 2) # unrestricted constant
B.ur
##      [,1] [,2] [,3]
## [1,] 15.5    0    1
## [2,]  0.5    0    1
var.ur <- VAR.sim(B = B.ur,n = 100, include = "const")
par(mfrow=c(1,3))
ts.plot(var.nc, type="l", col=c(1,2), main="no constant")
ts.plot(var.rc, type="l", col=c(1,2), main="restricted")
ts.plot(var.ur, type="l", col=c(1,2), main="unrestricted")

Secondly, the unrestricted variant of the above process can be generated as follows:

library(MASS); set.seed(1000)
par(mfrow=c(1,3))
NN=100
e75=vector(mode="list",length=3) # create an empty list of length 3
                                 # to place three replicas of the process
for(ii in 1:3){
  y1=numeric(NN+3)
  y2=numeric(NN+3)
  mu1=15.5 # unrestricted constant
  mu2=0.5  # this component does not equal zero
  y1[1]=0
  y2[1]=0
  for(i in 2:(NN+3)){
    y1[i]=mu1+y2[i-1]+rnorm(1,sd=6)
    y2[i]=mu2+y2[i-1]+rnorm(1,sd=5)
  }
  Y1=ts(y1[4:(NN+3)]) # remove the "warm-up" starter values
  Y2=ts(y2[4:(NN+3)])
  e75[[ii]]=cbind(Y1,Y2) # save the iith replica in e75[[ii]]
  plot(Y1, main = "unrestricted", ylab="Y")
  lines(Y2,col=2)
}

We shall analyse the second replica:

e75.2 = e75[[2]]

We shall examine e75.2 and describe it with a VEC model. We use the tsDyn, vars, and urca packages.

As the components of e75.2 are drifting, we should try Case 3 or Case 4 (both include a deterministic constant) and choose the better one. We begin with an automated procedure which chooses the “best” VEC model (that is, its rank and lag) according to information criteria (this approach is not based on the Johansen test):

suppressPackageStartupMessages({
  library(tsDyn)
})
rank.select(e75.2, include="const")
## Warning in x/sqrt(t(x) %*% S11 %*% x): Recycling array of length 1 in vector-array arithmetic is deprecated.
##   Use c() or as.vector() instead.
## (this warning is repeated for every rank/lag combination tried)
## Best AIC: rank= 1  lag= 1 
## Best BIC: rank= 1  lag= 1 
## Best HQ : rank= 1  lag= 1

In case of the warning Warning in x/sqrt(t(x) %*% S11 %*% x): Recycling array of length 1 in vector-array arithmetic is deprecated..., note that it stems from the package implementation itself rather than from any errors in the data.

The recommended VECM has rank 1 and lag 1.

Another approach to decide on the right rank is to use the Johansen test: choose Case 3 and do the following:

e3 <- VECM(e75.2, lag = 1, estim="ML")
## Warning in x/sqrt(t(x) %*% S11 %*% x): Recycling array of length 1 in vector-array arithmetic is deprecated.
##   Use c() or as.vector() instead.

## Warning in x/sqrt(t(x) %*% S11 %*% x): Recycling array of length 1 in vector-array arithmetic is deprecated.
##   Use c() or as.vector() instead.
summary(rank.test(e3))
##   r       trace trace_pval trace_pval_T       eigen eigen_pval
## 1 0 48.02862670     <0.001       <0.001 47.95507296     <0.001
## 2 1  0.07355374     0.7862       0.7896  0.07355374     0.7862

Thus, if we decide to describe our data with Case 3, we should take r=1. The model itself is:

summary(e3)
## #############
## ###Model VECM 
## #############
## Full sample size: 100    End sample size: 98
## Number of variables: 2   Number of estimated slope parameters 8
## AIC 661.3935     BIC 684.6582    SSR 5585.899
## Cointegrating vector (estimated by ML):
##    Y1        Y2
## r1  1 -1.020492
## 
## 
##             ECT                 Intercept          Y1 -1              
## Equation Y1 -0.9814(0.1319)***  14.8715(2.0927)*** -0.0221(0.0742)    
## Equation Y2 -0.0434(0.0970)     1.0742(1.5397)     0.1115(0.0546)*    
##             Y2 -1              
## Equation Y1 0.1393(0.2049)     
## Equation Y2 -0.0145(0.1508)

The respective model in Case 4 is:

e4=VECM(e75.2,lag=1,estim="ML",LRinclude="trend")
## Warning in x/sqrt(t(x) %*% S11 %*% x): Recycling array of length 1 in vector-array arithmetic is deprecated.
##   Use c() or as.vector() instead.

## Warning in x/sqrt(t(x) %*% S11 %*% x): Recycling array of length 1 in vector-array arithmetic is deprecated.
##   Use c() or as.vector() instead.

## Warning in x/sqrt(t(x) %*% S11 %*% x): Recycling array of length 1 in vector-array arithmetic is deprecated.
##   Use c() or as.vector() instead.
summary(e4)
## #############
## ###Model VECM 
## #############
## Full sample size: 100    End sample size: 98
## Number of variables: 2   Number of estimated slope parameters 8
## AIC 661.1619     BIC 684.4266    SSR 5549.801
## Cointegrating vector (estimated by ML):
##    Y1        Y2      trend
## r1  1 -1.045556 0.02319024
## 
## 
##             ECT                 Intercept          Y1 -1              
## Equation Y1 -0.9952(0.1318)***  16.1549(2.2267)*** -0.0166(0.0740)    
## Equation Y2 -0.0212(0.0976)     0.7616(1.6485)     0.1039(0.0548).    
##             Y2 -1             
## Equation Y1 0.1211(0.2046)    
## Equation Y2 0.0109(0.1515)

Note that the Case 4 model e4 is slightly better according to its AIC (661.16 vs 661.39); thus we misclassify our e75.2 both by case and by lag length.
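
The two criteria can also be extracted directly (a sketch, assuming tsDyn's AIC method for VECM objects; the same values are printed by summary above):

AIC(e3) # Case 3: 661.3935
AIC(e4) # Case 4: 661.1619, slightly smaller, hence preferred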

To forecast and plot both models, we use the script:

par(mfrow=c(1,2))
pred_e3 <- predict(e3, n.ahead=20)
ee3=rbind(e75.2,pred_e3)
matplot(ee3,type="l")
pred_e4 <- predict(e4, n.ahead=20)
ee4=rbind(e75.2,pred_e4)
matplot(ee4,type="l")

VECM: Example 2

We shall create a VEC model of the vector (Y_1M, Y_5Y): the data file us-tbill.txt contains monthly interest rates of US Treasury bills, 1964:01 through 1993:12, for maturities of one month (Y_1M) and five years (Y_5Y). Both series are integrated and, moreover, cointegrated.

txt1 <- "http://uosis.mif.vu.lt/~rlapinskas/(data%20R&GRETL/"
txt2 <- "us-tbill.txt"
rate <- data.frame(read.table(paste0(txt1, txt2), header = TRUE))
rate <- ts(rate, frequency = 12, start = 1964)
matplot(rate[,c(2,4)],type="l")

Let us say that we have previously created \(VAR(2)\) models for the levels and for the differences. Since the sequences are cointegrated, the difference model lacks the error correction term; now we shall include it using the Johansen method.

We shall use the vars and urca packages, where the Johansen test is performed with the ca.jo function. Note that rank.test allows for the standard five specifications of deterministic terms, whereas ca.jo allows only three.

Also, the lag is specified differently: K from ca.jo corresponds to lag+1 in rank.test!

Looking at the plot, neither series has a drift, and the distance between the cointegrated series is constant on average. Also, since we previously selected a VAR(2) model, we will use K = 2:

rrate=rate[,c(2,4)]
suppressPackageStartupMessages({
  library(urca)
  library(vars)
})
# Johansen's procedure (restricted constant: ecdet = "const"):
H1 <- ca.jo(rrate, ecdet="const", K = 2)

The summary of the cointegration matrix \(\Pi = \alpha \beta^T\):

summary(H1)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: maximal eigenvalue statistic (lambda max) , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1] 0.0761991568754926129614091 0.0130615270737686108709630
## [3] 0.0000000000000000006413711
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 1 |  4.71  7.52  9.24 12.97
## r = 0  | 28.37 13.75 15.67 20.20
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##             Y_1M.l2   Y_5Y.l2   constant
## Y_1M.l2   1.0000000    1.0000   1.000000
## Y_5Y.l2  -0.9236070  113.3143  -1.183872
## constant  0.9097674 -888.7860 -27.049571
## 
## Weights W:
## (This is the loading matrix)
## 
##            Y_1M.l2       Y_5Y.l2                    constant
## Y_1M.d -0.10977230 -0.0002484112 -0.000000000000000001263013
## Y_5Y.d  0.04014519 -0.0001576087  0.000000000000000001238933

We begin by looking at the block Values of teststatistic and critical values of test:

  • Looking at \(r = 0\) results, we see that \(test = 28.37 > 15.67\), so we reject the null \(H_0: r = 0\).

  • Looking at the \(r \leq 1\) row: since \(4.71 < 9.24\), we do not reject \(H_0: r \leq 1\). Combined with the rejection of \(H_0: r = 0\), this implies \(r = 1\).

We now look at Eigenvectors, normalised to first column: (These are the cointegration relations)

Here, \(\beta\) is the first column of the eigenvector matrix. The equation \[\widehat{z}_t = 0.9097674 + Y\_1M_t - 0.9236070 \, Y\_5Y_t\] is the cointegration, or long-run equilibrium, equation.

Finally, looking at the Weights W: (This is the loading matrix)

The matrix \(\alpha\) is likewise the first column of the loading matrix: \(\widehat{\alpha} = \begin{pmatrix} -0.10977230 \\ 0.04014519 \end{pmatrix}\).
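
If needed, the implied long-run matrix \(\Pi = \alpha \beta^T\) can be reconstructed from the slots of the ca.jo object (a sketch; the V slot holds the eigenvectors, the W slot the loadings):

# First columns give beta (including the restricted constant) and alpha:
beta  <- matrix(H1@V[, 1], ncol = 1)  # (1, -0.9236070, 0.9097674)
alpha <- matrix(H1@W[, 1], ncol = 1)  # (-0.10977230, 0.04014519)
alpha %*% t(beta)                     # Pi, augmented with the constant column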

The matrices \(\beta\) and \(\alpha\) can also be extracted in another way:

cajorls(H1)
## $rlm
## 
## Call:
## lm(formula = substitute(form1), data = data.mat)
## 
## Coefficients:
##           Y_1M.d    Y_5Y.d  
## ect1      -0.10977   0.04015
## Y_1M.dl1  -0.28757   0.01169
## Y_5Y.dl1   0.71654   0.04408
## 
## 
## $beta
##                ect1
## Y_1M.l2   1.0000000
## Y_5Y.l2  -0.9236070
## constant  0.9097674

Recall that the VECM equation is \(\Delta \vec{Y}_t = \vec{\mu} + \Pi \vec{Y}_{t-1} + \Gamma_1 \Delta \vec{Y}_{t-1} + \vec{\epsilon}_t\). The function cajools parameterizes this equation differently (in its long-run form, the error correction term contains the second-order lags):

summary(cajools(H1)) # VECM system
## Response Y_1M.d :
## 
## Call:
## lm(formula = Y_1M.d ~ Y_1M.dl1 + Y_5Y.dl1 + Y_1M.l2 + Y_5Y.l2 + 
##     constant - 1, data = data.mat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1312 -0.3293  0.0087  0.3365  3.2826 
## 
## Coefficients:
##          Estimate Std. Error t value       Pr(>|t|)    
## Y_1M.dl1 -0.28970    0.05664  -5.114 0.000000517561 ***
## Y_5Y.dl1  0.70251    0.10873   6.461 0.000000000346 ***
## Y_1M.l2  -0.11002    0.03245  -3.390       0.000778 ***
## Y_5Y.l2   0.07324    0.03434   2.133       0.033635 *  
## constant  0.12092    0.13944   0.867       0.386438    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7743 on 353 degrees of freedom
## Multiple R-squared:  0.1335, Adjusted R-squared:  0.1212 
## F-statistic: 10.87 on 5 and 353 DF,  p-value: 0.0000000009586
## 
## 
## Response Y_5Y.d :
## 
## Call:
## lm(formula = Y_5Y.d ~ Y_1M.dl1 + Y_5Y.dl1 + Y_1M.l2 + Y_5Y.l2 + 
##     constant - 1, data = data.mat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.13129 -0.20653 -0.01927  0.21441  1.56073 
## 
## Coefficients:
##          Estimate Std. Error t value Pr(>|t|)   
## Y_1M.dl1  0.01034    0.03086   0.335  0.73786   
## Y_5Y.dl1  0.03517    0.05923   0.594  0.55301   
## Y_1M.l2   0.03999    0.01768   2.262  0.02432 * 
## Y_5Y.l2  -0.05494    0.01871  -2.937  0.00353 **
## constant  0.17660    0.07596   2.325  0.02064 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4218 on 353 degrees of freedom
## Multiple R-squared:  0.02956,    Adjusted R-squared:  0.01581 
## F-statistic:  2.15 on 5 and 353 DF,  p-value: 0.05908

To forecast the components of the model, we express the VECM in its VAR form using vec2var, where r is the rank of the matrix \(\Pi\):

H1var=vec2var(H1, r=1)
H1var
## 
## Coefficient matrix of lagged endogenous variables:
## 
## A1:
##         Y_1M.l1   Y_5Y.l1
## Y_1M 0.71243321 0.7165378
## Y_5Y 0.01168731 1.0440759
## 
## 
## A2:
##         Y_1M.l2     Y_5Y.l2
## Y_1M 0.17779448 -0.61515131
## Y_5Y 0.02845789 -0.08115424
## 
## 
## Coefficient matrix of deterministic regressor(s).
## 
##         constant
## Y_1M -0.09986727
## Y_5Y  0.03652279

Thus, the VEC form of our model is: \[ \begin{aligned} \begin{cases} \Delta Y\_1M_t &= 0.12092 - 0.28970 \Delta Y\_1M_{t-1} + 0.70251 \Delta Y\_5Y_{t-1} - 0.11002 Y\_1M_{t-2} + 0.07324 Y\_5Y_{t-2}\\ \Delta Y\_5Y_t &= 0.17660 + 0.01034 \Delta Y\_1M_{t-1} + 0.03517 \Delta Y\_5Y_{t-1} + 0.03999 Y\_1M_{t-2} - 0.05494 Y\_5Y_{t-2} \end{cases} \end{aligned} \]

and the VAR form is \[ \begin{aligned} \begin{cases} Y\_1M_t &= -0.09986727 + 0.71243321 Y\_1M_{t-1} + 0.7165378 Y\_5Y_{t-1} + 0.17779448 Y\_1M_{t-2} - 0.61515131 Y\_5Y_{t-2}\\ Y\_5Y_t &= 0.03652279 + 0.01168731 Y\_1M_{t-1} + 1.0440759 Y\_5Y_{t-1} + 0.02845789 Y\_1M_{t-2} - 0.08115424 Y\_5Y_{t-2} \end{cases} \end{aligned} \]

We use this model and forecast the components 60 months ahead:

H1pred=predict(H1var, n.ahead = 60)
plot(H1pred)

# We draw a more coherent graph:
Y_1M=rrate[,1]
Y_5Y=rrate[,2]
plot(seq(1964,1998+11/12,by=1/12),c(Y_1M,H1pred$fcst$Y_1M[,1]),type="l",
xlab="Time",ylab="Y")
lines(seq(1964,1998+11/12,by=1/12),c(Y_5Y,H1pred$fcst$Y_5Y[,1]),type="l",
xlab="Time",ylab="Y",col=2)

The forecast is quite reasonable and in line with economic theory (the spread between cointegrated series must tend towards a constant).
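
As a final visual check, the estimated long-run equilibrium error \(\widehat{z}_t\) from the cointegration relation above can be plotted; for cointegrated series it should look stationary:

# Equilibrium error z_t = 0.9097674 + Y_1M - 0.9236070 * Y_5Y:
z <- 0.9097674 + Y_1M - 0.9236070 * Y_5Y
plot(z, type = "l", xlab = "Time", ylab = "z")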