1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #define ADF_DEBUG 0
21 #define KPSS_DEBUG 0
22 
23 #include "libgretl.h"
24 #include "transforms.h"
25 
26 /**
27  * SECTION:adf_kpss
28  * @short_description: unit root and cointegration tests
29  * @title: ADF, KPSS, Engle-Granger
30  * @include: libgretl.h
31  *
32  * Implementations of the (Augmented) Dickey-Fuller test
33  * and the Kwiatkowski, Phillips, Schmidt and Shin test for
34  * the presence of a unit root in a time series, along with
35  * the Engle-Granger test for cointegration of two or more
36  * time series.
37  *
38  * The Johansen cointegration test is also provided in
39  * libgretl; see johansen_test() and johansen_test_simple().
40  */
41 
42 /* codes for deterministic regressors */
43 typedef enum {
44     UR_NO_CONST = 1,
45     UR_CONST,
46     UR_TREND,
47     UR_QUAD_TREND,
48     UR_MAX
49 } DetCode;
50 
51 /* flags for "special stuff" going on */
52 typedef enum {
53     ADF_EG_TEST   = 1 << 0, /* doing Engle-Granger test */
54     ADF_EG_RESIDS = 1 << 1, /* final stage of the above */
55     ADF_PANEL     = 1 << 2, /* working on panel data */
56     ADF_GLS       = 1 << 3, /* GLS: Elliott, Rothenberg, Stock */
57     ADF_PQ        = 1 << 4  /* Perron-Qu, 2007 */
58 } AdfFlags;
59 
60 /* automatic lag selection methods */
61 enum {
62     k_AIC = 1,
63     k_BIC,
64     k_TSTAT
65 };
66 
67 /* demean/detrend method */
68 enum {
69     demean_GLS = 1,
70     demean_OLS
71 };
72 
73 /* p-value type */
74 enum {
75     PV_FINITE = 1,
76     PV_ASY,
77     PV_APPROX
78 };
79 
80 typedef struct adf_info_ adf_info;
81 typedef struct kpss_info_ kpss_info;
82 
83 struct adf_info_ {
84     int v;           /* ID number of series to test (in/out) */
85     int order;       /* lag order for ADF (in/out) */
86     int kmax;        /* max. order (for testing down) */
87     int altv;        /* ID of modified series (detrended) */
88     int niv;         /* number of (co-)integrated vars (Engle-Granger) */
89     AdfFlags flags;  /* bitflags: see above */
90     DetCode det;     /* code for deterministics */
91     int pvtype;      /* code for p-value type */
92     int nseas;       /* number of seasonal dummies */
93     int T;           /* number of obs used in test */
94     int df;          /* degrees of freedom, test regression */
95     double b0;       /* coefficient on lagged level */
96     double tau;      /* test statistic */
97     double pval;     /* p-value of test stat */
98     int *list;       /* regression list */
99     int *slist;      /* list of seasonal dummies, if applicable */
100     const char *vname; /* name of series tested */
101     gretl_matrix *g; /* GLS coefficients (if applicable) */
102     int verbosity;
103 };
104 
105 struct kpss_info_ {
106     int T;
107     double test;
108     double pval;
109 };
110 
111 /* replace @y with demeaned or detrended y via GLS */
112 
GLS_demean_detrend(DATASET * dset,int v,int offset,int T,DetCode det,adf_info * ainfo,PRN * prn)113 static int GLS_demean_detrend (DATASET *dset, int v,
114 			       int offset, int T,
115 			       DetCode det,
116 			       adf_info *ainfo,
117 			       PRN *prn)
118 {
119     double *y = dset->Z[v];
120     gretl_matrix *yd = NULL;
121     gretl_matrix *Xd = NULL;
122     gretl_matrix *b = NULL;
123     double c;
124     int i, t, xcols;
125     int err = 0;
126 
127     xcols = (det == UR_TREND)? 2 : 1;
128 
129     if (T - xcols <= 0) {
130 	return E_DF;
131     }
132 
133     yd = gretl_column_vector_alloc(T);
134     Xd = gretl_matrix_alloc(T, xcols);
135     b = gretl_column_vector_alloc(xcols);
136 
137     if (yd == NULL || Xd == NULL || b == NULL) {
138 	err = E_ALLOC;
139 	goto bailout;
140     }
141 
142     c = (det == UR_CONST)? (1.0 - 7.0/T) : (1.0 - 13.5/T);
143 
144     /* invalidate pre-offset observations */
145     for (t=0; t<offset; t++) {
146 	y[t] = NADBL;
147     }
148     y += offset;
149 
150     gretl_vector_set(yd, 0, y[0]);
151     // gretl_vector_set(yd, 0, (1 - c) * y[0]);
152     for (t=1; t<T; t++) {
153 	gretl_vector_set(yd, t, y[t] - c * y[t-1]);
154     }
155 
156     gretl_matrix_set(Xd, 0, 0, 1);
157     if (xcols == 2) {
158 	gretl_matrix_set(Xd, 0, 1, 1);
159     }
160 
161     for (t=1; t<T; t++) {
162 	gretl_matrix_set(Xd, t, 0, 1 - c);
163 	if (xcols == 2) {
164 	    gretl_matrix_set(Xd, t, 1, t+1 - t*c);
165 	}
166     }
167 
168     err = gretl_matrix_ols(yd, Xd, b, NULL, NULL, NULL);
169 
170     if (!err && ainfo->verbosity > 1) {
171 	char date1[OBSLEN];
172 	char date2[OBSLEN];
173 
174 	ntolabel(date1, offset, dset);
175 	ntolabel(date2, offset + T - 1, dset);
176 	pprintf(prn, "\nGLS demean/detrend: using %s-%s (T = %d)\n",
177 		date1, date2, yd->rows);
178 	for (i=0; i<xcols; i++) {
179 	    pprintf(prn, "  %s = %#.8g\n", (i==0)? "mean" : "trend", b->val[i]);
180 	}
181     }
182 
183     if (!err) {
184 	for (t=0; t<T; t++) {
185 	    y[t] -= b->val[0];
186 	    if (xcols == 2) {
187 		y[t] -= b->val[1] * (t+1);
188 	    }
189 	}
190     }
191 
192  bailout:
193 
194     gretl_matrix_free(yd);
195     gretl_matrix_free(Xd);
196 
197     if (err) {
198 	gretl_matrix_free(b);
199     } else {
200 	ainfo->g = b;
201     }
202 
203     return err;
204 }
205 
206 /* replace @y with demeaned or detrended y via OLS */
207 
OLS_demean_detrend(double * y,int offset,int T,DetCode det,PRN * prn)208 static int OLS_demean_detrend (double *y, int offset,
209 			       int T, DetCode det,
210 			       PRN *prn)
211 {
212     gretl_matrix *yd = NULL;
213     gretl_matrix *Xd = NULL;
214     gretl_matrix *b = NULL;
215     int t, xcols = 1;
216     int err = 0;
217 
218     if (det == UR_QUAD_TREND) {
219 	xcols = 3;
220     } else if (det == UR_TREND) {
221 	xcols = 2;
222     }
223 
224     if (T - xcols <= 0) {
225 	return E_DF;
226     }
227 
228     yd = gretl_column_vector_alloc(T);
229     Xd = gretl_matrix_alloc(T, xcols);
230     b = gretl_column_vector_alloc(xcols);
231 
232     if (yd == NULL || Xd == NULL || b == NULL) {
233 	err = E_ALLOC;
234 	goto bailout;
235     }
236 
237     /* invalidate pre-offset observations */
238     for (t=0; t<offset; t++) {
239 	y[t] = NADBL;
240     }
241     y += offset;
242 
243     for (t=0; t<T; t++) {
244 	gretl_vector_set(yd, t, y[t]);
245 	gretl_matrix_set(Xd, t, 0, 1.0);
246 	if (xcols > 1) {
247 	    gretl_matrix_set(Xd, t, 1, t+1);
248 	}
249 	if (xcols > 2) {
250 	    gretl_matrix_set(Xd, t, 2, (t+1) * (t+1));
251 	}
252     }
253 
254     err = gretl_matrix_ols(yd, Xd, b, NULL, NULL, NULL);
255 
256     if (!err && prn != NULL) {
257 	pprintf(prn, "\nOLS demean/detrend: (T = %d)\n", yd->rows);
258 	for (t=0; t<xcols; t++) {
259 	    pprintf(prn, "  %s = %#.8g\n", (t==0)? "mean" : "trend", b->val[t]);
260 	}
261     }
262 
263     if (!err) {
264 	for (t=0; t<T; t++) {
265 	    y[t] -= b->val[0];
266 	    if (xcols > 1) {
267 		y[t] -= b->val[1] * (t+1);
268 	    }
269 	    if (xcols > 2) {
270 		y[t] -= b->val[2] * (t+1) * (t+1);
271 	    }
272 	}
273     }
274 
275  bailout:
276 
277     gretl_matrix_free(yd);
278     gretl_matrix_free(Xd);
279     gretl_matrix_free(b);
280 
281     return err;
282 }
283 
real_adf_form_list(adf_info * ainfo,DATASET * dset)284 static int real_adf_form_list (adf_info *ainfo,
285 			       DATASET *dset)
286 {
287     int v, save_t1 = dset->t1;
288     int k, j, err = 0;
289 
290     /* using the original var, or transformed? */
291     v = ainfo->altv > 0 ? ainfo->altv : ainfo->v;
292 
293     /* temporararily reset sample */
294     dset->t1 = 0;
295 
296     /* generate the first difference of series @v:
297        this will be the LHS variable in the test
298     */
299     ainfo->list[1] = diffgenr(v, DIFF, dset);
300     if (ainfo->list[1] < 0) {
301 	dset->t1 = save_t1;
302 	return E_DATA;
303     }
304 
305     /* generate lag 1 of series @v: the basic RHS series */
306     ainfo->list[2] = laggenr(v, 1, dset);
307     if (ainfo->list[2] < 0) {
308 	dset->t1 = save_t1;
309 	return E_DATA;
310     }
311 
312     /* generate lagged differences for augmented test */
313     j = 3;
314     for (k=1; k<=ainfo->order && !err; k++) {
315 	int vk = laggenr(ainfo->list[1], k, dset);
316 
317 	if (vk < 0) {
318 	    fprintf(stderr, "Error generating lags\n");
319 	    err = E_DATA;
320 	} else {
321 	    ainfo->list[j++] = vk;
322 	}
323     }
324 
325     if (!err && ainfo->nseas > 0) {
326 	/* note: centered */
327 	ainfo->slist = seasonals_list(dset, dset->pd, 1, &err);
328 	if (err) {
329 	    ainfo->nseas = 0;
330 	} else {
331 	    ainfo->nseas = ainfo->slist[0];
332 	}
333     }
334 
335     /* restore incoming sample */
336     dset->t1 = save_t1;
337 
338     return err;
339 }
340 
341 /* The "offset" will determine where the data start for the
342    initial detrending regression. I suppose we don't want
343    to include excessive "pre-sample" data if the user has
344    explicitly moved the sample start off zero, but we need
345    the data to start early enough to provide @order + 1
346    lags, to be in sync with the ADF regressions.
347 */
348 
adf_y_offset(adf_info * ainfo,int v,DATASET * dset)349 static int adf_y_offset (adf_info *ainfo, int v, DATASET *dset)
350 {
351     int min_offset = dset->t1 - (ainfo->order + 1);
352     int t, offset = 0;
353 
354     /* copy original data into series v */
355     for (t=0; t<=dset->t2; t++) {
356 	dset->Z[v][t] = dset->Z[ainfo->v][t];
357 	if (na(dset->Z[v][t])) {
358 	    offset = t+1;
359 	}
360     }
361 
362     if (offset < min_offset) {
363 	offset = min_offset;
364     }
365 
366 #if ADF_DEBUG
367     fprintf(stderr, "adf_y_offset: dset->t1=%d, order=%d, offset=%d\n",
368 	    dset->t1, ainfo->order, offset);
369 #endif
370 
371     return offset;
372 }
373 
374 /* Generate the various differences and lags required for
375    the ADF test, detrending first if required.
376 */
377 
adf_prepare_vars(adf_info * ainfo,DATASET * dset,gretlopt opt,int demean_mode,PRN * prn)378 static int adf_prepare_vars (adf_info *ainfo, DATASET *dset,
379 			     gretlopt opt, int demean_mode,
380 			     PRN *prn)
381 {
382     int err = 0;
383 
384     if (ainfo->v == 0) {
385 	return E_DATA;
386     }
387 
388     if (ainfo->list == NULL) {
389 	/* the max number of terms (quadratic trend case) */
390 	int len = 5 + ainfo->nseas + ainfo->order;
391 
392 	ainfo->list = gretl_list_new(len);
393 	if (ainfo->list == NULL) {
394 	    return E_ALLOC;
395 	}
396     }
397 
398     if (demean_mode == demean_OLS) {
399 	/* OLS adjustment is wanted (first pass) */
400 	DetCode det = UR_CONST;
401 	int v = dset->v;
402 
403 	if (opt & OPT_R) {
404 	    det = UR_QUAD_TREND;
405 	} else if (opt & OPT_T) {
406 	    det = UR_TREND;
407 	}
408 
409 	err = dataset_add_series(dset, 1);
410 
411 	if (!err) {
412 	    PRN *vprn = ainfo->verbosity > 1 ? prn : NULL;
413 	    int offset = adf_y_offset(ainfo, v, dset);
414 	    int T = dset->t2 - offset + 1;
415 
416 	    err = OLS_demean_detrend(dset->Z[v], offset, T, det, vprn);
417 	}
418 
419 	if (!err) {
420 	    /* replace with OLS-detrended version */
421 	    strcpy(dset->varname[v], "ydetols");
422 	    ainfo->altv = v;
423 	}
424     } else if (demean_mode == demean_GLS) {
425 	/* GLS adjustment is wanted */
426 	DetCode det = (opt & OPT_T)? UR_TREND : UR_CONST;
427 	int v;
428 
429 	if (ainfo->altv > 0) {
430 	    /* if we already did detrending, re-use the
431 	       storage we added earlier */
432 	    v = ainfo->altv;
433 	} else {
434 	    v = dset->v;
435 	    err = dataset_add_series(dset, 1);
436 	}
437 
438 	if (!err) {
439 	    int offset = adf_y_offset(ainfo, v, dset);
440 	    int T = dset->t2 - offset + 1;
441 
442 	    err = GLS_demean_detrend(dset, v, offset, T,
443 				     det, ainfo, prn);
444 	}
445 
446 	if (!err) {
447 	    /* replace with GLS-detrended version */
448 	    strcpy(dset->varname[v], "ydetrend");
449 	    ainfo->altv = v;
450 	}
451     }
452 
453     if (!err) {
454 	err = real_adf_form_list(ainfo, dset);
455     }
456 
457 #if ADF_DEBUG
458     printlist(ainfo->list, "adf initial list");
459 #endif
460 
461     return err;
462 }
463 
464 /* Peter Sephton's response-surface based critical values for
465    ADF-GLS with testing down of lag order as per Ng and Perron,
466    or Perron and Qu. See PS's paper in Computational Economics,
467    https://doi.org/10.1007/s10614-020-10082-6
468 */
469 
sephton_adf_critvals(adf_info * ainfo,double * c)470 static void sephton_adf_critvals (adf_info *ainfo, double *c)
471 {
472     /* Each row contains 6 coeffs: const, 4 powers of 1/T,
473        and maxlag / T. The rows are in blocks of four:
474        (1) Ng-Perron constant; (2) Perron-Qu constant;
475        (3) Ng-Perron trend;    (3) Perron-Qu trend.
476        Within each block are coefficients for 4 critical
477        values: 0.01, 0.025, 0.05 and 0.1.
478     */
479     const double coef[16][6] = {
480 	/* Ng-Perron constant */
481 	{-2.5512, 1.084, -720.43, 18279, -175060, 0.3174},
482 	{-2.2186, -3.7919, -333.25, 6665.6, -52297, 0.31557},
483 	{-1.9341, -4.7232, -124.1, 1441.1, -6754.7, 0.0096449},
484 	{-1.6136, -8.9613, 120.77, -3103.9, 24355, -0.15212},
485 	/* Perron-Qu constant */
486 	{-2.5449, -3.2252,  -626, 14049, -107060, 0.33716},
487 	{-2.2154, -9.9681, -250.85, 3955.5, -12315, 0.5985},
488 	{-1.932, -7.5717, -259.69, 5407.7, -31513, 0.2145},
489 	{-1.6108, -8.9303, -83.401, 2341.3, -13132, -0.10669},
490 	/* Ng-Perron trend */
491 	{-3.3772, -0.2059, -1809.6, 46527, -415670,  1.27},
492 	{-3.0761, 2.9114, -1466.4, 35993, -312430, 0.7397},
493 	{-2.828, 7.4946, -1579.2, 39383, -341880, 0.47643},
494 	{-2.545, 5.5607, -1133.2, 26203, -213800, 0.24246},
495 	/* Perron-Qu trend */
496 	{-3.3719, -14.027, -1194.7, 32450, -301000, 1.6079},
497 	{-3.0688, -4.2253, -1135, 29133, -257020, 0.73835},
498 	{-2.8196, 2.4706, -1346.6, 35398, -314840, 0.37564},
499 	{-2.5401, -2.7442, -950.57, 25683, -233160, 0.52035}
500     };
501     int PQ = (ainfo->flags & ADF_PQ)? 1 : 0;
502     int trend = (ainfo->det == UR_TREND);
503     int i, j, rmin = 4 * PQ + trend ? 8 : 0;
504     const double *b;
505     double X[6];
506 
507     X[0] = 1.0;
508     X[1] = 1.0 / ainfo->T;
509     X[2] = X[1] * X[1];
510     X[3] = X[2] * X[1];
511     X[4] = X[3] * X[1];
512     X[5] = ainfo->kmax * X[1];
513 
514     for (i=0; i<4; i++) {
515 	b = coef[rmin+i];
516 	c[i] = 0;
517 	for (j=0; j<6; j++) {
518 	    c[i] += X[j] * b[j];
519 	}
520     }
521 }
522 
523 #if 0 /* unused? */
524 
525 /* based on a much larger replication of ERS using gretl:
526    see http://gretl.sourceforge.net/papers/df-gls-pvals.pdf
527 */
528 
529 static void get_df_gls_ct_cval (adf_info *ainfo, double *c)
530 {
531     static double b[4][3] = {
532 	/* b0       b(1/T)    b(1/T^2) */
533 	{ -2.56073, -17.5434,  68.4750 }, /* 10% */
534 	{ -2.84864, -17.6702,  36.9221 }, /* 5% */
535 	{ -3.10420, -18.2513,  9.99274 }, /* 2.5% */
536 	{ -3.40846, -19.4237, -23.9869 }  /* 1% */
537     };
538     int i, T = ainfo->T;
539 
540     for (i=0; i<4; i++) {
541 	c[i] = b[i][0] + b[i][1] / T + b[i][2] / (T*T);
542     }
543 }
544 
545 static void get_df_gls_ct_cval (adf_info *ainfo, double *c)
546 {
547     /* Elliott, Rothenberg and Stock (1996), Table 1 */
548     static double df_gls_ct_cvals[4][4] = {
549 	/* 10%     5%    2.5%    1% */
550 	{ -2.89, -3.19, -3.46, -3.77 }, /* T = 50  */
551 	{ -2.74, -3.03, -3.29, -3.58 }, /* T = 100 */
552 	{ -2.64, -2.93, -3.18, -3.46 }, /* T = 200 */
553 	{ -2.57, -2.89, -3.15, -3.48 }  /* \infty  */
554     };
555     int T = ainfo->T;
556     int j, i = 3;
557 
558     if (T <= 50){
559 	i = 0;
560     } else if (T <= 100){
561 	i = 1;
562     } else if (T <= 200){
563 	i = 2;
564     }
565 
566     for (j=0; j<4; j++) {
567 	c[j] = df_gls_ct_cvals[i][j];
568     }
569 }
570 
571 #endif
572 
573 /* display an F-test for the joint significance of the lagged
574    \delta y terms in ADF test */
575 
show_lags_test(MODEL * pmod,int order,PRN * prn)576 static void show_lags_test (MODEL *pmod, int order, PRN *prn)
577 {
578     int *llist = gretl_list_new(order);
579     double F;
580     int i;
581 
582     if (llist != NULL) {
583 	for (i=0; i<order; i++) {
584 	    /* lagged differences */
585 	    llist[i+1] = pmod->list[pmod->ifc + 3 + i];
586 	}
587 
588 	F = wald_omit_F(llist, pmod);
589 
590 	if (!na(F)) {
591 	    pprintf(prn, "  %s: F(%d, %d) = %.3f [%.4f]\n",
592 		    _("lagged differences"), order, pmod->dfd, F,
593 		    snedecor_cdf_comp(order, pmod->dfd, F));
594 	}
595 
596 	free(llist);
597     }
598 }
599 
test_down_string(int i,gretlopt opt)600 static const char *test_down_string (int i, gretlopt opt)
601 {
602     if (opt & OPT_U) {
603 	/* perron-qu */
604 	if (i == k_BIC) {
605 	    return _("modified BIC, Perron-Qu");
606 	} else {
607 	    return _("modified AIC, Perron-Qu");
608 	}
609     } else {
610 	int gls = (opt & OPT_G);
611 
612 	if (i == k_BIC) {
613 	    return gls ? _("modified BIC") : _("BIC");
614 	} else if (i == k_TSTAT) {
615 	    return _("t-statistic");
616 	} else {
617 	    /* the default */
618 	    return gls ? _("modified AIC") : _("AIC");
619 	}
620     }
621 }
622 
DF_header(const char * s,int p,int pmax,int test_down,gretlopt opt,PRN * prn)623 static void DF_header (const char *s, int p, int pmax,
624 		       int test_down, gretlopt opt,
625 		       PRN *prn)
626 {
627     pputc(prn, '\n');
628 
629     if (p <= 0 && pmax == 0) {
630 	if (opt & OPT_G) {
631 	    pprintf(prn, _("Dickey-Fuller (GLS) test for %s\n"), s);
632 	} else {
633 	    pprintf(prn, _("Dickey-Fuller test for %s\n"), s);
634 	}
635     } else {
636 	if (opt & OPT_G) {
637 	    pprintf(prn, _("Augmented Dickey-Fuller (GLS) test for %s\n"), s);
638 	} else {
639 	    pprintf(prn, _("Augmented Dickey-Fuller test for %s\n"), s);
640 	}
641 	if (test_down) {
642 	    const char *critstr = test_down_string(test_down, opt);
643 
644 	    pprintf(prn, _("testing down from %d lags, criterion %s"),
645 		    pmax, critstr);
646 	} else {
647 	    if (p == 1) {
648 		pprintf(prn, _("including one lag of (1-L)%s"), s);
649 	    } else {
650 		pprintf(prn, _("including %d lags of (1-L)%s"), p, s);
651 	    }
652 	}
653 	pputc(prn, '\n');
654     }
655 }
656 
DF_model_string(int i)657 static const char *DF_model_string (int i)
658 {
659     const char *models[] = {
660 	"(1-L)y = (a-1)*y(-1) + e",
661 	"(1-L)y = b0 + (a-1)*y(-1) + e",
662 	"(1-L)y = b0 + b1*t + (a-1)*y(-1) + e",
663 	"(1-L)y = b0 + b1*t + b2*t^2 + (a-1)*y(-1) + e"
664     };
665 
666     if (i >= 0 && i < 4) {
667 	return models[i];
668     } else {
669 	return "";
670     }
671 }
672 
ADF_model_string(int i)673 static const char *ADF_model_string (int i)
674 {
675     const char *models[] = {
676 	"(1-L)y = (a-1)*y(-1) + ... + e",
677 	"(1-L)y = b0 + (a-1)*y(-1) + ... + e",
678 	"(1-L)y = b0 + b1*t + (a-1)*y(-1) + ... + e",
679 	"(1-L)y = b0 + b1*t + b2*t^2 + (a-1)*y(-1) + ... + e"
680     };
681 
682     if (i >= 0 && i < 4) {
683 	return models[i];
684     } else {
685 	return "";
686     }
687 }
688 
DF_test_string(int i)689 static const char *DF_test_string (int i)
690 {
691     const char *tests[] = {
692 	N_("test without constant"),
693 	N_("test with constant"),
694 	N_("with constant and trend"),
695 	N_("with constant, linear and quadratic trend")
696     };
697 
698     if (i >= 0 && i < 4) {
699 	return tests[i];
700     } else {
701 	return "";
702     }
703 }
704 
print_adf_results(adf_info * ainfo,MODEL * dfmod,int * blurb_done,gretlopt opt,int test_down,PRN * prn)705 static void print_adf_results (adf_info *ainfo, MODEL *dfmod,
706 			       int *blurb_done, gretlopt opt,
707 			       int test_down, PRN *prn)
708 {
709     const char *urcstrs[] = {
710 	"nc", "c", "ct", "ctt"
711     };
712     gchar *pvstr = NULL;
713     char taustr[16];
714     int i;
715 
716     if (prn == NULL) return;
717 
718     /* convert deterministics code to 0-base */
719     i = ainfo->det - 1;
720 
721     if (na(ainfo->pval)) {
722 	pvstr = g_strdup_printf("%s %s", _("p-value"), _("unknown"));
723     } else if (ainfo->pvtype == PV_ASY) {
724 	pvstr = g_strdup_printf("%s %.4g", _("asymptotic p-value"), ainfo->pval);
725     } else if (ainfo->pvtype == PV_APPROX) {
726 	pvstr = g_strdup_printf("%s %.3f", _("approximate p-value"), ainfo->pval);
727     } else {
728 	pvstr = g_strdup_printf("%s %.4g", _("p-value"), ainfo->pval);
729     }
730 
731     if (*blurb_done == 0) {
732 	DF_header(ainfo->vname, ainfo->order, ainfo->kmax,
733 		  test_down, opt, prn);
734 	pprintf(prn, _("sample size %d\n"), ainfo->T);
735 	if (ainfo->flags & ADF_PANEL) {
736 	    pputc(prn, '\n');
737 	} else {
738 	    pputs(prn, _("unit-root null hypothesis: a = 1"));
739 	    pputs(prn, "\n\n");
740 	}
741 	*blurb_done = 1;
742     }
743 
744     if (ainfo->flags & ADF_EG_RESIDS) {
745 	/* last step of Engle-Granger test */
746 	pprintf(prn, "  %s ", _(DF_test_string(0)));
747 	pputc(prn, '\n');
748 	if (test_down) {
749 	    pputs(prn, "  ");
750 	    if (ainfo->order == 1) {
751 		pprintf(prn, _("including one lag of (1-L)%s"), ainfo->vname);
752 	    } else {
753 		pprintf(prn, _("including %d lags of (1-L)%s"),
754 			ainfo->order, ainfo->vname);
755 	    }
756 	    pputc(prn, '\n');
757 	}
758 	pprintf(prn, "  %s: %s\n", _("model"),
759 		(ainfo->order > 0)? ADF_model_string(0) :
760 		DF_model_string(0));
761     } else {
762 	pprintf(prn, "  %s ", _(DF_test_string(i)));
763 	if (ainfo->nseas > 0) {
764 	    pputs(prn, _("plus seasonal dummies"));
765 	}
766 	pputc(prn, '\n');
767 	if (test_down) {
768 	    pputs(prn, "  ");
769 	    if (ainfo->order == 1) {
770 		pprintf(prn, _("including one lag of (1-L)%s"), ainfo->vname);
771 	    } else {
772 		pprintf(prn, _("including %d lags of (1-L)%s"),
773 			ainfo->order, ainfo->vname);
774 	    }
775 	    pputc(prn, '\n');
776 	}
777 	pprintf(prn, "  %s: %s\n", _("model"),
778 		(ainfo->order > 0)? ADF_model_string(i) :
779 		DF_model_string(i));
780     }
781 
782     if (ainfo->flags & ADF_GLS) {
783 	strcpy(taustr, "tau");
784     } else {
785 	sprintf(taustr, "tau_%s(%d)", urcstrs[i], ainfo->niv);
786     }
787 
788     pprintf(prn, "  %s: %g\n"
789 	    "  %s: %s = %g\n",
790 	    _("estimated value of (a - 1)"), ainfo->b0,
791 	    _("test statistic"), taustr, ainfo->tau);
792 
793     if ((ainfo->flags & ADF_GLS) && na(ainfo->pval)) {
794 	double c[4];
795 
796 	sephton_adf_critvals(ainfo, c);
797 	pprintf(prn, "\n  %*s    ", TRANSLATED_WIDTH(_("Critical values")), " ");
798 	pprintf(prn, "%g%%     %g%%     %g%%     %g%%\n", 10.0, 5.0, 2.5, 1.0);
799 	pprintf(prn, "  %s: %.2f   %.2f   %.2f   %.2f\n",
800 		_("Critical values"), c[3], c[2], c[1], c[0]);
801     } else {
802 	pprintf(prn, "  %s\n", pvstr);
803     }
804 
805     g_free(pvstr);
806 
807     if (!na(dfmod->rho)) {
808 	pprintf(prn, "  %s: %.3f\n", _("1st-order autocorrelation coeff. for e"),
809 		dfmod->rho);
810     }
811 
812     if (ainfo->order > 1) {
813 	show_lags_test(dfmod, ainfo->order, prn);
814     }
815 }
816 
817 /* test the lag order down using the t-statistic criterion */
818 
t_adjust_order(adf_info * ainfo,DATASET * dset,int * err,PRN * prn)819 static int t_adjust_order (adf_info *ainfo, DATASET *dset,
820 			   int *err, PRN *prn)
821 {
822     gretlopt kmod_opt = (OPT_A | OPT_Z);
823     MODEL kmod;
824     int kmax = ainfo->kmax;
825     double tstat, pval;
826     int k, pos;
827 
828     for (k=kmax; k>0; k--) {
829 	kmod = lsq(ainfo->list, dset, OLS, kmod_opt);
830 	if (!kmod.errcode && kmod.dfd == 0) {
831 	    kmod.errcode = E_DF;
832 	}
833 	if (kmod.errcode) {
834 	    fprintf(stderr, "t_adjust_order: k = %d, err = %d\n", k,
835 		    kmod.errcode);
836 	    *err = kmod.errcode;
837 	    clear_model(&kmod);
838 	    k = -1;
839 	    break;
840 	}
841 #if ADF_DEBUG
842 	printmodel(&kmod, dset, OPT_NONE, prn);
843 #endif
844 	pos = k + kmod.ifc;
845 	tstat = kmod.coeff[pos] / kmod.sderr[pos];
846 	clear_model(&kmod);
847 	pval = normal_pvalue_2(tstat);
848 
849 	if (pval > 0.10) {
850 	    gretl_list_delete_at_pos(ainfo->list, k + 2);
851 	} else {
852 	    break;
853 	}
854     }
855 
856     return k;
857 }
858 
859 /* compute modified information criterion */
860 
get_MIC(MODEL * pmod,int k,double sum_ylag2,int kmethod,const DATASET * dset)861 static double get_MIC (MODEL *pmod, int k, double sum_ylag2,
862 		       int kmethod, const DATASET *dset)
863 {
864     double g, CT, ttk, s2k = 0;
865     int t, T = pmod->nobs;
866 
867     g = pmod->coeff[pmod->ifc];
868 
869     for (t=pmod->t1; t<=pmod->t2; t++) {
870 	s2k += pmod->uhat[t] * pmod->uhat[t];
871     }
872 
873     s2k /= pmod->nobs;
874     ttk = g * g * sum_ylag2 / s2k;
875     CT = kmethod == k_BIC ? log(T) : 2.0;
876 
877     return log(s2k) + CT * (ttk + k)/T;
878 }
879 
880 /* component calculation for modified IC methods */
881 
get_sum_y2(adf_info * ainfo,MODEL * pmod,const DATASET * dset)882 static double get_sum_y2 (adf_info *ainfo, MODEL *pmod,
883 			  const DATASET *dset)
884 {
885     const double *ylag = dset->Z[ainfo->list[2]];
886     double sumy2 = 0;
887     int t;
888 
889     for (t=pmod->t1; t<=pmod->t2; t++) {
890 	sumy2 += ylag[t] * ylag[t];
891     }
892 
893     return sumy2;
894 }
895 
896 /* Using modified information criterion, as per Ng and Perron,
897    "Lag Length Selection and the Construction of Unit Root Tests
898    with Good Size and Power", Econometrica 69/6, Nov 2001, pp.
899    1519-1554, for the GLS case -- otherwise plain IC (as of
900    2015-03-31).
901 */
902 
ic_adjust_order(adf_info * ainfo,int kmethod,DATASET * dset,gretlopt opt,int test_num,int * err,PRN * prn)903 static int ic_adjust_order (adf_info *ainfo, int kmethod,
904 			    DATASET *dset, gretlopt opt,
905 			    int test_num, int *err,
906 			    PRN *prn)
907 {
908     MODEL kmod;
909     gretlopt kmod_opt = (OPT_A | OPT_Z);
910     double IC, ICmin = 0;
911     double sum_ylag2 = 0;
912     int kmax = ainfo->kmax;
913     int k, kopt = kmax;
914     int save_t1 = dset->t1;
915     int save_t2 = dset->t2;
916     int use_MIC = 0;
917     int *tmplist;
918 
919     tmplist = gretl_list_copy(ainfo->list);
920     if (tmplist == NULL) {
921 	*err = E_ALLOC;
922 	return -1;
923     }
924 
925     if (ainfo->flags & ADF_GLS) {
926 	/* modified criterion wanted */
927 	use_MIC = 1;
928     }
929 
930     for (k=kmax; k>=0; k--) {
931 #if 0
932 	int i, vi;
933 	fprintf(stderr, "\nk = %d\n", k);
934 	for (i=1; i<=tmplist[0]; i++) {
935 	    vi = tmplist[i];
936 	    fprintf(stderr, "  %d: %s\n", vi, dset->varname[vi]);
937 	}
938 #endif
939 	kmod = lsq(tmplist, dset, OLS, kmod_opt);
940 	if (!kmod.errcode && kmod.dfd == 0) {
941 	    kmod.errcode = E_DF;
942 	}
943 	if (kmod.errcode) {
944 	    fprintf(stderr, "ic_adjust_order: k = %d, err = %d\n", k,
945 		    kmod.errcode);
946 	    *err = kmod.errcode;
947 	    clear_model(&kmod);
948 	    kopt = -1;
949 	    break;
950 	}
951 	if (use_MIC) {
952 	    if (k == kmax) {
953 		/* this need only be done once */
954 		sum_ylag2 = get_sum_y2(ainfo, &kmod, dset);
955 	    }
956 	    IC = get_MIC(&kmod, k, sum_ylag2, kmethod, dset);
957 	} else if (kmethod == k_BIC) {
958 	    IC = kmod.criterion[C_BIC];
959 	} else {
960 	    IC = kmod.criterion[C_AIC];
961 	}
962 	if (k == kmax) {
963 	    /* ensure a uniform sample */
964 	    dset->t1 = kmod.t1;
965 	    dset->t2 = kmod.t2;
966 	    ICmin = IC;
967 	} else if (IC < ICmin) {
968 	    ICmin = IC;
969 	    kopt = k;
970 	}
971 	if (ainfo->verbosity > 1) {
972 	    printmodel(&kmod, dset, OPT_S, prn);
973 	}
974 	if (ainfo->verbosity) {
975 	    const char *tag;
976 
977 	    if (use_MIC) {
978 		tag = (kmethod == k_BIC) ? "MBIC" : "MAIC";
979 	    } else {
980 		tag = (kmethod == k_BIC) ? "BIC" : "AIC";
981 	    }
982 
983 	    if (k == kmax && test_num == 1) {
984 		pputc(prn, '\n');
985 	    }
986 	    pprintf(prn, "  k = %2d: %s = %#g\n", k, tag, IC);
987 	}
988 	clear_model(&kmod);
989 	gretl_list_delete_at_pos(tmplist, k + 2);
990     }
991 
992     if ((opt & OPT_V) && test_num > 1) {
993 	pputc(prn, '\n');
994     }
995 
996     free(tmplist);
997 
998     if (kopt >= 0) {
999 	/* now trim the "real" list to @kopt lags */
1000 	for (k=kmax; k>kopt; k--) {
1001 	    gretl_list_delete_at_pos(ainfo->list, k + 2);
1002 	}
1003     }
1004 
1005     dset->t1 = save_t1;
1006     dset->t2 = save_t2;
1007 
1008     return kopt;
1009 }
1010 
1011 /* targ must be big enough to accept all of src! */
1012 
copy_list_values(int * targ,const int * src)1013 static void copy_list_values (int *targ, const int *src)
1014 {
1015     int i;
1016 
1017     for (i=0; i<=src[0]; i++) {
1018 	targ[i] = src[i];
1019     }
1020 }
1021 
1022 /**
1023  * get_urc_pvalue:
1024  * @tau: test statistic.
1025  * @n: sample size (or 0 for asymptotic result).
1026  * @niv: number of potentially cointegrated variables
1027  * (1 for simple unit-root test).
1028  * @itv: code: 1, 2, 3, 4 for nc, c, ct, ctt models
1029  * respectively.
1030  *
1031  * Retrieves the p-value for @tau from the Dickey–Fuller
1032  * unit-root test or the Engle–Granger cointegration
1033  * test, as per James MacKinnon (1996).
1034  *
1035  * Returns: p-value, or %NADBL on failure.
1036  */
1037 
get_urc_pvalue(double tau,int n,int niv,int itv)1038 double get_urc_pvalue (double tau, int n, int niv, int itv)
1039 {
1040     double (*pvfunc)(double, int, int, int);
1041     double pval = NADBL;
1042 
1043     pvfunc = get_plugin_function("mackinnon_pvalue");
1044     if (pvfunc == NULL) {
1045         return pval;
1046     }
1047 
1048     pval = (*pvfunc)(tau, n, niv, itv);
1049 
1050 #if ADF_DEBUG
1051     fprintf(stderr, "get_urc_pvalue: tau=%g, n=%d, niv=%d, itv=%d: pval=%g\n",
1052 	    tau, n, niv, itv, pval);
1053 #endif
1054 
1055     return pval;
1056 }
1057 
get_mackinnon_pvalue(adf_info * ainfo)1058 static double get_mackinnon_pvalue (adf_info *ainfo)
1059 {
1060     int asy = (ainfo->kmax > 0 || ainfo->order > 0);
1061     int niv, T = asy ? 0 : ainfo->T;
1062     double pval;
1063 
1064     if (ainfo->flags & ADF_EG_RESIDS) {
1065 	niv = ainfo->niv;
1066     } else {
1067 	niv = 1;
1068     }
1069 
1070     pval = get_urc_pvalue(ainfo->tau, T, niv, ainfo->det);
1071     if (!na(pval)) {
1072 	ainfo->pvtype = asy ? PV_ASY : PV_FINITE;
1073     }
1074 
1075     return pval;
1076 }
1077 
get_dfgls_pvalue(adf_info * ainfo)1078 static double get_dfgls_pvalue (adf_info *ainfo)
1079 {
1080     double (*pvfunc)(double, int, int, int, int, int *);
1081     double pval = NADBL;
1082     int T = ainfo->T;
1083     int PQ, trend, err = 0;
1084 
1085     pvfunc = get_plugin_function("dfgls_pvalue");
1086     if (pvfunc == NULL) {
1087 	return pval;
1088     }
1089 
1090     if (ainfo->kmax == 0 && ainfo->order > 0) {
1091 	/* fixed lag order > 0: asymptotic */
1092 	T = 0;
1093     }
1094     trend = (ainfo->det == UR_TREND);
1095     PQ = (ainfo->flags & ADF_PQ)? 1 : 0;
1096 
1097     pval = (*pvfunc)(ainfo->tau, T, trend, ainfo->kmax, PQ, &err);
1098     if (!na(pval)) {
1099 	ainfo->pvtype = (T == 0)? PV_ASY : PV_APPROX;
1100     }
1101 
1102 #if ADF_DEBUG
1103     fprintf(stderr, "dfgls_pval: tau=%g, T=%d, trend=%d, kmax=%d, PQ=%d: pval=%g\n",
1104 	    ainfo->tau, T, trend, ainfo->kmax, PQ, pval);
1105 #endif
1106 
1107     return pval;
1108 }
1109 
1110 #define test_opt_not_set(o) (!(o & OPT_N) && !(o & OPT_C) && \
1111                              !(o & OPT_T) && !(o & OPT_R))
1112 
test_wanted(DetCode det,gretlopt opt)1113 static int test_wanted (DetCode det, gretlopt opt)
1114 {
1115     int ret = 0;
1116 
1117     switch (det) {
1118     case UR_NO_CONST:
1119 	ret = (opt & OPT_N);
1120 	break;
1121     case UR_CONST:
1122 	ret = (opt & OPT_C);
1123 	break;
1124     case UR_TREND:
1125 	ret = (opt & OPT_T);
1126 	break;
1127     case UR_QUAD_TREND:
1128 	ret = (opt & OPT_R);
1129 	break;
1130     default:
1131 	break;
1132     }
1133 
1134     return ret;
1135 }
1136 
engle_granger_itv(gretlopt opt)1137 static DetCode engle_granger_itv (gretlopt opt)
1138 {
1139     DetCode itv = UR_CONST;
1140 
1141     if (opt & OPT_N) {
1142 	itv = UR_NO_CONST;
1143     } else if (opt & OPT_T) {
1144 	itv = UR_TREND;
1145     } else if (opt & OPT_R) {
1146 	itv = UR_QUAD_TREND;
1147     }
1148 
1149     return itv;
1150 }
1151 
gettrend(DATASET * dset,int square)1152 static int gettrend (DATASET *dset, int square)
1153 {
1154     int idx, t, v = dset->v;
1155     double x;
1156 
1157     idx = series_index(dset, (square)? "timesq" : "time");
1158 
1159     if (idx < v) {
1160 	return idx;
1161     }
1162 
1163     if (dataset_add_series(dset, 1)) {
1164 	return 0; /* error: valid value cannot == 0 */
1165     }
1166 
1167     for (t=0; t<dset->n; t++) {
1168 	x = (double) t + 1;
1169 	dset->Z[v][t] = (square)? x * x : x;
1170     }
1171 
1172     if (square) {
1173 	strcpy(dset->varname[v], "timesq");
1174 	series_set_label(dset, v, _("squared time trend variable"));
1175     } else {
1176 	strcpy(dset->varname[v], "time");
1177 	series_set_label(dset, v, _("time trend variable"));
1178     }
1179 
1180     return idx;
1181 }
1182 
print_df_model(adf_info * ainfo,MODEL * pmod,int dfnum,DATASET * dset,PRN * prn)1183 static void print_df_model (adf_info *ainfo, MODEL *pmod,
1184 			    int dfnum, DATASET *dset,
1185 			    PRN *prn)
1186 {
1187     pmod->aux = (ainfo->order > 0)? AUX_ADF : AUX_DF;
1188 
1189     if (!na(ainfo->pval)) {
1190 	gretl_model_set_int(pmod, "dfnum", dfnum);
1191 	gretl_model_set_double(pmod, "dfpval", ainfo->pval);
1192     }
1193 
1194     if (ainfo->flags & ADF_EG_RESIDS) {
1195 	gretl_model_set_int(pmod, "eg-resids", 1);
1196     }
1197 
1198     if (ainfo->g != NULL) {
1199 	gretl_model_set_int(pmod, "dfgls", 1);
1200     }
1201 
1202     printmodel(pmod, dset, OPT_NONE, prn);
1203 
1204     if (ainfo->g != NULL) {
1205 	pputs(prn, "  ");
1206 	if (ainfo->g->rows == 2) {
1207 	    pprintf(prn, _("GLS detrending: b0 = %g, b1 = %g\n"),
1208 		    ainfo->g->val[0], ainfo->g->val[1]);
1209 	} else {
1210 	    pprintf(prn, _("GLS estimate of b0: %g\n"),
1211 		    ainfo->g->val[0]);
1212 	}
1213 	pputc(prn, '\n');
1214     }
1215 }
1216 
set_deterministic_terms(adf_info * ainfo,DATASET * dset)1217 static int set_deterministic_terms (adf_info *ainfo,
1218 				    DATASET *dset)
1219 {
1220     int i, j;
1221 
1222     /* Note that list[1] and list[2], plus the @order
1223        lagged differences, are in common for all
1224        specifications. So we start adding deterministic
1225        terms at position 3 + ainfo->order.
1226     */
1227 
1228     ainfo->list[0] = 1 + ainfo->order + ainfo->det + ainfo->nseas;
1229     i = 3 + ainfo->order;
1230 
1231     if (ainfo->det >= UR_TREND) {
1232 	ainfo->list[i] = gettrend(dset, 0);
1233 	if (ainfo->list[i] == 0) {
1234 	    return E_ALLOC;
1235 	}
1236 	i++;
1237     }
1238 
1239     if (ainfo->det == UR_QUAD_TREND) {
1240 	ainfo->list[i] = gettrend(dset, 1);
1241 	if (ainfo->list[i] == 0) {
1242 	    return E_ALLOC;
1243 	}
1244 	i++;
1245     }
1246 
1247     if (ainfo->nseas > 0) {
1248 	for (j=0; j<ainfo->nseas; j++) {
1249 	    ainfo->list[i++] = ainfo->slist[j+1];
1250 	}
1251     }
1252 
1253     if (ainfo->det != UR_NO_CONST) {
1254 	/* stick constant on end of list */
1255 	ainfo->list[i] = 0;
1256     }
1257 
1258     return 0;
1259 }
1260 
1261 /* When we're done with testing down in the DF-GLS context
1262    we may need to regenerate the detrended data: this
1263    applies if (a) we used Perron-Qu OLS detrending in the
1264    test-down phase or (b) the sample start is currently
1265    set after the start of the dataset.
1266 */
1267 
reset_detrended_data(adf_info * ainfo,DATASET * dset)1268 static int reset_detrended_data (adf_info *ainfo,
1269 				 DATASET *dset)
1270 {
1271     if (ainfo->flags & ADF_PQ) {
1272 	/* testing down using Perron-Qu */
1273 	return 1;
1274     } else if ((ainfo->flags & ADF_GLS) && dset->t1 > 0) {
1275 	/* GLS + testing down + t1 > 0 */
1276 	return 1;
1277     } else {
1278 	return 0;
1279     }
1280 }
1281 
handle_test_down_option(adf_info * ainfo,gretlopt opt,int * err)1282 static int handle_test_down_option (adf_info *ainfo,
1283 				    gretlopt opt,
1284 				    int *err)
1285 {
1286     int kmethod = 0;
1287     const char *s;
1288 
1289     if (ainfo->flags & (ADF_EG_TEST | ADF_EG_RESIDS)) {
1290 	s = get_optval_string(COINT, OPT_E);
1291     } else {
1292 	s = get_optval_string(ADF, OPT_E);
1293     }
1294 
1295     if (s == NULL || *s == '\0') {
1296 	/* the default */
1297 	kmethod = k_AIC;
1298     } else if (!strcmp(s, "MAIC") || !strcmp(s, "AIC")) {
1299 	kmethod = k_AIC;
1300     } else if (!strcmp(s, "MBIC") || !strcmp(s, "BIC")) {
1301 	kmethod = k_BIC;
1302     } else if (!strcmp(s, "tstat")) {
1303 	kmethod = k_TSTAT;
1304     } else {
1305 	gretl_errmsg_set(_("Invalid option"));
1306 	*err = E_DATA;
1307     }
1308 
1309     if (!*err) {
1310 	/* take the given order to be the max */
1311 	ainfo->kmax = ainfo->order;
1312 	if (ainfo->flags & ADF_PQ) {
1313 	    /* --perron-qu */
1314 	    if (kmethod != k_AIC && kmethod != k_BIC) {
1315 		*err = E_BADOPT;
1316 	    }
1317 	}
1318     }
1319 
1320     return kmethod;
1321 }
1322 
check_adf_options(adf_info * ainfo,gretlopt opt)1323 static int check_adf_options (adf_info *ainfo, gretlopt opt)
1324 {
1325     int err = 0;
1326 
1327     if (opt & OPT_G) {
1328 	/* we can only have one of the basic deterministics options */
1329 	err = incompatible_options(opt, OPT_N | OPT_C | OPT_T | OPT_R);
1330 	/* options incompatible with --gls: no-const, seasonals,
1331 	   and quadratic trend
1332 	*/
1333 	if (!err && (opt & (OPT_N | OPT_D | OPT_R))) {
1334 	    err = E_BADOPT;
1335 	}
1336 	if (!err) {
1337 	    ainfo->flags |= ADF_GLS;
1338 	    if (opt & OPT_U) {
1339 		/* Perron-Qu */
1340 		ainfo->flags |= ADF_PQ;
1341 	    }
1342 	}
1343     } else if (opt & OPT_U) {
1344 	/* option dependent on --gls: Perron-Qu modified AIC/BIC */
1345 	err = E_BADOPT;
1346     }
1347 
1348     return err;
1349 }
1350 
real_adf_test(adf_info * ainfo,DATASET * dset,gretlopt opt,PRN * prn)1351 static int real_adf_test (adf_info *ainfo, DATASET *dset,
1352 			  gretlopt opt, PRN *prn)
1353 {
1354     MODEL dfmod;
1355     gretlopt eg_opt = OPT_NONE;
1356     gretlopt df_mod_opt = (OPT_A | OPT_Z);
1357     int *biglist = NULL;
1358     int orig_nvars = dset->v;
1359     int blurb_done = 0;
1360     int demean_mode = 0;
1361     int test_down = 0;
1362     int test_num = 0;
1363     int i, err;
1364 
1365     /* (most of) this may have been done already
1366        but it won't hurt to check here */
1367     err = check_adf_options(ainfo, opt);
1368     if (err) {
1369 	return err;
1370     }
1371 
1372     /* safety-first initializations */
1373     ainfo->nseas = ainfo->kmax = ainfo->altv = 0;
1374     ainfo->vname = dset->varname[ainfo->v];
1375     ainfo->list = ainfo->slist = NULL;
1376     ainfo->det = 0;
1377 
1378 #if ADF_DEBUG
1379     fprintf(stderr, "real_adf_test: got order = %d\n", ainfo->order);
1380 #endif
1381 
1382     if (gretl_isconst(dset->t1, dset->t2, dset->Z[ainfo->v])) {
1383 	gretl_errmsg_sprintf(_("%s is a constant"), ainfo->vname);
1384 	return E_DATA;
1385     }
1386 
1387     if (opt & OPT_E) {
1388 	/* --test-down[=...] */
1389 	test_down = handle_test_down_option(ainfo, opt, &err);
1390 	if (err) {
1391 	    return err;
1392 	}
1393     }
1394 
1395     if (ainfo->order < 0) {
1396 	test_down = k_AIC;
1397 	ainfo->order = ainfo->kmax = -ainfo->order;
1398     }
1399 
1400 #if ADF_DEBUG
1401     fprintf(stderr, "real_adf_test: order = %d, test_down = %d\n",
1402 	    ainfo->order, test_down);
1403 #endif
1404 
1405     if (ainfo->flags & ADF_EG_RESIDS) {
1406 	/* Final step of Engle-Granger test: the (A)DF test
1407 	   regression will contain no deterministic terms,
1408 	   but the selection of the p-value is based on the
1409 	   deterministic terms in the cointegrating
1410 	   regression, represented by @eg_opt.
1411 	*/
1412 	int verbose = (opt & OPT_V);
1413 	int silent = (opt & OPT_I);
1414 
1415 	eg_opt = opt;
1416 	opt = OPT_N;
1417 	if (silent) {
1418 	    opt |= OPT_I;
1419 	} else if (verbose) {
1420 	    opt |= OPT_V;
1421 	}
1422     }
1423 
1424     if (opt & OPT_F) {
1425 	/* difference the target series before testing */
1426 	int t1 = dset->t1;
1427 
1428 	dset->t1 = 0;
1429 	ainfo->v = diffgenr(ainfo->v, DIFF, dset);
1430 	dset->t1 = t1;
1431 	if (ainfo->v < 0) {
1432 	    return E_DATA;
1433 	}
1434 	ainfo->vname = dset->varname[ainfo->v];
1435     }
1436 
1437     if ((opt & OPT_D) && dset->pd > 1) {
1438 	/* arrange to add seasonal dummies */
1439 	ainfo->nseas = dset->pd - 1;
1440     }
1441 
1442     if (test_opt_not_set(opt)) {
1443 	/* default model(s) */
1444 	if (ainfo->flags & ADF_GLS) {
1445 	    opt |= OPT_C;
1446 	} else {
1447 	    opt |= (OPT_C | OPT_T);
1448 	}
1449     }
1450 
1451     if (ainfo->flags & ADF_PQ) {
1452 	demean_mode = demean_OLS;
1453     } else if (ainfo->flags & ADF_GLS) {
1454 	demean_mode = demean_GLS;
1455     }
1456 
1457     err = adf_prepare_vars(ainfo, dset, opt, demean_mode, prn);
1458     if (err) {
1459 	return err;
1460     }
1461 
1462     if (test_down) {
1463 	ainfo->list[0] = ainfo->order + 5;
1464 	biglist = gretl_list_copy(ainfo->list);
1465 	if (biglist == NULL) {
1466 	    err = E_ALLOC;
1467 	    goto bailout;
1468 	}
1469     }
1470 
1471     gretl_model_init(&dfmod, dset);
1472 
1473     /* Now loop across the wanted deterministics cases:
1474        in many instances we'll actually be doing only one
1475        case.
1476     */
1477 
1478     for (i=UR_NO_CONST; i<UR_MAX; i++) {
1479 	int b0pos = (i > UR_NO_CONST);
1480 
1481 	ainfo->det = i;
1482 	if (!test_wanted(ainfo->det, opt)) {
1483 	    continue;
1484 	}
1485 
1486 	if (test_down) {
1487 	    /* re-establish max order before testing down */
1488 	    ainfo->order = ainfo->kmax;
1489 	    copy_list_values(ainfo->list, biglist);
1490 	}
1491 
1492 	if (ainfo->flags & ADF_GLS) {
1493 	    /* skip deterministics */
1494 	    ainfo->list[0] = ainfo->order + 2;
1495 	    b0pos = 0;
1496 	} else {
1497 	    err = set_deterministic_terms(ainfo, dset);
1498 	    if (err) {
1499 		goto bailout;
1500 	    }
1501 	}
1502 
1503 	test_num++;
1504 
1505 	if (test_down) {
1506 	    /* determine the optimal lag order */
1507 	    if (test_down == k_TSTAT) {
1508 		ainfo->order = t_adjust_order(ainfo, dset, &err, prn);
1509 	    } else {
1510 		ainfo->order = ic_adjust_order(ainfo, test_down,
1511 					       dset, opt, test_num,
1512 					       &err, prn);
1513 	    }
1514 	    if (err) {
1515 		clear_model(&dfmod);
1516 		goto bailout;
1517 	    } else if (reset_detrended_data(ainfo, dset)) {
1518 		/* swap out the detrended data */
1519 		err = adf_prepare_vars(ainfo, dset, opt, demean_GLS, prn);
1520 	    }
1521 	}
1522 
1523 #if ADF_DEBUG
1524 	printlist(ainfo->list, "final ADF regression list");
1525 #endif
1526 
1527 	/* run the actual test regression */
1528 	dfmod = lsq(ainfo->list, dset, OLS, df_mod_opt);
1529 
1530 	if (!dfmod.errcode && dfmod.dfd == 0) {
1531 	    /* we can't tolerate an exact fit here */
1532 	    dfmod.errcode = E_DF;
1533 	}
1534 	if (dfmod.errcode) {
1535 	    fprintf(stderr, "adf_test: dfmod.errcode = %d\n",
1536 		    dfmod.errcode);
1537 	    err = dfmod.errcode;
1538 	    clear_model(&dfmod);
1539 	    goto bailout;
1540 	}
1541 
1542 	/* transcribe info from test regression */
1543 	ainfo->b0 = dfmod.coeff[b0pos];
1544 	ainfo->tau = ainfo->b0 / dfmod.sderr[b0pos];
1545 	ainfo->T = dfmod.nobs;
1546 	ainfo->df = dfmod.dfd;
1547 
1548 	if (ainfo->flags & ADF_EG_RESIDS) {
1549 	    ainfo->det = engle_granger_itv(eg_opt);
1550 	}
1551 
1552 	/* obtain p-value if wanted */
1553 	if (getenv("DFGLS_NO_PVALUE")) {
1554 	    /* skip it, to speed up monte carlo stuff */
1555 	    ainfo->pval = NADBL;
1556 	} else if (ainfo->flags & ADF_GLS) {
1557 	    /* use Cottrell/Komashko or Sephton */
1558 	    ainfo->pval = get_dfgls_pvalue(ainfo);
1559 	} else {
1560 	    /* no GLS adjustment applied: use MacKinnon p-values,
1561 	       either finite sample or asymptotic in the case
1562 	       of an augmented test
1563 	    */
1564 	    ainfo->pval = get_mackinnon_pvalue(ainfo);
1565 	}
1566 
1567 	if (!(opt & (OPT_Q | OPT_I)) && !(ainfo->flags & ADF_PANEL)) {
1568 	    print_adf_results(ainfo, &dfmod, &blurb_done,
1569 			      opt, test_down, prn);
1570 	}
1571 
1572 	if ((opt & OPT_V) && !(ainfo->flags & ADF_PANEL)) {
1573 	    /* verbose */
1574 	    print_df_model(ainfo, &dfmod, b0pos, dset, prn);
1575 	} else if (!(opt & OPT_Q) && !(ainfo->flags & (ADF_EG_RESIDS | ADF_PANEL))) {
1576 	    pputc(prn, '\n');
1577 	}
1578 
1579 	clear_model(&dfmod);
1580     }
1581 
1582     if (!err) {
1583 	if (!(ainfo->flags & (ADF_EG_TEST | ADF_PANEL)) ||
1584 	    (ainfo->flags & ADF_EG_RESIDS)) {
1585 	    record_test_result(ainfo->tau, ainfo->pval);
1586 	}
1587     }
1588 
1589  bailout:
1590 
1591     free(ainfo->list);
1592     ainfo->list = NULL;
1593     free(ainfo->slist);
1594     ainfo->slist = NULL;
1595     gretl_matrix_free(ainfo->g);
1596     ainfo->g = NULL;
1597     free(biglist);
1598 
1599     dataset_drop_last_variables(dset, dset->v - orig_nvars);
1600 
1601     return err;
1602 }
1603 
1604 /* print critical value for ADF-IPS or KPSS */
1605 
print_critical_values(double * a,double * cv,int ci,PRN * prn)1606 static void print_critical_values (double *a, double *cv,
1607 				   int ci, PRN *prn)
1608 {
1609     const char *label = N_("Critical values");
1610     int figs = (ci == ADF)? 2 : 3;
1611 
1612     pprintf(prn, "%*s    ", TRANSLATED_WIDTH(_(label)), " ");
1613     pprintf(prn, "%g%%      %g%%      %g%%\n",
1614 	    100*a[0], 100*a[1], 100*a[2]);
1615     pprintf(prn, "%s: %.*f   %.*f   %.*f\n",
1616 	    _(label), figs, cv[0], figs, cv[1], figs, cv[2]);
1617 }
1618 
panel_adjust_ADF_opt(gretlopt * opt)1619 static int panel_adjust_ADF_opt (gretlopt *opt)
1620 {
1621     int err = 0;
1622 
1623     /* Has the user selected an option governing the
1624        deterministic terms to be included? If so, don't
1625        mess with it.
1626     */
1627     if (*opt & (OPT_N | OPT_C | OPT_R | OPT_T)) {
1628 	; /* no-op */
1629     } else {
1630 	/* panel default: test with constant */
1631 	*opt |= OPT_C;
1632     }
1633 
1634     return err;
1635 }
1636 
DF_index(gretlopt opt)1637 static int DF_index (gretlopt opt)
1638 {
1639     if (opt & OPT_N) {
1640 	return 0;
1641     } else if (opt & OPT_C) {
1642 	return 1;
1643     } else if (opt & OPT_T) {
1644 	return 2;
1645     } else {
1646 	return 3;
1647     }
1648 }
1649 
1650 /* See Im, Pesaran and Shin, "Testing for unit roots in
1651    heterogeneous panels", Journal of Econometrics 115 (2003),
1652    53-74.
1653 */
1654 
do_IPS_test(double tbar,int n,const int * Ti,int order,const int * Oi,gretlopt opt,PRN * prn)1655 static int do_IPS_test (double tbar, int n, const int *Ti,
1656 			int order, const int *Oi,
1657 			gretlopt opt, PRN *prn)
1658 {
1659     int (*get_IPS_critvals) (int, int, int, double *);
1660     int (*IPS_tbar_moments) (int, double *, double *);
1661     int (*IPS_tbar_rho_moments) (int, int, int, double *, double *);
1662     int Tmin = Ti[1], Tmax = Ti[1];
1663     int i, T, err = 0;
1664 
1665     for (i=2; i<=n; i++) {
1666 	if (Ti[i] > Tmax) {
1667 	    Tmax = Ti[i];
1668 	}
1669 	if (Ti[i] < Tmin) {
1670 	    Tmin = Ti[i];
1671 	}
1672     }
1673 
1674     if (Oi != NULL || order > 0) {
1675 	/* non-zero lag order: use IPS's W_{tbar} statistic */
1676 	double E, V, Wtbar, Etbar = 0, Vtbar = 0;
1677 	int order_i;
1678 
1679 	IPS_tbar_rho_moments = get_plugin_function("IPS_tbar_rho_moments");
1680 
1681 	if (IPS_tbar_rho_moments != NULL) {
1682 	    for (i=0; i<n && !err; i++) {
1683 		T = Ti[i+1];
1684 		order_i = (Oi != NULL)? Oi[i+1] : order;
1685 		err = IPS_tbar_rho_moments(order_i, T, (opt & OPT_T), &E, &V);
1686 		Etbar += E;
1687 		Vtbar += V;
1688 	    }
1689 
1690 	    if (!err) {
1691 		Etbar /= n;
1692 		Vtbar /= n;
1693 		Wtbar = sqrt(n) * (tbar - Etbar) / sqrt(Vtbar);
1694 		pprintf(prn, "N = %d, Tmin = %d, Tmax = %d\n", n, Tmin, Tmax);
1695 		pprintf(prn, "Im-Pesaran-Shin W_tbar = %g [%.4f]\n", Wtbar,
1696 			normal_pvalue_1(-Wtbar));
1697 	    }
1698 	}
1699     } else if (Tmax > Tmin) {
1700 	/* sample sizes differ: use IPS's Z_{tbar} */
1701 	double E, V, Ztbar, Etbar = 0, Vtbar = 0;
1702 
1703 	IPS_tbar_moments = get_plugin_function("IPS_tbar_moments");
1704 
1705 	if (IPS_tbar_moments != NULL) {
1706 	    for (i=0; i<n && !err; i++) {
1707 		T = Ti[i+1];
1708 		err = IPS_tbar_moments(T, &E, &V);
1709 		Etbar += E;
1710 		Vtbar += V;
1711 	    }
1712 
1713 	    if (!err) {
1714 		Etbar /= n;
1715 		Vtbar /= n;
1716 		Ztbar = sqrt(n) * (tbar - Etbar) / sqrt(Vtbar);
1717 		pprintf(prn, "N = %d, Tmin = %d, Tmax = %d\n", n, Tmin, Tmax);
1718 		pprintf(prn, "Im-Pesaran-Shin Z_tbar = %g [%.4f]\n", Ztbar,
1719 			normal_pvalue_1(-Ztbar));
1720 	    }
1721 	}
1722     } else {
1723 	/* simple case: use tbar with exact critical values */
1724 	pprintf(prn, "N,T = (%d,%d)\n", n, Tmax);
1725 	pprintf(prn, "Im-Pesaran-Shin t-bar = %g\n", tbar);
1726 
1727 	get_IPS_critvals = get_plugin_function("get_IPS_critvals");
1728 
1729 	if (get_IPS_critvals != NULL) {
1730 	    double a[] = { 0.1, 0.05, 0.01 };
1731 	    double cv[3];
1732 
1733 	    err = (*get_IPS_critvals) (n, Tmax, (opt & OPT_T), cv);
1734 	    if (!err) {
1735 		print_critical_values(a, cv, ADF, prn);
1736 	    }
1737 	}
1738     }
1739 
1740     return err;
1741 }
1742 
1743 /* See In Choi, "Unit root tests for panel data", Journal of
1744    International Money and Finance 20 (2001), 249-272.
1745 */
1746 
do_choi_test(double ppv,double zpv,double lpv,int n,PRN * prn)1747 static void do_choi_test (double ppv, double zpv, double lpv,
1748 			  int n, PRN *prn)
1749 {
1750     double P = -2 * ppv;
1751     double Z = zpv / sqrt((double) n);
1752     int tdf = 5 * n + 4;
1753     double k = (3.0*tdf)/(M_PI*M_PI*n*(5*n+2));
1754     double L = sqrt(k) * lpv;
1755 
1756     pprintf(prn, "%s\n", _("Choi meta-tests:"));
1757     pprintf(prn, "   %s(%d) = %g [%.4f]\n", _("Inverse chi-square"),
1758 	    2*n, P, chisq_cdf_comp(2*n, P));
1759     pprintf(prn, "   %s = %g [%.4f]\n", _("Inverse normal test"),
1760 	    Z, normal_pvalue_1(-Z));
1761     pprintf(prn, "   %s: t(%d) = %g [%.4f]\n", _("Logit test"),
1762 	    tdf, L, student_pvalue_1(tdf, -L));
1763 }
1764 
panel_unit_DF_print(adf_info * ainfo,int i,PRN * prn)1765 static void panel_unit_DF_print (adf_info *ainfo, int i, PRN *prn)
1766 {
1767     pprintf(prn, "%s %d, T = %d, %s = %d\n", _("Unit"), i,
1768 	    ainfo->T, _("lag order"), ainfo->order);
1769     pprintf(prn, "   %s: %g\n"
1770 	    "   %s = %g",
1771 	    _("estimated value of (a - 1)"), ainfo->b0,
1772 	    _("test statistic"), ainfo->tau);
1773     if (na(ainfo->pval)) {
1774 	pputs(prn, "\n\n");
1775     } else {
1776 	pprintf(prn, " [%.4f]\n\n", ainfo->pval);
1777     }
1778 }
1779 
panel_DF_test(int v,int order,DATASET * dset,gretlopt opt,PRN * prn)1780 static int panel_DF_test (int v, int order, DATASET *dset,
1781 			  gretlopt opt, PRN *prn)
1782 {
1783     adf_info ainfo = {0};
1784     int u0 = dset->t1 / dset->pd;
1785     int uN = dset->t2 / dset->pd;
1786     int quiet = (opt & OPT_Q);
1787     int verbose = (opt & OPT_V);
1788     double ppv = 0.0, zpv = 0.0, lpv = 0.0;
1789     double pval, tbar = 0.0;
1790     int *Ti = NULL, *Oi = NULL;
1791     int i, n, err;
1792 
1793     err = panel_adjust_ADF_opt(&opt);
1794     if (err) {
1795 	return err;
1796     }
1797 
1798     if (opt & OPT_G) {
1799 	/* GLS option: can't do IPS t-bar test */
1800 	tbar = NADBL;
1801     } else {
1802 	Ti = gretl_list_new(uN - u0 + 1);
1803 	if (Ti == NULL) {
1804 	    return E_ALLOC;
1805 	}
1806 	if ((opt & OPT_E) || order < 0) {
1807 	    /* testing down: lag order may vary by unit */
1808 	    Oi = gretl_list_new(uN - u0 + 1);
1809 	    if (Oi == NULL) {
1810 		free(Ti);
1811 		return E_ALLOC;
1812 	    }
1813 	}
1814     }
1815 
1816     if (!quiet) {
1817 	int j = DF_index(opt);
1818 
1819 	DF_header(dset->varname[v], (opt & OPT_E)? 0 : order,
1820 		  0, 0, opt, prn);
1821 	pprintf(prn, "   %s ", _(DF_test_string(j)));
1822 	pputc(prn, '\n');
1823 	pprintf(prn, "   %s: %s\n\n", _("model"),
1824 		(order > 0)? ADF_model_string(j) : DF_model_string(j));
1825     }
1826 
1827     /* number of units in sample range */
1828     n = uN - u0 + 1;
1829 
1830     /* initialize @ainfo */
1831     ainfo.v = v;
1832     ainfo.niv = 1;
1833     ainfo.flags = ADF_PANEL;
1834 
1835     /* run a Dickey-Fuller test for each unit and record the
1836        results */
1837 
1838     for (i=u0; i<=uN && !err; i++) {
1839 	ainfo.order = order;
1840 	dset->t1 = i * dset->pd;
1841 	dset->t2 = dset->t1 + dset->pd - 1;
1842 	err = series_adjust_sample(dset->Z[v], &dset->t1, &dset->t2);
1843 
1844 	if (!err) {
1845 	    err = real_adf_test(&ainfo, dset, opt, prn);
1846 	    if (!err && verbose) {
1847 		panel_unit_DF_print(&ainfo, i+1, prn);
1848 	    }
1849 	}
1850 
1851 	if (!err) {
1852 	    if (Ti != NULL) {
1853 		Ti[i-u0+1] = ainfo.T;
1854 	    }
1855 	    if (Oi != NULL) {
1856 		Oi[i-u0+1] = ainfo.order;
1857 	    }
1858 	    if (!na(tbar)) {
1859 		tbar += ainfo.tau;
1860 	    }
1861 	    pval = ainfo.pval;
1862 	    if (na(pval)) {
1863 		ppv = zpv = lpv = NADBL;
1864 	    } else if (!na(ppv)) {
1865 		ppv += log(pval);
1866 		zpv += normal_cdf_inverse(pval);
1867 		lpv += log(pval / (1-pval));
1868 	    }
1869 	}
1870     }
1871 
1872     /* process the results as per Im-Pesaran-Shin and/or Choi */
1873 
1874     if (!err) {
1875 	pprintf(prn, "%s\n\n", _("H0: all groups have unit root"));
1876 	if (!na(tbar)) {
1877 	    tbar /= n;
1878 	    do_IPS_test(tbar, n, Ti, order, Oi, opt, prn);
1879 	}
1880 	if (!na(ppv)) {
1881 	    pputc(prn, '\n');
1882 	    do_choi_test(ppv, zpv, lpv, n, prn);
1883 	}
1884 	pputc(prn, '\n');
1885     }
1886 
1887     free(Ti);
1888     free(Oi);
1889 
1890     return err;
1891 }
1892 
1893 /**
1894  * levin_lin_test:
1895  * @vnum: ID number of variable to test.
1896  * @plist: list of ADF lag orders.
1897  * @dset: data information struct.
1898  * @opt: option flags.
1899  * @prn: gretl printing struct.
1900  *
1901  * Carries out and prints the results of the Levin-Lin-Chu test
1902  * for a unit root in panel data.
1903  *
1904  * The list @plist should contain either a single lag order
1905  * to be applied to all units, or a set of unit-specific
1906  * orders; in the latter case the length of the list must
1907  * equal the number of panel units in the current sample
1908  * range. (This is a gretl list: the first element holds
1909  * a count of the number of elements following.)
1910  *
1911  * By default a test with constant is performed, but the
1912  * (mutually exclusive) options OPT_N and OPT_T in @opt switch to
1913  * the case of no constant or constant plus trend respectively.
1914  * The OPT_Q flag may be used to suppress printed output.
1915  *
1916  * Returns: 0 on successful completion, non-zero on error.
1917  */
1918 
levin_lin_test(int vnum,const int * plist,DATASET * dset,gretlopt opt,PRN * prn)1919 int levin_lin_test (int vnum, const int *plist,
1920 		    DATASET *dset, gretlopt opt,
1921 		    PRN *prn)
1922 {
1923     int (*real_levin_lin) (int, const int *, DATASET *,
1924 			   gretlopt, PRN *);
1925     int panelmode;
1926     int err = 0;
1927 
1928     panelmode = multi_unit_panel_sample(dset);
1929 
1930     if (!panelmode || incompatible_options(opt, OPT_N | OPT_T)) {
1931 	return E_BADOPT;
1932     }
1933 
1934     real_levin_lin = get_plugin_function("real_levin_lin");
1935 
1936     if (real_levin_lin == NULL) {
1937 	err = E_FOPEN;
1938     } else {
1939 	int save_t1 = dset->t1;
1940 	int save_t2 = dset->t2;
1941 
1942 	err = (*real_levin_lin) (vnum, plist, dset, opt, prn);
1943 
1944 	dset->t1 = save_t1;
1945 	dset->t2 = save_t2;
1946     }
1947 
1948     return err;
1949 }
1950 
1951 /**
1952  * adf_test:
1953  * @order: lag order for the (augmented) test.
1954  * @list: list of variables to test.
1955  * @dset: dataset struct.
1956  * @opt: option flags.
1957  * @prn: gretl printing struct.
1958  *
1959  * Carries out and prints the results of the Augmented Dickey-Fuller
1960  * test for a unit root.
1961  *
1962  * By default two tests are performed, one for a model
1963  * including a constant and one including a linear trend. The
1964  * deterministic components of the model can be controlled via
1965  * flags in @opt as follows: OPT_N, omit the constant; OPT_C,
1966  * run just one test using the constant; OPT_T, one test including
1967  * linear trend; OPT_R, one test including a quadratic trend;
1968  * OPT_D, include seasonal dummy variables.
1969  *
1970  * Additional flags that may be given in @opt include:
1971  * OPT_V for verbose operation; OPT_F to apply first-differencing
1972  * before testing; OPT_G for GLS preprocessing as in Elliott, Rothenberg
1973  * and Stock (incompatible with OPT_N, OPT_R, OPT_D); OPT_E to
1974  * "test down" from a given maximum lag order (see the entry for
1975  * "adf" in the Gretl Command Reference for details).
1976  *
1977  * Returns: 0 on successful completion, non-zero on error.
1978  */
1979 
adf_test(int order,const int * list,DATASET * dset,gretlopt opt,PRN * prn)1980 int adf_test (int order, const int *list, DATASET *dset,
1981 	      gretlopt opt, PRN *prn)
1982 {
1983     int save_t1 = dset->t1;
1984     int save_t2 = dset->t2;
1985     int panelmode;
1986     int err;
1987 
1988     /* GLS incompatible with no const, quadratic trend or seasonals */
1989     err = incompatible_options(opt, OPT_G | OPT_N | OPT_R);
1990     if (!err) {
1991 	err = incompatible_options(opt, OPT_D | OPT_G);
1992     }
1993 
1994     if (!err && (opt & OPT_G)) {
1995 	/* under GLS, have to choose between cases */
1996 	err = incompatible_options(opt, OPT_C | OPT_T);
1997     }
1998 
1999     panelmode = multi_unit_panel_sample(dset);
2000 
2001     if (panelmode) {
2002 	err = panel_DF_test(list[1], order, dset, opt, prn);
2003     } else {
2004 	/* regular time series case */
2005 	int i, v, vlist[2] = {1, 0};
2006 	adf_info ainfo = {0};
2007 
2008 	ainfo.niv = 1;
2009 	if (opt & OPT_V) {
2010 	    int vlevel = get_optval_int(ADF, OPT_V, &err);
2011 
2012 	    if (vlevel > 1) {
2013 		ainfo.verbosity = vlevel;
2014 	    } else {
2015 		ainfo.verbosity = 1;
2016 	    }
2017 	    /* don't let this check flag an error */
2018 	    err = 0;
2019 	}
2020 
2021 	for (i=1; i<=list[0] && !err; i++) {
2022 	    ainfo.v = vlist[1] = list[i];
2023 	    ainfo.order = order;
2024 	    vlist[1] = v = list[i];
2025 	    err = list_adjust_sample(vlist, &dset->t1, &dset->t2, dset, NULL);
2026 	    if (!err && order == -1) {
2027 		/* default to L_{12}: see G. W. Schwert, "Tests for Unit Roots:
2028 		   A Monte Carlo Investigation", Journal of Business and
2029 		   Economic Statistics, 7(2), 1989, pp. 5-17. Note that at
2030 		   some points Ng uses floor(T/100.0) in the following
2031 		   expression, which can give a lower max order.
2032 		*/
2033 		int T = sample_size(dset);
2034 
2035 		ainfo.order = 12.0 * pow(T/100.0, 0.25);
2036 	    }
2037 	    if (!err) {
2038 		err = real_adf_test(&ainfo, dset, opt, prn);
2039 	    }
2040 	    dset->t1 = save_t1;
2041 	    dset->t2 = save_t2;
2042 	}
2043     }
2044 
2045     dset->t1 = save_t1;
2046     dset->t2 = save_t2;
2047 
2048     return err;
2049 }
2050 
2051 /* See Peter S. Sephton, "Response surface estimates of the KPSS
2052    stationarity test", Economics Letters 47 (1995) 255-261.
2053 
2054    The estimates below of \beta_\infty and \beta_1 (based on
2055    a bigger replication using Sephton's methodology) allow the
2056    construction of better critical values for finite samples
2057    than the values given in the original KPSS article.
2058 */
2059 
kpss_parms(double a,int trend,double * b)2060 static void kpss_parms (double a, int trend, double *b)
2061 {
2062     const double b0_level[] = { 0.74404, 0.46158, 0.34742 };
2063     const double b1_level[] = { -0.99120, 0.01642, 0.19814 };
2064     const double b0_trend[] = { 0.21787, 0.14797, 0.11925 };
2065     const double b1_trend[] = { -0.25128, 0.03270, 0.10244 };
2066     int i = (a == .01)? 0 : (a == .05)? 1 : 2;
2067 
2068     if (trend) {
2069 	b[0] = b0_trend[i];
2070 	b[1] = b1_trend[i];
2071     } else {
2072 	b[0] = b0_level[i];
2073 	b[1] = b1_level[i];
2074     }
2075 }
2076 
kpss_critval(double alpha,int T,int trend)2077 static double kpss_critval (double alpha, int T, int trend)
2078 {
2079     double b[2];
2080 
2081     kpss_parms(alpha, trend, b);
2082 
2083     return b[0] + b[1]/T;
2084 }
2085 
kpss_critvals(int T,int trend,int * err)2086 gretl_matrix *kpss_critvals (int T, int trend, int *err)
2087 {
2088     gretl_matrix *m = NULL;
2089 
2090     if (T < 5) {
2091 	*err = E_TOOFEW;
2092     } else {
2093 	m = gretl_matrix_alloc(1, 3);
2094 	if (m == NULL) {
2095 	    *err = E_ALLOC;
2096 	} else {
2097 	    m->val[0] = kpss_critval(0.10, T, trend);
2098 	    m->val[1] = kpss_critval(0.05, T, trend);
2099 	    m->val[2] = kpss_critval(0.01, T, trend);
2100 	}
2101     }
2102 
2103     return m;
2104 }
2105 
2106 #define PV_GT10 1.1
2107 #define PV_LT01 -1.0
2108 
kpss_interp(double s,int T,int trend)2109 static double kpss_interp (double s, int T, int trend)
2110 {
2111     double c10, c05, c01;
2112     double pv;
2113 
2114     c10 = kpss_critval(.10, T, trend);
2115     if (s < c10) {
2116 	return PV_GT10;
2117     }
2118 
2119     c01 = kpss_critval(.01, T, trend);
2120     if (s > c01) {
2121 	return PV_LT01;
2122     }
2123 
2124     /* OK, p-value must lie between .01 and .10 */
2125 
2126     c05 = kpss_critval(.05, T, trend);
2127     if (s > c05) {
2128 	pv = .01 + .04 * (c01 - s) / (c01 - c05);
2129     } else {
2130 	pv = .05 + .05 * (c05 - s) / (c05 - c10);
2131     }
2132 
2133     return pv;
2134 }
2135 
2136 static int
real_kpss_test(int order,int varno,DATASET * dset,gretlopt opt,kpss_info * kinfo,PRN * prn)2137 real_kpss_test (int order, int varno, DATASET *dset,
2138 		gretlopt opt, kpss_info *kinfo,
2139 		PRN *prn)
2140 {
2141     MODEL KPSSmod;
2142     int *list = NULL;
2143     int hastrend = 0, hasseas = 0;
2144     double et, s2 = 0.0;
2145     double cumsum = 0.0, cumsum2 = 0.0;
2146     double teststat, pval = NADBL;
2147     double *autocov;
2148     int t1, t2, T;
2149     int i, t, ndum, nreg;
2150     int err = 0;
2151 
2152     /* sanity check */
2153     if (varno <= 0 || varno >= dset->v) {
2154 	return E_DATA;
2155     }
2156 
2157     if (gretl_isconst(dset->t1, dset->t2, dset->Z[varno])) {
2158 	gretl_errmsg_sprintf(_("%s is a constant"), dset->varname[varno]);
2159 	return E_DATA;
2160     }
2161 
2162     if (opt & OPT_F) {
2163 	/* difference the variable before testing */
2164 	varno = diffgenr(varno, DIFF, dset);
2165 	if (varno < 0) {
2166 	    return E_DATA;
2167 	}
2168     }
2169 
2170     if (opt & OPT_T) {
2171 	hastrend = 1;
2172     }
2173 
2174     if (opt & OPT_D) {
2175 	hasseas = 1;
2176     }
2177 
2178     ndum = hasseas ? (dset->pd - 1) : 0;
2179     nreg = 1 + hastrend + ndum;
2180 
2181     list = gretl_list_new(nreg + 1);
2182     if (list == NULL) {
2183 	return E_ALLOC;
2184     }
2185 
2186     list[1] = varno;
2187     list[2] = 0;
2188 
2189     if (hastrend) {
2190 	list[3] = gettrend(dset, 0);
2191 	if (list[3] == 0) {
2192 	    return E_ALLOC;
2193 	}
2194     }
2195 
2196     if (hasseas) {
2197 	int *slist = NULL;
2198 
2199 	slist = seasonals_list(dset, dset->pd, 1, &err);
2200 	if (err) {
2201 	    free(list);
2202 	    return err;
2203 	} else {
2204 	    for (i=0; i<ndum; i++) {
2205 		list[3 + hastrend + i] = slist[i+1];
2206 	    }
2207 	    free(slist);
2208 	}
2209     }
2210 
2211     /* OPT_M: reject missing values within sample range */
2212     KPSSmod = lsq(list, dset, OLS, OPT_A | OPT_M);
2213     if (KPSSmod.errcode) {
2214 	clear_model(&KPSSmod);
2215 	return KPSSmod.errcode;
2216     }
2217 
2218     t1 = KPSSmod.t1;
2219     t2 = KPSSmod.t2;
2220     T = KPSSmod.nobs;
2221 
2222     if (order < 0) {
2223 	order = 4.0 * pow(T / 100.0, 0.25);
2224     }
2225 
2226     if (kinfo == NULL && (opt & OPT_V)) {
2227 	KPSSmod.aux = AUX_KPSS;
2228 	printmodel(&KPSSmod, dset, OPT_NONE, prn);
2229     }
2230 
2231     autocov = malloc(order * sizeof *autocov);
2232     if (autocov == NULL) {
2233 	return E_ALLOC;
2234     }
2235 
2236     for (i=0; i<order; i++) {
2237 	autocov[i] = 0.0;
2238     }
2239 
2240     for (t=t1; t<=t2; t++) {
2241 	et = KPSSmod.uhat[t];
2242 	if (na(et)) {
2243 	    continue;
2244 	}
2245 	cumsum += et;
2246 	cumsum2 += cumsum * cumsum;
2247 	s2 += et * et;
2248 	for (i=0; i<order; i++) {
2249 	    int s = i + 1;
2250 
2251 	    if (t - s >= t1) {
2252 		autocov[i] += et * KPSSmod.uhat[t - s];
2253 	    }
2254 	}
2255 #if KPSS_DEBUG
2256 	fprintf(stderr, "%d: %#12.4g %#12.4g %#12.4g %#12.4g \n",
2257 		t, et, KPSSmod.uhat[t-1], s2, cumsum2);
2258 #endif
2259     }
2260 
2261     for (i=0; i<order; i++) {
2262 	double wt = 1.0 - ((double) (i + 1)) / (order + 1);
2263 
2264 	s2 += 2.0 * wt * autocov[i];
2265     }
2266 
2267     s2 /= T;
2268 
2269     if (s2 <= 0.0) {
2270 	teststat = pval = NADBL;
2271     } else {
2272 	teststat = cumsum2 / (s2 * T * T);
2273 	pval = kpss_interp(teststat, T, hastrend);
2274     }
2275 
2276     if (kinfo != NULL) {
2277 	/* storing info for panel test */
2278 	kinfo->T = T;
2279 	kinfo->test = teststat;
2280 	kinfo->pval = pval;
2281     } else {
2282 	/* testing individual time series */
2283 	if (opt & OPT_V) {
2284 	    pprintf(prn, "  %s: %g\n", _("Robust estimate of variance"), s2);
2285 	    pprintf(prn, "  %s: %g\n", _("Sum of squares of cumulated residuals"),
2286 		    cumsum2);
2287 	}
2288     }
2289 
2290     if (!(opt & OPT_Q)) {
2291 	double a[] = { 0.1, 0.05, 0.01 };
2292 	double cv[3];
2293 
2294 	cv[0] = kpss_critval(a[0], T, hastrend);
2295 	cv[1] = kpss_critval(a[1], T, hastrend);
2296 	cv[2] = kpss_critval(a[2], T, hastrend);
2297 
2298 	pprintf(prn, _("\nKPSS test for %s"), dset->varname[varno]);
2299 	if (hastrend) {
2300 	    if (hasseas) {
2301 		pputs(prn, _(" (including trend and seasonals)\n\n"));
2302 	    } else {
2303 		pputs(prn, _(" (including trend)\n\n"));
2304 	    }
2305 	} else {
2306 	    if (hasseas) {
2307 		pputs(prn, _(" (including seasonals)\n\n"));
2308 	    } else {
2309 		pputs(prn, "\n\n");
2310 	    }
2311 	}
2312 
2313 	pprintf(prn, "T = %d\n", T);
2314 	pprintf(prn, _("Lag truncation parameter = %d\n"), order);
2315 	pprintf(prn, "%s = %g\n\n", _("Test statistic"), teststat);
2316 	print_critical_values(a, cv, KPSS, prn);
2317 	if (pval == PV_GT10) {
2318 	    pprintf(prn, "%s > .10\n", _("P-value"));
2319 	} else if (pval == PV_LT01) {
2320 	    pprintf(prn, "%s < .01\n", _("P-value"));
2321 	} else if (!na(pval)) {
2322 	    pprintf(prn, "%s %.3f\n", _("Interpolated p-value"), pval);
2323 	}
2324 	pputc(prn, '\n');
2325     }
2326 
2327     if (kinfo == NULL) {
2328 	if (pval == PV_GT10 || pval == PV_LT01) {
2329 	    /* invalidate for record_test_result */
2330 	    pval = NADBL;
2331 	}
2332 	record_test_result(teststat, pval);
2333     }
2334 
2335     clear_model(&KPSSmod);
2336     free(list);
2337     free(autocov);
2338 
2339     return err;
2340 }
2341 
panel_kpss_test(int order,int v,DATASET * dset,gretlopt opt,PRN * prn)2342 static int panel_kpss_test (int order, int v, DATASET *dset,
2343 			    gretlopt opt, PRN *prn)
2344 {
2345     kpss_info kinfo;
2346     int u0 = dset->t1 / dset->pd;
2347     int uN = dset->t2 / dset->pd;
2348     int n = uN - u0 + 1;
2349     int verbose = (opt & OPT_V);
2350     double ppv = 0.0, zpv = 0.0, lpv = 0.0;
2351     int gt_10 = 0, lt_01 = 0;
2352     double pval;
2353     int i, err = 0;
2354 
2355     /* run a KPSS test for each unit and record the
2356        results */
2357 
2358     pprintf(prn, _("\nKPSS test for %s %s\n"), dset->varname[v],
2359 	    (opt & OPT_T)? _("(including trend)") : _("(without trend)"));
2360     pprintf(prn, _("Lag truncation parameter = %d\n"), order);
2361     pputc(prn, '\n');
2362 
2363     for (i=u0; i<=uN && !err; i++) {
2364 	dset->t1 = i * dset->pd;
2365 	dset->t2 = dset->t1 + dset->pd - 1;
2366 	err = series_adjust_sample(dset->Z[v], &dset->t1, &dset->t2);
2367 	if (!err) {
2368 	    err = real_kpss_test(order, v, dset, opt | OPT_Q, &kinfo, prn);
2369 	    if (!err && verbose) {
2370 		pprintf(prn, "Unit %d, T = %d\n", i + 1, kinfo.T);
2371 		if (na(kinfo.pval)) {
2372 		    pputs(prn, "\n\n");
2373 		} else {
2374 		    pprintf(prn, "test = %g, ", kinfo.test);
2375 		    if (kinfo.pval == PV_GT10) {
2376 			pprintf(prn, "%s > .10\n", _("p-value"));
2377 		    } else if (kinfo.pval == PV_LT01) {
2378 			pprintf(prn, "%s < .01\n", _("p-value"));
2379 		    } else {
2380 			pprintf(prn, "%s %.3f\n", _("interpolated p-value"),
2381 				kinfo.pval);
2382 		    }
2383 		    pputc(prn, '\n');
2384 		}
2385 	    }
2386 	}
2387 
2388 	if (!err) {
2389 	    pval = kinfo.pval;
2390 
2391 	    if (pval == PV_GT10) {
2392 		gt_10++;
2393 		if (lt_01 == 0) {
2394 		    /* record lower bound */
2395 		    pval = .10;
2396 		} else {
2397 		    pval = NADBL;
2398 		}
2399 	    } else if (pval == PV_LT01) {
2400 		lt_01++;
2401 		if (gt_10 == 0) {
2402 		    /* record upper bound */
2403 		    pval = .01;
2404 		} else {
2405 		    pval = NADBL;
2406 		}
2407 	    }
2408 
2409 	    if (na(pval)) {
2410 		ppv = zpv = lpv = NADBL;
2411 	    } else if (!na(ppv)) {
2412 		ppv += log(pval);
2413 		zpv += normal_cdf_inverse(pval);
2414 		lpv += log(pval / (1-pval));
2415 	    }
2416 	}
2417     }
2418 
2419     if (!err && !na(ppv)) {
2420 	/* process the results as per Choi, as best we can */
2421 	pprintf(prn, "%s\n\n", _("H0: all groups are stationary"));
2422 	do_choi_test(ppv, zpv, lpv, n, prn);
2423 	if (gt_10 > 0) {
2424 	    pputs(prn, "   Note: these are LOWER BOUNDS "
2425 		  "on the true p-values\n");
2426 	    pprintf(prn, "   (Individual p-values > .10, and recorded as .10: %d)\n",
2427 		    gt_10);
2428 	} else if (lt_01 > 0) {
2429 	    pputs(prn, "   Note: these are UPPER BOUNDS "
2430 		  "on the true p-values\n");
2431 	    pprintf(prn, "   (Individual p-values < .01, and recorded as .01: %d)\n",
2432 		    lt_01);
2433 	}
2434 	pputc(prn, '\n');
2435     } else {
2436 	pprintf(prn, "Choi test: cannot be calculated\n");
2437     }
2438 
2439     return err;
2440 }
2441 
2442 /**
2443  * kpss_test:
2444  * @order: window size for Bartlett smoothing.
2445  * @list: list of variables to test.
2446  * @dset: dataset struct.
2447  * @opt: option flags.
2448  * @prn: gretl printing struct.
2449  *
2450  * Carries out and prints the results of the KPSS test for
2451  * stationarity. Flags that may be given in @opt include:
2452  * OPT_T to include a linear trend; OPT_F to apply
2453  * first-differencing before testing; OPT_V for verbose
2454  * operation.
2455  *
2456  * Returns: 0 on successful completion, non-zero on error.
2457  */
2458 
kpss_test(int order,const int * list,DATASET * dset,gretlopt opt,PRN * prn)2459 int kpss_test (int order, const int *list, DATASET *dset,
2460 	       gretlopt opt, PRN *prn)
2461 {
2462     int save_t1 = dset->t1;
2463     int save_t2 = dset->t2;
2464     int orig_nvars = dset->v;
2465     int err = 0;
2466 
2467     if (multi_unit_panel_sample(dset)) {
2468 	err = panel_kpss_test(order, list[1], dset, opt, prn);
2469     } else {
2470 	/* regular time series case */
2471 	int i, v, vlist[2] = {1, 0};
2472 
2473 	for (i=1; i<=list[0] && !err; i++) {
2474 	    v = list[i];
2475 	    vlist[1] = v;
2476 	    err = list_adjust_sample(vlist, &dset->t1, &dset->t2,
2477 				     dset, NULL);
2478 	    if (!err) {
2479 		err = real_kpss_test(order, v, dset, opt, NULL, prn);
2480 	    }
2481 	    dset->t1 = save_t1;
2482 	    dset->t2 = save_t2;
2483 	}
2484     }
2485 
2486     dset->t1 = save_t1;
2487     dset->t2 = save_t2;
2488 
2489     /* added 2012-03-22 for consistency with adf test */
2490     dataset_drop_last_variables(dset, dset->v - orig_nvars);
2491 
2492     return err;
2493 }
2494 
make_coint_list(const int * list,int detcode,gretlopt opt,int * nv,DATASET * dset,int * err)2495 static int *make_coint_list (const int *list, int detcode,
2496 			     gretlopt opt, int *nv,
2497 			     DATASET *dset, int *err)
2498 {
2499     int *clist = NULL;
2500     int nseas = 0;
2501     int ifc = 0;
2502     int i, j = 1;
2503 
2504     /* does the incoming list contain a constant? */
2505     for (i=1; i<=list[0]; i++) {
2506 	if (list[i] == 0) {
2507 	    ifc = 1;
2508 	    break;
2509 	}
2510     }
2511 
2512     /* check for sufficient arguments */
2513     *nv = list[0] - ifc;
2514     if (*nv < 2) {
2515 	*err = E_ARGS;
2516 	return NULL;
2517     }
2518 
2519     /* space for seasonals, if applicable */
2520     if (!*err && (opt & OPT_D) && dset->pd > 1) {
2521 	nseas = dset->pd - 1;
2522     }
2523 
2524     /* allocate list for cointegrating regression */
2525     clist = gretl_list_new(*nv + detcode - 1 + nseas);
2526     if (clist == NULL) {
2527 	*err = E_ALLOC;
2528 	return NULL;
2529     }
2530 
2531     /* transcribe original vars */
2532     for (i=1; i<=list[0]; i++) {
2533 	if (list[i] != 0) {
2534 	    clist[j++] = list[i];
2535 	}
2536     }
2537 
2538     /* add trend, if wanted */
2539     if (detcode >= UR_TREND) {
2540 	clist[j] = gettrend(dset, 0);
2541 	if (clist[j++] == 0) {
2542 	    *err = E_ALLOC;
2543 	}
2544     }
2545 
2546     /* add trend-squared, if wanted */
2547     if (!*err && detcode == UR_QUAD_TREND) {
2548 	clist[j] = gettrend(dset, 1);
2549 	if (clist[j++] == 0) {
2550 	    *err = E_ALLOC;
2551 	}
2552     }
2553 
2554     /* add const, if wanted */
2555     if (!*err && detcode != UR_NO_CONST) {
2556 	clist[j++] = 0;
2557     }
2558 
2559     /* add centered seasonals, if wanted */
2560     if (nseas > 0) {
2561 	int *slist = seasonals_list(dset, dset->pd, 1, err);
2562 
2563 	if (!*err) {
2564 	    for (i=0; i<nseas; i++) {
2565 		clist[j++] = slist[i+1];
2566 	    }
2567 	    free(slist);
2568 	}
2569     }
2570 
2571     return clist;
2572 }
2573 
2574 static int
coint_check_opts(gretlopt opt,int * detcode,gretlopt * adf_opt)2575 coint_check_opts (gretlopt opt, int *detcode, gretlopt *adf_opt)
2576 {
2577     if (opt & OPT_N) {
2578 	if ((opt & OPT_T) || (opt & OPT_R)) {
2579 	    return E_BADOPT;
2580 	} else {
2581 	    *detcode = UR_NO_CONST;
2582 	    *adf_opt = OPT_N;
2583 	}
2584     } else if (opt & OPT_T) {
2585 	if (opt & OPT_R) {
2586 	    return E_BADOPT;
2587 	} else {
2588 	    *detcode = UR_TREND;
2589 	    *adf_opt = OPT_T;
2590 	}
2591     } else if (opt & OPT_R) {
2592 	*detcode = UR_QUAD_TREND;
2593 	*adf_opt = OPT_R;
2594     }
2595 
2596     if (opt & OPT_D) {
2597 	*adf_opt |= OPT_D;
2598     }
2599 
2600     if (opt & OPT_E) {
2601 	*adf_opt |= OPT_E;
2602     }
2603 
2604     return 0;
2605 }
2606 
2607 /* Engle-Granger: try to ensure a uniform sample for the individual
2608    (A)DF tests and the test on the cointegrating regression
2609 */
2610 
coint_set_sample(const int * list,int nv,int order,DATASET * dset)2611 static int coint_set_sample (const int *list, int nv, int order,
2612 			     DATASET *dset)
2613 {
2614     int anymiss;
2615     int i, v, t;
2616 
2617     for (t=dset->t1; t<dset->t2; t++) {
2618 	anymiss = 0;
2619 	for (i=1; i<=nv; i++) {
2620 	    v = list[i];
2621 	    if (na(dset->Z[v][t])) {
2622 		anymiss = 1;
2623 		break;
2624 	    }
2625 	}
2626 	if (!anymiss) {
2627 	    break;
2628 	}
2629     }
2630 
2631     dset->t1 = t + order + 1;
2632 
2633     for (t=dset->t2; t>dset->t1; t--) {
2634 	anymiss = 0;
2635 	for (i=1; i<=nv; i++) {
2636 	    v = list[i];
2637 	    if (na(dset->Z[v][t])) {
2638 		anymiss = 1;
2639 		break;
2640 	    }
2641 	}
2642 	if (!anymiss) {
2643 	    break;
2644 	}
2645     }
2646 
2647     dset->t2 = t;
2648 
2649     return 0;
2650 }
2651 
2652 #define EG_MIN_SAMPLE 0
2653 
2654 /**
2655  * engle_granger_test:
2656  * @order: lag order for the test.
2657  * @list: specifies the variables to use.
2658  * @dset: dataset struct.
2659  * @opt: option flags.
2660  * @prn: gretl printing struct.
2661  *
2662  * Carries out the Engle-Granger test for cointegration.
2663  * Flags that may be given in @opt include: OPT_N, do
2664  * not an include a constant in the cointegrating regression;
2665  * OPT_T include constant and linear trend; OPT_R, include
2666  * quadratic trend; OPT_S, skip DF tests for individual variables;
2667  * OPT_E, test down from maximum lag order (see the entry for
2668  * "adf" in the Gretl Command Reference for details); OPT_V,
2669  * verbose operation.
2670  *
2671  * Returns: 0 on successful completion, non-zero code
2672  * on error.
2673  */
2674 
engle_granger_test(int order,const int * list,DATASET * dset,gretlopt opt,PRN * prn)2675 int engle_granger_test (int order, const int *list, DATASET *dset,
2676 			gretlopt opt, PRN *prn)
2677 {
2678 #if EG_MIN_SAMPLE
2679     int test_t1, test_t2;
2680 #endif
2681     int orig_t1 = dset->t1;
2682     int orig_t2 = dset->t2;
2683     adf_info ainfo = {0};
2684     gretlopt adf_opt = OPT_C;
2685     MODEL cmod;
2686     int detcode = UR_CONST;
2687     int i, nv, k = 0;
2688     int step = 1;
2689     int skip = 0;
2690     int silent = 0;
2691     int *clist = NULL;
2692     int err = 0;
2693 
2694     if (multi_unit_panel_sample(dset)) {
2695 	gretl_errmsg_set("Sorry, this command is not yet available "
2696 			 "for panel data");
2697 	return E_DATA;
2698     }
2699 
2700     err = coint_check_opts(opt, &detcode, &adf_opt);
2701     if (err) {
2702 	return err;
2703     }
2704 
2705     clist = make_coint_list(list, detcode, opt, &nv, dset, &err);
2706     if (err) {
2707 	return err;
2708     }
2709 
2710     /* backward compatibility: let a negative lag order
2711        indicate that we should test down */
2712     if (order < 0) {
2713 	order = -order;
2714 	adf_opt |= OPT_E;
2715     }
2716 
2717     /* verbosity? */
2718     if (opt & OPT_V) {
2719 	adf_opt |= OPT_V;
2720     }
2721 
2722     /* or silence? */
2723     if (opt & OPT_I) {
2724 	adf_opt |= OPT_I;
2725 	silent = skip = 1;
2726     } else if (opt & OPT_S) {
2727 	skip = 1;
2728     }
2729 
2730     gretl_model_init(&cmod, dset);
2731 
2732     if (!skip) {
2733 	/* start by testing all candidate vars for unit root */
2734 	int uniform_order = (opt & OPT_E)? 0 : order;
2735 
2736 	coint_set_sample(clist, nv, uniform_order, dset);
2737 	for (i=1; i<=nv; i++) {
2738 	    ainfo.v = clist[i];
2739 	    ainfo.order = order;
2740 	    ainfo.niv = 1;
2741 	    ainfo.flags = ADF_EG_TEST;
2742 
2743 	    if (step == 1) {
2744 		pputc(prn, '\n');
2745 	    }
2746 	    pprintf(prn, _("Step %d: testing for a unit root in %s\n"),
2747 		    step++, dset->varname[ainfo.v]);
2748 	    real_adf_test(&ainfo, dset, adf_opt, prn);
2749 	}
2750     }
2751 
2752     if (!silent) {
2753 	if (step == 1) {
2754 	    pputc(prn, '\n');
2755 	}
2756 	pprintf(prn, _("Step %d: cointegrating regression\n"), step++);
2757     }
2758 
2759 #if EG_MIN_SAMPLE
2760     test_t1 = dset->t1;
2761     test_t2 = dset->t2;
2762 #endif
2763 
2764     dset->t1 = orig_t1;
2765     dset->t2 = orig_t2;
2766 
2767     cmod = lsq(clist, dset, OLS, OPT_NONE);
2768     err = cmod.errcode;
2769     if (err) {
2770 	goto bailout;
2771     }
2772 
2773     if (!silent) {
2774 	cmod.aux = AUX_COINT;
2775 	printmodel(&cmod, dset, OPT_NONE, prn);
2776     }
2777 
2778     /* add residuals from cointegrating regression to data set */
2779     err = dataset_add_allocated_series(dset, cmod.uhat);
2780     if (err) {
2781 	goto bailout;
2782     }
2783 
2784     k = dset->v - 1;
2785     strcpy(dset->varname[k], "uhat");
2786     cmod.uhat = NULL;
2787 
2788     if (!silent) {
2789 	pprintf(prn, _("Step %d: testing for a unit root in %s\n"),
2790 		step, dset->varname[k]);
2791     }
2792 
2793 #if EG_MIN_SAMPLE
2794     dset->t1 = test_t1;
2795     dset->t2 = test_t2;
2796 #endif
2797 
2798     ainfo.v = k;
2799     ainfo.order = order;
2800     ainfo.niv = nv;
2801     ainfo.flags = ADF_EG_TEST | ADF_EG_RESIDS;
2802 
2803     /* Run (A)DF test on the residuals */
2804     real_adf_test(&ainfo, dset, adf_opt, prn);
2805 
2806     if (!silent) {
2807 	pputs(prn, _("\nThere is evidence for a cointegrating relationship if:\n"
2808 		     "(a) The unit-root hypothesis is not rejected for the individual"
2809 		     " variables, and\n(b) the unit-root hypothesis is rejected for the "
2810 		     "residuals (uhat) from the \n    cointegrating regression.\n"));
2811 	pputc(prn, '\n');
2812     }
2813 
2814  bailout:
2815 
2816     clear_model(&cmod);
2817     free(clist);
2818     if (k > 0) {
2819 	dataset_drop_variable(k, dset);
2820     }
2821 
2822     dset->t1 = orig_t1;
2823     dset->t2 = orig_t2;
2824 
2825     return err;
2826 }
2827