1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #include "libgretl.h"
21 #include "libset.h"
22 #include "libglue.h"
23 #include "gretl_panel.h"
24 #include "var.h"
25 #include "system.h"
26 #include "missing_private.h"
27 #include "matrix_extra.h"
28 #include "plotspec.h"
29 #include "tsls.h"
30 #include "uservar.h"
31 
32 /**
33  * SECTION:compare
34  * @short_description: diagnostic and specification tests for models
35  * @title: Model tests
36  * @include: libgretl.h
37  *
38  * Included here are several tests for "pathologies" of the error
39  * term in regression models, as well as specification tests
40  * covering nonlinearity and the omission or addition of variables.
41  */
42 
43 #define WDEBUG 0
44 
45 struct COMPARE {
46     int ci;        /* command index: ADD or OMIT */
47     int model_id;  /* ID of model under test */
48     int model_ci;  /* estimator code for the model under test */
49     int dfn;       /* numerator degrees of freedom */
50     int dfd;       /* denominator degrees of freedom */
51     double test;   /* test statistic */
52     double pval;   /* p-value for test statistic */
53     int stat;      /* code identifying the test statistic */
54     double F;      /* F test statistic */
55     double X2;     /* Wald chi-square test statistic */
56     double LR;     /* likelihood ratio test statistic */
57     int score;     /* number of info stats showing improvement */
58     int robust;    /* = 1 when robust vcv is in use, else 0 */
59     const int *testvars; /* list of variables added or omitted */
60 };
61 
62 /* Given a list of variables, check them against the independent
63    variables included in a model, and construct a mask with 1s in
64    positions where there is a match, 0s otherwise.  If the test list
65    is NULL, match all variables except the constant.
66 */
67 
68 static char *
mask_from_test_list(const int * list,const MODEL * pmod,int * err)69 mask_from_test_list (const int *list, const MODEL *pmod, int *err)
70 {
71     char *mask;
72     int off1 = 2, off2 = 0;
73     int cmax = pmod->ncoeff;
74     int nmask = 0;
75     int i, j;
76 
77     mask = calloc(pmod->ncoeff, 1);
78     if (mask == NULL) {
79 	*err = E_ALLOC;
80 	return NULL;
81     }
82 
83     if (pmod->ci == DPANEL) {
84 	/* find correct offset into independent vars in list */
85 	for (i=2; i<=pmod->list[0]; i++) {
86 	    if (pmod->list[i] == LISTSEP) {
87 		off1 = i + 2;
88 	    }
89 	}
90 	off2 = pmod->list[1];
91     } else if (pmod->ci == NEGBIN) {
92 	cmax -= 1;
93     }
94 
95     for (i=0; i<cmax; i++) {
96 	if (list != NULL) {
97 	    for (j=1; j<=list[0]; j++) {
98 		if (pmod->list[i + off1] == list[j]) {
99 #if WDEBUG
100 		    fprintf(stderr, "matched var %d at pmod->list[%d]: "
101 			    "set mask[%d] = 1\n", list[j], i + off1, i + off2);
102 		    printlist(list, "test list");
103 		    printlist(pmod->list, "pmod->list");
104 #endif
105 		    mask[i + off2] = 1;
106 		    nmask++;
107 		}
108 	    }
109 	} else if (pmod->list[i + off1] != 0) {
110 	    mask[i + off2] = 1;
111 	}
112     }
113 
114     if (list != NULL && nmask != list[0]) {
115 	fprintf(stderr, "mask from list: list[0] = %d but nmask = %d\n",
116 		list[0], nmask);
117 	*err = E_DATA;
118     }
119 
120     return mask;
121 }
122 
123 /* Wald (chi-square and/or F) test for a set of zero restrictions on
124    the parameters of a given model, based on the covariance matrix of
125    the unrestricted model. Suitable for use where the original model
126    is estimated by FGLS or IV.  Note that if list is NULL, we do an
127    automatic test, for the significance of all vars but the constant.
128 */
129 
130 static int
wald_test(const int * list,MODEL * pmod,double * chisq,double * F)131 wald_test (const int *list, MODEL *pmod, double *chisq, double *F)
132 {
133     char *mask = NULL;
134     gretl_matrix *C = NULL;
135     gretl_vector *b = NULL;
136     double wX = NADBL;
137     double wF = NADBL;
138     int err = 0;
139 
140     mask = mask_from_test_list(list, pmod, &err);
141     if (err) {
142 	free(mask);
143 	return err;
144     }
145 
146     if (!err) {
147 	C = gretl_vcv_matrix_from_model(pmod, mask, &err);
148     }
149 
150     if (!err) {
151 	b = gretl_coeff_vector_from_model(pmod, mask, &err);
152     }
153 
154     if (!err) {
155 	/* use "left division" to explicit inversion of @C */
156 	gretl_matrix *tmp, *mx;
157 
158 	mx = gretl_matrix_alloc(1, 1);
159 	tmp = gretl_matrix_divide(C, b, GRETL_MOD_NONE, &err);
160 	if (tmp != NULL) {
161 	    gretl_matrix_multiply_mod(b, GRETL_MOD_TRANSPOSE,
162 				      tmp, GRETL_MOD_NONE,
163 				      mx, GRETL_MOD_NONE);
164 	    wX = mx->val[0];
165 	    gretl_matrix_free(tmp);
166 	    gretl_matrix_free(mx);
167 	}
168     }
169 
170 #if WDEBUG
171     fprintf(stderr, "wX (quadratic form) = %g\n", wX);
172 #endif
173 
174     if (!err) {
175 	if (wX < 0) {
176 	    wF = wX = NADBL;
177 	} else {
178 	    wF = wX / gretl_vector_get_length(b);
179 	}
180     }
181 
182     if (!err) {
183 	if (chisq != NULL) {
184 	    *chisq = wX;
185 	}
186 	if (F != NULL) {
187 	    *F = wF;
188 	}
189     }
190 
191 #if WDEBUG
192     fprintf(stderr, "Wald test: F = %g, Chi^2 = %g\n", wF, wX);
193 #endif
194 
195     free(mask);
196     gretl_matrix_free(C);
197     gretl_matrix_free(b);
198 
199     return err;
200 }
201 
202 /**
203  * wald_omit_F:
204  * @list: list of variables to omit, or NULL.
205  * @pmod: model to be tested.
206  *
207  * Simple form of Wald F-test for omission of variables.  If @list
208  * is non-NULL, do the test for the omission of the variables in
209  * @list from the model @pmod.  Otherwise test for omission of
210  * all variables in @pmod except for the constant.
211  *
212  * Returns: Calculated F-value, or #NADBL on failure.
213  */
214 
wald_omit_F(const int * list,MODEL * pmod)215 double wald_omit_F (const int *list, MODEL *pmod)
216 {
217     double F = NADBL;
218 
219     wald_test(list, pmod, NULL, &F);
220     return F;
221 }
222 
223 /**
224  * wald_omit_chisq:
225  * @list: list of variables to omit, or NULL.
226  * @pmod: model to be tested.
227  *
228  * Simple form of Wald chi-square for omission of variables.  If @list
229  * is non-NULL, do the test for the omission of the variables in
230  * @list from the model @pmod.  Otherwise test for omission of
231  * all variables in @pmod except for the constant.
232  *
233  * Returns: Calculated chi-square value, or #NADBL on failure.
234  */
235 
wald_omit_chisq(const int * list,MODEL * pmod)236 double wald_omit_chisq (const int *list, MODEL *pmod)
237 {
238     double X = NADBL;
239 
240     wald_test(list, pmod, &X, NULL);
241     return X;
242 }
243 
add_diffvars_to_test(ModelTest * test,const int * list,const DATASET * dset)244 static int add_diffvars_to_test (ModelTest *test, const int *list,
245 				 const DATASET *dset)
246 {
247     char *vnames;
248     int i, len = 0;
249     int err = 0;
250 
251     for (i=1; i<=list[0]; i++) {
252 	len += strlen(dset->varname[list[i]]) + 1;
253     }
254 
255     vnames = malloc(len);
256 
257     if (vnames == NULL) {
258 	err = 1;
259     } else {
260 	*vnames = '\0';
261 	for (i=1; i<=list[0]; i++) {
262 	    strcat(vnames, dset->varname[list[i]]);
263 	    if (i < list[0]) {
264 		strcat(vnames, " ");
265 	    }
266 	}
267 	model_test_set_allocated_param(test, vnames);
268     }
269 
270     return err;
271 }
272 
print_add_omit_null(const int * list,const DATASET * dset,gretlopt opt,PRN * prn)273 void print_add_omit_null (const int *list, const DATASET *dset,
274 			  gretlopt opt, PRN *prn)
275 {
276     int nl = list == NULL ? 0 : list[0];
277 
278     if (nl == 1 && !(opt & OPT_S)) {
279 	/* single parameter, not in system context */
280 	pputs(prn, "\n  ");
281 	pprintf(prn, _("Null hypothesis: the regression parameter is zero for %s"),
282 		dset->varname[list[1]]);
283 	pputc(prn, '\n');
284     } else if (nl == 0) {
285 	/* VAR omit specials */
286 	if ((opt & OPT_E) && (opt & OPT_T)) {
287 	    pprintf(prn, "\n  %s: %s\n", _("Null hypothesis"),
288 		    _("no seasonal effects or trend"));
289 	} else if (opt & OPT_E) {
290 	    pprintf(prn, "\n  %s: %s\n", _("Null hypothesis"),
291 		    _("no seasonal effects"));
292 	} else if (opt & OPT_T) {
293 	    pprintf(prn, "\n  %s: %s\n", _("Null hypothesis"),
294 		    _("no trend"));
295 	}
296     } else {
297 	/* all other cases */
298 	const char *vname;
299 	int i, nc = 0;
300 
301 	pputs(prn, _("\n  Null hypothesis: the regression parameters are "
302 		     "zero for the variables\n"));
303 
304 	pputs(prn, "    ");
305 	for (i=1; i<=list[0]; i++) {
306 	    vname = dset->varname[list[i]];
307 	    nc += strlen(vname) + 2;
308 	    pprintf(prn, "%s", vname);
309 	    if (i < list[0]) {
310 		if (nc > 60) {
311 		    pputs(prn, ",\n    ");
312 		    nc = 0;
313 		} else {
314 		    pputs(prn, ", ");
315 		}
316 	    }
317 	}
318 	pputc(prn, '\n');
319 	if (opt & OPT_E) {
320 	    /* seasonals */
321 	    pprintf(prn, "    %s\n", _("seasonal dummies"));
322 	}
323 	if (opt & OPT_T) {
324 	    /* trend */
325 	    pputs(prn, "    time\n");
326 	}
327     }
328 }
329 
330 /* add/omit: are we printing a revised model following the
331    test results? */
332 
printing_revised_model(gretlopt opt)333 static int printing_revised_model (gretlopt opt)
334 {
335     if (opt & OPT_Q) {
336 	/* --quiet: no */
337 	return 0;
338     } else if (opt & (OPT_L | OPT_W)) {
339 	/* --lm or --wald test variants: no */
340 	return 0;
341     } else {
342 	/* then yes */
343 	return 1;
344     }
345 }
346 
347 /* print the results from an add or omit test */
348 
print_compare(struct COMPARE * cmp,const DATASET * dset,gretlopt opt,PRN * prn)349 static void print_compare (struct COMPARE *cmp,
350 			   const DATASET *dset,
351 			   gretlopt opt,
352 			   PRN *prn)
353 {
354     if (opt & OPT_Q) {
355 	if (cmp->model_id > 0) {
356 	    pprintf(prn, _("Test on Model %d:"), cmp->model_id);
357 	    pputc(prn, '\n');
358 	}
359     } else if (cmp->model_id > 0) {
360 	if (opt & OPT_A) {
361 	    /* --auto-omit: add vertical space */
362 	    pputc(prn, '\n');
363 	}
364 	pprintf(prn, _("Test on Model %d:"), cmp->model_id);
365 	pputc(prn, '\n');
366     }
367 
368     print_add_omit_null(cmp->testvars, dset, OPT_NONE, prn);
369 
370     if (cmp->stat == GRETL_STAT_WALD_CHISQ) {
371 	pprintf(prn, "  %s: %s(%d) = %g, %s %g\n",  _("Wald test"),
372 		_("Chi-square"), cmp->dfn, cmp->test,
373 		_("p-value"), cmp->pval);
374 	if (!na(cmp->LR)) {
375 	    /* auxiliary LR test? */
376 	    double pval = chisq_cdf_comp(cmp->dfn, cmp->LR);
377 
378 	    pprintf(prn, "  (%s: %s(%d) = %g, %s %g)\n",  _("LR test"),
379 		    _("Chi-square"), cmp->dfn, cmp->LR,
380 		    _("p-value"), pval);
381 	} else if (!na(cmp->F)) {
382 	    /* alternate F-form for Wald? */
383 	    double pval = snedecor_cdf_comp(cmp->dfn, cmp->dfd, cmp->F);
384 
385 	    pprintf(prn, "  (%s: F(%d, %d) = %g, %s %g)\n",
386 		    _("F-form"), cmp->dfn, cmp->dfd, cmp->F,
387 		    _("p-value"), pval);
388 	}
389     } else if (cmp->stat == GRETL_STAT_LR) {
390 	/* note: unused */
391 	pprintf(prn, "  %s: %s(%d) = %g, %s %g\n",  _("LR test"),
392 		_("Chi-square"), cmp->dfn, cmp->test,
393 		_("p-value"), cmp->pval);
394     } else if (cmp->stat == GRETL_STAT_F) {
395 	pprintf(prn, "  %s: %s(%d, %d) = %g, %s %g\n", _("Test statistic"),
396 		(cmp->robust)? _("Robust F") : "F",
397 		cmp->dfn, cmp->dfd, cmp->test,
398 		_("p-value"), cmp->pval);
399     } else if (cmp->stat == GRETL_STAT_LM) {
400 	pprintf(prn, "  %s: %s(%d) = %g, %s %g\n",  _("LM test"),
401 		_("Chi-square"), cmp->dfn, cmp->test,
402 		_("p-value"), cmp->pval);
403     }
404 
405     if (cmp->score >= 0) {
406 	pputs(prn, "  ");
407 	if (cmp->ci == ADD) {
408 	    pprintf(prn, _("Adding variables improved %d of %d information "
409 			   "criteria.\n"), cmp->score, C_MAX);
410 	} else {
411 	    pprintf(prn, _("Omitting variables improved %d of %d information "
412 			   "criteria.\n"), cmp->score, C_MAX);
413 	}
414     }
415 
416     if (!printing_revised_model(opt)) {
417 	pputc(prn, '\n');
418     }
419 }
420 
421 /* try attaching the add or omit test to @pmod */
422 
add_omit_attach(struct COMPARE * cmp,MODEL * pmod,const DATASET * dset)423 static void add_omit_attach (struct COMPARE *cmp, MODEL *pmod,
424 			     const DATASET *dset)
425 {
426     int tcode = (cmp->ci == OMIT)? GRETL_TEST_OMIT : GRETL_TEST_ADD;
427     ModelTest *mtest = model_test_new(tcode);
428 
429     if (mtest != NULL) {
430 	model_test_set_teststat(mtest, cmp->stat);
431 	model_test_set_dfn(mtest, cmp->dfn);
432 	if (cmp->stat == GRETL_STAT_F) {
433 	    model_test_set_dfd(mtest, cmp->dfd);
434 	}
435 	model_test_set_value(mtest, cmp->test);
436 	model_test_set_pvalue(mtest, cmp->pval);
437 	add_diffvars_to_test(mtest, cmp->testvars, dset);
438 	maybe_add_test_to_model(pmod, mtest);
439     }
440 }
441 
442 /* add a record of improvement/degeneration of the
443    model selection criteria, for non-quiet printing:
444    @pmodA is the original model, and @pmodB the
445    restricted (OMIT) or augmented (ADD) variant
446 */
447 
maybe_add_info_score(struct COMPARE * cmp,const MODEL * pmodA,const MODEL * pmodB)448 static void maybe_add_info_score (struct COMPARE *cmp,
449 				  const MODEL *pmodA,
450 				  const MODEL *pmodB)
451 {
452     if (pmodA != NULL && pmodB != NULL) {
453 	int i;
454 
455 	cmp->score = 0;
456 	for (i=0; i<C_MAX; i++) {
457 	    if (na(pmodB->criterion[i]) || na(pmodA->criterion[i])) {
458 		cmp->score = -1;
459 		break;
460 	    } else if (pmodB->criterion[i] < pmodA->criterion[i]) {
461 		cmp->score++;
462 	    }
463 	}
464     }
465 }
466 
add_or_omit_compare(MODEL * pmodA,MODEL * pmodB,const int * testvars,const DATASET * dset,int ci,gretlopt opt,PRN * prn)467 static int add_or_omit_compare (MODEL *pmodA, MODEL *pmodB,
468 				const int *testvars, const DATASET *dset,
469 				int ci, gretlopt opt, PRN *prn)
470 {
471     struct COMPARE cmp;
472     MODEL *umod, *rmod;
473     int iv_special = 0;
474     int err = 0;
475 
476     cmp.ci = ci;
477     cmp.stat = 0;
478     cmp.test = cmp.pval = NADBL;
479     cmp.F = cmp.X2 = cmp.LR = NADBL;
480     cmp.score = -1;
481     cmp.robust = 0;
482     cmp.dfn = testvars[0];
483     cmp.testvars = testvars;
484     cmp.model_ci = pmodA->ci;
485     cmp.model_id = pmodA->ID;
486 
487     if (ci == OMIT) {
488 	/* the unrestricted model is the original one, 'A' */
489 	umod = pmodA;
490 	rmod = pmodB;
491     } else {
492 	/* add: the unrestricted model is the new one, 'B' */
493 	umod = pmodB;
494 	rmod = pmodA;
495     }
496 
497     if (opt & OPT_B) {
498 	/* ivreg special: A and B have different
499 	   instruments, not just different regressors
500 	*/
501 	iv_special = 1;
502 	cmp.model_id = -1;
503     } else if (opt & OPT_L) {
504 	cmp.model_id = -1;
505     } else if (umod != NULL && rmod != NULL) {
506 	cmp.dfn = umod->ncoeff - rmod->ncoeff;
507     }
508 
509     if (pmodA->opt & OPT_R) {
510 	cmp.robust = 1;
511     }
512 
513     if (pmodA->ci == DPANEL) {
514 	/* FIXME: plus some other cases? */
515 	opt |= OPT_X;
516     }
517 
518     cmp.dfd = umod->dfd;
519 
520     if (opt & OPT_L) {
521 	/* "add" with LM option: the auxiliary regression
522 	   results are in pmodB */
523 	cmp.stat = GRETL_STAT_LM;
524 	cmp.test = pmodB->nobs * pmodB->rsq;
525 	cmp.pval = chisq_cdf_comp(cmp.dfn, cmp.test);
526     } else if (cmp.model_ci == OLS && !cmp.robust &&
527 	       rmod != NULL && umod != NULL &&
528 	       rmod->nobs == umod->nobs &&
529 	       !(opt & OPT_X)) {
530 	/* plain OLS with both the restricted and unrestricted
531 	   models available: base F-test on sums of squared
532 	   residuals */
533 	cmp.stat = GRETL_STAT_F;
534 	cmp.test = ((rmod->ess - umod->ess) / umod->ess) *
535 	    cmp.dfd / cmp.dfn;
536 	cmp.pval = snedecor_cdf_comp(cmp.dfn, cmp.dfd, cmp.test);
537     } else if (opt & OPT_X) {
538 	/* chi-square form of Wald test is requested */
539 	cmp.stat = GRETL_STAT_WALD_CHISQ;
540 	err = wald_test(testvars, umod, &cmp.test, &cmp.F);
541 	if (!err) {
542 	    cmp.pval = chisq_cdf_comp(cmp.dfn, cmp.test);
543 	}
544     } else {
545 	/* F-form of Wald test */
546 	cmp.stat = GRETL_STAT_F;
547 	err = wald_test(testvars, umod, &cmp.X2, &cmp.test);
548 	if (!err) {
549 	    cmp.pval = snedecor_cdf_comp(cmp.dfn, cmp.dfd, cmp.test);
550 	}
551     }
552 
553     /* auxiliary LR test? */
554     if (umod != NULL && rmod != NULL && !(opt & OPT_L)) {
555 	if (!na(umod->lnL) && !na(rmod->lnL)) {
556 	    cmp.LR = 2.0 * (umod->lnL - rmod->lnL);
557 	}
558     }
559 
560     if (!err && (na(cmp.test) || na(cmp.pval))) {
561 	err = E_DATA;
562     }
563 
564     if (!err) {
565 	record_test_result(cmp.test, cmp.pval);
566 
567 	if (opt & OPT_S) {
568 	    /* attach test to model */
569 	    add_omit_attach(&cmp, pmodA, dset);
570 	}
571 	if (!(opt & OPT_I)) {
572 	    /* not --silent */
573 	    if (!(opt & OPT_Q) && cmp.stat != GRETL_STAT_LM &&
574 		!iv_special) {
575 		maybe_add_info_score(&cmp, pmodA, pmodB);
576 	    }
577 	    print_compare(&cmp, dset, opt, prn);
578 	}
579     }
580 
581     return err;
582 }
583 
get_extra_var(const MODEL * pmod)584 static int get_extra_var (const MODEL *pmod)
585 {
586     if (COUNT_MODEL(pmod->ci)) {
587 	return gretl_model_get_int(pmod, "offset_var");
588     } else if (pmod->ci == DURATION) {
589 	return gretl_model_get_int(pmod, "cens_var");
590     } else {
591 	return 0;
592     }
593 }
594 
595 /* reconstitute full varlist for WLS, AR, count and
596    duation models */
597 
598 static int *
full_model_list(const MODEL * pmod,const int * inlist)599 full_model_list (const MODEL *pmod, const int *inlist)
600 {
601     int *flist = NULL;
602 
603     if (pmod->ci == AR) {
604 	/* cobble together arlist and @inlist */
605 	flist = gretl_lists_join_with_separator(pmod->arinfo->arlist,
606 						inlist);
607     } else {
608 	int i, len = inlist[0];
609 
610 	if (pmod->ci == WLS) {
611 	    /* prepend the weight variable */
612 	    len += 1;
613 	} else if (COUNT_MODEL(pmod->ci) || pmod->ci == DURATION) {
614 	    /* append list separator and offset or censoring var */
615 	    len += 2;
616 	}
617 
618 	flist = gretl_list_new(len);
619 	if (flist == NULL) {
620 	    return NULL;
621 	}
622 
623 	if (pmod->ci == WLS) {
624 	    flist[1] = pmod->nwt;
625 	    for (i=1; i<=inlist[0]; i++) {
626 		flist[i+1] = inlist[i];
627 	    }
628 	} else if (COUNT_MODEL(pmod->ci) || pmod->ci == DURATION) {
629 	    int extra = get_extra_var(pmod);
630 
631 	    for (i=1; i<=inlist[0]; i++) {
632 		flist[i] = inlist[i];
633 	    }
634 	    flist[flist[0]-1] = LISTSEP;
635 	    flist[flist[0]] = extra;
636 	}
637     }
638 
639     return flist;
640 }
641 
retrieve_dpanel_opts(const MODEL * pmod)642 static gretlopt retrieve_dpanel_opts (const MODEL *pmod)
643 {
644     gretlopt opt = OPT_NONE;
645 
646     if (pmod->opt & OPT_D) {
647 	/* --dpdstyle */
648 	opt |= OPT_D;
649     }
650 
651     if (pmod->opt & OPT_L) {
652 	/* --system (include levels) */
653 	opt |= OPT_L;
654     }
655 
656     if (gretl_model_get_int(pmod, "asy")) {
657 	opt |= OPT_A;
658     }
659 
660     if (gretl_model_get_int(pmod, "step") == 2) {
661 	opt |=OPT_T;
662     }
663 
664     return opt;
665 }
666 
obs_diff_ok(const MODEL * m_old,const MODEL * m_new)667 static int obs_diff_ok (const MODEL *m_old, const MODEL *m_new)
668 {
669     int tdiff, ndiff = m_new->nobs - m_old->nobs;
670 
671     if (m_old->ci == AR1) {
672 	return 0;
673     }
674 
675     if (ndiff > 0) {
676 	tdiff = (m_new->t2 - m_new->t1) - (m_old->t2 - m_old->t1);
677 	if (ndiff == tdiff) {
678 	    return 1;
679 	}
680     }
681 
682     return 0;
683 }
684 
685 /* This function is used for "add" and "omit", when we are estimating
686    an augmented or reduced version of the original model. It's also
687    used in the special case of calculation of the p-value for the
688    Durbin-Watson statistic, which requires re-estimation of the
689    original specification. In these cases we need to ensure
690    comparability, which means we have to retrieve any relevant options
691    from the original model and re-apply them.
692 */
693 
replicate_estimator(const MODEL * orig,int * list,DATASET * dset,gretlopt myopt,PRN * prn)694 static MODEL replicate_estimator (const MODEL *orig, int *list,
695 				  DATASET *dset, gretlopt myopt,
696 				  PRN *prn)
697 {
698     MODEL rep;
699     const char *param = NULL;
700     const int *laglist = NULL;
701     int *full_list = NULL;
702     char altparm[32] = {0};
703     int mc = get_model_count();
704     int cv, repci = orig->ci;
705     int order = 0;
706     int first = 1;
707 
708     gretl_model_init(&rep, dset);
709 
710     /* recreate options and auxiliary vars, if required */
711 
712     transcribe_option_flags(&myopt, orig->opt, OPT_D | OPT_J | OPT_R);
713 
714     cv = gretl_model_get_cluster_var(orig);
715     if (cv > 0 && cv < dset->v) {
716 	myopt |= OPT_C;
717 	set_optval_string(orig->ci, OPT_C, dset->varname[cv]);
718     }
719 
720     if (orig->ci == AR1) {
721 	if (orig->opt & OPT_H) {
722 	    myopt |= OPT_H;
723 	    if (gretl_model_get_int(orig, "no-corc")) {
724 		myopt |= OPT_B;
725 	    }
726 	} else if (orig->opt & OPT_P) {
727 	    myopt |= OPT_P;
728 	}
729     } else if (orig->ci == WLS || orig->ci == AR || get_extra_var(orig)) {
730 	full_list = full_model_list(orig, list);
731 	if (full_list == NULL) {
732 	    rep.errcode = E_ALLOC;
733 	} else {
734 	    list = full_list;
735 	}
736     } else if (orig->ci == HSK) {
737 	if (gretl_model_get_int(orig, "no-squares")) {
738 	    myopt |= OPT_N;
739 	}
740     } else if (orig->ci == DPANEL) {
741 	param = gretl_model_get_data(orig, "istr");
742 	laglist = gretl_model_get_list(orig, "ylags");
743 	myopt |= retrieve_dpanel_opts(orig);
744     } else if (orig->ci == ARCH) {
745 	order = gretl_model_get_int(orig, "arch_order");
746     } else if (orig->ci == LOGIT || orig->ci == PROBIT) {
747 	if (orig->opt & OPT_P) {
748 	    /* p-values, not slopes, selected */
749 	    myopt |= OPT_P;
750 	} else if (gretl_model_get_int(orig, "ordered")) {
751 	    myopt |= OPT_D;
752 	} else if (gretl_model_get_int(orig, "multinom")) {
753 	    myopt |= OPT_M;
754 	} else if (orig->ci == PROBIT && (orig->opt & OPT_E)) {
755 	    /* random effects */
756 	    int qp = gretl_model_get_int(orig, "quadpoints");
757 
758 	    myopt |= (OPT_E | OPT_G);
759 	    set_optval_double(PROBIT, OPT_G, qp);
760 	}
761     } else if (orig->ci == PANEL) {
762 	if (gretl_model_get_int(orig, "pooled")) {
763 	    /* pooled OLS */
764 	    myopt |= OPT_P;
765 	} else if (orig->opt & OPT_U) {
766 	    /* random effects */
767 	    myopt |= OPT_U;
768 	} else if (orig->opt & OPT_H) {
769 	    /* unit weights */
770 	    myopt |= OPT_H;
771 	    if (gretl_model_get_int(orig, "iters")) {
772 		myopt |= OPT_I;
773 	    }
774 	}
775     } else if (orig->ci == LAD && gretl_model_get_int(orig, "rq")) {
776 	double x;
777 
778 	x = gretl_model_get_double(orig, "tau");
779 	sprintf(altparm, "%g", x);
780 	if (gretl_model_get_int(orig, "rq_nid")) {
781 	    myopt |= OPT_R;
782 	}
783 	x = gretl_model_get_double(orig, "rq_alpha");
784 	if (!na(x)) {
785 	    myopt |= OPT_I;
786 	    set_optval_double(QUANTREG, OPT_I, x);
787 	}
788     } else if (orig->ci == TOBIT) {
789 	double x;
790 
791 	x = gretl_model_get_double(orig, "llimit");
792 	if (!na(x)) {
793 	    myopt |= OPT_L;
794 	    set_optval_double(TOBIT, OPT_L, x);
795 	}
796 	x = gretl_model_get_double(orig, "rlimit");
797 	if (!na(x)) {
798 	    myopt |= OPT_M;
799 	    set_optval_double(TOBIT, OPT_M, x);
800 	}
801     } else if (orig->ci == IVREG) {
802 	transcribe_option_flags(&myopt, orig->opt, OPT_L | OPT_G);
803     }
804 
805     if (rep.errcode) {
806 	goto bailout;
807     }
808 
809  try_again:
810 
811     switch (orig->ci) {
812 
813     case AR:
814 	rep = ar_model(list, dset, myopt, NULL);
815 	break;
816     case AR1:
817 	rep = ar1_model(list, dset, myopt, NULL);
818 	break;
819     case DPANEL:
820 	rep = dpd_model(list, laglist, param, dset, myopt, prn);
821 	break;
822     case ARCH:
823 	rep = arch_model(list, order, dset, myopt);
824 	break;
825     case LOGIT:
826     case PROBIT:
827 	rep = logit_probit(list, dset, orig->ci, myopt, NULL);
828 	break;
829     case TOBIT:
830 	rep = tobit_driver(list, dset, myopt, NULL);
831 	break;
832     case LAD:
833 	if (gretl_model_get_int(orig, "rq")) {
834 	    rep = quantreg_driver(altparm, list, dset, myopt, NULL);
835 	} else {
836 	    rep = lad_model(list, dset, myopt);
837 	}
838 	break;
839     case POISSON:
840     case NEGBIN:
841 	rep = count_model(list, orig->ci, dset, myopt, NULL);
842 	break;
843     case DURATION:
844 	rep = duration_model(list, dset, myopt, NULL);
845 	break;
846     case HECKIT:
847 	rep = heckit_model(list, dset, myopt, NULL);
848 	break;
849     case IVREG:
850 	rep = ivreg(list, dset, myopt);
851 	break;
852     case LOGISTIC:
853 	{
854 	    double lmax = gretl_model_get_double(orig, "lmax");
855 
856 	    rep = logistic_model(list, lmax, dset, myopt);
857 	}
858 	break;
859     case PANEL:
860 	rep = panel_model(list, dset, myopt, prn);
861 	break;
862     case HSK:
863 	rep = hsk_model(list, dset, myopt);
864 	break;
865     default:
866 	/* handles OLS, WLS, etc. */
867 	if (gretl_model_get_int(orig, "pooled")) {
868 	    myopt |= OPT_P;
869 	    rep = panel_model(list, dset, myopt, prn);
870 	} else {
871 	    rep = lsq(list, dset, repci, myopt);
872 	}
873 	break;
874     }
875 
876 #if 0
877     fprintf(stderr, "replicate_estimator:\n"
878 	    " orig: t1=%d, t2=%d, nobs = %d\n"
879 	    " rep:  t1=%d, t2=%d, nobs = %d\n",
880 	    orig->t1, orig->t2, orig->nobs,
881 	    rep.t1, rep.t2, rep.nobs);
882 #endif
883 
884     /* check that we got the same sample as the original */
885     if (!rep.errcode && rep.nobs != orig->nobs) {
886 	if (first && obs_diff_ok(orig, &rep)) {
887 	    dset->t1 = orig->t1;
888 	    dset->t2 = orig->t2;
889 	    clear_model(&rep);
890 	    first = 0;
891 	    goto try_again;
892 	} else {
893 	    fprintf(stderr, "Original obs = %d but new = %d\n", orig->nobs, rep.nobs);
894 	    rep.errcode = E_DATA;
895 	}
896     }
897 
898  bailout:
899 
900     free(full_list);
901 
902     /* if the model count went up for an aux regression,
903        bring it back down */
904     if ((myopt & OPT_A) && get_model_count() > mc) {
905 	model_count_minus(NULL);
906     }
907 
908     return rep;
909 }
910 
add_residual_to_dataset(MODEL * pmod,DATASET * dset)911 static int add_residual_to_dataset (MODEL *pmod, DATASET *dset)
912 {
913     int err = 0;
914 
915     if (dataset_add_series(dset, 1)) {
916 	err = E_ALLOC;
917     } else {
918 	int t, v = dset->v - 1;
919 
920 	for (t=0; t<dset->n; t++) {
921 	    dset->Z[v][t] = pmod->uhat[t];
922 	}
923 
924 	strcpy(dset->varname[v], "uhat");
925 	series_set_label(dset, v, _("residual"));
926     }
927 
928     return err;
929 }
930 
nonlin_test_header(int code,PRN * prn)931 static void nonlin_test_header (int code, PRN *prn)
932 {
933     pputc(prn, '\n');
934     if (code == AUX_SQ) {
935 	pputs(prn, _("Non-linearity test (squared terms)"));
936     } else {
937 	pputs(prn, _("Non-linearity test (log terms)"));
938     }
939     pputs(prn, "\n\n");
940 }
941 
942 static int
real_nonlinearity_test(MODEL * pmod,int * list,DATASET * dset,int aux_code,gretlopt opt,PRN * prn)943 real_nonlinearity_test (MODEL *pmod, int *list,
944 			DATASET *dset, int aux_code,
945 			gretlopt opt, PRN *prn)
946 {
947     MODEL aux;
948     int err;
949 
950     err = add_residual_to_dataset(pmod, dset);
951     if (err) {
952 	return err;
953     }
954 
955     /* replace the dependent var */
956     list[1] = dset->v - 1;
957 
958     aux = lsq(list, dset, OLS, OPT_A);
959     if (aux.errcode) {
960 	err = aux.errcode;
961 	fprintf(stderr, "auxiliary regression failed\n");
962     } else {
963 	double pval, trsq = aux.rsq * aux.nobs;
964 	int df = aux.ncoeff - pmod->ncoeff;
965 
966 	if (df <= 0) {
967 	    /* couldn't add any regressors */
968 	    err = E_SINGULAR;
969 	    goto bailout;
970 	}
971 
972 	pval = chisq_cdf_comp(df, trsq);
973 	aux.aux = aux_code;
974 
975 	if (!(opt & OPT_I)) {
976 	    /* OPT_I = --silent */
977 	    if (opt & OPT_Q) {
978 		nonlin_test_header(aux_code, prn);
979 	    } else {
980 		printmodel(&aux, dset, OPT_NONE, prn);
981 		pputc(prn, '\n');
982 	    }
983 	    pprintf(prn, "  %s: TR^2 = %g,\n  ", _("Test statistic"), trsq);
984 	    pprintf(prn, "%s = P(%s(%d) > %g) = %g\n\n",
985 		    _("with p-value"), _("Chi-square"), df, trsq, pval);
986 	}
987 
988 	if (opt & OPT_S) {
989 	    ModelTest *test;
990 
991 	    test = model_test_new((aux_code == AUX_SQ)?
992 				  GRETL_TEST_SQUARES : GRETL_TEST_LOGS);
993 	    if (test != NULL) {
994 		model_test_set_teststat(test, GRETL_STAT_LM);
995 		model_test_set_dfn(test, df);
996 		model_test_set_value(test, trsq);
997 		model_test_set_pvalue(test, chisq_cdf_comp(df, trsq));
998 		maybe_add_test_to_model(pmod, test);
999 	    }
1000 	}
1001 
1002 	record_test_result(trsq, pval);
1003     }
1004 
1005  bailout:
1006 
1007     clear_model(&aux);
1008 
1009     return err;
1010 }
1011 
1012 /**
1013  * nonlinearity_test:
1014  * @pmod: pointer to original model.
1015  * @dset: dataset struct.
1016  * @aux: AUX_SQ for squares or AUX_LOG for logs
1017  * @opt: if contains OPT_S, save test results to model; if
1018  * contains OPT_I, run silently.
1019  * @prn: gretl printing struct.
1020  *
1021  * Run an auxiliary regression to test @pmod for nonlinearity,
1022  * via the addition of either squares or logs of the original
1023  * indepdendent variables.
1024  *
1025  * Returns: 0 on successful completion, error code on error.
1026  */
1027 
nonlinearity_test(MODEL * pmod,DATASET * dset,ModelAuxCode aux,gretlopt opt,PRN * prn)1028 int nonlinearity_test (MODEL *pmod, DATASET *dset, ModelAuxCode aux,
1029 		       gretlopt opt, PRN *prn)
1030 {
1031     int save_t1 = dset->t1;
1032     int save_t2 = dset->t2;
1033     int *tmplist = NULL;
1034     const int orig_nvar = dset->v;
1035     int err = 0;
1036 
1037     if (!command_ok_for_model(ADD, 0, pmod)) {
1038 	return E_NOTIMP;
1039     }
1040 
1041     if (pmod->ci == LOGISTIC || pmod->ci == LAD) {
1042 	return E_NOTIMP;
1043     }
1044 
1045     /* check for changes in original list members */
1046     err = list_members_replaced(pmod, dset);
1047     if (err) {
1048 	return err;
1049     }
1050 
1051     /* re-impose the sample that was in force when the original model
1052        was estimated */
1053     impose_model_smpl(pmod, dset);
1054 
1055     /* add squares or logs */
1056     tmplist = augment_regression_list(pmod->list, aux, dset, &err);
1057     if (err) {
1058 	return err;
1059     } else if (tmplist[0] == pmod->list[0]) {
1060 	/* no vars were added */
1061 	if (aux == AUX_SQ) {
1062 	    fprintf(stderr, "gretl: generation of squares failed\n");
1063 	    err = E_SQUARES;
1064 	} else if (aux == AUX_LOG) {
1065 	    fprintf(stderr, "gretl: generation of logs failed\n");
1066 	    err = E_LOGS;
1067 	}
1068     }
1069 
1070     if (!err) {
1071 	err = real_nonlinearity_test(pmod, tmplist, dset, aux,
1072 				     opt, prn);
1073     }
1074 
1075     /* trash any extra variables generated (squares, logs) */
1076     dataset_drop_last_variables(dset, dset->v - orig_nvar);
1077 
1078     /* put back into dset what was there on input */
1079     dset->t1 = save_t1;
1080     dset->t2 = save_t2;
1081 
1082     free(tmplist);
1083 
1084     return err;
1085 }
1086 
add_vars_missing(const MODEL * pmod,const int * addvars,const DATASET * dset)1087 static int add_vars_missing (const MODEL *pmod,
1088 			     const int *addvars,
1089 			     const DATASET *dset)
1090 {
1091     int i, vi, t;
1092 
1093     for (t=pmod->t1; t<=pmod->t2; t++) {
1094 	if (model_missing(pmod, t) ||
1095 	    (pmod->yhat != NULL && na(pmod->yhat[t]))) {
1096 	    continue;
1097 	}
1098 	for (i=1; i<=addvars[0]; i++) {
1099 	    vi = addvars[i];
1100 	    if (na(dset->Z[vi][t])) {
1101 		fprintf(stderr, "add: obs %d OK in model but missing for series %s\n",
1102 			t+1, dset->varname[vi]);
1103 		return E_MISSDATA;
1104 	    }
1105 	}
1106     }
1107 
1108     return 0;
1109 }
1110 
LM_add_test(MODEL * pmod,DATASET * dset,int * list,gretlopt opt,PRN * prn)1111 static MODEL LM_add_test (MODEL *pmod, DATASET *dset, int *list,
1112 			  gretlopt opt, PRN *prn)
1113 {
1114     MODEL aux;
1115     int err;
1116 
1117     err = add_residual_to_dataset(pmod, dset);
1118     if (err) {
1119 	gretl_model_init(&aux, dset);
1120 	aux.errcode = err;
1121 	return aux;
1122     }
1123 
1124     list[1] = dset->v - 1;
1125 
1126     aux = lsq(list, dset, OLS, OPT_A | OPT_Z);
1127 
1128     if (aux.errcode) {
1129 	fprintf(stderr, "auxiliary regression failed\n");
1130     } else {
1131 	int df = aux.ncoeff - pmod->ncoeff;
1132 
1133 	if (df <= 0) {
1134 	    /* couldn't add any regressors */
1135 	    aux.errcode = E_SINGULAR;
1136 	} else {
1137 	    if (!(opt & (OPT_Q | OPT_I))) {
1138 		aux.aux = AUX_ADD;
1139 		printmodel(&aux, dset, OPT_S, prn);
1140 	    }
1141 	}
1142     }
1143 
1144     return aux;
1145 }
1146 
1147 /**
1148  * add_test_full:
1149  * @orig: pointer to original model.
1150  * @pmod: pointer to receive augmented model.
1151  * @addvars: list of variables to add to original model.
1152  * @dset: dataset struct.
1153  * @opt: can contain OPT_Q (quiet) to suppress printing
1154  * of the new model, OPT_O to print its covariance matrix,
1155  * OPT_I for silent operation.
1156  * @prn: gretl printing struct.
1157  *
1158  * Re-estimate a given model after adding the specified
1159  * variables, and records a joint test on the additional
1160  * variables.
1161  *
1162  * Returns: 0 on successful completion, error code on error.
1163  */
1164 
add_test_full(MODEL * orig,MODEL * pmod,const int * addvars,DATASET * dset,gretlopt opt,PRN * prn)1165 int add_test_full (MODEL *orig, MODEL *pmod, const int *addvars,
1166 		   DATASET *dset, gretlopt opt, PRN *prn)
1167 {
1168     MODEL umod;
1169     int save_t1 = dset->t1;
1170     int save_t2 = dset->t2;
1171     int *biglist = NULL;
1172     const int orig_nvar = dset->v;
1173     int n_add = 0;
1174     int err = 0;
1175 
1176     if (orig == NULL || orig->list == NULL || addvars == NULL) {
1177 	return E_DATA;
1178     }
1179 
1180     n_add = addvars[0];
1181     if (n_add == 0) {
1182 	return E_NOADD;
1183     }
1184 
1185     if (!command_ok_for_model(ADD, opt, orig)) {
1186 	return E_NOTIMP;
1187     }
1188 
1189     if ((opt & OPT_L) && pmod != NULL) {
1190 	/* --lm option incompatible with "full" variant of add test */
1191 	return E_BADOPT;
1192     }
1193 
1194     if (exact_fit_check(orig, prn)) {
1195 	return 0;
1196     }
1197 
1198     /* check for changes in original list members */
1199     err = list_members_replaced(orig, dset);
1200     if (err) {
1201 	return err;
1202     }
1203 
1204     /* check for NAs in add list relative to model */
1205     err = add_vars_missing(orig, addvars, dset);
1206     if (err) {
1207 	return err;
1208     }
1209 
1210     /* create augmented regression list */
1211     if (orig->ci == IVREG) {
1212 	biglist = ivreg_list_add(orig->list, addvars, opt, &err);
1213     } else if (orig->ci == DPANEL) {
1214 	biglist = panel_list_add(orig, addvars, &err);
1215     } else {
1216 	biglist = gretl_list_add(orig->list, addvars, &err);
1217     }
1218 
1219     if (err) {
1220 	return err;
1221     }
1222 
1223     /* impose the sample range in force when the
1224        original model was estimated
1225     */
1226     impose_model_smpl(orig, dset);
1227 
1228     if (opt & OPT_L) {
1229 	/* run an LM test */
1230 	umod = LM_add_test(orig, dset, biglist, opt, prn);
1231     } else {
1232 	/* Run augmented regression, matching the original estimation
1233 	   method; use OPT_Z to suppress the elimination of perfectly
1234 	   collinear variables.
1235 	*/
1236 	gretlopt ropt = OPT_Z;
1237 
1238 	if (opt & (OPT_Q | OPT_I)) {
1239 	    /* not printing @umod, so pass along OPT_Q */
1240 	    ropt |= OPT_Q;
1241 	}
1242 	umod = replicate_estimator(orig, biglist, dset, ropt, prn);
1243     }
1244 
1245     if (umod.errcode) {
1246 	err = umod.errcode;
1247 	errmsg(err, prn);
1248     } else if (umod.ncoeff - orig->ncoeff != n_add) {
1249 	gretl_errmsg_sprintf("Failed to add %d variable(s)", n_add);
1250 	err = E_DATA;
1251     }
1252 
1253 #if 1
1254     if (!err) {
1255 	err = add_or_omit_compare(orig, &umod, addvars,
1256 				  dset, ADD, opt, prn);
1257     }
1258 #else
1259     if (!err) {
1260 	int *addlist = gretl_list_diff_new(umod.list, orig->list, 2);
1261 
1262 	if (addlist != NULL) {
1263 	    err = add_or_omit_compare(orig, &umod, addlist,
1264 				      dset, ADD, opt, prn);
1265 	    free(addlist);
1266 	}
1267     }
1268 #endif
1269 
1270     if (err || pmod == NULL) {
1271 	clear_model(&umod);
1272     } else {
1273 	*pmod = umod;
1274     }
1275 
1276     /* put dset back as it was on input */
1277     dataset_drop_last_variables(dset, dset->v - orig_nvar);
1278     dset->t1 = save_t1;
1279     dset->t2 = save_t2;
1280 
1281     free(biglist);
1282 
1283     return err;
1284 }
1285 
1286 /**
1287  * add_test:
1288  * @pmod: pointer to model to be tested.
1289  * @addvars: list of variables to test.
1290  * @dset: dataset struct.
1291  * @opt: can contain OPT_Q (quiet) to suppress printing
1292  * of the auxiliary model, OPT_I to suppress all printing
1293  * of results.
1294  * @prn: gretl printing struct.
1295  *
1296  * Performs an LM test on @pmod for the null hypothesis
1297  * that the @addvars variables do not contribute
1298  * significant explanatory power.
1299  *
1300  * Returns: 0 on successful completion, error code on error.
1301  */
1302 
add_test(MODEL * pmod,const int * addvars,DATASET * dset,gretlopt opt,PRN * prn)1303 int add_test (MODEL *pmod, const int *addvars,
1304 	      DATASET *dset, gretlopt opt, PRN *prn)
1305 {
1306     return add_test_full(pmod, NULL, addvars, dset, opt, prn);
1307 }
1308 
wald_omit_test(const int * list,MODEL * pmod,const DATASET * dset,gretlopt opt,PRN * prn)1309 static int wald_omit_test (const int *list, MODEL *pmod,
1310 			   const DATASET *dset, gretlopt opt,
1311 			   PRN *prn)
1312 {
1313     int *test = NULL;
1314     int err = 0;
1315 
1316     /* test validity of omissions */
1317 
1318     if (pmod->ci == IVREG) {
1319 	test = ivreg_list_omit(pmod->list, list, opt, &err);
1320     } else if (pmod->ci == PANEL || pmod->ci == DPANEL) {
1321 	test = panel_list_omit(pmod, list, &err);
1322     } else {
1323 	test = gretl_list_omit(pmod->list, list, 2, &err);
1324     }
1325 
1326     if (!err) {
1327 	free(test);
1328 	err = add_or_omit_compare(pmod, NULL, list, dset, OMIT,
1329 				  opt, prn);
1330     }
1331 
1332     return err;
1333 }
1334 
1335 /* Check whether coefficient @i corresponds to a variable
1336    that is removable from the model: this is the case if either
1337    (a) the @cands list is empty, or (b) coefficient @i is the
1338    coefficient on one of the variables in @cands.
1339 */
1340 
coeff_is_removable(const int * cands,const MODEL * pmod,DATASET * dset,int i)1341 static int coeff_is_removable (const int *cands, const MODEL *pmod,
1342 			       DATASET *dset, int i)
1343 {
1344     int ret = 1;
1345 
1346     if (cands != NULL && cands[0] > 0) {
1347 	const char *vname;
1348 	int j, pj;
1349 
1350 	ret = 0; /* reverse the presumption */
1351 
1352 	for (j=1; j<=cands[0]; j++) {
1353 	    vname = dset->varname[cands[j]];
1354 	    pj = gretl_model_get_param_number(pmod, dset, vname);
1355 	    if (pj == i) {
1356 		ret = 1;
1357 		break;
1358 	    }
1359 	}
1360     }
1361 
1362     return ret;
1363 }
1364 
1365 /* Determine if @pmod contains a variable with p-value
1366    greater than some cutoff alpha_max; and if so, remove
1367    this variable from @list.
1368 
1369    If the list @cands is non-empty then confine the search
1370    to candidate variables in that list.
1371 
1372    Returns 1 if a variable was dropped, else 0.
1373 */
1374 
auto_drop_var(const MODEL * pmod,int * list,const int * cands,DATASET * dset,double alpha_max,int starting,gretlopt opt,PRN * prn)1375 static int auto_drop_var (const MODEL *pmod,
1376 			  int *list, const int *cands,
1377 			  DATASET *dset, double alpha_max,
1378 			  int starting, gretlopt opt,
1379 			  PRN *prn)
1380 {
1381     double tstat, pv = 0.0, tmin = 4.0;
1382     int imin, imax = pmod->ncoeff;
1383     int i, k = -1;
1384     int ret = 0;
1385 
1386     if ((pmod->ci == LOGIT || pmod->ci == PROBIT) &&
1387 	gretl_model_get_int(pmod, "ordered")) {
1388 	/* FIXME other problematic cases? */
1389 	imax = pmod->list[0] - 1;
1390     }
1391 
1392     /* if the constant is the sole regressors, allow it
1393        to be dropped */
1394     imin = (pmod->ncoeff == 1)? 0 : pmod->ifc;
1395 
1396     for (i=imin; i<imax; i++) {
1397 	if (coeff_is_removable(cands, pmod, dset, i)) {
1398 	    tstat = fabs(pmod->coeff[i] / pmod->sderr[i]);
1399 	    if (tstat < tmin) {
1400 		tmin = tstat;
1401 		k = i;
1402 	    }
1403 	}
1404     }
1405 
1406     if (k >= 0) {
1407 	pv = coeff_pval(pmod->ci, tmin, pmod->dfd);
1408     }
1409 
1410     if (pv > alpha_max) {
1411 	char pname[VNAMELEN];
1412 	int err;
1413 
1414 	if (starting && !(opt & OPT_I)) {
1415 	    /* not --silent */
1416 	    pputc(prn, '\n');
1417 	    pprintf(prn, _("Sequential elimination using two-sided alpha = %.2f"),
1418 		    alpha_max);
1419 	    pputs(prn, "\n\n");
1420 	}
1421 
1422 	gretl_model_get_param_name(pmod, dset, k, pname);
1423 	err = gretl_list_delete_at_pos(list, k + 2);
1424 	if (!err) {
1425 	    if (!(opt & OPT_I)) {
1426 		pprintf(prn, _(" Dropping %-16s (p-value %.3f)\n"), pname, pv);
1427 	    }
1428 	    ret = 1;
1429 	}
1430     }
1431 
1432     return ret;
1433 }
1434 
list_copy_values(int * targ,const int * src)1435 static void list_copy_values (int *targ, const int *src)
1436 {
1437     int i;
1438 
1439     for (i=0; i<=src[0]; i++) {
1440 	targ[i] = src[i];
1441     }
1442 }
1443 
1444 /* run a loop in which the least significant variable is dropped
1445    from the regression list, provided its p-value exceeds some
1446    specified cutoff.  FIXME this probably still needs work for
1447    estimators other than OLS.  If @omitlist is non-empty the
1448    routine is confined to members of the list.
1449 */
1450 
auto_omit(MODEL * orig,const int * omitlist,DATASET * dset,gretlopt opt,PRN * prn)1451 static MODEL auto_omit (MODEL *orig, const int *omitlist,
1452 			DATASET *dset, gretlopt opt,
1453 			PRN *prn)
1454 {
1455     MODEL omod;
1456     double amax;
1457     int *tmplist;
1458     int allgone = 0;
1459     int i, drop;
1460     int err = 0;
1461 
1462     gretl_model_init(&omod, dset);
1463 
1464     tmplist = gretl_list_copy(orig->list);
1465     if (tmplist == NULL) {
1466 	omod.errcode = E_ALLOC;
1467 	return omod;
1468     }
1469 
1470     amax = get_optval_double(OMIT, OPT_A, &err);
1471     if (err) {
1472 	omod.errcode = err;
1473 	return omod;
1474     }
1475 
1476     if (na(amax) || amax <= 0.0 || amax >= 1.0) {
1477 	amax = 0.10;
1478     }
1479 
1480     drop = auto_drop_var(orig, tmplist, omitlist, dset, amax,
1481 			 1, opt, prn);
1482     if (!drop) {
1483 	/* nothing was dropped: set a "benign" error code */
1484 	err = omod.errcode = E_NOOMIT;
1485     }
1486 
1487     for (i=0; drop > 0 && !allgone; i++) {
1488 	if (i > 0) {
1489 	    set_reference_missmask_from_model(orig);
1490 	}
1491 	omod = replicate_estimator(orig, tmplist, dset, OPT_A, prn);
1492 	err = omod.errcode;
1493 	if (err) {
1494 	    fprintf(stderr, "auto_omit: error %d from replicate_estimator\n",
1495 		    err);
1496 	    drop = 0; /* will break */
1497 	} else {
1498 	    list_copy_values(tmplist, omod.list);
1499 	    drop = auto_drop_var(&omod, tmplist, omitlist, dset,
1500 				 amax, 0, opt, prn);
1501 	    if (drop && omod.ncoeff == 1) {
1502 		allgone = 1; /* will break */
1503 	    }
1504 	    clear_model(&omod);
1505 	}
1506     }
1507 
1508     if (allgone) {
1509 	pputc(prn, '\n');
1510 	pprintf(prn, _("No coefficient has a p-value less than %g"), amax);
1511 	pputc(prn, '\n');
1512     } else if (!err) {
1513 	/* re-estimate the final model without the auxiliary flag */
1514 	gretlopt ropt = OPT_NONE;
1515 
1516 	if (opt & (OPT_Q | OPT_I)) {
1517 	    /* not printing @omod, so pass along OPT_Q */
1518 	    ropt |= OPT_Q;
1519 	}
1520 	set_reference_missmask_from_model(orig);
1521 	omod = replicate_estimator(orig, tmplist, dset, ropt, prn);
1522     }
1523 
1524     free(tmplist);
1525 
1526     return omod;
1527 }
1528 
1529 /* create reduced list for "omit" test on model, based on
1530    the list of variables to be dropped, @omitvars
1531 */
1532 
make_short_list(MODEL * orig,const int * omitvars,gretlopt opt,int ** plist)1533 static int make_short_list (MODEL *orig, const int *omitvars,
1534 			    gretlopt opt, int **plist)
1535 {
1536     int *list = NULL;
1537     int err = 0;
1538 
1539     if (omitvars == NULL || omitvars[0] == 0) {
1540 	return E_PARSE;
1541     }
1542 
1543     if (orig->ci == IVREG) {
1544 	list = ivreg_list_omit(orig->list, omitvars, opt, &err);
1545     } else if (orig->ci == PANEL || orig->ci == DPANEL) {
1546 	list = panel_list_omit(orig, omitvars, &err);
1547     } else {
1548 	list = gretl_list_omit(orig->list, omitvars, 2, &err);
1549     }
1550 
1551     if (list != NULL && list[0] == 1) {
1552 	/* only the dependent variable would be left */
1553 	err = E_NOVARS;
1554     }
1555 
1556     *plist = list;
1557 
1558     return err;
1559 }
1560 
omit_options_inconsistent(MODEL * pmod,gretlopt opt)1561 static int omit_options_inconsistent (MODEL *pmod, gretlopt opt)
1562 {
1563     if (opt & OPT_B) {
1564 	/* 2sls: omitting variable as instrument */
1565 	if (opt & (OPT_W | OPT_A)) {
1566 	    /* can't use Wald method on original VCV,
1567 	       and can't do --auto */
1568 	    return 1;
1569 	} else if (pmod->ci != IVREG) {
1570 	    return 1;
1571 	}
1572     }
1573 
1574     if ((opt & OPT_A) && (opt & OPT_W)) {
1575 	/* --auto and --wald options incompatible */
1576 	return 1;
1577     }
1578 
1579     return 0;
1580 }
1581 
omit_test_precheck(MODEL * pmod,gretlopt opt)1582 static int omit_test_precheck (MODEL *pmod, gretlopt opt)
1583 {
1584     int err = 0;
1585 
1586     if (pmod == NULL || pmod->list == NULL) {
1587 	err = E_DATA;
1588     } else if (!command_ok_for_model(OMIT, 0, pmod)) {
1589 	err = E_NOTIMP;
1590     } else if (omit_options_inconsistent(pmod, opt)) {
1591 	err = E_BADOPT;
1592     }
1593 
1594     return err;
1595 }
1596 
1597 /**
1598  * omit_test_full:
1599  * @orig: pointer to original model.
1600  * @pmod: pointer to receive new model, with vars omitted.
1601  * @omitvars: list of variables to omit from original model.
1602  * @dset: dataset struct.
1603  * @opt: can contain OPT_Q (quiet) to suppress printing
1604  * of the new model, OPT_O to print its covariance matrix,
1605  * OPT_I for silent operation; for OPT_A, see below.
1606  * @prn: gretl printing struct.
1607  *
1608  * Re-estimate a given model after removing the variables
1609  * specified in @omitvars.  Or if OPT_A is given, proceed
1610  * sequentially, at each step dropping the least significant
1611  * variable provided its p-value is above a certain threshold
1612  * (currently 0.10, two-sided).
1613  *
1614  * Returns: 0 on successful completion, error code on error.
1615  */
1616 
omit_test_full(MODEL * orig,MODEL * pmod,const int * omitvars,DATASET * dset,gretlopt opt,PRN * prn)1617 int omit_test_full (MODEL *orig, MODEL *pmod, const int *omitvars,
1618 		    DATASET *dset, gretlopt opt, PRN *prn)
1619 {
1620     MODEL rmod;
1621     int save_t1 = dset->t1;
1622     int save_t2 = dset->t2;
1623     int *tmplist = NULL;
1624     int err;
1625 
1626     err = omit_test_precheck(orig, opt);
1627     if (err) {
1628 	return err;
1629     }
1630 
1631     /* check that vars to omit have not been redefined */
1632     if ((err = list_members_replaced(orig, dset))) {
1633 	return err;
1634     }
1635 
1636     if (!(opt & OPT_A)) {
1637 	/* not doing auto-omit */
1638 	err = make_short_list(orig, omitvars, opt, &tmplist);
1639 	if (err) {
1640 	    free(tmplist);
1641 	    return err;
1642 	}
1643     }
1644 
1645     /* impose the sample range used for the original model */
1646     impose_model_smpl(orig, dset);
1647 
1648     /* set the mask for missing obs within the sample range, based
1649        on the original model */
1650     set_reference_missmask_from_model(orig);
1651 
1652     if (opt & OPT_A) {
1653 	rmod = auto_omit(orig, omitvars, dset, opt, prn);
1654     } else {
1655 	gretlopt ropt = OPT_NONE;
1656 
1657 	if (opt & (OPT_Q | OPT_I)) {
1658 	    /* not printing @rmod, so pass along OPT_Q */
1659 	    ropt |= OPT_Q;
1660 	}
1661 	rmod = replicate_estimator(orig, tmplist, dset, ropt, prn);
1662     }
1663 
1664     err = rmod.errcode;
1665 
1666     if (err) {
1667 	if (err == E_NOOMIT && (opt & OPT_I)) {
1668 	    ; /* --silent: keep quiet */
1669 	} else {
1670 	    errmsg(err, prn);
1671 	}
1672     } else {
1673 	MODEL *newmod = NULL;
1674 	int minpos = 2;
1675 	int *omitlist = NULL;
1676 
1677 	if (orig->ci == DPANEL) {
1678 	    /* skip AR spec, separator, and dep var */
1679 	    minpos = 4;
1680 	}
1681 
1682 	if (rmod.list == NULL) {
1683 	    /* handle the case where sequential elimination
1684 	       ended up dropping all regressors */
1685 	    int *minlist = gretl_list_new(1);
1686 
1687 	    minlist[1] = orig->list[1];
1688 	    omitlist = gretl_list_diff_new(orig->list, minlist, minpos);
1689 	    free(minlist);
1690 	} else {
1691 	    newmod = &rmod;
1692 	    omitlist = gretl_list_diff_new(orig->list, rmod.list, minpos);
1693 	}
1694 	if (omitlist != NULL) {
1695 	    err = add_or_omit_compare(orig, newmod, omitlist,
1696 				      dset, OMIT, opt, prn);
1697 	    free(omitlist);
1698 	}
1699     }
1700 
1701     if (err || pmod == NULL) {
1702 	clear_model(&rmod);
1703     } else {
1704 	*pmod = rmod;
1705     }
1706 
1707     /* put back into dset what was there on input */
1708     dset->t1 = save_t1;
1709     dset->t2 = save_t2;
1710 
1711     free(tmplist);
1712 
1713     return err;
1714 }
1715 
1716 /**
1717  * omit_test:
1718  * @pmod: pointer to model to be tested.
1719  * @omitvars: list of variables to test.
1720  * @dset: dataset struct.
1721  * @opt: can contain OPT_Q (quiet) to suppress printing
1722  * of results.
1723  * @prn: gretl printing struct.
1724  *
1725  * Performs a Wald test on @pmod for the null hypothesis
1726  * that the @omitvars variables do not contribute
1727  * explanatory power.
1728  *
1729  * Returns: 0 on successful completion, error code on error.
1730  */
1731 
omit_test(MODEL * pmod,const int * omitvars,DATASET * dset,gretlopt opt,PRN * prn)1732 int omit_test (MODEL *pmod, const int *omitvars,
1733 	       DATASET *dset, gretlopt opt, PRN *prn)
1734 {
1735     int err = omit_test_precheck(pmod, opt);
1736 
1737     if (!err) {
1738 	err = wald_omit_test(omitvars, pmod, dset, opt, prn);
1739     }
1740 
1741     return err;
1742 }
1743 
1744 /**
1745  * get_DW_pvalue_for_model:
1746  * @pmod: model to be tested.
1747  * @dset: dataset struct.
1748  * @err: location to receive error code.
1749  *
1750  * Computes the p-value for the Durbin-Watson statistic for the
1751  * given model, using the Imhof method.
1752  *
1753  * Returns: the p-value, or #NADBL on error.
1754  */
1755 
get_DW_pvalue_for_model(MODEL * pmod,DATASET * dset,int * err)1756 double get_DW_pvalue_for_model (MODEL *pmod, DATASET *dset,
1757 				int *err)
1758 {
1759     MODEL dwmod;
1760     int save_t1 = dset->t1;
1761     int save_t2 = dset->t2;
1762     int *list = NULL;
1763     double pv = NADBL;
1764 
1765     /* maybe this has already been done? */
1766     pv = gretl_model_get_double(pmod, "dw_pval");
1767     if (!na(pv)) {
1768 	return pv;
1769     }
1770 
1771     if (pmod->ci == PANEL && panel_DW_pval_ok(pmod)) {
1772 	/* Use Bhargava et al approximation */
1773 	return BFN_panel_DW_pvalue(pmod, dset, err);
1774     }
1775 
1776     if (dset == NULL || dset->Z == NULL) {
1777 	*err = E_NODATA;
1778     } else if (pmod == NULL || pmod->list == NULL) {
1779 	*err = E_DATA;
1780     } else if ((pmod->ci != OLS && pmod->ci != PANEL) ||
1781 	       na(pmod->dw) || model_has_missing_obs(pmod)) {
1782 	*err = E_BADSTAT;
1783     } else {
1784 	/* check that relevant vars have not been redefined */
1785 	*err = list_members_replaced(pmod, dset);
1786     }
1787 
1788     if (!*err) {
1789 	list = gretl_list_copy(pmod->list);
1790 	if (list == NULL) {
1791 	    *err = E_ALLOC;
1792 	}
1793     }
1794 
1795     if (*err) {
1796 	return NADBL;
1797     }
1798 
1799     gretl_model_init(&dwmod, dset);
1800 
1801     /* impose the sample range used for the original model */
1802     impose_model_smpl(pmod, dset);
1803 
1804     dwmod = replicate_estimator(pmod, list, dset, OPT_A | OPT_I, NULL);
1805     *err = dwmod.errcode;
1806 
1807     if (!*err) {
1808 	pv = gretl_model_get_double(&dwmod, "dw_pval");
1809 	if (!na(pv)) {
1810 	    /* record result on the incoming model */
1811 	    gretl_model_set_double(pmod, "dw_pval", pv);
1812 	}
1813     }
1814 
1815     /* put back into dset what was there on input */
1816     dset->t1 = save_t1;
1817     dset->t2 = save_t2;
1818 
1819     clear_model(&dwmod);
1820     free(list);
1821 
1822     return pv;
1823 }
1824 
1825 /**
1826  * reset_test:
1827  * @pmod: pointer to model to be tested.
1828  * @dset: dataset struct.
1829  * @opt: if contains %OPT_S, save test results to model. %OPT_Q
1830  * suppresses the printout of the auxiliary regression. %OPT_R and
1831  * %OPT_C stand for "squares only" and "cubes only", respectively.
1832  * %OPT_I produces silent operation.
1833  * @prn: gretl printing struct.
1834  *
1835  * Carries out and prints Ramsey's RESET test for model specification.
1836  *
1837  * Returns: 0 on successful completion, error code on error.
1838  */
1839 
reset_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)1840 int reset_test (MODEL *pmod, DATASET *dset,
1841 		gretlopt opt, PRN *prn)
1842 {
1843     int *newlist = NULL;
1844     MODEL aux;
1845     double RF;
1846     int save_t1 = dset->t1;
1847     int save_t2 = dset->t2;
1848     int i, t, orig_v = dset->v;
1849     int addv, use_square, use_cube;
1850     const char *mode;
1851     int err = 0;
1852 
1853     if (pmod->ci != OLS) {
1854 	return E_OLSONLY;
1855     }
1856 
1857     err = incompatible_options(opt, OPT_C | OPT_R);
1858 
1859     if (err) {
1860 	return err;
1861     }
1862 
1863     if (exact_fit_check(pmod, prn)) {
1864 	return 0;
1865     }
1866 
1867     use_square = !(opt & OPT_C); /* not cubes-only */
1868     use_cube = !(opt & OPT_R);   /* not squares-only */
1869 
1870     gretl_model_init(&aux, dset);
1871 
1872     if (opt & OPT_R) {
1873 	addv = 1;
1874 	mode = N_("squares only");
1875     } else if (opt & OPT_C) {
1876 	addv = 1;
1877 	mode = N_("cubes only");
1878     } else {
1879 	addv = 2;
1880 	mode = N_("squares and cubes");
1881     }
1882 
1883     impose_model_smpl(pmod, dset);
1884 
1885     if (pmod->ncoeff + addv >= dset->t2 - dset->t1) {
1886 	/* can't run aux regression */
1887 	err = E_DF;
1888     }
1889 
1890     if (!err) {
1891 	newlist = gretl_list_new(pmod->list[0] + addv);
1892 	if (newlist == NULL) {
1893 	    err = E_ALLOC;
1894 	}
1895     }
1896 
1897     if (!err) {
1898 	for (i=1; i<=pmod->list[0]; i++) {
1899 	    newlist[i] = pmod->list[i];
1900 	}
1901 	if (dataset_add_series(dset, addv)) {
1902 	    err = E_ALLOC;
1903 	}
1904     }
1905 
1906     if (!err) {
1907 	/* add yhat^2 and/or yhat^3 to data set */
1908 	int vs = orig_v;
1909 	int vc = (opt & OPT_C)? orig_v : orig_v + 1;
1910 	int k = pmod->list[0] + 1;
1911 
1912 	for (t = pmod->t1; t<=pmod->t2; t++) {
1913 	    double x = pmod->yhat[t];
1914 
1915 	    if (use_square) {
1916 		dset->Z[vs][t] = x * x;
1917 	    }
1918 	    if (use_cube) {
1919 		dset->Z[vc][t] = x * x * x;
1920 	    }
1921 	}
1922 
1923 	if (use_square) {
1924 	    strcpy(dset->varname[vs], "yhat^2");
1925 	    newlist[k++] = vs;
1926 	}
1927 	if (use_cube) {
1928 	    strcpy(dset->varname[vc], "yhat^3");
1929 	    newlist[k++] = vc;
1930 	}
1931     }
1932 
1933     if (!err) {
1934 	aux = lsq(newlist, dset, OLS, OPT_A);
1935 	err = aux.errcode;
1936 	if (err) {
1937 	    errmsg(aux.errcode, prn);
1938 	}
1939     }
1940 
1941     if (!err) {
1942 	int silent = (opt & OPT_I);
1943 	double pval;
1944 
1945 	aux.aux = AUX_RESET;
1946 
1947 	if (!silent) {
1948 	    if (!(opt & OPT_Q)) {
1949 		printmodel(&aux, dset, OPT_NONE, prn);
1950 	    } else {
1951 		if (!(opt & OPT_G)) {
1952 		    /* GUI special; see gui2/library.c */
1953 		    pputc(prn, '\n');
1954 		}
1955 		pputs(prn, _("RESET test for specification"));
1956 		pprintf(prn, " (%s)\n", _(mode));
1957 	    }
1958 	}
1959 
1960 	RF = ((pmod->ess - aux.ess) / addv) / (aux.ess / aux.dfd);
1961 	pval = snedecor_cdf_comp(addv, aux.dfd, RF);
1962 
1963 	if (!silent) {
1964 	    pprintf(prn, "%s: F = %f,\n", _("Test statistic"), RF);
1965 	    pprintf(prn, "%s = P(F(%d,%d) > %g) = %.3g\n", _("with p-value"),
1966 		    addv, aux.dfd, RF, pval);
1967 	    pputc(prn, '\n');
1968 	}
1969 
1970 	if (opt & OPT_S) {
1971 	    ModelTest *test = model_test_new(GRETL_TEST_RESET);
1972 	    gretlopt topt = OPT_NONE;
1973 
1974 	    if (test != NULL) {
1975 		if (opt & OPT_R) {
1976 		    topt = OPT_R;
1977 		} else if (opt & OPT_C) {
1978 		    topt = OPT_C;
1979 		}
1980 		model_test_set_teststat(test, GRETL_STAT_RESET);
1981 		model_test_set_dfn(test, addv);
1982 		model_test_set_dfd(test, aux.dfd);
1983 		model_test_set_value(test, RF);
1984 		model_test_set_pvalue(test, pval);
1985 		model_test_set_opt(test, topt);
1986 		maybe_add_test_to_model(pmod, test);
1987 	    }
1988 	}
1989 
1990 	record_test_result(RF, pval);
1991     }
1992 
1993     free(newlist);
1994     dataset_drop_last_variables(dset, addv);
1995     clear_model(&aux);
1996 
1997     dset->t1 = save_t1;
1998     dset->t2 = save_t2;
1999 
2000     return err;
2001 }
2002 
bg_test_header(int order,PRN * prn,int ivreg)2003 static void bg_test_header (int order, PRN *prn, int ivreg)
2004 {
2005     if (ivreg) {
2006 	pprintf(prn, "\n%s ", _("Godfrey (1994) test for"));
2007     } else {
2008 	pprintf(prn, "\n%s ", _("Breusch-Godfrey test for"));
2009     }
2010 
2011     if (order > 1) {
2012 	pprintf(prn, "%s %d\n", _("autocorrelation up to order"),
2013 		order);
2014     } else {
2015 	pprintf(prn, "%s\n", _("first-order autocorrelation"));
2016     }
2017 
2018     pputc(prn, '\n');
2019 }
2020 
ivreg_autocorr_wald_stat(MODEL * aux,int order,int * err)2021 static double ivreg_autocorr_wald_stat (MODEL *aux, int order, int *err)
2022 {
2023     gretl_vector *b = gretl_vector_alloc(order);
2024     gretl_matrix *V1 = gretl_matrix_alloc(order, order);
2025     gretl_vector *WT = gretl_vector_alloc(1);
2026     gretl_matrix *V0 = NULL;
2027     double x = NADBL;
2028     int i, j, ki, kj;
2029 
2030     if (b == NULL || V1 == NULL || WT == NULL) {
2031 	*err = E_ALLOC;
2032     } else {
2033 	V0 = gretl_model_get_matrix(aux, M_VCV, err);
2034     }
2035 
2036     if (!*err) {
2037 	ki = aux->ncoeff - order;
2038 
2039 	for (i=0; i<order; i++) {
2040 	    x = aux->coeff[ki];
2041 	    gretl_vector_set(b, i, x);
2042 	    kj = ki;
2043 	    for (j=i; j<order; j++) {
2044 		x = gretl_matrix_get(V0, ki, kj);
2045 		gretl_matrix_set(V1, i, j, x);
2046 		gretl_matrix_set(V1, j, i, x);
2047 		kj++;
2048 	    }
2049 	    ki++;
2050 	}
2051 	*err = gretl_invert_symmetric_matrix(V1);
2052     }
2053 
2054     if (!*err) {
2055 	gretl_matrix_qform(b, GRETL_MOD_NONE, V1, WT, GRETL_MOD_NONE);
2056 	x = gretl_vector_get(WT, 0) / order;
2057     }
2058 
2059     gretl_vector_free(WT);
2060     gretl_vector_free(b);
2061     gretl_matrix_free(V1);
2062     gretl_matrix_free(V0);
2063 
2064     return x;
2065 }
2066 
2067 /**
2068  * ivreg_autocorr_test:
2069  * @pmod: pointer to model to be tested.
2070  * @order: lag order for test.
2071  * @dset: dataset struct.
2072  * @opt: if flags include OPT_S, save test results to model;
2073  * if OPT_Q, be less verbose.
2074  * @prn: gretl printing struct.
2075  *
2076  * Tests the given IV model for autocorrelation of order equal
2077  * to the specified value, or equal to the frequency of the data if
2078  * the supplied @order is zero, as per Godfrey (1994), "Testing for
2079  * Serial Correlation by Variable Addition in Dynamic Models Estimated
2080  * by Instrumental Variables", RES. Note that none of the
2081  * asymptotically equivalent tests given on page 553 is used
2082  * here. Instead, we estimate the model augmented with lags and then
2083  * perform a Wald-type test. The resulting chi-square statistic is
2084  * divided by its degrees of freedom as a finite-sample adjustment and
2085  * compared to an F distribution.
2086  *
2087  * Returns: 0 on successful completion, error code on error.
2088  */
2089 
ivreg_autocorr_test(MODEL * pmod,int order,DATASET * dset,gretlopt opt,PRN * prn)2090 static int ivreg_autocorr_test (MODEL *pmod, int order,
2091 				DATASET *dset, gretlopt opt,
2092 				PRN *prn)
2093 {
2094     int smpl_t1 = dset->t1;
2095     int smpl_t2 = dset->t2;
2096     int v = dset->v;
2097     int *addlist = NULL;
2098     int *testlist = NULL;
2099     double x = 0, pval = 1;
2100     MODEL aux;
2101     int i, t;
2102     int err = 0;
2103 
2104     if (dataset_is_panel(dset)) {
2105 	return E_NOTIMP;
2106     }
2107 
2108     if (model_has_missing_obs(pmod)) {
2109 	return E_MISSDATA;
2110     }
2111 
2112     /* impose original sample range */
2113     impose_model_smpl(pmod, dset);
2114 
2115     gretl_model_init(&aux, dset);
2116 
2117     if (order <= 0) {
2118 	order = dset->pd;
2119     }
2120 
2121     if (pmod->ncoeff + order >= dset->t2 - dset->t1) {
2122 	return E_DF;
2123     }
2124 
2125     addlist = gretl_list_new(order);
2126 
2127     if (addlist == NULL) {
2128 	err = E_ALLOC;
2129     } else {
2130 	err = add_residual_to_dataset(pmod, dset);
2131     }
2132 
2133     if (!err) {
2134 	/* add lags of residual */
2135 	for (i=1; i<=order && !err; i++) {
2136 	    int lnum;
2137 
2138 	    lnum = laggenr(v, i, dset);
2139 
2140 	    if (lnum < 0) {
2141 		gretl_errmsg_set(_("lagging uhat failed"));
2142 		err = E_LAGS;
2143 	    } else {
2144 		/* set the first entries to 0 for compatibility with Godfrey (1994)
2145 		   and PcGive (not perfect) */
2146 		for (t=smpl_t1; t<smpl_t1+i; t++) {
2147 		    dset->Z[lnum][t] = 0;
2148 		}
2149 		addlist[i] = lnum;
2150 	    }
2151 	}
2152 	if (!err) {
2153 	    /* compose augmented regression list */
2154 	    testlist = ivreg_list_add(pmod->list, addlist, OPT_B, &err);
2155 	}
2156     }
2157 
2158     if (!err) {
2159 	gretlopt ivopt = OPT_A;
2160 
2161 	transcribe_option_flags(&ivopt, pmod->opt,
2162 				OPT_L | OPT_G | OPT_R);
2163 	aux = ivreg(testlist, dset, ivopt);
2164 	err = aux.errcode;
2165     }
2166 
2167     if (!err) {
2168 	x = ivreg_autocorr_wald_stat(&aux, order, &err);
2169     }
2170 
2171     if (!err) {
2172 	aux.aux = AUX_AR;
2173 	gretl_model_set_int(&aux, "BG_order", order);
2174 	pval = snedecor_cdf_comp(order, aux.nobs - pmod->ncoeff - order, x);
2175 
2176 	if (opt & OPT_Q) {
2177 	    bg_test_header(order, prn, 1);
2178 	} else {
2179 	    printmodel(&aux, dset, OPT_S, prn);
2180 	}
2181 
2182 	pputc(prn, '\n');
2183 	pprintf(prn, "%s: Pseudo-LMF = %f,\n", _("Test statistic"), x);
2184 	pprintf(prn, "%s = P(F(%d,%d) > %g) = %.3g\n", _("with p-value"),
2185 		order, aux.nobs - pmod->ncoeff, x, pval);
2186 	pputc(prn, '\n');
2187 	record_test_result(x / order, pval);
2188 
2189 	if (opt & OPT_S) {
2190 	    ModelTest *test = model_test_new(GRETL_TEST_AUTOCORR);
2191 
2192 	    if (test != NULL) {
2193 		model_test_set_teststat(test, GRETL_STAT_LMF);
2194 		model_test_set_dfn(test, order);
2195 		model_test_set_dfd(test, aux.nobs - pmod->ncoeff);
2196 		model_test_set_order(test, order);
2197 		model_test_set_value(test, x);
2198 		model_test_set_pvalue(test, pval);
2199 		maybe_add_test_to_model(pmod, test);
2200 	    }
2201 	}
2202     }
2203 
2204     free(addlist);
2205     free(testlist);
2206 
2207     dataset_drop_last_variables(dset, dset->v - v);
2208     clear_model(&aux);
2209 
2210     /* reset sample as it was */
2211     dset->t1 = smpl_t1;
2212     dset->t2 = smpl_t2;
2213 
2214     return err;
2215 }
2216 
lb_autocorr_test(MODEL * pmod,int order,gretlopt opt,PRN * prn)2217 static int lb_autocorr_test (MODEL *pmod, int order,
2218 			     gretlopt opt, PRN *prn)
2219 {
2220     double lb, pval = NADBL;
2221     int df, err = 0;
2222 
2223     df = order - arma_model_get_n_arma_coeffs(pmod);
2224 
2225     if (df < 0) {
2226 	gretl_errmsg_set("Insufficient degrees of freedom for test");
2227 	return E_DATA;
2228     }
2229 
2230     lb = ljung_box(order, pmod->t1, pmod->t2, pmod->uhat, &err);
2231 
2232     if (!err) {
2233 	pval = chisq_cdf_comp(df, lb);
2234 	if (na(pval)) {
2235 	    err = E_DATA;
2236 	}
2237     }
2238 
2239     if (err) {
2240 	gretl_errmsg_set(_("Error calculating Ljung-Box statistic"));
2241     } else {
2242 	pputc(prn, '\n');
2243 	pprintf(prn, _("Test for autocorrelation up to order %d"),
2244 		order);
2245 	pputs(prn, "\n\n");
2246 	pprintf(prn, "Ljung-Box Q' = %g,\n", lb);
2247 	pprintf(prn, "%s = P(%s(%d) > %g) = %#.4g\n", _("with p-value"),
2248 		_("Chi-square"), df, lb, chisq_cdf_comp(df, lb));
2249 	pputc(prn, '\n');
2250 	record_test_result(lb, pval);
2251     }
2252 
2253     if (!err && (opt & OPT_S)) {
2254 	ModelTest *test = model_test_new(GRETL_TEST_AUTOCORR);
2255 
2256 	if (test != NULL) {
2257 	    model_test_set_teststat(test, GRETL_STAT_LB_CHISQ);
2258 	    model_test_set_dfn(test, df);
2259 	    model_test_set_order(test, order);
2260 	    model_test_set_value(test, lb);
2261 	    model_test_set_pvalue(test, pval);
2262 	    maybe_add_test_to_model(pmod, test);
2263 	}
2264     }
2265 
2266     return err;
2267 }
2268 
2269 /**
2270  * regular_autocorr_test:
2271  * @pmod: pointer to model to be tested.
2272  * @order: lag order for test.
2273  * @dset: dataset struct.
2274  * @opt: if flags include OPT_S, save test results to model;
2275  * if OPT_Q, be less verbose; if OPT_I, be silent.
2276  * @prn: gretl printing struct.
2277  *
2278  * Tests the given model for autocorrelation of order equal to
2279  * the specified value, or equal to the frequency of the data if
2280  * the supplied @order is zero. Prints TR^2 and LMF test statistics.
2281  *
2282  * Returns: 0 on successful completion, error code on error.
2283  */
2284 
regular_autocorr_test(MODEL * pmod,int order,DATASET * dset,gretlopt opt,PRN * prn)2285 static int regular_autocorr_test (MODEL *pmod, int order, DATASET *dset,
2286 				  gretlopt opt, PRN *prn)
2287 {
2288     int save_t1 = dset->t1;
2289     int save_t2 = dset->t2;
2290     int *newlist = NULL;
2291     MODEL aux;
2292     double RSSxe, RSSx = pmod->ess;
2293     int i, t, n = dset->n, v = dset->v;
2294     double trsq, LMF, lb, pval = 1.0;
2295     int err = 0;
2296 
2297     gretl_model_init(&aux, dset);
2298 
2299     if (order <= 0) {
2300 	order = dset->pd;
2301     }
2302 
2303     if (pmod->ncoeff + order >= pmod->t2 - pmod->t1) {
2304 	return E_DF;
2305     }
2306 
2307     newlist = gretl_list_new(pmod->list[0] + order);
2308 
2309     if (newlist == NULL) {
2310 	err = E_ALLOC;
2311     } else {
2312 	newlist[0] = pmod->list[0] + order;
2313 	for (i=2; i<=pmod->list[0]; i++) {
2314 	    newlist[i] = pmod->list[i];
2315 	}
2316 	if (dataset_add_series(dset, 1 + order)) {
2317 	    err = E_ALLOC;
2318 	}
2319     }
2320 
2321     if (!err) {
2322 	/* add uhat to data set: substitute zeros for
2323 	   pre-sample values */
2324 	for (t=0; t<n; t++) {
2325 	    if (t < pmod->t1) {
2326 		dset->Z[v][t] = 0.0;
2327 	    } else {
2328 		dset->Z[v][t] = pmod->uhat[t];
2329 	    }
2330 	}
2331 	strcpy(dset->varname[v], "uhat");
2332 	series_set_label(dset, v, _("residual"));
2333 	/* then order lags of same */
2334 	for (i=1; i<=order; i++) {
2335 	    int s, lv = v + i;
2336 	    double ul;
2337 
2338 	    sprintf(dset->varname[lv], "uhat_%d", i);
2339 	    newlist[pmod->list[0] + i] = lv;
2340 	    for (t=0; t<dset->n; t++) {
2341 		s = t - i;
2342 		if (s < 0) {
2343 		    dset->Z[lv][t] = 0.0;
2344 		} else {
2345 		    ul = dset->Z[v][s];
2346 		    dset->Z[lv][t] = (na(ul))? 0.0 : ul;
2347 		}
2348 	    }
2349 	}
2350     }
2351 
2352     /* LMF apparatus: see Kiviet, Review of Economic Studies,
2353        53/2, 1986, equation (5), p. 245.
2354     */
2355 
2356     if (!err) {
2357 	/* regression on [X~E], using original sample */
2358 	impose_model_smpl(pmod, dset);
2359 	newlist[1] = v;
2360 	aux = lsq(newlist, dset, OLS, OPT_A);
2361 	err = aux.errcode;
2362 	if (err) {
2363 	    errmsg(err, prn);
2364 	} else {
2365 	    RSSxe = aux.ess;
2366 	}
2367     }
2368 
2369     if (!err) {
2370 	int dfd = aux.nobs - pmod->ncoeff - order;
2371 	int lberr;
2372 
2373 	aux.aux = AUX_AR;
2374 	gretl_model_set_int(&aux, "BG_order", order);
2375 	trsq = aux.rsq * aux.nobs;
2376 	LMF = ((RSSx - RSSxe) / RSSxe) * dfd / order;
2377 	pval = snedecor_cdf_comp(order, dfd, LMF);
2378 
2379 	if (pmod->aux == AUX_VAR || (opt & OPT_I)) {
2380 	    ; /* don't print anything here */
2381 	} else {
2382 	    if (opt & OPT_Q) {
2383 		bg_test_header(order, prn, 0);
2384 	    } else {
2385 		printmodel(&aux, dset, OPT_NONE, prn);
2386 		pputc(prn, '\n');
2387 	    }
2388 	    pprintf(prn, "%s: LMF = %f,\n", _("Test statistic"), LMF);
2389 	    pprintf(prn, "%s = P(F(%d,%d) > %g) = %.3g\n", _("with p-value"),
2390 		    order, aux.nobs - pmod->ncoeff - order, LMF, pval);
2391 
2392 	    pprintf(prn, "\n%s: TR^2 = %f,\n",
2393 		    _("Alternative statistic"), trsq);
2394 	    pprintf(prn, "%s = P(%s(%d) > %g) = %.3g\n\n", _("with p-value"),
2395 		    _("Chi-square"), order, trsq, chisq_cdf_comp(order, trsq));
2396 
2397 	    lb = ljung_box(order, pmod->t1, pmod->t2, dset->Z[v], &lberr);
2398 	    if (!na(lb)) {
2399 		pprintf(prn, "Ljung-Box Q' = %g,\n", lb);
2400 		pprintf(prn, "%s = P(%s(%d) > %g) = %.3g\n", _("with p-value"),
2401 			_("Chi-square"), order, lb, chisq_cdf_comp(order, lb));
2402 	    }
2403 	    pputc(prn, '\n');
2404 	}
2405 
2406 	record_test_result(LMF, pval);
2407 
2408 	if (opt & OPT_S) {
2409 	    /* save the test onto @pmod */
2410 	    ModelTest *test = model_test_new(GRETL_TEST_AUTOCORR);
2411 
2412 	    if (test != NULL) {
2413 		model_test_set_teststat(test, GRETL_STAT_LMF);
2414 		model_test_set_dfn(test, order);
2415 		model_test_set_dfd(test, aux.nobs - pmod->ncoeff - order);
2416 		model_test_set_order(test, order);
2417 		model_test_set_value(test, LMF);
2418 		model_test_set_pvalue(test, pval);
2419 		maybe_add_test_to_model(pmod, test);
2420 	    }
2421 	}
2422     }
2423 
2424     free(newlist);
2425     dataset_drop_last_variables(dset, dset->v - v);
2426     clear_model(&aux);
2427 
2428     /* reset sample as it was */
2429     dset->t1 = save_t1;
2430     dset->t2 = save_t2;
2431 
2432     return err;
2433 }
2434 
2435 /**
2436  * autocorr_test:
2437  * @pmod: pointer to model to be tested.
2438  * @order: lag order for test.
2439  * @dset: dataset struct.
2440  * @opt: if flags include OPT_S, save test results to model;
2441  * if OPT_Q, be less verbose; if OPT_I, be silent.
2442  * @prn: gretl printing struct.
2443  *
2444  * Tests the given model for autocorrelation of order equal to
2445  * the specified value, or equal to the frequency of the data if
2446  * the supplied @order is zero.
2447  *
2448  * Returns: 0 on successful completion, error code on error.
2449  */
2450 
autocorr_test(MODEL * pmod,int order,DATASET * dset,gretlopt opt,PRN * prn)2451 int autocorr_test (MODEL *pmod, int order, DATASET *dset,
2452 		   gretlopt opt, PRN *prn)
2453 {
2454     if (pmod->ci == IVREG) {
2455 	return ivreg_autocorr_test(pmod, order, dset, opt, prn);
2456     } else if (pmod->ci == ARMA) {
2457 	return lb_autocorr_test(pmod, order, opt, prn);
2458     } else if (gretl_is_regular_panel_model(pmod)) {
2459 	if (dset->pd < 3) {
2460 	    /* time series not long enough */
2461 	    return E_NOTIMP;
2462 	} else {
2463 	    return panel_autocorr_test(pmod, dset, opt, prn);
2464 	}
2465     }
2466 
2467     if (pmod->ci != OLS && pmod->ci != VAR) {
2468 	return E_NOTIMP;
2469     } else if (model_has_missing_obs(pmod)) {
2470 	return E_MISSDATA;
2471     }
2472 
2473     return regular_autocorr_test(pmod, order, dset, opt, prn);
2474 }
2475 
chow_active(int split,const double * x,int t)2476 static int chow_active (int split, const double *x, int t)
2477 {
2478     if (x != NULL) {
2479 	return x[t] == 1.0;
2480     } else {
2481 	return (t >= split);
2482     }
2483 }
2484 
get_break_list(const MODEL * pmod,int ci,int * err)2485 static int *get_break_list (const MODEL *pmod, int ci, int *err)
2486 {
2487     const char *lname = get_optval_string(ci, OPT_L);
2488     int *list = NULL;
2489 
2490     if (lname == NULL) {
2491 	*err = E_DATA;
2492     } else {
2493 	list = get_list_by_name(lname);
2494     }
2495 
2496     if (list != NULL) {
2497 	if (list[0] == 0) {
2498 	    *err = E_DATA;
2499 	} else {
2500 	    int i;
2501 
2502 	    for (i=1; i<=list[0] && !*err; i++) {
2503 		if (!in_gretl_list(pmod->list, list[i])) {
2504 		    *err = E_DATA;
2505 		} else if (list[i] == 0) {
2506 		    *err = E_DATA;
2507 		}
2508 	    }
2509 	}
2510     }
2511 
2512     return list;
2513 }
2514 
2515 /* compose list of variables to be used for the Chow test and add
2516    them to the data set */
2517 
2518 static int *
make_chow_list(const MODEL * pmod,DATASET * dset,int split,int dumv,int ci,gretlopt opt,int * err)2519 make_chow_list (const MODEL *pmod, DATASET *dset,
2520 		int split, int dumv, int ci,
2521 		gretlopt opt, int *err)
2522 {
2523     int *chowlist = NULL;
2524     int *brklist = NULL;
2525     int l0 = pmod->list[0];
2526     int ninter = 0, newvars = 0;
2527     int havedum = (dumv > 0);
2528     int i, t, v = dset->v;
2529 
2530     if (havedum && in_gretl_list(pmod->list, dumv)) {
2531 	gretl_errmsg_sprintf(_("The model already contains %s"),
2532 			     dset->varname[dumv]);
2533 	*err = E_DATA;
2534 	return NULL;
2535     }
2536 
2537     if (opt & OPT_L) {
2538 	/* --limit-to */
2539 	brklist = get_break_list(pmod, ci, err);
2540 	if (*err) {
2541 	    return NULL;
2542 	} else {
2543 	    ninter = brklist[0];
2544 	}
2545     } else {
2546 	/* number of interaction terms */
2547 	ninter = pmod->ncoeff - pmod->ifc;
2548     }
2549 
2550     newvars = ninter + 1 - havedum;
2551 
2552     if (dataset_add_series(dset, newvars)) {
2553 	*err = E_ALLOC;
2554     } else {
2555 	chowlist = gretl_list_new(pmod->list[0] + ninter + 1);
2556 	if (chowlist == NULL) {
2557 	    *err = E_ALLOC;
2558 	}
2559     }
2560 
2561     if (!*err) {
2562 	const double *cdum = NULL;
2563 
2564 	for (i=1; i<=l0; i++) {
2565 	    chowlist[i] = pmod->list[i];
2566 	}
2567 
2568 	if (dumv > 0) {
2569 	    /* we have a user-supplied dummy var */
2570 	    cdum = dset->Z[dumv];
2571 	} else {
2572 	    /* generate the split variable */
2573 	    for (t=0; t<dset->n; t++) {
2574 		dset->Z[v][t] = (double) (t >= split);
2575 	    }
2576 	    strcpy(dset->varname[v], "splitdum");
2577 	    series_set_label(dset, v, _("dummy variable for Chow test"));
2578 	}
2579 
2580 	chowlist[l0 + 1] = (dumv > 0)? dumv : v;
2581 
2582 	/* and the interaction terms */
2583 	for (i=0; i<ninter; i++) {
2584 	    int pv, sv = v + i + 1 - havedum;
2585 
2586 	    if (brklist != NULL) {
2587 		pv = brklist[i+1];
2588 	    } else {
2589 		pv = pmod->list[i + 2 + pmod->ifc];
2590 	    }
2591 
2592 	    for (t=0; t<dset->n; t++) {
2593 		if (model_missing(pmod, t)) {
2594 		    dset->Z[sv][t] = NADBL;
2595 		} else if (chow_active(split, cdum, t)) {
2596 		    dset->Z[sv][t] = dset->Z[pv][t];
2597 		} else {
2598 		    dset->Z[sv][t] = 0.0;
2599 		}
2600 	    }
2601 
2602 	    if (havedum) {
2603 		sprintf(dset->varname[sv], "%.2s_", dset->varname[dumv]);
2604 	    } else {
2605 		strcpy(dset->varname[sv], "sd_");
2606 	    }
2607 	    strncat(dset->varname[sv], dset->varname[pv], VNAMELEN - 4);
2608 	    chowlist[l0 + 2 + i] = sv;
2609 	}
2610     }
2611 
2612     return chowlist;
2613 }
2614 
write_plot_x_range(const double * x,int t1,int t2,FILE * fp)2615 static void write_plot_x_range (const double *x, int t1, int t2,
2616 				FILE *fp)
2617 {
2618     double xmin0, xmin, xmax0, xmax;
2619     double xrange;
2620 
2621     gretl_minmax(t1, t2, x, &xmin0, &xmax0);
2622     xrange = xmax0 - xmin0;
2623     xmin = xmin0 - xrange * .025;
2624     if (xmin0 >= 0.0 && xmin < 0.0) {
2625 	xmin = 0.0;
2626     }
2627     xmax = xmax0 + xrange * .025;
2628     fprintf(fp, "set xrange [%.10g:%.10g]\n", xmin, xmax);
2629 }
2630 
QLR_graph(const double * testvec,int t1,int t2,const DATASET * dset,int df,int robust)2631 static int QLR_graph (const double *testvec, int t1, int t2,
2632 		      const DATASET *dset, int df, int robust)
2633 {
2634     const double *x = gretl_plotx(dset, OPT_NONE);
2635     const char *titles[] = {
2636 	N_("Chow F-test for break"),
2637 	N_("Robust Wald test for break")
2638     };
2639     const char *title;
2640     double (*qlr_critval) (int);
2641     double critval = NADBL;
2642     FILE *fp;
2643     int t, err = 0;
2644 
2645     set_effective_plot_ci(QLRTEST);
2646 
2647     fp = open_plot_input_file(PLOT_REGULAR, 0, &err);
2648     if (err) {
2649 	return err;
2650     }
2651 
2652     qlr_critval = get_plugin_function("qlr_critval_15_05");
2653     if (qlr_critval != NULL) {
2654 	critval = qlr_critval(df);
2655     }
2656 
2657     print_keypos_string(GP_KEY_LEFT_TOP, fp);
2658 
2659     if (robust) {
2660 	title = titles[1];
2661     } else {
2662 	title = titles[0];
2663 	if (!na(critval)) {
2664 	    critval /= df;
2665 	}
2666     }
2667 
2668     gretl_push_c_numeric_locale();
2669 
2670     write_plot_x_range(x, t1, t2, fp);
2671 
2672     if (!na(critval)) {
2673 	fprintf(fp, "plot \\\n"
2674 		"'-' using 1:2 title \"%s\" w lines , \\\n"
2675 		"%g title \"%s\" w lines lt 0\n",
2676 		_(title), critval, _("5% QLR critical value"));
2677     } else {
2678 	fprintf(fp, "plot \\\n"
2679 		"'-' using 1:2 title \"%s\" w lines\n", _(title));
2680     }
2681 
2682     for (t=t1; t<=t2; t++) {
2683 	fprintf(fp, "%g %g\n", x[t], testvec[t-t1]);
2684     }
2685     fputs("e\n", fp);
2686 
2687     gretl_pop_c_numeric_locale();
2688 
2689     err = finalize_plot_input_file(fp);
2690 
2691     set_effective_plot_ci(GNUPLOT);
2692 
2693     return err;
2694 }
2695 
save_QLR_test(MODEL * pmod,const char * datestr,double X2,double pval,int df)2696 static void save_QLR_test (MODEL *pmod, const char *datestr,
2697 			   double X2, double pval, int df)
2698 {
2699     ModelTest *test = model_test_new(GRETL_TEST_QLR);
2700 
2701     if (test != NULL) {
2702 	model_test_set_teststat(test, GRETL_STAT_SUP_WALD);
2703 	model_test_set_param(test, datestr);
2704 	model_test_set_value(test, X2);
2705 	model_test_set_pvalue(test, pval);
2706 	model_test_set_dfn(test, df);
2707 	maybe_add_test_to_model(pmod, test);
2708     }
2709 }
2710 
2711 /* for internal use by the "qlrtest" command */
2712 
get_QLR_pval(double test,int df,int k1,int k2,MODEL * pmod)2713 static double get_QLR_pval (double test, int df,
2714 			    int k1, int k2,
2715 			    MODEL *pmod)
2716 {
2717     double (*qlr_asy_pvalue) (double, int, double);
2718     double pi_1, pi_2, lam0;
2719     double pval;
2720 
2721     qlr_asy_pvalue = get_plugin_function("qlr_asy_pvalue");
2722 
2723     if (qlr_asy_pvalue == NULL) {
2724 	return NADBL;
2725     }
2726 
2727     pi_1 = (k1 - pmod->t1 + 1) / (double) pmod->nobs;
2728     pi_2 = (k2 - pmod->t1 + 1) / (double) pmod->nobs;
2729     lam0 = (pi_2*(1.0 - pi_1)) / (pi_1*(1.0 - pi_2));
2730 
2731     pval = (*qlr_asy_pvalue) (test, df, lam0);
2732 
2733 #if 0
2734     fprintf(stderr, "k1 = %d, k2 = %d, pi_1 = %g pi_2 = %g\n",
2735 	    k1, k2, pi_1, pi_2);
2736     fprintf(stderr, "lambda0 = %g, pi0 = %g, test = %g, pval = %g\n",
2737 	    lam0, 1.0/(1 + sqrt(lam0)), test, pval);
2738 #endif
2739 
2740     return pval;
2741 }
2742 
2743 /* for use by the qlrpval() function */
2744 
QLR_pval(double X2,int df,double p1,double p2)2745 double QLR_pval (double X2, int df, double p1, double p2)
2746 {
2747     double (*qlr_asy_pvalue) (double, int, double);
2748     double lamda, pval;
2749 
2750     if (X2 < 0 || df <= 0 || p1 <= 0 || p2 <= p1 || p2 >= 1) {
2751 	return NADBL;
2752     }
2753 
2754     qlr_asy_pvalue = get_plugin_function("qlr_asy_pvalue");
2755 
2756     if (qlr_asy_pvalue == NULL) {
2757 	return NADBL;
2758     }
2759 
2760     lamda = (p2*(1.0 - p1)) / (p1*(1.0 - p2));
2761     pval = (*qlr_asy_pvalue) (X2, df, lamda);
2762 
2763     return pval;
2764 }
2765 
QLR_print_result(double test,double pval,const char * datestr,int dfn,int dfd,int robust,PRN * prn)2766 static void QLR_print_result (double test,
2767 			      double pval,
2768 			      const char *datestr,
2769 			      int dfn, int dfd,
2770 			      int robust,
2771 			      PRN *prn)
2772 {
2773     pputs(prn, _("Quandt likelihood ratio test for structural break at an "
2774 		 "unknown point,\nwith 15 percent trimming"));
2775     pputs(prn, ":\n");
2776     if (robust) {
2777 	pprintf(prn, _("The maximum Wald test = %g occurs "
2778 		       "at observation %s"), test, datestr);
2779     } else {
2780 	pprintf(prn, _("The maximum F(%d, %d) = %g occurs "
2781 		       "at observation %s"), dfn, dfd, test, datestr);
2782 	test *= dfn;
2783     }
2784     pputc(prn, '\n');
2785 
2786     if (!na(pval)) {
2787 	pprintf(prn, _("Asymptotic p-value = %.6g for "
2788 		       "chi-square(%d) = %g"),
2789 		pval, dfn, test);
2790 	pputc(prn, '\n');
2791     }
2792     pputc(prn, '\n');
2793 }
2794 
save_chow_test(MODEL * pmod,const char * chowstr,double test,double pval,int dfn,int dfd,gretlopt opt)2795 static void save_chow_test (MODEL *pmod, const char *chowstr,
2796 			    double test, double pval,
2797 			    int dfn, int dfd,
2798 			    gretlopt opt)
2799 {
2800     int ttype = (opt & OPT_D)? GRETL_TEST_CHOWDUM : GRETL_TEST_CHOW;
2801     ModelTest *mt = model_test_new(ttype);
2802 
2803     if (mt != NULL) {
2804 	if (dfd == 0) {
2805 	    model_test_set_teststat(mt, GRETL_STAT_WALD_CHISQ);
2806 	} else {
2807 	    model_test_set_teststat(mt, GRETL_STAT_F);
2808 	}
2809 	model_test_set_param(mt, chowstr);
2810 	model_test_set_value(mt, test);
2811 	model_test_set_pvalue(mt, pval);
2812 	model_test_set_dfn(mt, dfn);
2813 	model_test_set_dfd(mt, dfd);
2814 	maybe_add_test_to_model(pmod, mt);
2815     }
2816 }
2817 
QLR_plot_wanted(gretlopt opt)2818 static int QLR_plot_wanted (gretlopt opt)
2819 {
2820     /* the default: show a plot only if in interactive mode */
2821     int ret = !gretl_in_batch_mode();
2822 
2823     if (opt & OPT_U) {
2824 	/* but the default can be overruled by @opt */
2825 	const char *plotparm = get_optval_string(QLRTEST, OPT_U);
2826 
2827 	if (plotparm != NULL) {
2828 	    if (!strcmp(plotparm, "none")) {
2829 		ret = 0;
2830 	    } else {
2831 		ret = 1;
2832 	    }
2833 	}
2834     }
2835 
2836     return ret;
2837 }
2838 
2839 /*
2840  * real_chow_test:
2841  * @chowparm: sample breakpoint; or ID number of dummy
2842  * variable if given OPT_D; or ignored if given OPT_T.
2843  * @pmod: pointer to model to be tested.
2844  * @dset: dataset information.
2845  * @ci: either CHOW or QLRTEST.
2846  * @opt: if flags include OPT_S, save test results to model;
2847  * if OPT_D included, do the Chow test based on a given dummy
2848  * variable; if OPT_T included, do the QLR test. If OPT_M,
2849  * just save the test on the model, don't set $test and
2850  * $pvalue.
2851  * @prn: gretl printing struct.
2852  *
2853  * Returns: 0 on successful completion, error code on error.
2854  */
2855 
real_chow_test(int chowparm,MODEL * pmod,DATASET * dset,int ci,gretlopt opt,PRN * prn)2856 static int real_chow_test (int chowparm, MODEL *pmod, DATASET *dset,
2857 			   int ci, gretlopt opt, PRN *prn)
2858 {
2859     int save_t1 = dset->t1;
2860     int save_t2 = dset->t2;
2861     int *chowlist = NULL;
2862     int origv = dset->v;
2863     MODEL chow_mod;
2864     int QLR = (opt & OPT_T);
2865     int dumv = 0, split = 0, smax = 0;
2866     int err = 0;
2867 
2868     if (pmod->ci != OLS) {
2869 	return E_OLSONLY;
2870     }
2871 
2872     if (exact_fit_check(pmod, prn)) {
2873 	return 0;
2874     }
2875 
2876     /* temporarily impose the sample that was in force when the
2877        original model was estimated */
2878     impose_model_smpl(pmod, dset);
2879 
2880     gretl_model_init(&chow_mod, dset);
2881 
2882     if (QLR) {
2883 	/* "15 percent trimming": exactly how this should be
2884 	   defined is debatable, but the following agrees
2885 	   with the strucchange package for R, when the
2886 	   Fstats() function is given "from=0.15, to=0.85".
2887 	*/
2888 	split = pmod->t1 + floor(0.15 * pmod->nobs) - 1;
2889 	smax = pmod->t1 + floor(0.85 * pmod->nobs) - 1;
2890     } else if (opt & OPT_D) {
2891 	/* Chow, using a predefined dummy */
2892 	dumv = chowparm;
2893     } else {
2894 	/* Chow, using break observation */
2895 	smax = split = chowparm;
2896     }
2897 
2898     if (!err) {
2899 	chowlist = make_chow_list(pmod, dset, split, dumv, ci,
2900 				  opt, &err);
2901     }
2902 
2903     if (err) {
2904 	goto bailout;
2905     }
2906 
2907     if (QLR) {
2908 	/* Quandt likelihood ratio */
2909 	int robust = (pmod->opt & OPT_R);
2910 	int do_plot = QLR_plot_wanted(opt);
2911 	gretlopt lsqopt = OPT_A;
2912 	double test, testmax = 0.0;
2913 	double *testvec = NULL;
2914 	int *testlist = NULL;
2915 	int dfn = 0, dfd = 0;
2916 	int nextra = dset->v - origv;
2917 	int tmax = 0;
2918 	int i, t;
2919 
2920 	if (do_plot) {
2921 	    testvec = malloc((smax - split + 1) * sizeof *testvec);
2922 	}
2923 
2924 	if (robust) {
2925 	    lsqopt |= OPT_R;
2926 	    testlist = gretl_list_diff_new(chowlist, pmod->list, 2);
2927 	    if (testlist == NULL) {
2928 		err = E_ALLOC;
2929 	    }
2930 	}
2931 
2932 	for (t=split; t<=smax && !err; t++) {
2933 	    chow_mod = lsq(chowlist, dset, OLS, lsqopt);
2934 	    if (chow_mod.errcode) {
2935 		err = chow_mod.errcode;
2936 		errmsg(err, prn);
2937 		break;
2938 	    }
2939 	    dfn = chow_mod.ncoeff - pmod->ncoeff;
2940 	    if (robust) {
2941 		test = wald_omit_chisq(testlist, &chow_mod);
2942 #if 0
2943 		fprintf(stderr, "%d %g\n", t+1, test);
2944 #endif
2945 	    } else {
2946 		dfd = chow_mod.dfd;
2947 		test = (pmod->ess - chow_mod.ess) * dfd / (chow_mod.ess * dfn);
2948 #if 0
2949 		fprintf(stderr, "%d %g\n", t+1, test*dfn);
2950 #endif
2951 	    }
2952 	    if (test > testmax) {
2953 		tmax = t;
2954 		testmax = test;
2955 	    }
2956 	    if (testvec != NULL) {
2957 		testvec[t - split] = test;
2958 	    }
2959 #if 0
2960 	    fprintf(stderr, "split at t=%d: F(%d,%d)=%g (X2=%g)\n", t,
2961 		    dfn, dfd, F, test*dfn);
2962 	    fprintf(stderr, " pmod->ess = %g, chow_mod.ess = %g\n",
2963 		    pmod->ess, chow_mod.ess);
2964 #endif
2965 	    clear_model(&chow_mod);
2966 	    for (i=0; i<nextra; i++) {
2967 		dset->Z[origv+i][t] = 0.0;
2968 	    }
2969 	}
2970 
2971 	if (!err) {
2972 	    char datestr[OBSLEN];
2973 	    double pval, X2;
2974 
2975 	    X2 = robust ? testmax : dfn * testmax;
2976 	    pval = get_QLR_pval(X2, dfn, split, smax, pmod);
2977 	    ntolabel(datestr, tmax, dset);
2978 
2979 	    if (!(opt & OPT_Q)) {
2980 		QLR_print_result(testmax, pval, datestr, dfn, dfd,
2981 				 robust, prn);
2982 	    }
2983 	    if (!(opt & OPT_M)) {
2984 		record_QLR_test_result(X2, pval, tmax + 1);
2985 	    }
2986 	    if (opt & (OPT_S | OPT_M)) {
2987 		save_QLR_test(pmod, datestr, X2, pval, dfn);
2988 	    }
2989 	    if (testvec != NULL) {
2990 		QLR_graph(testvec, split, smax, dset, dfn, robust);
2991 	    }
2992 	}
2993 
2994 	free(testvec);
2995 	free(testlist);
2996     } else {
2997 	/* regular (or robust) Chow test */
2998 	int robust = (pmod->opt & OPT_R);
2999 	gretlopt lsqopt = OPT_A;
3000 	int *testlist = NULL;
3001 
3002 	if (robust) {
3003 	    lsqopt |= OPT_R;
3004 	}
3005 	chow_mod = lsq(chowlist, dset, OLS, lsqopt);
3006 
3007 	if (chow_mod.errcode) {
3008 	    err = chow_mod.errcode;
3009 	    errmsg(err, prn);
3010 	} else if (chow_mod.ncoeff <= pmod->ncoeff) {
3011 	    err = chow_mod.errcode = E_DATA;
3012 	    errmsg(err, prn);
3013 	} else {
3014 	    int dfd = (robust)? 0 : chow_mod.dfd;
3015 	    int dfn = chow_mod.ncoeff - pmod->ncoeff;
3016 	    double test, pval = NADBL;
3017 
3018 	    if (!(opt & OPT_Q)) {
3019 		chow_mod.aux = AUX_CHOW;
3020 		printmodel(&chow_mod, dset, OPT_NONE, prn);
3021 	    }
3022 
3023 	    if (robust) {
3024 		testlist = gretl_list_diff_new(chow_mod.list, pmod->list, 2);
3025 		if (testlist == NULL) {
3026 		    err = E_ALLOC;
3027 		    goto bailout;
3028 		} else {
3029 		    test = wald_omit_chisq(testlist, &chow_mod);
3030 		}
3031 		if (!na(test)) {
3032 		    pval = chisq_cdf_comp(dfn, test);
3033 		}
3034 	    } else {
3035 		test = (pmod->ess - chow_mod.ess) * dfd / (chow_mod.ess * dfn);
3036 		if (!na(test)) {
3037 		    pval = snedecor_cdf_comp(dfn, dfd, test);
3038 		}
3039 	    }
3040 
3041 	    if (na(test)) {
3042 		pputs(prn, "Couldn't compute Chow test statistic\n");
3043 	    } else if (na(pval)) {
3044 		pprintf(prn, "Couldn't compute Chow test p-value (test = %g)\n", test);
3045 	    }
3046 
3047 	    if (!na(test) && !na(pval)) {
3048 		char chowstr[VNAMELEN];
3049 
3050 		if (opt & OPT_Q) {
3051 		    pputc(prn, '\n');
3052 		}
3053 		if (opt & OPT_D) {
3054 		    strcpy(chowstr, dset->varname[chowparm]);
3055 		    pprintf(prn, _("Chow test for structural difference with respect to %s"),
3056 			    chowstr);
3057 		} else {
3058 		    ntolabel(chowstr, chowparm, dset);
3059 		    pprintf(prn, _("Chow test for structural break at observation %s"),
3060 			    chowstr);
3061 		}
3062 		pputc(prn, '\n');
3063 
3064 		if (robust) {
3065 		    pprintf(prn, "  %s(%d) = %g %s %.4f\n", _("Chi-square"),
3066 			    dfn, test, _("with p-value"), pval);
3067 		    pprintf(prn, "  %s: F(%d, %d) = %g %s %.4f\n\n", _("F-form"),
3068 			    dfn, chow_mod.dfd, test / dfn, _("with p-value"),
3069 			    snedecor_cdf_comp(dfn, chow_mod.dfd, test / dfn));
3070 		} else {
3071 		    pprintf(prn, "  F(%d, %d) = %g %s %.4f\n\n",
3072 			    dfn, dfd, test, _("with p-value"), pval);
3073 		}
3074 
3075 		if (opt & OPT_S) {
3076 		    save_chow_test(pmod, chowstr, test, pval, dfn, dfd, opt);
3077 		}
3078 
3079 		record_test_result(test, pval);
3080 	    }
3081 	}
3082 	clear_model(&chow_mod);
3083 	free(testlist);
3084     }
3085 
3086  bailout:
3087 
3088     /* clean up extra variables */
3089     dataset_drop_last_variables(dset, dset->v - origv);
3090     free(chowlist);
3091 
3092     dset->t1 = save_t1;
3093     dset->t2 = save_t2;
3094 
3095     return err;
3096 }
3097 
3098 /**
3099  * chow_test:
3100  * @splitobs: the 0-based observation number at which to split
3101  * the sample.
3102  * @pmod: pointer to model to be tested.
3103  * @dset: dataset struct.
3104  * @opt: if flags include OPT_S, save test results to model.
3105  * @prn: gretl printing struct.
3106  *
3107  * Tests the given model for structural stability (Chow test)
3108  * using the sample break-point given by @splitobs
3109  * and prints the results to @prn. (The second portion of
3110  * the sample runs from observation @splitobs to the
3111  * end of the original sample range.)
3112  *
3113  * Returns: 0 on successful completion, error code on error.
3114  */
3115 
chow_test(int splitobs,MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3116 int chow_test (int splitobs, MODEL *pmod, DATASET *dset,
3117 	       gretlopt opt, PRN *prn)
3118 {
3119     int err = 0;
3120 
3121     if (splitobs <= 0 || splitobs >= dset->n) {
3122 	gretl_errmsg_set(_("Invalid sample split for Chow test"));
3123 	err = E_DATA;
3124     } else {
3125 	err = real_chow_test(splitobs, pmod, dset, CHOW, opt, prn);
3126     }
3127 
3128     return err;
3129 }
3130 
3131 /**
3132  * chow_test_from_dummy:
3133  * @splitvar: the ID number of a dummy variable that should
3134  * be used to divide the sample.
3135  * @pmod: pointer to model to be tested.
3136  * @dset: dataset struct.
3137  * @opt: if flags include OPT_S, save test results to model.
3138  * @prn: gretl printing struct.
3139  *
3140  * Tests the given model for structural stability (Chow test),
3141  * using the dummy variable specified by @splitvar to divide the
3142  * original sample range of @pmod into two portions.
3143  *
3144  * Returns: 0 on successful completion, error code on error.
3145  */
3146 
chow_test_from_dummy(int splitvar,MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3147 int chow_test_from_dummy (int splitvar, MODEL *pmod, DATASET *dset,
3148 			  gretlopt opt, PRN *prn)
3149 {
3150     int err = 0;
3151 
3152     if (splitvar <= 0 || splitvar >= dset->v) {
3153 	gretl_errmsg_set(_("Invalid sample split for Chow test"));
3154 	err = E_DATA;
3155     } else {
3156 	err = real_chow_test(splitvar, pmod, dset, CHOW,
3157 			     opt | OPT_D, prn);
3158     }
3159 
3160     return err;
3161 }
3162 
3163 /**
3164  * QLR_test:
3165  * @pmod: pointer to model to be tested.
3166  * @dset: dataset struct.
3167  * @opt: if flags include OPT_S, save test results to model.
3168  * @prn: gretl printing struct.
3169  *
3170  * Tests the given model for structural stability via the
3171  * Quandt Likelihood Ratio test for a structural break at
3172  * an unknown point in the sample range.
3173  *
3174  * Returns: 0 on successful completion, error code on error.
3175  */
3176 
QLR_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3177 int QLR_test (MODEL *pmod, DATASET *dset,
3178 	      gretlopt opt, PRN *prn)
3179 {
3180     return real_chow_test(0, pmod, dset, QLRTEST,
3181 			  opt | OPT_T, prn);
3182 }
3183 
3184 /* compute v'Mv, for symmetric M */
3185 
vprime_M_v(double * v,double * M,int n)3186 static double vprime_M_v (double *v, double *M, int n)
3187 {
3188     int i, j, jmin, jmax, k;
3189     double xx, val = 0.0;
3190 
3191     k = jmin = 0;
3192     for (i=0; i<n; i++) {
3193 	xx = 0.0;
3194 	for (j=jmin; j<n; j++) {
3195 	    xx += v[j] * M[k++];
3196 	}
3197 	val += xx * v[i];
3198 	jmin++;
3199     }
3200 
3201     jmax = 1;
3202     for (i=1; i<n; i++) {
3203 	k = i;
3204 	xx = 0.0;
3205 	for (j=0; j<jmax; j++) {
3206 	    xx += v[j] * M[k];
3207 	    k += n - j - 1;
3208 	}
3209 	val += xx * v[i];
3210 	jmax++;
3211     }
3212 
3213     return val;
3214 }
3215 
3216 /* Compute scaled recursive residuals and cumulate \bar{w} via a
3217    sequence of OLS regressions. See William Greene, Econometric
3218    Analysis 5e, pp. 135-136.
3219 */
3220 
cusum_compute(MODEL * pmod,double * cresid,int T,int k,double * wbar,DATASET * dset)3221 static int cusum_compute (MODEL *pmod, double *cresid, int T, int k,
3222 			  double *wbar, DATASET *dset)
3223 {
3224     MODEL cmod;
3225     gretlopt opt = OPT_X | OPT_A;
3226     double yhat, den, *xt;
3227     int n = T - k;
3228     int i, j, t;
3229     int err = 0;
3230 
3231     /* vector of regressors at t */
3232     xt = malloc(k * sizeof *xt);
3233     if (xt == NULL) {
3234 	return E_ALLOC;
3235     }
3236 
3237     /* set minimal sample based on model to be tested */
3238     dset->t1 = pmod->t1;
3239     dset->t2 = pmod->t1 + k - 1;
3240 
3241     for (j=0; j<n && !err; j++) {
3242 	cmod = lsq(pmod->list, dset, OLS, opt);
3243 	if (cmod.errcode) {
3244 	    err = cmod.errcode;
3245 	} else {
3246 	    /* compute one step ahead prediction error: \hat{y}_t
3247 	       based on estimates through t - 1
3248 	    */
3249 	    t = dset->t2 + 1;
3250 	    yhat = 0.0;
3251 	    for (i=0; i<cmod.ncoeff; i++) {
3252 		xt[i] = dset->Z[cmod.list[i+2]][t];
3253 		yhat += cmod.coeff[i] * xt[i];
3254 	    }
3255 	    cresid[j] = dset->Z[pmod->list[1]][t] - yhat;
3256 	    cmod.ci = CUSUM;
3257 	    err = makevcv(&cmod, 1.0); /* (X'X)^{-1} */
3258 	    if (!err) {
3259 		/* compute scaled residual: divide by
3260 		   \sqrt{1 + x_t'(X_{t-1}'X_{t-1})^{-1} x_t}
3261 		*/
3262 		den = vprime_M_v(xt, cmod.vcv, cmod.ncoeff);
3263 		cresid[j] /= sqrt(1.0 + den);
3264 		*wbar += cresid[j];
3265 		/* extend the sample by one observation */
3266 		dset->t2 += 1;
3267 	    }
3268 	}
3269 	clear_model(&cmod);
3270     }
3271 
3272     free(xt);
3273 
3274     return err;
3275 }
3276 
3277 #define okfreq(p) (p == 1 || p == 4 || p == 12 || p == 24 || p == 52)
3278 
cusum_do_graph(double a,double b,const double * W,int t1,int k,int m,DATASET * dset,gretlopt opt)3279 static int cusum_do_graph (double a, double b, const double *W,
3280 			   int t1, int k, int m,
3281 			   DATASET *dset, gretlopt opt)
3282 {
3283     FILE *fp = NULL;
3284     const double *obs = NULL;
3285     double frac = 1.0;
3286     double x0 = 0.0;
3287     int j, t, err = 0;
3288 
3289     fp = open_plot_input_file(PLOT_CUSUM, 0, &err);
3290     if (err) {
3291 	return err;
3292     }
3293 
3294     if (dataset_is_time_series(dset) && okfreq(dset->pd)) {
3295 	b *= dset->pd;
3296 	frac /= dset->pd;
3297         obs = gretl_plotx(dset, OPT_NONE);
3298 	if (obs != NULL) {
3299 	    x0 = obs[t1 + k];
3300 	}
3301     }
3302 
3303     gretl_push_c_numeric_locale();
3304 
3305     fprintf(fp, "set xlabel '%s'\n", _("Observation"));
3306     fputs("set nokey\n", fp);
3307 
3308     if (opt & OPT_R) {
3309 	fprintf(fp, "set title '%s'\n",
3310 		/* xgettext:no-c-format */
3311 		_("CUSUMSQ plot with 95% confidence band"));
3312 	fprintf(fp, "plot \\\n%g*(x-%g) title '' w dots lt 2, \\\n", b, x0 - frac);
3313 	fprintf(fp, "%g+%g*(x-%g) title '' w lines lt 2, \\\n", -a, b, x0 - frac);
3314 	fprintf(fp, "%g+%g*(x-%g) title '' w lines lt 2, \\\n", a, b, x0 - frac);
3315     } else {
3316 	fputs("set xzeroaxis\n", fp);
3317 	fprintf(fp, "set title '%s'\n",
3318 		/* xgettext:no-c-format */
3319 		_("CUSUM plot with 95% confidence band"));
3320 	fprintf(fp, "plot \\\n%g+%g*(x-%g) title '' w lines lt 2, \\\n", a, b, x0);
3321 	fprintf(fp, "%g-%g*(x-%g) title '' w lines lt 2, \\\n", -a, b, x0);
3322     }
3323 
3324     fputs("'-' using 1:2 w linespoints lt 1\n", fp);
3325 
3326     for (j=0; j<m; j++) {
3327 	t = t1 + k + j;
3328 	if (obs != NULL) {
3329 	    fprintf(fp, "%g %g\n", obs[t], W[j]);
3330 	} else {
3331 	    fprintf(fp, "%d %g\n", t, W[j]);
3332 	}
3333     }
3334 
3335     fputs("e\n", fp);
3336 
3337     gretl_pop_c_numeric_locale();
3338 
3339     return finalize_plot_input_file(fp);
3340 }
3341 
cusum_harvey_collier(double wbar,double sigma,int m,MODEL * pmod,gretlopt opt,PRN * prn)3342 static void cusum_harvey_collier (double wbar, double sigma, int m,
3343 				  MODEL *pmod, gretlopt opt,
3344 				  PRN *prn)
3345 {
3346     double hct, pval;
3347 
3348     hct = (sqrt((double) m) * wbar) / sigma;
3349     pval = student_pvalue_2(m - 1, hct);
3350     pprintf(prn, _("\nHarvey-Collier t(%d) = %g with p-value %.4g\n\n"),
3351 	    m - 1, hct, pval);
3352 
3353     if (opt & OPT_S) {
3354 	ModelTest *test = model_test_new(GRETL_TEST_CUSUM);
3355 
3356 	if (test != NULL) {
3357 	    model_test_set_teststat(test, GRETL_STAT_HARVEY_COLLIER);
3358 	    model_test_set_dfn(test, m - 1);
3359 	    model_test_set_value(test, hct);
3360 	    model_test_set_pvalue(test, pval);
3361 	    maybe_add_test_to_model(pmod, test);
3362 	}
3363     }
3364 
3365     record_test_result(hct, pval);
3366 }
3367 
3368 /**
3369  * cusum_test:
3370  * @pmod: pointer to model to be tested.
3371  * @dset: dataset struct.
3372  * @opt: if flags include %OPT_S, save results of test to model.
3373  * @prn: gretl printing struct.
3374  *
3375  * Tests the given model for parameter stability via the CUSUM test,
3376  * or if @opt includes %OPT_R, via the CUSUMSQ test; %OPT_Q makes
3377  * the test quiet; %OPT_U governs the associated plot, if wanted.
3378  *
3379  * Returns: 0 on successful completion, error code on error.
3380  */
3381 
cusum_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3382 int cusum_test (MODEL *pmod, DATASET *dset,
3383 		gretlopt opt, PRN *prn)
3384 {
3385     int save_t1 = dset->t1;
3386     int save_t2 = dset->t2;
3387     int T = pmod->nobs;
3388     int k = pmod->ncoeff;
3389     char cumdate[OBSLEN];
3390     double wbar = 0.0;
3391     double *cresid = NULL, *W = NULL;
3392     int quiet = opt & OPT_Q;
3393     int m, i, j;
3394     int err = 0;
3395 
3396     if (pmod->ci != OLS) {
3397 	return E_OLSONLY;
3398     }
3399 
3400     if (exact_fit_check(pmod, prn)) {
3401 	return 0;
3402     }
3403 
3404     if (model_has_missing_obs(pmod)) {
3405 	return E_MISSDATA;
3406     }
3407 
3408     /* the number of forecasts */
3409     m = T - k;
3410 
3411     cresid = malloc(m * sizeof *cresid);
3412     W = malloc(m * sizeof *W);
3413 
3414     if (cresid == NULL || W == NULL) {
3415 	err = E_ALLOC;
3416     }
3417 
3418     if (!err) {
3419 	err = cusum_compute(pmod, cresid, T, k, &wbar, dset);
3420 	if (err) {
3421 	    errmsg(err, prn);
3422 	}
3423     }
3424 
3425     if (!err) {
3426 	double a, b, den = 0.0, sigma = 0.0;
3427 	int sig;
3428 
3429 	if (opt & OPT_R) {
3430 	    double a1, a2, a3;
3431 	    double n = 0.5 * m - 1;
3432 
3433 	    pprintf(prn, "\n%s\n\n", _("CUSUMSQ test for stability of parameters"));
3434 
3435 	    for (j=0; j<m; j++) {
3436 		den += cresid[j] * cresid[j];
3437 	    }
3438 
3439 	    /* see Edgerton and Wells, Oxford Bulletin of Economics and
3440 	       Statistics, 56, 1994, pp. 355-365 */
3441 	    a1 = 1.358015 / sqrt(n);
3442 	    a2 = -0.6701218 / n;
3443 	    a3 = -0.8858694 / pow(n, 1.5);
3444 
3445 	    /* 0.5 * total height of band */
3446 	    a = a1 + a2 + a3;
3447 	    /* slope of expectation wrt time */
3448 	    b = 1.0 / m;
3449 	    if (!quiet) {
3450 		pputs(prn, _("Cumulated sum of squared residuals"));
3451 	    }
3452 	} else {
3453 	    wbar /= T - k;
3454 	    pprintf(prn, "\n%s\n\n", _("CUSUM test for stability of parameters"));
3455 	    pprintf(prn, _("mean of scaled residuals = %g\n"), wbar);
3456 
3457 	    for (j=0; j<m; j++) {
3458 		sigma += (cresid[j] - wbar) * (cresid[j] - wbar);
3459 	    }
3460 	    sigma /= T - k - 1;
3461 	    sigma = sqrt(sigma);
3462 	    pprintf(prn, _("sigmahat                 = %g\n\n"), sigma);
3463 
3464 	    /* height of confidence band for first prediction */
3465 	    a = 0.948 * sqrt((double) m);
3466 	    /* slope of confidence band limit wrt time */
3467 	    b = 2.0 * a / m;
3468 	    if (!quiet) {
3469 		pputs(prn, _("Cumulated sum of scaled residuals"));
3470 	    }
3471 	}
3472 
3473 	pputc(prn, '\n');
3474 	pputs(prn, /* xgettext:no-c-format */
3475 	      _("('*' indicates a value outside of 95% confidence band)"));
3476 	pputs(prn, "\n\n");
3477 
3478 	for (j=0; j<m; j++) {
3479 	    W[j] = 0.0;
3480 	    if (opt & OPT_R) {
3481 		for (i=0; i<=j; i++) {
3482 		    W[j] += cresid[i] * cresid[i] / den;
3483 		}
3484 		sig = fabs(W[j] - (j+1) / (double) m) > a;
3485 	    } else {
3486 		for (i=0; i<=j; i++) {
3487 		    W[j] += cresid[i];
3488 		}
3489 		W[j] /= sigma;
3490 		sig = fabs(W[j]) > a + j * b;
3491 	    }
3492 	    if (!quiet) {
3493 		ntolabel(cumdate, pmod->t1 + k + j, dset);
3494 		pprintf(prn, " %s %9.3f %s\n", cumdate, W[j], sig? "*" : "");
3495 	    }
3496 	}
3497 
3498 	if (!(opt & OPT_R)) {
3499 	    cusum_harvey_collier(wbar, sigma, m, pmod, opt, prn);
3500 	}
3501 
3502 	/* plot with 95% confidence band if wanted */
3503 	if (gnuplot_graph_wanted(PLOT_CUSUM, opt)) {
3504 	    err = cusum_do_graph(a, b, W, pmod->t1, k, m, dset, opt);
3505 	}
3506     }
3507 
3508     /* restore original sample range */
3509     dset->t1 = save_t1;
3510     dset->t2 = save_t2;
3511 
3512     free(cresid);
3513     free(W);
3514 
3515     return err;
3516 }
3517 
3518 /**
3519  * comfac_test:
3520  * @pmod: pointer to original model.
3521  * @dset: dataset struct.
3522  * @opt: if contains %OPT_S, save test results to model.
3523  * @prn: gretl printing struct.
3524  *
3525  * If @pmod was estimated via an AR(1) estimator, run an
3526  * auxiliary regression to test the implied common-factor
3527  * restriction.
3528  *
3529  * Returns: 0 on successful completion, error code on error.
3530  */
3531 
comfac_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3532 int comfac_test (MODEL *pmod, DATASET *dset,
3533 		 gretlopt opt, PRN *prn)
3534 {
3535     MODEL cmod;
3536     int save_t1 = dset->t1;
3537     int save_t2 = dset->t2;
3538     int v = dset->v;
3539     int *biglist = NULL;
3540     int clearit = 0;
3541     int nadd, i, k, t;
3542     int err;
3543 
3544     if (pmod->ci != AR1 || (pmod->opt & OPT_P)) {
3545 	/* can't handle Prais-Winsten? */
3546 	return E_NOTIMP;
3547     }
3548 
3549     /* check for changes in original list members */
3550     err = list_members_replaced(pmod, dset);
3551     if (err) {
3552 	return err;
3553     }
3554 
3555     biglist = gretl_list_copy(pmod->list);
3556     if (biglist == NULL) {
3557 	return E_ALLOC;
3558     }
3559 
3560     nadd = 1 + pmod->ncoeff - pmod->ifc;
3561 
3562     err = dataset_add_series(dset, nadd);
3563     if (err) {
3564 	free(biglist);
3565 	return err;
3566     }
3567 
3568     /* add lags of the dependent variable and all regressors: some of
3569        these may be redundant but we'll just let the redundant terms be
3570        eliminated automatically via gretl's collinearity checking.
3571     */
3572 
3573     k = v;
3574     for (i=1; i<=pmod->list[0]; i++) {
3575 	int src, lag, parent;
3576 
3577 	src = pmod->list[i];
3578 	if (src == 0) {
3579 	    continue;
3580 	}
3581 	for (t=0; t<dset->n; t++) {
3582 	    if (t == 0 || na(dset->Z[src][t-1])) {
3583 		dset->Z[k][t] = NADBL;
3584 	    } else {
3585 		dset->Z[k][t] = dset->Z[src][t-1];
3586 	    }
3587 	}
3588 	biglist = gretl_list_append_term(&biglist, k);
3589 	if (biglist == NULL) {
3590 	    err = E_ALLOC;
3591 	    break;
3592 	}
3593 	lag = series_get_lag(dset, src);
3594 	parent = series_get_parent_id(dset, src);
3595 	if (lag > 0 && parent > 0) {
3596 	    char tmp[16];
3597 
3598 	    sprintf(tmp, "_%d", lag + 1);
3599 	    strcpy(dset->varname[k], dset->varname[parent]);
3600 	    gretl_trunc(dset->varname[k], 15 - strlen(tmp));
3601 	    strcat(dset->varname[k], tmp);
3602 	} else {
3603 	    strcpy(dset->varname[k], dset->varname[src]);
3604 	    gretl_trunc(dset->varname[k], 13);
3605 	    strcat(dset->varname[k], "_1");
3606 	}
3607 	k++;
3608     }
3609 
3610     if (!err) {
3611 	/* re-impose the sample that was in force when the original model
3612 	   was estimated */
3613 	impose_model_smpl(pmod, dset);
3614 	cmod = lsq(biglist, dset, OLS, OPT_A);
3615 	clearit = 1;
3616 	err = cmod.errcode;
3617     }
3618 
3619     if (!err) {
3620 	if (cmod.nobs != pmod->nobs || cmod.ess > pmod->ess || cmod.dfd >= pmod->dfd) {
3621 	    /* something has gone wrong */
3622 	    err = E_DATA;
3623 	}
3624     }
3625 
3626     if (!err) {
3627 	/* construct an F-test based on the SSR from the original
3628 	   AR(1) model and the SSR from the unrestricted model, cmod.
3629 	*/
3630 	int dfd = cmod.dfd;
3631 	int dfn = pmod->dfd - dfd - 1; /* account for rho */
3632 	double SSRr = pmod->ess;
3633 	double SSRu = cmod.ess;
3634 	double Ftest = ((SSRr - SSRu)/dfn) / (SSRu/dfd);
3635 	double pval = snedecor_cdf_comp(dfn, dfd, Ftest);
3636 
3637 	if (!(opt & OPT_I)) {
3638 	    if (!(opt & OPT_Q)) {
3639 		cmod.aux = AUX_COMFAC;
3640 		printmodel(&cmod, dset, OPT_S, prn);
3641 		pputc(prn, '\n');
3642 	    }
3643 
3644 	    pputs(prn, _("Test of common factor restriction"));
3645 	    pputs(prn, "\n\n");
3646 
3647 	    pprintf(prn, "  %s: %s(%d, %d) = %g, ", _("Test statistic"),
3648 		    "F", dfn, dfd, Ftest);
3649 	    pprintf(prn, _("with p-value = %g\n"), pval);
3650 	    pputc(prn, '\n');
3651 	}
3652 
3653 	if (opt & OPT_S) {
3654 	    ModelTest *test;
3655 
3656 	    test = model_test_new(GRETL_TEST_COMFAC);
3657 	    if (test != NULL) {
3658 		model_test_set_teststat(test, GRETL_STAT_F);
3659 		model_test_set_dfn(test, dfn);
3660 		model_test_set_dfd(test, dfd);
3661 		model_test_set_value(test, Ftest);
3662 		model_test_set_pvalue(test, pval);
3663 		maybe_add_test_to_model(pmod, test);
3664 	    }
3665 	}
3666 
3667 	record_test_result(Ftest, pval);
3668     }
3669 
3670     if (clearit) {
3671 	clear_model(&cmod);
3672     }
3673 
3674     /* delete the added variables and restore the original
3675        sample range */
3676 
3677     dataset_drop_last_variables(dset, nadd);
3678     free(biglist);
3679 
3680     dset->t1 = save_t1;
3681     dset->t2 = save_t2;
3682 
3683     return err;
3684 }
3685 
3686 /**
3687  * panel_specification_test:
3688  * @pmod: pointer to model to be tested.
3689  * @dset: dataset struct.
3690  * @opt: option flags.
3691  * @prn: gretl printing struct.
3692  *
3693  * Tests the given pooled model for fixed and random effects.
3694  *
3695  * Returns: 0 on successful completion, error code on error.
3696  */
3697 
panel_specification_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3698 int panel_specification_test (MODEL *pmod, DATASET *dset,
3699 			      gretlopt opt, PRN *prn)
3700 {
3701     if (pmod->ci != OLS || !dataset_is_panel(dset)) {
3702 	pputs(prn, _("This test is only relevant for pooled models\n"));
3703 	return 1;
3704     }
3705 
3706     if (pmod->ifc == 0) {
3707 	pputs(prn, _("This test requires that the model contains a constant\n"));
3708 	return 1;
3709     }
3710 
3711     if (!balanced_panel(dset)) { /* ?? */
3712 	pputs(prn, _("Sorry, can't do this test on an unbalanced panel.\n"
3713 		"You need to have the same number of observations\n"
3714 		"for each cross-sectional unit"));
3715 	return 1;
3716     } else {
3717 	panel_diagnostics(pmod, dset, opt, prn);
3718     }
3719 
3720     return 0;
3721 }
3722 
3723 /* wrapper for backward compatibility */
3724 
panel_hausman_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3725 int panel_hausman_test (MODEL *pmod, DATASET *dset,
3726 			gretlopt opt, PRN *prn)
3727 {
3728     return panel_specification_test(pmod, dset, opt, prn);
3729 }
3730 
3731 /**
3732  * add_leverage_values_to_dataset:
3733  * @dset: dataset struct.
3734  * @m: matrix containing leverage values.
3735  * @opt: OPT_O = overwrite series on save.
3736  * @flags: may include SAVE_LEVERAGE, SAVE_INFLUENCE,
3737  * and/or SAVE_DFFITS.
3738  *
3739  * Adds to the working dataset one or more series calculated by
3740  * the gretl test for leverage/influence of data points.
3741  *
3742  * Returns: 0 on successful completion, error code on error.
3743  */
3744 
add_leverage_values_to_dataset(DATASET * dset,gretl_matrix * m,gretlopt opt,int flags)3745 int add_leverage_values_to_dataset (DATASET *dset, gretl_matrix *m,
3746 				    gretlopt opt, int flags)
3747 {
3748     int overwrite = (opt & OPT_O);
3749     int t1, t2;
3750     int addvars = 0;
3751 
3752     if (flags & SAVE_LEVERAGE) addvars++;
3753     if (flags & SAVE_INFLUENCE) addvars++;
3754     if (flags & SAVE_DFFITS) addvars++;
3755 
3756     if (addvars == 0) {
3757 	return 0;
3758     }
3759 
3760     if (dataset_add_series(dset, addvars)) {
3761 	return E_ALLOC;
3762     }
3763 
3764     t1 = gretl_matrix_get_t1(m);
3765     t2 = t1 + gretl_matrix_rows(m);
3766 
3767     /* add leverage? */
3768     if (flags & SAVE_LEVERAGE) {
3769 	int t, v = dset->v - addvars;
3770 	int j = 0;
3771 
3772 	for (t=0; t<dset->n; t++) {
3773 	    if (t < t1 || t >= t2) {
3774 		dset->Z[v][t] = NADBL;
3775 	    } else {
3776 		dset->Z[v][t] = gretl_matrix_get(m, j++, 0);
3777 	    }
3778 	}
3779 	strcpy(dset->varname[v], "lever");
3780 	if (!overwrite) {
3781 	    make_varname_unique(dset->varname[v], v, dset);
3782 	}
3783 	series_set_label(dset, v, "leverage values");
3784     }
3785 
3786     /* add influence? */
3787     if (flags & SAVE_INFLUENCE) {
3788 	int t, v = dset->v - (addvars - 1);
3789 	int j = 0;
3790 
3791 	for (t=0; t<dset->n; t++) {
3792 	    if (t < t1 || t >= t2) {
3793 		dset->Z[v][t] = NADBL;
3794 	    } else {
3795 		dset->Z[v][t] = gretl_matrix_get(m, j++, 1);
3796 	    }
3797 	}
3798 	strcpy(dset->varname[v], "influ");
3799 	if (!overwrite) {
3800 	    make_varname_unique(dset->varname[v], v, dset);
3801 	}
3802 	series_set_label(dset, v, "influence values");
3803     }
3804 
3805     /* add DFFITS? */
3806     if (flags & SAVE_DFFITS) {
3807 	int t, v = dset->v - (addvars - 2);
3808 	int j = 0;
3809 
3810 	for (t=0; t<dset->n; t++) {
3811 	    double s, h;
3812 
3813 	    if (t < t1 || t >= t2) {
3814 		dset->Z[v][t] = NADBL;
3815 	    } else {
3816 		/* s = studentized residuals */
3817 		h = gretl_matrix_get(m, j, 0);
3818 		s = gretl_matrix_get(m, j, 2);
3819 		if (na(h) || na(s)) {
3820 		    dset->Z[v][t] = NADBL;
3821 		} else {
3822 		    dset->Z[v][t] = s * sqrt(h / (1.0 - h));
3823 		}
3824 		j++;
3825 	    }
3826 	}
3827 	strcpy(dset->varname[v], "dffits");
3828 	if (!overwrite) {
3829 	    make_varname_unique(dset->varname[v], v, dset);
3830 	}
3831 	series_set_label(dset, v, "DFFITS values");
3832     }
3833 
3834     set_dataset_is_changed(dset, 1);
3835 
3836     return 0;
3837 }
3838 
3839 /**
3840  * leverage_test:
3841  * @pmod: pointer to model to be tested.
3842  * @dset: dataset struct.
3843  * @opt: if OPT_S, add calculated series to data set; operate
3844  * silently if OPT_Q. If includes OPT_O, do not adjust save
3845  * names (overwrite any existing series of the same names).
3846  * @prn: gretl printing struct.
3847  *
3848  * Tests the data used in the given model for points with
3849  * high leverage and influence on the estimates.
3850  *
3851  * Returns: 0 on successful completion, error code on error.
3852  */
3853 
leverage_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3854 int leverage_test (MODEL *pmod, DATASET *dset,
3855 		   gretlopt opt, PRN *prn)
3856 {
3857     gretl_matrix *(*model_leverage) (const MODEL *, const DATASET *,
3858 				     gretlopt, PRN *, int *);
3859     gretl_matrix *m;
3860     int err = 0;
3861 
3862     if (pmod->ci != OLS) {
3863 	return E_OLSONLY;
3864     }
3865 
3866     model_leverage = get_plugin_function("model_leverage");
3867     if (model_leverage == NULL) {
3868 	return 1;
3869     }
3870 
3871     m = (*model_leverage)(pmod, dset, opt, prn, &err);
3872 
3873     if (!err && (opt & OPT_S)) {
3874 	/* we got the --save option */
3875 	err = add_leverage_values_to_dataset(dset, m, opt,
3876 					     SAVE_LEVERAGE |
3877 					     SAVE_INFLUENCE|
3878 					     SAVE_DFFITS);
3879     }
3880 
3881     gretl_matrix_free(m);
3882 
3883     return err;
3884 }
3885 
3886 /**
3887  * vif_test:
3888  * @pmod: pointer to model to be tested.
3889  * @dset: dataset struct.
3890  * @opt: may contain OPT_Q for quiet operation.
3891  * @prn: gretl printing struct.
3892  *
3893  * Calculates and displays the Variance Inflation Factors for
3894  * the independent variables in the given model.
3895  *
3896  * Returns: 0 on successful completion, error code on error.
3897  */
3898 
vif_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3899 int vif_test (MODEL *pmod, DATASET *dset,
3900 	      gretlopt opt, PRN *prn)
3901 {
3902     int (*compute_vifs) (MODEL *, DATASET *, gretlopt, PRN *);
3903 
3904     gretl_error_clear();
3905 
3906     compute_vifs = get_plugin_function("compute_vifs");
3907     if (compute_vifs == NULL) {
3908 	return 1;
3909     }
3910 
3911     return (*compute_vifs)(pmod, dset, opt, prn);
3912 }
3913 
3914 /**
3915  * bkw_test:
3916  * @pmod: pointer to model to be tested.
3917  * @dset: dataset struct.
3918  * @opt: may contain OPT_Q for quiet operation.
3919  * @prn: gretl printing struct.
3920  *
3921  * Calculates and displays the Belsley-Kuh-Welsch collinearity
3922  * diagnostics for @pmod.
3923  *
3924  * Returns: 0 on successful completion, error code on error.
3925  */
3926 
bkw_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3927 int bkw_test (MODEL *pmod, DATASET *dset,
3928 	      gretlopt opt, PRN *prn)
3929 {
3930     int (*compute_bkw) (MODEL *, DATASET *, gretlopt, PRN *);
3931 
3932     gretl_error_clear();
3933 
3934     compute_bkw = get_plugin_function("compute_bkw");
3935     if (compute_bkw == NULL) {
3936 	return 1;
3937     }
3938 
3939     return (*compute_bkw)(pmod, dset, opt, prn);
3940 }
3941