1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include "libgretl.h"
21 #include "libset.h"
22 #include "libglue.h"
23 #include "gretl_panel.h"
24 #include "var.h"
25 #include "system.h"
26 #include "missing_private.h"
27 #include "matrix_extra.h"
28 #include "plotspec.h"
29 #include "tsls.h"
30 #include "uservar.h"
31
32 /**
33 * SECTION:compare
34 * @short_description: diagnostic and specification tests for models
35 * @title: Model tests
36 * @include: libgretl.h
37 *
38 * Included here are several tests for "pathologies" of the error
39 * term in regression models, as well as specification tests
40 * covering nonlinearity and the omission or addition of variables.
41 */
42
43 #define WDEBUG 0
44
45 struct COMPARE {
46 int ci; /* command index: ADD or OMIT */
47 int model_id; /* ID of model under test */
48 int model_ci; /* estimator code for the model under test */
49 int dfn; /* numerator degrees of freedom */
50 int dfd; /* denominator degrees of freedom */
51 double test; /* test statistic */
52 double pval; /* p-value for test statistic */
53 int stat; /* code identifying the test statistic */
54 double F; /* F test statistic */
55 double X2; /* Wald chi-square test statistic */
56 double LR; /* likelihood ratio test statistic */
57 int score; /* number of info stats showing improvement */
58 int robust; /* = 1 when robust vcv is in use, else 0 */
59 const int *testvars; /* list of variables added or omitted */
60 };
61
62 /* Given a list of variables, check them against the independent
63 variables included in a model, and construct a mask with 1s in
64 positions where there is a match, 0s otherwise. If the test list
65 is NULL, match all variables except the constant.
66 */
67
68 static char *
mask_from_test_list(const int * list,const MODEL * pmod,int * err)69 mask_from_test_list (const int *list, const MODEL *pmod, int *err)
70 {
71 char *mask;
72 int off1 = 2, off2 = 0;
73 int cmax = pmod->ncoeff;
74 int nmask = 0;
75 int i, j;
76
77 mask = calloc(pmod->ncoeff, 1);
78 if (mask == NULL) {
79 *err = E_ALLOC;
80 return NULL;
81 }
82
83 if (pmod->ci == DPANEL) {
84 /* find correct offset into independent vars in list */
85 for (i=2; i<=pmod->list[0]; i++) {
86 if (pmod->list[i] == LISTSEP) {
87 off1 = i + 2;
88 }
89 }
90 off2 = pmod->list[1];
91 } else if (pmod->ci == NEGBIN) {
92 cmax -= 1;
93 }
94
95 for (i=0; i<cmax; i++) {
96 if (list != NULL) {
97 for (j=1; j<=list[0]; j++) {
98 if (pmod->list[i + off1] == list[j]) {
99 #if WDEBUG
100 fprintf(stderr, "matched var %d at pmod->list[%d]: "
101 "set mask[%d] = 1\n", list[j], i + off1, i + off2);
102 printlist(list, "test list");
103 printlist(pmod->list, "pmod->list");
104 #endif
105 mask[i + off2] = 1;
106 nmask++;
107 }
108 }
109 } else if (pmod->list[i + off1] != 0) {
110 mask[i + off2] = 1;
111 }
112 }
113
114 if (list != NULL && nmask != list[0]) {
115 fprintf(stderr, "mask from list: list[0] = %d but nmask = %d\n",
116 list[0], nmask);
117 *err = E_DATA;
118 }
119
120 return mask;
121 }
122
123 /* Wald (chi-square and/or F) test for a set of zero restrictions on
124 the parameters of a given model, based on the covariance matrix of
125 the unrestricted model. Suitable for use where the original model
126 is estimated by FGLS or IV. Note that if list is NULL, we do an
127 automatic test, for the significance of all vars but the constant.
128 */
129
130 static int
wald_test(const int * list,MODEL * pmod,double * chisq,double * F)131 wald_test (const int *list, MODEL *pmod, double *chisq, double *F)
132 {
133 char *mask = NULL;
134 gretl_matrix *C = NULL;
135 gretl_vector *b = NULL;
136 double wX = NADBL;
137 double wF = NADBL;
138 int err = 0;
139
140 mask = mask_from_test_list(list, pmod, &err);
141 if (err) {
142 free(mask);
143 return err;
144 }
145
146 if (!err) {
147 C = gretl_vcv_matrix_from_model(pmod, mask, &err);
148 }
149
150 if (!err) {
151 b = gretl_coeff_vector_from_model(pmod, mask, &err);
152 }
153
154 if (!err) {
155 /* use "left division" to explicit inversion of @C */
156 gretl_matrix *tmp, *mx;
157
158 mx = gretl_matrix_alloc(1, 1);
159 tmp = gretl_matrix_divide(C, b, GRETL_MOD_NONE, &err);
160 if (tmp != NULL) {
161 gretl_matrix_multiply_mod(b, GRETL_MOD_TRANSPOSE,
162 tmp, GRETL_MOD_NONE,
163 mx, GRETL_MOD_NONE);
164 wX = mx->val[0];
165 gretl_matrix_free(tmp);
166 gretl_matrix_free(mx);
167 }
168 }
169
170 #if WDEBUG
171 fprintf(stderr, "wX (quadratic form) = %g\n", wX);
172 #endif
173
174 if (!err) {
175 if (wX < 0) {
176 wF = wX = NADBL;
177 } else {
178 wF = wX / gretl_vector_get_length(b);
179 }
180 }
181
182 if (!err) {
183 if (chisq != NULL) {
184 *chisq = wX;
185 }
186 if (F != NULL) {
187 *F = wF;
188 }
189 }
190
191 #if WDEBUG
192 fprintf(stderr, "Wald test: F = %g, Chi^2 = %g\n", wF, wX);
193 #endif
194
195 free(mask);
196 gretl_matrix_free(C);
197 gretl_matrix_free(b);
198
199 return err;
200 }
201
202 /**
203 * wald_omit_F:
204 * @list: list of variables to omit, or NULL.
205 * @pmod: model to be tested.
206 *
207 * Simple form of Wald F-test for omission of variables. If @list
208 * is non-NULL, do the test for the omission of the variables in
209 * @list from the model @pmod. Otherwise test for omission of
210 * all variables in @pmod except for the constant.
211 *
212 * Returns: Calculated F-value, or #NADBL on failure.
213 */
214
wald_omit_F(const int * list,MODEL * pmod)215 double wald_omit_F (const int *list, MODEL *pmod)
216 {
217 double F = NADBL;
218
219 wald_test(list, pmod, NULL, &F);
220 return F;
221 }
222
223 /**
224 * wald_omit_chisq:
225 * @list: list of variables to omit, or NULL.
226 * @pmod: model to be tested.
227 *
228 * Simple form of Wald chi-square for omission of variables. If @list
229 * is non-NULL, do the test for the omission of the variables in
230 * @list from the model @pmod. Otherwise test for omission of
231 * all variables in @pmod except for the constant.
232 *
233 * Returns: Calculated chi-square value, or #NADBL on failure.
234 */
235
wald_omit_chisq(const int * list,MODEL * pmod)236 double wald_omit_chisq (const int *list, MODEL *pmod)
237 {
238 double X = NADBL;
239
240 wald_test(list, pmod, &X, NULL);
241 return X;
242 }
243
add_diffvars_to_test(ModelTest * test,const int * list,const DATASET * dset)244 static int add_diffvars_to_test (ModelTest *test, const int *list,
245 const DATASET *dset)
246 {
247 char *vnames;
248 int i, len = 0;
249 int err = 0;
250
251 for (i=1; i<=list[0]; i++) {
252 len += strlen(dset->varname[list[i]]) + 1;
253 }
254
255 vnames = malloc(len);
256
257 if (vnames == NULL) {
258 err = 1;
259 } else {
260 *vnames = '\0';
261 for (i=1; i<=list[0]; i++) {
262 strcat(vnames, dset->varname[list[i]]);
263 if (i < list[0]) {
264 strcat(vnames, " ");
265 }
266 }
267 model_test_set_allocated_param(test, vnames);
268 }
269
270 return err;
271 }
272
print_add_omit_null(const int * list,const DATASET * dset,gretlopt opt,PRN * prn)273 void print_add_omit_null (const int *list, const DATASET *dset,
274 gretlopt opt, PRN *prn)
275 {
276 int nl = list == NULL ? 0 : list[0];
277
278 if (nl == 1 && !(opt & OPT_S)) {
279 /* single parameter, not in system context */
280 pputs(prn, "\n ");
281 pprintf(prn, _("Null hypothesis: the regression parameter is zero for %s"),
282 dset->varname[list[1]]);
283 pputc(prn, '\n');
284 } else if (nl == 0) {
285 /* VAR omit specials */
286 if ((opt & OPT_E) && (opt & OPT_T)) {
287 pprintf(prn, "\n %s: %s\n", _("Null hypothesis"),
288 _("no seasonal effects or trend"));
289 } else if (opt & OPT_E) {
290 pprintf(prn, "\n %s: %s\n", _("Null hypothesis"),
291 _("no seasonal effects"));
292 } else if (opt & OPT_T) {
293 pprintf(prn, "\n %s: %s\n", _("Null hypothesis"),
294 _("no trend"));
295 }
296 } else {
297 /* all other cases */
298 const char *vname;
299 int i, nc = 0;
300
301 pputs(prn, _("\n Null hypothesis: the regression parameters are "
302 "zero for the variables\n"));
303
304 pputs(prn, " ");
305 for (i=1; i<=list[0]; i++) {
306 vname = dset->varname[list[i]];
307 nc += strlen(vname) + 2;
308 pprintf(prn, "%s", vname);
309 if (i < list[0]) {
310 if (nc > 60) {
311 pputs(prn, ",\n ");
312 nc = 0;
313 } else {
314 pputs(prn, ", ");
315 }
316 }
317 }
318 pputc(prn, '\n');
319 if (opt & OPT_E) {
320 /* seasonals */
321 pprintf(prn, " %s\n", _("seasonal dummies"));
322 }
323 if (opt & OPT_T) {
324 /* trend */
325 pputs(prn, " time\n");
326 }
327 }
328 }
329
330 /* add/omit: are we printing a revised model following the
331 test results? */
332
printing_revised_model(gretlopt opt)333 static int printing_revised_model (gretlopt opt)
334 {
335 if (opt & OPT_Q) {
336 /* --quiet: no */
337 return 0;
338 } else if (opt & (OPT_L | OPT_W)) {
339 /* --lm or --wald test variants: no */
340 return 0;
341 } else {
342 /* then yes */
343 return 1;
344 }
345 }
346
347 /* print the results from an add or omit test */
348
print_compare(struct COMPARE * cmp,const DATASET * dset,gretlopt opt,PRN * prn)349 static void print_compare (struct COMPARE *cmp,
350 const DATASET *dset,
351 gretlopt opt,
352 PRN *prn)
353 {
354 if (opt & OPT_Q) {
355 if (cmp->model_id > 0) {
356 pprintf(prn, _("Test on Model %d:"), cmp->model_id);
357 pputc(prn, '\n');
358 }
359 } else if (cmp->model_id > 0) {
360 if (opt & OPT_A) {
361 /* --auto-omit: add vertical space */
362 pputc(prn, '\n');
363 }
364 pprintf(prn, _("Test on Model %d:"), cmp->model_id);
365 pputc(prn, '\n');
366 }
367
368 print_add_omit_null(cmp->testvars, dset, OPT_NONE, prn);
369
370 if (cmp->stat == GRETL_STAT_WALD_CHISQ) {
371 pprintf(prn, " %s: %s(%d) = %g, %s %g\n", _("Wald test"),
372 _("Chi-square"), cmp->dfn, cmp->test,
373 _("p-value"), cmp->pval);
374 if (!na(cmp->LR)) {
375 /* auxiliary LR test? */
376 double pval = chisq_cdf_comp(cmp->dfn, cmp->LR);
377
378 pprintf(prn, " (%s: %s(%d) = %g, %s %g)\n", _("LR test"),
379 _("Chi-square"), cmp->dfn, cmp->LR,
380 _("p-value"), pval);
381 } else if (!na(cmp->F)) {
382 /* alternate F-form for Wald? */
383 double pval = snedecor_cdf_comp(cmp->dfn, cmp->dfd, cmp->F);
384
385 pprintf(prn, " (%s: F(%d, %d) = %g, %s %g)\n",
386 _("F-form"), cmp->dfn, cmp->dfd, cmp->F,
387 _("p-value"), pval);
388 }
389 } else if (cmp->stat == GRETL_STAT_LR) {
390 /* note: unused */
391 pprintf(prn, " %s: %s(%d) = %g, %s %g\n", _("LR test"),
392 _("Chi-square"), cmp->dfn, cmp->test,
393 _("p-value"), cmp->pval);
394 } else if (cmp->stat == GRETL_STAT_F) {
395 pprintf(prn, " %s: %s(%d, %d) = %g, %s %g\n", _("Test statistic"),
396 (cmp->robust)? _("Robust F") : "F",
397 cmp->dfn, cmp->dfd, cmp->test,
398 _("p-value"), cmp->pval);
399 } else if (cmp->stat == GRETL_STAT_LM) {
400 pprintf(prn, " %s: %s(%d) = %g, %s %g\n", _("LM test"),
401 _("Chi-square"), cmp->dfn, cmp->test,
402 _("p-value"), cmp->pval);
403 }
404
405 if (cmp->score >= 0) {
406 pputs(prn, " ");
407 if (cmp->ci == ADD) {
408 pprintf(prn, _("Adding variables improved %d of %d information "
409 "criteria.\n"), cmp->score, C_MAX);
410 } else {
411 pprintf(prn, _("Omitting variables improved %d of %d information "
412 "criteria.\n"), cmp->score, C_MAX);
413 }
414 }
415
416 if (!printing_revised_model(opt)) {
417 pputc(prn, '\n');
418 }
419 }
420
421 /* try attaching the add or omit test to @pmod */
422
add_omit_attach(struct COMPARE * cmp,MODEL * pmod,const DATASET * dset)423 static void add_omit_attach (struct COMPARE *cmp, MODEL *pmod,
424 const DATASET *dset)
425 {
426 int tcode = (cmp->ci == OMIT)? GRETL_TEST_OMIT : GRETL_TEST_ADD;
427 ModelTest *mtest = model_test_new(tcode);
428
429 if (mtest != NULL) {
430 model_test_set_teststat(mtest, cmp->stat);
431 model_test_set_dfn(mtest, cmp->dfn);
432 if (cmp->stat == GRETL_STAT_F) {
433 model_test_set_dfd(mtest, cmp->dfd);
434 }
435 model_test_set_value(mtest, cmp->test);
436 model_test_set_pvalue(mtest, cmp->pval);
437 add_diffvars_to_test(mtest, cmp->testvars, dset);
438 maybe_add_test_to_model(pmod, mtest);
439 }
440 }
441
442 /* add a record of improvement/degeneration of the
443 model selection criteria, for non-quiet printing:
444 @pmodA is the original model, and @pmodB the
445 restricted (OMIT) or augmented (ADD) variant
446 */
447
maybe_add_info_score(struct COMPARE * cmp,const MODEL * pmodA,const MODEL * pmodB)448 static void maybe_add_info_score (struct COMPARE *cmp,
449 const MODEL *pmodA,
450 const MODEL *pmodB)
451 {
452 if (pmodA != NULL && pmodB != NULL) {
453 int i;
454
455 cmp->score = 0;
456 for (i=0; i<C_MAX; i++) {
457 if (na(pmodB->criterion[i]) || na(pmodA->criterion[i])) {
458 cmp->score = -1;
459 break;
460 } else if (pmodB->criterion[i] < pmodA->criterion[i]) {
461 cmp->score++;
462 }
463 }
464 }
465 }
466
add_or_omit_compare(MODEL * pmodA,MODEL * pmodB,const int * testvars,const DATASET * dset,int ci,gretlopt opt,PRN * prn)467 static int add_or_omit_compare (MODEL *pmodA, MODEL *pmodB,
468 const int *testvars, const DATASET *dset,
469 int ci, gretlopt opt, PRN *prn)
470 {
471 struct COMPARE cmp;
472 MODEL *umod, *rmod;
473 int iv_special = 0;
474 int err = 0;
475
476 cmp.ci = ci;
477 cmp.stat = 0;
478 cmp.test = cmp.pval = NADBL;
479 cmp.F = cmp.X2 = cmp.LR = NADBL;
480 cmp.score = -1;
481 cmp.robust = 0;
482 cmp.dfn = testvars[0];
483 cmp.testvars = testvars;
484 cmp.model_ci = pmodA->ci;
485 cmp.model_id = pmodA->ID;
486
487 if (ci == OMIT) {
488 /* the unrestricted model is the original one, 'A' */
489 umod = pmodA;
490 rmod = pmodB;
491 } else {
492 /* add: the unrestricted model is the new one, 'B' */
493 umod = pmodB;
494 rmod = pmodA;
495 }
496
497 if (opt & OPT_B) {
498 /* ivreg special: A and B have different
499 instruments, not just different regressors
500 */
501 iv_special = 1;
502 cmp.model_id = -1;
503 } else if (opt & OPT_L) {
504 cmp.model_id = -1;
505 } else if (umod != NULL && rmod != NULL) {
506 cmp.dfn = umod->ncoeff - rmod->ncoeff;
507 }
508
509 if (pmodA->opt & OPT_R) {
510 cmp.robust = 1;
511 }
512
513 if (pmodA->ci == DPANEL) {
514 /* FIXME: plus some other cases? */
515 opt |= OPT_X;
516 }
517
518 cmp.dfd = umod->dfd;
519
520 if (opt & OPT_L) {
521 /* "add" with LM option: the auxiliary regression
522 results are in pmodB */
523 cmp.stat = GRETL_STAT_LM;
524 cmp.test = pmodB->nobs * pmodB->rsq;
525 cmp.pval = chisq_cdf_comp(cmp.dfn, cmp.test);
526 } else if (cmp.model_ci == OLS && !cmp.robust &&
527 rmod != NULL && umod != NULL &&
528 rmod->nobs == umod->nobs &&
529 !(opt & OPT_X)) {
530 /* plain OLS with both the restricted and unrestricted
531 models available: base F-test on sums of squared
532 residuals */
533 cmp.stat = GRETL_STAT_F;
534 cmp.test = ((rmod->ess - umod->ess) / umod->ess) *
535 cmp.dfd / cmp.dfn;
536 cmp.pval = snedecor_cdf_comp(cmp.dfn, cmp.dfd, cmp.test);
537 } else if (opt & OPT_X) {
538 /* chi-square form of Wald test is requested */
539 cmp.stat = GRETL_STAT_WALD_CHISQ;
540 err = wald_test(testvars, umod, &cmp.test, &cmp.F);
541 if (!err) {
542 cmp.pval = chisq_cdf_comp(cmp.dfn, cmp.test);
543 }
544 } else {
545 /* F-form of Wald test */
546 cmp.stat = GRETL_STAT_F;
547 err = wald_test(testvars, umod, &cmp.X2, &cmp.test);
548 if (!err) {
549 cmp.pval = snedecor_cdf_comp(cmp.dfn, cmp.dfd, cmp.test);
550 }
551 }
552
553 /* auxiliary LR test? */
554 if (umod != NULL && rmod != NULL && !(opt & OPT_L)) {
555 if (!na(umod->lnL) && !na(rmod->lnL)) {
556 cmp.LR = 2.0 * (umod->lnL - rmod->lnL);
557 }
558 }
559
560 if (!err && (na(cmp.test) || na(cmp.pval))) {
561 err = E_DATA;
562 }
563
564 if (!err) {
565 record_test_result(cmp.test, cmp.pval);
566
567 if (opt & OPT_S) {
568 /* attach test to model */
569 add_omit_attach(&cmp, pmodA, dset);
570 }
571 if (!(opt & OPT_I)) {
572 /* not --silent */
573 if (!(opt & OPT_Q) && cmp.stat != GRETL_STAT_LM &&
574 !iv_special) {
575 maybe_add_info_score(&cmp, pmodA, pmodB);
576 }
577 print_compare(&cmp, dset, opt, prn);
578 }
579 }
580
581 return err;
582 }
583
get_extra_var(const MODEL * pmod)584 static int get_extra_var (const MODEL *pmod)
585 {
586 if (COUNT_MODEL(pmod->ci)) {
587 return gretl_model_get_int(pmod, "offset_var");
588 } else if (pmod->ci == DURATION) {
589 return gretl_model_get_int(pmod, "cens_var");
590 } else {
591 return 0;
592 }
593 }
594
595 /* reconstitute full varlist for WLS, AR, count and
596 duation models */
597
598 static int *
full_model_list(const MODEL * pmod,const int * inlist)599 full_model_list (const MODEL *pmod, const int *inlist)
600 {
601 int *flist = NULL;
602
603 if (pmod->ci == AR) {
604 /* cobble together arlist and @inlist */
605 flist = gretl_lists_join_with_separator(pmod->arinfo->arlist,
606 inlist);
607 } else {
608 int i, len = inlist[0];
609
610 if (pmod->ci == WLS) {
611 /* prepend the weight variable */
612 len += 1;
613 } else if (COUNT_MODEL(pmod->ci) || pmod->ci == DURATION) {
614 /* append list separator and offset or censoring var */
615 len += 2;
616 }
617
618 flist = gretl_list_new(len);
619 if (flist == NULL) {
620 return NULL;
621 }
622
623 if (pmod->ci == WLS) {
624 flist[1] = pmod->nwt;
625 for (i=1; i<=inlist[0]; i++) {
626 flist[i+1] = inlist[i];
627 }
628 } else if (COUNT_MODEL(pmod->ci) || pmod->ci == DURATION) {
629 int extra = get_extra_var(pmod);
630
631 for (i=1; i<=inlist[0]; i++) {
632 flist[i] = inlist[i];
633 }
634 flist[flist[0]-1] = LISTSEP;
635 flist[flist[0]] = extra;
636 }
637 }
638
639 return flist;
640 }
641
retrieve_dpanel_opts(const MODEL * pmod)642 static gretlopt retrieve_dpanel_opts (const MODEL *pmod)
643 {
644 gretlopt opt = OPT_NONE;
645
646 if (pmod->opt & OPT_D) {
647 /* --dpdstyle */
648 opt |= OPT_D;
649 }
650
651 if (pmod->opt & OPT_L) {
652 /* --system (include levels) */
653 opt |= OPT_L;
654 }
655
656 if (gretl_model_get_int(pmod, "asy")) {
657 opt |= OPT_A;
658 }
659
660 if (gretl_model_get_int(pmod, "step") == 2) {
661 opt |=OPT_T;
662 }
663
664 return opt;
665 }
666
obs_diff_ok(const MODEL * m_old,const MODEL * m_new)667 static int obs_diff_ok (const MODEL *m_old, const MODEL *m_new)
668 {
669 int tdiff, ndiff = m_new->nobs - m_old->nobs;
670
671 if (m_old->ci == AR1) {
672 return 0;
673 }
674
675 if (ndiff > 0) {
676 tdiff = (m_new->t2 - m_new->t1) - (m_old->t2 - m_old->t1);
677 if (ndiff == tdiff) {
678 return 1;
679 }
680 }
681
682 return 0;
683 }
684
685 /* This function is used for "add" and "omit", when we are estimating
686 an augmented or reduced version of the original model. It's also
687 used in the special case of calculation of the p-value for the
688 Durbin-Watson statistic, which requires re-estimation of the
689 original specification. In these cases we need to ensure
690 comparability, which means we have to retrieve any relevant options
691 from the original model and re-apply them.
692 */
693
replicate_estimator(const MODEL * orig,int * list,DATASET * dset,gretlopt myopt,PRN * prn)694 static MODEL replicate_estimator (const MODEL *orig, int *list,
695 DATASET *dset, gretlopt myopt,
696 PRN *prn)
697 {
698 MODEL rep;
699 const char *param = NULL;
700 const int *laglist = NULL;
701 int *full_list = NULL;
702 char altparm[32] = {0};
703 int mc = get_model_count();
704 int cv, repci = orig->ci;
705 int order = 0;
706 int first = 1;
707
708 gretl_model_init(&rep, dset);
709
710 /* recreate options and auxiliary vars, if required */
711
712 transcribe_option_flags(&myopt, orig->opt, OPT_D | OPT_J | OPT_R);
713
714 cv = gretl_model_get_cluster_var(orig);
715 if (cv > 0 && cv < dset->v) {
716 myopt |= OPT_C;
717 set_optval_string(orig->ci, OPT_C, dset->varname[cv]);
718 }
719
720 if (orig->ci == AR1) {
721 if (orig->opt & OPT_H) {
722 myopt |= OPT_H;
723 if (gretl_model_get_int(orig, "no-corc")) {
724 myopt |= OPT_B;
725 }
726 } else if (orig->opt & OPT_P) {
727 myopt |= OPT_P;
728 }
729 } else if (orig->ci == WLS || orig->ci == AR || get_extra_var(orig)) {
730 full_list = full_model_list(orig, list);
731 if (full_list == NULL) {
732 rep.errcode = E_ALLOC;
733 } else {
734 list = full_list;
735 }
736 } else if (orig->ci == HSK) {
737 if (gretl_model_get_int(orig, "no-squares")) {
738 myopt |= OPT_N;
739 }
740 } else if (orig->ci == DPANEL) {
741 param = gretl_model_get_data(orig, "istr");
742 laglist = gretl_model_get_list(orig, "ylags");
743 myopt |= retrieve_dpanel_opts(orig);
744 } else if (orig->ci == ARCH) {
745 order = gretl_model_get_int(orig, "arch_order");
746 } else if (orig->ci == LOGIT || orig->ci == PROBIT) {
747 if (orig->opt & OPT_P) {
748 /* p-values, not slopes, selected */
749 myopt |= OPT_P;
750 } else if (gretl_model_get_int(orig, "ordered")) {
751 myopt |= OPT_D;
752 } else if (gretl_model_get_int(orig, "multinom")) {
753 myopt |= OPT_M;
754 } else if (orig->ci == PROBIT && (orig->opt & OPT_E)) {
755 /* random effects */
756 int qp = gretl_model_get_int(orig, "quadpoints");
757
758 myopt |= (OPT_E | OPT_G);
759 set_optval_double(PROBIT, OPT_G, qp);
760 }
761 } else if (orig->ci == PANEL) {
762 if (gretl_model_get_int(orig, "pooled")) {
763 /* pooled OLS */
764 myopt |= OPT_P;
765 } else if (orig->opt & OPT_U) {
766 /* random effects */
767 myopt |= OPT_U;
768 } else if (orig->opt & OPT_H) {
769 /* unit weights */
770 myopt |= OPT_H;
771 if (gretl_model_get_int(orig, "iters")) {
772 myopt |= OPT_I;
773 }
774 }
775 } else if (orig->ci == LAD && gretl_model_get_int(orig, "rq")) {
776 double x;
777
778 x = gretl_model_get_double(orig, "tau");
779 sprintf(altparm, "%g", x);
780 if (gretl_model_get_int(orig, "rq_nid")) {
781 myopt |= OPT_R;
782 }
783 x = gretl_model_get_double(orig, "rq_alpha");
784 if (!na(x)) {
785 myopt |= OPT_I;
786 set_optval_double(QUANTREG, OPT_I, x);
787 }
788 } else if (orig->ci == TOBIT) {
789 double x;
790
791 x = gretl_model_get_double(orig, "llimit");
792 if (!na(x)) {
793 myopt |= OPT_L;
794 set_optval_double(TOBIT, OPT_L, x);
795 }
796 x = gretl_model_get_double(orig, "rlimit");
797 if (!na(x)) {
798 myopt |= OPT_M;
799 set_optval_double(TOBIT, OPT_M, x);
800 }
801 } else if (orig->ci == IVREG) {
802 transcribe_option_flags(&myopt, orig->opt, OPT_L | OPT_G);
803 }
804
805 if (rep.errcode) {
806 goto bailout;
807 }
808
809 try_again:
810
811 switch (orig->ci) {
812
813 case AR:
814 rep = ar_model(list, dset, myopt, NULL);
815 break;
816 case AR1:
817 rep = ar1_model(list, dset, myopt, NULL);
818 break;
819 case DPANEL:
820 rep = dpd_model(list, laglist, param, dset, myopt, prn);
821 break;
822 case ARCH:
823 rep = arch_model(list, order, dset, myopt);
824 break;
825 case LOGIT:
826 case PROBIT:
827 rep = logit_probit(list, dset, orig->ci, myopt, NULL);
828 break;
829 case TOBIT:
830 rep = tobit_driver(list, dset, myopt, NULL);
831 break;
832 case LAD:
833 if (gretl_model_get_int(orig, "rq")) {
834 rep = quantreg_driver(altparm, list, dset, myopt, NULL);
835 } else {
836 rep = lad_model(list, dset, myopt);
837 }
838 break;
839 case POISSON:
840 case NEGBIN:
841 rep = count_model(list, orig->ci, dset, myopt, NULL);
842 break;
843 case DURATION:
844 rep = duration_model(list, dset, myopt, NULL);
845 break;
846 case HECKIT:
847 rep = heckit_model(list, dset, myopt, NULL);
848 break;
849 case IVREG:
850 rep = ivreg(list, dset, myopt);
851 break;
852 case LOGISTIC:
853 {
854 double lmax = gretl_model_get_double(orig, "lmax");
855
856 rep = logistic_model(list, lmax, dset, myopt);
857 }
858 break;
859 case PANEL:
860 rep = panel_model(list, dset, myopt, prn);
861 break;
862 case HSK:
863 rep = hsk_model(list, dset, myopt);
864 break;
865 default:
866 /* handles OLS, WLS, etc. */
867 if (gretl_model_get_int(orig, "pooled")) {
868 myopt |= OPT_P;
869 rep = panel_model(list, dset, myopt, prn);
870 } else {
871 rep = lsq(list, dset, repci, myopt);
872 }
873 break;
874 }
875
876 #if 0
877 fprintf(stderr, "replicate_estimator:\n"
878 " orig: t1=%d, t2=%d, nobs = %d\n"
879 " rep: t1=%d, t2=%d, nobs = %d\n",
880 orig->t1, orig->t2, orig->nobs,
881 rep.t1, rep.t2, rep.nobs);
882 #endif
883
884 /* check that we got the same sample as the original */
885 if (!rep.errcode && rep.nobs != orig->nobs) {
886 if (first && obs_diff_ok(orig, &rep)) {
887 dset->t1 = orig->t1;
888 dset->t2 = orig->t2;
889 clear_model(&rep);
890 first = 0;
891 goto try_again;
892 } else {
893 fprintf(stderr, "Original obs = %d but new = %d\n", orig->nobs, rep.nobs);
894 rep.errcode = E_DATA;
895 }
896 }
897
898 bailout:
899
900 free(full_list);
901
902 /* if the model count went up for an aux regression,
903 bring it back down */
904 if ((myopt & OPT_A) && get_model_count() > mc) {
905 model_count_minus(NULL);
906 }
907
908 return rep;
909 }
910
add_residual_to_dataset(MODEL * pmod,DATASET * dset)911 static int add_residual_to_dataset (MODEL *pmod, DATASET *dset)
912 {
913 int err = 0;
914
915 if (dataset_add_series(dset, 1)) {
916 err = E_ALLOC;
917 } else {
918 int t, v = dset->v - 1;
919
920 for (t=0; t<dset->n; t++) {
921 dset->Z[v][t] = pmod->uhat[t];
922 }
923
924 strcpy(dset->varname[v], "uhat");
925 series_set_label(dset, v, _("residual"));
926 }
927
928 return err;
929 }
930
nonlin_test_header(int code,PRN * prn)931 static void nonlin_test_header (int code, PRN *prn)
932 {
933 pputc(prn, '\n');
934 if (code == AUX_SQ) {
935 pputs(prn, _("Non-linearity test (squared terms)"));
936 } else {
937 pputs(prn, _("Non-linearity test (log terms)"));
938 }
939 pputs(prn, "\n\n");
940 }
941
942 static int
real_nonlinearity_test(MODEL * pmod,int * list,DATASET * dset,int aux_code,gretlopt opt,PRN * prn)943 real_nonlinearity_test (MODEL *pmod, int *list,
944 DATASET *dset, int aux_code,
945 gretlopt opt, PRN *prn)
946 {
947 MODEL aux;
948 int err;
949
950 err = add_residual_to_dataset(pmod, dset);
951 if (err) {
952 return err;
953 }
954
955 /* replace the dependent var */
956 list[1] = dset->v - 1;
957
958 aux = lsq(list, dset, OLS, OPT_A);
959 if (aux.errcode) {
960 err = aux.errcode;
961 fprintf(stderr, "auxiliary regression failed\n");
962 } else {
963 double pval, trsq = aux.rsq * aux.nobs;
964 int df = aux.ncoeff - pmod->ncoeff;
965
966 if (df <= 0) {
967 /* couldn't add any regressors */
968 err = E_SINGULAR;
969 goto bailout;
970 }
971
972 pval = chisq_cdf_comp(df, trsq);
973 aux.aux = aux_code;
974
975 if (!(opt & OPT_I)) {
976 /* OPT_I = --silent */
977 if (opt & OPT_Q) {
978 nonlin_test_header(aux_code, prn);
979 } else {
980 printmodel(&aux, dset, OPT_NONE, prn);
981 pputc(prn, '\n');
982 }
983 pprintf(prn, " %s: TR^2 = %g,\n ", _("Test statistic"), trsq);
984 pprintf(prn, "%s = P(%s(%d) > %g) = %g\n\n",
985 _("with p-value"), _("Chi-square"), df, trsq, pval);
986 }
987
988 if (opt & OPT_S) {
989 ModelTest *test;
990
991 test = model_test_new((aux_code == AUX_SQ)?
992 GRETL_TEST_SQUARES : GRETL_TEST_LOGS);
993 if (test != NULL) {
994 model_test_set_teststat(test, GRETL_STAT_LM);
995 model_test_set_dfn(test, df);
996 model_test_set_value(test, trsq);
997 model_test_set_pvalue(test, chisq_cdf_comp(df, trsq));
998 maybe_add_test_to_model(pmod, test);
999 }
1000 }
1001
1002 record_test_result(trsq, pval);
1003 }
1004
1005 bailout:
1006
1007 clear_model(&aux);
1008
1009 return err;
1010 }
1011
1012 /**
1013 * nonlinearity_test:
1014 * @pmod: pointer to original model.
1015 * @dset: dataset struct.
1016 * @aux: AUX_SQ for squares or AUX_LOG for logs
1017 * @opt: if contains OPT_S, save test results to model; if
1018 * contains OPT_I, run silently.
1019 * @prn: gretl printing struct.
1020 *
1021 * Run an auxiliary regression to test @pmod for nonlinearity,
1022 * via the addition of either squares or logs of the original
1023 * indepdendent variables.
1024 *
1025 * Returns: 0 on successful completion, error code on error.
1026 */
1027
nonlinearity_test(MODEL * pmod,DATASET * dset,ModelAuxCode aux,gretlopt opt,PRN * prn)1028 int nonlinearity_test (MODEL *pmod, DATASET *dset, ModelAuxCode aux,
1029 gretlopt opt, PRN *prn)
1030 {
1031 int save_t1 = dset->t1;
1032 int save_t2 = dset->t2;
1033 int *tmplist = NULL;
1034 const int orig_nvar = dset->v;
1035 int err = 0;
1036
1037 if (!command_ok_for_model(ADD, 0, pmod)) {
1038 return E_NOTIMP;
1039 }
1040
1041 if (pmod->ci == LOGISTIC || pmod->ci == LAD) {
1042 return E_NOTIMP;
1043 }
1044
1045 /* check for changes in original list members */
1046 err = list_members_replaced(pmod, dset);
1047 if (err) {
1048 return err;
1049 }
1050
1051 /* re-impose the sample that was in force when the original model
1052 was estimated */
1053 impose_model_smpl(pmod, dset);
1054
1055 /* add squares or logs */
1056 tmplist = augment_regression_list(pmod->list, aux, dset, &err);
1057 if (err) {
1058 return err;
1059 } else if (tmplist[0] == pmod->list[0]) {
1060 /* no vars were added */
1061 if (aux == AUX_SQ) {
1062 fprintf(stderr, "gretl: generation of squares failed\n");
1063 err = E_SQUARES;
1064 } else if (aux == AUX_LOG) {
1065 fprintf(stderr, "gretl: generation of logs failed\n");
1066 err = E_LOGS;
1067 }
1068 }
1069
1070 if (!err) {
1071 err = real_nonlinearity_test(pmod, tmplist, dset, aux,
1072 opt, prn);
1073 }
1074
1075 /* trash any extra variables generated (squares, logs) */
1076 dataset_drop_last_variables(dset, dset->v - orig_nvar);
1077
1078 /* put back into dset what was there on input */
1079 dset->t1 = save_t1;
1080 dset->t2 = save_t2;
1081
1082 free(tmplist);
1083
1084 return err;
1085 }
1086
add_vars_missing(const MODEL * pmod,const int * addvars,const DATASET * dset)1087 static int add_vars_missing (const MODEL *pmod,
1088 const int *addvars,
1089 const DATASET *dset)
1090 {
1091 int i, vi, t;
1092
1093 for (t=pmod->t1; t<=pmod->t2; t++) {
1094 if (model_missing(pmod, t) ||
1095 (pmod->yhat != NULL && na(pmod->yhat[t]))) {
1096 continue;
1097 }
1098 for (i=1; i<=addvars[0]; i++) {
1099 vi = addvars[i];
1100 if (na(dset->Z[vi][t])) {
1101 fprintf(stderr, "add: obs %d OK in model but missing for series %s\n",
1102 t+1, dset->varname[vi]);
1103 return E_MISSDATA;
1104 }
1105 }
1106 }
1107
1108 return 0;
1109 }
1110
LM_add_test(MODEL * pmod,DATASET * dset,int * list,gretlopt opt,PRN * prn)1111 static MODEL LM_add_test (MODEL *pmod, DATASET *dset, int *list,
1112 gretlopt opt, PRN *prn)
1113 {
1114 MODEL aux;
1115 int err;
1116
1117 err = add_residual_to_dataset(pmod, dset);
1118 if (err) {
1119 gretl_model_init(&aux, dset);
1120 aux.errcode = err;
1121 return aux;
1122 }
1123
1124 list[1] = dset->v - 1;
1125
1126 aux = lsq(list, dset, OLS, OPT_A | OPT_Z);
1127
1128 if (aux.errcode) {
1129 fprintf(stderr, "auxiliary regression failed\n");
1130 } else {
1131 int df = aux.ncoeff - pmod->ncoeff;
1132
1133 if (df <= 0) {
1134 /* couldn't add any regressors */
1135 aux.errcode = E_SINGULAR;
1136 } else {
1137 if (!(opt & (OPT_Q | OPT_I))) {
1138 aux.aux = AUX_ADD;
1139 printmodel(&aux, dset, OPT_S, prn);
1140 }
1141 }
1142 }
1143
1144 return aux;
1145 }
1146
1147 /**
1148 * add_test_full:
1149 * @orig: pointer to original model.
1150 * @pmod: pointer to receive augmented model.
1151 * @addvars: list of variables to add to original model.
1152 * @dset: dataset struct.
1153 * @opt: can contain OPT_Q (quiet) to suppress printing
1154 * of the new model, OPT_O to print its covariance matrix,
1155 * OPT_I for silent operation.
1156 * @prn: gretl printing struct.
1157 *
1158 * Re-estimate a given model after adding the specified
1159 * variables, and records a joint test on the additional
1160 * variables.
1161 *
1162 * Returns: 0 on successful completion, error code on error.
1163 */
1164
add_test_full(MODEL * orig,MODEL * pmod,const int * addvars,DATASET * dset,gretlopt opt,PRN * prn)1165 int add_test_full (MODEL *orig, MODEL *pmod, const int *addvars,
1166 DATASET *dset, gretlopt opt, PRN *prn)
1167 {
1168 MODEL umod;
1169 int save_t1 = dset->t1;
1170 int save_t2 = dset->t2;
1171 int *biglist = NULL;
1172 const int orig_nvar = dset->v;
1173 int n_add = 0;
1174 int err = 0;
1175
1176 if (orig == NULL || orig->list == NULL || addvars == NULL) {
1177 return E_DATA;
1178 }
1179
1180 n_add = addvars[0];
1181 if (n_add == 0) {
1182 return E_NOADD;
1183 }
1184
1185 if (!command_ok_for_model(ADD, opt, orig)) {
1186 return E_NOTIMP;
1187 }
1188
1189 if ((opt & OPT_L) && pmod != NULL) {
1190 /* --lm option incompatible with "full" variant of add test */
1191 return E_BADOPT;
1192 }
1193
1194 if (exact_fit_check(orig, prn)) {
1195 return 0;
1196 }
1197
1198 /* check for changes in original list members */
1199 err = list_members_replaced(orig, dset);
1200 if (err) {
1201 return err;
1202 }
1203
1204 /* check for NAs in add list relative to model */
1205 err = add_vars_missing(orig, addvars, dset);
1206 if (err) {
1207 return err;
1208 }
1209
1210 /* create augmented regression list */
1211 if (orig->ci == IVREG) {
1212 biglist = ivreg_list_add(orig->list, addvars, opt, &err);
1213 } else if (orig->ci == DPANEL) {
1214 biglist = panel_list_add(orig, addvars, &err);
1215 } else {
1216 biglist = gretl_list_add(orig->list, addvars, &err);
1217 }
1218
1219 if (err) {
1220 return err;
1221 }
1222
1223 /* impose the sample range in force when the
1224 original model was estimated
1225 */
1226 impose_model_smpl(orig, dset);
1227
1228 if (opt & OPT_L) {
1229 /* run an LM test */
1230 umod = LM_add_test(orig, dset, biglist, opt, prn);
1231 } else {
1232 /* Run augmented regression, matching the original estimation
1233 method; use OPT_Z to suppress the elimination of perfectly
1234 collinear variables.
1235 */
1236 gretlopt ropt = OPT_Z;
1237
1238 if (opt & (OPT_Q | OPT_I)) {
1239 /* not printing @umod, so pass along OPT_Q */
1240 ropt |= OPT_Q;
1241 }
1242 umod = replicate_estimator(orig, biglist, dset, ropt, prn);
1243 }
1244
1245 if (umod.errcode) {
1246 err = umod.errcode;
1247 errmsg(err, prn);
1248 } else if (umod.ncoeff - orig->ncoeff != n_add) {
1249 gretl_errmsg_sprintf("Failed to add %d variable(s)", n_add);
1250 err = E_DATA;
1251 }
1252
1253 #if 1
1254 if (!err) {
1255 err = add_or_omit_compare(orig, &umod, addvars,
1256 dset, ADD, opt, prn);
1257 }
1258 #else
1259 if (!err) {
1260 int *addlist = gretl_list_diff_new(umod.list, orig->list, 2);
1261
1262 if (addlist != NULL) {
1263 err = add_or_omit_compare(orig, &umod, addlist,
1264 dset, ADD, opt, prn);
1265 free(addlist);
1266 }
1267 }
1268 #endif
1269
1270 if (err || pmod == NULL) {
1271 clear_model(&umod);
1272 } else {
1273 *pmod = umod;
1274 }
1275
1276 /* put dset back as it was on input */
1277 dataset_drop_last_variables(dset, dset->v - orig_nvar);
1278 dset->t1 = save_t1;
1279 dset->t2 = save_t2;
1280
1281 free(biglist);
1282
1283 return err;
1284 }
1285
1286 /**
1287 * add_test:
1288 * @pmod: pointer to model to be tested.
1289 * @addvars: list of variables to test.
1290 * @dset: dataset struct.
1291 * @opt: can contain OPT_Q (quiet) to suppress printing
1292 * of the auxiliary model, OPT_I to suppress all printing
1293 * of results.
1294 * @prn: gretl printing struct.
1295 *
1296 * Performs an LM test on @pmod for the null hypothesis
1297 * that the @addvars variables do not contribute
1298 * significant explanatory power.
1299 *
1300 * Returns: 0 on successful completion, error code on error.
1301 */
1302
add_test(MODEL * pmod,const int * addvars,DATASET * dset,gretlopt opt,PRN * prn)1303 int add_test (MODEL *pmod, const int *addvars,
1304 DATASET *dset, gretlopt opt, PRN *prn)
1305 {
1306 return add_test_full(pmod, NULL, addvars, dset, opt, prn);
1307 }
1308
wald_omit_test(const int * list,MODEL * pmod,const DATASET * dset,gretlopt opt,PRN * prn)1309 static int wald_omit_test (const int *list, MODEL *pmod,
1310 const DATASET *dset, gretlopt opt,
1311 PRN *prn)
1312 {
1313 int *test = NULL;
1314 int err = 0;
1315
1316 /* test validity of omissions */
1317
1318 if (pmod->ci == IVREG) {
1319 test = ivreg_list_omit(pmod->list, list, opt, &err);
1320 } else if (pmod->ci == PANEL || pmod->ci == DPANEL) {
1321 test = panel_list_omit(pmod, list, &err);
1322 } else {
1323 test = gretl_list_omit(pmod->list, list, 2, &err);
1324 }
1325
1326 if (!err) {
1327 free(test);
1328 err = add_or_omit_compare(pmod, NULL, list, dset, OMIT,
1329 opt, prn);
1330 }
1331
1332 return err;
1333 }
1334
1335 /* Check whether coefficient @i corresponds to a variable
1336 that is removable from the model: this is the case if either
1337 (a) the @cands list is empty, or (b) coefficient @i is the
1338 coefficient on one of the variables in @cands.
1339 */
1340
coeff_is_removable(const int * cands,const MODEL * pmod,DATASET * dset,int i)1341 static int coeff_is_removable (const int *cands, const MODEL *pmod,
1342 DATASET *dset, int i)
1343 {
1344 int ret = 1;
1345
1346 if (cands != NULL && cands[0] > 0) {
1347 const char *vname;
1348 int j, pj;
1349
1350 ret = 0; /* reverse the presumption */
1351
1352 for (j=1; j<=cands[0]; j++) {
1353 vname = dset->varname[cands[j]];
1354 pj = gretl_model_get_param_number(pmod, dset, vname);
1355 if (pj == i) {
1356 ret = 1;
1357 break;
1358 }
1359 }
1360 }
1361
1362 return ret;
1363 }
1364
1365 /* Determine if @pmod contains a variable with p-value
1366 greater than some cutoff alpha_max; and if so, remove
1367 this variable from @list.
1368
1369 If the list @cands is non-empty then confine the search
1370 to candidate variables in that list.
1371
1372 Returns 1 if a variable was dropped, else 0.
1373 */
1374
auto_drop_var(const MODEL * pmod,int * list,const int * cands,DATASET * dset,double alpha_max,int starting,gretlopt opt,PRN * prn)1375 static int auto_drop_var (const MODEL *pmod,
1376 int *list, const int *cands,
1377 DATASET *dset, double alpha_max,
1378 int starting, gretlopt opt,
1379 PRN *prn)
1380 {
1381 double tstat, pv = 0.0, tmin = 4.0;
1382 int imin, imax = pmod->ncoeff;
1383 int i, k = -1;
1384 int ret = 0;
1385
1386 if ((pmod->ci == LOGIT || pmod->ci == PROBIT) &&
1387 gretl_model_get_int(pmod, "ordered")) {
1388 /* FIXME other problematic cases? */
1389 imax = pmod->list[0] - 1;
1390 }
1391
1392 /* if the constant is the sole regressors, allow it
1393 to be dropped */
1394 imin = (pmod->ncoeff == 1)? 0 : pmod->ifc;
1395
1396 for (i=imin; i<imax; i++) {
1397 if (coeff_is_removable(cands, pmod, dset, i)) {
1398 tstat = fabs(pmod->coeff[i] / pmod->sderr[i]);
1399 if (tstat < tmin) {
1400 tmin = tstat;
1401 k = i;
1402 }
1403 }
1404 }
1405
1406 if (k >= 0) {
1407 pv = coeff_pval(pmod->ci, tmin, pmod->dfd);
1408 }
1409
1410 if (pv > alpha_max) {
1411 char pname[VNAMELEN];
1412 int err;
1413
1414 if (starting && !(opt & OPT_I)) {
1415 /* not --silent */
1416 pputc(prn, '\n');
1417 pprintf(prn, _("Sequential elimination using two-sided alpha = %.2f"),
1418 alpha_max);
1419 pputs(prn, "\n\n");
1420 }
1421
1422 gretl_model_get_param_name(pmod, dset, k, pname);
1423 err = gretl_list_delete_at_pos(list, k + 2);
1424 if (!err) {
1425 if (!(opt & OPT_I)) {
1426 pprintf(prn, _(" Dropping %-16s (p-value %.3f)\n"), pname, pv);
1427 }
1428 ret = 1;
1429 }
1430 }
1431
1432 return ret;
1433 }
1434
list_copy_values(int * targ,const int * src)1435 static void list_copy_values (int *targ, const int *src)
1436 {
1437 int i;
1438
1439 for (i=0; i<=src[0]; i++) {
1440 targ[i] = src[i];
1441 }
1442 }
1443
1444 /* run a loop in which the least significant variable is dropped
1445 from the regression list, provided its p-value exceeds some
1446 specified cutoff. FIXME this probably still needs work for
1447 estimators other than OLS. If @omitlist is non-empty the
1448 routine is confined to members of the list.
1449 */
1450
auto_omit(MODEL * orig,const int * omitlist,DATASET * dset,gretlopt opt,PRN * prn)1451 static MODEL auto_omit (MODEL *orig, const int *omitlist,
1452 DATASET *dset, gretlopt opt,
1453 PRN *prn)
1454 {
1455 MODEL omod;
1456 double amax;
1457 int *tmplist;
1458 int allgone = 0;
1459 int i, drop;
1460 int err = 0;
1461
1462 gretl_model_init(&omod, dset);
1463
1464 tmplist = gretl_list_copy(orig->list);
1465 if (tmplist == NULL) {
1466 omod.errcode = E_ALLOC;
1467 return omod;
1468 }
1469
1470 amax = get_optval_double(OMIT, OPT_A, &err);
1471 if (err) {
1472 omod.errcode = err;
1473 return omod;
1474 }
1475
1476 if (na(amax) || amax <= 0.0 || amax >= 1.0) {
1477 amax = 0.10;
1478 }
1479
1480 drop = auto_drop_var(orig, tmplist, omitlist, dset, amax,
1481 1, opt, prn);
1482 if (!drop) {
1483 /* nothing was dropped: set a "benign" error code */
1484 err = omod.errcode = E_NOOMIT;
1485 }
1486
1487 for (i=0; drop > 0 && !allgone; i++) {
1488 if (i > 0) {
1489 set_reference_missmask_from_model(orig);
1490 }
1491 omod = replicate_estimator(orig, tmplist, dset, OPT_A, prn);
1492 err = omod.errcode;
1493 if (err) {
1494 fprintf(stderr, "auto_omit: error %d from replicate_estimator\n",
1495 err);
1496 drop = 0; /* will break */
1497 } else {
1498 list_copy_values(tmplist, omod.list);
1499 drop = auto_drop_var(&omod, tmplist, omitlist, dset,
1500 amax, 0, opt, prn);
1501 if (drop && omod.ncoeff == 1) {
1502 allgone = 1; /* will break */
1503 }
1504 clear_model(&omod);
1505 }
1506 }
1507
1508 if (allgone) {
1509 pputc(prn, '\n');
1510 pprintf(prn, _("No coefficient has a p-value less than %g"), amax);
1511 pputc(prn, '\n');
1512 } else if (!err) {
1513 /* re-estimate the final model without the auxiliary flag */
1514 gretlopt ropt = OPT_NONE;
1515
1516 if (opt & (OPT_Q | OPT_I)) {
1517 /* not printing @omod, so pass along OPT_Q */
1518 ropt |= OPT_Q;
1519 }
1520 set_reference_missmask_from_model(orig);
1521 omod = replicate_estimator(orig, tmplist, dset, ropt, prn);
1522 }
1523
1524 free(tmplist);
1525
1526 return omod;
1527 }
1528
1529 /* create reduced list for "omit" test on model, based on
1530 the list of variables to be dropped, @omitvars
1531 */
1532
make_short_list(MODEL * orig,const int * omitvars,gretlopt opt,int ** plist)1533 static int make_short_list (MODEL *orig, const int *omitvars,
1534 gretlopt opt, int **plist)
1535 {
1536 int *list = NULL;
1537 int err = 0;
1538
1539 if (omitvars == NULL || omitvars[0] == 0) {
1540 return E_PARSE;
1541 }
1542
1543 if (orig->ci == IVREG) {
1544 list = ivreg_list_omit(orig->list, omitvars, opt, &err);
1545 } else if (orig->ci == PANEL || orig->ci == DPANEL) {
1546 list = panel_list_omit(orig, omitvars, &err);
1547 } else {
1548 list = gretl_list_omit(orig->list, omitvars, 2, &err);
1549 }
1550
1551 if (list != NULL && list[0] == 1) {
1552 /* only the dependent variable would be left */
1553 err = E_NOVARS;
1554 }
1555
1556 *plist = list;
1557
1558 return err;
1559 }
1560
omit_options_inconsistent(MODEL * pmod,gretlopt opt)1561 static int omit_options_inconsistent (MODEL *pmod, gretlopt opt)
1562 {
1563 if (opt & OPT_B) {
1564 /* 2sls: omitting variable as instrument */
1565 if (opt & (OPT_W | OPT_A)) {
1566 /* can't use Wald method on original VCV,
1567 and can't do --auto */
1568 return 1;
1569 } else if (pmod->ci != IVREG) {
1570 return 1;
1571 }
1572 }
1573
1574 if ((opt & OPT_A) && (opt & OPT_W)) {
1575 /* --auto and --wald options incompatible */
1576 return 1;
1577 }
1578
1579 return 0;
1580 }
1581
omit_test_precheck(MODEL * pmod,gretlopt opt)1582 static int omit_test_precheck (MODEL *pmod, gretlopt opt)
1583 {
1584 int err = 0;
1585
1586 if (pmod == NULL || pmod->list == NULL) {
1587 err = E_DATA;
1588 } else if (!command_ok_for_model(OMIT, 0, pmod)) {
1589 err = E_NOTIMP;
1590 } else if (omit_options_inconsistent(pmod, opt)) {
1591 err = E_BADOPT;
1592 }
1593
1594 return err;
1595 }
1596
1597 /**
1598 * omit_test_full:
1599 * @orig: pointer to original model.
1600 * @pmod: pointer to receive new model, with vars omitted.
1601 * @omitvars: list of variables to omit from original model.
1602 * @dset: dataset struct.
1603 * @opt: can contain OPT_Q (quiet) to suppress printing
1604 * of the new model, OPT_O to print its covariance matrix,
1605 * OPT_I for silent operation; for OPT_A, see below.
1606 * @prn: gretl printing struct.
1607 *
1608 * Re-estimate a given model after removing the variables
1609 * specified in @omitvars. Or if OPT_A is given, proceed
1610 * sequentially, at each step dropping the least significant
1611 * variable provided its p-value is above a certain threshold
1612 * (currently 0.10, two-sided).
1613 *
1614 * Returns: 0 on successful completion, error code on error.
1615 */
1616
omit_test_full(MODEL * orig,MODEL * pmod,const int * omitvars,DATASET * dset,gretlopt opt,PRN * prn)1617 int omit_test_full (MODEL *orig, MODEL *pmod, const int *omitvars,
1618 DATASET *dset, gretlopt opt, PRN *prn)
1619 {
1620 MODEL rmod;
1621 int save_t1 = dset->t1;
1622 int save_t2 = dset->t2;
1623 int *tmplist = NULL;
1624 int err;
1625
1626 err = omit_test_precheck(orig, opt);
1627 if (err) {
1628 return err;
1629 }
1630
1631 /* check that vars to omit have not been redefined */
1632 if ((err = list_members_replaced(orig, dset))) {
1633 return err;
1634 }
1635
1636 if (!(opt & OPT_A)) {
1637 /* not doing auto-omit */
1638 err = make_short_list(orig, omitvars, opt, &tmplist);
1639 if (err) {
1640 free(tmplist);
1641 return err;
1642 }
1643 }
1644
1645 /* impose the sample range used for the original model */
1646 impose_model_smpl(orig, dset);
1647
1648 /* set the mask for missing obs within the sample range, based
1649 on the original model */
1650 set_reference_missmask_from_model(orig);
1651
1652 if (opt & OPT_A) {
1653 rmod = auto_omit(orig, omitvars, dset, opt, prn);
1654 } else {
1655 gretlopt ropt = OPT_NONE;
1656
1657 if (opt & (OPT_Q | OPT_I)) {
1658 /* not printing @rmod, so pass along OPT_Q */
1659 ropt |= OPT_Q;
1660 }
1661 rmod = replicate_estimator(orig, tmplist, dset, ropt, prn);
1662 }
1663
1664 err = rmod.errcode;
1665
1666 if (err) {
1667 if (err == E_NOOMIT && (opt & OPT_I)) {
1668 ; /* --silent: keep quiet */
1669 } else {
1670 errmsg(err, prn);
1671 }
1672 } else {
1673 MODEL *newmod = NULL;
1674 int minpos = 2;
1675 int *omitlist = NULL;
1676
1677 if (orig->ci == DPANEL) {
1678 /* skip AR spec, separator, and dep var */
1679 minpos = 4;
1680 }
1681
1682 if (rmod.list == NULL) {
1683 /* handle the case where sequential elimination
1684 ended up dropping all regressors */
1685 int *minlist = gretl_list_new(1);
1686
1687 minlist[1] = orig->list[1];
1688 omitlist = gretl_list_diff_new(orig->list, minlist, minpos);
1689 free(minlist);
1690 } else {
1691 newmod = &rmod;
1692 omitlist = gretl_list_diff_new(orig->list, rmod.list, minpos);
1693 }
1694 if (omitlist != NULL) {
1695 err = add_or_omit_compare(orig, newmod, omitlist,
1696 dset, OMIT, opt, prn);
1697 free(omitlist);
1698 }
1699 }
1700
1701 if (err || pmod == NULL) {
1702 clear_model(&rmod);
1703 } else {
1704 *pmod = rmod;
1705 }
1706
1707 /* put back into dset what was there on input */
1708 dset->t1 = save_t1;
1709 dset->t2 = save_t2;
1710
1711 free(tmplist);
1712
1713 return err;
1714 }
1715
1716 /**
1717 * omit_test:
1718 * @pmod: pointer to model to be tested.
1719 * @omitvars: list of variables to test.
1720 * @dset: dataset struct.
1721 * @opt: can contain OPT_Q (quiet) to suppress printing
1722 * of results.
1723 * @prn: gretl printing struct.
1724 *
1725 * Performs a Wald test on @pmod for the null hypothesis
1726 * that the @omitvars variables do not contribute
1727 * explanatory power.
1728 *
1729 * Returns: 0 on successful completion, error code on error.
1730 */
1731
omit_test(MODEL * pmod,const int * omitvars,DATASET * dset,gretlopt opt,PRN * prn)1732 int omit_test (MODEL *pmod, const int *omitvars,
1733 DATASET *dset, gretlopt opt, PRN *prn)
1734 {
1735 int err = omit_test_precheck(pmod, opt);
1736
1737 if (!err) {
1738 err = wald_omit_test(omitvars, pmod, dset, opt, prn);
1739 }
1740
1741 return err;
1742 }
1743
1744 /**
1745 * get_DW_pvalue_for_model:
1746 * @pmod: model to be tested.
1747 * @dset: dataset struct.
1748 * @err: location to receive error code.
1749 *
1750 * Computes the p-value for the Durbin-Watson statistic for the
1751 * given model, using the Imhof method.
1752 *
1753 * Returns: the p-value, or #NADBL on error.
1754 */
1755
get_DW_pvalue_for_model(MODEL * pmod,DATASET * dset,int * err)1756 double get_DW_pvalue_for_model (MODEL *pmod, DATASET *dset,
1757 int *err)
1758 {
1759 MODEL dwmod;
1760 int save_t1 = dset->t1;
1761 int save_t2 = dset->t2;
1762 int *list = NULL;
1763 double pv = NADBL;
1764
1765 /* maybe this has already been done? */
1766 pv = gretl_model_get_double(pmod, "dw_pval");
1767 if (!na(pv)) {
1768 return pv;
1769 }
1770
1771 if (pmod->ci == PANEL && panel_DW_pval_ok(pmod)) {
1772 /* Use Bhargava et al approximation */
1773 return BFN_panel_DW_pvalue(pmod, dset, err);
1774 }
1775
1776 if (dset == NULL || dset->Z == NULL) {
1777 *err = E_NODATA;
1778 } else if (pmod == NULL || pmod->list == NULL) {
1779 *err = E_DATA;
1780 } else if ((pmod->ci != OLS && pmod->ci != PANEL) ||
1781 na(pmod->dw) || model_has_missing_obs(pmod)) {
1782 *err = E_BADSTAT;
1783 } else {
1784 /* check that relevant vars have not been redefined */
1785 *err = list_members_replaced(pmod, dset);
1786 }
1787
1788 if (!*err) {
1789 list = gretl_list_copy(pmod->list);
1790 if (list == NULL) {
1791 *err = E_ALLOC;
1792 }
1793 }
1794
1795 if (*err) {
1796 return NADBL;
1797 }
1798
1799 gretl_model_init(&dwmod, dset);
1800
1801 /* impose the sample range used for the original model */
1802 impose_model_smpl(pmod, dset);
1803
1804 dwmod = replicate_estimator(pmod, list, dset, OPT_A | OPT_I, NULL);
1805 *err = dwmod.errcode;
1806
1807 if (!*err) {
1808 pv = gretl_model_get_double(&dwmod, "dw_pval");
1809 if (!na(pv)) {
1810 /* record result on the incoming model */
1811 gretl_model_set_double(pmod, "dw_pval", pv);
1812 }
1813 }
1814
1815 /* put back into dset what was there on input */
1816 dset->t1 = save_t1;
1817 dset->t2 = save_t2;
1818
1819 clear_model(&dwmod);
1820 free(list);
1821
1822 return pv;
1823 }
1824
1825 /**
1826 * reset_test:
1827 * @pmod: pointer to model to be tested.
1828 * @dset: dataset struct.
1829 * @opt: if contains %OPT_S, save test results to model. %OPT_Q
1830 * suppresses the printout of the auxiliary regression. %OPT_R and
1831 * %OPT_C stand for "squares only" and "cubes only", respectively.
1832 * %OPT_I produces silent operation.
1833 * @prn: gretl printing struct.
1834 *
1835 * Carries out and prints Ramsey's RESET test for model specification.
1836 *
1837 * Returns: 0 on successful completion, error code on error.
1838 */
1839
reset_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)1840 int reset_test (MODEL *pmod, DATASET *dset,
1841 gretlopt opt, PRN *prn)
1842 {
1843 int *newlist = NULL;
1844 MODEL aux;
1845 double RF;
1846 int save_t1 = dset->t1;
1847 int save_t2 = dset->t2;
1848 int i, t, orig_v = dset->v;
1849 int addv, use_square, use_cube;
1850 const char *mode;
1851 int err = 0;
1852
1853 if (pmod->ci != OLS) {
1854 return E_OLSONLY;
1855 }
1856
1857 err = incompatible_options(opt, OPT_C | OPT_R);
1858
1859 if (err) {
1860 return err;
1861 }
1862
1863 if (exact_fit_check(pmod, prn)) {
1864 return 0;
1865 }
1866
1867 use_square = !(opt & OPT_C); /* not cubes-only */
1868 use_cube = !(opt & OPT_R); /* not squares-only */
1869
1870 gretl_model_init(&aux, dset);
1871
1872 if (opt & OPT_R) {
1873 addv = 1;
1874 mode = N_("squares only");
1875 } else if (opt & OPT_C) {
1876 addv = 1;
1877 mode = N_("cubes only");
1878 } else {
1879 addv = 2;
1880 mode = N_("squares and cubes");
1881 }
1882
1883 impose_model_smpl(pmod, dset);
1884
1885 if (pmod->ncoeff + addv >= dset->t2 - dset->t1) {
1886 /* can't run aux regression */
1887 err = E_DF;
1888 }
1889
1890 if (!err) {
1891 newlist = gretl_list_new(pmod->list[0] + addv);
1892 if (newlist == NULL) {
1893 err = E_ALLOC;
1894 }
1895 }
1896
1897 if (!err) {
1898 for (i=1; i<=pmod->list[0]; i++) {
1899 newlist[i] = pmod->list[i];
1900 }
1901 if (dataset_add_series(dset, addv)) {
1902 err = E_ALLOC;
1903 }
1904 }
1905
1906 if (!err) {
1907 /* add yhat^2 and/or yhat^3 to data set */
1908 int vs = orig_v;
1909 int vc = (opt & OPT_C)? orig_v : orig_v + 1;
1910 int k = pmod->list[0] + 1;
1911
1912 for (t = pmod->t1; t<=pmod->t2; t++) {
1913 double x = pmod->yhat[t];
1914
1915 if (use_square) {
1916 dset->Z[vs][t] = x * x;
1917 }
1918 if (use_cube) {
1919 dset->Z[vc][t] = x * x * x;
1920 }
1921 }
1922
1923 if (use_square) {
1924 strcpy(dset->varname[vs], "yhat^2");
1925 newlist[k++] = vs;
1926 }
1927 if (use_cube) {
1928 strcpy(dset->varname[vc], "yhat^3");
1929 newlist[k++] = vc;
1930 }
1931 }
1932
1933 if (!err) {
1934 aux = lsq(newlist, dset, OLS, OPT_A);
1935 err = aux.errcode;
1936 if (err) {
1937 errmsg(aux.errcode, prn);
1938 }
1939 }
1940
1941 if (!err) {
1942 int silent = (opt & OPT_I);
1943 double pval;
1944
1945 aux.aux = AUX_RESET;
1946
1947 if (!silent) {
1948 if (!(opt & OPT_Q)) {
1949 printmodel(&aux, dset, OPT_NONE, prn);
1950 } else {
1951 if (!(opt & OPT_G)) {
1952 /* GUI special; see gui2/library.c */
1953 pputc(prn, '\n');
1954 }
1955 pputs(prn, _("RESET test for specification"));
1956 pprintf(prn, " (%s)\n", _(mode));
1957 }
1958 }
1959
1960 RF = ((pmod->ess - aux.ess) / addv) / (aux.ess / aux.dfd);
1961 pval = snedecor_cdf_comp(addv, aux.dfd, RF);
1962
1963 if (!silent) {
1964 pprintf(prn, "%s: F = %f,\n", _("Test statistic"), RF);
1965 pprintf(prn, "%s = P(F(%d,%d) > %g) = %.3g\n", _("with p-value"),
1966 addv, aux.dfd, RF, pval);
1967 pputc(prn, '\n');
1968 }
1969
1970 if (opt & OPT_S) {
1971 ModelTest *test = model_test_new(GRETL_TEST_RESET);
1972 gretlopt topt = OPT_NONE;
1973
1974 if (test != NULL) {
1975 if (opt & OPT_R) {
1976 topt = OPT_R;
1977 } else if (opt & OPT_C) {
1978 topt = OPT_C;
1979 }
1980 model_test_set_teststat(test, GRETL_STAT_RESET);
1981 model_test_set_dfn(test, addv);
1982 model_test_set_dfd(test, aux.dfd);
1983 model_test_set_value(test, RF);
1984 model_test_set_pvalue(test, pval);
1985 model_test_set_opt(test, topt);
1986 maybe_add_test_to_model(pmod, test);
1987 }
1988 }
1989
1990 record_test_result(RF, pval);
1991 }
1992
1993 free(newlist);
1994 dataset_drop_last_variables(dset, addv);
1995 clear_model(&aux);
1996
1997 dset->t1 = save_t1;
1998 dset->t2 = save_t2;
1999
2000 return err;
2001 }
2002
bg_test_header(int order,PRN * prn,int ivreg)2003 static void bg_test_header (int order, PRN *prn, int ivreg)
2004 {
2005 if (ivreg) {
2006 pprintf(prn, "\n%s ", _("Godfrey (1994) test for"));
2007 } else {
2008 pprintf(prn, "\n%s ", _("Breusch-Godfrey test for"));
2009 }
2010
2011 if (order > 1) {
2012 pprintf(prn, "%s %d\n", _("autocorrelation up to order"),
2013 order);
2014 } else {
2015 pprintf(prn, "%s\n", _("first-order autocorrelation"));
2016 }
2017
2018 pputc(prn, '\n');
2019 }
2020
ivreg_autocorr_wald_stat(MODEL * aux,int order,int * err)2021 static double ivreg_autocorr_wald_stat (MODEL *aux, int order, int *err)
2022 {
2023 gretl_vector *b = gretl_vector_alloc(order);
2024 gretl_matrix *V1 = gretl_matrix_alloc(order, order);
2025 gretl_vector *WT = gretl_vector_alloc(1);
2026 gretl_matrix *V0 = NULL;
2027 double x = NADBL;
2028 int i, j, ki, kj;
2029
2030 if (b == NULL || V1 == NULL || WT == NULL) {
2031 *err = E_ALLOC;
2032 } else {
2033 V0 = gretl_model_get_matrix(aux, M_VCV, err);
2034 }
2035
2036 if (!*err) {
2037 ki = aux->ncoeff - order;
2038
2039 for (i=0; i<order; i++) {
2040 x = aux->coeff[ki];
2041 gretl_vector_set(b, i, x);
2042 kj = ki;
2043 for (j=i; j<order; j++) {
2044 x = gretl_matrix_get(V0, ki, kj);
2045 gretl_matrix_set(V1, i, j, x);
2046 gretl_matrix_set(V1, j, i, x);
2047 kj++;
2048 }
2049 ki++;
2050 }
2051 *err = gretl_invert_symmetric_matrix(V1);
2052 }
2053
2054 if (!*err) {
2055 gretl_matrix_qform(b, GRETL_MOD_NONE, V1, WT, GRETL_MOD_NONE);
2056 x = gretl_vector_get(WT, 0) / order;
2057 }
2058
2059 gretl_vector_free(WT);
2060 gretl_vector_free(b);
2061 gretl_matrix_free(V1);
2062 gretl_matrix_free(V0);
2063
2064 return x;
2065 }
2066
2067 /**
2068 * ivreg_autocorr_test:
2069 * @pmod: pointer to model to be tested.
2070 * @order: lag order for test.
2071 * @dset: dataset struct.
2072 * @opt: if flags include OPT_S, save test results to model;
2073 * if OPT_Q, be less verbose.
2074 * @prn: gretl printing struct.
2075 *
2076 * Tests the given IV model for autocorrelation of order equal
2077 * to the specified value, or equal to the frequency of the data if
2078 * the supplied @order is zero, as per Godfrey (1994), "Testing for
2079 * Serial Correlation by Variable Addition in Dynamic Models Estimated
2080 * by Instrumental Variables", RES. Note that none of the
2081 * asymptotically equivalent tests given on page 553 is used
2082 * here. Instead, we estimate the model augmented with lags and then
2083 * perform a Wald-type test. The resulting chi-square statistic is
2084 * divided by its degrees of freedom as a finite-sample adjustment and
2085 * compared to an F distribution.
2086 *
2087 * Returns: 0 on successful completion, error code on error.
2088 */
2089
ivreg_autocorr_test(MODEL * pmod,int order,DATASET * dset,gretlopt opt,PRN * prn)2090 static int ivreg_autocorr_test (MODEL *pmod, int order,
2091 DATASET *dset, gretlopt opt,
2092 PRN *prn)
2093 {
2094 int smpl_t1 = dset->t1;
2095 int smpl_t2 = dset->t2;
2096 int v = dset->v;
2097 int *addlist = NULL;
2098 int *testlist = NULL;
2099 double x = 0, pval = 1;
2100 MODEL aux;
2101 int i, t;
2102 int err = 0;
2103
2104 if (dataset_is_panel(dset)) {
2105 return E_NOTIMP;
2106 }
2107
2108 if (model_has_missing_obs(pmod)) {
2109 return E_MISSDATA;
2110 }
2111
2112 /* impose original sample range */
2113 impose_model_smpl(pmod, dset);
2114
2115 gretl_model_init(&aux, dset);
2116
2117 if (order <= 0) {
2118 order = dset->pd;
2119 }
2120
2121 if (pmod->ncoeff + order >= dset->t2 - dset->t1) {
2122 return E_DF;
2123 }
2124
2125 addlist = gretl_list_new(order);
2126
2127 if (addlist == NULL) {
2128 err = E_ALLOC;
2129 } else {
2130 err = add_residual_to_dataset(pmod, dset);
2131 }
2132
2133 if (!err) {
2134 /* add lags of residual */
2135 for (i=1; i<=order && !err; i++) {
2136 int lnum;
2137
2138 lnum = laggenr(v, i, dset);
2139
2140 if (lnum < 0) {
2141 gretl_errmsg_set(_("lagging uhat failed"));
2142 err = E_LAGS;
2143 } else {
2144 /* set the first entries to 0 for compatibility with Godfrey (1994)
2145 and PcGive (not perfect) */
2146 for (t=smpl_t1; t<smpl_t1+i; t++) {
2147 dset->Z[lnum][t] = 0;
2148 }
2149 addlist[i] = lnum;
2150 }
2151 }
2152 if (!err) {
2153 /* compose augmented regression list */
2154 testlist = ivreg_list_add(pmod->list, addlist, OPT_B, &err);
2155 }
2156 }
2157
2158 if (!err) {
2159 gretlopt ivopt = OPT_A;
2160
2161 transcribe_option_flags(&ivopt, pmod->opt,
2162 OPT_L | OPT_G | OPT_R);
2163 aux = ivreg(testlist, dset, ivopt);
2164 err = aux.errcode;
2165 }
2166
2167 if (!err) {
2168 x = ivreg_autocorr_wald_stat(&aux, order, &err);
2169 }
2170
2171 if (!err) {
2172 aux.aux = AUX_AR;
2173 gretl_model_set_int(&aux, "BG_order", order);
2174 pval = snedecor_cdf_comp(order, aux.nobs - pmod->ncoeff - order, x);
2175
2176 if (opt & OPT_Q) {
2177 bg_test_header(order, prn, 1);
2178 } else {
2179 printmodel(&aux, dset, OPT_S, prn);
2180 }
2181
2182 pputc(prn, '\n');
2183 pprintf(prn, "%s: Pseudo-LMF = %f,\n", _("Test statistic"), x);
2184 pprintf(prn, "%s = P(F(%d,%d) > %g) = %.3g\n", _("with p-value"),
2185 order, aux.nobs - pmod->ncoeff, x, pval);
2186 pputc(prn, '\n');
2187 record_test_result(x / order, pval);
2188
2189 if (opt & OPT_S) {
2190 ModelTest *test = model_test_new(GRETL_TEST_AUTOCORR);
2191
2192 if (test != NULL) {
2193 model_test_set_teststat(test, GRETL_STAT_LMF);
2194 model_test_set_dfn(test, order);
2195 model_test_set_dfd(test, aux.nobs - pmod->ncoeff);
2196 model_test_set_order(test, order);
2197 model_test_set_value(test, x);
2198 model_test_set_pvalue(test, pval);
2199 maybe_add_test_to_model(pmod, test);
2200 }
2201 }
2202 }
2203
2204 free(addlist);
2205 free(testlist);
2206
2207 dataset_drop_last_variables(dset, dset->v - v);
2208 clear_model(&aux);
2209
2210 /* reset sample as it was */
2211 dset->t1 = smpl_t1;
2212 dset->t2 = smpl_t2;
2213
2214 return err;
2215 }
2216
lb_autocorr_test(MODEL * pmod,int order,gretlopt opt,PRN * prn)2217 static int lb_autocorr_test (MODEL *pmod, int order,
2218 gretlopt opt, PRN *prn)
2219 {
2220 double lb, pval = NADBL;
2221 int df, err = 0;
2222
2223 df = order - arma_model_get_n_arma_coeffs(pmod);
2224
2225 if (df < 0) {
2226 gretl_errmsg_set("Insufficient degrees of freedom for test");
2227 return E_DATA;
2228 }
2229
2230 lb = ljung_box(order, pmod->t1, pmod->t2, pmod->uhat, &err);
2231
2232 if (!err) {
2233 pval = chisq_cdf_comp(df, lb);
2234 if (na(pval)) {
2235 err = E_DATA;
2236 }
2237 }
2238
2239 if (err) {
2240 gretl_errmsg_set(_("Error calculating Ljung-Box statistic"));
2241 } else {
2242 pputc(prn, '\n');
2243 pprintf(prn, _("Test for autocorrelation up to order %d"),
2244 order);
2245 pputs(prn, "\n\n");
2246 pprintf(prn, "Ljung-Box Q' = %g,\n", lb);
2247 pprintf(prn, "%s = P(%s(%d) > %g) = %#.4g\n", _("with p-value"),
2248 _("Chi-square"), df, lb, chisq_cdf_comp(df, lb));
2249 pputc(prn, '\n');
2250 record_test_result(lb, pval);
2251 }
2252
2253 if (!err && (opt & OPT_S)) {
2254 ModelTest *test = model_test_new(GRETL_TEST_AUTOCORR);
2255
2256 if (test != NULL) {
2257 model_test_set_teststat(test, GRETL_STAT_LB_CHISQ);
2258 model_test_set_dfn(test, df);
2259 model_test_set_order(test, order);
2260 model_test_set_value(test, lb);
2261 model_test_set_pvalue(test, pval);
2262 maybe_add_test_to_model(pmod, test);
2263 }
2264 }
2265
2266 return err;
2267 }
2268
2269 /**
2270 * regular_autocorr_test:
2271 * @pmod: pointer to model to be tested.
2272 * @order: lag order for test.
2273 * @dset: dataset struct.
2274 * @opt: if flags include OPT_S, save test results to model;
2275 * if OPT_Q, be less verbose; if OPT_I, be silent.
2276 * @prn: gretl printing struct.
2277 *
2278 * Tests the given model for autocorrelation of order equal to
2279 * the specified value, or equal to the frequency of the data if
2280 * the supplied @order is zero. Prints TR^2 and LMF test statistics.
2281 *
2282 * Returns: 0 on successful completion, error code on error.
2283 */
2284
regular_autocorr_test(MODEL * pmod,int order,DATASET * dset,gretlopt opt,PRN * prn)2285 static int regular_autocorr_test (MODEL *pmod, int order, DATASET *dset,
2286 gretlopt opt, PRN *prn)
2287 {
2288 int save_t1 = dset->t1;
2289 int save_t2 = dset->t2;
2290 int *newlist = NULL;
2291 MODEL aux;
2292 double RSSxe, RSSx = pmod->ess;
2293 int i, t, n = dset->n, v = dset->v;
2294 double trsq, LMF, lb, pval = 1.0;
2295 int err = 0;
2296
2297 gretl_model_init(&aux, dset);
2298
2299 if (order <= 0) {
2300 order = dset->pd;
2301 }
2302
2303 if (pmod->ncoeff + order >= pmod->t2 - pmod->t1) {
2304 return E_DF;
2305 }
2306
2307 newlist = gretl_list_new(pmod->list[0] + order);
2308
2309 if (newlist == NULL) {
2310 err = E_ALLOC;
2311 } else {
2312 newlist[0] = pmod->list[0] + order;
2313 for (i=2; i<=pmod->list[0]; i++) {
2314 newlist[i] = pmod->list[i];
2315 }
2316 if (dataset_add_series(dset, 1 + order)) {
2317 err = E_ALLOC;
2318 }
2319 }
2320
2321 if (!err) {
2322 /* add uhat to data set: substitute zeros for
2323 pre-sample values */
2324 for (t=0; t<n; t++) {
2325 if (t < pmod->t1) {
2326 dset->Z[v][t] = 0.0;
2327 } else {
2328 dset->Z[v][t] = pmod->uhat[t];
2329 }
2330 }
2331 strcpy(dset->varname[v], "uhat");
2332 series_set_label(dset, v, _("residual"));
2333 /* then order lags of same */
2334 for (i=1; i<=order; i++) {
2335 int s, lv = v + i;
2336 double ul;
2337
2338 sprintf(dset->varname[lv], "uhat_%d", i);
2339 newlist[pmod->list[0] + i] = lv;
2340 for (t=0; t<dset->n; t++) {
2341 s = t - i;
2342 if (s < 0) {
2343 dset->Z[lv][t] = 0.0;
2344 } else {
2345 ul = dset->Z[v][s];
2346 dset->Z[lv][t] = (na(ul))? 0.0 : ul;
2347 }
2348 }
2349 }
2350 }
2351
2352 /* LMF apparatus: see Kiviet, Review of Economic Studies,
2353 53/2, 1986, equation (5), p. 245.
2354 */
2355
2356 if (!err) {
2357 /* regression on [X~E], using original sample */
2358 impose_model_smpl(pmod, dset);
2359 newlist[1] = v;
2360 aux = lsq(newlist, dset, OLS, OPT_A);
2361 err = aux.errcode;
2362 if (err) {
2363 errmsg(err, prn);
2364 } else {
2365 RSSxe = aux.ess;
2366 }
2367 }
2368
2369 if (!err) {
2370 int dfd = aux.nobs - pmod->ncoeff - order;
2371 int lberr;
2372
2373 aux.aux = AUX_AR;
2374 gretl_model_set_int(&aux, "BG_order", order);
2375 trsq = aux.rsq * aux.nobs;
2376 LMF = ((RSSx - RSSxe) / RSSxe) * dfd / order;
2377 pval = snedecor_cdf_comp(order, dfd, LMF);
2378
2379 if (pmod->aux == AUX_VAR || (opt & OPT_I)) {
2380 ; /* don't print anything here */
2381 } else {
2382 if (opt & OPT_Q) {
2383 bg_test_header(order, prn, 0);
2384 } else {
2385 printmodel(&aux, dset, OPT_NONE, prn);
2386 pputc(prn, '\n');
2387 }
2388 pprintf(prn, "%s: LMF = %f,\n", _("Test statistic"), LMF);
2389 pprintf(prn, "%s = P(F(%d,%d) > %g) = %.3g\n", _("with p-value"),
2390 order, aux.nobs - pmod->ncoeff - order, LMF, pval);
2391
2392 pprintf(prn, "\n%s: TR^2 = %f,\n",
2393 _("Alternative statistic"), trsq);
2394 pprintf(prn, "%s = P(%s(%d) > %g) = %.3g\n\n", _("with p-value"),
2395 _("Chi-square"), order, trsq, chisq_cdf_comp(order, trsq));
2396
2397 lb = ljung_box(order, pmod->t1, pmod->t2, dset->Z[v], &lberr);
2398 if (!na(lb)) {
2399 pprintf(prn, "Ljung-Box Q' = %g,\n", lb);
2400 pprintf(prn, "%s = P(%s(%d) > %g) = %.3g\n", _("with p-value"),
2401 _("Chi-square"), order, lb, chisq_cdf_comp(order, lb));
2402 }
2403 pputc(prn, '\n');
2404 }
2405
2406 record_test_result(LMF, pval);
2407
2408 if (opt & OPT_S) {
2409 /* save the test onto @pmod */
2410 ModelTest *test = model_test_new(GRETL_TEST_AUTOCORR);
2411
2412 if (test != NULL) {
2413 model_test_set_teststat(test, GRETL_STAT_LMF);
2414 model_test_set_dfn(test, order);
2415 model_test_set_dfd(test, aux.nobs - pmod->ncoeff - order);
2416 model_test_set_order(test, order);
2417 model_test_set_value(test, LMF);
2418 model_test_set_pvalue(test, pval);
2419 maybe_add_test_to_model(pmod, test);
2420 }
2421 }
2422 }
2423
2424 free(newlist);
2425 dataset_drop_last_variables(dset, dset->v - v);
2426 clear_model(&aux);
2427
2428 /* reset sample as it was */
2429 dset->t1 = save_t1;
2430 dset->t2 = save_t2;
2431
2432 return err;
2433 }
2434
2435 /**
2436 * autocorr_test:
2437 * @pmod: pointer to model to be tested.
2438 * @order: lag order for test.
2439 * @dset: dataset struct.
2440 * @opt: if flags include OPT_S, save test results to model;
2441 * if OPT_Q, be less verbose; if OPT_I, be silent.
2442 * @prn: gretl printing struct.
2443 *
2444 * Tests the given model for autocorrelation of order equal to
2445 * the specified value, or equal to the frequency of the data if
2446 * the supplied @order is zero.
2447 *
2448 * Returns: 0 on successful completion, error code on error.
2449 */
2450
autocorr_test(MODEL * pmod,int order,DATASET * dset,gretlopt opt,PRN * prn)2451 int autocorr_test (MODEL *pmod, int order, DATASET *dset,
2452 gretlopt opt, PRN *prn)
2453 {
2454 if (pmod->ci == IVREG) {
2455 return ivreg_autocorr_test(pmod, order, dset, opt, prn);
2456 } else if (pmod->ci == ARMA) {
2457 return lb_autocorr_test(pmod, order, opt, prn);
2458 } else if (gretl_is_regular_panel_model(pmod)) {
2459 if (dset->pd < 3) {
2460 /* time series not long enough */
2461 return E_NOTIMP;
2462 } else {
2463 return panel_autocorr_test(pmod, dset, opt, prn);
2464 }
2465 }
2466
2467 if (pmod->ci != OLS && pmod->ci != VAR) {
2468 return E_NOTIMP;
2469 } else if (model_has_missing_obs(pmod)) {
2470 return E_MISSDATA;
2471 }
2472
2473 return regular_autocorr_test(pmod, order, dset, opt, prn);
2474 }
2475
chow_active(int split,const double * x,int t)2476 static int chow_active (int split, const double *x, int t)
2477 {
2478 if (x != NULL) {
2479 return x[t] == 1.0;
2480 } else {
2481 return (t >= split);
2482 }
2483 }
2484
get_break_list(const MODEL * pmod,int ci,int * err)2485 static int *get_break_list (const MODEL *pmod, int ci, int *err)
2486 {
2487 const char *lname = get_optval_string(ci, OPT_L);
2488 int *list = NULL;
2489
2490 if (lname == NULL) {
2491 *err = E_DATA;
2492 } else {
2493 list = get_list_by_name(lname);
2494 }
2495
2496 if (list != NULL) {
2497 if (list[0] == 0) {
2498 *err = E_DATA;
2499 } else {
2500 int i;
2501
2502 for (i=1; i<=list[0] && !*err; i++) {
2503 if (!in_gretl_list(pmod->list, list[i])) {
2504 *err = E_DATA;
2505 } else if (list[i] == 0) {
2506 *err = E_DATA;
2507 }
2508 }
2509 }
2510 }
2511
2512 return list;
2513 }
2514
2515 /* compose list of variables to be used for the Chow test and add
2516 them to the data set */
2517
2518 static int *
make_chow_list(const MODEL * pmod,DATASET * dset,int split,int dumv,int ci,gretlopt opt,int * err)2519 make_chow_list (const MODEL *pmod, DATASET *dset,
2520 int split, int dumv, int ci,
2521 gretlopt opt, int *err)
2522 {
2523 int *chowlist = NULL;
2524 int *brklist = NULL;
2525 int l0 = pmod->list[0];
2526 int ninter = 0, newvars = 0;
2527 int havedum = (dumv > 0);
2528 int i, t, v = dset->v;
2529
2530 if (havedum && in_gretl_list(pmod->list, dumv)) {
2531 gretl_errmsg_sprintf(_("The model already contains %s"),
2532 dset->varname[dumv]);
2533 *err = E_DATA;
2534 return NULL;
2535 }
2536
2537 if (opt & OPT_L) {
2538 /* --limit-to */
2539 brklist = get_break_list(pmod, ci, err);
2540 if (*err) {
2541 return NULL;
2542 } else {
2543 ninter = brklist[0];
2544 }
2545 } else {
2546 /* number of interaction terms */
2547 ninter = pmod->ncoeff - pmod->ifc;
2548 }
2549
2550 newvars = ninter + 1 - havedum;
2551
2552 if (dataset_add_series(dset, newvars)) {
2553 *err = E_ALLOC;
2554 } else {
2555 chowlist = gretl_list_new(pmod->list[0] + ninter + 1);
2556 if (chowlist == NULL) {
2557 *err = E_ALLOC;
2558 }
2559 }
2560
2561 if (!*err) {
2562 const double *cdum = NULL;
2563
2564 for (i=1; i<=l0; i++) {
2565 chowlist[i] = pmod->list[i];
2566 }
2567
2568 if (dumv > 0) {
2569 /* we have a user-supplied dummy var */
2570 cdum = dset->Z[dumv];
2571 } else {
2572 /* generate the split variable */
2573 for (t=0; t<dset->n; t++) {
2574 dset->Z[v][t] = (double) (t >= split);
2575 }
2576 strcpy(dset->varname[v], "splitdum");
2577 series_set_label(dset, v, _("dummy variable for Chow test"));
2578 }
2579
2580 chowlist[l0 + 1] = (dumv > 0)? dumv : v;
2581
2582 /* and the interaction terms */
2583 for (i=0; i<ninter; i++) {
2584 int pv, sv = v + i + 1 - havedum;
2585
2586 if (brklist != NULL) {
2587 pv = brklist[i+1];
2588 } else {
2589 pv = pmod->list[i + 2 + pmod->ifc];
2590 }
2591
2592 for (t=0; t<dset->n; t++) {
2593 if (model_missing(pmod, t)) {
2594 dset->Z[sv][t] = NADBL;
2595 } else if (chow_active(split, cdum, t)) {
2596 dset->Z[sv][t] = dset->Z[pv][t];
2597 } else {
2598 dset->Z[sv][t] = 0.0;
2599 }
2600 }
2601
2602 if (havedum) {
2603 sprintf(dset->varname[sv], "%.2s_", dset->varname[dumv]);
2604 } else {
2605 strcpy(dset->varname[sv], "sd_");
2606 }
2607 strncat(dset->varname[sv], dset->varname[pv], VNAMELEN - 4);
2608 chowlist[l0 + 2 + i] = sv;
2609 }
2610 }
2611
2612 return chowlist;
2613 }
2614
write_plot_x_range(const double * x,int t1,int t2,FILE * fp)2615 static void write_plot_x_range (const double *x, int t1, int t2,
2616 FILE *fp)
2617 {
2618 double xmin0, xmin, xmax0, xmax;
2619 double xrange;
2620
2621 gretl_minmax(t1, t2, x, &xmin0, &xmax0);
2622 xrange = xmax0 - xmin0;
2623 xmin = xmin0 - xrange * .025;
2624 if (xmin0 >= 0.0 && xmin < 0.0) {
2625 xmin = 0.0;
2626 }
2627 xmax = xmax0 + xrange * .025;
2628 fprintf(fp, "set xrange [%.10g:%.10g]\n", xmin, xmax);
2629 }
2630
QLR_graph(const double * testvec,int t1,int t2,const DATASET * dset,int df,int robust)2631 static int QLR_graph (const double *testvec, int t1, int t2,
2632 const DATASET *dset, int df, int robust)
2633 {
2634 const double *x = gretl_plotx(dset, OPT_NONE);
2635 const char *titles[] = {
2636 N_("Chow F-test for break"),
2637 N_("Robust Wald test for break")
2638 };
2639 const char *title;
2640 double (*qlr_critval) (int);
2641 double critval = NADBL;
2642 FILE *fp;
2643 int t, err = 0;
2644
2645 set_effective_plot_ci(QLRTEST);
2646
2647 fp = open_plot_input_file(PLOT_REGULAR, 0, &err);
2648 if (err) {
2649 return err;
2650 }
2651
2652 qlr_critval = get_plugin_function("qlr_critval_15_05");
2653 if (qlr_critval != NULL) {
2654 critval = qlr_critval(df);
2655 }
2656
2657 print_keypos_string(GP_KEY_LEFT_TOP, fp);
2658
2659 if (robust) {
2660 title = titles[1];
2661 } else {
2662 title = titles[0];
2663 if (!na(critval)) {
2664 critval /= df;
2665 }
2666 }
2667
2668 gretl_push_c_numeric_locale();
2669
2670 write_plot_x_range(x, t1, t2, fp);
2671
2672 if (!na(critval)) {
2673 fprintf(fp, "plot \\\n"
2674 "'-' using 1:2 title \"%s\" w lines , \\\n"
2675 "%g title \"%s\" w lines lt 0\n",
2676 _(title), critval, _("5% QLR critical value"));
2677 } else {
2678 fprintf(fp, "plot \\\n"
2679 "'-' using 1:2 title \"%s\" w lines\n", _(title));
2680 }
2681
2682 for (t=t1; t<=t2; t++) {
2683 fprintf(fp, "%g %g\n", x[t], testvec[t-t1]);
2684 }
2685 fputs("e\n", fp);
2686
2687 gretl_pop_c_numeric_locale();
2688
2689 err = finalize_plot_input_file(fp);
2690
2691 set_effective_plot_ci(GNUPLOT);
2692
2693 return err;
2694 }
2695
save_QLR_test(MODEL * pmod,const char * datestr,double X2,double pval,int df)2696 static void save_QLR_test (MODEL *pmod, const char *datestr,
2697 double X2, double pval, int df)
2698 {
2699 ModelTest *test = model_test_new(GRETL_TEST_QLR);
2700
2701 if (test != NULL) {
2702 model_test_set_teststat(test, GRETL_STAT_SUP_WALD);
2703 model_test_set_param(test, datestr);
2704 model_test_set_value(test, X2);
2705 model_test_set_pvalue(test, pval);
2706 model_test_set_dfn(test, df);
2707 maybe_add_test_to_model(pmod, test);
2708 }
2709 }
2710
2711 /* for internal use by the "qlrtest" command */
2712
get_QLR_pval(double test,int df,int k1,int k2,MODEL * pmod)2713 static double get_QLR_pval (double test, int df,
2714 int k1, int k2,
2715 MODEL *pmod)
2716 {
2717 double (*qlr_asy_pvalue) (double, int, double);
2718 double pi_1, pi_2, lam0;
2719 double pval;
2720
2721 qlr_asy_pvalue = get_plugin_function("qlr_asy_pvalue");
2722
2723 if (qlr_asy_pvalue == NULL) {
2724 return NADBL;
2725 }
2726
2727 pi_1 = (k1 - pmod->t1 + 1) / (double) pmod->nobs;
2728 pi_2 = (k2 - pmod->t1 + 1) / (double) pmod->nobs;
2729 lam0 = (pi_2*(1.0 - pi_1)) / (pi_1*(1.0 - pi_2));
2730
2731 pval = (*qlr_asy_pvalue) (test, df, lam0);
2732
2733 #if 0
2734 fprintf(stderr, "k1 = %d, k2 = %d, pi_1 = %g pi_2 = %g\n",
2735 k1, k2, pi_1, pi_2);
2736 fprintf(stderr, "lambda0 = %g, pi0 = %g, test = %g, pval = %g\n",
2737 lam0, 1.0/(1 + sqrt(lam0)), test, pval);
2738 #endif
2739
2740 return pval;
2741 }
2742
2743 /* for use by the qlrpval() function */
2744
QLR_pval(double X2,int df,double p1,double p2)2745 double QLR_pval (double X2, int df, double p1, double p2)
2746 {
2747 double (*qlr_asy_pvalue) (double, int, double);
2748 double lamda, pval;
2749
2750 if (X2 < 0 || df <= 0 || p1 <= 0 || p2 <= p1 || p2 >= 1) {
2751 return NADBL;
2752 }
2753
2754 qlr_asy_pvalue = get_plugin_function("qlr_asy_pvalue");
2755
2756 if (qlr_asy_pvalue == NULL) {
2757 return NADBL;
2758 }
2759
2760 lamda = (p2*(1.0 - p1)) / (p1*(1.0 - p2));
2761 pval = (*qlr_asy_pvalue) (X2, df, lamda);
2762
2763 return pval;
2764 }
2765
QLR_print_result(double test,double pval,const char * datestr,int dfn,int dfd,int robust,PRN * prn)2766 static void QLR_print_result (double test,
2767 double pval,
2768 const char *datestr,
2769 int dfn, int dfd,
2770 int robust,
2771 PRN *prn)
2772 {
2773 pputs(prn, _("Quandt likelihood ratio test for structural break at an "
2774 "unknown point,\nwith 15 percent trimming"));
2775 pputs(prn, ":\n");
2776 if (robust) {
2777 pprintf(prn, _("The maximum Wald test = %g occurs "
2778 "at observation %s"), test, datestr);
2779 } else {
2780 pprintf(prn, _("The maximum F(%d, %d) = %g occurs "
2781 "at observation %s"), dfn, dfd, test, datestr);
2782 test *= dfn;
2783 }
2784 pputc(prn, '\n');
2785
2786 if (!na(pval)) {
2787 pprintf(prn, _("Asymptotic p-value = %.6g for "
2788 "chi-square(%d) = %g"),
2789 pval, dfn, test);
2790 pputc(prn, '\n');
2791 }
2792 pputc(prn, '\n');
2793 }
2794
save_chow_test(MODEL * pmod,const char * chowstr,double test,double pval,int dfn,int dfd,gretlopt opt)2795 static void save_chow_test (MODEL *pmod, const char *chowstr,
2796 double test, double pval,
2797 int dfn, int dfd,
2798 gretlopt opt)
2799 {
2800 int ttype = (opt & OPT_D)? GRETL_TEST_CHOWDUM : GRETL_TEST_CHOW;
2801 ModelTest *mt = model_test_new(ttype);
2802
2803 if (mt != NULL) {
2804 if (dfd == 0) {
2805 model_test_set_teststat(mt, GRETL_STAT_WALD_CHISQ);
2806 } else {
2807 model_test_set_teststat(mt, GRETL_STAT_F);
2808 }
2809 model_test_set_param(mt, chowstr);
2810 model_test_set_value(mt, test);
2811 model_test_set_pvalue(mt, pval);
2812 model_test_set_dfn(mt, dfn);
2813 model_test_set_dfd(mt, dfd);
2814 maybe_add_test_to_model(pmod, mt);
2815 }
2816 }
2817
QLR_plot_wanted(gretlopt opt)2818 static int QLR_plot_wanted (gretlopt opt)
2819 {
2820 /* the default: show a plot only if in interactive mode */
2821 int ret = !gretl_in_batch_mode();
2822
2823 if (opt & OPT_U) {
2824 /* but the default can be overruled by @opt */
2825 const char *plotparm = get_optval_string(QLRTEST, OPT_U);
2826
2827 if (plotparm != NULL) {
2828 if (!strcmp(plotparm, "none")) {
2829 ret = 0;
2830 } else {
2831 ret = 1;
2832 }
2833 }
2834 }
2835
2836 return ret;
2837 }
2838
2839 /*
2840 * real_chow_test:
2841 * @chowparm: sample breakpoint; or ID number of dummy
2842 * variable if given OPT_D; or ignored if given OPT_T.
2843 * @pmod: pointer to model to be tested.
2844 * @dset: dataset information.
2845 * @ci: either CHOW or QLRTEST.
2846 * @opt: if flags include OPT_S, save test results to model;
2847 * if OPT_D included, do the Chow test based on a given dummy
2848 * variable; if OPT_T included, do the QLR test. If OPT_M,
2849 * just save the test on the model, don't set $test and
2850 * $pvalue.
2851 * @prn: gretl printing struct.
2852 *
2853 * Returns: 0 on successful completion, error code on error.
2854 */
2855
real_chow_test(int chowparm,MODEL * pmod,DATASET * dset,int ci,gretlopt opt,PRN * prn)2856 static int real_chow_test (int chowparm, MODEL *pmod, DATASET *dset,
2857 int ci, gretlopt opt, PRN *prn)
2858 {
2859 int save_t1 = dset->t1;
2860 int save_t2 = dset->t2;
2861 int *chowlist = NULL;
2862 int origv = dset->v;
2863 MODEL chow_mod;
2864 int QLR = (opt & OPT_T);
2865 int dumv = 0, split = 0, smax = 0;
2866 int err = 0;
2867
2868 if (pmod->ci != OLS) {
2869 return E_OLSONLY;
2870 }
2871
2872 if (exact_fit_check(pmod, prn)) {
2873 return 0;
2874 }
2875
2876 /* temporarily impose the sample that was in force when the
2877 original model was estimated */
2878 impose_model_smpl(pmod, dset);
2879
2880 gretl_model_init(&chow_mod, dset);
2881
2882 if (QLR) {
2883 /* "15 percent trimming": exactly how this should be
2884 defined is debatable, but the following agrees
2885 with the strucchange package for R, when the
2886 Fstats() function is given "from=0.15, to=0.85".
2887 */
2888 split = pmod->t1 + floor(0.15 * pmod->nobs) - 1;
2889 smax = pmod->t1 + floor(0.85 * pmod->nobs) - 1;
2890 } else if (opt & OPT_D) {
2891 /* Chow, using a predefined dummy */
2892 dumv = chowparm;
2893 } else {
2894 /* Chow, using break observation */
2895 smax = split = chowparm;
2896 }
2897
2898 if (!err) {
2899 chowlist = make_chow_list(pmod, dset, split, dumv, ci,
2900 opt, &err);
2901 }
2902
2903 if (err) {
2904 goto bailout;
2905 }
2906
2907 if (QLR) {
2908 /* Quandt likelihood ratio */
2909 int robust = (pmod->opt & OPT_R);
2910 int do_plot = QLR_plot_wanted(opt);
2911 gretlopt lsqopt = OPT_A;
2912 double test, testmax = 0.0;
2913 double *testvec = NULL;
2914 int *testlist = NULL;
2915 int dfn = 0, dfd = 0;
2916 int nextra = dset->v - origv;
2917 int tmax = 0;
2918 int i, t;
2919
2920 if (do_plot) {
2921 testvec = malloc((smax - split + 1) * sizeof *testvec);
2922 }
2923
2924 if (robust) {
2925 lsqopt |= OPT_R;
2926 testlist = gretl_list_diff_new(chowlist, pmod->list, 2);
2927 if (testlist == NULL) {
2928 err = E_ALLOC;
2929 }
2930 }
2931
2932 for (t=split; t<=smax && !err; t++) {
2933 chow_mod = lsq(chowlist, dset, OLS, lsqopt);
2934 if (chow_mod.errcode) {
2935 err = chow_mod.errcode;
2936 errmsg(err, prn);
2937 break;
2938 }
2939 dfn = chow_mod.ncoeff - pmod->ncoeff;
2940 if (robust) {
2941 test = wald_omit_chisq(testlist, &chow_mod);
2942 #if 0
2943 fprintf(stderr, "%d %g\n", t+1, test);
2944 #endif
2945 } else {
2946 dfd = chow_mod.dfd;
2947 test = (pmod->ess - chow_mod.ess) * dfd / (chow_mod.ess * dfn);
2948 #if 0
2949 fprintf(stderr, "%d %g\n", t+1, test*dfn);
2950 #endif
2951 }
2952 if (test > testmax) {
2953 tmax = t;
2954 testmax = test;
2955 }
2956 if (testvec != NULL) {
2957 testvec[t - split] = test;
2958 }
2959 #if 0
2960 fprintf(stderr, "split at t=%d: F(%d,%d)=%g (X2=%g)\n", t,
2961 dfn, dfd, F, test*dfn);
2962 fprintf(stderr, " pmod->ess = %g, chow_mod.ess = %g\n",
2963 pmod->ess, chow_mod.ess);
2964 #endif
2965 clear_model(&chow_mod);
2966 for (i=0; i<nextra; i++) {
2967 dset->Z[origv+i][t] = 0.0;
2968 }
2969 }
2970
2971 if (!err) {
2972 char datestr[OBSLEN];
2973 double pval, X2;
2974
2975 X2 = robust ? testmax : dfn * testmax;
2976 pval = get_QLR_pval(X2, dfn, split, smax, pmod);
2977 ntolabel(datestr, tmax, dset);
2978
2979 if (!(opt & OPT_Q)) {
2980 QLR_print_result(testmax, pval, datestr, dfn, dfd,
2981 robust, prn);
2982 }
2983 if (!(opt & OPT_M)) {
2984 record_QLR_test_result(X2, pval, tmax + 1);
2985 }
2986 if (opt & (OPT_S | OPT_M)) {
2987 save_QLR_test(pmod, datestr, X2, pval, dfn);
2988 }
2989 if (testvec != NULL) {
2990 QLR_graph(testvec, split, smax, dset, dfn, robust);
2991 }
2992 }
2993
2994 free(testvec);
2995 free(testlist);
2996 } else {
2997 /* regular (or robust) Chow test */
2998 int robust = (pmod->opt & OPT_R);
2999 gretlopt lsqopt = OPT_A;
3000 int *testlist = NULL;
3001
3002 if (robust) {
3003 lsqopt |= OPT_R;
3004 }
3005 chow_mod = lsq(chowlist, dset, OLS, lsqopt);
3006
3007 if (chow_mod.errcode) {
3008 err = chow_mod.errcode;
3009 errmsg(err, prn);
3010 } else if (chow_mod.ncoeff <= pmod->ncoeff) {
3011 err = chow_mod.errcode = E_DATA;
3012 errmsg(err, prn);
3013 } else {
3014 int dfd = (robust)? 0 : chow_mod.dfd;
3015 int dfn = chow_mod.ncoeff - pmod->ncoeff;
3016 double test, pval = NADBL;
3017
3018 if (!(opt & OPT_Q)) {
3019 chow_mod.aux = AUX_CHOW;
3020 printmodel(&chow_mod, dset, OPT_NONE, prn);
3021 }
3022
3023 if (robust) {
3024 testlist = gretl_list_diff_new(chow_mod.list, pmod->list, 2);
3025 if (testlist == NULL) {
3026 err = E_ALLOC;
3027 goto bailout;
3028 } else {
3029 test = wald_omit_chisq(testlist, &chow_mod);
3030 }
3031 if (!na(test)) {
3032 pval = chisq_cdf_comp(dfn, test);
3033 }
3034 } else {
3035 test = (pmod->ess - chow_mod.ess) * dfd / (chow_mod.ess * dfn);
3036 if (!na(test)) {
3037 pval = snedecor_cdf_comp(dfn, dfd, test);
3038 }
3039 }
3040
3041 if (na(test)) {
3042 pputs(prn, "Couldn't compute Chow test statistic\n");
3043 } else if (na(pval)) {
3044 pprintf(prn, "Couldn't compute Chow test p-value (test = %g)\n", test);
3045 }
3046
3047 if (!na(test) && !na(pval)) {
3048 char chowstr[VNAMELEN];
3049
3050 if (opt & OPT_Q) {
3051 pputc(prn, '\n');
3052 }
3053 if (opt & OPT_D) {
3054 strcpy(chowstr, dset->varname[chowparm]);
3055 pprintf(prn, _("Chow test for structural difference with respect to %s"),
3056 chowstr);
3057 } else {
3058 ntolabel(chowstr, chowparm, dset);
3059 pprintf(prn, _("Chow test for structural break at observation %s"),
3060 chowstr);
3061 }
3062 pputc(prn, '\n');
3063
3064 if (robust) {
3065 pprintf(prn, " %s(%d) = %g %s %.4f\n", _("Chi-square"),
3066 dfn, test, _("with p-value"), pval);
3067 pprintf(prn, " %s: F(%d, %d) = %g %s %.4f\n\n", _("F-form"),
3068 dfn, chow_mod.dfd, test / dfn, _("with p-value"),
3069 snedecor_cdf_comp(dfn, chow_mod.dfd, test / dfn));
3070 } else {
3071 pprintf(prn, " F(%d, %d) = %g %s %.4f\n\n",
3072 dfn, dfd, test, _("with p-value"), pval);
3073 }
3074
3075 if (opt & OPT_S) {
3076 save_chow_test(pmod, chowstr, test, pval, dfn, dfd, opt);
3077 }
3078
3079 record_test_result(test, pval);
3080 }
3081 }
3082 clear_model(&chow_mod);
3083 free(testlist);
3084 }
3085
3086 bailout:
3087
3088 /* clean up extra variables */
3089 dataset_drop_last_variables(dset, dset->v - origv);
3090 free(chowlist);
3091
3092 dset->t1 = save_t1;
3093 dset->t2 = save_t2;
3094
3095 return err;
3096 }
3097
3098 /**
3099 * chow_test:
3100 * @splitobs: the 0-based observation number at which to split
3101 * the sample.
3102 * @pmod: pointer to model to be tested.
3103 * @dset: dataset struct.
3104 * @opt: if flags include OPT_S, save test results to model.
3105 * @prn: gretl printing struct.
3106 *
3107 * Tests the given model for structural stability (Chow test)
3108 * using the sample break-point given by @splitobs
3109 * and prints the results to @prn. (The second portion of
3110 * the sample runs from observation @splitobs to the
3111 * end of the original sample range.)
3112 *
3113 * Returns: 0 on successful completion, error code on error.
3114 */
3115
chow_test(int splitobs,MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3116 int chow_test (int splitobs, MODEL *pmod, DATASET *dset,
3117 gretlopt opt, PRN *prn)
3118 {
3119 int err = 0;
3120
3121 if (splitobs <= 0 || splitobs >= dset->n) {
3122 gretl_errmsg_set(_("Invalid sample split for Chow test"));
3123 err = E_DATA;
3124 } else {
3125 err = real_chow_test(splitobs, pmod, dset, CHOW, opt, prn);
3126 }
3127
3128 return err;
3129 }
3130
3131 /**
3132 * chow_test_from_dummy:
3133 * @splitvar: the ID number of a dummy variable that should
3134 * be used to divide the sample.
3135 * @pmod: pointer to model to be tested.
3136 * @dset: dataset struct.
3137 * @opt: if flags include OPT_S, save test results to model.
3138 * @prn: gretl printing struct.
3139 *
3140 * Tests the given model for structural stability (Chow test),
3141 * using the dummy variable specified by @splitvar to divide the
3142 * original sample range of @pmod into two portions.
3143 *
3144 * Returns: 0 on successful completion, error code on error.
3145 */
3146
chow_test_from_dummy(int splitvar,MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3147 int chow_test_from_dummy (int splitvar, MODEL *pmod, DATASET *dset,
3148 gretlopt opt, PRN *prn)
3149 {
3150 int err = 0;
3151
3152 if (splitvar <= 0 || splitvar >= dset->v) {
3153 gretl_errmsg_set(_("Invalid sample split for Chow test"));
3154 err = E_DATA;
3155 } else {
3156 err = real_chow_test(splitvar, pmod, dset, CHOW,
3157 opt | OPT_D, prn);
3158 }
3159
3160 return err;
3161 }
3162
3163 /**
3164 * QLR_test:
3165 * @pmod: pointer to model to be tested.
3166 * @dset: dataset struct.
3167 * @opt: if flags include OPT_S, save test results to model.
3168 * @prn: gretl printing struct.
3169 *
3170 * Tests the given model for structural stability via the
3171 * Quandt Likelihood Ratio test for a structural break at
3172 * an unknown point in the sample range.
3173 *
3174 * Returns: 0 on successful completion, error code on error.
3175 */
3176
QLR_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3177 int QLR_test (MODEL *pmod, DATASET *dset,
3178 gretlopt opt, PRN *prn)
3179 {
3180 return real_chow_test(0, pmod, dset, QLRTEST,
3181 opt | OPT_T, prn);
3182 }
3183
3184 /* compute v'Mv, for symmetric M */
3185
vprime_M_v(double * v,double * M,int n)3186 static double vprime_M_v (double *v, double *M, int n)
3187 {
3188 int i, j, jmin, jmax, k;
3189 double xx, val = 0.0;
3190
3191 k = jmin = 0;
3192 for (i=0; i<n; i++) {
3193 xx = 0.0;
3194 for (j=jmin; j<n; j++) {
3195 xx += v[j] * M[k++];
3196 }
3197 val += xx * v[i];
3198 jmin++;
3199 }
3200
3201 jmax = 1;
3202 for (i=1; i<n; i++) {
3203 k = i;
3204 xx = 0.0;
3205 for (j=0; j<jmax; j++) {
3206 xx += v[j] * M[k];
3207 k += n - j - 1;
3208 }
3209 val += xx * v[i];
3210 jmax++;
3211 }
3212
3213 return val;
3214 }
3215
3216 /* Compute scaled recursive residuals and cumulate \bar{w} via a
3217 sequence of OLS regressions. See William Greene, Econometric
3218 Analysis 5e, pp. 135-136.
3219 */
3220
cusum_compute(MODEL * pmod,double * cresid,int T,int k,double * wbar,DATASET * dset)3221 static int cusum_compute (MODEL *pmod, double *cresid, int T, int k,
3222 double *wbar, DATASET *dset)
3223 {
3224 MODEL cmod;
3225 gretlopt opt = OPT_X | OPT_A;
3226 double yhat, den, *xt;
3227 int n = T - k;
3228 int i, j, t;
3229 int err = 0;
3230
3231 /* vector of regressors at t */
3232 xt = malloc(k * sizeof *xt);
3233 if (xt == NULL) {
3234 return E_ALLOC;
3235 }
3236
3237 /* set minimal sample based on model to be tested */
3238 dset->t1 = pmod->t1;
3239 dset->t2 = pmod->t1 + k - 1;
3240
3241 for (j=0; j<n && !err; j++) {
3242 cmod = lsq(pmod->list, dset, OLS, opt);
3243 if (cmod.errcode) {
3244 err = cmod.errcode;
3245 } else {
3246 /* compute one step ahead prediction error: \hat{y}_t
3247 based on estimates through t - 1
3248 */
3249 t = dset->t2 + 1;
3250 yhat = 0.0;
3251 for (i=0; i<cmod.ncoeff; i++) {
3252 xt[i] = dset->Z[cmod.list[i+2]][t];
3253 yhat += cmod.coeff[i] * xt[i];
3254 }
3255 cresid[j] = dset->Z[pmod->list[1]][t] - yhat;
3256 cmod.ci = CUSUM;
3257 err = makevcv(&cmod, 1.0); /* (X'X)^{-1} */
3258 if (!err) {
3259 /* compute scaled residual: divide by
3260 \sqrt{1 + x_t'(X_{t-1}'X_{t-1})^{-1} x_t}
3261 */
3262 den = vprime_M_v(xt, cmod.vcv, cmod.ncoeff);
3263 cresid[j] /= sqrt(1.0 + den);
3264 *wbar += cresid[j];
3265 /* extend the sample by one observation */
3266 dset->t2 += 1;
3267 }
3268 }
3269 clear_model(&cmod);
3270 }
3271
3272 free(xt);
3273
3274 return err;
3275 }
3276
3277 #define okfreq(p) (p == 1 || p == 4 || p == 12 || p == 24 || p == 52)
3278
cusum_do_graph(double a,double b,const double * W,int t1,int k,int m,DATASET * dset,gretlopt opt)3279 static int cusum_do_graph (double a, double b, const double *W,
3280 int t1, int k, int m,
3281 DATASET *dset, gretlopt opt)
3282 {
3283 FILE *fp = NULL;
3284 const double *obs = NULL;
3285 double frac = 1.0;
3286 double x0 = 0.0;
3287 int j, t, err = 0;
3288
3289 fp = open_plot_input_file(PLOT_CUSUM, 0, &err);
3290 if (err) {
3291 return err;
3292 }
3293
3294 if (dataset_is_time_series(dset) && okfreq(dset->pd)) {
3295 b *= dset->pd;
3296 frac /= dset->pd;
3297 obs = gretl_plotx(dset, OPT_NONE);
3298 if (obs != NULL) {
3299 x0 = obs[t1 + k];
3300 }
3301 }
3302
3303 gretl_push_c_numeric_locale();
3304
3305 fprintf(fp, "set xlabel '%s'\n", _("Observation"));
3306 fputs("set nokey\n", fp);
3307
3308 if (opt & OPT_R) {
3309 fprintf(fp, "set title '%s'\n",
3310 /* xgettext:no-c-format */
3311 _("CUSUMSQ plot with 95% confidence band"));
3312 fprintf(fp, "plot \\\n%g*(x-%g) title '' w dots lt 2, \\\n", b, x0 - frac);
3313 fprintf(fp, "%g+%g*(x-%g) title '' w lines lt 2, \\\n", -a, b, x0 - frac);
3314 fprintf(fp, "%g+%g*(x-%g) title '' w lines lt 2, \\\n", a, b, x0 - frac);
3315 } else {
3316 fputs("set xzeroaxis\n", fp);
3317 fprintf(fp, "set title '%s'\n",
3318 /* xgettext:no-c-format */
3319 _("CUSUM plot with 95% confidence band"));
3320 fprintf(fp, "plot \\\n%g+%g*(x-%g) title '' w lines lt 2, \\\n", a, b, x0);
3321 fprintf(fp, "%g-%g*(x-%g) title '' w lines lt 2, \\\n", -a, b, x0);
3322 }
3323
3324 fputs("'-' using 1:2 w linespoints lt 1\n", fp);
3325
3326 for (j=0; j<m; j++) {
3327 t = t1 + k + j;
3328 if (obs != NULL) {
3329 fprintf(fp, "%g %g\n", obs[t], W[j]);
3330 } else {
3331 fprintf(fp, "%d %g\n", t, W[j]);
3332 }
3333 }
3334
3335 fputs("e\n", fp);
3336
3337 gretl_pop_c_numeric_locale();
3338
3339 return finalize_plot_input_file(fp);
3340 }
3341
cusum_harvey_collier(double wbar,double sigma,int m,MODEL * pmod,gretlopt opt,PRN * prn)3342 static void cusum_harvey_collier (double wbar, double sigma, int m,
3343 MODEL *pmod, gretlopt opt,
3344 PRN *prn)
3345 {
3346 double hct, pval;
3347
3348 hct = (sqrt((double) m) * wbar) / sigma;
3349 pval = student_pvalue_2(m - 1, hct);
3350 pprintf(prn, _("\nHarvey-Collier t(%d) = %g with p-value %.4g\n\n"),
3351 m - 1, hct, pval);
3352
3353 if (opt & OPT_S) {
3354 ModelTest *test = model_test_new(GRETL_TEST_CUSUM);
3355
3356 if (test != NULL) {
3357 model_test_set_teststat(test, GRETL_STAT_HARVEY_COLLIER);
3358 model_test_set_dfn(test, m - 1);
3359 model_test_set_value(test, hct);
3360 model_test_set_pvalue(test, pval);
3361 maybe_add_test_to_model(pmod, test);
3362 }
3363 }
3364
3365 record_test_result(hct, pval);
3366 }
3367
3368 /**
3369 * cusum_test:
3370 * @pmod: pointer to model to be tested.
3371 * @dset: dataset struct.
3372 * @opt: if flags include %OPT_S, save results of test to model.
3373 * @prn: gretl printing struct.
3374 *
3375 * Tests the given model for parameter stability via the CUSUM test,
3376 * or if @opt includes %OPT_R, via the CUSUMSQ test; %OPT_Q makes
3377 * the test quiet; %OPT_U governs the associated plot, if wanted.
3378 *
3379 * Returns: 0 on successful completion, error code on error.
3380 */
3381
cusum_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3382 int cusum_test (MODEL *pmod, DATASET *dset,
3383 gretlopt opt, PRN *prn)
3384 {
3385 int save_t1 = dset->t1;
3386 int save_t2 = dset->t2;
3387 int T = pmod->nobs;
3388 int k = pmod->ncoeff;
3389 char cumdate[OBSLEN];
3390 double wbar = 0.0;
3391 double *cresid = NULL, *W = NULL;
3392 int quiet = opt & OPT_Q;
3393 int m, i, j;
3394 int err = 0;
3395
3396 if (pmod->ci != OLS) {
3397 return E_OLSONLY;
3398 }
3399
3400 if (exact_fit_check(pmod, prn)) {
3401 return 0;
3402 }
3403
3404 if (model_has_missing_obs(pmod)) {
3405 return E_MISSDATA;
3406 }
3407
3408 /* the number of forecasts */
3409 m = T - k;
3410
3411 cresid = malloc(m * sizeof *cresid);
3412 W = malloc(m * sizeof *W);
3413
3414 if (cresid == NULL || W == NULL) {
3415 err = E_ALLOC;
3416 }
3417
3418 if (!err) {
3419 err = cusum_compute(pmod, cresid, T, k, &wbar, dset);
3420 if (err) {
3421 errmsg(err, prn);
3422 }
3423 }
3424
3425 if (!err) {
3426 double a, b, den = 0.0, sigma = 0.0;
3427 int sig;
3428
3429 if (opt & OPT_R) {
3430 double a1, a2, a3;
3431 double n = 0.5 * m - 1;
3432
3433 pprintf(prn, "\n%s\n\n", _("CUSUMSQ test for stability of parameters"));
3434
3435 for (j=0; j<m; j++) {
3436 den += cresid[j] * cresid[j];
3437 }
3438
3439 /* see Edgerton and Wells, Oxford Bulletin of Economics and
3440 Statistics, 56, 1994, pp. 355-365 */
3441 a1 = 1.358015 / sqrt(n);
3442 a2 = -0.6701218 / n;
3443 a3 = -0.8858694 / pow(n, 1.5);
3444
3445 /* 0.5 * total height of band */
3446 a = a1 + a2 + a3;
3447 /* slope of expectation wrt time */
3448 b = 1.0 / m;
3449 if (!quiet) {
3450 pputs(prn, _("Cumulated sum of squared residuals"));
3451 }
3452 } else {
3453 wbar /= T - k;
3454 pprintf(prn, "\n%s\n\n", _("CUSUM test for stability of parameters"));
3455 pprintf(prn, _("mean of scaled residuals = %g\n"), wbar);
3456
3457 for (j=0; j<m; j++) {
3458 sigma += (cresid[j] - wbar) * (cresid[j] - wbar);
3459 }
3460 sigma /= T - k - 1;
3461 sigma = sqrt(sigma);
3462 pprintf(prn, _("sigmahat = %g\n\n"), sigma);
3463
3464 /* height of confidence band for first prediction */
3465 a = 0.948 * sqrt((double) m);
3466 /* slope of confidence band limit wrt time */
3467 b = 2.0 * a / m;
3468 if (!quiet) {
3469 pputs(prn, _("Cumulated sum of scaled residuals"));
3470 }
3471 }
3472
3473 pputc(prn, '\n');
3474 pputs(prn, /* xgettext:no-c-format */
3475 _("('*' indicates a value outside of 95% confidence band)"));
3476 pputs(prn, "\n\n");
3477
3478 for (j=0; j<m; j++) {
3479 W[j] = 0.0;
3480 if (opt & OPT_R) {
3481 for (i=0; i<=j; i++) {
3482 W[j] += cresid[i] * cresid[i] / den;
3483 }
3484 sig = fabs(W[j] - (j+1) / (double) m) > a;
3485 } else {
3486 for (i=0; i<=j; i++) {
3487 W[j] += cresid[i];
3488 }
3489 W[j] /= sigma;
3490 sig = fabs(W[j]) > a + j * b;
3491 }
3492 if (!quiet) {
3493 ntolabel(cumdate, pmod->t1 + k + j, dset);
3494 pprintf(prn, " %s %9.3f %s\n", cumdate, W[j], sig? "*" : "");
3495 }
3496 }
3497
3498 if (!(opt & OPT_R)) {
3499 cusum_harvey_collier(wbar, sigma, m, pmod, opt, prn);
3500 }
3501
3502 /* plot with 95% confidence band if wanted */
3503 if (gnuplot_graph_wanted(PLOT_CUSUM, opt)) {
3504 err = cusum_do_graph(a, b, W, pmod->t1, k, m, dset, opt);
3505 }
3506 }
3507
3508 /* restore original sample range */
3509 dset->t1 = save_t1;
3510 dset->t2 = save_t2;
3511
3512 free(cresid);
3513 free(W);
3514
3515 return err;
3516 }
3517
3518 /**
3519 * comfac_test:
3520 * @pmod: pointer to original model.
3521 * @dset: dataset struct.
3522 * @opt: if contains %OPT_S, save test results to model.
3523 * @prn: gretl printing struct.
3524 *
3525 * If @pmod was estimated via an AR(1) estimator, run an
3526 * auxiliary regression to test the implied common-factor
3527 * restriction.
3528 *
3529 * Returns: 0 on successful completion, error code on error.
3530 */
3531
comfac_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3532 int comfac_test (MODEL *pmod, DATASET *dset,
3533 gretlopt opt, PRN *prn)
3534 {
3535 MODEL cmod;
3536 int save_t1 = dset->t1;
3537 int save_t2 = dset->t2;
3538 int v = dset->v;
3539 int *biglist = NULL;
3540 int clearit = 0;
3541 int nadd, i, k, t;
3542 int err;
3543
3544 if (pmod->ci != AR1 || (pmod->opt & OPT_P)) {
3545 /* can't handle Prais-Winsten? */
3546 return E_NOTIMP;
3547 }
3548
3549 /* check for changes in original list members */
3550 err = list_members_replaced(pmod, dset);
3551 if (err) {
3552 return err;
3553 }
3554
3555 biglist = gretl_list_copy(pmod->list);
3556 if (biglist == NULL) {
3557 return E_ALLOC;
3558 }
3559
3560 nadd = 1 + pmod->ncoeff - pmod->ifc;
3561
3562 err = dataset_add_series(dset, nadd);
3563 if (err) {
3564 free(biglist);
3565 return err;
3566 }
3567
3568 /* add lags of the dependent variable and all regressors: some of
3569 these may be redundant but we'll just let the redundant terms be
3570 eliminated automatically via gretl's collinearity checking.
3571 */
3572
3573 k = v;
3574 for (i=1; i<=pmod->list[0]; i++) {
3575 int src, lag, parent;
3576
3577 src = pmod->list[i];
3578 if (src == 0) {
3579 continue;
3580 }
3581 for (t=0; t<dset->n; t++) {
3582 if (t == 0 || na(dset->Z[src][t-1])) {
3583 dset->Z[k][t] = NADBL;
3584 } else {
3585 dset->Z[k][t] = dset->Z[src][t-1];
3586 }
3587 }
3588 biglist = gretl_list_append_term(&biglist, k);
3589 if (biglist == NULL) {
3590 err = E_ALLOC;
3591 break;
3592 }
3593 lag = series_get_lag(dset, src);
3594 parent = series_get_parent_id(dset, src);
3595 if (lag > 0 && parent > 0) {
3596 char tmp[16];
3597
3598 sprintf(tmp, "_%d", lag + 1);
3599 strcpy(dset->varname[k], dset->varname[parent]);
3600 gretl_trunc(dset->varname[k], 15 - strlen(tmp));
3601 strcat(dset->varname[k], tmp);
3602 } else {
3603 strcpy(dset->varname[k], dset->varname[src]);
3604 gretl_trunc(dset->varname[k], 13);
3605 strcat(dset->varname[k], "_1");
3606 }
3607 k++;
3608 }
3609
3610 if (!err) {
3611 /* re-impose the sample that was in force when the original model
3612 was estimated */
3613 impose_model_smpl(pmod, dset);
3614 cmod = lsq(biglist, dset, OLS, OPT_A);
3615 clearit = 1;
3616 err = cmod.errcode;
3617 }
3618
3619 if (!err) {
3620 if (cmod.nobs != pmod->nobs || cmod.ess > pmod->ess || cmod.dfd >= pmod->dfd) {
3621 /* something has gone wrong */
3622 err = E_DATA;
3623 }
3624 }
3625
3626 if (!err) {
3627 /* construct an F-test based on the SSR from the original
3628 AR(1) model and the SSR from the unrestricted model, cmod.
3629 */
3630 int dfd = cmod.dfd;
3631 int dfn = pmod->dfd - dfd - 1; /* account for rho */
3632 double SSRr = pmod->ess;
3633 double SSRu = cmod.ess;
3634 double Ftest = ((SSRr - SSRu)/dfn) / (SSRu/dfd);
3635 double pval = snedecor_cdf_comp(dfn, dfd, Ftest);
3636
3637 if (!(opt & OPT_I)) {
3638 if (!(opt & OPT_Q)) {
3639 cmod.aux = AUX_COMFAC;
3640 printmodel(&cmod, dset, OPT_S, prn);
3641 pputc(prn, '\n');
3642 }
3643
3644 pputs(prn, _("Test of common factor restriction"));
3645 pputs(prn, "\n\n");
3646
3647 pprintf(prn, " %s: %s(%d, %d) = %g, ", _("Test statistic"),
3648 "F", dfn, dfd, Ftest);
3649 pprintf(prn, _("with p-value = %g\n"), pval);
3650 pputc(prn, '\n');
3651 }
3652
3653 if (opt & OPT_S) {
3654 ModelTest *test;
3655
3656 test = model_test_new(GRETL_TEST_COMFAC);
3657 if (test != NULL) {
3658 model_test_set_teststat(test, GRETL_STAT_F);
3659 model_test_set_dfn(test, dfn);
3660 model_test_set_dfd(test, dfd);
3661 model_test_set_value(test, Ftest);
3662 model_test_set_pvalue(test, pval);
3663 maybe_add_test_to_model(pmod, test);
3664 }
3665 }
3666
3667 record_test_result(Ftest, pval);
3668 }
3669
3670 if (clearit) {
3671 clear_model(&cmod);
3672 }
3673
3674 /* delete the added variables and restore the original
3675 sample range */
3676
3677 dataset_drop_last_variables(dset, nadd);
3678 free(biglist);
3679
3680 dset->t1 = save_t1;
3681 dset->t2 = save_t2;
3682
3683 return err;
3684 }
3685
3686 /**
3687 * panel_specification_test:
3688 * @pmod: pointer to model to be tested.
3689 * @dset: dataset struct.
3690 * @opt: option flags.
3691 * @prn: gretl printing struct.
3692 *
3693 * Tests the given pooled model for fixed and random effects.
3694 *
3695 * Returns: 0 on successful completion, error code on error.
3696 */
3697
panel_specification_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3698 int panel_specification_test (MODEL *pmod, DATASET *dset,
3699 gretlopt opt, PRN *prn)
3700 {
3701 if (pmod->ci != OLS || !dataset_is_panel(dset)) {
3702 pputs(prn, _("This test is only relevant for pooled models\n"));
3703 return 1;
3704 }
3705
3706 if (pmod->ifc == 0) {
3707 pputs(prn, _("This test requires that the model contains a constant\n"));
3708 return 1;
3709 }
3710
3711 if (!balanced_panel(dset)) { /* ?? */
3712 pputs(prn, _("Sorry, can't do this test on an unbalanced panel.\n"
3713 "You need to have the same number of observations\n"
3714 "for each cross-sectional unit"));
3715 return 1;
3716 } else {
3717 panel_diagnostics(pmod, dset, opt, prn);
3718 }
3719
3720 return 0;
3721 }
3722
3723 /* wrapper for backward compatibility */
3724
panel_hausman_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3725 int panel_hausman_test (MODEL *pmod, DATASET *dset,
3726 gretlopt opt, PRN *prn)
3727 {
3728 return panel_specification_test(pmod, dset, opt, prn);
3729 }
3730
3731 /**
3732 * add_leverage_values_to_dataset:
3733 * @dset: dataset struct.
3734 * @m: matrix containing leverage values.
3735 * @opt: OPT_O = overwrite series on save.
3736 * @flags: may include SAVE_LEVERAGE, SAVE_INFLUENCE,
3737 * and/or SAVE_DFFITS.
3738 *
3739 * Adds to the working dataset one or more series calculated by
3740 * the gretl test for leverage/influence of data points.
3741 *
3742 * Returns: 0 on successful completion, error code on error.
3743 */
3744
add_leverage_values_to_dataset(DATASET * dset,gretl_matrix * m,gretlopt opt,int flags)3745 int add_leverage_values_to_dataset (DATASET *dset, gretl_matrix *m,
3746 gretlopt opt, int flags)
3747 {
3748 int overwrite = (opt & OPT_O);
3749 int t1, t2;
3750 int addvars = 0;
3751
3752 if (flags & SAVE_LEVERAGE) addvars++;
3753 if (flags & SAVE_INFLUENCE) addvars++;
3754 if (flags & SAVE_DFFITS) addvars++;
3755
3756 if (addvars == 0) {
3757 return 0;
3758 }
3759
3760 if (dataset_add_series(dset, addvars)) {
3761 return E_ALLOC;
3762 }
3763
3764 t1 = gretl_matrix_get_t1(m);
3765 t2 = t1 + gretl_matrix_rows(m);
3766
3767 /* add leverage? */
3768 if (flags & SAVE_LEVERAGE) {
3769 int t, v = dset->v - addvars;
3770 int j = 0;
3771
3772 for (t=0; t<dset->n; t++) {
3773 if (t < t1 || t >= t2) {
3774 dset->Z[v][t] = NADBL;
3775 } else {
3776 dset->Z[v][t] = gretl_matrix_get(m, j++, 0);
3777 }
3778 }
3779 strcpy(dset->varname[v], "lever");
3780 if (!overwrite) {
3781 make_varname_unique(dset->varname[v], v, dset);
3782 }
3783 series_set_label(dset, v, "leverage values");
3784 }
3785
3786 /* add influence? */
3787 if (flags & SAVE_INFLUENCE) {
3788 int t, v = dset->v - (addvars - 1);
3789 int j = 0;
3790
3791 for (t=0; t<dset->n; t++) {
3792 if (t < t1 || t >= t2) {
3793 dset->Z[v][t] = NADBL;
3794 } else {
3795 dset->Z[v][t] = gretl_matrix_get(m, j++, 1);
3796 }
3797 }
3798 strcpy(dset->varname[v], "influ");
3799 if (!overwrite) {
3800 make_varname_unique(dset->varname[v], v, dset);
3801 }
3802 series_set_label(dset, v, "influence values");
3803 }
3804
3805 /* add DFFITS? */
3806 if (flags & SAVE_DFFITS) {
3807 int t, v = dset->v - (addvars - 2);
3808 int j = 0;
3809
3810 for (t=0; t<dset->n; t++) {
3811 double s, h;
3812
3813 if (t < t1 || t >= t2) {
3814 dset->Z[v][t] = NADBL;
3815 } else {
3816 /* s = studentized residuals */
3817 h = gretl_matrix_get(m, j, 0);
3818 s = gretl_matrix_get(m, j, 2);
3819 if (na(h) || na(s)) {
3820 dset->Z[v][t] = NADBL;
3821 } else {
3822 dset->Z[v][t] = s * sqrt(h / (1.0 - h));
3823 }
3824 j++;
3825 }
3826 }
3827 strcpy(dset->varname[v], "dffits");
3828 if (!overwrite) {
3829 make_varname_unique(dset->varname[v], v, dset);
3830 }
3831 series_set_label(dset, v, "DFFITS values");
3832 }
3833
3834 set_dataset_is_changed(dset, 1);
3835
3836 return 0;
3837 }
3838
3839 /**
3840 * leverage_test:
3841 * @pmod: pointer to model to be tested.
3842 * @dset: dataset struct.
3843 * @opt: if OPT_S, add calculated series to data set; operate
3844 * silently if OPT_Q. If includes OPT_O, do not adjust save
3845 * names (overwrite any existing series of the same names).
3846 * @prn: gretl printing struct.
3847 *
3848 * Tests the data used in the given model for points with
3849 * high leverage and influence on the estimates.
3850 *
3851 * Returns: 0 on successful completion, error code on error.
3852 */
3853
leverage_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3854 int leverage_test (MODEL *pmod, DATASET *dset,
3855 gretlopt opt, PRN *prn)
3856 {
3857 gretl_matrix *(*model_leverage) (const MODEL *, const DATASET *,
3858 gretlopt, PRN *, int *);
3859 gretl_matrix *m;
3860 int err = 0;
3861
3862 if (pmod->ci != OLS) {
3863 return E_OLSONLY;
3864 }
3865
3866 model_leverage = get_plugin_function("model_leverage");
3867 if (model_leverage == NULL) {
3868 return 1;
3869 }
3870
3871 m = (*model_leverage)(pmod, dset, opt, prn, &err);
3872
3873 if (!err && (opt & OPT_S)) {
3874 /* we got the --save option */
3875 err = add_leverage_values_to_dataset(dset, m, opt,
3876 SAVE_LEVERAGE |
3877 SAVE_INFLUENCE|
3878 SAVE_DFFITS);
3879 }
3880
3881 gretl_matrix_free(m);
3882
3883 return err;
3884 }
3885
3886 /**
3887 * vif_test:
3888 * @pmod: pointer to model to be tested.
3889 * @dset: dataset struct.
3890 * @opt: may contain OPT_Q for quiet operation.
3891 * @prn: gretl printing struct.
3892 *
3893 * Calculates and displays the Variance Inflation Factors for
3894 * the independent variables in the given model.
3895 *
3896 * Returns: 0 on successful completion, error code on error.
3897 */
3898
vif_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3899 int vif_test (MODEL *pmod, DATASET *dset,
3900 gretlopt opt, PRN *prn)
3901 {
3902 int (*compute_vifs) (MODEL *, DATASET *, gretlopt, PRN *);
3903
3904 gretl_error_clear();
3905
3906 compute_vifs = get_plugin_function("compute_vifs");
3907 if (compute_vifs == NULL) {
3908 return 1;
3909 }
3910
3911 return (*compute_vifs)(pmod, dset, opt, prn);
3912 }
3913
3914 /**
3915 * bkw_test:
3916 * @pmod: pointer to model to be tested.
3917 * @dset: dataset struct.
3918 * @opt: may contain OPT_Q for quiet operation.
3919 * @prn: gretl printing struct.
3920 *
3921 * Calculates and displays the Belsley-Kuh-Welsch collinearity
3922 * diagnostics for @pmod.
3923 *
3924 * Returns: 0 on successful completion, error code on error.
3925 */
3926
bkw_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3927 int bkw_test (MODEL *pmod, DATASET *dset,
3928 gretlopt opt, PRN *prn)
3929 {
3930 int (*compute_bkw) (MODEL *, DATASET *, gretlopt, PRN *);
3931
3932 gretl_error_clear();
3933
3934 compute_bkw = get_plugin_function("compute_bkw");
3935 if (compute_bkw == NULL) {
3936 return 1;
3937 }
3938
3939 return (*compute_bkw)(pmod, dset, opt, prn);
3940 }
3941