1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include "libgretl.h"
21 #include "matrix_extra.h"
22 #include "uservar.h"
23 #include "gretl_midas.h"
24 #include "forecast.h"
25 #include "var.h"
26 #include "system.h"
27 #include "libset.h"
28
29 #define AR_DEBUG 0
30
31 /**
32 * SECTION:forecast
33 * @short_description: Forecasting based on models
34 * @title: Forecasting
35 * @include: gretl/libgretl.h, gretl/forecast.h
36 *
37 * Write-up needed here.
38 *
39 */
40
41 /* estimators where a simple X*b does _not_ give the
42 predicted value of the dependent variable */
43
44 #define FCAST_SPECIAL(c) (c == LOGIT || \
45 c == LOGISTIC || \
46 c == MIDASREG || \
47 c == NEGBIN || \
48 c == NLS || \
49 c == POISSON || \
50 c == PROBIT || \
51 c == TOBIT)
52
53 typedef struct Forecast_ Forecast;
54
55 struct Forecast_ {
56 int method; /* static, dynamic or auto */
57 double *yhat; /* array of forecast values */
58 double *sderr; /* array of forecast standard errors */
59 double *eps; /* array of estimated forecast errors */
60 int *dvlags; /* info on lagged dependent variable */
61 int t1; /* start of forecast range */
62 int t2; /* end of forecast range */
63 int model_t2; /* end of period over which model was estimated */
64 };
65
66 static gretl_matrix *fcast_matrix;
67 static gretl_matrix *fcerr_matrix;
68
set_fcast_matrices(gretl_matrix * fc,gretl_matrix * se)69 static void set_fcast_matrices (gretl_matrix *fc,
70 gretl_matrix *se)
71 {
72 gretl_matrix_free(fcast_matrix);
73 gretl_matrix_free(fcerr_matrix);
74
75 fcast_matrix = fc;
76 fcerr_matrix = se;
77 }
78
79 /* Create an empty, dummy AR info structure for use with models
80 that don't have an explicit AR error process, but that do
81 have a lagged dependent variable that in effect produces
82 an AR error, for forecasting purposes. Or add a single-
83 element AR info structure for the AR1 case.
84 */
85
dummy_ar_info_init(MODEL * pmod)86 static int dummy_ar_info_init (MODEL *pmod)
87 {
88 int err = 0;
89
90 pmod->arinfo = malloc(sizeof *pmod->arinfo);
91
92 if (pmod->arinfo == NULL) {
93 err = E_ALLOC;
94 } else {
95 pmod->arinfo->rho = NULL;
96 pmod->arinfo->sderr = NULL;
97 }
98
99 if (!err && pmod->ci == AR1) {
100 pmod->arinfo->arlist = gretl_list_new(1);
101 pmod->arinfo->rho = malloc(sizeof(double));
102 pmod->arinfo->arlist[1] = 1;
103 pmod->arinfo->rho[0] = gretl_model_get_double(pmod, "rho_gls");
104 } else if (!err) {
105 pmod->arinfo->arlist = gretl_null_list();
106 }
107
108 if (err) {
109 free(pmod->arinfo);
110 pmod->arinfo = NULL;
111 }
112
113 return err;
114 }
115
fit_resid_add_sderr(FITRESID * fr)116 static int fit_resid_add_sderr (FITRESID *fr)
117 {
118 int t, err = 0;
119
120 fr->sderr = malloc(fr->nobs * sizeof *fr->sderr);
121
122 if (fr->sderr == NULL) {
123 err = E_ALLOC;
124 } else {
125 for (t=0; t<fr->nobs; t++) {
126 fr->sderr[t] = NADBL;
127 }
128 }
129
130 return err;
131 }
132
133 /**
134 * free_fit_resid:
135 * @fr: the pointer to be freed.
136 *
137 * Frees all resources associated with @fr, then frees the pointer
138 * itself.
139 */
140
free_fit_resid(FITRESID * fr)141 void free_fit_resid (FITRESID *fr)
142 {
143 if (fr != NULL) {
144 free(fr->actual);
145 free(fr->fitted);
146 free(fr->resid);
147 free(fr->sderr);
148 free(fr);
149 }
150 }
151
152 /**
153 * fit_resid_new_with_length:
154 * @n: the number of observations to allow for, or 0 if this
155 * information will be added later.
156 *
157 * Allocates a #FITRESID struct for holding fitted values and
158 * residuals from a model (or out-of-sample forecasts). If
159 * @n is greater than 0 the arrays required for that number
160 * of observations will be allocated.
161 *
162 * Returns: pointer to allocated structure, or %NULL on failure.
163 */
164
fit_resid_new_with_length(int n,int add_errs)165 static FITRESID *fit_resid_new_with_length (int n, int add_errs)
166 {
167 FITRESID *f = malloc(sizeof *f);
168
169 if (f == NULL) {
170 return NULL;
171 }
172
173 f->method = 0;
174 f->model_ID = 0;
175 f->asymp = 0;
176 f->std = 0;
177 f->model_t1 = 0;
178 f->t0 = 0;
179 f->t1 = 0;
180 f->t2 = 0;
181 f->df = 0;
182 f->k = 0;
183 f->nobs = 0;
184 f->pmax = PMAX_NOT_AVAILABLE;
185
186 f->sigma = NADBL;
187 f->alpha = 0.05;
188
189 f->actual = NULL;
190 f->fitted = NULL;
191 f->resid = NULL;
192 f->sderr = NULL;
193
194 *f->depvar = '\0';
195
196 f->actual = malloc(n * sizeof *f->actual);
197 f->fitted = malloc(n * sizeof *f->fitted);
198 f->resid = malloc(n * sizeof *f->resid);
199 if (add_errs) {
200 f->sderr = malloc(n * sizeof *f->sderr);
201 }
202
203 if (f->actual == NULL || f->fitted == NULL || f->resid == NULL ||
204 (add_errs && f->sderr == NULL)) {
205 free_fit_resid(f);
206 f = NULL;
207 } else {
208 int t;
209
210 for (t=0; t<n; t++) {
211 f->actual[t] = f->fitted[t] = f->resid[t] = NADBL;
212 if (f->sderr != NULL) {
213 f->sderr[t] = NADBL;
214 }
215 }
216 f->nobs = n;
217 }
218
219 return f;
220 }
221
fit_resid_new_for_model(const MODEL * pmod,const DATASET * dset,int t1,int t2,int pre_n,int * err)222 static FITRESID *fit_resid_new_for_model (const MODEL *pmod,
223 const DATASET *dset,
224 int t1, int t2, int pre_n,
225 int *err)
226 {
227 FITRESID *fr;
228
229 if (t1 < 0 || t2 < 0 || t2 < t1) {
230 *err = E_OBS;
231 return NULL;
232 }
233
234 fr = fit_resid_new_with_length(dset->n, 0);
235
236 if (fr == NULL) {
237 *err = E_ALLOC;
238 return NULL;
239 }
240
241 fr->t1 = t1;
242 fr->t2 = t2;
243
244 if (pre_n > 0) {
245 fr->t0 = fr->t1 - pre_n;
246 } else {
247 fr->t0 = t1;
248 }
249
250 fr->model_ID = pmod->ID;
251 fr->model_t1 = pmod->t1;
252
253 fr->asymp = ASYMPTOTIC_MODEL(pmod->ci);
254
255 return fr;
256 }
257
fit_resid_new_for_system(int asy,const DATASET * dset,int t1,int t2,int pre_n,int * err)258 static FITRESID *fit_resid_new_for_system (int asy,
259 const DATASET *dset,
260 int t1, int t2, int pre_n,
261 int *err)
262 {
263 FITRESID *fr;
264
265 if (t1 < 0 || t2 < 0 || t2 < t1) {
266 *err = E_OBS;
267 return NULL;
268 }
269
270 fr = fit_resid_new_with_length(dset->n, 1);
271
272 if (fr == NULL) {
273 *err = E_ALLOC;
274 return NULL;
275 }
276
277 fr->t1 = t1;
278 fr->t2 = t2;
279
280 if (pre_n > 0) {
281 fr->t0 = fr->t1 - pre_n;
282 } else {
283 fr->t0 = t1;
284 }
285
286 fr->asymp = asy;
287
288 return fr;
289 }
290
fit_resid_set_dec_places(FITRESID * fr)291 static void fit_resid_set_dec_places (FITRESID *fr)
292 {
293 int n = fr->t2 - fr->t0 + 1;
294
295 if (gretl_isdummy(fr->t0, fr->t2, fr->actual) > 0) {
296 fr->pmax = get_precision(fr->fitted + fr->t0, n, 6);
297 } else {
298 fr->pmax = get_precision(fr->actual + fr->t0, n, 6);
299 }
300
301 if (fr->pmax == 0) {
302 fr->pmax = 2;
303 }
304 }
305
special_depvar(const MODEL * pmod)306 static int special_depvar (const MODEL *pmod)
307 {
308 int ret = 0;
309
310 if (pmod->ci == INTREG) {
311 ret = 1;
312 } else if (pmod->ci == PANEL) {
313 if (pmod->opt & OPT_B) {
314 /* panel "between" model */
315 ret = 1;
316 }
317 }
318
319 return ret;
320 }
321
322 /**
323 * get_fit_resid:
324 * @pmod: the model for which actual and fitted values
325 * are wanted.
326 * @dset: dataset struct.
327 * @err: location to receive error code.
328 *
329 * Allocates a #FITRESID structure and fills it out with
330 * the actual and predicted values of the dependent variable
331 * in @pmod.
332 *
333 * Returns: pointer to allocated structure, or %NULL on failure.
334 */
335
get_fit_resid(const MODEL * pmod,const DATASET * dset,int * err)336 FITRESID *get_fit_resid (const MODEL *pmod, const DATASET *dset,
337 int *err)
338 {
339 double *uhat = pmod->uhat;
340 double *yhat = pmod->yhat;
341 FITRESID *fr;
342 int t, dv = -1;
343
344 if (!special_depvar(pmod)) {
345 dv = gretl_model_get_depvar(pmod);
346 if (dv < 0 || dv >= dset->v) {
347 *err = E_DATA;
348 return NULL;
349 }
350 }
351
352 if (uhat == NULL || yhat == NULL) {
353 gretl_matrix *uh = gretl_model_get_data(pmod, "uhat");
354 gretl_matrix *yh = gretl_model_get_data(pmod, "yhat");
355
356 if (uh == NULL || yh == NULL) {
357 *err = E_DATA;
358 return NULL;
359 } else {
360 uhat = uh->val;
361 yhat = yh->val;
362 }
363 }
364
365 fr = fit_resid_new_for_model(pmod, dset, pmod->t1, pmod->t2,
366 0, err);
367 if (*err) {
368 return NULL;
369 }
370
371 if (LIMDEP(pmod->ci)) {
372 fr->sigma = NADBL;
373 } else if (pmod->ci == GARCH && (pmod->opt & OPT_Z)) {
374 fr->sigma = 1.0;
375 fr->std = 1;
376 } else {
377 fr->sigma = gretl_model_get_double(pmod, "sigma_orig");
378 if (na(fr->sigma)) {
379 fr->sigma = pmod->sigma;
380 }
381 }
382
383 for (t=0; t<fr->nobs; t++) {
384 if (dv < 0) {
385 if (na(yhat[t]) || na(uhat[t])) {
386 fr->actual[t] = NADBL;
387 } else {
388 fr->actual[t] = yhat[t] + uhat[t];
389 }
390 } else {
391 fr->actual[t] = dset->Z[dv][t];
392 }
393 fr->fitted[t] = yhat[t];
394 fr->resid[t] = uhat[t];
395 }
396
397 if (dv < 0) {
398 fr->pmax = PMAX_NOT_AVAILABLE;
399 } else {
400 fit_resid_set_dec_places(fr);
401 }
402
403 if (dv < 0) {
404 const char *dvname = pmod->depvar;
405
406 if (dvname != NULL) {
407 strcpy(fr->depvar, dvname);
408 } else {
409 strcpy(fr->depvar, "implicit y");
410 }
411 } else {
412 strcpy(fr->depvar, dset->varname[dv]);
413 }
414
415 return fr;
416 }
417
418 /* local shortcut to get a model's list of regressors */
419
model_xlist(MODEL * pmod)420 static const int *model_xlist (MODEL *pmod)
421 {
422 int *xlist = gretl_model_get_list(pmod, "xlist");
423
424 if (xlist == NULL) {
425 xlist = gretl_model_get_x_list(pmod);
426 if (xlist != NULL) {
427 gretl_model_set_list_as_data(pmod, "xlist", xlist);
428 }
429 }
430
431 return xlist;
432 }
433
434 /* try to figure out if a model has any lags of the dependent
435 variable among the regressors: return 1 if so, 0 if not
436 */
437
has_depvar_lags(MODEL * pmod,const DATASET * dset)438 static int has_depvar_lags (MODEL *pmod, const DATASET *dset)
439 {
440 const char *yname, *parent;
441 const int *xlist;
442 int i, vi;
443
444 xlist = model_xlist(pmod);
445 if (xlist == NULL) {
446 return 0;
447 }
448
449 yname = gretl_model_get_depvar_name(pmod, dset);
450
451 for (i=1; i<=xlist[0]; i++) {
452 vi = xlist[i];
453 if (series_get_lag(dset, vi) > 0) {
454 parent = series_get_parent_name(dset, vi);
455 if (parent != NULL && !strcmp(yname, parent)) {
456 return 1;
457 }
458 }
459 }
460
461 return 0;
462 }
463
464 /* Makes a list to keep track of any "independent variables" that are
465 really lags of the dependent variable. The list has as many
466 elements as the model has independent variables, and in each place
467 we either write a zero (if the coefficient does not correspond to a
468 lag of the dependent variable) or a positive integer corresponding
469 to the lag order. However, In case the list of independent vars
470 contains no lagged dependent var, *depvar_lags is set to NULL.
471 Returns 1 on error, 0 otherwise.
472 */
473
process_lagged_depvar(MODEL * pmod,const DATASET * dset,int ** depvar_lags)474 static int process_lagged_depvar (MODEL *pmod,
475 const DATASET *dset,
476 int **depvar_lags)
477 {
478 const int *xlist = NULL;
479 int *dvlags = NULL;
480 int lag, anylags;
481 int err = 0;
482
483 anylags = has_depvar_lags(pmod, dset);
484
485 if (!anylags) {
486 *depvar_lags = NULL;
487 return 0;
488 }
489
490 xlist = model_xlist(pmod);
491
492 dvlags = malloc(xlist[0] * sizeof *dvlags);
493
494 if (dvlags == NULL) {
495 err = E_ALLOC;
496 } else {
497 const char *yname, *parent;
498 int i, vi;
499
500 yname = gretl_model_get_depvar_name(pmod, dset);
501
502 for (i=1; i<=xlist[0]; i++) {
503 vi = xlist[i];
504 lag = series_get_lag(dset, vi);
505 parent = series_get_parent_name(dset, vi);
506 if (lag > 0 && parent != NULL && !strcmp(yname, parent)) {
507 dvlags[i-1] = lag;
508 } else {
509 dvlags[i-1] = 0;
510 }
511 }
512 }
513
514 *depvar_lags = dvlags;
515
516 return err;
517 }
518
519 /* Tries to determine if a model has any "real" exogenous regressors:
520 we discount a simple time trend and periodic dummy variables, since
521 these can be extended automatically. If dvlags is non-NULL we use
522 it to screen out "independent vars" that are really lags of the
523 dependent variable.
524 */
525
526 static int
has_real_exog_regressors(MODEL * pmod,const int * dvlags,const DATASET * dset)527 has_real_exog_regressors (MODEL *pmod, const int *dvlags,
528 const DATASET *dset)
529 {
530 const int *xlist = model_xlist(pmod);
531 int i, xi, ret = 0;
532
533 if (xlist != NULL) {
534 for (i=0; i<xlist[0]; i++) {
535 xi = xlist[i + 1];
536 if (xi != 0 && (dvlags == NULL || dvlags[i] == 0)) {
537 if (is_trend_variable(dset->Z[xi], dset->n)) {
538 continue;
539 } else if (is_periodic_dummy(dset->Z[xi], dset)) {
540 continue;
541 } else {
542 ret = 1;
543 break;
544 }
545 }
546 }
547 }
548
549 return ret;
550 }
551
552 /* Get a value for a lag of the dependent variable. If method is
553 dynamic we prefer lagged prediction to lagged actual. If method is
554 static, we never want the lagged prediction, only the actual. If
555 method is "auto", which value we prefer depends on whether we're in
556 or out of sample (actual within, lagged prediction without).
557 */
558
fcast_get_ldv(Forecast * fc,int i,int t,int lag,const DATASET * dset)559 static double fcast_get_ldv (Forecast *fc, int i, int t, int lag,
560 const DATASET *dset)
561 {
562 int p = t - lag;
563 double ldv;
564
565 /* initialize to actual lagged value, if available */
566 if (p < 0) {
567 ldv = NADBL;
568 } else {
569 ldv = dset->Z[i][p];
570 }
571
572 #if AR_DEBUG
573 fprintf(stderr, "fcast_get_ldv: i=%d, t=%d, lag=%d; "
574 "initial ldv = Z[%d][%d] = %g\n", i, t, lag, i, p, ldv);
575 #endif
576
577 if (fc->method != FC_STATIC && p >= 0) {
578 double yhat = fc->yhat[p];
579 int fc_ok = !na(yhat);
580
581 if (fc->method == FC_DYNAMIC) {
582 if (fc_ok) {
583 ldv = yhat;
584 }
585 } else if (fc->method == FC_AUTO) {
586 if (t > fc->model_t2 + lag || na(ldv)) {
587 if (fc_ok) {
588 ldv = yhat;
589 }
590 }
591 }
592 #if AR_DEBUG
593 fprintf(stderr, "fcast_get_ldv (non-static): using %g\n", ldv);
594 #endif
595 }
596
597 return ldv;
598 }
599
600 /* Get forecasts, plus standard errors for same, for models without
601 autoregressive errors and without "special requirements"
602 (e.g. nonlinearity).
603
604 The forecast standard errors include both uncertainty over the
605 error process and parameter uncertainty (Davidson and MacKinnon
606 method) -- unless @opt contains OPT_M, in which case the standard
607 errors pertain to prediction of mean Y.
608 */
609
610 static int
static_fcast_with_errs(Forecast * fc,MODEL * pmod,const DATASET * dset,gretlopt opt)611 static_fcast_with_errs (Forecast *fc, MODEL *pmod,
612 const DATASET *dset,
613 gretlopt opt)
614 {
615 gretl_matrix *V = NULL;
616 gretl_vector *Xt = NULL;
617 gretl_vector *b = NULL;
618 double s2 = pmod->sigma * pmod->sigma;
619 double vyh, xval;
620 int k = pmod->ncoeff;
621 int same_data = 0;
622 int i, vi, t;
623 int err = 0;
624
625 V = gretl_vcv_matrix_from_model(pmod, NULL, &err);
626 if (err) {
627 goto bailout;
628 }
629
630 b = gretl_coeff_vector_from_model(pmod, NULL, &err);
631 if (err) {
632 goto bailout;
633 }
634
635 Xt = gretl_vector_alloc(k);
636 if (Xt == NULL) {
637 err = E_ALLOC;
638 goto bailout;
639 }
640
641 same_data = same_dataset(pmod, dset);
642
643 for (t=fc->t1; t<=fc->t2 && !err; t++) {
644 int missing = 0;
645
646 /* skip if we can't compute forecast */
647 if (same_data && t >= pmod->t1 && t <= pmod->t2) {
648 missing = na(pmod->yhat[t]);
649 }
650
651 /* populate Xt vector for observation */
652 for (i=0; i<k && !missing; i++) {
653 vi = pmod->list[i + 2];
654 xval = dset->Z[vi][t];
655 if (na(xval)) {
656 fc->sderr[t] = fc->yhat[t] = NADBL;
657 missing = 1;
658 } else {
659 gretl_vector_set(Xt, i, xval);
660 }
661 }
662
663 if (missing) {
664 fc->sderr[t] = fc->yhat[t] = NADBL;
665 continue;
666 }
667
668 /* forecast value */
669 fc->yhat[t] = gretl_vector_dot_product(Xt, b, NULL);
670
671 /* forecast variance */
672 vyh = gretl_scalar_qform(Xt, V, &err);
673 if (na(vyh)) {
674 err = 1;
675 } else {
676 if (!(opt & OPT_M)) {
677 vyh += s2;
678 }
679 if (vyh >= 0.0) {
680 fc->sderr[t] = sqrt(vyh);
681 } else {
682 fc->sderr[t] = NADBL;
683 err = 1;
684 }
685 }
686 }
687
688 bailout:
689
690 gretl_matrix_free(V);
691 gretl_vector_free(Xt);
692 gretl_vector_free(b);
693
694 return err;
695 }
696
697 #define individual_effects_model(m) (m != NULL && m->ci == PANEL && \
698 (m->opt & (OPT_F | OPT_U)))
699
special_set_fcast_matrix(gretl_matrix * fc)700 static void special_set_fcast_matrix (gretl_matrix *fc)
701 {
702 size_t sz;
703
704 /* copy forecast from second column to first */
705 sz = fc->rows * sizeof(double);
706 memcpy(fc->val, fc->val + fc->rows, sz);
707
708 /* and release some storage */
709 gretl_matrix_realloc(fc, fc->rows, 1);
710
711 set_fcast_matrices(fc, NULL);
712 }
713
special_print_fc_stats(gretl_matrix * fc,PRN * prn)714 static void special_print_fc_stats (gretl_matrix *fc,
715 PRN *prn)
716 {
717 const double *y = fc->val;
718 const double *f = y + fc->rows;
719 gretl_matrix *fcs;
720 int n_used;
721 int err = 0;
722
723 fcs = forecast_stats(y, f, 0, fc->rows - 1, &n_used,
724 OPT_O, &err);
725 if (fcs != NULL) {
726 print_fcast_stats_matrix(fcs, n_used, OPT_NONE, prn);
727 gretl_matrix_free(fcs);
728 }
729 }
730
transcribe_to_matrix(gretl_matrix * fc,double yt,double fct,char ** S,const DATASET * dset,int t,int j)731 static void transcribe_to_matrix (gretl_matrix *fc,
732 double yt, double fct,
733 char **S,
734 const DATASET *dset,
735 int t, int j)
736 {
737 if (S != NULL) {
738 char obsstr[OBSLEN];
739
740 ntolabel(obsstr, t, dset);
741 S[j] = gretl_strdup(obsstr);
742 }
743 if (fc->cols == 2) {
744 gretl_matrix_set(fc, j, 0, yt);
745 gretl_matrix_set(fc, j, 1, fct);
746 } else {
747 gretl_matrix_set(fc, j, 0, fct);
748 }
749 }
750
751 /* specific to "dpanel": handles both within-sample
752 and out of sample forecasts
753 */
754
real_dpanel_fcast(double * yhat,gretl_matrix * fc,char ** rlabels,MODEL * pmod,const char * mask,const DATASET * dset,gretlopt opt)755 static int real_dpanel_fcast (double *yhat,
756 gretl_matrix *fc,
757 char **rlabels,
758 MODEL *pmod,
759 const char *mask,
760 const DATASET *dset,
761 gretlopt opt)
762 {
763 const double *y, *xi, *b;
764 double *yi = NULL;
765 int *xlist, *ylags = NULL;
766 int yno, free_ylags = 0;
767 int fc_static = (opt & OPT_S);
768 int dpdstyle = (pmod->opt & OPT_X);
769 int t1min, ntdum, ifc, tdt = 0;
770 int i, vi, s, t, k, u;
771 int T, ti, row;
772
773 ylags = gretl_model_get_data(pmod, "ylags");
774 if (ylags == NULL) {
775 ylags = gretl_consecutive_list_new(1, pmod->list[1]);
776 free_ylags = 1;
777 }
778
779 /* per unit time series */
780 T = dset->pd;
781 yi = malloc(T * sizeof *yi);
782
783 if (ylags == NULL || yi == NULL) {
784 return E_ALLOC;
785 }
786
787 #if 0
788 fprintf(stderr, "real_dpanel_fcast: yhat=%p, fc=%p, mask=%p\n",
789 (void *) yhat, (void *) fc, (void *) mask);
790 fprintf(stderr, " dset: n=%d, pd=%d, t1=%d, t2=%d\n",
791 dset->n, dset->pd, dset->t1, dset->t2);
792 #endif
793
794 if (mask == NULL && !(opt & OPT_D)) {
795 /* within-sample: default to static */
796 fc_static = 1;
797 }
798
799 /* first usable time-series observation, zero based */
800 t1min = gretl_model_get_int(pmod, "t1min");
801 if (t1min > 0) {
802 t1min--;
803 } else {
804 /* fallback? problematic if we have time dummies */
805 t1min = 1 + ylags[ylags[0]];
806 }
807
808 /* is a constant included? */
809 ifc = gretl_model_get_int(pmod, "ifc");
810
811 /* do we have time dummies? */
812 ntdum = gretl_model_get_int(pmod, "ntdum");
813 if (ntdum > 0) {
814 /* period to which first time dummy refers */
815 tdt = t1min + ifc;
816 }
817
818 #if 0
819 fprintf(stderr, "ntdum=%d, t1min=%d, ifc=%d, tdt=%d\n",
820 ntdum, t1min, ifc, tdt);
821 #endif
822
823 yno = gretl_model_get_depvar(pmod);
824 xlist = gretl_model_get_x_list(pmod);
825 y = dset->Z[yno];
826 b = pmod->coeff;
827 t = u = row = 0;
828
829 for (s=0; s<=dset->t2; s++) {
830 double fct = NADBL;
831 int missing = 0;
832 int fc_skip = 0;
833 int j = 0;
834
835 if (t == 0) {
836 /* fill dependent var vector for unit */
837 for (ti=0; ti<T; ti++) {
838 yi[ti] = y[s+ti];
839 }
840 }
841
842 if (s < dset->t1) {
843 /* we haven't got to the requested sample yet */
844 goto next_t;
845 }
846
847 if (mask != NULL && mask[s] == 1) {
848 /* we're doing out-of-sample forecast and this is
849 an in-sample observation
850 */
851 fc_skip = 1;
852 goto transcribe;
853 }
854
855 if (t < t1min) {
856 /* required lags not available */
857 goto transcribe;
858 }
859
860 /* get lagged level for integration */
861 if (na(yi[t-1])) {
862 missing = 1;
863 } else {
864 fct = yi[t-1];
865 }
866
867 /* handle autoregressive terms */
868 for (i=1; i<=ylags[0] && !missing; i++) {
869 k = ylags[i];
870 if (t - k - 1 < 0 || na(yi[t-k]) || na(yi[t-k-1])) {
871 missing = 1;
872 } else {
873 fct += b[j++] * (yi[t-k] - yi[t-k-1]);
874 }
875 }
876
877 /* then regular regressors, differenced */
878 for (i=1; i<=xlist[0] && !missing; i++) {
879 vi = xlist[i];
880 if (vi == 0) {
881 fct += b[j++];
882 } else {
883 xi = dset->Z[vi];
884 if (t - 1 < 0 || na(xi[s]) || na(xi[s-1])) {
885 missing = 1;
886 } else {
887 fct += b[j++] * (xi[s] - xi[s-1]);
888 }
889 }
890 }
891
892 /* then a time effect, if applicable */
893 if (!missing && ntdum > 0 && t >= tdt) {
894 fct += b[j + t - tdt];
895 if (!dpdstyle) {
896 /* time dummies differenced */
897 if (t - tdt > 0) {
898 fct -= b[j + t - tdt - 1];
899 }
900 }
901 }
902
903 if (missing) {
904 fct = NADBL;
905 } else if (!fc_static) {
906 /* dynamic: replace observed with forecast */
907 yi[t] = fct;
908 }
909
910 transcribe:
911
912 /* transcribe result? */
913 if (yhat != NULL) {
914 yhat[s] = fct;
915 } else if (fc_skip) {
916 ; /* skip */
917 } else if (row >= fc->rows) {
918 fprintf(stderr, "dpanel_fcast out of bounds! s=%d, row=%d\n", s, row);
919 } else {
920 transcribe_to_matrix(fc, y[s], fct, rlabels, dset, s, row++);
921 }
922
923 next_t:
924
925 if (t == dset->pd - 1) {
926 t = 0;
927 u++;
928 } else {
929 t++;
930 }
931 }
932
933 free(xlist);
934 free(yi);
935 if (free_ylags) {
936 free(ylags);
937 }
938
939 return 0;
940 }
941
942 /* for pooled OLS, fixed effects and random effects:
943 handles both within-sample and out of sample
944 */
945
real_panel_fcast(double * yhat,gretl_matrix * fc,char ** rlabels,MODEL * pmod,const char * mask,const DATASET * dset,gretlopt opt)946 static int real_panel_fcast (double *yhat,
947 gretl_matrix *fc,
948 char **rlabels,
949 MODEL *pmod,
950 const char *mask,
951 const DATASET *dset,
952 gretlopt opt)
953 {
954 const double *y, *b;
955 gretl_vector *a_vec = NULL;
956 int i, vi, s, t;
957 int T, row, err = 0;
958
959 #if 0
960 fprintf(stderr, "real_panel_fcast: yhat=%p, fc=%p, mask=%p\n",
961 (void *) yhat, (void *) fc, (void *) mask);
962 fprintf(stderr, " dset: n=%d, pd=%d, t1=%d, t2=%d\n",
963 dset->n, dset->pd, dset->t1, dset->t2);
964 #endif
965
966 if (individual_effects_model(pmod)) {
967 a_vec = gretl_model_ahat_vec(pmod, &err);
968 if (err) {
969 fprintf(stderr, "real_panel_fcast: ahat vector is missing\n");
970 return err;
971 }
972 }
973
974 y = dset->Z[pmod->list[1]];
975 b = pmod->coeff;
976 s = 0; /* panel unit index */
977 T = dset->pd;
978 row = 0;
979
980 for (t=0; t<=dset->t2; t++) {
981 double xit, fct = NADBL;
982 int missing = 0;
983 int fc_skip = 0;
984 int j = 0;
985
986 if (t > 0 && t % T == 0) {
987 /* advance unit index */
988 s++;
989 }
990
991 if (t < dset->t1) {
992 continue;
993 }
994
995 if (mask != NULL && mask[t] == 1) {
996 /* we're doing out-of-sample forecast and this is
997 an in-sample observation
998 */
999 fc_skip = 1;
1000 goto transcribe;
1001 }
1002
1003 if (a_vec != NULL) {
1004 /* individual effect */
1005 if (na(a_vec->val[s])) {
1006 missing = 1;
1007 } else {
1008 fct = a_vec->val[s];
1009 }
1010 } else {
1011 fct = 0.0;
1012 }
1013
1014 /* regular regressors */
1015 for (i=2, j=0; i<=pmod->list[0] && !missing; i++, j++) {
1016 vi = pmod->list[i];
1017 if (vi == 0) {
1018 if (!(pmod->opt & OPT_F)) {
1019 fct += b[j];
1020 }
1021 } else {
1022 xit = dset->Z[vi][t];
1023 if (na(xit)) {
1024 missing = 1;
1025 } else {
1026 fct += b[j] * xit;
1027 }
1028 }
1029 }
1030
1031 /* FIXME handling of time dummies? */
1032
1033 if (missing) {
1034 fct = NADBL;
1035 }
1036
1037 transcribe:
1038
1039 /* transcribe result? */
1040 if (yhat != NULL) {
1041 yhat[t] = fct;
1042 } else if (fc_skip) {
1043 ; /* skip */
1044 } else if (row >= fc->rows) {
1045 fprintf(stderr, "panel_fcast out of bounds! t=%d, row=%d\n", t, row);
1046 } else {
1047 transcribe_to_matrix(fc, y[t], fct, rlabels, dset, t, row++);
1048 }
1049 }
1050
1051 gretl_matrix_free(a_vec);
1052
1053 return 0;
1054 }
1055
panel_fcast(Forecast * fc,MODEL * pmod,DATASET * dset,gretlopt opt)1056 static int panel_fcast (Forecast *fc, MODEL *pmod,
1057 DATASET *dset, gretlopt opt)
1058 {
1059 int save_t1 = dset->t1;
1060 int save_t2 = dset->t2;
1061 int err;
1062
1063 dset->t1 = fc->t1;
1064 dset->t2 = fc->t2;
1065 if (pmod->ci == DPANEL) {
1066 err = real_dpanel_fcast(fc->yhat, NULL, NULL, pmod,
1067 NULL, dset, opt);
1068 } else {
1069 err = real_panel_fcast(fc->yhat, NULL, NULL, pmod,
1070 NULL, dset, opt);
1071 }
1072 dset->t1 = save_t1;
1073 dset->t2 = save_t2;
1074
1075 return err;
1076 }
1077
1078 /* Out-of-sample (in the time dimension) forecast routine
1079 for panel data, supporting pooled OLS, fixed effects,
1080 random effects and dpanel. This is the top-level
1081 driver.
1082 */
1083
panel_os_special(MODEL * pmod,DATASET * dset,const char * vname,gretlopt opt,PRN * prn)1084 static int panel_os_special (MODEL *pmod, DATASET *dset,
1085 const char *vname,
1086 gretlopt opt,
1087 PRN *prn)
1088 {
1089 DATASET *fset = fetch_full_dataset();
1090 int named_save = *vname != '\0';
1091 gretl_matrix *fc = NULL;
1092 double *yhat = NULL;
1093 char *mask = dset->submask;
1094 char **Sr = NULL;
1095 int vi, s;
1096 int err = 0;
1097
1098 s = get_dataset_submask_size(dset);
1099 if (s != fset->n) {
1100 fprintf(stderr, "fullset->n = %d but submask size = %d\n",
1101 fset->n, s);
1102 return E_DATA;
1103 }
1104
1105 if (named_save) {
1106 yhat = malloc(fset->n * sizeof *yhat);
1107 if (yhat == NULL) {
1108 return E_ALLOC;
1109 }
1110 } else {
1111 int nfc = fset->n - dset->n;
1112 int cols = opt & OPT_Q ? 1 : 2;
1113
1114 fc = gretl_matrix_alloc(nfc, cols);
1115 if (fc == NULL) {
1116 return E_ALLOC;
1117 }
1118 Sr = strings_array_new(nfc);
1119 }
1120
1121 if (pmod->ci == DPANEL) {
1122 err = real_dpanel_fcast(yhat, fc, Sr, pmod, mask, fset, opt);
1123 } else {
1124 err = real_panel_fcast(yhat, fc, Sr, pmod, mask, fset, opt);
1125 }
1126
1127 if (!err && yhat != NULL) {
1128 err = dataset_add_NA_series(dset, 1);
1129 if (!err) {
1130 vi = dset->v - 1;
1131 err = dataset_rename_series(dset, vi, vname);
1132 series_set_label(dset, vi, _("predicted values"));
1133 }
1134 if (!err) {
1135 /* keep number of series in sync */
1136 err = dataset_add_allocated_series(fset, yhat);
1137 }
1138 yhat = NULL;
1139 } else if (!err) {
1140 gretl_matrix_set_rownames(fc, Sr);
1141 if (opt & OPT_Q) {
1142 /* @fc should be suitable to purpose */
1143 set_fcast_matrices(fc, NULL);
1144 } else {
1145 if (!(opt & OPT_T)) {
1146 /* not --stats-only */
1147 char **Sc = strings_array_new(2);
1148 int yno = gretl_model_get_depvar(pmod);
1149
1150 Sc[0] = gretl_strdup(fset->varname[yno]);
1151 Sc[1] = gretl_strdup(_("prediction"));
1152 gretl_matrix_set_colnames(fc, Sc);
1153 gretl_matrix_print_to_prn(fc, NULL, prn);
1154 }
1155 if (!(opt & OPT_N)) {
1156 /* not --no-stats */
1157 special_print_fc_stats(fc, prn);
1158 }
1159 /* @fc needs some adjustment */
1160 special_set_fcast_matrix(fc);
1161 }
1162 fc = NULL;
1163 }
1164
1165 gretl_matrix_free(fc);
1166 free(yhat);
1167
1168 return err;
1169 }
1170
1171 #define NLS_DEBUG 0
1172
1173 /* Generate forecasts from nonlinear least squares model, using the
1174 string specification of the regression function that was saved as
1175 data on the model (see nls.c). If the NLS formula is dynamic and
1176 the user has not requested a static forecast, we do an
1177 autoregressive genr out of sample. If we're doing a static
1178 forecast we add a simple-minded forecast error, namely the standard
1179 error of the NLS regression.
1180 */
1181
nls_fcast(Forecast * fc,const MODEL * pmod,DATASET * dset)1182 static int nls_fcast (Forecast *fc, const MODEL *pmod,
1183 DATASET *dset)
1184 {
1185 int oldt1 = dset->t1;
1186 int oldt2 = dset->t2;
1187 int origv = dset->v;
1188 int yno = 0;
1189 const char *nlfunc;
1190 double *y = NULL;
1191 char formula[MAXLINE];
1192 int t, err = 0;
1193
1194 nlfunc = gretl_model_get_data(pmod, "nl_regfunc");
1195 if (nlfunc == NULL) {
1196 err = E_DATA;
1197 }
1198
1199 #if NLS_DEBUG
1200 fprintf(stderr, "nls_fcast: method=%d, nlfunc='%s'\n",
1201 fc->method, nlfunc);
1202 #endif
1203
1204 if (fc->method == FC_AUTO) {
1205 yno = pmod->list[1];
1206 y = copyvec(dset->Z[yno], dset->n);
1207 if (y == NULL) {
1208 err = E_ALLOC;
1209 }
1210 }
1211
1212 /* The computation of $nl_y below may depend on computing
1213 some auxiliary quantities used within the nls block, so
1214 try that first below: nl_model_run_aux_genrs().
1215 */
1216
1217 if (!err) {
1218 /* upper limit of static forecast */
1219 int t2 = (fc->method == FC_STATIC)? fc->t2 : pmod->t2;
1220
1221 if (t2 >= fc->t1) {
1222 /* non-null static range */
1223 int fcv;
1224
1225 dset->t1 = fc->t1;
1226 dset->t2 = t2;
1227 err = nl_model_run_aux_genrs(pmod, dset);
1228 fcv = dset->v;
1229 #if NLS_DEBUG
1230 fprintf(stderr, " static range %d to %d, fcv=%d\n",
1231 dset->t1, dset->t2, fcv);
1232 #endif
1233 if (!err) {
1234 sprintf(formula, "$nl_y = %s", nlfunc);
1235 err = generate(formula, dset, GRETL_TYPE_SERIES,
1236 OPT_P, NULL);
1237 }
1238 if (!err) {
1239 for (t=dset->t1; t<=dset->t2; t++) {
1240 fc->yhat[t] = dset->Z[fcv][t];
1241 }
1242 }
1243 }
1244
1245 if (!err && fc->method == FC_AUTO && fc->t2 > pmod->t2) {
1246 /* dynamic forecast out of sample: in this context
1247 pmod->depvar is the expression to generate
1248 the series in question under its own name
1249 */
1250 dset->t1 = pmod->t2 + 1;
1251 dset->t2 = fc->t2;
1252 #if NLS_DEBUG
1253 fprintf(stderr, " dynamic range %d to %d\n", dset->t1, dset->t2);
1254 #endif
1255 err = nl_model_run_aux_genrs(pmod, dset);
1256 if (!err) {
1257 strcpy(formula, pmod->depvar);
1258 err = generate(formula, dset, GRETL_TYPE_SERIES,
1259 OPT_P, NULL);
1260 }
1261 if (!err) {
1262 for (t=dset->t1; t<=dset->t2; t++) {
1263 fc->yhat[t] = dset->Z[yno][t];
1264 }
1265 }
1266 }
1267 }
1268
1269 /* restore dataset state */
1270 if (dset->v > origv) {
1271 err = dataset_drop_last_variables(dset, dset->v - origv);
1272 }
1273 dset->t1 = oldt1;
1274 dset->t2 = oldt2;
1275
1276 if (y != NULL) {
1277 /* restore original dependent variable */
1278 memcpy(dset->Z[yno], y, dset->n * sizeof *y);
1279 free(y);
1280 }
1281
1282 if (!err && fc->method == FC_STATIC && fc->sderr != NULL) {
1283 #if 0 /* Not much tested: FIXME make this an option? */
1284 err = nls_boot_calc(pmod, dset, fc->t1, fc->t2, fc->sderr);
1285 if (!err) {
1286 double et, s2 = pmod->sigma * pmod->sigma;
1287
1288 for (t=fc->t1; t<=fc->t2; t++) {
1289 if (!na(fc->sderr[t])) {
1290 et = fc->sderr[t];
1291 fc->sderr[t] = sqrt(et * et + s2);
1292 }
1293 }
1294 }
1295 #else
1296 /* kinda simple-minded */
1297 for (t=fc->t1; t<=fc->t2; t++) {
1298 fc->sderr[t] = pmod->sigma;
1299 }
1300 #endif
1301 }
1302
1303 return err;
1304 }
1305
1306 #define MIDAS_DEBUG 0
1307
midas_fcast(Forecast * fc,const MODEL * pmod,DATASET * dset)1308 static int midas_fcast (Forecast *fc, const MODEL *pmod,
1309 DATASET *dset)
1310 {
1311 int oldt1 = dset->t1;
1312 int oldt2 = dset->t2;
1313 int origv = dset->v;
1314 char formula[MAXLINE];
1315 char *mdsfunc = NULL;
1316 double *y = NULL;
1317 int yno = pmod->list[1];
1318 int t, err;
1319
1320 err = midas_forecast_setup(pmod, dset, fc->method, &mdsfunc);
1321
1322 #if MIDAS_DEBUG
1323 fprintf(stderr, "midas_fcast: method %d, err = %d\n", fc->method, err);
1324 #endif
1325
1326 if (!err && fc->method == FC_AUTO) {
1327 y = copyvec(dset->Z[yno], dset->n);
1328 if (y == NULL) {
1329 err = E_ALLOC;
1330 }
1331 }
1332
1333 if (!err) {
1334 /* upper limit of static forecast */
1335 int t2 = (fc->method == FC_STATIC)? fc->t2 : pmod->t2;
1336
1337 if (t2 >= fc->t1) {
1338 /* non-null static range */
1339 int fcv = dset->v;
1340
1341 dset->t1 = fc->t1;
1342 dset->t2 = t2;
1343 sprintf(formula, "$midas_y=%s", mdsfunc);
1344 err = generate(formula, dset, GRETL_TYPE_SERIES,
1345 OPT_P, NULL);
1346 #if MIDAS_DEBUG
1347 fprintf(stderr, " static range %d to %d, fcv=%d, err=%d\n",
1348 dset->t1, dset->t2, fcv, err);
1349 #endif
1350 if (!err) {
1351 for (t=dset->t1; t<=dset->t2; t++) {
1352 fc->yhat[t] = dset->Z[fcv][t];
1353 }
1354 }
1355 }
1356
1357 if (!err && fc->method == FC_AUTO && fc->t2 > pmod->t2) {
1358 /* dynamic forecast out of sample */
1359 dset->t1 = pmod->t2 + 1;
1360 dset->t2 = fc->t2;
1361 sprintf(formula, "%s=%s", dset->varname[yno], mdsfunc);
1362 #if MIDAS_DEBUG
1363 fprintf(stderr, " dynamic range %d to %d\n %s\n", dset->t1,
1364 dset->t2, formula);
1365 #endif
1366 err = generate(formula, dset, GRETL_TYPE_SERIES,
1367 OPT_P, NULL);
1368 if (!err) {
1369 for (t=dset->t1; t<=dset->t2; t++) {
1370 fc->yhat[t] = dset->Z[yno][t];
1371 }
1372 }
1373 }
1374 }
1375
1376 /* restore dataset state */
1377 if (dset->v > origv) {
1378 err = dataset_drop_last_variables(dset, dset->v - origv);
1379 }
1380 dset->t1 = oldt1;
1381 dset->t2 = oldt2;
1382
1383 if (y != NULL) {
1384 /* restore original dependent variable */
1385 memcpy(dset->Z[yno], y, dset->n * sizeof *y);
1386 free(y);
1387 }
1388
1389 if (!err && fc->method == FC_STATIC && fc->sderr != NULL) {
1390 for (t=fc->t1; t<=fc->t2; t++) {
1391 fc->sderr[t] = pmod->sigma;
1392 }
1393 }
1394
1395 /* clean up any uservars the MIDAS mechanism created */
1396 destroy_private_uvars();
1397
1398 free(mdsfunc);
1399
1400 return err;
1401 }
1402
1403 #if AR_DEBUG
1404 # include <stdarg.h>
my_dprintf(const char * format,...)1405 static void my_dprintf (const char *format, ...)
1406 {
1407 va_list args;
1408
1409 va_start(args, format);
1410 vfprintf(stderr, format, args);
1411 va_end(args);
1412
1413 return;
1414 }
1415 # define DPRINTF(x) my_dprintf x
1416 #else
1417 # define DPRINTF(x)
1418 #endif
1419
1420 #define depvar_lag(f,i) ((f->dvlags != NULL)? f->dvlags[i] : 0)
1421
1422 #define GFC_DEBUG 0
1423
1424 /*
1425 GARCH error variance process:
1426
1427 h(t) = a(0) + sum(i=1 to q) a(i) * u(t-i)^2
1428 + sum(j=1 to p) b(j) * h(t-j)
1429
1430 This is then complexified if the model includes lags of the
1431 dependent variable among the regressors, but in the following
1432 function we just calculate the successive h's.
1433 */
1434
garch_h_hat(const MODEL * pmod,int t1,int t2,int xvars)1435 static double *garch_h_hat (const MODEL *pmod, int t1, int t2,
1436 int xvars)
1437 {
1438 const double *alpha;
1439 const double *beta;
1440 double hlag;
1441 double *h, *mh;
1442 int nf, q, p, lmax;
1443 int i, s, t, ti;
1444 int err = 0;
1445
1446 mh = gretl_model_get_data(pmod, "garch_h");
1447 if (mh == NULL) {
1448 return NULL;
1449 }
1450
1451 nf = t2 - t1 + 1;
1452 if (nf <= 0) {
1453 return NULL;
1454 }
1455
1456 h = malloc(nf * sizeof *h);
1457 if (h == NULL) {
1458 return NULL;
1459 }
1460
1461 q = pmod->list[1];
1462 p = pmod->list[2];
1463 alpha = pmod->coeff + xvars;
1464 beta = alpha + q + 1;
1465
1466 lmax = MAX(p, q);
1467
1468 for (t=t1; t<=t2 && !err; t++) {
1469 s = t - t1;
1470
1471 h[s] = alpha[0];
1472 #if GFC_DEBUG
1473 fprintf(stderr, "h[%d or %d] init'd to %g\n", s, t, h[s]);
1474 #endif
1475 for (i=1; i<=lmax; i++) {
1476 ti = t - i;
1477 if (ti < 0) {
1478 break;
1479 }
1480 hlag = 0.0;
1481 if (ti <= pmod->t2) {
1482 if (!na(mh[ti])) {
1483 hlag = mh[ti];
1484 }
1485 } else {
1486 hlag = h[s-i];
1487 }
1488 if (i <= q) {
1489 if (ti <= pmod->t2 && !na(pmod->uhat[ti])) {
1490 h[s] += alpha[i] * pmod->uhat[ti] * pmod->uhat[ti];
1491 #if GFC_DEBUG
1492 fprintf(stderr, " added alpha[%d] * uhat[%d]^2 = %g * %g = %g\n",
1493 i, ti, alpha[i], pmod->uhat[ti] * pmod->uhat[ti],
1494 alpha[i] * pmod->uhat[ti] * pmod->uhat[ti]);
1495 #endif
1496 } else {
1497 /* lagged uhat^2 not available: substitute its
1498 expectation, h at the same date */
1499 h[s] += alpha[i] * hlag;
1500 }
1501 }
1502 if (i <= p) {
1503 h[s] += beta[i-1] * hlag;
1504 #if GFC_DEBUG
1505 fprintf(stderr, " added beta[%d] * h[-%d] = %g * %g = %g\n",
1506 i-1, i, beta[i-1], hlag, beta[i-1] * hlag);
1507 #endif
1508 }
1509 }
1510 if (h[s] < 0.0) {
1511 err = 1;
1512 }
1513 #if GFC_DEBUG
1514 fprintf(stderr, "h[%d or %d] = %g (%g)\n", s, t, h[s], sqrt(h[s]));
1515 #endif
1516 }
1517
1518 if (err) {
1519 free(h);
1520 h = NULL;
1521 }
1522
1523 return h;
1524 }
1525
1526 /* Infinite MA representation, in case GARCH model has lags of the
1527 dependent var among the regressors (the autoregressive coeffs are
1528 recorded in the array phi).
1529 */
1530
garch_psi(const double * phi,int p,int nf)1531 static double *garch_psi (const double *phi, int p, int nf)
1532 {
1533 double *psi;
1534 int i, s;
1535
1536 if (phi == NULL) {
1537 return NULL;
1538 }
1539
1540 psi = malloc(nf * sizeof *psi);
1541 if (psi == NULL) {
1542 return NULL;
1543 }
1544
1545 psi[0] = 1.0;
1546
1547 /* Are we OK with regard to signs below? Do we need to keep a
1548 record of the squares of the psi elements too? (Or instead?)
1549 */
1550
1551 for (s=1; s<nf; s++) {
1552 psi[s] = 0.0;
1553
1554 /* add psi[s] components derived from dep var lags */
1555 for (i=0; i<p && i<s; i++) {
1556 psi[s] += phi[i] * psi[s-i-1];
1557 #if GFC_DEBUG
1558 fprintf(stderr, "psi[%d]: adding phi[%d] * psi[%d] = %g\n",
1559 s, i, s-i-1, phi[i] * psi[s-i-1]);
1560 #endif
1561 }
1562 }
1563
1564 #if GFC_DEBUG
1565 for (s=0; s<nf; s++) {
1566 fprintf(stderr, "psi[%d] = %g\n", s, psi[s]);
1567 }
1568 #endif
1569
1570 return psi;
1571 }
1572
1573 /* Construct an array, "phi", of autoregressive coefficients, if a
1574 GARCH model has any lags of the dependent variable among the
1575 regressors; the array has dimension equal to the highest-order lag
1576 of the dependent variable.
1577 */
1578
garch_ldv_phi(const MODEL * pmod,const int * xlist,const int * dvlags,int * ppmax)1579 static double *garch_ldv_phi (const MODEL *pmod, const int *xlist,
1580 const int *dvlags, int *ppmax)
1581 {
1582 double *phi = NULL;
1583 int i, pmax = 0;
1584
1585 if (dvlags != NULL && xlist != NULL) {
1586 for (i=0; i<xlist[0]; i++) {
1587 if (dvlags[i] > pmax) {
1588 pmax = dvlags[i];
1589 }
1590 }
1591
1592 phi = malloc(pmax * sizeof *phi);
1593 if (phi != NULL) {
1594 for (i=0; i<pmax; i++) {
1595 phi[i] = 0.0;
1596 }
1597 for (i=0; i<xlist[0]; i++) {
1598 if (dvlags[i] > 0) {
1599 phi[dvlags[i] - 1] = pmod->coeff[i];
1600 }
1601 }
1602 }
1603 }
1604
1605 if (phi != NULL) {
1606 #if GFC_DEBUG
1607 fprintf(stderr, "garch_ldv_phi: pmax = %d\n", pmax);
1608 for (i=0; i<pmax; i++) {
1609 fprintf(stderr, "phi[%d] = %g\n", i, phi[i]);
1610 }
1611 #endif
1612 *ppmax = pmax;
1613 }
1614
1615 return phi;
1616 }
1617
1618 /* Compute forecast standard errors for a GARCH model that includes
1619 lags of the dependent variable among the regressors. This could do
1620 with some scrutiny (or the calculation, above, of the "psi" array
1621 could do with checking).
1622 */
1623
garch_ldv_sderr(const double * h,const double * psi,int s)1624 static double garch_ldv_sderr (const double *h,
1625 const double *psi,
1626 int s)
1627 {
1628 double ss, vs = h[s];
1629 int i;
1630
1631 for (i=1; i<=s; i++) {
1632 vs += h[s-i] * psi[i] * psi[i];
1633 }
1634
1635 if (vs >= 0.0) {
1636 ss = sqrt(vs);
1637 } else {
1638 ss = NADBL;
1639 }
1640
1641 return ss;
1642 }
1643
garch_fcast(Forecast * fc,MODEL * pmod,const DATASET * dset)1644 static int garch_fcast (Forecast *fc, MODEL *pmod,
1645 const DATASET *dset)
1646 {
1647 double xval;
1648 int xvars, yno;
1649 const int *xlist = NULL;
1650 double *h = NULL;
1651 double *mh = NULL;
1652 double *phi = NULL;
1653 double *psi = NULL;
1654 int i, v, t;
1655
1656 xlist = model_xlist(pmod);
1657 yno = gretl_model_get_depvar(pmod);
1658
1659 if (xlist != NULL) {
1660 xvars = xlist[0];
1661 } else {
1662 xvars = 0;
1663 }
1664
1665 if (fc->sderr != NULL) {
1666 h = garch_h_hat(pmod, pmod->t2 + 1, fc->t2, xvars);
1667 mh = gretl_model_get_data(pmod, "garch_h");
1668 }
1669
1670 if (fc->dvlags != NULL) {
1671 int ns = fc->t2 - pmod->t2;
1672 int pmax = 0;
1673
1674 if (ns > 0) {
1675 phi = garch_ldv_phi(pmod, xlist, fc->dvlags, &pmax);
1676 psi = garch_psi(phi, pmax, ns);
1677 free(phi);
1678 }
1679 }
1680
1681 for (t=fc->t1; t<=fc->t2; t++) {
1682 int lag, miss = 0;
1683 double yh = 0.0;
1684
1685 if (fc->method != FC_DYNAMIC &&
1686 t >= pmod->t1 && t <= pmod->t2) {
1687 fc->yhat[t] = pmod->yhat[t];
1688 if (fc->sderr != NULL && mh != NULL) {
1689 fc->sderr[t] = mh[t];
1690 }
1691 continue;
1692 }
1693
1694 for (i=0; i<xvars; i++) {
1695 v = xlist[i+1];
1696 if ((lag = depvar_lag(fc, i))) {
1697 xval = fcast_get_ldv(fc, yno, t, lag, dset);
1698 } else {
1699 xval = dset->Z[v][t];
1700 }
1701 if (na(xval)) {
1702 miss = 1;
1703 } else {
1704 yh += pmod->coeff[i] * xval;
1705 }
1706 }
1707
1708 if (miss) {
1709 fc->yhat[t] = NADBL;
1710 } else {
1711 fc->yhat[t] = yh;
1712 }
1713
1714 if (h != NULL) {
1715 if (t > pmod->t2) {
1716 if (psi != NULL) {
1717 /* build in effect of lagged dependent var */
1718 fc->sderr[t] = garch_ldv_sderr(h, psi, t - pmod->t2 - 1);
1719 } else {
1720 /* no lagged dependent variable */
1721 fc->sderr[t] = sqrt(h[t - pmod->t2 - 1]);
1722 }
1723 } else {
1724 fc->sderr[t] = NADBL;
1725 }
1726 }
1727 }
1728
1729 if (h != NULL) {
1730 free(h);
1731 }
1732 if (psi != NULL) {
1733 free(psi);
1734 }
1735
1736 return 0;
1737 }
1738
1739 /* Compute ARMA forecast error variance (ignoring parameter
1740 uncertainty, as is common), via recursion. Cf. Box and Jenkins,
1741 1976, p. 508, "Program 4", V(l) algorithm (with the sign of theta
1742 changed).
1743 */
1744
arma_variance(const double * phi,int p,const double * theta,int q,double * psi,int npsi,int l)1745 static double arma_variance (const double *phi, int p,
1746 const double *theta, int q,
1747 double *psi, int npsi, int l)
1748 {
1749 /* the sum of squared psi's */
1750 static double sspsi;
1751
1752 DPRINTF(("arma_variance: p=%d, q=%d, npsi=%d, l=%d\n", p, q, npsi, l));
1753
1754 if (l == 1) {
1755 int i, j;
1756
1757 psi[0] = 1.0;
1758 for (j=1; j<npsi; j++) {
1759 psi[j] = 0.0;
1760 for (i=1; i<=j; i++) {
1761 if (i <= p) {
1762 psi[j] += phi[i] * psi[j-i];
1763 }
1764 }
1765 if (theta != NULL && j <= q) {
1766 psi[j] += theta[j];
1767 }
1768 DPRINTF(("psi[%d] = %g\n", j, psi[j]));
1769 }
1770 sspsi = 1.0;
1771 } else {
1772 sspsi += psi[l-1] * psi[l-1];
1773 }
1774
1775 DPRINTF(("augmented 'sspsi' using psi(%d) = %g (sspsi = %g)\n",
1776 l-1, psi[l-1], sspsi));
1777
1778 return sspsi;
1779 }
1780
arima_difference_obs(const double * x,int t,int * c,int k)1781 static double arima_difference_obs (const double *x, int t,
1782 int *c, int k)
1783 {
1784 double dx = x[t];
1785
1786 if (!na(dx)) {
1787 int i, s;
1788
1789 for (i=0; i<k; i++) {
1790 if (c[i] != 0) {
1791 s = t - i - 1;
1792 if (s < 0 || na(x[s])) {
1793 dx = NADBL;
1794 break;
1795 } else {
1796 dx -= c[i] * x[s];
1797 }
1798 }
1799 }
1800 }
1801
1802 return dx;
1803 }
1804
1805 /* When forecasting based on an armax model estimated using X12A,
1806 or via the Kalman filter, we need to form the series X\beta
1807 so that we can subtract X\beta_{t-i} from y_{t-i} in
1808 computing the AR portion of the forecast. "beta" below
1809 is the array of ARMAX coefficients, not including the
1810 constant.
1811
1812 If the model is ARIMAX and was NOT estimated with the
1813 --y-diff-only option (OPT_Y) then the regressors X have
1814 to be differenced in this context.
1815 */
1816
create_Xb_series(Forecast * fc,const MODEL * pmod,const double * beta,const int * xlist,const double ** Z)1817 static double *create_Xb_series (Forecast *fc, const MODEL *pmod,
1818 const double *beta, const int *xlist,
1819 const double **Z)
1820 {
1821 double *Xb;
1822 int *delta = NULL;
1823 double x;
1824 int miss, diff = 0;
1825 int d, D, s = 0, k = 0;
1826 int i, j, t, vi;
1827
1828 Xb = malloc((fc->t2 + 1) * sizeof *Xb);
1829 if (Xb == NULL) {
1830 return NULL;
1831 }
1832
1833 d = gretl_model_get_int(pmod, "arima_d");
1834 D = gretl_model_get_int(pmod, "arima_D");
1835
1836 if ((d > 0 || D > 0) && !(pmod->opt & OPT_Y)) {
1837 s = gretl_model_get_int(pmod, "arma_pd");
1838 diff = 1;
1839 }
1840
1841 if (diff) {
1842 delta = arima_delta_coeffs(d, D, s);
1843 if (delta == NULL) {
1844 free(Xb);
1845 return NULL;
1846 }
1847 k = d + s * D;
1848 }
1849
1850 for (t=0; t<=fc->t2; t++) {
1851 Xb[t] = 0.0;
1852 miss = 0;
1853 j = 0;
1854 for (i=1; i<=xlist[0] && !miss; i++) {
1855 vi = xlist[i];
1856 if (vi == 0) {
1857 Xb[t] += pmod->coeff[0];
1858 } else {
1859 if (diff) {
1860 x = arima_difference_obs(Z[vi], t, delta, k);
1861 } else {
1862 x = Z[vi][t];
1863 }
1864 if (na(x)) {
1865 Xb[t] = NADBL;
1866 miss = 1;
1867 } else {
1868 Xb[t] += beta[j++] * x;
1869 }
1870 }
1871 }
1872 }
1873
1874 if (delta != NULL) {
1875 free(delta);
1876 }
1877
1878 return Xb;
1879 }
1880
want_x_beta_prep(const MODEL * pmod,const int * xlist)1881 static int want_x_beta_prep (const MODEL *pmod, const int *xlist)
1882 {
1883 int ret = 0;
1884
1885 if (xlist != NULL) {
1886 int aflags = gretl_model_get_int(pmod, "arma_flags");
1887
1888 if (aflags & (ARMA_EXACT | ARMA_X12A)) {
1889 ret = 1;
1890 }
1891 }
1892
1893 return ret;
1894 }
1895
1896 /* generate forecasts for AR(I)MA (or ARMAX) models, including
1897 forecast standard errors if we're doing out-of-sample
1898 forecasting
1899 */
1900
arma_fcast(Forecast * fc,MODEL * pmod,const DATASET * dset)1901 static int arma_fcast (Forecast *fc, MODEL *pmod,
1902 const DATASET *dset)
1903 {
1904 double *psi = NULL;
1905 gretl_vector *phi = NULL;
1906 gretl_vector *theta = NULL;
1907 double *phi0 = NULL;
1908 double *Xb = NULL;
1909 const double *beta;
1910 const double *y;
1911 double xval, yval, vl;
1912 double mu = NADBL;
1913 int xvars, yno;
1914 const int *xlist = NULL;
1915 int p, q, px = 0, npsi = 0;
1916 int fcstart = fc->t1;
1917 int ar_smax, ma_smax;
1918 int regarma = 0;
1919 int i, s, t;
1920 int err = 0;
1921
1922 DPRINTF(("\n\n*** arma_fcast: METHOD = %d\n", fc->method));
1923
1924 if (fc->method != FC_DYNAMIC) {
1925 /* use pre-calculated fitted values over model estimation range,
1926 and don't bother calculating forecast error variance */
1927 for (t=fc->t1; t<=pmod->t2; t++) {
1928 fc->yhat[t] = pmod->yhat[t];
1929 if (fc->sderr != NULL) {
1930 fc->sderr[t] = NADBL;
1931 }
1932 }
1933 if (fc->t2 <= pmod->t2) {
1934 /* no "real" forecasts were called for, we're done */
1935 return 0;
1936 }
1937 fcstart = pmod->t2 + 1;
1938 }
1939
1940 p = arma_model_max_AR_lag(pmod);
1941 q = arma_model_max_MA_lag(pmod);
1942
1943 xlist = model_xlist(pmod);
1944 yno = gretl_model_get_depvar(pmod);
1945
1946 DPRINTF(("forecasting variable %d (%s), obs %d to %d, with p=%d, q=%d\n", yno,
1947 dset->varname[yno], fc->t1, fc->t2, p, q));
1948
1949 xvars = (xlist != NULL)? xlist[0] : 0;
1950
1951 err = arma_model_AR_MA_coeffs(pmod, &phi, &theta, OPT_I);
1952 if (err) {
1953 goto bailout;
1954 }
1955
1956 beta = arma_model_get_x_coeffs(pmod);
1957
1958 #if AR_DEBUG
1959 fprintf(stderr, "beta = %p\n", (void *) beta);
1960 printlist(xlist, "xlist");
1961 #endif
1962
1963 if (want_x_beta_prep(pmod, xlist)) {
1964 if (gretl_is_arima_model(pmod)) {
1965 regarma = 1;
1966 err = regarma_model_AR_coeffs(pmod, &phi0, &px);
1967 }
1968 if (xlist[0] == 1 && xlist[1] == 0) {
1969 /* just a const, no ARMAX */
1970 if (phi0 != NULL) {
1971 mu = 1.0;
1972 for (i=1; i<=px; i++) {
1973 mu -= phi0[i];
1974 }
1975 mu *= pmod->coeff[0];
1976 } else {
1977 mu = pmod->coeff[0];
1978 }
1979 } else {
1980 /* we have ARMAX terms */
1981 Xb = create_Xb_series(fc, pmod, beta, xlist,
1982 (const double **) dset->Z);
1983 if (Xb == NULL) {
1984 err = E_ALLOC;
1985 }
1986 }
1987 if (err) {
1988 goto bailout;
1989 }
1990 }
1991
1992 /* setup for forecast error variance */
1993 if (fc->sderr != NULL) {
1994 npsi = fc->t2 - fcstart + 1;
1995 psi = malloc(npsi * sizeof *psi);
1996 }
1997
1998 /* cut-off points for using actual rather than forecast
1999 values of y in generating further forecasts (FIXME??) */
2000 if (fc->method == FC_STATIC) {
2001 ar_smax = fc->t2;
2002 ma_smax = pmod->t2;
2003 } else if (fc->method == FC_DYNAMIC) {
2004 ar_smax = MAX(fc->t1 - 1, p - 1);
2005 ma_smax = MAX(fc->t1 - 1, q);
2006 } else {
2007 ar_smax = pmod->t2;
2008 ma_smax = pmod->t2;
2009 }
2010
2011 DPRINTF(("ar_smax = %d, ma_smax = %d\n", ar_smax, ma_smax));
2012
2013 /* dependent variable */
2014 y = dset->Z[yno];
2015
2016 /* do real forecast */
2017 for (t=fcstart; t<=fc->t2 && !err; t++) {
2018 double yh = 0.0;
2019 int miss = 0;
2020
2021 #if AR_DEBUG
2022 char obsstr[OBSLEN];
2023 ntolabel(obsstr, t, dset);
2024 #endif
2025 DPRINTF(("\n *** Doing forecast for obs %d (%s)\n", t, obsstr));
2026
2027 /* contribution of const and/or independent variables */
2028
2029 if (!na(mu)) {
2030 yh = mu;
2031 } else if (Xb != NULL) {
2032 /* X\beta series is pre-computed */
2033 if (na(Xb[t])) {
2034 miss = 1;
2035 } else {
2036 yh = Xb[t];
2037 }
2038 } else {
2039 int j = 0;
2040
2041 for (i=1; i<=xvars; i++) {
2042 if (xlist[i] == 0) {
2043 yh += pmod->coeff[0];
2044 } else {
2045 xval = dset->Z[xlist[i]][t];
2046 if (na(xval)) {
2047 miss = 1;
2048 } else {
2049 yh += beta[j++] * xval;
2050 }
2051 }
2052 }
2053 }
2054
2055 if (phi0 != NULL && Xb != NULL) {
2056 /* the regarma case */
2057 for (i=1; i<=px; i++) {
2058 s = t - i;
2059 if (s < 0) {
2060 miss = 1;
2061 } else if (na(Xb[s])) {
2062 miss = 1;
2063 } else {
2064 yh -= phi0[i] * Xb[s];
2065 }
2066 }
2067 }
2068
2069 DPRINTF((" Xb contribution: yh = %g\n", yh));
2070
2071 /* AR contribution (incorporating any differencing) */
2072
2073 for (i=1; i<=p && !miss; i++) {
2074 if (phi->val[i] == 0.0) {
2075 continue;
2076 }
2077 s = t - i;
2078 if (s < 0) {
2079 yval = NADBL;
2080 } else if (s <= ar_smax) {
2081 yval = y[s];
2082 DPRINTF((" AR: lag %d, y[%d] = %g\n", i, s, yval));
2083 } else {
2084 yval = fc->yhat[s];
2085 DPRINTF((" AR: lag %d, yhat[%d] = %g\n", i, s, yval));
2086 }
2087 if (na(yval)) {
2088 DPRINTF((" AR: lag %d, missing value\n", i));
2089 miss = 1;
2090 } else {
2091 DPRINTF((" AR: lag %d, using coeff %#.8g\n", i, phi->val[i]));
2092 if (!regarma && Xb != NULL) {
2093 if (na(Xb[s])) {
2094 miss = 1;
2095 } else {
2096 yh += phi->val[i] * (yval - Xb[s]);
2097 }
2098 } else if (!regarma && !na(mu)) {
2099 DPRINTF((" !regarma && !na(mu): yh += %g * (%g - %g)\n",
2100 phi->val[i], yval, mu));
2101 yh += phi->val[i] * (yval - mu);
2102 } else {
2103 yh += phi->val[i] * yval;
2104 }
2105 }
2106 }
2107
2108 DPRINTF((" with AR contribution: %g\n", yh));
2109
2110 /* MA contribution */
2111
2112 for (i=1; i<=q && !miss; i++) {
2113 if (theta->val[i] == 0.0) {
2114 continue;
2115 }
2116 s = t - i;
2117 if (s >= pmod->t1 && s <= ma_smax) {
2118 DPRINTF((" MA: lag %d, e[%d] = %g, theta[%d] = %g\n", i, s,
2119 pmod->uhat[s], i, theta->val[i]));
2120 if (na(pmod->uhat[s])) {
2121 char obsstr[OBSLEN];
2122
2123 ntolabel(obsstr, s, dset);
2124 fprintf(stderr, "Uh oh, pmod->uhat[%d] is missing! (%s)\n",
2125 s, obsstr);
2126 } else {
2127 yh += theta->val[i] * pmod->uhat[s];
2128 }
2129 } else if (fc->eps != NULL) {
2130 DPRINTF((" MA: lag %d, ehat[%d] = %g, theta[%d] = %g\n", i, s,
2131 fc->eps[s], i, theta->val[i]));
2132 yh += theta->val[i] * fc->eps[s];
2133 }
2134 }
2135
2136 DPRINTF((" with MA contribution: %g\n", yh));
2137
2138 if (miss) {
2139 fc->yhat[t] = NADBL;
2140 } else {
2141 fc->yhat[t] = yh;
2142 if (fc->eps != NULL) {
2143 /* form estimated error: we do this only in
2144 the case of a "static" forecast */
2145 fc->eps[t] = y[t] - yh;
2146 DPRINTF(("\n setting ehat[%d] = %g - %g = %g\n", t,
2147 y[t], yh, fc->eps[t]));
2148 }
2149 }
2150
2151 /* forecast error variance */
2152 if (psi != NULL) {
2153 const double *arvec = (phi != NULL)? phi->val : NULL;
2154 const double *mavec = (theta != NULL)? theta->val : NULL;
2155
2156 vl = arma_variance(arvec, p, mavec, q,
2157 psi, npsi, t - fcstart + 1);
2158 fc->sderr[t] = pmod->sigma * sqrt(vl);
2159 }
2160 }
2161
2162 bailout:
2163
2164 free(psi);
2165 free(Xb);
2166 free(phi0);
2167
2168 gretl_vector_free(phi);
2169 gretl_vector_free(theta);
2170
2171 return err;
2172 }
2173
2174 /* construct the "phi" array of AR coefficients, based on the ARINFO
2175 that was added to the model at estimation time. The latter's rho
2176 member may be a compacted array, with zero elements omitted, but
2177 here we need a full-length array with zeros inserted as required.
2178 */
2179
make_phi_from_arinfo(const ARINFO * arinfo,int pmax)2180 static double *make_phi_from_arinfo (const ARINFO *arinfo, int pmax)
2181 {
2182 double *phi = malloc((pmax + 1) * sizeof *phi);
2183
2184 if (phi != NULL) {
2185 int i, lag;
2186
2187 for (i=0; i<=pmax; i++) {
2188 phi[i] = 0.0;
2189 }
2190
2191 for (i=1; i<=arinfo->arlist[0]; i++) {
2192 lag = arinfo->arlist[i];
2193 phi[lag] = arinfo->rho[i-1];
2194 }
2195 }
2196
2197 return phi;
2198 }
2199
2200 /* Determine the greatest lag order for a model, either via explicit
2201 AR error process or via inclusion of lagged dependent var as
2202 regressor.
2203 */
2204
max_ar_lag(Forecast * fc,const MODEL * pmod,int p)2205 static int max_ar_lag (Forecast *fc, const MODEL *pmod, int p)
2206 {
2207 int i, pmax = p; /* explicit AR order */
2208
2209 if (fc->dvlags != NULL) {
2210 for (i=0; i<pmod->ncoeff; i++) {
2211 if (fc->dvlags[i] > pmax) {
2212 pmax = fc->dvlags[i];
2213 }
2214 }
2215 }
2216
2217 return pmax;
2218 }
2219
2220 /* Set things up for computing forecast error variance for ar models
2221 (AR, AR1). This is complicated by the fact that there
2222 may be a lagged dependent variable in the picture. If there is,
2223 the effective AR coefficients have to be incremented, for the
2224 purposes of calculating forecast variance. But I'm not sure this
2225 is quite right yet.
2226 */
2227
2228 static int
set_up_ar_fcast_variance(const MODEL * pmod,int pmax,int npsi,double ** pphi,double ** ppsi,double ** perrphi)2229 set_up_ar_fcast_variance (const MODEL *pmod, int pmax, int npsi,
2230 double **pphi, double **ppsi,
2231 double **perrphi)
2232 {
2233 double *errphi = NULL;
2234 double *psi = NULL;
2235 double *phi = NULL;
2236 int err = 0;
2237
2238 errphi = make_phi_from_arinfo(pmod->arinfo, pmax);
2239 psi = malloc(npsi * sizeof *psi);
2240 phi = malloc((pmax + 1) * sizeof *phi);
2241
2242 if (errphi == NULL || psi == NULL || phi == NULL) {
2243 free(errphi);
2244 free(psi);
2245 free(phi);
2246 err = E_ALLOC;
2247 } else {
2248 *perrphi = errphi;
2249 *pphi = phi;
2250 *ppsi = psi;
2251 }
2252
2253 return err;
2254 }
2255
2256 /*
2257 The code below generates forecasts that incorporate the
2258 predictable portion of an AR error term:
2259
2260 u_t = r1 u_{t-1} + r2 u_{t-1} + ... + e_t
2261
2262 where e_t is white noise. The forecasts are based on the
2263 representation of a model with such an error term as
2264
2265 (1 - r(L)) y_t = (1 - r(L)) X_t b + e_t
2266
2267 or
2268
2269 y_t = r(L) y_t + (1 - r(L)) X_t b + e_t
2270
2271 where r(L) is a polynomial in the lag operator.
2272
2273 We also attempt to calculate forecast error variance for
2274 out-of-sample forecasts. These calculations, like those for
2275 ARMA, do not take into account parameter uncertainty.
2276
2277 This code is used for AR and AR1 models; it is also used for
2278 dynamic forecasting with models that do not have an explicit AR
2279 error process but that have one or more lagged values of the
2280 dependent variable as regressors.
2281 */
2282
ar_fcast(Forecast * fc,MODEL * pmod,const DATASET * dset)2283 static int ar_fcast (Forecast *fc, MODEL *pmod,
2284 const DATASET *dset)
2285 {
2286 const int *arlist;
2287 double *phi = NULL;
2288 double *psi = NULL;
2289 double *errphi = NULL;
2290 double *rho;
2291 double xval, yh, vl;
2292 double ylag, xlag;
2293 int miss, yno;
2294 int i, k, v, t, tk;
2295 int p, dvlag, pmax = 0;
2296 int pwe, npsi = 0;
2297 int err = 0;
2298
2299 #if AR_DEBUG
2300 fprintf(stderr, "\n*** ar_fcast, method = %d\n\n", fc->method);
2301 #endif
2302
2303 yno = pmod->list[1];
2304
2305 if (pmod->ci == AR1 && pmod->arinfo == NULL) {
2306 dummy_ar_info_init(pmod);
2307 }
2308
2309 arlist = pmod->arinfo->arlist;
2310 p = arlist[arlist[0]]; /* AR order of error term */
2311 rho = pmod->arinfo->rho;
2312
2313 if (fc->t2 > pmod->t2 && fc->sderr != NULL) {
2314 /* we'll compute variance only if we're forecasting out of
2315 sample */
2316 pmax = max_ar_lag(fc, pmod, p);
2317 npsi = fc->t2 - fc->t1 + 1;
2318 /* npsi = fc->t2 - pmod->t2; */
2319 DPRINTF(("pmax = %d, npsi = %d\n", pmax, npsi));
2320 set_up_ar_fcast_variance(pmod, pmax, npsi, &phi, &psi, &errphi);
2321 }
2322
2323 pwe = (pmod->opt & OPT_P);
2324
2325 for (t=fc->t1; t<=fc->t2; t++) {
2326 miss = 0;
2327 yh = 0.0;
2328
2329 if (t < p) {
2330 fc->yhat[t] = NADBL;
2331 if (fc->sderr != NULL) {
2332 fc->sderr[t] = NADBL;
2333 }
2334 continue;
2335 }
2336
2337 if (pwe && t == pmod->t1) {
2338 /* PWE first obs is special */
2339 fc->yhat[t] = pmod->yhat[t];
2340 continue;
2341 }
2342
2343 if (phi != NULL) {
2344 /* initialize the phi's based on the AR error process
2345 alone */
2346 for (i=1; i<=pmax; i++) {
2347 phi[i] = errphi[i];
2348 }
2349 }
2350
2351 /* r(L) y_t */
2352 for (k=1; k<=arlist[0]; k++) {
2353 tk = t - arlist[k];
2354 ylag = fcast_get_ldv(fc, yno, tk, 0, dset);
2355 if (na(ylag)) {
2356 miss = 1;
2357 } else {
2358 yh += rho[k-1] * ylag;
2359 }
2360 }
2361
2362 /* (1 - r(L)) X_t b */
2363 for (i=0; i<pmod->ncoeff && !miss; i++) {
2364 v = pmod->list[i+2];
2365 if ((dvlag = depvar_lag(fc, i))) {
2366 xval = fcast_get_ldv(fc, yno, t, dvlag, dset);
2367 } else {
2368 xval = dset->Z[v][t];
2369 }
2370 if (na(xval)) {
2371 miss = 1;
2372 } else {
2373 if (dvlag > 0 && phi != NULL) {
2374 /* augment phi for computation of variance */
2375 phi[dvlag] += pmod->coeff[i];
2376 }
2377 for (k=1; k<=arlist[0]; k++) {
2378 tk = t - arlist[k];
2379 if (dvlag > 0) {
2380 xlag = fcast_get_ldv(fc, yno, tk, dvlag, dset);
2381 } else {
2382 xlag = dset->Z[v][tk];
2383 }
2384 if (!na(xlag)) {
2385 xval -= rho[k-1] * xlag;
2386 }
2387 }
2388 yh += xval * pmod->coeff[i];
2389 }
2390 }
2391
2392 if (miss) {
2393 fc->yhat[t] = NADBL;
2394 } else {
2395 fc->yhat[t] = yh;
2396 }
2397
2398 /* forecast error variance */
2399 if (phi != NULL && pmod->ci != GARCH) {
2400 if (t > pmod->t2) {
2401 vl = arma_variance(phi, pmax, NULL, 0, psi, npsi, t - pmod->t2);
2402 fc->sderr[t] = pmod->sigma * sqrt(vl);
2403 DPRINTF(("sderr[%d] = %g * sqrt(%g) = %g\n", t, pmod->sigma,
2404 vl, fc->sderr[t]));
2405 } else {
2406 fc->sderr[t] = NADBL;
2407 }
2408 }
2409 }
2410
2411 if (psi != NULL) {
2412 free(phi);
2413 free(psi);
2414 free(errphi);
2415 }
2416
2417 return err;
2418 }
2419
2420 /* Calculates the transformation required to get from xb (= X*b) to
2421 the actual prediction for the dependent variable, for models of
2422 type LOGISTIC, LOGIT, PROBIT, TOBIT, POISSON and NEGBIN.
2423 */
2424
fcast_transform(double xb,const MODEL * pmod,int t,const double * offset,double lmax)2425 static double fcast_transform (double xb, const MODEL *pmod,
2426 int t, const double *offset,
2427 double lmax)
2428 {
2429 int ymin, ci = pmod->ci;
2430 double yf = xb;
2431
2432 if (ci == TOBIT) {
2433 if (xb < 0.0) {
2434 yf = 0.0;
2435 }
2436 } else if (ci == LOGIT) {
2437 if (gretl_model_get_int(pmod, "ordered")) {
2438 ymin = gretl_model_get_int(pmod, "ymin");
2439 yf = ordered_model_prediction(pmod, xb, ymin);
2440 } else {
2441 yf = exp(xb) / (1.0 + exp(xb));
2442 }
2443 } else if (ci == PROBIT) {
2444 if (gretl_model_get_int(pmod, "ordered")) {
2445 ymin = gretl_model_get_int(pmod, "ymin");
2446 yf = ordered_model_prediction(pmod, xb, ymin);
2447 } else {
2448 yf = normal_cdf(xb);
2449 }
2450 } else if (ci == LOGISTIC) {
2451 if (na(lmax)) {
2452 yf = 1.0 / (1.0 + exp(-xb));
2453 } else {
2454 yf = lmax / (1.0 + exp(-xb));
2455 }
2456 } else if (ci == POISSON || ci == NEGBIN) {
2457 if (offset != NULL) {
2458 if (na(offset[t])) {
2459 yf = NADBL;
2460 } else {
2461 yf = exp(xb + log(offset[t]));
2462 }
2463 } else {
2464 yf = exp(xb);
2465 }
2466 }
2467
2468 return yf;
2469 }
2470
2471 static int
integrated_fcast(Forecast * fc,const MODEL * pmod,int yno,const DATASET * dset)2472 integrated_fcast (Forecast *fc, const MODEL *pmod, int yno,
2473 const DATASET *dset)
2474 {
2475 double s2 = NADBL;
2476 int dyn_start; /* start of dynamic forecast */
2477 double xval, yht = 0.0;
2478 int i, vi, t;
2479
2480 if (fc->method == FC_STATIC) {
2481 dyn_start = dset->n; /* i.e., never */
2482 } else if (fc->method == FC_DYNAMIC) {
2483 dyn_start = fc->t1;
2484 } else {
2485 /* FC_AUTO: dynamic out of sample */
2486 dyn_start = pmod->t2 + 1;
2487 }
2488
2489 if (fc->sderr != NULL) {
2490 s2 = pmod->sigma * pmod->sigma;
2491 fc->sderr[dyn_start - 1] = 0.0;
2492 }
2493
2494 for (t=fc->t1; t<=fc->t2; t++) {
2495 int miss = 0;
2496
2497 if (t <= dyn_start) {
2498 yht = dset->Z[yno][t-1];
2499 if (na(yht)) {
2500 miss = 1;
2501 }
2502 }
2503
2504 for (i=0; i<pmod->ncoeff && !miss; i++) {
2505 vi = pmod->list[i+2];
2506 xval = dset->Z[vi][t];
2507 if (na(xval)) {
2508 miss = 1;
2509 } else {
2510 yht += xval * pmod->coeff[i];
2511 }
2512 }
2513
2514 if (miss) {
2515 if (t >= dyn_start) {
2516 /* all subsequent values are NA */
2517 int s;
2518
2519 for (s=t; s<=fc->t2; s++) {
2520 fc->yhat[s] = NADBL;
2521 }
2522 break;
2523 } else {
2524 fc->yhat[t] = NADBL;
2525 }
2526 } else {
2527 fc->yhat[t] = yht;
2528 if (fc->sderr != NULL && t >= dyn_start) {
2529 fc->sderr[t] = fc->sderr[t-1] + s2;
2530 }
2531 }
2532 }
2533
2534 if (fc->sderr != NULL) {
2535 fc->sderr[dyn_start - 1] = NADBL;
2536 for (t=dyn_start; t<=fc->t2; t++) {
2537 if (!na(fc->sderr[t])) {
2538 fc->sderr[t] = sqrt(fc->sderr[t]);
2539 }
2540 }
2541 }
2542
2543 return 0;
2544 }
2545
mlogit_fcast(Forecast * fc,const MODEL * pmod,const DATASET * dset)2546 static int mlogit_fcast (Forecast *fc, const MODEL *pmod,
2547 const DATASET *dset)
2548 {
2549 const gretl_matrix *yvals;
2550 gretl_matrix *Xt;
2551 int k, i, vi, t;
2552
2553 yvals = gretl_model_get_data(pmod, "yvals");
2554 if (yvals == NULL) {
2555 return E_DATA;
2556 }
2557
2558 k = pmod->list[0] - 1;
2559 Xt = gretl_matrix_alloc(1, k);
2560 if (Xt == NULL) {
2561 return E_ALLOC;
2562 }
2563
2564 for (t=fc->t1; t<=fc->t2; t++) {
2565 double xti, yht;
2566 int miss = 0;
2567
2568 for (i=0; i<k && !miss; i++) {
2569 vi = pmod->list[i+2];
2570 xti = dset->Z[vi][t];
2571 if (na(xti)) {
2572 miss = 1;
2573 } else {
2574 Xt->val[i] = xti;
2575 }
2576 }
2577
2578 if (miss) {
2579 fc->yhat[t] = NADBL;
2580 } else {
2581 yht = mn_logit_prediction(Xt, pmod->coeff, yvals);
2582 fc->yhat[t] = yht;
2583 }
2584 }
2585
2586 gretl_matrix_free(Xt);
2587
2588 return 0;
2589 }
2590
2591 /* Compute forecasts for linear models without autoregressive errors.
2592 We don't compute forecast standard errors, unless we're integrating
2593 the forecast from a static model estimated via OLS.
2594 */
2595
linear_fcast(Forecast * fc,const MODEL * pmod,int yno,const DATASET * dset)2596 static int linear_fcast (Forecast *fc, const MODEL *pmod, int yno,
2597 const DATASET *dset)
2598 {
2599 const double *offvar = NULL;
2600 double xval, lmax = NADBL;
2601 int k = pmod->ncoeff;
2602 int i, vi, t;
2603
2604 if (COUNT_MODEL(pmod->ci)) {
2605 /* special for "offset" variable */
2606 int offnum = gretl_model_get_int(pmod, "offset_var");
2607
2608 if (offnum > 0) {
2609 offvar = dset->Z[offnum];
2610 }
2611 } else if (pmod->ci == LOGISTIC) {
2612 lmax = gretl_model_get_double(pmod, "lmax");
2613 } else if (pmod->ci == LOGIT || pmod->ci == PROBIT) {
2614 if (gretl_model_get_int(pmod, "ordered")) {
2615 /* we need to know how many coeffs are not
2616 just estimated cut-points */
2617 k = gretl_model_get_int(pmod, "nx");
2618 if (k <= 0) {
2619 return E_MISSDATA;
2620 }
2621 }
2622 }
2623
2624 for (t=fc->t1; t<=fc->t2; t++) {
2625 double yht = 0.0;
2626 int miss = 0;
2627
2628 for (i=0; i<k && !miss; i++) {
2629 int lag;
2630
2631 vi = pmod->list[i+2];
2632 if ((lag = depvar_lag(fc, i))) {
2633 xval = fcast_get_ldv(fc, yno, t, lag, dset);
2634 } else {
2635 xval = dset->Z[vi][t];
2636 }
2637 if (na(xval)) {
2638 miss = 1;
2639 } else {
2640 yht += xval * pmod->coeff[i];
2641 }
2642 }
2643
2644 if (miss) {
2645 fc->yhat[t] = NADBL;
2646 } else if (FCAST_SPECIAL(pmod->ci)) {
2647 /* special handling for LOGIT and others */
2648 fc->yhat[t] = fcast_transform(yht, pmod, t, offvar, lmax);
2649 } else {
2650 fc->yhat[t] = yht;
2651 }
2652 }
2653
2654 return 0;
2655 }
2656
2657 #define dynamic_nls(m) ((m->ci == NLS || m->ci == MIDASREG) \
2658 && gretl_model_get_int(m, "dynamic"))
2659
get_forecast_method(Forecast * fc,MODEL * pmod,const DATASET * dset,gretlopt opt)2660 static int get_forecast_method (Forecast *fc,
2661 MODEL *pmod,
2662 const DATASET *dset,
2663 gretlopt opt)
2664 {
2665 int dyn_ok = 0;
2666 int dyn_errs_ok = 0;
2667 int err;
2668
2669 err = incompatible_options(opt, OPT_D | OPT_S);
2670 if (err) {
2671 return err;
2672 }
2673
2674 fc->dvlags = NULL;
2675 fc->method = FC_STATIC;
2676
2677 /* do setup for possible lags of the dependent variable,
2678 unless OPT_S for "static" has been given */
2679 if (dataset_is_time_series(dset) && !(opt & OPT_S) && pmod->ci != ARMA) {
2680 process_lagged_depvar(pmod, dset, &fc->dvlags);
2681 }
2682
2683 if (!(opt & OPT_S)) {
2684 /* user didn't give the "static" option */
2685 if (pmod->ci == ARMA || fc->dvlags != NULL ||
2686 dynamic_nls(pmod) || (opt & OPT_I)) {
2687 /* dynamic forecast is possible */
2688 dyn_ok = 1;
2689 }
2690 if (SIMPLE_AR_MODEL(pmod->ci) || pmod->ci == GARCH) {
2691 dyn_errs_ok = 1;
2692 }
2693 }
2694
2695 if (!dyn_ok && (opt & OPT_D)) {
2696 /* "dynamic" option given, but can't be honored */
2697 return inapplicable_option_error(FCAST, OPT_D);
2698 }
2699
2700 /* NLS: we can only do dynamic out of sample (fc->t1 > pmod->t2) */
2701 if (pmod->ci == NLS && dyn_ok && (opt & OPT_D)) {
2702 if (fc->t1 <= pmod->t2) {
2703 return inapplicable_option_error(FCAST, OPT_D);
2704 }
2705 }
2706
2707 if (opt & OPT_D) {
2708 /* user requested dynamic forecast and it seems OK */
2709 fc->method = FC_DYNAMIC;
2710 } else if ((dyn_ok || dyn_errs_ok) && fc->t2 > pmod->t2) {
2711 /* do dynamic f'cast out of sample */
2712 fc->method = FC_AUTO;
2713 }
2714
2715 if (fc->method == FC_DYNAMIC || fc->method == FC_AUTO) {
2716 if (fc->t1 > fc->model_t2 + 1) {
2717 /* we'll probably need intervening forecasts */
2718 fc->t1 = fc->model_t2 + 1;
2719 }
2720 }
2721
2722 return err;
2723 }
2724
forecast_init(Forecast * fc)2725 static void forecast_init (Forecast *fc)
2726 {
2727 fc->method = FC_AUTO;
2728
2729 fc->t1 = 0;
2730 fc->t2 = 0;
2731 fc->model_t2 = 0;
2732
2733 fc->yhat = NULL;
2734 fc->sderr = NULL;
2735 fc->eps = NULL;
2736 fc->dvlags = NULL;
2737 }
2738
forecast_free(Forecast * fc)2739 static void forecast_free (Forecast *fc)
2740 {
2741 if (fc->dvlags != NULL) {
2742 free(fc->dvlags);
2743 }
2744
2745 if (fc->eps != NULL) {
2746 free(fc->eps);
2747 }
2748 }
2749
fc_add_eps(Forecast * fc,int n)2750 static int fc_add_eps (Forecast *fc, int n)
2751 {
2752 int t, err = 0;
2753
2754 fc->eps = malloc(n * sizeof *fc->eps);
2755
2756 if (fc->eps == NULL) {
2757 err = E_ALLOC;
2758 } else {
2759 for (t=0; t<n; t++) {
2760 fc->eps[t] = 0.0;
2761 }
2762 }
2763
2764 return err;
2765 }
2766
2767 /* find the earlist starting point at which we have a previous
2768 valid observation on the level of Z[yno] with which to
2769 initialize an integrated forecast
2770 */
2771
revise_fr_start(FITRESID * fr,int yno,const double ** Z,const DATASET * dset)2772 static int revise_fr_start (FITRESID *fr, int yno, const double **Z,
2773 const DATASET *dset)
2774 {
2775 int t, t0;
2776
2777 if (fr->t0 == 0) {
2778 /* certainly can't start before obs 1 */
2779 fr->t0 = 1;
2780 }
2781
2782 /* previous obs */
2783 t0 = fr->t0 - 1;
2784
2785 /* find first non-missing value */
2786 for (t=t0; t<dset->n; t++) {
2787 if (!na(Z[yno][t])) {
2788 break;
2789 }
2790 }
2791
2792 t0 = t;
2793
2794 if (t0 >= dset->n - 2) {
2795 return E_MISSDATA;
2796 }
2797
2798 fr->t0 = t0 + 1;
2799
2800 if (fr->t1 < fr->t0) {
2801 fr->t1 = fr->t0;
2802 }
2803
2804 if (fr->t2 < fr->t1) {
2805 fr->t2 = fr->t1;
2806 }
2807
2808 fr->fitted[t0] = Z[yno][t0];
2809 fr->resid[t0] = 0.0;
2810
2811 return 0;
2812 }
2813
2814 /* check we we really can do an integrated forecast */
2815
check_integrated_forecast_option(MODEL * pmod,DATASET * dset,int * pyno)2816 static int check_integrated_forecast_option (MODEL *pmod,
2817 DATASET *dset,
2818 int *pyno)
2819 {
2820 int err = 0;
2821
2822 if (pmod->ci != OLS) {
2823 err = inapplicable_option_error(FCAST, OPT_I);
2824 } else {
2825 int yno = gretl_model_get_depvar(pmod);
2826 int d, parent;
2827
2828 d = is_standard_diff(yno, dset, &parent);
2829 if (!d) {
2830 err = inapplicable_option_error(FCAST, OPT_I);
2831 } else if (pyno != NULL) {
2832 /* in caller, make yno = ID of level variable */
2833 *pyno = parent;
2834 }
2835 }
2836
2837 return err;
2838 }
2839
2840 /* driver for various functions that compute forecasts
2841 for different sorts of models */
2842
real_get_fcast(FITRESID * fr,MODEL * pmod,DATASET * dset,gretlopt opt)2843 static int real_get_fcast (FITRESID *fr, MODEL *pmod,
2844 DATASET *dset, gretlopt opt)
2845 {
2846 Forecast fc;
2847 const double **Z = (const double **) dset->Z;
2848 int yno = gretl_model_get_depvar(pmod);
2849 int integrate = (opt & OPT_I);
2850 int dummy_AR = 0;
2851 int DM_errs = 0;
2852 int dyn_errs = 0;
2853 int asy_errs = 0;
2854 int int_errs = 0;
2855 int same_data = 0;
2856 int nf = 0;
2857 int t, err;
2858
2859 forecast_init(&fc);
2860
2861 if (integrate) {
2862 err = check_integrated_forecast_option(pmod, dset, &yno);
2863 if (!err) {
2864 err = revise_fr_start(fr, yno, Z, dset);
2865 }
2866 if (err) {
2867 return err;
2868 }
2869 }
2870
2871 fc.t1 = fr->t1;
2872 fc.t2 = fr->t2;
2873 fc.model_t2 = pmod->t2;
2874
2875 err = get_forecast_method(&fc, pmod, dset, opt);
2876 if (err) {
2877 return err;
2878 }
2879
2880 if (pmod->ci == NLS && fc.method == FC_STATIC) {
2881 asy_errs = 1;
2882 }
2883
2884 if (pmod->ci == PANEL || pmod->ci == DPANEL) {
2885 ; /* don't do the things below */
2886 } else if (!FCAST_SPECIAL(pmod->ci) && !integrate) {
2887 if (!AR_MODEL(pmod->ci) && fc.dvlags == NULL) {
2888 /* we'll do Davidson-MacKinnon error variance */
2889 DM_errs = 1;
2890 } else if (fc.method == FC_DYNAMIC) {
2891 /* we'll do dynamic forecast errors throughout */
2892 dyn_errs = 1;
2893 } else if (fc.method == FC_AUTO && fc.t2 > pmod->t2) {
2894 /* do dynamic forecast errors out of sample */
2895 dyn_errs = 1;
2896 }
2897 }
2898
2899 if (integrate && pmod->ci == OLS &&
2900 fc.dvlags == NULL && fc.method != FC_STATIC) {
2901 int_errs = 1;
2902 }
2903
2904 if (dyn_errs && !AR_MODEL(pmod->ci) && fc.dvlags != NULL) {
2905 /* create dummy AR info structure for model with lagged
2906 dependent variable */
2907 int err = dummy_ar_info_init(pmod);
2908
2909 if (err) {
2910 dyn_errs = 0;
2911 } else {
2912 dummy_AR = 1;
2913 }
2914 }
2915
2916 if (DM_errs || dyn_errs || asy_errs || int_errs) {
2917 err = fit_resid_add_sderr(fr);
2918 }
2919
2920 if (err) {
2921 return err;
2922 }
2923
2924 fc.yhat = fr->fitted;
2925 fc.sderr = fr->sderr;
2926
2927 if (pmod->ci == ARMA && fc.method == FC_STATIC) {
2928 fc_add_eps(&fc, dset->n);
2929 }
2930
2931 /* compute the actual forecast */
2932 if (pmod->ci == DPANEL || individual_effects_model(pmod)) {
2933 err = panel_fcast(&fc, pmod, dset, opt);
2934 } else if (DM_errs) {
2935 err = static_fcast_with_errs(&fc, pmod, dset, opt);
2936 } else if (pmod->ci == NLS) {
2937 err = nls_fcast(&fc, pmod, dset);
2938 } else if (pmod->ci == MIDASREG) {
2939 err = midas_fcast(&fc, pmod, dset);
2940 } else if (SIMPLE_AR_MODEL(pmod->ci) || dummy_AR) {
2941 err = ar_fcast(&fc, pmod, dset);
2942 } else if (pmod->ci == ARMA) {
2943 err = arma_fcast(&fc, pmod, dset);
2944 } else if (pmod->ci == GARCH) {
2945 err = garch_fcast(&fc, pmod, dset);
2946 } else if (integrate) {
2947 err = integrated_fcast(&fc, pmod, yno, dset);
2948 } else if (pmod->ci == LOGIT && gretl_model_get_int(pmod, "multinom")) {
2949 err = mlogit_fcast(&fc, pmod, dset);
2950 } else {
2951 err = linear_fcast(&fc, pmod, yno, dset);
2952 }
2953
2954 /* free any auxiliary info */
2955 forecast_free(&fc);
2956
2957 if (dummy_AR) {
2958 free(pmod->arinfo->arlist);
2959 free(pmod->arinfo);
2960 pmod->arinfo = NULL;
2961 }
2962
2963 same_data = same_dataset(pmod, dset);
2964
2965 for (t=0; t<fr->nobs; t++) {
2966 if (t >= fr->t1 && t <= fr->t2) {
2967 if (!na(fr->fitted[t])) {
2968 nf++;
2969 }
2970 } else if (same_data && t >= fr->t0 && t <= fr->t2 &&
2971 t >= pmod->t1 && t <= pmod->t2) {
2972 if (integrate) {
2973 fr->fitted[t] = fr->fitted[t-1] + pmod->yhat[t];
2974 fr->resid[t] = fr->resid[t-1] + pmod->uhat[t];
2975 } else {
2976 fr->fitted[t] = pmod->yhat[t];
2977 fr->resid[t] = pmod->uhat[t];
2978 }
2979 }
2980 fr->actual[t] = dset->Z[yno][t];
2981 }
2982
2983 if (nf == 0) {
2984 err = E_MISSDATA;
2985 } else {
2986 fit_resid_set_dec_places(fr);
2987 strcpy(fr->depvar, dset->varname[yno]);
2988 fr->df = pmod->dfd;
2989 }
2990
2991 return err;
2992 }
2993
fcast_get_limit(const char * s,DATASET * dset)2994 static int fcast_get_limit (const char *s, DATASET *dset)
2995 {
2996 double x;
2997 int t, err = 0;
2998
2999 if (gretl_is_scalar(s)) {
3000 x = gretl_scalar_get_value(s, NULL);
3001 } else {
3002 x = generate_scalar(s, dset, &err);
3003 }
3004
3005 if (x < 1 || x > dset->n) {
3006 gretl_error_clear();
3007 gretl_errmsg_set(_("Observation number out of bounds"));
3008 t = -1;
3009 } else {
3010 t = x - 1;
3011 }
3012
3013 return t;
3014 }
3015
3016 enum {
3017 OS_OK = 0,
3018 OS_ERR,
3019 OS_PANEL
3020 };
3021
out_of_sample_check(MODEL * pmod,DATASET * dset)3022 static int out_of_sample_check (MODEL *pmod, DATASET *dset)
3023 {
3024 int panel, ret;
3025
3026 if (pmod->ci == PANEL || pmod->ci == DPANEL ||
3027 (pmod->ci == OLS && dataset_is_panel(dset))) {
3028 panel = 1;
3029 ret = OS_ERR;
3030 if (gretl_model_get_int(pmod, "ntdum") > 0) {
3031 gretl_errmsg_set(_("Specification includes time dummies: cannot "
3032 "forecast out of sample"));
3033 return ret;
3034 }
3035 } else {
3036 panel = 0;
3037 ret = OS_OK;
3038 }
3039
3040 if (pmod->smpl.t1 == 0 && pmod->smpl.t2 == dset->n - 1) {
3041 /* no out-of-sample obs reachable via t1, t2 */
3042 ret = OS_ERR;
3043 if (!panel) {
3044 if (pmod->t2 < dset->n - 1) {
3045 /* 2019-11-15: may be OK for dynamic model */
3046 ret = OS_OK;
3047 }
3048 } else {
3049 /* the panel case */
3050 DATASET *fullset = fetch_full_dataset();
3051
3052 if (dataset_is_panel(fullset)) {
3053 int Tfull = fullset->pd;
3054 int Tcurr = dset->pd;
3055 int Nfull = fullset->n / Tfull;
3056 int Ncurr = dset->n / Tcurr;
3057
3058 if (Ncurr == Nfull && Tcurr < Tfull &&
3059 dset->v == fullset->v) {
3060 /* sub-sampled in the time dimension only, and
3061 no series added or deleted (we trust)
3062 */
3063 ret = OS_PANEL;
3064 }
3065 }
3066 }
3067 if (ret == OS_ERR) {
3068 gretl_errmsg_set(_("No out-of-sample observations are available"));
3069 }
3070 } else if (panel) {
3071 gretl_errmsg_set(_("The --out-of-sample option is only supported in the "
3072 "time dimension"));
3073 }
3074
3075 return ret;
3076 }
3077
parse_forecast_string(const char * s,gretlopt opt,MODEL * pmod,int t2est,DATASET * dset,int * pt1,int * pt2,int * pk,char * vname,int * os_case)3078 static int parse_forecast_string (const char *s,
3079 gretlopt opt,
3080 MODEL *pmod,
3081 int t2est,
3082 DATASET *dset,
3083 int *pt1, int *pt2,
3084 int *pk, char *vname,
3085 int *os_case)
3086 {
3087 char f[4][32] = {0};
3088 char *t1str = NULL, *t2str = NULL;
3089 char *kstr = NULL, *vstr = NULL;
3090 int nmax, nmin = (opt & OPT_R)? 1 : 0;
3091 int t1 = 0, t2 = 0;
3092 int nf = 0;
3093 int err = 0;
3094
3095 /* "static" and "recursive" can't be combined */
3096 err = incompatible_options(opt, OPT_S | OPT_R);
3097 if (err) {
3098 return err;
3099 }
3100
3101 *vname = '\0';
3102
3103 /* How many fields should we be looking for in the user input?
3104 If OPT_R ("recursive") is given, the max is 4:
3105
3106 t1, t2, k (steps-ahead), vname
3107
3108 Otherwise the max is 3 (no k-value):
3109
3110 t1, t2, vname
3111
3112 However, if OPT_O ("out-of-sample") is given, we should not
3113 expect t1 and t2, so the max becomes 2 (if OPT_R), else 1.
3114
3115 Also note: t1 and t2 need not be given (even in the absence
3116 if OPT_O) since these default to the current sample range.
3117 */
3118
3119 if (opt & OPT_O) {
3120 nmax = (opt & OPT_R)? 2 : 1;
3121 } else {
3122 nmax = (opt & OPT_R)? 4 : 3;
3123 }
3124
3125 if (*s == '\0') {
3126 nf = 0;
3127 } else {
3128 nf = sscanf(s, "%31s %31s %31s %31s", f[0], f[1], f[2], f[3]);
3129 if (nf > nmax) {
3130 /* try for parenthesized t1, t2 terms? */
3131 nf = sscanf(s, "(%31[^)]) (%31[^)]) %31s %31s", f[0], f[1], f[2], f[3]);
3132 }
3133 }
3134
3135 if (nf < nmin || nf > nmax) {
3136 gretl_errmsg_sprintf("fcast: expected %d to %d fields in input, got %d",
3137 nmin, nmax, nf);
3138 return E_PARSE;
3139 }
3140
3141 if (opt & OPT_R) {
3142 if (nf == 4) {
3143 /* t1, t2, k, vname */
3144 t1str = f[0];
3145 t2str = f[1];
3146 kstr = f[2];
3147 vstr = f[3];
3148 } else if (nf == 3) {
3149 /* t1, t2, k */
3150 t1str = f[0];
3151 t2str = f[1];
3152 kstr = f[2];
3153 } else if (nf == 2) {
3154 /* k, vname */
3155 kstr = f[0];
3156 vstr = f[1];
3157 } else if (nf == 1) {
3158 /* just k */
3159 kstr = f[0];
3160 }
3161 } else {
3162 if (nf == 3) {
3163 /* t1, t2, vname */
3164 t1str = f[0];
3165 t2str = f[1];
3166 vstr = f[2];
3167 } else if (nf == 2) {
3168 /* t1, t2 */
3169 t1str = f[0];
3170 t2str = f[1];
3171 } else if (nf == 1) {
3172 /* just vname */
3173 vstr = f[0];
3174 }
3175 }
3176
3177 if (t1str != NULL || t2str != NULL) {
3178 if (opt & OPT_O) {
3179 /* t1, t2 should not be given with --out-of-sample */
3180 gretl_errmsg_set("fcast: unexpected t1 and/or t2 field in input");
3181 return E_DATA;
3182 }
3183 }
3184
3185 if (kstr != NULL && pk != NULL) {
3186 *pk = gretl_int_from_string(kstr, &err);
3187 if (err) {
3188 return err;
3189 }
3190 }
3191
3192 if (vstr != NULL) {
3193 strncat(vname, vstr, VNAMELEN - 1);
3194 }
3195
3196 if (t1str != NULL && t2str != NULL) {
3197 t1 = dateton(t1str, dset);
3198 if (t1 < 0) {
3199 t1 = fcast_get_limit(t1str, dset);
3200 }
3201 t2 = dateton(t2str, dset);
3202 if (t2 < 0) {
3203 t2 = fcast_get_limit(t2str, dset);
3204 }
3205 if (t1 < 0 || t2 < 0 || t2 < t1) {
3206 err = E_DATA;
3207 }
3208 } else if (opt & OPT_O) {
3209 /* out of sample, if possible */
3210 if (pmod != NULL) {
3211 *os_case = out_of_sample_check(pmod, dset);
3212 }
3213 if (*os_case == OS_ERR) {
3214 err = E_DATA;
3215 } else if (*os_case == OS_OK) {
3216 if (dset->n - t2est - 1 > 0) {
3217 t1 = t2est + 1;
3218 t2 = dset->n - 1;
3219 } else {
3220 err = E_OBS;
3221 }
3222 }
3223 } else {
3224 /* default: current sample range */
3225 t1 = dset->t1;
3226 t2 = dset->t2;
3227 }
3228
3229 if (!err) {
3230 /* in case we hit any "temporary" errors above */
3231 gretl_error_clear();
3232 *pt1 = t1;
3233 *pt2 = t2;
3234 }
3235
3236 return err;
3237 }
3238
3239 /**
3240 * get_forecast:
3241 * @pmod: the model from which forecasts are wanted.
3242 * @t1: start of forecast range.
3243 * @t2: end of forecast range.
3244 * @pre_n: number of pre-forecast observations to include.
3245 * @dset: dataset struct.
3246 * @opt: if %OPT_D, force a dynamic forecast; if %OPT_S, force
3247 * a static forecast. By default, the forecast is static within
3248 * the data range over which the model was estimated, and dynamic
3249 * out of sample (in cases where a dynamic forecast is meaningful).
3250 * If @opt includes %OPT_I, integrate the forecast (only relevant
3251 * if the dependent variable in the model in question is recognized
3252 * as the first difference of another variable).
3253 * @err: location to receive error code.
3254 *
3255 * Allocates a #FITRESID structure and fills it out with forecasts
3256 * based on @pmod, over the specified range of observations.
3257 * For some sorts of models forecast standard errors are also
3258 * computed (these appear in the %sderr member of the structure
3259 * to which a pointer is returned; otherwise the %sderr member is
3260 * %NULL).
3261 *
3262 * The calculation of forecast errors, where applicable, is based
3263 * on Davidson and MacKinnon, Econometric Theory and Methods,
3264 * chapter 3 (p. 104), which shows how the variance of forecast errors
3265 * can be computed given the covariance matrix of the parameter
3266 * estimates, provided the error term may be assumed to be serially
3267 * uncorrelated.
3268 *
3269 * Returns: pointer to allocated structure, or %NULL on failure,
3270 * in which case an error code is assigned via @err.
3271 */
3272
get_forecast(MODEL * pmod,int t1,int t2,int pre_n,DATASET * dset,gretlopt opt,int * err)3273 FITRESID *get_forecast (MODEL *pmod, int t1, int t2, int pre_n,
3274 DATASET *dset, gretlopt opt, int *err)
3275 {
3276 FITRESID *fr;
3277
3278 if (pmod->errcode) {
3279 *err = E_DATA;
3280 return NULL;
3281 }
3282
3283 fr = fit_resid_new_for_model(pmod, dset, t1, t2, pre_n, err);
3284
3285 if (!*err) {
3286 *err = real_get_fcast(fr, pmod, dset, opt);
3287 if (*err) {
3288 free_fit_resid(fr);
3289 fr = NULL;
3290 }
3291 }
3292
3293 return fr;
3294 }
3295
forecast_matrix_cleanup(void)3296 void forecast_matrix_cleanup (void)
3297 {
3298 set_fcast_matrices(NULL, NULL);
3299 }
3300
get_forecast_matrix(int idx,int * err)3301 gretl_matrix *get_forecast_matrix (int idx, int *err)
3302 {
3303 gretl_matrix *ret = NULL;
3304 gretl_matrix *M = NULL;
3305
3306 if (idx == M_FCAST) {
3307 M = fcast_matrix;
3308 } else if (idx == M_FCSE) {
3309 M = fcerr_matrix;
3310 }
3311
3312 if (M == NULL) {
3313 *err = E_BADSTAT;
3314 } else {
3315 ret = gretl_matrix_copy(M);
3316 if (ret == NULL) {
3317 *err = E_ALLOC;
3318 }
3319 }
3320
3321 return ret;
3322 }
3323
fr_adjust_sample(FITRESID * fr,int * ft1,int * ft2,int * et1,int * et2)3324 static void fr_adjust_sample (FITRESID *fr, int *ft1, int *ft2,
3325 int *et1, int *et2)
3326 {
3327 int t;
3328
3329 t = fr->t0;
3330 while (t<=fr->t2 && na(fr->fitted[t])) {
3331 *ft1 = ++t;
3332 }
3333
3334 t = fr->t2;
3335 while (t>=*ft1 && na(fr->fitted[t])) {
3336 *ft2 = --t;
3337 }
3338
3339 if (fr->sderr != NULL) {
3340 t = fr->t0;
3341 while (t<=fr->t2 && na(fr->sderr[t])) {
3342 *et1 = ++t;
3343 }
3344
3345 t = fr->t2;
3346 while (t>=*et1 && na(fr->sderr[t])) {
3347 *et2 = --t;
3348 }
3349 }
3350 }
3351
set_forecast_matrices_from_fr(FITRESID * fr)3352 static int set_forecast_matrices_from_fr (FITRESID *fr)
3353 {
3354 gretl_matrix *f = NULL;
3355 gretl_matrix *e = NULL;
3356 int ft1 = fr->t0;
3357 int ft2 = fr->t2;
3358 int et1 = fr->t0;
3359 int et2 = fr->t2;
3360 int T, t, s;
3361 int err = 0;
3362
3363 fr_adjust_sample(fr, &ft1, &ft2, &et1, &et2);
3364
3365 T = ft2 - ft1 + 1;
3366 if (T <= 0) {
3367 return 0;
3368 }
3369
3370 f = gretl_matrix_alloc(T, 1);
3371 if (f == NULL) {
3372 return E_ALLOC;
3373 }
3374
3375 gretl_matrix_set_t1(f, ft1);
3376 gretl_matrix_set_t2(f, ft2);
3377
3378 if (fr->sderr != NULL) {
3379 T = et2 - et1 + 1;
3380 if (T > 0) {
3381 e = gretl_matrix_alloc(T, 1);
3382 if (e == NULL) {
3383 err = E_ALLOC;
3384 } else {
3385 gretl_matrix_set_t1(e, et1);
3386 gretl_matrix_set_t2(e, et2);
3387 }
3388 }
3389 }
3390
3391 s = 0;
3392 for (t=ft1; t<=ft2; t++) {
3393 f->val[s++] = fr->fitted[t];
3394 }
3395
3396 if (e != NULL) {
3397 s = 0;
3398 for (t=et1; t<=et2; t++) {
3399 e->val[s++] = fr->sderr[t];
3400 }
3401 }
3402
3403 set_fcast_matrices(f, e);
3404
3405 return err;
3406 }
3407
matrix_first_ok_row(const gretl_matrix * a,int jmin,int cols)3408 static int matrix_first_ok_row (const gretl_matrix *a, int jmin, int cols)
3409 {
3410 int jmax = jmin + cols;
3411 double x;
3412 int i, j, miss;
3413
3414 for (i=0; i<a->rows; i++) {
3415 miss = 0;
3416 for (j=jmin; j<jmax && !miss; j++) {
3417 x = gretl_matrix_get(a, i, j);
3418 if (na(x)) {
3419 miss = 1;
3420 }
3421 }
3422 if (!miss) {
3423 break;
3424 }
3425 }
3426
3427 return i;
3428 }
3429
matrix_last_ok_row(const gretl_matrix * a,int jmin,int cols)3430 static int matrix_last_ok_row (const gretl_matrix *a, int jmin, int cols)
3431 {
3432 int jmax = jmin + cols;
3433 double x;
3434 int i, j, miss;
3435
3436 for (i=a->rows-1; i>=0; i--) {
3437 miss = 0;
3438 for (j=jmin; j<jmax && !miss; j++) {
3439 x = gretl_matrix_get(a, i, j);
3440 if (na(x)) {
3441 miss = 1;
3442 }
3443 }
3444 if (!miss) {
3445 break;
3446 }
3447 }
3448
3449 return i;
3450 }
3451
F_matrix_adjust_sample(const gretl_matrix * F,int * f0,int * fn,int * e0,int * en)3452 static void F_matrix_adjust_sample (const gretl_matrix *F,
3453 int *f0, int *fn,
3454 int *e0, int *en)
3455 {
3456 int k = F->cols / 2;
3457
3458 *f0 = matrix_first_ok_row(F, 0, k);
3459 *fn = matrix_last_ok_row(F, 0, k);
3460
3461 *e0 = matrix_first_ok_row(F, k, k);
3462 *en = matrix_last_ok_row(F, k, k);
3463 }
3464
set_forecast_matrices_from_F(const gretl_matrix * F,int imin,int imax)3465 static int set_forecast_matrices_from_F (const gretl_matrix *F,
3466 int imin, int imax)
3467 {
3468 gretl_matrix *F1 = NULL;
3469 gretl_matrix *f = NULL;
3470 gretl_matrix *e = NULL;
3471 int n = F->cols;
3472 int k = n / 2;
3473 int fT = F->rows;
3474 int eT = F->rows;
3475 int f0 = 0, fn = fT;
3476 int e0 = 0, en = eT;
3477 double x;
3478 int i, j, Ft1, mt1;
3479 int err = 0;
3480
3481 Ft1 = gretl_matrix_get_t1(F);
3482
3483 if (imin == imax) {
3484 /* extract one pair of columns */
3485 F1 = gretl_matrix_alloc(fT, 2);
3486 if (F1 == NULL) {
3487 return E_ALLOC;
3488 }
3489 for (i=0; i<fT; i++) {
3490 x = gretl_matrix_get(F, i, imin);
3491 gretl_matrix_set(F1, i, 0, x);
3492 x = gretl_matrix_get(F, i, imin + k);
3493 gretl_matrix_set(F1, i, 1, x);
3494 }
3495 F = F1;
3496 k = 1;
3497 }
3498
3499 F_matrix_adjust_sample(F, &f0, &fn, &e0, &en);
3500
3501 fT = fn - f0 + 1;
3502 if (fT <= 0) {
3503 return E_MISSDATA;
3504 }
3505
3506 f = gretl_matrix_alloc(fT, k);
3507 if (f == NULL) {
3508 return E_ALLOC;
3509 }
3510
3511 mt1 = Ft1 + f0;
3512 gretl_matrix_set_t1(f, mt1);
3513 gretl_matrix_set_t2(f, mt1 + fT - 1);
3514
3515 eT = en - e0 + 1;
3516
3517 if (eT > 0) {
3518 e = gretl_matrix_alloc(eT, k);
3519 if (e == NULL) {
3520 err = E_ALLOC;
3521 } else {
3522 mt1 = Ft1 + e0;
3523 gretl_matrix_set_t1(e, mt1);
3524 gretl_matrix_set_t2(e, mt1 + eT - 1);
3525 }
3526 }
3527
3528 for (j=0; j<k; j++) {
3529 for (i=0; i<fT; i++) {
3530 x = gretl_matrix_get(F, i + f0, j);
3531 gretl_matrix_set(f, i, j, x);
3532 }
3533 }
3534
3535 if (e != NULL) {
3536 for (j=0; j<k; j++) {
3537 for (i=0; i<eT; i++) {
3538 x = gretl_matrix_get(F, i + e0, k + j);
3539 gretl_matrix_set(e, i, j, x);
3540 }
3541 }
3542 }
3543
3544 set_fcast_matrices(f, e);
3545
3546 if (F1 != NULL) {
3547 gretl_matrix_free(F1);
3548 }
3549
3550 return err;
3551 }
3552
3553 /* compatibility with behaviour of old fcast command */
3554
add_fcast_to_dataset(FITRESID * fr,const char * vname,DATASET * dset,PRN * prn)3555 static int add_fcast_to_dataset (FITRESID *fr, const char *vname,
3556 DATASET *dset, PRN *prn)
3557 {
3558 int oldv = dset->v;
3559 int v, err = 0;
3560
3561 v = series_index(dset, vname);
3562
3563 if (v == dset->v) {
3564 /* new variable */
3565 if (check_varname(vname)) {
3566 err = E_DATA;
3567 } else {
3568 err = dataset_add_series(dset, 1);
3569 if (!err) {
3570 strcpy(dset->varname[v], vname);
3571 }
3572 }
3573 }
3574
3575 if (!err) {
3576 int t;
3577
3578 for (t=0; t<dset->n; t++) {
3579 dset->Z[v][t] = fr->fitted[t];
3580 }
3581
3582 series_set_label(dset, v, _("predicted values"));
3583
3584 if (gretl_messages_on()) {
3585 if (v < oldv) {
3586 pprintf(prn, _("Replaced series %s (ID %d)"),
3587 vname, v);
3588 } else {
3589 pprintf(prn, _("Generated series %s (ID %d)"),
3590 vname, v);
3591 }
3592 pputc(prn, '\n');
3593 }
3594 }
3595
3596 return err;
3597 }
3598
model_do_forecast(const char * str,MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3599 static int model_do_forecast (const char *str, MODEL *pmod,
3600 DATASET *dset, gretlopt opt,
3601 PRN *prn)
3602 {
3603 char vname[VNAMELEN];
3604 FITRESID *fr = NULL;
3605 int t1, t2, k = -1;
3606 int os_case = 0;
3607 int err;
3608
3609 if (pmod->ci == HECKIT || pmod->ci == DURATION) {
3610 /* FIXME */
3611 return E_NOTIMP;
3612 }
3613
3614 if (pmod->ci == PANEL && !(pmod->opt & OPT_P)) {
3615 /* FIXME */
3616 if (opt & OPT_R) {
3617 return E_NOTIMP;
3618 }
3619 }
3620
3621 if (pmod->ci == PROBIT && (pmod->opt & OPT_E)) {
3622 /* random effects probit: FIXME */
3623 return E_NOTIMP;
3624 }
3625
3626 /* OPT_I for integrate: reject for non-OLS, or if the dependent
3627 variable is not recognized as a first difference */
3628 if (opt & OPT_I) {
3629 err = check_integrated_forecast_option(pmod, dset, NULL);
3630 if (err) {
3631 return err;
3632 }
3633 }
3634
3635 err = parse_forecast_string(str, opt, pmod, pmod->t2, dset,
3636 &t1, &t2, &k, vname, &os_case);
3637 if (err) {
3638 return err;
3639 }
3640
3641 if (*vname != '\0') {
3642 /* saving to named series */
3643 opt |= (OPT_A | OPT_Q);
3644 }
3645
3646 if (os_case == OS_PANEL) {
3647 return panel_os_special(pmod, dset, vname, opt, prn);
3648 } else if (opt & OPT_R) {
3649 fr = recursive_OLS_k_step_fcast(pmod, dset, t1, t2,
3650 k, 0, &err);
3651 } else {
3652 fr = get_forecast(pmod, t1, t2, 0, dset, opt, &err);
3653 }
3654
3655 if (!err && !(opt & OPT_Q)) {
3656 gretlopt printopt = opt;
3657
3658 if (opt & OPT_U) {
3659 /* do graph (from command line) */
3660 printopt |= OPT_P;
3661 }
3662 err = text_print_forecast(fr, dset, printopt, prn);
3663 }
3664
3665 if (!err && (opt & OPT_A)) {
3666 /* add forecast directly */
3667 err = add_fcast_to_dataset(fr, vname, dset, prn);
3668 }
3669
3670 if (!err) {
3671 set_forecast_matrices_from_fr(fr);
3672 }
3673
3674 free_fit_resid(fr);
3675
3676 return err;
3677 }
3678
get_sys_fcast_var(const int * ylist,const char * vname,DATASET * dset)3679 static int get_sys_fcast_var (const int *ylist, const char *vname,
3680 DATASET *dset)
3681 {
3682 int i, vi;
3683
3684 for (i=0; i<ylist[0]; i++) {
3685 vi = ylist[i+1];
3686 if (!strcmp(vname, dset->varname[vi])) {
3687 return i;
3688 }
3689 }
3690
3691 return -1;
3692 }
3693
3694 /* grab the relevant range from system forecast matrix and write it
3695 into a FITRESID struct for printing or graphing
3696 */
3697
fill_system_forecast(FITRESID * fr,int i,int yno,GRETL_VAR * var,equation_system * sys,const gretl_matrix * F,DATASET * dset)3698 static int fill_system_forecast (FITRESID *fr, int i, int yno,
3699 GRETL_VAR *var, equation_system *sys,
3700 const gretl_matrix *F,
3701 DATASET *dset)
3702 {
3703 int m = F->cols / 2;
3704 int s, t, nf;
3705 int err = 0;
3706
3707 strcpy(fr->depvar, dset->varname[yno]);
3708
3709 /* "pre-forecast" observations */
3710 for (t=fr->t0; t<fr->t1; t++) {
3711 fr->actual[t] = dset->Z[yno][t];
3712 if (sys != NULL) {
3713 if (i < sys->neqns && sys->lists[i][1] == yno) {
3714 /* Note: right now we can only handle endogenous
3715 variables that appear on the LHS of stochastic
3716 equations, because only such variables have an
3717 associated column in sys->yhat.
3718 */
3719 if (t >= sys->t1 && t <= sys->t2) {
3720 s = t - sys->t1;
3721 fr->fitted[t] = gretl_matrix_get(sys->yhat, s, i);
3722 }
3723 }
3724 } else if (var != NULL) {
3725 if (t >= var->t1 && t <= var->t2) {
3726 s = t - var->t1;
3727 fr->fitted[t] = fr->actual[t] - gretl_matrix_get(var->E, s, i);
3728 }
3729 }
3730 }
3731
3732 /* actual forecasts */
3733 nf = 0;
3734 for (t=fr->t1, s=0; t<=fr->t2; t++, s++) {
3735 fr->actual[t] = dset->Z[yno][t];
3736 fr->fitted[t] = gretl_matrix_get(F, s, i);
3737 if (!na(fr->fitted[t])) {
3738 nf++;
3739 }
3740 if (fr->sderr != NULL) {
3741 fr->sderr[t] = gretl_matrix_get(F, s, i + m);
3742 }
3743 }
3744
3745 if (nf == 0) {
3746 err = E_MISSDATA;
3747 } else {
3748 fit_resid_set_dec_places(fr);
3749 }
3750
3751 return err;
3752 }
3753
system_do_forecast(const char * str,void * ptr,int type,DATASET * dset,gretlopt opt,PRN * prn)3754 static int system_do_forecast (const char *str, void *ptr, int type,
3755 DATASET *dset, gretlopt opt,
3756 PRN *prn)
3757 {
3758 char vname[VNAMELEN];
3759 GRETL_VAR *var = NULL;
3760 equation_system *sys = NULL;
3761 const int *ylist = NULL;
3762 const gretl_matrix *F = NULL;
3763 int t1 = 0, t2 = 0;
3764 int t2est, ci;
3765 int imax, imin = 0;
3766 int os_case = 0;
3767 int df = 0;
3768 int err = 0;
3769
3770 if (type == GRETL_OBJ_VAR) {
3771 var = (GRETL_VAR *) ptr;
3772 imax = var->neqns - 1;
3773 t2est = var->t2;
3774 ci = var->ci;
3775 df = var->df;
3776 ylist = var->ylist;
3777 } else {
3778 sys = (equation_system *) ptr;
3779 imax = sys->neqns + sys->nidents - 1;
3780 t2est = sys->t2;
3781 ci = SYSTEM;
3782 df = sys->df;
3783 ylist = sys->ylist;
3784 }
3785
3786 err = parse_forecast_string(str, opt, NULL, t2est, dset,
3787 &t1, &t2, NULL, vname, &os_case);
3788
3789 if (!err) {
3790 if (var != NULL) {
3791 F = gretl_VAR_get_forecast_matrix(var, t1, t2, dset,
3792 opt, &err);
3793 } else {
3794 F = system_get_forecast_matrix(sys, t1, t2, dset,
3795 opt, &err);
3796 }
3797 }
3798
3799 if (!err && *vname != '\0') {
3800 imin = get_sys_fcast_var(ylist, vname, dset);
3801 if (imin < 0) {
3802 err = E_DATA;
3803 } else {
3804 imax = imin;
3805 }
3806 }
3807
3808 if (!err) {
3809 /* arrange to save forecast and errors */
3810 err = set_forecast_matrices_from_F(F, imin, imax);
3811 }
3812
3813 if (err) {
3814 return err;
3815 }
3816
3817 if (!(opt & OPT_Q)) {
3818 /* assemble and print per-equation forecasts */
3819 gretlopt printopt = (opt & OPT_N)? OPT_N : OPT_NONE;
3820 FITRESID *fr = NULL;
3821 int i, asy;
3822
3823 asy = (ci == VECM);
3824
3825 fr = fit_resid_new_for_system(asy, dset, t1, t2, 0, &err);
3826 if (err) {
3827 return err;
3828 }
3829
3830 if (asy) {
3831 /* asymptotic normal */
3832 fr->df = var->T;
3833 } else {
3834 fr->df = df;
3835 }
3836
3837 for (i=imin; i<=imax && !err; i++) {
3838 err = fill_system_forecast(fr, i, ylist[i+1], var, sys,
3839 F, dset);
3840 if (!err) {
3841 err = text_print_forecast(fr, dset, printopt, prn);
3842 }
3843 printopt |= OPT_Q;
3844 }
3845
3846 free_fit_resid(fr);
3847 }
3848
3849 return err;
3850 }
3851
3852 /**
3853 * do_forecast:
3854 * @str: command string, which may include a starting
3855 * observation and ending observation, and/or the name of a
3856 * variable for saving the forecast values.
3857 * @dset: dataset struct.
3858 * @opt: if %OPT_D, force a dynamic forecast; if %OPT_S, force
3859 * a static forecast. By default, the forecast is static within
3860 * the data range over which the model was estimated, and dynamic
3861 * out of sample (in cases where this distinction is meaningful).
3862 * %OPT_R: do recursive forecast.
3863 * %OPT_Q: suppress printing of the forecast;
3864 * %OPT_P: ensure that the values are printed.
3865 * %OPT_U: produce gnuplot plot.
3866 * @prn: gretl printing struct.
3867 *
3868 * In the case of "simple" models with an autoregressive error term
3869 * (%AR, %AR1) the forecast values incorporate the predictable
3870 * component of the error.
3871 *
3872 * Returns: 0 on success, non-zero error code on failure.
3873 */
3874
do_forecast(const char * str,DATASET * dset,gretlopt opt,PRN * prn)3875 int do_forecast (const char *str, DATASET *dset,
3876 gretlopt opt, PRN *prn)
3877 {
3878 void *ptr;
3879 GretlObjType type;
3880 int err;
3881
3882 ptr = get_last_model(&type);
3883 if (ptr == NULL) {
3884 return E_BADSTAT;
3885 }
3886
3887 if ((opt & OPT_U) && dataset_is_panel(dset)) {
3888 gretl_errmsg_set(_("Forecast plot not implemented for panel data"));
3889 err = E_NOTIMP;
3890 } else if ((opt & (OPT_R | OPT_I | OPT_U)) && type != GRETL_OBJ_EQN) {
3891 /* "recursive", "integrate", plot option: single equations only */
3892 err = E_BADOPT;
3893 } else if (type == GRETL_OBJ_EQN) {
3894 err = model_do_forecast(str, ptr, dset, opt, prn);
3895 } else if (type == GRETL_OBJ_SYS || type == GRETL_OBJ_VAR) {
3896 err = system_do_forecast(str, ptr, type, dset, opt, prn);
3897 } else {
3898 err = E_DATA;
3899 }
3900
3901 return err;
3902 }
3903
3904 /* try to determine in advance how far we can go with a forecast,
3905 either dynamic or static (ftype) */
3906
3907 static int
fcast_get_t2max(const int * list,const int * dvlags,const MODEL * pmod,const DATASET * dset,int ftype)3908 fcast_get_t2max (const int *list, const int *dvlags, const MODEL *pmod,
3909 const DATASET *dset, int ftype)
3910 {
3911 const double *ay = NULL;
3912 int i, vi, t;
3913
3914 if (pmod->ci == ARMA && ftype == FC_STATIC) {
3915 int yno = gretl_model_get_depvar(pmod);
3916
3917 ay = dset->Z[yno];
3918 }
3919
3920 for (t=pmod->t2; t<dset->n; t++) {
3921 int all_ok = 1;
3922
3923 if (ay != NULL && na(ay[t-1])) {
3924 /* FIXME? */
3925 break;
3926 }
3927
3928 for (i=1; i<=list[0]; i++) {
3929 vi = list[i];
3930 if (vi == 0) {
3931 continue;
3932 } else if (dvlags != NULL && dvlags[i-1] != 0) {
3933 continue;
3934 } else if (is_trend_variable(dset->Z[vi], dset->n)) {
3935 continue;
3936 } else if (is_periodic_dummy(dset->Z[vi], dset)) {
3937 continue;
3938 }
3939 if (na(dset->Z[vi][t])) {
3940 all_ok = 0;
3941 break;
3942 }
3943 }
3944
3945 if (!all_ok) {
3946 t--;
3947 break;
3948 } else if (t == dset->n - 1) {
3949 break;
3950 }
3951 }
3952
3953 return t;
3954 }
3955
3956 /**
3957 * get_system_forecast:
3958 * @p: pointer to the VAR or equation system from which
3959 * forecasts are wanted.
3960 * @ci: command index for system (%VAR, %VECM or %SYSTEM)
3961 * @i: 0-based index for the variable to forecast, within
3962 * the equation system.
3963 * @t1: start of forecast range.
3964 * @t2: end of forecast range.
3965 * @pre_n: number of pre-forecast observations to include.
3966 * @dset: dataset struct.
3967 * @opt: if %OPT_D, force a dynamic forecast; if %OPT_S, force
3968 * a static forecast. By default, the forecast is static within
3969 * the data range over which the model was estimated, and dynamic
3970 * out of sample.
3971 * @err: location to receive error code.
3972 *
3973 * Allocates a #FITRESID structure and fills it out with forecasts
3974 * based on the system at location @p, over the specified range of
3975 * observations.
3976 *
3977 * Returns: pointer to allocated structure, or %NULL on failure.
3978 */
3979
get_system_forecast(void * p,int ci,int i,int t1,int t2,int pre_n,DATASET * dset,gretlopt opt,int * err)3980 FITRESID *get_system_forecast (void *p, int ci, int i,
3981 int t1, int t2, int pre_n,
3982 DATASET *dset, gretlopt opt,
3983 int *err)
3984 {
3985 FITRESID *fr;
3986 GRETL_VAR *var = NULL;
3987 equation_system *sys = NULL;
3988 const gretl_matrix *F = NULL;
3989 int nf = t2 - t1 + 1;
3990 int asy, df = 0, yno = 0;
3991
3992 if (nf <= 0) {
3993 *err = E_DATA;
3994 return NULL;
3995 }
3996
3997 if (ci == VAR || ci == VECM) {
3998 var = (GRETL_VAR *) p;
3999 yno = var->ylist[i+1];
4000 df = var->df;
4001 F = gretl_VAR_get_forecast_matrix(var, t1, t2, dset, opt, err);
4002 } else if (ci == SYSTEM) {
4003 sys = (equation_system *) p;
4004 yno = sys->ylist[i+1];
4005 df = sys->df;
4006 F = system_get_forecast_matrix(sys, t1, t2, dset, opt, err);
4007 } else {
4008 *err = E_DATA;
4009 }
4010
4011 if (*err) {
4012 fprintf(stderr, "get_system_forecast: matrix F is NULL\n");
4013 return NULL;
4014 }
4015
4016 asy = (ci == VECM);
4017
4018 fr = fit_resid_new_for_system(asy, dset, t1, t2, pre_n, err);
4019 if (*err) {
4020 return NULL;
4021 }
4022
4023 if (asy) {
4024 /* asymptotic normal */
4025 fr->df = var->T;
4026 } else {
4027 fr->df = df;
4028 }
4029
4030 *err = fill_system_forecast(fr, i, yno, var, sys, F, dset);
4031
4032 if (*err) {
4033 free_fit_resid(fr);
4034 fr = NULL;
4035 }
4036
4037 return fr;
4038 }
4039
4040 /**
4041 * forecast_options_for_model:
4042 * @pmod: the model from which forecasts are wanted.
4043 * @dset: dataset struct.
4044 * @flags: location to receive flags from among #ForecastFlags.
4045 * @dt2max: location to receive the last observation that can
4046 * be supported for a dynamic forecast.
4047 * @st2max: location to receive the last observation that can
4048 * be supported for a static forecast.
4049 *
4050 * Examines @pmod and determines which forecasting options are
4051 * applicable.
4052 */
4053
forecast_options_for_model(MODEL * pmod,const DATASET * dset,int * flags,int * dt2max,int * st2max)4054 void forecast_options_for_model (MODEL *pmod, const DATASET *dset,
4055 int *flags, int *dt2max, int *st2max)
4056 {
4057 int *dvlags = NULL;
4058 int dv, exo = 1;
4059
4060 *flags = 0;
4061
4062 dv = gretl_model_get_depvar(pmod);
4063
4064 if (pmod->ci == OLS) {
4065 if (is_standard_diff(dv, dset, NULL)) {
4066 *flags |= FC_INTEGRATE_OK;
4067 } else {
4068 *flags |= FC_MEAN_OK;
4069 }
4070 } else if (pmod->ci == NLS) {
4071 /* we'll try winging it! */
4072 if (gretl_model_get_int(pmod, "dynamic") && pmod->t2 < dset->n - 1) {
4073 *flags |= FC_AUTO_OK;
4074 }
4075 return;
4076 }
4077
4078 *dt2max = pmod->t2;
4079 *st2max = pmod->t2;
4080
4081 if (pmod->ci == ARMA || pmod->ci == GARCH) {
4082 *flags |= FC_DYNAMIC_OK;
4083 } else if (AR_MODEL(pmod->ci)) {
4084 *flags |= FC_DYNAMIC_OK;
4085 } else if (dataset_is_time_series(dset) &&
4086 has_depvar_lags(pmod, dset)) {
4087 *flags |= FC_DYNAMIC_OK;
4088 }
4089
4090 if (*flags & FC_DYNAMIC_OK) {
4091 int err = process_lagged_depvar(pmod, dset, &dvlags);
4092
4093 if (!err) {
4094 exo = has_real_exog_regressors(pmod, dvlags, dset);
4095 }
4096 if (!exo) {
4097 *flags |= FC_ADDOBS_OK;
4098 *dt2max = dset->n - 1;
4099 }
4100 }
4101
4102 if (exo) {
4103 const int *xlist = model_xlist(pmod);
4104
4105 if (xlist != NULL) {
4106 *dt2max = fcast_get_t2max(xlist, dvlags, pmod, dset, FC_DYNAMIC);
4107 *st2max = fcast_get_t2max(xlist, dvlags, pmod, dset, FC_STATIC);
4108 }
4109 }
4110
4111 if (dvlags != NULL) {
4112 free(dvlags);
4113 }
4114 }
4115
y_lag(int v,int parent_id,const DATASET * dset)4116 static int y_lag (int v, int parent_id, const DATASET *dset)
4117 {
4118 if (series_get_transform(dset, v) == LAGS) {
4119 int pv = series_get_parent_id(dset, v);
4120
4121 if (pv == parent_id) {
4122 return series_get_lag(dset, v);
4123 }
4124 }
4125
4126 return 0;
4127 }
4128
k_step_init(MODEL * pmod,const DATASET * dset,double ** py,int ** pllist)4129 static int k_step_init (MODEL *pmod, const DATASET *dset,
4130 double **py, int **pllist)
4131 {
4132 double *y = NULL;
4133 int *llist = NULL;
4134 int vy = pmod->list[1];
4135 int i, nl = 0;
4136
4137 for (i=2; i<=pmod->list[0]; i++) {
4138 if (y_lag(pmod->list[i], vy, dset)) {
4139 nl++;
4140 }
4141 }
4142
4143 if (nl == 0) {
4144 return 0;
4145 }
4146
4147 y = malloc(dset->n * sizeof *y);
4148 llist = gretl_list_new(pmod->list[0] - 1);
4149
4150 if (y == NULL || llist == NULL) {
4151 free(y);
4152 free(llist);
4153 return E_ALLOC;
4154 }
4155
4156 for (i=0; i<dset->n; i++) {
4157 y[i] = dset->Z[vy][i];
4158 }
4159
4160 for (i=2; i<=pmod->list[0]; i++) {
4161 llist[i-1] = y_lag(pmod->list[i], vy, dset);
4162 }
4163
4164 *py = y;
4165 *pllist = llist;
4166
4167 return 0;
4168 }
4169
recursive_fcast_adjust_obs(MODEL * pmod,int * t1,int t2,int k)4170 static int recursive_fcast_adjust_obs (MODEL *pmod, int *t1, int t2, int k)
4171 {
4172 /* the earliest possible forecast start */
4173 int t1min = pmod->t1 + pmod->ncoeff + k - 1;
4174 int err = 0;
4175
4176 if (*t1 < t1min) {
4177 *t1 = t1min;
4178 }
4179
4180 /* minimal forecast range = 1, otherwise error */
4181 if (t2 < *t1) {
4182 err = E_OBS;
4183 }
4184
4185 return err;
4186 }
4187
4188 /* recursive k-step ahead forecasts, for models estimated via OLS */
4189
4190 FITRESID *
recursive_OLS_k_step_fcast(MODEL * pmod,DATASET * dset,int t1,int t2,int k,int pre_n,int * err)4191 recursive_OLS_k_step_fcast (MODEL *pmod, DATASET *dset,
4192 int t1, int t2, int k,
4193 int pre_n, int *err)
4194 {
4195 FITRESID *fr;
4196 int orig_t1 = dset->t1;
4197 int orig_t2 = dset->t2;
4198 double *y = NULL;
4199 int *llist = NULL;
4200 double xit, yf = NADBL;
4201 MODEL mod;
4202 int j, p, vi, nf;
4203 int i, s, t;
4204
4205 if (pmod->ci != OLS) {
4206 *err = E_OLSONLY;
4207 return NULL;
4208 }
4209
4210 if (k < 1) {
4211 gretl_errmsg_set("recursive forecast: steps-ahead must be >= 1");
4212 *err = E_DATA;
4213 return NULL;
4214 }
4215
4216 /* check feasibility of forecast range */
4217 *err = recursive_fcast_adjust_obs(pmod, &t1, t2, k);
4218 if (*err) {
4219 return NULL;
4220 }
4221
4222 if (k > 1) {
4223 *err = k_step_init(pmod, dset, &y, &llist);
4224 if (*err) {
4225 return NULL;
4226 }
4227 }
4228
4229 fr = fit_resid_new_for_model(pmod, dset, t1, t2, pre_n, err);
4230 if (*err) {
4231 free(y);
4232 free(llist);
4233 return NULL;
4234 }
4235
4236 fr->method = FC_KSTEP;
4237 fr->k = k;
4238
4239 /* sample range for initial estimation */
4240 dset->t1 = pmod->t1;
4241 dset->t2 = t1 - k; /* start of fcast range minus k */
4242
4243 /* number of forecasts */
4244 nf = t2 - t1 + 1;
4245
4246 fprintf(stderr, "recursive fcast: dset->t1=%d, dset->t2=%d, t1=%d, "
4247 "t2=%d, k=%d, nf=%d\n", dset->t1, dset->t2, t1, t2, k, nf);
4248
4249 for (t=0; t<dset->n; t++) {
4250 fr->actual[t] = dset->Z[pmod->list[1]][t];
4251 }
4252
4253 for (s=0; s<nf; s++) {
4254 mod = lsq(pmod->list, dset, OLS, OPT_A | OPT_Z);
4255 if (mod.errcode) {
4256 *err = mod.errcode;
4257 clear_model(&mod);
4258 break;
4259 }
4260
4261 /* the first obs following the estimation sample */
4262 t = dset->t2 + 1;
4263
4264 for (j=0; j<k; j++) {
4265 yf = 0.0;
4266 /* steps ahead */
4267 for (i=0; i<mod.ncoeff; i++) {
4268 vi = mod.list[i+2];
4269 p = (llist != NULL)? llist[i+1] : 0;
4270 if (p > 0 && p <= j) {
4271 xit = y[t-p];
4272 } else {
4273 xit = dset->Z[vi][t];
4274 }
4275 if (na(xit)) {
4276 yf = NADBL;
4277 break;
4278 } else {
4279 yf += mod.coeff[i] * xit;
4280 }
4281 }
4282 if (y != NULL && j < k - 1) {
4283 y[t++] = yf;
4284 }
4285 }
4286
4287 fr->fitted[t] = yf;
4288
4289 if (!na(fr->actual[t]) && !na(fr->fitted[t])) {
4290 fr->resid[t] = fr->actual[t] - fr->fitted[t];
4291 }
4292
4293 clear_model(&mod);
4294 dset->t2 += 1;
4295 }
4296
4297 dset->t1 = orig_t1;
4298 dset->t2 = orig_t2;
4299
4300 if (*err) {
4301 free_fit_resid(fr);
4302 fr = NULL;
4303 } else {
4304 fit_resid_set_dec_places(fr);
4305 strcpy(fr->depvar, dset->varname[pmod->list[1]]);
4306 }
4307
4308 free(y);
4309 free(llist);
4310
4311 return fr;
4312 }
4313
fcast_get_continuous_range(const FITRESID * fr,int * pt1,int * pt2)4314 void fcast_get_continuous_range (const FITRESID *fr, int *pt1, int *pt2)
4315 {
4316 int t, t1 = fr->t1, t2 = fr->t2;
4317
4318 for (t=t1; t<=t2; t++) {
4319 if (na(fr->actual[t]) || na(fr->fitted[t])) {
4320 t1++;
4321 } else {
4322 break;
4323 }
4324 }
4325
4326 for (t=t2; t>=t1; t--) {
4327 if (na(fr->actual[t]) || na(fr->fitted[t])) {
4328 t2--;
4329 } else {
4330 break;
4331 }
4332 }
4333
4334 *pt1 = t1;
4335 *pt2 = t2;
4336 }
4337