1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 /* estimate.c - basic gretl estimation procedures */
21 
22 #include "libgretl.h"
23 #include "qr_estimate.h"
24 #include "gretl_panel.h"
25 #include "libset.h"
26 #include "compat.h"
27 #include "missing_private.h"
28 #include "estim_private.h"
29 #include "system.h"
30 #include "tsls.h"
31 #include "nls.h"
32 
33 #ifdef WIN32
34 # include "gretl_win32.h"
35 #endif
36 
37 /**
38  * SECTION:estimate
39  * @short_description: estimation of regression models
40  * @title: Estimation
41  * @include: gretl/libgretl.h
42  *
43  * Most libgretl functions that estimate regression models are
44  * collected here.
45  *
46  * Please note that most of these functions return a #MODEL
47  * struct -- literally a struct, not a pointer to one.
48  * To handle such a return value in C you should either (a)
49  * declare a MODEL and assign to it, or (b) allocate a MODEL
50  * "on the heap" and then assign to its content using the
51  * indirection operator. The code fragment below illustrates
52  * both approaches.
53  *
54  * <informalexample><programlisting>
55  * #include &lt;gretl/libgretl.h&gt;
56  *
57  * int ols_using_gretl (const int *list, DATASET *dset,
58  *                      PRN *prn)
59  * {
60  *     MODEL model;
61  *     MODEL *pmod;
62  *     int err;
63  *
64  *     // method (a)
65  *     model = lsq(list, dset, OLS, OPT_NONE);
66  *     err = model.errcode;
67  *     if (!err) {
68  *         printmodel(&amp;model, dset, OPT_NONE, prn);
69  *     }
70  *     clear_model(&amp;model);
71  *
72  *     // method (b)
73  *     pmod = gretl_model_new();
74  *     *pmod = lsq(list, dset, OLS, OPT_NONE);
75  *     err = pmod->errcode;
76  *     if (!err) {
77  *         printmodel(pmod, dset, OPT_NONE, prn);
78  *     }
79  *     gretl_model_free(pmod);
80  *
81  *     return err;
82  * }
83  * </programlisting></informalexample>
84  *
85  */
86 
87 /* Comment on 'TINY': It's the minimum value for 'test' (see below)
88    that libgretl's Cholesky decomposition routine will accept before
89    rejecting a data matrix as too highly collinear.  If you set it too
90    high, data sets for which Cholesky could produce reasonable
91    estimates will be rejected.  If you set it too low (and 100 *
92    DBL_EPSILON is definitely too low), gretl will produce more or less
93    worthless coefficient estimates when given highly collinear data.
94    Before making a permanent change to the value of TINY, check how
95    gretl does on the NIST reference data sets for linear regression
96    and ensure you're not getting any garbage results.  The current
97    value enables us to get decent results on the NIST nonlinear
98    regression test suite; it might be a bit too low for some contexts.
99 */
100 
101 #define TINY      8.0e-09 /* was 2.1e-09 (last changed 2007/01/20) */
102 #define SMALL     2.0e-08 /* threshold for printing a warning for collinearity */
103 #define YBARZERO  0.5e-14 /* threshold for treating mean of dependent
104 			     variable as effectively zero */
105 #define ESSZERO   1.0e-22 /* threshold for considering a tiny error-sum-of-
106 			     squares value to be effectively zero */
107 
108 #define XPX_DEBUG 0
109 
110 static void regress (MODEL *pmod, double *xpy,
111 		     double ysum, double ypy,
112 		     const DATASET *dset,
113 		     double rho, gretlopt opt);
114 static void omitzero (MODEL *pmod, const DATASET *dset,
115 		      gretlopt opt);
116 static int model_lags (const int *list, const DATASET *dset, int *pmax);
117 static double estimate_rho (const int *list, DATASET *dset,
118 			    gretlopt opt, PRN *prn, int *err);
119 
120 /* compute statistics for the dependent variable in a model */
121 
122 static void model_depvar_stats (MODEL *pmod, DATASET *dset)
123 {
124     double xx, sum = 0.0;
125     int yno = pmod->list[1];
126     int t, zw = 0;
127 
128     if (pmod->ci == WLS && gretl_model_get_int(pmod, "wt_zeros")) {
129 	/* WLS with some zero weights */
130 	zw = 1;
131     }
132 
133     pmod->ybar = pmod->sdy = NADBL;
134 
135     if (pmod->nobs <= 0) {
136 	return;
137     }
138 
139     for (t=pmod->t1; t<=pmod->t2; t++) {
140 	if (zw && dset->Z[pmod->nwt][t] == 0.0) {
141 	    continue;
142 	}
143 	if (!model_missing(pmod, t)) {
144 	    sum += dset->Z[yno][t];
145 	}
146     }
147 
148     pmod->ybar = sum / pmod->nobs;
149 
150     sum = 0.0;
151     for (t=pmod->t1; t<=pmod->t2; t++) {
152 	if (zw && dset->Z[pmod->nwt][t] == 0.0) {
153 	    continue;
154 	}
155 	if (!model_missing(pmod, t)) {
156 	    sum += (dset->Z[yno][t] - pmod->ybar);
157 	}
158     }
159 
160     pmod->ybar = pmod->ybar + sum / pmod->nobs;
161 
162     if (fabs(pmod->ybar) < YBARZERO) {
163 	pmod->ybar = 0.0;
164     }
165 
166     sum = 0.0;
167     for (t=pmod->t1; t<=pmod->t2; t++) {
168 	if (zw && dset->Z[pmod->nwt][t] == 0.0) {
169 	    continue;
170 	}
171 	if (!model_missing(pmod, t)) {
172 	    xx = dset->Z[yno][t] - pmod->ybar;
173 	    sum += xx * xx;
174 	}
175     }
176 
177     sum = (pmod->nobs > 1)? sum / (pmod->nobs - 1) : 0.0;
178 
179     pmod->sdy = (sum >= 0)? sqrt(sum) : NADBL;
180 }
181 
182 /* determine the degrees of freedom for a model */
183 
184 static int get_model_df (MODEL *pmod)
185 {
186     int err = 0;
187 
188     pmod->ncoeff = pmod->list[0] - 1;
189     pmod->dfd = pmod->nobs - pmod->ncoeff;
190 
191     if (pmod->dfd < 0) {
192 	pmod->errcode = E_DF;
193         gretl_errmsg_sprintf(_("No. of obs (%d) is less than no. "
194 			       "of parameters (%d)"), pmod->nobs,
195 			     pmod->ncoeff);
196 	err = 1;
197     } else {
198 	pmod->dfn = pmod->ncoeff - pmod->ifc;
199     }
200 
201     return err;
202 }
203 
204 #define LDDEBUG 0
205 
206 static int transcribe_ld_vcv (MODEL *targ, MODEL *src)
207 {
208     int nv = targ->ncoeff;
209     int nxpx = (nv * nv + nv) / 2;
210     int i, j, err = 0;
211 
212     err = makevcv(src, src->sigma);
213 
214     if (!err && targ->vcv == NULL) {
215 	targ->vcv = malloc(nxpx * sizeof *targ->vcv);
216 	if (targ->vcv == NULL) {
217 	    err = E_ALLOC;
218 	}
219     }
220 
221     if (!err) {
222 	for (i=0; i<nv; i++) {
223 	    for (j=i; j<nv; j++) {
224 		targ->vcv[ijton(i, j, nv)] =
225 		    src->vcv[ijton(i, j, src->ncoeff)];
226 	    }
227 	}
228     }
229 
230     return err;
231 }
232 
233 /* Calculate consistent standard errors (and VCV matrix) when doing
234    AR(1) estimation of a model with lagged dependent variable.  See
235    Ramanathan, Introductory Econometrics, 5e, p. 450.
236 */
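
/* Sketch of the auxiliary regression set up below: the residuals u_t
   from the AR(1) model are regressed by OLS on the rho-differenced
   regressors (x_t - rho*x_{t-1}) plus one extra variable,
   "uhat_1" = y_{t-1} - x_{t-1}'b, the lagged residual implied by the
   estimated coefficients.  The standard errors and covariance matrix
   from that regression are then transcribed to the original model.
*/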
237 
238 static int
239 ldepvar_std_errors (MODEL *pmod, DATASET *dset)
240 {
241     MODEL emod;
242     const double *x;
243     int orig_t1 = dset->t1;
244     int orig_t2 = dset->t2;
245     double rho = gretl_model_get_double(pmod, "rho_gls");
246     int origv = dset->v;
247     int vnew = pmod->list[0] + 1 - pmod->ifc;
248     int *list;
249     int vi, vm;
250     int i, t;
251     int err = 0;
252 
253 #if LDDEBUG
254     PRN *prn = gretl_print_new(GRETL_PRINT_STDOUT, NULL);
255     printlist(pmod->list, "pmod->list");
256     printf("vnew = %d\n", vnew);
257     printf("rho = %g\n", rho);
258 #endif
259 
260     list = gretl_list_new(vnew + pmod->ifc);
261     if (list == NULL) {
262 	pmod->errcode = E_ALLOC;
263 	return 1;
264     }
265 
266     err = dataset_add_series(dset, vnew);
267     if (err) {
268 	free(list);
269 	pmod->errcode = E_ALLOC;
270 	return 1;
271     }
272 
273     vi = origv;
274 
275     /* dependent var is residual from original model */
276     for (t=0; t<dset->n; t++) {
277 	dset->Z[vi][t] = pmod->uhat[t];
278     }
279     strcpy(dset->varname[vi], "eps");
280     list[1] = vi++;
281 
282     /* indep vars are rho-differenced vars from original model */
283     for (i=2; i<=pmod->list[0]; i++) {
284 	vm = pmod->list[i];
285 	if (vm == 0) {
286 	    list[i] = 0;
287 	    continue;
288 	}
289 	sprintf(dset->varname[vi], "%.6s_r", dset->varname[vm]);
290 	x = dset->Z[vm];
291 	for (t=0; t<dset->n; t++) {
292 	    if (t == 0 || na(x[t]) || na(x[t-1])) {
293 		dset->Z[vi][t] = NADBL;
294 	    } else {
295 		dset->Z[vi][t] = x[t] - rho * x[t-1];
296 	    }
297 	}
298 	list[i] = vi++;
299     }
300 
301     /* last indep var is lagged u-hat */
302     for (t=0; t<dset->n; t++) {
303 	if (t == 0) {
304 	    dset->Z[vi][t] = NADBL;
305 	} else {
306 	    dset->Z[vi][t] = dset->Z[pmod->list[1]][t-1];
307 	}
308 	if (na(dset->Z[vi][t])) {
309 	    continue;
310 	}
311 	for (i=0; i<pmod->ncoeff; i++) {
312 	    x = dset->Z[pmod->list[i+2]];
313 	    if (na(x[t-1])) {
314 		dset->Z[vi][t] = NADBL;
315 		break;
316 	    } else {
317 		dset->Z[vi][t] -= pmod->coeff[i] * x[t-1];
318 	    }
319 	}
320     }
321 
322     list[list[0]] = vi;
323     strcpy(dset->varname[vi], "uhat_1");
324 
325     dset->t1 = pmod->t1;
326     dset->t2 = pmod->t2;
327 
328     emod = lsq(list, dset, OLS, OPT_A);
329     if (emod.errcode) {
330 	err = emod.errcode;
331     } else {
332 #if LDDEBUG
333 	printmodel(&emod, dset, OPT_NONE, prn);
334 	gretl_print_destroy(prn);
335 #endif
336 	for (i=0; i<pmod->ncoeff; i++) {
337 	    pmod->sderr[i] = emod.sderr[i];
338 	}
339 
340 	err = transcribe_ld_vcv(pmod, &emod);
341     }
342 
343     clear_model(&emod);
344 
345     free(list);
346     dataset_drop_last_variables(dset, vnew);
347 
348     dset->t1 = orig_t1;
349     dset->t2 = orig_t2;
350 
351     if (err && !pmod->errcode) {
352 	pmod->errcode = err;
353     }
354 
355     return err;
356 }
357 
358 /* special computation of statistics for autoregressive models */
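
/* Sketch of what compute_ar_stats() stores: the generalized residual

       u_t = (y_t - rho*y_{t-1}) - sum_i b_i (x_{it} - rho*x_{i,t-1})

   (with sqrt(1 - rho^2) scaling of the first observation under
   Prais-Winsten), the fitted value yhat_t = y_t - u_t, and R-squared
   computed as the squared correlation between y and yhat.
*/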
359 
360 static int compute_ar_stats (MODEL *pmod, const DATASET *dset,
361 			     double rho)
362 {
363     int i, v, t, yno = pmod->list[1];
364     int pwe = (pmod->opt & OPT_P);
365     double x, pw1 = 0.0;
366 
367     if (pwe) {
368 	pw1 = sqrt(1.0 - rho * rho);
369     }
370 
371     for (t=pmod->t1; t<=pmod->t2; t++) {
372 	if (t == pmod->t1 && pwe) {
373 	    x = pw1 * dset->Z[yno][t];
374 	    for (i=pmod->ifc; i<pmod->ncoeff; i++) {
375 		v = pmod->list[i+2];
376 		x -= pmod->coeff[i] * pw1 * dset->Z[v][t];
377 	    }
378 	    if (pmod->ifc) {
379 		x -= pw1 * pmod->coeff[0];
380 	    }
381 	} else {
382 	    x = dset->Z[yno][t] - rho*dset->Z[yno][t-1];
383 	    for (i=0; i<pmod->ncoeff; i++) {
384 		v = pmod->list[i+2];
385 		x -= pmod->coeff[i] * (dset->Z[v][t] - rho*dset->Z[v][t-1]);
386 	    }
387 	}
388 	pmod->uhat[t] = x;
389 	pmod->yhat[t] = dset->Z[yno][t] - x;
390     }
391 
392     pmod->rsq = gretl_corr_rsq(pmod->t1, pmod->t2, dset->Z[yno], pmod->yhat);
393     pmod->adjrsq =
394 	1.0 - ((1.0 - pmod->rsq) * (pmod->t2 - pmod->t1) /
395 	       (double) pmod->dfd);
396 
397     return 0;
398 }
399 
400 /* calculation of WLS stats (in agreement with GNU R) */
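
/* Sketch of the quantities formed below: with weighted mean
   ybar_w = sum(w_t*y_t) / sum(w_t) and W = sum(w_t*(y_t - ybar_w)^2),

       R^2 = 1 - ESS/W
       F   = ((W - ESS) * dfd) / (dfn * ESS)   [skipped if robust]

   which matches the convention used by GNU R for weighted fits.
*/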
401 
402 static void get_wls_stats (MODEL *pmod, const DATASET *dset,
403 			   gretlopt opt)
404 {
405     double x, dy, wmean = 0.0, wsum = 0.0;
406     int yno = pmod->list[1];
407     int t;
408 
409     for (t=pmod->t1; t<=pmod->t2; t++) {
410 	if (model_missing(pmod, t)) {
411 	    continue;
412 	}
413 	if (dset->Z[pmod->nwt][t] > 0) {
414 	    wmean += dset->Z[pmod->nwt][t] * dset->Z[yno][t];
415 	    wsum += dset->Z[pmod->nwt][t];
416 	}
417     }
418 
419     wmean /= wsum;
420     x = 0.0;
421 
422     for (t=pmod->t1; t<=pmod->t2; t++) {
423 	if (model_missing(pmod, t) || dset->Z[pmod->nwt][t] == 0.0) {
424 	    continue;
425 	}
426 	dy = dset->Z[yno][t] - wmean;
427 	x += dset->Z[pmod->nwt][t] * dy * dy;
428     }
429 
430     if (!(opt & OPT_R)) {
431 	pmod->fstt = ((x - pmod->ess) * pmod->dfd) / (pmod->dfn * pmod->ess);
432     }
433     pmod->rsq = (1 - (pmod->ess / x));
434     pmod->adjrsq = 1 - ((1 - pmod->rsq) * (pmod->nobs - 1)/pmod->dfd);
435 }
436 
437 static void fix_wls_values (MODEL *pmod, const DATASET *dset)
438 {
439     int dwt = gretl_model_get_int(pmod, "wt_dummy");
440     double yh, ess_orig = 0.0;
441     double sw, sigma_orig;
442     int t, i, v;
443 
444     for (t=pmod->t1; t<=pmod->t2; t++) {
445 	if (model_missing(pmod, t)) {
446 	    continue;
447 	}
448 	if (dset->Z[pmod->nwt][t] == 0.0) {
449 	    yh = 0.0;
450 	    for (i=0; i<pmod->ncoeff; i++) {
451 		v = pmod->list[i+2];
452 		yh += pmod->coeff[i] * dset->Z[v][t];
453 	    }
454 	    pmod->yhat[t] = yh;
455 	    v = pmod->list[1];
456 	    pmod->uhat[t] = dset->Z[v][t] - yh;
457 	} else if (!dwt) {
458 	    sw = sqrt(dset->Z[pmod->nwt][t]);
459 	    pmod->yhat[t] /= sw;
460 	    pmod->uhat[t] /= sw;
461 	    ess_orig += pmod->uhat[t] * pmod->uhat[t];
462 	}
463     }
464 
465     if (!dwt) {
466 	sigma_orig = sqrt(ess_orig / pmod->dfd);
467 	gretl_model_set_double(pmod, "ess_orig", ess_orig);
468 	gretl_model_set_double(pmod, "sigma_orig", sigma_orig);
469     }
470 }
471 
472 /* drop the weight var from the list of regressors (WLS) */
473 
474 static void dropwt (int *list)
475 {
476     int i;
477 
478     list[0] -= 1;
479     for (i=1; i<=list[0]; i++) {
480 	list[i] = list[i+1];
481     }
482 }
483 
484 static int model_missval_count (const MODEL *pmod)
485 {
486     int mc = 0;
487 
488     if (pmod->missmask != NULL) {
489 	int t;
490 
491 	for (t=pmod->t1; t<=pmod->t2; t++) {
492 	    if (pmod->missmask[t] == '1') {
493 		mc++;
494 	    }
495 	}
496     }
497 
498     return mc;
499 }
500 
501 static int wls_usable_obs (MODEL *pmod, const DATASET *dset)
502 {
503     int t, n = pmod->nobs;
504 
505     for (t=pmod->t1; t<=pmod->t2; t++) {
506 	if (pmod->missmask[t] == '1') {
507 	    n--;
508 	} else if (dset->Z[pmod->nwt][t] == 0) {
509 	    n--;
510 	}
511     }
512 
513     return n;
514 }
515 
516 #define SMPL_DEBUG 0
517 
518 static int
519 lsq_check_for_missing_obs (MODEL *pmod, gretlopt opts, DATASET *dset,
520 			   int *misst)
521 {
522     int ref_mask = reference_missmask_present();
523     int missv = 0;
524     int reject_missing = 0;
525 
526 #if SMPL_DEBUG
527     fprintf(stderr, "lsq_check_for_missing_obs: ref_mask = %d\n",
528 	    ref_mask);
529 #endif
530 
531     if (ref_mask) {
532 	int err = apply_reference_missmask(pmod);
533 
534 	/* If there was a reference mask present, it was put there
535 	   as part of a hypothesis test on some original model, and
536 	   it should be respected in estimation of this model */
537 
538 	if (err) {
539 	    pmod->errcode = E_ALLOC;
540 	    return 1;
541 	} else {
542 	    return 0;
543 	}
544     }
545 
546     /* can't do HAC VCV with missing obs in middle */
547     if ((opts & OPT_R) && dataset_is_time_series(dset) &&
548 	!libset_get_bool(FORCE_HC)) {
549 	reject_missing = 1;
550     }
551 
552     if (opts & OPT_M) {
553 	reject_missing = 1;
554     }
555 
556     if (reject_missing) {
557 	/* reject missing obs within adjusted sample */
558 	missv = model_adjust_sample(pmod, dset->n,
559 				    (const double **) dset->Z,
560 				    misst);
561     } else {
562 	/* we'll try to work around missing obs */
563 	missv = model_adjust_sample(pmod, dset->n,
564 				    (const double **) dset->Z,
565 				    NULL);
566     }
567 
568 #if SMPL_DEBUG
569     if (1) {
570 	char t1s[OBSLEN], t2s[OBSLEN];
571 	int misscount = model_missval_count(pmod);
572 
573 	ntolabel(t1s, pmod->t1, dset);
574 	ntolabel(t2s, pmod->t2, dset);
575 	fprintf(stderr, "*** after adjustment, t1=%d (%s), t2=%d (%s)\n",
576 		pmod->t1, t1s, pmod->t2, t2s);
577 	fprintf(stderr, "Valid observations in range = %d\n",
578 		pmod->t2 - pmod->t1 + 1 - misscount);
579 	fprintf(stderr, "Returning missv = %d\n", missv);
580     }
581 #endif
582 
583     return missv;
584 }
585 
586 static int check_for_lags (MODEL *pmod, const DATASET *dset)
587 {
588     int pmax = 0;
589     int ldv = model_lags(pmod->list, dset, &pmax);
590 
591     if (pmax > 0) {
592 	gretl_model_set_int(pmod, "maxlag", pmax);
593     } else if (gretl_model_get_int(pmod, "maxlag")) {
594 	gretl_model_destroy_data_item(pmod, "maxlag");
595     }
596 
597     if (ldv) {
598 	gretl_model_set_int(pmod, "ldepvar", ldv);
599     } else if (gretl_model_get_int(pmod, "ldepvar")) {
600 	gretl_model_destroy_data_item(pmod, "ldepvar");
601     }
602 
603     return ldv;
604 }
605 
606 static void log_depvar_ll (MODEL *pmod, const DATASET *dset)
607 {
608     char parent[VNAMELEN];
609 
610     if (series_is_log(dset, pmod->list[1], parent)) {
611 	double jll = pmod->lnL;
612 	int t;
613 
614 	for (t=0; t<dset->n; t++) {
615 	    if (!na(pmod->uhat[t])) {
616 		jll -= dset->Z[pmod->list[1]][t];
617 	    }
618 	}
619 	gretl_model_set_double(pmod, "jll", jll);
620 	gretl_model_set_string_as_data(pmod,
621 				       "log-parent",
622 				       gretl_strdup(parent));
623     }
624 }
625 
626 static int check_weight_var (MODEL *pmod, const double *w, int *effobs,
627 			     gretlopt opt)
628 {
629     const char *all_zeros =
630 	N_("Weight variable is all zeros, aborting regression");
631     const char *some_neg =
632 	N_("Weight variable contains negative values");
633     const char *bad_zeros =
634 	N_("Weight variable is not a dummy but contains zeros");
635     int ones = 0, zeros = 0, nobs = 0;
636     int t, is_dummy = 1;
637 
638     for (t=pmod->t1; t<=pmod->t2; t++) {
639 	if (w[t] < 0.0) {
640 	    gretl_errmsg_set(_(some_neg));
641 	    pmod->errcode = E_DATA;
642 	    return 1;
643 	} else if (w[t] > 0.0) {
644 	    nobs++;
645 	    if (w[t] == 1.0) {
646 		ones++;
647 	    } else {
648 		is_dummy = 0;
649 	    }
650 	} else if (w[t] == 0.0) {
651 	    zeros++;
652 	}
653     }
654 
655     if (nobs == 0) {
656 	gretl_errmsg_set(_(all_zeros));
657 	pmod->errcode = E_DATA;
658 	return 1;
659     }
660 
661     *effobs = nobs;
662 
663     if (is_dummy) {
664 	/* the weight var is a dummy */
665 	gretl_model_set_int(pmod, "wt_dummy", 1);
666 	gretl_model_set_int(pmod, "wt_zeros", zeros);
667     } else if (zeros > 0) {
668 	if (opt & OPT_Z) {
669 	    gretl_model_set_int(pmod, "wt_zeros", zeros);
670 	} else {
671 	    gretl_errmsg_set(_(bad_zeros));
672 	    pmod->errcode = E_DATA;
673 	    return 1;
674 	}
675     }
676 
677     return 0;
678 }
679 
680 void maybe_shift_ldepvar (MODEL *pmod, DATASET *dset)
681 {
682     if (gretl_model_get_int(pmod, "ldepvar")) {
683 	check_for_lags(pmod, dset);
684     }
685 }
686 
687 /*
688  * XTX_XTy:
689  * @list: list of variables in model.
690  * @t1: starting observation.
691  * @t2: ending observation.
692  * @dset: pointer to dataset.
693  * @nwt: ID number of variable used as weight, or 0.
694  * @rho: quasi-differencing coefficient, or 0.0.
695  * @pwe: if non-zero, use Prais-Winsten for first observation.
696  * @xpx: on output, X'X matrix as lower triangle, stacked by columns.
697  * @xpy: on output, X'y vector (but see below).
698  * @ysum: location to receive \sum y_i, or NULL.
699  * @ypy: location to receive (scalar) y'y, or NULL.
700  * @mask: missing observations mask, or NULL.
701  *
702  * Calculates X'X and X'y, with various possible transformations
703  * of the original data, depending on @nwt, @rho and @pwe.
704  * If X'y is not required, @xpy can be given as NULL.
705  *
706  * Note: the y-related pointer arguments @xpy, @ysum, and @ypy
707  * form a "package": either all should be given, or all should
708  * be NULL.
709  *
710  * Returns: 0 on success, non-zero on error.
711  */
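
/* For reference, a sketch of the (i,j) element of X'X formed below:

     plain data:        sum_t x_{it} x_{jt}
     weighted (@nwt):   sum_t w_t x_{it} x_{jt}
     quasi-differenced: sum_t (x_{it} - rho*x_{i,t-1})(x_{jt} - rho*x_{j,t-1}),
                        with first term (1 - rho^2) x_{i,t1} x_{j,t1}
                        under Prais-Winsten (@pwe)

   and X'y is built analogously, with y in place of x_j.
*/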
712 
713 static int XTX_XTy (const int *list, int t1, int t2,
714 		    const DATASET *dset, int nwt,
715 		    double rho, int pwe,
716 		    double *xpx, double *xpy,
717 		    double *ysum, double *ypy,
718 		    const char *mask)
719 {
720     int yno = list[1];
721     int lmin = (xpy != NULL)? 2 : 1;
722     int lmax = list[0];
723     int qdiff = (rho != 0.0);
724     const double *y = NULL;
725     const double *w = NULL;
726     const double *xi = NULL;
727     const double *xj = NULL;
728     double x, pw1;
729     int i, j, t, m;
730     int err = 0;
731 
732     /* Prais-Winsten term */
733     if (qdiff && pwe) {
734 	pw1 = sqrt(1.0 - rho * rho);
735     } else {
736 	pwe = 0;
737 	pw1 = 0.0;
738     }
739 
740     /* dependent variable */
741     y = dset->Z[yno];
742 
743     if (nwt) {
744 	/* weight variable */
745 	w = dset->Z[nwt];
746     }
747 
748     if (xpy != NULL) {
749 	*ysum = *ypy = 0.0;
750 
751 	for (t=t1; t<=t2; t++) {
752 	    if (masked(mask, t)) {
753 		continue;
754 	    }
755 	    x = y[t];
756 	    if (qdiff) {
757 		if (pwe && t == t1) {
758 		    x = pw1 * y[t];
759 		} else {
760 		    x -= rho * y[t-1];
761 		}
762 	    } else if (nwt) {
763 		x *= sqrt(w[t]);
764 	    }
765 	    *ysum += x;
766 	    *ypy += x * x;
767 	}
768 
769 	if (*ypy <= 0.0) {
770 	    /* error condition */
771 	    return yno;
772 	}
773     }
774 
775     m = 0;
776 
777     if (qdiff) {
778 	/* quasi-difference the data */
779 	for (i=lmin; i<=lmax; i++) {
780 	    xi = dset->Z[list[i]];
781 	    for (j=i; j<=lmax; j++) {
782 		xj = dset->Z[list[j]];
783 		x = 0.0;
784 		for (t=t1; t<=t2; t++) {
785 		    if (pwe && t == t1) {
786 			x += pw1 * xi[t1] * pw1 * xj[t];
787 		    } else {
788 			x += (xi[t] - rho * xi[t-1]) *
789 			    (xj[t] - rho * xj[t-1]);
790 		    }
791 		}
792 		if (i == j && x < DBL_EPSILON)  {
793 		    return E_SINGULAR;
794 		}
795 		xpx[m++] = x;
796 	    }
797 	    if (xpy != NULL) {
798 		x = 0.0;
799 		for (t=t1; t<=t2; t++) {
800 		    if (pwe && t == t1) {
801 			x += pw1 * y[t] * pw1 * xi[t];
802 		    } else {
803 			x += (y[t] - rho * y[t-1]) *
804 			    (xi[t] - rho * xi[t-1]);
805 		    }
806 		}
807 		xpy[i-2] = x;
808 	    }
809 	}
810     } else if (nwt) {
811 	/* weight the data */
812 	for (i=lmin; i<=lmax; i++) {
813 	    xi = dset->Z[list[i]];
814 	    for (j=i; j<=lmax; j++) {
815 		xj = dset->Z[list[j]];
816 		x = 0.0;
817 		for (t=t1; t<=t2; t++) {
818 		    if (!masked(mask, t)) {
819 			x += w[t] * xi[t] * xj[t];
820 		    }
821 		}
822 		if (i == j && x < DBL_EPSILON) {
823 		    return E_SINGULAR;
824 		}
825 		xpx[m++] = x;
826 	    }
827 	    if (xpy != NULL) {
828 		x = 0.0;
829 		for (t=t1; t<=t2; t++) {
830 		    if (!masked(mask, t)) {
831 			x += w[t] * y[t] * xi[t];
832 		    }
833 		}
834 		xpy[i-2] = x;
835 	    }
836 	}
837     } else if (mask != NULL) {
838 	/* plain data, but with missing obs mask */
839 	for (i=lmin; i<=lmax; i++) {
840 	    xi = dset->Z[list[i]];
841 	    for (j=i; j<=lmax; j++) {
842 		xj = dset->Z[list[j]];
843 		x = 0.0;
844 		for (t=t1; t<=t2; t++) {
845 		    if (!masked(mask, t)) {
846 			x += xi[t] * xj[t];
847 		    }
848 		}
849 		if (i == j && x < DBL_EPSILON) {
850 		    return E_SINGULAR;
851 		}
852 		xpx[m++] = x;
853 	    }
854 	    if (xpy != NULL) {
855 		x = 0.0;
856 		for (t=t1; t<=t2; t++) {
857 		    if (!masked(mask, t)) {
858 			x += y[t] * xi[t];
859 		    }
860 		}
861 		xpy[i-2] = x;
862 	    }
863 	}
864     } else {
865 	/* plain data, no missing obs mask */
866 	for (i=lmin; i<=lmax; i++) {
867 	    xi = dset->Z[list[i]];
868 	    for (j=i; j<=lmax; j++) {
869 		xj = dset->Z[list[j]];
870 		x = 0.0;
871 		for (t=t1; t<=t2; t++) {
872 		    x += xi[t] * xj[t];
873 		}
874 		if (i == j && x < DBL_EPSILON) {
875 		    return E_SINGULAR;
876 		}
877 		xpx[m++] = x;
878 	    }
879 	    if (xpy != NULL) {
880 		x = 0.0;
881 		for (t=t1; t<=t2; t++) {
882 		    x += y[t] * xi[t];
883 		}
884 		xpy[i-2] = x;
885 	    }
886 	}
887     }
888 
889     return err;
890 }
891 
892 static int native_cholesky_regress (MODEL *pmod, const DATASET *dset,
893 				    gretlopt opt)
894 {
895     double rho, ysum = 0.0, ypy = 0.0;
896     double *xpy;
897     int k = pmod->ncoeff;
898     int nxpx = k * (k + 1) / 2;
899     int pwe = (pmod->opt & OPT_P);
900     int i;
901 
902     if (nxpx == 0) {
903 	fprintf(stderr, "problem: nxpx = 0\n");
904 	pmod->errcode = E_DATA;
905 	return pmod->errcode;
906     }
907 
908     xpy = malloc(k * sizeof *xpy);
909     if (xpy == NULL) {
910 	pmod->errcode = E_ALLOC;
911 	return pmod->errcode;
912     }
913 
914     pmod->xpx = malloc(nxpx * sizeof *pmod->xpx);
915     pmod->coeff = malloc(k * sizeof *pmod->coeff);
916     pmod->sderr = malloc(k * sizeof *pmod->sderr);
917 
918     if (pmod->yhat == NULL) {
919 	pmod->yhat = malloc(pmod->full_n * sizeof *pmod->yhat);
920     }
921 
922     if (pmod->uhat == NULL) {
923 	pmod->uhat = malloc(pmod->full_n * sizeof *pmod->uhat);
924     }
925 
926     if (pmod->xpx == NULL || pmod->coeff == NULL ||
927 	pmod->sderr == NULL || pmod->yhat == NULL ||
928 	pmod->uhat == NULL) {
929 	free(xpy);
930 	pmod->errcode = E_ALLOC;
931 	return pmod->errcode;
932     }
933 
934     for (i=0; i<k; i++) {
935 	xpy[i] = 0.0;
936     }
937     for (i=0; i<nxpx; i++) {
938 	pmod->xpx[i] = 0.0;
939     }
940 
941     rho = gretl_model_get_double_default(pmod, "rho_gls", 0.0);
942 
943     /* calculate regression results, Cholesky style */
944     pmod->errcode = XTX_XTy(pmod->list, pmod->t1, pmod->t2, dset,
945 			    pmod->nwt, rho, pwe, pmod->xpx, xpy,
946 			    &ysum, &ypy, pmod->missmask);
947 
948 #if XPX_DEBUG
949     for (i=0; i<k; i++) {
950 	fprintf(stderr, "xpy[%d] = %g\n", i, xpy[i]);
951     }
952     for (i=0; i<nxpx; i++) {
953 	fprintf(stderr, "xpx[%d] = %g\n", i, pmod->xpx[i]);
954     }
955     fputc('\n', stderr);
956 #endif
957 
958     if (!pmod->errcode) {
959 	regress(pmod, xpy, ysum, ypy, dset, rho, opt);
960     }
961 
962     free(xpy);
963 
964     return pmod->errcode;
965 }
966 
967 static int gretl_null_regress (MODEL *pmod, const DATASET *dset)
968 {
969     double yt;
970     int t;
971 
972     if (pmod->yhat == NULL) {
973 	pmod->yhat = malloc(pmod->full_n * sizeof *pmod->yhat);
974     }
975 
976     if (pmod->uhat == NULL) {
977 	pmod->uhat = malloc(pmod->full_n * sizeof *pmod->uhat);
978     }
979 
980     if (pmod->yhat == NULL || pmod->uhat == NULL) {
981 	pmod->errcode = E_ALLOC;
982 	return pmod->errcode;
983     }
984 
985     pmod->nobs = 0;
986     pmod->ifc = 0;
987     pmod->ess = 0.0;
988     pmod->rsq = pmod->adjrsq = 0.0;
989 
990     for (t=0; t<pmod->full_n; t++) {
991 	yt = dset->Z[pmod->list[1]][t];
992 	if (t < pmod->t1 || t > pmod->t2 || na(yt)) {
993 	    pmod->uhat[t] = pmod->yhat[t] = NADBL;
994 	} else {
995 	    pmod->uhat[t] = yt;
996 	    pmod->yhat[t] = 0.0;
997 	    pmod->ess += yt * yt;
998 	    pmod->nobs += 1;
999 	}
1000     }
1001 
1002     if (pmod->ess == 0) {
1003 	pmod->sigma = 0.0;
1004     } else if (pmod->nobs > 1) {
1005 	pmod->sigma = sqrt(pmod->ess / (pmod->nobs - 1));
1006     } else {
1007 	pmod->errcode = E_DATA;
1008     }
1009 
1010     if (pmod->errcode == 0) {
1011 	ls_criteria(pmod);
1012     }
1013 
1014     return pmod->errcode;
1015 }
1016 
1017 /* check whether the variable with ID @yno is all zeros;
1018    return 1 if so, otherwise return 0 */
1019 
1020 static int depvar_zero (const MODEL *pmod, int yno, const double **Z)
1021 {
1022     double yt;
1023     int t, ret = 1;
1024 
1025     for (t=pmod->t1; t<=pmod->t2; t++) {
1026 	if (model_missing(pmod, t)) {
1027 	    continue;
1028 	}
1029 	yt = Z[yno][t];
1030 	if (pmod->nwt) {
1031 	    yt *= Z[pmod->nwt][t];
1032 	}
1033 	if (yt != 0.0) {
1034 	    ret = 0;
1035 	    break;
1036 	}
1037     }
1038 
1039     return ret;
1040 }
1041 
1042 static int cholesky_regress (MODEL *pmod, const DATASET *dset,
1043 			     gretlopt opt)
1044 {
1045     int T = pmod->t2 - pmod->t1 + 1;
1046     int k = pmod->list[0] - 1;
1047 
1048     if (k >= 50 || (T >= 250 && k >= 30)) {
1049 	return lapack_cholesky_regress(pmod, dset, opt);
1050     } else {
1051 	return native_cholesky_regress(pmod, dset, opt);
1052     }
1053 }
1054 
1055 /* limited freeing of elements before passing a model
1056    on for QR estimation in the case of (near-)singularity
1057 */
1058 
1059 static void model_free_storage (MODEL *pmod)
1060 {
1061     free(pmod->xpx);
1062     free(pmod->coeff);
1063     free(pmod->sderr);
1064 
1065     pmod->xpx = NULL;
1066     pmod->coeff = NULL;
1067     pmod->sderr = NULL;
1068 }
1069 
1070 /*
1071  * ar1_lsq:
1072  * @list: model specification.
1073  * @dset: dataset struct.
1074  * @ci: command index, e.g. OLS, AR1.
1075  * @opt: option flags (see lsq and ar1_model).
1076  * @rho: coefficient for quasi-differencing the data, or 0.
1077  *
1078  * Nests lsq() and ar1_model(); is also used internally with ci == OLS
1079  * but rho != 0.0 when estimating rho via, e.g., Cochrane-Orcutt
1080  * iteration.
1081  *
1082  * Returns: model struct containing the estimates.
1083  */
1084 
1085 static MODEL ar1_lsq (const int *list, DATASET *dset,
1086 		      GretlCmdIndex ci, gretlopt opt,
1087 		      double rho)
1088 {
1089     MODEL mdl;
1090     int effobs = 0;
1091     int missv = 0, misst = 0;
1092     int pwe = (opt & OPT_P);
1093     int nullmod = 0, ldv = 0;
1094     int save_hc = -1;
1095     int yno, i;
1096 
1097     gretl_model_init(&mdl, dset);
1098 
1099     if (list == NULL || dset == NULL || dset->Z == NULL) {
1100 	fprintf(stderr, "E_DATA: lsq: list = %p, dset = %p\n",
1101 		(void *) list, (void *) dset);
1102 	mdl.errcode = E_DATA;
1103         return mdl;
1104     }
1105 
1106     if (opt & OPT_C) {
1107 	/* cluster option implies robust */
1108 	opt |= OPT_R;
1109     }
1110 
1111     if (ci == AR1) {
1112 	if (opt & OPT_P) {
1113 	    /* OPT_P: Prais-Winsten */
1114 	    mdl.opt |= OPT_P;
1115 	} else if (opt & OPT_H) {
1116 	    /* OPT_H: Hildreth-Lu */
1117 	    mdl.opt |= OPT_H;
1118 	}
1119     } else if (rho != 0.0 && (opt & OPT_P)) {
1120 	/* estimation of rho in progress */
1121 	mdl.opt |= OPT_P;
1122     }
1123 
1124     if (list[0] == 1 && ci == OLS && (opt & OPT_U)) {
1125 	/* null model OK */
1126 	nullmod = 1;
1127     } else if (list[0] == 1 || dset->v == 1) {
1128 	fprintf(stderr, "E_DATA: lsq: list[0] = %d, dset->v = %d\n",
1129 		list[0], dset->v);
1130 	mdl.errcode = E_DATA;
1131         return mdl;
1132     }
1133 
1134     /* preserve a copy of the list supplied, for future reference */
1135     mdl.list = gretl_list_copy(list);
1136     if (mdl.list == NULL) {
1137         mdl.errcode = E_ALLOC;
1138         return mdl;
1139     }
1140 
1141     mdl.t1 = dset->t1;
1142     mdl.t2 = dset->t2;
1143     mdl.full_n = dset->n;
1144     mdl.ci = ci;
1145 
1146     /* Doing weighted least squares? */
1147     if (ci == WLS) {
1148 	check_weight_var(&mdl, dset->Z[mdl.list[1]], &effobs, opt);
1149 	if (mdl.errcode) {
1150 	    return mdl;
1151 	}
1152 	mdl.nwt = mdl.list[1];
1153     } else {
1154 	mdl.nwt = 0;
1155     }
1156 
1157     /* sanity check */
1158     if (mdl.t1 < 0 || mdl.t2 > dset->n - 1) {
1159         mdl.errcode = E_DATA;
1160         goto lsq_abort;
1161     }
1162 
1163     /* adjust sample range and check for missing obs: this
1164        may set the model errcode */
1165     missv = lsq_check_for_missing_obs(&mdl, opt, dset, &misst);
1166     if (mdl.errcode) {
1167         goto lsq_abort;
1168     }
1169 
1170     /* react to presence of unhandled missing obs */
1171     if (missv) {
1172 	gretl_errmsg_sprintf(_("Missing value encountered for "
1173 			       "variable %d, obs %d"),
1174 			     missv, misst);
1175 	mdl.errcode = E_DATA;
1176 	return mdl;
1177     }
1178 
1179     if (ci == WLS) {
1180 	dropwt(mdl.list);
1181     }
1182 
1183     yno = mdl.list[1];
1184 
1185     /* check for unknown vars in list */
1186     for (i=1; i<=mdl.list[0]; i++) {
1187         if (mdl.list[i] > dset->v - 1) {
1188             mdl.errcode = E_UNKVAR;
1189             goto lsq_abort;
1190         }
1191     }
1192 
1193     /* check for zero dependent var */
1194     if (depvar_zero(&mdl, yno, (const double **) dset->Z)) {
1195         mdl.errcode = E_ZERO;
1196         goto lsq_abort;
1197     }
1198 
1199     /* drop any vars that are all zero and repack the list */
1200     omitzero(&mdl, dset, opt);
1201 
1202     /* if regressor list contains a constant, record this fact and
1203        place it first among the regressors */
1204     mdl.ifc = reglist_check_for_const(mdl.list, dset);
1205 
1206     /* Check for presence of lagged dependent variable?
1207        (Don't bother if this is an auxiliary regression.) */
1208     if (!(opt & OPT_A) && !dataset_is_cross_section(dset)) {
1209 	ldv = check_for_lags(&mdl, dset);
1210     }
1211 
1212     /* AR1: advance the starting observation by one? */
1213     if (rho != 0.0 && !pwe) {
1214 	mdl.t1 += 1;
1215     }
1216 
1217     mdl.ncoeff = mdl.list[0] - 1;
1218     if (effobs > 0 && mdl.missmask == NULL) {
1219 	mdl.nobs = effobs;
1220     } else {
1221 	mdl.nobs = mdl.t2 - mdl.t1 + 1;
1222 	if (mdl.nwt) {
1223 	    mdl.nobs = wls_usable_obs(&mdl, dset);
1224 	} else if (mdl.missmask != NULL) {
1225 	    mdl.nobs -= model_missval_count(&mdl);
1226 	}
1227     }
1228 
1229     /* check degrees of freedom */
1230     if (get_model_df(&mdl)) {
1231         goto lsq_abort;
1232     }
1233 
1234     /* if df correction is not wanted, record this fact */
1235     if (opt & OPT_N) {
1236 	mdl.opt |= OPT_N;
1237     }
1238 
1239     if (dataset_is_time_series(dset)) {
1240 	opt |= OPT_T;
1241     }
1242 
1243     /* remove Durbin-Watson p-value flag if not appropriate */
1244     if (ldv || !(opt & OPT_T)) {
1245 	opt &= ~OPT_I;
1246     }
1247 
1248     if (opt & OPT_J) {
1249 	/* --jackknife */
1250 	save_hc = libset_get_int(HC_VERSION);
1251 	libset_set_int(HC_VERSION, 4);
1252 	opt |= OPT_R;
1253 	mdl.opt |= (OPT_J | OPT_R);
1254     }
1255 
1256     if (rho != 0.0) {
1257 	gretl_model_set_double(&mdl, "rho_gls", rho);
1258     }
1259 
1260     if (nullmod) {
1261 	gretl_null_regress(&mdl, dset);
1262     } else if (libset_get_bool(USE_QR)) {
1263 	gretl_qr_regress(&mdl, dset, opt);
1264     } else if (opt & (OPT_R | OPT_I)) {
1265 	gretl_qr_regress(&mdl, dset, opt);
1266     } else {
1267 	cholesky_regress(&mdl, dset, opt);
1268 	if (mdl.errcode == E_SINGULAR) {
1269 	    /* near-perfect collinearity is better handled by QR */
1270 	    model_free_storage(&mdl);
1271 	    gretl_qr_regress(&mdl, dset, opt);
1272 	}
1273     }
1274 
1275     if (save_hc >= 0) {
1276 	libset_set_int(HC_VERSION, save_hc);
1277     }
1278 
1279     if (mdl.errcode) {
1280 	goto lsq_abort;
1281     }
1282 
1283     /* get the mean and sd of dep. var. and make available */
1284     model_depvar_stats(&mdl, dset);
1285 
1286     /* Doing an autoregressive procedure? */
1287     if (ci == AR1) {
1288 	if (compute_ar_stats(&mdl, dset, rho)) {
1289 	    goto lsq_abort;
1290 	}
1291 	if (ldv) {
1292 	    ldepvar_std_errors(&mdl, dset);
1293 	    if (mdl.errcode) {
1294 		goto lsq_abort;
1295 	    }
1296 	}
1297 	if ((opt & OPT_H) && (opt & OPT_B)) {
1298 	    gretl_model_set_int(&mdl, "no-corc", 1);
1299 	}
1300 	if (gretl_model_get_int(&mdl, "maxlag") < 1) {
1301 	    gretl_model_set_int(&mdl, "maxlag", 1);
1302 	}
1303     }
1304 
1305     /* weighted least squares: fix yhat and uhat; add calculation of
1306        ESS and sigma based on unweighted data
1307     */
1308     if (ci == WLS) {
1309 	get_wls_stats(&mdl, dset, opt);
1310 	fix_wls_values(&mdl, dset);
1311     }
1312 
1313     if (mdl.missmask == NULL && (opt & OPT_T) && !(opt & OPT_I)) {
1314 	mdl.rho = rhohat(1, mdl.t1, mdl.t2, mdl.uhat);
1315 	mdl.dw = dwstat(1, &mdl, dset);
1316     } else if (!(opt & OPT_I)) {
1317 	mdl.rho = mdl.dw = NADBL;
1318     }
1319 
1320     /* special case: degenerate model */
1321     if (mdl.ncoeff == 1 && mdl.ifc) {
1322 	mdl.rsq = mdl.adjrsq = 0.0;
1323 	mdl.fstt = NADBL;
1324     }
1325 
1326     /* Generate model selection statistics */
1327     if (ci == AR1) {
1328 	mdl.lnL = NADBL;
1329 	mle_criteria(&mdl, 0);
1330     } else {
1331 	ls_criteria(&mdl);
1332     }
1333     if (!(opt & OPT_A) && !na(mdl.lnL)) {
1334 	log_depvar_ll(&mdl, dset);
1335     }
1336 
1337  lsq_abort:
1338 
1339     if (!(opt & (OPT_A | OPT_S))) {
1340 	/* if it's not an auxiliary regression, or part of
1341 	   a system, set an ID number on the model
1342 	*/
1343 	set_model_id(&mdl, opt);
1344     }
1345 
1346     return mdl;
1347 }
1348 
1349 /**
1350  * lsq:
1351  * @list: dependent variable plus list of regressors.
1352  * @dset: dataset struct.
1353  * @ci: one of the command indices in #LSQ_MODEL.
1354  * @opt: option flags: zero or more of the following --
1355  *   OPT_R: compute robust standard errors;
1356  *   OPT_A: treat as auxiliary regression (don't bother checking
1357  *     for presence of lagged dependent var, don't augment model count);
1358  *   OPT_N: don't use degrees of freedom correction for standard
1359  *      error of regression;
1360  *   OPT_M: reject missing observations within sample range;
1361  *   OPT_Z: (internal use) suppress the automatic elimination of
1362  *      perfectly collinear variables.
1363  *   OPT_X: compute "variance matrix" as just (X'X)^{-1}.
1364  *   OPT_I: compute Durbin-Watson p-value.
1365  *   OPT_U: treat null model as OK.
1366  *   OPT_P: (ar1 only): use Prais-Winsten.
1367  *   OPT_S: estimation as part of system.
1368  *
1369  * Computes least squares estimates of the model specified by @list,
1370  * using an estimator determined by the value of @ci.
1371  *
1372  * Returns: a #MODEL struct, containing the estimates.
1373  */
1374 
1375 MODEL lsq (const int *list, DATASET *dset, GretlCmdIndex ci,
1376 	   gretlopt opt)
1377 {
1378     return ar1_lsq(list, dset, ci, opt, 0.0);
1379 }
1380 
1381 /**
1382  * ar1_model:
1383  * @list: model specification.
1384  * @dset: dataset struct.
1385  * @opt: option flags: may include OPT_H to use Hildreth-Lu,
1386  *       OPT_P to use Prais-Winsten, OPT_B to suppress Cochrane-Orcutt
1387  *       fine-tuning of Hildreth-Lu results, OPT_G to generate
1388  *       a gnuplot graph of the search in Hildreth-Lu case.
1389  * @prn: gretl printer.
1390  *
1391  * Returns: model struct containing the estimates.
1392  */
1393 
1394 MODEL ar1_model (const int *list, DATASET *dset,
1395 		 gretlopt opt, PRN *prn)
1396 {
1397     MODEL mdl;
1398     double rho;
1399     int err = 0;
1400 
1401     rho = estimate_rho(list, dset, opt, prn, &err);
1402 
1403     if (err) {
1404 	gretl_model_init(&mdl, NULL);
1405 	mdl.errcode = err;
1406 	return mdl;
1407     } else {
1408 	return ar1_lsq(list, dset, AR1, opt, rho);
1409     }
1410 }
1411 
1412 static int make_ess (MODEL *pmod, const DATASET *dset)
1413 {
1414     int i, t, yno = pmod->list[1], l0 = pmod->list[0];
1415     int nwt = pmod->nwt;
1416     double yhat, ut;
1417 
1418     pmod->ess = 0.0;
1419 
1420     for (t=pmod->t1; t<=pmod->t2; t++) {
1421 	if (nwt && dset->Z[nwt][t] == 0.0) {
1422 	    continue;
1423 	}
1424 	if (model_missing(pmod, t)) {
1425 	    continue;
1426 	}
1427 	yhat = 0.0;
1428 	for (i=2; i<=l0; i++) {
1429 	    yhat += pmod->coeff[i-2] * dset->Z[pmod->list[i]][t];
1430 	}
1431 	ut = dset->Z[yno][t] - yhat;
1432 	if (nwt) {
1433 	    ut *= sqrt(dset->Z[nwt][t]);
1434 	}
1435 	pmod->ess += ut * ut;
1436     }
1437 
1438     return 0;
1439 }
1440 
1441 /* The heuristic used here is that the model effectively
1442    contains a constant or intercept if the means of y and
1443    yhat are the same, or in other words the mean residual
1444    is zero -- but allowing for some numerical slop.
1445 */
1446 
1447 int check_for_effective_const (MODEL *pmod, const DATASET *dset)
1448 {
1449     double ubar = 0.0, ybar = 0.0;
1450     const double *y = dset->Z[pmod->list[1]];
1451     int t, ret = 0;
1452 
1453     for (t=pmod->t1; t<=pmod->t2; t++) {
1454 	if (!na(pmod->uhat[t])) {
1455 	    ubar += pmod->uhat[t];
1456 	    ybar += y[t];
1457 	}
1458     }
1459 
1460     ubar = fabs(ubar / pmod->nobs);
1461     ybar = fabs(ybar / pmod->nobs);
1462 
1463     /* the calibration below is debatable: watch out for
1464        "wrong" results */
1465 
1466     if (ubar < 1.0e-7) {
1467 	/* absolute value of mean residual small enough? */
1468 	ret = 1;
1469     } else if (ubar < 0.01 && ybar > 1.0e-20) {
1470 	/* try scaled variant? */
1471 	ret = ubar / ybar < 2.0e-15;
1472     }
1473 
1474 #if 0
1475     fprintf(stderr, "check_for_effective_const:\n"
1476 	    "ubar = %g, ybar = %g, ret = %d\n",
1477 	    ubar, ybar, ret);
1478 #endif
1479 
1480     if (ret) {
1481 	gretl_model_set_int(pmod, "effconst", 1);
1482 	pmod->dfn -= 1;
1483     } else if (gretl_model_get_int(pmod, "effconst")) {
1484 	gretl_model_set_int(pmod, "effconst", 0);
1485 	pmod->dfn += 1;
1486     }
1487 
1488     return ret;
1489 }
1490 
1491 static void uncentered_r_squared (MODEL *pmod, const DATASET *dset)
1492 {
1493     double y0 = dset->Z[pmod->list[1]][pmod->t1];
1494 
1495     /* special computation for the case of TSS = 0, i.e.
1496        the dependent variable is a constant */
1497 
1498     if (y0 != 0) {
1499 	double den = pmod->nobs * y0 * y0;
1500 
1501 	pmod->rsq = 1 - (pmod->ess / den);
1502 	gretl_model_set_int(pmod, "uncentered", 1);
1503     }
1504 }
1505 
1506 static void compute_r_squared (MODEL *pmod, const DATASET *dset,
1507 			       int *ifc)
1508 {
1509     int yno = pmod->list[1];
1510 
1511     pmod->rsq = 1.0 - (pmod->ess / pmod->tss);
1512 
1513     if (*ifc == 0) {
1514 	*ifc = check_for_effective_const(pmod, dset);
1515     }
1516 
1517     if (pmod->dfd > 0) {
1518 	double yt, den = 0.0;
1519 
1520 	if (*ifc) {
1521 	    den = pmod->tss * pmod->dfd;
1522 	    pmod->adjrsq = 1 - (pmod->ess * (pmod->nobs - 1) / den);
1523 	} else {
1524 	    /* model does not contain a constant */
1525 	    int t;
1526 
1527 	    for (t=pmod->t1; t<=pmod->t2; t++) {
1528 		if (!na(pmod->yhat[t])) {
1529 		    yt = dset->Z[yno][t];
1530 		    den += yt * yt;
1531 		}
1532 	    }
1533 
1534 	    /* make the centered R^2 available for output */
1535 	    gretl_model_set_double(pmod, "centered-R2", pmod->rsq);
1536 
1537 	    /* but flag the fact that the reported R-squared is
1538 	       in fact uncentered */
1539 	    gretl_model_set_int(pmod, "uncentered", 1);
1540 
1541 	    /* and make the "official" figure the uncentered R^2,
1542 	       as per NIST, R, Stata, SPSS, ... */
1543 	    pmod->rsq = 1 - pmod->ess / den;
1544 	    pmod->adjrsq =
1545 		1.0 - ((1.0 - pmod->rsq) * (pmod->nobs - 1.0) / pmod->dfd);
1546 	}
1547     }
1548 
1549     if (pmod->rsq < 0.0) {
1550 	pmod->rsq = 0.0;
1551     }
1552 }
1553 
1554 /*
1555   diaginv: solves for the diagonal elements of X'X inverse.
1556 
1557   @xpx = Cholesky-decomposed X'X
1558   @tmp = work array, length >= nv
1559   @diag = diagonal elements of {X'X}^{-1} (output)
1560   @nv = number of regression coefficients = length of diag
1561 */
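
/* Storage sketch: with nv = 3 the packed lower triangle used here
   (the Cholesky factor, stacked by columns) is

       xpx[0..5] = { m11, m21, m31, m22, m32, m33 }

   so the diagonal elements sit at offsets 0, 3 and 5 -- the sequence
   that the 'kk' index below steps through via kk += nv - l.
*/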
1562 
1563 static void diaginv (const double *xpx, double *tmp, double *diag,
1564 		     int nv)
1565 {
1566     int kk, l, m, k, i, j;
1567     double d, e;
1568 
1569     kk = 0;
1570 
1571     for (l=0; l<nv-1; l++) {
1572         d = xpx[kk];
1573         tmp[l] = d;
1574         e = d * d;
1575         m = 0;
1576         if (l > 0) {
1577 	    for (j=1; j<=l; j++) {
1578 		m += nv - j;
1579 	    }
1580 	}
1581         for (i=l+1; i<nv; i++) {
1582             d = 0.0;
1583             k = i + m;
1584             for (j=l; j<i; j++) {
1585                 d += tmp[j] * xpx[k];
1586                 k += nv - (j+1);
1587             }
1588             d = -d * xpx[k];
1589             tmp[i] = d;
1590             e += d * d;
1591         }
1592 	kk += nv - l;
1593         diag[l] = e;
1594     }
1595 
1596     diag[nv-1] = xpx[kk] * xpx[kk];
1597 }
1598 
1599 /*
1600   cholbeta: in-place Cholesky decomposition of X'X (pmod->xpx) (lower
1601   triangular matrix stacked in columns) plus solution for the
1602   least-squares coefficient estimates.
1603 
1604   pmod->xpx = X'X on input and Cholesky decomposition on output
1605   xpy = the X'y vector on input and the Cholesky-transformed
1606         vector on output
1607   rss = location to receive regression sum of squares
1608 
1609   The number of floating-point operations is basically 3.5 * nv^2
1610   plus (nv^3) / 3.
1611 */
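
/* In matrix terms, a sketch of the steps below: factor X'X = LL'
   (storing the reciprocals of L's diagonal in xpx), forward-solve
   L z = X'y in place in @xpy, set *rss = z'z, then back-solve
   L' b = z for the coefficient vector.
*/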
1612 
1613 static int cholbeta (MODEL *pmod, double *xpy, double *rss)
1614 {
1615     int i, j, k, kk, l, jm1;
1616     double e, d, d1, d2, test, xx;
1617     double *xpx = pmod->xpx;
1618     double *b = pmod->coeff;
1619     int nc = pmod->ncoeff;
1620 
1621     if (xpx[0] <= 0.0) {
1622 	fprintf(stderr, "%s %d: xpx <= 0.0\n", __FILE__, __LINE__);
1623 	return E_NAN;
1624     }
1625 
1626     e = 1.0 / sqrt(xpx[0]);
1627     xpx[0] = e;
1628     xpy[0] *= e;
1629     for (i=1; i<nc; i++) {
1630 	xpx[i] *= e;
1631     }
1632 
1633     kk = nc;
1634 
1635     for (j=1; j<nc; j++) {
1636 	/* diagonal elements */
1637         d = d1 = 0.0;
1638         k = jm1 = j;
1639 
1640         for (l=1; l<=jm1; l++) {
1641             xx = xpx[k];
1642             d1 += xx * xpy[l-1];
1643             d += xx * xx;
1644             k += nc - l;
1645         }
1646 
1647         d2 = xpx[kk] - d;
1648 	test = d2 / xpx[kk];
1649 
1650 	/* check for singularity */
1651         if (test < TINY) {
1652 #if 0
1653 	    fprintf(stderr, "cholbeta: test[%d] = %g\n", j, test);
1654 #endif
1655 	    *rss = -1.0;
1656 	    return E_SINGULAR;
1657         } else if (test < SMALL) {
1658 	    gretl_model_set_int(pmod, "near-singular", 1);
1659 	}
1660 
1661         e = 1 / sqrt(d2);
1662         xpx[kk] = e;
1663         xpy[j] = (xpy[j] - d1) * e;
1664 
1665 	/* off-diagonal elements */
1666         for (i=j+1; i<nc; i++) {
1667             kk++;
1668             d = 0.0;
1669             k = j;
1670             for (l=1; l<=jm1; l++) {
1671                 d += xpx[k] * xpx[k-j+i];
1672                 k += nc - l;
1673             }
1674             xpx[kk] = (xpx[kk] - d) * e;
1675         }
1676         kk++;
1677     }
1678 
1679     kk--;
1680 
1681     /* calculate regression sum of squares */
1682     d = 0.0;
1683     for (j=0; j<nc; j++) {
1684 	d += xpy[j] * xpy[j];
1685     }
1686     *rss = d;
1687 
1688     /* back-solve for the coefficients */
1689 
1690     b[nc-1] = xpy[nc-1] * xpx[kk];
1691 
1692     for (j=nc-2; j>=0; j--) {
1693 	d = xpy[j];
1694 	for (i=nc-1; i>j; i--) {
1695 	    d -= b[i] * xpx[--kk];
1696 	}
1697 	b[j] = d * xpx[--kk];
1698     }
1699 
1700     for (j=0; j<nc; j++) {
1701 	if (isnan(b[j])) {
1702 	    fprintf(stderr, "%s %d: coeff %d is NaN\n", __FILE__, __LINE__, j);
1703 	    return E_NAN;
1704 	}
1705     }
1706 
1707     return 0;
1708 }
1709 
1710 /* compute fitted values and residuals */
1711 
1712 static int hatvars (MODEL *pmod, const DATASET *dset)
1713 {
1714     int yno = pmod->list[1];
1715     int xno, i, t;
1716     double x;
1717 
1718     for (t=pmod->t1; t<=pmod->t2; t++) {
1719 	if (model_missing(pmod, t)) {
1720 	    continue;
1721 	}
1722 	pmod->yhat[t] = 0.0;
1723         for (i=0; i<pmod->ncoeff; i++) {
1724             xno = pmod->list[i+2];
1725 	    x = dset->Z[xno][t];
1726 	    if (pmod->nwt) {
1727 		x *= sqrt(dset->Z[pmod->nwt][t]);
1728 	    }
1729             pmod->yhat[t] += pmod->coeff[i] * x;
1730         }
1731 	x = dset->Z[yno][t];
1732 	if (pmod->nwt) {
1733 	    x *= sqrt(dset->Z[pmod->nwt][t]);
1734 	}
1735         pmod->uhat[t] = x - pmod->yhat[t];
1736     }
1737 
1738     return 0;
1739 }
1740 
1741 /*
1742   regress: takes X'X (pmod->xpx) and X'y (@xpy) and
1743   computes OLS estimates and associated statistics.
1744 
1745   n = total number of observations per series in data set
1746   pmod->ifc = 1 if constant is present in model, else = 0
1747 
1748   ess = error sum of squares
1749   sigma = standard error of regression
1750   fstt = F-statistic
1751   pmod->coeff = array of regression coefficients
1752   pmod->sderr = corresponding array of standard errors
1753 */
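
/* A sketch of the summary statistics computed below:

       TSS = y'y - (sum y)^2 / n
       s^2 = ESS / dfd              (or ESS / n under OPT_N)
       F   = (RSS - ifc * (sum y)^2 / n) / (s^2 * dfn)

   where RSS is the regression sum of squares from cholbeta().
*/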
1754 
1755 static void regress (MODEL *pmod, double *xpy,
1756 		     double ysum, double ypy,
1757 		     const DATASET *dset,
1758 		     double rho, gretlopt opt)
1759 {
1760     int ifc = pmod->ifc;
1761     double zz, rss = 0.0;
1762     double s2 = 0.0;
1763     double *diag = NULL;
1764     int i, err = 0;
1765 
1766     for (i=0; i<pmod->full_n; i++) {
1767 	pmod->yhat[i] = pmod->uhat[i] = NADBL;
1768     }
1769 
1770     zz = ysum * ysum / pmod->nobs;
1771     pmod->tss = ypy - zz;
1772 
1773     /* Cholesky-decompose X'X and find the coefficients */
1774     err = cholbeta(pmod, xpy, &rss);
1775     if (err) {
1776         pmod->errcode = err;
1777         return;
1778     }
1779 
1780     if (rho != 0.0) {
1781 	pmod->ess = ypy - rss;
1782     } else {
1783 	make_ess(pmod, dset);
1784 	rss = ypy - pmod->ess;
1785     }
1786 
1787     if (fabs(pmod->ess) < ESSZERO) {
1788 	pmod->ess = 0.0;
1789     } else if (pmod->ess < 0.0) {
1790 	gretl_errmsg_sprintf(_("Error sum of squares (%g) is not > 0"),
1791 			     pmod->ess);
1792         return;
1793     }
1794 
1795     if (pmod->dfd == 0) {
1796 	pmod->sigma = 0.0;
1797 	pmod->adjrsq = NADBL;
1798     } else {
1799 	if (pmod->opt & OPT_N) {
1800 	    /* no-df-corr */
1801 	    s2 = pmod->ess / pmod->nobs;
1802 	} else {
1803 	    s2 = pmod->ess / pmod->dfd;
1804 	}
1805 	pmod->sigma = sqrt(s2);
1806     }
1807 
1808     if (pmod->tss < DBL_EPSILON) {
1809 	pmod->rsq = pmod->adjrsq = NADBL;
1810     }
1811 
1812     hatvars(pmod, dset);
1813 
1814     if (pmod->errcode) return;
1815 
1816     if (pmod->tss > 0.0) {
1817 	compute_r_squared(pmod, dset, &ifc);
1818     } else if (pmod->tss == 0.0 && !(opt & OPT_A)) {
1819 	uncentered_r_squared(pmod, dset);
1820     }
1821 
1822     if (s2 <= 0.0 || pmod->dfd == 0 || pmod->dfn == 0) {
1823 	pmod->fstt = NADBL;
1824     } else if (pmod->rsq == 1.0) {
1825 	pmod->fstt = NADBL;
1826     } else if (pmod->opt & OPT_N) {
1827 	/* for consistency with no-df-corr */
1828 	pmod->fstt = NADBL;
1829 	pmod->chisq = (rss - zz * ifc) / s2;
1830     } else {
1831 	pmod->fstt = (rss - zz * ifc) / (s2 * pmod->dfn);
1832 	if (pmod->fstt < 0.0) {
1833 	    pmod->fstt = 0.0;
1834 	}
1835     }
1836 
1837     diag = malloc(pmod->ncoeff * sizeof *diag);
1838     if (diag == NULL) {
1839 	pmod->errcode = E_ALLOC;
1840 	return;
1841     }
1842 
1843     /* Note: 'xpy' is used purely as workspace in diaginv() below, as
1844        a matter of economy/convenience; no values in this array are
1845        used before they are written.
1846     */
1847     diaginv(pmod->xpx, xpy, diag, pmod->ncoeff);
1848 
1849     for (i=0; i<pmod->ncoeff; i++) {
1850 	if (diag[i] >= 0.0) {
1851 	    pmod->sderr[i] = pmod->sigma * sqrt(diag[i]);
1852 	} else {
1853 	    pmod->sderr[i] = 0.0;
1854 	}
1855     }
1856 
1857     free(diag);
1858 }
1859 
1860 /**
1861  * makevcv:
1862  * @pmod: pointer to model.
1863  * @sigma: square root of error variance, or 1.0 to
1864  * produce just X'X^{-1}.
1865  *
1866  * Inverts the Cholesky-decomposed X'X and computes the
1867  * coefficient covariance matrix.
1868  *
1869  * Returns: 0 on successful completion, non-zero code on error.
1870  */
1871 
1872 int makevcv (MODEL *pmod, double sigma)
1873 {
1874     int dec, mst, kk, i, j, kj, icnt, m, k, l = 0;
1875     int nv, nxpx;
1876     double d;
1877 
1878     if (pmod->vcv != NULL) {
1879 	/* already done */
1880 	return 0;
1881     }
1882 
1883     if (pmod->xpx == NULL) {
1884 	/* raw material not available */
1885 	return E_BADSTAT;
1886     }
1887 
1888     nv = pmod->ncoeff;
1889     nxpx = (nv * nv + nv) / 2;
1890     mst = nxpx;
1891     kk = nxpx - 1;
1892 
1893     pmod->vcv = malloc(nxpx * sizeof *pmod->vcv);
1894     if (pmod->vcv == NULL) {
1895 	return E_ALLOC;
1896     }
1897 
1898     for (i=0; i<nv; i++) {
1899 	mst -= i;
1900 	/* find diagonal element */
1901 	d = pmod->xpx[kk];
1902 	if (i > 0) {
1903 	    for (j=kk+1; j<=kk+i; j++) {
1904 		d -= pmod->xpx[j] * pmod->vcv[j];
1905 	    }
1906 	}
1907 	pmod->vcv[kk] = d * pmod->xpx[kk];
1908 	/* find off-diagonal elements indexed by kj */
1909 	kj = kk;
1910 	kk = kk - i - 2;
1911 	if (i > nv - 2) {
1912 	    continue;
1913 	}
1914 	for (j=i+1; j<nv; j++) {
1915 	    icnt = i+1;
1916 	    kj -= j;
1917 	    d = 0.0;
1918 	    m = mst + 1;
1919 	    for (k=0; k<=j-1; k++) {
1920 		if (icnt > 0) {
1921 		    dec = 1;
1922 		    icnt--;
1923 		} else {
1924 		    dec = k;
1925 		}
1926 		m -= dec;
1927 		l = kj + i - k;
1928 		d += pmod->vcv[m-1] * pmod->xpx[l];
1929 	    }
1930 	    pmod->vcv[kj] = -d * pmod->xpx[l-1];
1931 	}
1932     }
1933 
1934     if (pmod->ci == LOGIT || pmod->ci == PROBIT) {
1935 	sigma = 1.0;
1936     }
1937 
1938     if (sigma != 1.0) {
1939 	double s2 = sigma * sigma;
1940 
1941 	for (i=0; i<nxpx; i++) {
1942 	    pmod->vcv[i] *= s2;
1943 	}
1944     }
1945 
1946     return 0;
1947 }
1948 
1949 /**
1950  * dwstat:
1951  * @order: order of autoregression (usually 1).
1952  * @pmod: pointer to model.
1953  * @dset: pointer to dataset.
1954  *
1955  * Computes the Durbin-Watson statistic for @pmod.
1956  *
1957  * Returns: the D-W value, or #NADBL on error.
1958  */
1959 
1960 double dwstat (int order, MODEL *pmod, const DATASET *dset)
1961 {
1962     const double *w = NULL;
1963     double ut, u1;
1964     double num = 0.0;
1965     double den = 0.0;
1966     int t, t1;
1967 
1968     if (pmod->ess <= 0.0) {
1969 	return NADBL;
1970     }
1971 
1972     t1 = pmod->t1 + order;
1973 
1974     if (pmod->nwt) {
1975 	w = dset->Z[pmod->nwt];
1976 	ut = pmod->uhat[t1 - 1];
1977 	if (!na(ut)) {
1978 	    den += ut * ut;
1979 	}
1980     } else {
1981 	den = pmod->ess;
1982     }
1983 
1984     for (t=t1; t<=pmod->t2; t++)  {
1985         ut = pmod->uhat[t];
1986         u1 = pmod->uhat[t-1];
1987         if (na(ut) || na(u1) ||
1988 	    (w != NULL && (w[t] == 0.0 || w[t-1] == 0.0))) {
1989 	    continue;
1990 	}
1991         num += (ut - u1) * (ut - u1);
1992 	if (pmod->nwt) {
1993 	    den += ut * ut;
1994 	}
1995     }
1996 
1997     return num / den;
1998 }
1999 
2000 /* altrho: alternative calculation of rho */
2001 
2002 static double altrho (int order, int t1, int t2, const double *uhat)
2003 {
2004     int t, n, len = t2 - (t1 + order) + 1;
2005     double *ut, *u1;
2006     double uht, uh1, rho;
2007 
2008     ut = malloc(2 * len * sizeof *ut);
2009     if (ut == NULL) {
2010 	return NADBL;
2011     }
2012 
2013     u1 = ut + len;
2014     n = 0;
2015 
2016     for (t=t1+order; t<=t2; t++) {
2017         uht = uhat[t];
2018 	uh1 = (t > 0)? uhat[t-1] : NADBL;
2019         if (!na(uht) && !na(uh1)) {
2020 	    ut[n] = uht;
2021 	    u1[n] = uh1;
2022 	    n++;
2023 	}
2024     }
2025 
2026     rho = gretl_corr(0, n - 1, ut, u1, NULL);
2027 
2028     free(ut);
2029 
2030     return rho;
2031 }
2032 
2033 /**
2034  * rhohat:
2035  * @order: order of autoregression, usually 1.
2036  * @t1: start of sample range.
2037  * @t2: end of sample range.
2038  * @uhat: array of regression residuals.
2039  *
2040  * Computes the first-order serial correlation coefficient
2041  * for @uhat, over the range @t1 to @t2.
2042  *
2043  * Returns: the \hat{rho} value, or #NADBL on error.
2044  */
2045 
2046 double rhohat (int order, int t1, int t2, const double *uhat)
2047 {
2048     double ut, u1, num = 0.0, den = 0.0;
2049     double rho;
2050     int t;
2051 
2052     for (t=t1+order; t<=t2; t++) {
2053         ut = uhat[t];
2054         u1 = uhat[t-1];
2055         if (na(ut) || na(u1)) {
2056 	    continue;
2057 	}
2058         num += ut * u1;
2059         den += u1 * u1;
2060     }
2061 
2062     if (den < DBL_EPSILON) {
2063 	return NADBL;
2064     }
2065 
2066     rho = num / den;
2067 
2068     if (rho > 1.0 || rho < -1.0) {
2069 	/* out of bounds, try again */
2070 	rho = altrho(order, t1, t2, uhat);
2071     }
2072 
2073     return rho;
2074 }
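
/* Typical usage (compare ar_model() below): after estimation,

       pmod->rho = rhohat(1, pmod->t1, pmod->t2, pmod->uhat);
*/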
2075 
2076 static int hilu_plot (double *ssr, double *rho, int n)
2077 {
2078     FILE *fp;
2079     int i, err = 0;
2080 
2081     fp = open_plot_input_file(PLOT_REGULAR, 0, &err);
2082     if (err) {
2083 	return err;
2084     }
2085 
2086     fputs("# hildreth-lu\n", fp);
2087     fputs("set xlabel 'rho'\n", fp);
2088 
2089     fprintf(fp, "set ylabel '%s'\n", _("ESS"));
2090 
2091     fputs("set nokey\n", fp);
2092     fputs("set xrange [-1.0:1.0]\n", fp);
2093     fputs("plot '-' using 1:2 w impulses\n", fp);
2094 
2095     gretl_push_c_numeric_locale();
2096 
2097     for (i=0; i<n; i++) {
2098 	fprintf(fp, "%g %g\n", rho[i], ssr[i]);
2099     }
2100     fputs("e\n", fp);
2101 
2102     gretl_pop_c_numeric_locale();
2103 
2104     return finalize_plot_input_file(fp);
2105 }
2106 
2107 /* calculation of rhohat for use with CORC/PWE */
2108 
2109 static double autores (MODEL *pmod, DATASET *dset, gretlopt opt)
2110 {
2111     double *uhat = pmod->uhat;
2112     double ut, num = 0.0, den = 0.0;
2113     int yno = pmod->list[1];
2114     int t1 = pmod->t1;
2115     int t, i, v;
2116 
2117     if (!(opt & OPT_P)) {
2118 	/* not using Prais-Winsten */
2119 	t1--;
2120     }
2121 
2122     for (t=t1; t<=pmod->t2; t++) {
2123 	ut = dset->Z[yno][t];
2124 	for (i=0; i<pmod->ncoeff; i++) {
2125 	    v = pmod->list[i+2];
2126 	    ut -= pmod->coeff[i] * dset->Z[v][t];
2127 	}
2128 	uhat[t] = ut;
2129 	if (t > t1) {
2130 	    num += uhat[t] * uhat[t-1];
2131 	    den += uhat[t-1] * uhat[t-1];
2132 	}
2133     }
2134 
2135     return num / den;
2136 }
2137 
2138 static int rho_val_ends_row (int k)
2139 {
2140     int i, endval = -80; /* rho = -0.80 */
2141 
2142     for (i=0; i<7; i++) {
2143 	if (k == endval) {
2144 	    return 1;
2145 	}
2146 	endval += 30;
2147     }
2148 
2149     return 0;
2150 }
2151 
2152 static void hilu_print_iter (double r, double ess, int iter,
2153 			     double *ssr, double *rh,
2154 			     int *ip, int asciiplot,
2155 			     PRN *prn)
2156 {
2157     char num[8];
2158     int k;
2159 
2160     sprintf(num, "%.1f", 100 * r);
2161     k = atoi(num);
2162 
2163     /* we'll not print all 199 rho values */
2164 
2165     if (k == -99 || k == 99 || k % 10 == 0) {
2166 	if (asciiplot) {
2167 	    /* recording a subset of values */
2168 	    ssr[*ip] = ess;
2169 	    rh[*ip] = r;
2170 	    *ip += 1;
2171 	}
2172 	pprintf(prn, " %5.2f %#12.6g", r, ess);
2173 	if (rho_val_ends_row(k)) {
2174 	    pputc(prn, '\n');
2175 	} else {
2176 	    bufspace(3, prn);
2177 	}
2178     }
2179 }
2180 
2181 static void hilu_header (PRN *prn)
2182 {
2183     int i;
2184 
2185     pputs(prn, "\n   ");
2186 
2187     for (i=0; i<3; i++) {
2188 	pputs(prn, "rho");
2189 	bufspace(10, prn);
2190 	pputs(prn, _("ESS"));
2191 	if (i < 2) {
2192 	    pputs(prn, "      ");
2193 	}
2194     }
2195 
2196     pputc(prn, '\n');
2197 }
2198 
2199 static double hilu_search (const int *list, DATASET *dset,
2200 			   MODEL *pmod, gretlopt opt,
2201 			   PRN *prn)
2202 {
2203     double *rh = NULL, *ssr = NULL;
2204     double essmin = 1.0e200;
2205     double ess, r, hl_rho = 0;
2206     int quiet = (opt & OPT_Q);
2207     int niceplot = (opt & OPT_G);
2208     int iter = 0, i = 0;
2209     int nplot = 0;
2210 
2211     if (!quiet) {
2212 	/* allocate arrays for recording plot info */
2213 	nplot = niceplot ? 199 : 21;
2214 	ssr = malloc(2 * nplot * sizeof *ssr);
2215 	if (ssr == NULL) {
2216 	    pmod->errcode = E_ALLOC;
2217 	    return NADBL;
2218 	}
2219 	rh = ssr + nplot;
2220 	/* and print header */
2221 	hilu_header(prn);
2222     }
2223 
2224     for (r = -0.99; r < 1.0; r += .01, iter++) {
2225 	clear_model(pmod);
2226 
2227 	*pmod = ar1_lsq(list, dset, OLS, OPT_A, r);
2228 	if (pmod->errcode) {
2229 	    free(ssr);
2230 	    return NADBL;
2231 	}
2232 
2233 	ess = pmod->ess;
2234 
2235 	if (!quiet) {
2236 	    hilu_print_iter(r, ess, iter, ssr, rh, &i,
2237 			    !niceplot, prn);
2238 	    if (niceplot) {
2239 		/* record full data for plotting */
2240 		ssr[i] = ess;
2241 		rh[i++] = r;
2242 	    }
2243 	}
2244 
2245 	if (iter == 0 || ess < essmin) {
2246 	    essmin = ess;
2247 	    hl_rho = r;
2248 	}
2249     } /* end of basic iteration */
2250 
2251     if (hl_rho > 0.989) {
2252 	/* rho is very close to 1.0: refine the search on a finer grid */
2253 	for (r = 0.99; r <= 0.999; r += .001) {
2254 	    clear_model(pmod);
2255 	    *pmod = ar1_lsq(list, dset, OLS, OPT_A, r);
2256 	    if (pmod->errcode) {
2257 		free(ssr);
2258 		return NADBL;
2259 	    }
2260 	    ess = pmod->ess;
2261 	    if (ess < essmin) {
2262 		essmin = ess;
2263 		hl_rho = r;
2264 	    }
2265 	}
2266     }
2267 
2268     if (hl_rho > 0.9989) {
2269 	/* still closer to 1.0: refine the grid once more */
2270 	for (r = 0.9991; r <= 0.9999; r += .0001) {
2271 	    clear_model(pmod);
2272 	    *pmod = ar1_lsq(list, dset, OLS, OPT_A, r);
2273 	    if (pmod->errcode) {
2274 		free(ssr);
2275 		return NADBL;
2276 	    }
2277 	    ess = pmod->ess;
2278 	    if (ess < essmin) {
2279 		essmin = ess;
2280 		hl_rho = r;
2281 	    }
2282 	}
2283     }
2284 
2285     if (!quiet) {
2286 	pprintf(prn, _("\n\nESS is minimized for rho = %g\n\n"), hl_rho);
2287 	if (niceplot) {
2288 	    hilu_plot(ssr, rh, nplot);
2289 	} else {
2290 	    /* ascii plot */
2291 	    graphyx(ssr, rh, nplot, "ESS", "RHO", prn);
2292 	    pputs(prn, "\n");
2293 	}
2294     }
2295 
2296     free(ssr);
2297 
2298     return hl_rho;
2299 }
2300 
2301 static void corc_header (gretlopt opt, PRN *prn)
2302 {
2303     if (opt & OPT_H) {
2304 	pputs(prn, _("\nFine-tune rho using the CORC procedure...\n\n"));
2305     } else {
2306 	pputs(prn, _("\nPerforming iterative calculation of rho...\n\n"));
2307     }
2308 
2309     pputs(prn, _("                 ITER       RHO        ESS"));
2310     pputc(prn, '\n');
2311 }
2312 
2313 /* Unlike plain CORC, PWE needs the term sqrt(1 - rho^2), so
2314    we really cannot proceed if |rho| >= 1.0, even just to see
2315    where rho ends up, as we now do with CORC.
2316 */
2317 
2318 static int pwe_error (double rho, int quiet, int iter, PRN *prn)
2319 {
2320     gretl_errmsg_sprintf(_("Prais-Winsten: invalid rho value %g "
2321 			   "(explosive error)"), rho);
2322     if (!quiet) {
2323 	pprintf(prn, "          %10d %12.5f", iter, rho);
2324 	pprintf(prn, "        NA\n");
2325     }
2326 
2327     return E_NOCONV;
2328 }
2329 
2330 /*
2331  * estimate_rho:
2332  * @list: dependent variable plus list of regressors.
2333  * @dset: dataset struct.
2334  * @opt: option flags: may include OPT_H to use Hildreth-Lu,
2335  *       OPT_P to use Prais-Winsten, OPT_B to suppress Cochrane-Orcutt
2336  *       fine-tuning of Hildreth-Lu results, OPT_G to generate
2337  *       a gnuplot graph of the search in Hildreth-Lu case.
2338  * @prn: gretl printing struct.
2339  * @err: location to receive error code.
2340  *
2341  * Estimate the quasi-differencing coefficient for use with the
2342  * Cochrane-Orcutt, Hildreth-Lu or Prais-Winsten procedures for
2343  * handling first-order serial correlation. Print a trace of the
2344  * search for rho.
2345  *
2346  * Returns: rho estimate on successful completion, #NADBL on error.
2347  */
2348 
2349 static double estimate_rho (const int *list, DATASET *dset,
2350 			    gretlopt opt, PRN *prn, int *err)
2351 {
2352     int save_t1 = dset->t1;
2353     int save_t2 = dset->t2;
2354     int quiet = (opt & OPT_Q);
2355     int pwe = (opt & OPT_P);
2356     double rho = 0.0;
2357     MODEL armod;
2358 
2359     *err = list_adjust_sample(list, &dset->t1, &dset->t2,
2360 			      dset, NULL);
2361     if (*err) {
2362 	goto bailout;
2363     }
2364 
2365     gretl_model_init(&armod, dset);
2366 
2367     if (opt & OPT_H) {
2368 	/* Do Hildreth-Lu first */
2369 	rho = hilu_search(list, dset, &armod, opt, prn);
2370     } else {
2371 	/* Initialize Cochrane-Orcutt (or Prais-Winsten) */
2372 	armod = lsq(list, dset, OLS, OPT_A);
2373 	if (!armod.errcode && armod.dfd == 0) {
2374 	    armod.errcode = E_DF;
2375 	}
2376 	if (!armod.errcode) {
2377 	    rho = armod.rho;
2378 	}
2379     }
2380 
2381     if (armod.errcode) {
2382 	*err = armod.errcode;
2383     } else if (!(opt & OPT_H) || !(opt & OPT_B)) {
2384 	/* either not Hildreth-Lu, or Hildreth-Lu but CORC
2385 	   fine-tuning is wanted (has not been suppressed
2386 	   via OPT_B)
2387 	*/
2388 	gretlopt lsqopt = pwe ? (OPT_A | OPT_P) : OPT_A;
2389 	double edsmall = 5.0e-8;
2390 	double ess0 = armod.ess;
2391 	double rho0 = rho;
2392 	double rdsmall = (opt & OPT_L)? 0.001 : 1.0e-6;
2393 	int iter = 0, itermax = 100;
2394 	int converged = 0;
2395 
2396 	if (!quiet) {
2397 	    corc_header(opt, prn);
2398 	}
2399 
2400 	while (++iter <= itermax && !converged) {
2401 	    clear_model(&armod);
2402 	    armod = ar1_lsq(list, dset, OLS, lsqopt, rho);
2403 	    if ((*err = armod.errcode)) {
2404 		break;
2405 	    }
2406 
2407 	    if (!quiet) {
2408 		pprintf(prn, "          %10d %12.5f", iter, rho);
2409 		pprintf(prn, "   %#.6g\n", armod.ess);
2410 	    }
2411 
2412 	    rho = autores(&armod, dset, opt);
2413 
2414 	    if (pwe && fabs(rho) >= 1.0) {
2415 		*err = pwe_error(rho, quiet, iter + 1, prn);
2416 		break;
2417 	    }
2418 
2419 	    if (armod.ess == 0.0) {
2420 		converged = 1;
2421 	    } else if ((ess0 - armod.ess) / ess0 < edsmall) {
2422 		converged = 1;
2423 	    } else if (fabs(rho - rho0) < rdsmall) {
2424 		converged = 1;
2425 	    }
2426 
2427 	    ess0 = armod.ess;
2428 	    rho0 = rho;
2429 	} /* end Cochrane-Orcutt iteration */
2430 
2431 	if (converged && (rho >= 1.0 || rho <= -1.0)) {
2432 	    gretl_errmsg_sprintf(_("%s: rho = %g, error is unbounded"),
2433 				 "ar1", rho);
2434 	    converged = 0;
2435 	}
2436 
2437 	if (!converged && !*err) {
2438 	    *err = E_NOCONV;
2439 	}
2440 
2441 	if (!quiet) {
2442 	    if (*err) {
2443 		pputc(prn, '\n');
2444 	    } else {
2445 		pprintf(prn, "          %10d %12.5f", iter, rho);
2446 		pprintf(prn, "   %#.6g\n", armod.ess);
2447 	    }
2448 	}
2449     }
2450 
2451     clear_model(&armod);
2452 
2453  bailout:
2454 
2455     dset->t1 = save_t1;
2456     dset->t2 = save_t2;
2457 
2458     if (*err) {
2459 	rho = NADBL;
2460     }
2461 
2462     return rho;
2463 }
2464 
2465 /**
2466  * augment_regression_list:
2467  * @orig: list giving original regression specification.
2468  * @aux: either %AUX_SQ, %AUX_LOG or %AUX_WHITE.
2469  * @dset: dataset struct.
2470  * @err: location to receive error code.
2471  *
2472  * Augment the regression list @orig with auxiliary terms.  If @aux
2473  * is %AUX_SQ add the squares of the original regressors; if @aux
2474  * is %AUX_WHITE add squares and cross-products; and if @aux is
2475  * %AUX_LOG add the natural logs of the original regressors.
2476  * If they are not already present, these variables are added
2477  * to the data array.
2478  *
2479  * Returns: the augmented list, or NULL on failure.
2480  */
2481 
2482 int *augment_regression_list (const int *orig, int aux,
2483 			      DATASET *dset, int *err)
2484 {
2485     int *list;
2486     int listlen;
2487     int cnum = 0;
2488     int i, k;
2489 
2490     if (aux == AUX_WHITE) {
2491 	int cpos = gretl_list_const_pos(orig, 2, dset);
2492 	int nt, trv = orig[0] - 1;
2493 
2494 	if (cpos > 0) {
2495 	    trv--;
2496 	    cnum = orig[cpos];
2497 	}
2498 	nt = (trv * trv + trv) / 2;
2499 	listlen = orig[0] + nt + 1;
2500     } else {
2501 	listlen = 2 * orig[0];
2502     }
2503 
2504     list = malloc(listlen * sizeof *list);
2505     if (list == NULL) {
2506 	*err = E_ALLOC;
2507 	return NULL;
2508     }
2509 
2510     /* transcribe original list */
2511     for (i=0; i<=orig[0]; i++) {
2512 	list[i] = orig[i];
2513     }
2514 
2515     /* add squares, cross-products or logs of independent vars */
2516 
2517     k = list[0];
2518     for (i=2; i<=orig[0]; i++) {
2519 	int vnew, vi = orig[i];
2520 
2521 	if (vi == 0) {
2522 	    continue;
2523 	}
2524 
2525 	if (aux == AUX_SQ || aux == AUX_WHITE) {
2526 	    vnew = xpxgenr(vi, vi, dset);
2527 	    if (vnew > 0) {
2528 		list[++k] = vnew;
2529 	    }
2530 	    if (aux == AUX_WHITE) {
2531 		int j, vj;
2532 
2533 		for (j=i+1; j<=orig[0]; j++) {
2534 		    vj = orig[j];
2535 		    if (vj == cnum) {
2536 			continue;
2537 		    }
2538 		    vnew = xpxgenr(vi, vj, dset);
2539 		    if (vnew > 0) {
2540 			/* ensure uniqueness of generated varnames */
2541 			sprintf(dset->varname[vnew], "X%d_X%d", i-1, j-1);
2542 			list[++k] = vnew;
2543 		    }
2544 		}
2545 	    }
2546 	} else if (aux == AUX_LOG) {
2547 	    /* don't try to log series with non-positive values */
2548 	    if (gretl_ispositive(dset->t1, dset->t2, dset->Z[vi], 1)) {
2549 		vnew = loggenr(vi, dset);
2550 		if (vnew > 0) {
2551 		    list[++k] = vnew;
2552 		}
2553 	    }
2554 	}
2555     }
2556 
2557     list[0] = k;
2558 
2559     return list;
2560 }
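
/* Illustrative (hypothetical) usage: starting from an OLS
   specification in @list, build the regressor set for an auxiliary
   regression that adds the squares of the original regressors.

       int err = 0;
       int *biglist = augment_regression_list(list, AUX_SQ, dset, &err);

       if (!err) {
           // biglist holds the original terms plus their squares;
           // any newly generated series were appended to @dset
           free(biglist);
       }
*/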
2561 
2562 /* For observation s, see if the regression list contains a variable
2563    that has a single non-zero value at that particular observation.
2564    We run this check on observations showing an OLS residual of zero.
2565 */
2566 
2567 static int observation_is_dummied (const MODEL *pmod,
2568 				   int *list, const double **Z,
2569 				   int s)
2570 {
2571     int i, t, v;
2572     int ret = 0;
2573 
2574     for (i=list[0]; i>=2; i--) {
2575 	v = list[i];
2576 	if (v == 0) {
2577 	    continue;
2578 	}
2579 	ret = 1;
2580 	for (t=pmod->t1; t<=pmod->t2; t++) {
2581 	    if ((t == s && Z[v][t] == 0.0) || (t != s && Z[v][t] != 0.0)) {
2582 		ret = 0;
2583 		break;
2584 	    }
2585 	}
2586 	if (ret) {
2587 	    gretl_list_delete_at_pos(list, i);
2588 	    break;
2589 	}
2590     }
2591 
2592     return ret;
2593 }
2594 
2595 static int *copy_ensure_const (const int *orig)
2596 {
2597     int *list = gretl_list_new(orig[0] + 1);
2598     int i;
2599 
2600     if (list != NULL) {
2601 	list[1] = orig[1];
2602 	list[2] = 0;
2603 	for (i=3; i<=list[0]; i++) {
2604 	    list[i] = orig[i-1];
2605 	}
2606     }
2607 
2608     return list;
2609 }
2610 
2611 /* get_hsk_weights: take the residuals from the model @pmod, square
2612    them and take logs; find the fitted values for this series using an
2613    auxiliary regression including the original independent variables
2614    (and their squares, if not given OPT_N); exponentiate the fitted
2615    values; and add the resulting series to the data set.
2616 */
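
/* In other words, the implicit skedastic model is

       sigma_t^2 = exp(z_t'gamma),

   so the auxiliary regression fits log(uhat_t^2) = z_t'gamma + e_t
   and the weight series is

       w_t = exp(-z_t'gamma_hat),

   an estimate of 1/sigma_t^2, which is then used as the WLS weight
   in hsk_model() below.
*/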
2617 
2618 static int get_hsk_weights (MODEL *pmod, DATASET *dset, gretlopt opt)
2619 {
2620     int oldv = dset->v;
2621     int t, t1 = dset->t1, t2 = dset->t2;
2622     int *lcpy = NULL;
2623     int *auxlist = NULL;
2624     int err = 0, shrink = 0;
2625     double xx;
2626     MODEL aux;
2627 
2628     if (pmod->ifc) {
2629 	lcpy = gretl_list_copy(pmod->list);
2630     } else {
2631 	lcpy = copy_ensure_const(pmod->list);
2632     }
2633 
2634     if (lcpy == NULL) {
2635 	return E_ALLOC;
2636     }
2637 
2638     /* allocate space for an additional variable */
2639     if (dataset_add_series(dset, 1)) {
2640 	free(lcpy);
2641 	return E_ALLOC;
2642     }
2643 
2644     /* add transformed residuals to data set */
2645     for (t=0; t<dset->n; t++) {
2646 	xx = pmod->uhat[t];
2647 	if (na(xx)) {
2648 	    dset->Z[oldv][t] = NADBL;
2649 	} else if (xx == 0.0) {
2650 	    if (observation_is_dummied(pmod, lcpy, (const double **) dset->Z, t)) {
2651 		dset->Z[oldv][t] = NADBL;
2652 	    } else {
2653 		fprintf(stderr, "hsk: got a zero residual, could be a problem!\n");
2654 		dset->Z[oldv][t] = -1.0e16; /* ?? */
2655 	    }
2656 	} else {
2657 	    dset->Z[oldv][t] = log(xx * xx);
2658 	}
2659     }
2660 
2661     if (opt & OPT_N) {
2662 	/* --no-squares option */
2663 	auxlist = lcpy;
2664     } else {
2665 	/* build regression list, adding the squares of the original
2666 	   independent vars */
2667 	auxlist = augment_regression_list(lcpy, AUX_SQ, dset, &err);
2668 	if (err) {
2669 	    /* clean up on this error path */
	    dataset_drop_last_variables(dset, dset->v - oldv);
	    free(lcpy);
	    return err;
2670 	}
2671     }
2672 
2673     auxlist[1] = oldv; /* the newly added log(uhat-squared) */
2674 
2675     dset->t1 = pmod->t1;
2676     dset->t2 = pmod->t2;
2677 
2678     aux = lsq(auxlist, dset, OLS, OPT_A);
2679     err = aux.errcode;
2680     if (err) {
2681 	shrink = dset->v - oldv;
2682     } else {
2683 	/* write into the data set the required weights */
2684 	for (t=aux.t1; t<=aux.t2; t++) {
2685 	    xx = aux.yhat[t];
2686 	    if (na(xx)) {
2687 		dset->Z[oldv][t] = NADBL;
2688 	    } else {
2689 		dset->Z[oldv][t] = 1.0 / exp(xx);
2690 	    }
2691 	}
2692 	shrink = dset->v - oldv - 1;
2693     }
2694 
2695     dset->t1 = t1;
2696     dset->t2 = t2;
2697 
2698     clear_model(&aux);
2699 
2700     if (shrink > 0) {
2701 	dataset_drop_last_variables(dset, shrink);
2702     }
2703 
2704     if (auxlist != lcpy) {
2705 	free(auxlist);
2706     }
2707     free(lcpy);
2708 
2709     return err;
2710 }
2711 
2712 /**
2713  * hsk_model:
2714  * @list: dependent variable plus list of regressors.
2715  * @dset: dataset struct.
2716  * @opt: may include OPT_N to suppress use of squares
2717  * of regressors in the auxiliary regression.
2718  *
2719  * Estimate the model given in @list using a correction for
2720  * heteroskedasticity.
2721  *
2722  * Returns: a #MODEL struct, containing the estimates.
2723  */
2724 
2725 MODEL hsk_model (const int *list, DATASET *dset, gretlopt opt)
2726 {
2727     int i, err;
2728     int orig_nvar = dset->v;
2729     int *hsklist;
2730     MODEL hsk;
2731 
2732     if (dset->Z == NULL) {
2733 	hsk.errcode = E_DATA;
2734 	return hsk;
2735     }
2736 
2737     gretl_error_clear();
2738 
2739     /* run initial OLS */
2740     hsk = lsq(list, dset, OLS, OPT_A);
2741     if (hsk.errcode) {
2742 	return hsk;
2743     }
2744 
2745     /* form weights based on the OLS residuals */
2746     err = get_hsk_weights(&hsk, dset, opt);
2747     if (err) {
2748 	hsk.errcode = err;
2749 	return hsk;
2750     }
2751 
2752     /* allocate regression list for weighted least squares */
2753     hsklist = gretl_list_new(list[0] + 1);
2754     if (hsklist == NULL) {
2755 	hsk.errcode = E_ALLOC;
2756 	return hsk;
2757     }
2758 
2759     /* the last variable in the dataset will be the weight var */
2760     hsklist[1] = dset->v - 1;
2761 
2762     /* put the original dependent variable in at position 2 */
2763     hsklist[2] = list[1];
2764 
2765     /* add the original independent vars into the WLS list */
2766     for (i=3; i<=hsklist[0]; i++) {
2767 	hsklist[i] = list[i-1];
2768     }
2769 
2770     clear_model(&hsk);
2771     hsk = lsq(hsklist, dset, WLS, OPT_NONE);
2772     hsk.ci = HSK;
2773 
2774     if (opt & OPT_N) {
2775 	gretl_model_set_int(&hsk, "no-squares", 1);
2776 	hsk.opt |= OPT_N;
2777     }
2778 
2779     dataset_drop_last_variables(dset, dset->v - orig_nvar);
2780 
2781     free(hsklist);
2782 
2783     return hsk;
2784 }
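
/* A minimal calling sketch (hypothetical caller: @list, @dset and
   @prn are assumed to be set up already):

       MODEL m = hsk_model(list, dset, OPT_NONE);

       if (!m.errcode) {
           printmodel(&m, dset, OPT_NONE, prn);
       }
       clear_model(&m);
*/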
2785 
2786 static void print_HET_1 (double z, double pval, PRN *prn)
2787 {
2788     pprintf(prn, "\n%s\n", _("Pesaran-Taylor test for heteroskedasticity"));
2789     pprintf(prn, "\n%s: HET_1 = %f,\n", _("Test statistic"), z);
2790     pprintf(prn, "%s = 2 * P(z > %f) = %.3g\n\n",
2791 	    _("with p-value"), z, pval);
2792 }
2793 
2794 static void print_whites_test (double LM, int df, double pval,
2795 			       gretlopt opt, PRN *prn)
2796 {
2797     if (opt & OPT_B) {
2798 	pprintf(prn, "\n%s\n", _("Breusch-Pagan test for heteroskedasticity"));
2799 	if (opt & OPT_R) {
2800 	    pprintf(prn, "%s\n", _("with Koenker robust variance estimator"));
2801 	}
2802 	pprintf(prn, "\n%s: LM = %f,\n", _("Test statistic"), LM);
2803     } else {
2804 	pprintf(prn, "\n%s", _("White's test for heteroskedasticity"));
2805 	if (opt & OPT_X) {
2806 	    pprintf(prn, " (%s)\n", _("squares only"));
2807 	} else {
2808 	    pputc(prn, '\n');
2809 	}
2810 	pprintf(prn, "\n%s: TR^2 = %f,\n", _("Test statistic"), LM);
2811     }
2812 
2813     pprintf(prn, "%s = P(%s(%d) > %f) = %f\n\n",
2814 	    _("with p-value"), _("Chi-square"),
2815 	    df, LM, pval);
2816 }
2817 
2818 /* For White's test, see if we have enough degrees of freedom
2819    to add squares -- and if so, if we have enough df to add
2820    cross-products also.
2821 */
2822 
2823 static int get_whites_aux (const MODEL *pmod, const double **Z)
2824 {
2825     int aux = AUX_WHITE;
2826     int rem = pmod->ncoeff - pmod->ifc - 1;
2827     int nsq = 0, nxpx = 0;
2828     int i, v;
2829 
2830     for (i=2; i<=pmod->list[0]; i++) {
2831 	v = pmod->list[i];
2832 	if (v > 0) {
2833 	    if (!gretl_isdummy(pmod->t1, pmod->t2, Z[v])) {
2834 		nsq++;
2835 	    }
2836 	    nxpx += rem--;
2837 	}
2838     }
2839 
2840     if (pmod->dfd - nsq < 1) {
2841 	aux = AUX_NONE;
2842     } else if (pmod->dfd - nsq - nxpx < 1) {
2843 	aux = AUX_SQ;
2844     }
2845 
2846     return aux;
2847 }
2848 
2849 #define PT_DEBUG 0
2850 
2851 /**
2852  * tsls_hetero_test:
2853  * @pmod: pointer to model.
2854  * @dset: dataset struct.
2855  * @opt: if flags include OPT_S, save results to model; OPT_Q
2856  * means don't print the auxiliary regression.
2857  * @prn: gretl printing struct.
2858  *
2859  * Runs Pesaran and Taylor's (1999) HET_1 test for heteroskedasticity
2860  * on the given tsls model. The statistic is just a t-statistic, so
2861  * under the null it is distributed as a standard normal.
2862  *
2863  * Returns: 0 on successful completion, error code on error.
2864  */
2865 
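/* Concretely, the test regression run below is

       uhat_t^2 = a0 + a1 * yhat_t^2 + error,

   where yhat comes from the reduced-form regression of the dependent
   variable on the full instrument list; the statistic is
   HET_1 = |a1_hat| / se(a1_hat), with p-value 2 * (1 - Phi(HET_1)).
*/
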
2866 static int tsls_hetero_test (MODEL *pmod, DATASET *dset,
2867 			     gretlopt opt, PRN *prn)
2868 {
2869     int pos, newv = dset->v;
2870     int *ptlist = NULL, *testlist = NULL;
2871     int save_t1 = dset->t1;
2872     int save_t2 = dset->t2;
2873     MODEL ptmod;
2874     double x;
2875     int i, h, t;
2876     int err = 0;
2877 
2878     pos = gretl_list_separator_position(pmod->list);
2879     h = pmod->list[0] - pos;
2880 
2881 #if PT_DEBUG
2882     pprintf(prn, "newv = %d, h = %d\n", newv, h);
2883 #endif
2884 
2885     ptlist = gretl_list_new(h + 1);
2886     testlist = gretl_list_new(3);
2887 
2888     if (ptlist == NULL || testlist == NULL) {
2889 	free(ptlist);
2890 	free(testlist);
2891 	return E_ALLOC;
2892     }
2893 
2894     ptlist[1] = pmod->list[1];
2895     for (i=2; i<=ptlist[0]; i++) {
2896 	ptlist[i] = pmod->list[i + pos - 1];
2897     }
2898 
2899     testlist[1] = newv;
2900     testlist[2] = 0;
2901     testlist[3] = newv + 1;
2902 
2903 #if PT_DEBUG
2904     printlist(ptlist, "ptlist");
2905     printlist(testlist, "testlist");
2906 #endif
2907 
2908     /* reduced form: regress the original dependent variable on all of
2909        the instruments from the original model
2910     */
2911     ptmod = lsq(ptlist, dset, OLS, OPT_A);
2912     err = ptmod.errcode;
2913     if (err) {
2914 	goto bailout;
2915     }
2916 
2917 #if PT_DEBUG
2918     printmodel(&ptmod, dset, OPT_S, prn);
2919 #endif
2920 
2921     /* add two series: (i) the squares of the residuals from the
2922        original model and (ii) the squares of the fitted values from
2923        the auxiliary regression above
2924      */
2925     err = dataset_add_series(dset, 2);
2926     if (err) {
2927 	clear_model(&ptmod);
2928 	goto bailout;
2929     }
2930 
2931     strcpy(dset->varname[newv+1], "yhat^2");
2932 
2933     for (t=pmod->t1; t<=pmod->t2; t++) {
2934 	x = pmod->uhat[t];
2935 	dset->Z[newv][t] = x*x;   /* squared residual */
2936 	x = ptmod.yhat[t];
2937 	dset->Z[newv+1][t] = x*x; /* squared fitted value */
2938     }
2939 
2940     clear_model(&ptmod);
2941 
2942     dset->t1 = pmod->t1;
2943     dset->t2 = pmod->t2;
2944 
2945     /* regress the squared residuals on the squared fitted
2946        values from the reduced-form auxiliary regression
2947     */
2948     ptmod = lsq(testlist, dset, OLS, OPT_A);
2949     err = ptmod.errcode;
2950 
2951     if (!err) {
2952 	double z = fabs(ptmod.coeff[1]) / ptmod.sderr[1];
2953 	double pval = 2.0 * (1 - normal_cdf(z));
2954 
2955 	if (opt & OPT_Q) {
2956 	    print_HET_1(z, pval, prn);
2957 	} else {
2958 	    ptmod.aux = AUX_HET_1;
2959 	    printmodel(&ptmod, dset, OPT_NONE, prn);
2960 	}
2961 
2962 	if (opt & OPT_S) {
2963 	    ModelTest *test = model_test_new(GRETL_TEST_HET_1);
2964 
2965 	    if (test != NULL) {
2966 		model_test_set_teststat(test, GRETL_STAT_Z);
2967 		model_test_set_value(test, z);
2968 		model_test_set_pvalue(test, pval);
2969 		maybe_add_test_to_model(pmod, test);
2970 	    }
2971 	}
2972 
2973 	record_test_result(z, pval);
2974     }
2975 
2976     clear_model(&ptmod);
2977 
2978     dataset_drop_last_variables(dset, 2);
2979 
2980  bailout:
2981 
2982     free(ptlist);
2983     free(testlist);
2984 
2985     dset->t1 = save_t1;
2986     dset->t2 = save_t2;
2987 
2988     return err;
2989 }
2990 
2991 /* Compute LM statistic as per Breusch and Pagan (Econometrica, 1979),
2992    with the option to use the robust variance estimator proposed by
2993    Koenker (Journal of Econometrics, 1981).
2994 */
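
/* In the notation used below: with sigma2_hat = ESS/n from the
   original model and u_t its residuals, the auxiliary regressand is
   g_t = u_t^2 / sigma2_hat (Breusch-Pagan) or g_t = u_t^2 - sigma2_hat
   (Koenker), and the statistic is

       LM = RSS / 2                                            (Breusch-Pagan)
       LM = RSS / V,  V = (1/n) * sum_t (u_t^2 - sigma2_hat)^2 (Koenker)

   where RSS is the explained sum of squares from regressing g_t on
   the original regressors.
*/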
2995 
2996 static double get_BP_LM (MODEL *pmod, int *list, MODEL *aux,
2997 			 DATASET *dset, gretlopt opt, int *err)
2998 {
2999     double s2, u2t, gt;
3000     double V = 0.0, LM = NADBL;
3001     int t, v = list[1];
3002 
3003     /* note: no df adjustment */
3004     s2 = pmod->ess / pmod->nobs;
3005 
3006     if (opt & OPT_R) {
3007 	/* calculate robust variance estimate a la Koenker */
3008 	for (t=pmod->t1; t<=pmod->t2; t++) {
3009 	    if (!na(pmod->uhat[t])) {
3010 		u2t = pmod->uhat[t] * pmod->uhat[t];
3011 		V += (u2t - s2) * (u2t - s2);
3012 	    }
3013 	}
3014 	V /= pmod->nobs;
3015     }
3016 
3017     for (t=pmod->t1; t<=pmod->t2; t++) {
3018 	if (na(pmod->uhat[t])) {
3019 	    dset->Z[v][t] = NADBL;
3020 	} else {
3021 	    u2t = pmod->uhat[t] * pmod->uhat[t];
3022 	    gt = (opt & OPT_R)? (u2t - s2) : (u2t / s2);
3023 	    dset->Z[v][t] = gt;
3024 	}
3025     }
3026 
3027     *aux = lsq(list, dset, OLS, OPT_A);
3028     *err = aux->errcode;
3029 
3030     if (!*err) {
3031 	double RSS = aux->tss - aux->ess;
3032 
3033 	if (RSS < 0) {
3034 	    *err = E_DATA;
3035 	} else {
3036 	    if (opt & OPT_R) {
3037 		aux->opt |= OPT_R;
3038 		LM = RSS / V;
3039 	    } else {
3040 		LM = .5 * RSS;
3041 	    }
3042 	    gretl_model_set_double(aux, "BPLM", LM);
3043 	    aux->aux = AUX_BP;
3044 	}
3045     }
3046 
3047     return LM;
3048 }
3049 
3050 static int *ensure_const_copy_list (const MODEL *pmod, int *err)
3051 {
3052     int *list = NULL;
3053 
3054     if (pmod->ifc) {
3055 	list = gretl_list_copy(pmod->list);
3056     } else {
3057 	int i, nl = pmod->list[0] + 1;
3058 
3059 	list = gretl_list_new(nl);
3060 	if (list != NULL) {
3061 	    list[1] = pmod->list[1];
3062 	    list[2] = 0; /* insert const */
3063 	    for (i=3; i<=nl; i++) {
3064 		list[i] = pmod->list[i-1];
3065 	    }
3066 	}
3067     }
3068 
3069     if (list == NULL) {
3070 	*err = E_ALLOC;
3071     }
3072 
3073     return list;
3074 }
3075 
3076 static int *ensure_const_augment_list (const MODEL *pmod, int aux,
3077 				       DATASET *dset, int *err)
3078 {
3079     int *list = NULL;
3080 
3081     if (pmod->ifc) {
3082 	list = augment_regression_list(pmod->list, aux, dset, err);
3083     } else {
3084 	int *tmp = ensure_const_copy_list(pmod, err);
3085 
3086 	if (!*err) {
3087 	    list = augment_regression_list(tmp, aux, dset, err);
3088 	    free(tmp);
3089 	}
3090     }
3091 
3092     return list;
3093 }
3094 
3095 /**
3096  * whites_test:
3097  * @pmod: pointer to model.
3098  * @dset: dataset struct.
3099  * @opt: if flags include OPT_S, save results to model; OPT_Q
3100  * means don't print the auxiliary regression; OPT_B means
3101  * do the simpler Breusch-Pagan variant; OPT_I means run
3102  * silently.
3103  * @prn: gretl printing struct.
3104  *
3105  * Runs White's test for heteroskedasticity on the given model.
3106  *
3107  * Returns: 0 on successful completion, error code on error.
3108  */
3109 
3110 int whites_test (MODEL *pmod, DATASET *dset,
3111 		 gretlopt opt, PRN *prn)
3112 {
3113     int BP = (opt & OPT_B);
3114     int aux = AUX_NONE;
3115     int v = dset->v;
3116     int *list = NULL;
3117     int save_t1 = dset->t1;
3118     int save_t2 = dset->t2;
3119     double zz, LM = 0;
3120     MODEL white;
3121     int t, err = 0;
3122 
3123     if (pmod->ci == IVREG) {
3124 	return tsls_hetero_test(pmod, dset, opt, prn);
3125     }
3126 
3127     if (pmod->list == NULL || gretl_list_has_separator(pmod->list)) {
3128 	return E_NOTIMP;
3129     }
3130 
3131     if (pmod->ci == NLS || pmod->ci == MLE ||
3132 	pmod->ci == GMM || pmod->ci == ARMA ||
3133 	pmod->ci == LOGISTIC) {
3134 	return E_NOTIMP;
3135     }
3136 
3137     if ((err = list_members_replaced(pmod, dset))) {
3138 	return err;
3139     }
3140 
3141     impose_model_smpl(pmod, dset);
3142 
3143     /* what can we do, with the degrees of freedom available? */
3144     if (!BP) {
3145 	if (opt & OPT_X) {
3146 	    aux = AUX_SQ;
3147 	} else {
3148 	    aux = get_whites_aux(pmod, (const double **) dset->Z);
3149 	    if (aux == AUX_NONE) {
3150 		dset->t1 = save_t1;
3151 		dset->t2 = save_t2;
3152 		return E_DF;
3153 	    }
3154 	}
3155     }
3156 
3157     gretl_model_init(&white, dset);
3158 
3159     /* make space in data set */
3160     if (dataset_add_series(dset, 1)) {
3161 	err = E_ALLOC;
3162     }
3163 
3164     if (!err) {
3165 	/* get residuals, square and add to data set */
3166 	for (t=0; t<dset->n; t++) {
3167 	    zz = pmod->uhat[t];
3168 	    if (na(zz)) {
3169 		dset->Z[v][t] = NADBL;
3170 	    } else {
3171 		dset->Z[v][t] = zz * zz;
3172 	    }
3173 	}
3174 	strcpy(dset->varname[v], "uhatsq");
3175     }
3176 
3177     if (!err) {
3178 	if (BP) {
3179 	    list = ensure_const_copy_list(pmod, &err);
3180 	} else {
3181 	    /* build aux regression list, adding squares and
3182 	       (possibly) cross-products of the original
3183 	       independent vars */
3184 	    list = ensure_const_augment_list(pmod, aux, dset, &err);
3185 	}
3186 	if (!err){
3187 	    list[1] = v; /* the newly added uhat-squared */
3188 	}
3189     }
3190 
3191     if (!err) {
3192 	/* run auxiliary regression */
3193 	if (BP) {
3194 	    LM = get_BP_LM(pmod, list, &white, dset, opt, &err);
3195 	} else {
3196 	    white = lsq(list, dset, OLS, OPT_A);
3197 	    err = white.errcode;
3198 	    if (!err) {
3199 		LM = white.rsq * white.nobs;
3200 		white.aux = AUX_WHITE;
3201 	    }
3202 	}
3203     }
3204 
3205     if (!err) {
3206 	int df = white.ncoeff - 1;
3207 	double pval = chisq_cdf_comp(df, LM);
3208 	gretlopt testopt = OPT_NONE;
3209 
3210 	if (BP) {
3211 	    if (opt & OPT_R) {
3212 		/* Koenker robust variant */
3213 		testopt = OPT_R;
3214 	    }
3215 	} else if (opt & OPT_X) {
3216 	    /* squares only */
3217 	    testopt = OPT_X;
3218 	}
3219 
3220 	if (!(opt & OPT_I)) {
3221 	    if (opt & OPT_Q) {
3222 		print_whites_test(LM, df, pval, opt, prn);
3223 	    } else {
3224 		white.opt |= testopt;
3225 		printmodel(&white, dset, OPT_NONE, prn);
3226 	    }
3227 	}
3228 
3229 	if (opt & OPT_S) {
3230 	    ModelTestType mt = BP? GRETL_TEST_BP : GRETL_TEST_WHITES;
3231 	    ModelTest *test = model_test_new(mt);
3232 
3233 	    if (test != NULL) {
3234 		model_test_set_teststat(test, GRETL_STAT_LM);
3235 		model_test_set_dfn(test, df);
3236 		model_test_set_value(test, LM);
3237 		model_test_set_pvalue(test, pval);
3238 		model_test_set_opt(test, testopt);
3239 		maybe_add_test_to_model(pmod, test);
3240 	    }
3241 	}
3242 
3243 	record_test_result(LM, pval);
3244     }
3245 
3246     clear_model(&white);
3247     dataset_drop_last_variables(dset, dset->v - v);
3248     free(list);
3249 
3250     dset->t1 = save_t1;
3251     dset->t2 = save_t2;
3252 
3253     return err;
3254 }
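
/* Illustrative (hypothetical) calls: the plain White test, saving
   the result onto the model,

       int err = whites_test(pmod, dset, OPT_S | OPT_Q, prn);

   or the Breusch-Pagan variant with Koenker's robust adjustment:

       err = whites_test(pmod, dset, OPT_B | OPT_R | OPT_Q, prn);
*/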
3255 
3256 static int ar_list_max (const int *list)
3257 {
3258     int i, lmax = 0;
3259 
3260     for (i=1; i<=list[0]; i++) {
3261 	if (list[i] > lmax) {
3262 	    lmax = list[i];
3263 	}
3264     }
3265 
3266     return lmax;
3267 }
3268 
3269 /**
3270  * ar_model:
3271  * @list: list of lags plus dependent variable and list of regressors.
3272  * @dset: dataset struct.
3273  * @opt: may contain OPT_O to print covariance matrix.
3274  * @prn: gretl printing struct.
3275  *
3276  * Estimate the model given in @list using the generalized
3277  * Cochrane-Orcutt procedure for autoregressive errors.
3278  *
3279  * Returns: #MODEL struct containing the results.
3280  */
3281 
3282 MODEL ar_model (const int *list, DATASET *dset,
3283 		gretlopt opt, PRN *prn)
3284 {
3285     double diff, ess, tss, xx;
3286     int i, j, vi, t, t1, t2, vc, yno, ryno, iter;
3287     int k, pmax, v = dset->v;
3288     int *arlist = NULL, *rholist = NULL;
3289     int *reglist = NULL, *reglist2 = NULL;
3290     int cpos, nvadd;
3291     MODEL ar, rhomod;
3292 
3293     gretl_error_clear();
3294     gretl_model_init(&ar, dset);
3295     gretl_model_init(&rhomod, dset);
3296 
3297     ar.errcode = gretl_list_split_on_separator(list, &arlist, &reglist);
3298 
3299     if (!ar.errcode && arlist[0] == 1 && arlist[1] == 1) {
3300 	/* special case: ar 1 ; ... => use AR1 apparatus */
3301 	ar = ar1_model(reglist, dset, opt, prn);
3302 	goto bailout;
3303     }
3304 
3305     if (!ar.errcode) {
3306 	reglist2 = gretl_list_new(reglist[0]);
3307 	rholist = gretl_list_new(arlist[0] + 1);
3308 	if (reglist2 == NULL || rholist == NULL) {
3309 	    ar.errcode = E_ALLOC;
3310 	}
3311     }
3312 
3313     if (ar.errcode) {
3314 	goto bailout;
3315     }
3316 
3317     pmax = ar_list_max(arlist);
3318     cpos = reglist_check_for_const(reglist, dset);
3319 
3320     /* first pass: estimate model via OLS: use OPT_M to generate an
3321        error in case of missing values within sample range
3322     */
3323     ar = lsq(reglist, dset, OLS, OPT_A | OPT_M);
3324     if (ar.errcode) {
3325 	goto bailout;
3326     }
3327 
3328     nvadd = arlist[0] + 1 + reglist[0];
3329 
3330     /* allocate space for the uhat terms and transformed data */
3331     if (dataset_add_series(dset, nvadd)) {
3332 	ar.errcode = E_ALLOC;
3333 	goto bailout;
3334     }
3335 
3336     yno = reglist[1];
3337     t1 = ar.t1;
3338     t2 = ar.t2;
3339     rholist[1] = v;
3340 
3341     pprintf(prn, "%s\n\n", _("Generalized Cochrane-Orcutt estimation"));
3342     bufspace(17, prn);
3343     /* xgettext:no-c-format */
3344     pputs(prn, _("ITER             ESS           % CHANGE"));
3345     pputs(prn, "\n\n");
3346 
3347     /* now loop while ess is changing */
3348     diff = 1.0e6;
3349     ess = 0.0;
3350 
3351     for (iter = 1; iter <= 20 && diff > 0.005; iter++) {
3352 	for (t=0; t<dset->n; t++) {
3353 	    if (t < t1 || t > t2) {
3354 		dset->Z[v][t] = NADBL;
3355 	    } else {
3356 		/* special computation of uhat */
3357 		xx = dset->Z[yno][t];
3358 		for (j=0; j<reglist[0]-1; j++) {
3359 		    xx -= ar.coeff[j] * dset->Z[reglist[j+2]][t];
3360 		}
3361 		dset->Z[v][t] = xx;
3362 	    }
3363 	}
3364 	for (i=1; i<=arlist[0]; i++) {
3365 	    k = arlist[i];
3366 	    rholist[1+i] = v + i;
3367 	    for (t=0; t<dset->n; t++) {
3368 		if (t < t1 + k || t > t2) {
3369 		    dset->Z[v+i][t] = NADBL;
3370 		} else {
3371 		    dset->Z[v+i][t] = dset->Z[v][t-k];
3372 		}
3373 	    }
3374 	}
3375 
3376 	/* estimate the rho terms */
3377 	if (iter > 1) {
3378 	    clear_model(&rhomod);
3379 	}
3380 	rhomod = lsq(rholist, dset, OLS, OPT_A);
3381 
3382 	/* and rho-transform the data */
3383 	ryno = vc = v + i;
3384 	for (i=1; i<=reglist[0]; i++) {
3385 	    vi = reglist[i];
3386 	    for (t=0; t<dset->n; t++) {
3387 		if (t < t1 + pmax || t > t2) {
3388 		    dset->Z[vc][t] = NADBL;
3389 		} else {
3390 		    xx = dset->Z[vi][t];
3391 		    for (j=1; j<=arlist[0]; j++) {
3392 			k = arlist[j];
3393 			xx -= rhomod.coeff[j-1] * dset->Z[vi][t-k];
3394 		    }
3395 		    dset->Z[vc][t] = xx;
3396 		}
3397 	    }
3398 	    reglist2[i] = vc++;
3399 	}
3400 
3401 	/* estimate the transformed model */
3402 	clear_model(&ar);
3403 	ar = lsq(reglist2, dset, OLS, OPT_A);
3404 
3405         if (iter > 1) {
3406 	    diff = fabs(100 * (ar.ess - ess) / ess);
3407 	}
3408 
3409 	ess = ar.ess;
3410 	pprintf(prn, "%16c%3d %20f ", ' ', iter, ess);
3411 
3412 	if (iter > 1) {
3413 	    pprintf(prn, "%13.3f\n", diff);
3414 	} else {
3415 	    pputc(prn, '\n');
3416 	}
3417     } /* end "ess changing" loop */
3418 
3419     for (i=0; i<=reglist[0]; i++) {
3420 	ar.list[i] = reglist[i];
3421     }
3422     if (cpos > 0) {
3423 	ar.ifc = 1;
3424     }
3425     if (ar.ifc) {
3426 	if (!gretl_model_get_int(&ar, "effconst")) {
3427 	    ar.dfn -= 1;
3428 	}
3429     }
3430     ar.ci = AR;
3431 
3432     /* special computation of fitted values */
3433     for (t=t1; t<=t2; t++) {
3434 	xx = 0.0;
3435 	for (j=2; j<=reglist[0]; j++) {
3436 	    xx += ar.coeff[j-2] * dset->Z[reglist[j]][t];
3437 	}
3438 	ar.uhat[t] = dset->Z[yno][t] - xx;
3439 	for (j=1; j<=arlist[0]; j++) {
3440 	    if (t - t1 >= arlist[j]) {
3441 		k = arlist[j];
3442 		xx += rhomod.coeff[j-1] * ar.uhat[t-k];
3443 	    }
3444 	}
3445 	ar.yhat[t] = xx;
3446     }
3447 
3448     for (t=t1; t<=t2; t++) {
3449 	ar.uhat[t] = dset->Z[yno][t] - ar.yhat[t];
3450     }
3451 
3452     ar.rsq = gretl_corr_rsq(ar.t1, ar.t2, dset->Z[reglist[1]], ar.yhat);
3453     ar.adjrsq = 1.0 - ((1.0 - ar.rsq) * (ar.nobs - 1.0) / ar.dfd);
3454 
3455     /* special computation of TSS */
3456     xx = gretl_mean(ar.t1, ar.t2, dset->Z[ryno]);
3457     tss = 0.0;
3458     for (t=ar.t1; t<=ar.t2; t++) {
3459 	tss += (dset->Z[ryno][t] - xx) * (dset->Z[ryno][t] - xx);
3460     }
3461     if (ar.dfn == 0) {
3462 	ar.fstt = NADBL;
3463     } else {
3464 	ar.fstt = ar.dfd * (tss - ar.ess) / (ar.dfn * ar.ess);
3465     }
3466     ar.lnL = NADBL;
3467     mle_criteria(&ar, 0);
3468     ar.dw = dwstat(pmax, &ar, dset);
3469     ar.rho = rhohat(pmax, ar.t1, ar.t2, ar.uhat);
3470 
3471     dataset_drop_last_variables(dset, nvadd);
3472 
3473     if (gretl_model_add_arinfo(&ar, pmax)) {
3474 	ar.errcode = E_ALLOC;
3475     } else {
3476 	double *y = dset->Z[reglist[1]];
3477 
3478 	for (i=0; i<=arlist[0]; i++) {
3479 	    ar.arinfo->arlist[i] = arlist[i];
3480 	    if (i >= 1) {
3481 		ar.arinfo->rho[i-1] = rhomod.coeff[i-1];
3482 		ar.arinfo->sderr[i-1] = rhomod.sderr[i-1];
3483 	    }
3484 	}
3485 	ar.ybar = gretl_mean(ar.t1, ar.t2, y);
3486 	ar.sdy = gretl_stddev(ar.t1, ar.t2, y);
3487     }
3488     clear_model(&rhomod);
3489 
3490     if (gretl_model_get_int(&ar, "maxlag") < pmax) {
3491 	gretl_model_set_int(&ar, "maxlag", pmax);
3492     }
3493     set_model_id(&ar, opt);
3494 
3495  bailout:
3496 
3497     free(reglist);
3498     free(reglist2);
3499     free(rholist);
3500     free(arlist);
3501 
3502     return ar;
3503 }
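
/* Sketch of a (hypothetical) caller: AR errors at lags 1 and 3 for
   dependent variable 2, with constant and regressor 4 (the series
   IDs are purely illustrative):

       int list[7] = {6, 1, 3, LISTSEP, 2, 0, 4};
       MODEL m = ar_model(list, dset, OPT_NONE, prn);

       if (!m.errcode) {
           printmodel(&m, dset, OPT_NONE, prn);
       }
       clear_model(&m);
*/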
3504 
3505 static int modelvar_iszero (const MODEL *pmod, const double *x)
3506 {
3507     int t;
3508 
3509     for (t=pmod->t1; t<=pmod->t2; t++) {
3510 	if (!model_missing(pmod, t) && floatneq(x[t], 0.0)) {
3511 	    return 0;
3512 	}
3513     }
3514 
3515     return 1;
3516 }
3517 
3518 /* From the position of the first regressor to the end of the list, omits
3519    variables with all-zero observations and repacks the rest of
3520    them. If the regression in question is not just an auxiliary
3521    one (OPT_A), records the list of dropped variables, if any,
3522    on @pmod.
3523 */
3524 
3525 static void omitzero (MODEL *pmod, const DATASET *dset,
3526 		      gretlopt opt)
3527 {
3528     int *zlist = NULL;
3529     int i, v, offset;
3530     double x = 0.0;
3531 
3532     offset = (pmod->ci == WLS)? 3 : 2;
3533 
3534     if (!(opt & OPT_A)) {
3535 	zlist = gretl_null_list();
3536     }
3537 
3538     for (i=pmod->list[0]; i>=offset; i--) {
3539         v = pmod->list[i];
3540         if (modelvar_iszero(pmod, dset->Z[v])) {
3541 	    if (zlist != NULL) {
3542 		gretl_list_append_term(&zlist, v);
3543 	    }
3544 	    fprintf(stderr, "Deleting var %d (%s) at list pos %d: all zero\n",
3545 		    v, dset->varname[v], i);
3546 	    gretl_list_delete_at_pos(pmod->list, i);
3547 	}
3548     }
3549 
3550     if (pmod->nwt) {
3551 	int t, wtzero;
3552 
3553 	for (i=pmod->list[0]; i>=offset; i--) {
3554 	    v = pmod->list[i];
3555 	    wtzero = 1;
3556 	    for (t=pmod->t1; t<=pmod->t2; t++) {
3557 		x = dset->Z[v][t] * dset->Z[pmod->nwt][t];
3558 		if (!model_missing(pmod, t) && floatneq(x, 0.0)) {
3559 		    wtzero = 0;
3560 		    break;
3561 		}
3562 	    }
3563 	    if (wtzero) {
3564 		if (zlist != NULL) {
3565 		    gretl_list_append_term(&zlist, v);
3566 		}
3567 		gretl_list_delete_at_pos(pmod->list, i);
3568 	    }
3569 	}
3570     }
3571 
3572     if (zlist != NULL) {
3573 	if (zlist[0] > 0) {
3574 	    gretl_model_set_list_as_data(pmod, "zerolist", zlist);
3575 	} else {
3576 	    free(zlist);
3577 	}
3578     }
3579 }
3580 
3581 /* model_lags: attempt to detect presence of a lagged dependent
3582    variable among the regressors -- if found, return its position
3583    in @list, otherwise return 0. Also write into *pmax the maximum
3584    lag among the regressors.
3585 
3586    FIXME: use inside a function, when the name of the dependent
3587    variable may be changed if it was a function argument?
3588 */
3589 
3590 static int model_lags (const int *list, const DATASET *dset, int *pmax)
3591 {
3592     int xno, yno = list[1];
3593     int ilag, maxlag = 0;
3594     int i, ret = 0;
3595 
3596     for (i=2; i<=list[0]; i++) {
3597 	xno = list[i];
3598 	if (xno == LISTSEP) {
3599 	    break;
3600 	}
3601 	ilag = series_get_lag(dset, xno);
3602 	if (ilag > 0) {
3603 	    if (ret == 0 && series_get_parent_id(dset, xno) == yno) {
3604 		ret = i;
3605 	    }
3606 	    if (ilag > maxlag) {
3607 		maxlag = ilag;
3608 	    }
3609 	}
3610     }
3611 
3612     *pmax = maxlag;
3613 
3614     return ret;
3615 }
3616 
3617 static void arch_test_print_simple (int order, double LM,
3618 				    double pval, gretlopt opt,
3619 				    PRN *prn)
3620 {
3621     if (opt & OPT_M) {
3622 	/* multi-equation system */
3623 	pprintf(prn, "%s: TR^2 = %f,\n", _("Test statistic"), LM);
3624     } else {
3625 	pprintf(prn, "\n%s %d\n", _("Test for ARCH of order"), order);
3626 	pprintf(prn, "\n%s: TR^2 = %f,\n", _("Test statistic"), LM);
3627     }
3628     pprintf(prn, "%s = P(%s(%d) > %f) = %f\n\n",
3629 	    _("with p-value"), _("Chi-square"),
3630 	    order, LM, pval);
3631 }
3632 
3633 static void print_arch_regression (const gretl_matrix *b,
3634 				   const gretl_matrix *V,
3635 				   int T, int order,
3636 				   gretlopt opt, PRN *prn)
3637 {
3638     int i, k = order + 1;
3639     double *se = malloc(k * sizeof *se);
3640     char **names;
3641 
3642     names = strings_array_new_with_length(k, 16);
3643 
3644     if (se != NULL && names != NULL) {
3645 	if (!(opt & OPT_M)) {
3646 	    /* not multi-equation */
3647 	    pputc(prn, '\n');
3648 	    pprintf(prn, _("Test for ARCH of order %d"), order);
3649 	    pputs(prn, "\n\n");
3650 	}
3651 
3652 	for (i=0; i<k; i++) {
3653 	    se[i] = sqrt(gretl_matrix_get(V, i, i));
3654 	    sprintf(names[i], "alpha(%d)", i);
3655 	}
3656 
3657 	/* is a df correction wanted here? */
3658 	print_coeffs(b->val, se, (const char **) names,
3659 		     k, T - k, ARCH, prn);
3660     }
3661 
3662     free(se);
3663     strings_array_free(names, k);
3664 }
3665 
3666 static void
3667 arch_test_save_or_print (const gretl_matrix *b, const gretl_matrix *V,
3668 			 int T, int order, double rsq, MODEL *pmod,
3669 			 gretlopt opt, PRN *prn)
3670 {
3671     double LM = T * rsq;
3672     double pv = chisq_cdf_comp(order, LM);
3673 
3674     record_test_result(LM, pv);
3675 
3676     if (!(opt & OPT_I)) {
3677 	if (V != NULL) {
3678 	    /* V will be NULL if --quiet is in force */
3679 	    print_arch_regression(b, V, T, order, opt, prn);
3680 	}
3681 	if (opt & OPT_Q) {
3682 	    arch_test_print_simple(order, LM, pv, opt, prn);
3683 	}
3684     }
3685 
3686     if ((opt & OPT_S) || !(opt & OPT_Q)) {
3687 	ModelTest *test = model_test_new(GRETL_TEST_ARCH);
3688 
3689 	if (test != NULL) {
3690 	    int heading = (V == NULL);
3691 
3692 	    model_test_set_teststat(test, GRETL_STAT_LM);
3693 	    model_test_set_order(test, order);
3694 	    model_test_set_dfn(test, order);
3695 	    model_test_set_value(test, LM);
3696 	    model_test_set_pvalue(test, pv);
3697 
3698 	    if (!(opt & OPT_Q)) {
3699 		gretl_model_test_print_direct(test, heading, prn);
3700 	    }
3701 
3702 	    if (pmod != NULL && (opt & OPT_S)) {
3703 		maybe_add_test_to_model(pmod, test);
3704 	    } else {
3705 		model_test_free(test);
3706 	    }
3707 	}
3708     }
3709 }
3710 
3711 static int real_arch_test (const double *u, int T, int order,
3712 			   MODEL *pmod, gretlopt opt,
3713 			   PRN *prn)
3714 {
3715     gretl_matrix *X = NULL;
3716     gretl_matrix *y = NULL;
3717     gretl_matrix *b = NULL;
3718     gretl_matrix *V = NULL;
3719     int i, k, s, t;
3720     double x, s2, rsq;
3721     double *ps2 = NULL;
3722     int err = 0;
3723 
3724     gretl_error_clear();
3725 
3726     if (order < 1 || order > T - 1) {
3727 	gretl_errmsg_sprintf(_("Invalid lag order for arch (%d)"), order);
3728 	return E_DATA;
3729     }
3730 
3731     T -= order;
3732     k = order + 1;
3733 
3734     X = gretl_matrix_alloc(T, k);
3735     y = gretl_column_vector_alloc(T);
3736     b = gretl_column_vector_alloc(k);
3737 
3738     if (X == NULL || y == NULL || b == NULL) {
3739 	gretl_matrix_free(X);
3740 	gretl_matrix_free(y);
3741 	gretl_matrix_free(b);
3742 	return E_ALLOC;
3743     }
3744 
3745     if (!(opt & OPT_Q)) {
3746 	V = gretl_matrix_alloc(k, k);
3747 	if (V == NULL) {
3748 	    err = E_ALLOC;
3749 	    goto bailout;
3750 	}
3751 	ps2 = &s2;
3752     }
3753 
3754     /* fill out the matrices with squared residuals
3755        and lags of same */
3756 
3757     for (i=0; i<k; i++) {
3758 	for (t=0; t<T; t++) {
3759 	    s = t + order;
3760 	    if (i == 0) {
3761 		x = u[s];
3762 		gretl_vector_set(y, t, x * x);
3763 		gretl_matrix_set(X, t, i, 1.0);
3764 	    } else {
3765 		x = u[s - i];
3766 		gretl_matrix_set(X, t, i, x * x);
3767 	    }
3768 	}
3769     }
3770 
3771     err = gretl_matrix_ols(y, X, b, V, NULL, ps2);
3772 
3773     if (!err) {
3774 	rsq = gretl_matrix_r_squared(y, X, b, &err);
3775     }
3776 
3777     if (!err) {
3778 	arch_test_save_or_print(b, V, T, order, rsq, pmod,
3779 				opt, prn);
3780     }
3781 
3782  bailout:
3783 
3784     gretl_matrix_free(X);
3785     gretl_matrix_free(y);
3786     gretl_matrix_free(b);
3787     gretl_matrix_free(V);
3788 
3789     return err;
3790 }
3791 
3792 /**
3793  * arch_test:
3794  * @pmod: model to be tested.
3795  * @order: lag order for ARCH process.
3796  * @dset: dataset struct.
3797  * @opt: if flags include OPT_S, save test results to model;
3798  * if OPT_Q, be less verbose; if OPT_I, be silent.
3799  * @prn: gretl printing struct.
3800  *
3801  * Tests @pmod for AutoRegressive Conditional Heteroskedasticity.
3802  *
3803  * Returns: 0 on success, non-zero code on error.
3804  */
3805 
3806 int arch_test (MODEL *pmod, int order, const DATASET *dset,
3807 	       gretlopt opt, PRN *prn)
3808 {
3809     int err;
3810 
3811     if (pmod->missmask != NULL) {
3812 	err = E_MISSDATA;
3813     } else {
3814 	const double *u = pmod->uhat + pmod->t1;
3815 
3816 	if (order == 0) {
3817 	    /* use data frequency as default lag order */
3818 	    order = dset->pd;
3819 	}
3820 
3821 	err = real_arch_test(u, pmod->nobs, order, pmod,
3822 			     opt, prn);
3823     }
3824 
3825     return err;
3826 }
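
/* Illustrative (hypothetical) call: test an estimated model for
   ARCH of order 4 and attach the result to the model:

       int err = arch_test(pmod, 4, dset, OPT_S, prn);
*/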
3827 
3828 int array_arch_test (const double *u, int n, int order,
3829 		     gretlopt opt, PRN *prn)
3830 {
3831     return real_arch_test(u, n, order, NULL, opt, prn);
3832 }
3833 
3834 /**
3835  * arch_model:
3836  * @list: dependent variable plus list of regressors.
3837  * @order: lag order for ARCH process.
3838  * @dset: dataset struct.
3839  * @opt: may contain OPT_O to print covariance matrix.
3840  *
3841  * Estimate the model given in @list via weighted least squares,
3842  * with the weights based on the predicted error variances from
3843  * an auxiliary regression of the squared residuals on their lagged
3844  * values.
3845  *
3846  * Returns: a #MODEL struct, containing the estimates.
3847  */
3848 
3849 MODEL arch_model (const int *list, int order, DATASET *dset,
3850 		  gretlopt opt)
3851 {
3852     MODEL amod;
3853     int *wlist = NULL, *alist = NULL;
3854     int T = sample_size(dset);
3855     int oldv = dset->v;
3856     int i, t, nwt, k, n = dset->n;
3857     double *a = NULL;
3858     double *se = NULL;
3859     double xx;
3860 
3861     gretl_error_clear();
3862     gretl_model_init(&amod, dset);
3863 
3864     if (order == 0) {
3865 	/* use data frequency as default lag order */
3866 	order = dset->pd;
3867     }
3868 
3869     if (order < 1 || order > T - list[0]) {
3870 	amod.errcode = E_UNSPEC;
3871 	gretl_errmsg_sprintf(_("Invalid lag order for arch (%d)"), order);
3872 	return amod;
3873     }
3874 
3875     if (dataset_add_series(dset, order + 1)) {
3876 	amod.errcode = E_ALLOC;
3877     } else {
3878 	alist = gretl_list_new(order + 2);
3879 	if (alist == NULL) {
3880 	    amod.errcode = E_ALLOC;
3881 	}
3882     }
3883 
3884     if (amod.errcode) {
3885 	goto bailout;
3886     }
3887 
3888     /* start list for aux regression */
3889     alist[1] = dset->v - order - 1;
3890     alist[2] = 0;
3891 
3892     /* run initial OLS and get squared residuals */
3893     amod = lsq(list, dset, OLS, OPT_A | OPT_M);
3894     if (amod.errcode) {
3895 	goto bailout;
3896     }
3897 
3898     k = dset->v - order - 1;
3899     strcpy(dset->varname[k], "utsq");
3900     for (t=0; t<n; t++) {
3901 	dset->Z[k][t] = NADBL;
3902     }
3903     for (t=amod.t1; t<=amod.t2; t++) {
3904 	xx = amod.uhat[t];
3905 	dset->Z[k][t] = xx * xx;
3906     }
3907     /* also lags of squared resids */
3908     for (i=1; i<=order; i++) {
3909 	k =  dset->v - order + i - 1;
3910 	alist[i+2] = k;
3911 	sprintf(dset->varname[k], "utsq_%d", i);
3912 	for (t=0; t<n; t++) {
3913 	    dset->Z[k][t] = NADBL;
3914 	}
3915 	for (t=amod.t1+i; t<=amod.t2; t++) {
3916 	    dset->Z[k][t] = dset->Z[alist[1]][t-i];
3917 	}
3918     }
3919 
3920     /* run auxiliary regression */
3921     clear_model(&amod);
3922     amod = lsq(alist, dset, OLS, OPT_A);
3923     if (amod.errcode) {
3924 	goto bailout;
3925     }
3926 
3927     /* steal the coefficients for reference */
3928     a = amod.coeff;
3929     amod.coeff = NULL;
3930     se = amod.sderr;
3931     amod.sderr = NULL;
3932 
3933     /* do weighted estimation */
3934     wlist = gretl_list_new(list[0] + 1);
3935 
3936     if (wlist == NULL) {
3937 	amod.errcode = E_ALLOC;
3938     } else {
3939 	/* construct the weight variable */
3940 	nwt = wlist[1] = dset->v - 1;
3941 	strcpy(dset->varname[nwt], "1/sigma");
3942 
3943 	for (i=2; i<=wlist[0]; i++) {
3944 	    wlist[i] = list[i-1];
3945 	}
3946 
3947 	k = dset->v - order - 1;
3948 
3949 	for (t=amod.t1; t<=amod.t2; t++) {
3950 	    xx = amod.yhat[t];
3951 	    if (xx <= 0.0) {
3952 		xx = dset->Z[k][t];
3953 	    }
3954 	    dset->Z[nwt][t] = 1.0 / xx; /* FIXME is this right? */
3955 	}
3956 
3957 	clear_model(&amod);
3958 	amod = lsq(wlist, dset, WLS, OPT_NONE);
3959 	amod.ci = ARCH;
3960 
3961 	if (!amod.errcode) {
3962 	    gretl_model_set_int(&amod, "arch_order", order);
3963 	    gretl_model_set_data(&amod, "arch_coeff", a,
3964 				 GRETL_TYPE_DOUBLE_ARRAY,
3965 				 (order + 1) * sizeof *a);
3966 	    gretl_model_set_data(&amod, "arch_sderr", se,
3967 				 GRETL_TYPE_DOUBLE_ARRAY,
3968 				 (order + 1) * sizeof *se);
3969 	}
3970     }
3971 
3972  bailout:
3973 
3974     if (alist != NULL) free(alist);
3975     if (wlist != NULL) free(wlist);
3976 
3977     dataset_drop_last_variables(dset, dset->v - oldv);
3978 
3979     return amod;
3980 }
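
/* A minimal calling sketch (illustrative series IDs: dependent
   variable 1, constant, regressor 2), requesting ARCH order 4:

       int list[4] = {3, 1, 0, 2};
       MODEL m = arch_model(list, 4, dset, OPT_NONE);

       if (!m.errcode) {
           printmodel(&m, dset, OPT_NONE, prn);
       }
       clear_model(&m);
*/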
3981 
3982 /**
3983  * lad_model:
3984  * @list: dependent variable plus list of regressors.
3985  * @dset: dataset struct.
3986  * @opt: may include OPT_Q for quiet operation.
3987  *
3988  * Estimate the model given in @list using the method of Least
3989  * Absolute Deviation (LAD).
3990  *
3991  * Returns: a #MODEL struct, containing the estimates.
3992  */
3993 
3994 MODEL lad_model (const int *list, DATASET *dset, gretlopt opt)
3995 {
3996     MODEL mod;
3997     int (*lad_driver) (MODEL *, DATASET *, gretlopt);
3998 
3999     /* Run an initial OLS to "set the model up" and check for errors;
4000        the lad_driver function will overwrite the coefficients, etc.
4001     */
4002 
4003     mod = lsq(list, dset, OLS, OPT_A);
4004 
4005     if (mod.errcode) {
4006         return mod;
4007     }
4008 
4009     lad_driver = get_plugin_function("lad_driver");
4010 
4011     if (lad_driver == NULL) {
4012 	mod.errcode = E_FOPEN;
4013 	return mod;
4014     }
4015 
4016     (*lad_driver) (&mod, dset, opt);
4017     set_model_id(&mod, opt);
4018 
4019     return mod;
4020 }
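
/* Usage sketch (hypothetical caller):

       MODEL m = lad_model(list, dset, OPT_Q);

       // m.errcode is E_FOPEN if the LAD plugin cannot be loaded
       if (!m.errcode) {
           printmodel(&m, dset, OPT_NONE, prn);
       }
       clear_model(&m);
*/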
4021 
4022 /**
4023  * quantreg:
4024  * @tau: vector containing one or more quantile values, in the range
4025  * 0.01 to 0.99.
4026  * @list: model specification: dependent var and regressors.
4027  * @dset: dataset struct.
4028  * @opt: may contain OPT_R for robust standard errors,
4029  * OPT_I to produce confidence intervals.
4030  * @prn: gretl printing struct.
4031  *
4032  * Estimate the model given in @list using the method of
4033  * quantile regression.
4034  *
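 * The fragment below sketches a call for a single quantile (the
 * median); the list contents are illustrative and @dset, @prn are
 * assumed to be available.
 *
 * <informalexample><programlisting>
 * int qlist[] = {3, 1, 0, 2}; // depvar, const, one regressor
 * gretl_matrix *tau = gretl_matrix_alloc(1, 1);
 * MODEL qmod;
 *
 * gretl_matrix_set(tau, 0, 0, 0.5); // tau = 0.5
 * qmod = quantreg(tau, qlist, dset, OPT_NONE, prn);
 * if (!qmod.errcode) {
 *     printmodel(&amp;qmod, dset, OPT_NONE, prn);
 * }
 * clear_model(&amp;qmod);
 * gretl_matrix_free(tau);
 * </programlisting></informalexample>
 *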
4035  * Returns: a #MODEL struct, containing the estimates.
4036  */
4037 
4038 MODEL quantreg (const gretl_matrix *tau, const int *list,
4039 		DATASET *dset, gretlopt opt, PRN *prn)
4040 {
4041     MODEL mod;
4042     int (*rq_driver) (const gretl_matrix *, MODEL *, DATASET *,
4043 		      gretlopt, PRN *);
4044     gretlopt olsopt = (OPT_A | OPT_M);
4045 
4046     /* Run an initial OLS to "set the model up" and check for errors;
4047        the driver function will overwrite the coefficients, etc.
4048        For now we make life easier by rejecting within-sample missing
4049        values (OPT_M).
4050     */
4051 
4052     if (opt & OPT_R) {
4053 	olsopt |= OPT_R;
4054     }
4055 
4056     mod = lsq(list, dset, OLS, olsopt);
4057 
4058     if (mod.errcode) {
4059         return mod;
4060     }
4061 
4062     rq_driver = get_plugin_function("rq_driver");
4063 
4064     if (rq_driver == NULL) {
4065 	mod.errcode = E_FOPEN;
4066 	return mod;
4067     }
4068 
4069     (*rq_driver) (tau, &mod, dset, opt, prn);
4070     set_model_id(&mod, opt);
4071 
4072     return mod;
4073 }
4074 
4075 #ifndef WIN32
4076 
4077 static void gretl_glib_grab_output (const char *prog,
4078 				    char **sout)
4079 {
4080     gchar *argv[] = { (gchar *) prog, NULL };
4081     gboolean ok;
4082 
4083     ok = g_spawn_sync(NULL, argv, NULL, G_SPAWN_SEARCH_PATH,
4084 		      NULL, NULL, sout, NULL,
4085 		      NULL, NULL);
4086 
4087     if (!ok && *sout != NULL) {
4088 	g_free(*sout);
4089 	*sout = NULL;
4090     }
4091 }
4092 
4093 #endif
4094 
4095 /*
4096  * get_x12a_maxpd:
4097  *
4098  * Retrieve the highest data frequency handled by X-12-ARIMA,
4099  * which may vary depending on how the program was built.
4100  * This may be relevant when executing the gretl arima
4101  * command with the option to use X-12-ARIMA.
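 *
 * For example (illustrative only), a caller might check the dataset's
 * periodicity against this limit before asking for X-12-ARIMA
 * estimation:
 *
 *     if (dset->pd > get_x12a_maxpd()) {
 *         // frequency too high for X-12-ARIMA; use native code
 *     }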
4102  */
4103 
4104 int get_x12a_maxpd (void)
4105 {
4106     static int n;
4107 
4108     if (n == 0) {
4109 	const char *x12a = gretl_x12_arima();
4110 	char *sout = NULL;
4111 
4112 #ifdef WIN32
4113 	gretl_win32_grab_output(x12a, gretl_dotdir(), &sout);
4114 #else
4115 	gretl_glib_grab_output(x12a, &sout);
4116 #endif
4117 	if (sout != NULL) {
4118 	    char *p = strstr(sout, "PSP = ");
4119 
4120 	    if (p != NULL) {
4121 		n = atoi(p + 6);
4122 	    }
4123 	    free(sout);
4124 	}
4125 	if (n <= 0) {
4126 	    n = 12;
4127 	}
4128     }
4129 
4130     return n;
4131 }
4132 
4133 /**
4134  * arma:
4135  * @list: AR and MA orders, dependent variable, and any exogenous
4136  * regressors.
4137  * @pqlags: list giving specific non-seasonal AR/MA lags (or NULL).
4138  * @dset: dataset struct.
4139  * @opt: option flags.
4140  * @prn: for printing details of iterations (or NULL).
4141  *
4142  * Calculates ARMA estimates. By default the estimator is
4143  * exact ML, implemented via gretl's Kalman filter in
4144  * conjunction with the BFGS maximizer. Other options:
4145  * if @opt includes OPT_L we use L-BFGS-B rather than standard
4146  * BFGS; OPT_C (incompatible with OPT_L) calls for use of
4147  * conditional ML (BHHH algorithm); and OPT_X requests estimation via
4148  * X-12-ARIMA rather than native code.
4149  *
4150  * If @pqlags is non-NULL it should take the form of two
4151  * gretl sub-lists joined by #LISTSEP: the first sub-list holds
4152  * a set of AR lags and the second a list of MA lags. If only
4153  * AR lags are specified, a single list may be given; if only
4154  * MA lags are specified, use #LISTSEP with a null first sub-list.
4155  *
4156  * If the model specification does not include a constant
4157  * this is added automatically, unless @opt includes OPT_N
4158  * ("no constant").
4159  *
4160  * When the estimation method is native exact ML, two (mutually
4161  * exclusive) flags in @opt may be used to control the estimator
4162  * of the covariance matrix: OPT_G specifies use of the outer
4163  * product of the gradient (OPG), while OPT_H specifies use of
4164  * the (numerical) Hessian. If neither of these flags is given, the
4165  * default is to use the Hessian by preference, but to fall back
4166  * to OPG if computation of the numerical Hessian fails. These
4167  * flags are ignored if estimation is not via native exact ML.
4168  *
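 * The fragment below sketches a call using the default native exact
 * ML estimator with Hessian-based standard errors (OPT_H). It assumes
 * that armalist has already been built to encode the specification
 * described above, and that @dset and @prn are available; a list of
 * specific lags could be passed in place of the NULL second argument.
 *
 * <informalexample><programlisting>
 * MODEL am = arma(armalist, NULL, dset, OPT_H, prn);
 *
 * if (!am.errcode) {
 *     printmodel(&amp;am, dset, OPT_NONE, prn);
 * }
 * clear_model(&amp;am);
 * </programlisting></informalexample>
 *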
4169  * Returns: a #MODEL struct, containing the estimates.
4170  */
4171 
4172 MODEL arma (const int *list, const int *pqlags,
4173 	    DATASET *dset, gretlopt opt,
4174 	    PRN *prn)
4175 {
4176     MODEL (*arma_model) (const int *, const int *,
4177 			 DATASET *, gretlopt, PRN *);
4178     MODEL (*arma_x12_model) (const int *, const int *,
4179 			     const DATASET *,
4180 			     int, gretlopt, PRN *);
4181     MODEL armod;
4182     int err = 0;
4183 
4184     gretl_model_init(&armod, dset);
4185 
4186     err = incompatible_options(opt, OPT_G | OPT_H);
4187     if (!err) {
4188 	err = options_incompatible_with(opt, OPT_X, OPT_R | OPT_S);
4189     }
4190     if (err) {
4191 	armod.errcode = err;
4192 	return armod;
4193     }
4194 
4195     if (opt & OPT_X) {
4196 	int pdmax = get_x12a_maxpd();
4197 
4198 	if ((dset->t2 - dset->t1 + 1) > pdmax * 50) {
4199 	    gretl_errmsg_sprintf(_("X-12-ARIMA can't handle more than %d observations.\n"
4200 				   "Please select a smaller sample."), pdmax * 50);
4201 	    armod.errcode = E_DATA;
4202 	    return armod;
4203 	}
4204 
4205 	arma_x12_model = get_plugin_function("arma_x12_model");
4206 	if (arma_x12_model == NULL) {
4207 	    err = E_FOPEN;
4208 	} else {
4209 	    armod = (*arma_x12_model) (list, pqlags, dset, pdmax, opt, prn);
4210 	}
4211     } else {
4212 	arma_model = get_plugin_function("arma_model");
4213 	if (arma_model == NULL) {
4214 	    err = E_FOPEN;
4215 	} else {
4216 	    armod = (*arma_model) (list, pqlags, dset, opt, prn);
4217 	}
4218     }
4219 
4220     if (err) {
4221 	armod.errcode = err;
4222 	return armod;
4223     }
4224 
4225     set_model_id(&armod, opt);
4226 
4227     return armod;
4228 }
4229 
4230 /**
4231  * garch:
4232  * @list: dependent variable plus arch and garch orders.
4233  * @dset: dataset struct.
4234  * @opt: may include flags for robust standard errors and VCV printing.
4235  * @prn: for printing details of iterations if OPT_V is given (or NULL).
4236  *
4237  * Calculate GARCH estimates.
4238  *
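 * A minimal calling sketch, assuming glist already holds the GARCH
 * specification and that @dset and @prn are available; OPT_V passes
 * iteration details to @prn.
 *
 * <informalexample><programlisting>
 * MODEL gm = garch(glist, dset, OPT_V, prn);
 *
 * if (!gm.errcode) {
 *     printmodel(&amp;gm, dset, OPT_NONE, prn);
 * }
 * clear_model(&amp;gm);
 * </programlisting></informalexample>
 *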
4239  * Returns: a #MODEL struct, containing the estimates.
4240  */
4241 
4242 MODEL garch (const int *list, DATASET *dset, gretlopt opt,
4243 	     PRN *prn)
4244 {
4245     MODEL mod;
4246     PRN *myprn;
4247     MODEL (*garch_model) (const int *, DATASET *, PRN *,
4248 			  gretlopt);
4249 
4250     gretl_error_clear();
4251 
4252     garch_model = get_plugin_function("garch_model");
4253 
4254     if (garch_model == NULL) {
4255 	gretl_model_init(&mod, dset);
4256 	mod.errcode = E_FOPEN;
4257 	return mod;
4258     }
4259 
4260     if (opt & OPT_V) {
4261 	myprn = prn;
4262     } else {
4263 	myprn = NULL;
4264     }
4265 
4266     mod = (*garch_model) (list, dset, myprn, opt);
4267     set_model_id(&mod, opt);
4268 
4269     return mod;
4270 }
4271 
4272 /**
4273  * mp_ols:
4274  * @list: specification of variables to use.
4275  * @dset: dataset struct.
4276  * @opt: may include OPT_Q for quiet operation.
4277  *
4278  * Estimate an OLS model using multiple-precision arithmetic
4279  * via the GMP library.
4280  *
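 * A minimal calling sketch; the variable IDs in the list are purely
 * illustrative, and @dset plus a #PRN for printing are assumed to be
 * available.
 *
 * <informalexample><programlisting>
 * int mplist[] = {3, 1, 0, 2}; // depvar, const, one regressor
 * MODEL mpm = mp_ols(mplist, dset, OPT_NONE);
 *
 * if (!mpm.errcode) {
 *     printmodel(&amp;mpm, dset, OPT_NONE, prn);
 * }
 * clear_model(&amp;mpm);
 * </programlisting></informalexample>
 *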
4281  * Returns: a #MODEL struct, containing the estimates.
4282  */
4283 
4284 MODEL mp_ols (const int *list, DATASET *dset, gretlopt opt)
4285 {
4286     int (*mplsq)(const int *, const int *, const int *,
4287 		 DATASET *, MODEL *,
4288 		 gretlopt);
4289     MODEL mpmod;
4290 
4291     gretl_model_init(&mpmod, dset);
4292 
4293     mplsq = get_plugin_function("mplsq");
4294     if (mplsq == NULL) {
4295 	mpmod.errcode = 1;
4296 	return mpmod;
4297     }
4298 
4299     if (gretl_list_has_separator(list)) {
4300 	int *base = NULL;
4301 	int *poly = NULL;
4302 
4303 	mpmod.errcode = gretl_list_split_on_separator(list, &base, &poly);
4304 	if (mpmod.errcode == 0 && (base == NULL || poly == NULL)) {
4305 	    mpmod.errcode = E_ARGS;
4306 	} else {
4307 	    mpmod.errcode = (*mplsq)(base, poly, NULL, dset,
4308 				     &mpmod, OPT_S);
4309 	}
4310 	free(base);
4311 	free(poly);
4312     } else {
4313 	mpmod.errcode = (*mplsq)(list, NULL, NULL, dset,
4314 				 &mpmod, OPT_S);
4315     }
4316 
4317     set_model_id(&mpmod, opt);
4318 
4319     return mpmod;
4320 }
4321 
4322 static int check_panel_options (gretlopt opt)
4323 {
4324     if ((opt & OPT_U) && (opt & OPT_H)) {
4325 	/* can't specify random effects + weighted least squares */
4326 	return E_BADOPT;
4327     } else if ((opt & OPT_T) && !(opt & OPT_H)) {
4328 	/* iterate option requires weighted least squares option */
4329 	return E_BADOPT;
4330     } else if ((opt & OPT_N) && !(opt & OPT_U)) {
4331 	/* the Nerlove option requires random effects */
4332 	return E_BADOPT;
4333     } else if ((opt & OPT_C) && !(opt & OPT_P)) {
4334 	/* explicit cluster option only OK with pooled OLS */
4335 	return E_BADOPT;
4336     } else if (incompatible_options(opt, OPT_B | OPT_U | OPT_P)) {
4337 	/* mutually exclusive estimator requests */
4338 	return E_BADOPT;
4339     }
4340 
4341     return 0;
4342 }
4343 
4344 /**
4345  * panel_model:
4346  * @list: regression list (dependent variable plus independent
4347  * variables).
4348  * @dset: dataset struct.
4349  * @opt: can include OPT_Q (quiet estimation), OPT_U (random
4350  * effects model), OPT_H (weights based on the error variance
4351  * for the respective cross-sectional units), OPT_I (iterate,
4352  * only available in conjunction with OPT_H).
4353  * @prn: printing struct (or NULL).
4354  *
4355  * Calculate estimates for a panel dataset, using fixed
4356  * effects (the default), random effects, or weighted
4357  * least squares based on the respective variances for the
4358  * cross-sectional units.
4359  *
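 * The fragment below sketches estimation of a random-effects model
 * (OPT_U); the list contents are illustrative and @dset, @prn are
 * assumed to be available.
 *
 * <informalexample><programlisting>
 * int plist[] = {3, 1, 0, 2}; // depvar, const, one regressor
 * MODEL pm = panel_model(plist, dset, OPT_U, prn);
 *
 * if (!pm.errcode) {
 *     printmodel(&amp;pm, dset, OPT_NONE, prn);
 * }
 * clear_model(&amp;pm);
 * </programlisting></informalexample>
 *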
4360  * Returns: a #MODEL struct, containing the estimates.
4361  */
4362 
4363 MODEL panel_model (const int *list, DATASET *dset,
4364 		   gretlopt opt, PRN *prn)
4365 {
4366     MODEL mod;
4367 
4368     if (check_panel_options(opt)) {
4369 	gretl_model_init(&mod, dset);
4370 	mod.errcode = E_BADOPT;
4371     } else if (opt & OPT_H) {
4372 	mod = panel_wls_by_unit(list, dset, opt, prn);
4373     } else {
4374 	mod = real_panel_model(list, dset, opt, prn);
4375     }
4376 
4377     return mod;
4378 }
4379 
4380 /**
4381  * ivreg:
4382  * @list: regression list; see the documentation for the "tsls"
4383  * command.
4384  * @dset: dataset struct.
4385  * @opt: option flags.
4386  *
4387  * Calculate IV estimates for a linear model, by default via two-stage
4388  * least squares. The option flags can include OPT_Q for quiet
4389  * operation, and either OPT_L to specify estimation via Limited
4390  * Information Maximum Likelihood or OPT_G for estimation via
4391  * the Generalized Method of Moments.
4392  *
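 * The fragment below sketches a plain two-stage least squares call.
 * The list follows the "tsls" convention (dependent variable and
 * regressors, then #LISTSEP, then the instruments); the variable ID
 * numbers are purely illustrative, and @dset plus a #PRN are assumed
 * to be available.
 *
 * <informalexample><programlisting>
 * // y = 1; regressors: const and 2; instruments: const, 3 and 4
 * int ivlist[] = {7, 1, 0, 2, LISTSEP, 0, 3, 4};
 * MODEL ivmod = ivreg(ivlist, dset, OPT_NONE);
 *
 * if (!ivmod.errcode) {
 *     printmodel(&amp;ivmod, dset, OPT_NONE, prn);
 * }
 * clear_model(&amp;ivmod);
 * </programlisting></informalexample>
 *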
4393  * Returns: a #MODEL struct, containing the estimates.
4394  */
4395 
4396 MODEL ivreg (const int *list, DATASET *dset, gretlopt opt)
4397 {
4398     MODEL mod;
4399     int err;
4400 
4401     gretl_error_clear();
4402 
4403     /* can't have both LIML and GMM options */
4404     err = incompatible_options(opt, OPT_G | OPT_L);
4405 
4406     if (!err) {
4407 	/* two-step, iterate and weights options are GMM-only */
4408 	err = option_prereq_missing(opt, OPT_T | OPT_I | OPT_H, OPT_G);
4409     }
4410 
4411     if (err) {
4412 	gretl_model_init(&mod, dset);
4413 	mod.errcode = err;
4414 	return mod;
4415     }
4416 
4417     if (opt & OPT_L) {
4418 	mod = single_equation_liml(list, dset, opt);
4419     } else if (opt & OPT_G) {
4420 	mod = ivreg_via_gmm(list, dset, opt);
4421     } else {
4422 	mod = tsls(list, dset, opt);
4423     }
4424 
4425     return mod;
4426 }
4427 
4428 /**
4429  * dpd_model:
4430  * @list: regression list.
4431  * @laglist: list of specific lags of the dependent variable, or
4432  * NULL.
4433  * @ispec: may contain additional instrument specification.
4434  * @dset: dataset struct.
4435  * @opt: to be hooked up.
4436  * @prn: printing struct.
4437  *
4438  * This is at present a secret function, for testing only.
4439  *
4440  * Returns: a #MODEL struct, containing the estimates.
4441  */
4442 
4443 MODEL dpd_model (const int *list, const int *laglist,
4444 		 const char *ispec, const DATASET *dset,
4445 		 gretlopt opt, PRN *prn)
4446 {
4447     MODEL (*dpd_estimate) (const int *, const int *,
4448 			   const char *, const DATASET *,
4449 			   gretlopt, PRN *);
4450     MODEL mod;
4451 
4452     gretl_model_init(&mod, dset);
4453 
4454     dpd_estimate = get_plugin_function("dpd_estimate");
4455     if (dpd_estimate == NULL) {
4456 	mod.errcode = 1;
4457 	return mod;
4458     }
4459 
4460     mod = (*dpd_estimate)(list, laglist, ispec, dset, opt, prn);
4461     set_model_id(&mod, opt);
4462 
4463     return mod;
4464 }
4465 
4466 /**
4467  * anova:
4468  * @list: must contain the response and treatment variables.
4469  * @dset: dataset struct.
4470  * @opt: unused at present.
4471  * @prn: printing struct.
4472  *
4473  * Does one-way or two-way Analysis of Variance (prints table and F-test).
4474  *
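 * A minimal calling sketch (IDs illustrative: response = series 1,
 * treatment = series 2), assuming @dset and @prn are available:
 *
 * <informalexample><programlisting>
 * int avlist[] = {2, 1, 2};
 * int err = anova(avlist, dset, OPT_NONE, prn);
 * </programlisting></informalexample>
 *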
4475  * Returns: 0 on success, non-zero on failure.
4476  */
4477 
4478 int anova (const int *list, const DATASET *dset,
4479 	   gretlopt opt, PRN *prn)
4480 {
4481     int (*gretl_anova) (const int *, const DATASET *,
4482 			gretlopt, PRN *);
4483 
4484     gretl_anova = get_plugin_function("gretl_anova");
4485     if (gretl_anova == NULL) {
4486 	return 1;
4487     }
4488 
4489     return (*gretl_anova)(list, dset, opt, prn);
4490 }
4491