1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 /* estimate.c - basic gretl estimation procedures */
21
22 #include "libgretl.h"
23 #include "qr_estimate.h"
24 #include "gretl_panel.h"
25 #include "libset.h"
26 #include "compat.h"
27 #include "missing_private.h"
28 #include "estim_private.h"
29 #include "system.h"
30 #include "tsls.h"
31 #include "nls.h"
32
33 #ifdef WIN32
34 # include "gretl_win32.h"
35 #endif
36
37 /**
38 * SECTION:estimate
39 * @short_description: estimation of regression models
40 * @title: Estimation
41 * @include: gretl/libgretl.h
42 *
43 * Most libgretl functions that estimate regression models are
44 * collected here.
45 *
46 * Please note that most of these functions return a #MODEL
47 * struct -- literally a struct, not a pointer to one.
48 * To handle such a return value in C you should either (a)
49 * declare a MODEL and assign to it, or (b) allocate a MODEL
50 * "on the heap" and then assign to its content using the
51 * indirection operator. The code fragment below illustrates
52 * both approaches.
53 *
54 * <informalexample><programlisting>
55 * #include <gretl/libgretl.h>
56 *
57 * int ols_using_gretl (const int *list, DATASET *dset,
58 * PRN *prn)
59 * {
60 * MODEL model;
61 * MODEL *pmod;
62 * int err;
63 *
64 * // method (a)
65 * model = lsq(list, dset, OLS, OPT_NONE);
66 * err = model.errcode;
67 * if (!err) {
68 * printmodel(&model, dset, OPT_NONE, prn);
69 * }
70 * clear_model(&model);
71 *
72 * // method (b)
73 * pmod = gretl_model_new();
74 * *pmod = lsq(list, dset, OLS, OPT_NONE);
75 * err = pmod->errcode;
76 * if (!err) {
77 * printmodel(pmod, dset, OPT_NONE, prn);
78 * }
79 * gretl_model_free(pmod);
80 *
81 * return err;
82 * }
83 * </programlisting></informalexample>
84 *
85 */
86
87 /* Comment on 'TINY': It's the minimum value for 'test' (see below)
88 that libgretl's Cholesky decomposition routine will accept before
89 rejecting a data matrix as too highly collinear. If you set it too
90 high, data sets for which Cholesky could produce reasonable
91 estimates will be rejected. If you set it too low (and 100 *
92 DBL_EPSILON is definitely too low), gretl will produce more or less
93 worthless coefficient estimates when given highly collinear data.
94 Before making a permanent change to the value of TINY, check how
95 gretl does on the NIST reference data sets for linear regression
96 and ensure you're not getting any garbage results. The current
97 value enables us to get decent results on the NIST nonlinear
98 regression test suite; it might be a bit too low for some contexts.
99 */
100
101 #define TINY 8.0e-09 /* was 2.1e-09 (last changed 2007/01/20) */
102 #define SMALL 2.0e-08 /* threshold for printing a warning for collinearity */
103 #define YBARZERO 0.5e-14 /* threshold for treating mean of dependent
104 variable as effectively zero */
105 #define ESSZERO 1.0e-22 /* threshold for considering a tiny error-sum-of-
106 squares value to be effectively zero */
107
108 #define XPX_DEBUG 0
109
110 static void regress (MODEL *pmod, double *xpy,
111 double ysum, double ypy,
112 const DATASET *dset,
113 double rho, gretlopt opt);
114 static void omitzero (MODEL *pmod, const DATASET *dset,
115 gretlopt opt);
116 static int model_lags (const int *list, const DATASET *dset, int *pmax);
117 static double estimate_rho (const int *list, DATASET *dset,
118 gretlopt opt, PRN *prn, int *err);
119
120 /* compute statistics for the dependent variable in a model */
121
/* Compute the mean and standard deviation of the dependent
   variable over the model's sample, skipping observations that
   are masked as missing and, for WLS with zero weights, the
   zero-weighted observations.
*/

static void model_depvar_stats (MODEL *pmod, DATASET *dset)
{
    int dv = pmod->list[1];
    int t, skipzw = 0;
    double d, acc;

    if (pmod->ci == WLS && gretl_model_get_int(pmod, "wt_zeros")) {
        /* WLS with some zero weights: skip those observations */
        skipzw = 1;
    }

    pmod->ybar = pmod->sdy = NADBL;

    if (pmod->nobs <= 0) {
        return;
    }

    /* first pass: raw mean */
    acc = 0.0;
    for (t=pmod->t1; t<=pmod->t2; t++) {
        if (skipzw && dset->Z[pmod->nwt][t] == 0.0) {
            continue;
        }
        if (!model_missing(pmod, t)) {
            acc += dset->Z[dv][t];
        }
    }
    pmod->ybar = acc / pmod->nobs;

    /* second pass: adjust the mean using the sum of deviations,
       which reduces floating-point rounding error */
    acc = 0.0;
    for (t=pmod->t1; t<=pmod->t2; t++) {
        if (skipzw && dset->Z[pmod->nwt][t] == 0.0) {
            continue;
        }
        if (!model_missing(pmod, t)) {
            acc += (dset->Z[dv][t] - pmod->ybar);
        }
    }
    pmod->ybar = pmod->ybar + acc / pmod->nobs;

    if (fabs(pmod->ybar) < YBARZERO) {
        pmod->ybar = 0.0;
    }

    /* third pass: sum of squared deviations */
    acc = 0.0;
    for (t=pmod->t1; t<=pmod->t2; t++) {
        if (skipzw && dset->Z[pmod->nwt][t] == 0.0) {
            continue;
        }
        if (!model_missing(pmod, t)) {
            d = dset->Z[dv][t] - pmod->ybar;
            acc += d * d;
        }
    }

    acc = (pmod->nobs > 1)? acc / (pmod->nobs - 1) : 0.0;

    pmod->sdy = (acc >= 0)? sqrt(acc) : NADBL;
}
181
182 /* determine the degrees of freedom for a model */
183
/* Set the coefficient count and degrees of freedom on @pmod.
   Flags E_DF (and returns non-zero) if there are more
   parameters than observations.
*/

static int get_model_df (MODEL *pmod)
{
    pmod->ncoeff = pmod->list[0] - 1;
    pmod->dfd = pmod->nobs - pmod->ncoeff;

    if (pmod->dfd >= 0) {
        pmod->dfn = pmod->ncoeff - pmod->ifc;
        return 0;
    }

    pmod->errcode = E_DF;
    gretl_errmsg_sprintf(_("No. of obs (%d) is less than no. "
                           "of parameters (%d)"), pmod->nobs,
                         pmod->ncoeff);
    return 1;
}
203
204 #define LDDEBUG 0
205
/* Copy the coefficient covariance matrix (vech form) from @src
   into @targ, computing it on @src first via makevcv() and
   allocating targ->vcv if it is not already present.
*/

static int transcribe_ld_vcv (MODEL *targ, MODEL *src)
{
    int k = targ->ncoeff;
    int row, col;
    int err;

    err = makevcv(src, src->sigma);

    if (!err && targ->vcv == NULL) {
        int vechlen = k * (k + 1) / 2;

        targ->vcv = malloc(vechlen * sizeof *targ->vcv);
        if (targ->vcv == NULL) {
            err = E_ALLOC;
        }
    }

    if (!err) {
        /* transcribe the upper triangle, element by element */
        for (row=0; row<k; row++) {
            for (col=row; col<k; col++) {
                targ->vcv[ijton(row, col, k)] =
                    src->vcv[ijton(row, col, src->ncoeff)];
            }
        }
    }

    return err;
}
232
233 /* Calculate consistent standard errors (and VCV matrix) when doing
234 AR(1) estimation of a model with lagged dependent variable. See
235 Ramanathan, Introductory Econometrics, 5e, p. 450.
236 */
237
static int
ldepvar_std_errors (MODEL *pmod, DATASET *dset)
{
    MODEL emod;
    const double *x;
    int orig_t1 = dset->t1;
    int orig_t2 = dset->t2;
    double rho = gretl_model_get_double(pmod, "rho_gls");
    int origv = dset->v;
    /* temporary series needed: the residual, the rho-differenced
       regressors (the constant excluded), and lagged uhat */
    int vnew = pmod->list[0] + 1 - pmod->ifc;
    int *list;
    int vi, vm;
    int i, t;
    int err = 0;

#if LDDEBUG
    PRN *prn = gretl_print_new(GRETL_PRINT_STDOUT, NULL);
    printlist(pmod->list, "pmod->list");
    printf("vnew = %d\n", vnew);
    printf("rho = %g\n", rho);
#endif

    list = gretl_list_new(vnew + pmod->ifc);
    if (list == NULL) {
	pmod->errcode = E_ALLOC;
	return 1;
    }

    /* add the temporary series to the dataset; they are
       removed again before returning */
    err = dataset_add_series(dset, vnew);
    if (err) {
	free(list);
	pmod->errcode = E_ALLOC;
	return 1;
    }

    vi = origv;

    /* dependent var is residual from original model */
    for (t=0; t<dset->n; t++) {
	dset->Z[vi][t] = pmod->uhat[t];
    }
    strcpy(dset->varname[vi], "eps");
    list[1] = vi++;

    /* indep vars are rho-differenced vars from original model */
    for (i=2; i<=pmod->list[0]; i++) {
	vm = pmod->list[i];
	if (vm == 0) {
	    /* series 0 (the constant) is carried over as is */
	    list[i] = 0;
	    continue;
	}
	sprintf(dset->varname[vi], "%.6s_r", dset->varname[vm]);
	x = dset->Z[vm];
	for (t=0; t<dset->n; t++) {
	    if (t == 0 || na(x[t]) || na(x[t-1])) {
		dset->Z[vi][t] = NADBL;
	    } else {
		dset->Z[vi][t] = x[t] - rho * x[t-1];
	    }
	}
	list[i] = vi++;
    }

    /* last indep var is lagged u-hat */
    for (t=0; t<dset->n; t++) {
	if (t == 0) {
	    dset->Z[vi][t] = NADBL;
	} else {
	    dset->Z[vi][t] = dset->Z[pmod->list[1]][t-1];
	}
	if (na(dset->Z[vi][t])) {
	    continue;
	}
	/* subtract the fitted value at t-1 so that the series
	   holds the lagged residual y[t-1] - X[t-1]'b */
	for (i=0; i<pmod->ncoeff; i++) {
	    x = dset->Z[pmod->list[i+2]];
	    if (na(x[t-1])) {
		dset->Z[vi][t] = NADBL;
		break;
	    } else {
		dset->Z[vi][t] -= pmod->coeff[i] * x[t-1];
	    }
	}
    }

    list[list[0]] = vi;
    strcpy(dset->varname[vi], "uhat_1");

    /* run the auxiliary regression over the original model's
       sample range */
    dset->t1 = pmod->t1;
    dset->t2 = pmod->t2;

    emod = lsq(list, dset, OLS, OPT_A);
    if (emod.errcode) {
	err = emod.errcode;
    } else {
#if LDDEBUG
	printmodel(&emod, dset, OPT_NONE, prn);
	gretl_print_destroy(prn);
#endif
	/* transcribe the corrected standard errors and VCV */
	for (i=0; i<pmod->ncoeff; i++) {
	    pmod->sderr[i] = emod.sderr[i];
	}

	err = transcribe_ld_vcv(pmod, &emod);
    }

    clear_model(&emod);

    free(list);
    dataset_drop_last_variables(dset, vnew);

    /* restore the dataset's original sample range */
    dset->t1 = orig_t1;
    dset->t2 = orig_t2;

    if (err && !pmod->errcode) {
	pmod->errcode = err;
    }

    return err;
}
357
358 /* special computation of statistics for autoregressive models */
359
static int compute_ar_stats (MODEL *pmod, const DATASET *dset,
			     double rho)
{
    int i, v, t, yno = pmod->list[1];
    int pwe = (pmod->opt & OPT_P);
    double x, pw1 = 0.0;

    if (pwe) {
	/* Prais-Winsten scale factor for the first observation */
	pw1 = sqrt(1.0 - rho * rho);
    }

    for (t=pmod->t1; t<=pmod->t2; t++) {
	if (t == pmod->t1 && pwe) {
	    /* Prais-Winsten: at the first observation every term,
	       including the constant, is scaled by pw1; the loop
	       starts at @ifc since the constant (coeff[0] when
	       present) is handled separately below */
	    x = pw1 * dset->Z[yno][t];
	    for (i=pmod->ifc; i<pmod->ncoeff; i++) {
		v = pmod->list[i+2];
		x -= pmod->coeff[i] * pw1 * dset->Z[v][t];
	    }
	    if (pmod->ifc) {
		x -= pw1 * pmod->coeff[0];
	    }
	} else {
	    /* quasi-differenced residual for observation t */
	    x = dset->Z[yno][t] - rho*dset->Z[yno][t-1];
	    for (i=0; i<pmod->ncoeff; i++) {
		v = pmod->list[i+2];
		x -= pmod->coeff[i] * (dset->Z[v][t] - rho*dset->Z[v][t-1]);
	    }
	}
	/* store residual and fitted value in terms of the
	   original (undifferenced) data */
	pmod->uhat[t] = x;
	pmod->yhat[t] = dset->Z[yno][t] - x;
    }

    /* R-squared as the squared correlation of y and yhat */
    pmod->rsq = gretl_corr_rsq(pmod->t1, pmod->t2, dset->Z[yno], pmod->yhat);
    pmod->adjrsq =
	1.0 - ((1.0 - pmod->rsq) * (pmod->t2 - pmod->t1) /
	       (double) pmod->dfd);

    return 0;
}
399
400 /* calculation of WLS stats (in agreement with GNU R) */
401
/* Compute R-squared, adjusted R-squared and (unless the robust
   option is in force) the overall F-statistic for a WLS model,
   working with the weighted data; in agreement with GNU R.
*/

static void get_wls_stats (MODEL *pmod, const DATASET *dset,
			   gretlopt opt)
{
    double d, wtss, wmean = 0.0, wtot = 0.0;
    int dv = pmod->list[1];
    int t;

    /* weighted mean of the dependent variable, over the
       positively weighted, non-missing observations */
    for (t=pmod->t1; t<=pmod->t2; t++) {
	if (model_missing(pmod, t)) {
	    continue;
	}
	if (dset->Z[pmod->nwt][t] > 0) {
	    wmean += dset->Z[pmod->nwt][t] * dset->Z[dv][t];
	    wtot += dset->Z[pmod->nwt][t];
	}
    }
    wmean /= wtot;

    /* weighted total sum of squares about the weighted mean */
    wtss = 0.0;
    for (t=pmod->t1; t<=pmod->t2; t++) {
	if (model_missing(pmod, t) || dset->Z[pmod->nwt][t] == 0.0) {
	    continue;
	}
	d = dset->Z[dv][t] - wmean;
	wtss += dset->Z[pmod->nwt][t] * d * d;
    }

    if (!(opt & OPT_R)) {
	pmod->fstt = ((wtss - pmod->ess) * pmod->dfd) / (pmod->dfn * pmod->ess);
    }
    pmod->rsq = (1 - (pmod->ess / wtss));
    pmod->adjrsq = 1 - ((1 - pmod->rsq) * (pmod->nobs - 1)/pmod->dfd);
}
436
/* For WLS: convert fitted values and residuals back to the
   scale of the original (unweighted) data, fill in yhat/uhat
   for zero-weighted observations, and record ESS and sigma
   in terms of the unweighted data (non-dummy weights only).
*/

static void fix_wls_values (MODEL *pmod, const DATASET *dset)
{
    int dwt = gretl_model_get_int(pmod, "wt_dummy");
    double yh, ess_orig = 0.0;
    double sw, sigma_orig;
    int t, i, v;

    for (t=pmod->t1; t<=pmod->t2; t++) {
	if (model_missing(pmod, t)) {
	    continue;
	}
	if (dset->Z[pmod->nwt][t] == 0.0) {
	    /* zero-weight observation: it played no part in the
	       estimation, so compute its fitted value and residual
	       directly from the coefficients */
	    yh = 0.0;
	    for (i=0; i<pmod->ncoeff; i++) {
		v = pmod->list[i+2];
		yh += pmod->coeff[i] * dset->Z[v][t];
	    }
	    pmod->yhat[t] = yh;
	    v = pmod->list[1];
	    pmod->uhat[t] = dset->Z[v][t] - yh;
	} else if (!dwt) {
	    /* undo the sqrt-of-weight scaling applied in estimation */
	    sw = sqrt(dset->Z[pmod->nwt][t]);
	    pmod->yhat[t] /= sw;
	    pmod->uhat[t] /= sw;
	    ess_orig += pmod->uhat[t] * pmod->uhat[t];
	}
    }

    if (!dwt) {
	/* attach ESS and sigma computed from the unweighted data */
	sigma_orig = sqrt(ess_orig / pmod->dfd);
	gretl_model_set_double(pmod, "ess_orig", ess_orig);
	gretl_model_set_double(pmod, "sigma_orig", sigma_orig);
    }
}
471
472 /* drop the weight var from the list of regressors (WLS) */
473
/* Remove the leading element (the weight variable) from @list,
   shifting the remaining members down one place and reducing
   the element count in list[0] accordingly.
*/

static void dropwt (int *list)
{
    int i, n = list[0] - 1;

    for (i=1; i<=n; i++) {
        list[i] = list[i+1];
    }
    list[0] = n;
}
483
model_missval_count(const MODEL * pmod)484 static int model_missval_count (const MODEL *pmod)
485 {
486 int mc = 0;
487
488 if (pmod->missmask != NULL) {
489 int t;
490
491 for (t=pmod->t1; t<=pmod->t2; t++) {
492 if (pmod->missmask[t] == '1') {
493 mc++;
494 }
495 }
496 }
497
498 return mc;
499 }
500
/* Number of usable observations for WLS: the model's sample
   size less any observations that are either masked as missing
   or carry a zero weight.
*/

static int wls_usable_obs (MODEL *pmod, const DATASET *dset)
{
    int t, count = pmod->nobs;

    for (t=pmod->t1; t<=pmod->t2; t++) {
        if (pmod->missmask[t] == '1' || dset->Z[pmod->nwt][t] == 0) {
            count--;
        }
    }

    return count;
}
515
516 #define SMPL_DEBUG 0
517
/* Adjust the sample range of @pmod and check for missing
   observations. If a "reference" missing-obs mask was recorded
   by a prior hypothesis test it is applied and respected.
   Returns the ID of an offending variable (with *misst set to
   the observation number) when missing values must be rejected,
   otherwise 0; may set pmod->errcode on allocation failure.
*/

static int
lsq_check_for_missing_obs (MODEL *pmod, gretlopt opts, DATASET *dset,
			   int *misst)
{
    int ref_mask = reference_missmask_present();
    int missv = 0;
    int reject_missing = 0;

#if SMPL_DEBUG
    fprintf(stderr, "lsq_check_for_missing_obs: ref_mask = %d\n",
	    ref_mask);
#endif

    if (ref_mask) {
	int err = apply_reference_missmask(pmod);

	/* If there was a reference mask present, it was put there
	   as part of a hypothesis test on some original model, and
	   it should be respected in estimation of this model */

	if (err) {
	    pmod->errcode = E_ALLOC;
	    return 1;
	} else {
	    return 0;
	}
    }

    /* can't do HAC VCV with missing obs in middle */
    if ((opts & OPT_R) && dataset_is_time_series(dset) &&
	!libset_get_bool(FORCE_HC)) {
	reject_missing = 1;
    }

    if (opts & OPT_M) {
	/* OPT_M: explicit request to reject missing observations */
	reject_missing = 1;
    }

    if (reject_missing) {
	/* reject missing obs within adjusted sample */
	missv = model_adjust_sample(pmod, dset->n,
				    (const double **) dset->Z,
				    misst);
    } else {
	/* we'll try to work around missing obs */
	missv = model_adjust_sample(pmod, dset->n,
				    (const double **) dset->Z,
				    NULL);
    }

#if SMPL_DEBUG
    if (1) {
	char t1s[OBSLEN], t2s[OBSLEN];
	int misscount = model_missval_count(pmod);

	ntolabel(t1s, pmod->t1, dset);
	ntolabel(t2s, pmod->t2, dset);
	fprintf(stderr, "*** after adjustment, t1=%d (%s), t2=%d (%s)\n",
		pmod->t1, t1s, pmod->t2, t2s);
	fprintf(stderr, "Valid observations in range = %d\n",
		pmod->t2 - pmod->t1 + 1 - misscount);
	fprintf(stderr, "Returning missv = %d\n", missv);
    }
#endif

    return missv;
}
585
/* Record on @pmod the maximum lag among the regressors
   ("maxlag") and the lagged-dependent-variable indicator
   ("ldepvar") as reported by model_lags(), removing any stale
   entries. Returns the ldepvar indicator.
*/

static int check_for_lags (MODEL *pmod, const DATASET *dset)
{
    int maxlag = 0;
    int ldv = model_lags(pmod->list, dset, &maxlag);

    if (maxlag > 0) {
        gretl_model_set_int(pmod, "maxlag", maxlag);
    } else if (gretl_model_get_int(pmod, "maxlag")) {
        gretl_model_destroy_data_item(pmod, "maxlag");
    }

    if (ldv) {
        gretl_model_set_int(pmod, "ldepvar", ldv);
    } else if (gretl_model_get_int(pmod, "ldepvar")) {
        gretl_model_destroy_data_item(pmod, "ldepvar");
    }

    return ldv;
}
605
/* If the dependent variable is the log of another series,
   attach the Jacobian-adjusted log-likelihood ("jll") and the
   name of the parent series ("log-parent") to @pmod.
*/

static void log_depvar_ll (MODEL *pmod, const DATASET *dset)
{
    char parent[VNAMELEN];
    double jll;
    int t;

    if (!series_is_log(dset, pmod->list[1], parent)) {
        return;
    }

    /* subtract each log-y observation from lnL */
    jll = pmod->lnL;
    for (t=0; t<dset->n; t++) {
        if (!na(pmod->uhat[t])) {
            jll -= dset->Z[pmod->list[1]][t];
        }
    }

    gretl_model_set_double(pmod, "jll", jll);
    gretl_model_set_string_as_data(pmod, "log-parent",
                                   gretl_strdup(parent));
}
625
/* Vet the weight series for WLS. Negative weights are an
   error, as are all-zero weights; zero weights in a non-dummy
   weight series are an error unless OPT_Z was given. On
   success, records on @pmod whether the weight is a 0/1 dummy
   and the count of zeros, and writes the number of positively
   weighted observations to @effobs. Returns non-zero (with
   pmod->errcode set) on error, else 0.
*/

static int check_weight_var (MODEL *pmod, const double *w, int *effobs,
                             gretlopt opt)
{
    const char *all_zeros =
        N_("Weight variable is all zeros, aborting regression");
    const char *some_neg =
        N_("Weight variable contains negative values");
    const char *bad_zeros =
        N_("Weight variable is not a dummy but contains zeros");
    int t, nzero = 0, npos = 0;
    int dummy = 1;

    for (t=pmod->t1; t<=pmod->t2; t++) {
        if (w[t] < 0.0) {
            gretl_errmsg_set(_(some_neg));
            pmod->errcode = E_DATA;
            return 1;
        } else if (w[t] > 0.0) {
            npos++;
            if (w[t] != 1.0) {
                dummy = 0;
            }
        } else if (w[t] == 0.0) {
            nzero++;
        }
    }

    if (npos == 0) {
        gretl_errmsg_set(_(all_zeros));
        pmod->errcode = E_DATA;
        return 1;
    }

    *effobs = npos;

    if (dummy) {
        /* the weight variable is a 0/1 dummy */
        gretl_model_set_int(pmod, "wt_dummy", 1);
        gretl_model_set_int(pmod, "wt_zeros", nzero);
    } else if (nzero > 0) {
        if (opt & OPT_Z) {
            gretl_model_set_int(pmod, "wt_zeros", nzero);
        } else {
            gretl_errmsg_set(_(bad_zeros));
            pmod->errcode = E_DATA;
            return 1;
        }
    }

    return 0;
}
679
/* Refresh the "maxlag" and "ldepvar" records on @pmod, but only
   if it already has a lagged dependent variable flagged.
*/

void maybe_shift_ldepvar (MODEL *pmod, DATASET *dset)
{
    int ldv = gretl_model_get_int(pmod, "ldepvar");

    if (ldv != 0) {
        check_for_lags(pmod, dset);
    }
}
686
687 /*
688 * XTX_XTy:
689 * @list: list of variables in model.
690 * @t1: starting observation.
691 * @t2: ending observation.
692 * @dset: pointer to dataset.
693 * @nwt: ID number of variable used as weight, or 0.
 * @rho: quasi-differencing coefficient, or 0.0.
695 * @pwe: if non-zero, use Prais-Winsten for first observation.
696 * @xpx: on output, X'X matrix as lower triangle, stacked by columns.
697 * @xpy: on output, X'y vector (but see below).
 * @ysum: location to receive \sum y_i, or NULL.
 * @ypy: location to receive (scalar) y'y, or NULL.
700 * @mask: missing observations mask, or NULL.
701 *
702 * Calculates X'X and X'y, with various possible transformations
703 * of the original data, depending on @nwt, @rho and @pwe.
704 * If X'y is not required, @xpy can be given as NULL.
705 *
706 * Note: the y-related pointer arguments @xpy, @ysum, and @ypy
707 * form a "package": either all should be given, or all should
708 * be NULL.
709 *
710 * Returns: 0 on success, non-zero on error.
711 */
712
static int XTX_XTy (const int *list, int t1, int t2,
		    const DATASET *dset, int nwt,
		    double rho, int pwe,
		    double *xpx, double *xpy,
		    double *ysum, double *ypy,
		    const char *mask)
{
    int yno = list[1];
    /* when X'y is not wanted, treat list[1] as a regressor too */
    int lmin = (xpy != NULL)? 2 : 1;
    int lmax = list[0];
    int qdiff = (rho != 0.0);
    const double *y = NULL;
    const double *w = NULL;
    const double *xi = NULL;
    const double *xj = NULL;
    double x, pw1;
    int i, j, t, m;
    int err = 0;

    /* Prais-Winsten term */
    if (qdiff && pwe) {
	pw1 = sqrt(1.0 - rho * rho);
    } else {
	pwe = 0;
	pw1 = 0.0;
    }

    /* dependent variable */
    y = dset->Z[yno];

    if (nwt) {
	/* weight variable */
	w = dset->Z[nwt];
    }

    if (xpy != NULL) {
	/* accumulate sum(y) and y'y on the transformed data */
	*ysum = *ypy = 0.0;

	for (t=t1; t<=t2; t++) {
	    if (masked(mask, t)) {
		continue;
	    }
	    x = y[t];
	    if (qdiff) {
		if (pwe && t == t1) {
		    /* Prais-Winsten: scale the first observation */
		    x = pw1 * y[t];
		} else {
		    x -= rho * y[t-1];
		}
	    } else if (nwt) {
		x *= sqrt(w[t]);
	    }
	    *ysum += x;
	    *ypy += x * x;
	}

	if (*ypy <= 0.0) {
	    /* error condition: y'y must be strictly positive;
	       the ID of the dependent variable is returned */
	    return yno;
	}
    }

    m = 0;

    if (qdiff) {
	/* quasi-difference the data */
	/* NOTE(review): the quasi-differencing branches below do not
	   consult @mask -- presumably this path is never reached with
	   interior missing observations; confirm */
	for (i=lmin; i<=lmax; i++) {
	    xi = dset->Z[list[i]];
	    for (j=i; j<=lmax; j++) {
		xj = dset->Z[list[j]];
		x = 0.0;
		for (t=t1; t<=t2; t++) {
		    if (pwe && t == t1) {
			x += pw1 * xi[t1] * pw1 * xj[t];
		    } else {
			x += (xi[t] - rho * xi[t-1]) *
			    (xj[t] - rho * xj[t-1]);
		    }
		}
		/* a non-positive diagonal element signals an
		   effectively collinear (singular) data matrix */
		if (i == j && x < DBL_EPSILON) {
		    return E_SINGULAR;
		}
		xpx[m++] = x;
	    }
	    if (xpy != NULL) {
		x = 0.0;
		for (t=t1; t<=t2; t++) {
		    if (pwe && t == t1) {
			x += pw1 * y[t] * pw1 * xi[t];
		    } else {
			x += (y[t] - rho * y[t-1]) *
			    (xi[t] - rho * xi[t-1]);
		    }
		}
		xpy[i-2] = x;
	    }
	}
    } else if (nwt) {
	/* weight the data */
	for (i=lmin; i<=lmax; i++) {
	    xi = dset->Z[list[i]];
	    for (j=i; j<=lmax; j++) {
		xj = dset->Z[list[j]];
		x = 0.0;
		for (t=t1; t<=t2; t++) {
		    if (!masked(mask, t)) {
			x += w[t] * xi[t] * xj[t];
		    }
		}
		if (i == j && x < DBL_EPSILON) {
		    return E_SINGULAR;
		}
		xpx[m++] = x;
	    }
	    if (xpy != NULL) {
		x = 0.0;
		for (t=t1; t<=t2; t++) {
		    if (!masked(mask, t)) {
			x += w[t] * y[t] * xi[t];
		    }
		}
		xpy[i-2] = x;
	    }
	}
    } else if (mask != NULL) {
	/* plain data, but with missing obs mask */
	for (i=lmin; i<=lmax; i++) {
	    xi = dset->Z[list[i]];
	    for (j=i; j<=lmax; j++) {
		xj = dset->Z[list[j]];
		x = 0.0;
		for (t=t1; t<=t2; t++) {
		    if (!masked(mask, t)) {
			x += xi[t] * xj[t];
		    }
		}
		if (i == j && x < DBL_EPSILON) {
		    return E_SINGULAR;
		}
		xpx[m++] = x;
	    }
	    if (xpy != NULL) {
		x = 0.0;
		for (t=t1; t<=t2; t++) {
		    if (!masked(mask, t)) {
			x += y[t] * xi[t];
		    }
		}
		xpy[i-2] = x;
	    }
	}
    } else {
	/* plain data, no missing obs mask */
	for (i=lmin; i<=lmax; i++) {
	    xi = dset->Z[list[i]];
	    for (j=i; j<=lmax; j++) {
		xj = dset->Z[list[j]];
		x = 0.0;
		for (t=t1; t<=t2; t++) {
		    x += xi[t] * xj[t];
		}
		if (i == j && x < DBL_EPSILON) {
		    return E_SINGULAR;
		}
		xpx[m++] = x;
	    }
	    if (xpy != NULL) {
		x = 0.0;
		for (t=t1; t<=t2; t++) {
		    x += y[t] * xi[t];
		}
		xpy[i-2] = x;
	    }
	}
    }

    return err;
}
891
/* Carry out estimation using gretl's native Cholesky code:
   allocate the model's result arrays, build X'X and X'y via
   XTX_XTy(), then hand off to regress(). Any failure is
   recorded in pmod->errcode, which is also the return value.
*/

static int native_cholesky_regress (MODEL *pmod, const DATASET *dset,
				    gretlopt opt)
{
    double rho, ysum = 0.0, ypy = 0.0;
    double *xpy;
    int k = pmod->ncoeff;
    /* X'X is stored as a packed triangle of k(k+1)/2 elements */
    int nxpx = k * (k + 1) / 2;
    int pwe = (pmod->opt & OPT_P);
    int i;

    if (nxpx == 0) {
	fprintf(stderr, "problem: nxpx = 0\n");
	pmod->errcode = E_DATA;
	return pmod->errcode;
    }

    xpy = malloc(k * sizeof *xpy);
    if (xpy == NULL) {
	pmod->errcode = E_ALLOC;
	return pmod->errcode;
    }

    /* any of these left non-NULL on failure are cleaned up
       with the model itself */
    pmod->xpx = malloc(nxpx * sizeof *pmod->xpx);
    pmod->coeff = malloc(k * sizeof *pmod->coeff);
    pmod->sderr = malloc(k * sizeof *pmod->sderr);

    if (pmod->yhat == NULL) {
	pmod->yhat = malloc(pmod->full_n * sizeof *pmod->yhat);
    }

    if (pmod->uhat == NULL) {
	pmod->uhat = malloc(pmod->full_n * sizeof *pmod->uhat);
    }

    if (pmod->xpx == NULL || pmod->coeff == NULL ||
	pmod->sderr == NULL || pmod->yhat == NULL ||
	pmod->uhat == NULL) {
	free(xpy);
	pmod->errcode = E_ALLOC;
	return pmod->errcode;
    }

    for (i=0; i<k; i++) {
	xpy[i] = 0.0;
    }
    for (i=0; i<nxpx; i++) {
	pmod->xpx[i] = 0.0;
    }

    /* non-zero rho means GLS via quasi-differencing */
    rho = gretl_model_get_double_default(pmod, "rho_gls", 0.0);

    /* calculate regression results, Cholesky style */
    pmod->errcode = XTX_XTy(pmod->list, pmod->t1, pmod->t2, dset,
			    pmod->nwt, rho, pwe, pmod->xpx, xpy,
			    &ysum, &ypy, pmod->missmask);

#if XPX_DEBUG
    for (i=0; i<k; i++) {
	fprintf(stderr, "xpy[%d] = %g\n", i, xpy[i]);
    }
    for (i=0; i<nxpx; i++) {
	fprintf(stderr, "xpx[%d] = %g\n", i, pmod->xpx[i]);
    }
    fputc('\n', stderr);
#endif

    if (!pmod->errcode) {
	regress(pmod, xpy, ysum, ypy, dset, rho, opt);
    }

    free(xpy);

    return pmod->errcode;
}
966
/* Handle the degenerate "null model" with no regressors: the
   residual equals the dependent variable, fitted values are
   zero, and sigma derives from the raw sum of squares.
*/

static int gretl_null_regress (MODEL *pmod, const DATASET *dset)
{
    const double *y = dset->Z[pmod->list[1]];
    int t;

    if (pmod->yhat == NULL) {
        pmod->yhat = malloc(pmod->full_n * sizeof *pmod->yhat);
    }
    if (pmod->uhat == NULL) {
        pmod->uhat = malloc(pmod->full_n * sizeof *pmod->uhat);
    }
    if (pmod->yhat == NULL || pmod->uhat == NULL) {
        pmod->errcode = E_ALLOC;
        return pmod->errcode;
    }

    pmod->nobs = 0;
    pmod->ifc = 0;
    pmod->ess = 0.0;
    pmod->rsq = pmod->adjrsq = 0.0;

    for (t=0; t<pmod->full_n; t++) {
        if (t < pmod->t1 || t > pmod->t2 || na(y[t])) {
            /* outside the sample range, or missing */
            pmod->uhat[t] = pmod->yhat[t] = NADBL;
        } else {
            pmod->yhat[t] = 0.0;
            pmod->uhat[t] = y[t];
            pmod->ess += y[t] * y[t];
            pmod->nobs += 1;
        }
    }

    if (pmod->ess == 0) {
        pmod->sigma = 0.0;
    } else if (pmod->nobs > 1) {
        pmod->sigma = sqrt(pmod->ess / (pmod->nobs - 1));
    } else {
        pmod->errcode = E_DATA;
    }

    if (pmod->errcode == 0) {
        ls_criteria(pmod);
    }

    return pmod->errcode;
}
1016
1017 /* check whether the variable with ID @yno is all zeros;
1018 return 1 if so, otherwise return 0 */
1019
depvar_zero(const MODEL * pmod,int yno,const double ** Z)1020 static int depvar_zero (const MODEL *pmod, int yno, const double **Z)
1021 {
1022 double yt;
1023 int t, ret = 1;
1024
1025 for (t=pmod->t1; t<=pmod->t2; t++) {
1026 if (model_missing(pmod, t)) {
1027 continue;
1028 }
1029 yt = Z[yno][t];
1030 if (pmod->nwt) {
1031 yt *= Z[pmod->nwt][t];
1032 }
1033 if (yt != 0.0) {
1034 ret = 0;
1035 break;
1036 }
1037 }
1038
1039 return ret;
1040 }
1041
/* Dispatch Cholesky-based estimation: larger problems go to
   the LAPACK implementation, smaller ones to the native code.
*/

static int cholesky_regress (MODEL *pmod, const DATASET *dset,
                             gretlopt opt)
{
    int nobs = pmod->t2 - pmod->t1 + 1;
    int ncoef = pmod->list[0] - 1;
    int big = (ncoef >= 50 || (nobs >= 250 && ncoef >= 30));

    return big ? lapack_cholesky_regress(pmod, dset, opt) :
        native_cholesky_regress(pmod, dset, opt);
}
1054
1055 /* limited freeing of elements before passing a model
1056 on for QR estimation in the case of (near-)singularity
1057 */
1058
/* Release the model's xpx, coeff and sderr arrays and reset
   the pointers, prior to re-estimation via QR.
*/

static void model_free_storage (MODEL *pmod)
{
    free(pmod->xpx);
    pmod->xpx = NULL;

    free(pmod->coeff);
    pmod->coeff = NULL;

    free(pmod->sderr);
    pmod->sderr = NULL;
}
1069
1070 /*
1071 * ar1_lsq:
1072 * @list: model specification.
1073 * @dset: dataset struct.
1074 * @ci: command index, e.g. OLS, AR1.
1075 * @opt: option flags (see lsq and ar1_model).
1076 * @rho: coefficient for quasi-differencing the data, or 0.
1077 *
1078 * Nests lsq() and ar1_model(); is also used internally with ci == OLS
1079 * but rho != 0.0 when estimating rho via, e.g., Cochrane-Orcutt
1080 * iteration.
1081 *
1082 * Returns: model struct containing the estimates.
1083 */
1084
ar1_lsq(const int * list,DATASET * dset,GretlCmdIndex ci,gretlopt opt,double rho)1085 static MODEL ar1_lsq (const int *list, DATASET *dset,
1086 GretlCmdIndex ci, gretlopt opt,
1087 double rho)
1088 {
1089 MODEL mdl;
1090 int effobs = 0;
1091 int missv = 0, misst = 0;
1092 int pwe = (opt & OPT_P);
1093 int nullmod = 0, ldv = 0;
1094 int save_hc = -1;
1095 int yno, i;
1096
1097 gretl_model_init(&mdl, dset);
1098
1099 if (list == NULL || dset == NULL || dset->Z == NULL) {
1100 fprintf(stderr, "E_DATA: lsq: list = %p, dset = %p\n",
1101 (void *) list, (void *) dset);
1102 mdl.errcode = E_DATA;
1103 return mdl;
1104 }
1105
1106 if (opt & OPT_C) {
1107 /* cluster option implies robust */
1108 opt |= OPT_R;
1109 }
1110
1111 if (ci == AR1) {
1112 if (opt & OPT_P) {
1113 /* OPT_P: Prais-Winsten */
1114 mdl.opt |= OPT_P;
1115 } else if (opt & OPT_H) {
1116 /* OPT_H: Hildreth-Lu */
1117 mdl.opt |= OPT_H;
1118 }
1119 } else if (rho != 0.0 && (opt & OPT_P)) {
1120 /* estimation of rho in progress */
1121 mdl.opt |= OPT_P;
1122 }
1123
1124 if (list[0] == 1 && ci == OLS && (opt & OPT_U)) {
1125 /* null model OK */
1126 nullmod = 1;
1127 } else if (list[0] == 1 || dset->v == 1) {
1128 fprintf(stderr, "E_DATA: lsq: list[0] = %d, dset->v = %d\n",
1129 list[0], dset->v);
1130 mdl.errcode = E_DATA;
1131 return mdl;
1132 }
1133
1134 /* preserve a copy of the list supplied, for future reference */
1135 mdl.list = gretl_list_copy(list);
1136 if (mdl.list == NULL) {
1137 mdl.errcode = E_ALLOC;
1138 return mdl;
1139 }
1140
1141 mdl.t1 = dset->t1;
1142 mdl.t2 = dset->t2;
1143 mdl.full_n = dset->n;
1144 mdl.ci = ci;
1145
1146 /* Doing weighted least squares? */
1147 if (ci == WLS) {
1148 check_weight_var(&mdl, dset->Z[mdl.list[1]], &effobs, opt);
1149 if (mdl.errcode) {
1150 return mdl;
1151 }
1152 mdl.nwt = mdl.list[1];
1153 } else {
1154 mdl.nwt = 0;
1155 }
1156
1157 /* sanity check */
1158 if (mdl.t1 < 0 || mdl.t2 > dset->n - 1) {
1159 mdl.errcode = E_DATA;
1160 goto lsq_abort;
1161 }
1162
1163 /* adjust sample range and check for missing obs: this
1164 may set the model errcode */
1165 missv = lsq_check_for_missing_obs(&mdl, opt, dset, &misst);
1166 if (mdl.errcode) {
1167 goto lsq_abort;
1168 }
1169
1170 /* react to presence of unhandled missing obs */
1171 if (missv) {
1172 gretl_errmsg_sprintf(_("Missing value encountered for "
1173 "variable %d, obs %d"),
1174 missv, misst);
1175 mdl.errcode = E_DATA;
1176 return mdl;
1177 }
1178
1179 if (ci == WLS) {
1180 dropwt(mdl.list);
1181 }
1182
1183 yno = mdl.list[1];
1184
1185 /* check for unknown vars in list */
1186 for (i=1; i<=mdl.list[0]; i++) {
1187 if (mdl.list[i] > dset->v - 1) {
1188 mdl.errcode = E_UNKVAR;
1189 goto lsq_abort;
1190 }
1191 }
1192
1193 /* check for zero dependent var */
1194 if (depvar_zero(&mdl, yno, (const double **) dset->Z)) {
1195 mdl.errcode = E_ZERO;
1196 goto lsq_abort;
1197 }
1198
1199 /* drop any vars that are all zero and repack the list */
1200 omitzero(&mdl, dset, opt);
1201
1202 /* if regressor list contains a constant, record this fact and
1203 place it first among the regressors */
1204 mdl.ifc = reglist_check_for_const(mdl.list, dset);
1205
1206 /* Check for presence of lagged dependent variable?
1207 (Don't bother if this is an auxiliary regression.) */
1208 if (!(opt & OPT_A) && !dataset_is_cross_section(dset)) {
1209 ldv = check_for_lags(&mdl, dset);
1210 }
1211
1212 /* AR1: advance the starting observation by one? */
1213 if (rho != 0.0 && !pwe) {
1214 mdl.t1 += 1;
1215 }
1216
1217 mdl.ncoeff = mdl.list[0] - 1;
1218 if (effobs > 0 && mdl.missmask == NULL) {
1219 mdl.nobs = effobs;
1220 } else {
1221 mdl.nobs = mdl.t2 - mdl.t1 + 1;
1222 if (mdl.nwt) {
1223 mdl.nobs = wls_usable_obs(&mdl, dset);
1224 } else if (mdl.missmask != NULL) {
1225 mdl.nobs -= model_missval_count(&mdl);
1226 }
1227 }
1228
1229 /* check degrees of freedom */
1230 if (get_model_df(&mdl)) {
1231 goto lsq_abort;
1232 }
1233
1234 /* if df correction is not wanted, record this fact */
1235 if (opt & OPT_N) {
1236 mdl.opt |= OPT_N;
1237 }
1238
1239 if (dataset_is_time_series(dset)) {
1240 opt |= OPT_T;
1241 }
1242
1243 /* remove Durbin-Watson p-value flag if not appropriate */
1244 if (ldv || !(opt & OPT_T)) {
1245 opt &= ~OPT_I;
1246 }
1247
1248 if (opt & OPT_J) {
1249 /* --jackknife */
1250 save_hc = libset_get_int(HC_VERSION);
1251 libset_set_int(HC_VERSION, 4);
1252 opt |= OPT_R;
1253 mdl.opt |= (OPT_J | OPT_R);
1254 }
1255
1256 if (rho != 0.0) {
1257 gretl_model_set_double(&mdl, "rho_gls", rho);
1258 }
1259
1260 if (nullmod) {
1261 gretl_null_regress(&mdl, dset);
1262 } else if (libset_get_bool(USE_QR)) {
1263 gretl_qr_regress(&mdl, dset, opt);
1264 } else if (opt & (OPT_R | OPT_I)) {
1265 gretl_qr_regress(&mdl, dset, opt);
1266 } else {
1267 cholesky_regress(&mdl, dset, opt);
1268 if (mdl.errcode == E_SINGULAR) {
1269 /* near-perfect collinearity is better handled by QR */
1270 model_free_storage(&mdl);
1271 gretl_qr_regress(&mdl, dset, opt);
1272 }
1273 }
1274
1275 if (save_hc >= 0) {
1276 libset_set_int(HC_VERSION, save_hc);
1277 }
1278
1279 if (mdl.errcode) {
1280 goto lsq_abort;
1281 }
1282
1283 /* get the mean and sd of dep. var. and make available */
1284 model_depvar_stats(&mdl, dset);
1285
1286 /* Doing an autoregressive procedure? */
1287 if (ci == AR1) {
1288 if (compute_ar_stats(&mdl, dset, rho)) {
1289 goto lsq_abort;
1290 }
1291 if (ldv) {
1292 ldepvar_std_errors(&mdl, dset);
1293 if (mdl.errcode) {
1294 goto lsq_abort;
1295 }
1296 }
1297 if ((opt & OPT_H) && (opt & OPT_B)) {
1298 gretl_model_set_int(&mdl, "no-corc", 1);
1299 }
1300 if (gretl_model_get_int(&mdl, "maxlag") < 1) {
1301 gretl_model_set_int(&mdl, "maxlag", 1);
1302 }
1303 }
1304
1305 /* weighted least squares: fix yhat and uhat; add calculation of
1306 ESS and sigma based on unweighted data
1307 */
1308 if (ci == WLS) {
1309 get_wls_stats(&mdl, dset, opt);
1310 fix_wls_values(&mdl, dset);
1311 }
1312
1313 if (mdl.missmask == NULL && (opt & OPT_T) && !(opt & OPT_I)) {
1314 mdl.rho = rhohat(1, mdl.t1, mdl.t2, mdl.uhat);
1315 mdl.dw = dwstat(1, &mdl, dset);
1316 } else if (!(opt & OPT_I)) {
1317 mdl.rho = mdl.dw = NADBL;
1318 }
1319
1320 /* special case: degenerate model */
1321 if (mdl.ncoeff == 1 && mdl.ifc) {
1322 mdl.rsq = mdl.adjrsq = 0.0;
1323 mdl.fstt = NADBL;
1324 }
1325
1326 /* Generate model selection statistics */
1327 if (ci == AR1) {
1328 mdl.lnL = NADBL;
1329 mle_criteria(&mdl, 0);
1330 } else {
1331 ls_criteria(&mdl);
1332 }
1333 if (!(opt & OPT_A) && !na(mdl.lnL)) {
1334 log_depvar_ll(&mdl, dset);
1335 }
1336
1337 lsq_abort:
1338
1339 if (!(opt & (OPT_A | OPT_S))) {
1340 /* if it's not an auxiliary regression, or part of
1341 a system, set an ID number on the model
1342 */
1343 set_model_id(&mdl, opt);
1344 }
1345
1346 return mdl;
1347 }
1348
1349 /**
1350 * lsq:
1351 * @list: dependent variable plus list of regressors.
1352 * @dset: dataset struct.
1353 * @ci: one of the command indices in #LSQ_MODEL.
1354 * @opt: option flags: zero or more of the following --
1355 * OPT_R: compute robust standard errors;
1356 * OPT_A: treat as auxiliary regression (don't bother checking
1357 * for presence of lagged dependent var, don't augment model count);
1358 * OPT_N: don't use degrees of freedom correction for standard
1359 * error of regression;
1360 * OPT_M: reject missing observations within sample range;
1361 * OPT_Z: (internal use) suppress the automatic elimination of
1362 * perfectly collinear variables.
1363 * OPT_X: compute "variance matrix" as just (X'X)^{-1}
1364 * OPT_I: compute Durbin-Watson p-value.
1365 * OPT_U: treat null model as OK.
1366 * OPT_P: (ar1 only): use Prais-Winsten.
1367 * OPT_S: estimation as part of system.
1368 *
1369 * Computes least squares estimates of the model specified by @list,
1370 * using an estimator determined by the value of @ci.
1371 *
1372 * Returns: a #MODEL struct, containing the estimates.
1373 */
1374
MODEL lsq (const int *list, DATASET *dset, GretlCmdIndex ci,
	   gretlopt opt)
{
    /* Convenience wrapper: ordinary (non-AR1) least squares is
       just ar1_lsq with quasi-differencing coefficient rho = 0. */
    return ar1_lsq(list, dset, ci, opt, 0.0);
}
1380
1381 /**
1382 * ar1_model:
1383 * @list: model specification.
1384 * @dset: dataset struct.
1385 * @opt: option flags: may include OPT_H to use Hildreth-Lu,
1386 * OPT_P to use Prais-Winsten, OPT_B to suppress Cochrane-Orcutt
1387 * fine-tuning of Hildreth-Lu results, OPT_G to generate
1388 * a gnuplot graph of the search in Hildreth-Lu case.
1389 * @prn: gretl printer.
1390 *
1391 * Returns: model struct containing the estimates.
1392 */
1393
ar1_model(const int * list,DATASET * dset,gretlopt opt,PRN * prn)1394 MODEL ar1_model (const int *list, DATASET *dset,
1395 gretlopt opt, PRN *prn)
1396 {
1397 MODEL mdl;
1398 double rho;
1399 int err = 0;
1400
1401 rho = estimate_rho(list, dset, opt, prn, &err);
1402
1403 if (err) {
1404 gretl_model_init(&mdl, NULL);
1405 mdl.errcode = err;
1406 return mdl;
1407 } else {
1408 return ar1_lsq(list, dset, AR1, opt, rho);
1409 }
1410 }
1411
/* Recompute the error sum of squares for @pmod from the raw data,
   skipping zero-weight and missing observations. Always returns 0. */

static int make_ess (MODEL *pmod, const DATASET *dset)
{
    int depvar = pmod->list[1];
    int nx = pmod->list[0];
    int wvar = pmod->nwt;
    double fitted, resid;
    int j, t;

    pmod->ess = 0.0;

    for (t=pmod->t1; t<=pmod->t2; t++) {
	if ((wvar && dset->Z[wvar][t] == 0.0) ||
	    model_missing(pmod, t)) {
	    /* excluded observation: zero weight or missing data */
	    continue;
	}
	fitted = 0.0;
	for (j=2; j<=nx; j++) {
	    fitted += pmod->coeff[j-2] * dset->Z[pmod->list[j]][t];
	}
	resid = dset->Z[depvar][t] - fitted;
	if (wvar) {
	    /* weighted estimation: scale the residual */
	    resid *= sqrt(dset->Z[wvar][t]);
	}
	pmod->ess += resid * resid;
    }

    return 0;
}
1440
1441 /* The heuristic used here is that the model effectively
1442 contains a constant or intercept if the means of y and
1443 yhat are the same, or in other words the mean residual
1444 is zero -- but allowing for some numerical slop.
1445 */
1446
int check_for_effective_const (MODEL *pmod, const DATASET *dset)
{
    const double *y = dset->Z[pmod->list[1]];
    double usum = 0.0, ysum = 0.0;
    int t, effconst = 0;

    /* accumulate residual and dependent-variable totals over
       the usable observations */
    for (t=pmod->t1; t<=pmod->t2; t++) {
	if (na(pmod->uhat[t])) {
	    continue;
	}
	usum += pmod->uhat[t];
	ysum += y[t];
    }

    /* absolute means */
    usum = fabs(usum / pmod->nobs);
    ysum = fabs(ysum / pmod->nobs);

    /* the calibration below is debatable: watch out for
       "wrong" results */

    if (usum < 1.0e-7) {
	/* absolute value of mean residual small enough? */
	effconst = 1;
    } else if (usum < 0.01 && ysum > 1.0e-20) {
	/* try scaled variant? */
	effconst = usum / ysum < 2.0e-15;
    }

#if 0
    fprintf(stderr, "check_for_effective_const:\n"
	    "ubar = %g, ybar = %g, ret = %d\n",
	    usum, ysum, effconst);
#endif

    if (effconst) {
	gretl_model_set_int(pmod, "effconst", 1);
	pmod->dfn -= 1;
    } else if (gretl_model_get_int(pmod, "effconst")) {
	/* previously flagged, but no longer: undo the adjustment */
	gretl_model_set_int(pmod, "effconst", 0);
	pmod->dfn += 1;
    }

    return effconst;
}
1490
/* special computation for the case of TSS = 0, i.e.
   the dependent variable is a constant */

static void uncentered_r_squared (MODEL *pmod, const DATASET *dset)
{
    double ybar = dset->Z[pmod->list[1]][pmod->t1];

    if (ybar != 0) {
	/* uncentered R^2 relative to sum of y^2 = n * ybar^2 */
	pmod->rsq = 1 - pmod->ess / (pmod->nobs * ybar * ybar);
	gretl_model_set_int(pmod, "uncentered", 1);
    }
}
1505
/* Compute R^2 and adjusted R^2 for @pmod; *ifc may be updated if
   the model is found to contain an "effective" constant. */

static void compute_r_squared (MODEL *pmod, const DATASET *dset,
			       int *ifc)
{
    int dv = pmod->list[1];

    pmod->rsq = 1.0 - (pmod->ess / pmod->tss);

    if (!*ifc) {
	/* no explicit constant: is there an effective one? */
	*ifc = check_for_effective_const(pmod, dset);
    }

    if (pmod->dfd > 0) {
	if (*ifc) {
	    double den = pmod->tss * pmod->dfd;

	    pmod->adjrsq = 1 - (pmod->ess * (pmod->nobs - 1) / den);
	} else {
	    /* model does not contain a constant */
	    double yt, ssy = 0.0;
	    int t;

	    for (t=pmod->t1; t<=pmod->t2; t++) {
		if (!na(pmod->yhat[t])) {
		    yt = dset->Z[dv][t];
		    ssy += yt * yt;
		}
	    }

	    /* make the centered R^2 available for output */
	    gretl_model_set_double(pmod, "centered-R2", pmod->rsq);

	    /* but flag the fact that the reported R-squared is
	       in fact uncentered */
	    gretl_model_set_int(pmod, "uncentered", 1);

	    /* and make the "official" figure the uncentered R^2,
	       as per NIST, R, Stata, SPSS, ... */
	    pmod->rsq = 1 - pmod->ess / ssy;
	    pmod->adjrsq =
		1.0 - ((1.0 - pmod->rsq) * (pmod->nobs - 1.0) / pmod->dfd);
	}
    }

    if (pmod->rsq < 0.0) {
	/* clamp: negative R^2 is not reported */
	pmod->rsq = 0.0;
    }
}
1553
1554 /*
1555 diaginv: solves for the diagonal elements of X'X inverse.
1556
1557 @xpx = Cholesky-decomposed X'X
1558 @tmp = work array, length >= nv
1559 @diag = diagonal elements of {X'X}^{-1} (output)
1560 @nv = number of regression coefficients = length of diag
1561 */
1562
static void diaginv (const double *xpx, double *tmp, double *diag,
		     int nv)
{
    int kk, l, m, k, i, j;
    double d, e;

    /* kk indexes the diagonal element of column l in the packed
       (column-stacked, lower-triangular) array @xpx */
    kk = 0;

    for (l=0; l<nv-1; l++) {
	d = xpx[kk];
	tmp[l] = d;
	e = d * d;
	m = 0;
	if (l > 0) {
	    /* m = offset of column l+1's data in the packed array */
	    for (j=1; j<=l; j++) {
		m += nv - j;
	    }
	}
	for (i=l+1; i<nv; i++) {
	    /* accumulate, in tmp, the elements below the diagonal
	       that contribute to diag[l]; e collects their sum of
	       squares */
	    d = 0.0;
	    k = i + m;
	    for (j=l; j<i; j++) {
		d += tmp[j] * xpx[k];
		/* advance k down the packed column j */
		k += nv - (j+1);
	    }
	    d = -d * xpx[k];
	    tmp[i] = d;
	    e += d * d;
	}
	/* move kk to the diagonal element of the next column */
	kk += nv - l;
	diag[l] = e;
    }

    /* last diagonal element: no sub-diagonal contributions
       (note: xpx diagonal entries hold reciprocals, per cholbeta) */
    diag[nv-1] = xpx[kk] * xpx[kk];
}
1598
1599 /*
1600 cholbeta: in-place Cholesky decomposition of X'X (pmod->xpx) (lower
1601 triangular matrix stacked in columns) plus solution for the
1602 least-squares coefficient estimates.
1603
1604 pmod->xpx = X'X on input and Cholesky decomposition on output
1605 xpy = the X'y vector on input and Cholesky-transformed t
1606 vector on output
1607 rss = location to receive regression sum of squares
1608
1609 The number of floating-point operations is basically 3.5 * nv^2
1610 plus (nv^3) / 3.
1611 */
1612
static int cholbeta (MODEL *pmod, double *xpy, double *rss)
{
    int i, j, k, kk, l, jm1;
    double e, d, d1, d2, test, xx;
    double *xpx = pmod->xpx;
    double *b = pmod->coeff;
    int nc = pmod->ncoeff;

    /* X'X must be positive definite: its (1,1) element is a sum
       of squares and cannot legitimately be <= 0 */
    if (xpx[0] <= 0.0) {
	fprintf(stderr, "%s %d: xpx <= 0.0\n", __FILE__, __LINE__);
	return E_NAN;
    }

    /* first column: note that the diagonal elements of the
       decomposition are stored as reciprocals (1/sqrt), which
       turns later divisions into multiplications */
    e = 1.0 / sqrt(xpx[0]);
    xpx[0] = e;
    xpy[0] *= e;
    for (i=1; i<nc; i++) {
	xpx[i] *= e;
    }

    /* kk indexes the diagonal element of column j in the packed
       (column-stacked, lower-triangular) array */
    kk = nc;

    for (j=1; j<nc; j++) {
	/* diagonal elements */
	d = d1 = 0.0;
	k = jm1 = j;

	/* accumulate the dot products of row j with the columns
	   computed so far (d) and with the transformed xpy (d1) */
	for (l=1; l<=jm1; l++) {
	    xx = xpx[k];
	    d1 += xx * xpy[l-1];
	    d += xx * xx;
	    k += nc - l;
	}

	d2 = xpx[kk] - d;
	/* relative size of the pivot */
	test = d2 / xpx[kk];

	/* check for singularity */
	if (test < TINY) {
#if 0
	    fprintf(stderr, "cholbeta: test[%d] = %g\n", j, test);
#endif
	    *rss = -1.0;
	    return E_SINGULAR;
	} else if (test < SMALL) {
	    /* usable but numerically fragile: flag it */
	    gretl_model_set_int(pmod, "near-singular", 1);
	}

	e = 1 / sqrt(d2);
	xpx[kk] = e;
	xpy[j] = (xpy[j] - d1) * e;

	/* off-diagonal elements */
	for (i=j+1; i<nc; i++) {
	    kk++;
	    d = 0.0;
	    k = j;
	    for (l=1; l<=jm1; l++) {
		d += xpx[k] * xpx[k-j+i];
		k += nc - l;
	    }
	    xpx[kk] = (xpx[kk] - d) * e;
	}
	kk++;
    }

    kk--;

    /* calculate regression sum of squares */
    d = 0.0;
    for (j=0; j<nc; j++) {
	d += xpy[j] * xpy[j];
    }
    *rss = d;

    /* back-solve for the coefficients, walking the packed array
       backwards from its last element */

    b[nc-1] = xpy[nc-1] * xpx[kk];

    for (j=nc-2; j>=0; j--) {
	d = xpy[j];
	for (i=nc-1; i>j; i--) {
	    d -= b[i] * xpx[--kk];
	}
	b[j] = d * xpx[--kk];
    }

    /* guard against NaN coefficients escaping to the caller */
    for (j=0; j<nc; j++) {
	if (isnan(b[j])) {
	    fprintf(stderr, "%s %d: coeff %d is NaN\n", __FILE__, __LINE__, j);
	    return E_NAN;
	}
    }

    return 0;
}
1709
1710 /* compute fitted values and residuals */
1711
/* compute fitted values and residuals */

static int hatvars (MODEL *pmod, const DATASET *dset)
{
    int dv = pmod->list[1];
    double xval, fit;
    int j, t;

    for (t=pmod->t1; t<=pmod->t2; t++) {
	if (model_missing(pmod, t)) {
	    continue;
	}
	fit = 0.0;
	for (j=0; j<pmod->ncoeff; j++) {
	    xval = dset->Z[pmod->list[j+2]][t];
	    if (pmod->nwt) {
		/* WLS: regressors enter in weighted form */
		xval *= sqrt(dset->Z[pmod->nwt][t]);
	    }
	    fit += pmod->coeff[j] * xval;
	}
	pmod->yhat[t] = fit;
	xval = dset->Z[dv][t];
	if (pmod->nwt) {
	    /* and so does the dependent variable */
	    xval *= sqrt(dset->Z[pmod->nwt][t]);
	}
	pmod->uhat[t] = xval - fit;
    }

    return 0;
}
1740
1741 /*
1742 regress: takes X'X (pmod->xpx) and X'y (@xpy) and
1743 computes OLS estimates and associated statistics.
1744
1745 n = total number of observations per series in data set
1746 pmod->ifc = 1 if constant is present in model, else = 0
1747
1748 ess = error sum of squares
1749 sigma = standard error of regression
1750 fstt = F-statistic
1751 pmod->coeff = array of regression coefficients
1752 pmod->sderr = corresponding array of standard errors
1753 */
1754
/*
  regress: takes X'X (pmod->xpx) and X'y (@xpy) and
  computes OLS estimates and associated statistics.

  @xpy: X'y vector (used as workspace later, see below).
  @ysum, @ypy: sum of y and sum of squares of y.
  @rho: quasi-differencing coefficient (non-zero for AR1).
  @opt: may contain OPT_A (auxiliary regression).

  On error, pmod->errcode is set; on success it is left at zero
  and pmod->coeff, sderr, ess, sigma, rsq, fstt etc. are filled.
*/

static void regress (MODEL *pmod, double *xpy,
		     double ysum, double ypy,
		     const DATASET *dset,
		     double rho, gretlopt opt)
{
    int ifc = pmod->ifc;
    double zz, rss = 0.0;
    double s2 = 0.0;
    double *diag = NULL;
    int i, err = 0;

    /* initialize fitted values and residuals to NA */
    for (i=0; i<pmod->full_n; i++) {
	pmod->yhat[i] = pmod->uhat[i] = NADBL;
    }

    zz = ysum * ysum / pmod->nobs;
    pmod->tss = ypy - zz;

    /* Cholesky-decompose X'X and find the coefficients */
    err = cholbeta(pmod, xpy, &rss);
    if (err) {
        pmod->errcode = err;
        return;
    }

    if (rho != 0.0) {
	/* AR1 case: ESS from the quasi-differenced quantities */
	pmod->ess = ypy - rss;
    } else {
	/* plain case: recompute ESS from the raw data for accuracy */
	make_ess(pmod, dset);
	rss = ypy - pmod->ess;
    }

    if (fabs(pmod->ess) < ESSZERO) {
	/* tiny negative or positive value: treat as exact zero */
	pmod->ess = 0.0;
    } else if (pmod->ess < 0.0) {
	gretl_errmsg_sprintf(_("Error sum of squares (%g) is not > 0"),
			     pmod->ess);
	/* fix: record the failure on the model itself; previously
	   only the error message was set, so callers could treat
	   the (invalid) estimates as good */
	pmod->errcode = E_DATA;
	return;
    }

    if (pmod->dfd == 0) {
	/* saturated model: no residual variance */
	pmod->sigma = 0.0;
	pmod->adjrsq = NADBL;
    } else {
	if (pmod->opt & OPT_N) {
	    /* no-df-corr */
	    s2 = pmod->ess / pmod->nobs;
	} else {
	    s2 = pmod->ess / pmod->dfd;
	}
	pmod->sigma = sqrt(s2);
    }

    if (pmod->tss < DBL_EPSILON) {
	pmod->rsq = pmod->adjrsq = NADBL;
    }

    /* compute fitted values and residuals */
    hatvars(pmod, dset);

    if (pmod->errcode) return;

    if (pmod->tss > 0.0) {
	compute_r_squared(pmod, dset, &ifc);
    } else if (pmod->tss == 0.0 && !(opt & OPT_A)) {
	/* constant dependent variable: uncentered R^2 */
	uncentered_r_squared(pmod, dset);
    }

    /* overall F (or chi-square) statistic */
    if (s2 <= 0.0 || pmod->dfd == 0 || pmod->dfn == 0) {
	pmod->fstt = NADBL;
    } else if (pmod->rsq == 1.0) {
	pmod->fstt = NADBL;
    } else if (pmod->opt & OPT_N) {
	/* for consistency with no-df-corr */
	pmod->fstt = NADBL;
	pmod->chisq = (rss - zz * ifc) / s2;
    } else {
	pmod->fstt = (rss - zz * ifc) / (s2 * pmod->dfn);
	if (pmod->fstt < 0.0) {
	    pmod->fstt = 0.0;
	}
    }

    diag = malloc(pmod->ncoeff * sizeof *diag);
    if (diag == NULL) {
	pmod->errcode = E_ALLOC;
	return;
    }

    /* Note: 'xpy' is used purely as workspace in diaginv() below, as
       a matter of economy/convenience; no values in this array are
       used before they are written.
    */
    diaginv(pmod->xpx, xpy, diag, pmod->ncoeff);

    /* standard errors from the diagonal of (X'X)^{-1} */
    for (i=0; i<pmod->ncoeff; i++) {
	if (diag[i] >= 0.0) {
	    pmod->sderr[i] = pmod->sigma * sqrt(diag[i]);
	} else {
	    pmod->sderr[i] = 0.0;
	}
    }

    free(diag);
}
1859
1860 /**
1861 * makevcv:
1862 * @pmod: pointer to model.
1863 * @sigma: square root of error variance, or 1.0 to
1864 * produce just X'X^{-1}.
1865 *
1866 * Inverts the Cholesky-decomposed X'X and computes the
1867 * coefficient covariance matrix.
1868 *
1869 * Returns: 0 on successful completion, non-zero code on error.
1870 */
1871
int makevcv (MODEL *pmod, double sigma)
{
    int dec, mst, kk, i, j, kj, icnt, m, k, l = 0;
    int nv, nxpx;
    double d;

    if (pmod->vcv != NULL) {
	/* already done */
	return 0;
    }

    if (pmod->xpx == NULL) {
	/* raw material not available */
	return E_BADSTAT;
    }

    /* the vcv is stored in the same packed lower-triangular
       (vech) layout as pmod->xpx */
    nv = pmod->ncoeff;
    nxpx = (nv * nv + nv) / 2;
    mst = nxpx;
    kk = nxpx - 1;

    pmod->vcv = malloc(nxpx * sizeof *pmod->vcv);
    if (pmod->vcv == NULL) {
	return E_ALLOC;
    }

    /* invert the Cholesky-decomposed X'X in packed form, working
       backwards from the last column; note that the diagonal of
       pmod->xpx holds reciprocals (see cholbeta) */
    for (i=0; i<nv; i++) {
	mst -= i;
	/* find diagonal element */
	d = pmod->xpx[kk];
	if (i > 0) {
	    for (j=kk+1; j<=kk+i; j++) {
		d -= pmod->xpx[j] * pmod->vcv[j];
	    }
	}
	pmod->vcv[kk] = d * pmod->xpx[kk];
	/* find off-diagonal elements indexed by kj */
	kj = kk;
	kk = kk - i - 2;
	if (i > nv - 2) {
	    /* no off-diagonal elements remain in this pass */
	    continue;
	}
	for (j=i+1; j<nv; j++) {
	    icnt = i+1;
	    kj -= j;
	    d = 0.0;
	    m = mst + 1;
	    for (k=0; k<=j-1; k++) {
		/* step m through the packed array: the decrement
		   pattern follows the triangular column lengths */
		if (icnt > 0) {
		    dec = 1;
		    icnt--;
		} else {
		    dec = k;
		}
		m -= dec;
		l = kj + i - k;
		d += pmod->vcv[m-1] * pmod->xpx[l];
	    }
	    pmod->vcv[kj] = -d * pmod->xpx[l-1];
	}
    }

    if (pmod->ci == LOGIT || pmod->ci == PROBIT) {
	/* ML estimators: covariance is (X'X)^{-1} itself */
	sigma = 1.0;
    }

    /* scale by the error variance unless sigma == 1.0
       (the "just X'X^{-1}" case) */
    if (sigma != 1.0) {
	double s2 = sigma * sigma;

	for (i=0; i<nxpx; i++) {
	    pmod->vcv[i] *= s2;
	}
    }

    return 0;
}
1948
1949 /**
1950 * dwstat:
1951 * @order: order of autoregression (usually 1).
1952 * @pmod: pointer to model.
1953 * @dset: pointer to dataset.
1954 *
1955 * Computes the Durbin-Watson statistic for @pmod.
1956 *
1957 * Returns: the D-W value, or #NADBL on error.
1958 */
1959
double dwstat (int order, MODEL *pmod, const DATASET *dset)
{
    const double *w = NULL;
    double et, elag;
    double numer = 0.0;
    double denom = 0.0;
    int t, start;

    if (pmod->ess <= 0.0) {
	/* perfect fit (or invalid ESS): D-W undefined */
	return NADBL;
    }

    start = pmod->t1 + order;

    if (pmod->nwt) {
	/* WLS: build the denominator from squared residuals,
	   starting with the pre-loop observation */
	w = dset->Z[pmod->nwt];
	et = pmod->uhat[start - 1];
	if (!na(et)) {
	    denom += et * et;
	}
    } else {
	/* OLS: denominator is simply the ESS */
	denom = pmod->ess;
    }

    for (t=start; t<=pmod->t2; t++) {
	et = pmod->uhat[t];
	elag = pmod->uhat[t-1];
	if (na(et) || na(elag) ||
	    (w != NULL && (w[t] == 0.0 || w[t-1] == 0.0))) {
	    /* skip missing or zero-weight observations */
	    continue;
	}
	numer += (et - elag) * (et - elag);
	if (pmod->nwt) {
	    denom += et * et;
	}
    }

    return numer / denom;
}
1999
2000 /* altrho: alternative calculation of rho */
2001
altrho(int order,int t1,int t2,const double * uhat)2002 static double altrho (int order, int t1, int t2, const double *uhat)
2003 {
2004 int t, n, len = t2 - (t1 + order) + 1;
2005 double *ut, *u1;
2006 double uht, uh1, rho;
2007
2008 ut = malloc(2 * len * sizeof *ut);
2009 if (ut == NULL) {
2010 return NADBL;
2011 }
2012
2013 u1 = ut + len;
2014 n = 0;
2015
2016 for (t=t1+order; t<=t2; t++) {
2017 uht = uhat[t];
2018 uh1 = (t > 0)? uhat[t-1] : NADBL;
2019 if (!na(uht) && !na(uh1)) {
2020 ut[n] = uht;
2021 u1[n] = uh1;
2022 n++;
2023 }
2024 }
2025
2026 rho = gretl_corr(0, n - 1, ut, u1, NULL);
2027
2028 free(ut);
2029
2030 return rho;
2031 }
2032
2033 /**
2034 * rhohat:
2035 * @order: order of autoregression, usually 1.
2036 * @t1: start of sample range.
2037 * @t2: end of sample range.
2038 * @uhat: array of regression residuals.
2039 *
2040 * Computes the first order serial correlation coefficient
2041 * for @uhat, over the range @t1 to @t2.
2042 *
2043 * Returns: the \hat{rho} value, or #NADBL on error.
2044 */
2045
rhohat(int order,int t1,int t2,const double * uhat)2046 double rhohat (int order, int t1, int t2, const double *uhat)
2047 {
2048 double ut, u1, num = 0.0, den = 0.0;
2049 double rho;
2050 int t;
2051
2052 for (t=t1+order; t<=t2; t++) {
2053 ut = uhat[t];
2054 u1 = uhat[t-1];
2055 if (na(ut) || na(u1)) {
2056 continue;
2057 }
2058 num += ut * u1;
2059 den += u1 * u1;
2060 }
2061
2062 if (den < DBL_EPSILON) {
2063 return NADBL;
2064 }
2065
2066 rho = num / den;
2067
2068 if (rho > 1.0 || rho < -1.0) {
2069 /* out of bounds, try again */
2070 rho = altrho(order, t1, t2, uhat);
2071 }
2072
2073 return rho;
2074 }
2075
hilu_plot(double * ssr,double * rho,int n)2076 static int hilu_plot (double *ssr, double *rho, int n)
2077 {
2078 FILE *fp;
2079 int i, err = 0;
2080
2081 fp = open_plot_input_file(PLOT_REGULAR, 0, &err);
2082 if (err) {
2083 return err;
2084 }
2085
2086 fputs("# hildreth-lu\n", fp);
2087 fputs("set xlabel 'rho'\n", fp);
2088
2089 fprintf(fp, "set ylabel '%s'\n", _("ESS"));
2090
2091 fputs("set nokey\n", fp);
2092 fputs("set xrange [-1.0:1.0]\n", fp);
2093 fputs("plot '-' using 1:2 w impulses\n", fp);
2094
2095 gretl_push_c_numeric_locale();
2096
2097 for (i=0; i<n; i++) {
2098 fprintf(fp, "%g %g\n", rho[i], ssr[i]);
2099 }
2100 fputs("e\n", fp);
2101
2102 gretl_pop_c_numeric_locale();
2103
2104 return finalize_plot_input_file(fp);
2105 }
2106
2107 /* calculation of rhohat for use with CORC/PWE */
2108
/* calculation of rhohat for use with CORC/PWE */

static double autores (MODEL *pmod, DATASET *dset, gretlopt opt)
{
    double *u = pmod->uhat;
    double et, num = 0.0, den = 0.0;
    int dv = pmod->list[1];
    int start = pmod->t1;
    int t, j;

    if (!(opt & OPT_P)) {
	/* not using Prais-Winsten: include one pre-sample obs */
	start--;
    }

    for (t=start; t<=pmod->t2; t++) {
	/* recompute the residual from the raw (undifferenced) data */
	et = dset->Z[dv][t];
	for (j=0; j<pmod->ncoeff; j++) {
	    et -= pmod->coeff[j] * dset->Z[pmod->list[j+2]][t];
	}
	u[t] = et;
	if (t > start) {
	    num += u[t] * u[t-1];
	    den += u[t-1] * u[t-1];
	}
    }

    /* NOTE(review): den == 0 would yield NaN here — presumably
       excluded upstream; confirm */
    return num / den;
}
2137
/* A printed row of the Hildreth-Lu trace ends at rho values
   -0.80, -0.50, ..., 1.00 (k is rho scaled by 100), i.e. at
   intervals of 30 starting from k = -80. */

static int rho_val_ends_row (int k)
{
    return k >= -80 && k <= 100 && (k + 80) % 30 == 0;
}
2151
static void hilu_print_iter (double r, double ess, int iter,
			     double *ssr, double *rh,
			     int *ip, int asciiplot,
			     PRN *prn)
{
    char buf[8];
    int k;

    /* represent rho as an integer in [-99, 99] */
    sprintf(buf, "%.1f", 100 * r);
    k = atoi(buf);

    /* we'll not print all 199 rho values */
    if (k != -99 && k != 99 && k % 10 != 0) {
	return;
    }

    if (asciiplot) {
	/* recording a subset of values */
	ssr[*ip] = ess;
	rh[*ip] = r;
	*ip += 1;
    }

    pprintf(prn, " %5.2f %#12.6g", r, ess);

    if (rho_val_ends_row(k)) {
	pputc(prn, '\n');
    } else {
	bufspace(3, prn);
    }
}
2180
/* Print the column headings for the Hildreth-Lu search trace:
   three rho/ESS column pairs per row. */

static void hilu_header (PRN *prn)
{
    int i;

    pputs(prn, "\n ");

    for (i=0; i<3; i++) {
	pputs(prn, "rho");
	bufspace(10, prn);
	pputs(prn, _("ESS"));
	if (i < 2) {
	    /* gap before the next column pair */
	    pputs(prn, " ");
	}
    }

    pputc(prn, '\n');
}
2198
/* Scan rho values from @r0 to @r1 in steps of @step, re-estimating
   the model at each value and updating @essmin and @hl_rho whenever
   a lower ESS is found. Returns non-zero on estimation error. */

static int hilu_refine (const int *list, DATASET *dset,
			MODEL *pmod, double r0, double r1,
			double step, double *essmin, double *hl_rho)
{
    double r;

    for (r = r0; r <= r1; r += step) {
	clear_model(pmod);
	*pmod = ar1_lsq(list, dset, OLS, OPT_A, r);
	if (pmod->errcode) {
	    return pmod->errcode;
	}
	if (pmod->ess < *essmin) {
	    *essmin = pmod->ess;
	    *hl_rho = r;
	}
    }

    return 0;
}

/* Hildreth-Lu grid search for the value of rho that minimizes
   the error sum of squares, with optional refinement very close
   to rho = 1, trace printing and plotting.

   Returns the ESS-minimizing rho, or NADBL on error (in which
   case pmod->errcode is set).
*/

static double hilu_search (const int *list, DATASET *dset,
			   MODEL *pmod, gretlopt opt,
			   PRN *prn)
{
    double *rh = NULL, *ssr = NULL;
    double essmin = 1.0e200;
    double ess, r, hl_rho = 0;
    int quiet = (opt & OPT_Q);
    int niceplot = (opt & OPT_G);
    int iter = 0, i = 0;
    int nplot = 0;

    if (!quiet) {
	/* allocate arrays for recording plot info */
	nplot = niceplot ? 199 : 21;
	ssr = malloc(2 * nplot * sizeof *ssr);
	if (ssr == NULL) {
	    pmod->errcode = E_ALLOC;
	    return NADBL;
	}
	rh = ssr + nplot;
	/* and print header */
	hilu_header(prn);
    }

    /* basic grid: rho from -0.99 to 0.99 in steps of 0.01 */
    for (r = -0.99; r < 1.0; r += .01, iter++) {
	clear_model(pmod);

	*pmod = ar1_lsq(list, dset, OLS, OPT_A, r);
	if (pmod->errcode) {
	    free(ssr);
	    return NADBL;
	}

	ess = pmod->ess;

	if (!quiet) {
	    hilu_print_iter(r, ess, iter, ssr, rh, &i,
			    !niceplot, prn);
	    if (niceplot) {
		/* record full data for plotting */
		ssr[i] = ess;
		rh[i++] = r;
	    }
	}

	if (iter == 0 || ess < essmin) {
	    essmin = ess;
	    hl_rho = r;
	}
    } /* end of basic iteration */

    if (hl_rho > 0.989) {
	/* refine the search in this funny region near 1 */
	if (hilu_refine(list, dset, pmod, 0.99, 0.999, .001,
			&essmin, &hl_rho)) {
	    free(ssr);
	    return NADBL;
	}
    }

    if (hl_rho > 0.9989) {
	/* and in this even funnier one */
	if (hilu_refine(list, dset, pmod, 0.9991, 0.9999, .0001,
			&essmin, &hl_rho)) {
	    free(ssr);
	    return NADBL;
	}
    }

    if (!quiet) {
	pprintf(prn, _("\n\nESS is minimized for rho = %g\n\n"), hl_rho);
	if (niceplot) {
	    hilu_plot(ssr, rh, nplot);
	} else {
	    /* ascii plot */
	    graphyx(ssr, rh, nplot, "ESS", "RHO", prn);
	    pputs(prn, "\n");
	}
    }

    free(ssr);

    return hl_rho;
}
2300
/* Print the heading for the CORC/Prais-Winsten iteration trace. */

static void corc_header (gretlopt opt, PRN *prn)
{
    /* OPT_H: we're fine-tuning a Hildreth-Lu result */
    const char *hdr = (opt & OPT_H)?
	_("\nFine-tune rho using the CORC procedure...\n\n") :
	_("\nPerforming iterative calculation of rho...\n\n");

    pputs(prn, hdr);
    pputs(prn, _(" ITER RHO ESS"));
    pputc(prn, '\n');
}
2312
2313 /* Unlike plain CORC, PWE needs the term sqrt(1 - rho^2), so
2314 we really cannot proceed if |rho| >= 1.0, even just to see
2315 where rho ends up, as we now do with CORC.
2316 */
2317
static int pwe_error (double rho, int quiet, int iter, PRN *prn)
{
    /* record the error message for retrieval by the caller */
    gretl_errmsg_sprintf(_("Prais-Winsten: invalid rho value %g "
			   "(explosive error)"), rho);

    if (quiet) {
	return E_NOCONV;
    }

    /* also show the failed iteration in the trace */
    pprintf(prn, " %10d %12.5f", iter, rho);
    pprintf(prn, " NA\n");

    return E_NOCONV;
}
2329
2330 /*
2331 * estimate_rho:
2332 * @list: dependent variable plus list of regressors.
2333 * @dset: dataset struct.
2334 * @opt: option flags: may include OPT_H to use Hildreth-Lu,
2335 * OPT_P to use Prais-Winsten, OPT_B to suppress Cochrane-Orcutt
2336 * fine-tuning of Hildreth-Lu results, OPT_G to generate
2337 * a gnuplot graph of the search in Hildreth-Lu case.
2338 * @prn: gretl printing struct.
2339 * @err: location to receive error code.
2340 *
2341 * Estimate the quasi-differencing coefficient for use with the
2342 * Cochrane-Orcutt, Hildreth-Lu or Prais-Winsten procedures for
2343 * handling first-order serial correlation. Print a trace of the
2344 * search for rho.
2345 *
2346 * Returns: rho estimate on successful completion, #NADBL on error.
2347 */
2348
static double estimate_rho (const int *list, DATASET *dset,
			    gretlopt opt, PRN *prn, int *err)
{
    /* save the dataset's sample range: it is narrowed below and
       must be restored on exit */
    int save_t1 = dset->t1;
    int save_t2 = dset->t2;
    int quiet = (opt & OPT_Q);
    int pwe = (opt & OPT_P);
    double rho = 0.0;
    MODEL armod;

    *err = list_adjust_sample(list, &dset->t1, &dset->t2,
			      dset, NULL);
    if (*err) {
	goto bailout;
    }

    gretl_model_init(&armod, dset);

    if (opt & OPT_H) {
	/* Do Hildreth-Lu first */
	rho = hilu_search(list, dset, &armod, opt, prn);
    } else {
	/* Initialize Cochrane-Orcutt (or Prais-Winsten) with the
	   first-order autocorrelation of the OLS residuals */
	armod = lsq(list, dset, OLS, OPT_A);
	if (!armod.errcode && armod.dfd == 0) {
	    armod.errcode = E_DF;
	}
	if (!armod.errcode) {
	    rho = armod.rho;
	}
    }

    if (armod.errcode) {
	*err = armod.errcode;
    } else if (!(opt & OPT_H) || !(opt & OPT_B)) {
	/* either not Hildreth-Lu, or Hildreth-Lu but CORC
	   fine-tuning is wanted (has not been suppressed
	   via OPT_B)
	*/
	gretlopt lsqopt = pwe ? (OPT_A | OPT_P) : OPT_A;
	/* convergence tolerance on relative ESS change */
	double edsmall = 5.0e-8;
	double ess0 = armod.ess;
	double rho0 = rho;
	/* convergence tolerance on change in rho; looser
	   under OPT_L */
	double rdsmall = (opt & OPT_L)? 0.001 : 1.0e-6;
	int iter = 0, itermax = 100;
	int converged = 0;

	if (!quiet) {
	    corc_header(opt, prn);
	}

	/* iterate: GLS given current rho, then update rho from
	   the residuals, until ESS or rho stabilizes */
	while (++iter <= itermax && !converged) {
	    clear_model(&armod);
	    armod = ar1_lsq(list, dset, OLS, lsqopt, rho);
	    if ((*err = armod.errcode)) {
		break;
	    }

	    if (!quiet) {
		pprintf(prn, " %10d %12.5f", iter, rho);
		pprintf(prn, " %#.6g\n", armod.ess);
	    }

	    rho = autores(&armod, dset, opt);

	    if (pwe && fabs(rho) >= 1.0) {
		/* PWE needs sqrt(1 - rho^2): cannot continue */
		*err = pwe_error(rho, quiet, iter + 1, prn);
		break;
	    }

	    if (armod.ess == 0.0) {
		converged = 1;
	    } else if ((ess0 - armod.ess) / ess0 < edsmall) {
		converged = 1;
	    } else if (fabs(rho - rho0) < rdsmall) {
		converged = 1;
	    }

	    ess0 = armod.ess;
	    rho0 = rho;
	} /* end Cochrane-Orcutt iteration */

	if (converged && (rho >= 1.0 || rho <= -1.0)) {
	    /* "converged" on an explosive value: reject */
	    gretl_errmsg_sprintf(_("%s: rho = %g, error is unbounded"),
				 "ar1", rho);
	    converged = 0;
	}

	if (!converged && !*err) {
	    *err = E_NOCONV;
	}

	if (!quiet) {
	    if (*err) {
		pputc(prn, '\n');
	    } else {
		/* print the final accepted iteration */
		pprintf(prn, " %10d %12.5f", iter, rho);
		pprintf(prn, " %#.6g\n", armod.ess);
	    }
	}
    }

    clear_model(&armod);

 bailout:

    /* restore the caller's sample range */
    dset->t1 = save_t1;
    dset->t2 = save_t2;

    if (*err) {
	rho = NADBL;
    }

    return rho;
}
2464
2465 /**
2466 * augment_regression_list:
2467 * @orig: list giving original regression specification.
2468 * @aux: either %AUX_SQ, %AUX_LOG or %AUX_WHITE.
2469 * @dset: dataset struct.
2470 * @err: location to receive error code.
2471 *
2472 * Augment the regression list @orig with auxiliary terms. If @aux
2473 * is %AUX_SQ add the squares of the original regressors; if @aux
2474 * is %AUX_WHITE add squares and cross-products, or if @aux is
2475 * %AUX_LOG add the natural logs of the original regressors.
2476 * If they are not already present, these variables are added
2477 * to the data array.
2478 *
2479 * Returns: the augmented list, or NULL on failure.
2480 */
2481
int *augment_regression_list (const int *orig, int aux,
			      DATASET *dset, int *err)
{
    int *biglist;
    int maxterms;
    int constnum = 0;
    int i, pos;

    /* work out an upper bound on the augmented list length */
    if (aux == AUX_WHITE) {
	int cpos = gretl_list_const_pos(orig, 2, dset);
	int nx = orig[0] - 1;
	int nextra;

	if (cpos > 0) {
	    /* don't count the constant among the regressors */
	    nx--;
	    constnum = orig[cpos];
	}
	/* squares plus cross-products */
	nextra = (nx * nx + nx) / 2;
	maxterms = orig[0] + nextra + 1;
    } else {
	maxterms = 2 * orig[0];
    }

    biglist = malloc(maxterms * sizeof *biglist);
    if (biglist == NULL) {
	*err = E_ALLOC;
	return NULL;
    }

    /* transcribe original list */
    for (i=0; i<=orig[0]; i++) {
	biglist[i] = orig[i];
    }

    /* add squares, cross-products or logs of independent vars */

    pos = biglist[0];

    for (i=2; i<=orig[0]; i++) {
	int newv, vi = orig[i];

	if (vi == 0) {
	    /* skip the constant */
	    continue;
	}

	if (aux == AUX_SQ || aux == AUX_WHITE) {
	    newv = xpxgenr(vi, vi, dset);
	    if (newv > 0) {
		biglist[++pos] = newv;
	    }
	    if (aux == AUX_WHITE) {
		int j, vj;

		for (j=i+1; j<=orig[0]; j++) {
		    vj = orig[j];
		    if (vj == constnum) {
			continue;
		    }
		    newv = xpxgenr(vi, vj, dset);
		    if (newv > 0) {
			/* ensure uniqueness of generated varnames */
			sprintf(dset->varname[newv], "X%d_X%d", i-1, j-1);
			biglist[++pos] = newv;
		    }
		}
	    }
	} else if (aux == AUX_LOG) {
	    /* don't try to log series with non-positive values */
	    if (gretl_ispositive(dset->t1, dset->t2, dset->Z[vi], 1)) {
		newv = loggenr(vi, dset);
		if (newv > 0) {
		    biglist[++pos] = newv;
		}
	    }
	}
    }

    biglist[0] = pos;

    return biglist;
}
2561
2562 /* For observation s, see if the regression list contains a variable
2563 that has a single non-zero value at that particular observation.
2564 We run this check on observations showing an OLS residual of zero.
2565 */
2566
observation_is_dummied(const MODEL * pmod,int * list,const double ** Z,int s)2567 static int observation_is_dummied (const MODEL *pmod,
2568 int *list, const double **Z,
2569 int s)
2570 {
2571 int i, t, v;
2572 int ret = 0;
2573
2574 for (i=list[0]; i>=2; i--) {
2575 v = list[i];
2576 if (v == 0) {
2577 continue;
2578 }
2579 ret = 1;
2580 for (t=pmod->t1; t<=pmod->t2; t++) {
2581 if ((t == s && Z[v][t] == 0.0) || (t != s && Z[v][t] != 0.0)) {
2582 ret = 0;
2583 break;
2584 }
2585 }
2586 if (ret) {
2587 gretl_list_delete_at_pos(list, i);
2588 break;
2589 }
2590 }
2591
2592 return ret;
2593 }
2594
copy_ensure_const(const int * orig)2595 static int *copy_ensure_const (const int *orig)
2596 {
2597 int *list = gretl_list_new(orig[0] + 1);
2598 int i;
2599
2600 if (list != NULL) {
2601 list[1] = orig[1];
2602 list[2] = 0;
2603 for (i=3; i<=list[0]; i++) {
2604 list[i] = orig[i-1];
2605 }
2606 }
2607
2608 return list;
2609 }
2610
2611 /* get_hsk_weights: take the residuals from the model @pmod, square
2612 them and take logs; find the fitted values for this series using an
2613 auxiliary regression including the original independent variables
2614 (and their squares, if not given OPT_N); exponentiate the fitted
2615 values; and add the resulting series to the data set.
2616 */
2617
/* get_hsk_weights: take the residuals from @pmod, square them and take
   logs; regress this series on the original regressors (plus their
   squares, unless OPT_N is given); exponentiate the fitted values and
   write the reciprocals into a new series, which becomes the WLS
   weight variable (the last series in @dset on successful return).

   Returns 0 on success, non-zero error code on failure. On failure
   any series added here are removed from @dset.
*/

static int get_hsk_weights (MODEL *pmod, DATASET *dset, gretlopt opt)
{
    int oldv = dset->v;
    int t, t1 = dset->t1, t2 = dset->t2;
    int *lcpy = NULL;
    int *auxlist = NULL;
    int err = 0, shrink = 0;
    double xx;
    MODEL aux;

    /* regression list with a constant guaranteed present */
    if (pmod->ifc) {
        lcpy = gretl_list_copy(pmod->list);
    } else {
        lcpy = copy_ensure_const(pmod->list);
    }

    if (lcpy == NULL) {
        return E_ALLOC;
    }

    /* allocate space for an additional variable */
    if (dataset_add_series(dset, 1)) {
        free(lcpy);
        return E_ALLOC;
    }

    /* add transformed residuals to data set */
    for (t=0; t<dset->n; t++) {
        xx = pmod->uhat[t];
        if (na(xx)) {
            dset->Z[oldv][t] = NADBL;
        } else if (xx == 0.0) {
            if (observation_is_dummied(pmod, lcpy, (const double **) dset->Z, t)) {
                /* the zero residual is produced by a single-obs dummy:
                   exclude this observation from the aux regression */
                dset->Z[oldv][t] = NADBL;
            } else {
                fprintf(stderr, "hsk: got a zero residual, could be a problem!\n");
                dset->Z[oldv][t] = -1.0e16; /* ?? */
            }
        } else {
            dset->Z[oldv][t] = log(xx * xx);
        }
    }

    if (opt & OPT_N) {
        /* --no-squares option */
        auxlist = lcpy;
    } else {
        /* build regression list, adding the squares of the original
           independent vars */
        auxlist = augment_regression_list(lcpy, AUX_SQ, dset, &err);
        if (err) {
            /* bug fix: this path previously leaked @lcpy and left the
               newly added series (log-squared residuals, plus any
               squares generated before the failure) in the dataset */
            dataset_drop_last_variables(dset, dset->v - oldv);
            free(lcpy);
            return err;
        }
    }

    auxlist[1] = oldv; /* dependent var: the newly added log(uhat-squared) */

    dset->t1 = pmod->t1;
    dset->t2 = pmod->t2;

    aux = lsq(auxlist, dset, OLS, OPT_A);
    err = aux.errcode;
    if (err) {
        /* drop everything we added, including the weight series */
        shrink = dset->v - oldv;
    } else {
        /* write into the data set the required weights */
        for (t=aux.t1; t<=aux.t2; t++) {
            xx = aux.yhat[t];
            if (na(xx)) {
                dset->Z[oldv][t] = NADBL;
            } else {
                dset->Z[oldv][t] = 1.0 / exp(xx);
            }
        }
        /* keep the weight series, drop any generated squares */
        shrink = dset->v - oldv - 1;
    }

    dset->t1 = t1;
    dset->t2 = t2;

    clear_model(&aux);

    if (shrink > 0) {
        dataset_drop_last_variables(dset, shrink);
    }

    if (auxlist != lcpy) {
        free(auxlist);
    }
    free(lcpy);

    return err;
}
2711
2712 /**
2713 * hsk_model:
2714 * @list: dependent variable plus list of regressors.
2715 * @dset: dataset struct.
2716 * @opt: may include OPT_N to suppress use of squares
2717 * of regressors in the auxiliary regression.
2718 *
2719 * Estimate the model given in @list using a correction for
2720 * heteroskedasticity.
2721 *
2722 * Returns: a #MODEL struct, containing the estimates.
2723 */
2724
hsk_model(const int * list,DATASET * dset,gretlopt opt)2725 MODEL hsk_model (const int *list, DATASET *dset, gretlopt opt)
2726 {
2727 int i, err;
2728 int orig_nvar = dset->v;
2729 int *hsklist;
2730 MODEL hsk;
2731
2732 if (dset->Z == NULL) {
2733 hsk.errcode = E_DATA;
2734 return hsk;
2735 }
2736
2737 gretl_error_clear();
2738
2739 /* run initial OLS */
2740 hsk = lsq(list, dset, OLS, OPT_A);
2741 if (hsk.errcode) {
2742 return hsk;
2743 }
2744
2745 /* form weights based on the OLS residuals */
2746 err = get_hsk_weights(&hsk, dset, opt);
2747 if (err) {
2748 hsk.errcode = err;
2749 return hsk;
2750 }
2751
2752 /* allocate regression list for weighted least squares */
2753 hsklist = gretl_list_new(list[0] + 1);
2754 if (hsklist == NULL) {
2755 hsk.errcode = E_ALLOC;
2756 return hsk;
2757 }
2758
2759 /* the last variable in the dataset will be the weight var */
2760 hsklist[1] = dset->v - 1;
2761
2762 /* put the original dependent variable in at position 2 */
2763 hsklist[2] = list[1];
2764
2765 /* add the original independent vars into the WLS list */
2766 for (i=3; i<=hsklist[0]; i++) {
2767 hsklist[i] = list[i-1];
2768 }
2769
2770 clear_model(&hsk);
2771 hsk = lsq(hsklist, dset, WLS, OPT_NONE);
2772 hsk.ci = HSK;
2773
2774 if (opt & OPT_N) {
2775 gretl_model_set_int(&hsk, "no-squares", 1);
2776 hsk.opt |= OPT_N;
2777 }
2778
2779 dataset_drop_last_variables(dset, dset->v - orig_nvar);
2780
2781 free(hsklist);
2782
2783 return hsk;
2784 }
2785
/* Print heading, statistic and two-sided normal p-value for the
   Pesaran-Taylor HET_1 test. */

static void print_HET_1 (double z, double pval, PRN *prn)
{
    const char *head = _("Pesaran-Taylor test for heteroskedasticity");

    pprintf(prn, "\n%s\n", head);
    pprintf(prn, "\n%s: HET_1 = %f,\n", _("Test statistic"), z);
    pprintf(prn, "%s = 2 * P(z > %f) = %.3g\n\n",
            _("with p-value"), z, pval);
}
2793
/* Print the heading and result line for White's test or its
   Breusch-Pagan variant (OPT_B), including the chi-square p-value. */

static void print_whites_test (double LM, int df, double pval,
                               gretlopt opt, PRN *prn)
{
    if (!(opt & OPT_B)) {
        /* White's test proper */
        pprintf(prn, "\n%s", _("White's test for heteroskedasticity"));
        if (!(opt & OPT_X)) {
            pputc(prn, '\n');
        } else {
            pprintf(prn, " (%s)\n", _("squares only"));
        }
        pprintf(prn, "\n%s: TR^2 = %f,\n", _("Test statistic"), LM);
    } else {
        /* Breusch-Pagan variant */
        pprintf(prn, "\n%s\n", _("Breusch-Pagan test for heteroskedasticity"));
        if (opt & OPT_R) {
            pprintf(prn, "%s\n", _("with Koenker robust variance estimator"));
        }
        pprintf(prn, "\n%s: LM = %f,\n", _("Test statistic"), LM);
    }

    pprintf(prn, "%s = P(%s(%d) > %f) = %f\n\n",
            _("with p-value"), _("Chi-square"),
            df, LM, pval);
}
2817
2818 /* For White's test, see if we have enough degrees of freedom
2819 to add squares -- and if so, if we have enough df to add
2820 cross-products also.
2821 */
2822
get_whites_aux(const MODEL * pmod,const double ** Z)2823 static int get_whites_aux (const MODEL *pmod, const double **Z)
2824 {
2825 int aux = AUX_WHITE;
2826 int rem = pmod->ncoeff - pmod->ifc - 1;
2827 int nsq = 0, nxpx = 0;
2828 int i, v;
2829
2830 for (i=2; i<=pmod->list[0]; i++) {
2831 v = pmod->list[i];
2832 if (v > 0) {
2833 if (!gretl_isdummy(pmod->t1, pmod->t2, Z[v])) {
2834 nsq++;
2835 }
2836 nxpx += rem--;
2837 }
2838 }
2839
2840 if (pmod->dfd - nsq < 1) {
2841 aux = AUX_NONE;
2842 } else if (pmod->dfd - nsq - nxpx < 1) {
2843 aux = AUX_SQ;
2844 }
2845
2846 return aux;
2847 }
2848
2849 #define PT_DEBUG 0
2850
2851 /**
2852 * tsls_hetero_test:
2853 * @pmod: pointer to model.
2854 * @dset: dataset struct.
2855 * @opt: if flags include OPT_S, save results to model; OPT_Q
2856 * means don't print the auxiliary regression.
2857 * @prn: gretl printing struct.
2858 *
2859 * Runs Pesaran and Taylor's (1999) HET_1 test for heteroskedasticity
2860 * on the given tsls model. The statistic is just a t-statistic, so
2861 * under the null it is distributed as a standard normal.
2862 *
2863 * Returns: 0 on successful completion, error code on error.
2864 */
2865
static int tsls_hetero_test (MODEL *pmod, DATASET *dset,
                             gretlopt opt, PRN *prn)
{
    int pos, newv = dset->v;
    int *ptlist = NULL, *testlist = NULL;
    int save_t1 = dset->t1;
    int save_t2 = dset->t2;
    MODEL ptmod;
    double x;
    int i, h, t;
    int err = 0;

    /* number of instruments (terms after the list separator) */
    pos = gretl_list_separator_position(pmod->list);
    h = pmod->list[0] - pos;

#if PT_DEBUG
    /* bug fix: this referenced undeclared @v; the local is @newv,
       so PT_DEBUG builds failed to compile */
    pprintf(prn, "newv = %d, h = %d\n", newv, h);
#endif

    ptlist = gretl_list_new(h + 1);
    testlist = gretl_list_new(3);

    if (ptlist == NULL || testlist == NULL) {
        free(ptlist);
        free(testlist);
        return E_ALLOC;
    }

    /* dependent variable plus all the instruments */
    ptlist[1] = pmod->list[1];
    for (i=2; i<=ptlist[0]; i++) {
        ptlist[i] = pmod->list[i + pos - 1];
    }

    /* uhat^2 = const + yhat^2 */
    testlist[1] = newv;
    testlist[2] = 0;
    testlist[3] = newv + 1;

#if PT_DEBUG
    printlist(ptlist, "ptlist");
    printlist(testlist, "testlist");
#endif

    /* reduced form: regress the original dependent variable on all of
       the instruments from the original model
    */
    ptmod = lsq(ptlist, dset, OLS, OPT_A);
    err = ptmod.errcode;
    if (err) {
        goto bailout;
    }

#if PT_DEBUG
    printmodel(&ptmod, dset, OPT_S, prn);
#endif

    /* add two series: (i) the squares of the residuals from the
       original model and (ii) the squares of the fitted values from
       the auxiliary regression above
    */
    err = dataset_add_series(dset, 2);
    if (err) {
        clear_model(&ptmod);
        goto bailout;
    }

    strcpy(dset->varname[newv+1], "yhat^2");

    for (t=pmod->t1; t<=pmod->t2; t++) {
        x = pmod->uhat[t];
        dset->Z[newv][t] = x*x;     /* squared residual */
        x = ptmod.yhat[t];
        dset->Z[newv+1][t] = x*x;   /* squared fitted value */
    }

    clear_model(&ptmod);

    /* restrict sample to that of the original model */
    dset->t1 = pmod->t1;
    dset->t2 = pmod->t2;

    /* regress the squared residuals on the squared fitted
       values from the reduced-form auxiliary regression
    */
    ptmod = lsq(testlist, dset, OLS, OPT_A);
    err = ptmod.errcode;

    if (!err) {
        /* HET_1 is the t-ratio on yhat^2, N(0,1) under the null */
        double z = fabs(ptmod.coeff[1]) / ptmod.sderr[1];
        double pval = 2.0 * (1 - normal_cdf(z));

        if (opt & OPT_Q) {
            /* suppress the auxiliary regression printout */
            print_HET_1(z, pval, prn);
        } else {
            ptmod.aux = AUX_HET_1;
            printmodel(&ptmod, dset, OPT_NONE, prn);
        }

        if (opt & OPT_S) {
            /* attach the test result to @pmod */
            ModelTest *test = model_test_new(GRETL_TEST_HET_1);

            if (test != NULL) {
                model_test_set_teststat(test, GRETL_STAT_Z);
                model_test_set_value(test, z);
                model_test_set_pvalue(test, pval);
                maybe_add_test_to_model(pmod, test);
            }
        }

        record_test_result(z, pval);
    }

    clear_model(&ptmod);

    /* remove the two temporary series */
    dataset_drop_last_variables(dset, 2);

 bailout:

    free(ptlist);
    free(testlist);

    /* restore the original sample range */
    dset->t1 = save_t1;
    dset->t2 = save_t2;

    return err;
}
2990
2991 /* Compute LM statistic as per Breusch and Pagan (Econometrica, 1979),
2992 with the option to use the robust variance estimator proposed by
2993 Koenker (Journal of Econometrics, 1981).
2994 */
2995
static double get_BP_LM (MODEL *pmod, int *list, MODEL *aux,
                         DATASET *dset, gretlopt opt, int *err)
{
    double s2, u2t, gt;
    double V = 0.0, LM = NADBL;
    int t, v = list[1];

    /* note: no df adjustment */
    s2 = pmod->ess / pmod->nobs;

    if (opt & OPT_R) {
        /* calculate robust variance estimate a la Koenker:
           mean squared deviation of uhat^2 from s2 */
        for (t=pmod->t1; t<=pmod->t2; t++) {
            if (!na(pmod->uhat[t])) {
                u2t = pmod->uhat[t] * pmod->uhat[t];
                V += (u2t - s2) * (u2t - s2);
            }
        }
        V /= pmod->nobs;
    }

    /* write the dependent variable for the auxiliary regression:
       centered (Koenker) or scaled (plain Breusch-Pagan) squared
       residuals */
    for (t=pmod->t1; t<=pmod->t2; t++) {
        if (na(pmod->uhat[t])) {
            dset->Z[v][t] = NADBL;
        } else {
            u2t = pmod->uhat[t] * pmod->uhat[t];
            gt = (opt & OPT_R)? (u2t - s2) : (u2t / s2);
            dset->Z[v][t] = gt;
        }
    }

    /* run the auxiliary regression; the result is passed back
       to the caller via @aux */
    *aux = lsq(list, dset, OLS, OPT_A);
    *err = aux->errcode;

    if (!*err) {
        double RSS = aux->tss - aux->ess;

        if (RSS < 0) {
            *err = E_DATA;
        } else {
            if (opt & OPT_R) {
                /* Koenker: LM = explained SS / robust variance */
                aux->opt |= OPT_R;
                LM = RSS / V;
            } else {
                /* Breusch-Pagan: LM = explained SS / 2 */
                LM = .5 * RSS;
            }
            gretl_model_set_double(aux, "BPLM", LM);
            aux->aux = AUX_BP;
        }
    }

    return LM;
}
3049
ensure_const_copy_list(const MODEL * pmod,int * err)3050 static int *ensure_const_copy_list (const MODEL *pmod, int *err)
3051 {
3052 int *list = NULL;
3053
3054 if (pmod->ifc) {
3055 list = gretl_list_copy(pmod->list);
3056 } else {
3057 int i, nl = pmod->list[0] + 1;
3058
3059 list = gretl_list_new(nl);
3060 if (list != NULL) {
3061 list[1] = pmod->list[1];
3062 list[2] = 0; /* insert const */
3063 for (i=3; i<=nl; i++) {
3064 list[i] = pmod->list[i-1];
3065 }
3066 }
3067 }
3068
3069 if (list == NULL) {
3070 *err = E_ALLOC;
3071 }
3072
3073 return list;
3074 }
3075
/* As augment_regression_list, but first making sure the base list
   includes the constant. Returns the augmented list or NULL on error
   (with *err set).
*/

static int *ensure_const_augment_list (const MODEL *pmod, int aux,
                                       DATASET *dset, int *err)
{
    int *ret = NULL;

    if (pmod->ifc) {
        /* already has a constant: augment directly */
        ret = augment_regression_list(pmod->list, aux, dset, err);
    } else {
        int *base = ensure_const_copy_list(pmod, err);

        if (!*err) {
            ret = augment_regression_list(base, aux, dset, err);
            free(base);
        }
    }

    return ret;
}
3094
3095 /**
3096 * whites_test:
3097 * @pmod: pointer to model.
3098 * @dset: dataset struct.
3099 * @opt: if flags include OPT_S, save results to model; OPT_Q
3100 * means don't print the auxiliary regression; OPT_B means
3101 * do the simpler Breusch-Pagan variant; OPT_I means run
3102 * silently.
3103 * @prn: gretl printing struct.
3104 *
3105 * Runs White's test for heteroskedasticity on the given model.
3106 *
3107 * Returns: 0 on successful completion, error code on error.
3108 */
3109
int whites_test (MODEL *pmod, DATASET *dset,
                 gretlopt opt, PRN *prn)
{
    int BP = (opt & OPT_B);        /* Breusch-Pagan variant? */
    int aux = AUX_NONE;
    int v = dset->v;               /* ID of the uhat^2 series to be added */
    int *list = NULL;
    int save_t1 = dset->t1;
    int save_t2 = dset->t2;
    double zz, LM = 0;
    MODEL white;
    int t, err = 0;

    /* IVREG models get the Pesaran-Taylor test instead */
    if (pmod->ci == IVREG) {
        return tsls_hetero_test(pmod, dset, opt, prn);
    }

    if (pmod->list == NULL || gretl_list_has_separator(pmod->list)) {
        return E_NOTIMP;
    }

    /* estimators for which this test is not implemented */
    if (pmod->ci == NLS || pmod->ci == MLE ||
        pmod->ci == GMM || pmod->ci == ARMA ||
        pmod->ci == LOGISTIC) {
        return E_NOTIMP;
    }

    /* the original data must still be intact */
    if ((err = list_members_replaced(pmod, dset))) {
        return err;
    }

    /* restrict the sample to that of @pmod */
    impose_model_smpl(pmod, dset);

    /* what can we do, with the degrees of freedom available? */
    if (!BP) {
        if (opt & OPT_X) {
            /* user asked for squares only */
            aux = AUX_SQ;
        } else {
            aux = get_whites_aux(pmod, (const double **) dset->Z);
            if (aux == AUX_NONE) {
                /* not enough df even for squares */
                dset->t1 = save_t1;
                dset->t2 = save_t2;
                return E_DF;
            }
        }
    }

    gretl_model_init(&white, dset);

    /* make space in data set */
    if (dataset_add_series(dset, 1)) {
        err = E_ALLOC;
    }

    if (!err) {
        /* get residuals, square and add to data set */
        for (t=0; t<dset->n; t++) {
            zz = pmod->uhat[t];
            if (na(zz)) {
                dset->Z[v][t] = NADBL;
            } else {
                dset->Z[v][t] = zz * zz;
            }
        }
        strcpy(dset->varname[v], "uhatsq");
    }

    if (!err) {
        if (BP) {
            /* Breusch-Pagan: original regressors only */
            list = ensure_const_copy_list(pmod, &err);
        } else {
            /* build aux regression list, adding squares and
               (possibly) cross-products of the original
               independent vars */
            list = ensure_const_augment_list(pmod, aux, dset, &err);
        }
        if (!err){
            list[1] = v; /* the newly added uhat-squared */
        }
    }

    if (!err) {
        /* run auxiliary regression */
        if (BP) {
            LM = get_BP_LM(pmod, list, &white, dset, opt, &err);
        } else {
            white = lsq(list, dset, OLS, OPT_A);
            err = white.errcode;
            if (!err) {
                /* White: LM = T * R^2 from the aux regression */
                LM = white.rsq * white.nobs;
                white.aux = AUX_WHITE;
            }
        }
    }

    if (!err) {
        int df = white.ncoeff - 1;
        double pval = chisq_cdf_comp(df, LM);
        gretlopt testopt = OPT_NONE;

        if (BP) {
            if (opt & OPT_R) {
                /* Koenker robust variant */
                testopt = OPT_R;
            }
        } else if (opt & OPT_X) {
            /* squares only */
            testopt = OPT_X;
        }

        if (!(opt & OPT_I)) {
            if (opt & OPT_Q) {
                /* summary result only */
                print_whites_test(LM, df, pval, opt, prn);
            } else {
                /* full auxiliary regression printout */
                white.opt |= testopt;
                printmodel(&white, dset, OPT_NONE, prn);
            }
        }

        if (opt & OPT_S) {
            /* attach the test result to @pmod */
            ModelTestType mt = BP? GRETL_TEST_BP : GRETL_TEST_WHITES;
            ModelTest *test = model_test_new(mt);

            if (test != NULL) {
                model_test_set_teststat(test, GRETL_STAT_LM);
                model_test_set_dfn(test, df);
                model_test_set_value(test, LM);
                model_test_set_pvalue(test, pval);
                model_test_set_opt(test, testopt);
                maybe_add_test_to_model(pmod, test);
            }
        }

        record_test_result(LM, pval);
    }

    /* clean up: temporary model, added series, sample range */
    clear_model(&white);
    dataset_drop_last_variables(dset, dset->v - v);
    free(list);

    dset->t1 = save_t1;
    dset->t2 = save_t2;

    return err;
}
3255
/* Return the largest lag in AR lag list @list (gretl list format:
   list[0] holds the count, entries follow). Returns 0 for an empty
   list or one with no positive entries.
*/

static int ar_list_max (const int *list)
{
    int lmax = 0;
    int i;

    for (i=list[0]; i>=1; i--) {
        if (list[i] > lmax) {
            lmax = list[i];
        }
    }

    return lmax;
}
3268
3269 /**
3270 * ar_model:
3271 * @list: list of lags plus dependent variable and list of regressors.
3272 * @dset: dataset struct.
3273 * @opt: may contain OPT_O to print covariance matrix.
3274 * @prn: gretl printing struct.
3275 *
3276 * Estimate the model given in @list using the generalized
3277 * Cochrane-Orcutt procedure for autoregressive errors.
3278 *
3279 * Returns: #MODEL struct containing the results.
3280 */
3281
MODEL ar_model (const int *list, DATASET *dset,
                gretlopt opt, PRN *prn)
{
    double diff, ess, tss, xx;
    int i, j, vi, t, t1, t2, vc, yno, ryno, iter;
    int k, pmax, v = dset->v;
    int *arlist = NULL, *rholist = NULL;
    int *reglist = NULL, *reglist2 = NULL;
    int cpos, nvadd;
    MODEL ar, rhomod;

    gretl_error_clear();
    gretl_model_init(&ar, dset);
    gretl_model_init(&rhomod, dset);

    /* split @list into the AR lag list and the regression list
       at the separator */
    ar.errcode = gretl_list_split_on_separator(list, &arlist, &reglist);

    if (!ar.errcode && arlist[0] == 1 && arlist[1] == 1) {
        /* special case: ar 1 ; ... => use AR1 apparatus */
        ar = ar1_model(reglist, dset, opt, prn);
        goto bailout;
    }

    if (!ar.errcode) {
        /* reglist2: transformed regression; rholist: regression of
           uhat on its own lags */
        reglist2 = gretl_list_new(reglist[0]);
        rholist = gretl_list_new(arlist[0] + 1);
        if (reglist2 == NULL || rholist == NULL) {
            ar.errcode = E_ALLOC;
        }
    }

    if (ar.errcode) {
        goto bailout;
    }

    pmax = ar_list_max(arlist);
    cpos = reglist_check_for_const(reglist, dset);

    /* first pass: estimate model via OLS: use OPT_M to generate an
       error in case of missing values within sample range
    */
    ar = lsq(reglist, dset, OLS, OPT_A | OPT_M);
    if (ar.errcode) {
        goto bailout;
    }

    /* series needed: uhat, its arlist[0] lags, and the rho-transformed
       versions of the dependent variable and regressors */
    nvadd = arlist[0] + 1 + reglist[0];

    /* allocate space for the uhat terms and transformed data */
    if (dataset_add_series(dset, nvadd)) {
        ar.errcode = E_ALLOC;
        goto bailout;
    }

    yno = reglist[1];
    t1 = ar.t1;
    t2 = ar.t2;
    rholist[1] = v; /* dependent var of rho regression: current uhat */

    pprintf(prn, "%s\n\n", _("Generalized Cochrane-Orcutt estimation"));
    bufspace(17, prn);
    /* xgettext:no-c-format */
    pputs(prn, _("ITER             ESS           %% CHANGE"));
    pputs(prn, "\n\n");

    /* now loop while ess is changing */
    diff = 1.0e6;
    ess = 0.0;

    for (iter = 1; iter <= 20 && diff > 0.005; iter++) {
        /* compute uhat from the current coefficient estimates */
        for (t=0; t<dset->n; t++) {
            if (t < t1 || t > t2) {
                dset->Z[v][t] = NADBL;
            } else {
                /* special computation of uhat */
                xx = dset->Z[yno][t];
                for (j=0; j<reglist[0]-1; j++) {
                    xx -= ar.coeff[j] * dset->Z[reglist[j+2]][t];
                }
                dset->Z[v][t] = xx;
            }
        }
        /* generate the lagged uhat series, one per AR lag */
        for (i=1; i<=arlist[0]; i++) {
            k = arlist[i];
            rholist[1+i] = v + i;
            for (t=0; t<dset->n; t++) {
                if (t < t1 + k || t > t2) {
                    dset->Z[v+i][t] = NADBL;
                } else {
                    dset->Z[v+i][t] = dset->Z[v][t-k];
                }
            }
        }

        /* estimate the rho terms */
        if (iter > 1) {
            clear_model(&rhomod);
        }
        rhomod = lsq(rholist, dset, OLS, OPT_A);

        /* and rho-transform the data: after the loop above, i equals
           arlist[0]+1, so the transformed series start at v + i */
        ryno = vc = v + i;
        for (i=1; i<=reglist[0]; i++) {
            vi = reglist[i];
            for (t=0; t<dset->n; t++) {
                if (t < t1 + pmax || t > t2) {
                    dset->Z[vc][t] = NADBL;
                } else {
                    /* quasi-difference using the estimated rhos */
                    xx = dset->Z[vi][t];
                    for (j=1; j<=arlist[0]; j++) {
                        k = arlist[j];
                        xx -= rhomod.coeff[j-1] * dset->Z[vi][t-k];
                    }
                    dset->Z[vc][t] = xx;
                }
            }
            reglist2[i] = vc++;
        }

        /* estimate the transformed model */
        clear_model(&ar);
        ar = lsq(reglist2, dset, OLS, OPT_A);

        if (iter > 1) {
            diff = fabs(100 * (ar.ess - ess) / ess);
        }

        ess = ar.ess;
        pprintf(prn, "%16c%3d %20f ", ' ', iter, ess);

        if (iter > 1) {
            pprintf(prn, "%13.3f\n", diff);
        } else {
            pputc(prn, '\n');
        }
    } /* end "ess changing" loop */

    /* restore the original (untransformed) regression list */
    for (i=0; i<=reglist[0]; i++) {
        ar.list[i] = reglist[i];
    }
    if (cpos > 0) {
        ar.ifc = 1;
    }
    if (ar.ifc) {
        if (!gretl_model_get_int(&ar, "effconst")) {
            ar.dfn -= 1;
        }
    }
    ar.ci = AR;

    /* special computation of fitted values */
    for (t=t1; t<=t2; t++) {
        xx = 0.0;
        for (j=2; j<=reglist[0]; j++) {
            xx += ar.coeff[j-2] * dset->Z[reglist[j]][t];
        }
        ar.uhat[t] = dset->Z[yno][t] - xx;
        /* add back the AR-predictable part of the error */
        for (j=1; j<=arlist[0]; j++) {
            if (t - t1 >= arlist[j]) {
                k = arlist[j];
                xx += rhomod.coeff[j-1] * ar.uhat[t-k];
            }
        }
        ar.yhat[t] = xx;
    }

    /* recompute residuals against the final fitted values */
    for (t=t1; t<=t2; t++) {
        ar.uhat[t] = dset->Z[yno][t] - ar.yhat[t];
    }

    ar.rsq = gretl_corr_rsq(ar.t1, ar.t2, dset->Z[reglist[1]], ar.yhat);
    ar.adjrsq = 1.0 - ((1.0 - ar.rsq) * (ar.nobs - 1.0) / ar.dfd);

    /* special computation of TSS, in terms of the rho-transformed
       dependent variable */
    xx = gretl_mean(ar.t1, ar.t2, dset->Z[ryno]);
    tss = 0.0;
    for (t=ar.t1; t<=ar.t2; t++) {
        tss += (dset->Z[ryno][t] - xx) * (dset->Z[ryno][t] - xx);
    }
    if (ar.dfn == 0) {
        ar.fstt = NADBL;
    } else {
        ar.fstt = ar.dfd * (tss - ar.ess) / (ar.dfn * ar.ess);
    }
    ar.lnL = NADBL;
    mle_criteria(&ar, 0);
    ar.dw = dwstat(pmax, &ar, dset);
    ar.rho = rhohat(pmax, ar.t1, ar.t2, ar.uhat);

    /* remove all temporary series */
    dataset_drop_last_variables(dset, nvadd);

    /* record the AR coefficient info on the model */
    if (gretl_model_add_arinfo(&ar, pmax)) {
        ar.errcode = E_ALLOC;
    } else {
        double *y = dset->Z[reglist[1]];

        for (i=0; i<=arlist[0]; i++) {
            ar.arinfo->arlist[i] = arlist[i];
            if (i >= 1) {
                ar.arinfo->rho[i-1] = rhomod.coeff[i-1];
                ar.arinfo->sderr[i-1] = rhomod.sderr[i-1];
            }
        }
        ar.ybar = gretl_mean(ar.t1, ar.t2, y);
        ar.sdy = gretl_stddev(ar.t1, ar.t2, y);
    }
    clear_model(&rhomod);

    if (gretl_model_get_int(&ar, "maxlag") < pmax) {
        gretl_model_set_int(&ar, "maxlag", pmax);
    }
    set_model_id(&ar, opt);

 bailout:

    free(reglist);
    free(reglist2);
    free(rholist);
    free(arlist);

    return ar;
}
3504
modelvar_iszero(const MODEL * pmod,const double * x)3505 static int modelvar_iszero (const MODEL *pmod, const double *x)
3506 {
3507 int t;
3508
3509 for (t=pmod->t1; t<=pmod->t2; t++) {
3510 if (!model_missing(pmod, t) && floatneq(x[t], 0.0)) {
3511 return 0;
3512 }
3513 }
3514
3515 return 1;
3516 }
3517
3518 /* From the position of the first regressor to end of list, omits
3519 variables with all-zero observations and repacks the rest of
3520 them. If the regression in question is not just an auxiliary
3521 one (OPT_A), records the list of dropped variables, if any,
3522 on @pmod.
3523 */
3524
static void omitzero (MODEL *pmod, const DATASET *dset,
                      gretlopt opt)
{
    int *zlist = NULL;
    int i, v, offset;
    double x = 0.0;

    /* for WLS the weight occupies position 2, so the first actual
       regressor is at position 3 */
    offset = (pmod->ci == WLS)? 3 : 2;

    if (!(opt & OPT_A)) {
        /* not an auxiliary regression: keep a record of drops */
        zlist = gretl_null_list();
    }

    /* iterate backwards so deletions don't disturb the positions
       still to be visited */
    for (i=pmod->list[0]; i>=offset; i--) {
        v = pmod->list[i];
        if (modelvar_iszero(pmod, dset->Z[v])) {
            if (zlist != NULL) {
                gretl_list_append_term(&zlist, v);
            }
            fprintf(stderr, "Deleting var %d (%s) at list pos %d: all zero\n",
                    v, dset->varname[v], i);
            gretl_list_delete_at_pos(pmod->list, i);
        }
    }

    if (pmod->nwt) {
        /* also drop regressors that are zero whenever the weight
           is non-zero */
        int t, wtzero;

        for (i=pmod->list[0]; i>=offset; i--) {
            v = pmod->list[i];
            wtzero = 1;
            for (t=pmod->t1; t<=pmod->t2; t++) {
                x = dset->Z[v][t] * dset->Z[pmod->nwt][t];
                if (!model_missing(pmod, t) && floatneq(x, 0.0)) {
                    wtzero = 0;
                    break;
                }
            }
            if (wtzero) {
                if (zlist != NULL) {
                    gretl_list_append_term(&zlist, v);
                }
                gretl_list_delete_at_pos(pmod->list, i);
            }
        }
    }

    if (zlist != NULL) {
        if (zlist[0] > 0) {
            /* the model takes ownership of @zlist */
            gretl_model_set_list_as_data(pmod, "zerolist", zlist);
        } else {
            free(zlist);
        }
    }
}
3580
3581 /* model_lags: attempt to detect presence of a lagged dependent
3582 variable among the regressors -- if found, return its position
3583 in @list, otherwise return 0. Also write into *pmax the maximum
3584 lag among the regressors.
3585
3586 FIXME: use inside a function, when the name of the dependent
3587 variable may be changed if it was a function argument?
3588 */
3589
model_lags(const int * list,const DATASET * dset,int * pmax)3590 static int model_lags (const int *list, const DATASET *dset, int *pmax)
3591 {
3592 int xno, yno = list[1];
3593 int ilag, maxlag = 0;
3594 int i, ret = 0;
3595
3596 for (i=2; i<=list[0]; i++) {
3597 xno = list[i];
3598 if (xno == LISTSEP) {
3599 break;
3600 }
3601 ilag = series_get_lag(dset, xno);
3602 if (ilag > 0) {
3603 if (ret == 0 && series_get_parent_id(dset, xno) == yno) {
3604 ret = i;
3605 }
3606 if (ilag > maxlag) {
3607 maxlag = ilag;
3608 }
3609 }
3610 }
3611
3612 *pmax = maxlag;
3613
3614 return ret;
3615 }
3616
/* Print the summary line(s) for an ARCH test: heading (unless this is
   part of a multi-equation system, OPT_M), statistic, and chi-square
   p-value. */

static void arch_test_print_simple (int order, double LM,
                                    double pval, gretlopt opt,
                                    PRN *prn)
{
    if (!(opt & OPT_M)) {
        pprintf(prn, "\n%s %d\n", _("Test for ARCH of order"), order);
        pprintf(prn, "\n%s: TR^2 = %f,\n", _("Test statistic"), LM);
    } else {
        /* multi-equation system: no per-equation heading */
        pprintf(prn, "%s: TR^2 = %f,\n", _("Test statistic"), LM);
    }
    pprintf(prn, "%s = P(%s(%d) > %f) = %f\n\n",
            _("with p-value"), _("Chi-square"),
            order, LM, pval);
}
3632
/* Print the coefficients of the ARCH auxiliary regression, with
   standard errors taken from the diagonal of @V. Coefficients are
   labelled alpha(0)..alpha(order). */

static void print_arch_regression (const gretl_matrix *b,
                                   const gretl_matrix *V,
                                   int T, int order,
                                   gretlopt opt, PRN *prn)
{
    int k = order + 1;
    char **names = strings_array_new_with_length(k, 16);
    double *se = malloc(k * sizeof *se);

    if (se != NULL && names != NULL) {
        int i;

        if (!(opt & OPT_M)) {
            /* not multi-equation: print a heading */
            pputc(prn, '\n');
            pprintf(prn, _("Test for ARCH of order %d"), order);
            pputs(prn, "\n\n");
        }

        for (i=0; i<k; i++) {
            se[i] = sqrt(gretl_matrix_get(V, i, i));
            sprintf(names[i], "alpha(%d)", i);
        }

        /* is a df correction wanted here? */
        print_coeffs(b->val, se, (const char **) names,
                     k, T - k, ARCH, prn);
    }

    free(se);
    strings_array_free(names, k);
}
3665
static void
arch_test_save_or_print (const gretl_matrix *b, const gretl_matrix *V,
                         int T, int order, double rsq, MODEL *pmod,
                         gretlopt opt, PRN *prn)
{
    /* LM statistic: T times R^2 of the auxiliary regression,
       chi-square(order) under the null */
    double LM = T * rsq;
    double pv = chisq_cdf_comp(order, LM);

    record_test_result(LM, pv);

    if (!(opt & OPT_I)) {
        if (V != NULL) {
            /* V will be NULL if --quiet is in force */
            print_arch_regression(b, V, T, order, opt, prn);
        }
        if (opt & OPT_Q) {
            /* quiet: just the summary line */
            arch_test_print_simple(order, LM, pv, opt, prn);
        }
    }

    if ((opt & OPT_S) || !(opt & OPT_Q)) {
        /* build a ModelTest record, for attachment and/or printing */
        ModelTest *test = model_test_new(GRETL_TEST_ARCH);

        if (test != NULL) {
            int heading = (V == NULL);

            model_test_set_teststat(test, GRETL_STAT_LM);
            model_test_set_order(test, order);
            model_test_set_dfn(test, order);
            model_test_set_value(test, LM);
            model_test_set_pvalue(test, pv);

            if (!(opt & OPT_Q)) {
                gretl_model_test_print_direct(test, heading, prn);
            }

            if (pmod != NULL && (opt & OPT_S)) {
                /* hand the test record over to the model */
                maybe_add_test_to_model(pmod, test);
            } else {
                model_test_free(test);
            }
        }
    }
}
3710
static int real_arch_test (const double *u, int T, int order,
                           MODEL *pmod, gretlopt opt,
                           PRN *prn)
{
    gretl_matrix *X = NULL;
    gretl_matrix *y = NULL;
    gretl_matrix *b = NULL;
    gretl_matrix *V = NULL;
    int i, k, s, t;
    double x, s2, rsq;
    double *ps2 = NULL;
    int err = 0;

    gretl_error_clear();

    if (order < 1 || order > T - 1) {
        gretl_errmsg_sprintf(_("Invalid lag order for arch (%d)"), order);
        return E_DATA;
    }

    /* @order observations are lost to lags; the aux regression has
       a constant plus @order lags of uhat^2 */
    T -= order;
    k = order + 1;

    X = gretl_matrix_alloc(T, k);
    y = gretl_column_vector_alloc(T);
    b = gretl_column_vector_alloc(k);

    if (X == NULL || y == NULL || b == NULL) {
        gretl_matrix_free(X);
        gretl_matrix_free(y);
        gretl_matrix_free(b);
        return E_ALLOC;
    }

    if (!(opt & OPT_Q)) {
        /* not quiet: we'll want the covariance matrix, to print
           the auxiliary regression */
        V = gretl_matrix_alloc(k, k);
        if (V == NULL) {
            err = E_ALLOC;
            goto bailout;
        }
        ps2 = &s2;
    }

    /* fill out the matrices with squared residuals
       and lags of same */

    for (i=0; i<k; i++) {
        for (t=0; t<T; t++) {
            s = t + order;
            if (i == 0) {
                /* dependent var: u_t^2; column 0 of X: the constant */
                x = u[s];
                gretl_vector_set(y, t, x * x);
                gretl_matrix_set(X, t, i, 1.0);
            } else {
                /* column i: u_{t-i}^2 */
                x = u[s - i];
                gretl_matrix_set(X, t, i, x * x);
            }
        }
    }

    err = gretl_matrix_ols(y, X, b, V, NULL, ps2);

    if (!err) {
        rsq = gretl_matrix_r_squared(y, X, b, &err);
    }

    if (!err) {
        arch_test_save_or_print(b, V, T, order, rsq, pmod,
                                opt, prn);
    }

 bailout:

    gretl_matrix_free(X);
    gretl_matrix_free(y);
    gretl_matrix_free(b);
    gretl_matrix_free(V);

    return err;
}
3791
3792 /**
3793 * arch_test:
3794 * @pmod: model to be tested.
3795 * @order: lag order for ARCH process.
3796 * @dset: dataset struct.
3797 * @opt: if flags include OPT_S, save test results to model;
3798 * if OPT_Q, be less verbose; if OPT_I, be silent.
3799 * @prn: gretl printing struct.
3800 *
3801 * Tests @pmod for AutoRegressive Conditional Heteroskedasticity.
3802 *
3803 * Returns: 0 on success, non-zero code on error.
3804 */
3805
int arch_test (MODEL *pmod, int order, const DATASET *dset,
               gretlopt opt, PRN *prn)
{
    int err = 0;

    if (pmod->missmask != NULL) {
        /* we can't handle a model estimated on a sample with
           internal gaps */
        err = E_MISSDATA;
    } else {
        const double *resid = pmod->uhat + pmod->t1;

        if (order == 0) {
            /* use data frequency as default lag order */
            order = dset->pd;
        }
        err = real_arch_test(resid, pmod->nobs, order, pmod,
                             opt, prn);
    }

    return err;
}
3827
/* Convenience wrapper: run the ARCH test on a plain array of
   residuals, with no associated model. */

int array_arch_test (const double *u, int n, int order,
                     gretlopt opt, PRN *prn)
{
    return real_arch_test(u, n, order, NULL, opt, prn);
}
3833
3834 /**
3835 * arch_model:
3836 * @list: dependent variable plus list of regressors.
3837 * @order: lag order for ARCH process.
3838 * @dset: dataset struct.
3839 * @opt: may contain OPT_O to print covariance matrix.
3840 *
3841 * Estimate the model given in @list via weighted least squares,
3842 * with the weights based on the predicted error variances from
3843 * an auxiliary regression of the squared residuals on their lagged
3844 * values.
3845 *
3846 * Returns: a #MODEL struct, containing the estimates.
3847 */
3848
arch_model(const int * list,int order,DATASET * dset,gretlopt opt)3849 MODEL arch_model (const int *list, int order, DATASET *dset,
3850 gretlopt opt)
3851 {
3852 MODEL amod;
3853 int *wlist = NULL, *alist = NULL;
3854 int T = sample_size(dset);
3855 int oldv = dset->v;
3856 int i, t, nwt, k, n = dset->n;
3857 double *a = NULL;
3858 double *se = NULL;
3859 double xx;
3860
3861 gretl_error_clear();
3862 gretl_model_init(&amod, dset);
3863
3864 if (order == 0) {
3865 /* use data frequency as default lag order */
3866 order = dset->pd;
3867 }
3868
3869 if (order < 1 || order > T - list[0]) {
3870 amod.errcode = E_UNSPEC;
3871 gretl_errmsg_sprintf(_("Invalid lag order for arch (%d)"), order);
3872 return amod;
3873 }
3874
3875 if (dataset_add_series(dset, order + 1)) {
3876 amod.errcode = E_ALLOC;
3877 } else {
3878 alist = gretl_list_new(order + 2);
3879 if (alist == NULL) {
3880 amod.errcode = E_ALLOC;
3881 }
3882 }
3883
3884 if (amod.errcode) {
3885 goto bailout;
3886 }
3887
3888 /* start list for aux regression */
3889 alist[1] = dset->v - order - 1;
3890 alist[2] = 0;
3891
3892 /* run initial OLS and get squared residuals */
3893 amod = lsq(list, dset, OLS, OPT_A | OPT_M);
3894 if (amod.errcode) {
3895 goto bailout;
3896 }
3897
3898 k = dset->v - order - 1;
3899 strcpy(dset->varname[k], "utsq");
3900 for (t=0; t<n; t++) {
3901 dset->Z[k][t] = NADBL;
3902 }
3903 for (t=amod.t1; t<=amod.t2; t++) {
3904 xx = amod.uhat[t];
3905 dset->Z[k][t] = xx * xx;
3906 }
3907 /* also lags of squared resids */
3908 for (i=1; i<=order; i++) {
3909 k = dset->v - order + i - 1;
3910 alist[i+2] = k;
3911 sprintf(dset->varname[k], "utsq_%d", i);
3912 for (t=0; t<n; t++) {
3913 dset->Z[k][t] = NADBL;
3914 }
3915 for (t=amod.t1+i; t<=amod.t2; t++) {
3916 dset->Z[k][t] = dset->Z[alist[1]][t-i];
3917 }
3918 }
3919
3920 /* run auxiliary regression */
3921 clear_model(&amod);
3922 amod = lsq(alist, dset, OLS, OPT_A);
3923 if (amod.errcode) {
3924 goto bailout;
3925 }
3926
3927 /* steal the coefficients for reference */
3928 a = amod.coeff;
3929 amod.coeff = NULL;
3930 se = amod.sderr;
3931 amod.sderr = NULL;
3932
3933 /* do weighted estimation */
3934 wlist = gretl_list_new(list[0] + 1);
3935
3936 if (wlist == NULL) {
3937 amod.errcode = E_ALLOC;
3938 } else {
3939 /* construct the weight variable */
3940 nwt = wlist[1] = dset->v - 1;
3941 strcpy(dset->varname[nwt], "1/sigma");
3942
3943 for (i=2; i<=wlist[0]; i++) {
3944 wlist[i] = list[i-1];
3945 }
3946
3947 k = dset->v - order - 1;
3948
3949 for (t=amod.t1; t<=amod.t2; t++) {
3950 xx = amod.yhat[t];
3951 if (xx <= 0.0) {
3952 xx = dset->Z[k][t];
3953 }
3954 dset->Z[nwt][t] = 1.0 / xx; /* FIXME is this right? */
3955 }
3956
3957 clear_model(&amod);
3958 amod = lsq(wlist, dset, WLS, OPT_NONE);
3959 amod.ci = ARCH;
3960
3961 if (!amod.errcode) {
3962 gretl_model_set_int(&amod, "arch_order", order);
3963 gretl_model_set_data(&amod, "arch_coeff", a,
3964 GRETL_TYPE_DOUBLE_ARRAY,
3965 (order + 1) * sizeof *a);
3966 gretl_model_set_data(&amod, "arch_sderr", se,
3967 GRETL_TYPE_DOUBLE_ARRAY,
3968 (order + 1) * sizeof *se);
3969 }
3970 }
3971
3972 bailout:
3973
3974 if (alist != NULL) free(alist);
3975 if (wlist != NULL) free(wlist);
3976
3977 dataset_drop_last_variables(dset, dset->v - oldv);
3978
3979 return amod;
3980 }
3981
3982 /**
3983 * lad_model:
3984 * @list: dependent variable plus list of regressors.
3985 * @dset: dataset struct.
3986 * @opt: may include OPT_Q for quiet operation.
3987 *
3988 * Estimate the model given in @list using the method of Least
3989 * Absolute Deviation (LAD).
3990 *
3991 * Returns: a #MODEL struct, containing the estimates.
3992 */
3993
lad_model(const int * list,DATASET * dset,gretlopt opt)3994 MODEL lad_model (const int *list, DATASET *dset, gretlopt opt)
3995 {
3996 MODEL mod;
3997 int (*lad_driver) (MODEL *, DATASET *, gretlopt);
3998
3999 /* run an initial OLS to "set the model up" and check for errors.
4000 the lad_driver function will overwrite the coefficients etc.
4001 */
4002
4003 mod = lsq(list, dset, OLS, OPT_A);
4004
4005 if (mod.errcode) {
4006 return mod;
4007 }
4008
4009 lad_driver = get_plugin_function("lad_driver");
4010
4011 if (lad_driver == NULL) {
4012 mod.errcode = E_FOPEN;
4013 return mod;
4014 }
4015
4016 (*lad_driver) (&mod, dset, opt);
4017 set_model_id(&mod, opt);
4018
4019 return mod;
4020 }
4021
4022 /**
4023 * quantreg:
4024 * @tau: vector containing one or more quantile values, in the range
4025 * 0.01 to 0.99.
4026 * @list: model specification: dependent var and regressors.
4027 * @dset: dataset struct.
4028 * @opt: may contain OPT_R for robust standard errors,
4029 * OPT_I to produce confidence intervals.
4030 * @prn: gretl printing struct.
4031 *
4032 * Estimate the model given in @list using the method of
4033 * quantile regression.
4034 *
4035 * Returns: a #MODEL struct, containing the estimates.
4036 */
4037
quantreg(const gretl_matrix * tau,const int * list,DATASET * dset,gretlopt opt,PRN * prn)4038 MODEL quantreg (const gretl_matrix *tau, const int *list,
4039 DATASET *dset, gretlopt opt, PRN *prn)
4040 {
4041 MODEL mod;
4042 int (*rq_driver) (const gretl_matrix *, MODEL *, DATASET *,
4043 gretlopt, PRN *);
4044 gretlopt olsopt = (OPT_A | OPT_M);
4045
4046 /* Run an initial OLS to "set the model up" and check for errors.
4047 the driver function will overwrite the coefficients, etc.
4048 For now we make life easier by rejecting within-sample missing
4049 values (OPT_M).
4050 */
4051
4052 if (opt & OPT_R) {
4053 olsopt |= OPT_R;
4054 }
4055
4056 mod = lsq(list, dset, OLS, olsopt);
4057
4058 if (mod.errcode) {
4059 return mod;
4060 }
4061
4062 rq_driver = get_plugin_function("rq_driver");
4063
4064 if (rq_driver == NULL) {
4065 mod.errcode = E_FOPEN;
4066 return mod;
4067 }
4068
4069 (*rq_driver) (tau, &mod, dset, opt, prn);
4070 set_model_id(&mod, opt);
4071
4072 return mod;
4073 }
4074
4075 #ifndef WIN32
4076
gretl_glib_grab_output(const char * prog,char ** sout)4077 static void gretl_glib_grab_output (const char *prog,
4078 char **sout)
4079 {
4080 gchar *argv[] = { (gchar *) prog, NULL };
4081 gboolean ok;
4082
4083 ok = g_spawn_sync(NULL, argv, NULL, G_SPAWN_SEARCH_PATH,
4084 NULL, NULL, sout, NULL,
4085 NULL, NULL);
4086
4087 if (!ok && *sout != NULL) {
4088 g_free(*sout);
4089 *sout = NULL;
4090 }
4091 }
4092
4093 #endif
4094
4095 /*
4096 * get_x12a_maxpd:
4097 *
4098 * Retrieve the highest data frequency handled by X-12-ARIMA,
4099 * which may vary depending on how the program was built.
4100 * This may be relevant when executing the gretl arima
4101 * command with the option to use X-12-ARIMA.
4102 */
4103
int get_x12a_maxpd (void)
{
    static int n; /* cached across calls; 0 means "not yet probed" */

    if (n == 0) {
        const char *x12a = gretl_x12_arima();
        char *output = NULL;

#ifdef WIN32
        gretl_win32_grab_output(x12a, gretl_dotdir(), &output);
#else
        gretl_glib_grab_output(x12a, &output);
#endif
        if (output != NULL) {
            /* scan the program's output for "PSP = <n>" */
            char *s = strstr(output, "PSP = ");

            if (s != NULL) {
                n = atoi(s + 6);
            }
            free(output);
        }
        if (n <= 0) {
            /* fall back to monthly frequency */
            n = 12;
        }
    }

    return n;
}
4132
4133 /**
4134 * arma:
4135 * @list: AR and MA orders, dependent variable, and any exogenous
4136 * regressors.
4137 * @pqlags: list giving specific non-seasonal AR/MA lags (or NULL).
4138 * @dset: dataset struct.
4139 * @opt: option flags.
 * @prn: for printing details of iterations (or NULL).
4141 *
4142 * Calculates ARMA estimates. By default the estimator is
4143 * exact ML, implemented via gretl's Kalman filter in
4144 * conjunction with the BFGS maximizer. Other options:
4145 * if @opt includes OPT_L we use L-BFGS-B rather than standard
4146 * BFGS; OPT_C (incompatible with OPT_L) calls for use of
4147 * conditional ML (BHHH algorithm); and OPT_X requests estimation via
4148 * X-12-ARIMA rather than native code.
4149 *
4150 * If @pqlags is non-NULL it should take the form of two
4151 * gretl sub-lists joined by #LISTSEP: the first sub-list holds
4152 * a set of AR lags and the second a list of MA lags. If only
4153 * AR lags are specified, a single list may be given; if only
4154 * MA lags are specified, use #LISTSEP with a null first sub-list.
4155 *
4156 * If the model specification does not include a constant
4157 * this is added automatically, unless @opt includes OPT_N
4158 * ("no constant").
4159 *
4160 * When the estimation method is native exact ML, two (mutually
4161 * exclusive) flags in @opt may be used to control the estimator
4162 * of the covariance matrix: OPT_G specifies use of the outer
4163 * product of the gradient (OPG), while OPT_H specifies use of
4164 * the (numerical) Hessian. If neither of these flags is given, the
4165 * default is to use the Hessian by preference, but to fall back
4166 * to OPG if computation of the numerical Hessian fails. These
4167 * flags are ignored if estimation is not via native exact ML.
4168 *
4169 * Returns: a #MODEL struct, containing the estimates.
4170 */
4171
arma(const int * list,const int * pqlags,DATASET * dset,gretlopt opt,PRN * prn)4172 MODEL arma (const int *list, const int *pqlags,
4173 DATASET *dset, gretlopt opt,
4174 PRN *prn)
4175 {
4176 MODEL (*arma_model) (const int *, const int *,
4177 DATASET *, gretlopt, PRN *);
4178 MODEL (*arma_x12_model) (const int *, const int *,
4179 const DATASET *,
4180 int, gretlopt, PRN *);
4181 MODEL armod;
4182 int err = 0;
4183
4184 gretl_model_init(&armod, dset);
4185
4186 err = incompatible_options(opt, OPT_G | OPT_H);
4187 if (!err) {
4188 err = options_incompatible_with(opt, OPT_X, OPT_R | OPT_S);
4189 }
4190 if (err) {
4191 armod.errcode = err;
4192 return armod;
4193 }
4194
4195 if (opt & OPT_X) {
4196 int pdmax = get_x12a_maxpd();
4197
4198 if ((dset->t2 - dset->t1 + 1) > pdmax * 50) {
4199 gretl_errmsg_sprintf(_("X-12-ARIMA can't handle more than %d observations.\n"
4200 "Please select a smaller sample."), pdmax * 50);
4201 armod.errcode = E_DATA;
4202 return armod;
4203 }
4204
4205 arma_x12_model = get_plugin_function("arma_x12_model");
4206 if (arma_x12_model == NULL) {
4207 err = E_FOPEN;
4208 } else {
4209 armod = (*arma_x12_model) (list, pqlags, dset, pdmax, opt, prn);
4210 }
4211 } else {
4212 arma_model = get_plugin_function("arma_model");
4213 if (arma_model == NULL) {
4214 err = E_FOPEN;
4215 } else {
4216 armod = (*arma_model) (list, pqlags, dset, opt, prn);
4217 }
4218 }
4219
4220 if (err) {
4221 armod.errcode = err;
4222 return armod;
4223 }
4224
4225 set_model_id(&armod, opt);
4226
4227 return armod;
4228 }
4229
4230 /**
4231 * garch:
4232 * @list: dependent variable plus arch and garch orders.
4233 * @dset: dataset struct.
4234 * @opt: can specify robust standard errors and VCV.
4235 * @prn: for printing details of iterations (or NULL).
4236 *
4237 * Calculate GARCH estimates.
4238 *
4239 * Returns: a #MODEL struct, containing the estimates.
4240 */
4241
garch(const int * list,DATASET * dset,gretlopt opt,PRN * prn)4242 MODEL garch (const int *list, DATASET *dset, gretlopt opt,
4243 PRN *prn)
4244 {
4245 MODEL mod;
4246 PRN *myprn;
4247 MODEL (*garch_model) (const int *, DATASET *, PRN *,
4248 gretlopt);
4249
4250 gretl_error_clear();
4251
4252 garch_model = get_plugin_function("garch_model");
4253
4254 if (garch_model == NULL) {
4255 gretl_model_init(&mod, dset);
4256 mod.errcode = E_FOPEN;
4257 return mod;
4258 }
4259
4260 if (opt & OPT_V) {
4261 myprn = prn;
4262 } else {
4263 myprn = NULL;
4264 }
4265
4266 mod = (*garch_model) (list, dset, myprn, opt);
4267 set_model_id(&mod, opt);
4268
4269 return mod;
4270 }
4271
4272 /**
4273 * mp_ols:
4274 * @list: specification of variables to use.
4275 * @dset: dataset struct.
 * @opt: may include OPT_Q for quiet operation.
4277 *
4278 * Estimate an OLS model using multiple-precision arithmetic
4279 * via the GMP library.
4280 *
4281 * Returns: a #MODEL struct, containing the estimates.
4282 */
4283
mp_ols(const int * list,DATASET * dset,gretlopt opt)4284 MODEL mp_ols (const int *list, DATASET *dset, gretlopt opt)
4285 {
4286 int (*mplsq)(const int *, const int *, const int *,
4287 DATASET *, MODEL *,
4288 gretlopt);
4289 MODEL mpmod;
4290
4291 gretl_model_init(&mpmod, dset);
4292
4293 mplsq = get_plugin_function("mplsq");
4294 if (mplsq == NULL) {
4295 mpmod.errcode = 1;
4296 return mpmod;
4297 }
4298
4299 if (gretl_list_has_separator(list)) {
4300 int *base = NULL;
4301 int *poly = NULL;
4302
4303 mpmod.errcode = gretl_list_split_on_separator(list, &base, &poly);
4304 if (mpmod.errcode == 0 && (base == NULL || poly == NULL)) {
4305 mpmod.errcode = E_ARGS;
4306 } else {
4307 mpmod.errcode = (*mplsq)(base, poly, NULL, dset,
4308 &mpmod, OPT_S);
4309 }
4310 free(base);
4311 free(poly);
4312 } else {
4313 mpmod.errcode = (*mplsq)(list, NULL, NULL, dset,
4314 &mpmod, OPT_S);
4315 }
4316
4317 set_model_id(&mpmod, opt);
4318
4319 return mpmod;
4320 }
4321
/* Vet the option flags passed to panel_model() for coherence;
   returns E_BADOPT on any disallowed combination, else 0. */

static int check_panel_options (gretlopt opt)
{
    int err = 0;

    if ((opt & OPT_U) && (opt & OPT_H)) {
        /* can't specify random effects + weighted least squares */
        err = E_BADOPT;
    } else if ((opt & OPT_T) && !(opt & OPT_H)) {
        /* iterate option requires weighted least squares option */
        err = E_BADOPT;
    } else if ((opt & OPT_N) && !(opt & OPT_U)) {
        /* the Nerlove option requires random effects */
        err = E_BADOPT;
    } else if ((opt & OPT_C) && !(opt & OPT_P)) {
        /* explicit cluster option only OK with pooled OLS */
        err = E_BADOPT;
    } else if (incompatible_options(opt, OPT_B | OPT_U | OPT_P)) {
        /* mutually exclusive estimator requests */
        err = E_BADOPT;
    }

    return err;
}
4343
4344 /**
4345 * panel_model:
4346 * @list: regression list (dependent variable plus independent
4347 * variables).
4348 * @dset: dataset struct.
4349 * @opt: can include OPT_Q (quiet estimation), OPT_U (random
4350 * effects model), OPT_H (weights based on the error variance
 * for the respective cross-sectional units), OPT_T (iterate,
4352 * only available in conjunction with OPT_H).
4353 * @prn: printing struct (or NULL).
4354 *
4355 * Calculate estimates for a panel dataset, using fixed
4356 * effects (the default), random effects, or weighted
4357 * least squares based on the respective variances for the
4358 * cross-sectional units.
4359 *
4360 * Returns: a #MODEL struct, containing the estimates.
4361 */
4362
panel_model(const int * list,DATASET * dset,gretlopt opt,PRN * prn)4363 MODEL panel_model (const int *list, DATASET *dset,
4364 gretlopt opt, PRN *prn)
4365 {
4366 MODEL mod;
4367
4368 if (check_panel_options(opt)) {
4369 gretl_model_init(&mod, dset);
4370 mod.errcode = E_BADOPT;
4371 } else if (opt & OPT_H) {
4372 mod = panel_wls_by_unit(list, dset, opt, prn);
4373 } else {
4374 mod = real_panel_model(list, dset, opt, prn);
4375 }
4376
4377 return mod;
4378 }
4379
4380 /**
4381 * ivreg:
4382 * @list: regression list; see the documentation for the "tsls"
4383 * command.
4384 * @dset: dataset struct.
4385 * @opt: option flags.
4386 *
4387 * Calculate IV estimates for a linear model, by default via two-stage
4388 * least squares. The option flags can include OPT_Q for quiet
4389 * operation, and either OPT_L to specify estimation via Limited
4390 * Information Maximum Likelihood or OPT_G for estimation via
4391 * Generalized method of Moments.
4392 *
4393 * Returns: a #MODEL struct, containing the estimates.
4394 */
4395
ivreg(const int * list,DATASET * dset,gretlopt opt)4396 MODEL ivreg (const int *list, DATASET *dset, gretlopt opt)
4397 {
4398 MODEL mod;
4399 int err;
4400
4401 gretl_error_clear();
4402
4403 /* can't have both LIML and GMM options */
4404 err = incompatible_options(opt, OPT_G | OPT_L);
4405
4406 if (!err) {
4407 /* two-step, iterate and weights options are GMM-only */
4408 err = option_prereq_missing(opt, OPT_T | OPT_I | OPT_H, OPT_G);
4409 }
4410
4411 if (err) {
4412 gretl_model_init(&mod, dset);
4413 mod.errcode = err;
4414 return mod;
4415 }
4416
4417 if (opt & OPT_L) {
4418 mod = single_equation_liml(list, dset, opt);
4419 } else if (opt & OPT_G) {
4420 mod = ivreg_via_gmm(list, dset, opt);
4421 } else {
4422 mod = tsls(list, dset, opt);
4423 }
4424
4425 return mod;
4426 }
4427
4428 /**
4429 * dpd_model:
4430 * @list: regression list.
4431 * @laglist: list of specific lags of the dependent variable, or
4432 * NULL.
4433 * @ispec: may contain additional instrument specification.
4434 * @dset: dataset struct.
4435 * @opt: to be hooked up.
4436 * @prn: printing struct.
4437 *
4438 * This is at present a secret function, for testing only.
4439 *
4440 * Returns: a #MODEL struct, containing the estimates.
4441 */
4442
dpd_model(const int * list,const int * laglist,const char * ispec,const DATASET * dset,gretlopt opt,PRN * prn)4443 MODEL dpd_model (const int *list, const int *laglist,
4444 const char *ispec, const DATASET *dset,
4445 gretlopt opt, PRN *prn)
4446 {
4447 MODEL (*dpd_estimate) (const int *, const int *,
4448 const char *, const DATASET *,
4449 gretlopt, PRN *);
4450 MODEL mod;
4451
4452 gretl_model_init(&mod, dset);
4453
4454 dpd_estimate = get_plugin_function("dpd_estimate");
4455 if (dpd_estimate == NULL) {
4456 mod.errcode = 1;
4457 return mod;
4458 }
4459
4460 mod = (*dpd_estimate)(list, laglist, ispec, dset, opt, prn);
4461 set_model_id(&mod, opt);
4462
4463 return mod;
4464 }
4465
4466 /**
4467 * anova:
4468 * @list: must contain the response and treatment variables.
4469 * @dset: dataset struct.
4470 * @opt: unused at present.
4471 * @prn: printing struct.
4472 *
4473 * Does one-way or two-way Analysis of Variance (prints table and F-test).
4474 *
4475 * Returns: 0 on success, non-zero on failure.
4476 */
4477
int anova (const int *list, const DATASET *dset,
           gretlopt opt, PRN *prn)
{
    int (*gretl_anova) (const int *, const DATASET *,
                        gretlopt, PRN *);

    gretl_anova = get_plugin_function("gretl_anova");

    /* delegate everything to the plugin, if available */
    return (gretl_anova == NULL)? 1 :
        (*gretl_anova)(list, dset, opt, prn);
}
4491