1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 /* tsls.c - Two-Stage Least Squares for gretl */
21
22 #include "libgretl.h"
23 #include "qr_estimate.h"
24 #include "libset.h"
25 #include "estim_private.h"
26 #include "missing_private.h"
27 #include "matrix_extra.h"
28 #include "tsls.h"
29
30 #define TDEBUG 0
31
tsls_omitzero(int * list,const DATASET * dset,const char * mask)32 static void tsls_omitzero (int *list, const DATASET *dset,
33 const char *mask)
34 {
35 int i, v, t, allzero;
36
37 for (i=list[0]; i>1; i--) {
38 v = list[i];
39 allzero = 1;
40 for (t=dset->t1; t<=dset->t2; t++) {
41 if (mask != NULL && mask[t - dset->t1]) {
42 continue;
43 }
44 if (dset->Z[v][t] != 0.0) {
45 allzero = 0;
46 break;
47 }
48 }
49 if (allzero) {
50 gretl_list_delete_at_pos(list, i);
51 }
52 }
53 }
54
55 /**
56 * tsls_free_data:
57 * @pmod: model to operate on.
58 *
59 * Frees the first-stage data attached to a two-stage least squares
60 * model that was estimated using %OPT_E (part of a system).
61 */
62
tsls_free_data(const MODEL * pmod)63 void tsls_free_data (const MODEL *pmod)
64 {
65 gretl_matrix *endog = gretl_model_get_data(pmod, "endog");
66 double **X = gretl_model_get_data(pmod, "tslsX");
67 int i, m = 0;
68
69 if (endog != NULL && X != NULL) {
70 for (i=0; i<pmod->ncoeff; i++) {
71 if (endog->val[i] != 0) m++;
72 }
73 for (i=0; i<m; i++) {
74 free(X[i]);
75 }
76 }
77 }
78
79 /* when tsls is called for initial estimation of an equation
80 within a system of equations, save the first-stage fitted
81 values on the model under the key "tslsX".
82 */
83
84 static int
tsls_save_data(MODEL * pmod,const int * hatlist,const int * exolist,DATASET * dset)85 tsls_save_data (MODEL *pmod, const int *hatlist, const int *exolist,
86 DATASET *dset)
87 {
88 gretl_matrix *endog = NULL;
89 double **X = NULL;
90 size_t Xsize, xs_old;
91 int recycle_X = 0;
92 int recycle_e = 0;
93 int i, v, err = 0;
94
95 Xsize = hatlist[0] * sizeof *X;
96
97 /* re-use old pointers if applicable */
98
99 X = gretl_model_get_data_full(pmod, "tslsX", NULL, NULL, &xs_old);
100 if (X != NULL) {
101 if (Xsize == xs_old) {
102 recycle_X = 1;
103 } else {
104 tsls_free_data(pmod);
105 gretl_model_detach_data_item(pmod, "tslsX");
106 free(X);
107 X = NULL;
108 }
109 }
110
111 if (!recycle_X && Xsize > 0) {
112 X = malloc(Xsize);
113 if (X == NULL) {
114 return E_ALLOC;
115 }
116 }
117
118 endog = gretl_model_get_data(pmod, "endog");
119 if (endog != NULL) {
120 if (gretl_vector_get_length(endog) == pmod->ncoeff) {
121 recycle_e = 1;
122 } else {
123 gretl_model_detach_data_item(pmod, "endog");
124 gretl_matrix_free(endog);
125 endog = NULL;
126 }
127 }
128
129 if (!recycle_e) {
130 endog = gretl_matrix_alloc(pmod->ncoeff, 1);
131 if (endog == NULL) {
132 free(X);
133 return E_ALLOC;
134 }
135 }
136
137 /* steal the appropriate columns from Z */
138 for (i=1; i<=hatlist[0]; i++) {
139 v = hatlist[i];
140 X[i-1] = dset->Z[v];
141 dset->Z[v] = NULL;
142 }
143
144 for (i=0; i<pmod->ncoeff; i++) {
145 v = pmod->list[i+2];
146 endog->val[i] = !in_gretl_list(exolist, v);
147 }
148
149 /* now attach X and endog to the model */
150 if (!recycle_X && X != NULL) {
151 gretl_model_set_data(pmod, "tslsX", X, GRETL_TYPE_DOUBLE_ARRAY,
152 Xsize);
153 }
154 if (!recycle_e && endog != NULL) {
155 gretl_model_set_matrix_as_data(pmod, "endog", endog);
156 }
157
158 return err;
159 }
160
add_tsls_var(int * list,int v,gretlopt opt)161 static void add_tsls_var (int *list, int v, gretlopt opt)
162 {
163 int pos = gretl_list_separator_position(list);
164 int i;
165
166 if (opt & OPT_T) {
167 /* add as instrument */
168 list[0] += 1;
169 list[list[0]] = v;
170 } else if (opt & OPT_B) {
171 /* add as exogenous regressor */
172 list[0] += 2;
173 for (i=list[0]-1; i>pos; i--) {
174 list[i] = list[i-1];
175 }
176 list[pos] = v;
177 list[list[0]] = v;
178 } else {
179 /* add as endogenous regressor */
180 list[0] += 1;
181 for (i=list[0]; i>pos; i--) {
182 list[i] = list[i-1];
183 }
184 list[pos] = v;
185 }
186 }
187
delete_tsls_var(int * list,int v,gretlopt opt)188 static void delete_tsls_var (int *list, int v, gretlopt opt)
189 {
190 int pos = gretl_list_separator_position(list);
191 int i, j;
192
193 if (opt & OPT_T) {
194 /* delete as instrument */
195 for (i=pos+1; i<=list[0]; i++) {
196 if (list[i] == v) {
197 for (j=i; j<list[0]; j++) {
198 list[j] = list[j + 1];
199 }
200 list[0] -= 1;
201 break;
202 }
203 }
204 } else if (opt & OPT_B) {
205 /* delete from both sub-lists */
206 for (i=2; i<=list[0]; i++) {
207 if (list[i] == v) {
208 for (j=i; j<list[0]; j++) {
209 list[j] = list[j + 1];
210 }
211 list[0] -= 1;
212 }
213 }
214 } else {
215 /* delete from regressor list */
216 for (i=2; i<pos; i++) {
217 if (list[i] == v) {
218 for (j=i; j<list[0]; j++) {
219 list[j] = list[j + 1];
220 }
221 list[0] -= 1;
222 break;
223 }
224 }
225 }
226 }
227
in_ivreg_list(const int * list,int v,gretlopt opt)228 static int in_ivreg_list (const int *list, int v, gretlopt opt)
229 {
230 int pos = gretl_list_separator_position(list);
231 int i, imin, imax;
232
233 if (pos == 0) {
234 return 0;
235 }
236
237 if (opt & OPT_T) {
238 imin = pos + 1;
239 imax = list[0];
240 } else if (opt & OPT_B) {
241 imin = 2;
242 imax = list[0];
243 } else {
244 imin = 2;
245 imax = pos - 1;
246 }
247
248 for (i=imin; i<=imax; i++) {
249 if (list[i] == v) {
250 return i;
251 }
252 }
253
254 return 0;
255 }
256
257 /**
258 * ivreg_list_omit:
259 * @orig: list specifying original IV model.
260 * @drop: list of variables to be omitted.
261 * @opt: may include %OPT_T (omit variable only as instrument)
262 * or %OPT_B (omit both as regressor and instrument). The default
263 * is to omit (only) as regressor.
264 * @err: pointer to receive error code.
265 *
266 * Creates a new IVREG specification list, by first copying @orig
267 * then deleting from the copy the variables found in @drop.
268 *
269 * Returns: the new, reduced list or %NULL on error.
270 */
271
272 int *
ivreg_list_omit(const int * orig,const int * drop,gretlopt opt,int * err)273 ivreg_list_omit (const int *orig, const int *drop, gretlopt opt, int *err)
274 {
275 int *newlist;
276 int i;
277
278 *err = incompatible_options(opt, OPT_T | OPT_B);
279 if (*err) {
280 return NULL;
281 }
282
283 newlist = gretl_list_copy(orig);
284
285 for (i=1; i<=drop[0]; i++) {
286 if (in_ivreg_list(orig, drop[i], opt)) {
287 delete_tsls_var(newlist, drop[i], opt);
288 } else {
289 *err = E_UNSPEC;
290 }
291 }
292
293 if (*err) {
294 free(newlist);
295 newlist = NULL;
296 }
297
298 return newlist;
299 }
300
301 /**
302 * ivreg_list_add:
303 * @orig: list specifying original IV model.
304 * @add: list of variables to be added.
305 * @opt: may include %OPT_T (add variable only as instrument)
306 * or %OPT_B (add both as regressor and instrument). The default
307 * is to add as an endogenous regressor.
308 * @err: pointer to receive error code.
309 *
310 * Creates a new IVREG specification list, by copying @orig
311 * and adding the variables found in @add.
312 *
313 * Returns: the new, augmented list or %NULL on error.
314 */
315
316 int *
ivreg_list_add(const int * orig,const int * add,gretlopt opt,int * err)317 ivreg_list_add (const int *orig, const int *add, gretlopt opt, int *err)
318 {
319 int norig = orig[0];
320 int nadd = add[0];
321 int *newlist;
322 int i;
323
324 *err = incompatible_options(opt, OPT_T | OPT_B);
325 if (*err) {
326 return NULL;
327 }
328
329 if (opt & OPT_B) {
330 nadd *= 2;
331 }
332
333 newlist = gretl_list_new(norig + nadd);
334
335 for (i=0; i<=norig; i++) {
336 newlist[i] = orig[i];
337 }
338
339 for (i=1; i<=add[0]; i++) {
340 if (in_ivreg_list(orig, add[i], opt)) {
341 *err = E_ADDDUP;
342 } else {
343 add_tsls_var(newlist, add[i], opt);
344 }
345 }
346
347 if (*err) {
348 free(newlist);
349 newlist = NULL;
350 }
351
352 return newlist;
353 }
354
355 /* prepend constant to list of instruments */
356
zlist_prepend_const(int ** pzlist)357 static int zlist_prepend_const (int **pzlist)
358 {
359 int clist[2] = {1, 0};
360
361 return gretl_list_insert_list(pzlist, clist, 1);
362 }
363
364 /**
365 * tsls_make_endolist:
366 * @reglist: regression specification.
367 * @instlist: predetermined and exogenous variables.
368 * @addconst: location to receive notification of whether
369 * the constant was added to the content of @instlist,
370 * or NULL.
371 * @err: location to receive error code.
372 *
373 * Determines which variables in @reglist, when checked against the
374 * predetermined and exogenous vars in @instlist, need to be
375 * instrumented, and populates the returned list accordingly.
376 *
377 * If @addconst is non-NULL and the constant is found in @reglist
378 * but not in @instlist, then it is added to @instlist and this
379 * is flagged by writing 1 into @addconst.
380 *
381 * Returns: allocated list of variables to be instrumented or
382 * NULL if there are no such variables.
383 */
384
tsls_make_endolist(const int * reglist,int ** instlist,int * addconst,int * err)385 int *tsls_make_endolist (const int *reglist, int **instlist,
386 int *addconst, int *err)
387 {
388 int *endolist = NULL;
389 int i, vi;
390
391 for (i=2; i<=reglist[0]; i++) {
392 vi = reglist[i];
393 if (!in_gretl_list(*instlist, vi)) {
394 if (vi == 0) {
395 /* found const in reglist but not instlist:
396 needs fixing -- or is this debatable? */
397 if (addconst != NULL) {
398 *addconst = 1;
399 }
400 } else {
401 endolist = gretl_list_append_term(&endolist, vi);
402 if (endolist == NULL) {
403 *err = E_ALLOC;
404 return NULL;
405 }
406 }
407 }
408 }
409
410 if (addconst != NULL && *addconst) {
411 /* add constant to list of instruments */
412 *err = zlist_prepend_const(instlist);
413 }
414
415 return endolist;
416 }
417
418 /* fill the residuals matrix for tsls likelihood calculation */
419
fill_E_matrix(gretl_matrix * E,const MODEL * pmod,const int * endolist,const int * reglist,const int * instlist,const char * missmask,const DATASET * dset)420 static int fill_E_matrix (gretl_matrix *E,
421 const MODEL *pmod,
422 const int *endolist,
423 const int *reglist,
424 const int *instlist,
425 const char *missmask,
426 const DATASET *dset)
427 {
428 gretl_matrix *Y = NULL;
429 gretl_matrix *X = NULL;
430 int ny, *ylist;
431 int i, err = 0;
432
433 ny = endolist[0] + 1;
434 ylist = gretl_list_new(ny);
435 ylist[1] = reglist[1];
436 for (i=2; i<=ny; i++) {
437 ylist[i] = endolist[i-1];
438 }
439
440 /* dependent variable plus endogenous regressors on LHS */
441 Y = gretl_matrix_data_subset_masked(ylist, dset, pmod->t1, pmod->t2,
442 missmask, &err);
443 if (!err) {
444 /* all instruments on RHS */
445 X = gretl_matrix_data_subset_masked(instlist, dset, pmod->t1, pmod->t2,
446 missmask, &err);
447 }
448
449 if (!err) {
450 /* populate the residuals matrix, E */
451 err = gretl_matrix_multi_ols(Y, X, NULL, E, NULL);
452 }
453
454 gretl_matrix_free(Y);
455 gretl_matrix_free(X);
456 free(ylist);
457
458 return err;
459 }
460
461 /* calculate the maximized log-likelihood for a just-identified
462 model */
463
tsls_loglik(MODEL * pmod,const int * endolist,const int * reglist,const int * instlist,const char * missmask,DATASET * dset)464 static int tsls_loglik (MODEL *pmod,
465 const int *endolist,
466 const int *reglist,
467 const int *instlist,
468 const char *missmask,
469 DATASET *dset)
470 {
471 gretl_matrix *E, *S;
472 int T = pmod->nobs;
473 int k = 1 + endolist[0];
474 int err = 0;
475
476 pmod->lnL = NADBL;
477
478 E = gretl_matrix_alloc(T, k);
479 S = gretl_matrix_alloc(k, k);
480
481 if (E == NULL || S == NULL) {
482 err = E_ALLOC;
483 } else {
484 err = fill_E_matrix(E, pmod, endolist, reglist, instlist,
485 missmask, dset);
486 }
487
488 if (!err) {
489 err = gretl_matrix_multiply_mod(E, GRETL_MOD_TRANSPOSE,
490 E, GRETL_MOD_NONE,
491 S, GRETL_MOD_NONE);
492 }
493
494 if (!err) {
495 double ldet = gretl_matrix_log_determinant(S, &err);
496
497 if (err) {
498 pmod->lnL = NADBL;
499 } else {
500 /* Davidson and MacKinnon, ETM, p. 538, taking
501 kappa = 1 for the just-identified case
502 */
503 pmod->lnL = -(T / 2.0) * (LN_2_PI + ldet);
504 }
505 }
506
507 mle_criteria(pmod, 0);
508
509 gretl_matrix_free(E);
510 gretl_matrix_free(S);
511
512 return err;
513 }
514
515 /* perform the Sargan overidentification test for an IV model */
516
517 static int
ivreg_sargan_test(MODEL * pmod,int Orank,int * instlist,DATASET * dset)518 ivreg_sargan_test (MODEL *pmod, int Orank, int *instlist,
519 DATASET *dset)
520 {
521 int t1 = pmod->t1;
522 int t2 = pmod->t2;
523 int ninst = instlist[0];
524 int i, t, err = 0;
525 MODEL smod;
526 int *OT_list = NULL;
527 int nv = dset->v;
528
529 if (Orank == 0) {
530 return 0;
531 }
532
533 if (pmod->list[0] == 2 && pmod->list[2] == 0) {
534 /* degenerate model with const only */
535 return 0;
536 }
537
538 err = dataset_add_series(dset, 1);
539 if (err) {
540 return err;
541 }
542
543 /* add the @pmod residual to the dataset */
544 for (t=t1; t<=t2; t++) {
545 dset->Z[nv][t] = pmod->uhat[t];
546 }
547
548 OT_list = gretl_list_new(ninst + 1);
549 if (OT_list == NULL) {
550 dataset_drop_last_variables(dset, 1);
551 return E_ALLOC;
552 }
553
554 OT_list[1] = nv;
555 for (i=2; i<=OT_list[0]; i++) {
556 OT_list[i] = instlist[i-1];
557 }
558
559 #if TDEBUG
560 fprintf(stderr, "running regression for Sargan test\n");
561 #endif
562
563 /* OLS: the dependent variable is the residual from @pmod;
564 the regressors are the (full set of) instruments
565 */
566 smod = lsq(OT_list, dset, OLS, OPT_A);
567
568 #if TDEBUG
569 if (!smod.errcode) {
570 PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
571
572 strcpy(dset->varname[nv], "IV uhat");
573 printmodel(&smod, dset, OPT_NONE, prn);
574 gretl_print_destroy(prn);
575 }
576 #endif
577
578 if (smod.errcode) {
579 fprintf(stderr, "ivreg_sargan_test: smod.errcode = %d\n", smod.errcode);
580 err = smod.errcode;
581 } else {
582 ModelTest *test = model_test_new(GRETL_TEST_SARGAN);
583 double OTest = smod.rsq * smod.nobs;
584
585 #if TDEBUG
586 fprintf(stderr, "Otest = %g * %d = %g\n", smod.rsq, smod.nobs,
587 smod.rsq * smod.nobs);
588 #endif
589
590 if (test != NULL) {
591 model_test_set_teststat(test, GRETL_STAT_LM);
592 model_test_set_dfn(test, Orank);
593 model_test_set_value(test, OTest);
594 model_test_set_pvalue(test, chisq_cdf_comp(Orank, OTest));
595 maybe_add_test_to_model(pmod, test);
596 }
597 }
598
599 clear_model(&smod);
600 dataset_drop_last_variables(dset, 1);
601 free(OT_list);
602
603 return err;
604 }
605
606 #define HTDBG 0
607
608 /* Hausman test via the regression method */
609
610 static int
tsls_hausman_test(MODEL * tmod,int * reglist,int * hatlist,DATASET * dset)611 tsls_hausman_test (MODEL *tmod, int *reglist, int *hatlist,
612 DATASET *dset)
613 {
614 MODEL hmod;
615 double URSS;
616 int *HT_list = NULL;
617 int t, df;
618 int nv = dset->v;
619 int ku = 0;
620 int err = 0;
621
622 #if HTDBG
623 int nobs1;
624 PRN *dbgprn = NULL;
625 dbgprn = gretl_print_new(GRETL_PRINT_STDOUT, NULL);
626 #endif
627
628 /* create regression list for unrestricted model by appending
629 @hatlist to @reglist */
630 HT_list = gretl_list_add(reglist, hatlist, &err);
631 if (err) {
632 if (err == E_NOADD) {
633 /* more of a no-op than an error */
634 err = 0;
635 } else {
636 fprintf(stderr, "tsls_hausman_test: gretl_list_add: err = %d\n", err);
637 }
638 return err;
639 }
640
641 #if TDEBUG
642 fprintf(stderr, "running regression for Hausman test, 1\n");
643 #endif
644
645 /* estimate the unrestricted model */
646 hmod = lsq(HT_list, dset, OLS, OPT_A);
647 if (hmod.errcode) {
648 fprintf(stderr, "tsls_hausman_test: hmod.errcode (U) = %d\n", hmod.errcode);
649 err = hmod.errcode;
650 goto bailout;
651 }
652
653 #if HTDBG
654 pprintf(dbgprn, "Hausman: unrestricted model (df = %d)\n", hmod.dfd);
655 printmodel(&hmod, dset, OPT_NONE, dbgprn);
656 nobs1 = hmod.nobs;
657 #endif
658
659 if (hmod.dfd == 0) {
660 /* perfect fit, can't do test */
661 goto bailout;
662 }
663
664 /* record U-model info */
665 URSS = hmod.ess;
666 ku = hmod.ncoeff;
667 #if HTDBG
668 pprintf(dbgprn, "URSS = %g (ku = %d)\n", URSS, ku);
669 #endif
670
671 /* add fitted values from unrestricted model to dataset */
672 err = dataset_add_series(dset, 1);
673 if (err) {
674 goto bailout;
675 } else {
676 for (t=hmod.t1; t<=hmod.t2; t++) {
677 dset->Z[nv][t] = hmod.yhat[t];
678 }
679 }
680
681 clear_model(&hmod);
682
683 /* redefine HT_list: dep var is the fitted value from the
684 U-model, regressors are from @reglist */
685 free(HT_list);
686 HT_list = gretl_list_copy(reglist);
687 HT_list[1] = nv;
688
689 #if TDEBUG
690 fprintf(stderr, "running regression for Hausman test, 2\n");
691 #endif
692
693 /* regress the U fitted values on the regressors of the restricted
694 model (so the sum of squares equals RRSS-URSS)
695 */
696 hmod = lsq(HT_list, dset, OLS, OPT_A);
697
698 #if HTDBG
699 strcpy(dset->varname[dset->v - 1], "Ufitvals");
700 pprintf(dbgprn, "Hausman: aux regression\n", hmod.dfd);
701 printmodel(&hmod, dset, OPT_NONE, dbgprn);
702 pprintf(dbgprn, "smpl1 = %d, smpl2 = %d\n", nobs1, hmod.nobs);
703 pprintf(dbgprn, "k1 = %d, k2 = %d\n", hmod.ncoeff, ku);
704 #endif
705
706 if (hmod.errcode) {
707 fprintf(stderr, "tsls_hausman_test: hmod.errcode (D) = %d\n", hmod.errcode);
708 err = hmod.errcode;
709 } else if ((df = ku - hmod.ncoeff) > 0) {
710 /* Degrees of freedom for the Hausman test need not be equal to the
711 number of added regressors, since some of them may not really be
712 endogenous.
713 */
714 double DRSS = hmod.ess;
715 double HTest = (DRSS/URSS) * hmod.nobs;
716 ModelTest *test = model_test_new(GRETL_TEST_IV_HAUSMAN);
717
718 #if HTDBG
719 pprintf(dbgprn, "df = %d - %d = %d\n", ku, hmod.ncoeff, df);
720 pprintf(dbgprn, "DRSS = (RRSS - URSS) = %g, URSS = %g\n", DRSS, URSS);
721 pprintf(dbgprn, "Htest = %g [%.4f]\n", HTest, chisq_cdf_comp(df, HTest));
722 #endif
723
724 if (test != NULL) {
725 model_test_set_teststat(test, GRETL_STAT_WALD_CHISQ);
726 model_test_set_dfn(test, df);
727 model_test_set_value(test, HTest);
728 model_test_set_pvalue(test, chisq_cdf_comp(df, HTest));
729 maybe_add_test_to_model(tmod, test);
730 }
731 }
732
733 bailout:
734
735 #if HTDBG
736 gretl_print_destroy(dbgprn);
737 #endif
738
739 dataset_drop_last_variables(dset, dset->v - nv);
740 clear_model(&hmod);
741 free(HT_list);
742
743 return err;
744 }
745
746 /* form matrix of instruments and perform QR decomposition
747 of this matrix; return Q */
748
tsls_Q(int * instlist,int ** pdlist,const DATASET * dset,char * mask,int * err)749 static gretl_matrix *tsls_Q (int *instlist, int **pdlist,
750 const DATASET *dset, char *mask,
751 int *err)
752 {
753 gretl_matrix *Q = NULL;
754 gretl_matrix *R = NULL;
755 int rank, ndrop = 0;
756 int *droplist = NULL;
757 double test;
758 int i, j, k;
759
760 Q = gretl_matrix_data_subset_masked(instlist, dset,
761 dset->t1, dset->t2,
762 mask, err);
763 if (*err) {
764 return NULL;
765 }
766
767 k = gretl_matrix_cols(Q);
768
769 if (k > Q->rows) {
770 /* can't do QR decomp! */
771 gretl_errmsg_sprintf(_("Number of instruments (%d) exceeds the "
772 "number of observations (%d)"),
773 k, Q->rows);
774 *err = E_DF;
775 goto bailout;
776 }
777
778 R = gretl_matrix_alloc(k, k);
779 if (R == NULL) {
780 *err = E_ALLOC;
781 goto bailout;
782 }
783
784 *err = gretl_matrix_QR_decomp(Q, R);
785 if (*err) {
786 goto bailout;
787 }
788
789 rank = gretl_check_QR_rank(R, err, NULL);
790
791 #if TDEBUG
792 fprintf(stderr, "tsls_Q: k = Q->cols = %d, rank = %d\n", k, rank);
793 if (rank == 0) {
794 gretl_matrix_print(R, "R");
795 }
796 #endif
797
798 if (*err) {
799 goto bailout;
800 } else if (rank < k) {
801 fprintf(stderr, "tsls_Q: k = %d, rank = %d\n", k, rank);
802 ndrop = k - rank;
803 }
804
805 if (ndrop > 0) {
806 droplist = gretl_list_new(ndrop);
807 if (droplist != NULL) {
808 droplist[0] = 0;
809 }
810
811 j = 1;
812 for (i=0; i<k; i++) {
813 test = gretl_matrix_get(R, i, i);
814 if (fabs(test) < R_DIAG_MIN) {
815 if (droplist != NULL) {
816 droplist[0] += 1;
817 droplist[droplist[0]] = instlist[j];
818 }
819 fprintf(stderr, "tsls_Q: dropping redundant instrument %d (%s)\n",
820 instlist[j], dset->varname[instlist[j]]);
821 gretl_list_delete_at_pos(instlist, j--);
822 }
823 j++;
824 }
825
826 k = instlist[0];
827 gretl_matrix_free(Q);
828 Q = gretl_matrix_data_subset_masked(instlist, dset,
829 dset->t1, dset->t2,
830 mask, err);
831 if (!*err) {
832 R = gretl_matrix_reuse(R, k, k);
833 *err = gretl_matrix_QR_decomp(Q, R);
834 }
835 }
836
837 bailout:
838
839 gretl_matrix_free(R);
840
841 if (*err) {
842 free(droplist);
843 gretl_matrix_free(Q);
844 Q = NULL;
845 } else {
846 *pdlist = droplist;
847 }
848
849 return Q;
850 }
851
tsls_form_xhat(gretl_matrix * Q,gretl_matrix * g,DATASET * dset,int v0,int v1,const char * mask)852 static int tsls_form_xhat (gretl_matrix *Q, gretl_matrix *g,
853 DATASET *dset, int v0, int v1,
854 const char *mask)
855 {
856 const double *x = dset->Z[v0];
857 double *xhat = dset->Z[v1];
858 int k = gretl_matrix_cols(Q);
859 int allzero = 1;
860 int i, t, s = 0;
861 int err = 0;
862
863 #if TDEBUG > 1
864 fprintf(stderr, "tsls_form_xhat: v0=%d, v1=%d, t1=%d, t2=%d, mask=%p\n",
865 v0, v1, dset->t1, dset->t2, (void *) mask);
866 #endif
867
868 /* form g = Q'x */
869 for (i=0; i<k; i++) {
870 g->val[i] = 0.0;
871 s = 0;
872 for (t=dset->t1; t<=dset->t2; t++) {
873 if (mask != NULL && mask[t - dset->t1]) {
874 continue;
875 }
876 g->val[i] += gretl_matrix_get(Q, s++, i) * x[t];
877 }
878 }
879
880 /* form \hat{x} = Qg = QQ'x */
881 s = 0;
882 for (t=dset->t1; t<=dset->t2; t++) {
883 if (mask != NULL && mask[t - dset->t1]) {
884 xhat[t] = NADBL;
885 continue;
886 }
887 xhat[t] = 0.0;
888 for (i=0; i<k; i++) {
889 xhat[t] += gretl_matrix_get(Q, s, i) * g->val[i];
890 }
891 if (xhat[t] != 0) {
892 allzero = 0;
893 }
894 s++;
895 }
896
897 if (allzero) {
898 gretl_errmsg_sprintf(_("The first-stage fitted values for %s are all zero"),
899 dset->varname[v0]);
900 err = E_DATA;
901 } else {
902 /* name the fitted series according to the original */
903 strcpy(dset->varname[v1], "h_");
904 strncat(dset->varname[v1], dset->varname[v0], VNAMELEN - 3);
905 }
906
907 return err;
908 }
909
tsls_residuals(MODEL * pmod,const int * reglist,const DATASET * dset,gretlopt opt)910 static void tsls_residuals (MODEL *pmod, const int *reglist,
911 const DATASET *dset,
912 gretlopt opt)
913 {
914 int yno = reglist[1];
915 double yh, sigma0 = pmod->sigma;
916 int i, t;
917
918 pmod->ess = 0.0;
919
920 for (t=pmod->t1; t<=pmod->t2; t++) {
921 if (model_missing(pmod, t)) {
922 continue;
923 }
924 yh = 0.0;
925 for (i=0; i<pmod->ncoeff; i++) {
926 yh += pmod->coeff[i] * dset->Z[reglist[i+2]][t];
927 }
928 pmod->yhat[t] = yh;
929 pmod->uhat[t] = dset->Z[yno][t] - yh;
930 pmod->ess += pmod->uhat[t] * pmod->uhat[t];
931 }
932
933 if (pmod->ess <= 0.0) {
934 pmod->sigma = 0.0;
935 } else {
936 int den = (opt & OPT_N)? pmod->nobs : pmod->dfd;
937
938 pmod->sigma = sqrt(pmod->ess / den);
939 }
940
941 if (sigma0 > 0.0) {
942 double corr = pmod->sigma / sigma0;
943
944 for (i=0; i<pmod->ncoeff; i++) {
945 pmod->sderr[i] *= corr;
946 }
947 }
948 }
949
tsls_extra_stats(MODEL * pmod,int yno,int overid,gretlopt opt,const DATASET * dset)950 static void tsls_extra_stats (MODEL *pmod, int yno, int overid,
951 gretlopt opt, const DATASET *dset)
952 {
953 double r;
954
955 /* squared correlation between y and yhat */
956 pmod->rsq = gretl_corr_rsq(pmod->t1, pmod->t2, dset->Z[yno], pmod->yhat);
957
958 r = 1 - pmod->rsq;
959 pmod->adjrsq = 1.0 - (r * (pmod->nobs - 1) / pmod->dfd);
960
961 pmod->fstt = pmod->chisq = NADBL;
962
963 if (pmod->ncoeff > 1) {
964 if ((opt & OPT_N) || (overid && pmod->ncoeff == 2)) {
965 pmod->chisq = wald_omit_chisq(NULL, pmod);
966 } else {
967 pmod->fstt = wald_omit_F(NULL, pmod);
968 }
969 }
970
971 /* tsls is not an ML estimator unless it's exactly identified,
972 and in that case we require a special calculation */
973 pmod->lnL = NADBL;
974 pmod->criterion[C_AIC] = NADBL;
975 pmod->criterion[C_BIC] = NADBL;
976 pmod->criterion[C_HQC] = NADBL;
977
978 if (dataset_is_time_series(dset) && !model_has_missing_obs(pmod)) {
979 /* time series, no missing obs within sample range */
980 pmod->rho = rhohat(1, pmod->t1, pmod->t2, pmod->uhat);
981 pmod->dw = dwstat(1, pmod, dset);
982 } else {
983 pmod->rho = pmod->dw = NADBL;
984 }
985 }
986
tsls_recreate_full_list(MODEL * pmod,const int * reglist,const int * instlist)987 static void tsls_recreate_full_list (MODEL *pmod, const int *reglist,
988 const int *instlist)
989 {
990 int *full_list;
991
992 if (instlist != NULL && instlist[0] > 0) {
993 full_list = gretl_lists_join_with_separator(reglist, instlist);
994 } else {
995 full_list = gretl_list_copy(reglist);
996 }
997
998 if (full_list == NULL) {
999 pmod->errcode = E_ALLOC;
1000 } else {
1001 free(pmod->list);
1002 pmod->list = full_list;
1003 }
1004 }
1005
replace_list_element(int * list,int targ,int repl)1006 static void replace_list_element (int *list, int targ, int repl)
1007 {
1008 int i;
1009
1010 for (i=1; i<=list[0]; i++) {
1011 if (list[i] == targ) {
1012 list[i] = repl;
1013 break;
1014 }
1015 }
1016 }
1017
reglist_remove_redundant_vars(const MODEL * tmod,int * s2list,int * reglist,int * endolist,int * hatlist)1018 static int reglist_remove_redundant_vars (const MODEL *tmod,
1019 int *s2list,
1020 int *reglist,
1021 int *endolist,
1022 int *hatlist)
1023 {
1024 int *dlist = gretl_model_get_list(tmod, "droplist");
1025 int i, pos;
1026
1027 if (dlist == NULL) {
1028 return 1;
1029 }
1030
1031 for (i=1; i<=dlist[0]; i++) {
1032 pos = in_gretl_list(s2list, dlist[i]);
1033 if (pos > 1) {
1034 gretl_list_delete_at_pos(reglist, pos);
1035 }
1036 if (endolist != NULL) {
1037 /* note: if endolist is NULL, so is hatlist */
1038 pos = in_gretl_list(hatlist, dlist[i]);
1039 if (pos > 1) {
1040 /* First replace dlist[i] with the ID of the original
1041 regressor from @endolist, in place of the "hatlist"
1042 variable (which will be deleted when the model is
1043 returned), so that the printout of regressors that are
1044 dropped due to exact collinearity will show the
1045 appropriate names.
1046
1047 Second, delete the redundant term from both endolist
1048 and hatlist, so that subsequent calculations will not
1049 get messed up.
1050 */
1051 dlist[i] = endolist[pos];
1052 gretl_list_delete_at_pos(endolist, pos);
1053 gretl_list_delete_at_pos(hatlist, pos);
1054 }
1055 }
1056 }
1057
1058 return 0;
1059 }
1060
1061 /* Stock-Yogo: compute test statistic for weakness of instruments.
1062 This is the Cragg-Donald statistic: the minimum eigenvalue of the
1063 matrix defined below (which reduces to the first-stage F-statistic
1064 when there is just one endogenous regressor). See Stock and Yogo,
1065 "Testing for Weak Instruments in Linear IV Regressions" (originally
1066 NBER Technical Working Paper 284; revised February 2003), at
1067 http://ksghome.harvard.edu/~JStock/pdf/rfa_6.pdf
1068 */
1069
1070 static int
compute_stock_yogo(MODEL * pmod,const int * endolist,const int * instlist,const int * hatlist,const DATASET * dset)1071 compute_stock_yogo (MODEL *pmod, const int *endolist,
1072 const int *instlist, const int *hatlist,
1073 const DATASET *dset)
1074 {
1075 const double **dZ = (const double **) dset->Z;
1076 gretl_matrix_block *B = NULL;
1077 gretl_matrix_block *B2 = NULL;
1078 gretl_matrix *G, *S, *Y;
1079 gretl_matrix *Ya, *YPY;
1080 gretl_matrix *X, *Z, *E = NULL;
1081 int T = pmod->nobs;
1082 int n = endolist[0];
1083 int K1 = pmod->ncoeff - n;
1084 int K2 = instlist[0] - K1;
1085 int i, s, vi, vj, t;
1086 int err = 0;
1087
1088 B = gretl_matrix_block_new(&G, n, n,
1089 &S, n, n,
1090 &Y, T, n,
1091 &Ya, T, n,
1092 &YPY, n, n,
1093 NULL);
1094 if (B == NULL) {
1095 return E_ALLOC;
1096 }
1097
1098 #if TDEBUG
1099 fprintf(stderr, "stock_yogo: pmod->ncoeff=%d, n=%d, K1=%d, K2=%d\n",
1100 pmod->ncoeff, n, K1, K2);
1101 printlist(endolist, "endolist, in stock-yogo");
1102 #endif
1103
1104 if (K1 > 0) {
1105 /* we have some exogenous regressors */
1106 gretl_matrix *M;
1107 int j, ix = 0, iz = 0;
1108
1109 B2 = gretl_matrix_block_new(&X, T, K1,
1110 &Z, T, K2,
1111 &E, T, K2,
1112 NULL);
1113 if (B2 == NULL) {
1114 gretl_matrix_block_destroy(B);
1115 return E_ALLOC;
1116 }
1117
1118 /* form the X and Z matrices */
1119 for (i=1; i<=instlist[0]; i++) {
1120 vi = instlist[i];
1121 if (in_gretl_list(pmod->list, vi)) {
1122 /* included exogenous var -> X */
1123 M = X;
1124 j = ix++;
1125 } else {
1126 /* excluded exogenous var -> Z */
1127 M = Z;
1128 j = iz++;
1129 }
1130 s = 0;
1131 for (t=pmod->t1; t<=pmod->t2; t++) {
1132 if (!na(pmod->uhat[t])) {
1133 gretl_matrix_set(M, s++, j, dZ[vi][t]);
1134 }
1135 }
1136 }
1137 }
1138
1139 /* form Y matrix */
1140 for (i=0; i<n; i++) {
1141 vi = endolist[i+1];
1142 s = 0;
1143 for (t=pmod->t1; t<=pmod->t2; t++) {
1144 if (!na(pmod->uhat[t])) {
1145 gretl_matrix_set(Y, s++, i, dZ[vi][t]);
1146 }
1147 }
1148 }
1149
1150 if (K1 == 0) {
1151 /* form Y-hat matrix in Ya */
1152 for (i=0; i<n; i++) {
1153 vi = hatlist[i+1];
1154 s = 0;
1155 for (t=pmod->t1; t<=pmod->t2; t++) {
1156 if (!na(pmod->uhat[t])) {
1157 gretl_matrix_set(Ya, s++, i, dZ[vi][t]);
1158 }
1159 }
1160 }
1161 } else {
1162 int bdim = (K1 > K2)? K1 : K2;
1163 gretl_matrix *b;
1164
1165 /* the leading dimension of b is the number of regressors */
1166 b = gretl_matrix_alloc(bdim, K2);
1167 if (b == NULL) {
1168 err = E_ALLOC;
1169 }
1170
1171 if (!err) {
1172 /* partial X out of Y: Y <- M_x Y */
1173 gretl_matrix_reuse(b, K1, n);
1174 gretl_matrix_reuse(E, -1, n);
1175 err = gretl_matrix_multi_ols(Y, X, b, E, NULL);
1176 gretl_matrix_copy_values(Y, E);
1177 }
1178
1179 if (!err) {
1180 /* partial X out of Z: Z <- M_x Z */
1181 gretl_matrix_reuse(b, K1, K2);
1182 gretl_matrix_reuse(E, -1, K2);
1183 err = gretl_matrix_multi_ols(Z, X, b, E, NULL);
1184 gretl_matrix_copy_values(Z, E);
1185 }
1186
1187 if (!err) {
1188 /* form projection P_z Y, in Ya */
1189 gretl_matrix_reuse(b, K2, n);
1190 gretl_matrix_reuse(E, -1, n);
1191 err = gretl_matrix_multi_ols(Y, Z, b, E, NULL);
1192 gretl_matrix_copy_values(Ya, Y);
1193 gretl_matrix_subtract_from(Ya, E);
1194 }
1195
1196 gretl_matrix_free(b);
1197 }
1198
1199 if (!err) {
1200 /* form Y' P_z Y */
1201 err = gretl_matrix_multiply_mod(Y, GRETL_MOD_TRANSPOSE,
1202 Ya, GRETL_MOD_NONE,
1203 YPY, GRETL_MOD_NONE);
1204 }
1205
1206 /* now write first-stage residuals into Ya */
1207 for (i=0; i<n && !err; i++) {
1208 vi = endolist[i+1];
1209 vj = hatlist[i+1];
1210 s = 0;
1211 for (t=pmod->t1; t<=pmod->t2; t++) {
1212 if (!na(pmod->uhat[t])) {
1213 gretl_matrix_set(Ya, s++, i, dZ[vi][t] - dZ[vj][t]);
1214 }
1215 }
1216 }
1217
1218 if (!err) {
1219 /* form S = \hat{\Sigma}_{vv} */
1220 err = gretl_matrix_multiply_mod(Y, GRETL_MOD_TRANSPOSE,
1221 Ya, GRETL_MOD_NONE,
1222 S, GRETL_MOD_NONE);
1223 if (!err) {
1224 gretl_matrix_xtr_symmetric(S);
1225 gretl_matrix_divide_by_scalar(S, T - K1 - K2);
1226 for (i=0; i<S->rows; i++) {
1227 if (gretl_matrix_get(S, i, i) <= 0) {
1228 err = E_SINGULAR;
1229 break;
1230 }
1231 }
1232 }
1233 }
1234
1235 if (!err) {
1236 double rc = gretl_symmetric_matrix_rcond(S, &err);
1237
1238 if (!err && (na(rc) || rc < 1.0e-7)) { /* note: was 1.0e-6 */
1239 #if 0
1240 fprintf(stderr, "Stock-Yogo: rcond(S) = %g\n", rc);
1241 #endif
1242 err = E_SINGULAR;
1243 }
1244 }
1245
1246 if (!err) {
1247 /* S^{-1/2}: invert S and Cholesky-decompose */
1248 err = gretl_invert_symmetric_matrix(S);
1249 if (!err) {
1250 err = gretl_matrix_cholesky_decomp(S);
1251 }
1252 }
1253
1254 if (!err) {
1255 /* finally, form big sandwich */
1256 err = gretl_matrix_qform(S, GRETL_MOD_TRANSPOSE,
1257 YPY, G, GRETL_MOD_NONE);
1258 if (!err) {
1259 gretl_matrix_divide_by_scalar(G, K2);
1260 }
1261 }
1262
1263 if (!err) {
1264 double gmin = gretl_symm_matrix_lambda_min(G, &err);
1265
1266 if (!err) {
1267 gretl_model_set_double(pmod, "gmin", gmin);
1268 gretl_model_set_int(pmod, "n", n);
1269 gretl_model_set_int(pmod, "K2", K2);
1270 }
1271 }
1272
1273 gretl_matrix_block_destroy(B);
1274 gretl_matrix_block_destroy(B2);
1275
1276 #if 0
1277 if (err) {
1278 fprintf(stderr, "compute_stock_yogo: err = %d\n", err);
1279 }
1280 #endif
1281
1282 return err;
1283 }
1284
1285 /* Calculate first-stage F-test as diagnostic for weak instruments.
1286 This variant can only handle one endogenous regressor, but it
1287 supports robust estimation if the model in question uses a robust
1288 covariance estimator.
1289 */
1290
compute_first_stage_F(MODEL * pmod,const int * endolist,const int * reglist,const int * instlist,DATASET * dset,gretlopt opt)1291 static int compute_first_stage_F (MODEL *pmod,
1292 const int *endolist,
1293 const int *reglist,
1294 const int *instlist,
1295 DATASET *dset,
1296 gretlopt opt)
1297 {
1298 MODEL mod1;
1299 int *list1 = NULL;
1300 int *flist = NULL;
1301 double F;
1302 int i, ev, vi;
1303 int err = 0;
1304
1305 gretl_model_init(&mod1, dset);
1306 ev = endolist[1];
1307
1308 /* The regression list, list1, has endogenous regressor ev as the
1309 dependent variable and includes all instruments; the list for
1310 omission, flist, includes all the excluded instruments.
1311 */
1312
1313 list1 = gretl_list_append_term(&list1, ev);
1314 if (list1 == NULL) {
1315 return E_ALLOC;
1316 }
1317
1318 for (i=1; i<=instlist[0]; i++) {
1319 vi = instlist[i];
1320 list1 = gretl_list_append_term(&list1, vi);
1321 if (list1 == NULL) {
1322 err = E_ALLOC;
1323 break;
1324 }
1325 if (!in_gretl_list(reglist, vi)) {
1326 flist = gretl_list_append_term(&flist, vi);
1327 if (flist == NULL) {
1328 err = E_ALLOC;
1329 break;
1330 }
1331 }
1332 }
1333
1334 if (!err) {
1335 gretlopt myopt = (opt & OPT_R)? OPT_R : OPT_NONE;
1336
1337 mod1 = lsq(list1, dset, OLS, myopt | OPT_A);
1338 err = mod1.errcode;
1339 if (err) {
1340 fprintf(stderr, "compute_first_stage F: lsq failed\n");
1341 }
1342 }
1343
1344 if (!err) {
1345 F = wald_omit_F(flist, &mod1);
1346 if (na(F)) {
1347 err = 1;
1348 }
1349 }
1350
1351 if (!err) {
1352 gretl_model_set_double(pmod, "stage1-F", F);
1353 gretl_model_set_int(pmod, "stage1-dfn", flist[0]);
1354 gretl_model_set_int(pmod, "stage1-dfd", mod1.dfd);
1355 if (!(opt & OPT_R)) {
1356 /* flag lookup of Stock-Yogo critical values */
1357 gretl_model_set_double(pmod, "gmin", F);
1358 }
1359 }
1360
1361 clear_model(&mod1);
1362 free(list1);
1363 free(flist);
1364
1365 return err;
1366 }
1367
tsls_adjust_sample(const int * list,DATASET * dset,char ** pmask)1368 static int tsls_adjust_sample (const int *list, DATASET *dset,
1369 char **pmask)
1370 {
1371 int i, t, t1min = dset->t1, t2max = dset->t2;
1372 char *mask = NULL;
1373 int T, vi, missobs;
1374 int err = 0;
1375
1376 /* advance start of sample range to skip missing obs? */
1377 for (t=t1min; t<t2max; t++) {
1378 missobs = 0;
1379 for (i=1; i<=list[0]; i++) {
1380 vi = list[i];
1381 if (vi == 0 || vi == LISTSEP) {
1382 continue;
1383 }
1384 if (na(dset->Z[vi][t])) {
1385 missobs = 1;
1386 break;
1387 }
1388 }
1389 if (missobs) {
1390 t1min++;
1391 } else {
1392 break;
1393 }
1394 }
1395
1396 /* retard end of sample range to skip missing obs? */
1397 for (t=t2max; t>t1min; t--) {
1398 missobs = 0;
1399 for (i=1; i<=list[0]; i++) {
1400 vi = list[i];
1401 if (vi == 0 || vi == LISTSEP) {
1402 continue;
1403 }
1404 if (na(dset->Z[vi][t])) {
1405 missobs = 1;
1406 break;
1407 }
1408 }
1409 if (missobs) {
1410 t2max--;
1411 } else {
1412 break;
1413 }
1414 }
1415
1416 /* count missing values in mid-range of data */
1417 missobs = 0;
1418 for (t=t1min; t<=t2max; t++) {
1419 for (i=1; i<=list[0]; i++) {
1420 vi = list[i];
1421 if (vi == 0 || vi == LISTSEP) {
1422 continue;
1423 }
1424 if (na(dset->Z[vi][t])) {
1425 missobs++;
1426 break;
1427 }
1428 }
1429 }
1430
1431 T = t2max - t1min + 1;
1432
1433 if (missobs == T) {
1434 err = E_MISSDATA;
1435 } else if (missobs > 0) {
1436 mask = calloc(T, 1); /* all NUL bytes */
1437 if (mask == NULL) {
1438 err = E_ALLOC;
1439 } else {
1440 for (t=t1min; t<=t2max; t++) {
1441 for (i=1; i<=list[0]; i++) {
1442 vi = list[i];
1443 if (vi == 0 || vi == LISTSEP) {
1444 continue;
1445 }
1446 if (na(dset->Z[vi][t])) {
1447 mask[t - t1min] = 1;
1448 break;
1449 }
1450 }
1451 }
1452 }
1453 }
1454
1455 #if TDEBUG
1456 fprintf(stderr, "tsls_adjust_sample: t1=%d, t2=%d, missobs=%d, ok obs=%d\n",
1457 t1min, t2max, missobs, t2max - t1min + 1 - missobs);
1458 #endif
1459
1460 if (!err) {
1461 dset->t1 = t1min;
1462 dset->t2 = t2max;
1463 }
1464
1465 *pmask = mask;
1466
1467 return err;
1468 }
1469
1470 /**
1471 * ivreg_process_lists:
1472 * @list: original specification list.
1473 * @reglist: location to receive regression list.
1474 * @instlist: location to receive list of instruments.
1475 *
1476 * Split the incoming list into its two components and perform
1477 * some basic checks; if the checks fail the two created
1478 * lists are destroyed.
1479 *
1480 * Returns: 0, on success, non-zero error code on failure.
1481 */
1482
ivreg_process_lists(const int * list,int ** reglist,int ** instlist)1483 int ivreg_process_lists (const int *list, int **reglist, int **instlist)
1484 {
1485 int *rlist, *zlist;
1486 int i, err;
1487
1488 err = gretl_list_split_on_separator(list, reglist, instlist);
1489 if (err) {
1490 fprintf(stderr, "gretl_list_split_on_separator: failed\n");
1491 return err;
1492 }
1493
1494 rlist = *reglist;
1495 zlist = *instlist;
1496
1497 if (rlist[0] < 2 || zlist == NULL || zlist[0] < 1) {
1498 err = E_ARGS;
1499 } else {
1500 for (i=1; i<=zlist[0]; i++) {
1501 if (zlist[i] == list[1]) {
1502 gretl_errmsg_set(_("You can't use the dependent variable as an instrument"));
1503 err = E_DATA;
1504 break;
1505 }
1506 }
1507 }
1508
1509 if (!err) {
1510 int oid = zlist[0] - rlist[0] + 1;
1511
1512 if (oid < 0 && (in_gretl_list(rlist, 0) > 1) &&
1513 !in_gretl_list(zlist, 0)) {
1514 /* do not treat the constant as if it were endogenous */
1515 err = zlist_prepend_const(instlist);
1516 if (!err) {
1517 zlist = *instlist;
1518 oid++;
1519 }
1520 }
1521 if (!err && oid < 0) {
1522 gretl_errmsg_sprintf(_("The order condition for identification is not satisfied.\n"
1523 "At least %d more instruments are needed."), -oid);
1524 fprintf(stderr, "zlist[0] = %d, rlist[0] = %d\n", zlist[0], rlist[0]);
1525 err = E_DATA;
1526 }
1527 }
1528
1529 if (err) {
1530 free(*reglist);
1531 free(*instlist);
1532 *reglist = NULL;
1533 *instlist = NULL;
1534 }
1535
1536 return err;
1537 }
1538
1539 /**
1540 * tsls:
1541 * @list: dependent variable plus list of regressors.
1542 * @dset: dataset struct.
1543 * @opt: may contain %OPT_R for robust VCV, %OPT_N for no df correction,
1544 * %OPT_A if this is an auxiliary reression, %OPT_E if we're
1545 * estimating one equation within a system, %OPT_H to
1546 * add "hatlist" to model even under %OPT_E, %OPT_X to skip
1547 * the set of tests that are usually calculated. Also %OPT_C
1548 * for clustered standard errors.
1549 *
1550 * Estimate the model given in @list by means of Two-Stage Least
1551 * Squares. If %OPT_E is given, fitted values from the first-stage
1552 * regressions are saved with the model.
1553 *
1554 * Returns: a #MODEL struct, containing the estimates.
1555 */
1556
tsls(const int * list,DATASET * dset,gretlopt opt)1557 MODEL tsls (const int *list, DATASET *dset, gretlopt opt)
1558 {
1559 MODEL tsls;
1560 gretl_matrix *Q = NULL, *g = NULL;
1561 char *missmask = NULL;
1562 int orig_t1 = dset->t1, orig_t2 = dset->t2;
1563 int *reglist = NULL, *instlist = NULL;
1564 int *hatlist = NULL, *s2list = NULL;
1565 int *exolist = NULL, *endolist = NULL;
1566 int *idroplist = NULL;
1567 int addconst = 0;
1568 int nendo = 0, nreg = 0;
1569 int orig_nvar = dset->v;
1570 int sysest = (opt & OPT_E);
1571 int no_tests = (opt & OPT_X);
1572 int OverIdRank = 0;
1573 int i, err = 0;
1574
1575 #if TDEBUG
1576 printlist(list, "tsls: incoming list");
1577 #endif
1578
1579 /* initialize model in case we bail out early on error */
1580 gretl_model_init(&tsls, dset);
1581
1582 gretl_error_clear();
1583
1584 /* reglist: dependent var plus list of regressors
1585 instlist: list of instruments
1586 */
1587 tsls.errcode = ivreg_process_lists(list, ®list, &instlist);
1588 if (tsls.errcode) {
1589 return tsls;
1590 }
1591
1592 /* adjust sample range for missing observations */
1593 err = tsls_adjust_sample(list, dset, &missmask);
1594
1595 if (!err) {
1596 /* allocate second stage regression list */
1597 s2list = gretl_list_copy(reglist);
1598 if (s2list == NULL) {
1599 err = E_ALLOC;
1600 }
1601 }
1602
1603 if (err) {
1604 goto bailout;
1605 }
1606
1607 /* in case we drop any instruments as redundant */
1608 if (sysest) {
1609 exolist = gretl_list_copy(instlist);
1610 }
1611
1612 /* drop any vars that are all zero, and reshuffle the constant
1613 into first position among the independent vars
1614 */
1615 tsls_omitzero(reglist, dset, missmask);
1616 reglist_check_for_const(reglist, dset);
1617 tsls_omitzero(instlist, dset, missmask);
1618
1619 #if TDEBUG
1620 printlist(reglist, "reglist");
1621 printlist(instlist, "instlist");
1622 #endif
1623
1624 /* Determine the list of variables (endolist) for which we need to
1625 obtain fitted values in the first stage. Note that we accept
1626 the condition where there are no such variables (and hence
1627 TSLS is in fact redundant); this may sometimes be required
1628 when tsls is called in the context of system estimation.
1629 */
1630 endolist = tsls_make_endolist(reglist, &instlist, &addconst, &err);
1631 if (err) {
1632 goto bailout;
1633 }
1634
1635 #if TDEBUG
1636 printlist(endolist, "endolist, in tsls");
1637 #endif
1638
1639 if (endolist != NULL) {
1640 nendo = endolist[0];
1641 hatlist = gretl_list_copy(endolist);
1642 if (hatlist == NULL) {
1643 err = E_ALLOC;
1644 }
1645 }
1646
1647 if (!err) {
1648 Q = tsls_Q(instlist, &idroplist, dset, missmask, &err);
1649 }
1650
1651 if (err) {
1652 goto bailout;
1653 }
1654
1655 /* check (again) for order condition: we do this after tsls_Q in case
1656 any instruments get dropped at that stage
1657 */
1658 nreg = reglist[0] - 1;
1659 OverIdRank = instlist[0] - nreg;
1660 if (OverIdRank < 0) {
1661 gretl_errmsg_sprintf(_("The order condition for identification is not satisfied.\n"
1662 "At least %d more instruments are needed."), -OverIdRank);
1663 err = E_DATA;
1664 goto bailout;
1665 }
1666
1667 #if TDEBUG
1668 fprintf(stderr, "nreg = %d, OverIdRank = %d\n", nreg, OverIdRank);
1669 #endif
1670
1671 /* allocate storage for fitted vars (etc.), if needed */
1672 if (nendo > 0) {
1673 err = dataset_add_series(dset, nendo);
1674 if (!err) {
1675 g = gretl_vector_alloc(Q->cols);
1676 if (g == NULL) {
1677 err = E_ALLOC;
1678 }
1679 }
1680 if (err) {
1681 goto bailout;
1682 }
1683 }
1684
1685 /*
1686 Deal with the variables for which instruments are needed: loop
1687 across the list of variables to be instrumented (endolist),
1688 form the fitted values as QQ'x_i, and add these fitted values
1689 into the data array, dset->Z.
1690 */
1691
1692 for (i=0; i<nendo; i++) {
1693 int v0 = endolist[i+1];
1694 int v1 = orig_nvar + i;
1695
1696 /* form xhat_i = QQ'x_i */
1697 err = tsls_form_xhat(Q, g, dset, v0, v1, missmask);
1698 if (err) {
1699 goto bailout;
1700 }
1701
1702 /* substitute v1 into the right place in the second-stage
1703 regression list */
1704 replace_list_element(s2list, v0, v1);
1705
1706 /* and update hatlist */
1707 hatlist[i+1] = v1;
1708 }
1709
1710 /* second-stage regression */
1711 tsls = lsq(s2list, dset, OLS, (sysest)? OPT_Z : OPT_NONE);
1712 if (tsls.errcode) {
1713 fprintf(stderr, "tsls, stage 2: tsls.errcode = %d\n", tsls.errcode);
1714 goto bailout;
1715 }
1716
1717 #if TDEBUG > 1
1718 PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, &err);
1719
1720 printmodel(&tsls, dset, OPT_S, prn);
1721 gretl_print_destroy(prn);
1722 #endif
1723
1724 if (tsls.list[0] < s2list[0]) {
1725 /* Were collinear regressors dropped? If so, adjustments are needed */
1726 OverIdRank += s2list[0] - tsls.list[0];
1727 reglist_remove_redundant_vars(&tsls, s2list, reglist, endolist,
1728 hatlist);
1729 if (endolist != NULL) {
1730 nendo = endolist[0];
1731 }
1732 #if TDEBUG
1733 fprintf(stderr, "tsls: dropped collinear vars\n");
1734 #endif
1735 }
1736
1737 if (instlist != NULL && instlist[0] > 0) {
1738 /* record the number of instruments used */
1739 gretl_model_set_int(&tsls, "ninst", instlist[0]);
1740 }
1741
1742 if (!no_tests) {
1743 if (!sysest || (opt & OPT_H)) {
1744 if (nendo == 1) {
1745 /* handles robust estimation, for single endogenous regressor */
1746 compute_first_stage_F(&tsls, endolist, reglist, instlist,
1747 dset, opt);
1748 } else if (nendo > 0 && !(opt & OPT_R)) {
1749 /* at present, only handles case of i.i.d. errors */
1750 compute_stock_yogo(&tsls, endolist, instlist, hatlist, dset);
1751 }
1752 }
1753 }
1754
1755 if (nendo > 0) {
1756 /* special: we need to use the original RHS vars to compute
1757 residuals and associated statistics */
1758 tsls_residuals(&tsls, reglist, dset, opt);
1759 }
1760
1761 if (opt & OPT_C) {
1762 /* clustered implies robust */
1763 opt |= OPT_R;
1764 }
1765
1766 if (opt & OPT_R) {
1767 /* robust standard errors called for */
1768 qr_tsls_vcv(&tsls, dset, opt);
1769 if (tsls.errcode) {
1770 fprintf(stderr, "qr_tsls_vcv: err = %d\n", tsls.errcode);
1771 goto bailout;
1772 }
1773 }
1774
1775 if (nendo > 0) {
1776 /* compute additional statistics (R^2, F, etc.) */
1777 tsls_extra_stats(&tsls, reglist[1], OverIdRank, opt, dset);
1778 }
1779
1780 if (!sysest && !no_tests) {
1781 if (nendo > 0 && hatlist != NULL) {
1782 tsls_hausman_test(&tsls, reglist, hatlist, dset);
1783 }
1784 if (OverIdRank > 0) {
1785 ivreg_sargan_test(&tsls, OverIdRank, instlist, dset);
1786 } else if (nendo > 0) {
1787 tsls_loglik(&tsls, endolist, reglist, instlist, missmask, dset);
1788 }
1789 }
1790
1791 /* set command code on the model */
1792 tsls.ci = IVREG;
1793
1794 /* write the full 2SLS list (dep. var. and regressors, followed by
1795 instruments, possibly purged of redundant elements) into the model
1796 for future reference
1797 */
1798 tsls_recreate_full_list(&tsls, reglist, instlist);
1799
1800 if (idroplist != NULL) {
1801 gretl_model_set_list_as_data(&tsls, "inst_droplist", idroplist);
1802 }
1803
1804 #if 0
1805 if (addconst) {
1806 gretl_model_set_int(&tsls, "addconst", addconst);
1807 }
1808 #endif
1809
1810 bailout:
1811
1812 gretl_matrix_free(Q);
1813 gretl_matrix_free(g);
1814 free(missmask);
1815
1816 if (err && !tsls.errcode) {
1817 tsls.errcode = err;
1818 }
1819
1820 if (!tsls.errcode && nendo > 0) {
1821 if (sysest) {
1822 /* save first-stage fitted values */
1823 tsls_save_data(&tsls, hatlist, exolist, dset);
1824 }
1825 if (!sysest || (opt & OPT_H)) {
1826 /* save list of endogenous regressors on model */
1827 gretl_model_set_list_as_data(&tsls, "endolist", endolist);
1828 endolist = NULL; /* model takes ownership */
1829 gretl_model_set_list_as_data(&tsls, "instlist", instlist);
1830 instlist = NULL; /* ditto */
1831 }
1832 }
1833
1834 /* delete any first-stage fitted values from dataset */
1835 dataset_drop_last_variables(dset, dset->v - orig_nvar);
1836
1837 free(reglist);
1838 free(instlist);
1839 free(hatlist);
1840 free(exolist);
1841 free(endolist);
1842 free(s2list);
1843
1844 if ((opt & OPT_A) || tsls.errcode) {
1845 model_count_minus(&tsls); /* OK? */
1846 }
1847
1848 if (!tsls.errcode && !(opt & OPT_N)) {
1849 gretl_model_set_int(&tsls, "dfcorr", 1);
1850 }
1851
1852 /* restore original sample range */
1853 dset->t1 = orig_t1;
1854 dset->t2 = orig_t2;
1855
1856 return tsls;
1857 }
1858