1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 /* tsls.c - Two-Stage Least Squares for gretl */
21 
22 #include "libgretl.h"
23 #include "qr_estimate.h"
24 #include "libset.h"
25 #include "estim_private.h"
26 #include "missing_private.h"
27 #include "matrix_extra.h"
28 #include "tsls.h"
29 
30 #define TDEBUG 0
31 
tsls_omitzero(int * list,const DATASET * dset,const char * mask)32 static void tsls_omitzero (int *list, const DATASET *dset,
33 			   const char *mask)
34 {
35     int i, v, t, allzero;
36 
37     for (i=list[0]; i>1; i--) {
38         v = list[i];
39 	allzero = 1;
40 	for (t=dset->t1; t<=dset->t2; t++) {
41 	    if (mask != NULL && mask[t - dset->t1]) {
42 		continue;
43 	    }
44 	    if (dset->Z[v][t] != 0.0) {
45 		allzero = 0;
46 		break;
47 	    }
48 	}
49         if (allzero) {
50 	    gretl_list_delete_at_pos(list, i);
51 	}
52     }
53 }
54 
55 /**
56  * tsls_free_data:
57  * @pmod: model to operate on.
58  *
59  * Frees the first-stage data attached to a two-stage least squares
60  * model that was estimated using %OPT_E (part of a system).
61  */
62 
tsls_free_data(const MODEL * pmod)63 void tsls_free_data (const MODEL *pmod)
64 {
65     gretl_matrix *endog = gretl_model_get_data(pmod, "endog");
66     double **X = gretl_model_get_data(pmod, "tslsX");
67     int i, m = 0;
68 
69     if (endog != NULL && X != NULL) {
70 	for (i=0; i<pmod->ncoeff; i++) {
71 	    if (endog->val[i] != 0) m++;
72 	}
73 	for (i=0; i<m; i++) {
74 	    free(X[i]);
75 	}
76     }
77 }
78 
79 /* when tsls is called for initial estimation of an equation
80    within a system of equations, save the first-stage fitted
81    values on the model under the key "tslsX".
82 */
83 
84 static int
tsls_save_data(MODEL * pmod,const int * hatlist,const int * exolist,DATASET * dset)85 tsls_save_data (MODEL *pmod, const int *hatlist, const int *exolist,
86 		DATASET *dset)
87 {
88     gretl_matrix *endog = NULL;
89     double **X = NULL;
90     size_t Xsize, xs_old;
91     int recycle_X = 0;
92     int recycle_e = 0;
93     int i, v, err = 0;
94 
95     Xsize = hatlist[0] * sizeof *X;
96 
97     /* re-use old pointers if applicable */
98 
99     X = gretl_model_get_data_full(pmod, "tslsX", NULL, NULL, &xs_old);
100     if (X != NULL) {
101 	if (Xsize == xs_old) {
102 	    recycle_X = 1;
103 	} else {
104 	    tsls_free_data(pmod);
105 	    gretl_model_detach_data_item(pmod, "tslsX");
106 	    free(X);
107 	    X = NULL;
108 	}
109     }
110 
111     if (!recycle_X && Xsize > 0) {
112 	X = malloc(Xsize);
113 	if (X == NULL) {
114 	    return E_ALLOC;
115 	}
116     }
117 
118     endog = gretl_model_get_data(pmod, "endog");
119     if (endog != NULL) {
120 	if (gretl_vector_get_length(endog) == pmod->ncoeff) {
121 	    recycle_e = 1;
122 	} else {
123 	    gretl_model_detach_data_item(pmod, "endog");
124 	    gretl_matrix_free(endog);
125 	    endog = NULL;
126 	}
127     }
128 
129     if (!recycle_e) {
130 	endog = gretl_matrix_alloc(pmod->ncoeff, 1);
131 	if (endog == NULL) {
132 	    free(X);
133 	    return E_ALLOC;
134 	}
135     }
136 
137     /* steal the appropriate columns from Z */
138     for (i=1; i<=hatlist[0]; i++) {
139 	v = hatlist[i];
140 	X[i-1] = dset->Z[v];
141 	dset->Z[v] = NULL;
142     }
143 
144     for (i=0; i<pmod->ncoeff; i++) {
145 	v = pmod->list[i+2];
146 	endog->val[i] = !in_gretl_list(exolist, v);
147     }
148 
149      /* now attach X and endog to the model */
150     if (!recycle_X && X != NULL) {
151 	gretl_model_set_data(pmod, "tslsX", X, GRETL_TYPE_DOUBLE_ARRAY,
152 			     Xsize);
153     }
154     if (!recycle_e && endog != NULL) {
155 	gretl_model_set_matrix_as_data(pmod, "endog", endog);
156     }
157 
158     return err;
159 }
160 
add_tsls_var(int * list,int v,gretlopt opt)161 static void add_tsls_var (int *list, int v, gretlopt opt)
162 {
163     int pos = gretl_list_separator_position(list);
164     int i;
165 
166     if (opt & OPT_T) {
167 	/* add as instrument */
168 	list[0] += 1;
169 	list[list[0]] = v;
170     } else if (opt & OPT_B) {
171 	/* add as exogenous regressor */
172 	list[0] += 2;
173 	for (i=list[0]-1; i>pos; i--) {
174 	    list[i] = list[i-1];
175 	}
176 	list[pos] = v;
177 	list[list[0]] = v;
178     } else {
179 	/* add as endogenous regressor */
180 	list[0] += 1;
181 	for (i=list[0]; i>pos; i--) {
182 	    list[i] = list[i-1];
183 	}
184 	list[pos] = v;
185     }
186 }
187 
delete_tsls_var(int * list,int v,gretlopt opt)188 static void delete_tsls_var (int *list, int v, gretlopt opt)
189 {
190     int pos = gretl_list_separator_position(list);
191     int i, j;
192 
193     if (opt & OPT_T) {
194 	/* delete as instrument */
195 	for (i=pos+1; i<=list[0]; i++) {
196 	    if (list[i] == v) {
197 		for (j=i; j<list[0]; j++) {
198 		    list[j] = list[j + 1];
199 		}
200 		list[0] -= 1;
201 		break;
202 	    }
203 	}
204     } else if (opt & OPT_B) {
205 	/* delete from both sub-lists */
206 	for (i=2; i<=list[0]; i++) {
207 	    if (list[i] == v) {
208 		for (j=i; j<list[0]; j++) {
209 		    list[j] = list[j + 1];
210 		}
211 		list[0] -= 1;
212 	    }
213 	}
214     } else {
215 	/* delete from regressor list */
216 	for (i=2; i<pos; i++) {
217 	    if (list[i] == v) {
218 		for (j=i; j<list[0]; j++) {
219 		    list[j] = list[j + 1];
220 		}
221 		list[0] -= 1;
222 		break;
223 	    }
224 	}
225     }
226 }
227 
in_ivreg_list(const int * list,int v,gretlopt opt)228 static int in_ivreg_list (const int *list, int v, gretlopt opt)
229 {
230     int pos = gretl_list_separator_position(list);
231     int i, imin, imax;
232 
233     if (pos == 0) {
234 	return 0;
235     }
236 
237     if (opt & OPT_T) {
238 	imin = pos + 1;
239 	imax = list[0];
240     } else if (opt & OPT_B) {
241 	imin = 2;
242 	imax = list[0];
243     } else {
244 	imin = 2;
245 	imax = pos - 1;
246     }
247 
248     for (i=imin; i<=imax; i++) {
249 	if (list[i] == v) {
250 	    return i;
251 	}
252     }
253 
254     return 0;
255 }
256 
257 /**
258  * ivreg_list_omit:
259  * @orig: list specifying original IV model.
260  * @drop: list of variables to be omitted.
261  * @opt: may include %OPT_T (omit variable only as instrument)
262  * or %OPT_B (omit both as regressor and instrument).  The default
263  * is to omit (only) as regressor.
264  * @err: pointer to receive error code.
265  *
266  * Creates a new IVREG specification list, by first copying @orig
267  * then deleting from the copy the variables found in @drop.
268  *
269  * Returns: the new, reduced list or %NULL on error.
270  */
271 
272 int *
ivreg_list_omit(const int * orig,const int * drop,gretlopt opt,int * err)273 ivreg_list_omit (const int *orig, const int *drop, gretlopt opt, int *err)
274 {
275     int *newlist;
276     int i;
277 
278     *err = incompatible_options(opt, OPT_T | OPT_B);
279     if (*err) {
280 	return NULL;
281     }
282 
283     newlist = gretl_list_copy(orig);
284 
285     for (i=1; i<=drop[0]; i++) {
286 	if (in_ivreg_list(orig, drop[i], opt)) {
287 	    delete_tsls_var(newlist, drop[i], opt);
288 	} else {
289 	    *err = E_UNSPEC;
290 	}
291     }
292 
293     if (*err) {
294 	free(newlist);
295 	newlist = NULL;
296     }
297 
298     return newlist;
299 }
300 
301 /**
302  * ivreg_list_add:
303  * @orig: list specifying original IV model.
304  * @add: list of variables to be added.
305  * @opt: may include %OPT_T (add variable only as instrument)
306  * or %OPT_B (add both as regressor and instrument).  The default
307  * is to add as an endogenous regressor.
308  * @err: pointer to receive error code.
309  *
310  * Creates a new IVREG specification list, by copying @orig
311  * and adding the variables found in @add.
312  *
313  * Returns: the new, augmented list or %NULL on error.
314  */
315 
316 int *
ivreg_list_add(const int * orig,const int * add,gretlopt opt,int * err)317 ivreg_list_add (const int *orig, const int *add, gretlopt opt, int *err)
318 {
319     int norig = orig[0];
320     int nadd = add[0];
321     int *newlist;
322     int i;
323 
324     *err = incompatible_options(opt, OPT_T | OPT_B);
325     if (*err) {
326 	return NULL;
327     }
328 
329     if (opt & OPT_B) {
330 	nadd *= 2;
331     }
332 
333     newlist = gretl_list_new(norig + nadd);
334 
335     for (i=0; i<=norig; i++) {
336 	newlist[i] = orig[i];
337     }
338 
339     for (i=1; i<=add[0]; i++) {
340 	if (in_ivreg_list(orig, add[i], opt)) {
341 	    *err = E_ADDDUP;
342 	} else {
343 	    add_tsls_var(newlist, add[i], opt);
344 	}
345     }
346 
347     if (*err) {
348 	free(newlist);
349 	newlist = NULL;
350     }
351 
352     return newlist;
353 }
354 
355 /* prepend constant to list of instruments */
356 
zlist_prepend_const(int ** pzlist)357 static int zlist_prepend_const (int **pzlist)
358 {
359     int clist[2] = {1, 0};
360 
361     return gretl_list_insert_list(pzlist, clist, 1);
362 }
363 
364 /**
365  * tsls_make_endolist:
366  * @reglist: regression specification.
367  * @instlist: predetermined and exogenous variables.
368  * @addconst: location to receive notification of whether
369  * the constant was added to the content of @instlist,
370  * or NULL.
371  * @err: location to receive error code.
372  *
373  * Determines which variables in @reglist, when checked against the
374  * predetermined and exogenous vars in @instlist, need to be
375  * instrumented, and populates the returned list accordingly.
376  *
377  * If @addconst is non-NULL and the constant is found in @reglist
378  * but not in @instlist, then it is added to @instlist and this
379  * is flagged by writing 1 into @addconst.
380  *
381  * Returns: allocated list of variables to be instrumented or
382  * NULL if there are no such variables.
383  */
384 
tsls_make_endolist(const int * reglist,int ** instlist,int * addconst,int * err)385 int *tsls_make_endolist (const int *reglist, int **instlist,
386 			 int *addconst, int *err)
387 {
388     int *endolist = NULL;
389     int i, vi;
390 
391     for (i=2; i<=reglist[0]; i++) {
392 	vi = reglist[i];
393 	if (!in_gretl_list(*instlist, vi)) {
394 	    if (vi == 0) {
395 		/* found const in reglist but not instlist:
396 		   needs fixing -- or is this debatable? */
397 		if (addconst != NULL) {
398 		    *addconst = 1;
399 		}
400 	    } else {
401 		endolist = gretl_list_append_term(&endolist, vi);
402 		if (endolist == NULL) {
403 		    *err = E_ALLOC;
404 		    return NULL;
405 		}
406 	    }
407 	}
408     }
409 
410     if (addconst != NULL && *addconst) {
411 	/* add constant to list of instruments */
412 	*err = zlist_prepend_const(instlist);
413     }
414 
415     return endolist;
416 }
417 
418 /* fill the residuals matrix for tsls likelihood calculation */
419 
fill_E_matrix(gretl_matrix * E,const MODEL * pmod,const int * endolist,const int * reglist,const int * instlist,const char * missmask,const DATASET * dset)420 static int fill_E_matrix (gretl_matrix *E,
421 			  const MODEL *pmod,
422 			  const int *endolist,
423 			  const int *reglist,
424 			  const int *instlist,
425 			  const char *missmask,
426 			  const DATASET *dset)
427 {
428     gretl_matrix *Y = NULL;
429     gretl_matrix *X = NULL;
430     int ny, *ylist;
431     int i, err = 0;
432 
433     ny = endolist[0] + 1;
434     ylist = gretl_list_new(ny);
435     ylist[1] = reglist[1];
436     for (i=2; i<=ny; i++) {
437 	ylist[i] = endolist[i-1];
438     }
439 
440     /* dependent variable plus endogenous regressors on LHS */
441     Y = gretl_matrix_data_subset_masked(ylist, dset, pmod->t1, pmod->t2,
442 					missmask, &err);
443     if (!err) {
444 	/* all instruments on RHS */
445 	X = gretl_matrix_data_subset_masked(instlist, dset, pmod->t1, pmod->t2,
446 					    missmask, &err);
447     }
448 
449     if (!err) {
450 	/* populate the residuals matrix, E */
451 	err = gretl_matrix_multi_ols(Y, X, NULL, E, NULL);
452     }
453 
454     gretl_matrix_free(Y);
455     gretl_matrix_free(X);
456     free(ylist);
457 
458     return err;
459 }
460 
461 /* calculate the maximized log-likelihood for a just-identified
462    model */
463 
tsls_loglik(MODEL * pmod,const int * endolist,const int * reglist,const int * instlist,const char * missmask,DATASET * dset)464 static int tsls_loglik (MODEL *pmod,
465 			const int *endolist,
466 			const int *reglist,
467 			const int *instlist,
468 			const char *missmask,
469 			DATASET *dset)
470 {
471     gretl_matrix *E, *S;
472     int T = pmod->nobs;
473     int k = 1 + endolist[0];
474     int err = 0;
475 
476     pmod->lnL = NADBL;
477 
478     E = gretl_matrix_alloc(T, k);
479     S = gretl_matrix_alloc(k, k);
480 
481     if (E == NULL || S == NULL) {
482 	err = E_ALLOC;
483     } else {
484 	err = fill_E_matrix(E, pmod, endolist, reglist, instlist,
485 			    missmask, dset);
486     }
487 
488     if (!err) {
489 	err = gretl_matrix_multiply_mod(E, GRETL_MOD_TRANSPOSE,
490 					E, GRETL_MOD_NONE,
491 					S, GRETL_MOD_NONE);
492     }
493 
494     if (!err) {
495 	double ldet = gretl_matrix_log_determinant(S, &err);
496 
497 	if (err) {
498 	    pmod->lnL = NADBL;
499 	} else {
500 	    /* Davidson and MacKinnon, ETM, p. 538, taking
501 	       kappa = 1 for the just-identified case
502 	    */
503 	    pmod->lnL = -(T / 2.0) * (LN_2_PI + ldet);
504 	}
505     }
506 
507     mle_criteria(pmod, 0);
508 
509     gretl_matrix_free(E);
510     gretl_matrix_free(S);
511 
512     return err;
513 }
514 
515 /* perform the Sargan overidentification test for an IV model */
516 
517 static int
ivreg_sargan_test(MODEL * pmod,int Orank,int * instlist,DATASET * dset)518 ivreg_sargan_test (MODEL *pmod, int Orank, int *instlist,
519 		   DATASET *dset)
520 {
521     int t1 = pmod->t1;
522     int t2 = pmod->t2;
523     int ninst = instlist[0];
524     int i, t, err = 0;
525     MODEL smod;
526     int *OT_list = NULL;
527     int nv = dset->v;
528 
529     if (Orank == 0) {
530 	return 0;
531     }
532 
533     if (pmod->list[0] == 2 && pmod->list[2] == 0) {
534 	/* degenerate model with const only */
535 	return 0;
536     }
537 
538     err = dataset_add_series(dset, 1);
539     if (err) {
540 	return err;
541     }
542 
543     /* add the @pmod residual to the dataset */
544     for (t=t1; t<=t2; t++) {
545 	dset->Z[nv][t] = pmod->uhat[t];
546     }
547 
548     OT_list = gretl_list_new(ninst + 1);
549     if (OT_list == NULL) {
550 	dataset_drop_last_variables(dset, 1);
551 	return E_ALLOC;
552     }
553 
554     OT_list[1] = nv;
555     for (i=2; i<=OT_list[0]; i++) {
556 	OT_list[i] = instlist[i-1];
557     }
558 
559 #if TDEBUG
560     fprintf(stderr, "running regression for Sargan test\n");
561 #endif
562 
563     /* OLS: the dependent variable is the residual from @pmod;
564        the regressors are the (full set of) instruments
565     */
566     smod = lsq(OT_list, dset, OLS, OPT_A);
567 
568 #if TDEBUG
569     if (!smod.errcode) {
570 	PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
571 
572 	strcpy(dset->varname[nv], "IV uhat");
573 	printmodel(&smod, dset, OPT_NONE, prn);
574 	gretl_print_destroy(prn);
575     }
576 #endif
577 
578     if (smod.errcode) {
579 	fprintf(stderr, "ivreg_sargan_test: smod.errcode = %d\n", smod.errcode);
580 	err = smod.errcode;
581     } else {
582 	ModelTest *test = model_test_new(GRETL_TEST_SARGAN);
583 	double OTest = smod.rsq * smod.nobs;
584 
585 #if TDEBUG
586 	fprintf(stderr, "Otest = %g * %d = %g\n", smod.rsq, smod.nobs,
587 		smod.rsq * smod.nobs);
588 #endif
589 
590 	if (test != NULL) {
591 	    model_test_set_teststat(test, GRETL_STAT_LM);
592 	    model_test_set_dfn(test, Orank);
593 	    model_test_set_value(test, OTest);
594 	    model_test_set_pvalue(test, chisq_cdf_comp(Orank, OTest));
595 	    maybe_add_test_to_model(pmod, test);
596 	}
597     }
598 
599     clear_model(&smod);
600     dataset_drop_last_variables(dset, 1);
601     free(OT_list);
602 
603     return err;
604 }
605 
606 #define HTDBG 0
607 
608 /* Hausman test via the regression method */
609 
610 static int
tsls_hausman_test(MODEL * tmod,int * reglist,int * hatlist,DATASET * dset)611 tsls_hausman_test (MODEL *tmod, int *reglist, int *hatlist,
612 		   DATASET *dset)
613 {
614     MODEL hmod;
615     double URSS;
616     int *HT_list = NULL;
617     int t, df;
618     int nv = dset->v;
619     int ku = 0;
620     int err = 0;
621 
622 #if HTDBG
623     int nobs1;
624     PRN *dbgprn = NULL;
625     dbgprn = gretl_print_new(GRETL_PRINT_STDOUT, NULL);
626 #endif
627 
628     /* create regression list for unrestricted model by appending
629        @hatlist to @reglist */
630     HT_list = gretl_list_add(reglist, hatlist, &err);
631     if (err) {
632 	if (err == E_NOADD) {
633 	    /* more of a no-op than an error */
634 	    err = 0;
635 	} else {
636 	    fprintf(stderr, "tsls_hausman_test: gretl_list_add: err = %d\n", err);
637 	}
638 	return err;
639     }
640 
641 #if TDEBUG
642     fprintf(stderr, "running regression for Hausman test, 1\n");
643 #endif
644 
645     /* estimate the unrestricted model */
646     hmod = lsq(HT_list, dset, OLS, OPT_A);
647     if (hmod.errcode) {
648 	fprintf(stderr, "tsls_hausman_test: hmod.errcode (U) = %d\n", hmod.errcode);
649 	err = hmod.errcode;
650 	goto bailout;
651     }
652 
653 #if HTDBG
654     pprintf(dbgprn, "Hausman: unrestricted model (df = %d)\n", hmod.dfd);
655     printmodel(&hmod, dset, OPT_NONE, dbgprn);
656     nobs1 = hmod.nobs;
657 #endif
658 
659     if (hmod.dfd == 0) {
660 	/* perfect fit, can't do test */
661 	goto bailout;
662     }
663 
664     /* record U-model info */
665     URSS = hmod.ess;
666     ku = hmod.ncoeff;
667 #if HTDBG
668     pprintf(dbgprn, "URSS = %g (ku = %d)\n", URSS, ku);
669 #endif
670 
671     /* add fitted values from unrestricted model to dataset */
672     err = dataset_add_series(dset, 1);
673     if (err) {
674 	goto bailout;
675     } else {
676 	for (t=hmod.t1; t<=hmod.t2; t++) {
677 	    dset->Z[nv][t] = hmod.yhat[t];
678 	}
679     }
680 
681     clear_model(&hmod);
682 
683     /* redefine HT_list: dep var is the fitted value from the
684        U-model, regressors are from @reglist */
685     free(HT_list);
686     HT_list = gretl_list_copy(reglist);
687     HT_list[1] = nv;
688 
689 #if TDEBUG
690     fprintf(stderr, "running regression for Hausman test, 2\n");
691 #endif
692 
693     /* regress the U fitted values on the regressors of the restricted
694        model (so the sum of squares equals RRSS-URSS)
695     */
696     hmod = lsq(HT_list, dset, OLS, OPT_A);
697 
698 #if HTDBG
699     strcpy(dset->varname[dset->v - 1], "Ufitvals");
700     pprintf(dbgprn, "Hausman: aux regression\n", hmod.dfd);
701     printmodel(&hmod, dset, OPT_NONE, dbgprn);
702     pprintf(dbgprn, "smpl1 = %d, smpl2 = %d\n", nobs1, hmod.nobs);
703     pprintf(dbgprn, "k1 = %d, k2 = %d\n", hmod.ncoeff, ku);
704 #endif
705 
706     if (hmod.errcode) {
707 	fprintf(stderr, "tsls_hausman_test: hmod.errcode (D) = %d\n", hmod.errcode);
708 	err = hmod.errcode;
709     } else if ((df = ku - hmod.ncoeff) > 0) {
710 	/* Degrees of freedom for the Hausman test need not be equal to the
711 	   number of added regressors, since some of them may not really be
712 	   endogenous.
713 	*/
714 	double DRSS = hmod.ess;
715 	double HTest = (DRSS/URSS) * hmod.nobs;
716 	ModelTest *test = model_test_new(GRETL_TEST_IV_HAUSMAN);
717 
718 #if HTDBG
719 	pprintf(dbgprn, "df = %d - %d = %d\n", ku, hmod.ncoeff, df);
720 	pprintf(dbgprn, "DRSS = (RRSS - URSS) = %g, URSS = %g\n", DRSS, URSS);
721 	pprintf(dbgprn, "Htest = %g [%.4f]\n", HTest, chisq_cdf_comp(df, HTest));
722 #endif
723 
724 	if (test != NULL) {
725 	    model_test_set_teststat(test, GRETL_STAT_WALD_CHISQ);
726 	    model_test_set_dfn(test, df);
727 	    model_test_set_value(test, HTest);
728 	    model_test_set_pvalue(test, chisq_cdf_comp(df, HTest));
729 	    maybe_add_test_to_model(tmod, test);
730 	}
731     }
732 
733  bailout:
734 
735 #if HTDBG
736     gretl_print_destroy(dbgprn);
737 #endif
738 
739     dataset_drop_last_variables(dset, dset->v - nv);
740     clear_model(&hmod);
741     free(HT_list);
742 
743     return err;
744 }
745 
746 /* form matrix of instruments and perform QR decomposition
747    of this matrix; return Q */
748 
tsls_Q(int * instlist,int ** pdlist,const DATASET * dset,char * mask,int * err)749 static gretl_matrix *tsls_Q (int *instlist, int **pdlist,
750 			     const DATASET *dset, char *mask,
751 			     int *err)
752 {
753     gretl_matrix *Q = NULL;
754     gretl_matrix *R = NULL;
755     int rank, ndrop = 0;
756     int *droplist = NULL;
757     double test;
758     int i, j, k;
759 
760     Q = gretl_matrix_data_subset_masked(instlist, dset,
761 					dset->t1, dset->t2,
762 					mask, err);
763     if (*err) {
764 	return NULL;
765     }
766 
767     k = gretl_matrix_cols(Q);
768 
769     if (k > Q->rows) {
770 	/* can't do QR decomp! */
771 	gretl_errmsg_sprintf(_("Number of instruments (%d) exceeds the "
772 			       "number of observations (%d)"),
773 			     k, Q->rows);
774 	*err = E_DF;
775 	goto bailout;
776     }
777 
778     R = gretl_matrix_alloc(k, k);
779     if (R == NULL) {
780 	*err = E_ALLOC;
781 	goto bailout;
782     }
783 
784     *err = gretl_matrix_QR_decomp(Q, R);
785     if (*err) {
786 	goto bailout;
787     }
788 
789     rank = gretl_check_QR_rank(R, err, NULL);
790 
791 #if TDEBUG
792     fprintf(stderr, "tsls_Q: k = Q->cols = %d, rank = %d\n", k, rank);
793     if (rank == 0) {
794 	gretl_matrix_print(R, "R");
795     }
796 #endif
797 
798     if (*err) {
799 	goto bailout;
800     } else if (rank < k) {
801 	fprintf(stderr, "tsls_Q: k = %d, rank = %d\n", k, rank);
802 	ndrop = k - rank;
803     }
804 
805     if (ndrop > 0) {
806 	droplist = gretl_list_new(ndrop);
807 	if (droplist != NULL) {
808 	    droplist[0] = 0;
809 	}
810 
811 	j = 1;
812 	for (i=0; i<k; i++) {
813 	    test = gretl_matrix_get(R, i, i);
814 	    if (fabs(test) < R_DIAG_MIN) {
815 		if (droplist != NULL) {
816 		    droplist[0] += 1;
817 		    droplist[droplist[0]] = instlist[j];
818 		}
819 		fprintf(stderr, "tsls_Q: dropping redundant instrument %d (%s)\n",
820 			instlist[j], dset->varname[instlist[j]]);
821 		gretl_list_delete_at_pos(instlist, j--);
822 	    }
823 	    j++;
824 	}
825 
826 	k = instlist[0];
827 	gretl_matrix_free(Q);
828 	Q = gretl_matrix_data_subset_masked(instlist, dset,
829 					    dset->t1, dset->t2,
830 					    mask, err);
831 	if (!*err) {
832 	    R = gretl_matrix_reuse(R, k, k);
833 	    *err = gretl_matrix_QR_decomp(Q, R);
834 	}
835     }
836 
837  bailout:
838 
839     gretl_matrix_free(R);
840 
841     if (*err) {
842 	free(droplist);
843 	gretl_matrix_free(Q);
844 	Q = NULL;
845     } else {
846 	*pdlist = droplist;
847     }
848 
849     return Q;
850 }
851 
tsls_form_xhat(gretl_matrix * Q,gretl_matrix * g,DATASET * dset,int v0,int v1,const char * mask)852 static int tsls_form_xhat (gretl_matrix *Q, gretl_matrix *g,
853 			   DATASET *dset, int v0, int v1,
854 			   const char *mask)
855 {
856     const double *x = dset->Z[v0];
857     double *xhat = dset->Z[v1];
858     int k = gretl_matrix_cols(Q);
859     int allzero = 1;
860     int i, t, s = 0;
861     int err = 0;
862 
863 #if TDEBUG > 1
864     fprintf(stderr, "tsls_form_xhat: v0=%d, v1=%d, t1=%d, t2=%d, mask=%p\n",
865 	    v0, v1, dset->t1, dset->t2, (void *) mask);
866 #endif
867 
868     /* form g = Q'x */
869     for (i=0; i<k; i++) {
870 	g->val[i] = 0.0;
871 	s = 0;
872 	for (t=dset->t1; t<=dset->t2; t++) {
873 	    if (mask != NULL && mask[t - dset->t1]) {
874 		continue;
875 	    }
876 	    g->val[i] += gretl_matrix_get(Q, s++, i) * x[t];
877 	}
878     }
879 
880     /* form \hat{x} = Qg = QQ'x */
881     s = 0;
882     for (t=dset->t1; t<=dset->t2; t++) {
883 	if (mask != NULL && mask[t - dset->t1]) {
884 	    xhat[t] = NADBL;
885 	    continue;
886 	}
887 	xhat[t] = 0.0;
888 	for (i=0; i<k; i++) {
889 	    xhat[t] += gretl_matrix_get(Q, s, i) * g->val[i];
890 	}
891 	if (xhat[t] != 0) {
892 	    allzero = 0;
893 	}
894 	s++;
895     }
896 
897     if (allzero) {
898 	gretl_errmsg_sprintf(_("The first-stage fitted values for %s are all zero"),
899 			     dset->varname[v0]);
900 	err = E_DATA;
901     } else {
902 	/* name the fitted series according to the original */
903 	strcpy(dset->varname[v1], "h_");
904 	strncat(dset->varname[v1], dset->varname[v0], VNAMELEN - 3);
905     }
906 
907     return err;
908 }
909 
tsls_residuals(MODEL * pmod,const int * reglist,const DATASET * dset,gretlopt opt)910 static void tsls_residuals (MODEL *pmod, const int *reglist,
911 			    const DATASET *dset,
912 			    gretlopt opt)
913 {
914     int yno = reglist[1];
915     double yh, sigma0 = pmod->sigma;
916     int i, t;
917 
918     pmod->ess = 0.0;
919 
920     for (t=pmod->t1; t<=pmod->t2; t++) {
921 	if (model_missing(pmod, t)) {
922 	    continue;
923 	}
924 	yh = 0.0;
925 	for (i=0; i<pmod->ncoeff; i++) {
926 	    yh += pmod->coeff[i] * dset->Z[reglist[i+2]][t];
927 	}
928 	pmod->yhat[t] = yh;
929 	pmod->uhat[t] = dset->Z[yno][t] - yh;
930 	pmod->ess += pmod->uhat[t] * pmod->uhat[t];
931     }
932 
933     if (pmod->ess <= 0.0) {
934 	pmod->sigma = 0.0;
935     } else {
936 	int den = (opt & OPT_N)? pmod->nobs : pmod->dfd;
937 
938 	pmod->sigma = sqrt(pmod->ess / den);
939     }
940 
941     if (sigma0 > 0.0) {
942 	double corr = pmod->sigma / sigma0;
943 
944 	for (i=0; i<pmod->ncoeff; i++) {
945 	    pmod->sderr[i] *= corr;
946 	}
947     }
948 }
949 
tsls_extra_stats(MODEL * pmod,int yno,int overid,gretlopt opt,const DATASET * dset)950 static void tsls_extra_stats (MODEL *pmod, int yno, int overid,
951 			      gretlopt opt, const DATASET *dset)
952 {
953     double r;
954 
955     /* squared correlation between y and yhat */
956     pmod->rsq = gretl_corr_rsq(pmod->t1, pmod->t2, dset->Z[yno], pmod->yhat);
957 
958     r = 1 - pmod->rsq;
959     pmod->adjrsq = 1.0 - (r * (pmod->nobs - 1) / pmod->dfd);
960 
961     pmod->fstt = pmod->chisq = NADBL;
962 
963     if (pmod->ncoeff > 1) {
964 	if ((opt & OPT_N) || (overid && pmod->ncoeff == 2)) {
965 	    pmod->chisq = wald_omit_chisq(NULL, pmod);
966 	} else {
967 	    pmod->fstt = wald_omit_F(NULL, pmod);
968 	}
969     }
970 
971     /* tsls is not an ML estimator unless it's exactly identified,
972        and in that case we require a special calculation */
973     pmod->lnL = NADBL;
974     pmod->criterion[C_AIC] = NADBL;
975     pmod->criterion[C_BIC] = NADBL;
976     pmod->criterion[C_HQC] = NADBL;
977 
978     if (dataset_is_time_series(dset) && !model_has_missing_obs(pmod)) {
979 	/* time series, no missing obs within sample range */
980 	pmod->rho = rhohat(1, pmod->t1, pmod->t2, pmod->uhat);
981 	pmod->dw = dwstat(1, pmod, dset);
982     } else {
983 	pmod->rho = pmod->dw = NADBL;
984     }
985 }
986 
tsls_recreate_full_list(MODEL * pmod,const int * reglist,const int * instlist)987 static void tsls_recreate_full_list (MODEL *pmod, const int *reglist,
988 				     const int *instlist)
989 {
990     int *full_list;
991 
992     if (instlist != NULL && instlist[0] > 0) {
993 	full_list = gretl_lists_join_with_separator(reglist, instlist);
994     } else {
995 	full_list = gretl_list_copy(reglist);
996     }
997 
998     if (full_list == NULL) {
999 	pmod->errcode = E_ALLOC;
1000     } else {
1001 	free(pmod->list);
1002 	pmod->list = full_list;
1003     }
1004 }
1005 
replace_list_element(int * list,int targ,int repl)1006 static void replace_list_element (int *list, int targ, int repl)
1007 {
1008     int i;
1009 
1010     for (i=1; i<=list[0]; i++) {
1011 	if (list[i] == targ) {
1012 	    list[i] = repl;
1013 	    break;
1014 	}
1015     }
1016 }
1017 
reglist_remove_redundant_vars(const MODEL * tmod,int * s2list,int * reglist,int * endolist,int * hatlist)1018 static int reglist_remove_redundant_vars (const MODEL *tmod,
1019 					  int *s2list,
1020 					  int *reglist,
1021 					  int *endolist,
1022 					  int *hatlist)
1023 {
1024     int *dlist = gretl_model_get_list(tmod, "droplist");
1025     int i, pos;
1026 
1027     if (dlist == NULL) {
1028 	return 1;
1029     }
1030 
1031     for (i=1; i<=dlist[0]; i++) {
1032 	pos = in_gretl_list(s2list, dlist[i]);
1033 	if (pos > 1) {
1034 	    gretl_list_delete_at_pos(reglist, pos);
1035 	}
1036 	if (endolist != NULL) {
1037 	    /* note: if endolist is NULL, so is hatlist */
1038 	    pos = in_gretl_list(hatlist, dlist[i]);
1039 	    if (pos > 1) {
1040 		/* First replace dlist[i] with the ID of the original
1041 		   regressor from @endolist, in place of the "hatlist"
1042 		   variable (which will be deleted when the model is
1043 		   returned), so that the printout of regressors that are
1044 		   dropped due to exact collinearity will show the
1045 		   appropriate names.
1046 
1047 		   Second, delete the redundant term from both endolist
1048 		   and hatlist, so that subsequent calculations will not
1049 		   get messed up.
1050 		*/
1051 		dlist[i] = endolist[pos];
1052 		gretl_list_delete_at_pos(endolist, pos);
1053 		gretl_list_delete_at_pos(hatlist, pos);
1054 	    }
1055 	}
1056     }
1057 
1058     return 0;
1059 }
1060 
1061 /* Stock-Yogo: compute test statistic for weakness of instruments.
1062    This is the Cragg-Donald statistic: the minimum eigenvalue of the
1063    matrix defined below (which reduces to the first-stage F-statistic
1064    when there is just one endogenous regressor).  See Stock and Yogo,
1065    "Testing for Weak Instruments in Linear IV Regressions" (originally
1066    NBER Technical Working Paper 284; revised February 2003), at
1067    http://ksghome.harvard.edu/~JStock/pdf/rfa_6.pdf
1068 */
1069 
1070 static int
compute_stock_yogo(MODEL * pmod,const int * endolist,const int * instlist,const int * hatlist,const DATASET * dset)1071 compute_stock_yogo (MODEL *pmod, const int *endolist,
1072 		    const int *instlist, const int *hatlist,
1073 		    const DATASET *dset)
1074 {
1075     const double **dZ = (const double **) dset->Z;
1076     gretl_matrix_block *B = NULL;
1077     gretl_matrix_block *B2 = NULL;
1078     gretl_matrix *G, *S, *Y;
1079     gretl_matrix *Ya, *YPY;
1080     gretl_matrix *X, *Z, *E = NULL;
1081     int T = pmod->nobs;
1082     int n = endolist[0];
1083     int K1 = pmod->ncoeff - n;
1084     int K2 = instlist[0] - K1;
1085     int i, s, vi, vj, t;
1086     int err = 0;
1087 
1088     B = gretl_matrix_block_new(&G, n, n,
1089 			       &S, n, n,
1090 			       &Y, T, n,
1091 			       &Ya, T, n,
1092 			       &YPY, n, n,
1093 			       NULL);
1094     if (B == NULL) {
1095 	return E_ALLOC;
1096     }
1097 
1098 #if TDEBUG
1099     fprintf(stderr, "stock_yogo: pmod->ncoeff=%d, n=%d, K1=%d, K2=%d\n",
1100 	    pmod->ncoeff, n, K1, K2);
1101     printlist(endolist, "endolist, in stock-yogo");
1102 #endif
1103 
1104     if (K1 > 0) {
1105 	/* we have some exogenous regressors */
1106 	gretl_matrix *M;
1107 	int j, ix = 0, iz = 0;
1108 
1109 	B2 = gretl_matrix_block_new(&X, T, K1,
1110 				    &Z, T, K2,
1111 				    &E, T, K2,
1112 				    NULL);
1113 	if (B2 == NULL) {
1114 	    gretl_matrix_block_destroy(B);
1115 	    return E_ALLOC;
1116 	}
1117 
1118 	/* form the X and Z matrices */
1119 	for (i=1; i<=instlist[0]; i++) {
1120 	    vi = instlist[i];
1121 	    if (in_gretl_list(pmod->list, vi)) {
1122 		/* included exogenous var -> X */
1123 		M = X;
1124 		j = ix++;
1125 	    } else {
1126 		/* excluded exogenous var -> Z */
1127 		M = Z;
1128 		j = iz++;
1129 	    }
1130 	    s = 0;
1131 	    for (t=pmod->t1; t<=pmod->t2; t++) {
1132 		if (!na(pmod->uhat[t])) {
1133 		    gretl_matrix_set(M, s++, j, dZ[vi][t]);
1134 		}
1135 	    }
1136 	}
1137     }
1138 
1139     /* form Y matrix */
1140     for (i=0; i<n; i++) {
1141 	vi = endolist[i+1];
1142 	s = 0;
1143 	for (t=pmod->t1; t<=pmod->t2; t++) {
1144 	    if (!na(pmod->uhat[t])) {
1145 		gretl_matrix_set(Y, s++, i, dZ[vi][t]);
1146 	    }
1147 	}
1148     }
1149 
1150     if (K1 == 0) {
1151 	/* form Y-hat matrix in Ya */
1152 	for (i=0; i<n; i++) {
1153 	    vi = hatlist[i+1];
1154 	    s = 0;
1155 	    for (t=pmod->t1; t<=pmod->t2; t++) {
1156 		if (!na(pmod->uhat[t])) {
1157 		    gretl_matrix_set(Ya, s++, i, dZ[vi][t]);
1158 		}
1159 	    }
1160 	}
1161     } else {
1162 	int bdim = (K1 > K2)? K1 : K2;
1163 	gretl_matrix *b;
1164 
1165 	/* the leading dimension of b is the number of regressors */
1166 	b = gretl_matrix_alloc(bdim, K2);
1167 	if (b == NULL) {
1168 	    err = E_ALLOC;
1169 	}
1170 
1171 	if (!err) {
1172 	    /* partial X out of Y: Y <- M_x Y */
1173 	    gretl_matrix_reuse(b, K1, n);
1174 	    gretl_matrix_reuse(E, -1, n);
1175 	    err = gretl_matrix_multi_ols(Y, X, b, E, NULL);
1176 	    gretl_matrix_copy_values(Y, E);
1177 	}
1178 
1179 	if (!err) {
1180 	    /* partial X out of Z: Z <- M_x Z */
1181 	    gretl_matrix_reuse(b, K1, K2);
1182 	    gretl_matrix_reuse(E, -1, K2);
1183 	    err = gretl_matrix_multi_ols(Z, X, b, E, NULL);
1184 	    gretl_matrix_copy_values(Z, E);
1185 	}
1186 
1187 	if (!err) {
1188 	    /* form projection P_z Y, in Ya */
1189 	    gretl_matrix_reuse(b, K2, n);
1190 	    gretl_matrix_reuse(E, -1, n);
1191 	    err = gretl_matrix_multi_ols(Y, Z, b, E, NULL);
1192 	    gretl_matrix_copy_values(Ya, Y);
1193 	    gretl_matrix_subtract_from(Ya, E);
1194 	}
1195 
1196 	gretl_matrix_free(b);
1197     }
1198 
1199     if (!err) {
1200 	/* form Y' P_z Y */
1201 	err = gretl_matrix_multiply_mod(Y, GRETL_MOD_TRANSPOSE,
1202 					Ya, GRETL_MOD_NONE,
1203 					YPY, GRETL_MOD_NONE);
1204     }
1205 
1206     /* now write first-stage residuals into Ya */
1207     for (i=0; i<n && !err; i++) {
1208 	vi = endolist[i+1];
1209 	vj = hatlist[i+1];
1210 	s = 0;
1211 	for (t=pmod->t1; t<=pmod->t2; t++) {
1212 	    if (!na(pmod->uhat[t])) {
1213 		gretl_matrix_set(Ya, s++, i, dZ[vi][t] - dZ[vj][t]);
1214 	    }
1215 	}
1216     }
1217 
1218     if (!err) {
1219 	/* form S = \hat{\Sigma}_{vv} */
1220 	err = gretl_matrix_multiply_mod(Y, GRETL_MOD_TRANSPOSE,
1221 					Ya, GRETL_MOD_NONE,
1222 					S, GRETL_MOD_NONE);
1223 	if (!err) {
1224 	    gretl_matrix_xtr_symmetric(S);
1225 	    gretl_matrix_divide_by_scalar(S, T - K1 - K2);
1226 	    for (i=0; i<S->rows; i++) {
1227 		if (gretl_matrix_get(S, i, i) <= 0) {
1228 		    err = E_SINGULAR;
1229 		    break;
1230 		}
1231 	    }
1232 	}
1233     }
1234 
1235     if (!err) {
1236 	double rc = gretl_symmetric_matrix_rcond(S, &err);
1237 
1238 	if (!err && (na(rc) || rc < 1.0e-7)) { /* note: was 1.0e-6 */
1239 #if 0
1240 	    fprintf(stderr, "Stock-Yogo: rcond(S) = %g\n", rc);
1241 #endif
1242 	    err = E_SINGULAR;
1243 	}
1244     }
1245 
1246     if (!err) {
1247 	/* S^{-1/2}: invert S and Cholesky-decompose */
1248 	err = gretl_invert_symmetric_matrix(S);
1249 	if (!err) {
1250 	    err = gretl_matrix_cholesky_decomp(S);
1251 	}
1252     }
1253 
1254     if (!err) {
1255 	/* finally, form big sandwich */
1256 	err = gretl_matrix_qform(S, GRETL_MOD_TRANSPOSE,
1257 				 YPY, G, GRETL_MOD_NONE);
1258 	if (!err) {
1259 	    gretl_matrix_divide_by_scalar(G, K2);
1260 	}
1261     }
1262 
1263     if (!err) {
1264 	double gmin = gretl_symm_matrix_lambda_min(G, &err);
1265 
1266 	if (!err) {
1267 	    gretl_model_set_double(pmod, "gmin", gmin);
1268 	    gretl_model_set_int(pmod, "n", n);
1269 	    gretl_model_set_int(pmod, "K2", K2);
1270 	}
1271     }
1272 
1273     gretl_matrix_block_destroy(B);
1274     gretl_matrix_block_destroy(B2);
1275 
1276 #if 0
1277     if (err) {
1278 	fprintf(stderr, "compute_stock_yogo: err = %d\n", err);
1279     }
1280 #endif
1281 
1282     return err;
1283 }
1284 
1285 /* Calculate first-stage F-test as diagnostic for weak instruments.
1286    This variant can only handle one endogenous regressor, but it
1287    supports robust estimation if the model in question uses a robust
1288    covariance estimator.
1289 */
1290 
compute_first_stage_F(MODEL * pmod,const int * endolist,const int * reglist,const int * instlist,DATASET * dset,gretlopt opt)1291 static int compute_first_stage_F (MODEL *pmod,
1292 				  const int *endolist,
1293 				  const int *reglist,
1294 				  const int *instlist,
1295 				  DATASET *dset,
1296 				  gretlopt opt)
1297 {
1298     MODEL mod1;
1299     int *list1 = NULL;
1300     int *flist = NULL;
1301     double F;
1302     int i, ev, vi;
1303     int err = 0;
1304 
1305     gretl_model_init(&mod1, dset);
1306     ev = endolist[1];
1307 
1308     /* The regression list, list1, has endogenous regressor ev as the
1309        dependent variable and includes all instruments; the list for
1310        omission, flist, includes all the excluded instruments.
1311     */
1312 
1313     list1 = gretl_list_append_term(&list1, ev);
1314     if (list1 == NULL) {
1315 	return E_ALLOC;
1316     }
1317 
1318     for (i=1; i<=instlist[0]; i++) {
1319 	vi = instlist[i];
1320 	list1 = gretl_list_append_term(&list1, vi);
1321 	if (list1 == NULL) {
1322 	    err = E_ALLOC;
1323 	    break;
1324 	}
1325 	if (!in_gretl_list(reglist, vi)) {
1326 	    flist = gretl_list_append_term(&flist, vi);
1327 	    if (flist == NULL) {
1328 		err = E_ALLOC;
1329 		break;
1330 	    }
1331 	}
1332     }
1333 
1334     if (!err) {
1335 	gretlopt myopt = (opt & OPT_R)? OPT_R : OPT_NONE;
1336 
1337 	mod1 = lsq(list1, dset, OLS, myopt | OPT_A);
1338 	err = mod1.errcode;
1339 	if (err) {
1340 	    fprintf(stderr, "compute_first_stage F: lsq failed\n");
1341 	}
1342     }
1343 
1344     if (!err) {
1345 	F = wald_omit_F(flist, &mod1);
1346 	if (na(F)) {
1347 	    err = 1;
1348 	}
1349     }
1350 
1351     if (!err) {
1352 	gretl_model_set_double(pmod, "stage1-F", F);
1353 	gretl_model_set_int(pmod, "stage1-dfn", flist[0]);
1354 	gretl_model_set_int(pmod, "stage1-dfd", mod1.dfd);
1355 	if (!(opt & OPT_R)) {
1356 	    /* flag lookup of Stock-Yogo critical values */
1357 	    gretl_model_set_double(pmod, "gmin", F);
1358 	}
1359     }
1360 
1361     clear_model(&mod1);
1362     free(list1);
1363     free(flist);
1364 
1365     return err;
1366 }
1367 
tsls_adjust_sample(const int * list,DATASET * dset,char ** pmask)1368 static int tsls_adjust_sample (const int *list, DATASET *dset,
1369 			       char **pmask)
1370 {
1371     int i, t, t1min = dset->t1, t2max = dset->t2;
1372     char *mask = NULL;
1373     int T, vi, missobs;
1374     int err = 0;
1375 
1376     /* advance start of sample range to skip missing obs? */
1377     for (t=t1min; t<t2max; t++) {
1378 	missobs = 0;
1379 	for (i=1; i<=list[0]; i++) {
1380 	    vi = list[i];
1381 	    if (vi == 0 || vi == LISTSEP) {
1382 		continue;
1383 	    }
1384 	    if (na(dset->Z[vi][t])) {
1385 		missobs = 1;
1386 		break;
1387 	    }
1388 	}
1389 	if (missobs) {
1390 	    t1min++;
1391 	} else {
1392 	    break;
1393 	}
1394     }
1395 
1396     /* retard end of sample range to skip missing obs? */
1397     for (t=t2max; t>t1min; t--) {
1398 	missobs = 0;
1399 	for (i=1; i<=list[0]; i++) {
1400 	    vi = list[i];
1401 	    if (vi == 0 || vi == LISTSEP) {
1402 		continue;
1403 	    }
1404 	    if (na(dset->Z[vi][t])) {
1405 		missobs = 1;
1406 		break;
1407 	    }
1408 	}
1409 	if (missobs) {
1410 	    t2max--;
1411 	} else {
1412 	    break;
1413 	}
1414     }
1415 
1416     /* count missing values in mid-range of data */
1417     missobs = 0;
1418     for (t=t1min; t<=t2max; t++) {
1419 	for (i=1; i<=list[0]; i++) {
1420 	    vi = list[i];
1421 	    if (vi == 0 || vi == LISTSEP) {
1422 		continue;
1423 	    }
1424 	    if (na(dset->Z[vi][t])) {
1425 		missobs++;
1426 		break;
1427 	    }
1428 	}
1429     }
1430 
1431     T = t2max - t1min + 1;
1432 
1433     if (missobs == T) {
1434 	err = E_MISSDATA;
1435     } else if (missobs > 0) {
1436 	mask = calloc(T, 1); /* all NUL bytes */
1437 	if (mask == NULL) {
1438 	    err = E_ALLOC;
1439 	} else {
1440 	    for (t=t1min; t<=t2max; t++) {
1441 		for (i=1; i<=list[0]; i++) {
1442 		    vi = list[i];
1443 		    if (vi == 0 || vi == LISTSEP) {
1444 			continue;
1445 		    }
1446 		    if (na(dset->Z[vi][t])) {
1447 			mask[t - t1min] = 1;
1448 			break;
1449 		    }
1450 		}
1451 	    }
1452 	}
1453     }
1454 
1455 #if TDEBUG
1456     fprintf(stderr, "tsls_adjust_sample: t1=%d, t2=%d, missobs=%d, ok obs=%d\n",
1457 	    t1min, t2max, missobs, t2max - t1min + 1 - missobs);
1458 #endif
1459 
1460     if (!err) {
1461 	dset->t1 = t1min;
1462 	dset->t2 = t2max;
1463     }
1464 
1465     *pmask = mask;
1466 
1467     return err;
1468 }
1469 
1470 /**
1471  * ivreg_process_lists:
1472  * @list: original specification list.
1473  * @reglist: location to receive regression list.
1474  * @instlist: location to receive list of instruments.
1475  *
1476  * Split the incoming list into its two components and perform
1477  * some basic checks; if the checks fail the two created
1478  * lists are destroyed.
1479  *
1480  * Returns: 0, on success, non-zero error code on failure.
1481  */
1482 
ivreg_process_lists(const int * list,int ** reglist,int ** instlist)1483 int ivreg_process_lists (const int *list, int **reglist, int **instlist)
1484 {
1485     int *rlist, *zlist;
1486     int i, err;
1487 
1488     err = gretl_list_split_on_separator(list, reglist, instlist);
1489     if (err) {
1490 	fprintf(stderr, "gretl_list_split_on_separator: failed\n");
1491 	return err;
1492     }
1493 
1494     rlist = *reglist;
1495     zlist = *instlist;
1496 
1497     if (rlist[0] < 2 || zlist == NULL || zlist[0] < 1) {
1498 	err = E_ARGS;
1499     } else {
1500 	for (i=1; i<=zlist[0]; i++) {
1501 	    if (zlist[i] == list[1]) {
1502 		gretl_errmsg_set(_("You can't use the dependent variable as an instrument"));
1503 		err = E_DATA;
1504 		break;
1505 	    }
1506 	}
1507     }
1508 
1509     if (!err) {
1510 	int oid = zlist[0] - rlist[0] + 1;
1511 
1512 	if (oid < 0 && (in_gretl_list(rlist, 0) > 1) &&
1513 	    !in_gretl_list(zlist, 0)) {
1514 	    /* do not treat the constant as if it were endogenous */
1515 	    err = zlist_prepend_const(instlist);
1516 	    if (!err) {
1517 		zlist = *instlist;
1518 		oid++;
1519 	    }
1520 	}
1521 	if (!err && oid < 0) {
1522 	    gretl_errmsg_sprintf(_("The order condition for identification is not satisfied.\n"
1523 				   "At least %d more instruments are needed."), -oid);
1524 	    fprintf(stderr, "zlist[0] = %d, rlist[0] = %d\n", zlist[0], rlist[0]);
1525 	    err = E_DATA;
1526 	}
1527     }
1528 
1529     if (err) {
1530 	free(*reglist);
1531 	free(*instlist);
1532 	*reglist = NULL;
1533 	*instlist = NULL;
1534     }
1535 
1536     return err;
1537 }
1538 
1539 /**
1540  * tsls:
1541  * @list: dependent variable plus list of regressors.
1542  * @dset: dataset struct.
1543  * @opt: may contain %OPT_R for robust VCV, %OPT_N for no df correction,
1544  * %OPT_A if this is an auxiliary reression, %OPT_E if we're
1545  * estimating one equation within a system, %OPT_H to
1546  * add "hatlist" to model even under %OPT_E, %OPT_X to skip
1547  * the set of tests that are usually calculated. Also %OPT_C
1548  * for clustered standard errors.
1549  *
1550  * Estimate the model given in @list by means of Two-Stage Least
1551  * Squares.  If %OPT_E is given, fitted values from the first-stage
1552  * regressions are saved with the model.
1553  *
1554  * Returns: a #MODEL struct, containing the estimates.
1555  */
1556 
tsls(const int * list,DATASET * dset,gretlopt opt)1557 MODEL tsls (const int *list, DATASET *dset, gretlopt opt)
1558 {
1559     MODEL tsls;
1560     gretl_matrix *Q = NULL, *g = NULL;
1561     char *missmask = NULL;
1562     int orig_t1 = dset->t1, orig_t2 = dset->t2;
1563     int *reglist = NULL, *instlist = NULL;
1564     int *hatlist = NULL, *s2list = NULL;
1565     int *exolist = NULL, *endolist = NULL;
1566     int *idroplist = NULL;
1567     int addconst = 0;
1568     int nendo = 0, nreg = 0;
1569     int orig_nvar = dset->v;
1570     int sysest = (opt & OPT_E);
1571     int no_tests = (opt & OPT_X);
1572     int OverIdRank = 0;
1573     int i, err = 0;
1574 
1575 #if TDEBUG
1576     printlist(list, "tsls: incoming list");
1577 #endif
1578 
1579     /* initialize model in case we bail out early on error */
1580     gretl_model_init(&tsls, dset);
1581 
1582     gretl_error_clear();
1583 
1584     /* reglist: dependent var plus list of regressors
1585        instlist: list of instruments
1586     */
1587     tsls.errcode = ivreg_process_lists(list, &reglist, &instlist);
1588     if (tsls.errcode) {
1589 	return tsls;
1590     }
1591 
1592     /* adjust sample range for missing observations */
1593     err = tsls_adjust_sample(list, dset, &missmask);
1594 
1595     if (!err) {
1596 	/* allocate second stage regression list */
1597 	s2list = gretl_list_copy(reglist);
1598 	if (s2list == NULL) {
1599 	    err = E_ALLOC;
1600 	}
1601     }
1602 
1603     if (err) {
1604 	goto bailout;
1605     }
1606 
1607     /* in case we drop any instruments as redundant */
1608     if (sysest) {
1609 	exolist = gretl_list_copy(instlist);
1610     }
1611 
1612     /* drop any vars that are all zero, and reshuffle the constant
1613        into first position among the independent vars
1614     */
1615     tsls_omitzero(reglist, dset, missmask);
1616     reglist_check_for_const(reglist, dset);
1617     tsls_omitzero(instlist, dset, missmask);
1618 
1619 #if TDEBUG
1620     printlist(reglist, "reglist");
1621     printlist(instlist, "instlist");
1622 #endif
1623 
1624     /* Determine the list of variables (endolist) for which we need to
1625        obtain fitted values in the first stage.  Note that we accept
1626        the condition where there are no such variables (and hence
1627        TSLS is in fact redundant); this may sometimes be required
1628        when tsls is called in the context of system estimation.
1629     */
1630     endolist = tsls_make_endolist(reglist, &instlist, &addconst, &err);
1631     if (err) {
1632 	goto bailout;
1633     }
1634 
1635 #if TDEBUG
1636     printlist(endolist, "endolist, in tsls");
1637 #endif
1638 
1639     if (endolist != NULL) {
1640 	nendo = endolist[0];
1641 	hatlist = gretl_list_copy(endolist);
1642 	if (hatlist == NULL) {
1643 	    err = E_ALLOC;
1644 	}
1645     }
1646 
1647     if (!err) {
1648 	Q = tsls_Q(instlist, &idroplist, dset, missmask, &err);
1649     }
1650 
1651     if (err) {
1652 	goto bailout;
1653     }
1654 
1655     /* check (again) for order condition: we do this after tsls_Q in case
1656        any instruments get dropped at that stage
1657     */
1658     nreg = reglist[0] - 1;
1659     OverIdRank = instlist[0] - nreg;
1660     if (OverIdRank < 0) {
1661         gretl_errmsg_sprintf(_("The order condition for identification is not satisfied.\n"
1662 			       "At least %d more instruments are needed."), -OverIdRank);
1663 	err = E_DATA;
1664 	goto bailout;
1665     }
1666 
1667 #if TDEBUG
1668     fprintf(stderr, "nreg = %d, OverIdRank = %d\n", nreg, OverIdRank);
1669 #endif
1670 
1671     /* allocate storage for fitted vars (etc.), if needed */
1672     if (nendo > 0) {
1673 	err = dataset_add_series(dset, nendo);
1674 	if (!err) {
1675 	    g = gretl_vector_alloc(Q->cols);
1676 	    if (g == NULL) {
1677 		err = E_ALLOC;
1678 	    }
1679 	}
1680 	if (err) {
1681 	    goto bailout;
1682 	}
1683     }
1684 
1685     /*
1686        Deal with the variables for which instruments are needed: loop
1687        across the list of variables to be instrumented (endolist),
1688        form the fitted values as QQ'x_i, and add these fitted values
1689        into the data array, dset->Z.
1690     */
1691 
1692     for (i=0; i<nendo; i++) {
1693 	int v0 = endolist[i+1];
1694 	int v1 = orig_nvar + i;
1695 
1696 	/* form xhat_i = QQ'x_i */
1697 	err = tsls_form_xhat(Q, g, dset, v0, v1, missmask);
1698 	if (err) {
1699 	    goto bailout;
1700 	}
1701 
1702 	/* substitute v1 into the right place in the second-stage
1703 	   regression list */
1704 	replace_list_element(s2list, v0, v1);
1705 
1706 	/* and update hatlist */
1707 	hatlist[i+1] = v1;
1708     }
1709 
1710     /* second-stage regression */
1711     tsls = lsq(s2list, dset, OLS, (sysest)? OPT_Z : OPT_NONE);
1712     if (tsls.errcode) {
1713 	fprintf(stderr, "tsls, stage 2: tsls.errcode = %d\n", tsls.errcode);
1714 	goto bailout;
1715     }
1716 
1717 #if TDEBUG > 1
1718     PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, &err);
1719 
1720     printmodel(&tsls, dset, OPT_S, prn);
1721     gretl_print_destroy(prn);
1722 #endif
1723 
1724     if (tsls.list[0] < s2list[0]) {
1725 	/* Were collinear regressors dropped? If so, adjustments are needed */
1726 	OverIdRank += s2list[0] - tsls.list[0];
1727 	reglist_remove_redundant_vars(&tsls, s2list, reglist, endolist,
1728 				      hatlist);
1729 	if (endolist != NULL) {
1730 	    nendo = endolist[0];
1731 	}
1732 #if TDEBUG
1733 	fprintf(stderr, "tsls: dropped collinear vars\n");
1734 #endif
1735     }
1736 
1737     if (instlist != NULL && instlist[0] > 0) {
1738 	/* record the number of instruments used */
1739 	gretl_model_set_int(&tsls, "ninst", instlist[0]);
1740     }
1741 
1742     if (!no_tests) {
1743 	if (!sysest || (opt & OPT_H)) {
1744 	    if (nendo == 1) {
1745 		/* handles robust estimation, for single endogenous regressor */
1746 		compute_first_stage_F(&tsls, endolist, reglist, instlist,
1747 				      dset, opt);
1748 	    } else if (nendo > 0 && !(opt & OPT_R)) {
1749 		/* at present, only handles case of i.i.d. errors */
1750 		compute_stock_yogo(&tsls, endolist, instlist, hatlist, dset);
1751 	    }
1752 	}
1753     }
1754 
1755     if (nendo > 0) {
1756 	/* special: we need to use the original RHS vars to compute
1757 	   residuals and associated statistics */
1758 	tsls_residuals(&tsls, reglist, dset, opt);
1759     }
1760 
1761     if (opt & OPT_C) {
1762 	/* clustered implies robust */
1763 	opt |= OPT_R;
1764     }
1765 
1766     if (opt & OPT_R) {
1767 	/* robust standard errors called for */
1768 	qr_tsls_vcv(&tsls, dset, opt);
1769 	if (tsls.errcode) {
1770 	    fprintf(stderr, "qr_tsls_vcv: err = %d\n", tsls.errcode);
1771 	    goto bailout;
1772 	}
1773     }
1774 
1775     if (nendo > 0) {
1776 	/* compute additional statistics (R^2, F, etc.) */
1777 	tsls_extra_stats(&tsls, reglist[1], OverIdRank, opt, dset);
1778     }
1779 
1780     if (!sysest && !no_tests) {
1781 	if (nendo > 0 && hatlist != NULL) {
1782 	    tsls_hausman_test(&tsls, reglist, hatlist, dset);
1783 	}
1784 	if (OverIdRank > 0) {
1785 	    ivreg_sargan_test(&tsls, OverIdRank, instlist, dset);
1786 	} else if (nendo > 0) {
1787 	    tsls_loglik(&tsls, endolist, reglist, instlist, missmask, dset);
1788 	}
1789     }
1790 
1791     /* set command code on the model */
1792     tsls.ci = IVREG;
1793 
1794     /* write the full 2SLS list (dep. var. and regressors, followed by
1795        instruments, possibly purged of redundant elements) into the model
1796        for future reference
1797     */
1798     tsls_recreate_full_list(&tsls, reglist, instlist);
1799 
1800     if (idroplist != NULL) {
1801 	gretl_model_set_list_as_data(&tsls, "inst_droplist", idroplist);
1802     }
1803 
1804 #if 0
1805     if (addconst) {
1806 	gretl_model_set_int(&tsls, "addconst", addconst);
1807     }
1808 #endif
1809 
1810  bailout:
1811 
1812     gretl_matrix_free(Q);
1813     gretl_matrix_free(g);
1814     free(missmask);
1815 
1816     if (err && !tsls.errcode) {
1817 	tsls.errcode = err;
1818     }
1819 
1820     if (!tsls.errcode && nendo > 0) {
1821 	if (sysest) {
1822 	    /* save first-stage fitted values */
1823 	    tsls_save_data(&tsls, hatlist, exolist, dset);
1824 	}
1825 	if (!sysest || (opt & OPT_H)) {
1826 	    /* save list of endogenous regressors on model */
1827 	    gretl_model_set_list_as_data(&tsls, "endolist", endolist);
1828 	    endolist = NULL; /* model takes ownership */
1829 	    gretl_model_set_list_as_data(&tsls, "instlist", instlist);
1830 	    instlist = NULL; /* ditto */
1831 	}
1832     }
1833 
1834     /* delete any first-stage fitted values from dataset */
1835     dataset_drop_last_variables(dset, dset->v - orig_nvar);
1836 
1837     free(reglist);
1838     free(instlist);
1839     free(hatlist);
1840     free(exolist);
1841     free(endolist);
1842     free(s2list);
1843 
1844     if ((opt & OPT_A) || tsls.errcode) {
1845 	model_count_minus(&tsls); /* OK? */
1846     }
1847 
1848     if (!tsls.errcode && !(opt & OPT_N)) {
1849 	gretl_model_set_int(&tsls, "dfcorr", 1);
1850     }
1851 
1852     /* restore original sample range */
1853     dset->t1 = orig_t1;
1854     dset->t2 = orig_t2;
1855 
1856     return tsls;
1857 }
1858