1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #include "libgretl.h"
21 #include "matrix_extra.h"
22 #include "version.h"
23 
24 #define NAMEWIDTH 9
25 
26 static void BKW_print (gretl_matrix *B, int namelen, PRN *prn);
27 
28 /* run the vif regression for regressor k */
29 
get_vif(MODEL * mod,const int * xlist,int * vlist,int k,DATASET * dset,int * err)30 static double get_vif (MODEL *mod, const int *xlist,
31 		       int *vlist, int k,
32 		       DATASET *dset,
33 		       int *err)
34 {
35     double vk = NADBL;
36     int i, j;
37 
38     vlist[1] = xlist[k]; /* dep. var. is regressor k */
39     /* position 2 in vlist holds 0 = const */
40     j = 3;
41     for (i=1; i<=xlist[0]; i++) {
42 	if (i != k) {
43 	    vlist[j++] = xlist[i];
44 	}
45     }
46 
47     *mod = lsq(vlist, dset, OLS, OPT_A);
48     *err = mod->errcode;
49 
50     if (!*err && !na(mod->rsq) && mod->rsq != 1.0) {
51 	vk = 1.0 / (1.0 - mod->rsq);
52     }
53 
54     clear_model(mod);
55 
56     return vk;
57 }
58 
59 /* run regressions of each x_i on the other x_j's */
60 
model_vif_vector(MODEL * pmod,const int * xlist,DATASET * dset,int * err)61 static gretl_vector *model_vif_vector (MODEL *pmod, const int *xlist,
62 				       DATASET *dset, int *err)
63 {
64     MODEL tmpmod;
65     gretl_vector *vif = NULL;
66     int *vlist = NULL;
67     int nvif = xlist[0];
68     int save_t1 = dset->t1;
69     int save_t2 = dset->t2;
70     int i;
71 
72     vif = gretl_column_vector_alloc(nvif);
73     if (vif == NULL) {
74 	*err = E_ALLOC;
75 	return NULL;
76     }
77 
78     /* vlist is the list for the vif regressions:
79        allow space for the constant */
80     vlist = gretl_list_new(nvif + 1);
81     if (vlist == NULL) {
82 	*err = E_ALLOC;
83 	free(vif);
84 	return NULL;
85     }
86 
87     /* impose original model sample */
88     dset->t1 = pmod->t1;
89     dset->t2 = pmod->t2;
90 
91     for (i=1; i<=xlist[0] && !*err; i++) {
92 	vif->val[i-1] = get_vif(&tmpmod, xlist, vlist, i, dset, err);
93     }
94 
95     /* reinstate sample */
96     dset->t1 = save_t1;
97     dset->t2 = save_t2;
98 
99     free(vlist);
100 
101     if (*err) {
102 	gretl_matrix_free(vif);
103 	vif = NULL;
104     }
105 
106     return vif;
107 }
108 
bkw_add_colnames(gretl_matrix * BKW,const gretl_matrix * VCV,gretl_array * pnames)109 static int bkw_add_colnames (gretl_matrix *BKW,
110 			     const gretl_matrix *VCV,
111 			     gretl_array *pnames)
112 {
113     char **S = strings_array_new(BKW->cols);
114     const char **S0 = NULL;
115     int maxlen = 0;
116 
117     if (pnames == NULL) {
118 	S0 = gretl_matrix_get_colnames(VCV);
119     }
120 
121     if (S != NULL) {
122 	int i, len, k = BKW->cols - 2;
123 	const char *si;
124 	char tmp[16];
125 
126 	S[0] = gretl_strdup("lambda");
127 	S[1] = gretl_strdup("cond");
128 
129 	for (i=0; i<k; i++) {
130 	    if (pnames != NULL || S0 != NULL) {
131 		si = pnames != NULL ? gretl_array_get_data(pnames, i) : S0[i];
132 		if (strlen(si) > NAMEWIDTH) {
133 		    *tmp = '\0';
134 		    strncat(tmp, si, NAMEWIDTH - 1);
135 		    strcat(tmp, "~");
136 		    S[i+2] = gretl_strdup(tmp);
137 		} else if (pnames != NULL) {
138 		    S[i+2] = gretl_array_get_data(pnames, i);
139 		    gretl_array_set_data(pnames, i, NULL);
140 		} else {
141 		    S[i+2] = gretl_strdup(si);
142 		}
143 	    } else {
144 		sprintf(tmp, "x%d", i+1);
145 		S[i+2] = gretl_strdup(tmp);
146 	    }
147 	    len = strlen(S[i+2]);
148 	    if (len > maxlen) {
149 		maxlen = len;
150 	    }
151 	}
152 	gretl_matrix_set_colnames(BKW, S);
153     }
154 
155     return maxlen;
156 }
157 
158 /* note: we're assuming in bkw_matrix() that the array argument
159    @pnames is disposable: we pull out its entries and set them
160    to NULL, but it's still up to the caller to destroy the
161    array itself.
162 */
163 
bkw_matrix(const gretl_matrix * VCV,gretl_array * pnames,PRN * prn,int * err)164 gretl_matrix *bkw_matrix (const gretl_matrix *VCV,
165 			  gretl_array *pnames,
166 			  PRN *prn, int *err)
167 {
168     gretl_matrix *Vi = NULL;
169     gretl_matrix *S = NULL;
170     gretl_matrix *Q = NULL;
171     gretl_matrix *V = NULL;
172     gretl_matrix *lambda = NULL;
173     gretl_matrix *BKW = NULL;
174     double x, y;
175     int k = VCV->rows;
176     int namelen = 0;
177     int i, j;
178 
179     if (pnames != NULL) {
180 	if (gretl_array_get_length(pnames) != k) {
181 	    fprintf(stderr, "bkw_matrix: expected %d names but got %d\n",
182 		    k, gretl_array_get_length(pnames));
183 	    *err = E_INVARG;
184 	    return NULL;
185 	}
186     }
187 
188     /* copy the covariance matrix */
189     Vi = gretl_matrix_copy(VCV);
190     if (Vi == NULL) {
191 	*err = E_ALLOC;
192 	return NULL;
193     }
194 
195     /* and invert it */
196     *err = gretl_invert_symmetric_matrix(Vi);
197     if (*err) {
198 	goto bailout;
199     }
200 
201     /* allocate workspace */
202     S = gretl_identity_matrix_new(k);
203     Q = gretl_matrix_alloc(k, k);
204     BKW = gretl_matrix_alloc(k, k+2);
205 
206     if (S == NULL || Q == NULL || BKW == NULL) {
207 	*err = E_ALLOC;
208 	goto bailout;
209     }
210 
211     for (i=0; i<k; i++) {
212 	x = gretl_matrix_get(Vi, i, i);
213 	gretl_matrix_set(S, i, i, 1/sqrt(x));
214     }
215 
216     *err = gretl_matrix_qform(S, GRETL_MOD_TRANSPOSE,
217 			      Vi, Q, GRETL_MOD_NONE);
218 
219     if (!*err) {
220 	*err = gretl_matrix_SVD(Q, NULL, &lambda, &V, 0);
221     }
222 
223     if (*err) {
224 	goto bailout;
225     }
226 
227     /* S = (1/lambda) ** ones(k, 1) */
228     for (j=0; j<k; j++) {
229 	x = lambda->val[j];
230 	for (i=0; i<k; i++) {
231 	    gretl_matrix_set(S, i, j, 1/x);
232 	}
233     }
234 
235     for (i=0; i<k; i++) {
236 	for (j=0; j<k; j++) {
237 	    x = gretl_matrix_get(V, j, i);
238 	    y = gretl_matrix_get(S, i, j);
239 	    gretl_matrix_set(Q, i, j, x * x * y);
240 	}
241     }
242 
243     for (i=0; i<k; i++) {
244 	/* compute row sums */
245 	y = 0.0;
246 	for (j=0; j<k; j++) {
247 	    y += gretl_matrix_get(Q, i, j);
248 	}
249 	for (j=0; j<k; j++) {
250 	    x = gretl_matrix_get(Q, i, j);
251 	    gretl_matrix_set(V, j, i, x/y);
252 	}
253     }
254 
255     y = lambda->val[0];
256 
257     /* assemble the matrix to return */
258     for (i=0; i<k; i++) {
259 	x = lambda->val[i];
260 	gretl_matrix_set(BKW, i, 0, x);
261 	gretl_matrix_set(BKW, i, 1, sqrt(y / x));
262 	for (j=0; j<k; j++) {
263 	    x = gretl_matrix_get(V, i, j);
264 	    gretl_matrix_set(BKW, i, j+2, x);
265 	}
266     }
267 
268     namelen = bkw_add_colnames(BKW, VCV, pnames);
269 
270  bailout:
271 
272     gretl_matrix_free(Vi);
273     gretl_matrix_free(S);
274     gretl_matrix_free(Q);
275     gretl_matrix_free(V);
276     gretl_matrix_free(lambda);
277 
278     if (*err) {
279 	gretl_matrix_free(BKW);
280 	BKW = NULL;
281     } else if (prn != NULL) {
282 	BKW_print(BKW, namelen, prn);
283     }
284 
285     return BKW;
286 }
287 
do_proportion_sums(const gretl_matrix * B,const char ** bnames,const char * label,double cval,PRN * prn)288 static int do_proportion_sums (const gretl_matrix *B,
289 			       const char **bnames,
290 			       const char *label,
291 			       double cval, PRN *prn)
292 {
293     gretl_matrix *P;
294     char **cnames;
295     double x;
296     int np = B->cols - 2;
297     int len, namelen = 0;
298     int ngp5 = 0;
299     int i, j, k;
300 
301     cnames = strings_array_new(np);
302     if (cnames == NULL) {
303 	return E_ALLOC;
304     }
305 
306     P = gretl_zero_matrix_new(1, np);
307     if (P == NULL) {
308 	return E_ALLOC;
309     }
310 
311     for (i=0; i<B->rows; i++) {
312 	if (gretl_matrix_get(B, i, 1) >= cval) {
313 	    for (j=2; j<B->cols; j++) {
314 		x = 0;
315 		for (k=i; k<B->rows; k++) {
316 		    x += gretl_matrix_get(B, k, j);
317 		}
318 		if (x >= 0.5) {
319 		    P->val[ngp5] = x;
320 		    cnames[ngp5] = gretl_strdup(bnames[j]);
321 		    len = strlen(cnames[ngp5]);
322 		    if (len > namelen) {
323 			namelen = len;
324 		    }
325 		    ngp5++;
326 		}
327 	    }
328 	    break;
329 	}
330     }
331 
332     if (ngp5 > 0) {
333 	char fmt[16];
334 	int len = 8;
335 
336 	if (len < namelen + 1) {
337 	    len = namelen + 1;
338 	}
339 	sprintf(fmt, "%%%d.3f", len);
340 	P->cols = ngp5;
341 	gretl_matrix_set_colnames(P, cnames);
342 	pprintf(prn, "%s:\n\n", _(label));
343 	gretl_matrix_print_with_format(P, fmt, 0, 0, prn);
344 	pputc(prn, '\n');
345     } else {
346 	pprintf(prn, "%s: 0\n\n", _(label));
347 	strings_array_free(cnames, np);
348     }
349 
350     gretl_matrix_free(P);
351 
352     return 0;
353 }
354 
BKW_analyse(gretl_matrix * B,double maxcond,const char * fmt,PRN * prn)355 static int BKW_analyse (gretl_matrix *B, double maxcond,
356 			const char *fmt, PRN *prn)
357 {
358     const char *labels[] = {
359 	N_("Count of condition indices >= 30"),
360 	N_("Variance proportions >= 0.5 associated with cond >= 30"),
361 	N_("Count of condition indices >= 10"),
362         N_("Variance proportions >= 0.5 associated with cond >= 10"),
363 	N_("No evidence of excessive collinearity")
364     };
365     const char **bnames = NULL;
366     int rows = B->rows;
367     int ngc10 = 0, ngc30 = 0;
368     int i, err = 0;
369 
370     pputs(prn, _("According to BKW, cond >= 30 indicates \"strong\" near linear dependence,\n"
371 		 "and cond between 10 and 30 \"moderately strong\".  Parameter estimates whose\n"
372 		 "variance is mostly associated with problematic cond values may themselves\n"
373 		 "be considered problematic."));
374     pputs(prn, "\n\n");
375 
376     /* count rows with condition index >= thresholds */
377     for (i=rows-1; i>=0; i--) {
378 	if (gretl_matrix_get(B, i, 1) >= 30) {
379 	    ngc30++;
380 	}
381 	if (gretl_matrix_get(B, i, 1) >= 10) {
382 	    ngc10++;
383 	} else {
384 	    break;
385 	}
386     }
387 
388     if (ngc10 > 0) {
389 	bnames = gretl_matrix_get_colnames(B);
390     }
391 
392     pprintf(prn, "%s: %d\n", _(labels[0]), ngc30);
393     if (ngc30 == 0 && ngc10 > 0) {
394 	pputc(prn, '\n');
395     }
396 
397     if (ngc30 > 0) {
398 	/* variance proportion sums, cond >= 30 */
399 	do_proportion_sums(B, bnames, _(labels[1]), 30, prn);
400     }
401 
402     pprintf(prn, "%s: %d\n", _(labels[2]), ngc10);
403 
404     if (ngc10 > ngc30) {
405 	/* variance proportion sums, cond >= 10 */
406 	do_proportion_sums(B, bnames, _(labels[3]), 10, prn);
407     } else if (ngc10 == 0) {
408 	pprintf(prn, "\n%s\n", _(labels[4]));
409     }
410 
411     if (!err) {
412 	pputc(prn, '\n');
413     }
414 
415     return err;
416 }
417 
BKW_print(gretl_matrix * B,int namelen,PRN * prn)418 static void BKW_print (gretl_matrix *B, int namelen, PRN *prn)
419 {
420     const char *strs[] = {
421 	N_("Belsley-Kuh-Welsch collinearity diagnostics"),
422 	N_("variance proportions"),
423 	N_("eigenvalues of inverse covariance matrix"),
424 	N_("condition index"),
425 	N_("note: variance proportions columns sum to 1.0")
426     };
427     double maxcond = gretl_matrix_get(B, B->rows - 1, 1);
428     int sp = maxcond >= 10000 ? 10 : maxcond >= 1000 ? 9 : 8;
429     int maxcols = 9;
430     char fmt[16];
431 
432     if (sp < namelen + 1) {
433 	sp = namelen + 1;
434     }
435 
436     sprintf(fmt, "%%%d.3f", sp);
437     pprintf(prn, "\n%s:\n\n", _(strs[0]));
438     bufspace(2, prn);
439     pprintf(prn, "%s\n\n", _(strs[1]));
440 
441     if (B->cols > maxcols) {
442 	int err = 0;
443 	gretl_array *a = gretl_matrix_col_split(B, 2, maxcols, &err);
444 
445 	if (a != NULL) {
446 	    int i, n = gretl_array_get_length(a);
447 	    gretl_matrix *ai;
448 
449 	    for (i=0; i<n; i++) {
450 		ai = gretl_array_get_data(a, i);
451 		gretl_matrix_print_with_format(ai, fmt, 0, 0, prn);
452 		if (i < n-1) {
453 		    pputc(prn, '\n');
454 		}
455 	    }
456 	    gretl_array_destroy(a);
457 	}
458     } else {
459 	gretl_matrix_print_with_format(B, fmt, 0, 0, prn);
460     }
461 
462     pprintf(prn, "\n  lambda = %s (smallest is %g)\n", _(strs[2]),
463 	    gretl_matrix_get(B, B->rows - 1, 0));
464     pprintf(prn, "  cond   = %s\n", _(strs[3]));
465     pprintf(prn, "  %s\n\n", _(strs[4]));
466 
467     BKW_analyse(B, maxcond, fmt, prn);
468 }
469 
BKW_pnames(MODEL * pmod,DATASET * dset)470 static gretl_array *BKW_pnames (MODEL *pmod, DATASET *dset)
471 {
472     gretl_array *pnames;
473     char pname[VNAMELEN];
474     int i, k = pmod->ncoeff;
475     int err = 0;
476 
477     pnames = gretl_array_new(GRETL_TYPE_STRINGS, k, &err);
478 
479     if (pnames != NULL) {
480 	for (i=0; i<pmod->ncoeff; i++) {
481 	    gretl_model_get_param_name(pmod, dset, i, pname);
482 	    gretl_array_set_string(pnames, i, pname, 1);
483 	}
484     }
485 
486     return pnames;
487 }
488 
compute_vifs(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)489 int compute_vifs (MODEL *pmod, DATASET *dset,
490 		  gretlopt opt, PRN *prn)
491 {
492     gretl_vector *vif = NULL;
493     int *xlist;
494     int quiet = (opt & OPT_Q);
495     int i, err = 0;
496 
497     /* fetch list of regressors */
498     xlist = gretl_model_get_x_list(pmod);
499     if (xlist == NULL) {
500 	return E_DATA;
501     }
502 
503     /* drop the constant if present in xlist */
504     for (i=xlist[0]; i>0; i--) {
505 	if (xlist[i] == 0) {
506 	    gretl_list_delete_at_pos(xlist, i);
507 	    break;
508 	}
509     }
510 
511     if (xlist[0] > 1) {
512 	vif = model_vif_vector(pmod, xlist, dset, &err);
513     }
514 
515     if (vif != NULL && !quiet) {
516 	int vlen = gretl_vector_get_length(vif);
517 	int vi, n, maxlen = 0;
518 	double vj;
519 
520 	pprintf(prn, "\n%s\n", _("Variance Inflation Factors"));
521 	pprintf(prn, "%s\n", _("Minimum possible value = 1.0"));
522 	pprintf(prn, "%s\n", _("Values > 10.0 may indicate a collinearity problem"));
523 	pputc(prn, '\n');
524 
525 	for (i=0; i<vlen; i++) {
526 	    vi = xlist[i+1];
527 	    vj = vif->val[i];
528 	    if (!na(vj)) {
529 		n = strlen(dset->varname[vi]);
530 		if (n > maxlen) {
531 		    maxlen = n;
532 		}
533 	    }
534 	}
535 
536 	maxlen = maxlen < 12 ? 12 : maxlen;
537 
538 	for (i=0; i<vlen; i++) {
539 	    vi = xlist[i+1];
540 	    vj = vif->val[i];
541 	    if (!quiet && !na(vj)) {
542 		pprintf(prn, "%*s %8.3f\n", maxlen, dset->varname[vi], vj);
543 	    }
544 	}
545 
546 	pputc(prn, '\n');
547 	pputs(prn, _("VIF(j) = 1/(1 - R(j)^2), where R(j) is the "
548 		     "multiple correlation coefficient\nbetween "
549 		     "variable j and the other independent variables"));
550 	pputc(prn, '\n');
551     }
552 
553     if (!err && !(opt & OPT_G)) {
554 	set_last_result_data(vif, GRETL_TYPE_MATRIX);
555     } else {
556 	gretl_matrix_free(vif);
557     }
558 
559     free(xlist);
560 
561     return err;
562 }
563 
compute_bkw(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)564 int compute_bkw (MODEL *pmod, DATASET *dset,
565 		 gretlopt opt, PRN *prn)
566 {
567     gretl_matrix *V, *BKW = NULL;
568     int quiet = (opt & OPT_Q);
569     int err = 0;
570 
571     V = gretl_vcv_matrix_from_model(pmod, NULL, &err);
572 
573     if (!err) {
574 	gretl_array *pnames = BKW_pnames(pmod, dset);
575 	PRN *vprn = quiet ? NULL : prn;
576 
577 	BKW = bkw_matrix(V, pnames, vprn, &err);
578 	gretl_array_destroy(pnames);
579 	gretl_matrix_free(V);
580     }
581 
582     if (!err && !(opt & OPT_G)) {
583 	set_last_result_data(BKW, GRETL_TYPE_MATRIX);
584     } else {
585 	gretl_matrix_free(BKW);
586     }
587 
588     return err;
589 }
590