1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include "libgretl.h"
21 #include "matrix_extra.h"
22 #include "version.h"
23
24 #define NAMEWIDTH 9
25
26 static void BKW_print (gretl_matrix *B, int namelen, PRN *prn);
27
28 /* run the vif regression for regressor k */
29
get_vif(MODEL * mod,const int * xlist,int * vlist,int k,DATASET * dset,int * err)30 static double get_vif (MODEL *mod, const int *xlist,
31 int *vlist, int k,
32 DATASET *dset,
33 int *err)
34 {
35 double vk = NADBL;
36 int i, j;
37
38 vlist[1] = xlist[k]; /* dep. var. is regressor k */
39 /* position 2 in vlist holds 0 = const */
40 j = 3;
41 for (i=1; i<=xlist[0]; i++) {
42 if (i != k) {
43 vlist[j++] = xlist[i];
44 }
45 }
46
47 *mod = lsq(vlist, dset, OLS, OPT_A);
48 *err = mod->errcode;
49
50 if (!*err && !na(mod->rsq) && mod->rsq != 1.0) {
51 vk = 1.0 / (1.0 - mod->rsq);
52 }
53
54 clear_model(mod);
55
56 return vk;
57 }
58
59 /* run regressions of each x_i on the other x_j's */
60
model_vif_vector(MODEL * pmod,const int * xlist,DATASET * dset,int * err)61 static gretl_vector *model_vif_vector (MODEL *pmod, const int *xlist,
62 DATASET *dset, int *err)
63 {
64 MODEL tmpmod;
65 gretl_vector *vif = NULL;
66 int *vlist = NULL;
67 int nvif = xlist[0];
68 int save_t1 = dset->t1;
69 int save_t2 = dset->t2;
70 int i;
71
72 vif = gretl_column_vector_alloc(nvif);
73 if (vif == NULL) {
74 *err = E_ALLOC;
75 return NULL;
76 }
77
78 /* vlist is the list for the vif regressions:
79 allow space for the constant */
80 vlist = gretl_list_new(nvif + 1);
81 if (vlist == NULL) {
82 *err = E_ALLOC;
83 free(vif);
84 return NULL;
85 }
86
87 /* impose original model sample */
88 dset->t1 = pmod->t1;
89 dset->t2 = pmod->t2;
90
91 for (i=1; i<=xlist[0] && !*err; i++) {
92 vif->val[i-1] = get_vif(&tmpmod, xlist, vlist, i, dset, err);
93 }
94
95 /* reinstate sample */
96 dset->t1 = save_t1;
97 dset->t2 = save_t2;
98
99 free(vlist);
100
101 if (*err) {
102 gretl_matrix_free(vif);
103 vif = NULL;
104 }
105
106 return vif;
107 }
108
bkw_add_colnames(gretl_matrix * BKW,const gretl_matrix * VCV,gretl_array * pnames)109 static int bkw_add_colnames (gretl_matrix *BKW,
110 const gretl_matrix *VCV,
111 gretl_array *pnames)
112 {
113 char **S = strings_array_new(BKW->cols);
114 const char **S0 = NULL;
115 int maxlen = 0;
116
117 if (pnames == NULL) {
118 S0 = gretl_matrix_get_colnames(VCV);
119 }
120
121 if (S != NULL) {
122 int i, len, k = BKW->cols - 2;
123 const char *si;
124 char tmp[16];
125
126 S[0] = gretl_strdup("lambda");
127 S[1] = gretl_strdup("cond");
128
129 for (i=0; i<k; i++) {
130 if (pnames != NULL || S0 != NULL) {
131 si = pnames != NULL ? gretl_array_get_data(pnames, i) : S0[i];
132 if (strlen(si) > NAMEWIDTH) {
133 *tmp = '\0';
134 strncat(tmp, si, NAMEWIDTH - 1);
135 strcat(tmp, "~");
136 S[i+2] = gretl_strdup(tmp);
137 } else if (pnames != NULL) {
138 S[i+2] = gretl_array_get_data(pnames, i);
139 gretl_array_set_data(pnames, i, NULL);
140 } else {
141 S[i+2] = gretl_strdup(si);
142 }
143 } else {
144 sprintf(tmp, "x%d", i+1);
145 S[i+2] = gretl_strdup(tmp);
146 }
147 len = strlen(S[i+2]);
148 if (len > maxlen) {
149 maxlen = len;
150 }
151 }
152 gretl_matrix_set_colnames(BKW, S);
153 }
154
155 return maxlen;
156 }
157
158 /* note: we're assuming in bkw_matrix() that the array argument
159 @pnames is disposable: we pull out its entries and set them
160 to NULL, but it's still up to the caller to destroy the
161 array itself.
162 */
163
bkw_matrix(const gretl_matrix * VCV,gretl_array * pnames,PRN * prn,int * err)164 gretl_matrix *bkw_matrix (const gretl_matrix *VCV,
165 gretl_array *pnames,
166 PRN *prn, int *err)
167 {
168 gretl_matrix *Vi = NULL;
169 gretl_matrix *S = NULL;
170 gretl_matrix *Q = NULL;
171 gretl_matrix *V = NULL;
172 gretl_matrix *lambda = NULL;
173 gretl_matrix *BKW = NULL;
174 double x, y;
175 int k = VCV->rows;
176 int namelen = 0;
177 int i, j;
178
179 if (pnames != NULL) {
180 if (gretl_array_get_length(pnames) != k) {
181 fprintf(stderr, "bkw_matrix: expected %d names but got %d\n",
182 k, gretl_array_get_length(pnames));
183 *err = E_INVARG;
184 return NULL;
185 }
186 }
187
188 /* copy the covariance matrix */
189 Vi = gretl_matrix_copy(VCV);
190 if (Vi == NULL) {
191 *err = E_ALLOC;
192 return NULL;
193 }
194
195 /* and invert it */
196 *err = gretl_invert_symmetric_matrix(Vi);
197 if (*err) {
198 goto bailout;
199 }
200
201 /* allocate workspace */
202 S = gretl_identity_matrix_new(k);
203 Q = gretl_matrix_alloc(k, k);
204 BKW = gretl_matrix_alloc(k, k+2);
205
206 if (S == NULL || Q == NULL || BKW == NULL) {
207 *err = E_ALLOC;
208 goto bailout;
209 }
210
211 for (i=0; i<k; i++) {
212 x = gretl_matrix_get(Vi, i, i);
213 gretl_matrix_set(S, i, i, 1/sqrt(x));
214 }
215
216 *err = gretl_matrix_qform(S, GRETL_MOD_TRANSPOSE,
217 Vi, Q, GRETL_MOD_NONE);
218
219 if (!*err) {
220 *err = gretl_matrix_SVD(Q, NULL, &lambda, &V, 0);
221 }
222
223 if (*err) {
224 goto bailout;
225 }
226
227 /* S = (1/lambda) ** ones(k, 1) */
228 for (j=0; j<k; j++) {
229 x = lambda->val[j];
230 for (i=0; i<k; i++) {
231 gretl_matrix_set(S, i, j, 1/x);
232 }
233 }
234
235 for (i=0; i<k; i++) {
236 for (j=0; j<k; j++) {
237 x = gretl_matrix_get(V, j, i);
238 y = gretl_matrix_get(S, i, j);
239 gretl_matrix_set(Q, i, j, x * x * y);
240 }
241 }
242
243 for (i=0; i<k; i++) {
244 /* compute row sums */
245 y = 0.0;
246 for (j=0; j<k; j++) {
247 y += gretl_matrix_get(Q, i, j);
248 }
249 for (j=0; j<k; j++) {
250 x = gretl_matrix_get(Q, i, j);
251 gretl_matrix_set(V, j, i, x/y);
252 }
253 }
254
255 y = lambda->val[0];
256
257 /* assemble the matrix to return */
258 for (i=0; i<k; i++) {
259 x = lambda->val[i];
260 gretl_matrix_set(BKW, i, 0, x);
261 gretl_matrix_set(BKW, i, 1, sqrt(y / x));
262 for (j=0; j<k; j++) {
263 x = gretl_matrix_get(V, i, j);
264 gretl_matrix_set(BKW, i, j+2, x);
265 }
266 }
267
268 namelen = bkw_add_colnames(BKW, VCV, pnames);
269
270 bailout:
271
272 gretl_matrix_free(Vi);
273 gretl_matrix_free(S);
274 gretl_matrix_free(Q);
275 gretl_matrix_free(V);
276 gretl_matrix_free(lambda);
277
278 if (*err) {
279 gretl_matrix_free(BKW);
280 BKW = NULL;
281 } else if (prn != NULL) {
282 BKW_print(BKW, namelen, prn);
283 }
284
285 return BKW;
286 }
287
do_proportion_sums(const gretl_matrix * B,const char ** bnames,const char * label,double cval,PRN * prn)288 static int do_proportion_sums (const gretl_matrix *B,
289 const char **bnames,
290 const char *label,
291 double cval, PRN *prn)
292 {
293 gretl_matrix *P;
294 char **cnames;
295 double x;
296 int np = B->cols - 2;
297 int len, namelen = 0;
298 int ngp5 = 0;
299 int i, j, k;
300
301 cnames = strings_array_new(np);
302 if (cnames == NULL) {
303 return E_ALLOC;
304 }
305
306 P = gretl_zero_matrix_new(1, np);
307 if (P == NULL) {
308 return E_ALLOC;
309 }
310
311 for (i=0; i<B->rows; i++) {
312 if (gretl_matrix_get(B, i, 1) >= cval) {
313 for (j=2; j<B->cols; j++) {
314 x = 0;
315 for (k=i; k<B->rows; k++) {
316 x += gretl_matrix_get(B, k, j);
317 }
318 if (x >= 0.5) {
319 P->val[ngp5] = x;
320 cnames[ngp5] = gretl_strdup(bnames[j]);
321 len = strlen(cnames[ngp5]);
322 if (len > namelen) {
323 namelen = len;
324 }
325 ngp5++;
326 }
327 }
328 break;
329 }
330 }
331
332 if (ngp5 > 0) {
333 char fmt[16];
334 int len = 8;
335
336 if (len < namelen + 1) {
337 len = namelen + 1;
338 }
339 sprintf(fmt, "%%%d.3f", len);
340 P->cols = ngp5;
341 gretl_matrix_set_colnames(P, cnames);
342 pprintf(prn, "%s:\n\n", _(label));
343 gretl_matrix_print_with_format(P, fmt, 0, 0, prn);
344 pputc(prn, '\n');
345 } else {
346 pprintf(prn, "%s: 0\n\n", _(label));
347 strings_array_free(cnames, np);
348 }
349
350 gretl_matrix_free(P);
351
352 return 0;
353 }
354
BKW_analyse(gretl_matrix * B,double maxcond,const char * fmt,PRN * prn)355 static int BKW_analyse (gretl_matrix *B, double maxcond,
356 const char *fmt, PRN *prn)
357 {
358 const char *labels[] = {
359 N_("Count of condition indices >= 30"),
360 N_("Variance proportions >= 0.5 associated with cond >= 30"),
361 N_("Count of condition indices >= 10"),
362 N_("Variance proportions >= 0.5 associated with cond >= 10"),
363 N_("No evidence of excessive collinearity")
364 };
365 const char **bnames = NULL;
366 int rows = B->rows;
367 int ngc10 = 0, ngc30 = 0;
368 int i, err = 0;
369
370 pputs(prn, _("According to BKW, cond >= 30 indicates \"strong\" near linear dependence,\n"
371 "and cond between 10 and 30 \"moderately strong\". Parameter estimates whose\n"
372 "variance is mostly associated with problematic cond values may themselves\n"
373 "be considered problematic."));
374 pputs(prn, "\n\n");
375
376 /* count rows with condition index >= thresholds */
377 for (i=rows-1; i>=0; i--) {
378 if (gretl_matrix_get(B, i, 1) >= 30) {
379 ngc30++;
380 }
381 if (gretl_matrix_get(B, i, 1) >= 10) {
382 ngc10++;
383 } else {
384 break;
385 }
386 }
387
388 if (ngc10 > 0) {
389 bnames = gretl_matrix_get_colnames(B);
390 }
391
392 pprintf(prn, "%s: %d\n", _(labels[0]), ngc30);
393 if (ngc30 == 0 && ngc10 > 0) {
394 pputc(prn, '\n');
395 }
396
397 if (ngc30 > 0) {
398 /* variance proportion sums, cond >= 30 */
399 do_proportion_sums(B, bnames, _(labels[1]), 30, prn);
400 }
401
402 pprintf(prn, "%s: %d\n", _(labels[2]), ngc10);
403
404 if (ngc10 > ngc30) {
405 /* variance proportion sums, cond >= 10 */
406 do_proportion_sums(B, bnames, _(labels[3]), 10, prn);
407 } else if (ngc10 == 0) {
408 pprintf(prn, "\n%s\n", _(labels[4]));
409 }
410
411 if (!err) {
412 pputc(prn, '\n');
413 }
414
415 return err;
416 }
417
BKW_print(gretl_matrix * B,int namelen,PRN * prn)418 static void BKW_print (gretl_matrix *B, int namelen, PRN *prn)
419 {
420 const char *strs[] = {
421 N_("Belsley-Kuh-Welsch collinearity diagnostics"),
422 N_("variance proportions"),
423 N_("eigenvalues of inverse covariance matrix"),
424 N_("condition index"),
425 N_("note: variance proportions columns sum to 1.0")
426 };
427 double maxcond = gretl_matrix_get(B, B->rows - 1, 1);
428 int sp = maxcond >= 10000 ? 10 : maxcond >= 1000 ? 9 : 8;
429 int maxcols = 9;
430 char fmt[16];
431
432 if (sp < namelen + 1) {
433 sp = namelen + 1;
434 }
435
436 sprintf(fmt, "%%%d.3f", sp);
437 pprintf(prn, "\n%s:\n\n", _(strs[0]));
438 bufspace(2, prn);
439 pprintf(prn, "%s\n\n", _(strs[1]));
440
441 if (B->cols > maxcols) {
442 int err = 0;
443 gretl_array *a = gretl_matrix_col_split(B, 2, maxcols, &err);
444
445 if (a != NULL) {
446 int i, n = gretl_array_get_length(a);
447 gretl_matrix *ai;
448
449 for (i=0; i<n; i++) {
450 ai = gretl_array_get_data(a, i);
451 gretl_matrix_print_with_format(ai, fmt, 0, 0, prn);
452 if (i < n-1) {
453 pputc(prn, '\n');
454 }
455 }
456 gretl_array_destroy(a);
457 }
458 } else {
459 gretl_matrix_print_with_format(B, fmt, 0, 0, prn);
460 }
461
462 pprintf(prn, "\n lambda = %s (smallest is %g)\n", _(strs[2]),
463 gretl_matrix_get(B, B->rows - 1, 0));
464 pprintf(prn, " cond = %s\n", _(strs[3]));
465 pprintf(prn, " %s\n\n", _(strs[4]));
466
467 BKW_analyse(B, maxcond, fmt, prn);
468 }
469
BKW_pnames(MODEL * pmod,DATASET * dset)470 static gretl_array *BKW_pnames (MODEL *pmod, DATASET *dset)
471 {
472 gretl_array *pnames;
473 char pname[VNAMELEN];
474 int i, k = pmod->ncoeff;
475 int err = 0;
476
477 pnames = gretl_array_new(GRETL_TYPE_STRINGS, k, &err);
478
479 if (pnames != NULL) {
480 for (i=0; i<pmod->ncoeff; i++) {
481 gretl_model_get_param_name(pmod, dset, i, pname);
482 gretl_array_set_string(pnames, i, pname, 1);
483 }
484 }
485
486 return pnames;
487 }
488
compute_vifs(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)489 int compute_vifs (MODEL *pmod, DATASET *dset,
490 gretlopt opt, PRN *prn)
491 {
492 gretl_vector *vif = NULL;
493 int *xlist;
494 int quiet = (opt & OPT_Q);
495 int i, err = 0;
496
497 /* fetch list of regressors */
498 xlist = gretl_model_get_x_list(pmod);
499 if (xlist == NULL) {
500 return E_DATA;
501 }
502
503 /* drop the constant if present in xlist */
504 for (i=xlist[0]; i>0; i--) {
505 if (xlist[i] == 0) {
506 gretl_list_delete_at_pos(xlist, i);
507 break;
508 }
509 }
510
511 if (xlist[0] > 1) {
512 vif = model_vif_vector(pmod, xlist, dset, &err);
513 }
514
515 if (vif != NULL && !quiet) {
516 int vlen = gretl_vector_get_length(vif);
517 int vi, n, maxlen = 0;
518 double vj;
519
520 pprintf(prn, "\n%s\n", _("Variance Inflation Factors"));
521 pprintf(prn, "%s\n", _("Minimum possible value = 1.0"));
522 pprintf(prn, "%s\n", _("Values > 10.0 may indicate a collinearity problem"));
523 pputc(prn, '\n');
524
525 for (i=0; i<vlen; i++) {
526 vi = xlist[i+1];
527 vj = vif->val[i];
528 if (!na(vj)) {
529 n = strlen(dset->varname[vi]);
530 if (n > maxlen) {
531 maxlen = n;
532 }
533 }
534 }
535
536 maxlen = maxlen < 12 ? 12 : maxlen;
537
538 for (i=0; i<vlen; i++) {
539 vi = xlist[i+1];
540 vj = vif->val[i];
541 if (!quiet && !na(vj)) {
542 pprintf(prn, "%*s %8.3f\n", maxlen, dset->varname[vi], vj);
543 }
544 }
545
546 pputc(prn, '\n');
547 pputs(prn, _("VIF(j) = 1/(1 - R(j)^2), where R(j) is the "
548 "multiple correlation coefficient\nbetween "
549 "variable j and the other independent variables"));
550 pputc(prn, '\n');
551 }
552
553 if (!err && !(opt & OPT_G)) {
554 set_last_result_data(vif, GRETL_TYPE_MATRIX);
555 } else {
556 gretl_matrix_free(vif);
557 }
558
559 free(xlist);
560
561 return err;
562 }
563
compute_bkw(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)564 int compute_bkw (MODEL *pmod, DATASET *dset,
565 gretlopt opt, PRN *prn)
566 {
567 gretl_matrix *V, *BKW = NULL;
568 int quiet = (opt & OPT_Q);
569 int err = 0;
570
571 V = gretl_vcv_matrix_from_model(pmod, NULL, &err);
572
573 if (!err) {
574 gretl_array *pnames = BKW_pnames(pmod, dset);
575 PRN *vprn = quiet ? NULL : prn;
576
577 BKW = bkw_matrix(V, pnames, vprn, &err);
578 gretl_array_destroy(pnames);
579 gretl_matrix_free(V);
580 }
581
582 if (!err && !(opt & OPT_G)) {
583 set_last_result_data(BKW, GRETL_TYPE_MATRIX);
584 } else {
585 gretl_matrix_free(BKW);
586 }
587
588 return err;
589 }
590