1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 /* panel estimation and dataset handling for gretl */
21
22 #include "libgretl.h"
23 #include "gretl_f2c.h"
24 #include "clapack_double.h"
25 #include "gretl_model.h"
26 #include "gretl_panel.h"
27 #include "libset.h"
28 #include "uservar.h"
29 #include "gretl_string_table.h"
30 #include "matrix_extra.h" /* for testing */
31
32 /**
33 * SECTION:gretl_panel
34 * @short_description: estimation of panel data models
35 * @title: Panel data
36 * @include: libgretl.h
37 *
38 * Provides support for estimating panel data models
39 * such as fixed and random effects.
40 */
41
42 #define PDEBUG 0
43
44 enum vcv_ops {
45 VCV_INIT,
46 VCV_SUBTRACT
47 };
48
49 typedef struct unit_ts_ unit_ts;
50
51 struct unit_ts_ {
52 int *t0; /* start of longest contiguous time series, per unit */
53 int *T; /* length of such sequence, per unit */
54 };
55
56 typedef struct panelmod_t_ panelmod_t;
57
58 struct panelmod_t_ {
59 gretlopt opt; /* option flags */
60 int nunits; /* total cross-sectional units in sample range */
61 int effn; /* effective (included) cross-section units */
62 int T; /* times-series length of panel */
63 int Tmax; /* effective times-series length (max usable obs per unit) */
64 int Tmin; /* shortest usable times-series */
65 double Tbar; /* harmonic mean of per-unit time-series lengths */
66 int NT; /* total observations used (based on pooled model) */
67 int ntdum; /* number of time dummies added */
68 int *unit_obs; /* array of number of observations per x-sect unit */
69 char *varying; /* array to record properties of pooled-model regressors */
70 int *vlist; /* list of time-varying variables from pooled model */
71 int balanced; /* 1 if the model dataset is balanced, else 0 */
72 int dfH; /* number of coeffs for Hausman test */
73 int Fdfn; /* numerator df, F for differing intercepts */
74 double Fdfd; /* denominator df, F for differing intercepts */
75 double theta; /* quasi-demeaning coefficient */
76 double theta_bar; /* mean of theta_i (unbalanced case) */
77 double Ffe; /* joint significance of fixed effects */
78 double BP; /* Breusch-Pagan test statistic */
79 double H; /* Hausman test statistic */
80 gretl_matrix *bdiff; /* array of coefficient differences */
81 gretl_matrix *Sigma; /* Hausman covariance matrix */
82 double s2b; /* residual variance, group means regression */
83 double s2e; /* residual variance, fixed-effects regression */
84 double s2v; /* estimate of between variance */
85 double ubPub; /* for use in unbalanced Swamy-Arora */
86 double dw; /* Durbin-Watson, for transfer to random effects */
87 double rho; /* for transfer to random effects */
88 int *small2big; /* data indexation array */
89 int *big2small; /* reverse data indexation array */
90 MODEL *pooled; /* reference model (pooled OLS) */
91 MODEL *realmod; /* fixed or random effects model */
92 double *re_uhat; /* "fixed" random-effects residuals */
93 };
94
95 struct {
96 int n; /* number of cross-sectional units */
97 int T; /* number of observations per unit */
98 int offset; /* sampling offset into full dataset */
99 } panidx;
100
101 /* for testing purposes */
102 int stata_sa;
103 int IGLS;
104 char glsmat[MAXLEN];
105
106 static int varying_vars_list (const DATASET *dset, panelmod_t *pan);
107 static void calculate_Tbar (panelmod_t *pan);
108
109 /* translate from (i = unit, t = time period for that unit) to
110 overall 0-based index into the data set */
111 #define panel_index(i,t) (i * panidx.T + t + panidx.offset)
112
113 #define panel_missing(p, t) (na(p->pooled->uhat[t]))
114
115
116 static void
panel_index_init(const DATASET * dset,int nunits,int T)117 panel_index_init (const DATASET *dset, int nunits, int T)
118 {
119 panidx.n = nunits;
120 panidx.T = T;
121 panidx.offset = dset->t1;
122
123 #if PDEBUG
124 fprintf(stderr, "panel_index_init: n=%d, T=%d, offset=%d\n",
125 panidx.n, panidx.T, panidx.offset);
126 #endif
127 }
128
129 /* Allocate the indexation arrays that allow us to translate between
130 the full-length data array and the possibly shorter array used for
131 transformed data (de-meaned or quasi-demeaned).
132 */
133
allocate_data_finders(panelmod_t * pan,int bign)134 static int allocate_data_finders (panelmod_t *pan, int bign)
135 {
136 int s;
137
138 if (pan->small2big != NULL) {
139 /* already done */
140 return 0;
141 }
142
143 pan->small2big = malloc(pan->NT * sizeof *pan->small2big);
144 pan->big2small = malloc(bign * sizeof *pan->big2small);
145
146 if (pan->small2big == NULL || pan->big2small == NULL) {
147 return E_ALLOC;
148 }
149
150 for (s=0; s<bign; s++) {
151 pan->big2small[s] = -1;
152 }
153
154 return 0;
155 }
156
157 #define small_index(p,t) ((p->big2small == NULL)? t : p->big2small[t])
158 #define big_index(p,t) ((p->small2big == NULL)? t : p->small2big[t])
159
panelmod_init(panelmod_t * pan)160 static void panelmod_init (panelmod_t *pan)
161 {
162 pan->nunits = 0;
163 pan->effn = 0;
164 pan->T = 0;
165 pan->Tmax = 0;
166 pan->Tmin = 0;
167 pan->Tbar = 0;
168 pan->NT = 0;
169 pan->ntdum = 0;
170 pan->unit_obs = NULL;
171 pan->varying = NULL;
172 pan->vlist = NULL;
173 pan->opt = OPT_NONE;
174
175 pan->balanced = 1;
176 pan->theta = NADBL;
177 pan->theta_bar = NADBL;
178 pan->ubPub = NADBL;
179 pan->dw = NADBL;
180 pan->rho = NADBL;
181
182 pan->Ffe = NADBL;
183 pan->Fdfn = 0;
184 pan->Fdfd = 0;
185
186 pan->BP = NADBL;
187 pan->H = NADBL;
188 pan->dfH = 0;
189
190 pan->bdiff = NULL;
191 pan->Sigma = NULL;
192
193 pan->small2big = NULL;
194 pan->big2small = NULL;
195
196 pan->pooled = NULL;
197 pan->realmod = NULL;
198 pan->re_uhat = NULL;
199 }
200
panelmod_free(panelmod_t * pan)201 static void panelmod_free (panelmod_t *pan)
202 {
203 free(pan->unit_obs);
204 free(pan->varying);
205 free(pan->vlist);
206
207 gretl_matrix_free(pan->bdiff);
208 gretl_matrix_free(pan->Sigma);
209
210 free(pan->small2big);
211 free(pan->big2small);
212 free(pan->re_uhat);
213
214 free(pan->realmod);
215 }
216
217 /* test variable number v against the (possibly reduced) regression
218 list which contains only time-varying regressors
219 */
220
var_is_varying(const panelmod_t * pan,int v)221 static int var_is_varying (const panelmod_t *pan, int v)
222 {
223 if (v != 0) {
224 int i;
225
226 for (i=2; i<=pan->vlist[0]; i++) {
227 if (pan->vlist[i] == v) {
228 return 1;
229 }
230 }
231 }
232
233 return 0;
234 }
235
236 /* retrieve X'X^{-1} from the pooled or fixed effects model */
237
panel_model_xpxinv(MODEL * pmod,int * err)238 static gretl_matrix *panel_model_xpxinv (MODEL *pmod, int *err)
239 {
240 gretl_matrix *X;
241 int k = pmod->ncoeff;
242 double x;
243 int i, j, m;
244
245 #if 0
246 fprintf(stderr, "panel_model_xpxinv...\n");
247 #endif
248
249 if (gretl_model_get_int(pmod, "vcv_xpx")) {
250 /* already done */
251 gretl_model_set_int(pmod, "vcv_xpx", 0);
252 } else if (pmod->vcv != NULL) {
253 double s2 = pmod->sigma * pmod->sigma;
254 int nv = (k * k + k) / 2;
255
256 for (i=0; i<nv; i++) {
257 pmod->vcv[i] /= s2;
258 }
259 } else {
260 *err = makevcv(pmod, 1.0);
261 if (*err) {
262 return NULL;
263 }
264 gretl_model_set_int(pmod, "vcv_xpx", 1);
265 }
266
267 X = gretl_matrix_alloc(k, k);
268 if (X == NULL) {
269 *err = E_ALLOC;
270 return NULL;
271 }
272
273 m = 0;
274 for (j=0; j<k; j++) {
275 for (i=j; i<k; i++) {
276 x = pmod->vcv[m++];
277 gretl_matrix_set(X, i, j, x);
278 gretl_matrix_set(X, j, i, x);
279 }
280 }
281
282 #if 0
283 fprintf(stderr, "pmod->sigma = %g\n", pmod->sigma);
284 printlist(pmod->list, "pmod->list");
285 gretl_matrix_print(X, "panel (X'X)^{-1}");
286 #endif
287
288 return X;
289 }
290
291 /* Beck and Katz, as outlined in Greene. We offer the following
292 for pooled OLS and FE. Greene writes (in effect):
293
294 Var(b) = A^{-1} W A^{-1}
295
296 where A = \sum_{i=1}^n X'_i X_i (called "XX" below)
297 W = \sum_{i=1}^n \sum_{j=1}^n \sigma_{ij} X'_{i} X_{j}
298
299 and \sigma_{ij} is estimated as (1/T) \sum_{t=1}^T e_i e_j,
300 with the e's being OLS (or FE) residuals.
301
302 In computing this, we need to check that W is positive definite:
303 this seems not to be guaranteed for unbalanced panels.
304 */
305
306 static int
beck_katz_vcv(MODEL * pmod,panelmod_t * pan,const DATASET * dset,gretl_matrix * XX,gretl_matrix * W,gretl_matrix * V)307 beck_katz_vcv (MODEL *pmod, panelmod_t *pan, const DATASET *dset,
308 gretl_matrix *XX, gretl_matrix *W, gretl_matrix *V)
309 {
310 gretl_matrix *Xi = NULL;
311 gretl_matrix *Xj = NULL;
312 int T = pan->T;
313 int k = pmod->ncoeff;
314 int s, si, sj;
315 int i, j, p, v, t;
316 int err = 0;
317
318 Xi = gretl_matrix_alloc(pan->Tmax, k);
319 Xj = gretl_matrix_alloc(pan->Tmax, k);
320
321 if (Xi == NULL || Xj == NULL) {
322 err = E_ALLOC;
323 goto bailout;
324 }
325
326 for (i=0; i<pan->nunits; i++) {
327 int Ti = pan->unit_obs[i];
328 double sii = 0.0;
329
330 if (Ti == 0) {
331 continue;
332 }
333
334 Xi = gretl_matrix_reuse(Xi, Ti, k);
335
336 s = 0;
337 for (t=0; t<T; t++) {
338 si = panel_index(i, t);
339 if (!na(pmod->uhat[si])) {
340 sii += pmod->uhat[si] * pmod->uhat[si];
341 si = small_index(pan, si);
342 for (p=0; p<k; p++) {
343 v = pmod->list[p + 2];
344 gretl_matrix_set(Xi, s, p, dset->Z[v][si]);
345 }
346 s++;
347 }
348 }
349
350 sii /= Ti;
351 sii = sqrt(sii);
352
353 /* "diagonal" component of W */
354 gretl_matrix_multiply_by_scalar(Xi, sii);
355 gretl_matrix_multiply_mod(Xi, GRETL_MOD_TRANSPOSE,
356 Xi, GRETL_MOD_NONE,
357 W, GRETL_MOD_CUMULATE);
358
359 for (j=i+1; j<pan->nunits; j++) {
360 int Tij = 0;
361 double sij = 0.0;
362
363 if (pan->unit_obs[j] == 0) {
364 continue;
365 }
366
367 /* count matching observations */
368 for (t=0; t<T; t++) {
369 si = panel_index(i, t);
370 sj = panel_index(j, t);
371 if (!na(pmod->uhat[si]) && !na(pmod->uhat[sj])) {
372 sij += pmod->uhat[si] * pmod->uhat[sj];
373 Tij++;
374 }
375 }
376
377 if (Tij == 0) {
378 continue;
379 }
380
381 sij /= Tij;
382 Xi = gretl_matrix_reuse(Xi, Tij, k);
383 Xj = gretl_matrix_reuse(Xj, Tij, k);
384
385 s = 0;
386 for (t=0; t<T; t++) {
387 si = panel_index(i, t);
388 sj = panel_index(j, t);
389 if (!na(pmod->uhat[si]) && !na(pmod->uhat[sj])) {
390 si = small_index(pan, si);
391 sj = small_index(pan, sj);
392 for (p=0; p<k; p++) {
393 v = pmod->list[p + 2];
394 gretl_matrix_set(Xi, s, p, dset->Z[v][si]);
395 gretl_matrix_set(Xj, s, p, dset->Z[v][sj]);
396 }
397 s++;
398 }
399 }
400
401 /* cumulate s_ij * Xi'Xj into W (using V as storage) */
402 gretl_matrix_multiply_by_scalar(Xi, sij);
403 gretl_matrix_multiply_mod(Xi, GRETL_MOD_TRANSPOSE,
404 Xj, GRETL_MOD_NONE,
405 V, GRETL_MOD_NONE);
406 gretl_matrix_add_to(W, V);
407 /* and take in s_ij * Xj'Xi */
408 gretl_matrix_add_transpose_to(W, V);
409 }
410 }
411
412 /* check that the middle term is p.d. */
413 gretl_matrix_copy_values(V, W);
414 err = gretl_matrix_cholesky_decomp(V);
415 if (err) {
416 fprintf(stderr, "beck_katz_vcv: matrix W is not p.d.\n");
417 gretl_model_set_int(pmod, "panel_bk_failed", 1);
418 goto bailout;
419 }
420
421 /* form V = (Xi'Xi)^{-1} W (Xi'Xi)^{-1} */
422 gretl_matrix_qform(XX, GRETL_MOD_NONE, W,
423 V, GRETL_MOD_NONE);
424
425 gretl_model_set_vcv_info(pmod, VCV_PANEL, PANEL_BK);
426
427 bailout:
428
429 gretl_matrix_free(Xi);
430 gretl_matrix_free(Xj);
431
432 return err;
433 }
434
435 /* HAC covariance matrix for pooled, fixed- or random-effects models,
436 given "fixed T and large N". In the case of "large T" a different
437 form is needed for robustness in respect of autocorrelation. See
438 Arellano, "Panel Data Econometrics" (Oxford, 2003), pages 18-19.
439
440 In the case of fixed or random effects there should be no missing
441 values, since we created a special dataset purged of same. But
442 with the pooled model we need to index into the full dataset, and
443 work around any missing values.
444 */
445
446 static int
arellano_vcv(MODEL * pmod,panelmod_t * pan,const DATASET * dset,gretl_matrix * XX,gretl_matrix * W,gretl_matrix * V)447 arellano_vcv (MODEL *pmod, panelmod_t *pan, const DATASET *dset,
448 gretl_matrix *XX, gretl_matrix *W, gretl_matrix *V)
449 {
450 gretl_vector *e = NULL;
451 gretl_matrix *Xi = NULL;
452 gretl_vector *eXi = NULL;
453 int T = pan->Tmax;
454 int k = pmod->ncoeff;
455 double Nfac = 1.0;
456 int i, j, v, s, t;
457 int err = 0;
458
459 e = gretl_column_vector_alloc(T);
460 Xi = gretl_matrix_alloc(T, k);
461 eXi = gretl_vector_alloc(k);
462
463 if (e == NULL || Xi == NULL || eXi == NULL) {
464 err = E_ALLOC;
465 goto bailout;
466 }
467
468 if (getenv("ARELLANO_ASY") != NULL) {
469 /* don't do the @Nfac thing */
470 fprintf(stderr, "skipping Cameron-Miller adjustment\n");
471 } else {
472 /* Small N adjustment factor: "reduce downward bias in
473 case of finite N" (Cameron and Miller, "A Practitioner's
474 Guide to Cluster-Robust Inference", Journal of Human
475 Resources, Spring 2015).
476 */
477 if (pmod->ci != IVREG) {
478 /* FIXME IVREG? */
479 Nfac = pan->effn / (pan->effn - 1.0);
480 Nfac *= (pmod->nobs - 1.0) / (pmod->nobs - k);
481 Nfac = sqrt(Nfac);
482 }
483 #if 0
484 fprintf(stderr, "Nfac1 = %d/%d = %g\n", pan->effn, pan->effn-1,
485 pan->effn / (pan->effn - 1.0));
486 fprintf(stderr, "Nfac2 = %d/%d = %g\n", pmod->nobs-1, pmod->nobs-k,
487 (pmod->nobs - 1.0) / (pmod->nobs - k));
488 fprintf(stderr, "Nfac final = %g\n", Nfac);
489 #endif
490 }
491
492 for (i=0; i<pan->nunits; i++) {
493 int Ti = pan->unit_obs[i];
494 int p = 0;
495
496 if (Ti == 0) {
497 continue;
498 }
499
500 e = gretl_matrix_reuse(e, Ti, 1);
501 Xi = gretl_matrix_reuse(Xi, Ti, k);
502
503 for (t=0; t<pan->T; t++) {
504 s = panel_index(i, t);
505 if (na(pmod->uhat[s])) {
506 continue;
507 }
508 gretl_vector_set(e, p, Nfac * pmod->uhat[s]);
509 s = small_index(pan, s);
510 for (j=0; j<k; j++) {
511 v = pmod->list[j+2];
512 if (s < 0) {
513 gretl_matrix_set(Xi, p, j, 0);
514 } else {
515 gretl_matrix_set(Xi, p, j, dset->Z[v][s]);
516 }
517 }
518 p++;
519 }
520
521 gretl_matrix_multiply_mod(e, GRETL_MOD_TRANSPOSE,
522 Xi, GRETL_MOD_NONE,
523 eXi, GRETL_MOD_NONE);
524 gretl_matrix_multiply_mod(eXi, GRETL_MOD_TRANSPOSE,
525 eXi, GRETL_MOD_NONE,
526 W, GRETL_MOD_CUMULATE);
527 }
528
529 /* form V(b) = (X'X)^{-1} W (X'X)^{-1} */
530 gretl_matrix_qform(XX, GRETL_MOD_NONE, W,
531 V, GRETL_MOD_NONE);
532
533 #if 0
534 gretl_matrix_print(XX, "X'X^{-1}");
535 gretl_matrix_print(W, "W");
536 gretl_matrix_print(V, "V");
537 #endif
538
539 gretl_model_set_vcv_info(pmod, VCV_PANEL, PANEL_HAC);
540
541 bailout:
542
543 gretl_matrix_free(e);
544 gretl_matrix_free(Xi);
545 gretl_matrix_free(eXi);
546
547 return err;
548 }
549
550 /* common setup for Arellano and Beck-Katz VCV estimators
551 (note that pooled OLS with the --cluster option is
552 handled separately)
553 */
554
555 static int
panel_robust_vcv(MODEL * pmod,panelmod_t * pan,const DATASET * dset)556 panel_robust_vcv (MODEL *pmod, panelmod_t *pan, const DATASET *dset)
557 {
558 gretl_matrix *W = NULL;
559 gretl_matrix *V = NULL;
560 gretl_matrix *XX = NULL;
561 int k = pmod->ncoeff;
562 int err = 0;
563
564 W = gretl_zero_matrix_new(k, k);
565 V = gretl_matrix_alloc(k, k);
566
567 if (W == NULL || V == NULL) {
568 err = E_ALLOC;
569 goto bailout;
570 }
571
572 XX = panel_model_xpxinv(pmod, &err);
573 if (err) {
574 fprintf(stderr, "panel_robust_vcv: failed at panel_model_xpxinv\n");
575 goto bailout;
576 }
577
578 /* call the appropriate function */
579 if (libset_get_bool(USE_PCSE)) {
580 err = beck_katz_vcv(pmod, pan, dset, XX, W, V);
581 } else {
582 err = arellano_vcv(pmod, pan, dset, XX, W, V);
583 }
584
585 if (!err) {
586 int i, j, s = 0;
587
588 for (i=0; i<k; i++) {
589 for (j=i; j<k; j++) {
590 pmod->vcv[s++] = gretl_matrix_get(V, i, j);
591 }
592 pmod->sderr[i] = sqrt(gretl_matrix_get(V, i, i));
593 }
594 }
595
596 bailout:
597
598 gretl_matrix_free(W);
599 gretl_matrix_free(V);
600 gretl_matrix_free(XX);
601
602 if (err && pmod->vcv != NULL) {
603 free(pmod->vcv);
604 pmod->vcv = NULL;
605 }
606
607 return err;
608 }
609
femod_regular_vcv(MODEL * pmod)610 static void femod_regular_vcv (MODEL *pmod)
611 {
612 if (pmod->vcv == NULL) {
613 /* estimated via Cholesky: no vcv yet */
614 makevcv(pmod, pmod->sigma);
615 } else {
616 /* estimated via QR: "vcv" = (X'X)^{-1} */
617 int i, k = pmod->ncoeff;
618 int n = k * (k + 1) / 2;
619 double s2 = pmod->sigma * pmod->sigma;
620
621 for (i=0; i<n; i++) {
622 pmod->vcv[i] *= s2;
623 }
624 }
625 }
626
627 #if 0 /* not ready yet */
628
629 /* See Cameron and Trivedi, Microeconometrics, 21.3.4 */
630
631 static int panel_autocorr_1 (MODEL *pmod, const panelmod_t *pan)
632 {
633 double *ubar;
634 double *ctt, *css, *cst;
635 double uit, uis, rho;
636 int i, t, ti;
637 int n = 0;
638
639 ubar = malloc(pan->T * sizeof *ubar);
640 ctt = malloc(pan->T * sizeof *ctt);
641 css = malloc(pan->T * sizeof *css);
642 cst = malloc(pan->T * sizeof *cst);
643
644 if (ubar == NULL) {
645 return E_ALLOC;
646 }
647
648 for (t=0; t<pan->T; t++) {
649 ubar[t] = 0.0;
650 ctt[t] = css[t] = cst[t] = 0.0;
651 }
652
653 for (i=0; i<pan->nunits; i++) {
654 if (pan->unit_obs[i] > 0) {
655 for (t=0; t<pan->T; t++) {
656 ti = panel_index(i, t);
657 uit = pmod->uhat[ti];
658 if (!na(uit)) {
659 ubar[t] += uit;
660 }
661 }
662 n++;
663 }
664 }
665
666 for (t=0; t<pan->T; t++) {
667 ubar[t] /= n;
668 }
669
670 for (i=0; i<pan->nunits; i++) {
671 if (pan->unit_obs[i] > 0) {
672 for (t=0; t<pan->T; t++) {
673 ti = panel_index(i, t);
674 uit = pmod->uhat[ti];
675 if (t > 0) {
676 uis = pmod->uhat[ti-1];
677 } else {
678 uis = NADBL;
679 }
680 if (!na(uit)) {
681 ctt[t] += (uit - ubar[t]) * (uit - ubar[t]);
682 }
683 if (!na(uis)) {
684 css[t] += (uis - ubar[t-1]) * (uis - ubar[t-1]);
685 }
686 if (!na(uis) && !na(uit)) {
687 cst[t] += (uis - ubar[t-1]) * (uit - ubar[t-1]);
688 }
689 }
690 }
691 }
692
693 for (t=1; t<pan->T; t++) {
694 ctt[t] /= (n - 1);
695 css[t] /= (n - 1);
696 cst[t] /= (n - 1);
697 rho = cst[t] / sqrt(css[t] * ctt[t]);
698 fprintf(stderr, "rho(%d) = %g\n", t, rho);
699 }
700
701 free(ubar);
702 free(ctt);
703 free(cst);
704 free(css);
705
706 return 0;
707 }
708
709 #endif
710
711 #define DWPVAL_TESTING 1
712
panel_DW_pval_ok(const MODEL * pmod)713 int panel_DW_pval_ok (const MODEL *pmod)
714 {
715 if (na(pmod->dw)) {
716 return 0;
717 } else {
718 #if DWPVAL_TESTING
719 return 1;
720 #else
721 int Tmax = gretl_model_get_int(pmod, "Tmax");
722 int Tmin = gretl_model_get_int(pmod, "Tmin");
723
724 /* Too restrictive? Or not restrictive enough? */
725 return Tmax == Tmin;
726 #endif
727 }
728 }
729
730 /* See Bhargava, Franzini and Narendranathan, "Serial Correlation and
731 the Fixed Effects Model", Review of Economic Studies 49, 1982,
732 page 536. Strictly speaking what's calculated here is the marginal
733 significance level of the DW stat when considered as a d_L value.
734 It's unclear if this method can really be extended to unbalanced
735 panels.
736 */
737
BFN_panel_DW_pvalue(MODEL * pmod,const DATASET * dset,int * err)738 double BFN_panel_DW_pvalue (MODEL *pmod, const DATASET *dset, int *err)
739 {
740 gretl_matrix *lam = NULL;
741 double r, pv, lamq, sinarg, pi_2T;
742 int T = gretl_model_get_int(pmod, "Tmax");
743 int N = gretl_model_get_int(pmod, "n_included_units");
744 int nlam, k = pmod->ncoeff;
745 int i, q;
746
747 if (pmod->ifc) {
748 k--; /* don't include the constant */
749 }
750
751 nlam = pmod->nobs - N - k;
752 lam = gretl_column_vector_alloc(nlam);
753 if (lam == NULL) {
754 *err = E_ALLOC;
755 return NADBL;
756 }
757
758 pi_2T = M_PI / (2.0*T);
759 sinarg = sin(pi_2T);
760 lamq = 4 * sinarg * sinarg;
761 r = pmod->dw;
762
763 q = 1;
764 for (i=0; i<nlam; i++) {
765 lam->val[i] = lamq - r;
766 if ((i+1) % N == 0) {
767 q++;
768 sinarg = sin(q*pi_2T);
769 lamq = 4 * sinarg * sinarg;
770 }
771 }
772
773 pv = imhof(lam, 0, err);
774 if (!*err) {
775 if (pv < 0) {
776 pv = 0;
777 }
778 gretl_model_set_double(pmod, "dw_pval", pv);
779 }
780
781 #if 0
782 fprintf(stderr, "DW: T=%d, Tmin=%d, N=%d, nlam=%d, DW=%g, pv=%g\n",
783 T, gretl_model_get_int(pmod, "Tmin"), N, nlam, pmod->dw, pv);
784 #endif
785
786 gretl_matrix_free(lam);
787
788 return pv;
789 }
790
791 /* Durbin-Watson statistic for the pooled or fixed effects model.
792
793 See Bhargava, Franzini and Narendranathan, "Serial Correlation and
794 the Fixed Effects Model", Review of Economic Studies 49, 1982,
795 pp. 533-549, and also Baltagi and Wu, "Unequally Spaced Panel Data
796 Regressions With AR(1) Disturbances", Econometric Theory, 15, 1999,
797 pp. 814-823 for discussion of the unbalanced case.
798 */
799
panel_dwstat(MODEL * pmod,panelmod_t * pan)800 static void panel_dwstat (MODEL *pmod, panelmod_t *pan)
801 {
802 double dwnum = 0.0;
803 double rnum = 0.0;
804 double rden = 0.0;
805 double ut, u1;
806 int in_bounds = 1;
807 int i, t, s;
808
809 pmod->dw = pmod->rho = NADBL;
810
811 if (pmod->ess <= 0.0) {
812 return;
813 }
814
815 #if 0
816 fprintf(stderr, "DW: pmod=%p, pooled=%p, T=%d\n",
817 (void *) pmod, (void *) pan->pooled, pan->T);
818 fprintf(stderr, " t1=%d, t2=%d, nobs=%d, full_n=%d\n", pmod->t1,
819 pmod->t2, pmod->nobs, pmod->full_n);
820 #endif
821
822 for (i=0; i<pan->nunits && in_bounds; i++) {
823 int started = 0;
824
825 if (pan->unit_obs[i] == 0) {
826 continue;
827 }
828 for (t=0; t<pan->T; t++) {
829 s = panel_index(i, t);
830 if (s >= pmod->t2) {
831 in_bounds = 0;
832 break;
833 }
834 ut = pmod->uhat[s];
835 if (!na(ut)) {
836 if (t == 0) {
837 started = 1;
838 continue;
839 }
840 u1 = pmod->uhat[s-1];
841 if (na(u1)) {
842 /* implicitly take u1 as 0 */
843 if (started) {
844 dwnum += ut * ut;
845 }
846 } else {
847 dwnum += (ut - u1) * (ut - u1);
848 rnum += ut * u1;
849 rden += u1 * u1;
850 }
851 started = 1;
852 }
853 }
854 }
855
856 if (dwnum > 0.0) {
857 pmod->dw = dwnum / pmod->ess;
858 }
859 if (na(pmod->rho) && rden > 0.0 && !na(rden)) {
860 pmod->rho = rnum / rden;
861 }
862 if (!na(pmod->rho) && (pmod->rho <= -1.0 || pmod->rho >= 1.0)) {
863 pmod->rho = NADBL;
864 }
865
866 if (pan->opt & OPT_U) {
867 /* make these stats available for random effects reporting */
868 pan->dw = pmod->dw;
869 pan->rho = pmod->rho;
870 }
871 }
872
873 /* Allocate the arrays needed to perform the Hausman test,
874 in its matrix formulation.
875 */
876
matrix_hausman_allocate(panelmod_t * pan)877 static int matrix_hausman_allocate (panelmod_t *pan)
878 {
879 int k = pan->vlist[0] - 2;
880 int err = 0;
881
882 /* array to hold differences between coefficient estimates */
883 pan->bdiff = gretl_vector_alloc(k);
884 if (pan->bdiff == NULL) {
885 err = E_ALLOC;
886 } else {
887 /* array to hold covariance matrix */
888 pan->Sigma = gretl_matrix_alloc(k, k);
889 if (pan->Sigma == NULL) {
890 gretl_matrix_free(pan->bdiff);
891 pan->bdiff = NULL;
892 err = E_ALLOC;
893 }
894 }
895
896 if (!err) {
897 pan->dfH = k;
898 }
899
900 return err;
901 }
902
real_varying_list(panelmod_t * pan)903 static int *real_varying_list (panelmod_t *pan)
904 {
905 int *vlist = gretl_list_copy(pan->vlist);
906 int i;
907
908 if (vlist != NULL) {
909 for (i=2; i<=vlist[0]; i++) {
910 if (vlist[i] == 0) {
911 gretl_list_delete_at_pos(vlist, i);
912 break;
913 }
914 }
915 }
916
917 return vlist;
918 }
919
920 /* Construct a version of the dataset from which the group means are
921 subtracted, for the "within" regression. Nota bene: this auxiliary
922 dataset is not necessarily of full length: missing observations
923 are skipped.
924 */
925
within_groups_dataset(const DATASET * dset,panelmod_t * pan)926 static DATASET *within_groups_dataset (const DATASET *dset,
927 panelmod_t *pan)
928 {
929 DATASET *wset = NULL;
930 int *vlist = NULL;
931 int i, j, vj, nv;
932 int s, t, bigt;
933 int err = 0;
934
935 pan->balanced = 1;
936
937 for (i=0; i<pan->nunits; i++) {
938 if (pan->unit_obs[i] > 0) {
939 if (pan->unit_obs[i] != pan->Tmax) {
940 pan->balanced = 0;
941 }
942 }
943 }
944
945 if (pan->NT < dset->n) {
946 err = allocate_data_finders(pan, dset->n);
947 if (err) {
948 return NULL;
949 }
950 }
951
952 vlist = real_varying_list(pan);
953 if (vlist == NULL) {
954 return NULL;
955 }
956
957 nv = pan->vlist[0];
958
959 #if PDEBUG
960 fprintf(stderr, "within_groups dataset: nvars=%d, nobs=%d\n",
961 pan->vlist[0], pan->NT);
962 #endif
963
964 wset = create_auxiliary_dataset(nv, pan->NT, 0);
965 if (wset == NULL) {
966 free(vlist);
967 return NULL;
968 }
969
970 for (j=1; j<=vlist[0]; j++) {
971 double xbar, gxbar = 0.0;
972 int allzero = 1;
973
974 vj = vlist[j];
975 gxbar = 0.0;
976 s = 0;
977
978 #if PDEBUG
979 strcpy(wset->varname[j], dset->varname[vj]);
980 fprintf(stderr, "de-meaning: working on list[%d], %s\n",
981 j, dset->varname[vj]);
982 #endif
983
984 for (i=0; i<pan->nunits; i++) {
985 int Ti = pan->unit_obs[i];
986 int grpzero = 1;
987 int got = 0;
988
989 if (Ti == 0) {
990 continue;
991 }
992
993 /* first pass: find the group mean */
994 xbar = 0.0;
995 for (t=0; t<pan->T; t++) {
996 bigt = panel_index(i, t);
997 if (!panel_missing(pan, bigt)) {
998 xbar += dset->Z[vj][bigt];
999 }
1000 }
1001
1002 gxbar += xbar;
1003 xbar /= Ti;
1004
1005 /* second pass: calculate de-meaned values */
1006 got = 0;
1007 for (t=0; t<pan->T && got<Ti; t++) {
1008 bigt = panel_index(i, t);
1009 if (!panel_missing(pan, bigt)) {
1010 wset->Z[j][s] = dset->Z[vj][bigt] - xbar;
1011 if (wset->Z[j][s] != 0.0) {
1012 grpzero = 0;
1013 }
1014 got++;
1015 if (pan->small2big != NULL) {
1016 pan->small2big[s] = bigt;
1017 pan->big2small[bigt] = s;
1018 }
1019 s++;
1020 }
1021 }
1022
1023 if (!grpzero) {
1024 allzero = 0;
1025 }
1026 } /* end loop over units */
1027
1028 gxbar /= pan->NT;
1029
1030 if (j == 1 && allzero) {
1031 /* the dependent variable is not time-varying */
1032 fprintf(stderr, "fixed effects: dependent var is time-invariant\n");
1033 }
1034
1035 /* wZ = data - group mean + grand mean */
1036 for (s=0; s<pan->NT; s++) {
1037 wset->Z[j][s] += gxbar;
1038 }
1039 }
1040
1041 free(vlist);
1042
1043 return wset;
1044 }
1045
1046 /* Construct a quasi-demeaned version of the dataset so we can apply
1047 least squares to estimate the random effects model. This dataset
1048 is not necessarily of full length. If we're implementing the
1049 Hausman test using the regression approach this dataset will also
1050 include "straight" de-meaned counterparts of the time-varying
1051 variables.
1052 */
1053
1054 static DATASET *
random_effects_dataset(const DATASET * dset,const DATASET * gset,int * relist,int * hlist,panelmod_t * pan)1055 random_effects_dataset (const DATASET *dset,
1056 const DATASET *gset,
1057 int *relist,
1058 int *hlist,
1059 panelmod_t *pan)
1060 {
1061 DATASET *rset;
1062 double xbar, theta_i;
1063 int hreg = (hlist != NULL);
1064 int v1 = relist[0];
1065 int v2 = 0;
1066 int i, j, k, k2, t;
1067 int vj, s, bigt, u;
1068 int err = 0;
1069
1070 if (hreg) {
1071 /* Apparatus for regression version of Hausman test:
1072 note that we shouldn't include time dummies here
1073 since the de-meaned versions would be perfectly
1074 collinear with the quasi-demeaned ones.
1075 */
1076 int hmax = pan->vlist[0] - pan->ntdum;
1077
1078 for (i=1; i<hmax; i++) {
1079 if (pan->vlist[i+1] != 0) {
1080 hlist[0] = ++v2;
1081 hlist[i-1] = v1 + i - 2;
1082 }
1083 }
1084 }
1085
1086 if (pan->NT < dset->n) {
1087 err = allocate_data_finders(pan, dset->n);
1088 if (err) {
1089 return NULL;
1090 }
1091 }
1092
1093 #if PDEBUG
1094 fprintf(stderr, "random_effects_dataset: nvars=%d, nobs=%d\n",
1095 v1 + v2, pan->NT);
1096 #endif
1097
1098 rset = create_auxiliary_dataset(v1 + v2, pan->NT, 0);
1099 if (rset == NULL) {
1100 return NULL;
1101 }
1102
1103 /* build GLS regression list, and process varnames for regression
1104 version of Hausman test if wanted
1105 */
1106
1107 k = 0;
1108 k2 = v1 - 1;
1109 for (j=1; j<=v1; j++) {
1110 vj = pan->pooled->list[j];
1111 if (vj == 0) {
1112 relist[j] = 0;
1113 } else {
1114 relist[j] = ++k;
1115 strcpy(rset->varname[k], dset->varname[vj]);
1116 if (hreg && k2 < rset->v - 1 && j > 1 && var_is_varying(pan, vj)) {
1117 /* hausman-related term */
1118 k2++;
1119 strcpy(rset->varname[k2], "_");
1120 strncat(rset->varname[k2], dset->varname[vj],
1121 VNAMELEN - 2);
1122 }
1123 }
1124 }
1125
1126 /* Now create the transformed variables: original data minus theta
1127 times the appropriate group mean (and in addition, original
1128 data minus the group mean for time-varying vars, if doing the
1129 Hausman test by the regression method).
1130 */
1131
1132 s = u = 0;
1133 pan->theta_bar = 0.0;
1134
1135 for (i=0; i<pan->nunits; i++) {
1136 int Ti = pan->unit_obs[i];
1137 int got = 0;
1138
1139 if (Ti == 0) {
1140 continue;
1141 }
1142
1143 if (Ti != pan->Tmax) {
1144 theta_i = 1.0 - sqrt(pan->s2e / (Ti * pan->s2v + pan->s2e));
1145 } else {
1146 theta_i = pan->theta;
1147 }
1148
1149 pan->theta_bar += theta_i;
1150
1151 for (t=0; t<pan->T && got<Ti; t++) {
1152 bigt = panel_index(i, t);
1153 k = 0;
1154 k2 = v1 - 1;
1155 if (!panel_missing(pan, bigt)) {
1156 for (j=0; j<v1; j++) {
1157 vj = pan->pooled->list[j+1];
1158 if (vj == 0) {
1159 rset->Z[0][s] -= theta_i;
1160 } else {
1161 k++;
1162 xbar = (k < gset->v)? gset->Z[k][u] : 1.0 / pan->Tmax;
1163 rset->Z[k][s] = dset->Z[vj][bigt] - theta_i * xbar;
1164 if (hreg && k2 < rset->v - 1 && var_is_varying(pan, vj)) {
1165 /* hausman-related term */
1166 rset->Z[++k2][s] = dset->Z[vj][bigt] - xbar;
1167 }
1168 }
1169 }
1170 got++;
1171 if (pan->small2big != NULL) {
1172 pan->small2big[s] = bigt;
1173 pan->big2small[bigt] = s;
1174 }
1175 s++;
1176 }
1177 }
1178 u++;
1179 }
1180
1181 return rset;
1182 }
1183
1184 #define BETWEEN_DEBUG 0
1185
1186 /* Construct a mini-dataset containing the group means, in
1187 order to run the group-means or "between" regression.
1188 */
1189
group_means_dataset(panelmod_t * pan,const DATASET * dset)1190 static DATASET *group_means_dataset (panelmod_t *pan,
1191 const DATASET *dset)
1192 {
1193 DATASET *gset;
1194 double x;
1195 int gn = pan->effn;
1196 int gv = pan->pooled->list[0];
1197 int i, j, k;
1198 int s, t, bigt;
1199
1200 if (pan->balanced && pan->ntdum > 0) {
1201 gv -= pan->ntdum;
1202 }
1203
1204 #if PDEBUG
1205 fprintf(stderr, "group_means_dataset: nvars=%d, nobs=%d\n",
1206 gv, gn);
1207 #endif
1208
1209 gset = create_auxiliary_dataset(gv, gn, 0);
1210 if (gset == NULL) {
1211 return NULL;
1212 }
1213
1214 k = 1;
1215 for (j=1; j<=gv; j++) {
1216 int vj = pan->pooled->list[j];
1217
1218 if (vj == 0) {
1219 continue;
1220 }
1221
1222 #if BETWEEN_DEBUG
1223 strcpy(ginfo->varname[k], dset->varname[vj]);
1224 #else
1225 if (pan->opt & OPT_B) {
1226 /* will save the "between" model: so name the variables */
1227 strcpy(gset->varname[k], dset->varname[vj]);
1228 }
1229 #endif
1230
1231 s = 0;
1232 for (i=0; i<pan->nunits; i++) {
1233 int Ti = pan->unit_obs[i];
1234
1235 if (Ti == 0) {
1236 continue;
1237 }
1238
1239 x = 0.0;
1240 for (t=0; t<pan->T; t++) {
1241 bigt = panel_index(i, t);
1242 if (!panel_missing(pan, bigt)) {
1243 x += dset->Z[vj][bigt];
1244 }
1245 }
1246 gset->Z[k][s++] = x / Ti;
1247 }
1248 k++;
1249 }
1250
1251 #if BETWEEN_DEBUG
1252 if (1) {
1253 PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
1254
1255 printdata(NULL, NULL, gset, OPT_O, prn);
1256 gretl_print_destroy(prn);
1257 }
1258 #endif
1259
1260 return gset;
1261 }
1262
1263 /* spruce up the between model and attach it to pan */
1264
save_between_model(MODEL * pmod,const int * blist,DATASET * gset,panelmod_t * pan)1265 static int save_between_model (MODEL *pmod, const int *blist,
1266 DATASET *gset, panelmod_t *pan)
1267 {
1268 gretl_matrix *uh, *yh;
1269 int i, err = 0;
1270
1271 pmod->ci = PANEL;
1272 pmod->opt |= OPT_B;
1273 pmod->dw = NADBL;
1274 gretl_model_add_panel_varnames(pmod, gset, NULL);
1275
1276 uh = gretl_column_vector_alloc(pmod->nobs);
1277 yh = gretl_column_vector_alloc(pmod->nobs);
1278
1279 if (uh == NULL || yh == NULL) {
1280 err = E_ALLOC;
1281 } else {
1282 for (i=0; i<pmod->nobs; i++) {
1283 uh->val[i] = pmod->uhat[i];
1284 yh->val[i] = pmod->yhat[i];
1285 }
1286 gretl_model_set_matrix_as_data(pmod, "uhat", uh);
1287 gretl_model_set_matrix_as_data(pmod, "yhat", yh);
1288 }
1289
1290 #if 0
1291 /* this is risky at present: too many functions want
1292 to read pmod->uhat directly */
1293 free(pmod->uhat); free(pmod->yhat);
1294 pmod->uhat = pmod->yhat = NULL;
1295 #endif
1296
1297 *pan->realmod = *pmod;
1298
1299 return err;
1300 }
1301
1302 /* Compute @ubPub as the Ti-weighted sum of the squared
1303 residuals from the Between model, as per Stata (but
1304 in disagreement with Baltagi and Chang, 1994).
1305 */
1306
alt_compute_ubPub(panelmod_t * pan,MODEL * bmod)1307 static int alt_compute_ubPub (panelmod_t *pan, MODEL *bmod)
1308 {
1309 int i, Ti, t = 0;
1310
1311 pan->ubPub = 0.0;
1312
1313 for (i=0; i<pan->nunits; i++) {
1314 Ti = pan->unit_obs[i];
1315 if (Ti > 0) {
1316 pan->ubPub += Ti * bmod->uhat[t] * bmod->uhat[t];
1317 t++;
1318 }
1319 }
1320
1321 return 0;
1322 }
1323
adjust_gset_data(panelmod_t * pan,DATASET * gset,int step)1324 static void adjust_gset_data (panelmod_t *pan, DATASET *gset,
1325 int step)
1326 {
1327 int i, j, Ti, t = 0;
1328 double adj;
1329
1330 for (i=0; i<pan->nunits; i++) {
1331 Ti = pan->unit_obs[i];
1332 if (Ti > 0) {
1333 adj = step == 0 ? sqrt(Ti) : 1.0/sqrt(Ti);
1334 for (j=0; j<gset->v; j++) {
1335 gset->Z[j][t] *= adj;
1336 }
1337 t++;
1338 }
1339 }
1340 }
1341
1342 /* Compute @ubPub as the sum of squared residuals from a
1343 Ti-weighted Between regression, as per Baltagi and Chang,
1344 1994, and also Baltagi, 2013.
1345 */
1346
compute_ubPub(panelmod_t * pan,MODEL * bmod,int * blist,DATASET * gset)1347 static int compute_ubPub (panelmod_t *pan, MODEL *bmod,
1348 int *blist, DATASET *gset)
1349 {
1350 int err;
1351
1352 /* multiply all data by sqrt(Ti) */
1353 adjust_gset_data(pan, gset, 0);
1354 clear_model(bmod);
1355 *bmod = lsq(blist, gset, OLS, OPT_A);
1356 err = bmod->errcode;
1357 if (!err) {
1358 pan->ubPub = bmod->ess;
1359 }
1360 /* put the original data back */
1361 adjust_gset_data(pan, gset, 1);
1362
1363 return err;
1364 }
1365
1366 /* calculate the group means or "between" regression and its error
1367 variance */
1368
between_variance(panelmod_t * pan,DATASET * gset)1369 static int between_variance (panelmod_t *pan, DATASET *gset)
1370 {
1371 gretlopt bopt;
1372 MODEL bmod;
1373 int *blist;
1374 int i, j, k;
1375 int err = 0;
1376
1377 blist = gretl_list_new(gset->v);
1378 if (blist == NULL) {
1379 return E_ALLOC;
1380 }
1381
1382 j = k = 1;
1383 for (i=1; i<=gset->v; i++) {
1384 if (pan->pooled->list[i] == 0) {
1385 blist[k++] = 0;
1386 } else {
1387 blist[k++] = j++;
1388 }
1389 }
1390
1391 bopt = (pan->opt & OPT_B)? OPT_NONE : OPT_A;
1392 bmod = lsq(blist, gset, OLS, bopt);
1393
1394 if (bmod.errcode == 0) {
1395 pan->s2b = bmod.ess / (bmod.nobs - bmod.ncoeff);
1396 } else {
1397 err = bmod.errcode;
1398 }
1399
1400 #if PDEBUG
1401 if (err) {
1402 fprintf(stderr, "error %d in between_variance\n", err);
1403 } else {
1404 fprintf(stderr, "pan->s2b = %g\n", pan->s2b);
1405 }
1406 #endif
1407
1408 if (!err && (pan->opt & OPT_B)) {
1409 err = save_between_model(&bmod, blist, gset, pan);
1410 } else {
1411 if (!err && !pan->balanced && (pan->opt & OPT_U) &&
1412 (pan->opt & OPT_X) && !(pan->opt & OPT_N)) {
1413 /* Prepare for the Baltagi-Chang take on Swamy-Arora
1414 in the case of an unbalanced panel
1415 */
1416 if (stata_sa) {
1417 err = alt_compute_ubPub(pan, &bmod);
1418 } else {
1419 err = compute_ubPub(pan, &bmod, blist, gset);
1420 }
1421 }
1422 clear_model(&bmod);
1423 }
1424
1425 free(blist);
1426
1427 return err;
1428 }
1429
1430 /* op is VCV_SUBTRACT for the random effects model. With the fixed
1431 effects model we don't have to worry about excluding vcv entries
1432 for time-invariant variables, since none of these are included.
1433 */
1434
1435 static int
vcv_skip(const MODEL * pmod,int i,const panelmod_t * pan,int op)1436 vcv_skip (const MODEL *pmod, int i, const panelmod_t *pan, int op)
1437 {
1438 int skip = 0;
1439
1440 if (pmod->list[i+2] == 0) {
1441 /* always skip the constant */
1442 skip = 1;
1443 } else if (op == VCV_SUBTRACT && !pan->varying[i]) {
1444 /* random effects, time-invariant var */
1445 skip = 1;
1446 }
1447
1448 return skip;
1449 }
1450
1451 /* Fill out the covariance matrix for use with the Hausman test:
1452 the entries that get transcribed here are only those for
1453 slopes with respect to time-varying variables.
1454 */
1455
1456 static void
vcv_slopes(panelmod_t * pan,const MODEL * pmod,int op)1457 vcv_slopes (panelmod_t *pan, const MODEL *pmod, int op)
1458 {
1459 int idx, i, j;
1460 int mj, mi = 0;
1461 int sj, si = 0;
1462 double x;
1463
1464 for (i=0; i<pan->dfH; i++) {
1465 if (vcv_skip(pmod, mi, pan, op)) {
1466 i--;
1467 mi++;
1468 continue;
1469 }
1470 mj = mi;
1471 sj = si;
1472 for (j=i; j<pan->dfH; j++) {
1473 if (vcv_skip(pmod, mj, pan, op)) {
1474 j--;
1475 mj++;
1476 continue;
1477 }
1478
1479 idx = ijton(mi, mj, pmod->ncoeff);
1480
1481 if (op == VCV_SUBTRACT) {
1482 x = gretl_matrix_get(pan->Sigma, si, sj);
1483 x -= pmod->vcv[idx];
1484 } else {
1485 x = pmod->vcv[idx];
1486 }
1487
1488 gretl_matrix_set(pan->Sigma, si, sj, x);
1489 if (si != sj) {
1490 gretl_matrix_set(pan->Sigma, sj, si, x);
1491 }
1492
1493 mj++;
1494 sj++;
1495 }
1496 mi++;
1497 si++;
1498 }
1499 }
1500
1501 /* calculate Hausman test statistic, matrix diff style */
1502
bXb(panelmod_t * pan)1503 static int bXb (panelmod_t *pan)
1504 {
1505 int err;
1506
1507 err = gretl_invert_symmetric_matrix(pan->Sigma);
1508
1509 if (!err) {
1510 pan->H = gretl_scalar_qform(pan->bdiff, pan->Sigma, &err);
1511 }
1512
1513 if (err) {
1514 pan->H = NADBL;
1515 }
1516
1517 return err;
1518 }
1519
fixed_effects_df_correction(MODEL * pmod,int k)1520 static void fixed_effects_df_correction (MODEL *pmod, int k)
1521 {
1522 double dfcorr = sqrt((double) pmod->dfd / (pmod->dfd - k));
1523 int i;
1524
1525 pmod->dfd -= k;
1526 pmod->dfn += k;
1527
1528 pmod->sigma *= dfcorr;
1529
1530 for (i=0; i<pmod->ncoeff; i++) {
1531 pmod->sderr[i] *= dfcorr;
1532 }
1533 }
1534
1535 /* used for printing fixed- or random-effects estimates
1536 in the context of the "panel diagnostics" routine
1537 */
1538
simple_print_panel_model(MODEL * pmod,DATASET * dset,const int * xlist,PRN * prn)1539 static void simple_print_panel_model (MODEL *pmod,
1540 DATASET *dset,
1541 const int *xlist,
1542 PRN *prn)
1543 {
1544 int *savelist = gretl_list_copy(pmod->list);
1545 int i;
1546
1547 for (i=0; i<pmod->ncoeff; i++) {
1548 pmod->list[i+2] = xlist[i+2];
1549 }
1550 printmodel(pmod, dset, OPT_P, prn);
1551 free(pmod->list);
1552 pmod->list = savelist;
1553 }
1554
print_re_results(panelmod_t * pan,MODEL * pmod,DATASET * dset,PRN * prn)1555 static void print_re_results (panelmod_t *pan,
1556 MODEL *pmod,
1557 DATASET *dset,
1558 PRN *prn)
1559 {
1560 pputs(prn, _("Variance estimators:"));
1561 pputc(prn, '\n');
1562 pprintf(prn, _(" between = %g"), pan->s2v);
1563 pputc(prn, '\n');
1564 pprintf(prn, _(" within = %g"), pan->s2e);
1565 pputc(prn, '\n');
1566
1567 if (pan->balanced || pan->s2v == 0) {
1568 pprintf(prn, _("theta used for quasi-demeaning = %g"), pan->theta);
1569 pputc(prn, '\n');
1570 } else {
1571 pputs(prn, _("Panel is unbalanced: theta varies across units"));
1572 pputc(prn, '\n');
1573 }
1574 pputc(prn, '\n');
1575
1576 pputs(prn, _("Random effects estimator\n"
1577 "allows for a unit-specific component to the error term\n"));
1578 pputc(prn, '\n');
1579
1580 simple_print_panel_model(pmod, dset, pan->pooled->list, prn);
1581 }
1582
print_fe_results(panelmod_t * pan,MODEL * pmod,DATASET * dset,PRN * prn)1583 static int print_fe_results (panelmod_t *pan,
1584 MODEL *pmod,
1585 DATASET *dset,
1586 PRN *prn)
1587 {
1588 int dfn = pan->effn - 1;
1589
1590 pputs(prn, _("Fixed effects estimator\n"
1591 "allows for differing intercepts by cross-sectional unit\n"));
1592 pputc(prn, '\n');
1593
1594 simple_print_panel_model(pmod, dset, pan->vlist, prn);
1595
1596 pprintf(prn, _("Residual variance: %g/(%d - %d) = %g\n"),
1597 pmod->ess, pmod->nobs, pan->vlist[0] - 1 + dfn, pan->s2e);
1598 pputc(prn, '\n');
1599
1600 if (!na(pan->Ffe)) {
1601 pprintf(prn, _("Joint significance of differing group means:\n"));
1602 pprintf(prn, " F(%d, %g) = %g %s %g\n", pan->Fdfn, pan->Fdfd, pan->Ffe,
1603 _("with p-value"), snedecor_cdf_comp(pan->Fdfn, pan->Fdfd, pan->Ffe));
1604 pputs(prn, _("(A low p-value counts against the null hypothesis that "
1605 "the pooled OLS model\nis adequate, in favor of the fixed "
1606 "effects alternative.)\n\n"));
1607 } else {
1608 pputc(prn, '\n');
1609 }
1610
1611 return 0;
1612 }
1613
time_dummies_wald_test(panelmod_t * pan,MODEL * pmod)1614 static int time_dummies_wald_test (panelmod_t *pan, MODEL *pmod)
1615 {
1616 gretl_matrix *vcv = NULL;
1617 gretl_vector *b = NULL;
1618 double x;
1619 int i, j, k, bigk;
1620 int di, dj;
1621 int err;
1622
1623 if (pan->ntdum == 0) {
1624 return 0;
1625 }
1626
1627 k = pan->ntdum;
1628 bigk = pmod->ncoeff;
1629
1630 if (pmod->vcv == NULL) {
1631 err = makevcv(pmod, pmod->sigma);
1632 if (err) {
1633 return err;
1634 }
1635 }
1636
1637 b = gretl_column_vector_alloc(k);
1638 vcv = gretl_matrix_alloc(k, k);
1639 if (b == NULL || vcv == NULL) {
1640 err = E_ALLOC;
1641 goto bailout;
1642 }
1643
1644 di = bigk - k;
1645 for (i=0; i<k; i++) {
1646 b->val[i] = pmod->coeff[di++];
1647 }
1648
1649 di = bigk - k;
1650 for (i=0; i<k; i++) {
1651 dj = bigk - k;
1652 for (j=0; j<=i; j++) {
1653 x = pmod->vcv[ijton(di, dj++, bigk)];
1654 gretl_matrix_set(vcv, i, j, x);
1655 gretl_matrix_set(vcv, j, i, x);
1656 }
1657 di++;
1658 }
1659
1660 err = gretl_invert_symmetric_matrix(vcv);
1661 if (err) {
1662 goto bailout;
1663 }
1664
1665 x = gretl_scalar_qform(b, vcv, &err);
1666 if (err) {
1667 fprintf(stderr, _("Failed to compute test statistic\n"));
1668 goto bailout;
1669 }
1670
1671 if (!err) {
1672 ModelTest *test = model_test_new(GRETL_TEST_PANEL_TIMEDUM);
1673
1674 if (test != NULL) {
1675 model_test_set_teststat(test, GRETL_STAT_WALD_CHISQ);
1676 model_test_set_dfn(test, k);
1677 model_test_set_value(test, x);
1678 model_test_set_pvalue(test, chisq_cdf_comp(k, x));
1679 maybe_add_test_to_model(pmod, test);
1680 }
1681 }
1682
1683 bailout:
1684
1685 gretl_matrix_free(vcv);
1686 gretl_vector_free(b);
1687
1688 return err;
1689 }
1690
save_fixed_effects_F(panelmod_t * pan,MODEL * wmod)1691 static void save_fixed_effects_F (panelmod_t *pan, MODEL *wmod)
1692 {
1693 int robust = (pan->opt & OPT_R);
1694 ModelTest *test;
1695
1696 if (na(pan->Ffe)) {
1697 return;
1698 }
1699
1700 test = model_test_new(robust ? GRETL_TEST_PANEL_WELCH :
1701 GRETL_TEST_PANEL_F);
1702
1703 if (test != NULL) {
1704 model_test_set_teststat(test, robust ? GRETL_STAT_WF : GRETL_STAT_F);
1705 model_test_set_dfn(test, pan->Fdfn);
1706 model_test_set_dfd(test, pan->Fdfd);
1707 model_test_set_value(test, pan->Ffe);
1708 model_test_set_pvalue(test, snedecor_cdf_comp(pan->Fdfn, pan->Fdfd, pan->Ffe));
1709 maybe_add_test_to_model(wmod, test);
1710 }
1711 }
1712
1713 /* "regular" = not robust: sums-of-squares based joint test on
1714 the fixed effects */
1715
regular_fixed_effects_F(panelmod_t * pan,MODEL * wmod)1716 static void regular_fixed_effects_F (panelmod_t *pan, MODEL *wmod)
1717 {
1718 int k_pooled = pan->pooled->list[0];
1719 int k_fe = pan->vlist[0];
1720
1721 pan->Fdfn = pan->effn - 1;
1722 pan->Fdfd = wmod->dfd;
1723
1724 if (k_pooled > k_fe) {
1725 pan->Fdfn -= k_pooled - k_fe;
1726 if (pan->Fdfn <= 0) {
1727 pan->Ffe = NADBL;
1728 return;
1729 }
1730 }
1731
1732 pan->Ffe = (pan->pooled->ess - wmod->ess) * pan->Fdfd /
1733 (wmod->ess * pan->Fdfn);
1734 if (na(pan->Ffe)) {
1735 pan->Ffe = NADBL;
1736 } else if (pan->Ffe < 0) {
1737 pan->Ffe = 0;
1738 }
1739 }
1740
fix_panelmod_list(MODEL * targ,panelmod_t * pan)1741 static int fix_panelmod_list (MODEL *targ, panelmod_t *pan)
1742 {
1743 int i;
1744
1745 #if PDEBUG
1746 printlist(targ->list, "targ->list");
1747 printlist(pan->pooled->list, "pan->pooled->list");
1748 printlist(pan->vlist, "pan->vlist");
1749 #endif
1750
1751 free(targ->list);
1752 targ->list = gretl_list_copy(pan->pooled->list);
1753 if (targ->list == NULL) {
1754 return E_ALLOC;
1755 }
1756
1757 if (pan->opt & OPT_F) {
1758 /* fixed effects: remove any non-varying variables */
1759 for (i=2; i<=targ->list[0]; i++) {
1760 if (!in_gretl_list(pan->vlist, targ->list[i])) {
1761 gretl_list_delete_at_pos(targ->list, i--);
1762 }
1763 }
1764 }
1765
1766 #if PDEBUG
1767 printlist(targ->list, "new targ->list");
1768 #endif
1769
1770 return 0;
1771 }
1772
1773 /* Compute F-test or chi-square test for the regular
1774 regressors, skipping @nskip trailing coefficients
1775 (which can be used to skip time dummies)
1776 */
1777
panel_overall_test(MODEL * pmod,panelmod_t * pan,int nskip,gretlopt opt)1778 static double panel_overall_test (MODEL *pmod, panelmod_t *pan,
1779 int nskip, gretlopt opt)
1780 {
1781 double test = NADBL;
1782 int *omitlist = NULL;
1783
1784 if (pmod->ncoeff == 1) {
1785 return test;
1786 }
1787
1788 if (nskip > 0) {
1789 int i, k = pmod->list[0] - nskip - 2;
1790
1791 omitlist = gretl_list_new(k);
1792 for (i=1; i<=k; i++) {
1793 omitlist[i] = pmod->list[i+2];
1794 }
1795 }
1796
1797 if (opt & OPT_X) {
1798 test = wald_omit_chisq(omitlist, pmod);
1799 } else {
1800 test = wald_omit_F(omitlist, pmod);
1801 }
1802
1803 free(omitlist);
1804
1805 return test;
1806 }
1807
1808 /* Correct various model statistics, in the case where we estimated
1809 the fixed effects or "within" model on an auxiliary dataset
1810 from which the group means were subtracted.
1811 */
1812
fix_within_stats(MODEL * fmod,panelmod_t * pan)1813 static int fix_within_stats (MODEL *fmod, panelmod_t *pan)
1814 {
1815 double wrsq, wfstt = NADBL;
1816 int wdfn, nc = fmod->ncoeff;
1817 int err = 0;
1818
1819 err = fix_panelmod_list(fmod, pan);
1820 if (err) {
1821 return err;
1822 }
1823
1824 fmod->ybar = pan->pooled->ybar;
1825 fmod->sdy = pan->pooled->sdy;
1826 fmod->tss = pan->pooled->tss;
1827 fmod->ifc = 1;
1828
1829 wrsq = fmod->rsq;
1830 if (wrsq < 0.0) {
1831 wrsq = 0.0;
1832 } else if (na(wrsq)) {
1833 wrsq = NADBL;
1834 }
1835
1836 /* Should we differentiate "regular" regressors from
1837 time dummies, if included? For now, yes.
1838 */
1839
1840 if (pan->ntdum > 0) {
1841 wdfn = fmod->ncoeff - 1 - pan->ntdum;
1842 if (wdfn > 0) {
1843 wfstt = panel_overall_test(fmod, pan, pan->ntdum,
1844 OPT_NONE);
1845 }
1846 } else {
1847 wdfn = fmod->ncoeff - 1;
1848 if (wdfn > 0) {
1849 if (pan->opt & OPT_R) {
1850 wfstt = panel_overall_test(fmod, pan, 0, OPT_NONE);
1851 } else {
1852 wfstt = (wrsq / (1.0 - wrsq)) * ((double) fmod->dfd / wdfn);
1853 }
1854 }
1855 }
1856
1857 if (!na(wfstt) && wfstt >= 0.0) {
1858 ModelTest *test = model_test_new(GRETL_TEST_WITHIN_F);
1859 int wdfd = fmod->dfd;
1860
1861 if (pan->opt & OPT_R) {
1862 fmod->dfd = wdfd = pan->effn - 1;
1863 }
1864
1865 if (test != NULL) {
1866 model_test_set_teststat(test, GRETL_STAT_F);
1867 model_test_set_dfn(test, wdfn);
1868 model_test_set_dfd(test, wdfd);
1869 model_test_set_value(test, wfstt);
1870 model_test_set_pvalue(test, snedecor_cdf_comp(wdfn, wdfd, wfstt));
1871 maybe_add_test_to_model(fmod, test);
1872 }
1873 }
1874
1875 /* note: this member is being borrowed for the "Within R-squared" */
1876 fmod->adjrsq = wrsq;
1877
1878 /* LSDV-based statistics (FIXME: can we do this in
1879 the --robust case?)
1880 */
1881 if (pan->opt & OPT_R) {
1882 fmod->rsq = 1.0 - (fmod->ess / fmod->tss);
1883 fmod->fstt = NADBL;
1884 } else {
1885 fmod->rsq = 1.0 - (fmod->ess / fmod->tss);
1886 if (fmod->rsq < 0.0) {
1887 fmod->rsq = 0.0;
1888 } else {
1889 fmod->fstt = (fmod->rsq / (1.0 - fmod->rsq)) *
1890 ((double) fmod->dfd / fmod->dfn);
1891 }
1892 }
1893
1894 fmod->ncoeff = fmod->dfn + 1; /* number of params estimated */
1895 ls_criteria(fmod);
1896 fmod->ncoeff = nc;
1897
1898 return err;
1899 }
1900
1901 /* Fixed-effects model: add the per-unit intercept estimates
1902 to the model in case the user wants to retrieve them.
1903 Random-effects model: add estimate of the individual effects.
1904
1905 By this point the model -- even if it been estimated on a short
1906 dataset -- should have a full-length residual series.
1907 */
1908
panel_model_add_ahat(MODEL * pmod,const DATASET * dset,panelmod_t * pan)1909 static int panel_model_add_ahat (MODEL *pmod, const DATASET *dset,
1910 panelmod_t *pan)
1911 {
1912 double *ahat = NULL;
1913 const double *x;
1914 int i, j, t, bigt;
1915 int n, err = 0;
1916
1917 n = pmod->full_n;
1918
1919 ahat = malloc(n * sizeof *ahat);
1920 if (ahat == NULL) {
1921 return E_ALLOC;
1922 }
1923
1924 for (t=0; t<n; t++) {
1925 ahat[t] = NADBL;
1926 }
1927
1928 if (pan->opt & OPT_F) {
1929 /* fixed effects */
1930 for (i=0; i<pan->nunits; i++) {
1931 if (pan->unit_obs[i] > 0) {
1932 double a = 0.0;
1933
1934 /* a = y - Xb, where the 'b' is based on de-meaned data */
1935
1936 for (t=0; t<pan->T; t++) {
1937 bigt = panel_index(i, t);
1938 if (!na(pmod->uhat[bigt])) {
1939 a += dset->Z[pmod->list[1]][bigt];
1940 for (j=1; j<pmod->ncoeff; j++) {
1941 x = dset->Z[pmod->list[j+2]];
1942 a -= pmod->coeff[j] * x[bigt];
1943 }
1944 }
1945 }
1946
1947 a /= pan->unit_obs[i];
1948
1949 for (t=0; t<pan->T; t++) {
1950 bigt = panel_index(i, t);
1951 if (!na(pmod->uhat[bigt])) {
1952 ahat[bigt] = a;
1953 }
1954 }
1955 }
1956 }
1957 } else {
1958 /* random effects */
1959 double uhbar, frac = 0;
1960
1961 if (pan->balanced) {
1962 frac = 1.0 - pan->theta;
1963 frac = 1.0 - frac * frac;
1964 }
1965
1966 for (i=0; i<pan->nunits; i++) {
1967 if (pan->unit_obs[i] > 0) {
1968 /* get mean residual */
1969 uhbar = 0.0;
1970 for (t=0; t<pan->T; t++) {
1971 bigt = panel_index(i, t);
1972 if (!na(pmod->uhat[bigt])) {
1973 uhbar += pmod->uhat[bigt];
1974 }
1975 }
1976 uhbar /= pan->unit_obs[i];
1977 /* ahat = frac * uhbar */
1978 if (!pan->balanced) {
1979 frac = pan->s2v / (pan->s2v + pan->s2e / pan->unit_obs[i]);
1980 }
1981 for (t=0; t<pan->T; t++) {
1982 bigt = panel_index(i, t);
1983 if (!na(pmod->uhat[bigt])) {
1984 ahat[bigt] = frac * uhbar;
1985 }
1986 }
1987 }
1988 }
1989 }
1990
1991 err = gretl_model_set_data(pmod, "ahat", ahat,
1992 GRETL_TYPE_DOUBLE_ARRAY,
1993 n * sizeof *ahat);
1994
1995 return err;
1996 }
1997
1998 /* If we're estimating the fixed effects model with the --robust
1999 flag, we should do a robust version of the joint test on
2000 the fixed effects. Here we use the algorithm of B. L. Welch,
2001 "On the Comparison of Several Mean Values: An Alternative
2002 Approach" (Biometrika 38, 1951, pp. 330-336). The variable
2003 we're testing for difference of means (by individual) is the
2004 residual from pooled OLS.
2005 */
2006
robust_fixed_effects_F(panelmod_t * pan)2007 static int robust_fixed_effects_F (panelmod_t *pan)
2008 {
2009 MODEL *pmod = pan->pooled;
2010 double *u, *w, *h, *xbar;
2011 double muhat = 0.0;
2012 double x, s2, W = 0.0;
2013 double A, B, sum_h;
2014 int k = pan->effn;
2015 int i, j, t, s;
2016 int Ti, bigt;
2017 int err = 0;
2018
2019 u = malloc(pan->Tmax * sizeof *u);
2020 w = malloc(3 * k * sizeof *w);
2021
2022 if (u == NULL || w == NULL) {
2023 free(u);
2024 free(w);
2025 return E_ALLOC;
2026 }
2027
2028 h = w + k;
2029 xbar = h + k;
2030
2031 j = 0;
2032 for (i=0; i<pan->nunits; i++) {
2033 Ti = pan->unit_obs[i];
2034 if (Ti > 1) {
2035 s = 0;
2036 for (t=0; t<pan->T; t++) {
2037 bigt = panel_index(i, t);
2038 if (!panel_missing(pan, bigt)) {
2039 u[s++] = pmod->uhat[bigt];
2040 }
2041 }
2042 xbar[j] = gretl_mean(0, s-1, u);
2043 s2 = gretl_variance(0, s-1, u);
2044 w[j] = Ti / s2;
2045 W += w[j];
2046 muhat += w[j] * xbar[j];
2047 j++;
2048 }
2049 }
2050
2051 muhat /= W;
2052 A = sum_h = 0.0;
2053
2054 j = 0;
2055 for (i=0; i<pan->nunits; i++) {
2056 Ti = pan->unit_obs[i];
2057 if (Ti > 1) {
2058 x = 1 - w[j]/W;
2059 h[j] = (x * x) / (Ti - 1);
2060 sum_h += h[j];
2061 x = xbar[j] - muhat;
2062 A += w[j] * x * x;
2063 j++;
2064 }
2065 }
2066
2067 A /= (k - 1);
2068 B = 1.0 + (2.0*(k-2.0)/(k * k - 1.0)) * sum_h;
2069
2070 pan->Ffe = A / B;
2071 pan->Fdfn = k - 1;
2072 pan->Fdfd = (k * k - 1.0)/(3 * sum_h);
2073
2074 free(u);
2075 free(w);
2076
2077 return err;
2078 }
2079
real_FE_list(panelmod_t * pan)2080 static int *real_FE_list (panelmod_t *pan)
2081 {
2082 int *list = gretl_list_copy(pan->pooled->list);
2083 int i;
2084
2085 if (list != NULL) {
2086 /* purge any non-time-varying variables */
2087 for (i=2; i<=list[0]; i++) {
2088 if (!in_gretl_list(pan->vlist, list[i])) {
2089 gretl_list_delete_at_pos(list, i--);
2090 }
2091 }
2092 }
2093
2094 return list;
2095 }
2096
2097 /* For testing purposes: retrieve user-specified values for
2098 s2v and s2e. These would typically be known good values
2099 in the context of a simulation.
2100 */
2101
read_true_variances(panelmod_t * pan)2102 static int read_true_variances (panelmod_t *pan)
2103 {
2104 gretl_matrix *m;
2105 int err = 0;
2106
2107 m = gretl_matrix_read_from_file(glsmat, 0, &err);
2108 if (m == NULL) {
2109 fprintf(stderr, "read_true_variances: no matrix!\n");
2110 if (!err) {
2111 err = E_DATA;
2112 }
2113 } else {
2114 pan->s2v = m->val[0];
2115 pan->s2e = m->val[1];
2116 }
2117
2118 return err;
2119 }
2120
2121 /* computation of $\hat{\sigma}^2_v$ a la Nerlove, if wanted */
2122
nerlove_s2v(MODEL * pmod,const DATASET * dset,panelmod_t * pan)2123 static int nerlove_s2v (MODEL *pmod, const DATASET *dset,
2124 panelmod_t *pan)
2125 {
2126 double a, amean, *ahat;
2127 double wmean, *wi = NULL;
2128 const double *x;
2129 int i, j, t, k, bigt;
2130 int *list = NULL;
2131 int err = 0;
2132
2133 ahat = malloc(pan->effn * sizeof *ahat);
2134 if (ahat == NULL) {
2135 return E_ALLOC;
2136 }
2137
2138 if (pan->opt & OPT_X) {
2139 /* --unbalanced */
2140 wi = malloc(pan->effn * sizeof *wi);
2141 if (wi == NULL) {
2142 free(ahat);
2143 return E_ALLOC;
2144 }
2145 }
2146
2147 list = real_FE_list(pan);
2148 if (list == NULL) {
2149 free(ahat);
2150 free(wi);
2151 return E_ALLOC;
2152 }
2153
2154 wmean = amean = 0.0;
2155 k = 0;
2156
2157 for (i=0; i<pan->nunits; i++) {
2158 int Ti = pan->unit_obs[i];
2159
2160 if (Ti > 0) {
2161 a = 0.0;
2162 for (t=0; t<pan->T; t++) {
2163 bigt = panel_index(i, t);
2164 if (!na(pan->pooled->uhat[bigt])) {
2165 a += dset->Z[list[1]][bigt];
2166 for (j=1; j<pmod->ncoeff; j++) {
2167 x = dset->Z[list[j+2]];
2168 a -= pmod->coeff[j] * x[bigt];
2169 }
2170 }
2171 }
2172 a /= Ti;
2173 ahat[k] = a;
2174 amean += a;
2175 if (wi != NULL) {
2176 wi[k] = Ti / (double) pmod->nobs;
2177 wmean += wi[k] * a;
2178 }
2179 k++;
2180 }
2181 }
2182
2183 pan->s2v = 0.0;
2184
2185 if (wi != NULL) {
2186 for (i=0; i<pan->effn; i++) {
2187 pan->s2v += wi[i] * (ahat[i] - wmean) * (ahat[i] - wmean);
2188 }
2189 pan->s2v /= (pan->effn - 1.0) / (double) pan->effn;
2190 free(wi);
2191 } else {
2192 amean /= pan->effn;
2193 for (i=0; i<pan->effn; i++) {
2194 pan->s2v += (ahat[i] - amean) * (ahat[i] - amean);
2195 }
2196 pan->s2v /= pan->effn - 1;
2197 }
2198
2199 free(ahat);
2200 free(list);
2201
2202 return err;
2203 }
2204
2205 /* robust RE: we need an extra residuals array */
2206
re_hatvars_prep(panelmod_t * pan)2207 static int re_hatvars_prep (panelmod_t *pan)
2208 {
2209 int t, n = pan->pooled->full_n;
2210
2211 pan->re_uhat = malloc(n * sizeof *pan->re_uhat);
2212
2213 if (pan->re_uhat == NULL) {
2214 return E_ALLOC;
2215 } else {
2216 for (t=0; t<n; t++) {
2217 pan->re_uhat[t] = NADBL;
2218 }
2219 return 0;
2220 }
2221 }
2222
2223 /* When calculating DW using the fixed-effects residuals,
2224 in the context of estimation of the random-effects
2225 specification, we need to expand up the residual series
2226 temporarily.
2227 */
2228
expand_fe_uhat(MODEL * femod,panelmod_t * pan)2229 static int expand_fe_uhat (MODEL *femod, panelmod_t *pan)
2230 {
2231 double *uhat = NULL;
2232 int NT = pan->pooled->full_n;
2233 int i, s, t;
2234
2235 uhat = malloc(NT * sizeof *uhat);
2236 if (uhat == NULL) {
2237 return E_ALLOC;
2238 }
2239
2240 for (t=0; t<NT; t++) {
2241 uhat[t] = NADBL;
2242 }
2243
2244 s = 0;
2245 for (i=0; i<pan->nunits; i++) {
2246 int ti, Ti = pan->unit_obs[i];
2247
2248 if (Ti == 0) {
2249 continue;
2250 }
2251 for (ti=0; ti<Ti; ti++) {
2252 t = big_index(pan, s);
2253 uhat[t] = femod->uhat[s];
2254 if (s == 0) {
2255 femod->t1 = t;
2256 } else if (s == femod->nobs - 1) {
2257 femod->t2 = t;
2258 }
2259 s++;
2260 }
2261 }
2262
2263 /* replace the uhat array on @femod */
2264 free(femod->uhat);
2265 femod->uhat = uhat;
2266
2267 return 0;
2268 }
2269
2270 /* Fix uhat and yhat in two cases.
2271
2272 (a) When we estimated fixed effects using a de-meaned dataset we
2273 need to ensure that the uhat and yhat values get written to the
2274 right observation slots in relation to the full dataset, and also
2275 that the yhat values get corrected, putting the means back in.
2276
2277 (b) When estimating the random effects model we need to compute
2278 residuals based on the untransformed data (and again, place them
2279 correctly in relation to the full dataset). However, if we're
2280 going to produce robust standard errors we also need to preserve
2281 the GLS residuals.
2282
2283 The placement issue arises because the special datasets used in
2284 these cases are not necessarily of full length, since they are
2285 purged of missing observations.
2286 */
2287
2288 static int
fix_panel_hatvars(MODEL * pmod,panelmod_t * pan,const double ** Z)2289 fix_panel_hatvars (MODEL *pmod, panelmod_t *pan, const double **Z)
2290 {
2291 const double *y = NULL;
2292 double *yhat = pan->pooled->yhat;
2293 double *uhat = NULL;
2294 int n = pan->pooled->full_n;
2295 int re_n = 0;
2296 double yht, SSR = 0.0;
2297 int i, j, s, t;
2298 int err = 0;
2299
2300 if (yhat == NULL) {
2301 fprintf(stderr, "fix_panel_hatvars: pan->pooled->yhat is NULL\n");
2302 return E_DATA;
2303 }
2304
2305 y = Z[pan->pooled->list[1]];
2306
2307 uhat = malloc(n * sizeof *uhat);
2308 if (uhat == NULL) {
2309 return E_ALLOC;
2310 }
2311
2312 if (pan->opt & OPT_U) {
2313 /* random effects */
2314 if (pan->opt & OPT_R) {
2315 err = re_hatvars_prep(pan);
2316 }
2317 if (err) {
2318 return err;
2319 }
2320 }
2321
2322 for (t=0; t<n; t++) {
2323 uhat[t] = NADBL;
2324 }
2325
2326 s = 0;
2327 for (i=0; i<pan->nunits; i++) {
2328 int ti, Ti = pan->unit_obs[i];
2329
2330 if (Ti == 0) {
2331 continue;
2332 }
2333
2334 for (ti=0; ti<Ti; ti++) {
2335 t = big_index(pan, s);
2336 if (pan->opt & OPT_U) {
2337 /* random effects */
2338 yht = 0.0;
2339 for (j=0; j<pmod->ncoeff; j++) {
2340 yht += pmod->coeff[j] * Z[pan->pooled->list[j+2]][t];
2341 }
2342 yhat[t] = yht;
2343 re_n++;
2344 uhat[t] = y[t] - yht;
2345 SSR += uhat[t] * uhat[t];
2346 if (pan->re_uhat != NULL) {
2347 /* store both the "fixed" and the GLS residuals */
2348 pan->re_uhat[t] = uhat[t];
2349 uhat[t] = pmod->uhat[s];
2350 }
2351 } else {
2352 /* fixed effects */
2353 uhat[t] = pmod->uhat[s];
2354 yhat[t] = y[t] - uhat[t];
2355 }
2356 if (s == 0) {
2357 pmod->t1 = t;
2358 } else if (s == pmod->nobs - 1) {
2359 pmod->t2 = t;
2360 }
2361 s++;
2362 }
2363 }
2364
2365 if (pan->opt & OPT_U) {
2366 double r;
2367
2368 /* Defer rewriting of pmod->ess and pmod->sigma to avoid
2369 screwing up the covariance matrix estimator and Hausman
2370 test.
2371 */
2372 gretl_model_set_double(pmod, "fixed_SSR", SSR);
2373 r = gretl_corr(pmod->t1, pmod->t2, y, yhat, NULL);
2374 if (!na(r)) {
2375 gretl_model_set_double(pmod, "corr-rsq", r * r);
2376 }
2377 }
2378
2379 pmod->full_n = n;
2380
2381 /* replace the uhat and yhat arrays on @pmod */
2382 free(pmod->uhat);
2383 pmod->uhat = uhat;
2384 free(pmod->yhat);
2385 pmod->yhat = yhat;
2386
2387 /* NULLify stolen pointer */
2388 pan->pooled->yhat = NULL;
2389
2390 return err;
2391 }
2392
2393 static int
hausman_move_uhat(MODEL * pmod,panelmod_t * pan)2394 hausman_move_uhat (MODEL *pmod, panelmod_t *pan)
2395 {
2396 int n = pan->pooled->full_n;
2397 double *uhat;
2398 int i, s, t, ti;
2399
2400 uhat = malloc(n * sizeof *uhat);
2401 if (uhat == NULL) {
2402 return E_ALLOC;
2403 }
2404
2405 for (t=0; t<n; t++) {
2406 uhat[t] = NADBL;
2407 }
2408
2409 s = 0;
2410 for (i=0; i<pan->nunits; i++) {
2411 int Ti = pan->unit_obs[i];
2412
2413 if (Ti > 0) {
2414 for (ti=0; ti<Ti; ti++) {
2415 t = big_index(pan, s);
2416 uhat[t] = pmod->uhat[s];
2417 s++;
2418 }
2419 }
2420 }
2421
2422 free(pmod->uhat);
2423 pmod->uhat = uhat;
2424
2425 return 0;
2426 }
2427
2428 #if PDEBUG > 1
2429
verbose_femod_print(MODEL * femod,DATASET * wset,PRN * prn)2430 static void verbose_femod_print (MODEL *femod, DATASET *wset,
2431 PRN *prn)
2432 {
2433 pprintf(prn, "*** initial FE model (on within data)\n");
2434 printmodel(femod, wset, OPT_O, prn);
2435
2436 # if PDEBUG > 2
2437 int i, j;
2438
2439 fprintf(stderr, "femod: data series length = %d\n", wset->n);
2440 for (i=0; i<wset->n; i++) {
2441 fprintf(stderr, "femod.uhat[%d] = %g, ", i, femod->uhat[i]);
2442 fprintf(stderr, "data: ");
2443 for (j=0; j<wset->v; j++) {
2444 fprintf(stderr, "%g ", wset->Z[j][i]);
2445 }
2446 fputc('\n', stderr);
2447 }
2448 # endif
2449 }
2450
2451 #endif
2452
2453 /* Estimate the fixed-effects model using a parallel dataset
2454 with the group means subtracted from all variables.
2455 */
2456
2457 static MODEL
fixed_effects_model(panelmod_t * pan,DATASET * dset,PRN * prn)2458 fixed_effects_model (panelmod_t *pan, DATASET *dset, PRN *prn)
2459 {
2460 MODEL femod;
2461 gretlopt lsqopt = OPT_A | OPT_X;
2462 DATASET *wset = NULL;
2463 int *felist = NULL;
2464 int save_qr = 0;
2465 int i;
2466
2467 #if PDEBUG
2468 fprintf(stderr, "fixed_effects: using de-meaned data\n");
2469 #endif
2470
2471 gretl_model_init(&femod, dset);
2472
2473 felist = gretl_list_new(pan->vlist[0]);
2474 if (felist == NULL) {
2475 femod.errcode = E_ALLOC;
2476 return femod;
2477 }
2478
2479 wset = within_groups_dataset(dset, pan);
2480 if (wset == NULL) {
2481 free(felist);
2482 femod.errcode = E_ALLOC;
2483 return femod;
2484 }
2485
2486 felist[1] = 1;
2487 felist[2] = 0;
2488 for (i=3; i<=felist[0]; i++) {
2489 felist[i] = i - 1;
2490 }
2491
2492 if (pan->opt & OPT_F) {
2493 /* suppress auto-removal of collinear terms */
2494 lsqopt |= OPT_Z;
2495 }
2496
2497 save_qr = libset_get_bool(USE_QR);
2498 libset_set_bool(USE_QR, 1);
2499
2500 femod = lsq(felist, wset, OLS, lsqopt);
2501
2502 libset_set_bool(USE_QR, save_qr);
2503
2504 if (femod.errcode) {
2505 fprintf(stderr, "femod.errcode = %d\n", femod.errcode);
2506 } else if ((pan->opt & OPT_F) && femod.list[0] < felist[0]) {
2507 femod.errcode = E_SINGULAR;
2508 } else {
2509 /* we estimated a bunch of group means, and have to
2510 subtract degrees of freedom */
2511 fixed_effects_df_correction(&femod, pan->effn - 1);
2512 #if PDEBUG > 1
2513 verbose_femod_print(&femod, wset, prn);
2514 #endif
2515 if (pan->opt & OPT_F) {
2516 /* estimating the FE model in its own right */
2517 if (pan->opt & OPT_R) {
2518 /* we have to do this before the pooled residual
2519 array is "stolen" for the fixed-effects model
2520 */
2521 robust_fixed_effects_F(pan);
2522 }
2523 fix_panel_hatvars(&femod, pan, (const double **) dset->Z);
2524 if (pan->opt & OPT_R) {
2525 panel_robust_vcv(&femod, pan, wset);
2526 } else {
2527 femod_regular_vcv(&femod);
2528 }
2529 } else if (pan->opt & OPT_N) {
2530 if (IGLS) {
2531 read_true_variances(pan);
2532 } else {
2533 femod.errcode = nerlove_s2v(&femod, dset, pan);
2534 }
2535 }
2536 }
2537
2538 destroy_dataset(wset);
2539 free(felist);
2540
2541 return femod;
2542 }
2543
2544 /* Construct a gretl list containing the index numbers of the
2545 cross-sectional units included in the fixed-effects
2546 regression. This is for the purpose of naming the per-unit
2547 intercepts.
2548 */
2549
fe_units_list(const panelmod_t * pan)2550 static int *fe_units_list (const panelmod_t *pan)
2551 {
2552 int *ulist = NULL;
2553 int i, j, n = 0;
2554
2555 for (i=0; i<pan->nunits; i++) {
2556 if (pan->unit_obs[i] > 0) {
2557 n++;
2558 }
2559 }
2560
2561 ulist = gretl_list_new(n);
2562
2563 if (ulist != NULL) {
2564 j = 1;
2565 for (i=0; i<pan->nunits; i++) {
2566 if (pan->unit_obs[i] > 0) {
2567 ulist[j++] = i + 1;
2568 }
2569 }
2570 }
2571
2572 return ulist;
2573 }
2574
2575 /* Compose a list referencing all variables that were dropped from the
2576 final panel model relative to the incoming regression
2577 specification. This may include some variables that were dropped
2578 at the stage of running the baseline pooled model (presumably
2579 because of perfect collinearity).
2580
2581 In the case of fixed effects it may include additional variables
2582 dropped due to the fact that they are time-invariant.
2583 */
2584
compose_panel_droplist(MODEL * pmod,panelmod_t * pan)2585 static int compose_panel_droplist (MODEL *pmod, panelmod_t *pan)
2586 {
2587 int fixed_effects = (pmod->opt & OPT_F);
2588 const int *pooldrop;
2589 int *dlist;
2590 int i, ndrop = 0;
2591
2592 /* regressors dropped at the stage of estimating the
2593 initial pooled model */
2594 pooldrop = gretl_model_get_list(pan->pooled, "droplist");
2595 if (pooldrop != NULL) {
2596 ndrop += pooldrop[0];
2597 }
2598
2599 if (fixed_effects) {
2600 /* regressors dropped because time-invariant */
2601 ndrop += pan->pooled->list[0] - pan->vlist[0];
2602 }
2603
2604 if (ndrop == 0) {
2605 /* nothing to be done */
2606 return 0;
2607 }
2608
2609 dlist = gretl_list_new(ndrop);
2610 if (dlist == NULL) {
2611 return E_ALLOC;
2612 }
2613
2614 i = 1;
2615
2616 if (pooldrop != NULL) {
2617 for (i=1; i<=pooldrop[0]; i++) {
2618 dlist[i] = pooldrop[i];
2619 }
2620 }
2621
2622 if (fixed_effects) {
2623 if (pan->vlist[0] < pan->pooled->list[0]) {
2624 int j, vj;
2625
2626 for (j=2; j<=pan->pooled->list[0]; j++) {
2627 vj = pan->pooled->list[j];
2628 if (!in_gretl_list(pan->vlist, vj)) {
2629 dlist[i++] = vj;
2630 }
2631 }
2632 }
2633 }
2634
2635 return gretl_model_set_list_as_data(pmod, "droplist", dlist);
2636 }
2637
fix_gls_stats(MODEL * pmod,panelmod_t * pan)2638 static void fix_gls_stats (MODEL *pmod, panelmod_t *pan)
2639 {
2640 double chisq = NADBL;
2641 int nc, df = 0;
2642
2643 fix_panelmod_list(pmod, pan);
2644
2645 pmod->ybar = pan->pooled->ybar;
2646 pmod->sdy = pan->pooled->sdy;
2647 pmod->tss = pan->pooled->tss;
2648
2649 pmod->rsq = NADBL;
2650 pmod->adjrsq = NADBL;
2651 pmod->fstt = NADBL;
2652 pmod->rho = pan->rho;
2653 pmod->dw = pan->dw;
2654
2655 /* add joint chi-square test on regressors */
2656
2657 if (pan->ntdum > 0) {
2658 df = pmod->ncoeff - 1 - pan->ntdum;
2659 if (df > 0) {
2660 chisq = panel_overall_test(pmod, pan, pan->ntdum,
2661 OPT_X);
2662 }
2663 } else {
2664 df = pmod->ncoeff - 1;
2665 if (df > 0) {
2666 chisq = panel_overall_test(pmod, pan, 0, OPT_X);
2667 }
2668 }
2669
2670 /* deferred rewriting of ess and sigma, etc. */
2671 pmod->ess = gretl_model_get_double(pmod, "fixed_SSR");
2672 pmod->sigma = sqrt(pmod->ess / (pmod->nobs - (pmod->ncoeff - 1)));
2673 gretl_model_destroy_data_item(pmod, "fixed_SSR");
2674 nc = pmod->ncoeff;
2675 pmod->ncoeff = pmod->dfn + 1;
2676 ls_criteria(pmod);
2677 pmod->ncoeff = nc;
2678
2679 if (!na(chisq) && chisq >= 0.0) {
2680 ModelTest *test = model_test_new(GRETL_TEST_RE_WALD);
2681
2682 if (test != NULL) {
2683 model_test_set_teststat(test, GRETL_STAT_WALD_CHISQ);
2684 model_test_set_dfn(test, df);
2685 model_test_set_value(test, chisq);
2686 model_test_set_pvalue(test, chisq_cdf_comp(df, chisq));
2687 maybe_add_test_to_model(pmod, test);
2688 }
2689 }
2690 }
2691
add_panel_obs_info(MODEL * pmod,panelmod_t * pan)2692 static void add_panel_obs_info (MODEL *pmod, panelmod_t *pan)
2693 {
2694 gretl_model_set_int(pmod, "n_included_units", pan->effn);
2695 gretl_model_set_int(pmod, "panel_T", pan->T);
2696 gretl_model_set_int(pmod, "Tmin", pan->Tmin);
2697 gretl_model_set_int(pmod, "Tmax", pan->Tmax);
2698 if (pan->Tmax > pan->Tmin && pan->Tbar == 0) {
2699 calculate_Tbar(pan);
2700 }
2701 if (pan->Tbar > 0) {
2702 gretl_model_set_double(pmod, "Tbar", pan->Tbar);
2703 }
2704 }
2705
replace_re_residuals(MODEL * pmod,panelmod_t * pan)2706 static void replace_re_residuals (MODEL *pmod, panelmod_t *pan)
2707 {
2708 int t;
2709
2710 /* replace the GLS residuals with the "fixed up" ones */
2711
2712 for (t=0; t<pmod->full_n; t++) {
2713 pmod->uhat[t] = pan->re_uhat[t];
2714 }
2715 }
2716
2717 /* We use this to "finalize" models estimated via fixed effects
2718 and random effects */
2719
save_panel_model(MODEL * pmod,panelmod_t * pan,const double ** Z,const DATASET * dset)2720 static int save_panel_model (MODEL *pmod, panelmod_t *pan,
2721 const double **Z,
2722 const DATASET *dset)
2723 {
2724 int err = 0;
2725
2726 pmod->ci = PANEL;
2727
2728 if (pan->opt & OPT_F) {
2729 /* fixed effects */
2730 int *ulist;
2731
2732 pmod->opt |= OPT_F;
2733 err = fix_within_stats(pmod, pan);
2734 if (err) {
2735 return err;
2736 }
2737 ulist = fe_units_list(pan);
2738 gretl_model_add_panel_varnames(pmod, dset, ulist);
2739 free(ulist);
2740 panel_model_add_ahat(pmod, dset, pan);
2741 save_fixed_effects_F(pan, pmod);
2742 } else {
2743 /* random effects */
2744 pmod->opt |= OPT_U;
2745 gretl_model_set_double(pmod, "s2v", pan->s2v);
2746 gretl_model_set_double(pmod, "s2e", pan->s2e);
2747 if (pan->balanced) {
2748 gretl_model_set_double(pmod, "theta", pan->theta);
2749 } else if (!na(pan->theta_bar)) {
2750 pan->theta_bar /= pan->effn;
2751 gretl_model_set_double(pmod, "theta_bar", pan->theta_bar);
2752 }
2753 gretl_model_add_panel_varnames(pmod, dset, NULL);
2754 if (pan->re_uhat != NULL) {
2755 replace_re_residuals(pmod, pan);
2756 }
2757 fix_gls_stats(pmod, pan);
2758 panel_model_add_ahat(pmod, dset, pan);
2759 if (pan->opt & OPT_N) {
2760 /* record use of Nerlove transformation */
2761 pmod->opt |= OPT_N;
2762 }
2763 if ((pan->opt & OPT_X) && !IGLS) {
2764 /* record use of special unbalanced ANOVA */
2765 pmod->opt |= OPT_X;
2766 if (!(pan->opt & OPT_N)) {
2767 const char *meth = stata_sa ? "stata" : "bc";
2768
2769 gretl_model_set_string_as_data(pmod, "anova_method",
2770 gretl_strdup(meth));
2771 }
2772 }
2773 }
2774
2775 /* compose list of dropped variables, if any */
2776 compose_panel_droplist(pmod, pan);
2777
2778 if (!(pan->opt & OPT_A)) {
2779 set_model_id(pmod, pan->opt);
2780 }
2781
2782 if (pan->opt & OPT_F) {
2783 panel_dwstat(pmod, pan);
2784 }
2785
2786 if (pan->opt & OPT_R) {
2787 pmod->opt |= OPT_R;
2788 }
2789
2790 time_dummies_wald_test(pan, pmod);
2791 add_panel_obs_info(pmod, pan);
2792 *pan->realmod = *pmod;
2793
2794 return err;
2795 }
2796
2797 /* drive the calculation of the fixed effects regression, print the
2798 results (if wanted), and compute the "within" error variance */
2799
within_variance(panelmod_t * pan,DATASET * dset,PRN * prn)2800 static int within_variance (panelmod_t *pan,
2801 DATASET *dset,
2802 PRN *prn)
2803 {
2804 MODEL femod;
2805 int i, err = 0;
2806
2807 femod = fixed_effects_model(pan, dset, prn);
2808
2809 if (femod.errcode) {
2810 pputs(prn, _("Error estimating fixed effects model\n"));
2811 errmsg(femod.errcode, prn);
2812 err = femod.errcode;
2813 clear_model(&femod);
2814 } else {
2815 int den;
2816
2817 if (pan->opt & OPT_N) {
2818 /* Nerlove */
2819 den = femod.nobs;
2820 } else {
2821 /* as per Greene: nT - n - K */
2822 den = femod.nobs - pan->effn - (pan->vlist[0] - 2);
2823 }
2824
2825 if (den == 0) {
2826 gretl_errmsg_set(_("Inadequate data for panel estimation"));
2827 return E_DF;
2828 } else {
2829 pan->s2e = femod.ess / den;
2830 }
2831
2832 #if PDEBUG
2833 fprintf(stderr, "nT = %d, n = %d, K = %d\n", femod.nobs,
2834 pan->effn, pan->vlist[0] - 2);
2835 fprintf(stderr, "pan->s2e = %g / %d = %g\n", femod.ess,
2836 den, pan->s2e);
2837 fprintf(stderr, "sqrt(pan->s2e) = %g\n", sqrt(pan->s2e));
2838 #endif
2839
2840 if (!(pan->opt & OPT_R)) {
2841 regular_fixed_effects_F(pan, &femod);
2842 }
2843
2844 if (pan->opt & OPT_V) {
2845 print_fe_results(pan, &femod, dset, prn);
2846 }
2847
2848 if (pan->bdiff != NULL && pan->Sigma != NULL) {
2849 for (i=1; i<femod.ncoeff; i++) {
2850 pan->bdiff->val[i-1] = femod.coeff[i];
2851 }
2852 femod_regular_vcv(&femod);
2853 vcv_slopes(pan, &femod, VCV_INIT);
2854 }
2855
2856 if (pan->opt & OPT_F) {
2857 err = save_panel_model(&femod, pan, (const double **) dset->Z,
2858 dset);
2859 if (err) {
2860 clear_model(&femod);
2861 }
2862 } else {
2863 if (pan->opt & OPT_U) {
2864 if (expand_fe_uhat(&femod, pan) == 0) {
2865 panel_dwstat(&femod, pan);
2866 }
2867 }
2868 clear_model(&femod);
2869 }
2870 }
2871
2872 return err;
2873 }
2874
print_hausman_result(panelmod_t * pan,PRN * prn)2875 static void print_hausman_result (panelmod_t *pan, PRN *prn)
2876 {
2877 if (na(pan->H)) {
2878 pputs(prn, _("Hausman test matrix is not positive definite!\n"));
2879 } else {
2880 pprintf(prn, _("Hausman test statistic:\n"
2881 " H = %g with p-value = prob(chi-square(%d) > %g) = %g\n"),
2882 pan->H, pan->dfH, pan->H, chisq_cdf_comp(pan->dfH, pan->H));
2883 pputs(prn, _("(A low p-value counts against the null hypothesis that "
2884 "the random effects\nmodel is consistent, in favor of the fixed "
2885 "effects model.)\n"));
2886 }
2887 }
2888
save_hausman_result(panelmod_t * pan)2889 static void save_hausman_result (panelmod_t *pan)
2890 {
2891 ModelTest *test;
2892
2893 if (pan->realmod == NULL || na(pan->H) || pan->dfH == 0) {
2894 return;
2895 }
2896
2897 test = model_test_new(GRETL_TEST_PANEL_HAUSMAN);
2898
2899 if (test != NULL) {
2900 model_test_set_teststat(test, GRETL_STAT_WALD_CHISQ);
2901 model_test_set_dfn(test, pan->dfH);
2902 model_test_set_value(test, pan->H);
2903 if (na(pan->H)) {
2904 model_test_set_pvalue(test, NADBL);
2905 } else {
2906 model_test_set_pvalue(test, chisq_cdf_comp(pan->dfH, pan->H));
2907 }
2908 maybe_add_test_to_model(pan->realmod, test);
2909 }
2910 }
2911
2912 /* Handle the case where collinear terms were dropped
2913 when doing the Hausman test via the regression
2914 method in robust mode.
2915 */
2916
robust_hausman_fixup(const int * hlist,MODEL * pmod)2917 static double robust_hausman_fixup (const int *hlist,
2918 MODEL *pmod)
2919 {
2920 int *wlist = gretl_list_copy(hlist);
2921 double H = NADBL;
2922
2923 if (wlist != NULL) {
2924 int i;
2925
2926 for (i=wlist[0]; i>0; i--) {
2927 if (!in_gretl_list(pmod->list, wlist[i])) {
2928 gretl_list_delete_at_pos(wlist, i);
2929 }
2930 }
2931 H = wald_omit_chisq(wlist, pmod);
2932 free(wlist);
2933 }
2934
2935 return H;
2936 }
2937
2938 /* Estimate the augmented GLS model for the Hausman test;
2939 return either the error sum of squares or, in the
2940 robust case, a Wald chi-square statistic.
2941 */
2942
hausman_regression_result(panelmod_t * pan,const int * relist,const int * hlist,DATASET * rset,PRN * prn)2943 static double hausman_regression_result (panelmod_t *pan,
2944 const int *relist,
2945 const int *hlist,
2946 DATASET *rset,
2947 PRN *prn)
2948 {
2949 double ret = NADBL;
2950 int *biglist = NULL;
2951 int err = 0;
2952
2953 biglist = gretl_list_add(relist, hlist, &err);
2954
2955 if (biglist != NULL) {
2956 MODEL hmod;
2957
2958 gretl_model_init(&hmod, NULL);
2959 hmod = lsq(biglist, rset, OLS, OPT_A);
2960 #if PDEBUG > 1
2961 pputs(prn, "Hausman test regression\n");
2962 printmodel(&hmod, rset, OPT_NONE, prn);
2963 #endif
2964 if (hmod.errcode == 0) {
2965 /* Find the number of additional regressors actually used,
2966 relative to @relist, allowing for the possibility
2967 that one or more elements of @biglist were dropped
2968 in estimation of @hmod due to excessive collinearity.
2969 */
2970 pan->dfH = hmod.list[0] - relist[0];
2971 if (pan->dfH > 0) {
2972 if (pan->opt & OPT_R) {
2973 /* do robust Wald test */
2974 if (hmod.full_n < pan->pooled->full_n) {
2975 hausman_move_uhat(&hmod, pan);
2976 }
2977 panel_robust_vcv(&hmod, pan, rset); /* FIXME? */
2978 if (hmod.vcv != NULL) {
2979 if (pan->dfH < hlist[0]) {
2980 ret = robust_hausman_fixup(hlist, &hmod);
2981 } else {
2982 ret = wald_omit_chisq(hlist, &hmod);
2983 }
2984 }
2985 } else {
2986 /* just record the unrestricted SSR */
2987 ret = hmod.ess;
2988 }
2989 }
2990 }
2991 clear_model(&hmod);
2992 }
2993
2994 free(biglist);
2995
2996 return ret;
2997 }
2998
2999 /* Computation of s2_v in the manner of Swamy and Arora, for an
3000 unbalanced panel. See Baltagi, Econometric Analysis of Panel
3001 Data, 3e, section 9.2.1. */
3002
unbalanced_SA_s2v(panelmod_t * pan,DATASET * dset)3003 static int unbalanced_SA_s2v (panelmod_t *pan,
3004 DATASET *dset)
3005 {
3006 gretl_matrix_block *B;
3007 gretl_matrix *ZmZ = NULL;
3008 gretl_matrix *PZ = NULL;
3009 gretl_matrix *Z = NULL;
3010 gretl_matrix *ZPZ = NULL;
3011 gretl_matrix *D2 = NULL;
3012 gretl_matrix *trmat = NULL;
3013 int k = pan->pooled->ncoeff;
3014 double zjt, z, tr;
3015 int i, j, s, t, p;
3016 int bigt, got;
3017 int err = 0;
3018
3019 B = gretl_matrix_block_new(&ZmZ, pan->effn, k,
3020 &PZ, pan->NT, k,
3021 &Z, pan->NT, k,
3022 &ZPZ, k, k,
3023 &D2, k, k, NULL);
3024
3025 if (B == NULL) {
3026 return E_ALLOC;
3027 }
3028
3029 s = p = 0;
3030 for (i=0; i<pan->nunits; i++) {
3031 int Ti = pan->unit_obs[i];
3032
3033 if (Ti == 0) {
3034 continue;
3035 }
3036
3037 for (j=0; j<k; j++) {
3038 zjt = 0.0;
3039 got = 0;
3040 /* cumulate sum of observations for unit */
3041 for (t=0; t<pan->T && got<Ti; t++) {
3042 bigt = panel_index(i, t);
3043 if (!panel_missing(pan, bigt)) {
3044 z = dset->Z[pan->pooled->list[j+2]][bigt];
3045 zjt += z;
3046 gretl_matrix_set(Z, p + got, j, z);
3047 got++;
3048 }
3049 }
3050 gretl_matrix_set(ZmZ, s, j, zjt);
3051 for (t=0; t<Ti; t++) {
3052 gretl_matrix_set(PZ, p + t, j, zjt / Ti);
3053 }
3054 }
3055 s++;
3056 p += Ti;
3057 }
3058
3059 gretl_matrix_multiply_mod(Z, GRETL_MOD_TRANSPOSE,
3060 PZ, GRETL_MOD_NONE,
3061 ZPZ, GRETL_MOD_NONE);
3062 gretl_invert_symmetric_matrix(ZPZ);
3063
3064 gretl_matrix_multiply_mod(ZmZ, GRETL_MOD_TRANSPOSE,
3065 ZmZ, GRETL_MOD_NONE,
3066 D2, GRETL_MOD_NONE);
3067
3068 trmat = gretl_matrix_reuse(PZ, k, k);
3069 gretl_matrix_multiply(ZPZ, D2, trmat);
3070 tr = gretl_matrix_trace(trmat);
3071
3072 #if 0
3073 gretl_matrix_print(trmat, "trmat");
3074 fprintf(stderr, "S-A: effn=%d, k=%d, NT=%d, tr=%g\n", pan->effn,
3075 k, pan->NT, tr);
3076 #endif
3077
3078 pan->s2v = (pan->ubPub - (pan->effn - k) * pan->s2e) / (pan->NT - tr);
3079
3080 #if PDEBUG
3081 fprintf(stderr, "S-A: ubPub=%#.8g, tr=%#.8g, s2v=%#.8g, sv=%#.8g\n",
3082 pan->ubPub, tr, pan->s2v, sqrt(pan->s2v));
3083 #endif
3084
3085 gretl_matrix_block_destroy(B);
3086
3087 return err;
3088 }
3089
3090 /* Calculate the random effects regression. Print the results
3091 here if we're doing the "panel diagnostics" test, otherwise
3092 save the results.
3093 */
3094
random_effects(panelmod_t * pan,DATASET * dset,DATASET * gset,PRN * prn)3095 static int random_effects (panelmod_t *pan,
3096 DATASET *dset,
3097 DATASET *gset,
3098 PRN *prn)
3099 {
3100 DATASET *rset;
3101 MODEL remod;
3102 gretlopt lsqopt = OPT_A | OPT_Z;
3103 int *relist = NULL;
3104 int *hlist = NULL;
3105 double hres = NADBL;
3106 int i, err = 0;
3107
3108 gretl_model_init(&remod, dset);
3109
3110 /* FGLS regression list */
3111 relist = gretl_list_new(pan->pooled->list[0]);
3112 if (relist == NULL) {
3113 return E_ALLOC;
3114 }
3115
3116 if (!(pan->opt & OPT_M)) {
3117 /* extra regressors for Hausman test, regression approach */
3118 hlist = gretl_list_new(pan->vlist[0] - 1);
3119 if (hlist == NULL) {
3120 free(relist);
3121 return E_ALLOC;
3122 }
3123 }
3124
3125 /* If OPT_N (--nerlove) was given, we've already calculated
3126 pan->s2v as the variance of the fixed effects. Otherwise
3127 we're using the Swamy and Arora method, and pan->s2v still
3128 needs to be computed.
3129
3130 Note: for unbalanced panels, theta will vary across the
3131 units in the final calculation.
3132 */
3133 if (!(pan->opt & OPT_N)) {
3134 if (IGLS) {
3135 /* get user-specified values */
3136 err = read_true_variances(pan);
3137 } else if (!pan->balanced && !na(pan->ubPub)) {
3138 err = unbalanced_SA_s2v(pan, dset);
3139 } else {
3140 pan->s2v = pan->s2b - pan->s2e / pan->Tbar;
3141 #if PDEBUG
3142 fprintf(stderr, "Swamy-Arora: initial s2v = %g - %g = %g\n",
3143 pan->s2b, pan->s2e / pan->Tbar, pan->s2v);
3144 #endif
3145 }
3146 if (pan->s2v < 0) {
3147 pan->s2v = 0.0;
3148 }
3149 }
3150
3151 /* theta, the quasi-demeaning coefficient */
3152 pan->theta = 1.0 - sqrt(pan->s2e / (pan->s2e + pan->Tmax * pan->s2v));
3153
3154 #if PDEBUG
3155 fprintf(stderr, "pan->s2v = %.8g, pan->s2e = %.8g\n", pan->s2v, pan->s2e);
3156 fprintf(stderr, "random_effects theta = %g\n", pan->theta);
3157 #endif
3158
3159 /* make special transformed dataset, and regression list */
3160 rset = random_effects_dataset(dset, gset, relist, hlist, pan);
3161 if (rset == NULL) {
3162 free(relist);
3163 free(hlist);
3164 return E_ALLOC;
3165 }
3166
3167 if (hlist != NULL) {
3168 hres = hausman_regression_result(pan, relist, hlist, rset, prn);
3169 }
3170
3171 /* regular random-effects model */
3172 #if PDEBUG
3173 fprintf(stderr, "estimate regular GLS model\n");
3174 #endif
3175 remod = lsq(relist, rset, OLS, lsqopt);
3176
3177 if ((err = remod.errcode)) {
3178 pputs(prn, _("Error estimating random effects model\n"));
3179 errmsg(err, prn);
3180 } else {
3181 #if PDEBUG > 1
3182 for (i=0; i<20 && i<remod.nobs; i++) {
3183 fprintf(stderr, "remod uhat[%d] = %g\n", i, remod.uhat[i]);
3184 }
3185 #endif
3186 if (pan->bdiff != NULL) {
3187 /* matrix-diff Hausman variant, if wanted */
3188 int vi, k = 0;
3189
3190 for (i=0; i<remod.ncoeff; i++) {
3191 vi = pan->pooled->list[i+2];
3192 if (var_is_varying(pan, vi)) {
3193 pan->bdiff->val[k++] -= remod.coeff[i];
3194 }
3195 }
3196 }
3197
3198 /* note: this call moved to here 2017-10-18 so we
3199 get computation of robust standard errors right
3200 */
3201 fix_panel_hatvars(&remod, pan, (const double **) dset->Z);
3202
3203 if (pan->opt & OPT_R) {
3204 panel_robust_vcv(&remod, pan, rset);
3205 } else {
3206 double sigma = remod.sigma; /* or... ? */
3207 #if PDEBUG
3208 fprintf(stderr, "GLS sigma = %g\n", sigma);
3209 fprintf(stderr, "GLS SSR = %.7g\n", remod.ess);
3210 #endif
3211 makevcv(&remod, sigma);
3212 }
3213
3214 if (pan->opt & OPT_V) {
3215 print_re_results(pan, &remod, dset, prn);
3216 }
3217
3218 if (!na(hres)) {
3219 if (pan->opt & OPT_R) {
3220 /* @hres is already the (robust) Hausman test statistic */
3221 pan->H = hres;
3222 } else {
3223 /* @hres is the unrestricted ess */
3224 pan->H = (remod.ess / hres - 1.0) * (remod.nobs);
3225 }
3226 } else if (pan->Sigma != NULL) {
3227 vcv_slopes(pan, &remod, VCV_SUBTRACT);
3228 }
3229 }
3230
3231 #if PDEBUG > 1
3232 if (remod.errcode == 0) {
3233 printmodel(&remod, rset, OPT_NONE, prn);
3234 }
3235 #endif
3236
3237 if (!err && (pan->opt & OPT_U)) {
3238 save_panel_model(&remod, pan, (const double **) dset->Z, rset);
3239 } else {
3240 clear_model(&remod);
3241 }
3242
3243 destroy_dataset(rset);
3244
3245 free(relist);
3246 free(hlist);
3247
3248 return err;
3249 }
3250
save_breusch_pagan_result(panelmod_t * pan)3251 static void save_breusch_pagan_result (panelmod_t *pan)
3252 {
3253 ModelTest *test;
3254
3255 if (pan->realmod == NULL || na(pan->BP)) {
3256 return;
3257 }
3258
3259 test = model_test_new(GRETL_TEST_PANEL_BP);
3260
3261 if (test != NULL) {
3262 model_test_set_teststat(test, GRETL_STAT_WALD_CHISQ);
3263 model_test_set_dfn(test, 1);
3264 model_test_set_value(test, pan->BP);
3265 model_test_set_pvalue(test, chisq_cdf_comp(1, pan->BP));
3266 maybe_add_test_to_model(pan->realmod, test);
3267 }
3268 }
3269
3270 /* do the panel Breusch-Pagan test and either print or save
3271 the results */
3272
breusch_pagan_LM(panelmod_t * pan,PRN * prn)3273 static int breusch_pagan_LM (panelmod_t *pan, PRN *prn)
3274 {
3275 double A = 0.0;
3276 int n = pan->pooled->nobs;
3277 int print_means = 0;
3278 int i, t, M = 0;
3279
3280 if ((pan->opt & OPT_V) && pan->effn <= 10) {
3281 print_means = 1;
3282 }
3283
3284 if (print_means) {
3285 pputs(prn, _("Means of pooled OLS residuals for cross-sectional units:"));
3286 pputs(prn, "\n\n");
3287 }
3288
3289 for (i=0; i<pan->nunits; i++) {
3290 int Ti = pan->unit_obs[i];
3291
3292 if (Ti > 0) {
3293 double u, usum = 0.0;
3294
3295 for (t=0; t<pan->T; t++) {
3296 u = pan->pooled->uhat[panel_index(i, t)];
3297 if (!na(u)) {
3298 usum += u;
3299 }
3300 }
3301 if (print_means) {
3302 pprintf(prn, _(" unit %2d: %13.5g\n"), i + 1,
3303 usum / (double) Ti);
3304 }
3305 A += usum * usum;
3306 M += Ti * Ti;
3307 }
3308 }
3309
3310 pan->BP = ((double) n * n /(2.0 * (M - n))) * pow((A / pan->pooled->ess) - 1.0, 2);
3311
3312 if (pan->opt & OPT_V) {
3313 pputc(prn, '\n');
3314 pprintf(prn, _("Breusch-Pagan test statistic:\n"
3315 " LM = %g with p-value = prob(chi-square(1) > %g) = %g\n"),
3316 pan->BP, pan->BP, chisq_cdf_comp(1, pan->BP));
3317
3318 pputs(prn, _("(A low p-value counts against the null hypothesis that "
3319 "the pooled OLS model\nis adequate, in favor of the random "
3320 "effects alternative.)\n\n"));
3321 }
3322
3323 return 0;
3324 }
3325
save_panspec_result(panelmod_t * pan,PRN * prn)3326 static void save_panspec_result (panelmod_t *pan, PRN *prn)
3327 {
3328 gretl_matrix *tests = gretl_column_vector_alloc(3);
3329 gretl_matrix *pvals = gretl_column_vector_alloc(3);
3330 char **S = strings_array_new(3);
3331
3332 /* fixed-effects poolability F-test */
3333 tests->val[0] = pan->Ffe;
3334 pvals->val[0] = snedecor_cdf_comp(pan->Fdfn, pan->Fdfd, pan->Ffe);
3335 /* random-effects poolability test */
3336 tests->val[1] = pan->BP;
3337 pvals->val[1] = chisq_cdf_comp(1, pan->BP);
3338 /* Hausman test */
3339 tests->val[2] = pan->H;
3340 pvals->val[2] = chisq_cdf_comp(pan->dfH, pan->H);
3341
3342 if (S != NULL) {
3343 /* add row names */
3344 char **S2;
3345
3346 S[0] = gretl_strdup("Poolability (Wald)");
3347 S[1] = gretl_strdup("Poolability (B-P)");
3348 S[2] = gretl_strdup("Hausman");
3349 S2 = strings_array_dup(S, 3);
3350 gretl_matrix_set_rownames(tests, S);
3351 if (S2 != NULL) {
3352 gretl_matrix_set_rownames(pvals, S2);
3353 }
3354 }
3355
3356 if (pan->opt & OPT_V) {
3357 print_hausman_result(pan, prn);
3358 }
3359 record_matrix_test_result(tests, pvals);
3360 }
3361
finalize_hausman_test(panelmod_t * pan,int ci,PRN * prn)3362 static int finalize_hausman_test (panelmod_t *pan, int ci, PRN *prn)
3363 {
3364 int mdiff = 0, err = 0;
3365
3366 if (pan->bdiff != NULL && pan->Sigma != NULL) {
3367 /* matrix approach */
3368 mdiff = 1;
3369 err = bXb(pan);
3370 } else if (na(pan->H)) {
3371 /* regression approach bombed somehow? */
3372 err = E_DATA;
3373 }
3374
3375 if (ci == PANSPEC) {
3376 /* the context is the "panspec" command */
3377 if (!err || (mdiff && err == E_NOTPD)) {
3378 save_panspec_result(pan, prn);
3379 }
3380 } else {
3381 /* the context is random-effects estimation */
3382 if (mdiff && err == E_NOTPD) {
3383 pputs(prn, _("Hausman test matrix is not positive definite"));
3384 pputc(prn, '\n');
3385 }
3386 save_hausman_result(pan);
3387 }
3388
3389 return err;
3390 }
3391
3392 /* Based on the residuals from pooled OLS, do some accounting to see
3393 (a) how many cross-sectional units were actually included (after
3394 omitting any missing values); (b) how many time-series observations
3395 were included for each unit; and (c) what was the maximum number
3396 of time-series observations used. Return 0 if all goes OK,
3397 non-zero otherwise.
3398 */
3399
panel_obs_accounts(panelmod_t * pan)3400 static int panel_obs_accounts (panelmod_t *pan)
3401 {
3402 int *uobs;
3403 int i, t, bigt;
3404
3405 uobs = malloc(pan->nunits * sizeof *uobs);
3406 if (uobs == NULL) {
3407 return E_ALLOC;
3408 }
3409
3410 pan->NT = 0;
3411 pan->effn = 0;
3412 pan->Tmax = 0;
3413 pan->Tmin = pan->T;
3414
3415 for (i=0; i<pan->nunits; i++) {
3416 uobs[i] = 0;
3417 for (t=0; t<pan->T; t++) {
3418 bigt = panel_index(i, t);
3419 #if PDEBUG > 2
3420 fprintf(stderr, "unit %d, bigt=%d, pmod->uhat[%d]: %s\n", i, t,
3421 bigt, (panel_missing(pan, bigt))? "NA" : "OK");
3422 #endif
3423 if (!panel_missing(pan, bigt)) {
3424 uobs[i] += 1;
3425 }
3426 }
3427 if (uobs[i] > 0) {
3428 pan->effn += 1;
3429 if (uobs[i] > pan->Tmax) {
3430 pan->Tmax = uobs[i];
3431 }
3432 if (uobs[i] < pan->Tmin) {
3433 pan->Tmin = uobs[i];
3434 }
3435 pan->NT += uobs[i];
3436 }
3437 }
3438
3439 for (i=0; i<pan->nunits; i++) {
3440 if (uobs[i] > 0 && uobs[i] != pan->Tmax) {
3441 pan->balanced = 0;
3442 break;
3443 }
3444 }
3445
3446 pan->unit_obs = uobs;
3447
3448 return 0;
3449 }
3450
3451 /* Construct an array of char to record which parameters in the full
3452 model correspond to time-varying regressors
3453 */
3454
panel_set_varying(panelmod_t * pan,const MODEL * pmod)3455 static int panel_set_varying (panelmod_t *pan, const MODEL *pmod)
3456 {
3457 int i;
3458
3459 pan->varying = calloc(pmod->ncoeff, 1);
3460 if (pan->varying == NULL) {
3461 return E_ALLOC;
3462 }
3463
3464 for (i=0; i<pmod->ncoeff; i++) {
3465 if (var_is_varying(pan, pmod->list[i+2])) {
3466 pan->varying[i] = 1;
3467 }
3468 }
3469
3470 return 0;
3471 }
3472
3473 /* find harmonic mean of the number of time-series observations
3474 per included group */
3475
calculate_Tbar(panelmod_t * pan)3476 static void calculate_Tbar (panelmod_t *pan)
3477 {
3478 int i;
3479
3480 if (pan->balanced) {
3481 pan->Tbar = pan->Tmax;
3482 } else {
3483 double den = 0.0;
3484
3485 for (i=0; i<pan->nunits; i++) {
3486 if (pan->unit_obs[i] > 0) {
3487 den += 1.0 / pan->unit_obs[i];
3488 }
3489 }
3490 pan->Tbar = pan->effn / den;
3491 }
3492 }
3493
3494 static int
panelmod_setup(panelmod_t * pan,MODEL * pmod,const DATASET * dset,int ntdum,gretlopt opt)3495 panelmod_setup (panelmod_t *pan, MODEL *pmod, const DATASET *dset,
3496 int ntdum, gretlopt opt)
3497 {
3498 int nobs, dsetT = sample_size(dset);
3499 int err = 0;
3500
3501 pan->opt = opt;
3502 pan->pooled = pmod;
3503
3504 /* assumes (possibly padded) balanced panel dataset */
3505 pan->nunits = dsetT / dset->pd;
3506 pan->T = dset->pd;
3507 nobs = pan->nunits * pan->T;
3508
3509 if (nobs < dsetT) {
3510 fprintf(stderr, "dataset nobs = %d, but panel nobs = %d\n",
3511 dsetT, nobs);
3512 }
3513
3514 panel_index_init(dset, pan->nunits, pan->T);
3515 pan->ntdum = ntdum;
3516
3517 err = panel_obs_accounts(pan);
3518
3519 if (!err && (pan->opt & (OPT_U | OPT_F | OPT_B))) {
3520 pan->realmod = malloc(sizeof *pan->realmod);
3521 if (pan->realmod == NULL) {
3522 err = E_ALLOC;
3523 }
3524 }
3525
3526 IGLS = stata_sa = 0;
3527
3528 if (!err && (opt & OPT_X)) {
3529 if (!(pan->opt & OPT_U)) {
3530 /* --unbalanced --random-effects */
3531 err = E_BADOPT;
3532 } else {
3533 /* probe optional argument to --unbalanced */
3534 const char *s = get_optval_string(PANEL, OPT_X);
3535
3536 if (s != NULL) {
3537 if (!strcmp(s, "stata")) {
3538 stata_sa = 1;
3539 } else if (!strcmp(s, "bc")) {
3540 stata_sa = 0;
3541 } else if (has_suffix(s, ".mat")) {
3542 /* hidden testing option */
3543 strcpy(glsmat, s);
3544 IGLS = 1;
3545 } else {
3546 gretl_errmsg_sprintf(_("%s: invalid option argument"), s);
3547 err = E_INVARG;
3548 }
3549 }
3550 }
3551 }
3552
3553 if (err && pan->unit_obs != NULL) {
3554 free(pan->unit_obs);
3555 pan->unit_obs = NULL;
3556 }
3557
3558 return err;
3559 }
3560
3561 /* Called in relation to a model estimated by pooled OLS: test for
3562 both fixed and random effects. Implements the "panspec" command.
3563 */
3564
panel_diagnostics(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)3565 int panel_diagnostics (MODEL *pmod, DATASET *dset,
3566 gretlopt opt, PRN *prn)
3567 {
3568 int nerlove = 0;
3569 int quiet = (opt & OPT_Q);
3570 gretlopt psopt = opt;
3571 panelmod_t pan;
3572 int xdf, err = 0;
3573
3574 #if PDEBUG
3575 fputs("\n*** Starting panel_diagnostics ***\n", stderr);
3576 fprintf(stderr, " OPT_M? %s\n", (opt & OPT_M)? "yes" : "no");
3577 #endif
3578
3579 if (pmod->ifc == 0) {
3580 /* at many points we assume the base regression has an
3581 intercept included */
3582 return E_NOCONST;
3583 }
3584
3585 #if PDEBUG /* not really ready yet! */
3586 if (pmod->opt & OPT_R) {
3587 /* forward the robust option */
3588 opt |= OPT_R;
3589 }
3590 #endif
3591
3592 if (opt & OPT_N) {
3593 nerlove = 1;
3594 }
3595
3596 /* Add OPT_V to make the fixed and random effects functions verbose,
3597 unless we've been passed the --quiet option.
3598 */
3599 if (!quiet) {
3600 psopt |= OPT_V;
3601 }
3602 panelmod_init(&pan);
3603 err = panelmod_setup(&pan, pmod, dset, 0, psopt);
3604 if (err) {
3605 goto bailout;
3606 }
3607
3608 if (pan.effn < pan.nunits) {
3609 fprintf(stderr, "number of units included = %d\n", pan.effn);
3610 if (pan.effn <= 0) {
3611 return E_DATA;
3612 }
3613 }
3614
3615 /* figure out which of the original regressors are time-varying */
3616 err = varying_vars_list(dset, &pan);
3617 if (err) {
3618 goto bailout;
3619 }
3620
3621 err = panel_set_varying(&pan, pmod);
3622 if (err) {
3623 goto bailout;
3624 }
3625
3626 calculate_Tbar(&pan);
3627
3628 /* degrees of freedom relative to # of x-sectional units */
3629 xdf = pan.effn - pmod->ncoeff;
3630
3631 #if PDEBUG
3632 fprintf(stderr, "nunits=%d, T=%d, effn=%d, Tmax=%d, xdf=%d\n",
3633 pan.nunits, pan.T, pan.effn, pan.Tmax, xdf);
3634 #endif
3635
3636 if (xdf <= 0 && !nerlove) {
3637 ; /* can't do Random Effects */
3638 } else if (pan.opt & OPT_M) {
3639 /* matrix version of Hausman test */
3640 err = matrix_hausman_allocate(&pan);
3641 if (err) {
3642 goto bailout;
3643 }
3644 }
3645
3646 if (!quiet) {
3647 /* header */
3648 pputc(prn, '\n');
3649 pprintf(prn, _("Diagnostics: using n = %d cross-sectional units\n"),
3650 pan.effn);
3651 pputc(prn, '\n');
3652 }
3653
3654 err = within_variance(&pan, dset, prn);
3655 if (err) {
3656 goto bailout;
3657 }
3658
3659 if (xdf <= 0) {
3660 pprintf(prn, "Omitting group means regression: "
3661 "insufficient degrees of freedom\n");
3662 goto bailout;
3663 }
3664
3665 if (xdf > 0 && !na(pan.s2e)) {
3666 DATASET *gset;
3667
3668 gset = group_means_dataset(&pan, dset);
3669 if (gset == NULL) {
3670 err = E_ALLOC;
3671 } else {
3672 err = between_variance(&pan, gset);
3673 }
3674
3675 if (err) {
3676 pputs(prn, _("Couldn't estimate group means regression\n"));
3677 if (err == E_SINGULAR) {
3678 /* treat this as non-fatal */
3679 err = 0;
3680 }
3681 } else {
3682 random_effects(&pan, dset, gset, prn);
3683 if (pan.theta > 0) {
3684 /* this test hardly makes sense if GLS = OLS */
3685 breusch_pagan_LM(&pan, prn);
3686 }
3687 finalize_hausman_test(&pan, PANSPEC, prn);
3688 }
3689
3690 if (gset != NULL) {
3691 destroy_dataset(gset);
3692 }
3693 }
3694
3695 bailout:
3696
3697 panelmod_free(&pan);
3698
3699 return err;
3700 }
3701
between_model(panelmod_t * pan,const DATASET * dset)3702 static int between_model (panelmod_t *pan, const DATASET *dset)
3703 {
3704 DATASET *gset;
3705 int err = 0;
3706
3707 gset = group_means_dataset(pan, dset);
3708
3709 if (gset == NULL) {
3710 err = E_ALLOC;
3711 } else {
3712 err = between_variance(pan, gset);
3713 if (err) {
3714 destroy_dataset(gset);
3715 } else {
3716 pan->realmod->dataset = gset;
3717 }
3718 }
3719
3720 return err;
3721 }
3722
3723 /* When we automatically add dummy variables to the dataset,
3724 in estimating a panel model with the --time-dummies option,
3725 should we delete these when we're done, or leave them
3726 in the dataset? 2013-11-18: we'll delete them if the dataset
3727 is subsampled, otherwise leave them.
3728 */
3729
3730 static int
process_time_dummies(MODEL * pmod,const DATASET * dset,int v)3731 process_time_dummies (MODEL *pmod, const DATASET *dset, int v)
3732 {
3733 int i, vi, n = 0;
3734
3735 for (i=pmod->list[0]; i>1; i--) {
3736 if (pmod->list[i] >= v) {
3737 n++;
3738 }
3739 }
3740
3741 if (n > 0) {
3742 gretl_model_allocate_param_names(pmod, pmod->ncoeff);
3743
3744 if (pmod->errcode) {
3745 return pmod->errcode;
3746 }
3747
3748 for (i=2; i<=pmod->list[0]; i++) {
3749 vi = pmod->list[i];
3750 gretl_model_set_param_name(pmod, i-2, dset->varname[vi]);
3751 }
3752
3753 for (i=pmod->list[0]; i>1; i--) {
3754 if (pmod->list[i] >= v) {
3755 gretl_list_delete_at_pos(pmod->list, i);
3756 }
3757 }
3758 }
3759
3760 /* Record the fact that the model used time dummies, in case
3761 the user wants to add/omit variables or something.
3762 */
3763 pmod->opt |= OPT_D;
3764
3765 return 0;
3766 }
3767
add_time_dummies_to_list(const int * list,DATASET * dset,int ** plist)3768 static int add_time_dummies_to_list (const int *list,
3769 DATASET *dset,
3770 int **plist)
3771 {
3772 char dname[VNAMELEN];
3773 int *biglist = NULL;
3774 int ntdum = dset->pd - 1;
3775 int i, j, v;
3776 int err = 0;
3777
3778 biglist = gretl_list_new(list[0] + ntdum);
3779 if (biglist == NULL) {
3780 return E_ALLOC;
3781 }
3782
3783 for (i=1; i<=list[0]; i++) {
3784 biglist[i] = list[i];
3785 }
3786
3787 j = list[0] + 1;
3788
3789 for (i=2; i<=dset->pd; i++) {
3790 sprintf(dname, "dt_%d", i);
3791 v = series_greatest_index(dset, dname);
3792 if (v == dset->v) {
3793 err = E_DATA;
3794 break;
3795 } else if (in_gretl_list(list, v)) {
3796 /* already present */
3797 biglist[0] -= 1;
3798 } else {
3799 biglist[j++] = v;
3800 }
3801 }
3802
3803 if (err) {
3804 free(biglist);
3805 } else {
3806 *plist = biglist;
3807 }
3808
3809 return err;
3810 }
3811
panel_check_for_const(const int * list)3812 static int panel_check_for_const (const int *list)
3813 {
3814 int i;
3815
3816 for (i=2; i<=list[0]; i++) {
3817 if (list[i] == 0) {
3818 return 0; /* OK */
3819 }
3820 }
3821
3822 return E_NOCONST;
3823 }
3824
get_ntdum(const int * orig,const int * new)3825 static int get_ntdum (const int *orig, const int *new)
3826 {
3827 int i, n = 0;
3828
3829 for (i=2; i<=new[0]; i++) {
3830 if (!in_gretl_list(orig, new[i])) {
3831 n++;
3832 }
3833 }
3834
3835 return n;
3836 }
3837
save_pooled_model(MODEL * pmod,panelmod_t * pan,const DATASET * dset)3838 static void save_pooled_model (MODEL *pmod, panelmod_t *pan,
3839 const DATASET *dset)
3840 {
3841 gretl_model_set_int(pmod, "pooled", 1);
3842 add_panel_obs_info(pmod, pan);
3843
3844 if (!(pan->opt & OPT_A)) {
3845 set_model_id(pmod, pan->opt);
3846 }
3847
3848 if (pan->opt & OPT_R) {
3849 panel_robust_vcv(pmod, pan, dset);
3850 pmod->opt |= OPT_R;
3851 pmod->fstt = panel_overall_test(pmod, pan, 0, OPT_NONE);
3852 pmod->dfd = pan->effn - 1;
3853 }
3854
3855 panel_dwstat(pmod, pan);
3856 }
3857
3858 /* fixed effects | random effects | between | pooled */
3859
3860 #define estimator_specified(o) (o & (OPT_F|OPT_U|OPT_B|OPT_P))
3861
3862 /**
3863 * real_panel_model:
3864 * @list: list containing model specification.
3865 * @dset: dataset struct.
3866 * @opt: may include %OPT_U for the random effects model;
3867 * %OPT_R for robust standard errors (fixed effects model
3868 * and pooled OLS only); %OPT_M to use the matrix-difference
3869 * version of the Hausman test (random effects only); %OPT_B for
3870 * the "between" model; %OPT_P for pooled OLS; and %OPT_D to
3871 * include time dummies.
3872 * If and only if %OPT_P is given, %OPT_C (clustered standard
3873 * errors) is accepted.
3874 * If %OPT_U is given, either of the mutually incompatible options
3875 * %OPT_N and %OPT_X may be given to inflect the calculation of the
3876 * variance of the individual effects: %OPT_N means use Nerlove's
3877 * method, %OPT_X means use the special "exact" method of Swamy
3878 * and Arora in the case of an unbalanced panel. The default is
3879 * to use the "generic" Swamy-Arora method.
3880 * @prn: printing struct.
3881 *
3882 * Estimates a panel model, by default the fixed effects model.
3883 *
3884 * Returns: a #MODEL struct, containing the estimates.
3885 */
3886
real_panel_model(const int * list,DATASET * dset,gretlopt opt,PRN * prn)3887 MODEL real_panel_model (const int *list, DATASET *dset,
3888 gretlopt opt, PRN *prn)
3889 {
3890 int orig_v = dset->v;
3891 MODEL mod;
3892 panelmod_t pan;
3893 gretlopt pan_opt = opt;
3894 gretlopt ols_opt = OPT_A;
3895 int *olslist = NULL;
3896 int nerlove = 0;
3897 int ntdum = 0;
3898 int err = 0;
3899
3900 gretl_model_init(&mod, dset);
3901 panelmod_init(&pan);
3902
3903 if (!estimator_specified(opt)) {
3904 /* default: add OPT_F to save the fixed effects model */
3905 pan_opt |= OPT_F;
3906 }
3907
3908 if (opt & OPT_P) {
3909 /* doing pooled OLS */
3910 if (opt & OPT_C) {
3911 /* clustered */
3912 ols_opt |= (OPT_C | OPT_R);
3913 }
3914 } else {
3915 /* not just pooled OLS */
3916 mod.errcode = panel_check_for_const(list);
3917 if (mod.errcode) {
3918 return mod;
3919 }
3920 }
3921
3922 if (opt & OPT_D) {
3923 /* time dummies wanted */
3924 err = gen_panel_dummies(dset, OPT_T, prn);
3925 if (!err) {
3926 err = add_time_dummies_to_list(list, dset, &olslist);
3927 }
3928 } else {
3929 olslist = gretl_list_copy(list);
3930 if (olslist == NULL) {
3931 err = E_ALLOC;
3932 }
3933 }
3934
3935 if (err) {
3936 goto bailout;
3937 }
3938
3939 /* baseline: estimate via pooled OLS */
3940 mod = lsq(olslist, dset, OLS, ols_opt);
3941 if (mod.errcode) {
3942 err = mod.errcode;
3943 fprintf(stderr, "real_panel_model: error %d in initial OLS\n",
3944 mod.errcode);
3945 goto bailout;
3946 }
3947
3948 #if PDEBUG > 1
3949 pprintf(prn, "*** initial baseline OLS\n");
3950 printlist(olslist, "olslist");
3951 printmodel(&mod, dset, OPT_NONE, prn);
3952 #endif
3953
3954 free(olslist);
3955
3956 #if PDEBUG
3957 if (pan_opt & OPT_F) {
3958 fprintf(stderr, "\n*** Doing fixed effects\n");
3959 } else if (pan_opt & OPT_U) {
3960 fprintf(stderr, "\n*** Doing random effects\n");
3961 }
3962 #endif
3963
3964 if ((opt & OPT_D) && !(opt & OPT_B)) {
3965 ntdum = get_ntdum(list, mod.list);
3966 }
3967
3968 err = panelmod_setup(&pan, &mod, dset, ntdum, pan_opt);
3969 if (err) {
3970 goto bailout;
3971 }
3972
3973 if (opt & OPT_P) {
3974 save_pooled_model(&mod, &pan, dset);
3975 goto bailout;
3976 }
3977
3978 if (opt & OPT_B) {
3979 err = between_model(&pan, dset);
3980 goto bailout;
3981 }
3982
3983 /* figure out which of the original regressors are time-varying,
3984 or unit-varying as the case may be
3985 */
3986 err = varying_vars_list(dset, &pan);
3987 if (err) {
3988 goto bailout;
3989 }
3990
3991 err = panel_set_varying(&pan, &mod);
3992 if (err) {
3993 goto bailout;
3994 }
3995
3996 if (opt & OPT_U) {
3997 nerlove = (opt & OPT_N)? 1 : 0;
3998 }
3999
4000 calculate_Tbar(&pan);
4001
4002 if (opt & OPT_U) {
4003 /* trying to do random effects */
4004 int xdf = pan.effn - (mod.ncoeff - pan.ntdum);
4005
4006 if (xdf <= 0 && !nerlove) {
4007 gretl_errmsg_set(_("Couldn't estimate group means regression"));
4008 err = mod.errcode = E_DF;
4009 goto bailout;
4010 } else if (pan.opt & OPT_M) {
4011 /* matrix version of Hausman test */
4012 err = matrix_hausman_allocate(&pan);
4013 if (err) {
4014 goto bailout;
4015 }
4016 }
4017 }
4018
4019 err = within_variance(&pan, dset, prn);
4020 if (err) {
4021 goto bailout;
4022 }
4023
4024 if ((opt & OPT_U) && !na(pan.s2e)) {
4025 DATASET *gset;
4026
4027 breusch_pagan_LM(&pan, prn);
4028
4029 gset = group_means_dataset(&pan, dset);
4030 if (gset == NULL) {
4031 err = E_ALLOC;
4032 } else {
4033 err = between_variance(&pan, gset);
4034 }
4035
4036 if (err && !nerlove) {
4037 pputs(prn, _("Couldn't estimate group means regression\n"));
4038 } else {
4039 err = random_effects(&pan, dset, gset, prn);
4040 if (!err) {
4041 save_breusch_pagan_result(&pan);
4042 finalize_hausman_test(&pan, PANEL, prn);
4043 }
4044 }
4045
4046 if (gset != NULL) {
4047 destroy_dataset(gset);
4048 }
4049 }
4050
4051 bailout:
4052
4053 if (!err) {
4054 if ((pan_opt & (OPT_F | OPT_U)) && mod.missmask != NULL) {
4055 /* preserve missing obs mask, if any */
4056 char *mask = mod.missmask;
4057
4058 mod.missmask = NULL;
4059 clear_model(&mod);
4060 mod = *pan.realmod;
4061 mod.missmask = mask;
4062 } else if (!(opt & OPT_P)) {
4063 /* not doing pooled OLS, in which case @mod would
4064 already hold the correct model
4065 */
4066 clear_model(&mod);
4067 mod = *pan.realmod;
4068 }
4069 gretl_model_smpl_init(&mod, dset);
4070 if (opt & OPT_D) {
4071 if (pan.ntdum > 0) {
4072 gretl_model_set_int(&mod, "ntdum", pan.ntdum);
4073 maybe_suppress_time_dummies(&mod, pan.ntdum);
4074 }
4075 if (complex_subsampled()) {
4076 process_time_dummies(&mod, dset, orig_v);
4077 }
4078 }
4079 }
4080
4081 panelmod_free(&pan);
4082
4083 if (err && mod.errcode == 0) {
4084 mod.errcode = err;
4085 }
4086
4087 if (complex_subsampled()) {
4088 dataset_drop_last_variables(dset, dset->v - orig_v);
4089 }
4090
4091 return mod;
4092 }
4093
4094 /* Called from qr_estimate.c in case robust VCV estimation is called
4095 for in the context of TSLS estimation on panel data.
4096 */
4097
panel_tsls_robust_vcv(MODEL * pmod,const DATASET * dset)4098 int panel_tsls_robust_vcv (MODEL *pmod, const DATASET *dset)
4099 {
4100 panelmod_t pan;
4101 int err = 0;
4102
4103 panelmod_init(&pan);
4104
4105 err = panelmod_setup(&pan, pmod, dset, 0, OPT_NONE);
4106 if (!err) {
4107 err = panel_robust_vcv(pmod, &pan, dset);
4108 }
4109
4110 panelmod_free(&pan);
4111
4112 return err;
4113 }
4114
4115 /* write weights for groupwise weighted least squares into the
4116 last variable in the dataset */
4117
4118 static int
write_weights_to_dataset(double * uvar,int nunits,int T,DATASET * dset)4119 write_weights_to_dataset (double *uvar, int nunits, int T,
4120 DATASET *dset)
4121 {
4122 int w = dset->v - 1;
4123 double wi;
4124 int i, t;
4125
4126 for (i=0; i<nunits; i++) {
4127 if (uvar[i] <= 0.0 || na(uvar[i])) {
4128 wi = 0.0;
4129 } else {
4130 wi = 1.0 / uvar[i]; /* sqrt? */
4131 }
4132 for (t=0; t<T; t++) {
4133 dset->Z[w][panel_index(i, t)] = wi;
4134 }
4135 }
4136
4137 return 0;
4138 }
4139
allocate_weight_var(DATASET * dset)4140 static int allocate_weight_var (DATASET *dset)
4141 {
4142 if (dataset_add_series(dset, 1)) {
4143 return E_ALLOC;
4144 }
4145
4146 strcpy(dset->varname[dset->v - 1], "unit_wt");
4147
4148 return 0;
4149 }
4150
max_coeff_diff(const MODEL * pmod,const double * bvec)4151 static double max_coeff_diff (const MODEL *pmod, const double *bvec)
4152 {
4153 double diff, maxdiff = 0.0;
4154 int i;
4155
4156 for (i=0; i<pmod->ncoeff; i++) {
4157 diff = fabs(pmod->coeff[i] - bvec[i]);
4158 if (diff > maxdiff) {
4159 maxdiff = diff;
4160 }
4161 }
4162
4163 return maxdiff;
4164 }
4165
4166 #define S2MINOBS 1
4167
4168 /* Wald test for groupwise heteroskedasticity, without assuming
4169 normality of the errors: see Greene, 4e, p. 598. Note that the
4170 computation involves a factor of (1/(T_i - 1)) so we cannot
4171 include groups that have only one observation.
4172 */
4173
4174 static double
wald_hetero_test(const MODEL * pmod,double s2,const double * uvar,panelmod_t * pan,int * df)4175 wald_hetero_test (const MODEL *pmod, double s2,
4176 const double *uvar, panelmod_t *pan,
4177 int *df)
4178 {
4179 double x, W = 0.0;
4180 int i, t, Ti;
4181
4182 *df = 0;
4183
4184 for (i=0; i<pan->nunits; i++) {
4185 double fii = 0.0;
4186
4187 Ti = pan->unit_obs[i];
4188 if (Ti < 2) {
4189 continue;
4190 }
4191
4192 for (t=0; t<pan->T; t++) {
4193 x = pmod->uhat[panel_index(i, t)];
4194 if (!na(x)) {
4195 x = x * x - uvar[i];
4196 fii += x * x;
4197 }
4198 }
4199
4200 if (fii <= 0) {
4201 W = NADBL;
4202 break;
4203 }
4204
4205 fii *= (1.0 / Ti) * (1.0 / (Ti - 1.0));
4206 x = uvar[i] - s2;
4207 W += x * x / fii;
4208 *df += 1;
4209 }
4210
4211 if (*df < 2) {
4212 W = NADBL;
4213 }
4214
4215 return W;
4216 }
4217
4218 /* Likelihood-ratio test for groupwise heteroskedasticity: see Greene,
4219 4e, pp. 597 and 599. This test requires that we're able to
4220 calculate the ML estimates (using iterated WLS).
4221 */
4222
4223 static int
ml_hetero_test(MODEL * pmod,double s2,const double * uvar,int nunits,const int * unit_obs)4224 ml_hetero_test (MODEL *pmod, double s2, const double *uvar,
4225 int nunits, const int *unit_obs)
4226 {
4227 ModelTest *test;
4228 double x2, s2h = 0.0;
4229 int i, Ti, df = 0;
4230 int err = 0;
4231
4232 for (i=0; i<nunits; i++) {
4233 Ti = unit_obs[i];
4234 if (Ti > 0) {
4235 s2h += Ti * log(uvar[i]);
4236 df++;
4237 }
4238 }
4239
4240 x2 = pmod->nobs * log(s2) - s2h;
4241 df--;
4242
4243 test = model_test_new(GRETL_TEST_GROUPWISE);
4244 if (test != NULL) {
4245 model_test_set_teststat(test, GRETL_STAT_LR);
4246 model_test_set_dfn(test, df);
4247 model_test_set_value(test, x2);
4248 model_test_set_pvalue(test, chisq_cdf_comp(df, x2));
4249 maybe_add_test_to_model(pmod, test);
4250 } else {
4251 err = 1;
4252 }
4253
4254 return err;
4255 }
4256
pooled_ll(const MODEL * pmod)4257 static double pooled_ll (const MODEL *pmod)
4258 {
4259 double n = pmod->nobs;
4260
4261 return -(n / 2.0) * (1.0 + LN_2_PI - log(n) + log(pmod->ess));
4262 }
4263
4264 /* calculate log-likelihood for panel MLE with groupwise
4265 heteroskedasticity
4266 */
4267
panel_ML_ll(MODEL * pmod,const double * uvar,int nunits,const int * unit_obs)4268 static void panel_ML_ll (MODEL *pmod, const double *uvar,
4269 int nunits, const int *unit_obs)
4270 {
4271 double ll = -(pmod->nobs / 2.0) * LN_2_PI;
4272 int i, Ti;
4273
4274 for (i=0; i<nunits; i++) {
4275 Ti = unit_obs[i];
4276 if (Ti > 0) {
4277 ll -= (Ti / 2.0) * (1.0 + log(uvar[i]));
4278 }
4279 }
4280
4281 pmod->lnL = ll;
4282 mle_criteria(pmod, 0);
4283 }
4284
4285 /* we can't estimate a group-specific variance based on just one
4286 observation */
4287
singleton_check(const int * unit_obs,int nunits)4288 static int singleton_check (const int *unit_obs, int nunits)
4289 {
4290 int i;
4291
4292 for (i=0; i<nunits; i++) {
4293 if (unit_obs[i] == 1) {
4294 return 1;
4295 }
4296 }
4297
4298 return 0;
4299 }
4300
4301 /* compute per-unit error variances */
4302
unit_error_variances(double * uvar,const MODEL * pmod,panelmod_t * pan,int * df)4303 static void unit_error_variances (double *uvar, const MODEL *pmod,
4304 panelmod_t *pan, int *df)
4305 {
4306 int i, t;
4307 double uit;
4308
4309 *df = 0;
4310
4311 for (i=0; i<pan->nunits; i++) {
4312 if (pan->unit_obs[i] < S2MINOBS) {
4313 uvar[i] = NADBL;
4314 } else {
4315 uvar[i] = 0.0;
4316 for (t=0; t<pan->T; t++) {
4317 uit = pmod->uhat[panel_index(i, t)];
4318 if (!na(uit)) {
4319 uvar[i] += uit * uit;
4320 }
4321 }
4322 uvar[i] /= pan->unit_obs[i];
4323 *df += 1;
4324 }
4325 }
4326 }
4327
print_unit_variances(panelmod_t * pan,double * uvar,int wald_test,PRN * prn)4328 static void print_unit_variances (panelmod_t *pan, double *uvar,
4329 int wald_test, PRN *prn)
4330 {
4331 int i, Ti;
4332
4333 pputs(prn, " unit variance\n");
4334
4335 for (i=0; i<pan->nunits; i++) {
4336 Ti = pan->unit_obs[i];
4337 if (na(uvar[i]) || (wald_test && Ti < 2)) {
4338 pprintf(prn, "%5d%12s (T = %d)\n", i+1, "NA", Ti);
4339 } else {
4340 pprintf(prn, "%5d%#12g (T = %d)\n", i+1, uvar[i], Ti);
4341 }
4342 }
4343 }
4344
4345 #define SMALLDIFF 0.0001
4346 #define WLS_MAX 30
4347
4348 /**
4349 * panel_wls_by_unit:
4350 * @list: regression list.
4351 * @dset: dataset struct.
4352 * @opt: may include %OPT_I (iterate to ML solution),
4353 * %OPT_V for verbose operation.
4354 * @prn: for printing details of iterations (or %NULL).
4355 *
4356 * Performs weighted least squares (possibly iterated) on
4357 * a panel model, allowing for groupwise heteroskedasticity.
4358 *
4359 * Returns: the estimates, in a %MODEL.
4360 */
4361
panel_wls_by_unit(const int * list,DATASET * dset,gretlopt opt,PRN * prn)4362 MODEL panel_wls_by_unit (const int *list, DATASET *dset,
4363 gretlopt opt, PRN *prn)
4364 {
4365 MODEL mdl;
4366 panelmod_t pan;
4367 gretlopt wlsopt = OPT_A | OPT_Z;
4368 double *uvar = NULL;
4369 double *bvec = NULL;
4370 double s2, diff = 1.0;
4371 int *wlist = NULL;
4372 int orig_v = dset->v;
4373 int i, iter = 0;
4374
4375 gretl_error_clear();
4376 panelmod_init(&pan);
4377
4378 if (opt & OPT_I) {
4379 /* iterating: no degrees-of-freedom correction in WLS */
4380 wlsopt |= OPT_N;
4381 }
4382
4383 /* baseline pooled model */
4384 mdl = lsq(list, dset, OLS, OPT_A);
4385 if (mdl.errcode) {
4386 goto bailout;
4387 }
4388
4389 mdl.errcode = panelmod_setup(&pan, &mdl, dset, 0, OPT_NONE);
4390 if (mdl.errcode) {
4391 goto bailout;
4392 }
4393
4394 uvar = malloc(pan.nunits * sizeof *uvar);
4395 if (uvar == NULL) {
4396 free(pan.unit_obs);
4397 mdl.errcode = E_ALLOC;
4398 return mdl;
4399 }
4400
4401 if (opt & OPT_I) {
4402 if (singleton_check(pan.unit_obs, pan.nunits)) {
4403 pprintf(prn, _("Can't produce ML estimates: "
4404 "some units have only one observation"));
4405 pputc(prn, '\n');
4406 opt ^= OPT_I;
4407 }
4408 }
4409
4410 s2 = mdl.ess / mdl.nobs;
4411
4412 if ((opt & OPT_V) && (opt & OPT_I)) {
4413 pprintf(prn, "\nOLS error variance = %g\n", s2);
4414 pprintf(prn, "log-likelihood = %g\n", pooled_ll(&mdl));
4415 }
4416
4417 if (allocate_weight_var(dset)) {
4418 mdl.errcode = E_ALLOC;
4419 goto bailout;
4420 }
4421
4422 if (opt & OPT_I) {
4423 bvec = malloc(mdl.ncoeff * sizeof *bvec);
4424 if (bvec == NULL) {
4425 mdl.errcode = E_ALLOC;
4426 goto bailout;
4427 }
4428 }
4429
4430 /* allocate and construct WLS regression list */
4431
4432 wlist = gretl_list_new(mdl.list[0] + 1);
4433 if (wlist == NULL) {
4434 mdl.errcode = E_ALLOC;
4435 goto bailout;
4436 }
4437
4438 wlist[1] = dset->v - 1; /* weight variable: the last var added */
4439 for (i=2; i<=wlist[0]; i++) {
4440 wlist[i] = mdl.list[i-1];
4441 }
4442
4443 /* If wanted (and possible) iterate to ML solution; otherwise just do
4444 one-step FGLS estimation.
4445 */
4446
4447 while (diff > SMALLDIFF) {
4448 int df = 0;
4449
4450 iter++;
4451
4452 unit_error_variances(uvar, &mdl, &pan, &df);
4453
4454 if (opt & OPT_V) {
4455 if (opt & OPT_I) {
4456 pprintf(prn, "\n*** %s %d ***\n", _("iteration"),
4457 iter);
4458 } else {
4459 pputc(prn, '\n');
4460 }
4461 print_unit_variances(&pan, uvar, 0, prn);
4462 }
4463
4464 write_weights_to_dataset(uvar, pan.nunits, pan.T, dset);
4465
4466 if (opt & OPT_I) {
4467 /* save coefficients for comparison */
4468 for (i=0; i<mdl.ncoeff; i++) {
4469 bvec[i] = mdl.coeff[i];
4470 }
4471 }
4472
4473 clear_model(&mdl);
4474 mdl = lsq(wlist, dset, WLS, wlsopt);
4475 if (mdl.errcode) {
4476 break;
4477 }
4478
4479 if (iter > WLS_MAX) {
4480 mdl.errcode = E_NOCONV;
4481 break;
4482 }
4483
4484 if (opt & OPT_I) {
4485 diff = max_coeff_diff(&mdl, bvec);
4486 if ((opt & OPT_V) && iter == 1) {
4487 pprintf(prn, "\nFGLS pooled error variance = %g\n",
4488 mdl.ess / mdl.nobs);
4489 }
4490 } else {
4491 /* one-step FGLS */
4492 break;
4493 }
4494 }
4495
4496 if (!mdl.errcode) {
4497 mdl.ci = PANEL;
4498 if (!(opt & OPT_A)) {
4499 set_model_id(&mdl, pan.opt);
4500 }
4501 gretl_model_set_int(&mdl, "n_included_units", pan.effn);
4502 mdl.opt |= OPT_H; /* --unit-weights */
4503 mdl.nwt = 0;
4504
4505 if (opt & OPT_I) {
4506 int df = 0;
4507
4508 gretl_model_set_int(&mdl, "iters", iter);
4509 ml_hetero_test(&mdl, s2, uvar, pan.nunits, pan.unit_obs);
4510 unit_error_variances(uvar, &mdl, &pan, &df);
4511 panel_ML_ll(&mdl, uvar, pan.nunits, pan.unit_obs);
4512 if (opt & OPT_V) {
4513 pputc(prn, '\n');
4514 }
4515 }
4516 }
4517
4518 bailout:
4519
4520 free(pan.unit_obs);
4521 free(uvar);
4522 free(wlist);
4523 free(bvec);
4524
4525 if (!(opt & OPT_V)) {
4526 dataset_drop_last_variables(dset, dset->v - orig_v);
4527 }
4528
4529 return mdl;
4530 }
4531
print_wald_test(double W,int df,double pval,panelmod_t * pan,double * uvar,double s2,PRN * prn)4532 static void print_wald_test (double W, int df, double pval,
4533 panelmod_t *pan, double *uvar,
4534 double s2, PRN *prn)
4535 {
4536 pprintf(prn, "\n%s:\n",
4537 _("Distribution free Wald test for heteroskedasticity"));
4538 pprintf(prn, " %s(%d) = %g, ", _("Chi-square"), df, W);
4539 pprintf(prn, "%s = %g\n\n", _("with p-value"), pval);
4540
4541 if (pan->nunits <= 30) {
4542 pprintf(prn, "%s = %g\n\n", _("Pooled error variance"), s2);
4543 print_unit_variances(pan, uvar, 1, prn);
4544 }
4545 }
4546
4547 /**
4548 * groupwise_hetero_test:
4549 * @pmod: panel model to be tested.
4550 * @dset: information on the (panel) data set.
4551 * @opt: may contain OPT_S to attach the test result
4552 * to @pmod, OPT_I for silent operation.
4553 * @prn: for printing details of iterations (or %NULL).
4554 *
4555 * Performs a Wald test for the null hypothesis that the
4556 * error variance is uniform across the units.
4557 *
4558 * Returns: 0 on success, non-zero error code on failure.
4559 */
4560
groupwise_hetero_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)4561 int groupwise_hetero_test (MODEL *pmod, DATASET *dset,
4562 gretlopt opt, PRN *prn)
4563 {
4564 panelmod_t pan;
4565 double *uvar = NULL;
4566 double s2, W = NADBL;
4567 int df, err = 0;
4568
4569 if (pmod->ci == OLS || (pmod->ci == PANEL && (pmod->opt & OPT_F))) {
4570 ; /* OK for pooled or fixed effects */
4571 } else if (0 && pmod->ci == PANEL && (pmod->opt & OPT_U)) {
4572 ; /* just testing! */
4573 } else {
4574 return E_NOTIMP;
4575 }
4576
4577 gretl_error_clear();
4578 panelmod_init(&pan);
4579
4580 err = panelmod_setup(&pan, pmod, dset, 0, OPT_NONE);
4581 if (err) {
4582 return err;
4583 }
4584
4585 uvar = malloc(pan.nunits * sizeof *uvar);
4586 if (uvar == NULL) {
4587 free(pan.unit_obs);
4588 return E_ALLOC;
4589 }
4590
4591 s2 = pmod->ess / pmod->nobs;
4592
4593 unit_error_variances(uvar, pmod, &pan, &df);
4594
4595 if (df >= 2) {
4596 W = wald_hetero_test(pmod, s2, uvar, &pan, &df);
4597 }
4598
4599 if (na(W)) {
4600 err = E_DATA;
4601 } else {
4602 double pval = chisq_cdf_comp(df, W);
4603
4604 if (!(opt & OPT_I)) {
4605 print_wald_test(W, df, pval, &pan, uvar, s2, prn);
4606 }
4607
4608 if (opt & OPT_S) {
4609 ModelTest *test = model_test_new(GRETL_TEST_GROUPWISE);
4610
4611 if (test != NULL) {
4612 model_test_set_teststat(test, GRETL_STAT_WALD_CHISQ);
4613 model_test_set_dfn(test, df);
4614 model_test_set_value(test, W);
4615 model_test_set_pvalue(test, pval);
4616 maybe_add_test_to_model(pmod, test);
4617 }
4618 }
4619
4620 record_test_result(W, pval);
4621 }
4622
4623 free(pan.unit_obs);
4624 free(uvar);
4625
4626 return err;
4627 }
4628
print_ar_aux_model(MODEL * pmod,DATASET * dset,int j,PRN * prn)4629 static int print_ar_aux_model (MODEL *pmod, DATASET *dset,
4630 int j, PRN *prn)
4631 {
4632 const char *heads[] = {
4633 N_("First differenced equation (dependent, d_y)"),
4634 N_("Autoregression of residuals (dependent, uhat)"),
4635 N_("Auxiliary regression including lagged residual"),
4636 };
4637 gretl_matrix *cse;
4638 gretl_array *S;
4639 int i, vi, err = 0;
4640
4641 cse = gretl_matrix_alloc(pmod->ncoeff, 2);
4642 S = gretl_array_new(GRETL_TYPE_STRINGS, pmod->ncoeff, &err);
4643
4644 if (cse == NULL || S == NULL) {
4645 return E_ALLOC;
4646 }
4647
4648 for (i=0; i<pmod->ncoeff; i++) {
4649 gretl_matrix_set(cse, i, 0, pmod->coeff[i]);
4650 gretl_matrix_set(cse, i, 1, pmod->sderr[i]);
4651 vi = pmod->list[i+2];
4652 gretl_array_set_string(S, i, dset->varname[vi], 0);
4653 }
4654
4655 if (j >= 0) {
4656 pprintf(prn, "%s:\n", _(heads[j]));
4657 }
4658 print_model_from_matrices(cse, NULL, S, pmod->dfd, OPT_NONE, prn);
4659 pprintf(prn, " n = %d, R-squared = %.4f\n\n", pmod->nobs, pmod->rsq);
4660
4661 gretl_matrix_free(cse);
4662 gretl_array_nullify_elements(S);
4663 gretl_array_destroy(S);
4664
4665 return 0;
4666 }
4667
finalize_autocorr_test(MODEL * pmod,ModelTest * test,gretlopt opt,PRN * prn)4668 static void finalize_autocorr_test (MODEL *pmod, ModelTest *test,
4669 gretlopt opt, PRN *prn)
4670 {
4671 if (!(opt & OPT_I)) {
4672 if (opt & OPT_Q) {
4673 pputc(prn, '\n');
4674 }
4675 gretl_model_test_print_direct(test, 1, prn);
4676 }
4677 if (opt & OPT_S) {
4678 maybe_add_test_to_model(pmod, test);
4679 } else {
4680 free(test);
4681 }
4682 }
4683
4684 /* See Wooldridge's Econometric Analysis of Cross Section
4685 and Panel Data (MIT Press, 2002), pages 176-7. Test
4686 for first order autocorrelation in the context of pooled OLS.
4687 */
4688
pooled_autocorr_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)4689 static int pooled_autocorr_test (MODEL *pmod, DATASET *dset,
4690 gretlopt opt, PRN *prn)
4691 {
4692 int quiet = (opt & OPT_Q);
4693 int *ulist = NULL;
4694 int i, vi = 0;
4695 int err = 0;
4696
4697 if (pmod->full_n != dset->n) {
4698 return E_DATA;
4699 }
4700
4701 err = dataset_add_NA_series(dset, 1);
4702 if (!err) {
4703 vi = dset->v - 1;
4704 strcpy(dset->varname[vi], "uhat");
4705 for (i=0; i<dset->n; i++) {
4706 dset->Z[vi][i] = pmod->uhat[i];
4707 }
4708 }
4709
4710 if (!err) {
4711 ulist = gretl_list_new(pmod->ncoeff + 2);
4712 if (ulist == NULL) {
4713 err = E_ALLOC;
4714 } else {
4715 for (i=1; i<=pmod->list[0]; i++) {
4716 ulist[i] = pmod->list[i];
4717 }
4718 /* append lagged residual to list */
4719 ulist[i] = laggenr(vi, 1, dset);
4720 if (ulist[i] < 0) {
4721 err = E_DATA;
4722 } else {
4723 strcpy(dset->varname[ulist[i]], "uhat(-1)");
4724 }
4725 }
4726 }
4727
4728 if (!err) {
4729 gretlopt auxopt = OPT_P | OPT_R | OPT_Q;
4730 MODEL tmp;
4731
4732 /* estimate model including lagged residual */
4733 tmp = real_panel_model(ulist, dset, auxopt, NULL);
4734 err = tmp.errcode;
4735 if (!err && !quiet) {
4736 if (gretl_echo_on()) {
4737 pputc(prn, '\n');
4738 }
4739 print_ar_aux_model(&tmp, dset, 2, prn);
4740 }
4741 if (!err) {
4742 double tstat, pval;
4743 int df = tmp.dfd;
4744
4745 i = tmp.ncoeff - 1;
4746 tstat = tmp.coeff[i] / tmp.sderr[i];
4747 pval = student_pvalue_2(df, tstat);
4748 record_test_result(tstat, pval);
4749
4750 if ((opt & OPT_S) || !(opt & OPT_I)) {
4751 /* saving the test onto @pmod, or not silent */
4752 ModelTest *test = model_test_new(GRETL_TEST_PANEL_AR);
4753
4754 if (test != NULL) {
4755 model_test_set_teststat(test, GRETL_STAT_STUDENT);
4756 model_test_set_value(test, tstat);
4757 model_test_set_dfn(test, df);
4758 model_test_set_pvalue(test, pval);
4759 finalize_autocorr_test(pmod, test, opt, prn);
4760 }
4761 }
4762 }
4763 clear_model(&tmp);
4764 }
4765
4766 free(ulist);
4767
4768 return err;
4769 }
4770
4771 /* See Wooldridge's Econometric Analysis of Cross Section
4772 and Panel Data (MIT Press, 2002), pages 282-3. Test
4773 for first order autocorrelation based on the residuals
4774 from the first-differenced model.
4775 */
4776
wooldridge_autocorr_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)4777 static int wooldridge_autocorr_test (MODEL *pmod, DATASET *dset,
4778 gretlopt opt, PRN *prn)
4779 {
4780 MODEL tmp;
4781 gretlopt tmp_opt;
4782 int quiet = (opt & OPT_Q);
4783 int *dlist = NULL;
4784 int i, j, vi;
4785 int clearit = 0;
4786 int err = 0;
4787
4788 dlist = gretl_list_new(pmod->ncoeff + 1);
4789 if (dlist == NULL) {
4790 return E_ALLOC;
4791 }
4792
4793 dlist[0] = 0;
4794 for (i=1, j=1; i<=pmod->list[0] && !err; i++) {
4795 vi = pmod->list[i];
4796 if (vi != 0 && !gretl_isconst(dset->t1, dset->t2, dset->Z[vi])) {
4797 dlist[j] = diffgenr(vi, DIFF, dset);
4798 if (dlist[j] < 0) {
4799 err = E_DATA;
4800 } else {
4801 j++;
4802 dlist[0] += 1;
4803 }
4804 } else if (i == 1) {
4805 gretl_errmsg_set("The dependent variable is constant");
4806 err = E_DATA;
4807 }
4808 }
4809
4810 /* aux panel models: pooled, robust, quiet */
4811 tmp_opt = OPT_P | OPT_R | OPT_Q;
4812
4813 if (!err) {
4814 /* estimate model in first-differenced form */
4815 tmp = real_panel_model(dlist, dset, tmp_opt, NULL);
4816 err = tmp.errcode;
4817 if (!err && !quiet) {
4818 if (gretl_echo_on()) {
4819 pputc(prn, '\n');
4820 }
4821 print_ar_aux_model(&tmp, dset, 0, prn);
4822 }
4823 }
4824
4825 if (!err) {
4826 err = dataset_add_allocated_series(dset, tmp.uhat);
4827 tmp.uhat = NULL;
4828 clear_model(&tmp);
4829 }
4830
4831 if (!err) {
4832 vi = dset->v - 1; /* the last series added */
4833 strcpy(dset->varname[vi], "uhat");
4834 dlist[0] = 2;
4835 dlist[1] = vi;
4836 dlist[2] = laggenr(vi, 1, dset);
4837 if (dlist[2] < 0) {
4838 err = E_DATA;
4839 } else {
4840 /* regress residual on its first lag */
4841 tmp = real_panel_model(dlist, dset, tmp_opt, NULL);
4842 err = tmp.errcode;
4843 if (!err && !quiet) {
4844 strcpy(dset->varname[dlist[2]], "uhat(-1)");
4845 print_ar_aux_model(&tmp, dset, 1, prn);
4846 }
4847 clearit = 1;
4848 }
4849 }
4850
4851 if (!err) {
4852 double c, s, F, pval;
4853
4854 c = tmp.coeff[0] + 0.5;
4855 s = tmp.sderr[0];
4856 F = c * c / (s * s);
4857 pval = snedecor_cdf_comp(1, tmp.dfd, F);
4858 record_test_result(F, pval);
4859
4860 if ((opt & OPT_S) || !(opt & OPT_I)) {
4861 /* saving the test onto @pmod, or not silent */
4862 ModelTest *test = model_test_new(GRETL_TEST_PANEL_AR);
4863
4864 if (test != NULL) {
4865 model_test_set_teststat(test, GRETL_STAT_F);
4866 model_test_set_value(test, F);
4867 model_test_set_dfn(test, 1);
4868 model_test_set_dfd(test, tmp.dfd);
4869 model_test_set_pvalue(test, pval);
4870 finalize_autocorr_test(pmod, test, opt, prn);
4871 }
4872 }
4873 }
4874
4875 if (clearit) {
4876 clear_model(&tmp);
4877 }
4878 free(dlist);
4879
4880 return err;
4881 }
4882
panel_autocorr_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)4883 int panel_autocorr_test (MODEL *pmod, DATASET *dset,
4884 gretlopt opt, PRN *prn)
4885 {
4886 int save_pcse = libset_get_bool(USE_PCSE);
4887 int orig_v = dset->v;
4888 int err;
4889
4890 libset_set_bool(USE_PCSE, 0);
4891
4892 if (pmod->ci == OLS || (pmod->opt & OPT_P)) {
4893 err = pooled_autocorr_test(pmod, dset, opt, prn);
4894 } else {
4895 err = wooldridge_autocorr_test(pmod, dset, opt, prn);
4896 }
4897
4898 libset_set_bool(USE_PCSE, save_pcse);
4899 dataset_drop_last_variables(dset, dset->v - orig_v);
4900
4901 return err;
4902 }
4903
4904 /* test for cross-sectional dependence */
4905
panel_xdepend_test(MODEL * pmod,DATASET * dset,gretlopt opt,PRN * prn)4906 int panel_xdepend_test (MODEL *pmod, DATASET *dset,
4907 gretlopt opt, PRN *prn)
4908 {
4909 const double *u;
4910 double rij, rsum = 0.0;
4911 double arsum = 0.0;
4912 double ssx, ssy, sxy;
4913 double xbar, ybar;
4914 int N1, N2, T, Tij;
4915 int i, j, t, si, sj;
4916 int N = 0, Nr = 0;
4917 int err = 0;
4918
4919 if (dset->structure != STACKED_TIME_SERIES) {
4920 return E_PDWRONG;
4921 } else if (pmod->uhat == NULL) {
4922 return E_DATA;
4923 }
4924
4925 T = dset->pd;
4926 N1 = pmod->t1 / T;
4927 N2 = pmod->t2 / T;
4928 u = pmod->uhat;
4929
4930 for (i=N1; i<N2; i++) {
4931 int Nj = 0;
4932
4933 for (j=i+1; j<=N2; j++) {
4934 xbar = ybar = 0.0;
4935 Tij = 0;
4936 for (t=0; t<T; t++) {
4937 si = i * T + t;
4938 sj = j * T + t;
4939 if (!na(u[si]) && !na(u[sj])) {
4940 Tij++;
4941 xbar += u[si];
4942 ybar += u[sj];
4943 }
4944 }
4945 if (Tij >= 2) {
4946 ssx = ssy = sxy = 0.0;
4947 xbar /= Tij;
4948 ybar /= Tij;
4949 for (t=0; t<T; t++) {
4950 si = i * T + t;
4951 sj = j * T + t;
4952 if (!na(u[si]) && !na(u[sj])) {
4953 ssx += (u[si] - xbar) * (u[si] - xbar);
4954 ssy += (u[sj] - ybar) * (u[sj] - ybar);
4955 sxy += (u[si] - xbar) * (u[sj] - ybar);
4956 }
4957 }
4958 rij = sxy / sqrt(ssx * ssy);
4959 rsum += sqrt(Tij) * rij;
4960 arsum += fabs(rij);
4961 Nr++;
4962 Nj++;
4963 }
4964 }
4965 if (Nj > 0) {
4966 N++;
4967 }
4968 }
4969
4970 if (N == 0) {
4971 err = E_TOOFEW;
4972 }
4973
4974 if (!err) {
4975 double CD, pval;
4976
4977 N = N + 1;
4978 CD = sqrt(2.0 / (N * (N - 1.0))) * rsum;
4979 pval = normal_pvalue_2(CD);
4980
4981 if (!(opt & OPT_I)) {
4982 pputs(prn, _("Pesaran CD test for cross-sectional dependence"));
4983 pprintf(prn, "\n%s: z = %f,\n", _("Test statistic"), CD);
4984 pprintf(prn, "%s = P(|z| > %g) = %.3g\n", _("with p-value"),
4985 CD, pval);
4986 pprintf(prn, _("Average absolute correlation = %.3f"), arsum / Nr);
4987 pputc(prn, '\n');
4988 }
4989
4990 if (opt & OPT_S) {
4991 ModelTest *test = model_test_new(GRETL_TEST_XDEPEND);
4992
4993 if (test != NULL) {
4994 model_test_set_teststat(test, GRETL_STAT_Z);
4995 model_test_set_value(test, CD);
4996 model_test_set_pvalue(test, pval);
4997 maybe_add_test_to_model(pmod, test);
4998 }
4999 }
5000
5001 record_test_result(CD, pval);
5002 }
5003
5004 return err;
5005 }
5006
5007 /**
5008 * switch_panel_orientation:
5009 * @dset: pointer to dataset struct.
5010 *
5011 * Reorganizes @dset, transforming from stacked cross sections to
5012 * stacked time series or vice versa. If the transformation is from
5013 * stacked time series to stacked cross section, the dataset will
5014 * no longer be acceptable as a panel for gretl's purposes; it
5015 * may be useful for export purposes, though.
5016 *
5017 * Returns: 0 on successful completion, non-zero on error.
5018 */
5019
switch_panel_orientation(DATASET * dset)5020 int switch_panel_orientation (DATASET *dset)
5021 {
5022 int oldmode = dset->structure;
5023 double *tmp;
5024 char **markers = NULL;
5025 double pdx;
5026 int T, n;
5027 int i, j, s, t;
5028
5029 if (dset->Z == NULL) {
5030 return E_NODATA;
5031 } else if (oldmode != STACKED_TIME_SERIES &&
5032 oldmode != STACKED_CROSS_SECTION) {
5033 return E_DATA;
5034 }
5035
5036 tmp = malloc(dset->n * sizeof *tmp);
5037 if (tmp == NULL) {
5038 return E_ALLOC;
5039 }
5040
5041 if (oldmode == STACKED_CROSS_SECTION) {
5042 n = dset->pd;
5043 T = dset->n / n;
5044 } else {
5045 T = dset->pd;
5046 n = dset->n / T;
5047 }
5048
5049 /* copy the data series across in transformed order */
5050 for (i=1; i<dset->v; i++) {
5051 for (t=0; t<dset->n; t++) {
5052 /* transcribe to tmp in original order */
5053 tmp[t] = dset->Z[i][t];
5054 }
5055 s = 0;
5056 if (oldmode == STACKED_CROSS_SECTION) {
5057 /* convert to stacked time-series */
5058 for (j=0; j<n; j++) {
5059 for (t=0; t<T; t++) {
5060 dset->Z[i][s++] = tmp[t * n + j];
5061 }
5062 }
5063 } else {
5064 /* convert to stacked cross-sections */
5065 for (t=0; t<T; t++) {
5066 for (j=0; j<n; j++) {
5067 dset->Z[i][s++] = tmp[j * T + t];
5068 }
5069 }
5070 }
5071 }
5072
5073 /* rearrange observations markers if relevant */
5074 if (dset->S != NULL) {
5075 markers = strings_array_new_with_length(dset->n, OBSLEN);
5076 if (markers != NULL) {
5077 for (t=0; t<dset->n; t++) {
5078 strcpy(markers[t], dset->S[t]);
5079 }
5080 s = 0;
5081 if (oldmode == STACKED_CROSS_SECTION) {
5082 for (j=0; j<n; j++) {
5083 for (t=0; t<T; t++) {
5084 strcpy(dset->S[s++], markers[t * n + j]);
5085 }
5086 }
5087 } else {
5088 for (t=0; t<T; t++) {
5089 for (j=0; j<n; j++) {
5090 strcpy(dset->S[s++], markers[j * T + t]);
5091 }
5092 }
5093 }
5094 strings_array_free(markers, dset->n);
5095 } else {
5096 /* should we flag an error? */
5097 dataset_destroy_obs_markers(dset);
5098 }
5099 }
5100
5101 dset->sd0 = 1.0;
5102 pdx = 0.1;
5103
5104 /* change the datainfo setup */
5105 if (oldmode == STACKED_CROSS_SECTION) {
5106 dset->structure = STACKED_TIME_SERIES;
5107 dset->pd = T;
5108 while (T /= 10) {
5109 pdx *= 0.1;
5110 }
5111 } else {
5112 dset->structure = STACKED_CROSS_SECTION;
5113 dset->pd = n;
5114 while (n /= 10) {
5115 pdx *= 0.1;
5116 }
5117 }
5118
5119 dset->sd0 += pdx;
5120 ntolabel(dset->stobs, 0, dset);
5121 ntolabel(dset->endobs, dset->n - 1, dset);
5122
5123 free(tmp);
5124
5125 return 0;
5126 }
5127
5128 /**
5129 * panel_isconst:
5130 * @t1: starting observation.
5131 * @t2: ending observation.
5132 * @pd: panel time-series length.
5133 * @x: data series to examine.
5134 * @bygroup: use 1 to check for constancy across groups,
5135 * 0 for constancy across time.
5136 *
5137 * Check whether series @x is constant (either over time or
5138 * across groups) in a panel dataset. Missing values are
5139 * ignored.
5140 *
5141 * Returns: 1 if the variable is constant, otherwise 0.
5142 */
5143
panel_isconst(int t1,int t2,int pd,const double * x,int bygroup)5144 int panel_isconst (int t1, int t2, int pd, const double *x,
5145 int bygroup)
5146 {
5147 double x0 = NADBL;
5148 int t, ret = 1;
5149
5150 if (bygroup) {
5151 /* check for variation across groups */
5152 int tref;
5153
5154 for (t=t1; t<=t2 && ret; t++) {
5155 if (na(x[t])) {
5156 continue;
5157 }
5158 tref = t - pd;
5159 while (tref >= t1) {
5160 /* check last obs for the same period */
5161 x0 = x[t-pd];
5162 if (na(x0)) {
5163 tref -= pd;
5164 } else {
5165 if (x[t] != x0) {
5166 ret = 0;
5167 }
5168 break;
5169 }
5170 }
5171 }
5172 } else {
5173 /* check for time-variation */
5174 int u, ubak = -1;
5175
5176 for (t=t1; t<=t2 && ret; t++) {
5177 if (na(x[t])) {
5178 continue;
5179 }
5180 u = t / pd;
5181 if (u == ubak) {
5182 /* same group */
5183 if (!na(x0) && x[t] != x0) {
5184 ret = 0;
5185 }
5186 } else {
5187 /* starting a new group */
5188 x0 = x[t];
5189 ubak = u;
5190 }
5191 }
5192 }
5193
5194 return ret;
5195 }
5196
varying_vars_list(const DATASET * dset,panelmod_t * pan)5197 static int varying_vars_list (const DATASET *dset, panelmod_t *pan)
5198 {
5199 int i, j, k, t, bigt;
5200 int err = 0;
5201
5202 pan->vlist = gretl_list_new(pan->pooled->list[0]);
5203 if (pan->vlist == NULL) {
5204 return E_ALLOC;
5205 }
5206
5207 pan->vlist[0] = 1;
5208 pan->vlist[1] = pan->pooled->list[1];
5209
5210 k = 2;
5211
5212 for (j=2; j<=pan->pooled->list[0]; j++) {
5213 int vj = pan->pooled->list[j];
5214 int varies = 0;
5215
5216 if (vj == 0) {
5217 pan->vlist[k++] = 0;
5218 pan->vlist[0] += 1;
5219 continue;
5220 }
5221
5222 for (i=0; i<pan->nunits && !varies; i++) {
5223 int started = 0;
5224 double xval = NADBL;
5225
5226 if (pan->unit_obs[i] == 0) {
5227 continue;
5228 }
5229
5230 for (t=0; t<pan->T && !varies; t++) {
5231 bigt = panel_index(i, t);
5232 if (panel_missing(pan, bigt)) {
5233 continue;
5234 }
5235 if (!started) {
5236 xval = dset->Z[vj][bigt];
5237 started = 1;
5238 } else if (dset->Z[vj][bigt] != xval) {
5239 varies = 1;
5240 }
5241 }
5242 }
5243
5244 if (varies) {
5245 pan->vlist[k++] = vj;
5246 pan->vlist[0] += 1;
5247 } else {
5248 fprintf(stderr, "Variable %d '%s' is time-invariant\n",
5249 vj, dset->varname[vj]);
5250 }
5251 }
5252
5253 #if PDEBUG
5254 printlist(pan->pooled->list, "original regressors");
5255 printlist(pan->vlist, "time-varying regressors");
5256 #endif
5257
5258 return err;
5259 }
5260
5261 /* apparatus for setting panel structure by reading two variables,
5262 indexing the cross-sectional unit and the time period
5263 respectively
5264 */
5265
5266 typedef struct s_point_t s_point;
5267 typedef struct sorter_t sorter;
5268
5269 struct s_point_t {
5270 int obsnum;
5271 double val1;
5272 double val2;
5273 };
5274
5275 struct sorter_t {
5276 int sortvar1;
5277 int sortvar2;
5278 s_point *points;
5279 };
5280
sorter_fill_points(sorter * s,const double ** Z,int n)5281 static void sorter_fill_points (sorter *s, const double **Z,
5282 int n)
5283 {
5284 int t;
5285
5286 for (t=0; t<n; t++) {
5287 s->points[t].obsnum = t;
5288 s->points[t].val1 = Z[s->sortvar1][t];
5289 s->points[t].val2 = Z[s->sortvar2][t];
5290 }
5291 }
5292
compare_obs(const void * a,const void * b)5293 static int compare_obs (const void *a, const void *b)
5294 {
5295 s_point *pa = (s_point *) a;
5296 s_point *pb = (s_point *) b;
5297 int ret;
5298
5299 ret = (pa->val1 > pb->val1) - (pa->val1 < pb->val1);
5300 if (ret == 0) {
5301 ret = (pa->val2 > pb->val2) - (pa->val2 < pb->val2);
5302 }
5303
5304 if (ret == 0) {
5305 /* mark an error: got a duplicated value */
5306 pa->obsnum = pb->obsnum = -1;
5307 }
5308
5309 return ret;
5310 }
5311
check_indices(sorter * s,int n)5312 static int check_indices (sorter *s, int n)
5313 {
5314 int i;
5315
5316 for (i=0; i<n; i++) {
5317 if (s->points[i].obsnum < 0) {
5318 gretl_errmsg_sprintf(_("Error: unit %g, period %g: duplicated observation"),
5319 s->points[i].val1, s->points[i].val2);
5320 return E_DATA;
5321 }
5322 }
5323
5324 return 0;
5325 }
5326
panel_is_sorted(DATASET * dset,int uv,int tv)5327 static int panel_is_sorted (DATASET *dset, int uv, int tv)
5328 {
5329 const double *unit = dset->Z[uv];
5330 const double *period = dset->Z[tv];
5331 int t, ret = 1;
5332
5333 for (t=1; t<dset->n && ret; t++) {
5334 if (unit[t] < unit[t-1]) {
5335 /* the unit variable must be non-decreasing */
5336 ret = 0;
5337 } else if (unit[t] == unit[t-1] && period[t] <= period[t-1]) {
5338 /* the period variable must be increasing within the
5339 observations for each unit */
5340 ret = 0;
5341 }
5342 }
5343
5344 return ret;
5345 }
5346
panel_data_sort_by(DATASET * dset,int uv,int tv,int * ustrs)5347 static int panel_data_sort_by (DATASET *dset, int uv, int tv, int *ustrs)
5348 {
5349 int n = dset->n;
5350 char **S = NULL;
5351 double *tmp = NULL;
5352 int i, t;
5353 sorter s;
5354 int err = 0;
5355
5356 tmp = malloc(n * sizeof *tmp);
5357 if (tmp == NULL) {
5358 return E_ALLOC;
5359 }
5360
5361 s.points = malloc(n * sizeof *s.points);
5362 if (s.points == NULL) {
5363 free(tmp);
5364 return E_ALLOC;
5365 }
5366
5367 if (dset->S != NULL) {
5368 S = strings_array_new_with_length(n, OBSLEN);
5369 if (S == NULL) {
5370 free(tmp);
5371 free(s.points);
5372 return E_ALLOC;
5373 }
5374 }
5375
5376 s.sortvar1 = uv;
5377 s.sortvar2 = tv;
5378
5379 sorter_fill_points(&s, (const double **) dset->Z, n);
5380
5381 qsort((void *) s.points, (size_t) n, sizeof s.points[0],
5382 compare_obs);
5383
5384 err = check_indices(&s, n);
5385 if (err) {
5386 goto bailout;
5387 }
5388
5389 for (i=1; i<dset->v; i++) {
5390 for (t=0; t<n; t++) {
5391 tmp[t] = dset->Z[i][t];
5392 }
5393 for (t=0; t<n; t++) {
5394 dset->Z[i][t] = tmp[s.points[t].obsnum];
5395 }
5396 }
5397
5398 if (S != NULL) {
5399 *ustrs = 1;
5400 for (t=0; t<n; t++) {
5401 strcpy(S[t], dset->S[t]);
5402 if (S[t][0] == '\0') {
5403 *ustrs = 0;
5404 }
5405 }
5406 for (t=0; t<n; t++) {
5407 strcpy(dset->S[t], S[s.points[t].obsnum]);
5408 }
5409 for (t=1; t<n && *ustrs; t++) {
5410 if (dset->Z[uv][t] == dset->Z[uv][t-1] &&
5411 strcmp(dset->S[t], dset->S[t-1])) {
5412 *ustrs = 0;
5413 }
5414 }
5415 strings_array_free(S, n);
5416 }
5417
5418 bailout:
5419
5420 free(s.points);
5421 free(tmp);
5422
5423 return err;
5424 }
5425
5426 /* Given the variables coding for panel unit and time period,
5427 construct parallel arrays of int that code for the same
5428 information, but are zero-based and consecutive.
5429 */
5430
normalize_uid_tid(const double * tid,int T,const DATASET * dset,int uv,int tv,int ** pnuid,int ** pntid)5431 static int normalize_uid_tid (const double *tid, int T,
5432 const DATASET *dset,
5433 int uv, int tv,
5434 int **pnuid, int **pntid)
5435 {
5436 int *nuid = NULL;
5437 int *ntid = NULL;
5438 int ui, tmin;
5439 int i, t;
5440
5441 nuid = malloc(dset->n * sizeof *nuid);
5442 ntid = malloc(dset->n * sizeof *ntid);
5443 if (nuid == NULL || ntid == NULL) {
5444 free(nuid);
5445 free(ntid);
5446 return E_ALLOC;
5447 }
5448
5449 ui = tmin = 0;
5450
5451 for (i=0; i<dset->n; i++) {
5452 if (i > 0 && dset->Z[uv][i] > dset->Z[uv][i-1]) {
5453 tmin = 0;
5454 ui++;
5455 } else if (i > 0) {
5456 tmin++;
5457 }
5458 nuid[i] = ui;
5459 for (t=tmin; t<T; t++) {
5460 if (dset->Z[tv][i] == tid[t]) {
5461 ntid[i] = t;
5462 break;
5463 }
5464 }
5465 }
5466
5467 *pnuid = nuid;
5468 *pntid = ntid;
5469
5470 return 0;
5471 }
5472
5473 /* Handle the case where a sub-sampled panel dataset has been padded
5474 with rows containing NAs to recreate a (nominally) balanced panel
5475 structure. We have to get rid of the padding before we try
5476 restoring the full data range. Note that while this function
5477 "shrinks" @dset in the sense of reducing the series length as
5478 recorded in dset->n, it does not "physically" shrink the array
5479 members of dset->Z.
5480 */
5481
undo_panel_padding(DATASET * dset)5482 int undo_panel_padding (DATASET *dset)
5483 {
5484 char *mask = dset->padmask;
5485 double *Zi;
5486 char **S = NULL;
5487 int padded_n = dset->n; /* current series length */
5488 int real_n = dset->n; /* target series length */
5489 int i, t;
5490 int err = 0;
5491
5492 for (t=0; t<dset->n; t++) {
5493 if (mask[t]) {
5494 real_n--;
5495 }
5496 }
5497
5498 fprintf(stderr, "undo_panel_padding: padded n*T = %d, original dset->n = %d\n",
5499 padded_n, real_n);
5500
5501 if (real_n == padded_n) {
5502 fprintf(stderr, "strange, couldn't find any padding!\n");
5503 return E_DATA;
5504 }
5505
5506 /* temporary holder for shorter series */
5507 Zi = malloc(real_n * sizeof *Zi);
5508
5509 if (Zi == NULL) {
5510 return E_ALLOC;
5511 }
5512
5513 if (dset->S != NULL) {
5514 /* holder for shorter obs labels array */
5515 S = strings_array_new_with_length(real_n, OBSLEN);
5516 }
5517
5518 if (!err) {
5519 /* write non-padding rows from dset->Z (and dset->S, if present)
5520 into the right places in the reduced-size arrays */
5521 int s;
5522
5523 for (i=0; i<dset->v; i++) {
5524 s = 0;
5525 for (t=0; t<padded_n; t++) {
5526 if (!mask[t]) {
5527 Zi[s] = dset->Z[i][t];
5528 if (i == 0 && S != NULL) {
5529 strcpy(S[s], dset->S[t]);
5530 }
5531 s++;
5532 }
5533 }
5534 /* copy data back to dset->Z */
5535 memcpy(dset->Z[i], Zi, real_n * sizeof *Zi);
5536 }
5537
5538 if (dset->S != NULL && S != NULL) {
5539 strings_array_free(dset->S, padded_n);
5540 dset->S = S;
5541 }
5542 }
5543
5544 free(Zi);
5545
5546 dset->n = real_n;
5547 dset->t2 = dset->n - 1;
5548 free(dset->padmask);
5549 dset->padmask = NULL;
5550
5551 return err;
5552 }
5553
pad_panel_dataset(const double * uid,int uv,int nunits,const double * tid,int tv,int nperiods,DATASET * dset,int ustrs,char * mask)5554 static int pad_panel_dataset (const double *uid, int uv, int nunits,
5555 const double *tid, int tv, int nperiods,
5556 DATASET *dset, int ustrs, char *mask)
5557 {
5558 double **bigZ = NULL;
5559 char **S = NULL;
5560 int *nuid = NULL;
5561 int *ntid = NULL;
5562 int big_n, n_orig = dset->n;
5563 int i, s, t;
5564 int err = 0;
5565
5566 big_n = nunits * nperiods;
5567
5568 fprintf(stderr, "pad_panel_dataset: n*T = %d*%d = %d but dset->n = %d\n",
5569 nunits, nperiods, big_n, n_orig);
5570
5571 err = normalize_uid_tid(tid, nperiods, dset, uv, tv,
5572 &nuid, &ntid);
5573 if (err) {
5574 return err;
5575 }
5576
5577 bigZ = doubles_array_new(dset->v, big_n);
5578 if (bigZ == NULL) {
5579 err = E_ALLOC;
5580 } else {
5581 dset->n = big_n;
5582 dset->t2 = dset->n - 1;
5583 for (i=0; i<dset->v; i++) {
5584 for (t=0; t<big_n; t++) {
5585 bigZ[i][t] = (i == 0)? 1.0 : NADBL;
5586 }
5587 }
5588 }
5589
5590 if (!err && dset->S != NULL && ustrs) {
5591 S = strings_array_new_with_length(dset->n, OBSLEN);
5592 }
5593
5594 if (!err) {
5595 int j, buv = 0, btv = 0;
5596
5597 if (mask != NULL) {
5598 for (t=0; t<big_n; t++) {
5599 mask[t] = 1;
5600 }
5601 }
5602
5603 /* write rows from original Z into the right places in bigZ */
5604 for (t=0; t<n_orig; t++) {
5605 s = nuid[t] * nperiods + ntid[t];
5606 for (i=1; i<dset->v; i++) {
5607 bigZ[i][s] = dset->Z[i][t];
5608 }
5609 if (S != NULL) {
5610 strcpy(S[s], dset->S[t]);
5611 }
5612 if (mask != NULL) {
5613 /* record (non-)padding in @mask */
5614 mask[s] = 0;
5615 }
5616 }
5617
5618 /* where are uv and tv in bigZ? */
5619 j = 1;
5620 for (i=1; i<dset->v; i++) {
5621 if (i == uv) {
5622 buv = j;
5623 } else if (i == tv) {
5624 btv = j;
5625 }
5626 j++;
5627 }
5628
5629 /* complete the index info in slots uv and tv */
5630 i = j = 0;
5631 for (t=0; t<dset->n; t++) {
5632 if (t > 0 && t % nperiods == 0) {
5633 i++;
5634 j = 0;
5635 }
5636 bigZ[buv][t] = uid[i];
5637 bigZ[btv][t] = tid[j++];
5638 }
5639
5640 /* swap the padded arrays into Z */
5641 for (i=0; i<dset->v; i++) {
5642 free(dset->Z[i]);
5643 dset->Z[i] = bigZ[i];
5644 }
5645
5646 free(bigZ);
5647 }
5648
5649 free(nuid);
5650 free(ntid);
5651
5652 if (!err && dset->S != NULL) {
5653 /* expand the obs (unit) marker strings appropriately */
5654 if (S == NULL) {
5655 strings_array_free(dset->S, n_orig);
5656 dset->S = NULL;
5657 dset->markers = NO_MARKERS;
5658 } else {
5659 char si[OBSLEN];
5660 int j;
5661
5662 for (i=0; i<nunits; i++) {
5663 t = i * nperiods;
5664 for (j=0; j<nperiods; j++) {
5665 if (S[t][0] != '\0') {
5666 strcpy(si, S[t]);
5667 break;
5668 }
5669 t++;
5670 }
5671 t = i * nperiods;
5672 for (j=0; j<nperiods; j++) {
5673 strcpy(S[t++], si);
5674 }
5675 }
5676 strings_array_free(dset->S, n_orig);
5677 dset->S = S;
5678 }
5679 }
5680
5681 return err;
5682 }
5683
check_index_values(const double * x,int n)5684 static int check_index_values (const double *x, int n)
5685 {
5686 int i;
5687
5688 for (i=1; i<n; i++) {
5689 if (x[i] < 0) {
5690 return E_DATA;
5691 } else if (na(x[i])) {
5692 return E_MISSDATA;
5693 }
5694 }
5695
5696 return 0;
5697 }
5698
uv_tv_from_varnames(const char * uname,const char * tname,const DATASET * dset,int * uv,int * tv)5699 static int uv_tv_from_varnames (const char *uname, const char *tname,
5700 const DATASET *dset, int *uv, int *tv)
5701 {
5702 *uv = current_series_index(dset, uname);
5703 if (*uv <= 0) {
5704 return E_DATA;
5705 }
5706
5707 *tv = current_series_index(dset, tname);
5708 if (*tv <= 0) {
5709 return E_DATA;
5710 }
5711
5712 return 0;
5713 }
5714
5715 #if PDEBUG
print_unit_var(int uv,double ** Z,int n,int after)5716 static void print_unit_var (int uv, double **Z, int n, int after)
5717 {
5718 int i, imax = 20;
5719
5720 fprintf(stderr, "Z[uv], %s sorting (first %d obs):\n",
5721 (after)? "after" : "before", imax);
5722 for (i=0; i<n && i<imax; i++) {
5723 fprintf(stderr, " Z[%d][%d] = %g\n", uv, i, Z[uv][i]);
5724 }
5725 }
5726 #endif
5727
finalize_panel_datainfo(DATASET * dset,int nperiods)5728 static void finalize_panel_datainfo (DATASET *dset, int nperiods)
5729 {
5730 int pdp = nperiods;
5731 int den = 10.0;
5732
5733 while ((pdp = pdp / 10)) {
5734 den *= 10;
5735 }
5736 dset->structure = STACKED_TIME_SERIES;
5737 dset->pd = nperiods;
5738 dset->sd0 = 1.0 + 1.0 / den;
5739 ntolabel(dset->stobs, 0, dset);
5740 ntolabel(dset->endobs, dset->n - 1, dset);
5741 }
5742
check_full_dataset(void)5743 static int check_full_dataset (void)
5744 {
5745 DATASET *fset = fetch_full_dataset();
5746
5747 if (!dataset_is_panel(fset)) {
5748 const char *msg =
5749 "You cannot use the --panel-vars option with the setobs command when\n"
5750 "\n"
5751 "* the dataset is currently sub-sampled and\n"
5752 "* the full dataset is not a panel.\n"
5753 "\n"
5754 "If you first structure your full dataset as a panel, you can then\n"
5755 "do what you are trying to do.";
5756
5757 gretl_errmsg_set(msg);
5758 return E_DATA;
5759 }
5760
5761 return 0;
5762 }
5763
5764 /**
5765 * set_panel_structure_from_vars:
5766 * @uv: index of series uniquely identifying units/groups.
5767 * @tv: index of series uniquely identifying time periods.
5768 * @dset: pointer to dataset.
5769 *
5770 * Sets the panel structure of @dset based on the information
5771 * in the series with indices @uv and @tv, if possible.
5772 *
5773 * Returns: 0 on success, non-zero code on error.
5774 */
5775
set_panel_structure_from_vars(int uv,int tv,DATASET * dset)5776 int set_panel_structure_from_vars (int uv, int tv, DATASET *dset)
5777 {
5778 int subsampled, presorted;
5779 double *uid = NULL;
5780 double *tid = NULL;
5781 char *mask = NULL;
5782 int n = dset->n;
5783 int totmiss = 0;
5784 int fulln = 0;
5785 int nunits = 0;
5786 int nperiods = 0;
5787 int ustrs = 0;
5788 int err = 0;
5789
5790 subsampled = complex_subsampled();
5791
5792 if (subsampled) {
5793 err = check_full_dataset();
5794 if (err) {
5795 return err;
5796 }
5797 }
5798
5799 #if PDEBUG
5800 fprintf(stderr, "\n*** set_panel_structure_from_vars:\n "
5801 "using var %d ('%s') for unit, var %d ('%s') for time\n",
5802 uv, dset->varname[uv], tv, dset->varname[tv]);
5803 print_unit_var(uv, dset->Z, n, 0);
5804 #endif
5805
5806 /* start by making sorted copies of the unit and time
5807 variables and counting the unique values of each
5808 */
5809
5810 uid = copyvec(dset->Z[uv], n);
5811 tid = copyvec(dset->Z[tv], n);
5812
5813 if (uid == NULL || tid == NULL) {
5814 err = E_ALLOC;
5815 goto bailout;
5816 }
5817
5818 qsort(uid, n, sizeof *uid, gretl_compare_doubles);
5819 nunits = count_distinct_values(uid, dset->n);
5820
5821 qsort(tid, n, sizeof *tid, gretl_compare_doubles);
5822 nperiods = count_distinct_values(tid, dset->n);
5823
5824 /* check that we have a possible panel structure */
5825
5826 if (nunits == 1 || nperiods == 1 ||
5827 nunits == n || nperiods == n ||
5828 n > nunits * nperiods) {
5829 fprintf(stderr, "Dataset does not have a panel structure\n");
5830 fprintf(stderr, " (nunits=%d, nperiods=%d, n=%d)\n", nunits, nperiods, n);
5831 err = E_DATA;
5832 goto bailout;
5833 }
5834
5835 /* figure how many panel-data rows are implied, and how many
5836 "padding" rows are needed to complete the structure
5837 */
5838
5839 fulln = nunits * nperiods;
5840 totmiss = fulln - dset->n;
5841
5842 #if PDEBUG
5843 fprintf(stderr, "Found %d units, %d periods\n", nunits, nperiods);
5844 fprintf(stderr, "Units: min %g, max %g\n", uid[0], uid[n - 1]);
5845 fprintf(stderr, "Periods: min %g, max %g\n", tid[0], tid[n - 1]);
5846 fprintf(stderr, "Required rows = %d * %d = %d\n", nunits, nperiods, fulln);
5847 fprintf(stderr, "Missing rows = %d - %d = %d\n", fulln, dset->n, totmiss);
5848 #endif
5849
5850 /* determine if the data rows are already in sort order by unit,
5851 and by period for each unit */
5852
5853 presorted = panel_is_sorted(dset, uv, tv);
5854
5855 #if PDEBUG
5856 fprintf(stderr, "panel_is_sorted? %s\n", presorted ? "yes" : "no");
5857 #endif
5858
5859 if (!presorted) {
5860 if (subsampled) {
5861 /* We can't re-order the rows in a subsampled dataset, or the
5862 data will get scrambled when restoration of the full dataset
5863 is attempted.
5864 */
5865 gretl_errmsg_set(_("Sorry, can't do this with a sub-sampled dataset"));
5866 err = E_DATA;
5867 } else {
5868 /* sort full dataset by unit and period */
5869 err = panel_data_sort_by(dset, uv, tv, &ustrs);
5870 }
5871 #if PDEBUG
5872 if (!err) print_unit_var(uv, dset->Z, n, 1);
5873 #endif
5874 }
5875
5876 if (!err && totmiss > 0) {
5877 /* establish a nominally balanced panel */
5878 mask = malloc(fulln);
5879 if (mask == NULL) {
5880 err = E_ALLOC;
5881 } else {
5882 rearrange_id_array(uid, nunits, n);
5883 rearrange_id_array(tid, nperiods, n);
5884 err = pad_panel_dataset(uid, uv, nunits,
5885 tid, tv, nperiods,
5886 dset, ustrs,
5887 mask);
5888 }
5889 }
5890
5891 if (!err) {
5892 finalize_panel_datainfo(dset, nperiods);
5893 }
5894
5895 bailout:
5896
5897 free(uid);
5898 free(tid);
5899
5900 if (!err && complex_subsampled()) {
5901 dset->padmask = mask;
5902 } else {
5903 free(mask);
5904 }
5905
5906 return err;
5907 }
5908
5909 int
set_panel_structure_from_varnames(const char * uname,const char * tname,DATASET * dset)5910 set_panel_structure_from_varnames (const char *uname, const char *tname,
5911 DATASET *dset)
5912 {
5913 int n = dset->n;
5914 int uv = 0, tv = 0;
5915 int err = 0;
5916
5917 err = uv_tv_from_varnames(uname, tname, dset, &uv, &tv);
5918
5919 if (!err) {
5920 err = check_index_values(dset->Z[uv], n);
5921 }
5922
5923 if (!err) {
5924 err = check_index_values(dset->Z[tv], n);
5925 }
5926
5927 if (!err) {
5928 err = set_panel_structure_from_vars(uv, tv, dset);
5929 }
5930
5931 return err;
5932 }
5933
group_uniqueness_check(char ** S,int n)5934 static int group_uniqueness_check (char **S, int n)
5935 {
5936 int i, j;
5937
5938 for (i=0; i<n-1; i++) {
5939 for (j=i+1; j<n; j++) {
5940 if (!strcmp(S[i], S[j])) {
5941 gretl_errmsg_sprintf("The string '%s' is given for "
5942 "two or more groups", S[i]);
5943 return E_DATA;
5944 }
5945 }
5946 }
5947
5948 return 0;
5949 }
5950
group_names_from_array(const char * aname,int n_groups,int * err)5951 static char **group_names_from_array (const char *aname,
5952 int n_groups,
5953 int *err)
5954 {
5955 gretl_array *A = get_array_by_name(aname);
5956 char **S = NULL;
5957 int ns = 0;
5958
5959 if (A == NULL) {
5960 *err = E_DATA;
5961 } else {
5962 char **AS = gretl_array_get_strings(A, &ns);
5963
5964 if (ns != n_groups) {
5965 *err = E_DATA;
5966 } else {
5967 S = strings_array_dup(AS, ns);
5968 if (S == NULL) {
5969 *err = E_ALLOC;
5970 }
5971 }
5972 }
5973
5974 return S;
5975 }
5976
5977 /* usable_groups_series: to be usable as the basis for panel
5978 groups strings, a series must have values that are (a)
5979 constant within-group and (b) unique across groups. Note
5980 that we only get to this test if the series in question
5981 has already been found to be string-valued.
5982 */
5983
usable_groups_series(DATASET * dset,int v,const char * vname)5984 static int usable_groups_series (DATASET *dset, int v,
5985 const char *vname)
5986 {
5987 const double *x = dset->Z[v];
5988 int i, j, g = 0;
5989 int ok = 1;
5990
5991 if (na(x[0])) {
5992 return 0;
5993 }
5994
5995 for (i=1; i<dset->n && ok; i++) {
5996 if (na(x[i])) {
5997 ok = 0;
5998 } else if (i % dset->pd == 0) {
5999 /* starting a new group: x-value must not repeat a
6000 prior group's value */
6001 for (j=0; j<=g; j++) {
6002 if (x[i] == x[j*dset->pd]) {
6003 gretl_errmsg_sprintf("%s: values are not unique per group",
6004 vname);
6005 ok = 0;
6006 }
6007 }
6008 g++;
6009 } else if (x[i] != x[i-1]) {
6010 gretl_errmsg_sprintf("%s: is not constant within group", vname);
6011 ok = 0;
6012 }
6013 }
6014
6015 return ok;
6016 }
6017
maybe_use_strval_series(DATASET * dset,const char * vname,int ng)6018 static int maybe_use_strval_series (DATASET *dset,
6019 const char *vname,
6020 int ng)
6021 {
6022 int v = current_series_index(dset, vname);
6023 series_table *st = NULL;
6024 int ns = 0;
6025 int err = 0;
6026
6027 if (v <= 0) {
6028 return E_INVARG;
6029 }
6030
6031 st = series_get_string_table(dset, v);
6032 if (st == NULL) {
6033 gretl_errmsg_sprintf("The series %s is not string-valued", vname);
6034 return E_INVARG;
6035 }
6036
6037 series_table_get_strings(st, &ns);
6038
6039 #if 0
6040 fprintf(stderr, "maybe_use_strval_series (%s, ID %d, ns=%d)\n", vname, v, ns);
6041 #endif
6042
6043 if (ns < ng) {
6044 gretl_errmsg_sprintf("The series %s holds %d strings but %d "
6045 "are needed", vname, ns, ng);
6046 err = E_INVARG;
6047 } else {
6048 /* note: don't mess with numerical values, just check them */
6049 if (usable_groups_series(dset, v, vname)) {
6050 set_panel_groups_name(dset, vname);
6051 } else {
6052 err = E_INVARG;
6053 }
6054 }
6055
6056 return err;
6057 }
6058
6059 /* Enable construction of a string-valued series holding a name for
6060 each panel group/unit. The name of this series is set on the
6061 'pangrps' member of @dset, and the names are then used in panel
6062 graphs where appropriate.
6063
6064 @vname should contain the name of a series, either an existing one
6065 (which may be overwritten) or a new one to create; and @grpnames
6066 should be (1) a string literal, or (2) the name of a string variable
6067 (which in either case should hold N space-separated strings, where N
6068 is the number of panel groups), or (3) the name of an array variable
6069 holding N strings.
6070 */
6071
set_panel_group_strings(const char * vname,const char * grpnames,DATASET * dset)6072 int set_panel_group_strings (const char *vname,
6073 const char *grpnames,
6074 DATASET *dset)
6075 {
6076 const char *namestr = NULL;
6077 char **S = NULL;
6078 int ng = dset->n / dset->pd;
6079 int v, orig_v = dset->v;
6080 int err = 0;
6081
6082 if (vname == NULL || *vname == '\0') {
6083 return E_DATA;
6084 }
6085
6086 if (grpnames == NULL || *grpnames == '\0') {
6087 /* We just got the name of a series? That may be
6088 OK if it's string-valued and has a suitable
6089 set of values.
6090 */
6091 return maybe_use_strval_series(dset, vname, ng);
6092 }
6093
6094 if (strchr(grpnames, ' ')) {
6095 /* group names as string literal */
6096 namestr = grpnames;
6097 } else if (gretl_is_string(grpnames)) {
6098 /* group names in a single string variable */
6099 namestr = get_string_by_name(grpnames);
6100 } else {
6101 /* try for array of strings */
6102 S = group_names_from_array(grpnames, ng, &err);
6103 }
6104
6105 if (namestr != NULL) {
6106 /* we must obtain @S by splitting */
6107 int ns = 0;
6108
6109 if (strchr(namestr, '"') != NULL) {
6110 S = gretl_string_split_quoted(namestr, &ns, NULL, &err);
6111 } else {
6112 S = gretl_string_split(namestr, &ns, " \n\t");
6113 }
6114 if (!err && S == NULL) {
6115 err = E_ALLOC;
6116 }
6117 if (!err && ns != ng) {
6118 /* FIXME subsampled case? */
6119 fprintf(stderr, "Got %d strings but there are %d groups\n",
6120 ns, ng);
6121 strings_array_free(S, ns);
6122 S = NULL;
6123 err = E_DATA;
6124 }
6125 }
6126
6127 if (!err && S != NULL) {
6128 err = group_uniqueness_check(S, ng);
6129 }
6130
6131 if (!err) {
6132 v = current_series_index(dset, vname);
6133 if (v < 0) {
6134 /* we need to add a series */
6135 char *gen = gretl_strdup_printf("%s", vname);
6136
6137 err = generate(gen, dset, GRETL_TYPE_SERIES, OPT_Q, NULL);
6138 if (!err) {
6139 v = dset->v - 1;
6140 }
6141 free(gen);
6142 }
6143 }
6144
6145 if (!err) {
6146 series_table *st = series_table_new(S, ng, &err);
6147
6148 if (!err) {
6149 int i, g = 0;
6150
6151 series_attach_string_table(dset, v, st);
6152 for (i=0; i<dset->n; i++) {
6153 if (i % dset->pd == 0) {
6154 g++;
6155 }
6156 dset->Z[v][i] = g;
6157 }
6158 }
6159 }
6160
6161 if (err) {
6162 if (S != NULL) {
6163 strings_array_free(S, ng);
6164 }
6165 if (dset->v > orig_v) {
6166 dataset_drop_last_variables(dset, dset->v - orig_v);
6167 }
6168 } else {
6169 set_panel_groups_name(dset, vname);
6170 }
6171
6172 return err;
6173 }
6174
6175 /* utility functions */
6176
find_time_var(const DATASET * dset)6177 static int find_time_var (const DATASET *dset)
6178 {
6179 const char *tnames[] = {
6180 "year",
6181 "Year",
6182 "period",
6183 "Period",
6184 NULL
6185 };
6186 int i, v;
6187
6188 for (i=0; tnames[i] != NULL; i++) {
6189 v = series_index(dset, tnames[i]);
6190 if (v < dset->v) {
6191 return v;
6192 }
6193 }
6194
6195 return 0;
6196 }
6197
guess_panel_structure(double ** Z,DATASET * dset)6198 int guess_panel_structure (double **Z, DATASET *dset)
6199 {
6200 int ret, v = find_time_var(dset);
6201
6202 if (v == 0) {
6203 ret = 0; /* can't guess */
6204 } else if (floateq(Z[v][0], Z[v][1])) {
6205 /* "year" or whatever is same for first two obs */
6206 dset->structure = STACKED_CROSS_SECTION;
6207 ret = STACKED_CROSS_SECTION;
6208 } else {
6209 dset->structure = STACKED_TIME_SERIES;
6210 ret = STACKED_TIME_SERIES;
6211 }
6212
6213 return ret;
6214 }
6215
balanced_panel(const DATASET * dset)6216 int balanced_panel (const DATASET *dset)
6217 {
6218 int n = dset->t2 - dset->t1 + 1;
6219 int ret = 0;
6220
6221 if (n % dset->pd == 0) {
6222 char unit[OBSLEN], period[OBSLEN];
6223
6224 if (sscanf(dset->endobs, "%[^:]:%s", unit, period) == 2) {
6225 if (atoi(period) == dset->pd) {
6226 ret = 1;
6227 }
6228 }
6229 }
6230
6231 return ret;
6232 }
6233
may_be_time_name(const char * vname)6234 static int may_be_time_name (const char *vname)
6235 {
6236 char test[VNAMELEN];
6237
6238 strcpy(test, vname);
6239 gretl_lower(test);
6240
6241 return strcmp(test, "year") == 0 ||
6242 strcmp(test, "period") == 0;
6243 }
6244
6245 /* See if the panel dataset contains a variable that plausibly represents
6246 the year of the observations. We look for series labeled "year" or
6247 "period" (in a case insensitive comparison) and, if found, check
6248 that the series has the same sequence of values for each individual,
6249 and the same increment between successive time-series observations.
6250 (The increment does not have to be 1, to accommodate, e.g.,
6251 quinquennial or decennial observations.)
6252
6253 Return the ID number of a suitable variable, or 0 if there's nothing
6254 suitable.
6255
6256 This may be used in setting the x-axis tic marks in a panel time-
6257 series plot.
6258 */
6259
plausible_panel_time_var(const DATASET * dset)6260 int plausible_panel_time_var (const DATASET *dset)
6261 {
6262 int i, t, ret = 0;
6263
6264 for (i=1; i<dset->v && ret==0; i++) {
6265 if (may_be_time_name(dset->varname[i])) {
6266 const double *x = dset->Z[i];
6267 int val0 = x[0];
6268 int incr0 = x[1] - x[0];
6269 int ok = 1;
6270
6271 for (t=0; t<dset->n && ok; t++) {
6272 if (na(x[t]) || x[t] < 0) {
6273 ok = 0;
6274 } else if (t > 0 && t % dset->pd == 0) {
6275 if (x[t] != val0) {
6276 ok = 0;
6277 }
6278 } else if (t > 1 && x[t] - x[t-1] != incr0) {
6279 ok = 0;
6280 }
6281 }
6282 if (ok) {
6283 ret = i;
6284 }
6285 }
6286 }
6287
6288 return ret;
6289 }
6290
6291 /* FIXME: this does not yet handle the dropping of instruments */
6292
dpanel_list_omit(const MODEL * orig,const int * drop,int * err)6293 static int *dpanel_list_omit (const MODEL *orig, const int *drop, int *err)
6294 {
6295 const int *old = orig->list;
6296 int *new = gretl_list_copy(old);
6297 int sep = 0;
6298 int i, j;
6299
6300 if (new == NULL) {
6301 *err = E_ALLOC;
6302 return NULL;
6303 }
6304
6305 for (i=2; i<=new[0]; i++) {
6306 if (new[i] == LISTSEP) {
6307 sep++;
6308 }
6309 if (sep == 1) {
6310 for (j=1; j<=drop[0]; j++) {
6311 if (drop[j] == new[i]) {
6312 gretl_list_delete_at_pos(new, i--);
6313 }
6314 }
6315 }
6316 }
6317
6318 #if 0
6319 printlist(old, "old");
6320 printlist(drop, "drop");
6321 printlist(new, "new");
6322 #endif
6323
6324 return new;
6325 }
6326
6327 /**
6328 * panel_list_omit:
6329 * @orig: list specifying original panel model.
6330 * @drop: list of variables to be omitted.
6331 * @err: pointer to receive error code.
6332 *
6333 * Creates a new panel regression list, by first reconstructing
6334 * the regression specification from @orig then deleting from
6335 * the reconstruction the variables found in @drop.
6336 *
6337 * Returns: the new, reduced list or %NULL on error.
6338 */
6339
panel_list_omit(const MODEL * orig,const int * drop,int * err)6340 int *panel_list_omit (const MODEL *orig, const int *drop, int *err)
6341 {
6342 int *newlist = NULL;
6343 int i;
6344
6345 if (orig->ci == DPANEL) {
6346 return dpanel_list_omit(orig, drop, err);
6347 }
6348
6349 /* sorry, can't drop the constant */
6350 if (drop != NULL) {
6351 int cpos = in_gretl_list(drop, 0);
6352
6353 if (cpos >= 2) {
6354 gretl_errmsg_set("Panel models must include an intercept");
6355 *err = E_DATA;
6356 return NULL;
6357 }
6358 }
6359
6360 if (orig->opt & OPT_F) {
6361 int *panlist;
6362
6363 /* fixed-effects lists have the constant removed,
6364 so we need to put it back first
6365 */
6366 panlist = gretl_list_new(orig->list[0] + 1);
6367 if (panlist != NULL) {
6368 panlist[1] = orig->list[1];
6369 panlist[2] = 0;
6370 for (i=3; i<=panlist[0]; i++) {
6371 panlist[i] = orig->list[i-1];
6372 }
6373 if (drop == NULL) {
6374 newlist = gretl_list_omit_last(panlist, err);
6375 } else {
6376 newlist = gretl_list_omit(panlist, drop, 2, err);
6377 }
6378 free(panlist);
6379 }
6380 } else if (drop == NULL) {
6381 newlist = gretl_list_omit_last(orig->list, err);
6382 } else {
6383 newlist = gretl_list_omit(orig->list, drop, 2, err);
6384 }
6385
6386 return newlist;
6387 }
6388
6389 /* FIXME doesn't handle adding instruments */
6390
dpanel_list_add(const MODEL * orig,const int * add,int * err)6391 static int *dpanel_list_add (const MODEL *orig, const int *add, int *err)
6392 {
6393 const int *old = orig->list;
6394 int *new = gretl_list_copy(old);
6395 int sep = 0, pos = old[0] + 1;
6396 int i;
6397
6398 if (new == NULL) {
6399 *err = E_ALLOC;
6400 return NULL;
6401 }
6402
6403 for (i=2; i<=old[0]; i++) {
6404 if (old[i] == LISTSEP) {
6405 sep++;
6406 if (sep == 2) {
6407 pos = i - 1;
6408 }
6409 }
6410 }
6411
6412 gretl_list_insert_list(&new, add, pos);
6413 if (new == NULL) {
6414 *err = E_ALLOC;
6415 return NULL;
6416 }
6417
6418 #if 0
6419 printlist(old, "old");
6420 printlist(add, "add");
6421 printlist(new, "new");
6422 #endif
6423
6424 return new;
6425 }
6426
6427 /**
6428 * panel_list_add:
6429 * @orig: list specifying original panel model.
6430 * @add: list of variables to be added.
6431 * @err: pointer to receive error code.
6432 *
6433 * Creates a new panel regression list, by first reconstructing
6434 * the regression specification from @orig then adding to
6435 * the reconstruction the variables found in @add.
6436 *
6437 * Returns: the new, augmented list or %NULL on error.
6438 */
6439
panel_list_add(const MODEL * orig,const int * add,int * err)6440 int *panel_list_add (const MODEL *orig, const int *add, int *err)
6441 {
6442 if (orig->ci == DPANEL) {
6443 return dpanel_list_add(orig, add, err);
6444 } else {
6445 return gretl_list_add(orig->list, add, err);
6446 }
6447 }
6448
6449 /* calculate the within and between standard deviations for a given
6450 variable in a panel data set */
6451
panel_variance_info(const double * x,const DATASET * dset,double xbar,double * psw,double * psb)6452 int panel_variance_info (const double *x, const DATASET *dset,
6453 double xbar, double *psw, double *psb)
6454 {
6455 double sw = 0.0, sb = 0.0;
6456 double xibar, d;
6457 int effn, effnT;
6458 int n, T, nT, Ti;
6459 int i, t, s;
6460
6461 if (!dataset_is_panel(dset)) {
6462 return E_PDWRONG;
6463 }
6464
6465 nT = dset->t2 - dset->t1 + 1;
6466 T = dset->pd;
6467 n = nT / T;
6468
6469 effn = 0;
6470 effnT = 0;
6471
6472 for (i=0; i<n; i++) {
6473 Ti = 0;
6474 xibar = 0.0;
6475 for (t=0; t<T; t++) {
6476 s = dset->t1 + i * T + t;
6477 if (!na(x[s])) {
6478 Ti++;
6479 xibar += x[s];
6480 }
6481 }
6482 if (Ti > 1) {
6483 xibar /= Ti;
6484 for (t=0; t<T; t++) {
6485 s = dset->t1 + i * T + t;
6486 if (!na(x[s])) {
6487 d = x[s] - xibar;
6488 sw += d * d;
6489 }
6490 }
6491 }
6492 if (Ti > 0) {
6493 /* is this right for singleton observations? */
6494 d = xibar - xbar;
6495 sb += d * d;
6496 effn++;
6497 effnT += Ti;
6498 }
6499 }
6500
6501 if (effn > 1) {
6502 /* between variance: \sum_{i=1}^N (\bar{x}_i - \bar{x})^2
6503 \times (N-1)^{-1}
6504 */
6505 sb /= (effn - 1);
6506 sb = sqrt(sb);
6507 } else {
6508 sb = NADBL;
6509 }
6510
6511 if (effnT - effn > 0) {
6512 /* within variance: \sum_{i=1}^N \sum_{t=1}^T (x_{it} - \bar{x}_i)^2
6513 \times (\sum_{i=1}^N T_i - N)^{-1}
6514 */
6515 sw /= (effnT - effn);
6516 sw = sqrt(sw);
6517 } else {
6518 sw = NADBL;
6519 }
6520
6521 *psw = sw;
6522 *psb = sb;
6523
6524 return 0;
6525 }
6526
series_is_group_invariant(const DATASET * dset,int v)6527 int series_is_group_invariant (const DATASET *dset, int v)
6528 {
6529 const double *x = dset->Z[v];
6530 int T = dset->pd;
6531 int N = dset->n / T;
6532 int i, t, s;
6533
6534 for (i=1; i<N; i++) {
6535 for (t=0; t<T; t++) {
6536 s = t + i * T;
6537 if (x[s] != x[t]) {
6538 return 0;
6539 }
6540 }
6541 }
6542
6543 return 1;
6544 }
6545
panel_padding_rows(const DATASET * dset)6546 int panel_padding_rows (const DATASET *dset)
6547 {
6548 int missrow, nmiss = 0;
6549 int i, t;
6550
6551 for (t=dset->t1; t<=dset->t2; t++) {
6552 missrow = 1;
6553 for (i=1; i<dset->v; i++) {
6554 if (!na(dset->Z[i][t])) {
6555 missrow = 0;
6556 break;
6557 }
6558 }
6559 if (missrow) {
6560 nmiss++;
6561 }
6562 }
6563
6564 return nmiss;
6565 }
6566