1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 /* various functions called by 'genr' */
21 
22 #include "genparse.h"
23 #include "libset.h"
24 #include "monte_carlo.h"
25 #include "gretl_panel.h"
26 #include "gretl_cmatrix.h"
27 #include "gretl_typemap.h"
28 #include "gretl_midas.h"
29 #include "estim_private.h"
30 #include "matrix_extra.h"
31 #include "gretl_btree.h"
32 #include "kalman.h"
33 #include "../../cephes/cephes.h"
34 
35 #include <errno.h>
36 
sort_series(const double * x,double * y,int f,const DATASET * dset)37 int sort_series (const double *x, double *y, int f,
38 		 const DATASET *dset)
39 {
40     double *z = NULL;
41     int n = sample_size(dset);
42     int i, t;
43 
44     z = malloc(n * sizeof *z);
45     if (z == NULL) {
46 	return E_ALLOC;
47     }
48 
49     i = 0;
50     for (t=dset->t1; t<=dset->t2; t++) {
51 	if (!na(x[t])) {
52 	    z[i++] = x[t];
53 	}
54     }
55 
56     if (f == F_DSORT) {
57 	qsort(z, i, sizeof *z, gretl_inverse_compare_doubles);
58     } else {
59 	qsort(z, i, sizeof *z, gretl_compare_doubles);
60     }
61 
62     i = 0;
63     for (t=dset->t1; t<=dset->t2; t++) {
64 	if (na(x[t])) {
65 	    y[t] = NADBL;
66 	} else {
67 	    y[t] = z[i++];
68 	}
69     }
70 
71     free(z);
72 
73     return 0;
74 }
75 
76 struct pair_sorter {
77     double x;
78     double y;
79 };
80 
81 /* sort the series y by the values of x, putting the result
82    into z */
83 
gretl_sort_by(const double * x,const double * y,double * z,const DATASET * dset)84 int gretl_sort_by (const double *x, const double *y,
85 		   double *z, const DATASET *dset)
86 {
87     struct pair_sorter *xy;
88     int n = sample_size(dset);
89     int i, t;
90 
91     for (t=dset->t1; t<=dset->t2; t++) {
92 	if (na(x[t])) {
93 	    return E_MISSDATA;
94 	}
95     }
96 
97     xy = malloc(n * sizeof *xy);
98     if (xy == NULL) {
99 	return E_ALLOC;
100     }
101 
102     i = 0;
103     for (t=dset->t1; t<=dset->t2; t++) {
104 	xy[i].x = x[t];
105 	xy[i].y = y[t];
106 	i++;
107     }
108 
109     qsort(xy, n, sizeof *xy, gretl_compare_doubles);
110 
111     i = 0;
112     for (t=dset->t1; t<=dset->t2; t++) {
113 	z[t] = xy[i++].y;
114     }
115 
116     free(xy);
117 
118     return 0;
119 }
120 
genrank(const double * sz,int m,const double * z,int n,double * rz)121 static void genrank (const double *sz, int m,
122 		     const double *z, int n,
123 		     double *rz)
124 {
125     int cases, k, i, j;
126     double avg, r = 1;
127 
128     for (i=0; i<m; i++) {
129 	/* scan sorted z */
130 	cases = k = 0;
131 
132 	if (i > 0 && sz[i] == sz[i-1]) {
133 	    continue;
134 	}
135 
136 	for (j=0; j<n; j++) {
137 	    /* scan raw z for matches */
138 	    if (!na(z[j])) {
139 		if (z[j] == sz[i]) {
140 		    rz[k] = r;
141 		    cases++;
142 		}
143 		k++;
144 	    }
145 	}
146 
147 	if (cases > 1) {
148 	    avg = (r + r + cases - 1.0) / 2.0;
149 	    for (j=0; j<m; j++) {
150 		if (rz[j] == r) {
151 		    rz[j] = avg;
152 		}
153 	    }
154 	}
155 
156 	r += cases;
157     }
158 }
159 
160 /* implements both rank_series() and rank_vector() */
161 
rank_array(const double * x,double * y,int f,int n)162 static int rank_array (const double *x, double *y, int f, int n)
163 {
164     double *sx = NULL;
165     double *rx = NULL;
166     int m = n;
167     int i, t;
168 
169     for (t=0; t<n; t++) {
170 	if (na(x[t])) m--;
171     }
172 
173     sx = malloc(m * sizeof *sx);
174     rx = malloc(m * sizeof *rx);
175 
176     if (sx == NULL || rx == NULL) {
177 	free(sx);
178 	free(rx);
179 	return E_ALLOC;
180     }
181 
182     i = 0;
183     for (t=0; t<n; t++) {
184 	if (!na(x[t])) {
185 	    sx[i] = x[t];
186 	    rx[i] = 0.0;
187 	    i++;
188 	}
189     }
190 
191     if (f == F_DSORT) {
192 	qsort(sx, m, sizeof *sx, gretl_inverse_compare_doubles);
193     } else {
194 	qsort(sx, m, sizeof *sx, gretl_compare_doubles);
195     }
196 
197     genrank(sx, m, x, n, rx);
198 
199     i = 0;
200     for (t=0; t<n; t++) {
201 	if (na(x[t])) {
202 	    y[t] = NADBL;
203 	} else {
204 	    y[t] = rx[i++];
205 	}
206     }
207 
208     free(sx);
209     free(rx);
210 
211     return 0;
212 }
213 
rank_series(const double * x,double * y,int f,const DATASET * dset)214 int rank_series (const double *x, double *y, int f,
215 		 const DATASET *dset)
216 {
217     return rank_array(x + dset->t1, y + dset->t1,
218 		      f, sample_size(dset));
219 }
220 
rank_vector(const gretl_matrix * x,int f,int * err)221 gretl_matrix *rank_vector (const gretl_matrix *x, int f, int *err)
222 {
223     gretl_matrix *y = NULL;
224     int n = gretl_vector_get_length(x);
225 
226     if (n == 0) {
227 	*err = E_DATA;
228     } else {
229 	y = gretl_matrix_alloc(x->rows, x->cols);
230 	if (y == NULL) {
231 	    *err = E_ALLOC;
232 	}
233     }
234 
235     if (!*err) {
236 	*err = rank_array(x->val, y->val, f, n);
237 	if (*err) {
238 	    gretl_matrix_free(y);
239 	    y = NULL;
240 	}
241     }
242 
243     return y;
244 }
245 
246 /**
247  * diff_series:
248  * @x: array of original data.
249  * @y: array into which to write the result.
250  * @f: function, F_DIFF, F_SDIFF or F_LDIFF.
251  * @dset: data set information.
252  *
253  * Calculates the differenced counterpart to the input
254  * series @x.  If @f = F_SDIFF, the seasonal difference is
255  * computed; if @f = F_LDIFF, the log difference, and if
256  * @f = F_DIFF, the ordinary first difference.
257  *
258  * Returns: 0 on success, non-zero error code on failure.
259  */
260 
diff_series(const double * x,double * y,int f,const DATASET * dset)261 int diff_series (const double *x, double *y, int f,
262 		 const DATASET *dset)
263 {
264     int k = (f == F_SDIFF)? dset->pd : 1;
265     int t, t1 = dset->t1;
266 
267     if (t1 < k) {
268 	t1 = k;
269     }
270 
271     for (t=t1; t<=dset->t2; t++) {
272 	if (dataset_is_panel(dset) && t % dset->pd == 0) {
273 	    /* skip first observation in panel unit */
274 	    continue;
275 	}
276 	if (na(x[t]) || na(x[t-k])) {
277 	    continue;
278 	}
279 	if (f == F_DIFF || f == F_SDIFF) {
280 	    y[t] = x[t] - x[t-k];
281 	} else if (f == F_LDIFF) {
282 	    if (x[t] > 0.0 && x[t-k] > 0.0) {
283 		y[t] = log(x[t]) - log(x[t-k]);
284 	    }
285 	}
286     }
287 
288     return 0;
289 }
290 
291 /**
292  * standardize_series:
293  * @x: array of original data.
294  * @y: array into which to write the result.
295  * @dfc: degrees of freedom correction.
296  * @dset: data set information.
297  *
298  * By default calculates the standardized counterpart to the input
299  * series @x, but if @dfc < 0 the result is just centered.
300  *
301  * Returns: 0 on success, non-zero error code on failure.
302  */
303 
standardize_series(const double * x,double * y,int dfc,const DATASET * dset)304 int standardize_series (const double *x, double *y, int dfc,
305 			const DATASET *dset)
306 {
307     double d, xbar = 0;
308     int t, n = 0;
309 
310     for (t=dset->t1; t<=dset->t2; t++) {
311 	if (!na(x[t])) {
312 	    xbar += x[t];
313 	    n++;
314 	}
315     }
316 
317     if (dfc >= 0 && n < dfc + 1) {
318 	return E_TOOFEW;
319     }
320 
321     xbar /= n;
322 
323     if (dfc < 0) {
324 	/* just centering */
325 	for (t=dset->t1; t<=dset->t2; t++) {
326 	    if (na(x[t])) {
327 		y[t] = NADBL;
328 	    } else {
329 		y[t] = x[t] - xbar;
330 	    }
331 	}
332     } else {
333 	/* dividing by s.d. */
334 	double sd, TSS = 0;
335 
336 	for (t=dset->t1; t<=dset->t2; t++) {
337 	    if (!na(x[t])) {
338 		d = x[t] - xbar;
339 		TSS += d * d;
340 	    }
341 	}
342 
343 	sd = sqrt(TSS / (n - dfc));
344 
345 	for (t=dset->t1; t<=dset->t2; t++) {
346 	    if (na(x[t])) {
347 		y[t] = NADBL;
348 	    } else {
349 		y[t] = (x[t] - xbar) / sd;
350 	    }
351 	}
352     }
353 
354     return 0;
355 }
356 
357 /**
358  * orthdev_series:
359  * @x: array of original data.
360  * @y: array into which to write the result.
361  * @dset: data set information.
362  *
363  * Calculates in @y the forward orthogonal deviations of the input
364  * series @x.  That is, y[t] is the scaled difference between x[t]
365  * and the mean of the subsequent observations on x.
366  *
367  * Returns: 0 on success, non-zero error code on failure.
368  */
369 
orthdev_series(const double * x,double * y,const DATASET * dset)370 int orthdev_series (const double *x, double *y, const DATASET *dset)
371 {
372     double xbar;
373     int n, s, t, Tt;
374 
375     for (t=dset->t1; t<dset->t2; t++) {
376 
377 	if (na(x[t])) {
378 	    continue;
379 	}
380 
381 	if (dataset_is_panel(dset)) {
382 	    Tt = dset->pd - (t % dset->pd) - 1;
383 	} else {
384 	    Tt = dset->t2 - t;
385 	}
386 
387 	xbar = 0.0;
388 	n = 0;
389 	for (s=1; s<=Tt; s++) {
390 	    if (!na(x[t+s])) {
391 		xbar += x[t+s];
392 		n++;
393 	    }
394 	}
395 
396 	if (n > 0) {
397 	    xbar /= n;
398 	    /* Lead one period, for compatibility with first diffs.
399 	       I.e. we lose the first observation rather than the
400 	       last.  This is for arbond.  Cf. Doornik, Bond and
401 	       Arellano, DPD documentation.
402 	    */
403 	    y[t+1] = sqrt(n / (n + 1.0)) * (x[t] - xbar);
404 	}
405     }
406 
407     return 0;
408 }
409 
410 /**
411  * fracdiff_series:
412  * @x: array of original data.
413  * @y: array into which to write the result.
414  * @d: fraction by which to difference.
415  * @diff: boolean variable 1 for fracdiff, 0 for fraclag
416  * @obs: used for autoreg calculation, -1 if whole series
417  *	should be calculated otherwise just the observation for
418  *	obs is calculated
419  * @dset: data set information.
420  *
421  * Calculates the fractionally differenced or lagged
422  * counterpart to the input series @x. The fractional
423  * difference operator is defined as (1-L)^d, while the
424  * fractional lag operator 1-(1-L)^d.
425  *
426  * Returns: 0 on success, non-zero error code on failure.
427  */
428 
fracdiff_series(const double * x,double * y,double d,int diff,int obs,const DATASET * dset)429 int fracdiff_series (const double *x, double *y, double d,
430 		     int diff, int obs, const DATASET *dset)
431 {
432     int dd, t, T;
433     const double TOL = 1.0E-12;
434     int t1 = dset->t1;
435     int t2 = (obs >= 0)? obs : dset->t2;
436     int tmiss = 0;
437     double phi = (diff)? -d : d;
438 
439 #if 0
440     fprintf(stderr, "Doing fracdiff_series, with d = %g\n", d);
441 #endif
442 
443     if (series_adjust_sample(x, &t1, &t2) != 0) {
444 	tmiss = first_missing_index(x, t1, t2);
445 	if (tmiss > 0 && tmiss < t2) {
446 	    t2 = tmiss;
447 	}
448     }
449 
450     if (obs >= 0) {
451 	/* doing a specific observation */
452 	T = obs - t1 + 1;
453 	for (t=0; t<dset->n; t++) {
454 	    y[t] = NADBL;
455 	}
456 	if (obs != t1) {
457 	    y[obs] = (diff)? x[obs] : 0;
458 	    for (dd=1; dd<T && fabs(phi)>TOL; dd++) {
459 		y[obs] += phi * x[obs - dd];
460 		phi *= (dd - d)/(dd + 1);
461 	    }
462 	} else if (diff) {
463 	    y[obs] = x[obs];
464 	}
465     } else {
466 	/* doing the whole series */
467 	T = t2 - t1 + 1;
468 	for (t=0; t<=t2; t++) {
469 	    if (t >= t1 && t <= t2) {
470 		y[t] = (diff)? x[t] : 0;
471 	    } else {
472 		y[t] = NADBL;
473 	    }
474 	}
475 	for (dd=1; dd<=T && fabs(phi)>TOL; dd++) {
476 	    for (t=t1+dd; t<=t2; t++) {
477 		y[t] += phi * x[t - dd];
478 	    }
479 	    phi *= (dd - d)/(dd + 1);
480 	}
481     }
482 
483     return 0;
484 }
485 
boxcox_vector(const double * x,double * y,double d,int t1,int t2)486 static int boxcox_vector (const double *x, double *y, double d,
487 			  int t1, int t2)
488 {
489     int t, special_case = 999;
490     double tol = 1.0e-12;
491 
492     /* 999 for general case; here we want to avoid
493        the pow() function whenever possible.
494     */
495 
496     if (fabs(d) < tol) {
497 	special_case = 0;
498     } else if (fabs(1.0 - d) < tol) {
499 	special_case = 1;
500     } else if (fabs(-1.0 - d) < tol) {
501 	special_case = -1;
502     } else if (fabs(0.5 - d) < tol) {
503 	special_case = 2;
504     } else if (fabs(-0.5 - d) < tol) {
505 	special_case = -2;
506     }
507 
508     for (t=t1; t<=t2; t++) {
509 	if (na(x[t])) {
510 	    y[t] = NADBL;
511 	} else if (special_case == -2) {
512 	    y[t] = (x[t] > 0)? 2.0 * (1.0 - 1.0/sqrt(x[t])) : NADBL;
513 	} else if (special_case == -1) {
514 	    y[t] = 1.0 - 1.0/x[t];
515 	} else if (special_case == 0) {
516 	    y[t] = (x[t] > 0)? log(x[t]) : NADBL;
517 	} else if (special_case == 1) {
518 	    y[t] = x[t] - 1.0;
519 	} else if (special_case == 2) {
520 	    y[t] = (x[t] > 0)? 2.0 * (sqrt(x[t]) - 1.0) : NADBL;
521 	} else {
522 	    y[t] = (pow(x[t], d) - 1) / d;
523 	}
524     }
525 
526     return 0;
527 }
528 
529 /**
530  * boxcox_series:
531  * @x: array of original data.
532  * @y: array into which to write the result.
533  * @d: lambda parameter.
534  * @dset: data set information.
535  *
536  * Calculates in @y the Box-Cox transformation for the
537  * input series @x.
538  *
539  * Returns: 0 on success, non-zero error code on failure.
540  */
541 
boxcox_series(const double * x,double * y,double d,const DATASET * dset)542 int boxcox_series (const double *x, double *y, double d,
543 		   const DATASET *dset)
544 {
545     return boxcox_vector(x, y, d, dset->t1, dset->t2);
546 }
547 
548 /**
549  * boxcox_matrix:
550  * @m: matrix of original data.
551  * @d: lambda parameter.
552  *
553  * Returns: a matrix whose columns are Box-Cox transformations
554  * of the columns of @m.
555  *
556  * Returns: 0 on success, non-zero error code on failure.
557  */
558 
boxcox_matrix(const gretl_matrix * m,double d,int * err)559 gretl_matrix *boxcox_matrix (const gretl_matrix *m, double d,
560 			     int *err)
561 {
562     gretl_matrix *bc;
563 
564     bc = gretl_matrix_alloc(m->rows, m->cols);
565 
566     if (bc == NULL) {
567 	*err = E_ALLOC;
568     } else {
569 	const double *x = m->val;
570 	double *y = bc->val;
571 	int j, n = m->rows;
572 
573 	for (j=0; j<m->cols; j++) {
574 	    boxcox_vector(x, y, d, 0, n-1);
575 	    x += n;
576 	    y += n;
577 	}
578     }
579 
580     return bc;
581 }
582 
cum_series(const double * x,double * y,const DATASET * dset)583 int cum_series (const double *x, double *y, const DATASET *dset)
584 {
585     int t, s = dset->t1;
586 
587     /* corner case: sample size of 1! */
588     if (dset->t1 == dset->t2) {
589 	y[dset->t1] = x[dset->t1];
590 	return 0;
591     }
592 
593     for (t=dset->t1; t<=dset->t2 && na(x[t]); t++) {
594 	s++;
595     }
596 
597     if (s == dset->t2) {
598 	/* no usable data */
599 	return 0;
600     }
601 
602     y[s] = x[s];
603 
604     if (dataset_is_panel(dset)) {
605 	int k;
606 
607 	for (t=s+1; t<=dset->t2; t++) {
608 	    if (t % dset->pd == 0) {
609 		/* first obs for panel unit */
610 		for (k=0; k<dset->pd; k++) {
611 		    if (!na(x[t+k])) {
612 			t = t + k;
613 			y[t] = x[t];
614 			break;
615 		    }
616 		}
617 	    } else if (!na(y[t-1]) && !na(x[t])) {
618 		y[t] = y[t-1] + x[t];
619 	    }
620 	}
621     } else {
622 	for (t=s+1; t<=dset->t2 && !na(x[t]); t++) {
623 	    y[t] = y[t-1] + x[t];
624 	}
625     }
626 
627     return 0;
628 }
629 
panel_resample_series(const double * x,double * y,const DATASET * dset)630 static int panel_resample_series (const double *x, double *y,
631 				  const DATASET *dset)
632 {
633     int n = panel_sample_size(dset);
634     int T = dset->pd;
635     int i, j, t, s;
636     int u1, u2;
637     int *z;
638 
639     if (n <= 1) {
640 	return E_DATA;
641     }
642 
643     z = malloc(n * sizeof *z);
644     if (z == NULL) {
645 	return E_ALLOC;
646     }
647 
648     u1 = dset->t1 / T;
649     u2 = dset->t2 / T;
650 
651     /* select n units from [u1 .. u2] */
652     gretl_rand_int_minmax(z, n, u1, u2);
653 
654     s = dset->t1;
655     for (i=0; i<n; i++) {
656 	j = z[i] * T;
657 	for (t=0; t<T; t++) {
658 	    y[s++] = x[j + t];
659 	}
660     }
661 
662     free(z);
663 
664     return 0;
665 }
666 
resample_series(const double * x,double * y,const DATASET * dset)667 int resample_series (const double *x, double *y, const DATASET *dset)
668 {
669     int *z = NULL;
670     int t1, t2;
671     int i, t, n;
672 
673     if (dataset_is_panel(dset)) {
674 	return panel_resample_series(x, y, dset);
675     }
676 
677     t1 = dset->t1;
678     t2 = dset->t2;
679     series_adjust_sample(x, &t1, &t2);
680 
681     n = t2 - t1 + 1;
682     if (n <= 1) {
683 	return E_DATA;
684     }
685 
686     z = malloc(n * sizeof *z);
687     if (z == NULL) {
688 	return E_ALLOC;
689     }
690 
691     /* generate n uniform drawings from [t1 .. t2] */
692     gretl_rand_int_minmax(z, n, t1, t2);
693 
694     /* sample from source series x based on indices z */
695     for (t=t1, i=0; t<=t2; t++, i++) {
696 	y[t] = x[z[i]];
697     }
698 
699     free(z);
700 
701     return 0;
702 }
703 
block_resample_series(const double * x,double * y,int blocklen,const DATASET * dset)704 int block_resample_series (const double *x, double *y, int blocklen,
705 			   const DATASET *dset)
706 {
707     int t1 = dset->t1;
708     int t2 = dset->t2;
709     int *z = NULL;
710     int m, rem, bt2, x0;
711     int i, s, t, n;
712 
713     if (dataset_is_panel(dset)) {
714 	return E_PDWRONG;
715     }
716 
717     if (blocklen <= 0) {
718 	return E_DATA;
719     }
720 
721     if (blocklen == 1) {
722 	return resample_series(x, y, dset);
723     }
724 
725     series_adjust_sample(x, &t1, &t2);
726 
727     n = t2 - t1 + 1;
728 
729     m = n / blocklen;
730     rem = n % blocklen;
731 
732     /* Let n now represent the number of blocks of @blocklen
733        contiguous observations which we need to select; the
734        last of these may not be fully used.
735     */
736     n = m + (rem > 0);
737 
738     /* the last selectable starting point for a block */
739     bt2 = t2 - blocklen + 1;
740 
741     if (bt2 < t1) {
742 	return E_DATA;
743     }
744 
745     z = malloc(n * sizeof *z);
746     if (z == NULL) {
747 	return E_ALLOC;
748     }
749 
750     /* Generate uniform random series: we want n drawings from the
751        range [t1 .. t2 - blocklen + 1], each of which will be
752        interpreted as the starting point of a block to be used.
753     */
754     gretl_rand_int_minmax(z, n, t1, bt2);
755 
756     /* Sample from the source series using blocks given by the random
757        indices: note that the last block will be incomplete if rem > 0
758     */
759     t = t1;
760     for (i=0; i<n; i++) {
761 	x0 = z[i];
762 	for (s=0; s<blocklen; s++) {
763 	    if (t <= t2) {
764 		y[t++] = x[x0+s];
765 	    } else {
766 		break;
767 	    }
768 	}
769     }
770 
771     free(z);
772 
773     return 0;
774 }
775 
776 /* retrieve a pre-sample @x value if it's available via the n-vector
777    @x0, an optional argument to the filter() function.
778 */
779 
get_xlag(int lag,int t1,gretl_vector * x0,int n)780 static double get_xlag (int lag, int t1, gretl_vector *x0, int n)
781 {
782     int p = t1 - lag;
783     int i = n - p;
784 
785     if (i >= 0 && i < n) {
786 	return x0->val[i];
787     } else {
788 	return 0;
789     }
790 }
791 
792 /* implements filter_series() and filter_matrix() */
793 
filter_vector(const double * x,double * y,int t1,int t2,gretl_vector * A,gretl_vector * C,double y0,gretl_vector * x0)794 static int filter_vector (const double *x, double *y, int t1, int t2,
795 			  gretl_vector *A, gretl_vector *C, double y0,
796 			  gretl_vector *x0)
797 {
798     int t, s, i, n;
799     int amax, cmax;
800     double xlag, ylag;
801     double coef, *e;
802     int x0len = 0;
803     int err = 0;
804 
805     if (gretl_is_null_matrix(C)) {
806 	cmax = 0;
807     } else {
808 	cmax = gretl_vector_get_length(C);
809 	if (cmax == 0) {
810 	    /* if present, C must be a vector */
811 	    return E_NONCONF;
812 	}
813     }
814 
815     if (gretl_is_null_matrix(A)) {
816 	amax = 0;
817     } else {
818 	amax = gretl_vector_get_length(A);
819 	if (amax == 0) {
820 	    /* if present, A must be a vector */
821 	    return E_NONCONF;
822 	}
823     }
824 
825     n = t2 - t1 + 1;
826     e = malloc(n * sizeof *e);
827 
828     if (e == NULL) {
829 	return E_ALLOC;
830     }
831 
832     if (x0 != NULL) {
833 	x0len = gretl_vector_get_length(x0);
834     }
835 
836     s = 0;
837     if (cmax) {
838 	for (t=t1; t<=t2; t++) {
839 	    e[s] = 0;
840 	    for (i=0; i<cmax; i++) {
841 		if (t-i >= t1) {
842 		    xlag = x[t-i];
843 		} else if (x0 == NULL) {
844 		    xlag = 0;
845 		} else {
846 		    xlag = get_xlag(t-i, t1, x0, x0len);
847 		}
848 		if (na(xlag)) {
849 		    e[s] = NADBL;
850 		    break;
851 		} else {
852 		    coef = gretl_vector_get(C, i);
853 		    e[s] += xlag * coef;
854 		}
855 	    }
856 	    s++;
857 	}
858     } else {
859 	/* implicitly MA(0) = 1 */
860 	for (t=t1; t<=t2; t++) {
861 	    e[s++] = x[t];
862 	}
863     }
864 
865     s = 0;
866     if (amax) {
867 	for (t=t1; t<=t2; t++) {
868 	    if (na(e[s])) {
869 		y[t] = NADBL;
870 	    } else {
871 		y[t] = e[s];
872 		for (i=0; i<amax; i++) {
873 		    ylag = (t-i > t1)? y[t-i-1] : y0;
874 		    if (na(ylag)) {
875 			y[t] = NADBL;
876 			break;
877 		    } else {
878 			coef = gretl_vector_get(A, i);
879 			y[t] += coef * ylag;
880 		    }
881 		}
882 	    }
883 	    s++;
884 	}
885     } else {
886 	for (t=t1; t<=t2; t++) {
887 	    y[t] = e[s++];
888 	}
889     }
890 
891     free(e);
892 
893     return err;
894 }
895 
896 /**
897  * filter_series:
898  * @x: array of original data.
899  * @y: array into which to write the result.
900  * @dset: data set information.
901  * @A: vector for autoregressive polynomial.
902  * @C: vector for moving average polynomial.
903  * @y0: initial value of output series.
904  * @x0: prior values of x.
905  *
906  * Filters x according to y_t = C(L)/A(L) x_t.  If the intended
907  * AR order is p, @A should be a vector of length p.  If the
908  * intended MA order is q, @C should be vector of length (q+1),
909  * the first entry giving the coefficient at lag 0.  However, if
910  * @C is NULL this is taken to mean that the lag-0 MA coefficient
911  * is unity (and all others are zero).
912  *
913  * Returns: 0 on success, non-zero error code on failure.
914  */
915 
filter_series(const double * x,double * y,const DATASET * dset,gretl_vector * A,gretl_vector * C,double y0,gretl_vector * x0)916 int filter_series (const double *x, double *y, const DATASET *dset,
917 		   gretl_vector *A, gretl_vector *C,
918 		   double y0, gretl_vector *x0)
919 {
920     int t1 = dset->t1;
921     int t2 = dset->t2;
922     int err;
923 
924     err = series_adjust_sample(x, &t1, &t2);
925 
926     if (!err) {
927 	err = filter_vector(x, y, t1, t2, A, C, y0, x0);
928     }
929 
930     return err;
931 }
932 
933 /**
934  * filter_matrix:
935  * @X: matrix of original data, r x c.
936  * @A: vector for autoregressive polynomial.
937  * @C: vector for moving average polynomial.
938  * @y0: initial value of output series.
939  * @x0: prior values of x.
940  * @err: location to receive error code.
941  *
942  * Filters the columns of x according to y_t = C(L)/A(L) x_t.  If the
943  * intended AR order is p, @A should be a vector of length p.  If the
944  * intended MA order is q, @C should be vector of length (q+1), the
945  * first entry giving the coefficient at lag 0.  However, if @C is
946  * NULL this is taken to mean that the lag-0 MA coefficient is unity
947  * (and all others are zero).
948  *
949  * Returns: r x c matrix of filtered values, or NULL on failure.
950  */
951 
filter_matrix(gretl_matrix * X,gretl_vector * A,gretl_vector * C,double y0,gretl_matrix * x0,int * err)952 gretl_matrix *filter_matrix (gretl_matrix *X, gretl_vector *A,
953 			     gretl_vector *C, double y0,
954 			     gretl_matrix *x0, int *err)
955 {
956     int r = X->rows;
957     int c = X->cols;
958     gretl_matrix *Y = NULL;
959     double *a = NULL, *b = NULL;
960     int i, j;
961 
962     if (gretl_is_complex(X) ||
963 	gretl_is_complex(A) ||
964 	gretl_is_complex(C)) {
965 	*err = E_CMPLX;
966 	return NULL;
967     }
968 
969     Y = gretl_matrix_alloc(r, c);
970     a = malloc(r * sizeof *a);
971     b = malloc(r * sizeof *b);
972 
973     if (Y == NULL || a == NULL || b == NULL) {
974 	*err = E_ALLOC;
975 	return NULL;
976     }
977 
978     for (j=0; j<c; j++) {
979 	for (i=0; i<r; i++) {
980 	    a[i] = gretl_matrix_get(X, i, j);
981 	}
982 	*err = filter_vector(a, b, 0, r-1, A, C, y0, x0);
983 	if (*err) {
984 	    break;
985 	} else {
986 	    for (i=0; i<r; i++) {
987 		gretl_matrix_set(Y, i, j, b[i]);
988 	    }
989 	}
990     }
991 
992     free(a);
993     free(b);
994 
995     return Y;
996 }
997 
series_goodobs(const double * x,int * t1,int * t2)998 static int series_goodobs (const double *x, int *t1, int *t2)
999 {
1000     int t, t1min = *t1, t2max = *t2;
1001     int T = 0;
1002 
1003     for (t=t1min; t<t2max; t++) {
1004 	if (na(x[t])) t1min++;
1005 	else break;
1006     }
1007 
1008     for (t=t2max; t>t1min; t--) {
1009 	if (na(x[t])) t2max--;
1010 	else break;
1011     }
1012 
1013     for (t=t1min; t<=t2max; t++) {
1014 	if (!na(x[t])) {
1015 	    T++;
1016 	}
1017     }
1018 
1019     *t1 = t1min;
1020     *t2 = t2max;
1021 
1022     return T;
1023 }
1024 
1025 /**
1026  * exponential_movavg_series:
1027  * @x: array of original data.
1028  * @y: array into which to write the result.
1029  * @dset: dataset information.
1030  * @d: coefficient on lagged @x.
1031  * @n: number of @x observations to average to give the
1032  * initial @y value.
1033  * @y0: optional initial value for EMA (or #NADBL).
1034  *
1035  * Returns: 0 on success, non-zero error code on failure.
1036  */
1037 
exponential_movavg_series(const double * x,double * y,const DATASET * dset,double d,int n,double y0)1038 int exponential_movavg_series (const double *x, double *y,
1039 			       const DATASET *dset,
1040 			       double d, int n,
1041 			       double y0)
1042 {
1043     int t1 = dset->t1;
1044     int t2 = dset->t2;
1045     int t, T, err = 0;
1046 
1047     if (dataset_is_panel(dset)) {
1048 	return E_PDWRONG;
1049     }
1050 
1051     if (na(y0) && n < 0) {
1052 	return E_INVARG;
1053     }
1054 
1055     T = series_goodobs(x, &t1, &t2);
1056 
1057     if (na(y0) && T < n) {
1058 	return E_TOOFEW;
1059     }
1060 
1061     if (na(y0) && n == 0) {
1062 	/* signal to use full sample mean */
1063 	n = T;
1064     }
1065 
1066     if (!na(y0)) {
1067 	; /* initialize on supplied value */
1068     } else if (n == 1) {
1069 	/* initialize on first observation */
1070 	y0 = x[t1];
1071     } else {
1072 	/* initialize on mean of first n obs */
1073 	y0 = 0.0;
1074 	for (t=t1; t<t1+n; t++) {
1075 	    if (na(x[t])) {
1076 		err = E_MISSDATA;
1077 		break;
1078 	    }
1079 	    y0 += x[t];
1080 	}
1081 	if (!err) {
1082 	    y0 /= n;
1083 	}
1084     }
1085 
1086     if (!err && na(y0)) {
1087 	err = E_MISSDATA;
1088     }
1089 
1090     if (!err) {
1091 	y[t1] = d * x[t1] + (1-d) * y0;
1092 	for (t=t1+1; t<=t2; t++) {
1093 	    if (na(x[t]) || na(y[t-1])) {
1094 		y[t] = NADBL;
1095 	    } else {
1096 		y[t] = d * x[t] + (1-d) * y[t-1];
1097 	    }
1098 	}
1099     }
1100 
1101     return err;
1102 }
1103 
1104 /**
1105  * movavg_series:
1106  * @x: array of original data.
1107  * @y: array into which to write the result.
1108  * @dset: data set information.
1109  * @k: number of terms in MA.
1110  * @center: if non-zero, produce centered MA.
1111  *
1112  * Returns: 0 on success, non-zero error code on failure.
1113  */
1114 
movavg_series(const double * x,double * y,const DATASET * dset,int k,int center)1115 int movavg_series (const double *x, double *y, const DATASET *dset,
1116 		   int k, int center)
1117 {
1118     int t1 = dset->t1;
1119     int t2 = dset->t2;
1120     int k1 = k-1, k2 = 0;
1121     int i, s, t, T;
1122 
1123     T = series_goodobs(x, &t1, &t2);
1124 
1125     T = t2 - t1 + 1;
1126     if (T < k) {
1127 	return E_TOOFEW;
1128     }
1129 
1130     if (center) {
1131 	k1 = k / 2;
1132 	k2 = (k % 2 == 0)? (k1 - 1) : k1;
1133     }
1134 
1135     t1 += k1;
1136     t2 -= k2;
1137 
1138     for (t=t1; t<=t2; t++) {
1139 	double xs, msum = 0.0;
1140 
1141 	for (i=-k1; i<=k2; i++) {
1142 	    s = t + i;
1143 	    if (dset->structure == STACKED_TIME_SERIES) {
1144 		if (s / dset->pd != t / dset->pd) {
1145 		    s = -1;
1146 		}
1147 	    }
1148 
1149 	    if (s >= 0) {
1150 		xs = x[s];
1151 	    } else {
1152 		xs = NADBL;
1153 	    }
1154 
1155 	    if (na(xs)) {
1156 		msum = NADBL;
1157 		break;
1158 	    } else {
1159 		msum += x[s];
1160 	    }
1161 	}
1162 
1163 	if (!na(msum)) {
1164 	    y[t] = (k > 0)? (msum / k) : msum;
1165 	}
1166     }
1167 
1168     if (center && k % 2 == 0) {
1169 	/* centered, with even number of terms */
1170 	for (t=t1; t<t2; t++) {
1171 	    if (na(y[t]) || na(y[t+1])) {
1172 		y[t] = NADBL;
1173 	    } else {
1174 		y[t] = (y[t] + y[t+1]) / 2.0;
1175 	    }
1176 	}
1177 	y[t2] = NADBL;
1178     }
1179 
1180     return 0;
1181 }
1182 
seasonally_adjust_series(const double * x,double * y,DATASET * dset,int tramo,int use_log)1183 int seasonally_adjust_series (const double *x, double *y,
1184 			      DATASET *dset, int tramo,
1185 			      int use_log)
1186 {
1187     int (*adjust_series) (const double *, double *,
1188 			  const DATASET *, int, int);
1189     int t1 = dset->t1;
1190     int t2 = dset->t2;
1191     int T, err = 0;
1192 
1193     if (!quarterly_or_monthly(dset)) {
1194 	gretl_errmsg_set(_("Input must be a monthly or quarterly time series"));
1195 	return E_PDWRONG;
1196     }
1197 
1198     series_adjust_sample(x, &t1, &t2);
1199     T = t2 - t1 + 1;
1200 
1201     if (T < dset->pd * 3) {
1202 	return E_TOOFEW;
1203     } else if (tramo && T > 600) {
1204 	gretl_errmsg_set(_("TRAMO can't handle more than 600 observations.\n"
1205 			   "Please select a smaller sample."));
1206 	return E_EXTERNAL;
1207     } else if (!tramo) {
1208 	int pdmax = get_x12a_maxpd();
1209 
1210 	if (T > 50 * pdmax) {
1211 	    gretl_errmsg_sprintf(_("X-12-ARIMA can't handle more than %d observations.\n"
1212 				   "Please select a smaller sample."), 50 * pdmax);
1213 	    return E_EXTERNAL;
1214 	}
1215     }
1216 
1217     gretl_error_clear();
1218 
1219     adjust_series = get_plugin_function("adjust_series");
1220 
1221     if (adjust_series == NULL) {
1222 	err = E_FOPEN;
1223     } else {
1224 	int save_t1 = dset->t1;
1225 	int save_t2 = dset->t2;
1226 
1227 	dset->t1 = t1;
1228 	dset->t2 = t2;
1229 
1230 	err = (*adjust_series) (x, y, dset, tramo, use_log);
1231 
1232 	dset->t1 = save_t1;
1233 	dset->t2 = save_t2;
1234     }
1235 
1236     return err;
1237 }
1238 
tramo_linearize_series(const double * x,double * y,DATASET * dset)1239 int tramo_linearize_series (const double *x, double *y,
1240 			    DATASET *dset)
1241 {
1242     int (*linearize_series) (const double *, double *,
1243 			     const DATASET *);
1244     int t1 = dset->t1;
1245     int t2 = dset->t2;
1246     int T, err = 0;
1247 
1248     series_adjust_sample(x, &t1, &t2);
1249     T = t2 - t1 + 1;
1250 
1251     if (T < 8) {
1252 	return E_TOOFEW;
1253     } else if (T > 600) {
1254 	gretl_errmsg_set(_("TRAMO can't handle more than 600 observations.\n"
1255 			   "Please select a smaller sample."));
1256 	return E_EXTERNAL;
1257     }
1258 
1259     gretl_error_clear();
1260 
1261     linearize_series = get_plugin_function("linearize_series");
1262 
1263     if (linearize_series == NULL) {
1264 	err = E_FOPEN;
1265     } else {
1266 	int save_t1 = dset->t1;
1267 	int save_t2 = dset->t2;
1268 
1269 	dset->t1 = t1;
1270 	dset->t2 = t2;
1271 
1272 	err = (*linearize_series) (x, y, dset);
1273 
1274 	dset->t1 = save_t1;
1275 	dset->t2 = save_t2;
1276     }
1277 
1278     return err;
1279 }
1280 
1281 #define panel_obs_ok(x,t,m) ((m == NULL || m[t] != 0) && !na(x[t]))
1282 #define panel_obs_masked(m,t) (m != NULL && m[t] == 0)
1283 
1284 #define PXSUM_SKIP_NA 1
1285 
1286 /**
1287  * panel_statistic:
1288  * @x: source data.
1289  * @y: target into which to write.
1290  * @dset: data set information.
1291  * @k: code representing the desired statistic: F_PNOBS,
1292  * F_PMIN, F_PMAX, F_PSUM, F_PMEAN, F_PXSUM, F_PSD, F_PXNOBS
1293  * or F_PXSUM.
1294  * @mask: either NULL or a series with 0s for observations
1295  * to be excluded from the calculations, non-zero values
1296  * at other observations.
1297  *
1298  * Given the data in @x, constructs in @y a series containing
1299  * a panel-data statistic.
1300  *
1301  * Returns: 0 on success, non-zero on error.
1302  */
1303 
panel_statistic(const double * x,double * y,const DATASET * dset,int k,const double * mask)1304 int panel_statistic (const double *x, double *y, const DATASET *dset,
1305 		     int k, const double *mask)
1306 {
1307     int T, Ti;
1308     int i, s, t, u1, u2;
1309     int err = 0;
1310 
1311     if (!dataset_is_panel(dset)) {
1312 	return E_DATA;
1313     }
1314 
1315     T = dset->pd;
1316     u1 = dset->t1 / T;
1317     u2 = dset->t2 / T;
1318 
1319     if (k == F_PNOBS) {
1320 	for (i=u1; i<=u2; i++) {
1321 	    Ti = 0;
1322 	    for (t=0; t<T; t++) {
1323 		if (panel_obs_ok(x, i*T + t, mask)) {
1324 		    Ti++;
1325 		}
1326 	    }
1327 	    for (t=0; t<T; t++) {
1328 		y[i*T + t] = Ti;
1329 	    }
1330 	}
1331     } else if (k == F_PMIN || k == F_PMAX || k == F_PSUM) {
1332 	double yi;
1333 
1334 	for (i=u1; i<=u2; i++) {
1335 	    yi = NADBL;
1336 	    for (t=0; t<T; t++) {
1337 		s = i*T + t;
1338 		if (panel_obs_ok(x, s, mask)) {
1339 		    if (na(yi)) {
1340 			yi = x[s];
1341 		    } else if (k == F_PMIN && x[s] < yi) {
1342 			yi = x[s];
1343 		    } else if (k == F_PMAX && x[s] > yi) {
1344 			yi = x[s];
1345 		    } else if (k == F_PSUM) {
1346 			yi += x[s];
1347 		    }
1348 		}
1349 	    }
1350 	    for (t=0; t<T; t++) {
1351 		y[i*T + t] = yi;
1352 	    }
1353 	}
1354     } else if (k == F_PMEAN || k == F_PSD) {
1355 	/* first check for presence of time-variation */
1356 	double xref;
1357 	int TV = 0;
1358 
1359 	for (i=u1; i<=u2 && !TV; i++) {
1360 	    xref = NADBL;
1361 	    for (t=0; t<T && !TV; t++) {
1362 		s = i*T + t;
1363 		if (panel_obs_ok(x, s, mask)) {
1364 		    if (na(xref)) {
1365 			xref = x[s];
1366 		    } else if (x[s] != xref) {
1367 			TV = 1;
1368 		    }
1369 		}
1370 	    }
1371 	}
1372 #if 0
1373 	fprintf(stderr, "panel_statistic: time-varying = %d\n", TV);
1374 #endif
1375 	if (k == F_PMEAN || TV) {
1376 	    double xbar;
1377 
1378 	    for (i=u1; i<=u2; i++) {
1379 		xbar = NADBL;
1380 		Ti = 0;
1381 		for (t=0; t<T; t++) {
1382 		    s = i*T + t;
1383 		    if (panel_obs_ok(x, s, mask)) {
1384 			if (na(xbar) || !TV) {
1385 			    xbar = x[s];
1386 			} else {
1387 			    xbar += x[s];
1388 			}
1389 			Ti++;
1390 		    }
1391 		}
1392 		if (!na(xbar) && TV) {
1393 		    xbar /= Ti;
1394 		}
1395 		for (t=0; t<T; t++) {
1396 		    y[i*T + t] = xbar;
1397 		}
1398 	    }
1399 	}
1400 
1401 	if (k == F_PSD && !TV) {
1402 	    /* s.d. with no time variation! */
1403 	    double sd;
1404 
1405 	    for (i=u1; i<=u2; i++) {
1406 		Ti = 0;
1407 		for (t=0; t<T; t++) {
1408 		    if (panel_obs_ok(x, i*T + t, mask)) {
1409 			Ti++;
1410 			break;
1411 		    }
1412 		}
1413 		sd = Ti > 0 ? 0.0 : NADBL;
1414 		for (t=0; t<T; t++) {
1415 		    y[i*T + t] = sd;
1416 		}
1417 	    }
1418 	} else if (k == F_PSD) {
1419 	    /* the time-varying case: we use the mean
1420 	       calculated above */
1421 	    double sd, xbar, ssx, dev;
1422 
1423 	    for (i=u1; i<=u2; i++) {
1424 		ssx = NADBL;
1425 		xbar = y[i*T];
1426 		Ti = 0;
1427 		if (!na(xbar)) {
1428 		    for (t=0; t<T; t++) {
1429 			s = i*T + t;
1430 			if (panel_obs_ok(x, s, mask)) {
1431 			    dev = x[s] - xbar;
1432 			    if (na(ssx)) {
1433 				ssx = dev * dev;
1434 			    } else {
1435 				ssx += dev * dev;
1436 			    }
1437 			    Ti++;
1438 			}
1439 		    }
1440 		}
1441 		if (na(ssx)) {
1442 		    sd = NADBL;
1443 		} else if (Ti == 1) {
1444 		    sd = 0.0;
1445 		} else {
1446 		    sd = sqrt(ssx / (Ti-1));
1447 		}
1448 		for (t=0; t<T; t++) {
1449 		    y[i*T + t] = sd;
1450 		}
1451 	    }
1452 	}
1453     } else if (k == F_PXSUM) {
1454 	/* the sum of cross-sectional values for each period */
1455 	double yt;
1456 	int nt;
1457 
1458 	for (t=0; t<T; t++) {
1459 	    yt = 0.0;
1460 	    nt = 0;
1461 	    for (i=u1; i<=u2; i++) {
1462 		s = i*T + t;
1463 		if (panel_obs_masked(mask, s)) {
1464 		    continue;
1465 		} else if (na(x[s])) {
1466 #if PXSUM_SKIP_NA
1467 		    continue;
1468 #else
1469 		    yt = NADBL;
1470 		    break;
1471 #endif
1472 		} else {
1473 		    yt += x[s];
1474 		    nt++;
1475 		}
1476 	    }
1477 	    if (nt == 0) {
1478 		yt = NADBL;
1479 	    }
1480 	    for (i=u1; i<=u2; i++) {
1481 		y[i*T + t] = yt;
1482 	    }
1483 	}
1484     } else if (k == F_PXNOBS) {
1485 	/* number of valid x-sectional obs in each period */
1486 	int nt;
1487 
1488 	for (t=0; t<T; t++) {
1489 	    nt = 0;
1490 	    for (i=u1; i<=u2; i++) {
1491 		s = i*T + t;
1492 		if (panel_obs_masked(mask, s)) {
1493 		    continue;
1494 		} else if (!na(x[s])) {
1495 		    nt++;
1496 		}
1497 	    }
1498 	    for (i=u1; i<=u2; i++) {
1499 		y[i*T + t] = nt;
1500 	    }
1501 	}
1502     } else {
1503 	/* unsupported option */
1504 	err = E_DATA;
1505     }
1506 
1507     return err;
1508 }
1509 
1510 /**
1511  * panel_shrink:
1512  * @x: panel-data source series.
1513  * @noskip: keep NAs in output.
1514  * @dset: pointer to dataset.
1515  * @err: location to receive error code.
1516  *
1517  * By default, constructs a column vector holding the first
1518  * non-missing observation of @x for each panel unit within
1519  * the current sample range (hence skipping any units that
1520  * have no valid observations). However, if @noskip is non-zero,
1521  * the vector contains an NA for units with all missing
1522  * values.
1523  *
1524  * Returns: a new column vector, or NULL on error.
1525  */
1526 
panel_shrink(const double * x,int noskip,const DATASET * dset,int * err)1527 gretl_matrix *panel_shrink (const double *x, int noskip,
1528 			    const DATASET *dset, int *err)
1529 {
1530     gretl_matrix *m = NULL;
1531     int n, T;
1532 
1533     if (!dataset_is_panel(dset)) {
1534 	*err = E_DATA;
1535 	return NULL;
1536     }
1537 
1538     n = panel_sample_size(dset);
1539     T = dset->pd;
1540     m = gretl_column_vector_alloc(n);
1541 
1542     if (m == NULL) {
1543 	*err = E_ALLOC;
1544     } else {
1545 	int i1 = dset->t1 / T;
1546 	int i2 = dset->t2 / T;
1547 	int s = dset->t1;
1548 	int i, t, k = 0;
1549 	int gotit;
1550 
1551 	for (i=i1; i<=i2; i++) {
1552 	    /* loop across units */
1553 	    gotit = 0;
1554 	    for (t=0; t<T; t++) {
1555 		/* loop inside units */
1556 		if (!na(x[s+t])) {
1557 		    gotit = 1;
1558 		    m->val[k++] = x[s+t];
1559 		    break;
1560 		}
1561 	    }
1562 	    if (!gotit && noskip) {
1563 		m->val[k++] = NADBL;
1564 	    }
1565 	    /* skip to the next unit */
1566 	    s += T;
1567 	}
1568 
1569 	if (k < n) {
1570 	    m->rows = k;
1571 	}
1572     }
1573 
1574     return m;
1575 }
1576 
1577 /**
1578  * panel_expand:
1579  * @x: source vector.
1580  * @y target array.
1581  * @dset: pointer to dataset.
1582  * @opt: OPT_X to repeat across individuals instead
1583  * of across time.
1584  *
1585  * Constructs a series by repeating the values in @x,
1586  * by default across time (in which case the source
1587  * vector must have as many values as there are
1588  * individuals in the current sample range).
1589  *
1590  * Returns: zero on success, non-zero code on error.
1591  */
1592 
panel_expand(const gretl_matrix * x,double * y,gretlopt opt,const DATASET * dset)1593 int panel_expand (const gretl_matrix *x, double *y,
1594 		  gretlopt opt, const DATASET *dset)
1595 {
1596     int n, T = sample_size(dset);
1597 
1598     if (x == NULL || !dataset_is_panel(dset)) {
1599 	return E_DATA;
1600     }
1601 
1602     n = (int) ceil((double) T / dset->pd);
1603 
1604     if (gretl_vector_get_length(x) != n) {
1605 	return E_NONCONF;
1606     } else {
1607 	int ubak = -1;
1608 	int t, u, i = 0;
1609 	double xi = 0;
1610 
1611 	for (t=dset->t1; t<=dset->t2; t++) {
1612 	    u = t / dset->pd;
1613 	    if (u != ubak) {
1614 		xi = x->val[i++];
1615 		ubak = u;
1616 	    }
1617 	    if (na(xi)) {
1618 		y[t] = NADBL;
1619 	    } else {
1620 		y[t] = xi;
1621 	    }
1622 
1623 	}
1624     }
1625 
1626     return 0;
1627 }
1628 
default_hp_lambda(const DATASET * dset)1629 static double default_hp_lambda (const DATASET *dset)
1630 {
1631     return 100 * dset->pd * dset->pd;
1632 }
1633 
1634 /**
1635  * hp_filter:
1636  * @x: array of original data.
1637  * @hp: array in which filtered series is computed.
1638  * @dset: pointer to dataset.
1639  * @lambda: smoothing parameter (or #NADBL to use the default
1640  * value).
1641  * @opt: if %OPT_T, return the trend rather than the cycle.
1642  *
1643  * Calculates the "cycle" component of the time series in
1644  * array @x, using the Hodrick-Prescott filter.  Adapted from the
1645  * original FORTRAN code by E. Prescott.
1646  *
1647  * Returns: 0 on success, non-zero error code on failure.
1648  */
1649 
hp_filter(const double * x,double * hp,const DATASET * dset,double lambda,gretlopt opt)1650 int hp_filter (const double *x, double *hp, const DATASET *dset,
1651 	       double lambda, gretlopt opt)
1652 {
1653     int t1 = dset->t1, t2 = dset->t2;
1654     double v00 = 1.0, v11 = 1.0, v01 = 0.0;
1655     double det, tmp0, tmp1;
1656     double e0, e1, b00, b01, b11;
1657     double **V = NULL;
1658     double m[2], tmp[2];
1659     int i, t, T, tb;
1660     int err = 0;
1661 
1662     for (t=t1; t<=t2; t++) {
1663 	hp[t] = NADBL;
1664     }
1665 
1666     err = series_adjust_sample(x, &t1, &t2);
1667     if (err) {
1668 	goto bailout;
1669     }
1670 
1671     T = t2 - t1 + 1;
1672     if (T < 4) {
1673 	err = E_TOOFEW;
1674 	goto bailout;
1675     }
1676 
1677     if (na(lambda)) {
1678 	lambda = default_hp_lambda(dset);
1679     }
1680 
1681     V = doubles_array_new(4, T);
1682     if (V == NULL) {
1683 	return E_ALLOC;
1684     }
1685 
1686     /* adjust starting points */
1687     x += t1;
1688     hp += t1;
1689 
1690     /* covariance matrices for each obs */
1691 
1692     for (t=2; t<T; t++) {
1693 	tmp0 = v00;
1694 	tmp1 = v01;
1695 	v00 = 1.0 / lambda + 4.0 * (tmp0 - tmp1) + v11;
1696 	v01 = 2.0 * tmp0 - tmp1;
1697 	v11 = tmp0;
1698 
1699 	det = v00 * v11 - v01 * v01;
1700 
1701 	V[0][t] =  v11 / det;
1702 	V[1][t] = -v01 / det;
1703 	V[2][t] =  v00 / det;
1704 
1705 	tmp0 = v00 + 1.0;
1706 	tmp1 = v00;
1707 
1708 	v00 -= v00 * v00 / tmp0;
1709 	v11 -= v01 * v01 / tmp0;
1710 	v01 -= (tmp1 / tmp0) * v01;
1711     }
1712 
1713     m[0] = x[0];
1714     m[1] = x[1];
1715 
1716     /* forward pass */
1717     for (t=2; t<T; t++) {
1718 	tmp[0] = m[1];
1719 	m[1] = 2.0 * m[1] - m[0];
1720 	m[0] = tmp[0];
1721 
1722 	V[3][t-1] = V[0][t] * m[1] + V[1][t] * m[0];
1723 	hp[t-1]   = V[1][t] * m[1] + V[2][t] * m[0];
1724 
1725 	det = V[0][t] * V[2][t] - V[1][t] * V[1][t];
1726 
1727 	v00 =  V[2][t] / det;
1728 	v01 = -V[1][t] / det;
1729 
1730 	tmp[1] = (x[t] - m[1]) / (v00 + 1.0);
1731 	m[1] += v00 * tmp[1];
1732 	m[0] += v01 * tmp[1];
1733     }
1734 
1735     V[3][T-2] = m[0];
1736     V[3][T-1] = m[1];
1737     m[0] = x[T-2];
1738     m[1] = x[T-1];
1739 
1740     /* backward pass */
1741     for (t=T-3; t>=0; t--) {
1742 	t1 = t+1;
1743 	tb = T - t - 1;
1744 
1745 	tmp[0] = m[0];
1746 	m[0] = 2.0 * m[0] - m[1];
1747 	m[1] = tmp[0];
1748 
1749 	if (t > 1) {
1750 	    /* combine info for y < i with info for y > i */
1751 	    e0 = V[2][tb] * m[1] + V[1][tb] * m[0] + V[3][t];
1752 	    e1 = V[1][tb] * m[1] + V[0][tb] * m[0] + hp[t];
1753 	    b00 = V[2][tb] + V[0][t1];
1754 	    b01 = V[1][tb] + V[1][t1];
1755 	    b11 = V[0][tb] + V[2][t1];
1756 
1757 	    det = b00 * b11 - b01 * b01;
1758 
1759 	    V[3][t] = (b00 * e1 - b01 * e0) / det;
1760 	}
1761 
1762 	det = V[0][tb] * V[2][tb] - V[1][tb] * V[1][tb];
1763 	v00 =  V[2][tb] / det;
1764 	v01 = -V[1][tb] / det;
1765 
1766 	tmp[1] = (x[t] - m[0]) / (v00 + 1.0);
1767 	m[1] += v01 * tmp[1];
1768 	m[0] += v00 * tmp[1];
1769     }
1770 
1771     V[3][0] = m[0];
1772     V[3][1] = m[1];
1773 
1774     if (opt & OPT_T) {
1775 	for (t=0; t<T; t++) {
1776 	    hp[t] = V[3][t];
1777 	}
1778     } else {
1779 	for (t=0; t<T; t++) {
1780 	    hp[t] = x[t] - V[3][t];
1781 	}
1782     }
1783 
1784  bailout:
1785 
1786     if (V != NULL) {
1787 	for (i=0; i<4; i++) {
1788 	    free(V[i]);
1789 	}
1790 	free(V);
1791     }
1792 
1793     return err;
1794 }
1795 
1796 /**
1797  * oshp_filter:
1798  * @x: array of original data.
1799  * @hp: array in which filtered series is computed.
1800  * @dset: pointer to dataset.
1801  * @lambda: smoothing parameter (or #NADBL to use the default
1802  * value).
1803  * @opt: if %OPT_T, return the trend rather than the cycle.
1804  *
1805  * Calculates the "cycle" component of the time series in
1806  * array @x, using the a one-sided Hodrick-Prescott filter.
1807  * The implementation uses the Kalman filter.
1808  *
1809  * Returns: 0 on success, non-zero error code on failure.
1810  */
1811 
oshp_filter(const double * x,double * hp,const DATASET * dset,double lambda,gretlopt opt)1812 int oshp_filter (const double *x, double *hp, const DATASET *dset,
1813 		 double lambda, gretlopt opt)
1814 {
1815     int t1 = dset->t1, t2 = dset->t2;
1816     gretl_matrix *M[4] = {NULL};
1817     gretl_matrix *Lambda = NULL;
1818     gretl_matrix *a0 = NULL;
1819     gretl_matrix *mu = NULL;
1820     gretl_bundle *b = NULL;
1821     int copy[4] = {0};
1822     int T, t, err, kerr;
1823     double mt, sqrt_lam;
1824 
1825     for (t=t1; t<=t2; t++) {
1826 	hp[t] = NADBL;
1827     }
1828 
1829     err = series_adjust_sample(x, &t1, &t2);
1830     if (err) {
1831 	return err;
1832     }
1833 
1834     T = t2 - t1 + 1;
1835     if (T < 4) {
1836 	return E_TOOFEW;
1837     }
1838 
1839     if (na(lambda)) {
1840 	lambda = default_hp_lambda(dset);
1841     }
1842     sqrt_lam = sqrt(lambda);
1843 
1844     /* adjust starting points */
1845     x += t1;
1846     hp += t1;
1847 
1848     /* begin Kalman filter setup */
1849 
1850     /* obsy */
1851     M[0] = gretl_matrix_alloc(T+1, 1);
1852     for (t=0; t<T; t++) {
1853 	gretl_matrix_set(M[0], t, 0, x[t]);
1854     }
1855     /* add a dummy trailing observation */
1856     gretl_matrix_set(M[0], T, 0, 0.0);
1857 
1858     /* obsymat */
1859     M[1] = gretl_zero_matrix_new(2, 1);
1860     gretl_matrix_set(M[1], 0, 0, 1);
1861 
1862     /* statemat */
1863     M[2] = gretl_zero_matrix_new(2, 2);
1864     gretl_matrix_set(M[2], 0, 0, 2);
1865     gretl_matrix_set(M[2], 0, 1, -1);
1866     gretl_matrix_set(M[2], 1, 0, 1);
1867 
1868     /* statevar */
1869     M[3] = gretl_zero_matrix_new(2, 2);
1870     gretl_matrix_set(M[3], 0, 0, 1/sqrt_lam);
1871 
1872     b = kalman_bundle_new(M, copy, 4, &err);
1873     if (err) {
1874 	goto bailout;
1875     }
1876 
1877     /* obsvar */
1878     Lambda = gretl_matrix_from_scalar(sqrt_lam);
1879     err = gretl_bundle_donate_data(b, "obsvar", Lambda,
1880 				   GRETL_TYPE_MATRIX, 0);
1881 
1882     /* diffuse prior */
1883     err = gretl_bundle_set_scalar(b, "diffuse", 1);
1884 
1885     if (err) {
1886 	goto bailout;
1887     }
1888 
1889     /* inistate */
1890     a0 = gretl_matrix_alloc(2, 1);
1891     gretl_matrix_set(a0, 0, 0, 2*x[0] - x[1]);
1892     gretl_matrix_set(a0, 1, 0, 3*x[0] - 2*x[1]);
1893     err = gretl_bundle_donate_data(b, "inistate", a0,
1894 				   GRETL_TYPE_MATRIX, 0);
1895     if (err) {
1896 	goto bailout;
1897     }
1898 
1899 #if DEBUG
1900     gretl_matrix_print(M[0], "obsy");
1901     gretl_matrix_print(M[1], "obsymat");
1902     gretl_matrix_print(M[2], "statemat");
1903     gretl_matrix_print(M[3], "statevar");
1904     gretl_matrix_print(a0, "inistate");
1905 #endif
1906 
1907     kerr = kalman_bundle_run(b, NULL, &err);
1908     if (err || kerr) {
1909 	goto bailout;
1910     }
1911 
1912     mu = gretl_bundle_get_matrix(b, "state", &err);
1913     if (err) {
1914 	goto bailout;
1915     }
1916 
1917 #if DEBUG
1918     printf("kerr = %d, err = %d\n", kerr, err);
1919     gretl_matrix_print(mu, "state");
1920 #endif
1921 
1922     /* take the "lead" of the second element of the
1923        state estimate */
1924     if (opt & OPT_T) {
1925 	for (t=0; t<T; t++) {
1926 	    mt = gretl_matrix_get(mu, t+1, 1);
1927 	    hp[t] = mt;
1928 	}
1929     } else {
1930 	for (t=0; t<T; t++) {
1931 	    mt = gretl_matrix_get(mu, t+1, 1);
1932 	    hp[t] = x[t] - mt;
1933 	}
1934     }
1935 
1936  bailout:
1937 
1938     /* since all the matrices we've allocated above belong
1939        to the Kalman bundle, the following will suffice to
1940        free everything on exit
1941     */
1942     gretl_bundle_destroy(b);
1943 
1944     return err;
1945 }
1946 
1947 /**
1948  * bkbp_filter:
1949  * @x: array of original data.
1950  * @bk: array into which to write the filtered series.
1951  * @dset: data set information.
1952  * @bkl: lower frequency bound (or 0 for automatic).
1953  * @bku: upper frequency bound (or 0 for automatic).
1954  * @k: approximation order (or 0 for automatic).
1955  *
1956  * Calculates the Baxter and King bandpass filter. If bku exceeds the
1957  * number of available observations, then the "low-pass" version will
1958  * be computed (weights sum to 1). Otherwise, the ordinary bandpass
1959  * filter will be applied (weights sum to 0).
1960  *
1961  * Returns: 0 on success, non-zero error code on failure.
1962  */
1963 
bkbp_filter(const double * x,double * bk,const DATASET * dset,int bkl,int bku,int k)1964 int bkbp_filter (const double *x, double *bk, const DATASET *dset,
1965 		 int bkl, int bku, int k)
1966 {
1967     int t1 = dset->t1, t2 = dset->t2;
1968     double omubar, omlbar;
1969     double avg_a;
1970     double *a;
1971     int i, t, err = 0;
1972     int lowpass = 0;
1973 
1974     if (bkl <= 0 || bku <= 0) {
1975 	/* get user settings if available (or the defaults) */
1976 	get_bkbp_periods(dset, &bkl, &bku);
1977     }
1978 
1979     if (k <= 0) {
1980 	k = get_bkbp_k(dset);
1981     }
1982 
1983 #if BK_DEBUG
1984     fprintf(stderr, "lower limit = %d, upper limit = %d, \n",
1985 	    bkl, bku);
1986 #endif
1987 
1988     if (bkl >= bku) {
1989 	gretl_errmsg_set("Error in Baxter-King frequencies");
1990 	return 1;
1991     }
1992 
1993     err = series_adjust_sample(x, &t1, &t2);
1994     if (err) {
1995 	return err;
1996     }
1997 
1998     if (2 * k >= t2 - t1 + 1) {
1999 	gretl_errmsg_set("Insufficient observations");
2000 	return E_DATA;
2001     }
2002 
2003     if (bku >= t2 - t1 + 1) {
2004 	lowpass = 1;
2005     }
2006 
2007     a = malloc((k + 1) * sizeof *a);
2008     if (a == NULL) {
2009 	return E_ALLOC;
2010     }
2011 
2012     omubar = M_2PI / bkl;
2013     omlbar = lowpass? 0 : M_2PI / bku;
2014 
2015     /* first we compute the coefficients */
2016 
2017     avg_a = a[0] = (omubar - omlbar) / M_PI;
2018 
2019     if (lowpass) {
2020 	avg_a -= 1.0;
2021     }
2022 
2023     for (i=1; i<=k; i++) {
2024 	a[i] = (sin(i * omubar) - sin(i * omlbar)) / (i * M_PI);
2025 	avg_a += 2 * a[i];
2026     }
2027 
2028     avg_a /= (2 * k + 1);
2029 
2030     for (i=0; i<=k; i++) {
2031 	a[i] -= avg_a;
2032 #if BK_DEBUG
2033 	fprintf(stderr, "a[%d] = %#9.6g\n", i, a[i]);
2034 #endif
2035     }
2036 
2037     /* now we filter the series, skipping the first
2038        and last k observations */
2039 
2040     for (t=0; t<dset->n; t++) {
2041 	if (t < t1 + k || t > t2 - k) {
2042 	    bk[t] = NADBL;
2043 	} else {
2044 	    bk[t] = a[0] * x[t];
2045 	    for (i=1; i<=k; i++) {
2046 		bk[t] += a[i] * (x[t-i] + x[t+i]);
2047 	    }
2048 	}
2049     }
2050 
2051     free(a);
2052 
2053     return err;
2054 }
2055 
2056 /* following: material relating to the Butterworth filter */
2057 
2058 #define Min(x,y) (((x) > (y))? (y) : (x))
2059 #define NEAR_ZERO 1e-35
2060 
safe_pow(double x,int n)2061 static double safe_pow (double x, int n)
2062 {
2063     double lp;
2064 
2065     x = fabs(x);
2066 
2067     if (x < NEAR_ZERO) {
2068 	return 0.0;
2069     } else {
2070 	lp = n * log(x);
2071 	if (lp < -80) {
2072 	    return 0.0;
2073 	} else if (lp > 80) {
2074 	    return exp(80.0);
2075 	} else {
2076 	    return exp(lp);
2077 	}
2078     }
2079 }
2080 
cotan(double theta)2081 static double cotan (double theta)
2082 {
2083     double s = sin(theta);
2084 
2085     if (fabs(s) < NEAR_ZERO) {
2086 	s = copysign(NEAR_ZERO, s);
2087     }
2088 
2089     return cos(theta) / s;
2090 }
2091 
2092 /**
2093  * hp_gain:
2094  * @lambda: H-P parameter.
2095  * @hipass: 1 for high-pass filter, 0 for low-pass.
2096  *
2097  * Returns: a matrix holding omega values from 0 to \pi in
2098  * column 0, and the corresponding filter gain in column 1.
2099  */
2100 
hp_gain(double lambda,int hipass)2101 gretl_matrix *hp_gain (double lambda, int hipass)
2102 {
2103     gretl_matrix *G;
2104     int i, width = 180;
2105     double inc = M_PI / width;
2106     double x, gain, omega = 0.0;
2107 
2108     G = gretl_matrix_alloc(width + 1, 2);
2109     if (G == NULL) {
2110 	return NULL;
2111     }
2112 
2113     for (i=0; i<=width; i++) {
2114 	x = 2 * sin(omega / 2);
2115 	gain = 1 / (1 + lambda * pow(x, 4));
2116 	if (hipass) {
2117 	    gain = 1.0 - gain;
2118 	}
2119 	gretl_matrix_set(G, i, 0, omega);
2120 	gretl_matrix_set(G, i, 1, gain);
2121 	omega += inc;
2122     }
2123 
2124     return G;
2125 }
2126 
2127 /**
2128  * butterworth_gain:
2129  * @n: order of the filter.
2130  * @cutoff: angular cutoff in radians.
2131  * @hipass: 1 for high-pass filter, 0 for low-pass.
2132  *
2133  * Returns: a matrix holding omega values from 0 to \pi in
2134  * column 0, and the corresponding filter gain in column 1.
2135  */
2136 
butterworth_gain(int n,double cutoff,int hipass)2137 gretl_matrix *butterworth_gain (int n, double cutoff, int hipass)
2138 {
2139     gretl_matrix *G;
2140     int i, width = 180;
2141     double inc = M_PI / width;
2142     double x, gain, omega = 0.0;
2143 
2144     G = gretl_matrix_alloc(width + 1, 2);
2145     if (G == NULL) {
2146 	return NULL;
2147     }
2148 
2149     for (i=0; i<=width; i++) {
2150 	if (hipass) {
2151 	    x = cotan(omega / 2) * cotan((M_PI - cutoff) / 2);
2152 	} else {
2153 	    x = tan(omega / 2) * cotan(cutoff / 2);
2154 	}
2155 	gain = 1 / (1 + pow(x, 2 * n));
2156 	gretl_matrix_set(G, i, 0, omega);
2157 	gretl_matrix_set(G, i, 1, gain);
2158 	omega += inc;
2159     }
2160 
2161     return G;
2162 }
2163 
2164 /* Toeplitz methods:
2165 
2166    1 = Stephen Pollock's symmetric Toeplitz solver
2167    2 = gretl's netlib-based general Toeplitz solver
2168    3 = build full Toeplitz matrix and apply SVD
2169 
2170 */
2171 
2172 #define TOEPLITZ_METHOD 1
2173 
2174 #if (TOEPLITZ_METHOD == 3)
2175 
2176 /* form the complete Toeplitz matrix represented in compressed
2177    form by the coefficients in g
2178 */
2179 
toeplize(const double * g,int T,int k)2180 static gretl_matrix *toeplize (const double *g, int T, int k)
2181 {
2182     gretl_matrix *X;
2183     double x;
2184     int i, j;
2185 
2186     X = gretl_zero_matrix_new(T, T);
2187     if (X == NULL) {
2188 	return NULL;
2189     }
2190 
2191     for (i=0; i<T; i++) {
2192         gretl_matrix_set(X, i, i, g[0]);
2193     }
2194 
2195     for (i=1; i<k; i++) {
2196 	x = g[i];
2197 	for (j=i; j<T; j++) {
2198 	    gretl_matrix_set(X, j, j-i, x);
2199 	    gretl_matrix_set(X, j-i, j, x);
2200 	}
2201     }
2202 
2203     return X;
2204 }
2205 
2206 #include "matrix_extra.h"
2207 
toeplitz_solve(const double * g,double * dy,int T,int q)2208 static int toeplitz_solve (const double *g, double *dy, int T, int q)
2209 {
2210     gretl_matrix *y, *X;
2211     int err = 0;
2212 
2213     fprintf(stderr, "svd_toeplitz_solve...\n");
2214 
2215     X = toeplize(g, T, q + 1);
2216     if (X == NULL) {
2217 	return E_ALLOC;
2218     }
2219 
2220     err = gretl_SVD_invert_matrix(X);
2221 
2222     if (!err) {
2223 	y = gretl_vector_from_array(dy, T, GRETL_MOD_NONE);
2224 	if (y == NULL) {
2225 	    err = E_ALLOC;
2226 	} else {
2227 	    gretl_matrix s;
2228 
2229 	    s.t1 = s.t2 = 0;
2230 	    s.rows = T;
2231 	    s.cols = 1;
2232 	    s.val = dy;
2233 
2234 	    err = gretl_matrix_multiply(X, y, &s);
2235 	    gretl_matrix_free(y);
2236 	}
2237     }
2238 
2239     gretl_matrix_free(X);
2240 
2241     return err;
2242 }
2243 
2244 #elif (TOEPLITZ_METHOD == 2)
2245 
toeplitz_solve(double * g,double * y,int T,int q)2246 static int toeplitz_solve (double *g, double *y, int T, int q)
2247 {
2248     gretl_vector *mg = NULL;
2249     gretl_vector *my = NULL;
2250     gretl_vector *mx = NULL;
2251     int i, err = 0;
2252 
2253     fprintf(stderr, "netlib toeplitz solve...\n");
2254 
2255     mg = gretl_vector_alloc(T);
2256     my = gretl_vector_alloc(T);
2257 
2258     if (mg == NULL || my == NULL) {
2259 	return E_ALLOC;
2260     }
2261 
2262     for (i=0; i<T; i++) {
2263 	mg->val[i] = (i <= q) ? g[i] : 0;
2264 	my->val[i] = y[i];
2265     }
2266 
2267     mx = gretl_toeplitz_solve(mg, mg, my, &err);
2268 
2269     if (err) {
2270 	fprintf(stderr, "symm_toeplitz: err = %d\n", err);
2271 	for (i=0; i<T; i++) {
2272 	    y[i] = NADBL;
2273 	}
2274     } else {
2275 	for (i=0; i<T; i++) {
2276 	    y[i] = mx->val[i];
2277 	}
2278     }
2279 
2280     gretl_vector_free(mg);
2281     gretl_vector_free(my);
2282     gretl_vector_free(mx);
2283 
2284     return err;
2285 }
2286 
2287 #else /* TOEPLITZ_METHOD == 1 */
2288 
2289 /* This function uses a Cholesky decomposition to find the solution of
2290    the equation Gx = y, where G is a symmetric Toeplitz matrix of order
2291    T with q supra-diagonal and q sub-diagonal bands. The coefficients of
2292    G are contained in g. The RHS vector y contains the elements of
2293    the solution vector x on completion.
2294 */
2295 
toeplitz_solve(double * g,double * y,int T,int q)2296 static int toeplitz_solve (double *g, double *y, int T, int q)
2297 {
2298     double **mu;
2299     int t, j, k, jmax;
2300 
2301     mu = doubles_array_new(q+1, T);
2302     if (mu == NULL) {
2303 	return E_ALLOC;
2304     }
2305 
2306     /* initialize */
2307     for (t=0; t<q; t++) {
2308 	for (j=t+1; j<=q; j++) {
2309 	    mu[j][t] = 0.0;
2310 	}
2311     }
2312 
2313     /* factorize */
2314     for (t=0; t<T; t++) {
2315 	for (k=Min(q, t); k>=0; k--) {
2316 	    mu[k][t] = g[k];
2317 	    jmax = q - k;
2318 	    for (j=1; j<=jmax && t-k-j >= 0; j++) {
2319 		mu[k][t] -= mu[j][t-k] * mu[j+k][t] * mu[0][t-k-j];
2320 	    }
2321 	    if (k > 0) {
2322 		mu[k][t] /= mu[0][t-k];
2323 	    }
2324 	}
2325     }
2326 
2327     /* forward solve */
2328     for (t=0; t<T; t++) {
2329 	jmax = Min(t, q);
2330 	for (j=1; j<=jmax; j++) {
2331 	    y[t] -= mu[j][t] * y[t-j];
2332 	}
2333     }
2334 
2335     /* divide by the diagonal */
2336     for (t=0; t<T; t++) {
2337 	y[t] /= mu[0][t];
2338     }
2339 
2340     /* backsolve */
2341     for (t=T-1; t>=0; t--) {
2342 	jmax = Min(q, T - 1 - t);
2343 	for (j=1; j<=jmax; j++) {
2344 	    y[t] -= mu[j][t+j] * y[t+j];
2345 	}
2346     }
2347 
2348     doubles_array_free(mu, q+1);
2349 
2350     return 0;
2351 }
2352 
2353 #endif /* alternate TOEPLITZ_METHODs */
2354 
2355 
2356 /* Premultiply vector y by a symmetric banded Toeplitz matrix
2357    Gamma with n nonzero sub-diagonal bands and n nonzero
2358    supra-diagonal bands. The elements of Gamma are contained
2359    in g; tmp is used as workspace.
2360 */
2361 
GammaY(double * g,double * y,double * tmp,int T,int n)2362 static int GammaY (double *g, double *y, double *tmp,
2363 		   int T, int n)
2364 {
2365     double lx, rx;
2366     int t, j;
2367 
2368     for (j=0; j<=n; j++) {
2369 	tmp[j] = 0.0;
2370     }
2371 
2372     for (t=0; t<T; t++) {
2373 	for (j=n; j>0; j--) {
2374 	    tmp[j] = tmp[j-1];
2375 	}
2376 	tmp[0] = g[0] * y[t];
2377 	for (j=1; j<=n; j++) {
2378 	    lx = (t - j < 0) ? 0 : y[t-j];
2379 	    rx = (t + j >= T) ? 0 : y[t+j];
2380 	    tmp[0] += g[j] * (lx + rx);
2381 	}
2382 	if (t >= n) {
2383 	    y[t-n] = tmp[n];
2384 	}
2385     }
2386 
2387     for (j=0; j<n; j++) {
2388 	y[T-j-1] = tmp[j];
2389     }
2390 
2391     return 0;
2392 }
2393 
2394 #undef Min
2395 #undef NEAR_ZERO
2396 
2397 /* Form the autocovariances of an MA process of order q. */
2398 
form_gamma(double * g,double * mu,int q)2399 static void form_gamma (double *g, double *mu, int q)
2400 {
2401     int j, k;
2402 
2403     for (j=0; j<=q; j++) {
2404 	g[j] = 0.0;
2405 	for (k=0; k<=q-j; k++) {
2406 	    g[j] += mu[k] * mu[k+j];
2407 	}
2408     }
2409 }
2410 
2411 /* Find the coefficients of the nth power of the summation
2412    operator (if sign = 1) or of the difference operator (if
2413    sign = -1).
2414 */
2415 
sum_or_diff(double * theta,int n,int sign)2416 static void sum_or_diff (double *theta, int n, int sign)
2417 {
2418     int j, q;
2419 
2420     theta[0] = 1.0;
2421 
2422     for (q=1; q<=n; q++) {
2423 	theta[q] = 0.0;
2424 	for (j=q; j>0; j--) {
2425 	    theta[j] += sign * theta[j-1];
2426 	}
2427     }
2428 }
2429 
2430 /* g is the target, mu is used as workspace for the
2431    summation coefficients */
2432 
form_mvec(double * g,double * mu,int n)2433 static void form_mvec (double *g, double *mu, int n)
2434 {
2435     sum_or_diff(mu, n, 1);
2436     form_gamma(g, mu, n);
2437 }
2438 
2439 /* g is the target, mu is used as workspace for the
2440    differencing coefficients
2441    svec = Q'SQ where Q is the 2nd-diff operator
2442 */
2443 
form_svec(double * g,double * mu,int n)2444 static void form_svec (double *g, double *mu, int n)
2445 {
2446     sum_or_diff(mu, n, -1);
2447     form_gamma(g, mu, n);
2448 }
2449 
2450 /* g is the target, mu and tmp are used as workspace */
2451 
form_wvec(double * g,double * mu,double * tmp,int n,double lam1,double lam2)2452 static void form_wvec (double *g, double *mu, double *tmp,
2453 		       int n, double lam1, double lam2)
2454 {
2455     int i;
2456 
2457     form_svec(tmp, mu, n);
2458     for (i=0; i<=n; i++) {
2459 	g[i] = lam1 * tmp[i];
2460     }
2461 
2462     form_mvec(tmp, mu, n);
2463     for (i=0; i<=n; i++) {
2464 	g[i] += lam2 * tmp[i];
2465     }
2466 }
2467 
2468 /* Find the second differences of the elements of a vector.
2469    Notice that the first two elements of the vector are lost
2470    in the process. However, we index the leading element of
2471    the differenced vector by t = 0.
2472 */
2473 
QprimeY(double * y,int T)2474 static void QprimeY (double *y, int T)
2475 {
2476     int t;
2477 
2478     for (t=0; t<T-2; t++) {
2479 	y[t] += y[t+2] - 2 * y[t+1];
2480     }
2481 }
2482 
2483 /* Multiply the vector y by matrix Q of order T x T-2,
2484    where Q' is the matrix which finds the second
2485    differences of a vector.
2486 */
2487 
form_Qy(double * y,int T)2488 static void form_Qy (double *y, int T)
2489 {
2490     double tmp, lag1 = 0.0, lag2 = 0.0;
2491     int t;
2492 
2493     for (t=0; t<T-2; t++) {
2494 	tmp = y[t];
2495 	y[t] += lag2 - 2 * lag1;
2496 	lag2 = lag1;
2497 	lag1 = tmp;
2498     }
2499 
2500     y[T-2] = lag2 - 2 * lag1;
2501     y[T-1] = lag1;
2502 }
2503 
2504 #if HAVE_GMP
2505 
mp_butterworth(const double * x,double * bw,int T,int order,double cutoff)2506 static int mp_butterworth (const double *x, double *bw, int T,
2507 			   int order, double cutoff)
2508 {
2509     int (*mpfun) (const double *, double *, int, int, double);
2510 
2511     mpfun = get_plugin_function("mp_bw_filter");
2512 
2513     if (mpfun == NULL) {
2514 	return E_FOPEN;
2515     }
2516 
2517     return (*mpfun) (x, bw, T, order, cutoff);
2518 }
2519 
2520 #endif
2521 
2522 #if 0
2523 
2524 /* note: cut should be in degrees */
2525 
2526 static int bw_numeric_check (double cut, int n)
2527 {
2528     const double b0 = -8.56644923648263;
2529     const double b1 = -0.66992007228155;
2530     const double b2 =  2.82851981714539;
2531     double x;
2532 
2533     /* let's be conservative here */
2534     x = b0 + b1 * (cut - 1.0) + b2 * (n + 1);
2535     x = 1/(1+exp(-x));
2536 
2537     return (x > 0.479717591568)
2538 }
2539 
2540 #endif
2541 
2542 /* maximum modulus among the poles of the filter:
2543    too close to 1.0 is a problem
2544 */
2545 
bw_max_mod(double cut,int n)2546 static double bw_max_mod (double cut, int n)
2547 {
2548     double tc = tan(cut / 2);
2549     double theta = M_PI / (2 * n);
2550     double delta = 1 + tc * (2 * sin(theta) + tc);
2551     double x = (1 - tc * tc) / delta;
2552     double y = 2 * tc * cos(theta) / delta;
2553 
2554     return sqrt(x * x + y * y);
2555 }
2556 
2557 #define MAX_LAM 10000
2558 
2559 /* Calculate the Butterworth lambda based on cutoff and
2560    order. Return non-zero if it seems that lambda is too
2561    extreme. FIXME: conditioning on lambda alone is not an
2562    adequate approach.
2563 */
2564 
2565 static int
set_bw_lambda(double cutoff,int n,double * lam1,double * lam2)2566 set_bw_lambda (double cutoff, int n, double *lam1, double *lam2)
2567 {
2568     int ret = 0;
2569 
2570     *lam1 = 1 / tan(cutoff / 2);
2571     *lam1 = safe_pow(*lam1, n * 2);
2572 
2573     fprintf(stderr, "for cutoff %g, order %d: lambda=%g, maxmod=%g\n",
2574 	    cutoff, n, *lam1, bw_max_mod(cutoff, n));
2575 
2576     if (*lam1 > 1.0e18) {
2577 	/* can't cope, even with multiple precision? */
2578 	ret = 2;
2579     } else if (*lam1 > 1.0e6) {
2580 	/* may be OK with multiple precision? */
2581 	ret = 1;
2582     } else {
2583 	/* OK with regular double-precision? */
2584 	if (*lam1 > MAX_LAM) {
2585 	    /* note: AC thinks the following doesn't help */
2586 	    *lam1 = sqrt(*lam1);
2587 	    *lam2 = 1 / *lam1;
2588 	}
2589     }
2590 
2591     return ret;
2592 }
2593 
2594 /**
2595  * butterworth_filter:
2596  * @x: array of original data.
2597  * @bw: array into which to write the filtered series.
2598  * @dset: data set information.
2599  * @n: desired lag order.
2600  * @cutoff: desired angular cutoff in degrees (0, 180).
2601  *
2602  * Calculates the Butterworth filter. The code that does this
2603  * is based on D.S.G. Pollock's IDEOLOG -- see
2604  * http://www.le.ac.uk/users/dsgp1/
2605  *
2606  * Returns: 0 on success, non-zero error code on failure.
2607  */
2608 
butterworth_filter(const double * x,double * bw,const DATASET * dset,int n,double cutoff)2609 int butterworth_filter (const double *x, double *bw, const DATASET *dset,
2610 			int n, double cutoff)
2611 {
2612     double *g, *ds, *tmp, *y;
2613     double lam1, lam2 = 1.0;
2614     int t1 = dset->t1, t2 = dset->t2;
2615     int T, t, m;
2616     int bad_lambda = 0;
2617     int err = 0;
2618 
2619     err = series_adjust_sample(x, &t1, &t2);
2620     if (err) {
2621 	return err;
2622     }
2623 
2624     if (2 * n >= t2 - t1) {
2625 	gretl_errmsg_set("Insufficient observations");
2626 	return E_DATA;
2627     }
2628 
2629     if (cutoff <= 0.0 || cutoff >= 180.0) {
2630 	return E_INVARG;
2631     }
2632 
2633     T = t2 - t1 + 1;
2634     m = 3 * (n+1);
2635 
2636     /* sample offset */
2637     y = bw + t1;
2638     x = x + t1;
2639 
2640     /* the cutoff is expressed in radians internally */
2641     cutoff *= M_PI / 180.0;
2642 
2643     bad_lambda = set_bw_lambda(cutoff, n, &lam1, &lam2);
2644 
2645 #if HAVE_GMP
2646     if (bad_lambda > 1) {
2647 	gretl_errmsg_set("Butterworth: infeasible lambda value");
2648 	return E_DATA;
2649     } else if (bad_lambda) {
2650 	return mp_butterworth(x, y, T, n, cutoff);
2651     }
2652 #else
2653     if (bad_lambda) {
2654 	gretl_errmsg_set("Butterworth: infeasible lambda value");
2655 	return E_DATA;
2656     }
2657 #endif
2658 
2659     /* the workspace we need for everything except the
2660        Toeplitz solver */
2661     g = malloc(m * sizeof *g);
2662     if (g == NULL) {
2663 	return E_ALLOC;
2664     }
2665 
2666     ds = g + n + 1;
2667     tmp = ds + n + 1;
2668 
2669     /* place a copy of the data in y */
2670     memcpy(y, x, T * sizeof *y);
2671 
2672     form_wvec(g, ds, tmp, n, lam1, lam2); /* W = M + lambda * Q'SQ */
2673 
2674     QprimeY(y, T);
2675 
2676     /* solve (M + lambda*Q'SQ)x = d for x */
2677     err = toeplitz_solve(g, y, T-2, n);
2678 
2679     if (!err) {
2680 	form_Qy(y, T);
2681 	form_svec(g, ds, n-2);
2682 	GammaY(g, y, tmp, T, n-2);   /* Form SQy in y */
2683 	/* write the trend into y (low-pass) */
2684 	for (t=0; t<T; t++) {
2685 	    y[t] = x[t] - lam1 * y[t];
2686 	}
2687     }
2688 
2689     free(g);
2690 
2691     return err;
2692 }
2693 
2694 /* common code for both plain and weighted polynomial trend
2695    estimation */
2696 
real_poly_trend(const double * x,double * fx,double * w,int T,int order)2697 static int real_poly_trend (const double *x, double *fx, double *w,
2698 			    int T, int order)
2699 {
2700     double tmp, denom, lagdenom = 1;
2701     double alpha, gamma, delta;
2702     double *phi, *philag;
2703     int i, t;
2704 
2705     phi = malloc(2 * T * sizeof *phi);
2706     if (phi == NULL) {
2707 	return E_ALLOC;
2708     }
2709 
2710     philag = phi + T;
2711 
2712     for (t=0; t<T; t++) {
2713 	phi[t] = 1;
2714 	philag[t] = 0;
2715 	fx[t] = 0;
2716     }
2717 
2718     for (i=0; i<=order; i++) {
2719 	double xadd;
2720 
2721 	alpha = gamma = denom = 0.0;
2722 
2723 	for (t=0; t<T; t++) {
2724 	    xadd = x[t] * phi[t];
2725 	    if (w != NULL) xadd *= w[t];
2726 	    alpha += xadd;
2727 	    xadd = phi[t] * phi[t] * t;
2728 	    if (w != NULL) xadd *= w[t];
2729 	    gamma += xadd;
2730 	    xadd = phi[t] * phi[t];
2731 	    if (w != NULL) xadd *= w[t];
2732 	    denom += xadd;
2733 	}
2734 
2735 	alpha /= denom;
2736 	gamma /= denom;
2737 	delta = denom / lagdenom;
2738 	lagdenom = denom;
2739 
2740 	for (t=0; t<T; t++) {
2741 	    fx[t] += alpha * phi[t];
2742 	    tmp = phi[t];
2743 	    phi[t] = (t - gamma) * phi[t] - delta * philag[t];
2744 	    philag[t] = tmp;
2745 	}
2746     }
2747 
2748     free(phi);
2749 
2750     return 0;
2751 }
2752 
2753 /**
2754  * poly_trend:
2755  * @x: array of original data.
2756  * @fx: array into which to write the fitted series.
2757  * @dset: data set information.
2758  * @order: desired polynomial order.
2759  *
2760  * Calculates a trend via the method of orthogonal polynomials.
2761  * Based on C code for D.S.G. Pollock's DETREND program.
2762  *
2763  * Returns: 0 on success, non-zero error code on failure.
2764  */
2765 
poly_trend(const double * x,double * fx,const DATASET * dset,int order)2766 int poly_trend (const double *x, double *fx, const DATASET *dset, int order)
2767 {
2768     int t1 = dset->t1, t2 = dset->t2;
2769     int T, err;
2770 
2771     if (dataset_is_panel(dset)) {
2772 	gretl_errmsg_set("polyfit: panel data not supported");
2773 	return E_PDWRONG;
2774     }
2775 
2776     err = series_adjust_sample(x, &t1, &t2);
2777     if (err) {
2778 	return err;
2779     }
2780 
2781     T = t2 - t1 + 1;
2782 
2783     if (order > T) {
2784 	return E_DF;
2785     }
2786 
2787     return real_poly_trend(x + t1, fx + t1, NULL, T, order);
2788 }
2789 
2790 /**
2791  * poly_weights:
2792  * @w: array into which the weights will be written.
2793  * @T: the length of @w.
2794  * @wmax: the ratio of maximum to minimum weight.
2795  * @midfrac: the size of the central section that should be given
2796  * the minimum weight.
2797  * @opt: weighting scheme option (OPT_Q = quadratic, OPT_B = cosine bell,
2798  * OPT_C = crenelated).
2799  *
2800  * Calculates a set of weights; intended for use with polynomial
2801  * trend fitting.
2802  */
2803 
poly_weights(double * w,int T,double wmax,double midfrac,gretlopt opt)2804 void poly_weights (double *w, int T, double wmax,
2805 		   double midfrac, gretlopt opt)
2806 {
2807     double wt, a = 0, b = 0;
2808     int t, cut, T2 = T / 2;
2809 
2810     if (midfrac > 0) {
2811         cut = round(T * (1.0 - midfrac) / 2.0);
2812     } else {
2813         cut = T2;
2814     }
2815 
2816     if (opt == OPT_Q) {
2817 	/* quadratic */
2818 	double z1, z2, det;
2819 
2820 	if (midfrac > 0) {
2821 	    z1 = cut;
2822 	    z2 = 2 * cut;
2823 	} else {
2824 	    z1 = (T-1) / 2.0;
2825 	    z2 = T-1;
2826 	}
2827 
2828 	det = 1.0 / (z1 * (z1 * z2 - z2 * z2));
2829 	a = z2 * (1 - wmax) * det;
2830 	b = -z2 * a;
2831     } else if (opt == OPT_B) {
2832 	/* cosine-bell */
2833 	b = (wmax - 1) / 2.0;
2834     }
2835 
2836     for (t=0; t<=T2; t++) {
2837         if (t <= cut) {
2838 	    if (opt == OPT_Q) {
2839 		wt = a * t + b;
2840 		wt = wt * t + wmax;
2841 	    } else if (opt == OPT_B) {
2842 		a = (t * M_PI) / cut;
2843 		wt = 1 + b * (1 + cos(a));
2844 	    } else {
2845 		/* "crenelated" */
2846 		wt = wmax;
2847 	    }
2848         } else {
2849 	    wt = 1;
2850         }
2851         w[t] = w[T-1-t] = wt;
2852     }
2853 }
2854 
2855 /**
2856  * weighted_poly_trend:
2857  * @x: array of original data.
2858  * @fx: array into which to write the fitted series.
2859  * @dset: data set information.
2860  * @order: desired polynomial order.
2861  * @opt: weighting option (OPT_Q = quadratic, OPT_B = cosine bell,
2862  * OPT_C = crenelated).
2863  * @wratio: ratio of maximum to minimum weight.
2864  * @midfrac: proportion of the data to be treated specially, in the
2865  * middle.
2866  *
2867  * Calculates a trend via the method of orthogonal polynomials, using
2868  * the specified weighting scheme.
2869  * Based on C code for D.S.G. Pollock's DETREND program.
2870  *
2871  * Returns: 0 on success, non-zero error code on failure.
2872  */
2873 
weighted_poly_trend(const double * x,double * fx,const DATASET * dset,int order,gretlopt opt,double wratio,double midfrac)2874 int weighted_poly_trend (const double *x, double *fx, const DATASET *dset,
2875 			 int order, gretlopt opt, double wratio,
2876 			 double midfrac)
2877 {
2878     double *w = NULL;
2879     int t1 = dset->t1, t2 = dset->t2;
2880     int T, err;
2881 
2882     err = series_adjust_sample(x, &t1, &t2);
2883     if (err) {
2884 	return err;
2885     }
2886 
2887     T = t2 - t1 + 1;
2888 
2889     if (order > T) {
2890 	return E_DF;
2891     }
2892 
2893     w = malloc(T * sizeof *w);
2894     if (w == NULL) {
2895 	err = E_ALLOC;
2896     } else {
2897 	poly_weights(w, T, wratio, midfrac, opt);
2898 	err = real_poly_trend(x + t1, fx + t1, w, T, order);
2899 	free(w);
2900     }
2901 
2902     return err;
2903 }
2904 
n_new_dummies(const DATASET * dset,int nunits,int nperiods)2905 static int n_new_dummies (const DATASET *dset,
2906 			  int nunits, int nperiods)
2907 {
2908     char dname[VNAMELEN];
2909     int i, nnew = nunits + nperiods;
2910 
2911     for (i=0; i<nunits; i++) {
2912 	sprintf(dname, "du_%d", i + 1);
2913 	if (gretl_is_series(dname, dset)) {
2914 	    nnew--;
2915 	}
2916     }
2917 
2918     for (i=0; i<nperiods; i++) {
2919 	sprintf(dname, "dt_%d", i + 1);
2920 	if (gretl_is_series(dname, dset)) {
2921 	    nnew--;
2922 	}
2923     }
2924 
2925     return nnew;
2926 }
2927 
seas_name_and_label(int k,const DATASET * dset,gretlopt opt,char * vname)2928 static gchar *seas_name_and_label (int k, const DATASET *dset,
2929 				   gretlopt opt, char *vname)
2930 {
2931     int pd = dataset_is_panel(dset) ? dset->panel_pd : dset->pd;
2932     int ts = dset->structure == TIME_SERIES || dset->panel_pd > 1;
2933     gchar *ret = NULL;
2934 
2935     if (opt & OPT_C) {
2936 	sprintf(vname, "S%d", k);
2937 	ret = g_strdup(_("centered periodic dummy"));
2938     } else if (opt & OPT_S) {
2939 	sprintf(vname, "S%d", k);
2940 	ret = g_strdup(_("uncentered periodic dummy"));
2941     } else if (pd == 4 && ts) {
2942 	sprintf(vname, "dq%d", k);
2943 	ret = g_strdup_printf(_("= 1 if quarter = %d, 0 otherwise"), k);
2944     } else if (pd == 12 && ts) {
2945 	sprintf(vname, "dm%d", k);
2946 	ret = g_strdup_printf(_("= 1 if month = %d, 0 otherwise"), k);
2947     } else {
2948 	char dumstr[8] = "dummy_";
2949 	char numstr[12];
2950 	int len;
2951 
2952 	sprintf(numstr, "%d", k);
2953 	len = strlen(numstr);
2954 	dumstr[8 - len] = '\0';
2955 	sprintf(vname, "%s%d", dumstr, k);
2956 	ret = g_strdup_printf(_("%s = 1 if period is %d, 0 otherwise"), vname, k);
2957     }
2958 
2959     return ret;
2960 }
2961 
get_first_panel_period(DATASET * dset)2962 static int get_first_panel_period (DATASET *dset)
2963 {
2964     double x = date_as_double(0, dset->panel_pd, dset->panel_sd0);
2965     int i, d = ceil(log10(dset->panel_pd));
2966     int ret = 0;
2967 
2968     x -= floor(x);
2969     for (i=0; i<d; i++) {
2970 	x *= 10;
2971     }
2972 
2973     x = (x-floor(x) > 0.5)? ceil(x) : floor(x);
2974     ret = x - 1;
2975 
2976     return ret;
2977 }
2978 
real_seasonals(DATASET * dset,int ref,int center,int snames,int ** plist)2979 static int real_seasonals (DATASET *dset, int ref, int center,
2980 			   int snames, int **plist)
2981 {
2982     char vname[VNAMELEN];
2983     gchar *vlabel = NULL;
2984     gretlopt opt = 0;
2985     int *list = NULL;
2986     int i, vi, k, t, pp;
2987     int pd, ndums, nnew = 0;
2988     int pantime = 0;
2989     int nfix = 0;
2990     int err = 0;
2991 
2992     if (dset == NULL || dset->n == 0) {
2993 	return E_NODATA;
2994     }
2995 
2996     if (dataset_is_seasonal_panel(dset)) {
2997 	pd = dset->panel_pd;
2998 	if (pd != 4 && pd != 12 && pd != 24) {
2999 	    /* not handled yet */
3000 	    return E_PDWRONG;
3001 	} else {
3002 	    pantime = 1;
3003 	}
3004     }
3005 
3006     if (!pantime) {
3007 	pd = dset->pd;
3008 	if (pd < 2 || pd > 99999) {
3009 	    return E_PDWRONG;
3010 	}
3011     }
3012 
3013     if (dated_weekly_data(dset)) {
3014 	/* allow for up to 53 ISO 8601 weeks */
3015 	pd = 53;
3016     }
3017 
3018     if (ref < 0 || ref > pd) {
3019 	return E_INVARG;
3020     }
3021 
3022     ndums = pd - (ref > 0);
3023     list = gretl_list_new(ndums);
3024     if (list == NULL) {
3025 	return E_ALLOC;
3026     }
3027 
3028     if (center) opt |= OPT_C;
3029     if (snames) opt |= OPT_S;
3030 
3031     /* check for appropriate series already in place */
3032 
3033     for (k=1, i=1; i<=pd; i++) {
3034 	if (i == ref) {
3035 	    /* dummy not wanted */
3036 	    continue;
3037 	}
3038 	vlabel = seas_name_and_label(i, dset, opt, vname);
3039 	vi = series_index(dset, vname);
3040 	if (vi < dset->v) {
3041 	    const char *orig = series_get_label(dset, vi);
3042 
3043 	    list[k] = vi;
3044 	    if (orig == NULL || strcmp(vlabel, orig)) {
3045 		series_set_label(dset, vi, vlabel);
3046 		nfix++;
3047 	    }
3048 	} else {
3049 	    nnew++;
3050 	}
3051 	g_free(vlabel);
3052 	k++;
3053     }
3054 
3055     if (nnew == 0 && nfix == 0) {
3056 	goto finish;
3057     }
3058 
3059     /* starting point for new dummy IDs */
3060     vi = dset->v;
3061 
3062     if (nnew > 0 && dataset_add_series(dset, nnew)) {
3063 	return E_ALLOC;
3064     }
3065 
3066     for (k=1, i=1; i<=pd; i++) {
3067 	if (i == ref) {
3068 	    continue;
3069 	}
3070 	if (list[k] == 0) {
3071 	    /* no pre-existing series */
3072 	    vlabel = seas_name_and_label(i, dset, opt, vname);
3073 	    strcpy(dset->varname[vi], vname);
3074 	    series_set_label(dset, vi, vlabel);
3075 	    g_free(vlabel);
3076 	    list[k] = vi++;
3077 	}
3078 	k++;
3079     }
3080 
3081     if (pantime) {
3082 	int p0 = get_first_panel_period(dset);
3083 	int T = dset->pd;
3084 
3085 	pp = 0;
3086 
3087 	for (t=0; t<dset->n; t++) {
3088 	    if (t % T == 0) {
3089 		pp = p0;
3090 	    } else {
3091 		pp = (t + p0) % pd;
3092 	    }
3093 	    for (k=0, i=1; i<=list[0]; i++) {
3094 		vi = list[i];
3095 		if (k+1 == ref) k++;
3096 		dset->Z[vi][t] = (pp == k)? 1 : 0;
3097 		k++;
3098 	    }
3099 	}
3100     } else if (dated_daily_data(dset)) {
3101 	char datestr[OBSLEN];
3102 	int wkday;
3103 
3104 	for (t=0; t<dset->n; t++) {
3105 	    ntolabel(datestr, t, dset);
3106 	    wkday = weekday_from_date(datestr);
3107 	    wkday = (wkday == 0)? 7 : wkday;
3108 	    for (k=1, i=1; i<=list[0]; i++) {
3109 		vi = list[i];
3110 		if (k+1 == ref) k++;
3111 		dset->Z[vi][t] = (wkday == k)? 1 : 0;
3112 		k++;
3113 	    }
3114 	}
3115     } else if (dataset_is_daily(dset)) {
3116 	int yy, mm = 10;
3117 	double xx;
3118 
3119 	pp = pd;
3120 	while ((pp = pp / 10)) {
3121 	    mm *= 10;
3122 	}
3123 
3124 	for (k=1, i=1; i<=list[0]; i++) {
3125 	    vi = list[i];
3126 	    if (k == ref) k++;
3127 	    for (t=0; t<dset->n; t++) {
3128 		xx = date_as_double(t, pd, dset->sd0) + .1;
3129 		yy = (int) xx;
3130 		pp = (int) (mm * (xx - yy) + 0.5);
3131 		dset->Z[vi][t] = (pp == k)? 1 : 0;
3132 	    }
3133 	    k++;
3134 	}
3135     } else if (dated_weekly_data(dset)) {
3136 	char datestr[OBSLEN];
3137 	int wknum;
3138 
3139 	for (t=0; t<dset->n; t++) {
3140 	    ntolabel(datestr, t, dset);
3141 	    wknum = iso_week_from_date(datestr);
3142 	    for (k=1, i=1; i<=list[0]; i++) {
3143 		vi = list[i];
3144 		if (k+1 == ref) k++;
3145 		dset->Z[vi][t] = (wknum == k)? 1 : 0;
3146 		k++;
3147 	    }
3148 	}
3149     } else {
3150 	int p0 = get_subperiod(0, dset, NULL);
3151 
3152 	for (t=0; t<dset->n; t++) {
3153 	    pp = (t + p0) % pd;
3154 	    for (k=0, i=1; i<=list[0]; i++) {
3155 		vi = list[i];
3156 		if (k+1 == ref) k++;
3157 		dset->Z[vi][t] = (pp == k)? 1 : 0;
3158 		k++;
3159 	    }
3160 	}
3161     }
3162 
3163     if (center) {
3164 	double cx = 1.0 / pd;
3165 
3166 	for (i=1; i<=list[0]; i++) {
3167 	    vi = list[i];
3168 	    for (t=0; t<dset->n; t++) {
3169 		dset->Z[vi][t] -= cx;
3170 	    }
3171 	}
3172     }
3173 
3174  finish:
3175 
3176     if (plist != NULL) {
3177 	*plist = list;
3178     } else {
3179 	free(list);
3180     }
3181 
3182     return err;
3183 }
3184 
3185 /**
3186  * gen_seasonal_dummies:
3187  * @dset: dataset struct.
3188  * @center: if non-zero subtract the population mean from
3189  * each of the generated dummies.
3190  *
3191  * Adds to the data set (if these variables are not already
3192  * present) a set of seasonal dummy variables.
3193  *
3194  * Returns: 0 on success, non-zero on error.
3195  */
3196 
gen_seasonal_dummies(DATASET * dset,int center)3197 int gen_seasonal_dummies (DATASET *dset, int center)
3198 {
3199     return real_seasonals(dset, 0, center, 0, NULL);
3200 }
3201 
3202 /**
3203  * seasonals_list:
3204  * @dset: dataset struct.
3205  * @ref: 1-based reference period, or 0 for none.
3206  * @center: if non-zero, subtract the population mean from
3207  * each of the generated dummies.
3208  * @err: location to receive error code.
3209  *
3210  * Adds to the data set (if these variables are not already
3211  * present) a set of seasonal dummy variables.
3212  *
3213  * Returns: list holding the ID numbers of the seasonal
3214  * dummies, or NULL on error.
3215  */
3216 
seasonals_list(DATASET * dset,int ref,int center,int * err)3217 int *seasonals_list (DATASET *dset, int ref, int center, int *err)
3218 {
3219     int *list = NULL;
3220 
3221     *err = real_seasonals(dset, ref, center, 1, &list);
3222 
3223     return list;
3224 }
3225 
3226 /**
3227  * gen_panel_dummies:
3228  * @dset: dataset struct.
3229  * @opt: %OPT_T for time dummies, otherwise unit dummies.
3230  * @prn: printer for warning, or NULL.
3231  *
3232  * Adds to the data set a set of dummy variables corresponding
3233  * to either the cross-sectional units in a panel, or the
3234  * time periods.
3235  *
3236  * Returns: 0 on successful completion, error code on error.
3237  */
3238 
gen_panel_dummies(DATASET * dset,gretlopt opt,PRN * prn)3239 int gen_panel_dummies (DATASET *dset, gretlopt opt, PRN *prn)
3240 {
3241     char vname[16], label[MAXLABEL];
3242     int vi, t, yy, pp, mm;
3243     int orig_v = dset->v;
3244     int ndum, nnew;
3245     int n_unitdum = 0;
3246     int n_timedum = 0;
3247     int allnew = 0;
3248     int newvnum;
3249     double xx;
3250 
3251     if (opt & OPT_T) {
3252 	ndum = n_timedum = dset->pd;
3253     } else {
3254 	n_unitdum = dset->n / dset->pd;
3255 	if (dset->n % dset->pd) {
3256 	    n_unitdum++;
3257 	}
3258 	ndum = n_unitdum;
3259     }
3260 
3261     if (ndum == 1) {
3262 	return E_PDWRONG;
3263     }
3264 
3265     if (complex_subsampled()) {
3266 	nnew = ndum;
3267 	allnew = 1;
3268     } else {
3269 	nnew = n_new_dummies(dset, n_unitdum, n_timedum);
3270     }
3271 
3272     if (nnew > 0 && prn != NULL) {
3273 	double sz = nnew * dset->n * 8 / (1024.0 * 1024.0);
3274 
3275 	if (sz > 1024) {
3276 	    pprintf(prn, "warning: requested %gMb of storage\n", sz);
3277 	}
3278     }
3279 
3280     if (nnew > 0 && dataset_add_series(dset, nnew)) {
3281 	return E_ALLOC;
3282     }
3283 
3284     pp = dset->pd;
3285     mm = 10;
3286     while ((pp = pp / 10)) {
3287 	mm *= 10;
3288     }
3289 
3290     newvnum = orig_v;
3291 
3292     /* generate time-based dummies, if wanted */
3293 
3294     for (vi=1; vi<=n_timedum; vi++) {
3295 	int dnum;
3296 
3297 	sprintf(vname, "dt_%d", vi);
3298 
3299 	if (allnew) {
3300 	    dnum = newvnum++;
3301 	} else {
3302 	    dnum = series_index(dset, vname);
3303 	    if (dnum >= orig_v) {
3304 		dnum = newvnum++;
3305 	    }
3306 	}
3307 
3308 	strcpy(dset->varname[dnum], vname);
3309 	sprintf(label, _("%s = 1 if %s is %d, 0 otherwise"), vname,
3310 		_("period"), vi);
3311 	series_set_label(dset, dnum, label);
3312 
3313 	for (t=0; t<dset->n; t++) {
3314 	    xx = date_as_double(t, dset->pd, dset->sd0);
3315 	    yy = (int) xx;
3316 	    pp = (int) (mm * (xx - yy) + 0.5);
3317 	    dset->Z[dnum][t] = (pp == vi)? 1.0 : 0.0;
3318 	}
3319     }
3320 
3321     /* generate unit-based dummies, if wanted */
3322 
3323     for (vi=1; vi<=n_unitdum; vi++) {
3324 	int dmin = (vi - 1) * dset->pd;
3325 	int dmax = vi * dset->pd;
3326 	int dnum;
3327 
3328 	sprintf(vname, "du_%d", vi);
3329 
3330 	if (allnew) {
3331 	    dnum = newvnum++;
3332 	} else {
3333 	    dnum = series_index(dset, vname);
3334 	    if (dnum >= orig_v) {
3335 		dnum = newvnum++;
3336 	    }
3337 	}
3338 
3339 	strcpy(dset->varname[dnum], vname);
3340 	sprintf(label, _("%s = 1 if %s is %d, 0 otherwise"), vname,
3341 		_("unit"), vi);
3342 	series_set_label(dset, dnum, label);
3343 
3344 	for (t=0; t<dset->n; t++) {
3345 	    dset->Z[dnum][t] = (t >= dmin && t < dmax)? 1 : 0;
3346 	}
3347     }
3348 
3349     return 0;
3350 }
3351 
3352 /**
3353  * gen_unit:
3354  * @dset: dataset struct.
3355  * @vnum: location to receive ID number of series, or NULL.
3356  *
3357  * (For panel data only) adds to the data set an index variable
3358  * that uniquely identifies the cross-sectional units.
3359  *
3360  * Returns: 0 on successful completion, error code on error.
3361  */
3362 
gen_unit(DATASET * dset,int * vnum)3363 int gen_unit (DATASET *dset, int *vnum)
3364 {
3365     int xt = 0;
3366     int i, t;
3367 
3368     if (dset->structure != STACKED_TIME_SERIES) {
3369 	gretl_errmsg_set("'genr unit' can be used only with "
3370 			 "panel data");
3371 	return E_DATA;
3372     }
3373 
3374     i = series_index(dset, "unit");
3375 
3376     if (i == dset->v && dataset_add_series(dset, 1)) {
3377 	return E_ALLOC;
3378     }
3379 
3380     strcpy(dset->varname[i], "unit");
3381     series_set_label(dset, i, _("cross-sectional unit index"));
3382 
3383     for (t=0; t<dset->n; t++) {
3384 	if (t % dset->pd == 0) {
3385 	    xt++;
3386 	}
3387 	dset->Z[i][t] = (double) xt;
3388     }
3389 
3390     if (vnum != NULL) {
3391 	*vnum = i;
3392     }
3393 
3394     return 0;
3395 }
3396 
3397 /**
3398  * panel_unit_first_obs:
3399  * @t: zero-based observation number.
3400  * @dset: data information struct.
3401  *
3402  * Returns: 1 if observation @t is the first time-series
3403  * observation on a given cross-sectional unit in a
3404  * panel dataset, else 0.
3405  */
3406 
panel_unit_first_obs(int t,const DATASET * dset)3407 int panel_unit_first_obs (int t, const DATASET *dset)
3408 {
3409     char *p, obs[OBSLEN];
3410     int ret = 0;
3411 
3412     ntolabel(obs, t, dset);
3413     p = strchr(obs, ':');
3414     if (p != NULL && atoi(p + 1) == 1) {
3415 	ret = 1;
3416     }
3417 
3418     return ret;
3419 }
3420 
3421 /* make special time variable for panel data */
3422 
make_panel_time_var(double * x,const DATASET * dset)3423 static void make_panel_time_var (double *x, const DATASET *dset)
3424 {
3425     int t, xt = 0;
3426 
3427     for (t=0; t<dset->n; t++) {
3428 	if (t % dset->pd == 0) {
3429 	    xt = 1;
3430 	}
3431 	x[t] = (double) xt++;
3432     }
3433 }
3434 
3435 /**
3436  * gen_time:
3437  * @dset: dataset struct.
3438  * @tm: if non-zero, an actual time trend is wanted,
3439  * otherwise just an index of observations.
3440  * @vnum: location to receive ID number of series,
3441  * or NULL.
3442  *
3443  * Generates (and adds to the dataset, if it's not already
3444  * present) a time-trend or index variable.  This function
3445  * is panel-data aware: if the dataset is a panel and
3446  * @tm is non-zero, the trend will not simply run
3447  * consecutively over the entire range of the data, but
3448  * will correctly represent the location in time of
3449  * each observation.  The index is 1-based.
3450  *
3451  * Returns: 0 on success, non-zero on error.
3452  */
3453 
gen_time(DATASET * dset,int tm,int * vnum)3454 int gen_time (DATASET *dset, int tm, int *vnum)
3455 {
3456     int v, t;
3457 
3458     v = series_index(dset, (tm)? "time" : "index");
3459 
3460     if (v == dset->v && dataset_add_series(dset, 1)) {
3461 	return E_ALLOC;
3462     }
3463 
3464     if (tm) {
3465 	strcpy(dset->varname[v], "time");
3466 	series_set_label(dset, v, _("time trend variable"));
3467     } else {
3468 	strcpy(dset->varname[v], "index");
3469 	series_set_label(dset, v, _("data index variable"));
3470     }
3471 
3472     if (tm && dset->structure == STACKED_TIME_SERIES) {
3473 	make_panel_time_var(dset->Z[v], dset);
3474     } else {
3475 	for (t=0; t<dset->n; t++) {
3476 	    dset->Z[v][t] = (double) (t + 1);
3477 	}
3478     }
3479 
3480     if (vnum != NULL) {
3481 	*vnum = v;
3482     }
3483 
3484     return 0;
3485 }
3486 
3487 /**
3488  * genr_wkday:
3489  * @dset: dataset struct.
3490  * @vnum: location to receive ID number of series, or NULL.
3491  *
3492  * Generates (and adds to the dataset, if it's not already
3493  * present) an index representing the day of the week for
3494  * each observation (for dated daily data only).
3495  * The index has value 0 for Sunday, 1 for Monday, and
3496  * so on.
3497  *
3498  * Returns: 0 on success, non-zero code on error.
3499  */
3500 
gen_wkday(DATASET * dset,int * vnum)3501 int gen_wkday (DATASET *dset, int *vnum)
3502 {
3503     char datestr[OBSLEN];
3504     int i, t;
3505 
3506     if (!dated_daily_data(dset)) {
3507 	return E_PDWRONG;
3508     }
3509 
3510     i = series_index(dset, "weekday");
3511 
3512     if (i == dset->v && dataset_add_series(dset, 1)) {
3513 	return E_ALLOC;
3514     }
3515 
3516     strcpy(dset->varname[i], "weekday");
3517     series_set_label(dset, i, _("day of week (1 = Monday)"));
3518 
3519     for (t=0; t<dset->n; t++) {
3520 	ntolabel(datestr, t, dset);
3521 	dset->Z[i][t] = weekday_from_date(datestr);
3522     }
3523 
3524     if (vnum != NULL) {
3525 	*vnum = i;
3526     }
3527 
3528     return 0;
3529 }
3530 
3531 typedef enum {
3532     PLOTVAR_INDEX = 1,
3533     PLOTVAR_TIME,
3534     PLOTVAR_ANNUAL,
3535     PLOTVAR_QUARTERS,
3536     PLOTVAR_MONTHS,
3537     PLOTVAR_CALENDAR,
3538     PLOTVAR_GPTIME,
3539     PLOTVAR_DECADES,
3540     PLOTVAR_HOURLY,
3541     PLOTVAR_PANEL,
3542     PLOTVAR_MAX
3543 } plotvar_type;
3544 
plotvar_code(const DATASET * dset,gretlopt opt)3545 static int plotvar_code (const DATASET *dset, gretlopt opt)
3546 {
3547     if ((opt & OPT_S) && calendar_data(dset)) {
3548 	return PLOTVAR_TIME;
3549     } else if (!dataset_is_time_series(dset)) {
3550 	return PLOTVAR_INDEX;
3551     } else if (dset->pd == 1) {
3552 	return PLOTVAR_ANNUAL;
3553     } else if (dset->pd == 4) {
3554 	return PLOTVAR_QUARTERS;
3555     } else if (dset->pd == 12) {
3556 	return PLOTVAR_MONTHS;
3557     } else if (dset->pd == 24) {
3558 	return PLOTVAR_HOURLY;
3559     } else if (calendar_data(dset)) {
3560 	return (opt & OPT_T)? PLOTVAR_GPTIME : PLOTVAR_CALENDAR;
3561     } else if (dataset_is_decennial(dset)) {
3562 	return PLOTVAR_DECADES;
3563     } else {
3564 	return PLOTVAR_TIME;
3565     }
3566 }
3567 
panel_plotvar_code(const DATASET * dset)3568 static int panel_plotvar_code (const DATASET *dset)
3569 {
3570     int pd = dset->panel_pd;
3571 
3572     if (pd == 0) {
3573 	return 0;
3574     } else if (pd == 1) {
3575 	return PLOTVAR_ANNUAL;
3576     } else if (pd == 4) {
3577 	return PLOTVAR_QUARTERS;
3578     } else if (pd == 12) {
3579 	return PLOTVAR_MONTHS;
3580     } else if (pd == 24) {
3581 	return PLOTVAR_HOURLY;
3582     } else if ((pd == 5 || pd == 6 || pd == 7 || pd == 52) &&
3583 	       dset->panel_sd0 > 10000.0) {
3584 	return PLOTVAR_CALENDAR;
3585     } else if (pd == 10) {
3586 	return PLOTVAR_DECADES;
3587     } else {
3588 	return 0;
3589     }
3590 }
3591 
correct_to_int(double x)3592 static double correct_to_int (double x)
3593 {
3594     if (x - floor(x) < 1.0e-6) {
3595 	return floor(x);
3596     } else if (ceil(x) - x < 1.0e-6) {
3597 	return ceil(x);
3598     }
3599     return x;
3600 }
3601 
3602 /**
3603  * gretl_plotx:
3604  * @dset: data information struct.
3605  * @opt: can include OPT_P for panel time-series plot;
3606  * OPT_T to use gnuplot time (seconds of unix epoch);
3607  * OPT_S to indicate that the context is the "scatters"
3608  * command.
3609  *
3610  * Finds or creates a special dummy variable for use on the
3611  * x-axis in plotting; this will have the full length of the
3612  * data series as given in @dset, and will be appropriately
3613  * configured for the data frequency.  Do not try to free this
3614  * variable.
3615  *
3616  * Returns: pointer to plot x-variable, or NULL on failure.
3617  */
3618 
gretl_plotx(const DATASET * dset,gretlopt opt)3619 const double *gretl_plotx (const DATASET *dset, gretlopt opt)
3620 {
3621     static double *x;
3622     static int ptype;
3623     static int Tbak;
3624     static double sd0bak;
3625     double mul, frac;
3626     int t, y1, T = 0;
3627     int maj, min;
3628     int new_ptype = 0;
3629     int panvar = 0;
3630     int failed = 0;
3631     double sd0 = 0;
3632     float rm;
3633 
3634     if (dset == NULL) {
3635 	/* cleanup signal */
3636 	free(x);
3637 	x = NULL;
3638 	ptype = 0;
3639 	T = 0;
3640 	sd0 = 0;
3641 	return NULL;
3642     }
3643 
3644     if (dataset_is_panel(dset) && ((opt & OPT_P) ||
3645 				   sample_size(dset) == dset->pd)) {
3646 	/* we're plotting a single time-series from a panel */
3647 	new_ptype = panel_plotvar_code(dset);
3648 	if (new_ptype != 0) {
3649 	    sd0 = dset->panel_sd0;
3650 	} else {
3651 	    panvar = plausible_panel_time_var(dset);
3652 	    if (panvar > 0) {
3653 		new_ptype = PLOTVAR_PANEL;
3654 	    } else {
3655 		new_ptype = PLOTVAR_INDEX;
3656 	    }
3657 	}
3658 	T = dset->pd;
3659     }
3660 
3661     if (new_ptype == 0) {
3662 	/* not already determined */
3663 	new_ptype = plotvar_code(dset, opt);
3664     }
3665     if (T == 0) {
3666 	/* not already determined */
3667 	T = dset->n;
3668     }
3669     if (sd0 == 0.0) {
3670 	/* not already determined */
3671 	sd0 = dset->sd0;
3672     }
3673 
3674     if (x != NULL && new_ptype == ptype && Tbak == T && sd0 == sd0bak) {
3675 	/* a suitable array is already at hand */
3676 	return x;
3677     }
3678 
3679     if (x != NULL) {
3680 	free(x);
3681     }
3682 
3683     if (new_ptype == PLOTVAR_PANEL) {
3684 	x = copyvec(dset->Z[panvar], dset->n);
3685     } else {
3686 	x = malloc(T * sizeof *x);
3687     }
3688 
3689     if (x == NULL || new_ptype == PLOTVAR_PANEL) {
3690 	return x;
3691     }
3692 
3693  try_again:
3694 
3695     Tbak = T;
3696     ptype = new_ptype;
3697     sd0bak = sd0;
3698 
3699     y1 = (int) sd0;
3700     rm = sd0 - y1;
3701 
3702     switch (ptype) {
3703     case PLOTVAR_ANNUAL:
3704 	for (t=0; t<T; t++) {
3705 	    x[t] = sd0 + t;
3706 	}
3707 	break;
3708     case PLOTVAR_QUARTERS:
3709     case PLOTVAR_MONTHS:
3710     case PLOTVAR_HOURLY:
3711 	frac = 1.0 / dset->pd;
3712 	if (sscanf(dset->stobs, "%d:%d", &maj, &min) == 2) {
3713 	    for (t=0; t<T; t++) {
3714 		x[t] = maj + frac * (min - 1);
3715 		if (min < dset->pd) {
3716 		    min++;
3717 		} else {
3718 		    min = 1;
3719 		    maj++;
3720 		}
3721 	    }
3722 	} else {
3723 	    mul = ptype == PLOTVAR_QUARTERS ? 10 : 100;
3724 	    x[0] = correct_to_int(y1 + (mul * rm - 1.0) / dset->pd);
3725 	    for (t=1; t<T; t++) {
3726 		x[t] = correct_to_int(x[t-1] + frac);
3727 	    }
3728 	}
3729 	break;
3730     case PLOTVAR_CALENDAR:
3731     case PLOTVAR_GPTIME:
3732 	for (t=0; t<T; t++) {
3733 	    if (dset->S != NULL) {
3734 		if (ptype == PLOTVAR_GPTIME) {
3735 		    x[t] = gnuplot_time_from_date(dset->S[t], "%Y-%m-%d");
3736 		} else {
3737 		    x[t] = get_dec_date(dset->S[t]);
3738 		}
3739 	    } else {
3740 		char datestr[OBSLEN];
3741 
3742 		calendar_date_string(datestr, t, dset);
3743 		if (ptype == PLOTVAR_GPTIME) {
3744 		    x[t] = gnuplot_time_from_date(datestr, "%Y-%m-%d");
3745 		} else {
3746 		    x[t] = get_dec_date(datestr);
3747 		}
3748 	    }
3749 	    if (na(x[t])) {
3750 		failed++;
3751 		break;
3752 	    }
3753 	}
3754 	break;
3755     case PLOTVAR_DECADES:
3756 	for (t=0; t<T; t++) {
3757 	    x[t] = dset->sd0 + 10 * t;
3758 	}
3759 	break;
3760     case PLOTVAR_INDEX:
3761     case PLOTVAR_TIME:
3762 	for (t=0; t<T; t++) {
3763 	    x[t] = (double) (t + 1);
3764 	}
3765 	break;
3766     default:
3767 	break;
3768     }
3769 
3770     if (failed == 1) {
3771 	/* calendar dating failed */
3772 	failed++; /* ensure we don't loop */
3773 	new_ptype = PLOTVAR_TIME;
3774 	goto try_again;
3775     }
3776 
3777     return x;
3778 }
3779 
3780 /**
3781  * get_fit_or_resid:
3782  * @pmod: pointer to source model.
3783  * @dset: information on the data set.
3784  * @idx: %M_UHAT, %M_UHAT2, %M_YHAT, %M_AHAT or %M_H.
3785  * @vname: location to write series name (length %VNAMELEN)
3786  * @pdesc: location to receive copy of series description.
3787  * @err: location to receive error code.
3788  *
3789  * Creates a full-length array holding the specified model
3790  * data, and writes name and description into the @vname and
3791  * @vlabel.
3792  *
3793  * Returns: allocated array on success or NULL on failure.
3794  */
3795 
get_fit_or_resid(const MODEL * pmod,DATASET * dset,ModelDataIndex idx,char * vname,gchar ** pdesc,int * err)3796 double *get_fit_or_resid (const MODEL *pmod, DATASET *dset,
3797 			  ModelDataIndex idx, char *vname,
3798 			  gchar **pdesc, int *err)
3799 {
3800     const double *src = NULL;
3801     double *ret = NULL;
3802     int t;
3803 
3804     /* Note: this is called only from the GUI, and in that
3805        context @vname will be shown as the default name
3806        for the saved series, though the user can change
3807        it if she chooses.
3808     */
3809 
3810     if (idx == M_H) {
3811 	src = gretl_model_get_data(pmod, "garch_h");
3812     } else if (idx == M_AHAT) {
3813 	src = gretl_model_get_data(pmod, "ahat");
3814     } else if (idx == M_UHAT || idx == M_UHAT2) {
3815 	src = pmod->uhat;
3816     } else if (idx == M_YHAT) {
3817 	src = pmod->yhat;
3818     }
3819 
3820     if (src == NULL) {
3821 	if (*err == 0) {
3822 	    *err = E_BADSTAT;
3823 	}
3824 	return NULL;
3825     }
3826 
3827     ret = malloc(dset->n * sizeof *ret);
3828     if (ret == NULL) {
3829 	*err = E_ALLOC;
3830 	return NULL;
3831     }
3832 
3833     for (t=0; t<dset->n; t++) {
3834 	if (t >= pmod->t1 && t <= pmod->t2) {
3835 	    if (idx == M_UHAT2) {
3836 		ret[t] = na(src[t]) ? NADBL : (src[t] * src[t]);
3837 	    } else {
3838 		ret[t] = src[t];
3839 	    }
3840 	} else {
3841 	    ret[t] = NADBL;
3842 	}
3843     }
3844 
3845     if (idx == M_UHAT) {
3846 	sprintf(vname, "uhat%d", pmod->ID);
3847 	if (pmod->ci == GARCH && (pmod->opt & OPT_Z)) {
3848 	    *pdesc = g_strdup_printf(_("standardized residual from model %d"), pmod->ID);
3849 	} else {
3850 	    *pdesc = g_strdup_printf(_("residual from model %d"), pmod->ID);
3851 	}
3852     } else if (idx == M_YHAT) {
3853 	sprintf(vname, "yhat%d", pmod->ID);
3854 	*pdesc = g_strdup_printf(_("fitted value from model %d"), pmod->ID);
3855     } else if (idx == M_UHAT2) {
3856 	/* squared residuals */
3857 	sprintf(vname, "usq%d", pmod->ID);
3858 	if (pmod->ci == GARCH && (pmod->opt & OPT_Z)) {
3859 	    *pdesc = g_strdup_printf(_("squared standardized residual from model %d"), pmod->ID);
3860 	} else {
3861 	    *pdesc = g_strdup_printf(_("squared residual from model %d"), pmod->ID);
3862 	}
3863     } else if (idx == M_H) {
3864 	/* garch variance */
3865 	sprintf(vname, "h%d", pmod->ID);
3866 	*pdesc = g_strdup_printf(_("fitted variance from model %d"), pmod->ID);
3867     } else if (idx == M_AHAT) {
3868 	sprintf(vname, "ahat%d", pmod->ID);
3869 	if (pmod->opt & OPT_U) {
3870 	    *pdesc = g_strdup_printf(_("individual effects from model %d"), pmod->ID);
3871 	} else {
3872 	    *pdesc = g_strdup_printf(_("per-unit constants from model %d"), pmod->ID);
3873 	}
3874     }
3875 
3876     return ret;
3877 }
3878 
3879 /**
3880  * genr_fit_resid:
3881  * @pmod: pointer to source model.
3882  * @dset: dataset struct.
3883  * @idx: %M_UHAT, %M_UHAT2, %M_YHAT, %M_AHAT or %M_H.
3884  *
3885  * Adds residuals or fitted values or squared residuals from a
3886  * given model to the data set.
3887  *
3888  * Returns: 0 on successful completion, error code on error.
3889  */
3890 
genr_fit_resid(const MODEL * pmod,DATASET * dset,ModelDataIndex idx)3891 int genr_fit_resid (const MODEL *pmod, DATASET *dset,
3892 		    ModelDataIndex idx)
3893 {
3894     char vname[VNAMELEN];
3895     gchar *vlabel = NULL;
3896     double *x;
3897     int err = 0;
3898 
3899     x = get_fit_or_resid(pmod, dset, idx, vname, &vlabel, &err);
3900 
3901     if (!err) {
3902 	err = dataset_add_allocated_series(dset, x);
3903     }
3904 
3905     if (err) {
3906 	free(x);
3907     } else {
3908 	int v = dset->v - 1;
3909 
3910 	strcpy(dset->varname[v], vname);
3911 	series_set_label(dset, v, vlabel);
3912     }
3913 
3914     g_free(vlabel);
3915 
3916     return err;
3917 }
3918 
get_observation_number(const char * s,const DATASET * dset)3919 int get_observation_number (const char *s, const DATASET *dset)
3920 {
3921     char test[OBSLEN];
3922     size_t n;
3923     int t;
3924 
3925     *test = 0;
3926     strncat(test, (*s == '"')? s + 1 : s, OBSLEN - 1);
3927 
3928     n = strlen(test);
3929     if (test[n-1] == '"') {
3930 	test[n-1] = '\0';
3931     }
3932 
3933     if (dataset_has_markers(dset)) {
3934 	for (t=0; t<dset->n; t++) {
3935 	    if (!strcmp(test, dset->S[t])) {
3936 		return t + 1;
3937 	    }
3938 	}
3939 	if (calendar_data(dset)) {
3940 	    for (t=0; t<dset->n; t++) {
3941 		if (!strcmp(test, dset->S[t]) ||
3942 		    !strcmp(test, dset->S[t] + 2)) {
3943 		    return t + 1;
3944 		}
3945 	    }
3946 	}
3947     }
3948 
3949     if (dset->structure == TIME_SERIES) {
3950 	t = dateton(test, dset);
3951 	if (t >= 0) {
3952 	    return t + 1;
3953 	}
3954     }
3955 
3956     if (calendar_data(dset)) {
3957 	char datestr[OBSLEN];
3958 
3959 	for (t=0; t<dset->n; t++) {
3960 	    calendar_date_string(datestr, t, dset);
3961 	    if (!strcmp(test, datestr) ||
3962 		!strcmp(test, datestr + 2)) {
3963 		return t + 1;
3964 	    }
3965 	}
3966     }
3967 
3968     return 0;
3969 }
3970 
3971 #define OBS_DEBUG 0
3972 
plain_obs_number(const char * obs,const DATASET * dset)3973 static int plain_obs_number (const char *obs, const DATASET *dset)
3974 {
3975     char *test;
3976     int t = -1;
3977 
3978     errno = 0;
3979 
3980     strtol(obs, &test, 10);
3981 
3982     if (errno == 0 && *test == '\0') {
3983 	t = atoi(obs) - 1; /* convert from 1-based to 0-based */
3984 	if (t >= dset->n) {
3985 	    t = -1;
3986 	}
3987     }
3988 
3989     return t;
3990 }
3991 
3992 /* Given what looks like an observation number or date within "[" and
3993    "]", try to determine the observation number.  This is quite tricky
3994    since we try to handle both dates and plain observation numbers
3995    (and in addition, variables representing the latter); and we may
3996    have to deal with the translation from 1-based indexing in user
3997    space to 0-based indexing for internal purposes.
3998 */
3999 
get_t_from_obs_string(const char * s,const DATASET * dset)4000 int get_t_from_obs_string (const char *s, const DATASET *dset)
4001 {
4002     int t;
4003 
4004     if (*s == '"') {
4005 	char obs[OBSLEN+2];
4006 	int err = 0;
4007 
4008 	*obs = '\0';
4009 	strncat(obs, s, OBSLEN+1);
4010 	gretl_unquote(obs, &err);
4011 	t = dateton(obs, dset);
4012     } else {
4013 	t = dateton(s, dset);
4014     }
4015 
4016 #if OBS_DEBUG
4017     fprintf(stderr, "\nget_t_from_obs_string: s ='%s', dateton gives t = %d\n",
4018 	    s, t);
4019 #endif
4020 
4021     if (t < 0) {
4022 	if (isdigit((unsigned char) *s)) {
4023 	    t = plain_obs_number(s, dset);
4024 #if OBS_DEBUG
4025 	    fprintf(stderr, " plain_obs_number gives t = %d\n", t);
4026 #endif
4027 	} else {
4028 	    if (gretl_is_scalar(s)) {
4029 		t = gretl_scalar_get_value(s, NULL);
4030 	    }
4031 
4032 	    if (t > dset->n) {
4033 		/* e.g. annual dates */
4034 		char try[16];
4035 
4036 		sprintf(try, "%d", t);
4037 		t = dateton(try, dset);
4038 #if OBS_DEBUG
4039 		fprintf(stderr, " revised via dateton: t = %d\n", t);
4040 #endif
4041 	    } else {
4042 		/* convert to 0-based */
4043 		t--;
4044 	    }
4045 	}
4046     }
4047 
4048     if (t < 0) {
4049 	gretl_errmsg_set(_("Observation number out of bounds"));
4050     }
4051 
4052 #if OBS_DEBUG
4053     fprintf(stderr, " return value: t = %d\n", t);
4054 #endif
4055 
4056     return t;
4057 }
4058 
check_declarations(char *** pS,parser * p)4059 int check_declarations (char ***pS, parser *p)
4060 {
4061     char **S;
4062     const char *s;
4063     int exists = 0;
4064     int badname = 0;
4065     int i, n = 1;
4066 
4067     gretl_error_clear();
4068 
4069     if (p->lh.expr == NULL) {
4070 	p->err = E_ALLOC;
4071 	return 0;
4072     }
4073 
4074     s = p->lh.expr;
4075     s += strspn(s, " ");
4076 
4077     while (*s) {
4078 	if (*s == ',' || *s == ' ') {
4079 	    n++;
4080 	    s++;
4081 	    s += strspn(s, " ");
4082 	} else {
4083 	    s++;
4084 	}
4085     }
4086 
4087     S = strings_array_new(n);
4088     if (S == NULL) {
4089 	p->err = E_ALLOC;
4090 	return 0;
4091     }
4092 
4093     s = p->lh.expr;
4094     for (i=0; i<n && !p->err; i++) {
4095 	S[i] = gretl_word_strdup(s, &s, OPT_S | OPT_U, &p->err);
4096     }
4097 
4098     if (!p->err && *s != '\0') {
4099 	p->err = E_DATA;
4100     }
4101 
4102     for (i=0; i<n && !p->err; i++) {
4103 	if (gretl_type_from_name(S[i], p->dset)) {
4104 	    /* variable already exists */
4105 	    exists = 1;
4106 	    p->err = E_DATA;
4107 	} else if (check_identifier(S[i])) {
4108 	    /* invalid name */
4109 	    badname = 1;
4110 	    p->err = E_DATA;
4111 	}
4112     }
4113 
4114     if (p->err) {
4115 	if (exists) {
4116 	    gretl_errmsg_set(_("Invalid declaration: maybe you need "
4117 			       "the \"clear\" command?"));
4118 	} else if (!badname) {
4119 	    gretl_errmsg_set(_("Invalid declaration"));
4120 	}
4121 	strings_array_free(S, n);
4122     } else {
4123 	*pS = S;
4124     }
4125 
4126     return n;
4127 }
4128 
4129 /* cross-sectional (list-based) statistics */
4130 
xsect_minmax(double * x,int n,int f)4131 static double xsect_minmax (double *x, int n, int f)
4132 {
4133     double ret = x[0];
4134     int i;
4135 
4136     for (i=1; i<n; i++) {
4137 	if (f == F_MIN) {
4138 	    if (x[i] < ret) {
4139 		ret = x[i];
4140 	    }
4141 	} else if (x[i] > ret) {
4142 	    ret = x[i];
4143 	}
4144     }
4145 
4146     return ret;
4147 }
4148 
4149 /* Fill @targ with data (from @xlist) and associated
4150    weights (from @wlist) for the cross-section at obs @t.
4151    If @partial_ok is 0 (false) we return E_MISSDATA if
4152    any values or weights are missing at @t, otherwise
4153    we skip missing terms and return the number of valid
4154    terms in @pn.
4155 */
4156 
data_and_weight_at_obs(double ** targ,int * pn,const int * xlist,const int * wlist,const DATASET * dset,int t,int partial_ok)4157 static int data_and_weight_at_obs (double **targ, int *pn,
4158 				   const int *xlist,
4159 				   const int *wlist,
4160 				   const DATASET *dset,
4161 				   int t, int partial_ok)
4162 {
4163     int i, j = 0;
4164     double xi, wi;
4165 
4166     for (i=1; i<=xlist[0]; i++) {
4167 	xi = dset->Z[xlist[i]][t];
4168 	wi = dset->Z[wlist[i]][t];
4169 	if (wi < 0) {
4170 	    return E_INVARG;
4171 	} else if (wi == 0) {
4172 	    continue;
4173 	} else if (wi > 0 && na(xi)) {
4174 	    if (!partial_ok) {
4175 		return E_MISSDATA;
4176 	    }
4177 	} else if (!na(xi) && !na(wi)) {
4178 	    targ[0][j] = xi;
4179 	    targ[1][j] = wi;
4180 	    j++;
4181 	} else if (!partial_ok) {
4182 	    return E_MISSDATA;
4183 	}
4184     }
4185 
4186     *pn = j;
4187     return (j == 0)? E_MISSDATA : 0;
4188 }
4189 
4190 /* Fill @targ with data (from @xlist) for the cross-
4191    section at obs @t. If @partial_ok is 0 (false) we
4192    return E_MISSDATA if any values are missing at @t,
4193    otherwise we skip missing terms and return the count
4194    of valid values in @pn.
4195 */
4196 
data_at_obs(double * targ,int * pn,const int * list,const DATASET * dset,int t,int partial_ok)4197 static int data_at_obs (double *targ, int *pn,
4198 			const int *list,
4199 			const DATASET *dset,
4200 			int t, int partial_ok)
4201 {
4202     int i, j = 0;
4203     double xi;
4204 
4205     for (i=1; i<=list[0]; i++) {
4206 	xi = dset->Z[list[i]][t];
4207 	if (!na(xi)) {
4208 	    targ[j++] = xi;
4209 	} else if (!partial_ok) {
4210 	    return E_MISSDATA;
4211 	}
4212     }
4213 
4214     *pn = j;
4215     return (j == 0)? E_MISSDATA : 0;
4216 }
4217 
cross_sectional_stat(double * y,const int * list,const DATASET * dset,int f,int partial_ok)4218 int cross_sectional_stat (double *y, const int *list,
4219 			  const DATASET *dset, int f,
4220 			  int partial_ok)
4221 {
4222     double *x;
4223     int missing;
4224     int i, t, n;
4225 
4226     /* to hold the cross section */
4227     x = malloc(list[0] * sizeof *x);
4228     if (x == NULL) {
4229 	return E_ALLOC;
4230     }
4231 
4232     for (t=dset->t1; t<=dset->t2; t++) {
4233 	missing = data_at_obs(x, &n, list, dset, t, partial_ok);
4234 	if (missing) {
4235 	    y[t] = NADBL;
4236 	} else if ((f == F_VCE || f == F_SD) && n == 1) {
4237 	    y[t] = NADBL;
4238 	} else if (f == F_MIN || f == F_MAX) {
4239 	    y[t] = xsect_minmax(x, n, f);
4240 	} else if (f == F_MEDIAN) {
4241 	    y[t] = gretl_median(0, n-1, x);
4242 	} else {
4243 	    double xsum = 0;
4244 
4245 	    for (i=0; i<n; i++) {
4246 		xsum += x[i];
4247 	    }
4248 	    if (f == F_SUM) {
4249 		y[t] = xsum;
4250 	    } else if (f == F_MEAN) {
4251 		y[t] = xsum / n;
4252 	    } else {
4253 		/* F_VCE or F_SD */
4254 		double d, s2 = 0, xbar = xsum / n;
4255 
4256 		for (i=0; i<n; i++) {
4257 		    d = x[i] - xbar;
4258 		    s2 += d * d;
4259 		}
4260 		if (f == F_VCE) {
4261 		    y[t] = s2 / (n-1);
4262 		} else {
4263 		    y[t] = sqrt(s2 / (n-1));
4264 		}
4265 	    }
4266 	}
4267     }
4268 
4269     free(x);
4270 
4271     return 0;
4272 }
4273 
x_sectional_weighted_stat(double * y,const int * xlist,const int * wlist,const DATASET * dset,int f,int partial_ok)4274 int x_sectional_weighted_stat (double *y, const int *xlist,
4275 			       const int *wlist,
4276 			       const DATASET *dset,
4277 			       int f, int partial_ok)
4278 {
4279     double wxbar, wsum, d, ws2, V2, adj;
4280     double *x, *w, **X;
4281     int i, t, n, missing;
4282     int err = 0;
4283 
4284     if (wlist[0] != xlist[0]) {
4285 	gretl_errmsg_sprintf("Weighted stats: data list has %d members but weight "
4286 			     "list has %d", xlist[0], wlist[0]);
4287 	return E_DATA;
4288     }
4289 
4290     /* storage for values and weights, per obs */
4291     X = doubles_array_new(2, xlist[0]);
4292     if (X == NULL) {
4293 	return E_ALLOC;
4294     }
4295 
4296     x = X[0];
4297     w = X[1];
4298 
4299     for (t=dset->t1; t<=dset->t2; t++) {
4300 	missing = data_and_weight_at_obs(X, &n, xlist, wlist,
4301 					 dset, t, partial_ok);
4302 	if (missing == E_INVARG) {
4303 	    err = E_INVARG;
4304 	    break;
4305 	} else if (missing) {
4306 	    y[t] = NADBL;
4307 	} else if (f != F_WMEAN && n == 1) {
4308 	    y[t] = NADBL;
4309 	} else {
4310 	    wxbar = wsum = 0;
4311 	    for (i=0; i<n; i++) {
4312 		wxbar += x[i] * w[i];
4313 		wsum += w[i];
4314 	    }
4315 	    wxbar /= wsum;
4316 	    if (f == F_WMEAN) {
4317 		y[t] = wxbar;
4318 	    } else {
4319 		ws2 = V2 = 0;
4320 		for (i=0; i<n; i++) {
4321 		    d = (x[i] - wxbar);
4322 		    ws2 += w[i] * d * d;
4323 		    V2 += w[i] * w[i];
4324 		}
4325 		/* "frequency" weights */
4326 		adj = (n-1) * wsum / n;
4327 		/* for "reliability" weights? */
4328 		// adj = wsum - V2 / wsum;
4329 		if (f == F_WVAR) {
4330 		    y[t] = ws2 / adj;
4331 		} else {
4332 		    y[t] = sqrt(ws2 / adj);
4333 		}
4334 	    }
4335 	}
4336     }
4337 
4338     doubles_array_free(X, 2);
4339 
4340     return err;
4341 }
4342 
4343 /* writes to the series @y a linear combination of the variables given
4344    in @list, using the coefficients given in the vector @b.
4345 */
4346 
list_linear_combo(double * y,const int * list,const gretl_vector * b,const DATASET * dset)4347 int list_linear_combo (double *y, const int *list,
4348 		       const gretl_vector *b,
4349 		       const DATASET *dset)
4350 {
4351     int nb = gretl_vector_get_length(b);
4352     int nl = list[0];
4353     int err = 0;
4354 
4355     if (nb != nl) {
4356 	gretl_errmsg_sprintf(_("List has %d members, but length "
4357 			       "of vector b is %d"), nl, nb);
4358 	err = E_DATA;
4359     } else {
4360 	int i, t;
4361 	double xit, yt;
4362 
4363 	for (t=dset->t1; t<=dset->t2; t++) {
4364 	    yt = 0;
4365 	    for (i=0; i<nl; i++) {
4366 		xit = dset->Z[list[i+1]][t];
4367 		if (na(xit)) {
4368 		    yt = NADBL;
4369 		    break;
4370 		} else {
4371 		    yt += xit * gretl_vector_get(b, i);
4372 		}
4373 	    }
4374 	    y[t] = yt;
4375 	}
4376     }
4377 
4378     return err;
4379 }
4380 
4381 #define MDEBUG 0
4382 
4383 #if HAVE_GMP
4384 
try_mp_midas_weights(const double * theta,int k,gretl_matrix * w,int method)4385 static int try_mp_midas_weights (const double *theta, int k,
4386 				 gretl_matrix *w, int method)
4387 {
4388     int (*mpfun) (const double *, int, gretl_matrix *, int);
4389     int err = 0;
4390 
4391     mpfun = get_plugin_function("mp_midas_weights");
4392 
4393     if (mpfun == NULL) {
4394 	return E_FOPEN;
4395     }
4396 
4397     err = (*mpfun) (theta, k, w, method);
4398 
4399     if (!err) {
4400 	int i, n = gretl_vector_get_length(w);
4401 
4402 	for (i=0; i<n; i++) {
4403 	    if (!isfinite(w->val[i])) {
4404 		err = E_NAN;
4405 		break;
4406 	    }
4407 	}
4408     }
4409 
4410     return err;
4411 }
4412 
4413 #endif /* HAVE_GMP */
4414 
4415 #define INF_CHECK_VERBOSE 0
4416 
inf_check(double val,const gretl_matrix * theta,const char * context,int * err)4417 static int inf_check (double val, const gretl_matrix *theta,
4418 		      const char *context, int *err)
4419 {
4420     if (isinf(val)) {
4421 #if INF_CHECK_VERBOSE
4422 	fprintf(stderr, "range error in %s\n", context);
4423 	gretl_matrix_print(theta, "theta");
4424 #endif
4425 	*err = E_NAN;
4426     } else {
4427 	/* tolerate underflow */
4428 	errno = 0;
4429     }
4430 
4431     return *err;
4432 }
4433 
4434 #define BETA_METHOD(m) (m==MIDAS_BETA0 || m==MIDAS_BETA1 || m==MIDAS_BETAN)
4435 
check_beta_params(int method,double * theta,int k,double eps)4436 static int check_beta_params (int method, double *theta,
4437 			      int k, double eps)
4438 {
4439     int err = 0;
4440 
4441     if ((method == MIDAS_BETA0 || method == MIDAS_BETA1) && k != 2) {
4442 	gretl_errmsg_set("theta must be a 2-vector");
4443 	err = E_INVARG;
4444     } else if (method == MIDAS_BETAN && k != 3) {
4445 	gretl_errmsg_set("theta must be a 3-vector");
4446 	err = E_INVARG;
4447     } else if (theta[0] < eps || theta[1] < eps) {
4448 	if (theta[0] < 0.0 || theta[1] < 0.0) {
4449 	    gretl_errmsg_set("beta: parameters must be positive");
4450 	    fprintf(stderr, "beta: theta1=%g, theta2=%g\n", theta[0], theta[1]);
4451 	    err = E_INVARG;
4452 	} else {
4453 	    /* bodge! */
4454 	    if (theta[0] < eps) theta[0] = eps;
4455 	    if (theta[1] < eps) theta[1] = eps;
4456 	}
4457     }
4458 
4459     return err;
4460 }
4461 
4462 /* Computes a column m-vector holding weights for use with MIDAS */
4463 
midas_weights(int p,const gretl_matrix * m,int method,int * err)4464 gretl_matrix *midas_weights (int p, const gretl_matrix *m,
4465 			     int method, int *err)
4466 {
4467     double *theta;
4468     double eps = pow(2.0, -52);
4469     double wsum = 0.0;
4470     gretl_matrix *w = NULL;
4471     int using_mp = 0;
4472     int k, i, j;
4473 
4474 #if MDEBUG
4475     gretl_matrix_print(m, "m, in midas_weights");
4476 #endif
4477 
4478     /* @p = lag order
4479        @k = number of hyper-parameters
4480     */
4481 
4482     if (method < MIDAS_NEALMON || method >= MIDAS_MAX || p < 1) {
4483 	*err = E_INVARG;
4484 	return NULL;
4485     }
4486 
4487     k = gretl_vector_get_length(m);
4488     if (k == 0) {
4489 	*err = E_INVARG;
4490 	return NULL;
4491     }
4492 
4493     theta = m->val;
4494 
4495     if (BETA_METHOD(method)) {
4496 	*err = check_beta_params(method, theta, k, eps);
4497 	if (*err) {
4498 	    return NULL;
4499 	}
4500     }
4501 
4502     w = gretl_zero_matrix_new(p, 1);
4503     if (w == NULL) {
4504 	*err = E_ALLOC;
4505 	return NULL;
4506     }
4507 
4508     errno = 0;
4509 
4510     if (method == MIDAS_NEALMON) {
4511 	for (i=0; i<p; i++) {
4512 	    w->val[i] = (i+1) * theta[0];
4513 	    for (j=1; j<k; j++) {
4514 		w->val[i] += pow(i+1, j+1) * theta[j];
4515 	    }
4516 	    w->val[i] = exp(w->val[i]);
4517 	    wsum += w->val[i];
4518 	    if (errno && inf_check(wsum, m, "nealmon weights", err)) {
4519 		break;
4520 	    }
4521 	}
4522     } else if (BETA_METHOD(method)) {
4523 	double si, ai, bi;
4524 
4525 	for (i=0; i<p; i++) {
4526 	    si = i / (p - 1.0);
4527 	    if (i == 0) {
4528 		si += eps;
4529 	    } else if (i == p-1) {
4530 		si -= eps;
4531 	    }
4532 	    ai = pow(si, theta[0] - 1.0);
4533 	    bi = pow(1.0 - si, theta[1] - 1.0);
4534 	    w->val[i] = ai * bi;
4535 	    wsum += w->val[i];
4536 	    if (errno && inf_check(wsum, m, "beta weights", err)) {
4537 		break;
4538 	    }
4539 	}
4540     } else if (method == MIDAS_ALMONP) {
4541 	/* straight Almon ploynomial */
4542 	for (i=0; i<p; i++) {
4543 	    w->val[i] = theta[0];
4544 	    for (j=1; j<k; j++) {
4545 		w->val[i] += pow(i+1, j) * theta[j];
4546 	    }
4547 	}
4548     }
4549 
4550 #if HAVE_GMP
4551     if (*err == E_NAN || (method != MIDAS_ALMONP && wsum < eps)) {
4552 	/* attempt a fix-up using multiple precision */
4553 	int save_errno = errno;
4554 
4555 	errno = 0;
4556 	*err = try_mp_midas_weights(theta, k, w, method);
4557 	if (*err) {
4558 	    errno = save_errno;
4559 	} else {
4560 	    using_mp = 1;
4561 	}
4562     }
4563 #endif
4564 
4565     if (errno) {
4566 	gretl_errmsg_sprintf("Failed to calculate MIDAS weights: %s",
4567 			     gretl_strerror(errno));
4568 	if (*err == 0) {
4569 	    *err = E_INVARG;
4570 	}
4571 	gretl_matrix_free(w);
4572 	errno = 0;
4573 	return NULL;
4574     }
4575 
4576     if (!using_mp && method != MIDAS_ALMONP) {
4577 	/* normalize the weights */
4578 	for (i=0; i<p; i++) {
4579 	    w->val[i] /= wsum;
4580 	}
4581     }
4582 
4583     if (!using_mp && method == MIDAS_BETAN) {
4584 	/* beta with third param, not zero-terminated:
4585 	   add theta[2] and renormalize
4586 	*/
4587 	wsum = 1 + p * theta[2];
4588 	for (i=0; i<p; i++) {
4589 	    w->val[i] = (w->val[i] + theta[2]) / wsum;
4590 	}
4591     }
4592 
4593 #if MDEBUG
4594     gretl_matrix_print(w, "w: midas weights");
4595 #endif
4596 
4597     return w;
4598 }
4599 
4600 #if HAVE_GMP
4601 
try_mp_midas_grad(const double * theta,gretl_matrix * G,int method)4602 static int try_mp_midas_grad (const double *theta,
4603 			      gretl_matrix *G,
4604 			      int method)
4605 {
4606     int (*mpfun) (const double *, gretl_matrix *, int);
4607     int err = 0;
4608 
4609     mpfun = get_plugin_function("mp_midas_gradient");
4610 
4611     if (mpfun == NULL) {
4612 	return E_FOPEN;
4613     }
4614 
4615     err = (*mpfun) (theta, G, method);
4616 
4617     if (!err) {
4618 	int i, n = G->rows * G->cols;
4619 
4620 	for (i=0; i<n; i++) {
4621 	    if (!isfinite(G->val[i])) {
4622 		err = E_NAN;
4623 		break;
4624 	    }
4625 	}
4626     }
4627 
4628     return err;
4629 }
4630 
mgrad_zero(const gretl_matrix * G)4631 static int mgrad_zero (const gretl_matrix *G)
4632 {
4633     int i, j, colzero;
4634 
4635     for (j=0; j<G->cols; j++) {
4636 	colzero = 1;
4637 	for (i=0; i<G->rows; i++) {
4638 	    if (gretl_matrix_get(G, i, j) != 0.0) {
4639 		colzero = 0;
4640 		break;
4641 	    }
4642 	}
4643 	if (colzero) {
4644 	    return 1;
4645 	}
4646     }
4647 
4648     return 0;
4649 }
4650 
4651 #endif /* HAVE_GMP */
4652 
midas_gradient(int p,const gretl_matrix * m,int method,int * err)4653 gretl_matrix *midas_gradient (int p, const gretl_matrix *m,
4654 			      int method, int *err)
4655 {
4656     double *theta;
4657     double eps = pow(2.0, -52);
4658     double ws2, wsum = 0.0;
4659     gretl_matrix *w = NULL;
4660     gretl_matrix *G = NULL;
4661     int k, i, j;
4662 
4663 #if MDEBUG
4664     gretl_matrix_print(m, "m, in midas_gradient");
4665 #endif
4666 
4667     if (method < MIDAS_NEALMON || method >= MIDAS_MAX || p < 1) {
4668 	*err = E_INVARG;
4669 	return NULL;
4670     }
4671 
4672     /* @p = lag order
4673        @k = number of hyper-parameters
4674     */
4675 
4676     k = gretl_vector_get_length(m);
4677     if (k == 0) {
4678 	*err = E_INVARG;
4679 	return NULL;
4680     }
4681 
4682     theta = m->val;
4683     errno = 0;
4684 
4685     if (BETA_METHOD(method)) {
4686 	*err = check_beta_params(method, theta, k, eps);
4687 	if (*err) {
4688 	    return NULL;
4689 	}
4690     }
4691 
4692     if (method != MIDAS_ALMONP) {
4693 	w = gretl_zero_matrix_new(p, 1);
4694 	if (w == NULL) {
4695 	    *err = E_ALLOC;
4696 	    return NULL;
4697 	}
4698     }
4699 
4700     G = gretl_matrix_alloc(p, k);
4701     if (G == NULL) {
4702 	gretl_matrix_free(w);
4703 	*err = E_ALLOC;
4704 	return NULL;
4705     }
4706 
4707     if (method == MIDAS_NEALMON) {
4708 	double *dsum = malloc(k * sizeof *dsum);
4709 	double gij;
4710 
4711 	for (j=0; j<k; j++) {
4712 	    dsum[j] = 0.0;
4713 	}
4714 	for (i=0; i<p; i++) {
4715 	    w->val[i] = (i+1) * theta[0];
4716 	    for (j=1; j<k; j++) {
4717 		w->val[i] += pow(i+1, j+1) * theta[j];
4718 	    }
4719 	    w->val[i] = exp(w->val[i]);
4720 	    wsum += w->val[i];
4721 	    if (errno && inf_check(wsum, m, "nealmon gradient", err)) {
4722 		free(dsum);
4723 		goto range_error;
4724 	    }
4725 	}
4726 	for (i=0; i<p; i++) {
4727 	    for (j=0; j<k; j++) {
4728 		dsum[j] += w->val[i] * pow(i+1, j+1);
4729 	    }
4730 	}
4731 	for (j=0; j<k; j++) {
4732 	    dsum[j] /= wsum;
4733 	}
4734 	for (i=0; i<p; i++) {
4735 	    w->val[i] /= wsum;
4736 	    for (j=0; j<k; j++) {
4737 		gij = w->val[i] * (pow(i+1, j+1) - dsum[j]);
4738 		gretl_matrix_set(G, i, j, gij);
4739 	    }
4740 	}
4741 	free(dsum);
4742     } else if (BETA_METHOD(method)) {
4743 	double si, ai, bi;
4744 	double g1sum = 0;
4745 	double g2sum = 0;
4746 
4747 	/* loop 1: form raw weights and their sum */
4748 	for (i=0; i<p; i++) {
4749 	    si = i / (p - 1.0);
4750 	    if (i == 0) {
4751 		si += eps;
4752 	    } else if (i == p - 1) {
4753 		si -= eps;
4754 	    }
4755 	    ai = pow(si, theta[0] - 1.0);
4756 	    bi = pow(1.0 - si, theta[1] - 1.0);
4757 	    w->val[i] = ai * bi;
4758 	    wsum += w->val[i];
4759 	    if (errno && inf_check(wsum, m, "beta gradient", err)) {
4760 		goto range_error;
4761 	    }
4762 	}
4763 	if (wsum <= eps) {
4764 	    /* should we just set G to zero in this case? */
4765 #if 0
4766 	    fprintf(stderr, "sum of weights = %g\n", wsum);
4767 #endif
4768 	    *err = E_NAN;
4769 	    goto range_error;
4770 	}
4771 	ws2 = wsum * wsum;
4772 	if (errno && inf_check(ws2, m, "beta gradient", err)) {
4773 	    goto range_error;
4774 	}
4775 	/* loop 2: form first component of derivative
4776 	   and second cumulant */
4777 	for (i=0; i<p; i++) {
4778 	    si = i / (p - 1.0);
4779 	    if (i == 0) {
4780 		si += eps;
4781 	    } else if (i == p - 1) {
4782 		si -= eps;
4783 	    }
4784 	    ai = w->val[i] * log(si);
4785 	    g1sum += ai;
4786 	    gretl_matrix_set(G, i, 0, ai/wsum);
4787 	    bi = w->val[i] * log(1-si);
4788 	    g2sum += bi;
4789 	    gretl_matrix_set(G, i, 1, bi/wsum);
4790 	}
4791 	/* loop 3: form second component and subtract */
4792 	for (i=0; i<p; i++) {
4793 	    ai = gretl_matrix_get(G, i, 0);
4794 	    ai -= w->val[i] * g1sum/ws2;
4795 	    gretl_matrix_set(G, i, 0, ai);
4796 	    bi = gretl_matrix_get(G, i, 1);
4797 	    bi -= w->val[i] * g2sum/ws2;
4798 	    gretl_matrix_set(G, i, 1, bi);
4799 	}
4800 	if (k == 3) {
4801 	    /* not zero-terminated */
4802 	    double c3 = theta[2];
4803 	    double m3 = 1 / (1 + p * c3);
4804 	    double g;
4805 
4806 	    for (i=0; i<2*p; i++) {
4807 		/* scale the first two columns */
4808 		G->val[i] *= m3;
4809 	    }
4810 	    for (i=0; i<p; i++) {
4811 		/* compute the third-col derivative */
4812 		g = 1 - p * w->val[i]/wsum;
4813 		gretl_matrix_set(G, i, 2, m3 * m3 * g);
4814 	    }
4815 	}
4816     } else if (method == MIDAS_ALMONP) {
4817 	/* straight Almon polynomial */
4818 	for (i=0; i<p; i++) {
4819 	    gretl_matrix_set(G, i, 0, 1.0);
4820 	    for (j=1; j<k; j++) {
4821 		gretl_matrix_set(G, i, j, pow(i + 1.0, j));
4822 	    }
4823 	}
4824     }
4825 
4826  range_error:
4827 
4828 #if HAVE_GMP
4829     if (*err == E_NAN || mgrad_zero(G)) {
4830 	/* attempt a fix-up using multiple precision */
4831 	int save_errno = errno;
4832 
4833 	errno = 0;
4834 	*err = try_mp_midas_grad(theta, G, method);
4835 	if (*err) {
4836 	    errno = save_errno;
4837 	}
4838     }
4839 #endif
4840 
4841     gretl_matrix_free(w);
4842 
4843     if (errno) {
4844 	gretl_errmsg_sprintf("Failed to calculate MIDAS gradient: %s",
4845 			     gretl_strerror(errno));
4846 	if (*err == 0) {
4847 	    *err = E_INVARG;
4848 	}
4849 	gretl_matrix_free(G);
4850 	G = NULL;
4851 	errno = 0;
4852     }
4853 
4854 #if MDEBUG
4855     gretl_matrix_print(G, "G: midas gradient");
4856 #endif
4857 
4858     return G;
4859 }
4860 
4861 /* Retrieve from a midasreg $model bundle all the info
4862    needed to construct the array of per-lag multipliers
4863    and their standard errors.
4864 */
4865 
process_midas_bundle(gretl_bundle * mb,int idx,gretl_matrix ** ptheta,gretl_matrix ** pV,int * pmtype,int * ph,int * pminlag,const char ** plname)4866 static int process_midas_bundle (gretl_bundle *mb, int idx,
4867 				 gretl_matrix **ptheta,
4868 				 gretl_matrix **pV,
4869 				 int *pmtype, int *ph,
4870 				 int *pminlag,
4871 				 const char **plname)
4872 {
4873     gretl_array *mt;
4874     gretl_matrix *b, *vcv;
4875     gretl_matrix *theta = NULL;
4876     gretl_matrix *V = NULL;
4877     int *xlist = NULL;
4878     int mtype, np;
4879     int i0, i1;
4880     int i, j, k, nm;
4881     int err = 0;
4882 
4883     mt = gretl_bundle_get_array(mb, "midas_info", &err);
4884     if (!err) {
4885 	xlist = gretl_bundle_get_list(mb, "xlist", &err);
4886     }
4887     if (!err) {
4888 	b = gretl_bundle_get_matrix(mb, "coeff", &err);
4889     }
4890     if (!err) {
4891 	vcv = gretl_bundle_get_matrix(mb, "vcv", &err);
4892     }
4893     if (err) {
4894 	return err;
4895     }
4896 
4897     i0 = xlist[0];
4898     nm = gretl_array_get_length(mt);
4899     if (idx < 1 || idx > nm) {
4900 	gretl_errmsg_set("Invalid MIDAS term index");
4901 	err = E_DATA;
4902     } else {
4903 	idx--; /* convert index to 0-based */
4904     }
4905 
4906     for (i=0; i<=idx && !err; i++) {
4907 	gretl_bundle *mti;
4908 	int minlag, maxlag;
4909 	int nl, leader = 1;
4910 
4911 	mti = gretl_array_get_data(mt, i);
4912 	minlag = gretl_bundle_get_int(mti, "minlag", &err);
4913 	maxlag = gretl_bundle_get_int(mti, "maxlag", &err);
4914 	mtype = gretl_bundle_get_int(mti, "type", &err);
4915 	np = gretl_bundle_get_int(mti, "nparm", &err);
4916 	if (err) {
4917 	    break;
4918 	}
4919 
4920 	if (mtype == MIDAS_U || mtype == MIDAS_ALMONP) {
4921 	    leader = 0;
4922 	}
4923 	nl = maxlag - minlag + 1;
4924 	i1 = i0 + np - 1 + leader;
4925 #if 1
4926 	fprintf(stderr, "midas term %d, type %d, np %d, nlags %d\n",
4927 		i, mtype, np, nl);
4928 #endif
4929 	if (i == idx) {
4930 	    double vjk;
4931 	    int r = i1 - i0 + 1;
4932 	    int jj;
4933 
4934 	    theta = gretl_matrix_alloc(r, 1);
4935 	    V = gretl_matrix_alloc(r, r);
4936 	    for (j=0; j<r; j++) {
4937 		jj = j + i0;
4938 		theta->val[j] = b->val[jj];
4939 		for (k=0; k<r; k++) {
4940 		    vjk = gretl_matrix_get(vcv, jj, k+i0);
4941 		    gretl_matrix_set(V, j, k, vjk);
4942 		}
4943 	    }
4944 	    *pmtype = mtype;
4945 	    *ph = nl;
4946 	    *pminlag = minlag;
4947 	    *plname = gretl_bundle_get_string(mti, "lname", NULL);
4948 	}
4949 	i0 = i1 + 1;
4950     }
4951 
4952     if (err) {
4953 	gretl_matrix_free(theta);
4954 	gretl_matrix_free(V);
4955     } else {
4956 	*ptheta = theta;
4957 	*pV = V;
4958     }
4959 
4960     return err;
4961 }
4962 
midas_multipliers(gretl_bundle * mb,int cumulate,int idx,int * err)4963 gretl_matrix *midas_multipliers (gretl_bundle *mb, int cumulate,
4964 				 int idx, int *err)
4965 {
4966     gretl_matrix *ret = NULL;
4967     gretl_matrix *theta = NULL;
4968     gretl_matrix *mult = NULL;
4969     gretl_matrix *V = NULL;
4970     gretl_matrix *J = NULL;
4971     gretl_matrix *vcv = NULL;
4972     const char *lname = NULL;
4973     int minlag = 0;
4974     int mtype = 0, h = 0;
4975     int i, k;
4976 
4977     *err = process_midas_bundle(mb, idx, &theta, &V, &mtype,
4978 				&h, &minlag, &lname);
4979     if (*err) {
4980 	gretl_errmsg_set("Not a valid midasreg bundle");
4981 	return NULL;
4982     }
4983 
4984     k = theta->rows;
4985 
4986     if (mtype == MIDAS_U) {
4987 	mult = gretl_matrix_copy(theta);
4988 	J = gretl_zero_matrix_new(h, k);
4989 	gretl_matrix_inscribe_I(J, 0, 0, MIN(k,h));
4990     } else if (mtype == MIDAS_ALMONP) {
4991 	mult = midas_weights(h, theta, mtype, err);
4992         J = midas_gradient(h, theta, mtype, err);
4993     } else {
4994 	gretl_matrix *tmp1, *tmp2;
4995         double mg, th0 = theta->val[0];
4996 	int j;
4997 
4998 	tmp1 = gretl_matrix_alloc(k-1, 1);
4999 	for (i=1; i<k; i++) {
5000 	    tmp1->val[i-1] = theta->val[i];
5001 	}
5002         mult = midas_weights(h, tmp1, mtype, err);
5003         tmp2 = midas_gradient(h, tmp1, mtype, err);
5004 	J = gretl_matrix_alloc(h, k);
5005 	for (i=0; i<h; i++) {
5006 	    gretl_matrix_set(J, i, 0, mult->val[i]);
5007 	    for (j=1; j<k; j++) {
5008 		mg = gretl_matrix_get(tmp2, i, j-1);
5009 		gretl_matrix_set(J, i, j, th0*mg);
5010 	    }
5011 	}
5012 	gretl_matrix_multiply_by_scalar(mult, th0);
5013 	gretl_matrix_free(tmp1);
5014 	gretl_matrix_free(tmp2);
5015     }
5016 
5017     if (*err || mult == NULL || J == NULL) {
5018 	if (!*err) {
5019 	    *err = E_ALLOC;
5020 	}
5021 	return NULL;
5022     }
5023 
5024     vcv = gretl_matrix_alloc(J->rows, J->rows);
5025     if (vcv == NULL) {
5026 	*err = E_ALLOC;
5027     } else {
5028 	*err = gretl_matrix_qform(J, GRETL_MOD_NONE,
5029 				  V, vcv, GRETL_MOD_NONE);
5030     }
5031 
5032     if (!*err && cumulate) {
5033 	gretl_matrix *tmp;
5034 
5035 	for (i=1; i<h; i++) {
5036 	    mult->val[i] += mult->val[i-1];
5037 	}
5038 	gretl_matrix_free(J);
5039 	J = gretl_unit_matrix_new(h, h);
5040 	gretl_matrix_set_triangle(J, NULL, 0, 1);
5041 	tmp = gretl_matrix_alloc(h, h);
5042 	*err = gretl_matrix_qform(J, GRETL_MOD_NONE,
5043 				  vcv, tmp, GRETL_MOD_NONE);
5044 	gretl_matrix_free(vcv);
5045 	vcv = tmp;
5046     }
5047 
5048     if (!*err) {
5049 	double vi;
5050 
5051 	ret = gretl_matrix_alloc(h, 2);
5052 	for (i=0; i<h; i++) {
5053 	    gretl_matrix_set(ret, i, 0, mult->val[i]);
5054 	    vi = gretl_matrix_get(vcv, i, i);
5055 	    gretl_matrix_set(ret, i, 1, sqrt(vi));
5056 	}
5057     }
5058 
5059     if (ret != NULL) {
5060 	char **S = strings_array_new(h);
5061 	char rname[32];
5062 
5063 	if (S != NULL) {
5064 	    for (i=0; i<h; i++) {
5065 		if (lname != NULL) {
5066 		    if (cumulate) {
5067 			sprintf(rname, "c%s_%d", lname, i + minlag);
5068 		    } else {
5069 			sprintf(rname, "%s_%d", lname, i + minlag);
5070 		    }
5071 		} else {
5072 		    sprintf(rname, "%s_%d", cumulate ? "cmult" : "mult",
5073 			    i + minlag);
5074 		}
5075 		S[i] = gretl_strdup(rname);
5076 	    }
5077 	    gretl_matrix_set_rownames(ret, S);
5078 	}
5079     }
5080 
5081     gretl_matrix_free(theta);
5082     gretl_matrix_free(V);
5083     gretl_matrix_free(mult);
5084     gretl_matrix_free(J);
5085     gretl_matrix_free(vcv);
5086 
5087     return ret;
5088 }
5089 
midas_linear_combo(double * y,const int * list,const gretl_matrix * theta,int method,const DATASET * dset)5090 int midas_linear_combo (double *y, const int *list,
5091 			const gretl_matrix *theta,
5092 			int method, const DATASET *dset)
5093 {
5094     gretl_matrix *w = NULL;
5095     int m = list[0];
5096     int err = 0;
5097 
5098     w = midas_weights(m, theta, method, &err);
5099 
5100     if (BETA_METHOD(method) && !err && w == NULL) {
5101 	int t;
5102 
5103 	for (t=dset->t1; t<=dset->t2; t++) {
5104 	    y[t] = NADBL;
5105 	}
5106 	return 0;
5107     }
5108 
5109     if (!err) {
5110 	err = list_linear_combo(y, list, w, dset);
5111     }
5112 
5113     gretl_matrix_free(w);
5114 
5115     return err;
5116 }
5117 
vector_to_midas_list(const gretl_matrix * v,int f_ratio,const char * prefix,DATASET * dset,int * err)5118 int *vector_to_midas_list (const gretl_matrix *v,
5119 			   int f_ratio,
5120 			   const char *prefix,
5121 			   DATASET *dset,
5122 			   int *err)
5123 {
5124     char vname[VNAMELEN];
5125     int *list = NULL;
5126     int origv = dset->v;
5127     int i;
5128 
5129     /* double-check to avoid crashing below */
5130     if (gretl_vector_get_length(v) != sample_size(dset) * f_ratio) {
5131 	*err = E_DATA;
5132 	return NULL;
5133     }
5134 
5135     /* check names for collisions first */
5136     for (i=0; i<f_ratio && !*err; i++) {
5137 	sprintf(vname, "%s%d", prefix, i+1);
5138 	if (current_series_index(dset, vname) >= 1 ||
5139 	    get_user_var_by_name(vname) != NULL) {
5140 	    gretl_errmsg_set("The constructed series names would "
5141 			     "collide with those of existing objects");
5142 	    *err = E_INVARG;
5143 	}
5144     }
5145 
5146     if (!*err) {
5147 	/* try adding the required number of series */
5148 	*err = dataset_add_series(dset, f_ratio);
5149 	if (!*err) {
5150 	    list = gretl_list_new(f_ratio);
5151 	    if (list == NULL) {
5152 		*err = E_ALLOC;
5153 	    }
5154 	}
5155     }
5156 
5157     if (!*err) {
5158 	/* actually construct the series */
5159 	char label[MAXLABEL];
5160 	int pos, pos0 = f_ratio - 1;
5161 	int t, k = origv;
5162 
5163 	for (i=0; i<f_ratio; i++) {
5164 	    sprintf(dset->varname[k], "%s%d", prefix, f_ratio - i);
5165 	    sprintf(label, "%s in sub-period %d", prefix, f_ratio - i);
5166 	    series_record_label(dset, k, label);
5167 	    list[i+1] = k;
5168 	    k++;
5169 	}
5170 
5171 	for (t=dset->t1; t<=dset->t2; t++) {
5172 	    pos = pos0;
5173 	    for (k=origv; k<dset->v; k++) {
5174 		dset->Z[k][t] = v->val[pos--];
5175 	    }
5176 	    pos0 += f_ratio;
5177 	}
5178 
5179 	gretl_list_set_midas(list, dset);
5180     }
5181 
5182     return list;
5183 }
5184 
5185 /* Imhof: draws on the RATS code in IMHOF.SRC from Estima, 2004.
5186 
5187    Imhof Procedure for computing P(u'Au < x) for a quadratic form in
5188    Normal(0,1) variables. This can be used for ratios of quadratic
5189    forms as well, since P((u'Au/u'Bu) < x) = P(u'(A-xB)u < 0).
5190 
5191    In the Durbin-Watson context, 'B' is the identity matrix and
5192    'x' is the Durbin-Watson statistic.
5193 
5194    References:
5195 
5196    Imhof, J.P (1961), Computing the Distribution of Quadratic Forms of
5197    Normal Variables, Biometrika 48, 419-426.
5198 
5199    Abrahamse, A.P.J and Koerts, J. (1969), On the Theory and
5200    Application of the General Linear Model, Rotterdam University
5201    Press.
5202 */
5203 
imhof_bound(const double * lambda,int k,int * err)5204 static double imhof_bound (const double *lambda, int k, int *err)
5205 {
5206     double e1 = 0.0001; /* Max truncation error due to finite
5207 			   upper bound on domain */
5208     double e2 = 0.0001; /* Cutoff for deciding whether an
5209 			   eigenvalue is effectively zero */
5210     double absl, bound;
5211     double nl = 0.0, sum = 0.0;
5212     int i;
5213 
5214     for (i=0; i<k; i++) {
5215 	absl = fabs(lambda[i]);
5216 	if (absl > e2) {
5217 	    nl += 1.0;
5218 	    sum += log(absl);
5219 	}
5220     }
5221 
5222     if (nl == 0.0) {
5223 	fprintf(stderr, "imhof_bound: got no non-zero eigenvalues\n");
5224 	*err = E_DATA;
5225 	return NADBL;
5226     }
5227 
5228     /* The key factor in the integrand is the product of
5229        (1+(lambda(i)*x)^2)^(1/4) across i. Since, for those
5230        factors with small |lambda(i)|, this won't go to zero very
5231        quickly, we count only the terms on the bigger eigenvalues.
5232     */
5233 
5234     nl *= 0.5;
5235     sum = 0.5 * sum + log(M_PI * nl);
5236     bound = exp(-(sum + log(e1)) / nl);
5237     bound += 5.0 / nl;
5238 
5239     if (bound < 0) {
5240 	fprintf(stderr, "imhof_bound: got negative result\n");
5241 	*err = E_DATA;
5242 	bound = NADBL;
5243     }
5244 
5245     return bound;
5246 }
5247 
vecsum(const double * x,int k)5248 static double vecsum (const double *x, int k)
5249 {
5250     double sum = 0.0;
5251     int i;
5252 
5253     for (i=0; i<k; i++) {
5254 	sum += x[i];
5255     }
5256 
5257     return sum;
5258 }
5259 
imhof_f(double u,const double * lambda,int k,double arg)5260 static double imhof_f (double u, const double *lambda, int k, double arg)
5261 {
5262     double ul, rho = 0.0;
5263     double theta = -u * arg;
5264     int i;
5265 
5266     /* The value at zero isn't directly computable as
5267        it produces 0/0. The limit is computed below.
5268     */
5269     if (u == 0.0) {
5270 	return 0.5 * (-arg + vecsum(lambda, k));
5271     }
5272 
5273     for (i=0; i<k; i++) {
5274 	ul = u * lambda[i];
5275 	theta += atan(ul);
5276 	rho += log(1.0 + ul * ul);
5277     }
5278 
5279     return sin(0.5 * theta) / (u * exp(0.25 * rho));
5280 }
5281 
5282 #define gridlimit 2048
5283 
5284 /*
5285   Adaptation of Abrahamse and Koert's Pascal code.  Evaluates the
5286   integral by Simpson's rule with grid size halving each time until
5287   the change from moving to a tighter grid is negligible. By halving
5288   the grid, only the odd terms need to be computed, as the old ones
5289   are already in the sum. Points entering get x 4 weights, which then
5290   get reduced to x 2 on the next iteration.
5291 */
5292 
imhof_integral(double arg,const double * lambda,int k,double bound,int * err)5293 static double imhof_integral (double arg, const double *lambda, int k,
5294 			      double bound, int *err)
5295 {
5296     double e3 = 0.0001;
5297     double base, step, sum1;
5298     double int0 = 0.0, int1 = 0.0;
5299     double eps4 = 3.0 * M_PI * e3;
5300     double sum4 = 0.0;
5301     double ret = NADBL;
5302     int j, n = 2;
5303 
5304     base = imhof_f(0, lambda, k, arg);
5305     base += imhof_f(bound, lambda, k, arg);
5306 
5307     while (n < gridlimit) {
5308 	step = bound / n;
5309 	sum1 = base + sum4 * 2.0;
5310 	base = sum1;
5311 	sum4 = 0.0;
5312 	for (j=1; j<=n; j+=2) {
5313 	    sum4 += imhof_f(j * step, lambda, k, arg);
5314 	}
5315 	int1 = (sum1 + 4 * sum4) * step;
5316 	if (n > 8 && fabs(int1 - int0) < eps4) {
5317 	    break;
5318 	}
5319 	int0 = int1;
5320 	n *= 2;
5321     }
5322 
5323     if (n > gridlimit) {
5324 	fprintf(stderr, "n = %d, Imhof integral failed to converge\n", n);
5325 	*err = E_NOCONV;
5326     } else {
5327 	ret = 0.5 - int1 / (3.0 * M_PI);
5328 	if (ret < 0) {
5329 	    fprintf(stderr, "n = %d, Imhof integral gave negative value %g\n", n, ret);
5330 	}
5331     }
5332 
5333     return ret;
5334 }
5335 
imhof_get_eigenvals(const gretl_matrix * m,double ** plam,int * pk)5336 static int imhof_get_eigenvals (const gretl_matrix *m,
5337 				double **plam, int *pk)
5338 {
5339     gretl_matrix *E;
5340     int err = 0;
5341 
5342     E = gretl_general_matrix_eigenvals(m, &err);
5343 
5344     if (!err) {
5345 	*pk = E->rows;
5346 	*plam = gretl_matrix_steal_data(E);
5347     }
5348 
5349     gretl_matrix_free(E);
5350 
5351     return err;
5352 }
5353 
5354 /* Implements the "imhof" function in genr: computes the probability
5355    P(u'Au < arg) for a quadratic form in Normal(0,1) variables.  The
5356    argument @m may be either the square matrix A or a vector
5357    containing the precomputed eigenvalues of A.
5358 */
5359 
imhof(const gretl_matrix * m,double arg,int * err)5360 double imhof (const gretl_matrix *m, double arg, int *err)
5361 {
5362     double *lambda = NULL;
5363     double bound, ret = NADBL;
5364     int k = 0, free_lambda = 0;
5365 
5366     errno = 0;
5367 
5368     if (m->cols == 1 || m->rows == 1) {
5369 	/* we'll assume m is a vector of eigenvalues */
5370 	lambda = m->val;
5371 	k = gretl_vector_get_length(m);
5372     } else if (m->rows == m->cols) {
5373 	/* we'll assume m is the 'A' matrix */
5374 	*err = imhof_get_eigenvals(m, &lambda, &k);
5375 	free_lambda = 1;
5376     } else {
5377 	/* huh? */
5378 	*err = E_DATA;
5379     }
5380 
5381     if (!*err) {
5382 	bound = imhof_bound(lambda, k, err);
5383     }
5384 
5385     if (!*err) {
5386 	ret = imhof_integral(arg, lambda, k, bound, err);
5387     }
5388 
5389     if (errno != 0) {
5390 	fprintf(stderr, "imhof: %s\n", gretl_strerror(errno));
5391 	if (!*err) {
5392 	    *err = E_NOCONV;
5393 	}
5394 	ret = NADBL;
5395 	errno = 0;
5396     } else if (!*err && ret < 0 && ret > -1.0e-14) {
5397 	/* just approximation error? */
5398 	ret = 0;
5399     }
5400 
5401     if (free_lambda) {
5402 	free(lambda);
5403     }
5404 
5405     return ret;
5406 }
5407 
5408 /* Implements the $dwpval accessor: given the residual vector
5409    @u and the matrix of regressors, @X, calculates the Durbin-Watson
5410    statistic then finds its p-value via the Imhof/Koerts/Abrahamse
5411    procedure. If @pDW is non-NULL, the Durbin-Watson statstic is
5412    written to that location.
5413 */
5414 
dw_pval(const gretl_matrix * u,const gretl_matrix * X,double * pDW,int * perr)5415 double dw_pval (const gretl_matrix *u, const gretl_matrix *X,
5416 		double *pDW, int *perr)
5417 {
5418     gretl_matrix *M = NULL;
5419     gretl_matrix *A = NULL;
5420     gretl_matrix *MA = NULL;
5421     gretl_matrix *XX = NULL;
5422     gretl_matrix *E = NULL;
5423     double uu, DW = 0;
5424     double pv = NADBL;
5425     int k = X->cols;
5426     int n = X->rows;
5427     int i, err = 0;
5428 
5429     M = gretl_identity_matrix_new(n);
5430     A = gretl_DW_matrix_new(n);
5431     MA = gretl_matrix_alloc(n, n);
5432     XX = gretl_matrix_alloc(k, k);
5433 
5434     if (M == NULL || A == NULL || MA == NULL || XX == NULL) {
5435 	err = E_ALLOC;
5436 	goto bailout;
5437     }
5438 
5439     gretl_matrix_multiply_mod(X, GRETL_MOD_TRANSPOSE,
5440 			      X, GRETL_MOD_NONE,
5441 			      XX, GRETL_MOD_NONE);
5442     err = gretl_invert_symmetric_matrix(XX);
5443 
5444     if (!err) {
5445 	/* M = I - X(X'X)^{-1}X' */
5446 	err = gretl_matrix_qform(X, GRETL_MOD_NONE,
5447 				 XX, M, GRETL_MOD_DECREMENT);
5448     }
5449     if (!err) {
5450 	err = gretl_matrix_multiply(M, A, MA);
5451     }
5452     if (!err) {
5453 	uu = gretl_matrix_dot_product(u, GRETL_MOD_TRANSPOSE,
5454 				      u, GRETL_MOD_NONE,
5455 				      &err);
5456     }
5457     if (!err) {
5458 	DW = gretl_scalar_qform(u, A, &err);
5459     }
5460     if (!err) {
5461 	DW /= uu;
5462 #if 0
5463 	fprintf(stderr, "DW = %g\n", DW);
5464 #endif
5465 	E = gretl_general_matrix_eigenvals(MA, &err);
5466     }
5467 
5468     if (!err) {
5469 	k = n - k;
5470 	for (i=0; i<k; i++) {
5471 	    E->val[i] -= DW;
5472 	}
5473 	gretl_matrix_reuse(E, k, 1);
5474 	pv = imhof(E, 0.0, &err);
5475 	if (!err && pDW != NULL) {
5476 	    *pDW = DW;
5477 	}
5478     }
5479 
5480  bailout:
5481 
5482     gretl_matrix_free(M);
5483     gretl_matrix_free(A);
5484     gretl_matrix_free(MA);
5485     gretl_matrix_free(XX);
5486     gretl_matrix_free(E);
5487 
5488     *perr = err;
5489 
5490     return pv;
5491 }
5492 
5493 /* create a matrix containing ACF and PACF values for each
5494    column of the input matrix, @m, with lag order @p.
5495 */
5496 
multi_acf(const gretl_matrix * m,const int * list,const DATASET * dset,int p,int * err)5497 gretl_matrix *multi_acf (const gretl_matrix *m,
5498 			 const int *list,
5499 			 const DATASET *dset,
5500 			 int p, int *err)
5501 {
5502     gretl_matrix *a, *A = NULL;
5503     const double *x;
5504     double xa;
5505     int nv, T, acol, pcol;
5506     int i, j;
5507 
5508     if (list == NULL && gretl_is_null_matrix(m)) {
5509 	*err = E_DATA;
5510 	return NULL;
5511     }
5512 
5513     if (m != NULL) {
5514 	nv = m->cols;
5515     } else {
5516 	nv = list[0];
5517     }
5518 
5519     A = gretl_matrix_alloc(p, 2 * nv);
5520     if (A == NULL) {
5521 	*err = E_ALLOC;
5522 	return NULL;
5523     }
5524 
5525     if (m != NULL) {
5526 	x = m->val;
5527 	T = m->rows;
5528     } else {
5529 	x = dset->Z[list[1]] + dset->t1;
5530 	T = sample_size(dset);
5531     }
5532 
5533     acol = 0;
5534     pcol = nv;
5535 
5536     for (j=0; j<nv; j++) {
5537 	/* get ACF/PACF for column/series */
5538 	a = acf_matrix(x, p, NULL, T, err);
5539 	if (*err) {
5540 	    gretl_matrix_free(a);
5541 	    gretl_matrix_free(A);
5542 	    return NULL;
5543 	}
5544 
5545 	/* transcribe into A matrix and free */
5546 	for (i=0; i<p; i++) {
5547 	    xa = gretl_matrix_get(a, i, 0);
5548 	    gretl_matrix_set(A, i, acol, xa);
5549 	    xa = gretl_matrix_get(a, i, 1);
5550 	    gretl_matrix_set(A, i, pcol, xa);
5551 	}
5552 	acol++;
5553 	pcol++;
5554 
5555 	gretl_matrix_free(a);
5556 
5557 	/* move to next data read position */
5558 	if (j < nv - 1) {
5559 	    if (m != NULL) {
5560 		x += m->rows;
5561 	    } else {
5562 		x = dset->Z[list[j+2]] + dset->t1;
5563 	    }
5564 	}
5565     }
5566 
5567     return A;
5568 }
5569 
multi_xcf(const void * px,int xtype,const void * py,int ytype,const DATASET * dset,int p,int * err)5570 gretl_matrix *multi_xcf (const void *px, int xtype,
5571 			 const void *py, int ytype,
5572 			 const DATASET *dset,
5573 			 int p, int *err)
5574 {
5575     const int *xlist = NULL;
5576     const gretl_matrix *Xmat = NULL;
5577     const double *xvec = NULL;
5578     const double *yvec = NULL;
5579     gretl_matrix *xj, *XCF = NULL;
5580     int T = sample_size(dset);
5581     int np = 2 * p + 1;
5582     int Ty, nx = 1;
5583     int i, j;
5584 
5585     if (xtype == LIST) {
5586 	xlist = px;
5587 	nx = xlist[0];
5588 	if (nx < 1) {
5589 	    *err = E_DATA;
5590 	    return NULL;
5591 	}
5592 	xvec = dset->Z[xlist[1]] + dset->t1;
5593     } else if (xtype == MAT) {
5594 	Xmat = px;
5595 	if (gretl_is_null_matrix(Xmat)) {
5596 	    *err = E_DATA;
5597 	    return NULL;
5598 	}
5599 	nx = Xmat->cols;
5600 	T = Xmat->rows;
5601 	xvec = Xmat->val;
5602     } else {
5603 	/* VEC: note: px is of type *void */
5604 	xvec = px;
5605 	xvec += dset->t1;
5606     }
5607 
5608     if (ytype == MAT) {
5609 	const gretl_matrix *ymat = py;
5610 
5611 	if (ymat->cols != 1) {
5612 	    *err = E_NONCONF;
5613 	    return NULL;
5614 	}
5615 	yvec = ymat->val;
5616 	Ty = ymat->rows;
5617     } else {
5618 	yvec = (const double *) py + dset->t1;
5619 	Ty = sample_size(dset);
5620     }
5621 
5622     if (Ty != T) {
5623 	*err = E_NONCONF;
5624 	return NULL;
5625     }
5626 
5627     if (nx > 1) {
5628 	XCF = gretl_matrix_alloc(np, nx);
5629 	if (XCF == NULL) {
5630 	    *err = E_ALLOC;
5631 	    return NULL;
5632 	}
5633     }
5634 
5635     for (j=0; j<nx; j++) {
5636 	/* get XCF for left-hand column/series and y */
5637 	xj = xcf_vec(xvec, yvec, p, NULL, T, err);
5638 	if (*err) {
5639 	    gretl_matrix_free(XCF);
5640 	    return NULL;
5641 	}
5642 
5643 	if (nx == 1) {
5644 	    XCF = xj;
5645 	    break;
5646 	}
5647 
5648 	/* transcribe into big XCF matrix and free */
5649 	for (i=0; i<np; i++) {
5650 	    gretl_matrix_set(XCF, i, j, xj->val[i]);
5651 	}
5652 	gretl_matrix_free(xj);
5653 
5654 	/* move to next data read position */
5655 	if (j < nx - 1) {
5656 	    if (Xmat != NULL) {
5657 		xvec += Xmat->rows;
5658 	    } else {
5659 		xvec = dset->Z[xlist[j+2]] + dset->t1;
5660 	    }
5661 	}
5662     }
5663 
5664     return XCF;
5665 }
5666 
theil_decomp(double * m,double MSE,const double * y,const double * f,int T)5667 static int theil_decomp (double *m, double MSE,
5668 			 const double *y,
5669 			 const double *f,
5670 			 int T)
5671 {
5672     double da, dp;
5673     double Abar, Pbar;
5674     double sa, sp, r;
5675     int t, err = 0;
5676 
5677     if (MSE <= 0.0) {
5678 	m[0] = m[1] = m[2] = NADBL;
5679 	return E_DATA;
5680     }
5681 
5682     Abar = Pbar = 0.0;
5683 
5684     for (t=0; t<T; t++) {
5685 	Abar += y[t];
5686 	Pbar += f[t];
5687     }
5688 
5689     Abar /= T;
5690     Pbar /= T;
5691 
5692     sa = sp = r = 0.0;
5693 
5694     for (t=0; t<T; t++) {
5695 	da = y[t] - Abar;
5696 	sa += da * da;
5697 	dp = f[t] - Pbar;
5698 	sp += dp * dp;
5699 	r += da * dp;
5700     }
5701 
5702     sa = sqrt(sa / T);
5703     sp = sqrt(sp / T);
5704     r /= T * sa * sp;
5705 
5706     if (sa == 0.0 || sp == 0.0) {
5707 	err = E_DATA;
5708 	m[0] = m[1] = m[2] = NADBL;
5709     } else {
5710 	m[0] = (Abar - Pbar) * (Abar - Pbar) / MSE; /* U^M */
5711 	m[1] = (sp - r * sa) * (sp - r * sa) / MSE; /* U^R */
5712 	m[2] = (1.0 - r * r) * sa * sa / MSE;       /* U^D */
5713 
5714 	if (m[2] > 0.99999999999999) {
5715 	    /* U^M and U^R are just machine noise? */
5716 	    m[2] = 1.0;
5717 	    m[0] = m[1] = 0.0;
5718 	}
5719     }
5720 
5721     return err;
5722 }
5723 
5724 /* OPT_O allows embedded missing values (which will be skipped);
5725    OPT_D requests Theil decomposition; OPT_T means time series.
5726 */
5727 
fill_fcstats_column(gretl_matrix * m,const double * y,const double * f,int T,gretlopt opt,int col)5728 static int fill_fcstats_column (gretl_matrix *m,
5729 				const double *y,
5730 				const double *f,
5731 				int T,
5732 				gretlopt opt,
5733 				int col)
5734 {
5735     double ME, MSE, MAE, MPE, MAPE, U;
5736     double Unum, Uden1, Uden2;
5737     double fe, u[2];
5738     int do_theil = 0;
5739     int do_U2 = (opt & OPT_T);
5740     int ok_T = T;
5741     int t, err = 0;
5742 
5743     ME = MSE = MAE = MPE = MAPE = U = 0.0;
5744     Unum = Uden1 = Uden2 = 0.0;
5745     u[0] = u[1] = 0.0;
5746 
5747     for (t=0; t<T; t++) {
5748 	if (na(y[t]) || na(f[t])) {
5749 	    if (opt & OPT_O) {
5750 		ok_T--;
5751 		continue;
5752 	    } else {
5753 		err = E_MISSDATA;
5754 		break;
5755 	    }
5756 	}
5757 	fe = y[t] - f[t];
5758 	ME += fe;
5759 	MSE += fe * fe;
5760 	MAE += fabs(fe);
5761 	if (floateq(fe, 0)) {
5762 	    ; /* OK, zero contribution to MPE, MAPE */
5763 	} else if (y[t] == 0.0) {
5764 	    /* can't calculate percentage */
5765 	    MPE = MAPE = NADBL;
5766 	} else {
5767 	    MPE += 100 * fe / y[t];
5768 	    MAPE += 100 * fabs(fe / y[t]);
5769 	}
5770 	if (do_U2) {
5771 	    /* let U = U2 */
5772 	    if (t < T-1 && !na(U)) {
5773 		if (na(f[t+1]) || na(y[t+1])) {
5774 		    U = NADBL;
5775 		} else {
5776 		    fe = f[t+1] - y[t+1];
5777 		    if (floatneq(fe, 0)) {
5778 			if (y[t] == 0.0) {
5779 			    U = NADBL;
5780 			} else {
5781 			    fe /= y[t];
5782 			    u[0] += fe * fe;
5783 			}
5784 		    }
5785 		    fe = y[t+1] - y[t];
5786 		    if (floatneq(fe, 0)) {
5787 			if (y[t] == 0.0) {
5788 			    U = NADBL;
5789 			} else {
5790 			    fe /= y[t];
5791 			    u[1] += fe * fe;
5792 			}
5793 		    }
5794 		}
5795 	    }
5796 	} else {
5797 	    /* let U = U1 */
5798 	    Unum += fe * fe;
5799 	    Uden1 += y[t] * y[t];
5800 	    Uden2 += f[t] * f[t];
5801 	}
5802     }
5803 
5804     if (!err) {
5805 	if (ok_T == 0) {
5806 	    err = E_MISSDATA;
5807 	} else if (ok_T < T) {
5808 	    T = ok_T;
5809 	} else if (opt & OPT_D) {
5810 	    do_theil = 1;
5811 	}
5812     }
5813 
5814     if (!err) {
5815 	fnpkg *pkg = get_active_function_package(0);
5816 	const char *pkgname = NULL;
5817 	int show_MSE = 0;
5818 
5819 	if (pkg != NULL) {
5820 	    pkgname = function_package_get_name(pkg);
5821 	}
5822 	if (pkgname != NULL && !strcmp(pkgname, "fcModels") &&
5823 	    function_package_get_version(pkg) <= 1.1) {
5824 	    show_MSE = 1;
5825 	}
5826 
5827 	ME /= T;
5828 	MSE /= T;
5829 	MAE /= T;
5830 	if (!isnan(MPE)) {
5831 	    MPE /= T;
5832 	}
5833 	if (!isnan(MAPE)) {
5834 	    MAPE /= T;
5835 	}
5836 	if (do_U2) {
5837 	    if (!isnan(U) && u[1] > 0.0) {
5838 		U = sqrt(u[0] / T) / sqrt(u[1] / T);
5839 	    }
5840 	} else {
5841 	    Unum = sqrt(Unum/T);
5842 	    Uden1 = sqrt(Uden1/T);
5843 	    Uden2 = sqrt(Uden2/T);
5844 	    U = Unum / (Uden1+ Uden2);
5845 	}
5846 	gretl_matrix_set(m, 0, col, ME);
5847 	if (show_MSE) {
5848 	    gretl_matrix_set(m, 1, col, MSE);
5849 	} else {
5850 	    gretl_matrix_set(m, 1, col, sqrt(MSE));
5851 	}
5852 	gretl_matrix_set(m, 2, col, MAE);
5853 	gretl_matrix_set(m, 3, col, MPE);
5854 	gretl_matrix_set(m, 4, col, MAPE);
5855 	gretl_matrix_set(m, 5, col, U);
5856 
5857 	if (do_theil) {
5858 	    double *targ = m->val + col * m->rows + 6;
5859 
5860 	    theil_decomp(targ, MSE, y, f, T);
5861 	}
5862     }
5863 
5864     return err;
5865 }
5866 
add_fcstats_rownames(gretl_matrix * m,gretlopt opt)5867 static void add_fcstats_rownames (gretl_matrix *m,
5868 				  gretlopt opt)
5869 {
5870     const char *S[] = {
5871 	"ME", "RMSE", "MAE", "MPE", "MAPE",
5872 	"U1", "UM", "UR", "UD"
5873     };
5874     int i, ns = m->rows;
5875     char **rownames = strings_array_new(ns);
5876 
5877     if (rownames != NULL) {
5878 	for (i=0; i<ns; i++) {
5879 	    if (i == 5 && (opt & OPT_T)) {
5880 		rownames[5] = gretl_strdup("U2");
5881 	    } else {
5882 		rownames[i] = gretl_strdup(S[i]);
5883 	    }
5884 	}
5885 	gretl_matrix_set_rownames(m, rownames);
5886     }
5887 }
5888 
fcstats_sample_check(const double * y,const double * f,int * pt1,int * pt2,int * nmiss)5889 static int fcstats_sample_check (const double *y,
5890 				 const double *f,
5891 				 int *pt1,
5892 				 int *pt2,
5893 				 int *nmiss)
5894 {
5895     int t1 = *pt1;
5896     int t2 = *pt2;
5897     int t, err = 0;
5898 
5899     for (t=t1; t<=t2; t++) {
5900 	if (na(y[t]) || na(f[t])) {
5901 	    t1++;
5902 	} else {
5903 	    break;
5904 	}
5905     }
5906 
5907     for (t=t2; t>=t1; t--) {
5908 	if (na(y[t]) || na(f[t])) {
5909 	    t2--;
5910 	} else {
5911 	    break;
5912 	}
5913     }
5914 
5915     if (t2 - t1 + 1 < 1) {
5916 	err = E_MISSDATA;
5917     } else if (nmiss != NULL) {
5918 	/* allow internal NAs */
5919 	for (t=t1; t<=t2; t++) {
5920 	    if (na(y[t]) || na(f[t])) {
5921 		*nmiss += 1;
5922 	    }
5923 	}
5924     } else {
5925 	/* flag error on internal NAs */
5926 	for (t=t1; t<=t2; t++) {
5927 	    if (na(y[t]) || na(f[t])) {
5928 		err = E_MISSDATA;
5929 		break;
5930 	    }
5931 	}
5932     }
5933 
5934     *pt1 = t1;
5935     *pt2 = t2;
5936 
5937     return err;
5938 }
5939 
5940 /*
5941    Forecast evaluation statistics: @y is the data series, @f the
5942    forecast.
5943 
5944    cf. http://www.economicsnetwork.ac.uk/showcase/cook_forecast
5945    by Steven Cook of Swansea University <s.cook@Swansea.ac.uk>
5946 
5947    OPT_D indicates that we should include the Theil decomposition.
5948    OPT_O allows missing values (which will be skipped).
5949    OPT_T indicates that the data are time series.
5950 */
5951 
forecast_stats(const double * y,const double * f,int t1,int t2,int * n_used,gretlopt opt,int * err)5952 gretl_matrix *forecast_stats (const double *y, const double *f,
5953 			      int t1, int t2, int *n_used,
5954 			      gretlopt opt, int *err)
5955 {
5956     gretl_matrix *m = NULL;
5957     int rows = 6;
5958     int nmiss = 0;
5959 
5960     if (opt & OPT_O) {
5961 	*err = fcstats_sample_check(y, f, &t1, &t2, &nmiss);
5962     } else {
5963 	*err = fcstats_sample_check(y, f, &t1, &t2, NULL);
5964     }
5965     if (*err) {
5966 	return NULL;
5967     }
5968 
5969     if (opt & OPT_D) {
5970 	if (nmiss == 0) {
5971 	    /* extra rows for Theil decomp */
5972 	    rows = 9;
5973 	} else {
5974 	    /* scrub the Theil decomp option */
5975 	    opt &= ~OPT_D;
5976 	}
5977     }
5978 
5979     m = gretl_column_vector_alloc(rows);
5980 
5981     if (m == NULL) {
5982 	*err = E_ALLOC;
5983     } else {
5984 	int T = t2 - t1 + 1;
5985 
5986 	*err = fill_fcstats_column(m, y + t1, f + t1, T, opt, 0);
5987     }
5988 
5989     if (*err) {
5990 	gretl_matrix_free(m);
5991 	m = NULL;
5992     } else {
5993 	if (n_used != NULL) {
5994 	    *n_used = t2 - t1 + 1 - nmiss;
5995 	}
5996 	add_fcstats_rownames(m, opt);
5997     }
5998 
5999     return m;
6000 }
6001 
matrix_fc_stats(const double * y,const gretl_matrix * F,gretlopt opt,int * err)6002 gretl_matrix *matrix_fc_stats (const double *y,
6003 			       const gretl_matrix *F,
6004 			       gretlopt opt,
6005 			       int *err)
6006 {
6007     int ns = (opt & OPT_D)? 9 : 6;
6008     gretl_matrix *m = NULL;
6009     const double *f;
6010     int j, T = F->rows;
6011 
6012     m = gretl_matrix_alloc(ns, F->cols);
6013     if (m == NULL) {
6014 	*err = E_ALLOC;
6015 	return NULL;
6016     }
6017 
6018     for (j=0; j<F->cols; j++) {
6019 	f = F->val + T * j;
6020 	*err = fill_fcstats_column(m, y, f, T, opt, j);
6021 	if (*err) {
6022 	    break;
6023 	}
6024     }
6025 
6026     if (*err) {
6027 	gretl_matrix_free(m);
6028 	m = NULL;
6029     } else {
6030 	add_fcstats_rownames(m, opt);
6031     }
6032 
6033     return m;
6034 }
6035 
6036 /* Remove "repeats" from Nelson-Aalen or Kaplan-Meier output matrix:
6037    these will be identified by all-zero rows, since we defer entering
6038    the calculated values until we reach the end of a sequence of
6039    repeated duration values.
6040 */
6041 
durations_squeeze(gretl_matrix ** pA,gretl_matrix ** pTmp,int n,int repeats)6042 static int durations_squeeze (gretl_matrix **pA, gretl_matrix **pTmp,
6043 			      int n, int repeats)
6044 {
6045     gretl_matrix *T, *A = *pA;
6046     gretl_matrix *Tmp = *pTmp;
6047     double aij;
6048     int m = n - repeats;
6049     int i, j, err;
6050 
6051     err = gretl_matrix_realloc(Tmp, m, 3);
6052     if (err) {
6053 	return err;
6054     }
6055 
6056     j = 0;
6057     for (i=0; i<n; i++) {
6058 	aij = gretl_matrix_get(A, i, 0);
6059 	if (aij > 0) {
6060 	    /* got some actual values: transcribe */
6061 	    gretl_matrix_set(Tmp, j, 0, aij);
6062 	    aij = gretl_matrix_get(A, i, 1);
6063 	    gretl_matrix_set(Tmp, j, 1, aij);
6064 	    aij = gretl_matrix_get(A, i, 2);
6065 	    gretl_matrix_set(Tmp, j, 2, aij);
6066 	    j++;
6067 	}
6068     }
6069 
6070     /* swap matrix pointers */
6071     T = *pA;
6072     *pA = Tmp;
6073     *pTmp = T;
6074 
6075     return err;
6076 }
6077 
6078 #define cmissing(c,t) (c != NULL && na(c[t]))
6079 
6080 /*
6081   Given a series of duration values @y and (possibly) a series of
6082   censoring values @cens (zero for uncensored, non-zero for censored),
6083   computes either the Nelson-Aalen estimator of the cumulative hazard
6084   function OR (given OPT_K) the Kaplan-Meier estimate of the survival
6085   function.
6086 
6087   Th return value is a matrix with the (unique) duration value in the
6088   first column, the estimator in the second and its standard error
6089   in the third. Any repeated duration values in the input data are
6090   squeezed out.
6091 
6092   On Nelson-Aalen, the piece by O. Borgan at
6093   http://www.med.mcgill.ca/epidemiology/hanley/c609/material/NelsonAalenEstimator.pdf
6094   is quite helpful, and on Kaplan-Meier see
6095   www.public.iastate.edu/~kkoehler/stat565/kaplanmeier.4page.pdf
6096 */
6097 
duration_func(const double * y,const double * cens,int t1,int t2,gretlopt opt,int * err)6098 gretl_matrix *duration_func (const double *y, const double *cens,
6099 			     int t1, int t2, gretlopt opt,
6100 			     int *err)
6101 {
6102     gretl_matrix *Tmp = NULL;
6103     gretl_matrix *M = NULL;
6104     gretl_matrix *A = NULL;
6105     double yi, ai, aibak;
6106     double vaibak, vai = 0;
6107     int kmeier = (opt & OPT_K);
6108     int nmax = t2 - t1 + 1;
6109     int i, t, n = nmax;
6110     int repeats = 0;
6111     int d, r, ni, ibak;
6112 
6113     for (t=t1; t<=t2; t++) {
6114 	if (na(y[t]) || cmissing(cens, t)) {
6115 	    n--;
6116 	}
6117     }
6118 
6119     if (n < 1) {
6120 	*err = E_TOOFEW;
6121 	return NULL;
6122     }
6123 
6124     Tmp = gretl_zero_matrix_new(n, 2);
6125     A   = gretl_zero_matrix_new(n, 3);
6126     if (Tmp == NULL || A == NULL) {
6127 	*err = E_ALLOC;
6128 	goto bailout;
6129     }
6130 
6131     i = 0;
6132     for (t=t1; t<=t2; t++) {
6133 	if (n < nmax && (na(y[t]) || cmissing(cens, t))) {
6134 	    continue;
6135 	}
6136 	gretl_matrix_set(Tmp, i, 0, y[t]);
6137 	if (cens != NULL) {
6138 	    gretl_matrix_set(Tmp, i, 1, cens[t]);
6139 	}
6140 	i++;
6141     }
6142 
6143     M = gretl_matrix_sort_by_column(Tmp, 0, err);
6144     if (*err) {
6145 	goto bailout;
6146     }
6147 
6148     r = n;
6149     ibak = -1;
6150 
6151     for (i=0; i<n; i++) {
6152 	/* if not censored, increment d count */
6153 	d = gretl_matrix_get(M, i, 1) == 0;
6154 	/* start count of "drop-outs" */
6155 	ni = 1;
6156 	/* record current y value */
6157 	yi = gretl_matrix_get(M, i, 0);
6158 	while (i < n-1) {
6159 	    if (gretl_matrix_get(M, i+1, 0) != yi) {
6160 		break;
6161 	    }
6162 	    i++;
6163 	    d += gretl_matrix_get(M, i, 1) == 0;
6164 	    ni++;
6165 	    repeats++;
6166 	}
6167 	if (kmeier) {
6168 	    /* survival */
6169 	    ai = (r - d) / (double) r;
6170 	    vai = d / (r * ((double) r - d));
6171 	} else {
6172 	    /* hazard */
6173 	    ai = d / (double) r;
6174 	    vai = ((r - d) * d) / ((r - 1) * (double) r*r);
6175 	}
6176 #if 0
6177 	fprintf(stderr, "i=%d, y=%g, d=%d, r=%d\n", i, yi, d, r);
6178 #endif
6179 	gretl_matrix_set(A, i, 0, yi);
6180 	if (ibak >= 0) {
6181 	    aibak = gretl_matrix_get(A, ibak, 1);
6182 	    vaibak = gretl_matrix_get(A, ibak, 2);
6183 	    if (kmeier) {
6184 		gretl_matrix_set(A, i, 1, aibak * ai);
6185 		gretl_matrix_set(A, i, 2, vaibak + vai);
6186 	    } else {
6187 		gretl_matrix_set(A, i, 1, aibak + ai);
6188 		gretl_matrix_set(A, i, 2, vaibak + vai);
6189 	    }
6190 	} else {
6191 	    gretl_matrix_set(A, i, 1, ai);
6192 	    gretl_matrix_set(A, i, 2, vai);
6193 	}
6194 	/* decrement number "at risk" */
6195 	r -= ni;
6196 	ibak = i;
6197     }
6198 
6199     if (kmeier) {
6200 	/* compute variance using @vai cumulator */
6201 	for (i=0; i<n; i++) {
6202 	    ai  = gretl_matrix_get(A, i, 1);
6203 	    vai = gretl_matrix_get(A, i, 2);
6204 	    gretl_matrix_set(A, i, 2, sqrt(ai * ai * vai));
6205 	}
6206     } else {
6207 	/* Nelson-Aalen: convert to std error */
6208 	for (i=0; i<n; i++) {
6209 	    vai = gretl_matrix_get(A, i, 2);
6210 	    gretl_matrix_set(A, i, 2, sqrt(vai));
6211 	}
6212     }
6213 
6214 #if 0
6215     gretl_matrix_print(A, "A");
6216 #endif
6217 
6218     if (repeats > 0) {
6219 	*err = durations_squeeze(&A, &Tmp, n, repeats);
6220     }
6221 
6222  bailout:
6223 
6224     if (*err) {
6225 	gretl_matrix_free(A);
6226 	A = NULL;
6227     }
6228     gretl_matrix_free(Tmp);
6229     gretl_matrix_free(M);
6230 
6231     return A;
6232 }
6233 
gretl_round(double x)6234 double gretl_round (double x)
6235 {
6236     double fx = floor(x);
6237 
6238     if (x < 0) {
6239 	return (x - fx <= 0.5)? fx : ceil(x);
6240     } else {
6241 	return (x - fx < 0.5)? fx : ceil(x);
6242     }
6243 }
6244 
gretl_sgn(double x)6245 double gretl_sgn (double x)
6246 {
6247     if (x == 0) {
6248 	return 0;
6249     } else if (x > 0) {
6250 	return 1;
6251     } else if (x < 0) {
6252 	return -1;
6253     } else {
6254 	return NADBL;
6255     }
6256 }
6257 
6258 #define neg_x_real_v_err(t) (t == 'J' || t == 'I')
6259 
6260 /* evaluates bessel function for scalar nu and x */
6261 
gretl_bessel(char type,double v,double x,int * err)6262 double gretl_bessel (char type, double v, double x, int *err)
6263 {
6264     if (na(x) || na(v)) {
6265 	return NADBL;
6266     }
6267 
6268     if (x < 0) {
6269 	/* catch invalid cases for x < 0 */
6270 	if (type == 'K') {
6271 	    *err = E_INVARG;
6272 	    return NADBL;
6273 	} else if (v != floor(v) && (type == 'J' || type == 'I')) {
6274 	    *err = E_INVARG;
6275 	    return NADBL;
6276 	}
6277     }
6278 
6279     switch (type) {
6280     case 'J':
6281 	return cephes_bessel_Jv(v, x);
6282     case 'Y':
6283 	return cephes_bessel_Yv(v, x);
6284     case 'I':
6285 	if (v == 0) {
6286 	    return cephes_bessel_I0(x);
6287 	} else if (v == 1) {
6288 	    return cephes_bessel_I1(x);
6289 	} else if (v > 0) {
6290 	    return cephes_bessel_Iv(v, x);
6291 	} else {
6292 	    /* cephes_bessel_Iv is not right for v < 0 */
6293 	    double b1 = netlib_bessel_K(-v, x, 1);
6294 	    double b2 = cephes_bessel_Iv(-v, x);
6295 
6296 	    return (2*b1*sin(-v*M_PI)) / M_PI + b2;
6297 	}
6298 	break;
6299     case 'K':
6300 	/* bessel K is symmetric around v = 0 */
6301 	v = fabs(v);
6302 	if (v == 0) {
6303 	    return cephes_bessel_K0(x);
6304 	} else if (v == 1) {
6305 	    return cephes_bessel_K1(x);
6306 	} else if (v == floor(v) && v <= 30.0) {
6307 	    /* cephes doesn't do non-integer v, and also loses
6308 	       accuracy beyond |v| = 30
6309 	    */
6310 	    return cephes_bessel_Kn(v, x);
6311 	} else {
6312 	    /* accurate but expensive */
6313 	    return netlib_bessel_K(v, x, 1);
6314 	}
6315 	break;
6316     default:
6317 	/* unknown function type */
6318 	return NADBL;
6319     }
6320 }
6321 
6322 /* Net Present Value: note that the first value is taken as
6323    occurring "now" and is not discounted */
6324 
gretl_npv(int t1,int t2,const double * x,double r,int pd,int * err)6325 double gretl_npv (int t1, int t2, const double *x, double r,
6326 		  int pd, int *err)
6327 {
6328     double d, PV = 0.0;
6329     int i, n = 0;
6330 
6331     if (pd != 1 && pd != 4 && pd != 12) {
6332 	*err = E_PDWRONG;
6333 	return NADBL;
6334     }
6335 
6336     if (pd == 1) {
6337 	d = 1 + r;
6338     } else if (r < -1.0) {
6339 	*err = E_NAN;
6340 	return 0.0 / 0.0;
6341     } else {
6342 	d = pow(1 + r, 1.0 / pd);
6343     }
6344 
6345     for (i=t1; i<=t2; i++) {
6346 	if (!na(x[i])) {
6347 	    PV += x[i] / (pow(d, i-t1));
6348 	    n++;
6349 	}
6350     }
6351 
6352     if (n == 0) {
6353 	PV = NADBL;
6354     }
6355 
6356     return PV;
6357 }
6358 
6359 /* Internal Rate of Return */
6360 
gretl_irr(const double * x,int n,int pd,int * err)6361 double gretl_irr (const double *x, int n, int pd, int *err)
6362 {
6363     double PV, r, r1 = 0.02, r0 = -0.02;
6364     int gotplus = 0, gotminus = 0;
6365     int i, m = n;
6366 
6367     for (i=0; i<n; i++) {
6368 	if (na(x[i])) {
6369 	    m--;
6370 	} else if (x[i] > 0) {
6371 	    gotplus = 1;
6372 	} else if (x[i] < 0) {
6373 	    gotminus = 1;
6374 	}
6375     }
6376 
6377     if (!gotplus && !gotminus) {
6378 	/* null payment stream */
6379 	return (m > 0)? 0 : NADBL;
6380     }
6381 
6382     if (gotplus && !gotminus) {
6383 	/* payments all positive */
6384 	return (x[0] > 0)? 0.0/0.0 : 1.0/0.0;
6385     } else if (gotminus && !gotplus) {
6386 	/* payments all negative */
6387 	return (x[0] < 0)? 0.0/0.0 : -1.0/0.0;
6388     }
6389 
6390     /* find (r0, r1) bracket for solution, if possible */
6391 
6392     while ((PV = gretl_npv(0, n-1, x, r0, pd, err)) < 0 && !*err) {
6393 	if (r0 < -DBL_MAX / 2.0) {
6394 	    return -1.0/0.0;
6395 	}
6396 	r1 = r0;
6397 	r0 *= 2.0;
6398     }
6399 
6400     while ((PV = gretl_npv(0, n-1, x, r1, pd, err)) > 0 && !*err) {
6401 	if (r1 > DBL_MAX / 2.0) {
6402 	    return 1.0/0.0;
6403 	}
6404 	r0 = r1;
6405 	r1 *= 2.0;
6406     }
6407 
6408 #if 0
6409     fprintf(stderr, "initial bracket for r: %g to %g\n", r0, r1);
6410 #endif
6411 
6412     r = r1;
6413 
6414     /* now do binary search */
6415 
6416     for (i=0; i<32 && !*err; i++) {
6417 	if (floateq(PV, 0.0)) {
6418 	    break;
6419 	}
6420 	if (PV < 0) {
6421 	    /* r is too high */
6422 	    if (r < r1) {
6423 		r1 = r;
6424 	    }
6425 	    r = (r + r0) / 2.0;
6426 	} else {
6427 	    /* r too low */
6428 	    if (r > r0) {
6429 		r0 = r;
6430 	    }
6431 	    r = (r + r1) / 2.0;
6432 	}
6433 	PV = gretl_npv(0, n-1, x, r, pd, err);
6434 #if 0
6435 	fprintf(stderr, "binary search: r = %.9g, PV = %g\n", r, PV);
6436 #endif
6437     }
6438 
6439     if (*err) {
6440 	r = NADBL;
6441     }
6442 
6443     return r;
6444 }
6445 
logistic_cdf(double x)6446 double logistic_cdf (double x)
6447 {
6448     double emx, ret;
6449 
6450     errno = 0;
6451 
6452     emx = exp(-x);
6453     if (errno == ERANGE) {
6454 	ret = (x > 0)? 1.0 : 0.0;
6455 	errno = 0;
6456     } else {
6457 	ret = 1.0 / (1.0 + emx);
6458     }
6459 
6460     return ret;
6461 }
6462 
6463 /* Convert from @x or @list (whichever is non-NULL) to matrix,
6464    respecting the current sample range. If @cfac > 1 we take
6465    only every cfac-th row from the data source.
6466 */
6467 
tdisagg_matrix_from_series(const double * x,int xnum,const int * list,const DATASET * dset,int cfac,int * err)6468 gretl_matrix *tdisagg_matrix_from_series (const double *x,
6469 					  int xnum,
6470 					  const int *list,
6471 					  const DATASET *dset,
6472 					  int cfac, int *err)
6473 {
6474     gretl_matrix *m = NULL;
6475     char **S = NULL;
6476     int k = list != NULL ? list[0] : 1;
6477     int T = dset->t2 - dset->t1 + 1;
6478     int i, s, t;
6479     int serr = 0;
6480 
6481     if (cfac > 1) {
6482 	int rem = T % cfac;
6483 
6484 	T = T / cfac + (rem > 0);
6485     }
6486 
6487     m = gretl_matrix_alloc(T, k);
6488     if (m == NULL) {
6489 	return NULL;
6490     }
6491 
6492     if (list != NULL) {
6493 	for (i=0; i<k; i++) {
6494 	    x = dset->Z[list[i+1]];
6495 	    s = dset->t1;
6496 	    for (t=0; t<T; t++) {
6497 		gretl_matrix_set(m, t, i, x[s]);
6498 		s += cfac;
6499 	    }
6500 	}
6501 	S = gretl_list_get_names_array(list, dset, &serr);
6502     } else {
6503 	s = dset->t1;
6504 	for (t=0; t<T; t++) {
6505 	    m->val[t] = x[s];
6506 	    s += cfac;
6507 	}
6508 	if (xnum > 0) {
6509 	    S = strings_array_new(1);
6510 	    if (S != NULL) {
6511 		S[0] = gretl_strdup(dset->varname[xnum]);
6512 	    }
6513 	}
6514     }
6515 
6516     gretl_matrix_set_t1(m, dset->t1);
6517     gretl_matrix_set_t2(m, dset->t2);
6518 
6519     if (S != NULL) {
6520 	gretl_matrix_set_colnames(m, S);
6521     }
6522 
6523     return m;
6524 }
6525 
6526 /**
6527  * matrix_tdisagg:
6528  * @Y: T x g: holds the original data to be disaggregated, series
6529  * in columns.
6530  * @X: (optionally) holds covariates of Y at the higher frequency.
6531  * @s: the expansion factor: e.g. 3 for quarterly to monthly,
6532  * 4 for annual to quarterly, 12 for annual to monthly.
6533  * @b: bundle containing optional arguments, or NULL.
6534  * @r: bundle for retrieving details, or NULL.
6535  * @err: location to receive error code.
6536  *
6537  * Temporal disaggregation, via regression or Denton-type method.
6538  *
6539  * Returns: matrix containing the disaggregated series, or
6540  * NULL on failure.
6541  */
6542 
matrix_tdisagg(const gretl_matrix * Y,const gretl_matrix * X,int s,gretl_bundle * b,gretl_bundle * res,DATASET * dset,PRN * prn,int * err)6543 gretl_matrix *matrix_tdisagg (const gretl_matrix *Y,
6544 			      const gretl_matrix *X,
6545 			      int s, gretl_bundle *b,
6546 			      gretl_bundle *res,
6547 			      DATASET *dset,
6548 			      PRN *prn, int *err)
6549 {
6550     gretl_matrix *(*tdisagg) (const gretl_matrix *,
6551 			      const gretl_matrix *,
6552 			      int, gretl_bundle *,
6553 			      gretl_bundle *,
6554 			      DATASET *,
6555 			      PRN *, int *);
6556     gretl_matrix *ret = NULL;
6557 
6558     if (s <= 1) {
6559 	*err = E_INVARG;
6560 	return NULL;
6561     }
6562 
6563     if (X != NULL) {
6564 	double xs = X->rows / (double) Y->rows;
6565 
6566 	if (xs < s) {
6567 	    *err = E_INVARG;
6568 	    return NULL;
6569 	} else if (X->is_complex) {
6570 	    *err = E_CMPLX;
6571 	    return NULL;
6572 	}
6573     }
6574 
6575     tdisagg = get_plugin_function("time_disaggregate");
6576 
6577     if (tdisagg == NULL) {
6578 	*err = E_FOPEN;
6579     } else {
6580 	ret = (*tdisagg) (Y, X, s, b, res, dset, prn, err);
6581     }
6582 
6583     return ret;
6584 }
6585 
6586 /* For backward compatibility only: support the old chowlin()
6587    function, with "avg" as default aggregation type.
6588 */
6589 
matrix_chowlin(const gretl_matrix * Y,const gretl_matrix * X,int s,int * err)6590 gretl_matrix *matrix_chowlin (const gretl_matrix *Y,
6591 			      const gretl_matrix *X,
6592 			      int s, int *err)
6593 {
6594     gretl_bundle *b = gretl_bundle_new();
6595     gretl_matrix *m = NULL;
6596 
6597     gretl_bundle_set_string(b, "aggtype", "avg");
6598     m = matrix_tdisagg(Y, X, s, b, NULL, NULL, NULL, err);
6599     gretl_bundle_destroy(b);
6600 
6601     return m;
6602 }
6603 
list_ok_dollar_vars(DATASET * dset,PRN * prn)6604 int list_ok_dollar_vars (DATASET *dset, PRN *prn)
6605 {
6606     int nm = 0;
6607     int i;
6608 
6609     pprintf(prn, "\n%s\n", _("model-related"));
6610 
6611     for (i=R_MAX+1; i<M_MAX; i++) {
6612 	GretlType type = GRETL_TYPE_NONE;
6613 	double x = NADBL;
6614 	gretl_matrix *m = NULL;
6615 	int *list = NULL;
6616 	int err = 0;
6617 
6618 	if (i < M_SCALAR_MAX) {
6619 	    x = saved_object_get_scalar(NULL, i, dset, &err);
6620 	    if (!na(x)) {
6621 		type = GRETL_TYPE_DOUBLE;
6622 	    }
6623 	} else if (i > M_SCALAR_MAX && i < M_SERIES_MAX) {
6624 	    type = GRETL_TYPE_SERIES;
6625 	    err = saved_object_get_series(NULL, NULL, i, dset);
6626 	    if (err) {
6627 		if (i == M_UHAT || i == M_YHAT || i == M_SIGMA) {
6628 		    /* maybe the result is a matrix? */
6629 		    type = saved_object_get_data_type(NULL, i);
6630 		    if (type == GRETL_TYPE_MATRIX) {
6631 			m = saved_object_get_matrix(NULL, i, &err);
6632 		    }
6633 		}
6634 	    }
6635 	} else if (i > M_SERIES_MAX && i < M_MATRIX_MAX) {
6636 	    type = GRETL_TYPE_MATRIX;
6637 	    m = saved_object_get_matrix(NULL, i, &err);
6638 	} else if (i > M_MATRIX_MAX && i < M_MBUILD_MAX) {
6639 	    type = GRETL_TYPE_MATRIX;
6640 	    m = saved_object_build_matrix(NULL, i,
6641 					  dset, &err);
6642 	} else {
6643 	    type = GRETL_TYPE_LIST;
6644 	    list = saved_object_get_list(NULL, i, &err);
6645 	}
6646 
6647 	if (!err && type != GRETL_TYPE_NONE) {
6648 	    const char *typestr = gretl_type_get_name(type);
6649 
6650 	    if (!na(x)) {
6651 		pprintf(prn, " %s (%s: %g)\n", mvarname(i), typestr, x);
6652 	    } else {
6653 		pprintf(prn, " %s (%s)\n", mvarname(i), typestr);
6654 	    }
6655 	    gretl_matrix_free(m);
6656 	    free(list);
6657 	    nm++;
6658 	}
6659     }
6660 
6661     if (nm == 0) {
6662 	pprintf(prn, " %s\n", _("none"));
6663     }
6664 
6665     pprintf(prn, "\n%s\n", _("other"));
6666 
6667     for (i=1; i<R_SCALAR_MAX; i++) {
6668 	double x = dvar_get_scalar(i, dset);
6669 
6670 	if (!na(x)) {
6671 	    pprintf(prn, " %s (scalar: %g)\n", dvarname(i), x);
6672 	}
6673     }
6674 
6675     pputc(prn, '\n');
6676 
6677     return 0;
6678 }
6679 
nw_kernel(double x)6680 static double nw_kernel (double x)
6681 {
6682     /*
6683        eventually, a libset variable will be used to choose among
6684        various kernels; for now, the Normal density will have to do.
6685     */
6686     double ret, x2 = x*x;
6687 
6688 #if 0
6689     ret = exp(-0.5*x2)/SQRT_2_PI;
6690 #else
6691     ret = exp(-0.5*x2);
6692 #endif
6693 
6694     return ret;
6695 }
6696 
6697 /**
6698  * nadaraya_watson:
6699  * @y: array with "dependent variable"
6700  * @x: array with "explanatory variable"
6701  * @h: double, bandwidth (may be negative; see below)
6702  * @dset: data set information.
6703  * @LOO: Boolean flag (see below)
6704  * @trim: trim parameter (see below)
6705  * @m: array to hold results
6706  *
6707  * Implements the Nadaraya-Watson nonparametric estimator for the
6708  * conditional mean of @y given @x via the formula
6709  *
6710  * \widehat{m}_h(x)=\frac{\sum_{i=1}^n K_h(x-X_i)
6711  * Y_i}{\sum_{i=1}^nK_h(x-X_i)}
6712  *
6713  * and computes it for all elements of @x. Note that, in principle,
6714  * the kernel K(X_i-X_j) must be computed for every combination of i
6715  * and j, but since the function K() is assumed to be symmetric, we
6716  * compute it once to save time.
6717  *
6718  * The scalar @h holds the kernel bandwidth; if @LOO is non-zero the
6719  * "leave-one-out" variant of the estimator (essentially a jackknife
6720  * estimator; see Pagan and Ullah, Nonparametric Econometrics, p. 119)
6721  * is computed.
6722  *
6723  * A rudimentary form of trimming is implemented: the kernel function
6724  * is set to 0 when the product of the @trim parameter times the
6725  * bandwidth @h exceeds |X_i - X_j|, so as to speed computation and
6726  * enhance numerical stability.
6727  *
6728  * Returns: 0 on successful completion, non-zero code on error.
6729  */
6730 
nadaraya_watson(const double * y,const double * x,double h,DATASET * dset,int LOO,double trim,double * m)6731 int nadaraya_watson (const double *y, const double *x, double h,
6732 		     DATASET *dset, int LOO, double trim,
6733 		     double *m)
6734 {
6735     int t, s, err = 0;
6736     int t1 = dset->t1, t2 = dset->t2;
6737     double xt, xs, ys, yt, k;
6738     double *num, *den;
6739     int n = t2 + 1; /* really? */
6740 
6741     if (h < 0.0) {
6742 	return E_DATA;
6743     } else if (h == 0.0) {
6744 	/* automatic data-based bandwidth */
6745 	const double *sampx = x + dset->t1;
6746 	int n = sample_size(dset);
6747 
6748 	h = kernel_bandwidth(sampx, n);
6749     }
6750 
6751     num = malloc(2 * n * sizeof *num);
6752     if (num == NULL) {
6753 	return E_ALLOC;
6754     }
6755 
6756     den = num + n;
6757     trim *= h;
6758 
6759     /* here we initialize numerator and denominator; we use the
6760        "diagonal" in the standard case and 0 in the leave-one-out
6761        case
6762     */
6763 
6764     if (LOO) {
6765 	for (t=t1; t<=t2; t++) {
6766 	    num[t] = den[t] = 0.0;
6767 	}
6768     } else {
6769 	k = nw_kernel(0);
6770 	for (t=t1; t<=t2; t++) {
6771 	    if (!na(y[t])) {
6772 		num[t] = k * y[t];
6773 		den[t] = k;
6774 	    } else {
6775 		num[t] = 0;
6776 		den[t] = 0;
6777 	    }
6778 	}
6779     }
6780 
6781     for (t=t1; t<=t2; t++) {
6782 	xt = x[t];
6783 	if (!na(xt)) {
6784 	    yt = y[t];
6785 	    for (s=t+1; s<=t2; s++) {
6786 		xs = x[s];
6787 		if (!na(xs) && fabs(xs-xt) < trim) {
6788 		    k = nw_kernel((xt - xs)/h);
6789 		    if (!na(yt)) {
6790 			num[s] += k * yt;
6791 			den[s] += k;
6792 		    }
6793 		    ys = y[s];
6794 		    if (!na(ys)) {
6795 			num[t] += k * ys;
6796 			den[t] += k;
6797 		    }
6798 		}
6799 	    }
6800 	    m[t] = num[t] / den[t];
6801 	} else {
6802 	    m[t] = NADBL;
6803 	}
6804     }
6805 
6806     free(num);
6807 
6808     return err;
6809 }
6810 
xy_get_sample(const double * y,const double * x,int * t1,int * t2,int * n)6811 static int xy_get_sample (const double *y, const double *x,
6812 			  int *t1, int *t2, int *n)
6813 {
6814     int t, nxy, err = 0;
6815 
6816     for (t=*t1; t<=*t2; t++) {
6817 	if (na(x[t])) {
6818 	    *t1 += 1;
6819 	} else {
6820 	    break;
6821 	}
6822     }
6823 
6824     for (t=*t2; t>=*t1; t--) {
6825 	if (na(x[t])) {
6826 	    *t2 -= 1;
6827 	} else {
6828 	    break;
6829 	}
6830     }
6831 
6832     /* nxy = the number of points where we have valid values
6833        for both x and y; n = the number of valid x-values
6834     */
6835 
6836     nxy = *n = 0;
6837 
6838     for (t=*t1; t<=*t2; t++) {
6839 	if (!na(x[t])) {
6840 	    *n += 1;
6841 	    if (!na(y[t])) {
6842 		nxy++;
6843 	    }
6844 	}
6845     }
6846 
6847     if (nxy < 16) {
6848 	err = E_TOOFEW;
6849     }
6850 
6851     return err;
6852 }
6853 
get_dataset_t(const double * x,int pos,int t1)6854 static int get_dataset_t (const double *x, int pos, int t1)
6855 {
6856     int t, k = 0;
6857 
6858     for (t=t1; k<=pos; t++) {
6859 	if (!na(x[t])) {
6860 	    if (k == pos) {
6861 		return t;
6862 	    }
6863 	    k++;
6864 	}
6865     }
6866 
6867     return 0;
6868 }
6869 
6870 /* note: the following requires that @y and @x are truly dataset
6871    series; vectors cannot be accepted given the dependence on
6872    dset->t1 and dset->t2 for the range of data to be used
6873 */
6874 
gretl_loess(const double * y,const double * x,int poly_order,double bandwidth,gretlopt opt,DATASET * dset,double * m)6875 int gretl_loess (const double *y, const double *x, int poly_order,
6876 		 double bandwidth, gretlopt opt, DATASET *dset,
6877 		 double *m)
6878 {
6879     gretl_matrix *my, *mx;
6880     gretl_matrix *yh = NULL;
6881     int *s_order = NULL;
6882     int t1 = dset->t1;
6883     int t2 = dset->t2;
6884     int s, t, n;
6885     int err = 0;
6886 
6887     if (poly_order < 0 || poly_order > 2 ||
6888 	bandwidth <= 0 || bandwidth >= 1) {
6889 	return E_DATA;
6890     }
6891 
6892     err = xy_get_sample(y, x, &t1, &t2, &n);
6893     if (err) {
6894 	return err;
6895     }
6896 
6897     /* note: n holds the number of non-missing observations
6898        on x; the associated y-value may or may not be
6899        missing
6900     */
6901 
6902     my = gretl_column_vector_alloc(n);
6903     mx = gretl_column_vector_alloc(n);
6904 
6905     if (my == NULL || mx == NULL) {
6906 	err = E_ALLOC;
6907     } else {
6908 	s = 0;
6909 	for (t=t1; t<=t2; t++) {
6910 	    if (!na(x[t])) {
6911 		my->val[s] = y[t];
6912 		mx->val[s] = x[t];
6913 		s++;
6914 	    }
6915 	}
6916 	/* sort the points by the value of x */
6917 	err = sort_pairs_by_x(mx, my, &s_order, NULL);
6918     }
6919 
6920     if (!err) {
6921 	yh = loess_fit(mx, my, poly_order, bandwidth, opt, &err);
6922     }
6923 
6924     if (!err) {
6925 	/* put the yh values into dataset order */
6926 	for (s=0; s<n; s++) {
6927 	    t = get_dataset_t(x, s_order[s], t1);
6928 	    m[t] = yh->val[s];
6929 	}
6930     }
6931 
6932     gretl_matrix_free(my);
6933     gretl_matrix_free(mx);
6934     gretl_matrix_free(yh);
6935     free(s_order);
6936 
6937     return err;
6938 }
6939 
series_get_nobs(int t1,int t2,const double * x)6940 double series_get_nobs (int t1, int t2, const double *x)
6941 {
6942     int t, n = 0;
6943 
6944     for (t=t1; t<=t2; t++) {
6945 	if (!na(x[t])) n++;
6946     }
6947 
6948     return n;
6949 }
6950 
series_sum_all(int t1,int t2,const double * x)6951 double series_sum_all (int t1, int t2, const double *x)
6952 {
6953     double xsum = 0.0;
6954     int t;
6955 
6956     for (t=t1; t<=t2; t++) {
6957 	if (na(x[t])) {
6958 	    xsum = NADBL;
6959 	    break;
6960 	} else {
6961 	    xsum += x[t];
6962 	}
6963     }
6964 
6965     return xsum;
6966 }
6967 
delete_null_cases(gretl_matrix * m,int keeprows,int * err)6968 static gretl_matrix *delete_null_cases (gretl_matrix *m,
6969 					int keeprows,
6970 					int *err)
6971 {
6972     gretl_matrix *ret;
6973 
6974     ret = gretl_matrix_alloc(keeprows, m->cols);
6975     if (ret == NULL) {
6976 	*err = E_ALLOC;
6977     } else {
6978 	double x;
6979 	int i, j;
6980 
6981 	for (i=0; i<keeprows; i++) {
6982 	    for (j=0; j<m->cols; j++) {
6983 		x = gretl_matrix_get(m, i, j);
6984 		gretl_matrix_set(ret, i, j, x);
6985 	    }
6986 	}
6987     }
6988 
6989     gretl_matrix_free(m);
6990 
6991     return ret;
6992 }
6993 
real_aggregate_by(const double * x,const double * y,const int * xlist,const int * ylist,const DATASET * dset,double * tmp,double (* builtin)(),gchar * usercall,DATASET * tmpset,int just_count,int * err)6994 static gretl_matrix *real_aggregate_by (const double *x,
6995 					const double *y,
6996 					const int *xlist,
6997 					const int *ylist,
6998 					const DATASET *dset,
6999 					double *tmp,
7000 					double (*builtin)(),
7001 					gchar *usercall,
7002 					DATASET *tmpset,
7003 					int just_count,
7004 					int *err)
7005 {
7006     gretl_matrix *m = NULL;
7007     gretl_matrix **listvals;
7008     gretl_matrix *yvals;
7009     int n = sample_size(dset);
7010     int match, skipnull = 0;
7011     int countcol = 1;
7012     int maxcases;
7013     int ny, nx, mcols;
7014     int *idx;
7015     double *valvec;
7016     double fx;
7017     int i, j, k, t, ni, ii;
7018 
7019     /* note:
7020        - to skip null cases in the output matrix, set skipnull = 1
7021        - to eliminate the case-count column, set countcol = 0
7022     */
7023 
7024     ny = ylist == NULL ? 1 : ylist[0];
7025 
7026     valvec = malloc(ny * sizeof *valvec);
7027     idx = malloc(ny * sizeof *idx);
7028     listvals = malloc(ny * sizeof *listvals);
7029 
7030     if (valvec == NULL || idx == NULL || listvals == NULL) {
7031 	*err = E_ALLOC;
7032 	goto bailout;
7033     }
7034 
7035     for (j=0; j<ny; j++) {
7036 	listvals[j] = NULL;
7037     }
7038 
7039     /* For @y (or each member of @ylist), create a vector holding
7040        its distinct values. As we go, count the combinations of
7041        y-values and initialize the valvec and idx arrays.
7042     */
7043 
7044     maxcases = 1;
7045     for (j=0; j<ny && !*err; j++) {
7046 	if (ylist != NULL) {
7047 	    y = dset->Z[ylist[j+1]] + dset->t1;
7048 	}
7049 	listvals[j] = gretl_matrix_values(y, n, OPT_S, err);
7050 	if (!*err) {
7051 	    maxcases *= listvals[j]->rows;
7052 	    valvec[j] = listvals[j]->val[0];
7053 	    idx[j] = 0;
7054 	}
7055     }
7056 
7057     if (just_count) {
7058 	x = NULL;
7059 	xlist = NULL;
7060 	skipnull = 0;
7061 	countcol = 0;
7062 	nx = 1;
7063     } else {
7064 	nx = xlist == NULL ? 1 : xlist[0];
7065     }
7066 
7067     mcols = ny + nx + countcol;
7068 
7069     /* Allocate a matrix with enough rows to hold all the y-value
7070        combinations (maxcases) and enough columns to hold a
7071        record of the y values, a count of matching cases, and the
7072        value(s) of f(x).
7073     */
7074 
7075     if (!*err) {
7076 	m = gretl_zero_matrix_new(maxcases, mcols);
7077 	if (m == NULL) {
7078 	    *err = E_ALLOC;
7079 	}
7080     }
7081 
7082     if (*err) {
7083 	goto bailout;
7084     }
7085 
7086     ii = 0;
7087 
7088     for (i=0; i<maxcases; i++) {
7089 	ni = 0;
7090 	for (k=0; k<nx && !*err; k++) {
7091 	    /* fill tmp with x for {y} == {yi} */
7092 	    if (xlist != NULL) {
7093 		x = dset->Z[xlist[k+1]] + dset->t1;
7094 	    }
7095 	    ni = 0;
7096 	    for (t=0; t<n; t++) {
7097 		match = 1;
7098 		for (j=0; j<ny; j++) {
7099 		    if (ylist != NULL) {
7100 			y = dset->Z[ylist[j+1]] + dset->t1;
7101 		    }
7102 		    if (y[t] != valvec[j]) {
7103 			match = 0;
7104 			break;
7105 		    }
7106 		}
7107 		if (match) {
7108 		    if (x != NULL) {
7109 			tmp[ni] = x[t];
7110 		    }
7111 		    ni++;
7112 		}
7113 	    }
7114 	    if (just_count) {
7115 		gretl_matrix_set(m, ii, ny, ni);
7116 		break;
7117 	    } else if (ni == 0 && skipnull) {
7118 		/* exclude cases where the obs count is 0 */
7119 		break;
7120 	    } else {
7121 		/* aggregate x at current y values */
7122 		if (builtin != NULL) {
7123 		    fx = (*builtin)(0, ni-1, tmp);
7124 		} else {
7125 		    tmpset->t2 = ni-1;
7126 		    fx = generate_scalar(usercall, tmpset, err);
7127 		}
7128 		gretl_matrix_set(m, ii, ny+k+countcol, fx);
7129 	    }
7130 	}
7131 
7132 	if (ni > 0 || !skipnull) {
7133 	    for (j=0; j<ny; j++) {
7134 		gretl_matrix_set(m, ii, j, valvec[j]);
7135 	    }
7136 	    if (countcol) {
7137 		gretl_matrix_set(m, ii, ny, ni);
7138 	    }
7139 	    ii++;
7140 	}
7141 
7142 	if (*err) {
7143 	    break;
7144 	}
7145 
7146 	/* set up the next y-values array */
7147 	for (j=ny-1; j>=0; j--) {
7148 	    yvals = listvals[j];
7149 	    if (idx[j] == yvals->rows - 1) {
7150 		/* restart the index at this level and pass
7151 		   the buck */
7152 		idx[j] = 0;
7153 		valvec[j] = yvals->val[0];
7154 	    } else {
7155 		/* increment the index at this level and
7156 		   we're done */
7157 		idx[j] += 1;
7158 		valvec[j] = yvals->val[idx[j]];
7159 		break;
7160 	    }
7161 	}
7162     }
7163 
7164     if (skipnull && ii < maxcases) {
7165 	/* the matrix contains some null cases: shrink it */
7166 	m = delete_null_cases(m, ii, err);
7167     }
7168 
7169  bailout:
7170 
7171     free(valvec);
7172     free(idx);
7173 
7174     if (listvals != NULL) {
7175 	for (j=0; j<ny; j++) {
7176 	    gretl_matrix_free(listvals[j]);
7177 	}
7178 	free(listvals);
7179     }
7180 
7181     return m;
7182 }
7183 
aggr_add_colnames(gretl_matrix * m,const int * ylist,const int * xlist,const DATASET * dset,int just_count)7184 static void aggr_add_colnames (gretl_matrix *m,
7185 			       const int *ylist,
7186 			       const int *xlist,
7187 			       const DATASET *dset,
7188 			       int just_count)
7189 {
7190     char **S = NULL;
7191     int i, j, n = m->cols;
7192     int err = 0;
7193 
7194     S = strings_array_new(n);
7195     if (S == NULL) {
7196 	return;
7197     }
7198 
7199     j = 0;
7200 
7201     if (ylist != NULL && ylist[0] > 0) {
7202 	char **Sy = gretl_list_get_names_array(ylist, dset, &err);
7203 
7204 	if (!err) {
7205 	    for (i=0; i<ylist[0]; i++) {
7206 		S[j++] = Sy[i];
7207 		Sy[i] = NULL;
7208 	    }
7209 	    free(Sy);
7210 	}
7211     } else {
7212 	S[j++] = gretl_strdup("byvar");
7213     }
7214 
7215     if (!err) {
7216 	S[j++] = gretl_strdup("count");
7217     }
7218 
7219     if (!err && j < n) {
7220 	if (xlist != NULL && xlist[0] > 0) {
7221 	    char **Sx = gretl_list_get_names_array(xlist, dset, &err);
7222 
7223 	    if (!err) {
7224 		for (i=0; i<xlist[0]; i++) {
7225 		    S[j++] = Sx[i];
7226 		    Sx[i] = NULL;
7227 		}
7228 		free(Sx);
7229 	    }
7230 	} else {
7231 	    S[j] = gretl_strdup("f(x)");
7232 	}
7233     }
7234 
7235     if (!err) {
7236 	gretl_matrix_set_colnames(m, S);
7237     } else {
7238 	strings_array_free(S, n);
7239     }
7240 }
7241 
7242 /**
7243  * aggregate_by:
7244  * @x: data array.
7245  * @y: discrete variable.
7246  * @xlist: list of x series or NULL.
7247  * @ylist: list of y series or NULL.
7248  * @fncall: the name of the aggregation function.
7249  * @dset: data set information.
7250  * @err: location to receive error code.
7251  *
7252  * Aggregates one or more data series (x) "by" the values of
7253  * one or more discrete series (y). In general either @x or
7254  * @xlist should be non-NULL, and one of @y or @ylist should
7255  * be non-NULL. (If @xlist is non-NULL then @x is ignored,
7256  * and similarly for @ylist and @y). For an account of the
7257  * matrix that is returned, see the help for gretl's
7258  * "aggregate" command.
7259  *
7260  * Returns: allocated matrix, or NULL on failure.
7261  */
7262 
aggregate_by(const double * x,const double * y,const int * xlist,const int * ylist,const char * fncall,const DATASET * dset,int * err)7263 gretl_matrix *aggregate_by (const double *x,
7264 			    const double *y,
7265 			    const int *xlist,
7266 			    const int *ylist,
7267 			    const char *fncall,
7268 			    const DATASET *dset,
7269 			    int *err)
7270 {
7271     DATASET *tmpset = NULL;
7272     gretl_matrix *m = NULL;
7273     double *tmp = NULL;
7274     double (*builtin) (int, int, const double *) = NULL;
7275     gchar *usercall = NULL;
7276     int just_count = 0;
7277     int n;
7278 
7279     if (fncall == NULL || !strcmp(fncall, "null")) {
7280 	just_count = 1;
7281     }
7282 
7283     if ((y == NULL && ylist == NULL) ||
7284 	(!just_count && x == NULL && xlist == NULL)) {
7285 	*err = E_DATA;
7286 	return NULL;
7287     }
7288 
7289     if (!just_count) {
7290 	int f = function_lookup(fncall);
7291 
7292 	switch (f) {
7293 	case F_SUM:
7294 	    builtin = gretl_sum;
7295 	    break;
7296 	case F_SUMALL:
7297 	    builtin = series_sum_all;
7298 	    break;
7299 	case F_MEAN:
7300 	    builtin = gretl_mean;
7301 	    break;
7302 	case F_SD:
7303 	    builtin = gretl_stddev;
7304 	    break;
7305 	case F_VCE:
7306 	    builtin = gretl_variance;
7307 	    break;
7308 	case F_SST:
7309 	    builtin = gretl_sst;
7310 	    break;
7311 	case F_SKEWNESS:
7312 	    builtin = gretl_skewness;
7313 	    break;
7314 	case F_KURTOSIS:
7315 	    builtin = gretl_kurtosis;
7316 	    break;
7317 	case F_MIN:
7318 	    builtin = gretl_min;
7319 	    break;
7320 	case F_MAX:
7321 	    builtin = gretl_max;
7322 	    break;
7323 	case F_MEDIAN:
7324 	    builtin = gretl_median;
7325 	    break;
7326 	case F_GINI:
7327 	    builtin = gretl_gini;
7328 	    break;
7329 	case F_NOBS:
7330 	    builtin = series_get_nobs;
7331 	    break;
7332 	default:
7333 	    break;
7334 	}
7335     }
7336 
7337     n = sample_size(dset);
7338 
7339     if (just_count) {
7340 	; /* nothing to do here */
7341     } else if (builtin != NULL) {
7342 	/* nice and simple */
7343 	tmp = malloc(n * sizeof *tmp);
7344 	if (tmp == NULL) {
7345 	    *err = E_ALLOC;
7346 	}
7347     } else {
7348 	/* try treating as user-defined call */
7349 	tmpset = create_auxiliary_dataset(2, n, OPT_NONE);
7350 	if (tmpset == NULL) {
7351 	    *err = E_ALLOC;
7352 	} else {
7353 	    strcpy(tmpset->varname[1], "x");
7354 	    tmp = tmpset->Z[1];
7355 	    usercall = g_strdup_printf("%s(x)", fncall);
7356 	}
7357     }
7358 
7359     if (!*err) {
7360 	x = (x == NULL)? NULL : x + dset->t1;
7361 	y = (y == NULL)? NULL : y + dset->t1;
7362 	m = real_aggregate_by(x, y, xlist, ylist, dset, tmp,
7363 			      builtin, usercall, tmpset,
7364 			      just_count, err);
7365     }
7366 
7367     if (m != NULL && *err) {
7368 	gretl_matrix_free(m);
7369 	m = NULL;
7370     }
7371 
7372     if (m != NULL) {
7373 	aggr_add_colnames(m, ylist, xlist, dset, just_count);
7374     }
7375 
7376     if (tmpset != NULL) {
7377 	g_free(usercall);
7378 	destroy_dataset(tmpset);
7379     } else {
7380 	free(tmp);
7381     }
7382 
7383     return m;
7384 }
7385 
monthly_or_quarterly_dates(const DATASET * dset,int pd,double sd0,int T,double * x)7386 static int monthly_or_quarterly_dates (const DATASET *dset,
7387 				       int pd, double sd0, int T,
7388 				       double *x)
7389 {
7390     char obstr[8];
7391     int mo, yr = floor(sd0);
7392     int t, mmax, dm;
7393 
7394     if (pd == 4) {
7395 	sprintf(obstr, "%.1f", sd0 - yr);
7396 	mo = 1 + (atoi(obstr + 2) - 1) * 3;
7397 	mmax = 10;
7398 	dm = 3;
7399     } else {
7400 	sprintf(obstr, "%.2f", sd0 - yr);
7401 	mo = atoi(obstr + 2);
7402 	mmax = 12;
7403 	dm = 1;
7404     }
7405 
7406     for (t=0; t<T; t++) {
7407 	x[t] = 10000*yr + 100*mo + 1;
7408 	if (mo == mmax) {
7409 	    yr++;
7410 	    mo = 1;
7411 	} else {
7412 	    mo += dm;
7413 	}
7414     }
7415 
7416     return 0;
7417 }
7418 
annual_or_decennial_dates(const DATASET * dset,int pd,double sd0,int T,double * x)7419 static int annual_or_decennial_dates (const DATASET *dset,
7420 				      int pd, double sd0, int T,
7421 				      double *x)
7422 {
7423     int t, yr = (int) sd0;
7424 
7425     for (t=0; t<T; t++) {
7426 	x[t] = 10000*yr + 101;
7427 	yr += pd;
7428     }
7429 
7430     return 0;
7431 }
7432 
panel_daily_or_weekly(const DATASET * dset,double * x)7433 static int panel_daily_or_weekly (const DATASET *dset, double *x)
7434 {
7435     DATASET tsset = {0};
7436     char datestr[16];
7437     int t, y, m, d, n;
7438     int err = 0;
7439 
7440     tsset.structure = TIME_SERIES;
7441     tsset.pd = dset->panel_pd;
7442     tsset.sd0 = dset->panel_sd0;
7443     tsset.n = dset->pd;
7444     calendar_date_string(tsset.stobs, 0, &tsset);
7445 
7446     for (t=0; t<dset->pd && !err; t++) {
7447 	ntolabel(datestr, t, &tsset);
7448 	n = sscanf(datestr, "%d-%d-%d", &y, &m, &d);
7449 	if (n != 3) {
7450 	    err = E_DATA;
7451 	} else {
7452 	    x[t] = 10000*y + 100*m + d;
7453 	}
7454     }
7455 
7456     return err;
7457 }
7458 
regular_daily_or_weekly(const DATASET * dset,double * x)7459 static int regular_daily_or_weekly (const DATASET *dset, double *x)
7460 {
7461     char datestr[16];
7462     int t, y, m, d, n;
7463     int err = 0;
7464 
7465     for (t=0; t<dset->n && !err; t++) {
7466 	ntolabel(datestr, t, dset);
7467 #if 0
7468 	fprintf(stderr, "regular: datestr = '%s'\n", datestr);
7469 #endif
7470 	n = sscanf(datestr, "%d-%d-%d", &y, &m, &d);
7471 	if (n != 3) {
7472 	    err = E_DATA;
7473 	} else {
7474 	    x[t] = 10000*y + 100*m + d;
7475 	}
7476     }
7477 
7478     return err;
7479 }
7480 
fill_dataset_dates_series(const DATASET * dset,double * x)7481 int fill_dataset_dates_series (const DATASET *dset, double *x)
7482 {
7483     double sd0;
7484     int pd, T;
7485     int err = 0;
7486 
7487     if (dataset_is_panel(dset)) {
7488 	/* looking at time dimension of panel */
7489 	pd = dset->panel_pd;
7490 	sd0 = dset->panel_sd0;
7491 	T = dset->pd;
7492     } else {
7493 	/* regular time series */
7494 	pd = dset->pd;
7495 	sd0 = dset->sd0;
7496 	T = dset->n;
7497     }
7498 
7499     if (dataset_has_panel_time(dset)) {
7500 	if (pd == 4 || pd == 12) {
7501 	    err = monthly_or_quarterly_dates(dset, pd, sd0, T, x);
7502 	} else if (pd == 1 || pd == 10) {
7503 	    err = annual_or_decennial_dates(dset, pd, sd0, T, x);
7504 	} else if (pd == 5 || pd == 6 || pd == 7 || pd == 52) {
7505 	    err = panel_daily_or_weekly(dset, x);
7506 	}
7507     } else if (calendar_data(dset)) {
7508 	err = regular_daily_or_weekly(dset, x);
7509     } else if (quarterly_or_monthly(dset)) {
7510 	err = monthly_or_quarterly_dates(dset, pd, sd0, T, x);
7511     } else if (annual_data(dset) || decennial_data(dset)) {
7512 	err = annual_or_decennial_dates(dset, pd, sd0, T, x);
7513     } else {
7514 	err = E_PDWRONG;
7515     }
7516 
7517     if (!err && dataset_is_panel(dset)) {
7518 	/* expand the date series for all groups */
7519 	int i, N = dset->n / dset->pd;
7520 	size_t bytes = dset->pd * sizeof *x;
7521 	double *dest = x + dset->pd;
7522 
7523 	for (i=1; i<N; i++) {
7524 	    memcpy(dest, x, bytes);
7525 	    dest += dset->pd;
7526 	}
7527     }
7528 
7529     return err;
7530 }
7531 
fill_day_of_week_array(double * dow,const double * y,const double * m,const double * d,const DATASET * dset)7532 int fill_day_of_week_array (double *dow,
7533 			    const double *y,
7534 			    const double *m,
7535 			    const double *d,
7536 			    const DATASET *dset)
7537 {
7538     int yt, mt, dt;
7539     int julian;
7540     int t, err = 0;
7541 
7542     for (t=dset->t1; t<=dset->t2 && !err; t++) {
7543 	julian = 0;
7544 	yt = (int) y[t];
7545 	if (yt < 0) {
7546 	    yt = -yt;
7547 	    julian = 1;
7548 	}
7549 	mt = (int) m[t];
7550 	dt = (int) d[t];
7551 	dow[t] = day_of_week(yt, mt, dt, julian, &err);
7552     }
7553 
7554     return err;
7555 }
7556 
fill_isoweek_array(double * wknum,const double * y,const double * m,const double * d,const DATASET * dset)7557 int fill_isoweek_array (double *wknum,
7558 			const double *y,
7559 			const double *m,
7560 			const double *d,
7561 			const DATASET *dset)
7562 {
7563     int yt, mt, dt;
7564     int t, err = 0;
7565 
7566     for (t=dset->t1; t<=dset->t2 && !err; t++) {
7567 	yt = (int) y[t];
7568 	mt = (int) m[t];
7569 	dt = (int) d[t];
7570 	wknum[t] = iso_week_number(yt, mt, dt, &err);
7571     }
7572 
7573     return err;
7574 }
7575 
7576 /**
7577  * empirical_cdf:
7578  * @y: array to process
7579  * @n: length of @y.
7580  * @err: location to receive error code on failure.
7581  *
7582  * Calculates the empirical CDF of @y, skipping any missing values.
7583  *
7584  * Returns: on successful completion, a matrix with row
7585  * dimension equal to the number of unique values in @y, and two
7586  * columns, the first containing the unique values of @y and the
7587  * second the cumulative relative frequency. On failure, returns
7588  * NULL.
7589  */
7590 
empirical_cdf(const double * y,int n,int * err)7591 gretl_matrix *empirical_cdf (const double *y, int n, int *err)
7592 {
7593     gretl_matrix *m = NULL;
7594     double *sy = NULL;
7595     double *crf = NULL;
7596     int n_le, n_u, n_ok = 0;
7597     int percentages = 0;
7598     int i, j, k, kbak;
7599 
7600     /* count non-missing values */
7601     for (i=0; i<n; i++) {
7602 	if (!na(y[i])) {
7603 	    n_ok += 1;
7604 	}
7605     }
7606 
7607     if (n_ok == 0) {
7608 	*err = E_MISSDATA;
7609 	return NULL;
7610     }
7611 
7612     /* allocate full array for sorting */
7613     sy = malloc(n_ok * sizeof *sy);
7614     if (sy == NULL) {
7615 	*err = E_ALLOC;
7616 	return NULL;
7617     }
7618 
7619     j = 0;
7620     for (i=0; i<n; i++) {
7621 	if (!na(y[i])) {
7622 	    sy[j++] = y[i];
7623 	}
7624     }
7625 
7626     qsort(sy, n_ok, sizeof *sy, gretl_compare_doubles);
7627 
7628     /* count unique values */
7629     n_u = 1;
7630     for (i=1; i<n_ok; i++) {
7631 	if (sy[i] != sy[i-1]) {
7632 	    n_u++;
7633 	}
7634     }
7635 
7636     m = gretl_matrix_alloc(n_u, 2);
7637     if (m == NULL) {
7638 	*err = E_ALLOC;
7639 	free(sy);
7640 	return NULL;
7641     }
7642 
7643     crf = m->val + n_u;
7644 
7645     /* The first column of @m holds unique values,
7646        the second cumulative relative frequency
7647     */
7648     j = kbak = n_le = 0;
7649     for (i=0; i<n_ok; i++) {
7650 	if (i == 0 || sy[i] != sy[i-1]) {
7651 	    gretl_matrix_set(m, j, 0, sy[i]);
7652 	    for (k=kbak; k<n_ok; k++) {
7653 		if (sy[k] <= sy[i]) {
7654 		    n_le++;
7655 		} else {
7656 		    break;
7657 		}
7658 	    }
7659 	    if (percentages) {
7660 		crf[j++] = 100 * n_le / (double) n_ok;
7661 	    } else {
7662 		crf[j++] = n_le / (double) n_ok;
7663 	    }
7664 	    kbak = k;
7665 	}
7666     }
7667 
7668     free(sy);
7669 
7670     return m;
7671 }
7672 
7673 /* Convert from ISO 8601 (daily) dates to year, quarter or month
7674    (discarding excess precision), if possible.
7675 */
7676 
handle_excess_precision(int * ymd1,int * ymd2,int pd,char * obs1,char * obs2)7677 static int handle_excess_precision (int *ymd1, int *ymd2, int pd,
7678 				    char *obs1, char *obs2)
7679 {
7680     int err = 0;
7681 
7682     if (ymd1[2] != 1 || ymd2[2] != 1) {
7683 	/* day-of-month must be 1 in all cases */
7684 	return E_INVARG;
7685     }
7686 
7687     *obs1 = *obs2 = '\0';
7688 
7689     if (pd == 1) {
7690 	/* annual: the month must be 1 */
7691 	if (ymd1[1] != 1 || ymd1[2] != 1) {
7692 	    err = E_INVARG;
7693 	} else {
7694 	    sprintf(obs1, "%d", ymd1[0]);
7695 	    sprintf(obs2, "%d", ymd2[0]);
7696 	}
7697     } else if (pd == 4) {
7698 	/* quarterly: the month must start a quarter */
7699 	int m1 = ymd1[1], m2 = ymd2[1];
7700 
7701 	if ((m1 != 1 && m1 != 4 && m1 != 7 && m1 != 10) ||
7702 	    (m2 != 1 && m2 != 4 && m2 != 7 && m2 != 10)) {
7703 	    err = E_INVARG;
7704 	} else {
7705 	    sprintf(obs1, "%d:%d", ymd1[0], 1 + (m1-1)/3);
7706 	    sprintf(obs2, "%d:%d", ymd2[0], 1 + (m2-1)/3);
7707 	}
7708     } else {
7709 	/* pd == 12: monthly */
7710 	int m1 = ymd1[1], m2 = ymd2[1];
7711 
7712 	if (m1 < 1 || m1 > 12 || m2 < 1 || m2 > 12) {
7713 	    err = E_INVARG;
7714 	} else {
7715 	    sprintf(obs1, "%d:%02d", ymd1[0], m1);
7716 	    sprintf(obs2, "%d:%02d", ymd2[0], m2);
7717 	}
7718     }
7719 
7720     return err;
7721 }
7722 
sample_span(const char * stobs,const char * endobs,int pd,int * err)7723 int sample_span (const char *stobs, const char *endobs,
7724 		 int pd, int *err)
7725 {
7726     char obs1[16], obs2[16];
7727     DATASET dset = {0};
7728     int ymd1[3], ymd2[3];
7729     int n, nf, t2, span = -1;
7730 
7731     if (pd == 1 || pd == 12 || (pd >= 4 && pd <= 7) || pd == 52) {
7732 	; /* OK, supported frequency */
7733     } else {
7734 	*err = E_INVARG;
7735 	return -1;
7736     }
7737 
7738     n = strlen(stobs);
7739 
7740     if (n > 10 || strlen(endobs) != n) {
7741 	/* invalid and/or inconsistent observation strings */
7742 	*err = E_INVARG;
7743 	return -1;
7744     }
7745 
7746     strcpy(obs1, stobs);
7747     strcpy(obs2, endobs);
7748 
7749     if (n == 10) {
7750 	/* should be ISO 8601 dates */
7751 	nf = sscanf(obs1, "%d-%d-%d", &ymd1[0], &ymd1[1], &ymd1[2]);
7752 	if (nf != 3) {
7753 	    *err = E_INVARG;
7754 	} else {
7755 	    nf = sscanf(obs2, "%d-%d-%d", &ymd2[0], &ymd2[1], &ymd2[2]);
7756 	    if (nf != 3) {
7757 		*err = E_INVARG;
7758 	    }
7759 	}
7760 	if (!*err && (pd == 1 || pd == 4 || pd == 12)) {
7761 	    *err = handle_excess_precision(ymd1, ymd2, pd, obs1, obs2);
7762 	}
7763     }
7764 
7765     if (*err) {
7766 	return -1;
7767     }
7768 
7769     strcpy(dset.stobs, obs1);
7770     dset.pd = pd;
7771     dset.structure = TIME_SERIES;
7772 
7773     if (n == 10 && ((pd >= 5 && pd <= 7) || pd == 52)) {
7774 	/* validate daily data */
7775 	int ed1, ed2;
7776 
7777 	ed1 = epoch_day_from_ymd(ymd1[0], ymd1[1], ymd1[2]);
7778 	ed2 = epoch_day_from_ymd(ymd2[0], ymd2[1], ymd2[2]);
7779 
7780 	if (ed1 > 0 && ed2 > 0 && (pd == 5 || pd == 6)) {
7781 	    /* validate days-of-week */
7782 	    int wd1 = weekday_from_epoch_day(ed1);
7783 	    int wd2 = weekday_from_epoch_day(ed2);
7784 
7785 	    if (pd == 5 && (wd1 < 1 || wd1 > 5)) {
7786 		ed1 = 0;
7787 	    } else if (pd == 5 && (wd2 < 1 || wd2 > 5)) {
7788 		ed2 = 0;
7789 	    } else if (pd == 6) {
7790 		if (wd1 < 1) {
7791 		    ed1 = 0;
7792 		} else if (wd2 < 1) {
7793 		    ed2 = 0;
7794 		}
7795 	    }
7796 	}
7797 
7798 	if (ed1 == 0) {
7799 	    gretl_errmsg_sprintf("Invalid observation %s", stobs);
7800 	    *err = E_INVARG;
7801 	} else if (ed2 == 0) {
7802 	    gretl_errmsg_sprintf("Invalid observation %s", endobs);
7803 	    *err = E_INVARG;
7804 	} else {
7805 	    dset.sd0 = (double) ed1;
7806 	}
7807     }
7808 
7809     if (*err) {
7810 	span = -1;
7811     } else {
7812 	t2 = dateton(obs2, &dset);
7813 	if (t2 >= 0) {
7814 	    span = t2 + 1; /* t2 is 0-based */
7815 	} else {
7816 	    *err = E_INVARG;
7817 	    span = -1;
7818 	}
7819     }
7820 
7821     return span;
7822 }
7823 
real_clogit_fi(int T,int k,int r,gretl_matrix * z,gretl_matrix * df,int * err)7824 static double real_clogit_fi (int T, int k, int r,
7825 			      gretl_matrix *z,
7826 			      gretl_matrix *df,
7827 			      int *err)
7828 {
7829     double x, ret = NADBL;
7830     int do_score = (df != NULL);
7831     int i, j;
7832 
7833     if (do_score) {
7834         for (i=0; i<r; i++) {
7835 	    gretl_vector_set(df, i, 0);
7836 	}
7837     }
7838 
7839     if (T < k) {
7840 	ret = 0.0;
7841     } else if (k == 0) {
7842 	ret = 1.0;
7843     } else if (k == 1) {
7844 	ret = 0.0;
7845 	for (i=0; i<T; i++) {
7846 	    x = exp(gretl_vector_get(z, i));
7847 	    ret += x;
7848 	    if (do_score) {
7849 		gretl_vector_set(df, i, x);
7850 	    }
7851 	}
7852     } else if (k == 2) {
7853 	double exi;
7854 
7855 	ret = 0.0;
7856 	for (i=0; i<T-1; i++) {
7857 	    exi = exp(gretl_vector_get(z, i));
7858 	    for (j=i+1; j<T; j++) {
7859 		x = exp(gretl_vector_get(z, j)) * exi;
7860 		ret += x;
7861 		if (do_score) {
7862 		    df->val[i] += x;
7863 		    df->val[j] += x;
7864 		}
7865 	    }
7866 	}
7867     } else if (k == T - 1) {
7868 	double S = 0.0;
7869 
7870 	ret = 0.0;
7871 	for (i=0; i<T; i++) {
7872 	    S += gretl_vector_get(z, i);
7873 	}
7874 
7875 	for (i=0; i<T; i++) {
7876 	    x = exp(S - gretl_vector_get(z, i));
7877 	    ret += x;
7878 	    if (do_score) {
7879 		for (j=0; j<T; j++) {
7880 		    if (i != j) {
7881 			df->val[j] += x;
7882 		    }
7883 		}
7884 	    }
7885 	}
7886     } else if (k == T) {
7887 	ret = 0.0;
7888 	for (i=0; i<T; i++) {
7889 	    ret += gretl_vector_get(z, i);
7890 	}
7891 	ret = exp(ret);
7892 	if (do_score) {
7893 	    for (i=0; i<T; i++) {
7894 		gretl_vector_set(df, i, ret);
7895 	    }
7896 	}
7897     } else {
7898 	double x = exp(gretl_vector_get(z, T-1));
7899 
7900 	if (do_score) {
7901 	    double f2 = real_clogit_fi(T-1, k-1, r, z, df, err);
7902 	    gretl_matrix *df1 = NULL;
7903 
7904 	    if (!*err) {
7905 		df1 = gretl_vector_alloc(r);
7906 		if (df1 == NULL) {
7907 		    ret = NADBL;
7908 		    *err = E_ALLOC;
7909 		}
7910 	    }
7911 
7912 	    if (!*err) {
7913 		ret = real_clogit_fi(T-1, k, r, z, df1, err) + f2 * x;
7914 		for (i=0; i<T; i++) {
7915 		    df->val[i] *= x;
7916 		    df->val[i] += df1->val[i];
7917 		}
7918 		df->val[T-1] += x * f2;
7919 		gretl_matrix_free(df1);
7920 	    }
7921 	} else {
7922 	    double f2 = real_clogit_fi(T-1, k-1, r, z, NULL, err);
7923 
7924 	    if (!*err) {
7925 		ret = real_clogit_fi(T-1, k, r, z, NULL, err) + f2 * x;
7926 	    }
7927 	}
7928     }
7929 
7930     return ret;
7931 }
7932 
7933 /* FIXME add some documentation here */
7934 
clogit_fi(int T,int k,gretl_matrix * z,gretl_matrix * df,int * err)7935 double clogit_fi (int T, int k, gretl_matrix *z,
7936 		  gretl_matrix *df, int *err)
7937 {
7938     gretl_matrix *dfarg = NULL;
7939     double ret = NADBL;
7940     int r;
7941 
7942     if (gretl_is_null_matrix(z)) {
7943 	*err = E_DATA;
7944 	return NADBL;
7945     }
7946 
7947     r = z->rows;
7948 
7949     if (df != NULL) {
7950 	if (df->rows != r || df->cols != 1) {
7951 	    dfarg = gretl_column_vector_alloc(r);
7952 	    if (dfarg == NULL) {
7953 		*err = E_ALLOC;
7954 	    }
7955 	} else {
7956 	    dfarg = df;
7957 	}
7958     }
7959 
7960     if (!*err) {
7961 	ret = real_clogit_fi(T, k, r, z, dfarg, err);
7962     }
7963 
7964     if (dfarg != NULL && dfarg != df) {
7965 	gretl_matrix_replace_content(df, dfarg);
7966 	gretl_matrix_free(dfarg);
7967     }
7968 
7969     return ret;
7970 }
7971 
7972 /* simple driver function */
7973 
gretl_matrix_vector_stat(const gretl_matrix * m,GretlVecStat vs,int rowwise,int * err)7974 gretl_matrix *gretl_matrix_vector_stat (const gretl_matrix *m,
7975 					GretlVecStat vs, int rowwise,
7976 					int *err)
7977 {
7978     if (m->is_complex) {
7979 	return gretl_cmatrix_vector_stat(m, vs, rowwise, err);
7980     } else {
7981 	return gretl_rmatrix_vector_stat(m, vs, rowwise, err);
7982     }
7983 }
7984 
7985 /* Fill @v with unique random draws from {1,2,...,@n} */
7986 
fill_permutation_vector(gretl_vector * v,int n)7987 int fill_permutation_vector (gretl_vector *v, int n)
7988 {
7989     int *pool = NULL;
7990     int n_tail;
7991     unsigned u;
7992     int i, k;
7993 
7994     if (n <= 0) {
7995 	return E_INVARG;
7996     }
7997 
7998     k = gretl_vector_get_length(v);
7999     if (k <= 0 || k > n) {
8000 	gretl_errmsg_sprintf(_("Invalid number of draws %d"), k);
8001 	return E_INVARG;
8002     }
8003 
8004     pool = malloc(n * sizeof *pool);
8005     if (pool == NULL) {
8006 	return E_ALLOC;
8007     }
8008 
8009     /* initialize selection pool */
8010     for (i=0; i<n; i++) {
8011 	pool[i] = i + 1;
8012     }
8013 
8014     for (i=0; i<k; i++) {
8015 	u = gretl_rand_int_max(n);
8016 	v->val[i] = pool[u];
8017 	/* remove pool[u] from the selectable set */
8018 	if (u < n - 1) {
8019 	    n_tail = n - u - 1;
8020 	    memmove(pool + u, pool + u + 1, n_tail * sizeof *pool);
8021 	}
8022 	n--;
8023     }
8024 
8025     free(pool);
8026 
8027     return 0;
8028 }
8029 
8030 #define GEODEBUG 0
8031 
8032 /* Driver function for calling the geoplot plugin to produce
8033    a map. To obtain the map polygons we need EITHER the name
8034    of the source file (GeoJSON or Shapefile), via @fname,
8035    OR the map info in the form of a gretl_bundle, via @mapptr.
8036 
8037    The "payload" (if any) is given as a series, via @plx.
8038 
8039    Options (if any) are provided via @optptr, a pointer to
8040    gretl_bundle.
8041 */
8042 
geoplot_driver(const char * fname,gretl_bundle * map,const double * plx,const DATASET * dset,gretl_bundle * opts)8043 int geoplot_driver (const char *fname,
8044 		    gretl_bundle *map,
8045 		    const double *plx,
8046 		    const DATASET *dset,
8047 		    gretl_bundle *opts)
8048 {
8049     int (*mapfunc) (const char *, gretl_bundle *,
8050 		    gretl_matrix *, gretl_bundle *);
8051     gretl_matrix *payload = NULL;
8052     const char *mapfile = NULL;
8053     int free_map = 0;
8054     int err = 0;
8055 
8056     if (fname != NULL && map != NULL) {
8057 	gretl_errmsg_set("geoplot: cannot give both filename and map bundle");
8058 	return E_DATA;
8059     }
8060 
8061     if (map == NULL) {
8062 	mapfile = dataset_get_mapfile(dset);
8063     }
8064 
8065     if (fname == NULL && map == NULL) {
8066 	fname = mapfile;
8067 	if (fname == NULL) {
8068 	    gretl_errmsg_set("geoplot: no map was specified");
8069 	    return E_DATA;
8070 	}
8071     }
8072 
8073     if (plx != NULL) {
8074 	/* convert payload series to vector for convenience in plugin */
8075 	payload = gretl_vector_from_series(plx, dset->t1, dset->t2);
8076 	if (payload == NULL) {
8077 	    err = E_ALLOC;
8078 	}
8079     }
8080 
8081 #if GEODEBUG
8082     fprintf(stderr, "geoplot_driver: map=%p, mapfile=%p, fname=%p\n",
8083 	    (void *) map, (void *) mapfile, (void *) fname);
8084 #endif
8085 
8086     /* In the case where we got @fname, do we want to produce a
8087        map bundle in which the actual map data are synced with
8088        the dataset? Probably so if @fname is just $mapfile
8089        (metadata loaded as dataset), and presumably not if @fname
8090        is an "external" reference.
8091     */
8092     if (map == NULL && mapfile != NULL) {
8093 	if (fname == mapfile || !strcmp(fname, mapfile)) {
8094 #if GEODEBUG
8095 	    fprintf(stderr, "geoplot_driver: calling get_current_map()\n");
8096 #endif
8097 	    map = get_current_map(dset, NULL, &err);
8098 	    free_map = 1;
8099 	    fname = NULL;
8100 	}
8101     }
8102 
8103     if (!err) {
8104 	mapfunc = get_plugin_function("geoplot");
8105 	if (mapfunc == NULL) {
8106 	    err = E_FOPEN;
8107 	} else {
8108 	    err = mapfunc(fname, map, payload, opts);
8109 	}
8110     }
8111 
8112     gretl_matrix_free(payload);
8113     if (free_map) {
8114 	gretl_bundle_destroy(map);
8115     }
8116 
8117     return err;
8118 }
8119 
8120 enum {
8121     SUBST_SIMPLE,
8122     SUBST_BTREE
8123 };
8124 
subst_val_via_tree(double x,const double * x0,int n0,const double * x1,int n1)8125 static double subst_val_via_tree (double x, const double *x0, int n0,
8126 				  const double *x1, int n1)
8127 {
8128     static BTree *tree;
8129 
8130     if (x0 == NULL) {
8131 	/* cleanup */
8132 	gretl_btree_destroy(tree);
8133 	tree = NULL;
8134 	return 0;
8135     }
8136 
8137     if (tree == NULL) {
8138 	/* allocate and populate tree */
8139 	double x1val;
8140 	int i;
8141 
8142 	tree = gretl_btree_new();
8143 	for (i=0; i<n0; i++) {
8144 	    x1val = n1 == 1 ? *x1 : x1[i];
8145 	    gretl_btree_insert(tree, x0[i], x1val);
8146 	}
8147     }
8148 
8149     /* do the actual lookup */
8150     x = gretl_btree_lookup(tree, x);
8151 
8152     return x;
8153 }
8154 
select_subst_method(int n,int nfind)8155 static int select_subst_method (int n, int nfind)
8156 {
8157 #if 1
8158     /* for testing */
8159     if (getenv("REPLACE_USE_BTREE") != NULL) {
8160 	return SUBST_BTREE;
8161     } else if (getenv("REPLACE_NAIVE") != NULL) {
8162 	return SUBST_SIMPLE;
8163     }
8164 #endif
8165     /* The idea here is that it's worth using a binary
8166        tree mapping "find" to "replace" values only if
8167        the problem is big enough. Testing seems to
8168        indicate that the following condition for "big
8169        enough" may be roughly appropriate.
8170     */
8171     if (nfind > 11 && n >= 80) {
8172 	return SUBST_BTREE;
8173     } else {
8174 	return SUBST_SIMPLE;
8175     }
8176 }
8177 
8178 /* Given an original value @x, see if it matches any of the @n0 values
8179    in @x0.  If so, return the substitute value from @x1, otherwise
8180    return the original.
8181 */
8182 
subst_val(double x,const double * x0,int n0,const double * x1,int n1,int method)8183 static double subst_val (double x, const double *x0, int n0,
8184 			 const double *x1, int n1,
8185 			 int method)
8186 {
8187     if (method & SUBST_BTREE) {
8188 	x = subst_val_via_tree(x, x0, n0, x1, n1);
8189     } else {
8190 	int i;
8191 
8192 	for (i=0; i<n0; i++) {
8193 	    if (x == x0[i]) {
8194 		return (n1 == 1)? *x1 : x1[i];
8195 	    } else if (isnan(x) && isnan(x0[i])) {
8196 		/* we'll count this as a match */
8197 		return (n1 == 1)? *x1 : x1[i];
8198 	    }
8199 	}
8200     }
8201 
8202     return x;
8203 }
8204 
8205 /**
8206  * substitute_values:
8207  * @dest: target array.
8208  * @src: source array.
8209  * @n: length of @src and @dest.
8210  * @v0: array of "find" values.
8211  * @n0: length of @v0.
8212  * @v1: array of "replace" values.
8213  * @n1: length of @v1.
8214  *
8215  * For each of the @n elements in @src, determine if it appears
8216  * in @v0: if so, write to @targ the corresponding element of
8217  * @v1, otherwise write the @src element to @targ. The method
8218  * employed is either "naive" lookup, or if the problem is of
8219  * sufficient size a binary tree.
8220  *
8221  * Returns: 0 on successful completion, non-zero code on error.
8222  */
8223 
substitute_values(double * dest,const double * src,int n,const double * v0,int n0,const double * v1,int n1)8224 int substitute_values (double *dest, const double *src, int n,
8225 		       const double *v0, int n0,
8226 		       const double *v1, int n1)
8227 {
8228     int i, method = select_subst_method(n, n1);
8229 
8230     for (i=0; i<n; i++) {
8231 	dest[i] = subst_val(src[i], v0, n0, v1, n1, method);
8232     }
8233     if (method & SUBST_BTREE) {
8234 	/* cleanup call */
8235 	subst_val(0, NULL, 0, NULL, 0, method);
8236     }
8237 
8238     return 0;
8239 }
8240