1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 /* various functions called by 'genr' */
21
22 #include "genparse.h"
23 #include "libset.h"
24 #include "monte_carlo.h"
25 #include "gretl_panel.h"
26 #include "gretl_cmatrix.h"
27 #include "gretl_typemap.h"
28 #include "gretl_midas.h"
29 #include "estim_private.h"
30 #include "matrix_extra.h"
31 #include "gretl_btree.h"
32 #include "kalman.h"
33 #include "../../cephes/cephes.h"
34
35 #include <errno.h>
36
sort_series(const double * x,double * y,int f,const DATASET * dset)37 int sort_series (const double *x, double *y, int f,
38 const DATASET *dset)
39 {
40 double *z = NULL;
41 int n = sample_size(dset);
42 int i, t;
43
44 z = malloc(n * sizeof *z);
45 if (z == NULL) {
46 return E_ALLOC;
47 }
48
49 i = 0;
50 for (t=dset->t1; t<=dset->t2; t++) {
51 if (!na(x[t])) {
52 z[i++] = x[t];
53 }
54 }
55
56 if (f == F_DSORT) {
57 qsort(z, i, sizeof *z, gretl_inverse_compare_doubles);
58 } else {
59 qsort(z, i, sizeof *z, gretl_compare_doubles);
60 }
61
62 i = 0;
63 for (t=dset->t1; t<=dset->t2; t++) {
64 if (na(x[t])) {
65 y[t] = NADBL;
66 } else {
67 y[t] = z[i++];
68 }
69 }
70
71 free(z);
72
73 return 0;
74 }
75
76 struct pair_sorter {
77 double x;
78 double y;
79 };
80
81 /* sort the series y by the values of x, putting the result
82 into z */
83
gretl_sort_by(const double * x,const double * y,double * z,const DATASET * dset)84 int gretl_sort_by (const double *x, const double *y,
85 double *z, const DATASET *dset)
86 {
87 struct pair_sorter *xy;
88 int n = sample_size(dset);
89 int i, t;
90
91 for (t=dset->t1; t<=dset->t2; t++) {
92 if (na(x[t])) {
93 return E_MISSDATA;
94 }
95 }
96
97 xy = malloc(n * sizeof *xy);
98 if (xy == NULL) {
99 return E_ALLOC;
100 }
101
102 i = 0;
103 for (t=dset->t1; t<=dset->t2; t++) {
104 xy[i].x = x[t];
105 xy[i].y = y[t];
106 i++;
107 }
108
109 qsort(xy, n, sizeof *xy, gretl_compare_doubles);
110
111 i = 0;
112 for (t=dset->t1; t<=dset->t2; t++) {
113 z[t] = xy[i++].y;
114 }
115
116 free(xy);
117
118 return 0;
119 }
120
genrank(const double * sz,int m,const double * z,int n,double * rz)121 static void genrank (const double *sz, int m,
122 const double *z, int n,
123 double *rz)
124 {
125 int cases, k, i, j;
126 double avg, r = 1;
127
128 for (i=0; i<m; i++) {
129 /* scan sorted z */
130 cases = k = 0;
131
132 if (i > 0 && sz[i] == sz[i-1]) {
133 continue;
134 }
135
136 for (j=0; j<n; j++) {
137 /* scan raw z for matches */
138 if (!na(z[j])) {
139 if (z[j] == sz[i]) {
140 rz[k] = r;
141 cases++;
142 }
143 k++;
144 }
145 }
146
147 if (cases > 1) {
148 avg = (r + r + cases - 1.0) / 2.0;
149 for (j=0; j<m; j++) {
150 if (rz[j] == r) {
151 rz[j] = avg;
152 }
153 }
154 }
155
156 r += cases;
157 }
158 }
159
160 /* implements both rank_series() and rank_vector() */
161
rank_array(const double * x,double * y,int f,int n)162 static int rank_array (const double *x, double *y, int f, int n)
163 {
164 double *sx = NULL;
165 double *rx = NULL;
166 int m = n;
167 int i, t;
168
169 for (t=0; t<n; t++) {
170 if (na(x[t])) m--;
171 }
172
173 sx = malloc(m * sizeof *sx);
174 rx = malloc(m * sizeof *rx);
175
176 if (sx == NULL || rx == NULL) {
177 free(sx);
178 free(rx);
179 return E_ALLOC;
180 }
181
182 i = 0;
183 for (t=0; t<n; t++) {
184 if (!na(x[t])) {
185 sx[i] = x[t];
186 rx[i] = 0.0;
187 i++;
188 }
189 }
190
191 if (f == F_DSORT) {
192 qsort(sx, m, sizeof *sx, gretl_inverse_compare_doubles);
193 } else {
194 qsort(sx, m, sizeof *sx, gretl_compare_doubles);
195 }
196
197 genrank(sx, m, x, n, rx);
198
199 i = 0;
200 for (t=0; t<n; t++) {
201 if (na(x[t])) {
202 y[t] = NADBL;
203 } else {
204 y[t] = rx[i++];
205 }
206 }
207
208 free(sx);
209 free(rx);
210
211 return 0;
212 }
213
rank_series(const double * x,double * y,int f,const DATASET * dset)214 int rank_series (const double *x, double *y, int f,
215 const DATASET *dset)
216 {
217 return rank_array(x + dset->t1, y + dset->t1,
218 f, sample_size(dset));
219 }
220
rank_vector(const gretl_matrix * x,int f,int * err)221 gretl_matrix *rank_vector (const gretl_matrix *x, int f, int *err)
222 {
223 gretl_matrix *y = NULL;
224 int n = gretl_vector_get_length(x);
225
226 if (n == 0) {
227 *err = E_DATA;
228 } else {
229 y = gretl_matrix_alloc(x->rows, x->cols);
230 if (y == NULL) {
231 *err = E_ALLOC;
232 }
233 }
234
235 if (!*err) {
236 *err = rank_array(x->val, y->val, f, n);
237 if (*err) {
238 gretl_matrix_free(y);
239 y = NULL;
240 }
241 }
242
243 return y;
244 }
245
246 /**
247 * diff_series:
248 * @x: array of original data.
249 * @y: array into which to write the result.
250 * @f: function, F_DIFF, F_SDIFF or F_LDIFF.
251 * @dset: data set information.
252 *
253 * Calculates the differenced counterpart to the input
254 * series @x. If @f = F_SDIFF, the seasonal difference is
255 * computed; if @f = F_LDIFF, the log difference, and if
256 * @f = F_DIFF, the ordinary first difference.
257 *
258 * Returns: 0 on success, non-zero error code on failure.
259 */
260
diff_series(const double * x,double * y,int f,const DATASET * dset)261 int diff_series (const double *x, double *y, int f,
262 const DATASET *dset)
263 {
264 int k = (f == F_SDIFF)? dset->pd : 1;
265 int t, t1 = dset->t1;
266
267 if (t1 < k) {
268 t1 = k;
269 }
270
271 for (t=t1; t<=dset->t2; t++) {
272 if (dataset_is_panel(dset) && t % dset->pd == 0) {
273 /* skip first observation in panel unit */
274 continue;
275 }
276 if (na(x[t]) || na(x[t-k])) {
277 continue;
278 }
279 if (f == F_DIFF || f == F_SDIFF) {
280 y[t] = x[t] - x[t-k];
281 } else if (f == F_LDIFF) {
282 if (x[t] > 0.0 && x[t-k] > 0.0) {
283 y[t] = log(x[t]) - log(x[t-k]);
284 }
285 }
286 }
287
288 return 0;
289 }
290
291 /**
292 * standardize_series:
293 * @x: array of original data.
294 * @y: array into which to write the result.
295 * @dfc: degrees of freedom correction.
296 * @dset: data set information.
297 *
298 * By default calculates the standardized counterpart to the input
299 * series @x, but if @dfc < 0 the result is just centered.
300 *
301 * Returns: 0 on success, non-zero error code on failure.
302 */
303
standardize_series(const double * x,double * y,int dfc,const DATASET * dset)304 int standardize_series (const double *x, double *y, int dfc,
305 const DATASET *dset)
306 {
307 double d, xbar = 0;
308 int t, n = 0;
309
310 for (t=dset->t1; t<=dset->t2; t++) {
311 if (!na(x[t])) {
312 xbar += x[t];
313 n++;
314 }
315 }
316
317 if (dfc >= 0 && n < dfc + 1) {
318 return E_TOOFEW;
319 }
320
321 xbar /= n;
322
323 if (dfc < 0) {
324 /* just centering */
325 for (t=dset->t1; t<=dset->t2; t++) {
326 if (na(x[t])) {
327 y[t] = NADBL;
328 } else {
329 y[t] = x[t] - xbar;
330 }
331 }
332 } else {
333 /* dividing by s.d. */
334 double sd, TSS = 0;
335
336 for (t=dset->t1; t<=dset->t2; t++) {
337 if (!na(x[t])) {
338 d = x[t] - xbar;
339 TSS += d * d;
340 }
341 }
342
343 sd = sqrt(TSS / (n - dfc));
344
345 for (t=dset->t1; t<=dset->t2; t++) {
346 if (na(x[t])) {
347 y[t] = NADBL;
348 } else {
349 y[t] = (x[t] - xbar) / sd;
350 }
351 }
352 }
353
354 return 0;
355 }
356
357 /**
358 * orthdev_series:
359 * @x: array of original data.
360 * @y: array into which to write the result.
361 * @dset: data set information.
362 *
363 * Calculates in @y the forward orthogonal deviations of the input
364 * series @x. That is, y[t] is the scaled difference between x[t]
365 * and the mean of the subsequent observations on x.
366 *
367 * Returns: 0 on success, non-zero error code on failure.
368 */
369
orthdev_series(const double * x,double * y,const DATASET * dset)370 int orthdev_series (const double *x, double *y, const DATASET *dset)
371 {
372 double xbar;
373 int n, s, t, Tt;
374
375 for (t=dset->t1; t<dset->t2; t++) {
376
377 if (na(x[t])) {
378 continue;
379 }
380
381 if (dataset_is_panel(dset)) {
382 Tt = dset->pd - (t % dset->pd) - 1;
383 } else {
384 Tt = dset->t2 - t;
385 }
386
387 xbar = 0.0;
388 n = 0;
389 for (s=1; s<=Tt; s++) {
390 if (!na(x[t+s])) {
391 xbar += x[t+s];
392 n++;
393 }
394 }
395
396 if (n > 0) {
397 xbar /= n;
398 /* Lead one period, for compatibility with first diffs.
399 I.e. we lose the first observation rather than the
400 last. This is for arbond. Cf. Doornik, Bond and
401 Arellano, DPD documentation.
402 */
403 y[t+1] = sqrt(n / (n + 1.0)) * (x[t] - xbar);
404 }
405 }
406
407 return 0;
408 }
409
410 /**
411 * fracdiff_series:
412 * @x: array of original data.
413 * @y: array into which to write the result.
414 * @d: fraction by which to difference.
415 * @diff: boolean variable 1 for fracdiff, 0 for fraclag
416 * @obs: used for autoreg calculation, -1 if whole series
417 * should be calculated otherwise just the observation for
418 * obs is calculated
419 * @dset: data set information.
420 *
421 * Calculates the fractionally differenced or lagged
422 * counterpart to the input series @x. The fractional
423 * difference operator is defined as (1-L)^d, while the
424 * fractional lag operator 1-(1-L)^d.
425 *
426 * Returns: 0 on success, non-zero error code on failure.
427 */
428
fracdiff_series(const double * x,double * y,double d,int diff,int obs,const DATASET * dset)429 int fracdiff_series (const double *x, double *y, double d,
430 int diff, int obs, const DATASET *dset)
431 {
432 int dd, t, T;
433 const double TOL = 1.0E-12;
434 int t1 = dset->t1;
435 int t2 = (obs >= 0)? obs : dset->t2;
436 int tmiss = 0;
437 double phi = (diff)? -d : d;
438
439 #if 0
440 fprintf(stderr, "Doing fracdiff_series, with d = %g\n", d);
441 #endif
442
443 if (series_adjust_sample(x, &t1, &t2) != 0) {
444 tmiss = first_missing_index(x, t1, t2);
445 if (tmiss > 0 && tmiss < t2) {
446 t2 = tmiss;
447 }
448 }
449
450 if (obs >= 0) {
451 /* doing a specific observation */
452 T = obs - t1 + 1;
453 for (t=0; t<dset->n; t++) {
454 y[t] = NADBL;
455 }
456 if (obs != t1) {
457 y[obs] = (diff)? x[obs] : 0;
458 for (dd=1; dd<T && fabs(phi)>TOL; dd++) {
459 y[obs] += phi * x[obs - dd];
460 phi *= (dd - d)/(dd + 1);
461 }
462 } else if (diff) {
463 y[obs] = x[obs];
464 }
465 } else {
466 /* doing the whole series */
467 T = t2 - t1 + 1;
468 for (t=0; t<=t2; t++) {
469 if (t >= t1 && t <= t2) {
470 y[t] = (diff)? x[t] : 0;
471 } else {
472 y[t] = NADBL;
473 }
474 }
475 for (dd=1; dd<=T && fabs(phi)>TOL; dd++) {
476 for (t=t1+dd; t<=t2; t++) {
477 y[t] += phi * x[t - dd];
478 }
479 phi *= (dd - d)/(dd + 1);
480 }
481 }
482
483 return 0;
484 }
485
boxcox_vector(const double * x,double * y,double d,int t1,int t2)486 static int boxcox_vector (const double *x, double *y, double d,
487 int t1, int t2)
488 {
489 int t, special_case = 999;
490 double tol = 1.0e-12;
491
492 /* 999 for general case; here we want to avoid
493 the pow() function whenever possible.
494 */
495
496 if (fabs(d) < tol) {
497 special_case = 0;
498 } else if (fabs(1.0 - d) < tol) {
499 special_case = 1;
500 } else if (fabs(-1.0 - d) < tol) {
501 special_case = -1;
502 } else if (fabs(0.5 - d) < tol) {
503 special_case = 2;
504 } else if (fabs(-0.5 - d) < tol) {
505 special_case = -2;
506 }
507
508 for (t=t1; t<=t2; t++) {
509 if (na(x[t])) {
510 y[t] = NADBL;
511 } else if (special_case == -2) {
512 y[t] = (x[t] > 0)? 2.0 * (1.0 - 1.0/sqrt(x[t])) : NADBL;
513 } else if (special_case == -1) {
514 y[t] = 1.0 - 1.0/x[t];
515 } else if (special_case == 0) {
516 y[t] = (x[t] > 0)? log(x[t]) : NADBL;
517 } else if (special_case == 1) {
518 y[t] = x[t] - 1.0;
519 } else if (special_case == 2) {
520 y[t] = (x[t] > 0)? 2.0 * (sqrt(x[t]) - 1.0) : NADBL;
521 } else {
522 y[t] = (pow(x[t], d) - 1) / d;
523 }
524 }
525
526 return 0;
527 }
528
529 /**
530 * boxcox_series:
531 * @x: array of original data.
532 * @y: array into which to write the result.
533 * @d: lambda parameter.
534 * @dset: data set information.
535 *
536 * Calculates in @y the Box-Cox transformation for the
537 * input series @x.
538 *
539 * Returns: 0 on success, non-zero error code on failure.
540 */
541
boxcox_series(const double * x,double * y,double d,const DATASET * dset)542 int boxcox_series (const double *x, double *y, double d,
543 const DATASET *dset)
544 {
545 return boxcox_vector(x, y, d, dset->t1, dset->t2);
546 }
547
548 /**
549 * boxcox_matrix:
550 * @m: matrix of original data.
551 * @d: lambda parameter.
552 *
553 * Returns: a matrix whose columns are Box-Cox transformations
554 * of the columns of @m.
555 *
556 * Returns: 0 on success, non-zero error code on failure.
557 */
558
boxcox_matrix(const gretl_matrix * m,double d,int * err)559 gretl_matrix *boxcox_matrix (const gretl_matrix *m, double d,
560 int *err)
561 {
562 gretl_matrix *bc;
563
564 bc = gretl_matrix_alloc(m->rows, m->cols);
565
566 if (bc == NULL) {
567 *err = E_ALLOC;
568 } else {
569 const double *x = m->val;
570 double *y = bc->val;
571 int j, n = m->rows;
572
573 for (j=0; j<m->cols; j++) {
574 boxcox_vector(x, y, d, 0, n-1);
575 x += n;
576 y += n;
577 }
578 }
579
580 return bc;
581 }
582
cum_series(const double * x,double * y,const DATASET * dset)583 int cum_series (const double *x, double *y, const DATASET *dset)
584 {
585 int t, s = dset->t1;
586
587 /* corner case: sample size of 1! */
588 if (dset->t1 == dset->t2) {
589 y[dset->t1] = x[dset->t1];
590 return 0;
591 }
592
593 for (t=dset->t1; t<=dset->t2 && na(x[t]); t++) {
594 s++;
595 }
596
597 if (s == dset->t2) {
598 /* no usable data */
599 return 0;
600 }
601
602 y[s] = x[s];
603
604 if (dataset_is_panel(dset)) {
605 int k;
606
607 for (t=s+1; t<=dset->t2; t++) {
608 if (t % dset->pd == 0) {
609 /* first obs for panel unit */
610 for (k=0; k<dset->pd; k++) {
611 if (!na(x[t+k])) {
612 t = t + k;
613 y[t] = x[t];
614 break;
615 }
616 }
617 } else if (!na(y[t-1]) && !na(x[t])) {
618 y[t] = y[t-1] + x[t];
619 }
620 }
621 } else {
622 for (t=s+1; t<=dset->t2 && !na(x[t]); t++) {
623 y[t] = y[t-1] + x[t];
624 }
625 }
626
627 return 0;
628 }
629
panel_resample_series(const double * x,double * y,const DATASET * dset)630 static int panel_resample_series (const double *x, double *y,
631 const DATASET *dset)
632 {
633 int n = panel_sample_size(dset);
634 int T = dset->pd;
635 int i, j, t, s;
636 int u1, u2;
637 int *z;
638
639 if (n <= 1) {
640 return E_DATA;
641 }
642
643 z = malloc(n * sizeof *z);
644 if (z == NULL) {
645 return E_ALLOC;
646 }
647
648 u1 = dset->t1 / T;
649 u2 = dset->t2 / T;
650
651 /* select n units from [u1 .. u2] */
652 gretl_rand_int_minmax(z, n, u1, u2);
653
654 s = dset->t1;
655 for (i=0; i<n; i++) {
656 j = z[i] * T;
657 for (t=0; t<T; t++) {
658 y[s++] = x[j + t];
659 }
660 }
661
662 free(z);
663
664 return 0;
665 }
666
resample_series(const double * x,double * y,const DATASET * dset)667 int resample_series (const double *x, double *y, const DATASET *dset)
668 {
669 int *z = NULL;
670 int t1, t2;
671 int i, t, n;
672
673 if (dataset_is_panel(dset)) {
674 return panel_resample_series(x, y, dset);
675 }
676
677 t1 = dset->t1;
678 t2 = dset->t2;
679 series_adjust_sample(x, &t1, &t2);
680
681 n = t2 - t1 + 1;
682 if (n <= 1) {
683 return E_DATA;
684 }
685
686 z = malloc(n * sizeof *z);
687 if (z == NULL) {
688 return E_ALLOC;
689 }
690
691 /* generate n uniform drawings from [t1 .. t2] */
692 gretl_rand_int_minmax(z, n, t1, t2);
693
694 /* sample from source series x based on indices z */
695 for (t=t1, i=0; t<=t2; t++, i++) {
696 y[t] = x[z[i]];
697 }
698
699 free(z);
700
701 return 0;
702 }
703
block_resample_series(const double * x,double * y,int blocklen,const DATASET * dset)704 int block_resample_series (const double *x, double *y, int blocklen,
705 const DATASET *dset)
706 {
707 int t1 = dset->t1;
708 int t2 = dset->t2;
709 int *z = NULL;
710 int m, rem, bt2, x0;
711 int i, s, t, n;
712
713 if (dataset_is_panel(dset)) {
714 return E_PDWRONG;
715 }
716
717 if (blocklen <= 0) {
718 return E_DATA;
719 }
720
721 if (blocklen == 1) {
722 return resample_series(x, y, dset);
723 }
724
725 series_adjust_sample(x, &t1, &t2);
726
727 n = t2 - t1 + 1;
728
729 m = n / blocklen;
730 rem = n % blocklen;
731
732 /* Let n now represent the number of blocks of @blocklen
733 contiguous observations which we need to select; the
734 last of these may not be fully used.
735 */
736 n = m + (rem > 0);
737
738 /* the last selectable starting point for a block */
739 bt2 = t2 - blocklen + 1;
740
741 if (bt2 < t1) {
742 return E_DATA;
743 }
744
745 z = malloc(n * sizeof *z);
746 if (z == NULL) {
747 return E_ALLOC;
748 }
749
750 /* Generate uniform random series: we want n drawings from the
751 range [t1 .. t2 - blocklen + 1], each of which will be
752 interpreted as the starting point of a block to be used.
753 */
754 gretl_rand_int_minmax(z, n, t1, bt2);
755
756 /* Sample from the source series using blocks given by the random
757 indices: note that the last block will be incomplete if rem > 0
758 */
759 t = t1;
760 for (i=0; i<n; i++) {
761 x0 = z[i];
762 for (s=0; s<blocklen; s++) {
763 if (t <= t2) {
764 y[t++] = x[x0+s];
765 } else {
766 break;
767 }
768 }
769 }
770
771 free(z);
772
773 return 0;
774 }
775
776 /* retrieve a pre-sample @x value if it's available via the n-vector
777 @x0, an optional argument to the filter() function.
778 */
779
get_xlag(int lag,int t1,gretl_vector * x0,int n)780 static double get_xlag (int lag, int t1, gretl_vector *x0, int n)
781 {
782 int p = t1 - lag;
783 int i = n - p;
784
785 if (i >= 0 && i < n) {
786 return x0->val[i];
787 } else {
788 return 0;
789 }
790 }
791
792 /* implements filter_series() and filter_matrix() */
793
filter_vector(const double * x,double * y,int t1,int t2,gretl_vector * A,gretl_vector * C,double y0,gretl_vector * x0)794 static int filter_vector (const double *x, double *y, int t1, int t2,
795 gretl_vector *A, gretl_vector *C, double y0,
796 gretl_vector *x0)
797 {
798 int t, s, i, n;
799 int amax, cmax;
800 double xlag, ylag;
801 double coef, *e;
802 int x0len = 0;
803 int err = 0;
804
805 if (gretl_is_null_matrix(C)) {
806 cmax = 0;
807 } else {
808 cmax = gretl_vector_get_length(C);
809 if (cmax == 0) {
810 /* if present, C must be a vector */
811 return E_NONCONF;
812 }
813 }
814
815 if (gretl_is_null_matrix(A)) {
816 amax = 0;
817 } else {
818 amax = gretl_vector_get_length(A);
819 if (amax == 0) {
820 /* if present, A must be a vector */
821 return E_NONCONF;
822 }
823 }
824
825 n = t2 - t1 + 1;
826 e = malloc(n * sizeof *e);
827
828 if (e == NULL) {
829 return E_ALLOC;
830 }
831
832 if (x0 != NULL) {
833 x0len = gretl_vector_get_length(x0);
834 }
835
836 s = 0;
837 if (cmax) {
838 for (t=t1; t<=t2; t++) {
839 e[s] = 0;
840 for (i=0; i<cmax; i++) {
841 if (t-i >= t1) {
842 xlag = x[t-i];
843 } else if (x0 == NULL) {
844 xlag = 0;
845 } else {
846 xlag = get_xlag(t-i, t1, x0, x0len);
847 }
848 if (na(xlag)) {
849 e[s] = NADBL;
850 break;
851 } else {
852 coef = gretl_vector_get(C, i);
853 e[s] += xlag * coef;
854 }
855 }
856 s++;
857 }
858 } else {
859 /* implicitly MA(0) = 1 */
860 for (t=t1; t<=t2; t++) {
861 e[s++] = x[t];
862 }
863 }
864
865 s = 0;
866 if (amax) {
867 for (t=t1; t<=t2; t++) {
868 if (na(e[s])) {
869 y[t] = NADBL;
870 } else {
871 y[t] = e[s];
872 for (i=0; i<amax; i++) {
873 ylag = (t-i > t1)? y[t-i-1] : y0;
874 if (na(ylag)) {
875 y[t] = NADBL;
876 break;
877 } else {
878 coef = gretl_vector_get(A, i);
879 y[t] += coef * ylag;
880 }
881 }
882 }
883 s++;
884 }
885 } else {
886 for (t=t1; t<=t2; t++) {
887 y[t] = e[s++];
888 }
889 }
890
891 free(e);
892
893 return err;
894 }
895
896 /**
897 * filter_series:
898 * @x: array of original data.
899 * @y: array into which to write the result.
900 * @dset: data set information.
901 * @A: vector for autoregressive polynomial.
902 * @C: vector for moving average polynomial.
903 * @y0: initial value of output series.
904 * @x0: prior values of x.
905 *
906 * Filters x according to y_t = C(L)/A(L) x_t. If the intended
907 * AR order is p, @A should be a vector of length p. If the
908 * intended MA order is q, @C should be vector of length (q+1),
909 * the first entry giving the coefficient at lag 0. However, if
910 * @C is NULL this is taken to mean that the lag-0 MA coefficient
911 * is unity (and all others are zero).
912 *
913 * Returns: 0 on success, non-zero error code on failure.
914 */
915
filter_series(const double * x,double * y,const DATASET * dset,gretl_vector * A,gretl_vector * C,double y0,gretl_vector * x0)916 int filter_series (const double *x, double *y, const DATASET *dset,
917 gretl_vector *A, gretl_vector *C,
918 double y0, gretl_vector *x0)
919 {
920 int t1 = dset->t1;
921 int t2 = dset->t2;
922 int err;
923
924 err = series_adjust_sample(x, &t1, &t2);
925
926 if (!err) {
927 err = filter_vector(x, y, t1, t2, A, C, y0, x0);
928 }
929
930 return err;
931 }
932
933 /**
934 * filter_matrix:
935 * @X: matrix of original data, r x c.
936 * @A: vector for autoregressive polynomial.
937 * @C: vector for moving average polynomial.
938 * @y0: initial value of output series.
939 * @x0: prior values of x.
940 * @err: location to receive error code.
941 *
942 * Filters the columns of x according to y_t = C(L)/A(L) x_t. If the
943 * intended AR order is p, @A should be a vector of length p. If the
944 * intended MA order is q, @C should be vector of length (q+1), the
945 * first entry giving the coefficient at lag 0. However, if @C is
946 * NULL this is taken to mean that the lag-0 MA coefficient is unity
947 * (and all others are zero).
948 *
949 * Returns: r x c matrix of filtered values, or NULL on failure.
950 */
951
filter_matrix(gretl_matrix * X,gretl_vector * A,gretl_vector * C,double y0,gretl_matrix * x0,int * err)952 gretl_matrix *filter_matrix (gretl_matrix *X, gretl_vector *A,
953 gretl_vector *C, double y0,
954 gretl_matrix *x0, int *err)
955 {
956 int r = X->rows;
957 int c = X->cols;
958 gretl_matrix *Y = NULL;
959 double *a = NULL, *b = NULL;
960 int i, j;
961
962 if (gretl_is_complex(X) ||
963 gretl_is_complex(A) ||
964 gretl_is_complex(C)) {
965 *err = E_CMPLX;
966 return NULL;
967 }
968
969 Y = gretl_matrix_alloc(r, c);
970 a = malloc(r * sizeof *a);
971 b = malloc(r * sizeof *b);
972
973 if (Y == NULL || a == NULL || b == NULL) {
974 *err = E_ALLOC;
975 return NULL;
976 }
977
978 for (j=0; j<c; j++) {
979 for (i=0; i<r; i++) {
980 a[i] = gretl_matrix_get(X, i, j);
981 }
982 *err = filter_vector(a, b, 0, r-1, A, C, y0, x0);
983 if (*err) {
984 break;
985 } else {
986 for (i=0; i<r; i++) {
987 gretl_matrix_set(Y, i, j, b[i]);
988 }
989 }
990 }
991
992 free(a);
993 free(b);
994
995 return Y;
996 }
997
series_goodobs(const double * x,int * t1,int * t2)998 static int series_goodobs (const double *x, int *t1, int *t2)
999 {
1000 int t, t1min = *t1, t2max = *t2;
1001 int T = 0;
1002
1003 for (t=t1min; t<t2max; t++) {
1004 if (na(x[t])) t1min++;
1005 else break;
1006 }
1007
1008 for (t=t2max; t>t1min; t--) {
1009 if (na(x[t])) t2max--;
1010 else break;
1011 }
1012
1013 for (t=t1min; t<=t2max; t++) {
1014 if (!na(x[t])) {
1015 T++;
1016 }
1017 }
1018
1019 *t1 = t1min;
1020 *t2 = t2max;
1021
1022 return T;
1023 }
1024
1025 /**
1026 * exponential_movavg_series:
1027 * @x: array of original data.
1028 * @y: array into which to write the result.
1029 * @dset: dataset information.
1030 * @d: coefficient on lagged @x.
1031 * @n: number of @x observations to average to give the
1032 * initial @y value.
1033 * @y0: optional initial value for EMA (or #NADBL).
1034 *
1035 * Returns: 0 on success, non-zero error code on failure.
1036 */
1037
exponential_movavg_series(const double * x,double * y,const DATASET * dset,double d,int n,double y0)1038 int exponential_movavg_series (const double *x, double *y,
1039 const DATASET *dset,
1040 double d, int n,
1041 double y0)
1042 {
1043 int t1 = dset->t1;
1044 int t2 = dset->t2;
1045 int t, T, err = 0;
1046
1047 if (dataset_is_panel(dset)) {
1048 return E_PDWRONG;
1049 }
1050
1051 if (na(y0) && n < 0) {
1052 return E_INVARG;
1053 }
1054
1055 T = series_goodobs(x, &t1, &t2);
1056
1057 if (na(y0) && T < n) {
1058 return E_TOOFEW;
1059 }
1060
1061 if (na(y0) && n == 0) {
1062 /* signal to use full sample mean */
1063 n = T;
1064 }
1065
1066 if (!na(y0)) {
1067 ; /* initialize on supplied value */
1068 } else if (n == 1) {
1069 /* initialize on first observation */
1070 y0 = x[t1];
1071 } else {
1072 /* initialize on mean of first n obs */
1073 y0 = 0.0;
1074 for (t=t1; t<t1+n; t++) {
1075 if (na(x[t])) {
1076 err = E_MISSDATA;
1077 break;
1078 }
1079 y0 += x[t];
1080 }
1081 if (!err) {
1082 y0 /= n;
1083 }
1084 }
1085
1086 if (!err && na(y0)) {
1087 err = E_MISSDATA;
1088 }
1089
1090 if (!err) {
1091 y[t1] = d * x[t1] + (1-d) * y0;
1092 for (t=t1+1; t<=t2; t++) {
1093 if (na(x[t]) || na(y[t-1])) {
1094 y[t] = NADBL;
1095 } else {
1096 y[t] = d * x[t] + (1-d) * y[t-1];
1097 }
1098 }
1099 }
1100
1101 return err;
1102 }
1103
1104 /**
1105 * movavg_series:
1106 * @x: array of original data.
1107 * @y: array into which to write the result.
1108 * @dset: data set information.
1109 * @k: number of terms in MA.
1110 * @center: if non-zero, produce centered MA.
1111 *
1112 * Returns: 0 on success, non-zero error code on failure.
1113 */
1114
movavg_series(const double * x,double * y,const DATASET * dset,int k,int center)1115 int movavg_series (const double *x, double *y, const DATASET *dset,
1116 int k, int center)
1117 {
1118 int t1 = dset->t1;
1119 int t2 = dset->t2;
1120 int k1 = k-1, k2 = 0;
1121 int i, s, t, T;
1122
1123 T = series_goodobs(x, &t1, &t2);
1124
1125 T = t2 - t1 + 1;
1126 if (T < k) {
1127 return E_TOOFEW;
1128 }
1129
1130 if (center) {
1131 k1 = k / 2;
1132 k2 = (k % 2 == 0)? (k1 - 1) : k1;
1133 }
1134
1135 t1 += k1;
1136 t2 -= k2;
1137
1138 for (t=t1; t<=t2; t++) {
1139 double xs, msum = 0.0;
1140
1141 for (i=-k1; i<=k2; i++) {
1142 s = t + i;
1143 if (dset->structure == STACKED_TIME_SERIES) {
1144 if (s / dset->pd != t / dset->pd) {
1145 s = -1;
1146 }
1147 }
1148
1149 if (s >= 0) {
1150 xs = x[s];
1151 } else {
1152 xs = NADBL;
1153 }
1154
1155 if (na(xs)) {
1156 msum = NADBL;
1157 break;
1158 } else {
1159 msum += x[s];
1160 }
1161 }
1162
1163 if (!na(msum)) {
1164 y[t] = (k > 0)? (msum / k) : msum;
1165 }
1166 }
1167
1168 if (center && k % 2 == 0) {
1169 /* centered, with even number of terms */
1170 for (t=t1; t<t2; t++) {
1171 if (na(y[t]) || na(y[t+1])) {
1172 y[t] = NADBL;
1173 } else {
1174 y[t] = (y[t] + y[t+1]) / 2.0;
1175 }
1176 }
1177 y[t2] = NADBL;
1178 }
1179
1180 return 0;
1181 }
1182
seasonally_adjust_series(const double * x,double * y,DATASET * dset,int tramo,int use_log)1183 int seasonally_adjust_series (const double *x, double *y,
1184 DATASET *dset, int tramo,
1185 int use_log)
1186 {
1187 int (*adjust_series) (const double *, double *,
1188 const DATASET *, int, int);
1189 int t1 = dset->t1;
1190 int t2 = dset->t2;
1191 int T, err = 0;
1192
1193 if (!quarterly_or_monthly(dset)) {
1194 gretl_errmsg_set(_("Input must be a monthly or quarterly time series"));
1195 return E_PDWRONG;
1196 }
1197
1198 series_adjust_sample(x, &t1, &t2);
1199 T = t2 - t1 + 1;
1200
1201 if (T < dset->pd * 3) {
1202 return E_TOOFEW;
1203 } else if (tramo && T > 600) {
1204 gretl_errmsg_set(_("TRAMO can't handle more than 600 observations.\n"
1205 "Please select a smaller sample."));
1206 return E_EXTERNAL;
1207 } else if (!tramo) {
1208 int pdmax = get_x12a_maxpd();
1209
1210 if (T > 50 * pdmax) {
1211 gretl_errmsg_sprintf(_("X-12-ARIMA can't handle more than %d observations.\n"
1212 "Please select a smaller sample."), 50 * pdmax);
1213 return E_EXTERNAL;
1214 }
1215 }
1216
1217 gretl_error_clear();
1218
1219 adjust_series = get_plugin_function("adjust_series");
1220
1221 if (adjust_series == NULL) {
1222 err = E_FOPEN;
1223 } else {
1224 int save_t1 = dset->t1;
1225 int save_t2 = dset->t2;
1226
1227 dset->t1 = t1;
1228 dset->t2 = t2;
1229
1230 err = (*adjust_series) (x, y, dset, tramo, use_log);
1231
1232 dset->t1 = save_t1;
1233 dset->t2 = save_t2;
1234 }
1235
1236 return err;
1237 }
1238
tramo_linearize_series(const double * x,double * y,DATASET * dset)1239 int tramo_linearize_series (const double *x, double *y,
1240 DATASET *dset)
1241 {
1242 int (*linearize_series) (const double *, double *,
1243 const DATASET *);
1244 int t1 = dset->t1;
1245 int t2 = dset->t2;
1246 int T, err = 0;
1247
1248 series_adjust_sample(x, &t1, &t2);
1249 T = t2 - t1 + 1;
1250
1251 if (T < 8) {
1252 return E_TOOFEW;
1253 } else if (T > 600) {
1254 gretl_errmsg_set(_("TRAMO can't handle more than 600 observations.\n"
1255 "Please select a smaller sample."));
1256 return E_EXTERNAL;
1257 }
1258
1259 gretl_error_clear();
1260
1261 linearize_series = get_plugin_function("linearize_series");
1262
1263 if (linearize_series == NULL) {
1264 err = E_FOPEN;
1265 } else {
1266 int save_t1 = dset->t1;
1267 int save_t2 = dset->t2;
1268
1269 dset->t1 = t1;
1270 dset->t2 = t2;
1271
1272 err = (*linearize_series) (x, y, dset);
1273
1274 dset->t1 = save_t1;
1275 dset->t2 = save_t2;
1276 }
1277
1278 return err;
1279 }
1280
1281 #define panel_obs_ok(x,t,m) ((m == NULL || m[t] != 0) && !na(x[t]))
1282 #define panel_obs_masked(m,t) (m != NULL && m[t] == 0)
1283
1284 #define PXSUM_SKIP_NA 1
1285
1286 /**
1287 * panel_statistic:
1288 * @x: source data.
1289 * @y: target into which to write.
1290 * @dset: data set information.
1291 * @k: code representing the desired statistic: F_PNOBS,
1292 * F_PMIN, F_PMAX, F_PSUM, F_PMEAN, F_PXSUM, F_PSD, F_PXNOBS
1293 * or F_PXSUM.
1294 * @mask: either NULL or a series with 0s for observations
1295 * to be excluded from the calculations, non-zero values
1296 * at other observations.
1297 *
1298 * Given the data in @x, constructs in @y a series containing
1299 * a panel-data statistic.
1300 *
1301 * Returns: 0 on success, non-zero on error.
1302 */
1303
panel_statistic(const double * x,double * y,const DATASET * dset,int k,const double * mask)1304 int panel_statistic (const double *x, double *y, const DATASET *dset,
1305 int k, const double *mask)
1306 {
1307 int T, Ti;
1308 int i, s, t, u1, u2;
1309 int err = 0;
1310
1311 if (!dataset_is_panel(dset)) {
1312 return E_DATA;
1313 }
1314
1315 T = dset->pd;
1316 u1 = dset->t1 / T;
1317 u2 = dset->t2 / T;
1318
1319 if (k == F_PNOBS) {
1320 for (i=u1; i<=u2; i++) {
1321 Ti = 0;
1322 for (t=0; t<T; t++) {
1323 if (panel_obs_ok(x, i*T + t, mask)) {
1324 Ti++;
1325 }
1326 }
1327 for (t=0; t<T; t++) {
1328 y[i*T + t] = Ti;
1329 }
1330 }
1331 } else if (k == F_PMIN || k == F_PMAX || k == F_PSUM) {
1332 double yi;
1333
1334 for (i=u1; i<=u2; i++) {
1335 yi = NADBL;
1336 for (t=0; t<T; t++) {
1337 s = i*T + t;
1338 if (panel_obs_ok(x, s, mask)) {
1339 if (na(yi)) {
1340 yi = x[s];
1341 } else if (k == F_PMIN && x[s] < yi) {
1342 yi = x[s];
1343 } else if (k == F_PMAX && x[s] > yi) {
1344 yi = x[s];
1345 } else if (k == F_PSUM) {
1346 yi += x[s];
1347 }
1348 }
1349 }
1350 for (t=0; t<T; t++) {
1351 y[i*T + t] = yi;
1352 }
1353 }
1354 } else if (k == F_PMEAN || k == F_PSD) {
1355 /* first check for presence of time-variation */
1356 double xref;
1357 int TV = 0;
1358
1359 for (i=u1; i<=u2 && !TV; i++) {
1360 xref = NADBL;
1361 for (t=0; t<T && !TV; t++) {
1362 s = i*T + t;
1363 if (panel_obs_ok(x, s, mask)) {
1364 if (na(xref)) {
1365 xref = x[s];
1366 } else if (x[s] != xref) {
1367 TV = 1;
1368 }
1369 }
1370 }
1371 }
1372 #if 0
1373 fprintf(stderr, "panel_statistic: time-varying = %d\n", TV);
1374 #endif
1375 if (k == F_PMEAN || TV) {
1376 double xbar;
1377
1378 for (i=u1; i<=u2; i++) {
1379 xbar = NADBL;
1380 Ti = 0;
1381 for (t=0; t<T; t++) {
1382 s = i*T + t;
1383 if (panel_obs_ok(x, s, mask)) {
1384 if (na(xbar) || !TV) {
1385 xbar = x[s];
1386 } else {
1387 xbar += x[s];
1388 }
1389 Ti++;
1390 }
1391 }
1392 if (!na(xbar) && TV) {
1393 xbar /= Ti;
1394 }
1395 for (t=0; t<T; t++) {
1396 y[i*T + t] = xbar;
1397 }
1398 }
1399 }
1400
1401 if (k == F_PSD && !TV) {
1402 /* s.d. with no time variation! */
1403 double sd;
1404
1405 for (i=u1; i<=u2; i++) {
1406 Ti = 0;
1407 for (t=0; t<T; t++) {
1408 if (panel_obs_ok(x, i*T + t, mask)) {
1409 Ti++;
1410 break;
1411 }
1412 }
1413 sd = Ti > 0 ? 0.0 : NADBL;
1414 for (t=0; t<T; t++) {
1415 y[i*T + t] = sd;
1416 }
1417 }
1418 } else if (k == F_PSD) {
1419 /* the time-varying case: we use the mean
1420 calculated above */
1421 double sd, xbar, ssx, dev;
1422
1423 for (i=u1; i<=u2; i++) {
1424 ssx = NADBL;
1425 xbar = y[i*T];
1426 Ti = 0;
1427 if (!na(xbar)) {
1428 for (t=0; t<T; t++) {
1429 s = i*T + t;
1430 if (panel_obs_ok(x, s, mask)) {
1431 dev = x[s] - xbar;
1432 if (na(ssx)) {
1433 ssx = dev * dev;
1434 } else {
1435 ssx += dev * dev;
1436 }
1437 Ti++;
1438 }
1439 }
1440 }
1441 if (na(ssx)) {
1442 sd = NADBL;
1443 } else if (Ti == 1) {
1444 sd = 0.0;
1445 } else {
1446 sd = sqrt(ssx / (Ti-1));
1447 }
1448 for (t=0; t<T; t++) {
1449 y[i*T + t] = sd;
1450 }
1451 }
1452 }
1453 } else if (k == F_PXSUM) {
1454 /* the sum of cross-sectional values for each period */
1455 double yt;
1456 int nt;
1457
1458 for (t=0; t<T; t++) {
1459 yt = 0.0;
1460 nt = 0;
1461 for (i=u1; i<=u2; i++) {
1462 s = i*T + t;
1463 if (panel_obs_masked(mask, s)) {
1464 continue;
1465 } else if (na(x[s])) {
1466 #if PXSUM_SKIP_NA
1467 continue;
1468 #else
1469 yt = NADBL;
1470 break;
1471 #endif
1472 } else {
1473 yt += x[s];
1474 nt++;
1475 }
1476 }
1477 if (nt == 0) {
1478 yt = NADBL;
1479 }
1480 for (i=u1; i<=u2; i++) {
1481 y[i*T + t] = yt;
1482 }
1483 }
1484 } else if (k == F_PXNOBS) {
1485 /* number of valid x-sectional obs in each period */
1486 int nt;
1487
1488 for (t=0; t<T; t++) {
1489 nt = 0;
1490 for (i=u1; i<=u2; i++) {
1491 s = i*T + t;
1492 if (panel_obs_masked(mask, s)) {
1493 continue;
1494 } else if (!na(x[s])) {
1495 nt++;
1496 }
1497 }
1498 for (i=u1; i<=u2; i++) {
1499 y[i*T + t] = nt;
1500 }
1501 }
1502 } else {
1503 /* unsupported option */
1504 err = E_DATA;
1505 }
1506
1507 return err;
1508 }
1509
1510 /**
1511 * panel_shrink:
1512 * @x: panel-data source series.
1513 * @noskip: keep NAs in output.
1514 * @dset: pointer to dataset.
1515 * @err: location to receive error code.
1516 *
1517 * By default, constructs a column vector holding the first
1518 * non-missing observation of @x for each panel unit within
1519 * the current sample range (hence skipping any units that
1520 * have no valid observations). However, if @noskip is non-zero,
1521 * the vector contains an NA for units with all missing
1522 * values.
1523 *
1524 * Returns: a new column vector, or NULL on error.
1525 */
1526
panel_shrink(const double * x,int noskip,const DATASET * dset,int * err)1527 gretl_matrix *panel_shrink (const double *x, int noskip,
1528 const DATASET *dset, int *err)
1529 {
1530 gretl_matrix *m = NULL;
1531 int n, T;
1532
1533 if (!dataset_is_panel(dset)) {
1534 *err = E_DATA;
1535 return NULL;
1536 }
1537
1538 n = panel_sample_size(dset);
1539 T = dset->pd;
1540 m = gretl_column_vector_alloc(n);
1541
1542 if (m == NULL) {
1543 *err = E_ALLOC;
1544 } else {
1545 int i1 = dset->t1 / T;
1546 int i2 = dset->t2 / T;
1547 int s = dset->t1;
1548 int i, t, k = 0;
1549 int gotit;
1550
1551 for (i=i1; i<=i2; i++) {
1552 /* loop across units */
1553 gotit = 0;
1554 for (t=0; t<T; t++) {
1555 /* loop inside units */
1556 if (!na(x[s+t])) {
1557 gotit = 1;
1558 m->val[k++] = x[s+t];
1559 break;
1560 }
1561 }
1562 if (!gotit && noskip) {
1563 m->val[k++] = NADBL;
1564 }
1565 /* skip to the next unit */
1566 s += T;
1567 }
1568
1569 if (k < n) {
1570 m->rows = k;
1571 }
1572 }
1573
1574 return m;
1575 }
1576
1577 /**
1578 * panel_expand:
1579 * @x: source vector.
1580 * @y target array.
1581 * @dset: pointer to dataset.
1582 * @opt: OPT_X to repeat across individuals instead
1583 * of across time.
1584 *
1585 * Constructs a series by repeating the values in @x,
1586 * by default across time (in which case the source
1587 * vector must have as many values as there are
1588 * individuals in the current sample range).
1589 *
1590 * Returns: zero on success, non-zero code on error.
1591 */
1592
panel_expand(const gretl_matrix * x,double * y,gretlopt opt,const DATASET * dset)1593 int panel_expand (const gretl_matrix *x, double *y,
1594 gretlopt opt, const DATASET *dset)
1595 {
1596 int n, T = sample_size(dset);
1597
1598 if (x == NULL || !dataset_is_panel(dset)) {
1599 return E_DATA;
1600 }
1601
1602 n = (int) ceil((double) T / dset->pd);
1603
1604 if (gretl_vector_get_length(x) != n) {
1605 return E_NONCONF;
1606 } else {
1607 int ubak = -1;
1608 int t, u, i = 0;
1609 double xi = 0;
1610
1611 for (t=dset->t1; t<=dset->t2; t++) {
1612 u = t / dset->pd;
1613 if (u != ubak) {
1614 xi = x->val[i++];
1615 ubak = u;
1616 }
1617 if (na(xi)) {
1618 y[t] = NADBL;
1619 } else {
1620 y[t] = xi;
1621 }
1622
1623 }
1624 }
1625
1626 return 0;
1627 }
1628
default_hp_lambda(const DATASET * dset)1629 static double default_hp_lambda (const DATASET *dset)
1630 {
1631 return 100 * dset->pd * dset->pd;
1632 }
1633
1634 /**
1635 * hp_filter:
1636 * @x: array of original data.
1637 * @hp: array in which filtered series is computed.
1638 * @dset: pointer to dataset.
1639 * @lambda: smoothing parameter (or #NADBL to use the default
1640 * value).
1641 * @opt: if %OPT_T, return the trend rather than the cycle.
1642 *
1643 * Calculates the "cycle" component of the time series in
1644 * array @x, using the Hodrick-Prescott filter. Adapted from the
1645 * original FORTRAN code by E. Prescott.
1646 *
1647 * Returns: 0 on success, non-zero error code on failure.
1648 */
1649
hp_filter(const double * x,double * hp,const DATASET * dset,double lambda,gretlopt opt)1650 int hp_filter (const double *x, double *hp, const DATASET *dset,
1651 double lambda, gretlopt opt)
1652 {
1653 int t1 = dset->t1, t2 = dset->t2;
1654 double v00 = 1.0, v11 = 1.0, v01 = 0.0;
1655 double det, tmp0, tmp1;
1656 double e0, e1, b00, b01, b11;
1657 double **V = NULL;
1658 double m[2], tmp[2];
1659 int i, t, T, tb;
1660 int err = 0;
1661
1662 for (t=t1; t<=t2; t++) {
1663 hp[t] = NADBL;
1664 }
1665
1666 err = series_adjust_sample(x, &t1, &t2);
1667 if (err) {
1668 goto bailout;
1669 }
1670
1671 T = t2 - t1 + 1;
1672 if (T < 4) {
1673 err = E_TOOFEW;
1674 goto bailout;
1675 }
1676
1677 if (na(lambda)) {
1678 lambda = default_hp_lambda(dset);
1679 }
1680
1681 V = doubles_array_new(4, T);
1682 if (V == NULL) {
1683 return E_ALLOC;
1684 }
1685
1686 /* adjust starting points */
1687 x += t1;
1688 hp += t1;
1689
1690 /* covariance matrices for each obs */
1691
1692 for (t=2; t<T; t++) {
1693 tmp0 = v00;
1694 tmp1 = v01;
1695 v00 = 1.0 / lambda + 4.0 * (tmp0 - tmp1) + v11;
1696 v01 = 2.0 * tmp0 - tmp1;
1697 v11 = tmp0;
1698
1699 det = v00 * v11 - v01 * v01;
1700
1701 V[0][t] = v11 / det;
1702 V[1][t] = -v01 / det;
1703 V[2][t] = v00 / det;
1704
1705 tmp0 = v00 + 1.0;
1706 tmp1 = v00;
1707
1708 v00 -= v00 * v00 / tmp0;
1709 v11 -= v01 * v01 / tmp0;
1710 v01 -= (tmp1 / tmp0) * v01;
1711 }
1712
1713 m[0] = x[0];
1714 m[1] = x[1];
1715
1716 /* forward pass */
1717 for (t=2; t<T; t++) {
1718 tmp[0] = m[1];
1719 m[1] = 2.0 * m[1] - m[0];
1720 m[0] = tmp[0];
1721
1722 V[3][t-1] = V[0][t] * m[1] + V[1][t] * m[0];
1723 hp[t-1] = V[1][t] * m[1] + V[2][t] * m[0];
1724
1725 det = V[0][t] * V[2][t] - V[1][t] * V[1][t];
1726
1727 v00 = V[2][t] / det;
1728 v01 = -V[1][t] / det;
1729
1730 tmp[1] = (x[t] - m[1]) / (v00 + 1.0);
1731 m[1] += v00 * tmp[1];
1732 m[0] += v01 * tmp[1];
1733 }
1734
1735 V[3][T-2] = m[0];
1736 V[3][T-1] = m[1];
1737 m[0] = x[T-2];
1738 m[1] = x[T-1];
1739
1740 /* backward pass */
1741 for (t=T-3; t>=0; t--) {
1742 t1 = t+1;
1743 tb = T - t - 1;
1744
1745 tmp[0] = m[0];
1746 m[0] = 2.0 * m[0] - m[1];
1747 m[1] = tmp[0];
1748
1749 if (t > 1) {
1750 /* combine info for y < i with info for y > i */
1751 e0 = V[2][tb] * m[1] + V[1][tb] * m[0] + V[3][t];
1752 e1 = V[1][tb] * m[1] + V[0][tb] * m[0] + hp[t];
1753 b00 = V[2][tb] + V[0][t1];
1754 b01 = V[1][tb] + V[1][t1];
1755 b11 = V[0][tb] + V[2][t1];
1756
1757 det = b00 * b11 - b01 * b01;
1758
1759 V[3][t] = (b00 * e1 - b01 * e0) / det;
1760 }
1761
1762 det = V[0][tb] * V[2][tb] - V[1][tb] * V[1][tb];
1763 v00 = V[2][tb] / det;
1764 v01 = -V[1][tb] / det;
1765
1766 tmp[1] = (x[t] - m[0]) / (v00 + 1.0);
1767 m[1] += v01 * tmp[1];
1768 m[0] += v00 * tmp[1];
1769 }
1770
1771 V[3][0] = m[0];
1772 V[3][1] = m[1];
1773
1774 if (opt & OPT_T) {
1775 for (t=0; t<T; t++) {
1776 hp[t] = V[3][t];
1777 }
1778 } else {
1779 for (t=0; t<T; t++) {
1780 hp[t] = x[t] - V[3][t];
1781 }
1782 }
1783
1784 bailout:
1785
1786 if (V != NULL) {
1787 for (i=0; i<4; i++) {
1788 free(V[i]);
1789 }
1790 free(V);
1791 }
1792
1793 return err;
1794 }
1795
1796 /**
1797 * oshp_filter:
1798 * @x: array of original data.
1799 * @hp: array in which filtered series is computed.
1800 * @dset: pointer to dataset.
1801 * @lambda: smoothing parameter (or #NADBL to use the default
1802 * value).
1803 * @opt: if %OPT_T, return the trend rather than the cycle.
1804 *
1805 * Calculates the "cycle" component of the time series in
1806 * array @x, using the a one-sided Hodrick-Prescott filter.
1807 * The implementation uses the Kalman filter.
1808 *
1809 * Returns: 0 on success, non-zero error code on failure.
1810 */
1811
oshp_filter(const double * x,double * hp,const DATASET * dset,double lambda,gretlopt opt)1812 int oshp_filter (const double *x, double *hp, const DATASET *dset,
1813 double lambda, gretlopt opt)
1814 {
1815 int t1 = dset->t1, t2 = dset->t2;
1816 gretl_matrix *M[4] = {NULL};
1817 gretl_matrix *Lambda = NULL;
1818 gretl_matrix *a0 = NULL;
1819 gretl_matrix *mu = NULL;
1820 gretl_bundle *b = NULL;
1821 int copy[4] = {0};
1822 int T, t, err, kerr;
1823 double mt, sqrt_lam;
1824
1825 for (t=t1; t<=t2; t++) {
1826 hp[t] = NADBL;
1827 }
1828
1829 err = series_adjust_sample(x, &t1, &t2);
1830 if (err) {
1831 return err;
1832 }
1833
1834 T = t2 - t1 + 1;
1835 if (T < 4) {
1836 return E_TOOFEW;
1837 }
1838
1839 if (na(lambda)) {
1840 lambda = default_hp_lambda(dset);
1841 }
1842 sqrt_lam = sqrt(lambda);
1843
1844 /* adjust starting points */
1845 x += t1;
1846 hp += t1;
1847
1848 /* begin Kalman filter setup */
1849
1850 /* obsy */
1851 M[0] = gretl_matrix_alloc(T+1, 1);
1852 for (t=0; t<T; t++) {
1853 gretl_matrix_set(M[0], t, 0, x[t]);
1854 }
1855 /* add a dummy trailing observation */
1856 gretl_matrix_set(M[0], T, 0, 0.0);
1857
1858 /* obsymat */
1859 M[1] = gretl_zero_matrix_new(2, 1);
1860 gretl_matrix_set(M[1], 0, 0, 1);
1861
1862 /* statemat */
1863 M[2] = gretl_zero_matrix_new(2, 2);
1864 gretl_matrix_set(M[2], 0, 0, 2);
1865 gretl_matrix_set(M[2], 0, 1, -1);
1866 gretl_matrix_set(M[2], 1, 0, 1);
1867
1868 /* statevar */
1869 M[3] = gretl_zero_matrix_new(2, 2);
1870 gretl_matrix_set(M[3], 0, 0, 1/sqrt_lam);
1871
1872 b = kalman_bundle_new(M, copy, 4, &err);
1873 if (err) {
1874 goto bailout;
1875 }
1876
1877 /* obsvar */
1878 Lambda = gretl_matrix_from_scalar(sqrt_lam);
1879 err = gretl_bundle_donate_data(b, "obsvar", Lambda,
1880 GRETL_TYPE_MATRIX, 0);
1881
1882 /* diffuse prior */
1883 err = gretl_bundle_set_scalar(b, "diffuse", 1);
1884
1885 if (err) {
1886 goto bailout;
1887 }
1888
1889 /* inistate */
1890 a0 = gretl_matrix_alloc(2, 1);
1891 gretl_matrix_set(a0, 0, 0, 2*x[0] - x[1]);
1892 gretl_matrix_set(a0, 1, 0, 3*x[0] - 2*x[1]);
1893 err = gretl_bundle_donate_data(b, "inistate", a0,
1894 GRETL_TYPE_MATRIX, 0);
1895 if (err) {
1896 goto bailout;
1897 }
1898
1899 #if DEBUG
1900 gretl_matrix_print(M[0], "obsy");
1901 gretl_matrix_print(M[1], "obsymat");
1902 gretl_matrix_print(M[2], "statemat");
1903 gretl_matrix_print(M[3], "statevar");
1904 gretl_matrix_print(a0, "inistate");
1905 #endif
1906
1907 kerr = kalman_bundle_run(b, NULL, &err);
1908 if (err || kerr) {
1909 goto bailout;
1910 }
1911
1912 mu = gretl_bundle_get_matrix(b, "state", &err);
1913 if (err) {
1914 goto bailout;
1915 }
1916
1917 #if DEBUG
1918 printf("kerr = %d, err = %d\n", kerr, err);
1919 gretl_matrix_print(mu, "state");
1920 #endif
1921
1922 /* take the "lead" of the second element of the
1923 state estimate */
1924 if (opt & OPT_T) {
1925 for (t=0; t<T; t++) {
1926 mt = gretl_matrix_get(mu, t+1, 1);
1927 hp[t] = mt;
1928 }
1929 } else {
1930 for (t=0; t<T; t++) {
1931 mt = gretl_matrix_get(mu, t+1, 1);
1932 hp[t] = x[t] - mt;
1933 }
1934 }
1935
1936 bailout:
1937
1938 /* since all the matrices we've allocated above belong
1939 to the Kalman bundle, the following will suffice to
1940 free everything on exit
1941 */
1942 gretl_bundle_destroy(b);
1943
1944 return err;
1945 }
1946
1947 /**
1948 * bkbp_filter:
1949 * @x: array of original data.
1950 * @bk: array into which to write the filtered series.
1951 * @dset: data set information.
1952 * @bkl: lower frequency bound (or 0 for automatic).
1953 * @bku: upper frequency bound (or 0 for automatic).
1954 * @k: approximation order (or 0 for automatic).
1955 *
1956 * Calculates the Baxter and King bandpass filter. If bku exceeds the
1957 * number of available observations, then the "low-pass" version will
1958 * be computed (weights sum to 1). Otherwise, the ordinary bandpass
1959 * filter will be applied (weights sum to 0).
1960 *
1961 * Returns: 0 on success, non-zero error code on failure.
1962 */
1963
bkbp_filter(const double * x,double * bk,const DATASET * dset,int bkl,int bku,int k)1964 int bkbp_filter (const double *x, double *bk, const DATASET *dset,
1965 int bkl, int bku, int k)
1966 {
1967 int t1 = dset->t1, t2 = dset->t2;
1968 double omubar, omlbar;
1969 double avg_a;
1970 double *a;
1971 int i, t, err = 0;
1972 int lowpass = 0;
1973
1974 if (bkl <= 0 || bku <= 0) {
1975 /* get user settings if available (or the defaults) */
1976 get_bkbp_periods(dset, &bkl, &bku);
1977 }
1978
1979 if (k <= 0) {
1980 k = get_bkbp_k(dset);
1981 }
1982
1983 #if BK_DEBUG
1984 fprintf(stderr, "lower limit = %d, upper limit = %d, \n",
1985 bkl, bku);
1986 #endif
1987
1988 if (bkl >= bku) {
1989 gretl_errmsg_set("Error in Baxter-King frequencies");
1990 return 1;
1991 }
1992
1993 err = series_adjust_sample(x, &t1, &t2);
1994 if (err) {
1995 return err;
1996 }
1997
1998 if (2 * k >= t2 - t1 + 1) {
1999 gretl_errmsg_set("Insufficient observations");
2000 return E_DATA;
2001 }
2002
2003 if (bku >= t2 - t1 + 1) {
2004 lowpass = 1;
2005 }
2006
2007 a = malloc((k + 1) * sizeof *a);
2008 if (a == NULL) {
2009 return E_ALLOC;
2010 }
2011
2012 omubar = M_2PI / bkl;
2013 omlbar = lowpass? 0 : M_2PI / bku;
2014
2015 /* first we compute the coefficients */
2016
2017 avg_a = a[0] = (omubar - omlbar) / M_PI;
2018
2019 if (lowpass) {
2020 avg_a -= 1.0;
2021 }
2022
2023 for (i=1; i<=k; i++) {
2024 a[i] = (sin(i * omubar) - sin(i * omlbar)) / (i * M_PI);
2025 avg_a += 2 * a[i];
2026 }
2027
2028 avg_a /= (2 * k + 1);
2029
2030 for (i=0; i<=k; i++) {
2031 a[i] -= avg_a;
2032 #if BK_DEBUG
2033 fprintf(stderr, "a[%d] = %#9.6g\n", i, a[i]);
2034 #endif
2035 }
2036
2037 /* now we filter the series, skipping the first
2038 and last k observations */
2039
2040 for (t=0; t<dset->n; t++) {
2041 if (t < t1 + k || t > t2 - k) {
2042 bk[t] = NADBL;
2043 } else {
2044 bk[t] = a[0] * x[t];
2045 for (i=1; i<=k; i++) {
2046 bk[t] += a[i] * (x[t-i] + x[t+i]);
2047 }
2048 }
2049 }
2050
2051 free(a);
2052
2053 return err;
2054 }
2055
2056 /* following: material relating to the Butterworth filter */
2057
2058 #define Min(x,y) (((x) > (y))? (y) : (x))
2059 #define NEAR_ZERO 1e-35
2060
safe_pow(double x,int n)2061 static double safe_pow (double x, int n)
2062 {
2063 double lp;
2064
2065 x = fabs(x);
2066
2067 if (x < NEAR_ZERO) {
2068 return 0.0;
2069 } else {
2070 lp = n * log(x);
2071 if (lp < -80) {
2072 return 0.0;
2073 } else if (lp > 80) {
2074 return exp(80.0);
2075 } else {
2076 return exp(lp);
2077 }
2078 }
2079 }
2080
cotan(double theta)2081 static double cotan (double theta)
2082 {
2083 double s = sin(theta);
2084
2085 if (fabs(s) < NEAR_ZERO) {
2086 s = copysign(NEAR_ZERO, s);
2087 }
2088
2089 return cos(theta) / s;
2090 }
2091
2092 /**
2093 * hp_gain:
2094 * @lambda: H-P parameter.
2095 * @hipass: 1 for high-pass filter, 0 for low-pass.
2096 *
2097 * Returns: a matrix holding omega values from 0 to \pi in
2098 * column 0, and the corresponding filter gain in column 1.
2099 */
2100
hp_gain(double lambda,int hipass)2101 gretl_matrix *hp_gain (double lambda, int hipass)
2102 {
2103 gretl_matrix *G;
2104 int i, width = 180;
2105 double inc = M_PI / width;
2106 double x, gain, omega = 0.0;
2107
2108 G = gretl_matrix_alloc(width + 1, 2);
2109 if (G == NULL) {
2110 return NULL;
2111 }
2112
2113 for (i=0; i<=width; i++) {
2114 x = 2 * sin(omega / 2);
2115 gain = 1 / (1 + lambda * pow(x, 4));
2116 if (hipass) {
2117 gain = 1.0 - gain;
2118 }
2119 gretl_matrix_set(G, i, 0, omega);
2120 gretl_matrix_set(G, i, 1, gain);
2121 omega += inc;
2122 }
2123
2124 return G;
2125 }
2126
2127 /**
2128 * butterworth_gain:
2129 * @n: order of the filter.
2130 * @cutoff: angular cutoff in radians.
2131 * @hipass: 1 for high-pass filter, 0 for low-pass.
2132 *
2133 * Returns: a matrix holding omega values from 0 to \pi in
2134 * column 0, and the corresponding filter gain in column 1.
2135 */
2136
butterworth_gain(int n,double cutoff,int hipass)2137 gretl_matrix *butterworth_gain (int n, double cutoff, int hipass)
2138 {
2139 gretl_matrix *G;
2140 int i, width = 180;
2141 double inc = M_PI / width;
2142 double x, gain, omega = 0.0;
2143
2144 G = gretl_matrix_alloc(width + 1, 2);
2145 if (G == NULL) {
2146 return NULL;
2147 }
2148
2149 for (i=0; i<=width; i++) {
2150 if (hipass) {
2151 x = cotan(omega / 2) * cotan((M_PI - cutoff) / 2);
2152 } else {
2153 x = tan(omega / 2) * cotan(cutoff / 2);
2154 }
2155 gain = 1 / (1 + pow(x, 2 * n));
2156 gretl_matrix_set(G, i, 0, omega);
2157 gretl_matrix_set(G, i, 1, gain);
2158 omega += inc;
2159 }
2160
2161 return G;
2162 }
2163
2164 /* Toeplitz methods:
2165
2166 1 = Stephen Pollock's symmetric Toeplitz solver
2167 2 = gretl's netlib-based general Toeplitz solver
2168 3 = build full Toeplitz matrix and apply SVD
2169
2170 */
2171
2172 #define TOEPLITZ_METHOD 1
2173
2174 #if (TOEPLITZ_METHOD == 3)
2175
2176 /* form the complete Toeplitz matrix represented in compressed
2177 form by the coefficients in g
2178 */
2179
toeplize(const double * g,int T,int k)2180 static gretl_matrix *toeplize (const double *g, int T, int k)
2181 {
2182 gretl_matrix *X;
2183 double x;
2184 int i, j;
2185
2186 X = gretl_zero_matrix_new(T, T);
2187 if (X == NULL) {
2188 return NULL;
2189 }
2190
2191 for (i=0; i<T; i++) {
2192 gretl_matrix_set(X, i, i, g[0]);
2193 }
2194
2195 for (i=1; i<k; i++) {
2196 x = g[i];
2197 for (j=i; j<T; j++) {
2198 gretl_matrix_set(X, j, j-i, x);
2199 gretl_matrix_set(X, j-i, j, x);
2200 }
2201 }
2202
2203 return X;
2204 }
2205
2206 #include "matrix_extra.h"
2207
toeplitz_solve(const double * g,double * dy,int T,int q)2208 static int toeplitz_solve (const double *g, double *dy, int T, int q)
2209 {
2210 gretl_matrix *y, *X;
2211 int err = 0;
2212
2213 fprintf(stderr, "svd_toeplitz_solve...\n");
2214
2215 X = toeplize(g, T, q + 1);
2216 if (X == NULL) {
2217 return E_ALLOC;
2218 }
2219
2220 err = gretl_SVD_invert_matrix(X);
2221
2222 if (!err) {
2223 y = gretl_vector_from_array(dy, T, GRETL_MOD_NONE);
2224 if (y == NULL) {
2225 err = E_ALLOC;
2226 } else {
2227 gretl_matrix s;
2228
2229 s.t1 = s.t2 = 0;
2230 s.rows = T;
2231 s.cols = 1;
2232 s.val = dy;
2233
2234 err = gretl_matrix_multiply(X, y, &s);
2235 gretl_matrix_free(y);
2236 }
2237 }
2238
2239 gretl_matrix_free(X);
2240
2241 return err;
2242 }
2243
2244 #elif (TOEPLITZ_METHOD == 2)
2245
toeplitz_solve(double * g,double * y,int T,int q)2246 static int toeplitz_solve (double *g, double *y, int T, int q)
2247 {
2248 gretl_vector *mg = NULL;
2249 gretl_vector *my = NULL;
2250 gretl_vector *mx = NULL;
2251 int i, err = 0;
2252
2253 fprintf(stderr, "netlib toeplitz solve...\n");
2254
2255 mg = gretl_vector_alloc(T);
2256 my = gretl_vector_alloc(T);
2257
2258 if (mg == NULL || my == NULL) {
2259 return E_ALLOC;
2260 }
2261
2262 for (i=0; i<T; i++) {
2263 mg->val[i] = (i <= q) ? g[i] : 0;
2264 my->val[i] = y[i];
2265 }
2266
2267 mx = gretl_toeplitz_solve(mg, mg, my, &err);
2268
2269 if (err) {
2270 fprintf(stderr, "symm_toeplitz: err = %d\n", err);
2271 for (i=0; i<T; i++) {
2272 y[i] = NADBL;
2273 }
2274 } else {
2275 for (i=0; i<T; i++) {
2276 y[i] = mx->val[i];
2277 }
2278 }
2279
2280 gretl_vector_free(mg);
2281 gretl_vector_free(my);
2282 gretl_vector_free(mx);
2283
2284 return err;
2285 }
2286
2287 #else /* TOEPLITZ_METHOD == 1 */
2288
2289 /* This function uses a Cholesky decomposition to find the solution of
2290 the equation Gx = y, where G is a symmetric Toeplitz matrix of order
2291 T with q supra-diagonal and q sub-diagonal bands. The coefficients of
2292 G are contained in g. The RHS vector y contains the elements of
2293 the solution vector x on completion.
2294 */
2295
toeplitz_solve(double * g,double * y,int T,int q)2296 static int toeplitz_solve (double *g, double *y, int T, int q)
2297 {
2298 double **mu;
2299 int t, j, k, jmax;
2300
2301 mu = doubles_array_new(q+1, T);
2302 if (mu == NULL) {
2303 return E_ALLOC;
2304 }
2305
2306 /* initialize */
2307 for (t=0; t<q; t++) {
2308 for (j=t+1; j<=q; j++) {
2309 mu[j][t] = 0.0;
2310 }
2311 }
2312
2313 /* factorize */
2314 for (t=0; t<T; t++) {
2315 for (k=Min(q, t); k>=0; k--) {
2316 mu[k][t] = g[k];
2317 jmax = q - k;
2318 for (j=1; j<=jmax && t-k-j >= 0; j++) {
2319 mu[k][t] -= mu[j][t-k] * mu[j+k][t] * mu[0][t-k-j];
2320 }
2321 if (k > 0) {
2322 mu[k][t] /= mu[0][t-k];
2323 }
2324 }
2325 }
2326
2327 /* forward solve */
2328 for (t=0; t<T; t++) {
2329 jmax = Min(t, q);
2330 for (j=1; j<=jmax; j++) {
2331 y[t] -= mu[j][t] * y[t-j];
2332 }
2333 }
2334
2335 /* divide by the diagonal */
2336 for (t=0; t<T; t++) {
2337 y[t] /= mu[0][t];
2338 }
2339
2340 /* backsolve */
2341 for (t=T-1; t>=0; t--) {
2342 jmax = Min(q, T - 1 - t);
2343 for (j=1; j<=jmax; j++) {
2344 y[t] -= mu[j][t+j] * y[t+j];
2345 }
2346 }
2347
2348 doubles_array_free(mu, q+1);
2349
2350 return 0;
2351 }
2352
2353 #endif /* alternate TOEPLITZ_METHODs */
2354
2355
2356 /* Premultiply vector y by a symmetric banded Toeplitz matrix
2357 Gamma with n nonzero sub-diagonal bands and n nonzero
2358 supra-diagonal bands. The elements of Gamma are contained
2359 in g; tmp is used as workspace.
2360 */
2361
GammaY(double * g,double * y,double * tmp,int T,int n)2362 static int GammaY (double *g, double *y, double *tmp,
2363 int T, int n)
2364 {
2365 double lx, rx;
2366 int t, j;
2367
2368 for (j=0; j<=n; j++) {
2369 tmp[j] = 0.0;
2370 }
2371
2372 for (t=0; t<T; t++) {
2373 for (j=n; j>0; j--) {
2374 tmp[j] = tmp[j-1];
2375 }
2376 tmp[0] = g[0] * y[t];
2377 for (j=1; j<=n; j++) {
2378 lx = (t - j < 0) ? 0 : y[t-j];
2379 rx = (t + j >= T) ? 0 : y[t+j];
2380 tmp[0] += g[j] * (lx + rx);
2381 }
2382 if (t >= n) {
2383 y[t-n] = tmp[n];
2384 }
2385 }
2386
2387 for (j=0; j<n; j++) {
2388 y[T-j-1] = tmp[j];
2389 }
2390
2391 return 0;
2392 }
2393
2394 #undef Min
2395 #undef NEAR_ZERO
2396
2397 /* Form the autocovariances of an MA process of order q. */
2398
form_gamma(double * g,double * mu,int q)2399 static void form_gamma (double *g, double *mu, int q)
2400 {
2401 int j, k;
2402
2403 for (j=0; j<=q; j++) {
2404 g[j] = 0.0;
2405 for (k=0; k<=q-j; k++) {
2406 g[j] += mu[k] * mu[k+j];
2407 }
2408 }
2409 }
2410
2411 /* Find the coefficients of the nth power of the summation
2412 operator (if sign = 1) or of the difference operator (if
2413 sign = -1).
2414 */
2415
sum_or_diff(double * theta,int n,int sign)2416 static void sum_or_diff (double *theta, int n, int sign)
2417 {
2418 int j, q;
2419
2420 theta[0] = 1.0;
2421
2422 for (q=1; q<=n; q++) {
2423 theta[q] = 0.0;
2424 for (j=q; j>0; j--) {
2425 theta[j] += sign * theta[j-1];
2426 }
2427 }
2428 }
2429
2430 /* g is the target, mu is used as workspace for the
2431 summation coefficients */
2432
form_mvec(double * g,double * mu,int n)2433 static void form_mvec (double *g, double *mu, int n)
2434 {
2435 sum_or_diff(mu, n, 1);
2436 form_gamma(g, mu, n);
2437 }
2438
2439 /* g is the target, mu is used as workspace for the
2440 differencing coefficients
2441 svec = Q'SQ where Q is the 2nd-diff operator
2442 */
2443
form_svec(double * g,double * mu,int n)2444 static void form_svec (double *g, double *mu, int n)
2445 {
2446 sum_or_diff(mu, n, -1);
2447 form_gamma(g, mu, n);
2448 }
2449
2450 /* g is the target, mu and tmp are used as workspace */
2451
form_wvec(double * g,double * mu,double * tmp,int n,double lam1,double lam2)2452 static void form_wvec (double *g, double *mu, double *tmp,
2453 int n, double lam1, double lam2)
2454 {
2455 int i;
2456
2457 form_svec(tmp, mu, n);
2458 for (i=0; i<=n; i++) {
2459 g[i] = lam1 * tmp[i];
2460 }
2461
2462 form_mvec(tmp, mu, n);
2463 for (i=0; i<=n; i++) {
2464 g[i] += lam2 * tmp[i];
2465 }
2466 }
2467
2468 /* Find the second differences of the elements of a vector.
2469 Notice that the first two elements of the vector are lost
2470 in the process. However, we index the leading element of
2471 the differenced vector by t = 0.
2472 */
2473
QprimeY(double * y,int T)2474 static void QprimeY (double *y, int T)
2475 {
2476 int t;
2477
2478 for (t=0; t<T-2; t++) {
2479 y[t] += y[t+2] - 2 * y[t+1];
2480 }
2481 }
2482
2483 /* Multiply the vector y by matrix Q of order T x T-2,
2484 where Q' is the matrix which finds the second
2485 differences of a vector.
2486 */
2487
form_Qy(double * y,int T)2488 static void form_Qy (double *y, int T)
2489 {
2490 double tmp, lag1 = 0.0, lag2 = 0.0;
2491 int t;
2492
2493 for (t=0; t<T-2; t++) {
2494 tmp = y[t];
2495 y[t] += lag2 - 2 * lag1;
2496 lag2 = lag1;
2497 lag1 = tmp;
2498 }
2499
2500 y[T-2] = lag2 - 2 * lag1;
2501 y[T-1] = lag1;
2502 }
2503
2504 #if HAVE_GMP
2505
mp_butterworth(const double * x,double * bw,int T,int order,double cutoff)2506 static int mp_butterworth (const double *x, double *bw, int T,
2507 int order, double cutoff)
2508 {
2509 int (*mpfun) (const double *, double *, int, int, double);
2510
2511 mpfun = get_plugin_function("mp_bw_filter");
2512
2513 if (mpfun == NULL) {
2514 return E_FOPEN;
2515 }
2516
2517 return (*mpfun) (x, bw, T, order, cutoff);
2518 }
2519
2520 #endif
2521
2522 #if 0
2523
2524 /* note: cut should be in degrees */
2525
2526 static int bw_numeric_check (double cut, int n)
2527 {
2528 const double b0 = -8.56644923648263;
2529 const double b1 = -0.66992007228155;
2530 const double b2 = 2.82851981714539;
2531 double x;
2532
2533 /* let's be conservative here */
2534 x = b0 + b1 * (cut - 1.0) + b2 * (n + 1);
2535 x = 1/(1+exp(-x));
2536
2537 return (x > 0.479717591568)
2538 }
2539
2540 #endif
2541
2542 /* maximum modulus among the poles of the filter:
2543 too close to 1.0 is a problem
2544 */
2545
bw_max_mod(double cut,int n)2546 static double bw_max_mod (double cut, int n)
2547 {
2548 double tc = tan(cut / 2);
2549 double theta = M_PI / (2 * n);
2550 double delta = 1 + tc * (2 * sin(theta) + tc);
2551 double x = (1 - tc * tc) / delta;
2552 double y = 2 * tc * cos(theta) / delta;
2553
2554 return sqrt(x * x + y * y);
2555 }
2556
2557 #define MAX_LAM 10000
2558
2559 /* Calculate the Butterworth lambda based on cutoff and
2560 order. Return non-zero if it seems that lambda is too
2561 extreme. FIXME: conditioning on lambda alone is not an
2562 adequate approach.
2563 */
2564
2565 static int
set_bw_lambda(double cutoff,int n,double * lam1,double * lam2)2566 set_bw_lambda (double cutoff, int n, double *lam1, double *lam2)
2567 {
2568 int ret = 0;
2569
2570 *lam1 = 1 / tan(cutoff / 2);
2571 *lam1 = safe_pow(*lam1, n * 2);
2572
2573 fprintf(stderr, "for cutoff %g, order %d: lambda=%g, maxmod=%g\n",
2574 cutoff, n, *lam1, bw_max_mod(cutoff, n));
2575
2576 if (*lam1 > 1.0e18) {
2577 /* can't cope, even with multiple precision? */
2578 ret = 2;
2579 } else if (*lam1 > 1.0e6) {
2580 /* may be OK with multiple precision? */
2581 ret = 1;
2582 } else {
2583 /* OK with regular double-precision? */
2584 if (*lam1 > MAX_LAM) {
2585 /* note: AC thinks the following doesn't help */
2586 *lam1 = sqrt(*lam1);
2587 *lam2 = 1 / *lam1;
2588 }
2589 }
2590
2591 return ret;
2592 }
2593
2594 /**
2595 * butterworth_filter:
2596 * @x: array of original data.
2597 * @bw: array into which to write the filtered series.
2598 * @dset: data set information.
2599 * @n: desired lag order.
2600 * @cutoff: desired angular cutoff in degrees (0, 180).
2601 *
2602 * Calculates the Butterworth filter. The code that does this
2603 * is based on D.S.G. Pollock's IDEOLOG -- see
2604 * http://www.le.ac.uk/users/dsgp1/
2605 *
2606 * Returns: 0 on success, non-zero error code on failure.
2607 */
2608
butterworth_filter(const double * x,double * bw,const DATASET * dset,int n,double cutoff)2609 int butterworth_filter (const double *x, double *bw, const DATASET *dset,
2610 int n, double cutoff)
2611 {
2612 double *g, *ds, *tmp, *y;
2613 double lam1, lam2 = 1.0;
2614 int t1 = dset->t1, t2 = dset->t2;
2615 int T, t, m;
2616 int bad_lambda = 0;
2617 int err = 0;
2618
2619 err = series_adjust_sample(x, &t1, &t2);
2620 if (err) {
2621 return err;
2622 }
2623
2624 if (2 * n >= t2 - t1) {
2625 gretl_errmsg_set("Insufficient observations");
2626 return E_DATA;
2627 }
2628
2629 if (cutoff <= 0.0 || cutoff >= 180.0) {
2630 return E_INVARG;
2631 }
2632
2633 T = t2 - t1 + 1;
2634 m = 3 * (n+1);
2635
2636 /* sample offset */
2637 y = bw + t1;
2638 x = x + t1;
2639
2640 /* the cutoff is expressed in radians internally */
2641 cutoff *= M_PI / 180.0;
2642
2643 bad_lambda = set_bw_lambda(cutoff, n, &lam1, &lam2);
2644
2645 #if HAVE_GMP
2646 if (bad_lambda > 1) {
2647 gretl_errmsg_set("Butterworth: infeasible lambda value");
2648 return E_DATA;
2649 } else if (bad_lambda) {
2650 return mp_butterworth(x, y, T, n, cutoff);
2651 }
2652 #else
2653 if (bad_lambda) {
2654 gretl_errmsg_set("Butterworth: infeasible lambda value");
2655 return E_DATA;
2656 }
2657 #endif
2658
2659 /* the workspace we need for everything except the
2660 Toeplitz solver */
2661 g = malloc(m * sizeof *g);
2662 if (g == NULL) {
2663 return E_ALLOC;
2664 }
2665
2666 ds = g + n + 1;
2667 tmp = ds + n + 1;
2668
2669 /* place a copy of the data in y */
2670 memcpy(y, x, T * sizeof *y);
2671
2672 form_wvec(g, ds, tmp, n, lam1, lam2); /* W = M + lambda * Q'SQ */
2673
2674 QprimeY(y, T);
2675
2676 /* solve (M + lambda*Q'SQ)x = d for x */
2677 err = toeplitz_solve(g, y, T-2, n);
2678
2679 if (!err) {
2680 form_Qy(y, T);
2681 form_svec(g, ds, n-2);
2682 GammaY(g, y, tmp, T, n-2); /* Form SQy in y */
2683 /* write the trend into y (low-pass) */
2684 for (t=0; t<T; t++) {
2685 y[t] = x[t] - lam1 * y[t];
2686 }
2687 }
2688
2689 free(g);
2690
2691 return err;
2692 }
2693
2694 /* common code for both plain and weighted polynomial trend
2695 estimation */
2696
real_poly_trend(const double * x,double * fx,double * w,int T,int order)2697 static int real_poly_trend (const double *x, double *fx, double *w,
2698 int T, int order)
2699 {
2700 double tmp, denom, lagdenom = 1;
2701 double alpha, gamma, delta;
2702 double *phi, *philag;
2703 int i, t;
2704
2705 phi = malloc(2 * T * sizeof *phi);
2706 if (phi == NULL) {
2707 return E_ALLOC;
2708 }
2709
2710 philag = phi + T;
2711
2712 for (t=0; t<T; t++) {
2713 phi[t] = 1;
2714 philag[t] = 0;
2715 fx[t] = 0;
2716 }
2717
2718 for (i=0; i<=order; i++) {
2719 double xadd;
2720
2721 alpha = gamma = denom = 0.0;
2722
2723 for (t=0; t<T; t++) {
2724 xadd = x[t] * phi[t];
2725 if (w != NULL) xadd *= w[t];
2726 alpha += xadd;
2727 xadd = phi[t] * phi[t] * t;
2728 if (w != NULL) xadd *= w[t];
2729 gamma += xadd;
2730 xadd = phi[t] * phi[t];
2731 if (w != NULL) xadd *= w[t];
2732 denom += xadd;
2733 }
2734
2735 alpha /= denom;
2736 gamma /= denom;
2737 delta = denom / lagdenom;
2738 lagdenom = denom;
2739
2740 for (t=0; t<T; t++) {
2741 fx[t] += alpha * phi[t];
2742 tmp = phi[t];
2743 phi[t] = (t - gamma) * phi[t] - delta * philag[t];
2744 philag[t] = tmp;
2745 }
2746 }
2747
2748 free(phi);
2749
2750 return 0;
2751 }
2752
2753 /**
2754 * poly_trend:
2755 * @x: array of original data.
2756 * @fx: array into which to write the fitted series.
2757 * @dset: data set information.
2758 * @order: desired polynomial order.
2759 *
2760 * Calculates a trend via the method of orthogonal polynomials.
2761 * Based on C code for D.S.G. Pollock's DETREND program.
2762 *
2763 * Returns: 0 on success, non-zero error code on failure.
2764 */
2765
poly_trend(const double * x,double * fx,const DATASET * dset,int order)2766 int poly_trend (const double *x, double *fx, const DATASET *dset, int order)
2767 {
2768 int t1 = dset->t1, t2 = dset->t2;
2769 int T, err;
2770
2771 if (dataset_is_panel(dset)) {
2772 gretl_errmsg_set("polyfit: panel data not supported");
2773 return E_PDWRONG;
2774 }
2775
2776 err = series_adjust_sample(x, &t1, &t2);
2777 if (err) {
2778 return err;
2779 }
2780
2781 T = t2 - t1 + 1;
2782
2783 if (order > T) {
2784 return E_DF;
2785 }
2786
2787 return real_poly_trend(x + t1, fx + t1, NULL, T, order);
2788 }
2789
2790 /**
2791 * poly_weights:
2792 * @w: array into which the weights will be written.
2793 * @T: the length of @w.
2794 * @wmax: the ratio of maximum to minimum weight.
2795 * @midfrac: the size of the central section that should be given
2796 * the minimum weight.
2797 * @opt: weighting scheme option (OPT_Q = quadratic, OPT_B = cosine bell,
2798 * OPT_C = crenelated).
2799 *
2800 * Calculates a set of weights; intended for use with polynomial
2801 * trend fitting.
2802 */
2803
poly_weights(double * w,int T,double wmax,double midfrac,gretlopt opt)2804 void poly_weights (double *w, int T, double wmax,
2805 double midfrac, gretlopt opt)
2806 {
2807 double wt, a = 0, b = 0;
2808 int t, cut, T2 = T / 2;
2809
2810 if (midfrac > 0) {
2811 cut = round(T * (1.0 - midfrac) / 2.0);
2812 } else {
2813 cut = T2;
2814 }
2815
2816 if (opt == OPT_Q) {
2817 /* quadratic */
2818 double z1, z2, det;
2819
2820 if (midfrac > 0) {
2821 z1 = cut;
2822 z2 = 2 * cut;
2823 } else {
2824 z1 = (T-1) / 2.0;
2825 z2 = T-1;
2826 }
2827
2828 det = 1.0 / (z1 * (z1 * z2 - z2 * z2));
2829 a = z2 * (1 - wmax) * det;
2830 b = -z2 * a;
2831 } else if (opt == OPT_B) {
2832 /* cosine-bell */
2833 b = (wmax - 1) / 2.0;
2834 }
2835
2836 for (t=0; t<=T2; t++) {
2837 if (t <= cut) {
2838 if (opt == OPT_Q) {
2839 wt = a * t + b;
2840 wt = wt * t + wmax;
2841 } else if (opt == OPT_B) {
2842 a = (t * M_PI) / cut;
2843 wt = 1 + b * (1 + cos(a));
2844 } else {
2845 /* "crenelated" */
2846 wt = wmax;
2847 }
2848 } else {
2849 wt = 1;
2850 }
2851 w[t] = w[T-1-t] = wt;
2852 }
2853 }
2854
2855 /**
2856 * weighted_poly_trend:
2857 * @x: array of original data.
2858 * @fx: array into which to write the fitted series.
2859 * @dset: data set information.
2860 * @order: desired polynomial order.
2861 * @opt: weighting option (OPT_Q = quadratic, OPT_B = cosine bell,
2862 * OPT_C = crenelated).
2863 * @wratio: ratio of maximum to minimum weight.
2864 * @midfrac: proportion of the data to be treated specially, in the
2865 * middle.
2866 *
2867 * Calculates a trend via the method of orthogonal polynomials, using
2868 * the specified weighting scheme.
2869 * Based on C code for D.S.G. Pollock's DETREND program.
2870 *
2871 * Returns: 0 on success, non-zero error code on failure.
2872 */
2873
weighted_poly_trend(const double * x,double * fx,const DATASET * dset,int order,gretlopt opt,double wratio,double midfrac)2874 int weighted_poly_trend (const double *x, double *fx, const DATASET *dset,
2875 int order, gretlopt opt, double wratio,
2876 double midfrac)
2877 {
2878 double *w = NULL;
2879 int t1 = dset->t1, t2 = dset->t2;
2880 int T, err;
2881
2882 err = series_adjust_sample(x, &t1, &t2);
2883 if (err) {
2884 return err;
2885 }
2886
2887 T = t2 - t1 + 1;
2888
2889 if (order > T) {
2890 return E_DF;
2891 }
2892
2893 w = malloc(T * sizeof *w);
2894 if (w == NULL) {
2895 err = E_ALLOC;
2896 } else {
2897 poly_weights(w, T, wratio, midfrac, opt);
2898 err = real_poly_trend(x + t1, fx + t1, w, T, order);
2899 free(w);
2900 }
2901
2902 return err;
2903 }
2904
n_new_dummies(const DATASET * dset,int nunits,int nperiods)2905 static int n_new_dummies (const DATASET *dset,
2906 int nunits, int nperiods)
2907 {
2908 char dname[VNAMELEN];
2909 int i, nnew = nunits + nperiods;
2910
2911 for (i=0; i<nunits; i++) {
2912 sprintf(dname, "du_%d", i + 1);
2913 if (gretl_is_series(dname, dset)) {
2914 nnew--;
2915 }
2916 }
2917
2918 for (i=0; i<nperiods; i++) {
2919 sprintf(dname, "dt_%d", i + 1);
2920 if (gretl_is_series(dname, dset)) {
2921 nnew--;
2922 }
2923 }
2924
2925 return nnew;
2926 }
2927
seas_name_and_label(int k,const DATASET * dset,gretlopt opt,char * vname)2928 static gchar *seas_name_and_label (int k, const DATASET *dset,
2929 gretlopt opt, char *vname)
2930 {
2931 int pd = dataset_is_panel(dset) ? dset->panel_pd : dset->pd;
2932 int ts = dset->structure == TIME_SERIES || dset->panel_pd > 1;
2933 gchar *ret = NULL;
2934
2935 if (opt & OPT_C) {
2936 sprintf(vname, "S%d", k);
2937 ret = g_strdup(_("centered periodic dummy"));
2938 } else if (opt & OPT_S) {
2939 sprintf(vname, "S%d", k);
2940 ret = g_strdup(_("uncentered periodic dummy"));
2941 } else if (pd == 4 && ts) {
2942 sprintf(vname, "dq%d", k);
2943 ret = g_strdup_printf(_("= 1 if quarter = %d, 0 otherwise"), k);
2944 } else if (pd == 12 && ts) {
2945 sprintf(vname, "dm%d", k);
2946 ret = g_strdup_printf(_("= 1 if month = %d, 0 otherwise"), k);
2947 } else {
2948 char dumstr[8] = "dummy_";
2949 char numstr[12];
2950 int len;
2951
2952 sprintf(numstr, "%d", k);
2953 len = strlen(numstr);
2954 dumstr[8 - len] = '\0';
2955 sprintf(vname, "%s%d", dumstr, k);
2956 ret = g_strdup_printf(_("%s = 1 if period is %d, 0 otherwise"), vname, k);
2957 }
2958
2959 return ret;
2960 }
2961
get_first_panel_period(DATASET * dset)2962 static int get_first_panel_period (DATASET *dset)
2963 {
2964 double x = date_as_double(0, dset->panel_pd, dset->panel_sd0);
2965 int i, d = ceil(log10(dset->panel_pd));
2966 int ret = 0;
2967
2968 x -= floor(x);
2969 for (i=0; i<d; i++) {
2970 x *= 10;
2971 }
2972
2973 x = (x-floor(x) > 0.5)? ceil(x) : floor(x);
2974 ret = x - 1;
2975
2976 return ret;
2977 }
2978
real_seasonals(DATASET * dset,int ref,int center,int snames,int ** plist)2979 static int real_seasonals (DATASET *dset, int ref, int center,
2980 int snames, int **plist)
2981 {
2982 char vname[VNAMELEN];
2983 gchar *vlabel = NULL;
2984 gretlopt opt = 0;
2985 int *list = NULL;
2986 int i, vi, k, t, pp;
2987 int pd, ndums, nnew = 0;
2988 int pantime = 0;
2989 int nfix = 0;
2990 int err = 0;
2991
2992 if (dset == NULL || dset->n == 0) {
2993 return E_NODATA;
2994 }
2995
2996 if (dataset_is_seasonal_panel(dset)) {
2997 pd = dset->panel_pd;
2998 if (pd != 4 && pd != 12 && pd != 24) {
2999 /* not handled yet */
3000 return E_PDWRONG;
3001 } else {
3002 pantime = 1;
3003 }
3004 }
3005
3006 if (!pantime) {
3007 pd = dset->pd;
3008 if (pd < 2 || pd > 99999) {
3009 return E_PDWRONG;
3010 }
3011 }
3012
3013 if (dated_weekly_data(dset)) {
3014 /* allow for up to 53 ISO 8601 weeks */
3015 pd = 53;
3016 }
3017
3018 if (ref < 0 || ref > pd) {
3019 return E_INVARG;
3020 }
3021
3022 ndums = pd - (ref > 0);
3023 list = gretl_list_new(ndums);
3024 if (list == NULL) {
3025 return E_ALLOC;
3026 }
3027
3028 if (center) opt |= OPT_C;
3029 if (snames) opt |= OPT_S;
3030
3031 /* check for appropriate series already in place */
3032
3033 for (k=1, i=1; i<=pd; i++) {
3034 if (i == ref) {
3035 /* dummy not wanted */
3036 continue;
3037 }
3038 vlabel = seas_name_and_label(i, dset, opt, vname);
3039 vi = series_index(dset, vname);
3040 if (vi < dset->v) {
3041 const char *orig = series_get_label(dset, vi);
3042
3043 list[k] = vi;
3044 if (orig == NULL || strcmp(vlabel, orig)) {
3045 series_set_label(dset, vi, vlabel);
3046 nfix++;
3047 }
3048 } else {
3049 nnew++;
3050 }
3051 g_free(vlabel);
3052 k++;
3053 }
3054
3055 if (nnew == 0 && nfix == 0) {
3056 goto finish;
3057 }
3058
3059 /* starting point for new dummy IDs */
3060 vi = dset->v;
3061
3062 if (nnew > 0 && dataset_add_series(dset, nnew)) {
3063 return E_ALLOC;
3064 }
3065
3066 for (k=1, i=1; i<=pd; i++) {
3067 if (i == ref) {
3068 continue;
3069 }
3070 if (list[k] == 0) {
3071 /* no pre-existing series */
3072 vlabel = seas_name_and_label(i, dset, opt, vname);
3073 strcpy(dset->varname[vi], vname);
3074 series_set_label(dset, vi, vlabel);
3075 g_free(vlabel);
3076 list[k] = vi++;
3077 }
3078 k++;
3079 }
3080
3081 if (pantime) {
3082 int p0 = get_first_panel_period(dset);
3083 int T = dset->pd;
3084
3085 pp = 0;
3086
3087 for (t=0; t<dset->n; t++) {
3088 if (t % T == 0) {
3089 pp = p0;
3090 } else {
3091 pp = (t + p0) % pd;
3092 }
3093 for (k=0, i=1; i<=list[0]; i++) {
3094 vi = list[i];
3095 if (k+1 == ref) k++;
3096 dset->Z[vi][t] = (pp == k)? 1 : 0;
3097 k++;
3098 }
3099 }
3100 } else if (dated_daily_data(dset)) {
3101 char datestr[OBSLEN];
3102 int wkday;
3103
3104 for (t=0; t<dset->n; t++) {
3105 ntolabel(datestr, t, dset);
3106 wkday = weekday_from_date(datestr);
3107 wkday = (wkday == 0)? 7 : wkday;
3108 for (k=1, i=1; i<=list[0]; i++) {
3109 vi = list[i];
3110 if (k+1 == ref) k++;
3111 dset->Z[vi][t] = (wkday == k)? 1 : 0;
3112 k++;
3113 }
3114 }
3115 } else if (dataset_is_daily(dset)) {
3116 int yy, mm = 10;
3117 double xx;
3118
3119 pp = pd;
3120 while ((pp = pp / 10)) {
3121 mm *= 10;
3122 }
3123
3124 for (k=1, i=1; i<=list[0]; i++) {
3125 vi = list[i];
3126 if (k == ref) k++;
3127 for (t=0; t<dset->n; t++) {
3128 xx = date_as_double(t, pd, dset->sd0) + .1;
3129 yy = (int) xx;
3130 pp = (int) (mm * (xx - yy) + 0.5);
3131 dset->Z[vi][t] = (pp == k)? 1 : 0;
3132 }
3133 k++;
3134 }
3135 } else if (dated_weekly_data(dset)) {
3136 char datestr[OBSLEN];
3137 int wknum;
3138
3139 for (t=0; t<dset->n; t++) {
3140 ntolabel(datestr, t, dset);
3141 wknum = iso_week_from_date(datestr);
3142 for (k=1, i=1; i<=list[0]; i++) {
3143 vi = list[i];
3144 if (k+1 == ref) k++;
3145 dset->Z[vi][t] = (wknum == k)? 1 : 0;
3146 k++;
3147 }
3148 }
3149 } else {
3150 int p0 = get_subperiod(0, dset, NULL);
3151
3152 for (t=0; t<dset->n; t++) {
3153 pp = (t + p0) % pd;
3154 for (k=0, i=1; i<=list[0]; i++) {
3155 vi = list[i];
3156 if (k+1 == ref) k++;
3157 dset->Z[vi][t] = (pp == k)? 1 : 0;
3158 k++;
3159 }
3160 }
3161 }
3162
3163 if (center) {
3164 double cx = 1.0 / pd;
3165
3166 for (i=1; i<=list[0]; i++) {
3167 vi = list[i];
3168 for (t=0; t<dset->n; t++) {
3169 dset->Z[vi][t] -= cx;
3170 }
3171 }
3172 }
3173
3174 finish:
3175
3176 if (plist != NULL) {
3177 *plist = list;
3178 } else {
3179 free(list);
3180 }
3181
3182 return err;
3183 }
3184
3185 /**
3186 * gen_seasonal_dummies:
3187 * @dset: dataset struct.
3188 * @center: if non-zero subtract the population mean from
3189 * each of the generated dummies.
3190 *
3191 * Adds to the data set (if these variables are not already
3192 * present) a set of seasonal dummy variables.
3193 *
3194 * Returns: 0 on success, non-zero on error.
3195 */
3196
gen_seasonal_dummies(DATASET * dset,int center)3197 int gen_seasonal_dummies (DATASET *dset, int center)
3198 {
3199 return real_seasonals(dset, 0, center, 0, NULL);
3200 }
3201
3202 /**
3203 * seasonals_list:
3204 * @dset: dataset struct.
3205 * @ref: 1-based reference period, or 0 for none.
3206 * @center: if non-zero, subtract the population mean from
3207 * each of the generated dummies.
3208 * @err: location to receive error code.
3209 *
3210 * Adds to the data set (if these variables are not already
3211 * present) a set of seasonal dummy variables.
3212 *
3213 * Returns: list holding the ID numbers of the seasonal
3214 * dummies, or NULL on error.
3215 */
3216
seasonals_list(DATASET * dset,int ref,int center,int * err)3217 int *seasonals_list (DATASET *dset, int ref, int center, int *err)
3218 {
3219 int *list = NULL;
3220
3221 *err = real_seasonals(dset, ref, center, 1, &list);
3222
3223 return list;
3224 }
3225
3226 /**
3227 * gen_panel_dummies:
3228 * @dset: dataset struct.
3229 * @opt: %OPT_T for time dummies, otherwise unit dummies.
3230 * @prn: printer for warning, or NULL.
3231 *
3232 * Adds to the data set a set of dummy variables corresponding
3233 * to either the cross-sectional units in a panel, or the
3234 * time periods.
3235 *
3236 * Returns: 0 on successful completion, error code on error.
3237 */
3238
gen_panel_dummies(DATASET * dset,gretlopt opt,PRN * prn)3239 int gen_panel_dummies (DATASET *dset, gretlopt opt, PRN *prn)
3240 {
3241 char vname[16], label[MAXLABEL];
3242 int vi, t, yy, pp, mm;
3243 int orig_v = dset->v;
3244 int ndum, nnew;
3245 int n_unitdum = 0;
3246 int n_timedum = 0;
3247 int allnew = 0;
3248 int newvnum;
3249 double xx;
3250
3251 if (opt & OPT_T) {
3252 ndum = n_timedum = dset->pd;
3253 } else {
3254 n_unitdum = dset->n / dset->pd;
3255 if (dset->n % dset->pd) {
3256 n_unitdum++;
3257 }
3258 ndum = n_unitdum;
3259 }
3260
3261 if (ndum == 1) {
3262 return E_PDWRONG;
3263 }
3264
3265 if (complex_subsampled()) {
3266 nnew = ndum;
3267 allnew = 1;
3268 } else {
3269 nnew = n_new_dummies(dset, n_unitdum, n_timedum);
3270 }
3271
3272 if (nnew > 0 && prn != NULL) {
3273 double sz = nnew * dset->n * 8 / (1024.0 * 1024.0);
3274
3275 if (sz > 1024) {
3276 pprintf(prn, "warning: requested %gMb of storage\n", sz);
3277 }
3278 }
3279
3280 if (nnew > 0 && dataset_add_series(dset, nnew)) {
3281 return E_ALLOC;
3282 }
3283
3284 pp = dset->pd;
3285 mm = 10;
3286 while ((pp = pp / 10)) {
3287 mm *= 10;
3288 }
3289
3290 newvnum = orig_v;
3291
3292 /* generate time-based dummies, if wanted */
3293
3294 for (vi=1; vi<=n_timedum; vi++) {
3295 int dnum;
3296
3297 sprintf(vname, "dt_%d", vi);
3298
3299 if (allnew) {
3300 dnum = newvnum++;
3301 } else {
3302 dnum = series_index(dset, vname);
3303 if (dnum >= orig_v) {
3304 dnum = newvnum++;
3305 }
3306 }
3307
3308 strcpy(dset->varname[dnum], vname);
3309 sprintf(label, _("%s = 1 if %s is %d, 0 otherwise"), vname,
3310 _("period"), vi);
3311 series_set_label(dset, dnum, label);
3312
3313 for (t=0; t<dset->n; t++) {
3314 xx = date_as_double(t, dset->pd, dset->sd0);
3315 yy = (int) xx;
3316 pp = (int) (mm * (xx - yy) + 0.5);
3317 dset->Z[dnum][t] = (pp == vi)? 1.0 : 0.0;
3318 }
3319 }
3320
3321 /* generate unit-based dummies, if wanted */
3322
3323 for (vi=1; vi<=n_unitdum; vi++) {
3324 int dmin = (vi - 1) * dset->pd;
3325 int dmax = vi * dset->pd;
3326 int dnum;
3327
3328 sprintf(vname, "du_%d", vi);
3329
3330 if (allnew) {
3331 dnum = newvnum++;
3332 } else {
3333 dnum = series_index(dset, vname);
3334 if (dnum >= orig_v) {
3335 dnum = newvnum++;
3336 }
3337 }
3338
3339 strcpy(dset->varname[dnum], vname);
3340 sprintf(label, _("%s = 1 if %s is %d, 0 otherwise"), vname,
3341 _("unit"), vi);
3342 series_set_label(dset, dnum, label);
3343
3344 for (t=0; t<dset->n; t++) {
3345 dset->Z[dnum][t] = (t >= dmin && t < dmax)? 1 : 0;
3346 }
3347 }
3348
3349 return 0;
3350 }
3351
3352 /**
3353 * gen_unit:
3354 * @dset: dataset struct.
3355 * @vnum: location to receive ID number of series, or NULL.
3356 *
3357 * (For panel data only) adds to the data set an index variable
3358 * that uniquely identifies the cross-sectional units.
3359 *
3360 * Returns: 0 on successful completion, error code on error.
3361 */
3362
gen_unit(DATASET * dset,int * vnum)3363 int gen_unit (DATASET *dset, int *vnum)
3364 {
3365 int xt = 0;
3366 int i, t;
3367
3368 if (dset->structure != STACKED_TIME_SERIES) {
3369 gretl_errmsg_set("'genr unit' can be used only with "
3370 "panel data");
3371 return E_DATA;
3372 }
3373
3374 i = series_index(dset, "unit");
3375
3376 if (i == dset->v && dataset_add_series(dset, 1)) {
3377 return E_ALLOC;
3378 }
3379
3380 strcpy(dset->varname[i], "unit");
3381 series_set_label(dset, i, _("cross-sectional unit index"));
3382
3383 for (t=0; t<dset->n; t++) {
3384 if (t % dset->pd == 0) {
3385 xt++;
3386 }
3387 dset->Z[i][t] = (double) xt;
3388 }
3389
3390 if (vnum != NULL) {
3391 *vnum = i;
3392 }
3393
3394 return 0;
3395 }
3396
3397 /**
3398 * panel_unit_first_obs:
3399 * @t: zero-based observation number.
3400 * @dset: data information struct.
3401 *
3402 * Returns: 1 if observation @t is the first time-series
3403 * observation on a given cross-sectional unit in a
3404 * panel dataset, else 0.
3405 */
3406
panel_unit_first_obs(int t,const DATASET * dset)3407 int panel_unit_first_obs (int t, const DATASET *dset)
3408 {
3409 char *p, obs[OBSLEN];
3410 int ret = 0;
3411
3412 ntolabel(obs, t, dset);
3413 p = strchr(obs, ':');
3414 if (p != NULL && atoi(p + 1) == 1) {
3415 ret = 1;
3416 }
3417
3418 return ret;
3419 }
3420
3421 /* make special time variable for panel data */
3422
make_panel_time_var(double * x,const DATASET * dset)3423 static void make_panel_time_var (double *x, const DATASET *dset)
3424 {
3425 int t, xt = 0;
3426
3427 for (t=0; t<dset->n; t++) {
3428 if (t % dset->pd == 0) {
3429 xt = 1;
3430 }
3431 x[t] = (double) xt++;
3432 }
3433 }
3434
3435 /**
3436 * gen_time:
3437 * @dset: dataset struct.
3438 * @tm: if non-zero, an actual time trend is wanted,
3439 * otherwise just an index of observations.
3440 * @vnum: location to receive ID number of series,
3441 * or NULL.
3442 *
3443 * Generates (and adds to the dataset, if it's not already
3444 * present) a time-trend or index variable. This function
3445 * is panel-data aware: if the dataset is a panel and
3446 * @tm is non-zero, the trend will not simply run
3447 * consecutively over the entire range of the data, but
3448 * will correctly represent the location in time of
3449 * each observation. The index is 1-based.
3450 *
3451 * Returns: 0 on success, non-zero on error.
3452 */
3453
gen_time(DATASET * dset,int tm,int * vnum)3454 int gen_time (DATASET *dset, int tm, int *vnum)
3455 {
3456 int v, t;
3457
3458 v = series_index(dset, (tm)? "time" : "index");
3459
3460 if (v == dset->v && dataset_add_series(dset, 1)) {
3461 return E_ALLOC;
3462 }
3463
3464 if (tm) {
3465 strcpy(dset->varname[v], "time");
3466 series_set_label(dset, v, _("time trend variable"));
3467 } else {
3468 strcpy(dset->varname[v], "index");
3469 series_set_label(dset, v, _("data index variable"));
3470 }
3471
3472 if (tm && dset->structure == STACKED_TIME_SERIES) {
3473 make_panel_time_var(dset->Z[v], dset);
3474 } else {
3475 for (t=0; t<dset->n; t++) {
3476 dset->Z[v][t] = (double) (t + 1);
3477 }
3478 }
3479
3480 if (vnum != NULL) {
3481 *vnum = v;
3482 }
3483
3484 return 0;
3485 }
3486
3487 /**
3488 * genr_wkday:
3489 * @dset: dataset struct.
3490 * @vnum: location to receive ID number of series, or NULL.
3491 *
3492 * Generates (and adds to the dataset, if it's not already
3493 * present) an index representing the day of the week for
3494 * each observation (for dated daily data only).
3495 * The index has value 0 for Sunday, 1 for Monday, and
3496 * so on.
3497 *
3498 * Returns: 0 on success, non-zero code on error.
3499 */
3500
gen_wkday(DATASET * dset,int * vnum)3501 int gen_wkday (DATASET *dset, int *vnum)
3502 {
3503 char datestr[OBSLEN];
3504 int i, t;
3505
3506 if (!dated_daily_data(dset)) {
3507 return E_PDWRONG;
3508 }
3509
3510 i = series_index(dset, "weekday");
3511
3512 if (i == dset->v && dataset_add_series(dset, 1)) {
3513 return E_ALLOC;
3514 }
3515
3516 strcpy(dset->varname[i], "weekday");
3517 series_set_label(dset, i, _("day of week (1 = Monday)"));
3518
3519 for (t=0; t<dset->n; t++) {
3520 ntolabel(datestr, t, dset);
3521 dset->Z[i][t] = weekday_from_date(datestr);
3522 }
3523
3524 if (vnum != NULL) {
3525 *vnum = i;
3526 }
3527
3528 return 0;
3529 }
3530
3531 typedef enum {
3532 PLOTVAR_INDEX = 1,
3533 PLOTVAR_TIME,
3534 PLOTVAR_ANNUAL,
3535 PLOTVAR_QUARTERS,
3536 PLOTVAR_MONTHS,
3537 PLOTVAR_CALENDAR,
3538 PLOTVAR_GPTIME,
3539 PLOTVAR_DECADES,
3540 PLOTVAR_HOURLY,
3541 PLOTVAR_PANEL,
3542 PLOTVAR_MAX
3543 } plotvar_type;
3544
plotvar_code(const DATASET * dset,gretlopt opt)3545 static int plotvar_code (const DATASET *dset, gretlopt opt)
3546 {
3547 if ((opt & OPT_S) && calendar_data(dset)) {
3548 return PLOTVAR_TIME;
3549 } else if (!dataset_is_time_series(dset)) {
3550 return PLOTVAR_INDEX;
3551 } else if (dset->pd == 1) {
3552 return PLOTVAR_ANNUAL;
3553 } else if (dset->pd == 4) {
3554 return PLOTVAR_QUARTERS;
3555 } else if (dset->pd == 12) {
3556 return PLOTVAR_MONTHS;
3557 } else if (dset->pd == 24) {
3558 return PLOTVAR_HOURLY;
3559 } else if (calendar_data(dset)) {
3560 return (opt & OPT_T)? PLOTVAR_GPTIME : PLOTVAR_CALENDAR;
3561 } else if (dataset_is_decennial(dset)) {
3562 return PLOTVAR_DECADES;
3563 } else {
3564 return PLOTVAR_TIME;
3565 }
3566 }
3567
panel_plotvar_code(const DATASET * dset)3568 static int panel_plotvar_code (const DATASET *dset)
3569 {
3570 int pd = dset->panel_pd;
3571
3572 if (pd == 0) {
3573 return 0;
3574 } else if (pd == 1) {
3575 return PLOTVAR_ANNUAL;
3576 } else if (pd == 4) {
3577 return PLOTVAR_QUARTERS;
3578 } else if (pd == 12) {
3579 return PLOTVAR_MONTHS;
3580 } else if (pd == 24) {
3581 return PLOTVAR_HOURLY;
3582 } else if ((pd == 5 || pd == 6 || pd == 7 || pd == 52) &&
3583 dset->panel_sd0 > 10000.0) {
3584 return PLOTVAR_CALENDAR;
3585 } else if (pd == 10) {
3586 return PLOTVAR_DECADES;
3587 } else {
3588 return 0;
3589 }
3590 }
3591
correct_to_int(double x)3592 static double correct_to_int (double x)
3593 {
3594 if (x - floor(x) < 1.0e-6) {
3595 return floor(x);
3596 } else if (ceil(x) - x < 1.0e-6) {
3597 return ceil(x);
3598 }
3599 return x;
3600 }
3601
3602 /**
3603 * gretl_plotx:
3604 * @dset: data information struct.
3605 * @opt: can include OPT_P for panel time-series plot;
3606 * OPT_T to use gnuplot time (seconds of unix epoch);
3607 * OPT_S to indicate that the context is the "scatters"
3608 * command.
3609 *
3610 * Finds or creates a special dummy variable for use on the
3611 * x-axis in plotting; this will have the full length of the
3612 * data series as given in @dset, and will be appropriately
3613 * configured for the data frequency. Do not try to free this
3614 * variable.
3615 *
3616 * Returns: pointer to plot x-variable, or NULL on failure.
3617 */
3618
gretl_plotx(const DATASET * dset,gretlopt opt)3619 const double *gretl_plotx (const DATASET *dset, gretlopt opt)
3620 {
3621 static double *x;
3622 static int ptype;
3623 static int Tbak;
3624 static double sd0bak;
3625 double mul, frac;
3626 int t, y1, T = 0;
3627 int maj, min;
3628 int new_ptype = 0;
3629 int panvar = 0;
3630 int failed = 0;
3631 double sd0 = 0;
3632 float rm;
3633
3634 if (dset == NULL) {
3635 /* cleanup signal */
3636 free(x);
3637 x = NULL;
3638 ptype = 0;
3639 T = 0;
3640 sd0 = 0;
3641 return NULL;
3642 }
3643
3644 if (dataset_is_panel(dset) && ((opt & OPT_P) ||
3645 sample_size(dset) == dset->pd)) {
3646 /* we're plotting a single time-series from a panel */
3647 new_ptype = panel_plotvar_code(dset);
3648 if (new_ptype != 0) {
3649 sd0 = dset->panel_sd0;
3650 } else {
3651 panvar = plausible_panel_time_var(dset);
3652 if (panvar > 0) {
3653 new_ptype = PLOTVAR_PANEL;
3654 } else {
3655 new_ptype = PLOTVAR_INDEX;
3656 }
3657 }
3658 T = dset->pd;
3659 }
3660
3661 if (new_ptype == 0) {
3662 /* not already determined */
3663 new_ptype = plotvar_code(dset, opt);
3664 }
3665 if (T == 0) {
3666 /* not already determined */
3667 T = dset->n;
3668 }
3669 if (sd0 == 0.0) {
3670 /* not already determined */
3671 sd0 = dset->sd0;
3672 }
3673
3674 if (x != NULL && new_ptype == ptype && Tbak == T && sd0 == sd0bak) {
3675 /* a suitable array is already at hand */
3676 return x;
3677 }
3678
3679 if (x != NULL) {
3680 free(x);
3681 }
3682
3683 if (new_ptype == PLOTVAR_PANEL) {
3684 x = copyvec(dset->Z[panvar], dset->n);
3685 } else {
3686 x = malloc(T * sizeof *x);
3687 }
3688
3689 if (x == NULL || new_ptype == PLOTVAR_PANEL) {
3690 return x;
3691 }
3692
3693 try_again:
3694
3695 Tbak = T;
3696 ptype = new_ptype;
3697 sd0bak = sd0;
3698
3699 y1 = (int) sd0;
3700 rm = sd0 - y1;
3701
3702 switch (ptype) {
3703 case PLOTVAR_ANNUAL:
3704 for (t=0; t<T; t++) {
3705 x[t] = sd0 + t;
3706 }
3707 break;
3708 case PLOTVAR_QUARTERS:
3709 case PLOTVAR_MONTHS:
3710 case PLOTVAR_HOURLY:
3711 frac = 1.0 / dset->pd;
3712 if (sscanf(dset->stobs, "%d:%d", &maj, &min) == 2) {
3713 for (t=0; t<T; t++) {
3714 x[t] = maj + frac * (min - 1);
3715 if (min < dset->pd) {
3716 min++;
3717 } else {
3718 min = 1;
3719 maj++;
3720 }
3721 }
3722 } else {
3723 mul = ptype == PLOTVAR_QUARTERS ? 10 : 100;
3724 x[0] = correct_to_int(y1 + (mul * rm - 1.0) / dset->pd);
3725 for (t=1; t<T; t++) {
3726 x[t] = correct_to_int(x[t-1] + frac);
3727 }
3728 }
3729 break;
3730 case PLOTVAR_CALENDAR:
3731 case PLOTVAR_GPTIME:
3732 for (t=0; t<T; t++) {
3733 if (dset->S != NULL) {
3734 if (ptype == PLOTVAR_GPTIME) {
3735 x[t] = gnuplot_time_from_date(dset->S[t], "%Y-%m-%d");
3736 } else {
3737 x[t] = get_dec_date(dset->S[t]);
3738 }
3739 } else {
3740 char datestr[OBSLEN];
3741
3742 calendar_date_string(datestr, t, dset);
3743 if (ptype == PLOTVAR_GPTIME) {
3744 x[t] = gnuplot_time_from_date(datestr, "%Y-%m-%d");
3745 } else {
3746 x[t] = get_dec_date(datestr);
3747 }
3748 }
3749 if (na(x[t])) {
3750 failed++;
3751 break;
3752 }
3753 }
3754 break;
3755 case PLOTVAR_DECADES:
3756 for (t=0; t<T; t++) {
3757 x[t] = dset->sd0 + 10 * t;
3758 }
3759 break;
3760 case PLOTVAR_INDEX:
3761 case PLOTVAR_TIME:
3762 for (t=0; t<T; t++) {
3763 x[t] = (double) (t + 1);
3764 }
3765 break;
3766 default:
3767 break;
3768 }
3769
3770 if (failed == 1) {
3771 /* calendar dating failed */
3772 failed++; /* ensure we don't loop */
3773 new_ptype = PLOTVAR_TIME;
3774 goto try_again;
3775 }
3776
3777 return x;
3778 }
3779
3780 /**
3781 * get_fit_or_resid:
3782 * @pmod: pointer to source model.
3783 * @dset: information on the data set.
3784 * @idx: %M_UHAT, %M_UHAT2, %M_YHAT, %M_AHAT or %M_H.
3785 * @vname: location to write series name (length %VNAMELEN)
3786 * @pdesc: location to receive copy of series description.
3787 * @err: location to receive error code.
3788 *
3789 * Creates a full-length array holding the specified model
3790 * data, and writes name and description into the @vname and
3791 * @vlabel.
3792 *
3793 * Returns: allocated array on success or NULL on failure.
3794 */
3795
get_fit_or_resid(const MODEL * pmod,DATASET * dset,ModelDataIndex idx,char * vname,gchar ** pdesc,int * err)3796 double *get_fit_or_resid (const MODEL *pmod, DATASET *dset,
3797 ModelDataIndex idx, char *vname,
3798 gchar **pdesc, int *err)
3799 {
3800 const double *src = NULL;
3801 double *ret = NULL;
3802 int t;
3803
3804 /* Note: this is called only from the GUI, and in that
3805 context @vname will be shown as the default name
3806 for the saved series, though the user can change
3807 it if she chooses.
3808 */
3809
3810 if (idx == M_H) {
3811 src = gretl_model_get_data(pmod, "garch_h");
3812 } else if (idx == M_AHAT) {
3813 src = gretl_model_get_data(pmod, "ahat");
3814 } else if (idx == M_UHAT || idx == M_UHAT2) {
3815 src = pmod->uhat;
3816 } else if (idx == M_YHAT) {
3817 src = pmod->yhat;
3818 }
3819
3820 if (src == NULL) {
3821 if (*err == 0) {
3822 *err = E_BADSTAT;
3823 }
3824 return NULL;
3825 }
3826
3827 ret = malloc(dset->n * sizeof *ret);
3828 if (ret == NULL) {
3829 *err = E_ALLOC;
3830 return NULL;
3831 }
3832
3833 for (t=0; t<dset->n; t++) {
3834 if (t >= pmod->t1 && t <= pmod->t2) {
3835 if (idx == M_UHAT2) {
3836 ret[t] = na(src[t]) ? NADBL : (src[t] * src[t]);
3837 } else {
3838 ret[t] = src[t];
3839 }
3840 } else {
3841 ret[t] = NADBL;
3842 }
3843 }
3844
3845 if (idx == M_UHAT) {
3846 sprintf(vname, "uhat%d", pmod->ID);
3847 if (pmod->ci == GARCH && (pmod->opt & OPT_Z)) {
3848 *pdesc = g_strdup_printf(_("standardized residual from model %d"), pmod->ID);
3849 } else {
3850 *pdesc = g_strdup_printf(_("residual from model %d"), pmod->ID);
3851 }
3852 } else if (idx == M_YHAT) {
3853 sprintf(vname, "yhat%d", pmod->ID);
3854 *pdesc = g_strdup_printf(_("fitted value from model %d"), pmod->ID);
3855 } else if (idx == M_UHAT2) {
3856 /* squared residuals */
3857 sprintf(vname, "usq%d", pmod->ID);
3858 if (pmod->ci == GARCH && (pmod->opt & OPT_Z)) {
3859 *pdesc = g_strdup_printf(_("squared standardized residual from model %d"), pmod->ID);
3860 } else {
3861 *pdesc = g_strdup_printf(_("squared residual from model %d"), pmod->ID);
3862 }
3863 } else if (idx == M_H) {
3864 /* garch variance */
3865 sprintf(vname, "h%d", pmod->ID);
3866 *pdesc = g_strdup_printf(_("fitted variance from model %d"), pmod->ID);
3867 } else if (idx == M_AHAT) {
3868 sprintf(vname, "ahat%d", pmod->ID);
3869 if (pmod->opt & OPT_U) {
3870 *pdesc = g_strdup_printf(_("individual effects from model %d"), pmod->ID);
3871 } else {
3872 *pdesc = g_strdup_printf(_("per-unit constants from model %d"), pmod->ID);
3873 }
3874 }
3875
3876 return ret;
3877 }
3878
3879 /**
3880 * genr_fit_resid:
3881 * @pmod: pointer to source model.
3882 * @dset: dataset struct.
3883 * @idx: %M_UHAT, %M_UHAT2, %M_YHAT, %M_AHAT or %M_H.
3884 *
3885 * Adds residuals or fitted values or squared residuals from a
3886 * given model to the data set.
3887 *
3888 * Returns: 0 on successful completion, error code on error.
3889 */
3890
genr_fit_resid(const MODEL * pmod,DATASET * dset,ModelDataIndex idx)3891 int genr_fit_resid (const MODEL *pmod, DATASET *dset,
3892 ModelDataIndex idx)
3893 {
3894 char vname[VNAMELEN];
3895 gchar *vlabel = NULL;
3896 double *x;
3897 int err = 0;
3898
3899 x = get_fit_or_resid(pmod, dset, idx, vname, &vlabel, &err);
3900
3901 if (!err) {
3902 err = dataset_add_allocated_series(dset, x);
3903 }
3904
3905 if (err) {
3906 free(x);
3907 } else {
3908 int v = dset->v - 1;
3909
3910 strcpy(dset->varname[v], vname);
3911 series_set_label(dset, v, vlabel);
3912 }
3913
3914 g_free(vlabel);
3915
3916 return err;
3917 }
3918
get_observation_number(const char * s,const DATASET * dset)3919 int get_observation_number (const char *s, const DATASET *dset)
3920 {
3921 char test[OBSLEN];
3922 size_t n;
3923 int t;
3924
3925 *test = 0;
3926 strncat(test, (*s == '"')? s + 1 : s, OBSLEN - 1);
3927
3928 n = strlen(test);
3929 if (test[n-1] == '"') {
3930 test[n-1] = '\0';
3931 }
3932
3933 if (dataset_has_markers(dset)) {
3934 for (t=0; t<dset->n; t++) {
3935 if (!strcmp(test, dset->S[t])) {
3936 return t + 1;
3937 }
3938 }
3939 if (calendar_data(dset)) {
3940 for (t=0; t<dset->n; t++) {
3941 if (!strcmp(test, dset->S[t]) ||
3942 !strcmp(test, dset->S[t] + 2)) {
3943 return t + 1;
3944 }
3945 }
3946 }
3947 }
3948
3949 if (dset->structure == TIME_SERIES) {
3950 t = dateton(test, dset);
3951 if (t >= 0) {
3952 return t + 1;
3953 }
3954 }
3955
3956 if (calendar_data(dset)) {
3957 char datestr[OBSLEN];
3958
3959 for (t=0; t<dset->n; t++) {
3960 calendar_date_string(datestr, t, dset);
3961 if (!strcmp(test, datestr) ||
3962 !strcmp(test, datestr + 2)) {
3963 return t + 1;
3964 }
3965 }
3966 }
3967
3968 return 0;
3969 }
3970
3971 #define OBS_DEBUG 0
3972
plain_obs_number(const char * obs,const DATASET * dset)3973 static int plain_obs_number (const char *obs, const DATASET *dset)
3974 {
3975 char *test;
3976 int t = -1;
3977
3978 errno = 0;
3979
3980 strtol(obs, &test, 10);
3981
3982 if (errno == 0 && *test == '\0') {
3983 t = atoi(obs) - 1; /* convert from 1-based to 0-based */
3984 if (t >= dset->n) {
3985 t = -1;
3986 }
3987 }
3988
3989 return t;
3990 }
3991
3992 /* Given what looks like an observation number or date within "[" and
3993 "]", try to determine the observation number. This is quite tricky
3994 since we try to handle both dates and plain observation numbers
3995 (and in addition, variables representing the latter); and we may
3996 have to deal with the translation from 1-based indexing in user
3997 space to 0-based indexing for internal purposes.
3998 */
3999
get_t_from_obs_string(const char * s,const DATASET * dset)4000 int get_t_from_obs_string (const char *s, const DATASET *dset)
4001 {
4002 int t;
4003
4004 if (*s == '"') {
4005 char obs[OBSLEN+2];
4006 int err = 0;
4007
4008 *obs = '\0';
4009 strncat(obs, s, OBSLEN+1);
4010 gretl_unquote(obs, &err);
4011 t = dateton(obs, dset);
4012 } else {
4013 t = dateton(s, dset);
4014 }
4015
4016 #if OBS_DEBUG
4017 fprintf(stderr, "\nget_t_from_obs_string: s ='%s', dateton gives t = %d\n",
4018 s, t);
4019 #endif
4020
4021 if (t < 0) {
4022 if (isdigit((unsigned char) *s)) {
4023 t = plain_obs_number(s, dset);
4024 #if OBS_DEBUG
4025 fprintf(stderr, " plain_obs_number gives t = %d\n", t);
4026 #endif
4027 } else {
4028 if (gretl_is_scalar(s)) {
4029 t = gretl_scalar_get_value(s, NULL);
4030 }
4031
4032 if (t > dset->n) {
4033 /* e.g. annual dates */
4034 char try[16];
4035
4036 sprintf(try, "%d", t);
4037 t = dateton(try, dset);
4038 #if OBS_DEBUG
4039 fprintf(stderr, " revised via dateton: t = %d\n", t);
4040 #endif
4041 } else {
4042 /* convert to 0-based */
4043 t--;
4044 }
4045 }
4046 }
4047
4048 if (t < 0) {
4049 gretl_errmsg_set(_("Observation number out of bounds"));
4050 }
4051
4052 #if OBS_DEBUG
4053 fprintf(stderr, " return value: t = %d\n", t);
4054 #endif
4055
4056 return t;
4057 }
4058
check_declarations(char *** pS,parser * p)4059 int check_declarations (char ***pS, parser *p)
4060 {
4061 char **S;
4062 const char *s;
4063 int exists = 0;
4064 int badname = 0;
4065 int i, n = 1;
4066
4067 gretl_error_clear();
4068
4069 if (p->lh.expr == NULL) {
4070 p->err = E_ALLOC;
4071 return 0;
4072 }
4073
4074 s = p->lh.expr;
4075 s += strspn(s, " ");
4076
4077 while (*s) {
4078 if (*s == ',' || *s == ' ') {
4079 n++;
4080 s++;
4081 s += strspn(s, " ");
4082 } else {
4083 s++;
4084 }
4085 }
4086
4087 S = strings_array_new(n);
4088 if (S == NULL) {
4089 p->err = E_ALLOC;
4090 return 0;
4091 }
4092
4093 s = p->lh.expr;
4094 for (i=0; i<n && !p->err; i++) {
4095 S[i] = gretl_word_strdup(s, &s, OPT_S | OPT_U, &p->err);
4096 }
4097
4098 if (!p->err && *s != '\0') {
4099 p->err = E_DATA;
4100 }
4101
4102 for (i=0; i<n && !p->err; i++) {
4103 if (gretl_type_from_name(S[i], p->dset)) {
4104 /* variable already exists */
4105 exists = 1;
4106 p->err = E_DATA;
4107 } else if (check_identifier(S[i])) {
4108 /* invalid name */
4109 badname = 1;
4110 p->err = E_DATA;
4111 }
4112 }
4113
4114 if (p->err) {
4115 if (exists) {
4116 gretl_errmsg_set(_("Invalid declaration: maybe you need "
4117 "the \"clear\" command?"));
4118 } else if (!badname) {
4119 gretl_errmsg_set(_("Invalid declaration"));
4120 }
4121 strings_array_free(S, n);
4122 } else {
4123 *pS = S;
4124 }
4125
4126 return n;
4127 }
4128
4129 /* cross-sectional (list-based) statistics */
4130
xsect_minmax(double * x,int n,int f)4131 static double xsect_minmax (double *x, int n, int f)
4132 {
4133 double ret = x[0];
4134 int i;
4135
4136 for (i=1; i<n; i++) {
4137 if (f == F_MIN) {
4138 if (x[i] < ret) {
4139 ret = x[i];
4140 }
4141 } else if (x[i] > ret) {
4142 ret = x[i];
4143 }
4144 }
4145
4146 return ret;
4147 }
4148
4149 /* Fill @targ with data (from @xlist) and associated
4150 weights (from @wlist) for the cross-section at obs @t.
4151 If @partial_ok is 0 (false) we return E_MISSDATA if
4152 any values or weights are missing at @t, otherwise
4153 we skip missing terms and return the number of valid
4154 terms in @pn.
4155 */
4156
data_and_weight_at_obs(double ** targ,int * pn,const int * xlist,const int * wlist,const DATASET * dset,int t,int partial_ok)4157 static int data_and_weight_at_obs (double **targ, int *pn,
4158 const int *xlist,
4159 const int *wlist,
4160 const DATASET *dset,
4161 int t, int partial_ok)
4162 {
4163 int i, j = 0;
4164 double xi, wi;
4165
4166 for (i=1; i<=xlist[0]; i++) {
4167 xi = dset->Z[xlist[i]][t];
4168 wi = dset->Z[wlist[i]][t];
4169 if (wi < 0) {
4170 return E_INVARG;
4171 } else if (wi == 0) {
4172 continue;
4173 } else if (wi > 0 && na(xi)) {
4174 if (!partial_ok) {
4175 return E_MISSDATA;
4176 }
4177 } else if (!na(xi) && !na(wi)) {
4178 targ[0][j] = xi;
4179 targ[1][j] = wi;
4180 j++;
4181 } else if (!partial_ok) {
4182 return E_MISSDATA;
4183 }
4184 }
4185
4186 *pn = j;
4187 return (j == 0)? E_MISSDATA : 0;
4188 }
4189
4190 /* Fill @targ with data (from @xlist) for the cross-
4191 section at obs @t. If @partial_ok is 0 (false) we
4192 return E_MISSDATA if any values are missing at @t,
4193 otherwise we skip missing terms and return the count
4194 of valid values in @pn.
4195 */
4196
data_at_obs(double * targ,int * pn,const int * list,const DATASET * dset,int t,int partial_ok)4197 static int data_at_obs (double *targ, int *pn,
4198 const int *list,
4199 const DATASET *dset,
4200 int t, int partial_ok)
4201 {
4202 int i, j = 0;
4203 double xi;
4204
4205 for (i=1; i<=list[0]; i++) {
4206 xi = dset->Z[list[i]][t];
4207 if (!na(xi)) {
4208 targ[j++] = xi;
4209 } else if (!partial_ok) {
4210 return E_MISSDATA;
4211 }
4212 }
4213
4214 *pn = j;
4215 return (j == 0)? E_MISSDATA : 0;
4216 }
4217
cross_sectional_stat(double * y,const int * list,const DATASET * dset,int f,int partial_ok)4218 int cross_sectional_stat (double *y, const int *list,
4219 const DATASET *dset, int f,
4220 int partial_ok)
4221 {
4222 double *x;
4223 int missing;
4224 int i, t, n;
4225
4226 /* to hold the cross section */
4227 x = malloc(list[0] * sizeof *x);
4228 if (x == NULL) {
4229 return E_ALLOC;
4230 }
4231
4232 for (t=dset->t1; t<=dset->t2; t++) {
4233 missing = data_at_obs(x, &n, list, dset, t, partial_ok);
4234 if (missing) {
4235 y[t] = NADBL;
4236 } else if ((f == F_VCE || f == F_SD) && n == 1) {
4237 y[t] = NADBL;
4238 } else if (f == F_MIN || f == F_MAX) {
4239 y[t] = xsect_minmax(x, n, f);
4240 } else if (f == F_MEDIAN) {
4241 y[t] = gretl_median(0, n-1, x);
4242 } else {
4243 double xsum = 0;
4244
4245 for (i=0; i<n; i++) {
4246 xsum += x[i];
4247 }
4248 if (f == F_SUM) {
4249 y[t] = xsum;
4250 } else if (f == F_MEAN) {
4251 y[t] = xsum / n;
4252 } else {
4253 /* F_VCE or F_SD */
4254 double d, s2 = 0, xbar = xsum / n;
4255
4256 for (i=0; i<n; i++) {
4257 d = x[i] - xbar;
4258 s2 += d * d;
4259 }
4260 if (f == F_VCE) {
4261 y[t] = s2 / (n-1);
4262 } else {
4263 y[t] = sqrt(s2 / (n-1));
4264 }
4265 }
4266 }
4267 }
4268
4269 free(x);
4270
4271 return 0;
4272 }
4273
x_sectional_weighted_stat(double * y,const int * xlist,const int * wlist,const DATASET * dset,int f,int partial_ok)4274 int x_sectional_weighted_stat (double *y, const int *xlist,
4275 const int *wlist,
4276 const DATASET *dset,
4277 int f, int partial_ok)
4278 {
4279 double wxbar, wsum, d, ws2, V2, adj;
4280 double *x, *w, **X;
4281 int i, t, n, missing;
4282 int err = 0;
4283
4284 if (wlist[0] != xlist[0]) {
4285 gretl_errmsg_sprintf("Weighted stats: data list has %d members but weight "
4286 "list has %d", xlist[0], wlist[0]);
4287 return E_DATA;
4288 }
4289
4290 /* storage for values and weights, per obs */
4291 X = doubles_array_new(2, xlist[0]);
4292 if (X == NULL) {
4293 return E_ALLOC;
4294 }
4295
4296 x = X[0];
4297 w = X[1];
4298
4299 for (t=dset->t1; t<=dset->t2; t++) {
4300 missing = data_and_weight_at_obs(X, &n, xlist, wlist,
4301 dset, t, partial_ok);
4302 if (missing == E_INVARG) {
4303 err = E_INVARG;
4304 break;
4305 } else if (missing) {
4306 y[t] = NADBL;
4307 } else if (f != F_WMEAN && n == 1) {
4308 y[t] = NADBL;
4309 } else {
4310 wxbar = wsum = 0;
4311 for (i=0; i<n; i++) {
4312 wxbar += x[i] * w[i];
4313 wsum += w[i];
4314 }
4315 wxbar /= wsum;
4316 if (f == F_WMEAN) {
4317 y[t] = wxbar;
4318 } else {
4319 ws2 = V2 = 0;
4320 for (i=0; i<n; i++) {
4321 d = (x[i] - wxbar);
4322 ws2 += w[i] * d * d;
4323 V2 += w[i] * w[i];
4324 }
4325 /* "frequency" weights */
4326 adj = (n-1) * wsum / n;
4327 /* for "reliability" weights? */
4328 // adj = wsum - V2 / wsum;
4329 if (f == F_WVAR) {
4330 y[t] = ws2 / adj;
4331 } else {
4332 y[t] = sqrt(ws2 / adj);
4333 }
4334 }
4335 }
4336 }
4337
4338 doubles_array_free(X, 2);
4339
4340 return err;
4341 }
4342
4343 /* writes to the series @y a linear combination of the variables given
4344 in @list, using the coefficients given in the vector @b.
4345 */
4346
list_linear_combo(double * y,const int * list,const gretl_vector * b,const DATASET * dset)4347 int list_linear_combo (double *y, const int *list,
4348 const gretl_vector *b,
4349 const DATASET *dset)
4350 {
4351 int nb = gretl_vector_get_length(b);
4352 int nl = list[0];
4353 int err = 0;
4354
4355 if (nb != nl) {
4356 gretl_errmsg_sprintf(_("List has %d members, but length "
4357 "of vector b is %d"), nl, nb);
4358 err = E_DATA;
4359 } else {
4360 int i, t;
4361 double xit, yt;
4362
4363 for (t=dset->t1; t<=dset->t2; t++) {
4364 yt = 0;
4365 for (i=0; i<nl; i++) {
4366 xit = dset->Z[list[i+1]][t];
4367 if (na(xit)) {
4368 yt = NADBL;
4369 break;
4370 } else {
4371 yt += xit * gretl_vector_get(b, i);
4372 }
4373 }
4374 y[t] = yt;
4375 }
4376 }
4377
4378 return err;
4379 }
4380
4381 #define MDEBUG 0
4382
4383 #if HAVE_GMP
4384
try_mp_midas_weights(const double * theta,int k,gretl_matrix * w,int method)4385 static int try_mp_midas_weights (const double *theta, int k,
4386 gretl_matrix *w, int method)
4387 {
4388 int (*mpfun) (const double *, int, gretl_matrix *, int);
4389 int err = 0;
4390
4391 mpfun = get_plugin_function("mp_midas_weights");
4392
4393 if (mpfun == NULL) {
4394 return E_FOPEN;
4395 }
4396
4397 err = (*mpfun) (theta, k, w, method);
4398
4399 if (!err) {
4400 int i, n = gretl_vector_get_length(w);
4401
4402 for (i=0; i<n; i++) {
4403 if (!isfinite(w->val[i])) {
4404 err = E_NAN;
4405 break;
4406 }
4407 }
4408 }
4409
4410 return err;
4411 }
4412
4413 #endif /* HAVE_GMP */
4414
4415 #define INF_CHECK_VERBOSE 0
4416
inf_check(double val,const gretl_matrix * theta,const char * context,int * err)4417 static int inf_check (double val, const gretl_matrix *theta,
4418 const char *context, int *err)
4419 {
4420 if (isinf(val)) {
4421 #if INF_CHECK_VERBOSE
4422 fprintf(stderr, "range error in %s\n", context);
4423 gretl_matrix_print(theta, "theta");
4424 #endif
4425 *err = E_NAN;
4426 } else {
4427 /* tolerate underflow */
4428 errno = 0;
4429 }
4430
4431 return *err;
4432 }
4433
4434 #define BETA_METHOD(m) (m==MIDAS_BETA0 || m==MIDAS_BETA1 || m==MIDAS_BETAN)
4435
check_beta_params(int method,double * theta,int k,double eps)4436 static int check_beta_params (int method, double *theta,
4437 int k, double eps)
4438 {
4439 int err = 0;
4440
4441 if ((method == MIDAS_BETA0 || method == MIDAS_BETA1) && k != 2) {
4442 gretl_errmsg_set("theta must be a 2-vector");
4443 err = E_INVARG;
4444 } else if (method == MIDAS_BETAN && k != 3) {
4445 gretl_errmsg_set("theta must be a 3-vector");
4446 err = E_INVARG;
4447 } else if (theta[0] < eps || theta[1] < eps) {
4448 if (theta[0] < 0.0 || theta[1] < 0.0) {
4449 gretl_errmsg_set("beta: parameters must be positive");
4450 fprintf(stderr, "beta: theta1=%g, theta2=%g\n", theta[0], theta[1]);
4451 err = E_INVARG;
4452 } else {
4453 /* bodge! */
4454 if (theta[0] < eps) theta[0] = eps;
4455 if (theta[1] < eps) theta[1] = eps;
4456 }
4457 }
4458
4459 return err;
4460 }
4461
4462 /* Computes a column m-vector holding weights for use with MIDAS */
4463
midas_weights(int p,const gretl_matrix * m,int method,int * err)4464 gretl_matrix *midas_weights (int p, const gretl_matrix *m,
4465 int method, int *err)
4466 {
4467 double *theta;
4468 double eps = pow(2.0, -52);
4469 double wsum = 0.0;
4470 gretl_matrix *w = NULL;
4471 int using_mp = 0;
4472 int k, i, j;
4473
4474 #if MDEBUG
4475 gretl_matrix_print(m, "m, in midas_weights");
4476 #endif
4477
4478 /* @p = lag order
4479 @k = number of hyper-parameters
4480 */
4481
4482 if (method < MIDAS_NEALMON || method >= MIDAS_MAX || p < 1) {
4483 *err = E_INVARG;
4484 return NULL;
4485 }
4486
4487 k = gretl_vector_get_length(m);
4488 if (k == 0) {
4489 *err = E_INVARG;
4490 return NULL;
4491 }
4492
4493 theta = m->val;
4494
4495 if (BETA_METHOD(method)) {
4496 *err = check_beta_params(method, theta, k, eps);
4497 if (*err) {
4498 return NULL;
4499 }
4500 }
4501
4502 w = gretl_zero_matrix_new(p, 1);
4503 if (w == NULL) {
4504 *err = E_ALLOC;
4505 return NULL;
4506 }
4507
4508 errno = 0;
4509
4510 if (method == MIDAS_NEALMON) {
4511 for (i=0; i<p; i++) {
4512 w->val[i] = (i+1) * theta[0];
4513 for (j=1; j<k; j++) {
4514 w->val[i] += pow(i+1, j+1) * theta[j];
4515 }
4516 w->val[i] = exp(w->val[i]);
4517 wsum += w->val[i];
4518 if (errno && inf_check(wsum, m, "nealmon weights", err)) {
4519 break;
4520 }
4521 }
4522 } else if (BETA_METHOD(method)) {
4523 double si, ai, bi;
4524
4525 for (i=0; i<p; i++) {
4526 si = i / (p - 1.0);
4527 if (i == 0) {
4528 si += eps;
4529 } else if (i == p-1) {
4530 si -= eps;
4531 }
4532 ai = pow(si, theta[0] - 1.0);
4533 bi = pow(1.0 - si, theta[1] - 1.0);
4534 w->val[i] = ai * bi;
4535 wsum += w->val[i];
4536 if (errno && inf_check(wsum, m, "beta weights", err)) {
4537 break;
4538 }
4539 }
4540 } else if (method == MIDAS_ALMONP) {
4541 /* straight Almon ploynomial */
4542 for (i=0; i<p; i++) {
4543 w->val[i] = theta[0];
4544 for (j=1; j<k; j++) {
4545 w->val[i] += pow(i+1, j) * theta[j];
4546 }
4547 }
4548 }
4549
4550 #if HAVE_GMP
4551 if (*err == E_NAN || (method != MIDAS_ALMONP && wsum < eps)) {
4552 /* attempt a fix-up using multiple precision */
4553 int save_errno = errno;
4554
4555 errno = 0;
4556 *err = try_mp_midas_weights(theta, k, w, method);
4557 if (*err) {
4558 errno = save_errno;
4559 } else {
4560 using_mp = 1;
4561 }
4562 }
4563 #endif
4564
4565 if (errno) {
4566 gretl_errmsg_sprintf("Failed to calculate MIDAS weights: %s",
4567 gretl_strerror(errno));
4568 if (*err == 0) {
4569 *err = E_INVARG;
4570 }
4571 gretl_matrix_free(w);
4572 errno = 0;
4573 return NULL;
4574 }
4575
4576 if (!using_mp && method != MIDAS_ALMONP) {
4577 /* normalize the weights */
4578 for (i=0; i<p; i++) {
4579 w->val[i] /= wsum;
4580 }
4581 }
4582
4583 if (!using_mp && method == MIDAS_BETAN) {
4584 /* beta with third param, not zero-terminated:
4585 add theta[2] and renormalize
4586 */
4587 wsum = 1 + p * theta[2];
4588 for (i=0; i<p; i++) {
4589 w->val[i] = (w->val[i] + theta[2]) / wsum;
4590 }
4591 }
4592
4593 #if MDEBUG
4594 gretl_matrix_print(w, "w: midas weights");
4595 #endif
4596
4597 return w;
4598 }
4599
4600 #if HAVE_GMP
4601
try_mp_midas_grad(const double * theta,gretl_matrix * G,int method)4602 static int try_mp_midas_grad (const double *theta,
4603 gretl_matrix *G,
4604 int method)
4605 {
4606 int (*mpfun) (const double *, gretl_matrix *, int);
4607 int err = 0;
4608
4609 mpfun = get_plugin_function("mp_midas_gradient");
4610
4611 if (mpfun == NULL) {
4612 return E_FOPEN;
4613 }
4614
4615 err = (*mpfun) (theta, G, method);
4616
4617 if (!err) {
4618 int i, n = G->rows * G->cols;
4619
4620 for (i=0; i<n; i++) {
4621 if (!isfinite(G->val[i])) {
4622 err = E_NAN;
4623 break;
4624 }
4625 }
4626 }
4627
4628 return err;
4629 }
4630
mgrad_zero(const gretl_matrix * G)4631 static int mgrad_zero (const gretl_matrix *G)
4632 {
4633 int i, j, colzero;
4634
4635 for (j=0; j<G->cols; j++) {
4636 colzero = 1;
4637 for (i=0; i<G->rows; i++) {
4638 if (gretl_matrix_get(G, i, j) != 0.0) {
4639 colzero = 0;
4640 break;
4641 }
4642 }
4643 if (colzero) {
4644 return 1;
4645 }
4646 }
4647
4648 return 0;
4649 }
4650
4651 #endif /* HAVE_GMP */
4652
midas_gradient(int p,const gretl_matrix * m,int method,int * err)4653 gretl_matrix *midas_gradient (int p, const gretl_matrix *m,
4654 int method, int *err)
4655 {
4656 double *theta;
4657 double eps = pow(2.0, -52);
4658 double ws2, wsum = 0.0;
4659 gretl_matrix *w = NULL;
4660 gretl_matrix *G = NULL;
4661 int k, i, j;
4662
4663 #if MDEBUG
4664 gretl_matrix_print(m, "m, in midas_gradient");
4665 #endif
4666
4667 if (method < MIDAS_NEALMON || method >= MIDAS_MAX || p < 1) {
4668 *err = E_INVARG;
4669 return NULL;
4670 }
4671
4672 /* @p = lag order
4673 @k = number of hyper-parameters
4674 */
4675
4676 k = gretl_vector_get_length(m);
4677 if (k == 0) {
4678 *err = E_INVARG;
4679 return NULL;
4680 }
4681
4682 theta = m->val;
4683 errno = 0;
4684
4685 if (BETA_METHOD(method)) {
4686 *err = check_beta_params(method, theta, k, eps);
4687 if (*err) {
4688 return NULL;
4689 }
4690 }
4691
4692 if (method != MIDAS_ALMONP) {
4693 w = gretl_zero_matrix_new(p, 1);
4694 if (w == NULL) {
4695 *err = E_ALLOC;
4696 return NULL;
4697 }
4698 }
4699
4700 G = gretl_matrix_alloc(p, k);
4701 if (G == NULL) {
4702 gretl_matrix_free(w);
4703 *err = E_ALLOC;
4704 return NULL;
4705 }
4706
4707 if (method == MIDAS_NEALMON) {
4708 double *dsum = malloc(k * sizeof *dsum);
4709 double gij;
4710
4711 for (j=0; j<k; j++) {
4712 dsum[j] = 0.0;
4713 }
4714 for (i=0; i<p; i++) {
4715 w->val[i] = (i+1) * theta[0];
4716 for (j=1; j<k; j++) {
4717 w->val[i] += pow(i+1, j+1) * theta[j];
4718 }
4719 w->val[i] = exp(w->val[i]);
4720 wsum += w->val[i];
4721 if (errno && inf_check(wsum, m, "nealmon gradient", err)) {
4722 free(dsum);
4723 goto range_error;
4724 }
4725 }
4726 for (i=0; i<p; i++) {
4727 for (j=0; j<k; j++) {
4728 dsum[j] += w->val[i] * pow(i+1, j+1);
4729 }
4730 }
4731 for (j=0; j<k; j++) {
4732 dsum[j] /= wsum;
4733 }
4734 for (i=0; i<p; i++) {
4735 w->val[i] /= wsum;
4736 for (j=0; j<k; j++) {
4737 gij = w->val[i] * (pow(i+1, j+1) - dsum[j]);
4738 gretl_matrix_set(G, i, j, gij);
4739 }
4740 }
4741 free(dsum);
4742 } else if (BETA_METHOD(method)) {
4743 double si, ai, bi;
4744 double g1sum = 0;
4745 double g2sum = 0;
4746
4747 /* loop 1: form raw weights and their sum */
4748 for (i=0; i<p; i++) {
4749 si = i / (p - 1.0);
4750 if (i == 0) {
4751 si += eps;
4752 } else if (i == p - 1) {
4753 si -= eps;
4754 }
4755 ai = pow(si, theta[0] - 1.0);
4756 bi = pow(1.0 - si, theta[1] - 1.0);
4757 w->val[i] = ai * bi;
4758 wsum += w->val[i];
4759 if (errno && inf_check(wsum, m, "beta gradient", err)) {
4760 goto range_error;
4761 }
4762 }
4763 if (wsum <= eps) {
4764 /* should we just set G to zero in this case? */
4765 #if 0
4766 fprintf(stderr, "sum of weights = %g\n", wsum);
4767 #endif
4768 *err = E_NAN;
4769 goto range_error;
4770 }
4771 ws2 = wsum * wsum;
4772 if (errno && inf_check(ws2, m, "beta gradient", err)) {
4773 goto range_error;
4774 }
4775 /* loop 2: form first component of derivative
4776 and second cumulant */
4777 for (i=0; i<p; i++) {
4778 si = i / (p - 1.0);
4779 if (i == 0) {
4780 si += eps;
4781 } else if (i == p - 1) {
4782 si -= eps;
4783 }
4784 ai = w->val[i] * log(si);
4785 g1sum += ai;
4786 gretl_matrix_set(G, i, 0, ai/wsum);
4787 bi = w->val[i] * log(1-si);
4788 g2sum += bi;
4789 gretl_matrix_set(G, i, 1, bi/wsum);
4790 }
4791 /* loop 3: form second component and subtract */
4792 for (i=0; i<p; i++) {
4793 ai = gretl_matrix_get(G, i, 0);
4794 ai -= w->val[i] * g1sum/ws2;
4795 gretl_matrix_set(G, i, 0, ai);
4796 bi = gretl_matrix_get(G, i, 1);
4797 bi -= w->val[i] * g2sum/ws2;
4798 gretl_matrix_set(G, i, 1, bi);
4799 }
4800 if (k == 3) {
4801 /* not zero-terminated */
4802 double c3 = theta[2];
4803 double m3 = 1 / (1 + p * c3);
4804 double g;
4805
4806 for (i=0; i<2*p; i++) {
4807 /* scale the first two columns */
4808 G->val[i] *= m3;
4809 }
4810 for (i=0; i<p; i++) {
4811 /* compute the third-col derivative */
4812 g = 1 - p * w->val[i]/wsum;
4813 gretl_matrix_set(G, i, 2, m3 * m3 * g);
4814 }
4815 }
4816 } else if (method == MIDAS_ALMONP) {
4817 /* straight Almon polynomial */
4818 for (i=0; i<p; i++) {
4819 gretl_matrix_set(G, i, 0, 1.0);
4820 for (j=1; j<k; j++) {
4821 gretl_matrix_set(G, i, j, pow(i + 1.0, j));
4822 }
4823 }
4824 }
4825
4826 range_error:
4827
4828 #if HAVE_GMP
4829 if (*err == E_NAN || mgrad_zero(G)) {
4830 /* attempt a fix-up using multiple precision */
4831 int save_errno = errno;
4832
4833 errno = 0;
4834 *err = try_mp_midas_grad(theta, G, method);
4835 if (*err) {
4836 errno = save_errno;
4837 }
4838 }
4839 #endif
4840
4841 gretl_matrix_free(w);
4842
4843 if (errno) {
4844 gretl_errmsg_sprintf("Failed to calculate MIDAS gradient: %s",
4845 gretl_strerror(errno));
4846 if (*err == 0) {
4847 *err = E_INVARG;
4848 }
4849 gretl_matrix_free(G);
4850 G = NULL;
4851 errno = 0;
4852 }
4853
4854 #if MDEBUG
4855 gretl_matrix_print(G, "G: midas gradient");
4856 #endif
4857
4858 return G;
4859 }
4860
4861 /* Retrieve from a midasreg $model bundle all the info
4862 needed to construct the array of per-lag multipliers
4863 and their standard errors.
4864 */
4865
process_midas_bundle(gretl_bundle * mb,int idx,gretl_matrix ** ptheta,gretl_matrix ** pV,int * pmtype,int * ph,int * pminlag,const char ** plname)4866 static int process_midas_bundle (gretl_bundle *mb, int idx,
4867 gretl_matrix **ptheta,
4868 gretl_matrix **pV,
4869 int *pmtype, int *ph,
4870 int *pminlag,
4871 const char **plname)
4872 {
4873 gretl_array *mt;
4874 gretl_matrix *b, *vcv;
4875 gretl_matrix *theta = NULL;
4876 gretl_matrix *V = NULL;
4877 int *xlist = NULL;
4878 int mtype, np;
4879 int i0, i1;
4880 int i, j, k, nm;
4881 int err = 0;
4882
4883 mt = gretl_bundle_get_array(mb, "midas_info", &err);
4884 if (!err) {
4885 xlist = gretl_bundle_get_list(mb, "xlist", &err);
4886 }
4887 if (!err) {
4888 b = gretl_bundle_get_matrix(mb, "coeff", &err);
4889 }
4890 if (!err) {
4891 vcv = gretl_bundle_get_matrix(mb, "vcv", &err);
4892 }
4893 if (err) {
4894 return err;
4895 }
4896
4897 i0 = xlist[0];
4898 nm = gretl_array_get_length(mt);
4899 if (idx < 1 || idx > nm) {
4900 gretl_errmsg_set("Invalid MIDAS term index");
4901 err = E_DATA;
4902 } else {
4903 idx--; /* convert index to 0-based */
4904 }
4905
4906 for (i=0; i<=idx && !err; i++) {
4907 gretl_bundle *mti;
4908 int minlag, maxlag;
4909 int nl, leader = 1;
4910
4911 mti = gretl_array_get_data(mt, i);
4912 minlag = gretl_bundle_get_int(mti, "minlag", &err);
4913 maxlag = gretl_bundle_get_int(mti, "maxlag", &err);
4914 mtype = gretl_bundle_get_int(mti, "type", &err);
4915 np = gretl_bundle_get_int(mti, "nparm", &err);
4916 if (err) {
4917 break;
4918 }
4919
4920 if (mtype == MIDAS_U || mtype == MIDAS_ALMONP) {
4921 leader = 0;
4922 }
4923 nl = maxlag - minlag + 1;
4924 i1 = i0 + np - 1 + leader;
4925 #if 1
4926 fprintf(stderr, "midas term %d, type %d, np %d, nlags %d\n",
4927 i, mtype, np, nl);
4928 #endif
4929 if (i == idx) {
4930 double vjk;
4931 int r = i1 - i0 + 1;
4932 int jj;
4933
4934 theta = gretl_matrix_alloc(r, 1);
4935 V = gretl_matrix_alloc(r, r);
4936 for (j=0; j<r; j++) {
4937 jj = j + i0;
4938 theta->val[j] = b->val[jj];
4939 for (k=0; k<r; k++) {
4940 vjk = gretl_matrix_get(vcv, jj, k+i0);
4941 gretl_matrix_set(V, j, k, vjk);
4942 }
4943 }
4944 *pmtype = mtype;
4945 *ph = nl;
4946 *pminlag = minlag;
4947 *plname = gretl_bundle_get_string(mti, "lname", NULL);
4948 }
4949 i0 = i1 + 1;
4950 }
4951
4952 if (err) {
4953 gretl_matrix_free(theta);
4954 gretl_matrix_free(V);
4955 } else {
4956 *ptheta = theta;
4957 *pV = V;
4958 }
4959
4960 return err;
4961 }
4962
midas_multipliers(gretl_bundle * mb,int cumulate,int idx,int * err)4963 gretl_matrix *midas_multipliers (gretl_bundle *mb, int cumulate,
4964 int idx, int *err)
4965 {
4966 gretl_matrix *ret = NULL;
4967 gretl_matrix *theta = NULL;
4968 gretl_matrix *mult = NULL;
4969 gretl_matrix *V = NULL;
4970 gretl_matrix *J = NULL;
4971 gretl_matrix *vcv = NULL;
4972 const char *lname = NULL;
4973 int minlag = 0;
4974 int mtype = 0, h = 0;
4975 int i, k;
4976
4977 *err = process_midas_bundle(mb, idx, &theta, &V, &mtype,
4978 &h, &minlag, &lname);
4979 if (*err) {
4980 gretl_errmsg_set("Not a valid midasreg bundle");
4981 return NULL;
4982 }
4983
4984 k = theta->rows;
4985
4986 if (mtype == MIDAS_U) {
4987 mult = gretl_matrix_copy(theta);
4988 J = gretl_zero_matrix_new(h, k);
4989 gretl_matrix_inscribe_I(J, 0, 0, MIN(k,h));
4990 } else if (mtype == MIDAS_ALMONP) {
4991 mult = midas_weights(h, theta, mtype, err);
4992 J = midas_gradient(h, theta, mtype, err);
4993 } else {
4994 gretl_matrix *tmp1, *tmp2;
4995 double mg, th0 = theta->val[0];
4996 int j;
4997
4998 tmp1 = gretl_matrix_alloc(k-1, 1);
4999 for (i=1; i<k; i++) {
5000 tmp1->val[i-1] = theta->val[i];
5001 }
5002 mult = midas_weights(h, tmp1, mtype, err);
5003 tmp2 = midas_gradient(h, tmp1, mtype, err);
5004 J = gretl_matrix_alloc(h, k);
5005 for (i=0; i<h; i++) {
5006 gretl_matrix_set(J, i, 0, mult->val[i]);
5007 for (j=1; j<k; j++) {
5008 mg = gretl_matrix_get(tmp2, i, j-1);
5009 gretl_matrix_set(J, i, j, th0*mg);
5010 }
5011 }
5012 gretl_matrix_multiply_by_scalar(mult, th0);
5013 gretl_matrix_free(tmp1);
5014 gretl_matrix_free(tmp2);
5015 }
5016
5017 if (*err || mult == NULL || J == NULL) {
5018 if (!*err) {
5019 *err = E_ALLOC;
5020 }
5021 return NULL;
5022 }
5023
5024 vcv = gretl_matrix_alloc(J->rows, J->rows);
5025 if (vcv == NULL) {
5026 *err = E_ALLOC;
5027 } else {
5028 *err = gretl_matrix_qform(J, GRETL_MOD_NONE,
5029 V, vcv, GRETL_MOD_NONE);
5030 }
5031
5032 if (!*err && cumulate) {
5033 gretl_matrix *tmp;
5034
5035 for (i=1; i<h; i++) {
5036 mult->val[i] += mult->val[i-1];
5037 }
5038 gretl_matrix_free(J);
5039 J = gretl_unit_matrix_new(h, h);
5040 gretl_matrix_set_triangle(J, NULL, 0, 1);
5041 tmp = gretl_matrix_alloc(h, h);
5042 *err = gretl_matrix_qform(J, GRETL_MOD_NONE,
5043 vcv, tmp, GRETL_MOD_NONE);
5044 gretl_matrix_free(vcv);
5045 vcv = tmp;
5046 }
5047
5048 if (!*err) {
5049 double vi;
5050
5051 ret = gretl_matrix_alloc(h, 2);
5052 for (i=0; i<h; i++) {
5053 gretl_matrix_set(ret, i, 0, mult->val[i]);
5054 vi = gretl_matrix_get(vcv, i, i);
5055 gretl_matrix_set(ret, i, 1, sqrt(vi));
5056 }
5057 }
5058
5059 if (ret != NULL) {
5060 char **S = strings_array_new(h);
5061 char rname[32];
5062
5063 if (S != NULL) {
5064 for (i=0; i<h; i++) {
5065 if (lname != NULL) {
5066 if (cumulate) {
5067 sprintf(rname, "c%s_%d", lname, i + minlag);
5068 } else {
5069 sprintf(rname, "%s_%d", lname, i + minlag);
5070 }
5071 } else {
5072 sprintf(rname, "%s_%d", cumulate ? "cmult" : "mult",
5073 i + minlag);
5074 }
5075 S[i] = gretl_strdup(rname);
5076 }
5077 gretl_matrix_set_rownames(ret, S);
5078 }
5079 }
5080
5081 gretl_matrix_free(theta);
5082 gretl_matrix_free(V);
5083 gretl_matrix_free(mult);
5084 gretl_matrix_free(J);
5085 gretl_matrix_free(vcv);
5086
5087 return ret;
5088 }
5089
midas_linear_combo(double * y,const int * list,const gretl_matrix * theta,int method,const DATASET * dset)5090 int midas_linear_combo (double *y, const int *list,
5091 const gretl_matrix *theta,
5092 int method, const DATASET *dset)
5093 {
5094 gretl_matrix *w = NULL;
5095 int m = list[0];
5096 int err = 0;
5097
5098 w = midas_weights(m, theta, method, &err);
5099
5100 if (BETA_METHOD(method) && !err && w == NULL) {
5101 int t;
5102
5103 for (t=dset->t1; t<=dset->t2; t++) {
5104 y[t] = NADBL;
5105 }
5106 return 0;
5107 }
5108
5109 if (!err) {
5110 err = list_linear_combo(y, list, w, dset);
5111 }
5112
5113 gretl_matrix_free(w);
5114
5115 return err;
5116 }
5117
vector_to_midas_list(const gretl_matrix * v,int f_ratio,const char * prefix,DATASET * dset,int * err)5118 int *vector_to_midas_list (const gretl_matrix *v,
5119 int f_ratio,
5120 const char *prefix,
5121 DATASET *dset,
5122 int *err)
5123 {
5124 char vname[VNAMELEN];
5125 int *list = NULL;
5126 int origv = dset->v;
5127 int i;
5128
5129 /* double-check to avoid crashing below */
5130 if (gretl_vector_get_length(v) != sample_size(dset) * f_ratio) {
5131 *err = E_DATA;
5132 return NULL;
5133 }
5134
5135 /* check names for collisions first */
5136 for (i=0; i<f_ratio && !*err; i++) {
5137 sprintf(vname, "%s%d", prefix, i+1);
5138 if (current_series_index(dset, vname) >= 1 ||
5139 get_user_var_by_name(vname) != NULL) {
5140 gretl_errmsg_set("The constructed series names would "
5141 "collide with those of existing objects");
5142 *err = E_INVARG;
5143 }
5144 }
5145
5146 if (!*err) {
5147 /* try adding the required number of series */
5148 *err = dataset_add_series(dset, f_ratio);
5149 if (!*err) {
5150 list = gretl_list_new(f_ratio);
5151 if (list == NULL) {
5152 *err = E_ALLOC;
5153 }
5154 }
5155 }
5156
5157 if (!*err) {
5158 /* actually construct the series */
5159 char label[MAXLABEL];
5160 int pos, pos0 = f_ratio - 1;
5161 int t, k = origv;
5162
5163 for (i=0; i<f_ratio; i++) {
5164 sprintf(dset->varname[k], "%s%d", prefix, f_ratio - i);
5165 sprintf(label, "%s in sub-period %d", prefix, f_ratio - i);
5166 series_record_label(dset, k, label);
5167 list[i+1] = k;
5168 k++;
5169 }
5170
5171 for (t=dset->t1; t<=dset->t2; t++) {
5172 pos = pos0;
5173 for (k=origv; k<dset->v; k++) {
5174 dset->Z[k][t] = v->val[pos--];
5175 }
5176 pos0 += f_ratio;
5177 }
5178
5179 gretl_list_set_midas(list, dset);
5180 }
5181
5182 return list;
5183 }
5184
5185 /* Imhof: draws on the RATS code in IMHOF.SRC from Estima, 2004.
5186
5187 Imhof Procedure for computing P(u'Au < x) for a quadratic form in
5188 Normal(0,1) variables. This can be used for ratios of quadratic
5189 forms as well, since P((u'Au/u'Bu) < x) = P(u'(A-xB)u < 0).
5190
5191 In the Durbin-Watson context, 'B' is the identity matrix and
5192 'x' is the Durbin-Watson statistic.
5193
5194 References:
5195
5196 Imhof, J.P (1961), Computing the Distribution of Quadratic Forms of
5197 Normal Variables, Biometrika 48, 419-426.
5198
5199 Abrahamse, A.P.J and Koerts, J. (1969), On the Theory and
5200 Application of the General Linear Model, Rotterdam University
5201 Press.
5202 */
5203
imhof_bound(const double * lambda,int k,int * err)5204 static double imhof_bound (const double *lambda, int k, int *err)
5205 {
5206 double e1 = 0.0001; /* Max truncation error due to finite
5207 upper bound on domain */
5208 double e2 = 0.0001; /* Cutoff for deciding whether an
5209 eigenvalue is effectively zero */
5210 double absl, bound;
5211 double nl = 0.0, sum = 0.0;
5212 int i;
5213
5214 for (i=0; i<k; i++) {
5215 absl = fabs(lambda[i]);
5216 if (absl > e2) {
5217 nl += 1.0;
5218 sum += log(absl);
5219 }
5220 }
5221
5222 if (nl == 0.0) {
5223 fprintf(stderr, "imhof_bound: got no non-zero eigenvalues\n");
5224 *err = E_DATA;
5225 return NADBL;
5226 }
5227
5228 /* The key factor in the integrand is the product of
5229 (1+(lambda(i)*x)^2)^(1/4) across i. Since, for those
5230 factors with small |lambda(i)|, this won't go to zero very
5231 quickly, we count only the terms on the bigger eigenvalues.
5232 */
5233
5234 nl *= 0.5;
5235 sum = 0.5 * sum + log(M_PI * nl);
5236 bound = exp(-(sum + log(e1)) / nl);
5237 bound += 5.0 / nl;
5238
5239 if (bound < 0) {
5240 fprintf(stderr, "imhof_bound: got negative result\n");
5241 *err = E_DATA;
5242 bound = NADBL;
5243 }
5244
5245 return bound;
5246 }
5247
vecsum(const double * x,int k)5248 static double vecsum (const double *x, int k)
5249 {
5250 double sum = 0.0;
5251 int i;
5252
5253 for (i=0; i<k; i++) {
5254 sum += x[i];
5255 }
5256
5257 return sum;
5258 }
5259
imhof_f(double u,const double * lambda,int k,double arg)5260 static double imhof_f (double u, const double *lambda, int k, double arg)
5261 {
5262 double ul, rho = 0.0;
5263 double theta = -u * arg;
5264 int i;
5265
5266 /* The value at zero isn't directly computable as
5267 it produces 0/0. The limit is computed below.
5268 */
5269 if (u == 0.0) {
5270 return 0.5 * (-arg + vecsum(lambda, k));
5271 }
5272
5273 for (i=0; i<k; i++) {
5274 ul = u * lambda[i];
5275 theta += atan(ul);
5276 rho += log(1.0 + ul * ul);
5277 }
5278
5279 return sin(0.5 * theta) / (u * exp(0.25 * rho));
5280 }
5281
5282 #define gridlimit 2048
5283
5284 /*
5285 Adaptation of Abrahamse and Koert's Pascal code. Evaluates the
5286 integral by Simpson's rule with grid size halving each time until
5287 the change from moving to a tighter grid is negligible. By halving
5288 the grid, only the odd terms need to be computed, as the old ones
5289 are already in the sum. Points entering get x 4 weights, which then
5290 get reduced to x 2 on the next iteration.
5291 */
5292
imhof_integral(double arg,const double * lambda,int k,double bound,int * err)5293 static double imhof_integral (double arg, const double *lambda, int k,
5294 double bound, int *err)
5295 {
5296 double e3 = 0.0001;
5297 double base, step, sum1;
5298 double int0 = 0.0, int1 = 0.0;
5299 double eps4 = 3.0 * M_PI * e3;
5300 double sum4 = 0.0;
5301 double ret = NADBL;
5302 int j, n = 2;
5303
5304 base = imhof_f(0, lambda, k, arg);
5305 base += imhof_f(bound, lambda, k, arg);
5306
5307 while (n < gridlimit) {
5308 step = bound / n;
5309 sum1 = base + sum4 * 2.0;
5310 base = sum1;
5311 sum4 = 0.0;
5312 for (j=1; j<=n; j+=2) {
5313 sum4 += imhof_f(j * step, lambda, k, arg);
5314 }
5315 int1 = (sum1 + 4 * sum4) * step;
5316 if (n > 8 && fabs(int1 - int0) < eps4) {
5317 break;
5318 }
5319 int0 = int1;
5320 n *= 2;
5321 }
5322
5323 if (n > gridlimit) {
5324 fprintf(stderr, "n = %d, Imhof integral failed to converge\n", n);
5325 *err = E_NOCONV;
5326 } else {
5327 ret = 0.5 - int1 / (3.0 * M_PI);
5328 if (ret < 0) {
5329 fprintf(stderr, "n = %d, Imhof integral gave negative value %g\n", n, ret);
5330 }
5331 }
5332
5333 return ret;
5334 }
5335
imhof_get_eigenvals(const gretl_matrix * m,double ** plam,int * pk)5336 static int imhof_get_eigenvals (const gretl_matrix *m,
5337 double **plam, int *pk)
5338 {
5339 gretl_matrix *E;
5340 int err = 0;
5341
5342 E = gretl_general_matrix_eigenvals(m, &err);
5343
5344 if (!err) {
5345 *pk = E->rows;
5346 *plam = gretl_matrix_steal_data(E);
5347 }
5348
5349 gretl_matrix_free(E);
5350
5351 return err;
5352 }
5353
5354 /* Implements the "imhof" function in genr: computes the probability
5355 P(u'Au < arg) for a quadratic form in Normal(0,1) variables. The
5356 argument @m may be either the square matrix A or a vector
5357 containing the precomputed eigenvalues of A.
5358 */
5359
imhof(const gretl_matrix * m,double arg,int * err)5360 double imhof (const gretl_matrix *m, double arg, int *err)
5361 {
5362 double *lambda = NULL;
5363 double bound, ret = NADBL;
5364 int k = 0, free_lambda = 0;
5365
5366 errno = 0;
5367
5368 if (m->cols == 1 || m->rows == 1) {
5369 /* we'll assume m is a vector of eigenvalues */
5370 lambda = m->val;
5371 k = gretl_vector_get_length(m);
5372 } else if (m->rows == m->cols) {
5373 /* we'll assume m is the 'A' matrix */
5374 *err = imhof_get_eigenvals(m, &lambda, &k);
5375 free_lambda = 1;
5376 } else {
5377 /* huh? */
5378 *err = E_DATA;
5379 }
5380
5381 if (!*err) {
5382 bound = imhof_bound(lambda, k, err);
5383 }
5384
5385 if (!*err) {
5386 ret = imhof_integral(arg, lambda, k, bound, err);
5387 }
5388
5389 if (errno != 0) {
5390 fprintf(stderr, "imhof: %s\n", gretl_strerror(errno));
5391 if (!*err) {
5392 *err = E_NOCONV;
5393 }
5394 ret = NADBL;
5395 errno = 0;
5396 } else if (!*err && ret < 0 && ret > -1.0e-14) {
5397 /* just approximation error? */
5398 ret = 0;
5399 }
5400
5401 if (free_lambda) {
5402 free(lambda);
5403 }
5404
5405 return ret;
5406 }
5407
5408 /* Implements the $dwpval accessor: given the residual vector
5409 @u and the matrix of regressors, @X, calculates the Durbin-Watson
5410 statistic then finds its p-value via the Imhof/Koerts/Abrahamse
5411 procedure. If @pDW is non-NULL, the Durbin-Watson statstic is
5412 written to that location.
5413 */
5414
dw_pval(const gretl_matrix * u,const gretl_matrix * X,double * pDW,int * perr)5415 double dw_pval (const gretl_matrix *u, const gretl_matrix *X,
5416 double *pDW, int *perr)
5417 {
5418 gretl_matrix *M = NULL;
5419 gretl_matrix *A = NULL;
5420 gretl_matrix *MA = NULL;
5421 gretl_matrix *XX = NULL;
5422 gretl_matrix *E = NULL;
5423 double uu, DW = 0;
5424 double pv = NADBL;
5425 int k = X->cols;
5426 int n = X->rows;
5427 int i, err = 0;
5428
5429 M = gretl_identity_matrix_new(n);
5430 A = gretl_DW_matrix_new(n);
5431 MA = gretl_matrix_alloc(n, n);
5432 XX = gretl_matrix_alloc(k, k);
5433
5434 if (M == NULL || A == NULL || MA == NULL || XX == NULL) {
5435 err = E_ALLOC;
5436 goto bailout;
5437 }
5438
5439 gretl_matrix_multiply_mod(X, GRETL_MOD_TRANSPOSE,
5440 X, GRETL_MOD_NONE,
5441 XX, GRETL_MOD_NONE);
5442 err = gretl_invert_symmetric_matrix(XX);
5443
5444 if (!err) {
5445 /* M = I - X(X'X)^{-1}X' */
5446 err = gretl_matrix_qform(X, GRETL_MOD_NONE,
5447 XX, M, GRETL_MOD_DECREMENT);
5448 }
5449 if (!err) {
5450 err = gretl_matrix_multiply(M, A, MA);
5451 }
5452 if (!err) {
5453 uu = gretl_matrix_dot_product(u, GRETL_MOD_TRANSPOSE,
5454 u, GRETL_MOD_NONE,
5455 &err);
5456 }
5457 if (!err) {
5458 DW = gretl_scalar_qform(u, A, &err);
5459 }
5460 if (!err) {
5461 DW /= uu;
5462 #if 0
5463 fprintf(stderr, "DW = %g\n", DW);
5464 #endif
5465 E = gretl_general_matrix_eigenvals(MA, &err);
5466 }
5467
5468 if (!err) {
5469 k = n - k;
5470 for (i=0; i<k; i++) {
5471 E->val[i] -= DW;
5472 }
5473 gretl_matrix_reuse(E, k, 1);
5474 pv = imhof(E, 0.0, &err);
5475 if (!err && pDW != NULL) {
5476 *pDW = DW;
5477 }
5478 }
5479
5480 bailout:
5481
5482 gretl_matrix_free(M);
5483 gretl_matrix_free(A);
5484 gretl_matrix_free(MA);
5485 gretl_matrix_free(XX);
5486 gretl_matrix_free(E);
5487
5488 *perr = err;
5489
5490 return pv;
5491 }
5492
5493 /* create a matrix containing ACF and PACF values for each
5494 column of the input matrix, @m, with lag order @p.
5495 */
5496
multi_acf(const gretl_matrix * m,const int * list,const DATASET * dset,int p,int * err)5497 gretl_matrix *multi_acf (const gretl_matrix *m,
5498 const int *list,
5499 const DATASET *dset,
5500 int p, int *err)
5501 {
5502 gretl_matrix *a, *A = NULL;
5503 const double *x;
5504 double xa;
5505 int nv, T, acol, pcol;
5506 int i, j;
5507
5508 if (list == NULL && gretl_is_null_matrix(m)) {
5509 *err = E_DATA;
5510 return NULL;
5511 }
5512
5513 if (m != NULL) {
5514 nv = m->cols;
5515 } else {
5516 nv = list[0];
5517 }
5518
5519 A = gretl_matrix_alloc(p, 2 * nv);
5520 if (A == NULL) {
5521 *err = E_ALLOC;
5522 return NULL;
5523 }
5524
5525 if (m != NULL) {
5526 x = m->val;
5527 T = m->rows;
5528 } else {
5529 x = dset->Z[list[1]] + dset->t1;
5530 T = sample_size(dset);
5531 }
5532
5533 acol = 0;
5534 pcol = nv;
5535
5536 for (j=0; j<nv; j++) {
5537 /* get ACF/PACF for column/series */
5538 a = acf_matrix(x, p, NULL, T, err);
5539 if (*err) {
5540 gretl_matrix_free(a);
5541 gretl_matrix_free(A);
5542 return NULL;
5543 }
5544
5545 /* transcribe into A matrix and free */
5546 for (i=0; i<p; i++) {
5547 xa = gretl_matrix_get(a, i, 0);
5548 gretl_matrix_set(A, i, acol, xa);
5549 xa = gretl_matrix_get(a, i, 1);
5550 gretl_matrix_set(A, i, pcol, xa);
5551 }
5552 acol++;
5553 pcol++;
5554
5555 gretl_matrix_free(a);
5556
5557 /* move to next data read position */
5558 if (j < nv - 1) {
5559 if (m != NULL) {
5560 x += m->rows;
5561 } else {
5562 x = dset->Z[list[j+2]] + dset->t1;
5563 }
5564 }
5565 }
5566
5567 return A;
5568 }
5569
multi_xcf(const void * px,int xtype,const void * py,int ytype,const DATASET * dset,int p,int * err)5570 gretl_matrix *multi_xcf (const void *px, int xtype,
5571 const void *py, int ytype,
5572 const DATASET *dset,
5573 int p, int *err)
5574 {
5575 const int *xlist = NULL;
5576 const gretl_matrix *Xmat = NULL;
5577 const double *xvec = NULL;
5578 const double *yvec = NULL;
5579 gretl_matrix *xj, *XCF = NULL;
5580 int T = sample_size(dset);
5581 int np = 2 * p + 1;
5582 int Ty, nx = 1;
5583 int i, j;
5584
5585 if (xtype == LIST) {
5586 xlist = px;
5587 nx = xlist[0];
5588 if (nx < 1) {
5589 *err = E_DATA;
5590 return NULL;
5591 }
5592 xvec = dset->Z[xlist[1]] + dset->t1;
5593 } else if (xtype == MAT) {
5594 Xmat = px;
5595 if (gretl_is_null_matrix(Xmat)) {
5596 *err = E_DATA;
5597 return NULL;
5598 }
5599 nx = Xmat->cols;
5600 T = Xmat->rows;
5601 xvec = Xmat->val;
5602 } else {
5603 /* VEC: note: px is of type *void */
5604 xvec = px;
5605 xvec += dset->t1;
5606 }
5607
5608 if (ytype == MAT) {
5609 const gretl_matrix *ymat = py;
5610
5611 if (ymat->cols != 1) {
5612 *err = E_NONCONF;
5613 return NULL;
5614 }
5615 yvec = ymat->val;
5616 Ty = ymat->rows;
5617 } else {
5618 yvec = (const double *) py + dset->t1;
5619 Ty = sample_size(dset);
5620 }
5621
5622 if (Ty != T) {
5623 *err = E_NONCONF;
5624 return NULL;
5625 }
5626
5627 if (nx > 1) {
5628 XCF = gretl_matrix_alloc(np, nx);
5629 if (XCF == NULL) {
5630 *err = E_ALLOC;
5631 return NULL;
5632 }
5633 }
5634
5635 for (j=0; j<nx; j++) {
5636 /* get XCF for left-hand column/series and y */
5637 xj = xcf_vec(xvec, yvec, p, NULL, T, err);
5638 if (*err) {
5639 gretl_matrix_free(XCF);
5640 return NULL;
5641 }
5642
5643 if (nx == 1) {
5644 XCF = xj;
5645 break;
5646 }
5647
5648 /* transcribe into big XCF matrix and free */
5649 for (i=0; i<np; i++) {
5650 gretl_matrix_set(XCF, i, j, xj->val[i]);
5651 }
5652 gretl_matrix_free(xj);
5653
5654 /* move to next data read position */
5655 if (j < nx - 1) {
5656 if (Xmat != NULL) {
5657 xvec += Xmat->rows;
5658 } else {
5659 xvec = dset->Z[xlist[j+2]] + dset->t1;
5660 }
5661 }
5662 }
5663
5664 return XCF;
5665 }
5666
theil_decomp(double * m,double MSE,const double * y,const double * f,int T)5667 static int theil_decomp (double *m, double MSE,
5668 const double *y,
5669 const double *f,
5670 int T)
5671 {
5672 double da, dp;
5673 double Abar, Pbar;
5674 double sa, sp, r;
5675 int t, err = 0;
5676
5677 if (MSE <= 0.0) {
5678 m[0] = m[1] = m[2] = NADBL;
5679 return E_DATA;
5680 }
5681
5682 Abar = Pbar = 0.0;
5683
5684 for (t=0; t<T; t++) {
5685 Abar += y[t];
5686 Pbar += f[t];
5687 }
5688
5689 Abar /= T;
5690 Pbar /= T;
5691
5692 sa = sp = r = 0.0;
5693
5694 for (t=0; t<T; t++) {
5695 da = y[t] - Abar;
5696 sa += da * da;
5697 dp = f[t] - Pbar;
5698 sp += dp * dp;
5699 r += da * dp;
5700 }
5701
5702 sa = sqrt(sa / T);
5703 sp = sqrt(sp / T);
5704 r /= T * sa * sp;
5705
5706 if (sa == 0.0 || sp == 0.0) {
5707 err = E_DATA;
5708 m[0] = m[1] = m[2] = NADBL;
5709 } else {
5710 m[0] = (Abar - Pbar) * (Abar - Pbar) / MSE; /* U^M */
5711 m[1] = (sp - r * sa) * (sp - r * sa) / MSE; /* U^R */
5712 m[2] = (1.0 - r * r) * sa * sa / MSE; /* U^D */
5713
5714 if (m[2] > 0.99999999999999) {
5715 /* U^M and U^R are just machine noise? */
5716 m[2] = 1.0;
5717 m[0] = m[1] = 0.0;
5718 }
5719 }
5720
5721 return err;
5722 }
5723
5724 /* OPT_O allows embedded missing values (which will be skipped);
5725 OPT_D requests Theil decomposition; OPT_T means time series.
5726 */
5727
fill_fcstats_column(gretl_matrix * m,const double * y,const double * f,int T,gretlopt opt,int col)5728 static int fill_fcstats_column (gretl_matrix *m,
5729 const double *y,
5730 const double *f,
5731 int T,
5732 gretlopt opt,
5733 int col)
5734 {
5735 double ME, MSE, MAE, MPE, MAPE, U;
5736 double Unum, Uden1, Uden2;
5737 double fe, u[2];
5738 int do_theil = 0;
5739 int do_U2 = (opt & OPT_T);
5740 int ok_T = T;
5741 int t, err = 0;
5742
5743 ME = MSE = MAE = MPE = MAPE = U = 0.0;
5744 Unum = Uden1 = Uden2 = 0.0;
5745 u[0] = u[1] = 0.0;
5746
5747 for (t=0; t<T; t++) {
5748 if (na(y[t]) || na(f[t])) {
5749 if (opt & OPT_O) {
5750 ok_T--;
5751 continue;
5752 } else {
5753 err = E_MISSDATA;
5754 break;
5755 }
5756 }
5757 fe = y[t] - f[t];
5758 ME += fe;
5759 MSE += fe * fe;
5760 MAE += fabs(fe);
5761 if (floateq(fe, 0)) {
5762 ; /* OK, zero contribution to MPE, MAPE */
5763 } else if (y[t] == 0.0) {
5764 /* can't calculate percentage */
5765 MPE = MAPE = NADBL;
5766 } else {
5767 MPE += 100 * fe / y[t];
5768 MAPE += 100 * fabs(fe / y[t]);
5769 }
5770 if (do_U2) {
5771 /* let U = U2 */
5772 if (t < T-1 && !na(U)) {
5773 if (na(f[t+1]) || na(y[t+1])) {
5774 U = NADBL;
5775 } else {
5776 fe = f[t+1] - y[t+1];
5777 if (floatneq(fe, 0)) {
5778 if (y[t] == 0.0) {
5779 U = NADBL;
5780 } else {
5781 fe /= y[t];
5782 u[0] += fe * fe;
5783 }
5784 }
5785 fe = y[t+1] - y[t];
5786 if (floatneq(fe, 0)) {
5787 if (y[t] == 0.0) {
5788 U = NADBL;
5789 } else {
5790 fe /= y[t];
5791 u[1] += fe * fe;
5792 }
5793 }
5794 }
5795 }
5796 } else {
5797 /* let U = U1 */
5798 Unum += fe * fe;
5799 Uden1 += y[t] * y[t];
5800 Uden2 += f[t] * f[t];
5801 }
5802 }
5803
5804 if (!err) {
5805 if (ok_T == 0) {
5806 err = E_MISSDATA;
5807 } else if (ok_T < T) {
5808 T = ok_T;
5809 } else if (opt & OPT_D) {
5810 do_theil = 1;
5811 }
5812 }
5813
5814 if (!err) {
5815 fnpkg *pkg = get_active_function_package(0);
5816 const char *pkgname = NULL;
5817 int show_MSE = 0;
5818
5819 if (pkg != NULL) {
5820 pkgname = function_package_get_name(pkg);
5821 }
5822 if (pkgname != NULL && !strcmp(pkgname, "fcModels") &&
5823 function_package_get_version(pkg) <= 1.1) {
5824 show_MSE = 1;
5825 }
5826
5827 ME /= T;
5828 MSE /= T;
5829 MAE /= T;
5830 if (!isnan(MPE)) {
5831 MPE /= T;
5832 }
5833 if (!isnan(MAPE)) {
5834 MAPE /= T;
5835 }
5836 if (do_U2) {
5837 if (!isnan(U) && u[1] > 0.0) {
5838 U = sqrt(u[0] / T) / sqrt(u[1] / T);
5839 }
5840 } else {
5841 Unum = sqrt(Unum/T);
5842 Uden1 = sqrt(Uden1/T);
5843 Uden2 = sqrt(Uden2/T);
5844 U = Unum / (Uden1+ Uden2);
5845 }
5846 gretl_matrix_set(m, 0, col, ME);
5847 if (show_MSE) {
5848 gretl_matrix_set(m, 1, col, MSE);
5849 } else {
5850 gretl_matrix_set(m, 1, col, sqrt(MSE));
5851 }
5852 gretl_matrix_set(m, 2, col, MAE);
5853 gretl_matrix_set(m, 3, col, MPE);
5854 gretl_matrix_set(m, 4, col, MAPE);
5855 gretl_matrix_set(m, 5, col, U);
5856
5857 if (do_theil) {
5858 double *targ = m->val + col * m->rows + 6;
5859
5860 theil_decomp(targ, MSE, y, f, T);
5861 }
5862 }
5863
5864 return err;
5865 }
5866
add_fcstats_rownames(gretl_matrix * m,gretlopt opt)5867 static void add_fcstats_rownames (gretl_matrix *m,
5868 gretlopt opt)
5869 {
5870 const char *S[] = {
5871 "ME", "RMSE", "MAE", "MPE", "MAPE",
5872 "U1", "UM", "UR", "UD"
5873 };
5874 int i, ns = m->rows;
5875 char **rownames = strings_array_new(ns);
5876
5877 if (rownames != NULL) {
5878 for (i=0; i<ns; i++) {
5879 if (i == 5 && (opt & OPT_T)) {
5880 rownames[5] = gretl_strdup("U2");
5881 } else {
5882 rownames[i] = gretl_strdup(S[i]);
5883 }
5884 }
5885 gretl_matrix_set_rownames(m, rownames);
5886 }
5887 }
5888
fcstats_sample_check(const double * y,const double * f,int * pt1,int * pt2,int * nmiss)5889 static int fcstats_sample_check (const double *y,
5890 const double *f,
5891 int *pt1,
5892 int *pt2,
5893 int *nmiss)
5894 {
5895 int t1 = *pt1;
5896 int t2 = *pt2;
5897 int t, err = 0;
5898
5899 for (t=t1; t<=t2; t++) {
5900 if (na(y[t]) || na(f[t])) {
5901 t1++;
5902 } else {
5903 break;
5904 }
5905 }
5906
5907 for (t=t2; t>=t1; t--) {
5908 if (na(y[t]) || na(f[t])) {
5909 t2--;
5910 } else {
5911 break;
5912 }
5913 }
5914
5915 if (t2 - t1 + 1 < 1) {
5916 err = E_MISSDATA;
5917 } else if (nmiss != NULL) {
5918 /* allow internal NAs */
5919 for (t=t1; t<=t2; t++) {
5920 if (na(y[t]) || na(f[t])) {
5921 *nmiss += 1;
5922 }
5923 }
5924 } else {
5925 /* flag error on internal NAs */
5926 for (t=t1; t<=t2; t++) {
5927 if (na(y[t]) || na(f[t])) {
5928 err = E_MISSDATA;
5929 break;
5930 }
5931 }
5932 }
5933
5934 *pt1 = t1;
5935 *pt2 = t2;
5936
5937 return err;
5938 }
5939
5940 /*
5941 Forecast evaluation statistics: @y is the data series, @f the
5942 forecast.
5943
5944 cf. http://www.economicsnetwork.ac.uk/showcase/cook_forecast
5945 by Steven Cook of Swansea University <s.cook@Swansea.ac.uk>
5946
5947 OPT_D indicates that we should include the Theil decomposition.
5948 OPT_O allows missing values (which will be skipped).
5949 OPT_T indicates that the data are time series.
5950 */
5951
forecast_stats(const double * y,const double * f,int t1,int t2,int * n_used,gretlopt opt,int * err)5952 gretl_matrix *forecast_stats (const double *y, const double *f,
5953 int t1, int t2, int *n_used,
5954 gretlopt opt, int *err)
5955 {
5956 gretl_matrix *m = NULL;
5957 int rows = 6;
5958 int nmiss = 0;
5959
5960 if (opt & OPT_O) {
5961 *err = fcstats_sample_check(y, f, &t1, &t2, &nmiss);
5962 } else {
5963 *err = fcstats_sample_check(y, f, &t1, &t2, NULL);
5964 }
5965 if (*err) {
5966 return NULL;
5967 }
5968
5969 if (opt & OPT_D) {
5970 if (nmiss == 0) {
5971 /* extra rows for Theil decomp */
5972 rows = 9;
5973 } else {
5974 /* scrub the Theil decomp option */
5975 opt &= ~OPT_D;
5976 }
5977 }
5978
5979 m = gretl_column_vector_alloc(rows);
5980
5981 if (m == NULL) {
5982 *err = E_ALLOC;
5983 } else {
5984 int T = t2 - t1 + 1;
5985
5986 *err = fill_fcstats_column(m, y + t1, f + t1, T, opt, 0);
5987 }
5988
5989 if (*err) {
5990 gretl_matrix_free(m);
5991 m = NULL;
5992 } else {
5993 if (n_used != NULL) {
5994 *n_used = t2 - t1 + 1 - nmiss;
5995 }
5996 add_fcstats_rownames(m, opt);
5997 }
5998
5999 return m;
6000 }
6001
matrix_fc_stats(const double * y,const gretl_matrix * F,gretlopt opt,int * err)6002 gretl_matrix *matrix_fc_stats (const double *y,
6003 const gretl_matrix *F,
6004 gretlopt opt,
6005 int *err)
6006 {
6007 int ns = (opt & OPT_D)? 9 : 6;
6008 gretl_matrix *m = NULL;
6009 const double *f;
6010 int j, T = F->rows;
6011
6012 m = gretl_matrix_alloc(ns, F->cols);
6013 if (m == NULL) {
6014 *err = E_ALLOC;
6015 return NULL;
6016 }
6017
6018 for (j=0; j<F->cols; j++) {
6019 f = F->val + T * j;
6020 *err = fill_fcstats_column(m, y, f, T, opt, j);
6021 if (*err) {
6022 break;
6023 }
6024 }
6025
6026 if (*err) {
6027 gretl_matrix_free(m);
6028 m = NULL;
6029 } else {
6030 add_fcstats_rownames(m, opt);
6031 }
6032
6033 return m;
6034 }
6035
6036 /* Remove "repeats" from Nelson-Aalen or Kaplan-Meier output matrix:
6037 these will be identified by all-zero rows, since we defer entering
6038 the calculated values until we reach the end of a sequence of
6039 repeated duration values.
6040 */
6041
durations_squeeze(gretl_matrix ** pA,gretl_matrix ** pTmp,int n,int repeats)6042 static int durations_squeeze (gretl_matrix **pA, gretl_matrix **pTmp,
6043 int n, int repeats)
6044 {
6045 gretl_matrix *T, *A = *pA;
6046 gretl_matrix *Tmp = *pTmp;
6047 double aij;
6048 int m = n - repeats;
6049 int i, j, err;
6050
6051 err = gretl_matrix_realloc(Tmp, m, 3);
6052 if (err) {
6053 return err;
6054 }
6055
6056 j = 0;
6057 for (i=0; i<n; i++) {
6058 aij = gretl_matrix_get(A, i, 0);
6059 if (aij > 0) {
6060 /* got some actual values: transcribe */
6061 gretl_matrix_set(Tmp, j, 0, aij);
6062 aij = gretl_matrix_get(A, i, 1);
6063 gretl_matrix_set(Tmp, j, 1, aij);
6064 aij = gretl_matrix_get(A, i, 2);
6065 gretl_matrix_set(Tmp, j, 2, aij);
6066 j++;
6067 }
6068 }
6069
6070 /* swap matrix pointers */
6071 T = *pA;
6072 *pA = Tmp;
6073 *pTmp = T;
6074
6075 return err;
6076 }
6077
6078 #define cmissing(c,t) (c != NULL && na(c[t]))
6079
6080 /*
6081 Given a series of duration values @y and (possibly) a series of
6082 censoring values @cens (zero for uncensored, non-zero for censored),
6083 computes either the Nelson-Aalen estimator of the cumulative hazard
6084 function OR (given OPT_K) the Kaplan-Meier estimate of the survival
6085 function.
6086
6087 Th return value is a matrix with the (unique) duration value in the
6088 first column, the estimator in the second and its standard error
6089 in the third. Any repeated duration values in the input data are
6090 squeezed out.
6091
6092 On Nelson-Aalen, the piece by O. Borgan at
6093 http://www.med.mcgill.ca/epidemiology/hanley/c609/material/NelsonAalenEstimator.pdf
6094 is quite helpful, and on Kaplan-Meier see
6095 www.public.iastate.edu/~kkoehler/stat565/kaplanmeier.4page.pdf
6096 */
6097
duration_func(const double * y,const double * cens,int t1,int t2,gretlopt opt,int * err)6098 gretl_matrix *duration_func (const double *y, const double *cens,
6099 int t1, int t2, gretlopt opt,
6100 int *err)
6101 {
6102 gretl_matrix *Tmp = NULL;
6103 gretl_matrix *M = NULL;
6104 gretl_matrix *A = NULL;
6105 double yi, ai, aibak;
6106 double vaibak, vai = 0;
6107 int kmeier = (opt & OPT_K);
6108 int nmax = t2 - t1 + 1;
6109 int i, t, n = nmax;
6110 int repeats = 0;
6111 int d, r, ni, ibak;
6112
6113 for (t=t1; t<=t2; t++) {
6114 if (na(y[t]) || cmissing(cens, t)) {
6115 n--;
6116 }
6117 }
6118
6119 if (n < 1) {
6120 *err = E_TOOFEW;
6121 return NULL;
6122 }
6123
6124 Tmp = gretl_zero_matrix_new(n, 2);
6125 A = gretl_zero_matrix_new(n, 3);
6126 if (Tmp == NULL || A == NULL) {
6127 *err = E_ALLOC;
6128 goto bailout;
6129 }
6130
6131 i = 0;
6132 for (t=t1; t<=t2; t++) {
6133 if (n < nmax && (na(y[t]) || cmissing(cens, t))) {
6134 continue;
6135 }
6136 gretl_matrix_set(Tmp, i, 0, y[t]);
6137 if (cens != NULL) {
6138 gretl_matrix_set(Tmp, i, 1, cens[t]);
6139 }
6140 i++;
6141 }
6142
6143 M = gretl_matrix_sort_by_column(Tmp, 0, err);
6144 if (*err) {
6145 goto bailout;
6146 }
6147
6148 r = n;
6149 ibak = -1;
6150
6151 for (i=0; i<n; i++) {
6152 /* if not censored, increment d count */
6153 d = gretl_matrix_get(M, i, 1) == 0;
6154 /* start count of "drop-outs" */
6155 ni = 1;
6156 /* record current y value */
6157 yi = gretl_matrix_get(M, i, 0);
6158 while (i < n-1) {
6159 if (gretl_matrix_get(M, i+1, 0) != yi) {
6160 break;
6161 }
6162 i++;
6163 d += gretl_matrix_get(M, i, 1) == 0;
6164 ni++;
6165 repeats++;
6166 }
6167 if (kmeier) {
6168 /* survival */
6169 ai = (r - d) / (double) r;
6170 vai = d / (r * ((double) r - d));
6171 } else {
6172 /* hazard */
6173 ai = d / (double) r;
6174 vai = ((r - d) * d) / ((r - 1) * (double) r*r);
6175 }
6176 #if 0
6177 fprintf(stderr, "i=%d, y=%g, d=%d, r=%d\n", i, yi, d, r);
6178 #endif
6179 gretl_matrix_set(A, i, 0, yi);
6180 if (ibak >= 0) {
6181 aibak = gretl_matrix_get(A, ibak, 1);
6182 vaibak = gretl_matrix_get(A, ibak, 2);
6183 if (kmeier) {
6184 gretl_matrix_set(A, i, 1, aibak * ai);
6185 gretl_matrix_set(A, i, 2, vaibak + vai);
6186 } else {
6187 gretl_matrix_set(A, i, 1, aibak + ai);
6188 gretl_matrix_set(A, i, 2, vaibak + vai);
6189 }
6190 } else {
6191 gretl_matrix_set(A, i, 1, ai);
6192 gretl_matrix_set(A, i, 2, vai);
6193 }
6194 /* decrement number "at risk" */
6195 r -= ni;
6196 ibak = i;
6197 }
6198
6199 if (kmeier) {
6200 /* compute variance using @vai cumulator */
6201 for (i=0; i<n; i++) {
6202 ai = gretl_matrix_get(A, i, 1);
6203 vai = gretl_matrix_get(A, i, 2);
6204 gretl_matrix_set(A, i, 2, sqrt(ai * ai * vai));
6205 }
6206 } else {
6207 /* Nelson-Aalen: convert to std error */
6208 for (i=0; i<n; i++) {
6209 vai = gretl_matrix_get(A, i, 2);
6210 gretl_matrix_set(A, i, 2, sqrt(vai));
6211 }
6212 }
6213
6214 #if 0
6215 gretl_matrix_print(A, "A");
6216 #endif
6217
6218 if (repeats > 0) {
6219 *err = durations_squeeze(&A, &Tmp, n, repeats);
6220 }
6221
6222 bailout:
6223
6224 if (*err) {
6225 gretl_matrix_free(A);
6226 A = NULL;
6227 }
6228 gretl_matrix_free(Tmp);
6229 gretl_matrix_free(M);
6230
6231 return A;
6232 }
6233
gretl_round(double x)6234 double gretl_round (double x)
6235 {
6236 double fx = floor(x);
6237
6238 if (x < 0) {
6239 return (x - fx <= 0.5)? fx : ceil(x);
6240 } else {
6241 return (x - fx < 0.5)? fx : ceil(x);
6242 }
6243 }
6244
gretl_sgn(double x)6245 double gretl_sgn (double x)
6246 {
6247 if (x == 0) {
6248 return 0;
6249 } else if (x > 0) {
6250 return 1;
6251 } else if (x < 0) {
6252 return -1;
6253 } else {
6254 return NADBL;
6255 }
6256 }
6257
6258 #define neg_x_real_v_err(t) (t == 'J' || t == 'I')
6259
6260 /* evaluates bessel function for scalar nu and x */
6261
gretl_bessel(char type,double v,double x,int * err)6262 double gretl_bessel (char type, double v, double x, int *err)
6263 {
6264 if (na(x) || na(v)) {
6265 return NADBL;
6266 }
6267
6268 if (x < 0) {
6269 /* catch invalid cases for x < 0 */
6270 if (type == 'K') {
6271 *err = E_INVARG;
6272 return NADBL;
6273 } else if (v != floor(v) && (type == 'J' || type == 'I')) {
6274 *err = E_INVARG;
6275 return NADBL;
6276 }
6277 }
6278
6279 switch (type) {
6280 case 'J':
6281 return cephes_bessel_Jv(v, x);
6282 case 'Y':
6283 return cephes_bessel_Yv(v, x);
6284 case 'I':
6285 if (v == 0) {
6286 return cephes_bessel_I0(x);
6287 } else if (v == 1) {
6288 return cephes_bessel_I1(x);
6289 } else if (v > 0) {
6290 return cephes_bessel_Iv(v, x);
6291 } else {
6292 /* cephes_bessel_Iv is not right for v < 0 */
6293 double b1 = netlib_bessel_K(-v, x, 1);
6294 double b2 = cephes_bessel_Iv(-v, x);
6295
6296 return (2*b1*sin(-v*M_PI)) / M_PI + b2;
6297 }
6298 break;
6299 case 'K':
6300 /* bessel K is symmetric around v = 0 */
6301 v = fabs(v);
6302 if (v == 0) {
6303 return cephes_bessel_K0(x);
6304 } else if (v == 1) {
6305 return cephes_bessel_K1(x);
6306 } else if (v == floor(v) && v <= 30.0) {
6307 /* cephes doesn't do non-integer v, and also loses
6308 accuracy beyond |v| = 30
6309 */
6310 return cephes_bessel_Kn(v, x);
6311 } else {
6312 /* accurate but expensive */
6313 return netlib_bessel_K(v, x, 1);
6314 }
6315 break;
6316 default:
6317 /* unknown function type */
6318 return NADBL;
6319 }
6320 }
6321
6322 /* Net Present Value: note that the first value is taken as
6323 occurring "now" and is not discounted */
6324
gretl_npv(int t1,int t2,const double * x,double r,int pd,int * err)6325 double gretl_npv (int t1, int t2, const double *x, double r,
6326 int pd, int *err)
6327 {
6328 double d, PV = 0.0;
6329 int i, n = 0;
6330
6331 if (pd != 1 && pd != 4 && pd != 12) {
6332 *err = E_PDWRONG;
6333 return NADBL;
6334 }
6335
6336 if (pd == 1) {
6337 d = 1 + r;
6338 } else if (r < -1.0) {
6339 *err = E_NAN;
6340 return 0.0 / 0.0;
6341 } else {
6342 d = pow(1 + r, 1.0 / pd);
6343 }
6344
6345 for (i=t1; i<=t2; i++) {
6346 if (!na(x[i])) {
6347 PV += x[i] / (pow(d, i-t1));
6348 n++;
6349 }
6350 }
6351
6352 if (n == 0) {
6353 PV = NADBL;
6354 }
6355
6356 return PV;
6357 }
6358
6359 /* Internal Rate of Return */
6360
gretl_irr(const double * x,int n,int pd,int * err)6361 double gretl_irr (const double *x, int n, int pd, int *err)
6362 {
6363 double PV, r, r1 = 0.02, r0 = -0.02;
6364 int gotplus = 0, gotminus = 0;
6365 int i, m = n;
6366
6367 for (i=0; i<n; i++) {
6368 if (na(x[i])) {
6369 m--;
6370 } else if (x[i] > 0) {
6371 gotplus = 1;
6372 } else if (x[i] < 0) {
6373 gotminus = 1;
6374 }
6375 }
6376
6377 if (!gotplus && !gotminus) {
6378 /* null payment stream */
6379 return (m > 0)? 0 : NADBL;
6380 }
6381
6382 if (gotplus && !gotminus) {
6383 /* payments all positive */
6384 return (x[0] > 0)? 0.0/0.0 : 1.0/0.0;
6385 } else if (gotminus && !gotplus) {
6386 /* payments all negative */
6387 return (x[0] < 0)? 0.0/0.0 : -1.0/0.0;
6388 }
6389
6390 /* find (r0, r1) bracket for solution, if possible */
6391
6392 while ((PV = gretl_npv(0, n-1, x, r0, pd, err)) < 0 && !*err) {
6393 if (r0 < -DBL_MAX / 2.0) {
6394 return -1.0/0.0;
6395 }
6396 r1 = r0;
6397 r0 *= 2.0;
6398 }
6399
6400 while ((PV = gretl_npv(0, n-1, x, r1, pd, err)) > 0 && !*err) {
6401 if (r1 > DBL_MAX / 2.0) {
6402 return 1.0/0.0;
6403 }
6404 r0 = r1;
6405 r1 *= 2.0;
6406 }
6407
6408 #if 0
6409 fprintf(stderr, "initial bracket for r: %g to %g\n", r0, r1);
6410 #endif
6411
6412 r = r1;
6413
6414 /* now do binary search */
6415
6416 for (i=0; i<32 && !*err; i++) {
6417 if (floateq(PV, 0.0)) {
6418 break;
6419 }
6420 if (PV < 0) {
6421 /* r is too high */
6422 if (r < r1) {
6423 r1 = r;
6424 }
6425 r = (r + r0) / 2.0;
6426 } else {
6427 /* r too low */
6428 if (r > r0) {
6429 r0 = r;
6430 }
6431 r = (r + r1) / 2.0;
6432 }
6433 PV = gretl_npv(0, n-1, x, r, pd, err);
6434 #if 0
6435 fprintf(stderr, "binary search: r = %.9g, PV = %g\n", r, PV);
6436 #endif
6437 }
6438
6439 if (*err) {
6440 r = NADBL;
6441 }
6442
6443 return r;
6444 }
6445
logistic_cdf(double x)6446 double logistic_cdf (double x)
6447 {
6448 double emx, ret;
6449
6450 errno = 0;
6451
6452 emx = exp(-x);
6453 if (errno == ERANGE) {
6454 ret = (x > 0)? 1.0 : 0.0;
6455 errno = 0;
6456 } else {
6457 ret = 1.0 / (1.0 + emx);
6458 }
6459
6460 return ret;
6461 }
6462
6463 /* Convert from @x or @list (whichever is non-NULL) to matrix,
6464 respecting the current sample range. If @cfac > 1 we take
6465 only every cfac-th row from the data source.
6466 */
6467
tdisagg_matrix_from_series(const double * x,int xnum,const int * list,const DATASET * dset,int cfac,int * err)6468 gretl_matrix *tdisagg_matrix_from_series (const double *x,
6469 int xnum,
6470 const int *list,
6471 const DATASET *dset,
6472 int cfac, int *err)
6473 {
6474 gretl_matrix *m = NULL;
6475 char **S = NULL;
6476 int k = list != NULL ? list[0] : 1;
6477 int T = dset->t2 - dset->t1 + 1;
6478 int i, s, t;
6479 int serr = 0;
6480
6481 if (cfac > 1) {
6482 int rem = T % cfac;
6483
6484 T = T / cfac + (rem > 0);
6485 }
6486
6487 m = gretl_matrix_alloc(T, k);
6488 if (m == NULL) {
6489 return NULL;
6490 }
6491
6492 if (list != NULL) {
6493 for (i=0; i<k; i++) {
6494 x = dset->Z[list[i+1]];
6495 s = dset->t1;
6496 for (t=0; t<T; t++) {
6497 gretl_matrix_set(m, t, i, x[s]);
6498 s += cfac;
6499 }
6500 }
6501 S = gretl_list_get_names_array(list, dset, &serr);
6502 } else {
6503 s = dset->t1;
6504 for (t=0; t<T; t++) {
6505 m->val[t] = x[s];
6506 s += cfac;
6507 }
6508 if (xnum > 0) {
6509 S = strings_array_new(1);
6510 if (S != NULL) {
6511 S[0] = gretl_strdup(dset->varname[xnum]);
6512 }
6513 }
6514 }
6515
6516 gretl_matrix_set_t1(m, dset->t1);
6517 gretl_matrix_set_t2(m, dset->t2);
6518
6519 if (S != NULL) {
6520 gretl_matrix_set_colnames(m, S);
6521 }
6522
6523 return m;
6524 }
6525
6526 /**
6527 * matrix_tdisagg:
6528 * @Y: T x g: holds the original data to be disaggregated, series
6529 * in columns.
6530 * @X: (optionally) holds covariates of Y at the higher frequency.
6531 * @s: the expansion factor: e.g. 3 for quarterly to monthly,
6532 * 4 for annual to quarterly, 12 for annual to monthly.
6533 * @b: bundle containing optional arguments, or NULL.
6534 * @r: bundle for retrieving details, or NULL.
6535 * @err: location to receive error code.
6536 *
6537 * Temporal disaggregation, via regression or Denton-type method.
6538 *
6539 * Returns: matrix containing the disaggregated series, or
6540 * NULL on failure.
6541 */
6542
matrix_tdisagg(const gretl_matrix * Y,const gretl_matrix * X,int s,gretl_bundle * b,gretl_bundle * res,DATASET * dset,PRN * prn,int * err)6543 gretl_matrix *matrix_tdisagg (const gretl_matrix *Y,
6544 const gretl_matrix *X,
6545 int s, gretl_bundle *b,
6546 gretl_bundle *res,
6547 DATASET *dset,
6548 PRN *prn, int *err)
6549 {
6550 gretl_matrix *(*tdisagg) (const gretl_matrix *,
6551 const gretl_matrix *,
6552 int, gretl_bundle *,
6553 gretl_bundle *,
6554 DATASET *,
6555 PRN *, int *);
6556 gretl_matrix *ret = NULL;
6557
6558 if (s <= 1) {
6559 *err = E_INVARG;
6560 return NULL;
6561 }
6562
6563 if (X != NULL) {
6564 double xs = X->rows / (double) Y->rows;
6565
6566 if (xs < s) {
6567 *err = E_INVARG;
6568 return NULL;
6569 } else if (X->is_complex) {
6570 *err = E_CMPLX;
6571 return NULL;
6572 }
6573 }
6574
6575 tdisagg = get_plugin_function("time_disaggregate");
6576
6577 if (tdisagg == NULL) {
6578 *err = E_FOPEN;
6579 } else {
6580 ret = (*tdisagg) (Y, X, s, b, res, dset, prn, err);
6581 }
6582
6583 return ret;
6584 }
6585
6586 /* For backward compatibility only: support the old chowlin()
6587 function, with "avg" as default aggregation type.
6588 */
6589
matrix_chowlin(const gretl_matrix * Y,const gretl_matrix * X,int s,int * err)6590 gretl_matrix *matrix_chowlin (const gretl_matrix *Y,
6591 const gretl_matrix *X,
6592 int s, int *err)
6593 {
6594 gretl_bundle *b = gretl_bundle_new();
6595 gretl_matrix *m = NULL;
6596
6597 gretl_bundle_set_string(b, "aggtype", "avg");
6598 m = matrix_tdisagg(Y, X, s, b, NULL, NULL, NULL, err);
6599 gretl_bundle_destroy(b);
6600
6601 return m;
6602 }
6603
list_ok_dollar_vars(DATASET * dset,PRN * prn)6604 int list_ok_dollar_vars (DATASET *dset, PRN *prn)
6605 {
6606 int nm = 0;
6607 int i;
6608
6609 pprintf(prn, "\n%s\n", _("model-related"));
6610
6611 for (i=R_MAX+1; i<M_MAX; i++) {
6612 GretlType type = GRETL_TYPE_NONE;
6613 double x = NADBL;
6614 gretl_matrix *m = NULL;
6615 int *list = NULL;
6616 int err = 0;
6617
6618 if (i < M_SCALAR_MAX) {
6619 x = saved_object_get_scalar(NULL, i, dset, &err);
6620 if (!na(x)) {
6621 type = GRETL_TYPE_DOUBLE;
6622 }
6623 } else if (i > M_SCALAR_MAX && i < M_SERIES_MAX) {
6624 type = GRETL_TYPE_SERIES;
6625 err = saved_object_get_series(NULL, NULL, i, dset);
6626 if (err) {
6627 if (i == M_UHAT || i == M_YHAT || i == M_SIGMA) {
6628 /* maybe the result is a matrix? */
6629 type = saved_object_get_data_type(NULL, i);
6630 if (type == GRETL_TYPE_MATRIX) {
6631 m = saved_object_get_matrix(NULL, i, &err);
6632 }
6633 }
6634 }
6635 } else if (i > M_SERIES_MAX && i < M_MATRIX_MAX) {
6636 type = GRETL_TYPE_MATRIX;
6637 m = saved_object_get_matrix(NULL, i, &err);
6638 } else if (i > M_MATRIX_MAX && i < M_MBUILD_MAX) {
6639 type = GRETL_TYPE_MATRIX;
6640 m = saved_object_build_matrix(NULL, i,
6641 dset, &err);
6642 } else {
6643 type = GRETL_TYPE_LIST;
6644 list = saved_object_get_list(NULL, i, &err);
6645 }
6646
6647 if (!err && type != GRETL_TYPE_NONE) {
6648 const char *typestr = gretl_type_get_name(type);
6649
6650 if (!na(x)) {
6651 pprintf(prn, " %s (%s: %g)\n", mvarname(i), typestr, x);
6652 } else {
6653 pprintf(prn, " %s (%s)\n", mvarname(i), typestr);
6654 }
6655 gretl_matrix_free(m);
6656 free(list);
6657 nm++;
6658 }
6659 }
6660
6661 if (nm == 0) {
6662 pprintf(prn, " %s\n", _("none"));
6663 }
6664
6665 pprintf(prn, "\n%s\n", _("other"));
6666
6667 for (i=1; i<R_SCALAR_MAX; i++) {
6668 double x = dvar_get_scalar(i, dset);
6669
6670 if (!na(x)) {
6671 pprintf(prn, " %s (scalar: %g)\n", dvarname(i), x);
6672 }
6673 }
6674
6675 pputc(prn, '\n');
6676
6677 return 0;
6678 }
6679
nw_kernel(double x)6680 static double nw_kernel (double x)
6681 {
6682 /*
6683 eventually, a libset variable will be used to choose among
6684 various kernels; for now, the Normal density will have to do.
6685 */
6686 double ret, x2 = x*x;
6687
6688 #if 0
6689 ret = exp(-0.5*x2)/SQRT_2_PI;
6690 #else
6691 ret = exp(-0.5*x2);
6692 #endif
6693
6694 return ret;
6695 }
6696
6697 /**
6698 * nadaraya_watson:
6699 * @y: array with "dependent variable"
6700 * @x: array with "explanatory variable"
6701 * @h: double, bandwidth (may be negative; see below)
6702 * @dset: data set information.
6703 * @LOO: Boolean flag (see below)
6704 * @trim: trim parameter (see below)
6705 * @m: array to hold results
6706 *
6707 * Implements the Nadaraya-Watson nonparametric estimator for the
6708 * conditional mean of @y given @x via the formula
6709 *
6710 * \widehat{m}_h(x)=\frac{\sum_{i=1}^n K_h(x-X_i)
6711 * Y_i}{\sum_{i=1}^nK_h(x-X_i)}
6712 *
6713 * and computes it for all elements of @x. Note that, in principle,
6714 * the kernel K(X_i-X_j) must be computed for every combination of i
6715 * and j, but since the function K() is assumed to be symmetric, we
6716 * compute it once to save time.
6717 *
6718 * The scalar @h holds the kernel bandwidth; if @LOO is non-zero the
6719 * "leave-one-out" variant of the estimator (essentially a jackknife
6720 * estimator; see Pagan and Ullah, Nonparametric Econometrics, p. 119)
6721 * is computed.
6722 *
6723 * A rudimentary form of trimming is implemented: the kernel function
6724 * is set to 0 when the product of the @trim parameter times the
6725 * bandwidth @h exceeds |X_i - X_j|, so as to speed computation and
6726 * enhance numerical stability.
6727 *
6728 * Returns: 0 on successful completion, non-zero code on error.
6729 */
6730
nadaraya_watson(const double * y,const double * x,double h,DATASET * dset,int LOO,double trim,double * m)6731 int nadaraya_watson (const double *y, const double *x, double h,
6732 DATASET *dset, int LOO, double trim,
6733 double *m)
6734 {
6735 int t, s, err = 0;
6736 int t1 = dset->t1, t2 = dset->t2;
6737 double xt, xs, ys, yt, k;
6738 double *num, *den;
6739 int n = t2 + 1; /* really? */
6740
6741 if (h < 0.0) {
6742 return E_DATA;
6743 } else if (h == 0.0) {
6744 /* automatic data-based bandwidth */
6745 const double *sampx = x + dset->t1;
6746 int n = sample_size(dset);
6747
6748 h = kernel_bandwidth(sampx, n);
6749 }
6750
6751 num = malloc(2 * n * sizeof *num);
6752 if (num == NULL) {
6753 return E_ALLOC;
6754 }
6755
6756 den = num + n;
6757 trim *= h;
6758
6759 /* here we initialize numerator and denominator; we use the
6760 "diagonal" in the standard case and 0 in the leave-one-out
6761 case
6762 */
6763
6764 if (LOO) {
6765 for (t=t1; t<=t2; t++) {
6766 num[t] = den[t] = 0.0;
6767 }
6768 } else {
6769 k = nw_kernel(0);
6770 for (t=t1; t<=t2; t++) {
6771 if (!na(y[t])) {
6772 num[t] = k * y[t];
6773 den[t] = k;
6774 } else {
6775 num[t] = 0;
6776 den[t] = 0;
6777 }
6778 }
6779 }
6780
6781 for (t=t1; t<=t2; t++) {
6782 xt = x[t];
6783 if (!na(xt)) {
6784 yt = y[t];
6785 for (s=t+1; s<=t2; s++) {
6786 xs = x[s];
6787 if (!na(xs) && fabs(xs-xt) < trim) {
6788 k = nw_kernel((xt - xs)/h);
6789 if (!na(yt)) {
6790 num[s] += k * yt;
6791 den[s] += k;
6792 }
6793 ys = y[s];
6794 if (!na(ys)) {
6795 num[t] += k * ys;
6796 den[t] += k;
6797 }
6798 }
6799 }
6800 m[t] = num[t] / den[t];
6801 } else {
6802 m[t] = NADBL;
6803 }
6804 }
6805
6806 free(num);
6807
6808 return err;
6809 }
6810
xy_get_sample(const double * y,const double * x,int * t1,int * t2,int * n)6811 static int xy_get_sample (const double *y, const double *x,
6812 int *t1, int *t2, int *n)
6813 {
6814 int t, nxy, err = 0;
6815
6816 for (t=*t1; t<=*t2; t++) {
6817 if (na(x[t])) {
6818 *t1 += 1;
6819 } else {
6820 break;
6821 }
6822 }
6823
6824 for (t=*t2; t>=*t1; t--) {
6825 if (na(x[t])) {
6826 *t2 -= 1;
6827 } else {
6828 break;
6829 }
6830 }
6831
6832 /* nxy = the number of points where we have valid values
6833 for both x and y; n = the number of valid x-values
6834 */
6835
6836 nxy = *n = 0;
6837
6838 for (t=*t1; t<=*t2; t++) {
6839 if (!na(x[t])) {
6840 *n += 1;
6841 if (!na(y[t])) {
6842 nxy++;
6843 }
6844 }
6845 }
6846
6847 if (nxy < 16) {
6848 err = E_TOOFEW;
6849 }
6850
6851 return err;
6852 }
6853
get_dataset_t(const double * x,int pos,int t1)6854 static int get_dataset_t (const double *x, int pos, int t1)
6855 {
6856 int t, k = 0;
6857
6858 for (t=t1; k<=pos; t++) {
6859 if (!na(x[t])) {
6860 if (k == pos) {
6861 return t;
6862 }
6863 k++;
6864 }
6865 }
6866
6867 return 0;
6868 }
6869
6870 /* note: the following requires that @y and @x are truly dataset
6871 series; vectors cannot be accepted given the dependence on
6872 dset->t1 and dset->t2 for the range of data to be used
6873 */
6874
gretl_loess(const double * y,const double * x,int poly_order,double bandwidth,gretlopt opt,DATASET * dset,double * m)6875 int gretl_loess (const double *y, const double *x, int poly_order,
6876 double bandwidth, gretlopt opt, DATASET *dset,
6877 double *m)
6878 {
6879 gretl_matrix *my, *mx;
6880 gretl_matrix *yh = NULL;
6881 int *s_order = NULL;
6882 int t1 = dset->t1;
6883 int t2 = dset->t2;
6884 int s, t, n;
6885 int err = 0;
6886
6887 if (poly_order < 0 || poly_order > 2 ||
6888 bandwidth <= 0 || bandwidth >= 1) {
6889 return E_DATA;
6890 }
6891
6892 err = xy_get_sample(y, x, &t1, &t2, &n);
6893 if (err) {
6894 return err;
6895 }
6896
6897 /* note: n holds the number of non-missing observations
6898 on x; the associated y-value may or may not be
6899 missing
6900 */
6901
6902 my = gretl_column_vector_alloc(n);
6903 mx = gretl_column_vector_alloc(n);
6904
6905 if (my == NULL || mx == NULL) {
6906 err = E_ALLOC;
6907 } else {
6908 s = 0;
6909 for (t=t1; t<=t2; t++) {
6910 if (!na(x[t])) {
6911 my->val[s] = y[t];
6912 mx->val[s] = x[t];
6913 s++;
6914 }
6915 }
6916 /* sort the points by the value of x */
6917 err = sort_pairs_by_x(mx, my, &s_order, NULL);
6918 }
6919
6920 if (!err) {
6921 yh = loess_fit(mx, my, poly_order, bandwidth, opt, &err);
6922 }
6923
6924 if (!err) {
6925 /* put the yh values into dataset order */
6926 for (s=0; s<n; s++) {
6927 t = get_dataset_t(x, s_order[s], t1);
6928 m[t] = yh->val[s];
6929 }
6930 }
6931
6932 gretl_matrix_free(my);
6933 gretl_matrix_free(mx);
6934 gretl_matrix_free(yh);
6935 free(s_order);
6936
6937 return err;
6938 }
6939
series_get_nobs(int t1,int t2,const double * x)6940 double series_get_nobs (int t1, int t2, const double *x)
6941 {
6942 int t, n = 0;
6943
6944 for (t=t1; t<=t2; t++) {
6945 if (!na(x[t])) n++;
6946 }
6947
6948 return n;
6949 }
6950
series_sum_all(int t1,int t2,const double * x)6951 double series_sum_all (int t1, int t2, const double *x)
6952 {
6953 double xsum = 0.0;
6954 int t;
6955
6956 for (t=t1; t<=t2; t++) {
6957 if (na(x[t])) {
6958 xsum = NADBL;
6959 break;
6960 } else {
6961 xsum += x[t];
6962 }
6963 }
6964
6965 return xsum;
6966 }
6967
delete_null_cases(gretl_matrix * m,int keeprows,int * err)6968 static gretl_matrix *delete_null_cases (gretl_matrix *m,
6969 int keeprows,
6970 int *err)
6971 {
6972 gretl_matrix *ret;
6973
6974 ret = gretl_matrix_alloc(keeprows, m->cols);
6975 if (ret == NULL) {
6976 *err = E_ALLOC;
6977 } else {
6978 double x;
6979 int i, j;
6980
6981 for (i=0; i<keeprows; i++) {
6982 for (j=0; j<m->cols; j++) {
6983 x = gretl_matrix_get(m, i, j);
6984 gretl_matrix_set(ret, i, j, x);
6985 }
6986 }
6987 }
6988
6989 gretl_matrix_free(m);
6990
6991 return ret;
6992 }
6993
real_aggregate_by(const double * x,const double * y,const int * xlist,const int * ylist,const DATASET * dset,double * tmp,double (* builtin)(),gchar * usercall,DATASET * tmpset,int just_count,int * err)6994 static gretl_matrix *real_aggregate_by (const double *x,
6995 const double *y,
6996 const int *xlist,
6997 const int *ylist,
6998 const DATASET *dset,
6999 double *tmp,
7000 double (*builtin)(),
7001 gchar *usercall,
7002 DATASET *tmpset,
7003 int just_count,
7004 int *err)
7005 {
7006 gretl_matrix *m = NULL;
7007 gretl_matrix **listvals;
7008 gretl_matrix *yvals;
7009 int n = sample_size(dset);
7010 int match, skipnull = 0;
7011 int countcol = 1;
7012 int maxcases;
7013 int ny, nx, mcols;
7014 int *idx;
7015 double *valvec;
7016 double fx;
7017 int i, j, k, t, ni, ii;
7018
7019 /* note:
7020 - to skip null cases in the output matrix, set skipnull = 1
7021 - to eliminate the case-count column, set countcol = 0
7022 */
7023
7024 ny = ylist == NULL ? 1 : ylist[0];
7025
7026 valvec = malloc(ny * sizeof *valvec);
7027 idx = malloc(ny * sizeof *idx);
7028 listvals = malloc(ny * sizeof *listvals);
7029
7030 if (valvec == NULL || idx == NULL || listvals == NULL) {
7031 *err = E_ALLOC;
7032 goto bailout;
7033 }
7034
7035 for (j=0; j<ny; j++) {
7036 listvals[j] = NULL;
7037 }
7038
7039 /* For @y (or each member of @ylist), create a vector holding
7040 its distinct values. As we go, count the combinations of
7041 y-values and initialize the valvec and idx arrays.
7042 */
7043
7044 maxcases = 1;
7045 for (j=0; j<ny && !*err; j++) {
7046 if (ylist != NULL) {
7047 y = dset->Z[ylist[j+1]] + dset->t1;
7048 }
7049 listvals[j] = gretl_matrix_values(y, n, OPT_S, err);
7050 if (!*err) {
7051 maxcases *= listvals[j]->rows;
7052 valvec[j] = listvals[j]->val[0];
7053 idx[j] = 0;
7054 }
7055 }
7056
7057 if (just_count) {
7058 x = NULL;
7059 xlist = NULL;
7060 skipnull = 0;
7061 countcol = 0;
7062 nx = 1;
7063 } else {
7064 nx = xlist == NULL ? 1 : xlist[0];
7065 }
7066
7067 mcols = ny + nx + countcol;
7068
7069 /* Allocate a matrix with enough rows to hold all the y-value
7070 combinations (maxcases) and enough columns to hold a
7071 record of the y values, a count of matching cases, and the
7072 value(s) of f(x).
7073 */
7074
7075 if (!*err) {
7076 m = gretl_zero_matrix_new(maxcases, mcols);
7077 if (m == NULL) {
7078 *err = E_ALLOC;
7079 }
7080 }
7081
7082 if (*err) {
7083 goto bailout;
7084 }
7085
7086 ii = 0;
7087
7088 for (i=0; i<maxcases; i++) {
7089 ni = 0;
7090 for (k=0; k<nx && !*err; k++) {
7091 /* fill tmp with x for {y} == {yi} */
7092 if (xlist != NULL) {
7093 x = dset->Z[xlist[k+1]] + dset->t1;
7094 }
7095 ni = 0;
7096 for (t=0; t<n; t++) {
7097 match = 1;
7098 for (j=0; j<ny; j++) {
7099 if (ylist != NULL) {
7100 y = dset->Z[ylist[j+1]] + dset->t1;
7101 }
7102 if (y[t] != valvec[j]) {
7103 match = 0;
7104 break;
7105 }
7106 }
7107 if (match) {
7108 if (x != NULL) {
7109 tmp[ni] = x[t];
7110 }
7111 ni++;
7112 }
7113 }
7114 if (just_count) {
7115 gretl_matrix_set(m, ii, ny, ni);
7116 break;
7117 } else if (ni == 0 && skipnull) {
7118 /* exclude cases where the obs count is 0 */
7119 break;
7120 } else {
7121 /* aggregate x at current y values */
7122 if (builtin != NULL) {
7123 fx = (*builtin)(0, ni-1, tmp);
7124 } else {
7125 tmpset->t2 = ni-1;
7126 fx = generate_scalar(usercall, tmpset, err);
7127 }
7128 gretl_matrix_set(m, ii, ny+k+countcol, fx);
7129 }
7130 }
7131
7132 if (ni > 0 || !skipnull) {
7133 for (j=0; j<ny; j++) {
7134 gretl_matrix_set(m, ii, j, valvec[j]);
7135 }
7136 if (countcol) {
7137 gretl_matrix_set(m, ii, ny, ni);
7138 }
7139 ii++;
7140 }
7141
7142 if (*err) {
7143 break;
7144 }
7145
7146 /* set up the next y-values array */
7147 for (j=ny-1; j>=0; j--) {
7148 yvals = listvals[j];
7149 if (idx[j] == yvals->rows - 1) {
7150 /* restart the index at this level and pass
7151 the buck */
7152 idx[j] = 0;
7153 valvec[j] = yvals->val[0];
7154 } else {
7155 /* increment the index at this level and
7156 we're done */
7157 idx[j] += 1;
7158 valvec[j] = yvals->val[idx[j]];
7159 break;
7160 }
7161 }
7162 }
7163
7164 if (skipnull && ii < maxcases) {
7165 /* the matrix contains some null cases: shrink it */
7166 m = delete_null_cases(m, ii, err);
7167 }
7168
7169 bailout:
7170
7171 free(valvec);
7172 free(idx);
7173
7174 if (listvals != NULL) {
7175 for (j=0; j<ny; j++) {
7176 gretl_matrix_free(listvals[j]);
7177 }
7178 free(listvals);
7179 }
7180
7181 return m;
7182 }
7183
aggr_add_colnames(gretl_matrix * m,const int * ylist,const int * xlist,const DATASET * dset,int just_count)7184 static void aggr_add_colnames (gretl_matrix *m,
7185 const int *ylist,
7186 const int *xlist,
7187 const DATASET *dset,
7188 int just_count)
7189 {
7190 char **S = NULL;
7191 int i, j, n = m->cols;
7192 int err = 0;
7193
7194 S = strings_array_new(n);
7195 if (S == NULL) {
7196 return;
7197 }
7198
7199 j = 0;
7200
7201 if (ylist != NULL && ylist[0] > 0) {
7202 char **Sy = gretl_list_get_names_array(ylist, dset, &err);
7203
7204 if (!err) {
7205 for (i=0; i<ylist[0]; i++) {
7206 S[j++] = Sy[i];
7207 Sy[i] = NULL;
7208 }
7209 free(Sy);
7210 }
7211 } else {
7212 S[j++] = gretl_strdup("byvar");
7213 }
7214
7215 if (!err) {
7216 S[j++] = gretl_strdup("count");
7217 }
7218
7219 if (!err && j < n) {
7220 if (xlist != NULL && xlist[0] > 0) {
7221 char **Sx = gretl_list_get_names_array(xlist, dset, &err);
7222
7223 if (!err) {
7224 for (i=0; i<xlist[0]; i++) {
7225 S[j++] = Sx[i];
7226 Sx[i] = NULL;
7227 }
7228 free(Sx);
7229 }
7230 } else {
7231 S[j] = gretl_strdup("f(x)");
7232 }
7233 }
7234
7235 if (!err) {
7236 gretl_matrix_set_colnames(m, S);
7237 } else {
7238 strings_array_free(S, n);
7239 }
7240 }
7241
7242 /**
7243 * aggregate_by:
7244 * @x: data array.
7245 * @y: discrete variable.
7246 * @xlist: list of x series or NULL.
7247 * @ylist: list of y series or NULL.
7248 * @fncall: the name of the aggregation function.
7249 * @dset: data set information.
7250 * @err: location to receive error code.
7251 *
7252 * Aggregates one or more data series (x) "by" the values of
7253 * one or more discrete series (y). In general either @x or
7254 * @xlist should be non-NULL, and one of @y or @ylist should
7255 * be non-NULL. (If @xlist is non-NULL then @x is ignored,
7256 * and similarly for @ylist and @y). For an account of the
7257 * matrix that is returned, see the help for gretl's
7258 * "aggregate" command.
7259 *
7260 * Returns: allocated matrix, or NULL on failure.
7261 */
7262
aggregate_by(const double * x,const double * y,const int * xlist,const int * ylist,const char * fncall,const DATASET * dset,int * err)7263 gretl_matrix *aggregate_by (const double *x,
7264 const double *y,
7265 const int *xlist,
7266 const int *ylist,
7267 const char *fncall,
7268 const DATASET *dset,
7269 int *err)
7270 {
7271 DATASET *tmpset = NULL;
7272 gretl_matrix *m = NULL;
7273 double *tmp = NULL;
7274 double (*builtin) (int, int, const double *) = NULL;
7275 gchar *usercall = NULL;
7276 int just_count = 0;
7277 int n;
7278
7279 if (fncall == NULL || !strcmp(fncall, "null")) {
7280 just_count = 1;
7281 }
7282
7283 if ((y == NULL && ylist == NULL) ||
7284 (!just_count && x == NULL && xlist == NULL)) {
7285 *err = E_DATA;
7286 return NULL;
7287 }
7288
7289 if (!just_count) {
7290 int f = function_lookup(fncall);
7291
7292 switch (f) {
7293 case F_SUM:
7294 builtin = gretl_sum;
7295 break;
7296 case F_SUMALL:
7297 builtin = series_sum_all;
7298 break;
7299 case F_MEAN:
7300 builtin = gretl_mean;
7301 break;
7302 case F_SD:
7303 builtin = gretl_stddev;
7304 break;
7305 case F_VCE:
7306 builtin = gretl_variance;
7307 break;
7308 case F_SST:
7309 builtin = gretl_sst;
7310 break;
7311 case F_SKEWNESS:
7312 builtin = gretl_skewness;
7313 break;
7314 case F_KURTOSIS:
7315 builtin = gretl_kurtosis;
7316 break;
7317 case F_MIN:
7318 builtin = gretl_min;
7319 break;
7320 case F_MAX:
7321 builtin = gretl_max;
7322 break;
7323 case F_MEDIAN:
7324 builtin = gretl_median;
7325 break;
7326 case F_GINI:
7327 builtin = gretl_gini;
7328 break;
7329 case F_NOBS:
7330 builtin = series_get_nobs;
7331 break;
7332 default:
7333 break;
7334 }
7335 }
7336
7337 n = sample_size(dset);
7338
7339 if (just_count) {
7340 ; /* nothing to do here */
7341 } else if (builtin != NULL) {
7342 /* nice and simple */
7343 tmp = malloc(n * sizeof *tmp);
7344 if (tmp == NULL) {
7345 *err = E_ALLOC;
7346 }
7347 } else {
7348 /* try treating as user-defined call */
7349 tmpset = create_auxiliary_dataset(2, n, OPT_NONE);
7350 if (tmpset == NULL) {
7351 *err = E_ALLOC;
7352 } else {
7353 strcpy(tmpset->varname[1], "x");
7354 tmp = tmpset->Z[1];
7355 usercall = g_strdup_printf("%s(x)", fncall);
7356 }
7357 }
7358
7359 if (!*err) {
7360 x = (x == NULL)? NULL : x + dset->t1;
7361 y = (y == NULL)? NULL : y + dset->t1;
7362 m = real_aggregate_by(x, y, xlist, ylist, dset, tmp,
7363 builtin, usercall, tmpset,
7364 just_count, err);
7365 }
7366
7367 if (m != NULL && *err) {
7368 gretl_matrix_free(m);
7369 m = NULL;
7370 }
7371
7372 if (m != NULL) {
7373 aggr_add_colnames(m, ylist, xlist, dset, just_count);
7374 }
7375
7376 if (tmpset != NULL) {
7377 g_free(usercall);
7378 destroy_dataset(tmpset);
7379 } else {
7380 free(tmp);
7381 }
7382
7383 return m;
7384 }
7385
monthly_or_quarterly_dates(const DATASET * dset,int pd,double sd0,int T,double * x)7386 static int monthly_or_quarterly_dates (const DATASET *dset,
7387 int pd, double sd0, int T,
7388 double *x)
7389 {
7390 char obstr[8];
7391 int mo, yr = floor(sd0);
7392 int t, mmax, dm;
7393
7394 if (pd == 4) {
7395 sprintf(obstr, "%.1f", sd0 - yr);
7396 mo = 1 + (atoi(obstr + 2) - 1) * 3;
7397 mmax = 10;
7398 dm = 3;
7399 } else {
7400 sprintf(obstr, "%.2f", sd0 - yr);
7401 mo = atoi(obstr + 2);
7402 mmax = 12;
7403 dm = 1;
7404 }
7405
7406 for (t=0; t<T; t++) {
7407 x[t] = 10000*yr + 100*mo + 1;
7408 if (mo == mmax) {
7409 yr++;
7410 mo = 1;
7411 } else {
7412 mo += dm;
7413 }
7414 }
7415
7416 return 0;
7417 }
7418
annual_or_decennial_dates(const DATASET * dset,int pd,double sd0,int T,double * x)7419 static int annual_or_decennial_dates (const DATASET *dset,
7420 int pd, double sd0, int T,
7421 double *x)
7422 {
7423 int t, yr = (int) sd0;
7424
7425 for (t=0; t<T; t++) {
7426 x[t] = 10000*yr + 101;
7427 yr += pd;
7428 }
7429
7430 return 0;
7431 }
7432
panel_daily_or_weekly(const DATASET * dset,double * x)7433 static int panel_daily_or_weekly (const DATASET *dset, double *x)
7434 {
7435 DATASET tsset = {0};
7436 char datestr[16];
7437 int t, y, m, d, n;
7438 int err = 0;
7439
7440 tsset.structure = TIME_SERIES;
7441 tsset.pd = dset->panel_pd;
7442 tsset.sd0 = dset->panel_sd0;
7443 tsset.n = dset->pd;
7444 calendar_date_string(tsset.stobs, 0, &tsset);
7445
7446 for (t=0; t<dset->pd && !err; t++) {
7447 ntolabel(datestr, t, &tsset);
7448 n = sscanf(datestr, "%d-%d-%d", &y, &m, &d);
7449 if (n != 3) {
7450 err = E_DATA;
7451 } else {
7452 x[t] = 10000*y + 100*m + d;
7453 }
7454 }
7455
7456 return err;
7457 }
7458
regular_daily_or_weekly(const DATASET * dset,double * x)7459 static int regular_daily_or_weekly (const DATASET *dset, double *x)
7460 {
7461 char datestr[16];
7462 int t, y, m, d, n;
7463 int err = 0;
7464
7465 for (t=0; t<dset->n && !err; t++) {
7466 ntolabel(datestr, t, dset);
7467 #if 0
7468 fprintf(stderr, "regular: datestr = '%s'\n", datestr);
7469 #endif
7470 n = sscanf(datestr, "%d-%d-%d", &y, &m, &d);
7471 if (n != 3) {
7472 err = E_DATA;
7473 } else {
7474 x[t] = 10000*y + 100*m + d;
7475 }
7476 }
7477
7478 return err;
7479 }
7480
fill_dataset_dates_series(const DATASET * dset,double * x)7481 int fill_dataset_dates_series (const DATASET *dset, double *x)
7482 {
7483 double sd0;
7484 int pd, T;
7485 int err = 0;
7486
7487 if (dataset_is_panel(dset)) {
7488 /* looking at time dimension of panel */
7489 pd = dset->panel_pd;
7490 sd0 = dset->panel_sd0;
7491 T = dset->pd;
7492 } else {
7493 /* regular time series */
7494 pd = dset->pd;
7495 sd0 = dset->sd0;
7496 T = dset->n;
7497 }
7498
7499 if (dataset_has_panel_time(dset)) {
7500 if (pd == 4 || pd == 12) {
7501 err = monthly_or_quarterly_dates(dset, pd, sd0, T, x);
7502 } else if (pd == 1 || pd == 10) {
7503 err = annual_or_decennial_dates(dset, pd, sd0, T, x);
7504 } else if (pd == 5 || pd == 6 || pd == 7 || pd == 52) {
7505 err = panel_daily_or_weekly(dset, x);
7506 }
7507 } else if (calendar_data(dset)) {
7508 err = regular_daily_or_weekly(dset, x);
7509 } else if (quarterly_or_monthly(dset)) {
7510 err = monthly_or_quarterly_dates(dset, pd, sd0, T, x);
7511 } else if (annual_data(dset) || decennial_data(dset)) {
7512 err = annual_or_decennial_dates(dset, pd, sd0, T, x);
7513 } else {
7514 err = E_PDWRONG;
7515 }
7516
7517 if (!err && dataset_is_panel(dset)) {
7518 /* expand the date series for all groups */
7519 int i, N = dset->n / dset->pd;
7520 size_t bytes = dset->pd * sizeof *x;
7521 double *dest = x + dset->pd;
7522
7523 for (i=1; i<N; i++) {
7524 memcpy(dest, x, bytes);
7525 dest += dset->pd;
7526 }
7527 }
7528
7529 return err;
7530 }
7531
fill_day_of_week_array(double * dow,const double * y,const double * m,const double * d,const DATASET * dset)7532 int fill_day_of_week_array (double *dow,
7533 const double *y,
7534 const double *m,
7535 const double *d,
7536 const DATASET *dset)
7537 {
7538 int yt, mt, dt;
7539 int julian;
7540 int t, err = 0;
7541
7542 for (t=dset->t1; t<=dset->t2 && !err; t++) {
7543 julian = 0;
7544 yt = (int) y[t];
7545 if (yt < 0) {
7546 yt = -yt;
7547 julian = 1;
7548 }
7549 mt = (int) m[t];
7550 dt = (int) d[t];
7551 dow[t] = day_of_week(yt, mt, dt, julian, &err);
7552 }
7553
7554 return err;
7555 }
7556
fill_isoweek_array(double * wknum,const double * y,const double * m,const double * d,const DATASET * dset)7557 int fill_isoweek_array (double *wknum,
7558 const double *y,
7559 const double *m,
7560 const double *d,
7561 const DATASET *dset)
7562 {
7563 int yt, mt, dt;
7564 int t, err = 0;
7565
7566 for (t=dset->t1; t<=dset->t2 && !err; t++) {
7567 yt = (int) y[t];
7568 mt = (int) m[t];
7569 dt = (int) d[t];
7570 wknum[t] = iso_week_number(yt, mt, dt, &err);
7571 }
7572
7573 return err;
7574 }
7575
7576 /**
7577 * empirical_cdf:
7578 * @y: array to process
7579 * @n: length of @y.
7580 * @err: location to receive error code on failure.
7581 *
7582 * Calculates the empirical CDF of @y, skipping any missing values.
7583 *
7584 * Returns: on successful completion, a matrix with row
7585 * dimension equal to the number of unique values in @y, and two
7586 * columns, the first containing the unique values of @y and the
7587 * second the cumulative relative frequency. On failure, returns
7588 * NULL.
7589 */
7590
empirical_cdf(const double * y,int n,int * err)7591 gretl_matrix *empirical_cdf (const double *y, int n, int *err)
7592 {
7593 gretl_matrix *m = NULL;
7594 double *sy = NULL;
7595 double *crf = NULL;
7596 int n_le, n_u, n_ok = 0;
7597 int percentages = 0;
7598 int i, j, k, kbak;
7599
7600 /* count non-missing values */
7601 for (i=0; i<n; i++) {
7602 if (!na(y[i])) {
7603 n_ok += 1;
7604 }
7605 }
7606
7607 if (n_ok == 0) {
7608 *err = E_MISSDATA;
7609 return NULL;
7610 }
7611
7612 /* allocate full array for sorting */
7613 sy = malloc(n_ok * sizeof *sy);
7614 if (sy == NULL) {
7615 *err = E_ALLOC;
7616 return NULL;
7617 }
7618
7619 j = 0;
7620 for (i=0; i<n; i++) {
7621 if (!na(y[i])) {
7622 sy[j++] = y[i];
7623 }
7624 }
7625
7626 qsort(sy, n_ok, sizeof *sy, gretl_compare_doubles);
7627
7628 /* count unique values */
7629 n_u = 1;
7630 for (i=1; i<n_ok; i++) {
7631 if (sy[i] != sy[i-1]) {
7632 n_u++;
7633 }
7634 }
7635
7636 m = gretl_matrix_alloc(n_u, 2);
7637 if (m == NULL) {
7638 *err = E_ALLOC;
7639 free(sy);
7640 return NULL;
7641 }
7642
7643 crf = m->val + n_u;
7644
7645 /* The first column of @m holds unique values,
7646 the second cumulative relative frequency
7647 */
7648 j = kbak = n_le = 0;
7649 for (i=0; i<n_ok; i++) {
7650 if (i == 0 || sy[i] != sy[i-1]) {
7651 gretl_matrix_set(m, j, 0, sy[i]);
7652 for (k=kbak; k<n_ok; k++) {
7653 if (sy[k] <= sy[i]) {
7654 n_le++;
7655 } else {
7656 break;
7657 }
7658 }
7659 if (percentages) {
7660 crf[j++] = 100 * n_le / (double) n_ok;
7661 } else {
7662 crf[j++] = n_le / (double) n_ok;
7663 }
7664 kbak = k;
7665 }
7666 }
7667
7668 free(sy);
7669
7670 return m;
7671 }
7672
7673 /* Convert from ISO 8601 (daily) dates to year, quarter or month
7674 (discarding excess precision), if possible.
7675 */
7676
handle_excess_precision(int * ymd1,int * ymd2,int pd,char * obs1,char * obs2)7677 static int handle_excess_precision (int *ymd1, int *ymd2, int pd,
7678 char *obs1, char *obs2)
7679 {
7680 int err = 0;
7681
7682 if (ymd1[2] != 1 || ymd2[2] != 1) {
7683 /* day-of-month must be 1 in all cases */
7684 return E_INVARG;
7685 }
7686
7687 *obs1 = *obs2 = '\0';
7688
7689 if (pd == 1) {
7690 /* annual: the month must be 1 */
7691 if (ymd1[1] != 1 || ymd1[2] != 1) {
7692 err = E_INVARG;
7693 } else {
7694 sprintf(obs1, "%d", ymd1[0]);
7695 sprintf(obs2, "%d", ymd2[0]);
7696 }
7697 } else if (pd == 4) {
7698 /* quarterly: the month must start a quarter */
7699 int m1 = ymd1[1], m2 = ymd2[1];
7700
7701 if ((m1 != 1 && m1 != 4 && m1 != 7 && m1 != 10) ||
7702 (m2 != 1 && m2 != 4 && m2 != 7 && m2 != 10)) {
7703 err = E_INVARG;
7704 } else {
7705 sprintf(obs1, "%d:%d", ymd1[0], 1 + (m1-1)/3);
7706 sprintf(obs2, "%d:%d", ymd2[0], 1 + (m2-1)/3);
7707 }
7708 } else {
7709 /* pd == 12: monthly */
7710 int m1 = ymd1[1], m2 = ymd2[1];
7711
7712 if (m1 < 1 || m1 > 12 || m2 < 1 || m2 > 12) {
7713 err = E_INVARG;
7714 } else {
7715 sprintf(obs1, "%d:%02d", ymd1[0], m1);
7716 sprintf(obs2, "%d:%02d", ymd2[0], m2);
7717 }
7718 }
7719
7720 return err;
7721 }
7722
sample_span(const char * stobs,const char * endobs,int pd,int * err)7723 int sample_span (const char *stobs, const char *endobs,
7724 int pd, int *err)
7725 {
7726 char obs1[16], obs2[16];
7727 DATASET dset = {0};
7728 int ymd1[3], ymd2[3];
7729 int n, nf, t2, span = -1;
7730
7731 if (pd == 1 || pd == 12 || (pd >= 4 && pd <= 7) || pd == 52) {
7732 ; /* OK, supported frequency */
7733 } else {
7734 *err = E_INVARG;
7735 return -1;
7736 }
7737
7738 n = strlen(stobs);
7739
7740 if (n > 10 || strlen(endobs) != n) {
7741 /* invalid and/or inconsistent observation strings */
7742 *err = E_INVARG;
7743 return -1;
7744 }
7745
7746 strcpy(obs1, stobs);
7747 strcpy(obs2, endobs);
7748
7749 if (n == 10) {
7750 /* should be ISO 8601 dates */
7751 nf = sscanf(obs1, "%d-%d-%d", &ymd1[0], &ymd1[1], &ymd1[2]);
7752 if (nf != 3) {
7753 *err = E_INVARG;
7754 } else {
7755 nf = sscanf(obs2, "%d-%d-%d", &ymd2[0], &ymd2[1], &ymd2[2]);
7756 if (nf != 3) {
7757 *err = E_INVARG;
7758 }
7759 }
7760 if (!*err && (pd == 1 || pd == 4 || pd == 12)) {
7761 *err = handle_excess_precision(ymd1, ymd2, pd, obs1, obs2);
7762 }
7763 }
7764
7765 if (*err) {
7766 return -1;
7767 }
7768
7769 strcpy(dset.stobs, obs1);
7770 dset.pd = pd;
7771 dset.structure = TIME_SERIES;
7772
7773 if (n == 10 && ((pd >= 5 && pd <= 7) || pd == 52)) {
7774 /* validate daily data */
7775 int ed1, ed2;
7776
7777 ed1 = epoch_day_from_ymd(ymd1[0], ymd1[1], ymd1[2]);
7778 ed2 = epoch_day_from_ymd(ymd2[0], ymd2[1], ymd2[2]);
7779
7780 if (ed1 > 0 && ed2 > 0 && (pd == 5 || pd == 6)) {
7781 /* validate days-of-week */
7782 int wd1 = weekday_from_epoch_day(ed1);
7783 int wd2 = weekday_from_epoch_day(ed2);
7784
7785 if (pd == 5 && (wd1 < 1 || wd1 > 5)) {
7786 ed1 = 0;
7787 } else if (pd == 5 && (wd2 < 1 || wd2 > 5)) {
7788 ed2 = 0;
7789 } else if (pd == 6) {
7790 if (wd1 < 1) {
7791 ed1 = 0;
7792 } else if (wd2 < 1) {
7793 ed2 = 0;
7794 }
7795 }
7796 }
7797
7798 if (ed1 == 0) {
7799 gretl_errmsg_sprintf("Invalid observation %s", stobs);
7800 *err = E_INVARG;
7801 } else if (ed2 == 0) {
7802 gretl_errmsg_sprintf("Invalid observation %s", endobs);
7803 *err = E_INVARG;
7804 } else {
7805 dset.sd0 = (double) ed1;
7806 }
7807 }
7808
7809 if (*err) {
7810 span = -1;
7811 } else {
7812 t2 = dateton(obs2, &dset);
7813 if (t2 >= 0) {
7814 span = t2 + 1; /* t2 is 0-based */
7815 } else {
7816 *err = E_INVARG;
7817 span = -1;
7818 }
7819 }
7820
7821 return span;
7822 }
7823
real_clogit_fi(int T,int k,int r,gretl_matrix * z,gretl_matrix * df,int * err)7824 static double real_clogit_fi (int T, int k, int r,
7825 gretl_matrix *z,
7826 gretl_matrix *df,
7827 int *err)
7828 {
7829 double x, ret = NADBL;
7830 int do_score = (df != NULL);
7831 int i, j;
7832
7833 if (do_score) {
7834 for (i=0; i<r; i++) {
7835 gretl_vector_set(df, i, 0);
7836 }
7837 }
7838
7839 if (T < k) {
7840 ret = 0.0;
7841 } else if (k == 0) {
7842 ret = 1.0;
7843 } else if (k == 1) {
7844 ret = 0.0;
7845 for (i=0; i<T; i++) {
7846 x = exp(gretl_vector_get(z, i));
7847 ret += x;
7848 if (do_score) {
7849 gretl_vector_set(df, i, x);
7850 }
7851 }
7852 } else if (k == 2) {
7853 double exi;
7854
7855 ret = 0.0;
7856 for (i=0; i<T-1; i++) {
7857 exi = exp(gretl_vector_get(z, i));
7858 for (j=i+1; j<T; j++) {
7859 x = exp(gretl_vector_get(z, j)) * exi;
7860 ret += x;
7861 if (do_score) {
7862 df->val[i] += x;
7863 df->val[j] += x;
7864 }
7865 }
7866 }
7867 } else if (k == T - 1) {
7868 double S = 0.0;
7869
7870 ret = 0.0;
7871 for (i=0; i<T; i++) {
7872 S += gretl_vector_get(z, i);
7873 }
7874
7875 for (i=0; i<T; i++) {
7876 x = exp(S - gretl_vector_get(z, i));
7877 ret += x;
7878 if (do_score) {
7879 for (j=0; j<T; j++) {
7880 if (i != j) {
7881 df->val[j] += x;
7882 }
7883 }
7884 }
7885 }
7886 } else if (k == T) {
7887 ret = 0.0;
7888 for (i=0; i<T; i++) {
7889 ret += gretl_vector_get(z, i);
7890 }
7891 ret = exp(ret);
7892 if (do_score) {
7893 for (i=0; i<T; i++) {
7894 gretl_vector_set(df, i, ret);
7895 }
7896 }
7897 } else {
7898 double x = exp(gretl_vector_get(z, T-1));
7899
7900 if (do_score) {
7901 double f2 = real_clogit_fi(T-1, k-1, r, z, df, err);
7902 gretl_matrix *df1 = NULL;
7903
7904 if (!*err) {
7905 df1 = gretl_vector_alloc(r);
7906 if (df1 == NULL) {
7907 ret = NADBL;
7908 *err = E_ALLOC;
7909 }
7910 }
7911
7912 if (!*err) {
7913 ret = real_clogit_fi(T-1, k, r, z, df1, err) + f2 * x;
7914 for (i=0; i<T; i++) {
7915 df->val[i] *= x;
7916 df->val[i] += df1->val[i];
7917 }
7918 df->val[T-1] += x * f2;
7919 gretl_matrix_free(df1);
7920 }
7921 } else {
7922 double f2 = real_clogit_fi(T-1, k-1, r, z, NULL, err);
7923
7924 if (!*err) {
7925 ret = real_clogit_fi(T-1, k, r, z, NULL, err) + f2 * x;
7926 }
7927 }
7928 }
7929
7930 return ret;
7931 }
7932
7933 /* FIXME add some documentation here */
7934
clogit_fi(int T,int k,gretl_matrix * z,gretl_matrix * df,int * err)7935 double clogit_fi (int T, int k, gretl_matrix *z,
7936 gretl_matrix *df, int *err)
7937 {
7938 gretl_matrix *dfarg = NULL;
7939 double ret = NADBL;
7940 int r;
7941
7942 if (gretl_is_null_matrix(z)) {
7943 *err = E_DATA;
7944 return NADBL;
7945 }
7946
7947 r = z->rows;
7948
7949 if (df != NULL) {
7950 if (df->rows != r || df->cols != 1) {
7951 dfarg = gretl_column_vector_alloc(r);
7952 if (dfarg == NULL) {
7953 *err = E_ALLOC;
7954 }
7955 } else {
7956 dfarg = df;
7957 }
7958 }
7959
7960 if (!*err) {
7961 ret = real_clogit_fi(T, k, r, z, dfarg, err);
7962 }
7963
7964 if (dfarg != NULL && dfarg != df) {
7965 gretl_matrix_replace_content(df, dfarg);
7966 gretl_matrix_free(dfarg);
7967 }
7968
7969 return ret;
7970 }
7971
7972 /* simple driver function */
7973
gretl_matrix_vector_stat(const gretl_matrix * m,GretlVecStat vs,int rowwise,int * err)7974 gretl_matrix *gretl_matrix_vector_stat (const gretl_matrix *m,
7975 GretlVecStat vs, int rowwise,
7976 int *err)
7977 {
7978 if (m->is_complex) {
7979 return gretl_cmatrix_vector_stat(m, vs, rowwise, err);
7980 } else {
7981 return gretl_rmatrix_vector_stat(m, vs, rowwise, err);
7982 }
7983 }
7984
7985 /* Fill @v with unique random draws from {1,2,...,@n} */
7986
fill_permutation_vector(gretl_vector * v,int n)7987 int fill_permutation_vector (gretl_vector *v, int n)
7988 {
7989 int *pool = NULL;
7990 int n_tail;
7991 unsigned u;
7992 int i, k;
7993
7994 if (n <= 0) {
7995 return E_INVARG;
7996 }
7997
7998 k = gretl_vector_get_length(v);
7999 if (k <= 0 || k > n) {
8000 gretl_errmsg_sprintf(_("Invalid number of draws %d"), k);
8001 return E_INVARG;
8002 }
8003
8004 pool = malloc(n * sizeof *pool);
8005 if (pool == NULL) {
8006 return E_ALLOC;
8007 }
8008
8009 /* initialize selection pool */
8010 for (i=0; i<n; i++) {
8011 pool[i] = i + 1;
8012 }
8013
8014 for (i=0; i<k; i++) {
8015 u = gretl_rand_int_max(n);
8016 v->val[i] = pool[u];
8017 /* remove pool[u] from the selectable set */
8018 if (u < n - 1) {
8019 n_tail = n - u - 1;
8020 memmove(pool + u, pool + u + 1, n_tail * sizeof *pool);
8021 }
8022 n--;
8023 }
8024
8025 free(pool);
8026
8027 return 0;
8028 }
8029
8030 #define GEODEBUG 0
8031
8032 /* Driver function for calling the geoplot plugin to produce
8033 a map. To obtain the map polygons we need EITHER the name
8034 of the source file (GeoJSON or Shapefile), via @fname,
8035 OR the map info in the form of a gretl_bundle, via @mapptr.
8036
8037 The "payload" (if any) is given as a series, via @plx.
8038
8039 Options (if any) are provided via @optptr, a pointer to
8040 gretl_bundle.
8041 */
8042
geoplot_driver(const char * fname,gretl_bundle * map,const double * plx,const DATASET * dset,gretl_bundle * opts)8043 int geoplot_driver (const char *fname,
8044 gretl_bundle *map,
8045 const double *plx,
8046 const DATASET *dset,
8047 gretl_bundle *opts)
8048 {
8049 int (*mapfunc) (const char *, gretl_bundle *,
8050 gretl_matrix *, gretl_bundle *);
8051 gretl_matrix *payload = NULL;
8052 const char *mapfile = NULL;
8053 int free_map = 0;
8054 int err = 0;
8055
8056 if (fname != NULL && map != NULL) {
8057 gretl_errmsg_set("geoplot: cannot give both filename and map bundle");
8058 return E_DATA;
8059 }
8060
8061 if (map == NULL) {
8062 mapfile = dataset_get_mapfile(dset);
8063 }
8064
8065 if (fname == NULL && map == NULL) {
8066 fname = mapfile;
8067 if (fname == NULL) {
8068 gretl_errmsg_set("geoplot: no map was specified");
8069 return E_DATA;
8070 }
8071 }
8072
8073 if (plx != NULL) {
8074 /* convert payload series to vector for convenience in plugin */
8075 payload = gretl_vector_from_series(plx, dset->t1, dset->t2);
8076 if (payload == NULL) {
8077 err = E_ALLOC;
8078 }
8079 }
8080
8081 #if GEODEBUG
8082 fprintf(stderr, "geoplot_driver: map=%p, mapfile=%p, fname=%p\n",
8083 (void *) map, (void *) mapfile, (void *) fname);
8084 #endif
8085
8086 /* In the case where we got @fname, do we want to produce a
8087 map bundle in which the actual map data are synced with
8088 the dataset? Probably so if @fname is just $mapfile
8089 (metadata loaded as dataset), and presumably not if @fname
8090 is an "external" reference.
8091 */
8092 if (map == NULL && mapfile != NULL) {
8093 if (fname == mapfile || !strcmp(fname, mapfile)) {
8094 #if GEODEBUG
8095 fprintf(stderr, "geoplot_driver: calling get_current_map()\n");
8096 #endif
8097 map = get_current_map(dset, NULL, &err);
8098 free_map = 1;
8099 fname = NULL;
8100 }
8101 }
8102
8103 if (!err) {
8104 mapfunc = get_plugin_function("geoplot");
8105 if (mapfunc == NULL) {
8106 err = E_FOPEN;
8107 } else {
8108 err = mapfunc(fname, map, payload, opts);
8109 }
8110 }
8111
8112 gretl_matrix_free(payload);
8113 if (free_map) {
8114 gretl_bundle_destroy(map);
8115 }
8116
8117 return err;
8118 }
8119
8120 enum {
8121 SUBST_SIMPLE,
8122 SUBST_BTREE
8123 };
8124
subst_val_via_tree(double x,const double * x0,int n0,const double * x1,int n1)8125 static double subst_val_via_tree (double x, const double *x0, int n0,
8126 const double *x1, int n1)
8127 {
8128 static BTree *tree;
8129
8130 if (x0 == NULL) {
8131 /* cleanup */
8132 gretl_btree_destroy(tree);
8133 tree = NULL;
8134 return 0;
8135 }
8136
8137 if (tree == NULL) {
8138 /* allocate and populate tree */
8139 double x1val;
8140 int i;
8141
8142 tree = gretl_btree_new();
8143 for (i=0; i<n0; i++) {
8144 x1val = n1 == 1 ? *x1 : x1[i];
8145 gretl_btree_insert(tree, x0[i], x1val);
8146 }
8147 }
8148
8149 /* do the actual lookup */
8150 x = gretl_btree_lookup(tree, x);
8151
8152 return x;
8153 }
8154
select_subst_method(int n,int nfind)8155 static int select_subst_method (int n, int nfind)
8156 {
8157 #if 1
8158 /* for testing */
8159 if (getenv("REPLACE_USE_BTREE") != NULL) {
8160 return SUBST_BTREE;
8161 } else if (getenv("REPLACE_NAIVE") != NULL) {
8162 return SUBST_SIMPLE;
8163 }
8164 #endif
8165 /* The idea here is that it's worth using a binary
8166 tree mapping "find" to "replace" values only if
8167 the problem is big enough. Testing seems to
8168 indicate that the following condition for "big
8169 enough" may be roughly appropriate.
8170 */
8171 if (nfind > 11 && n >= 80) {
8172 return SUBST_BTREE;
8173 } else {
8174 return SUBST_SIMPLE;
8175 }
8176 }
8177
8178 /* Given an original value @x, see if it matches any of the @n0 values
8179 in @x0. If so, return the substitute value from @x1, otherwise
8180 return the original.
8181 */
8182
subst_val(double x,const double * x0,int n0,const double * x1,int n1,int method)8183 static double subst_val (double x, const double *x0, int n0,
8184 const double *x1, int n1,
8185 int method)
8186 {
8187 if (method & SUBST_BTREE) {
8188 x = subst_val_via_tree(x, x0, n0, x1, n1);
8189 } else {
8190 int i;
8191
8192 for (i=0; i<n0; i++) {
8193 if (x == x0[i]) {
8194 return (n1 == 1)? *x1 : x1[i];
8195 } else if (isnan(x) && isnan(x0[i])) {
8196 /* we'll count this as a match */
8197 return (n1 == 1)? *x1 : x1[i];
8198 }
8199 }
8200 }
8201
8202 return x;
8203 }
8204
8205 /**
8206 * substitute_values:
8207 * @dest: target array.
8208 * @src: source array.
8209 * @n: length of @src and @dest.
8210 * @v0: array of "find" values.
8211 * @n0: length of @v0.
8212 * @v1: array of "replace" values.
8213 * @n1: length of @v1.
8214 *
8215 * For each of the @n elements in @src, determine if it appears
8216 * in @v0: if so, write to @targ the corresponding element of
8217 * @v1, otherwise write the @src element to @targ. The method
8218 * employed is either "naive" lookup, or if the problem is of
8219 * sufficient size a binary tree.
8220 *
8221 * Returns: 0 on successful completion, non-zero code on error.
8222 */
8223
substitute_values(double * dest,const double * src,int n,const double * v0,int n0,const double * v1,int n1)8224 int substitute_values (double *dest, const double *src, int n,
8225 const double *v0, int n0,
8226 const double *v1, int n1)
8227 {
8228 int i, method = select_subst_method(n, n1);
8229
8230 for (i=0; i<n; i++) {
8231 dest[i] = subst_val(src[i], v0, n0, v1, n1, method);
8232 }
8233 if (method & SUBST_BTREE) {
8234 /* cleanup call */
8235 subst_val(0, NULL, 0, NULL, 0, method);
8236 }
8237
8238 return 0;
8239 }
8240