1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include "libgretl.h"
21 #include "uservar.h"
22 #include "gretl_func.h"
23 #include "matrix_extra.h"
24
25 #define TRDEBUG 0
26
27 /**
28 * SECTION:transforms
29 * @short_description: standard transformations of series in the dataset
30 * @title: Transformations
31 * @include: libgretl.h
32 *
33 * Functions to generate standard transformations (logs, lags, first
34 * differences and so on) of series in the dataset.
35 */
36
37 enum {
38 VARS_IDENTICAL,
39 NEW_HAS_MISSING,
40 OLD_HAS_MISSING,
41 VARS_DIFFER
42 } varcomp_codes;
43
44 enum {
45 INVERSE = NC + 1,
46 RESAMPLE,
47 HFDIFF,
48 HFLDIFF
49 };
50
51 static char *get_mangled_name_by_id (int v);
52
53 /* Note: in this comparison @test is the newly generated series
54 and @cand is the previously existing candidate series.
55 */
56
check_vals(const double * test,const double * cand,int n)57 static int check_vals (const double *test, const double *cand, int n)
58 {
59 int t, ret = VARS_IDENTICAL;
60
61 for (t=0; t<n; t++) {
62 if (test[t] == cand[t] || (na(test[t]) && na(cand[t]))) {
63 /* values the same, or both NA */
64 continue;
65 }
66 /* values differ at obs @t */
67 if (na(test[t]) && !na(cand[t])) {
68 if (ret == VARS_IDENTICAL || ret == NEW_HAS_MISSING) {
69 ret = NEW_HAS_MISSING;
70 } else {
71 ret = VARS_DIFFER;
72 }
73 } else if (!na(test[t]) && na(cand[t])) {
74 if (ret == VARS_IDENTICAL || ret == OLD_HAS_MISSING) {
75 ret = OLD_HAS_MISSING;
76 } else {
77 ret = VARS_DIFFER;
78 }
79 } else {
80 ret = VARS_DIFFER;
81 }
82 if (ret == VARS_DIFFER) {
83 break;
84 }
85 }
86
87 return ret;
88 }
89
90 static int
make_transform_varname(char * vname,const char * orig,int ci,int aux,int len)91 make_transform_varname (char *vname, const char *orig, int ci,
92 int aux, int len)
93 {
94 *vname = '\0';
95
96 if (ci == DIFF) {
97 strcpy(vname, "d_");
98 strncat(vname, orig, len - 2);
99 } else if (ci == LDIFF) {
100 strcpy(vname, "ld_");
101 strncat(vname, orig, len - 3);
102 } else if (ci == SDIFF) {
103 strcpy(vname, "sd_");
104 strncat(vname, orig, len - 3);
105 } else if (ci == ORTHDEV) {
106 strcpy(vname, "o_");
107 strncat(vname, orig, len - 2);
108 } else if (ci == LOGS) {
109 strcpy(vname, "l_");
110 strncat(vname, orig, len - 2);
111 } else if (ci == SQUARE) {
112 strcpy(vname, "sq_");
113 strncat(vname, orig, len - 3);
114 } else if (ci == LAGS) {
115 char ext[12];
116
117 if (aux >= 0) {
118 /* an actual lag (or contemporaneous value) */
119 sprintf(ext, "_%d", aux);
120 strncat(vname, orig, len - strlen(ext));
121 strcat(vname, ext);
122 } else {
123 /* a lead: in naming the output series we'll
124 try to avoid appending a plain digit to
125 a series name that already ends in a digit,
126 since this may get confusing
127 */
128 int n1, n2;
129
130 sprintf(ext, "%d", -aux);
131 n1 = strlen(orig);
132 n2 = strlen(ext);
133 if (n1 + n2 + 2 < VNAMELEN) {
134 if (isdigit(orig[n1-1])) {
135 /* separate the digits */
136 sprintf(vname, "%s_f%d", orig, -aux);
137 } else {
138 sprintf(vname, "%s%d", orig, -aux);
139 }
140 } else {
141 /* Oh, well, can't be helped */
142 strncat(vname, orig, len - n2);
143 strcat(vname, ext);
144 }
145 }
146 } else if (ci == DUMMIFY) {
147 char ext[16];
148
149 if (aux < 0) {
150 sprintf(ext, "_m%d", abs(aux));
151 } else {
152 sprintf(ext, "_%d", aux);
153 }
154 strcpy(vname, "D");
155 strncat(vname, orig, len - strlen(ext) - 1);
156 strcat(vname, ext);
157 } else if (ci == INVERSE) {
158 strcpy(vname, "i_");
159 strncat(vname, orig, len - 2);
160 } else if (ci == STDIZE) {
161 if (aux < 0) {
162 strcpy(vname, "c_");
163 } else {
164 strcpy(vname, "s_");
165 }
166 strncat(vname, orig, len - 2);
167 } else if (ci == RESAMPLE) {
168 strcpy(vname, "rs_");
169 strncat(vname, orig, len - 3);
170 }
171
172 #if TRDEBUG
173 fprintf(stderr, "make_transform_varname:\n"
174 "orig='%s', ci=%d, len=%d, vname='%s'\n",
175 orig, ci, len, vname);
176 #endif
177
178 return 0;
179 }
180
181 static int
make_transform_label(char * label,const char * parent,int ci,int aux)182 make_transform_label (char *label, const char *parent,
183 int ci, int aux)
184 {
185 gchar *s = NULL;
186 int err = 0;
187
188 if (ci == DIFF) {
189 s = g_strdup_printf(_("= first difference of %s"), parent);
190 } else if (ci == LDIFF) {
191 s = g_strdup_printf(_("= log difference of %s"), parent);
192 } else if (ci == SDIFF) {
193 s = g_strdup_printf(_("= seasonal difference of %s"), parent);
194 } else if (ci == LOGS) {
195 s = g_strdup_printf(_("= log of %s"), parent);
196 } else if (ci == SQUARE) {
197 s = g_strdup_printf(_("= %s squared"), parent);
198 } else if (ci == LAGS) {
199 /* @aux = lag */
200 if (aux >= 0) {
201 s = g_strdup_printf("= %s(t - %d)", parent, aux);
202 } else {
203 s = g_strdup_printf("= %s(t + %d)", parent, -aux);
204 }
205 } else if (ci == INVERSE) {
206 s = g_strdup_printf("= 1/%s", parent);
207 } else if (ci == STDIZE) {
208 /* @aux = dfcorr */
209 if (aux < 0) {
210 s = g_strdup_printf(_("= centered %s"), parent);
211 } else {
212 s = g_strdup_printf(_("= standardized %s"), parent);
213 }
214 } else if (ci == RESAMPLE) {
215 s = g_strdup_printf(_("= resampled %s"), parent);
216 } else if (ci == HFDIFF) {
217 s = g_strdup_printf(_("= high-frequency difference of %s"), parent);
218 } else if (ci == HFLDIFF) {
219 s = g_strdup_printf(_("= high-frequency log difference of %s"), parent);
220 } else {
221 err = 1;
222 }
223
224 if (!err) {
225 strcpy(label, gretl_utf8_truncate(s, MAXLABEL-1));
226 g_free(s);
227 }
228
229 return err;
230 }
231
232 /**
233 * standard_lag_of:
234 * @v: ID number of series to test.
235 * @parent: ID of potential parent series.
236 * @dset: dataset information.
237 *
238 * Returns: the lag order of series @v, if it is marked as
239 * a lag of @parent, otherwise 0.
240 */
241
standard_lag_of(int v,int parent,const DATASET * dset)242 int standard_lag_of (int v, int parent, const DATASET *dset)
243 {
244 int pv = 0, ret = 0;
245
246 if (dset == NULL || v <= 0 || v >= dset->v) {
247 return 0;
248 }
249
250 if (series_get_transform(dset, v) == LAGS) {
251 pv = series_get_parent_id(dset, v);
252 if (pv == parent) {
253 ret = series_get_lag(dset, v);
254 }
255 }
256
257 return ret;
258 }
259
260 /**
261 * is_standard_diff:
262 * @v: ID number of variable to test.
263 * @dset: dataset information.
264 * @parent: location to receive ID number of parent variable,
265 * or NULL.
266 *
267 * Returns: 1 if the variable @v is marked as being the first
268 * difference of some "parent" variable in the dataset,
269 * otherwise 0.
270 */
271
is_standard_diff(int v,const DATASET * dset,int * parent)272 int is_standard_diff (int v, const DATASET *dset, int *parent)
273 {
274 int pv = 0, ret = 0;
275
276 if (v <= 0 || v >= dset->v) {
277 return 0;
278 }
279
280 if (series_get_transform(dset, v) == DIFF) {
281 pv = series_get_parent_id(dset, v);
282 if (pv > 0) {
283 if (parent != NULL) {
284 *parent = pv;
285 }
286 ret = 1;
287 }
288 }
289
290 return ret;
291 }
292
make_xp_varname(char * vname,int v1,int v2,const DATASET * dset,int len)293 static void make_xp_varname (char *vname, int v1, int v2,
294 const DATASET *dset,
295 int len)
296 {
297 int n1 = strlen(dset->varname[v1]);
298 int n2 = strlen(dset->varname[v2]);
299 int cut = n1 + n2 + 1 - len;
300 int cut1 = 0, cut2 = 0;
301
302 if (cut > 0) {
303 cut1 = cut2 = cut/2;
304 if (cut % 2) {
305 cut2++;
306 }
307 }
308
309 *vname = '\0';
310 strncat(vname, dset->varname[v1], n1 - cut1);
311 strcat(vname, "_");
312 strncat(vname, dset->varname[v2], n2 - cut2);
313 }
314
315 /* Array into which to write a generated variable, prior to testing
316 whether or not the same var already exists.
317 */
318
testvec(int n)319 static double *testvec (int n)
320 {
321 static double *x;
322 static int nx;
323 int t;
324
325 if (n == 0) {
326 /* clean up */
327 free(x);
328 x = NULL;
329 nx = 0;
330 return NULL;
331 }
332
333 if (n > nx) {
334 double *y = realloc(x, n * sizeof *x);
335
336 if (y == NULL) {
337 free(x);
338 x = NULL;
339 nx = 0;
340 return NULL;
341 }
342
343 nx = n;
344 x = y;
345 }
346
347 for (t=0; t<n; t++) {
348 x[t] = NADBL;
349 }
350
351 return x;
352 }
353
354 /**
355 * gretl_transforms_cleanup:
356 *
357 * Called by libgretl_cleanup(). Frees any memory allocated
358 * as workspace for the creation of transformed variables.
359 */
360
gretl_transforms_cleanup(void)361 void gretl_transforms_cleanup (void)
362 {
363 testvec(0);
364 }
365
366 /* write lagged values of variable v into xlag */
367
get_lag(int v,int lag,double * xlag,const DATASET * dset)368 static int get_lag (int v, int lag, double *xlag,
369 const DATASET *dset)
370 {
371 const double *x = dset->Z[v];
372 int t1 = (lag > 0)? lag : 0;
373 int t2 = dset->n - 1;
374 int t, s;
375
376 for (t=0; t<dset->n; t++) {
377 xlag[t] = NADBL;
378 }
379
380 for (t=t1; t<=t2; t++) {
381 s = t - lag;
382 if (dataset_is_panel(dset) &&
383 t / dset->pd != s / dset->pd) {
384 continue;
385 }
386 if (s >= 0 && s < dset->n) {
387 xlag[t] = x[s];
388 }
389 }
390
391 return 0;
392 }
393
394 /* write log of variable v into logvec */
395
get_log(int v,double * logvec,const DATASET * dset)396 static int get_log (int v, double *logvec, const DATASET *dset)
397 {
398 double xx;
399 int t, err = 0;
400
401 for (t=dset->t1; t<=dset->t2 && !err; t++) {
402 xx = dset->Z[v][t];
403 if (na(xx) || xx <= 0.0) {
404 logvec[t] = NADBL;
405 set_gretl_warning(W_GENMISS);
406 } else {
407 logvec[t] = log(xx);
408 }
409 }
410
411 return err;
412 }
413
414 /* write some sort of difference of variable v into diffvec */
415
get_diff(int v,double * diffvec,int ci,const DATASET * dset)416 static int get_diff (int v, double *diffvec, int ci,
417 const DATASET *dset)
418 {
419 double x0, x1;
420 int t, t0, t1;
421
422 t0 = (ci == SDIFF)? dset->pd : 1;
423 t1 = (dset->t1 > t0)? dset->t1 : t0;
424
425 for (t=t1; t<=dset->t2; t++) {
426 if (dset->structure == STACKED_TIME_SERIES &&
427 panel_unit_first_obs(t, dset)) {
428 continue;
429 }
430
431 x0 = dset->Z[v][t];
432 x1 = dset->Z[v][t - t0];
433
434 if (ci == LDIFF) {
435 if (!na(x0) && !na(x1) && x0 > 0 && x1 > 0) {
436 diffvec[t] = log(x0) - log(x1);
437 }
438 } else {
439 if (!na(x0) && !na(x1)) {
440 diffvec[t] = x0 - x1;
441 }
442 }
443 }
444
445 return 0;
446 }
447
448 /* write a matrix containing high-frequency differences of
449 the series in @list over the current sample range
450 */
451
get_hf_diffs(const int * list,int ci,double mult,const DATASET * dset,int * err)452 static gretl_matrix *get_hf_diffs (const int *list,
453 int ci, double mult,
454 const DATASET *dset,
455 int *err)
456 {
457 gretl_matrix *dX = NULL;
458 double **Z = dset->Z;
459 double x0, x1, dti;
460 int n = list[0];
461 int v1 = list[1];
462 int i, vi, s, t, T;
463
464 T = dset->t2 - dset->t1 + 1;
465 dX = gretl_zero_matrix_new(T, n);
466
467 if (dX == NULL) {
468 *err = E_ALLOC;
469 return NULL;
470 }
471
472 for (t=T-1; t>=0; t--) {
473 s = t + dset->t1;
474 for (i=0; i<n; i++) {
475 vi = list[i+1];
476 x0 = Z[vi][s];
477 if (i < n-1) {
478 x1 = Z[list[i+2]][s];
479 } else if (s > 0) {
480 x1 = Z[v1][s-1];
481 } else {
482 x1 = NADBL;
483 }
484 dti = NADBL;
485 if (!na(x0) && !na(x1)) {
486 if (ci == DIFF) {
487 dti = mult * (x0 - x1);
488 } else if (x0 > 0 && x1 > 0) {
489 dti = mult * log(x0/x1);
490 }
491 }
492 gretl_matrix_set(dX, t, i, dti);
493 }
494 }
495
496 return dX;
497 }
498
499 /* orthogonal deviations */
500
get_orthdev(int v,double * xvec,const DATASET * dset)501 static int get_orthdev (int v, double *xvec, const DATASET *dset)
502 {
503 return orthdev_series(dset->Z[v], xvec, dset);
504 }
505
506 /* write square or cross-product into xvec */
507
get_xpx(int vi,int vj,double * xvec,const DATASET * dset)508 static int get_xpx (int vi, int vj, double *xvec, const DATASET *dset)
509 {
510 double xit, xjt;
511 int t;
512
513 for (t=dset->t1; t<=dset->t2; t++) {
514 xit = dset->Z[vi][t];
515 xjt = dset->Z[vj][t];
516 if (na(xit) || na(xjt)) {
517 xvec[t] = NADBL;
518 } else {
519 xvec[t] = xit * xjt;
520 }
521 }
522
523 return 0;
524 }
525
526 /* write reciprocal into xvec */
527
get_inverse(int v,double * xvec,const DATASET * dset)528 static int get_inverse (int v, double *xvec, const DATASET *dset)
529 {
530 double xt;
531 int t;
532
533 for (t=dset->t1; t<=dset->t2; t++) {
534 xt = dset->Z[v][t];
535 if (na(xt) || xt == 0.0) {
536 xvec[t] = NADBL;
537 } else {
538 xvec[t] = 1.0 / xt;
539 }
540 }
541
542 return 0;
543 }
544
545 /* write standardized series v into targ */
546
get_std(int v,int dfc,double * targ,const DATASET * dset)547 static int get_std (int v, int dfc, double *targ, const DATASET *dset)
548 {
549 double *x = dset->Z[v];
550
551 return standardize_series(x, targ, dfc, dset);
552 }
553
554 /* write resampled series into rsvec */
555
get_resampled(int v,double * rsvec,const DATASET * dset,const int * z)556 static int get_resampled (int v, double *rsvec,
557 const DATASET *dset,
558 const int *z)
559 {
560 const double *x = dset->Z[v];
561 int t, j, s;
562
563 if (dataset_is_panel(dset)) {
564 int n = panel_sample_size(dset);
565 int i, T = dset->pd;
566
567 s = dset->t1;
568 for (i=0; i<n; i++) {
569 j = z[i] * T;
570 for (t=0; t<T; t++) {
571 rsvec[s++] = x[j + t];
572 }
573 }
574 } else {
575 s = 0;
576 for (t=dset->t1; t<=dset->t2; t++) {
577 j = z[s++];
578 rsvec[t] = x[j];
579 }
580 }
581
582 return 0;
583 }
584
585 /* write dummy for (v == value) into xvec */
586
get_discdum(int v,double val,double * xvec,const DATASET * dset)587 static int get_discdum (int v, double val, double *xvec,
588 const DATASET *dset)
589 {
590 double xt;
591 int t;
592
593 for (t=dset->t1; t<=dset->t2; t++) {
594 xt = dset->Z[v][t];
595 if (na(xt)) {
596 xvec[t] = NADBL;
597 } else {
598 xvec[t] = (xt == val)? 1.0 : 0.0;
599 }
600 }
601
602 return 0;
603 }
604
605 enum transform_results {
606 VAR_ADDED_OK,
607 VAR_EXISTS_OK,
608 VAR_ADD_FAILED,
609 VARNAME_DUPLICATE
610 };
611
612 /* Note: new behavior as of 2008-02-11 */
613
614 #define TR_OVERWRITE 1
615
transform_handle_duplicate(int ci,int lag,int v,const double * x,const char * label,DATASET * dset,int parent,int origv)616 static int transform_handle_duplicate (int ci, int lag, int v,
617 const double *x,
618 const char *label,
619 DATASET *dset,
620 int parent,
621 int origv)
622 {
623 const char *vlabel = series_get_label(dset, v);
624 int ret = VARNAME_DUPLICATE;
625 int no_overwrite = 0;
626 int t, ok = 0;
627
628 if (vlabel != NULL && *label && *vlabel) {
629 if (!strcmp(label, vlabel)) {
630 /* labels identical, so OK? */
631 ok = 1;
632 } else {
633 /* labels differ */
634 no_overwrite = 1;
635 }
636 }
637
638 if (!no_overwrite) {
639 int vp = series_get_parent_id(dset, v);
640
641 no_overwrite = parent != vp;
642 }
643
644 #if TR_OVERWRITE
645 if (!ok && v < origv && !no_overwrite) {
646 ok = 1;
647 }
648 #endif
649
650 if (ok) {
651 #if TRDEBUG
652 fprintf(stderr, "transform_handle_duplicate: updating var %d (%s)\n",
653 v, dset->varname[v]);
654 #endif
655 for (t=0; t<dset->n; t++) {
656 dset->Z[v][t] = x[t];
657 }
658 if (*label != '\0') {
659 series_set_label(dset, v, label);
660 }
661 series_set_transform(dset, v, ci);
662 series_set_lag(dset, v, lag);
663 series_zero_flags(dset, v);
664 ret = VAR_EXISTS_OK;
665 }
666
667 #if TRDEBUG
668 if (!ok) {
669 fprintf(stderr, "transform_handle_duplicate: collision with var %d (%s)\n",
670 v, dset->varname[v]);
671 }
672 #endif
673
674 return ret;
675 }
676
677 static int
check_add_transform(int ci,int lag,int vnum,const double * x,const char * vname,const char * label,DATASET * dset,int parent,int origv)678 check_add_transform (int ci, int lag, int vnum, const double *x,
679 const char *vname, const char *label,
680 DATASET *dset, int parent, int origv)
681 {
682 int t, ret = VAR_ADDED_OK;
683
684 if (vnum < dset->v) {
685 /* a variable of this name already exists */
686 int chk = check_vals(x, dset->Z[vnum], dset->n);
687
688 /* heuristic: we'll assume that if the variables have the same
689 name, and differ only in respect of one being more complete
690 than the other (having valid values where the other has
691 missing values), then we should not create a new variable,
692 but rather use the more complete series.
693
694 Watch out for cases where this is not the desired behavior!
695 */
696
697 if (chk == VARS_IDENTICAL) {
698 ret = VAR_EXISTS_OK;
699 } else if (chk == NEW_HAS_MISSING) {
700 /* is this right? */
701 ret = VAR_EXISTS_OK;
702 } else if (chk == OLD_HAS_MISSING) {
703 for (t=0; t<dset->n; t++) {
704 dset->Z[vnum][t] = x[t];
705 }
706 ret = VAR_EXISTS_OK;
707 } else {
708 ret = transform_handle_duplicate(ci, lag, vnum, x, label, dset,
709 parent, origv);
710 }
711 } else {
712 /* no var of this name, working from scratch */
713 if (dataset_add_series(dset, 1)) {
714 ret = VAR_ADD_FAILED;
715 } else {
716 strcpy(dset->varname[vnum], vname);
717 series_set_label(dset, vnum, label);
718 for (t=0; t<dset->n; t++) {
719 dset->Z[vnum][t] = x[t];
720 }
721 }
722 }
723
724 return ret;
725 }
726
727 #define RESPECT_LAG_LEVEL 1
728
get_lag_ID(int srcv,int lag,const DATASET * dset)729 static int get_lag_ID (int srcv, int lag, const DATASET *dset)
730 {
731 #if RESPECT_LAG_LEVEL
732 int fd = gretl_function_depth();
733 #endif
734 const char *parent, *vname = dset->varname[srcv];
735 int i;
736
737 for (i=1; i<dset->v; i++) {
738 #if RESPECT_LAG_LEVEL
739 if (series_get_stack_level(dset, i) != fd) {
740 continue;
741 }
742 #endif
743 if (lag == series_get_lag(dset, i)) {
744 parent = series_get_parent_name(dset, i);
745 if (parent != NULL && !strcmp(vname, parent)) {
746 return i;
747 }
748 }
749 }
750
751 return -1;
752 }
753
754 /* get_transform: create specified transformation of variable v if
755 this variable does not already exist.
756
757 The dummies resulting from DUMMIFY are automatically marked as
758 discrete.
759
760 origv is the number of variables in the dataset prior to the
761 current round of adding.
762
763 Return the ID number of the transformed var, or -1 on error.
764 */
765
get_transform(int ci,int v,int aux,double x,DATASET * dset,int startlen,int origv,int * idxvec)766 static int get_transform (int ci, int v, int aux, double x,
767 DATASET *dset, int startlen, int origv,
768 int *idxvec)
769 {
770 char vname[VNAMELEN] = {0};
771 char label[MAXLABEL] = {0};
772 int vno = -1, err = 0;
773 int len, lag = 0;
774 const char *srcname;
775 double *vx;
776
777 vx = testvec(dset->n);
778 if (vx == NULL) {
779 return -1;
780 }
781
782 if (ci == LAGS) {
783 lag = aux;
784 err = get_lag(v, lag, vx, dset);
785 } else if (ci == LOGS) {
786 err = get_log(v, vx, dset);
787 } else if (ci == DIFF || ci == LDIFF || ci == SDIFF) {
788 err = get_diff(v, vx, ci, dset);
789 } else if (ci == ORTHDEV) {
790 err = get_orthdev(v, vx, dset);
791 } else if (ci == SQUARE) {
792 /* "aux" = second variable number */
793 err = get_xpx(v, aux, vx, dset);
794 } else if (ci == DUMMIFY) {
795 /* "x" = value for dummy */
796 err = get_discdum(v, x, vx, dset);
797 } else if (ci == INVERSE) {
798 err = get_inverse(v, vx, dset);
799 } else if (ci == STDIZE) {
800 err = get_std(v, aux, vx, dset);
801 } else if (ci == RESAMPLE) {
802 err = get_resampled(v, vx, dset, idxvec);
803 }
804
805 if (err) {
806 return -1;
807 }
808
809 if (ci == LAGS && (vno = get_lag_ID(v, aux, dset)) > 0) {
810 /* special case: pre-existing lag */
811 err = check_add_transform(ci, lag, vno, vx, vname, label, dset, v, origv);
812 if (err != VAR_EXISTS_OK) {
813 vno = -1;
814 }
815 return vno;
816 }
817
818 if (ci == DUMMIFY || (ci == SQUARE && v != aux)) {
819 /* special cases in respect of labeling */
820 gchar *s = NULL;
821
822 if (ci == SQUARE && v != aux) {
823 s = g_strdup_printf(_("= %s times %s"), dset->varname[v],
824 dset->varname[aux]);
825 } else if (is_string_valued(dset, v)) {
826 const char *sval = series_get_string_for_value(dset, v, x);
827
828 if (sval != NULL && *sval != '\0') {
829 s = g_strdup_printf(_("dummy for %s = '%s'"), dset->varname[v], sval);
830 } else {
831 s = g_strdup_printf(_("dummy for %s = %g"), dset->varname[v], x);
832 }
833 } else {
834 s = g_strdup_printf(_("dummy for %s = %g"), dset->varname[v], x);
835 }
836 strcpy(label, gretl_utf8_truncate(s, MAXLABEL-1));
837 g_free(s);
838 } else {
839 make_transform_label(label, dset->varname[v], ci, aux);
840 }
841
842 srcname = get_mangled_name_by_id(v);
843 if (srcname == NULL) {
844 srcname = dset->varname[v];
845 }
846
847 #if TRDEBUG
848 fprintf(stderr, "srcname = '%s', startlen = %d, v = %d\n",
849 srcname, startlen, v);
850 #endif
851
852 for (len=startlen; len<=VNAMELEN; len++) {
853 if (*vname != '\0' && len == VNAMELEN) {
854 /* last resort: hack the name */
855 make_varname_unique(vname, 0, dset);
856 } else if (ci == SQUARE && v != aux) {
857 make_xp_varname(vname, v, aux, dset, len);
858 } else if (ci == DUMMIFY && series_is_integer_valued(dset, v)) {
859 make_transform_varname(vname, srcname, ci, (int) x, len);
860 } else {
861 make_transform_varname(vname, srcname, ci, aux, len);
862 }
863 vno = series_index(dset, vname);
864 err = check_add_transform(ci, lag, vno, vx, vname, label, dset, v, origv);
865 if (err != VAR_EXISTS_OK && err != VAR_ADDED_OK) {
866 vno = -1;
867 }
868 if (err != VARNAME_DUPLICATE) {
869 /* either we're OK, or there was a fatal error */
870 break;
871 }
872 }
873
874 if (!err && vno > 0) {
875 if (ci == DUMMIFY) {
876 series_set_parent(dset, vno, dset->varname[v]);
877 series_set_transform(dset, vno, ci);
878 series_set_discrete(dset, vno, 1);
879 } else if (ci == LAGS || ci == DIFF || ci == LOGS) {
880 series_set_parent(dset, vno, dset->varname[v]);
881 series_set_transform(dset, vno, ci);
882 }
883
884 if (ci == LAGS) {
885 series_set_lag(dset, vno, aux);
886 } else {
887 series_set_lag(dset, vno, 0);
888 }
889 }
890
891 return vno;
892 }
893
894 /**
895 * laggenr:
896 * @v: ID number in dataset of source variable.
897 * @lag: the order of the lag to create.
898 * @dset: dataset struct.
899 *
900 * Creates the specified lag of variable @v if this variable does
901 * not already exist.
902 *
903 * Returns: the ID number of the lagged variable, or -1 on error.
904 */
905
laggenr(int v,int lag,DATASET * dset)906 int laggenr (int v, int lag, DATASET *dset)
907 {
908 int lno;
909
910 if (lag > dset->n || -lag > dset->n) {
911 gretl_errmsg_sprintf(_("Invalid lag order %d"), lag);
912 lno = -1;
913 } else if (v == 0) {
914 gretl_errmsg_set(_("The constant cannot be lagged"));
915 lno = -1;
916 } else if (lag == 0) {
917 lno = v;
918 } else {
919 lno = get_transform(LAGS, v, lag, 0.0, dset,
920 VNAMELEN - 3, dset->v, NULL);
921 }
922
923 return lno;
924 }
925
926 /**
927 * laggenr_from_to:
928 * @v: ID number in dataset of source variable.
929 * @fromlag: start of lag/lead range.
930 * @tolag: end of lag/lead range.
931 * @dset: dataset struct.
932 * @err: location to receive error code.
933 *
934 * Creates the specified lags of variable @v if they do not
935 * already exist.
936 *
937 * Returns: list of lag variables, or NULL or on error.
938 */
939
laggenr_from_to(int v,int fromlag,int tolag,DATASET * dset,int * err)940 int *laggenr_from_to (int v, int fromlag, int tolag,
941 DATASET *dset, int *err)
942 {
943 int *llist;
944 int i, lv, nlags = -1;
945 int p, count_down = 0;
946
947 if (fromlag > tolag) {
948 nlags = fromlag - tolag + 1;
949 count_down = 1;
950 } else {
951 nlags = tolag - fromlag + 1;
952 }
953
954 if (nlags <= 0) {
955 *err = E_DATA;
956 return NULL;
957 }
958
959 llist = gretl_list_new(nlags);
960 if (llist == NULL) {
961 *err = E_ALLOC;
962 return NULL;
963 }
964
965 p = fromlag;
966
967 for (i=0; i<nlags; i++) {
968 lv = laggenr(v, -p, dset);
969 if (lv < 0) {
970 *err = E_DATA;
971 free(llist);
972 llist = NULL;
973 break;
974 }
975 llist[i+1] = lv;
976 if (count_down) {
977 p--;
978 } else {
979 p++;
980 }
981 }
982
983 return llist;
984 }
985
986 /**
987 * loggenr:
988 * @v: ID number in dataset of source variable.
989 * @dset: dataset struct.
990 *
991 * Creates the natural log of variable @v if this variable does
992 * not already exist.
993 *
994 * Returns: the ID number of the log variable, or -1 on error.
995 */
996
loggenr(int v,DATASET * dset)997 int loggenr (int v, DATASET *dset)
998 {
999 return get_transform(LOGS, v, 0, 0.0, dset,
1000 VNAMELEN - 3, dset->v, NULL);
1001 }
1002
1003 /**
1004 * invgenr:
1005 * @v: ID number in dataset of source variable.
1006 * @dset: dataset struct.
1007 *
1008 * Creates the reciprocal of variable @v if this variable does
1009 * not already exist.
1010 *
1011 * Returns: the ID number of the reciprocal, or -1 on error.
1012 */
1013
invgenr(int v,DATASET * dset)1014 int invgenr (int v, DATASET *dset)
1015 {
1016 return get_transform(INVERSE, v, 0, 0.0, dset,
1017 VNAMELEN - 3, dset->v, NULL);
1018 }
1019
1020 /**
1021 * diffgenr:
1022 * @v: ID number in dataset of source variable.
1023 * @ci: DIFF (first difference), LDIFF (log difference) or SDIFF
1024 * (seasonal difference).
1025 * @dset: dataset struct.
1026 *
1027 * Creates the first difference (or log- or seasonal difference,
1028 * depending on the value of @ci) of variable @v, if the
1029 * differenced variable does not already exist.
1030 *
1031 * Returns: the ID number of the differenced variable, or -1 on error.
1032 */
1033
diffgenr(int v,int ci,DATASET * dset)1034 int diffgenr (int v, int ci, DATASET *dset)
1035 {
1036 if (ci != DIFF && ci != LDIFF && ci != SDIFF) {
1037 return -1;
1038 }
1039
1040 if (ci == SDIFF && !dataset_is_seasonal(dset)) {
1041 return -1;
1042 }
1043
1044 return get_transform(ci, v, 0, 0.0, dset,
1045 VNAMELEN - 3, dset->v, NULL);
1046 }
1047
1048 /**
1049 * xpxgenr:
1050 * @vi: ID number in dataset of first source variable.
1051 * @vj: ID number in dataset of second source variable.
1052 * @dset: dataset struct.
1053 *
1054 * Creates the cross product of variables @vi and @vj if this
1055 * variable does not already exist.
1056 *
1057 * Returns: the ID number of the cross-product variable,
1058 * or -1 on error.
1059 */
1060
xpxgenr(int vi,int vj,DATASET * dset)1061 int xpxgenr (int vi, int vj, DATASET *dset)
1062 {
1063 if (vi == vj) {
1064 if (gretl_isdummy(dset->t1, dset->t2, dset->Z[vi])) {
1065 return -1;
1066 }
1067 }
1068
1069 return get_transform(SQUARE, vi, vj, 0.0, dset,
1070 VNAMELEN - 3, dset->v, NULL);
1071 }
1072
1073 static int
get_starting_length(const int * list,DATASET * dset,int trim)1074 get_starting_length (const int *list, DATASET *dset, int trim)
1075 {
1076 int width = VNAMELEN - 3 - trim;
1077 const char *vni, *vnj;
1078 int len, maxlen = 0;
1079 int conflict = 0;
1080 int i, j;
1081
1082 if (list[0] == 1) {
1083 return VNAMELEN - 1;
1084 }
1085
1086 for (i=1; i<=list[0]; i++) {
1087 len = strlen(dset->varname[list[i]]);
1088 if (len > maxlen) {
1089 maxlen = len;
1090 }
1091 }
1092
1093 if (maxlen <= width) {
1094 /* no problem: generated names will fit in VNAMELEN - 3 chars */
1095 return VNAMELEN - 3;
1096 }
1097
1098 for (len=width; len<=maxlen; len++) {
1099 conflict = 0;
1100 for (i=1; i<=list[0] && !conflict; i++) {
1101 vni = dset->varname[list[i]];
1102 for (j=i+1; j<=list[0] && !conflict; j++) {
1103 vnj = dset->varname[list[j]];
1104 if (!strncmp(vni, vnj, len)) {
1105 conflict = 1;
1106 }
1107 }
1108 }
1109 if (!conflict) {
1110 break;
1111 }
1112 }
1113
1114 len += trim;
1115
1116 if (len < VNAMELEN - 3) {
1117 len = VNAMELEN - 3;
1118 } else if (len > VNAMELEN - 1) {
1119 len = VNAMELEN - 1;
1120 }
1121
1122 return len;
1123 }
1124
1125 struct mangled_name {
1126 int v;
1127 char *s;
1128 };
1129
1130 static struct mangled_name *mnames;
1131 static int n_mnames;
1132
destroy_mangled_names(void)1133 static void destroy_mangled_names (void)
1134 {
1135 int i;
1136
1137 for (i=0; i<n_mnames; i++) {
1138 free(mnames[i].s);
1139 }
1140 free(mnames);
1141 mnames = NULL;
1142 n_mnames = 0;
1143 }
1144
push_mangled_name(int v,const char * s)1145 static int push_mangled_name (int v, const char *s)
1146 {
1147 static struct mangled_name *tmp;
1148
1149 tmp = realloc(mnames, (n_mnames + 1) * sizeof *tmp);
1150
1151 if (tmp == NULL) {
1152 destroy_mangled_names();
1153 return E_ALLOC;
1154 }
1155
1156 mnames = tmp;
1157 mnames[n_mnames].v = v;
1158 mnames[n_mnames].s = gretl_strdup(s);
1159
1160 n_mnames++;
1161
1162 return 0;
1163 }
1164
get_mangled_name_by_id(int v)1165 static char *get_mangled_name_by_id (int v)
1166 {
1167 int i;
1168
1169 for (i=0; i<n_mnames; i++) {
1170 if (mnames[i].v == v) {
1171 return mnames[i].s;
1172 }
1173 }
1174
1175 return NULL;
1176 }
1177
make_mangled_name(int v,const char * s,int nc)1178 static int make_mangled_name (int v, const char *s, int nc)
1179 {
1180 char tmp[16], sfx[16];
1181 int n, err;
1182
1183 if (get_mangled_name_by_id(v)) {
1184 return 0;
1185 }
1186
1187 *tmp = *sfx = '\0';
1188 /* we know the bit out here must be distinct? */
1189 strcat(sfx, s + nc);
1190 n = strlen(sfx);
1191 strncat(tmp, s, nc - n);
1192 strcat(tmp, sfx);
1193
1194 #if TRDEBUG
1195 fprintf(stderr, "mangled name: '%s' -> '%s'\n", s, tmp);
1196 #endif
1197
1198 err = push_mangled_name(v, tmp);
1199
1200 return err;
1201 }
1202
1203 static int
transform_preprocess_list(int * list,const DATASET * dset,int f)1204 transform_preprocess_list (int *list, const DATASET *dset, int f)
1205 {
1206 int maxc = VNAMELEN - 3;
1207 int longnames = 0;
1208 char **S = NULL;
1209 int i, v, ok;
1210 int err = 0;
1211
1212 if (f == SQUARE || f == LDIFF || f == SDIFF || f == RESAMPLE) {
1213 /* 3-character prefixes */
1214 maxc--;
1215 } else if (f == DUMMIFY) {
1216 /* "D" plus suffix */
1217 maxc--;
1218 }
1219
1220 for (i=1; i<=list[0]; i++) {
1221 v = list[i];
1222 ok = 1;
1223
1224 if (f == LOGS || f == SQUARE) {
1225 if (v == 0) { /* FIXME?? */
1226 ok = 1;
1227 }
1228 if (gretl_isdummy(dset->t1, dset->t2, dset->Z[v])) {
1229 ok = 0;
1230 }
1231 } else if (f == LAGS) {
1232 if (v == 0) {
1233 gretl_errmsg_set(_("The constant cannot be lagged"));
1234 ok = 0;
1235 }
1236 } else if (f == DUMMIFY) {
1237 ok = v > 0 && accept_as_discrete(dset, v, 0);
1238 }
1239
1240 if (!ok) {
1241 gretl_list_delete_at_pos(list, i--);
1242 } else if (strlen(dset->varname[v]) > maxc) {
1243 strings_array_add(&S, &longnames, dset->varname[v]);
1244 }
1245 }
1246
1247 if (list[0] == 0) {
1248 err = E_TYPES;
1249 }
1250
1251 if (longnames > 0) {
1252 if (!err) {
1253 int j, herr = 0;
1254
1255 for (i=0; i<longnames && !herr; i++) {
1256 for (j=i+1; j<longnames && !herr; j++) {
1257 if (!strncmp(S[i], S[j], maxc)) {
1258 v = series_index(dset, S[i]);
1259 herr = make_mangled_name(v, S[i], maxc);
1260 v = series_index(dset, S[j]);
1261 herr += make_mangled_name(v, S[j], maxc);
1262 }
1263 }
1264 }
1265 }
1266
1267 strings_array_free(S, longnames);
1268 }
1269
1270 return err;
1271 }
1272
1273 /**
1274 * list_loggenr:
1275 * @list: on entry, list of variables to process; on exit,
1276 * holds the ID numbers of the generated variables.
1277 * @dset: dataset struct.
1278 *
1279 * Generates and adds to the data set the natural logs of the
1280 * variables given in @list.
1281 *
1282 * Returns: 0 on success, error code on error.
1283 */
1284
list_loggenr(int * list,DATASET * dset)1285 int list_loggenr (int *list, DATASET *dset)
1286 {
1287 int origv = dset->v;
1288 int tnum, i, j, v;
1289 int startlen;
1290 int l0 = 0;
1291 int err;
1292
1293 err = transform_preprocess_list(list, dset, LOGS);
1294 if (err) {
1295 return err;
1296 }
1297
1298 startlen = get_starting_length(list, dset, 2);
1299
1300 j = 1;
1301 for (i=1; i<=list[0]; i++) {
1302 v = list[i];
1303 tnum = get_transform(LOGS, v, 0, 0.0, dset, startlen,
1304 origv, NULL);
1305 if (tnum > 0) {
1306 list[j++] = tnum;
1307 l0++;
1308 }
1309 }
1310
1311 list[0] = l0;
1312
1313 destroy_mangled_names();
1314
1315 return (l0 > 0)? 0 : E_LOGS;
1316 }
1317
1318 /**
1319 * list_stdgenr:
1320 * @list: on entry, list of variables to process; on exit,
1321 * holds the ID numbers of the generated variables.
1322 * @dset: dataset struct.
1323 *
1324 * Generates and adds to the dataset standardized versions
1325 * of the series given in @list.
1326 *
1327 * Returns: 0 on success, error code on error.
1328 */
1329
list_stdgenr(int * list,DATASET * dset,gretlopt opt)1330 int list_stdgenr (int *list, DATASET *dset, gretlopt opt)
1331 {
1332 int origv = dset->v;
1333 int tnum, i, j, v;
1334 int startlen;
1335 int dfc = 1;
1336 int l0 = 0;
1337 int err;
1338
1339 err = transform_preprocess_list(list, dset, STDIZE);
1340 if (err) {
1341 return err;
1342 }
1343
1344 if (opt & OPT_C) {
1345 dfc = -1;
1346 } else if (opt & OPT_N) {
1347 dfc = 0;
1348 }
1349
1350 startlen = get_starting_length(list, dset, 2);
1351
1352 j = 1;
1353 for (i=1; i<=list[0]; i++) {
1354 v = list[i];
1355 tnum = get_transform(STDIZE, v, dfc, 0.0, dset, startlen,
1356 origv, NULL);
1357 if (tnum > 0) {
1358 list[j++] = tnum;
1359 l0++;
1360 }
1361 }
1362
1363 list[0] = l0;
1364
1365 destroy_mangled_names();
1366
1367 return (l0 > 0)? 0 : E_DATA;
1368 }
1369
make_lags_list(int * list,int order)1370 static int *make_lags_list (int *list, int order)
1371 {
1372 int i, v, nl = 0;
1373
1374 for (i=1; i<=list[0]; i++) {
1375 v = list[i];
1376 if (v > 0) {
1377 nl += order;
1378 }
1379 }
1380
1381 return gretl_list_new(nl);
1382 }
1383
lag_wanted(int p,const gretl_matrix * v,int n)1384 static int lag_wanted (int p, const gretl_matrix *v, int n)
1385 {
1386 int ret = 1;
1387
1388 if (v != NULL) {
1389 int i;
1390
1391 ret = 0; /* reverse the assumption */
1392 for (i=0; i<n; i++) {
1393 if (v->val[i] == p) {
1394 ret = 1;
1395 break;
1396 }
1397 }
1398 }
1399
1400 return ret;
1401 }
1402
process_hf_lags_input(int hf_min,int hf_max,DATASET * dset,int compfac,int * lf_min,int * lf_max,int * skip_first,int * n_terms)1403 static int process_hf_lags_input (int hf_min, int hf_max,
1404 DATASET *dset,
1405 int compfac,
1406 int *lf_min, int *lf_max,
1407 int *skip_first,
1408 int *n_terms)
1409 {
1410 if (dset->pd != 1 && dset->pd != 12 && dset->pd != 4) {
1411 return E_PDWRONG;
1412 }
1413
1414 *lf_min = (int) ceil(hf_min / (double) compfac);
1415 *lf_max = (int) ceil(hf_max / (double) compfac);
1416 *skip_first = (hf_min - 1) % compfac;
1417 *n_terms = hf_max - hf_min + 1;
1418
1419 if (*skip_first < 0) {
1420 /* handle leads */
1421 *skip_first = compfac + *skip_first;
1422 }
1423
1424 #if 0
1425 fprintf(stderr, "hf_min = %d, lf_min = %d\n", hf_min, *lf_min);
1426 fprintf(stderr, "hf_max = %d, lf_max = %d\n", hf_max, *lf_max);
1427 fprintf(stderr, "skip_first = %d\n", *skip_first);
1428 fprintf(stderr, "n_terms = %d\n", *n_terms);
1429 #endif
1430
1431 return 0;
1432 }
1433
1434 /**
1435 * list_laggenr:
1436 * @plist: on entry, pointer to list of variables to process. On exit
1437 * the list holds the ID numbers of the lag variables.
1438 * @lmin: minimum lag to include (defaults to 1).
1439 * @lmax: maximum lag to include (or 0 for automatic).
1440 * @lvec: (alternative to @lmin, @lmax) vector holding lags to generate.
1441 * @dset: dataset struct.
1442 * @compfac: compaction factor (for MIDAS lists only, otherwise give 0).
1443 * @opt: may contain OPT_L to order the list by lag rather than
1444 * by variable.
1445 *
1446 * Generates and adds to the data set lagged values of the
1447 * variables given in the list pointed to by @plist.
1448 *
1449 * Returns: 0 on successful completion, 1 on error.
1450 */
1451
list_laggenr(int ** plist,int lmin,int lmax,const gretl_matrix * lvec,DATASET * dset,int compfac,gretlopt opt)1452 int list_laggenr (int **plist, int lmin, int lmax,
1453 const gretl_matrix *lvec,
1454 DATASET *dset, int compfac,
1455 gretlopt opt)
1456 {
1457 int origv = dset->v;
1458 int *list = *plist;
1459 int *laglist = NULL;
1460 int l, i, j, v, lv;
1461 int startlen, l0 = 0;
1462 int skip_first = 0;
1463 int veclen = 0;
1464 int n_terms = 0;
1465 int err = 0;
1466
1467 if (compfac < 0) {
1468 return E_INVARG;
1469 } else if (compfac > 0) {
1470 /* MIDAS: we want @lmin and @lmax, not @lvec */
1471 if (lvec != NULL) {
1472 return E_INVARG;
1473 }
1474 if (!gretl_is_midas_list(list, dset)) {
1475 gretl_warnmsg_set(_("The argument does not seem to be a MIDAS list"));
1476 }
1477 }
1478
1479 if (lvec != NULL) {
1480 /* got a vector of lags in @lvec */
1481 veclen = gretl_vector_get_length(lvec);
1482 if (veclen == 0) {
1483 err = E_INVARG;
1484 } else {
1485 lmin = lvec->val[0];
1486 lmax = lvec->val[veclen-1];
1487 n_terms = veclen;
1488 }
1489 } else if (compfac > 0) {
1490 /* the MIDAS case */
1491 int hmin = lmin, hmax = lmax;
1492
1493 if (hmax < hmin) {
1494 err = E_INVARG;
1495 } else {
1496 err = process_hf_lags_input(hmin, hmax, dset, compfac, &lmin,
1497 &lmax, &skip_first, &n_terms);
1498 }
1499 } else {
1500 /* non-MIDAS, using order = @lmax, ignoring @lmin */
1501 if (lmax < 0 || lmax > dset->n) {
1502 gretl_errmsg_sprintf(_("Invalid lag order %d"), lmax);
1503 return E_INVARG;
1504 } else if (lmax == 0) {
1505 lmax = default_lag_order(dset);
1506 }
1507 lmin = 1;
1508 n_terms = lmax;
1509 }
1510
1511 if (err) {
1512 return err;
1513 }
1514
1515 /* Note: at this point @lmin and @lmax are defined in terms of
1516 the frequency of @dset, even when we're handling MIDAS data.
1517 In the regular case, @n_terms holds the number of lag terms
1518 wanted per series in the input @list, but in the MIDAS case
1519 @n_terms is the total number of hf lag terms wanted (since
1520 the input @list actually represents a single series).
1521 */
1522
1523 err = transform_preprocess_list(list, dset, LAGS);
1524 if (err) {
1525 return err;
1526 }
1527
1528 if (compfac) {
1529 laglist = gretl_list_new(n_terms);
1530 } else {
1531 laglist = make_lags_list(list, n_terms);
1532 }
1533
1534 if (laglist == NULL) {
1535 destroy_mangled_names();
1536 return E_ALLOC;
1537 }
1538
1539 startlen = get_starting_length(list, dset, (lmax > 9)? 3 : 2);
1540
1541 j = 1;
1542
1543 if (compfac > 0) {
1544 /* MIDAS high-frequency lags, by lags */
1545 int hfpmax = n_terms + skip_first;
1546 int mp, hfp = 0;
1547
1548 for (l=lmin; l<=lmax; l++) {
1549 for (i=1; i<=list[0]; i++) {
1550 hfp++;
1551 if (hfp <= skip_first) {
1552 continue;
1553 } else if (hfp > hfpmax) {
1554 break;
1555 }
1556 v = list[i];
1557 lv = get_transform(LAGS, v, l, 0.0, dset,
1558 startlen, origv, NULL);
1559 if (lv > 0) {
1560 mp = series_get_midas_period(dset, v);
1561 series_set_midas_period(dset, lv, mp);
1562 mp = series_get_midas_freq(dset, v);
1563 series_set_midas_freq(dset, lv, mp);
1564 laglist[j++] = lv;
1565 l0++;
1566 }
1567 }
1568 }
1569 } else if (opt & OPT_L) {
1570 /* order the list by lags */
1571 for (l=lmin; l<=lmax; l++) {
1572 if (!lag_wanted(l, lvec, veclen)) {
1573 continue;
1574 }
1575 for (i=1; i<=list[0]; i++) {
1576 v = list[i];
1577 lv = get_transform(LAGS, v, l, 0.0, dset,
1578 startlen, origv, NULL);
1579 if (lv > 0) {
1580 laglist[j++] = lv;
1581 l0++;
1582 }
1583 }
1584 }
1585 } else {
1586 /* order by variable */
1587 for (i=1; i<=list[0]; i++) {
1588 v = list[i];
1589 for (l=lmin; l<=lmax; l++) {
1590 if (!lag_wanted(l, lvec, veclen)) {
1591 continue;
1592 }
1593 lv = get_transform(LAGS, v, l, 0.0, dset,
1594 startlen, origv, NULL);
1595 if (lv > 0) {
1596 laglist[j++] = lv;
1597 l0++;
1598 }
1599 }
1600 }
1601 }
1602
1603 destroy_mangled_names();
1604
1605 laglist[0] = l0;
1606
1607 free(*plist);
1608 *plist = laglist;
1609
1610 return 0;
1611 }
1612
1613 /**
1614 * default_lag_order:
1615 * @dset: data information struct.
1616 *
1617 * Returns: the default lag order for generating lags, performing
1618 * autocorrelation test, and so on.
1619 */
1620
default_lag_order(const DATASET * dset)1621 int default_lag_order (const DATASET *dset)
1622 {
1623 int order = 1;
1624
1625 if (!dataset_is_panel(dset)) {
1626 order = (dset->pd < 52)? dset->pd : 14;
1627 }
1628
1629 return order;
1630 }
1631
1632 /**
1633 * list_diffgenr:
1634 * @list: on entry, list of variables to process; on exit,
1635 * ID numbers of the generated variables.
1636 * @ci: must be DIFF, LDIFF or SDIFF.
1637 * @dset: pointer to dataset struct.
1638 *
1639 * Generate differences of the variables in @list, and add them
1640 * to the data set. If @ci is DIFF these are ordinary first
1641 * differences; if @ci is LDIFF they are log differences; and
1642 * if @ci is SDIFF they are seasonal differences.
1643 *
1644 * Returns: 0 on successful completion, non-zero on error.
1645 */
1646
list_diffgenr(int * list,int ci,DATASET * dset)1647 int list_diffgenr (int *list, int ci, DATASET *dset)
1648 {
1649 int origv = dset->v;
1650 int i, v, startlen;
1651 int tnum, l0 = 0;
1652 int err;
1653
1654 if (list[0] == 0) {
1655 return 0;
1656 }
1657
1658 if (ci != DIFF && ci != LDIFF && ci != SDIFF) {
1659 return 1;
1660 }
1661
1662 if (ci == SDIFF && !dataset_is_seasonal(dset)) {
1663 return E_PDWRONG;
1664 }
1665
1666 err = transform_preprocess_list(list, dset, ci);
1667 if (err) {
1668 return err;
1669 }
1670
1671 startlen = get_starting_length(list, dset, (ci == DIFF)? 2 : 3);
1672
1673 for (i=1; i<=list[0] && !err; i++) {
1674 v = list[i];
1675 tnum = get_transform(ci, v, 0, 0.0, dset,
1676 startlen, origv, NULL);
1677 if (tnum < 0) {
1678 err = 1;
1679 } else {
1680 list[i] = tnum;
1681 l0++;
1682 }
1683 }
1684
1685 list[0] = l0;
1686
1687 destroy_mangled_names();
1688
1689 return err;
1690 }
1691
check_hf_difflist(const int * list,const DATASET * dset,int ci,int * n_add)1692 static int check_hf_difflist (const int *list,
1693 const DATASET *dset,
1694 int ci, int *n_add)
1695 {
1696 char vname[VNAMELEN];
1697 int i, v, li, err = 0;
1698
1699 for (i=1; i<=list[0] && !err; i++) {
1700 li = list[i];
1701 make_transform_varname(vname, dset->varname[li],
1702 ci, 0, VNAMELEN - 8);
1703 v = current_series_index(dset, vname);
1704 if (v > 0) {
1705 *n_add -= 1;
1706 } else if (gretl_is_user_var(vname)) {
1707 gretl_errmsg_sprintf(_("%s: collides with existing object name"),
1708 vname);
1709 err = E_TYPES;
1710 }
1711 }
1712
1713 return err;
1714 }
1715
1716 /**
1717 * hf_list_diffgenr:
1718 * @list: on entry, midas list of variables to process; on exit,
1719 * ID numbers of the generated variables.
1720 * @ci: must be DIFF or LDIFF.
1721 * @parm: optional scalar multiplier.
1722 * @dset: pointer to dataset struct.
1723 *
1724 * Generate high-frequency differences of the variables in @list
1725 * and add them to the data set. If @ci is DIFF these are ordinary
1726 * first differences; if @ci is LDIFF they are log differences.
1727 * If @parm is not NA then the new series are all multiplied by
1728 * the specified value (as in multiplication of log-differences
1729 * by 100).
1730 *
1731 * Returns: 0 on successful completion, non-zero on error.
1732 */
1733
hf_list_diffgenr(int * list,int ci,double parm,DATASET * dset)1734 int hf_list_diffgenr (int *list, int ci, double parm, DATASET *dset)
1735 {
1736 gretl_matrix *dX = NULL;
1737 int n_add, hfci = 0;
1738 int set_midas = 1;
1739 int err = 0;
1740
1741 if (list[0] == 0) {
1742 return 0;
1743 }
1744
1745 if (ci == DIFF) {
1746 hfci = HFDIFF;
1747 } else if (ci == LDIFF) {
1748 hfci = HFLDIFF;
1749 } else {
1750 return E_INVARG;
1751 }
1752
1753 if (!dataset_is_time_series(dset)) {
1754 return E_PDWRONG;
1755 }
1756
1757 n_add = list[0];
1758 err = check_hf_difflist(list, dset, ci, &n_add);
1759 if (err) {
1760 return err;
1761 }
1762
1763 if (!gretl_is_midas_list(list, dset)) {
1764 gretl_warnmsg_set(_("The argument does not seem to be a MIDAS list"));
1765 set_midas = 0;
1766 }
1767
1768 if (na(parm)) {
1769 /* set to default */
1770 parm = 1.0;
1771 }
1772
1773 dX = get_hf_diffs(list, ci, parm, dset, &err);
1774
1775 if (!err) {
1776 char vname[VNAMELEN];
1777 char label[MAXLABEL];
1778 const char *parent;
1779 int i, s, t, n = dX->cols;
1780 int vi, v0 = dset->v;
1781 int offset = 0;
1782
1783 err = dataset_add_series(dset, n_add);
1784
1785 if (!err) {
1786 for (i=0; i<n; i++) {
1787 parent = dset->varname[list[i+1]];
1788 make_transform_varname(vname, parent, ci, 0,
1789 VNAMELEN - 8);
1790 vi = current_series_index(dset, vname);
1791 if (vi < 0) {
1792 vi = v0 + offset;
1793 offset++;
1794 strcpy(dset->varname[vi], vname);
1795 make_transform_label(label, parent, hfci, 0);
1796 series_record_label(dset, vi, label);
1797 }
1798 s = 0;
1799 for (t=dset->t1; t<=dset->t2; t++) {
1800 dset->Z[vi][t] = gretl_matrix_get(dX, s++, i);
1801 }
1802 list[i+1] = vi;
1803 }
1804 list[0] = n;
1805 }
1806 gretl_matrix_free(dX);
1807 }
1808
1809 if (!err && set_midas) {
1810 gretl_list_set_midas(list, dset);
1811 }
1812
1813 return err;
1814 }
1815
1816 /**
1817 * list_orthdev:
1818 * @list: list of variables to process.
1819 * @dset: dataset struct.
1820 *
1821 * Generate orthogonal deviations of the variables in @list, and add
1822 * them to the data set.
1823 *
1824 * Returns: 0 on success, error code on error.
1825 */
1826
list_orthdev(int * list,DATASET * dset)1827 int list_orthdev (int *list, DATASET *dset)
1828 {
1829 int origv = dset->v;
1830 int i, v, startlen;
1831 int tnum, l0 = 0;
1832 int err;
1833
1834 if (list[0] == 0) {
1835 return 0;
1836 }
1837
1838 if (!dataset_is_panel(dset)) {
1839 return E_PDWRONG;
1840 }
1841
1842 err = transform_preprocess_list(list, dset, ORTHDEV);
1843 if (err) {
1844 return err;
1845 }
1846
1847 startlen = get_starting_length(list, dset, 2);
1848
1849 for (i=1; i<=list[0] && !err; i++) {
1850 v = list[i];
1851 tnum = get_transform(ORTHDEV, v, 0, 0.0, dset,
1852 startlen, origv, NULL);
1853 if (tnum < 0) {
1854 err = 1;
1855 } else {
1856 list[i] = tnum;
1857 l0++;
1858 }
1859 }
1860
1861 list[0] = l0;
1862
1863 destroy_mangled_names();
1864
1865 return err;
1866 }
1867
1868 /**
1869 * list_xpxgenr:
1870 * @plist: pointer to list of variables to process. On exit
1871 * the list holds the ID numbers of the squares (and possibly
1872 * cross-products).
1873 * @dset: dataset struct.
1874 * @opt: If OPT_O, both squares and cross-products are generated,
1875 * otherwise only squares.
1876 *
1877 * Generates and adds to the data set squares and (if @opt is OPT_O)
1878 * cross-products of the variables given in the list pointed to
1879 * by @plist.
1880 *
1881 * Returns: 0 on success, error code on error.
1882 */
1883
list_xpxgenr(int ** plist,DATASET * dset,gretlopt opt)1884 int list_xpxgenr (int **plist, DATASET *dset, gretlopt opt)
1885 {
1886 int origv = dset->v;
1887 int *list = *plist;
1888 int *xpxlist = NULL;
1889 int tnum, i, j, k, vi, vj;
1890 int startlen, l0;
1891 int err;
1892
1893 err = transform_preprocess_list(list, dset, SQUARE);
1894 if (err) {
1895 return err;
1896 }
1897
1898 l0 = list[0];
1899
1900 if (opt & OPT_O) {
1901 int maxterms = (l0 * l0 + l0) / 2;
1902
1903 xpxlist = gretl_list_new(maxterms);
1904 if (xpxlist == NULL) {
1905 destroy_mangled_names();
1906 return E_ALLOC;
1907 }
1908 } else {
1909 xpxlist = list;
1910 }
1911
1912 startlen = get_starting_length(list, dset, 3);
1913 xpxlist[0] = 0;
1914
1915 k = 1;
1916 for (i=1; i<=l0; i++) {
1917 vi = list[i];
1918 tnum = get_transform(SQUARE, vi, vi, 0.0, dset,
1919 startlen, origv, NULL);
1920 if (tnum > 0) {
1921 xpxlist[k++] = tnum;
1922 xpxlist[0] += 1;
1923 }
1924 if (opt & OPT_O) {
1925 for (j=i+1; j<=l0; j++) {
1926 vj = list[j];
1927 tnum = xpxgenr(vi, vj, dset);
1928 if (tnum > 0) {
1929 xpxlist[k++] = tnum;
1930 xpxlist[0] += 1;
1931 }
1932 }
1933 }
1934 }
1935
1936 destroy_mangled_names();
1937
1938 if (opt & OPT_O) {
1939 free(*plist);
1940 *plist = xpxlist;
1941 }
1942
1943 return (xpxlist[0] > 0)? 0 : E_SQUARES;
1944 }
1945
list_resample(int * list,DATASET * dset)1946 int list_resample (int *list, DATASET *dset)
1947 {
1948 int origv = dset->v;
1949 int *z = NULL;
1950 int tnum, i, j, vi;
1951 int n, startlen, l0;
1952 int k1, k2;
1953 int err;
1954
1955 err = transform_preprocess_list(list, dset, RESAMPLE);
1956 if (err) {
1957 return err;
1958 }
1959
1960 if (dataset_is_panel(dset)) {
1961 k1 = dset->t1 / dset->pd;
1962 k2 = dset->t2 / dset->pd;
1963 } else {
1964 k1 = dset->t1;
1965 k2 = dset->t2;
1966 }
1967
1968 n = k2 - k1 + 1;
1969
1970 z = malloc(n * sizeof *z);
1971 if (z == NULL) {
1972 return E_ALLOC;
1973 }
1974
1975 /* generate uniform drawings from sample range */
1976 gretl_rand_int_minmax(z, n, k1, k2);
1977
1978 l0 = list[0];
1979 startlen = get_starting_length(list, dset, 3);
1980 list[0] = 0;
1981
1982 j = 1;
1983 for (i=1; i<=l0; i++) {
1984 vi = list[i];
1985 tnum = get_transform(RESAMPLE, vi, 0, 0.0, dset,
1986 startlen, origv, z);
1987 if (tnum > 0) {
1988 list[j++] = tnum;
1989 list[0] += 1;
1990 }
1991 }
1992
1993 free(z);
1994 destroy_mangled_names();
1995
1996 return (list[0] > 0)? 0 : E_DATA;
1997 }
1998
1999 /**
2000 * list_dropcoll:
2001 * @list: on entry, list of variables to process; on exit,
2002 * the original list minus collinear terms.
2003 * @dset: dataset struct.
2004 *
2005 * Drop collinear variables from @list.
2006 *
2007 * Returns: 0 on success, error code on error.
2008 */
2009
list_dropcoll(int * list,double eps,DATASET * dset)2010 int list_dropcoll (int *list, double eps, DATASET *dset)
2011 {
2012 gretl_matrix *R = NULL;
2013 gretl_matrix *X = NULL;
2014 int T, k, err = 0;
2015
2016 if (list == NULL) {
2017 return E_DATA;
2018 } else if (list[0] < 2) {
2019 /* no-op */
2020 return 0;
2021 }
2022
2023 if (eps < 0) {
2024 return E_DATA;
2025 } else if (na(eps)) {
2026 eps = R_DIAG_MIN;
2027 }
2028
2029 X = gretl_matrix_data_subset(list, dset, dset->t1, dset->t2,
2030 M_MISSING_SKIP, &err);
2031 if (err) {
2032 return err;
2033 }
2034
2035 T = X->rows;
2036 k = X->cols;
2037
2038 if (T < k) {
2039 gretl_errmsg_sprintf(_("A minimum of %d observations is required"), k);
2040 err = E_TOOFEW;
2041 } else {
2042 R = gretl_matrix_alloc(k, k);
2043 if (R == NULL) {
2044 err = E_ALLOC;
2045 } else {
2046 err = gretl_matrix_QR_decomp(X, R);
2047 }
2048 }
2049
2050 if (!err) {
2051 double rii;
2052 int i, j = 1;
2053
2054 for (i=0; i<k && !err; i++, j++) {
2055 rii = gretl_matrix_get(R, i, i);
2056 if (fabs(rii) < eps) {
2057 err = gretl_list_delete_at_pos(list, j--);
2058 }
2059 }
2060 }
2061
2062 gretl_matrix_free(X);
2063 gretl_matrix_free(R);
2064
2065 return err;
2066 }
2067
2068 #define DUMDEBUG 0
2069
real_list_dumgenr(int ** plist,DATASET * dset,double oddval,gretlopt opt)2070 static int real_list_dumgenr (int **plist, DATASET *dset,
2071 double oddval, gretlopt opt)
2072 {
2073 int origv = dset->v;
2074 int *list = *plist;
2075 int *tmplist = NULL;
2076 double *x = NULL;
2077 int i, j, t, n;
2078 int startlen;
2079 int err;
2080
2081 err = transform_preprocess_list(list, dset, DUMMIFY);
2082 if (err) {
2083 return err;
2084 }
2085
2086 tmplist = gretl_null_list();
2087 if (tmplist == NULL) {
2088 err = E_ALLOC;
2089 goto bailout;
2090 }
2091
2092 x = malloc(dset->n * sizeof *x);
2093 if (x == NULL) {
2094 err = E_ALLOC;
2095 goto bailout;
2096 }
2097
2098 startlen = get_starting_length(list, dset, 3);
2099
2100 for (i=1; i<=list[0] && !err; i++) {
2101 int vi = list[i];
2102 int nuniq, tnum;
2103 int jmin, jmax;
2104 double xt;
2105
2106 n = 0;
2107 for (t=dset->t1; t<=dset->t2; t++) {
2108 xt = dset->Z[vi][t];
2109 if (!na(xt)) {
2110 x[n++] = xt;
2111 }
2112 }
2113
2114 if (n == 0) {
2115 continue;
2116 }
2117
2118 qsort(x, n, sizeof *x, gretl_compare_doubles);
2119 nuniq = count_distinct_values(x, n);
2120
2121 if (nuniq == 1) {
2122 continue;
2123 }
2124
2125 rearrange_id_array(x, nuniq, n);
2126
2127 jmin = (opt & OPT_F)? 1 : 0;
2128 jmax = (opt & OPT_L)? nuniq - 1 : nuniq;
2129
2130 #if DUMDEBUG
2131 fprintf(stderr, "variable %d has %d distinct values\n", vi, nuniq);
2132 if (opt & OPT_F) fprintf(stderr, "skipping lowest value\n");
2133 if (opt & OPT_L) fprintf(stderr, "skipping highest value\n");
2134 #endif
2135
2136 for (j=jmin; j<jmax && !err; j++) {
2137 if (x[j] != oddval) {
2138 tnum = get_transform(DUMMIFY, vi, j+1, x[j], dset,
2139 startlen, origv, NULL);
2140 #if DUMDEBUG
2141 fprintf(stderr, "%s: for value %g tnum = %d\n",
2142 dset->varname[vi], x[j], tnum);
2143 #endif
2144 if (tnum > 0) {
2145 tmplist = gretl_list_append_term(&tmplist, tnum);
2146 if (tmplist == NULL) {
2147 err = E_ALLOC;
2148 }
2149 } else {
2150 err = E_DATA;
2151 }
2152 }
2153 }
2154 }
2155
2156 if (!err && !(opt & OPT_A) && tmplist[0] == 0) {
2157 /* don't fail here if doing auto-dummify (OPT_A) */
2158 gretl_errmsg_set(_("dummify: no suitable variables were found"));
2159 #if DUMDEBUG
2160 printlist(list, "list for which dummify failed");
2161 #endif
2162 err = E_DATA;
2163 }
2164
2165 free(x);
2166
2167 bailout:
2168
2169 if (!err && tmplist[0] > 0) {
2170 free(*plist);
2171 *plist = tmplist;
2172 #if DUMDEBUG
2173 printlist(tmplist, "output list");
2174 #endif
2175 } else {
2176 free(tmplist);
2177 }
2178
2179 destroy_mangled_names();
2180
2181 return err;
2182 }
2183
2184 /**
2185 * list_dumgenr:
2186 * @plist: pointer to list of variables to process; on exit
2187 * the list holds the ID numbers of the generated dummies.
2188 * @dset: dataset struct.
2189 * @opt: can include OPT_F to drop the first value, OPT_L to drop
2190 * the last value.
2191 *
2192 * For each of the variables given in the list to which @plist
2193 * points, generates and adds to the data set k dummy variables
2194 * coding for the k distinct values of the variable in question.
2195 * All these variables must have already been marked as discrete.
2196 * If the OPT_F or OPT_L option is given, either the first or
2197 * the last value of each variable is taken as the "base" and is
2198 * not given a dummy encoding (that is, only k - 1 dummies are
2199 * added for each variable).
2200 *
2201 * Returns: 0 on success, error code on error.
2202 */
2203
list_dumgenr(int ** plist,DATASET * dset,gretlopt opt)2204 int list_dumgenr (int **plist, DATASET *dset, gretlopt opt)
2205 {
2206 return real_list_dumgenr(plist, dset, NADBL, opt);
2207 }
2208
2209 /**
2210 * dumgenr_with_oddval:
2211 * @plist: pointer to list of variables to process; on exit
2212 * the list holds the ID numbers of the generated dummies.
2213 * @dset: dataset struct.
2214 * @oddval: value which should be skipped when encoding the
2215 * input values as dummies.
2216 *
2217 * For each of the variables given in the list to which @plist
2218 * points, generates and adds to the data set k dummy variables
2219 * coding for the k distinct values of the variable in question.
2220 * All these variables must have already been marked as discrete.
2221 * if @oddval is not %NADBL, it is treated as the omitted
2222 * category and only k - 1 dummies are added for each variable).
2223 *
2224 * Returns: 0 on success, error code on error.
2225 */
2226
dumgenr_with_oddval(int ** plist,DATASET * dset,double oddval)2227 int dumgenr_with_oddval (int **plist, DATASET *dset, double oddval)
2228 {
2229 return real_list_dumgenr(plist, dset, oddval, OPT_NONE);
2230 }
2231
2232 /**
2233 * auto_dummify_list:
2234 * @plist: pointer to list of variables to process.
2235 * @dset: dataset struct.
2236 *
2237 * Produces a list in which any coded series in the original
2238 * list are replaced by a set of dummy variables. If the
2239 * original list contains no such series it is not altered.
2240 * In generating dummy series, the omitted category is the
2241 * minimum value of the coding variable.
2242 *
2243 * Returns: 0 on success, error code on error.
2244 */
2245
auto_dummify_list(int ** plist,DATASET * dset)2246 int auto_dummify_list (int **plist, DATASET *dset)
2247 {
2248 int *orig = *plist;
2249 int *list = NULL;
2250 int *dlist = NULL;
2251 int i, vi;
2252 int err = 0;
2253
2254 /* empty starting point for full list */
2255 list = gretl_null_list();
2256
2257 for (i=1; i<=orig[0] && !err; i++) {
2258 vi = orig[i];
2259 if (series_is_coded(dset, vi)) {
2260 /* coded series: split into dummies */
2261 dlist = gretl_list_new(1);
2262 dlist[1] = vi;
2263 err = real_list_dumgenr(&dlist, dset, NADBL, OPT_F | OPT_A);
2264 if (!err) {
2265 gretl_list_append_list(&list, dlist, &err);
2266 }
2267 free(dlist);
2268 } else if (!in_gretl_list(list, vi)) {
2269 /* non-coded, not already present */
2270 list = gretl_list_append_term(&list, vi);
2271 if (list == NULL) {
2272 err = E_ALLOC;
2273 }
2274 }
2275 }
2276
2277 if (err) {
2278 free(list);
2279 } else if (list != *plist) {
2280 /* replace original list */
2281 free(*plist);
2282 *plist = list;
2283 }
2284
2285 return err;
2286 }
2287
2288 /**
2289 * list_makediscrete:
2290 * @list: list of variables to process.
2291 * @dset: data information struct.
2292 * @opt: if OPT_R, reverse the operation.
2293 *
2294 * Sets the variables given in @list as discrete, unless
2295 * opt is OPT_R, in which case the variables are set as
2296 * continuous.
2297 *
2298 * Returns: 0 on success, error code on error.
2299 */
2300
list_makediscrete(const int * list,DATASET * dset,gretlopt opt)2301 int list_makediscrete (const int *list, DATASET *dset, gretlopt opt)
2302 {
2303 int disc = !(opt & OPT_R);
2304 int i, v, err = 0;
2305
2306 for (i=1; i<=list[0]; i++) {
2307 v = list[i];
2308 if (v > 0) {
2309 series_set_discrete(dset, v, disc);
2310 }
2311 }
2312
2313 return err;
2314 }
2315