1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #include "libgretl.h"
21 #include "uservar.h"
22 #include "gretl_func.h"
23 #include "matrix_extra.h"
24 
25 #define TRDEBUG 0
26 
27 /**
28  * SECTION:transforms
29  * @short_description: standard transformations of series in the dataset
30  * @title: Transformations
31  * @include: libgretl.h
32  *
33  * Functions to generate standard transformations (logs, lags, first
34  * differences and so on) of series in the dataset.
35  */
36 
37 enum {
38     VARS_IDENTICAL,
39     NEW_HAS_MISSING,
40     OLD_HAS_MISSING,
41     VARS_DIFFER
42 } varcomp_codes;
43 
44 enum {
45     INVERSE = NC + 1,
46     RESAMPLE,
47     HFDIFF,
48     HFLDIFF
49 };
50 
51 static char *get_mangled_name_by_id (int v);
52 
53 /* Note: in this comparison @test is the newly generated series
54    and @cand is the previously existing candidate series.
55 */
56 
check_vals(const double * test,const double * cand,int n)57 static int check_vals (const double *test, const double *cand, int n)
58 {
59     int t, ret = VARS_IDENTICAL;
60 
61     for (t=0; t<n; t++) {
62 	if (test[t] == cand[t] || (na(test[t]) && na(cand[t]))) {
63 	    /* values the same, or both NA */
64 	    continue;
65 	}
66 	/* values differ at obs @t */
67 	if (na(test[t]) && !na(cand[t])) {
68 	    if (ret == VARS_IDENTICAL || ret == NEW_HAS_MISSING) {
69 		ret = NEW_HAS_MISSING;
70 	    } else {
71 		ret = VARS_DIFFER;
72 	    }
73 	} else if (!na(test[t]) && na(cand[t])) {
74 	    if (ret == VARS_IDENTICAL || ret == OLD_HAS_MISSING) {
75 		ret = OLD_HAS_MISSING;
76 	    } else {
77 		ret = VARS_DIFFER;
78 	    }
79 	} else {
80 	    ret = VARS_DIFFER;
81 	}
82 	if (ret == VARS_DIFFER) {
83 	    break;
84 	}
85     }
86 
87     return ret;
88 }
89 
90 static int
make_transform_varname(char * vname,const char * orig,int ci,int aux,int len)91 make_transform_varname (char *vname, const char *orig, int ci,
92 			int aux, int len)
93 {
94     *vname = '\0';
95 
96     if (ci == DIFF) {
97 	strcpy(vname, "d_");
98 	strncat(vname, orig, len - 2);
99     } else if (ci == LDIFF) {
100 	strcpy(vname, "ld_");
101 	strncat(vname, orig, len - 3);
102     } else if (ci == SDIFF) {
103 	strcpy(vname, "sd_");
104 	strncat(vname, orig, len - 3);
105     } else if (ci == ORTHDEV) {
106 	strcpy(vname, "o_");
107 	strncat(vname, orig, len - 2);
108     } else if (ci == LOGS) {
109 	strcpy(vname, "l_");
110 	strncat(vname, orig, len - 2);
111     } else if (ci == SQUARE) {
112 	strcpy(vname, "sq_");
113 	strncat(vname, orig, len - 3);
114     } else if (ci == LAGS) {
115 	char ext[12];
116 
117 	if (aux >= 0) {
118 	    /* an actual lag (or contemporaneous value) */
119 	    sprintf(ext, "_%d", aux);
120 	    strncat(vname, orig, len - strlen(ext));
121 	    strcat(vname, ext);
122 	} else {
123 	    /* a lead: in naming the output series we'll
124 	       try to avoid appending a plain digit to
125 	       a series name that already ends in a digit,
126 	       since this may get confusing
127 	    */
128 	    int n1, n2;
129 
130 	    sprintf(ext, "%d", -aux);
131 	    n1 = strlen(orig);
132 	    n2 = strlen(ext);
133 	    if (n1 + n2 + 2 < VNAMELEN) {
134 		if (isdigit(orig[n1-1])) {
135 		    /* separate the digits */
136 		    sprintf(vname, "%s_f%d", orig, -aux);
137 		} else {
138 		    sprintf(vname, "%s%d", orig, -aux);
139 		}
140 	    } else {
141 		/* Oh, well, can't be helped */
142 		strncat(vname, orig, len - n2);
143 		strcat(vname, ext);
144 	    }
145 	}
146     } else if (ci == DUMMIFY) {
147 	char ext[16];
148 
149 	if (aux < 0) {
150 	    sprintf(ext, "_m%d", abs(aux));
151 	} else {
152 	    sprintf(ext, "_%d", aux);
153 	}
154 	strcpy(vname, "D");
155 	strncat(vname, orig, len - strlen(ext) - 1);
156 	strcat(vname, ext);
157     } else if (ci == INVERSE) {
158 	strcpy(vname, "i_");
159 	strncat(vname, orig, len - 2);
160     } else if (ci == STDIZE) {
161 	if (aux < 0) {
162 	    strcpy(vname, "c_");
163 	} else {
164 	    strcpy(vname, "s_");
165 	}
166 	strncat(vname, orig, len - 2);
167     } else if (ci == RESAMPLE) {
168 	strcpy(vname, "rs_");
169 	strncat(vname, orig, len - 3);
170     }
171 
172 #if TRDEBUG
173     fprintf(stderr, "make_transform_varname:\n"
174 	    "orig='%s', ci=%d, len=%d, vname='%s'\n",
175 	    orig, ci, len, vname);
176 #endif
177 
178     return 0;
179 }
180 
181 static int
make_transform_label(char * label,const char * parent,int ci,int aux)182 make_transform_label (char *label, const char *parent,
183 		      int ci, int aux)
184 {
185     gchar *s = NULL;
186     int err = 0;
187 
188     if (ci == DIFF) {
189 	s = g_strdup_printf(_("= first difference of %s"), parent);
190     } else if (ci == LDIFF) {
191 	s = g_strdup_printf(_("= log difference of %s"), parent);
192     } else if (ci == SDIFF) {
193 	s = g_strdup_printf(_("= seasonal difference of %s"), parent);
194     } else if (ci == LOGS) {
195 	s = g_strdup_printf(_("= log of %s"), parent);
196     } else if (ci == SQUARE) {
197 	s = g_strdup_printf(_("= %s squared"), parent);
198     } else if (ci == LAGS) {
199 	/* @aux = lag */
200 	if (aux >= 0) {
201 	    s = g_strdup_printf("= %s(t - %d)", parent, aux);
202 	} else {
203 	    s = g_strdup_printf("= %s(t + %d)", parent, -aux);
204 	}
205     } else if (ci == INVERSE) {
206 	s = g_strdup_printf("= 1/%s", parent);
207     } else if (ci == STDIZE) {
208 	/* @aux = dfcorr */
209 	if (aux < 0) {
210 	    s = g_strdup_printf(_("= centered %s"), parent);
211 	} else {
212 	    s = g_strdup_printf(_("= standardized %s"), parent);
213 	}
214     } else if (ci == RESAMPLE) {
215 	s = g_strdup_printf(_("= resampled %s"), parent);
216     } else if (ci == HFDIFF) {
217 	s = g_strdup_printf(_("= high-frequency difference of %s"), parent);
218     } else if (ci == HFLDIFF) {
219 	s = g_strdup_printf(_("= high-frequency log difference of %s"), parent);
220     } else {
221 	err = 1;
222     }
223 
224     if (!err) {
225 	strcpy(label, gretl_utf8_truncate(s, MAXLABEL-1));
226 	g_free(s);
227     }
228 
229     return err;
230 }
231 
232 /**
233  * standard_lag_of:
234  * @v: ID number of series to test.
235  * @parent: ID of potential parent series.
236  * @dset: dataset information.
237  *
238  * Returns: the lag order of series @v, if it is marked as
239  * a lag of @parent, otherwise 0.
240  */
241 
standard_lag_of(int v,int parent,const DATASET * dset)242 int standard_lag_of (int v, int parent, const DATASET *dset)
243 {
244     int pv = 0, ret = 0;
245 
246     if (dset == NULL || v <= 0 || v >= dset->v) {
247 	return 0;
248     }
249 
250     if (series_get_transform(dset, v) == LAGS) {
251 	pv = series_get_parent_id(dset, v);
252 	if (pv == parent) {
253 	    ret = series_get_lag(dset, v);
254 	}
255     }
256 
257     return ret;
258 }
259 
260 /**
261  * is_standard_diff:
262  * @v: ID number of variable to test.
263  * @dset: dataset information.
264  * @parent: location to receive ID number of parent variable,
265  * or NULL.
266  *
267  * Returns: 1 if the variable @v is marked as being the first
268  * difference of some "parent" variable in the dataset,
269  * otherwise 0.
270  */
271 
is_standard_diff(int v,const DATASET * dset,int * parent)272 int is_standard_diff (int v, const DATASET *dset, int *parent)
273 {
274     int pv = 0, ret = 0;
275 
276     if (v <= 0 || v >= dset->v) {
277 	return 0;
278     }
279 
280     if (series_get_transform(dset, v) == DIFF) {
281 	pv = series_get_parent_id(dset, v);
282 	if (pv > 0) {
283 	    if (parent != NULL) {
284 		*parent = pv;
285 	    }
286 	    ret = 1;
287 	}
288     }
289 
290     return ret;
291 }
292 
make_xp_varname(char * vname,int v1,int v2,const DATASET * dset,int len)293 static void make_xp_varname (char *vname, int v1, int v2,
294 			     const DATASET *dset,
295 			     int len)
296 {
297     int n1 = strlen(dset->varname[v1]);
298     int n2 = strlen(dset->varname[v2]);
299     int cut = n1 + n2 + 1 - len;
300     int cut1 = 0, cut2 = 0;
301 
302     if (cut > 0) {
303 	cut1 = cut2 = cut/2;
304 	if (cut % 2) {
305 	    cut2++;
306 	}
307     }
308 
309     *vname = '\0';
310     strncat(vname, dset->varname[v1], n1 - cut1);
311     strcat(vname, "_");
312     strncat(vname, dset->varname[v2], n2 - cut2);
313 }
314 
315 /* Array into which to write a generated variable, prior to testing
316    whether or not the same var already exists.
317 */
318 
testvec(int n)319 static double *testvec (int n)
320 {
321     static double *x;
322     static int nx;
323     int t;
324 
325     if (n == 0) {
326 	/* clean up */
327 	free(x);
328 	x = NULL;
329 	nx = 0;
330 	return NULL;
331     }
332 
333     if (n > nx) {
334 	double *y = realloc(x, n * sizeof *x);
335 
336 	if (y == NULL) {
337 	    free(x);
338 	    x = NULL;
339 	    nx = 0;
340 	    return NULL;
341 	}
342 
343 	nx = n;
344 	x = y;
345     }
346 
347     for (t=0; t<n; t++) {
348 	x[t] = NADBL;
349     }
350 
351     return x;
352 }
353 
354 /**
355  * gretl_transforms_cleanup:
356  *
357  * Called by libgretl_cleanup(). Frees any memory allocated
358  * as workspace for the creation of transformed variables.
359  */
360 
gretl_transforms_cleanup(void)361 void gretl_transforms_cleanup (void)
362 {
363     testvec(0);
364 }
365 
366 /* write lagged values of variable v into xlag */
367 
get_lag(int v,int lag,double * xlag,const DATASET * dset)368 static int get_lag (int v, int lag, double *xlag,
369 		    const DATASET *dset)
370 {
371     const double *x = dset->Z[v];
372     int t1 = (lag > 0)? lag : 0;
373     int t2 = dset->n - 1;
374     int t, s;
375 
376     for (t=0; t<dset->n; t++) {
377 	xlag[t] = NADBL;
378     }
379 
380     for (t=t1; t<=t2; t++) {
381 	s = t - lag;
382 	if (dataset_is_panel(dset) &&
383 	    t / dset->pd != s / dset->pd) {
384 	    continue;
385 	}
386 	if (s >= 0 && s < dset->n) {
387 	    xlag[t] = x[s];
388 	}
389     }
390 
391     return 0;
392 }
393 
394 /* write log of variable v into logvec */
395 
get_log(int v,double * logvec,const DATASET * dset)396 static int get_log (int v, double *logvec, const DATASET *dset)
397 {
398     double xx;
399     int t, err = 0;
400 
401     for (t=dset->t1; t<=dset->t2 && !err; t++) {
402 	xx = dset->Z[v][t];
403 	if (na(xx) || xx <= 0.0) {
404 	    logvec[t] = NADBL;
405 	    set_gretl_warning(W_GENMISS);
406 	} else {
407 	    logvec[t] = log(xx);
408 	}
409     }
410 
411     return err;
412 }
413 
414 /* write some sort of difference of variable v into diffvec */
415 
get_diff(int v,double * diffvec,int ci,const DATASET * dset)416 static int get_diff (int v, double *diffvec, int ci,
417 		     const DATASET *dset)
418 {
419     double x0, x1;
420     int t, t0, t1;
421 
422     t0 = (ci == SDIFF)? dset->pd : 1;
423     t1 = (dset->t1 > t0)? dset->t1 : t0;
424 
425     for (t=t1; t<=dset->t2; t++) {
426 	if (dset->structure == STACKED_TIME_SERIES &&
427 	    panel_unit_first_obs(t, dset)) {
428 	    continue;
429 	}
430 
431 	x0 = dset->Z[v][t];
432 	x1 = dset->Z[v][t - t0];
433 
434 	if (ci == LDIFF) {
435 	    if (!na(x0) && !na(x1) && x0 > 0 && x1 > 0) {
436 		diffvec[t] = log(x0) - log(x1);
437 	    }
438 	} else {
439 	    if (!na(x0) && !na(x1)) {
440 		diffvec[t] = x0 - x1;
441 	    }
442 	}
443     }
444 
445     return 0;
446 }
447 
448 /* write a matrix containing high-frequency differences of
449    the series in @list over the current sample range
450 */
451 
get_hf_diffs(const int * list,int ci,double mult,const DATASET * dset,int * err)452 static gretl_matrix *get_hf_diffs (const int *list,
453 				   int ci, double mult,
454 				   const DATASET *dset,
455 				   int *err)
456 {
457     gretl_matrix *dX = NULL;
458     double **Z = dset->Z;
459     double x0, x1, dti;
460     int n = list[0];
461     int v1 = list[1];
462     int i, vi, s, t, T;
463 
464     T = dset->t2 - dset->t1 + 1;
465     dX = gretl_zero_matrix_new(T, n);
466 
467     if (dX == NULL) {
468 	*err = E_ALLOC;
469 	return NULL;
470     }
471 
472     for (t=T-1; t>=0; t--) {
473 	s = t + dset->t1;
474 	for (i=0; i<n; i++) {
475 	    vi = list[i+1];
476 	    x0 = Z[vi][s];
477 	    if (i < n-1) {
478 		x1 = Z[list[i+2]][s];
479 	    } else if (s > 0) {
480 		x1 = Z[v1][s-1];
481 	    } else {
482 		x1 = NADBL;
483 	    }
484 	    dti = NADBL;
485 	    if (!na(x0) && !na(x1)) {
486 		if (ci == DIFF) {
487 		    dti = mult * (x0 - x1);
488 		} else if (x0 > 0 && x1 > 0) {
489 		    dti = mult * log(x0/x1);
490 		}
491 	    }
492 	    gretl_matrix_set(dX, t, i, dti);
493 	}
494     }
495 
496     return dX;
497 }
498 
499 /* orthogonal deviations */
500 
get_orthdev(int v,double * xvec,const DATASET * dset)501 static int get_orthdev (int v, double *xvec, const DATASET *dset)
502 {
503     return orthdev_series(dset->Z[v], xvec, dset);
504 }
505 
506 /* write square or cross-product into xvec */
507 
get_xpx(int vi,int vj,double * xvec,const DATASET * dset)508 static int get_xpx (int vi, int vj, double *xvec, const DATASET *dset)
509 {
510     double xit, xjt;
511     int t;
512 
513     for (t=dset->t1; t<=dset->t2; t++) {
514 	xit = dset->Z[vi][t];
515 	xjt = dset->Z[vj][t];
516 	if (na(xit) || na(xjt)) {
517 	    xvec[t] = NADBL;
518 	} else {
519 	    xvec[t] = xit * xjt;
520 	}
521     }
522 
523     return 0;
524 }
525 
526 /* write reciprocal into xvec */
527 
get_inverse(int v,double * xvec,const DATASET * dset)528 static int get_inverse (int v, double *xvec, const DATASET *dset)
529 {
530     double xt;
531     int t;
532 
533     for (t=dset->t1; t<=dset->t2; t++) {
534 	xt = dset->Z[v][t];
535 	if (na(xt) || xt == 0.0) {
536 	    xvec[t] = NADBL;
537 	} else {
538 	    xvec[t] = 1.0 / xt;
539 	}
540     }
541 
542     return 0;
543 }
544 
545 /* write standardized series v into targ */
546 
get_std(int v,int dfc,double * targ,const DATASET * dset)547 static int get_std (int v, int dfc, double *targ, const DATASET *dset)
548 {
549     double *x = dset->Z[v];
550 
551     return standardize_series(x, targ, dfc, dset);
552 }
553 
554 /* write resampled series into rsvec */
555 
get_resampled(int v,double * rsvec,const DATASET * dset,const int * z)556 static int get_resampled (int v, double *rsvec,
557 			  const DATASET *dset,
558 			  const int *z)
559 {
560     const double *x = dset->Z[v];
561     int t, j, s;
562 
563     if (dataset_is_panel(dset)) {
564 	int n = panel_sample_size(dset);
565 	int i, T = dset->pd;
566 
567 	s = dset->t1;
568 	for (i=0; i<n; i++) {
569 	    j = z[i] * T;
570 	    for (t=0; t<T; t++) {
571 		rsvec[s++] = x[j + t];
572 	    }
573 	}
574     } else {
575 	s = 0;
576 	for (t=dset->t1; t<=dset->t2; t++) {
577 	    j = z[s++];
578 	    rsvec[t] = x[j];
579 	}
580     }
581 
582     return 0;
583 }
584 
585 /* write dummy for (v == value) into xvec */
586 
get_discdum(int v,double val,double * xvec,const DATASET * dset)587 static int get_discdum (int v, double val, double *xvec,
588 			const DATASET *dset)
589 {
590     double xt;
591     int t;
592 
593     for (t=dset->t1; t<=dset->t2; t++) {
594 	xt = dset->Z[v][t];
595 	if (na(xt)) {
596 	    xvec[t] = NADBL;
597 	} else {
598 	    xvec[t] = (xt == val)? 1.0 : 0.0;
599 	}
600     }
601 
602     return 0;
603 }
604 
605 enum transform_results {
606     VAR_ADDED_OK,
607     VAR_EXISTS_OK,
608     VAR_ADD_FAILED,
609     VARNAME_DUPLICATE
610 };
611 
612 /* Note: new behavior as of 2008-02-11 */
613 
614 #define TR_OVERWRITE 1
615 
transform_handle_duplicate(int ci,int lag,int v,const double * x,const char * label,DATASET * dset,int parent,int origv)616 static int transform_handle_duplicate (int ci, int lag, int v,
617 				       const double *x,
618 				       const char *label,
619 				       DATASET *dset,
620 				       int parent,
621 				       int origv)
622 {
623     const char *vlabel = series_get_label(dset, v);
624     int ret = VARNAME_DUPLICATE;
625     int no_overwrite = 0;
626     int t, ok = 0;
627 
628     if (vlabel != NULL && *label && *vlabel) {
629 	if (!strcmp(label, vlabel)) {
630 	    /* labels identical, so OK? */
631 	    ok = 1;
632 	} else {
633 	    /* labels differ */
634 	    no_overwrite = 1;
635 	}
636     }
637 
638     if (!no_overwrite) {
639 	int vp = series_get_parent_id(dset, v);
640 
641 	no_overwrite = parent != vp;
642     }
643 
644 #if TR_OVERWRITE
645     if (!ok && v < origv && !no_overwrite) {
646 	ok = 1;
647     }
648 #endif
649 
650     if (ok) {
651 #if TRDEBUG
652 	fprintf(stderr, "transform_handle_duplicate: updating var %d (%s)\n",
653 		v, dset->varname[v]);
654 #endif
655 	for (t=0; t<dset->n; t++) {
656 	    dset->Z[v][t] = x[t];
657 	}
658 	if (*label != '\0') {
659 	    series_set_label(dset, v, label);
660 	}
661 	series_set_transform(dset, v, ci);
662 	series_set_lag(dset, v, lag);
663 	series_zero_flags(dset, v);
664 	ret = VAR_EXISTS_OK;
665     }
666 
667 #if TRDEBUG
668     if (!ok) {
669 	fprintf(stderr, "transform_handle_duplicate: collision with var %d (%s)\n",
670 		v, dset->varname[v]);
671     }
672 #endif
673 
674     return ret;
675 }
676 
677 static int
check_add_transform(int ci,int lag,int vnum,const double * x,const char * vname,const char * label,DATASET * dset,int parent,int origv)678 check_add_transform (int ci, int lag, int vnum, const double *x,
679 		     const char *vname, const char *label,
680 		     DATASET *dset, int parent, int origv)
681 {
682     int t, ret = VAR_ADDED_OK;
683 
684     if (vnum < dset->v) {
685 	/* a variable of this name already exists */
686 	int chk = check_vals(x, dset->Z[vnum], dset->n);
687 
688 	/* heuristic: we'll assume that if the variables have the same
689 	   name, and differ only in respect of one being more complete
690 	   than the other (having valid values where the other has
691 	   missing values), then we should not create a new variable,
692 	   but rather use the more complete series.
693 
694 	   Watch out for cases where this is not the desired behavior!
695 	*/
696 
697 	if (chk == VARS_IDENTICAL) {
698 	    ret = VAR_EXISTS_OK;
699 	} else if (chk == NEW_HAS_MISSING) {
700 	    /* is this right? */
701 	    ret = VAR_EXISTS_OK;
702 	} else if (chk == OLD_HAS_MISSING) {
703 	    for (t=0; t<dset->n; t++) {
704 		dset->Z[vnum][t] = x[t];
705 	    }
706 	    ret = VAR_EXISTS_OK;
707 	} else {
708 	    ret = transform_handle_duplicate(ci, lag, vnum, x, label, dset,
709 					     parent, origv);
710 	}
711     } else {
712 	/* no var of this name, working from scratch */
713 	if (dataset_add_series(dset, 1)) {
714 	    ret = VAR_ADD_FAILED;
715 	} else {
716 	    strcpy(dset->varname[vnum], vname);
717 	    series_set_label(dset, vnum, label);
718 	    for (t=0; t<dset->n; t++) {
719 		dset->Z[vnum][t] = x[t];
720 	    }
721 	}
722     }
723 
724     return ret;
725 }
726 
727 #define RESPECT_LAG_LEVEL 1
728 
get_lag_ID(int srcv,int lag,const DATASET * dset)729 static int get_lag_ID (int srcv, int lag, const DATASET *dset)
730 {
731 #if RESPECT_LAG_LEVEL
732     int fd = gretl_function_depth();
733 #endif
734     const char *parent, *vname = dset->varname[srcv];
735     int i;
736 
737     for (i=1; i<dset->v; i++) {
738 #if RESPECT_LAG_LEVEL
739 	if (series_get_stack_level(dset, i) != fd) {
740 	    continue;
741 	}
742 #endif
743 	if (lag == series_get_lag(dset, i)) {
744 	    parent = series_get_parent_name(dset, i);
745 	    if (parent != NULL && !strcmp(vname, parent)) {
746 		return i;
747 	    }
748 	}
749     }
750 
751     return -1;
752 }
753 
754 /* get_transform: create specified transformation of variable v if
755    this variable does not already exist.
756 
757    The dummies resulting from DUMMIFY are automatically marked as
758    discrete.
759 
760    origv is the number of variables in the dataset prior to the
761    current round of adding.
762 
763    Return the ID number of the transformed var, or -1 on error.
764 */
765 
get_transform(int ci,int v,int aux,double x,DATASET * dset,int startlen,int origv,int * idxvec)766 static int get_transform (int ci, int v, int aux, double x,
767 			  DATASET *dset, int startlen, int origv,
768 			  int *idxvec)
769 {
770     char vname[VNAMELEN] = {0};
771     char label[MAXLABEL] = {0};
772     int vno = -1, err = 0;
773     int len, lag = 0;
774     const char *srcname;
775     double *vx;
776 
777     vx = testvec(dset->n);
778     if (vx == NULL) {
779 	return -1;
780     }
781 
782     if (ci == LAGS) {
783 	lag = aux;
784 	err = get_lag(v, lag, vx, dset);
785     } else if (ci == LOGS) {
786 	err = get_log(v, vx, dset);
787     } else if (ci == DIFF || ci == LDIFF || ci == SDIFF) {
788 	err = get_diff(v, vx, ci, dset);
789     } else if (ci == ORTHDEV) {
790 	err = get_orthdev(v, vx, dset);
791     } else if (ci == SQUARE) {
792 	/* "aux" = second variable number */
793 	err = get_xpx(v, aux, vx, dset);
794     } else if (ci == DUMMIFY) {
795 	/* "x" = value for dummy */
796 	err = get_discdum(v, x, vx, dset);
797     } else if (ci == INVERSE) {
798 	err = get_inverse(v, vx, dset);
799     } else if (ci == STDIZE) {
800 	err = get_std(v, aux, vx, dset);
801     } else if (ci == RESAMPLE) {
802 	err = get_resampled(v, vx, dset, idxvec);
803     }
804 
805     if (err) {
806 	return -1;
807     }
808 
809     if (ci == LAGS && (vno = get_lag_ID(v, aux, dset)) > 0) {
810 	/* special case: pre-existing lag */
811 	err = check_add_transform(ci, lag, vno, vx, vname, label, dset, v, origv);
812 	if (err != VAR_EXISTS_OK) {
813 	    vno = -1;
814 	}
815 	return vno;
816     }
817 
818     if (ci == DUMMIFY || (ci == SQUARE && v != aux)) {
819 	/* special cases in respect of labeling */
820 	gchar *s = NULL;
821 
822 	if (ci == SQUARE && v != aux) {
823 	    s = g_strdup_printf(_("= %s times %s"), dset->varname[v],
824 				dset->varname[aux]);
825 	} else if (is_string_valued(dset, v)) {
826 	    const char *sval = series_get_string_for_value(dset, v, x);
827 
828 	    if (sval != NULL && *sval != '\0') {
829 		s = g_strdup_printf(_("dummy for %s = '%s'"), dset->varname[v], sval);
830 	    } else {
831 		s = g_strdup_printf(_("dummy for %s = %g"), dset->varname[v], x);
832 	    }
833 	} else {
834 	    s = g_strdup_printf(_("dummy for %s = %g"), dset->varname[v], x);
835 	}
836 	strcpy(label, gretl_utf8_truncate(s, MAXLABEL-1));
837 	g_free(s);
838     } else {
839 	make_transform_label(label, dset->varname[v], ci, aux);
840     }
841 
842     srcname = get_mangled_name_by_id(v);
843     if (srcname == NULL) {
844 	srcname = dset->varname[v];
845     }
846 
847 #if TRDEBUG
848     fprintf(stderr, "srcname = '%s', startlen = %d, v = %d\n",
849 	    srcname, startlen, v);
850 #endif
851 
852     for (len=startlen; len<=VNAMELEN; len++) {
853 	if (*vname != '\0' && len == VNAMELEN) {
854 	    /* last resort: hack the name */
855 	    make_varname_unique(vname, 0, dset);
856 	} else if (ci == SQUARE && v != aux) {
857 	    make_xp_varname(vname, v, aux, dset, len);
858 	} else if (ci == DUMMIFY && series_is_integer_valued(dset, v)) {
859 	    make_transform_varname(vname, srcname, ci, (int) x, len);
860 	} else {
861 	    make_transform_varname(vname, srcname, ci, aux, len);
862 	}
863 	vno = series_index(dset, vname);
864 	err = check_add_transform(ci, lag, vno, vx, vname, label, dset, v, origv);
865 	if (err != VAR_EXISTS_OK && err != VAR_ADDED_OK) {
866 	    vno = -1;
867 	}
868 	if (err != VARNAME_DUPLICATE) {
869 	    /* either we're OK, or there was a fatal error */
870 	    break;
871 	}
872     }
873 
874     if (!err && vno > 0) {
875 	if (ci == DUMMIFY) {
876 	    series_set_parent(dset, vno, dset->varname[v]);
877 	    series_set_transform(dset, vno, ci);
878 	    series_set_discrete(dset, vno, 1);
879 	} else if (ci == LAGS || ci == DIFF || ci == LOGS) {
880 	    series_set_parent(dset, vno, dset->varname[v]);
881 	    series_set_transform(dset, vno, ci);
882 	}
883 
884 	if (ci == LAGS) {
885 	    series_set_lag(dset, vno, aux);
886 	} else {
887 	    series_set_lag(dset, vno, 0);
888 	}
889     }
890 
891     return vno;
892 }
893 
894 /**
895  * laggenr:
896  * @v: ID number in dataset of source variable.
897  * @lag: the order of the lag to create.
898  * @dset: dataset struct.
899  *
900  * Creates the specified lag of variable @v if this variable does
901  * not already exist.
902  *
903  * Returns: the ID number of the lagged variable, or -1 on error.
904  */
905 
laggenr(int v,int lag,DATASET * dset)906 int laggenr (int v, int lag, DATASET *dset)
907 {
908     int lno;
909 
910     if (lag > dset->n || -lag > dset->n) {
911 	gretl_errmsg_sprintf(_("Invalid lag order %d"), lag);
912 	lno = -1;
913     } else if (v == 0) {
914 	gretl_errmsg_set(_("The constant cannot be lagged"));
915 	lno = -1;
916     } else if (lag == 0) {
917 	lno = v;
918     } else {
919 	lno = get_transform(LAGS, v, lag, 0.0, dset,
920 			    VNAMELEN - 3, dset->v, NULL);
921     }
922 
923     return lno;
924 }
925 
926 /**
927  * laggenr_from_to:
928  * @v: ID number in dataset of source variable.
929  * @fromlag: start of lag/lead range.
930  * @tolag: end of lag/lead range.
931  * @dset: dataset struct.
932  * @err: location to receive error code.
933  *
934  * Creates the specified lags of variable @v if they do not
935  * already exist.
936  *
937  * Returns: list of lag variables, or NULL or on error.
938  */
939 
laggenr_from_to(int v,int fromlag,int tolag,DATASET * dset,int * err)940 int *laggenr_from_to (int v, int fromlag, int tolag,
941 		      DATASET *dset, int *err)
942 {
943     int *llist;
944     int i, lv, nlags = -1;
945     int p, count_down = 0;
946 
947     if (fromlag > tolag) {
948 	nlags = fromlag - tolag + 1;
949 	count_down = 1;
950     } else {
951 	nlags = tolag - fromlag + 1;
952     }
953 
954     if (nlags <= 0) {
955 	*err = E_DATA;
956 	return NULL;
957     }
958 
959     llist = gretl_list_new(nlags);
960     if (llist == NULL) {
961 	*err = E_ALLOC;
962 	return NULL;
963     }
964 
965     p = fromlag;
966 
967     for (i=0; i<nlags; i++) {
968 	lv = laggenr(v, -p, dset);
969 	if (lv < 0) {
970 	    *err = E_DATA;
971 	    free(llist);
972 	    llist = NULL;
973 	    break;
974 	}
975 	llist[i+1] = lv;
976 	if (count_down) {
977 	    p--;
978 	} else {
979 	    p++;
980 	}
981     }
982 
983     return llist;
984 }
985 
986 /**
987  * loggenr:
988  * @v: ID number in dataset of source variable.
989  * @dset: dataset struct.
990  *
991  * Creates the natural log of variable @v if this variable does
992  * not already exist.
993  *
994  * Returns: the ID number of the log variable, or -1 on error.
995  */
996 
loggenr(int v,DATASET * dset)997 int loggenr (int v, DATASET *dset)
998 {
999     return get_transform(LOGS, v, 0, 0.0, dset,
1000 			 VNAMELEN - 3, dset->v, NULL);
1001 }
1002 
1003 /**
1004  * invgenr:
1005  * @v: ID number in dataset of source variable.
1006  * @dset: dataset struct.
1007  *
1008  * Creates the reciprocal of variable @v if this variable does
1009  * not already exist.
1010  *
1011  * Returns: the ID number of the reciprocal, or -1 on error.
1012  */
1013 
invgenr(int v,DATASET * dset)1014 int invgenr (int v, DATASET *dset)
1015 {
1016     return get_transform(INVERSE, v, 0, 0.0, dset,
1017 			 VNAMELEN - 3, dset->v, NULL);
1018 }
1019 
1020 /**
1021  * diffgenr:
1022  * @v: ID number in dataset of source variable.
1023  * @ci: DIFF (first difference), LDIFF (log difference) or SDIFF
1024  * (seasonal difference).
1025  * @dset: dataset struct.
1026  *
1027  * Creates the first difference (or log- or seasonal difference,
1028  * depending on the value of @ci) of variable @v, if the
1029  * differenced variable does not already exist.
1030  *
1031  * Returns: the ID number of the differenced variable, or -1 on error.
1032  */
1033 
diffgenr(int v,int ci,DATASET * dset)1034 int diffgenr (int v, int ci, DATASET *dset)
1035 {
1036     if (ci != DIFF && ci != LDIFF && ci != SDIFF) {
1037 	return -1;
1038     }
1039 
1040     if (ci == SDIFF && !dataset_is_seasonal(dset)) {
1041 	return -1;
1042     }
1043 
1044     return get_transform(ci, v, 0, 0.0, dset,
1045 			 VNAMELEN - 3, dset->v, NULL);
1046 }
1047 
1048 /**
1049  * xpxgenr:
1050  * @vi: ID number in dataset of first source variable.
1051  * @vj: ID number in dataset of second source variable.
1052  * @dset: dataset struct.
1053  *
1054  * Creates the cross product of variables @vi and @vj if this
1055  * variable does not already exist.
1056  *
1057  * Returns: the ID number of the cross-product variable,
1058  * or -1 on error.
1059  */
1060 
xpxgenr(int vi,int vj,DATASET * dset)1061 int xpxgenr (int vi, int vj, DATASET *dset)
1062 {
1063     if (vi == vj) {
1064 	if (gretl_isdummy(dset->t1, dset->t2, dset->Z[vi])) {
1065 	    return -1;
1066 	}
1067     }
1068 
1069     return get_transform(SQUARE, vi, vj, 0.0, dset,
1070 			 VNAMELEN - 3, dset->v, NULL);
1071 }
1072 
1073 static int
get_starting_length(const int * list,DATASET * dset,int trim)1074 get_starting_length (const int *list, DATASET *dset, int trim)
1075 {
1076     int width = VNAMELEN - 3 - trim;
1077     const char *vni, *vnj;
1078     int len, maxlen = 0;
1079     int conflict = 0;
1080     int i, j;
1081 
1082     if (list[0] == 1) {
1083 	return VNAMELEN - 1;
1084     }
1085 
1086     for (i=1; i<=list[0]; i++) {
1087 	len = strlen(dset->varname[list[i]]);
1088 	if (len > maxlen) {
1089 	    maxlen = len;
1090 	}
1091     }
1092 
1093     if (maxlen <= width) {
1094 	/* no problem: generated names will fit in VNAMELEN - 3 chars */
1095 	return VNAMELEN - 3;
1096     }
1097 
1098     for (len=width; len<=maxlen; len++) {
1099 	conflict = 0;
1100 	for (i=1; i<=list[0] && !conflict; i++) {
1101 	    vni = dset->varname[list[i]];
1102 	    for (j=i+1; j<=list[0] && !conflict; j++) {
1103 		vnj = dset->varname[list[j]];
1104 		if (!strncmp(vni, vnj, len)) {
1105 		    conflict = 1;
1106 		}
1107 	    }
1108 	}
1109 	if (!conflict) {
1110 	    break;
1111 	}
1112     }
1113 
1114     len += trim;
1115 
1116     if (len < VNAMELEN - 3) {
1117 	len = VNAMELEN - 3;
1118     } else if (len > VNAMELEN - 1) {
1119 	len = VNAMELEN - 1;
1120     }
1121 
1122     return len;
1123 }
1124 
1125 struct mangled_name {
1126     int v;
1127     char *s;
1128 };
1129 
1130 static struct mangled_name *mnames;
1131 static int n_mnames;
1132 
destroy_mangled_names(void)1133 static void destroy_mangled_names (void)
1134 {
1135     int i;
1136 
1137     for (i=0; i<n_mnames; i++) {
1138 	free(mnames[i].s);
1139     }
1140     free(mnames);
1141     mnames = NULL;
1142     n_mnames = 0;
1143 }
1144 
push_mangled_name(int v,const char * s)1145 static int push_mangled_name (int v, const char *s)
1146 {
1147     static struct mangled_name *tmp;
1148 
1149     tmp = realloc(mnames, (n_mnames + 1) * sizeof *tmp);
1150 
1151     if (tmp == NULL) {
1152 	destroy_mangled_names();
1153 	return E_ALLOC;
1154     }
1155 
1156     mnames = tmp;
1157     mnames[n_mnames].v = v;
1158     mnames[n_mnames].s = gretl_strdup(s);
1159 
1160     n_mnames++;
1161 
1162     return 0;
1163 }
1164 
get_mangled_name_by_id(int v)1165 static char *get_mangled_name_by_id (int v)
1166 {
1167     int i;
1168 
1169     for (i=0; i<n_mnames; i++) {
1170 	if (mnames[i].v == v) {
1171 	    return mnames[i].s;
1172 	}
1173     }
1174 
1175     return NULL;
1176 }
1177 
make_mangled_name(int v,const char * s,int nc)1178 static int make_mangled_name (int v, const char *s, int nc)
1179 {
1180     char tmp[16], sfx[16];
1181     int n, err;
1182 
1183     if (get_mangled_name_by_id(v)) {
1184 	return 0;
1185     }
1186 
1187     *tmp = *sfx = '\0';
1188     /* we know the bit out here must be distinct? */
1189     strcat(sfx, s + nc);
1190     n = strlen(sfx);
1191     strncat(tmp, s, nc - n);
1192     strcat(tmp, sfx);
1193 
1194 #if TRDEBUG
1195     fprintf(stderr, "mangled name: '%s' -> '%s'\n", s, tmp);
1196 #endif
1197 
1198     err = push_mangled_name(v, tmp);
1199 
1200     return err;
1201 }
1202 
1203 static int
transform_preprocess_list(int * list,const DATASET * dset,int f)1204 transform_preprocess_list (int *list, const DATASET *dset, int f)
1205 {
1206     int maxc = VNAMELEN - 3;
1207     int longnames = 0;
1208     char **S = NULL;
1209     int i, v, ok;
1210     int err = 0;
1211 
1212     if (f == SQUARE || f == LDIFF || f == SDIFF || f == RESAMPLE) {
1213 	/* 3-character prefixes */
1214 	maxc--;
1215     } else if (f == DUMMIFY) {
1216 	/* "D" plus suffix */
1217 	maxc--;
1218     }
1219 
1220     for (i=1; i<=list[0]; i++) {
1221 	v = list[i];
1222 	ok = 1;
1223 
1224 	if (f == LOGS || f == SQUARE) {
1225 	    if (v == 0) { /* FIXME?? */
1226 		ok = 1;
1227 	    }
1228 	    if (gretl_isdummy(dset->t1, dset->t2, dset->Z[v])) {
1229 		ok = 0;
1230 	    }
1231 	} else if (f == LAGS) {
1232 	    if (v == 0) {
1233 		gretl_errmsg_set(_("The constant cannot be lagged"));
1234 		ok = 0;
1235 	    }
1236 	} else if (f == DUMMIFY) {
1237 	    ok = v > 0 && accept_as_discrete(dset, v, 0);
1238 	}
1239 
1240 	if (!ok) {
1241 	    gretl_list_delete_at_pos(list, i--);
1242 	} else if (strlen(dset->varname[v]) > maxc) {
1243 	    strings_array_add(&S, &longnames, dset->varname[v]);
1244 	}
1245     }
1246 
1247     if (list[0] == 0) {
1248 	err = E_TYPES;
1249     }
1250 
1251     if (longnames > 0) {
1252 	if (!err) {
1253 	    int j, herr = 0;
1254 
1255 	    for (i=0; i<longnames && !herr; i++) {
1256 		for (j=i+1; j<longnames && !herr; j++) {
1257 		    if (!strncmp(S[i], S[j], maxc)) {
1258 			v = series_index(dset, S[i]);
1259 			herr = make_mangled_name(v, S[i], maxc);
1260 			v = series_index(dset, S[j]);
1261 			herr += make_mangled_name(v, S[j], maxc);
1262 		    }
1263 		}
1264 	    }
1265 	}
1266 
1267 	strings_array_free(S, longnames);
1268     }
1269 
1270     return err;
1271 }
1272 
1273 /**
1274  * list_loggenr:
1275  * @list: on entry, list of variables to process; on exit,
1276  * holds the ID numbers of the generated variables.
1277  * @dset: dataset struct.
1278  *
1279  * Generates and adds to the data set the natural logs of the
1280  * variables given in @list.
1281  *
1282  * Returns: 0 on success, error code on error.
1283  */
1284 
list_loggenr(int * list,DATASET * dset)1285 int list_loggenr (int *list, DATASET *dset)
1286 {
1287     int origv = dset->v;
1288     int tnum, i, j, v;
1289     int startlen;
1290     int l0 = 0;
1291     int err;
1292 
1293     err = transform_preprocess_list(list, dset, LOGS);
1294     if (err) {
1295 	return err;
1296     }
1297 
1298     startlen = get_starting_length(list, dset, 2);
1299 
1300     j = 1;
1301     for (i=1; i<=list[0]; i++) {
1302 	v = list[i];
1303 	tnum = get_transform(LOGS, v, 0, 0.0, dset, startlen,
1304 			     origv, NULL);
1305 	if (tnum > 0) {
1306 	    list[j++] = tnum;
1307 	    l0++;
1308 	}
1309     }
1310 
1311     list[0] = l0;
1312 
1313     destroy_mangled_names();
1314 
1315     return (l0 > 0)? 0 : E_LOGS;
1316 }
1317 
1318 /**
1319  * list_stdgenr:
1320  * @list: on entry, list of variables to process; on exit,
1321  * holds the ID numbers of the generated variables.
1322  * @dset: dataset struct.
1323  *
1324  * Generates and adds to the dataset standardized versions
1325  * of the series given in @list.
1326  *
1327  * Returns: 0 on success, error code on error.
1328  */
1329 
list_stdgenr(int * list,DATASET * dset,gretlopt opt)1330 int list_stdgenr (int *list, DATASET *dset, gretlopt opt)
1331 {
1332     int origv = dset->v;
1333     int tnum, i, j, v;
1334     int startlen;
1335     int dfc = 1;
1336     int l0 = 0;
1337     int err;
1338 
1339     err = transform_preprocess_list(list, dset, STDIZE);
1340     if (err) {
1341 	return err;
1342     }
1343 
1344     if (opt & OPT_C) {
1345 	dfc = -1;
1346     } else if (opt & OPT_N) {
1347 	dfc = 0;
1348     }
1349 
1350     startlen = get_starting_length(list, dset, 2);
1351 
1352     j = 1;
1353     for (i=1; i<=list[0]; i++) {
1354 	v = list[i];
1355 	tnum = get_transform(STDIZE, v, dfc, 0.0, dset, startlen,
1356 			     origv, NULL);
1357 	if (tnum > 0) {
1358 	    list[j++] = tnum;
1359 	    l0++;
1360 	}
1361     }
1362 
1363     list[0] = l0;
1364 
1365     destroy_mangled_names();
1366 
1367     return (l0 > 0)? 0 : E_DATA;
1368 }
1369 
make_lags_list(int * list,int order)1370 static int *make_lags_list (int *list, int order)
1371 {
1372     int i, v, nl = 0;
1373 
1374     for (i=1; i<=list[0]; i++) {
1375 	v = list[i];
1376 	if (v > 0) {
1377 	    nl += order;
1378 	}
1379     }
1380 
1381     return gretl_list_new(nl);
1382 }
1383 
lag_wanted(int p,const gretl_matrix * v,int n)1384 static int lag_wanted (int p, const gretl_matrix *v, int n)
1385 {
1386     int ret = 1;
1387 
1388     if (v != NULL) {
1389 	int i;
1390 
1391 	ret = 0; /* reverse the assumption */
1392 	for (i=0; i<n; i++) {
1393 	    if (v->val[i] == p) {
1394 		ret = 1;
1395 		break;
1396 	    }
1397 	}
1398     }
1399 
1400     return ret;
1401 }
1402 
process_hf_lags_input(int hf_min,int hf_max,DATASET * dset,int compfac,int * lf_min,int * lf_max,int * skip_first,int * n_terms)1403 static int process_hf_lags_input (int hf_min, int hf_max,
1404 				  DATASET *dset,
1405 				  int compfac,
1406 				  int *lf_min, int *lf_max,
1407 				  int *skip_first,
1408 				  int *n_terms)
1409 {
1410     if (dset->pd != 1 && dset->pd != 12 && dset->pd != 4) {
1411 	return E_PDWRONG;
1412     }
1413 
1414     *lf_min = (int) ceil(hf_min / (double) compfac);
1415     *lf_max = (int) ceil(hf_max / (double) compfac);
1416     *skip_first = (hf_min - 1) % compfac;
1417     *n_terms = hf_max - hf_min + 1;
1418 
1419     if (*skip_first < 0) {
1420 	/* handle leads */
1421 	*skip_first = compfac + *skip_first;
1422     }
1423 
1424 #if 0
1425     fprintf(stderr, "hf_min = %d, lf_min = %d\n", hf_min, *lf_min);
1426     fprintf(stderr, "hf_max = %d, lf_max = %d\n", hf_max, *lf_max);
1427     fprintf(stderr, "skip_first = %d\n", *skip_first);
1428     fprintf(stderr, "n_terms = %d\n", *n_terms);
1429 #endif
1430 
1431     return 0;
1432 }
1433 
1434 /**
1435  * list_laggenr:
1436  * @plist: on entry, pointer to list of variables to process.  On exit
1437  * the list holds the ID numbers of the lag variables.
1438  * @lmin: minimum lag to include (defaults to 1).
1439  * @lmax: maximum lag to include (or 0 for automatic).
1440  * @lvec: (alternative to @lmin, @lmax) vector holding lags to generate.
1441  * @dset: dataset struct.
1442  * @compfac: compaction factor (for MIDAS lists only, otherwise give 0).
1443  * @opt: may contain OPT_L to order the list by lag rather than
1444  * by variable.
1445  *
1446  * Generates and adds to the data set lagged values of the
1447  * variables given in the list pointed to by @plist.
1448  *
1449  * Returns: 0 on successful completion, 1 on error.
1450  */
1451 
list_laggenr(int ** plist,int lmin,int lmax,const gretl_matrix * lvec,DATASET * dset,int compfac,gretlopt opt)1452 int list_laggenr (int **plist, int lmin, int lmax,
1453 		  const gretl_matrix *lvec,
1454 		  DATASET *dset, int compfac,
1455 		  gretlopt opt)
1456 {
1457     int origv = dset->v;
1458     int *list = *plist;
1459     int *laglist = NULL;
1460     int l, i, j, v, lv;
1461     int startlen, l0 = 0;
1462     int skip_first = 0;
1463     int veclen = 0;
1464     int n_terms = 0;
1465     int err = 0;
1466 
1467     if (compfac < 0) {
1468 	return E_INVARG;
1469     } else if (compfac > 0) {
1470 	/* MIDAS: we want @lmin and @lmax, not @lvec */
1471 	if (lvec != NULL) {
1472 	    return E_INVARG;
1473 	}
1474 	if (!gretl_is_midas_list(list, dset)) {
1475 	    gretl_warnmsg_set(_("The argument does not seem to be a MIDAS list"));
1476 	}
1477     }
1478 
1479     if (lvec != NULL) {
1480 	/* got a vector of lags in @lvec */
1481 	veclen = gretl_vector_get_length(lvec);
1482 	if (veclen == 0) {
1483 	    err = E_INVARG;
1484 	} else {
1485 	    lmin = lvec->val[0];
1486 	    lmax = lvec->val[veclen-1];
1487 	    n_terms = veclen;
1488 	}
1489     } else if (compfac > 0) {
1490 	/* the MIDAS case */
1491 	int hmin = lmin, hmax = lmax;
1492 
1493 	if (hmax < hmin) {
1494 	    err = E_INVARG;
1495 	} else {
1496 	    err = process_hf_lags_input(hmin, hmax, dset, compfac, &lmin,
1497 					&lmax, &skip_first, &n_terms);
1498 	}
1499     } else {
1500 	/* non-MIDAS, using order = @lmax, ignoring @lmin */
1501 	if (lmax < 0 || lmax > dset->n) {
1502 	    gretl_errmsg_sprintf(_("Invalid lag order %d"), lmax);
1503 	    return E_INVARG;
1504 	} else if (lmax == 0) {
1505 	    lmax = default_lag_order(dset);
1506 	}
1507 	lmin = 1;
1508 	n_terms = lmax;
1509     }
1510 
1511     if (err) {
1512 	return err;
1513     }
1514 
1515     /* Note: at this point @lmin and @lmax are defined in terms of
1516        the frequency of @dset, even when we're handling MIDAS data.
1517        In the regular case, @n_terms holds the number of lag terms
1518        wanted per series in the input @list, but in the MIDAS case
1519        @n_terms is the total number of hf lag terms wanted (since
1520        the input @list actually represents a single series).
1521     */
1522 
1523     err = transform_preprocess_list(list, dset, LAGS);
1524     if (err) {
1525 	return err;
1526     }
1527 
1528     if (compfac) {
1529 	laglist = gretl_list_new(n_terms);
1530     } else {
1531 	laglist = make_lags_list(list, n_terms);
1532     }
1533 
1534     if (laglist == NULL) {
1535 	destroy_mangled_names();
1536 	return E_ALLOC;
1537     }
1538 
1539     startlen = get_starting_length(list, dset, (lmax > 9)? 3 : 2);
1540 
1541     j = 1;
1542 
1543     if (compfac > 0) {
1544 	/* MIDAS high-frequency lags, by lags */
1545 	int hfpmax = n_terms + skip_first;
1546 	int mp, hfp = 0;
1547 
1548 	for (l=lmin; l<=lmax; l++) {
1549 	    for (i=1; i<=list[0]; i++) {
1550 		hfp++;
1551 		if (hfp <= skip_first) {
1552 		    continue;
1553 		} else if (hfp > hfpmax) {
1554 		    break;
1555 		}
1556 		v = list[i];
1557 		lv = get_transform(LAGS, v, l, 0.0, dset,
1558 				   startlen, origv, NULL);
1559 		if (lv > 0) {
1560 		    mp = series_get_midas_period(dset, v);
1561 		    series_set_midas_period(dset, lv, mp);
1562 		    mp = series_get_midas_freq(dset, v);
1563 		    series_set_midas_freq(dset, lv, mp);
1564 		    laglist[j++] = lv;
1565 		    l0++;
1566 		}
1567 	    }
1568 	}
1569     } else if (opt & OPT_L) {
1570 	/* order the list by lags */
1571 	for (l=lmin; l<=lmax; l++) {
1572 	    if (!lag_wanted(l, lvec, veclen)) {
1573 		continue;
1574 	    }
1575 	    for (i=1; i<=list[0]; i++) {
1576 		v = list[i];
1577 		lv = get_transform(LAGS, v, l, 0.0, dset,
1578 				   startlen, origv, NULL);
1579 		if (lv > 0) {
1580 		    laglist[j++] = lv;
1581 		    l0++;
1582 		}
1583 	    }
1584 	}
1585     } else {
1586 	/* order by variable */
1587 	for (i=1; i<=list[0]; i++) {
1588 	    v = list[i];
1589 	    for (l=lmin; l<=lmax; l++) {
1590 		if (!lag_wanted(l, lvec, veclen)) {
1591 		    continue;
1592 		}
1593 		lv = get_transform(LAGS, v, l, 0.0, dset,
1594 				   startlen, origv, NULL);
1595 		if (lv > 0) {
1596 		    laglist[j++] = lv;
1597 		    l0++;
1598 		}
1599 	    }
1600 	}
1601     }
1602 
1603     destroy_mangled_names();
1604 
1605     laglist[0] = l0;
1606 
1607     free(*plist);
1608     *plist = laglist;
1609 
1610     return 0;
1611 }
1612 
1613 /**
1614  * default_lag_order:
1615  * @dset: data information struct.
1616  *
1617  * Returns: the default lag order for generating lags, performing
1618  * autocorrelation test, and so on.
1619  */
1620 
default_lag_order(const DATASET * dset)1621 int default_lag_order (const DATASET *dset)
1622 {
1623     int order = 1;
1624 
1625     if (!dataset_is_panel(dset)) {
1626 	order = (dset->pd < 52)? dset->pd : 14;
1627     }
1628 
1629     return order;
1630 }
1631 
1632 /**
1633  * list_diffgenr:
1634  * @list: on entry, list of variables to process; on exit,
1635  * ID numbers of the generated variables.
1636  * @ci: must be DIFF, LDIFF or SDIFF.
1637  * @dset: pointer to dataset struct.
1638  *
1639  * Generate differences of the variables in @list, and add them
1640  * to the data set.  If @ci is DIFF these are ordinary first
1641  * differences; if @ci is LDIFF they are log differences; and
1642  * if @ci is SDIFF they are seasonal differences.
1643  *
1644  * Returns: 0 on successful completion, non-zero on error.
1645  */
1646 
list_diffgenr(int * list,int ci,DATASET * dset)1647 int list_diffgenr (int *list, int ci, DATASET *dset)
1648 {
1649     int origv = dset->v;
1650     int i, v, startlen;
1651     int tnum, l0 = 0;
1652     int err;
1653 
1654     if (list[0] == 0) {
1655 	return 0;
1656     }
1657 
1658     if (ci != DIFF && ci != LDIFF && ci != SDIFF) {
1659 	return 1;
1660     }
1661 
1662     if (ci == SDIFF && !dataset_is_seasonal(dset)) {
1663 	return E_PDWRONG;
1664     }
1665 
1666     err = transform_preprocess_list(list, dset, ci);
1667     if (err) {
1668 	return err;
1669     }
1670 
1671     startlen = get_starting_length(list, dset, (ci == DIFF)? 2 : 3);
1672 
1673     for (i=1; i<=list[0] && !err; i++) {
1674 	v = list[i];
1675 	tnum = get_transform(ci, v, 0, 0.0, dset,
1676 			     startlen, origv, NULL);
1677 	if (tnum < 0) {
1678 	    err = 1;
1679 	} else {
1680 	    list[i] = tnum;
1681 	    l0++;
1682 	}
1683     }
1684 
1685     list[0] = l0;
1686 
1687     destroy_mangled_names();
1688 
1689     return err;
1690 }
1691 
check_hf_difflist(const int * list,const DATASET * dset,int ci,int * n_add)1692 static int check_hf_difflist (const int *list,
1693 			      const DATASET *dset,
1694 			      int ci, int *n_add)
1695 {
1696     char vname[VNAMELEN];
1697     int i, v, li, err = 0;
1698 
1699     for (i=1; i<=list[0] && !err; i++) {
1700 	li = list[i];
1701 	make_transform_varname(vname, dset->varname[li],
1702 			       ci, 0, VNAMELEN - 8);
1703 	v = current_series_index(dset, vname);
1704 	if (v > 0) {
1705 	    *n_add -= 1;
1706 	} else if (gretl_is_user_var(vname)) {
1707 	    gretl_errmsg_sprintf(_("%s: collides with existing object name"),
1708 				 vname);
1709 	    err = E_TYPES;
1710 	}
1711     }
1712 
1713     return err;
1714 }
1715 
1716 /**
1717  * hf_list_diffgenr:
1718  * @list: on entry, midas list of variables to process; on exit,
1719  * ID numbers of the generated variables.
1720  * @ci: must be DIFF or LDIFF.
1721  * @parm: optional scalar multiplier.
1722  * @dset: pointer to dataset struct.
1723  *
1724  * Generate high-frequency differences of the variables in @list
1725  * and add them to the data set.  If @ci is DIFF these are ordinary
1726  * first differences; if @ci is LDIFF they are log differences.
1727  * If @parm is not NA then the new series are all multiplied by
1728  * the specified value (as in multiplication of log-differences
1729  * by 100).
1730  *
1731  * Returns: 0 on successful completion, non-zero on error.
1732  */
1733 
hf_list_diffgenr(int * list,int ci,double parm,DATASET * dset)1734 int hf_list_diffgenr (int *list, int ci, double parm, DATASET *dset)
1735 {
1736     gretl_matrix *dX = NULL;
1737     int n_add, hfci = 0;
1738     int set_midas = 1;
1739     int err = 0;
1740 
1741     if (list[0] == 0) {
1742 	return 0;
1743     }
1744 
1745     if (ci == DIFF) {
1746 	hfci = HFDIFF;
1747     } else if (ci == LDIFF) {
1748 	hfci = HFLDIFF;
1749     } else {
1750 	return E_INVARG;
1751     }
1752 
1753     if (!dataset_is_time_series(dset)) {
1754 	return E_PDWRONG;
1755     }
1756 
1757     n_add = list[0];
1758     err = check_hf_difflist(list, dset, ci, &n_add);
1759     if (err) {
1760 	return err;
1761     }
1762 
1763     if (!gretl_is_midas_list(list, dset)) {
1764 	gretl_warnmsg_set(_("The argument does not seem to be a MIDAS list"));
1765 	set_midas = 0;
1766     }
1767 
1768     if (na(parm)) {
1769 	/* set to default */
1770 	parm = 1.0;
1771     }
1772 
1773     dX = get_hf_diffs(list, ci, parm, dset, &err);
1774 
1775     if (!err) {
1776 	char vname[VNAMELEN];
1777 	char label[MAXLABEL];
1778 	const char *parent;
1779 	int i, s, t, n = dX->cols;
1780 	int vi, v0 = dset->v;
1781 	int offset = 0;
1782 
1783 	err = dataset_add_series(dset, n_add);
1784 
1785 	if (!err) {
1786 	    for (i=0; i<n; i++) {
1787 		parent = dset->varname[list[i+1]];
1788 		make_transform_varname(vname, parent, ci, 0,
1789 				       VNAMELEN - 8);
1790 		vi = current_series_index(dset, vname);
1791 		if (vi < 0) {
1792 		    vi = v0 + offset;
1793 		    offset++;
1794 		    strcpy(dset->varname[vi], vname);
1795 		    make_transform_label(label, parent, hfci, 0);
1796 		    series_record_label(dset, vi, label);
1797 		}
1798 		s = 0;
1799 		for (t=dset->t1; t<=dset->t2; t++) {
1800 		    dset->Z[vi][t] = gretl_matrix_get(dX, s++, i);
1801 		}
1802 		list[i+1] = vi;
1803 	    }
1804 	    list[0] = n;
1805 	}
1806 	gretl_matrix_free(dX);
1807     }
1808 
1809     if (!err && set_midas) {
1810 	gretl_list_set_midas(list, dset);
1811     }
1812 
1813     return err;
1814 }
1815 
1816 /**
1817  * list_orthdev:
1818  * @list: list of variables to process.
1819  * @dset: dataset struct.
1820  *
1821  * Generate orthogonal deviations of the variables in @list, and add
1822  * them to the data set.
1823  *
1824  * Returns: 0 on success, error code on error.
1825  */
1826 
list_orthdev(int * list,DATASET * dset)1827 int list_orthdev (int *list, DATASET *dset)
1828 {
1829     int origv = dset->v;
1830     int i, v, startlen;
1831     int tnum, l0 = 0;
1832     int err;
1833 
1834     if (list[0] == 0) {
1835 	return 0;
1836     }
1837 
1838     if (!dataset_is_panel(dset)) {
1839 	return E_PDWRONG;
1840     }
1841 
1842     err = transform_preprocess_list(list, dset, ORTHDEV);
1843     if (err) {
1844 	return err;
1845     }
1846 
1847     startlen = get_starting_length(list, dset, 2);
1848 
1849     for (i=1; i<=list[0] && !err; i++) {
1850 	v = list[i];
1851 	tnum = get_transform(ORTHDEV, v, 0, 0.0, dset,
1852 			     startlen, origv, NULL);
1853 	if (tnum < 0) {
1854 	    err = 1;
1855 	} else {
1856 	    list[i] = tnum;
1857 	    l0++;
1858 	}
1859     }
1860 
1861     list[0] = l0;
1862 
1863     destroy_mangled_names();
1864 
1865     return err;
1866 }
1867 
1868 /**
1869  * list_xpxgenr:
1870  * @plist: pointer to list of variables to process.  On exit
1871  * the list holds the ID numbers of the squares (and possibly
1872  * cross-products).
1873  * @dset: dataset struct.
1874  * @opt: If OPT_O, both squares and cross-products are generated,
1875  * otherwise only squares.
1876  *
1877  * Generates and adds to the data set squares and (if @opt is OPT_O)
1878  * cross-products of the variables given in the list pointed to
1879  * by @plist.
1880  *
1881  * Returns: 0 on success, error code on error.
1882  */
1883 
list_xpxgenr(int ** plist,DATASET * dset,gretlopt opt)1884 int list_xpxgenr (int **plist, DATASET *dset, gretlopt opt)
1885 {
1886     int origv = dset->v;
1887     int *list = *plist;
1888     int *xpxlist = NULL;
1889     int tnum, i, j, k, vi, vj;
1890     int startlen, l0;
1891     int err;
1892 
1893     err = transform_preprocess_list(list, dset, SQUARE);
1894     if (err) {
1895 	return err;
1896     }
1897 
1898     l0 = list[0];
1899 
1900     if (opt & OPT_O) {
1901 	int maxterms = (l0 * l0 + l0) / 2;
1902 
1903 	xpxlist = gretl_list_new(maxterms);
1904 	if (xpxlist == NULL) {
1905 	    destroy_mangled_names();
1906 	    return E_ALLOC;
1907 	}
1908     } else {
1909 	xpxlist = list;
1910     }
1911 
1912     startlen = get_starting_length(list, dset, 3);
1913     xpxlist[0] = 0;
1914 
1915     k = 1;
1916     for (i=1; i<=l0; i++) {
1917 	vi = list[i];
1918 	tnum = get_transform(SQUARE, vi, vi, 0.0, dset,
1919 			     startlen, origv, NULL);
1920 	if (tnum > 0) {
1921 	    xpxlist[k++] = tnum;
1922 	    xpxlist[0] += 1;
1923 	}
1924 	if (opt & OPT_O) {
1925 	    for (j=i+1; j<=l0; j++) {
1926 		vj = list[j];
1927 		tnum = xpxgenr(vi, vj, dset);
1928 		if (tnum > 0) {
1929 		    xpxlist[k++] = tnum;
1930 		    xpxlist[0] += 1;
1931 		}
1932 	    }
1933 	}
1934     }
1935 
1936     destroy_mangled_names();
1937 
1938     if (opt & OPT_O) {
1939 	free(*plist);
1940 	*plist = xpxlist;
1941     }
1942 
1943     return (xpxlist[0] > 0)? 0 : E_SQUARES;
1944 }
1945 
list_resample(int * list,DATASET * dset)1946 int list_resample (int *list, DATASET *dset)
1947 {
1948     int origv = dset->v;
1949     int *z = NULL;
1950     int tnum, i, j, vi;
1951     int n, startlen, l0;
1952     int k1, k2;
1953     int err;
1954 
1955     err = transform_preprocess_list(list, dset, RESAMPLE);
1956     if (err) {
1957 	return err;
1958     }
1959 
1960     if (dataset_is_panel(dset)) {
1961 	k1 = dset->t1 / dset->pd;
1962 	k2 = dset->t2 / dset->pd;
1963     } else {
1964 	k1 = dset->t1;
1965 	k2 = dset->t2;
1966     }
1967 
1968     n = k2 - k1 + 1;
1969 
1970     z = malloc(n * sizeof *z);
1971     if (z == NULL) {
1972 	return E_ALLOC;
1973     }
1974 
1975     /* generate uniform drawings from sample range */
1976     gretl_rand_int_minmax(z, n, k1, k2);
1977 
1978     l0 = list[0];
1979     startlen = get_starting_length(list, dset, 3);
1980     list[0] = 0;
1981 
1982     j = 1;
1983     for (i=1; i<=l0; i++) {
1984 	vi = list[i];
1985 	tnum = get_transform(RESAMPLE, vi, 0, 0.0, dset,
1986 			     startlen, origv, z);
1987 	if (tnum > 0) {
1988 	    list[j++] = tnum;
1989 	    list[0] += 1;
1990 	}
1991     }
1992 
1993     free(z);
1994     destroy_mangled_names();
1995 
1996     return (list[0] > 0)? 0 : E_DATA;
1997 }
1998 
1999 /**
2000  * list_dropcoll:
2001  * @list: on entry, list of variables to process; on exit,
2002  * the original list minus collinear terms.
2003  * @dset: dataset struct.
2004  *
2005  * Drop collinear variables from @list.
2006  *
2007  * Returns: 0 on success, error code on error.
2008  */
2009 
list_dropcoll(int * list,double eps,DATASET * dset)2010 int list_dropcoll (int *list, double eps, DATASET *dset)
2011 {
2012     gretl_matrix *R = NULL;
2013     gretl_matrix *X = NULL;
2014     int T, k, err = 0;
2015 
2016     if (list == NULL) {
2017 	return E_DATA;
2018     } else if (list[0] < 2) {
2019 	/* no-op */
2020 	return 0;
2021     }
2022 
2023     if (eps < 0) {
2024 	return E_DATA;
2025     } else if (na(eps)) {
2026 	eps = R_DIAG_MIN;
2027     }
2028 
2029     X = gretl_matrix_data_subset(list, dset, dset->t1, dset->t2,
2030 				 M_MISSING_SKIP, &err);
2031     if (err) {
2032 	return err;
2033     }
2034 
2035     T = X->rows;
2036     k = X->cols;
2037 
2038     if (T < k) {
2039 	gretl_errmsg_sprintf(_("A minimum of %d observations is required"), k);
2040 	err = E_TOOFEW;
2041     } else {
2042 	R = gretl_matrix_alloc(k, k);
2043 	if (R == NULL) {
2044 	    err = E_ALLOC;
2045 	} else {
2046 	    err = gretl_matrix_QR_decomp(X, R);
2047 	}
2048     }
2049 
2050     if (!err) {
2051 	double rii;
2052 	int i, j = 1;
2053 
2054 	for (i=0; i<k && !err; i++, j++) {
2055 	    rii = gretl_matrix_get(R, i, i);
2056 	    if (fabs(rii) < eps) {
2057 		err = gretl_list_delete_at_pos(list, j--);
2058 	    }
2059 	}
2060     }
2061 
2062     gretl_matrix_free(X);
2063     gretl_matrix_free(R);
2064 
2065     return err;
2066 }
2067 
2068 #define DUMDEBUG 0
2069 
real_list_dumgenr(int ** plist,DATASET * dset,double oddval,gretlopt opt)2070 static int real_list_dumgenr (int **plist, DATASET *dset,
2071 			      double oddval, gretlopt opt)
2072 {
2073     int origv = dset->v;
2074     int *list = *plist;
2075     int *tmplist = NULL;
2076     double *x = NULL;
2077     int i, j, t, n;
2078     int startlen;
2079     int err;
2080 
2081     err = transform_preprocess_list(list, dset, DUMMIFY);
2082     if (err) {
2083 	return err;
2084     }
2085 
2086     tmplist = gretl_null_list();
2087     if (tmplist == NULL) {
2088 	err = E_ALLOC;
2089 	goto bailout;
2090     }
2091 
2092     x = malloc(dset->n * sizeof *x);
2093     if (x == NULL) {
2094 	err = E_ALLOC;
2095 	goto bailout;
2096     }
2097 
2098     startlen = get_starting_length(list, dset, 3);
2099 
2100     for (i=1; i<=list[0] && !err; i++) {
2101 	int vi = list[i];
2102 	int nuniq, tnum;
2103 	int jmin, jmax;
2104 	double xt;
2105 
2106 	n = 0;
2107 	for (t=dset->t1; t<=dset->t2; t++) {
2108 	    xt = dset->Z[vi][t];
2109 	    if (!na(xt)) {
2110 		x[n++] = xt;
2111 	    }
2112 	}
2113 
2114 	if (n == 0) {
2115 	    continue;
2116 	}
2117 
2118 	qsort(x, n, sizeof *x, gretl_compare_doubles);
2119 	nuniq = count_distinct_values(x, n);
2120 
2121 	if (nuniq == 1) {
2122 	    continue;
2123 	}
2124 
2125 	rearrange_id_array(x, nuniq, n);
2126 
2127 	jmin = (opt & OPT_F)? 1 : 0;
2128 	jmax = (opt & OPT_L)? nuniq - 1 : nuniq;
2129 
2130 #if DUMDEBUG
2131 	fprintf(stderr, "variable %d has %d distinct values\n", vi, nuniq);
2132 	if (opt & OPT_F) fprintf(stderr, "skipping lowest value\n");
2133 	if (opt & OPT_L) fprintf(stderr, "skipping highest value\n");
2134 #endif
2135 
2136 	for (j=jmin; j<jmax && !err; j++) {
2137 	    if (x[j] != oddval) {
2138 		tnum = get_transform(DUMMIFY, vi, j+1, x[j], dset,
2139 				     startlen, origv, NULL);
2140 #if DUMDEBUG
2141 		fprintf(stderr, "%s: for value %g tnum = %d\n",
2142 			dset->varname[vi], x[j], tnum);
2143 #endif
2144 		if (tnum > 0) {
2145 		    tmplist = gretl_list_append_term(&tmplist, tnum);
2146 		    if (tmplist == NULL) {
2147 			err = E_ALLOC;
2148 		    }
2149 		} else {
2150 		    err = E_DATA;
2151 		}
2152 	    }
2153 	}
2154     }
2155 
2156     if (!err && !(opt & OPT_A) && tmplist[0] == 0) {
2157 	/* don't fail here if doing auto-dummify (OPT_A) */
2158 	gretl_errmsg_set(_("dummify: no suitable variables were found"));
2159 #if DUMDEBUG
2160 	printlist(list, "list for which dummify failed");
2161 #endif
2162 	err = E_DATA;
2163     }
2164 
2165     free(x);
2166 
2167  bailout:
2168 
2169     if (!err && tmplist[0] > 0) {
2170 	free(*plist);
2171 	*plist = tmplist;
2172 #if DUMDEBUG
2173 	printlist(tmplist, "output list");
2174 #endif
2175     } else {
2176 	free(tmplist);
2177     }
2178 
2179     destroy_mangled_names();
2180 
2181     return err;
2182 }
2183 
2184 /**
2185  * list_dumgenr:
2186  * @plist: pointer to list of variables to process; on exit
2187  * the list holds the ID numbers of the generated dummies.
2188  * @dset: dataset struct.
2189  * @opt: can include OPT_F to drop the first value, OPT_L to drop
2190  * the last value.
2191  *
2192  * For each of the variables given in the list to which @plist
2193  * points, generates and adds to the data set k dummy variables
2194  * coding for the k distinct values of the variable in question.
2195  * All these variables must have already been marked as discrete.
2196  * If the OPT_F or OPT_L option is given, either the first or
2197  * the last value of each variable is taken as the "base" and is
2198  * not given a dummy encoding (that is, only k - 1 dummies are
2199  * added for each variable).
2200  *
2201  * Returns: 0 on success, error code on error.
2202 */
2203 
list_dumgenr(int ** plist,DATASET * dset,gretlopt opt)2204 int list_dumgenr (int **plist, DATASET *dset, gretlopt opt)
2205 {
2206     return real_list_dumgenr(plist, dset, NADBL, opt);
2207 }
2208 
2209 /**
2210  * dumgenr_with_oddval:
2211  * @plist: pointer to list of variables to process; on exit
2212  * the list holds the ID numbers of the generated dummies.
2213  * @dset: dataset struct.
2214  * @oddval: value which should be skipped when encoding the
2215  * input values as dummies.
2216  *
2217  * For each of the variables given in the list to which @plist
2218  * points, generates and adds to the data set k dummy variables
2219  * coding for the k distinct values of the variable in question.
2220  * All these variables must have already been marked as discrete.
2221  * if @oddval is not %NADBL, it is treated as the omitted
2222  * category and only k - 1 dummies are added for each variable).
2223  *
2224  * Returns: 0 on success, error code on error.
2225 */
2226 
dumgenr_with_oddval(int ** plist,DATASET * dset,double oddval)2227 int dumgenr_with_oddval (int **plist, DATASET *dset, double oddval)
2228 {
2229     return real_list_dumgenr(plist, dset, oddval, OPT_NONE);
2230 }
2231 
2232 /**
2233  * auto_dummify_list:
2234  * @plist: pointer to list of variables to process.
2235  * @dset: dataset struct.
2236  *
2237  * Produces a list in which any coded series in the original
2238  * list are replaced by a set of dummy variables. If the
2239  * original list contains no such series it is not altered.
2240  * In generating dummy series, the omitted category is the
2241  * minimum value of the coding variable.
2242  *
2243  * Returns: 0 on success, error code on error.
2244 */
2245 
auto_dummify_list(int ** plist,DATASET * dset)2246 int auto_dummify_list (int **plist, DATASET *dset)
2247 {
2248     int *orig = *plist;
2249     int *list = NULL;
2250     int *dlist = NULL;
2251     int i, vi;
2252     int err = 0;
2253 
2254     /* empty starting point for full list */
2255     list = gretl_null_list();
2256 
2257     for (i=1; i<=orig[0] && !err; i++) {
2258 	vi = orig[i];
2259 	if (series_is_coded(dset, vi)) {
2260 	    /* coded series: split into dummies */
2261 	    dlist = gretl_list_new(1);
2262 	    dlist[1] = vi;
2263 	    err = real_list_dumgenr(&dlist, dset, NADBL, OPT_F | OPT_A);
2264 	    if (!err) {
2265 		gretl_list_append_list(&list, dlist, &err);
2266 	    }
2267 	    free(dlist);
2268 	} else if (!in_gretl_list(list, vi)) {
2269 	    /* non-coded, not already present */
2270 	    list = gretl_list_append_term(&list, vi);
2271 	    if (list == NULL) {
2272 		err = E_ALLOC;
2273 	    }
2274 	}
2275     }
2276 
2277     if (err) {
2278 	free(list);
2279     } else if (list != *plist) {
2280 	/* replace original list */
2281 	free(*plist);
2282 	*plist = list;
2283     }
2284 
2285     return err;
2286 }
2287 
2288 /**
2289  * list_makediscrete:
2290  * @list: list of variables to process.
2291  * @dset: data information struct.
2292  * @opt: if OPT_R, reverse the operation.
2293  *
2294  * Sets the variables given in @list as discrete, unless
2295  * opt is OPT_R, in which case the variables are set as
2296  * continuous.
2297  *
2298  * Returns: 0 on success, error code on error.
2299  */
2300 
list_makediscrete(const int * list,DATASET * dset,gretlopt opt)2301 int list_makediscrete (const int *list, DATASET *dset, gretlopt opt)
2302 {
2303     int disc = !(opt & OPT_R);
2304     int i, v, err = 0;
2305 
2306     for (i=1; i<=list[0]; i++) {
2307 	v = list[i];
2308 	if (v > 0) {
2309 	    series_set_discrete(dset, v, disc);
2310 	}
2311     }
2312 
2313     return err;
2314 }
2315