1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #include "libgretl.h"
21 #include "gretl_func.h"
22 #include "uservar.h"
23 #include "gretl_string_table.h"
24 #include "libset.h"
25 #include "dbread.h"
26 #include "varinfo_priv.h"
27 
28 #define DDEBUG 0
29 #define FULLDEBUG 0
30 
31 #define Z_COLS_BORROWED 2
32 
33 #define dset_zcols_borrowed(d) (d->auxiliary == Z_COLS_BORROWED)
34 
35 static int pad_daily_data (DATASET *dset, int pd, PRN *prn);
36 
37 /**
38  * check_dataset_is_changed:
39  * @dset: dataset to check.
40  *
41  * Returns: 1 if @dset has been modified since
42  * the last call to this function.
43  */
44 
check_dataset_is_changed(DATASET * dset)45 int check_dataset_is_changed (DATASET *dset)
46 {
47     int ret = dset->modflag;
48 
49     dset->modflag = 0;
50     return ret;
51 }
52 
53 /**
54  * set_dataset_is_changed:
55  * @dset: dataset.
56  * @s: 1 or 0.
57  *
58  * Sets the internal boolean "changed" flag to @s.
59  */
60 
set_dataset_is_changed(DATASET * dset,int s)61 void set_dataset_is_changed (DATASET *dset, int s)
62 {
63     if (dset != NULL && gretl_function_depth() == 0) {
64 	dset->modflag = s;
65     }
66 }
67 
dataset_set_nobs(DATASET * dset,int n)68 static void dataset_set_nobs (DATASET *dset, int n)
69 {
70     if (n != dset->n) {
71 	/* if the total number of observations in the dataset
72 	   has changed, the current "matrix_mask", if present
73 	   (see libset.c), will now be invalid
74 	*/
75 	destroy_matrix_mask();
76 	dset->n = n;
77     }
78 }
79 
80 /**
81  * free_Z:
82  * @dset: dataset information.
83  *
84  * Does a deep free on the data matrix.
85  */
86 
free_Z(DATASET * dset)87 void free_Z (DATASET *dset)
88 {
89     if (dset != NULL && dset->Z != NULL) {
90 	int i, v = dset_zcols_borrowed(dset) ? 1 : dset->v;
91 
92 #if DDEBUG
93 	fprintf(stderr, "Freeing Z (%p): %d vars\n", (void *) dset->Z, v);
94 #endif
95 	for (i=0; i<v; i++) {
96 	    free(dset->Z[i]);
97 	}
98 	free(dset->Z);
99 	dset->Z = NULL;
100     }
101 }
102 
103 /**
104  * dataset_destroy_obs_markers:
105  * @dset: data information struct.
106  *
107  * Frees any allocated observation markers for @dset.
108  */
109 
dataset_destroy_obs_markers(DATASET * dset)110 void dataset_destroy_obs_markers (DATASET *dset)
111 {
112     int i;
113 
114     if (dset->S != NULL) {
115 	for (i=0; i<dset->n; i++) {
116 	   free(dset->S[i]);
117 	}
118 	free(dset->S);
119 	dset->S = NULL;
120 	dset->markers = NO_MARKERS;
121     }
122 }
123 
free_varinfo(DATASET * dset,int v)124 static void free_varinfo (DATASET *dset, int v)
125 {
126     if (dset->varinfo[v]->st != NULL) {
127 	series_table_destroy(dset->varinfo[v]->st);
128     }
129     if (dset->varinfo[v]->label != NULL) {
130 	free(dset->varinfo[v]->label);
131     }
132     free(dset->varinfo[v]);
133 }
134 
135 /**
136  * clear_datainfo:
137  * @dset: data information struct.
138  * @code: either %CLEAR_FULL or %CLEAR_SUBSAMPLE.
139  *
140  * Frees the allocated content of a data information struct;
141  * note that @dset itself is not freed.
142  */
143 
clear_datainfo(DATASET * dset,int code)144 void clear_datainfo (DATASET *dset, int code)
145 {
146     int i;
147 
148     if (dset == NULL) return;
149 
150     if (dset->S != NULL) {
151 	dataset_destroy_obs_markers(dset);
152     }
153     if (dset->submask != NULL) {
154 	free_subsample_mask(dset->submask);
155 	dset->submask = NULL;
156     }
157     if (dset->restriction != NULL) {
158 	free(dset->restriction);
159 	dset->restriction = NULL;
160     }
161     if (dset->padmask != NULL) {
162 	free(dset->padmask);
163 	dset->padmask = NULL;
164     }
165     if (dset->pangrps != NULL) {
166 	free(dset->pangrps);
167 	dset->pangrps = NULL;
168     }
169 
170     /* if this is not a sub-sample datainfo, free varnames, labels, etc. */
171 
172     if (code == CLEAR_FULL) {
173 	if (dset->varname != NULL) {
174 	    for (i=0; i<dset->v; i++) {
175 		free(dset->varname[i]);
176 	    }
177 	    free(dset->varname);
178 	    dset->varname = NULL;
179 	}
180 	if (dset->varinfo != NULL) {
181 	    for (i=0; i<dset->v; i++) {
182 		free_varinfo(dset, i);
183 	    }
184 	    free(dset->varinfo);
185 	    dset->varinfo = NULL;
186 	}
187 	if (dset->descrip != NULL) {
188 	    free(dset->descrip);
189 	    dset->descrip = NULL;
190 	}
191 	if (dset->mapfile != NULL) {
192 	    free(dset->mapfile);
193 	    dset->mapfile = NULL;
194 	}
195 
196 	maybe_free_full_dataset(dset);
197 
198 	dset->v = dset->n = 0;
199 	dset->structure = 0;
200 	dset->pd = 1;
201     }
202 }
203 
204 /**
205  * destroy_dataset:
206  * @dset: pointer to dataset.
207  *
208  * Frees all resources associated with @dset.
209  */
210 
destroy_dataset(DATASET * dset)211 void destroy_dataset (DATASET *dset)
212 {
213     if (dset != NULL) {
214 	free_Z(dset);
215 	clear_datainfo(dset, CLEAR_FULL);
216 	free(dset);
217     }
218 }
219 
220 /**
221  * copy_dataset_obs_info:
222  * @targ: pointer to target dataset.
223  * @src: pointer to source dataset.
224  *
225  * Sets the "date" or observations information in @targ to that
226  * found in @src.
227  */
228 
copy_dataset_obs_info(DATASET * targ,const DATASET * src)229 void copy_dataset_obs_info (DATASET *targ, const DATASET *src)
230 {
231     strcpy(targ->stobs, src->stobs);
232     strcpy(targ->endobs, src->endobs);
233     targ->sd0 = src->sd0;
234     targ->pd = src->pd;
235     targ->structure = src->structure;
236 }
237 
238 /**
239  * dataset_obs_info_default:
240  * @dset: pointer to dataset.
241  *
242  * Sets the "date" or observations information in @dset to a
243  * simple default of cross-sectional data, observations 1 to n,
244  * where n is the %n element (number of observations) in @dset.
245  */
246 
dataset_obs_info_default(DATASET * dset)247 void dataset_obs_info_default (DATASET *dset)
248 {
249     strcpy(dset->stobs, "1");
250     sprintf(dset->endobs, "%d", dset->n);
251     dset->sd0 = 1.0;
252     dset->pd = 1;
253     dset->structure = CROSS_SECTION;
254 }
255 
256 /**
257  * dataset_allocate_obs_markers:
258  * @dset: pointer to dataset
259  *
260  * Allocates space in @dset for strings indentifying the
261  * observations and initializes all of the markers to empty
262  * strings.  Note that These strings have a fixed maximum
263  * length of #OBSLEN - 1.
264  *
265  * Returns: 0 on success, E_ALLOC on error.
266  */
267 
dataset_allocate_obs_markers(DATASET * dset)268 int dataset_allocate_obs_markers (DATASET *dset)
269 {
270     char **S = NULL;
271     int err = 0;
272 
273     if (dset->S == NULL) {
274 	/* not already allocated */
275 	S = strings_array_new_with_length(dset->n, OBSLEN);
276 	if (S == NULL) {
277 	    err = E_ALLOC;
278 	} else {
279 	    dset->S = S;
280 	}
281     }
282 
283     if (dset->S != NULL) {
284 	dset->markers = REGULAR_MARKERS;
285     }
286 
287     return err;
288 }
289 
gretl_varinfo_init(VARINFO * vinfo)290 static void gretl_varinfo_init (VARINFO *vinfo)
291 {
292     memset(vinfo, 0, sizeof *vinfo);
293     vinfo->label = NULL;
294     vinfo->st = NULL;
295     vinfo->stack_level = gretl_function_depth();
296 }
297 
copy_label(VARINFO * vinfo,const char * src)298 static void copy_label (VARINFO *vinfo, const char *src)
299 {
300     free(vinfo->label);
301     if (src == NULL) {
302 	vinfo->label = NULL;
303     } else {
304 	vinfo->label = gretl_strdup(src);
305     }
306 }
307 
labels_differ(const char * s1,const char * s2)308 static int labels_differ (const char *s1, const char *s2)
309 {
310     int nv = (s1 != NULL) + (s2 != NULL);
311 
312     if (nv == 1) {
313 	/* one is NULL, the other not */
314 	return 1;
315     } else if (nv == 2) {
316 	/* neither one is NULL */
317 	return strcmp(s1, s2) != 0;
318     } else {
319 	return 0;
320     }
321 }
322 
323 /**
324  * copy_varinfo:
325  * @targ: target to which to copy.
326  * @src: source to copy from.
327  *
328  * Copies all relevant information from @src to @targ.
329  */
330 
copy_varinfo(VARINFO * targ,const VARINFO * src)331 void copy_varinfo (VARINFO *targ, const VARINFO *src)
332 {
333     if (src == NULL || targ == NULL) {
334 	return;
335     }
336     copy_label(targ, src->label);
337     strcpy(targ->display_name, src->display_name);
338     strcpy(targ->parent, src->parent);
339     targ->flags = src->flags;
340     targ->transform = src->transform;
341     targ->lag = src->lag;
342     targ->midas_period = src->midas_period;
343     targ->midas_freq = src->midas_freq;
344     targ->orig_pd = src->orig_pd;
345     targ->compact_method = src->compact_method;
346     targ->stack_level = src->stack_level;
347     if (src->st != NULL) {
348 	targ->st = series_table_copy(src->st);
349     }
350 }
351 
352 /* For use in the context of returning from a sub-sampled
353    dataset to the full one: trim off series names and
354    "varinfo" beyond the index @nv, which gives the number
355    of series in the full dataset.
356 */
357 
shrink_varinfo(DATASET * dset,int nv)358 int shrink_varinfo (DATASET *dset, int nv)
359 {
360     char **vnames;
361     VARINFO **vi;
362     int i, err = 0;
363 
364     if (nv > dset->v) {
365 	return E_DATA;
366     } else if (nv == dset->v) {
367 	return 0;
368     }
369 
370     for (i=nv; i<dset->v; i++) {
371 	free(dset->varname[i]);
372 	free_varinfo(dset, i);
373     }
374 
375     vnames = realloc(dset->varname, nv * sizeof *vnames);
376     vi = realloc(dset->varinfo, nv * sizeof *vi);
377 
378     if (vnames == NULL || vi == NULL) {
379 	free(vnames);
380 	free(vi);
381 	err = E_ALLOC;
382     } else {
383 	dset->varname = vnames;
384 	dset->varinfo = vi;
385     }
386 
387     return err;
388 }
389 
390 /**
391  * dataset_allocate_varnames:
392  * @dset: pointer to dataset.
393  *
394  * Given a blank @dset, which should have been obtained using
395  * datainfo_new(), allocate space for the names of variables.
396  * The @v member of @dset (representing the number of variables,
397  * including the automatically added constant at position 0) must be
398  * set before calling this function.
399  *
400  * Returns: 0 on sucess, E_ALLOC on failure.
401  */
402 
dataset_allocate_varnames(DATASET * dset)403 int dataset_allocate_varnames (DATASET *dset)
404 {
405     int i, j, v = dset->v;
406     int err = 0;
407 
408     dset->varname = strings_array_new_with_length(v, VNAMELEN);
409     if (dset->varname == NULL) {
410 	return E_ALLOC;
411     }
412 
413     dset->varinfo = calloc(v, sizeof *dset->varinfo);
414     if (dset->varinfo == NULL) {
415 	free(dset->varname);
416 	return E_ALLOC;
417     }
418 
419     for (i=0; i<v; i++) {
420 	dset->varinfo[i] = malloc(sizeof **dset->varinfo);
421 	if (dset->varinfo[i] == NULL) {
422 	    for (j=0; j<i; j++) {
423 		free(dset->varinfo[j]);
424 	    }
425 	    free(dset->varinfo);
426 	    dset->varinfo = NULL;
427 	    err = E_ALLOC;
428 	    break;
429 	} else {
430 	    gretl_varinfo_init(dset->varinfo[i]);
431 	}
432     }
433 
434     if (!err) {
435 	strcpy(dset->varname[0], "const");
436 	series_set_label(dset, 0, _("auto-generated constant"));
437     }
438 
439     return err;
440 }
441 
442 /**
443  * datainfo_init:
444  * @dset: pointer to DATASET struct.
445  *
446  * Zeros all members of @dset and sets it as a plain cross-section.
447  * Designed for use with a DATASET structure that has not been
448  * obtained via datainfo_new().
449  */
450 
datainfo_init(DATASET * dset)451 void datainfo_init (DATASET *dset)
452 {
453     dset->v = 0;
454     dset->n = 0;
455     dset->pd = 1;
456     dset->structure = CROSS_SECTION;
457     dset->sd0 = 1.0;
458     dset->t1 = 0;
459     dset->t2 = 0;
460     dset->stobs[0] = '\0';
461     dset->endobs[0] = '\0';
462 
463     dset->Z = NULL;
464     dset->varname = NULL;
465     dset->varinfo = NULL;
466 
467     dset->markers = NO_MARKERS;
468     dset->modflag = 0;
469 
470     dset->S = NULL;
471     dset->descrip = NULL;
472     dset->submask = NULL;
473     dset->restriction = NULL;
474     dset->padmask = NULL;
475     dset->mapfile = NULL;
476     dset->pangrps = NULL;
477     dset->panel_pd = 0;
478     dset->panel_sd0 = 0;
479 
480     dset->auxiliary = 0;
481     dset->rseed = 0;
482 }
483 
484 /**
485  * datainfo_new:
486  *
487  * Creates a new data information struct pointer from scratch,
488  * properly initialized as empty (no variables, no observations).
489  *
490  * Returns: pointer to data information struct, or NULL on error.
491  */
492 
datainfo_new(void)493 DATASET *datainfo_new (void)
494 {
495     DATASET *dset = malloc(sizeof *dset);
496 
497     if (dset != NULL) {
498 	datainfo_init(dset);
499     }
500 
501     return dset;
502 }
503 
real_create_new_dataset(int nvar,int nobs,gretlopt opt)504 static DATASET *real_create_new_dataset (int nvar, int nobs,
505 					 gretlopt opt)
506 {
507     DATASET *dset = datainfo_new();
508 
509     if (dset == NULL) return NULL;
510 
511     dset->v = nvar;
512     dset->n = nobs;
513     dset->Z = NULL;
514 
515     if (start_new_Z(dset, opt)) {
516 	free(dset);
517 	return NULL;
518     }
519 
520     if (opt & OPT_M) {
521 	if (dataset_allocate_obs_markers(dset)) {
522 	    free_datainfo(dset);
523 	    return NULL;
524 	}
525     }
526 
527     dataset_obs_info_default(dset);
528 
529     return dset;
530 }
531 
532 /**
533  * create_new_dataset:
534  * @nvar: number of variables.
535  * @nobs: number of observations per variable.
536  * @markers: 1 if space should be allocated for "case markers" for
537  * the observations, 0 otherwise.
538  *
539  * Allocates space in the dataset to hold the specified number
540  * of variables and observations.
541  *
542  * Returns: pointer to dataset struct, or NULL on error.
543  */
544 
create_new_dataset(int nvar,int nobs,int markers)545 DATASET *create_new_dataset (int nvar, int nobs, int markers)
546 {
547     gretlopt opt = markers ? OPT_M : OPT_NONE;
548 
549     return real_create_new_dataset(nvar, nobs, opt);
550 }
551 
create_auxiliary_dataset(int nvar,int nobs,gretlopt opt)552 DATASET *create_auxiliary_dataset (int nvar, int nobs, gretlopt opt)
553 {
554     DATASET *dset = real_create_new_dataset(nvar, nobs, opt);
555 
556     if (dset != NULL) {
557 	if (opt & OPT_B) {
558 	    dset->auxiliary = Z_COLS_BORROWED;
559 	} else {
560 	    dset->auxiliary = 1;
561 	}
562     }
563 
564     return dset;
565 }
566 
make_borrowed_Z(int v,int n)567 static double **make_borrowed_Z (int v, int n)
568 {
569     double **Z = malloc(v * sizeof *Z);
570 
571     if (Z != NULL) {
572 	int i;
573 
574 	for (i=0; i<v; i++) {
575 	    Z[i] = NULL;
576 	}
577 
578 	Z[0] = malloc(n * sizeof **Z);
579 
580 	if (Z[0] == NULL) {
581 	    free(Z);
582 	    Z = NULL;
583 	} else {
584 	    for (i=0; i<n; i++) {
585 		Z[0][i] = 1.0;
586 	    }
587 	}
588     }
589 
590     return Z;
591 }
592 
593 /**
594  * allocate_Z:
595  * @dset: pointer to dataset.
596  * @opt: may include OPT_B to indicate that the data columns
597  * will be "borrowed".
598  *
599  * Allocates the two-dimensional data array Z,
600  * based on the v (number of variables) and n (number of
601  * observations) members of @dset.  The variable at
602  * position 0 is initialized to all 1s; other variables
603  * are initialized to #NADBL (unless OPT_B is given).
604  *
605  * Returns: 0 on success, E_ALLOC on error.
606  */
607 
allocate_Z(DATASET * dset,gretlopt opt)608 int allocate_Z (DATASET *dset, gretlopt opt)
609 {
610     int i, t;
611     int err = 0;
612 
613     if (dset->Z != NULL) {
614 	fprintf(stderr, "*** error: allocate_Z called with non-NULL Z\n");
615     }
616 
617     if (opt & OPT_B) {
618 	dset->Z = make_borrowed_Z(dset->v, dset->n);
619     } else {
620 	dset->Z = doubles_array_new(dset->v, dset->n);
621     }
622 
623     if (dset->Z == NULL) {
624 	err = E_ALLOC;
625     } else if (!(opt & OPT_B)) {
626 	for (i=0; i<dset->v; i++) {
627 	    for (t=0; t<dset->n; t++) {
628 		dset->Z[i][t] = (i == 0)? 1.0 : NADBL;
629 	    }
630 	}
631     }
632 
633     return err;
634 }
635 
636 /**
637  * start_new_Z:
638  * @dset: pointer to dataset.
639  * @opt: if includes OPT_R we're sub-sampling from a full data set;
640  * if includes OPT_P, do not null out dset->S and markers.
641  *
642  * Initializes the data array within @dset (adding the constant in
643  * position 0).
644  *
645  * Returns: 0 on successful completion, non-zero on error.
646  */
647 
start_new_Z(DATASET * dset,gretlopt opt)648 int start_new_Z (DATASET *dset, gretlopt opt)
649 {
650     if (allocate_Z(dset, opt)) {
651 	return E_ALLOC;
652     }
653 
654     dset->t1 = 0;
655     dset->t2 = dset->n - 1;
656 
657     if (opt & OPT_R) {
658 	/* sub-sampling */
659 	dset->varname = NULL;
660 	dset->varinfo = NULL;
661     } else if (dataset_allocate_varnames(dset)) {
662 	free_Z(dset);
663 	dset->Z = NULL;
664 	return E_ALLOC;
665     }
666 
667     if (!(opt & OPT_P)) {
668 	dset->S = NULL;
669 	dset->markers = NO_MARKERS;
670     }
671 
672     dset->descrip = NULL;
673     dset->submask = NULL;
674     dset->restriction = NULL;
675     dset->padmask = NULL;
676     dset->mapfile = NULL;
677 
678     if (!(opt & OPT_R)) {
679 	dset->pangrps = NULL;
680     }
681 
682     return 0;
683 }
684 
reallocate_markers(DATASET * dset,int n)685 static int reallocate_markers (DATASET *dset, int n)
686 {
687     char **S;
688     int t;
689 
690     S = realloc(dset->S, n * sizeof *S);
691     if (S == NULL) {
692 	return 1;
693     }
694 
695     for (t=dset->n; t<n; t++) {
696 	S[t] = malloc(OBSLEN);
697 	if (S[t] == NULL) {
698 	    int j;
699 
700 	    for (j=dset->n; j<t; j++) {
701 		free(S[j]);
702 	    }
703 	    free(S);
704 	    return 1;
705 	}
706 	S[t][0] = '\0';
707     }
708 
709     dset->S = S;
710 
711     return 0;
712 }
713 
714 /* Allow for the possibility of centered seasonal dummies: usually
715    xon = 1 and xoff = 0, but in the centered case xon = 1 - 1/pd
716    and xoff = -1/pd.
717 */
718 
get_xon_xoff(const double * x,int n,int pd,double * xon,double * xoff)719 static int get_xon_xoff (const double *x, int n, int pd, double *xon, double *xoff)
720 {
721     double cfac = 1.0 / pd;
722     double xc = 1.0 - cfac, yc = -cfac;
723     double x0 = 999, y0 = 999;
724     int t, ret = 1;
725 
726     for (t=0; t<n && ret; t++) {
727 	if (x[t] == 1.0) {
728 	    if (x0 == 999) x0 = 1.0;
729 	    else if (x[t] != x0) ret = 0;
730 	} else if (x[t] == 0.0) {
731 	    if (y0 == 999) y0 = 0.0;
732 	    else if (x[t] != y0) ret = 0;
733 	} else if (x[t] == xc) {
734 	    if (x0 == 999) x0 = xc;
735 	    else if (x[t] != x0) ret = 0;
736 	} else if (x[t] == yc) {
737 	    if (y0 == 999) y0 = yc;
738 	    else if (x[t] != y0) ret = 0;
739 	} else {
740 	    ret = 0;
741 	}
742     }
743 
744     if (ret) {
745 	*xon = x0;
746 	*xoff = y0;
747     }
748 
749     return ret;
750 }
751 
real_periodic_dummy(const double * x,int n,int * pd,int * offset,double * pxon,double * pxoff)752 static int real_periodic_dummy (const double *x, int n,
753 				int *pd, int *offset,
754 				double *pxon, double *pxoff)
755 {
756     double xon = 1.0, xoff = 0.0;
757     int onbak = 0;
758     int gap = 0;
759     int trail = 0;
760     int t, m = n - 1, ret = 1;
761 
762     if (!get_xon_xoff(x, n, *pd, &xon, &xoff)) {
763 	return 0;
764     }
765 
766     *pd = -1;
767     *offset = -1;
768     trail = 0;
769 
770     /* find number of trailing "off" values */
771     for (t=n-1; t>0; t--) {
772 	if (x[t] == xoff) {
773 	    trail++;
774 	} else {
775 	    if (x[t] == xon) {
776 		m = t;
777 	    } else {
778 		ret = 0;
779 	    }
780 	    break;
781 	}
782     }
783 
784     /* check for dummyhood and periodicity */
785     for (t=0; t<=m && ret; t++) {
786 	if (x[t] == xoff) {
787 	    onbak = 0;
788 	    gap++;
789 	} else if (x[t] == xon) {
790 	    if (onbak) {
791 		ret = 0;
792 	    } else if (*offset < 0) {
793 		*offset = gap;
794 	    } else if (*pd < 0) {
795 		*pd = gap + 1;
796 		if (*pd < *offset + 1) {
797 		    ret = 0;
798 		}
799 	    } else if (gap != *pd - 1) {
800 		ret = 0;
801 	    } else if (gap < trail) {
802 		ret = 0;
803 	    }
804 	    gap = 0;
805 	    onbak = 1;
806 	} else {
807 	    ret = 0;
808 	    break;
809 	}
810     }
811 
812     if (ret && pxon != NULL && pxoff != NULL) {
813 	*pxon = xon;
814 	*pxoff = xoff;
815     }
816 
817     return ret;
818 }
819 
820 /**
821  * is_periodic_dummy:
822  * @x: array to examine.
823  * @dset: pointer to dataset.
824  *
825  * Returns: 1 if @x is a periodic dummy variable,
826  * 0 otherwise.
827  */
828 
is_periodic_dummy(const double * x,const DATASET * dset)829 int is_periodic_dummy (const double *x, const DATASET *dset)
830 {
831     int offset, pd = dset->pd;
832 
833     return real_periodic_dummy(x, dset->n, &pd, &offset, NULL, NULL);
834 }
835 
is_linear_trend(const double * x,int n)836 static int is_linear_trend (const double *x, int n)
837 {
838     int t, ret = 1;
839 
840     for (t=1; t<n; t++) {
841 	if (x[t] != x[t-1] + 1.0) {
842 	    ret = 0;
843 	    break;
844 	}
845     }
846 
847     return ret;
848 }
849 
is_quadratic_trend(const double * x,int n)850 static int is_quadratic_trend (const double *x, int n)
851 {
852     double t2;
853     int t, ret = 1;
854 
855     for (t=0; t<n; t++) {
856 	t2 = (t + 1) * (t + 1);
857 	if (x[t] != t2) {
858 	    ret = 0;
859 	    break;
860 	}
861     }
862 
863     return ret;
864 }
865 
866 /**
867  * is_trend_variable:
868  * @x: array to examine.
869  * @n: number of elements in array.
870  *
871  * Returns: 1 if @x is a simple linear trend variable, with each
872  * observation equal to the preceding observation plus 1, or
873  * if @x is a quadratic trend starting at 1 for the first
874  * observation in the data set, and 0 otherwise.
875  */
876 
is_trend_variable(const double * x,int n)877 int is_trend_variable (const double *x, int n)
878 {
879     int ret = 0;
880 
881     if (is_linear_trend(x, n)) {
882 	ret = 1;
883     } else if (is_quadratic_trend(x, n)) {
884 	ret = 1;
885     }
886 
887     return ret;
888 }
889 
maybe_extend_trends(DATASET * dset,int oldn)890 static void maybe_extend_trends (DATASET *dset, int oldn)
891 {
892     int i, t;
893 
894     for (i=1; i<dset->v; i++) {
895 	if (is_linear_trend(dset->Z[i], oldn)) {
896 	    for (t=oldn; t<dset->n; t++) {
897 		dset->Z[i][t] = dset->Z[i][t-1] + 1.0;
898 	    }
899 	} else if (is_quadratic_trend(dset->Z[i], oldn)) {
900 	    for (t=oldn; t<dset->n; t++) {
901 		dset->Z[i][t] = (t + 1) * (t + 1);
902 	    }
903 	}
904     }
905 }
906 
maybe_extend_dummies(DATASET * dset,int oldn)907 static void maybe_extend_dummies (DATASET *dset, int oldn)
908 {
909     int pd = dset->pd;
910     double xon = 1.0, xoff = 0.0;
911     int offset;
912     int i, t;
913 
914     for (i=1; i<dset->v; i++) {
915 	if (real_periodic_dummy(dset->Z[i], oldn, &pd, &offset, &xon, &xoff)) {
916 	    for (t=oldn; t<dset->n; t++) {
917 		dset->Z[i][t] = ((t - offset) % pd)? xoff : xon;
918 	    }
919 	}
920     }
921 }
922 
923 /* regular, not panel-time, version */
924 
real_dataset_add_observations(DATASET * dset,int n,gretlopt opt)925 static int real_dataset_add_observations (DATASET *dset, int n,
926 					  gretlopt opt)
927 {
928     double *x;
929     int oldn = dset->n;
930     int i, t, bign;
931     int err = 0;
932 
933     if (dset_zcols_borrowed(dset)) {
934 	fprintf(stderr, "*** Internal error: modifying borrowed data\n");
935 	return E_DATA;
936     }
937 
938     if (n <= 0) {
939 	return 0;
940     }
941 
942     if (dataset_is_panel(dset) && n % dset->pd != 0) {
943 	return E_PDWRONG;
944     }
945 
946     bign = oldn + n;
947 
948     for (i=0; i<dset->v; i++) {
949 	x = realloc(dset->Z[i], bign * sizeof *x);
950 	if (x == NULL) {
951 	    return E_ALLOC;
952 	}
953 	dset->Z[i] = x;
954 	for (t=oldn; t<bign; t++) {
955 	    dset->Z[i][t] = (i == 0)? 1.0 : NADBL;
956 	}
957     }
958 
959     if (dataset_has_markers(dset)) {
960 	if (opt & OPT_D) {
961 	    dataset_destroy_obs_markers(dset);
962 	} else {
963 	    if (reallocate_markers(dset, bign)) {
964 		return E_ALLOC;
965 	    }
966 	    for (t=oldn; t<bign; t++) {
967 		sprintf(dset->S[t], "%d", t + 1);
968 	    }
969 	}
970     }
971 
972     if (dset->t2 == dset->n - 1) {
973 	dset->t2 = bign - 1;
974     }
975 
976     dataset_set_nobs(dset, bign);
977 
978     if (opt & OPT_A) {
979 	maybe_extend_trends(dset, oldn);
980 	maybe_extend_dummies(dset, oldn);
981     }
982 
983     /* does daily data need special handling? */
984     ntolabel(dset->endobs, bign - 1, dset);
985 
986     return err;
987 }
988 
panel_dataset_extend_time(DATASET * dset,int n)989 static int panel_dataset_extend_time (DATASET *dset, int n)
990 {
991     double *utmp, *vtmp;
992     char **S = NULL;
993     int newT, oldT = dset->pd;
994     int oldn = dset->n;
995     int n_units;
996     int i, j, s, t, bign;
997     size_t usz, vsz;
998     int err = 0;
999 
1000     if (!dataset_is_panel(dset)) {
1001 	return E_PDWRONG;
1002     }
1003 
1004     if (dset_zcols_borrowed(dset)) {
1005 	fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1006 	return E_DATA;
1007     }
1008 
1009     if (n <= 0) {
1010 	return 0;
1011     }
1012 
1013     n_units = oldn / oldT;
1014     newT = oldT + n;
1015     bign = n_units * newT;
1016 
1017     usz = newT * sizeof *utmp;
1018     vsz = bign * sizeof *vtmp;
1019 
1020     utmp = malloc(usz);
1021     if (utmp == NULL) {
1022 	return E_ALLOC;
1023     }
1024 
1025     if (dataset_has_markers(dset)) {
1026 	S = strings_array_new_with_length(bign, OBSLEN);
1027 	if (S == NULL) {
1028 	    free(utmp);
1029 	    return E_ALLOC;
1030 	}
1031     }
1032 
1033     for (i=0; i<dset->v; i++) {
1034 	int uconst = 1, utrend = 1, dtrend = 1;
1035 	double xbak = NADBL;
1036 	guint32 dt = 0, dbak = 0;
1037 	int ed_err;
1038 
1039 	vtmp = malloc(vsz);
1040 	if (vtmp == NULL) {
1041 	    err = E_ALLOC;
1042 	    goto bailout;
1043 	}
1044 
1045 	s = 0;
1046 	for (j=0; j<n_units; j++) {
1047 	    for (t=0; t<oldT; t++) {
1048 		utmp[t] = dset->Z[i][s++];
1049 		if (dtrend) {
1050 		    dt = epoch_day_from_ymd_basic(utmp[t]);
1051 		}
1052 		if (t == 0) {
1053 		    xbak = utmp[t];
1054 		    dbak = dt;
1055 		} else {
1056 		    if (uconst && (utmp[t] != xbak)) {
1057 			uconst = 0;
1058 		    }
1059 		    if (utrend && (utmp[t] != xbak + 1)) {
1060 			utrend = 0;
1061 		    }
1062 		    if (dtrend && (dt != dbak + 1)) {
1063 			dtrend = 0;
1064 		    }
1065 		}
1066 		xbak = utmp[t];
1067 		dbak = dt;
1068 	    }
1069 	    for (t=oldT; t<newT; t++) {
1070 		if (i == 0) {
1071 		    utmp[t] = 1.0;
1072 		} else if (uconst) {
1073 		    utmp[t] = utmp[t-1];
1074 		} else if (utrend) {
1075 		    utmp[t] = utmp[t-1] + 1;
1076 		} else if (dtrend) {
1077 		    dt = epoch_day_from_ymd_basic(utmp[t-1]);
1078 		    utmp[t] = ymd_basic_from_epoch_day(dt+1, 0, &ed_err);
1079 		} else {
1080 		    utmp[t] = NADBL;
1081 		}
1082 	    }
1083 	    memcpy(vtmp + j*newT, utmp, usz);
1084 	}
1085 	free(dset->Z[i]);
1086 	dset->Z[i] = vtmp;
1087     }
1088 
1089     if (S != NULL) {
1090 	int k = 0;
1091 
1092 	s = 0;
1093 	for (j=0; j<n_units; j++) {
1094 	    for (t=0; t<newT; t++) {
1095 		if (t < oldT) {
1096 		    strcpy(S[k], dset->S[s++]);
1097 		} else {
1098 		    sprintf(S[k], "%d:%d", j+1, t+1);
1099 		}
1100 		k++;
1101 	    }
1102 	}
1103 	strings_array_free(dset->S, oldn);
1104 	dset->S = S;
1105 	S = NULL;
1106     }
1107 
1108     if (dset->t2 == dset->n - 1) {
1109 	dset->t2 = bign - 1;
1110     }
1111 
1112     dataset_set_nobs(dset, bign);
1113     dset->pd = newT;
1114     ntolabel(dset->endobs, bign - 1, dset);
1115 
1116  bailout:
1117 
1118     free(utmp);
1119     if (S != NULL) {
1120 	strings_array_free(S, bign);
1121     }
1122 
1123     return err;
1124 }
1125 
1126 /**
1127  * dataset_add_observations:
1128  * @dset: pointer to dataset.
1129  * @n: number of observations to add.
1130  * @opt: use OPT_A to attempt to recognize and
1131  * automatically extend simple deterministic variables such
1132  * as a time trend and periodic dummy variables;
1133  * use OPT_D to drop any observation markers rather than
1134  * expanding the set of markers and padding it out with
1135  * dummy values; use OPT_T to extend in the time dimension
1136  * in the case of panel data.
1137  *
1138  * Extends all series in the dataset by the specified number of
1139  * extra observations.  The added values are initialized to
1140  * the missing value code, #NADBL, with the exception of
1141  * simple deterministic variables when OPT_A is given.
1142  *
1143  * Returns: 0 on success, non-zero code on error.
1144  */
1145 
dataset_add_observations(DATASET * dset,int n,gretlopt opt)1146 int dataset_add_observations (DATASET *dset, int n, gretlopt opt)
1147 {
1148     if (opt & OPT_T) {
1149 	return panel_dataset_extend_time(dset, n);
1150     } else {
1151 	return real_dataset_add_observations(dset, n, opt);
1152     }
1153 }
1154 
real_insert_observation(int pos,DATASET * dset)1155 static int real_insert_observation (int pos, DATASET *dset)
1156 {
1157     double *x;
1158     int n = dset->n + 1;
1159     int i, t;
1160     int err = 0;
1161 
1162     for (i=0; i<dset->v; i++) {
1163 	x = realloc(dset->Z[i], n * sizeof *x);
1164 	if (x == NULL) {
1165 	    return E_ALLOC;
1166 	}
1167 	dset->Z[i] = x;
1168 	for (t=dset->n; t>pos; t--) {
1169 	    dset->Z[i][t] = dset->Z[i][t-1];
1170 	}
1171 	dset->Z[i][pos] = (i == 0)? 1.0 : NADBL;
1172     }
1173 
1174     if (dataset_has_markers(dset)) {
1175 	if (reallocate_markers(dset, n)) {
1176 	    return E_ALLOC;
1177 	}
1178 	for (t=dset->n; t>pos; t--) {
1179 	    strcpy(dset->S[t], dset->S[t-1]);
1180 	}
1181 	sprintf(dset->S[pos], "%d", pos + 1);
1182     }
1183 
1184     if (dset->t2 == dset->n - 1) {
1185 	dset->t2 = n - 1;
1186     }
1187 
1188     dataset_set_nobs(dset, n);
1189     ntolabel(dset->endobs, n - 1, dset);
1190 
1191     return err;
1192 }
1193 
1194 /**
1195  * dataset_drop_observations:
1196  * @dset: pointer to dataset.
1197  * @n: number of observations to drop.
1198  *
1199  * Deletes @n observations from the end of each series in the
1200  * dataset.
1201  *
1202  * Returns: 0 on success, non-zero code on error.
1203  */
1204 
dataset_drop_observations(DATASET * dset,int n)1205 int dataset_drop_observations (DATASET *dset, int n)
1206 {
1207     double *x;
1208     int i, newn;
1209 
1210     if (dset_zcols_borrowed(dset)) {
1211 	fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1212 	return E_DATA;
1213     }
1214 
1215     if (n <= 0) {
1216 	return 0;
1217     }
1218 
1219     if (dataset_is_panel(dset) && n % dset->pd != 0) {
1220 	return E_PDWRONG;
1221     }
1222 
1223     newn = dset->n - n;
1224 
1225     if (newn == 0) {
1226 	free_Z(dset);
1227 	clear_datainfo(dset, CLEAR_FULL);
1228 	return 0;
1229     }
1230 
1231     for (i=0; i<dset->v; i++) {
1232 	x = realloc(dset->Z[i], newn * sizeof *x);
1233 	if (x == NULL) {
1234 	    return E_ALLOC;
1235 	}
1236 	dset->Z[i] = x;
1237     }
1238 
1239     if (dataset_has_markers(dset)) {
1240 	if (reallocate_markers(dset, newn)) {
1241 	    return E_ALLOC;
1242 	}
1243     }
1244 
1245     if (dset->t2 > newn - 1) {
1246 	dset->t2 = newn - 1;
1247     }
1248 
1249     dataset_set_nobs(dset, newn);
1250 
1251     /* does daily data need special handling? */
1252     ntolabel(dset->endobs, newn - 1, dset);
1253 
1254     return 0;
1255 }
1256 
1257 /**
1258  * dataset_shrink_obs_range:
1259  * @dset: pointer to dataset.
1260  *
1261  * Truncates the range of observations in the dataset, based on
1262  * the current values of the t1 and t2 members of @dset.
1263  *
1264  * Returns: 0 on success, non-zero code on error.
1265  */
1266 
dataset_shrink_obs_range(DATASET * dset)1267 int dataset_shrink_obs_range (DATASET *dset)
1268 {
1269     int offset = dset->t1;
1270     int newn = dset->t2 - dset->t1 + 1;
1271     int tail = dset->n - newn;
1272     int err = 0;
1273 
1274     if (dset_zcols_borrowed(dset)) {
1275 	fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1276 	return E_DATA;
1277     }
1278 
1279     if (offset > 0) {
1280 	/* If the revised dataset starts at an offset into
1281 	   the original, shift each series back to the start of
1282 	   its Z[i] array.
1283 	*/
1284 	int i, mvsize;
1285 
1286 	mvsize = newn * sizeof **dset->Z;
1287 	for (i=0; i<dset->v; i++) {
1288 	    memmove(dset->Z[i], dset->Z[i] + offset, mvsize);
1289 	}
1290 
1291 	if (dataset_has_markers(dset)) {
1292 	    for (i=0; i<offset; i++) {
1293 		free(dset->S[i]);
1294 	    }
1295 	    mvsize = newn * sizeof *dset->S;
1296 	    memmove(dset->S, dset->S + offset, mvsize);
1297 	}
1298 
1299 	if (dset->structure == CROSS_SECTION) {
1300 	    ntolabel(dset->stobs, 0, dset);
1301 	} else {
1302 	    /* FIXME panel? */
1303 	    ntolabel(dset->stobs, dset->t1, dset);
1304 	    dset->sd0 = get_date_x(dset->pd, dset->stobs);
1305 	}
1306 
1307 	dset->t1 = 0;
1308     }
1309 
1310     err = dataset_drop_observations(dset, tail);
1311 
1312     return err;
1313 }
1314 
1315 static int
dataset_expand_varinfo(int v0,int newvars,DATASET * dset)1316 dataset_expand_varinfo (int v0, int newvars, DATASET *dset)
1317 {
1318     char **varname = NULL;
1319     VARINFO **varinfo = NULL;
1320     int bigv = v0 + newvars;
1321     int i, v, err = 0;
1322 
1323     varname = realloc(dset->varname, bigv * sizeof *varname);
1324     if (varname == NULL) {
1325 	err = E_ALLOC;
1326     } else {
1327 	dset->varname = varname;
1328     }
1329 
1330     for (i=0; i<newvars && !err; i++) {
1331 	v = v0 + i;
1332 	dset->varname[v] = malloc(VNAMELEN);
1333 	if (dset->varname[v] == NULL) {
1334 	    err = E_ALLOC;
1335 	} else {
1336 	    dset->varname[v][0] = '\0';
1337 	}
1338     }
1339 
1340     if (!err && dset->varinfo != NULL) {
1341 	varinfo = realloc(dset->varinfo, bigv * sizeof *varinfo);
1342 	if (varinfo == NULL) {
1343 	    err = E_ALLOC;
1344 	} else {
1345 	    dset->varinfo = varinfo;
1346 	}
1347 	for (i=0; i<newvars && !err; i++) {
1348 	    v = v0 + i;
1349 	    dset->varinfo[v] = malloc(sizeof **varinfo);
1350 	    if (dset->varinfo[v] == NULL) {
1351 		err = E_ALLOC;
1352 	    } else {
1353 		gretl_varinfo_init(dset->varinfo[v]);
1354 	    }
1355 	}
1356     }
1357 
1358     if (!err) {
1359 	sync_dataset_shared_members(dset);
1360     }
1361 
1362     return err;
1363 }
1364 
1365 /* note: values of series newly added here are left uninitialized:
1366    that is the responsibility of the caller
1367 */
1368 
real_add_series(int newvars,double * x,DATASET * dset)1369 static int real_add_series (int newvars, double *x,
1370 			    DATASET *dset)
1371 {
1372     double **newZ;
1373     int v0 = dset->v;
1374     int i, err = 0;
1375 
1376     if (newvars == 0) {
1377 	/* no-op */
1378 	return 0;
1379     }
1380 
1381     newZ = realloc(dset->Z, (v0 + newvars) * sizeof *newZ);
1382 
1383 #if DDEBUG
1384     fprintf(stderr, "real_add_series: add %d vars, Z = %p\n",
1385 	    newvars, (void *) newZ);
1386 #endif
1387 
1388     if (newZ == NULL) {
1389 	err = E_ALLOC;
1390     } else {
1391 	dset->Z = newZ;
1392     }
1393 
1394     if (!err) {
1395 	if (newvars == 1 && x != NULL) {
1396 	    /* a single new var, storage pre-allocated */
1397 	    newZ[v0] = x;
1398 	} else {
1399 	    for (i=0; i<newvars && !err; i++) {
1400 		newZ[v0+i] = malloc(dset->n * sizeof **newZ);
1401 		if (newZ[v0+i] == NULL) {
1402 		    err = E_ALLOC;
1403 		}
1404 	    }
1405 	}
1406     }
1407 
1408     if (!err && dset != fetch_full_dataset()) {
1409 	/* don't expand varinfo if we're adding a series
1410 	   to the full dataset when currently sub-sampled,
1411 	   since in that case varinfo is shared between
1412 	   the two datasets
1413 	*/
1414 	err = dataset_expand_varinfo(v0, newvars, dset);
1415     }
1416 
1417     if (!err) {
1418 	dset->v += newvars;
1419     }
1420 
1421     return err;
1422 }
1423 
1424 /**
1425  * dataset_add_series:
1426  * @dset: pointer to dataset.
1427  * @newvars: number of series to add.
1428  *
1429  * Adds space for the specified number of additional series
1430  * in the dataset. Values are initialized to zero.
1431  *
1432  * Returns: 0 on success, E_ALLOC on error.
1433  */
1434 
dataset_add_series(DATASET * dset,int newvars)1435 int dataset_add_series (DATASET *dset, int newvars)
1436 {
1437     int v0 = dset->v;
1438     int err;
1439 
1440     if (dset_zcols_borrowed(dset)) {
1441 	fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1442 	return E_DATA;
1443     }
1444 
1445     err = real_add_series(newvars, NULL, dset);
1446 
1447     if (!err) {
1448 	int i, v, t;
1449 
1450 	for (i=0; i<newvars; i++) {
1451 	    v = v0 + i;
1452 	    for (t=0; t<dset->n; t++) {
1453 		dset->Z[v][t] = 0.0;
1454 	    }
1455 	}
1456     }
1457 
1458     return err;
1459 }
1460 
1461 /**
1462  * dataset_add_NA_series:
1463  * @dset: pointer to dataset.
1464  * @newvars: number of series to add.
1465  *
1466  * Adds space for the specified number of additional series
1467  * in the dataset. Values are initialized to NA.
1468  *
1469  * Returns: 0 on success, E_ALLOC on error.
1470  */
1471 
dataset_add_NA_series(DATASET * dset,int newvars)1472 int dataset_add_NA_series (DATASET *dset, int newvars)
1473 {
1474     int v0 = dset->v;
1475     int err;
1476 
1477     if (dset_zcols_borrowed(dset)) {
1478 	fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1479 	return E_DATA;
1480     }
1481 
1482     err = real_add_series(newvars, NULL, dset);
1483 
1484     if (!err) {
1485 	int i, v, t;
1486 
1487 	for (i=0; i<newvars; i++) {
1488 	    v = v0 + i;
1489 	    for (t=0; t<dset->n; t++) {
1490 		dset->Z[v][t] = NADBL;
1491 	    }
1492 	}
1493     }
1494 
1495     return err;
1496 }
1497 
1498 /**
1499  * dataset_add_allocated_series:
1500  * @dset: pointer to dataset.
1501  * @x: one-dimensional data array.
1502  *
1503  * Adds @x as an additional series in the dataset.
1504  * The array @x is not copied; it should be treated as
1505  * belonging to @dset after this operation.
1506  *
1507  * Returns: 0 on success, E_ALLOC on error.
1508  */
1509 
dataset_add_allocated_series(DATASET * dset,double * x)1510 int dataset_add_allocated_series (DATASET *dset, double *x)
1511 {
1512     if (dset_zcols_borrowed(dset)) {
1513 	fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1514 	return E_DATA;
1515     } else {
1516 	return real_add_series(1, x, dset);
1517     }
1518 }
1519 
1520 /**
1521  * dataset_add_series_as:
1522  * @dset: pointer to dataset.
1523  * @x: array to be added.
1524  * @name: name to give the new variable.
1525  *
1526  * Adds to the dataset a new series with name @name and
1527  * values given by @x.  The new variable is added at one
1528  * level "deeper" (in terms of function execution) than the
1529  * current level.  This is for use with user-defined functions.
1530  *
1531  * Returns: 0 on success, E_ALLOC on error.
1532  */
1533 
dataset_add_series_as(DATASET * dset,double * x,const char * name)1534 int dataset_add_series_as (DATASET *dset, double *x, const char *name)
1535 {
1536     int v, t, err = 0;
1537 
1538     if (dset_zcols_borrowed(dset)) {
1539 	fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1540 	return E_DATA;
1541     }
1542 
1543     if (dset->varinfo == NULL) {
1544 	gretl_errmsg_set(_("Please open a data file first"));
1545 	return 1;
1546     }
1547 
1548 #if DDEBUG
1549     fprintf(stderr, "dataset_add_series_as: incoming Z=%p, name='%s'\n",
1550 	    (void *) dset->Z, name);
1551 #endif
1552 
1553     err = real_add_series(1, NULL, dset);
1554 
1555     if (!err) {
1556 	v = dset->v - 1;
1557 	for (t=0; t<dset->n; t++) {
1558 	    dset->Z[v][t] = x[t];
1559 	}
1560 	strcpy(dset->varname[v], name);
1561 	dset->varinfo[v]->stack_level += 1;
1562     }
1563 
1564     return err;
1565 }
1566 
1567 /**
1568  * dataset_copy_series_as:
1569  * @dset: pointer to dataset.
1570  * @v: index number of variable to copy.
1571  * @name: name to give the copy.
1572  *
1573  * Makes a copy of series @v under the name @name.
1574  * The copy exists in a variable namespace one level "deeper"
1575  * (in terms of function execution) than the variable being copied.
1576  * This is for use with user-defined functions: a variable
1577  * supplied to a function as an argument is copied into the
1578  * function's namespace under the name it was given as a
1579  * parameter.
1580  *
1581  * Returns: 0 on success, E_ALLOC on error.
1582  */
1583 
dataset_copy_series_as(DATASET * dset,int v,const char * name)1584 int dataset_copy_series_as (DATASET *dset, int v, const char *name)
1585 {
1586     int t, err;
1587 
1588     err = real_add_series(1, NULL, dset);
1589 
1590     if (!err) {
1591 	int vnew = dset->v - 1;
1592 
1593 	for (t=0; t<dset->n; t++) {
1594 	    dset->Z[vnew][t] = dset->Z[v][t];
1595 	}
1596 	strcpy(dset->varname[vnew], name);
1597 	copy_varinfo(dset->varinfo[vnew], dset->varinfo[v]);
1598 	if (dset->varinfo[v]->flags & VAR_LISTARG) {
1599 	    dset->varinfo[vnew]->flags &= ~VAR_LISTARG;
1600 	}
1601 	dset->varinfo[vnew]->stack_level = gretl_function_depth() + 1;
1602 #if 0
1603 	fprintf(stderr, "copied var %d ('%s', level %d) as var %d ('%s', level %d): ",
1604 		v, dset->varname[v], dset->varinfo[v]->stack_level,
1605 		vnew, name, dset->varinfo[vnew]->stack_level);
1606 	fprintf(stderr, "Z[%d][0] = %g\n", vnew, dset->Z[vnew][0]);
1607 #endif
1608     }
1609 
1610     return err;
1611 }
1612 
1613 enum {
1614     DROP_NORMAL,
1615     DROP_SPECIAL
1616 };
1617 
1618 /* DROP_SPECIAL is used when deleting variables from the "full" shadow
1619    of a sub-sampled dataset, after deleting those same variables from
1620    the sub-sampled version: in that case we don't mess with the
1621    pointer elements of the DATASET struct, because these are shared
1622    between the full and sub-sampled versions.
1623 */
1624 
shrink_dataset_to_size(DATASET * dset,int nv,int drop)1625 static int shrink_dataset_to_size (DATASET *dset, int nv, int drop)
1626 {
1627     double **newZ;
1628 
1629 #if DDEBUG
1630     fprintf(stderr, "shrink_dataset_to_size: dset at %p, dset->v=%d, nv=%d\n"
1631 	    " drop = %s\n", (void *) dset, dset->v, nv,
1632 	    (drop == DROP_NORMAL)? "DROP_NORMAL" : "DROP_SPECIAL");
1633 #endif
1634 
1635     if (drop == DROP_NORMAL) {
1636 	char **varname = realloc(dset->varname, nv * sizeof *varname);
1637 	VARINFO **varinfo = realloc(dset->varinfo, nv * sizeof *varinfo);
1638 
1639 	if (varname == NULL || varinfo == NULL) {
1640 	    free(varname);
1641 	    free(varinfo);
1642 	    return E_ALLOC;
1643 	}
1644 
1645 	dset->varname = varname;
1646 	dset->varinfo = varinfo;
1647 	sync_dataset_shared_members(dset);
1648     }
1649 
1650     newZ = realloc(dset->Z, nv * sizeof *newZ);
1651     if (newZ == NULL) {
1652 	return E_ALLOC;
1653     }
1654 
1655     dset->Z = newZ;
1656     dset->v = nv;
1657 
1658     return 0;
1659 }
1660 
vars_renumbered(const int * list,DATASET * dset,int dmin)1661 static int vars_renumbered (const int *list, DATASET *dset,
1662 			    int dmin)
1663 {
1664     int i, ndel = 0;
1665 
1666     for (i=dmin; i<dset->v; i++) {
1667 	if (in_gretl_list(list, i)) {
1668 	    ndel++;
1669 	} else if (ndel > 0 && !series_is_hidden(dset, i)) {
1670 	    return 1;
1671 	}
1672     }
1673 
1674     return 0;
1675 }
1676 
overwrite_err(const char * name)1677 int overwrite_err (const char *name)
1678 {
1679     gretl_errmsg_sprintf(_("The variable %s is read-only"), name);
1680 
1681     return E_DATA;
1682 }
1683 
1684 /**
1685  * series_is_parent:
1686  * @dset: dataset information.
1687  * @v: ID number of series to test.
1688  *
1689  * Returns: 1 if variable @v is "parent" to a transformed
1690  * variable (e.g. a log, lag or difference), othewise 0.
1691  */
1692 
series_is_parent(const DATASET * dset,int v)1693 int series_is_parent (const DATASET *dset, int v)
1694 {
1695     const char *s = dset->varname[v];
1696     int i;
1697 
1698     if (*s == '\0') {
1699 	return 0;
1700     }
1701 
1702     for (i=1; i<dset->v; i++) {
1703 	if (i != v && !strcmp(s, dset->varinfo[i]->parent)) {
1704 	    return 1;
1705 	}
1706     }
1707 
1708     return 0;
1709 }
1710 
1711 /**
1712  * dataset_rename_series:
1713  * @dset: dataset information.
1714  * @v: ID number of the series to be renamed.
1715  * @name: new name to give the series.
1716  *
1717  * Returns: 0 on success, non-zero on error.
1718  */
1719 
dataset_rename_series(DATASET * dset,int v,const char * name)1720 int dataset_rename_series (DATASET *dset, int v, const char *name)
1721 {
1722     int err = 0;
1723 
1724     if (v <= 0 || v >= dset->v || name == NULL) {
1725 	err = E_DATA;
1726     } else {
1727 	err = check_varname(name);
1728     }
1729 
1730     if (!err) {
1731 	GretlType type;
1732 
1733 	type = user_var_get_type_by_name(name);
1734 	if (type != GRETL_TYPE_NONE) {
1735 	    gretl_errmsg_set("There is already an object of this name");
1736 	    err = E_DATA;
1737 	}
1738     }
1739 
1740     if (!err && current_series_index(dset, name) >= 0) {
1741 	gretl_errmsg_set("There is already a series of this name");
1742 	err = E_DATA;
1743     }
1744 
1745     if (!err && (object_is_const(dset->varname[v], v) ||
1746 		 series_is_parent(dset, v))) {
1747 	err = overwrite_err(dset->varname[v]);
1748     }
1749 
1750     if (!err && strcmp(dset->varname[v], name)) {
1751 	dset->varname[v][0] = '\0';
1752 	strncat(dset->varname[v], name, VNAMELEN-1);
1753 	set_dataset_is_changed(dset, 1);
1754     }
1755 
1756     return err;
1757 }
1758 
1759 /**
1760  * dataset_replace_series:
1761  * @dset: pointer to dataset.
1762  * @v: ID number of the series to be replaced.
1763  * @x: replacement values.
1764  * @descrip: replacement description.
1765  * @flag: if = DS_GRAB_VALUES then replace dset->Z[@v]
1766  * with @x, otherwise copy the values in @x to dset->Z[@v].
1767  *
1768  * Replaces the description and numerical content of
1769  * series @v with the information provided.
1770  *
1771  * Returns: 0 on success, non-zero on error.
1772  */
1773 
dataset_replace_series(DATASET * dset,int v,double * x,const char * descrip,DataCopyFlag flag)1774 int dataset_replace_series (DATASET *dset, int v,
1775 			    double *x, const char *descrip,
1776 			    DataCopyFlag flag)
1777 {
1778     if (v < 0 || v >= dset->v) {
1779 	/* out of bounds */
1780 	return E_DATA;
1781     }
1782 
1783     if (object_is_const(dset->varname[v], v) ||
1784 	series_is_parent(dset, v)) {
1785 	return overwrite_err(dset->varname[v]);
1786     }
1787 
1788     gretl_varinfo_init(dset->varinfo[v]);
1789     series_set_label(dset, v, descrip);
1790 
1791     if (flag == DS_GRAB_VALUES) {
1792 	free(dset->Z[v]);
1793 	dset->Z[v] = x;
1794     } else {
1795 	int t;
1796 
1797 	for (t=0; t<dset->n; t++) {
1798 	    dset->Z[v][t] = x[t];
1799 	}
1800     }
1801 
1802     set_dataset_is_changed(dset, 1);
1803 
1804     return 0;
1805 }
1806 
1807 /**
1808  * dataset_replace_series_data:
1809  * @dset: pointer to dataset.
1810  * @v: ID number of the series to be modified.
1811  * @x: replacement values.
1812  * @t1: start of sample range.
1813  * @t2: end of sample range.
1814  * @descrip: replacement description.
1815  *
1816  * Replaces the description and numerical content of
1817  * series @v over the given sample range, with the
1818  * information provided.
1819  *
1820  * Returns: 0 on success, non-zero on error.
1821  */
1822 
dataset_replace_series_data(DATASET * dset,int v,const double * x,int t1,int t2,const char * descrip)1823 int dataset_replace_series_data (DATASET *dset, int v,
1824 				 const double *x,
1825 				 int t1, int t2,
1826 				 const char *descrip)
1827 {
1828     int t, s;
1829 
1830     if (v < 0 || v >= dset->v) {
1831 	/* out of bounds */
1832 	return E_DATA;
1833     }
1834 
1835     if (object_is_const(dset->varname[v], v) ||
1836 	series_is_parent(dset, v)) {
1837 	return overwrite_err(dset->varname[v]);
1838     }
1839 
1840     gretl_varinfo_init(dset->varinfo[v]);
1841     series_set_label(dset, v, descrip);
1842 
1843     s = 0;
1844     for (t=t1; t<=t2; t++) {
1845 	dset->Z[v][t] = x[s++];
1846     }
1847 
1848     set_dataset_is_changed(dset, 1);
1849 
1850     return 0;
1851 }
1852 
real_drop_listed_vars(int * list,DATASET * dset,int * renumber,int drop,PRN * prn)1853 static int real_drop_listed_vars (int *list, DATASET *dset,
1854 				  int *renumber, int drop,
1855 				  PRN *prn)
1856 {
1857     int oldv = dset->v, vmax = dset->v;
1858     char vname[VNAMELEN] = {0};
1859     int d0, d1;
1860     int delmin = oldv;
1861     int i, v, ndel = 0;
1862     int err = 0;
1863 
1864     if (renumber != NULL) {
1865 	*renumber = 0;
1866     }
1867 
1868     if (list == NULL || list[0] == 0) {
1869 	/* no-op */
1870 	return 0;
1871     }
1872 
1873     d0 = list[0];
1874 
1875     check_variable_deletion_list(list, dset);
1876     d1 = list[0];
1877     if (prn != NULL && d1 == 1) {
1878 	strcpy(vname, dset->varname[list[1]]);
1879     }
1880 
1881     if (d1 == 0) {
1882 	goto finish;
1883     }
1884 
1885 #if DDEBUG
1886     fprintf(stderr, "real_drop_listed_variables: ");
1887     if (d1 == 1) {
1888 	fprintf(stderr, "dropping var %d:\n", list[1]);
1889     } else {
1890 	fprintf(stderr, "dropping %d vars:\n", d1);
1891     }
1892     fprintf(stderr, "memo: dset=%p, dset->varname=%p\n",
1893 	    (void *) dset, (void *) dset->varname);
1894 #endif
1895 
1896     /* check that no vars to be deleted are marked "const", and do
1897        some preliminary accounting while we're at it
1898     */
1899     for (i=1; i<=list[0]; i++) {
1900 	v = list[i];
1901 	if (v > 0 && v < oldv) {
1902 	    if (object_is_const(dset->varname[v], v)) {
1903 		return overwrite_err(dset->varname[v]);
1904 	    }
1905 	    if (v < delmin) {
1906 		delmin = v;
1907 	    }
1908 	    ndel++;
1909 	}
1910     }
1911 
1912     if (ndel == 0) {
1913 	return 0;
1914     }
1915 
1916     if (renumber != NULL) {
1917 	*renumber = vars_renumbered(list, dset, delmin);
1918     }
1919 
1920 #if DDEBUG
1921     fprintf(stderr, "real_drop_listed_variables: lowest ID of deleted var"
1922 	    " = %d\n", delmin);
1923 #endif
1924 
1925     /* free and set to NULL all the vars to be deleted */
1926     for (i=1; i<=list[0]; i++) {
1927 	v = list[i];
1928 	if (v > 0 && v < oldv) {
1929 	    free(dset->Z[v]);
1930 	    dset->Z[v] = NULL;
1931 	    if (drop == DROP_NORMAL) {
1932 		free(dset->varname[v]);
1933 		free_varinfo(dset, v);
1934 	    }
1935 	}
1936     }
1937 
1938     /* repack pointers if necessary */
1939 
1940     for (v=1; v<vmax; v++) {
1941 	if (dset->Z[v] == NULL) {
1942 	    int gap = 1;
1943 
1944 	    for (i=v+1; i<vmax; i++) {
1945 		if (dset->Z[i] == NULL) {
1946 		    gap++;
1947 		} else {
1948 		    break;
1949 		}
1950 	    }
1951 
1952 	    if (i < vmax) {
1953 		vmax -= gap;
1954 		for (i=v; i<vmax; i++) {
1955 		    if (drop == DROP_NORMAL) {
1956 			dset->varname[i] = dset->varname[i + gap];
1957 			dset->varinfo[i] = dset->varinfo[i + gap];
1958 		    }
1959 		    dset->Z[i] = dset->Z[i + gap];
1960 		}
1961 	    } else {
1962 		/* deleting all subsequent vars: done */
1963 		break;
1964 	    }
1965 	}
1966     }
1967 
1968     err = shrink_dataset_to_size(dset, oldv - ndel, drop);
1969 
1970  finish:
1971 
1972     /* report results, if appropriate */
1973 
1974     if (!err && prn != NULL) {
1975 	if (d0 == d1) {
1976 	    if (gretl_messages_on()) {
1977 		if (*vname != '\0') {
1978 		    pprintf(prn, _("Deleted %s"), vname);
1979 		} else {
1980 		    pprintf(prn, _("Deleted %d variables"), d1);
1981 		}
1982 		pputc(prn, '\n');
1983 	    }
1984 	} else {
1985 	    if (d1 == 0) {
1986 		pputs(prn, _("No variables were deleted"));
1987 	    } else if (*vname != '\0') {
1988 		pprintf(prn, _("Deleted %s"), vname);
1989 	    } else {
1990 		pprintf(prn, _("Deleted %d variables"), d1);
1991 	    }
1992 	    pprintf(prn, " (%s)\n", _("some data were in use"));
1993 	}
1994     }
1995 
1996     return err;
1997 }
1998 
make_dollar_list(DATASET * dset,int * err)1999 static int *make_dollar_list (DATASET *dset, int *err)
2000 {
2001     int *list = NULL;
2002     int i;
2003 
2004     for (i=1; i<dset->v; i++) {
2005 	if (dset->varname[i][0] == '$') {
2006 	    list = gretl_list_append_term(&list, i);
2007 	    if (list == NULL) {
2008 		*err = E_ALLOC;
2009 		break;
2010 	    }
2011 	}
2012     }
2013 
2014     return list;
2015 }
2016 
2017 /**
2018  * dataset_drop_listed_variables:
2019  * @list: list of series to drop, by ID number.
2020  * @dset: pointer to dataset.
2021  * @renumber: location for return of information on whether
2022  * remaining variables have been renumbered as a result, or
2023  * NULL.
2024  * @prn: pointer to printing struct.
2025  *
2026  * Deletes the series given in @list from the dataset.  Remaining
2027  * series may have their ID numbers changed as a consequence. If
2028  * @renumber is not NULL, this location receives 1 in case series
2029  * have been renumbered, 0 otherwise.
2030  *
2031  * Returns: 0 on success, E_ALLOC on error.
2032  */
2033 
dataset_drop_listed_variables(int * list,DATASET * dset,int * renumber,PRN * prn)2034 int dataset_drop_listed_variables (int *list,
2035 				   DATASET *dset,
2036 				   int *renumber,
2037 				   PRN *prn)
2038 {
2039     int oldv = dset->v;
2040     int *dlist = NULL;
2041     int dupv, free_dlist = 0;
2042     int err = 0;
2043 
2044     if (dset->n == 0 || dset->v == 0) {
2045 	return E_NODATA;
2046     }
2047 
2048     if (dset_zcols_borrowed(dset)) {
2049 	fprintf(stderr, "*** Internal error: modifying borrowed data\n");
2050 	return E_DATA;
2051     }
2052 
2053     if (list == NULL) {
2054 	/* signal to drop internal "$" variables */
2055 	dlist = make_dollar_list(dset, &err);
2056 	if (err) {
2057 	    return err;
2058 	} else if (dlist == NULL) {
2059 	    /* no-op */
2060 	    return 0;
2061 	}
2062 	free_dlist = 1;
2063     } else if (list[0] == 0) {
2064 	/* no-op */
2065 	return 0;
2066     } else {
2067 	dlist = list;
2068     }
2069 
2070     dupv = gretl_list_duplicates(dlist, DELEET);
2071     if (dupv >= 0) {
2072 	gretl_errmsg_sprintf(_("variable %d duplicated in the "
2073 			       "command list."), dupv);
2074 	return E_DATA;
2075     }
2076 
2077     err = real_drop_listed_vars(dlist, dset, renumber,
2078 				DROP_NORMAL, prn);
2079 
2080     if (dlist[0] > 0) {
2081 	if (!err && !dset->auxiliary) {
2082 	    err = gretl_lists_revise(dlist, 0);
2083 	}
2084 	if (!err && complex_subsampled()) {
2085 	    DATASET *fdset = fetch_full_dataset();
2086 
2087 	    err = real_drop_listed_vars(dlist, fdset, NULL,
2088 					DROP_SPECIAL, NULL);
2089 	}
2090     }
2091 
2092     if (free_dlist) {
2093 	free(dlist);
2094     } else if (dset->v != oldv) {
2095 	set_dataset_is_changed(dset, 1);
2096     }
2097 
2098     return err;
2099 }
2100 
2101 /**
2102  * dataset_drop_variable:
2103  * @v: ID number of variable to drop.
2104  * @dset: pointer to dataset.
2105  *
2106  * Deletes variable @v from the dataset.
2107  *
2108  * Returns: 0 on success, E_ALLOC on error.
2109  */
2110 
dataset_drop_variable(int v,DATASET * dset)2111 int dataset_drop_variable (int v, DATASET *dset)
2112 {
2113     int list[2] = {1, v};
2114 
2115     if (v <= 0 || v >= dset->v) {
2116 	return E_DATA;
2117     }
2118 
2119     if (dset_zcols_borrowed(dset)) {
2120 	fprintf(stderr, "*** Internal error: modifying borrowed data\n");
2121 	return E_DATA;
2122     }
2123 
2124     return dataset_drop_listed_variables(list, dset, NULL, NULL);
2125 }
2126 
2127 /**
2128  * dataset_renumber_variable:
2129  * @v_old: original ID number of variable.
2130  * @v_new: new ID number.
2131  * @dset: dataset information.
2132  *
2133  * Moves the variable that was originally at position @v_old
2134  * in the datset to position @v_new, renumbering other
2135  * variables as required.
2136  *
2137  * Returns: 0 on success, error code on error;
2138  */
2139 
dataset_renumber_variable(int v_old,int v_new,DATASET * dset)2140 int dataset_renumber_variable (int v_old, int v_new,
2141 			       DATASET *dset)
2142 {
2143     double *x;
2144     VARINFO *vinfo;
2145     char vname[VNAMELEN];
2146     int i;
2147 
2148     if (complex_subsampled()) {
2149 	/* too tricky */
2150 	gretl_errmsg_set(_("dataset is subsampled"));
2151 	return E_DATA;
2152     }
2153 
2154     if (dset_zcols_borrowed(dset)) {
2155 	fprintf(stderr, "*** Internal error: modifying borrowed data\n");
2156 	return E_DATA;
2157     }
2158 
2159     if (v_old < 1 || v_old > dset->v - 1 ||
2160 	v_new < 1 || v_new > dset->v - 1) {
2161 	/* out of bounds */
2162 	return E_DATA;
2163     }
2164 
2165     if (v_old == v_new) {
2166 	/* no-op */
2167 	return 0;
2168     }
2169 
2170     x = dset->Z[v_old];
2171     vinfo = dset->varinfo[v_old];
2172     strcpy(vname, dset->varname[v_old]);
2173 
2174     if (v_new < v_old) {
2175 	/* moving up in ordering */
2176 	for (i=v_old; i>v_new; i--) {
2177 	    dset->Z[i] = dset->Z[i-1];
2178 	    strcpy(dset->varname[i], dset->varname[i-1]);
2179 	    dset->varinfo[i] = dset->varinfo[i-1];
2180 	}
2181     } else {
2182 	/* moving down in ordering */
2183 	for (i=v_old; i<v_new; i++) {
2184 	    dset->Z[i] = dset->Z[i+1];
2185 	    strcpy(dset->varname[i], dset->varname[i+1]);
2186 	    dset->varinfo[i] = dset->varinfo[i+1];
2187 	}
2188     }
2189 
2190     dset->Z[v_new] = x;
2191     strcpy(dset->varname[v_new], vname);
2192     dset->varinfo[v_new] = vinfo;
2193 
2194     set_dataset_is_changed(dset, 1);
2195 
2196     return 0;
2197 }
2198 
2199 /**
2200  * dataset_destroy_hidden_variables:
2201  * @dset: pointer to dataset.
2202  * @vmin: do not drop variables with ID numbers less than this.
2203  *
2204  * Deletes from the dataset any "hidden" variables that have
2205  * been added automatically (for example, auto-generated variables
2206  * used for the x-axis in graph plotting), and that have ID
2207  * numbers greater than or equal to @vmin.  Never deletes the
2208  * automatically generated constant (ID number 0).
2209  *
2210  * Returns: 0 on success, E_ALLOC on error.
2211  */
2212 
dataset_destroy_hidden_variables(DATASET * dset,int vmin)2213 int dataset_destroy_hidden_variables (DATASET *dset, int vmin)
2214 {
2215     int i, nhid = 0;
2216     int err = 0;
2217 
2218     if (vmin <= 1) vmin = 1;
2219 
2220     for (i=vmin; i<dset->v; i++) {
2221 	if (series_is_hidden(dset, i)) {
2222 	    nhid++;
2223 	}
2224     }
2225 
2226     if (nhid > 0) {
2227 	int *list = gretl_list_new(nhid);
2228 
2229 	if (list == NULL) {
2230 	    err = 1;
2231 	} else {
2232 	    int j = 1;
2233 
2234 	    for (i=vmin; i<dset->v; i++) {
2235 		if (series_is_hidden(dset, i)) {
2236 		    list[j++] = i;
2237 		}
2238 	    }
2239 	    err = dataset_drop_listed_variables(list, dset, NULL, NULL);
2240 	    free(list);
2241 	}
2242     }
2243 
2244     return err;
2245 }
2246 
dataset_set_matrix_name(DATASET * dset,const char * name)2247 int dataset_set_matrix_name (DATASET *dset, const char *name)
2248 {
2249     int err = 0;
2250 
2251     if (dset->descrip != NULL) {
2252 	free(dset->descrip);
2253 	dset->descrip = NULL;
2254     }
2255 
2256     if (name != NULL && *name != '\0') {
2257 	dset->descrip = malloc(strlen(name) + 8);
2258 	if (dset->descrip == NULL) {
2259 	    err = E_ALLOC;
2260 	} else {
2261 	    sprintf(dset->descrip, "matrix:%s", name);
2262 	}
2263     }
2264 
2265     return err;
2266 }
2267 
dataset_get_matrix_name(const DATASET * dset)2268 const char *dataset_get_matrix_name (const DATASET *dset)
2269 {
2270     if (dset->descrip != NULL &&
2271 	strlen(dset->descrip) > 7 &&
2272 	!strncmp(dset->descrip, "matrix:", 7)) {
2273 	return dset->descrip + 7;
2274     } else {
2275 	return NULL;
2276     }
2277 }
2278 
dataset_get_mapfile(const DATASET * dset)2279 const char *dataset_get_mapfile (const DATASET *dset)
2280 {
2281     return dset == NULL ? NULL : dset->mapfile;
2282 }
2283 
dataset_set_mapfile(DATASET * dset,const char * fname)2284 void dataset_set_mapfile (DATASET *dset, const char *fname)
2285 {
2286     if (dset != NULL) {
2287 	free(dset->mapfile);
2288 	if (fname != NULL) {
2289 	    dset->mapfile = gretl_strdup(fname);
2290 	} else {
2291 	    dset->mapfile = NULL;
2292 	}
2293     }
2294 }
2295 
dataset_period_label(const DATASET * dset)2296 const char *dataset_period_label (const DATASET *dset)
2297 {
2298     if (dset == NULL) {
2299 	return _("periods");
2300     } else if (quarterly_or_monthly(dset)) {
2301 	return dset->pd == 4 ? _("quarters") : _("months");
2302     } else if (annual_data(dset)) {
2303 	return _("years");
2304     } else if (dataset_is_weekly(dset)) {
2305 	return _("weeks");
2306     } else if (dataset_is_daily(dset)) {
2307 	return _("days");
2308     } else if (dataset_is_hourly(dset)) {
2309 	return _("hours");
2310     } else {
2311 	return _("periods");
2312     }
2313 }
2314 
2315 /* intended for use with newly imported data: trash any
2316    series that contain nothing but NAs
2317 */
2318 
maybe_prune_dataset(DATASET ** pdset,gretl_string_table * st)2319 int maybe_prune_dataset (DATASET **pdset, gretl_string_table *st)
2320 {
2321     DATASET *dset = *pdset;
2322     int allmiss, prune = 0, err = 0;
2323     int i, t;
2324 
2325     for (i=1; i<dset->v; i++) {
2326 	allmiss = 1;
2327 	for (t=0; t<dset->n; t++) {
2328 	    if (!na(dset->Z[i][t])) {
2329 		allmiss = 0;
2330 		break;
2331 	    }
2332 	}
2333 	if (allmiss) {
2334 	    prune = 1;
2335 	    break;
2336 	}
2337     }
2338 
2339     if (prune) {
2340 	char *mask = calloc(dset->v, 1);
2341 	DATASET *newset = NULL;
2342 	int ndrop = 0;
2343 
2344 	if (mask == NULL) {
2345 	    return E_ALLOC;
2346 	}
2347 
2348 	for (i=1; i<dset->v; i++) {
2349 	    allmiss = 1;
2350 	    for (t=0; t<dset->n; t++) {
2351 		if (!na(dset->Z[i][t])) {
2352 		    allmiss = 0;
2353 		    break;
2354 		}
2355 	    }
2356 	    if (allmiss) {
2357 		mask[i] = 1;
2358 		ndrop++;
2359 	    }
2360 	}
2361 
2362 	newset = datainfo_new();
2363 	if (newset == NULL) {
2364 	    err = E_ALLOC;
2365 	} else {
2366 	    newset->v = dset->v - ndrop;
2367 	    newset->n = dset->n;
2368 	    err = start_new_Z(newset, 0);
2369 	}
2370 
2371 	if (!err) {
2372 	    size_t ssize = dset->n * sizeof **newset->Z;
2373 	    int k = 1;
2374 
2375 	    for (i=1; i<dset->v; i++) {
2376 		if (!mask[i]) {
2377 		    memcpy(newset->Z[k], dset->Z[i], ssize);
2378 		    strcpy(newset->varname[k], dset->varname[i]);
2379 		    copy_label(newset->varinfo[k], dset->varinfo[i]->label);
2380 		    if (st != NULL && k < i) {
2381 			gretl_string_table_reset_column_id(st, i, k);
2382 		    }
2383 		    k++;
2384 		}
2385 	    }
2386 
2387 	    destroy_dataset(dset);
2388 	    *pdset = newset;
2389 
2390 	    fprintf(stderr, "pruned dataset to %d variables\n", newset->v);
2391 	}
2392 
2393 	free(mask);
2394     }
2395 
2396     return err;
2397 }
2398 
2399 /* apparatus for sorting entire dataset */
2400 
2401 typedef struct spoints_t_ spoints_t;
2402 typedef struct spoint_t_ spoint_t;
2403 
2404 /* these structs will be passed to qsort() */
2405 struct spoint_t_ {
2406     int obsnum;
2407     int nvals;
2408     double *vals;
2409 };
2410 
2411 /* wrapper for spoint_t to economize on malloc/free */
2412 struct spoints_t_ {
2413     int n_points;
2414     int n_vals;
2415     spoint_t *points;
2416     double *val;
2417 };
2418 
free_spoints(spoints_t * sv)2419 static void free_spoints (spoints_t *sv)
2420 {
2421     free(sv->points);
2422     if (sv->n_vals > 1) {
2423 	/* if n_vals == 1, @val is borrowed */
2424 	free(sv->val);
2425     }
2426     free(sv);
2427 }
2428 
allocate_spoints(DATASET * dset,const int * list)2429 static spoints_t *allocate_spoints (DATASET *dset,
2430 				    const int *list)
2431 {
2432     spoints_t *sv;
2433     int n = dset->n;
2434     int v = list[0];
2435 
2436     sv = malloc(sizeof *sv);
2437 
2438     if (sv != NULL) {
2439 	sv->n_vals = v;
2440 	sv->n_points = n;
2441 	sv->points = malloc(n * sizeof *sv->points);
2442 	if (v == 1) {
2443 	    /* can point directly at data array */
2444 	    sv->val = dset->Z[list[1]];
2445 	} else {
2446 	    /* need to allocate storage */
2447 	    sv->val = malloc((n * v) * sizeof(double));
2448 	}
2449 	if (sv->points == NULL || sv->val == NULL) {
2450 	    free_spoints(sv);
2451 	    sv = NULL;
2452 	} else {
2453 	    double *x = sv->val;
2454 	    int i;
2455 
2456 	    for (i=0; i<n; i++) {
2457 		sv->points[i].obsnum = i;
2458 		sv->points[i].nvals = v;
2459 		sv->points[i].vals = x;
2460 		x += v;
2461 	    }
2462 	}
2463     }
2464 
2465     return sv;
2466 }
2467 
compare_vals_up(const void * a,const void * b)2468 static int compare_vals_up (const void *a, const void *b)
2469 {
2470     const spoint_t *pa = (const spoint_t *) a;
2471     const spoint_t *pb = (const spoint_t *) b;
2472     int i, naa, nab, ret = 0;
2473 
2474     for (i=0; i<pa->nvals && !ret; i++) {
2475 	naa = isnan(pa->vals[i]);
2476 	nab = isnan(pb->vals[i]);
2477 	if (naa || nab) {
2478 	    if (!naa) {
2479 		ret = -1;
2480 	    } else if (!nab) {
2481 		ret = 1;
2482 	    }
2483 	} else {
2484 	    ret = (pa->vals[i] > pb->vals[i]) - (pa->vals[i] < pb->vals[i]);
2485 	}
2486     }
2487 
2488     return ret;
2489 }
2490 
compare_vals_down(const void * a,const void * b)2491 static int compare_vals_down (const void *a, const void *b)
2492 {
2493     const spoint_t *pa = (const spoint_t *) a;
2494     const spoint_t *pb = (const spoint_t *) b;
2495     int i, naa, nab, ret = 0;
2496 
2497     for (i=0; i<pa->nvals && !ret; i++) {
2498 	naa = isnan(pa->vals[i]);
2499 	nab = isnan(pb->vals[i]);
2500 	if (naa || nab) {
2501 	    if (!naa) {
2502 		ret = 1;
2503 	    } else if (!nab) {
2504 		ret = -1;
2505 	    }
2506 	} else {
2507 	    ret = (pa->vals[i] < pb->vals[i]) - (pa->vals[i] > pb->vals[i]);
2508 	}
2509     }
2510 
2511     return ret;
2512 }
2513 
2514 /* Turn a string-valued series into an integer-valued series
2515    representing the places of the strings in lexical order.
2516 */
2517 
2518 typedef struct lexval_ {
2519     const char *s;
2520     int code;
2521 } lexval;
2522 
compare_lexvals(const void * a,const void * b)2523 static int compare_lexvals (const void *a, const void *b)
2524 {
2525     const lexval *lva = (const lexval *) a;
2526     const lexval *lvb = (const lexval *) b;
2527 
2528     return g_utf8_collate(lva->s, lvb->s);
2529 }
2530 
series_to_lexvals(DATASET * dset,int v,int * targ)2531 static int series_to_lexvals (DATASET *dset, int v, int *targ)
2532 {
2533     int i, t, ct, n_strs;
2534     series_table *st = series_get_string_table(dset, v);
2535     char **strs = series_table_get_strings(st, &n_strs);
2536     lexval *lexvals;
2537 
2538     lexvals = calloc(n_strs, sizeof *lexvals);
2539     if (lexvals == NULL) {
2540 	return E_ALLOC;
2541     }
2542 
2543     for (i=0; i<n_strs; i++) {
2544 	lexvals[i].s = strs[i];
2545 	lexvals[i].code = i+1;
2546     }
2547 
2548     qsort(lexvals, n_strs, sizeof *lexvals, compare_lexvals);
2549 
2550     for (t=0; t<dset->n; t++) {
2551 	if (na(dset->Z[v][t])) {
2552 	    targ[t] = INT_MAX;
2553 	} else {
2554 	    ct = (int) dset->Z[v][t];
2555 	    targ[t] = 0;
2556 	    for (i=0; i<n_strs; i++) {
2557 		if (ct == lexvals[i].code) {
2558 		    targ[t] = i+1;
2559 		    break;
2560 		}
2561 	    }
2562 	}
2563     }
2564 
2565     free(lexvals);
2566 
2567     return 0;
2568 }
2569 
dataset_sort_by(DATASET * dset,const int * list,gretlopt opt)2570 int dataset_sort_by (DATASET *dset, const int *list, gretlopt opt)
2571 {
2572     spoints_t *sv = NULL;
2573     double *x = NULL;
2574     char **S = NULL;
2575     int *xs = NULL;
2576     int *xsi = NULL;
2577     int ns = list[0];
2578     int nsvals = 0;
2579     int i, t, v;
2580     int err = 0;
2581 
2582     sv = allocate_spoints(dset, list);
2583     if (sv == NULL) {
2584 	return E_ALLOC;
2585     }
2586 
2587     x = malloc(dset->n * sizeof *x);
2588     if (x == NULL) {
2589 	free_spoints(sv);
2590 	return E_ALLOC;
2591     }
2592 
2593     if (dset->S != NULL) {
2594 	S = malloc(dset->n * sizeof *S);
2595 	if (S == NULL) {
2596 	    free_spoints(sv);
2597 	    free(x);
2598 	    return E_ALLOC;
2599 	}
2600     }
2601 
2602     for (i=0; i<ns; i++) {
2603 	if (is_string_valued(dset, list[i+1])) {
2604 	    nsvals++;
2605 	}
2606     }
2607     if (nsvals > 0) {
2608 	xs = malloc(nsvals * dset->n * sizeof *xs);
2609 	if (xs == NULL) {
2610 	    err = E_ALLOC;
2611 	} else {
2612 	    xsi = xs;
2613 	    for (i=0; i<ns && !err; i++) {
2614 		v = list[i+1];
2615 		if (is_string_valued(dset, v)) {
2616 		    err = series_to_lexvals(dset, v, xsi);
2617 		    xsi += dset->n;
2618 		}
2619 	    }
2620 	}
2621 	if (err) {
2622 	    goto bailout;
2623 	}
2624     }
2625 
2626     xsi = xs;
2627     for (i=0; i<ns; i++) {
2628 	v = list[i+1];
2629 	if (is_string_valued(dset, v)) {
2630 	    for (t=0; t<dset->n; t++) {
2631 		if (xsi[t] == INT_MAX) {
2632 		    sv->points[t].vals[i] = NADBL;
2633 		} else {
2634 		    sv->points[t].vals[i] = (double) xsi[t];
2635 		}
2636 	    }
2637 	    xsi += dset->n;
2638 	} else if (ns > 1) {
2639 	    for (t=0; t<dset->n; t++) {
2640 		sv->points[t].vals[i] = dset->Z[v][t];
2641 	    }
2642 	}
2643     }
2644 
2645     if (opt & OPT_D) {
2646 	/* descending */
2647 	qsort(sv->points, dset->n, sizeof(spoint_t), compare_vals_down);
2648     } else {
2649 	/* ascending */
2650 	qsort(sv->points, dset->n, sizeof(spoint_t), compare_vals_up);
2651     }
2652 
2653     /* reorder data values */
2654     for (i=1; i<dset->v; i++) {
2655 	for (t=0; t<dset->n; t++) {
2656 	    x[t] = dset->Z[i][sv->points[t].obsnum];
2657 	}
2658 	memcpy(dset->Z[i], x, dset->n * sizeof *x);
2659     }
2660 
2661     if (S != NULL) {
2662 	/* reorder observation markers */
2663 	for (t=0; t<dset->n; t++) {
2664 	    S[t] = dset->S[sv->points[t].obsnum];
2665 	}
2666 	for (t=0; t<dset->n; t++) {
2667 	    dset->S[t] = S[t];
2668 	}
2669     }
2670 
2671  bailout:
2672 
2673     free_spoints(sv);
2674     free(xs);
2675     free(S);
2676     free(x);
2677 
2678     return err;
2679 }
2680 
dataset_sort(DATASET * dset,const int * list,gretlopt opt)2681 static int dataset_sort (DATASET *dset, const int *list,
2682 			 gretlopt opt)
2683 {
2684     if (dataset_is_time_series(dset) ||
2685 	dataset_is_panel(dset)) {
2686 	gretl_errmsg_set("You can only do this with undated data");
2687 	return E_DATA;
2688     }
2689 
2690     if (list == NULL || list[0] < 1) {
2691 	return E_DATA;
2692     }
2693 
2694     return dataset_sort_by(dset, list, opt);
2695 }
2696 
2697 /**
2698  * dataset_drop_last_variables:
2699  * @dset: pointer to dataset.
2700  * @delvars: number of variables to be dropped.
2701  *
2702  * Deletes from the dataset the number @delvars of variables
2703  * that were added most recently (that have the highest ID numbers).
2704  *
2705  * Returns: 0 on success, E_ALLOC on error.
2706  */
2707 
dataset_drop_last_variables(DATASET * dset,int delvars)2708 int dataset_drop_last_variables (DATASET *dset, int delvars)
2709 {
2710     int newv = dset->v - delvars;
2711     int i, err = 0;
2712 
2713     if (delvars <= 0) {
2714 	return 0;
2715     }
2716 
2717 #if FULLDEBUG
2718     fprintf(stderr, "*** dataset_drop_last_variables: dropping %d, newv = %d\n",
2719 	    delvars, newv);
2720 #endif
2721 
2722     if (newv < 1) {
2723 	fprintf(stderr, "dataset_drop_last_vars: dset->v = %d, delvars = %d "
2724 		" -> newv = %d\n (dset = %p)\n", dset->v, delvars,
2725 		newv, (void *) dset);
2726 	return E_DATA;
2727     }
2728 
2729 #if FULLDEBUG
2730     for (i=0; i<dset->v; i++) {
2731 	if (dset->Z[i] == NULL) {
2732 	    fprintf(stderr, "var %d (%s, level %d, val = NULL) %s\n",
2733 		    i, dset->varname[i], dset->varinfo[i]->stack_level,
2734 		    (i >= newv)? "deleting" : "");
2735 	} else {
2736 	    fprintf(stderr, "var %d (%s, level %d, val[0] = %g) %s\n",
2737 		    i, dset->varname[i], dset->varinfo[i]->stack_level,
2738 		    dset->Z[i][0], (i >= newv)? "deleting" : "");
2739 	}
2740     }
2741 #endif
2742 
2743 #if 0
2744     fprintf(stderr, "dataset_drop_last_variables: origv=%d, newv=%d\n",
2745 	    dset->v, newv);
2746     for (i=1; i<dset->v; i++) {
2747 	fprintf(stderr, "before: var[%d] = '%s'\n", i, dset->varname[i]);
2748     }
2749 #endif
2750 
2751     for (i=newv; i<dset->v; i++) {
2752 	free(dset->varname[i]);
2753 	free_varinfo(dset, i);
2754 	free(dset->Z[i]);
2755 	dset->Z[i] = NULL;
2756     }
2757 
2758     err = shrink_dataset_to_size(dset, newv, DROP_NORMAL);
2759 
2760 #if 0
2761     for (i=1; i<dset->v; i++) {
2762 	fprintf(stderr, "after: var[%d] = '%s'\n", i, dset->varname[i]);
2763     }
2764 #endif
2765 
2766     if (!err && !dset->auxiliary) {
2767 	err = gretl_lists_revise(NULL, newv);
2768     }
2769 
2770     if (!err && complex_subsampled()) {
2771 	DATASET *fset = fetch_full_dataset();
2772 
2773 	/*
2774 	   Context: we're deleting @delvars variables at the end of
2775 	   dset->Z, leaving @newv variables.  The dataset is currently
2776 	   subsampled.
2777 
2778 	   Question: should we delete any variables at the end of
2779 	   fset->Z to keep the two arrays in sync?
2780 
2781 	   If @newv < fset->v, this must mean that at least some of
2782 	   the extra vars we're deleting from the current sub-sampled
2783 	   Z have already been synced to the full Z, so we should do
2784 	   the deletion from full Z.
2785 	*/
2786 
2787 	if (newv < fset->v) {
2788 #if FULLDEBUG
2789 	    fprintf(stderr, "prior fset->v = %d: shrinking full Z to %d vars\n",
2790 		    fset->v, newv);
2791 #endif
2792 	    for (i=newv; i<fset->v; i++) {
2793 		free(fset->Z[i]);
2794 		fset->Z[i] = NULL;
2795 	    }
2796 	    err = shrink_dataset_to_size(fset, newv, DROP_SPECIAL);
2797 	}
2798     }
2799 
2800     return err;
2801 }
2802 
2803 /**
2804  * build_stacked_series:
2805  * @pstack: location for returning stacked series.
2806  * @list: list of series to be stacked.
2807  * @length: number of observations to use per input series.
2808  * @offset: offset at which to start drawing observations.
2809  * @dset: pointer to dataset.
2810  *
2811  * Really for internal use. Don't worry about it.
2812  *
2813  * Returns: 0 on success, non-zero code on error.
2814  */
2815 
build_stacked_series(double ** pstack,int * list,int length,int offset,DATASET * dset)2816 int build_stacked_series (double **pstack, int *list,
2817 			  int length, int offset,
2818 			  DATASET *dset)
2819 {
2820     double *xstack = NULL;
2821     int nv, oldn, bign, tmax;
2822     int i, err = 0;
2823 
2824     if (dset == NULL || dset->n == 0) {
2825 	return E_NODATA;
2826     } else if (dataset_is_subsampled(dset)) {
2827 	gretl_errmsg_set("stack: this function cannot be used when the dataset "
2828 			 "is sub-sampled");
2829 	return E_DATA;
2830     } else if (list == NULL || list[0] <= 0) {
2831 	return E_INVARG;
2832     } else if (length <= 0) {
2833 	return E_INVARG;
2834     } else if (length + offset > dset->n) {
2835 	return E_INVARG;
2836     }
2837 
2838     nv = list[0];
2839 
2840 #if PDEBUG
2841     fprintf(stderr, "nv = %d, length = %d, offset = %d\n", nv, length, offset);
2842 #endif
2843 
2844     bign = nv * length;
2845     if (bign < dset->n) {
2846 	bign = dset->n;
2847     }
2848 
2849 #if PDEBUG
2850     fprintf(stderr, "bign = %d, allocating xstack (oldn = %d)\n", bign, dset->n);
2851 #endif
2852 
2853     /* allocate container for stacked data */
2854     xstack = malloc(bign * sizeof *xstack);
2855     if (xstack == NULL) {
2856 	return E_ALLOC;
2857     }
2858 
2859     /* extend length of all series? */
2860     oldn = dset->n;
2861     if (bign > oldn) {
2862 	err = dataset_add_observations(dset, bign - oldn, OPT_NONE);
2863 	if (err) {
2864 	    return err;
2865 	}
2866     }
2867 
2868     tmax = offset + length;
2869 
2870     /* construct stacked series */
2871     for (i=0; i<nv; i++) {
2872 	int j = list[i+1];
2873 	int bigt = length * i;
2874 	int t;
2875 
2876 	for (t=offset; t<tmax; t++) {
2877 	    xstack[bigt] = dset->Z[j][t];
2878 	    if (dset->S != NULL && bigt != t) {
2879 		strcpy(dset->S[bigt], dset->S[t]);
2880 	    }
2881 	    bigt++;
2882 	}
2883 	if (i == nv - 1) {
2884 	    for (t=bigt; t<bign; t++) {
2885 		xstack[bigt++] = NADBL;
2886 	    }
2887 	}
2888     }
2889 
2890     *pstack = xstack;
2891 
2892     return err;
2893 }
2894 
found_log_parent(const char * s,char * targ)2895 static int found_log_parent (const char *s, char *targ)
2896 {
2897     int len = gretl_namechar_spn(s);
2898 
2899     if (len < VNAMELEN && s[len] == ')') {
2900 	char fmt[8];
2901 
2902 	sprintf(fmt, "%%%d[^)]", VNAMELEN-1);
2903 	sscanf(s, fmt, targ);
2904 	return 1;
2905     }
2906 
2907     return 0;
2908 }
2909 
2910 /**
2911  * series_is_log:
2912  * @dset: dataset information.
2913  * @i: ID number of series.
2914  * @parent: location to which to write the name of the
2915  * "parent" variable if any.
2916  *
2917  * Tries to determine if the variable with ID number @i is
2918  * the logarithm of some other variable.
2919  *
2920  * Returns: 1 if variable @i appears to be a log, else 0.
2921  */
2922 
series_is_log(const DATASET * dset,int i,char * parent)2923 int series_is_log (const DATASET *dset, int i, char *parent)
2924 {
2925     const char *s = series_get_label(dset, i);
2926 
2927     *parent = '\0';
2928 
2929     if (s != NULL && *s != '\0') {
2930 	char fmt[16];
2931 
2932 	sprintf(fmt, "= log of %%%ds", VNAMELEN-1);
2933 
2934 	if (sscanf(s, fmt, parent) == 1) {
2935 	    return 1;
2936 	} else if (!strncmp(s, "log(", 4)) {
2937 	    return found_log_parent(s + 4, parent);
2938 	} else {
2939 	    s += strcspn(s, "=");
2940 	    if (!strncmp(s, "=log(", 5)) {
2941 		return found_log_parent(s + 5, parent);
2942 	    }
2943 	}
2944     }
2945 
2946     return 0;
2947 }
2948 
2949 /**
2950  * series_set_discrete:
2951  * @dset: pointer to data information struct.
2952  * @i: index number of series.
2953  * @s: non-zero to mark variable as discrete, zero to
2954  * mark as not discrete.
2955  *
2956  * Mark a variable as being discrete or not.
2957  */
2958 
series_set_discrete(DATASET * dset,int i,int s)2959 void series_set_discrete (DATASET *dset, int i, int s)
2960 {
2961     if (i > 0 && i < dset->v) {
2962 	int flags = dset->varinfo[i]->flags;
2963 
2964 	if (s && !(flags & VAR_DISCRETE)) {
2965 	    dset->varinfo[i]->flags |= VAR_DISCRETE;
2966 	    set_dataset_is_changed(dset, 1);
2967 	} else if (!s && (flags & VAR_DISCRETE)) {
2968 	    dset->varinfo[i]->flags &= ~VAR_DISCRETE;
2969 	    set_dataset_is_changed(dset, 1);
2970 	}
2971     }
2972 }
2973 
series_record_label(DATASET * dset,int i,const char * s)2974 int series_record_label (DATASET *dset, int i,
2975 			 const char *s)
2976 {
2977     char *targ = dset->varinfo[i]->label;
2978 
2979     if (labels_differ(targ, s)) {
2980 	copy_label(dset->varinfo[i], s);
2981 	set_dataset_is_changed(dset, 1);
2982     }
2983 
2984     return 0;
2985 }
2986 
series_record_display_name(DATASET * dset,int i,const char * s)2987 int series_record_display_name (DATASET *dset, int i,
2988 				const char *s)
2989 {
2990     char *targ = dset->varinfo[i]->display_name;
2991 
2992     if (strcmp(targ, s)) {
2993 	*targ = '\0';
2994 	strncat(targ, s, MAXDISP - 1);
2995 	set_dataset_is_changed(dset, 1);
2996     }
2997 
2998     return 0;
2999 }
3000 
series_get_graph_name(const DATASET * dset,int i)3001 const char *series_get_graph_name (const DATASET *dset, int i)
3002 {
3003     const char *ret = dset->varname[i];
3004 
3005     if (dset->varinfo != NULL && dset->varinfo[i] != NULL) {
3006 	if (dset->varinfo[i]->display_name[0] != '\0') {
3007 	    ret = dset->varinfo[i]->display_name;
3008 	}
3009     }
3010 
3011     return ret;
3012 }
3013 
add_obs(int n,DATASET * dset,gretlopt opt,PRN * prn)3014 static int add_obs (int n, DATASET *dset, gretlopt opt, PRN *prn)
3015 {
3016     int err = 0;
3017 
3018     if (complex_subsampled()) {
3019 	pprintf(prn, _("The data set is currently sub-sampled.\n"));
3020 	err = E_DATA;
3021     } else if (n <= 0) {
3022 	err = E_PARSE;
3023     } else if (opt & OPT_T) {
3024 	/* extending panel time */
3025 	err = panel_dataset_extend_time(dset, n);
3026 	if (!err) {
3027 	    pprintf(prn, _("Panel time extended by %d observations"), n);
3028 	    pputc(prn, '\n');
3029 	}
3030     } else {
3031 	err = dataset_add_observations(dset, n, OPT_A);
3032 	if (!err) {
3033 	    pprintf(prn, _("Dataset extended by %d observations"), n);
3034 	    pputc(prn, '\n');
3035 	    extend_function_sample_range(n);
3036 	}
3037     }
3038 
3039     return err;
3040 }
3041 
insert_obs(int n,DATASET * dset,PRN * prn)3042 static int insert_obs (int n, DATASET *dset, PRN *prn)
3043 {
3044     int err = 0;
3045 
3046     if (complex_subsampled()) {
3047 	pprintf(prn, _("The data set is currently sub-sampled.\n"));
3048 	err = E_DATA;
3049     } else if (dataset_is_panel(dset)) {
3050 	err = E_PDWRONG;
3051     } else if (n <= 0 || n > dset->n) {
3052 	err = E_DATA;
3053     } else {
3054 	err = real_insert_observation(n - 1, dset);
3055     }
3056 
3057     return err;
3058 }
3059 
dataset_op_from_string(const char * s)3060 int dataset_op_from_string (const char *s)
3061 {
3062     int op = DS_NONE;
3063 
3064     if (s == NULL || *s == '\0') {
3065 	return DS_NONE;
3066     }
3067 
3068     if (!strcmp(s, "addobs")) {
3069 	op = DS_ADDOBS;
3070     } else if (!strcmp(s, "compact")) {
3071 	op = DS_COMPACT;
3072     } else if (!strcmp(s, "expand")) {
3073 	op = DS_EXPAND;
3074     } else if (!strcmp(s, "transpose")) {
3075 	op = DS_TRANSPOSE;
3076     } else if (!strcmp(s, "delete")) {
3077 	op = DS_DELETE;
3078     } else if (!strcmp(s, "keep")) {
3079 	op = DS_KEEP;
3080     } else if (!strcmp(s, "sortby")) {
3081 	op = DS_SORTBY;
3082     } else if (!strcmp(s, "dsortby")) {
3083 	op = DS_DSORTBY;
3084     } else if (!strcmp(s, "resample")) {
3085 	op = DS_RESAMPLE;
3086     } else if (!strcmp(s, "restore")) {
3087 	op = DS_RESTORE;
3088     } else if (!strcmp(s, "clear")) {
3089 	op = DS_CLEAR;
3090     } else if (!strcmp(s, "renumber")) {
3091 	op = DS_RENUMBER;
3092     } else if (!strcmp(s, "insobs")) {
3093 	op = DS_INSOBS;
3094     } else if (!strcmp(s, "pad-daily")) {
3095 	op = DS_PAD_DAILY;
3096     }
3097 
3098     return op;
3099 }
3100 
dataset_int_param(const char ** ps,int op,DATASET * dset,int * err)3101 static int dataset_int_param (const char **ps, int op,
3102 			      DATASET *dset, int *err)
3103 {
3104     const char *s = *ps;
3105     char test[32];
3106     int k = 0;
3107 
3108     if ((op == DS_COMPACT || op == DS_EXPAND) &&
3109 	!dataset_is_time_series(dset)) {
3110 	*err = E_PDWRONG;
3111 	return 0;
3112     }
3113 
3114     if (op == DS_PAD_DAILY && !dated_daily_data(dset)) {
3115 	*err = E_PDWRONG;
3116 	return 0;
3117     }
3118 
3119     *test = '\0';
3120     sscanf(s, "%31s", test);
3121     *ps += strlen(test);
3122 
3123     k = gretl_int_from_string(test, err);
3124     if (*err) {
3125 	return 0;
3126     }
3127 
3128     if (k <= 0 || (op == DS_RESAMPLE && k < 1)) {
3129 	*err = E_DATA;
3130     } else if (op == DS_INSOBS) {
3131 	if (k > dset->n) {
3132 	    *err = E_DATA;
3133 	}
3134     } else if (op == DS_COMPACT) {
3135 	*err = E_PDWRONG;
3136 	if (dset->pd == 12 && (k == 4 || k == 1)) {
3137 	    *err = 0;
3138 	} else if (dset->pd == 4 && k == 1) {
3139 	    *err = 0;
3140 	} else if (dset->pd == 52 && k == 12) {
3141 	    *err = 0;
3142 	} else if (dated_daily_data(dset) && (k == 52 || k == 12)) {
3143 	    *err = 0;
3144 	} else if (dataset_is_daily(dset) && k == 4) {
3145 	    if (strstr(*ps, "spread")) {
3146 		*err = 0;
3147 	    }
3148 	}
3149     } else if (op == DS_EXPAND) {
3150 	*err = E_PDWRONG;
3151 	if (dset->pd == 1 && (k == 4 || k == 12)) {
3152 	    *err = 0;
3153 	} else if (dset->pd == 4 && k == 12) {
3154 	    *err = 0;
3155 	}
3156     } else if (op == DS_PAD_DAILY) {
3157 	if (k < 5 || k > 7 || k < dset->pd) {
3158 	    *err = E_PDWRONG;
3159 	}
3160     }
3161 
3162     if (*err == E_PDWRONG) {
3163 	gretl_errmsg_set("This conversion is not supported");
3164     }
3165 
3166     return k;
3167 }
3168 
compact_data_set_wrapper(const char * s,DATASET * dset,int k)3169 static int compact_data_set_wrapper (const char *s, DATASET *dset,
3170 				     int k)
3171 {
3172     CompactMethod method = COMPACT_AVG;
3173 
3174     if (s != NULL) {
3175 	s += strspn(s, " ");
3176 	if (!strcmp(s, "sum")) {
3177 	    method = COMPACT_SUM;
3178 	} else if (!strcmp(s, "first") || !strcmp(s, "sop")) {
3179 	    method = COMPACT_SOP;
3180 	} else if (!strcmp(s, "last") || !strcmp(s, "eop")) {
3181 	    method = COMPACT_EOP;
3182 	} else if (!strcmp(s, "spread")) {
3183 	    method = COMPACT_SPREAD;
3184 	} else if (!strcmp(s, "avg") || !strcmp(s, "average")) {
3185 	    method = COMPACT_AVG;
3186 	} else if (*s != '\0') {
3187 	    return E_PARSE;
3188 	}
3189     }
3190 
3191     return compact_data_set(dset, k, method, 0, 0);
3192 }
3193 
3194 static unsigned int resample_seed;
3195 
get_resampling_seed(void)3196 unsigned int get_resampling_seed (void)
3197 {
3198     return resample_seed;
3199 }
3200 
3201 /* resample the dataset by observation, with replacement */
3202 
dataset_resample(DATASET * dset,int n,unsigned int seed)3203 int dataset_resample (DATASET *dset, int n, unsigned int seed)
3204 {
3205     DATASET *rset = NULL;
3206     char **S = NULL;
3207     unsigned int state;
3208     int T = sample_size(dset);
3209     int v = dset->v;
3210     int i, j, s, t;
3211     int err = 0;
3212 
3213     if (v < 2) {
3214 	return E_DATA;
3215     }
3216 
3217     rset = datainfo_new();
3218     if (rset == NULL) {
3219 	return E_ALLOC;
3220     }
3221 
3222     rset->Z = malloc(v * sizeof *rset->Z);
3223     if (rset->Z == NULL) {
3224 	free(rset);
3225 	return E_ALLOC;
3226     }
3227 
3228     for (i=0; i<v; i++) {
3229 	rset->Z[i] = NULL;
3230     }
3231 
3232     rset->v = v;
3233 
3234     j = 0;
3235     for (i=0; i<dset->v && !err; i++) {
3236 	rset->Z[j] = malloc(n * sizeof **rset->Z);
3237 	if (rset->Z[j] == NULL) {
3238 	    err = E_ALLOC;
3239 	} else if (i == 0) {
3240 	    for (t=0; t<n; t++) {
3241 		rset->Z[j][t] = 1.0;
3242 	    }
3243 	}
3244 	j++;
3245     }
3246 
3247     if (err) {
3248 	goto bailout;
3249     }
3250 
3251     if (dset->markers == REGULAR_MARKERS) {
3252 	S = strings_array_new_with_length(n, OBSLEN);
3253     }
3254 
3255     if (seed > 0) {
3256 	resample_seed = seed;
3257 	gretl_rand_set_seed(seed);
3258     } else {
3259 	resample_seed = gretl_rand_get_seed();
3260     }
3261 
3262     state = gretl_rand_int();
3263 
3264     for (t=0; t<n; t++) {
3265 	s = gretl_rand_int_max(T) + dset->t1;
3266 	j = 1;
3267 	for (i=1; i<dset->v; i++) {
3268 	    rset->Z[j][t] = dset->Z[i][s];
3269 	    j++;
3270 	}
3271 	if (S != NULL) {
3272 	    strcpy(S[t], dset->S[s]);
3273 	}
3274     }
3275 
3276     if (S != NULL) {
3277 	rset->S = S;
3278 	rset->markers = REGULAR_MARKERS;
3279     }
3280 
3281     rset->varname = dset->varname;
3282     rset->varinfo = dset->varinfo;
3283     rset->descrip = dset->descrip;
3284 
3285     rset->n = n;
3286     rset->t1 = 0;
3287     rset->t2 = n - 1;
3288     dataset_obs_info_default(rset);
3289 
3290     set_dataset_resampled(rset, state);
3291 
3292  bailout:
3293 
3294     if (err) {
3295 	free_Z(rset);
3296 	clear_datainfo(rset, CLEAR_SUBSAMPLE);
3297 	free(rset);
3298     } else {
3299 	backup_full_dataset(dset);
3300 	*dset = *rset;
3301 	free(rset);
3302     }
3303 
3304     return err;
3305 }
3306 
3307 /* note: @list should contain a single series ID, that of the
3308    target series, and @param should hold a numeric string
3309    giving the position to which @targ should be moved;
3310    @fixmax is the greatest series ID number that cannot be
3311    changed (based on saved models, etc., as determined by the
3312    caller)
3313 */
3314 
renumber_series_with_checks(const int * list,const char * param,int fixmax,DATASET * dset,PRN * prn)3315 int renumber_series_with_checks (const int *list,
3316 				 const char *param,
3317 				 int fixmax,
3318 				 DATASET *dset,
3319 				 PRN *prn)
3320 {
3321     char vname[VNAMELEN];
3322     int v_old, v_new;
3323     int f1, err = 0;
3324 
3325     if (list == NULL || list[0] != 1 ||
3326 	param == NULL || *param == '\0') {
3327 	return E_INVARG;
3328     }
3329 
3330     if (sscanf(param, "%d", &v_new) != 1) {
3331 	return E_INVARG;
3332     }
3333 
3334     v_old = list[1];
3335 
3336     if (v_old < 1 || v_old > dset->v - 1 ||
3337 	v_new < 1 || v_new > dset->v - 1) {
3338 	/* out of bounds */
3339 	return E_INVARG;
3340     } else if (v_new == v_old) {
3341 	/* no-op */
3342 	return 0;
3343     }
3344 
3345     f1 = max_varno_in_saved_lists();
3346 
3347     if (f1 > fixmax) {
3348 	fixmax = f1;
3349     }
3350 
3351     strcpy(vname, dset->varname[v_old]);
3352 
3353     if (v_old <= fixmax) {
3354 	gretl_errmsg_sprintf(_("Variable %s cannot be renumbered"), vname);
3355 	err = E_DATA;
3356     } else if (v_new <= fixmax) {
3357 	gretl_errmsg_sprintf(_("Target ID %d is not available"), v_new);
3358 	err = E_DATA;
3359     } else {
3360 	err = dataset_renumber_variable(v_old, v_new, dset);
3361     }
3362 
3363     if (!err && gretl_messages_on()) {
3364 	pprintf(prn, _("Renumbered %s as variable %d\n"),
3365 		vname, v_new);
3366 	maybe_list_series(dset, prn);
3367     }
3368 
3369     return err;
3370 }
3371 
3372 /* alternate forms:
3373 
3374            @op        @list  @param
3375    dataset addobs            24
3376    dataset compact           1
3377    dataset compact           4 last
3378    dataset expand            4
3379    dataset transpose
3380    dataset sortby     x1
3381    dataset resample          500
3382    dataset clear
3383    dataset renumber   orig   2
3384    dataset insobs            13
3385    dataset pad-daily         7
3386 
3387 */
3388 
modify_dataset(DATASET * dset,int op,const int * list,const char * param,gretlopt opt,PRN * prn)3389 int modify_dataset (DATASET *dset, int op, const int *list,
3390 		    const char *param, gretlopt opt, PRN *prn)
3391 {
3392     static int resampled;
3393     int k = 0, err = 0;
3394 
3395     if (dset == NULL || dset->Z == NULL) {
3396 	return E_NODATA;
3397     }
3398 
3399 #if 0
3400     fprintf(stderr, "modify_dataset: op=%d, param='%s'\n", op, param);
3401     printlist(list, "list");
3402 #endif
3403 
3404     if (op == DS_CLEAR || op == DS_RENUMBER) {
3405 	/* must be handled by the calling program */
3406 	return E_NOTIMP;
3407     }
3408 
3409     if (gretl_function_depth() > 0) {
3410 	if (op == DS_ADDOBS && !complex_subsampled() &&
3411 	    dset->t2 == dset->n - 1 && !(opt & OPT_T)) {
3412 	    /* experimental, 2015-07-28: allow "addobs" within a
3413 	       function provided the dataset is not subsampled
3414 	    */
3415 	    goto proceed;
3416 	} else {
3417 	    gretl_errmsg_set(_("The 'dataset' command is not available "
3418 			       "within functions"));
3419 	    return 1;
3420 	}
3421     }
3422 
3423     if (gretl_looping() && op != DS_RESAMPLE &&
3424 	op != DS_RESTORE && op != DS_SORTBY) {
3425 	pputs(prn, _("Sorry, this command is not available in loop mode\n"));
3426 	return 1;
3427     }
3428 
3429     if (op == DS_RESAMPLE && resampled) {
3430 	/* repeated "resample": implicitly restore first */
3431 	err = restore_full_sample(dset, NULL);
3432 	if (err) {
3433 	    return err;
3434 	} else {
3435 	    resampled = 0;
3436 	}
3437     }
3438 
3439     if (op != DS_RESTORE && complex_subsampled()) {
3440 	gretl_errmsg_set(_("The data set is currently sub-sampled"));
3441 	return 1;
3442     }
3443 
3444  proceed:
3445 
3446     if (op == DS_ADDOBS || op == DS_INSOBS ||
3447 	op == DS_COMPACT || op == DS_RESAMPLE ||
3448 	op == DS_PAD_DAILY) {
3449 	if (param == NULL) {
3450 	    err = E_ARGS;
3451 	} else {
3452 	    k = dataset_int_param(&param, op, dset, &err);
3453 	}
3454 	if (err) {
3455 	    return err;
3456 	}
3457     } else if (op == DS_EXPAND) {
3458 	if (param != NULL) {
3459 	    k = dataset_int_param(&param, op, dset, &err);
3460 	} else if (dset->pd == 1) {
3461 	    k = 4;
3462 	} else if (dset->pd == 4) {
3463 	    k = 12;
3464 	} else {
3465 	    err = E_PDWRONG;
3466 	}
3467 	if (err) {
3468 	    return err;
3469 	}
3470     }
3471 
3472     if (op == DS_ADDOBS) {
3473 	err = add_obs(k, dset, opt, prn);
3474     } else if (op == DS_INSOBS) {
3475 	err = insert_obs(k, dset, prn);
3476     } else if (op == DS_COMPACT) {
3477 	err = compact_data_set_wrapper(param, dset, k);
3478     } else if (op == DS_EXPAND) {
3479 	err = expand_data_set(dset, k);
3480     } else if (op == DS_PAD_DAILY) {
3481 	err = pad_daily_data(dset, k, prn);
3482     } else if (op == DS_TRANSPOSE) {
3483 	err = transpose_data(dset);
3484     } else if (op == DS_SORTBY) {
3485 	err = dataset_sort(dset, list, OPT_NONE);
3486     } else if (op == DS_DSORTBY) {
3487 	err = dataset_sort(dset, list, OPT_D);
3488     } else if (op == DS_RESAMPLE) {
3489 	err = dataset_resample(dset, k, 0);
3490 	if (!err) {
3491 	    resampled = 1;
3492 	}
3493     } else if (op == DS_RESTORE) {
3494 	if (resampled) {
3495 	    err = restore_full_sample(dset, NULL);
3496 	    resampled = 0;
3497 	} else {
3498 	    pprintf(prn, _("dataset restore: dataset is not resampled\n"));
3499 	    err = E_DATA;
3500 	}
3501     } else if (op == DS_DELETE) {
3502 	pprintf(prn, "dataset delete: not ready yet\n");
3503     } else if (op == DS_KEEP) {
3504 	pprintf(prn, "dataset keep: not ready yet\n");
3505     } else {
3506 	err = E_PARSE;
3507     }
3508 
3509     return err;
3510 }
3511 
3512 /* this supports the $datatype accessor */
3513 
dataset_get_structure(const DATASET * dset)3514 int dataset_get_structure (const DATASET *dset)
3515 {
3516     if (dset == NULL || dset->n == 0) {
3517 	return 0;
3518     } else if (dataset_is_panel(dset)) {
3519 	return 3;
3520     } else if (dataset_is_time_series(dset)) {
3521 	return 2;
3522     } else {
3523 	return 1;
3524     }
3525 }
3526 
3527 /**
3528  * panel_sample_size:
3529  * @dset: pointer to data information struct.
3530  *
3531  * Returns: the numbers of units/individuals in the current
3532  * sample range, or 0 if the dataset is not a panel.
3533  */
3534 
panel_sample_size(const DATASET * dset)3535 int panel_sample_size (const DATASET *dset)
3536 {
3537     int ret = 0;
3538 
3539     if (dataset_is_panel(dset)) {
3540 	ret = (dset->t2 - dset->t1 + 1) / dset->pd;
3541     }
3542 
3543     return ret;
3544 }
3545 
3546 /**
3547  * multi_unit_panel_sample:
3548  * @dset: pointer to dataset.
3549  *
3550  * Returns: 1 if the dataset is a panel and the current sample
3551  * range includes two or more individuals, otherwise 0.
3552  */
3553 
multi_unit_panel_sample(const DATASET * dset)3554 int multi_unit_panel_sample (const DATASET *dset)
3555 {
3556     int ret = 0;
3557 
3558     if (dataset_is_panel(dset)) {
3559 	ret = (dset->t2 - dset->t1 + 1) > dset->pd;
3560     }
3561 
3562     return ret;
3563 }
3564 
3565 /**
3566  * dataset_purge_missing_rows:
3567  * @dset: pointer to dataset.
3568  *
3569  * Removes empty rows from the dataset -- that is, observations
3570  * at which there are no non-missing values.  This is intended
3571  * for daily data only.
3572  *
3573  * Returns: 0 on success, non-zero code on error.
3574  */
3575 
dataset_purge_missing_rows(DATASET * dset)3576 int dataset_purge_missing_rows (DATASET *dset)
3577 {
3578     int new_n, missrow, totmiss = 0;
3579     int t1 = dset->t1;
3580     int t2 = dset->t2;
3581     char **S = NULL;
3582     double *Zi = NULL;
3583     size_t sz;
3584     int i, t, s;
3585     int err = 0;
3586 
3587     for (t=0; t<dset->n; t++) {
3588 	missrow = 1;
3589 	for (i=1; i<dset->v; i++) {
3590 	    if (!na(dset->Z[i][t])) {
3591 		missrow = 0;
3592 		break;
3593 	    }
3594 	}
3595 	if (missrow) {
3596 	    totmiss++;
3597 	    if (t < dset->t1) {
3598 		t1--;
3599 	    }
3600 	    if (t < dset->t2) {
3601 		t2--;
3602 	    }
3603 	}
3604     }
3605 
3606     if (totmiss == 0) {
3607 	/* no-op */
3608 	return 0;
3609     }
3610 
3611     if (dated_daily_data(dset) && dset->S == NULL) {
3612 	err = dataset_allocate_obs_markers(dset);
3613 	if (!err) {
3614 	    for (t=0; t<dset->n; t++) {
3615 		calendar_date_string(dset->S[t], t, dset);
3616 	    }
3617 	}
3618     }
3619 
3620     for (t=0; t<dset->n; t++) {
3621 	missrow = 1;
3622 	for (i=1; i<dset->v; i++) {
3623 	    if (!na(dset->Z[i][t])) {
3624 		missrow = 0;
3625 		break;
3626 	    }
3627 	}
3628 	if (missrow) {
3629 	    sz = (dset->n - t) * sizeof **dset->Z;
3630 	    for (i=1; i<dset->v; i++) {
3631 		memmove(dset->Z[i] + t, dset->Z[i] + t + 1, sz);
3632 	    }
3633 	    if (dset->S != NULL) {
3634 		free(dset->S[t]);
3635 		for (s=t; s<dset->n - 1; s++) {
3636 		    dset->S[s] = dset->S[s+1];
3637 		}
3638 	    }
3639 	}
3640     }
3641 
3642     new_n = dset->n - totmiss;
3643 
3644     for (i=1; i<dset->v; i++) {
3645 	Zi = realloc(dset->Z[i], new_n * sizeof *Zi);
3646 	if (Zi == NULL) {
3647 	    err = E_ALLOC;
3648 	} else {
3649 	    dset->Z[i] = Zi;
3650 	}
3651     }
3652 
3653     if (!err && dset->S != NULL) {
3654 	S = realloc(dset->S, new_n * sizeof *S);
3655 	if (S == NULL) {
3656 	    err = E_ALLOC;
3657 	} else {
3658 	    dset->S = S;
3659 	    if (dated_daily_data(dset)) {
3660 		strcpy(dset->stobs, dset->S[0]);
3661 		strcpy(dset->endobs, dset->S[new_n-1]);
3662 		dset->sd0 = get_epoch_day(dset->stobs);
3663 	    }
3664 	}
3665     }
3666 
3667     dataset_set_nobs(dset, new_n);
3668     dset->t1 = t1;
3669     dset->t2 = t2;
3670 
3671     return err;
3672 }
3673 
3674 /**
3675  * dataset_set_time_series:
3676  * @dset: pointer to dataset.
3677  * @pd: time series annual frequency (1 for annual, 4
3678  * for quarterly or 12 for monthly).
3679  * @yr0: starting year.
3680  * @minor0: starting "minor" period, 1-based (quarter or
3681  * month).
3682  *
3683  * Sets time-series properties on @dset: frequency @pd with
3684  * starting observation @yr0, @minor0. If the data are
3685  * annual (@pd = 1) then @minor0 is ignored.
3686 
3687  * Returns: 0 on success, non-zero code on error.
3688  */
3689 
dataset_set_time_series(DATASET * dset,int pd,int yr0,int minor0)3690 int dataset_set_time_series (DATASET *dset, int pd,
3691 			     int yr0, int minor0)
3692 {
3693     int err = 0;
3694 
3695     if (pd != 1 && pd != 4 && pd != 12) {
3696 	err = E_DATA;
3697     } else if (yr0 < 1) {
3698 	err = E_DATA;
3699     } else if (pd > 1 && (minor0 < 1 || minor0 > pd)) {
3700 	err = E_DATA;
3701     } else {
3702 	gchar *stobs = NULL;
3703 
3704 	dataset_destroy_obs_markers(dset);
3705 	dset->structure = TIME_SERIES;
3706 	dset->pd = pd;
3707 
3708 	if (pd == 1) {
3709 	    stobs = g_strdup_printf("%d", yr0);
3710 	} else if (pd == 4) {
3711 	    stobs = g_strdup_printf("%d.%d", yr0, minor0);
3712 	} else {
3713 	    stobs = g_strdup_printf("%d.%02d", yr0, minor0);
3714 	}
3715 
3716 	dset->sd0 = dot_atof(stobs);
3717 	ntolabel(dset->stobs, 0, dset);
3718 	ntolabel(dset->endobs, dset->n - 1, dset);
3719 	g_free(stobs);
3720     }
3721 
3722     return err;
3723 }
3724 
dataset_clear_sample_record(DATASET * dset)3725 void dataset_clear_sample_record (DATASET *dset)
3726 {
3727     if (dset->restriction != NULL) {
3728 	free(dset->restriction);
3729 	dset->restriction = NULL;
3730     }
3731 }
3732 
3733 /**
3734  * series_is_discrete:
3735  * @dset: pointer to dataset.
3736  * @i: index number of series.
3737  *
3738  * Returns: non-zero iff series @i should be treated as discrete.
3739  */
3740 
series_is_discrete(const DATASET * dset,int i)3741 int series_is_discrete (const DATASET *dset, int i)
3742 {
3743     return dset->varinfo[i]->flags & VAR_DISCRETE;
3744 }
3745 
3746 /**
3747  * series_is_hidden:
3748  * @dset: pointer to dataset.
3749  * @i: index number of series.
3750  *
3751  * Returns: non-zero iff series @i is hidden.
3752  */
3753 
series_is_hidden(const DATASET * dset,int i)3754 int series_is_hidden (const DATASET *dset, int i)
3755 {
3756     return dset->varinfo[i]->flags & VAR_HIDDEN;
3757 }
3758 
3759 /**
3760  * series_is_generated:
3761  * @dset: pointer to dataset.
3762  * @i: index number of series.
3763  *
3764  * Returns: non-zero iff series @i was generated using
3765  * a formula or transformation function.
3766  */
3767 
series_is_generated(const DATASET * dset,int i)3768 int series_is_generated (const DATASET *dset, int i)
3769 {
3770     return dset->varinfo[i]->flags & VAR_GENERATED;
3771 }
3772 
3773 /**
3774  * series_is_listarg:
3775  * @dset: pointer to dataset.
3776  * @i: index number of series.
3777  * @lname: location to receive list name, or NULL.
3778  *
3779  * Returns: non-zero iff series @i has been marked as
3780  * belonging to a list argument to a function.
3781  */
3782 
series_is_listarg(const DATASET * dset,int i,const char ** lname)3783 int series_is_listarg (const DATASET *dset, int i,
3784 		       const char **lname)
3785 {
3786     int ret = dset->varinfo[i]->flags & VAR_LISTARG ? 1 : 0;
3787 
3788     if (ret && lname != NULL) {
3789 	*lname = series_get_list_parent(i);
3790     }
3791 
3792     return ret;
3793 }
3794 
3795 /**
3796  * series_is_coded:
3797  * @dset: pointer to dataset.
3798  * @i: index number of series.
3799  *
3800  * Returns: non-zero iff series @i has been marked as
3801  * "coded", meaning that its numerical values represent
3802  * an arbitrary encoding of qualitative characteristics.
3803  */
3804 
series_is_coded(const DATASET * dset,int i)3805 int series_is_coded (const DATASET *dset, int i)
3806 {
3807     return dset->varinfo[i]->flags & VAR_CODED;
3808 }
3809 
3810 /**
3811  * series_is_integer_valued:
3812  * @dset: pointer to dataset.
3813  * @i: index number of series.
3814  *
3815  * Returns: non-zero iff all values in series @i are
3816  * representable as integers (ignoring missing values).
3817  */
3818 
series_is_integer_valued(const DATASET * dset,int i)3819 int series_is_integer_valued (const DATASET *dset, int i)
3820 {
3821     const double *x = dset->Z[i];
3822     int t, n_ok = 0, ret = 1;
3823 
3824     for (t=0; t<dset->n; t++) {
3825 	if (!na(x[t])) {
3826 	    n_ok++;
3827 	    if (x[t] != floor(x[t])) {
3828 		ret = 0;
3829 		break;
3830 	    } else if (x[t] > INT_MAX || x[t] < INT_MIN) {
3831 		ret = 0;
3832 		break;
3833 	    }
3834 	}
3835     }
3836 
3837     if (n_ok == 0) {
3838 	/* don't let an entirely missing series count as
3839 	   "integer-valued"
3840 	*/
3841 	ret = 0;
3842     }
3843 
3844     return ret;
3845 }
3846 
3847 /**
3848  * series_set_flag:
3849  * @dset: pointer to dataset.
3850  * @i: index number of series.
3851  * @flag: flag to set.
3852  *
3853  * Sets the given @flag on series @i.
3854  */
3855 
series_set_flag(DATASET * dset,int i,VarFlags flag)3856 void series_set_flag (DATASET *dset, int i, VarFlags flag)
3857 {
3858     if (i > 0 && i < dset->v) {
3859 	dset->varinfo[i]->flags |= flag;
3860     }
3861 }
3862 
3863 /**
3864  * series_unset_flag:
3865  * @dset: pointer to dataset.
3866  * @i: index number of series.
3867  * @flag: flag to remove.
3868  *
3869  * Unsets the given @flag on series @i.
3870  */
3871 
series_unset_flag(DATASET * dset,int i,VarFlags flag)3872 void series_unset_flag (DATASET *dset, int i, VarFlags flag)
3873 {
3874     if (i > 0 && i < dset->v) {
3875 	dset->varinfo[i]->flags &= ~flag;
3876     }
3877 }
3878 
3879 /**
3880  * series_get_flags:
3881  * @dset: pointer to dataset.
3882  * @i: index number of series.
3883  *
3884  * Returns: the flags set series @i.
3885  */
3886 
series_get_flags(const DATASET * dset,int i)3887 VarFlags series_get_flags (const DATASET *dset, int i)
3888 {
3889     if (i >= 0 && i < dset->v) {
3890 	return dset->varinfo[i]->flags;
3891     } else {
3892 	return 0;
3893     }
3894 }
3895 
3896 /**
3897  * series_zero_flags:
3898  * @dset: pointer to dataset.
3899  * @i: index number of series.
3900  *
3901  * Sets flags on series @i to zero.
3902  */
3903 
series_zero_flags(DATASET * dset,int i)3904 void series_zero_flags (DATASET *dset, int i)
3905 {
3906     if (i >= 0 && i < dset->v) {
3907 	dset->varinfo[i]->flags = 0;
3908     }
3909 }
3910 
3911 /**
3912  * series_get_label:
3913  * @dset: pointer to dataset.
3914  * @i: index number of series.
3915  *
3916  * Returns: the descriptive label for series @i.
3917  */
3918 
series_get_label(const DATASET * dset,int i)3919 const char *series_get_label (const DATASET *dset, int i)
3920 {
3921     if (i >= 0 && i < dset->v) {
3922 	return dset->varinfo[i]->label;
3923     } else {
3924 	return NULL;
3925     }
3926 }
3927 
3928 /**
3929  * series_get_display_name:
3930  * @dset: pointer to dataset.
3931  * @i: index number of series.
3932  *
3933  * Returns: the display name for series @i.
3934  */
3935 
series_get_display_name(const DATASET * dset,int i)3936 const char *series_get_display_name (const DATASET *dset, int i)
3937 {
3938     if (i >= 0 && i < dset->v) {
3939 	return dset->varinfo[i]->display_name;
3940     } else {
3941 	return NULL;
3942     }
3943 }
3944 
3945 /**
3946  * series_get_parent_name:
3947  * @dset: pointer to dataset.
3948  * @i: index number of series.
3949  *
3950  * Returns: the name of the "parent" of series @i
3951  * (e.g. if series @i is a lag of some other series)
3952  * or NULL if the series has no parent.
3953  */
3954 
series_get_parent_name(const DATASET * dset,int i)3955 const char *series_get_parent_name (const DATASET *dset, int i)
3956 {
3957     if (i > 0 && i < dset->v) {
3958 	if (dset->varinfo[i]->parent[0] != '\0') {
3959 	    return dset->varinfo[i]->parent;
3960 	}
3961     }
3962 
3963     return NULL;
3964 }
3965 
3966 /**
3967  * series_get_parent_id:
3968  * @dset: pointer to dataset.
3969  * @i: index number of series.
3970  *
3971  * Returns: the ID number of the "parent" of series @i
3972  * (e.g. if series @i is a lag of some other series)
3973  * or -1 if the series has no parent.
3974  */
3975 
series_get_parent_id(const DATASET * dset,int i)3976 int series_get_parent_id (const DATASET *dset, int i)
3977 {
3978     if (i > 0 && i < dset->v) {
3979 	const char *pname = dset->varinfo[i]->parent;
3980 
3981 	if (*pname != '\0') {
3982 	    return current_series_index(dset, pname);
3983 	}
3984     }
3985 
3986     return -1;
3987 }
3988 
series_get_lag(const DATASET * dset,int i)3989 int series_get_lag (const DATASET *dset, int i)
3990 {
3991     if (i > 0 && i < dset->v) {
3992 	return dset->varinfo[i]->lag;
3993     } else {
3994 	return 0;
3995     }
3996 }
3997 
series_get_transform(const DATASET * dset,int i)3998 int series_get_transform (const DATASET *dset, int i)
3999 {
4000     if (i > 0 && i < dset->v) {
4001 	return dset->varinfo[i]->transform;
4002     } else {
4003 	return 0;
4004     }
4005 }
4006 
4007 /**
4008  * series_get_compact_method:
4009  * @dset: pointer to dataset.
4010  * @i: index number of series.
4011  *
4012  * Returns: the compaction method set for series @i.
4013  */
4014 
series_get_compact_method(const DATASET * dset,int i)4015 int series_get_compact_method (const DATASET *dset, int i)
4016 {
4017     if (i > 0 && i < dset->v) {
4018 	return dset->varinfo[i]->compact_method;
4019     } else {
4020 	return 0;
4021     }
4022 }
4023 
4024 /**
4025  * series_get_stack_level:
4026  * @dset: pointer to dataset.
4027  * @i: index number of series.
4028  *
4029  * Returns: the stack level of series @i.
4030  */
4031 
series_get_stack_level(const DATASET * dset,int i)4032 int series_get_stack_level (const DATASET *dset, int i)
4033 {
4034     if (i >= 0 && i < dset->v) {
4035 	return dset->varinfo[i]->stack_level;
4036     } else {
4037 	return 0;
4038     }
4039 }
4040 
series_set_mtime(DATASET * dset,int i)4041 void series_set_mtime (DATASET *dset, int i)
4042 {
4043     if (i > 0 && i < dset->v) {
4044 	dset->varinfo[i]->mtime = gretl_monotonic_time();
4045     }
4046 }
4047 
series_get_mtime(const DATASET * dset,int i)4048 gint64 series_get_mtime (const DATASET *dset, int i)
4049 {
4050     if (i > 0 && i < dset->v) {
4051 	return dset->varinfo[i]->mtime;
4052     } else {
4053 	return 0;
4054     }
4055 }
4056 
series_set_label(DATASET * dset,int i,const char * s)4057 void series_set_label (DATASET *dset, int i,
4058 		       const char *s)
4059 {
4060     if (i > 0 && i < dset->v) {
4061 	copy_label(dset->varinfo[i], s);
4062     }
4063 }
4064 
series_set_display_name(DATASET * dset,int i,const char * s)4065 void series_set_display_name (DATASET *dset, int i,
4066 			      const char *s)
4067 {
4068     if (i > 0 && i < dset->v) {
4069 	char *targ = dset->varinfo[i]->display_name;
4070 
4071 	if (strlen(s) >= MAXDISP) {
4072 	    gchar *tmp = g_strdup(s);
4073 
4074 	    strcpy(targ, gretl_utf8_truncate(tmp, MAXDISP-1));
4075 	    g_free(tmp);
4076 	} else {
4077 	    strcpy(targ, s);
4078 	}
4079     }
4080 }
4081 
series_set_compact_method(DATASET * dset,int i,int method)4082 void series_set_compact_method (DATASET *dset, int i,
4083 				int method)
4084 {
4085     if (i > 0 && i < dset->v) {
4086 	dset->varinfo[i]->compact_method = method;
4087     }
4088 }
4089 
series_set_parent(DATASET * dset,int i,const char * parent)4090 void series_set_parent (DATASET *dset, int i,
4091 			const char *parent)
4092 {
4093     if (i > 0 && i < dset->v) {
4094 	strcpy(dset->varinfo[i]->parent, parent);
4095     }
4096 }
4097 
series_set_transform(DATASET * dset,int i,int transform)4098 void series_set_transform (DATASET *dset, int i,
4099 			   int transform)
4100 {
4101     if (i > 0 && i < dset->v) {
4102 	dset->varinfo[i]->transform = transform;
4103     }
4104 }
4105 
series_set_lag(DATASET * dset,int i,int lag)4106 void series_set_lag (DATASET *dset, int i, int lag)
4107 {
4108     if (i > 0 && i < dset->v) {
4109 	dset->varinfo[i]->lag = lag;
4110     }
4111 }
4112 
series_set_stack_level(DATASET * dset,int i,int level)4113 void series_set_stack_level (DATASET *dset, int i, int level)
4114 {
4115     if (i > 0 && i < dset->v) {
4116 	dset->varinfo[i]->stack_level = level;
4117     }
4118 }
4119 
series_increment_stack_level(DATASET * dset,int i)4120 void series_increment_stack_level (DATASET *dset, int i)
4121 {
4122     if (i > 0 && i < dset->v) {
4123 	dset->varinfo[i]->stack_level += 1;
4124     }
4125 }
4126 
series_decrement_stack_level(DATASET * dset,int i)4127 void series_decrement_stack_level (DATASET *dset, int i)
4128 {
4129     if (i > 0 && i < dset->v) {
4130 	dset->varinfo[i]->stack_level -= 1;
4131     }
4132 }
4133 
series_delete_metadata(DATASET * dset,int i)4134 void series_delete_metadata (DATASET *dset, int i)
4135 {
4136     if (i > 0 && i < dset->v &&
4137 	dset->varinfo != NULL &&
4138 	dset->varinfo[i] != NULL) {
4139 	dset->varinfo[i]->lag = 0;
4140 	dset->varinfo[i]->transform = 0;
4141 	dset->varinfo[i]->parent[0] = '\0';
4142     }
4143 }
4144 
series_ensure_level_zero(DATASET * dset)4145 void series_ensure_level_zero (DATASET *dset)
4146 {
4147     if (dset != NULL) {
4148 	int i, n = 0;
4149 
4150 	for (i=1; i<dset->v; i++) {
4151 	    if (dset->varinfo[i]->stack_level > 0) {
4152 		dset->varinfo[i]->stack_level = 0;
4153 		n++;
4154 	    }
4155 	}
4156 #if 0
4157 	if (n > 0) {
4158 	    fprintf(stderr, "Unauthorized access to series detected!\n");
4159 	}
4160 #endif
4161     }
4162 }
4163 
series_attach_string_table(DATASET * dset,int i,series_table * st)4164 void series_attach_string_table (DATASET *dset, int i,
4165 				 series_table *st)
4166 {
4167     if (dset != NULL && i > 0 && i < dset->v) {
4168 	series_set_discrete(dset, i, 1);
4169 	dset->varinfo[i]->st = st;
4170     }
4171 }
4172 
series_destroy_string_table(DATASET * dset,int i)4173 void series_destroy_string_table (DATASET *dset, int i)
4174 {
4175     if (dset != NULL && i > 0 && i < dset->v) {
4176 	series_table_destroy(dset->varinfo[i]->st);
4177 	dset->varinfo[i]->st = NULL;
4178     }
4179 }
4180 
4181 /**
4182  * is_string_valued:
4183  * @dset: pointer to dataset.
4184  * @i: index number of series.
4185  *
4186  * Returns: 1 if series @i has a table of string values
4187  * (that is, a mapping from numerical values to associated
4188  * string values), otherwise 0.
4189  */
4190 
is_string_valued(const DATASET * dset,int i)4191 int is_string_valued (const DATASET *dset, int i)
4192 {
4193     if (dset != NULL && i > 0 && i < dset->v) {
4194 	return dset->varinfo[i]->st != NULL;
4195     } else {
4196 	return 0;
4197     }
4198 }
4199 
4200 /**
4201  * series_get_string_table:
4202  * @dset: pointer to dataset.
4203  * @i: index number of series.
4204  *
4205  * Returns: the string table attched to series @i or NULL if
4206  * there is no such table.
4207  */
4208 
series_get_string_table(const DATASET * dset,int i)4209 series_table *series_get_string_table (const DATASET *dset, int i)
4210 {
4211     if (dset != NULL && i > 0 && i < dset->v) {
4212 	return dset->varinfo[i]->st;
4213     } else {
4214 	return NULL;
4215     }
4216 }
4217 
4218 /**
4219  * series_get_string_for_obs:
4220  * @dset: pointer to dataset.
4221  * @i: index number of series.
4222  * @t: 0-based index of observation.
4223  *
4224  * Returns: the string associated with the numerical value of
4225  * series @i at observation @t, or NULL if there is no such string.
4226  * Note that NULL will be returned if the observation is missing.
4227  */
4228 
series_get_string_for_obs(const DATASET * dset,int i,int t)4229 const char *series_get_string_for_obs (const DATASET *dset, int i,
4230 				       int t)
4231 {
4232     const char *ret = NULL;
4233 
4234     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
4235 	ret = series_table_get_string(dset->varinfo[i]->st,
4236 				      dset->Z[i][t]);
4237     }
4238 
4239     return ret;
4240 }
4241 
4242 /**
4243  * series_get_string_for_value:
4244  * @dset: pointer to dataset.
4245  * @i: index number of series.
4246  * @val: the value to look up.
4247  *
4248  * Returns: the string associated with numerical value @val of
4249  * series @i, or NULL if there is no such string.
4250  */
4251 
series_get_string_for_value(const DATASET * dset,int i,double val)4252 const char *series_get_string_for_value (const DATASET *dset, int i,
4253 					 double val)
4254 {
4255     const char *ret = NULL;
4256 
4257     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
4258 	ret = series_table_get_string(dset->varinfo[i]->st, val);
4259     }
4260 
4261     return ret;
4262 }
4263 
4264 /**
4265  * series_set_string_val:
4266  * @dset: pointer to dataset.
4267  * @i: index number of series.
4268  * @t: 0-based index of observation.
4269  * @s: the string value to set.
4270  *
4271  * Attempts to set the string value for observation @t of series @i
4272  * to @s. This will fail if the series in question does not have
4273  * an associated table of string values.
4274  *
4275  * Returns: 0 on success, non-zero code on error.
4276  */
4277 
series_set_string_val(DATASET * dset,int i,int t,const char * s)4278 int series_set_string_val (DATASET *dset, int i, int t, const char *s)
4279 {
4280     int err = 0;
4281 
4282     if (i <= 0 || i >= dset->v) {
4283 	err = E_DATA;
4284     } else if (dset->varinfo[i]->st == NULL) {
4285 	err = E_TYPES;
4286     } else {
4287 	series_table *st = dset->varinfo[i]->st;
4288 	double x = series_table_get_value(st, s);
4289 
4290 	if (na(x)) {
4291 	    int k = series_table_add_string(st, s);
4292 
4293 	    if (k < 0) {
4294 		err = E_ALLOC;
4295 	    } else {
4296 		dset->Z[i][t] = k;
4297 	    }
4298 	} else {
4299 	    dset->Z[i][t] = x;
4300 	}
4301     }
4302 
4303     return err;
4304 }
4305 
4306 /**
4307  * string_series_assign_value:
4308  * @dset: pointer to dataset.
4309  * @i: index number of string-valued series.
4310  * @t: 0-based index of observation.
4311  * @x: the numeric value to set.
4312  *
4313  * Attempts to set the value for observation @t of series @i
4314  * to @x. This will fail if the @x falls outside of the range
4315  * of values supported by the string table for the series.
4316  *
4317  * Returns: 0 on success, non-zero code on error.
4318  */
4319 
string_series_assign_value(DATASET * dset,int i,int t,double x)4320 int string_series_assign_value (DATASET *dset, int i,
4321 				int t, double x)
4322 {
4323     series_table *st = NULL;
4324     int err = 0;
4325 
4326     if (i <= 0 || i >= dset->v) {
4327 	err = E_DATA;
4328     } else if (na(x)) {
4329 	dset->Z[i][t] = x;
4330     } else if (x != floor(x)) {
4331 	err = E_TYPES;
4332     } else if ((st = dset->varinfo[i]->st) == NULL) {
4333 	err = E_TYPES;
4334     } else if (series_table_get_string(st, x) == NULL) {
4335 	err = E_DATA;
4336     } else {
4337 	dset->Z[i][t] = x;
4338     }
4339 
4340     return err;
4341 }
4342 
4343 /**
4344  * series_decode_string:
4345  * @dset: pointer to dataset.
4346  * @i: index number of series.
4347  * @s: string to decode.
4348  *
4349  * Returns: the numerical value associated with the string
4350  * @s for series @i, or #NADBL if there's no such value.
4351  */
4352 
series_decode_string(const DATASET * dset,int i,const char * s)4353 double series_decode_string (const DATASET *dset, int i, const char *s)
4354 {
4355     double ret = NADBL;
4356 
4357     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
4358 	ret = series_table_get_value(dset->varinfo[i]->st, s);
4359     }
4360 
4361     return ret;
4362 }
4363 
4364 /**
4365  * series_get_string_vals:
4366  * @dset: pointer to dataset.
4367  * @i: index number of series.
4368  * @n_strs: location to receive the number of strings, or NULL.
4369  * @subsample: non-zero to restrict to current sample range.
4370  *
4371  * Returns: the array of strings associated with distinct numerical
4372  * values of series @i, or NULL if there's no such array. The returned
4373  * array should not be modified in any way; copy the strings first if
4374  * you need to modify them.
4375  */
4376 
series_get_string_vals(const DATASET * dset,int i,int * n_strs,int subsample)4377 char **series_get_string_vals (const DATASET *dset, int i,
4378 			       int *n_strs, int subsample)
4379 {
4380     char **strs = NULL;
4381     int n = 0;
4382 
4383     if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
4384 	strs = series_table_get_strings(dset->varinfo[i]->st, &n);
4385     }
4386 
4387     if (strs != NULL && subsample && dataset_is_subsampled(dset)) {
4388 	static char **substrs = NULL;
4389 	const double *x = dset->Z[i] + dset->t1;
4390 	int T = dset->t2 - dset->t1 + 1;
4391 	gretl_matrix *valid;
4392 	int err = 0;
4393 
4394 	if (substrs != NULL) {
4395 	    free(substrs);
4396 	    substrs = NULL;
4397 	}
4398 	valid = gretl_matrix_values(x, T, OPT_NONE, &err);
4399 	if (err) {
4400 	    strs = NULL;
4401 	    n = 0;
4402 	} else {
4403 	    int j, k, nv = valid->rows;
4404 
4405 	    substrs = strings_array_new(nv);
4406 	    for (j=0; j<nv; j++) {
4407 		k = gretl_vector_get(valid, j) - 1;
4408 		substrs[j] = strs[k];
4409 	    }
4410 	    strs = substrs;
4411 	    n = nv;
4412 	    gretl_matrix_free(valid);
4413 	}
4414     }
4415 
4416     if (n_strs != NULL) {
4417 	*n_strs = n;
4418     }
4419 
4420     return strs;
4421 }
4422 
4423 /**
4424  * series_get_string_width:
4425  * @dset: pointer to dataset.
4426  * @i: index number of series.
4427  *
4428  * Returns: the maximum of (a) the number of characters in the
4429  * name of series @i and (b) the number of bytes in the longest
4430  * "string value" attached to series @i, if applicable; or 0
4431  * if @i is not a valid series index.
4432  */
4433 
series_get_string_width(const DATASET * dset,int i)4434 int series_get_string_width (const DATASET *dset, int i)
4435 {
4436     const char *lname;
4437     int n = 0;
4438 
4439     if (i > 0 && i < dset->v) {
4440 	n = strlen(dset->varname[i]);
4441 	if (dset->varinfo[i]->flags & VAR_LISTARG) {
4442 	    lname = series_get_list_parent(i);
4443 	    if (lname != NULL) {
4444 		n += strlen(lname) + 1;
4445 	    }
4446 	}
4447 	if (dset->varinfo[i]->st != NULL) {
4448 	    char **S;
4449 	    int j, ns, m;
4450 
4451 	    S = series_table_get_strings(dset->varinfo[i]->st, &ns);
4452 	    for (j=0; j<ns; j++) {
4453 		m = g_utf8_strlen(S[j], -1);
4454 		if (m > n) {
4455 		    n = m;
4456 		}
4457 	    }
4458 	}
4459     }
4460 
4461     return n;
4462 }
4463 
4464 /**
4465  * steal_string_table:
4466  * @l_dset: pointer to recipient dataset.
4467  * @lvar: index number of target series.
4468  * @r_dset: pointer to donor dataset.
4469  * @rvar: index number of source series.
4470  *
4471  * Detaches the string table from @rvar in @r_dset and attaches it
4472  * to @lvar in @l_dset,
4473  *
4474  * Returns: 0 on success, non-zero code on error.
4475  */
4476 
steal_string_table(DATASET * l_dset,int lvar,DATASET * r_dset,int rvar)4477 int steal_string_table (DATASET *l_dset, int lvar,
4478 			DATASET *r_dset, int rvar)
4479 {
4480     if (l_dset != r_dset || lvar != rvar) {
4481 	l_dset->varinfo[lvar]->st = r_dset->varinfo[rvar]->st;
4482 	r_dset->varinfo[rvar]->st = NULL;
4483 	series_set_discrete(l_dset, lvar, 1);
4484     }
4485 
4486     return 0;
4487 }
4488 
4489 /**
4490  * merge_string_tables:
4491  * @l_dset: pointer to current dataset.
4492  * @lvar: index number of target series.
4493  * @r_dset: pointer to dataset to be appended.
4494  * @rvar: index number of source series.
4495  *
4496  * Translates the encoding of the string-values of series @rvar
4497  * in @r_dset to that of series @lvar in @l_dset, adding extra
4498  * strings as needed.
4499  *
4500  * Returns: 0 on success, non-zero code on error.
4501  */
4502 
merge_string_tables(DATASET * l_dset,int lvar,DATASET * r_dset,int rvar)4503 int merge_string_tables (DATASET *l_dset, int lvar,
4504 			 DATASET *r_dset, int rvar)
4505 {
4506     series_table *lst = l_dset->varinfo[lvar]->st;
4507     double dx, *x = r_dset->Z[rvar];
4508     const char *sr;
4509     int t, idx, err = 0;
4510 
4511     for (t=0; t<r_dset->n && !err; t++) {
4512 	if (na(x[t])) {
4513 	    continue;
4514 	}
4515 	/* get the right-hand side string associated with x[t] */
4516 	sr = series_get_string_for_value(r_dset, rvar, x[t]);
4517 	/* and look up its numeric code on the left */
4518 	dx = series_decode_string(l_dset, lvar, sr);
4519 	if (!na(dx)) {
4520 	    /* got a match: apply the code from @lst */
4521 	    x[t] = dx;
4522 	} else {
4523 	    /* no match: we need to add a string to @lst */
4524 	    idx = series_table_add_string(lst, sr);
4525 	    if (idx < 0) {
4526 		err = E_ALLOC;
4527 	    } else {
4528 		x[t] = (double) idx;
4529 	    }
4530 	}
4531     }
4532 
4533     return err;
4534 }
4535 
maybe_adjust_label(DATASET * dset,int v,char ** S,int ns)4536 static void maybe_adjust_label (DATASET *dset, int v,
4537 				char **S, int ns)
4538 {
4539     int i, len = 3 * ns; /* "=" + ", " */
4540     char *tmp;
4541 
4542     for (i=0; i<ns; i++) {
4543 	len += strlen(S[i]) + 1 + floor(log10(1.0 + i));
4544     }
4545 
4546     /* let's not create a super-long series label */
4547     if (len > 255) {
4548 	return;
4549     }
4550 
4551     tmp = calloc(len + 1, 1);
4552 
4553     if (tmp != NULL) {
4554 	char bit[16];
4555 
4556 	for (i=0; i<ns; i++) {
4557 	    sprintf(bit, "%d=", i+1);
4558 	    strcat(tmp, bit);
4559 	    strcat(tmp, S[i]);
4560 	    if (i < ns - 1) {
4561 		strcat(tmp, ", ");
4562 	    }
4563 	}
4564 	copy_label(dset->varinfo[v], tmp);
4565 	free(tmp);
4566     }
4567 }
4568 
4569 /* Encode the strings in @a into numerical values in series
4570    @v of dataset @dset. "Return" via @pU the array of unique
4571    string values and via @pnu the number of such values.
4572 */
4573 
alt_set_strvals(DATASET * dset,int v,gretl_array * a,char *** pU,int * pnu)4574 static int alt_set_strvals (DATASET *dset, int v, gretl_array *a,
4575 			    char ***pU, int *pnu)
4576 {
4577     char **S, **U = NULL;
4578     double *x = dset->Z[v];
4579     int i, pos, ns, nu = 0;
4580     int err = 0;
4581 
4582     S = gretl_array_get_strings(a, &ns);
4583 
4584     for (i=0; i<ns && !err; i++) {
4585 	err = strings_array_add_uniq(&U, &nu, S[i], &pos);
4586 	if (!err) {
4587 	    x[i] = pos + 1;
4588 	}
4589     }
4590 
4591     if (!err) {
4592 	*pU = U;
4593 	*pnu = nu;
4594     } else if (U != NULL) {
4595 	strings_array_free(U, nu);
4596     }
4597 
4598     return err;
4599 }
4600 
alt_strvals_case(DATASET * dset,int v,gretl_array * a)4601 static int alt_strvals_case (DATASET *dset, int v, gretl_array *a)
4602 {
4603     double *x = dset->Z[v];
4604     int i;
4605 
4606     if (gretl_array_get_length(a) != dset->n) {
4607 	return 0;
4608     }
4609 
4610     for (i=0; i<dset->n; i++) {
4611 	if (x[i] != 0) {
4612 	    return 0;
4613 	}
4614     }
4615 
4616     return 1;
4617 }
4618 
4619 /* here we're trying to set strings values on a series from
4620    scratch */
4621 
series_set_string_vals(DATASET * dset,int i,gretl_array * a)4622 int series_set_string_vals (DATASET *dset, int i, gretl_array *a)
4623 {
4624     gretl_matrix *vals = NULL;
4625     char **S = NULL;
4626     int ns = 0;
4627     int err = 0;
4628 
4629     if (a == NULL || dset == NULL || i < 1 || i >= dset->v) {
4630 	return E_DATA;
4631     }
4632 
4633     if (alt_strvals_case(dset, i, a)) {
4634 	err = alt_set_strvals(dset, i, a, &S, &ns);
4635 	if (err) {
4636 	    return err;
4637 	} else {
4638 	    goto do_strtable;
4639 	}
4640     }
4641 
4642     /* get sorted vector of unique values */
4643     vals = gretl_matrix_values(dset->Z[i], dset->n, OPT_S, &err);
4644 
4645     if (!err) {
4646 	int i, nvals = gretl_vector_get_length(vals);
4647 	double x0 = gretl_vector_get(vals, 0);
4648 	double x1 = gretl_vector_get(vals, nvals - 1);
4649 
4650 	if (x0 < 1.0) {
4651 	    gretl_errmsg_set("The minimum value of the target series "
4652 			     "must be >= 1");
4653 	    err = E_DATA;
4654 	} else {
4655 	    /* the values should all be integers */
4656 	    for (i=0; i<nvals && !err; i++) {
4657 		x1 = gretl_vector_get(vals, i);
4658 		if (x1 != floor(x1)) {
4659 		    gretl_errmsg_set("The series values must be integers");
4660 		    err = E_DATA;
4661 		}
4662 	    }
4663 	}
4664 
4665 	if (!err) {
4666 	    S = gretl_array_get_stringify_strings(a, (int) x1, &ns, &err);
4667 	}
4668 
4669 	if (!err) {
4670 	    /* the strings should all be UTF-8 */
4671 	    for (i=0; i<ns && !err; i++) {
4672 		if (!g_utf8_validate(S[i], -1, NULL)) {
4673 		    gretl_errmsg_sprintf("String %d is not valid UTF-8", i+1);
4674 		    err = E_DATA;
4675 		}
4676 	    }
4677 	}
4678     }
4679 
4680  do_strtable:
4681 
4682     if (!err) {
4683 	series_table *st = series_table_new(S, ns, &err);
4684 
4685 	if (!err) {
4686 	    if (dset->varinfo[i]->st != NULL) {
4687 		/* remove any pre-existing table */
4688 		series_table_destroy(dset->varinfo[i]->st);
4689 	    }
4690 	    series_set_discrete(dset, i, 1);
4691 	    dset->varinfo[i]->st = st;
4692 	    maybe_adjust_label(dset, i, S, ns);
4693 	}
4694     }
4695 
4696     if (err && S != NULL && ns > 0) {
4697 	strings_array_free(S, ns);
4698     }
4699 
4700     gretl_matrix_free(vals);
4701 
4702     return err;
4703 }
4704 
4705 /* The pre-checked case: we know that series @i is suitable
4706    for stringifying, and that @S contains the right number of
4707    strings.
4708 */
4709 
series_set_string_vals_direct(DATASET * dset,int i,char ** S,int ns)4710 int series_set_string_vals_direct (DATASET *dset, int i,
4711 				   char **S, int ns)
4712 {
4713     int err = 0;
4714     series_table *st = series_table_new(S, ns, &err);
4715 
4716     if (!err) {
4717 	if (dset->varinfo[i]->st != NULL) {
4718 	    /* remove any pre-existing table */
4719 	    series_table_destroy(dset->varinfo[i]->st);
4720 	}
4721 	series_set_discrete(dset, i, 1);
4722 	dset->varinfo[i]->st = st;
4723 	maybe_adjust_label(dset, i, S, ns);
4724     }
4725 
4726     if (err && S != NULL && ns > 0) {
4727 	strings_array_free(S, ns);
4728     }
4729 
4730     return err;
4731 }
4732 
4733 /**
4734  * series_recode_strings:
4735  * @dset: pointer to dataset.
4736  * @v: index number of target string-valued series.
4737  * @opt: may contain OPT_P (see below).
4738  * @changed: location to receive "changed" feedback, or NULL.
4739  *
4740  * This function "trims" the array of string values associated
4741  * with series @v so that it contains no redundant elements --
4742  * that is, values of which there is no instance in the
4743  * current sample -- and resets the numeric codes for the
4744  * strings if necessary.
4745  *
4746  * By default the original "series_table" attached to series @v
4747  * is destroyed, but if @opt contains OPT_P it is replaced but
4748  * not freed; this make sense only if another pointer to the
4749  * original table exists.
4750  *
4751  * If it happens that the current sample contains
4752  * instances of all the strings in the full dataset, this
4753  * function will not actually make any changes to @dset. The
4754  * @changed argument provides a means of determining
4755  * whether any change has been made.
4756  *
4757  * Returns: 0 on success, non-zero code on error.
4758  */
4759 
series_recode_strings(DATASET * dset,int v,gretlopt opt,int * changed)4760 int series_recode_strings (DATASET *dset, int v, gretlopt opt,
4761 			   int *changed)
4762 {
4763     double *x = dset->Z[v] + dset->t1;
4764     int n = sample_size(dset);
4765     gretl_matrix *vals = NULL;
4766     gretl_matrix *repl = NULL;
4767     char **S = NULL;
4768     const char *si;
4769     int ns, nu = 0;
4770     int err = 0;
4771 
4772     if (changed != NULL) {
4773 	*changed = 0;
4774     }
4775 
4776     ns = series_table_get_n_strings(dset->varinfo[v]->st);
4777     vals = gretl_matrix_values(x, n, OPT_NONE, &err);
4778 
4779     if (!err) {
4780 	/* number of unique values */
4781 	nu = vals->rows;
4782 	if (nu == ns) {
4783 	    /* nothing to be done */
4784 	    gretl_matrix_free(vals);
4785 	    return 0;
4786 	}
4787 	repl = gretl_zero_matrix_new(nu, 1);
4788 	S = strings_array_new(nu);
4789 	if (repl == NULL || S == NULL) {
4790 	    free(S);
4791 	    err = E_ALLOC;
4792 	}
4793     }
4794 
4795     if (!err) {
4796 	int i;
4797 
4798 	for (i=0; i<nu; i++) {
4799 	    si = series_get_string_for_value(dset, v, vals->val[i]);
4800 	    S[i] = gretl_strdup(si);
4801 	    repl->val[i] = i + 1;
4802 	}
4803 
4804 	substitute_values(x, x, n, vals->val, nu, repl->val, nu);
4805 
4806 	if (!(opt & OPT_P)) {
4807 	    series_table_destroy(dset->varinfo[v]->st);
4808 	}
4809 	/* the series table takes ownership of @S */
4810 	dset->varinfo[v]->st = series_table_new(S, nu, &err);
4811 
4812 	if (changed != NULL) {
4813 	    *changed = 1;
4814 	}
4815     }
4816 
4817     gretl_matrix_free(vals);
4818     gretl_matrix_free(repl);
4819 
4820     return err;
4821 }
4822 
set_panel_groups_name(DATASET * dset,const char * vname)4823 int set_panel_groups_name (DATASET *dset, const char *vname)
4824 {
4825     if (dset->pangrps != NULL) {
4826 	free(dset->pangrps);
4827     }
4828 
4829     dset->pangrps = gretl_strdup(vname);
4830 
4831     return (dset->pangrps == NULL)? E_ALLOC : 0;
4832 }
4833 
4834 /* This should be called only after the "group names"
4835    property of @dset has been (recently) validated, via
4836    panel_group_names_ok().
4837 */
4838 
get_panel_group_name(const DATASET * dset,int obs)4839 const char *get_panel_group_name (const DATASET *dset, int obs)
4840 {
4841     const char *s = NULL;
4842 
4843     if (dataset_is_panel(dset) && dset->pangrps != NULL &&
4844 	obs >= 0 && obs < dset->n) {
4845 	int v = current_series_index(dset, dset->pangrps);
4846 	series_table *st;
4847 
4848 	if ((st = series_get_string_table(dset, v)) != NULL) {
4849 	    s = series_table_get_string(st, dset->Z[v][obs]);
4850 	}
4851     }
4852 
4853     return (s != NULL)? s : "??";
4854 }
4855 
panel_group_names_ok(const DATASET * dset,int maxlen)4856 int panel_group_names_ok (const DATASET *dset, int maxlen)
4857 {
4858     int ok = 0;
4859 
4860     if (dataset_is_panel(dset) && dset->pangrps != NULL) {
4861 	int ns, v = current_series_index(dset, dset->pangrps);
4862 
4863 	if (v > 0 && v < dset->v) {
4864 	    char **S = series_get_string_vals(dset, v, &ns, 0);
4865 
4866 	    if (S != NULL && ns >= dset->n / dset->pd) {
4867 		ok = 1; /* provisional */
4868 		if (maxlen > 0) {
4869 		    int i;
4870 
4871 		    for (i=0; i<ns; i++) {
4872 			if (strlen(S[i]) > maxlen) {
4873 			    ok = 0;
4874 			    break;
4875 			}
4876 		    }
4877 		}
4878 	    }
4879 	}
4880     }
4881 
4882     return ok;
4883 }
4884 
panel_group_names_varname(const DATASET * dset)4885 const char *panel_group_names_varname (const DATASET *dset)
4886 {
4887     if (dataset_is_panel(dset) && dset->pangrps != NULL) {
4888 	int ns, v = current_series_index(dset, dset->pangrps);
4889 
4890 	if (v > 0 && v < dset->v) {
4891 	    char **S = series_get_string_vals(dset, v, &ns, 0);
4892 
4893 	    if (S != NULL) {
4894 		int ng = dset->n / dset->pd;
4895 
4896 		if (ns >= ng) {
4897 		    return dset->pangrps;
4898 		}
4899 	    }
4900 	}
4901     }
4902 
4903     return NULL;
4904 }
4905 
is_panel_group_names_series(const DATASET * dset,int v)4906 int is_panel_group_names_series (const DATASET *dset, int v)
4907 {
4908     if (dataset_is_panel(dset) && dset->pangrps != NULL) {
4909 	return v == current_series_index(dset, dset->pangrps);
4910     } else {
4911 	return 0;
4912     }
4913 }
4914 
suitable_group_names_series(const DATASET * dset,int maxlen,int exclude)4915 static int suitable_group_names_series (const DATASET *dset,
4916 					int maxlen,
4917 					int exclude)
4918 {
4919     int i, vfound = 0;
4920 
4921     for (i=1; i<dset->v && !vfound; i++) {
4922 	if (i == exclude) {
4923 	    continue;
4924 	}
4925 	if (is_string_valued(dset, i)) {
4926 	    int ns = 0;
4927 	    char **S = series_get_string_vals(dset, i, &ns, 0);
4928 
4929 	    if (S != NULL && ns >= dset->n / dset->pd) {
4930 		const char *sbak = NULL;
4931 		int t, u, ubak = -1;
4932 		int fail = 0;
4933 
4934 		for (t=dset->t1; t<=dset->t2 && !fail; t++) {
4935 		    const char *st = series_get_string_for_obs(dset, i, t);
4936 
4937 		    u = t / dset->pd;
4938 		    if (st == NULL || sbak == NULL) {
4939 			fail = 1;
4940 		    } else if (u == ubak && strcmp(st, sbak)) {
4941 			/* same unit, different label: no */
4942 			fail = 1;
4943 		    } else if (ubak >= 0 && u != ubak && !strcmp(st, sbak)) {
4944 			/* different unit, same label: no */
4945 			fail = 2;
4946 		    }
4947 		    if (!fail && maxlen > 0 && strlen(st) > maxlen) {
4948 			fail = 1;
4949 		    }
4950 		    ubak = u;
4951 		    sbak = st;
4952 		}
4953 		if (!fail) {
4954 		    vfound = i;
4955 		}
4956 	    }
4957 	}
4958     }
4959 
4960     return vfound;
4961 }
4962 
4963 /* For plotting purposes, try to get labels for panel groups,
4964    subject to the constraint that they should be no longer
4965    than @maxlen. If successful, this will return an array of
4966    at least N strings, where N is the cross-sectional
4967    dimension of the panel. This array should be treated as
4968    read-only.
4969 */
4970 
get_panel_group_table(const DATASET * dset,int maxlen,int * pv)4971 series_table *get_panel_group_table (const DATASET *dset,
4972 				     int maxlen, int *pv)
4973 {
4974     series_table *st = NULL;
4975     int vpg = 0;
4976 
4977     if (dset->pangrps != NULL) {
4978 	vpg = current_series_index(dset, dset->pangrps);
4979     }
4980 
4981     /* first see if we have valid group labels set explicitly */
4982     if (vpg > 0 && panel_group_names_ok(dset, maxlen)) {
4983 	st = dset->varinfo[vpg]->st;
4984     }
4985 
4986     if (st == NULL) {
4987 	/* can we find a suitable string-valued series? */
4988 	int altv = suitable_group_names_series(dset, maxlen, vpg);
4989 
4990 	if (altv > 0) {
4991 	    vpg = altv;
4992 	    st = dset->varinfo[vpg]->st;
4993 	}
4994     }
4995 
4996     *pv = (st != NULL)? vpg : 0;
4997 
4998     return st;
4999 }
5000 
is_dataset_series(const DATASET * dset,const double * x)5001 int is_dataset_series (const DATASET *dset, const double *x)
5002 {
5003     int i;
5004 
5005     for (i=dset->v-1; i>=0; i--) {
5006 	if (x == dset->Z[i]) {
5007 	    return 1;
5008 	}
5009     }
5010 
5011     return 0;
5012 }
5013 
effective_daily_skip(int delta,int wd,int pd)5014 static int effective_daily_skip (int delta, int wd, int pd)
5015 {
5016     int k, skip = delta - 1;
5017 
5018     if (pd < 7) {
5019 	skip = 0;
5020 	for (k=1; k<delta; k++) {
5021 	    wd = (wd == 0)? 6 : wd - 1;
5022 	    if (pd == 6) {
5023 		skip += (wd != 0);
5024 	    } else {
5025 		skip += (wd != 0 && wd != 6);
5026 	    }
5027 	}
5028     }
5029 
5030     return skip;
5031 }
5032 
5033 /* If we get here we've already checked that @dset is dated daily
5034    data, and that @pd is a valid daily periodicity greater than or
5035    equal to the current dset->pd.
5036 */
5037 
pad_daily_data(DATASET * dset,int pd,PRN * prn)5038 static int pad_daily_data (DATASET *dset, int pd, PRN *prn)
5039 {
5040     DATASET *bigset = NULL;
5041     char datestr[OBSLEN];
5042     guint32 ed, ed0 = 0, edbak = 0;
5043     int wd, skip, totskip = 0;
5044     int t, err = 0;
5045 
5046     for (t=0; t<dset->n; t++) {
5047 	ntolabel(datestr, t, dset);
5048 	if (t == 0) {
5049 	    ed0 = edbak = get_epoch_day(datestr);
5050 	} else {
5051 	    wd = weekday_from_date(datestr);
5052 	    ed = get_epoch_day(datestr);
5053 	    skip = effective_daily_skip(ed - edbak, wd, pd);
5054 	    totskip += skip;
5055 	    edbak = ed;
5056 	}
5057     }
5058 
5059     if (totskip == 0) {
5060 	pprintf(prn, "Dataset is already complete for %d-day calendar", pd);
5061 	return 0;
5062     }
5063 
5064     /* We pass OPT_R here to avoid allocating varnames in @bigset,
5065        since we're going to preserve these from @dset.
5066     */
5067     bigset = real_create_new_dataset(dset->v, dset->n + totskip, OPT_R);
5068 
5069     if (bigset == NULL) {
5070 	err = E_ALLOC;
5071     } else {
5072 	int i, s = 0;
5073 
5074 	edbak = ed0;
5075 
5076 	for (t=0; t<dset->n; t++) {
5077 	    if (t > 0) {
5078 		ntolabel(datestr, t, dset);
5079 		wd = weekday_from_date(datestr);
5080 		ed = get_epoch_day(datestr);
5081 		s += 1 + effective_daily_skip(ed - edbak, wd, pd);
5082 		edbak = ed;
5083 	    }
5084 	    for (i=1; i<dset->v; i++) {
5085 		bigset->Z[i][s] = dset->Z[i][t];
5086 	    }
5087 	}
5088 
5089 	bigset->varname = dset->varname;
5090 	bigset->varinfo = dset->varinfo;
5091 	bigset->descrip = dset->descrip;
5092 
5093 	bigset->pd = pd;
5094 	bigset->structure = TIME_SERIES;
5095 	bigset->sd0 = (double) ed0;
5096 	strcpy(bigset->stobs, dset->stobs);
5097 	ntolabel(bigset->endobs, bigset->n - 1, bigset);
5098 
5099 	dset->varname = NULL;
5100 	dset->varinfo = NULL;
5101 	dset->descrip = NULL;
5102 	dataset_destroy_obs_markers(dset);
5103 	free_Z(dset);
5104 	clear_datainfo(dset, CLEAR_SUBSAMPLE);
5105 
5106 	*dset = *bigset;
5107 	free(bigset); /* avoid leaking memory */
5108     }
5109 
5110     return err;
5111 }
5112 
5113 /* MIDAS-related functions */
5114 
5115 /* postprocess: fill missing slots in daily data array
5116    with the period (month or quarter) average
5117    (FIXME support interpolation as an option?)
5118 */
5119 
postprocess_daily_data(DATASET * dset,const int * list)5120 int postprocess_daily_data (DATASET *dset, const int *list)
5121 {
5122     double *x, xbar, xsum;
5123     int t, i, n_ok, n_miss;
5124     int err = 0;
5125 
5126     for (t=dset->t1; t<=dset->t2; t++) {
5127 	xsum = 0.0;
5128 	n_ok = n_miss = 0;
5129 	for (i=1; i<=list[0]; i++) {
5130 	    x = dset->Z[list[i]];
5131 	    if (na(x[t])) {
5132 		n_miss++;
5133 	    } else {
5134 		xsum += x[t];
5135 		n_ok++;
5136 	    }
5137 	}
5138 	if (n_miss > 0 && n_ok > 0) {
5139 	    xbar = xsum / n_ok;
5140 	    for (i=1; i<=list[0]; i++) {
5141 		x = dset->Z[list[i]];
5142 		if (na(x[t])) {
5143 		    x[t] = xbar;
5144 		}
5145 	    }
5146 	}
5147     }
5148 
5149     return err;
5150 }
5151 
series_get_midas_period(const DATASET * dset,int i)5152 int series_get_midas_period (const DATASET *dset, int i)
5153 {
5154     if (i > 0 && i < dset->v) {
5155 	return dset->varinfo[i]->midas_period;
5156     }
5157 
5158     return 0;
5159 }
5160 
series_set_midas_period(const DATASET * dset,int i,int period)5161 void series_set_midas_period (const DATASET *dset, int i,
5162 			      int period)
5163 {
5164     if (i > 0 && i < dset->v) {
5165 	dset->varinfo[i]->midas_period = period;
5166     }
5167 }
5168 
series_get_midas_freq(const DATASET * dset,int i)5169 int series_get_midas_freq (const DATASET *dset, int i)
5170 {
5171     if (i > 0 && i < dset->v) {
5172 	return dset->varinfo[i]->midas_freq;
5173     }
5174 
5175     return 0;
5176 }
5177 
series_set_midas_freq(const DATASET * dset,int i,int freq)5178 int series_set_midas_freq (const DATASET *dset, int i,
5179 			   int freq)
5180 {
5181     int err = 0;
5182 
5183     if (i > 0 && i < dset->v) {
5184 	if (freq < 5 || freq > 12) {
5185 	    err = E_DATA;
5186 	} else {
5187 	    dset->varinfo[i]->midas_freq = freq;
5188 	}
5189     } else {
5190 	err = E_DATA;
5191     }
5192 
5193     return err;
5194 }
5195 
series_is_midas_anchor(const DATASET * dset,int i)5196 int series_is_midas_anchor (const DATASET *dset, int i)
5197 {
5198     if (i > 0 && i < dset->v &&
5199 	(dset->varinfo[i]->flags & VAR_HFANCHOR)) {
5200 	return dset->varinfo[i]->midas_period;
5201     }
5202 
5203     return 0;
5204 }
5205 
series_set_midas_anchor(const DATASET * dset,int i)5206 void series_set_midas_anchor (const DATASET *dset, int i)
5207 {
5208     if (i > 0 && i < dset->v) {
5209 	dset->varinfo[i]->flags |= VAR_HFANCHOR;
5210     }
5211 }
5212 
5213 /* end MIDAS-related functions */
5214 
series_get_orig_pd(const DATASET * dset,int i)5215 int series_get_orig_pd (const DATASET *dset, int i)
5216 {
5217     if (i > 0 && i < dset->v) {
5218 	return dset->varinfo[i]->orig_pd;
5219     } else {
5220 	return 0;
5221     }
5222 }
5223 
series_set_orig_pd(const DATASET * dset,int i,int pd)5224 void series_set_orig_pd (const DATASET *dset, int i, int pd)
5225 {
5226     if (i > 0 && i < dset->v) {
5227 	dset->varinfo[i]->orig_pd = pd;
5228     }
5229 }
5230 
series_unset_orig_pd(const DATASET * dset,int i)5231 void series_unset_orig_pd (const DATASET *dset, int i)
5232 {
5233     if (i > 0 && i < dset->v) {
5234 	dset->varinfo[i]->orig_pd = 0;
5235     }
5236 }
5237 
series_info_bundle(const DATASET * dset,int i,int * err)5238 gretl_bundle *series_info_bundle (const DATASET *dset,
5239 				  int i, int *err)
5240 {
5241     gretl_bundle *b = NULL;
5242 
5243     if (dset != NULL && i >= 0 && i < dset->v) {
5244 	b = gretl_bundle_new();
5245 	if (b == NULL) {
5246 	    *err = E_ALLOC;
5247 	}
5248     } else {
5249 	*err = E_DATA;
5250     }
5251 
5252     if (b != NULL) {
5253 	VARINFO *vinfo = dset->varinfo[i];
5254 
5255 	gretl_bundle_set_string(b, "name", dset->varname[i]);
5256 	if (vinfo->label != NULL) {
5257 	    gretl_bundle_set_string(b, "description", vinfo->label);
5258 	}
5259 	if (vinfo->display_name[0] != '\0') {
5260 	    gretl_bundle_set_string(b, "graph_name", vinfo->display_name);
5261 	}
5262 	gretl_bundle_set_int(b, "discrete", vinfo->flags & VAR_DISCRETE ?
5263 			     1 : 0);
5264 	gretl_bundle_set_int(b, "coded", vinfo->flags & VAR_CODED ?
5265 			     1 : 0);
5266 	gretl_bundle_set_string(b, "parent", vinfo->parent);
5267 	if (vinfo->transform > 0) {
5268 	    gretl_bundle_set_string(b, "transform",
5269 				    gretl_command_word(vinfo->transform));
5270 	} else {
5271 	    gretl_bundle_set_string(b, "transform", "none");
5272 	}
5273 	gretl_bundle_set_int(b, "lag", vinfo->lag);
5274 	gretl_bundle_set_int(b, "has_string_table", vinfo->st != NULL);
5275 	if (vinfo->midas_period > 0) {
5276 	    gretl_bundle_set_int(b, "midas_period", vinfo->midas_period);
5277 	}
5278 	if (vinfo->midas_freq > 0) {
5279 	    gretl_bundle_set_int(b, "midas_freq", vinfo->midas_freq);
5280 	}
5281 	if (vinfo->orig_pd > 0) {
5282 	    gretl_bundle_set_int(b, "orig_pd", vinfo->orig_pd);
5283 	}
5284     }
5285 
5286     return b;
5287 }
5288 
5289 /* Given a series label @s, see if it can be recognized
5290    as identifying the series as the product of two others,
5291    and if so write the names of the others into @targ1
5292    and @targ2.
5293 */
5294 
get_interaction_names(const char * s,char * targ1,char * targ2)5295 static int get_interaction_names (const char *s,
5296 				  char *targ1,
5297 				  char *targ2)
5298 {
5299     const char *p;
5300     int n1, n2, ret = 0;
5301 
5302     *targ1 = *targ2 = '\0';
5303 
5304     p = strchr(s, '*');
5305     if (p == NULL || strchr(p+1, '*') != NULL) {
5306 	/* the label string does not contain a single '*' */
5307 	return 0;
5308     }
5309 
5310     s += strspn(s, " ");
5311     n1 = gretl_namechar_spn(s);
5312     p++;
5313     p += strspn(p, " ");
5314     n2 = gretl_namechar_spn(p);
5315 
5316     if (n1 > 0 && n1 < VNAMELEN &&
5317 	n2 > 0 && n2 < VNAMELEN) {
5318 	strncat(targ1, s, n1);
5319 	strncat(targ2, p, n2);
5320 	ret = 1;
5321     }
5322 
5323     return ret;
5324 }
5325 
5326 /* Given a series label @s, see if it can be recognized as
5327    identifying the series as the square of another, and if
5328    so write the name of the other into @targ.
5329 */
5330 
get_square_parent_name(const char * s,char * targ,char * targ2)5331 static int get_square_parent_name (const char *s, char *targ,
5332 				   char *targ2)
5333 {
5334     const char *p;
5335     int n1, n2, ret = 0;
5336 
5337     *targ = '\0';
5338 
5339     if (*s == '=' && (p = strstr(s, "squared")) != NULL) {
5340 	/* "= PARENT squared" */
5341 	s++;
5342 	s += strspn(s, " ");
5343 	n1 = gretl_namechar_spn(s);
5344 	n2 = p - s - 1;
5345 	if (n1 > 0 && n1 < VNAMELEN && n2 == n1) {
5346 	    strncat(targ, s, n1);
5347 	    ret = 1;
5348 	}
5349     } else if (strchr(s, '^') != NULL) {
5350 	/* "PARENT^2" */
5351 	n1 = gretl_namechar_spn(s);
5352 	if (n1 > 0 && n1 < VNAMELEN) {
5353 	    p = s + n1;
5354 	    if (!strcmp(p, "^2")) {
5355 		strncat(targ, s, n1);
5356 		ret = 1;
5357 	    }
5358 	}
5359     } else if ((p = strstr(s, "square of ")) != NULL) {
5360 	p += 9;
5361 	p += strspn(p, " ");
5362 	n1 = gretl_namechar_spn(p);
5363 	if (n1 > 0 && n1 < VNAMELEN) {
5364 	    strncat(targ, p, n1);
5365 	    ret = 1;
5366 	}
5367     } else if (get_interaction_names(s, targ, targ2)) {
5368 	/* "x * x" ? */
5369 	if (!strcmp(targ, targ2)) {
5370 	    ret = 1;
5371 	}
5372     }
5373 
5374     return ret;
5375 }
5376 
5377 /* Given either (a) two series identified by ID numbers
5378    i, j where the second is supposed to be the square
5379    of the first, or (b) three series i, j, k where the
5380    third is supposed to be the product of the first two,
5381    check that the putative relationship actually holds
5382    over the current sample range. Return 1 if so, else 0.
5383 */
5384 
validate_relationship(int i,int j,int k,const DATASET * dset)5385 static int validate_relationship (int i, int j, int k,
5386 				  const DATASET *dset)
5387 {
5388     double xi, xj;
5389     int t;
5390 
5391     for (t=dset->t1; t<=dset->t2; t++) {
5392 	xi = dset->Z[i][t];
5393 	xj = dset->Z[j][t];
5394 	if (k > 0) {
5395 	    /* interaction test: xk = xi*xj */
5396 	    if (!na(xi) && !na(xj) && dset->Z[k][t] != xi*xj) {
5397 		return 0;
5398 	    }
5399 	} else {
5400 	    /* square test: xj = xi*xi */
5401 	    if (!na(xi) && xj != xi*xi) {
5402 		return 0;
5403 	    }
5404 	}
5405     }
5406 
5407     return 1;
5408 }
5409 
5410 /* In case we find more interaction terms that can be fitted into
5411    the current column-size of the "list info" matrix, add two more
5412    (since the encoding of each interaction for a given "primary"
5413    series requires two columns).
5414 */
5415 
resize_listinfo_matrix(gretl_matrix * m)5416 static int resize_listinfo_matrix (gretl_matrix *m)
5417 {
5418     int newc = m->cols + 2;
5419     int i, err = 0;
5420 
5421     err = gretl_matrix_realloc(m, m->rows, newc);
5422     if (!err) {
5423 	for (i=0; i<m->rows; i++) {
5424 	    gretl_matrix_set(m, i, newc-2, 0);
5425 	    gretl_matrix_set(m, i, newc-1, 0);
5426 	}
5427     }
5428 
5429     return err;
5430 }
5431 
get_iact_column(gretl_matrix * m,int i,int * err)5432 static int get_iact_column (gretl_matrix *m, int i, int *err)
5433 {
5434     int j;
5435 
5436     for (j=3; j<m->cols; j+=2) {
5437 	if (gretl_matrix_get(m, i, j) == 0) {
5438 	    return j;
5439 	}
5440     }
5441 
5442     /* looks like we need more columns */
5443     *err = resize_listinfo_matrix(m);
5444     return *err ? -1 : m->cols - 2;
5445 }
5446 
5447 /* The (optionally) "condensed" version of the listinfo_matrix
5448    includes only primary terms (and excludes the constant).
5449    The first column of the full matrix is replaced by the
5450    position in @list of each primary term.
5451 */
5452 
condense_listinfo_matrix(gretl_matrix * m,const int * list,const DATASET * dset)5453 static int condense_listinfo_matrix (gretl_matrix *m,
5454 				     const int *list,
5455 				     const DATASET *dset)
5456 {
5457     gretl_matrix *mc = NULL;
5458     char **S = NULL;
5459     double x;
5460     int i, j, ic, n = 0;
5461 
5462     for (i=0; i<m->rows; i++) {
5463 	if (m->val[i] == 1) {
5464 	    n++;
5465 	}
5466     }
5467 
5468     if (n == m->rows) {
5469 	/* nothing to be done */
5470 	return 0;
5471     }
5472 
5473     mc = gretl_matrix_alloc(n, m->cols);
5474     if (mc == NULL) {
5475 	return E_ALLOC;
5476     }
5477 
5478     S = strings_array_new(n);
5479 
5480     ic = 0;
5481     for (i=0; i<m->rows; i++) {
5482 	if (m->val[i] == 1) {
5483 	    gretl_matrix_set(mc, ic, 0, i+1);
5484 	    for (j=1; j<m->cols; j++) {
5485 		x = gretl_matrix_get(m, i, j);
5486 		gretl_matrix_set(mc, ic, j, x);
5487 	    }
5488 	    S[ic] = gretl_strdup(dset->varname[list[i+1]]);
5489 	    ic++;
5490 	}
5491     }
5492 
5493     gretl_matrix_reuse(m, n, m->cols);
5494     gretl_matrix_copy_values(m, mc);
5495     gretl_matrix_free(mc);
5496     gretl_matrix_set_rownames(m, S);
5497 
5498     return 0;
5499 }
5500 
5501 static gretl_matrix *
linfo_matrix_via_labels(const int * list,const DATASET * dset,gretlopt opt,int * err)5502 linfo_matrix_via_labels (const int *list,
5503 			 const DATASET *dset,
5504 			 gretlopt opt,
5505 			 int *err)
5506 {
5507     gretl_matrix *ret = NULL;
5508     const char *label;
5509     char targ1[VNAMELEN];
5510     char targ2[VNAMELEN];
5511     int i, vi, j, vj;
5512     int pcol = 0, dcol = 1;
5513     int iacol, sqcol = 2;
5514     int n;
5515 
5516     if (list == NULL || list[0] == 0) {
5517 	*err = E_DATA;
5518 	return ret;
5519     }
5520 
5521     n = list[0];
5522     ret = gretl_zero_matrix_new(n, 5);
5523     if (ret == NULL) {
5524 	*err = E_ALLOC;
5525 	return ret;
5526     }
5527 
5528     for (i=1; i<=n && !*err; i++) {
5529 	/* default to series is primary */
5530 	gretl_matrix_set(ret, i-1, pcol, 1);
5531 	vi = list[i];
5532 	if (vi == 0) {
5533 	    /* mark as non-primary and move on */
5534 	    gretl_matrix_set(ret, i-1, pcol, 0);
5535 	    continue;
5536 	}
5537 	if (gretl_isdummy(dset->t1, dset->t2, dset->Z[vi])) {
5538 	    /* insert dummy flag in this row */
5539 	    gretl_matrix_set(ret, i-1, dcol, 1);
5540 	}
5541 	label = series_get_label(dset, vi);
5542 	if (label == NULL) {
5543 	    continue;
5544 	}
5545 	if (get_square_parent_name(label, targ1, targ2)) {
5546 	    /* looks like this could be a squared term */
5547 	    for (j=1; j<=n; j++) {
5548 		if (j == i) continue;
5549 		vj = list[j];
5550 		if (!strcmp(targ1, dset->varname[vj]) &&
5551 		    validate_relationship(vj, vi, 0, dset)) {
5552 		    /* mark this series as non-primary, and as square */
5553 		    gretl_matrix_set(ret, i-1, pcol, 0);
5554 		    gretl_matrix_set(ret, i-1, sqcol, j);
5555 		    /* insert square ref in parent's row */
5556 		    gretl_matrix_set(ret, j-1, sqcol, i);
5557 		    break;
5558 		}
5559 	    }
5560 	    continue;
5561 	}
5562 	if (get_interaction_names(label, targ1, targ2)) {
5563 	    /* looks like this could be an interaction term */
5564 	    int ia1 = 0, ia2 = 0;
5565 
5566 	    for (j=1; j<=n; j++) {
5567 		if (j == i) continue;
5568 		vj = list[j];
5569 		if (!strcmp(targ1, dset->varname[vj])) {
5570 		    ia1 = j;
5571 		} else if (!strcmp(targ2, dset->varname[vj])) {
5572 		    ia2 = j;
5573 		}
5574 	    }
5575 	    if (ia1 > 0 && ia2 > 0 &&
5576 		validate_relationship(list[ia1], list[ia2], vi, dset)) {
5577 		/* mark this series as non-primary, interaction */
5578 		gretl_matrix_set(ret, i-1, pcol, 0);
5579 		gretl_matrix_set(ret, i-1, 3, ia1);
5580 		gretl_matrix_set(ret, i-1, 4, ia2);
5581 		/* we may need to expand the number of columns */
5582 		iacol = get_iact_column(ret, i, err);
5583 		if (!*err) {
5584 		    /* insert cross references in parents' rows */
5585 		    gretl_matrix_set(ret, ia1-1, iacol, ia2);
5586 		    gretl_matrix_set(ret, ia1-1, iacol+1, i);
5587 		    gretl_matrix_set(ret, ia2-1, iacol, ia1);
5588 		    gretl_matrix_set(ret, ia2-1, iacol+1, i);
5589 		}
5590 	    }
5591 	}
5592     }
5593 
5594     if (*err) {
5595 	gretl_matrix_free(ret);
5596 	ret = NULL;
5597     } else if (opt & OPT_C) {
5598 	condense_listinfo_matrix(ret, list, dset);
5599     } else {
5600 	/* convenience: attach series names to rows */
5601 	char **S;
5602 	int serr = 0;
5603 
5604 	S = gretl_list_get_names_array(list, dset, &serr);
5605 	if (S != NULL) {
5606 	    gretl_matrix_set_rownames(ret, S);
5607 	}
5608     }
5609 
5610     return ret;
5611 }
5612 
5613 static gretl_matrix *
linfo_matrix_via_data(const int * list,const DATASET * dset,gretlopt opt,int * err)5614 linfo_matrix_via_data (const int *list,
5615 		       const DATASET *dset,
5616 		       gretlopt opt,
5617 		       int *err)
5618 {
5619     gretl_matrix *ret = NULL;
5620     int i, vi, j, vj, k, vk;
5621     int pcol = 0, dcol = 1;
5622     int iacol, sqcol = 2;
5623     int n;
5624 
5625     if (list == NULL || list[0] == 0) {
5626 	*err = E_DATA;
5627 	return ret;
5628     }
5629 
5630     n = list[0];
5631     ret = gretl_zero_matrix_new(n, 5);
5632     if (ret == NULL) {
5633 	*err = E_ALLOC;
5634 	return ret;
5635     }
5636 
5637     for (i=1; i<=n && !*err; i++) {
5638 	int matched = 0;
5639 
5640 	/* default to series is primary */
5641 	gretl_matrix_set(ret, i-1, pcol, 1);
5642 	vi = list[i];
5643 	if (vi == 0) {
5644 	    /* mark as non-primary and move on */
5645 	    gretl_matrix_set(ret, i-1, pcol, 0);
5646 	    continue;
5647 	}
5648 	if (gretl_isdummy(dset->t1, dset->t2, dset->Z[vi])) {
5649 	    /* insert dummy flag in this row */
5650 	    gretl_matrix_set(ret, i-1, dcol, 1);
5651 	}
5652 	for (j=1; j<=n && !matched; j++) {
5653 	    vj = list[j];
5654 	    if (j == i || vj == 0) continue;
5655 	    if (validate_relationship(vj, vi, 0, dset)) {
5656 		/* mark this series as non-primary, square */
5657 		gretl_matrix_set(ret, i-1, pcol, 0);
5658 		gretl_matrix_set(ret, i-1, sqcol, j);
5659 		/* insert square ref in parent's row */
5660 		gretl_matrix_set(ret, j-1, sqcol, i);
5661 		matched = 1;
5662 	    }
5663 	    for (k=1; k<=n && !matched; k++) {
5664 		vk = list[k];
5665 		if (k == i || k == j || vk == 0) continue;
5666 		if (validate_relationship(vj, vk, vi, dset)) {
5667 		    /* mark this series as non-primary, interaction */
5668 		    gretl_matrix_set(ret, i-1, pcol, 0);
5669 		    gretl_matrix_set(ret, i-1, 3, j);
5670 		    gretl_matrix_set(ret, i-1, 4, k);
5671 		    /* we may need to expand the number of columns */
5672 		    iacol = get_iact_column(ret, i, err);
5673 		    if (!*err) {
5674 			/* insert cross references in parents' rows */
5675 			gretl_matrix_set(ret, j-1, iacol, k);
5676 			gretl_matrix_set(ret, j-1, iacol+1, i);
5677 			gretl_matrix_set(ret, k-1, iacol, j);
5678 			gretl_matrix_set(ret, k-1, iacol+1, i);
5679 		    }
5680 		    matched = 1;
5681 		}
5682 	    }
5683 	}
5684     }
5685 
5686     if (*err) {
5687 	gretl_matrix_free(ret);
5688 	ret = NULL;
5689     } else if (opt & OPT_C) {
5690 	condense_listinfo_matrix(ret, list, dset);
5691     } else {
5692 	/* convenience: attach series names to rows */
5693 	char **S;
5694 	int serr = 0;
5695 
5696 	S = gretl_list_get_names_array(list, dset, &serr);
5697 	if (S != NULL) {
5698 	    gretl_matrix_set_rownames(ret, S);
5699 	}
5700     }
5701 
5702     return ret;
5703 }
5704 
5705 /* Construct a matrix providing information about the relations
5706    between the series in @list. This will have rows equal to the
5707    number of series and at least 5 columns (shown as 1-based here).
5708    All elements of the matrix are zero unless otherwise specified.
5709 
5710    col 1: Holds 1 if the series is "primary" (neither the square
5711    of another series in the list, nor the interaction of two
5712    series in the list).
5713 
5714    col 2: Holds 1 if the series is a 0/1 dummy.
5715 
5716    col 3: If the series is primary and its square is also
5717    present in the list, holds the list position of the square,
5718    or if the series itself is a squared term, holds the list
5719    position of the series of which it's the square.
5720 
5721    cols 4, 5: If the series features in an interaction term,
5722    col 4 holds the list position of its "partner" and col 5 the
5723    list position of the interaction term. If the series features
5724    in more than one interaction term, subsequent interaction info
5725    goes into cols 6 and 7 or higher (these being added as required).
5726    If the series itself is an interaction term, cols 4 and 5 get
5727    the list positions of the two source series.
5728 */
5729 
list_info_matrix(const int * list,const DATASET * dset,gretlopt opt,int * err)5730 gretl_matrix *list_info_matrix (const int *list, const DATASET *dset,
5731 				gretlopt opt, int *err)
5732 {
5733     if (opt & OPT_B) {
5734 	return linfo_matrix_via_data(list, dset, opt, err);
5735     } else {
5736 	return linfo_matrix_via_labels(list, dset, opt, err);
5737     }
5738 }
5739 
5740 #define MAP_DEBUG 0
5741 
5742 #define excluded(l,i) (l != NULL && !in_gretl_list(l,i))
5743 
5744 /* Given the current dataset and the $mapfile name recorded
5745    on it: get the content of $mapfile as a bundle then
5746    revise the bundle (a) to include only the features in the
5747    current sample and (b) to reflect any changes in the dataset
5748    (series added, deleted or modified). Return the modified
5749    bundle.
5750 */
5751 
get_current_map(const DATASET * dset,const int * list,int * err)5752 gretl_bundle *get_current_map (const DATASET *dset,
5753 			       const int *list,
5754 			       int *err)
5755 {
5756     const char *sj, *id, *fname;
5757     gretl_bundle *fi, *pp, *jb = NULL;
5758     gretl_array *features = NULL;
5759     int ntarg, fmax = 0;
5760     int i, j, dsi, fidx;
5761     double xj;
5762 
5763     fname = dataset_get_mapfile(dset);
5764 
5765     if (fname == NULL) {
5766 	gretl_errmsg_set("no mapfile is present");
5767 	*err = E_DATA;
5768     } else if (dataset_is_resampled(dset)) {
5769 	/* most unlikely */
5770 	gretl_errmsg_set("dataset is resampled!");
5771 	*err = E_DATA;
5772     }
5773 
5774     if (!*err) {
5775 	jb = gretl_bundle_read_from_file(fname, 0, err);
5776 	if (!*err) {
5777 	    features = gretl_bundle_get_array(jb, "features", err);
5778 	}
5779     }
5780 
5781     if (!*err) {
5782 	int nobs;
5783 
5784 	fmax = gretl_array_get_length(features);
5785 	if (dset->submask != NULL) {
5786 	    nobs = get_full_length_n();
5787 	} else {
5788 	    nobs = dset->n;
5789 	}
5790 	if (fmax != nobs) {
5791 	    /* Although it may be sub-sampled, the full dataset must
5792 	       have a number of observations equal to the number of
5793 	       features in the existing map.
5794 	    */
5795 	    gretl_errmsg_set("map and dataset are out of sync!");
5796 	    *err = E_DATA;
5797 	}
5798 	/* the number of features we're seeking */
5799 	ntarg = sample_size(dset);
5800     }
5801 
5802     if (*err) {
5803 	gretl_bundle_destroy(jb);
5804 	return NULL;
5805     }
5806 
5807 #if MAP_DEBUG
5808     fprintf(stderr, "get_current_map: fmax %d, ntarg %d\n", fmax, ntarg);
5809     printf(stderr, "checking for features to include/drop\n");
5810 #endif
5811 
5812     /* index into dataset rows */
5813     dsi = -1;
5814 
5815     for (i=0, fidx=0; i<fmax; i++) {
5816 	int skip = 0;
5817 
5818 	if (dset->submask != NULL) {
5819 	    if (dset->submask[i] == 0) {
5820 		skip = 1;
5821 	    } else {
5822 		dsi++;
5823 	    }
5824 	} else {
5825 	    dsi = i;
5826 	}
5827 	if (dsi < dset->t1) {
5828 	    skip = 1;
5829 	} else if (dsi > dset->t2) {
5830 	    /* we've got everything we need */
5831 	    break;
5832 	}
5833 	if (skip) {
5834 #if MAP_DEBUG
5835 	    fprintf(stderr, "  drop sampled-out feature %d\n", i);
5836 #endif
5837 	    gretl_array_delete_element(features, fidx);
5838 	} else {
5839 	    fi = gretl_array_get_element(features, fidx, NULL, err);
5840 	    pp = gretl_bundle_get_bundle(fi, "properties", err);
5841 	    /* clear the existing properties bundle */
5842 	    gretl_bundle_void_content(pp);
5843 	    /* and refill it from the dataset */
5844 	    for (j=1; j<dset->v; j++) {
5845 		if (excluded(list, j)) {
5846 		    continue;
5847 		}
5848 		id = dset->varname[j];
5849 		if (is_string_valued(dset, j)) {
5850 		    sj = series_get_string_for_obs(dset, j, dsi);
5851 		    gretl_bundle_set_string(pp, id, sj);
5852 		} else {
5853 		    xj = dset->Z[j][dsi];
5854 		    gretl_bundle_set_scalar(pp, id, xj);
5855 		}
5856 	    }
5857 	    fidx++;
5858 #if MAP_DEBUG
5859 	    fprintf(stderr, "  included feature %d, now got %d\n", i, fidx);
5860 #endif
5861 	    if (fidx == ntarg) {
5862 #if MAP_DEBUG
5863 		fprintf(stderr, "  reached target of %d features, break\n", ntarg);
5864 #endif
5865 		break;
5866 	    }
5867 	}
5868     }
5869 
5870     fmax = gretl_array_get_length(features);
5871 
5872 #if MAP_DEBUG
5873     fprintf(stderr, "after loop, features array has %d elements, fidx=%d\n",
5874 	    fmax, fidx);
5875 #endif
5876 
5877     /* delete any unwanted trailing features */
5878     for (j=fidx; j<fmax; j++) {
5879 	gretl_array_delete_element(features, fidx);
5880     }
5881 
5882     return jb;
5883 }
5884