1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include "libgretl.h"
21 #include "gretl_func.h"
22 #include "uservar.h"
23 #include "gretl_string_table.h"
24 #include "libset.h"
25 #include "dbread.h"
26 #include "varinfo_priv.h"
27
28 #define DDEBUG 0
29 #define FULLDEBUG 0
30
31 #define Z_COLS_BORROWED 2
32
33 #define dset_zcols_borrowed(d) (d->auxiliary == Z_COLS_BORROWED)
34
35 static int pad_daily_data (DATASET *dset, int pd, PRN *prn);
36
37 /**
38 * check_dataset_is_changed:
39 * @dset: dataset to check.
40 *
41 * Returns: 1 if @dset has been modified since
42 * the last call to this function.
43 */
44
check_dataset_is_changed(DATASET * dset)45 int check_dataset_is_changed (DATASET *dset)
46 {
47 int ret = dset->modflag;
48
49 dset->modflag = 0;
50 return ret;
51 }
52
53 /**
54 * set_dataset_is_changed:
55 * @dset: dataset.
56 * @s: 1 or 0.
57 *
58 * Sets the internal boolean "changed" flag to @s.
59 */
60
set_dataset_is_changed(DATASET * dset,int s)61 void set_dataset_is_changed (DATASET *dset, int s)
62 {
63 if (dset != NULL && gretl_function_depth() == 0) {
64 dset->modflag = s;
65 }
66 }
67
dataset_set_nobs(DATASET * dset,int n)68 static void dataset_set_nobs (DATASET *dset, int n)
69 {
70 if (n != dset->n) {
71 /* if the total number of observations in the dataset
72 has changed, the current "matrix_mask", if present
73 (see libset.c), will now be invalid
74 */
75 destroy_matrix_mask();
76 dset->n = n;
77 }
78 }
79
80 /**
81 * free_Z:
82 * @dset: dataset information.
83 *
84 * Does a deep free on the data matrix.
85 */
86
free_Z(DATASET * dset)87 void free_Z (DATASET *dset)
88 {
89 if (dset != NULL && dset->Z != NULL) {
90 int i, v = dset_zcols_borrowed(dset) ? 1 : dset->v;
91
92 #if DDEBUG
93 fprintf(stderr, "Freeing Z (%p): %d vars\n", (void *) dset->Z, v);
94 #endif
95 for (i=0; i<v; i++) {
96 free(dset->Z[i]);
97 }
98 free(dset->Z);
99 dset->Z = NULL;
100 }
101 }
102
103 /**
104 * dataset_destroy_obs_markers:
105 * @dset: data information struct.
106 *
107 * Frees any allocated observation markers for @dset.
108 */
109
dataset_destroy_obs_markers(DATASET * dset)110 void dataset_destroy_obs_markers (DATASET *dset)
111 {
112 int i;
113
114 if (dset->S != NULL) {
115 for (i=0; i<dset->n; i++) {
116 free(dset->S[i]);
117 }
118 free(dset->S);
119 dset->S = NULL;
120 dset->markers = NO_MARKERS;
121 }
122 }
123
free_varinfo(DATASET * dset,int v)124 static void free_varinfo (DATASET *dset, int v)
125 {
126 if (dset->varinfo[v]->st != NULL) {
127 series_table_destroy(dset->varinfo[v]->st);
128 }
129 if (dset->varinfo[v]->label != NULL) {
130 free(dset->varinfo[v]->label);
131 }
132 free(dset->varinfo[v]);
133 }
134
135 /**
136 * clear_datainfo:
137 * @dset: data information struct.
138 * @code: either %CLEAR_FULL or %CLEAR_SUBSAMPLE.
139 *
140 * Frees the allocated content of a data information struct;
141 * note that @dset itself is not freed.
142 */
143
clear_datainfo(DATASET * dset,int code)144 void clear_datainfo (DATASET *dset, int code)
145 {
146 int i;
147
148 if (dset == NULL) return;
149
150 if (dset->S != NULL) {
151 dataset_destroy_obs_markers(dset);
152 }
153 if (dset->submask != NULL) {
154 free_subsample_mask(dset->submask);
155 dset->submask = NULL;
156 }
157 if (dset->restriction != NULL) {
158 free(dset->restriction);
159 dset->restriction = NULL;
160 }
161 if (dset->padmask != NULL) {
162 free(dset->padmask);
163 dset->padmask = NULL;
164 }
165 if (dset->pangrps != NULL) {
166 free(dset->pangrps);
167 dset->pangrps = NULL;
168 }
169
170 /* if this is not a sub-sample datainfo, free varnames, labels, etc. */
171
172 if (code == CLEAR_FULL) {
173 if (dset->varname != NULL) {
174 for (i=0; i<dset->v; i++) {
175 free(dset->varname[i]);
176 }
177 free(dset->varname);
178 dset->varname = NULL;
179 }
180 if (dset->varinfo != NULL) {
181 for (i=0; i<dset->v; i++) {
182 free_varinfo(dset, i);
183 }
184 free(dset->varinfo);
185 dset->varinfo = NULL;
186 }
187 if (dset->descrip != NULL) {
188 free(dset->descrip);
189 dset->descrip = NULL;
190 }
191 if (dset->mapfile != NULL) {
192 free(dset->mapfile);
193 dset->mapfile = NULL;
194 }
195
196 maybe_free_full_dataset(dset);
197
198 dset->v = dset->n = 0;
199 dset->structure = 0;
200 dset->pd = 1;
201 }
202 }
203
204 /**
205 * destroy_dataset:
206 * @dset: pointer to dataset.
207 *
208 * Frees all resources associated with @dset.
209 */
210
destroy_dataset(DATASET * dset)211 void destroy_dataset (DATASET *dset)
212 {
213 if (dset != NULL) {
214 free_Z(dset);
215 clear_datainfo(dset, CLEAR_FULL);
216 free(dset);
217 }
218 }
219
220 /**
221 * copy_dataset_obs_info:
222 * @targ: pointer to target dataset.
223 * @src: pointer to source dataset.
224 *
225 * Sets the "date" or observations information in @targ to that
226 * found in @src.
227 */
228
copy_dataset_obs_info(DATASET * targ,const DATASET * src)229 void copy_dataset_obs_info (DATASET *targ, const DATASET *src)
230 {
231 strcpy(targ->stobs, src->stobs);
232 strcpy(targ->endobs, src->endobs);
233 targ->sd0 = src->sd0;
234 targ->pd = src->pd;
235 targ->structure = src->structure;
236 }
237
238 /**
239 * dataset_obs_info_default:
240 * @dset: pointer to dataset.
241 *
242 * Sets the "date" or observations information in @dset to a
243 * simple default of cross-sectional data, observations 1 to n,
244 * where n is the %n element (number of observations) in @dset.
245 */
246
dataset_obs_info_default(DATASET * dset)247 void dataset_obs_info_default (DATASET *dset)
248 {
249 strcpy(dset->stobs, "1");
250 sprintf(dset->endobs, "%d", dset->n);
251 dset->sd0 = 1.0;
252 dset->pd = 1;
253 dset->structure = CROSS_SECTION;
254 }
255
256 /**
257 * dataset_allocate_obs_markers:
258 * @dset: pointer to dataset
259 *
260 * Allocates space in @dset for strings indentifying the
261 * observations and initializes all of the markers to empty
262 * strings. Note that These strings have a fixed maximum
263 * length of #OBSLEN - 1.
264 *
265 * Returns: 0 on success, E_ALLOC on error.
266 */
267
dataset_allocate_obs_markers(DATASET * dset)268 int dataset_allocate_obs_markers (DATASET *dset)
269 {
270 char **S = NULL;
271 int err = 0;
272
273 if (dset->S == NULL) {
274 /* not already allocated */
275 S = strings_array_new_with_length(dset->n, OBSLEN);
276 if (S == NULL) {
277 err = E_ALLOC;
278 } else {
279 dset->S = S;
280 }
281 }
282
283 if (dset->S != NULL) {
284 dset->markers = REGULAR_MARKERS;
285 }
286
287 return err;
288 }
289
gretl_varinfo_init(VARINFO * vinfo)290 static void gretl_varinfo_init (VARINFO *vinfo)
291 {
292 memset(vinfo, 0, sizeof *vinfo);
293 vinfo->label = NULL;
294 vinfo->st = NULL;
295 vinfo->stack_level = gretl_function_depth();
296 }
297
copy_label(VARINFO * vinfo,const char * src)298 static void copy_label (VARINFO *vinfo, const char *src)
299 {
300 free(vinfo->label);
301 if (src == NULL) {
302 vinfo->label = NULL;
303 } else {
304 vinfo->label = gretl_strdup(src);
305 }
306 }
307
labels_differ(const char * s1,const char * s2)308 static int labels_differ (const char *s1, const char *s2)
309 {
310 int nv = (s1 != NULL) + (s2 != NULL);
311
312 if (nv == 1) {
313 /* one is NULL, the other not */
314 return 1;
315 } else if (nv == 2) {
316 /* neither one is NULL */
317 return strcmp(s1, s2) != 0;
318 } else {
319 return 0;
320 }
321 }
322
323 /**
324 * copy_varinfo:
325 * @targ: target to which to copy.
326 * @src: source to copy from.
327 *
328 * Copies all relevant information from @src to @targ.
329 */
330
copy_varinfo(VARINFO * targ,const VARINFO * src)331 void copy_varinfo (VARINFO *targ, const VARINFO *src)
332 {
333 if (src == NULL || targ == NULL) {
334 return;
335 }
336 copy_label(targ, src->label);
337 strcpy(targ->display_name, src->display_name);
338 strcpy(targ->parent, src->parent);
339 targ->flags = src->flags;
340 targ->transform = src->transform;
341 targ->lag = src->lag;
342 targ->midas_period = src->midas_period;
343 targ->midas_freq = src->midas_freq;
344 targ->orig_pd = src->orig_pd;
345 targ->compact_method = src->compact_method;
346 targ->stack_level = src->stack_level;
347 if (src->st != NULL) {
348 targ->st = series_table_copy(src->st);
349 }
350 }
351
352 /* For use in the context of returning from a sub-sampled
353 dataset to the full one: trim off series names and
354 "varinfo" beyond the index @nv, which gives the number
355 of series in the full dataset.
356 */
357
shrink_varinfo(DATASET * dset,int nv)358 int shrink_varinfo (DATASET *dset, int nv)
359 {
360 char **vnames;
361 VARINFO **vi;
362 int i, err = 0;
363
364 if (nv > dset->v) {
365 return E_DATA;
366 } else if (nv == dset->v) {
367 return 0;
368 }
369
370 for (i=nv; i<dset->v; i++) {
371 free(dset->varname[i]);
372 free_varinfo(dset, i);
373 }
374
375 vnames = realloc(dset->varname, nv * sizeof *vnames);
376 vi = realloc(dset->varinfo, nv * sizeof *vi);
377
378 if (vnames == NULL || vi == NULL) {
379 free(vnames);
380 free(vi);
381 err = E_ALLOC;
382 } else {
383 dset->varname = vnames;
384 dset->varinfo = vi;
385 }
386
387 return err;
388 }
389
390 /**
391 * dataset_allocate_varnames:
392 * @dset: pointer to dataset.
393 *
394 * Given a blank @dset, which should have been obtained using
395 * datainfo_new(), allocate space for the names of variables.
396 * The @v member of @dset (representing the number of variables,
397 * including the automatically added constant at position 0) must be
398 * set before calling this function.
399 *
400 * Returns: 0 on sucess, E_ALLOC on failure.
401 */
402
dataset_allocate_varnames(DATASET * dset)403 int dataset_allocate_varnames (DATASET *dset)
404 {
405 int i, j, v = dset->v;
406 int err = 0;
407
408 dset->varname = strings_array_new_with_length(v, VNAMELEN);
409 if (dset->varname == NULL) {
410 return E_ALLOC;
411 }
412
413 dset->varinfo = calloc(v, sizeof *dset->varinfo);
414 if (dset->varinfo == NULL) {
415 free(dset->varname);
416 return E_ALLOC;
417 }
418
419 for (i=0; i<v; i++) {
420 dset->varinfo[i] = malloc(sizeof **dset->varinfo);
421 if (dset->varinfo[i] == NULL) {
422 for (j=0; j<i; j++) {
423 free(dset->varinfo[j]);
424 }
425 free(dset->varinfo);
426 dset->varinfo = NULL;
427 err = E_ALLOC;
428 break;
429 } else {
430 gretl_varinfo_init(dset->varinfo[i]);
431 }
432 }
433
434 if (!err) {
435 strcpy(dset->varname[0], "const");
436 series_set_label(dset, 0, _("auto-generated constant"));
437 }
438
439 return err;
440 }
441
442 /**
443 * datainfo_init:
444 * @dset: pointer to DATASET struct.
445 *
446 * Zeros all members of @dset and sets it as a plain cross-section.
447 * Designed for use with a DATASET structure that has not been
448 * obtained via datainfo_new().
449 */
450
datainfo_init(DATASET * dset)451 void datainfo_init (DATASET *dset)
452 {
453 dset->v = 0;
454 dset->n = 0;
455 dset->pd = 1;
456 dset->structure = CROSS_SECTION;
457 dset->sd0 = 1.0;
458 dset->t1 = 0;
459 dset->t2 = 0;
460 dset->stobs[0] = '\0';
461 dset->endobs[0] = '\0';
462
463 dset->Z = NULL;
464 dset->varname = NULL;
465 dset->varinfo = NULL;
466
467 dset->markers = NO_MARKERS;
468 dset->modflag = 0;
469
470 dset->S = NULL;
471 dset->descrip = NULL;
472 dset->submask = NULL;
473 dset->restriction = NULL;
474 dset->padmask = NULL;
475 dset->mapfile = NULL;
476 dset->pangrps = NULL;
477 dset->panel_pd = 0;
478 dset->panel_sd0 = 0;
479
480 dset->auxiliary = 0;
481 dset->rseed = 0;
482 }
483
484 /**
485 * datainfo_new:
486 *
487 * Creates a new data information struct pointer from scratch,
488 * properly initialized as empty (no variables, no observations).
489 *
490 * Returns: pointer to data information struct, or NULL on error.
491 */
492
datainfo_new(void)493 DATASET *datainfo_new (void)
494 {
495 DATASET *dset = malloc(sizeof *dset);
496
497 if (dset != NULL) {
498 datainfo_init(dset);
499 }
500
501 return dset;
502 }
503
real_create_new_dataset(int nvar,int nobs,gretlopt opt)504 static DATASET *real_create_new_dataset (int nvar, int nobs,
505 gretlopt opt)
506 {
507 DATASET *dset = datainfo_new();
508
509 if (dset == NULL) return NULL;
510
511 dset->v = nvar;
512 dset->n = nobs;
513 dset->Z = NULL;
514
515 if (start_new_Z(dset, opt)) {
516 free(dset);
517 return NULL;
518 }
519
520 if (opt & OPT_M) {
521 if (dataset_allocate_obs_markers(dset)) {
522 free_datainfo(dset);
523 return NULL;
524 }
525 }
526
527 dataset_obs_info_default(dset);
528
529 return dset;
530 }
531
532 /**
533 * create_new_dataset:
534 * @nvar: number of variables.
535 * @nobs: number of observations per variable.
536 * @markers: 1 if space should be allocated for "case markers" for
537 * the observations, 0 otherwise.
538 *
539 * Allocates space in the dataset to hold the specified number
540 * of variables and observations.
541 *
542 * Returns: pointer to dataset struct, or NULL on error.
543 */
544
create_new_dataset(int nvar,int nobs,int markers)545 DATASET *create_new_dataset (int nvar, int nobs, int markers)
546 {
547 gretlopt opt = markers ? OPT_M : OPT_NONE;
548
549 return real_create_new_dataset(nvar, nobs, opt);
550 }
551
create_auxiliary_dataset(int nvar,int nobs,gretlopt opt)552 DATASET *create_auxiliary_dataset (int nvar, int nobs, gretlopt opt)
553 {
554 DATASET *dset = real_create_new_dataset(nvar, nobs, opt);
555
556 if (dset != NULL) {
557 if (opt & OPT_B) {
558 dset->auxiliary = Z_COLS_BORROWED;
559 } else {
560 dset->auxiliary = 1;
561 }
562 }
563
564 return dset;
565 }
566
make_borrowed_Z(int v,int n)567 static double **make_borrowed_Z (int v, int n)
568 {
569 double **Z = malloc(v * sizeof *Z);
570
571 if (Z != NULL) {
572 int i;
573
574 for (i=0; i<v; i++) {
575 Z[i] = NULL;
576 }
577
578 Z[0] = malloc(n * sizeof **Z);
579
580 if (Z[0] == NULL) {
581 free(Z);
582 Z = NULL;
583 } else {
584 for (i=0; i<n; i++) {
585 Z[0][i] = 1.0;
586 }
587 }
588 }
589
590 return Z;
591 }
592
593 /**
594 * allocate_Z:
595 * @dset: pointer to dataset.
596 * @opt: may include OPT_B to indicate that the data columns
597 * will be "borrowed".
598 *
599 * Allocates the two-dimensional data array Z,
600 * based on the v (number of variables) and n (number of
601 * observations) members of @dset. The variable at
602 * position 0 is initialized to all 1s; other variables
603 * are initialized to #NADBL (unless OPT_B is given).
604 *
605 * Returns: 0 on success, E_ALLOC on error.
606 */
607
allocate_Z(DATASET * dset,gretlopt opt)608 int allocate_Z (DATASET *dset, gretlopt opt)
609 {
610 int i, t;
611 int err = 0;
612
613 if (dset->Z != NULL) {
614 fprintf(stderr, "*** error: allocate_Z called with non-NULL Z\n");
615 }
616
617 if (opt & OPT_B) {
618 dset->Z = make_borrowed_Z(dset->v, dset->n);
619 } else {
620 dset->Z = doubles_array_new(dset->v, dset->n);
621 }
622
623 if (dset->Z == NULL) {
624 err = E_ALLOC;
625 } else if (!(opt & OPT_B)) {
626 for (i=0; i<dset->v; i++) {
627 for (t=0; t<dset->n; t++) {
628 dset->Z[i][t] = (i == 0)? 1.0 : NADBL;
629 }
630 }
631 }
632
633 return err;
634 }
635
636 /**
637 * start_new_Z:
638 * @dset: pointer to dataset.
639 * @opt: if includes OPT_R we're sub-sampling from a full data set;
640 * if includes OPT_P, do not null out dset->S and markers.
641 *
642 * Initializes the data array within @dset (adding the constant in
643 * position 0).
644 *
645 * Returns: 0 on successful completion, non-zero on error.
646 */
647
start_new_Z(DATASET * dset,gretlopt opt)648 int start_new_Z (DATASET *dset, gretlopt opt)
649 {
650 if (allocate_Z(dset, opt)) {
651 return E_ALLOC;
652 }
653
654 dset->t1 = 0;
655 dset->t2 = dset->n - 1;
656
657 if (opt & OPT_R) {
658 /* sub-sampling */
659 dset->varname = NULL;
660 dset->varinfo = NULL;
661 } else if (dataset_allocate_varnames(dset)) {
662 free_Z(dset);
663 dset->Z = NULL;
664 return E_ALLOC;
665 }
666
667 if (!(opt & OPT_P)) {
668 dset->S = NULL;
669 dset->markers = NO_MARKERS;
670 }
671
672 dset->descrip = NULL;
673 dset->submask = NULL;
674 dset->restriction = NULL;
675 dset->padmask = NULL;
676 dset->mapfile = NULL;
677
678 if (!(opt & OPT_R)) {
679 dset->pangrps = NULL;
680 }
681
682 return 0;
683 }
684
reallocate_markers(DATASET * dset,int n)685 static int reallocate_markers (DATASET *dset, int n)
686 {
687 char **S;
688 int t;
689
690 S = realloc(dset->S, n * sizeof *S);
691 if (S == NULL) {
692 return 1;
693 }
694
695 for (t=dset->n; t<n; t++) {
696 S[t] = malloc(OBSLEN);
697 if (S[t] == NULL) {
698 int j;
699
700 for (j=dset->n; j<t; j++) {
701 free(S[j]);
702 }
703 free(S);
704 return 1;
705 }
706 S[t][0] = '\0';
707 }
708
709 dset->S = S;
710
711 return 0;
712 }
713
714 /* Allow for the possibility of centered seasonal dummies: usually
715 xon = 1 and xoff = 0, but in the centered case xon = 1 - 1/pd
716 and xoff = -1/pd.
717 */
718
get_xon_xoff(const double * x,int n,int pd,double * xon,double * xoff)719 static int get_xon_xoff (const double *x, int n, int pd, double *xon, double *xoff)
720 {
721 double cfac = 1.0 / pd;
722 double xc = 1.0 - cfac, yc = -cfac;
723 double x0 = 999, y0 = 999;
724 int t, ret = 1;
725
726 for (t=0; t<n && ret; t++) {
727 if (x[t] == 1.0) {
728 if (x0 == 999) x0 = 1.0;
729 else if (x[t] != x0) ret = 0;
730 } else if (x[t] == 0.0) {
731 if (y0 == 999) y0 = 0.0;
732 else if (x[t] != y0) ret = 0;
733 } else if (x[t] == xc) {
734 if (x0 == 999) x0 = xc;
735 else if (x[t] != x0) ret = 0;
736 } else if (x[t] == yc) {
737 if (y0 == 999) y0 = yc;
738 else if (x[t] != y0) ret = 0;
739 } else {
740 ret = 0;
741 }
742 }
743
744 if (ret) {
745 *xon = x0;
746 *xoff = y0;
747 }
748
749 return ret;
750 }
751
real_periodic_dummy(const double * x,int n,int * pd,int * offset,double * pxon,double * pxoff)752 static int real_periodic_dummy (const double *x, int n,
753 int *pd, int *offset,
754 double *pxon, double *pxoff)
755 {
756 double xon = 1.0, xoff = 0.0;
757 int onbak = 0;
758 int gap = 0;
759 int trail = 0;
760 int t, m = n - 1, ret = 1;
761
762 if (!get_xon_xoff(x, n, *pd, &xon, &xoff)) {
763 return 0;
764 }
765
766 *pd = -1;
767 *offset = -1;
768 trail = 0;
769
770 /* find number of trailing "off" values */
771 for (t=n-1; t>0; t--) {
772 if (x[t] == xoff) {
773 trail++;
774 } else {
775 if (x[t] == xon) {
776 m = t;
777 } else {
778 ret = 0;
779 }
780 break;
781 }
782 }
783
784 /* check for dummyhood and periodicity */
785 for (t=0; t<=m && ret; t++) {
786 if (x[t] == xoff) {
787 onbak = 0;
788 gap++;
789 } else if (x[t] == xon) {
790 if (onbak) {
791 ret = 0;
792 } else if (*offset < 0) {
793 *offset = gap;
794 } else if (*pd < 0) {
795 *pd = gap + 1;
796 if (*pd < *offset + 1) {
797 ret = 0;
798 }
799 } else if (gap != *pd - 1) {
800 ret = 0;
801 } else if (gap < trail) {
802 ret = 0;
803 }
804 gap = 0;
805 onbak = 1;
806 } else {
807 ret = 0;
808 break;
809 }
810 }
811
812 if (ret && pxon != NULL && pxoff != NULL) {
813 *pxon = xon;
814 *pxoff = xoff;
815 }
816
817 return ret;
818 }
819
820 /**
821 * is_periodic_dummy:
822 * @x: array to examine.
823 * @dset: pointer to dataset.
824 *
825 * Returns: 1 if @x is a periodic dummy variable,
826 * 0 otherwise.
827 */
828
is_periodic_dummy(const double * x,const DATASET * dset)829 int is_periodic_dummy (const double *x, const DATASET *dset)
830 {
831 int offset, pd = dset->pd;
832
833 return real_periodic_dummy(x, dset->n, &pd, &offset, NULL, NULL);
834 }
835
is_linear_trend(const double * x,int n)836 static int is_linear_trend (const double *x, int n)
837 {
838 int t, ret = 1;
839
840 for (t=1; t<n; t++) {
841 if (x[t] != x[t-1] + 1.0) {
842 ret = 0;
843 break;
844 }
845 }
846
847 return ret;
848 }
849
is_quadratic_trend(const double * x,int n)850 static int is_quadratic_trend (const double *x, int n)
851 {
852 double t2;
853 int t, ret = 1;
854
855 for (t=0; t<n; t++) {
856 t2 = (t + 1) * (t + 1);
857 if (x[t] != t2) {
858 ret = 0;
859 break;
860 }
861 }
862
863 return ret;
864 }
865
866 /**
867 * is_trend_variable:
868 * @x: array to examine.
869 * @n: number of elements in array.
870 *
871 * Returns: 1 if @x is a simple linear trend variable, with each
872 * observation equal to the preceding observation plus 1, or
873 * if @x is a quadratic trend starting at 1 for the first
874 * observation in the data set, and 0 otherwise.
875 */
876
is_trend_variable(const double * x,int n)877 int is_trend_variable (const double *x, int n)
878 {
879 int ret = 0;
880
881 if (is_linear_trend(x, n)) {
882 ret = 1;
883 } else if (is_quadratic_trend(x, n)) {
884 ret = 1;
885 }
886
887 return ret;
888 }
889
maybe_extend_trends(DATASET * dset,int oldn)890 static void maybe_extend_trends (DATASET *dset, int oldn)
891 {
892 int i, t;
893
894 for (i=1; i<dset->v; i++) {
895 if (is_linear_trend(dset->Z[i], oldn)) {
896 for (t=oldn; t<dset->n; t++) {
897 dset->Z[i][t] = dset->Z[i][t-1] + 1.0;
898 }
899 } else if (is_quadratic_trend(dset->Z[i], oldn)) {
900 for (t=oldn; t<dset->n; t++) {
901 dset->Z[i][t] = (t + 1) * (t + 1);
902 }
903 }
904 }
905 }
906
maybe_extend_dummies(DATASET * dset,int oldn)907 static void maybe_extend_dummies (DATASET *dset, int oldn)
908 {
909 int pd = dset->pd;
910 double xon = 1.0, xoff = 0.0;
911 int offset;
912 int i, t;
913
914 for (i=1; i<dset->v; i++) {
915 if (real_periodic_dummy(dset->Z[i], oldn, &pd, &offset, &xon, &xoff)) {
916 for (t=oldn; t<dset->n; t++) {
917 dset->Z[i][t] = ((t - offset) % pd)? xoff : xon;
918 }
919 }
920 }
921 }
922
923 /* regular, not panel-time, version */
924
real_dataset_add_observations(DATASET * dset,int n,gretlopt opt)925 static int real_dataset_add_observations (DATASET *dset, int n,
926 gretlopt opt)
927 {
928 double *x;
929 int oldn = dset->n;
930 int i, t, bign;
931 int err = 0;
932
933 if (dset_zcols_borrowed(dset)) {
934 fprintf(stderr, "*** Internal error: modifying borrowed data\n");
935 return E_DATA;
936 }
937
938 if (n <= 0) {
939 return 0;
940 }
941
942 if (dataset_is_panel(dset) && n % dset->pd != 0) {
943 return E_PDWRONG;
944 }
945
946 bign = oldn + n;
947
948 for (i=0; i<dset->v; i++) {
949 x = realloc(dset->Z[i], bign * sizeof *x);
950 if (x == NULL) {
951 return E_ALLOC;
952 }
953 dset->Z[i] = x;
954 for (t=oldn; t<bign; t++) {
955 dset->Z[i][t] = (i == 0)? 1.0 : NADBL;
956 }
957 }
958
959 if (dataset_has_markers(dset)) {
960 if (opt & OPT_D) {
961 dataset_destroy_obs_markers(dset);
962 } else {
963 if (reallocate_markers(dset, bign)) {
964 return E_ALLOC;
965 }
966 for (t=oldn; t<bign; t++) {
967 sprintf(dset->S[t], "%d", t + 1);
968 }
969 }
970 }
971
972 if (dset->t2 == dset->n - 1) {
973 dset->t2 = bign - 1;
974 }
975
976 dataset_set_nobs(dset, bign);
977
978 if (opt & OPT_A) {
979 maybe_extend_trends(dset, oldn);
980 maybe_extend_dummies(dset, oldn);
981 }
982
983 /* does daily data need special handling? */
984 ntolabel(dset->endobs, bign - 1, dset);
985
986 return err;
987 }
988
panel_dataset_extend_time(DATASET * dset,int n)989 static int panel_dataset_extend_time (DATASET *dset, int n)
990 {
991 double *utmp, *vtmp;
992 char **S = NULL;
993 int newT, oldT = dset->pd;
994 int oldn = dset->n;
995 int n_units;
996 int i, j, s, t, bign;
997 size_t usz, vsz;
998 int err = 0;
999
1000 if (!dataset_is_panel(dset)) {
1001 return E_PDWRONG;
1002 }
1003
1004 if (dset_zcols_borrowed(dset)) {
1005 fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1006 return E_DATA;
1007 }
1008
1009 if (n <= 0) {
1010 return 0;
1011 }
1012
1013 n_units = oldn / oldT;
1014 newT = oldT + n;
1015 bign = n_units * newT;
1016
1017 usz = newT * sizeof *utmp;
1018 vsz = bign * sizeof *vtmp;
1019
1020 utmp = malloc(usz);
1021 if (utmp == NULL) {
1022 return E_ALLOC;
1023 }
1024
1025 if (dataset_has_markers(dset)) {
1026 S = strings_array_new_with_length(bign, OBSLEN);
1027 if (S == NULL) {
1028 free(utmp);
1029 return E_ALLOC;
1030 }
1031 }
1032
1033 for (i=0; i<dset->v; i++) {
1034 int uconst = 1, utrend = 1, dtrend = 1;
1035 double xbak = NADBL;
1036 guint32 dt = 0, dbak = 0;
1037 int ed_err;
1038
1039 vtmp = malloc(vsz);
1040 if (vtmp == NULL) {
1041 err = E_ALLOC;
1042 goto bailout;
1043 }
1044
1045 s = 0;
1046 for (j=0; j<n_units; j++) {
1047 for (t=0; t<oldT; t++) {
1048 utmp[t] = dset->Z[i][s++];
1049 if (dtrend) {
1050 dt = epoch_day_from_ymd_basic(utmp[t]);
1051 }
1052 if (t == 0) {
1053 xbak = utmp[t];
1054 dbak = dt;
1055 } else {
1056 if (uconst && (utmp[t] != xbak)) {
1057 uconst = 0;
1058 }
1059 if (utrend && (utmp[t] != xbak + 1)) {
1060 utrend = 0;
1061 }
1062 if (dtrend && (dt != dbak + 1)) {
1063 dtrend = 0;
1064 }
1065 }
1066 xbak = utmp[t];
1067 dbak = dt;
1068 }
1069 for (t=oldT; t<newT; t++) {
1070 if (i == 0) {
1071 utmp[t] = 1.0;
1072 } else if (uconst) {
1073 utmp[t] = utmp[t-1];
1074 } else if (utrend) {
1075 utmp[t] = utmp[t-1] + 1;
1076 } else if (dtrend) {
1077 dt = epoch_day_from_ymd_basic(utmp[t-1]);
1078 utmp[t] = ymd_basic_from_epoch_day(dt+1, 0, &ed_err);
1079 } else {
1080 utmp[t] = NADBL;
1081 }
1082 }
1083 memcpy(vtmp + j*newT, utmp, usz);
1084 }
1085 free(dset->Z[i]);
1086 dset->Z[i] = vtmp;
1087 }
1088
1089 if (S != NULL) {
1090 int k = 0;
1091
1092 s = 0;
1093 for (j=0; j<n_units; j++) {
1094 for (t=0; t<newT; t++) {
1095 if (t < oldT) {
1096 strcpy(S[k], dset->S[s++]);
1097 } else {
1098 sprintf(S[k], "%d:%d", j+1, t+1);
1099 }
1100 k++;
1101 }
1102 }
1103 strings_array_free(dset->S, oldn);
1104 dset->S = S;
1105 S = NULL;
1106 }
1107
1108 if (dset->t2 == dset->n - 1) {
1109 dset->t2 = bign - 1;
1110 }
1111
1112 dataset_set_nobs(dset, bign);
1113 dset->pd = newT;
1114 ntolabel(dset->endobs, bign - 1, dset);
1115
1116 bailout:
1117
1118 free(utmp);
1119 if (S != NULL) {
1120 strings_array_free(S, bign);
1121 }
1122
1123 return err;
1124 }
1125
1126 /**
1127 * dataset_add_observations:
1128 * @dset: pointer to dataset.
1129 * @n: number of observations to add.
1130 * @opt: use OPT_A to attempt to recognize and
1131 * automatically extend simple deterministic variables such
1132 * as a time trend and periodic dummy variables;
1133 * use OPT_D to drop any observation markers rather than
1134 * expanding the set of markers and padding it out with
1135 * dummy values; use OPT_T to extend in the time dimension
1136 * in the case of panel data.
1137 *
1138 * Extends all series in the dataset by the specified number of
1139 * extra observations. The added values are initialized to
1140 * the missing value code, #NADBL, with the exception of
1141 * simple deterministic variables when OPT_A is given.
1142 *
1143 * Returns: 0 on success, non-zero code on error.
1144 */
1145
dataset_add_observations(DATASET * dset,int n,gretlopt opt)1146 int dataset_add_observations (DATASET *dset, int n, gretlopt opt)
1147 {
1148 if (opt & OPT_T) {
1149 return panel_dataset_extend_time(dset, n);
1150 } else {
1151 return real_dataset_add_observations(dset, n, opt);
1152 }
1153 }
1154
real_insert_observation(int pos,DATASET * dset)1155 static int real_insert_observation (int pos, DATASET *dset)
1156 {
1157 double *x;
1158 int n = dset->n + 1;
1159 int i, t;
1160 int err = 0;
1161
1162 for (i=0; i<dset->v; i++) {
1163 x = realloc(dset->Z[i], n * sizeof *x);
1164 if (x == NULL) {
1165 return E_ALLOC;
1166 }
1167 dset->Z[i] = x;
1168 for (t=dset->n; t>pos; t--) {
1169 dset->Z[i][t] = dset->Z[i][t-1];
1170 }
1171 dset->Z[i][pos] = (i == 0)? 1.0 : NADBL;
1172 }
1173
1174 if (dataset_has_markers(dset)) {
1175 if (reallocate_markers(dset, n)) {
1176 return E_ALLOC;
1177 }
1178 for (t=dset->n; t>pos; t--) {
1179 strcpy(dset->S[t], dset->S[t-1]);
1180 }
1181 sprintf(dset->S[pos], "%d", pos + 1);
1182 }
1183
1184 if (dset->t2 == dset->n - 1) {
1185 dset->t2 = n - 1;
1186 }
1187
1188 dataset_set_nobs(dset, n);
1189 ntolabel(dset->endobs, n - 1, dset);
1190
1191 return err;
1192 }
1193
1194 /**
1195 * dataset_drop_observations:
1196 * @dset: pointer to dataset.
1197 * @n: number of observations to drop.
1198 *
1199 * Deletes @n observations from the end of each series in the
1200 * dataset.
1201 *
1202 * Returns: 0 on success, non-zero code on error.
1203 */
1204
dataset_drop_observations(DATASET * dset,int n)1205 int dataset_drop_observations (DATASET *dset, int n)
1206 {
1207 double *x;
1208 int i, newn;
1209
1210 if (dset_zcols_borrowed(dset)) {
1211 fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1212 return E_DATA;
1213 }
1214
1215 if (n <= 0) {
1216 return 0;
1217 }
1218
1219 if (dataset_is_panel(dset) && n % dset->pd != 0) {
1220 return E_PDWRONG;
1221 }
1222
1223 newn = dset->n - n;
1224
1225 if (newn == 0) {
1226 free_Z(dset);
1227 clear_datainfo(dset, CLEAR_FULL);
1228 return 0;
1229 }
1230
1231 for (i=0; i<dset->v; i++) {
1232 x = realloc(dset->Z[i], newn * sizeof *x);
1233 if (x == NULL) {
1234 return E_ALLOC;
1235 }
1236 dset->Z[i] = x;
1237 }
1238
1239 if (dataset_has_markers(dset)) {
1240 if (reallocate_markers(dset, newn)) {
1241 return E_ALLOC;
1242 }
1243 }
1244
1245 if (dset->t2 > newn - 1) {
1246 dset->t2 = newn - 1;
1247 }
1248
1249 dataset_set_nobs(dset, newn);
1250
1251 /* does daily data need special handling? */
1252 ntolabel(dset->endobs, newn - 1, dset);
1253
1254 return 0;
1255 }
1256
1257 /**
1258 * dataset_shrink_obs_range:
1259 * @dset: pointer to dataset.
1260 *
1261 * Truncates the range of observations in the dataset, based on
1262 * the current values of the t1 and t2 members of @dset.
1263 *
1264 * Returns: 0 on success, non-zero code on error.
1265 */
1266
dataset_shrink_obs_range(DATASET * dset)1267 int dataset_shrink_obs_range (DATASET *dset)
1268 {
1269 int offset = dset->t1;
1270 int newn = dset->t2 - dset->t1 + 1;
1271 int tail = dset->n - newn;
1272 int err = 0;
1273
1274 if (dset_zcols_borrowed(dset)) {
1275 fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1276 return E_DATA;
1277 }
1278
1279 if (offset > 0) {
1280 /* If the revised dataset starts at an offset into
1281 the original, shift each series back to the start of
1282 its Z[i] array.
1283 */
1284 int i, mvsize;
1285
1286 mvsize = newn * sizeof **dset->Z;
1287 for (i=0; i<dset->v; i++) {
1288 memmove(dset->Z[i], dset->Z[i] + offset, mvsize);
1289 }
1290
1291 if (dataset_has_markers(dset)) {
1292 for (i=0; i<offset; i++) {
1293 free(dset->S[i]);
1294 }
1295 mvsize = newn * sizeof *dset->S;
1296 memmove(dset->S, dset->S + offset, mvsize);
1297 }
1298
1299 if (dset->structure == CROSS_SECTION) {
1300 ntolabel(dset->stobs, 0, dset);
1301 } else {
1302 /* FIXME panel? */
1303 ntolabel(dset->stobs, dset->t1, dset);
1304 dset->sd0 = get_date_x(dset->pd, dset->stobs);
1305 }
1306
1307 dset->t1 = 0;
1308 }
1309
1310 err = dataset_drop_observations(dset, tail);
1311
1312 return err;
1313 }
1314
1315 static int
dataset_expand_varinfo(int v0,int newvars,DATASET * dset)1316 dataset_expand_varinfo (int v0, int newvars, DATASET *dset)
1317 {
1318 char **varname = NULL;
1319 VARINFO **varinfo = NULL;
1320 int bigv = v0 + newvars;
1321 int i, v, err = 0;
1322
1323 varname = realloc(dset->varname, bigv * sizeof *varname);
1324 if (varname == NULL) {
1325 err = E_ALLOC;
1326 } else {
1327 dset->varname = varname;
1328 }
1329
1330 for (i=0; i<newvars && !err; i++) {
1331 v = v0 + i;
1332 dset->varname[v] = malloc(VNAMELEN);
1333 if (dset->varname[v] == NULL) {
1334 err = E_ALLOC;
1335 } else {
1336 dset->varname[v][0] = '\0';
1337 }
1338 }
1339
1340 if (!err && dset->varinfo != NULL) {
1341 varinfo = realloc(dset->varinfo, bigv * sizeof *varinfo);
1342 if (varinfo == NULL) {
1343 err = E_ALLOC;
1344 } else {
1345 dset->varinfo = varinfo;
1346 }
1347 for (i=0; i<newvars && !err; i++) {
1348 v = v0 + i;
1349 dset->varinfo[v] = malloc(sizeof **varinfo);
1350 if (dset->varinfo[v] == NULL) {
1351 err = E_ALLOC;
1352 } else {
1353 gretl_varinfo_init(dset->varinfo[v]);
1354 }
1355 }
1356 }
1357
1358 if (!err) {
1359 sync_dataset_shared_members(dset);
1360 }
1361
1362 return err;
1363 }
1364
1365 /* note: values of series newly added here are left uninitialized:
1366 that is the responsibility of the caller
1367 */
1368
real_add_series(int newvars,double * x,DATASET * dset)1369 static int real_add_series (int newvars, double *x,
1370 DATASET *dset)
1371 {
1372 double **newZ;
1373 int v0 = dset->v;
1374 int i, err = 0;
1375
1376 if (newvars == 0) {
1377 /* no-op */
1378 return 0;
1379 }
1380
1381 newZ = realloc(dset->Z, (v0 + newvars) * sizeof *newZ);
1382
1383 #if DDEBUG
1384 fprintf(stderr, "real_add_series: add %d vars, Z = %p\n",
1385 newvars, (void *) newZ);
1386 #endif
1387
1388 if (newZ == NULL) {
1389 err = E_ALLOC;
1390 } else {
1391 dset->Z = newZ;
1392 }
1393
1394 if (!err) {
1395 if (newvars == 1 && x != NULL) {
1396 /* a single new var, storage pre-allocated */
1397 newZ[v0] = x;
1398 } else {
1399 for (i=0; i<newvars && !err; i++) {
1400 newZ[v0+i] = malloc(dset->n * sizeof **newZ);
1401 if (newZ[v0+i] == NULL) {
1402 err = E_ALLOC;
1403 }
1404 }
1405 }
1406 }
1407
1408 if (!err && dset != fetch_full_dataset()) {
1409 /* don't expand varinfo if we're adding a series
1410 to the full dataset when currently sub-sampled,
1411 since in that case varinfo is shared between
1412 the two datasets
1413 */
1414 err = dataset_expand_varinfo(v0, newvars, dset);
1415 }
1416
1417 if (!err) {
1418 dset->v += newvars;
1419 }
1420
1421 return err;
1422 }
1423
1424 /**
1425 * dataset_add_series:
1426 * @dset: pointer to dataset.
1427 * @newvars: number of series to add.
1428 *
1429 * Adds space for the specified number of additional series
1430 * in the dataset. Values are initialized to zero.
1431 *
1432 * Returns: 0 on success, E_ALLOC on error.
1433 */
1434
dataset_add_series(DATASET * dset,int newvars)1435 int dataset_add_series (DATASET *dset, int newvars)
1436 {
1437 int v0 = dset->v;
1438 int err;
1439
1440 if (dset_zcols_borrowed(dset)) {
1441 fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1442 return E_DATA;
1443 }
1444
1445 err = real_add_series(newvars, NULL, dset);
1446
1447 if (!err) {
1448 int i, v, t;
1449
1450 for (i=0; i<newvars; i++) {
1451 v = v0 + i;
1452 for (t=0; t<dset->n; t++) {
1453 dset->Z[v][t] = 0.0;
1454 }
1455 }
1456 }
1457
1458 return err;
1459 }
1460
1461 /**
1462 * dataset_add_NA_series:
1463 * @dset: pointer to dataset.
1464 * @newvars: number of series to add.
1465 *
1466 * Adds space for the specified number of additional series
1467 * in the dataset. Values are initialized to NA.
1468 *
1469 * Returns: 0 on success, E_ALLOC on error.
1470 */
1471
dataset_add_NA_series(DATASET * dset,int newvars)1472 int dataset_add_NA_series (DATASET *dset, int newvars)
1473 {
1474 int v0 = dset->v;
1475 int err;
1476
1477 if (dset_zcols_borrowed(dset)) {
1478 fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1479 return E_DATA;
1480 }
1481
1482 err = real_add_series(newvars, NULL, dset);
1483
1484 if (!err) {
1485 int i, v, t;
1486
1487 for (i=0; i<newvars; i++) {
1488 v = v0 + i;
1489 for (t=0; t<dset->n; t++) {
1490 dset->Z[v][t] = NADBL;
1491 }
1492 }
1493 }
1494
1495 return err;
1496 }
1497
1498 /**
1499 * dataset_add_allocated_series:
1500 * @dset: pointer to dataset.
1501 * @x: one-dimensional data array.
1502 *
1503 * Adds @x as an additional series in the dataset.
1504 * The array @x is not copied; it should be treated as
1505 * belonging to @dset after this operation.
1506 *
1507 * Returns: 0 on success, E_ALLOC on error.
1508 */
1509
dataset_add_allocated_series(DATASET * dset,double * x)1510 int dataset_add_allocated_series (DATASET *dset, double *x)
1511 {
1512 if (dset_zcols_borrowed(dset)) {
1513 fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1514 return E_DATA;
1515 } else {
1516 return real_add_series(1, x, dset);
1517 }
1518 }
1519
1520 /**
1521 * dataset_add_series_as:
1522 * @dset: pointer to dataset.
1523 * @x: array to be added.
1524 * @name: name to give the new variable.
1525 *
1526 * Adds to the dataset a new series with name @name and
1527 * values given by @x. The new variable is added at one
1528 * level "deeper" (in terms of function execution) than the
1529 * current level. This is for use with user-defined functions.
1530 *
1531 * Returns: 0 on success, E_ALLOC on error.
1532 */
1533
dataset_add_series_as(DATASET * dset,double * x,const char * name)1534 int dataset_add_series_as (DATASET *dset, double *x, const char *name)
1535 {
1536 int v, t, err = 0;
1537
1538 if (dset_zcols_borrowed(dset)) {
1539 fprintf(stderr, "*** Internal error: modifying borrowed data\n");
1540 return E_DATA;
1541 }
1542
1543 if (dset->varinfo == NULL) {
1544 gretl_errmsg_set(_("Please open a data file first"));
1545 return 1;
1546 }
1547
1548 #if DDEBUG
1549 fprintf(stderr, "dataset_add_series_as: incoming Z=%p, name='%s'\n",
1550 (void *) dset->Z, name);
1551 #endif
1552
1553 err = real_add_series(1, NULL, dset);
1554
1555 if (!err) {
1556 v = dset->v - 1;
1557 for (t=0; t<dset->n; t++) {
1558 dset->Z[v][t] = x[t];
1559 }
1560 strcpy(dset->varname[v], name);
1561 dset->varinfo[v]->stack_level += 1;
1562 }
1563
1564 return err;
1565 }
1566
1567 /**
1568 * dataset_copy_series_as:
1569 * @dset: pointer to dataset.
1570 * @v: index number of variable to copy.
1571 * @name: name to give the copy.
1572 *
1573 * Makes a copy of series @v under the name @name.
1574 * The copy exists in a variable namespace one level "deeper"
1575 * (in terms of function execution) than the variable being copied.
1576 * This is for use with user-defined functions: a variable
1577 * supplied to a function as an argument is copied into the
1578 * function's namespace under the name it was given as a
1579 * parameter.
1580 *
1581 * Returns: 0 on success, E_ALLOC on error.
1582 */
1583
dataset_copy_series_as(DATASET * dset,int v,const char * name)1584 int dataset_copy_series_as (DATASET *dset, int v, const char *name)
1585 {
1586 int t, err;
1587
1588 err = real_add_series(1, NULL, dset);
1589
1590 if (!err) {
1591 int vnew = dset->v - 1;
1592
1593 for (t=0; t<dset->n; t++) {
1594 dset->Z[vnew][t] = dset->Z[v][t];
1595 }
1596 strcpy(dset->varname[vnew], name);
1597 copy_varinfo(dset->varinfo[vnew], dset->varinfo[v]);
1598 if (dset->varinfo[v]->flags & VAR_LISTARG) {
1599 dset->varinfo[vnew]->flags &= ~VAR_LISTARG;
1600 }
1601 dset->varinfo[vnew]->stack_level = gretl_function_depth() + 1;
1602 #if 0
1603 fprintf(stderr, "copied var %d ('%s', level %d) as var %d ('%s', level %d): ",
1604 v, dset->varname[v], dset->varinfo[v]->stack_level,
1605 vnew, name, dset->varinfo[vnew]->stack_level);
1606 fprintf(stderr, "Z[%d][0] = %g\n", vnew, dset->Z[vnew][0]);
1607 #endif
1608 }
1609
1610 return err;
1611 }
1612
1613 enum {
1614 DROP_NORMAL,
1615 DROP_SPECIAL
1616 };
1617
1618 /* DROP_SPECIAL is used when deleting variables from the "full" shadow
1619 of a sub-sampled dataset, after deleting those same variables from
1620 the sub-sampled version: in that case we don't mess with the
1621 pointer elements of the DATASET struct, because these are shared
1622 between the full and sub-sampled versions.
1623 */
1624
shrink_dataset_to_size(DATASET * dset,int nv,int drop)1625 static int shrink_dataset_to_size (DATASET *dset, int nv, int drop)
1626 {
1627 double **newZ;
1628
1629 #if DDEBUG
1630 fprintf(stderr, "shrink_dataset_to_size: dset at %p, dset->v=%d, nv=%d\n"
1631 " drop = %s\n", (void *) dset, dset->v, nv,
1632 (drop == DROP_NORMAL)? "DROP_NORMAL" : "DROP_SPECIAL");
1633 #endif
1634
1635 if (drop == DROP_NORMAL) {
1636 char **varname = realloc(dset->varname, nv * sizeof *varname);
1637 VARINFO **varinfo = realloc(dset->varinfo, nv * sizeof *varinfo);
1638
1639 if (varname == NULL || varinfo == NULL) {
1640 free(varname);
1641 free(varinfo);
1642 return E_ALLOC;
1643 }
1644
1645 dset->varname = varname;
1646 dset->varinfo = varinfo;
1647 sync_dataset_shared_members(dset);
1648 }
1649
1650 newZ = realloc(dset->Z, nv * sizeof *newZ);
1651 if (newZ == NULL) {
1652 return E_ALLOC;
1653 }
1654
1655 dset->Z = newZ;
1656 dset->v = nv;
1657
1658 return 0;
1659 }
1660
vars_renumbered(const int * list,DATASET * dset,int dmin)1661 static int vars_renumbered (const int *list, DATASET *dset,
1662 int dmin)
1663 {
1664 int i, ndel = 0;
1665
1666 for (i=dmin; i<dset->v; i++) {
1667 if (in_gretl_list(list, i)) {
1668 ndel++;
1669 } else if (ndel > 0 && !series_is_hidden(dset, i)) {
1670 return 1;
1671 }
1672 }
1673
1674 return 0;
1675 }
1676
overwrite_err(const char * name)1677 int overwrite_err (const char *name)
1678 {
1679 gretl_errmsg_sprintf(_("The variable %s is read-only"), name);
1680
1681 return E_DATA;
1682 }
1683
1684 /**
1685 * series_is_parent:
1686 * @dset: dataset information.
1687 * @v: ID number of series to test.
1688 *
1689 * Returns: 1 if variable @v is "parent" to a transformed
1690 * variable (e.g. a log, lag or difference), othewise 0.
1691 */
1692
series_is_parent(const DATASET * dset,int v)1693 int series_is_parent (const DATASET *dset, int v)
1694 {
1695 const char *s = dset->varname[v];
1696 int i;
1697
1698 if (*s == '\0') {
1699 return 0;
1700 }
1701
1702 for (i=1; i<dset->v; i++) {
1703 if (i != v && !strcmp(s, dset->varinfo[i]->parent)) {
1704 return 1;
1705 }
1706 }
1707
1708 return 0;
1709 }
1710
1711 /**
1712 * dataset_rename_series:
1713 * @dset: dataset information.
1714 * @v: ID number of the series to be renamed.
1715 * @name: new name to give the series.
1716 *
1717 * Returns: 0 on success, non-zero on error.
1718 */
1719
dataset_rename_series(DATASET * dset,int v,const char * name)1720 int dataset_rename_series (DATASET *dset, int v, const char *name)
1721 {
1722 int err = 0;
1723
1724 if (v <= 0 || v >= dset->v || name == NULL) {
1725 err = E_DATA;
1726 } else {
1727 err = check_varname(name);
1728 }
1729
1730 if (!err) {
1731 GretlType type;
1732
1733 type = user_var_get_type_by_name(name);
1734 if (type != GRETL_TYPE_NONE) {
1735 gretl_errmsg_set("There is already an object of this name");
1736 err = E_DATA;
1737 }
1738 }
1739
1740 if (!err && current_series_index(dset, name) >= 0) {
1741 gretl_errmsg_set("There is already a series of this name");
1742 err = E_DATA;
1743 }
1744
1745 if (!err && (object_is_const(dset->varname[v], v) ||
1746 series_is_parent(dset, v))) {
1747 err = overwrite_err(dset->varname[v]);
1748 }
1749
1750 if (!err && strcmp(dset->varname[v], name)) {
1751 dset->varname[v][0] = '\0';
1752 strncat(dset->varname[v], name, VNAMELEN-1);
1753 set_dataset_is_changed(dset, 1);
1754 }
1755
1756 return err;
1757 }
1758
1759 /**
1760 * dataset_replace_series:
1761 * @dset: pointer to dataset.
1762 * @v: ID number of the series to be replaced.
1763 * @x: replacement values.
1764 * @descrip: replacement description.
1765 * @flag: if = DS_GRAB_VALUES then replace dset->Z[@v]
1766 * with @x, otherwise copy the values in @x to dset->Z[@v].
1767 *
1768 * Replaces the description and numerical content of
1769 * series @v with the information provided.
1770 *
1771 * Returns: 0 on success, non-zero on error.
1772 */
1773
dataset_replace_series(DATASET * dset,int v,double * x,const char * descrip,DataCopyFlag flag)1774 int dataset_replace_series (DATASET *dset, int v,
1775 double *x, const char *descrip,
1776 DataCopyFlag flag)
1777 {
1778 if (v < 0 || v >= dset->v) {
1779 /* out of bounds */
1780 return E_DATA;
1781 }
1782
1783 if (object_is_const(dset->varname[v], v) ||
1784 series_is_parent(dset, v)) {
1785 return overwrite_err(dset->varname[v]);
1786 }
1787
1788 gretl_varinfo_init(dset->varinfo[v]);
1789 series_set_label(dset, v, descrip);
1790
1791 if (flag == DS_GRAB_VALUES) {
1792 free(dset->Z[v]);
1793 dset->Z[v] = x;
1794 } else {
1795 int t;
1796
1797 for (t=0; t<dset->n; t++) {
1798 dset->Z[v][t] = x[t];
1799 }
1800 }
1801
1802 set_dataset_is_changed(dset, 1);
1803
1804 return 0;
1805 }
1806
1807 /**
1808 * dataset_replace_series_data:
1809 * @dset: pointer to dataset.
1810 * @v: ID number of the series to be modified.
1811 * @x: replacement values.
1812 * @t1: start of sample range.
1813 * @t2: end of sample range.
1814 * @descrip: replacement description.
1815 *
1816 * Replaces the description and numerical content of
1817 * series @v over the given sample range, with the
1818 * information provided.
1819 *
1820 * Returns: 0 on success, non-zero on error.
1821 */
1822
dataset_replace_series_data(DATASET * dset,int v,const double * x,int t1,int t2,const char * descrip)1823 int dataset_replace_series_data (DATASET *dset, int v,
1824 const double *x,
1825 int t1, int t2,
1826 const char *descrip)
1827 {
1828 int t, s;
1829
1830 if (v < 0 || v >= dset->v) {
1831 /* out of bounds */
1832 return E_DATA;
1833 }
1834
1835 if (object_is_const(dset->varname[v], v) ||
1836 series_is_parent(dset, v)) {
1837 return overwrite_err(dset->varname[v]);
1838 }
1839
1840 gretl_varinfo_init(dset->varinfo[v]);
1841 series_set_label(dset, v, descrip);
1842
1843 s = 0;
1844 for (t=t1; t<=t2; t++) {
1845 dset->Z[v][t] = x[s++];
1846 }
1847
1848 set_dataset_is_changed(dset, 1);
1849
1850 return 0;
1851 }
1852
real_drop_listed_vars(int * list,DATASET * dset,int * renumber,int drop,PRN * prn)1853 static int real_drop_listed_vars (int *list, DATASET *dset,
1854 int *renumber, int drop,
1855 PRN *prn)
1856 {
1857 int oldv = dset->v, vmax = dset->v;
1858 char vname[VNAMELEN] = {0};
1859 int d0, d1;
1860 int delmin = oldv;
1861 int i, v, ndel = 0;
1862 int err = 0;
1863
1864 if (renumber != NULL) {
1865 *renumber = 0;
1866 }
1867
1868 if (list == NULL || list[0] == 0) {
1869 /* no-op */
1870 return 0;
1871 }
1872
1873 d0 = list[0];
1874
1875 check_variable_deletion_list(list, dset);
1876 d1 = list[0];
1877 if (prn != NULL && d1 == 1) {
1878 strcpy(vname, dset->varname[list[1]]);
1879 }
1880
1881 if (d1 == 0) {
1882 goto finish;
1883 }
1884
1885 #if DDEBUG
1886 fprintf(stderr, "real_drop_listed_variables: ");
1887 if (d1 == 1) {
1888 fprintf(stderr, "dropping var %d:\n", list[1]);
1889 } else {
1890 fprintf(stderr, "dropping %d vars:\n", d1);
1891 }
1892 fprintf(stderr, "memo: dset=%p, dset->varname=%p\n",
1893 (void *) dset, (void *) dset->varname);
1894 #endif
1895
1896 /* check that no vars to be deleted are marked "const", and do
1897 some preliminary accounting while we're at it
1898 */
1899 for (i=1; i<=list[0]; i++) {
1900 v = list[i];
1901 if (v > 0 && v < oldv) {
1902 if (object_is_const(dset->varname[v], v)) {
1903 return overwrite_err(dset->varname[v]);
1904 }
1905 if (v < delmin) {
1906 delmin = v;
1907 }
1908 ndel++;
1909 }
1910 }
1911
1912 if (ndel == 0) {
1913 return 0;
1914 }
1915
1916 if (renumber != NULL) {
1917 *renumber = vars_renumbered(list, dset, delmin);
1918 }
1919
1920 #if DDEBUG
1921 fprintf(stderr, "real_drop_listed_variables: lowest ID of deleted var"
1922 " = %d\n", delmin);
1923 #endif
1924
1925 /* free and set to NULL all the vars to be deleted */
1926 for (i=1; i<=list[0]; i++) {
1927 v = list[i];
1928 if (v > 0 && v < oldv) {
1929 free(dset->Z[v]);
1930 dset->Z[v] = NULL;
1931 if (drop == DROP_NORMAL) {
1932 free(dset->varname[v]);
1933 free_varinfo(dset, v);
1934 }
1935 }
1936 }
1937
1938 /* repack pointers if necessary */
1939
1940 for (v=1; v<vmax; v++) {
1941 if (dset->Z[v] == NULL) {
1942 int gap = 1;
1943
1944 for (i=v+1; i<vmax; i++) {
1945 if (dset->Z[i] == NULL) {
1946 gap++;
1947 } else {
1948 break;
1949 }
1950 }
1951
1952 if (i < vmax) {
1953 vmax -= gap;
1954 for (i=v; i<vmax; i++) {
1955 if (drop == DROP_NORMAL) {
1956 dset->varname[i] = dset->varname[i + gap];
1957 dset->varinfo[i] = dset->varinfo[i + gap];
1958 }
1959 dset->Z[i] = dset->Z[i + gap];
1960 }
1961 } else {
1962 /* deleting all subsequent vars: done */
1963 break;
1964 }
1965 }
1966 }
1967
1968 err = shrink_dataset_to_size(dset, oldv - ndel, drop);
1969
1970 finish:
1971
1972 /* report results, if appropriate */
1973
1974 if (!err && prn != NULL) {
1975 if (d0 == d1) {
1976 if (gretl_messages_on()) {
1977 if (*vname != '\0') {
1978 pprintf(prn, _("Deleted %s"), vname);
1979 } else {
1980 pprintf(prn, _("Deleted %d variables"), d1);
1981 }
1982 pputc(prn, '\n');
1983 }
1984 } else {
1985 if (d1 == 0) {
1986 pputs(prn, _("No variables were deleted"));
1987 } else if (*vname != '\0') {
1988 pprintf(prn, _("Deleted %s"), vname);
1989 } else {
1990 pprintf(prn, _("Deleted %d variables"), d1);
1991 }
1992 pprintf(prn, " (%s)\n", _("some data were in use"));
1993 }
1994 }
1995
1996 return err;
1997 }
1998
make_dollar_list(DATASET * dset,int * err)1999 static int *make_dollar_list (DATASET *dset, int *err)
2000 {
2001 int *list = NULL;
2002 int i;
2003
2004 for (i=1; i<dset->v; i++) {
2005 if (dset->varname[i][0] == '$') {
2006 list = gretl_list_append_term(&list, i);
2007 if (list == NULL) {
2008 *err = E_ALLOC;
2009 break;
2010 }
2011 }
2012 }
2013
2014 return list;
2015 }
2016
2017 /**
2018 * dataset_drop_listed_variables:
2019 * @list: list of series to drop, by ID number.
2020 * @dset: pointer to dataset.
2021 * @renumber: location for return of information on whether
2022 * remaining variables have been renumbered as a result, or
2023 * NULL.
2024 * @prn: pointer to printing struct.
2025 *
2026 * Deletes the series given in @list from the dataset. Remaining
2027 * series may have their ID numbers changed as a consequence. If
2028 * @renumber is not NULL, this location receives 1 in case series
2029 * have been renumbered, 0 otherwise.
2030 *
2031 * Returns: 0 on success, E_ALLOC on error.
2032 */
2033
dataset_drop_listed_variables(int * list,DATASET * dset,int * renumber,PRN * prn)2034 int dataset_drop_listed_variables (int *list,
2035 DATASET *dset,
2036 int *renumber,
2037 PRN *prn)
2038 {
2039 int oldv = dset->v;
2040 int *dlist = NULL;
2041 int dupv, free_dlist = 0;
2042 int err = 0;
2043
2044 if (dset->n == 0 || dset->v == 0) {
2045 return E_NODATA;
2046 }
2047
2048 if (dset_zcols_borrowed(dset)) {
2049 fprintf(stderr, "*** Internal error: modifying borrowed data\n");
2050 return E_DATA;
2051 }
2052
2053 if (list == NULL) {
2054 /* signal to drop internal "$" variables */
2055 dlist = make_dollar_list(dset, &err);
2056 if (err) {
2057 return err;
2058 } else if (dlist == NULL) {
2059 /* no-op */
2060 return 0;
2061 }
2062 free_dlist = 1;
2063 } else if (list[0] == 0) {
2064 /* no-op */
2065 return 0;
2066 } else {
2067 dlist = list;
2068 }
2069
2070 dupv = gretl_list_duplicates(dlist, DELEET);
2071 if (dupv >= 0) {
2072 gretl_errmsg_sprintf(_("variable %d duplicated in the "
2073 "command list."), dupv);
2074 return E_DATA;
2075 }
2076
2077 err = real_drop_listed_vars(dlist, dset, renumber,
2078 DROP_NORMAL, prn);
2079
2080 if (dlist[0] > 0) {
2081 if (!err && !dset->auxiliary) {
2082 err = gretl_lists_revise(dlist, 0);
2083 }
2084 if (!err && complex_subsampled()) {
2085 DATASET *fdset = fetch_full_dataset();
2086
2087 err = real_drop_listed_vars(dlist, fdset, NULL,
2088 DROP_SPECIAL, NULL);
2089 }
2090 }
2091
2092 if (free_dlist) {
2093 free(dlist);
2094 } else if (dset->v != oldv) {
2095 set_dataset_is_changed(dset, 1);
2096 }
2097
2098 return err;
2099 }
2100
2101 /**
2102 * dataset_drop_variable:
2103 * @v: ID number of variable to drop.
2104 * @dset: pointer to dataset.
2105 *
2106 * Deletes variable @v from the dataset.
2107 *
2108 * Returns: 0 on success, E_ALLOC on error.
2109 */
2110
dataset_drop_variable(int v,DATASET * dset)2111 int dataset_drop_variable (int v, DATASET *dset)
2112 {
2113 int list[2] = {1, v};
2114
2115 if (v <= 0 || v >= dset->v) {
2116 return E_DATA;
2117 }
2118
2119 if (dset_zcols_borrowed(dset)) {
2120 fprintf(stderr, "*** Internal error: modifying borrowed data\n");
2121 return E_DATA;
2122 }
2123
2124 return dataset_drop_listed_variables(list, dset, NULL, NULL);
2125 }
2126
2127 /**
2128 * dataset_renumber_variable:
2129 * @v_old: original ID number of variable.
2130 * @v_new: new ID number.
2131 * @dset: dataset information.
2132 *
2133 * Moves the variable that was originally at position @v_old
2134 * in the datset to position @v_new, renumbering other
2135 * variables as required.
2136 *
2137 * Returns: 0 on success, error code on error;
2138 */
2139
dataset_renumber_variable(int v_old,int v_new,DATASET * dset)2140 int dataset_renumber_variable (int v_old, int v_new,
2141 DATASET *dset)
2142 {
2143 double *x;
2144 VARINFO *vinfo;
2145 char vname[VNAMELEN];
2146 int i;
2147
2148 if (complex_subsampled()) {
2149 /* too tricky */
2150 gretl_errmsg_set(_("dataset is subsampled"));
2151 return E_DATA;
2152 }
2153
2154 if (dset_zcols_borrowed(dset)) {
2155 fprintf(stderr, "*** Internal error: modifying borrowed data\n");
2156 return E_DATA;
2157 }
2158
2159 if (v_old < 1 || v_old > dset->v - 1 ||
2160 v_new < 1 || v_new > dset->v - 1) {
2161 /* out of bounds */
2162 return E_DATA;
2163 }
2164
2165 if (v_old == v_new) {
2166 /* no-op */
2167 return 0;
2168 }
2169
2170 x = dset->Z[v_old];
2171 vinfo = dset->varinfo[v_old];
2172 strcpy(vname, dset->varname[v_old]);
2173
2174 if (v_new < v_old) {
2175 /* moving up in ordering */
2176 for (i=v_old; i>v_new; i--) {
2177 dset->Z[i] = dset->Z[i-1];
2178 strcpy(dset->varname[i], dset->varname[i-1]);
2179 dset->varinfo[i] = dset->varinfo[i-1];
2180 }
2181 } else {
2182 /* moving down in ordering */
2183 for (i=v_old; i<v_new; i++) {
2184 dset->Z[i] = dset->Z[i+1];
2185 strcpy(dset->varname[i], dset->varname[i+1]);
2186 dset->varinfo[i] = dset->varinfo[i+1];
2187 }
2188 }
2189
2190 dset->Z[v_new] = x;
2191 strcpy(dset->varname[v_new], vname);
2192 dset->varinfo[v_new] = vinfo;
2193
2194 set_dataset_is_changed(dset, 1);
2195
2196 return 0;
2197 }
2198
2199 /**
2200 * dataset_destroy_hidden_variables:
2201 * @dset: pointer to dataset.
2202 * @vmin: do not drop variables with ID numbers less than this.
2203 *
2204 * Deletes from the dataset any "hidden" variables that have
2205 * been added automatically (for example, auto-generated variables
2206 * used for the x-axis in graph plotting), and that have ID
2207 * numbers greater than or equal to @vmin. Never deletes the
2208 * automatically generated constant (ID number 0).
2209 *
2210 * Returns: 0 on success, E_ALLOC on error.
2211 */
2212
dataset_destroy_hidden_variables(DATASET * dset,int vmin)2213 int dataset_destroy_hidden_variables (DATASET *dset, int vmin)
2214 {
2215 int i, nhid = 0;
2216 int err = 0;
2217
2218 if (vmin <= 1) vmin = 1;
2219
2220 for (i=vmin; i<dset->v; i++) {
2221 if (series_is_hidden(dset, i)) {
2222 nhid++;
2223 }
2224 }
2225
2226 if (nhid > 0) {
2227 int *list = gretl_list_new(nhid);
2228
2229 if (list == NULL) {
2230 err = 1;
2231 } else {
2232 int j = 1;
2233
2234 for (i=vmin; i<dset->v; i++) {
2235 if (series_is_hidden(dset, i)) {
2236 list[j++] = i;
2237 }
2238 }
2239 err = dataset_drop_listed_variables(list, dset, NULL, NULL);
2240 free(list);
2241 }
2242 }
2243
2244 return err;
2245 }
2246
dataset_set_matrix_name(DATASET * dset,const char * name)2247 int dataset_set_matrix_name (DATASET *dset, const char *name)
2248 {
2249 int err = 0;
2250
2251 if (dset->descrip != NULL) {
2252 free(dset->descrip);
2253 dset->descrip = NULL;
2254 }
2255
2256 if (name != NULL && *name != '\0') {
2257 dset->descrip = malloc(strlen(name) + 8);
2258 if (dset->descrip == NULL) {
2259 err = E_ALLOC;
2260 } else {
2261 sprintf(dset->descrip, "matrix:%s", name);
2262 }
2263 }
2264
2265 return err;
2266 }
2267
dataset_get_matrix_name(const DATASET * dset)2268 const char *dataset_get_matrix_name (const DATASET *dset)
2269 {
2270 if (dset->descrip != NULL &&
2271 strlen(dset->descrip) > 7 &&
2272 !strncmp(dset->descrip, "matrix:", 7)) {
2273 return dset->descrip + 7;
2274 } else {
2275 return NULL;
2276 }
2277 }
2278
dataset_get_mapfile(const DATASET * dset)2279 const char *dataset_get_mapfile (const DATASET *dset)
2280 {
2281 return dset == NULL ? NULL : dset->mapfile;
2282 }
2283
dataset_set_mapfile(DATASET * dset,const char * fname)2284 void dataset_set_mapfile (DATASET *dset, const char *fname)
2285 {
2286 if (dset != NULL) {
2287 free(dset->mapfile);
2288 if (fname != NULL) {
2289 dset->mapfile = gretl_strdup(fname);
2290 } else {
2291 dset->mapfile = NULL;
2292 }
2293 }
2294 }
2295
dataset_period_label(const DATASET * dset)2296 const char *dataset_period_label (const DATASET *dset)
2297 {
2298 if (dset == NULL) {
2299 return _("periods");
2300 } else if (quarterly_or_monthly(dset)) {
2301 return dset->pd == 4 ? _("quarters") : _("months");
2302 } else if (annual_data(dset)) {
2303 return _("years");
2304 } else if (dataset_is_weekly(dset)) {
2305 return _("weeks");
2306 } else if (dataset_is_daily(dset)) {
2307 return _("days");
2308 } else if (dataset_is_hourly(dset)) {
2309 return _("hours");
2310 } else {
2311 return _("periods");
2312 }
2313 }
2314
2315 /* intended for use with newly imported data: trash any
2316 series that contain nothing but NAs
2317 */
2318
maybe_prune_dataset(DATASET ** pdset,gretl_string_table * st)2319 int maybe_prune_dataset (DATASET **pdset, gretl_string_table *st)
2320 {
2321 DATASET *dset = *pdset;
2322 int allmiss, prune = 0, err = 0;
2323 int i, t;
2324
2325 for (i=1; i<dset->v; i++) {
2326 allmiss = 1;
2327 for (t=0; t<dset->n; t++) {
2328 if (!na(dset->Z[i][t])) {
2329 allmiss = 0;
2330 break;
2331 }
2332 }
2333 if (allmiss) {
2334 prune = 1;
2335 break;
2336 }
2337 }
2338
2339 if (prune) {
2340 char *mask = calloc(dset->v, 1);
2341 DATASET *newset = NULL;
2342 int ndrop = 0;
2343
2344 if (mask == NULL) {
2345 return E_ALLOC;
2346 }
2347
2348 for (i=1; i<dset->v; i++) {
2349 allmiss = 1;
2350 for (t=0; t<dset->n; t++) {
2351 if (!na(dset->Z[i][t])) {
2352 allmiss = 0;
2353 break;
2354 }
2355 }
2356 if (allmiss) {
2357 mask[i] = 1;
2358 ndrop++;
2359 }
2360 }
2361
2362 newset = datainfo_new();
2363 if (newset == NULL) {
2364 err = E_ALLOC;
2365 } else {
2366 newset->v = dset->v - ndrop;
2367 newset->n = dset->n;
2368 err = start_new_Z(newset, 0);
2369 }
2370
2371 if (!err) {
2372 size_t ssize = dset->n * sizeof **newset->Z;
2373 int k = 1;
2374
2375 for (i=1; i<dset->v; i++) {
2376 if (!mask[i]) {
2377 memcpy(newset->Z[k], dset->Z[i], ssize);
2378 strcpy(newset->varname[k], dset->varname[i]);
2379 copy_label(newset->varinfo[k], dset->varinfo[i]->label);
2380 if (st != NULL && k < i) {
2381 gretl_string_table_reset_column_id(st, i, k);
2382 }
2383 k++;
2384 }
2385 }
2386
2387 destroy_dataset(dset);
2388 *pdset = newset;
2389
2390 fprintf(stderr, "pruned dataset to %d variables\n", newset->v);
2391 }
2392
2393 free(mask);
2394 }
2395
2396 return err;
2397 }
2398
2399 /* apparatus for sorting entire dataset */
2400
2401 typedef struct spoints_t_ spoints_t;
2402 typedef struct spoint_t_ spoint_t;
2403
2404 /* these structs will be passed to qsort() */
2405 struct spoint_t_ {
2406 int obsnum;
2407 int nvals;
2408 double *vals;
2409 };
2410
2411 /* wrapper for spoint_t to economize on malloc/free */
2412 struct spoints_t_ {
2413 int n_points;
2414 int n_vals;
2415 spoint_t *points;
2416 double *val;
2417 };
2418
free_spoints(spoints_t * sv)2419 static void free_spoints (spoints_t *sv)
2420 {
2421 free(sv->points);
2422 if (sv->n_vals > 1) {
2423 /* if n_vals == 1, @val is borrowed */
2424 free(sv->val);
2425 }
2426 free(sv);
2427 }
2428
allocate_spoints(DATASET * dset,const int * list)2429 static spoints_t *allocate_spoints (DATASET *dset,
2430 const int *list)
2431 {
2432 spoints_t *sv;
2433 int n = dset->n;
2434 int v = list[0];
2435
2436 sv = malloc(sizeof *sv);
2437
2438 if (sv != NULL) {
2439 sv->n_vals = v;
2440 sv->n_points = n;
2441 sv->points = malloc(n * sizeof *sv->points);
2442 if (v == 1) {
2443 /* can point directly at data array */
2444 sv->val = dset->Z[list[1]];
2445 } else {
2446 /* need to allocate storage */
2447 sv->val = malloc((n * v) * sizeof(double));
2448 }
2449 if (sv->points == NULL || sv->val == NULL) {
2450 free_spoints(sv);
2451 sv = NULL;
2452 } else {
2453 double *x = sv->val;
2454 int i;
2455
2456 for (i=0; i<n; i++) {
2457 sv->points[i].obsnum = i;
2458 sv->points[i].nvals = v;
2459 sv->points[i].vals = x;
2460 x += v;
2461 }
2462 }
2463 }
2464
2465 return sv;
2466 }
2467
compare_vals_up(const void * a,const void * b)2468 static int compare_vals_up (const void *a, const void *b)
2469 {
2470 const spoint_t *pa = (const spoint_t *) a;
2471 const spoint_t *pb = (const spoint_t *) b;
2472 int i, naa, nab, ret = 0;
2473
2474 for (i=0; i<pa->nvals && !ret; i++) {
2475 naa = isnan(pa->vals[i]);
2476 nab = isnan(pb->vals[i]);
2477 if (naa || nab) {
2478 if (!naa) {
2479 ret = -1;
2480 } else if (!nab) {
2481 ret = 1;
2482 }
2483 } else {
2484 ret = (pa->vals[i] > pb->vals[i]) - (pa->vals[i] < pb->vals[i]);
2485 }
2486 }
2487
2488 return ret;
2489 }
2490
compare_vals_down(const void * a,const void * b)2491 static int compare_vals_down (const void *a, const void *b)
2492 {
2493 const spoint_t *pa = (const spoint_t *) a;
2494 const spoint_t *pb = (const spoint_t *) b;
2495 int i, naa, nab, ret = 0;
2496
2497 for (i=0; i<pa->nvals && !ret; i++) {
2498 naa = isnan(pa->vals[i]);
2499 nab = isnan(pb->vals[i]);
2500 if (naa || nab) {
2501 if (!naa) {
2502 ret = 1;
2503 } else if (!nab) {
2504 ret = -1;
2505 }
2506 } else {
2507 ret = (pa->vals[i] < pb->vals[i]) - (pa->vals[i] > pb->vals[i]);
2508 }
2509 }
2510
2511 return ret;
2512 }
2513
2514 /* Turn a string-valued series into an integer-valued series
2515 representing the places of the strings in lexical order.
2516 */
2517
2518 typedef struct lexval_ {
2519 const char *s;
2520 int code;
2521 } lexval;
2522
compare_lexvals(const void * a,const void * b)2523 static int compare_lexvals (const void *a, const void *b)
2524 {
2525 const lexval *lva = (const lexval *) a;
2526 const lexval *lvb = (const lexval *) b;
2527
2528 return g_utf8_collate(lva->s, lvb->s);
2529 }
2530
series_to_lexvals(DATASET * dset,int v,int * targ)2531 static int series_to_lexvals (DATASET *dset, int v, int *targ)
2532 {
2533 int i, t, ct, n_strs;
2534 series_table *st = series_get_string_table(dset, v);
2535 char **strs = series_table_get_strings(st, &n_strs);
2536 lexval *lexvals;
2537
2538 lexvals = calloc(n_strs, sizeof *lexvals);
2539 if (lexvals == NULL) {
2540 return E_ALLOC;
2541 }
2542
2543 for (i=0; i<n_strs; i++) {
2544 lexvals[i].s = strs[i];
2545 lexvals[i].code = i+1;
2546 }
2547
2548 qsort(lexvals, n_strs, sizeof *lexvals, compare_lexvals);
2549
2550 for (t=0; t<dset->n; t++) {
2551 if (na(dset->Z[v][t])) {
2552 targ[t] = INT_MAX;
2553 } else {
2554 ct = (int) dset->Z[v][t];
2555 targ[t] = 0;
2556 for (i=0; i<n_strs; i++) {
2557 if (ct == lexvals[i].code) {
2558 targ[t] = i+1;
2559 break;
2560 }
2561 }
2562 }
2563 }
2564
2565 free(lexvals);
2566
2567 return 0;
2568 }
2569
dataset_sort_by(DATASET * dset,const int * list,gretlopt opt)2570 int dataset_sort_by (DATASET *dset, const int *list, gretlopt opt)
2571 {
2572 spoints_t *sv = NULL;
2573 double *x = NULL;
2574 char **S = NULL;
2575 int *xs = NULL;
2576 int *xsi = NULL;
2577 int ns = list[0];
2578 int nsvals = 0;
2579 int i, t, v;
2580 int err = 0;
2581
2582 sv = allocate_spoints(dset, list);
2583 if (sv == NULL) {
2584 return E_ALLOC;
2585 }
2586
2587 x = malloc(dset->n * sizeof *x);
2588 if (x == NULL) {
2589 free_spoints(sv);
2590 return E_ALLOC;
2591 }
2592
2593 if (dset->S != NULL) {
2594 S = malloc(dset->n * sizeof *S);
2595 if (S == NULL) {
2596 free_spoints(sv);
2597 free(x);
2598 return E_ALLOC;
2599 }
2600 }
2601
2602 for (i=0; i<ns; i++) {
2603 if (is_string_valued(dset, list[i+1])) {
2604 nsvals++;
2605 }
2606 }
2607 if (nsvals > 0) {
2608 xs = malloc(nsvals * dset->n * sizeof *xs);
2609 if (xs == NULL) {
2610 err = E_ALLOC;
2611 } else {
2612 xsi = xs;
2613 for (i=0; i<ns && !err; i++) {
2614 v = list[i+1];
2615 if (is_string_valued(dset, v)) {
2616 err = series_to_lexvals(dset, v, xsi);
2617 xsi += dset->n;
2618 }
2619 }
2620 }
2621 if (err) {
2622 goto bailout;
2623 }
2624 }
2625
2626 xsi = xs;
2627 for (i=0; i<ns; i++) {
2628 v = list[i+1];
2629 if (is_string_valued(dset, v)) {
2630 for (t=0; t<dset->n; t++) {
2631 if (xsi[t] == INT_MAX) {
2632 sv->points[t].vals[i] = NADBL;
2633 } else {
2634 sv->points[t].vals[i] = (double) xsi[t];
2635 }
2636 }
2637 xsi += dset->n;
2638 } else if (ns > 1) {
2639 for (t=0; t<dset->n; t++) {
2640 sv->points[t].vals[i] = dset->Z[v][t];
2641 }
2642 }
2643 }
2644
2645 if (opt & OPT_D) {
2646 /* descending */
2647 qsort(sv->points, dset->n, sizeof(spoint_t), compare_vals_down);
2648 } else {
2649 /* ascending */
2650 qsort(sv->points, dset->n, sizeof(spoint_t), compare_vals_up);
2651 }
2652
2653 /* reorder data values */
2654 for (i=1; i<dset->v; i++) {
2655 for (t=0; t<dset->n; t++) {
2656 x[t] = dset->Z[i][sv->points[t].obsnum];
2657 }
2658 memcpy(dset->Z[i], x, dset->n * sizeof *x);
2659 }
2660
2661 if (S != NULL) {
2662 /* reorder observation markers */
2663 for (t=0; t<dset->n; t++) {
2664 S[t] = dset->S[sv->points[t].obsnum];
2665 }
2666 for (t=0; t<dset->n; t++) {
2667 dset->S[t] = S[t];
2668 }
2669 }
2670
2671 bailout:
2672
2673 free_spoints(sv);
2674 free(xs);
2675 free(S);
2676 free(x);
2677
2678 return err;
2679 }
2680
dataset_sort(DATASET * dset,const int * list,gretlopt opt)2681 static int dataset_sort (DATASET *dset, const int *list,
2682 gretlopt opt)
2683 {
2684 if (dataset_is_time_series(dset) ||
2685 dataset_is_panel(dset)) {
2686 gretl_errmsg_set("You can only do this with undated data");
2687 return E_DATA;
2688 }
2689
2690 if (list == NULL || list[0] < 1) {
2691 return E_DATA;
2692 }
2693
2694 return dataset_sort_by(dset, list, opt);
2695 }
2696
2697 /**
2698 * dataset_drop_last_variables:
2699 * @dset: pointer to dataset.
2700 * @delvars: number of variables to be dropped.
2701 *
2702 * Deletes from the dataset the number @delvars of variables
2703 * that were added most recently (that have the highest ID numbers).
2704 *
2705 * Returns: 0 on success, E_ALLOC on error.
2706 */
2707
dataset_drop_last_variables(DATASET * dset,int delvars)2708 int dataset_drop_last_variables (DATASET *dset, int delvars)
2709 {
2710 int newv = dset->v - delvars;
2711 int i, err = 0;
2712
2713 if (delvars <= 0) {
2714 return 0;
2715 }
2716
2717 #if FULLDEBUG
2718 fprintf(stderr, "*** dataset_drop_last_variables: dropping %d, newv = %d\n",
2719 delvars, newv);
2720 #endif
2721
2722 if (newv < 1) {
2723 fprintf(stderr, "dataset_drop_last_vars: dset->v = %d, delvars = %d "
2724 " -> newv = %d\n (dset = %p)\n", dset->v, delvars,
2725 newv, (void *) dset);
2726 return E_DATA;
2727 }
2728
2729 #if FULLDEBUG
2730 for (i=0; i<dset->v; i++) {
2731 if (dset->Z[i] == NULL) {
2732 fprintf(stderr, "var %d (%s, level %d, val = NULL) %s\n",
2733 i, dset->varname[i], dset->varinfo[i]->stack_level,
2734 (i >= newv)? "deleting" : "");
2735 } else {
2736 fprintf(stderr, "var %d (%s, level %d, val[0] = %g) %s\n",
2737 i, dset->varname[i], dset->varinfo[i]->stack_level,
2738 dset->Z[i][0], (i >= newv)? "deleting" : "");
2739 }
2740 }
2741 #endif
2742
2743 #if 0
2744 fprintf(stderr, "dataset_drop_last_variables: origv=%d, newv=%d\n",
2745 dset->v, newv);
2746 for (i=1; i<dset->v; i++) {
2747 fprintf(stderr, "before: var[%d] = '%s'\n", i, dset->varname[i]);
2748 }
2749 #endif
2750
2751 for (i=newv; i<dset->v; i++) {
2752 free(dset->varname[i]);
2753 free_varinfo(dset, i);
2754 free(dset->Z[i]);
2755 dset->Z[i] = NULL;
2756 }
2757
2758 err = shrink_dataset_to_size(dset, newv, DROP_NORMAL);
2759
2760 #if 0
2761 for (i=1; i<dset->v; i++) {
2762 fprintf(stderr, "after: var[%d] = '%s'\n", i, dset->varname[i]);
2763 }
2764 #endif
2765
2766 if (!err && !dset->auxiliary) {
2767 err = gretl_lists_revise(NULL, newv);
2768 }
2769
2770 if (!err && complex_subsampled()) {
2771 DATASET *fset = fetch_full_dataset();
2772
2773 /*
2774 Context: we're deleting @delvars variables at the end of
2775 dset->Z, leaving @newv variables. The dataset is currently
2776 subsampled.
2777
2778 Question: should we delete any variables at the end of
2779 fset->Z to keep the two arrays in sync?
2780
2781 If @newv < fset->v, this must mean that at least some of
2782 the extra vars we're deleting from the current sub-sampled
2783 Z have already been synced to the full Z, so we should do
2784 the deletion from full Z.
2785 */
2786
2787 if (newv < fset->v) {
2788 #if FULLDEBUG
2789 fprintf(stderr, "prior fset->v = %d: shrinking full Z to %d vars\n",
2790 fset->v, newv);
2791 #endif
2792 for (i=newv; i<fset->v; i++) {
2793 free(fset->Z[i]);
2794 fset->Z[i] = NULL;
2795 }
2796 err = shrink_dataset_to_size(fset, newv, DROP_SPECIAL);
2797 }
2798 }
2799
2800 return err;
2801 }
2802
2803 /**
2804 * build_stacked_series:
2805 * @pstack: location for returning stacked series.
2806 * @list: list of series to be stacked.
2807 * @length: number of observations to use per input series.
2808 * @offset: offset at which to start drawing observations.
2809 * @dset: pointer to dataset.
2810 *
2811 * Really for internal use. Don't worry about it.
2812 *
2813 * Returns: 0 on success, non-zero code on error.
2814 */
2815
build_stacked_series(double ** pstack,int * list,int length,int offset,DATASET * dset)2816 int build_stacked_series (double **pstack, int *list,
2817 int length, int offset,
2818 DATASET *dset)
2819 {
2820 double *xstack = NULL;
2821 int nv, oldn, bign, tmax;
2822 int i, err = 0;
2823
2824 if (dset == NULL || dset->n == 0) {
2825 return E_NODATA;
2826 } else if (dataset_is_subsampled(dset)) {
2827 gretl_errmsg_set("stack: this function cannot be used when the dataset "
2828 "is sub-sampled");
2829 return E_DATA;
2830 } else if (list == NULL || list[0] <= 0) {
2831 return E_INVARG;
2832 } else if (length <= 0) {
2833 return E_INVARG;
2834 } else if (length + offset > dset->n) {
2835 return E_INVARG;
2836 }
2837
2838 nv = list[0];
2839
2840 #if PDEBUG
2841 fprintf(stderr, "nv = %d, length = %d, offset = %d\n", nv, length, offset);
2842 #endif
2843
2844 bign = nv * length;
2845 if (bign < dset->n) {
2846 bign = dset->n;
2847 }
2848
2849 #if PDEBUG
2850 fprintf(stderr, "bign = %d, allocating xstack (oldn = %d)\n", bign, dset->n);
2851 #endif
2852
2853 /* allocate container for stacked data */
2854 xstack = malloc(bign * sizeof *xstack);
2855 if (xstack == NULL) {
2856 return E_ALLOC;
2857 }
2858
2859 /* extend length of all series? */
2860 oldn = dset->n;
2861 if (bign > oldn) {
2862 err = dataset_add_observations(dset, bign - oldn, OPT_NONE);
2863 if (err) {
2864 return err;
2865 }
2866 }
2867
2868 tmax = offset + length;
2869
2870 /* construct stacked series */
2871 for (i=0; i<nv; i++) {
2872 int j = list[i+1];
2873 int bigt = length * i;
2874 int t;
2875
2876 for (t=offset; t<tmax; t++) {
2877 xstack[bigt] = dset->Z[j][t];
2878 if (dset->S != NULL && bigt != t) {
2879 strcpy(dset->S[bigt], dset->S[t]);
2880 }
2881 bigt++;
2882 }
2883 if (i == nv - 1) {
2884 for (t=bigt; t<bign; t++) {
2885 xstack[bigt++] = NADBL;
2886 }
2887 }
2888 }
2889
2890 *pstack = xstack;
2891
2892 return err;
2893 }
2894
found_log_parent(const char * s,char * targ)2895 static int found_log_parent (const char *s, char *targ)
2896 {
2897 int len = gretl_namechar_spn(s);
2898
2899 if (len < VNAMELEN && s[len] == ')') {
2900 char fmt[8];
2901
2902 sprintf(fmt, "%%%d[^)]", VNAMELEN-1);
2903 sscanf(s, fmt, targ);
2904 return 1;
2905 }
2906
2907 return 0;
2908 }
2909
2910 /**
2911 * series_is_log:
2912 * @dset: dataset information.
2913 * @i: ID number of series.
2914 * @parent: location to which to write the name of the
2915 * "parent" variable if any.
2916 *
2917 * Tries to determine if the variable with ID number @i is
2918 * the logarithm of some other variable.
2919 *
2920 * Returns: 1 if variable @i appears to be a log, else 0.
2921 */
2922
series_is_log(const DATASET * dset,int i,char * parent)2923 int series_is_log (const DATASET *dset, int i, char *parent)
2924 {
2925 const char *s = series_get_label(dset, i);
2926
2927 *parent = '\0';
2928
2929 if (s != NULL && *s != '\0') {
2930 char fmt[16];
2931
2932 sprintf(fmt, "= log of %%%ds", VNAMELEN-1);
2933
2934 if (sscanf(s, fmt, parent) == 1) {
2935 return 1;
2936 } else if (!strncmp(s, "log(", 4)) {
2937 return found_log_parent(s + 4, parent);
2938 } else {
2939 s += strcspn(s, "=");
2940 if (!strncmp(s, "=log(", 5)) {
2941 return found_log_parent(s + 5, parent);
2942 }
2943 }
2944 }
2945
2946 return 0;
2947 }
2948
2949 /**
2950 * series_set_discrete:
2951 * @dset: pointer to data information struct.
2952 * @i: index number of series.
2953 * @s: non-zero to mark variable as discrete, zero to
2954 * mark as not discrete.
2955 *
2956 * Mark a variable as being discrete or not.
2957 */
2958
series_set_discrete(DATASET * dset,int i,int s)2959 void series_set_discrete (DATASET *dset, int i, int s)
2960 {
2961 if (i > 0 && i < dset->v) {
2962 int flags = dset->varinfo[i]->flags;
2963
2964 if (s && !(flags & VAR_DISCRETE)) {
2965 dset->varinfo[i]->flags |= VAR_DISCRETE;
2966 set_dataset_is_changed(dset, 1);
2967 } else if (!s && (flags & VAR_DISCRETE)) {
2968 dset->varinfo[i]->flags &= ~VAR_DISCRETE;
2969 set_dataset_is_changed(dset, 1);
2970 }
2971 }
2972 }
2973
series_record_label(DATASET * dset,int i,const char * s)2974 int series_record_label (DATASET *dset, int i,
2975 const char *s)
2976 {
2977 char *targ = dset->varinfo[i]->label;
2978
2979 if (labels_differ(targ, s)) {
2980 copy_label(dset->varinfo[i], s);
2981 set_dataset_is_changed(dset, 1);
2982 }
2983
2984 return 0;
2985 }
2986
series_record_display_name(DATASET * dset,int i,const char * s)2987 int series_record_display_name (DATASET *dset, int i,
2988 const char *s)
2989 {
2990 char *targ = dset->varinfo[i]->display_name;
2991
2992 if (strcmp(targ, s)) {
2993 *targ = '\0';
2994 strncat(targ, s, MAXDISP - 1);
2995 set_dataset_is_changed(dset, 1);
2996 }
2997
2998 return 0;
2999 }
3000
series_get_graph_name(const DATASET * dset,int i)3001 const char *series_get_graph_name (const DATASET *dset, int i)
3002 {
3003 const char *ret = dset->varname[i];
3004
3005 if (dset->varinfo != NULL && dset->varinfo[i] != NULL) {
3006 if (dset->varinfo[i]->display_name[0] != '\0') {
3007 ret = dset->varinfo[i]->display_name;
3008 }
3009 }
3010
3011 return ret;
3012 }
3013
add_obs(int n,DATASET * dset,gretlopt opt,PRN * prn)3014 static int add_obs (int n, DATASET *dset, gretlopt opt, PRN *prn)
3015 {
3016 int err = 0;
3017
3018 if (complex_subsampled()) {
3019 pprintf(prn, _("The data set is currently sub-sampled.\n"));
3020 err = E_DATA;
3021 } else if (n <= 0) {
3022 err = E_PARSE;
3023 } else if (opt & OPT_T) {
3024 /* extending panel time */
3025 err = panel_dataset_extend_time(dset, n);
3026 if (!err) {
3027 pprintf(prn, _("Panel time extended by %d observations"), n);
3028 pputc(prn, '\n');
3029 }
3030 } else {
3031 err = dataset_add_observations(dset, n, OPT_A);
3032 if (!err) {
3033 pprintf(prn, _("Dataset extended by %d observations"), n);
3034 pputc(prn, '\n');
3035 extend_function_sample_range(n);
3036 }
3037 }
3038
3039 return err;
3040 }
3041
insert_obs(int n,DATASET * dset,PRN * prn)3042 static int insert_obs (int n, DATASET *dset, PRN *prn)
3043 {
3044 int err = 0;
3045
3046 if (complex_subsampled()) {
3047 pprintf(prn, _("The data set is currently sub-sampled.\n"));
3048 err = E_DATA;
3049 } else if (dataset_is_panel(dset)) {
3050 err = E_PDWRONG;
3051 } else if (n <= 0 || n > dset->n) {
3052 err = E_DATA;
3053 } else {
3054 err = real_insert_observation(n - 1, dset);
3055 }
3056
3057 return err;
3058 }
3059
dataset_op_from_string(const char * s)3060 int dataset_op_from_string (const char *s)
3061 {
3062 int op = DS_NONE;
3063
3064 if (s == NULL || *s == '\0') {
3065 return DS_NONE;
3066 }
3067
3068 if (!strcmp(s, "addobs")) {
3069 op = DS_ADDOBS;
3070 } else if (!strcmp(s, "compact")) {
3071 op = DS_COMPACT;
3072 } else if (!strcmp(s, "expand")) {
3073 op = DS_EXPAND;
3074 } else if (!strcmp(s, "transpose")) {
3075 op = DS_TRANSPOSE;
3076 } else if (!strcmp(s, "delete")) {
3077 op = DS_DELETE;
3078 } else if (!strcmp(s, "keep")) {
3079 op = DS_KEEP;
3080 } else if (!strcmp(s, "sortby")) {
3081 op = DS_SORTBY;
3082 } else if (!strcmp(s, "dsortby")) {
3083 op = DS_DSORTBY;
3084 } else if (!strcmp(s, "resample")) {
3085 op = DS_RESAMPLE;
3086 } else if (!strcmp(s, "restore")) {
3087 op = DS_RESTORE;
3088 } else if (!strcmp(s, "clear")) {
3089 op = DS_CLEAR;
3090 } else if (!strcmp(s, "renumber")) {
3091 op = DS_RENUMBER;
3092 } else if (!strcmp(s, "insobs")) {
3093 op = DS_INSOBS;
3094 } else if (!strcmp(s, "pad-daily")) {
3095 op = DS_PAD_DAILY;
3096 }
3097
3098 return op;
3099 }
3100
dataset_int_param(const char ** ps,int op,DATASET * dset,int * err)3101 static int dataset_int_param (const char **ps, int op,
3102 DATASET *dset, int *err)
3103 {
3104 const char *s = *ps;
3105 char test[32];
3106 int k = 0;
3107
3108 if ((op == DS_COMPACT || op == DS_EXPAND) &&
3109 !dataset_is_time_series(dset)) {
3110 *err = E_PDWRONG;
3111 return 0;
3112 }
3113
3114 if (op == DS_PAD_DAILY && !dated_daily_data(dset)) {
3115 *err = E_PDWRONG;
3116 return 0;
3117 }
3118
3119 *test = '\0';
3120 sscanf(s, "%31s", test);
3121 *ps += strlen(test);
3122
3123 k = gretl_int_from_string(test, err);
3124 if (*err) {
3125 return 0;
3126 }
3127
3128 if (k <= 0 || (op == DS_RESAMPLE && k < 1)) {
3129 *err = E_DATA;
3130 } else if (op == DS_INSOBS) {
3131 if (k > dset->n) {
3132 *err = E_DATA;
3133 }
3134 } else if (op == DS_COMPACT) {
3135 *err = E_PDWRONG;
3136 if (dset->pd == 12 && (k == 4 || k == 1)) {
3137 *err = 0;
3138 } else if (dset->pd == 4 && k == 1) {
3139 *err = 0;
3140 } else if (dset->pd == 52 && k == 12) {
3141 *err = 0;
3142 } else if (dated_daily_data(dset) && (k == 52 || k == 12)) {
3143 *err = 0;
3144 } else if (dataset_is_daily(dset) && k == 4) {
3145 if (strstr(*ps, "spread")) {
3146 *err = 0;
3147 }
3148 }
3149 } else if (op == DS_EXPAND) {
3150 *err = E_PDWRONG;
3151 if (dset->pd == 1 && (k == 4 || k == 12)) {
3152 *err = 0;
3153 } else if (dset->pd == 4 && k == 12) {
3154 *err = 0;
3155 }
3156 } else if (op == DS_PAD_DAILY) {
3157 if (k < 5 || k > 7 || k < dset->pd) {
3158 *err = E_PDWRONG;
3159 }
3160 }
3161
3162 if (*err == E_PDWRONG) {
3163 gretl_errmsg_set("This conversion is not supported");
3164 }
3165
3166 return k;
3167 }
3168
compact_data_set_wrapper(const char * s,DATASET * dset,int k)3169 static int compact_data_set_wrapper (const char *s, DATASET *dset,
3170 int k)
3171 {
3172 CompactMethod method = COMPACT_AVG;
3173
3174 if (s != NULL) {
3175 s += strspn(s, " ");
3176 if (!strcmp(s, "sum")) {
3177 method = COMPACT_SUM;
3178 } else if (!strcmp(s, "first") || !strcmp(s, "sop")) {
3179 method = COMPACT_SOP;
3180 } else if (!strcmp(s, "last") || !strcmp(s, "eop")) {
3181 method = COMPACT_EOP;
3182 } else if (!strcmp(s, "spread")) {
3183 method = COMPACT_SPREAD;
3184 } else if (!strcmp(s, "avg") || !strcmp(s, "average")) {
3185 method = COMPACT_AVG;
3186 } else if (*s != '\0') {
3187 return E_PARSE;
3188 }
3189 }
3190
3191 return compact_data_set(dset, k, method, 0, 0);
3192 }
3193
3194 static unsigned int resample_seed;
3195
get_resampling_seed(void)3196 unsigned int get_resampling_seed (void)
3197 {
3198 return resample_seed;
3199 }
3200
3201 /* resample the dataset by observation, with replacement */
3202
dataset_resample(DATASET * dset,int n,unsigned int seed)3203 int dataset_resample (DATASET *dset, int n, unsigned int seed)
3204 {
3205 DATASET *rset = NULL;
3206 char **S = NULL;
3207 unsigned int state;
3208 int T = sample_size(dset);
3209 int v = dset->v;
3210 int i, j, s, t;
3211 int err = 0;
3212
3213 if (v < 2) {
3214 return E_DATA;
3215 }
3216
3217 rset = datainfo_new();
3218 if (rset == NULL) {
3219 return E_ALLOC;
3220 }
3221
3222 rset->Z = malloc(v * sizeof *rset->Z);
3223 if (rset->Z == NULL) {
3224 free(rset);
3225 return E_ALLOC;
3226 }
3227
3228 for (i=0; i<v; i++) {
3229 rset->Z[i] = NULL;
3230 }
3231
3232 rset->v = v;
3233
3234 j = 0;
3235 for (i=0; i<dset->v && !err; i++) {
3236 rset->Z[j] = malloc(n * sizeof **rset->Z);
3237 if (rset->Z[j] == NULL) {
3238 err = E_ALLOC;
3239 } else if (i == 0) {
3240 for (t=0; t<n; t++) {
3241 rset->Z[j][t] = 1.0;
3242 }
3243 }
3244 j++;
3245 }
3246
3247 if (err) {
3248 goto bailout;
3249 }
3250
3251 if (dset->markers == REGULAR_MARKERS) {
3252 S = strings_array_new_with_length(n, OBSLEN);
3253 }
3254
3255 if (seed > 0) {
3256 resample_seed = seed;
3257 gretl_rand_set_seed(seed);
3258 } else {
3259 resample_seed = gretl_rand_get_seed();
3260 }
3261
3262 state = gretl_rand_int();
3263
3264 for (t=0; t<n; t++) {
3265 s = gretl_rand_int_max(T) + dset->t1;
3266 j = 1;
3267 for (i=1; i<dset->v; i++) {
3268 rset->Z[j][t] = dset->Z[i][s];
3269 j++;
3270 }
3271 if (S != NULL) {
3272 strcpy(S[t], dset->S[s]);
3273 }
3274 }
3275
3276 if (S != NULL) {
3277 rset->S = S;
3278 rset->markers = REGULAR_MARKERS;
3279 }
3280
3281 rset->varname = dset->varname;
3282 rset->varinfo = dset->varinfo;
3283 rset->descrip = dset->descrip;
3284
3285 rset->n = n;
3286 rset->t1 = 0;
3287 rset->t2 = n - 1;
3288 dataset_obs_info_default(rset);
3289
3290 set_dataset_resampled(rset, state);
3291
3292 bailout:
3293
3294 if (err) {
3295 free_Z(rset);
3296 clear_datainfo(rset, CLEAR_SUBSAMPLE);
3297 free(rset);
3298 } else {
3299 backup_full_dataset(dset);
3300 *dset = *rset;
3301 free(rset);
3302 }
3303
3304 return err;
3305 }
3306
3307 /* note: @list should contain a single series ID, that of the
3308 target series, and @param should hold a numeric string
3309 giving the position to which @targ should be moved;
3310 @fixmax is the greatest series ID number that cannot be
3311 changed (based on saved models, etc., as determined by the
3312 caller)
3313 */
3314
renumber_series_with_checks(const int * list,const char * param,int fixmax,DATASET * dset,PRN * prn)3315 int renumber_series_with_checks (const int *list,
3316 const char *param,
3317 int fixmax,
3318 DATASET *dset,
3319 PRN *prn)
3320 {
3321 char vname[VNAMELEN];
3322 int v_old, v_new;
3323 int f1, err = 0;
3324
3325 if (list == NULL || list[0] != 1 ||
3326 param == NULL || *param == '\0') {
3327 return E_INVARG;
3328 }
3329
3330 if (sscanf(param, "%d", &v_new) != 1) {
3331 return E_INVARG;
3332 }
3333
3334 v_old = list[1];
3335
3336 if (v_old < 1 || v_old > dset->v - 1 ||
3337 v_new < 1 || v_new > dset->v - 1) {
3338 /* out of bounds */
3339 return E_INVARG;
3340 } else if (v_new == v_old) {
3341 /* no-op */
3342 return 0;
3343 }
3344
3345 f1 = max_varno_in_saved_lists();
3346
3347 if (f1 > fixmax) {
3348 fixmax = f1;
3349 }
3350
3351 strcpy(vname, dset->varname[v_old]);
3352
3353 if (v_old <= fixmax) {
3354 gretl_errmsg_sprintf(_("Variable %s cannot be renumbered"), vname);
3355 err = E_DATA;
3356 } else if (v_new <= fixmax) {
3357 gretl_errmsg_sprintf(_("Target ID %d is not available"), v_new);
3358 err = E_DATA;
3359 } else {
3360 err = dataset_renumber_variable(v_old, v_new, dset);
3361 }
3362
3363 if (!err && gretl_messages_on()) {
3364 pprintf(prn, _("Renumbered %s as variable %d\n"),
3365 vname, v_new);
3366 maybe_list_series(dset, prn);
3367 }
3368
3369 return err;
3370 }
3371
3372 /* alternate forms:
3373
3374 @op @list @param
3375 dataset addobs 24
3376 dataset compact 1
3377 dataset compact 4 last
3378 dataset expand 4
3379 dataset transpose
3380 dataset sortby x1
3381 dataset resample 500
3382 dataset clear
3383 dataset renumber orig 2
3384 dataset insobs 13
3385 dataset pad-daily 7
3386
3387 */
3388
modify_dataset(DATASET * dset,int op,const int * list,const char * param,gretlopt opt,PRN * prn)3389 int modify_dataset (DATASET *dset, int op, const int *list,
3390 const char *param, gretlopt opt, PRN *prn)
3391 {
3392 static int resampled;
3393 int k = 0, err = 0;
3394
3395 if (dset == NULL || dset->Z == NULL) {
3396 return E_NODATA;
3397 }
3398
3399 #if 0
3400 fprintf(stderr, "modify_dataset: op=%d, param='%s'\n", op, param);
3401 printlist(list, "list");
3402 #endif
3403
3404 if (op == DS_CLEAR || op == DS_RENUMBER) {
3405 /* must be handled by the calling program */
3406 return E_NOTIMP;
3407 }
3408
3409 if (gretl_function_depth() > 0) {
3410 if (op == DS_ADDOBS && !complex_subsampled() &&
3411 dset->t2 == dset->n - 1 && !(opt & OPT_T)) {
3412 /* experimental, 2015-07-28: allow "addobs" within a
3413 function provided the dataset is not subsampled
3414 */
3415 goto proceed;
3416 } else {
3417 gretl_errmsg_set(_("The 'dataset' command is not available "
3418 "within functions"));
3419 return 1;
3420 }
3421 }
3422
3423 if (gretl_looping() && op != DS_RESAMPLE &&
3424 op != DS_RESTORE && op != DS_SORTBY) {
3425 pputs(prn, _("Sorry, this command is not available in loop mode\n"));
3426 return 1;
3427 }
3428
3429 if (op == DS_RESAMPLE && resampled) {
3430 /* repeated "resample": implicitly restore first */
3431 err = restore_full_sample(dset, NULL);
3432 if (err) {
3433 return err;
3434 } else {
3435 resampled = 0;
3436 }
3437 }
3438
3439 if (op != DS_RESTORE && complex_subsampled()) {
3440 gretl_errmsg_set(_("The data set is currently sub-sampled"));
3441 return 1;
3442 }
3443
3444 proceed:
3445
3446 if (op == DS_ADDOBS || op == DS_INSOBS ||
3447 op == DS_COMPACT || op == DS_RESAMPLE ||
3448 op == DS_PAD_DAILY) {
3449 if (param == NULL) {
3450 err = E_ARGS;
3451 } else {
3452 k = dataset_int_param(¶m, op, dset, &err);
3453 }
3454 if (err) {
3455 return err;
3456 }
3457 } else if (op == DS_EXPAND) {
3458 if (param != NULL) {
3459 k = dataset_int_param(¶m, op, dset, &err);
3460 } else if (dset->pd == 1) {
3461 k = 4;
3462 } else if (dset->pd == 4) {
3463 k = 12;
3464 } else {
3465 err = E_PDWRONG;
3466 }
3467 if (err) {
3468 return err;
3469 }
3470 }
3471
3472 if (op == DS_ADDOBS) {
3473 err = add_obs(k, dset, opt, prn);
3474 } else if (op == DS_INSOBS) {
3475 err = insert_obs(k, dset, prn);
3476 } else if (op == DS_COMPACT) {
3477 err = compact_data_set_wrapper(param, dset, k);
3478 } else if (op == DS_EXPAND) {
3479 err = expand_data_set(dset, k);
3480 } else if (op == DS_PAD_DAILY) {
3481 err = pad_daily_data(dset, k, prn);
3482 } else if (op == DS_TRANSPOSE) {
3483 err = transpose_data(dset);
3484 } else if (op == DS_SORTBY) {
3485 err = dataset_sort(dset, list, OPT_NONE);
3486 } else if (op == DS_DSORTBY) {
3487 err = dataset_sort(dset, list, OPT_D);
3488 } else if (op == DS_RESAMPLE) {
3489 err = dataset_resample(dset, k, 0);
3490 if (!err) {
3491 resampled = 1;
3492 }
3493 } else if (op == DS_RESTORE) {
3494 if (resampled) {
3495 err = restore_full_sample(dset, NULL);
3496 resampled = 0;
3497 } else {
3498 pprintf(prn, _("dataset restore: dataset is not resampled\n"));
3499 err = E_DATA;
3500 }
3501 } else if (op == DS_DELETE) {
3502 pprintf(prn, "dataset delete: not ready yet\n");
3503 } else if (op == DS_KEEP) {
3504 pprintf(prn, "dataset keep: not ready yet\n");
3505 } else {
3506 err = E_PARSE;
3507 }
3508
3509 return err;
3510 }
3511
3512 /* this supports the $datatype accessor */
3513
dataset_get_structure(const DATASET * dset)3514 int dataset_get_structure (const DATASET *dset)
3515 {
3516 if (dset == NULL || dset->n == 0) {
3517 return 0;
3518 } else if (dataset_is_panel(dset)) {
3519 return 3;
3520 } else if (dataset_is_time_series(dset)) {
3521 return 2;
3522 } else {
3523 return 1;
3524 }
3525 }
3526
3527 /**
3528 * panel_sample_size:
3529 * @dset: pointer to data information struct.
3530 *
3531 * Returns: the numbers of units/individuals in the current
3532 * sample range, or 0 if the dataset is not a panel.
3533 */
3534
panel_sample_size(const DATASET * dset)3535 int panel_sample_size (const DATASET *dset)
3536 {
3537 int ret = 0;
3538
3539 if (dataset_is_panel(dset)) {
3540 ret = (dset->t2 - dset->t1 + 1) / dset->pd;
3541 }
3542
3543 return ret;
3544 }
3545
3546 /**
3547 * multi_unit_panel_sample:
3548 * @dset: pointer to dataset.
3549 *
3550 * Returns: 1 if the dataset is a panel and the current sample
3551 * range includes two or more individuals, otherwise 0.
3552 */
3553
multi_unit_panel_sample(const DATASET * dset)3554 int multi_unit_panel_sample (const DATASET *dset)
3555 {
3556 int ret = 0;
3557
3558 if (dataset_is_panel(dset)) {
3559 ret = (dset->t2 - dset->t1 + 1) > dset->pd;
3560 }
3561
3562 return ret;
3563 }
3564
3565 /**
3566 * dataset_purge_missing_rows:
3567 * @dset: pointer to dataset.
3568 *
3569 * Removes empty rows from the dataset -- that is, observations
3570 * at which there are no non-missing values. This is intended
3571 * for daily data only.
3572 *
3573 * Returns: 0 on success, non-zero code on error.
3574 */
3575
dataset_purge_missing_rows(DATASET * dset)3576 int dataset_purge_missing_rows (DATASET *dset)
3577 {
3578 int new_n, missrow, totmiss = 0;
3579 int t1 = dset->t1;
3580 int t2 = dset->t2;
3581 char **S = NULL;
3582 double *Zi = NULL;
3583 size_t sz;
3584 int i, t, s;
3585 int err = 0;
3586
3587 for (t=0; t<dset->n; t++) {
3588 missrow = 1;
3589 for (i=1; i<dset->v; i++) {
3590 if (!na(dset->Z[i][t])) {
3591 missrow = 0;
3592 break;
3593 }
3594 }
3595 if (missrow) {
3596 totmiss++;
3597 if (t < dset->t1) {
3598 t1--;
3599 }
3600 if (t < dset->t2) {
3601 t2--;
3602 }
3603 }
3604 }
3605
3606 if (totmiss == 0) {
3607 /* no-op */
3608 return 0;
3609 }
3610
3611 if (dated_daily_data(dset) && dset->S == NULL) {
3612 err = dataset_allocate_obs_markers(dset);
3613 if (!err) {
3614 for (t=0; t<dset->n; t++) {
3615 calendar_date_string(dset->S[t], t, dset);
3616 }
3617 }
3618 }
3619
3620 for (t=0; t<dset->n; t++) {
3621 missrow = 1;
3622 for (i=1; i<dset->v; i++) {
3623 if (!na(dset->Z[i][t])) {
3624 missrow = 0;
3625 break;
3626 }
3627 }
3628 if (missrow) {
3629 sz = (dset->n - t) * sizeof **dset->Z;
3630 for (i=1; i<dset->v; i++) {
3631 memmove(dset->Z[i] + t, dset->Z[i] + t + 1, sz);
3632 }
3633 if (dset->S != NULL) {
3634 free(dset->S[t]);
3635 for (s=t; s<dset->n - 1; s++) {
3636 dset->S[s] = dset->S[s+1];
3637 }
3638 }
3639 }
3640 }
3641
3642 new_n = dset->n - totmiss;
3643
3644 for (i=1; i<dset->v; i++) {
3645 Zi = realloc(dset->Z[i], new_n * sizeof *Zi);
3646 if (Zi == NULL) {
3647 err = E_ALLOC;
3648 } else {
3649 dset->Z[i] = Zi;
3650 }
3651 }
3652
3653 if (!err && dset->S != NULL) {
3654 S = realloc(dset->S, new_n * sizeof *S);
3655 if (S == NULL) {
3656 err = E_ALLOC;
3657 } else {
3658 dset->S = S;
3659 if (dated_daily_data(dset)) {
3660 strcpy(dset->stobs, dset->S[0]);
3661 strcpy(dset->endobs, dset->S[new_n-1]);
3662 dset->sd0 = get_epoch_day(dset->stobs);
3663 }
3664 }
3665 }
3666
3667 dataset_set_nobs(dset, new_n);
3668 dset->t1 = t1;
3669 dset->t2 = t2;
3670
3671 return err;
3672 }
3673
3674 /**
3675 * dataset_set_time_series:
3676 * @dset: pointer to dataset.
3677 * @pd: time series annual frequency (1 for annual, 4
3678 * for quarterly or 12 for monthly).
3679 * @yr0: starting year.
3680 * @minor0: starting "minor" period, 1-based (quarter or
3681 * month).
3682 *
3683 * Sets time-series properties on @dset: frequency @pd with
3684 * starting observation @yr0, @minor0. If the data are
3685 * annual (@pd = 1) then @minor0 is ignored.
3686
3687 * Returns: 0 on success, non-zero code on error.
3688 */
3689
dataset_set_time_series(DATASET * dset,int pd,int yr0,int minor0)3690 int dataset_set_time_series (DATASET *dset, int pd,
3691 int yr0, int minor0)
3692 {
3693 int err = 0;
3694
3695 if (pd != 1 && pd != 4 && pd != 12) {
3696 err = E_DATA;
3697 } else if (yr0 < 1) {
3698 err = E_DATA;
3699 } else if (pd > 1 && (minor0 < 1 || minor0 > pd)) {
3700 err = E_DATA;
3701 } else {
3702 gchar *stobs = NULL;
3703
3704 dataset_destroy_obs_markers(dset);
3705 dset->structure = TIME_SERIES;
3706 dset->pd = pd;
3707
3708 if (pd == 1) {
3709 stobs = g_strdup_printf("%d", yr0);
3710 } else if (pd == 4) {
3711 stobs = g_strdup_printf("%d.%d", yr0, minor0);
3712 } else {
3713 stobs = g_strdup_printf("%d.%02d", yr0, minor0);
3714 }
3715
3716 dset->sd0 = dot_atof(stobs);
3717 ntolabel(dset->stobs, 0, dset);
3718 ntolabel(dset->endobs, dset->n - 1, dset);
3719 g_free(stobs);
3720 }
3721
3722 return err;
3723 }
3724
dataset_clear_sample_record(DATASET * dset)3725 void dataset_clear_sample_record (DATASET *dset)
3726 {
3727 if (dset->restriction != NULL) {
3728 free(dset->restriction);
3729 dset->restriction = NULL;
3730 }
3731 }
3732
3733 /**
3734 * series_is_discrete:
3735 * @dset: pointer to dataset.
3736 * @i: index number of series.
3737 *
3738 * Returns: non-zero iff series @i should be treated as discrete.
3739 */
3740
series_is_discrete(const DATASET * dset,int i)3741 int series_is_discrete (const DATASET *dset, int i)
3742 {
3743 return dset->varinfo[i]->flags & VAR_DISCRETE;
3744 }
3745
3746 /**
3747 * series_is_hidden:
3748 * @dset: pointer to dataset.
3749 * @i: index number of series.
3750 *
3751 * Returns: non-zero iff series @i is hidden.
3752 */
3753
series_is_hidden(const DATASET * dset,int i)3754 int series_is_hidden (const DATASET *dset, int i)
3755 {
3756 return dset->varinfo[i]->flags & VAR_HIDDEN;
3757 }
3758
3759 /**
3760 * series_is_generated:
3761 * @dset: pointer to dataset.
3762 * @i: index number of series.
3763 *
3764 * Returns: non-zero iff series @i was generated using
3765 * a formula or transformation function.
3766 */
3767
series_is_generated(const DATASET * dset,int i)3768 int series_is_generated (const DATASET *dset, int i)
3769 {
3770 return dset->varinfo[i]->flags & VAR_GENERATED;
3771 }
3772
3773 /**
3774 * series_is_listarg:
3775 * @dset: pointer to dataset.
3776 * @i: index number of series.
3777 * @lname: location to receive list name, or NULL.
3778 *
3779 * Returns: non-zero iff series @i has been marked as
3780 * belonging to a list argument to a function.
3781 */
3782
series_is_listarg(const DATASET * dset,int i,const char ** lname)3783 int series_is_listarg (const DATASET *dset, int i,
3784 const char **lname)
3785 {
3786 int ret = dset->varinfo[i]->flags & VAR_LISTARG ? 1 : 0;
3787
3788 if (ret && lname != NULL) {
3789 *lname = series_get_list_parent(i);
3790 }
3791
3792 return ret;
3793 }
3794
3795 /**
3796 * series_is_coded:
3797 * @dset: pointer to dataset.
3798 * @i: index number of series.
3799 *
3800 * Returns: non-zero iff series @i has been marked as
3801 * "coded", meaning that its numerical values represent
3802 * an arbitrary encoding of qualitative characteristics.
3803 */
3804
series_is_coded(const DATASET * dset,int i)3805 int series_is_coded (const DATASET *dset, int i)
3806 {
3807 return dset->varinfo[i]->flags & VAR_CODED;
3808 }
3809
3810 /**
3811 * series_is_integer_valued:
3812 * @dset: pointer to dataset.
3813 * @i: index number of series.
3814 *
3815 * Returns: non-zero iff all values in series @i are
3816 * representable as integers (ignoring missing values).
3817 */
3818
series_is_integer_valued(const DATASET * dset,int i)3819 int series_is_integer_valued (const DATASET *dset, int i)
3820 {
3821 const double *x = dset->Z[i];
3822 int t, n_ok = 0, ret = 1;
3823
3824 for (t=0; t<dset->n; t++) {
3825 if (!na(x[t])) {
3826 n_ok++;
3827 if (x[t] != floor(x[t])) {
3828 ret = 0;
3829 break;
3830 } else if (x[t] > INT_MAX || x[t] < INT_MIN) {
3831 ret = 0;
3832 break;
3833 }
3834 }
3835 }
3836
3837 if (n_ok == 0) {
3838 /* don't let an entirely missing series count as
3839 "integer-valued"
3840 */
3841 ret = 0;
3842 }
3843
3844 return ret;
3845 }
3846
3847 /**
3848 * series_set_flag:
3849 * @dset: pointer to dataset.
3850 * @i: index number of series.
3851 * @flag: flag to set.
3852 *
3853 * Sets the given @flag on series @i.
3854 */
3855
series_set_flag(DATASET * dset,int i,VarFlags flag)3856 void series_set_flag (DATASET *dset, int i, VarFlags flag)
3857 {
3858 if (i > 0 && i < dset->v) {
3859 dset->varinfo[i]->flags |= flag;
3860 }
3861 }
3862
3863 /**
3864 * series_unset_flag:
3865 * @dset: pointer to dataset.
3866 * @i: index number of series.
3867 * @flag: flag to remove.
3868 *
3869 * Unsets the given @flag on series @i.
3870 */
3871
series_unset_flag(DATASET * dset,int i,VarFlags flag)3872 void series_unset_flag (DATASET *dset, int i, VarFlags flag)
3873 {
3874 if (i > 0 && i < dset->v) {
3875 dset->varinfo[i]->flags &= ~flag;
3876 }
3877 }
3878
3879 /**
3880 * series_get_flags:
3881 * @dset: pointer to dataset.
3882 * @i: index number of series.
3883 *
3884 * Returns: the flags set series @i.
3885 */
3886
series_get_flags(const DATASET * dset,int i)3887 VarFlags series_get_flags (const DATASET *dset, int i)
3888 {
3889 if (i >= 0 && i < dset->v) {
3890 return dset->varinfo[i]->flags;
3891 } else {
3892 return 0;
3893 }
3894 }
3895
3896 /**
3897 * series_zero_flags:
3898 * @dset: pointer to dataset.
3899 * @i: index number of series.
3900 *
3901 * Sets flags on series @i to zero.
3902 */
3903
series_zero_flags(DATASET * dset,int i)3904 void series_zero_flags (DATASET *dset, int i)
3905 {
3906 if (i >= 0 && i < dset->v) {
3907 dset->varinfo[i]->flags = 0;
3908 }
3909 }
3910
3911 /**
3912 * series_get_label:
3913 * @dset: pointer to dataset.
3914 * @i: index number of series.
3915 *
3916 * Returns: the descriptive label for series @i.
3917 */
3918
series_get_label(const DATASET * dset,int i)3919 const char *series_get_label (const DATASET *dset, int i)
3920 {
3921 if (i >= 0 && i < dset->v) {
3922 return dset->varinfo[i]->label;
3923 } else {
3924 return NULL;
3925 }
3926 }
3927
3928 /**
3929 * series_get_display_name:
3930 * @dset: pointer to dataset.
3931 * @i: index number of series.
3932 *
3933 * Returns: the display name for series @i.
3934 */
3935
series_get_display_name(const DATASET * dset,int i)3936 const char *series_get_display_name (const DATASET *dset, int i)
3937 {
3938 if (i >= 0 && i < dset->v) {
3939 return dset->varinfo[i]->display_name;
3940 } else {
3941 return NULL;
3942 }
3943 }
3944
3945 /**
3946 * series_get_parent_name:
3947 * @dset: pointer to dataset.
3948 * @i: index number of series.
3949 *
3950 * Returns: the name of the "parent" of series @i
3951 * (e.g. if series @i is a lag of some other series)
3952 * or NULL if the series has no parent.
3953 */
3954
series_get_parent_name(const DATASET * dset,int i)3955 const char *series_get_parent_name (const DATASET *dset, int i)
3956 {
3957 if (i > 0 && i < dset->v) {
3958 if (dset->varinfo[i]->parent[0] != '\0') {
3959 return dset->varinfo[i]->parent;
3960 }
3961 }
3962
3963 return NULL;
3964 }
3965
3966 /**
3967 * series_get_parent_id:
3968 * @dset: pointer to dataset.
3969 * @i: index number of series.
3970 *
3971 * Returns: the ID number of the "parent" of series @i
3972 * (e.g. if series @i is a lag of some other series)
3973 * or -1 if the series has no parent.
3974 */
3975
series_get_parent_id(const DATASET * dset,int i)3976 int series_get_parent_id (const DATASET *dset, int i)
3977 {
3978 if (i > 0 && i < dset->v) {
3979 const char *pname = dset->varinfo[i]->parent;
3980
3981 if (*pname != '\0') {
3982 return current_series_index(dset, pname);
3983 }
3984 }
3985
3986 return -1;
3987 }
3988
series_get_lag(const DATASET * dset,int i)3989 int series_get_lag (const DATASET *dset, int i)
3990 {
3991 if (i > 0 && i < dset->v) {
3992 return dset->varinfo[i]->lag;
3993 } else {
3994 return 0;
3995 }
3996 }
3997
series_get_transform(const DATASET * dset,int i)3998 int series_get_transform (const DATASET *dset, int i)
3999 {
4000 if (i > 0 && i < dset->v) {
4001 return dset->varinfo[i]->transform;
4002 } else {
4003 return 0;
4004 }
4005 }
4006
4007 /**
4008 * series_get_compact_method:
4009 * @dset: pointer to dataset.
4010 * @i: index number of series.
4011 *
4012 * Returns: the compaction method set for series @i.
4013 */
4014
series_get_compact_method(const DATASET * dset,int i)4015 int series_get_compact_method (const DATASET *dset, int i)
4016 {
4017 if (i > 0 && i < dset->v) {
4018 return dset->varinfo[i]->compact_method;
4019 } else {
4020 return 0;
4021 }
4022 }
4023
4024 /**
4025 * series_get_stack_level:
4026 * @dset: pointer to dataset.
4027 * @i: index number of series.
4028 *
4029 * Returns: the stack level of series @i.
4030 */
4031
series_get_stack_level(const DATASET * dset,int i)4032 int series_get_stack_level (const DATASET *dset, int i)
4033 {
4034 if (i >= 0 && i < dset->v) {
4035 return dset->varinfo[i]->stack_level;
4036 } else {
4037 return 0;
4038 }
4039 }
4040
series_set_mtime(DATASET * dset,int i)4041 void series_set_mtime (DATASET *dset, int i)
4042 {
4043 if (i > 0 && i < dset->v) {
4044 dset->varinfo[i]->mtime = gretl_monotonic_time();
4045 }
4046 }
4047
series_get_mtime(const DATASET * dset,int i)4048 gint64 series_get_mtime (const DATASET *dset, int i)
4049 {
4050 if (i > 0 && i < dset->v) {
4051 return dset->varinfo[i]->mtime;
4052 } else {
4053 return 0;
4054 }
4055 }
4056
series_set_label(DATASET * dset,int i,const char * s)4057 void series_set_label (DATASET *dset, int i,
4058 const char *s)
4059 {
4060 if (i > 0 && i < dset->v) {
4061 copy_label(dset->varinfo[i], s);
4062 }
4063 }
4064
series_set_display_name(DATASET * dset,int i,const char * s)4065 void series_set_display_name (DATASET *dset, int i,
4066 const char *s)
4067 {
4068 if (i > 0 && i < dset->v) {
4069 char *targ = dset->varinfo[i]->display_name;
4070
4071 if (strlen(s) >= MAXDISP) {
4072 gchar *tmp = g_strdup(s);
4073
4074 strcpy(targ, gretl_utf8_truncate(tmp, MAXDISP-1));
4075 g_free(tmp);
4076 } else {
4077 strcpy(targ, s);
4078 }
4079 }
4080 }
4081
series_set_compact_method(DATASET * dset,int i,int method)4082 void series_set_compact_method (DATASET *dset, int i,
4083 int method)
4084 {
4085 if (i > 0 && i < dset->v) {
4086 dset->varinfo[i]->compact_method = method;
4087 }
4088 }
4089
series_set_parent(DATASET * dset,int i,const char * parent)4090 void series_set_parent (DATASET *dset, int i,
4091 const char *parent)
4092 {
4093 if (i > 0 && i < dset->v) {
4094 strcpy(dset->varinfo[i]->parent, parent);
4095 }
4096 }
4097
series_set_transform(DATASET * dset,int i,int transform)4098 void series_set_transform (DATASET *dset, int i,
4099 int transform)
4100 {
4101 if (i > 0 && i < dset->v) {
4102 dset->varinfo[i]->transform = transform;
4103 }
4104 }
4105
series_set_lag(DATASET * dset,int i,int lag)4106 void series_set_lag (DATASET *dset, int i, int lag)
4107 {
4108 if (i > 0 && i < dset->v) {
4109 dset->varinfo[i]->lag = lag;
4110 }
4111 }
4112
series_set_stack_level(DATASET * dset,int i,int level)4113 void series_set_stack_level (DATASET *dset, int i, int level)
4114 {
4115 if (i > 0 && i < dset->v) {
4116 dset->varinfo[i]->stack_level = level;
4117 }
4118 }
4119
series_increment_stack_level(DATASET * dset,int i)4120 void series_increment_stack_level (DATASET *dset, int i)
4121 {
4122 if (i > 0 && i < dset->v) {
4123 dset->varinfo[i]->stack_level += 1;
4124 }
4125 }
4126
series_decrement_stack_level(DATASET * dset,int i)4127 void series_decrement_stack_level (DATASET *dset, int i)
4128 {
4129 if (i > 0 && i < dset->v) {
4130 dset->varinfo[i]->stack_level -= 1;
4131 }
4132 }
4133
series_delete_metadata(DATASET * dset,int i)4134 void series_delete_metadata (DATASET *dset, int i)
4135 {
4136 if (i > 0 && i < dset->v &&
4137 dset->varinfo != NULL &&
4138 dset->varinfo[i] != NULL) {
4139 dset->varinfo[i]->lag = 0;
4140 dset->varinfo[i]->transform = 0;
4141 dset->varinfo[i]->parent[0] = '\0';
4142 }
4143 }
4144
series_ensure_level_zero(DATASET * dset)4145 void series_ensure_level_zero (DATASET *dset)
4146 {
4147 if (dset != NULL) {
4148 int i, n = 0;
4149
4150 for (i=1; i<dset->v; i++) {
4151 if (dset->varinfo[i]->stack_level > 0) {
4152 dset->varinfo[i]->stack_level = 0;
4153 n++;
4154 }
4155 }
4156 #if 0
4157 if (n > 0) {
4158 fprintf(stderr, "Unauthorized access to series detected!\n");
4159 }
4160 #endif
4161 }
4162 }
4163
series_attach_string_table(DATASET * dset,int i,series_table * st)4164 void series_attach_string_table (DATASET *dset, int i,
4165 series_table *st)
4166 {
4167 if (dset != NULL && i > 0 && i < dset->v) {
4168 series_set_discrete(dset, i, 1);
4169 dset->varinfo[i]->st = st;
4170 }
4171 }
4172
series_destroy_string_table(DATASET * dset,int i)4173 void series_destroy_string_table (DATASET *dset, int i)
4174 {
4175 if (dset != NULL && i > 0 && i < dset->v) {
4176 series_table_destroy(dset->varinfo[i]->st);
4177 dset->varinfo[i]->st = NULL;
4178 }
4179 }
4180
4181 /**
4182 * is_string_valued:
4183 * @dset: pointer to dataset.
4184 * @i: index number of series.
4185 *
4186 * Returns: 1 if series @i has a table of string values
4187 * (that is, a mapping from numerical values to associated
4188 * string values), otherwise 0.
4189 */
4190
is_string_valued(const DATASET * dset,int i)4191 int is_string_valued (const DATASET *dset, int i)
4192 {
4193 if (dset != NULL && i > 0 && i < dset->v) {
4194 return dset->varinfo[i]->st != NULL;
4195 } else {
4196 return 0;
4197 }
4198 }
4199
4200 /**
4201 * series_get_string_table:
4202 * @dset: pointer to dataset.
4203 * @i: index number of series.
4204 *
4205 * Returns: the string table attched to series @i or NULL if
4206 * there is no such table.
4207 */
4208
series_get_string_table(const DATASET * dset,int i)4209 series_table *series_get_string_table (const DATASET *dset, int i)
4210 {
4211 if (dset != NULL && i > 0 && i < dset->v) {
4212 return dset->varinfo[i]->st;
4213 } else {
4214 return NULL;
4215 }
4216 }
4217
4218 /**
4219 * series_get_string_for_obs:
4220 * @dset: pointer to dataset.
4221 * @i: index number of series.
4222 * @t: 0-based index of observation.
4223 *
4224 * Returns: the string associated with the numerical value of
4225 * series @i at observation @t, or NULL if there is no such string.
4226 * Note that NULL will be returned if the observation is missing.
4227 */
4228
series_get_string_for_obs(const DATASET * dset,int i,int t)4229 const char *series_get_string_for_obs (const DATASET *dset, int i,
4230 int t)
4231 {
4232 const char *ret = NULL;
4233
4234 if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
4235 ret = series_table_get_string(dset->varinfo[i]->st,
4236 dset->Z[i][t]);
4237 }
4238
4239 return ret;
4240 }
4241
4242 /**
4243 * series_get_string_for_value:
4244 * @dset: pointer to dataset.
4245 * @i: index number of series.
4246 * @val: the value to look up.
4247 *
4248 * Returns: the string associated with numerical value @val of
4249 * series @i, or NULL if there is no such string.
4250 */
4251
series_get_string_for_value(const DATASET * dset,int i,double val)4252 const char *series_get_string_for_value (const DATASET *dset, int i,
4253 double val)
4254 {
4255 const char *ret = NULL;
4256
4257 if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
4258 ret = series_table_get_string(dset->varinfo[i]->st, val);
4259 }
4260
4261 return ret;
4262 }
4263
4264 /**
4265 * series_set_string_val:
4266 * @dset: pointer to dataset.
4267 * @i: index number of series.
4268 * @t: 0-based index of observation.
4269 * @s: the string value to set.
4270 *
4271 * Attempts to set the string value for observation @t of series @i
4272 * to @s. This will fail if the series in question does not have
4273 * an associated table of string values.
4274 *
4275 * Returns: 0 on success, non-zero code on error.
4276 */
4277
series_set_string_val(DATASET * dset,int i,int t,const char * s)4278 int series_set_string_val (DATASET *dset, int i, int t, const char *s)
4279 {
4280 int err = 0;
4281
4282 if (i <= 0 || i >= dset->v) {
4283 err = E_DATA;
4284 } else if (dset->varinfo[i]->st == NULL) {
4285 err = E_TYPES;
4286 } else {
4287 series_table *st = dset->varinfo[i]->st;
4288 double x = series_table_get_value(st, s);
4289
4290 if (na(x)) {
4291 int k = series_table_add_string(st, s);
4292
4293 if (k < 0) {
4294 err = E_ALLOC;
4295 } else {
4296 dset->Z[i][t] = k;
4297 }
4298 } else {
4299 dset->Z[i][t] = x;
4300 }
4301 }
4302
4303 return err;
4304 }
4305
4306 /**
4307 * string_series_assign_value:
4308 * @dset: pointer to dataset.
4309 * @i: index number of string-valued series.
4310 * @t: 0-based index of observation.
4311 * @x: the numeric value to set.
4312 *
4313 * Attempts to set the value for observation @t of series @i
4314 * to @x. This will fail if the @x falls outside of the range
4315 * of values supported by the string table for the series.
4316 *
4317 * Returns: 0 on success, non-zero code on error.
4318 */
4319
string_series_assign_value(DATASET * dset,int i,int t,double x)4320 int string_series_assign_value (DATASET *dset, int i,
4321 int t, double x)
4322 {
4323 series_table *st = NULL;
4324 int err = 0;
4325
4326 if (i <= 0 || i >= dset->v) {
4327 err = E_DATA;
4328 } else if (na(x)) {
4329 dset->Z[i][t] = x;
4330 } else if (x != floor(x)) {
4331 err = E_TYPES;
4332 } else if ((st = dset->varinfo[i]->st) == NULL) {
4333 err = E_TYPES;
4334 } else if (series_table_get_string(st, x) == NULL) {
4335 err = E_DATA;
4336 } else {
4337 dset->Z[i][t] = x;
4338 }
4339
4340 return err;
4341 }
4342
4343 /**
4344 * series_decode_string:
4345 * @dset: pointer to dataset.
4346 * @i: index number of series.
4347 * @s: string to decode.
4348 *
4349 * Returns: the numerical value associated with the string
4350 * @s for series @i, or #NADBL if there's no such value.
4351 */
4352
series_decode_string(const DATASET * dset,int i,const char * s)4353 double series_decode_string (const DATASET *dset, int i, const char *s)
4354 {
4355 double ret = NADBL;
4356
4357 if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
4358 ret = series_table_get_value(dset->varinfo[i]->st, s);
4359 }
4360
4361 return ret;
4362 }
4363
4364 /**
4365 * series_get_string_vals:
4366 * @dset: pointer to dataset.
4367 * @i: index number of series.
4368 * @n_strs: location to receive the number of strings, or NULL.
4369 * @subsample: non-zero to restrict to current sample range.
4370 *
4371 * Returns: the array of strings associated with distinct numerical
4372 * values of series @i, or NULL if there's no such array. The returned
4373 * array should not be modified in any way; copy the strings first if
4374 * you need to modify them.
4375 */
4376
series_get_string_vals(const DATASET * dset,int i,int * n_strs,int subsample)4377 char **series_get_string_vals (const DATASET *dset, int i,
4378 int *n_strs, int subsample)
4379 {
4380 char **strs = NULL;
4381 int n = 0;
4382
4383 if (i > 0 && i < dset->v && dset->varinfo[i]->st != NULL) {
4384 strs = series_table_get_strings(dset->varinfo[i]->st, &n);
4385 }
4386
4387 if (strs != NULL && subsample && dataset_is_subsampled(dset)) {
4388 static char **substrs = NULL;
4389 const double *x = dset->Z[i] + dset->t1;
4390 int T = dset->t2 - dset->t1 + 1;
4391 gretl_matrix *valid;
4392 int err = 0;
4393
4394 if (substrs != NULL) {
4395 free(substrs);
4396 substrs = NULL;
4397 }
4398 valid = gretl_matrix_values(x, T, OPT_NONE, &err);
4399 if (err) {
4400 strs = NULL;
4401 n = 0;
4402 } else {
4403 int j, k, nv = valid->rows;
4404
4405 substrs = strings_array_new(nv);
4406 for (j=0; j<nv; j++) {
4407 k = gretl_vector_get(valid, j) - 1;
4408 substrs[j] = strs[k];
4409 }
4410 strs = substrs;
4411 n = nv;
4412 gretl_matrix_free(valid);
4413 }
4414 }
4415
4416 if (n_strs != NULL) {
4417 *n_strs = n;
4418 }
4419
4420 return strs;
4421 }
4422
4423 /**
4424 * series_get_string_width:
4425 * @dset: pointer to dataset.
4426 * @i: index number of series.
4427 *
4428 * Returns: the maximum of (a) the number of characters in the
4429 * name of series @i and (b) the number of bytes in the longest
4430 * "string value" attached to series @i, if applicable; or 0
4431 * if @i is not a valid series index.
4432 */
4433
series_get_string_width(const DATASET * dset,int i)4434 int series_get_string_width (const DATASET *dset, int i)
4435 {
4436 const char *lname;
4437 int n = 0;
4438
4439 if (i > 0 && i < dset->v) {
4440 n = strlen(dset->varname[i]);
4441 if (dset->varinfo[i]->flags & VAR_LISTARG) {
4442 lname = series_get_list_parent(i);
4443 if (lname != NULL) {
4444 n += strlen(lname) + 1;
4445 }
4446 }
4447 if (dset->varinfo[i]->st != NULL) {
4448 char **S;
4449 int j, ns, m;
4450
4451 S = series_table_get_strings(dset->varinfo[i]->st, &ns);
4452 for (j=0; j<ns; j++) {
4453 m = g_utf8_strlen(S[j], -1);
4454 if (m > n) {
4455 n = m;
4456 }
4457 }
4458 }
4459 }
4460
4461 return n;
4462 }
4463
4464 /**
4465 * steal_string_table:
4466 * @l_dset: pointer to recipient dataset.
4467 * @lvar: index number of target series.
4468 * @r_dset: pointer to donor dataset.
4469 * @rvar: index number of source series.
4470 *
4471 * Detaches the string table from @rvar in @r_dset and attaches it
4472 * to @lvar in @l_dset,
4473 *
4474 * Returns: 0 on success, non-zero code on error.
4475 */
4476
steal_string_table(DATASET * l_dset,int lvar,DATASET * r_dset,int rvar)4477 int steal_string_table (DATASET *l_dset, int lvar,
4478 DATASET *r_dset, int rvar)
4479 {
4480 if (l_dset != r_dset || lvar != rvar) {
4481 l_dset->varinfo[lvar]->st = r_dset->varinfo[rvar]->st;
4482 r_dset->varinfo[rvar]->st = NULL;
4483 series_set_discrete(l_dset, lvar, 1);
4484 }
4485
4486 return 0;
4487 }
4488
4489 /**
4490 * merge_string_tables:
4491 * @l_dset: pointer to current dataset.
4492 * @lvar: index number of target series.
4493 * @r_dset: pointer to dataset to be appended.
4494 * @rvar: index number of source series.
4495 *
4496 * Translates the encoding of the string-values of series @rvar
4497 * in @r_dset to that of series @lvar in @l_dset, adding extra
4498 * strings as needed.
4499 *
4500 * Returns: 0 on success, non-zero code on error.
4501 */
4502
merge_string_tables(DATASET * l_dset,int lvar,DATASET * r_dset,int rvar)4503 int merge_string_tables (DATASET *l_dset, int lvar,
4504 DATASET *r_dset, int rvar)
4505 {
4506 series_table *lst = l_dset->varinfo[lvar]->st;
4507 double dx, *x = r_dset->Z[rvar];
4508 const char *sr;
4509 int t, idx, err = 0;
4510
4511 for (t=0; t<r_dset->n && !err; t++) {
4512 if (na(x[t])) {
4513 continue;
4514 }
4515 /* get the right-hand side string associated with x[t] */
4516 sr = series_get_string_for_value(r_dset, rvar, x[t]);
4517 /* and look up its numeric code on the left */
4518 dx = series_decode_string(l_dset, lvar, sr);
4519 if (!na(dx)) {
4520 /* got a match: apply the code from @lst */
4521 x[t] = dx;
4522 } else {
4523 /* no match: we need to add a string to @lst */
4524 idx = series_table_add_string(lst, sr);
4525 if (idx < 0) {
4526 err = E_ALLOC;
4527 } else {
4528 x[t] = (double) idx;
4529 }
4530 }
4531 }
4532
4533 return err;
4534 }
4535
maybe_adjust_label(DATASET * dset,int v,char ** S,int ns)4536 static void maybe_adjust_label (DATASET *dset, int v,
4537 char **S, int ns)
4538 {
4539 int i, len = 3 * ns; /* "=" + ", " */
4540 char *tmp;
4541
4542 for (i=0; i<ns; i++) {
4543 len += strlen(S[i]) + 1 + floor(log10(1.0 + i));
4544 }
4545
4546 /* let's not create a super-long series label */
4547 if (len > 255) {
4548 return;
4549 }
4550
4551 tmp = calloc(len + 1, 1);
4552
4553 if (tmp != NULL) {
4554 char bit[16];
4555
4556 for (i=0; i<ns; i++) {
4557 sprintf(bit, "%d=", i+1);
4558 strcat(tmp, bit);
4559 strcat(tmp, S[i]);
4560 if (i < ns - 1) {
4561 strcat(tmp, ", ");
4562 }
4563 }
4564 copy_label(dset->varinfo[v], tmp);
4565 free(tmp);
4566 }
4567 }
4568
4569 /* Encode the strings in @a into numerical values in series
4570 @v of dataset @dset. "Return" via @pU the array of unique
4571 string values and via @pnu the number of such values.
4572 */
4573
alt_set_strvals(DATASET * dset,int v,gretl_array * a,char *** pU,int * pnu)4574 static int alt_set_strvals (DATASET *dset, int v, gretl_array *a,
4575 char ***pU, int *pnu)
4576 {
4577 char **S, **U = NULL;
4578 double *x = dset->Z[v];
4579 int i, pos, ns, nu = 0;
4580 int err = 0;
4581
4582 S = gretl_array_get_strings(a, &ns);
4583
4584 for (i=0; i<ns && !err; i++) {
4585 err = strings_array_add_uniq(&U, &nu, S[i], &pos);
4586 if (!err) {
4587 x[i] = pos + 1;
4588 }
4589 }
4590
4591 if (!err) {
4592 *pU = U;
4593 *pnu = nu;
4594 } else if (U != NULL) {
4595 strings_array_free(U, nu);
4596 }
4597
4598 return err;
4599 }
4600
alt_strvals_case(DATASET * dset,int v,gretl_array * a)4601 static int alt_strvals_case (DATASET *dset, int v, gretl_array *a)
4602 {
4603 double *x = dset->Z[v];
4604 int i;
4605
4606 if (gretl_array_get_length(a) != dset->n) {
4607 return 0;
4608 }
4609
4610 for (i=0; i<dset->n; i++) {
4611 if (x[i] != 0) {
4612 return 0;
4613 }
4614 }
4615
4616 return 1;
4617 }
4618
4619 /* here we're trying to set strings values on a series from
4620 scratch */
4621
series_set_string_vals(DATASET * dset,int i,gretl_array * a)4622 int series_set_string_vals (DATASET *dset, int i, gretl_array *a)
4623 {
4624 gretl_matrix *vals = NULL;
4625 char **S = NULL;
4626 int ns = 0;
4627 int err = 0;
4628
4629 if (a == NULL || dset == NULL || i < 1 || i >= dset->v) {
4630 return E_DATA;
4631 }
4632
4633 if (alt_strvals_case(dset, i, a)) {
4634 err = alt_set_strvals(dset, i, a, &S, &ns);
4635 if (err) {
4636 return err;
4637 } else {
4638 goto do_strtable;
4639 }
4640 }
4641
4642 /* get sorted vector of unique values */
4643 vals = gretl_matrix_values(dset->Z[i], dset->n, OPT_S, &err);
4644
4645 if (!err) {
4646 int i, nvals = gretl_vector_get_length(vals);
4647 double x0 = gretl_vector_get(vals, 0);
4648 double x1 = gretl_vector_get(vals, nvals - 1);
4649
4650 if (x0 < 1.0) {
4651 gretl_errmsg_set("The minimum value of the target series "
4652 "must be >= 1");
4653 err = E_DATA;
4654 } else {
4655 /* the values should all be integers */
4656 for (i=0; i<nvals && !err; i++) {
4657 x1 = gretl_vector_get(vals, i);
4658 if (x1 != floor(x1)) {
4659 gretl_errmsg_set("The series values must be integers");
4660 err = E_DATA;
4661 }
4662 }
4663 }
4664
4665 if (!err) {
4666 S = gretl_array_get_stringify_strings(a, (int) x1, &ns, &err);
4667 }
4668
4669 if (!err) {
4670 /* the strings should all be UTF-8 */
4671 for (i=0; i<ns && !err; i++) {
4672 if (!g_utf8_validate(S[i], -1, NULL)) {
4673 gretl_errmsg_sprintf("String %d is not valid UTF-8", i+1);
4674 err = E_DATA;
4675 }
4676 }
4677 }
4678 }
4679
4680 do_strtable:
4681
4682 if (!err) {
4683 series_table *st = series_table_new(S, ns, &err);
4684
4685 if (!err) {
4686 if (dset->varinfo[i]->st != NULL) {
4687 /* remove any pre-existing table */
4688 series_table_destroy(dset->varinfo[i]->st);
4689 }
4690 series_set_discrete(dset, i, 1);
4691 dset->varinfo[i]->st = st;
4692 maybe_adjust_label(dset, i, S, ns);
4693 }
4694 }
4695
4696 if (err && S != NULL && ns > 0) {
4697 strings_array_free(S, ns);
4698 }
4699
4700 gretl_matrix_free(vals);
4701
4702 return err;
4703 }
4704
4705 /* The pre-checked case: we know that series @i is suitable
4706 for stringifying, and that @S contains the right number of
4707 strings.
4708 */
4709
series_set_string_vals_direct(DATASET * dset,int i,char ** S,int ns)4710 int series_set_string_vals_direct (DATASET *dset, int i,
4711 char **S, int ns)
4712 {
4713 int err = 0;
4714 series_table *st = series_table_new(S, ns, &err);
4715
4716 if (!err) {
4717 if (dset->varinfo[i]->st != NULL) {
4718 /* remove any pre-existing table */
4719 series_table_destroy(dset->varinfo[i]->st);
4720 }
4721 series_set_discrete(dset, i, 1);
4722 dset->varinfo[i]->st = st;
4723 maybe_adjust_label(dset, i, S, ns);
4724 }
4725
4726 if (err && S != NULL && ns > 0) {
4727 strings_array_free(S, ns);
4728 }
4729
4730 return err;
4731 }
4732
4733 /**
4734 * series_recode_strings:
4735 * @dset: pointer to dataset.
4736 * @v: index number of target string-valued series.
4737 * @opt: may contain OPT_P (see below).
4738 * @changed: location to receive "changed" feedback, or NULL.
4739 *
4740 * This function "trims" the array of string values associated
4741 * with series @v so that it contains no redundant elements --
4742 * that is, values of which there is no instance in the
4743 * current sample -- and resets the numeric codes for the
4744 * strings if necessary.
4745 *
4746 * By default the original "series_table" attached to series @v
4747 * is destroyed, but if @opt contains OPT_P it is replaced but
4748 * not freed; this make sense only if another pointer to the
4749 * original table exists.
4750 *
4751 * If it happens that the current sample contains
4752 * instances of all the strings in the full dataset, this
4753 * function will not actually make any changes to @dset. The
4754 * @changed argument provides a means of determining
4755 * whether any change has been made.
4756 *
4757 * Returns: 0 on success, non-zero code on error.
4758 */
4759
series_recode_strings(DATASET * dset,int v,gretlopt opt,int * changed)4760 int series_recode_strings (DATASET *dset, int v, gretlopt opt,
4761 int *changed)
4762 {
4763 double *x = dset->Z[v] + dset->t1;
4764 int n = sample_size(dset);
4765 gretl_matrix *vals = NULL;
4766 gretl_matrix *repl = NULL;
4767 char **S = NULL;
4768 const char *si;
4769 int ns, nu = 0;
4770 int err = 0;
4771
4772 if (changed != NULL) {
4773 *changed = 0;
4774 }
4775
4776 ns = series_table_get_n_strings(dset->varinfo[v]->st);
4777 vals = gretl_matrix_values(x, n, OPT_NONE, &err);
4778
4779 if (!err) {
4780 /* number of unique values */
4781 nu = vals->rows;
4782 if (nu == ns) {
4783 /* nothing to be done */
4784 gretl_matrix_free(vals);
4785 return 0;
4786 }
4787 repl = gretl_zero_matrix_new(nu, 1);
4788 S = strings_array_new(nu);
4789 if (repl == NULL || S == NULL) {
4790 free(S);
4791 err = E_ALLOC;
4792 }
4793 }
4794
4795 if (!err) {
4796 int i;
4797
4798 for (i=0; i<nu; i++) {
4799 si = series_get_string_for_value(dset, v, vals->val[i]);
4800 S[i] = gretl_strdup(si);
4801 repl->val[i] = i + 1;
4802 }
4803
4804 substitute_values(x, x, n, vals->val, nu, repl->val, nu);
4805
4806 if (!(opt & OPT_P)) {
4807 series_table_destroy(dset->varinfo[v]->st);
4808 }
4809 /* the series table takes ownership of @S */
4810 dset->varinfo[v]->st = series_table_new(S, nu, &err);
4811
4812 if (changed != NULL) {
4813 *changed = 1;
4814 }
4815 }
4816
4817 gretl_matrix_free(vals);
4818 gretl_matrix_free(repl);
4819
4820 return err;
4821 }
4822
set_panel_groups_name(DATASET * dset,const char * vname)4823 int set_panel_groups_name (DATASET *dset, const char *vname)
4824 {
4825 if (dset->pangrps != NULL) {
4826 free(dset->pangrps);
4827 }
4828
4829 dset->pangrps = gretl_strdup(vname);
4830
4831 return (dset->pangrps == NULL)? E_ALLOC : 0;
4832 }
4833
4834 /* This should be called only after the "group names"
4835 property of @dset has been (recently) validated, via
4836 panel_group_names_ok().
4837 */
4838
get_panel_group_name(const DATASET * dset,int obs)4839 const char *get_panel_group_name (const DATASET *dset, int obs)
4840 {
4841 const char *s = NULL;
4842
4843 if (dataset_is_panel(dset) && dset->pangrps != NULL &&
4844 obs >= 0 && obs < dset->n) {
4845 int v = current_series_index(dset, dset->pangrps);
4846 series_table *st;
4847
4848 if ((st = series_get_string_table(dset, v)) != NULL) {
4849 s = series_table_get_string(st, dset->Z[v][obs]);
4850 }
4851 }
4852
4853 return (s != NULL)? s : "??";
4854 }
4855
panel_group_names_ok(const DATASET * dset,int maxlen)4856 int panel_group_names_ok (const DATASET *dset, int maxlen)
4857 {
4858 int ok = 0;
4859
4860 if (dataset_is_panel(dset) && dset->pangrps != NULL) {
4861 int ns, v = current_series_index(dset, dset->pangrps);
4862
4863 if (v > 0 && v < dset->v) {
4864 char **S = series_get_string_vals(dset, v, &ns, 0);
4865
4866 if (S != NULL && ns >= dset->n / dset->pd) {
4867 ok = 1; /* provisional */
4868 if (maxlen > 0) {
4869 int i;
4870
4871 for (i=0; i<ns; i++) {
4872 if (strlen(S[i]) > maxlen) {
4873 ok = 0;
4874 break;
4875 }
4876 }
4877 }
4878 }
4879 }
4880 }
4881
4882 return ok;
4883 }
4884
panel_group_names_varname(const DATASET * dset)4885 const char *panel_group_names_varname (const DATASET *dset)
4886 {
4887 if (dataset_is_panel(dset) && dset->pangrps != NULL) {
4888 int ns, v = current_series_index(dset, dset->pangrps);
4889
4890 if (v > 0 && v < dset->v) {
4891 char **S = series_get_string_vals(dset, v, &ns, 0);
4892
4893 if (S != NULL) {
4894 int ng = dset->n / dset->pd;
4895
4896 if (ns >= ng) {
4897 return dset->pangrps;
4898 }
4899 }
4900 }
4901 }
4902
4903 return NULL;
4904 }
4905
is_panel_group_names_series(const DATASET * dset,int v)4906 int is_panel_group_names_series (const DATASET *dset, int v)
4907 {
4908 if (dataset_is_panel(dset) && dset->pangrps != NULL) {
4909 return v == current_series_index(dset, dset->pangrps);
4910 } else {
4911 return 0;
4912 }
4913 }
4914
suitable_group_names_series(const DATASET * dset,int maxlen,int exclude)4915 static int suitable_group_names_series (const DATASET *dset,
4916 int maxlen,
4917 int exclude)
4918 {
4919 int i, vfound = 0;
4920
4921 for (i=1; i<dset->v && !vfound; i++) {
4922 if (i == exclude) {
4923 continue;
4924 }
4925 if (is_string_valued(dset, i)) {
4926 int ns = 0;
4927 char **S = series_get_string_vals(dset, i, &ns, 0);
4928
4929 if (S != NULL && ns >= dset->n / dset->pd) {
4930 const char *sbak = NULL;
4931 int t, u, ubak = -1;
4932 int fail = 0;
4933
4934 for (t=dset->t1; t<=dset->t2 && !fail; t++) {
4935 const char *st = series_get_string_for_obs(dset, i, t);
4936
4937 u = t / dset->pd;
4938 if (st == NULL || sbak == NULL) {
4939 fail = 1;
4940 } else if (u == ubak && strcmp(st, sbak)) {
4941 /* same unit, different label: no */
4942 fail = 1;
4943 } else if (ubak >= 0 && u != ubak && !strcmp(st, sbak)) {
4944 /* different unit, same label: no */
4945 fail = 2;
4946 }
4947 if (!fail && maxlen > 0 && strlen(st) > maxlen) {
4948 fail = 1;
4949 }
4950 ubak = u;
4951 sbak = st;
4952 }
4953 if (!fail) {
4954 vfound = i;
4955 }
4956 }
4957 }
4958 }
4959
4960 return vfound;
4961 }
4962
4963 /* For plotting purposes, try to get labels for panel groups,
4964 subject to the constraint that they should be no longer
4965 than @maxlen. If successful, this will return an array of
4966 at least N strings, where N is the cross-sectional
4967 dimension of the panel. This array should be treated as
4968 read-only.
4969 */
4970
get_panel_group_table(const DATASET * dset,int maxlen,int * pv)4971 series_table *get_panel_group_table (const DATASET *dset,
4972 int maxlen, int *pv)
4973 {
4974 series_table *st = NULL;
4975 int vpg = 0;
4976
4977 if (dset->pangrps != NULL) {
4978 vpg = current_series_index(dset, dset->pangrps);
4979 }
4980
4981 /* first see if we have valid group labels set explicitly */
4982 if (vpg > 0 && panel_group_names_ok(dset, maxlen)) {
4983 st = dset->varinfo[vpg]->st;
4984 }
4985
4986 if (st == NULL) {
4987 /* can we find a suitable string-valued series? */
4988 int altv = suitable_group_names_series(dset, maxlen, vpg);
4989
4990 if (altv > 0) {
4991 vpg = altv;
4992 st = dset->varinfo[vpg]->st;
4993 }
4994 }
4995
4996 *pv = (st != NULL)? vpg : 0;
4997
4998 return st;
4999 }
5000
is_dataset_series(const DATASET * dset,const double * x)5001 int is_dataset_series (const DATASET *dset, const double *x)
5002 {
5003 int i;
5004
5005 for (i=dset->v-1; i>=0; i--) {
5006 if (x == dset->Z[i]) {
5007 return 1;
5008 }
5009 }
5010
5011 return 0;
5012 }
5013
effective_daily_skip(int delta,int wd,int pd)5014 static int effective_daily_skip (int delta, int wd, int pd)
5015 {
5016 int k, skip = delta - 1;
5017
5018 if (pd < 7) {
5019 skip = 0;
5020 for (k=1; k<delta; k++) {
5021 wd = (wd == 0)? 6 : wd - 1;
5022 if (pd == 6) {
5023 skip += (wd != 0);
5024 } else {
5025 skip += (wd != 0 && wd != 6);
5026 }
5027 }
5028 }
5029
5030 return skip;
5031 }
5032
5033 /* If we get here we've already checked that @dset is dated daily
5034 data, and that @pd is a valid daily periodicity greater than or
5035 equal to the current dset->pd.
5036 */
5037
pad_daily_data(DATASET * dset,int pd,PRN * prn)5038 static int pad_daily_data (DATASET *dset, int pd, PRN *prn)
5039 {
5040 DATASET *bigset = NULL;
5041 char datestr[OBSLEN];
5042 guint32 ed, ed0 = 0, edbak = 0;
5043 int wd, skip, totskip = 0;
5044 int t, err = 0;
5045
5046 for (t=0; t<dset->n; t++) {
5047 ntolabel(datestr, t, dset);
5048 if (t == 0) {
5049 ed0 = edbak = get_epoch_day(datestr);
5050 } else {
5051 wd = weekday_from_date(datestr);
5052 ed = get_epoch_day(datestr);
5053 skip = effective_daily_skip(ed - edbak, wd, pd);
5054 totskip += skip;
5055 edbak = ed;
5056 }
5057 }
5058
5059 if (totskip == 0) {
5060 pprintf(prn, "Dataset is already complete for %d-day calendar", pd);
5061 return 0;
5062 }
5063
5064 /* We pass OPT_R here to avoid allocating varnames in @bigset,
5065 since we're going to preserve these from @dset.
5066 */
5067 bigset = real_create_new_dataset(dset->v, dset->n + totskip, OPT_R);
5068
5069 if (bigset == NULL) {
5070 err = E_ALLOC;
5071 } else {
5072 int i, s = 0;
5073
5074 edbak = ed0;
5075
5076 for (t=0; t<dset->n; t++) {
5077 if (t > 0) {
5078 ntolabel(datestr, t, dset);
5079 wd = weekday_from_date(datestr);
5080 ed = get_epoch_day(datestr);
5081 s += 1 + effective_daily_skip(ed - edbak, wd, pd);
5082 edbak = ed;
5083 }
5084 for (i=1; i<dset->v; i++) {
5085 bigset->Z[i][s] = dset->Z[i][t];
5086 }
5087 }
5088
5089 bigset->varname = dset->varname;
5090 bigset->varinfo = dset->varinfo;
5091 bigset->descrip = dset->descrip;
5092
5093 bigset->pd = pd;
5094 bigset->structure = TIME_SERIES;
5095 bigset->sd0 = (double) ed0;
5096 strcpy(bigset->stobs, dset->stobs);
5097 ntolabel(bigset->endobs, bigset->n - 1, bigset);
5098
5099 dset->varname = NULL;
5100 dset->varinfo = NULL;
5101 dset->descrip = NULL;
5102 dataset_destroy_obs_markers(dset);
5103 free_Z(dset);
5104 clear_datainfo(dset, CLEAR_SUBSAMPLE);
5105
5106 *dset = *bigset;
5107 free(bigset); /* avoid leaking memory */
5108 }
5109
5110 return err;
5111 }
5112
5113 /* MIDAS-related functions */
5114
5115 /* postprocess: fill missing slots in daily data array
5116 with the period (month or quarter) average
5117 (FIXME support interpolation as an option?)
5118 */
5119
postprocess_daily_data(DATASET * dset,const int * list)5120 int postprocess_daily_data (DATASET *dset, const int *list)
5121 {
5122 double *x, xbar, xsum;
5123 int t, i, n_ok, n_miss;
5124 int err = 0;
5125
5126 for (t=dset->t1; t<=dset->t2; t++) {
5127 xsum = 0.0;
5128 n_ok = n_miss = 0;
5129 for (i=1; i<=list[0]; i++) {
5130 x = dset->Z[list[i]];
5131 if (na(x[t])) {
5132 n_miss++;
5133 } else {
5134 xsum += x[t];
5135 n_ok++;
5136 }
5137 }
5138 if (n_miss > 0 && n_ok > 0) {
5139 xbar = xsum / n_ok;
5140 for (i=1; i<=list[0]; i++) {
5141 x = dset->Z[list[i]];
5142 if (na(x[t])) {
5143 x[t] = xbar;
5144 }
5145 }
5146 }
5147 }
5148
5149 return err;
5150 }
5151
series_get_midas_period(const DATASET * dset,int i)5152 int series_get_midas_period (const DATASET *dset, int i)
5153 {
5154 if (i > 0 && i < dset->v) {
5155 return dset->varinfo[i]->midas_period;
5156 }
5157
5158 return 0;
5159 }
5160
series_set_midas_period(const DATASET * dset,int i,int period)5161 void series_set_midas_period (const DATASET *dset, int i,
5162 int period)
5163 {
5164 if (i > 0 && i < dset->v) {
5165 dset->varinfo[i]->midas_period = period;
5166 }
5167 }
5168
series_get_midas_freq(const DATASET * dset,int i)5169 int series_get_midas_freq (const DATASET *dset, int i)
5170 {
5171 if (i > 0 && i < dset->v) {
5172 return dset->varinfo[i]->midas_freq;
5173 }
5174
5175 return 0;
5176 }
5177
series_set_midas_freq(const DATASET * dset,int i,int freq)5178 int series_set_midas_freq (const DATASET *dset, int i,
5179 int freq)
5180 {
5181 int err = 0;
5182
5183 if (i > 0 && i < dset->v) {
5184 if (freq < 5 || freq > 12) {
5185 err = E_DATA;
5186 } else {
5187 dset->varinfo[i]->midas_freq = freq;
5188 }
5189 } else {
5190 err = E_DATA;
5191 }
5192
5193 return err;
5194 }
5195
series_is_midas_anchor(const DATASET * dset,int i)5196 int series_is_midas_anchor (const DATASET *dset, int i)
5197 {
5198 if (i > 0 && i < dset->v &&
5199 (dset->varinfo[i]->flags & VAR_HFANCHOR)) {
5200 return dset->varinfo[i]->midas_period;
5201 }
5202
5203 return 0;
5204 }
5205
series_set_midas_anchor(const DATASET * dset,int i)5206 void series_set_midas_anchor (const DATASET *dset, int i)
5207 {
5208 if (i > 0 && i < dset->v) {
5209 dset->varinfo[i]->flags |= VAR_HFANCHOR;
5210 }
5211 }
5212
5213 /* end MIDAS-related functions */
5214
series_get_orig_pd(const DATASET * dset,int i)5215 int series_get_orig_pd (const DATASET *dset, int i)
5216 {
5217 if (i > 0 && i < dset->v) {
5218 return dset->varinfo[i]->orig_pd;
5219 } else {
5220 return 0;
5221 }
5222 }
5223
series_set_orig_pd(const DATASET * dset,int i,int pd)5224 void series_set_orig_pd (const DATASET *dset, int i, int pd)
5225 {
5226 if (i > 0 && i < dset->v) {
5227 dset->varinfo[i]->orig_pd = pd;
5228 }
5229 }
5230
series_unset_orig_pd(const DATASET * dset,int i)5231 void series_unset_orig_pd (const DATASET *dset, int i)
5232 {
5233 if (i > 0 && i < dset->v) {
5234 dset->varinfo[i]->orig_pd = 0;
5235 }
5236 }
5237
series_info_bundle(const DATASET * dset,int i,int * err)5238 gretl_bundle *series_info_bundle (const DATASET *dset,
5239 int i, int *err)
5240 {
5241 gretl_bundle *b = NULL;
5242
5243 if (dset != NULL && i >= 0 && i < dset->v) {
5244 b = gretl_bundle_new();
5245 if (b == NULL) {
5246 *err = E_ALLOC;
5247 }
5248 } else {
5249 *err = E_DATA;
5250 }
5251
5252 if (b != NULL) {
5253 VARINFO *vinfo = dset->varinfo[i];
5254
5255 gretl_bundle_set_string(b, "name", dset->varname[i]);
5256 if (vinfo->label != NULL) {
5257 gretl_bundle_set_string(b, "description", vinfo->label);
5258 }
5259 if (vinfo->display_name[0] != '\0') {
5260 gretl_bundle_set_string(b, "graph_name", vinfo->display_name);
5261 }
5262 gretl_bundle_set_int(b, "discrete", vinfo->flags & VAR_DISCRETE ?
5263 1 : 0);
5264 gretl_bundle_set_int(b, "coded", vinfo->flags & VAR_CODED ?
5265 1 : 0);
5266 gretl_bundle_set_string(b, "parent", vinfo->parent);
5267 if (vinfo->transform > 0) {
5268 gretl_bundle_set_string(b, "transform",
5269 gretl_command_word(vinfo->transform));
5270 } else {
5271 gretl_bundle_set_string(b, "transform", "none");
5272 }
5273 gretl_bundle_set_int(b, "lag", vinfo->lag);
5274 gretl_bundle_set_int(b, "has_string_table", vinfo->st != NULL);
5275 if (vinfo->midas_period > 0) {
5276 gretl_bundle_set_int(b, "midas_period", vinfo->midas_period);
5277 }
5278 if (vinfo->midas_freq > 0) {
5279 gretl_bundle_set_int(b, "midas_freq", vinfo->midas_freq);
5280 }
5281 if (vinfo->orig_pd > 0) {
5282 gretl_bundle_set_int(b, "orig_pd", vinfo->orig_pd);
5283 }
5284 }
5285
5286 return b;
5287 }
5288
5289 /* Given a series label @s, see if it can be recognized
5290 as identifying the series as the product of two others,
5291 and if so write the names of the others into @targ1
5292 and @targ2.
5293 */
5294
get_interaction_names(const char * s,char * targ1,char * targ2)5295 static int get_interaction_names (const char *s,
5296 char *targ1,
5297 char *targ2)
5298 {
5299 const char *p;
5300 int n1, n2, ret = 0;
5301
5302 *targ1 = *targ2 = '\0';
5303
5304 p = strchr(s, '*');
5305 if (p == NULL || strchr(p+1, '*') != NULL) {
5306 /* the label string does not contain a single '*' */
5307 return 0;
5308 }
5309
5310 s += strspn(s, " ");
5311 n1 = gretl_namechar_spn(s);
5312 p++;
5313 p += strspn(p, " ");
5314 n2 = gretl_namechar_spn(p);
5315
5316 if (n1 > 0 && n1 < VNAMELEN &&
5317 n2 > 0 && n2 < VNAMELEN) {
5318 strncat(targ1, s, n1);
5319 strncat(targ2, p, n2);
5320 ret = 1;
5321 }
5322
5323 return ret;
5324 }
5325
5326 /* Given a series label @s, see if it can be recognized as
5327 identifying the series as the square of another, and if
5328 so write the name of the other into @targ.
5329 */
5330
get_square_parent_name(const char * s,char * targ,char * targ2)5331 static int get_square_parent_name (const char *s, char *targ,
5332 char *targ2)
5333 {
5334 const char *p;
5335 int n1, n2, ret = 0;
5336
5337 *targ = '\0';
5338
5339 if (*s == '=' && (p = strstr(s, "squared")) != NULL) {
5340 /* "= PARENT squared" */
5341 s++;
5342 s += strspn(s, " ");
5343 n1 = gretl_namechar_spn(s);
5344 n2 = p - s - 1;
5345 if (n1 > 0 && n1 < VNAMELEN && n2 == n1) {
5346 strncat(targ, s, n1);
5347 ret = 1;
5348 }
5349 } else if (strchr(s, '^') != NULL) {
5350 /* "PARENT^2" */
5351 n1 = gretl_namechar_spn(s);
5352 if (n1 > 0 && n1 < VNAMELEN) {
5353 p = s + n1;
5354 if (!strcmp(p, "^2")) {
5355 strncat(targ, s, n1);
5356 ret = 1;
5357 }
5358 }
5359 } else if ((p = strstr(s, "square of ")) != NULL) {
5360 p += 9;
5361 p += strspn(p, " ");
5362 n1 = gretl_namechar_spn(p);
5363 if (n1 > 0 && n1 < VNAMELEN) {
5364 strncat(targ, p, n1);
5365 ret = 1;
5366 }
5367 } else if (get_interaction_names(s, targ, targ2)) {
5368 /* "x * x" ? */
5369 if (!strcmp(targ, targ2)) {
5370 ret = 1;
5371 }
5372 }
5373
5374 return ret;
5375 }
5376
5377 /* Given either (a) two series identified by ID numbers
5378 i, j where the second is supposed to be the square
5379 of the first, or (b) three series i, j, k where the
5380 third is supposed to be the product of the first two,
5381 check that the putative relationship actually holds
5382 over the current sample range. Return 1 if so, else 0.
5383 */
5384
validate_relationship(int i,int j,int k,const DATASET * dset)5385 static int validate_relationship (int i, int j, int k,
5386 const DATASET *dset)
5387 {
5388 double xi, xj;
5389 int t;
5390
5391 for (t=dset->t1; t<=dset->t2; t++) {
5392 xi = dset->Z[i][t];
5393 xj = dset->Z[j][t];
5394 if (k > 0) {
5395 /* interaction test: xk = xi*xj */
5396 if (!na(xi) && !na(xj) && dset->Z[k][t] != xi*xj) {
5397 return 0;
5398 }
5399 } else {
5400 /* square test: xj = xi*xi */
5401 if (!na(xi) && xj != xi*xi) {
5402 return 0;
5403 }
5404 }
5405 }
5406
5407 return 1;
5408 }
5409
5410 /* In case we find more interaction terms that can be fitted into
5411 the current column-size of the "list info" matrix, add two more
5412 (since the encoding of each interaction for a given "primary"
5413 series requires two columns).
5414 */
5415
resize_listinfo_matrix(gretl_matrix * m)5416 static int resize_listinfo_matrix (gretl_matrix *m)
5417 {
5418 int newc = m->cols + 2;
5419 int i, err = 0;
5420
5421 err = gretl_matrix_realloc(m, m->rows, newc);
5422 if (!err) {
5423 for (i=0; i<m->rows; i++) {
5424 gretl_matrix_set(m, i, newc-2, 0);
5425 gretl_matrix_set(m, i, newc-1, 0);
5426 }
5427 }
5428
5429 return err;
5430 }
5431
get_iact_column(gretl_matrix * m,int i,int * err)5432 static int get_iact_column (gretl_matrix *m, int i, int *err)
5433 {
5434 int j;
5435
5436 for (j=3; j<m->cols; j+=2) {
5437 if (gretl_matrix_get(m, i, j) == 0) {
5438 return j;
5439 }
5440 }
5441
5442 /* looks like we need more columns */
5443 *err = resize_listinfo_matrix(m);
5444 return *err ? -1 : m->cols - 2;
5445 }
5446
5447 /* The (optionally) "condensed" version of the listinfo_matrix
5448 includes only primary terms (and excludes the constant).
5449 The first column of the full matrix is replaced by the
5450 position in @list of each primary term.
5451 */
5452
condense_listinfo_matrix(gretl_matrix * m,const int * list,const DATASET * dset)5453 static int condense_listinfo_matrix (gretl_matrix *m,
5454 const int *list,
5455 const DATASET *dset)
5456 {
5457 gretl_matrix *mc = NULL;
5458 char **S = NULL;
5459 double x;
5460 int i, j, ic, n = 0;
5461
5462 for (i=0; i<m->rows; i++) {
5463 if (m->val[i] == 1) {
5464 n++;
5465 }
5466 }
5467
5468 if (n == m->rows) {
5469 /* nothing to be done */
5470 return 0;
5471 }
5472
5473 mc = gretl_matrix_alloc(n, m->cols);
5474 if (mc == NULL) {
5475 return E_ALLOC;
5476 }
5477
5478 S = strings_array_new(n);
5479
5480 ic = 0;
5481 for (i=0; i<m->rows; i++) {
5482 if (m->val[i] == 1) {
5483 gretl_matrix_set(mc, ic, 0, i+1);
5484 for (j=1; j<m->cols; j++) {
5485 x = gretl_matrix_get(m, i, j);
5486 gretl_matrix_set(mc, ic, j, x);
5487 }
5488 S[ic] = gretl_strdup(dset->varname[list[i+1]]);
5489 ic++;
5490 }
5491 }
5492
5493 gretl_matrix_reuse(m, n, m->cols);
5494 gretl_matrix_copy_values(m, mc);
5495 gretl_matrix_free(mc);
5496 gretl_matrix_set_rownames(m, S);
5497
5498 return 0;
5499 }
5500
5501 static gretl_matrix *
linfo_matrix_via_labels(const int * list,const DATASET * dset,gretlopt opt,int * err)5502 linfo_matrix_via_labels (const int *list,
5503 const DATASET *dset,
5504 gretlopt opt,
5505 int *err)
5506 {
5507 gretl_matrix *ret = NULL;
5508 const char *label;
5509 char targ1[VNAMELEN];
5510 char targ2[VNAMELEN];
5511 int i, vi, j, vj;
5512 int pcol = 0, dcol = 1;
5513 int iacol, sqcol = 2;
5514 int n;
5515
5516 if (list == NULL || list[0] == 0) {
5517 *err = E_DATA;
5518 return ret;
5519 }
5520
5521 n = list[0];
5522 ret = gretl_zero_matrix_new(n, 5);
5523 if (ret == NULL) {
5524 *err = E_ALLOC;
5525 return ret;
5526 }
5527
5528 for (i=1; i<=n && !*err; i++) {
5529 /* default to series is primary */
5530 gretl_matrix_set(ret, i-1, pcol, 1);
5531 vi = list[i];
5532 if (vi == 0) {
5533 /* mark as non-primary and move on */
5534 gretl_matrix_set(ret, i-1, pcol, 0);
5535 continue;
5536 }
5537 if (gretl_isdummy(dset->t1, dset->t2, dset->Z[vi])) {
5538 /* insert dummy flag in this row */
5539 gretl_matrix_set(ret, i-1, dcol, 1);
5540 }
5541 label = series_get_label(dset, vi);
5542 if (label == NULL) {
5543 continue;
5544 }
5545 if (get_square_parent_name(label, targ1, targ2)) {
5546 /* looks like this could be a squared term */
5547 for (j=1; j<=n; j++) {
5548 if (j == i) continue;
5549 vj = list[j];
5550 if (!strcmp(targ1, dset->varname[vj]) &&
5551 validate_relationship(vj, vi, 0, dset)) {
5552 /* mark this series as non-primary, and as square */
5553 gretl_matrix_set(ret, i-1, pcol, 0);
5554 gretl_matrix_set(ret, i-1, sqcol, j);
5555 /* insert square ref in parent's row */
5556 gretl_matrix_set(ret, j-1, sqcol, i);
5557 break;
5558 }
5559 }
5560 continue;
5561 }
5562 if (get_interaction_names(label, targ1, targ2)) {
5563 /* looks like this could be an interaction term */
5564 int ia1 = 0, ia2 = 0;
5565
5566 for (j=1; j<=n; j++) {
5567 if (j == i) continue;
5568 vj = list[j];
5569 if (!strcmp(targ1, dset->varname[vj])) {
5570 ia1 = j;
5571 } else if (!strcmp(targ2, dset->varname[vj])) {
5572 ia2 = j;
5573 }
5574 }
5575 if (ia1 > 0 && ia2 > 0 &&
5576 validate_relationship(list[ia1], list[ia2], vi, dset)) {
5577 /* mark this series as non-primary, interaction */
5578 gretl_matrix_set(ret, i-1, pcol, 0);
5579 gretl_matrix_set(ret, i-1, 3, ia1);
5580 gretl_matrix_set(ret, i-1, 4, ia2);
5581 /* we may need to expand the number of columns */
5582 iacol = get_iact_column(ret, i, err);
5583 if (!*err) {
5584 /* insert cross references in parents' rows */
5585 gretl_matrix_set(ret, ia1-1, iacol, ia2);
5586 gretl_matrix_set(ret, ia1-1, iacol+1, i);
5587 gretl_matrix_set(ret, ia2-1, iacol, ia1);
5588 gretl_matrix_set(ret, ia2-1, iacol+1, i);
5589 }
5590 }
5591 }
5592 }
5593
5594 if (*err) {
5595 gretl_matrix_free(ret);
5596 ret = NULL;
5597 } else if (opt & OPT_C) {
5598 condense_listinfo_matrix(ret, list, dset);
5599 } else {
5600 /* convenience: attach series names to rows */
5601 char **S;
5602 int serr = 0;
5603
5604 S = gretl_list_get_names_array(list, dset, &serr);
5605 if (S != NULL) {
5606 gretl_matrix_set_rownames(ret, S);
5607 }
5608 }
5609
5610 return ret;
5611 }
5612
5613 static gretl_matrix *
linfo_matrix_via_data(const int * list,const DATASET * dset,gretlopt opt,int * err)5614 linfo_matrix_via_data (const int *list,
5615 const DATASET *dset,
5616 gretlopt opt,
5617 int *err)
5618 {
5619 gretl_matrix *ret = NULL;
5620 int i, vi, j, vj, k, vk;
5621 int pcol = 0, dcol = 1;
5622 int iacol, sqcol = 2;
5623 int n;
5624
5625 if (list == NULL || list[0] == 0) {
5626 *err = E_DATA;
5627 return ret;
5628 }
5629
5630 n = list[0];
5631 ret = gretl_zero_matrix_new(n, 5);
5632 if (ret == NULL) {
5633 *err = E_ALLOC;
5634 return ret;
5635 }
5636
5637 for (i=1; i<=n && !*err; i++) {
5638 int matched = 0;
5639
5640 /* default to series is primary */
5641 gretl_matrix_set(ret, i-1, pcol, 1);
5642 vi = list[i];
5643 if (vi == 0) {
5644 /* mark as non-primary and move on */
5645 gretl_matrix_set(ret, i-1, pcol, 0);
5646 continue;
5647 }
5648 if (gretl_isdummy(dset->t1, dset->t2, dset->Z[vi])) {
5649 /* insert dummy flag in this row */
5650 gretl_matrix_set(ret, i-1, dcol, 1);
5651 }
5652 for (j=1; j<=n && !matched; j++) {
5653 vj = list[j];
5654 if (j == i || vj == 0) continue;
5655 if (validate_relationship(vj, vi, 0, dset)) {
5656 /* mark this series as non-primary, square */
5657 gretl_matrix_set(ret, i-1, pcol, 0);
5658 gretl_matrix_set(ret, i-1, sqcol, j);
5659 /* insert square ref in parent's row */
5660 gretl_matrix_set(ret, j-1, sqcol, i);
5661 matched = 1;
5662 }
5663 for (k=1; k<=n && !matched; k++) {
5664 vk = list[k];
5665 if (k == i || k == j || vk == 0) continue;
5666 if (validate_relationship(vj, vk, vi, dset)) {
5667 /* mark this series as non-primary, interaction */
5668 gretl_matrix_set(ret, i-1, pcol, 0);
5669 gretl_matrix_set(ret, i-1, 3, j);
5670 gretl_matrix_set(ret, i-1, 4, k);
5671 /* we may need to expand the number of columns */
5672 iacol = get_iact_column(ret, i, err);
5673 if (!*err) {
5674 /* insert cross references in parents' rows */
5675 gretl_matrix_set(ret, j-1, iacol, k);
5676 gretl_matrix_set(ret, j-1, iacol+1, i);
5677 gretl_matrix_set(ret, k-1, iacol, j);
5678 gretl_matrix_set(ret, k-1, iacol+1, i);
5679 }
5680 matched = 1;
5681 }
5682 }
5683 }
5684 }
5685
5686 if (*err) {
5687 gretl_matrix_free(ret);
5688 ret = NULL;
5689 } else if (opt & OPT_C) {
5690 condense_listinfo_matrix(ret, list, dset);
5691 } else {
5692 /* convenience: attach series names to rows */
5693 char **S;
5694 int serr = 0;
5695
5696 S = gretl_list_get_names_array(list, dset, &serr);
5697 if (S != NULL) {
5698 gretl_matrix_set_rownames(ret, S);
5699 }
5700 }
5701
5702 return ret;
5703 }
5704
5705 /* Construct a matrix providing information about the relations
5706 between the series in @list. This will have rows equal to the
5707 number of series and at least 5 columns (shown as 1-based here).
5708 All elements of the matrix are zero unless otherwise specified.
5709
5710 col 1: Holds 1 if the series is "primary" (neither the square
5711 of another series in the list, nor the interaction of two
5712 series in the list).
5713
5714 col 2: Holds 1 if the series is a 0/1 dummy.
5715
5716 col 3: If the series is primary and its square is also
5717 present in the list, holds the list position of the square,
5718 or if the series itself is a squared term, holds the list
5719 position of the series of which it's the square.
5720
5721 cols 4, 5: If the series features in an interaction term,
5722 col 4 holds the list position of its "partner" and col 5 the
5723 list position of the interaction term. If the series features
5724 in more than one interaction term, subsequent interaction info
5725 goes into cols 6 and 7 or higher (these being added as required).
5726 If the series itself is an interaction term, cols 4 and 5 get
5727 the list positions of the two source series.
5728 */
5729
list_info_matrix(const int * list,const DATASET * dset,gretlopt opt,int * err)5730 gretl_matrix *list_info_matrix (const int *list, const DATASET *dset,
5731 gretlopt opt, int *err)
5732 {
5733 if (opt & OPT_B) {
5734 return linfo_matrix_via_data(list, dset, opt, err);
5735 } else {
5736 return linfo_matrix_via_labels(list, dset, opt, err);
5737 }
5738 }
5739
5740 #define MAP_DEBUG 0
5741
5742 #define excluded(l,i) (l != NULL && !in_gretl_list(l,i))
5743
5744 /* Given the current dataset and the $mapfile name recorded
5745 on it: get the content of $mapfile as a bundle then
5746 revise the bundle (a) to include only the features in the
5747 current sample and (b) to reflect any changes in the dataset
5748 (series added, deleted or modified). Return the modified
5749 bundle.
5750 */
5751
get_current_map(const DATASET * dset,const int * list,int * err)5752 gretl_bundle *get_current_map (const DATASET *dset,
5753 const int *list,
5754 int *err)
5755 {
5756 const char *sj, *id, *fname;
5757 gretl_bundle *fi, *pp, *jb = NULL;
5758 gretl_array *features = NULL;
5759 int ntarg, fmax = 0;
5760 int i, j, dsi, fidx;
5761 double xj;
5762
5763 fname = dataset_get_mapfile(dset);
5764
5765 if (fname == NULL) {
5766 gretl_errmsg_set("no mapfile is present");
5767 *err = E_DATA;
5768 } else if (dataset_is_resampled(dset)) {
5769 /* most unlikely */
5770 gretl_errmsg_set("dataset is resampled!");
5771 *err = E_DATA;
5772 }
5773
5774 if (!*err) {
5775 jb = gretl_bundle_read_from_file(fname, 0, err);
5776 if (!*err) {
5777 features = gretl_bundle_get_array(jb, "features", err);
5778 }
5779 }
5780
5781 if (!*err) {
5782 int nobs;
5783
5784 fmax = gretl_array_get_length(features);
5785 if (dset->submask != NULL) {
5786 nobs = get_full_length_n();
5787 } else {
5788 nobs = dset->n;
5789 }
5790 if (fmax != nobs) {
5791 /* Although it may be sub-sampled, the full dataset must
5792 have a number of observations equal to the number of
5793 features in the existing map.
5794 */
5795 gretl_errmsg_set("map and dataset are out of sync!");
5796 *err = E_DATA;
5797 }
5798 /* the number of features we're seeking */
5799 ntarg = sample_size(dset);
5800 }
5801
5802 if (*err) {
5803 gretl_bundle_destroy(jb);
5804 return NULL;
5805 }
5806
5807 #if MAP_DEBUG
5808 fprintf(stderr, "get_current_map: fmax %d, ntarg %d\n", fmax, ntarg);
5809 printf(stderr, "checking for features to include/drop\n");
5810 #endif
5811
5812 /* index into dataset rows */
5813 dsi = -1;
5814
5815 for (i=0, fidx=0; i<fmax; i++) {
5816 int skip = 0;
5817
5818 if (dset->submask != NULL) {
5819 if (dset->submask[i] == 0) {
5820 skip = 1;
5821 } else {
5822 dsi++;
5823 }
5824 } else {
5825 dsi = i;
5826 }
5827 if (dsi < dset->t1) {
5828 skip = 1;
5829 } else if (dsi > dset->t2) {
5830 /* we've got everything we need */
5831 break;
5832 }
5833 if (skip) {
5834 #if MAP_DEBUG
5835 fprintf(stderr, " drop sampled-out feature %d\n", i);
5836 #endif
5837 gretl_array_delete_element(features, fidx);
5838 } else {
5839 fi = gretl_array_get_element(features, fidx, NULL, err);
5840 pp = gretl_bundle_get_bundle(fi, "properties", err);
5841 /* clear the existing properties bundle */
5842 gretl_bundle_void_content(pp);
5843 /* and refill it from the dataset */
5844 for (j=1; j<dset->v; j++) {
5845 if (excluded(list, j)) {
5846 continue;
5847 }
5848 id = dset->varname[j];
5849 if (is_string_valued(dset, j)) {
5850 sj = series_get_string_for_obs(dset, j, dsi);
5851 gretl_bundle_set_string(pp, id, sj);
5852 } else {
5853 xj = dset->Z[j][dsi];
5854 gretl_bundle_set_scalar(pp, id, xj);
5855 }
5856 }
5857 fidx++;
5858 #if MAP_DEBUG
5859 fprintf(stderr, " included feature %d, now got %d\n", i, fidx);
5860 #endif
5861 if (fidx == ntarg) {
5862 #if MAP_DEBUG
5863 fprintf(stderr, " reached target of %d features, break\n", ntarg);
5864 #endif
5865 break;
5866 }
5867 }
5868 }
5869
5870 fmax = gretl_array_get_length(features);
5871
5872 #if MAP_DEBUG
5873 fprintf(stderr, "after loop, features array has %d elements, fidx=%d\n",
5874 fmax, fidx);
5875 #endif
5876
5877 /* delete any unwanted trailing features */
5878 for (j=fidx; j<fmax; j++) {
5879 gretl_array_delete_element(features, fidx);
5880 }
5881
5882 return jb;
5883 }
5884