1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #include "libgretl.h"
21 #include "uservar.h"
22 #include "dbwrite.h"
23 #include "libset.h"
24 #include "gretl_func.h"
25 #include "gretl_xml.h"
26 #include "gretl_panel.h"
27 #include "gretl_string_table.h"
28 #include "matrix_extra.h"
29 #include "csvdata.h"
30 #include "usermat.h"
31 
32 #include <ctype.h>
33 #include <time.h>
34 #include <errno.h>
35 #include <unistd.h>
36 
37 #include <glib.h>
38 
39 #define MERGE_DEBUG 0
40 
41 /**
42  * SECTION:dataio
43  * @short_description: data handling (internal)
44  * @title: Data support
45  * @include: gretl/libgretl.h
46  *
47  * The following data handling functions are basically internal to
48  * gretl and not in a state where they can be readily
49  * documented as public APIs.
50  *
51  */
52 
53 typedef enum {
54     GRETL_FMT_GDT,       /* standard gretl XML data */
55     GRETL_FMT_BINARY,    /* native gretl binary data */
56     GRETL_FMT_OCTAVE,    /* data in Gnu Octave format */
57     GRETL_FMT_CSV,       /* data in Comma Separated Values format */
58     GRETL_FMT_R,         /* data in Gnu R format */
59     GRETL_FMT_DAT,       /* data in PcGive format */
60     GRETL_FMT_DB,        /* gretl native database format */
61     GRETL_FMT_JM,        /* JMulti ascii data */
62     GRETL_FMT_DTA,       /* Stata .dta format */
63     GRETL_FMT_JSON       /* geojson (maps) */
64 } GretlDataFormat;
65 
66 #define IS_DATE_SEP(c) (c == '.' || c == ':' || c == ',')
67 
68 #define PROGRESS_BAR "progress_bar"
69 
70 /**
71  * get_date_x:
72  * @pd: frequency of data.
73  * @obs: observation string.
74  *
75  * Returns: the floating-point representation of @obs.
76  */
77 
get_date_x(int pd,const char * obs)78 double get_date_x (int pd, const char *obs)
79 {
80     double x = 1.0;
81 
82     if ((pd == 5 || pd == 6 || pd == 7 || pd == 52) && strlen(obs) > 4) {
83 	/* calendar data */
84 	guint32 ed = get_epoch_day(obs);
85 
86 	if (ed > 0) {
87 	    x = ed;
88 	}
89     } else {
90 	x = obs_str_to_double(obs);
91     }
92 
93     return x;
94 }
95 
real_check_varname(const char * vname,int is_series)96 static int real_check_varname (const char *vname,
97 			       int is_series)
98 {
99     int testchar = 'a';
100     int firstbad = 0;
101     int err = 0;
102 
103     gretl_error_clear();
104 
105     if (vname == NULL || *vname == '\0') {
106 	gretl_errmsg_set("Expected an identifier");
107 	return E_PARSE;
108     }
109 
110     if (strlen(vname) >= VNAMELEN) {
111 	gretl_errmsg_set(_("Varname exceeds the maximum of 31 characters"));
112 	err = E_DATA;
113     } else if (strcmp(vname, "return") && gretl_reserved_word(vname)) {
114 	err = E_DATA;
115     } else if (!(isalpha((unsigned char) *vname))) {
116 	firstbad = 1;
117 	testchar = *vname;
118         err = E_DATA;
119     } else if (is_series && (function_lookup(vname) ||
120 			     get_user_function_by_name(vname) ||
121 			     is_function_alias(vname))) {
122 	gretl_warnmsg_sprintf(_("'%s' shadows a function of the same name"),
123 			      vname);
124     } else {
125 	const char *p = vname;
126 
127 	while (*p && testchar == 'a') {
128 	    if (!(isalpha((unsigned char) *p))
129 		&& !(isdigit((unsigned char) *p))
130 		&& *p != '_') {
131 		testchar = *p;
132 		err = E_DATA;
133 	    }
134 	    p++;
135 	}
136     }
137 
138     if (err && !is_series && strlen(vname) == 2 && is_greek_letter(vname)) {
139 	return 0;
140     }
141 
142     if (testchar != 'a') {
143 	if (isprint((unsigned char) testchar)) {
144 	    if (firstbad) {
145 		gretl_errmsg_sprintf(_("First char of varname '%s' is bad\n"
146 				       "(first must be alphabetical)"),
147 				     vname);
148 	    } else {
149 		gretl_errmsg_sprintf(_("Varname '%s' contains illegal character '%c'\n"
150 				       "Use only letters, digits and underscore"),
151 				     vname, (unsigned char) testchar);
152 	    }
153 	} else {
154 	    if (firstbad) {
155 		gretl_errmsg_sprintf(_("First char of varname (0x%x) is bad\n"
156 				       "(first must be alphabetical)"),
157 				     (unsigned) testchar);
158 	    } else {
159 		gretl_errmsg_sprintf(_("Varname contains illegal character 0x%x\n"
160 				       "Use only letters, digits and underscore"),
161 				     (unsigned) testchar);
162 	    }
163 	}
164     }
165 
166     return err;
167 }
168 
169 /**
170  * check_varname:
171  * @varname: putative name for variable (or object).
172  *
173  * Check a variable/object name for legality: the name
174  * must start with a letter, and be composed of letters,
175  * numbers or the underscore character, and nothing else.
176  *
177  * Returns: 0 if name is OK, non-zero if not.
178  */
179 
check_varname(const char * varname)180 int check_varname (const char *varname)
181 {
182     return real_check_varname(varname, 1);
183 }
184 
check_identifier(const char * varname)185 int check_identifier (const char *varname)
186 {
187     /* FIXME series case? */
188     return real_check_varname(varname, 0);
189 }
190 
bad_date_string(const char * s)191 static int bad_date_string (const char *s)
192 {
193     int err = 0;
194 
195     gretl_error_clear();
196 
197     while (*s && !err) {
198 	if (!isdigit((unsigned char) *s) && !IS_DATE_SEP(*s)) {
199 	    if (isprint((unsigned char) *s)) {
200 		gretl_errmsg_sprintf(_("Bad character '%c' in date string"), *s);
201 	    } else {
202 		gretl_errmsg_sprintf(_("Bad character %d in date string"), *s);
203 	    }
204 	    err = 1;
205 	}
206 	s++;
207     }
208 
209     return err;
210 }
211 
maybe_unquote_label(char * targ,const char * src)212 static void maybe_unquote_label (char *targ, const char *src)
213 {
214     if (*src == '"' || *src == '\'') {
215 	int n;
216 
217 	strcpy(targ, src + 1);
218 	n = strlen(targ);
219 	if (n > 0 && (targ[n-1] == '"' || targ[n-1] == '\'')) {
220 	    targ[n-1] = '\0';
221 	}
222     } else {
223 	strcpy(targ, src);
224     }
225 }
226 
get_dot_pos(const char * s)227 static int get_dot_pos (const char *s)
228 {
229     int i, pos = 0;
230 
231     for (i=0; *s != '\0'; i++, s++) {
232 	if (IS_DATE_SEP(*s)) {
233 	    pos = i;
234 	    break;
235 	}
236     }
237 
238     return pos;
239 }
240 
241 #define DATES_DEBUG 0
242 
match_obs_marker(const char * s,const DATASET * dset)243 static int match_obs_marker (const char *s, const DATASET *dset)
244 {
245     char test[OBSLEN];
246     int t;
247 
248 #if DATES_DEBUG
249     fprintf(stderr, "dateton: checking '%s' against marker strings\n", s);
250 #endif
251 
252     maybe_unquote_label(test, s);
253 
254     for (t=0; t<dset->n; t++) {
255 	if (!strcmp(test, dset->S[t])) {
256 	    /* handled */
257 	    return t;
258 	}
259     }
260 
261     return -1;
262 }
263 
caldate_precheck(const char * s,int * y2,int * slashed)264 static int caldate_precheck (const char *s, int *y2, int *slashed)
265 {
266     int n = strlen(s);
267     int ok = 0;
268 
269     if (n < 8 || !isdigit(s[0]) || !isdigit(s[1])) {
270 	/* can't be date */
271 	return 0;
272     }
273 
274     if (n == 10) {
275 	if (s[4] == '-' && s[7] == '-') {
276 	    ok = 1; /* could be YYYY-MM-DD */
277 	} else if (s[4] == '/' && s[7] == '/') {
278 	    *slashed = 1; /* could be YYYY/MM/DD */
279 	    ok = 1;
280 	}
281     } else if (n == 8) {
282 	if (s[2] == '-' && s[5] == '-') {
283 	    ok = 1; /* could be YY-MM-DD */
284 	} else if (s[2] == '/' && s[5] == '/') {
285 	    *slashed = 1; /* could be YY/MM/DD */
286 	    ok = 1;
287 	}
288 	if (ok) *y2 = 1;
289     }
290 
291     return ok;
292 }
293 
datecmp(const char * s1,int y21,int slash1,const char * s2,int y22,int slash2)294 static int datecmp (const char *s1, int y21, int slash1,
295 		    const char *s2, int y22, int slash2)
296 {
297     if (y21 && !y22) {
298 	s2 += 2;
299     } else if (!y21 && y22) {
300 	s1 += 2;
301     }
302     if (slash1 && !slash2) {
303 	char revs1[OBSLEN];
304 
305 	strcpy(revs1, s1);
306 	gretl_charsub(revs1, '/', '-');
307 	return strcmp(revs1, s2);
308     } else if (slash2 && !slash1) {
309 	char revs2[OBSLEN];
310 
311 	strcpy(revs2, s2);
312 	gretl_charsub(revs2, '/', '-');
313 	return strcmp(s1, revs2);
314     } else {
315 	return strcmp(s1, s2);
316     }
317 }
318 
319 static int
real_dateton(const char * date,const DATASET * dset,int nolimit)320 real_dateton (const char *date, const DATASET *dset, int nolimit)
321 {
322     int handled = 0;
323     int t, n = -1;
324 
325     /* first check if this is calendar data and if so,
326        treat accordingly */
327 
328     if (calendar_data(dset)) {
329 #if DATES_DEBUG
330 	fprintf(stderr, "dateton: treating as calendar data\n");
331 #endif
332 	if (dataset_has_markers(dset)) {
333 	    /* "hard-wired" calendar dates as strings */
334 	    int tryit, y21 = 0, slash1 = 0;
335 	    int y22 = 0, slash2 = 0;
336 
337 	    tryit = caldate_precheck(date, &y21, &slash1);
338 	    if (!tryit) {
339 		return -1;
340 	    }
341 	    tryit = caldate_precheck(dset->S[0], &y22, &slash2);
342 	    if (!tryit) {
343 		return -1;
344 	    }
345 	    for (t=0; t<dset->n; t++) {
346 		if (!datecmp(date, y21, slash1, dset->S[t], y22, slash2)) {
347 		    /* handled */
348 		    return t;
349 		}
350 	    }
351 	    /* out of options: abort */
352 	    return -1;
353 	} else {
354 	    /* automatic calendar dates */
355 	    n = calendar_obs_number(date, dset);
356 	    handled = 1;
357 	}
358     } else if (dataset_is_daily(dset) ||
359 	       dataset_is_weekly(dset)) {
360 #if DATES_DEBUG
361 	fprintf(stderr, "dateton: trying undated time series\n");
362 #endif
363 	t = positive_int_from_string(date);
364 	if (t > 0) {
365 	    n = t - 1;
366 	    handled = 1;
367 	}
368     } else if (dataset_is_decennial(dset)) {
369 	t = positive_int_from_string(date);
370 	if (t > 0) {
371 	    n = (t - dset->sd0) / 10;
372 	    handled = 1;
373 	}
374     } else if (dataset_has_markers(dset)) {
375 	t = match_obs_marker(date, dset);
376 	if (t >= 0) {
377 	    return t;
378 	}
379 	/* else maybe just a straight obs number */
380 	t = positive_int_from_string(date);
381 	if (t > 0) {
382 	    n = t - 1;
383 	    handled = 1;
384 	}
385     }
386 
387     if (!handled) {
388 	int pos1, pos2;
389 
390 #if DATES_DEBUG
391 	fprintf(stderr, "dateton: treating %s as regular numeric obs\n",
392 		date);
393 #endif
394 	if (bad_date_string(date)) {
395 	    return -1;
396 	}
397 
398 	pos1 = get_dot_pos(date);
399 	pos2 = get_dot_pos(dset->stobs);
400 
401 	if ((pos1 && !pos2) || (pos2 && !pos1)) {
402 	    gretl_errmsg_sprintf(_("'%s': invalid observation index"),
403 				 date);
404 	} else if (!pos1 && !pos2) {
405 	    n = atoi(date) - atoi(dset->stobs);
406 	} else if (pos1 > OBSLEN - 2) {
407 	    gretl_errmsg_sprintf(_("'%s': invalid observation index"),
408 				 date);
409 	} else {
410 	    char tmp[OBSLEN];
411 	    int maj, min;
412 
413 	    *tmp = '\0';
414 	    strncat(tmp, date, OBSLEN-1);
415 	    tmp[pos1] = '\0';
416 	    maj = positive_int_from_string(tmp);
417 	    min = positive_int_from_string(tmp + pos1 + 1);
418 
419 	    if (maj <= 0 || min <= 0 || min > dset->pd) {
420 		gretl_errmsg_sprintf(_("'%s': invalid observation index"),
421 				     date);
422 		n = -1;
423 	    } else {
424 		int maj0, min0;
425 
426 		*tmp = '\0';
427 		strncat(tmp, dset->stobs, OBSLEN-1);
428 		tmp[pos2] = '\0';
429 		maj0 = atoi(tmp);
430 		min0 = atoi(tmp + pos2 + 1);
431 
432 		n = dset->pd * (maj - maj0) + (min - min0);
433 	    }
434 	}
435     }
436 
437     if (!nolimit && dset->n > 0 && n >= dset->n) {
438 	fprintf(stderr, "n = %d, dset->n = %d: out of bounds\n", n, dset->n);
439 	gretl_errmsg_set(_("Observation number out of bounds"));
440 	n = -1;
441     }
442 
443 #if DATES_DEBUG
444     fprintf(stderr, "dateton: returning %d\n", n);
445 #endif
446 
447     return n;
448 }
449 
450 /**
451  * dateton:
452  * @date: string representation of date for processing.
453  * @dset: pointer to data information struct.
454  *
455  * Determines the observation number corresponding to @date,
456  * relative to @dset. It is an error if @date represents an
457  * observation that lies outside of the full data range
458  * specified in @dset.
459  *
460  * Returns: zero-based observation number, or -1 on error.
461  */
462 
dateton(const char * date,const DATASET * dset)463 int dateton (const char *date, const DATASET *dset)
464 {
465     return real_dateton(date, dset, 0);
466 }
467 
468 /**
469  * merge_dateton:
470  * @date: string representation of date for processing.
471  * @dset: pointer to data information struct.
472  *
473  * Works just as dateton(), except that for this function it
474  * is not an error if @date represents an observation that
475  * lies beyond the data range specified in @dset. This is
476  * inended for use when merging data, or when creating a new
477  * dataset.
478  *
479  * Returns: zero-based observation number, or -1 on error.
480  */
481 
merge_dateton(const char * date,const DATASET * dset)482 int merge_dateton (const char *date, const DATASET *dset)
483 {
484     return real_dateton(date, dset, 1);
485 }
486 
panel_obs(char * s,int t,const DATASET * dset)487 static char *panel_obs (char *s, int t, const DATASET *dset)
488 {
489     int i = t / dset->pd + 1;
490     int j = (t + 1) % dset->pd;
491     int d = 1 + floor(log10(dset->pd));
492 
493     if (j == 0) {
494 	j = dset->pd;
495     }
496 
497     sprintf(s, "%d:%0*d", i, d, j);
498 
499     return s;
500 }
501 
502 /**
503  * ntolabel:
504  * @datestr: char array to which date is to be printed.
505  * @t: zero-based observation number.
506  * @dset: data information struct.
507  *
508  * Prints to @datestr (which must be at least #OBSLEN bytes)
509  * the calendar representation of observation number @t.
510  *
511  * Returns: the observation string.
512  */
513 
ntolabel(char * datestr,int t,const DATASET * dset)514 char *ntolabel (char *datestr, int t, const DATASET *dset)
515 {
516     double x;
517 
518 #if 0
519     fprintf(stderr, "ntolabel: t=%d, pd=%d, sd0=%g, incoming stobs='%s'\n",
520 	    t, dset->pd, dset->sd0, dset->stobs);
521     fprintf(stderr, " calendar_data(dset) %d\n", calendar_data(dset));
522 #endif
523 
524     if (calendar_data(dset)) {
525 	/* handles both daily and dated weekly data */
526 	if (dataset_has_markers(dset)) {
527 	    strcpy(datestr, dset->S[t]);
528 	    if (strchr(datestr, '/')) {
529 		gretl_charsub(datestr, '/', '-');
530 	    }
531 	} else {
532 	    calendar_date_string(datestr, t, dset);
533 	}
534 	return datestr;
535     } else if (dataset_is_daily(dset) ||
536 	       dataset_is_weekly(dset)) {
537 	/* undated time series */
538 	x = date_as_double(t, 1, dset->sd0);
539 	sprintf(datestr, "%d", (int) x);
540 	return datestr;
541     } else if (dataset_is_decennial(dset)) {
542 	x = dset->sd0 + 10 * t;
543 	sprintf(datestr, "%d", (int) x);
544 	return datestr;
545     } else if (dataset_is_panel(dset)) {
546 	panel_obs(datestr, t, dset);
547 	return datestr;
548     }
549 
550     x = date_as_double(t, dset->pd, dset->sd0);
551 
552     if (dset->pd == 1) {
553         sprintf(datestr, "%d", (int) x);
554     } else {
555 	int pdp = dset->pd;
556 	short len = 1;
557 	char fmt[10];
558 
559 	while ((pdp = pdp / 10)) len++;
560 	sprintf(fmt, "%%.%df", len);
561 	sprintf(datestr, fmt, x);
562 	colonize_obs(datestr);
563     }
564 
565     return datestr;
566 }
567 
568 /* print observation date in ISO 8601 extended format */
569 
ntolabel_8601(char * datestr,int t,const DATASET * dset)570 char *ntolabel_8601 (char *datestr, int t, const DATASET *dset)
571 {
572     *datestr = '\0';
573 
574     if (calendar_data(dset)) {
575 	if (dataset_has_markers(dset)) {
576 	    strcpy(datestr, dset->S[t]);
577 	} else {
578 	    calendar_date_string(datestr, t, dset);
579 	}
580     } else if (dataset_is_decennial(dset)) {
581 	double x = dset->sd0 + 10 * t;
582 	int yr = lrint(x);
583 
584 	sprintf(datestr, "%d-01-01", yr);
585     } else {
586 	double x = date_as_double(t, dset->pd, dset->sd0);
587 	int maj = lrint(floor(x));
588 
589 	if (dset->pd == 1) {
590 	    sprintf(datestr, "%d-01-01", maj);
591 	} else if (dset->pd == 12) {
592 	    int min = lrint(100 * (x - floor(x)));
593 
594 	    sprintf(datestr, "%d-%02d-01", maj, min);
595 	} else if (dset->pd == 4) {
596 	    int min = lrint(10 * (x - floor(x)));
597 	    int mo = min==2 ? 4 : min==3? 7 : min==4? 10 : min;
598 
599 	    sprintf(datestr, "%d-%02d-01", maj, mo);
600 	}
601     }
602 
603     return datestr;
604 }
605 
606 #define xround(x) (((x-floor(x))>.5)? ceil(x) : floor(x))
607 
608 /**
609  * get_subperiod:
610  * @t: zero-based observation number.
611  * @dset: data information struct.
612  * @err: location to receive error code, or NULL.
613  *
614  * For "seasonal" time series data (in a broad sense),
615  * determines the sub-period at observation @t. The "sub-period"
616  * might be a quarter, month, hour or whatever.  The value
617  * returned is zero-based (e.g. first quarter = 0).
618  * If the data are not "seasonal", 0 is returned and if
619  * @err is non-NULL it receives a non-zero error code.
620  *
621  * Returns: the sub-period.
622  */
623 
get_subperiod(int t,const DATASET * dset,int * err)624 int get_subperiod (int t, const DATASET *dset, int *err)
625 {
626     int ret = 0;
627 
628     if (!dataset_is_seasonal(dset)) {
629 	if (err != NULL) {
630 	    *err = E_PDWRONG;
631 	}
632 	return 0;
633     }
634 
635     if (dataset_is_weekly(dset)) {
636 	/* bodge -- what else to do? */
637 	ret = t % dset->pd;
638     } else if (calendar_data(dset)) {
639 	/* dated daily data */
640 	char datestr[12];
641 
642 	calendar_date_string(datestr, t, dset);
643 	ret = weekday_from_date(datestr);
644     } else if (dataset_is_daily(dset)) {
645 	/* bodge, again */
646 	ret = t % dset->pd;
647     } else {
648 	/* quarterly, monthly, hourly... */
649 	double x = date_as_double(t, dset->pd, dset->sd0);
650 	int i, d = ceil(log10(dset->pd));
651 
652 	x -= floor(x);
653 	for (i=0; i<d; i++) {
654 	    x *= 10;
655 	}
656 	ret = xround(x) - 1;
657     }
658 
659     return ret;
660 }
661 
662 /**
663  * get_precision:
664  * @x: data array.
665  * @n: length of @x.
666  * @placemax: the maximum number of decimal places to try.
667  *
668  * Find the number of decimal places required to represent a given
669  * data series uniformly and accurately, if possible.
670  *
671  * Returns: the required number of decimal places or
672  * #PMAX_NOT_AVAILABLE if it can't be done.
673  */
674 
get_precision(const double * x,int n,int placemax)675 int get_precision (const double *x, int n, int placemax)
676 {
677     int t, p, pmax = 0;
678     char *s, numstr[64];
679     int len, n_ok = 0;
680     double z;
681 
682     for (t=0; t<n; t++) {
683 	if (!na(x[t])) {
684 	    z = fabs(x[t]);
685 	    /* escape clause: numbers are too big or too small for
686 	       this treatment */
687 	    if (z > 0 && (z < 1.0e-6 || z > 1.0e+8)) {
688 		return PMAX_NOT_AVAILABLE;
689 	    }
690 	    n_ok++;
691 	}
692     }
693 
694     if (n_ok == 0) {
695 	return PMAX_NOT_AVAILABLE;
696     }
697 
698     for (t=0; t<n; t++) {
699 	if (!na(x[t])) {
700 	    p = placemax;
701 	    sprintf(numstr, "%.*f", p, fabs(x[t]));
702 	    /* go to the end and drop trailing zeros */
703 	    len = strlen(numstr);
704 	    s = numstr + len - 1;
705 	    while (*s-- == '0') {
706 		p--;
707 		len--;
708 	    }
709 	    if (len > 10) {
710 		/* this is going to be too big */
711 		return PMAX_NOT_AVAILABLE;
712 	    }
713 	    if (p > pmax) {
714 		pmax = p;
715 	    }
716 	}
717     }
718 
719     return pmax;
720 }
721 
722 struct extmap {
723     GretlFileType ftype;
724     const char *ext;
725 };
726 
727 static struct extmap data_ftype_map[] = {
728     { GRETL_XML_DATA,     ".gdt" },
729     { GRETL_BINARY_DATA,  ".gdtb" },
730     { GRETL_CSV,          ".csv" },
731     { GRETL_OCTAVE,       ".m" },
732     { GRETL_GNUMERIC,     ".gnumeric" },
733     { GRETL_XLS,          ".xls" },
734     { GRETL_XLSX,         ".xlsx" },
735     { GRETL_ODS,          ".ods" },
736     { GRETL_WF1,          ".wf1" },
737     { GRETL_DTA,          ".dta" },
738     { GRETL_SAV,          ".sav" },
739     { GRETL_SAS,          ".xpt" },
740     { GRETL_JMULTI,       ".dat" }
741 };
742 
743 static const char *map_suffixes[] = {
744     ".json",
745     ".geojson",
746     ".dbf",
747     ".shp"
748 };
749 
get_filename_extension(const char * fname)750 static const char *get_filename_extension (const char *fname)
751 {
752     const char *ext = strrchr(fname, '.');
753 
754     if (ext != NULL && strchr(ext, '/')) {
755 	/* the rightmost dot is not in the basename */
756 	ext = NULL;
757     }
758 
759 #ifdef WIN32
760     if (ext != NULL && strchr(ext, '\\')) {
761 	ext = NULL;
762     }
763 #endif
764 
765     return ext;
766 }
767 
data_file_type_from_extension(const char * ext)768 static GretlFileType data_file_type_from_extension (const char *ext)
769 {
770     int i, n = G_N_ELEMENTS(data_ftype_map);
771 
772     for (i=0; i<n; i++) {
773 	if (!g_ascii_strcasecmp(ext, data_ftype_map[i].ext)) {
774 	    return data_ftype_map[i].ftype;
775 	}
776     }
777 
778     /* a few extras */
779     if (!g_ascii_strcasecmp(ext, ".txt") ||
780 	!g_ascii_strcasecmp(ext, ".asc") ||
781 	!g_ascii_strcasecmp(ext, ".gz")) {
782 	return GRETL_CSV;
783     }
784 
785     /* map metadata */
786     n = G_N_ELEMENTS(map_suffixes);
787     for (i=0; i<n; i++) {
788 	if (!g_ascii_strcasecmp(ext, map_suffixes[i])) {
789 	    return GRETL_MAP;
790 	}
791     }
792 
793     return GRETL_UNRECOGNIZED;
794 }
795 
data_file_type_from_name(const char * fname)796 GretlFileType data_file_type_from_name (const char *fname)
797 {
798     const char *ext = strrchr(fname, '.');
799 
800     if (ext != NULL && strchr(ext, '/')) {
801 	/* the rightmost dot is not in the basename */
802 	ext = NULL;
803     }
804 
805 #ifdef WIN32
806     if (ext != NULL && strchr(ext, '\\')) {
807 	ext = NULL;
808     }
809 #endif
810 
811     if (ext != NULL) {
812 	return data_file_type_from_extension(ext);
813     }
814 
815     return GRETL_UNRECOGNIZED;
816 }
817 
818 #define non_native(o) (o & (OPT_M | OPT_R | OPT_D | OPT_G | OPT_J))
819 
820 static GretlDataFormat
format_from_opt_or_name(gretlopt opt,const char * fname,char * delim,int * add_ext,int * gzip,int * err)821 format_from_opt_or_name (gretlopt opt, const char *fname,
822 			 char *delim, int *add_ext,
823 			 int *gzip, int *err)
824 {
825     GretlDataFormat fmt = GRETL_FMT_GDT;
826 
827     if (has_suffix(fname, ".gdt")) {
828 	if (non_native(opt)) {
829 	    *err = E_BADOPT;
830 	}
831 	return GRETL_FMT_GDT;
832     } else if (has_suffix(fname, ".gdtb")) {
833 	if (non_native(opt)) {
834 	    *err = E_BADOPT;
835 	}
836 	return GRETL_FMT_BINARY;
837     } else if (has_suffix(fname, ".geojson")) {
838 	return GRETL_FMT_JSON;
839     }
840 
841     if (opt & OPT_M) {
842 	fmt = GRETL_FMT_OCTAVE;
843     } else if (opt & OPT_R) {
844 	fmt = GRETL_FMT_R;
845     } else if (opt & OPT_D) {
846 	fmt = GRETL_FMT_DB;
847     } else if (opt & OPT_G) {
848 	fmt = GRETL_FMT_DAT;
849     } else if (opt & OPT_J) {
850 	fmt = GRETL_FMT_JM;
851     }
852 
853     if (fmt == GRETL_FMT_GDT) {
854 	if (has_suffix(fname, ".R")) {
855 	    fmt = GRETL_FMT_R;
856 	} else if (has_suffix(fname, ".csv")) {
857 	    fmt = GRETL_FMT_CSV;
858 	} else if (has_suffix(fname, ".csv.gz")) {
859 	    fmt = GRETL_FMT_CSV;
860 	    *gzip = 1;
861 	} else if (has_suffix(fname, ".m")) {
862 	    fmt = GRETL_FMT_OCTAVE;
863 	} else if (has_suffix(fname, ".txt") ||
864 		   has_suffix(fname, ".asc")) {
865 	    fmt = GRETL_FMT_CSV;
866 	    *delim = ' ';
867 	} else if (has_suffix(fname, ".dta")) {
868 	    fmt = GRETL_FMT_DTA;
869 	} else if (has_suffix(fname, ".bin")) {
870 	    fmt = GRETL_FMT_DB;
871 	}
872     }
873 
874     if (fmt == GRETL_FMT_GDT) {
875 	*add_ext = 1;
876     }
877 
878     return fmt;
879 }
880 
date_maj_min(int t,const DATASET * dset,int * maj,int * min)881 void date_maj_min (int t, const DATASET *dset, int *maj, int *min)
882 {
883     char obs[OBSLEN];
884 
885     ntolabel(obs, t, dset);
886 
887     if (maj != NULL) {
888 	*maj = atoi(obs);
889     }
890 
891     if (min != NULL) {
892 	char *s, sep = ':';
893 
894 	if (strchr(obs, sep) == NULL) {
895 	    if (dset->pd == 4 && strchr(obs, 'Q')) {
896 		sep = 'Q';
897 	    } else if (dset->pd == 12 && strchr(obs, 'M')) {
898 		sep = 'M';
899 	    }
900 	}
901 
902 	s = strchr(obs, sep);
903 	if (s != NULL && strlen(s) > 1) {
904 	    *min = atoi(s + 1);
905 	} else {
906 	    *min = 1;
907 	}
908     }
909 }
910 
911 #define NO_PMAX(p,k) (p == NULL || p[k-1] == PMAX_NOT_AVAILABLE)
912 
913 #define TMPLEN 64
914 
csv_data_out(const DATASET * dset,const int * list,int print_obs,int digits,char decpoint,char delim,FILE * fp,gzFile fz)915 static void csv_data_out (const DATASET *dset, const int *list,
916 			  int print_obs, int digits, char decpoint,
917 			  char delim, FILE *fp, gzFile fz)
918 {
919     const char *NA = get_csv_na_write_string();
920     char tmp[TMPLEN];
921     double xt;
922     int popit = 0, dotsub = 0;
923     int t, i, vi;
924 
925     if (decpoint == '.' && get_local_decpoint() == ',') {
926 	gretl_push_c_numeric_locale();
927 	popit = 1;
928     } else if (decpoint == ',' && get_local_decpoint() == '.') {
929 	dotsub = 1;
930     }
931 
932     for (t=dset->t1; t<=dset->t2; t++) {
933 	if (print_obs) {
934 	    if (dset->S != NULL) {
935 		if (fz != NULL) {
936 		    gzprintf(fz, "\"%s\"%c", dset->S[t], delim);
937 		} else {
938 		    fprintf(fp, "\"%s\"%c", dset->S[t], delim);
939 		}
940 	    } else {
941 		ntolabel(tmp, t, dset);
942 		if (quarterly_or_monthly(dset)) {
943 		    modify_date_for_csv(tmp, dset->pd);
944 		}
945 		if (fz != NULL) {
946 		    gzprintf(fz, "%s%c", tmp, delim);
947 		} else {
948 		    fprintf(fp, "%s%c", tmp, delim);
949 		}
950 	    }
951 	}
952 
953 	for (i=1; i<=list[0]; i++) {
954 	    vi = list[i];
955 	    xt = dset->Z[vi][t];
956 	    if (na(xt)) {
957 		fputs(NA, fp);
958 	    } else {
959 		if (is_string_valued(dset, vi)) {
960 		    const char *st;
961 
962 		    st = series_get_string_for_obs(dset, vi, t);
963 		    if (st != NULL) {
964 			*tmp = '\0';
965 			strcat(tmp, "\"");
966 			strncat(tmp, st, TMPLEN - 3);
967 			strcat(tmp, "\"");
968 		    } else {
969 			fprintf(stderr, "missing string at t=%d, vi=%d, xt=%g\n",
970 				t, vi, xt);
971 			strcpy(tmp, "\"NA\"");
972 		    }
973 		} else if (series_is_coded(dset, vi)) {
974 		    sprintf(tmp, "\"%d\"", (int) xt);
975 		} else {
976 		    sprintf(tmp, "%.*g", digits, xt);
977 		}
978 		if (dotsub) {
979 		    gretl_charsub(tmp, '.', ',');
980 		}
981 		if (fz != NULL) {
982 		    gzputs(fz, tmp);
983 		} else {
984 		    fputs(tmp, fp);
985 		}
986 	    }
987 	    if (fz != NULL) {
988 		gzputc(fz, i < list[0] ? delim : '\n');
989 	    } else {
990 		fputc(i < list[0] ? delim : '\n', fp);
991 	    }
992 	}
993     }
994 
995     if (popit) {
996 	gretl_pop_c_numeric_locale();
997     }
998 }
999 
markers_are_unique(const DATASET * dset)1000 static int markers_are_unique (const DATASET *dset)
1001 {
1002     int t, s;
1003 
1004     for (t=dset->t1; t<dset->t2; t++) {
1005 	for (s=t+1; s<=dset->t2; s++) {
1006 	    if (strcmp(dset->S[t], dset->S[s]) == 0) {
1007 		return 0;
1008 	    }
1009 	}
1010     }
1011 
1012     return 1;
1013 }
1014 
R_data_out(const DATASET * dset,const int * list,int digits,FILE * fp)1015 static void R_data_out (const DATASET *dset, const int *list,
1016 			int digits, FILE *fp)
1017 {
1018     int print_markers = 0;
1019     double xt;
1020     int t, i, vi;
1021 
1022     if (dset->S != NULL) {
1023 	print_markers = markers_are_unique(dset);
1024     }
1025 
1026     for (t=dset->t1; t<=dset->t2; t++) {
1027 	if (print_markers) {
1028 	    fprintf(fp, "\"%s\" ", dset->S[t]);
1029 	}
1030 	for (i=1; i<=list[0]; i++) {
1031 	    vi = list[i];
1032 	    xt = dset->Z[vi][t];
1033 	    if (na(xt)) {
1034 		fputs("NA", fp);
1035 	    } else if (is_string_valued(dset, vi)) {
1036 		fprintf(fp, "\"%s\"", series_get_string_for_obs(dset, vi, t));
1037 	    } else if (series_is_coded(dset, vi)) {
1038 		fprintf(fp, "\"%d\"", (int) xt);
1039 	    } else {
1040 		fprintf(fp, "%.*g", digits, xt);
1041 	    }
1042 	    fputc(i < list[0] ? ' ' : '\n', fp);
1043 	}
1044     }
1045 }
1046 
write_dta_data(const char * fname,const int * list,gretlopt opt,const DATASET * dset)1047 static int write_dta_data (const char *fname, const int *list,
1048 			   gretlopt opt, const DATASET *dset)
1049 {
1050     int (*exporter) (const char *, const int *, gretlopt,
1051 		     const DATASET *);
1052     int err = 0;
1053 
1054     exporter = get_plugin_function("stata_export");
1055 
1056     if (exporter == NULL) {
1057         err = 1;
1058     } else {
1059 	err = (*exporter)(fname, list, opt, dset);
1060     }
1061 
1062     return err;
1063 }
1064 
write_map_data(const char * fname,const int * list,const DATASET * dset)1065 static int write_map_data (const char *fname,
1066 			   const int *list,
1067 			   const DATASET *dset)
1068 {
1069     gretl_bundle *b = NULL;
1070     int err = 0;
1071 
1072     b = get_current_map(dset, list, &err);
1073 
1074     if (!err) {
1075 	err = gretl_bundle_write_to_file(b, fname, 0);
1076     }
1077 
1078     gretl_bundle_destroy(b);
1079 
1080     return err;
1081 }
1082 
1083 #define DEFAULT_CSV_DIGITS 15
1084 
real_write_data(const char * fname,int * list,const DATASET * dset,gretlopt opt,int progress,PRN * prn)1085 static int real_write_data (const char *fname, int *list,
1086 			    const DATASET *dset, gretlopt opt,
1087 			    int progress, PRN *prn)
1088 {
1089     int i, t, v, l0;
1090     GretlDataFormat fmt;
1091     char datfile[MAXLEN];
1092     int n = dset->n;
1093     int pop_locale = 0;
1094     char delim = 0;
1095     FILE *fp = NULL;
1096     gzFile fz = NULL;
1097     int freelist = 0;
1098     int csv_digits = 0;
1099     int add_ext = 0;
1100     int gzip = 0;
1101     double xx;
1102     int err = 0;
1103 
1104     gretl_error_clear();
1105 
1106     if (list != NULL && list[0] == 0) {
1107 	return E_ARGS;
1108     }
1109 
1110     fmt = format_from_opt_or_name(opt, fname, &delim, &add_ext,
1111 				  &gzip, &err);
1112     if (err) {
1113 	return err;
1114     }
1115 
1116     if (list == NULL) {
1117 	list = full_var_list(dset, &l0);
1118 	if (l0 == 0) {
1119 	    return E_ARGS;
1120 	} else if (list == NULL) {
1121 	    return E_ALLOC;
1122 	} else {
1123 	    freelist = 1;
1124 	}
1125     }
1126 
1127     l0 = list[0];
1128     fname = gretl_maybe_switch_dir(fname);
1129 
1130     if (fmt == GRETL_FMT_GDT || fmt == GRETL_FMT_BINARY) {
1131 	/* write native data file (.gdt or .gdtb) */
1132 	err = gretl_write_gdt(fname, list, dset, opt, progress);
1133 	goto write_exit;
1134     }
1135 
1136     if (fmt == GRETL_FMT_DB) {
1137 	/* native type database file */
1138 	err = write_db_data(fname, list, opt, dset);
1139 	goto write_exit;
1140     }
1141 
1142     if (fmt == GRETL_FMT_DTA) {
1143 	/* Stata */
1144 	err = write_dta_data(fname, list, opt, dset);
1145 	goto write_exit;
1146     }
1147 
1148     if (fmt == GRETL_FMT_JSON) {
1149 	/* writing map as geojson */
1150 	err = write_map_data(fname, list, dset);
1151 	goto write_exit;
1152     }
1153 
1154     strcpy(datfile, fname);
1155 
1156     /* open file for output */
1157     if (gzip) {
1158 	fz = gretl_gzopen(datfile, "wb1");
1159     } else {
1160 	fp = gretl_fopen(datfile, "wb");
1161     }
1162     if (fp == NULL && fz == NULL) {
1163 	err = E_FOPEN;
1164 	goto write_exit;
1165     }
1166 
1167     csv_digits = libset_get_int(CSV_DIGITS);
1168 
1169     if (csv_digits <= 0) {
1170 	csv_digits = DEFAULT_CSV_DIGITS;
1171     }
1172 
1173     if (fmt != GRETL_FMT_CSV) {
1174 	/* ensure C locale for data output */
1175 	gretl_push_c_numeric_locale();
1176 	pop_locale = 1;
1177     }
1178 
1179     if (fmt == GRETL_FMT_CSV) {
1180 	const char *msg = get_optval_string(STORE, OPT_E);
1181 	char decpoint = get_data_export_decpoint();
1182 	int print_obs = 0;
1183 
1184 	if (opt & OPT_I) {
1185 	    /* the CSV --decimal-comma option */
1186 	    decpoint = ',';
1187 	    delim = ';';
1188 	} else if (delim == 0) {
1189 	    delim = get_data_export_delimiter();
1190 	}
1191 
1192 	if (msg != NULL && *msg != '\0') {
1193 	    if (gzip) {
1194 		gzprintf(fz, "# %s\n", msg);
1195 	    } else {
1196 		fprintf(fp, "# %s\n", msg);
1197 	    }
1198 	}
1199 
1200 	if (!(opt & OPT_X)) {
1201 	    /* OPT_X prohibits printing of observation strings */
1202 	    print_obs = dataset_is_time_series(dset) || dset->S != NULL;
1203 	}
1204 
1205 	if (!(opt & OPT_N)) {
1206 	    /* header: variable names */
1207 	    if (print_obs && (dset->S != NULL || dset->structure != CROSS_SECTION)) {
1208 		if (gzip) {
1209 		    gzprintf(fz, "obs%c", delim);
1210 		} else {
1211 		    fprintf(fp, "obs%c", delim);
1212 		}
1213 	    }
1214 	    for (i=1; i<l0; i++) {
1215 		if (gzip) {
1216 		    gzprintf(fz, "%s%c", dset->varname[list[i]], delim);
1217 		} else {
1218 		    fprintf(fp, "%s%c", dset->varname[list[i]], delim);
1219 		}
1220 	    }
1221 	    if (gzip) {
1222 		gzprintf(fz, "%s\n", dset->varname[list[l0]]);
1223 	    } else {
1224 		fprintf(fp, "%s\n", dset->varname[list[l0]]);
1225 	    }
1226 	}
1227 
1228 	csv_data_out(dset, list, print_obs, csv_digits,
1229 		     decpoint, delim, fp, fz);
1230     } else if (fmt == GRETL_FMT_R) {
1231 	/* friendly to GNU R */
1232 	if (dataset_is_time_series(dset)) {
1233 	    char datestr[OBSLEN];
1234 
1235 	    ntolabel(datestr, dset->t1, dset);
1236 	    fprintf(fp, "# time-series data: start = %s, frequency = %d\n",
1237 		    datestr, dset->pd);
1238 	}
1239 
1240 	for (i=1; i<l0; i++) {
1241 	    fprintf(fp, "%s ", dset->varname[list[i]]);
1242 	}
1243 	fprintf(fp, "%s\n", dset->varname[list[l0]]);
1244 
1245 	R_data_out(dset, list, csv_digits, fp);
1246     } else if (fmt == GRETL_FMT_OCTAVE) {
1247 	/* GNU Octave: write out data as several matrices (one per
1248 	   series) in the same file */
1249 
1250 	for (i=1; i<=list[0]; i++) {
1251 	    v = list[i];
1252 	    fprintf(fp, "# name: %s\n# type: matrix\n# rows: %d\n# columns: 1\n",
1253 		    dset->varname[v], n);
1254 	    for (t=dset->t1; t<=dset->t2; t++) {
1255 		xx = dset->Z[v][t];
1256 		if (na(xx)) {
1257 		    fputs("NaN ", fp);
1258 		} else {
1259 		    fprintf(fp, "%.*g\n", csv_digits, xx);
1260 		}
1261 	    }
1262 	}
1263     } else if (fmt == GRETL_FMT_DAT) {
1264 	/* PcGive: data file with load info */
1265 	int pd = dset->pd;
1266 
1267 	for (i=1; i<=list[0]; i++) {
1268 	    fprintf(fp, ">%s ", dset->varname[list[i]]);
1269 	    if (dset->structure == TIME_SERIES &&
1270 		(pd == 1 || pd == 4 || pd == 12)) {
1271 		int maj, min;
1272 
1273 		date_maj_min(dset->t1, dset, &maj, &min);
1274 		fprintf(fp, "%d %d ", maj, min);
1275 		date_maj_min(dset->t2, dset, &maj, &min);
1276 		fprintf(fp, "%d %d %d", maj, min, pd);
1277 	    } else {
1278 		fprintf(fp, "%d 1 %d 1 1", dset->t1, dset->t2);
1279 	    }
1280 
1281 	    fputc('\n', fp);
1282 
1283 	    for (t=dset->t1; t<=dset->t2; t++) {
1284 		v = list[i];
1285 		xx = dset->Z[v][t];
1286 		if (na(xx)) {
1287 		    fprintf(fp, "-9999.99");
1288 		} else {
1289 		    fprintf(fp, "%.*g", csv_digits, xx);
1290 		}
1291 		fputc('\n', fp);
1292 	    }
1293 	    fputc('\n', fp);
1294 	}
1295     } else if (fmt == GRETL_FMT_JM) {
1296 	/* JMulti: ascii with comments and date info */
1297 	const char *vlabel;
1298 	int maj, min;
1299 
1300 	fputs("/*\n", fp);
1301 	for (i=1; i<=list[0]; i++) {
1302 	    v = list[i];
1303 	    vlabel = series_get_label(dset, v);
1304 	    fprintf(fp, " %s: %s\n", dset->varname[v],
1305 		    vlabel == NULL ? "" : vlabel);
1306 	}
1307 	fputs("*/\n", fp);
1308 	date_maj_min(dset->t1, dset, &maj, &min);
1309 	if (dset->pd == 4 || dset->pd == 12) {
1310 	    fprintf(fp, "<%d %c%d>\n", maj, (dset->pd == 4)? 'Q' : 'M', min);
1311 	} else if (dset->pd == 1) {
1312 	    fprintf(fp, "<%d>\n", maj);
1313 	} else {
1314 	    fputs("<1>\n", fp);
1315 	}
1316 	for (i=1; i<=list[0]; i++) {
1317 	    v = list[i];
1318 	    fprintf(fp, " %s", dset->varname[v]);
1319 	}
1320 	fputc('\n', fp);
1321 	for (t=dset->t1; t<=dset->t2; t++) {
1322 	    for (i=1; i<=list[0]; i++) {
1323 		v = list[i];
1324 		if (na(dset->Z[v][t])) {
1325 		    fputs("NaN ", fp);
1326 		} else {
1327 		    fprintf(fp, "%.*g ", csv_digits, dset->Z[v][t]);
1328 		}
1329 	    }
1330 	    fputc('\n', fp);
1331 	}
1332     }
1333 
1334     if (pop_locale) {
1335 	gretl_pop_c_numeric_locale();
1336     }
1337 
1338     if (fz != NULL) {
1339 	gzclose(fz);
1340     } else if (fp != NULL) {
1341 	fclose(fp);
1342     }
1343 
1344  write_exit:
1345 
1346     if (!err && prn != NULL) {
1347 	if (add_ext) {
1348 	    pprintf(prn, _("wrote %s.gdt\n"), fname);
1349 	} else {
1350 	    pprintf(prn, _("wrote %s\n"), fname);
1351 	}
1352     }
1353 
1354     if (freelist) {
1355 	free(list);
1356     }
1357 
1358     return err;
1359 }
1360 
1361 /**
1362  * write_data:
1363  * @fname: name of file to write.
1364  * @list: list of variables to write (or %NULL to write all series).
1365  * @dset: dataset struct.
1366  * @opt: option flag indicating format in which to write the data.
1367  * @prn: gretl printer or NULL.
1368  *
1369  * Write out a data file containing the values of the given set
1370  * of variables.
1371  *
1372  * Returns: 0 on successful completion, non-zero on error.
1373  */
1374 
write_data(const char * fname,int * list,const DATASET * dset,gretlopt opt,PRN * prn)1375 int write_data (const char *fname, int *list, const DATASET *dset,
1376 		gretlopt opt, PRN *prn)
1377 {
1378     return real_write_data(fname, list, dset, opt, 0, prn);
1379 }
1380 
gui_write_data(const char * fname,int * list,const DATASET * dset,gretlopt opt)1381 int gui_write_data (const char *fname, int *list, const DATASET *dset,
1382 		    gretlopt opt)
1383 {
1384     return real_write_data(fname, list, dset, opt, 1, NULL);
1385 }
1386 
1387 /**
1388  * is_gzipped:
1389  * @fname: filename to examine.
1390  *
1391  * Determine if the given file is gzipped.
1392  *
1393  * Returns: 1 in case of a gzipped file, 0 if not gzipped or
1394  * inaccessible.
1395  *
1396  */
1397 
is_gzipped(const char * fname)1398 int is_gzipped (const char *fname)
1399 {
1400     FILE *fp;
1401     int gz = 0;
1402 
1403     if (fname == NULL || *fname == '\0') {
1404 	return 0;
1405     }
1406 
1407     fp = gretl_fopen(fname, "rb");
1408     if (fp == NULL) {
1409 	return 0;
1410     }
1411 
1412     if (fgetc(fp) == 037 && fgetc(fp) == 0213) {
1413 	gz = 1;
1414     }
1415 
1416     fclose(fp);
1417 
1418     return gz;
1419 }
1420 
1421 /**
1422  * gretl_get_data:
1423  * @fname: name of file to try.
1424  * @dset: dataset struct.
1425  * @opt: option flags.
1426  * @prn: where messages should be written.
1427  *
1428  * Read "native" data from file into gretl's work space,
1429  * allocating space as required. This function handles
1430  * both native XML data format and native binary format.
1431  * It also handles incomplete information: it can perform
1432  * path-searching on @fname, and will try adding the .gdt
1433  * or .gdtb extension to @fname if this is not given.
1434  *
1435  * Note that a more straightforward function for reading a
1436  * native gretl data file, given the correct path, is
1437  * gretl_read_gdt().
1438  *
1439  * The only applicable option is that @opt may contain
1440  * OPT_T when appending data to a panel dataset: in
1441  * that case we try to interpret the new data as time
1442  * series, in common across all panel units. In most
1443  * cases, just give OPT_NONE.
1444  *
1445  * Returns: 0 on successful completion, non-zero otherwise.
1446  */
1447 
gretl_get_data(char * fname,DATASET * dset,gretlopt opt,PRN * prn)1448 int gretl_get_data (char *fname, DATASET *dset,
1449 		    gretlopt opt, PRN *prn)
1450 {
1451     gretlopt append_opt = OPT_NONE;
1452     int gdtsuff;
1453     char *test;
1454     int err = 0;
1455 
1456     gretl_error_clear();
1457 
1458 #if 0
1459     fprintf(stderr, "gretl_get_data: calling addpath\n");
1460 #endif
1461 
1462     test = gretl_addpath(fname, 0);
1463     if (test == NULL) {
1464 	return E_FOPEN;
1465     }
1466 
1467     gdtsuff = has_native_data_suffix(fname);
1468 
1469     if (opt & OPT_T) {
1470 	append_opt = OPT_T;
1471     }
1472 
1473     if (gdtsuff) {
1474 	/* specific processing for gretl datafiles  */
1475 	err = gretl_read_gdt(fname, dset, append_opt, prn);
1476     } else {
1477 	/* try fallback to a "csv"-type import */
1478 	err = import_csv(fname, dset, append_opt, prn);
1479     }
1480 
1481     return err;
1482 }
1483 
1484 /**
1485  * open_nulldata:
1486  * @dset: dataset struct.
1487  * @data_status: indicator for whether a data file is currently open
1488  * in gretl's work space (1) or not (0).
1489  * @length: desired length of data series.
1490  * @opt: may contain OPT_N to suppress addition of an index series.
1491  * @prn: gretl printing struct.
1492  *
1493  * Create an empty "dummy" data set, suitable for simulations.
1494  *
1495  * Returns: 0 on successful completion, non-zero otherwise.
1496  *
1497  */
1498 
open_nulldata(DATASET * dset,int data_status,int length,gretlopt opt,PRN * prn)1499 int open_nulldata (DATASET *dset, int data_status, int length,
1500 		   gretlopt opt, PRN *prn)
1501 {
1502     int t;
1503 
1504     /* clear any existing data info */
1505     if (data_status) {
1506 	clear_datainfo(dset, CLEAR_FULL);
1507     }
1508 
1509     /* dummy up the data info */
1510     dset->n = length;
1511     dset->v = (opt & OPT_N)? 1 : 2;
1512     dataset_obs_info_default(dset);
1513 
1514     if (dataset_allocate_varnames(dset)) {
1515 	return E_ALLOC;
1516     }
1517 
1518     /* allocate data storage */
1519     if (allocate_Z(dset, 0)) {
1520 	return E_ALLOC;
1521     }
1522 
1523     if (dset->v > 1) {
1524 	/* add an index var */
1525 	strcpy(dset->varname[1], "index");
1526 	series_set_label(dset, 1, _("index variable"));
1527 	for (t=0; t<dset->n; t++) {
1528 	    dset->Z[1][t] = (double) (t + 1);
1529 	}
1530     }
1531 
1532     if (prn != NULL && gretl_messages_on()) {
1533 	/* print basic info */
1534 	pprintf(prn, _("periodicity: %d, maxobs: %d\n"
1535 		       "observations range: %s to %s\n"),
1536 		dset->pd, dset->n, dset->stobs, dset->endobs);
1537     }
1538 
1539     /* Set sample range to entire length of data-set by default */
1540     dset->t1 = 0;
1541     dset->t2 = dset->n - 1;
1542 
1543     return 0;
1544 }
1545 
extend_markers(DATASET * dset,int old_n,int new_n)1546 static int extend_markers (DATASET *dset, int old_n, int new_n)
1547 {
1548     char **S = realloc(dset->S, new_n * sizeof *S);
1549     int t, err = 0;
1550 
1551     if (S == NULL) {
1552 	err = 1;
1553     } else {
1554 	dset->S = S;
1555 	for (t=old_n; t<new_n && !err; t++) {
1556 	    S[t] = malloc(OBSLEN);
1557 	    if (S[t] == NULL) {
1558 		err = 1;
1559 	    }
1560 	}
1561     }
1562 
1563     return err;
1564 }
1565 
merge_error(const char * msg,PRN * prn)1566 static void merge_error (const char *msg, PRN *prn)
1567 {
1568     pputs(prn, msg);
1569     if (!printing_to_standard_stream(prn)) {
1570 	gretl_errmsg_set(msg);
1571     }
1572 }
1573 
merge_name_error(const char * objname,PRN * prn)1574 static void merge_name_error (const char *objname, PRN *prn)
1575 {
1576     gchar *msg;
1577 
1578     msg = g_strdup_printf("Can't replace %s with a series", objname);
1579     pprintf(prn, "%s\n", msg);
1580     if (!printing_to_standard_stream(prn)) {
1581 	gretl_errmsg_set(msg);
1582     }
1583     g_free(msg);
1584 }
1585 
count_new_vars(const DATASET * d1,const DATASET * d2,PRN * prn)1586 static int count_new_vars (const DATASET *d1, const DATASET *d2,
1587 			   PRN *prn)
1588 {
1589     const char *vname;
1590     int addvars = d2->v - 1;
1591     int i, j;
1592 
1593     /* We start by assuming that all the series in @d2 are new,
1594        then subtract those we find to be already present. We also
1595        check for collision between the names of series to be added and
1596        the names of existing objects other than series.
1597     */
1598 
1599     for (i=1; i<d2->v && addvars >= 0; i++) {
1600 	vname = d2->varname[i];
1601 	if (gretl_is_user_var(vname)) {
1602 	    merge_name_error(vname, prn);
1603 	    addvars = -1;
1604 	} else if (gretl_function_depth() > 0) {
1605 	    if (current_series_index(d1, vname) > 0) {
1606 		addvars--;
1607 	    }
1608 	} else {
1609 	    for (j=1; j<d1->v; j++) {
1610 		if (!strcmp(vname, d1->varname[j])) {
1611 		    addvars--;
1612 		    break;
1613 		}
1614 	    }
1615 	}
1616     }
1617 
1618 #if MERGE_DEBUG
1619     if (gretl_function_depth() == 0) {
1620 	int found;
1621 
1622 	for (i=1; i<d2->v; i++) {
1623 	    found = 0;
1624 	    for (j=1; j<d1->v && !found; j++) {
1625 		if (!strcmp(d2->varname[i], d1->varname[j])) {
1626 		    found = 1;
1627 		}
1628 	    }
1629 	    if (!found) {
1630 		fprintf(stderr, "'%s' in import but not current dataset\n",
1631 			d2->varname[i]);
1632 	    }
1633 	}
1634     }
1635 #endif
1636 
1637     return addvars;
1638 }
1639 
year_special_markers(const DATASET * dset,const DATASET * addset)1640 static int year_special_markers (const DATASET *dset,
1641 				 const DATASET *addset)
1642 {
1643     char *test;
1644     int overlap = 0;
1645     int i, t, err = 0;
1646 
1647     /* See if we can match obs markers in @addset
1648        against years in @dset: we'll try this if all
1649        the markers in addset are integer strings, at
1650        least some of them are within the obs range of
1651        @dset, and none of them are outside of the
1652        "sanity" range of 1 to 2500.
1653     */
1654 
1655     if (!dataset_is_time_series(dset) || dset->pd != 1) {
1656 	return 0;
1657     }
1658 
1659     if (dset->markers || !addset->markers) {
1660 	return 0;
1661     }
1662 
1663     errno = 0;
1664 
1665     for (i=0; i<addset->n; i++) {
1666 	t = strtol(addset->S[i], &test, 10);
1667 	if (*test || errno) {
1668 	    errno = 0;
1669 	    err = 1;
1670 	    break;
1671 	}
1672 	if (t < 1 || t > 2500) {
1673 	    err = 1;
1674 	    break;
1675 	}
1676 	if (!overlap) {
1677 	    t = dateton(addset->S[i], dset);
1678 	    if (t >= 0 && t < dset->n) {
1679 		overlap = 1;
1680 	    }
1681 	}
1682     }
1683 
1684     return !err && overlap;
1685 }
1686 
compare_ranges(const DATASET * targ,const DATASET * src,int newvars,int * offset,int * yrspecial,int * err)1687 static int compare_ranges (const DATASET *targ,
1688 			   const DATASET *src,
1689 			   int newvars,
1690 			   int *offset,
1691 			   int *yrspecial,
1692 			   int *err)
1693 {
1694     int ed0 = dateton(targ->endobs, targ);
1695     int sd1, ed1, addobs = -1;
1696     int range_err = 0;
1697 
1698     if (dataset_is_cross_section(targ) &&
1699 	dataset_is_cross_section(src) &&
1700 	!(targ->markers && src->markers)) {
1701 	if (newvars == 0) {
1702 	    if (src->markers) {
1703 		/* pass the problem on to just_append_rows */
1704 		return 0;
1705 	    } else {
1706 		/* assume the new data should be appended length-wise */
1707 		*offset = ed0 + 1;
1708 		return src->n;
1709 	    }
1710 	} else {
1711 	    /* we've already determined that the series length in
1712 	       @src doesn't match either the full series length or
1713 	       the current sample range in @targ; we therefore have
1714 	       no information with which to match rows for new
1715 	       series
1716 	    */
1717 	    gretl_errmsg_set(_("append: don't know how to align the new series!"));
1718 	    *err = E_DATA;
1719 	    return -1;
1720 	}
1721     }
1722 
1723     sd1 = merge_dateton(src->stobs, targ);
1724     ed1 = merge_dateton(src->endobs, targ);
1725 
1726 #if DATES_DEBUG
1727     fprintf(stderr, "compare_ranges:\n"
1728 	    " targ->n = %d, src->n = %d\n"
1729 	    " targ->stobs = '%s', src->stobs = '%s'\n"
1730 	    " sd1 = %d, ed1 = %d\n",
1731 	    targ->n, src->n, targ->stobs, src->stobs,
1732 	    sd1, ed1);
1733 #endif
1734 
1735     if (sd1 < 0) {
1736 	/* case: new data start earlier than old */
1737 	if (ed1 < 0) {
1738 	    range_err = 1;
1739 	} else if (ed1 > ed0) {
1740 	    range_err = 2;
1741 	} else {
1742 	    *offset = sd1;
1743 	    addobs = 0;
1744 	}
1745     } else if (sd1 == 0 && ed1 == ed0) {
1746 	/* case: exact match of ranges */
1747 	*offset = 0;
1748 	addobs = 0;
1749     } else if (sd1 == 0) {
1750 	/* case: starting obs the same */
1751 	*offset = 0;
1752 	if (ed1 > ed0) {
1753 	    addobs = ed1 - ed0;
1754 	} else {
1755 	    addobs = 0;
1756 	}
1757     } else if (sd1 == ed0 + 1) {
1758 	/* case: new data start right after end of old */
1759 	*offset = sd1;
1760 	addobs = src->n;
1761     } else if (sd1 > 0) {
1762 	/* case: new data start later than old */
1763 	if (sd1 <= ed0) {
1764 	    /* but there's some overlap */
1765 	    *offset = sd1;
1766 	    if (ed1 > ed0) {
1767 		addobs = ed1 - ed0;
1768 	    } else {
1769 		addobs = 0;
1770 	    }
1771 	}
1772     }
1773 
1774     if (range_err) {
1775 	/* try another approach? */
1776 	*yrspecial = year_special_markers(targ, src);
1777 	if (*yrspecial) {
1778 	    addobs = 0;
1779 	}
1780     }
1781 
1782     if (addobs < 0) {
1783 	if (range_err == 1) {
1784 	    fputs("compare_ranges: no overlap, can't merge\n", stderr);
1785 	} else if (range_err == 2) {
1786 	    fputs("compare ranges: new data start earlier, end later\n", stderr);
1787 	} else {
1788 	    fputs("compare_ranges: flagging error\n", stderr);
1789 	}
1790     }
1791 
1792     return addobs;
1793 }
1794 
1795 /* Determine whether there's any overlap between the calendar
1796    in @addset and that in @dset. Return 0 on success (there is
1797    an overlap), non-zero otherwise.
1798 */
1799 
check_for_overlap(const DATASET * dset,const DATASET * addset,int * offset)1800 static int check_for_overlap (const DATASET *dset,
1801 			      const DATASET *addset,
1802 			      int *offset)
1803 {
1804     int at1 = merge_dateton(addset->stobs, dset);
1805     int at2 = merge_dateton(addset->endobs, dset);
1806 
1807     if (!(at1 >= dset->n) && !(at2 < 0)) {
1808 	/* OK, there must be some overlap */
1809 	*offset = at1;
1810 	return 0;
1811     } else {
1812 	/* either the "add" data start after the original data end,
1813 	   or they end before the originals start, no there's no
1814 	   overlap
1815 	*/
1816 	gretl_errmsg_set("No overlap in data ranges");
1817 	return E_DATA;
1818     }
1819 }
1820 
1821 /* When appending data to a current panel dataset, and the length of
1822    the series in the new data is less than the full panel size
1823    (n * T), try to determine if it's OK to expand the incoming data to
1824    match.
1825 
1826    We'll say it's OK if the new series length equals the panel T: in
1827    that case we'll take the new data to be time-series, which should
1828    be replicated for each panel unit.
1829 
1830    A second possibility arises if the length of the new series
1831    equals the panel n: in that case we could treat it as a time-
1832    invariant characteristic of the panel unit, which should be
1833    replicated for each time period.  But note that if OPT_T is
1834    given, this second expansion is forbidden: the user has
1835    stipulated that the new data are time-varying.
1836 */
1837 
panel_expand_ok(DATASET * dset,DATASET * addinfo,gretlopt opt)1838 static int panel_expand_ok (DATASET *dset, DATASET *addinfo,
1839 			    gretlopt opt)
1840 {
1841     int n = dset->n / dset->pd;
1842     int T = dset->pd;
1843     int ok = 0;
1844 
1845     if (addinfo->n == T) {
1846 	ok = 1;
1847     } else if (!(opt & OPT_T) &&
1848 	       addinfo->n == n &&
1849 	       addinfo->pd == 1) {
1850 	ok = 1;
1851     }
1852 
1853     return ok;
1854 }
1855 
panel_append_special(int addvars,DATASET * dset,DATASET * addset,gretlopt opt,PRN * prn)1856 static int panel_append_special (int addvars,
1857 				 DATASET *dset,
1858 				 DATASET *addset,
1859 				 gretlopt opt,
1860 				 PRN *prn)
1861 {
1862     int n = dset->n / dset->pd;
1863     int T = dset->pd;
1864     int k = dset->v;
1865     int tsdata;
1866     int i, j, s, p, t;
1867     int err = 0;
1868 
1869     if (addvars > 0 && dataset_add_series(dset, addvars)) {
1870 	merge_error(_("Out of memory!\n"), prn);
1871 	err = E_ALLOC;
1872     }
1873 
1874     tsdata = ((opt & OPT_T) || addset->n != n);
1875 
1876     for (i=1; i<addset->v && !err; i++) {
1877 	int v = series_index(dset, addset->varname[i]);
1878 
1879 	if (v >= k) {
1880 	    /* a new variable */
1881 	    v = k++;
1882 	    strcpy(dset->varname[v], addset->varname[i]);
1883 	    copy_varinfo(dset->varinfo[v], addset->varinfo[i]);
1884 	}
1885 
1886 	s = 0;
1887 	for (j=0; j<n; j++) {
1888 	    /* loop across units */
1889 	    for (t=0; t<T; t++) {
1890 		/* loop across periods */
1891 		p = (tsdata)? t : j;
1892 		dset->Z[v][s++] = addset->Z[i][p];
1893 	    }
1894 	}
1895     }
1896 
1897     return err;
1898 }
1899 
markers_compatible(const DATASET * d1,DATASET * d2,int * offset)1900 static int markers_compatible (const DATASET *d1, DATASET *d2,
1901 			       int *offset)
1902 {
1903     int ret = 0;
1904 
1905     if (d1->markers == 0 && d2->markers == 0) {
1906 	*offset = d1->n;
1907 	ret = 1;
1908     } else if (d1->markers == 0) {
1909 	/* markers "on the right only": are they consecutive
1910 	   integers starting between 1 and d1->n + 1?
1911 	*/
1912 	if (integer_string(d2->S[0])) {
1913 	    int k0 = atoi(d2->S[0]);
1914 
1915 	    if (k0 >= 1 && k0 <= d1->n + 1) {
1916 		int i, k1;
1917 
1918 		ret = 1;
1919 		for (i=1; i<d2->n && ret; i++) {
1920 		    if (!integer_string(d2->S[i])) {
1921 			ret = 0;
1922 		    } else if ((k1 = atoi(d2->S[i])) != k0 + 1) {
1923 			ret = 0;
1924 		    } else {
1925 			k0 = k1;
1926 		    }
1927 		}
1928 	    }
1929 	    if (ret) {
1930 		*offset = atoi(d2->S[0]) - 1;
1931 		/* the @d2 markers have done their job -- yielding
1932 		   an @offset value -- and they can now be trashed
1933 		*/
1934 		dataset_destroy_obs_markers(d2);
1935 	    }
1936 	}
1937     } else {
1938 	/* markers on both sides: are they totally distinct? */
1939 	int i, j;
1940 
1941 	ret = 1;
1942 	for (i=0; i<d2->n && ret; i++) {
1943 	    for (j=0; j<d1->n && ret; j++) {
1944 		if (!strcmp(d2->S[i], d1->S[j])) {
1945 		    /* no, not totally distinct */
1946 		    ret = 0;
1947 		}
1948 	    }
1949 	}
1950     }
1951 
1952 #if MERGE_DEBUG
1953     fprintf(stderr, " markers_compatible: ret=%d, offset=%d\n", ret, *offset);
1954 #endif
1955 
1956     return ret;
1957 }
1958 
1959 static int
just_append_rows(const DATASET * targ,DATASET * src,int * offset)1960 just_append_rows (const DATASET *targ, DATASET *src, int *offset)
1961 {
1962     int ret = 0;
1963 
1964     if (targ->structure == CROSS_SECTION &&
1965 	src->structure == CROSS_SECTION &&
1966 	targ->sd0 == 1 && src->sd0 == 1) {
1967 	int ok, test_offset = -1;
1968 
1969 	ok = markers_compatible(targ, src, &test_offset);
1970 	if (ok) {
1971 	    /* note: we do this only if we're not adding any new
1972 	       series: we'll append to existing series lengthwise
1973 	       (or perhaps write data into existing existing rows)
1974 	    */
1975 	    *offset = test_offset;
1976 	    ret = src->n - (targ->n - *offset);
1977 	    if (ret < 0) {
1978 		ret = 0;
1979 	    }
1980 	}
1981     }
1982 
1983     return ret;
1984 }
1985 
simple_range_match(const DATASET * targ,const DATASET * src,int * offset)1986 static int simple_range_match (const DATASET *targ, const DATASET *src,
1987 			       int *offset)
1988 {
1989     int ret = 0;
1990 
1991     if (src->pd == 1 && src->structure == CROSS_SECTION) {
1992 	if (src->n == targ->n) {
1993 	    ret = 1;
1994 	} else if (src->n == targ->t2 - targ->t1 + 1) {
1995 	    ret = 1;
1996 	    *offset = targ->t1;
1997 	}
1998     }
1999 
2000     return ret;
2001 }
2002 
merge_lengthen_series(DATASET * dset,const DATASET * addset,int addobs,int offset)2003 static int merge_lengthen_series (DATASET *dset,
2004 				  const DATASET *addset,
2005 				  int addobs,
2006 				  int offset)
2007 {
2008     int i, t, new_n = dset->n + addobs;
2009     int err = 0;
2010 
2011     if (dset->markers) {
2012 	err = extend_markers(dset, dset->n, new_n);
2013 	if (!err) {
2014 	    for (t=dset->n; t<new_n; t++) {
2015 		strcpy(dset->S[t], addset->S[t-offset]);
2016 	    }
2017 	}
2018     }
2019 
2020     for (i=0; i<dset->v && !err; i++) {
2021 	double *x;
2022 
2023 	x = realloc(dset->Z[i], new_n * sizeof *x);
2024 	if (x == NULL) {
2025 	    err = E_ALLOC;
2026 	    break;
2027 	}
2028 
2029 	for (t=dset->n; t<new_n; t++) {
2030 	    if (i == 0) {
2031 		x[t] = 1.0;
2032 	    } else {
2033 		x[t] = NADBL;
2034 	    }
2035 	}
2036 	dset->Z[i] = x;
2037     }
2038 
2039     if (!err) {
2040 	dset->n = new_n;
2041 	ntolabel(dset->endobs, new_n - 1, dset);
2042 	dset->t2 = dset->n - 1;
2043     }
2044 
2045     return err;
2046 }
2047 
2048 #if 0 /* not yet (maybe usable with DND */
2049 
2050 int basic_data_merge_check (const DATASET *dset,
2051 			    DATASET *addset)
2052 {
2053     int dayspecial = 0;
2054     int yrspecial = 0;
2055     int addsimple = 0;
2056     int addvars = 0;
2057     int addobs = 0;
2058     int offset = 0;
2059     int err = 0;
2060 
2061     /* first see how many new vars we have */
2062     addvars = count_new_vars(dset, addset, NULL);
2063     if (addvars < 0) {
2064 	return 1;
2065     }
2066 
2067     if (dated_daily_data(dset) && dated_daily_data(addset)) {
2068 	dayspecial = 1;
2069     }
2070 
2071     if (simple_range_match(dset, addset, &offset)) {
2072 	addsimple = 1;
2073     } else if (dset->pd != addset->pd) {
2074 	err = 1;
2075     }
2076 
2077     if (!err) {
2078 	if (!addsimple) {
2079 	    addobs = compare_ranges(dset, addset, addvars, &offset,
2080 				    &yrspecial, &err);
2081 	}
2082 	if (!err && addobs <= 0 && addvars == 0) {
2083 	    addobs = just_append_rows(dset, addset, &offset);
2084 	}
2085     }
2086 
2087     if (!err && (addobs < 0 || addvars < 0)) {
2088 	err = E_DATA;
2089     }
2090 
2091     if (!err && dset->markers != addset->markers) {
2092 	if (addobs == 0 && addvars == 0) {
2093 	    err = E_DATA;
2094 	} else if (addset->n != dset->n && !yrspecial && !dayspecial) {
2095 	    err = E_DATA;
2096 	}
2097     }
2098 
2099     return err;
2100 }
2101 
2102 #endif
2103 
2104 #define simple_structure(p) (p->structure == TIME_SERIES ||		\
2105 			     p->structure == SPECIAL_TIME_SERIES ||	\
2106 			     (p->structure == CROSS_SECTION &&		\
2107 			      p->S == NULL))
2108 
2109 /**
2110  * merge_data:
2111  * @dset: dataset struct.
2112  * @addset: dataset to be merged in.
2113  * @opt: may include OPT_T to force a time-series interpretation
2114  * when appending to a panel dataset; may include OPT_U to update
2115  * values of overlapping observations.
2116  * @prn: print struct to accept messages.
2117  *
2118  * Attempt to merge the content of a newly opened data file into
2119  * gretl's current working data set.
2120  *
2121  * Returns: 0 on successful completion, non-zero otherwise.
2122  */
2123 
merge_data(DATASET * dset,DATASET * addset,gretlopt opt,PRN * prn)2124 static int merge_data (DATASET *dset, DATASET *addset,
2125 		       gretlopt opt, PRN *prn)
2126 {
2127     int update_overlap = (opt & OPT_U);
2128     int orig_n = dset->n;
2129     int dayspecial = 0;
2130     int yrspecial = 0;
2131     int fixsample = 0;
2132     int addsimple = 0;
2133     int addpanel = 0;
2134     int addvars = 0;
2135     int addobs = 0;
2136     int offset = 0;
2137     int err = 0;
2138 
2139 #if MERGE_DEBUG
2140     debug_print_option_flags("merge_data", opt);
2141 #endif
2142 
2143     /* first see how many new vars we have */
2144     addvars = count_new_vars(dset, addset, prn);
2145     if (addvars < 0) {
2146 	return 1;
2147     }
2148 
2149 #if MERGE_DEBUG
2150     fprintf(stderr, " new series count = %d\n", addvars);
2151 #endif
2152 
2153     if (dated_daily_data(dset) && dated_daily_data(addset)) {
2154 #if MERGE_DEBUG
2155 	fprintf(stderr, " special: merging daily data\n");
2156 #endif
2157 	dayspecial = 1;
2158     }
2159 
2160     if (opt & OPT_X) {
2161 	fixsample = 1;
2162     } else if (simple_range_match(dset, addset, &offset)) {
2163 	/* we'll allow undated data to be merged with the existing
2164 	   dateset, sideways, provided the number of observations
2165 	   matches OK */
2166 	addsimple = 1;
2167     } else if (dataset_is_panel(dset) &&
2168 	       panel_expand_ok(dset, addset, opt)) {
2169 	/* allow appending to panel when the number of obs matches
2170 	   either the cross-section size or the time-series length
2171 	*/
2172 	addpanel = 1;
2173     } else if (dset->pd != addset->pd) {
2174 	merge_error(_("Data frequency does not match\n"), prn);
2175 	err = 1;
2176     }
2177 
2178     if (!err && fixsample) {
2179 	err = check_for_overlap(dset, addset, &offset);
2180     } else if (!err && gretl_function_depth() > 0) {
2181 	/* we won't add observations within a function, but
2182 	   we should still check for an error from compare_ranges()
2183 	*/
2184 	if (!addsimple && !addpanel) {
2185 	    addobs = compare_ranges(dset, addset, addvars, &offset,
2186 				    &yrspecial, &err);
2187 	    if (!err && addobs > 0) {
2188 		addobs = 0;
2189 	    }
2190 	}
2191     } else if (!err) {
2192 	if (!addsimple && !addpanel) {
2193 	    addobs = compare_ranges(dset, addset, addvars, &offset,
2194 				    &yrspecial, &err);
2195 #if MERGE_DEBUG
2196 	    fprintf(stderr, " added obs, from compare_ranges: %d\n", addobs);
2197 #endif
2198 	}
2199 	if (!err && addobs <= 0 && addvars == 0) {
2200 	    addobs = just_append_rows(dset, addset, &offset);
2201 #if MERGE_DEBUG
2202 	    fprintf(stderr, " added obs, from just_append_rows: %d\n", addobs);
2203 #endif
2204 	}
2205     }
2206 
2207     if (!err && (addobs < 0 || addvars < 0)) {
2208 	merge_error(_("New data not conformable for appending\n"), prn);
2209 	err = E_DATA;
2210     }
2211 
2212     if (!err && !addpanel && dset->markers != addset->markers) {
2213 	if (addobs == 0 && addvars == 0) {
2214 	    if (update_overlap) {
2215 		; /* might be OK? */
2216 	    } else {
2217 		gretl_errmsg_set("Found no data conformable for appending");
2218 		err = E_DATA;
2219 	    }
2220 	} else if (addset->n != dset->n && !yrspecial && !dayspecial) {
2221 	    merge_error(_("Inconsistency in observation markers\n"), prn);
2222 	    err = E_DATA;
2223 	} else if (addset->markers && !dset->markers &&
2224 		   !yrspecial && !dayspecial) {
2225 	    dataset_destroy_obs_markers(addset);
2226 	}
2227     }
2228 
2229 #if MERGE_DEBUG
2230     if (!err) {
2231 	fprintf(stderr, " after preliminaries: addvars = %d, addobs = %d\n",
2232 		addvars, addobs);
2233     } else {
2234 	fprintf(stderr, " after preliminaries: err = %d\n", err);
2235     }
2236 #endif
2237 
2238     /* if checks are passed, try merging the data */
2239 
2240     if (!err && addobs > 0) {
2241 	err = merge_lengthen_series(dset, addset, addobs, offset);
2242 	if (err) {
2243 	    merge_error(_("Out of memory!\n"), prn);
2244 	}
2245     }
2246 
2247     if (!err && addpanel) {
2248 	err = panel_append_special(addvars, dset, addset,
2249 				   opt, prn);
2250     } else if (!err) {
2251 	int k = dset->v;
2252 	int i, t;
2253 
2254 	if (addvars > 0 && dataset_add_series(dset, addvars)) {
2255 	    merge_error(_("Out of memory!\n"), prn);
2256 	    err = E_ALLOC;
2257 	}
2258 
2259 	for (i=1; i<addset->v && !err; i++) {
2260 	    int v = series_index(dset, addset->varname[i]);
2261 	    int tmin, newvar = v >= k;
2262 
2263 	    if (!newvar && !update_overlap) {
2264 		tmin = orig_n;
2265 	    } else {
2266 		tmin = 0;
2267 	    }
2268 
2269 	    if (newvar) {
2270 		v = k++;
2271 		strcpy(dset->varname[v], addset->varname[i]);
2272 		copy_varinfo(dset->varinfo[v], addset->varinfo[i]);
2273 		if (is_string_valued(addset, i) &&
2274 		    addset->n == dset->n && offset == 0 &&
2275 		    addobs == 0) {
2276 		    /* attach the string table to the target
2277 		       series and detach it from @addset
2278 		    */
2279 		    series_table *st;
2280 
2281 		    st = series_get_string_table(addset, i);
2282 		    series_attach_string_table(dset, v, st);
2283 		    series_attach_string_table(addset, i, NULL);
2284 		}
2285 	    } else {
2286 		/* not a new series */
2287 		int lsval = is_string_valued(dset, v);
2288 		int rsval = is_string_valued(addset, i);
2289 
2290 		if (lsval + rsval == 1) {
2291 		    gretl_errmsg_set(_("Can't concatenate string-valued and numeric series"));
2292 		    err = E_DATA;
2293 		} else if (lsval) {
2294 		    err = merge_string_tables(dset, v, addset, i);
2295 		}
2296 	    }
2297 
2298 	    if (dayspecial) {
2299 		char obs[OBSLEN];
2300 		int s;
2301 
2302 		for (t=tmin; t<dset->n; t++) {
2303 		    ntolabel(obs, t, dset);
2304 		    s = dateton(obs, addset);
2305 		    if (s >= 0 && s < addset->n) {
2306 			dset->Z[v][t] = addset->Z[i][s];
2307 		    } else {
2308 			dset->Z[v][t] = NADBL;
2309 		    }
2310 		}
2311 	    } else if (yrspecial) {
2312 		int s;
2313 
2314 		if (newvar) {
2315 		    for (t=0; t<dset->n; t++) {
2316 			dset->Z[v][t] = NADBL;
2317 		    }
2318 		}
2319 		for (s=0; s<addset->n; s++) {
2320 		    t = dateton(addset->S[s], dset);
2321 		    if (t >= tmin && t < dset->n) {
2322 			dset->Z[v][t] = addset->Z[i][s];
2323 		    }
2324 		}
2325 	    } else {
2326 		for (t=tmin; t<dset->n; t++) {
2327 		    if (t >= offset && t - offset < addset->n) {
2328 			dset->Z[v][t] = addset->Z[i][t - offset];
2329 		    } else if (newvar) {
2330 			dset->Z[v][t] = NADBL;
2331 		    }
2332 		}
2333 	    }
2334 	}
2335     }
2336 
2337     if (!err && (addvars || addobs) && gretl_messages_on()) {
2338 	pputs(prn, _("Data appended OK\n"));
2339     }
2340 
2341     return err;
2342 }
2343 
2344 /* We want to ensure that calendar dates are recorded as per
2345    ISO 8601 -- that is, YYYY-MM-DD; here we remedy dates
2346    recorded in the form YYYY/MM/DD.
2347 */
2348 
maybe_fix_calendar_dates(DATASET * dset)2349 static void maybe_fix_calendar_dates (DATASET *dset)
2350 {
2351     if (strchr(dset->stobs, '/') != NULL) {
2352 	gretl_charsub(dset->stobs, '/', '-');
2353 	gretl_charsub(dset->endobs, '/', '-');
2354 	if (dset->S != NULL && dset->markers == DAILY_DATE_STRINGS) {
2355 	    int t;
2356 
2357 	    for (t=0; t<dset->n; t++) {
2358 		gretl_charsub(dset->S[t], '/', '-');
2359 	    }
2360 	}
2361     }
2362 }
2363 
2364 /**
2365  * get_merge_opts:
2366  * @opt: gretl options flags.
2367  *
2368  * Returns: just those components of @opt (if any) that
2369  * can be passed to merge_or_replace_data(); may be
2370  * useful when calling that function in the context
2371  * of a command only some of whose options should be
2372  * forwarded.
2373  */
2374 
get_merge_opts(gretlopt opt)2375 gretlopt get_merge_opts (gretlopt opt)
2376 {
2377     gretlopt merge_opt = OPT_NONE;
2378 
2379     if (opt & OPT_T) {
2380 	/* panel, common time-series */
2381 	merge_opt |= OPT_T;
2382     }
2383     if (opt & OPT_U) {
2384 	/* update overlapping observations */
2385 	merge_opt |= OPT_U;
2386     }
2387     if (opt & OPT_X) {
2388 	/* fixed sample range */
2389 	merge_opt |= OPT_X;
2390     }
2391 
2392     return merge_opt;
2393 }
2394 
2395 /* Apparatus for converting a dataset read from file
2396    into a gretl matrix, as opposed to replacing an
2397    existing dataset or merging with it.
2398 */
2399 
2400 static gretl_matrix **dset_matrix;
2401 
set_dset_matrix_target(gretl_matrix ** pm)2402 void set_dset_matrix_target (gretl_matrix **pm)
2403 {
2404     dset_matrix = pm;
2405 }
2406 
get_dset_matrix_target(void)2407 void *get_dset_matrix_target (void)
2408 {
2409     return dset_matrix;
2410 }
2411 
2412 /**
2413  * merge_or_replace_data:
2414  * @dset0: original dataset struct.
2415  * @pdset1: new dataset struct.
2416  * @opt: zero or more option flags (OPT_K presrves @pdset1,
2417  * otherwise it is destroyed).
2418  * @prn: print struct to accept messages.
2419  *
2420  * Given a newly-created dataset, pointed to by @pdset1, either
2421  * attempt to merge it with @dset0, if the original data array
2422  * is non-NULL, or replace the content of the original pointer
2423  * with the new dataset.
2424  *
2425  * In case merging is not successful, the new dataset is
2426  * destroyed.
2427  *
2428  * Returns: 0 on successful completion, non-zero otherwise.
2429  */
2430 
merge_or_replace_data(DATASET * dset0,DATASET ** pdset1,gretlopt opt,PRN * prn)2431 int merge_or_replace_data (DATASET *dset0, DATASET **pdset1,
2432 			   gretlopt opt, PRN *prn)
2433 {
2434     int keep = (opt & OPT_K);
2435     int err = 0;
2436 
2437     if (dset_matrix != NULL) {
2438 	/* Convert the new dataset to matrix; don't touch
2439 	   the existing dataset, if any.
2440 	*/
2441 	const DATASET *dset = *pdset1;
2442 
2443 	*dset_matrix = gretl_matrix_data_subset(NULL, dset,
2444 						0, dset->n - 1,
2445 						M_MISSING_OK,
2446 						&err);
2447 	destroy_dataset(*pdset1);
2448 	*pdset1 = NULL;
2449 	return err;
2450     }
2451 
2452     if (dset0->Z != NULL) {
2453 	/* we have an existing dataset into which the new data
2454 	   should be merged */
2455 	gretlopt merge_opt = OPT_NONE;
2456 
2457 	if (opt & OPT_T) {
2458 	    /* panel, common time-series */
2459 	    merge_opt |= OPT_T;
2460 	}
2461 	if (opt & OPT_U) {
2462 	    /* update overlapping observations */
2463 	    merge_opt |= OPT_U;
2464 	}
2465 	if (opt & OPT_X) {
2466 	    /* fixed sample range */
2467 	    merge_opt |= OPT_X;
2468 	}
2469 	err = merge_data(dset0, *pdset1, merge_opt, prn);
2470 	if (!keep) {
2471 	    destroy_dataset(*pdset1);
2472 	}
2473     } else {
2474 	/* starting from scratch */
2475 	*dset0 = **pdset1;
2476 	free(*pdset1);
2477 	if (calendar_data(dset0)) {
2478 	    maybe_fix_calendar_dates(dset0);
2479 	}
2480     }
2481 
2482     if (!keep) {
2483 	*pdset1 = NULL;
2484     }
2485 
2486     return err;
2487 }
2488 
check_imported_string(char * src,int i,size_t len)2489 static int check_imported_string (char *src, int i, size_t len)
2490 {
2491     int err = 0;
2492 
2493     if (!g_utf8_validate(src, -1, NULL)) {
2494 	gchar *trstr = NULL;
2495 	gsize bytes;
2496 
2497 	trstr = g_locale_to_utf8(src, -1, NULL, &bytes, NULL);
2498 
2499 	if (trstr == NULL) {
2500 	    gretl_errmsg_sprintf("Invalid characters in imported string, line %d", i);
2501 	    err = E_DATA;
2502 	} else {
2503 	    *src = '\0';
2504 	    strncat(src, trstr, len - 1);
2505 	    g_free(trstr);
2506 	}
2507     }
2508 
2509     return err;
2510 }
2511 
count_markers(FILE * fp,char * line,int linelen,char * marker)2512 static int count_markers (FILE *fp, char *line, int linelen,
2513 			  char *marker)
2514 {
2515     int n = 0;
2516 
2517     while (fgets(line, linelen, fp)) {
2518 	if (sscanf(line, "%31[^\n\r]", marker) == 1) {
2519 	    g_strstrip(marker);
2520 	    if (*marker != '\0') {
2521 		n++;
2522 	    }
2523 	}
2524     }
2525 
2526     rewind(fp);
2527 
2528     return n;
2529 }
2530 
2531 /**
2532  * add_obs_markers_from_file:
2533  * @dset: data information struct.
2534  * @fname: name of file containing case markers.
2535  *
2536  * Read case markers (strings of %OBSLEN - 1 characters or less that identify
2537  * the observations) from a file, and associate them with the
2538  * current data set.  The file should contain one marker per line,
2539  * with a number of lines equal to the number of observations in
2540  * the current data set.
2541  *
2542  * Returns: 0 on successful completion, non-zero otherwise.
2543  */
2544 
add_obs_markers_from_file(DATASET * dset,const char * fname)2545 int add_obs_markers_from_file (DATASET *dset, const char *fname)
2546 {
2547     char **S = NULL;
2548     FILE *fp;
2549     char line[128], marker[32];
2550     int done = 0;
2551     int t, err = 0;
2552 
2553     fp = gretl_fopen(fname, "r");
2554     if (fp == NULL) {
2555 	return E_FOPEN;
2556     }
2557 
2558     S = strings_array_new_with_length(dset->n, OBSLEN);
2559     if (S == NULL) {
2560 	fclose(fp);
2561 	return E_ALLOC;
2562     }
2563 
2564     if (dataset_is_panel(dset)) {
2565 	/* allow the case where we get just enough markers to
2566 	   label the cross-sectional units */
2567 	int nm = count_markers(fp, line, sizeof line, marker);
2568 	int N = dset->n / dset->pd; /* = number of units */
2569 
2570 	if (nm == N) {
2571 	    int T = dset->pd;
2572 	    int t, i = 0;
2573 
2574 	    while (fgets(line, sizeof line, fp) && !err) {
2575 		*marker = '\0';
2576 		if (sscanf(line, "%31[^\n\r]", marker) == 1) {
2577 		    g_strstrip(marker);
2578 		    strncat(S[i], marker, OBSLEN - 1);
2579 		    err = check_imported_string(S[i], i+1, OBSLEN);
2580 		    if (!err) {
2581 			/* copy to remaining observations */
2582 			for (t=1; t<T; t++) {
2583 			    strcpy(S[i+t], S[i]);
2584 			}
2585 		    }
2586 		    i += T;
2587 		}
2588 	    }
2589 	    done = 1;
2590 	}
2591     }
2592 
2593     if (!done) {
2594 	for (t=0; t<dset->n && !err; t++) {
2595 	    if (fgets(line, sizeof line, fp) == NULL) {
2596 		gretl_errmsg_sprintf("Expected %d markers; found %d\n",
2597 				     dset->n, t);
2598 		err = E_DATA;
2599 	    } else if (sscanf(line, "%31[^\n\r]", marker) != 1) {
2600 		gretl_errmsg_sprintf("Couldn't read marker on line %d", t+1);
2601 		err = E_DATA;
2602 	    } else {
2603 		g_strstrip(marker);
2604 		strncat(S[t], marker, OBSLEN - 1);
2605 		err = check_imported_string(S[t], t+1, OBSLEN);
2606 	    }
2607 	}
2608     }
2609 
2610     if (err) {
2611 	strings_array_free(S, dset->n);
2612     } else {
2613 	if (dset->S != NULL) {
2614 	    strings_array_free(dset->S, dset->n);
2615 	}
2616 	dset->markers = REGULAR_MARKERS;
2617 	dset->S = S;
2618     }
2619 
2620     return err;
2621 }
2622 
2623 /**
2624  * dataset_has_var_labels:
2625  * @dset: data information struct.
2626  *
2627  * Returns: 1 if at least one variable in the current dataset
2628  * has a descriptive label, otherwise 0.
2629  */
2630 
dataset_has_var_labels(const DATASET * dset)2631 int dataset_has_var_labels (const DATASET *dset)
2632 {
2633     const char *vlabel;
2634     int i, imin = 1;
2635 
2636     if (dset->v > 1) {
2637 	if (!strcmp(dset->varname[1], "index")) {
2638 	    vlabel = series_get_label(dset, 1);
2639 	    if (vlabel != NULL && !strcmp(vlabel, _("index variable"))) {
2640 		imin = 2;
2641 	    }
2642 	}
2643     }
2644 
2645     for (i=imin; i<dset->v; i++) {
2646 	vlabel = series_get_label(dset, i);
2647 	if (vlabel != NULL && *vlabel != '\0') {
2648 	    return 1;
2649 	}
2650     }
2651 
2652     return 0;
2653 }
2654 
2655 /**
2656  * save_var_labels_to_file:
2657  * @dset: data information struct.
2658  * @fname: name of file containing labels.
2659  *
2660  * Writes to @fname the descriptive labels for the series in
2661  * the current dataset.
2662  *
2663  * Returns: 0 on successful completion, non-zero otherwise.
2664  */
2665 
save_var_labels_to_file(const DATASET * dset,const char * fname)2666 int save_var_labels_to_file (const DATASET *dset,
2667 			     const char *fname)
2668 {
2669     const char *vlabel;
2670     FILE *fp;
2671     int i, err = 0;
2672 
2673     fp = gretl_fopen(fname, "w");
2674 
2675     if (fp == NULL) {
2676 	err = E_FOPEN;
2677     } else {
2678 	for (i=1; i<dset->v; i++) {
2679 	    vlabel = series_get_label(dset, i);
2680 	    fprintf(fp, "%s\n", vlabel == NULL ? "" : vlabel);
2681 	}
2682 	fclose(fp);
2683     }
2684 
2685     return err;
2686 }
2687 
save_var_labels_to_array(const DATASET * dset,const char * aname)2688 static int save_var_labels_to_array (const DATASET *dset,
2689 				     const char *aname)
2690 {
2691     gretl_array *a = NULL;
2692     int err = 0;
2693 
2694     if (gretl_is_series(aname, dset)) {
2695 	err = E_TYPES;
2696     } else {
2697 	err = check_identifier(aname);
2698     }
2699 
2700     if (!err) {
2701 	a = gretl_array_new(GRETL_TYPE_STRINGS, dset->v - 1, &err);
2702     }
2703 
2704     if (!err) {
2705 	err = user_var_add_or_replace(aname, GRETL_TYPE_STRINGS, a);
2706     }
2707 
2708     if (!err) {
2709 	char *vlabel;
2710 	int i;
2711 
2712 	for (i=1; i<dset->v; i++) {
2713 	    vlabel = (char *) series_get_label(dset, i);
2714 	    gretl_array_set_element(a, i-1, vlabel != NULL ? vlabel : "",
2715 				    GRETL_TYPE_STRING, 1);
2716 	}
2717     }
2718 
2719     if (err && a != NULL) {
2720 	gretl_array_destroy(a);
2721 	a = NULL;
2722     }
2723 
2724     return err;
2725 }
2726 
save_obs_markers_to_array(const DATASET * dset,const char * aname)2727 static int save_obs_markers_to_array (const DATASET *dset,
2728 				      const char *aname)
2729 {
2730     gretl_array *a = NULL;
2731     int err = 0;
2732 
2733     if (gretl_is_series(aname, dset)) {
2734 	err = E_TYPES;
2735     } else {
2736 	err = check_identifier(aname);
2737     }
2738 
2739     if (!err) {
2740 	a = gretl_array_new(GRETL_TYPE_STRINGS, dset->n, &err);
2741     }
2742 
2743     if (!err) {
2744 	err = user_var_add_or_replace(aname, GRETL_TYPE_STRINGS, a);
2745     }
2746 
2747     if (!err) {
2748 	char *marker;
2749 	int i;
2750 
2751 	for (i=0; i<dset->n; i++) {
2752 	    marker = dset->S[i];
2753 	    gretl_array_set_element(a, i, marker != NULL ? marker : "",
2754 				    GRETL_TYPE_STRING, 1);
2755 	}
2756     }
2757 
2758     if (err && a != NULL) {
2759 	gretl_array_destroy(a);
2760 	a = NULL;
2761     }
2762 
2763     return err;
2764 }
2765 
2766 /**
2767  * add_var_labels_from_file:
2768  * @dset: data information struct.
2769  * @fname: name of file containing labels.
2770  *
2771  * Read descriptive variables for labels (strings of %MAXLABEL - 1
2772  * characters or less) from a file, and associate them with the
2773  * current data set.  The file should contain one label per line,
2774  * with a number of lines equal to the number of variables in
2775  * the current data set, excluding the constant.
2776  *
2777  * Returns: 0 on successful completion, non-zero otherwise.
2778  */
2779 
add_var_labels_from_file(DATASET * dset,const char * fname)2780 int add_var_labels_from_file (DATASET *dset, const char *fname)
2781 {
2782     FILE *fp;
2783     char line[1024];
2784     gchar *label;
2785     int nlabels = 0;
2786     int i, err = 0;
2787 
2788     fp = gretl_fopen(fname, "r");
2789     if (fp == NULL) {
2790 	return E_FOPEN;
2791     }
2792 
2793     for (i=1; i<dset->v && !err; i++) {
2794 	if (fgets(line, sizeof line, fp) == NULL) {
2795 	    break;
2796 	} else {
2797 	    label = g_strstrip(g_strdup(line));
2798 	    if (strlen(label) > 0) {
2799 		if (!g_utf8_validate(label, -1, NULL)) {
2800 		    gchar *trstr = NULL;
2801 		    gsize bytes;
2802 
2803 		    trstr = g_locale_to_utf8(label, -1, NULL,
2804 					     &bytes, NULL);
2805 		    if (trstr != NULL) {
2806 			series_set_label(dset, i, trstr);
2807 			nlabels++;
2808 			g_free(trstr);
2809 		    }
2810 		} else {
2811 		    series_set_label(dset, i, label);
2812 		    nlabels++;
2813 		}
2814 	    }
2815 	    g_free(label);
2816 	}
2817     }
2818 
2819     if (!err && nlabels == 0) {
2820 	gretl_errmsg_set("No labels found");
2821 	err = E_DATA;
2822     }
2823 
2824     return err;
2825 }
2826 
add_var_labels_from_array(DATASET * dset,const char * aname)2827 static int add_var_labels_from_array (DATASET *dset, const char *aname)
2828 {
2829     gretl_array *a = get_array_by_name(aname);
2830     int i, err = 0;
2831 
2832     if (a == NULL) {
2833 	gretl_errmsg_sprintf("%s: no such array", aname);
2834 	err = E_DATA;
2835     } else if (gretl_array_get_type(a) != GRETL_TYPE_STRINGS ||
2836 	       gretl_array_get_length(a) < dset->v - 1) {
2837 	err = E_TYPES;
2838     }
2839 
2840     for (i=1; i<dset->v && !err; i++) {
2841 	const char *s = gretl_array_get_data(a, i-1);
2842 
2843 	series_set_label(dset, i, s);
2844     }
2845 
2846     return err;
2847 }
2848 
read_or_write_var_labels(gretlopt opt,DATASET * dset,PRN * prn)2849 int read_or_write_var_labels (gretlopt opt, DATASET *dset, PRN *prn)
2850 {
2851     const char *lname = NULL;
2852     int err;
2853 
2854     err = incompatible_options(opt, OPT_D | OPT_T | OPT_F |
2855 			       OPT_A | OPT_R);
2856     if (err) {
2857 	return err;
2858     }
2859 
2860     if (opt & (OPT_T | OPT_F | OPT_A | OPT_R)) {
2861 	lname = get_optval_string(LABELS, opt);
2862 	if (lname == NULL) {
2863 	    return E_BADOPT;
2864 	} else if (opt & (OPT_T | OPT_F)) {
2865 	    gretl_maybe_switch_dir(lname);
2866 	}
2867     }
2868 
2869     if (opt & OPT_D) {
2870 	/* delete */
2871 	int i;
2872 
2873 	for (i=1; i<dset->v; i++) {
2874 	    series_set_label(dset, i, "");
2875 	}
2876     } else if (opt & (OPT_T | OPT_R)) {
2877 	/* to-file, to-array */
2878 	if (!dataset_has_var_labels(dset)) {
2879 	    pprintf(prn, "No labels are available for writing\n");
2880 	    err = E_DATA;
2881 	} else {
2882 	    if (opt & OPT_T) {
2883 		err = save_var_labels_to_file(dset, lname);
2884 	    } else {
2885 		err = save_var_labels_to_array(dset, lname);
2886 	    }
2887 	    if (!err && gretl_messages_on()) {
2888 		pprintf(prn, "Labels written OK\n");
2889 	    }
2890 	}
2891     } else if (opt & (OPT_F | OPT_A)) {
2892 	/* from-file, from-array */
2893 	if (opt & OPT_F) {
2894 	    err = add_var_labels_from_file(dset, lname);
2895 	} else {
2896 	    err = add_var_labels_from_array(dset, lname);
2897 	}
2898 	if (!err && gretl_messages_on()) {
2899 	    pprintf(prn, "Labels loaded OK\n");
2900 	}
2901     }
2902 
2903     return err;
2904 }
2905 
save_obs_markers_to_file(DATASET * dset,const char * fname)2906 static int save_obs_markers_to_file (DATASET *dset, const char *fname)
2907 {
2908     FILE *fp = gretl_fopen(fname, "w");
2909     int err = 0;
2910 
2911     if (fp == NULL) {
2912 	err = E_FOPEN;
2913     } else {
2914 	int i;
2915 
2916 	for (i=0; i<dset->n; i++) {
2917 	    fprintf(fp, "%s\n", dset->S[i]);
2918 	}
2919 	fclose(fp);
2920     }
2921 
2922     return err;
2923 }
2924 
read_or_write_obs_markers(gretlopt opt,DATASET * dset,PRN * prn)2925 int read_or_write_obs_markers (gretlopt opt, DATASET *dset, PRN *prn)
2926 {
2927     const char *fname = NULL;
2928     int err;
2929 
2930     err = incompatible_options(opt, OPT_D | OPT_T | OPT_F);
2931     if (err) {
2932 	return err;
2933     }
2934 
2935     if (opt & (OPT_T | OPT_F)) {
2936 	fname = get_optval_string(MARKERS, opt);
2937 	if (fname == NULL) {
2938 	    return E_BADOPT;
2939 	} else {
2940 	    fname = gretl_maybe_switch_dir(fname);
2941 	}
2942     }
2943 
2944     if (opt & (OPT_A | OPT_T)) {
2945 	/* writing to file or array */
2946 	if (dset->S == NULL) {
2947 	    gretl_errmsg_set(_("No markers are available for writing"));
2948 	    return E_DATA;
2949 	}
2950     }
2951 
2952     if (opt & OPT_D) {
2953 	/* delete */
2954 	dataset_destroy_obs_markers(dset);
2955     } else if (opt & OPT_T) {
2956 	/* to-file */
2957 	err = save_obs_markers_to_file(dset, fname);
2958 	if (!err && gretl_messages_on()) {
2959 	    pprintf(prn, "Markers written OK\n");
2960 	}
2961     } else if (opt & OPT_F) {
2962 	/* from-file */
2963 	err = add_obs_markers_from_file(dset, fname);
2964 	if (!err && gretl_messages_on()) {
2965 	    pprintf(prn, "Markers loaded OK\n");
2966 	}
2967     } else if (opt & OPT_A) {
2968 	/* to-array */
2969 	const char *aname = get_optval_string(MARKERS, OPT_A);
2970 
2971 	err = save_obs_markers_to_array(dset, aname);
2972     }
2973 
2974     return err;
2975 }
2976 
2977 static void
octave_varname(char * name,const char * s,int nnum,int v)2978 octave_varname (char *name, const char *s, int nnum, int v)
2979 {
2980     char nstr[12];
2981     int len, tr;
2982 
2983     if (nnum == 0) {
2984 	strcpy(name, s);
2985     } else {
2986 	sprintf(nstr, "%d", nnum);
2987 	len = strlen(nstr);
2988 	tr = VNAMELEN - len;
2989 
2990 	if (tr > 0) {
2991 	    strncat(name, s, tr);
2992 	    strcat(name, nstr);
2993 	} else {
2994 	    sprintf(name, "v%d", v);
2995 	}
2996     }
2997 }
2998 
get_max_line_length(FILE * fp,PRN * prn)2999 static int get_max_line_length (FILE *fp, PRN *prn)
3000 {
3001     int c, c1, cc = 0;
3002     int maxlen = 0;
3003 
3004     while ((c = fgetc(fp)) != EOF) {
3005 	if (c == 0x0d) {
3006 	    /* CR */
3007 	    c1 = fgetc(fp);
3008 	    if (c1 == EOF) {
3009 		break;
3010 	    } else if (c1 == 0x0a) {
3011 		/* CR + LF -> LF */
3012 		c = c1;
3013 	    } else {
3014 		/* Mac-style: CR not followed by LF */
3015 		c = 0x0a;
3016 		ungetc(c1, fp);
3017 	    }
3018 	}
3019 	if (c == 0x0a) {
3020 	    if (cc > maxlen) {
3021 		maxlen = cc;
3022 	    }
3023 	    cc = 0;
3024 	    continue;
3025 	}
3026 	if (!isspace((unsigned char) c) && !isprint((unsigned char) c) &&
3027 	    !(c == CTRLZ)) {
3028 	    pprintf(prn, _("Binary data (%d) encountered: this is not a valid "
3029 			   "text file\n"), c);
3030 	    return -1;
3031 	}
3032 	cc++;
3033     }
3034 
3035     if (maxlen == 0) {
3036 	pprintf(prn, _("Data file is empty\n"));
3037     }
3038 
3039     if (maxlen > 0) {
3040 	/* allow for newline and null terminator */
3041 	maxlen += 3;
3042     }
3043 
3044     return maxlen;
3045 }
3046 
import_octave(const char * fname,DATASET * dset,gretlopt opt,PRN * prn)3047 static int import_octave (const char *fname, DATASET *dset,
3048 			  gretlopt opt, PRN *prn)
3049 {
3050     DATASET *octset = NULL;
3051     FILE *fp = NULL;
3052     char *line = NULL;
3053     char tmp[8], fmt[16], name[32];
3054     int nrows = 0, ncols = 0, nblocks = 0;
3055     int brows = 0, bcols = 0, oldbcols = 0;
3056     int maxlen, got_type = 0, got_name = 0;
3057     int i, t, err = 0;
3058 
3059     fp = gretl_fopen(fname, "r");
3060     if (fp == NULL) {
3061 	return E_FOPEN;
3062     }
3063 
3064     pprintf(prn, "%s %s...\n", _("parsing"), fname);
3065 
3066     maxlen = get_max_line_length(fp, prn);
3067     if (maxlen <= 0) {
3068 	err = E_DATA;
3069 	goto oct_bailout;
3070     }
3071 
3072     line = malloc(maxlen);
3073     if (line == NULL) {
3074 	err = E_ALLOC;
3075 	goto oct_bailout;
3076     }
3077 
3078     pprintf(prn, _("   longest line: %d characters\n"), maxlen - 1);
3079 
3080     rewind(fp);
3081 
3082     while (fgets(line, maxlen, fp) && !err) {
3083 	if (*line == '#') {
3084 	    if (!got_name) {
3085 		if (sscanf(line, "# name: %31s", name) == 1) {
3086 		    got_name = 1;
3087 		    nblocks++;
3088 		    continue;
3089 		}
3090 	    }
3091 	    if (!got_type) {
3092 		if (sscanf(line, "# type: %7s", tmp) == 1) {
3093 		    if (!got_name || strcmp(tmp, "matrix")) {
3094 			err = 1;
3095 		    } else {
3096 			got_type = 1;
3097 		    }
3098 		    continue;
3099 		}
3100 	    }
3101 	    if (brows == 0) {
3102 		if (sscanf(line, "# rows: %d", &brows) == 1) {
3103 		    if (!got_name || !got_type || brows <= 0) {
3104 			err = 1;
3105 		    } else if (nrows > 0 && brows != nrows) {
3106 			err = 1;
3107 		    } else {
3108 			nrows = brows;
3109 		    }
3110 		    continue;
3111 		}
3112 	    }
3113 	    if (bcols == 0) {
3114 		if (sscanf(line, "# columns: %d", &bcols) == 1) {
3115 		    if (!got_name || !got_type || bcols <= 0) {
3116 			err = 1;
3117 		    } else {
3118 			ncols += bcols;
3119 			pprintf(prn, _("   Found matrix '%s' with "
3120 				       "%d rows, %d columns\n"), name, brows, bcols);
3121 		    }
3122 		    continue;
3123 		}
3124 	    }
3125 	} else if (string_is_blank(line)) {
3126 	    continue;
3127 	} else {
3128 	    got_name = 0;
3129 	    got_type = 0;
3130 	    brows = 0;
3131 	    bcols = 0;
3132 	}
3133     }
3134 
3135     if (err || nrows == 0 || ncols == 0) {
3136 	pputs(prn, _("Invalid data file\n"));
3137 	err = E_DATA;
3138 	goto oct_bailout;
3139     }
3140 
3141     /* initialize datainfo and Z */
3142 
3143     octset = datainfo_new();
3144     if (octset == NULL) {
3145 	pputs(prn, _("Out of memory!\n"));
3146 	err = E_ALLOC;
3147 	goto oct_bailout;
3148     }
3149 
3150     octset->n = nrows;
3151     octset->v = ncols + 1;
3152 
3153     if (start_new_Z(octset, 0)) {
3154 	pputs(prn, _("Out of memory!\n"));
3155 	err = E_ALLOC;
3156 	goto oct_bailout;
3157     }
3158 
3159     rewind(fp);
3160 
3161     pprintf(prn, _("   number of variables: %d\n"), ncols);
3162     pprintf(prn, _("   number of observations: %d\n"), nrows);
3163     pprintf(prn, _("   number of data blocks: %d\n"), nblocks);
3164 
3165     i = 1;
3166     t = 0;
3167 
3168     sprintf(fmt, "# name: %%%ds", VNAMELEN - 1);
3169 
3170     while (fgets(line, maxlen, fp) && !err) {
3171 	char *s = line;
3172 	int j;
3173 
3174 	if (*s == '#') {
3175 	    if (sscanf(line, fmt, name) == 1) {
3176 		;
3177 	    } else if (sscanf(line, "# rows: %d", &brows) == 1) {
3178 		t = 0;
3179 	    } else if (sscanf(line, "# columns: %d", &bcols) == 1) {
3180 		i += oldbcols;
3181 		oldbcols = bcols;
3182 	    }
3183 	}
3184 
3185 	if (*s == '#' || string_is_blank(s)) {
3186 	    continue;
3187 	}
3188 
3189 	if (t >= octset->n) {
3190 	    err = 1;
3191 	}
3192 
3193 	for (j=0; j<bcols && !err; j++) {
3194 	    double x;
3195 	    int v = i + j;
3196 
3197 	    if (t == 0) {
3198 		int nnum = (bcols > 1)? j + 1 : 0;
3199 
3200 		octave_varname(octset->varname[i+j], name, nnum, v);
3201 	    }
3202 
3203 	    while (isspace(*s)) s++;
3204 	    if (sscanf(s, "%lf", &x) != 1) {
3205 		fprintf(stderr, "error: '%s', didn't get double\n", s);
3206 		err = 1;
3207 	    } else {
3208 		octset->Z[v][t] = x;
3209 		while (!isspace(*s)) s++;
3210 	    }
3211 	}
3212 	t++;
3213     }
3214 
3215     if (err) {
3216 	pputs(prn, _("Invalid data file\n"));
3217 	err = E_DATA;
3218     } else {
3219 	int merge = dset->Z != NULL;
3220 	gretlopt merge_opt = 0;
3221 
3222 	if (merge && (opt & OPT_T)) {
3223 	    merge_opt = OPT_T;
3224 	}
3225 	err = merge_or_replace_data(dset, &octset, merge_opt, prn);
3226     }
3227 
3228  oct_bailout:
3229 
3230     if (fp != NULL) {
3231 	fclose(fp);
3232     }
3233 
3234     if (line != NULL) {
3235 	free(line);
3236     }
3237 
3238     if (octset != NULL) {
3239 	clear_datainfo(octset, CLEAR_FULL);
3240     }
3241 
3242     return err;
3243 }
3244 
3245 /**
3246  * import_other:
3247  * @fname: name of file.
3248  * @ftype: type of data file.
3249  * @dset: pointer to dataset struct.
3250  * @opt: option flag; see gretl_get_data().
3251  * @prn: gretl printing struct.
3252  *
3253  * Open a data file of a type that requires a special plugin.
3254  *
3255  * Returns: 0 on successful completion, non-zero otherwise.
3256  */
3257 
import_other(const char * fname,GretlFileType ftype,DATASET * dset,gretlopt opt,PRN * prn)3258 int import_other (const char *fname, GretlFileType ftype,
3259 		  DATASET *dset, gretlopt opt, PRN *prn)
3260 {
3261     FILE *fp;
3262     int (*importer) (const char *, DATASET *,
3263 		     gretlopt, PRN *);
3264     int err = 0;
3265 
3266     fp = gretl_fopen(fname, "r");
3267     if (fp == NULL) {
3268 	pprintf(prn, _("Couldn't open %s\n"), fname);
3269 	err = E_FOPEN;
3270 	goto bailout;
3271     }
3272 
3273     fclose(fp);
3274 
3275     if (ftype == GRETL_OCTAVE) {
3276 	/* plugin not needed */
3277 	return import_octave(fname, dset, opt, prn);
3278     }
3279 
3280     if (ftype == GRETL_WF1) {
3281 	importer = get_plugin_function("wf1_get_data");
3282     } else if (ftype == GRETL_DTA) {
3283 	importer = get_plugin_function("dta_get_data");
3284     } else if (ftype == GRETL_SAV) {
3285 	importer = get_plugin_function("sav_get_data");
3286     } else if (ftype == GRETL_SAS) {
3287 	importer = get_plugin_function("xport_get_data");
3288     } else if (ftype == GRETL_JMULTI) {
3289 	importer = get_plugin_function("jmulti_get_data");
3290     } else if (ftype == GRETL_MAP) {
3291 	importer = get_plugin_function("map_get_data");
3292     } else {
3293 	pprintf(prn, _("Unrecognized data type"));
3294 	pputc(prn, '\n');
3295 	return E_DATA;
3296     }
3297 
3298     if (importer == NULL) {
3299         err = 1;
3300     } else {
3301 	err = (*importer)(fname, dset, opt, prn);
3302     }
3303 
3304  bailout:
3305 
3306     return err;
3307 }
3308 
3309 /**
3310  * import_spreadsheet:
3311  * @fname: name of file.
3312  * @ftype: type of data file.
3313  * @list: list of parameters for spreadsheet import, or NULL.
3314  * @sheetname: name of specific worksheet, or NULL.
3315  * @dset: dataset struct.
3316  * @opt: option flag; see gretl_get_data().
3317  * @prn: gretl printing struct.
3318  *
3319  * Open a data file of a type that requires a special plugin.
3320  * Acceptable values for @ftype are %GRETL_GNUMERIC,
3321  * %GRETL_XLS, %GRETL_XLSX and %GRETL_ODS.
3322  *
3323  * Returns: 0 on successful completion, non-zero otherwise.
3324  */
3325 
import_spreadsheet(const char * fname,GretlFileType ftype,int * list,char * sheetname,DATASET * dset,gretlopt opt,PRN * prn)3326 int import_spreadsheet (const char *fname, GretlFileType ftype,
3327 			int *list, char *sheetname,
3328 			DATASET *dset, gretlopt opt, PRN *prn)
3329 {
3330     FILE *fp;
3331     int (*importer) (const char*, int *, char *,
3332 		     DATASET *, gretlopt, PRN *);
3333     int err = 0;
3334 
3335     import_na_init();
3336 
3337     fp = gretl_fopen(fname, "r");
3338 
3339     if (fp == NULL) {
3340 	pprintf(prn, _("Couldn't open %s\n"), fname);
3341 	err = E_FOPEN;
3342 	goto bailout;
3343     }
3344 
3345     fclose(fp);
3346 
3347     if (ftype == GRETL_GNUMERIC) {
3348 	importer = get_plugin_function("gnumeric_get_data");
3349     } else if (ftype == GRETL_XLS) {
3350 	importer = get_plugin_function("xls_get_data");
3351     } else if (ftype == GRETL_XLSX) {
3352 	importer = get_plugin_function("xlsx_get_data");
3353     } else if (ftype == GRETL_ODS) {
3354 	importer = get_plugin_function("ods_get_data");
3355     } else {
3356 	pprintf(prn, _("Unrecognized data type"));
3357 	pputc(prn, '\n');
3358 	return E_DATA;
3359     }
3360 
3361     if (importer == NULL) {
3362         err = 1;
3363     } else {
3364 	gchar *thisdir = g_get_current_dir();
3365 
3366 	err = (*importer)(fname, list, sheetname, dset, opt, prn);
3367 
3368 	if (thisdir != NULL) {
3369 	    /* come back out of dotdir? */
3370 	    gretl_chdir(thisdir);
3371 	    g_free(thisdir);
3372 	}
3373     }
3374 
3375  bailout:
3376 
3377     return err;
3378 }
3379 
is_jmulti_datafile(const char * fname)3380 static int is_jmulti_datafile (const char *fname)
3381 {
3382     FILE *fp;
3383     int ret = 0;
3384 
3385     fp = gretl_fopen(fname, "r");
3386 
3387     if (fp != NULL) {
3388 	char test[128] = {0};
3389 	int gotobs = 0;
3390 	int gotcomm = 0;
3391 	int incomm = 0;
3392 
3393 	/* look for characteristic C-style comment and
3394 	   <obs stuff> field, outside of comment */
3395 
3396 	while (fgets(test, sizeof test, fp)) {
3397 	    if (!incomm && strstr(test, "/*")) {
3398 		gotcomm = 1;
3399 		incomm = 1;
3400 	    }
3401 	    if (incomm && strstr(test, "*/")) {
3402 		incomm = 0;
3403 	    }
3404 	    if (!incomm && *test == '<' && strchr(test, '>')) {
3405 		gotobs = 1;
3406 	    }
3407 	    if (gotcomm && gotobs) {
3408 		ret = 1;
3409 		break;
3410 	    }
3411 	}
3412 	fclose(fp);
3413     }
3414 
3415     return ret;
3416 }
3417 
3418 /**
3419  * gretl_is_pkzip_file:
3420  * @fname: name of file to examine.
3421  *
3422  * Returns: 1 if @fname is readable and is a PKZIP file,
3423  * else 0.
3424  */
3425 
gretl_is_pkzip_file(const char * fname)3426 int gretl_is_pkzip_file (const char *fname)
3427 {
3428     FILE *fp;
3429     char test[3] = {0};
3430     int ret = 0;
3431 
3432     fp = gretl_fopen(fname, "rb");
3433     if (fp != NULL) {
3434 	if (fread(test, 1, 2, fp) == 2) {
3435 	    if (!strcmp(test, "PK")) ret = 1;
3436 	}
3437 	fclose(fp);
3438     }
3439 
3440     return ret;
3441 }
3442 
3443 /**
3444  * detect_filetype:
3445  * @fname: the name of the file to test.
3446  * @opt: OPT_P may be included to permit path-searching if @fname
3447  * is not an absolute path; in that case the @fname argument
3448  * may be modified, otherwise it will be left unchanged.
3449  *
3450  * Attempts to determine the type of a file to be opened in gretl:
3451  * data file (of various formats), or command script. If OPT_P
3452  * is given, the @fname argument must be an array of length
3453  * at least %MAXLEN: a path may be prepended and in some cases
3454  * an extension may be appended.
3455  *
3456  * Returns: integer code indicating the type of file.
3457  */
3458 
detect_filetype(char * fname,gretlopt opt)3459 GretlFileType detect_filetype (char *fname, gretlopt opt)
3460 {
3461     const char *ext = get_filename_extension(fname);
3462     GretlFileType ftype = GRETL_UNRECOGNIZED;
3463 
3464     if (ext != NULL) {
3465 	/* First try judging the type by extension */
3466 	if (!strcmp(ext, ".inp")) {
3467 	    ftype = GRETL_SCRIPT;
3468 	} else if (!strcmp(ext, ".gretl")) {
3469 	    if (gretl_is_pkzip_file(fname)) {
3470 		ftype = GRETL_SESSION;
3471 	    } else {
3472 		ftype = GRETL_SCRIPT;
3473 	    }
3474 	} else {
3475 	    ftype = data_file_type_from_extension(ext);
3476 	    if (ftype == GRETL_UNRECOGNIZED) {
3477 		/* check for database types */
3478 		if (!strcmp(ext, ".bin")) {
3479 		    ftype = GRETL_NATIVE_DB;
3480 		} else if (!strcmp(ext, ".rat")) {
3481 		    ftype = GRETL_RATS_DB;
3482 		} else if (!strcmp(ext, ".bn7")) {
3483 		    ftype = GRETL_PCGIVE_DB;
3484 		}
3485 	    }
3486 	}
3487 	if (ftype != GRETL_UNRECOGNIZED) {
3488 	    /* We got a type from the extension, but can we find
3489 	       the file "as is"? If so, we're done.
3490 	    */
3491 	    if (gretl_test_fopen(fname, "r") == 0) {
3492 		return ftype;
3493 	    }
3494 	}
3495     }
3496 
3497     if ((opt & OPT_P) && gretl_addpath(fname, 0) != NULL) {
3498 	ext = get_filename_extension(fname);
3499 	if (ext != NULL) {
3500 	    /* check again for known data file types */
3501 	    ftype = data_file_type_from_extension(ext);
3502 	}
3503     }
3504 
3505     if (ftype == GRETL_UNRECOGNIZED) {
3506 	/* last gasp */
3507 	if (gretl_is_xml_file(fname)) {
3508 	    ftype = GRETL_XML_DATA;
3509 	} else if (has_suffix(fname, ".dat") && is_jmulti_datafile(fname)) {
3510 	    ftype = GRETL_JMULTI;
3511 	} else {
3512 	    /* default to assuming plain text data */
3513 	    ftype = GRETL_CSV;
3514 	}
3515     }
3516 
3517     return ftype;
3518 }
3519 
3520 /**
3521  * check_atof:
3522  * @numstr: string to check.
3523  *
3524  * Returns: 0 if @numstr is blank, or is a valid string representation
3525  * of a floating point number, else 1.
3526  */
3527 
check_atof(const char * numstr)3528 int check_atof (const char *numstr)
3529 {
3530     char *test;
3531 
3532     /* accept blank entries */
3533     if (*numstr == '\0') return 0;
3534 
3535     errno = 0;
3536 
3537     strtod(numstr, &test);
3538 
3539     if (*test == '\0' && errno != ERANGE) return 0;
3540 
3541     if (!strcmp(numstr, test)) {
3542 	gretl_errmsg_sprintf(_("'%s' -- no numeric conversion performed!"), numstr);
3543 	return 1;
3544     }
3545 
3546     if (*test != '\0') {
3547 	if (isprint(*test)) {
3548 	    gretl_errmsg_sprintf(_("Extraneous character '%c' in data"), *test);
3549 	} else {
3550 	    gretl_errmsg_sprintf(_("Extraneous character (0x%x) in data"), *test);
3551 	}
3552 	return 1;
3553     }
3554 
3555     if (errno == ERANGE) {
3556 	gretl_errmsg_sprintf(_("'%s' -- number out of range!"), numstr);
3557     }
3558 
3559     return 1;
3560 }
3561 
3562 /**
3563  * check_atoi:
3564  * @numstr: string to check.
3565  *
3566  * Returns: 0 if @numstr is blank, or is a valid string representation
3567  * of an int, else 1.
3568  */
3569 
check_atoi(const char * numstr)3570 int check_atoi (const char *numstr)
3571 {
3572     long int val;
3573     char *test;
3574 
3575     /* accept blank entries */
3576     if (*numstr == '\0') return 0;
3577 
3578     errno = 0;
3579 
3580     val = strtol(numstr, &test, 10);
3581 
3582     if (*test == '\0' && errno != ERANGE) return 0;
3583 
3584     if (!strcmp(numstr, test)) {
3585 	gretl_errmsg_sprintf(_("'%s' -- no numeric conversion performed!"), numstr);
3586 	return 1;
3587     }
3588 
3589     if (*test != '\0') {
3590 	if (isprint(*test)) {
3591 	    gretl_errmsg_sprintf(_("Extraneous character '%c' in data"), *test);
3592 	} else {
3593 	    gretl_errmsg_sprintf(_("Extraneous character (0x%x) in data"), *test);
3594 	}
3595 	return 1;
3596     }
3597 
3598     if (errno == ERANGE || val <= INT_MIN || val >= INT_MAX) {
3599 	gretl_errmsg_sprintf(_("'%s' -- number out of range!"), numstr);
3600     }
3601 
3602     return 1;
3603 }
3604 
transpose_varname_used(const char * vname,DATASET * dinfo,int imax)3605 static int transpose_varname_used (const char *vname,
3606 				   DATASET *dinfo,
3607 				   int imax)
3608 {
3609     int i;
3610 
3611     for (i=0; i<imax; i++) {
3612 	if (!strcmp(vname, dinfo->varname[i])) {
3613 	    return 1;
3614 	}
3615     }
3616 
3617     return 0;
3618 }
3619 
3620 /**
3621  * transpose_data:
3622  * @dset: pointer to dataset information struct.
3623  *
3624  * Attempts to transpose the current dataset, so that each
3625  * variable becomes interpreted as an observation and each
3626  * observation as a variable.
3627  *
3628  * Returns: 0 on success, non-zero error code on error.
3629  */
3630 
transpose_data(DATASET * dset)3631 int transpose_data (DATASET *dset)
3632 {
3633     DATASET *tset;
3634     int k = dset->n + 1;
3635     int T = dset->v - 1;
3636     int i, t;
3637 
3638     tset = create_new_dataset(k, T, 0);
3639     if (tset == NULL) {
3640 	return E_ALLOC;
3641     }
3642 
3643     for (i=1; i<dset->v; i++) {
3644 	for (t=0; t<dset->n; t++) {
3645 	    tset->Z[t+1][i-1] = dset->Z[i][t];
3646 	}
3647     }
3648 
3649     for (t=0; t<dset->n; t++) {
3650 	int k = t + 1;
3651 	char *targ = tset->varname[k];
3652 
3653 	if (dset->S != NULL && dset->S[t][0] != '\0') {
3654 	    int err;
3655 
3656 	    *targ = '\0';
3657 	    strncat(targ, dset->S[t], VNAMELEN - 1);
3658 	    gretl_charsub(targ, ' ', '_');
3659 	    err = check_varname(targ);
3660 	    if (err) {
3661 		sprintf(targ, "v%d", k);
3662 		gretl_error_clear();
3663 	    } else if (transpose_varname_used(targ, tset, k)) {
3664 		sprintf(targ, "v%d", k);
3665 	    }
3666 	} else {
3667 	    sprintf(targ, "v%d", k);
3668 	}
3669     }
3670 
3671     free_Z(dset);
3672     dset->Z = tset->Z;
3673 
3674     clear_datainfo(dset, CLEAR_FULL);
3675 
3676     dset->v = k;
3677     dset->n = T;
3678     dset->t1 = 0;
3679     dset->t2 = dset->n - 1;
3680 
3681     dset->varname = tset->varname;
3682     dset->varinfo = tset->varinfo;
3683 
3684     dataset_obs_info_default(dset);
3685 
3686     free(tset);
3687 
3688     return 0;
3689 }
3690 
dataset_set_regular_markers(DATASET * dset)3691 void dataset_set_regular_markers (DATASET *dset)
3692 {
3693     dset->markers = REGULAR_MARKERS;
3694 }
3695 
3696 struct filetype_info {
3697     GretlFileType type;
3698     const char *src;
3699 };
3700 
3701 /**
3702  * dataset_add_import_info:
3703  * @dset: pointer to dataset information struct.
3704  * @fname: the name of a file from which data have been imported.
3705  * @type: code representing the type of the file identified by
3706  * @fname.
3707  *
3708  * On successful import of data from some "foreign" format,
3709  * add a note to the "descrip" member of the new dataset
3710  * saying where it came from and when.
3711  */
3712 
dataset_add_import_info(DATASET * dset,const char * fname,GretlFileType type)3713 void dataset_add_import_info (DATASET *dset, const char *fname,
3714 			      GretlFileType type)
3715 {
3716     struct filetype_info ftypes[] = {
3717 	{ GRETL_CSV,      "CSV" },
3718 	{ GRETL_GNUMERIC, "Gnumeric" },
3719 	{ GRETL_XLS,      "Excel" },
3720 	{ GRETL_XLSX,     "Excel" },
3721 	{ GRETL_ODS,      "Open Document" },
3722 	{ GRETL_WF1,      "Eviews" },
3723 	{ GRETL_DTA,      "Stata" },
3724 	{ GRETL_SAV,      "SPSS" },
3725 	{ GRETL_SAS,      "SAS" },
3726 	{ GRETL_JMULTI,   "JMulTi" }
3727     };
3728     int i, nt = sizeof ftypes / sizeof ftypes[0];
3729     const char *src = NULL;
3730     gchar *note = NULL;
3731     char tstr[48];
3732 
3733     for (i=0; i<nt; i++) {
3734 	if (type == ftypes[i].type) {
3735 	    src = ftypes[i].src;
3736 	    break;
3737 	}
3738     }
3739 
3740     if (src == NULL) {
3741 	return;
3742     }
3743 
3744     print_time(tstr);
3745 
3746     if (g_utf8_validate(fname, -1, NULL)) {
3747 	const char *p = strrslash(fname);
3748 
3749 	if (p != NULL) {
3750 	    fname = p + 1;
3751 	}
3752 	note = g_strdup_printf(_("Data imported from %s file '%s', %s\n"),
3753 			       src, fname, tstr);
3754     } else {
3755 	note = g_strdup_printf(_("Data imported from %s, %s\n"),
3756 			       src, tstr);
3757     }
3758 
3759     if (note != NULL) {
3760 	if (dset->descrip == NULL) {
3761 	    dset->descrip = gretl_strdup(note);
3762 	} else {
3763 	    int dlen = strlen(dset->descrip);
3764 	    int nlen = strlen(note);
3765 	    char *tmp = realloc(dset->descrip, dlen + nlen + 5);
3766 
3767 	    if (tmp != NULL) {
3768 		dset->descrip = tmp;
3769 		strcat(dset->descrip, "\n\n");
3770 		strncat(dset->descrip, note, nlen);
3771 	    }
3772 	}
3773 	g_free(note);
3774     }
3775 }
3776 
is_weekend(int t,int pd,int sat0,int sun0)3777 static int is_weekend (int t, int pd, int sat0, int sun0)
3778 {
3779     int sat = 0, sun = 0;
3780 
3781     /* this is intended to identify weekend days for
3782        both 7- and 6-day data */
3783 
3784     if (sat0 >= 0) {
3785 	sat = (t - sat0) % pd == 0;
3786     }
3787 
3788     if (!sat && sun0 >= 0) {
3789 	sun = (t - sun0) % pd == 0;
3790     }
3791 
3792     return sat || sun;
3793 }
3794 
3795 /* Scan imported daily data for missing values, so as to
3796    be able to offer the user some options.
3797 
3798    Return values:
3799 
3800    0 : no missing values
3801    1 : all weekend data are missing (or possibly just all
3802        Saturdays, or just all Sundays), but no weekday
3803        data missing
3804    2 : as with 1, but also some weekdays missing
3805    3 : scattering of weekend and/or weekday data missing
3806 */
3807 
analyse_daily_import(const DATASET * dset,PRN * prn)3808 int analyse_daily_import (const DATASET *dset, PRN *prn)
3809 {
3810     int all_weekends_blank = 0;
3811     int blank_weekends = 0;
3812     int blank_weekdays = 0;
3813     int n_weekdays = 0;
3814     int n_weekend_days = 0;
3815     int sat0 = -1, sun0 = -1;
3816     int i, t, pd = dset->pd;
3817     int all_missing, weekend;
3818     int ret = 0;
3819 
3820     if (pd > 5) {
3821 	char datestr[OBSLEN];
3822 	int wkday;
3823 
3824 	/* start by finding first Sat and/or Sun */
3825 	for (t=0; t<dset->n; t++) {
3826 	    ntolabel(datestr, t, dset);
3827 	    wkday = weekday_from_date(datestr);
3828 	    if (wkday == 6 && sat0 < 0) {
3829 		sat0 = t;
3830 	    } else if (wkday == 0 && sun0 < 0) {
3831 		sun0 = t;
3832 	    }
3833 	    if (sat0 >= 0 && sun0 >= 0) {
3834 		break;
3835 	    } else if (dset->pd == 6 && (sat0 >= 0 || sun0 >= 0)) {
3836 		break;
3837 	    }
3838 	}
3839 	all_weekends_blank = 1; /* may be revised below */
3840     } else {
3841 	/* there are no weekend days in 5-day data */
3842 	weekend = 0;
3843     }
3844 
3845     for (t=0; t<dset->n; t++) {
3846 	if (pd > 5) {
3847 	    weekend = is_weekend(t, pd, sat0, sun0);
3848 	}
3849 	all_missing = 1;
3850 	for (i=1; i<dset->v; i++) {
3851 	    if (!na(dset->Z[i][t])) {
3852 		all_missing = 0;
3853 		break;
3854 	    }
3855 	}
3856 	if (weekend) {
3857 	    n_weekend_days++;
3858 	    if (!all_missing) {
3859 		/* not all weekend data are missing */
3860 		all_weekends_blank = 0;
3861 	    }
3862 	} else {
3863 	    n_weekdays++;
3864 	}
3865 	if (all_missing) {
3866 	    if (weekend) {
3867 		blank_weekends++;
3868 	    } else {
3869 		blank_weekdays++;
3870 	    }
3871 	}
3872     }
3873 
3874     if (all_weekends_blank) {
3875 	double misspc = 100.0 * blank_weekdays / (double) n_weekdays;
3876 
3877 	if (pd == 7) {
3878 	    pputs(prn, "This dataset is on 7-day calendar, but weekends are blank.");
3879 	} else {
3880 	    pprintf(prn, "This dataset is on 6-day calendar, but %s are blank.",
3881 		    sat0 >= 0 ? "Sundays" : "Saturdays");
3882 	}
3883 	ret = 1;
3884 	if (misspc > 0.0) {
3885 	    pputc(prn, '\n');
3886 	    pputs(prn, "In addition, ");
3887 	    if (misspc >= 0.01) {
3888 		pprintf(prn, "%.2f percent of weekday observations are missing.",
3889 			misspc);
3890 	    } else {
3891 		pprintf(prn, "%g percent of weekday observations are missing.",
3892 			misspc);
3893 	    }
3894 	    if (misspc < 10.0) {
3895 		ret = 2;
3896 	    }
3897 	}
3898     } else if (blank_weekdays || blank_weekends) {
3899 	int ndays = n_weekdays + n_weekend_days;
3900 	int nmiss = blank_weekdays + blank_weekends;
3901 	double misspc = 100.0 * nmiss / (double) ndays;
3902 
3903 	if (misspc >= 0.01) {
3904 	    pprintf(prn, "%.2f percent of daily observations are missing.",
3905 		    misspc);
3906 	} else {
3907 	    pprintf(prn, "%g percent of daily observations are missing.",
3908 		    misspc);
3909 	}
3910 	if (misspc < 10) {
3911 	    ret = 3;
3912 	}
3913     }
3914 
3915     return ret;
3916 }
3917