1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include "libgretl.h"
21 #include "uservar.h"
22 #include "dbwrite.h"
23 #include "libset.h"
24 #include "gretl_func.h"
25 #include "gretl_xml.h"
26 #include "gretl_panel.h"
27 #include "gretl_string_table.h"
28 #include "matrix_extra.h"
29 #include "csvdata.h"
30 #include "usermat.h"
31
32 #include <ctype.h>
33 #include <time.h>
34 #include <errno.h>
35 #include <unistd.h>
36
37 #include <glib.h>
38
39 #define MERGE_DEBUG 0
40
41 /**
42 * SECTION:dataio
43 * @short_description: data handling (internal)
44 * @title: Data support
45 * @include: gretl/libgretl.h
46 *
47 * The following data handling functions are basically internal to
48 * gretl and not in a state where they can be readily
49 * documented as public APIs.
50 *
51 */
52
53 typedef enum {
54 GRETL_FMT_GDT, /* standard gretl XML data */
55 GRETL_FMT_BINARY, /* native gretl binary data */
56 GRETL_FMT_OCTAVE, /* data in Gnu Octave format */
57 GRETL_FMT_CSV, /* data in Comma Separated Values format */
58 GRETL_FMT_R, /* data in Gnu R format */
59 GRETL_FMT_DAT, /* data in PcGive format */
60 GRETL_FMT_DB, /* gretl native database format */
61 GRETL_FMT_JM, /* JMulti ascii data */
62 GRETL_FMT_DTA, /* Stata .dta format */
63 GRETL_FMT_JSON /* geojson (maps) */
64 } GretlDataFormat;
65
66 #define IS_DATE_SEP(c) (c == '.' || c == ':' || c == ',')
67
68 #define PROGRESS_BAR "progress_bar"
69
70 /**
71 * get_date_x:
72 * @pd: frequency of data.
73 * @obs: observation string.
74 *
75 * Returns: the floating-point representation of @obs.
76 */
77
get_date_x(int pd,const char * obs)78 double get_date_x (int pd, const char *obs)
79 {
80 double x = 1.0;
81
82 if ((pd == 5 || pd == 6 || pd == 7 || pd == 52) && strlen(obs) > 4) {
83 /* calendar data */
84 guint32 ed = get_epoch_day(obs);
85
86 if (ed > 0) {
87 x = ed;
88 }
89 } else {
90 x = obs_str_to_double(obs);
91 }
92
93 return x;
94 }
95
real_check_varname(const char * vname,int is_series)96 static int real_check_varname (const char *vname,
97 int is_series)
98 {
99 int testchar = 'a';
100 int firstbad = 0;
101 int err = 0;
102
103 gretl_error_clear();
104
105 if (vname == NULL || *vname == '\0') {
106 gretl_errmsg_set("Expected an identifier");
107 return E_PARSE;
108 }
109
110 if (strlen(vname) >= VNAMELEN) {
111 gretl_errmsg_set(_("Varname exceeds the maximum of 31 characters"));
112 err = E_DATA;
113 } else if (strcmp(vname, "return") && gretl_reserved_word(vname)) {
114 err = E_DATA;
115 } else if (!(isalpha((unsigned char) *vname))) {
116 firstbad = 1;
117 testchar = *vname;
118 err = E_DATA;
119 } else if (is_series && (function_lookup(vname) ||
120 get_user_function_by_name(vname) ||
121 is_function_alias(vname))) {
122 gretl_warnmsg_sprintf(_("'%s' shadows a function of the same name"),
123 vname);
124 } else {
125 const char *p = vname;
126
127 while (*p && testchar == 'a') {
128 if (!(isalpha((unsigned char) *p))
129 && !(isdigit((unsigned char) *p))
130 && *p != '_') {
131 testchar = *p;
132 err = E_DATA;
133 }
134 p++;
135 }
136 }
137
138 if (err && !is_series && strlen(vname) == 2 && is_greek_letter(vname)) {
139 return 0;
140 }
141
142 if (testchar != 'a') {
143 if (isprint((unsigned char) testchar)) {
144 if (firstbad) {
145 gretl_errmsg_sprintf(_("First char of varname '%s' is bad\n"
146 "(first must be alphabetical)"),
147 vname);
148 } else {
149 gretl_errmsg_sprintf(_("Varname '%s' contains illegal character '%c'\n"
150 "Use only letters, digits and underscore"),
151 vname, (unsigned char) testchar);
152 }
153 } else {
154 if (firstbad) {
155 gretl_errmsg_sprintf(_("First char of varname (0x%x) is bad\n"
156 "(first must be alphabetical)"),
157 (unsigned) testchar);
158 } else {
159 gretl_errmsg_sprintf(_("Varname contains illegal character 0x%x\n"
160 "Use only letters, digits and underscore"),
161 (unsigned) testchar);
162 }
163 }
164 }
165
166 return err;
167 }
168
169 /**
170 * check_varname:
171 * @varname: putative name for variable (or object).
172 *
173 * Check a variable/object name for legality: the name
174 * must start with a letter, and be composed of letters,
175 * numbers or the underscore character, and nothing else.
176 *
177 * Returns: 0 if name is OK, non-zero if not.
178 */
179
check_varname(const char * varname)180 int check_varname (const char *varname)
181 {
182 return real_check_varname(varname, 1);
183 }
184
check_identifier(const char * varname)185 int check_identifier (const char *varname)
186 {
187 /* FIXME series case? */
188 return real_check_varname(varname, 0);
189 }
190
bad_date_string(const char * s)191 static int bad_date_string (const char *s)
192 {
193 int err = 0;
194
195 gretl_error_clear();
196
197 while (*s && !err) {
198 if (!isdigit((unsigned char) *s) && !IS_DATE_SEP(*s)) {
199 if (isprint((unsigned char) *s)) {
200 gretl_errmsg_sprintf(_("Bad character '%c' in date string"), *s);
201 } else {
202 gretl_errmsg_sprintf(_("Bad character %d in date string"), *s);
203 }
204 err = 1;
205 }
206 s++;
207 }
208
209 return err;
210 }
211
maybe_unquote_label(char * targ,const char * src)212 static void maybe_unquote_label (char *targ, const char *src)
213 {
214 if (*src == '"' || *src == '\'') {
215 int n;
216
217 strcpy(targ, src + 1);
218 n = strlen(targ);
219 if (n > 0 && (targ[n-1] == '"' || targ[n-1] == '\'')) {
220 targ[n-1] = '\0';
221 }
222 } else {
223 strcpy(targ, src);
224 }
225 }
226
get_dot_pos(const char * s)227 static int get_dot_pos (const char *s)
228 {
229 int i, pos = 0;
230
231 for (i=0; *s != '\0'; i++, s++) {
232 if (IS_DATE_SEP(*s)) {
233 pos = i;
234 break;
235 }
236 }
237
238 return pos;
239 }
240
241 #define DATES_DEBUG 0
242
match_obs_marker(const char * s,const DATASET * dset)243 static int match_obs_marker (const char *s, const DATASET *dset)
244 {
245 char test[OBSLEN];
246 int t;
247
248 #if DATES_DEBUG
249 fprintf(stderr, "dateton: checking '%s' against marker strings\n", s);
250 #endif
251
252 maybe_unquote_label(test, s);
253
254 for (t=0; t<dset->n; t++) {
255 if (!strcmp(test, dset->S[t])) {
256 /* handled */
257 return t;
258 }
259 }
260
261 return -1;
262 }
263
caldate_precheck(const char * s,int * y2,int * slashed)264 static int caldate_precheck (const char *s, int *y2, int *slashed)
265 {
266 int n = strlen(s);
267 int ok = 0;
268
269 if (n < 8 || !isdigit(s[0]) || !isdigit(s[1])) {
270 /* can't be date */
271 return 0;
272 }
273
274 if (n == 10) {
275 if (s[4] == '-' && s[7] == '-') {
276 ok = 1; /* could be YYYY-MM-DD */
277 } else if (s[4] == '/' && s[7] == '/') {
278 *slashed = 1; /* could be YYYY/MM/DD */
279 ok = 1;
280 }
281 } else if (n == 8) {
282 if (s[2] == '-' && s[5] == '-') {
283 ok = 1; /* could be YY-MM-DD */
284 } else if (s[2] == '/' && s[5] == '/') {
285 *slashed = 1; /* could be YY/MM/DD */
286 ok = 1;
287 }
288 if (ok) *y2 = 1;
289 }
290
291 return ok;
292 }
293
datecmp(const char * s1,int y21,int slash1,const char * s2,int y22,int slash2)294 static int datecmp (const char *s1, int y21, int slash1,
295 const char *s2, int y22, int slash2)
296 {
297 if (y21 && !y22) {
298 s2 += 2;
299 } else if (!y21 && y22) {
300 s1 += 2;
301 }
302 if (slash1 && !slash2) {
303 char revs1[OBSLEN];
304
305 strcpy(revs1, s1);
306 gretl_charsub(revs1, '/', '-');
307 return strcmp(revs1, s2);
308 } else if (slash2 && !slash1) {
309 char revs2[OBSLEN];
310
311 strcpy(revs2, s2);
312 gretl_charsub(revs2, '/', '-');
313 return strcmp(s1, revs2);
314 } else {
315 return strcmp(s1, s2);
316 }
317 }
318
319 static int
real_dateton(const char * date,const DATASET * dset,int nolimit)320 real_dateton (const char *date, const DATASET *dset, int nolimit)
321 {
322 int handled = 0;
323 int t, n = -1;
324
325 /* first check if this is calendar data and if so,
326 treat accordingly */
327
328 if (calendar_data(dset)) {
329 #if DATES_DEBUG
330 fprintf(stderr, "dateton: treating as calendar data\n");
331 #endif
332 if (dataset_has_markers(dset)) {
333 /* "hard-wired" calendar dates as strings */
334 int tryit, y21 = 0, slash1 = 0;
335 int y22 = 0, slash2 = 0;
336
337 tryit = caldate_precheck(date, &y21, &slash1);
338 if (!tryit) {
339 return -1;
340 }
341 tryit = caldate_precheck(dset->S[0], &y22, &slash2);
342 if (!tryit) {
343 return -1;
344 }
345 for (t=0; t<dset->n; t++) {
346 if (!datecmp(date, y21, slash1, dset->S[t], y22, slash2)) {
347 /* handled */
348 return t;
349 }
350 }
351 /* out of options: abort */
352 return -1;
353 } else {
354 /* automatic calendar dates */
355 n = calendar_obs_number(date, dset);
356 handled = 1;
357 }
358 } else if (dataset_is_daily(dset) ||
359 dataset_is_weekly(dset)) {
360 #if DATES_DEBUG
361 fprintf(stderr, "dateton: trying undated time series\n");
362 #endif
363 t = positive_int_from_string(date);
364 if (t > 0) {
365 n = t - 1;
366 handled = 1;
367 }
368 } else if (dataset_is_decennial(dset)) {
369 t = positive_int_from_string(date);
370 if (t > 0) {
371 n = (t - dset->sd0) / 10;
372 handled = 1;
373 }
374 } else if (dataset_has_markers(dset)) {
375 t = match_obs_marker(date, dset);
376 if (t >= 0) {
377 return t;
378 }
379 /* else maybe just a straight obs number */
380 t = positive_int_from_string(date);
381 if (t > 0) {
382 n = t - 1;
383 handled = 1;
384 }
385 }
386
387 if (!handled) {
388 int pos1, pos2;
389
390 #if DATES_DEBUG
391 fprintf(stderr, "dateton: treating %s as regular numeric obs\n",
392 date);
393 #endif
394 if (bad_date_string(date)) {
395 return -1;
396 }
397
398 pos1 = get_dot_pos(date);
399 pos2 = get_dot_pos(dset->stobs);
400
401 if ((pos1 && !pos2) || (pos2 && !pos1)) {
402 gretl_errmsg_sprintf(_("'%s': invalid observation index"),
403 date);
404 } else if (!pos1 && !pos2) {
405 n = atoi(date) - atoi(dset->stobs);
406 } else if (pos1 > OBSLEN - 2) {
407 gretl_errmsg_sprintf(_("'%s': invalid observation index"),
408 date);
409 } else {
410 char tmp[OBSLEN];
411 int maj, min;
412
413 *tmp = '\0';
414 strncat(tmp, date, OBSLEN-1);
415 tmp[pos1] = '\0';
416 maj = positive_int_from_string(tmp);
417 min = positive_int_from_string(tmp + pos1 + 1);
418
419 if (maj <= 0 || min <= 0 || min > dset->pd) {
420 gretl_errmsg_sprintf(_("'%s': invalid observation index"),
421 date);
422 n = -1;
423 } else {
424 int maj0, min0;
425
426 *tmp = '\0';
427 strncat(tmp, dset->stobs, OBSLEN-1);
428 tmp[pos2] = '\0';
429 maj0 = atoi(tmp);
430 min0 = atoi(tmp + pos2 + 1);
431
432 n = dset->pd * (maj - maj0) + (min - min0);
433 }
434 }
435 }
436
437 if (!nolimit && dset->n > 0 && n >= dset->n) {
438 fprintf(stderr, "n = %d, dset->n = %d: out of bounds\n", n, dset->n);
439 gretl_errmsg_set(_("Observation number out of bounds"));
440 n = -1;
441 }
442
443 #if DATES_DEBUG
444 fprintf(stderr, "dateton: returning %d\n", n);
445 #endif
446
447 return n;
448 }
449
450 /**
451 * dateton:
452 * @date: string representation of date for processing.
453 * @dset: pointer to data information struct.
454 *
455 * Determines the observation number corresponding to @date,
456 * relative to @dset. It is an error if @date represents an
457 * observation that lies outside of the full data range
458 * specified in @dset.
459 *
460 * Returns: zero-based observation number, or -1 on error.
461 */
462
dateton(const char * date,const DATASET * dset)463 int dateton (const char *date, const DATASET *dset)
464 {
465 return real_dateton(date, dset, 0);
466 }
467
468 /**
469 * merge_dateton:
470 * @date: string representation of date for processing.
471 * @dset: pointer to data information struct.
472 *
473 * Works just as dateton(), except that for this function it
474 * is not an error if @date represents an observation that
475 * lies beyond the data range specified in @dset. This is
476 * inended for use when merging data, or when creating a new
477 * dataset.
478 *
479 * Returns: zero-based observation number, or -1 on error.
480 */
481
merge_dateton(const char * date,const DATASET * dset)482 int merge_dateton (const char *date, const DATASET *dset)
483 {
484 return real_dateton(date, dset, 1);
485 }
486
panel_obs(char * s,int t,const DATASET * dset)487 static char *panel_obs (char *s, int t, const DATASET *dset)
488 {
489 int i = t / dset->pd + 1;
490 int j = (t + 1) % dset->pd;
491 int d = 1 + floor(log10(dset->pd));
492
493 if (j == 0) {
494 j = dset->pd;
495 }
496
497 sprintf(s, "%d:%0*d", i, d, j);
498
499 return s;
500 }
501
502 /**
503 * ntolabel:
504 * @datestr: char array to which date is to be printed.
505 * @t: zero-based observation number.
506 * @dset: data information struct.
507 *
508 * Prints to @datestr (which must be at least #OBSLEN bytes)
509 * the calendar representation of observation number @t.
510 *
511 * Returns: the observation string.
512 */
513
ntolabel(char * datestr,int t,const DATASET * dset)514 char *ntolabel (char *datestr, int t, const DATASET *dset)
515 {
516 double x;
517
518 #if 0
519 fprintf(stderr, "ntolabel: t=%d, pd=%d, sd0=%g, incoming stobs='%s'\n",
520 t, dset->pd, dset->sd0, dset->stobs);
521 fprintf(stderr, " calendar_data(dset) %d\n", calendar_data(dset));
522 #endif
523
524 if (calendar_data(dset)) {
525 /* handles both daily and dated weekly data */
526 if (dataset_has_markers(dset)) {
527 strcpy(datestr, dset->S[t]);
528 if (strchr(datestr, '/')) {
529 gretl_charsub(datestr, '/', '-');
530 }
531 } else {
532 calendar_date_string(datestr, t, dset);
533 }
534 return datestr;
535 } else if (dataset_is_daily(dset) ||
536 dataset_is_weekly(dset)) {
537 /* undated time series */
538 x = date_as_double(t, 1, dset->sd0);
539 sprintf(datestr, "%d", (int) x);
540 return datestr;
541 } else if (dataset_is_decennial(dset)) {
542 x = dset->sd0 + 10 * t;
543 sprintf(datestr, "%d", (int) x);
544 return datestr;
545 } else if (dataset_is_panel(dset)) {
546 panel_obs(datestr, t, dset);
547 return datestr;
548 }
549
550 x = date_as_double(t, dset->pd, dset->sd0);
551
552 if (dset->pd == 1) {
553 sprintf(datestr, "%d", (int) x);
554 } else {
555 int pdp = dset->pd;
556 short len = 1;
557 char fmt[10];
558
559 while ((pdp = pdp / 10)) len++;
560 sprintf(fmt, "%%.%df", len);
561 sprintf(datestr, fmt, x);
562 colonize_obs(datestr);
563 }
564
565 return datestr;
566 }
567
568 /* print observation date in ISO 8601 extended format */
569
ntolabel_8601(char * datestr,int t,const DATASET * dset)570 char *ntolabel_8601 (char *datestr, int t, const DATASET *dset)
571 {
572 *datestr = '\0';
573
574 if (calendar_data(dset)) {
575 if (dataset_has_markers(dset)) {
576 strcpy(datestr, dset->S[t]);
577 } else {
578 calendar_date_string(datestr, t, dset);
579 }
580 } else if (dataset_is_decennial(dset)) {
581 double x = dset->sd0 + 10 * t;
582 int yr = lrint(x);
583
584 sprintf(datestr, "%d-01-01", yr);
585 } else {
586 double x = date_as_double(t, dset->pd, dset->sd0);
587 int maj = lrint(floor(x));
588
589 if (dset->pd == 1) {
590 sprintf(datestr, "%d-01-01", maj);
591 } else if (dset->pd == 12) {
592 int min = lrint(100 * (x - floor(x)));
593
594 sprintf(datestr, "%d-%02d-01", maj, min);
595 } else if (dset->pd == 4) {
596 int min = lrint(10 * (x - floor(x)));
597 int mo = min==2 ? 4 : min==3? 7 : min==4? 10 : min;
598
599 sprintf(datestr, "%d-%02d-01", maj, mo);
600 }
601 }
602
603 return datestr;
604 }
605
606 #define xround(x) (((x-floor(x))>.5)? ceil(x) : floor(x))
607
608 /**
609 * get_subperiod:
610 * @t: zero-based observation number.
611 * @dset: data information struct.
612 * @err: location to receive error code, or NULL.
613 *
614 * For "seasonal" time series data (in a broad sense),
615 * determines the sub-period at observation @t. The "sub-period"
616 * might be a quarter, month, hour or whatever. The value
617 * returned is zero-based (e.g. first quarter = 0).
618 * If the data are not "seasonal", 0 is returned and if
619 * @err is non-NULL it receives a non-zero error code.
620 *
621 * Returns: the sub-period.
622 */
623
get_subperiod(int t,const DATASET * dset,int * err)624 int get_subperiod (int t, const DATASET *dset, int *err)
625 {
626 int ret = 0;
627
628 if (!dataset_is_seasonal(dset)) {
629 if (err != NULL) {
630 *err = E_PDWRONG;
631 }
632 return 0;
633 }
634
635 if (dataset_is_weekly(dset)) {
636 /* bodge -- what else to do? */
637 ret = t % dset->pd;
638 } else if (calendar_data(dset)) {
639 /* dated daily data */
640 char datestr[12];
641
642 calendar_date_string(datestr, t, dset);
643 ret = weekday_from_date(datestr);
644 } else if (dataset_is_daily(dset)) {
645 /* bodge, again */
646 ret = t % dset->pd;
647 } else {
648 /* quarterly, monthly, hourly... */
649 double x = date_as_double(t, dset->pd, dset->sd0);
650 int i, d = ceil(log10(dset->pd));
651
652 x -= floor(x);
653 for (i=0; i<d; i++) {
654 x *= 10;
655 }
656 ret = xround(x) - 1;
657 }
658
659 return ret;
660 }
661
662 /**
663 * get_precision:
664 * @x: data array.
665 * @n: length of @x.
666 * @placemax: the maximum number of decimal places to try.
667 *
668 * Find the number of decimal places required to represent a given
669 * data series uniformly and accurately, if possible.
670 *
671 * Returns: the required number of decimal places or
672 * #PMAX_NOT_AVAILABLE if it can't be done.
673 */
674
get_precision(const double * x,int n,int placemax)675 int get_precision (const double *x, int n, int placemax)
676 {
677 int t, p, pmax = 0;
678 char *s, numstr[64];
679 int len, n_ok = 0;
680 double z;
681
682 for (t=0; t<n; t++) {
683 if (!na(x[t])) {
684 z = fabs(x[t]);
685 /* escape clause: numbers are too big or too small for
686 this treatment */
687 if (z > 0 && (z < 1.0e-6 || z > 1.0e+8)) {
688 return PMAX_NOT_AVAILABLE;
689 }
690 n_ok++;
691 }
692 }
693
694 if (n_ok == 0) {
695 return PMAX_NOT_AVAILABLE;
696 }
697
698 for (t=0; t<n; t++) {
699 if (!na(x[t])) {
700 p = placemax;
701 sprintf(numstr, "%.*f", p, fabs(x[t]));
702 /* go to the end and drop trailing zeros */
703 len = strlen(numstr);
704 s = numstr + len - 1;
705 while (*s-- == '0') {
706 p--;
707 len--;
708 }
709 if (len > 10) {
710 /* this is going to be too big */
711 return PMAX_NOT_AVAILABLE;
712 }
713 if (p > pmax) {
714 pmax = p;
715 }
716 }
717 }
718
719 return pmax;
720 }
721
722 struct extmap {
723 GretlFileType ftype;
724 const char *ext;
725 };
726
727 static struct extmap data_ftype_map[] = {
728 { GRETL_XML_DATA, ".gdt" },
729 { GRETL_BINARY_DATA, ".gdtb" },
730 { GRETL_CSV, ".csv" },
731 { GRETL_OCTAVE, ".m" },
732 { GRETL_GNUMERIC, ".gnumeric" },
733 { GRETL_XLS, ".xls" },
734 { GRETL_XLSX, ".xlsx" },
735 { GRETL_ODS, ".ods" },
736 { GRETL_WF1, ".wf1" },
737 { GRETL_DTA, ".dta" },
738 { GRETL_SAV, ".sav" },
739 { GRETL_SAS, ".xpt" },
740 { GRETL_JMULTI, ".dat" }
741 };
742
743 static const char *map_suffixes[] = {
744 ".json",
745 ".geojson",
746 ".dbf",
747 ".shp"
748 };
749
get_filename_extension(const char * fname)750 static const char *get_filename_extension (const char *fname)
751 {
752 const char *ext = strrchr(fname, '.');
753
754 if (ext != NULL && strchr(ext, '/')) {
755 /* the rightmost dot is not in the basename */
756 ext = NULL;
757 }
758
759 #ifdef WIN32
760 if (ext != NULL && strchr(ext, '\\')) {
761 ext = NULL;
762 }
763 #endif
764
765 return ext;
766 }
767
data_file_type_from_extension(const char * ext)768 static GretlFileType data_file_type_from_extension (const char *ext)
769 {
770 int i, n = G_N_ELEMENTS(data_ftype_map);
771
772 for (i=0; i<n; i++) {
773 if (!g_ascii_strcasecmp(ext, data_ftype_map[i].ext)) {
774 return data_ftype_map[i].ftype;
775 }
776 }
777
778 /* a few extras */
779 if (!g_ascii_strcasecmp(ext, ".txt") ||
780 !g_ascii_strcasecmp(ext, ".asc") ||
781 !g_ascii_strcasecmp(ext, ".gz")) {
782 return GRETL_CSV;
783 }
784
785 /* map metadata */
786 n = G_N_ELEMENTS(map_suffixes);
787 for (i=0; i<n; i++) {
788 if (!g_ascii_strcasecmp(ext, map_suffixes[i])) {
789 return GRETL_MAP;
790 }
791 }
792
793 return GRETL_UNRECOGNIZED;
794 }
795
data_file_type_from_name(const char * fname)796 GretlFileType data_file_type_from_name (const char *fname)
797 {
798 const char *ext = strrchr(fname, '.');
799
800 if (ext != NULL && strchr(ext, '/')) {
801 /* the rightmost dot is not in the basename */
802 ext = NULL;
803 }
804
805 #ifdef WIN32
806 if (ext != NULL && strchr(ext, '\\')) {
807 ext = NULL;
808 }
809 #endif
810
811 if (ext != NULL) {
812 return data_file_type_from_extension(ext);
813 }
814
815 return GRETL_UNRECOGNIZED;
816 }
817
818 #define non_native(o) (o & (OPT_M | OPT_R | OPT_D | OPT_G | OPT_J))
819
820 static GretlDataFormat
format_from_opt_or_name(gretlopt opt,const char * fname,char * delim,int * add_ext,int * gzip,int * err)821 format_from_opt_or_name (gretlopt opt, const char *fname,
822 char *delim, int *add_ext,
823 int *gzip, int *err)
824 {
825 GretlDataFormat fmt = GRETL_FMT_GDT;
826
827 if (has_suffix(fname, ".gdt")) {
828 if (non_native(opt)) {
829 *err = E_BADOPT;
830 }
831 return GRETL_FMT_GDT;
832 } else if (has_suffix(fname, ".gdtb")) {
833 if (non_native(opt)) {
834 *err = E_BADOPT;
835 }
836 return GRETL_FMT_BINARY;
837 } else if (has_suffix(fname, ".geojson")) {
838 return GRETL_FMT_JSON;
839 }
840
841 if (opt & OPT_M) {
842 fmt = GRETL_FMT_OCTAVE;
843 } else if (opt & OPT_R) {
844 fmt = GRETL_FMT_R;
845 } else if (opt & OPT_D) {
846 fmt = GRETL_FMT_DB;
847 } else if (opt & OPT_G) {
848 fmt = GRETL_FMT_DAT;
849 } else if (opt & OPT_J) {
850 fmt = GRETL_FMT_JM;
851 }
852
853 if (fmt == GRETL_FMT_GDT) {
854 if (has_suffix(fname, ".R")) {
855 fmt = GRETL_FMT_R;
856 } else if (has_suffix(fname, ".csv")) {
857 fmt = GRETL_FMT_CSV;
858 } else if (has_suffix(fname, ".csv.gz")) {
859 fmt = GRETL_FMT_CSV;
860 *gzip = 1;
861 } else if (has_suffix(fname, ".m")) {
862 fmt = GRETL_FMT_OCTAVE;
863 } else if (has_suffix(fname, ".txt") ||
864 has_suffix(fname, ".asc")) {
865 fmt = GRETL_FMT_CSV;
866 *delim = ' ';
867 } else if (has_suffix(fname, ".dta")) {
868 fmt = GRETL_FMT_DTA;
869 } else if (has_suffix(fname, ".bin")) {
870 fmt = GRETL_FMT_DB;
871 }
872 }
873
874 if (fmt == GRETL_FMT_GDT) {
875 *add_ext = 1;
876 }
877
878 return fmt;
879 }
880
date_maj_min(int t,const DATASET * dset,int * maj,int * min)881 void date_maj_min (int t, const DATASET *dset, int *maj, int *min)
882 {
883 char obs[OBSLEN];
884
885 ntolabel(obs, t, dset);
886
887 if (maj != NULL) {
888 *maj = atoi(obs);
889 }
890
891 if (min != NULL) {
892 char *s, sep = ':';
893
894 if (strchr(obs, sep) == NULL) {
895 if (dset->pd == 4 && strchr(obs, 'Q')) {
896 sep = 'Q';
897 } else if (dset->pd == 12 && strchr(obs, 'M')) {
898 sep = 'M';
899 }
900 }
901
902 s = strchr(obs, sep);
903 if (s != NULL && strlen(s) > 1) {
904 *min = atoi(s + 1);
905 } else {
906 *min = 1;
907 }
908 }
909 }
910
911 #define NO_PMAX(p,k) (p == NULL || p[k-1] == PMAX_NOT_AVAILABLE)
912
913 #define TMPLEN 64
914
csv_data_out(const DATASET * dset,const int * list,int print_obs,int digits,char decpoint,char delim,FILE * fp,gzFile fz)915 static void csv_data_out (const DATASET *dset, const int *list,
916 int print_obs, int digits, char decpoint,
917 char delim, FILE *fp, gzFile fz)
918 {
919 const char *NA = get_csv_na_write_string();
920 char tmp[TMPLEN];
921 double xt;
922 int popit = 0, dotsub = 0;
923 int t, i, vi;
924
925 if (decpoint == '.' && get_local_decpoint() == ',') {
926 gretl_push_c_numeric_locale();
927 popit = 1;
928 } else if (decpoint == ',' && get_local_decpoint() == '.') {
929 dotsub = 1;
930 }
931
932 for (t=dset->t1; t<=dset->t2; t++) {
933 if (print_obs) {
934 if (dset->S != NULL) {
935 if (fz != NULL) {
936 gzprintf(fz, "\"%s\"%c", dset->S[t], delim);
937 } else {
938 fprintf(fp, "\"%s\"%c", dset->S[t], delim);
939 }
940 } else {
941 ntolabel(tmp, t, dset);
942 if (quarterly_or_monthly(dset)) {
943 modify_date_for_csv(tmp, dset->pd);
944 }
945 if (fz != NULL) {
946 gzprintf(fz, "%s%c", tmp, delim);
947 } else {
948 fprintf(fp, "%s%c", tmp, delim);
949 }
950 }
951 }
952
953 for (i=1; i<=list[0]; i++) {
954 vi = list[i];
955 xt = dset->Z[vi][t];
956 if (na(xt)) {
957 fputs(NA, fp);
958 } else {
959 if (is_string_valued(dset, vi)) {
960 const char *st;
961
962 st = series_get_string_for_obs(dset, vi, t);
963 if (st != NULL) {
964 *tmp = '\0';
965 strcat(tmp, "\"");
966 strncat(tmp, st, TMPLEN - 3);
967 strcat(tmp, "\"");
968 } else {
969 fprintf(stderr, "missing string at t=%d, vi=%d, xt=%g\n",
970 t, vi, xt);
971 strcpy(tmp, "\"NA\"");
972 }
973 } else if (series_is_coded(dset, vi)) {
974 sprintf(tmp, "\"%d\"", (int) xt);
975 } else {
976 sprintf(tmp, "%.*g", digits, xt);
977 }
978 if (dotsub) {
979 gretl_charsub(tmp, '.', ',');
980 }
981 if (fz != NULL) {
982 gzputs(fz, tmp);
983 } else {
984 fputs(tmp, fp);
985 }
986 }
987 if (fz != NULL) {
988 gzputc(fz, i < list[0] ? delim : '\n');
989 } else {
990 fputc(i < list[0] ? delim : '\n', fp);
991 }
992 }
993 }
994
995 if (popit) {
996 gretl_pop_c_numeric_locale();
997 }
998 }
999
markers_are_unique(const DATASET * dset)1000 static int markers_are_unique (const DATASET *dset)
1001 {
1002 int t, s;
1003
1004 for (t=dset->t1; t<dset->t2; t++) {
1005 for (s=t+1; s<=dset->t2; s++) {
1006 if (strcmp(dset->S[t], dset->S[s]) == 0) {
1007 return 0;
1008 }
1009 }
1010 }
1011
1012 return 1;
1013 }
1014
R_data_out(const DATASET * dset,const int * list,int digits,FILE * fp)1015 static void R_data_out (const DATASET *dset, const int *list,
1016 int digits, FILE *fp)
1017 {
1018 int print_markers = 0;
1019 double xt;
1020 int t, i, vi;
1021
1022 if (dset->S != NULL) {
1023 print_markers = markers_are_unique(dset);
1024 }
1025
1026 for (t=dset->t1; t<=dset->t2; t++) {
1027 if (print_markers) {
1028 fprintf(fp, "\"%s\" ", dset->S[t]);
1029 }
1030 for (i=1; i<=list[0]; i++) {
1031 vi = list[i];
1032 xt = dset->Z[vi][t];
1033 if (na(xt)) {
1034 fputs("NA", fp);
1035 } else if (is_string_valued(dset, vi)) {
1036 fprintf(fp, "\"%s\"", series_get_string_for_obs(dset, vi, t));
1037 } else if (series_is_coded(dset, vi)) {
1038 fprintf(fp, "\"%d\"", (int) xt);
1039 } else {
1040 fprintf(fp, "%.*g", digits, xt);
1041 }
1042 fputc(i < list[0] ? ' ' : '\n', fp);
1043 }
1044 }
1045 }
1046
write_dta_data(const char * fname,const int * list,gretlopt opt,const DATASET * dset)1047 static int write_dta_data (const char *fname, const int *list,
1048 gretlopt opt, const DATASET *dset)
1049 {
1050 int (*exporter) (const char *, const int *, gretlopt,
1051 const DATASET *);
1052 int err = 0;
1053
1054 exporter = get_plugin_function("stata_export");
1055
1056 if (exporter == NULL) {
1057 err = 1;
1058 } else {
1059 err = (*exporter)(fname, list, opt, dset);
1060 }
1061
1062 return err;
1063 }
1064
write_map_data(const char * fname,const int * list,const DATASET * dset)1065 static int write_map_data (const char *fname,
1066 const int *list,
1067 const DATASET *dset)
1068 {
1069 gretl_bundle *b = NULL;
1070 int err = 0;
1071
1072 b = get_current_map(dset, list, &err);
1073
1074 if (!err) {
1075 err = gretl_bundle_write_to_file(b, fname, 0);
1076 }
1077
1078 gretl_bundle_destroy(b);
1079
1080 return err;
1081 }
1082
1083 #define DEFAULT_CSV_DIGITS 15
1084
real_write_data(const char * fname,int * list,const DATASET * dset,gretlopt opt,int progress,PRN * prn)1085 static int real_write_data (const char *fname, int *list,
1086 const DATASET *dset, gretlopt opt,
1087 int progress, PRN *prn)
1088 {
1089 int i, t, v, l0;
1090 GretlDataFormat fmt;
1091 char datfile[MAXLEN];
1092 int n = dset->n;
1093 int pop_locale = 0;
1094 char delim = 0;
1095 FILE *fp = NULL;
1096 gzFile fz = NULL;
1097 int freelist = 0;
1098 int csv_digits = 0;
1099 int add_ext = 0;
1100 int gzip = 0;
1101 double xx;
1102 int err = 0;
1103
1104 gretl_error_clear();
1105
1106 if (list != NULL && list[0] == 0) {
1107 return E_ARGS;
1108 }
1109
1110 fmt = format_from_opt_or_name(opt, fname, &delim, &add_ext,
1111 &gzip, &err);
1112 if (err) {
1113 return err;
1114 }
1115
1116 if (list == NULL) {
1117 list = full_var_list(dset, &l0);
1118 if (l0 == 0) {
1119 return E_ARGS;
1120 } else if (list == NULL) {
1121 return E_ALLOC;
1122 } else {
1123 freelist = 1;
1124 }
1125 }
1126
1127 l0 = list[0];
1128 fname = gretl_maybe_switch_dir(fname);
1129
1130 if (fmt == GRETL_FMT_GDT || fmt == GRETL_FMT_BINARY) {
1131 /* write native data file (.gdt or .gdtb) */
1132 err = gretl_write_gdt(fname, list, dset, opt, progress);
1133 goto write_exit;
1134 }
1135
1136 if (fmt == GRETL_FMT_DB) {
1137 /* native type database file */
1138 err = write_db_data(fname, list, opt, dset);
1139 goto write_exit;
1140 }
1141
1142 if (fmt == GRETL_FMT_DTA) {
1143 /* Stata */
1144 err = write_dta_data(fname, list, opt, dset);
1145 goto write_exit;
1146 }
1147
1148 if (fmt == GRETL_FMT_JSON) {
1149 /* writing map as geojson */
1150 err = write_map_data(fname, list, dset);
1151 goto write_exit;
1152 }
1153
1154 strcpy(datfile, fname);
1155
1156 /* open file for output */
1157 if (gzip) {
1158 fz = gretl_gzopen(datfile, "wb1");
1159 } else {
1160 fp = gretl_fopen(datfile, "wb");
1161 }
1162 if (fp == NULL && fz == NULL) {
1163 err = E_FOPEN;
1164 goto write_exit;
1165 }
1166
1167 csv_digits = libset_get_int(CSV_DIGITS);
1168
1169 if (csv_digits <= 0) {
1170 csv_digits = DEFAULT_CSV_DIGITS;
1171 }
1172
1173 if (fmt != GRETL_FMT_CSV) {
1174 /* ensure C locale for data output */
1175 gretl_push_c_numeric_locale();
1176 pop_locale = 1;
1177 }
1178
1179 if (fmt == GRETL_FMT_CSV) {
1180 const char *msg = get_optval_string(STORE, OPT_E);
1181 char decpoint = get_data_export_decpoint();
1182 int print_obs = 0;
1183
1184 if (opt & OPT_I) {
1185 /* the CSV --decimal-comma option */
1186 decpoint = ',';
1187 delim = ';';
1188 } else if (delim == 0) {
1189 delim = get_data_export_delimiter();
1190 }
1191
1192 if (msg != NULL && *msg != '\0') {
1193 if (gzip) {
1194 gzprintf(fz, "# %s\n", msg);
1195 } else {
1196 fprintf(fp, "# %s\n", msg);
1197 }
1198 }
1199
1200 if (!(opt & OPT_X)) {
1201 /* OPT_X prohibits printing of observation strings */
1202 print_obs = dataset_is_time_series(dset) || dset->S != NULL;
1203 }
1204
1205 if (!(opt & OPT_N)) {
1206 /* header: variable names */
1207 if (print_obs && (dset->S != NULL || dset->structure != CROSS_SECTION)) {
1208 if (gzip) {
1209 gzprintf(fz, "obs%c", delim);
1210 } else {
1211 fprintf(fp, "obs%c", delim);
1212 }
1213 }
1214 for (i=1; i<l0; i++) {
1215 if (gzip) {
1216 gzprintf(fz, "%s%c", dset->varname[list[i]], delim);
1217 } else {
1218 fprintf(fp, "%s%c", dset->varname[list[i]], delim);
1219 }
1220 }
1221 if (gzip) {
1222 gzprintf(fz, "%s\n", dset->varname[list[l0]]);
1223 } else {
1224 fprintf(fp, "%s\n", dset->varname[list[l0]]);
1225 }
1226 }
1227
1228 csv_data_out(dset, list, print_obs, csv_digits,
1229 decpoint, delim, fp, fz);
1230 } else if (fmt == GRETL_FMT_R) {
1231 /* friendly to GNU R */
1232 if (dataset_is_time_series(dset)) {
1233 char datestr[OBSLEN];
1234
1235 ntolabel(datestr, dset->t1, dset);
1236 fprintf(fp, "# time-series data: start = %s, frequency = %d\n",
1237 datestr, dset->pd);
1238 }
1239
1240 for (i=1; i<l0; i++) {
1241 fprintf(fp, "%s ", dset->varname[list[i]]);
1242 }
1243 fprintf(fp, "%s\n", dset->varname[list[l0]]);
1244
1245 R_data_out(dset, list, csv_digits, fp);
1246 } else if (fmt == GRETL_FMT_OCTAVE) {
1247 /* GNU Octave: write out data as several matrices (one per
1248 series) in the same file */
1249
1250 for (i=1; i<=list[0]; i++) {
1251 v = list[i];
1252 fprintf(fp, "# name: %s\n# type: matrix\n# rows: %d\n# columns: 1\n",
1253 dset->varname[v], n);
1254 for (t=dset->t1; t<=dset->t2; t++) {
1255 xx = dset->Z[v][t];
1256 if (na(xx)) {
1257 fputs("NaN ", fp);
1258 } else {
1259 fprintf(fp, "%.*g\n", csv_digits, xx);
1260 }
1261 }
1262 }
1263 } else if (fmt == GRETL_FMT_DAT) {
1264 /* PcGive: data file with load info */
1265 int pd = dset->pd;
1266
1267 for (i=1; i<=list[0]; i++) {
1268 fprintf(fp, ">%s ", dset->varname[list[i]]);
1269 if (dset->structure == TIME_SERIES &&
1270 (pd == 1 || pd == 4 || pd == 12)) {
1271 int maj, min;
1272
1273 date_maj_min(dset->t1, dset, &maj, &min);
1274 fprintf(fp, "%d %d ", maj, min);
1275 date_maj_min(dset->t2, dset, &maj, &min);
1276 fprintf(fp, "%d %d %d", maj, min, pd);
1277 } else {
1278 fprintf(fp, "%d 1 %d 1 1", dset->t1, dset->t2);
1279 }
1280
1281 fputc('\n', fp);
1282
1283 for (t=dset->t1; t<=dset->t2; t++) {
1284 v = list[i];
1285 xx = dset->Z[v][t];
1286 if (na(xx)) {
1287 fprintf(fp, "-9999.99");
1288 } else {
1289 fprintf(fp, "%.*g", csv_digits, xx);
1290 }
1291 fputc('\n', fp);
1292 }
1293 fputc('\n', fp);
1294 }
1295 } else if (fmt == GRETL_FMT_JM) {
1296 /* JMulti: ascii with comments and date info */
1297 const char *vlabel;
1298 int maj, min;
1299
1300 fputs("/*\n", fp);
1301 for (i=1; i<=list[0]; i++) {
1302 v = list[i];
1303 vlabel = series_get_label(dset, v);
1304 fprintf(fp, " %s: %s\n", dset->varname[v],
1305 vlabel == NULL ? "" : vlabel);
1306 }
1307 fputs("*/\n", fp);
1308 date_maj_min(dset->t1, dset, &maj, &min);
1309 if (dset->pd == 4 || dset->pd == 12) {
1310 fprintf(fp, "<%d %c%d>\n", maj, (dset->pd == 4)? 'Q' : 'M', min);
1311 } else if (dset->pd == 1) {
1312 fprintf(fp, "<%d>\n", maj);
1313 } else {
1314 fputs("<1>\n", fp);
1315 }
1316 for (i=1; i<=list[0]; i++) {
1317 v = list[i];
1318 fprintf(fp, " %s", dset->varname[v]);
1319 }
1320 fputc('\n', fp);
1321 for (t=dset->t1; t<=dset->t2; t++) {
1322 for (i=1; i<=list[0]; i++) {
1323 v = list[i];
1324 if (na(dset->Z[v][t])) {
1325 fputs("NaN ", fp);
1326 } else {
1327 fprintf(fp, "%.*g ", csv_digits, dset->Z[v][t]);
1328 }
1329 }
1330 fputc('\n', fp);
1331 }
1332 }
1333
1334 if (pop_locale) {
1335 gretl_pop_c_numeric_locale();
1336 }
1337
1338 if (fz != NULL) {
1339 gzclose(fz);
1340 } else if (fp != NULL) {
1341 fclose(fp);
1342 }
1343
1344 write_exit:
1345
1346 if (!err && prn != NULL) {
1347 if (add_ext) {
1348 pprintf(prn, _("wrote %s.gdt\n"), fname);
1349 } else {
1350 pprintf(prn, _("wrote %s\n"), fname);
1351 }
1352 }
1353
1354 if (freelist) {
1355 free(list);
1356 }
1357
1358 return err;
1359 }
1360
1361 /**
1362 * write_data:
1363 * @fname: name of file to write.
1364 * @list: list of variables to write (or %NULL to write all series).
1365 * @dset: dataset struct.
1366 * @opt: option flag indicating format in which to write the data.
1367 * @prn: gretl printer or NULL.
1368 *
1369 * Write out a data file containing the values of the given set
1370 * of variables.
1371 *
1372 * Returns: 0 on successful completion, non-zero on error.
1373 */
1374
write_data(const char * fname,int * list,const DATASET * dset,gretlopt opt,PRN * prn)1375 int write_data (const char *fname, int *list, const DATASET *dset,
1376 gretlopt opt, PRN *prn)
1377 {
1378 return real_write_data(fname, list, dset, opt, 0, prn);
1379 }
1380
gui_write_data(const char * fname,int * list,const DATASET * dset,gretlopt opt)1381 int gui_write_data (const char *fname, int *list, const DATASET *dset,
1382 gretlopt opt)
1383 {
1384 return real_write_data(fname, list, dset, opt, 1, NULL);
1385 }
1386
1387 /**
1388 * is_gzipped:
1389 * @fname: filename to examine.
1390 *
1391 * Determine if the given file is gzipped.
1392 *
1393 * Returns: 1 in case of a gzipped file, 0 if not gzipped or
1394 * inaccessible.
1395 *
1396 */
1397
is_gzipped(const char * fname)1398 int is_gzipped (const char *fname)
1399 {
1400 FILE *fp;
1401 int gz = 0;
1402
1403 if (fname == NULL || *fname == '\0') {
1404 return 0;
1405 }
1406
1407 fp = gretl_fopen(fname, "rb");
1408 if (fp == NULL) {
1409 return 0;
1410 }
1411
1412 if (fgetc(fp) == 037 && fgetc(fp) == 0213) {
1413 gz = 1;
1414 }
1415
1416 fclose(fp);
1417
1418 return gz;
1419 }
1420
1421 /**
1422 * gretl_get_data:
1423 * @fname: name of file to try.
1424 * @dset: dataset struct.
1425 * @opt: option flags.
1426 * @prn: where messages should be written.
1427 *
1428 * Read "native" data from file into gretl's work space,
1429 * allocating space as required. This function handles
1430 * both native XML data format and native binary format.
1431 * It also handles incomplete information: it can perform
1432 * path-searching on @fname, and will try adding the .gdt
1433 * or .gdtb extension to @fname if this is not given.
1434 *
1435 * Note that a more straightforward function for reading a
1436 * native gretl data file, given the correct path, is
1437 * gretl_read_gdt().
1438 *
1439 * The only applicable option is that @opt may contain
1440 * OPT_T when appending data to a panel dataset: in
1441 * that case we try to interpret the new data as time
1442 * series, in common across all panel units. In most
1443 * cases, just give OPT_NONE.
1444 *
1445 * Returns: 0 on successful completion, non-zero otherwise.
1446 */
1447
gretl_get_data(char * fname,DATASET * dset,gretlopt opt,PRN * prn)1448 int gretl_get_data (char *fname, DATASET *dset,
1449 gretlopt opt, PRN *prn)
1450 {
1451 gretlopt append_opt = OPT_NONE;
1452 int gdtsuff;
1453 char *test;
1454 int err = 0;
1455
1456 gretl_error_clear();
1457
1458 #if 0
1459 fprintf(stderr, "gretl_get_data: calling addpath\n");
1460 #endif
1461
1462 test = gretl_addpath(fname, 0);
1463 if (test == NULL) {
1464 return E_FOPEN;
1465 }
1466
1467 gdtsuff = has_native_data_suffix(fname);
1468
1469 if (opt & OPT_T) {
1470 append_opt = OPT_T;
1471 }
1472
1473 if (gdtsuff) {
1474 /* specific processing for gretl datafiles */
1475 err = gretl_read_gdt(fname, dset, append_opt, prn);
1476 } else {
1477 /* try fallback to a "csv"-type import */
1478 err = import_csv(fname, dset, append_opt, prn);
1479 }
1480
1481 return err;
1482 }
1483
1484 /**
1485 * open_nulldata:
1486 * @dset: dataset struct.
1487 * @data_status: indicator for whether a data file is currently open
1488 * in gretl's work space (1) or not (0).
1489 * @length: desired length of data series.
1490 * @opt: may contain OPT_N to suppress addition of an index series.
1491 * @prn: gretl printing struct.
1492 *
1493 * Create an empty "dummy" data set, suitable for simulations.
1494 *
1495 * Returns: 0 on successful completion, non-zero otherwise.
1496 *
1497 */
1498
open_nulldata(DATASET * dset,int data_status,int length,gretlopt opt,PRN * prn)1499 int open_nulldata (DATASET *dset, int data_status, int length,
1500 gretlopt opt, PRN *prn)
1501 {
1502 int t;
1503
1504 /* clear any existing data info */
1505 if (data_status) {
1506 clear_datainfo(dset, CLEAR_FULL);
1507 }
1508
1509 /* dummy up the data info */
1510 dset->n = length;
1511 dset->v = (opt & OPT_N)? 1 : 2;
1512 dataset_obs_info_default(dset);
1513
1514 if (dataset_allocate_varnames(dset)) {
1515 return E_ALLOC;
1516 }
1517
1518 /* allocate data storage */
1519 if (allocate_Z(dset, 0)) {
1520 return E_ALLOC;
1521 }
1522
1523 if (dset->v > 1) {
1524 /* add an index var */
1525 strcpy(dset->varname[1], "index");
1526 series_set_label(dset, 1, _("index variable"));
1527 for (t=0; t<dset->n; t++) {
1528 dset->Z[1][t] = (double) (t + 1);
1529 }
1530 }
1531
1532 if (prn != NULL && gretl_messages_on()) {
1533 /* print basic info */
1534 pprintf(prn, _("periodicity: %d, maxobs: %d\n"
1535 "observations range: %s to %s\n"),
1536 dset->pd, dset->n, dset->stobs, dset->endobs);
1537 }
1538
1539 /* Set sample range to entire length of data-set by default */
1540 dset->t1 = 0;
1541 dset->t2 = dset->n - 1;
1542
1543 return 0;
1544 }
1545
extend_markers(DATASET * dset,int old_n,int new_n)1546 static int extend_markers (DATASET *dset, int old_n, int new_n)
1547 {
1548 char **S = realloc(dset->S, new_n * sizeof *S);
1549 int t, err = 0;
1550
1551 if (S == NULL) {
1552 err = 1;
1553 } else {
1554 dset->S = S;
1555 for (t=old_n; t<new_n && !err; t++) {
1556 S[t] = malloc(OBSLEN);
1557 if (S[t] == NULL) {
1558 err = 1;
1559 }
1560 }
1561 }
1562
1563 return err;
1564 }
1565
merge_error(const char * msg,PRN * prn)1566 static void merge_error (const char *msg, PRN *prn)
1567 {
1568 pputs(prn, msg);
1569 if (!printing_to_standard_stream(prn)) {
1570 gretl_errmsg_set(msg);
1571 }
1572 }
1573
merge_name_error(const char * objname,PRN * prn)1574 static void merge_name_error (const char *objname, PRN *prn)
1575 {
1576 gchar *msg;
1577
1578 msg = g_strdup_printf("Can't replace %s with a series", objname);
1579 pprintf(prn, "%s\n", msg);
1580 if (!printing_to_standard_stream(prn)) {
1581 gretl_errmsg_set(msg);
1582 }
1583 g_free(msg);
1584 }
1585
count_new_vars(const DATASET * d1,const DATASET * d2,PRN * prn)1586 static int count_new_vars (const DATASET *d1, const DATASET *d2,
1587 PRN *prn)
1588 {
1589 const char *vname;
1590 int addvars = d2->v - 1;
1591 int i, j;
1592
1593 /* We start by assuming that all the series in @d2 are new,
1594 then subtract those we find to be already present. We also
1595 check for collision between the names of series to be added and
1596 the names of existing objects other than series.
1597 */
1598
1599 for (i=1; i<d2->v && addvars >= 0; i++) {
1600 vname = d2->varname[i];
1601 if (gretl_is_user_var(vname)) {
1602 merge_name_error(vname, prn);
1603 addvars = -1;
1604 } else if (gretl_function_depth() > 0) {
1605 if (current_series_index(d1, vname) > 0) {
1606 addvars--;
1607 }
1608 } else {
1609 for (j=1; j<d1->v; j++) {
1610 if (!strcmp(vname, d1->varname[j])) {
1611 addvars--;
1612 break;
1613 }
1614 }
1615 }
1616 }
1617
1618 #if MERGE_DEBUG
1619 if (gretl_function_depth() == 0) {
1620 int found;
1621
1622 for (i=1; i<d2->v; i++) {
1623 found = 0;
1624 for (j=1; j<d1->v && !found; j++) {
1625 if (!strcmp(d2->varname[i], d1->varname[j])) {
1626 found = 1;
1627 }
1628 }
1629 if (!found) {
1630 fprintf(stderr, "'%s' in import but not current dataset\n",
1631 d2->varname[i]);
1632 }
1633 }
1634 }
1635 #endif
1636
1637 return addvars;
1638 }
1639
year_special_markers(const DATASET * dset,const DATASET * addset)1640 static int year_special_markers (const DATASET *dset,
1641 const DATASET *addset)
1642 {
1643 char *test;
1644 int overlap = 0;
1645 int i, t, err = 0;
1646
1647 /* See if we can match obs markers in @addset
1648 against years in @dset: we'll try this if all
1649 the markers in addset are integer strings, at
1650 least some of them are within the obs range of
1651 @dset, and none of them are outside of the
1652 "sanity" range of 1 to 2500.
1653 */
1654
1655 if (!dataset_is_time_series(dset) || dset->pd != 1) {
1656 return 0;
1657 }
1658
1659 if (dset->markers || !addset->markers) {
1660 return 0;
1661 }
1662
1663 errno = 0;
1664
1665 for (i=0; i<addset->n; i++) {
1666 t = strtol(addset->S[i], &test, 10);
1667 if (*test || errno) {
1668 errno = 0;
1669 err = 1;
1670 break;
1671 }
1672 if (t < 1 || t > 2500) {
1673 err = 1;
1674 break;
1675 }
1676 if (!overlap) {
1677 t = dateton(addset->S[i], dset);
1678 if (t >= 0 && t < dset->n) {
1679 overlap = 1;
1680 }
1681 }
1682 }
1683
1684 return !err && overlap;
1685 }
1686
compare_ranges(const DATASET * targ,const DATASET * src,int newvars,int * offset,int * yrspecial,int * err)1687 static int compare_ranges (const DATASET *targ,
1688 const DATASET *src,
1689 int newvars,
1690 int *offset,
1691 int *yrspecial,
1692 int *err)
1693 {
1694 int ed0 = dateton(targ->endobs, targ);
1695 int sd1, ed1, addobs = -1;
1696 int range_err = 0;
1697
1698 if (dataset_is_cross_section(targ) &&
1699 dataset_is_cross_section(src) &&
1700 !(targ->markers && src->markers)) {
1701 if (newvars == 0) {
1702 if (src->markers) {
1703 /* pass the problem on to just_append_rows */
1704 return 0;
1705 } else {
1706 /* assume the new data should be appended length-wise */
1707 *offset = ed0 + 1;
1708 return src->n;
1709 }
1710 } else {
1711 /* we've already determined that the series length in
1712 @src doesn't match either the full series length or
1713 the current sample range in @targ; we therefore have
1714 no information with which to match rows for new
1715 series
1716 */
1717 gretl_errmsg_set(_("append: don't know how to align the new series!"));
1718 *err = E_DATA;
1719 return -1;
1720 }
1721 }
1722
1723 sd1 = merge_dateton(src->stobs, targ);
1724 ed1 = merge_dateton(src->endobs, targ);
1725
1726 #if DATES_DEBUG
1727 fprintf(stderr, "compare_ranges:\n"
1728 " targ->n = %d, src->n = %d\n"
1729 " targ->stobs = '%s', src->stobs = '%s'\n"
1730 " sd1 = %d, ed1 = %d\n",
1731 targ->n, src->n, targ->stobs, src->stobs,
1732 sd1, ed1);
1733 #endif
1734
1735 if (sd1 < 0) {
1736 /* case: new data start earlier than old */
1737 if (ed1 < 0) {
1738 range_err = 1;
1739 } else if (ed1 > ed0) {
1740 range_err = 2;
1741 } else {
1742 *offset = sd1;
1743 addobs = 0;
1744 }
1745 } else if (sd1 == 0 && ed1 == ed0) {
1746 /* case: exact match of ranges */
1747 *offset = 0;
1748 addobs = 0;
1749 } else if (sd1 == 0) {
1750 /* case: starting obs the same */
1751 *offset = 0;
1752 if (ed1 > ed0) {
1753 addobs = ed1 - ed0;
1754 } else {
1755 addobs = 0;
1756 }
1757 } else if (sd1 == ed0 + 1) {
1758 /* case: new data start right after end of old */
1759 *offset = sd1;
1760 addobs = src->n;
1761 } else if (sd1 > 0) {
1762 /* case: new data start later than old */
1763 if (sd1 <= ed0) {
1764 /* but there's some overlap */
1765 *offset = sd1;
1766 if (ed1 > ed0) {
1767 addobs = ed1 - ed0;
1768 } else {
1769 addobs = 0;
1770 }
1771 }
1772 }
1773
1774 if (range_err) {
1775 /* try another approach? */
1776 *yrspecial = year_special_markers(targ, src);
1777 if (*yrspecial) {
1778 addobs = 0;
1779 }
1780 }
1781
1782 if (addobs < 0) {
1783 if (range_err == 1) {
1784 fputs("compare_ranges: no overlap, can't merge\n", stderr);
1785 } else if (range_err == 2) {
1786 fputs("compare ranges: new data start earlier, end later\n", stderr);
1787 } else {
1788 fputs("compare_ranges: flagging error\n", stderr);
1789 }
1790 }
1791
1792 return addobs;
1793 }
1794
1795 /* Determine whether there's any overlap between the calendar
1796 in @addset and that in @dset. Return 0 on success (there is
1797 an overlap), non-zero otherwise.
1798 */
1799
check_for_overlap(const DATASET * dset,const DATASET * addset,int * offset)1800 static int check_for_overlap (const DATASET *dset,
1801 const DATASET *addset,
1802 int *offset)
1803 {
1804 int at1 = merge_dateton(addset->stobs, dset);
1805 int at2 = merge_dateton(addset->endobs, dset);
1806
1807 if (!(at1 >= dset->n) && !(at2 < 0)) {
1808 /* OK, there must be some overlap */
1809 *offset = at1;
1810 return 0;
1811 } else {
1812 /* either the "add" data start after the original data end,
1813 or they end before the originals start, no there's no
1814 overlap
1815 */
1816 gretl_errmsg_set("No overlap in data ranges");
1817 return E_DATA;
1818 }
1819 }
1820
1821 /* When appending data to a current panel dataset, and the length of
1822 the series in the new data is less than the full panel size
1823 (n * T), try to determine if it's OK to expand the incoming data to
1824 match.
1825
1826 We'll say it's OK if the new series length equals the panel T: in
1827 that case we'll take the new data to be time-series, which should
1828 be replicated for each panel unit.
1829
1830 A second possibility arises if the length of the new series
1831 equals the panel n: in that case we could treat it as a time-
1832 invariant characteristic of the panel unit, which should be
1833 replicated for each time period. But note that if OPT_T is
1834 given, this second expansion is forbidden: the user has
1835 stipulated that the new data are time-varying.
1836 */
1837
panel_expand_ok(DATASET * dset,DATASET * addinfo,gretlopt opt)1838 static int panel_expand_ok (DATASET *dset, DATASET *addinfo,
1839 gretlopt opt)
1840 {
1841 int n = dset->n / dset->pd;
1842 int T = dset->pd;
1843 int ok = 0;
1844
1845 if (addinfo->n == T) {
1846 ok = 1;
1847 } else if (!(opt & OPT_T) &&
1848 addinfo->n == n &&
1849 addinfo->pd == 1) {
1850 ok = 1;
1851 }
1852
1853 return ok;
1854 }
1855
panel_append_special(int addvars,DATASET * dset,DATASET * addset,gretlopt opt,PRN * prn)1856 static int panel_append_special (int addvars,
1857 DATASET *dset,
1858 DATASET *addset,
1859 gretlopt opt,
1860 PRN *prn)
1861 {
1862 int n = dset->n / dset->pd;
1863 int T = dset->pd;
1864 int k = dset->v;
1865 int tsdata;
1866 int i, j, s, p, t;
1867 int err = 0;
1868
1869 if (addvars > 0 && dataset_add_series(dset, addvars)) {
1870 merge_error(_("Out of memory!\n"), prn);
1871 err = E_ALLOC;
1872 }
1873
1874 tsdata = ((opt & OPT_T) || addset->n != n);
1875
1876 for (i=1; i<addset->v && !err; i++) {
1877 int v = series_index(dset, addset->varname[i]);
1878
1879 if (v >= k) {
1880 /* a new variable */
1881 v = k++;
1882 strcpy(dset->varname[v], addset->varname[i]);
1883 copy_varinfo(dset->varinfo[v], addset->varinfo[i]);
1884 }
1885
1886 s = 0;
1887 for (j=0; j<n; j++) {
1888 /* loop across units */
1889 for (t=0; t<T; t++) {
1890 /* loop across periods */
1891 p = (tsdata)? t : j;
1892 dset->Z[v][s++] = addset->Z[i][p];
1893 }
1894 }
1895 }
1896
1897 return err;
1898 }
1899
markers_compatible(const DATASET * d1,DATASET * d2,int * offset)1900 static int markers_compatible (const DATASET *d1, DATASET *d2,
1901 int *offset)
1902 {
1903 int ret = 0;
1904
1905 if (d1->markers == 0 && d2->markers == 0) {
1906 *offset = d1->n;
1907 ret = 1;
1908 } else if (d1->markers == 0) {
1909 /* markers "on the right only": are they consecutive
1910 integers starting between 1 and d1->n + 1?
1911 */
1912 if (integer_string(d2->S[0])) {
1913 int k0 = atoi(d2->S[0]);
1914
1915 if (k0 >= 1 && k0 <= d1->n + 1) {
1916 int i, k1;
1917
1918 ret = 1;
1919 for (i=1; i<d2->n && ret; i++) {
1920 if (!integer_string(d2->S[i])) {
1921 ret = 0;
1922 } else if ((k1 = atoi(d2->S[i])) != k0 + 1) {
1923 ret = 0;
1924 } else {
1925 k0 = k1;
1926 }
1927 }
1928 }
1929 if (ret) {
1930 *offset = atoi(d2->S[0]) - 1;
1931 /* the @d2 markers have done their job -- yielding
1932 an @offset value -- and they can now be trashed
1933 */
1934 dataset_destroy_obs_markers(d2);
1935 }
1936 }
1937 } else {
1938 /* markers on both sides: are they totally distinct? */
1939 int i, j;
1940
1941 ret = 1;
1942 for (i=0; i<d2->n && ret; i++) {
1943 for (j=0; j<d1->n && ret; j++) {
1944 if (!strcmp(d2->S[i], d1->S[j])) {
1945 /* no, not totally distinct */
1946 ret = 0;
1947 }
1948 }
1949 }
1950 }
1951
1952 #if MERGE_DEBUG
1953 fprintf(stderr, " markers_compatible: ret=%d, offset=%d\n", ret, *offset);
1954 #endif
1955
1956 return ret;
1957 }
1958
1959 static int
just_append_rows(const DATASET * targ,DATASET * src,int * offset)1960 just_append_rows (const DATASET *targ, DATASET *src, int *offset)
1961 {
1962 int ret = 0;
1963
1964 if (targ->structure == CROSS_SECTION &&
1965 src->structure == CROSS_SECTION &&
1966 targ->sd0 == 1 && src->sd0 == 1) {
1967 int ok, test_offset = -1;
1968
1969 ok = markers_compatible(targ, src, &test_offset);
1970 if (ok) {
1971 /* note: we do this only if we're not adding any new
1972 series: we'll append to existing series lengthwise
1973 (or perhaps write data into existing existing rows)
1974 */
1975 *offset = test_offset;
1976 ret = src->n - (targ->n - *offset);
1977 if (ret < 0) {
1978 ret = 0;
1979 }
1980 }
1981 }
1982
1983 return ret;
1984 }
1985
simple_range_match(const DATASET * targ,const DATASET * src,int * offset)1986 static int simple_range_match (const DATASET *targ, const DATASET *src,
1987 int *offset)
1988 {
1989 int ret = 0;
1990
1991 if (src->pd == 1 && src->structure == CROSS_SECTION) {
1992 if (src->n == targ->n) {
1993 ret = 1;
1994 } else if (src->n == targ->t2 - targ->t1 + 1) {
1995 ret = 1;
1996 *offset = targ->t1;
1997 }
1998 }
1999
2000 return ret;
2001 }
2002
merge_lengthen_series(DATASET * dset,const DATASET * addset,int addobs,int offset)2003 static int merge_lengthen_series (DATASET *dset,
2004 const DATASET *addset,
2005 int addobs,
2006 int offset)
2007 {
2008 int i, t, new_n = dset->n + addobs;
2009 int err = 0;
2010
2011 if (dset->markers) {
2012 err = extend_markers(dset, dset->n, new_n);
2013 if (!err) {
2014 for (t=dset->n; t<new_n; t++) {
2015 strcpy(dset->S[t], addset->S[t-offset]);
2016 }
2017 }
2018 }
2019
2020 for (i=0; i<dset->v && !err; i++) {
2021 double *x;
2022
2023 x = realloc(dset->Z[i], new_n * sizeof *x);
2024 if (x == NULL) {
2025 err = E_ALLOC;
2026 break;
2027 }
2028
2029 for (t=dset->n; t<new_n; t++) {
2030 if (i == 0) {
2031 x[t] = 1.0;
2032 } else {
2033 x[t] = NADBL;
2034 }
2035 }
2036 dset->Z[i] = x;
2037 }
2038
2039 if (!err) {
2040 dset->n = new_n;
2041 ntolabel(dset->endobs, new_n - 1, dset);
2042 dset->t2 = dset->n - 1;
2043 }
2044
2045 return err;
2046 }
2047
2048 #if 0 /* not yet (maybe usable with DND */
2049
2050 int basic_data_merge_check (const DATASET *dset,
2051 DATASET *addset)
2052 {
2053 int dayspecial = 0;
2054 int yrspecial = 0;
2055 int addsimple = 0;
2056 int addvars = 0;
2057 int addobs = 0;
2058 int offset = 0;
2059 int err = 0;
2060
2061 /* first see how many new vars we have */
2062 addvars = count_new_vars(dset, addset, NULL);
2063 if (addvars < 0) {
2064 return 1;
2065 }
2066
2067 if (dated_daily_data(dset) && dated_daily_data(addset)) {
2068 dayspecial = 1;
2069 }
2070
2071 if (simple_range_match(dset, addset, &offset)) {
2072 addsimple = 1;
2073 } else if (dset->pd != addset->pd) {
2074 err = 1;
2075 }
2076
2077 if (!err) {
2078 if (!addsimple) {
2079 addobs = compare_ranges(dset, addset, addvars, &offset,
2080 &yrspecial, &err);
2081 }
2082 if (!err && addobs <= 0 && addvars == 0) {
2083 addobs = just_append_rows(dset, addset, &offset);
2084 }
2085 }
2086
2087 if (!err && (addobs < 0 || addvars < 0)) {
2088 err = E_DATA;
2089 }
2090
2091 if (!err && dset->markers != addset->markers) {
2092 if (addobs == 0 && addvars == 0) {
2093 err = E_DATA;
2094 } else if (addset->n != dset->n && !yrspecial && !dayspecial) {
2095 err = E_DATA;
2096 }
2097 }
2098
2099 return err;
2100 }
2101
2102 #endif
2103
2104 #define simple_structure(p) (p->structure == TIME_SERIES || \
2105 p->structure == SPECIAL_TIME_SERIES || \
2106 (p->structure == CROSS_SECTION && \
2107 p->S == NULL))
2108
2109 /**
2110 * merge_data:
2111 * @dset: dataset struct.
2112 * @addset: dataset to be merged in.
2113 * @opt: may include OPT_T to force a time-series interpretation
2114 * when appending to a panel dataset; may include OPT_U to update
2115 * values of overlapping observations.
2116 * @prn: print struct to accept messages.
2117 *
2118 * Attempt to merge the content of a newly opened data file into
2119 * gretl's current working data set.
2120 *
2121 * Returns: 0 on successful completion, non-zero otherwise.
2122 */
2123
merge_data(DATASET * dset,DATASET * addset,gretlopt opt,PRN * prn)2124 static int merge_data (DATASET *dset, DATASET *addset,
2125 gretlopt opt, PRN *prn)
2126 {
2127 int update_overlap = (opt & OPT_U);
2128 int orig_n = dset->n;
2129 int dayspecial = 0;
2130 int yrspecial = 0;
2131 int fixsample = 0;
2132 int addsimple = 0;
2133 int addpanel = 0;
2134 int addvars = 0;
2135 int addobs = 0;
2136 int offset = 0;
2137 int err = 0;
2138
2139 #if MERGE_DEBUG
2140 debug_print_option_flags("merge_data", opt);
2141 #endif
2142
2143 /* first see how many new vars we have */
2144 addvars = count_new_vars(dset, addset, prn);
2145 if (addvars < 0) {
2146 return 1;
2147 }
2148
2149 #if MERGE_DEBUG
2150 fprintf(stderr, " new series count = %d\n", addvars);
2151 #endif
2152
2153 if (dated_daily_data(dset) && dated_daily_data(addset)) {
2154 #if MERGE_DEBUG
2155 fprintf(stderr, " special: merging daily data\n");
2156 #endif
2157 dayspecial = 1;
2158 }
2159
2160 if (opt & OPT_X) {
2161 fixsample = 1;
2162 } else if (simple_range_match(dset, addset, &offset)) {
2163 /* we'll allow undated data to be merged with the existing
2164 dateset, sideways, provided the number of observations
2165 matches OK */
2166 addsimple = 1;
2167 } else if (dataset_is_panel(dset) &&
2168 panel_expand_ok(dset, addset, opt)) {
2169 /* allow appending to panel when the number of obs matches
2170 either the cross-section size or the time-series length
2171 */
2172 addpanel = 1;
2173 } else if (dset->pd != addset->pd) {
2174 merge_error(_("Data frequency does not match\n"), prn);
2175 err = 1;
2176 }
2177
2178 if (!err && fixsample) {
2179 err = check_for_overlap(dset, addset, &offset);
2180 } else if (!err && gretl_function_depth() > 0) {
2181 /* we won't add observations within a function, but
2182 we should still check for an error from compare_ranges()
2183 */
2184 if (!addsimple && !addpanel) {
2185 addobs = compare_ranges(dset, addset, addvars, &offset,
2186 &yrspecial, &err);
2187 if (!err && addobs > 0) {
2188 addobs = 0;
2189 }
2190 }
2191 } else if (!err) {
2192 if (!addsimple && !addpanel) {
2193 addobs = compare_ranges(dset, addset, addvars, &offset,
2194 &yrspecial, &err);
2195 #if MERGE_DEBUG
2196 fprintf(stderr, " added obs, from compare_ranges: %d\n", addobs);
2197 #endif
2198 }
2199 if (!err && addobs <= 0 && addvars == 0) {
2200 addobs = just_append_rows(dset, addset, &offset);
2201 #if MERGE_DEBUG
2202 fprintf(stderr, " added obs, from just_append_rows: %d\n", addobs);
2203 #endif
2204 }
2205 }
2206
2207 if (!err && (addobs < 0 || addvars < 0)) {
2208 merge_error(_("New data not conformable for appending\n"), prn);
2209 err = E_DATA;
2210 }
2211
2212 if (!err && !addpanel && dset->markers != addset->markers) {
2213 if (addobs == 0 && addvars == 0) {
2214 if (update_overlap) {
2215 ; /* might be OK? */
2216 } else {
2217 gretl_errmsg_set("Found no data conformable for appending");
2218 err = E_DATA;
2219 }
2220 } else if (addset->n != dset->n && !yrspecial && !dayspecial) {
2221 merge_error(_("Inconsistency in observation markers\n"), prn);
2222 err = E_DATA;
2223 } else if (addset->markers && !dset->markers &&
2224 !yrspecial && !dayspecial) {
2225 dataset_destroy_obs_markers(addset);
2226 }
2227 }
2228
2229 #if MERGE_DEBUG
2230 if (!err) {
2231 fprintf(stderr, " after preliminaries: addvars = %d, addobs = %d\n",
2232 addvars, addobs);
2233 } else {
2234 fprintf(stderr, " after preliminaries: err = %d\n", err);
2235 }
2236 #endif
2237
2238 /* if checks are passed, try merging the data */
2239
2240 if (!err && addobs > 0) {
2241 err = merge_lengthen_series(dset, addset, addobs, offset);
2242 if (err) {
2243 merge_error(_("Out of memory!\n"), prn);
2244 }
2245 }
2246
2247 if (!err && addpanel) {
2248 err = panel_append_special(addvars, dset, addset,
2249 opt, prn);
2250 } else if (!err) {
2251 int k = dset->v;
2252 int i, t;
2253
2254 if (addvars > 0 && dataset_add_series(dset, addvars)) {
2255 merge_error(_("Out of memory!\n"), prn);
2256 err = E_ALLOC;
2257 }
2258
2259 for (i=1; i<addset->v && !err; i++) {
2260 int v = series_index(dset, addset->varname[i]);
2261 int tmin, newvar = v >= k;
2262
2263 if (!newvar && !update_overlap) {
2264 tmin = orig_n;
2265 } else {
2266 tmin = 0;
2267 }
2268
2269 if (newvar) {
2270 v = k++;
2271 strcpy(dset->varname[v], addset->varname[i]);
2272 copy_varinfo(dset->varinfo[v], addset->varinfo[i]);
2273 if (is_string_valued(addset, i) &&
2274 addset->n == dset->n && offset == 0 &&
2275 addobs == 0) {
2276 /* attach the string table to the target
2277 series and detach it from @addset
2278 */
2279 series_table *st;
2280
2281 st = series_get_string_table(addset, i);
2282 series_attach_string_table(dset, v, st);
2283 series_attach_string_table(addset, i, NULL);
2284 }
2285 } else {
2286 /* not a new series */
2287 int lsval = is_string_valued(dset, v);
2288 int rsval = is_string_valued(addset, i);
2289
2290 if (lsval + rsval == 1) {
2291 gretl_errmsg_set(_("Can't concatenate string-valued and numeric series"));
2292 err = E_DATA;
2293 } else if (lsval) {
2294 err = merge_string_tables(dset, v, addset, i);
2295 }
2296 }
2297
2298 if (dayspecial) {
2299 char obs[OBSLEN];
2300 int s;
2301
2302 for (t=tmin; t<dset->n; t++) {
2303 ntolabel(obs, t, dset);
2304 s = dateton(obs, addset);
2305 if (s >= 0 && s < addset->n) {
2306 dset->Z[v][t] = addset->Z[i][s];
2307 } else {
2308 dset->Z[v][t] = NADBL;
2309 }
2310 }
2311 } else if (yrspecial) {
2312 int s;
2313
2314 if (newvar) {
2315 for (t=0; t<dset->n; t++) {
2316 dset->Z[v][t] = NADBL;
2317 }
2318 }
2319 for (s=0; s<addset->n; s++) {
2320 t = dateton(addset->S[s], dset);
2321 if (t >= tmin && t < dset->n) {
2322 dset->Z[v][t] = addset->Z[i][s];
2323 }
2324 }
2325 } else {
2326 for (t=tmin; t<dset->n; t++) {
2327 if (t >= offset && t - offset < addset->n) {
2328 dset->Z[v][t] = addset->Z[i][t - offset];
2329 } else if (newvar) {
2330 dset->Z[v][t] = NADBL;
2331 }
2332 }
2333 }
2334 }
2335 }
2336
2337 if (!err && (addvars || addobs) && gretl_messages_on()) {
2338 pputs(prn, _("Data appended OK\n"));
2339 }
2340
2341 return err;
2342 }
2343
2344 /* We want to ensure that calendar dates are recorded as per
2345 ISO 8601 -- that is, YYYY-MM-DD; here we remedy dates
2346 recorded in the form YYYY/MM/DD.
2347 */
2348
maybe_fix_calendar_dates(DATASET * dset)2349 static void maybe_fix_calendar_dates (DATASET *dset)
2350 {
2351 if (strchr(dset->stobs, '/') != NULL) {
2352 gretl_charsub(dset->stobs, '/', '-');
2353 gretl_charsub(dset->endobs, '/', '-');
2354 if (dset->S != NULL && dset->markers == DAILY_DATE_STRINGS) {
2355 int t;
2356
2357 for (t=0; t<dset->n; t++) {
2358 gretl_charsub(dset->S[t], '/', '-');
2359 }
2360 }
2361 }
2362 }
2363
2364 /**
2365 * get_merge_opts:
2366 * @opt: gretl options flags.
2367 *
2368 * Returns: just those components of @opt (if any) that
2369 * can be passed to merge_or_replace_data(); may be
2370 * useful when calling that function in the context
2371 * of a command only some of whose options should be
2372 * forwarded.
2373 */
2374
get_merge_opts(gretlopt opt)2375 gretlopt get_merge_opts (gretlopt opt)
2376 {
2377 gretlopt merge_opt = OPT_NONE;
2378
2379 if (opt & OPT_T) {
2380 /* panel, common time-series */
2381 merge_opt |= OPT_T;
2382 }
2383 if (opt & OPT_U) {
2384 /* update overlapping observations */
2385 merge_opt |= OPT_U;
2386 }
2387 if (opt & OPT_X) {
2388 /* fixed sample range */
2389 merge_opt |= OPT_X;
2390 }
2391
2392 return merge_opt;
2393 }
2394
2395 /* Apparatus for converting a dataset read from file
2396 into a gretl matrix, as opposed to replacing an
2397 existing dataset or merging with it.
2398 */
2399
2400 static gretl_matrix **dset_matrix;
2401
set_dset_matrix_target(gretl_matrix ** pm)2402 void set_dset_matrix_target (gretl_matrix **pm)
2403 {
2404 dset_matrix = pm;
2405 }
2406
get_dset_matrix_target(void)2407 void *get_dset_matrix_target (void)
2408 {
2409 return dset_matrix;
2410 }
2411
2412 /**
2413 * merge_or_replace_data:
2414 * @dset0: original dataset struct.
2415 * @pdset1: new dataset struct.
2416 * @opt: zero or more option flags (OPT_K presrves @pdset1,
2417 * otherwise it is destroyed).
2418 * @prn: print struct to accept messages.
2419 *
2420 * Given a newly-created dataset, pointed to by @pdset1, either
2421 * attempt to merge it with @dset0, if the original data array
2422 * is non-NULL, or replace the content of the original pointer
2423 * with the new dataset.
2424 *
2425 * In case merging is not successful, the new dataset is
2426 * destroyed.
2427 *
2428 * Returns: 0 on successful completion, non-zero otherwise.
2429 */
2430
merge_or_replace_data(DATASET * dset0,DATASET ** pdset1,gretlopt opt,PRN * prn)2431 int merge_or_replace_data (DATASET *dset0, DATASET **pdset1,
2432 gretlopt opt, PRN *prn)
2433 {
2434 int keep = (opt & OPT_K);
2435 int err = 0;
2436
2437 if (dset_matrix != NULL) {
2438 /* Convert the new dataset to matrix; don't touch
2439 the existing dataset, if any.
2440 */
2441 const DATASET *dset = *pdset1;
2442
2443 *dset_matrix = gretl_matrix_data_subset(NULL, dset,
2444 0, dset->n - 1,
2445 M_MISSING_OK,
2446 &err);
2447 destroy_dataset(*pdset1);
2448 *pdset1 = NULL;
2449 return err;
2450 }
2451
2452 if (dset0->Z != NULL) {
2453 /* we have an existing dataset into which the new data
2454 should be merged */
2455 gretlopt merge_opt = OPT_NONE;
2456
2457 if (opt & OPT_T) {
2458 /* panel, common time-series */
2459 merge_opt |= OPT_T;
2460 }
2461 if (opt & OPT_U) {
2462 /* update overlapping observations */
2463 merge_opt |= OPT_U;
2464 }
2465 if (opt & OPT_X) {
2466 /* fixed sample range */
2467 merge_opt |= OPT_X;
2468 }
2469 err = merge_data(dset0, *pdset1, merge_opt, prn);
2470 if (!keep) {
2471 destroy_dataset(*pdset1);
2472 }
2473 } else {
2474 /* starting from scratch */
2475 *dset0 = **pdset1;
2476 free(*pdset1);
2477 if (calendar_data(dset0)) {
2478 maybe_fix_calendar_dates(dset0);
2479 }
2480 }
2481
2482 if (!keep) {
2483 *pdset1 = NULL;
2484 }
2485
2486 return err;
2487 }
2488
check_imported_string(char * src,int i,size_t len)2489 static int check_imported_string (char *src, int i, size_t len)
2490 {
2491 int err = 0;
2492
2493 if (!g_utf8_validate(src, -1, NULL)) {
2494 gchar *trstr = NULL;
2495 gsize bytes;
2496
2497 trstr = g_locale_to_utf8(src, -1, NULL, &bytes, NULL);
2498
2499 if (trstr == NULL) {
2500 gretl_errmsg_sprintf("Invalid characters in imported string, line %d", i);
2501 err = E_DATA;
2502 } else {
2503 *src = '\0';
2504 strncat(src, trstr, len - 1);
2505 g_free(trstr);
2506 }
2507 }
2508
2509 return err;
2510 }
2511
count_markers(FILE * fp,char * line,int linelen,char * marker)2512 static int count_markers (FILE *fp, char *line, int linelen,
2513 char *marker)
2514 {
2515 int n = 0;
2516
2517 while (fgets(line, linelen, fp)) {
2518 if (sscanf(line, "%31[^\n\r]", marker) == 1) {
2519 g_strstrip(marker);
2520 if (*marker != '\0') {
2521 n++;
2522 }
2523 }
2524 }
2525
2526 rewind(fp);
2527
2528 return n;
2529 }
2530
2531 /**
2532 * add_obs_markers_from_file:
2533 * @dset: data information struct.
2534 * @fname: name of file containing case markers.
2535 *
2536 * Read case markers (strings of %OBSLEN - 1 characters or less that identify
2537 * the observations) from a file, and associate them with the
2538 * current data set. The file should contain one marker per line,
2539 * with a number of lines equal to the number of observations in
2540 * the current data set.
2541 *
2542 * Returns: 0 on successful completion, non-zero otherwise.
2543 */
2544
add_obs_markers_from_file(DATASET * dset,const char * fname)2545 int add_obs_markers_from_file (DATASET *dset, const char *fname)
2546 {
2547 char **S = NULL;
2548 FILE *fp;
2549 char line[128], marker[32];
2550 int done = 0;
2551 int t, err = 0;
2552
2553 fp = gretl_fopen(fname, "r");
2554 if (fp == NULL) {
2555 return E_FOPEN;
2556 }
2557
2558 S = strings_array_new_with_length(dset->n, OBSLEN);
2559 if (S == NULL) {
2560 fclose(fp);
2561 return E_ALLOC;
2562 }
2563
2564 if (dataset_is_panel(dset)) {
2565 /* allow the case where we get just enough markers to
2566 label the cross-sectional units */
2567 int nm = count_markers(fp, line, sizeof line, marker);
2568 int N = dset->n / dset->pd; /* = number of units */
2569
2570 if (nm == N) {
2571 int T = dset->pd;
2572 int t, i = 0;
2573
2574 while (fgets(line, sizeof line, fp) && !err) {
2575 *marker = '\0';
2576 if (sscanf(line, "%31[^\n\r]", marker) == 1) {
2577 g_strstrip(marker);
2578 strncat(S[i], marker, OBSLEN - 1);
2579 err = check_imported_string(S[i], i+1, OBSLEN);
2580 if (!err) {
2581 /* copy to remaining observations */
2582 for (t=1; t<T; t++) {
2583 strcpy(S[i+t], S[i]);
2584 }
2585 }
2586 i += T;
2587 }
2588 }
2589 done = 1;
2590 }
2591 }
2592
2593 if (!done) {
2594 for (t=0; t<dset->n && !err; t++) {
2595 if (fgets(line, sizeof line, fp) == NULL) {
2596 gretl_errmsg_sprintf("Expected %d markers; found %d\n",
2597 dset->n, t);
2598 err = E_DATA;
2599 } else if (sscanf(line, "%31[^\n\r]", marker) != 1) {
2600 gretl_errmsg_sprintf("Couldn't read marker on line %d", t+1);
2601 err = E_DATA;
2602 } else {
2603 g_strstrip(marker);
2604 strncat(S[t], marker, OBSLEN - 1);
2605 err = check_imported_string(S[t], t+1, OBSLEN);
2606 }
2607 }
2608 }
2609
2610 if (err) {
2611 strings_array_free(S, dset->n);
2612 } else {
2613 if (dset->S != NULL) {
2614 strings_array_free(dset->S, dset->n);
2615 }
2616 dset->markers = REGULAR_MARKERS;
2617 dset->S = S;
2618 }
2619
2620 return err;
2621 }
2622
2623 /**
2624 * dataset_has_var_labels:
2625 * @dset: data information struct.
2626 *
2627 * Returns: 1 if at least one variable in the current dataset
2628 * has a descriptive label, otherwise 0.
2629 */
2630
dataset_has_var_labels(const DATASET * dset)2631 int dataset_has_var_labels (const DATASET *dset)
2632 {
2633 const char *vlabel;
2634 int i, imin = 1;
2635
2636 if (dset->v > 1) {
2637 if (!strcmp(dset->varname[1], "index")) {
2638 vlabel = series_get_label(dset, 1);
2639 if (vlabel != NULL && !strcmp(vlabel, _("index variable"))) {
2640 imin = 2;
2641 }
2642 }
2643 }
2644
2645 for (i=imin; i<dset->v; i++) {
2646 vlabel = series_get_label(dset, i);
2647 if (vlabel != NULL && *vlabel != '\0') {
2648 return 1;
2649 }
2650 }
2651
2652 return 0;
2653 }
2654
2655 /**
2656 * save_var_labels_to_file:
2657 * @dset: data information struct.
2658 * @fname: name of file containing labels.
2659 *
2660 * Writes to @fname the descriptive labels for the series in
2661 * the current dataset.
2662 *
2663 * Returns: 0 on successful completion, non-zero otherwise.
2664 */
2665
save_var_labels_to_file(const DATASET * dset,const char * fname)2666 int save_var_labels_to_file (const DATASET *dset,
2667 const char *fname)
2668 {
2669 const char *vlabel;
2670 FILE *fp;
2671 int i, err = 0;
2672
2673 fp = gretl_fopen(fname, "w");
2674
2675 if (fp == NULL) {
2676 err = E_FOPEN;
2677 } else {
2678 for (i=1; i<dset->v; i++) {
2679 vlabel = series_get_label(dset, i);
2680 fprintf(fp, "%s\n", vlabel == NULL ? "" : vlabel);
2681 }
2682 fclose(fp);
2683 }
2684
2685 return err;
2686 }
2687
save_var_labels_to_array(const DATASET * dset,const char * aname)2688 static int save_var_labels_to_array (const DATASET *dset,
2689 const char *aname)
2690 {
2691 gretl_array *a = NULL;
2692 int err = 0;
2693
2694 if (gretl_is_series(aname, dset)) {
2695 err = E_TYPES;
2696 } else {
2697 err = check_identifier(aname);
2698 }
2699
2700 if (!err) {
2701 a = gretl_array_new(GRETL_TYPE_STRINGS, dset->v - 1, &err);
2702 }
2703
2704 if (!err) {
2705 err = user_var_add_or_replace(aname, GRETL_TYPE_STRINGS, a);
2706 }
2707
2708 if (!err) {
2709 char *vlabel;
2710 int i;
2711
2712 for (i=1; i<dset->v; i++) {
2713 vlabel = (char *) series_get_label(dset, i);
2714 gretl_array_set_element(a, i-1, vlabel != NULL ? vlabel : "",
2715 GRETL_TYPE_STRING, 1);
2716 }
2717 }
2718
2719 if (err && a != NULL) {
2720 gretl_array_destroy(a);
2721 a = NULL;
2722 }
2723
2724 return err;
2725 }
2726
save_obs_markers_to_array(const DATASET * dset,const char * aname)2727 static int save_obs_markers_to_array (const DATASET *dset,
2728 const char *aname)
2729 {
2730 gretl_array *a = NULL;
2731 int err = 0;
2732
2733 if (gretl_is_series(aname, dset)) {
2734 err = E_TYPES;
2735 } else {
2736 err = check_identifier(aname);
2737 }
2738
2739 if (!err) {
2740 a = gretl_array_new(GRETL_TYPE_STRINGS, dset->n, &err);
2741 }
2742
2743 if (!err) {
2744 err = user_var_add_or_replace(aname, GRETL_TYPE_STRINGS, a);
2745 }
2746
2747 if (!err) {
2748 char *marker;
2749 int i;
2750
2751 for (i=0; i<dset->n; i++) {
2752 marker = dset->S[i];
2753 gretl_array_set_element(a, i, marker != NULL ? marker : "",
2754 GRETL_TYPE_STRING, 1);
2755 }
2756 }
2757
2758 if (err && a != NULL) {
2759 gretl_array_destroy(a);
2760 a = NULL;
2761 }
2762
2763 return err;
2764 }
2765
2766 /**
2767 * add_var_labels_from_file:
2768 * @dset: data information struct.
2769 * @fname: name of file containing labels.
2770 *
2771 * Read descriptive variables for labels (strings of %MAXLABEL - 1
2772 * characters or less) from a file, and associate them with the
2773 * current data set. The file should contain one label per line,
2774 * with a number of lines equal to the number of variables in
2775 * the current data set, excluding the constant.
2776 *
2777 * Returns: 0 on successful completion, non-zero otherwise.
2778 */
2779
add_var_labels_from_file(DATASET * dset,const char * fname)2780 int add_var_labels_from_file (DATASET *dset, const char *fname)
2781 {
2782 FILE *fp;
2783 char line[1024];
2784 gchar *label;
2785 int nlabels = 0;
2786 int i, err = 0;
2787
2788 fp = gretl_fopen(fname, "r");
2789 if (fp == NULL) {
2790 return E_FOPEN;
2791 }
2792
2793 for (i=1; i<dset->v && !err; i++) {
2794 if (fgets(line, sizeof line, fp) == NULL) {
2795 break;
2796 } else {
2797 label = g_strstrip(g_strdup(line));
2798 if (strlen(label) > 0) {
2799 if (!g_utf8_validate(label, -1, NULL)) {
2800 gchar *trstr = NULL;
2801 gsize bytes;
2802
2803 trstr = g_locale_to_utf8(label, -1, NULL,
2804 &bytes, NULL);
2805 if (trstr != NULL) {
2806 series_set_label(dset, i, trstr);
2807 nlabels++;
2808 g_free(trstr);
2809 }
2810 } else {
2811 series_set_label(dset, i, label);
2812 nlabels++;
2813 }
2814 }
2815 g_free(label);
2816 }
2817 }
2818
2819 if (!err && nlabels == 0) {
2820 gretl_errmsg_set("No labels found");
2821 err = E_DATA;
2822 }
2823
2824 return err;
2825 }
2826
add_var_labels_from_array(DATASET * dset,const char * aname)2827 static int add_var_labels_from_array (DATASET *dset, const char *aname)
2828 {
2829 gretl_array *a = get_array_by_name(aname);
2830 int i, err = 0;
2831
2832 if (a == NULL) {
2833 gretl_errmsg_sprintf("%s: no such array", aname);
2834 err = E_DATA;
2835 } else if (gretl_array_get_type(a) != GRETL_TYPE_STRINGS ||
2836 gretl_array_get_length(a) < dset->v - 1) {
2837 err = E_TYPES;
2838 }
2839
2840 for (i=1; i<dset->v && !err; i++) {
2841 const char *s = gretl_array_get_data(a, i-1);
2842
2843 series_set_label(dset, i, s);
2844 }
2845
2846 return err;
2847 }
2848
read_or_write_var_labels(gretlopt opt,DATASET * dset,PRN * prn)2849 int read_or_write_var_labels (gretlopt opt, DATASET *dset, PRN *prn)
2850 {
2851 const char *lname = NULL;
2852 int err;
2853
2854 err = incompatible_options(opt, OPT_D | OPT_T | OPT_F |
2855 OPT_A | OPT_R);
2856 if (err) {
2857 return err;
2858 }
2859
2860 if (opt & (OPT_T | OPT_F | OPT_A | OPT_R)) {
2861 lname = get_optval_string(LABELS, opt);
2862 if (lname == NULL) {
2863 return E_BADOPT;
2864 } else if (opt & (OPT_T | OPT_F)) {
2865 gretl_maybe_switch_dir(lname);
2866 }
2867 }
2868
2869 if (opt & OPT_D) {
2870 /* delete */
2871 int i;
2872
2873 for (i=1; i<dset->v; i++) {
2874 series_set_label(dset, i, "");
2875 }
2876 } else if (opt & (OPT_T | OPT_R)) {
2877 /* to-file, to-array */
2878 if (!dataset_has_var_labels(dset)) {
2879 pprintf(prn, "No labels are available for writing\n");
2880 err = E_DATA;
2881 } else {
2882 if (opt & OPT_T) {
2883 err = save_var_labels_to_file(dset, lname);
2884 } else {
2885 err = save_var_labels_to_array(dset, lname);
2886 }
2887 if (!err && gretl_messages_on()) {
2888 pprintf(prn, "Labels written OK\n");
2889 }
2890 }
2891 } else if (opt & (OPT_F | OPT_A)) {
2892 /* from-file, from-array */
2893 if (opt & OPT_F) {
2894 err = add_var_labels_from_file(dset, lname);
2895 } else {
2896 err = add_var_labels_from_array(dset, lname);
2897 }
2898 if (!err && gretl_messages_on()) {
2899 pprintf(prn, "Labels loaded OK\n");
2900 }
2901 }
2902
2903 return err;
2904 }
2905
save_obs_markers_to_file(DATASET * dset,const char * fname)2906 static int save_obs_markers_to_file (DATASET *dset, const char *fname)
2907 {
2908 FILE *fp = gretl_fopen(fname, "w");
2909 int err = 0;
2910
2911 if (fp == NULL) {
2912 err = E_FOPEN;
2913 } else {
2914 int i;
2915
2916 for (i=0; i<dset->n; i++) {
2917 fprintf(fp, "%s\n", dset->S[i]);
2918 }
2919 fclose(fp);
2920 }
2921
2922 return err;
2923 }
2924
read_or_write_obs_markers(gretlopt opt,DATASET * dset,PRN * prn)2925 int read_or_write_obs_markers (gretlopt opt, DATASET *dset, PRN *prn)
2926 {
2927 const char *fname = NULL;
2928 int err;
2929
2930 err = incompatible_options(opt, OPT_D | OPT_T | OPT_F);
2931 if (err) {
2932 return err;
2933 }
2934
2935 if (opt & (OPT_T | OPT_F)) {
2936 fname = get_optval_string(MARKERS, opt);
2937 if (fname == NULL) {
2938 return E_BADOPT;
2939 } else {
2940 fname = gretl_maybe_switch_dir(fname);
2941 }
2942 }
2943
2944 if (opt & (OPT_A | OPT_T)) {
2945 /* writing to file or array */
2946 if (dset->S == NULL) {
2947 gretl_errmsg_set(_("No markers are available for writing"));
2948 return E_DATA;
2949 }
2950 }
2951
2952 if (opt & OPT_D) {
2953 /* delete */
2954 dataset_destroy_obs_markers(dset);
2955 } else if (opt & OPT_T) {
2956 /* to-file */
2957 err = save_obs_markers_to_file(dset, fname);
2958 if (!err && gretl_messages_on()) {
2959 pprintf(prn, "Markers written OK\n");
2960 }
2961 } else if (opt & OPT_F) {
2962 /* from-file */
2963 err = add_obs_markers_from_file(dset, fname);
2964 if (!err && gretl_messages_on()) {
2965 pprintf(prn, "Markers loaded OK\n");
2966 }
2967 } else if (opt & OPT_A) {
2968 /* to-array */
2969 const char *aname = get_optval_string(MARKERS, OPT_A);
2970
2971 err = save_obs_markers_to_array(dset, aname);
2972 }
2973
2974 return err;
2975 }
2976
2977 static void
octave_varname(char * name,const char * s,int nnum,int v)2978 octave_varname (char *name, const char *s, int nnum, int v)
2979 {
2980 char nstr[12];
2981 int len, tr;
2982
2983 if (nnum == 0) {
2984 strcpy(name, s);
2985 } else {
2986 sprintf(nstr, "%d", nnum);
2987 len = strlen(nstr);
2988 tr = VNAMELEN - len;
2989
2990 if (tr > 0) {
2991 strncat(name, s, tr);
2992 strcat(name, nstr);
2993 } else {
2994 sprintf(name, "v%d", v);
2995 }
2996 }
2997 }
2998
get_max_line_length(FILE * fp,PRN * prn)2999 static int get_max_line_length (FILE *fp, PRN *prn)
3000 {
3001 int c, c1, cc = 0;
3002 int maxlen = 0;
3003
3004 while ((c = fgetc(fp)) != EOF) {
3005 if (c == 0x0d) {
3006 /* CR */
3007 c1 = fgetc(fp);
3008 if (c1 == EOF) {
3009 break;
3010 } else if (c1 == 0x0a) {
3011 /* CR + LF -> LF */
3012 c = c1;
3013 } else {
3014 /* Mac-style: CR not followed by LF */
3015 c = 0x0a;
3016 ungetc(c1, fp);
3017 }
3018 }
3019 if (c == 0x0a) {
3020 if (cc > maxlen) {
3021 maxlen = cc;
3022 }
3023 cc = 0;
3024 continue;
3025 }
3026 if (!isspace((unsigned char) c) && !isprint((unsigned char) c) &&
3027 !(c == CTRLZ)) {
3028 pprintf(prn, _("Binary data (%d) encountered: this is not a valid "
3029 "text file\n"), c);
3030 return -1;
3031 }
3032 cc++;
3033 }
3034
3035 if (maxlen == 0) {
3036 pprintf(prn, _("Data file is empty\n"));
3037 }
3038
3039 if (maxlen > 0) {
3040 /* allow for newline and null terminator */
3041 maxlen += 3;
3042 }
3043
3044 return maxlen;
3045 }
3046
import_octave(const char * fname,DATASET * dset,gretlopt opt,PRN * prn)3047 static int import_octave (const char *fname, DATASET *dset,
3048 gretlopt opt, PRN *prn)
3049 {
3050 DATASET *octset = NULL;
3051 FILE *fp = NULL;
3052 char *line = NULL;
3053 char tmp[8], fmt[16], name[32];
3054 int nrows = 0, ncols = 0, nblocks = 0;
3055 int brows = 0, bcols = 0, oldbcols = 0;
3056 int maxlen, got_type = 0, got_name = 0;
3057 int i, t, err = 0;
3058
3059 fp = gretl_fopen(fname, "r");
3060 if (fp == NULL) {
3061 return E_FOPEN;
3062 }
3063
3064 pprintf(prn, "%s %s...\n", _("parsing"), fname);
3065
3066 maxlen = get_max_line_length(fp, prn);
3067 if (maxlen <= 0) {
3068 err = E_DATA;
3069 goto oct_bailout;
3070 }
3071
3072 line = malloc(maxlen);
3073 if (line == NULL) {
3074 err = E_ALLOC;
3075 goto oct_bailout;
3076 }
3077
3078 pprintf(prn, _(" longest line: %d characters\n"), maxlen - 1);
3079
3080 rewind(fp);
3081
3082 while (fgets(line, maxlen, fp) && !err) {
3083 if (*line == '#') {
3084 if (!got_name) {
3085 if (sscanf(line, "# name: %31s", name) == 1) {
3086 got_name = 1;
3087 nblocks++;
3088 continue;
3089 }
3090 }
3091 if (!got_type) {
3092 if (sscanf(line, "# type: %7s", tmp) == 1) {
3093 if (!got_name || strcmp(tmp, "matrix")) {
3094 err = 1;
3095 } else {
3096 got_type = 1;
3097 }
3098 continue;
3099 }
3100 }
3101 if (brows == 0) {
3102 if (sscanf(line, "# rows: %d", &brows) == 1) {
3103 if (!got_name || !got_type || brows <= 0) {
3104 err = 1;
3105 } else if (nrows > 0 && brows != nrows) {
3106 err = 1;
3107 } else {
3108 nrows = brows;
3109 }
3110 continue;
3111 }
3112 }
3113 if (bcols == 0) {
3114 if (sscanf(line, "# columns: %d", &bcols) == 1) {
3115 if (!got_name || !got_type || bcols <= 0) {
3116 err = 1;
3117 } else {
3118 ncols += bcols;
3119 pprintf(prn, _(" Found matrix '%s' with "
3120 "%d rows, %d columns\n"), name, brows, bcols);
3121 }
3122 continue;
3123 }
3124 }
3125 } else if (string_is_blank(line)) {
3126 continue;
3127 } else {
3128 got_name = 0;
3129 got_type = 0;
3130 brows = 0;
3131 bcols = 0;
3132 }
3133 }
3134
3135 if (err || nrows == 0 || ncols == 0) {
3136 pputs(prn, _("Invalid data file\n"));
3137 err = E_DATA;
3138 goto oct_bailout;
3139 }
3140
3141 /* initialize datainfo and Z */
3142
3143 octset = datainfo_new();
3144 if (octset == NULL) {
3145 pputs(prn, _("Out of memory!\n"));
3146 err = E_ALLOC;
3147 goto oct_bailout;
3148 }
3149
3150 octset->n = nrows;
3151 octset->v = ncols + 1;
3152
3153 if (start_new_Z(octset, 0)) {
3154 pputs(prn, _("Out of memory!\n"));
3155 err = E_ALLOC;
3156 goto oct_bailout;
3157 }
3158
3159 rewind(fp);
3160
3161 pprintf(prn, _(" number of variables: %d\n"), ncols);
3162 pprintf(prn, _(" number of observations: %d\n"), nrows);
3163 pprintf(prn, _(" number of data blocks: %d\n"), nblocks);
3164
3165 i = 1;
3166 t = 0;
3167
3168 sprintf(fmt, "# name: %%%ds", VNAMELEN - 1);
3169
3170 while (fgets(line, maxlen, fp) && !err) {
3171 char *s = line;
3172 int j;
3173
3174 if (*s == '#') {
3175 if (sscanf(line, fmt, name) == 1) {
3176 ;
3177 } else if (sscanf(line, "# rows: %d", &brows) == 1) {
3178 t = 0;
3179 } else if (sscanf(line, "# columns: %d", &bcols) == 1) {
3180 i += oldbcols;
3181 oldbcols = bcols;
3182 }
3183 }
3184
3185 if (*s == '#' || string_is_blank(s)) {
3186 continue;
3187 }
3188
3189 if (t >= octset->n) {
3190 err = 1;
3191 }
3192
3193 for (j=0; j<bcols && !err; j++) {
3194 double x;
3195 int v = i + j;
3196
3197 if (t == 0) {
3198 int nnum = (bcols > 1)? j + 1 : 0;
3199
3200 octave_varname(octset->varname[i+j], name, nnum, v);
3201 }
3202
3203 while (isspace(*s)) s++;
3204 if (sscanf(s, "%lf", &x) != 1) {
3205 fprintf(stderr, "error: '%s', didn't get double\n", s);
3206 err = 1;
3207 } else {
3208 octset->Z[v][t] = x;
3209 while (!isspace(*s)) s++;
3210 }
3211 }
3212 t++;
3213 }
3214
3215 if (err) {
3216 pputs(prn, _("Invalid data file\n"));
3217 err = E_DATA;
3218 } else {
3219 int merge = dset->Z != NULL;
3220 gretlopt merge_opt = 0;
3221
3222 if (merge && (opt & OPT_T)) {
3223 merge_opt = OPT_T;
3224 }
3225 err = merge_or_replace_data(dset, &octset, merge_opt, prn);
3226 }
3227
3228 oct_bailout:
3229
3230 if (fp != NULL) {
3231 fclose(fp);
3232 }
3233
3234 if (line != NULL) {
3235 free(line);
3236 }
3237
3238 if (octset != NULL) {
3239 clear_datainfo(octset, CLEAR_FULL);
3240 }
3241
3242 return err;
3243 }
3244
3245 /**
3246 * import_other:
3247 * @fname: name of file.
3248 * @ftype: type of data file.
3249 * @dset: pointer to dataset struct.
3250 * @opt: option flag; see gretl_get_data().
3251 * @prn: gretl printing struct.
3252 *
3253 * Open a data file of a type that requires a special plugin.
3254 *
3255 * Returns: 0 on successful completion, non-zero otherwise.
3256 */
3257
import_other(const char * fname,GretlFileType ftype,DATASET * dset,gretlopt opt,PRN * prn)3258 int import_other (const char *fname, GretlFileType ftype,
3259 DATASET *dset, gretlopt opt, PRN *prn)
3260 {
3261 FILE *fp;
3262 int (*importer) (const char *, DATASET *,
3263 gretlopt, PRN *);
3264 int err = 0;
3265
3266 fp = gretl_fopen(fname, "r");
3267 if (fp == NULL) {
3268 pprintf(prn, _("Couldn't open %s\n"), fname);
3269 err = E_FOPEN;
3270 goto bailout;
3271 }
3272
3273 fclose(fp);
3274
3275 if (ftype == GRETL_OCTAVE) {
3276 /* plugin not needed */
3277 return import_octave(fname, dset, opt, prn);
3278 }
3279
3280 if (ftype == GRETL_WF1) {
3281 importer = get_plugin_function("wf1_get_data");
3282 } else if (ftype == GRETL_DTA) {
3283 importer = get_plugin_function("dta_get_data");
3284 } else if (ftype == GRETL_SAV) {
3285 importer = get_plugin_function("sav_get_data");
3286 } else if (ftype == GRETL_SAS) {
3287 importer = get_plugin_function("xport_get_data");
3288 } else if (ftype == GRETL_JMULTI) {
3289 importer = get_plugin_function("jmulti_get_data");
3290 } else if (ftype == GRETL_MAP) {
3291 importer = get_plugin_function("map_get_data");
3292 } else {
3293 pprintf(prn, _("Unrecognized data type"));
3294 pputc(prn, '\n');
3295 return E_DATA;
3296 }
3297
3298 if (importer == NULL) {
3299 err = 1;
3300 } else {
3301 err = (*importer)(fname, dset, opt, prn);
3302 }
3303
3304 bailout:
3305
3306 return err;
3307 }
3308
3309 /**
3310 * import_spreadsheet:
3311 * @fname: name of file.
3312 * @ftype: type of data file.
3313 * @list: list of parameters for spreadsheet import, or NULL.
3314 * @sheetname: name of specific worksheet, or NULL.
3315 * @dset: dataset struct.
3316 * @opt: option flag; see gretl_get_data().
3317 * @prn: gretl printing struct.
3318 *
3319 * Open a data file of a type that requires a special plugin.
3320 * Acceptable values for @ftype are %GRETL_GNUMERIC,
3321 * %GRETL_XLS, %GRETL_XLSX and %GRETL_ODS.
3322 *
3323 * Returns: 0 on successful completion, non-zero otherwise.
3324 */
3325
import_spreadsheet(const char * fname,GretlFileType ftype,int * list,char * sheetname,DATASET * dset,gretlopt opt,PRN * prn)3326 int import_spreadsheet (const char *fname, GretlFileType ftype,
3327 int *list, char *sheetname,
3328 DATASET *dset, gretlopt opt, PRN *prn)
3329 {
3330 FILE *fp;
3331 int (*importer) (const char*, int *, char *,
3332 DATASET *, gretlopt, PRN *);
3333 int err = 0;
3334
3335 import_na_init();
3336
3337 fp = gretl_fopen(fname, "r");
3338
3339 if (fp == NULL) {
3340 pprintf(prn, _("Couldn't open %s\n"), fname);
3341 err = E_FOPEN;
3342 goto bailout;
3343 }
3344
3345 fclose(fp);
3346
3347 if (ftype == GRETL_GNUMERIC) {
3348 importer = get_plugin_function("gnumeric_get_data");
3349 } else if (ftype == GRETL_XLS) {
3350 importer = get_plugin_function("xls_get_data");
3351 } else if (ftype == GRETL_XLSX) {
3352 importer = get_plugin_function("xlsx_get_data");
3353 } else if (ftype == GRETL_ODS) {
3354 importer = get_plugin_function("ods_get_data");
3355 } else {
3356 pprintf(prn, _("Unrecognized data type"));
3357 pputc(prn, '\n');
3358 return E_DATA;
3359 }
3360
3361 if (importer == NULL) {
3362 err = 1;
3363 } else {
3364 gchar *thisdir = g_get_current_dir();
3365
3366 err = (*importer)(fname, list, sheetname, dset, opt, prn);
3367
3368 if (thisdir != NULL) {
3369 /* come back out of dotdir? */
3370 gretl_chdir(thisdir);
3371 g_free(thisdir);
3372 }
3373 }
3374
3375 bailout:
3376
3377 return err;
3378 }
3379
is_jmulti_datafile(const char * fname)3380 static int is_jmulti_datafile (const char *fname)
3381 {
3382 FILE *fp;
3383 int ret = 0;
3384
3385 fp = gretl_fopen(fname, "r");
3386
3387 if (fp != NULL) {
3388 char test[128] = {0};
3389 int gotobs = 0;
3390 int gotcomm = 0;
3391 int incomm = 0;
3392
3393 /* look for characteristic C-style comment and
3394 <obs stuff> field, outside of comment */
3395
3396 while (fgets(test, sizeof test, fp)) {
3397 if (!incomm && strstr(test, "/*")) {
3398 gotcomm = 1;
3399 incomm = 1;
3400 }
3401 if (incomm && strstr(test, "*/")) {
3402 incomm = 0;
3403 }
3404 if (!incomm && *test == '<' && strchr(test, '>')) {
3405 gotobs = 1;
3406 }
3407 if (gotcomm && gotobs) {
3408 ret = 1;
3409 break;
3410 }
3411 }
3412 fclose(fp);
3413 }
3414
3415 return ret;
3416 }
3417
3418 /**
3419 * gretl_is_pkzip_file:
3420 * @fname: name of file to examine.
3421 *
3422 * Returns: 1 if @fname is readable and is a PKZIP file,
3423 * else 0.
3424 */
3425
gretl_is_pkzip_file(const char * fname)3426 int gretl_is_pkzip_file (const char *fname)
3427 {
3428 FILE *fp;
3429 char test[3] = {0};
3430 int ret = 0;
3431
3432 fp = gretl_fopen(fname, "rb");
3433 if (fp != NULL) {
3434 if (fread(test, 1, 2, fp) == 2) {
3435 if (!strcmp(test, "PK")) ret = 1;
3436 }
3437 fclose(fp);
3438 }
3439
3440 return ret;
3441 }
3442
3443 /**
3444 * detect_filetype:
3445 * @fname: the name of the file to test.
3446 * @opt: OPT_P may be included to permit path-searching if @fname
3447 * is not an absolute path; in that case the @fname argument
3448 * may be modified, otherwise it will be left unchanged.
3449 *
3450 * Attempts to determine the type of a file to be opened in gretl:
3451 * data file (of various formats), or command script. If OPT_P
3452 * is given, the @fname argument must be an array of length
3453 * at least %MAXLEN: a path may be prepended and in some cases
3454 * an extension may be appended.
3455 *
3456 * Returns: integer code indicating the type of file.
3457 */
3458
detect_filetype(char * fname,gretlopt opt)3459 GretlFileType detect_filetype (char *fname, gretlopt opt)
3460 {
3461 const char *ext = get_filename_extension(fname);
3462 GretlFileType ftype = GRETL_UNRECOGNIZED;
3463
3464 if (ext != NULL) {
3465 /* First try judging the type by extension */
3466 if (!strcmp(ext, ".inp")) {
3467 ftype = GRETL_SCRIPT;
3468 } else if (!strcmp(ext, ".gretl")) {
3469 if (gretl_is_pkzip_file(fname)) {
3470 ftype = GRETL_SESSION;
3471 } else {
3472 ftype = GRETL_SCRIPT;
3473 }
3474 } else {
3475 ftype = data_file_type_from_extension(ext);
3476 if (ftype == GRETL_UNRECOGNIZED) {
3477 /* check for database types */
3478 if (!strcmp(ext, ".bin")) {
3479 ftype = GRETL_NATIVE_DB;
3480 } else if (!strcmp(ext, ".rat")) {
3481 ftype = GRETL_RATS_DB;
3482 } else if (!strcmp(ext, ".bn7")) {
3483 ftype = GRETL_PCGIVE_DB;
3484 }
3485 }
3486 }
3487 if (ftype != GRETL_UNRECOGNIZED) {
3488 /* We got a type from the extension, but can we find
3489 the file "as is"? If so, we're done.
3490 */
3491 if (gretl_test_fopen(fname, "r") == 0) {
3492 return ftype;
3493 }
3494 }
3495 }
3496
3497 if ((opt & OPT_P) && gretl_addpath(fname, 0) != NULL) {
3498 ext = get_filename_extension(fname);
3499 if (ext != NULL) {
3500 /* check again for known data file types */
3501 ftype = data_file_type_from_extension(ext);
3502 }
3503 }
3504
3505 if (ftype == GRETL_UNRECOGNIZED) {
3506 /* last gasp */
3507 if (gretl_is_xml_file(fname)) {
3508 ftype = GRETL_XML_DATA;
3509 } else if (has_suffix(fname, ".dat") && is_jmulti_datafile(fname)) {
3510 ftype = GRETL_JMULTI;
3511 } else {
3512 /* default to assuming plain text data */
3513 ftype = GRETL_CSV;
3514 }
3515 }
3516
3517 return ftype;
3518 }
3519
3520 /**
3521 * check_atof:
3522 * @numstr: string to check.
3523 *
3524 * Returns: 0 if @numstr is blank, or is a valid string representation
3525 * of a floating point number, else 1.
3526 */
3527
check_atof(const char * numstr)3528 int check_atof (const char *numstr)
3529 {
3530 char *test;
3531
3532 /* accept blank entries */
3533 if (*numstr == '\0') return 0;
3534
3535 errno = 0;
3536
3537 strtod(numstr, &test);
3538
3539 if (*test == '\0' && errno != ERANGE) return 0;
3540
3541 if (!strcmp(numstr, test)) {
3542 gretl_errmsg_sprintf(_("'%s' -- no numeric conversion performed!"), numstr);
3543 return 1;
3544 }
3545
3546 if (*test != '\0') {
3547 if (isprint(*test)) {
3548 gretl_errmsg_sprintf(_("Extraneous character '%c' in data"), *test);
3549 } else {
3550 gretl_errmsg_sprintf(_("Extraneous character (0x%x) in data"), *test);
3551 }
3552 return 1;
3553 }
3554
3555 if (errno == ERANGE) {
3556 gretl_errmsg_sprintf(_("'%s' -- number out of range!"), numstr);
3557 }
3558
3559 return 1;
3560 }
3561
3562 /**
3563 * check_atoi:
3564 * @numstr: string to check.
3565 *
3566 * Returns: 0 if @numstr is blank, or is a valid string representation
3567 * of an int, else 1.
3568 */
3569
check_atoi(const char * numstr)3570 int check_atoi (const char *numstr)
3571 {
3572 long int val;
3573 char *test;
3574
3575 /* accept blank entries */
3576 if (*numstr == '\0') return 0;
3577
3578 errno = 0;
3579
3580 val = strtol(numstr, &test, 10);
3581
3582 if (*test == '\0' && errno != ERANGE) return 0;
3583
3584 if (!strcmp(numstr, test)) {
3585 gretl_errmsg_sprintf(_("'%s' -- no numeric conversion performed!"), numstr);
3586 return 1;
3587 }
3588
3589 if (*test != '\0') {
3590 if (isprint(*test)) {
3591 gretl_errmsg_sprintf(_("Extraneous character '%c' in data"), *test);
3592 } else {
3593 gretl_errmsg_sprintf(_("Extraneous character (0x%x) in data"), *test);
3594 }
3595 return 1;
3596 }
3597
3598 if (errno == ERANGE || val <= INT_MIN || val >= INT_MAX) {
3599 gretl_errmsg_sprintf(_("'%s' -- number out of range!"), numstr);
3600 }
3601
3602 return 1;
3603 }
3604
transpose_varname_used(const char * vname,DATASET * dinfo,int imax)3605 static int transpose_varname_used (const char *vname,
3606 DATASET *dinfo,
3607 int imax)
3608 {
3609 int i;
3610
3611 for (i=0; i<imax; i++) {
3612 if (!strcmp(vname, dinfo->varname[i])) {
3613 return 1;
3614 }
3615 }
3616
3617 return 0;
3618 }
3619
3620 /**
3621 * transpose_data:
3622 * @dset: pointer to dataset information struct.
3623 *
3624 * Attempts to transpose the current dataset, so that each
3625 * variable becomes interpreted as an observation and each
3626 * observation as a variable.
3627 *
3628 * Returns: 0 on success, non-zero error code on error.
3629 */
3630
transpose_data(DATASET * dset)3631 int transpose_data (DATASET *dset)
3632 {
3633 DATASET *tset;
3634 int k = dset->n + 1;
3635 int T = dset->v - 1;
3636 int i, t;
3637
3638 tset = create_new_dataset(k, T, 0);
3639 if (tset == NULL) {
3640 return E_ALLOC;
3641 }
3642
3643 for (i=1; i<dset->v; i++) {
3644 for (t=0; t<dset->n; t++) {
3645 tset->Z[t+1][i-1] = dset->Z[i][t];
3646 }
3647 }
3648
3649 for (t=0; t<dset->n; t++) {
3650 int k = t + 1;
3651 char *targ = tset->varname[k];
3652
3653 if (dset->S != NULL && dset->S[t][0] != '\0') {
3654 int err;
3655
3656 *targ = '\0';
3657 strncat(targ, dset->S[t], VNAMELEN - 1);
3658 gretl_charsub(targ, ' ', '_');
3659 err = check_varname(targ);
3660 if (err) {
3661 sprintf(targ, "v%d", k);
3662 gretl_error_clear();
3663 } else if (transpose_varname_used(targ, tset, k)) {
3664 sprintf(targ, "v%d", k);
3665 }
3666 } else {
3667 sprintf(targ, "v%d", k);
3668 }
3669 }
3670
3671 free_Z(dset);
3672 dset->Z = tset->Z;
3673
3674 clear_datainfo(dset, CLEAR_FULL);
3675
3676 dset->v = k;
3677 dset->n = T;
3678 dset->t1 = 0;
3679 dset->t2 = dset->n - 1;
3680
3681 dset->varname = tset->varname;
3682 dset->varinfo = tset->varinfo;
3683
3684 dataset_obs_info_default(dset);
3685
3686 free(tset);
3687
3688 return 0;
3689 }
3690
dataset_set_regular_markers(DATASET * dset)3691 void dataset_set_regular_markers (DATASET *dset)
3692 {
3693 dset->markers = REGULAR_MARKERS;
3694 }
3695
3696 struct filetype_info {
3697 GretlFileType type;
3698 const char *src;
3699 };
3700
3701 /**
3702 * dataset_add_import_info:
3703 * @dset: pointer to dataset information struct.
3704 * @fname: the name of a file from which data have been imported.
3705 * @type: code representing the type of the file identified by
3706 * @fname.
3707 *
3708 * On successful import of data from some "foreign" format,
3709 * add a note to the "descrip" member of the new dataset
3710 * saying where it came from and when.
3711 */
3712
dataset_add_import_info(DATASET * dset,const char * fname,GretlFileType type)3713 void dataset_add_import_info (DATASET *dset, const char *fname,
3714 GretlFileType type)
3715 {
3716 struct filetype_info ftypes[] = {
3717 { GRETL_CSV, "CSV" },
3718 { GRETL_GNUMERIC, "Gnumeric" },
3719 { GRETL_XLS, "Excel" },
3720 { GRETL_XLSX, "Excel" },
3721 { GRETL_ODS, "Open Document" },
3722 { GRETL_WF1, "Eviews" },
3723 { GRETL_DTA, "Stata" },
3724 { GRETL_SAV, "SPSS" },
3725 { GRETL_SAS, "SAS" },
3726 { GRETL_JMULTI, "JMulTi" }
3727 };
3728 int i, nt = sizeof ftypes / sizeof ftypes[0];
3729 const char *src = NULL;
3730 gchar *note = NULL;
3731 char tstr[48];
3732
3733 for (i=0; i<nt; i++) {
3734 if (type == ftypes[i].type) {
3735 src = ftypes[i].src;
3736 break;
3737 }
3738 }
3739
3740 if (src == NULL) {
3741 return;
3742 }
3743
3744 print_time(tstr);
3745
3746 if (g_utf8_validate(fname, -1, NULL)) {
3747 const char *p = strrslash(fname);
3748
3749 if (p != NULL) {
3750 fname = p + 1;
3751 }
3752 note = g_strdup_printf(_("Data imported from %s file '%s', %s\n"),
3753 src, fname, tstr);
3754 } else {
3755 note = g_strdup_printf(_("Data imported from %s, %s\n"),
3756 src, tstr);
3757 }
3758
3759 if (note != NULL) {
3760 if (dset->descrip == NULL) {
3761 dset->descrip = gretl_strdup(note);
3762 } else {
3763 int dlen = strlen(dset->descrip);
3764 int nlen = strlen(note);
3765 char *tmp = realloc(dset->descrip, dlen + nlen + 5);
3766
3767 if (tmp != NULL) {
3768 dset->descrip = tmp;
3769 strcat(dset->descrip, "\n\n");
3770 strncat(dset->descrip, note, nlen);
3771 }
3772 }
3773 g_free(note);
3774 }
3775 }
3776
is_weekend(int t,int pd,int sat0,int sun0)3777 static int is_weekend (int t, int pd, int sat0, int sun0)
3778 {
3779 int sat = 0, sun = 0;
3780
3781 /* this is intended to identify weekend days for
3782 both 7- and 6-day data */
3783
3784 if (sat0 >= 0) {
3785 sat = (t - sat0) % pd == 0;
3786 }
3787
3788 if (!sat && sun0 >= 0) {
3789 sun = (t - sun0) % pd == 0;
3790 }
3791
3792 return sat || sun;
3793 }
3794
3795 /* Scan imported daily data for missing values, so as to
3796 be able to offer the user some options.
3797
3798 Return values:
3799
3800 0 : no missing values
3801 1 : all weekend data are missing (or possibly just all
3802 Saturdays, or just all Sundays), but no weekday
3803 data missing
3804 2 : as with 1, but also some weekdays missing
3805 3 : scattering of weekend and/or weekday data missing
3806 */
3807
analyse_daily_import(const DATASET * dset,PRN * prn)3808 int analyse_daily_import (const DATASET *dset, PRN *prn)
3809 {
3810 int all_weekends_blank = 0;
3811 int blank_weekends = 0;
3812 int blank_weekdays = 0;
3813 int n_weekdays = 0;
3814 int n_weekend_days = 0;
3815 int sat0 = -1, sun0 = -1;
3816 int i, t, pd = dset->pd;
3817 int all_missing, weekend;
3818 int ret = 0;
3819
3820 if (pd > 5) {
3821 char datestr[OBSLEN];
3822 int wkday;
3823
3824 /* start by finding first Sat and/or Sun */
3825 for (t=0; t<dset->n; t++) {
3826 ntolabel(datestr, t, dset);
3827 wkday = weekday_from_date(datestr);
3828 if (wkday == 6 && sat0 < 0) {
3829 sat0 = t;
3830 } else if (wkday == 0 && sun0 < 0) {
3831 sun0 = t;
3832 }
3833 if (sat0 >= 0 && sun0 >= 0) {
3834 break;
3835 } else if (dset->pd == 6 && (sat0 >= 0 || sun0 >= 0)) {
3836 break;
3837 }
3838 }
3839 all_weekends_blank = 1; /* may be revised below */
3840 } else {
3841 /* there are no weekend days in 5-day data */
3842 weekend = 0;
3843 }
3844
3845 for (t=0; t<dset->n; t++) {
3846 if (pd > 5) {
3847 weekend = is_weekend(t, pd, sat0, sun0);
3848 }
3849 all_missing = 1;
3850 for (i=1; i<dset->v; i++) {
3851 if (!na(dset->Z[i][t])) {
3852 all_missing = 0;
3853 break;
3854 }
3855 }
3856 if (weekend) {
3857 n_weekend_days++;
3858 if (!all_missing) {
3859 /* not all weekend data are missing */
3860 all_weekends_blank = 0;
3861 }
3862 } else {
3863 n_weekdays++;
3864 }
3865 if (all_missing) {
3866 if (weekend) {
3867 blank_weekends++;
3868 } else {
3869 blank_weekdays++;
3870 }
3871 }
3872 }
3873
3874 if (all_weekends_blank) {
3875 double misspc = 100.0 * blank_weekdays / (double) n_weekdays;
3876
3877 if (pd == 7) {
3878 pputs(prn, "This dataset is on 7-day calendar, but weekends are blank.");
3879 } else {
3880 pprintf(prn, "This dataset is on 6-day calendar, but %s are blank.",
3881 sat0 >= 0 ? "Sundays" : "Saturdays");
3882 }
3883 ret = 1;
3884 if (misspc > 0.0) {
3885 pputc(prn, '\n');
3886 pputs(prn, "In addition, ");
3887 if (misspc >= 0.01) {
3888 pprintf(prn, "%.2f percent of weekday observations are missing.",
3889 misspc);
3890 } else {
3891 pprintf(prn, "%g percent of weekday observations are missing.",
3892 misspc);
3893 }
3894 if (misspc < 10.0) {
3895 ret = 2;
3896 }
3897 }
3898 } else if (blank_weekdays || blank_weekends) {
3899 int ndays = n_weekdays + n_weekend_days;
3900 int nmiss = blank_weekdays + blank_weekends;
3901 double misspc = 100.0 * nmiss / (double) ndays;
3902
3903 if (misspc >= 0.01) {
3904 pprintf(prn, "%.2f percent of daily observations are missing.",
3905 misspc);
3906 } else {
3907 pprintf(prn, "%g percent of daily observations are missing.",
3908 misspc);
3909 }
3910 if (misspc < 10) {
3911 ret = 3;
3912 }
3913 }
3914
3915 return ret;
3916 }
3917