1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 /* dbread.c for gretl */
21 
22 #include "libgretl.h"
23 #include "swap_bytes.h"
24 #include "libset.h"
25 #include "uservar.h"
26 #include "matrix_extra.h"
27 #include "usermat.h"
28 #include "dbread.h"
29 #include "gretl_midas.h"
30 #include "gretl_typemap.h"
31 #ifdef USE_CURL
32 # include "gretl_www.h"
33 #endif
34 
35 #include <glib.h>
36 #include <unistd.h>
37 #include <errno.h>
38 
39 #if G_BYTE_ORDER == G_BIG_ENDIAN
40 # include <netinet/in.h>
41 #endif
42 
43 /**
44  * SECTION:dbread
45  * @short_description: reading from databases
46  * @title: DB read
47  * @include: gretl/libgretl.h, gretl/dbread.h
48  *
49  * Functions that read data from native gretl databases as
50  * well as RATS 4.0 and PcGive databases. As you will see,
51  * this area is mostly undocumented at present, but since it
52  * may ultimately be useful for third-party coders we will
53  * try to remedy this!
54  */
55 
56 #define DB_DEBUG 0
57 
58 #define RECNUM gint32
59 #define NAMELENGTH 16
60 #define RATSCOMMENTLENGTH 80
61 #define RATSCOMMENTS 2
62 #define RATS_PARSE_ERROR -999
63 
64 typedef struct {
65     gint32 daynumber;              /* Number of days from 1-1-90
66 				      to year, month, day */
67     short panel;                   /* 1 for panel set, 2 for intraday
68 				      date set , 0 o.w. */
69 #define LINEAR    0                /* Single time direction */
70 #define RATSPANEL 1                /* panel:period */
71 #define INTRADAY  2                /* date:intraday period */
72     gint32 panelrecord;            /* Size of panel or
73 				      number of periods per day */
74     short dclass;                  /* See definitions below */
75 #define UNDATEDCLASS   0           /* No time series properties */
76 #define IRREGULARCLASS 1           /* Time series (irregular) */
77 #define PERYEARCLASS   2           /* x periods / year */
78 #define PERWEEKCLASS   3           /* x periods / week */
79 #define DAILYCLASS     4           /* x days / period */
80     gint32 info;                   /* Number of periods per year or
81 				      per week */
82     short digits;                  /* Digits for representing panel
83 				      or intraday period */
84     short year,month,day;          /* Starting year, month, day */
85 } DATEINFO;
86 
87 typedef struct {
88     RECNUM back_point;             /* Pointer to previous series */
89     RECNUM forward_point;          /* Pointer to next series */
90     short back_class;              /* Reserved.  Should be 0 */
91     short forward_class;           /* Reserved.  Should be 0 */
92     RECNUM first_data;             /* First data record */
93     char series_name[NAMELENGTH];  /* Series name */
94     DATEINFO date_info;            /* Dating scheme for this series */
95     gint32 datapoints;             /* Number of data points */
96     short data_type;               /* real, char, complex.
97                                       Reserved.  Should be 0 */
98     short digits;                  /* . + digit count for representation
99 				      (0 = unspecified) */
100     short misc1;                   /* For future expansion should be 0 */
101     short misc2;
102     short comment_lines;           /* Number of comment lines (0,1,2) */
103     char series_class[NAMELENGTH]; /* Series class. Not used, blank */
104     char comments[RATSCOMMENTS][RATSCOMMENTLENGTH];
105     char pad[10];
106 } RATSDirect;
107 
108 typedef struct {
109     RECNUM back_point;             /* Previous record (0 for first) */
110     RECNUM forward_point;          /* Next record (0 for last) */
111     double data[31];               /* Data */
112 } RATSData;
113 
114 static char saved_db_name[MAXLEN];
115 static int saved_db_type;
116 
117 #if G_BYTE_ORDER == G_BIG_ENDIAN
retrieve_float(netfloat nf)118 float retrieve_float (netfloat nf)
119 {
120     short exp = ntohs(nf.exp);
121     long frac = ntohl(nf.frac);
122     double receive = frac / 10e6;
123 
124     return ldexp(receive, exp);
125 }
126 #endif
127 
128 static int lib_add_db_data (double **dbZ, SERIESINFO *sinfo,
129 			    DATASET *dset, CompactMethod cmethod,
130 			    int dbv, PRN *prn);
131 
132 static int do_compact_spread (DATASET *dset, int newpd);
133 
open_binfile(const char * dbbase,int code,int offset,int * err)134 static FILE *open_binfile (const char *dbbase, int code, int offset, int *err)
135 {
136     char dbbin[MAXLEN];
137     FILE *fp = NULL;
138 
139     strcpy(dbbin, dbbase);
140     if (code == GRETL_NATIVE_DB) {
141 	if (strstr(dbbin, ".bin") == NULL) {
142 	    strcat(dbbin, ".bin");
143 	}
144     } else {
145 	if (strstr(dbbin, ".bn7") == NULL) {
146 	    strcat(dbbin, ".bn7");
147 	}
148     }
149 
150     fp = gretl_fopen(dbbin, "rb");
151 
152     if (fp == NULL) {
153 	*err = E_FOPEN;
154     } else if (fseek(fp, (long) offset, SEEK_SET)) {
155 	*err = DB_PARSE_ERROR;
156 	fclose(fp);
157 	fp = NULL;
158     }
159 
160     return fp;
161 }
162 
163 /**
164  * get_native_db_data:
165  * @dbbase:
166  * @sinfo:
167  * @Z: data array.
168  *
169  * Returns: 0 on success, non-zero code on failure.
170  */
171 
get_native_db_data(const char * dbbase,SERIESINFO * sinfo,double ** Z)172 int get_native_db_data (const char *dbbase, SERIESINFO *sinfo,
173 			double **Z)
174 {
175     char numstr[32];
176     FILE *fp;
177     dbnumber x;
178     int v = sinfo->v;
179     int t, t2, err = 0;
180 
181     fp = open_binfile(dbbase, GRETL_NATIVE_DB, sinfo->offset, &err);
182     if (err) {
183 	return err;
184     }
185 
186     t2 = (sinfo->t2 > 0)? sinfo->t2 : sinfo->nobs - 1;
187 
188     for (t=sinfo->t1; t<=t2 && !err; t++) {
189 	if (fread(&x, sizeof x, 1, fp) != 1) {
190 	    err = DB_PARSE_ERROR;
191 	} else {
192 	    sprintf(numstr, "%.7g", (double) x); /* N.B. converting a float */
193 	    Z[v][t] = atof(numstr);
194 	    if (Z[v][t] == DBNA) {
195 		Z[v][t] = NADBL;
196 	    }
197 	}
198     }
199 
200     fclose(fp);
201 
202     return err;
203 }
204 
205 #ifdef USE_CURL
206 
207 /**
208  * get_remote_db_data:
209  * @dbbase:
210  * @sinfo:
211  * @Z: data array.
212  *
213  * Returns: 0 on success, non-zero code on failure.
214  */
215 
get_remote_db_data(const char * dbbase,SERIESINFO * sinfo,double ** Z)216 int get_remote_db_data (const char *dbbase, SERIESINFO *sinfo,
217 			double **Z)
218 {
219     char *getbuf = NULL;
220     int t, t2, err;
221     int v = sinfo->v;
222     dbnumber x;
223     size_t offset;
224 #if G_BYTE_ORDER == G_BIG_ENDIAN
225     netfloat nf;
226 #endif
227 
228 #if G_BYTE_ORDER == G_BIG_ENDIAN
229     err = retrieve_remote_db_data(dbbase, sinfo->varname, &getbuf,
230 				  GRAB_NBO_DATA);
231 #else
232     err = retrieve_remote_db_data(dbbase, sinfo->varname, &getbuf,
233 				  GRAB_DATA);
234 #endif
235 
236     if (err) {
237 	free(getbuf);
238 	return E_FOPEN;
239     }
240 
241     t2 = (sinfo->t2 > 0)? sinfo->t2 : sinfo->nobs - 1;
242 
243     offset = 0L;
244     for (t=sinfo->t1; t<=t2; t++) {
245 #if G_BYTE_ORDER == G_BIG_ENDIAN
246 	/* go via network byte order */
247 	memcpy(&(nf.frac), getbuf + offset, sizeof nf.frac);
248 	offset += sizeof nf.frac;
249 	memcpy(&(nf.exp), getbuf + offset, sizeof nf.exp);
250 	offset += sizeof nf.exp;
251 	x = retrieve_float(nf);
252 #else
253 	/* just read floats */
254 	memcpy(&x, getbuf + offset, sizeof x);
255 	offset += sizeof x;
256 #endif
257 	Z[v][t] = (x == DBNA)? NADBL : x;
258     }
259 
260     free(getbuf);
261 
262     return 0;
263 }
264 
265 #endif /* USE_CURL */
266 
267 /**
268  * get_pcgive_db_data:
269  * @dbbase:
270  * @sinfo:
271  * @Z: data array.
272  *
273  *
274  * Returns: 0 on success, non-zero code on failure.
275  */
276 
get_pcgive_db_data(const char * dbbase,SERIESINFO * sinfo,double ** Z)277 int get_pcgive_db_data (const char *dbbase, SERIESINFO *sinfo,
278 			double **Z)
279 {
280     FILE *fp;
281     double x;
282     int v = sinfo->v;
283     int t, t2, err = 0;
284 
285     fp = open_binfile(dbbase, GRETL_PCGIVE_DB, sinfo->offset, &err);
286     if (err) {
287 	return err;
288     }
289 
290     t2 = (sinfo->t2 > 0)? sinfo->t2 : sinfo->nobs - 1;
291 
292     for (t=sinfo->t1; t<=t2; t++) {
293 	if (fread(&x, sizeof x, 1, fp) != 1) {
294 	    err = E_DATA;
295 	    break;
296 	}
297 #if G_BYTE_ORDER == G_BIG_ENDIAN
298 	reverse_double(x);
299 #endif
300 	if (x == -9999.99 || isnan(x)) {
301 	    Z[v][t] = NADBL;
302 	    err = DB_MISSING_DATA;
303 	} else {
304 	    Z[v][t] = x;
305 	}
306     }
307 
308     fclose(fp);
309 
310     return err;
311 }
312 
get_native_series_comment(SERIESINFO * sinfo,const char * s)313 static void get_native_series_comment (SERIESINFO *sinfo, const char *s)
314 {
315     s += strcspn(s, " "); /* skip varname */
316     s += strspn(s, " ");  /* skip space */
317 
318     series_info_set_description(sinfo, s);
319     tailstrip(sinfo->descrip);
320 }
321 
get_native_series_pd(SERIESINFO * sinfo,char pdc)322 static int get_native_series_pd (SERIESINFO *sinfo, char pdc)
323 {
324     sinfo->pd = 1;
325     sinfo->undated = 0;
326 
327     if (pdc == 'M') sinfo->pd = 12;
328     else if (pdc == 'Q') sinfo->pd = 4;
329     else if (pdc == 'B') sinfo->pd = 5;
330     else if (pdc == 'S') sinfo->pd = 6;
331     else if (pdc == 'D') sinfo->pd = 7;
332     else if (pdc == 'U') sinfo->undated = 1;
333     else return 1;
334 
335     return 0;
336 }
337 
get_native_series_obs(SERIESINFO * sinfo,const char * stobs,const char * endobs)338 static int get_native_series_obs (SERIESINFO *sinfo,
339 				  const char *stobs,
340 				  const char *endobs)
341 {
342     char dc = 0;
343 
344     if (strchr(stobs, '-')) {
345 	dc = '-';
346     } else if (strchr(stobs, '/')) {
347 	dc = '/';
348     }
349 
350     if (dc != 0) {
351 	/* calendar data */
352 	const char *q = stobs;
353 	const char *p = strchr(stobs, dc);
354 
355 	if (p - q == 4) {
356 	    strcpy(sinfo->stobs, q);
357 	}
358 	q = endobs;
359 	p = strchr(endobs, dc);
360 	if (p && p - q == 4) {
361 	    strcpy(sinfo->endobs, q);
362 	}
363     } else {
364 	*sinfo->stobs = '\0';
365 	*sinfo->endobs = '\0';
366 	strncat(sinfo->stobs, stobs, OBSLEN-1);
367 	strncat(sinfo->endobs, endobs, OBSLEN-1);
368     }
369 
370     return 0;
371 }
372 
373 static int
open_native_db_files(const char * dname,FILE ** f1,char * name1,FILE ** f2,char * name2)374 open_native_db_files (const char *dname, FILE **f1, char *name1,
375 		      FILE **f2, char *name2)
376 {
377     char dbbase[FILENAME_MAX];
378     char fname[FILENAME_MAX];
379     FILE *fidx = NULL, *fbin = NULL;
380     int err = 0;
381 
382     if (dname != NULL) {
383 	strcpy(dbbase, dname);
384     } else {
385 	strcpy(dbbase, saved_db_name);
386     }
387 
388     if (has_suffix(dbbase, ".bin")) {
389 	dbbase[strlen(dbbase) - 4] = '\0';
390     }
391 
392     if (f1 != NULL) {
393 	strcpy(fname, dbbase);
394 	strcat(fname, ".idx");
395 
396 	if (name1 != NULL) {
397 	    err = gretl_write_access(fname);
398 	    if (!err) {
399 		strcpy(name1, fname);
400 	    }
401 	}
402 
403 	if (!err) {
404 	    fidx = gretl_fopen(fname, "r");
405 	    if (fidx == NULL) {
406 		gretl_errmsg_set(_("Couldn't open database index file"));
407 		err = E_FOPEN;
408 	    }
409 	}
410     }
411 
412     if (f2 != NULL && !err) {
413 	strcpy(fname, dbbase);
414 	strcat(fname, ".bin");
415 
416 	if (name2 != NULL) {
417 	    err = gretl_write_access(fname);
418 	    if (!err) {
419 		strcpy(name2, fname);
420 	    }
421 	}
422 
423 	if (!err) {
424 	    fbin = gretl_fopen(fname, "rb");
425 	    if (fbin == NULL) {
426 		gretl_errmsg_set(_("Couldn't open database binary file"));
427 		err = E_FOPEN;
428 	    }
429 	}
430     }
431 
432     if (err) {
433 	if (fidx != NULL) {
434 	    fclose(fidx);
435 	}
436     } else {
437 	if (f1 != NULL) {
438 	    *f1 = fidx;
439 	}
440 	if (f2 != NULL) {
441 	    *f2 = fbin;
442 	}
443     }
444 
445     return err;
446 }
447 
native_db_index_name(void)448 static char *native_db_index_name (void)
449 {
450     char *fname;
451 
452     if (has_suffix(saved_db_name, ".bin")) {
453 	fname = g_strdup(saved_db_name);
454 	strcpy(fname + strlen(fname) - 3, "idx");
455     } else {
456 	fname = g_strdup_printf("%s.idx", saved_db_name);
457     }
458 
459     return fname;
460 }
461 
db_match_glob(FILE * fp,char * line,int linelen,GPatternSpec * pspec,char ** S,int * err)462 static int db_match_glob (FILE *fp,
463 			  char *line, int linelen,
464 			  GPatternSpec *pspec,
465 			  char **S, int *err)
466 {
467     char vname[VNAMELEN], l2[72];
468     int n = 0;
469 
470     while (fgets(line, linelen, fp) && !*err) {
471 	if (*line == '#') {
472 	    continue;
473 	}
474 	if (gretl_scan_varname(line, vname) != 1) {
475 	    break;
476 	}
477 	if (g_pattern_match_string(pspec, vname)) {
478 	    if (S != NULL) {
479 		S[n] = gretl_strdup(vname);
480 	    }
481 	    n++;
482 	}
483 	if (fgets(l2, sizeof l2, fp) == NULL) {
484 	    *err = DB_PARSE_ERROR;
485 	}
486     }
487 
488     return n;
489 }
490 
native_db_match_series(const char * glob,int * nmatch,const char * idxname,int * err)491 static char **native_db_match_series (const char *glob, int *nmatch,
492 				      const char *idxname, int *err)
493 {
494     GPatternSpec *pspec;
495     char **S = NULL;
496     FILE *fp = NULL;
497     char line[256];
498 
499     fp = gretl_fopen(idxname, "rb");
500     if (fp == NULL) {
501 	*err = E_FOPEN;
502 	*nmatch = 0;
503 	return NULL;
504     }
505 
506     pspec = g_pattern_spec_new(glob);
507 
508     *nmatch = db_match_glob(fp, line, sizeof line, pspec, NULL, err);
509 
510     if (!*err && *nmatch > 0) {
511 	S = strings_array_new(*nmatch);
512 	if (S == NULL) {
513 	    *nmatch = 0;
514 	    *err = E_ALLOC;
515 	} else {
516 	    rewind(fp);
517 	    db_match_glob(fp, line, sizeof line, pspec, S, err);
518 	}
519     }
520 
521     g_pattern_spec_free(pspec);
522     fclose(fp);
523 
524     return S;
525 }
526 
get_native_series_info(const char * series,SERIESINFO * sinfo,const char * idxname)527 static int get_native_series_info (const char *series,
528 				   SERIESINFO *sinfo,
529 				   const char *idxname)
530 {
531     FILE *fp = NULL;
532     char sername[VNAMELEN];
533     /* 2019-01-08: enlarge @s1 from 256 to 1024 */
534     char s1[1024], s2[72];
535     char stobs[OBSLEN], endobs[OBSLEN];
536     char pdc;
537     int offset = 0;
538     int gotit = 0, err = 0;
539     int n;
540 
541     fp = gretl_fopen(idxname, "rb");
542     if (fp == NULL) {
543 	return E_FOPEN;
544     }
545 
546     while (fgets(s1, sizeof s1, fp) && !gotit) {
547 	if (*s1 == '#') {
548 	    continue;
549 	}
550 	if (gretl_scan_varname(s1, sername) != 1) {
551 	    break;
552 	}
553 	if (!strcmp(series, sername)) {
554 	    gotit = 1;
555 	    strcpy(sinfo->varname, sername);
556 	}
557 	if (fgets(s2, sizeof s2, fp) == NULL) {
558 	    err = DB_PARSE_ERROR;
559 	    break;
560 	}
561 	if (gotit) {
562 	    get_native_series_comment(sinfo, s1);
563 	    if (sscanf(s2, "%c %10s %*s %10s %*s %*s %d",
564 		       &pdc, stobs, endobs, &sinfo->nobs) != 4) {
565 		gretl_errmsg_set(_("Failed to parse series information"));
566 		err = DB_PARSE_ERROR;
567 	    } else {
568 		get_native_series_pd(sinfo, pdc);
569 		get_native_series_obs(sinfo, stobs, endobs);
570 		sinfo->offset = offset;
571 		sinfo->t2 = sinfo->nobs - 1;
572 	    }
573 	} else {
574 	    if (sscanf(s2, "%*c %*s %*s %*s %*s %*s %d", &n) != 1) {
575 		gretl_errmsg_set(_("Failed to parse series information"));
576 		err = DB_PARSE_ERROR;
577 	    } else {
578 		offset += n * sizeof(dbnumber);
579 	    }
580 	}
581     }
582 
583     fclose(fp);
584 
585     if (!err && !gotit) {
586 	gretl_errmsg_sprintf(_("Series not found, '%s'"), series);
587 	err = DB_NO_SUCH_SERIES;
588     }
589 
590     return err;
591 }
592 
593 #ifdef USE_CURL
594 
remote_db_index_to_file(const char * fname)595 static int remote_db_index_to_file (const char *fname)
596 {
597     char *buf = NULL;
598     int err;
599 
600     err = retrieve_remote_db_index(saved_db_name, &buf);
601 
602     if (!err) {
603 	FILE *fp = gretl_fopen(fname, "wb");
604 
605 	if (fp == NULL) {
606 	    err = E_FOPEN;
607 	} else {
608 	    fputs(buf, fp);
609 	    fclose(fp);
610 #if 1
611 	    fprintf(stderr, "remote db index saved\n");
612 #endif
613 	}
614 	free(buf);
615     }
616 
617     return err;
618 }
619 
620 #endif /* USE_CURL */
621 
in7_get_obs(int y0,int p0,int y1,int p1,SERIESINFO * sinfo)622 static int in7_get_obs (int y0, int p0, int y1, int p1,
623 			SERIESINFO *sinfo)
624 {
625     int pd = sinfo->pd;
626     int n = (y1 - y0 + 1) * pd - (p0 - 1) - (pd - p1);
627     int err = 0;
628 
629     if (n <= 0) {
630 	err = 1;
631     } else {
632 	sinfo->nobs = n;
633 	sinfo->t2 = n - 1;
634     }
635 
636     return err;
637 }
638 
639 static int
pcgive_set_stobs_endobs(int y0,int p0,int y1,int p1,SERIESINFO * sinfo)640 pcgive_set_stobs_endobs (int y0, int p0, int y1, int p1,
641 			 SERIESINFO *sinfo)
642 {
643     int err = 0;
644 
645     if (sinfo->pd == 1) {
646 	sprintf(sinfo->stobs, "%d", y0);
647 	sprintf(sinfo->endobs, "%d", y1);
648 	if (y0 == 1) {
649 	    sinfo->undated = 1;
650 	}
651     } else if (sinfo->pd == 4) {
652 	sprintf(sinfo->stobs, "%d:%d", y0, p0);
653 	sprintf(sinfo->endobs, "%d:%d", y1, p1);
654     } else if (sinfo->pd == 12 || sinfo->pd == 52) {
655 	sprintf(sinfo->stobs, "%d:%02d", y0, p0);
656 	sprintf(sinfo->endobs, "%d:%02d", y1, p1);
657     } else {
658 	err = E_DATA; /* FIXME? */
659     }
660 
661     return err;
662 }
663 
664 static int
get_pcgive_series_info(const char * series,SERIESINFO * sinfo)665 get_pcgive_series_info (const char *series, SERIESINFO *sinfo)
666 {
667     FILE *fp;
668     char dbidx[MAXLEN];
669     char line[1024];
670     char fmt[24];
671     char *p;
672     int y0, p0, y1, p1;
673     int nf, gotit = 0;
674     int err = 0;
675 
676     strcpy(dbidx, saved_db_name);
677     p = strstr(dbidx, ".bn7");
678     if (p != NULL) {
679 	strcpy(p, ".in7");
680     } else {
681 	strcat(dbidx, ".in7");
682     }
683 
684 #if DB_DEBUG
685     fprintf(stderr, "get_pcgive_series_info: dbidx = '%s'\n", dbidx);
686 #endif
687 
688     fp = gretl_fopen(dbidx, "r");
689     if (fp == NULL) {
690 	gretl_errmsg_set(_("Couldn't open database index file"));
691 	return E_FOPEN;
692     }
693 
694     sprintf(fmt, "%%%ds %%d %%d %%d %%d %%d %%d", VNAMELEN - 1);
695 
696     while (fgets(line, sizeof line, fp) && !gotit) {
697 	if (*line == '>') {
698 	    *sinfo->varname = 0;
699 	    nf = sscanf(line + 1, fmt, sinfo->varname, &y0, &p0,
700 			&y1, &p1, &sinfo->pd, &sinfo->offset);
701 	    fprintf(stderr, "in7: varname='%s'\n", sinfo->varname);
702 	    if (!strcmp(sinfo->varname, series)) {
703 		gotit = 1;
704 	    } else {
705 		continue;
706 	    }
707 	    if (nf == 7 && y0 > 0 && p0 > 0 && y1 > 0 && p1 > 0 &&
708 		sinfo->pd >= 1 && sinfo->offset > 0) {
709 		while (fgets(line, sizeof line, fp)) {
710 		    if (*line == ';') {
711 			gretl_strstrip(line);
712 			series_info_set_description(sinfo, line + 1);
713 		    } else {
714 			break;
715 		    }
716 		}
717 		/* transcribe info */
718 		err = in7_get_obs(y0, p0, y1, p1, sinfo);
719 		if (!err) {
720 		    err = pcgive_set_stobs_endobs(y0, p0, y1, p1, sinfo);
721 		}
722 	    } else {
723 		err = E_DATA;
724 	    }
725 	}
726     }
727 
728     fclose(fp);
729 
730     if (!err && !gotit) {
731 	gretl_errmsg_sprintf(_("Series not found, '%s'"), series);
732 	err = DB_NO_SUCH_SERIES;
733     }
734 
735     return err;
736 }
737 
738 /* Figure the ending observation date of a series */
739 
get_endobs(char * datestr,int startyr,int startfrac,int pd,int n)740 static int get_endobs (char *datestr, int startyr, int startfrac,
741 		       int pd, int n)
742 {
743     int endyr, endfrac;
744 
745     endyr = startyr + n / pd;
746     endfrac = startfrac - 1 + n % pd;
747 
748     if (endfrac >= pd) {
749 	endyr++;
750 	endfrac -= pd;
751     }
752 
753     if (endfrac == 0) {
754 	endyr--;
755 	endfrac = pd;
756     }
757 
758     if (pd == 1) {
759 	sprintf(datestr, "%d", endyr);
760     } else if (pd == 4) {
761 	sprintf(datestr, "%d.%d", endyr, endfrac);
762     } else if (pd == 12 || pd == 52) {
763 	sprintf(datestr, "%d.%02d", endyr, endfrac);
764     }
765 
766     return 0;
767 }
768 
dinfo_sanity_check(const DATEINFO * dinfo)769 static int dinfo_sanity_check (const DATEINFO *dinfo)
770 {
771     int err = 0;
772 
773     if (dinfo->info < 0 || dinfo->info > 365) {
774 	err = 1;
775     } else if (dinfo->day < 0 || dinfo->day > 365) {
776 	err = 1;
777     } else if (dinfo->year < 0 || dinfo->year > 3000) {
778 	err = 1;
779     } else if (dinfo->info == 52) {
780 	/* note: "month" = week */
781 	if (dinfo->month < 0 || dinfo->month > 52) {
782 	    err = 1;
783 	}
784     } else {
785 	/* annual, quarterly, monthly */
786 	if (dinfo->month < 0 || dinfo->month > 12) {
787 	    err = 1;
788 	}
789     }
790 
791     if (err) {
792 	gretl_errmsg_set(_("This is not a valid RATS 4.0 database"));
793 	fprintf(stderr, "rats database: failed dinfo_sanity_check:\n"
794 		" info=%d, year=%d, month=%d, day=%d\n",
795 		(int) dinfo->info, (int) dinfo->year, (int) dinfo->month,
796 		(int) dinfo->day);
797     }
798 
799     return err;
800 }
801 
dinfo_to_sinfo(const DATEINFO * dinfo,SERIESINFO * sinfo,const char * varname,const char * comment,int n,int offset)802 static int dinfo_to_sinfo (const DATEINFO *dinfo, SERIESINFO *sinfo,
803 			   const char *varname, const char *comment,
804 			   int n, int offset)
805 {
806     int startfrac = 0;
807     char pdstr[8] = {0};
808     int err = 0;
809 
810     if (dinfo_sanity_check(dinfo)) {
811 	return 1;
812     }
813 
814     sprintf(sinfo->stobs, "%d", dinfo->year);
815 
816     if (dinfo->info == 4) {
817 	sprintf(pdstr, ".%d", dinfo->month);
818 	if (dinfo->month == 1) {
819 	    startfrac = 1;
820 	} else if (dinfo->month > 1 && dinfo->month <= 4) {
821 	    startfrac = 2;
822 	} else if (dinfo->month > 4 && dinfo->month <= 7) {
823 	    startfrac = 3;
824 	} else {
825 	    startfrac = 4;
826 	}
827     } else if (dinfo->info == 12 || dinfo->info == 52) {
828 	sprintf(pdstr, ".%02d", dinfo->month);
829 	startfrac = dinfo->month;
830     } else if (dinfo->info == 1) {
831 	startfrac = 0;
832     } else {
833 	fprintf(stderr, "frequency (%d) does not seem to make sense\n",
834 		(int) dinfo->info);
835 	gretl_errmsg_sprintf(("frequency (%d) does not seem to make sense"),
836 			     (int) dinfo->info);
837 	err = 1;
838     }
839 
840     if (*pdstr) {
841 	strcat(sinfo->stobs, pdstr);
842     }
843 
844     get_endobs(sinfo->endobs, dinfo->year, startfrac, dinfo->info, n);
845 
846     sinfo->pd = dinfo->info;
847     sinfo->nobs = n;
848     sinfo->t2 = n - 1;
849     sinfo->offset = offset;
850 
851     strncat(sinfo->varname, varname, VNAMELEN - 1);
852     series_info_set_description(sinfo, comment);
853 
854 #if DB_DEBUG
855     fprintf(stderr, "dinfo_to_sinfo: '%s': set sinfo->offset = %d\n", varname,
856 	    (int) offset);
857 #endif
858 
859     return err;
860 }
861 
in7_to_sinfo(const char * varname,const char * comment,int y0,int p0,int y1,int p1,int pd,int offset,SERIESINFO * sinfo)862 static int in7_to_sinfo (const char *varname, const char *comment,
863 			 int y0, int p0, int y1, int p1, int pd,
864 			 int offset, SERIESINFO *sinfo)
865 {
866     int err = 0;
867 
868     if (pd == 4) {
869 	sprintf(sinfo->stobs, "%d.%d", y0, p0);
870 	sprintf(sinfo->endobs, "%d.%d", y1, p1);
871     } else if (pd == 12 || pd == 52) {
872 	sprintf(sinfo->stobs, "%d.%02d", y0, p0);
873 	sprintf(sinfo->endobs, "%d.%02d", y1, p1);
874     } else if (pd == 1) {
875 	sprintf(sinfo->stobs, "%d", y0);
876 	sprintf(sinfo->endobs, "%d", y1);
877     } else {
878 	fprintf(stderr, "frequency %d is not supported\n", pd);
879 	gretl_errmsg_sprintf(_("frequency %d is not supported"), pd);
880 	err = 1;
881     }
882 
883     if (!err) {
884 	sinfo->pd = pd;
885 	err = in7_get_obs(y0, p0, y1, p1, sinfo);
886     }
887 
888     if (!err) {
889 	strcpy(sinfo->varname, varname);
890 	if (comment != NULL && *comment != 0) {
891 	    series_info_set_description(sinfo, comment);
892 	}
893 	sinfo->pd = pd;
894 	sinfo->offset = offset;
895     }
896 
897     return err;
898 }
899 
read_rats_directory(FILE * fp,const char * series_name,SERIESINFO * sinfo)900 static RECNUM read_rats_directory (FILE *fp, const char *series_name,
901 				   SERIESINFO *sinfo)
902 {
903     RATSDirect rdir;
904     DATEINFO dinfo;
905     RECNUM ret;
906     int nread;
907     int i, err = 0;
908 
909     memset(rdir.series_name, 0, NAMELENGTH);
910 
911     if (fread(&rdir.back_point, sizeof(RECNUM), 1, fp) != 1) {
912 	err = 1;
913     } else if (fread(&rdir.forward_point, sizeof(RECNUM), 1, fp) != 1) {
914 	err = 1;
915     }
916     if (!err) {
917 	fseek(fp, 4L, SEEK_CUR); /* skip two shorts */
918 	if (fread(&rdir.first_data, sizeof(RECNUM), 1, fp) != 1) {
919 	    err = 1;
920 	} else if (fread(rdir.series_name, NAMELENGTH, 1, fp) != 1) {
921 	    err = 1;
922 	}
923     }
924 
925     if (!err) {
926 	rdir.series_name[NAMELENGTH-1] = '\0';
927 	gretl_strstrip(rdir.series_name);
928 #if DB_DEBUG
929 	fprintf(stderr, "read_rats_directory: name='%s'\n", rdir.series_name);
930 #endif
931 	if (!isprint(rdir.series_name[0])) {
932 	    err = 1;
933 	}
934     }
935 
936     if (err) {
937 	return RATS_PARSE_ERROR;
938     }
939 
940     if (series_name != NULL && strcmp(series_name, rdir.series_name)) {
941 	/* specific series not found yet: keep going */
942 	return rdir.forward_point;
943     }
944 
945     /* Now the dateinfo: we can't read this in one go either :-( */
946 
947     /* skip long, short, long, short */
948     fseek(fp, 12, SEEK_CUR);
949     nread = 0;
950     nread += fread(&dinfo.info, sizeof(gint32), 1, fp);
951     nread += fread(&dinfo.digits, sizeof(short), 1, fp);
952     nread += fread(&dinfo.year, sizeof(short), 1, fp);
953     nread += fread(&dinfo.month, sizeof(short), 1, fp);
954     nread += fread(&dinfo.day, sizeof(short), 1, fp);
955     nread += fread(&rdir.datapoints, sizeof(gint32), 1, fp);
956 
957     if (nread != 6) {
958 	return RATS_PARSE_ERROR;
959     }
960 
961     fseek(fp, sizeof(short) * 4L, SEEK_CUR);  /* skip 4 shorts */
962 
963 #if DB_DEBUG
964     fprintf(stderr, "info=%d, digits=%d, year=%d, mon=%d, day=%d\n",
965 	    (int) dinfo.info, (int) dinfo.digits, (int) dinfo.year,
966 	    (int) dinfo.month, (int) dinfo.day);
967     fprintf(stderr, "datapoints = %d\n", (int) rdir.datapoints);
968 #endif
969 
970     if (fread(&rdir.comment_lines, sizeof(short), 1, fp) != 1) {
971 	err = 1;
972     } else {
973 	fseek(fp, 1L, SEEK_CUR); /* skip one char */
974 	for (i=0; i<2 && !err; i++) {
975 	    if (i < rdir.comment_lines) {
976 		memset(rdir.comments[i], 0, 80);
977 		err = (fread(rdir.comments[i], 80, 1, fp) != 1);
978 		if (!err) {
979 		    rdir.comments[i][79] = '\0';
980 		    gretl_strstrip(rdir.comments[i]);
981 		}
982 	    } else {
983 		rdir.comments[i][0] = 0;
984 		fseek(fp, 80, SEEK_CUR);
985 	    }
986 	}
987     }
988 
989 #if DB_DEBUG
990     if (!err) {
991 	fprintf(stderr, "comment_lines = %d\n", (int) rdir.comment_lines);
992 	fprintf(stderr, "comment[0] = '%s'\n", rdir.comments[0]);
993 	fprintf(stderr, "comment[1] = '%s'\n", rdir.comments[1]);
994     }
995 #endif
996 
997     if (!err) {
998 	err = dinfo_to_sinfo(&dinfo, sinfo, rdir.series_name, rdir.comments[0],
999 			     rdir.datapoints, rdir.first_data);
1000     }
1001 
1002     ret = (err)? RATS_PARSE_ERROR : rdir.forward_point;
1003 
1004 #if DB_DEBUG
1005     fprintf(stderr, "read_rats_directory: err = %d, forward_point=%d, first_data=%d\n",
1006 	    err, (int) rdir.forward_point, (int) rdir.first_data);
1007     fprintf(stderr, "returning %d\n", (int) ret);
1008 #endif
1009 
1010     return ret;
1011 }
1012 
series_info_init(SERIESINFO * sinfo)1013 static void series_info_init (SERIESINFO *sinfo)
1014 {
1015     sinfo->t1 = sinfo->t2 = 0;
1016     sinfo->nobs = 0;
1017     sinfo->v = 1;
1018     sinfo->pd = 1;
1019     sinfo->offset = -1;
1020     sinfo->err = 0;
1021     sinfo->undated = 0;
1022 
1023     sinfo->varname[0] = '\0';
1024     sinfo->stobs[0] = '\0';
1025     sinfo->endobs[0] = '\0';
1026 
1027     sinfo->descrip = NULL;
1028     sinfo->data = NULL;
1029 }
1030 
series_info_set_description(SERIESINFO * sinfo,const char * s)1031 void series_info_set_description (SERIESINFO *sinfo,
1032 				  const char *s)
1033 {
1034     if (sinfo->descrip != NULL) {
1035 	free(sinfo->descrip);
1036 	sinfo->descrip = NULL;
1037     }
1038     if (s != NULL && *s != '\0') {
1039 	sinfo->descrip = gretl_strdup(s);
1040     }
1041 }
1042 
series_info_clear(SERIESINFO * sinfo)1043 static void series_info_clear (SERIESINFO *sinfo)
1044 {
1045     free(sinfo->descrip);
1046 }
1047 
1048 #define DB_INIT_ROWS 32
1049 
1050 /**
1051  * dbwrapper_destroy:
1052  * @dw: database series wrapper.
1053  *
1054  * Frees all resources associated with @dw as well as the pointer
1055  * itself.
1056  */
1057 
dbwrapper_destroy(dbwrapper * dw)1058 void dbwrapper_destroy (dbwrapper *dw)
1059 {
1060     if (dw != NULL) {
1061 	free(dw->fname);
1062 	free(dw->sinfo);
1063 	free(dw);
1064     }
1065 }
1066 
1067 /**
1068  * dbwrapper_new:
1069  * @n: initial number of series.
1070  * @fname: database filename.
1071  * @dbtype: database type code.
1072  *
1073  * Returns: an allocated database series wrapper.
1074  */
1075 
dbwrapper_new(int n,const char * fname,int dbtype)1076 dbwrapper *dbwrapper_new (int n, const char *fname, int dbtype)
1077 {
1078     dbwrapper *dw;
1079     int i;
1080 
1081     if (n == 0) {
1082 	n = DB_INIT_ROWS;
1083     }
1084 
1085     dw = malloc(sizeof *dw);
1086     if (dw == NULL) {
1087 	return NULL;
1088     }
1089 
1090     dw->fname = gretl_strdup(fname);
1091     dw->dbtype = dbtype;
1092 
1093     dw->sinfo = malloc(n * sizeof *dw->sinfo);
1094     if (dw->sinfo == NULL) {
1095 	free(dw);
1096 	return NULL;
1097     }
1098 
1099     for (i=0; i<n; i++) {
1100 	series_info_init(&dw->sinfo[i]);
1101     }
1102 
1103     dw->nv = 0;
1104     dw->nalloc = n;
1105 
1106     return dw;
1107 }
1108 
dbwrapper_expand(dbwrapper * dw)1109 static int dbwrapper_expand (dbwrapper *dw)
1110 {
1111     SERIESINFO *sinfo;
1112     int i, newsz;
1113 
1114     newsz = (dw->nv / DB_INIT_ROWS) + 1;
1115     newsz *= DB_INIT_ROWS;
1116 
1117     sinfo = realloc(dw->sinfo, newsz * sizeof *sinfo);
1118     if (sinfo == NULL) {
1119 	free(dw->sinfo);
1120 	dw->sinfo = NULL;
1121 	return 1;
1122     }
1123 
1124     dw->sinfo = sinfo;
1125 
1126     for (i=dw->nalloc; i<newsz; i++) {
1127 	series_info_init(&dw->sinfo[i]);
1128     }
1129 
1130     dw->nalloc = newsz;
1131 
1132     return 0;
1133 }
1134 
read_in7_series_info(FILE * fp,dbwrapper * dw)1135 static int read_in7_series_info (FILE *fp, dbwrapper *dw)
1136 {
1137     char line[1024];
1138     char sname[VNAMELEN];
1139     char desc[MAXLABEL];
1140     char fmt[24];
1141     int y0, p0, y1, p1;
1142     int pd, offset, pos;
1143     int i, nf;
1144     int err = 0;
1145 
1146     sprintf(fmt, "%%%ds %%d %%d %%d %%d %%d %%d", VNAMELEN - 1);
1147 
1148     i = 0;
1149     while (fgets(line, sizeof line, fp) && !err) {
1150 	if (*line == '>') {
1151 	    nf = sscanf(line + 1, fmt, sname, &y0, &p0, &y1,
1152 			&p1, &pd, &offset);
1153 	    if (nf == 7 && y0 > 0 && p0 > 0 && y1 > 0 && p1 > 0 &&
1154 		pd >= 1 && offset > 0) {
1155 		*desc = 0;
1156 		pos = ftell(fp);
1157 		while (fgets(line, sizeof line, fp)) {
1158 		    if (*line == ';') {
1159 			/* following series description */
1160 			int rem = MAXLABEL - strlen(desc) - 1;
1161 
1162 			if (rem > 0) {
1163 			    gretl_strstrip(line);
1164 			    strncat(desc, line + 1, rem);
1165 			}
1166 			pos = ftell(fp);
1167 		    } else {
1168 			/* not a description: throw the line back */
1169 			fseek(fp, pos, SEEK_SET);
1170 			break;
1171 		    }
1172 		}
1173 		/* record info */
1174 		err = in7_to_sinfo(sname, desc, y0, p0, y1, p1,
1175 				   pd, offset, &dw->sinfo[i++]);
1176 		if (!err) {
1177 		    dw->nv += 1;
1178 		}
1179 	    }
1180 	}
1181     }
1182 
1183     return err;
1184 }
1185 
count_in7_series(FILE * fp,int * err)1186 static int count_in7_series (FILE *fp, int *err)
1187 {
1188     char line[1024];
1189     char sname[VNAMELEN];
1190     char fmt[24];
1191     int y0, p0, y1, p1;
1192     int pd, offset;
1193     int nf, i = 0, nseries = 0;
1194 
1195     sprintf(fmt, "%%%ds %%d %%d %%d %%d %%d %%d", VNAMELEN - 1);
1196 
1197     while (fgets(line, sizeof line, fp)) {
1198 	if (i == 0 && strncmp(line, "pcgive 700", 10)) {
1199 	    *err = 1;
1200 	    gretl_errmsg_set("This is not a PcGive 700 data file");
1201 	    return 0;
1202 	}
1203 	if (*line == '>') {
1204 	    nf = sscanf(line + 1, fmt, sname, &y0, &p0, &y1,
1205 			&p1, &pd, &offset);
1206 	    if (nf < 7 || y0 < 0 || p0 < 0 || y1 < 0 || p1 < 0 ||
1207 		pd < 1 || offset < 0) {
1208 		fprintf(stderr, "Error reading series info\n");
1209 	    } else {
1210 		nseries++;
1211 	    }
1212 	}
1213 	i++;
1214     }
1215 
1216     return nseries;
1217 }
1218 
1219 /**
1220  * read_pcgive_db:
1221  * @fname: name of database file.
1222  * @fp: pre-opened stream (caller to close it)
1223  *
1224  * Read the series info from a PcGive database, .in7 file
1225  *
1226  * Returns: pointer to a #dbwrapper containing the series info,
1227  * or NULL in case of failure.
1228  */
1229 
read_pcgive_db(const char * fname,FILE * fp)1230 dbwrapper *read_pcgive_db (const char *fname, FILE *fp)
1231 {
1232     dbwrapper *dw;
1233     int ns, err = 0;
1234 
1235     gretl_error_clear();
1236 
1237     ns = count_in7_series(fp, &err);
1238     if (ns == 0) {
1239 	if (!err) {
1240 	    gretl_errmsg_set(_("No valid series found"));
1241 	}
1242 	return NULL;
1243     }
1244 
1245 #if DB_DEBUG
1246     fprintf(stderr, "in7: found %d series\n", ns);
1247 #endif
1248 
1249     /* allocate table for series rows */
1250     dw = dbwrapper_new(ns, fname, GRETL_PCGIVE_DB);
1251     if (dw == NULL) {
1252 	gretl_errmsg_set(_("Out of memory!"));
1253 	return NULL;
1254     }
1255 
1256     rewind(fp);
1257 
1258     /* Go find the series info */
1259     err = read_in7_series_info(fp, dw);
1260 
1261     if (err) {
1262 	dbwrapper_destroy(dw);
1263 	dw = NULL;
1264     }
1265 
1266     return dw;
1267 }
1268 
1269 /**
1270  * read_rats_db:
1271  * @fname: database filename.
1272  * @fp: pre-opened stream (caller to close it)
1273  *
1274  * Read the series info from a RATS 4.0 database: read the base
1275  * block at offset 0 in the data file, and recurse through the
1276  * directory entries.
1277  *
1278  * Returns: pointer to a #dbwrapper containing the series info,
1279  * or NULL in case of failure.
1280  */
1281 
read_rats_db(const char * fname,FILE * fp)1282 dbwrapper *read_rats_db (const char *fname, FILE *fp)
1283 {
1284     dbwrapper *dw;
1285     long forward = 0;
1286     int i, err = 0;
1287 
1288     gretl_error_clear();
1289 
1290     /* get into position */
1291     fseek(fp, 30L, SEEK_SET); /* skip unneeded fields */
1292     if (fread(&forward, sizeof forward, 1, fp) == 1) {
1293 	fseek(fp, 4L, SEEK_CUR);
1294     }
1295 
1296     /* basic check */
1297     if (forward <= 0) {
1298 	gretl_errmsg_set(_("This is not a valid RATS 4.0 database"));
1299 	fprintf(stderr, "rats database: got forward = %ld\n", forward);
1300 	return NULL;
1301     }
1302 
1303     /* allocate table for series rows */
1304     dw = dbwrapper_new(0, fname, GRETL_RATS_DB);
1305     if (dw == NULL) {
1306 	gretl_errmsg_set(_("Out of memory!"));
1307 	return NULL;
1308     }
1309 
1310     /* Go find the series */
1311     i = 0;
1312     while (forward && !err) {
1313 	dw->nv += 1;
1314 #if DB_DEBUG
1315 	fprintf(stderr, "read_rats_db: forward = %d, nv = %d\n",
1316 		(int) forward, dw->nv);
1317 #endif
1318 	if (dw->nv > 0 && dw->nv % DB_INIT_ROWS == 0) {
1319 	    err = dbwrapper_expand(dw);
1320 	    if (err) {
1321 		gretl_errmsg_set(_("Out of memory!"));
1322 	    }
1323 	}
1324 	if (!err) {
1325 	    err = fseek(fp, (forward - 1) * 256L, SEEK_SET);
1326 	    if (!err) {
1327 		forward = read_rats_directory(fp, NULL, &dw->sinfo[i++]);
1328 		if (forward == RATS_PARSE_ERROR) {
1329 		    err = 1;
1330 		}
1331 	    }
1332 	}
1333 #if DB_DEBUG
1334 	fprintf(stderr, "bottom of loop, err = %d\n", err);
1335 #endif
1336     }
1337 
1338 #if DB_DEBUG
1339     fprintf(stderr, "read_rats_db: err = %d, dw = %p\n",
1340 	    err, (void *) dw);
1341 #endif
1342 
1343     if (err) {
1344 	dbwrapper_destroy(dw);
1345 	return NULL;
1346     }
1347 
1348     return dw;
1349 }
1350 
1351 /* retrieve the actual data values from the data blocks */
1352 
get_rats_series(int offset,SERIESINFO * sinfo,FILE * fp,double ** Z)1353 static int get_rats_series (int offset, SERIESINFO *sinfo, FILE *fp,
1354 			    double **Z)
1355 {
1356     RATSData rdata;
1357     double x;
1358     int v = sinfo->v;
1359     int i, t, T;
1360     int miss = 0;
1361     int err = 0;
1362 
1363     fprintf(stderr, "get_rats_series: starting from offset %d\n", offset);
1364 
1365     if (sinfo->t2 > 0) {
1366 	T = sinfo->t2 + 1;
1367     } else {
1368 	T = sinfo->nobs;
1369     }
1370 
1371     rdata.forward_point = offset;
1372     t = sinfo->t1;
1373 
1374     while (rdata.forward_point) {
1375 	fseek(fp, (rdata.forward_point - 1) * 256L, SEEK_SET);
1376 	/* the RATSData struct is actually 256 bytes.  Yay! */
1377 	if (fread(&rdata, sizeof rdata, 1, fp) != 1) {
1378 	    err = E_DATA;
1379 	    break;
1380 	}
1381 	for (i=0; i<31 && t<T; i++) {
1382 	    x = rdata.data[i];
1383 #if G_BYTE_ORDER == G_BIG_ENDIAN
1384 	    reverse_double(x);
1385 #endif
1386 	    if (isnan(x)) {
1387 		x = NADBL;
1388 		miss = 1;
1389 	    }
1390 	    Z[v][t++] = x;
1391 	}
1392     }
1393 
1394     if (miss && !err) {
1395 	err = DB_MISSING_DATA;
1396     }
1397 
1398     return err;
1399 }
1400 
1401 /**
1402  * get_rats_db_data:
1403  * @fname: name of RATS 4.0 database to read from
1404  * @sinfo: holds info on the given series (input)
1405  * @Z: data matrix
1406  *
1407  * Read the actual data values for a series from a RATS database.
1408  *
1409  * Returns: 0 on successful completion, E_FOPEN if
1410  * the data could not be read, and DB_MISSING_DATA if the
1411  * data were found but there were some missing values.
1412  */
1413 
get_rats_db_data(const char * fname,SERIESINFO * sinfo,double ** Z)1414 int get_rats_db_data (const char *fname, SERIESINFO *sinfo,
1415 		      double **Z)
1416 {
1417     FILE *fp;
1418     int err = 0;
1419 
1420     fp = gretl_fopen(fname, "rb");
1421     if (fp == NULL) {
1422 	err = E_FOPEN;
1423     } else {
1424 	err = get_rats_series(sinfo->offset, sinfo, fp, Z);
1425 	fclose(fp);
1426     }
1427 
1428     return err;
1429 }
1430 
get_rats_series_info(const char * series_name,SERIESINFO * sinfo)1431 static int get_rats_series_info (const char *series_name, SERIESINFO *sinfo)
1432 {
1433     FILE *fp;
1434     long forward = 0;
1435     int err = 0;
1436 
1437     gretl_error_clear();
1438 
1439     fp = gretl_fopen(saved_db_name, "rb");
1440     if (fp == NULL) {
1441 	return E_FOPEN;
1442     }
1443 
1444 #if DB_DEBUG
1445     fprintf(stderr, "Opened %s\n", saved_db_name);
1446 #endif
1447 
1448     /* get into position */
1449     fseek(fp, 30L, SEEK_SET);
1450     if (fread(&forward, sizeof forward, 1, fp) == 1) {
1451 	fseek(fp, 4L, SEEK_CUR);
1452     }
1453 
1454     /* basic check */
1455     if (forward <= 0) {
1456 	gretl_errmsg_set(_("This is not a valid RATS 4.0 database"));
1457 	fprintf(stderr, "rats database: got forward = %ld\n", forward);
1458 	return DB_PARSE_ERROR;
1459     }
1460 
1461     sinfo->offset = 0;
1462 
1463     /* Go find the series */
1464     while (forward) {
1465 	fseek(fp, (forward - 1) * 256L, SEEK_SET);
1466 	forward = read_rats_directory(fp, series_name, sinfo);
1467 	if (forward == RATS_PARSE_ERROR) {
1468 	    sinfo->offset = -1;
1469 	}
1470 	if (sinfo->offset != 0) {
1471 	    break;
1472 	}
1473     }
1474 
1475     fclose(fp);
1476 
1477     if (sinfo->offset < 0) {
1478 	err = DB_NO_SUCH_SERIES;
1479     }
1480 
1481 #if DB_DEBUG
1482     fprintf(stderr, "get_rats_series_info: offset = %d\n", sinfo->offset);
1483     fprintf(stderr, " pd = %d, nobs = %d\n", sinfo->pd, sinfo->nobs);
1484 #endif
1485 
1486     return err;
1487 }
1488 
1489 /* For importation of database series */
1490 
get_compacted_xt(const double * src,int n,CompactMethod method,int compfac,int skip)1491 static double *get_compacted_xt (const double *src, int n,
1492 				 CompactMethod method,
1493 				 int compfac, int skip)
1494 {
1495     int p, t;
1496     double *x;
1497 
1498     x = malloc(n * sizeof *x);
1499     if (x == NULL) {
1500 	return NULL;
1501     }
1502 
1503     for (t=0; t<n; t++) {
1504 	p = (t + 1) * compfac;
1505 	x[t] = 0.0;
1506 	if (method == COMPACT_AVG || method == COMPACT_SUM) {
1507 	    int i, st;
1508 
1509 	    for (i=1; i<=compfac; i++) {
1510 		st = p - i + skip;
1511 		if (na(src[st])) {
1512 		    x[t] = NADBL;
1513 		    break;
1514 		} else {
1515 		    x[t] += src[st];
1516 		}
1517 	    }
1518 	    if (method == COMPACT_AVG) {
1519 		x[t] /= (double) compfac;
1520 	    }
1521 	} else if (method == COMPACT_EOP) {
1522 	    x[t] = src[p - 1 + skip];
1523 	} else if (method == COMPACT_SOP) {
1524 	    x[t] = src[p - compfac + skip];
1525 	}
1526     }
1527 
1528     return x;
1529 }
1530 
1531 /* Compact a single series from a database, for importation
1532    into a working dataset of lower frequency.  At present
1533    this is permitted only for the cases:
1534 
1535      quarterly -> annual
1536      monthly   -> quarterly
1537      monthly   -> annual
1538 */
1539 
compact_db_series(const double * src,int pd,int * pnobs,char * stobs,int target_pd,CompactMethod method)1540 static double *compact_db_series (const double *src,
1541 				  int pd, int *pnobs,
1542 				  char *stobs,
1543 				  int target_pd,
1544 				  CompactMethod method)
1545 {
1546     int p0, y0, endskip, goodobs;
1547     int skip = 0, compfac = pd / target_pd;
1548     double *x;
1549 
1550     if (target_pd == 1) {
1551 	/* figure the annual dates */
1552 	y0 = atoi(stobs);
1553 	p0 = atoi(stobs + 5);
1554 	if (p0 != 1) {
1555 	    ++y0;
1556 	    skip = compfac - (p0 + 1);
1557 	}
1558 	sprintf(stobs, "%d", y0);
1559     } else if (target_pd == 4) {
1560 	/* figure the quarterly dates */
1561 	float q;
1562 	int q0;
1563 
1564 	y0 = atoi(stobs);
1565 	p0 = atoi(stobs + 5);
1566 	q = 1.0 + p0 / 3.;
1567 	q0 = q + .5;
1568 	skip = ((q0 - 1) * 3) + 1 - p0;
1569 	if (q0 == 5) {
1570 	    y0++;
1571 	    q0 = 1;
1572 	}
1573 	sprintf(stobs, "%d.%d", y0, q0);
1574     } else {
1575 	return NULL;
1576     }
1577 
1578     endskip = (*pnobs - skip) % compfac;
1579     goodobs = (*pnobs - skip - endskip) / compfac;
1580     *pnobs = goodobs;
1581 
1582 #if DB_DEBUG
1583     fprintf(stderr, "startskip = %d\n", skip);
1584     fprintf(stderr, "endskip = %d\n", endskip);
1585     fprintf(stderr, "goodobs = %d\n", goodobs);
1586     fprintf(stderr, "compfac = %d\n", compfac);
1587     fprintf(stderr, "starting date = %s\n", stobs);
1588 #endif
1589 
1590     x = get_compacted_xt(src, goodobs, method, compfac, skip);
1591 
1592     return x;
1593 }
1594 
1595 #define EXPAND_DEBUG 0
1596 
1597 /* Expand a single series from a database, for importation
1598    into a working dataset of higher frequency.  At present
1599    this is permitted only for the cases:
1600 
1601    1) annual    -> quarterly
1602    2) annual    -> monthly
1603    3) quarterly -> monthly
1604 */
1605 
expand_db_series(const double * src,int pd,int * pnobs,char * stobs,DATASET * dset)1606 static double *expand_db_series (const double *src,
1607 				 int pd, int *pnobs,
1608 				 char *stobs,
1609 				 DATASET *dset)
1610 {
1611     char new_stobs[OBSLEN] = {0};
1612     int target_pd = dset->pd;
1613     int oldn = *pnobs;
1614     int mult, newn;
1615     double *x = NULL;
1616     int j, t;
1617     int err = 0;
1618 
1619     mult = target_pd / pd;
1620     newn = mult * oldn;
1621 
1622     x = malloc(newn * sizeof *x);
1623     if (x == NULL) {
1624 	err = E_ALLOC;
1625     } else {
1626 	int s = 0;
1627 
1628 	for (t=0; t<oldn; t++) {
1629 	    for (j=0; j<mult; j++) {
1630 		x[s++] = src[t];
1631 	    }
1632 	}
1633     }
1634 
1635 #if EXPAND_DEBUG
1636     fprintf(stderr, "expand_db_series 1: mult=%d, newn=%d, stobs='%s'\n",
1637 	    mult, newn, stobs);
1638 #endif
1639 
1640     if (err) {
1641 	return NULL;
1642     }
1643 
1644     if (pd == 1) {
1645 	strcpy(new_stobs, stobs);
1646 	if (target_pd == 4) {
1647 	    strcat(new_stobs, ":1");
1648 	} else {
1649 	    strcat(new_stobs, ":01");
1650 	}
1651     } else {
1652 	int yr, qtr, mo;
1653 
1654 	if (strchr(stobs, '.')) {
1655 	    sscanf(stobs, "%d.%d", &yr, &qtr);
1656 	} else {
1657 	    sscanf(stobs, "%d:%d", &yr, &qtr);
1658 	}
1659 	mo = (qtr - 1) * 3 + 1;
1660 	sprintf(new_stobs, "%d:%02d", yr, mo);
1661     }
1662 
1663     /* revise incoming values */
1664     strcpy(stobs, new_stobs);
1665     *pnobs = newn;
1666 
1667 #if EXPAND_DEBUG
1668     fprintf(stderr, "expand_db_series 2: pd=%d, stobs='%s'\n",
1669 	    pd, stobs);
1670 #endif
1671 
1672     return x;
1673 }
1674 
set_db_name(const char * fname,int filetype,PRN * prn)1675 int set_db_name (const char *fname, int filetype, PRN *prn)
1676 {
1677     FILE *fp;
1678     int err = 0;
1679 
1680     *saved_db_name = '\0';
1681     if (fname != NULL) {
1682 	strncat(saved_db_name, fname, MAXLEN - 1);
1683     }
1684 
1685     if (filetype == GRETL_DBNOMICS || filetype == 0) {
1686 	saved_db_type = filetype;
1687 	return 0;
1688     }
1689 
1690     if (filetype == GRETL_NATIVE_DB_WWW) {
1691 #ifdef USE_CURL
1692 	int n = strlen(saved_db_name);
1693 
1694 	if (n > 4) {
1695 	    n -= 4;
1696 	    if (!strcmp(saved_db_name + n, ".bin")) {
1697 		saved_db_name[n] = '\0';
1698 	    }
1699 	}
1700 	err = check_remote_db(saved_db_name);
1701 	if (!err) {
1702 	    saved_db_type = filetype;
1703 	    pprintf(prn, "%s\n", saved_db_name);
1704 	}
1705 #else
1706 	pprintf(prn, _("Internet access not supported"));
1707 	pputc(prn, '\n');
1708 	err = E_DATA;
1709 #endif
1710 	return err;
1711     }
1712 
1713     fp = gretl_fopen(saved_db_name, "rb");
1714 
1715     if (fp == NULL && !g_path_is_absolute(saved_db_name) &&
1716 	filetype == GRETL_NATIVE_DB) {
1717 	/* try looking a bit more */
1718 	const char *path = gretl_binbase();
1719 
1720 	if (path != NULL && *path != '\0') {
1721 	    gretl_build_path(saved_db_name, path, fname, NULL);
1722 	    fp = gretl_fopen(saved_db_name, "rb");
1723 	}
1724 
1725 #ifdef OS_OSX
1726 	if (fp == NULL) {
1727 	    gchar *tmp = g_build_filename(gretl_app_support_dir(), "db",
1728 					  fname, NULL);
1729 
1730 	    fp = gretl_fopen(tmp, "rb");
1731 	    if (fp != NULL) {
1732 		strcpy(saved_db_name, tmp);
1733 	    }
1734 	    g_free(tmp);
1735 	}
1736 #endif
1737     }
1738 
1739     if (fp == NULL) {
1740 	*saved_db_name = '\0';
1741 	pprintf(prn, _("Couldn't open %s\n"), fname);
1742 	err = E_FOPEN;
1743     } else {
1744 	fclose(fp);
1745 	saved_db_type = filetype;
1746 	pprintf(prn, "%s\n", saved_db_name);
1747     }
1748 
1749     return err;
1750 }
1751 
get_db_name(void)1752 const char *get_db_name (void)
1753 {
1754     return saved_db_name;
1755 }
1756 
1757 /* Handling of DSN setup for ODBC: grab the dsn, username
1758    and password strings.
1759 */
1760 
get_dsn_field(const char * tag,const char * src)1761 static char *get_dsn_field (const char *tag, const char *src)
1762 {
1763     const char *p;
1764     char needle[12];
1765     char *ret = NULL;
1766 
1767     sprintf(needle, "%s=", tag);
1768     p = strstr(src, needle);
1769 
1770     if (p != NULL) {
1771 	p += strlen(needle);
1772 	if (*p == '"' || *p == '\'') {
1773 	    ret = gretl_quoted_string_strdup(p, NULL);
1774 	} else {
1775 	    ret = gretl_strndup(p, strcspn(p, " "));
1776 	}
1777     }
1778 
1779     return ret;
1780 }
1781 
1782 static ODBC_info gretl_odinfo;
1783 
ODBC_info_clear_read(void)1784 static void ODBC_info_clear_read (void)
1785 {
1786     int i;
1787 
1788     free(gretl_odinfo.query);
1789     gretl_odinfo.query = NULL;
1790 
1791     doubles_array_free(gretl_odinfo.X, gretl_odinfo.nvars);
1792     gretl_odinfo.X = NULL;
1793 
1794     strings_array_free(gretl_odinfo.S, gretl_odinfo.nrows);
1795     gretl_odinfo.S = NULL;
1796 
1797     gretl_string_table_destroy(gretl_odinfo.gst);
1798     gretl_odinfo.gst = NULL;
1799 
1800     for (i=0; i<ODBC_OBSCOLS; i++) {
1801 	gretl_odinfo.coltypes[i] = 0;
1802     }
1803 
1804     if (gretl_odinfo.fmts != NULL) {
1805 	strings_array_free(gretl_odinfo.fmts, gretl_odinfo.obscols);
1806 	gretl_odinfo.fmts = NULL;
1807     }
1808 
1809     gretl_odinfo.nrows = 0;
1810     gretl_odinfo.obscols = 0;
1811     gretl_odinfo.nvars = 0;
1812 }
1813 
gretl_odbc_cleanup(void)1814 static void gretl_odbc_cleanup (void)
1815 {
1816     free(gretl_odinfo.dsn);
1817     gretl_odinfo.dsn = NULL;
1818 
1819     free(gretl_odinfo.username);
1820     gretl_odinfo.username = NULL;
1821 
1822     free(gretl_odinfo.password);
1823     gretl_odinfo.password = NULL;
1824 
1825     ODBC_info_clear_read();
1826 }
1827 
set_odbc_dsn(const char * line,PRN * prn)1828 int set_odbc_dsn (const char *line, PRN *prn)
1829 {
1830     int (*check_dsn) (ODBC_info *);
1831     char *dbname = NULL;
1832     char *uname = NULL;
1833     char *pword = NULL;
1834     int got_plugin = 0;
1835     int err = 0;
1836 
1837     gretl_odbc_cleanup();
1838 
1839     dbname = get_dsn_field("dsn", line);
1840     if (dbname == NULL) {
1841 	pputs(prn, "You must specify a DSN using 'dsn=...'\n");
1842 	return E_DATA;
1843     }
1844 
1845     uname = get_dsn_field("user", line);
1846     pword = get_dsn_field("password", line);
1847 
1848     gretl_odinfo.dsn = dbname;
1849     gretl_odinfo.username = uname;
1850     gretl_odinfo.password = pword;
1851 
1852     gretl_error_clear();
1853 
1854     check_dsn = get_plugin_function("gretl_odbc_check_dsn");
1855 
1856     if (check_dsn == NULL) {
1857         err = 1;
1858     } else {
1859 	got_plugin = 1;
1860         err = (*check_dsn) (&gretl_odinfo);
1861     }
1862 
1863     if (err) {
1864 	if (!got_plugin) {
1865 	    pprintf(prn, "Couldn't open the gretl ODBC plugin\n");
1866 	} else {
1867 	    pprintf(prn, "Failed to connect to ODBC data source '%s'\n",
1868 		    gretl_odinfo.dsn);
1869 	}
1870 	gretl_odbc_cleanup();
1871     } else if (gretl_messages_on()) {
1872 	pprintf(prn, "Connected to ODBC data source '%s'\n",
1873 		gretl_odinfo.dsn);
1874     }
1875 
1876     return err;
1877 }
1878 
db_set_sample(const char * start,const char * stop,DATASET * dset)1879 int db_set_sample (const char *start, const char *stop, DATASET *dset)
1880 {
1881     int t1 = 0, t2 = 0;
1882 
1883     if (strcmp(start, ";")) {
1884 	t1 = dateton(start, dset);
1885 	if (t1 < 0) {
1886 	    return 1;
1887 	}
1888     }
1889 
1890     t2 = dateton(stop, dset);
1891     if (t2 < 0) {
1892 	return 1;
1893     }
1894 
1895     if (t1 > t2) {
1896 	gretl_errmsg_set(_("Invalid null sample"));
1897 	return 1;
1898     }
1899 
1900     dset->t1 = t1;
1901     dset->t2 = t2;
1902     dset->n = t2 - t1 + 1;
1903     strcpy(dset->endobs, stop);
1904 
1905 #if DB_DEBUG
1906     fprintf(stderr, "db_set_sample: t1=%d, t2=%d, stobs='%s', endobs='%s' "
1907 	    "sd0 = %g, n = %d\n",
1908 	    dset->t1, dset->t2,
1909 	    dset->stobs, dset->endobs,
1910 	    dset->sd0, dset->n);
1911 #endif
1912 
1913     return 0;
1914 }
1915 
1916 static const char *
get_word_and_advance(const char * s,char * word,size_t maxlen)1917 get_word_and_advance (const char *s, char *word, size_t maxlen)
1918 {
1919     size_t i = 0;
1920 
1921     while (isspace(*s)) s++;
1922 
1923     *word = '\0';
1924 
1925     while (*s && !isspace(*s)) {
1926 	if (i < maxlen) word[i++] = *s;
1927 	s++;
1928     }
1929 
1930     word[i] = '\0';
1931 
1932     return (*word != '\0')? s : NULL;
1933 }
1934 
1935 static const char *
get_compact_method_and_advance(const char * s,CompactMethod * method)1936 get_compact_method_and_advance (const char *s, CompactMethod *method)
1937 {
1938     const char *p;
1939 
1940     *method = COMPACT_NONE;
1941 
1942     if ((p = strstr(s, "(compact")) != NULL) {
1943 	char comp[8];
1944 	int i = 0;
1945 
1946 	p += 8;
1947 	while (*p && *p != ')' && i < 7) {
1948 	    if (!isspace(*p) && *p != '=') {
1949 		comp[i++] = *p;
1950 	    }
1951 	    p++;
1952 	}
1953 	comp[i] = '\0';
1954 
1955 	if (!strcmp(comp, "average")) {
1956 	    *method = COMPACT_AVG;
1957 	} else if (!strcmp(comp, "sum")) {
1958 	    *method = COMPACT_SUM;
1959 	} else if (!strcmp(comp, "first")) {
1960 	    *method = COMPACT_SOP;
1961 	} else if (!strcmp(comp, "last")) {
1962 	    *method = COMPACT_EOP;
1963 	} else if (!strcmp(comp, "spread")) {
1964 	    *method = COMPACT_SPREAD;
1965 	}
1966 
1967 	p = strchr(p, ')');
1968 	if (p != NULL) p++;
1969     } else if ((p = strstr(s, "data ")) != NULL) {
1970 	p += 5;
1971     } else {
1972 	p = s;
1973     }
1974 
1975     return p;
1976 }
1977 
compact_method_from_option(int * err)1978 static CompactMethod compact_method_from_option (int *err)
1979 {
1980     const char *s = get_optval_string(DATA, OPT_C);
1981     CompactMethod method = COMPACT_NONE;
1982 
1983     if (s == NULL || *s == '\0') {
1984 	*err = E_PARSE;
1985     } else if (!strcmp(s, "average")) {
1986 	method = COMPACT_AVG;
1987     } else if (!strcmp(s, "sum")) {
1988 	method = COMPACT_SUM;
1989     } else if (!strcmp(s, "first")) {
1990 	method = COMPACT_SOP;
1991     } else if (!strcmp(s, "last")) {
1992 	method = COMPACT_EOP;
1993     } else if (!strcmp(s, "spread")) {
1994 	method = COMPACT_SPREAD;
1995     } else {
1996 	gretl_errmsg_sprintf(_("field '%s' in command is invalid"), s);
1997 	*err = E_PARSE;
1998     }
1999 
2000     return method;
2001 }
2002 
2003 /* 2-D array of doubles, allocated space in second
2004    position (as in a DATASET) */
2005 
new_dbZ(int n)2006 static double **new_dbZ (int n)
2007 {
2008     double **Z;
2009     int t;
2010 
2011     Z = malloc(2 * sizeof *Z);
2012     if (Z == NULL) return NULL;
2013 
2014     Z[0] = NULL;
2015     Z[1] = malloc(n * sizeof **Z);
2016 
2017     if (Z[1] == NULL) {
2018 	free(Z);
2019 	return NULL;
2020     }
2021 
2022     for (t=0; t<n; t++) {
2023 	Z[1][t] = NADBL;
2024     }
2025 
2026     return Z;
2027 }
2028 
free_dbZ(double ** dbZ)2029 static void free_dbZ (double **dbZ)
2030 {
2031     if (dbZ != NULL) {
2032 	free(dbZ[1]);
2033 	free(dbZ);
2034     }
2035 }
2036 
parse_odbc_format_chunk(char ** ps,int i)2037 static int parse_odbc_format_chunk (char **ps, int i)
2038 {
2039     const char *numchars = "0123456789";
2040     char *chunk = NULL;
2041     char *p = *ps;
2042     int n, err = 0;
2043 
2044     /* advance to '%' */
2045     while (*p && *p != '%') p++;
2046     if (*p == '\0') {
2047 	return E_PARSE;
2048     }
2049 
2050     p++; /* move past '%' */
2051 
2052     /* zero padding? */
2053     if (*p == '0') {
2054 	p++;
2055     }
2056 
2057     /* optional width? */
2058     n = strspn(p, numchars);
2059     if (n == 1) {
2060 	p++;
2061     } else if (n > 0) {
2062 	return E_PARSE;
2063     }
2064 
2065     /* optional dot plus precision? */
2066     if (*p == '.') {
2067 	p++;
2068 	n = strspn(p, numchars);
2069 	if (n == 1) {
2070 	    p++;
2071 	} else {
2072 	    return E_PARSE;
2073 	}
2074     }
2075 
2076     /* now we should have a conversion character */
2077     if (*p == 'd') {
2078 	gretl_odinfo.coltypes[i] = GRETL_TYPE_INT;
2079     } else if (*p == 's') {
2080 	gretl_odinfo.coltypes[i] = GRETL_TYPE_STRING;
2081     } else if (*p == 'f' || *p == 'g') {
2082 	gretl_odinfo.coltypes[i] = GRETL_TYPE_DOUBLE;
2083     } else if (*p == 'D') {
2084 	*p = 's';
2085 	gretl_odinfo.coltypes[i] = GRETL_TYPE_DATE;
2086     } else {
2087 	return E_PARSE;
2088     }
2089 
2090     /* append any trailing fixed chars */
2091     p++;
2092     while (*p && *p != '%') p++;
2093     n = p - *ps;
2094 
2095     chunk = gretl_strndup(*ps, n);
2096     if (chunk == NULL) {
2097 	err = E_ALLOC;
2098     } else {
2099 	err = strings_array_add(&gretl_odinfo.fmts,
2100 				&gretl_odinfo.obscols,
2101 				chunk);
2102 	free(chunk);
2103     }
2104 
2105     *ps = p;
2106 
2107 #if 1
2108     fprintf(stderr, "set obs coltype[%d] = %d (%s), fmt='%s'\n", i,
2109 	    gretl_odinfo.coltypes[i],
2110 	    gretl_type_get_name(gretl_odinfo.coltypes[i]),
2111 	    gretl_odinfo.fmts[i]);
2112 #endif
2113 
2114     return err;
2115 }
2116 
parse_odbc_format(char * fmt)2117 static int parse_odbc_format (char *fmt)
2118 {
2119     char *s = fmt;
2120     int i, err = 0;
2121 
2122     for (i=0; i<ODBC_OBSCOLS && !err && *s; i++) {
2123 	err = parse_odbc_format_chunk(&s, i);
2124     }
2125 
2126     if (!err && *s != '\0') {
2127 	err = E_PARSE;
2128     }
2129 
2130     free(fmt);
2131 
2132     return err;
2133 }
2134 
odbc_get_query(const char * s,int * err)2135 static char *odbc_get_query (const char *s, int *err)
2136 {
2137     char *query = NULL;
2138     const char *p;
2139 
2140     if (*s == '"') {
2141 	query = gretl_quoted_string_strdup(s, NULL);
2142     } else {
2143 	p = get_string_by_name(s);
2144 	if (p != NULL) {
2145 	    query = gretl_strdup(p);
2146 	} else {
2147 	    query = gretl_strdup(s);
2148 	}
2149     }
2150 
2151     if (query == NULL) {
2152 	*err = E_ALLOC;
2153     } else if (*query == '\0') {
2154 	gretl_errmsg_set(_("Expected an SQL query string"));
2155 	*err = E_PARSE;
2156     }
2157 
2158     return query;
2159 }
2160 
2161 /* Grab the series name(s) out of an ODBC "data" command.  If the SQL
2162    query is marked by "query=" (which was not required in the original
2163    gretl ODBC setup) we're able to get multiple series names,
2164    otherwise we're restricted to one.
2165 */
2166 
odbc_get_varnames(const char ** line,int * err)2167 static char **odbc_get_varnames (const char **line, int *err)
2168 {
2169     char **vnames = NULL;
2170     char vname[VNAMELEN];
2171     const char *s = *line;
2172     int len, loop_ok = 0, nv = 0;
2173 
2174     if (strstr(s, "query=")) {
2175 	/* we know where the SQL query starts */
2176 	loop_ok = 1;
2177     }
2178 
2179     while (!*err) {
2180 	*vname = '\0';
2181 	*err = extract_varname(vname, s, &len);
2182 
2183 	if (!*err && len == 0) {
2184 	    gretl_errmsg_set(_("Expected a valid variable name"));
2185 	    *err = E_PARSE;
2186 	}
2187 
2188 	if (!*err) {
2189 	    *err = check_varname(vname);
2190 	}
2191 
2192 	if (!*err) {
2193 	    *err = strings_array_add(&vnames, &nv, vname);
2194 	}
2195 
2196 	if (!*err) {
2197 	    s += len;
2198 	    s += strspn(s, " ");
2199 	}
2200 
2201 	if (!loop_ok || *s == '\0' || !strncmp(s, "obs-", 4) ||
2202 	    !strncmp(s, "query=", 6)) {
2203 	    /* got to the end of the varnames section */
2204 	    break;
2205 	}
2206     }
2207 
2208     if (*err) {
2209 	strings_array_free(vnames, nv);
2210 	vnames = NULL;
2211     } else {
2212 	gretl_odinfo.nvars = nv;
2213     }
2214 
2215     *line = s;
2216 
2217     return vnames;
2218 }
2219 
s_tab_get(int i,int t,series_table * stl,series_table * str)2220 static double s_tab_get (int i, int t, series_table *stl, series_table *str)
2221 {
2222     const double *x = gretl_odinfo.X[i];
2223     const char *sr;
2224     double ret = NADBL;
2225 
2226     /* get the string value for the imported obs */
2227     sr = series_table_get_string(str, x[t]);
2228     /* look up its index "on the left" */
2229     ret = series_table_get_value(stl, sr);
2230     if (na(ret)) {
2231 	/* not found: so try adding it to the LHS table */
2232 	series_table_add_string(stl, sr);
2233 	ret = series_table_get_value(stl, sr);
2234     }
2235 
2236     return ret;
2237 }
2238 
m2q(int m)2239 static int m2q (int m)
2240 {
2241     if (m == 1) return 1;
2242     else if (m == 4) return 2;
2243     else if (m == 7) return 3;
2244     else if (m == 10) return 4;
2245     else return -1;
2246 }
2247 
try_iso_8601(const char * s,DATASET * dset)2248 static int try_iso_8601 (const char *s, DATASET *dset)
2249 {
2250     int t = -1;
2251 
2252     if (dataset_is_time_series(dset)) {
2253 	char obsstr[OBSLEN] = {0};
2254 	int y, m, d;
2255 
2256 	if (sscanf(s, "%d-%d-%d", &y, &m, &d) == 3) {
2257 	    if (dset->pd == 4 && d == 1) {
2258 		sprintf(obsstr, "%04d:%d", y, m2q(m));
2259 	    } else if (dset->pd == 12 && d == 1) {
2260 		sprintf(obsstr, "%04d:%02d", y, m);
2261 	    } else if (dset->pd == 1 && m == 1 && d == 1) {
2262 		sprintf(obsstr, "%04d", y);
2263 	    }
2264 	    t = dateton(obsstr, dset);
2265 	}
2266     }
2267 
2268     return t;
2269 }
2270 
odbc_transcribe_data(char ** vnames,DATASET * dset,int vmin,int newvars,gretlopt opt,PRN * prn)2271 static int odbc_transcribe_data (char **vnames, DATASET *dset,
2272 				 int vmin, int newvars,
2273 				 gretlopt opt, PRN *prn)
2274 {
2275     char label[MAXLABEL];
2276     int *gstlist = NULL;
2277     int *gstlnew = NULL;
2278     int nv = gretl_odinfo.nvars;
2279     int n = gretl_odinfo.nrows;
2280     int nrepl = nv - newvars;
2281     int simple_fill = (opt & OPT_F);
2282     int i, s, t, v;
2283     int spos = 1;
2284     int err = 0;
2285 
2286     if (gretl_odinfo.gst != NULL) {
2287 	gstlist = string_table_copy_list(gretl_odinfo.gst);
2288 	gstlnew = gretl_list_new(gstlist[0]);
2289 	gstlnew[0] = 0;
2290     }
2291 
2292     for (i=0; i<nv && !err; i++) {
2293 	series_table *str = NULL;
2294 	series_table *stl = NULL;
2295 	int vnew = 1; /* is this a new series? */
2296 	int obs_used = 0;
2297 
2298 	if (nrepl > 0) {
2299 	    /* we're replacing some series */
2300 	    v = current_series_index(dset, vnames[i]);
2301 	} else {
2302 	    /* all the series are new */
2303 	    v = -1;
2304 	}
2305 
2306 	if (v < 0) {
2307 	    /* a new series */
2308 	    v = vmin++;
2309 	    strcpy(dset->varname[v], vnames[i]);
2310 	    sprintf(label, "ODBC series %d", i + 1);
2311 	    series_set_label(dset, v, label);
2312 	} else {
2313 	    /* an existing series */
2314 	    vnew = 0;
2315 	    stl = series_get_string_table(dset, v);
2316 	}
2317 
2318 	if (in_gretl_list(gstlist, i+1)) {
2319 	    /* the imported data are string-valued */
2320 	    if (vnew) {
2321 		gstlnew[spos++] = v;
2322 		gstlnew[0] += 1;
2323 	    } else if (stl == NULL) {
2324 		gretl_errmsg_sprintf("%s: can't mix numeric and string data",
2325 				     dset->varname[v]);
2326 		err = E_TYPES;
2327 	    } else {
2328 		str = gretl_string_table_detach_col(gretl_odinfo.gst, i+1);
2329 	    }
2330 	    if (!err && gretl_messages_on()) {
2331 		pprintf(prn, "%s: string-valued\n", dset->varname[v]);
2332 	    }
2333 	} else if (stl != NULL) {
2334 	    /* string-valued in dataset, numeric data from ODBC */
2335 	    gretl_errmsg_sprintf("%s: can't mix numeric and string data",
2336 				 dset->varname[v]);
2337 	    err = E_TYPES;
2338 	}
2339 
2340 	if (err) {
2341 	    break;
2342 	}
2343 
2344 	if (gretl_odinfo.S != NULL) {
2345 	    /* got obs identifiers via ODBC */
2346 	    if (vnew) {
2347 		for (t=0; t<dset->n; t++) {
2348 		    dset->Z[v][t] = NADBL;
2349 		}
2350 	    }
2351 	    for (s=0; s<n; s++) {
2352 		t = dateton(gretl_odinfo.S[s], dset);
2353 		if (t < 0) {
2354 		    t = try_iso_8601(gretl_odinfo.S[s], dset);
2355 		}
2356 		if (t >= dset->t1 && t <= dset->t2) {
2357 		    if (str != NULL) {
2358 			dset->Z[v][t] = s_tab_get(i, s, stl, str);
2359 		    } else {
2360 			dset->Z[v][t] = gretl_odinfo.X[i][s];
2361 		    }
2362 		    obs_used++;
2363 		} else {
2364 		    fprintf(stderr, "Rejecting obs '%s'\n", gretl_odinfo.S[s]);
2365 		}
2366 	    }
2367 	} else {
2368 	    /* no obs identifiers via ODBC */
2369 	    int ns = dset->t2 - dset->t1 + 1;
2370 
2371 	    if (n == ns || simple_fill) {
2372 		s = 0;
2373 	    } else if (n == dset->n) {
2374 		s = dset->t1;
2375 	    } else {
2376 		gretl_errmsg_sprintf("%s: don't know how to align the data!",
2377 				     dset->varname[v]);
2378 		err = E_DATA;
2379 	    }
2380 	    for (t=0; t<dset->n && !err; t++) {
2381 		if (t >= dset->t1 && t <= dset->t2 && s < n) {
2382 		    if (str != NULL) {
2383 			dset->Z[v][t] = s_tab_get(i, s++, stl, str);
2384 		    } else {
2385 			dset->Z[v][t] = gretl_odinfo.X[i][s++];
2386 		    }
2387 		    obs_used++;
2388 		} else if (vnew) {
2389 		    dset->Z[v][t] = NADBL;
2390 		}
2391 	    }
2392 	}
2393 
2394 	if (str != NULL) {
2395 	    series_table_destroy(str);
2396 	}
2397 
2398 	if (!err && vnew && obs_used == 0) {
2399 	    gretl_warnmsg_sprintf(_("ODBC import: '%s': no valid observations in sample range"),
2400 				  vnames[i]);
2401 	}
2402     }
2403 
2404     if (err) {
2405 	dataset_drop_last_variables(dset, newvars);
2406 	if (gretl_odinfo.gst != NULL) {
2407 	    gretl_string_table_destroy(gretl_odinfo.gst);
2408 	    gretl_odinfo.gst = NULL;
2409 	}
2410     } else if (gretl_odinfo.gst != NULL) {
2411 	if (gstlnew[0] == 0) {
2412 	    /* no series tables to transfer */
2413 	    gretl_string_table_destroy(gretl_odinfo.gst);
2414 	    gretl_odinfo.gst = NULL;
2415 	} else {
2416 	    string_table_replace_list(gretl_odinfo.gst, gstlnew);
2417 	    gstlnew = NULL; /* donated to table */
2418 	    gretl_string_table_save(gretl_odinfo.gst, dset);
2419 	}
2420     }
2421 
2422     free(gstlist);
2423     free(gstlnew);
2424 
2425     return err;
2426 }
2427 
odbc_count_new_vars(char ** vnames,int nv,const DATASET * dset)2428 static int odbc_count_new_vars (char **vnames, int nv,
2429 				const DATASET *dset)
2430 {
2431     int newv = nv;
2432 
2433     if (dset->v > 0) {
2434 	int i;
2435 
2436 	for (i=0; i<nv; i++) {
2437 	    if (current_series_index(dset, vnames[i]) > 0) {
2438 		newv--;
2439 	    }
2440 	}
2441     }
2442 
2443     return newv;
2444 }
2445 
2446 /* data series [obs-format=format-string] [query=]query-string */
2447 
odbc_get_series(const char * line,DATASET * dset,gretlopt opt,PRN * prn)2448 static int odbc_get_series (const char *line, DATASET *dset,
2449 			    gretlopt opt, PRN *prn)
2450 {
2451     int (*get_data) (ODBC_info *, gretlopt, PRN *);
2452     char **vnames = NULL;
2453     char *format = NULL;
2454     int err = 0;
2455 
2456     if (gretl_odinfo.dsn == NULL) {
2457 	gretl_errmsg_set(_("No database has been opened"));
2458 	return 1;
2459     } else if (dset->n == 0) {
2460 	return E_NODATA;
2461     }
2462 
2463     /* get "series" field */
2464     vnames = odbc_get_varnames(&line, &err);
2465     if (err) {
2466 	return err;
2467     }
2468 
2469     /* optional "obs-format" field */
2470     if (!strncmp(line, "obs-format=", 11)) {
2471 	line += 11;
2472 	format = gretl_quoted_string_strdup(line, (const char **) &line);
2473 	if (format == NULL) {
2474 	    err = E_PARSE;
2475 	} else {
2476 	    err = parse_odbc_format(format);
2477 	}
2478     }
2479 
2480     /* now the query to pass to the database */
2481     if (!err) {
2482 	line += strspn(line, " ");
2483 	if (!strncmp(line, "query=", 6)) {
2484 	    line += 6;
2485 	}
2486 	gretl_odinfo.query = odbc_get_query(line, &err);
2487     }
2488 
2489     if (!err) {
2490 	if (opt & OPT_V) {
2491 	    pprintf(prn, "SQL query: '%s'\n", gretl_odinfo.query);
2492 	}
2493 	gretl_error_clear();
2494 
2495 	get_data = get_plugin_function("gretl_odbc_get_data");
2496 
2497 	if (get_data == NULL) {
2498 	    err = 1;
2499 	} else {
2500 	    err = (*get_data) (&gretl_odinfo, opt, prn);
2501 	}
2502     }
2503 
2504     if (!err) {
2505 	int n = gretl_odinfo.nrows;
2506 	int nv = gretl_odinfo.nvars;
2507 	int newvars, vmin = 1;
2508 
2509 	if (gretl_messages_on()) {
2510 	    pprintf(prn, "Retrieved %d observations on %d series via ODBC\n",
2511 		    n, nv);
2512 	}
2513 
2514 	if (dset->v == 0) {
2515 	    /* the data array is still empty */
2516 	    newvars = nv;
2517 	    dset->v = 1 + nv;
2518 	    err = start_new_Z(dset, 0);
2519 	} else {
2520 	    newvars = odbc_count_new_vars(vnames, nv, dset);
2521 	    vmin = dset->v;
2522 	    if (newvars > 0) {
2523 		err = dataset_add_series(dset, newvars);
2524 	    }
2525 	}
2526 
2527 	if (!err) {
2528 	    err = odbc_transcribe_data(vnames, dset, vmin, newvars, opt, prn);
2529 	}
2530     }
2531 
2532     strings_array_free(vnames, gretl_odinfo.nvars);
2533     ODBC_info_clear_read();
2534 
2535     return err;
2536 }
2537 
2538 /* dbnomics function in separate file */
2539 
2540 #include "dbnread.c"
2541 
2542 /* called from loop in db_get_series() */
2543 
get_one_db_series(const char * sername,const char * altname,DATASET * dset,CompactMethod cmethod,const char * idxname,PRN * prn)2544 static int get_one_db_series (const char *sername,
2545 			      const char *altname,
2546 			      DATASET *dset,
2547 			      CompactMethod cmethod,
2548 			      const char *idxname,
2549 			      PRN *prn)
2550 {
2551     CompactMethod this_method = cmethod;
2552     const char *impname;
2553     SERIESINFO sinfo; /* sinfo declared */
2554     double **dbZ;
2555     int v, err = 0;
2556 
2557     series_info_init(&sinfo);
2558 
2559     /* are we using a specified name for importation? */
2560     impname = (*altname == '\0')? sername : altname;
2561 
2562     /* see if the series is already in the dataset */
2563     v = series_index(dset, impname);
2564     if (v < dset->v && cmethod == COMPACT_NONE) {
2565 	this_method = series_get_compact_method(dset, v);
2566     }
2567 
2568 #if DB_DEBUG
2569     fprintf(stderr, "get_one_db_series: dset->v=%d, v=%d, name='%s'\n",
2570 	    dset->v, v, impname);
2571     fprintf(stderr, "this_var_method = %d\n", this_method);
2572 #endif
2573 
2574     /* find the series information in the database */
2575     if (saved_db_type == GRETL_DBNOMICS) {
2576 	err = get_dbnomics_series_info(sername, &sinfo);
2577     } else if (saved_db_type == GRETL_RATS_DB) {
2578 	err = get_rats_series_info(sername, &sinfo);
2579     } else if (saved_db_type == GRETL_PCGIVE_DB) {
2580 	err = get_pcgive_series_info(sername, &sinfo);
2581     } else {
2582 	err = get_native_series_info(sername, &sinfo, idxname);
2583     }
2584 
2585     if (err) {
2586 	fprintf(stderr, "get_one_db_series: failed to get series info\n");
2587 	return err;
2588     }
2589 
2590     /* temporary data array */
2591     dbZ = new_dbZ(sinfo.nobs);
2592     if (dbZ == NULL) {
2593 	gretl_errmsg_set(_("Out of memory!"));
2594 	return E_ALLOC;
2595     }
2596 
2597 #if DB_DEBUG
2598     fprintf(stderr, "get_one_db_series: offset=%d, nobs=%d\n",
2599 	    sinfo.offset, sinfo.nobs);
2600 #endif
2601 
2602     if (saved_db_type == GRETL_DBNOMICS) {
2603 	err = get_dbnomics_data(saved_db_name, &sinfo, dbZ);
2604     } else if (saved_db_type == GRETL_RATS_DB) {
2605 	err = get_rats_db_data(saved_db_name, &sinfo, dbZ);
2606     } else if (saved_db_type == GRETL_PCGIVE_DB) {
2607 	err = get_pcgive_db_data(saved_db_name, &sinfo, dbZ);
2608 #ifdef USE_CURL
2609     } else if (saved_db_type == GRETL_NATIVE_DB_WWW) {
2610 	err = get_remote_db_data(saved_db_name, &sinfo, dbZ);
2611 #endif
2612     } else {
2613 	err = get_native_db_data(saved_db_name, &sinfo, dbZ);
2614     }
2615 
2616 #if DB_DEBUG
2617     fprintf(stderr, "get_one_db_series: get_db_data gave %d\n", err);
2618 #endif
2619 
2620     if (err == DB_MISSING_DATA) {
2621 	fprintf(stderr, "There were missing data\n");
2622 	err = 0;
2623     }
2624 
2625 #if DB_DEBUG
2626     fprintf(stderr, "sinfo.varname='%s', this_method=%d\n",
2627 	    sinfo.varname, this_method);
2628 #endif
2629 
2630     if (!err) {
2631 	if (*altname != '\0') {
2632 	    /* switch the recorded name now */
2633 	    strcpy(sinfo.varname, altname);
2634 	}
2635 	if (this_method == COMPACT_SPREAD) {
2636 	    err = lib_spread_db_data(dbZ, &sinfo, dset, prn);
2637 	} else {
2638 	    err = lib_add_db_data(dbZ, &sinfo, dset, this_method,
2639 				  v, prn);
2640 	}
2641     }
2642 
2643     series_info_clear(&sinfo);
2644     free_dbZ(dbZ);
2645 
2646     return err;
2647 }
2648 
is_glob(const char * s)2649 static int is_glob (const char *s)
2650 {
2651     return strchr(s, '*') || strchr(s, '?');
2652 }
2653 
process_import_name_option(char * vname)2654 static int process_import_name_option (char *vname)
2655 {
2656     const char *s = get_optval_string(DATA, OPT_N);
2657     int err = 0;
2658 
2659     if (s == NULL) {
2660 	err = E_DATA;
2661     } else {
2662 	err = check_varname(s);
2663     }
2664 
2665     if (!err) {
2666 	strcpy(vname, s);
2667     }
2668 
2669     return err;
2670 }
2671 
2672 /* main function for getting one or more series out of a
2673    database (including ODBC) via command-line/script
2674 */
2675 
db_get_series(const char * line,DATASET * dset,gretlopt opt,PRN * prn)2676 int db_get_series (const char *line, DATASET *dset,
2677 		   gretlopt opt, PRN *prn)
2678 {
2679     char altname[VNAMELEN] = {0};
2680     char **vnames = NULL;
2681     char *idxname = NULL;
2682     CompactMethod cmethod;
2683     int i, nnames = 0;
2684     int from_scratch = 0;
2685     int err = 0;
2686 
2687     if (opt & OPT_O) {
2688 	return odbc_get_series(line, dset, opt, prn);
2689     }
2690 
2691     if (opt & OPT_N) {
2692 	/* --name=whatever */
2693 	err = process_import_name_option(altname);
2694 	if (err) {
2695 	    return err;
2696 	}
2697     }
2698 
2699 #if DB_DEBUG
2700     fprintf(stderr, "db_get_series: line='%s', dset=%p\n",
2701 	    line, (void *) dset);
2702     fprintf(stderr, "db_name = '%s'\n", saved_db_name);
2703 #endif
2704 
2705     if (*saved_db_name == '\0') {
2706 	gretl_errmsg_set(_("No database has been opened"));
2707 	return 1;
2708     }
2709 
2710     from_scratch = (dset->n == 0);
2711 
2712     if (opt & OPT_C) {
2713 	/* new-style: compaction method supplied as option */
2714 	cmethod = compact_method_from_option(&err);
2715     } else {
2716 	/* legacy */
2717 	line = get_compact_method_and_advance(line, &cmethod);
2718     }
2719 
2720     if (!err) {
2721 	if (string_is_blank(line)) {
2722 	    err = E_DATA;
2723 	} else {
2724 	    /* get the variable names on the line */
2725 	    vnames = gretl_string_split(line, &nnames, NULL);
2726 	    if (vnames == NULL) {
2727 		err = E_ALLOC;
2728 	    }
2729 	}
2730     }
2731 
2732     if (!err && nnames > 1 && *altname != '\0') {
2733 	/* altname only works for a single series? */
2734 	err = E_BADOPT;
2735     }
2736 
2737     if (!err) {
2738 	if (saved_db_type == GRETL_NATIVE_DB) {
2739 	    idxname = native_db_index_name();
2740 	} else if (saved_db_type == GRETL_NATIVE_DB_WWW) {
2741 #ifdef USE_CURL
2742 	    idxname = g_strdup_printf("%sdbtmp.idx", gretl_dotdir());
2743 	    err = remote_db_index_to_file(idxname);
2744 #endif
2745 	}
2746     }
2747 
2748     /* now process the imports individually */
2749 
2750     for (i=0; i<nnames && !err; i++) {
2751 	if (is_glob(vnames[i])) {
2752 	    /* globbing works only for native databases */
2753 	    if (*altname != '\0') {
2754 		/* can't do it */
2755 		err = E_BADOPT;
2756 	    } else if (saved_db_type == GRETL_NATIVE_DB ||
2757 		       saved_db_type == GRETL_NATIVE_DB_WWW) {
2758 		char **tmp;
2759 		int j, nmatch;
2760 
2761 		tmp = native_db_match_series(vnames[i], &nmatch,
2762 					     idxname, &err);
2763 		for (j=0; j<nmatch && !err; j++) {
2764 		    err = get_one_db_series(tmp[j], altname, dset,
2765 					    cmethod, idxname, prn);
2766 		}
2767 		strings_array_free(tmp, nmatch);
2768 	    } else {
2769 		err = E_INVARG;
2770 	    }
2771 	} else {
2772 	    err = get_one_db_series(vnames[i], altname, dset,
2773 				    cmethod, idxname, prn);
2774 	}
2775     }
2776 
2777     strings_array_free(vnames, nnames);
2778 
2779     if (!err && !(opt & OPT_Q) && gretl_messages_on()) {
2780 	pprintf(prn, _("Series imported OK"));
2781 	pputc(prn, '\n');
2782 	if (from_scratch) {
2783 	    print_smpl(dset, 0, OPT_NONE, prn);
2784 	}
2785     }
2786 
2787     if (idxname != NULL) {
2788 	if (saved_db_type == GRETL_NATIVE_DB_WWW) {
2789 	    /* this file is a temporary download */
2790 	    gretl_remove(idxname);
2791 	}
2792 	free(idxname);
2793     }
2794 
2795     return err;
2796 }
2797 
tempfile_open(char * fname,int * err)2798 static FILE *tempfile_open (char *fname, int *err)
2799 {
2800     FILE *fp;
2801 
2802     strcat(fname, ".XXXXXX");
2803     fp = gretl_mktemp(fname, "w+");
2804     if (fp == NULL) {
2805 	*err = E_FOPEN;
2806     }
2807 
2808     return fp;
2809 }
2810 
maybe_fclose(FILE * fp)2811 static void maybe_fclose (FILE *fp)
2812 {
2813     if (fp != NULL) {
2814 	fclose(fp);
2815     }
2816 }
2817 
2818 #define DBUFLEN 1024
2819 
db_delete_series(const char * line,const int * list,const char * fname,PRN * prn)2820 static int db_delete_series (const char *line, const int *list,
2821 			     const char *fname, PRN *prn)
2822 {
2823     dbnumber buf[DBUFLEN];
2824     char src1[FILENAME_MAX];
2825     char src2[FILENAME_MAX];
2826     char tmp1[FILENAME_MAX];
2827     char tmp2[FILENAME_MAX];
2828     char series[VNAMELEN];
2829     char *p, s[512];
2830     char **snames = NULL;
2831     FILE *fidx = NULL, *fbin = NULL;
2832     FILE *f1 = NULL, *f2 = NULL;
2833     int i, j, k, print, n, ns;
2834     int ndel = 0;
2835     int err = 0;
2836 
2837     if (fname == NULL) {
2838 	if (*saved_db_name == '\0') {
2839 	    gretl_errmsg_set(_("No database has been opened"));
2840 	    err = 1;
2841 	} else if (saved_db_type != GRETL_NATIVE_DB) {
2842 	    gretl_errmsg_set("This only works for gretl databases");
2843 	    err = 1;
2844 	} else {
2845 	    err = open_native_db_files(saved_db_name, &fidx, src1, &fbin, src2);
2846 	}
2847     } else {
2848 	err = open_native_db_files(fname, &fidx, src1, &fbin, src2);
2849     }
2850 
2851     if (err) {
2852 	return err;
2853     }
2854 
2855     strcpy(tmp1, gretl_dotdir());
2856     strcat(tmp1, "tmpidx");
2857     f1 = tempfile_open(tmp1, &err);
2858     if (err) {
2859 	goto bailout;
2860     }
2861 
2862     strcpy(tmp2, gretl_dotdir());
2863     strcat(tmp2, "tmpbin");
2864     f2 = tempfile_open(tmp2, &err);
2865     if (err) {
2866 	goto bailout;
2867     }
2868 
2869     if (line != NULL) {
2870 	/* extract the variable names given on the line */
2871 	ns = 0;
2872 	while ((line = get_word_and_advance(line, series, VNAMELEN-1))
2873 	       && !err) {
2874 	    err = strings_array_add(&snames, &ns, series);
2875 	}
2876 	if (!err && ns == 0) {
2877 	    fprintf(stderr, "Found no series names\n");
2878 	    err = E_PARSE;
2879 	}
2880     }
2881 
2882     print = k = 1;
2883     i = j = 0;
2884 
2885     while (fgets(s, sizeof s, fidx) && !err) {
2886 	if (i == 0) {
2887 	    /* always reprint the header */
2888 	    fputs(s, f1);
2889 	    i++;
2890 	    continue;
2891 	}
2892 
2893 	if (i % 2 != 0) {
2894 	    /* odd lines contain varnames */
2895 	    print = 1;
2896 	    if (snames != NULL) {
2897 		sscanf(s, "%s", series);
2898 		for (j=0; j<ns; j++) {
2899 		    if (!strcmp(series, snames[j])) {
2900 			print = 0;
2901 			ndel++;
2902 			break;
2903 		    }
2904 		}
2905 	    } else {
2906 		if (k <= list[0] && list[k] == j) {
2907 		    k++;
2908 		    print = 0;
2909 		    ndel++;
2910 		}
2911 		j++;
2912 	    }
2913 	    if (print) {
2914 		fputs(s, f1);
2915 	    }
2916 	} else {
2917 	    /* even lines have obs information */
2918 	    p = strstr(s, "n = ");
2919 	    if (p != NULL) {
2920 		sscanf(p + 4, "%d", &n);
2921 	    } else {
2922 		err = E_DATA;
2923 		fprintf(stderr, "couldn't find obs for series\n");
2924 	    }
2925 
2926 	    if (!print) {
2927 		fseek(fbin, n * sizeof(dbnumber), SEEK_CUR);
2928 	    } else {
2929 		int get, got, rem = n;
2930 
2931 		fputs(s, f1);
2932 
2933 		while (rem > 0 && !err) {
2934 		    get = (rem > DBUFLEN)? DBUFLEN : rem;
2935 		    got = fread(buf, sizeof(dbnumber), get, fbin);
2936 		    if (got != get) {
2937 			fprintf(stderr, "error reading binary data\n");
2938 			err = E_DATA;
2939 		    } else {
2940 			fwrite(buf, sizeof(dbnumber), got, f2);
2941 			rem -= got;
2942 		    }
2943 		}
2944 	    }
2945 	}
2946 	i++;
2947     }
2948 
2949     if (snames != NULL) {
2950 	strings_array_free(snames, ns);
2951     }
2952 
2953  bailout:
2954 
2955     maybe_fclose(fidx);
2956     maybe_fclose(fbin);
2957     maybe_fclose(f1);
2958     maybe_fclose(f2);
2959 
2960     if (!err && ndel > 0) {
2961 	err = gretl_rename(tmp1, src1);
2962 	if (!err) {
2963 	    err = gretl_rename(tmp2, src2);
2964 	}
2965     } else {
2966 	gretl_remove(tmp1);
2967 	gretl_remove(tmp2);
2968     }
2969 
2970     if (!err && prn != NULL) {
2971 	pprintf(prn, "Deleted %d series from %s\n", ndel, src2);
2972     }
2973 
2974     return err;
2975 }
2976 
db_delete_series_by_name(const char * line,PRN * prn)2977 int db_delete_series_by_name (const char *line, PRN *prn)
2978 {
2979     return db_delete_series(line, NULL, NULL, prn);
2980 }
2981 
db_delete_series_by_number(const int * list,const char * fname)2982 int db_delete_series_by_number (const int *list, const char *fname)
2983 {
2984     return db_delete_series(NULL, list, fname, NULL);
2985 }
2986 
obs_to_ymd(const char * obs,int pd,int * y,int * m,int * d)2987 static void obs_to_ymd (const char *obs, int pd, int *y, int *m, int *d)
2988 {
2989     *y = atoi(obs);
2990     *d = 1;
2991 
2992     if (pd == 12) {
2993 	*m = atoi(obs + 5);
2994     } else if (pd == 4) {
2995 	int q = atoi(obs + 5);
2996 
2997 	*m = q * 3 - 2;
2998     } else {
2999 	*m = 1;
3000     }
3001 }
3002 
db_range_check(int db_pd,const char * db_stobs,const char * db_endobs,const char * varname,DATASET * dset)3003 int db_range_check (int db_pd,
3004 		    const char *db_stobs,
3005 		    const char *db_endobs,
3006 		    const char *varname,
3007 		    DATASET *dset)
3008 {
3009     double sd0_orig, sdn_orig, sd0, sdn;
3010     int err = 0;
3011 
3012     sd0 = get_date_x(db_pd, db_stobs);
3013     sdn = get_date_x(db_pd, db_endobs);
3014 
3015     if (db_pd >= 5 && db_pd <= 7 && !dated_daily_data(dset)) {
3016 	/* convert 'orig' info to daily dates */
3017 	int y, m, d;
3018 
3019 	obs_to_ymd(dset->stobs, dset->pd, &y, &m, &d);
3020 	sd0_orig = epoch_day_from_ymd(y, m, d);
3021 	obs_to_ymd(dset->endobs, dset->pd, &y, &m, &d);
3022 	sdn_orig = epoch_day_from_ymd(y, m, d);
3023     } else {
3024 	sd0_orig = dset->sd0;
3025 	sdn_orig = get_date_x(dset->pd, dset->endobs);
3026     }
3027 
3028     if (sd0 > sdn_orig || sdn < sd0_orig) {
3029 	gretl_errmsg_sprintf(_("%s: observation range does not overlap\n"
3030 			       "with the working data set"),
3031 			     varname);
3032 	err = 1;
3033     }
3034 
3035     return err;
3036 }
3037 
check_db_import_conversion(int pd,DATASET * dset)3038 int check_db_import_conversion (int pd, DATASET *dset)
3039 {
3040     int target = dset->pd;
3041     int err = 0;
3042 
3043     if (pd == target) {
3044 	; /* no conversion needed */
3045     } else if (pd == 1 && target == 4) {
3046 	; /* annual to quarterly expansion */
3047     } else if (pd == 1 && target == 12) {
3048 	; /* annual to monthly expansion */
3049     } else if (pd == 4 && target == 12) {
3050 	; /* quarterly to monthly expansion */
3051     } else if (pd == 12 && target == 1) {
3052 	; /* monthly to annual compaction */
3053     } else if (pd == 4 && target == 1) {
3054 	; /* quarterly to annual compaction */
3055     } else if (pd == 12 && target == 4) {
3056 	; /* monthly to quarterly compaction */
3057     } else {
3058 	fprintf(stderr, "db import fail: pd = %d, target %d\n", pd, target);
3059 	err = E_DATA;
3060     }
3061 
3062     return err;
3063 }
3064 
check_db_import_full(int pd,const char * stobs,const char * endobs,const char * varname,DATASET * dset)3065 static int check_db_import_full (int pd,
3066 				 const char *stobs,
3067 				 const char *endobs,
3068 				 const char *varname,
3069 				 DATASET *dset)
3070 {
3071     int err = check_db_import_conversion(pd, dset);
3072 
3073     if (err) {
3074 	gretl_errmsg_sprintf(_("%s: can't handle conversion"),
3075 			     varname);
3076     } else {
3077 	err = db_range_check(pd, stobs, endobs, varname, dset);
3078     }
3079 
3080 #if DB_DEBUG
3081     if (err) {
3082 	fprintf(stderr, "check_db_import_full: err = %d\n", err);
3083 	fprintf(stderr, "(dset->n = %d)\n", dset->n);
3084     }
3085 #endif
3086 
3087     return err;
3088 }
3089 
3090 /* We'll do "spread" compaction for monthly to quarterly or annual,
3091    quarterly to annual, or daily to monthly or quarterly. Other
3092    cases are rejected.
3093 */
3094 
compact_spread_pd_check(int high,int low)3095 static int compact_spread_pd_check (int high, int low)
3096 {
3097     if ((low == 12 || low == 4) &&
3098 	(high == 5 || high == 6 || high == 7)) {
3099 	/* daily to monthly or quarterly */
3100 	return 0;
3101     }
3102 
3103     if (!(high == 12 && low == 1) &&
3104 	!(high == 12 && low == 4) &&
3105 	!(high == 4 && low == 1)) {
3106 	gretl_errmsg_set("Unsupported conversion");
3107 	return E_DATA;
3108     }
3109 
3110     return 0;
3111 }
3112 
3113 static void
init_datainfo_from_sinfo(DATASET * dset,SERIESINFO * sinfo)3114 init_datainfo_from_sinfo (DATASET *dset, SERIESINFO *sinfo)
3115 {
3116     dset->pd = sinfo->pd;
3117 
3118     strcpy(dset->stobs, sinfo->stobs);
3119     strcpy(dset->endobs, sinfo->endobs);
3120     colonize_obs(dset->stobs);
3121     colonize_obs(dset->endobs);
3122 
3123     dset->sd0 = get_date_x(dset->pd, dset->stobs);
3124     dset->n = sinfo->nobs;
3125     dset->v = 2;
3126 
3127     dset->t1 = 0;
3128     dset->t2 = dset->n - 1;
3129 }
3130 
3131 /* construct a little dataset as a temporary wrapper for an
3132    import using compact=spread
3133 */
3134 
make_import_tmpset(const DATASET * dset,SERIESINFO * sinfo,double ** dbZ,int * err)3135 static DATASET *make_import_tmpset (const DATASET *dset,
3136 				    SERIESINFO *sinfo,
3137 				    double **dbZ,
3138 				    int *err)
3139 {
3140     DATASET *tmpset = NULL;
3141 
3142     *err = compact_spread_pd_check(sinfo->pd, dset->pd);
3143     if (*err) {
3144 	return NULL;
3145     }
3146 
3147     tmpset = datainfo_new();
3148     if (tmpset == NULL) {
3149 	*err = E_ALLOC;
3150 	return NULL;
3151     }
3152 
3153     tmpset->v = 2;
3154     tmpset->n = sinfo->nobs;
3155 
3156     tmpset->Z = malloc(2 * sizeof *tmpset->Z);
3157     if (tmpset->Z == NULL) {
3158 	*err = E_ALLOC;
3159 	free(tmpset);
3160 	return NULL;
3161     }
3162 
3163     *err = dataset_allocate_varnames(tmpset);
3164     if (*err) {
3165 	free(tmpset->Z[1]);
3166 	free(tmpset->Z);
3167 	free(tmpset);
3168 	return NULL;
3169     }
3170 
3171     tmpset->Z[0] = NULL;
3172     tmpset->Z[1] = dbZ[1];
3173     dbZ[1] = NULL; /* note: stolen! */
3174 
3175     tmpset->t1 = sinfo->t1;
3176     tmpset->t2 = sinfo->t2;
3177     tmpset->pd = sinfo->pd;
3178     strcpy(tmpset->stobs, sinfo->stobs);
3179     strcpy(tmpset->endobs, sinfo->endobs);
3180     tmpset->structure = TIME_SERIES;
3181     tmpset->sd0 = get_date_x(tmpset->pd, tmpset->stobs);
3182 
3183     strcpy(tmpset->varname[1], sinfo->varname);
3184 
3185 #if 0
3186     PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
3187     fprintf(stderr, "import_tmpset: t1=%d, t2=%d, nobs=%d, pd=%d, offset=%d\n",
3188 	    sinfo->t1, sinfo->t2, sinfo->nobs, sinfo->pd, sinfo->offset);
3189     printdata(NULL, NULL, tmpset, OPT_O, prn);
3190     gretl_print_destroy(prn);
3191 #endif
3192 
3193     return tmpset;
3194 }
3195 
3196 static int
real_transcribe_db_data(const char * stobs,int nobs,const DATASET * dset,int dbv,const double * xvec)3197 real_transcribe_db_data (const char *stobs, int nobs,
3198 			 const DATASET *dset, int dbv,
3199 			 const double *xvec)
3200 {
3201     int t, pad1, pad2;
3202     int start, stop;
3203     double x;
3204 
3205     pad1 = dateton(stobs, dset);
3206     pad2 = dset->n - nobs - pad1;
3207 
3208     if (pad1 > 0) {
3209 	fprintf(stderr, "Padding at start, %d obs\n", pad1);
3210 	for (t=0; t<pad1; t++) {
3211 	    dset->Z[dbv][t] = NADBL;
3212 	}
3213 	start = pad1;
3214     } else {
3215 	start = 0;
3216     }
3217     if (pad2 > 0) {
3218 	int n = dset->n;
3219 
3220 	fprintf(stderr, "Padding at end, %d obs\n", pad2);
3221 	for (t=n-1; t>=n-1-pad2; t--) {
3222 	    dset->Z[dbv][t] = NADBL;
3223 	}
3224 	stop = n - pad2;
3225     } else {
3226 	stop = dset->n;
3227     }
3228 
3229     fprintf(stderr, "Filling in values from %d to %d\n", start, stop - 1);
3230     for (t=start; t<stop; t++) {
3231 	x = xvec[t - pad1];
3232 	dset->Z[dbv][t] = (x == DBNA)? NADBL : x;
3233     }
3234 
3235     return 0;
3236 }
3237 
transcribe_db_data(DATASET * dset,int targv,const double * src,int pd,int nobs,char * stobs,CompactMethod cmethod)3238 int transcribe_db_data (DATASET *dset, int targv,
3239 			const double *src, int pd,
3240 			int nobs, char *stobs,
3241 			CompactMethod cmethod)
3242 {
3243     double *xvec = (double *) src;
3244     int free_xvec = 0;
3245 
3246     if (pd != dset->pd) {
3247 	if (pd < dset->pd) {
3248 	    /* the series needs to be expanded */
3249 	    xvec = expand_db_series(src, pd, &nobs, stobs, dset);
3250 	} else {
3251 	    /* the series needs to be compacted */
3252 	    xvec = compact_db_series(src, pd, &nobs, stobs, dset->pd,
3253 				     cmethod);
3254 	}
3255 	if (xvec == NULL) {
3256 	    return E_ALLOC;
3257 	}
3258 	free_xvec = 1;
3259     }
3260 
3261     real_transcribe_db_data(stobs, nobs, dset, targv, xvec);
3262 
3263     if (free_xvec) {
3264 	free(xvec);
3265     }
3266 
3267     return 0;
3268 }
3269 
3270 /* Processes a single db series in "spread" mode, meaning
3271    that multiple series are added to the target dataset,
3272    @dset. This variant is associated with gretl databases.
3273 */
3274 
lib_spread_db_data(double ** dbZ,SERIESINFO * sinfo,DATASET * dset,PRN * prn)3275 int lib_spread_db_data (double **dbZ, SERIESINFO *sinfo,
3276 			DATASET *dset, PRN *prn)
3277 {
3278     int err = 0;
3279 
3280     if (dset == NULL || dset->v == 0) {
3281 	gretl_errmsg_set("\"compact=spread\": requires a dataset in place");
3282 	err = E_DATA;
3283     } else {
3284 	DATASET *tmpset = make_import_tmpset(dset, sinfo, dbZ, &err);
3285 
3286 	if (!err) {
3287 	    err = do_compact_spread(tmpset, dset->pd);
3288 	}
3289 	if (!err) {
3290 	    err = merge_or_replace_data(dset, &tmpset, OPT_X | OPT_U, prn);
3291 	}
3292     }
3293 
3294     return err;
3295 }
3296 
3297 /* Processes a single db series in "spread" mode, meaning
3298    that multiple series are added to the target dataset,
3299    @dset. This variant is associated with dbnomics import.
3300 */
3301 
lib_spread_dbnomics_data(DATASET * dset,DATASET * dbset,PRN * prn)3302 int lib_spread_dbnomics_data (DATASET *dset, DATASET *dbset,
3303 			      PRN *prn)
3304 {
3305     int err = 0;
3306 
3307     if (dset == NULL || dset->v == 0) {
3308 	gretl_errmsg_set("\"compact=spread\": requires a dataset in place");
3309 	err = E_DATA;
3310     } else {
3311 	err = do_compact_spread(dbset, dset->pd);
3312 	if (!err) {
3313 	    /* we add OPT_K ("keep") to prevent destruction of @dbset:
3314 	       we're bypassing get_merge_opts(), so we'd better know
3315 	       what we're doing!
3316 	    */
3317 	    gretlopt merge_opt = (OPT_X | OPT_U | OPT_K);
3318 
3319 	    err = merge_or_replace_data(dset, &dbset, merge_opt, prn);
3320 	}
3321     }
3322 
3323     return err;
3324 }
3325 
3326 /* Processes a single db series, adding it to @dset if
3327    possible (perhaps after compaction or expansion).
3328 */
3329 
lib_add_db_data(double ** dbZ,SERIESINFO * sinfo,DATASET * dset,CompactMethod cmethod,int dbv,PRN * prn)3330 static int lib_add_db_data (double **dbZ, SERIESINFO *sinfo,
3331 			    DATASET *dset, CompactMethod cmethod,
3332 			    int dbv, PRN *prn)
3333 {
3334     int new = (dbv == dset->v);
3335     int err = 0;
3336 
3337     if (sinfo == NULL && dbZ == NULL) {
3338 	fprintf(stderr, "lib_add_db_data: broken call!\n");
3339 	return E_DATA;
3340     }
3341 
3342     if (cmethod == COMPACT_NONE) {
3343 	/* impose default if need be */
3344 	cmethod = COMPACT_AVG;
3345     }
3346 
3347     if (dset->n == 0) {
3348 	/* if the existing dataset is empty, initialize it
3349 	   using info from the database series
3350 	*/
3351 	init_datainfo_from_sinfo(dset, sinfo);
3352 	dset->v = 0; /* trigger for creating data array below */
3353 	if (dset->pd != 1 || strcmp(dset->stobs, "1")) {
3354 	    dset->structure = TIME_SERIES;
3355 	}
3356     } else {
3357 	err = check_db_import_full(sinfo->pd, sinfo->stobs, sinfo->endobs,
3358 				   sinfo->varname, dset);
3359 	if (err) {
3360 	    return err;
3361 	}
3362     }
3363 
3364     if (dset->v == 0) {
3365 	/* the data array is still empty */
3366 	dset->v = 2;
3367 	dbv = 1;
3368 	if (start_new_Z(dset, 0)) {
3369 	    return E_ALLOC;
3370 	}
3371     } else if (new && dataset_add_series(dset, 1)) {
3372 	return E_ALLOC;
3373     }
3374 
3375 #if DB_DEBUG
3376     fprintf(stderr, "dset->Z=%p\n", (void *) dset->Z);
3377     fprintf(stderr, "dset->n = %d, dset->v = %d, dbv = %d\n",
3378 	    dset->n, dset->v, dbv);
3379 #endif
3380 
3381     err = transcribe_db_data(dset, dbv, dbZ[1], sinfo->pd, sinfo->nobs,
3382 			     sinfo->stobs, cmethod);
3383 
3384     if (!err) {
3385 	/* common stuff for adding a var */
3386 	strcpy(dset->varname[dbv], sinfo->varname);
3387 	series_set_label(dset, dbv, sinfo->descrip);
3388 	series_set_compact_method(dset, dbv, cmethod);
3389 	if (sinfo->pd < dset->pd) {
3390 	    series_set_orig_pd(dset, dbv, sinfo->pd);
3391 	}
3392     } else if (new) {
3393 	/* we added a series that has not been filled */
3394 	dataset_drop_last_variables(dset, 1);
3395     }
3396 
3397     return err;
3398 }
3399 
3400 /* compact an individual series, in the context of converting an
3401    entire working dataset to a lower frequency: used in all cases
3402    except conversion from daily to monthly
3403 */
3404 
compact_series(const DATASET * dset,int i,int oldn,int startskip,int min_startskip,int compfac,CompactMethod method)3405 static double *compact_series (const DATASET *dset, int i, int oldn,
3406 			       int startskip, int min_startskip,
3407 			       int compfac, CompactMethod method)
3408 {
3409     const double *src = dset->Z[i];
3410     double *x;
3411     int lead = startskip - min_startskip;
3412     int to_weekly = (compfac >= 5 && compfac <= 7);
3413     int t, idx;
3414 
3415 #if DB_DEBUG
3416     fprintf(stderr, "compact_series: startskip=%d, min_startskip=%d, compfac=%d "
3417 	    "lead=%d\n", startskip, min_startskip, compfac, lead);
3418 #endif
3419 
3420     x = malloc(dset->n * sizeof *x);
3421     if (x == NULL) {
3422 	return NULL;
3423     }
3424 
3425     for (t=0; t<dset->n; t++) {
3426 	x[t] = NADBL;
3427     }
3428 
3429     idx = startskip;
3430 
3431     for (t=lead; t<dset->n && idx<oldn; t++) {
3432 	if (method == COMPACT_SOP) {
3433 	    if (to_weekly && na(src[idx]) && idx < oldn - 1) {
3434 		/* allow one day's slack */
3435 		x[t] = src[idx + 1];
3436 	    } else {
3437 		x[t] = src[idx];
3438 	    }
3439 	} else if (method == COMPACT_EOP) {
3440 	    if (to_weekly && na(src[idx]) && idx > startskip) {
3441 		/* one day's slack */
3442 		x[t] = src[idx - 1];
3443 	    } else {
3444 		x[t] = src[idx];
3445 	    }
3446 	} else if (method == COMPACT_SUM || method == COMPACT_AVG) {
3447 	    int j, st, den = compfac;
3448 	    int n_ok = 0;
3449 
3450 	    if (idx + compfac - 1 > oldn - 1) {
3451 		break;
3452 	    }
3453 
3454 	    x[t] = 0.0;
3455 
3456 	    for (j=0; j<compfac; j++) {
3457 		st = idx + j;
3458 		if (na(src[st])) {
3459 		    if (to_weekly) {
3460 			den--;
3461 		    } else {
3462 			x[t] = NADBL;
3463 			break;
3464 		    }
3465 		} else {
3466 		    /* got a valid observation */
3467 		    n_ok++;
3468 		    x[t] += src[st];
3469 		}
3470 	    }
3471 
3472 	    if (n_ok == 0) {
3473 		x[t] = NADBL;
3474 	    }
3475 
3476 	    if (method == COMPACT_AVG && !na(x[t])) {
3477 		if (den > 0) {
3478 		    x[t] /= den;
3479 		} else {
3480 		    x[t] = NADBL;
3481 		}
3482 	    }
3483 	}
3484 	idx += compfac;
3485     }
3486 
3487     return x;
3488 }
3489 
3490 /* Determine year and period (either month or quarter,
3491    depending on the value of @pd) for observation @t in
3492    daily dataset @dset.
3493 */
3494 
daily_yp(const DATASET * dset,int t,int pd,int * y,int * p)3495 static int daily_yp (const DATASET *dset, int t,
3496 		     int pd, int *y, int *p)
3497 {
3498     char obs[12];
3499     int mon, day;
3500 
3501     ntolabel(obs, t, dset);
3502 
3503     if (sscanf(obs, YMD_READ_FMT, y, &mon, &day) != 3) {
3504 	return E_DATA;
3505     }
3506 
3507     if (pd == 12) {
3508 	*p = mon;
3509     } else {
3510 	/* convert month to quarter */
3511 	*p = 1 + (mon - 1) / 3;
3512     }
3513 
3514     return 0;
3515 }
3516 
3517 #define DAYDBG 0
3518 
3519 /* For a single row, @cset_t, of a compacted dataset,
3520    write daily values into the set of monthly or
3521    quarterly series that will represent them. The
3522    daily data are drawn from @dset and transcribed to
3523    @cset.
3524 */
3525 
fill_cset_t(const DATASET * dset,int * startday,DATASET * cset,int cset_t,int compfac,int qmonth)3526 static void fill_cset_t (const DATASET *dset,
3527 			 int *startday,
3528 			 DATASET *cset,
3529 			 int cset_t,
3530 			 int compfac,
3531 			 int qmonth)
3532 {
3533     char obs[OBSLEN];
3534     double cvec[30];
3535     int idx[30];
3536     const double *z;
3537     int y, p, pstart = 0;
3538     int effn, ndays = 0;
3539     int i, j, k, s, t, t0;
3540     double zsum = 0.0;
3541 
3542     t0 = *startday;
3543     y = p = 0;
3544 
3545     /* how many daily obs do we have in this month? */
3546     for (t=t0; t<dset->n; t++) {
3547 	daily_yp(dset, t, 12, &y, &p);
3548 	if (t == t0) {
3549 	    pstart = p;
3550 	} else if (p != pstart) {
3551 	    break;
3552 	}
3553 	ndays++;
3554     }
3555 
3556 #if 0
3557     fprintf(stderr, "fill_cset_t: ndays = %d, compfac = %d\n",
3558 	    ndays, compfac);
3559 #endif
3560 
3561     /* construct array of month-day indices */
3562     for (j=0; j<compfac && j<ndays; j++) {
3563 	ntolabel(obs, t0 + j, dset);
3564 	idx[j] = date_to_daily_index(obs, dset->pd);
3565     }
3566 
3567     /* the outer loop is over the daily series in the
3568        source dataset */
3569 
3570     k = 1 + qmonth * compfac;
3571 
3572     for (i=1; i<dset->v; i++) {
3573 	z = dset->Z[i] + t0;
3574 	for (j=0; j<compfac; j++) {
3575 	    cvec[j] = NADBL;
3576 	}
3577 	effn = 0;
3578 	zsum = 0.0;
3579 	for (j=0; j<compfac && j<ndays; j++) {
3580 	    s = idx[j];
3581 	    cvec[s] = z[j];
3582 	    if (!na(cvec[s])) {
3583 		zsum += cvec[s];
3584 		effn++;
3585 	    }
3586 	}
3587 	if (effn < compfac) {
3588 	    /* we have some padding to do */
3589 	    double zbar = zsum / effn;
3590 
3591 	    for (j=0; j<compfac; j++) {
3592 		if (na(cvec[j])) {
3593 		    cvec[j] = zbar;
3594 		}
3595 	    }
3596 	}
3597 	/* transcribe into target dataset */
3598 	for (j=0; j<compfac; j++) {
3599 	    cset->Z[k+j][cset_t] = cvec[j];
3600 	}
3601 
3602 	k += compfac;
3603     }
3604 
3605     *startday += ndays;
3606 }
3607 
3608 #define SPREAD_DEBUG 0
3609 
3610 /* compact daily data to monthly or quarterly using the
3611    "spread" method */
3612 
compact_daily_spread(const DATASET * dset,int newpd,int * nv,int * err)3613 static DATASET *compact_daily_spread (const DATASET *dset,
3614 				      int newpd,
3615 				      int *nv,
3616 				      int *err)
3617 {
3618     const char *periods[] = {
3619 	"month",
3620 	"quarter"
3621     };
3622     const char *period;
3623     DATASET *cset = NULL;
3624     char label[MAXLABEL];
3625     int oldpd = dset->pd;
3626     int compfac;
3627     int v, i, j, k, t, T;
3628     int startyr, startper;
3629     int endyr, endper;
3630     int startday;
3631 
3632     fprintf(stderr, "*** compact_daily_spread (newpd=%d) ***\n", newpd);
3633 
3634     daily_yp(dset, 0, newpd, &startyr, &startper);
3635     daily_yp(dset, dset->n - 1, newpd, &endyr, &endper);
3636     compfac = midas_days_per_period(dset->pd, newpd);
3637 
3638     if (newpd == 12) {
3639 	period = periods[0];
3640     } else if (newpd == 4) {
3641 	period = periods[1];
3642     } else {
3643 	*err = E_DATA;
3644 	return NULL;
3645     }
3646 
3647     T = newpd * (endyr - startyr) + (endper - startper + 1);
3648 
3649     if (T <= 1) {
3650 	*err = E_DATA;
3651 	return NULL;
3652     }
3653 
3654     /* the number of series, after compaction */
3655     v = 1 + (dset->v - 1) * compfac;
3656 
3657 #if SPREAD_DEBUG
3658     fprintf(stderr, "oldpd %d, newpd %d, nvars=%d, T=%d, start=%d:%d, end=%d:%d\n",
3659 	    dset->pd, newpd, v, T, startyr, startper, endyr, endper);
3660 #endif
3661 
3662     cset = create_new_dataset(v, T, 0);
3663     if (cset == NULL) {
3664 	*err = E_ALLOC;
3665 	return NULL;
3666     }
3667 
3668     if (newpd == 12) {
3669 	sprintf(cset->stobs, "%d:%02d", startyr, startper);
3670 	sprintf(cset->endobs, "%d:%02d", endyr, endper);
3671     } else {
3672 	sprintf(cset->stobs, "%d:%d", startyr, startper);
3673 	sprintf(cset->endobs, "%d:%d", endyr, endper);
3674     }
3675 
3676     cset->pd = newpd;
3677     cset->structure = TIME_SERIES;
3678     cset->sd0 = get_date_x(cset->pd, cset->stobs);
3679 
3680     /* ensure no uninitialized data */
3681     for (i=1; i<v; i++) {
3682 	for (t=0; t<T; t++) {
3683 	    cset->Z[i][t] = NADBL;
3684 	}
3685     }
3686 
3687     /* do the actual data transcription first */
3688     startday = 0;
3689     for (t=0; t<T; t++) {
3690 	if (newpd == 4) {
3691 	    fill_cset_t(dset, &startday, cset, t, compfac/3, 0);
3692 	    fill_cset_t(dset, &startday, cset, t, compfac/3, 1);
3693 	    fill_cset_t(dset, &startday, cset, t, compfac/3, 2);
3694 	} else {
3695 	    fill_cset_t(dset, &startday, cset, t, compfac, 0);
3696 	}
3697     }
3698 
3699     /* then name the series and reorganize */
3700 
3701     k = 1;
3702     for (i=1; i<dset->v; i++) {
3703 	double *xtmp;
3704 	char sfx[16];
3705 	int p;
3706 
3707 	/* switch data order */
3708 	for (j=0; j<compfac/2; j++) {
3709 	    p = k + compfac - j - 1;
3710 	    xtmp = cset->Z[k+j];
3711 	    cset->Z[k+j] = cset->Z[p];
3712 	    cset->Z[p] = xtmp;
3713 	}
3714 
3715 	/* names and labels */
3716 	for (j=0; j<compfac; j++) {
3717 	    strcpy(cset->varname[k+j], dset->varname[i]);
3718 	    gretl_trunc(cset->varname[k+j], VNAMELEN - 5);
3719 	    sprintf(sfx, "_d%02d", compfac - j);
3720 	    strcat(cset->varname[k+j], sfx);
3721 	    sprintf(label, "%s in day %d of %s", dset->varname[i],
3722 		    compfac - j, period);
3723 	    series_record_label(cset, k+j, label);
3724 	    series_set_midas_period(cset, k+j, compfac - j);
3725 	    series_set_midas_freq(cset, k+j, oldpd);
3726 	    if (j == 0) {
3727 		series_set_midas_anchor(cset, k+j);
3728 	    }
3729 	}
3730 
3731 	/* advance column write position for next source series */
3732 	k += compfac;
3733     }
3734 
3735 #if SPREAD_DEBUG > 1
3736     PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
3737     printdata(NULL, NULL, cset, OPT_O, prn);
3738     gretl_print_destroy(prn);
3739 #endif
3740 
3741     return cset;
3742 }
3743 
3744 /* compact an entire dataset, transcribing from each higher-frequency
3745    series to a set of lower-frequency series, each of which holds the
3746    observations from a given sub-period
3747 */
3748 
compact_data_spread(const DATASET * dset,int newpd,int startmaj,int startmin,int endmaj,int endmin,int * nv,int * err)3749 static DATASET *compact_data_spread (const DATASET *dset, int newpd,
3750 				     int startmaj, int startmin,
3751 				     int endmaj, int endmin,
3752 				     int *nv, int *err)
3753 {
3754     const char *subper[] = {
3755 	"month",
3756 	"quarter"
3757     };
3758     const char *period[] = {
3759 	"year",
3760 	"quarter"
3761     };
3762     const char *p0, *p1;
3763     DATASET *cset = NULL;
3764     char sfx[16];
3765     char label[MAXLABEL];
3766     int oldpd = dset->pd;
3767     int compfac = oldpd / newpd;
3768     int v, i, j, k, t, T;
3769     int q0 = 0, qT = 0;
3770 
3771     /* calculate @T, the number of observations that the compacted
3772        dataset should comprise
3773     */
3774     if (newpd == 1) {
3775 	T = endmaj - startmaj + 1;
3776     } else if (newpd == 4) {
3777 	T = oldpd * (endmaj - startmaj + 1) / compfac;
3778 	q0 = 1 + (startmin - 1) / 3;
3779 	qT = 1 + (endmin - 1) / 3;
3780 	T += qT - q0 - 3;
3781     } else {
3782 	*err = E_DATA;
3783 	return NULL;
3784     }
3785 
3786     if (T <= 1) {
3787 	*err = E_DATA;
3788 	return NULL;
3789     }
3790 
3791     /* calculate @v, the number of series after compaction */
3792     v = 1 + (dset->v - 1) * compfac;
3793 
3794 #if SPREAD_DEBUG
3795     fprintf(stderr, "oldpd %d, newpd %d, v=%d, T=%d, start=%d:%d, end=%d:%d\n",
3796 	    oldpd, newpd, v, T, startmaj, startmin, endmaj, endmin);
3797 #endif
3798 
3799     cset = create_new_dataset(v, T, 0);
3800     if (cset == NULL) {
3801 	*err = E_ALLOC;
3802 	return NULL;
3803     }
3804 
3805     if (newpd == 1) {
3806 	sprintf(cset->stobs, "%d", startmaj);
3807 	sprintf(cset->endobs, "%d", endmaj);
3808 	p1 = period[0];
3809     } else {
3810 	/* newpd must be 4 */
3811 	sprintf(cset->stobs, "%d:%d", startmaj, q0);
3812 	sprintf(cset->endobs, "%d:%d", endmaj, qT);
3813 	p1 = period[1];
3814     }
3815 
3816     p0 = (oldpd == 12)? subper[0] : subper[1];
3817 
3818     cset->pd = newpd;
3819     cset->structure = TIME_SERIES;
3820     cset->sd0 = get_date_x(cset->pd, cset->stobs);
3821 
3822 #if SPREAD_DEBUG
3823     fprintf(stderr, "stobs '%s', endobs '%s', sd0=%g, q0=%d\n",
3824 	    cset->stobs, cset->endobs, cset->sd0, q0);
3825 #endif
3826 
3827     k = 1; /* the first new series */
3828 
3829     for (i=1; i<dset->v; i++) {
3830 	/* loop across original data series */
3831 	double *xtmp;
3832 	int offset;
3833 	int p, s = 0;
3834 
3835 	/* how many initial observations should be set to NA? */
3836 	if (newpd == 1) {
3837 	    offset = startmin - 1;
3838 	} else {
3839 	    offset = startmin - (1 + (q0 - 1) * compfac);
3840 	}
3841 
3842 	for (t=0; t<T; t++) {
3843 	    /* loop across new time periods */
3844 	    for (j=0; j<compfac; j++) {
3845 		/* loop across new series <- sub-periods */
3846 		while (s < offset) {
3847 		    cset->Z[k+j][t] = NADBL;
3848 		    offset--;
3849 		    j++;
3850 		}
3851 		if (s < dset->n) {
3852 		    cset->Z[k+j][t] = dset->Z[i][s];
3853 		} else {
3854 		    cset->Z[k+j][t] = NADBL;
3855 		}
3856 		s++;
3857 	    }
3858 	}
3859 
3860 	/* reverse the new columns: most recent first */
3861 	for (j=0; j<compfac/2; j++) {
3862 	    p = k + compfac - j - 1;
3863 	    xtmp = cset->Z[k+j];
3864 	    cset->Z[k+j] = cset->Z[p];
3865 	    cset->Z[p] = xtmp;
3866 	}
3867 
3868 	/* names and labels */
3869 	for (j=0; j<compfac; j++) {
3870 	    strcpy(cset->varname[k+j], dset->varname[i]);
3871 	    if (oldpd == 12 && newpd == 4) {
3872 		gretl_trunc(cset->varname[k+j], VNAMELEN - 4);
3873 		sprintf(sfx, "_m%d", compfac - j);
3874 	    } else if (oldpd == 12) {
3875 		/* going to annual */
3876 		gretl_trunc(cset->varname[k+j], VNAMELEN - 5);
3877 		sprintf(sfx, "_m%02d", compfac - j);
3878 	    } else {
3879 		gretl_trunc(cset->varname[k+j], VNAMELEN - 4);
3880 		sprintf(sfx, "_q%d", compfac - j);
3881 	    }
3882 	    strcat(cset->varname[k+j], sfx);
3883 	    sprintf(label, "%s in %s %d of %s", dset->varname[i],
3884 		    p0, compfac - j, p1);
3885 	    series_record_label(cset, k+j, label);
3886 	    series_set_midas_period(cset, k+j, compfac - j);
3887 	    series_set_midas_freq(cset, k+j, oldpd);
3888 	    if (j == 0) {
3889 		series_set_midas_anchor(cset, k+j);
3890 	    }
3891 	}
3892 
3893 	/* advance column write position for next source series */
3894 	k += compfac;
3895     }
3896 
3897 #if SPREAD_DEBUG > 1
3898     PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
3899     printdata(NULL, NULL, cset, OPT_O, prn);
3900     gretl_print_destroy(prn);
3901 #endif
3902 
3903     return cset;
3904 }
3905 
3906 /* specific apparatus for converting daily time series to monthly */
3907 
extend_series(const double * z,int n)3908 static double *extend_series (const double *z, int n)
3909 {
3910     double *x = malloc(n * sizeof *x);
3911 
3912     if (x != NULL) {
3913 	int t;
3914 
3915 	x[0] = NADBL;
3916 	for (t=1; t<n; t++) {
3917 	    x[t] = z[t-1];
3918 	}
3919     }
3920 
3921     return x;
3922 }
3923 
3924 #define DMDEBUG 0
3925 
3926 static double *
daily_series_to_monthly(DATASET * dset,int i,int nm,int yr,int mon,int offset,int any_eop,CompactMethod method)3927 daily_series_to_monthly (DATASET *dset, int i,
3928 			 int nm, int yr, int mon, int offset,
3929 			 int any_eop, CompactMethod method)
3930 {
3931     double *x;
3932     const double *src = dset->Z[i];
3933     const double *z;
3934     double *tmp = NULL;
3935     int t, sop_t, eop_t;
3936 
3937     x = malloc(nm * sizeof *x);
3938     if (x == NULL) {
3939 	return NULL;
3940     }
3941 
3942     if (offset < 0) {
3943 	tmp = extend_series(src, dset->n + 1);
3944 	if (tmp == NULL) {
3945 	    free(x);
3946 	    return NULL;
3947 	}
3948 	/* this permits use of a negative offset */
3949 	z = tmp + 1;
3950     } else {
3951 	z = src;
3952     }
3953 
3954     /* Note: we can't necessarily assume that the first obs
3955        is the first day of a month. The @offset value gives the
3956        number of daily observations (allowing for the number of
3957        observed days in the week) in the first month of the daily
3958        data, prior to the data actually starting.
3959     */
3960 
3961     /* The "one day's slack" business below, with start-of-period and
3962        end-of-period compaction is designed to allow for the
3963        possibility that the first (or last) day of the month may not
3964        have been a trading day.
3965     */
3966 
3967     /* first obs for start-of-period */
3968     sop_t = offset;
3969 
3970     /* first obs for end-of-period */
3971     if (sop_t > 0) {
3972 	eop_t = offset - 1;
3973     } else {
3974 	/* the first obs starts a month */
3975 	eop_t = get_days_in_month(mon, yr, dset->pd, 0) - 1;
3976     }
3977 
3978 #if DMDEBUG
3979     fprintf(stderr, "starting: offset=%d, any_eop=%d, sop_t=%d, eop_t=%d\n",
3980 	    offset, any_eop, sop_t, eop_t);
3981 #endif
3982 
3983     for (t=0; t<nm; t++) {
3984 	/* loop across the months in the compacted data */
3985 	int mdays = get_days_in_month(mon, yr, dset->pd, 0);
3986 
3987 	if (t > 0) {
3988 	    eop_t += mdays;
3989 	}
3990 
3991 #if DMDEBUG
3992 	fprintf(stderr, "t=%d: mon=%d, mdays=%d, sop_t=%d, eop_t=%d\n",
3993 		t, mon, mdays, sop_t, eop_t);
3994 #endif
3995 
3996 	if (t == 0 && offset > 0 && any_eop && method != COMPACT_EOP) {
3997 	    /* we started with an incomplete month: so any
3998 	       method other than EOP yields an NA */
3999 	    x[t] = NADBL;
4000 	} else if (method == COMPACT_SOP) {
4001 	    /* allow one days's slack */
4002 	    if (na(z[sop_t]) && sop_t < dset->n - 1) {
4003 		x[t] = z[sop_t + 1];
4004 	    } else {
4005 		x[t] = z[sop_t];
4006 	    }
4007 	} else if (method == COMPACT_EOP) {
4008 	    if (eop_t >= dset->n) {
4009 		x[t] = NADBL;
4010 	    } else {
4011 		/* allow one days's slack */
4012 		if (na(z[eop_t]) && eop_t > 0) {
4013 		    x[t] = z[eop_t - 1];
4014 		} else {
4015 		    x[t] = z[eop_t];
4016 		}
4017 	    }
4018 	} else if (method == COMPACT_SUM ||
4019 		   method == COMPACT_AVG) {
4020 	    int j, dayt, den = mdays;
4021 	    int n_ok = 0;
4022 
4023 	    x[t] = 0.0;
4024 
4025 	    for (j=0; j<mdays; j++) {
4026 		dayt = sop_t + j;
4027 		if (dayt >= dset->n) {
4028 		    x[t] = NADBL;
4029 		    break;
4030 		} else if (na(z[dayt])) {
4031 		    if (method == COMPACT_AVG) {
4032 			den--;
4033 		    }
4034 		} else {
4035 		    /* got a valid observation */
4036 		    x[t] += z[dayt];
4037 		    n_ok++;
4038 		}
4039 	    }
4040 
4041 	    if (n_ok == 0) {
4042 		x[t] = NADBL;
4043 	    }
4044 
4045 	    if (method == COMPACT_AVG && !na(x[t])) {
4046 		if (den > 0) {
4047 		    x[t] /= (double) den;
4048 		} else {
4049 		    x[t] = NADBL;
4050 		}
4051 	    }
4052 	}
4053 
4054 	sop_t += mdays;
4055 
4056 	if (mon == 12) {
4057 	    mon = 1;
4058 	    yr++;
4059 	} else {
4060 	    mon++;
4061 	}
4062     }
4063 
4064     if (tmp != NULL) {
4065 	free(tmp);
4066     }
4067 
4068     return x;
4069 }
4070 
4071 static void
get_startskip_etc(int compfac,int startmin,int endmin,int oldn,CompactMethod method,int * startskip,int * newn)4072 get_startskip_etc (int compfac, int startmin, int endmin,
4073 		   int oldn, CompactMethod method,
4074 		   int *startskip, int *newn)
4075 {
4076     int ss = 0, n = 0;
4077 
4078     if (method == COMPACT_EOP) {
4079 	int unused;
4080 
4081 	ss = (compfac - (startmin % compfac)) % compfac;
4082 	n = oldn / compfac;
4083 	unused = oldn - 1 - ss - (n-1) * compfac;
4084 	if (unused >= compfac) {
4085 	    n++;
4086 	}
4087     } else if (method == COMPACT_SOP) {
4088 	int unused;
4089 
4090 	ss = (compfac - (startmin % compfac) + 1) % compfac;
4091 	n = oldn / compfac;
4092 	unused = oldn - 1 - ss - (n-1) * compfac;
4093 	if (unused >= compfac) {
4094 	    n++;
4095 	}
4096     } else {
4097 	int es = endmin % compfac;
4098 
4099 	ss = (compfac - (startmin % compfac) + 1) % compfac;
4100 	n = (oldn - ss - es) / compfac;
4101     }
4102 
4103     *startskip = ss;
4104     *newn = n;
4105 }
4106 
4107 /* specific to compaction of daily time series */
4108 
4109 static void
get_daily_compact_params(CompactMethod default_method,int * any_eop,int * any_sop,int * all_same,const DATASET * dset)4110 get_daily_compact_params (CompactMethod default_method,
4111 			  int *any_eop, int *any_sop,
4112 			  int *all_same,
4113 			  const DATASET *dset)
4114 {
4115     int i, n_not_eop = 0, n_not_sop = 0;
4116 
4117     *all_same = 1;
4118     *any_eop = (default_method == COMPACT_EOP)? 1 : 0;
4119     *any_sop = (default_method == COMPACT_SOP)? 1 : 0;
4120 
4121     for (i=1; i<dset->v; i++) {
4122 	CompactMethod method = series_get_compact_method(dset, i);
4123 
4124 	if (method != default_method && method != COMPACT_NONE) {
4125 	    *all_same = 0;
4126 	    if (method == COMPACT_EOP) {
4127 		*any_eop = 1;
4128 	    } else {
4129 		n_not_eop++;
4130 	    }
4131 	    if (method == COMPACT_SOP) {
4132 		*any_sop = 1;
4133 	    } else {
4134 		n_not_sop++;
4135 	    }
4136 	}
4137     }
4138 
4139     if (n_not_eop == dset->v - 1) {
4140 	*any_eop = 0;
4141     }
4142 
4143     if (n_not_sop == dset->v - 1) {
4144 	*any_sop = 0;
4145     }
4146 }
4147 
4148 /* specific to non-daily time series (monthly or quarterly) */
4149 
4150 static void
get_global_compact_params(int compfac,int startmin,int endmin,CompactMethod default_method,int * min_startskip,int * max_n,int * any_eop,int * all_same,DATASET * dset)4151 get_global_compact_params (int compfac, int startmin, int endmin,
4152 			   CompactMethod default_method,
4153 			   int *min_startskip, int *max_n,
4154 			   int *any_eop, int *all_same,
4155 			   DATASET *dset)
4156 {
4157     CompactMethod method;
4158     int i, startskip, n;
4159     int n_not_eop = 0;
4160 
4161     for (i=0; i<dset->v; i++) {
4162 	if (i == 0) {
4163 	    get_startskip_etc(compfac, startmin, endmin, dset->n,
4164 			      default_method, &startskip, &n);
4165 	    if (default_method == COMPACT_EOP) {
4166 		*any_eop = 1;
4167 	    }
4168 	} else {
4169 	    method = series_get_compact_method(dset, i);
4170 	    if (method != default_method && method != COMPACT_NONE) {
4171 		get_startskip_etc(compfac, startmin, endmin, dset->n,
4172 				  method, &startskip, &n);
4173 		*all_same = 0;
4174 		if (method == COMPACT_EOP) {
4175 		    *any_eop = 1;
4176 		} else {
4177 		    n_not_eop++;
4178 		}
4179 	    }
4180 	}
4181 	if (startskip < *min_startskip) {
4182 	    *min_startskip = startskip;
4183 	}
4184 	if (n > *max_n) {
4185 	    *max_n = n;
4186 	}
4187     }
4188 
4189     if (n_not_eop == dset->v - 1) {
4190 	*any_eop = 0;
4191     }
4192 }
4193 
get_obs_maj_min(const char * obs,int * maj,int * min)4194 static int get_obs_maj_min (const char *obs, int *maj, int *min)
4195 {
4196     int np = sscanf(obs, "%d:%d", maj, min);
4197 
4198     if (np < 2) {
4199 	np = sscanf(obs, "%d.%d", maj, min);
4200     }
4201 
4202     return (np == 2);
4203 }
4204 
4205 /* for daily data, figure the number of observations to
4206    be skipped at the start of each series
4207 */
4208 
get_daily_offset(const DATASET * dset,int y,int m,int d,int skip,int any_eop)4209 static int get_daily_offset (const DATASET *dset,
4210 			     int y, int m, int d,
4211 			     int skip, int any_eop)
4212 {
4213     int ret = 0;
4214 
4215     if (skip) {
4216 	/* moving to start of next month: offset = no. of
4217 	   observations in the first month */
4218 	ret = days_in_month_after(y, m, d, dset->pd) + 1;
4219     } else if (any_eop && !day_starts_month(d, m, y, dset->pd, NULL)) {
4220 	ret = days_in_month_after(y, m, d, dset->pd) + 1;
4221     } else {
4222 	/* offset = no. of obs missing at start of first month */
4223 	ret = days_in_month_before(y, m, d, dset->pd);
4224 #if DMDEBUG
4225 	fprintf(stderr, "days_in_month_before %d-%02d-%02d = %d "
4226 		"for pd=%d\n", y, m, d, ret, dset->pd);
4227 #endif
4228     }
4229 
4230     return ret;
4231 }
4232 
4233 /* for daily data, figure the number of valid monthly
4234    observations that can be constructed by compaction
4235 */
4236 
get_n_ok_months(const DATASET * dset,CompactMethod default_method,int * startyr,int * startmon,int * endyr,int * endmon,int * offset,int * p_any_eop)4237 static int get_n_ok_months (const DATASET *dset,
4238 			    CompactMethod default_method,
4239 			    int *startyr, int *startmon,
4240 			    int *endyr, int *endmon,
4241 			    int *offset, int *p_any_eop)
4242 {
4243     int y1, m1, d1, y2, m2, d2;
4244     int any_eop, any_sop, all_same;
4245     int skip = 0, pad = 0, nm = -1;
4246 
4247     if (sscanf(dset->stobs, YMD_READ_FMT, &y1, &m1, &d1) != 3) {
4248 	return -1;
4249     }
4250     if (sscanf(dset->endobs, YMD_READ_FMT, &y2, &m2, &d2) != 3) {
4251 	return -1;
4252     }
4253 
4254     if (y1 < 100) {
4255 	y1 = FOUR_DIGIT_YEAR(y1);
4256     }
4257     if (y2 < 100) {
4258 	y2 = FOUR_DIGIT_YEAR(y2);
4259     }
4260 
4261     nm = 12 * (y2 - y1) + m2 - m1 + 1;
4262 
4263     get_daily_compact_params(default_method, &any_eop, &any_sop,
4264 			     &all_same, dset);
4265 
4266     *startyr = y1;
4267     *startmon = m1;
4268     *endyr = y2;
4269     *endmon = m2;
4270 
4271 #if DMDEBUG
4272     fprintf(stderr, "get_n_ok_months: any_sop=%d, any_eop=%d, "
4273 	    "all_same=%d\n", any_sop, any_eop, all_same);
4274     fprintf(stderr, "y1=%d m1=%d d1=%d; y2=%d m2=%d d2=%d\n",
4275 	    y1, m1, d1, y2, m2, d2);
4276 #endif
4277 
4278     if (!day_starts_month(d1, m1, y1, dset->pd, &pad) && !any_eop) {
4279 	if (*startmon == 12) {
4280 	    *startmon = 1;
4281 	    *startyr += 1;
4282 	} else {
4283 	    *startmon += 1;
4284 	}
4285 	skip = 1;
4286 	nm--;
4287     }
4288 
4289     if (!day_ends_month(d2, m2, y2, dset->pd) && !any_sop) {
4290 	if (*endmon == 1) {
4291 	    *endmon = 12;
4292 	    *endyr -= 1;
4293 	} else {
4294 	    *endmon -= 1;
4295 	}
4296 	nm--;
4297     }
4298 
4299 #if DMDEBUG
4300     fprintf(stderr, "after adjustment: range %d:%02d to %d:%02d, "
4301 	    "pad=%d, skip=%d\n", *startyr, *startmon, *endyr, *endmon,
4302 	    pad, skip);
4303 #endif
4304 
4305     if (pad) {
4306 	*offset = -1;
4307     } else {
4308 	*offset = get_daily_offset(dset, y1, m1, d1, skip, any_eop);
4309     }
4310 
4311     *p_any_eop = any_eop;
4312 
4313     return nm;
4314 }
4315 
4316 #define WEEKLY_DEBUG 0
4317 
4318 static int
weeks_to_months_exec(double ** mZ,const DATASET * dset,CompactMethod method)4319 weeks_to_months_exec (double **mZ, const DATASET *dset,
4320 		      CompactMethod method)
4321 {
4322     char obsstr[OBSLEN];
4323     int *mn = NULL;
4324     int yr, mon, day;
4325     int monbak = 0;
4326     int i, s, t = 0;
4327     int err = 0;
4328 
4329     mn = malloc(dset->v * sizeof *mn);
4330     if (mn == NULL) {
4331 	return E_ALLOC;
4332     }
4333 
4334     for (i=1; i<dset->v; i++) {
4335 	/* initialize all series, first obs */
4336 	mZ[i][0] = NADBL;
4337 	mn[i] = 0;
4338     }
4339 
4340     for (s=0; s<dset->n; s++) {
4341 	/* loop across the weekly obs in this month */
4342 	ntolabel(obsstr, s, dset);
4343 	sscanf(obsstr, YMD_READ_FMT, &yr, &mon, &day);
4344 	if (monbak > 0 && mon != monbak) {
4345 	    /* new month: finalize the previous one */
4346 	    for (i=1; i<dset->v; i++) {
4347 		if (method == COMPACT_EOP) {
4348 		    if (s > 0) {
4349 			mZ[i][t] = dset->Z[i][s-1];
4350 		    }
4351 		} else if (method == COMPACT_AVG) {
4352 		    if (mn[i] > 0) {
4353 			mZ[i][t] /= (double) mn[i];
4354 		    }
4355 		}
4356 	    }
4357 	    /* and start another? */
4358 	    if (s < dset->n - 1) {
4359 		t++;
4360 		for (i=1; i<dset->v; i++) {
4361 		    /* initialize all series, current obs */
4362 		    if (method == COMPACT_SOP) {
4363 			mZ[i][t] = dset->Z[i][s];
4364 		    } else {
4365 			mZ[i][t] = NADBL;
4366 		    }
4367 		    mn[i] = 0;
4368 		}
4369 	    }
4370 	}
4371 
4372 	/* cumulate non-missing weekly observations? */
4373 	for (i=1; i<dset->v; i++) {
4374 	    if (method == COMPACT_SOP) {
4375 		; /* handled above */
4376 	    } else if (method == COMPACT_EOP) {
4377 		mZ[i][t] = dset->Z[i][s];
4378 	    } else if (!na(dset->Z[i][s])) {
4379 		if (na(mZ[i][t])) {
4380 		    mZ[i][t] = dset->Z[i][s];
4381 		} else {
4382 		    mZ[i][t] += dset->Z[i][s];
4383 		}
4384 		mn[i] += 1;
4385 	    }
4386 	    if (mon == monbak && s == dset->n - 1) {
4387 		/* reached the end: ship out last obs */
4388 		if (method == COMPACT_EOP) {
4389 		    mZ[i][t] = NADBL;
4390 		} else if (method == COMPACT_AVG && mn[i] > 0) {
4391 		    mZ[i][t] /= (double) mn[i];
4392 		}
4393 	    }
4394 	}
4395 	monbak = mon;
4396     }
4397 
4398     free(mn);
4399 
4400     return err;
4401 }
4402 
4403 static int
weeks_to_months_check(const DATASET * dset,int * startyr,int * endyr,int * startmon,int * endmon)4404 weeks_to_months_check (const DATASET *dset, int *startyr, int *endyr,
4405 		       int *startmon, int *endmon)
4406 {
4407     char obsstr[OBSLEN];
4408     int yr, mon, day;
4409     int wcount = 0, mcount = 0;
4410     int monbak = 0;
4411     int t, err = 0;
4412 
4413     for (t=0; t<dset->n; t++) {
4414 	ntolabel(obsstr, t, dset);
4415 	if (sscanf(obsstr, YMD_READ_FMT, &yr, &mon, &day) != 3) {
4416 	    err = 1;
4417 	    break;
4418 	}
4419 	if (monbak == 0) {
4420 	    /* first obs */
4421 	    fprintf(stderr, "starting month = '%d'\n", mon);
4422 	    *startyr = yr;
4423 	    *startmon = mon;
4424 	    mcount++;
4425 	    wcount = 1;
4426 	} else if (mon != monbak) {
4427 	    /* got a new month: report on previous one */
4428 #if WEEKLY_DEBUG
4429 	    fprintf(stderr, "month %d ('%d'), weekly obs = %d\n",
4430 		    mcount, monbak, wcount);
4431 #endif
4432 	    mcount++;
4433 	    wcount = 1;
4434 	} else {
4435 	    /* continuation of current month */
4436 	    wcount++;
4437 	}
4438 	monbak = mon;
4439     }
4440 
4441     if (err) {
4442 	mcount = 0;
4443     } else {
4444 	/* flush the last observation */
4445 #if WEEKLY_DEBUG
4446 	fprintf(stderr, "month %d ('%d'), weekly obs = %d\n",
4447 		mcount, monbak, wcount);
4448 #endif
4449 	*endyr = yr;
4450 	*endmon = mon;
4451     }
4452 
4453     return mcount;
4454 }
4455 
weekly_dataset_to_monthly(DATASET * dset,CompactMethod method)4456 static int weekly_dataset_to_monthly (DATASET *dset,
4457 				      CompactMethod method)
4458 {
4459     DATASET mset;
4460     int startyr = 1, endyr = 1;
4461     int startmon = 1, endmon = 1;
4462     int err = 0;
4463 
4464     mset.n = weeks_to_months_check(dset, &startyr, &endyr, &startmon, &endmon);
4465     fprintf(stderr, "Weekly data: found %d months\n", mset.n);
4466     if (mset.n <= 0) {
4467 	return E_DATA;
4468     }
4469 
4470     mset.v = dset->v;
4471     err = allocate_Z(&mset, 0);
4472     if (err) {
4473 	return err;
4474     }
4475 
4476     /* compact series */
4477     if (!err && dset->v > 1) {
4478 	err = weeks_to_months_exec(mset.Z, dset, method);
4479     }
4480 
4481     if (err) {
4482 	free_Z(&mset);
4483     } else {
4484 	free_Z(dset);
4485 	dset->Z = mset.Z;
4486 	dset->n = mset.n;
4487 	dset->pd = 12;
4488 	sprintf(dset->stobs, "%04d:%02d", startyr, startmon);
4489 	sprintf(dset->endobs, "%04d:%02d", endyr, endmon);
4490 	dset->sd0 = get_date_x(dset->pd, dset->stobs);
4491 	dset->t1 = 0;
4492 	dset->t2 = dset->n - 1;
4493     }
4494 
4495     return err;
4496 }
4497 
shorten_the_constant(double ** Z,int n)4498 static int shorten_the_constant (double **Z, int n)
4499 {
4500     double *tmp = realloc(Z[0], n * sizeof *tmp);
4501 
4502     if (tmp == NULL) {
4503 	return E_ALLOC;
4504     } else {
4505 	Z[0] = tmp;
4506 	return 0;
4507     }
4508 }
4509 
4510 /* conversion to weekly using a "representative day", e.g. use
4511    each Wednesday value: @repday is 0-based on Sunday.
4512 */
4513 
daily_dataset_to_weekly(DATASET * dset,int repday)4514 static int daily_dataset_to_weekly (DATASET *dset, int repday)
4515 {
4516     int y1, m1, d1;
4517     char obs[OBSLEN];
4518     double *x = NULL;
4519     double *tmp;
4520     int n = 0, n_ok = 0;
4521     int wday, ok;
4522     int i, t, err = 0;
4523 
4524     fprintf(stderr, "daily_dataset_to_weekly: repday = %d\n", repday);
4525 
4526     for (t=0; t<dset->n; t++) {
4527 	ntolabel(obs, t, dset);
4528 	wday = weekday_from_date(obs);
4529 	if (wday == repday) {
4530 	    ok = 0;
4531 	    for (i=1; i<dset->v; i++) {
4532 		if (!na(dset->Z[i][t])) {
4533 		    ok = 1;
4534 		    break;
4535 		}
4536 	    }
4537 	    if (ok) {
4538 		n_ok++;
4539 	    }
4540 	    if (n == 0) {
4541 		sscanf(obs, YMD_READ_FMT, &y1, &m1, &d1);
4542 	    }
4543 	    n++;
4544 	}
4545     }
4546 
4547     if (n_ok == 0) {
4548 	gretl_errmsg_set(_("Compacted dataset would be empty"));
4549 	return 1;
4550     }
4551 
4552     fprintf(stderr, "n=%d, n_ok=%d, y1=%d, m1=%d, d1=%d\n",
4553 	    n, n_ok, y1, m1, d1);
4554 
4555     x = malloc(n * sizeof *x);
4556     if (x == NULL) {
4557 	return E_ALLOC;
4558     }
4559 
4560     err = shorten_the_constant(dset->Z, n);
4561 
4562     for (i=1; i<dset->v && !err; i++) {
4563 	int s = 0;
4564 
4565 	for (t=0; t<dset->n; t++) {
4566 	    ntolabel(obs, t, dset);
4567 	    wday = weekday_from_date(obs);
4568 	    if (wday == repday) {
4569 		x[s++] = dset->Z[i][t];
4570 	    }
4571 	}
4572 	tmp = realloc(dset->Z[i], n * sizeof *tmp);
4573 	if (tmp == NULL) {
4574 	    err = E_ALLOC;
4575 	} else {
4576 	    dset->Z[i] = tmp;
4577 	    for (t=0; t<n; t++) {
4578 		dset->Z[i][t] = x[t];
4579 	    }
4580 	}
4581     }
4582 
4583     free(x);
4584 
4585     if (!err) {
4586 	dset->n = n;
4587 	dset->pd = 52;
4588 
4589 	sprintf(dset->stobs, YMD_WRITE_Y4_FMT, y1, m1, d1);
4590 	dset->sd0 = get_date_x(dset->pd, dset->stobs);
4591 	dset->t1 = 0;
4592 	dset->t2 = dset->n - 1;
4593 	ntolabel(dset->endobs, dset->t2, dset);
4594 
4595 	dataset_destroy_obs_markers(dset);
4596     }
4597 
4598     return err;
4599 }
4600 
daily_dataset_to_monthly(DATASET * dset,CompactMethod default_method)4601 static int daily_dataset_to_monthly (DATASET *dset,
4602 				     CompactMethod default_method)
4603 {
4604     int nm, startyr, startmon, endyr, endmon;
4605     int offset, any_eop;
4606     CompactMethod method;
4607     double *x;
4608     int i, err = 0;
4609 
4610     nm = get_n_ok_months(dset, default_method, &startyr, &startmon,
4611 			 &endyr, &endmon, &offset, &any_eop);
4612 
4613     if (nm <= 0) {
4614 	gretl_errmsg_set(_("Compacted dataset would be empty"));
4615 	return E_DATA;
4616     }
4617 
4618     err = shorten_the_constant(dset->Z, nm);
4619 
4620     for (i=1; i<dset->v && !err; i++) {
4621 	method = series_get_compact_method(dset, i);
4622 	if (method == COMPACT_NONE) {
4623 	    method = default_method;
4624 	}
4625 
4626 	x = daily_series_to_monthly(dset, i, nm,
4627 				    startyr, startmon,
4628 				    offset, any_eop, method);
4629 	if (x == NULL) {
4630 	    err = E_ALLOC;
4631 	} else {
4632 	    free(dset->Z[i]);
4633 	    dset->Z[i] = x;
4634 	}
4635     }
4636 
4637     if (!err) {
4638 	dset->n = nm;
4639 	dset->pd = 12;
4640 	sprintf(dset->stobs, "%04d:%02d", startyr, startmon);
4641 	sprintf(dset->endobs, "%04d:%02d", endyr, endmon);
4642 	dset->sd0 = get_date_x(dset->pd, dset->stobs);
4643 	dset->t1 = 0;
4644 	dset->t2 = dset->n - 1;
4645 
4646 	dataset_destroy_obs_markers(dset);
4647     }
4648 
4649     return err;
4650 }
4651 
get_daily_skip(const DATASET * dset,int t)4652 static int get_daily_skip (const DATASET *dset, int t)
4653 {
4654     int dd = calendar_obs_number(dset->S[t], dset) -
4655 	calendar_obs_number(dset->S[t-1], dset);
4656 
4657     if (dd == 0) {
4658 	fprintf(stderr, "get_daily_skip: S[%d] = '%s', S[%d] = '%s'\n",
4659 		t, dset->S[t], t-1, dset->S[t-1]);
4660     }
4661 
4662     return dd - 1;
4663 }
4664 
insert_missing_hidden_obs(DATASET * dset,int nmiss)4665 static int insert_missing_hidden_obs (DATASET *dset, int nmiss)
4666 {
4667     int oldn = dset->n;
4668     double *tmp, **Z;
4669     int i, s, t, skip;
4670     int err = 0;
4671 
4672     err = dataset_add_observations(dset, nmiss, OPT_NONE);
4673     if (err) {
4674 	return err;
4675     }
4676 
4677 #if DB_DEBUG
4678     fprintf(stderr, "daily data: expanded n from %d to %d\n",
4679 	    oldn, dset->n);
4680 #endif
4681 
4682     Z = dset->Z;
4683     tmp = Z[0];
4684 
4685     for (i=1; i<dset->v && !err; i++) {
4686 	for (s=0; s<oldn; s++) {
4687 	    tmp[s] = Z[i][s];
4688 	}
4689 
4690 	Z[i][0] = tmp[0];
4691 	t = 1;
4692 	for (s=1; s<oldn; s++) {
4693 	    skip = get_daily_skip(dset, s);
4694 	    if (skip < 0) {
4695 		err = E_DATA;
4696 		break;
4697 	    }
4698 	    while (skip--) {
4699 		Z[i][t++] = NADBL;
4700 	    }
4701 	    Z[i][t++] = tmp[s];
4702 	}
4703     }
4704 
4705     for (t=0; t<dset->n; t++) {
4706 	Z[0][t] = 1.0;
4707 	if (dset->S != NULL) {
4708 	    calendar_date_string(dset->S[t], t, dset);
4709 	}
4710     }
4711 
4712     if (!err) {
4713 	dset->t2 = dset->n - 1;
4714 	ntolabel(dset->endobs, dset->n - 1, dset);
4715     }
4716 
4717 #if DB_DEBUG > 1
4718     fprintf(stderr, "insert_missing_hidden_obs, done, err = %d\n", err);
4719     for (t=0; t<dset->n; t++) {
4720 	fprintf(stderr, "Z[1][%d] = %14g\n", t, Z[1][t]);
4721     }
4722 #endif
4723 
4724     return err;
4725 }
4726 
maybe_expand_daily_data(DATASET * dset)4727 int maybe_expand_daily_data (DATASET *dset)
4728 {
4729     int nmiss = n_hidden_missing_obs(dset, 0, dset->n - 1);
4730     int err = 0;
4731 
4732     fprintf(stderr, "n_hidden_missing_obs: nmiss = %d\n", nmiss);
4733 
4734     if (nmiss < 0) {
4735 	err = 1;
4736     } else if (nmiss > 0) {
4737 	err = insert_missing_hidden_obs(dset, nmiss);
4738     }
4739 
4740     return err;
4741 }
4742 
do_compact_spread(DATASET * dset,int newpd)4743 static int do_compact_spread (DATASET *dset, int newpd)
4744 {
4745     DATASET *cset = NULL;
4746     int nv = 0;
4747     int err;
4748 
4749     err = compact_spread_pd_check(dset->pd, newpd);
4750     if (err) {
4751 	return err;
4752     }
4753 
4754     if (dated_daily_data(dset)) {
4755 	err = maybe_expand_daily_data(dset);
4756 	if (err) {
4757 	    gretl_errmsg_set("Error expanding daily data with missing observations");
4758 	} else {
4759 	    cset = compact_daily_spread(dset, newpd, &nv, &err);
4760 	}
4761     } else {
4762 	int startmaj, startmin;
4763 	int endmaj, endmin;
4764 
4765 
4766 	/* get starting obs major and minor components */
4767 	if (!get_obs_maj_min(dset->stobs, &startmaj, &startmin)) {
4768 	    return E_DATA;
4769 	}
4770 
4771 	/* get ending obs major and minor components */
4772 	if (!get_obs_maj_min(dset->endobs, &endmaj, &endmin)) {
4773 	    return E_DATA;
4774 	}
4775 
4776 	cset = compact_data_spread(dset, newpd, startmaj, startmin,
4777 				   endmaj, endmin, &nv, &err);
4778     }
4779 
4780     if (!err) {
4781 	free_Z(dset);
4782 	clear_datainfo(dset, CLEAR_FULL);
4783 	*dset = *cset;
4784 	free(cset);
4785     }
4786 
4787     return err;
4788 }
4789 
4790 /**
4791  * compact_data_set:
4792  * @dset: dataset struct.
4793  * @newpd: target data frequency.
4794  * @default_method: code for the default compaction method.
4795  * @monstart: if non-zero, take Monday rather than Sunday as
4796  * the "start of the week" (only relevant for 7-day daily
4797  * data).
4798  * @repday: "representative day" for conversion from daily
4799  * to weekly data (with method %COMPACT_WDAY only).
4800  *
4801  * Compact the data set from higher to lower frequency.
4802  *
4803  * Returns: 0 on success, non-zero error code on failure.
4804  */
4805 
compact_data_set(DATASET * dset,int newpd,CompactMethod default_method,int monstart,int repday)4806 int compact_data_set (DATASET *dset, int newpd,
4807 		      CompactMethod default_method,
4808 		      int monstart, int repday)
4809 {
4810     int newn, oldn = dset->n, oldpd = dset->pd;
4811     int compfac;
4812     int startmaj, startmin;
4813     int endmaj, endmin;
4814     int any_eop, all_same;
4815     int min_startskip = 0;
4816     char stobs[OBSLEN];
4817     int i, err = 0;
4818 
4819     gretl_error_clear();
4820 
4821     if (default_method == COMPACT_SPREAD) {
4822 	return do_compact_spread(dset, newpd);
4823     }
4824 
4825     if (oldpd == 52) {
4826 	return weekly_dataset_to_monthly(dset, default_method);
4827     }
4828 
4829     if (dated_daily_data(dset)) {
4830 	/* allow for the possibility that the daily dataset
4831 	   contains "hidden" or suppressed missing observations
4832 	   (holidays are just skipped, not marked as NA)
4833 	*/
4834 	err = maybe_expand_daily_data(dset);
4835 	if (err) {
4836 	    gretl_errmsg_set("Error expanding daily data with missing observations");
4837 	    return err;
4838 	} else {
4839 	    oldn = dset->n;
4840 	}
4841     }
4842 
4843     if (newpd == 52 && oldpd >= 5 && oldpd <= 7 &&
4844 	default_method == COMPACT_WDAY) {
4845 	/* daily to weekly, using "representative day" */
4846 	return daily_dataset_to_weekly(dset, repday);
4847     } else if (newpd == 12 && oldpd >= 5 && oldpd <= 7) {
4848 	/* daily to monthly: special */
4849 	return daily_dataset_to_monthly(dset, default_method);
4850     } else if (oldpd >= 5 && oldpd <= 7) {
4851 	/* daily to weekly */
4852 	compfac = oldpd;
4853 	if (dated_daily_data(dset)) {
4854 	    startmin = weekday_from_date(dset->stobs);
4855 	    if (oldpd == 7) {
4856 		if (monstart) {
4857 		    if (startmin == 0) startmin = 7;
4858 		} else {
4859 		    startmin++;
4860 		}
4861 	    }
4862 	} else {
4863 	    startmin = 1;
4864 	}
4865     } else if (oldpd == 24 && newpd >= 5 && newpd <= 7) {
4866 	/* hourly to daily */
4867 	compfac = 24;
4868 	if (!get_obs_maj_min(dset->stobs, &startmaj, &startmin)) {
4869 	    return 1;
4870 	}
4871     } else {
4872 	compfac = oldpd / newpd;
4873 	/* get starting obs major and minor components */
4874 	if (!get_obs_maj_min(dset->stobs, &startmaj, &startmin)) {
4875 	    return 1;
4876 	}
4877 	/* get ending obs major and minor components */
4878 	if (!get_obs_maj_min(dset->endobs, &endmaj, &endmin)) {
4879 	    return 1;
4880 	}
4881     }
4882 
4883     min_startskip = oldpd;
4884     newn = 0;
4885     any_eop = 0;
4886     all_same = 1;
4887     get_global_compact_params(compfac, startmin, endmin, default_method,
4888 			      &min_startskip, &newn, &any_eop, &all_same,
4889 			      dset);
4890 
4891     if (newn == 0 && default_method != COMPACT_SPREAD) {
4892 	gretl_errmsg_set(_("Compacted dataset would be empty"));
4893 	return 1;
4894     }
4895 
4896     if (newpd == 1) {
4897 	if (min_startskip > 0 && !any_eop) {
4898 	    startmaj++;
4899 	}
4900 	sprintf(stobs, "%d", startmaj);
4901     } else if (newpd == 52) {
4902 	if (oldpd >= 5 && oldpd <= 7 && dset->S != NULL) {
4903 	    strcpy(stobs, dset->S[min_startskip]);
4904 	} else {
4905 	    strcpy(stobs, "1");
4906 	}
4907     } else {
4908 	int m0 = startmin + min_startskip;
4909 	int minor = m0 / compfac + (m0 % compfac > 0);
4910 
4911 	if (minor > newpd) {
4912 	    startmaj++;
4913 	    minor -= newpd;
4914 	}
4915 	format_obs(stobs, startmaj, minor, newpd);
4916     }
4917 
4918     /* revise datainfo members */
4919     strcpy(dset->stobs, stobs);
4920     dset->pd = newpd;
4921     dset->n = newn;
4922     dset->sd0 = get_date_x(dset->pd, dset->stobs);
4923     dset->t1 = 0;
4924     dset->t2 = dset->n - 1;
4925     ntolabel(dset->endobs, dset->t2, dset);
4926 
4927     if (oldpd >= 5 && oldpd <= 7 && dset->markers) {
4928 	/* remove any daily date strings; revise endobs */
4929 	dataset_destroy_obs_markers(dset);
4930 	ntolabel(dset->endobs, dset->t2, dset);
4931     }
4932 
4933     err = shorten_the_constant(dset->Z, dset->n);
4934 
4935     /* compact the individual data series */
4936     for (i=1; i<dset->v && !err; i++) {
4937 	CompactMethod this_method = default_method;
4938 	int startskip = min_startskip;
4939 	double *x;
4940 
4941 	if (!all_same) {
4942 	    CompactMethod m_i = series_get_compact_method(dset, i);
4943 
4944 	    if (m_i != COMPACT_NONE) {
4945 		this_method = m_i;
4946 	    }
4947 
4948 	    startskip = compfac - (startmin % compfac) + 1;
4949 	    startskip = startskip % compfac;
4950 	    if (this_method == COMPACT_EOP) {
4951 		if (startskip > 0) {
4952 		    startskip--;
4953 		} else {
4954 		    startskip = compfac - 1;
4955 		}
4956 	    }
4957 	}
4958 
4959 	x = compact_series(dset, i, oldn, startskip, min_startskip,
4960 			   compfac, this_method);
4961 	if (x == NULL) {
4962 	    err = E_ALLOC;
4963 	} else {
4964 	    free(dset->Z[i]);
4965 	    dset->Z[i] = x;
4966 	}
4967     }
4968 
4969     return err;
4970 }
4971 
4972 /**
4973  * expand_data_set:
4974  * @dset: dataset struct.
4975  * @newpd: target data frequency.
4976  *
4977  * Expand the data set from lower to higher frequency: an "expert"
4978  * option.  This is supported only for expansion from annual
4979  * to quarterly or monthly, or from quarterly to monthly.
4980  *
4981  * Returns: 0 on success, non-zero error code on failure.
4982  */
4983 
expand_data_set(DATASET * dset,int newpd)4984 int expand_data_set (DATASET *dset, int newpd)
4985 {
4986     char stobs[OBSLEN];
4987     int oldn = dset->n;
4988     int oldpd = dset->pd;
4989     int t1 = dset->t1;
4990     int t2 = dset->t2;
4991     int mult, newn, nadd;
4992     double *x = NULL;
4993     size_t sz;
4994     int i, j, t, s;
4995     int err = 0;
4996 
4997     if (oldpd != 1 && oldpd != 4) {
4998 	return E_PDWRONG;
4999     } else if (oldpd == 1 && newpd != 4 && newpd != 12) {
5000 	return E_DATA;
5001     } else if (oldpd == 4 && newpd != 12) {
5002 	return E_DATA;
5003     }
5004 
5005     x = malloc(oldn * sizeof *x);
5006     if (x == NULL) {
5007 	return E_ALLOC;
5008     }
5009 
5010     mult = newpd / oldpd;  /* frequency increase factor */
5011     newn = mult * dset->n; /* revised number of observations */
5012     nadd = newn - oldn;    /* number of obs to add */
5013 
5014     err = dataset_add_observations(dset, nadd, OPT_D);
5015     if (err) {
5016 	goto bailout;
5017     }
5018 
5019     sz = oldn * sizeof *x;
5020     for (i=1; i<dset->v; i++) {
5021 	memcpy(x, dset->Z[i], sz);
5022 	s = 0;
5023 	for (t=0; t<oldn; t++) {
5024 	    for (j=0; j<mult; j++) {
5025 		dset->Z[i][s++] = x[t];
5026 	    }
5027 	}
5028 	series_set_orig_pd(dset, i, oldpd);
5029     }
5030 
5031     if (dset->pd == 1) {
5032 	/* starting with annual data */
5033 	strcpy(stobs, dset->stobs);
5034 	if (newpd == 4) {
5035 	    strcat(stobs, ":1");
5036 	} else {
5037 	    strcat(stobs, ":01");
5038 	}
5039     } else {
5040 	/* starting with quarterly data */
5041 	int yr, qtr, mo;
5042 
5043 	sscanf(dset->stobs, "%d:%d", &yr, &qtr);
5044 	mo = (qtr - 1) * 3 + 1;
5045 	sprintf(stobs, "%d:%02d", yr, mo);
5046     }
5047 
5048     /* revise the sample range, if set */
5049     if (dset->t1 > 0) {
5050 	dset->t1 *= mult;
5051     }
5052     if (dset->t2 < oldn - 1) {
5053 	dset->t2 = dset->t1 + (t2 - t1 + 1) * mult - 1;
5054     }
5055 
5056     strcpy(dset->stobs, stobs);
5057     dset->pd = newpd;
5058     dset->sd0 = get_date_x(dset->pd, dset->stobs);
5059     ntolabel(dset->endobs, dset->n - 1, dset);
5060 
5061  bailout:
5062 
5063     free(x);
5064 
5065     return err;
5066 }
5067