1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 /* dbread.c for gretl */
21
22 #include "libgretl.h"
23 #include "swap_bytes.h"
24 #include "libset.h"
25 #include "uservar.h"
26 #include "matrix_extra.h"
27 #include "usermat.h"
28 #include "dbread.h"
29 #include "gretl_midas.h"
30 #include "gretl_typemap.h"
31 #ifdef USE_CURL
32 # include "gretl_www.h"
33 #endif
34
35 #include <glib.h>
36 #include <unistd.h>
37 #include <errno.h>
38
39 #if G_BYTE_ORDER == G_BIG_ENDIAN
40 # include <netinet/in.h>
41 #endif
42
43 /**
44 * SECTION:dbread
45 * @short_description: reading from databases
46 * @title: DB read
47 * @include: gretl/libgretl.h, gretl/dbread.h
48 *
49 * Functions that read data from native gretl databases as
50 * well as RATS 4.0 and PcGive databases. As you will see,
51 * this area is mostly undocumented at present, but since it
52 * may ultimately be useful for third-party coders we will
53 * try to remedy this!
54 */
55
56 #define DB_DEBUG 0
57
58 #define RECNUM gint32
59 #define NAMELENGTH 16
60 #define RATSCOMMENTLENGTH 80
61 #define RATSCOMMENTS 2
62 #define RATS_PARSE_ERROR -999
63
64 typedef struct {
65 gint32 daynumber; /* Number of days from 1-1-90
66 to year, month, day */
67 short panel; /* 1 for panel set, 2 for intraday
68 date set , 0 o.w. */
69 #define LINEAR 0 /* Single time direction */
70 #define RATSPANEL 1 /* panel:period */
71 #define INTRADAY 2 /* date:intraday period */
72 gint32 panelrecord; /* Size of panel or
73 number of periods per day */
74 short dclass; /* See definitions below */
75 #define UNDATEDCLASS 0 /* No time series properties */
76 #define IRREGULARCLASS 1 /* Time series (irregular) */
77 #define PERYEARCLASS 2 /* x periods / year */
78 #define PERWEEKCLASS 3 /* x periods / week */
79 #define DAILYCLASS 4 /* x days / period */
80 gint32 info; /* Number of periods per year or
81 per week */
82 short digits; /* Digits for representing panel
83 or intraday period */
84 short year,month,day; /* Starting year, month, day */
85 } DATEINFO;
86
87 typedef struct {
88 RECNUM back_point; /* Pointer to previous series */
89 RECNUM forward_point; /* Pointer to next series */
90 short back_class; /* Reserved. Should be 0 */
91 short forward_class; /* Reserved. Should be 0 */
92 RECNUM first_data; /* First data record */
93 char series_name[NAMELENGTH]; /* Series name */
94 DATEINFO date_info; /* Dating scheme for this series */
95 gint32 datapoints; /* Number of data points */
96 short data_type; /* real, char, complex.
97 Reserved. Should be 0 */
98 short digits; /* . + digit count for representation
99 (0 = unspecified) */
100 short misc1; /* For future expansion should be 0 */
101 short misc2;
102 short comment_lines; /* Number of comment lines (0,1,2) */
103 char series_class[NAMELENGTH]; /* Series class. Not used, blank */
104 char comments[RATSCOMMENTS][RATSCOMMENTLENGTH];
105 char pad[10];
106 } RATSDirect;
107
108 typedef struct {
109 RECNUM back_point; /* Previous record (0 for first) */
110 RECNUM forward_point; /* Next record (0 for last) */
111 double data[31]; /* Data */
112 } RATSData;
113
114 static char saved_db_name[MAXLEN];
115 static int saved_db_type;
116
117 #if G_BYTE_ORDER == G_BIG_ENDIAN
retrieve_float(netfloat nf)118 float retrieve_float (netfloat nf)
119 {
120 short exp = ntohs(nf.exp);
121 long frac = ntohl(nf.frac);
122 double receive = frac / 10e6;
123
124 return ldexp(receive, exp);
125 }
126 #endif
127
128 static int lib_add_db_data (double **dbZ, SERIESINFO *sinfo,
129 DATASET *dset, CompactMethod cmethod,
130 int dbv, PRN *prn);
131
132 static int do_compact_spread (DATASET *dset, int newpd);
133
open_binfile(const char * dbbase,int code,int offset,int * err)134 static FILE *open_binfile (const char *dbbase, int code, int offset, int *err)
135 {
136 char dbbin[MAXLEN];
137 FILE *fp = NULL;
138
139 strcpy(dbbin, dbbase);
140 if (code == GRETL_NATIVE_DB) {
141 if (strstr(dbbin, ".bin") == NULL) {
142 strcat(dbbin, ".bin");
143 }
144 } else {
145 if (strstr(dbbin, ".bn7") == NULL) {
146 strcat(dbbin, ".bn7");
147 }
148 }
149
150 fp = gretl_fopen(dbbin, "rb");
151
152 if (fp == NULL) {
153 *err = E_FOPEN;
154 } else if (fseek(fp, (long) offset, SEEK_SET)) {
155 *err = DB_PARSE_ERROR;
156 fclose(fp);
157 fp = NULL;
158 }
159
160 return fp;
161 }
162
163 /**
164 * get_native_db_data:
165 * @dbbase:
166 * @sinfo:
167 * @Z: data array.
168 *
169 * Returns: 0 on success, non-zero code on failure.
170 */
171
get_native_db_data(const char * dbbase,SERIESINFO * sinfo,double ** Z)172 int get_native_db_data (const char *dbbase, SERIESINFO *sinfo,
173 double **Z)
174 {
175 char numstr[32];
176 FILE *fp;
177 dbnumber x;
178 int v = sinfo->v;
179 int t, t2, err = 0;
180
181 fp = open_binfile(dbbase, GRETL_NATIVE_DB, sinfo->offset, &err);
182 if (err) {
183 return err;
184 }
185
186 t2 = (sinfo->t2 > 0)? sinfo->t2 : sinfo->nobs - 1;
187
188 for (t=sinfo->t1; t<=t2 && !err; t++) {
189 if (fread(&x, sizeof x, 1, fp) != 1) {
190 err = DB_PARSE_ERROR;
191 } else {
192 sprintf(numstr, "%.7g", (double) x); /* N.B. converting a float */
193 Z[v][t] = atof(numstr);
194 if (Z[v][t] == DBNA) {
195 Z[v][t] = NADBL;
196 }
197 }
198 }
199
200 fclose(fp);
201
202 return err;
203 }
204
205 #ifdef USE_CURL
206
207 /**
208 * get_remote_db_data:
209 * @dbbase:
210 * @sinfo:
211 * @Z: data array.
212 *
213 * Returns: 0 on success, non-zero code on failure.
214 */
215
get_remote_db_data(const char * dbbase,SERIESINFO * sinfo,double ** Z)216 int get_remote_db_data (const char *dbbase, SERIESINFO *sinfo,
217 double **Z)
218 {
219 char *getbuf = NULL;
220 int t, t2, err;
221 int v = sinfo->v;
222 dbnumber x;
223 size_t offset;
224 #if G_BYTE_ORDER == G_BIG_ENDIAN
225 netfloat nf;
226 #endif
227
228 #if G_BYTE_ORDER == G_BIG_ENDIAN
229 err = retrieve_remote_db_data(dbbase, sinfo->varname, &getbuf,
230 GRAB_NBO_DATA);
231 #else
232 err = retrieve_remote_db_data(dbbase, sinfo->varname, &getbuf,
233 GRAB_DATA);
234 #endif
235
236 if (err) {
237 free(getbuf);
238 return E_FOPEN;
239 }
240
241 t2 = (sinfo->t2 > 0)? sinfo->t2 : sinfo->nobs - 1;
242
243 offset = 0L;
244 for (t=sinfo->t1; t<=t2; t++) {
245 #if G_BYTE_ORDER == G_BIG_ENDIAN
246 /* go via network byte order */
247 memcpy(&(nf.frac), getbuf + offset, sizeof nf.frac);
248 offset += sizeof nf.frac;
249 memcpy(&(nf.exp), getbuf + offset, sizeof nf.exp);
250 offset += sizeof nf.exp;
251 x = retrieve_float(nf);
252 #else
253 /* just read floats */
254 memcpy(&x, getbuf + offset, sizeof x);
255 offset += sizeof x;
256 #endif
257 Z[v][t] = (x == DBNA)? NADBL : x;
258 }
259
260 free(getbuf);
261
262 return 0;
263 }
264
265 #endif /* USE_CURL */
266
267 /**
268 * get_pcgive_db_data:
269 * @dbbase:
270 * @sinfo:
271 * @Z: data array.
272 *
273 *
274 * Returns: 0 on success, non-zero code on failure.
275 */
276
get_pcgive_db_data(const char * dbbase,SERIESINFO * sinfo,double ** Z)277 int get_pcgive_db_data (const char *dbbase, SERIESINFO *sinfo,
278 double **Z)
279 {
280 FILE *fp;
281 double x;
282 int v = sinfo->v;
283 int t, t2, err = 0;
284
285 fp = open_binfile(dbbase, GRETL_PCGIVE_DB, sinfo->offset, &err);
286 if (err) {
287 return err;
288 }
289
290 t2 = (sinfo->t2 > 0)? sinfo->t2 : sinfo->nobs - 1;
291
292 for (t=sinfo->t1; t<=t2; t++) {
293 if (fread(&x, sizeof x, 1, fp) != 1) {
294 err = E_DATA;
295 break;
296 }
297 #if G_BYTE_ORDER == G_BIG_ENDIAN
298 reverse_double(x);
299 #endif
300 if (x == -9999.99 || isnan(x)) {
301 Z[v][t] = NADBL;
302 err = DB_MISSING_DATA;
303 } else {
304 Z[v][t] = x;
305 }
306 }
307
308 fclose(fp);
309
310 return err;
311 }
312
get_native_series_comment(SERIESINFO * sinfo,const char * s)313 static void get_native_series_comment (SERIESINFO *sinfo, const char *s)
314 {
315 s += strcspn(s, " "); /* skip varname */
316 s += strspn(s, " "); /* skip space */
317
318 series_info_set_description(sinfo, s);
319 tailstrip(sinfo->descrip);
320 }
321
get_native_series_pd(SERIESINFO * sinfo,char pdc)322 static int get_native_series_pd (SERIESINFO *sinfo, char pdc)
323 {
324 sinfo->pd = 1;
325 sinfo->undated = 0;
326
327 if (pdc == 'M') sinfo->pd = 12;
328 else if (pdc == 'Q') sinfo->pd = 4;
329 else if (pdc == 'B') sinfo->pd = 5;
330 else if (pdc == 'S') sinfo->pd = 6;
331 else if (pdc == 'D') sinfo->pd = 7;
332 else if (pdc == 'U') sinfo->undated = 1;
333 else return 1;
334
335 return 0;
336 }
337
get_native_series_obs(SERIESINFO * sinfo,const char * stobs,const char * endobs)338 static int get_native_series_obs (SERIESINFO *sinfo,
339 const char *stobs,
340 const char *endobs)
341 {
342 char dc = 0;
343
344 if (strchr(stobs, '-')) {
345 dc = '-';
346 } else if (strchr(stobs, '/')) {
347 dc = '/';
348 }
349
350 if (dc != 0) {
351 /* calendar data */
352 const char *q = stobs;
353 const char *p = strchr(stobs, dc);
354
355 if (p - q == 4) {
356 strcpy(sinfo->stobs, q);
357 }
358 q = endobs;
359 p = strchr(endobs, dc);
360 if (p && p - q == 4) {
361 strcpy(sinfo->endobs, q);
362 }
363 } else {
364 *sinfo->stobs = '\0';
365 *sinfo->endobs = '\0';
366 strncat(sinfo->stobs, stobs, OBSLEN-1);
367 strncat(sinfo->endobs, endobs, OBSLEN-1);
368 }
369
370 return 0;
371 }
372
373 static int
open_native_db_files(const char * dname,FILE ** f1,char * name1,FILE ** f2,char * name2)374 open_native_db_files (const char *dname, FILE **f1, char *name1,
375 FILE **f2, char *name2)
376 {
377 char dbbase[FILENAME_MAX];
378 char fname[FILENAME_MAX];
379 FILE *fidx = NULL, *fbin = NULL;
380 int err = 0;
381
382 if (dname != NULL) {
383 strcpy(dbbase, dname);
384 } else {
385 strcpy(dbbase, saved_db_name);
386 }
387
388 if (has_suffix(dbbase, ".bin")) {
389 dbbase[strlen(dbbase) - 4] = '\0';
390 }
391
392 if (f1 != NULL) {
393 strcpy(fname, dbbase);
394 strcat(fname, ".idx");
395
396 if (name1 != NULL) {
397 err = gretl_write_access(fname);
398 if (!err) {
399 strcpy(name1, fname);
400 }
401 }
402
403 if (!err) {
404 fidx = gretl_fopen(fname, "r");
405 if (fidx == NULL) {
406 gretl_errmsg_set(_("Couldn't open database index file"));
407 err = E_FOPEN;
408 }
409 }
410 }
411
412 if (f2 != NULL && !err) {
413 strcpy(fname, dbbase);
414 strcat(fname, ".bin");
415
416 if (name2 != NULL) {
417 err = gretl_write_access(fname);
418 if (!err) {
419 strcpy(name2, fname);
420 }
421 }
422
423 if (!err) {
424 fbin = gretl_fopen(fname, "rb");
425 if (fbin == NULL) {
426 gretl_errmsg_set(_("Couldn't open database binary file"));
427 err = E_FOPEN;
428 }
429 }
430 }
431
432 if (err) {
433 if (fidx != NULL) {
434 fclose(fidx);
435 }
436 } else {
437 if (f1 != NULL) {
438 *f1 = fidx;
439 }
440 if (f2 != NULL) {
441 *f2 = fbin;
442 }
443 }
444
445 return err;
446 }
447
native_db_index_name(void)448 static char *native_db_index_name (void)
449 {
450 char *fname;
451
452 if (has_suffix(saved_db_name, ".bin")) {
453 fname = g_strdup(saved_db_name);
454 strcpy(fname + strlen(fname) - 3, "idx");
455 } else {
456 fname = g_strdup_printf("%s.idx", saved_db_name);
457 }
458
459 return fname;
460 }
461
db_match_glob(FILE * fp,char * line,int linelen,GPatternSpec * pspec,char ** S,int * err)462 static int db_match_glob (FILE *fp,
463 char *line, int linelen,
464 GPatternSpec *pspec,
465 char **S, int *err)
466 {
467 char vname[VNAMELEN], l2[72];
468 int n = 0;
469
470 while (fgets(line, linelen, fp) && !*err) {
471 if (*line == '#') {
472 continue;
473 }
474 if (gretl_scan_varname(line, vname) != 1) {
475 break;
476 }
477 if (g_pattern_match_string(pspec, vname)) {
478 if (S != NULL) {
479 S[n] = gretl_strdup(vname);
480 }
481 n++;
482 }
483 if (fgets(l2, sizeof l2, fp) == NULL) {
484 *err = DB_PARSE_ERROR;
485 }
486 }
487
488 return n;
489 }
490
native_db_match_series(const char * glob,int * nmatch,const char * idxname,int * err)491 static char **native_db_match_series (const char *glob, int *nmatch,
492 const char *idxname, int *err)
493 {
494 GPatternSpec *pspec;
495 char **S = NULL;
496 FILE *fp = NULL;
497 char line[256];
498
499 fp = gretl_fopen(idxname, "rb");
500 if (fp == NULL) {
501 *err = E_FOPEN;
502 *nmatch = 0;
503 return NULL;
504 }
505
506 pspec = g_pattern_spec_new(glob);
507
508 *nmatch = db_match_glob(fp, line, sizeof line, pspec, NULL, err);
509
510 if (!*err && *nmatch > 0) {
511 S = strings_array_new(*nmatch);
512 if (S == NULL) {
513 *nmatch = 0;
514 *err = E_ALLOC;
515 } else {
516 rewind(fp);
517 db_match_glob(fp, line, sizeof line, pspec, S, err);
518 }
519 }
520
521 g_pattern_spec_free(pspec);
522 fclose(fp);
523
524 return S;
525 }
526
get_native_series_info(const char * series,SERIESINFO * sinfo,const char * idxname)527 static int get_native_series_info (const char *series,
528 SERIESINFO *sinfo,
529 const char *idxname)
530 {
531 FILE *fp = NULL;
532 char sername[VNAMELEN];
533 /* 2019-01-08: enlarge @s1 from 256 to 1024 */
534 char s1[1024], s2[72];
535 char stobs[OBSLEN], endobs[OBSLEN];
536 char pdc;
537 int offset = 0;
538 int gotit = 0, err = 0;
539 int n;
540
541 fp = gretl_fopen(idxname, "rb");
542 if (fp == NULL) {
543 return E_FOPEN;
544 }
545
546 while (fgets(s1, sizeof s1, fp) && !gotit) {
547 if (*s1 == '#') {
548 continue;
549 }
550 if (gretl_scan_varname(s1, sername) != 1) {
551 break;
552 }
553 if (!strcmp(series, sername)) {
554 gotit = 1;
555 strcpy(sinfo->varname, sername);
556 }
557 if (fgets(s2, sizeof s2, fp) == NULL) {
558 err = DB_PARSE_ERROR;
559 break;
560 }
561 if (gotit) {
562 get_native_series_comment(sinfo, s1);
563 if (sscanf(s2, "%c %10s %*s %10s %*s %*s %d",
564 &pdc, stobs, endobs, &sinfo->nobs) != 4) {
565 gretl_errmsg_set(_("Failed to parse series information"));
566 err = DB_PARSE_ERROR;
567 } else {
568 get_native_series_pd(sinfo, pdc);
569 get_native_series_obs(sinfo, stobs, endobs);
570 sinfo->offset = offset;
571 sinfo->t2 = sinfo->nobs - 1;
572 }
573 } else {
574 if (sscanf(s2, "%*c %*s %*s %*s %*s %*s %d", &n) != 1) {
575 gretl_errmsg_set(_("Failed to parse series information"));
576 err = DB_PARSE_ERROR;
577 } else {
578 offset += n * sizeof(dbnumber);
579 }
580 }
581 }
582
583 fclose(fp);
584
585 if (!err && !gotit) {
586 gretl_errmsg_sprintf(_("Series not found, '%s'"), series);
587 err = DB_NO_SUCH_SERIES;
588 }
589
590 return err;
591 }
592
593 #ifdef USE_CURL
594
remote_db_index_to_file(const char * fname)595 static int remote_db_index_to_file (const char *fname)
596 {
597 char *buf = NULL;
598 int err;
599
600 err = retrieve_remote_db_index(saved_db_name, &buf);
601
602 if (!err) {
603 FILE *fp = gretl_fopen(fname, "wb");
604
605 if (fp == NULL) {
606 err = E_FOPEN;
607 } else {
608 fputs(buf, fp);
609 fclose(fp);
610 #if 1
611 fprintf(stderr, "remote db index saved\n");
612 #endif
613 }
614 free(buf);
615 }
616
617 return err;
618 }
619
620 #endif /* USE_CURL */
621
in7_get_obs(int y0,int p0,int y1,int p1,SERIESINFO * sinfo)622 static int in7_get_obs (int y0, int p0, int y1, int p1,
623 SERIESINFO *sinfo)
624 {
625 int pd = sinfo->pd;
626 int n = (y1 - y0 + 1) * pd - (p0 - 1) - (pd - p1);
627 int err = 0;
628
629 if (n <= 0) {
630 err = 1;
631 } else {
632 sinfo->nobs = n;
633 sinfo->t2 = n - 1;
634 }
635
636 return err;
637 }
638
639 static int
pcgive_set_stobs_endobs(int y0,int p0,int y1,int p1,SERIESINFO * sinfo)640 pcgive_set_stobs_endobs (int y0, int p0, int y1, int p1,
641 SERIESINFO *sinfo)
642 {
643 int err = 0;
644
645 if (sinfo->pd == 1) {
646 sprintf(sinfo->stobs, "%d", y0);
647 sprintf(sinfo->endobs, "%d", y1);
648 if (y0 == 1) {
649 sinfo->undated = 1;
650 }
651 } else if (sinfo->pd == 4) {
652 sprintf(sinfo->stobs, "%d:%d", y0, p0);
653 sprintf(sinfo->endobs, "%d:%d", y1, p1);
654 } else if (sinfo->pd == 12 || sinfo->pd == 52) {
655 sprintf(sinfo->stobs, "%d:%02d", y0, p0);
656 sprintf(sinfo->endobs, "%d:%02d", y1, p1);
657 } else {
658 err = E_DATA; /* FIXME? */
659 }
660
661 return err;
662 }
663
664 static int
get_pcgive_series_info(const char * series,SERIESINFO * sinfo)665 get_pcgive_series_info (const char *series, SERIESINFO *sinfo)
666 {
667 FILE *fp;
668 char dbidx[MAXLEN];
669 char line[1024];
670 char fmt[24];
671 char *p;
672 int y0, p0, y1, p1;
673 int nf, gotit = 0;
674 int err = 0;
675
676 strcpy(dbidx, saved_db_name);
677 p = strstr(dbidx, ".bn7");
678 if (p != NULL) {
679 strcpy(p, ".in7");
680 } else {
681 strcat(dbidx, ".in7");
682 }
683
684 #if DB_DEBUG
685 fprintf(stderr, "get_pcgive_series_info: dbidx = '%s'\n", dbidx);
686 #endif
687
688 fp = gretl_fopen(dbidx, "r");
689 if (fp == NULL) {
690 gretl_errmsg_set(_("Couldn't open database index file"));
691 return E_FOPEN;
692 }
693
694 sprintf(fmt, "%%%ds %%d %%d %%d %%d %%d %%d", VNAMELEN - 1);
695
696 while (fgets(line, sizeof line, fp) && !gotit) {
697 if (*line == '>') {
698 *sinfo->varname = 0;
699 nf = sscanf(line + 1, fmt, sinfo->varname, &y0, &p0,
700 &y1, &p1, &sinfo->pd, &sinfo->offset);
701 fprintf(stderr, "in7: varname='%s'\n", sinfo->varname);
702 if (!strcmp(sinfo->varname, series)) {
703 gotit = 1;
704 } else {
705 continue;
706 }
707 if (nf == 7 && y0 > 0 && p0 > 0 && y1 > 0 && p1 > 0 &&
708 sinfo->pd >= 1 && sinfo->offset > 0) {
709 while (fgets(line, sizeof line, fp)) {
710 if (*line == ';') {
711 gretl_strstrip(line);
712 series_info_set_description(sinfo, line + 1);
713 } else {
714 break;
715 }
716 }
717 /* transcribe info */
718 err = in7_get_obs(y0, p0, y1, p1, sinfo);
719 if (!err) {
720 err = pcgive_set_stobs_endobs(y0, p0, y1, p1, sinfo);
721 }
722 } else {
723 err = E_DATA;
724 }
725 }
726 }
727
728 fclose(fp);
729
730 if (!err && !gotit) {
731 gretl_errmsg_sprintf(_("Series not found, '%s'"), series);
732 err = DB_NO_SUCH_SERIES;
733 }
734
735 return err;
736 }
737
738 /* Figure the ending observation date of a series */
739
get_endobs(char * datestr,int startyr,int startfrac,int pd,int n)740 static int get_endobs (char *datestr, int startyr, int startfrac,
741 int pd, int n)
742 {
743 int endyr, endfrac;
744
745 endyr = startyr + n / pd;
746 endfrac = startfrac - 1 + n % pd;
747
748 if (endfrac >= pd) {
749 endyr++;
750 endfrac -= pd;
751 }
752
753 if (endfrac == 0) {
754 endyr--;
755 endfrac = pd;
756 }
757
758 if (pd == 1) {
759 sprintf(datestr, "%d", endyr);
760 } else if (pd == 4) {
761 sprintf(datestr, "%d.%d", endyr, endfrac);
762 } else if (pd == 12 || pd == 52) {
763 sprintf(datestr, "%d.%02d", endyr, endfrac);
764 }
765
766 return 0;
767 }
768
dinfo_sanity_check(const DATEINFO * dinfo)769 static int dinfo_sanity_check (const DATEINFO *dinfo)
770 {
771 int err = 0;
772
773 if (dinfo->info < 0 || dinfo->info > 365) {
774 err = 1;
775 } else if (dinfo->day < 0 || dinfo->day > 365) {
776 err = 1;
777 } else if (dinfo->year < 0 || dinfo->year > 3000) {
778 err = 1;
779 } else if (dinfo->info == 52) {
780 /* note: "month" = week */
781 if (dinfo->month < 0 || dinfo->month > 52) {
782 err = 1;
783 }
784 } else {
785 /* annual, quarterly, monthly */
786 if (dinfo->month < 0 || dinfo->month > 12) {
787 err = 1;
788 }
789 }
790
791 if (err) {
792 gretl_errmsg_set(_("This is not a valid RATS 4.0 database"));
793 fprintf(stderr, "rats database: failed dinfo_sanity_check:\n"
794 " info=%d, year=%d, month=%d, day=%d\n",
795 (int) dinfo->info, (int) dinfo->year, (int) dinfo->month,
796 (int) dinfo->day);
797 }
798
799 return err;
800 }
801
dinfo_to_sinfo(const DATEINFO * dinfo,SERIESINFO * sinfo,const char * varname,const char * comment,int n,int offset)802 static int dinfo_to_sinfo (const DATEINFO *dinfo, SERIESINFO *sinfo,
803 const char *varname, const char *comment,
804 int n, int offset)
805 {
806 int startfrac = 0;
807 char pdstr[8] = {0};
808 int err = 0;
809
810 if (dinfo_sanity_check(dinfo)) {
811 return 1;
812 }
813
814 sprintf(sinfo->stobs, "%d", dinfo->year);
815
816 if (dinfo->info == 4) {
817 sprintf(pdstr, ".%d", dinfo->month);
818 if (dinfo->month == 1) {
819 startfrac = 1;
820 } else if (dinfo->month > 1 && dinfo->month <= 4) {
821 startfrac = 2;
822 } else if (dinfo->month > 4 && dinfo->month <= 7) {
823 startfrac = 3;
824 } else {
825 startfrac = 4;
826 }
827 } else if (dinfo->info == 12 || dinfo->info == 52) {
828 sprintf(pdstr, ".%02d", dinfo->month);
829 startfrac = dinfo->month;
830 } else if (dinfo->info == 1) {
831 startfrac = 0;
832 } else {
833 fprintf(stderr, "frequency (%d) does not seem to make sense\n",
834 (int) dinfo->info);
835 gretl_errmsg_sprintf(("frequency (%d) does not seem to make sense"),
836 (int) dinfo->info);
837 err = 1;
838 }
839
840 if (*pdstr) {
841 strcat(sinfo->stobs, pdstr);
842 }
843
844 get_endobs(sinfo->endobs, dinfo->year, startfrac, dinfo->info, n);
845
846 sinfo->pd = dinfo->info;
847 sinfo->nobs = n;
848 sinfo->t2 = n - 1;
849 sinfo->offset = offset;
850
851 strncat(sinfo->varname, varname, VNAMELEN - 1);
852 series_info_set_description(sinfo, comment);
853
854 #if DB_DEBUG
855 fprintf(stderr, "dinfo_to_sinfo: '%s': set sinfo->offset = %d\n", varname,
856 (int) offset);
857 #endif
858
859 return err;
860 }
861
in7_to_sinfo(const char * varname,const char * comment,int y0,int p0,int y1,int p1,int pd,int offset,SERIESINFO * sinfo)862 static int in7_to_sinfo (const char *varname, const char *comment,
863 int y0, int p0, int y1, int p1, int pd,
864 int offset, SERIESINFO *sinfo)
865 {
866 int err = 0;
867
868 if (pd == 4) {
869 sprintf(sinfo->stobs, "%d.%d", y0, p0);
870 sprintf(sinfo->endobs, "%d.%d", y1, p1);
871 } else if (pd == 12 || pd == 52) {
872 sprintf(sinfo->stobs, "%d.%02d", y0, p0);
873 sprintf(sinfo->endobs, "%d.%02d", y1, p1);
874 } else if (pd == 1) {
875 sprintf(sinfo->stobs, "%d", y0);
876 sprintf(sinfo->endobs, "%d", y1);
877 } else {
878 fprintf(stderr, "frequency %d is not supported\n", pd);
879 gretl_errmsg_sprintf(_("frequency %d is not supported"), pd);
880 err = 1;
881 }
882
883 if (!err) {
884 sinfo->pd = pd;
885 err = in7_get_obs(y0, p0, y1, p1, sinfo);
886 }
887
888 if (!err) {
889 strcpy(sinfo->varname, varname);
890 if (comment != NULL && *comment != 0) {
891 series_info_set_description(sinfo, comment);
892 }
893 sinfo->pd = pd;
894 sinfo->offset = offset;
895 }
896
897 return err;
898 }
899
read_rats_directory(FILE * fp,const char * series_name,SERIESINFO * sinfo)900 static RECNUM read_rats_directory (FILE *fp, const char *series_name,
901 SERIESINFO *sinfo)
902 {
903 RATSDirect rdir;
904 DATEINFO dinfo;
905 RECNUM ret;
906 int nread;
907 int i, err = 0;
908
909 memset(rdir.series_name, 0, NAMELENGTH);
910
911 if (fread(&rdir.back_point, sizeof(RECNUM), 1, fp) != 1) {
912 err = 1;
913 } else if (fread(&rdir.forward_point, sizeof(RECNUM), 1, fp) != 1) {
914 err = 1;
915 }
916 if (!err) {
917 fseek(fp, 4L, SEEK_CUR); /* skip two shorts */
918 if (fread(&rdir.first_data, sizeof(RECNUM), 1, fp) != 1) {
919 err = 1;
920 } else if (fread(rdir.series_name, NAMELENGTH, 1, fp) != 1) {
921 err = 1;
922 }
923 }
924
925 if (!err) {
926 rdir.series_name[NAMELENGTH-1] = '\0';
927 gretl_strstrip(rdir.series_name);
928 #if DB_DEBUG
929 fprintf(stderr, "read_rats_directory: name='%s'\n", rdir.series_name);
930 #endif
931 if (!isprint(rdir.series_name[0])) {
932 err = 1;
933 }
934 }
935
936 if (err) {
937 return RATS_PARSE_ERROR;
938 }
939
940 if (series_name != NULL && strcmp(series_name, rdir.series_name)) {
941 /* specific series not found yet: keep going */
942 return rdir.forward_point;
943 }
944
945 /* Now the dateinfo: we can't read this in one go either :-( */
946
947 /* skip long, short, long, short */
948 fseek(fp, 12, SEEK_CUR);
949 nread = 0;
950 nread += fread(&dinfo.info, sizeof(gint32), 1, fp);
951 nread += fread(&dinfo.digits, sizeof(short), 1, fp);
952 nread += fread(&dinfo.year, sizeof(short), 1, fp);
953 nread += fread(&dinfo.month, sizeof(short), 1, fp);
954 nread += fread(&dinfo.day, sizeof(short), 1, fp);
955 nread += fread(&rdir.datapoints, sizeof(gint32), 1, fp);
956
957 if (nread != 6) {
958 return RATS_PARSE_ERROR;
959 }
960
961 fseek(fp, sizeof(short) * 4L, SEEK_CUR); /* skip 4 shorts */
962
963 #if DB_DEBUG
964 fprintf(stderr, "info=%d, digits=%d, year=%d, mon=%d, day=%d\n",
965 (int) dinfo.info, (int) dinfo.digits, (int) dinfo.year,
966 (int) dinfo.month, (int) dinfo.day);
967 fprintf(stderr, "datapoints = %d\n", (int) rdir.datapoints);
968 #endif
969
970 if (fread(&rdir.comment_lines, sizeof(short), 1, fp) != 1) {
971 err = 1;
972 } else {
973 fseek(fp, 1L, SEEK_CUR); /* skip one char */
974 for (i=0; i<2 && !err; i++) {
975 if (i < rdir.comment_lines) {
976 memset(rdir.comments[i], 0, 80);
977 err = (fread(rdir.comments[i], 80, 1, fp) != 1);
978 if (!err) {
979 rdir.comments[i][79] = '\0';
980 gretl_strstrip(rdir.comments[i]);
981 }
982 } else {
983 rdir.comments[i][0] = 0;
984 fseek(fp, 80, SEEK_CUR);
985 }
986 }
987 }
988
989 #if DB_DEBUG
990 if (!err) {
991 fprintf(stderr, "comment_lines = %d\n", (int) rdir.comment_lines);
992 fprintf(stderr, "comment[0] = '%s'\n", rdir.comments[0]);
993 fprintf(stderr, "comment[1] = '%s'\n", rdir.comments[1]);
994 }
995 #endif
996
997 if (!err) {
998 err = dinfo_to_sinfo(&dinfo, sinfo, rdir.series_name, rdir.comments[0],
999 rdir.datapoints, rdir.first_data);
1000 }
1001
1002 ret = (err)? RATS_PARSE_ERROR : rdir.forward_point;
1003
1004 #if DB_DEBUG
1005 fprintf(stderr, "read_rats_directory: err = %d, forward_point=%d, first_data=%d\n",
1006 err, (int) rdir.forward_point, (int) rdir.first_data);
1007 fprintf(stderr, "returning %d\n", (int) ret);
1008 #endif
1009
1010 return ret;
1011 }
1012
series_info_init(SERIESINFO * sinfo)1013 static void series_info_init (SERIESINFO *sinfo)
1014 {
1015 sinfo->t1 = sinfo->t2 = 0;
1016 sinfo->nobs = 0;
1017 sinfo->v = 1;
1018 sinfo->pd = 1;
1019 sinfo->offset = -1;
1020 sinfo->err = 0;
1021 sinfo->undated = 0;
1022
1023 sinfo->varname[0] = '\0';
1024 sinfo->stobs[0] = '\0';
1025 sinfo->endobs[0] = '\0';
1026
1027 sinfo->descrip = NULL;
1028 sinfo->data = NULL;
1029 }
1030
series_info_set_description(SERIESINFO * sinfo,const char * s)1031 void series_info_set_description (SERIESINFO *sinfo,
1032 const char *s)
1033 {
1034 if (sinfo->descrip != NULL) {
1035 free(sinfo->descrip);
1036 sinfo->descrip = NULL;
1037 }
1038 if (s != NULL && *s != '\0') {
1039 sinfo->descrip = gretl_strdup(s);
1040 }
1041 }
1042
series_info_clear(SERIESINFO * sinfo)1043 static void series_info_clear (SERIESINFO *sinfo)
1044 {
1045 free(sinfo->descrip);
1046 }
1047
1048 #define DB_INIT_ROWS 32
1049
1050 /**
1051 * dbwrapper_destroy:
1052 * @dw: database series wrapper.
1053 *
1054 * Frees all resources associated with @dw as well as the pointer
1055 * itself.
1056 */
1057
dbwrapper_destroy(dbwrapper * dw)1058 void dbwrapper_destroy (dbwrapper *dw)
1059 {
1060 if (dw != NULL) {
1061 free(dw->fname);
1062 free(dw->sinfo);
1063 free(dw);
1064 }
1065 }
1066
1067 /**
1068 * dbwrapper_new:
1069 * @n: initial number of series.
1070 * @fname: database filename.
1071 * @dbtype: database type code.
1072 *
1073 * Returns: an allocated database series wrapper.
1074 */
1075
dbwrapper_new(int n,const char * fname,int dbtype)1076 dbwrapper *dbwrapper_new (int n, const char *fname, int dbtype)
1077 {
1078 dbwrapper *dw;
1079 int i;
1080
1081 if (n == 0) {
1082 n = DB_INIT_ROWS;
1083 }
1084
1085 dw = malloc(sizeof *dw);
1086 if (dw == NULL) {
1087 return NULL;
1088 }
1089
1090 dw->fname = gretl_strdup(fname);
1091 dw->dbtype = dbtype;
1092
1093 dw->sinfo = malloc(n * sizeof *dw->sinfo);
1094 if (dw->sinfo == NULL) {
1095 free(dw);
1096 return NULL;
1097 }
1098
1099 for (i=0; i<n; i++) {
1100 series_info_init(&dw->sinfo[i]);
1101 }
1102
1103 dw->nv = 0;
1104 dw->nalloc = n;
1105
1106 return dw;
1107 }
1108
dbwrapper_expand(dbwrapper * dw)1109 static int dbwrapper_expand (dbwrapper *dw)
1110 {
1111 SERIESINFO *sinfo;
1112 int i, newsz;
1113
1114 newsz = (dw->nv / DB_INIT_ROWS) + 1;
1115 newsz *= DB_INIT_ROWS;
1116
1117 sinfo = realloc(dw->sinfo, newsz * sizeof *sinfo);
1118 if (sinfo == NULL) {
1119 free(dw->sinfo);
1120 dw->sinfo = NULL;
1121 return 1;
1122 }
1123
1124 dw->sinfo = sinfo;
1125
1126 for (i=dw->nalloc; i<newsz; i++) {
1127 series_info_init(&dw->sinfo[i]);
1128 }
1129
1130 dw->nalloc = newsz;
1131
1132 return 0;
1133 }
1134
read_in7_series_info(FILE * fp,dbwrapper * dw)1135 static int read_in7_series_info (FILE *fp, dbwrapper *dw)
1136 {
1137 char line[1024];
1138 char sname[VNAMELEN];
1139 char desc[MAXLABEL];
1140 char fmt[24];
1141 int y0, p0, y1, p1;
1142 int pd, offset, pos;
1143 int i, nf;
1144 int err = 0;
1145
1146 sprintf(fmt, "%%%ds %%d %%d %%d %%d %%d %%d", VNAMELEN - 1);
1147
1148 i = 0;
1149 while (fgets(line, sizeof line, fp) && !err) {
1150 if (*line == '>') {
1151 nf = sscanf(line + 1, fmt, sname, &y0, &p0, &y1,
1152 &p1, &pd, &offset);
1153 if (nf == 7 && y0 > 0 && p0 > 0 && y1 > 0 && p1 > 0 &&
1154 pd >= 1 && offset > 0) {
1155 *desc = 0;
1156 pos = ftell(fp);
1157 while (fgets(line, sizeof line, fp)) {
1158 if (*line == ';') {
1159 /* following series description */
1160 int rem = MAXLABEL - strlen(desc) - 1;
1161
1162 if (rem > 0) {
1163 gretl_strstrip(line);
1164 strncat(desc, line + 1, rem);
1165 }
1166 pos = ftell(fp);
1167 } else {
1168 /* not a description: throw the line back */
1169 fseek(fp, pos, SEEK_SET);
1170 break;
1171 }
1172 }
1173 /* record info */
1174 err = in7_to_sinfo(sname, desc, y0, p0, y1, p1,
1175 pd, offset, &dw->sinfo[i++]);
1176 if (!err) {
1177 dw->nv += 1;
1178 }
1179 }
1180 }
1181 }
1182
1183 return err;
1184 }
1185
count_in7_series(FILE * fp,int * err)1186 static int count_in7_series (FILE *fp, int *err)
1187 {
1188 char line[1024];
1189 char sname[VNAMELEN];
1190 char fmt[24];
1191 int y0, p0, y1, p1;
1192 int pd, offset;
1193 int nf, i = 0, nseries = 0;
1194
1195 sprintf(fmt, "%%%ds %%d %%d %%d %%d %%d %%d", VNAMELEN - 1);
1196
1197 while (fgets(line, sizeof line, fp)) {
1198 if (i == 0 && strncmp(line, "pcgive 700", 10)) {
1199 *err = 1;
1200 gretl_errmsg_set("This is not a PcGive 700 data file");
1201 return 0;
1202 }
1203 if (*line == '>') {
1204 nf = sscanf(line + 1, fmt, sname, &y0, &p0, &y1,
1205 &p1, &pd, &offset);
1206 if (nf < 7 || y0 < 0 || p0 < 0 || y1 < 0 || p1 < 0 ||
1207 pd < 1 || offset < 0) {
1208 fprintf(stderr, "Error reading series info\n");
1209 } else {
1210 nseries++;
1211 }
1212 }
1213 i++;
1214 }
1215
1216 return nseries;
1217 }
1218
1219 /**
1220 * read_pcgive_db:
1221 * @fname: name of database file.
1222 * @fp: pre-opened stream (caller to close it)
1223 *
1224 * Read the series info from a PcGive database, .in7 file
1225 *
1226 * Returns: pointer to a #dbwrapper containing the series info,
1227 * or NULL in case of failure.
1228 */
1229
read_pcgive_db(const char * fname,FILE * fp)1230 dbwrapper *read_pcgive_db (const char *fname, FILE *fp)
1231 {
1232 dbwrapper *dw;
1233 int ns, err = 0;
1234
1235 gretl_error_clear();
1236
1237 ns = count_in7_series(fp, &err);
1238 if (ns == 0) {
1239 if (!err) {
1240 gretl_errmsg_set(_("No valid series found"));
1241 }
1242 return NULL;
1243 }
1244
1245 #if DB_DEBUG
1246 fprintf(stderr, "in7: found %d series\n", ns);
1247 #endif
1248
1249 /* allocate table for series rows */
1250 dw = dbwrapper_new(ns, fname, GRETL_PCGIVE_DB);
1251 if (dw == NULL) {
1252 gretl_errmsg_set(_("Out of memory!"));
1253 return NULL;
1254 }
1255
1256 rewind(fp);
1257
1258 /* Go find the series info */
1259 err = read_in7_series_info(fp, dw);
1260
1261 if (err) {
1262 dbwrapper_destroy(dw);
1263 dw = NULL;
1264 }
1265
1266 return dw;
1267 }
1268
1269 /**
1270 * read_rats_db:
1271 * @fname: database filename.
1272 * @fp: pre-opened stream (caller to close it)
1273 *
1274 * Read the series info from a RATS 4.0 database: read the base
1275 * block at offset 0 in the data file, and recurse through the
1276 * directory entries.
1277 *
1278 * Returns: pointer to a #dbwrapper containing the series info,
1279 * or NULL in case of failure.
1280 */
1281
read_rats_db(const char * fname,FILE * fp)1282 dbwrapper *read_rats_db (const char *fname, FILE *fp)
1283 {
1284 dbwrapper *dw;
1285 long forward = 0;
1286 int i, err = 0;
1287
1288 gretl_error_clear();
1289
1290 /* get into position */
1291 fseek(fp, 30L, SEEK_SET); /* skip unneeded fields */
1292 if (fread(&forward, sizeof forward, 1, fp) == 1) {
1293 fseek(fp, 4L, SEEK_CUR);
1294 }
1295
1296 /* basic check */
1297 if (forward <= 0) {
1298 gretl_errmsg_set(_("This is not a valid RATS 4.0 database"));
1299 fprintf(stderr, "rats database: got forward = %ld\n", forward);
1300 return NULL;
1301 }
1302
1303 /* allocate table for series rows */
1304 dw = dbwrapper_new(0, fname, GRETL_RATS_DB);
1305 if (dw == NULL) {
1306 gretl_errmsg_set(_("Out of memory!"));
1307 return NULL;
1308 }
1309
1310 /* Go find the series */
1311 i = 0;
1312 while (forward && !err) {
1313 dw->nv += 1;
1314 #if DB_DEBUG
1315 fprintf(stderr, "read_rats_db: forward = %d, nv = %d\n",
1316 (int) forward, dw->nv);
1317 #endif
1318 if (dw->nv > 0 && dw->nv % DB_INIT_ROWS == 0) {
1319 err = dbwrapper_expand(dw);
1320 if (err) {
1321 gretl_errmsg_set(_("Out of memory!"));
1322 }
1323 }
1324 if (!err) {
1325 err = fseek(fp, (forward - 1) * 256L, SEEK_SET);
1326 if (!err) {
1327 forward = read_rats_directory(fp, NULL, &dw->sinfo[i++]);
1328 if (forward == RATS_PARSE_ERROR) {
1329 err = 1;
1330 }
1331 }
1332 }
1333 #if DB_DEBUG
1334 fprintf(stderr, "bottom of loop, err = %d\n", err);
1335 #endif
1336 }
1337
1338 #if DB_DEBUG
1339 fprintf(stderr, "read_rats_db: err = %d, dw = %p\n",
1340 err, (void *) dw);
1341 #endif
1342
1343 if (err) {
1344 dbwrapper_destroy(dw);
1345 return NULL;
1346 }
1347
1348 return dw;
1349 }
1350
1351 /* retrieve the actual data values from the data blocks */
1352
get_rats_series(int offset,SERIESINFO * sinfo,FILE * fp,double ** Z)1353 static int get_rats_series (int offset, SERIESINFO *sinfo, FILE *fp,
1354 double **Z)
1355 {
1356 RATSData rdata;
1357 double x;
1358 int v = sinfo->v;
1359 int i, t, T;
1360 int miss = 0;
1361 int err = 0;
1362
1363 fprintf(stderr, "get_rats_series: starting from offset %d\n", offset);
1364
1365 if (sinfo->t2 > 0) {
1366 T = sinfo->t2 + 1;
1367 } else {
1368 T = sinfo->nobs;
1369 }
1370
1371 rdata.forward_point = offset;
1372 t = sinfo->t1;
1373
1374 while (rdata.forward_point) {
1375 fseek(fp, (rdata.forward_point - 1) * 256L, SEEK_SET);
1376 /* the RATSData struct is actually 256 bytes. Yay! */
1377 if (fread(&rdata, sizeof rdata, 1, fp) != 1) {
1378 err = E_DATA;
1379 break;
1380 }
1381 for (i=0; i<31 && t<T; i++) {
1382 x = rdata.data[i];
1383 #if G_BYTE_ORDER == G_BIG_ENDIAN
1384 reverse_double(x);
1385 #endif
1386 if (isnan(x)) {
1387 x = NADBL;
1388 miss = 1;
1389 }
1390 Z[v][t++] = x;
1391 }
1392 }
1393
1394 if (miss && !err) {
1395 err = DB_MISSING_DATA;
1396 }
1397
1398 return err;
1399 }
1400
1401 /**
1402 * get_rats_db_data:
1403 * @fname: name of RATS 4.0 database to read from
1404 * @sinfo: holds info on the given series (input)
1405 * @Z: data matrix
1406 *
1407 * Read the actual data values for a series from a RATS database.
1408 *
1409 * Returns: 0 on successful completion, E_FOPEN if
1410 * the data could not be read, and DB_MISSING_DATA if the
1411 * data were found but there were some missing values.
1412 */
1413
get_rats_db_data(const char * fname,SERIESINFO * sinfo,double ** Z)1414 int get_rats_db_data (const char *fname, SERIESINFO *sinfo,
1415 double **Z)
1416 {
1417 FILE *fp;
1418 int err = 0;
1419
1420 fp = gretl_fopen(fname, "rb");
1421 if (fp == NULL) {
1422 err = E_FOPEN;
1423 } else {
1424 err = get_rats_series(sinfo->offset, sinfo, fp, Z);
1425 fclose(fp);
1426 }
1427
1428 return err;
1429 }
1430
get_rats_series_info(const char * series_name,SERIESINFO * sinfo)1431 static int get_rats_series_info (const char *series_name, SERIESINFO *sinfo)
1432 {
1433 FILE *fp;
1434 long forward = 0;
1435 int err = 0;
1436
1437 gretl_error_clear();
1438
1439 fp = gretl_fopen(saved_db_name, "rb");
1440 if (fp == NULL) {
1441 return E_FOPEN;
1442 }
1443
1444 #if DB_DEBUG
1445 fprintf(stderr, "Opened %s\n", saved_db_name);
1446 #endif
1447
1448 /* get into position */
1449 fseek(fp, 30L, SEEK_SET);
1450 if (fread(&forward, sizeof forward, 1, fp) == 1) {
1451 fseek(fp, 4L, SEEK_CUR);
1452 }
1453
1454 /* basic check */
1455 if (forward <= 0) {
1456 gretl_errmsg_set(_("This is not a valid RATS 4.0 database"));
1457 fprintf(stderr, "rats database: got forward = %ld\n", forward);
1458 return DB_PARSE_ERROR;
1459 }
1460
1461 sinfo->offset = 0;
1462
1463 /* Go find the series */
1464 while (forward) {
1465 fseek(fp, (forward - 1) * 256L, SEEK_SET);
1466 forward = read_rats_directory(fp, series_name, sinfo);
1467 if (forward == RATS_PARSE_ERROR) {
1468 sinfo->offset = -1;
1469 }
1470 if (sinfo->offset != 0) {
1471 break;
1472 }
1473 }
1474
1475 fclose(fp);
1476
1477 if (sinfo->offset < 0) {
1478 err = DB_NO_SUCH_SERIES;
1479 }
1480
1481 #if DB_DEBUG
1482 fprintf(stderr, "get_rats_series_info: offset = %d\n", sinfo->offset);
1483 fprintf(stderr, " pd = %d, nobs = %d\n", sinfo->pd, sinfo->nobs);
1484 #endif
1485
1486 return err;
1487 }
1488
1489 /* For importation of database series */
1490
get_compacted_xt(const double * src,int n,CompactMethod method,int compfac,int skip)1491 static double *get_compacted_xt (const double *src, int n,
1492 CompactMethod method,
1493 int compfac, int skip)
1494 {
1495 int p, t;
1496 double *x;
1497
1498 x = malloc(n * sizeof *x);
1499 if (x == NULL) {
1500 return NULL;
1501 }
1502
1503 for (t=0; t<n; t++) {
1504 p = (t + 1) * compfac;
1505 x[t] = 0.0;
1506 if (method == COMPACT_AVG || method == COMPACT_SUM) {
1507 int i, st;
1508
1509 for (i=1; i<=compfac; i++) {
1510 st = p - i + skip;
1511 if (na(src[st])) {
1512 x[t] = NADBL;
1513 break;
1514 } else {
1515 x[t] += src[st];
1516 }
1517 }
1518 if (method == COMPACT_AVG) {
1519 x[t] /= (double) compfac;
1520 }
1521 } else if (method == COMPACT_EOP) {
1522 x[t] = src[p - 1 + skip];
1523 } else if (method == COMPACT_SOP) {
1524 x[t] = src[p - compfac + skip];
1525 }
1526 }
1527
1528 return x;
1529 }
1530
1531 /* Compact a single series from a database, for importation
1532 into a working dataset of lower frequency. At present
1533 this is permitted only for the cases:
1534
1535 quarterly -> annual
1536 monthly -> quarterly
1537 monthly -> annual
1538 */
1539
compact_db_series(const double * src,int pd,int * pnobs,char * stobs,int target_pd,CompactMethod method)1540 static double *compact_db_series (const double *src,
1541 int pd, int *pnobs,
1542 char *stobs,
1543 int target_pd,
1544 CompactMethod method)
1545 {
1546 int p0, y0, endskip, goodobs;
1547 int skip = 0, compfac = pd / target_pd;
1548 double *x;
1549
1550 if (target_pd == 1) {
1551 /* figure the annual dates */
1552 y0 = atoi(stobs);
1553 p0 = atoi(stobs + 5);
1554 if (p0 != 1) {
1555 ++y0;
1556 skip = compfac - (p0 + 1);
1557 }
1558 sprintf(stobs, "%d", y0);
1559 } else if (target_pd == 4) {
1560 /* figure the quarterly dates */
1561 float q;
1562 int q0;
1563
1564 y0 = atoi(stobs);
1565 p0 = atoi(stobs + 5);
1566 q = 1.0 + p0 / 3.;
1567 q0 = q + .5;
1568 skip = ((q0 - 1) * 3) + 1 - p0;
1569 if (q0 == 5) {
1570 y0++;
1571 q0 = 1;
1572 }
1573 sprintf(stobs, "%d.%d", y0, q0);
1574 } else {
1575 return NULL;
1576 }
1577
1578 endskip = (*pnobs - skip) % compfac;
1579 goodobs = (*pnobs - skip - endskip) / compfac;
1580 *pnobs = goodobs;
1581
1582 #if DB_DEBUG
1583 fprintf(stderr, "startskip = %d\n", skip);
1584 fprintf(stderr, "endskip = %d\n", endskip);
1585 fprintf(stderr, "goodobs = %d\n", goodobs);
1586 fprintf(stderr, "compfac = %d\n", compfac);
1587 fprintf(stderr, "starting date = %s\n", stobs);
1588 #endif
1589
1590 x = get_compacted_xt(src, goodobs, method, compfac, skip);
1591
1592 return x;
1593 }
1594
1595 #define EXPAND_DEBUG 0
1596
1597 /* Expand a single series from a database, for importation
1598 into a working dataset of higher frequency. At present
1599 this is permitted only for the cases:
1600
1601 1) annual -> quarterly
1602 2) annual -> monthly
1603 3) quarterly -> monthly
1604 */
1605
expand_db_series(const double * src,int pd,int * pnobs,char * stobs,DATASET * dset)1606 static double *expand_db_series (const double *src,
1607 int pd, int *pnobs,
1608 char *stobs,
1609 DATASET *dset)
1610 {
1611 char new_stobs[OBSLEN] = {0};
1612 int target_pd = dset->pd;
1613 int oldn = *pnobs;
1614 int mult, newn;
1615 double *x = NULL;
1616 int j, t;
1617 int err = 0;
1618
1619 mult = target_pd / pd;
1620 newn = mult * oldn;
1621
1622 x = malloc(newn * sizeof *x);
1623 if (x == NULL) {
1624 err = E_ALLOC;
1625 } else {
1626 int s = 0;
1627
1628 for (t=0; t<oldn; t++) {
1629 for (j=0; j<mult; j++) {
1630 x[s++] = src[t];
1631 }
1632 }
1633 }
1634
1635 #if EXPAND_DEBUG
1636 fprintf(stderr, "expand_db_series 1: mult=%d, newn=%d, stobs='%s'\n",
1637 mult, newn, stobs);
1638 #endif
1639
1640 if (err) {
1641 return NULL;
1642 }
1643
1644 if (pd == 1) {
1645 strcpy(new_stobs, stobs);
1646 if (target_pd == 4) {
1647 strcat(new_stobs, ":1");
1648 } else {
1649 strcat(new_stobs, ":01");
1650 }
1651 } else {
1652 int yr, qtr, mo;
1653
1654 if (strchr(stobs, '.')) {
1655 sscanf(stobs, "%d.%d", &yr, &qtr);
1656 } else {
1657 sscanf(stobs, "%d:%d", &yr, &qtr);
1658 }
1659 mo = (qtr - 1) * 3 + 1;
1660 sprintf(new_stobs, "%d:%02d", yr, mo);
1661 }
1662
1663 /* revise incoming values */
1664 strcpy(stobs, new_stobs);
1665 *pnobs = newn;
1666
1667 #if EXPAND_DEBUG
1668 fprintf(stderr, "expand_db_series 2: pd=%d, stobs='%s'\n",
1669 pd, stobs);
1670 #endif
1671
1672 return x;
1673 }
1674
set_db_name(const char * fname,int filetype,PRN * prn)1675 int set_db_name (const char *fname, int filetype, PRN *prn)
1676 {
1677 FILE *fp;
1678 int err = 0;
1679
1680 *saved_db_name = '\0';
1681 if (fname != NULL) {
1682 strncat(saved_db_name, fname, MAXLEN - 1);
1683 }
1684
1685 if (filetype == GRETL_DBNOMICS || filetype == 0) {
1686 saved_db_type = filetype;
1687 return 0;
1688 }
1689
1690 if (filetype == GRETL_NATIVE_DB_WWW) {
1691 #ifdef USE_CURL
1692 int n = strlen(saved_db_name);
1693
1694 if (n > 4) {
1695 n -= 4;
1696 if (!strcmp(saved_db_name + n, ".bin")) {
1697 saved_db_name[n] = '\0';
1698 }
1699 }
1700 err = check_remote_db(saved_db_name);
1701 if (!err) {
1702 saved_db_type = filetype;
1703 pprintf(prn, "%s\n", saved_db_name);
1704 }
1705 #else
1706 pprintf(prn, _("Internet access not supported"));
1707 pputc(prn, '\n');
1708 err = E_DATA;
1709 #endif
1710 return err;
1711 }
1712
1713 fp = gretl_fopen(saved_db_name, "rb");
1714
1715 if (fp == NULL && !g_path_is_absolute(saved_db_name) &&
1716 filetype == GRETL_NATIVE_DB) {
1717 /* try looking a bit more */
1718 const char *path = gretl_binbase();
1719
1720 if (path != NULL && *path != '\0') {
1721 gretl_build_path(saved_db_name, path, fname, NULL);
1722 fp = gretl_fopen(saved_db_name, "rb");
1723 }
1724
1725 #ifdef OS_OSX
1726 if (fp == NULL) {
1727 gchar *tmp = g_build_filename(gretl_app_support_dir(), "db",
1728 fname, NULL);
1729
1730 fp = gretl_fopen(tmp, "rb");
1731 if (fp != NULL) {
1732 strcpy(saved_db_name, tmp);
1733 }
1734 g_free(tmp);
1735 }
1736 #endif
1737 }
1738
1739 if (fp == NULL) {
1740 *saved_db_name = '\0';
1741 pprintf(prn, _("Couldn't open %s\n"), fname);
1742 err = E_FOPEN;
1743 } else {
1744 fclose(fp);
1745 saved_db_type = filetype;
1746 pprintf(prn, "%s\n", saved_db_name);
1747 }
1748
1749 return err;
1750 }
1751
get_db_name(void)1752 const char *get_db_name (void)
1753 {
1754 return saved_db_name;
1755 }
1756
1757 /* Handling of DSN setup for ODBC: grab the dsn, username
1758 and password strings.
1759 */
1760
get_dsn_field(const char * tag,const char * src)1761 static char *get_dsn_field (const char *tag, const char *src)
1762 {
1763 const char *p;
1764 char needle[12];
1765 char *ret = NULL;
1766
1767 sprintf(needle, "%s=", tag);
1768 p = strstr(src, needle);
1769
1770 if (p != NULL) {
1771 p += strlen(needle);
1772 if (*p == '"' || *p == '\'') {
1773 ret = gretl_quoted_string_strdup(p, NULL);
1774 } else {
1775 ret = gretl_strndup(p, strcspn(p, " "));
1776 }
1777 }
1778
1779 return ret;
1780 }
1781
1782 static ODBC_info gretl_odinfo;
1783
ODBC_info_clear_read(void)1784 static void ODBC_info_clear_read (void)
1785 {
1786 int i;
1787
1788 free(gretl_odinfo.query);
1789 gretl_odinfo.query = NULL;
1790
1791 doubles_array_free(gretl_odinfo.X, gretl_odinfo.nvars);
1792 gretl_odinfo.X = NULL;
1793
1794 strings_array_free(gretl_odinfo.S, gretl_odinfo.nrows);
1795 gretl_odinfo.S = NULL;
1796
1797 gretl_string_table_destroy(gretl_odinfo.gst);
1798 gretl_odinfo.gst = NULL;
1799
1800 for (i=0; i<ODBC_OBSCOLS; i++) {
1801 gretl_odinfo.coltypes[i] = 0;
1802 }
1803
1804 if (gretl_odinfo.fmts != NULL) {
1805 strings_array_free(gretl_odinfo.fmts, gretl_odinfo.obscols);
1806 gretl_odinfo.fmts = NULL;
1807 }
1808
1809 gretl_odinfo.nrows = 0;
1810 gretl_odinfo.obscols = 0;
1811 gretl_odinfo.nvars = 0;
1812 }
1813
gretl_odbc_cleanup(void)1814 static void gretl_odbc_cleanup (void)
1815 {
1816 free(gretl_odinfo.dsn);
1817 gretl_odinfo.dsn = NULL;
1818
1819 free(gretl_odinfo.username);
1820 gretl_odinfo.username = NULL;
1821
1822 free(gretl_odinfo.password);
1823 gretl_odinfo.password = NULL;
1824
1825 ODBC_info_clear_read();
1826 }
1827
set_odbc_dsn(const char * line,PRN * prn)1828 int set_odbc_dsn (const char *line, PRN *prn)
1829 {
1830 int (*check_dsn) (ODBC_info *);
1831 char *dbname = NULL;
1832 char *uname = NULL;
1833 char *pword = NULL;
1834 int got_plugin = 0;
1835 int err = 0;
1836
1837 gretl_odbc_cleanup();
1838
1839 dbname = get_dsn_field("dsn", line);
1840 if (dbname == NULL) {
1841 pputs(prn, "You must specify a DSN using 'dsn=...'\n");
1842 return E_DATA;
1843 }
1844
1845 uname = get_dsn_field("user", line);
1846 pword = get_dsn_field("password", line);
1847
1848 gretl_odinfo.dsn = dbname;
1849 gretl_odinfo.username = uname;
1850 gretl_odinfo.password = pword;
1851
1852 gretl_error_clear();
1853
1854 check_dsn = get_plugin_function("gretl_odbc_check_dsn");
1855
1856 if (check_dsn == NULL) {
1857 err = 1;
1858 } else {
1859 got_plugin = 1;
1860 err = (*check_dsn) (&gretl_odinfo);
1861 }
1862
1863 if (err) {
1864 if (!got_plugin) {
1865 pprintf(prn, "Couldn't open the gretl ODBC plugin\n");
1866 } else {
1867 pprintf(prn, "Failed to connect to ODBC data source '%s'\n",
1868 gretl_odinfo.dsn);
1869 }
1870 gretl_odbc_cleanup();
1871 } else if (gretl_messages_on()) {
1872 pprintf(prn, "Connected to ODBC data source '%s'\n",
1873 gretl_odinfo.dsn);
1874 }
1875
1876 return err;
1877 }
1878
db_set_sample(const char * start,const char * stop,DATASET * dset)1879 int db_set_sample (const char *start, const char *stop, DATASET *dset)
1880 {
1881 int t1 = 0, t2 = 0;
1882
1883 if (strcmp(start, ";")) {
1884 t1 = dateton(start, dset);
1885 if (t1 < 0) {
1886 return 1;
1887 }
1888 }
1889
1890 t2 = dateton(stop, dset);
1891 if (t2 < 0) {
1892 return 1;
1893 }
1894
1895 if (t1 > t2) {
1896 gretl_errmsg_set(_("Invalid null sample"));
1897 return 1;
1898 }
1899
1900 dset->t1 = t1;
1901 dset->t2 = t2;
1902 dset->n = t2 - t1 + 1;
1903 strcpy(dset->endobs, stop);
1904
1905 #if DB_DEBUG
1906 fprintf(stderr, "db_set_sample: t1=%d, t2=%d, stobs='%s', endobs='%s' "
1907 "sd0 = %g, n = %d\n",
1908 dset->t1, dset->t2,
1909 dset->stobs, dset->endobs,
1910 dset->sd0, dset->n);
1911 #endif
1912
1913 return 0;
1914 }
1915
1916 static const char *
get_word_and_advance(const char * s,char * word,size_t maxlen)1917 get_word_and_advance (const char *s, char *word, size_t maxlen)
1918 {
1919 size_t i = 0;
1920
1921 while (isspace(*s)) s++;
1922
1923 *word = '\0';
1924
1925 while (*s && !isspace(*s)) {
1926 if (i < maxlen) word[i++] = *s;
1927 s++;
1928 }
1929
1930 word[i] = '\0';
1931
1932 return (*word != '\0')? s : NULL;
1933 }
1934
1935 static const char *
get_compact_method_and_advance(const char * s,CompactMethod * method)1936 get_compact_method_and_advance (const char *s, CompactMethod *method)
1937 {
1938 const char *p;
1939
1940 *method = COMPACT_NONE;
1941
1942 if ((p = strstr(s, "(compact")) != NULL) {
1943 char comp[8];
1944 int i = 0;
1945
1946 p += 8;
1947 while (*p && *p != ')' && i < 7) {
1948 if (!isspace(*p) && *p != '=') {
1949 comp[i++] = *p;
1950 }
1951 p++;
1952 }
1953 comp[i] = '\0';
1954
1955 if (!strcmp(comp, "average")) {
1956 *method = COMPACT_AVG;
1957 } else if (!strcmp(comp, "sum")) {
1958 *method = COMPACT_SUM;
1959 } else if (!strcmp(comp, "first")) {
1960 *method = COMPACT_SOP;
1961 } else if (!strcmp(comp, "last")) {
1962 *method = COMPACT_EOP;
1963 } else if (!strcmp(comp, "spread")) {
1964 *method = COMPACT_SPREAD;
1965 }
1966
1967 p = strchr(p, ')');
1968 if (p != NULL) p++;
1969 } else if ((p = strstr(s, "data ")) != NULL) {
1970 p += 5;
1971 } else {
1972 p = s;
1973 }
1974
1975 return p;
1976 }
1977
compact_method_from_option(int * err)1978 static CompactMethod compact_method_from_option (int *err)
1979 {
1980 const char *s = get_optval_string(DATA, OPT_C);
1981 CompactMethod method = COMPACT_NONE;
1982
1983 if (s == NULL || *s == '\0') {
1984 *err = E_PARSE;
1985 } else if (!strcmp(s, "average")) {
1986 method = COMPACT_AVG;
1987 } else if (!strcmp(s, "sum")) {
1988 method = COMPACT_SUM;
1989 } else if (!strcmp(s, "first")) {
1990 method = COMPACT_SOP;
1991 } else if (!strcmp(s, "last")) {
1992 method = COMPACT_EOP;
1993 } else if (!strcmp(s, "spread")) {
1994 method = COMPACT_SPREAD;
1995 } else {
1996 gretl_errmsg_sprintf(_("field '%s' in command is invalid"), s);
1997 *err = E_PARSE;
1998 }
1999
2000 return method;
2001 }
2002
2003 /* 2-D array of doubles, allocated space in second
2004 position (as in a DATASET) */
2005
new_dbZ(int n)2006 static double **new_dbZ (int n)
2007 {
2008 double **Z;
2009 int t;
2010
2011 Z = malloc(2 * sizeof *Z);
2012 if (Z == NULL) return NULL;
2013
2014 Z[0] = NULL;
2015 Z[1] = malloc(n * sizeof **Z);
2016
2017 if (Z[1] == NULL) {
2018 free(Z);
2019 return NULL;
2020 }
2021
2022 for (t=0; t<n; t++) {
2023 Z[1][t] = NADBL;
2024 }
2025
2026 return Z;
2027 }
2028
free_dbZ(double ** dbZ)2029 static void free_dbZ (double **dbZ)
2030 {
2031 if (dbZ != NULL) {
2032 free(dbZ[1]);
2033 free(dbZ);
2034 }
2035 }
2036
parse_odbc_format_chunk(char ** ps,int i)2037 static int parse_odbc_format_chunk (char **ps, int i)
2038 {
2039 const char *numchars = "0123456789";
2040 char *chunk = NULL;
2041 char *p = *ps;
2042 int n, err = 0;
2043
2044 /* advance to '%' */
2045 while (*p && *p != '%') p++;
2046 if (*p == '\0') {
2047 return E_PARSE;
2048 }
2049
2050 p++; /* move past '%' */
2051
2052 /* zero padding? */
2053 if (*p == '0') {
2054 p++;
2055 }
2056
2057 /* optional width? */
2058 n = strspn(p, numchars);
2059 if (n == 1) {
2060 p++;
2061 } else if (n > 0) {
2062 return E_PARSE;
2063 }
2064
2065 /* optional dot plus precision? */
2066 if (*p == '.') {
2067 p++;
2068 n = strspn(p, numchars);
2069 if (n == 1) {
2070 p++;
2071 } else {
2072 return E_PARSE;
2073 }
2074 }
2075
2076 /* now we should have a conversion character */
2077 if (*p == 'd') {
2078 gretl_odinfo.coltypes[i] = GRETL_TYPE_INT;
2079 } else if (*p == 's') {
2080 gretl_odinfo.coltypes[i] = GRETL_TYPE_STRING;
2081 } else if (*p == 'f' || *p == 'g') {
2082 gretl_odinfo.coltypes[i] = GRETL_TYPE_DOUBLE;
2083 } else if (*p == 'D') {
2084 *p = 's';
2085 gretl_odinfo.coltypes[i] = GRETL_TYPE_DATE;
2086 } else {
2087 return E_PARSE;
2088 }
2089
2090 /* append any trailing fixed chars */
2091 p++;
2092 while (*p && *p != '%') p++;
2093 n = p - *ps;
2094
2095 chunk = gretl_strndup(*ps, n);
2096 if (chunk == NULL) {
2097 err = E_ALLOC;
2098 } else {
2099 err = strings_array_add(&gretl_odinfo.fmts,
2100 &gretl_odinfo.obscols,
2101 chunk);
2102 free(chunk);
2103 }
2104
2105 *ps = p;
2106
2107 #if 1
2108 fprintf(stderr, "set obs coltype[%d] = %d (%s), fmt='%s'\n", i,
2109 gretl_odinfo.coltypes[i],
2110 gretl_type_get_name(gretl_odinfo.coltypes[i]),
2111 gretl_odinfo.fmts[i]);
2112 #endif
2113
2114 return err;
2115 }
2116
parse_odbc_format(char * fmt)2117 static int parse_odbc_format (char *fmt)
2118 {
2119 char *s = fmt;
2120 int i, err = 0;
2121
2122 for (i=0; i<ODBC_OBSCOLS && !err && *s; i++) {
2123 err = parse_odbc_format_chunk(&s, i);
2124 }
2125
2126 if (!err && *s != '\0') {
2127 err = E_PARSE;
2128 }
2129
2130 free(fmt);
2131
2132 return err;
2133 }
2134
odbc_get_query(const char * s,int * err)2135 static char *odbc_get_query (const char *s, int *err)
2136 {
2137 char *query = NULL;
2138 const char *p;
2139
2140 if (*s == '"') {
2141 query = gretl_quoted_string_strdup(s, NULL);
2142 } else {
2143 p = get_string_by_name(s);
2144 if (p != NULL) {
2145 query = gretl_strdup(p);
2146 } else {
2147 query = gretl_strdup(s);
2148 }
2149 }
2150
2151 if (query == NULL) {
2152 *err = E_ALLOC;
2153 } else if (*query == '\0') {
2154 gretl_errmsg_set(_("Expected an SQL query string"));
2155 *err = E_PARSE;
2156 }
2157
2158 return query;
2159 }
2160
2161 /* Grab the series name(s) out of an ODBC "data" command. If the SQL
2162 query is marked by "query=" (which was not required in the original
2163 gretl ODBC setup) we're able to get multiple series names,
2164 otherwise we're restricted to one.
2165 */
2166
odbc_get_varnames(const char ** line,int * err)2167 static char **odbc_get_varnames (const char **line, int *err)
2168 {
2169 char **vnames = NULL;
2170 char vname[VNAMELEN];
2171 const char *s = *line;
2172 int len, loop_ok = 0, nv = 0;
2173
2174 if (strstr(s, "query=")) {
2175 /* we know where the SQL query starts */
2176 loop_ok = 1;
2177 }
2178
2179 while (!*err) {
2180 *vname = '\0';
2181 *err = extract_varname(vname, s, &len);
2182
2183 if (!*err && len == 0) {
2184 gretl_errmsg_set(_("Expected a valid variable name"));
2185 *err = E_PARSE;
2186 }
2187
2188 if (!*err) {
2189 *err = check_varname(vname);
2190 }
2191
2192 if (!*err) {
2193 *err = strings_array_add(&vnames, &nv, vname);
2194 }
2195
2196 if (!*err) {
2197 s += len;
2198 s += strspn(s, " ");
2199 }
2200
2201 if (!loop_ok || *s == '\0' || !strncmp(s, "obs-", 4) ||
2202 !strncmp(s, "query=", 6)) {
2203 /* got to the end of the varnames section */
2204 break;
2205 }
2206 }
2207
2208 if (*err) {
2209 strings_array_free(vnames, nv);
2210 vnames = NULL;
2211 } else {
2212 gretl_odinfo.nvars = nv;
2213 }
2214
2215 *line = s;
2216
2217 return vnames;
2218 }
2219
s_tab_get(int i,int t,series_table * stl,series_table * str)2220 static double s_tab_get (int i, int t, series_table *stl, series_table *str)
2221 {
2222 const double *x = gretl_odinfo.X[i];
2223 const char *sr;
2224 double ret = NADBL;
2225
2226 /* get the string value for the imported obs */
2227 sr = series_table_get_string(str, x[t]);
2228 /* look up its index "on the left" */
2229 ret = series_table_get_value(stl, sr);
2230 if (na(ret)) {
2231 /* not found: so try adding it to the LHS table */
2232 series_table_add_string(stl, sr);
2233 ret = series_table_get_value(stl, sr);
2234 }
2235
2236 return ret;
2237 }
2238
m2q(int m)2239 static int m2q (int m)
2240 {
2241 if (m == 1) return 1;
2242 else if (m == 4) return 2;
2243 else if (m == 7) return 3;
2244 else if (m == 10) return 4;
2245 else return -1;
2246 }
2247
try_iso_8601(const char * s,DATASET * dset)2248 static int try_iso_8601 (const char *s, DATASET *dset)
2249 {
2250 int t = -1;
2251
2252 if (dataset_is_time_series(dset)) {
2253 char obsstr[OBSLEN] = {0};
2254 int y, m, d;
2255
2256 if (sscanf(s, "%d-%d-%d", &y, &m, &d) == 3) {
2257 if (dset->pd == 4 && d == 1) {
2258 sprintf(obsstr, "%04d:%d", y, m2q(m));
2259 } else if (dset->pd == 12 && d == 1) {
2260 sprintf(obsstr, "%04d:%02d", y, m);
2261 } else if (dset->pd == 1 && m == 1 && d == 1) {
2262 sprintf(obsstr, "%04d", y);
2263 }
2264 t = dateton(obsstr, dset);
2265 }
2266 }
2267
2268 return t;
2269 }
2270
odbc_transcribe_data(char ** vnames,DATASET * dset,int vmin,int newvars,gretlopt opt,PRN * prn)2271 static int odbc_transcribe_data (char **vnames, DATASET *dset,
2272 int vmin, int newvars,
2273 gretlopt opt, PRN *prn)
2274 {
2275 char label[MAXLABEL];
2276 int *gstlist = NULL;
2277 int *gstlnew = NULL;
2278 int nv = gretl_odinfo.nvars;
2279 int n = gretl_odinfo.nrows;
2280 int nrepl = nv - newvars;
2281 int simple_fill = (opt & OPT_F);
2282 int i, s, t, v;
2283 int spos = 1;
2284 int err = 0;
2285
2286 if (gretl_odinfo.gst != NULL) {
2287 gstlist = string_table_copy_list(gretl_odinfo.gst);
2288 gstlnew = gretl_list_new(gstlist[0]);
2289 gstlnew[0] = 0;
2290 }
2291
2292 for (i=0; i<nv && !err; i++) {
2293 series_table *str = NULL;
2294 series_table *stl = NULL;
2295 int vnew = 1; /* is this a new series? */
2296 int obs_used = 0;
2297
2298 if (nrepl > 0) {
2299 /* we're replacing some series */
2300 v = current_series_index(dset, vnames[i]);
2301 } else {
2302 /* all the series are new */
2303 v = -1;
2304 }
2305
2306 if (v < 0) {
2307 /* a new series */
2308 v = vmin++;
2309 strcpy(dset->varname[v], vnames[i]);
2310 sprintf(label, "ODBC series %d", i + 1);
2311 series_set_label(dset, v, label);
2312 } else {
2313 /* an existing series */
2314 vnew = 0;
2315 stl = series_get_string_table(dset, v);
2316 }
2317
2318 if (in_gretl_list(gstlist, i+1)) {
2319 /* the imported data are string-valued */
2320 if (vnew) {
2321 gstlnew[spos++] = v;
2322 gstlnew[0] += 1;
2323 } else if (stl == NULL) {
2324 gretl_errmsg_sprintf("%s: can't mix numeric and string data",
2325 dset->varname[v]);
2326 err = E_TYPES;
2327 } else {
2328 str = gretl_string_table_detach_col(gretl_odinfo.gst, i+1);
2329 }
2330 if (!err && gretl_messages_on()) {
2331 pprintf(prn, "%s: string-valued\n", dset->varname[v]);
2332 }
2333 } else if (stl != NULL) {
2334 /* string-valued in dataset, numeric data from ODBC */
2335 gretl_errmsg_sprintf("%s: can't mix numeric and string data",
2336 dset->varname[v]);
2337 err = E_TYPES;
2338 }
2339
2340 if (err) {
2341 break;
2342 }
2343
2344 if (gretl_odinfo.S != NULL) {
2345 /* got obs identifiers via ODBC */
2346 if (vnew) {
2347 for (t=0; t<dset->n; t++) {
2348 dset->Z[v][t] = NADBL;
2349 }
2350 }
2351 for (s=0; s<n; s++) {
2352 t = dateton(gretl_odinfo.S[s], dset);
2353 if (t < 0) {
2354 t = try_iso_8601(gretl_odinfo.S[s], dset);
2355 }
2356 if (t >= dset->t1 && t <= dset->t2) {
2357 if (str != NULL) {
2358 dset->Z[v][t] = s_tab_get(i, s, stl, str);
2359 } else {
2360 dset->Z[v][t] = gretl_odinfo.X[i][s];
2361 }
2362 obs_used++;
2363 } else {
2364 fprintf(stderr, "Rejecting obs '%s'\n", gretl_odinfo.S[s]);
2365 }
2366 }
2367 } else {
2368 /* no obs identifiers via ODBC */
2369 int ns = dset->t2 - dset->t1 + 1;
2370
2371 if (n == ns || simple_fill) {
2372 s = 0;
2373 } else if (n == dset->n) {
2374 s = dset->t1;
2375 } else {
2376 gretl_errmsg_sprintf("%s: don't know how to align the data!",
2377 dset->varname[v]);
2378 err = E_DATA;
2379 }
2380 for (t=0; t<dset->n && !err; t++) {
2381 if (t >= dset->t1 && t <= dset->t2 && s < n) {
2382 if (str != NULL) {
2383 dset->Z[v][t] = s_tab_get(i, s++, stl, str);
2384 } else {
2385 dset->Z[v][t] = gretl_odinfo.X[i][s++];
2386 }
2387 obs_used++;
2388 } else if (vnew) {
2389 dset->Z[v][t] = NADBL;
2390 }
2391 }
2392 }
2393
2394 if (str != NULL) {
2395 series_table_destroy(str);
2396 }
2397
2398 if (!err && vnew && obs_used == 0) {
2399 gretl_warnmsg_sprintf(_("ODBC import: '%s': no valid observations in sample range"),
2400 vnames[i]);
2401 }
2402 }
2403
2404 if (err) {
2405 dataset_drop_last_variables(dset, newvars);
2406 if (gretl_odinfo.gst != NULL) {
2407 gretl_string_table_destroy(gretl_odinfo.gst);
2408 gretl_odinfo.gst = NULL;
2409 }
2410 } else if (gretl_odinfo.gst != NULL) {
2411 if (gstlnew[0] == 0) {
2412 /* no series tables to transfer */
2413 gretl_string_table_destroy(gretl_odinfo.gst);
2414 gretl_odinfo.gst = NULL;
2415 } else {
2416 string_table_replace_list(gretl_odinfo.gst, gstlnew);
2417 gstlnew = NULL; /* donated to table */
2418 gretl_string_table_save(gretl_odinfo.gst, dset);
2419 }
2420 }
2421
2422 free(gstlist);
2423 free(gstlnew);
2424
2425 return err;
2426 }
2427
odbc_count_new_vars(char ** vnames,int nv,const DATASET * dset)2428 static int odbc_count_new_vars (char **vnames, int nv,
2429 const DATASET *dset)
2430 {
2431 int newv = nv;
2432
2433 if (dset->v > 0) {
2434 int i;
2435
2436 for (i=0; i<nv; i++) {
2437 if (current_series_index(dset, vnames[i]) > 0) {
2438 newv--;
2439 }
2440 }
2441 }
2442
2443 return newv;
2444 }
2445
2446 /* data series [obs-format=format-string] [query=]query-string */
2447
odbc_get_series(const char * line,DATASET * dset,gretlopt opt,PRN * prn)2448 static int odbc_get_series (const char *line, DATASET *dset,
2449 gretlopt opt, PRN *prn)
2450 {
2451 int (*get_data) (ODBC_info *, gretlopt, PRN *);
2452 char **vnames = NULL;
2453 char *format = NULL;
2454 int err = 0;
2455
2456 if (gretl_odinfo.dsn == NULL) {
2457 gretl_errmsg_set(_("No database has been opened"));
2458 return 1;
2459 } else if (dset->n == 0) {
2460 return E_NODATA;
2461 }
2462
2463 /* get "series" field */
2464 vnames = odbc_get_varnames(&line, &err);
2465 if (err) {
2466 return err;
2467 }
2468
2469 /* optional "obs-format" field */
2470 if (!strncmp(line, "obs-format=", 11)) {
2471 line += 11;
2472 format = gretl_quoted_string_strdup(line, (const char **) &line);
2473 if (format == NULL) {
2474 err = E_PARSE;
2475 } else {
2476 err = parse_odbc_format(format);
2477 }
2478 }
2479
2480 /* now the query to pass to the database */
2481 if (!err) {
2482 line += strspn(line, " ");
2483 if (!strncmp(line, "query=", 6)) {
2484 line += 6;
2485 }
2486 gretl_odinfo.query = odbc_get_query(line, &err);
2487 }
2488
2489 if (!err) {
2490 if (opt & OPT_V) {
2491 pprintf(prn, "SQL query: '%s'\n", gretl_odinfo.query);
2492 }
2493 gretl_error_clear();
2494
2495 get_data = get_plugin_function("gretl_odbc_get_data");
2496
2497 if (get_data == NULL) {
2498 err = 1;
2499 } else {
2500 err = (*get_data) (&gretl_odinfo, opt, prn);
2501 }
2502 }
2503
2504 if (!err) {
2505 int n = gretl_odinfo.nrows;
2506 int nv = gretl_odinfo.nvars;
2507 int newvars, vmin = 1;
2508
2509 if (gretl_messages_on()) {
2510 pprintf(prn, "Retrieved %d observations on %d series via ODBC\n",
2511 n, nv);
2512 }
2513
2514 if (dset->v == 0) {
2515 /* the data array is still empty */
2516 newvars = nv;
2517 dset->v = 1 + nv;
2518 err = start_new_Z(dset, 0);
2519 } else {
2520 newvars = odbc_count_new_vars(vnames, nv, dset);
2521 vmin = dset->v;
2522 if (newvars > 0) {
2523 err = dataset_add_series(dset, newvars);
2524 }
2525 }
2526
2527 if (!err) {
2528 err = odbc_transcribe_data(vnames, dset, vmin, newvars, opt, prn);
2529 }
2530 }
2531
2532 strings_array_free(vnames, gretl_odinfo.nvars);
2533 ODBC_info_clear_read();
2534
2535 return err;
2536 }
2537
2538 /* dbnomics function in separate file */
2539
2540 #include "dbnread.c"
2541
2542 /* called from loop in db_get_series() */
2543
get_one_db_series(const char * sername,const char * altname,DATASET * dset,CompactMethod cmethod,const char * idxname,PRN * prn)2544 static int get_one_db_series (const char *sername,
2545 const char *altname,
2546 DATASET *dset,
2547 CompactMethod cmethod,
2548 const char *idxname,
2549 PRN *prn)
2550 {
2551 CompactMethod this_method = cmethod;
2552 const char *impname;
2553 SERIESINFO sinfo; /* sinfo declared */
2554 double **dbZ;
2555 int v, err = 0;
2556
2557 series_info_init(&sinfo);
2558
2559 /* are we using a specified name for importation? */
2560 impname = (*altname == '\0')? sername : altname;
2561
2562 /* see if the series is already in the dataset */
2563 v = series_index(dset, impname);
2564 if (v < dset->v && cmethod == COMPACT_NONE) {
2565 this_method = series_get_compact_method(dset, v);
2566 }
2567
2568 #if DB_DEBUG
2569 fprintf(stderr, "get_one_db_series: dset->v=%d, v=%d, name='%s'\n",
2570 dset->v, v, impname);
2571 fprintf(stderr, "this_var_method = %d\n", this_method);
2572 #endif
2573
2574 /* find the series information in the database */
2575 if (saved_db_type == GRETL_DBNOMICS) {
2576 err = get_dbnomics_series_info(sername, &sinfo);
2577 } else if (saved_db_type == GRETL_RATS_DB) {
2578 err = get_rats_series_info(sername, &sinfo);
2579 } else if (saved_db_type == GRETL_PCGIVE_DB) {
2580 err = get_pcgive_series_info(sername, &sinfo);
2581 } else {
2582 err = get_native_series_info(sername, &sinfo, idxname);
2583 }
2584
2585 if (err) {
2586 fprintf(stderr, "get_one_db_series: failed to get series info\n");
2587 return err;
2588 }
2589
2590 /* temporary data array */
2591 dbZ = new_dbZ(sinfo.nobs);
2592 if (dbZ == NULL) {
2593 gretl_errmsg_set(_("Out of memory!"));
2594 return E_ALLOC;
2595 }
2596
2597 #if DB_DEBUG
2598 fprintf(stderr, "get_one_db_series: offset=%d, nobs=%d\n",
2599 sinfo.offset, sinfo.nobs);
2600 #endif
2601
2602 if (saved_db_type == GRETL_DBNOMICS) {
2603 err = get_dbnomics_data(saved_db_name, &sinfo, dbZ);
2604 } else if (saved_db_type == GRETL_RATS_DB) {
2605 err = get_rats_db_data(saved_db_name, &sinfo, dbZ);
2606 } else if (saved_db_type == GRETL_PCGIVE_DB) {
2607 err = get_pcgive_db_data(saved_db_name, &sinfo, dbZ);
2608 #ifdef USE_CURL
2609 } else if (saved_db_type == GRETL_NATIVE_DB_WWW) {
2610 err = get_remote_db_data(saved_db_name, &sinfo, dbZ);
2611 #endif
2612 } else {
2613 err = get_native_db_data(saved_db_name, &sinfo, dbZ);
2614 }
2615
2616 #if DB_DEBUG
2617 fprintf(stderr, "get_one_db_series: get_db_data gave %d\n", err);
2618 #endif
2619
2620 if (err == DB_MISSING_DATA) {
2621 fprintf(stderr, "There were missing data\n");
2622 err = 0;
2623 }
2624
2625 #if DB_DEBUG
2626 fprintf(stderr, "sinfo.varname='%s', this_method=%d\n",
2627 sinfo.varname, this_method);
2628 #endif
2629
2630 if (!err) {
2631 if (*altname != '\0') {
2632 /* switch the recorded name now */
2633 strcpy(sinfo.varname, altname);
2634 }
2635 if (this_method == COMPACT_SPREAD) {
2636 err = lib_spread_db_data(dbZ, &sinfo, dset, prn);
2637 } else {
2638 err = lib_add_db_data(dbZ, &sinfo, dset, this_method,
2639 v, prn);
2640 }
2641 }
2642
2643 series_info_clear(&sinfo);
2644 free_dbZ(dbZ);
2645
2646 return err;
2647 }
2648
is_glob(const char * s)2649 static int is_glob (const char *s)
2650 {
2651 return strchr(s, '*') || strchr(s, '?');
2652 }
2653
process_import_name_option(char * vname)2654 static int process_import_name_option (char *vname)
2655 {
2656 const char *s = get_optval_string(DATA, OPT_N);
2657 int err = 0;
2658
2659 if (s == NULL) {
2660 err = E_DATA;
2661 } else {
2662 err = check_varname(s);
2663 }
2664
2665 if (!err) {
2666 strcpy(vname, s);
2667 }
2668
2669 return err;
2670 }
2671
2672 /* main function for getting one or more series out of a
2673 database (including ODBC) via command-line/script
2674 */
2675
db_get_series(const char * line,DATASET * dset,gretlopt opt,PRN * prn)2676 int db_get_series (const char *line, DATASET *dset,
2677 gretlopt opt, PRN *prn)
2678 {
2679 char altname[VNAMELEN] = {0};
2680 char **vnames = NULL;
2681 char *idxname = NULL;
2682 CompactMethod cmethod;
2683 int i, nnames = 0;
2684 int from_scratch = 0;
2685 int err = 0;
2686
2687 if (opt & OPT_O) {
2688 return odbc_get_series(line, dset, opt, prn);
2689 }
2690
2691 if (opt & OPT_N) {
2692 /* --name=whatever */
2693 err = process_import_name_option(altname);
2694 if (err) {
2695 return err;
2696 }
2697 }
2698
2699 #if DB_DEBUG
2700 fprintf(stderr, "db_get_series: line='%s', dset=%p\n",
2701 line, (void *) dset);
2702 fprintf(stderr, "db_name = '%s'\n", saved_db_name);
2703 #endif
2704
2705 if (*saved_db_name == '\0') {
2706 gretl_errmsg_set(_("No database has been opened"));
2707 return 1;
2708 }
2709
2710 from_scratch = (dset->n == 0);
2711
2712 if (opt & OPT_C) {
2713 /* new-style: compaction method supplied as option */
2714 cmethod = compact_method_from_option(&err);
2715 } else {
2716 /* legacy */
2717 line = get_compact_method_and_advance(line, &cmethod);
2718 }
2719
2720 if (!err) {
2721 if (string_is_blank(line)) {
2722 err = E_DATA;
2723 } else {
2724 /* get the variable names on the line */
2725 vnames = gretl_string_split(line, &nnames, NULL);
2726 if (vnames == NULL) {
2727 err = E_ALLOC;
2728 }
2729 }
2730 }
2731
2732 if (!err && nnames > 1 && *altname != '\0') {
2733 /* altname only works for a single series? */
2734 err = E_BADOPT;
2735 }
2736
2737 if (!err) {
2738 if (saved_db_type == GRETL_NATIVE_DB) {
2739 idxname = native_db_index_name();
2740 } else if (saved_db_type == GRETL_NATIVE_DB_WWW) {
2741 #ifdef USE_CURL
2742 idxname = g_strdup_printf("%sdbtmp.idx", gretl_dotdir());
2743 err = remote_db_index_to_file(idxname);
2744 #endif
2745 }
2746 }
2747
2748 /* now process the imports individually */
2749
2750 for (i=0; i<nnames && !err; i++) {
2751 if (is_glob(vnames[i])) {
2752 /* globbing works only for native databases */
2753 if (*altname != '\0') {
2754 /* can't do it */
2755 err = E_BADOPT;
2756 } else if (saved_db_type == GRETL_NATIVE_DB ||
2757 saved_db_type == GRETL_NATIVE_DB_WWW) {
2758 char **tmp;
2759 int j, nmatch;
2760
2761 tmp = native_db_match_series(vnames[i], &nmatch,
2762 idxname, &err);
2763 for (j=0; j<nmatch && !err; j++) {
2764 err = get_one_db_series(tmp[j], altname, dset,
2765 cmethod, idxname, prn);
2766 }
2767 strings_array_free(tmp, nmatch);
2768 } else {
2769 err = E_INVARG;
2770 }
2771 } else {
2772 err = get_one_db_series(vnames[i], altname, dset,
2773 cmethod, idxname, prn);
2774 }
2775 }
2776
2777 strings_array_free(vnames, nnames);
2778
2779 if (!err && !(opt & OPT_Q) && gretl_messages_on()) {
2780 pprintf(prn, _("Series imported OK"));
2781 pputc(prn, '\n');
2782 if (from_scratch) {
2783 print_smpl(dset, 0, OPT_NONE, prn);
2784 }
2785 }
2786
2787 if (idxname != NULL) {
2788 if (saved_db_type == GRETL_NATIVE_DB_WWW) {
2789 /* this file is a temporary download */
2790 gretl_remove(idxname);
2791 }
2792 free(idxname);
2793 }
2794
2795 return err;
2796 }
2797
tempfile_open(char * fname,int * err)2798 static FILE *tempfile_open (char *fname, int *err)
2799 {
2800 FILE *fp;
2801
2802 strcat(fname, ".XXXXXX");
2803 fp = gretl_mktemp(fname, "w+");
2804 if (fp == NULL) {
2805 *err = E_FOPEN;
2806 }
2807
2808 return fp;
2809 }
2810
maybe_fclose(FILE * fp)2811 static void maybe_fclose (FILE *fp)
2812 {
2813 if (fp != NULL) {
2814 fclose(fp);
2815 }
2816 }
2817
2818 #define DBUFLEN 1024
2819
db_delete_series(const char * line,const int * list,const char * fname,PRN * prn)2820 static int db_delete_series (const char *line, const int *list,
2821 const char *fname, PRN *prn)
2822 {
2823 dbnumber buf[DBUFLEN];
2824 char src1[FILENAME_MAX];
2825 char src2[FILENAME_MAX];
2826 char tmp1[FILENAME_MAX];
2827 char tmp2[FILENAME_MAX];
2828 char series[VNAMELEN];
2829 char *p, s[512];
2830 char **snames = NULL;
2831 FILE *fidx = NULL, *fbin = NULL;
2832 FILE *f1 = NULL, *f2 = NULL;
2833 int i, j, k, print, n, ns;
2834 int ndel = 0;
2835 int err = 0;
2836
2837 if (fname == NULL) {
2838 if (*saved_db_name == '\0') {
2839 gretl_errmsg_set(_("No database has been opened"));
2840 err = 1;
2841 } else if (saved_db_type != GRETL_NATIVE_DB) {
2842 gretl_errmsg_set("This only works for gretl databases");
2843 err = 1;
2844 } else {
2845 err = open_native_db_files(saved_db_name, &fidx, src1, &fbin, src2);
2846 }
2847 } else {
2848 err = open_native_db_files(fname, &fidx, src1, &fbin, src2);
2849 }
2850
2851 if (err) {
2852 return err;
2853 }
2854
2855 strcpy(tmp1, gretl_dotdir());
2856 strcat(tmp1, "tmpidx");
2857 f1 = tempfile_open(tmp1, &err);
2858 if (err) {
2859 goto bailout;
2860 }
2861
2862 strcpy(tmp2, gretl_dotdir());
2863 strcat(tmp2, "tmpbin");
2864 f2 = tempfile_open(tmp2, &err);
2865 if (err) {
2866 goto bailout;
2867 }
2868
2869 if (line != NULL) {
2870 /* extract the variable names given on the line */
2871 ns = 0;
2872 while ((line = get_word_and_advance(line, series, VNAMELEN-1))
2873 && !err) {
2874 err = strings_array_add(&snames, &ns, series);
2875 }
2876 if (!err && ns == 0) {
2877 fprintf(stderr, "Found no series names\n");
2878 err = E_PARSE;
2879 }
2880 }
2881
2882 print = k = 1;
2883 i = j = 0;
2884
2885 while (fgets(s, sizeof s, fidx) && !err) {
2886 if (i == 0) {
2887 /* always reprint the header */
2888 fputs(s, f1);
2889 i++;
2890 continue;
2891 }
2892
2893 if (i % 2 != 0) {
2894 /* odd lines contain varnames */
2895 print = 1;
2896 if (snames != NULL) {
2897 sscanf(s, "%s", series);
2898 for (j=0; j<ns; j++) {
2899 if (!strcmp(series, snames[j])) {
2900 print = 0;
2901 ndel++;
2902 break;
2903 }
2904 }
2905 } else {
2906 if (k <= list[0] && list[k] == j) {
2907 k++;
2908 print = 0;
2909 ndel++;
2910 }
2911 j++;
2912 }
2913 if (print) {
2914 fputs(s, f1);
2915 }
2916 } else {
2917 /* even lines have obs information */
2918 p = strstr(s, "n = ");
2919 if (p != NULL) {
2920 sscanf(p + 4, "%d", &n);
2921 } else {
2922 err = E_DATA;
2923 fprintf(stderr, "couldn't find obs for series\n");
2924 }
2925
2926 if (!print) {
2927 fseek(fbin, n * sizeof(dbnumber), SEEK_CUR);
2928 } else {
2929 int get, got, rem = n;
2930
2931 fputs(s, f1);
2932
2933 while (rem > 0 && !err) {
2934 get = (rem > DBUFLEN)? DBUFLEN : rem;
2935 got = fread(buf, sizeof(dbnumber), get, fbin);
2936 if (got != get) {
2937 fprintf(stderr, "error reading binary data\n");
2938 err = E_DATA;
2939 } else {
2940 fwrite(buf, sizeof(dbnumber), got, f2);
2941 rem -= got;
2942 }
2943 }
2944 }
2945 }
2946 i++;
2947 }
2948
2949 if (snames != NULL) {
2950 strings_array_free(snames, ns);
2951 }
2952
2953 bailout:
2954
2955 maybe_fclose(fidx);
2956 maybe_fclose(fbin);
2957 maybe_fclose(f1);
2958 maybe_fclose(f2);
2959
2960 if (!err && ndel > 0) {
2961 err = gretl_rename(tmp1, src1);
2962 if (!err) {
2963 err = gretl_rename(tmp2, src2);
2964 }
2965 } else {
2966 gretl_remove(tmp1);
2967 gretl_remove(tmp2);
2968 }
2969
2970 if (!err && prn != NULL) {
2971 pprintf(prn, "Deleted %d series from %s\n", ndel, src2);
2972 }
2973
2974 return err;
2975 }
2976
db_delete_series_by_name(const char * line,PRN * prn)2977 int db_delete_series_by_name (const char *line, PRN *prn)
2978 {
2979 return db_delete_series(line, NULL, NULL, prn);
2980 }
2981
db_delete_series_by_number(const int * list,const char * fname)2982 int db_delete_series_by_number (const int *list, const char *fname)
2983 {
2984 return db_delete_series(NULL, list, fname, NULL);
2985 }
2986
obs_to_ymd(const char * obs,int pd,int * y,int * m,int * d)2987 static void obs_to_ymd (const char *obs, int pd, int *y, int *m, int *d)
2988 {
2989 *y = atoi(obs);
2990 *d = 1;
2991
2992 if (pd == 12) {
2993 *m = atoi(obs + 5);
2994 } else if (pd == 4) {
2995 int q = atoi(obs + 5);
2996
2997 *m = q * 3 - 2;
2998 } else {
2999 *m = 1;
3000 }
3001 }
3002
db_range_check(int db_pd,const char * db_stobs,const char * db_endobs,const char * varname,DATASET * dset)3003 int db_range_check (int db_pd,
3004 const char *db_stobs,
3005 const char *db_endobs,
3006 const char *varname,
3007 DATASET *dset)
3008 {
3009 double sd0_orig, sdn_orig, sd0, sdn;
3010 int err = 0;
3011
3012 sd0 = get_date_x(db_pd, db_stobs);
3013 sdn = get_date_x(db_pd, db_endobs);
3014
3015 if (db_pd >= 5 && db_pd <= 7 && !dated_daily_data(dset)) {
3016 /* convert 'orig' info to daily dates */
3017 int y, m, d;
3018
3019 obs_to_ymd(dset->stobs, dset->pd, &y, &m, &d);
3020 sd0_orig = epoch_day_from_ymd(y, m, d);
3021 obs_to_ymd(dset->endobs, dset->pd, &y, &m, &d);
3022 sdn_orig = epoch_day_from_ymd(y, m, d);
3023 } else {
3024 sd0_orig = dset->sd0;
3025 sdn_orig = get_date_x(dset->pd, dset->endobs);
3026 }
3027
3028 if (sd0 > sdn_orig || sdn < sd0_orig) {
3029 gretl_errmsg_sprintf(_("%s: observation range does not overlap\n"
3030 "with the working data set"),
3031 varname);
3032 err = 1;
3033 }
3034
3035 return err;
3036 }
3037
check_db_import_conversion(int pd,DATASET * dset)3038 int check_db_import_conversion (int pd, DATASET *dset)
3039 {
3040 int target = dset->pd;
3041 int err = 0;
3042
3043 if (pd == target) {
3044 ; /* no conversion needed */
3045 } else if (pd == 1 && target == 4) {
3046 ; /* annual to quarterly expansion */
3047 } else if (pd == 1 && target == 12) {
3048 ; /* annual to monthly expansion */
3049 } else if (pd == 4 && target == 12) {
3050 ; /* quarterly to monthly expansion */
3051 } else if (pd == 12 && target == 1) {
3052 ; /* monthly to annual compaction */
3053 } else if (pd == 4 && target == 1) {
3054 ; /* quarterly to annual compaction */
3055 } else if (pd == 12 && target == 4) {
3056 ; /* monthly to quarterly compaction */
3057 } else {
3058 fprintf(stderr, "db import fail: pd = %d, target %d\n", pd, target);
3059 err = E_DATA;
3060 }
3061
3062 return err;
3063 }
3064
check_db_import_full(int pd,const char * stobs,const char * endobs,const char * varname,DATASET * dset)3065 static int check_db_import_full (int pd,
3066 const char *stobs,
3067 const char *endobs,
3068 const char *varname,
3069 DATASET *dset)
3070 {
3071 int err = check_db_import_conversion(pd, dset);
3072
3073 if (err) {
3074 gretl_errmsg_sprintf(_("%s: can't handle conversion"),
3075 varname);
3076 } else {
3077 err = db_range_check(pd, stobs, endobs, varname, dset);
3078 }
3079
3080 #if DB_DEBUG
3081 if (err) {
3082 fprintf(stderr, "check_db_import_full: err = %d\n", err);
3083 fprintf(stderr, "(dset->n = %d)\n", dset->n);
3084 }
3085 #endif
3086
3087 return err;
3088 }
3089
3090 /* We'll do "spread" compaction for monthly to quarterly or annual,
3091 quarterly to annual, or daily to monthly or quarterly. Other
3092 cases are rejected.
3093 */
3094
compact_spread_pd_check(int high,int low)3095 static int compact_spread_pd_check (int high, int low)
3096 {
3097 if ((low == 12 || low == 4) &&
3098 (high == 5 || high == 6 || high == 7)) {
3099 /* daily to monthly or quarterly */
3100 return 0;
3101 }
3102
3103 if (!(high == 12 && low == 1) &&
3104 !(high == 12 && low == 4) &&
3105 !(high == 4 && low == 1)) {
3106 gretl_errmsg_set("Unsupported conversion");
3107 return E_DATA;
3108 }
3109
3110 return 0;
3111 }
3112
3113 static void
init_datainfo_from_sinfo(DATASET * dset,SERIESINFO * sinfo)3114 init_datainfo_from_sinfo (DATASET *dset, SERIESINFO *sinfo)
3115 {
3116 dset->pd = sinfo->pd;
3117
3118 strcpy(dset->stobs, sinfo->stobs);
3119 strcpy(dset->endobs, sinfo->endobs);
3120 colonize_obs(dset->stobs);
3121 colonize_obs(dset->endobs);
3122
3123 dset->sd0 = get_date_x(dset->pd, dset->stobs);
3124 dset->n = sinfo->nobs;
3125 dset->v = 2;
3126
3127 dset->t1 = 0;
3128 dset->t2 = dset->n - 1;
3129 }
3130
3131 /* construct a little dataset as a temporary wrapper for an
3132 import using compact=spread
3133 */
3134
make_import_tmpset(const DATASET * dset,SERIESINFO * sinfo,double ** dbZ,int * err)3135 static DATASET *make_import_tmpset (const DATASET *dset,
3136 SERIESINFO *sinfo,
3137 double **dbZ,
3138 int *err)
3139 {
3140 DATASET *tmpset = NULL;
3141
3142 *err = compact_spread_pd_check(sinfo->pd, dset->pd);
3143 if (*err) {
3144 return NULL;
3145 }
3146
3147 tmpset = datainfo_new();
3148 if (tmpset == NULL) {
3149 *err = E_ALLOC;
3150 return NULL;
3151 }
3152
3153 tmpset->v = 2;
3154 tmpset->n = sinfo->nobs;
3155
3156 tmpset->Z = malloc(2 * sizeof *tmpset->Z);
3157 if (tmpset->Z == NULL) {
3158 *err = E_ALLOC;
3159 free(tmpset);
3160 return NULL;
3161 }
3162
3163 *err = dataset_allocate_varnames(tmpset);
3164 if (*err) {
3165 free(tmpset->Z[1]);
3166 free(tmpset->Z);
3167 free(tmpset);
3168 return NULL;
3169 }
3170
3171 tmpset->Z[0] = NULL;
3172 tmpset->Z[1] = dbZ[1];
3173 dbZ[1] = NULL; /* note: stolen! */
3174
3175 tmpset->t1 = sinfo->t1;
3176 tmpset->t2 = sinfo->t2;
3177 tmpset->pd = sinfo->pd;
3178 strcpy(tmpset->stobs, sinfo->stobs);
3179 strcpy(tmpset->endobs, sinfo->endobs);
3180 tmpset->structure = TIME_SERIES;
3181 tmpset->sd0 = get_date_x(tmpset->pd, tmpset->stobs);
3182
3183 strcpy(tmpset->varname[1], sinfo->varname);
3184
3185 #if 0
3186 PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
3187 fprintf(stderr, "import_tmpset: t1=%d, t2=%d, nobs=%d, pd=%d, offset=%d\n",
3188 sinfo->t1, sinfo->t2, sinfo->nobs, sinfo->pd, sinfo->offset);
3189 printdata(NULL, NULL, tmpset, OPT_O, prn);
3190 gretl_print_destroy(prn);
3191 #endif
3192
3193 return tmpset;
3194 }
3195
3196 static int
real_transcribe_db_data(const char * stobs,int nobs,const DATASET * dset,int dbv,const double * xvec)3197 real_transcribe_db_data (const char *stobs, int nobs,
3198 const DATASET *dset, int dbv,
3199 const double *xvec)
3200 {
3201 int t, pad1, pad2;
3202 int start, stop;
3203 double x;
3204
3205 pad1 = dateton(stobs, dset);
3206 pad2 = dset->n - nobs - pad1;
3207
3208 if (pad1 > 0) {
3209 fprintf(stderr, "Padding at start, %d obs\n", pad1);
3210 for (t=0; t<pad1; t++) {
3211 dset->Z[dbv][t] = NADBL;
3212 }
3213 start = pad1;
3214 } else {
3215 start = 0;
3216 }
3217 if (pad2 > 0) {
3218 int n = dset->n;
3219
3220 fprintf(stderr, "Padding at end, %d obs\n", pad2);
3221 for (t=n-1; t>=n-1-pad2; t--) {
3222 dset->Z[dbv][t] = NADBL;
3223 }
3224 stop = n - pad2;
3225 } else {
3226 stop = dset->n;
3227 }
3228
3229 fprintf(stderr, "Filling in values from %d to %d\n", start, stop - 1);
3230 for (t=start; t<stop; t++) {
3231 x = xvec[t - pad1];
3232 dset->Z[dbv][t] = (x == DBNA)? NADBL : x;
3233 }
3234
3235 return 0;
3236 }
3237
transcribe_db_data(DATASET * dset,int targv,const double * src,int pd,int nobs,char * stobs,CompactMethod cmethod)3238 int transcribe_db_data (DATASET *dset, int targv,
3239 const double *src, int pd,
3240 int nobs, char *stobs,
3241 CompactMethod cmethod)
3242 {
3243 double *xvec = (double *) src;
3244 int free_xvec = 0;
3245
3246 if (pd != dset->pd) {
3247 if (pd < dset->pd) {
3248 /* the series needs to be expanded */
3249 xvec = expand_db_series(src, pd, &nobs, stobs, dset);
3250 } else {
3251 /* the series needs to be compacted */
3252 xvec = compact_db_series(src, pd, &nobs, stobs, dset->pd,
3253 cmethod);
3254 }
3255 if (xvec == NULL) {
3256 return E_ALLOC;
3257 }
3258 free_xvec = 1;
3259 }
3260
3261 real_transcribe_db_data(stobs, nobs, dset, targv, xvec);
3262
3263 if (free_xvec) {
3264 free(xvec);
3265 }
3266
3267 return 0;
3268 }
3269
3270 /* Processes a single db series in "spread" mode, meaning
3271 that multiple series are added to the target dataset,
3272 @dset. This variant is associated with gretl databases.
3273 */
3274
lib_spread_db_data(double ** dbZ,SERIESINFO * sinfo,DATASET * dset,PRN * prn)3275 int lib_spread_db_data (double **dbZ, SERIESINFO *sinfo,
3276 DATASET *dset, PRN *prn)
3277 {
3278 int err = 0;
3279
3280 if (dset == NULL || dset->v == 0) {
3281 gretl_errmsg_set("\"compact=spread\": requires a dataset in place");
3282 err = E_DATA;
3283 } else {
3284 DATASET *tmpset = make_import_tmpset(dset, sinfo, dbZ, &err);
3285
3286 if (!err) {
3287 err = do_compact_spread(tmpset, dset->pd);
3288 }
3289 if (!err) {
3290 err = merge_or_replace_data(dset, &tmpset, OPT_X | OPT_U, prn);
3291 }
3292 }
3293
3294 return err;
3295 }
3296
3297 /* Processes a single db series in "spread" mode, meaning
3298 that multiple series are added to the target dataset,
3299 @dset. This variant is associated with dbnomics import.
3300 */
3301
lib_spread_dbnomics_data(DATASET * dset,DATASET * dbset,PRN * prn)3302 int lib_spread_dbnomics_data (DATASET *dset, DATASET *dbset,
3303 PRN *prn)
3304 {
3305 int err = 0;
3306
3307 if (dset == NULL || dset->v == 0) {
3308 gretl_errmsg_set("\"compact=spread\": requires a dataset in place");
3309 err = E_DATA;
3310 } else {
3311 err = do_compact_spread(dbset, dset->pd);
3312 if (!err) {
3313 /* we add OPT_K ("keep") to prevent destruction of @dbset:
3314 we're bypassing get_merge_opts(), so we'd better know
3315 what we're doing!
3316 */
3317 gretlopt merge_opt = (OPT_X | OPT_U | OPT_K);
3318
3319 err = merge_or_replace_data(dset, &dbset, merge_opt, prn);
3320 }
3321 }
3322
3323 return err;
3324 }
3325
3326 /* Processes a single db series, adding it to @dset if
3327 possible (perhaps after compaction or expansion).
3328 */
3329
lib_add_db_data(double ** dbZ,SERIESINFO * sinfo,DATASET * dset,CompactMethod cmethod,int dbv,PRN * prn)3330 static int lib_add_db_data (double **dbZ, SERIESINFO *sinfo,
3331 DATASET *dset, CompactMethod cmethod,
3332 int dbv, PRN *prn)
3333 {
3334 int new = (dbv == dset->v);
3335 int err = 0;
3336
3337 if (sinfo == NULL && dbZ == NULL) {
3338 fprintf(stderr, "lib_add_db_data: broken call!\n");
3339 return E_DATA;
3340 }
3341
3342 if (cmethod == COMPACT_NONE) {
3343 /* impose default if need be */
3344 cmethod = COMPACT_AVG;
3345 }
3346
3347 if (dset->n == 0) {
3348 /* if the existing dataset is empty, initialize it
3349 using info from the database series
3350 */
3351 init_datainfo_from_sinfo(dset, sinfo);
3352 dset->v = 0; /* trigger for creating data array below */
3353 if (dset->pd != 1 || strcmp(dset->stobs, "1")) {
3354 dset->structure = TIME_SERIES;
3355 }
3356 } else {
3357 err = check_db_import_full(sinfo->pd, sinfo->stobs, sinfo->endobs,
3358 sinfo->varname, dset);
3359 if (err) {
3360 return err;
3361 }
3362 }
3363
3364 if (dset->v == 0) {
3365 /* the data array is still empty */
3366 dset->v = 2;
3367 dbv = 1;
3368 if (start_new_Z(dset, 0)) {
3369 return E_ALLOC;
3370 }
3371 } else if (new && dataset_add_series(dset, 1)) {
3372 return E_ALLOC;
3373 }
3374
3375 #if DB_DEBUG
3376 fprintf(stderr, "dset->Z=%p\n", (void *) dset->Z);
3377 fprintf(stderr, "dset->n = %d, dset->v = %d, dbv = %d\n",
3378 dset->n, dset->v, dbv);
3379 #endif
3380
3381 err = transcribe_db_data(dset, dbv, dbZ[1], sinfo->pd, sinfo->nobs,
3382 sinfo->stobs, cmethod);
3383
3384 if (!err) {
3385 /* common stuff for adding a var */
3386 strcpy(dset->varname[dbv], sinfo->varname);
3387 series_set_label(dset, dbv, sinfo->descrip);
3388 series_set_compact_method(dset, dbv, cmethod);
3389 if (sinfo->pd < dset->pd) {
3390 series_set_orig_pd(dset, dbv, sinfo->pd);
3391 }
3392 } else if (new) {
3393 /* we added a series that has not been filled */
3394 dataset_drop_last_variables(dset, 1);
3395 }
3396
3397 return err;
3398 }
3399
3400 /* compact an individual series, in the context of converting an
3401 entire working dataset to a lower frequency: used in all cases
3402 except conversion from daily to monthly
3403 */
3404
compact_series(const DATASET * dset,int i,int oldn,int startskip,int min_startskip,int compfac,CompactMethod method)3405 static double *compact_series (const DATASET *dset, int i, int oldn,
3406 int startskip, int min_startskip,
3407 int compfac, CompactMethod method)
3408 {
3409 const double *src = dset->Z[i];
3410 double *x;
3411 int lead = startskip - min_startskip;
3412 int to_weekly = (compfac >= 5 && compfac <= 7);
3413 int t, idx;
3414
3415 #if DB_DEBUG
3416 fprintf(stderr, "compact_series: startskip=%d, min_startskip=%d, compfac=%d "
3417 "lead=%d\n", startskip, min_startskip, compfac, lead);
3418 #endif
3419
3420 x = malloc(dset->n * sizeof *x);
3421 if (x == NULL) {
3422 return NULL;
3423 }
3424
3425 for (t=0; t<dset->n; t++) {
3426 x[t] = NADBL;
3427 }
3428
3429 idx = startskip;
3430
3431 for (t=lead; t<dset->n && idx<oldn; t++) {
3432 if (method == COMPACT_SOP) {
3433 if (to_weekly && na(src[idx]) && idx < oldn - 1) {
3434 /* allow one day's slack */
3435 x[t] = src[idx + 1];
3436 } else {
3437 x[t] = src[idx];
3438 }
3439 } else if (method == COMPACT_EOP) {
3440 if (to_weekly && na(src[idx]) && idx > startskip) {
3441 /* one day's slack */
3442 x[t] = src[idx - 1];
3443 } else {
3444 x[t] = src[idx];
3445 }
3446 } else if (method == COMPACT_SUM || method == COMPACT_AVG) {
3447 int j, st, den = compfac;
3448 int n_ok = 0;
3449
3450 if (idx + compfac - 1 > oldn - 1) {
3451 break;
3452 }
3453
3454 x[t] = 0.0;
3455
3456 for (j=0; j<compfac; j++) {
3457 st = idx + j;
3458 if (na(src[st])) {
3459 if (to_weekly) {
3460 den--;
3461 } else {
3462 x[t] = NADBL;
3463 break;
3464 }
3465 } else {
3466 /* got a valid observation */
3467 n_ok++;
3468 x[t] += src[st];
3469 }
3470 }
3471
3472 if (n_ok == 0) {
3473 x[t] = NADBL;
3474 }
3475
3476 if (method == COMPACT_AVG && !na(x[t])) {
3477 if (den > 0) {
3478 x[t] /= den;
3479 } else {
3480 x[t] = NADBL;
3481 }
3482 }
3483 }
3484 idx += compfac;
3485 }
3486
3487 return x;
3488 }
3489
3490 /* Determine year and period (either month or quarter,
3491 depending on the value of @pd) for observation @t in
3492 daily dataset @dset.
3493 */
3494
daily_yp(const DATASET * dset,int t,int pd,int * y,int * p)3495 static int daily_yp (const DATASET *dset, int t,
3496 int pd, int *y, int *p)
3497 {
3498 char obs[12];
3499 int mon, day;
3500
3501 ntolabel(obs, t, dset);
3502
3503 if (sscanf(obs, YMD_READ_FMT, y, &mon, &day) != 3) {
3504 return E_DATA;
3505 }
3506
3507 if (pd == 12) {
3508 *p = mon;
3509 } else {
3510 /* convert month to quarter */
3511 *p = 1 + (mon - 1) / 3;
3512 }
3513
3514 return 0;
3515 }
3516
3517 #define DAYDBG 0
3518
3519 /* For a single row, @cset_t, of a compacted dataset,
3520 write daily values into the set of monthly or
3521 quarterly series that will represent them. The
3522 daily data are drawn from @dset and transcribed to
3523 @cset.
3524 */
3525
fill_cset_t(const DATASET * dset,int * startday,DATASET * cset,int cset_t,int compfac,int qmonth)3526 static void fill_cset_t (const DATASET *dset,
3527 int *startday,
3528 DATASET *cset,
3529 int cset_t,
3530 int compfac,
3531 int qmonth)
3532 {
3533 char obs[OBSLEN];
3534 double cvec[30];
3535 int idx[30];
3536 const double *z;
3537 int y, p, pstart = 0;
3538 int effn, ndays = 0;
3539 int i, j, k, s, t, t0;
3540 double zsum = 0.0;
3541
3542 t0 = *startday;
3543 y = p = 0;
3544
3545 /* how many daily obs do we have in this month? */
3546 for (t=t0; t<dset->n; t++) {
3547 daily_yp(dset, t, 12, &y, &p);
3548 if (t == t0) {
3549 pstart = p;
3550 } else if (p != pstart) {
3551 break;
3552 }
3553 ndays++;
3554 }
3555
3556 #if 0
3557 fprintf(stderr, "fill_cset_t: ndays = %d, compfac = %d\n",
3558 ndays, compfac);
3559 #endif
3560
3561 /* construct array of month-day indices */
3562 for (j=0; j<compfac && j<ndays; j++) {
3563 ntolabel(obs, t0 + j, dset);
3564 idx[j] = date_to_daily_index(obs, dset->pd);
3565 }
3566
3567 /* the outer loop is over the daily series in the
3568 source dataset */
3569
3570 k = 1 + qmonth * compfac;
3571
3572 for (i=1; i<dset->v; i++) {
3573 z = dset->Z[i] + t0;
3574 for (j=0; j<compfac; j++) {
3575 cvec[j] = NADBL;
3576 }
3577 effn = 0;
3578 zsum = 0.0;
3579 for (j=0; j<compfac && j<ndays; j++) {
3580 s = idx[j];
3581 cvec[s] = z[j];
3582 if (!na(cvec[s])) {
3583 zsum += cvec[s];
3584 effn++;
3585 }
3586 }
3587 if (effn < compfac) {
3588 /* we have some padding to do */
3589 double zbar = zsum / effn;
3590
3591 for (j=0; j<compfac; j++) {
3592 if (na(cvec[j])) {
3593 cvec[j] = zbar;
3594 }
3595 }
3596 }
3597 /* transcribe into target dataset */
3598 for (j=0; j<compfac; j++) {
3599 cset->Z[k+j][cset_t] = cvec[j];
3600 }
3601
3602 k += compfac;
3603 }
3604
3605 *startday += ndays;
3606 }
3607
3608 #define SPREAD_DEBUG 0
3609
3610 /* compact daily data to monthly or quarterly using the
3611 "spread" method */
3612
compact_daily_spread(const DATASET * dset,int newpd,int * nv,int * err)3613 static DATASET *compact_daily_spread (const DATASET *dset,
3614 int newpd,
3615 int *nv,
3616 int *err)
3617 {
3618 const char *periods[] = {
3619 "month",
3620 "quarter"
3621 };
3622 const char *period;
3623 DATASET *cset = NULL;
3624 char label[MAXLABEL];
3625 int oldpd = dset->pd;
3626 int compfac;
3627 int v, i, j, k, t, T;
3628 int startyr, startper;
3629 int endyr, endper;
3630 int startday;
3631
3632 fprintf(stderr, "*** compact_daily_spread (newpd=%d) ***\n", newpd);
3633
3634 daily_yp(dset, 0, newpd, &startyr, &startper);
3635 daily_yp(dset, dset->n - 1, newpd, &endyr, &endper);
3636 compfac = midas_days_per_period(dset->pd, newpd);
3637
3638 if (newpd == 12) {
3639 period = periods[0];
3640 } else if (newpd == 4) {
3641 period = periods[1];
3642 } else {
3643 *err = E_DATA;
3644 return NULL;
3645 }
3646
3647 T = newpd * (endyr - startyr) + (endper - startper + 1);
3648
3649 if (T <= 1) {
3650 *err = E_DATA;
3651 return NULL;
3652 }
3653
3654 /* the number of series, after compaction */
3655 v = 1 + (dset->v - 1) * compfac;
3656
3657 #if SPREAD_DEBUG
3658 fprintf(stderr, "oldpd %d, newpd %d, nvars=%d, T=%d, start=%d:%d, end=%d:%d\n",
3659 dset->pd, newpd, v, T, startyr, startper, endyr, endper);
3660 #endif
3661
3662 cset = create_new_dataset(v, T, 0);
3663 if (cset == NULL) {
3664 *err = E_ALLOC;
3665 return NULL;
3666 }
3667
3668 if (newpd == 12) {
3669 sprintf(cset->stobs, "%d:%02d", startyr, startper);
3670 sprintf(cset->endobs, "%d:%02d", endyr, endper);
3671 } else {
3672 sprintf(cset->stobs, "%d:%d", startyr, startper);
3673 sprintf(cset->endobs, "%d:%d", endyr, endper);
3674 }
3675
3676 cset->pd = newpd;
3677 cset->structure = TIME_SERIES;
3678 cset->sd0 = get_date_x(cset->pd, cset->stobs);
3679
3680 /* ensure no uninitialized data */
3681 for (i=1; i<v; i++) {
3682 for (t=0; t<T; t++) {
3683 cset->Z[i][t] = NADBL;
3684 }
3685 }
3686
3687 /* do the actual data transcription first */
3688 startday = 0;
3689 for (t=0; t<T; t++) {
3690 if (newpd == 4) {
3691 fill_cset_t(dset, &startday, cset, t, compfac/3, 0);
3692 fill_cset_t(dset, &startday, cset, t, compfac/3, 1);
3693 fill_cset_t(dset, &startday, cset, t, compfac/3, 2);
3694 } else {
3695 fill_cset_t(dset, &startday, cset, t, compfac, 0);
3696 }
3697 }
3698
3699 /* then name the series and reorganize */
3700
3701 k = 1;
3702 for (i=1; i<dset->v; i++) {
3703 double *xtmp;
3704 char sfx[16];
3705 int p;
3706
3707 /* switch data order */
3708 for (j=0; j<compfac/2; j++) {
3709 p = k + compfac - j - 1;
3710 xtmp = cset->Z[k+j];
3711 cset->Z[k+j] = cset->Z[p];
3712 cset->Z[p] = xtmp;
3713 }
3714
3715 /* names and labels */
3716 for (j=0; j<compfac; j++) {
3717 strcpy(cset->varname[k+j], dset->varname[i]);
3718 gretl_trunc(cset->varname[k+j], VNAMELEN - 5);
3719 sprintf(sfx, "_d%02d", compfac - j);
3720 strcat(cset->varname[k+j], sfx);
3721 sprintf(label, "%s in day %d of %s", dset->varname[i],
3722 compfac - j, period);
3723 series_record_label(cset, k+j, label);
3724 series_set_midas_period(cset, k+j, compfac - j);
3725 series_set_midas_freq(cset, k+j, oldpd);
3726 if (j == 0) {
3727 series_set_midas_anchor(cset, k+j);
3728 }
3729 }
3730
3731 /* advance column write position for next source series */
3732 k += compfac;
3733 }
3734
3735 #if SPREAD_DEBUG > 1
3736 PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
3737 printdata(NULL, NULL, cset, OPT_O, prn);
3738 gretl_print_destroy(prn);
3739 #endif
3740
3741 return cset;
3742 }
3743
3744 /* compact an entire dataset, transcribing from each higher-frequency
3745 series to a set of lower-frequency series, each of which holds the
3746 observations from a given sub-period
3747 */
3748
compact_data_spread(const DATASET * dset,int newpd,int startmaj,int startmin,int endmaj,int endmin,int * nv,int * err)3749 static DATASET *compact_data_spread (const DATASET *dset, int newpd,
3750 int startmaj, int startmin,
3751 int endmaj, int endmin,
3752 int *nv, int *err)
3753 {
3754 const char *subper[] = {
3755 "month",
3756 "quarter"
3757 };
3758 const char *period[] = {
3759 "year",
3760 "quarter"
3761 };
3762 const char *p0, *p1;
3763 DATASET *cset = NULL;
3764 char sfx[16];
3765 char label[MAXLABEL];
3766 int oldpd = dset->pd;
3767 int compfac = oldpd / newpd;
3768 int v, i, j, k, t, T;
3769 int q0 = 0, qT = 0;
3770
3771 /* calculate @T, the number of observations that the compacted
3772 dataset should comprise
3773 */
3774 if (newpd == 1) {
3775 T = endmaj - startmaj + 1;
3776 } else if (newpd == 4) {
3777 T = oldpd * (endmaj - startmaj + 1) / compfac;
3778 q0 = 1 + (startmin - 1) / 3;
3779 qT = 1 + (endmin - 1) / 3;
3780 T += qT - q0 - 3;
3781 } else {
3782 *err = E_DATA;
3783 return NULL;
3784 }
3785
3786 if (T <= 1) {
3787 *err = E_DATA;
3788 return NULL;
3789 }
3790
3791 /* calculate @v, the number of series after compaction */
3792 v = 1 + (dset->v - 1) * compfac;
3793
3794 #if SPREAD_DEBUG
3795 fprintf(stderr, "oldpd %d, newpd %d, v=%d, T=%d, start=%d:%d, end=%d:%d\n",
3796 oldpd, newpd, v, T, startmaj, startmin, endmaj, endmin);
3797 #endif
3798
3799 cset = create_new_dataset(v, T, 0);
3800 if (cset == NULL) {
3801 *err = E_ALLOC;
3802 return NULL;
3803 }
3804
3805 if (newpd == 1) {
3806 sprintf(cset->stobs, "%d", startmaj);
3807 sprintf(cset->endobs, "%d", endmaj);
3808 p1 = period[0];
3809 } else {
3810 /* newpd must be 4 */
3811 sprintf(cset->stobs, "%d:%d", startmaj, q0);
3812 sprintf(cset->endobs, "%d:%d", endmaj, qT);
3813 p1 = period[1];
3814 }
3815
3816 p0 = (oldpd == 12)? subper[0] : subper[1];
3817
3818 cset->pd = newpd;
3819 cset->structure = TIME_SERIES;
3820 cset->sd0 = get_date_x(cset->pd, cset->stobs);
3821
3822 #if SPREAD_DEBUG
3823 fprintf(stderr, "stobs '%s', endobs '%s', sd0=%g, q0=%d\n",
3824 cset->stobs, cset->endobs, cset->sd0, q0);
3825 #endif
3826
3827 k = 1; /* the first new series */
3828
3829 for (i=1; i<dset->v; i++) {
3830 /* loop across original data series */
3831 double *xtmp;
3832 int offset;
3833 int p, s = 0;
3834
3835 /* how many initial observations should be set to NA? */
3836 if (newpd == 1) {
3837 offset = startmin - 1;
3838 } else {
3839 offset = startmin - (1 + (q0 - 1) * compfac);
3840 }
3841
3842 for (t=0; t<T; t++) {
3843 /* loop across new time periods */
3844 for (j=0; j<compfac; j++) {
3845 /* loop across new series <- sub-periods */
3846 while (s < offset) {
3847 cset->Z[k+j][t] = NADBL;
3848 offset--;
3849 j++;
3850 }
3851 if (s < dset->n) {
3852 cset->Z[k+j][t] = dset->Z[i][s];
3853 } else {
3854 cset->Z[k+j][t] = NADBL;
3855 }
3856 s++;
3857 }
3858 }
3859
3860 /* reverse the new columns: most recent first */
3861 for (j=0; j<compfac/2; j++) {
3862 p = k + compfac - j - 1;
3863 xtmp = cset->Z[k+j];
3864 cset->Z[k+j] = cset->Z[p];
3865 cset->Z[p] = xtmp;
3866 }
3867
3868 /* names and labels */
3869 for (j=0; j<compfac; j++) {
3870 strcpy(cset->varname[k+j], dset->varname[i]);
3871 if (oldpd == 12 && newpd == 4) {
3872 gretl_trunc(cset->varname[k+j], VNAMELEN - 4);
3873 sprintf(sfx, "_m%d", compfac - j);
3874 } else if (oldpd == 12) {
3875 /* going to annual */
3876 gretl_trunc(cset->varname[k+j], VNAMELEN - 5);
3877 sprintf(sfx, "_m%02d", compfac - j);
3878 } else {
3879 gretl_trunc(cset->varname[k+j], VNAMELEN - 4);
3880 sprintf(sfx, "_q%d", compfac - j);
3881 }
3882 strcat(cset->varname[k+j], sfx);
3883 sprintf(label, "%s in %s %d of %s", dset->varname[i],
3884 p0, compfac - j, p1);
3885 series_record_label(cset, k+j, label);
3886 series_set_midas_period(cset, k+j, compfac - j);
3887 series_set_midas_freq(cset, k+j, oldpd);
3888 if (j == 0) {
3889 series_set_midas_anchor(cset, k+j);
3890 }
3891 }
3892
3893 /* advance column write position for next source series */
3894 k += compfac;
3895 }
3896
3897 #if SPREAD_DEBUG > 1
3898 PRN *prn = gretl_print_new(GRETL_PRINT_STDERR, NULL);
3899 printdata(NULL, NULL, cset, OPT_O, prn);
3900 gretl_print_destroy(prn);
3901 #endif
3902
3903 return cset;
3904 }
3905
3906 /* specific apparatus for converting daily time series to monthly */
3907
extend_series(const double * z,int n)3908 static double *extend_series (const double *z, int n)
3909 {
3910 double *x = malloc(n * sizeof *x);
3911
3912 if (x != NULL) {
3913 int t;
3914
3915 x[0] = NADBL;
3916 for (t=1; t<n; t++) {
3917 x[t] = z[t-1];
3918 }
3919 }
3920
3921 return x;
3922 }
3923
3924 #define DMDEBUG 0
3925
3926 static double *
daily_series_to_monthly(DATASET * dset,int i,int nm,int yr,int mon,int offset,int any_eop,CompactMethod method)3927 daily_series_to_monthly (DATASET *dset, int i,
3928 int nm, int yr, int mon, int offset,
3929 int any_eop, CompactMethod method)
3930 {
3931 double *x;
3932 const double *src = dset->Z[i];
3933 const double *z;
3934 double *tmp = NULL;
3935 int t, sop_t, eop_t;
3936
3937 x = malloc(nm * sizeof *x);
3938 if (x == NULL) {
3939 return NULL;
3940 }
3941
3942 if (offset < 0) {
3943 tmp = extend_series(src, dset->n + 1);
3944 if (tmp == NULL) {
3945 free(x);
3946 return NULL;
3947 }
3948 /* this permits use of a negative offset */
3949 z = tmp + 1;
3950 } else {
3951 z = src;
3952 }
3953
3954 /* Note: we can't necessarily assume that the first obs
3955 is the first day of a month. The @offset value gives the
3956 number of daily observations (allowing for the number of
3957 observed days in the week) in the first month of the daily
3958 data, prior to the data actually starting.
3959 */
3960
3961 /* The "one day's slack" business below, with start-of-period and
3962 end-of-period compaction is designed to allow for the
3963 possibility that the first (or last) day of the month may not
3964 have been a trading day.
3965 */
3966
3967 /* first obs for start-of-period */
3968 sop_t = offset;
3969
3970 /* first obs for end-of-period */
3971 if (sop_t > 0) {
3972 eop_t = offset - 1;
3973 } else {
3974 /* the first obs starts a month */
3975 eop_t = get_days_in_month(mon, yr, dset->pd, 0) - 1;
3976 }
3977
3978 #if DMDEBUG
3979 fprintf(stderr, "starting: offset=%d, any_eop=%d, sop_t=%d, eop_t=%d\n",
3980 offset, any_eop, sop_t, eop_t);
3981 #endif
3982
3983 for (t=0; t<nm; t++) {
3984 /* loop across the months in the compacted data */
3985 int mdays = get_days_in_month(mon, yr, dset->pd, 0);
3986
3987 if (t > 0) {
3988 eop_t += mdays;
3989 }
3990
3991 #if DMDEBUG
3992 fprintf(stderr, "t=%d: mon=%d, mdays=%d, sop_t=%d, eop_t=%d\n",
3993 t, mon, mdays, sop_t, eop_t);
3994 #endif
3995
3996 if (t == 0 && offset > 0 && any_eop && method != COMPACT_EOP) {
3997 /* we started with an incomplete month: so any
3998 method other than EOP yields an NA */
3999 x[t] = NADBL;
4000 } else if (method == COMPACT_SOP) {
4001 /* allow one days's slack */
4002 if (na(z[sop_t]) && sop_t < dset->n - 1) {
4003 x[t] = z[sop_t + 1];
4004 } else {
4005 x[t] = z[sop_t];
4006 }
4007 } else if (method == COMPACT_EOP) {
4008 if (eop_t >= dset->n) {
4009 x[t] = NADBL;
4010 } else {
4011 /* allow one days's slack */
4012 if (na(z[eop_t]) && eop_t > 0) {
4013 x[t] = z[eop_t - 1];
4014 } else {
4015 x[t] = z[eop_t];
4016 }
4017 }
4018 } else if (method == COMPACT_SUM ||
4019 method == COMPACT_AVG) {
4020 int j, dayt, den = mdays;
4021 int n_ok = 0;
4022
4023 x[t] = 0.0;
4024
4025 for (j=0; j<mdays; j++) {
4026 dayt = sop_t + j;
4027 if (dayt >= dset->n) {
4028 x[t] = NADBL;
4029 break;
4030 } else if (na(z[dayt])) {
4031 if (method == COMPACT_AVG) {
4032 den--;
4033 }
4034 } else {
4035 /* got a valid observation */
4036 x[t] += z[dayt];
4037 n_ok++;
4038 }
4039 }
4040
4041 if (n_ok == 0) {
4042 x[t] = NADBL;
4043 }
4044
4045 if (method == COMPACT_AVG && !na(x[t])) {
4046 if (den > 0) {
4047 x[t] /= (double) den;
4048 } else {
4049 x[t] = NADBL;
4050 }
4051 }
4052 }
4053
4054 sop_t += mdays;
4055
4056 if (mon == 12) {
4057 mon = 1;
4058 yr++;
4059 } else {
4060 mon++;
4061 }
4062 }
4063
4064 if (tmp != NULL) {
4065 free(tmp);
4066 }
4067
4068 return x;
4069 }
4070
4071 static void
get_startskip_etc(int compfac,int startmin,int endmin,int oldn,CompactMethod method,int * startskip,int * newn)4072 get_startskip_etc (int compfac, int startmin, int endmin,
4073 int oldn, CompactMethod method,
4074 int *startskip, int *newn)
4075 {
4076 int ss = 0, n = 0;
4077
4078 if (method == COMPACT_EOP) {
4079 int unused;
4080
4081 ss = (compfac - (startmin % compfac)) % compfac;
4082 n = oldn / compfac;
4083 unused = oldn - 1 - ss - (n-1) * compfac;
4084 if (unused >= compfac) {
4085 n++;
4086 }
4087 } else if (method == COMPACT_SOP) {
4088 int unused;
4089
4090 ss = (compfac - (startmin % compfac) + 1) % compfac;
4091 n = oldn / compfac;
4092 unused = oldn - 1 - ss - (n-1) * compfac;
4093 if (unused >= compfac) {
4094 n++;
4095 }
4096 } else {
4097 int es = endmin % compfac;
4098
4099 ss = (compfac - (startmin % compfac) + 1) % compfac;
4100 n = (oldn - ss - es) / compfac;
4101 }
4102
4103 *startskip = ss;
4104 *newn = n;
4105 }
4106
4107 /* specific to compaction of daily time series */
4108
4109 static void
get_daily_compact_params(CompactMethod default_method,int * any_eop,int * any_sop,int * all_same,const DATASET * dset)4110 get_daily_compact_params (CompactMethod default_method,
4111 int *any_eop, int *any_sop,
4112 int *all_same,
4113 const DATASET *dset)
4114 {
4115 int i, n_not_eop = 0, n_not_sop = 0;
4116
4117 *all_same = 1;
4118 *any_eop = (default_method == COMPACT_EOP)? 1 : 0;
4119 *any_sop = (default_method == COMPACT_SOP)? 1 : 0;
4120
4121 for (i=1; i<dset->v; i++) {
4122 CompactMethod method = series_get_compact_method(dset, i);
4123
4124 if (method != default_method && method != COMPACT_NONE) {
4125 *all_same = 0;
4126 if (method == COMPACT_EOP) {
4127 *any_eop = 1;
4128 } else {
4129 n_not_eop++;
4130 }
4131 if (method == COMPACT_SOP) {
4132 *any_sop = 1;
4133 } else {
4134 n_not_sop++;
4135 }
4136 }
4137 }
4138
4139 if (n_not_eop == dset->v - 1) {
4140 *any_eop = 0;
4141 }
4142
4143 if (n_not_sop == dset->v - 1) {
4144 *any_sop = 0;
4145 }
4146 }
4147
4148 /* specific to non-daily time series (monthly or quarterly) */
4149
4150 static void
get_global_compact_params(int compfac,int startmin,int endmin,CompactMethod default_method,int * min_startskip,int * max_n,int * any_eop,int * all_same,DATASET * dset)4151 get_global_compact_params (int compfac, int startmin, int endmin,
4152 CompactMethod default_method,
4153 int *min_startskip, int *max_n,
4154 int *any_eop, int *all_same,
4155 DATASET *dset)
4156 {
4157 CompactMethod method;
4158 int i, startskip, n;
4159 int n_not_eop = 0;
4160
4161 for (i=0; i<dset->v; i++) {
4162 if (i == 0) {
4163 get_startskip_etc(compfac, startmin, endmin, dset->n,
4164 default_method, &startskip, &n);
4165 if (default_method == COMPACT_EOP) {
4166 *any_eop = 1;
4167 }
4168 } else {
4169 method = series_get_compact_method(dset, i);
4170 if (method != default_method && method != COMPACT_NONE) {
4171 get_startskip_etc(compfac, startmin, endmin, dset->n,
4172 method, &startskip, &n);
4173 *all_same = 0;
4174 if (method == COMPACT_EOP) {
4175 *any_eop = 1;
4176 } else {
4177 n_not_eop++;
4178 }
4179 }
4180 }
4181 if (startskip < *min_startskip) {
4182 *min_startskip = startskip;
4183 }
4184 if (n > *max_n) {
4185 *max_n = n;
4186 }
4187 }
4188
4189 if (n_not_eop == dset->v - 1) {
4190 *any_eop = 0;
4191 }
4192 }
4193
get_obs_maj_min(const char * obs,int * maj,int * min)4194 static int get_obs_maj_min (const char *obs, int *maj, int *min)
4195 {
4196 int np = sscanf(obs, "%d:%d", maj, min);
4197
4198 if (np < 2) {
4199 np = sscanf(obs, "%d.%d", maj, min);
4200 }
4201
4202 return (np == 2);
4203 }
4204
4205 /* for daily data, figure the number of observations to
4206 be skipped at the start of each series
4207 */
4208
get_daily_offset(const DATASET * dset,int y,int m,int d,int skip,int any_eop)4209 static int get_daily_offset (const DATASET *dset,
4210 int y, int m, int d,
4211 int skip, int any_eop)
4212 {
4213 int ret = 0;
4214
4215 if (skip) {
4216 /* moving to start of next month: offset = no. of
4217 observations in the first month */
4218 ret = days_in_month_after(y, m, d, dset->pd) + 1;
4219 } else if (any_eop && !day_starts_month(d, m, y, dset->pd, NULL)) {
4220 ret = days_in_month_after(y, m, d, dset->pd) + 1;
4221 } else {
4222 /* offset = no. of obs missing at start of first month */
4223 ret = days_in_month_before(y, m, d, dset->pd);
4224 #if DMDEBUG
4225 fprintf(stderr, "days_in_month_before %d-%02d-%02d = %d "
4226 "for pd=%d\n", y, m, d, ret, dset->pd);
4227 #endif
4228 }
4229
4230 return ret;
4231 }
4232
4233 /* for daily data, figure the number of valid monthly
4234 observations that can be constructed by compaction
4235 */
4236
get_n_ok_months(const DATASET * dset,CompactMethod default_method,int * startyr,int * startmon,int * endyr,int * endmon,int * offset,int * p_any_eop)4237 static int get_n_ok_months (const DATASET *dset,
4238 CompactMethod default_method,
4239 int *startyr, int *startmon,
4240 int *endyr, int *endmon,
4241 int *offset, int *p_any_eop)
4242 {
4243 int y1, m1, d1, y2, m2, d2;
4244 int any_eop, any_sop, all_same;
4245 int skip = 0, pad = 0, nm = -1;
4246
4247 if (sscanf(dset->stobs, YMD_READ_FMT, &y1, &m1, &d1) != 3) {
4248 return -1;
4249 }
4250 if (sscanf(dset->endobs, YMD_READ_FMT, &y2, &m2, &d2) != 3) {
4251 return -1;
4252 }
4253
4254 if (y1 < 100) {
4255 y1 = FOUR_DIGIT_YEAR(y1);
4256 }
4257 if (y2 < 100) {
4258 y2 = FOUR_DIGIT_YEAR(y2);
4259 }
4260
4261 nm = 12 * (y2 - y1) + m2 - m1 + 1;
4262
4263 get_daily_compact_params(default_method, &any_eop, &any_sop,
4264 &all_same, dset);
4265
4266 *startyr = y1;
4267 *startmon = m1;
4268 *endyr = y2;
4269 *endmon = m2;
4270
4271 #if DMDEBUG
4272 fprintf(stderr, "get_n_ok_months: any_sop=%d, any_eop=%d, "
4273 "all_same=%d\n", any_sop, any_eop, all_same);
4274 fprintf(stderr, "y1=%d m1=%d d1=%d; y2=%d m2=%d d2=%d\n",
4275 y1, m1, d1, y2, m2, d2);
4276 #endif
4277
4278 if (!day_starts_month(d1, m1, y1, dset->pd, &pad) && !any_eop) {
4279 if (*startmon == 12) {
4280 *startmon = 1;
4281 *startyr += 1;
4282 } else {
4283 *startmon += 1;
4284 }
4285 skip = 1;
4286 nm--;
4287 }
4288
4289 if (!day_ends_month(d2, m2, y2, dset->pd) && !any_sop) {
4290 if (*endmon == 1) {
4291 *endmon = 12;
4292 *endyr -= 1;
4293 } else {
4294 *endmon -= 1;
4295 }
4296 nm--;
4297 }
4298
4299 #if DMDEBUG
4300 fprintf(stderr, "after adjustment: range %d:%02d to %d:%02d, "
4301 "pad=%d, skip=%d\n", *startyr, *startmon, *endyr, *endmon,
4302 pad, skip);
4303 #endif
4304
4305 if (pad) {
4306 *offset = -1;
4307 } else {
4308 *offset = get_daily_offset(dset, y1, m1, d1, skip, any_eop);
4309 }
4310
4311 *p_any_eop = any_eop;
4312
4313 return nm;
4314 }
4315
4316 #define WEEKLY_DEBUG 0
4317
4318 static int
weeks_to_months_exec(double ** mZ,const DATASET * dset,CompactMethod method)4319 weeks_to_months_exec (double **mZ, const DATASET *dset,
4320 CompactMethod method)
4321 {
4322 char obsstr[OBSLEN];
4323 int *mn = NULL;
4324 int yr, mon, day;
4325 int monbak = 0;
4326 int i, s, t = 0;
4327 int err = 0;
4328
4329 mn = malloc(dset->v * sizeof *mn);
4330 if (mn == NULL) {
4331 return E_ALLOC;
4332 }
4333
4334 for (i=1; i<dset->v; i++) {
4335 /* initialize all series, first obs */
4336 mZ[i][0] = NADBL;
4337 mn[i] = 0;
4338 }
4339
4340 for (s=0; s<dset->n; s++) {
4341 /* loop across the weekly obs in this month */
4342 ntolabel(obsstr, s, dset);
4343 sscanf(obsstr, YMD_READ_FMT, &yr, &mon, &day);
4344 if (monbak > 0 && mon != monbak) {
4345 /* new month: finalize the previous one */
4346 for (i=1; i<dset->v; i++) {
4347 if (method == COMPACT_EOP) {
4348 if (s > 0) {
4349 mZ[i][t] = dset->Z[i][s-1];
4350 }
4351 } else if (method == COMPACT_AVG) {
4352 if (mn[i] > 0) {
4353 mZ[i][t] /= (double) mn[i];
4354 }
4355 }
4356 }
4357 /* and start another? */
4358 if (s < dset->n - 1) {
4359 t++;
4360 for (i=1; i<dset->v; i++) {
4361 /* initialize all series, current obs */
4362 if (method == COMPACT_SOP) {
4363 mZ[i][t] = dset->Z[i][s];
4364 } else {
4365 mZ[i][t] = NADBL;
4366 }
4367 mn[i] = 0;
4368 }
4369 }
4370 }
4371
4372 /* cumulate non-missing weekly observations? */
4373 for (i=1; i<dset->v; i++) {
4374 if (method == COMPACT_SOP) {
4375 ; /* handled above */
4376 } else if (method == COMPACT_EOP) {
4377 mZ[i][t] = dset->Z[i][s];
4378 } else if (!na(dset->Z[i][s])) {
4379 if (na(mZ[i][t])) {
4380 mZ[i][t] = dset->Z[i][s];
4381 } else {
4382 mZ[i][t] += dset->Z[i][s];
4383 }
4384 mn[i] += 1;
4385 }
4386 if (mon == monbak && s == dset->n - 1) {
4387 /* reached the end: ship out last obs */
4388 if (method == COMPACT_EOP) {
4389 mZ[i][t] = NADBL;
4390 } else if (method == COMPACT_AVG && mn[i] > 0) {
4391 mZ[i][t] /= (double) mn[i];
4392 }
4393 }
4394 }
4395 monbak = mon;
4396 }
4397
4398 free(mn);
4399
4400 return err;
4401 }
4402
4403 static int
weeks_to_months_check(const DATASET * dset,int * startyr,int * endyr,int * startmon,int * endmon)4404 weeks_to_months_check (const DATASET *dset, int *startyr, int *endyr,
4405 int *startmon, int *endmon)
4406 {
4407 char obsstr[OBSLEN];
4408 int yr, mon, day;
4409 int wcount = 0, mcount = 0;
4410 int monbak = 0;
4411 int t, err = 0;
4412
4413 for (t=0; t<dset->n; t++) {
4414 ntolabel(obsstr, t, dset);
4415 if (sscanf(obsstr, YMD_READ_FMT, &yr, &mon, &day) != 3) {
4416 err = 1;
4417 break;
4418 }
4419 if (monbak == 0) {
4420 /* first obs */
4421 fprintf(stderr, "starting month = '%d'\n", mon);
4422 *startyr = yr;
4423 *startmon = mon;
4424 mcount++;
4425 wcount = 1;
4426 } else if (mon != monbak) {
4427 /* got a new month: report on previous one */
4428 #if WEEKLY_DEBUG
4429 fprintf(stderr, "month %d ('%d'), weekly obs = %d\n",
4430 mcount, monbak, wcount);
4431 #endif
4432 mcount++;
4433 wcount = 1;
4434 } else {
4435 /* continuation of current month */
4436 wcount++;
4437 }
4438 monbak = mon;
4439 }
4440
4441 if (err) {
4442 mcount = 0;
4443 } else {
4444 /* flush the last observation */
4445 #if WEEKLY_DEBUG
4446 fprintf(stderr, "month %d ('%d'), weekly obs = %d\n",
4447 mcount, monbak, wcount);
4448 #endif
4449 *endyr = yr;
4450 *endmon = mon;
4451 }
4452
4453 return mcount;
4454 }
4455
weekly_dataset_to_monthly(DATASET * dset,CompactMethod method)4456 static int weekly_dataset_to_monthly (DATASET *dset,
4457 CompactMethod method)
4458 {
4459 DATASET mset;
4460 int startyr = 1, endyr = 1;
4461 int startmon = 1, endmon = 1;
4462 int err = 0;
4463
4464 mset.n = weeks_to_months_check(dset, &startyr, &endyr, &startmon, &endmon);
4465 fprintf(stderr, "Weekly data: found %d months\n", mset.n);
4466 if (mset.n <= 0) {
4467 return E_DATA;
4468 }
4469
4470 mset.v = dset->v;
4471 err = allocate_Z(&mset, 0);
4472 if (err) {
4473 return err;
4474 }
4475
4476 /* compact series */
4477 if (!err && dset->v > 1) {
4478 err = weeks_to_months_exec(mset.Z, dset, method);
4479 }
4480
4481 if (err) {
4482 free_Z(&mset);
4483 } else {
4484 free_Z(dset);
4485 dset->Z = mset.Z;
4486 dset->n = mset.n;
4487 dset->pd = 12;
4488 sprintf(dset->stobs, "%04d:%02d", startyr, startmon);
4489 sprintf(dset->endobs, "%04d:%02d", endyr, endmon);
4490 dset->sd0 = get_date_x(dset->pd, dset->stobs);
4491 dset->t1 = 0;
4492 dset->t2 = dset->n - 1;
4493 }
4494
4495 return err;
4496 }
4497
shorten_the_constant(double ** Z,int n)4498 static int shorten_the_constant (double **Z, int n)
4499 {
4500 double *tmp = realloc(Z[0], n * sizeof *tmp);
4501
4502 if (tmp == NULL) {
4503 return E_ALLOC;
4504 } else {
4505 Z[0] = tmp;
4506 return 0;
4507 }
4508 }
4509
4510 /* conversion to weekly using a "representative day", e.g. use
4511 each Wednesday value: @repday is 0-based on Sunday.
4512 */
4513
daily_dataset_to_weekly(DATASET * dset,int repday)4514 static int daily_dataset_to_weekly (DATASET *dset, int repday)
4515 {
4516 int y1, m1, d1;
4517 char obs[OBSLEN];
4518 double *x = NULL;
4519 double *tmp;
4520 int n = 0, n_ok = 0;
4521 int wday, ok;
4522 int i, t, err = 0;
4523
4524 fprintf(stderr, "daily_dataset_to_weekly: repday = %d\n", repday);
4525
4526 for (t=0; t<dset->n; t++) {
4527 ntolabel(obs, t, dset);
4528 wday = weekday_from_date(obs);
4529 if (wday == repday) {
4530 ok = 0;
4531 for (i=1; i<dset->v; i++) {
4532 if (!na(dset->Z[i][t])) {
4533 ok = 1;
4534 break;
4535 }
4536 }
4537 if (ok) {
4538 n_ok++;
4539 }
4540 if (n == 0) {
4541 sscanf(obs, YMD_READ_FMT, &y1, &m1, &d1);
4542 }
4543 n++;
4544 }
4545 }
4546
4547 if (n_ok == 0) {
4548 gretl_errmsg_set(_("Compacted dataset would be empty"));
4549 return 1;
4550 }
4551
4552 fprintf(stderr, "n=%d, n_ok=%d, y1=%d, m1=%d, d1=%d\n",
4553 n, n_ok, y1, m1, d1);
4554
4555 x = malloc(n * sizeof *x);
4556 if (x == NULL) {
4557 return E_ALLOC;
4558 }
4559
4560 err = shorten_the_constant(dset->Z, n);
4561
4562 for (i=1; i<dset->v && !err; i++) {
4563 int s = 0;
4564
4565 for (t=0; t<dset->n; t++) {
4566 ntolabel(obs, t, dset);
4567 wday = weekday_from_date(obs);
4568 if (wday == repday) {
4569 x[s++] = dset->Z[i][t];
4570 }
4571 }
4572 tmp = realloc(dset->Z[i], n * sizeof *tmp);
4573 if (tmp == NULL) {
4574 err = E_ALLOC;
4575 } else {
4576 dset->Z[i] = tmp;
4577 for (t=0; t<n; t++) {
4578 dset->Z[i][t] = x[t];
4579 }
4580 }
4581 }
4582
4583 free(x);
4584
4585 if (!err) {
4586 dset->n = n;
4587 dset->pd = 52;
4588
4589 sprintf(dset->stobs, YMD_WRITE_Y4_FMT, y1, m1, d1);
4590 dset->sd0 = get_date_x(dset->pd, dset->stobs);
4591 dset->t1 = 0;
4592 dset->t2 = dset->n - 1;
4593 ntolabel(dset->endobs, dset->t2, dset);
4594
4595 dataset_destroy_obs_markers(dset);
4596 }
4597
4598 return err;
4599 }
4600
daily_dataset_to_monthly(DATASET * dset,CompactMethod default_method)4601 static int daily_dataset_to_monthly (DATASET *dset,
4602 CompactMethod default_method)
4603 {
4604 int nm, startyr, startmon, endyr, endmon;
4605 int offset, any_eop;
4606 CompactMethod method;
4607 double *x;
4608 int i, err = 0;
4609
4610 nm = get_n_ok_months(dset, default_method, &startyr, &startmon,
4611 &endyr, &endmon, &offset, &any_eop);
4612
4613 if (nm <= 0) {
4614 gretl_errmsg_set(_("Compacted dataset would be empty"));
4615 return E_DATA;
4616 }
4617
4618 err = shorten_the_constant(dset->Z, nm);
4619
4620 for (i=1; i<dset->v && !err; i++) {
4621 method = series_get_compact_method(dset, i);
4622 if (method == COMPACT_NONE) {
4623 method = default_method;
4624 }
4625
4626 x = daily_series_to_monthly(dset, i, nm,
4627 startyr, startmon,
4628 offset, any_eop, method);
4629 if (x == NULL) {
4630 err = E_ALLOC;
4631 } else {
4632 free(dset->Z[i]);
4633 dset->Z[i] = x;
4634 }
4635 }
4636
4637 if (!err) {
4638 dset->n = nm;
4639 dset->pd = 12;
4640 sprintf(dset->stobs, "%04d:%02d", startyr, startmon);
4641 sprintf(dset->endobs, "%04d:%02d", endyr, endmon);
4642 dset->sd0 = get_date_x(dset->pd, dset->stobs);
4643 dset->t1 = 0;
4644 dset->t2 = dset->n - 1;
4645
4646 dataset_destroy_obs_markers(dset);
4647 }
4648
4649 return err;
4650 }
4651
get_daily_skip(const DATASET * dset,int t)4652 static int get_daily_skip (const DATASET *dset, int t)
4653 {
4654 int dd = calendar_obs_number(dset->S[t], dset) -
4655 calendar_obs_number(dset->S[t-1], dset);
4656
4657 if (dd == 0) {
4658 fprintf(stderr, "get_daily_skip: S[%d] = '%s', S[%d] = '%s'\n",
4659 t, dset->S[t], t-1, dset->S[t-1]);
4660 }
4661
4662 return dd - 1;
4663 }
4664
insert_missing_hidden_obs(DATASET * dset,int nmiss)4665 static int insert_missing_hidden_obs (DATASET *dset, int nmiss)
4666 {
4667 int oldn = dset->n;
4668 double *tmp, **Z;
4669 int i, s, t, skip;
4670 int err = 0;
4671
4672 err = dataset_add_observations(dset, nmiss, OPT_NONE);
4673 if (err) {
4674 return err;
4675 }
4676
4677 #if DB_DEBUG
4678 fprintf(stderr, "daily data: expanded n from %d to %d\n",
4679 oldn, dset->n);
4680 #endif
4681
4682 Z = dset->Z;
4683 tmp = Z[0];
4684
4685 for (i=1; i<dset->v && !err; i++) {
4686 for (s=0; s<oldn; s++) {
4687 tmp[s] = Z[i][s];
4688 }
4689
4690 Z[i][0] = tmp[0];
4691 t = 1;
4692 for (s=1; s<oldn; s++) {
4693 skip = get_daily_skip(dset, s);
4694 if (skip < 0) {
4695 err = E_DATA;
4696 break;
4697 }
4698 while (skip--) {
4699 Z[i][t++] = NADBL;
4700 }
4701 Z[i][t++] = tmp[s];
4702 }
4703 }
4704
4705 for (t=0; t<dset->n; t++) {
4706 Z[0][t] = 1.0;
4707 if (dset->S != NULL) {
4708 calendar_date_string(dset->S[t], t, dset);
4709 }
4710 }
4711
4712 if (!err) {
4713 dset->t2 = dset->n - 1;
4714 ntolabel(dset->endobs, dset->n - 1, dset);
4715 }
4716
4717 #if DB_DEBUG > 1
4718 fprintf(stderr, "insert_missing_hidden_obs, done, err = %d\n", err);
4719 for (t=0; t<dset->n; t++) {
4720 fprintf(stderr, "Z[1][%d] = %14g\n", t, Z[1][t]);
4721 }
4722 #endif
4723
4724 return err;
4725 }
4726
maybe_expand_daily_data(DATASET * dset)4727 int maybe_expand_daily_data (DATASET *dset)
4728 {
4729 int nmiss = n_hidden_missing_obs(dset, 0, dset->n - 1);
4730 int err = 0;
4731
4732 fprintf(stderr, "n_hidden_missing_obs: nmiss = %d\n", nmiss);
4733
4734 if (nmiss < 0) {
4735 err = 1;
4736 } else if (nmiss > 0) {
4737 err = insert_missing_hidden_obs(dset, nmiss);
4738 }
4739
4740 return err;
4741 }
4742
do_compact_spread(DATASET * dset,int newpd)4743 static int do_compact_spread (DATASET *dset, int newpd)
4744 {
4745 DATASET *cset = NULL;
4746 int nv = 0;
4747 int err;
4748
4749 err = compact_spread_pd_check(dset->pd, newpd);
4750 if (err) {
4751 return err;
4752 }
4753
4754 if (dated_daily_data(dset)) {
4755 err = maybe_expand_daily_data(dset);
4756 if (err) {
4757 gretl_errmsg_set("Error expanding daily data with missing observations");
4758 } else {
4759 cset = compact_daily_spread(dset, newpd, &nv, &err);
4760 }
4761 } else {
4762 int startmaj, startmin;
4763 int endmaj, endmin;
4764
4765
4766 /* get starting obs major and minor components */
4767 if (!get_obs_maj_min(dset->stobs, &startmaj, &startmin)) {
4768 return E_DATA;
4769 }
4770
4771 /* get ending obs major and minor components */
4772 if (!get_obs_maj_min(dset->endobs, &endmaj, &endmin)) {
4773 return E_DATA;
4774 }
4775
4776 cset = compact_data_spread(dset, newpd, startmaj, startmin,
4777 endmaj, endmin, &nv, &err);
4778 }
4779
4780 if (!err) {
4781 free_Z(dset);
4782 clear_datainfo(dset, CLEAR_FULL);
4783 *dset = *cset;
4784 free(cset);
4785 }
4786
4787 return err;
4788 }
4789
4790 /**
4791 * compact_data_set:
4792 * @dset: dataset struct.
4793 * @newpd: target data frequency.
4794 * @default_method: code for the default compaction method.
4795 * @monstart: if non-zero, take Monday rather than Sunday as
4796 * the "start of the week" (only relevant for 7-day daily
4797 * data).
4798 * @repday: "representative day" for conversion from daily
4799 * to weekly data (with method %COMPACT_WDAY only).
4800 *
4801 * Compact the data set from higher to lower frequency.
4802 *
4803 * Returns: 0 on success, non-zero error code on failure.
4804 */
4805
compact_data_set(DATASET * dset,int newpd,CompactMethod default_method,int monstart,int repday)4806 int compact_data_set (DATASET *dset, int newpd,
4807 CompactMethod default_method,
4808 int monstart, int repday)
4809 {
4810 int newn, oldn = dset->n, oldpd = dset->pd;
4811 int compfac;
4812 int startmaj, startmin;
4813 int endmaj, endmin;
4814 int any_eop, all_same;
4815 int min_startskip = 0;
4816 char stobs[OBSLEN];
4817 int i, err = 0;
4818
4819 gretl_error_clear();
4820
4821 if (default_method == COMPACT_SPREAD) {
4822 return do_compact_spread(dset, newpd);
4823 }
4824
4825 if (oldpd == 52) {
4826 return weekly_dataset_to_monthly(dset, default_method);
4827 }
4828
4829 if (dated_daily_data(dset)) {
4830 /* allow for the possibility that the daily dataset
4831 contains "hidden" or suppressed missing observations
4832 (holidays are just skipped, not marked as NA)
4833 */
4834 err = maybe_expand_daily_data(dset);
4835 if (err) {
4836 gretl_errmsg_set("Error expanding daily data with missing observations");
4837 return err;
4838 } else {
4839 oldn = dset->n;
4840 }
4841 }
4842
4843 if (newpd == 52 && oldpd >= 5 && oldpd <= 7 &&
4844 default_method == COMPACT_WDAY) {
4845 /* daily to weekly, using "representative day" */
4846 return daily_dataset_to_weekly(dset, repday);
4847 } else if (newpd == 12 && oldpd >= 5 && oldpd <= 7) {
4848 /* daily to monthly: special */
4849 return daily_dataset_to_monthly(dset, default_method);
4850 } else if (oldpd >= 5 && oldpd <= 7) {
4851 /* daily to weekly */
4852 compfac = oldpd;
4853 if (dated_daily_data(dset)) {
4854 startmin = weekday_from_date(dset->stobs);
4855 if (oldpd == 7) {
4856 if (monstart) {
4857 if (startmin == 0) startmin = 7;
4858 } else {
4859 startmin++;
4860 }
4861 }
4862 } else {
4863 startmin = 1;
4864 }
4865 } else if (oldpd == 24 && newpd >= 5 && newpd <= 7) {
4866 /* hourly to daily */
4867 compfac = 24;
4868 if (!get_obs_maj_min(dset->stobs, &startmaj, &startmin)) {
4869 return 1;
4870 }
4871 } else {
4872 compfac = oldpd / newpd;
4873 /* get starting obs major and minor components */
4874 if (!get_obs_maj_min(dset->stobs, &startmaj, &startmin)) {
4875 return 1;
4876 }
4877 /* get ending obs major and minor components */
4878 if (!get_obs_maj_min(dset->endobs, &endmaj, &endmin)) {
4879 return 1;
4880 }
4881 }
4882
4883 min_startskip = oldpd;
4884 newn = 0;
4885 any_eop = 0;
4886 all_same = 1;
4887 get_global_compact_params(compfac, startmin, endmin, default_method,
4888 &min_startskip, &newn, &any_eop, &all_same,
4889 dset);
4890
4891 if (newn == 0 && default_method != COMPACT_SPREAD) {
4892 gretl_errmsg_set(_("Compacted dataset would be empty"));
4893 return 1;
4894 }
4895
4896 if (newpd == 1) {
4897 if (min_startskip > 0 && !any_eop) {
4898 startmaj++;
4899 }
4900 sprintf(stobs, "%d", startmaj);
4901 } else if (newpd == 52) {
4902 if (oldpd >= 5 && oldpd <= 7 && dset->S != NULL) {
4903 strcpy(stobs, dset->S[min_startskip]);
4904 } else {
4905 strcpy(stobs, "1");
4906 }
4907 } else {
4908 int m0 = startmin + min_startskip;
4909 int minor = m0 / compfac + (m0 % compfac > 0);
4910
4911 if (minor > newpd) {
4912 startmaj++;
4913 minor -= newpd;
4914 }
4915 format_obs(stobs, startmaj, minor, newpd);
4916 }
4917
4918 /* revise datainfo members */
4919 strcpy(dset->stobs, stobs);
4920 dset->pd = newpd;
4921 dset->n = newn;
4922 dset->sd0 = get_date_x(dset->pd, dset->stobs);
4923 dset->t1 = 0;
4924 dset->t2 = dset->n - 1;
4925 ntolabel(dset->endobs, dset->t2, dset);
4926
4927 if (oldpd >= 5 && oldpd <= 7 && dset->markers) {
4928 /* remove any daily date strings; revise endobs */
4929 dataset_destroy_obs_markers(dset);
4930 ntolabel(dset->endobs, dset->t2, dset);
4931 }
4932
4933 err = shorten_the_constant(dset->Z, dset->n);
4934
4935 /* compact the individual data series */
4936 for (i=1; i<dset->v && !err; i++) {
4937 CompactMethod this_method = default_method;
4938 int startskip = min_startskip;
4939 double *x;
4940
4941 if (!all_same) {
4942 CompactMethod m_i = series_get_compact_method(dset, i);
4943
4944 if (m_i != COMPACT_NONE) {
4945 this_method = m_i;
4946 }
4947
4948 startskip = compfac - (startmin % compfac) + 1;
4949 startskip = startskip % compfac;
4950 if (this_method == COMPACT_EOP) {
4951 if (startskip > 0) {
4952 startskip--;
4953 } else {
4954 startskip = compfac - 1;
4955 }
4956 }
4957 }
4958
4959 x = compact_series(dset, i, oldn, startskip, min_startskip,
4960 compfac, this_method);
4961 if (x == NULL) {
4962 err = E_ALLOC;
4963 } else {
4964 free(dset->Z[i]);
4965 dset->Z[i] = x;
4966 }
4967 }
4968
4969 return err;
4970 }
4971
4972 /**
4973 * expand_data_set:
4974 * @dset: dataset struct.
4975 * @newpd: target data frequency.
4976 *
4977 * Expand the data set from lower to higher frequency: an "expert"
4978 * option. This is supported only for expansion from annual
4979 * to quarterly or monthly, or from quarterly to monthly.
4980 *
4981 * Returns: 0 on success, non-zero error code on failure.
4982 */
4983
expand_data_set(DATASET * dset,int newpd)4984 int expand_data_set (DATASET *dset, int newpd)
4985 {
4986 char stobs[OBSLEN];
4987 int oldn = dset->n;
4988 int oldpd = dset->pd;
4989 int t1 = dset->t1;
4990 int t2 = dset->t2;
4991 int mult, newn, nadd;
4992 double *x = NULL;
4993 size_t sz;
4994 int i, j, t, s;
4995 int err = 0;
4996
4997 if (oldpd != 1 && oldpd != 4) {
4998 return E_PDWRONG;
4999 } else if (oldpd == 1 && newpd != 4 && newpd != 12) {
5000 return E_DATA;
5001 } else if (oldpd == 4 && newpd != 12) {
5002 return E_DATA;
5003 }
5004
5005 x = malloc(oldn * sizeof *x);
5006 if (x == NULL) {
5007 return E_ALLOC;
5008 }
5009
5010 mult = newpd / oldpd; /* frequency increase factor */
5011 newn = mult * dset->n; /* revised number of observations */
5012 nadd = newn - oldn; /* number of obs to add */
5013
5014 err = dataset_add_observations(dset, nadd, OPT_D);
5015 if (err) {
5016 goto bailout;
5017 }
5018
5019 sz = oldn * sizeof *x;
5020 for (i=1; i<dset->v; i++) {
5021 memcpy(x, dset->Z[i], sz);
5022 s = 0;
5023 for (t=0; t<oldn; t++) {
5024 for (j=0; j<mult; j++) {
5025 dset->Z[i][s++] = x[t];
5026 }
5027 }
5028 series_set_orig_pd(dset, i, oldpd);
5029 }
5030
5031 if (dset->pd == 1) {
5032 /* starting with annual data */
5033 strcpy(stobs, dset->stobs);
5034 if (newpd == 4) {
5035 strcat(stobs, ":1");
5036 } else {
5037 strcat(stobs, ":01");
5038 }
5039 } else {
5040 /* starting with quarterly data */
5041 int yr, qtr, mo;
5042
5043 sscanf(dset->stobs, "%d:%d", &yr, &qtr);
5044 mo = (qtr - 1) * 3 + 1;
5045 sprintf(stobs, "%d:%02d", yr, mo);
5046 }
5047
5048 /* revise the sample range, if set */
5049 if (dset->t1 > 0) {
5050 dset->t1 *= mult;
5051 }
5052 if (dset->t2 < oldn - 1) {
5053 dset->t2 = dset->t1 + (t2 - t1 + 1) * mult - 1;
5054 }
5055
5056 strcpy(dset->stobs, stobs);
5057 dset->pd = newpd;
5058 dset->sd0 = get_date_x(dset->pd, dset->stobs);
5059 ntolabel(dset->endobs, dset->n - 1, dset);
5060
5061 bailout:
5062
5063 free(x);
5064
5065 return err;
5066 }
5067