1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #define FULL_XML_HEADERS
21 
22 #include "libgretl.h"
23 #include "gretl_xml.h"
24 #include "gretl_panel.h"
25 #include "gretl_func.h"
26 #include "gretl_typemap.h"
27 #include "gretl_string_table.h"
28 #include "dbread.h"
29 #include "swap_bytes.h"
30 #include "gretl_zip.h"
31 
32 #ifdef HAVE_MPI
33 # include "gretl_mpi.h"
34 #endif
35 
36 #ifdef WIN32
37 # include "gretl_win32.h"
38 # include <glib/gstdio.h>
39 #endif
40 
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <unistd.h>
44 #include <errno.h>
45 
46 #undef XML_DEBUG
47 
48 #define GRETLDATA_VERSION "1.4"
49 #define GRETLDATA_COMPAT  "1.3" /* status quo as of gretl 2018b */
50 
51 #define GDT_DEBUG 0
52 
gretl_xml_open_doc_root(const char * fname,const char * rootname,xmlDocPtr * pdoc,xmlNodePtr * pnode)53 int gretl_xml_open_doc_root (const char *fname,
54 			     const char *rootname,
55 			     xmlDocPtr *pdoc,
56 			     xmlNodePtr *pnode)
57 {
58     xmlDocPtr doc;
59     xmlNodePtr node = NULL;
60     int err = 0;
61 
62     LIBXML_TEST_VERSION;
63     xmlKeepBlanksDefault(0);
64 
65     *pdoc = NULL;
66     if (pnode != NULL) {
67 	*pnode = NULL;
68     }
69 
70     doc = xmlParseFile(fname);
71     if (doc == NULL) {
72 	gretl_errmsg_sprintf(_("xmlParseFile failed on %s"), fname);
73 	err = 1;
74     }
75 
76     if (!err && pnode != NULL) {
77 	node = xmlDocGetRootElement(doc);
78 	if (node == NULL) {
79 	    gretl_errmsg_sprintf(_("%s: empty document"), fname);
80 	    xmlFreeDoc(doc);
81 	    err = 1;
82 	}
83     }
84 
85     if (!err && node != NULL && rootname != NULL) {
86 	if (xmlStrcmp(node->name, (XUC) rootname)) {
87 	    gretl_errmsg_sprintf(_("File of the wrong type, root node not %s"),
88 				 rootname);
89 	    fprintf(stderr, "Unexpected root node '%s'\n", (char *) node->name);
90 	    xmlFreeDoc(doc);
91 	    err = 1;
92 	}
93     }
94 
95     if (!err) {
96 	*pdoc = doc;
97 	if (pnode != NULL) {
98 	    *pnode = node;
99 	}
100     }
101 
102     return err;
103 }
104 
compact_method_to_string(int method)105 static char *compact_method_to_string (int method)
106 {
107     if (method == COMPACT_SUM) return "COMPACT_SUM";
108     else if (method == COMPACT_AVG) return "COMPACT_AVG";
109     else if (method == COMPACT_SOP) return "COMPACT_SOP";
110     else if (method == COMPACT_EOP) return "COMPACT_EOP";
111     else return "COMPACT_NONE";
112 }
113 
compact_string_to_int(const char * str)114 static int compact_string_to_int (const char *str)
115 {
116     if (!strcmp(str, "COMPACT_SUM")) return COMPACT_SUM;
117     else if (!strcmp(str, "COMPACT_AVG")) return COMPACT_AVG;
118     else if (!strcmp(str, "COMPACT_SOP")) return COMPACT_SOP;
119     else if (!strcmp(str, "COMPACT_EOP")) return COMPACT_EOP;
120     else return COMPACT_NONE;
121 }
122 
123 /* given a full filename in @src, write to @dest a "simple"
124    counterpart without leading path or extension
125 */
126 
simple_fname(char * dest,const char * src)127 static char *simple_fname (char *dest, const char *src)
128 {
129     char *p;
130     const char *s;
131 
132     s = strrslash(src);
133 
134     /* take last part of src filename */
135     if (s != NULL) {
136         strcpy(dest, s + 1);
137     } else {
138         strcpy(dest, src);
139     }
140 
141     /* trash any extension */
142     p = strrchr(dest, '.');
143     if (p != NULL && strlen(dest) > 3) {
144 	*p = '\0';
145     }
146 
147     return dest;
148 }
149 
data_structure_string(int s)150 static const char *data_structure_string (int s)
151 {
152     switch (s) {
153     case TIME_SERIES:
154     case SPECIAL_TIME_SERIES:
155 	return "time-series";
156     case STACKED_TIME_SERIES:
157 	return "stacked-time-series";
158     case STACKED_CROSS_SECTION:
159 	return "stacked-cross-section";
160     default:
161 	return "cross-section";
162     }
163 }
164 
savenum(const int * list,int i)165 static int savenum (const int *list, int i)
166 {
167     if (list != NULL) {
168 	return list[i];
169     } else {
170 	return i;
171     }
172 }
173 
174 /**
175  * gretl_xml_put_int:
176  * @tag: name to give value.
177  * @i: value to put (as attribute)
178  * @fp: file to which to write.
179  *
180  * Writes to @fp a string of the form "\%s=\%d".
181  */
182 
gretl_xml_put_int(const char * tag,int i,PRN * prn)183 void gretl_xml_put_int (const char *tag, int i, PRN *prn)
184 {
185     pprintf(prn, "%s=\"%d\" ", tag, i);
186 }
187 
188 /**
189  * gretl_xml_put_unsigned:
190  * @tag: name to give value.
191  * @i: value to put (as attribute)
192  * @fp: file to which to write.
193  *
194  * Writes to @fp a string of the form "\%s=\%u".
195  */
196 
gretl_xml_put_unsigned(const char * tag,unsigned int u,PRN * prn)197 void gretl_xml_put_unsigned (const char *tag, unsigned int u, PRN *prn)
198 {
199     pprintf(prn, "%s=\"%u\" ", tag, u);
200 }
201 
202 /**
203  * gretl_xml_put_double:
204  * @tag: name to give value.
205  * @x: value to put (as attribute)
206  * @fp: file to which to write.
207  *
208  * Writes to @fp a string of the form "\%s=\%.17g" if the value of
209  * @x is valid, otherwise "\%s=NA".
210  */
211 
gretl_xml_put_double(const char * tag,double x,PRN * prn)212 void gretl_xml_put_double (const char *tag, double x, PRN *prn)
213 {
214     if (na(x)) {
215 	pprintf(prn, "%s=\"NA\" ", tag);
216     } else {
217 	pprintf(prn, "%s=\"%.17g\" ", tag, x);
218     }
219 }
220 
221 /**
222  * gretl_xml_put_double_array:
223  * @tag: name to give array.
224  * @x: values to put.
225  * @n: number of values in @x.
226  * @fp: file to which to write.
227  *
228  */
229 
gretl_xml_put_double_array(const char * tag,double * x,int n,PRN * prn)230 void gretl_xml_put_double_array (const char *tag, double *x, int n,
231 				 PRN *prn)
232 {
233     int i;
234 
235     pprintf(prn, "<%s count=\"%d\">\n", tag, n);
236     for (i=0; i<n; i++) {
237 	if (na(x[i])) {
238 	    pputs(prn, "NA ");
239 	} else {
240 	    pprintf(prn, "%.17g ", x[i]);
241 	}
242     }
243     pprintf(prn, "</%s>\n", tag);
244 }
245 
246 /**
247  * gretl_xml_put_string:
248  * @str: string to put.
249  * @fp: file to which to write.
250  *
251  * Write @str to @fp.  If @str needs to have XML-special
252  * characters escaped, this will be done automatically.
253  * If @str is NULL, this is considered a no-op.
254  *
255  * Returns: 0 on success, non-zero error code on failure.
256  */
257 
gretl_xml_put_string(const char * str,PRN * prn)258 int gretl_xml_put_string (const char *str, PRN *prn)
259 {
260     int err = 0;
261 
262     if (str == NULL) {
263 	return 0;
264     }
265 
266     if (gretl_xml_validate(str)) {
267 	pputs(prn, str);
268     } else {
269 	char *xstr = gretl_xml_encode(str);
270 
271 	if (xstr != NULL) {
272 	    pputs(prn, xstr);
273 	    free(xstr);
274 	} else {
275 	    err = E_ALLOC;
276 	}
277     }
278 
279     return err;
280 }
281 
282 /**
283  * gretl_xml_put_strings_array:
284  * @tag: name to give array.
285  * @strs: array of strings to put.
286  * @n: number of strings in @strs.
287  * @fp: file to which to write.
288  *
289  */
290 
gretl_xml_put_strings_array(const char * tag,const char ** strs,int n,PRN * prn)291 void gretl_xml_put_strings_array (const char *tag, const char **strs,
292 				  int n, PRN *prn)
293 {
294     int i;
295 
296     if (n < 0) {
297 	/* assume NULL termination */
298 	const char **S = strs;
299 
300 	n = 0;
301 	while (*S != NULL) {
302 	    n++;
303 	    S++;
304 	}
305     }
306 
307     pprintf(prn, "<%s count=\"%d\">\n", tag, n);
308     for (i=0; i<n; i++) {
309 	gretl_xml_put_string(strs[i], prn);
310 	pputs(prn, " ");
311     }
312     pprintf(prn, "</%s>\n", tag);
313 }
314 
315 /**
316  * gretl_xml_put_strings_array_quoted:
317  * @tag: name to give array.
318  * @strs: array of strings to put.
319  * @n: number of strings in @strs.
320  * @fp: file to which to write.
321  *
322  */
323 
gretl_xml_put_strings_array_quoted(const char * tag,const char ** strs,int n,PRN * prn)324 void gretl_xml_put_strings_array_quoted (const char *tag,
325 					 const char **strs, int n,
326 					 PRN *prn)
327 {
328     int i;
329 
330     pprintf(prn, "<%s count=\"%d\">\n", tag, n);
331     for (i=0; i<n; i++) {
332 	pputs(prn, "\"");
333 	gretl_xml_put_string(strs[i], prn);
334 	pputs(prn, "\" ");
335     }
336     pprintf(prn, "</%s>\n", tag);
337 }
338 
339 /**
340  * gretl_xml_put_tagged_string:
341  * @tag: name to give string.
342  * @str: string to put.
343  * @fp: file to which to write.
344  *
345  * Write @str to @fp, enclosed in simple starting and ending
346  * tags specified by @tag.  If @str needs to have XML-special
347  * characters escaped, this will be done automatically.
348  * If @str is NULL, this is considered a no-op.
349  *
350  * Returns: 0 on success, non-zero error code on failure.
351  */
352 
gretl_xml_put_tagged_string(const char * tag,const char * str,PRN * prn)353 void gretl_xml_put_tagged_string (const char *tag, const char *str,
354 				  PRN *prn)
355 {
356     pprintf(prn, "<%s>", tag);
357     gretl_xml_put_string(str, prn);
358     pprintf(prn, "</%s>\n", tag);
359 }
360 
361 /**
362  * gretl_xml_put_tagged_string_plus:
363  * @tag: name to give string.
364  * @str: string to put.
365  * @attrib: name of attribute.
366  * @attval: value of attribute.
367  * @fp: file to which to write.
368  *
369  * Similar to gretl_xml_put_tagged_string(), but allows
370  * (in fact, requires) an attribute name and value along
371  * with the element name and value.
372  *
373  * Returns: 0 on success, non-zero error code on failure.
374  */
375 
gretl_xml_put_tagged_string_plus(const char * tag,const char * str,const char * attrib,const char * attval,PRN * prn)376 void gretl_xml_put_tagged_string_plus (const char *tag,
377 				       const char *str,
378 				       const char *attrib,
379 				       const char *attval,
380 				       PRN *prn)
381 {
382     pprintf(prn, "<%s %s=\"", tag, attrib);
383     gretl_xml_put_string(attval, prn);
384     pputs(prn, "\">");
385     gretl_xml_put_string(str, prn);
386     pprintf(prn, "</%s>\n", tag);
387 }
388 
389 /**
390  * gretl_list_serialize:
391  * @list: list of integers to be written.
392  * @name: name to give list, or NULL.
393  * @fp: file to which to write.
394  *
395  */
396 
gretl_list_serialize(const int * list,const char * name,PRN * prn)397 void gretl_list_serialize (const int *list, const char *name,
398 			   PRN *prn)
399 {
400     int i;
401 
402     if (list == NULL) {
403 	return;
404     }
405 
406     if (name == NULL) {
407 	pputs(prn, "<list>\n");
408     } else {
409 	pprintf(prn, "<list name=\"%s\">\n", name);
410     }
411 
412     for (i=0; i<=list[0]; i++) {
413 	pprintf(prn, "%d ", list[i]);
414     }
415     pputs(prn, "</list>\n");
416 }
417 
418 /**
419  * gretl_xml_put_tagged_list:
420  * @tag: tag in which list should be wrapped.
421  * @list: list of integers to be written.
422  * @fp: file to which to write.
423  *
424  */
425 
gretl_xml_put_tagged_list(const char * tag,const int * list,PRN * prn)426 void gretl_xml_put_tagged_list (const char *tag, const int *list,
427 				PRN *prn)
428 {
429     int i;
430 
431     if (list == NULL) {
432 	return;
433     }
434 
435     pprintf(prn, "<%s>\n", tag);
436     for (i=0; i<=list[0]; i++) {
437 	if (list[i] == LISTSEP) {
438 	    pputs(prn, "; ");
439 	} else {
440 	    pprintf(prn, "%d ", list[i]);
441 	}
442     }
443     pprintf(prn, "</%s>\n", tag);
444 }
445 
446 /**
447  * gretl_matrix_serialize:
448  * @m: matrix to be written.
449  * @name: name for matrix, or NULL.
450  * @fp: stream to which to print.
451  *
452  */
453 
gretl_matrix_serialize(const gretl_matrix * m,const char * name,PRN * prn)454 void gretl_matrix_serialize (const gretl_matrix *m,
455 			     const char *name,
456 			     PRN *prn)
457 {
458     int is_complex = 0;
459     const char **S;
460     double x;
461     int i, j;
462 
463     if (m == NULL) {
464 	return;
465     }
466 
467     if (name == NULL) {
468 	pprintf(prn, "<gretl-matrix rows=\"%d\" cols=\"%d\"\n",
469 			  m->rows, m->cols);
470     } else {
471 	pprintf(prn, "<gretl-matrix name=\"%s\" rows=\"%d\" cols=\"%d\"",
472 			  name, m->rows, m->cols);
473     }
474 
475     if (m->is_complex) {
476 	is_complex = 1;
477 	pputs(prn, " complex=\"1\"");
478     }
479 
480     if (gretl_matrix_is_dated(m)) {
481 	int mt1 = gretl_matrix_get_t1(m);
482 	int mt2 = gretl_matrix_get_t2(m);
483 
484 	pprintf(prn, " t1=\"%d\" t2=\"%d\"", mt1, mt2);
485     }
486 
487     S = gretl_matrix_get_colnames(m);
488 
489     if (S != NULL) {
490 	pputs(prn, " colnames=\"");
491 	for (j=0; j<m->cols; j++) {
492 	    pputs(prn, S[j]);
493 	    pputs(prn, (j < m->cols - 1)? " " : "\"");
494 	}
495     }
496 
497     S = gretl_matrix_get_rownames(m);
498 
499     if (S != NULL) {
500 	pputs(prn, " rownames=\"");
501 	for (j=0; j<m->rows; j++) {
502 	    pputs(prn, S[j]);
503 	    pputs(prn, (j < m->rows - 1)? " " : "\"");
504 	}
505     }
506 
507     pputs(prn, ">\n");
508 
509     if (is_complex) {
510 	gretl_matrix_set_complex_full((gretl_matrix *) m, 0);
511     }
512 
513     for (i=0; i<m->rows; i++) {
514 	for (j=0; j<m->cols; j++) {
515 	    x = gretl_matrix_get(m, i, j);
516 #ifdef WIN32
517 	    if (na(x)) {
518 		win32_pprint_nonfinite(prn, x, ' ');
519 		continue;
520 	    }
521 #endif
522 	    pprintf(prn, "%.17g ", x);
523 	}
524 	pputs(prn, "\n");
525     }
526 
527     if (is_complex) {
528 	gretl_matrix_set_complex_full((gretl_matrix *) m, 1);
529     }
530 
531     pputs(prn, "</gretl-matrix>\n");
532 }
533 
534 /**
535  * gretl_finite_scalar_serialize:
536  * @x: numerical value.
537  * @name: name of object.
538  * @fp: stream for writing.
539  *
540  * Note: not suitable for writing non-finite values.
541  */
542 
gretl_finite_scalar_serialize(double x,const char * name,PRN * prn)543 void gretl_finite_scalar_serialize (double x, const char *name, PRN *prn)
544 {
545     pprintf(prn, "<scalar name=\"%s\" value=\"%.17g\"/>\n", name, x);
546 }
547 
548 /**
549  * gretl_string_serialize:
550  * @s: UTF-8 string.
551  * @name: name of object.
552  * @fp: stream for writing.
553  *
554  */
555 
gretl_string_serialize(const char * s,const char * name,PRN * prn)556 void gretl_string_serialize (const char *s, const char *name, PRN *prn)
557 {
558     pprintf(prn, "<string name=\"%s\" value=\"%s\"/>\n", name, s);
559 }
560 
561 /**
562  * gretl_xml_get_prop_as_int:
563  * @node: XML node pointer.
564  * @tag: name by which integer property is known.
565  * @i: location to write int value.
566  *
567  * Returns: 1 if an int is found and read successfully, 0
568  * otherwise.
569  */
570 
gretl_xml_get_prop_as_int(xmlNodePtr node,const char * tag,int * i)571 int gretl_xml_get_prop_as_int (xmlNodePtr node, const char *tag,
572 			       int *i)
573 {
574     xmlChar *tmp = xmlGetProp(node, (XUC) tag);
575     int ret = 0;
576 
577     if (tmp != NULL) {
578 	ret = sscanf((const char *) tmp, "%d", i);
579 	free(tmp);
580     }
581 
582     return ret;
583 }
584 
585 /**
586  * gretl_xml_get_prop_as_opt:
587  * @node: XML node pointer.
588  * @tag: name by which integer property is known.
589  * @o: location to write gretlopt value.
590  *
591  * Returns: 1 if an int is found and read successfully, 0
592  * otherwise.
593  */
594 
gretl_xml_get_prop_as_opt(xmlNodePtr node,const char * tag,gretlopt * o)595 int gretl_xml_get_prop_as_opt (xmlNodePtr node, const char *tag,
596 			       gretlopt *o)
597 {
598     xmlChar *tmp = xmlGetProp(node, (XUC) tag);
599     int ret = 0;
600 
601     if (tmp != NULL) {
602 	int i;
603 
604 	ret = sscanf((const char *) tmp, "%d", &i);
605 	*o = (gretlopt) i;
606 	free(tmp);
607     }
608 
609     return ret;
610 }
611 
612 /**
613  * gretl_xml_get_prop_as_unsigned_int:
614  * @node: XML node pointer.
615  * @tag: name by which unsigned integer property is known.
616  * @u: location to write value.
617  *
618  * Returns: 1 if an unsigned int is found and read successfully, 0
619  * otherwise.
620  */
621 
622 unsigned int
gretl_xml_get_prop_as_unsigned_int(xmlNodePtr node,const char * tag,unsigned int * u)623 gretl_xml_get_prop_as_unsigned_int (xmlNodePtr node,
624 				    const char *tag,
625 				    unsigned int *u)
626 {
627     xmlChar *tmp = xmlGetProp(node, (XUC) tag);
628     int ret = 0;
629 
630     if (tmp != NULL) {
631 	ret = sscanf((const char *) tmp, "%u", u);
632 	free(tmp);
633     }
634 
635     return ret;
636 }
637 
638 /**
639  * gretl_xml_get_prop_as_uchar:
640  * @node: XML node pointer.
641  * @tag: name by which unsigned character property is known.
642  * @u: location to write value.
643  *
644  * Returns: 1 if an unsigned char is found and read successfully, 0
645  * otherwise.
646  */
647 
gretl_xml_get_prop_as_uchar(xmlNodePtr node,const char * tag,unsigned char * u)648 int gretl_xml_get_prop_as_uchar (xmlNodePtr node, const char *tag,
649 				 unsigned char *u)
650 {
651     xmlChar *tmp = xmlGetProp(node, (XUC) tag);
652     int ret = 0;
653 
654     if (tmp != NULL) {
655 	*u = (unsigned char) atoi((const char *) tmp);
656 	free(tmp);
657 	ret = 1;
658     }
659 
660     return ret;
661 }
662 
663 /**
664  * gretl_xml_get_prop_as_double:
665  * @node: XML node pointer.
666  * @tag: name by which floating-point property is known.
667  * @x: location to write double value.
668  *
669  * Returns: 1 if a double is found and read successfully, 0
670  * otherwise.
671  */
672 
gretl_xml_get_prop_as_double(xmlNodePtr node,const char * tag,double * x)673 int gretl_xml_get_prop_as_double (xmlNodePtr node, const char *tag,
674 				  double *x)
675 {
676     char *p, *s = (char *) xmlGetProp(node, (XUC) tag);
677     int ret = 0;
678 
679     *x = NADBL;
680 
681     if (s != NULL) {
682 	p = s;
683 	p += strspn(p, " \r\n");
684 	if (strncmp(p, "NA", 2)) {
685 	    *x = atof(p);
686 	}
687 	free(s);
688 	ret = 1;
689     }
690 
691     return ret;
692 }
693 
694 /**
695  * gretl_xml_get_prop_as_string:
696  * @node: XML node pointer.
697  * @tag: name by which string property is known.
698  * @pstr: location to assign string.
699  *
700  * Returns: 1 if a string is found and read successfully, 0
701  * otherwise.
702  */
703 
gretl_xml_get_prop_as_string(xmlNodePtr node,const char * tag,char ** pstr)704 int gretl_xml_get_prop_as_string (xmlNodePtr node, const char *tag,
705 				  char **pstr)
706 {
707     xmlChar *tmp = xmlGetProp(node, (XUC) tag);
708     int ret = 0;
709 
710     if (tmp != NULL) {
711 	*pstr = (char *) tmp;
712 	ret = 1;
713     }
714 
715     return ret;
716 }
717 
718 /**
719  * gretl_xml_get_type_property:
720  * @node: XML node pointer.
721  *
722  * Returns: the gretl type identifier associated with the
723  * property tagged "type" on @node, or 0 on failure.
724  */
725 
gretl_xml_get_type_property(xmlNodePtr node)726 GretlType gretl_xml_get_type_property (xmlNodePtr node)
727 {
728     char *tmp = (char *) xmlGetProp(node, (XUC) "type");
729     GretlType type = 0;
730 
731     if (tmp != NULL) {
732 	if (!strncmp(tmp, "gretl-", 6)) {
733 	    /* allow, e.g. "gretl-matrix" for "matrix" */
734 	    type = gretl_type_from_string(tmp + 6);
735 	} else {
736 	    type = gretl_type_from_string(tmp);
737 	}
738 	free(tmp);
739     }
740 
741     return type;
742 }
743 
744 /**
745  * gretl_xml_get_prop_as_bool:
746  * @node: XML node pointer.
747  * @tag: name by which property is known.
748  *
749  * Returns: 1 if the named property is found and has value %true,
750  * else 0.
751  */
752 
gretl_xml_get_prop_as_bool(xmlNodePtr node,const char * tag)753 int gretl_xml_get_prop_as_bool (xmlNodePtr node, const char *tag)
754 {
755     xmlChar *tmp = xmlGetProp(node, (XUC) tag);
756     int ret = 0;
757 
758     if (tmp != NULL) {
759 	if (!strcmp((char *) tmp, "true") ||
760 	    !strcmp((char *) tmp, "1")) {
761 	    ret = 1;
762 	}
763 	free(tmp);
764     }
765 
766     return ret;
767 }
768 
769 /**
770  * gretl_xml_node_get_int:
771  * @node: XML node pointer.
772  * @doc: XML document pointer.
773  * @i: location to receive integer.
774  *
775  * Returns: 1 if an int is found and read successfully, 0
776  * otherwise.
777  */
778 
gretl_xml_node_get_int(xmlNodePtr node,xmlDocPtr doc,int * i)779 int gretl_xml_node_get_int (xmlNodePtr node, xmlDocPtr doc, int *i)
780 {
781     xmlChar *tmp;
782     int ret = 0;
783 
784     tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
785 
786     if (tmp != NULL) {
787 	*i = atoi((const char *) tmp);
788 	free(tmp);
789 	ret = 1;
790     }
791 
792     return ret;
793 }
794 
795 /**
796  * gretl_xml_node_get_unsigned:
797  * @node: XML node pointer.
798  * @doc: XML document pointer.
799  * @i: location to receive integer.
800  *
801  * Returns: 1 if an unsigned int is found and read successfully, 0
802  * otherwise.
803  */
804 
gretl_xml_node_get_unsigned(xmlNodePtr node,xmlDocPtr doc,unsigned int * u)805 int gretl_xml_node_get_unsigned (xmlNodePtr node, xmlDocPtr doc,
806 				 unsigned int *u)
807 {
808     xmlChar *tmp;
809     int ret = 0;
810 
811     tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
812 
813     if (tmp != NULL) {
814 	int n = sscanf((const char *) tmp, "%u", u);
815 
816 	free(tmp);
817 	ret = (n == 1);
818     }
819 
820     return ret;
821 }
822 
823 /**
824  * gretl_xml_node_get_double:
825  * @node: XML node pointer.
826  * @doc: XML document pointer.
827  * @x: location to receive double.
828  *
829  * Returns: 1 if a double is found and read successfully, 0
830  * otherwise.
831  */
832 
gretl_xml_node_get_double(xmlNodePtr node,xmlDocPtr doc,double * x)833 int gretl_xml_node_get_double (xmlNodePtr node, xmlDocPtr doc,
834 			       double *x)
835 {
836     char *s, *p;
837     int ret = 0;
838 
839     s = (char *) xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
840 
841     if (s != NULL) {
842 	p = s;
843 	p += strspn(p, " \r\n");
844 	if (!strncmp(p, "NA", 2)) {
845 	    *x = NADBL;
846 	} else {
847 	    *x = atof(p);
848 	}
849 	free(s);
850 	ret = 1;
851     }
852 
853     return ret;
854 }
855 
856 /**
857  * gretl_xml_node_get_string:
858  * @node: XML node pointer.
859  * @doc: XML document pointer.
860  * @pstr: location to receive string.
861  *
862  * Returns: 1 if a string is found and read successfully, 0
863  * otherwise.
864  */
865 
gretl_xml_node_get_string(xmlNodePtr node,xmlDocPtr doc,char ** pstr)866 int gretl_xml_node_get_string (xmlNodePtr node, xmlDocPtr doc,
867 			       char **pstr)
868 {
869     xmlChar *tmp;
870     int ret = 0;
871 
872     tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
873 
874     if (tmp != NULL) {
875 	*pstr = (char *) tmp;
876 	ret = 1;
877     }
878 
879     return ret;
880 }
881 
882 /**
883  * gretl_xml_get_string:
884  * @node: XML node pointer.
885  * @doc: XML document pointer.
886  *
887  * Returns: allocated copy of the string content of @node, or
888  * NULL on error.
889  */
890 
gretl_xml_get_string(xmlNodePtr node,xmlDocPtr doc)891 char *gretl_xml_get_string (xmlNodePtr node, xmlDocPtr doc)
892 {
893     return (char *) xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
894 }
895 
896 /**
897  * gretl_xml_node_get_trimmed_string:
898  * @node: XML node pointer.
899  * @doc: XML document pointer.
900  * @pstr: location to receive string.
901  *
902  * Reads a string from @node and trims both leading and trailing
903  * white space.
904  *
905  * Returns: 1 if a string is found and read successfully, 0
906  * otherwise.
907  */
908 
gretl_xml_node_get_trimmed_string(xmlNodePtr node,xmlDocPtr doc,char ** pstr)909 int gretl_xml_node_get_trimmed_string (xmlNodePtr node, xmlDocPtr doc,
910 				       char **pstr)
911 {
912     char *tmp;
913     char *s;
914     int i, len, ret = 0;
915 
916     tmp = (char *) xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
917 
918     if (tmp != NULL) {
919 	s = tmp;
920 	s += strspn(s, " \t\n\r");
921 	len = strlen(s);
922 	for (i=len-1; i>=0; i--) {
923 	    if (s[i] == ' ' || s[i] == '\t' ||
924 		s[i] == '\r' || s[i] == '\n') {
925 		len--;
926 	    } else {
927 		break;
928 	    }
929 	}
930 	*pstr = gretl_strndup(s, len);
931 	if (*pstr != NULL) {
932 	    ret = 1;
933 	}
934 	free(tmp);
935     } else {
936 	/* empty string */
937 	*pstr = gretl_strdup("");
938 	if (*pstr != NULL) {
939 	    ret = 1;
940 	}
941     }
942 
943     return ret;
944 }
945 
946 /**
947  * gretl_xml_get_list:
948  * @node: XML node pointer.
949  * @doc: XML document pointer.
950  * @err: location to receive error code.
951  *
952  * Returns: allocated list read from @node, or %NULL on
953  * failure.
954  */
955 
gretl_xml_get_list(xmlNodePtr node,xmlDocPtr doc,int * err)956 int *gretl_xml_get_list (xmlNodePtr node, xmlDocPtr doc, int *err)
957 {
958     xmlChar *tmp;
959     const char *p;
960     int *list = NULL;
961     int i, n;
962 
963     tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
964 
965     if (tmp == NULL) {
966 	*err = E_DATA;
967     } else {
968 	p = (const char *) tmp;
969 	p += strspn(p, " \r\n"); /* skip space (get to first value) */
970 	if (sscanf(p, "%d", &n) != 1) {
971 	    *err = E_DATA;
972 	} else if (n == 0) {
973 	    list = gretl_null_list();
974 	    free(tmp);
975 	    return list;
976 	} else if (n < 0) {
977 	    *err = E_DATA;
978 	} else {
979 	    p += strcspn(p, " \r\n"); /* skip non-space (get beyond value) */
980 	    list = gretl_list_new(n);
981 	    if (list == NULL) {
982 		*err = E_ALLOC;
983 	    }
984 	}
985 
986 	if (list != NULL && !*err) {
987 	    for (i=1; i<=n && !*err; i++) {
988 		p += strspn(p, " \r\n"); /* skip space (get to next value) */
989 		if (*p == ';') {
990 		    list[i] = LISTSEP;
991 		} else if (sscanf(p, "%d", &list[i]) != 1) {
992 		    *err = E_DATA;
993 		}
994 		p += strcspn(p, " \r\n"); /* skip non-space (get beyond value) */
995 	    }
996 	}
997 
998 	free(tmp);
999     }
1000 
1001     if (list != NULL && *err) {
1002 	free(list);
1003 	list = NULL;
1004     }
1005 
1006     return list;
1007 }
1008 
1009 /**
1010  * gretl_xml_child_get_string:
1011  * @node: XML node pointer.
1012  * @doc: XML document pointer.
1013  * @name: name of child node.
1014  * @pstr: location to receive string.
1015  *
1016  * Returns: 1 if a string is found and read successfully, 0
1017  * otherwise.
1018  */
1019 
gretl_xml_child_get_string(xmlNodePtr node,xmlDocPtr doc,const char * name,char ** pstr)1020 int gretl_xml_child_get_string (xmlNodePtr node, xmlDocPtr doc,
1021 				const char *name, char **pstr)
1022 {
1023     xmlNodePtr cur;
1024     xmlChar *tmp;
1025     int ret = 0;
1026 
1027     *pstr = NULL;
1028 
1029     cur = node->xmlChildrenNode;
1030 
1031     while (cur != NULL) {
1032 	if (!xmlStrcmp(cur->name, (XUC) name)) {
1033 	    tmp = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
1034 	    if (tmp != NULL) {
1035 		*pstr = (char *) tmp;
1036 		ret = 1;
1037 	    }
1038 	    break;
1039 	}
1040 	cur = cur->next;
1041     }
1042 
1043     return ret;
1044 }
1045 
1046 
1047 #define SMALLVAL(x) (x > -1e-40 && x < 1e-40)
1048 
gretl_xml_get_array(xmlNodePtr node,xmlDocPtr doc,GretlType type,int * nelem,int * err)1049 static void *gretl_xml_get_array (xmlNodePtr node, xmlDocPtr doc,
1050 				  GretlType type,
1051 				  int *nelem, int *err)
1052 {
1053     xmlChar *tmp = xmlGetProp(node, (XUC) "count");
1054     int *ivals = NULL;
1055     double *xvals = NULL;
1056     cmplx *cvals = NULL;
1057     void *ptr = NULL;
1058     int nread = 0;
1059     int i, n = 0;
1060 
1061     *nelem = 0;
1062 
1063     if (tmp == NULL) {
1064 	tmp = xmlGetProp(node, (XUC) "size");
1065     }
1066 
1067     if (tmp == NULL) {
1068 	fprintf(stderr, "gretl_xml_get_array: didn't find count\n");
1069 	*err = E_DATA;
1070 	return NULL;
1071     }
1072 
1073     n = atoi((const char *) tmp);
1074     free(tmp);
1075 
1076     if (n <= 0) {
1077 	return NULL;
1078     }
1079 
1080     if (type == GRETL_TYPE_INT_ARRAY) {
1081 	ivals = malloc(n * sizeof *ivals);
1082 	ptr = ivals;
1083     } else if (type == GRETL_TYPE_DOUBLE_ARRAY) {
1084 	xvals = malloc(n * sizeof *xvals);
1085 	ptr = xvals;
1086     } else if (type == GRETL_TYPE_CMPLX_ARRAY) {
1087 	cvals = malloc(n * sizeof *cvals);
1088 	ptr = cvals;
1089     }
1090 
1091     if (ptr == NULL) {
1092 	*err = E_ALLOC;
1093 	return NULL;
1094     }
1095 
1096     tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
1097 
1098     if (tmp == NULL) {
1099 	*err = E_DATA;
1100     } else {
1101 	const char *s = (const char *) tmp;
1102 	char *test;
1103 
1104 	errno = 0;
1105 
1106 	if (type == GRETL_TYPE_DOUBLE_ARRAY) {
1107 	    double x;
1108 
1109 	    for (i=0; i<n && !*err && *s; i++) {
1110 		while (isspace(*s)) s++;
1111 		x = strtod(s, &test);
1112 #ifdef WIN32
1113 		/* remedial code for "1.#QNAN" */
1114 		if (!strncmp(test, "#QNAN", 5)) {
1115 		    x = NADBL;
1116 		    s = test + 5;
1117 		} else
1118 #endif
1119 		if (!strncmp(test, "NA", 2)) {
1120 		    x = NADBL;
1121 		    s = test + 2;
1122 		} else {
1123 		    s = test;
1124 		    if (*s != '\0' && !isspace(*s)) {
1125 			*err = E_DATA;
1126 		    } else if (errno) {
1127 			perror(NULL);
1128 			if (!SMALLVAL(x)) {
1129 			    x = NADBL;
1130 			}
1131 			errno = 0;
1132 		    }
1133 		}
1134 		xvals[i] = x;
1135 		nread++;
1136 	    }
1137 	} else if (type == GRETL_TYPE_INT_ARRAY) {
1138 	    long kl;
1139 
1140 	    for (i=0; i<n && !*err && *s; i++) {
1141 		while (isspace(*s)) s++;
1142 		kl = strtol(s, &test, 10);
1143 		if (errno) {
1144 		    *err = E_DATA;
1145 		} else if (*test != '\0' && !isspace(*test)) {
1146 		    *err = E_DATA;
1147 		} else {
1148 		    s = test;
1149 		    ivals[i] = kl;
1150 		    nread++;
1151 		}
1152 	    }
1153 	} else if (type == GRETL_TYPE_CMPLX_ARRAY) {
1154 	    double x;
1155 	    int n2 = n * 2;
1156 	    int rval = 1;
1157 
1158 	    for (i=0; i<n2 && !*err && *s; i++) {
1159 		while (isspace(*s)) s++;
1160 		x = strtod(s, &test);
1161 		if (!strncmp(test, "NA", 2)) {
1162 		    x = NADBL;
1163 		    test += 2;
1164 		} else if (errno) {
1165 		    if (SMALLVAL(x)) {
1166 			errno = 0;
1167 		    } else {
1168 			perror(NULL);
1169 			*err = E_DATA;
1170 		    }
1171 		} else if (*test != '\0' && !isspace(*test)) {
1172 		    *err = E_DATA;
1173 		}
1174 		if (!*err) {
1175 		    s = test;
1176 		    if (rval) {
1177 			cvals[nread].r = x;
1178 			rval = 0;
1179 		    } else {
1180 			cvals[nread].i = x;
1181 			rval = 1;
1182 			nread++;
1183 		    }
1184 		}
1185 	    }
1186 	}
1187 
1188 	free(tmp);
1189 
1190 	if (nread < n) {
1191 	    fprintf(stderr, "expected %d items in array, but got %d\n", n, nread);
1192 	    *err = E_DATA;
1193 	}
1194     }
1195 
1196     if (ptr != NULL && *err) {
1197 	free(ptr);
1198 	ptr = NULL;
1199     }
1200 
1201     if (!*err) {
1202 	*nelem = n;
1203     }
1204 
1205     return ptr;
1206 }
1207 
1208 /**
1209  * gretl_xml_get_int_array:
1210  * @node: XML node pointer.
1211  * @doc: XML document pointer.
1212  * @nelem: location to receive number of elements in array.
1213  * @err: location to receive error code.
1214  *
1215  * Returns: allocated array of integers read from @node, or %NULL on
1216  * failure.
1217  */
1218 
gretl_xml_get_int_array(xmlNodePtr node,xmlDocPtr doc,int * nelem,int * err)1219 int *gretl_xml_get_int_array (xmlNodePtr node, xmlDocPtr doc,
1220 			      int *nelem, int *err)
1221 {
1222     return gretl_xml_get_array(node, doc, GRETL_TYPE_INT_ARRAY,
1223 			       nelem, err);
1224 }
1225 
1226 /**
1227  * gretl_xml_get_double_array:
1228  * @node: XML node pointer.
1229  * @doc: XML document pointer.
1230  * @nelem: location to receive number of elements in array.
1231  * @err: location to receive error code.
1232  *
1233  * Returns: allocated array of doubles read from @node, or %NULL on
1234  * failure.
1235  */
1236 
gretl_xml_get_double_array(xmlNodePtr node,xmlDocPtr doc,int * nelem,int * err)1237 double *gretl_xml_get_double_array (xmlNodePtr node, xmlDocPtr doc,
1238 				    int *nelem, int *err)
1239 {
1240     int myerr = 0;
1241 
1242     if (err == NULL) {
1243 	err = &myerr;
1244     }
1245 
1246     return gretl_xml_get_array(node, doc, GRETL_TYPE_DOUBLE_ARRAY,
1247 			       nelem, err);
1248 }
1249 
1250 /**
1251  * gretl_xml_get_cmplx_array:
1252  * @node: XML node pointer.
1253  * @doc: XML document pointer.
1254  * @nelem: location to receive number of elements in array.
1255  * @err: location to receive error code.
1256  *
1257  * Returns: allocated array of cmplx (complex numbers) read from
1258  * @node, or %NULL on failure.
1259  */
1260 
gretl_xml_get_cmplx_array(xmlNodePtr node,xmlDocPtr doc,int * nelem,int * err)1261 cmplx *gretl_xml_get_cmplx_array (xmlNodePtr node, xmlDocPtr doc,
1262 				  int *nelem, int *err)
1263 {
1264     return gretl_xml_get_array(node, doc, GRETL_TYPE_CMPLX_ARRAY,
1265 			       nelem, err);
1266 }
1267 
chunk_strdup(const char * src,const char ** ptr,int * err)1268 static char *chunk_strdup (const char *src, const char **ptr, int *err)
1269 {
1270     char *targ = NULL;
1271 
1272     if (*src == '\0') {
1273 	*ptr = src;
1274     } else {
1275 	const char *p;
1276 	int len = 0;
1277 
1278 	src += strspn(src, " \n");
1279 	p = src;
1280 
1281 	if (*src == '"') {
1282 	    p = ++src;
1283 	    while (*src && *src != '"') {
1284 		len++;
1285 		src++;
1286 	    }
1287 	    if (*src == '"') {
1288 		src++;
1289 	    }
1290 	} else {
1291 	    while (*src && !isspace(*src)) {
1292 		len++;
1293 		src++;
1294 	    }
1295 	}
1296 
1297 	if (ptr != NULL) {
1298 	    *ptr = src;
1299 	}
1300 
1301 	if (len > 0) {
1302 	    targ = gretl_strndup(p, len);
1303 	    if (targ == NULL) {
1304 		*err = E_ALLOC;
1305 	    }
1306 	}
1307     }
1308 
1309     if (targ == NULL && !*err) {
1310 	*err = E_DATA;
1311     }
1312 
1313     return targ;
1314 }
1315 
1316 /**
1317  * gretl_xml_get_strings_array:
1318  * @node: XML node pointer.
1319  * @doc: XML document pointer.
1320  * @nelem: location to receive number of elements in array.
1321  * @slop: if non-zero, allow the number of strings to fall
1322  * short of the recorded string count by one.
1323  * @err: location to receive error code.
1324  *
1325  * Returns: allocated array of strings read from @node, or
1326  * %NULL on failure.
1327  */
1328 
gretl_xml_get_strings_array(xmlNodePtr node,xmlDocPtr doc,int * nelem,int slop,int * err)1329 char **gretl_xml_get_strings_array (xmlNodePtr node, xmlDocPtr doc,
1330 				    int *nelem, int slop, int *err)
1331 {
1332     xmlChar *tmp = xmlGetProp(node, (XUC) "count");
1333     char **S = NULL;
1334     const char *p;
1335     int i, n = 0;
1336 
1337     if (tmp == NULL) {
1338 	*err = E_DATA;
1339 	return NULL;
1340     }
1341 
1342     n = atoi((const char *) tmp);
1343     free(tmp);
1344 
1345 #if GDT_DEBUG
1346     fprintf(stderr, "gretl_xml_get_strings_array: count=%d\n", n);
1347 #endif
1348 
1349     if (n > 0) {
1350 	S = strings_array_new(n);
1351 	if (S == NULL) {
1352 	    *err = E_ALLOC;
1353 	} else {
1354 	    tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
1355 	    if (tmp == NULL) {
1356 		fprintf(stderr, "xmlNodeListGetString failed\n");
1357 		*err = E_DATA;
1358 	    } else {
1359 		p = (const char *) tmp;
1360 		for (i=0; i<n && !*err; i++) {
1361 		    S[i] = chunk_strdup(p, &p, err);
1362 		    if (*err == E_DATA) {
1363 			if (i == n - 1 && slop) {
1364 			    /* somebody got off by one somewhere? */
1365 			    *err = 0;
1366 			    n--;
1367 			} else {
1368 			    /* treat as non-fatal? */
1369 			    fprintf(stderr, "repairing missing string value!\n");
1370 			    S[i] = gretl_strdup("empty string");
1371 			    *err = 0;
1372 			}
1373 		    }
1374 		}
1375 		free(tmp);
1376 	    }
1377 	}
1378     }
1379 
1380     if (S != NULL && *err) {
1381 	strings_array_free(S, n);
1382 	S = NULL;
1383     }
1384 
1385     if (!*err) {
1386 	*nelem = n;
1387     }
1388 
1389     return S;
1390 }
1391 
1392 /**
1393  * gretl_xml_child_get_strings_array:
1394  * @node: XML node pointer.
1395  * @doc: XML document pointer.
1396  * @name: name of child node.
1397  * @pstrs: location to receive strings array.
1398  * @nstrs: location to receive number of strings.
1399  *
1400  * Returns: 1 if an array of strings is found and read successfully,
1401  * 0 otherwise.
1402  */
1403 
gretl_xml_child_get_strings_array(xmlNodePtr node,xmlDocPtr doc,const char * name,char *** pstrs,int * nstrs)1404 int gretl_xml_child_get_strings_array (xmlNodePtr node, xmlDocPtr doc,
1405 				       const char *name, char ***pstrs,
1406 				       int *nstrs)
1407 {
1408     xmlNodePtr cur = node->xmlChildrenNode;
1409     int ret = 0;
1410 
1411     while (cur != NULL) {
1412 	if (!xmlStrcmp(cur->name, (XUC) name)) {
1413 	    int err = 0;
1414 
1415 	    *pstrs = gretl_xml_get_strings_array(cur, doc, nstrs, 0, &err);
1416 	    ret = !err;
1417 	    break;
1418 	}
1419 	cur = cur->next;
1420     }
1421 
1422     return ret;
1423 }
1424 
get_matrix_values_via_file(gretl_matrix * m,const char * s)1425 static int get_matrix_values_via_file (gretl_matrix *m, const char *s)
1426 {
1427     gchar *fname;
1428     FILE *fp;
1429     int err = 0;
1430 
1431     fname = gretl_make_dotpath("matrix.xml.XXXXXX");
1432     if (fname == NULL) {
1433 	return E_ALLOC;
1434     }
1435 
1436     fp = gretl_mktemp(fname, "wb");
1437     if (fp == NULL) {
1438 	g_free(fname);
1439 	return E_FOPEN;
1440     }
1441 
1442     fputs(s, fp);
1443     fclose(fp);
1444     fp = gretl_fopen(fname, "r");
1445 
1446     if (fp == NULL) {
1447 	err = E_FOPEN;
1448     } else {
1449 	double x;
1450 	int i, j;
1451 
1452 	for (i=0; i<m->rows && !err; i++) {
1453 	    for (j=0; j<m->cols && !err; j++) {
1454 		if (fscanf(fp, "%lf", &x) != 1) {
1455 #ifdef WIN32
1456 		    x = win32_fscan_nonfinite(fp, &err);
1457 		    if (!err) {
1458 			gretl_matrix_set(m, i, j, x);
1459 		    }
1460 #else
1461 		    err = E_DATA;
1462 #endif
1463 		} else {
1464 		    gretl_matrix_set(m, i, j, x);
1465 		}
1466 	    }
1467 	}
1468 
1469 	fclose(fp);
1470     }
1471 
1472     remove(fname);
1473     g_free(fname);
1474 
1475     return err;
1476 }
1477 
maybe_add_matrix_labels(gretl_matrix * m,const char * s,int byrow)1478 static int maybe_add_matrix_labels (gretl_matrix *m,
1479 				    const char *s,
1480 				    int byrow)
1481 {
1482     int n, err = 0;
1483 
1484     n = (byrow)? m->rows : m->cols;
1485 
1486     if (s != NULL && *s != '\0') {
1487 	char **S;
1488 	int ns;
1489 
1490 	S = gretl_string_split(s, &ns, " \n\t");
1491 	if (S == NULL) {
1492 	    err = E_ALLOC;
1493 	} else if (ns != n) {
1494 	    err = E_NONCONF;
1495 	    strings_array_free(S, ns);
1496 	} else if (byrow) {
1497 	    gretl_matrix_set_rownames(m, S);
1498 	} else {
1499 	    gretl_matrix_set_colnames(m, S);
1500 	}
1501     }
1502 
1503     return err;
1504 }
1505 
1506 /**
1507  * gretl_xml_get_matrix:
1508  * @node: XML node pointer.
1509  * @doc: XML document pointer.
1510  * @err: location to receive error code.
1511  *
1512  * Returns: allocated gretl matrix read from @node, or %NULL
1513  * on failure.
1514  */
1515 
gretl_xml_get_matrix(xmlNodePtr node,xmlDocPtr doc,int * err)1516 gretl_matrix *gretl_xml_get_matrix (xmlNodePtr node,
1517 				    xmlDocPtr doc,
1518 				    int *err)
1519 {
1520     gretl_matrix *m = NULL;
1521     char *names = NULL;
1522     xmlChar *tmp = NULL;
1523     const char *p;
1524     double x;
1525     int rows = 0, cols = 0;
1526     int t1 = -1, t2 = -1;
1527     int is_complex = 0;
1528     int i, j;
1529 
1530     if (!gretl_xml_get_prop_as_int(node, "rows", &rows) ||
1531 	!gretl_xml_get_prop_as_int(node, "cols", &cols)) {
1532 	*err = E_DATA;
1533 	return NULL;
1534     }
1535 
1536     if (gretl_xml_get_prop_as_bool(node, "complex")) {
1537 	is_complex = 1;
1538 	rows *= 2; /* 8-byte indexing */
1539     }
1540 
1541     if (rows == 0 || cols == 0) {
1542 	/* allow case of empty matrix */
1543 	if (rows == 0 && cols == 0) {
1544 	    m = gretl_null_matrix_new();
1545 	} else {
1546 	    m = gretl_matrix_alloc(rows, cols);
1547 	}
1548 	if (m == NULL) {
1549 	    *err = E_ALLOC;
1550 	}
1551 	return m;
1552     } else if (rows <= 0 || cols <= 0) {
1553 	*err = E_DATA;
1554 	return NULL;
1555     }
1556 
1557     gretl_xml_get_prop_as_int(node, "t1", &t1);
1558     gretl_xml_get_prop_as_int(node, "t2", &t2);
1559 
1560     m = gretl_matrix_alloc(rows, cols);
1561     if (m == NULL) {
1562 	*err = E_ALLOC;
1563 	return NULL;
1564     }
1565 
1566     names = (char *) xmlGetProp(node, (XUC) "colnames");
1567     if (names != NULL) {
1568 	maybe_add_matrix_labels(m, names, 0);
1569     }
1570 
1571     names = (char *) xmlGetProp(node, (XUC) "rownames");
1572     if (names != NULL) {
1573 	maybe_add_matrix_labels(m, names, 1);
1574     }
1575 
1576     tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
1577     if (tmp == NULL) {
1578 	gretl_matrix_free(m);
1579 	*err = E_DATA;
1580 	return NULL;
1581     }
1582 
1583     p = (const char *) tmp;
1584 
1585     gretl_push_c_numeric_locale();
1586 
1587     if (rows * cols > 5000) {
1588 	/* it's relatively slow to crawl along the string holding
1589 	   many matrix elements using sscanf plus str* functions
1590 	*/
1591 	*err = get_matrix_values_via_file(m, p);
1592     } else {
1593 	p += strspn(p, " \r\n");
1594 	for (i=0; i<rows && !*err; i++) {
1595 	    for (j=0; j<cols && !*err; j++) {
1596 		if (sscanf(p, "%lf", &x) != 1) {
1597 #ifdef WIN32
1598 		    x = win32_sscan_nonfinite(p, err);
1599 		    if (!*err) {
1600 			gretl_matrix_set(m, i, j, x);
1601 		    }
1602 #else
1603 		    *err = E_DATA;
1604 #endif
1605 		} else {
1606 		    gretl_matrix_set(m, i, j, x);
1607 		}
1608 		p += strspn(p, " \r\n");
1609 		p += strcspn(p, " \r\n");
1610 	    }
1611 	}
1612     }
1613 
1614     gretl_pop_c_numeric_locale();
1615 
1616     free(tmp);
1617 
1618     if (*err) {
1619 	gretl_matrix_free(m);
1620 	m = NULL;
1621     } else {
1622 	if (is_complex) {
1623 	    gretl_matrix_set_complex_full(m, 1);
1624 	}
1625 	if (t1 >= 0 && t2 >= t1) {
1626 	    gretl_matrix_set_t1(m, t1);
1627 	    gretl_matrix_set_t2(m, t2);
1628 	}
1629     }
1630 
1631     return m;
1632 }
1633 
1634 /**
1635  * gretl_xml_get_submask:
1636  * @node: XML node pointer.
1637  * @doc: XML document pointer.
1638  * @pmask: location to receive allocated mask.
1639  *
1640  * Returns: 0 on success, non-zero on failure.
1641  */
1642 
gretl_xml_get_submask(xmlNodePtr node,xmlDocPtr doc,char ** pmask)1643 int gretl_xml_get_submask (xmlNodePtr node, xmlDocPtr doc, char **pmask)
1644 {
1645     char *mask = NULL;
1646     int i, len;
1647     int err = 0;
1648 
1649     if (!gretl_xml_get_prop_as_int(node, "length", &len)) {
1650 	return 1;
1651     }
1652 
1653     if (len == 0) {
1654 	*pmask = RESAMPLED;
1655 	return 0;
1656     }
1657 
1658     mask = calloc(len, 1);
1659 
1660     if (mask == NULL) {
1661 	err = 1;
1662     } else {
1663 	xmlChar *tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
1664 
1665 	if (tmp == NULL) {
1666 	    err = 1;
1667 	} else {
1668 	    char *s = (char *) tmp;
1669 
1670 	    for (i=0; i<len; i++) {
1671 		mask[i] = atoi(s);
1672 		s += 2;
1673 	    }
1674 	    free(tmp);
1675 	}
1676     }
1677 
1678     if (!err) {
1679 	*pmask = mask;
1680     }
1681 
1682     return err;
1683 }
1684 
gretl_xml_header(PRN * prn)1685 void gretl_xml_header (PRN *prn)
1686 {
1687     pputs(prn, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
1688 }
1689 
1690 /**
1691  * gretl_matrix_write_as_gdt:
1692  * @fname: name of file to write.
1693  * @X: matrix, variable in columns.
1694  * @varnames: column names.
1695  * @labels: descriptive labels for the variables, or %NULL.
1696  *
1697  * Write out a .gdt data file containing the elements of
1698  * of the given matrix.
1699  *
1700  * Returns: 0 on successful completion, non-zero on error.
1701  */
1702 
gretl_matrix_write_as_gdt(const char * fname,const gretl_matrix * X,const char ** varnames,const char ** labels)1703 int gretl_matrix_write_as_gdt (const char *fname,
1704 			       const gretl_matrix *X,
1705 			       const char **varnames,
1706 			       const char **labels)
1707 {
1708     gzFile fz = Z_NULL;
1709     char datname[MAXLEN];
1710     char *xmlbuf = NULL;
1711     int (*show_progress) (double, double, int) = NULL;
1712     double msize = 0;
1713     int T = X->rows;
1714     int k = X->cols;
1715     int in_c_locale = 0;
1716     int i, t, err = 0;
1717 
1718     fz = gretl_gzopen(fname, "wb");
1719 
1720     if (fz == Z_NULL) {
1721 	gretl_errmsg_sprintf(_("Couldn't open %s for writing"), fname);
1722 	return 1;
1723     }
1724 
1725     msize = T * k * sizeof(double);
1726 
1727     if (msize > 100000) {
1728 	fprintf(stderr, "Writing %.0f Kbytes of data\n", msize / 1024);
1729     } else {
1730 	msize = 0;
1731     }
1732 
1733     if (msize > 0) {
1734 	show_progress = get_plugin_function("show_progress");
1735 	if (show_progress == NULL) {
1736 	    msize = 0;
1737 	} else {
1738 	    (*show_progress)(0, msize, SP_SAVE_INIT);
1739 	}
1740     }
1741 
1742     simple_fname(datname, fname);
1743     xmlbuf = gretl_xml_encode(datname);
1744     if (xmlbuf == NULL) {
1745 	err = 1;
1746 	goto cleanup;
1747     }
1748 
1749     gzprintf(fz, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
1750 	     "<!DOCTYPE gretldata SYSTEM \"gretldata.dtd\">\n\n"
1751 	     "<gretldata version=\"%s\" name=\"%s\" frequency=\"1\" "
1752 	     "startobs=\"1\" endobs=\"%d\" type=\"cross-section\">\n",
1753 	     GRETLDATA_VERSION, datname, T);
1754 
1755     free(xmlbuf);
1756 
1757     gretl_push_c_numeric_locale();
1758     in_c_locale = 1;
1759 
1760     gzprintf(fz, "<variables count=\"%d\">\n", k);
1761 
1762     for (i=0; i<k; i++) {
1763 	gzprintf(fz, "<variable name=\"%s\"", varnames[i]);
1764 	if (labels != NULL && labels[i] != NULL) {
1765 	    gzprintf(fz, "\n label=\"%s\"", labels[i]);
1766 	}
1767 	gzputs(fz, "\n/>\n");
1768     }
1769 
1770     gzputs(fz, "</variables>\n");
1771 
1772     gzprintf(fz, "<observations count=\"%d\" labels=\"false\">\n", T);
1773 
1774     for (t=0; t<T; t++) {
1775 	gzputs(fz, "<obs>");
1776 	for (i=0; i<k; i++) {
1777 	    gzprintf(fz, "%.12g ", gretl_matrix_get(X, t, i));
1778 	}
1779 	gzputs(fz, "</obs>\n");
1780 	if (msize > 0 && t && (t % 50 == 0)) {
1781 	    (*show_progress) (50, T, SP_NONE);
1782 	}
1783     }
1784 
1785     gzputs(fz, "</observations>\n</gretldata>\n");
1786 
1787  cleanup:
1788 
1789     if (in_c_locale) {
1790 	gretl_pop_c_numeric_locale();
1791     }
1792 
1793     if (msize > 0) {
1794 	(*show_progress)(0, T, SP_FINISH);
1795     }
1796 
1797     gzclose(fz);
1798 
1799     return err;
1800 }
1801 
string_table_count(const DATASET * dset,const int * list,int nvars)1802 static int string_table_count (const DATASET *dset,
1803 			       const int *list,
1804 			       int nvars)
1805 {
1806     int i, v, n = 0;
1807 
1808     for (i=1; i<=nvars; i++) {
1809 	v = savenum(list, i);
1810 	if (is_string_valued(dset, v)) {
1811 	    n++;
1812 	}
1813     }
1814 
1815     return n;
1816 }
1817 
maybe_print_panel_info(const DATASET * dset,int skip_padding,PRN * prn)1818 static void maybe_print_panel_info (const DATASET *dset,
1819 				    int skip_padding,
1820 				    PRN *prn)
1821 {
1822     int names = panel_group_names_ok(dset, 0);
1823     int pd = dset->panel_pd;
1824     double sd0 = dset->panel_sd0;
1825     int times = pd > 0 && sd0 > 0.0;
1826 
1827     if (names || times || skip_padding) {
1828 	pputs(prn, "<panel-info\n");
1829 	if (names) {
1830 	    pprintf(prn, " group-names=\"%s\"\n", dset->pangrps);
1831 	}
1832 	if (times) {
1833 	    pprintf(prn, " time-frequency=\"%d\"\n", pd);
1834 	    pprintf(prn, " time-start=\"%.10g\"\n", sd0);
1835 	}
1836 	if (skip_padding) {
1837 	    pputs(prn, " skip-padding=\"1\"\n");
1838 	}
1839 	pputs(prn, "/>\n");
1840     }
1841 }
1842 
row_is_padding(const DATASET * dset,int t,int vmax)1843 static int row_is_padding (const DATASET *dset, int t, int vmax)
1844 {
1845     int i;
1846 
1847     for (i=1; i<vmax; i++) {
1848 	if (!na(dset->Z[i][t])) {
1849 	    return 0;
1850 	}
1851     }
1852 
1853     return 1;
1854 }
1855 
open_gdt_write_stream(const char * fname,gretlopt opt)1856 static PRN *open_gdt_write_stream (const char *fname,
1857 				   gretlopt opt)
1858 {
1859     int err = 0;
1860     PRN *prn;
1861 
1862     if (opt & OPT_Z) {
1863 	/* using zlib compression */
1864 	int gzlevel = get_compression_option(STORE);
1865 
1866 	prn = gretl_gzip_print_new(fname, gzlevel, &err);
1867     } else {
1868 	/* no compression */
1869 	prn = gretl_print_new_with_filename(fname, &err);
1870     }
1871 
1872     return prn;
1873 }
1874 
1875 #define BIN_HDRLEN 24
1876 
write_binary_header(FILE * fp)1877 static int write_binary_header (FILE *fp)
1878 {
1879     char header[BIN_HDRLEN] = {0};
1880     int err = 0;
1881 
1882 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
1883     strcpy(header, "gretl-bin:little-endian");
1884 #else
1885     strcpy(header, "gretl-bin:big-endian");
1886 #endif
1887 
1888     if (fwrite(header, 1, BIN_HDRLEN, fp) != BIN_HDRLEN) {
1889 	err = E_DATA;
1890     }
1891 
1892     return err;
1893 }
1894 
write_binary_data(const char * fname,const DATASET * dset,const int * list,int nvars,int nrows,gretlopt opt)1895 static int write_binary_data (const char *fname, const DATASET *dset,
1896 			      const int *list, int nvars, int nrows,
1897 			      gretlopt opt)
1898 {
1899     char *bname;
1900     FILE *fp;
1901     int T = dset->t2 - dset->t1 + 1;
1902     size_t wrote;
1903     int i, v, err = 0;
1904 
1905     bname = switch_ext_new(fname, "bin");
1906     fp = gretl_fopen(bname, "wb");
1907     free(bname);
1908 
1909     if (fp == NULL) {
1910 	return E_FOPEN;
1911     }
1912 
1913     write_binary_header(fp);
1914 
1915     if (nrows < T) {
1916 	/* panel data with skip-padding in force */
1917 	double *tmp = NULL;
1918 	int uv = 0, tv = 0;
1919 	int s, t, nv = 0;
1920 
1921 	/* add unit and time variables (initialized to zero) */
1922 	err = dataset_add_series((DATASET *) dset, 2);
1923 	if (!err) {
1924 	    tmp = malloc(nrows * sizeof *tmp);
1925 	    if (tmp == NULL) {
1926 		err = E_ALLOC;
1927 	    }
1928 	}
1929 
1930 	if (!err) {
1931 	    uv = dset->v - 2;
1932 	    tv = dset->v - 1;
1933 	    nv = dset->v - 2;
1934 	}
1935 
1936 	for (t=dset->t1; t<=dset->t2 && !err; t++) {
1937 	    if (!row_is_padding(dset, t, nv)) {
1938 		dset->Z[uv][t] = 1 + t / dset->pd;
1939 		dset->Z[tv][t] = t % dset->pd + 1;
1940 	    }
1941 	}
1942 
1943 	nv = nvars + 2;
1944 	for (i=1; i<=nv && !err; i++) {
1945 	    if (i <= nvars) {
1946 		v = savenum(list, i);
1947 	    } else {
1948 		v = (i == nvars + 1)? uv : tv;
1949 	    }
1950 	    s = 0;
1951 	    for (t=dset->t1; t<=dset->t2 && !err; t++) {
1952 		if (dset->Z[uv][t] != 0.0) {
1953 		    tmp[s++] = dset->Z[v][t];
1954 		}
1955 	    }
1956 	    wrote = fwrite(tmp, sizeof(double), nrows, fp);
1957 	    if (wrote != nrows) {
1958 		err = E_DATA;
1959 	    }
1960 	}
1961 
1962 	free(tmp);
1963 	if (uv > 0) {
1964 	    dataset_drop_last_variables((DATASET *) dset, 2);
1965 	}
1966     } else {
1967 	for (i=1; i<=nvars && !err; i++) {
1968 	    v = savenum(list, i);
1969 	    wrote = fwrite(dset->Z[v] + dset->t1, sizeof(double),
1970 			   T, fp);
1971 	    if (wrote != T) {
1972 		err = E_DATA;
1973 	    }
1974 	}
1975     }
1976 
1977     fclose(fp);
1978 
1979     return err;
1980 }
1981 
gdt_swap_endianness(DATASET * dset)1982 static void gdt_swap_endianness (DATASET *dset)
1983 {
1984     int i, t;
1985 
1986     for (i=1; i<dset->v; i++) {
1987 	for (t=0; t<dset->n; t++) {
1988 	    reverse_double(dset->Z[i][t]);
1989 	}
1990     }
1991 }
1992 
read_binary_header(FILE * fp,int order)1993 static int read_binary_header (FILE *fp, int order)
1994 {
1995     char hdr[BIN_HDRLEN] = {0};
1996     unsigned chk;
1997     int err = 0;
1998 
1999     chk = fread(hdr, 1, BIN_HDRLEN, fp);
2000 
2001     if (chk != BIN_HDRLEN) {
2002 	err = E_DATA;
2003     } else {
2004 	int bin_order = 0;
2005 
2006 	if (strncmp(hdr, "gretl-bin:", 10)) {
2007 	    err = E_DATA;
2008 	} else if (!strcmp(hdr + 10, "little-endian")) {
2009 	    bin_order = G_LITTLE_ENDIAN;
2010 	} else if (!strcmp(hdr + 10, "big-endian")) {
2011 	    bin_order = G_BIG_ENDIAN;
2012 	} else {
2013 	    err = E_DATA;
2014 	}
2015 	if (!err && bin_order != order) {
2016 	    err = E_DATA;
2017 	}
2018     }
2019 
2020     if (err) {
2021 	gretl_errmsg_set("Error reading binary data file");
2022     }
2023 
2024     return err;
2025 }
2026 
na_convert(double * x,int n)2027 static void na_convert (double *x, int n)
2028 {
2029     int i;
2030 
2031     for (i=0; i<n; i++) {
2032 	if (x[i] == DBL_MAX) {
2033 	    x[i] = NADBL;
2034 	}
2035     }
2036 }
2037 
read_binary_data(const char * fname,DATASET * dset,int order,double gdtversion,int fullv,const int * vlist)2038 static int read_binary_data (const char *fname,
2039 			     DATASET *dset,
2040 			     int order,
2041 			     double gdtversion,
2042 			     int fullv,
2043 			     const int *vlist)
2044 {
2045     char *bname;
2046     FILE *fp;
2047     int err = 0;
2048 
2049     bname = switch_ext_new(fname, "bin");
2050     fp = gretl_fopen(bname, "rb");
2051 
2052     if (fp == NULL) {
2053 	err = E_FOPEN;
2054     } else {
2055 	int T = dset->n;
2056 	long offset = T * sizeof(double);
2057 	size_t got;
2058 	int i, k = 1;
2059 
2060 	err = read_binary_header(fp, order);
2061 
2062 	for (i=1; i<fullv && !err; i++) {
2063 	    if (vlist == NULL || in_gretl_list(vlist, i)) {
2064 		got = fread(dset->Z[k], sizeof(double), T, fp);
2065 		if (got != T) {
2066 		    err = E_DATA;
2067 		}
2068 		if (!err && gdtversion < 1.4) {
2069 		    /* we need to convert old-style NAs */
2070 		    na_convert(dset->Z[k], dset->n);
2071 		}
2072 		k++;
2073 	    } else {
2074 		fseek(fp, offset, SEEK_CUR);
2075 	    }
2076 	}
2077 	fclose(fp);
2078     }
2079 
2080     free(bname);
2081 
2082     if (!err && order != G_BYTE_ORDER) {
2083 	gdt_swap_endianness(dset);
2084     }
2085 
2086     return err;
2087 }
2088 
write_binary_order(PRN * prn)2089 static void write_binary_order (PRN *prn)
2090 {
2091 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
2092     pputs(prn, " binary=\"little-endian\" ");
2093 #else
2094     pputs(prn, " binary=\"big-endian\" ");
2095 #endif
2096 }
2097 
2098 /* Here we're testing a series to see if it can be represented in full
2099    precision (discarding any artifacts) using the format "%.15g". The
2100    criterion is that each value must have a least one trailing zero
2101    when printed using "% .14e". We don't necessarily check every
2102    element in the series; assuming a reasonable degree of homogeneity
2103    in the data it ought to be enough if 100 members pass the test.
2104 */
2105 
p15_OK(const DATASET * dset,int v)2106 static int p15_OK (const DATASET *dset, int v)
2107 {
2108     const double *x = dset->Z[v];
2109     char s[32];
2110     int t, i, n_ok = 0;
2111     int ret = 1;
2112 
2113     for (t=dset->t1; t<=dset->t2; t++) {
2114 	if (!na(x[t])) {
2115 	    sprintf(s, "% .14e", x[t]);
2116 	    for (i=16; i>13; i--) {
2117 		if (s[i] != '0') {
2118 		    break;
2119 		}
2120 	    }
2121 	    if (i == 16) {
2122 		ret = 0;
2123 		break;
2124 	    }
2125 	    n_ok++;
2126 	}
2127 	if (n_ok > 100) {
2128 	    break;
2129 	}
2130     }
2131 
2132     return ret;
2133 }
2134 
2135 /* apparatus from trimming string-values on gdt save,
2136    and restoration afterwards
2137 */
2138 
2139 struct strval_saver_ {
2140     int *list;
2141     gretl_matrix *X;
2142     series_table **st;
2143     size_t sz;
2144 };
2145 
2146 typedef struct strval_saver_ strval_saver;
2147 
strval_saver_destroy(strval_saver * ss)2148 static void strval_saver_destroy (strval_saver *ss)
2149 {
2150     if (ss != NULL) {
2151 	free(ss->list);
2152 	gretl_matrix_free(ss->X);
2153 	free(ss->st);
2154 	free(ss);
2155     }
2156 }
2157 
strval_saver_setup(DATASET * dset,int nvars,int nsv,int * list,int * err)2158 static strval_saver *strval_saver_setup (DATASET *dset,
2159 					 int nvars, int nsv,
2160 					 int *list, int *err)
2161 {
2162     strval_saver *ss;
2163     int n_changed = 0;
2164     int i, n;
2165 
2166     ss = calloc(1, sizeof *ss);
2167     if (ss == NULL) {
2168 	*err = E_ALLOC;
2169 	return NULL;
2170     }
2171 
2172     n = sample_size(dset);
2173     ss->list = gretl_list_new(nsv);
2174     ss->X = gretl_matrix_alloc(n, nsv);
2175     ss->st = calloc(nsv, sizeof *ss->st);
2176     if (ss->list == NULL || ss->X == NULL || ss->st == NULL) {
2177 	*err = E_ALLOC;
2178     }
2179 
2180     if (!*err) {
2181 	double *x = ss->X->val;
2182 	int changed;
2183 	int v, j = 0;
2184 
2185 	ss->sz = n * sizeof *x;
2186 
2187 	for (i=1; i<=nvars && !*err; i++) {
2188 	    v = savenum(list, i);
2189 	    if (is_string_valued(dset, v)) {
2190 		ss->list[j+1] = v;
2191 		ss->st[j] = series_get_string_table(dset, v);
2192 		memcpy(x, dset->Z[v] + dset->t1, ss->sz);
2193 		*err = series_recode_strings(dset, v, OPT_P, &changed);
2194 		if (changed) {
2195 		    n_changed++;
2196 		} else {
2197 		    ss->st[j] = NULL;
2198 		}
2199 		j++;
2200 		x += n;
2201 	    }
2202 	}
2203     } else {
2204 	strval_saver_destroy(ss);
2205 	ss = NULL;
2206     }
2207 
2208     if (ss != NULL && n_changed == 0) {
2209 	/* we have no need for @ss */
2210 	strval_saver_destroy(ss);
2211 	ss = NULL;
2212     }
2213 
2214     return ss;
2215 }
2216 
strval_saver_restore(DATASET * dset,strval_saver * ss)2217 static void strval_saver_restore (DATASET *dset,
2218 				  strval_saver *ss)
2219 {
2220     double *x = ss->X->val;
2221     int n = ss->X->rows;
2222     int i, v;
2223 
2224     for (i=1; i<=ss->list[0]; i++) {
2225 	v = ss->list[i];
2226 	if (ss->st[i-1] != NULL) {
2227 	    series_destroy_string_table(dset, v);
2228 	    series_attach_string_table(dset, v, ss->st[i-1]);
2229 	    memcpy(dset->Z[v] + dset->t1, x, ss->sz);
2230 	}
2231 	x += n;
2232     }
2233 }
2234 
2235 /* end apparatus from trimming string-values */
2236 
real_write_gdt(const char * fname,const int * inlist,const DATASET * dset,gretlopt opt,int progress)2237 static int real_write_gdt (const char *fname, const int *inlist,
2238 			   const DATASET *dset, gretlopt opt,
2239 			   int progress)
2240 {
2241     PRN *prn = NULL;
2242     int tsamp = dset->t2 - dset->t1 + 1;
2243     int *list = NULL;
2244     char *p15 = NULL;
2245     char startdate[OBSLEN], enddate[OBSLEN];
2246     char datname[MAXLEN], freqstr[32];
2247     char numstr[128], xmlbuf[1024];
2248     const char *gdtver;
2249     int (*show_progress) (double, double, int) = NULL;
2250     strval_saver *ss = NULL;
2251     double dsize = 0;
2252     int i, t, v, ntabs, nvars = 0;
2253     int have_markers, in_c_locale = 0;
2254     int binary = 0, skip_padding = 0;
2255     int gdt_digits = 17;
2256     int uerr = 0;
2257     int err = 0;
2258 
2259     /* what are we supposed to be saving? */
2260     if (inlist != NULL) {
2261 	int lzero[] = {1, 0};
2262 
2263 	list = gretl_list_drop(inlist, lzero, &err);
2264 	if (err) {
2265 	    return err;
2266 	}
2267 	nvars = list[0];
2268     } else {
2269 	nvars = dset->v - 1;
2270     }
2271 
2272     if (nvars <= 0) {
2273 	gretl_errmsg_set("No data to save!");
2274 	free(list);
2275 	return E_DATA;
2276     }
2277 
2278     if (opt & OPT_B) {
2279 	binary = G_BYTE_ORDER;
2280 	progress = 0;
2281 	prn = open_gdt_write_stream(fname, OPT_NONE);
2282     } else {
2283 	const char *path = fname;
2284 	gchar *fullname = NULL;
2285 
2286 	if (!has_suffix(fname, ".gdt")) {
2287 	    /* force use of .gdt extension for native XML data */
2288 	    path = fullname = g_strdup_printf("%s.gdt", fname);
2289 	}
2290 	prn = open_gdt_write_stream(path, opt);
2291 	g_free(fullname);
2292     }
2293 
2294     if (prn == NULL) {
2295 	free(list);
2296 	return E_FOPEN;
2297     }
2298 
2299     dsize = tsamp * nvars * sizeof(double);
2300 
2301     if (dsize > 100000) {
2302 	fprintf(stderr, "Writing %.0f Kbytes of data\n", dsize / 1024);
2303     } else if (progress) {
2304 	/* suppress progress bar for smaller data */
2305 	progress = 0;
2306     }
2307 
2308     if (!binary) {
2309 	p15 = calloc(nvars, 1);
2310 	if (p15 != NULL) {
2311 	    for (i=0; i<nvars; i++) {
2312 		v = savenum(list, i+1);
2313 		p15[i] = p15_OK(dset, v);
2314 	    }
2315 	}
2316     }
2317 
2318     if (progress) {
2319 	show_progress = get_plugin_function("show_progress");
2320 	if (show_progress == NULL) {
2321 	    progress = 0;
2322 	} else {
2323 	    (*show_progress)(0, dsize, SP_SAVE_INIT);
2324 	}
2325     }
2326 
2327     ntolabel(startdate, dset->t1, dset);
2328     ntolabel(enddate, dset->t2, dset);
2329 
2330     simple_fname(datname, fname);
2331     uerr = gretl_xml_encode_to_buf(xmlbuf, datname, sizeof xmlbuf);
2332     if (uerr) {
2333 	strcpy(xmlbuf, "unknown");
2334     }
2335 
2336     if (custom_time_series(dset)) {
2337 	sprintf(freqstr, "special:%d", dset->pd);
2338     } else {
2339 	sprintf(freqstr, "%d", dset->pd);
2340     }
2341 
2342     gdtver = GRETLDATA_VERSION;
2343 
2344     if (binary) {
2345 	/* support --compat option */
2346 	const char *s = get_optval_string(STORE, OPT_C);
2347 
2348 	if (s != NULL && !strcmp(s, "2018b")) {
2349 	    gdtver = GRETLDATA_COMPAT;
2350 	}
2351     }
2352 
2353     pprintf(prn, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
2354 	    "<!DOCTYPE gretldata SYSTEM \"gretldata.dtd\">\n\n"
2355 	    "<gretldata version=\"%s\" name=\"%s\" frequency=\"%s\" "
2356 	    "startobs=\"%s\" endobs=\"%s\" ",
2357 	    gdtver, xmlbuf, freqstr, startdate, enddate);
2358 
2359     pprintf(prn, "type=\"%s\"", data_structure_string(dset->structure));
2360 
2361     if (binary) {
2362 	write_binary_order(prn);
2363     }
2364     if (dset->rseed > 0) {
2365 	/* record resampling info */
2366 	pprintf(prn, " rseed=\"%u\"", dset->rseed);
2367     }
2368     if (dset->mapfile != NULL) {
2369 	/* record map link */
2370 	uerr = gretl_xml_encode_to_buf(xmlbuf, dset->mapfile, sizeof xmlbuf);
2371 	if (uerr == 0) {
2372 	    pprintf(prn, " mapfile=\"%s\"", xmlbuf);
2373 	}
2374     }
2375 
2376     pputs(prn, ">\n");
2377 
2378     have_markers = dataset_has_markers(dset);
2379 
2380     if (dataset_is_panel(dset) && !have_markers &&
2381 	nvars == dset->v - 1 && dsize > 1024 * 1024 * 10) {
2382 	/* we have more than 10 MB of panel data */
2383 	int padrows = panel_padding_rows(dset);
2384 
2385 	if (padrows > 0.4 * dset->n) {
2386 	    fprintf(stderr, "skip-padding: dropping %d rows\n", padrows);
2387 	    skip_padding = 1;
2388 	    tsamp -= padrows;
2389 	}
2390     }
2391 
2392     /* deal with description, if any */
2393     if (dset->descrip != NULL) {
2394 	char *dbuf = gretl_xml_encode(dset->descrip);
2395 
2396 	if (dbuf != NULL) {
2397 	    pputs(prn, "<description>");
2398 	    pputs(prn, dbuf);
2399 	    pputs(prn, "</description>\n");
2400 	    free(dbuf);
2401 	}
2402     }
2403 
2404     gretl_push_c_numeric_locale();
2405     in_c_locale = 1;
2406 
2407     /* then listing of variable names and labels */
2408     if (skip_padding) {
2409 	pprintf(prn, "<variables count=\"%d\">\n", nvars + 2);
2410     } else {
2411 	pprintf(prn, "<variables count=\"%d\">\n", nvars);
2412     }
2413 
2414     ntabs = string_table_count(dset, list, nvars);
2415 
2416     if (ntabs > 0 && !(opt & OPT_P)) {
2417 	/* trimming strvals */
2418 	ss = strval_saver_setup((DATASET *) dset, nvars,
2419 				ntabs, list, &err);
2420     }
2421 
2422     for (i=1; i<=nvars; i++) {
2423 	const char *vstr;
2424 	int vprop, mpd;
2425 
2426 	v = savenum(list, i);
2427 	gretl_xml_encode_to_buf(xmlbuf, dset->varname[v], sizeof xmlbuf);
2428 	pprintf(prn, "<variable name=\"%s\"", xmlbuf);
2429 
2430 	vstr = series_get_label(dset, v);
2431 	if (vstr != NULL && *vstr != '\0') {
2432 	    uerr = gretl_xml_encode_to_buf(xmlbuf, vstr, sizeof xmlbuf);
2433 	    if (!uerr) {
2434 		pprintf(prn, "\n label=\"%s\"", xmlbuf);
2435 	    }
2436 	}
2437 
2438 	vstr = series_get_display_name(dset, v);
2439 	if (vstr != NULL && *vstr != '\0') {
2440 	    uerr = gretl_xml_encode_to_buf(xmlbuf, vstr, sizeof xmlbuf);
2441 	    if (!uerr) {
2442 		pprintf(prn, "\n displayname=\"%s\"", xmlbuf);
2443 	    }
2444 	}
2445 
2446 	vstr = series_get_parent_name(dset, v);
2447 	if (vstr != NULL) {
2448 	    uerr = gretl_xml_encode_to_buf(xmlbuf, vstr, sizeof xmlbuf);
2449 	    if (!uerr) {
2450 		pprintf(prn, "\n parent=\"%s\"", xmlbuf);
2451 	    }
2452 	}
2453 
2454 	vprop = series_get_transform(dset, v);
2455 	if (vprop != 0) {
2456 	    const char *tr = gretl_command_word(vprop);
2457 
2458 	    pprintf(prn, "\n transform=\"%s\"", tr);
2459 	}
2460 
2461 	vprop = series_get_lag(dset, v);
2462 	if (vprop != 0) {
2463 	    pprintf(prn, "\n lag=\"%d\"", vprop);
2464 	}
2465 
2466 	vprop = series_get_compact_method(dset, v);
2467 	if (vprop != COMPACT_NONE) {
2468 	    const char *meth = compact_method_to_string(vprop);
2469 
2470 	    pprintf(prn, "\n compact-method=\"%s\"", meth);
2471 	}
2472 
2473 	if (series_is_discrete(dset, v)) {
2474 	    pputs(prn, "\n discrete=\"true\"");
2475 	}
2476 
2477 	if (series_is_coded(dset, v)) {
2478 	    pputs(prn, "\n coded=\"true\"");
2479 	}
2480 
2481 	if (series_is_midas_anchor(dset, v)) {
2482 	    pputs(prn, "\n hf-anchor=\"true\"");
2483 	}
2484 
2485 	if ((mpd = series_get_midas_period(dset, v)) > 0) {
2486 	    pprintf(prn, "\n midas_period=\"%d\"", mpd);
2487 	}
2488 
2489 	if ((mpd = series_get_midas_freq(dset, v)) > 0) {
2490 	    pprintf(prn, "\n midas_freq=\"%d\"", mpd);
2491 	}
2492 
2493 	if ((mpd = series_get_orig_pd(dset, v)) > 0) {
2494 	    pprintf(prn, "\n orig_pd=\"%d\"", mpd);
2495 	}
2496 
2497 	pputs(prn, "\n/>\n");
2498     }
2499 
2500     if (skip_padding) {
2501 	pputs(prn, "<variable name=\"unit__\"\n/>\n");
2502 	pputs(prn, "<variable name=\"time__\"\n/>\n");
2503     }
2504 
2505     pputs(prn, "</variables>\n");
2506 
2507     /* then listing of observations */
2508     pputs(prn, "<observations ");
2509     pprintf(prn, "count=\"%d\" labels=\"%s\"",
2510 		      tsamp, (have_markers)? "true" : "false");
2511     pputs(prn, ">\n");
2512 
2513     if (binary) {
2514 	err = write_binary_data(fname, dset, list, nvars, tsamp, opt);
2515 	if (!have_markers) {
2516 	    goto binary_done;
2517 	}
2518     }
2519 
2520     for (t=dset->t1; t<=dset->t2; t++) {
2521 	if (skip_padding && row_is_padding(dset, t, dset->v)) {
2522 	    continue;
2523 	}
2524 	pputs(prn, "<obs");
2525 	if (have_markers) {
2526 	    uerr = gretl_xml_encode_to_buf(xmlbuf, dset->S[t], sizeof xmlbuf);
2527 	    if (!uerr) {
2528 		pprintf(prn, " label=\"%s\"", xmlbuf);
2529 	    }
2530 	}
2531 	if (binary) {
2532 	    pputs(prn, " />\n");
2533 	    continue;
2534 	}
2535 	pputs(prn, ">");
2536 	for (i=1; i<=nvars; i++) {
2537 	    v = savenum(list, i);
2538 	    if (na(dset->Z[v][t])) {
2539 		strcpy(numstr, "NA ");
2540 	    } else if (p15 == NULL || p15[i-1] == 0) {
2541 		/* use full default precision if required */
2542 		sprintf(numstr, "%.*g ", gdt_digits, dset->Z[v][t]);
2543 	    } else {
2544 		sprintf(numstr, "%.15g ", dset->Z[v][t]);
2545 	    }
2546 	    pputs(prn, numstr);
2547 	}
2548 	if (skip_padding) {
2549 	    int unit = 1 + t / dset->pd;
2550 	    int time = t % dset->pd + 1;
2551 
2552 	    sprintf(numstr, "%d %d ", unit, time);
2553 	    pputs(prn, numstr);
2554 	}
2555 	pputs(prn, "</obs>\n");
2556 
2557 	if (progress && t && ((t - dset->t1) % 50 == 0)) {
2558 	    (*show_progress) (50, tsamp, SP_NONE);
2559 	}
2560     }
2561 
2562  binary_done:
2563 
2564     pputs(prn, "</observations>\n");
2565 
2566     if (ntabs > 0) {
2567 	char *sbuf, **strs;
2568 	int j, n_strs;
2569 
2570 	pprintf(prn, "<string-tables count=\"%d\">\n", ntabs);
2571 
2572 	for (i=1; i<=nvars; i++) {
2573 	    v = savenum(list, i);
2574 	    if (!is_string_valued(dset, v)) {
2575 		continue;
2576 	    }
2577 	    strs = series_get_string_vals(dset, v, &n_strs, 0);
2578 	    gretl_xml_encode_to_buf(xmlbuf, dset->varname[v], sizeof xmlbuf);
2579 	    pprintf(prn, "<valstrings owner=\"%s\" count=\"%d\">", xmlbuf, n_strs);
2580 	    for (j=0; j<n_strs; j++) {
2581 		sbuf = gretl_xml_encode(strs[j]);
2582 		if (sbuf == NULL || *sbuf == '\0') {
2583 		    fprintf(stderr, "string values for var %d: string %d is empty\n",
2584 			    i, j);
2585 		    pputs(prn, "\"empty string\" ");
2586 		} else {
2587 		    pprintf(prn, "\"%s\" ", sbuf);
2588 		}
2589 		free(sbuf);
2590 	    }
2591 	    pputs(prn, "</valstrings>\n");
2592 	}
2593 	pputs(prn, "</string-tables>\n");
2594     }
2595 
2596     if (dataset_is_panel(dset)) {
2597 	maybe_print_panel_info(dset, skip_padding, prn);
2598     }
2599 
2600     pputs(prn, "</gretldata>\n");
2601 
2602     if (ss != NULL) {
2603 	strval_saver_restore((DATASET *) dset, ss);
2604 	strval_saver_destroy(ss);
2605     }
2606 
2607     if (in_c_locale) {
2608 	gretl_pop_c_numeric_locale();
2609     }
2610 
2611     if (progress) {
2612 	(*show_progress)(0, dset->t2 - dset->t1 + 1, SP_FINISH);
2613     }
2614 
2615     if (p15 != NULL) {
2616 	free(p15);
2617     }
2618     free(list);
2619 
2620     gretl_print_destroy(prn);
2621 
2622     return err;
2623 }
2624 
write_purebin(const char * fname,const int * list,const DATASET * dset,gretlopt opt)2625 static int write_purebin (const char *fname, const int *list,
2626 			  const DATASET *dset, gretlopt opt)
2627 {
2628     int (*writer) (const char *, const int *, const DATASET *,
2629 		   gretlopt);
2630     int err = 0;
2631 
2632     writer = get_plugin_function("purebin_write_data");
2633 
2634     if (writer == NULL) {
2635         err = 1;
2636     } else {
2637 	err = (*writer)(fname, list, dset, opt);
2638     }
2639 
2640     return err;
2641 }
2642 
2643 /* zipfile with gdt XML + binary */
2644 
write_old_gdtb(const char * fname,const int * list,const DATASET * dset,gretlopt opt)2645 static int write_old_gdtb (const char *fname, const int *list,
2646 			   const DATASET *dset, gretlopt opt)
2647 {
2648     gchar *zdir;
2649     int err;
2650 
2651     zdir = g_strdup_printf("%stmp-zip", gretl_dotdir());
2652     err = gretl_mkdir(zdir);
2653 
2654     if (!err) {
2655 	char xmlfile[FILENAME_MAX];
2656 
2657 	gretl_build_path(xmlfile, zdir, "data.xml", NULL);
2658 	err = real_write_gdt(xmlfile, list, dset, opt | OPT_B, 0);
2659 
2660 	if (!err) {
2661 	    int level = get_compression_option(STORE);
2662 
2663 	    err = gretl_zip_datafile(fname, zdir, level);
2664 	    if (err) {
2665 		gretl_errmsg_ensure("Problem writing data file");
2666 	    }
2667 	}
2668 	gretl_deltree(zdir);
2669     }
2670 
2671     g_free(zdir);
2672 
2673     return err;
2674 }
2675 
2676 /**
2677  * gretl_write_gdt:
2678  * @fname: name of file to write.
2679  * @list: list of variables to write (or %NULL to write all).
2680  * @dset: dataset struct.
2681  * @opt: if %OPT_Z write gzipped data, else uncompressed.
2682  * @progress: may be 1 when called from gui to display progress
2683  * bar in case of a large data write; generally should be 0.
2684  *
2685  * Write out in xml a data file containing the values of the given set
2686  * of variables.
2687  *
2688  * Returns: 0 on successful completion, non-zero on error.
2689  */
2690 
gretl_write_gdt(const char * fname,const int * list,const DATASET * dset,gretlopt opt,int progress)2691 int gretl_write_gdt (const char *fname, const int *list,
2692 		     const DATASET *dset, gretlopt opt,
2693 		     int progress)
2694 {
2695     int gdtb = has_suffix(fname, ".gdtb");
2696     int compat = (opt & OPT_C);
2697     int err = 0;
2698 
2699     if (gdtb && compat) {
2700 	/* backward-compatible gdtb */
2701 	err = write_old_gdtb(fname, list, dset, opt);
2702     } else if (gdtb) {
2703 	/* default binary format for gretl >= 2020b */
2704 	err = write_purebin(fname, list, dset, opt);
2705     } else {
2706 	/* plain gdt file */
2707 	err = real_write_gdt(fname, list, dset, opt, progress);
2708     }
2709 
2710     return err;
2711 }
2712 
transcribe_string(char * targ,const char * src,int maxlen)2713 static void transcribe_string (char *targ, const char *src, int maxlen)
2714 {
2715     *targ = '\0';
2716 
2717     strncat(targ, src, maxlen - 1);
2718 }
2719 
2720 /* Note: if @probe is non-zero, this means that we're really just
2721    scraping series names from the data file, and so we should not
2722    start allocating memory for dset->Z based on the number of
2723    series we find.
2724 */
2725 
process_varlist(xmlNodePtr node,DATASET * dset,int probe)2726 static int process_varlist (xmlNodePtr node, DATASET *dset, int probe)
2727 {
2728     xmlNodePtr cur;
2729     xmlChar *tmp = xmlGetProp(node, (XUC) "count");
2730     int i, nv = 0, err = 0;
2731 
2732     if (tmp != NULL) {
2733 	if (sscanf((char *) tmp, "%d", &nv) == 1) {
2734 	    dset->v = nv + 1;
2735 	} else {
2736 	    gretl_errmsg_set(_("Failed to parse count of variables"));
2737 	    err = E_DATA;
2738 	}
2739 	if (!err && dataset_allocate_varnames(dset)) {
2740 	    err = E_ALLOC;
2741 	}
2742 	if (!err && !probe) {
2743 	    dset->Z = doubles_array_new(dset->v, 0);
2744 	    if (dset->Z == NULL) {
2745 		err = E_ALLOC;
2746 	    }
2747 	}
2748 	free(tmp);
2749     } else {
2750 	gretl_errmsg_set(_("Got no variables"));
2751 	err = E_DATA;
2752     }
2753 
2754     if (err) {
2755 	return err;
2756     } else if (nv == 0) {
2757 	fprintf(stderr, "Empty dataset!\n");
2758 	return 0;
2759     }
2760 
2761     /* now get individual variable info: names and labels */
2762     cur = node->xmlChildrenNode;
2763     while (cur && xmlIsBlankNode(cur)) {
2764 	cur = cur->next;
2765     }
2766 
2767     if (cur == NULL) {
2768 	gretl_errmsg_set(_("Got no variables"));
2769 	return E_DATA;
2770     }
2771 
2772     i = 1;
2773     while (cur != NULL) {
2774         if (!xmlStrcmp(cur->name, (XUC) "variable")) {
2775 	    tmp = xmlGetProp(cur, (XUC) "name");
2776 	    if (tmp != NULL) {
2777 		if (strcmp((char *) tmp, "catch") && strcmp((char *) tmp, "const")) {
2778 		    /* temporary hack: allow 'catch', even though it's
2779 		       officially reserved, on account of Ramanathan
2780 		       data file data6-2.gdt
2781 		    */
2782 		    err = check_varname((const char *) tmp);
2783 		}
2784 		if (!err) {
2785 		    transcribe_string(dset->varname[i], (char *) tmp, VNAMELEN);
2786 		}
2787 		free(tmp);
2788 	    } else {
2789 		gretl_errmsg_sprintf(_("Variable %d has no name"), i);
2790 		err = E_DATA;
2791 	    }
2792 	    if (err) {
2793 		return err;
2794 	    }
2795 	    tmp = xmlGetProp(cur, (XUC) "label");
2796 	    if (tmp != NULL) {
2797 		series_set_label(dset, i, (char *) tmp);
2798 		free(tmp);
2799 	    }
2800 	    tmp = xmlGetProp(cur, (XUC) "displayname");
2801 	    if (tmp != NULL) {
2802 		series_set_display_name(dset, i, (char *) tmp);
2803 		free(tmp);
2804 	    }
2805 	    tmp = xmlGetProp(cur, (XUC) "parent");
2806 	    if (tmp != NULL) {
2807 		series_set_parent(dset, i, (char *) tmp);
2808 		free(tmp);
2809 	    }
2810 	    tmp = xmlGetProp(cur, (XUC) "transform");
2811 	    if (tmp != NULL) {
2812 		int ci = gretl_command_number((char *) tmp);
2813 
2814 		series_set_transform(dset, i, ci);
2815 		free(tmp);
2816 	    }
2817 	    tmp = xmlGetProp(cur, (XUC) "lag");
2818 	    if (tmp != NULL) {
2819 		series_set_lag(dset, i, atoi((char *) tmp));
2820 		free(tmp);
2821 	    }
2822 	    tmp = xmlGetProp(cur, (XUC) "compact-method");
2823 	    if (tmp != NULL) {
2824 		series_set_compact_method(dset, i, compact_string_to_int((char *) tmp));
2825 		free(tmp);
2826 	    }
2827 	    tmp = xmlGetProp(cur, (XUC) "discrete");
2828 	    if (tmp != NULL) {
2829 		if (!strcmp((char *) tmp, "true")) {
2830 		    series_set_flag(dset, i, VAR_DISCRETE);
2831 		}
2832 		free(tmp);
2833 	    }
2834 	    tmp = xmlGetProp(cur, (XUC) "coded");
2835 	    if (tmp != NULL) {
2836 		if (!strcmp((char *) tmp, "true")) {
2837 		    series_set_flag(dset, i, VAR_CODED);
2838 		}
2839 		free(tmp);
2840 	    }
2841 	    tmp = xmlGetProp(cur, (XUC) "hf-anchor");
2842 	    if (tmp != NULL) {
2843 		if (!strcmp((char *) tmp, "true")) {
2844 		    series_set_midas_anchor(dset, i);
2845 		}
2846 		free(tmp);
2847 	    }
2848 	    tmp = xmlGetProp(cur, (XUC) "midas_period");
2849 	    if (tmp != NULL) {
2850 		int mpd = atoi((char *) tmp);
2851 
2852 		if (mpd > 0) {
2853 		    series_set_midas_period(dset, i, mpd);
2854 		}
2855 		free(tmp);
2856 	    }
2857 	    tmp = xmlGetProp(cur, (XUC) "midas_freq");
2858 	    if (tmp != NULL) {
2859 		int mpc = atoi((char *) tmp);
2860 
2861 		if (mpc > 0) {
2862 		    series_set_midas_freq(dset, i, mpc);
2863 		}
2864 		free(tmp);
2865 	    }
2866 	    tmp = xmlGetProp(cur, (XUC) "orig_pd");
2867 	    if (tmp != NULL) {
2868 		int opd = atoi((char *) tmp);
2869 
2870 		if (opd > 0) {
2871 		    series_set_orig_pd(dset, i, opd);
2872 		}
2873 		free(tmp);
2874 	    }
2875 	    i++;
2876 	}
2877 	cur = cur->next;
2878     }
2879 
2880     if (i != dset->v) {
2881 	gretl_errmsg_set(_("Number of variables does not match declaration"));
2882 	err = E_DATA;
2883     }
2884 
2885     return err;
2886 }
2887 
process_varlist_subset(xmlNodePtr node,DATASET * dset,int * fullv,const int * vlist)2888 static int process_varlist_subset (xmlNodePtr node, DATASET *dset,
2889 				   int *fullv, const int *vlist)
2890 {
2891     xmlNodePtr cur;
2892     xmlChar *tmp = xmlGetProp(node, (XUC) "count");
2893     int nv = vlist[0];
2894     int i, k, err = 0;
2895 
2896     if (tmp != NULL) {
2897 	if (sscanf((char *) tmp, "%d", fullv) != 1) {
2898 	    err = E_DATA;
2899 	}
2900 	free(tmp);
2901     } else {
2902 	err = E_DATA;
2903     }
2904 
2905     if (err) {
2906 	return err;
2907     }
2908 
2909     *fullv += 1; /* allow for const */
2910 
2911 #if GDT_DEBUG
2912     fprintf(stderr, "process_varlist_subset: fullv = %d\n", *fullv);
2913 #endif
2914 
2915     cur = node->xmlChildrenNode;
2916     while (cur && xmlIsBlankNode(cur)) {
2917 	cur = cur->next;
2918     }
2919 
2920     if (cur == NULL) {
2921 	gretl_errmsg_set(_("Got no variables"));
2922 	return E_DATA;
2923     }
2924 
2925     /* allocate the dataset content */
2926 
2927     dset->v = nv + 1;
2928     if (!err && dataset_allocate_varnames(dset)) {
2929 	err = E_ALLOC;
2930     }
2931     if (!err) {
2932 	dset->Z = malloc(dset->v * sizeof *dset->Z);
2933 	if (dset->Z == NULL) {
2934 	    err = E_ALLOC;
2935 	}
2936     }
2937 
2938     if (err) {
2939 	return err;
2940     }
2941 
2942     /* actually read the info */
2943 
2944     k = i = 1;
2945 
2946     while (cur != NULL) {
2947         if (!xmlStrcmp(cur->name, (XUC) "variable")) {
2948 	    int wanted = 0;
2949 
2950 	    tmp = xmlGetProp(cur, (XUC) "name");
2951 	    if (tmp != NULL) {
2952 		if (in_gretl_list(vlist, i)) {
2953 		    wanted = 1;
2954 		    transcribe_string(dset->varname[k], (char *) tmp, VNAMELEN);
2955 		}
2956 		free(tmp);
2957 	    } else {
2958 		return E_DATA;
2959 	    }
2960 
2961 	    if (!wanted) {
2962 		i++;
2963 		cur = cur->next;
2964 		continue;
2965 	    }
2966 
2967 	    tmp = xmlGetProp(cur, (XUC) "label");
2968 	    if (tmp != NULL) {
2969 		series_set_label(dset, k, (char *) tmp);
2970 		free(tmp);
2971 	    }
2972 	    tmp = xmlGetProp(cur, (XUC) "displayname");
2973 	    if (tmp != NULL) {
2974 		series_set_display_name(dset, k, (char *) tmp);
2975 		free(tmp);
2976 	    }
2977 	    tmp = xmlGetProp(cur, (XUC) "parent");
2978 	    if (tmp != NULL) {
2979 		series_set_parent(dset, k, (char *) tmp);
2980 		free(tmp);
2981 	    }
2982 	    tmp = xmlGetProp(cur, (XUC) "transform");
2983 	    if (tmp != NULL) {
2984 		int ci = gretl_command_number((char *) tmp);
2985 
2986 		series_set_transform(dset, k, ci);
2987 		free(tmp);
2988 	    }
2989 	    tmp = xmlGetProp(cur, (XUC) "lag");
2990 	    if (tmp != NULL) {
2991 		series_set_lag(dset, k, atoi((char *) tmp));
2992 		free(tmp);
2993 	    }
2994 	    tmp = xmlGetProp(cur, (XUC) "compact-method");
2995 	    if (tmp != NULL) {
2996 		series_set_compact_method(dset, k, compact_string_to_int((char *) tmp));
2997 		free(tmp);
2998 	    }
2999 	    tmp = xmlGetProp(cur, (XUC) "discrete");
3000 	    if (tmp != NULL) {
3001 		if (!strcmp((char *) tmp, "true")) {
3002 		    series_set_flag(dset, k, VAR_DISCRETE);
3003 		}
3004 		free(tmp);
3005 	    }
3006 	    tmp = xmlGetProp(cur, (XUC) "coded");
3007 	    if (tmp != NULL) {
3008 		if (!strcmp((char *) tmp, "true")) {
3009 		    series_set_flag(dset, k, VAR_CODED);
3010 		}
3011 		free(tmp);
3012 	    }
3013 	    tmp = xmlGetProp(cur, (XUC) "midas_period");
3014 	    if (tmp != NULL) {
3015 		int mpd = atoi((char *) tmp);
3016 
3017 		if (mpd > 0) {
3018 		    series_set_midas_period(dset, k, mpd);
3019 		}
3020 		free(tmp);
3021 	    }
3022 	    tmp = xmlGetProp(cur, (XUC) "midas_freq");
3023 	    if (tmp != NULL) {
3024 		int mpf = atoi((char *) tmp);
3025 
3026 		if (mpf > 0) {
3027 		    series_set_midas_freq(dset, k, mpf);
3028 		}
3029 		free(tmp);
3030 	    }
3031 	    tmp = xmlGetProp(cur, (XUC) "hf-anchor");
3032 	    if (tmp != NULL) {
3033 		if (!strcmp((char *) tmp, "true")) {
3034 		    series_set_midas_anchor(dset, k);
3035 		}
3036 		free(tmp);
3037 	    }
3038 
3039 	    i++;
3040 	    k++;
3041 	}
3042 
3043 	cur = cur->next;
3044     }
3045 
3046     return err;
3047 }
3048 
set_underflow_warning(int n)3049 static void set_underflow_warning (int n)
3050 {
3051     gchar *msg;
3052 
3053     msg = g_strdup_printf(_("Data file contains %d subnormal values"), n);
3054     gretl_warnmsg_set(msg);
3055     g_free(msg);
3056 }
3057 
3058 /* Read the values for all (or selected) variables at
3059    observation @t */
3060 
process_values(DATASET * dset,int t,char * s,int fullv,const int * vlist,int * n_uflow)3061 static int process_values (DATASET *dset,
3062 			   int t, char *s,
3063 			   int fullv,
3064 			   const int *vlist,
3065 			   int *n_uflow)
3066 {
3067     char *test;
3068     double x;
3069     int i, k = 1;
3070     int err = 0;
3071 
3072     gretl_error_clear();
3073 
3074     for (i=1; i<fullv && !err; i++) {
3075 	while (isspace(*s)) s++;
3076 	if (vlist != NULL && !in_gretl_list(vlist, i)) {
3077 	    s += strcspn(s, " \t\r\n");
3078 	} else {
3079 	    x = strtod(s, &test);
3080 	    if (errno == ERANGE && SMALLVAL(x)) {
3081 		errno = 0; /* underflow, treat as OK? */
3082 		fprintf(stderr, "warning, underflow: %g for series %d (%s) at obs %d\n",
3083 			x, i, dset->varname[i], t + 1);
3084 		*n_uflow += 1;
3085 		s = test;
3086 	    } else if (errno) {
3087 		fprintf(stderr, "%s: %d: bad data\n", __FILE__, __LINE__);
3088 		perror(NULL);
3089 		err = E_DATA;
3090 	    } else if (!strncmp(test, "NA", 2)) {
3091 		x = NADBL;
3092 		s = test + 2;
3093 	    } else if (*test != '\0' && !isspace(*test)) {
3094 		err = 1;
3095 	    } else {
3096 		s = test;
3097 	    }
3098 	    if (!err && t < dset->n) {
3099 		dset->Z[k++][t] = x;
3100 	    }
3101 	}
3102     }
3103 
3104     /* check for trailing junk in <obs> line */
3105     s += strspn(s, " \t");
3106     if (*s) {
3107 	fprintf(stderr, "Warning: found trailing junk at obs %d:\n'%s'\n",
3108 		t + 1, s);
3109     }
3110 
3111     if (err && !gretl_errmsg_is_set()) {
3112 	gretl_errmsg_sprintf(_("Failed to parse data values at obs %d"), t+1);
3113     }
3114 
3115     return err;
3116 }
3117 
read_observations(xmlDocPtr doc,xmlNodePtr node,DATASET * dset,double dsize,int binary,double gdtversion,const char * fname)3118 static int read_observations (xmlDocPtr doc, xmlNodePtr node,
3119 			      DATASET *dset, double dsize,
3120 			      int binary, double gdtversion,
3121 			      const char *fname)
3122 {
3123     xmlNodePtr cur;
3124     xmlChar *tmp;
3125     int n, i, t;
3126     int (*show_progress) (double, double, int) = NULL;
3127     int progbar = 0;
3128     int n_uflow = 0;
3129     int err = 0;
3130 
3131     tmp = xmlGetProp(node, (XUC) "count");
3132     if (tmp == NULL) {
3133 	return E_DATA;
3134     }
3135 
3136     if (sscanf((char *) tmp, "%d", &n) == 1) {
3137 	dset->n = n;
3138 	free(tmp);
3139     } else {
3140 	gretl_errmsg_set(_("Failed to parse number of observations"));
3141 	free(tmp);
3142 	return E_DATA;
3143     }
3144 
3145     if (dsize > 100000 && !binary) {
3146 	show_progress = get_plugin_function("show_progress");
3147 	if (show_progress != NULL) {
3148 	    progbar = 1;
3149 	}
3150     }
3151 
3152     tmp = xmlGetProp(node, (XUC) "labels");
3153     if (tmp) {
3154 	if (!strcmp((char *) tmp, "true")) {
3155 	    if (dataset_allocate_obs_markers(dset)) {
3156 		return E_ALLOC;
3157 	    }
3158 	}
3159 	free(tmp);
3160     } else {
3161 	return E_DATA;
3162     }
3163 
3164     if (dset->endobs[0] == '\0') {
3165 	sprintf(dset->endobs, "%d", dset->n);
3166     }
3167 
3168     dset->t2 = dset->n - 1;
3169 
3170     for (i=0; i<dset->v; i++) {
3171 	dset->Z[i] = malloc(dset->n * sizeof **dset->Z);
3172 	if (dset->Z[i] == NULL) {
3173 	    return E_ALLOC;
3174 	}
3175     }
3176 
3177     for (t=0; t<dset->n; t++) {
3178 	dset->Z[0][t] = 1.0;
3179     }
3180 
3181     if (binary) {
3182 	err = read_binary_data(fname, dset, binary, gdtversion,
3183 			       dset->v, NULL);
3184 	if (!dset->markers) {
3185 	    goto bailout;
3186 	}
3187     }
3188 
3189     /* now get individual obs info: labels and values */
3190     cur = node->xmlChildrenNode;
3191     while (cur && xmlIsBlankNode(cur)) {
3192 	cur = cur->next;
3193     }
3194 
3195     if (cur == NULL) {
3196 	gretl_errmsg_set(_("Got no observations\n"));
3197 	return E_DATA;
3198     }
3199 
3200     if (progbar) {
3201 	(*show_progress)(0, dsize, SP_LOAD_INIT);
3202 #if GDT_DEBUG
3203 	fprintf(stderr, "read_observations: inited progess bar (n=%d)\n",
3204 		dset->n);
3205 #endif
3206     }
3207 
3208     t = 0;
3209     while (cur != NULL) {
3210         if (!xmlStrcmp(cur->name, (XUC) "obs")) {
3211 	    if (dset->markers) {
3212 		tmp = xmlGetProp(cur, (XUC) "label");
3213 		if (tmp) {
3214 		    transcribe_string(dset->S[t], (char *) tmp, OBSLEN);
3215 		    free(tmp);
3216 		} else {
3217 		    gretl_errmsg_sprintf(_("Case marker missing at obs %d"), t+1);
3218 		    return E_DATA;
3219 		}
3220 	    }
3221 	    if (!binary) {
3222 		tmp = xmlNodeListGetRawString(doc, cur->xmlChildrenNode, 1);
3223 		if (tmp) {
3224 		    err = process_values(dset, t, (char *) tmp, dset->v, NULL, &n_uflow);
3225 		    free(tmp);
3226 		} else if (dset->v > 1) {
3227 		    gretl_errmsg_sprintf(_("Values missing at observation %d"), t+1);
3228 		    err = E_DATA;
3229 		}
3230 	    }
3231 	    t++;
3232 	}
3233 
3234 	if (err) {
3235 	    break;
3236 	}
3237 
3238 	cur = cur->next;
3239 
3240 	if (cur != NULL && t == dset->n) {
3241 	    /* got too many observations */
3242 	    t = dset->n + 1;
3243 	    break;
3244 	}
3245 
3246 	if (progbar && t > 0 && t % 50 == 0) {
3247 	    (*show_progress) (50, dset->n, SP_NONE);
3248 	}
3249     }
3250 
3251  bailout:
3252 
3253     if (progbar) {
3254 #if GDT_DEBUG
3255 	fprintf(stderr, "finalizing progress bar (n = %d)\n", dset->n);
3256 #endif
3257 	(*show_progress)(0, dset->n, SP_FINISH);
3258     }
3259 
3260     if (!err && t != dset->n) {
3261 	gretl_errmsg_set(_("Number of observations does not match declaration"));
3262 	err = E_DATA;
3263     }
3264 
3265     if (!err && n_uflow > 0) {
3266 	set_underflow_warning(n_uflow);
3267     }
3268 
3269     return err;
3270 }
3271 
read_observations_subset(xmlDocPtr doc,xmlNodePtr node,DATASET * dset,int binary,double gdtversion,const char * fname,int fullv,const int * vlist,gretlopt opt)3272 static int read_observations_subset (xmlDocPtr doc,
3273 				     xmlNodePtr node,
3274 				     DATASET *dset,
3275 				     int binary,
3276 				     double gdtversion,
3277 				     const char *fname,
3278 				     int fullv,
3279 				     const int *vlist,
3280 				     gretlopt opt)
3281 {
3282     xmlNodePtr cur;
3283     xmlChar *tmp;
3284     int n, i, t;
3285     int n_uflow = 0;
3286     int err = 0;
3287 
3288     tmp = xmlGetProp(node, (XUC) "count");
3289     if (tmp == NULL) {
3290 	return E_DATA;
3291     }
3292 
3293     if (sscanf((char *) tmp, "%d", &n) == 1) {
3294 	dset->n = n;
3295 	free(tmp);
3296     } else {
3297 	gretl_errmsg_set(_("Failed to parse number of observations"));
3298 	free(tmp);
3299 	return E_DATA;
3300     }
3301 
3302     if (opt & OPT_M) {
3303 	tmp = xmlGetProp(node, (XUC) "labels");
3304 	if (tmp) {
3305 	    if (!strcmp((char *) tmp, "true")) {
3306 		if (dataset_allocate_obs_markers(dset)) {
3307 		    return E_ALLOC;
3308 		}
3309 	    }
3310 	    free(tmp);
3311 	} else {
3312 	    return E_DATA;
3313 	}
3314     }
3315 
3316     if (dset->endobs[0] == '\0') {
3317 	sprintf(dset->endobs, "%d", dset->n);
3318     }
3319 
3320     dset->t2 = dset->n - 1;
3321 
3322     for (i=0; i<dset->v; i++) {
3323 	dset->Z[i] = malloc(dset->n * sizeof **dset->Z);
3324 	if (dset->Z[i] == NULL) {
3325 	    return E_ALLOC;
3326 	}
3327     }
3328 
3329     for (t=0; t<dset->n; t++) {
3330 	dset->Z[0][t] = 1.0;
3331     }
3332 
3333     if (binary) {
3334 	err = read_binary_data(fname, dset, binary, gdtversion,
3335 			       fullv, vlist);
3336 	if (!dset->markers) {
3337 	    goto bailout;
3338 	}
3339     }
3340 
3341     /* now get individual obs info: labels and values */
3342     cur = node->xmlChildrenNode;
3343     while (cur && xmlIsBlankNode(cur)) {
3344 	cur = cur->next;
3345     }
3346 
3347     if (cur == NULL) {
3348 	gretl_errmsg_set(_("Got no observations\n"));
3349 	return E_DATA;
3350     }
3351 
3352     t = 0;
3353     while (cur != NULL) {
3354         if (!xmlStrcmp(cur->name, (XUC) "obs")) {
3355 	    if (dset->markers) {
3356 		tmp = xmlGetProp(cur, (XUC) "label");
3357 		if (tmp) {
3358 		    transcribe_string(dset->S[t], (char *) tmp, OBSLEN);
3359 		    free(tmp);
3360 		} else {
3361 		    gretl_errmsg_sprintf(_("Case marker missing at obs %d"), t+1);
3362 		    return E_DATA;
3363 		}
3364 	    }
3365 	    if (!binary) {
3366 		tmp = xmlNodeListGetRawString(doc, cur->xmlChildrenNode, 1);
3367 		if (tmp) {
3368 		    err = process_values(dset, t, (char *) tmp, fullv, vlist, &n_uflow);
3369 		    free(tmp);
3370 		} else if (dset->v > 1) {
3371 		    gretl_errmsg_sprintf(_("Values missing at observation %d"), t+1);
3372 		    err = E_DATA;
3373 		}
3374 	    }
3375 	    t++;
3376 	}
3377 
3378 	if (err) {
3379 	    break;
3380 	}
3381 
3382 	cur = cur->next;
3383 
3384 	if (cur != NULL && t == dset->n) {
3385 	    /* got too many observations */
3386 	    t = dset->n + 1;
3387 	    goto bailout;
3388 	}
3389     }
3390 
3391  bailout:
3392 
3393     if (!err && t != dset->n) {
3394 	gretl_errmsg_set(_("Number of observations does not match declaration"));
3395 	err = E_DATA;
3396     }
3397 
3398     if (!err && n_uflow > 0) {
3399 	set_underflow_warning(n_uflow);
3400     }
3401 
3402     return err;
3403 }
3404 
owner_id(const DATASET * dset,const char * s)3405 static int owner_id (const DATASET *dset, const char *s)
3406 {
3407     int i;
3408 
3409     for (i=1; i<dset->v; i++) {
3410 	if (!strcmp(s, dset->varname[i])) {
3411 	    return i;
3412 	}
3413     }
3414 
3415     return -1;
3416 }
3417 
process_string_tables(xmlDocPtr doc,xmlNodePtr node,DATASET * dset,int subset)3418 static int process_string_tables (xmlDocPtr doc,
3419 				  xmlNodePtr node,
3420 				  DATASET *dset,
3421 				  int subset)
3422 {
3423     xmlNodePtr cur = NULL;
3424     xmlChar *tmp;
3425     int ntabs = 0;
3426     int err = 0;
3427 
3428     tmp = xmlGetProp(node, (XUC) "count");
3429 
3430     if (tmp == NULL) {
3431 	err = E_DATA;
3432     } else {
3433 	if (sscanf((char *) tmp, "%d", &ntabs) != 1) {
3434 	    err = E_DATA;
3435 	}
3436 	free(tmp);
3437     }
3438 
3439 #if GDT_DEBUG
3440     fprintf(stderr, "process_string_tables: ntabs=%d, err=%d\n", ntabs, err);
3441 #endif
3442 
3443     if (!err) {
3444 	cur = node->xmlChildrenNode;
3445 	while (cur && xmlIsBlankNode(cur)) {
3446 	    cur = cur->next;
3447 	}
3448 	if (cur == NULL) {
3449 	    err = E_DATA;
3450 	}
3451     }
3452 
3453     if (err) {
3454 	fprintf(stderr, "process_string_tables: returning err = %d\n", err);
3455 	return err;
3456     }
3457 
3458     while (cur != NULL && !err) {
3459         if (!xmlStrcmp(cur->name, (XUC) "valstrings")) {
3460 	    xmlChar *owner = xmlGetProp(cur, (XUC) "owner");
3461 	    series_table *st;
3462 	    char **strs = NULL;
3463 	    int v = 0, n_strs = 0;
3464 
3465 	    if (owner == NULL) {
3466 		err = E_DATA;
3467 	    } else {
3468 		v = owner_id(dset, (const char *) owner);
3469 		if (v <= 0 && !subset) {
3470 		    fprintf(stderr, "process_string_tables: invalid owner_id\n");
3471 		    err = E_DATA;
3472 		}
3473 	    }
3474 	    if (v > 0) {
3475 		strs = gretl_xml_get_strings_array(cur, doc, &n_strs,
3476 						   0, &err);
3477 		if (err) {
3478 		    fprintf(stderr, "process_string_tables: get_strings_array "
3479 			    "gave error %d\n", err);
3480 		} else {
3481 		    st = series_table_new(strs, n_strs, &err);
3482 		    if (err) {
3483 			strings_array_free(strs, n_strs);
3484 		    } else {
3485 			series_attach_string_table(dset, v, st);
3486 		    }
3487 		}
3488 	    }
3489 	    free(owner);
3490 	}
3491 	if (!err) {
3492 	    cur = cur->next;
3493 	}
3494     }
3495 
3496     return err;
3497 }
3498 
process_panel_info(xmlNodePtr cur,DATASET * dset,int * repad)3499 static int process_panel_info (xmlNodePtr cur, DATASET *dset,
3500 			       int *repad)
3501 {
3502     xmlChar *tmp;
3503     double sd0 = 0.0;
3504     int pd = 0;
3505 
3506     tmp = xmlGetProp(cur, (XUC) "group-names");
3507     if (tmp != NULL) {
3508 	dset->pangrps = (char *) tmp;
3509     }
3510 
3511     tmp = xmlGetProp(cur, (XUC) "time-frequency");
3512     if (tmp != NULL) {
3513 	pd = atoi((const char *) tmp);
3514 	free(tmp);
3515     }
3516 
3517     tmp = xmlGetProp(cur, (XUC) "time-start");
3518     if (tmp != NULL) {
3519 	sd0 = atof((const char *) tmp);
3520 	free(tmp);
3521     }
3522 
3523     tmp = xmlGetProp(cur, (XUC) "skip-padding");
3524     if (tmp != NULL) {
3525 	*repad = 1;
3526 	free(tmp);
3527     }
3528 
3529     if (pd > 0 && sd0 > 0.0) {
3530 	dset->panel_pd = pd;
3531 	dset->panel_sd0 = sd0;
3532     }
3533 
3534     return 0;
3535 }
3536 
get_gdt_version(xmlNodePtr node)3537 static double get_gdt_version (xmlNodePtr node)
3538 {
3539     xmlChar *tmp = xmlGetProp(node, (XUC) "version");
3540     double v = 1.0;
3541 
3542     if (tmp != NULL) {
3543 	v = dot_atof((char *) tmp);
3544 	free(tmp);
3545     }
3546 
3547     return v;
3548 }
3549 
xml_get_data_structure(xmlNodePtr node,int * dattype)3550 static int xml_get_data_structure (xmlNodePtr node, int *dattype)
3551 {
3552     xmlChar *tmp = xmlGetProp(node, (XUC) "type");
3553     int err = 0;
3554 
3555     if (tmp == NULL) {
3556 	gretl_errmsg_set(_("Required attribute 'type' is missing from data file"));
3557 	err = 1;
3558     } else {
3559 	if (!strcmp((char *) tmp, "cross-section")) {
3560 	    *dattype = CROSS_SECTION;
3561 	} else if (!strcmp((char *) tmp, "time-series")) {
3562 	    *dattype = TIME_SERIES;
3563 	} else if (!strcmp((char *) tmp, "stacked-time-series")) {
3564 	    *dattype = STACKED_TIME_SERIES;
3565 	} else if (!strcmp((char *) tmp, "stacked-cross-section")) {
3566 	    *dattype = STACKED_CROSS_SECTION;
3567 	} else {
3568 	    gretl_errmsg_set(_("Unrecognized type attribute for data file"));
3569 	    err = 1;
3570 	}
3571 	free(tmp);
3572     }
3573 
3574     return err;
3575 }
3576 
xml_get_data_frequency(xmlNodePtr node,int * pd,int * dattype)3577 static int xml_get_data_frequency (xmlNodePtr node, int *pd, int *dattype)
3578 {
3579     xmlChar *tmp = xmlGetProp(node, (XUC) "frequency");
3580     int err = 0;
3581 
3582     *pd = 1;
3583 
3584     if (tmp != NULL) {
3585 	if (!strncmp((char *) tmp, "special", 7)) {
3586 	    *dattype = SPECIAL_TIME_SERIES;
3587 	    if (sscanf((char *) tmp + 7, ":%d", pd) == 1) {
3588 		fprintf(stderr, "custom time series, frequency %d\n", *pd);
3589 	    } else {
3590 		fprintf(stderr, "custom time series, using frequency 1\n");
3591 	    }
3592 	} else if (sscanf((char *) tmp, "%d", pd) != 1) {
3593 	    gretl_errmsg_set(_("Failed to parse data frequency"));
3594 	    err = 1;
3595 	}
3596 	free(tmp);
3597     }
3598 
3599     return err;
3600 }
3601 
likely_calendar(const char * s)3602 static int likely_calendar (const char *s)
3603 {
3604     return strchr(s, '-') || strchr(s, '/');
3605 }
3606 
xml_get_startobs(xmlNodePtr node,double * sd0,char * stobs,int caldata)3607 static int xml_get_startobs (xmlNodePtr node, double *sd0, char *stobs,
3608 			     int caldata)
3609 {
3610     xmlChar *tmp = xmlGetProp(node, (XUC) "startobs");
3611     int err = 0;
3612 
3613     if (tmp != NULL) {
3614 	char obstr[OBSLEN];
3615 	int y, m, d;
3616 
3617 	obstr[0] = '\0';
3618 	strncat(obstr, (char *) tmp, OBSLEN - 1);
3619 	gretl_charsub(obstr, ':', '.');
3620 
3621 	if (sscanf(obstr, "%d/%d/%d", &y, &m, &d) == 3) {
3622 	    /* handle legacy gdt dates */
3623 	    gretl_charsub(obstr, '/', '-');
3624 	}
3625 
3626 	if (likely_calendar(obstr) && caldata) {
3627 	    guint32 ed = get_epoch_day(obstr);
3628 
3629 	    if (ed <= 0) {
3630 		err = 1;
3631 	    } else {
3632 		*sd0 = ed;
3633 	    }
3634 	} else {
3635 	    double x;
3636 
3637 	    if (sscanf(obstr, "%lf", &x) != 1) {
3638 		err = 1;
3639 	    } else {
3640 		*sd0 = x;
3641 	    }
3642 	}
3643 
3644 	if (err) {
3645 	    gretl_errmsg_set(_("Failed to parse startobs"));
3646 	} else {
3647 	    stobs[0] = '\0';
3648 	    strncat(stobs, obstr, OBSLEN - 1);
3649 	    colonize_obs(stobs);
3650 	}
3651 
3652 	free(tmp);
3653     }
3654 
3655     return err;
3656 }
3657 
xml_get_endobs(xmlNodePtr node,char * endobs,int caldata)3658 static int xml_get_endobs (xmlNodePtr node, char *endobs, int caldata)
3659 {
3660     xmlChar *tmp = xmlGetProp(node, (XUC) "endobs");
3661     int err = 0;
3662 
3663     if (tmp != NULL) {
3664 	char obstr[OBSLEN];
3665 
3666 	obstr[0] = '\0';
3667 	strncat(obstr, (char *) tmp, OBSLEN - 1);
3668 	gretl_charsub(obstr, ':', '.');
3669 
3670 	if (caldata) {
3671 	    int y, m, d;
3672 	    guint32 ed;
3673 
3674 	    if (sscanf(obstr, "%d/%d/%d", &y, &m, &d) == 3) {
3675 		/* handle legacy gdt file */
3676 		gretl_charsub(obstr, '/', '-');
3677 	    }
3678 
3679 	    ed = get_epoch_day(obstr);
3680 	    if (ed <= 0) {
3681 		err = 1;
3682 	    }
3683 	} else {
3684 	    double x;
3685 
3686 	    if (sscanf(obstr, "%lf", &x) != 1) {
3687 		err = 1;
3688 	    }
3689 	}
3690 
3691 	if (err) {
3692 	    gretl_errmsg_set(_("Failed to parse endobs"));
3693 	} else {
3694 	    endobs[0] = '\0';
3695 	    strncat(endobs, obstr, OBSLEN - 1);
3696 	    colonize_obs(endobs);
3697 	}
3698 
3699 	free(tmp);
3700     }
3701 
3702     return err;
3703 }
3704 
gdt_binary_order(xmlNodePtr node)3705 static int gdt_binary_order (xmlNodePtr node)
3706 {
3707     xmlChar *tmp = xmlGetProp(node, (XUC) "binary");
3708     int ret = 0;
3709 
3710     if (tmp != NULL) {
3711 	if (!strcmp((char *) tmp, "little-endian")) {
3712 	    ret = G_LITTLE_ENDIAN;
3713 	} else if (!strcmp((char *) tmp, "big-endian")) {
3714 	    ret = G_BIG_ENDIAN;
3715 	}
3716 	free(tmp);
3717     }
3718 
3719     return ret;
3720 }
3721 
lag_from_label(int v,const DATASET * dset,int * lag)3722 static int lag_from_label (int v, const DATASET *dset, int *lag)
3723 {
3724     const char *test = series_get_label(dset, v);
3725     char pm, fmt[20], vname[VNAMELEN];
3726     int pv = 0;
3727 
3728     if (test == NULL) {
3729 	return 0;
3730     }
3731 
3732     sprintf(fmt, "= %%%d[^(](t %%c %%d)", VNAMELEN - 1);
3733 
3734     if (sscanf(test, fmt, vname, &pm, lag) == 3) {
3735 	pv = series_index(dset, vname);
3736 	pv = (pv < dset->v)? pv : 0;
3737     }
3738 
3739     return pv;
3740 }
3741 
dummy_child_from_label(int v,const DATASET * dset)3742 static int dummy_child_from_label (int v, const DATASET *dset)
3743 {
3744     const char *test = series_get_label(dset, v);
3745     char vname[VNAMELEN];
3746     double val;
3747     int pv = 0;
3748 
3749     if (test == NULL) {
3750 	return 0;
3751     }
3752 
3753     if (sscanf(test, _("dummy for %s = %lf"), vname, &val) == 2 ||
3754 	sscanf(test, "dummy for %s = %lf", vname, &val) == 2) {
3755 	pv = series_index(dset, vname);
3756 	pv = (pv < dset->v)? pv : 0;
3757     }
3758 
3759     return pv;
3760 }
3761 
record_transform_info(DATASET * dset,double version)3762 static void record_transform_info (DATASET *dset, double version)
3763 {
3764     int i, p, pv;
3765 
3766     for (i=1; i<dset->v; i++) {
3767 	if (series_get_transform(dset, i) == LAGS) {
3768 	    /* already handled */
3769 	    continue;
3770 	}
3771 	pv = lag_from_label(i, dset, &p);
3772 	if (pv > 0) {
3773 	    series_set_parent(dset, i, dset->varname[pv]);
3774 	    series_set_transform(dset, i, LAGS);
3775 	    series_set_lag(dset, i, p);
3776 	} else if (version < 1.1) {
3777 	    pv = dummy_child_from_label(i, dset);
3778 	    if (pv > 0) {
3779 		series_set_parent(dset, i, dset->varname[pv]);
3780 		series_set_transform(dset, i, DUMMIFY);
3781 	    }
3782 	}
3783     }
3784 }
3785 
data_read_message(const char * fname,DATASET * dset,PRN * prn)3786 static void data_read_message (const char *fname, DATASET *dset, PRN *prn)
3787 {
3788     pprintf(prn, _("\nRead datafile %s\n"), fname);
3789     pprintf(prn, _("periodicity: %d, maxobs: %d\n"
3790 		   "observations range: %s to %s\n"),
3791 	    (custom_time_series(dset))? 1 : dset->pd,
3792 	    dset->n, dset->stobs, dset->endobs);
3793     pputc(prn, '\n');
3794 }
3795 
get_filesize(const char * fname)3796 static long get_filesize (const char *fname)
3797 {
3798     struct stat buf;
3799     int err;
3800 
3801     err = gretl_stat(fname, &buf);
3802 
3803     return (err)? -1 : buf.st_size;
3804 }
3805 
remedy_empty_data(DATASET * dset)3806 static int remedy_empty_data (DATASET *dset)
3807 {
3808     int err = dataset_add_series(dset, 1);
3809 
3810     if (!err) {
3811 	int t;
3812 
3813 	strcpy(dset->varname[1], "index");
3814 	series_set_label(dset, 1, _("index variable"));
3815 	for (t=0; t<dset->n; t++) {
3816 	    dset->Z[1][t] = (double) (t + 1);
3817 	}
3818     }
3819 
3820     return err;
3821 }
3822 
check_for_daily_date_strings(DATASET * dset)3823 static void check_for_daily_date_strings (DATASET *dset)
3824 {
3825     int m, d, n;
3826     int y1 = 0, y2 = 0;
3827     int oldfmt = 0;
3828 
3829     n = sscanf(dset->S[0], YMD_READ_FMT, &y1, &m, &d);
3830     if (n != 3) {
3831 	oldfmt = 1;
3832 	n = sscanf(dset->S[0], "%d/%d/%d", &y1, &m, &d);
3833     }
3834 
3835     if (n == 3) {
3836 	int k = dset->n - 1;
3837 
3838 	if (oldfmt) {
3839 	    n = sscanf(dset->S[k], "%d/%d/%d", &y2, &m, &d);
3840 	} else {
3841 	    n = sscanf(dset->S[k], YMD_READ_FMT, &y2, &m, &d);
3842 	}
3843     }
3844 
3845     if (n == 3 && y2 >= y1) {
3846 	dset->markers = DAILY_DATE_STRINGS;
3847     }
3848 }
3849 
replace_panel_padding(DATASET * dset)3850 static int replace_panel_padding (DATASET *dset)
3851 {
3852     int uv = dset->v - 2;
3853     int tv = dset->v - 1;
3854     int err = 0;
3855 
3856     if (!strcmp(dset->varname[uv], "unit__") &&
3857 	!strcmp(dset->varname[tv], "time__")) {
3858 	err = set_panel_structure_from_vars(uv, tv, dset);
3859 	if (!err) {
3860 	    dataset_drop_last_variables(dset, 2);
3861 	}
3862     }
3863 
3864     return err;
3865 }
3866 
future_datafile_warning(double v1,double v2)3867 static void future_datafile_warning (double v1, double v2)
3868 {
3869     const char *fmt =
3870 	_("The version of this datafile (%g) is higher than that fully\n"
3871 	  "supported by this build of gretl (%g). Some features may not be\n"
3872 	  "correctly recognized.\n");
3873 
3874     gretl_warnmsg_sprintf(fmt, v1, v2);
3875 }
3876 
real_read_gdt(const char * fname,const char * srcname,DATASET * dset,gretlopt opt,PRN * prn)3877 static int real_read_gdt (const char *fname, const char *srcname,
3878 			  DATASET *dset, gretlopt opt, PRN *prn)
3879 {
3880     DATASET *tmpset;
3881     xmlDocPtr doc = NULL;
3882     xmlNodePtr cur;
3883     int gotvars = 0, gotobs = 0, err = 0;
3884     int caldata = 0, repad = 0;
3885     double gdtversion = 1.0;
3886     double myversion;
3887     int in_c_locale = 0;
3888     int gz, binary = 0;
3889     long fsz;
3890 
3891     gretl_error_clear();
3892     fsz = get_filesize(fname);
3893     gz = is_gzipped(fname);
3894 
3895     if (fsz < 0) {
3896 	return E_FOPEN;
3897     } else if (fsz > 100000) {
3898 	fprintf(stderr, "%s %.0f Kbytes %s...\n",
3899 		gz ? "Uncompressing" : "Reading",
3900 		(double) fsz / 1024.0, "of data");
3901     }
3902 
3903     tmpset = datainfo_new();
3904     if (tmpset == NULL) {
3905 	err = E_ALLOC;
3906 	goto bailout;
3907     }
3908 
3909     err = gretl_xml_open_doc_root(fname, "gretldata",
3910 				  &doc, &cur);
3911     if (err) {
3912 	goto bailout;
3913     }
3914 
3915     gdtversion = get_gdt_version(cur);
3916     myversion = dot_atof(GRETLDATA_VERSION);
3917     if (gdtversion > myversion) {
3918 	future_datafile_warning(gdtversion, myversion);
3919     }
3920 
3921     /* optional */
3922     gretl_xml_get_prop_as_unsigned_int(cur, "rseed", &tmpset->rseed);
3923 
3924     /* optional */
3925     gretl_xml_get_prop_as_string(cur, "mapfile", &tmpset->mapfile);
3926 
3927     /* set some required datainfo parameters */
3928 
3929     err = xml_get_data_structure(cur, &tmpset->structure);
3930     if (err) {
3931 	goto bailout;
3932     }
3933 
3934     err = xml_get_data_frequency(cur, &tmpset->pd, &tmpset->structure);
3935     if (err) {
3936 	goto bailout;
3937     }
3938 
3939     gretl_push_c_numeric_locale();
3940     in_c_locale = 1;
3941 
3942     strcpy(tmpset->stobs, "1");
3943     caldata = dataset_is_daily(tmpset) || dataset_is_weekly(tmpset);
3944 
3945     err = xml_get_startobs(cur, &tmpset->sd0, tmpset->stobs, caldata);
3946     if (err) {
3947 	goto bailout;
3948     }
3949 
3950     *tmpset->endobs = '\0';
3951     caldata = calendar_data(tmpset);
3952 
3953     err = xml_get_endobs(cur, tmpset->endobs, caldata);
3954     if (err) {
3955 	goto bailout;
3956     }
3957 
3958     binary = gdt_binary_order(cur);
3959 
3960 #if GDT_DEBUG
3961     fprintf(stderr, "starting to walk XML tree...\n");
3962 #endif
3963 
3964     /* Now walk the tree */
3965     cur = cur->xmlChildrenNode;
3966     while (cur != NULL && !err) {
3967         if (!xmlStrcmp(cur->name, (XUC) "description")) {
3968 	    tmpset->descrip = (char *)
3969 		xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
3970         } else if (!xmlStrcmp(cur->name, (XUC) "variables")) {
3971 	    err = process_varlist(cur, tmpset, 0);
3972 	    if (err) {
3973 		fprintf(stderr, "error processing varlist\n");
3974 	    } else {
3975 		gotvars = 1;
3976 	    }
3977 	} else if (!xmlStrcmp(cur->name, (XUC) "observations")) {
3978 	    if (!gotvars) {
3979 		gretl_errmsg_set(_("Variables information is missing"));
3980 		err = 1;
3981 	    } else {
3982 		double dsize = (opt & OPT_B)? (double) fsz : 0;
3983 
3984 		err = read_observations(doc, cur, tmpset, dsize,
3985 					binary, gdtversion, fname);
3986 		if (err) {
3987 		    fprintf(stderr, "error %d in read_observations\n", err);
3988 		} else {
3989 		    gotobs = 1;
3990 		}
3991 	    }
3992 	} else if (!xmlStrcmp(cur->name, (XUC) "string-tables")) {
3993 	    if (!gotvars) {
3994 		gretl_errmsg_set(_("Variables information is missing"));
3995 		err = E_DATA;
3996 	    } else {
3997 		err = process_string_tables(doc, cur, tmpset, 0);
3998 		if (err) {
3999 		    fprintf(stderr, "error %d processing string tables\n", err);
4000 		}
4001 	    }
4002 	} else if (!xmlStrcmp(cur->name, (XUC) "panel-info")) {
4003 	    if (!gotvars) {
4004 		gretl_errmsg_set(_("Variables information is missing"));
4005 		err = E_DATA;
4006 	    } else {
4007 		err = process_panel_info(cur, tmpset, &repad);
4008 		if (err) {
4009 		    fprintf(stderr, "error %d processing panel info\n", err);
4010 		}
4011 	    }
4012 	}
4013 	if (!err) {
4014 	    cur = cur->next;
4015 	}
4016     }
4017 
4018 #if GDT_DEBUG
4019     fprintf(stderr, "done walking XML tree, err = %d\n", err);
4020 #endif
4021 
4022     if (!err && !gotvars) {
4023 	gretl_errmsg_set(_("Variables information is missing"));
4024 	err = 1;
4025     }
4026 
4027     if (!err && !gotobs) {
4028 	gretl_errmsg_set(_("No observations were found"));
4029 	err = 1;
4030     }
4031 
4032     if (!err && caldata && tmpset->S != NULL) {
4033 	check_for_daily_date_strings(tmpset);
4034     }
4035 
4036     if (!err && repad) {
4037 	err = replace_panel_padding(tmpset);
4038     }
4039 
4040     if (!err) {
4041 	if (srcname == NULL) {
4042 	    srcname = fname;
4043 	}
4044 	data_read_message(srcname, tmpset, prn);
4045 	err = merge_or_replace_data(dset, &tmpset,
4046 				    get_merge_opts(opt), prn);
4047     }
4048 
4049  bailout:
4050 
4051     if (in_c_locale) {
4052 	gretl_pop_c_numeric_locale();
4053     }
4054 
4055     if (doc != NULL) {
4056 	xmlFreeDoc(doc);
4057     }
4058 
4059     if (dset != NULL) {
4060 	/* pre-process stacked cross-sectional panels: put into canonical
4061 	   stacked time series form
4062 	*/
4063 	if (!err && dset->structure == STACKED_CROSS_SECTION) {
4064 	    err = switch_panel_orientation(dset);
4065 	}
4066 
4067 	if (!err && dset->v == 1) {
4068 	    err = remedy_empty_data(dset);
4069 	}
4070 
4071 	if (!err && gdtversion < 1.2) {
4072 	    record_transform_info(dset, gdtversion);
4073 	}
4074     }
4075 
4076     if (err && tmpset != NULL) {
4077 	destroy_dataset(tmpset);
4078     }
4079 
4080     if (err && err != E_ALLOC) {
4081 	/* ensure we don't just show "unspecified error" */
4082 	gchar *msg;
4083 
4084 	if (err == E_DATA) {
4085 	    msg = g_strdup_printf(_("'%s': malformed gdt file"), fname);
4086 	} else {
4087 	    msg = g_strdup_printf(_("Couldn't read '%s'"), fname);
4088 	}
4089 
4090 	gretl_errmsg_ensure(msg);
4091 	g_free(msg);
4092     }
4093 
4094 #if GDT_DEBUG
4095     fprintf(stderr, "gretl_read_gdt: returning %d\n", err);
4096 #endif
4097 
4098     return err;
4099 }
4100 
real_read_gdt_subset(const char * fname,DATASET * dset,const int * vlist,gretlopt opt)4101 static int real_read_gdt_subset (const char *fname,
4102 				 DATASET *dset,
4103 				 const int *vlist,
4104 				 gretlopt opt)
4105 {
4106     DATASET *tmpset;
4107     xmlDocPtr doc = NULL;
4108     xmlNodePtr cur;
4109     double gdtversion = 1.0;
4110     int gotvars = 0, gotobs = 0;
4111     int caldata = 0;
4112     int in_c_locale = 0;
4113     int binary = 0;
4114     int fullv = 0;
4115     int err = 0;
4116 
4117     gretl_error_clear();
4118 
4119     tmpset = datainfo_new();
4120     if (tmpset == NULL) {
4121 	err = E_ALLOC;
4122 	goto bailout;
4123     }
4124 
4125     err = gretl_xml_open_doc_root(fname, "gretldata", &doc, &cur);
4126     if (err) {
4127 	goto bailout;
4128     }
4129 
4130     /* set some datainfo parameters */
4131 
4132     err = xml_get_data_structure(cur, &tmpset->structure);
4133     if (err) {
4134 	goto bailout;
4135     }
4136 
4137     err = xml_get_data_frequency(cur, &tmpset->pd, &tmpset->structure);
4138     if (err) {
4139 	goto bailout;
4140     }
4141 
4142     gdtversion = get_gdt_version(cur);
4143 
4144     gretl_push_c_numeric_locale();
4145     in_c_locale = 1;
4146 
4147     strcpy(tmpset->stobs, "1");
4148     caldata = dataset_is_daily(tmpset) || dataset_is_weekly(tmpset);
4149 
4150     err = xml_get_startobs(cur, &tmpset->sd0, tmpset->stobs, caldata);
4151     if (err) {
4152 	goto bailout;
4153     }
4154 
4155     *tmpset->endobs = '\0';
4156     caldata = calendar_data(tmpset);
4157 
4158     err = xml_get_endobs(cur, tmpset->endobs, caldata);
4159     if (err) {
4160 	goto bailout;
4161     }
4162 
4163     binary = gdt_binary_order(cur);
4164 
4165 #if GDT_DEBUG
4166     fprintf(stderr, "%s: starting to walk XML tree...\n", fname);
4167 #endif
4168 
4169     /* Now walk the tree */
4170     cur = cur->xmlChildrenNode;
4171     while (cur != NULL && !err) {
4172         if (!xmlStrcmp(cur->name, (XUC) "variables")) {
4173 	    err = process_varlist_subset(cur, tmpset, &fullv, vlist);
4174 	    if (!err) {
4175 		gotvars = 1;
4176 	    }
4177 	} else if (!xmlStrcmp(cur->name, (XUC) "observations")) {
4178 	    if (!gotvars) {
4179 		gretl_errmsg_set(_("Variables information is missing"));
4180 		err = E_DATA;
4181 	    } else {
4182 		err = read_observations_subset(doc, cur, tmpset,
4183 					       binary, gdtversion,
4184 					       fname, fullv, vlist,
4185 					       opt);
4186 	    }
4187 	    if (!err) {
4188 		gotobs = 1;
4189 	    }
4190 	} else if (!xmlStrcmp(cur->name, (XUC) "string-tables")) {
4191 	    if (!gotvars) {
4192 		gretl_errmsg_set(_("Variables information is missing"));
4193 		err = 1;
4194 	    } else {
4195 		err = process_string_tables(doc, cur, tmpset, 1);
4196 	    }
4197 	}
4198 	if (!err) {
4199 	    cur = cur->next;
4200 	}
4201     }
4202 
4203 #if GDT_DEBUG
4204     fprintf(stderr, "done walking XML tree...\n");
4205 #endif
4206 
4207     if (!err && !gotvars) {
4208 	gretl_errmsg_set(_("Variables information is missing"));
4209 	err = 1;
4210     }
4211 
4212     if (!err && !gotobs) {
4213 	gretl_errmsg_set(_("No observations were found"));
4214 	err = 1;
4215     }
4216 
4217     if (!err && caldata && tmpset->S != NULL) {
4218 	check_for_daily_date_strings(tmpset);
4219     }
4220 
4221  bailout:
4222 
4223     if (in_c_locale) {
4224 	gretl_pop_c_numeric_locale();
4225     }
4226 
4227     if (doc != NULL) {
4228 	xmlFreeDoc(doc);
4229     }
4230 
4231     if (!err) {
4232 	*dset = *tmpset;
4233 	free(tmpset);
4234     } else {
4235 	destroy_dataset(tmpset);
4236     }
4237 
4238 #if GDT_DEBUG
4239     fprintf(stderr, "real_read_gdt_subset: returning %d\n", err);
4240 #endif
4241 
4242     return err;
4243 }
4244 
real_read_gdt_varnames(const char * fname,char *** vnames,int * nvars)4245 static int real_read_gdt_varnames (const char *fname,
4246 				   char ***vnames,
4247 				   int *nvars)
4248 {
4249     DATASET *tmpset;
4250     xmlDocPtr doc = NULL;
4251     xmlNodePtr cur;
4252     int gotvars = 0;
4253     int caldata = 0;
4254     int in_c_locale = 0;
4255     int err = 0;
4256 
4257     gretl_error_clear();
4258 
4259     tmpset = datainfo_new();
4260     if (tmpset == NULL) {
4261 	err = E_ALLOC;
4262 	goto bailout;
4263     }
4264 
4265     err = gretl_xml_open_doc_root(fname, "gretldata", &doc, &cur);
4266     if (err) {
4267 	goto bailout;
4268     }
4269 
4270     /* set some datainfo parameters */
4271 
4272     err = xml_get_data_structure(cur, &tmpset->structure);
4273     if (err) {
4274 	goto bailout;
4275     }
4276 
4277     err = xml_get_data_frequency(cur, &tmpset->pd, &tmpset->structure);
4278     if (err) {
4279 	goto bailout;
4280     }
4281 
4282     gretl_push_c_numeric_locale();
4283     in_c_locale = 1;
4284 
4285     strcpy(tmpset->stobs, "1");
4286     caldata = dataset_is_daily(tmpset) || dataset_is_weekly(tmpset);
4287 
4288     err = xml_get_startobs(cur, &tmpset->sd0, tmpset->stobs, caldata);
4289     if (err) {
4290 	goto bailout;
4291     }
4292 
4293     *tmpset->endobs = '\0';
4294     caldata = calendar_data(tmpset);
4295 
4296     err = xml_get_endobs(cur, tmpset->endobs, caldata);
4297     if (err) {
4298 	goto bailout;
4299     }
4300 
4301     /* Now walk the tree */
4302     cur = cur->xmlChildrenNode;
4303     while (cur != NULL && !err) {
4304         if (!xmlStrcmp(cur->name, (XUC) "variables")) {
4305 	    err = process_varlist(cur, tmpset, 1);
4306 	    if (!err) {
4307 		gotvars = 1;
4308 	    }
4309 	    break;
4310 	}
4311 	cur = cur->next;
4312     }
4313 
4314     if (!err && !gotvars) {
4315 	gretl_errmsg_set(_("Variables information is missing"));
4316 	err = 1;
4317     }
4318 
4319  bailout:
4320 
4321     if (in_c_locale) {
4322 	gretl_pop_c_numeric_locale();
4323     }
4324 
4325     if (doc != NULL) {
4326 	xmlFreeDoc(doc);
4327     }
4328 
4329     if (!err) {
4330 	*vnames = tmpset->varname;
4331 	*nvars = tmpset->v;
4332 	tmpset->varname = NULL;
4333     }
4334 
4335     destroy_dataset(tmpset);
4336 
4337     return err;
4338 }
4339 
read_gbin(const char * fname,DATASET * dset,gretlopt opt,PRN * prn)4340 static int read_gbin (const char *fname, DATASET *dset,
4341 		      gretlopt opt, PRN *prn)
4342 {
4343     int (*reader) (const char *, DATASET *, gretlopt,
4344 		   PRN *prn);
4345     int err = 0;
4346 
4347     reader = get_plugin_function("purebin_read_data");
4348 
4349     if (reader == NULL) {
4350         err = 1;
4351     } else {
4352 	err = (*reader)(fname, dset, opt, prn);
4353     }
4354 
4355     return err;
4356 }
4357 
read_gbin_subset(const char * fname,DATASET * dset,int * vlist,gretlopt opt)4358 static int read_gbin_subset (const char *fname, DATASET *dset,
4359 			     int *vlist, gretlopt opt)
4360 {
4361     int (*reader) (const char *, DATASET *, int *, gretlopt);
4362     int err = 0;
4363 
4364     reader = get_plugin_function("purebin_read_subset");
4365 
4366     if (reader == NULL) {
4367         err = 1;
4368     } else {
4369 	err = (*reader)(fname, dset, vlist, opt);
4370     }
4371 
4372     return err;
4373 }
4374 
read_gbin_varnames(const char * fname,char *** pS,int * pns)4375 static int read_gbin_varnames (const char *fname, char ***pS,
4376 			       int *pns)
4377 {
4378     int (*reader) (const char *, char ***, int *);
4379     int err = 0;
4380 
4381     reader = get_plugin_function("purebin_read_varnames");
4382 
4383     if (reader == NULL) {
4384         err = 1;
4385     } else {
4386 	err = (*reader)(fname, pS, pns);
4387     }
4388 
4389     return err;
4390 }
4391 
is_purebin_file(const char * fname)4392 static int is_purebin_file (const char *fname)
4393 {
4394     FILE *fp = gretl_fopen(fname, "rb");
4395     int ret = 0;
4396 
4397     if (fp != NULL) {
4398 	char buf[16];
4399 
4400 	if (fread(buf, 1, 14, fp) == 14 &&
4401 	    !strcmp(buf, "gretl-purebin")) {
4402 	    ret = 1;
4403 	}
4404 	fclose(fp);
4405     }
4406 
4407     return ret;
4408 }
4409 
4410 /**
4411  * gretl_read_gdt:
4412  * @fname: name of file to open for reading.
4413  * @dset: dataset struct.
4414  * @opt: use OPT_B to display gui progress bar; may also
4415  * use OPT_T when appending to panel data (see the "append"
4416  * command in the gretl manual). Otherwise use OPT_NONE.
4417  * @prn: where any messages should be written.
4418  *
4419  * Read data from native file into gretl's workspace.
4420  *
4421  * Returns: 0 on successful completion, non-zero otherwise.
4422  */
4423 
gretl_read_gdt(const char * fname,DATASET * dset,gretlopt opt,PRN * prn)4424 int gretl_read_gdt (const char *fname, DATASET *dset,
4425 		    gretlopt opt, PRN *prn)
4426 {
4427     int gdtb = has_suffix(fname, ".gdtb");
4428 
4429     if (gdtb && is_purebin_file(fname)) {
4430 	return read_gbin(fname, dset, opt, prn);
4431     } else if (gdtb) {
4432 	/* zipfile with gdt + binary */
4433 	gchar *zdir;
4434 	int id = -1;
4435 	int err;
4436 
4437 #ifdef HAVE_MPI
4438 	if (gretl_mpi_initialized()) {
4439 	    id = gretl_mpi_rank();
4440 	}
4441 #endif
4442 	if (id >= 0) {
4443 	    zdir = g_strdup_printf("%stmp%d-unzip", gretl_dotdir(), id);
4444 	} else {
4445 	    zdir = g_strdup_printf("%stmp-unzip", gretl_dotdir());
4446 	}
4447 	err = gretl_mkdir(zdir);
4448 
4449 	if (!err) {
4450 	    err = gretl_unzip_into(fname, zdir);
4451 	    if (err) {
4452 		gretl_errmsg_ensure("Problem opening data file");
4453 	    } else {
4454 		char xmlfile[FILENAME_MAX];
4455 
4456 		gretl_build_path(xmlfile, zdir, "data.xml", NULL);
4457 		err = real_read_gdt(xmlfile, fname, dset, opt, prn);
4458 	    }
4459 	    gretl_deltree(zdir);
4460 	}
4461 
4462 	g_free(zdir);
4463 	return err;
4464     } else {
4465 	/* plain XML file */
4466 	return real_read_gdt(fname, NULL, dset, opt, prn);
4467     }
4468 }
4469 
4470 /**
4471  * gretl_read_gdt_subset:
4472  * @fname: name of file to open for reading.
4473  * @dset: dataset struct.
4474  * @vlist: list of series to extract.
4475  * @opt: may include OPT_M to retrieve the observation
4476  * markers associated with the data, if any.
4477  *
4478  * Read specified series from native file into @dset,
4479  * which should be "empty" on input.
4480  *
4481  * Returns: 0 on successful completion, non-zero otherwise.
4482  */
4483 
gretl_read_gdt_subset(const char * fname,DATASET * dset,int * vlist,gretlopt opt)4484 int gretl_read_gdt_subset (const char *fname, DATASET *dset,
4485 			   int *vlist, gretlopt opt)
4486 {
4487     int gdtb = has_suffix(fname, ".gdtb");
4488     int err = 0;
4489 
4490     if (gdtb && is_purebin_file(fname)) {
4491 	err = read_gbin_subset(fname, dset, vlist, opt);
4492     } else if (gdtb) {
4493 	/* zipfile with gdt + binary */
4494 	gchar *zdir;
4495 	int err;
4496 
4497 	zdir = g_strdup_printf("%stmp-unzip", gretl_dotdir());
4498 	err = gretl_mkdir(zdir);
4499 
4500 	if (!err) {
4501 	    err = gretl_unzip_into(fname, zdir);
4502 	    if (err) {
4503 		gretl_errmsg_ensure("Problem opening data file");
4504 	    } else {
4505 		char xmlfile[FILENAME_MAX];
4506 
4507 		gretl_build_path(xmlfile, zdir, "data.xml", NULL);
4508 		err = real_read_gdt_subset(xmlfile, dset, vlist, opt);
4509 	    }
4510 	    gretl_deltree(zdir);
4511 	}
4512 
4513 	g_free(zdir);
4514     } else {
4515 	/* plain XML file */
4516 	err = real_read_gdt_subset(fname, dset, vlist, opt);
4517     }
4518 
4519 #if GDT_DEBUG
4520     fprintf(stderr, "gretl_read_gdt_subset: returning %d\n", err);
4521 #endif
4522 
4523     return err;
4524 }
4525 
4526 /**
4527  * gretl_read_gdt_varnames:
4528  * @fname: name of file to open for reading.
4529  * @vnames: location to receive array of series names.
4530  * @nvars: location to receive the number of series.
4531  *
4532  * Read the array of series names from the specified file.
4533  *
4534  * Returns: 0 on successful completion, non-zero otherwise.
4535  */
4536 
gretl_read_gdt_varnames(const char * fname,char *** vnames,int * nvars)4537 int gretl_read_gdt_varnames (const char *fname,
4538 			     char ***vnames,
4539 			     int *nvars)
4540 {
4541     int gdtb = has_suffix(fname, ".gdtb");
4542     int err = 0;
4543 
4544     if (gdtb && is_purebin_file(fname)) {
4545 	err = read_gbin_varnames(fname, vnames, nvars);
4546     } else if (gdtb) {
4547 	/* zipfile with gdt + binary */
4548 	gchar *zdir;
4549 	int err;
4550 
4551 	zdir = g_strdup_printf("%stmp-unzip", gretl_dotdir());
4552 	err = gretl_mkdir(zdir);
4553 
4554 	if (!err) {
4555 	    err = gretl_unzip_into(fname, zdir);
4556 	    if (err) {
4557 		gretl_errmsg_ensure("Problem opening data file");
4558 	    } else {
4559 		char xmlfile[FILENAME_MAX];
4560 
4561 		gretl_build_path(xmlfile, zdir, "data.xml", NULL);
4562 		err = real_read_gdt_varnames(xmlfile, vnames,
4563 					     nvars);
4564 	    }
4565 	    gretl_deltree(zdir);
4566 	}
4567 
4568 	g_free(zdir);
4569     } else {
4570 	/* plain XML file */
4571 	err = real_read_gdt_varnames(fname, vnames, nvars);
4572     }
4573 
4574     return err;
4575 }
4576 
4577 /**
4578  * gretl_get_gdt_description:
4579  * @fname: name of file to try.
4580  *
4581  * Read data description for gretl native data file.
4582  *
4583  * Returns: buffer containing description, or NULL on failure.
4584  */
4585 
gretl_get_gdt_description(const char * fname,int * err)4586 char *gretl_get_gdt_description (const char *fname, int *err)
4587 {
4588     xmlDocPtr doc;
4589     xmlNodePtr cur;
4590     int found = 0;
4591     xmlChar *buf = NULL;
4592 
4593     gretl_error_clear();
4594 
4595     if (has_suffix(fname, ".gdtb")) {
4596 	gretl_errmsg_set("Binary data file, cannot access description");
4597 	*err = E_DATA;
4598 	return NULL;
4599     }
4600 
4601     *err = gretl_xml_open_doc_root(fname, "gretldata", &doc, &cur);
4602     if (*err) {
4603 	return NULL;
4604     }
4605 
4606     cur = cur->xmlChildrenNode;
4607     while (cur != NULL && !found) {
4608         if (!xmlStrcmp(cur->name, (XUC) "description")) {
4609 	    buf = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
4610 	    found = 1;
4611         }
4612 	cur = cur->next;
4613     }
4614 
4615     if (!found) {
4616 	gretl_errmsg_set("No description was found");
4617 	*err = E_DATA;
4618     }
4619 
4620     xmlFreeDoc(doc);
4621 
4622     return (char *) buf;
4623 }
4624 
gretl_xml_get_doc_type(const char * fname,int * err)4625 static char *gretl_xml_get_doc_type (const char *fname, int *err)
4626 {
4627     xmlDocPtr doc;
4628     xmlNodePtr node;
4629     char *ret = NULL;
4630 
4631     doc = xmlParseFile(fname);
4632 
4633     if (doc == NULL) {
4634 	gretl_errmsg_sprintf(_("xmlParseFile failed on %s"), fname);
4635 	*err = E_DATA;
4636     } else {
4637 	node = xmlDocGetRootElement(doc);
4638 	if (node == NULL) {
4639 	    gretl_errmsg_sprintf(_("%s: empty document"), fname);
4640 	    *err = E_DATA;
4641 	} else {
4642 	    ret = gretl_strdup((char *) node->name);
4643 	    if (ret == NULL) {
4644 		*err = E_ALLOC;
4645 	    }
4646 	}
4647     }
4648 
4649     if (doc != NULL) {
4650 	xmlFreeDoc(doc);
4651     }
4652 
4653     return ret;
4654 }
4655 
4656 /* This is called in response to the "include" command in
4657    the CLI program, the GUI program, and in interact.c,
4658    if we detect that the named file is XML.
4659 */
4660 
load_XML_functions_file(const char * fname,gretlopt opt,PRN * prn)4661 int load_XML_functions_file (const char *fname, gretlopt opt, PRN *prn)
4662 {
4663     char *rootname = NULL;
4664     int err = 0;
4665 
4666     rootname = gretl_xml_get_doc_type(fname, &err);
4667     if (err) {
4668 	return err;
4669     }
4670 
4671     if (!strcmp(rootname, "gretl-functions")) {
4672 	if (has_suffix(fname, ".gfn")) {
4673 	    err = include_gfn(fname, opt, prn);
4674 	} else {
4675 	    err = read_session_functions_file(fname);
4676 	}
4677     } else {
4678 	err = E_DATA;
4679     }
4680 
4681     free(rootname);
4682 
4683     return err;
4684 }
4685 
gretl_xml_init(void)4686 void gretl_xml_init (void)
4687 {
4688     xmlInitParser();
4689 }
4690 
gretl_xml_cleanup(void)4691 void gretl_xml_cleanup (void)
4692 {
4693     xmlCleanupParser();
4694 }
4695