1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #define FULL_XML_HEADERS
21
22 #include "libgretl.h"
23 #include "gretl_xml.h"
24 #include "gretl_panel.h"
25 #include "gretl_func.h"
26 #include "gretl_typemap.h"
27 #include "gretl_string_table.h"
28 #include "dbread.h"
29 #include "swap_bytes.h"
30 #include "gretl_zip.h"
31
32 #ifdef HAVE_MPI
33 # include "gretl_mpi.h"
34 #endif
35
36 #ifdef WIN32
37 # include "gretl_win32.h"
38 # include <glib/gstdio.h>
39 #endif
40
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <unistd.h>
44 #include <errno.h>
45
46 #undef XML_DEBUG
47
48 #define GRETLDATA_VERSION "1.4"
49 #define GRETLDATA_COMPAT "1.3" /* status quo as of gretl 2018b */
50
51 #define GDT_DEBUG 0
52
gretl_xml_open_doc_root(const char * fname,const char * rootname,xmlDocPtr * pdoc,xmlNodePtr * pnode)53 int gretl_xml_open_doc_root (const char *fname,
54 const char *rootname,
55 xmlDocPtr *pdoc,
56 xmlNodePtr *pnode)
57 {
58 xmlDocPtr doc;
59 xmlNodePtr node = NULL;
60 int err = 0;
61
62 LIBXML_TEST_VERSION;
63 xmlKeepBlanksDefault(0);
64
65 *pdoc = NULL;
66 if (pnode != NULL) {
67 *pnode = NULL;
68 }
69
70 doc = xmlParseFile(fname);
71 if (doc == NULL) {
72 gretl_errmsg_sprintf(_("xmlParseFile failed on %s"), fname);
73 err = 1;
74 }
75
76 if (!err && pnode != NULL) {
77 node = xmlDocGetRootElement(doc);
78 if (node == NULL) {
79 gretl_errmsg_sprintf(_("%s: empty document"), fname);
80 xmlFreeDoc(doc);
81 err = 1;
82 }
83 }
84
85 if (!err && node != NULL && rootname != NULL) {
86 if (xmlStrcmp(node->name, (XUC) rootname)) {
87 gretl_errmsg_sprintf(_("File of the wrong type, root node not %s"),
88 rootname);
89 fprintf(stderr, "Unexpected root node '%s'\n", (char *) node->name);
90 xmlFreeDoc(doc);
91 err = 1;
92 }
93 }
94
95 if (!err) {
96 *pdoc = doc;
97 if (pnode != NULL) {
98 *pnode = node;
99 }
100 }
101
102 return err;
103 }
104
compact_method_to_string(int method)105 static char *compact_method_to_string (int method)
106 {
107 if (method == COMPACT_SUM) return "COMPACT_SUM";
108 else if (method == COMPACT_AVG) return "COMPACT_AVG";
109 else if (method == COMPACT_SOP) return "COMPACT_SOP";
110 else if (method == COMPACT_EOP) return "COMPACT_EOP";
111 else return "COMPACT_NONE";
112 }
113
compact_string_to_int(const char * str)114 static int compact_string_to_int (const char *str)
115 {
116 if (!strcmp(str, "COMPACT_SUM")) return COMPACT_SUM;
117 else if (!strcmp(str, "COMPACT_AVG")) return COMPACT_AVG;
118 else if (!strcmp(str, "COMPACT_SOP")) return COMPACT_SOP;
119 else if (!strcmp(str, "COMPACT_EOP")) return COMPACT_EOP;
120 else return COMPACT_NONE;
121 }
122
123 /* given a full filename in @src, write to @dest a "simple"
124 counterpart without leading path or extension
125 */
126
simple_fname(char * dest,const char * src)127 static char *simple_fname (char *dest, const char *src)
128 {
129 char *p;
130 const char *s;
131
132 s = strrslash(src);
133
134 /* take last part of src filename */
135 if (s != NULL) {
136 strcpy(dest, s + 1);
137 } else {
138 strcpy(dest, src);
139 }
140
141 /* trash any extension */
142 p = strrchr(dest, '.');
143 if (p != NULL && strlen(dest) > 3) {
144 *p = '\0';
145 }
146
147 return dest;
148 }
149
data_structure_string(int s)150 static const char *data_structure_string (int s)
151 {
152 switch (s) {
153 case TIME_SERIES:
154 case SPECIAL_TIME_SERIES:
155 return "time-series";
156 case STACKED_TIME_SERIES:
157 return "stacked-time-series";
158 case STACKED_CROSS_SECTION:
159 return "stacked-cross-section";
160 default:
161 return "cross-section";
162 }
163 }
164
savenum(const int * list,int i)165 static int savenum (const int *list, int i)
166 {
167 if (list != NULL) {
168 return list[i];
169 } else {
170 return i;
171 }
172 }
173
174 /**
175 * gretl_xml_put_int:
176 * @tag: name to give value.
177 * @i: value to put (as attribute)
178 * @fp: file to which to write.
179 *
180 * Writes to @fp a string of the form "\%s=\%d".
181 */
182
gretl_xml_put_int(const char * tag,int i,PRN * prn)183 void gretl_xml_put_int (const char *tag, int i, PRN *prn)
184 {
185 pprintf(prn, "%s=\"%d\" ", tag, i);
186 }
187
188 /**
189 * gretl_xml_put_unsigned:
190 * @tag: name to give value.
191 * @i: value to put (as attribute)
192 * @fp: file to which to write.
193 *
194 * Writes to @fp a string of the form "\%s=\%u".
195 */
196
gretl_xml_put_unsigned(const char * tag,unsigned int u,PRN * prn)197 void gretl_xml_put_unsigned (const char *tag, unsigned int u, PRN *prn)
198 {
199 pprintf(prn, "%s=\"%u\" ", tag, u);
200 }
201
202 /**
203 * gretl_xml_put_double:
204 * @tag: name to give value.
205 * @x: value to put (as attribute)
206 * @fp: file to which to write.
207 *
208 * Writes to @fp a string of the form "\%s=\%.17g" if the value of
209 * @x is valid, otherwise "\%s=NA".
210 */
211
gretl_xml_put_double(const char * tag,double x,PRN * prn)212 void gretl_xml_put_double (const char *tag, double x, PRN *prn)
213 {
214 if (na(x)) {
215 pprintf(prn, "%s=\"NA\" ", tag);
216 } else {
217 pprintf(prn, "%s=\"%.17g\" ", tag, x);
218 }
219 }
220
221 /**
222 * gretl_xml_put_double_array:
223 * @tag: name to give array.
224 * @x: values to put.
225 * @n: number of values in @x.
226 * @fp: file to which to write.
227 *
228 */
229
gretl_xml_put_double_array(const char * tag,double * x,int n,PRN * prn)230 void gretl_xml_put_double_array (const char *tag, double *x, int n,
231 PRN *prn)
232 {
233 int i;
234
235 pprintf(prn, "<%s count=\"%d\">\n", tag, n);
236 for (i=0; i<n; i++) {
237 if (na(x[i])) {
238 pputs(prn, "NA ");
239 } else {
240 pprintf(prn, "%.17g ", x[i]);
241 }
242 }
243 pprintf(prn, "</%s>\n", tag);
244 }
245
246 /**
247 * gretl_xml_put_string:
248 * @str: string to put.
249 * @fp: file to which to write.
250 *
251 * Write @str to @fp. If @str needs to have XML-special
252 * characters escaped, this will be done automatically.
253 * If @str is NULL, this is considered a no-op.
254 *
255 * Returns: 0 on success, non-zero error code on failure.
256 */
257
gretl_xml_put_string(const char * str,PRN * prn)258 int gretl_xml_put_string (const char *str, PRN *prn)
259 {
260 int err = 0;
261
262 if (str == NULL) {
263 return 0;
264 }
265
266 if (gretl_xml_validate(str)) {
267 pputs(prn, str);
268 } else {
269 char *xstr = gretl_xml_encode(str);
270
271 if (xstr != NULL) {
272 pputs(prn, xstr);
273 free(xstr);
274 } else {
275 err = E_ALLOC;
276 }
277 }
278
279 return err;
280 }
281
282 /**
283 * gretl_xml_put_strings_array:
284 * @tag: name to give array.
285 * @strs: array of strings to put.
286 * @n: number of strings in @strs.
287 * @fp: file to which to write.
288 *
289 */
290
gretl_xml_put_strings_array(const char * tag,const char ** strs,int n,PRN * prn)291 void gretl_xml_put_strings_array (const char *tag, const char **strs,
292 int n, PRN *prn)
293 {
294 int i;
295
296 if (n < 0) {
297 /* assume NULL termination */
298 const char **S = strs;
299
300 n = 0;
301 while (*S != NULL) {
302 n++;
303 S++;
304 }
305 }
306
307 pprintf(prn, "<%s count=\"%d\">\n", tag, n);
308 for (i=0; i<n; i++) {
309 gretl_xml_put_string(strs[i], prn);
310 pputs(prn, " ");
311 }
312 pprintf(prn, "</%s>\n", tag);
313 }
314
315 /**
316 * gretl_xml_put_strings_array_quoted:
317 * @tag: name to give array.
318 * @strs: array of strings to put.
319 * @n: number of strings in @strs.
320 * @fp: file to which to write.
321 *
322 */
323
gretl_xml_put_strings_array_quoted(const char * tag,const char ** strs,int n,PRN * prn)324 void gretl_xml_put_strings_array_quoted (const char *tag,
325 const char **strs, int n,
326 PRN *prn)
327 {
328 int i;
329
330 pprintf(prn, "<%s count=\"%d\">\n", tag, n);
331 for (i=0; i<n; i++) {
332 pputs(prn, "\"");
333 gretl_xml_put_string(strs[i], prn);
334 pputs(prn, "\" ");
335 }
336 pprintf(prn, "</%s>\n", tag);
337 }
338
339 /**
340 * gretl_xml_put_tagged_string:
341 * @tag: name to give string.
342 * @str: string to put.
343 * @fp: file to which to write.
344 *
345 * Write @str to @fp, enclosed in simple starting and ending
346 * tags specified by @tag. If @str needs to have XML-special
347 * characters escaped, this will be done automatically.
348 * If @str is NULL, this is considered a no-op.
349 *
350 * Returns: 0 on success, non-zero error code on failure.
351 */
352
gretl_xml_put_tagged_string(const char * tag,const char * str,PRN * prn)353 void gretl_xml_put_tagged_string (const char *tag, const char *str,
354 PRN *prn)
355 {
356 pprintf(prn, "<%s>", tag);
357 gretl_xml_put_string(str, prn);
358 pprintf(prn, "</%s>\n", tag);
359 }
360
361 /**
362 * gretl_xml_put_tagged_string_plus:
363 * @tag: name to give string.
364 * @str: string to put.
365 * @attrib: name of attribute.
366 * @attval: value of attribute.
367 * @fp: file to which to write.
368 *
369 * Similar to gretl_xml_put_tagged_string(), but allows
370 * (in fact, requires) an attribute name and value along
371 * with the element name and value.
372 *
373 * Returns: 0 on success, non-zero error code on failure.
374 */
375
gretl_xml_put_tagged_string_plus(const char * tag,const char * str,const char * attrib,const char * attval,PRN * prn)376 void gretl_xml_put_tagged_string_plus (const char *tag,
377 const char *str,
378 const char *attrib,
379 const char *attval,
380 PRN *prn)
381 {
382 pprintf(prn, "<%s %s=\"", tag, attrib);
383 gretl_xml_put_string(attval, prn);
384 pputs(prn, "\">");
385 gretl_xml_put_string(str, prn);
386 pprintf(prn, "</%s>\n", tag);
387 }
388
389 /**
390 * gretl_list_serialize:
391 * @list: list of integers to be written.
392 * @name: name to give list, or NULL.
393 * @fp: file to which to write.
394 *
395 */
396
gretl_list_serialize(const int * list,const char * name,PRN * prn)397 void gretl_list_serialize (const int *list, const char *name,
398 PRN *prn)
399 {
400 int i;
401
402 if (list == NULL) {
403 return;
404 }
405
406 if (name == NULL) {
407 pputs(prn, "<list>\n");
408 } else {
409 pprintf(prn, "<list name=\"%s\">\n", name);
410 }
411
412 for (i=0; i<=list[0]; i++) {
413 pprintf(prn, "%d ", list[i]);
414 }
415 pputs(prn, "</list>\n");
416 }
417
418 /**
419 * gretl_xml_put_tagged_list:
420 * @tag: tag in which list should be wrapped.
421 * @list: list of integers to be written.
422 * @fp: file to which to write.
423 *
424 */
425
gretl_xml_put_tagged_list(const char * tag,const int * list,PRN * prn)426 void gretl_xml_put_tagged_list (const char *tag, const int *list,
427 PRN *prn)
428 {
429 int i;
430
431 if (list == NULL) {
432 return;
433 }
434
435 pprintf(prn, "<%s>\n", tag);
436 for (i=0; i<=list[0]; i++) {
437 if (list[i] == LISTSEP) {
438 pputs(prn, "; ");
439 } else {
440 pprintf(prn, "%d ", list[i]);
441 }
442 }
443 pprintf(prn, "</%s>\n", tag);
444 }
445
446 /**
447 * gretl_matrix_serialize:
448 * @m: matrix to be written.
449 * @name: name for matrix, or NULL.
450 * @fp: stream to which to print.
451 *
452 */
453
gretl_matrix_serialize(const gretl_matrix * m,const char * name,PRN * prn)454 void gretl_matrix_serialize (const gretl_matrix *m,
455 const char *name,
456 PRN *prn)
457 {
458 int is_complex = 0;
459 const char **S;
460 double x;
461 int i, j;
462
463 if (m == NULL) {
464 return;
465 }
466
467 if (name == NULL) {
468 pprintf(prn, "<gretl-matrix rows=\"%d\" cols=\"%d\"\n",
469 m->rows, m->cols);
470 } else {
471 pprintf(prn, "<gretl-matrix name=\"%s\" rows=\"%d\" cols=\"%d\"",
472 name, m->rows, m->cols);
473 }
474
475 if (m->is_complex) {
476 is_complex = 1;
477 pputs(prn, " complex=\"1\"");
478 }
479
480 if (gretl_matrix_is_dated(m)) {
481 int mt1 = gretl_matrix_get_t1(m);
482 int mt2 = gretl_matrix_get_t2(m);
483
484 pprintf(prn, " t1=\"%d\" t2=\"%d\"", mt1, mt2);
485 }
486
487 S = gretl_matrix_get_colnames(m);
488
489 if (S != NULL) {
490 pputs(prn, " colnames=\"");
491 for (j=0; j<m->cols; j++) {
492 pputs(prn, S[j]);
493 pputs(prn, (j < m->cols - 1)? " " : "\"");
494 }
495 }
496
497 S = gretl_matrix_get_rownames(m);
498
499 if (S != NULL) {
500 pputs(prn, " rownames=\"");
501 for (j=0; j<m->rows; j++) {
502 pputs(prn, S[j]);
503 pputs(prn, (j < m->rows - 1)? " " : "\"");
504 }
505 }
506
507 pputs(prn, ">\n");
508
509 if (is_complex) {
510 gretl_matrix_set_complex_full((gretl_matrix *) m, 0);
511 }
512
513 for (i=0; i<m->rows; i++) {
514 for (j=0; j<m->cols; j++) {
515 x = gretl_matrix_get(m, i, j);
516 #ifdef WIN32
517 if (na(x)) {
518 win32_pprint_nonfinite(prn, x, ' ');
519 continue;
520 }
521 #endif
522 pprintf(prn, "%.17g ", x);
523 }
524 pputs(prn, "\n");
525 }
526
527 if (is_complex) {
528 gretl_matrix_set_complex_full((gretl_matrix *) m, 1);
529 }
530
531 pputs(prn, "</gretl-matrix>\n");
532 }
533
534 /**
535 * gretl_finite_scalar_serialize:
536 * @x: numerical value.
537 * @name: name of object.
538 * @fp: stream for writing.
539 *
540 * Note: not suitable for writing non-finite values.
541 */
542
gretl_finite_scalar_serialize(double x,const char * name,PRN * prn)543 void gretl_finite_scalar_serialize (double x, const char *name, PRN *prn)
544 {
545 pprintf(prn, "<scalar name=\"%s\" value=\"%.17g\"/>\n", name, x);
546 }
547
548 /**
549 * gretl_string_serialize:
550 * @s: UTF-8 string.
551 * @name: name of object.
552 * @fp: stream for writing.
553 *
554 */
555
gretl_string_serialize(const char * s,const char * name,PRN * prn)556 void gretl_string_serialize (const char *s, const char *name, PRN *prn)
557 {
558 pprintf(prn, "<string name=\"%s\" value=\"%s\"/>\n", name, s);
559 }
560
561 /**
562 * gretl_xml_get_prop_as_int:
563 * @node: XML node pointer.
564 * @tag: name by which integer property is known.
565 * @i: location to write int value.
566 *
567 * Returns: 1 if an int is found and read successfully, 0
568 * otherwise.
569 */
570
gretl_xml_get_prop_as_int(xmlNodePtr node,const char * tag,int * i)571 int gretl_xml_get_prop_as_int (xmlNodePtr node, const char *tag,
572 int *i)
573 {
574 xmlChar *tmp = xmlGetProp(node, (XUC) tag);
575 int ret = 0;
576
577 if (tmp != NULL) {
578 ret = sscanf((const char *) tmp, "%d", i);
579 free(tmp);
580 }
581
582 return ret;
583 }
584
585 /**
586 * gretl_xml_get_prop_as_opt:
587 * @node: XML node pointer.
588 * @tag: name by which integer property is known.
589 * @o: location to write gretlopt value.
590 *
591 * Returns: 1 if an int is found and read successfully, 0
592 * otherwise.
593 */
594
gretl_xml_get_prop_as_opt(xmlNodePtr node,const char * tag,gretlopt * o)595 int gretl_xml_get_prop_as_opt (xmlNodePtr node, const char *tag,
596 gretlopt *o)
597 {
598 xmlChar *tmp = xmlGetProp(node, (XUC) tag);
599 int ret = 0;
600
601 if (tmp != NULL) {
602 int i;
603
604 ret = sscanf((const char *) tmp, "%d", &i);
605 *o = (gretlopt) i;
606 free(tmp);
607 }
608
609 return ret;
610 }
611
612 /**
613 * gretl_xml_get_prop_as_unsigned_int:
614 * @node: XML node pointer.
615 * @tag: name by which unsigned integer property is known.
616 * @u: location to write value.
617 *
618 * Returns: 1 if an unsigned int is found and read successfully, 0
619 * otherwise.
620 */
621
622 unsigned int
gretl_xml_get_prop_as_unsigned_int(xmlNodePtr node,const char * tag,unsigned int * u)623 gretl_xml_get_prop_as_unsigned_int (xmlNodePtr node,
624 const char *tag,
625 unsigned int *u)
626 {
627 xmlChar *tmp = xmlGetProp(node, (XUC) tag);
628 int ret = 0;
629
630 if (tmp != NULL) {
631 ret = sscanf((const char *) tmp, "%u", u);
632 free(tmp);
633 }
634
635 return ret;
636 }
637
638 /**
639 * gretl_xml_get_prop_as_uchar:
640 * @node: XML node pointer.
641 * @tag: name by which unsigned character property is known.
642 * @u: location to write value.
643 *
644 * Returns: 1 if an unsigned char is found and read successfully, 0
645 * otherwise.
646 */
647
gretl_xml_get_prop_as_uchar(xmlNodePtr node,const char * tag,unsigned char * u)648 int gretl_xml_get_prop_as_uchar (xmlNodePtr node, const char *tag,
649 unsigned char *u)
650 {
651 xmlChar *tmp = xmlGetProp(node, (XUC) tag);
652 int ret = 0;
653
654 if (tmp != NULL) {
655 *u = (unsigned char) atoi((const char *) tmp);
656 free(tmp);
657 ret = 1;
658 }
659
660 return ret;
661 }
662
663 /**
664 * gretl_xml_get_prop_as_double:
665 * @node: XML node pointer.
666 * @tag: name by which floating-point property is known.
667 * @x: location to write double value.
668 *
669 * Returns: 1 if a double is found and read successfully, 0
670 * otherwise.
671 */
672
gretl_xml_get_prop_as_double(xmlNodePtr node,const char * tag,double * x)673 int gretl_xml_get_prop_as_double (xmlNodePtr node, const char *tag,
674 double *x)
675 {
676 char *p, *s = (char *) xmlGetProp(node, (XUC) tag);
677 int ret = 0;
678
679 *x = NADBL;
680
681 if (s != NULL) {
682 p = s;
683 p += strspn(p, " \r\n");
684 if (strncmp(p, "NA", 2)) {
685 *x = atof(p);
686 }
687 free(s);
688 ret = 1;
689 }
690
691 return ret;
692 }
693
694 /**
695 * gretl_xml_get_prop_as_string:
696 * @node: XML node pointer.
697 * @tag: name by which string property is known.
698 * @pstr: location to assign string.
699 *
700 * Returns: 1 if a string is found and read successfully, 0
701 * otherwise.
702 */
703
gretl_xml_get_prop_as_string(xmlNodePtr node,const char * tag,char ** pstr)704 int gretl_xml_get_prop_as_string (xmlNodePtr node, const char *tag,
705 char **pstr)
706 {
707 xmlChar *tmp = xmlGetProp(node, (XUC) tag);
708 int ret = 0;
709
710 if (tmp != NULL) {
711 *pstr = (char *) tmp;
712 ret = 1;
713 }
714
715 return ret;
716 }
717
718 /**
719 * gretl_xml_get_type_property:
720 * @node: XML node pointer.
721 *
722 * Returns: the gretl type identifier associated with the
723 * property tagged "type" on @node, or 0 on failure.
724 */
725
gretl_xml_get_type_property(xmlNodePtr node)726 GretlType gretl_xml_get_type_property (xmlNodePtr node)
727 {
728 char *tmp = (char *) xmlGetProp(node, (XUC) "type");
729 GretlType type = 0;
730
731 if (tmp != NULL) {
732 if (!strncmp(tmp, "gretl-", 6)) {
733 /* allow, e.g. "gretl-matrix" for "matrix" */
734 type = gretl_type_from_string(tmp + 6);
735 } else {
736 type = gretl_type_from_string(tmp);
737 }
738 free(tmp);
739 }
740
741 return type;
742 }
743
744 /**
745 * gretl_xml_get_prop_as_bool:
746 * @node: XML node pointer.
747 * @tag: name by which property is known.
748 *
749 * Returns: 1 if the named property is found and has value %true,
750 * else 0.
751 */
752
gretl_xml_get_prop_as_bool(xmlNodePtr node,const char * tag)753 int gretl_xml_get_prop_as_bool (xmlNodePtr node, const char *tag)
754 {
755 xmlChar *tmp = xmlGetProp(node, (XUC) tag);
756 int ret = 0;
757
758 if (tmp != NULL) {
759 if (!strcmp((char *) tmp, "true") ||
760 !strcmp((char *) tmp, "1")) {
761 ret = 1;
762 }
763 free(tmp);
764 }
765
766 return ret;
767 }
768
769 /**
770 * gretl_xml_node_get_int:
771 * @node: XML node pointer.
772 * @doc: XML document pointer.
773 * @i: location to receive integer.
774 *
775 * Returns: 1 if an int is found and read successfully, 0
776 * otherwise.
777 */
778
gretl_xml_node_get_int(xmlNodePtr node,xmlDocPtr doc,int * i)779 int gretl_xml_node_get_int (xmlNodePtr node, xmlDocPtr doc, int *i)
780 {
781 xmlChar *tmp;
782 int ret = 0;
783
784 tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
785
786 if (tmp != NULL) {
787 *i = atoi((const char *) tmp);
788 free(tmp);
789 ret = 1;
790 }
791
792 return ret;
793 }
794
795 /**
796 * gretl_xml_node_get_unsigned:
797 * @node: XML node pointer.
798 * @doc: XML document pointer.
799 * @i: location to receive integer.
800 *
801 * Returns: 1 if an unsigned int is found and read successfully, 0
802 * otherwise.
803 */
804
gretl_xml_node_get_unsigned(xmlNodePtr node,xmlDocPtr doc,unsigned int * u)805 int gretl_xml_node_get_unsigned (xmlNodePtr node, xmlDocPtr doc,
806 unsigned int *u)
807 {
808 xmlChar *tmp;
809 int ret = 0;
810
811 tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
812
813 if (tmp != NULL) {
814 int n = sscanf((const char *) tmp, "%u", u);
815
816 free(tmp);
817 ret = (n == 1);
818 }
819
820 return ret;
821 }
822
823 /**
824 * gretl_xml_node_get_double:
825 * @node: XML node pointer.
826 * @doc: XML document pointer.
827 * @x: location to receive double.
828 *
829 * Returns: 1 if a double is found and read successfully, 0
830 * otherwise.
831 */
832
gretl_xml_node_get_double(xmlNodePtr node,xmlDocPtr doc,double * x)833 int gretl_xml_node_get_double (xmlNodePtr node, xmlDocPtr doc,
834 double *x)
835 {
836 char *s, *p;
837 int ret = 0;
838
839 s = (char *) xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
840
841 if (s != NULL) {
842 p = s;
843 p += strspn(p, " \r\n");
844 if (!strncmp(p, "NA", 2)) {
845 *x = NADBL;
846 } else {
847 *x = atof(p);
848 }
849 free(s);
850 ret = 1;
851 }
852
853 return ret;
854 }
855
856 /**
857 * gretl_xml_node_get_string:
858 * @node: XML node pointer.
859 * @doc: XML document pointer.
860 * @pstr: location to receive string.
861 *
862 * Returns: 1 if a string is found and read successfully, 0
863 * otherwise.
864 */
865
gretl_xml_node_get_string(xmlNodePtr node,xmlDocPtr doc,char ** pstr)866 int gretl_xml_node_get_string (xmlNodePtr node, xmlDocPtr doc,
867 char **pstr)
868 {
869 xmlChar *tmp;
870 int ret = 0;
871
872 tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
873
874 if (tmp != NULL) {
875 *pstr = (char *) tmp;
876 ret = 1;
877 }
878
879 return ret;
880 }
881
882 /**
883 * gretl_xml_get_string:
884 * @node: XML node pointer.
885 * @doc: XML document pointer.
886 *
887 * Returns: allocated copy of the string content of @node, or
888 * NULL on error.
889 */
890
gretl_xml_get_string(xmlNodePtr node,xmlDocPtr doc)891 char *gretl_xml_get_string (xmlNodePtr node, xmlDocPtr doc)
892 {
893 return (char *) xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
894 }
895
896 /**
897 * gretl_xml_node_get_trimmed_string:
898 * @node: XML node pointer.
899 * @doc: XML document pointer.
900 * @pstr: location to receive string.
901 *
902 * Reads a string from @node and trims both leading and trailing
903 * white space.
904 *
905 * Returns: 1 if a string is found and read successfully, 0
906 * otherwise.
907 */
908
gretl_xml_node_get_trimmed_string(xmlNodePtr node,xmlDocPtr doc,char ** pstr)909 int gretl_xml_node_get_trimmed_string (xmlNodePtr node, xmlDocPtr doc,
910 char **pstr)
911 {
912 char *tmp;
913 char *s;
914 int i, len, ret = 0;
915
916 tmp = (char *) xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
917
918 if (tmp != NULL) {
919 s = tmp;
920 s += strspn(s, " \t\n\r");
921 len = strlen(s);
922 for (i=len-1; i>=0; i--) {
923 if (s[i] == ' ' || s[i] == '\t' ||
924 s[i] == '\r' || s[i] == '\n') {
925 len--;
926 } else {
927 break;
928 }
929 }
930 *pstr = gretl_strndup(s, len);
931 if (*pstr != NULL) {
932 ret = 1;
933 }
934 free(tmp);
935 } else {
936 /* empty string */
937 *pstr = gretl_strdup("");
938 if (*pstr != NULL) {
939 ret = 1;
940 }
941 }
942
943 return ret;
944 }
945
946 /**
947 * gretl_xml_get_list:
948 * @node: XML node pointer.
949 * @doc: XML document pointer.
950 * @err: location to receive error code.
951 *
952 * Returns: allocated list read from @node, or %NULL on
953 * failure.
954 */
955
gretl_xml_get_list(xmlNodePtr node,xmlDocPtr doc,int * err)956 int *gretl_xml_get_list (xmlNodePtr node, xmlDocPtr doc, int *err)
957 {
958 xmlChar *tmp;
959 const char *p;
960 int *list = NULL;
961 int i, n;
962
963 tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
964
965 if (tmp == NULL) {
966 *err = E_DATA;
967 } else {
968 p = (const char *) tmp;
969 p += strspn(p, " \r\n"); /* skip space (get to first value) */
970 if (sscanf(p, "%d", &n) != 1) {
971 *err = E_DATA;
972 } else if (n == 0) {
973 list = gretl_null_list();
974 free(tmp);
975 return list;
976 } else if (n < 0) {
977 *err = E_DATA;
978 } else {
979 p += strcspn(p, " \r\n"); /* skip non-space (get beyond value) */
980 list = gretl_list_new(n);
981 if (list == NULL) {
982 *err = E_ALLOC;
983 }
984 }
985
986 if (list != NULL && !*err) {
987 for (i=1; i<=n && !*err; i++) {
988 p += strspn(p, " \r\n"); /* skip space (get to next value) */
989 if (*p == ';') {
990 list[i] = LISTSEP;
991 } else if (sscanf(p, "%d", &list[i]) != 1) {
992 *err = E_DATA;
993 }
994 p += strcspn(p, " \r\n"); /* skip non-space (get beyond value) */
995 }
996 }
997
998 free(tmp);
999 }
1000
1001 if (list != NULL && *err) {
1002 free(list);
1003 list = NULL;
1004 }
1005
1006 return list;
1007 }
1008
1009 /**
1010 * gretl_xml_child_get_string:
1011 * @node: XML node pointer.
1012 * @doc: XML document pointer.
1013 * @name: name of child node.
1014 * @pstr: location to receive string.
1015 *
1016 * Returns: 1 if a string is found and read successfully, 0
1017 * otherwise.
1018 */
1019
gretl_xml_child_get_string(xmlNodePtr node,xmlDocPtr doc,const char * name,char ** pstr)1020 int gretl_xml_child_get_string (xmlNodePtr node, xmlDocPtr doc,
1021 const char *name, char **pstr)
1022 {
1023 xmlNodePtr cur;
1024 xmlChar *tmp;
1025 int ret = 0;
1026
1027 *pstr = NULL;
1028
1029 cur = node->xmlChildrenNode;
1030
1031 while (cur != NULL) {
1032 if (!xmlStrcmp(cur->name, (XUC) name)) {
1033 tmp = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
1034 if (tmp != NULL) {
1035 *pstr = (char *) tmp;
1036 ret = 1;
1037 }
1038 break;
1039 }
1040 cur = cur->next;
1041 }
1042
1043 return ret;
1044 }
1045
1046
1047 #define SMALLVAL(x) (x > -1e-40 && x < 1e-40)
1048
gretl_xml_get_array(xmlNodePtr node,xmlDocPtr doc,GretlType type,int * nelem,int * err)1049 static void *gretl_xml_get_array (xmlNodePtr node, xmlDocPtr doc,
1050 GretlType type,
1051 int *nelem, int *err)
1052 {
1053 xmlChar *tmp = xmlGetProp(node, (XUC) "count");
1054 int *ivals = NULL;
1055 double *xvals = NULL;
1056 cmplx *cvals = NULL;
1057 void *ptr = NULL;
1058 int nread = 0;
1059 int i, n = 0;
1060
1061 *nelem = 0;
1062
1063 if (tmp == NULL) {
1064 tmp = xmlGetProp(node, (XUC) "size");
1065 }
1066
1067 if (tmp == NULL) {
1068 fprintf(stderr, "gretl_xml_get_array: didn't find count\n");
1069 *err = E_DATA;
1070 return NULL;
1071 }
1072
1073 n = atoi((const char *) tmp);
1074 free(tmp);
1075
1076 if (n <= 0) {
1077 return NULL;
1078 }
1079
1080 if (type == GRETL_TYPE_INT_ARRAY) {
1081 ivals = malloc(n * sizeof *ivals);
1082 ptr = ivals;
1083 } else if (type == GRETL_TYPE_DOUBLE_ARRAY) {
1084 xvals = malloc(n * sizeof *xvals);
1085 ptr = xvals;
1086 } else if (type == GRETL_TYPE_CMPLX_ARRAY) {
1087 cvals = malloc(n * sizeof *cvals);
1088 ptr = cvals;
1089 }
1090
1091 if (ptr == NULL) {
1092 *err = E_ALLOC;
1093 return NULL;
1094 }
1095
1096 tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
1097
1098 if (tmp == NULL) {
1099 *err = E_DATA;
1100 } else {
1101 const char *s = (const char *) tmp;
1102 char *test;
1103
1104 errno = 0;
1105
1106 if (type == GRETL_TYPE_DOUBLE_ARRAY) {
1107 double x;
1108
1109 for (i=0; i<n && !*err && *s; i++) {
1110 while (isspace(*s)) s++;
1111 x = strtod(s, &test);
1112 #ifdef WIN32
1113 /* remedial code for "1.#QNAN" */
1114 if (!strncmp(test, "#QNAN", 5)) {
1115 x = NADBL;
1116 s = test + 5;
1117 } else
1118 #endif
1119 if (!strncmp(test, "NA", 2)) {
1120 x = NADBL;
1121 s = test + 2;
1122 } else {
1123 s = test;
1124 if (*s != '\0' && !isspace(*s)) {
1125 *err = E_DATA;
1126 } else if (errno) {
1127 perror(NULL);
1128 if (!SMALLVAL(x)) {
1129 x = NADBL;
1130 }
1131 errno = 0;
1132 }
1133 }
1134 xvals[i] = x;
1135 nread++;
1136 }
1137 } else if (type == GRETL_TYPE_INT_ARRAY) {
1138 long kl;
1139
1140 for (i=0; i<n && !*err && *s; i++) {
1141 while (isspace(*s)) s++;
1142 kl = strtol(s, &test, 10);
1143 if (errno) {
1144 *err = E_DATA;
1145 } else if (*test != '\0' && !isspace(*test)) {
1146 *err = E_DATA;
1147 } else {
1148 s = test;
1149 ivals[i] = kl;
1150 nread++;
1151 }
1152 }
1153 } else if (type == GRETL_TYPE_CMPLX_ARRAY) {
1154 double x;
1155 int n2 = n * 2;
1156 int rval = 1;
1157
1158 for (i=0; i<n2 && !*err && *s; i++) {
1159 while (isspace(*s)) s++;
1160 x = strtod(s, &test);
1161 if (!strncmp(test, "NA", 2)) {
1162 x = NADBL;
1163 test += 2;
1164 } else if (errno) {
1165 if (SMALLVAL(x)) {
1166 errno = 0;
1167 } else {
1168 perror(NULL);
1169 *err = E_DATA;
1170 }
1171 } else if (*test != '\0' && !isspace(*test)) {
1172 *err = E_DATA;
1173 }
1174 if (!*err) {
1175 s = test;
1176 if (rval) {
1177 cvals[nread].r = x;
1178 rval = 0;
1179 } else {
1180 cvals[nread].i = x;
1181 rval = 1;
1182 nread++;
1183 }
1184 }
1185 }
1186 }
1187
1188 free(tmp);
1189
1190 if (nread < n) {
1191 fprintf(stderr, "expected %d items in array, but got %d\n", n, nread);
1192 *err = E_DATA;
1193 }
1194 }
1195
1196 if (ptr != NULL && *err) {
1197 free(ptr);
1198 ptr = NULL;
1199 }
1200
1201 if (!*err) {
1202 *nelem = n;
1203 }
1204
1205 return ptr;
1206 }
1207
1208 /**
1209 * gretl_xml_get_int_array:
1210 * @node: XML node pointer.
1211 * @doc: XML document pointer.
1212 * @nelem: location to receive number of elements in array.
1213 * @err: location to receive error code.
1214 *
1215 * Returns: allocated array of integers read from @node, or %NULL on
1216 * failure.
1217 */
1218
gretl_xml_get_int_array(xmlNodePtr node,xmlDocPtr doc,int * nelem,int * err)1219 int *gretl_xml_get_int_array (xmlNodePtr node, xmlDocPtr doc,
1220 int *nelem, int *err)
1221 {
1222 return gretl_xml_get_array(node, doc, GRETL_TYPE_INT_ARRAY,
1223 nelem, err);
1224 }
1225
1226 /**
1227 * gretl_xml_get_double_array:
1228 * @node: XML node pointer.
1229 * @doc: XML document pointer.
1230 * @nelem: location to receive number of elements in array.
1231 * @err: location to receive error code.
1232 *
1233 * Returns: allocated array of doubles read from @node, or %NULL on
1234 * failure.
1235 */
1236
gretl_xml_get_double_array(xmlNodePtr node,xmlDocPtr doc,int * nelem,int * err)1237 double *gretl_xml_get_double_array (xmlNodePtr node, xmlDocPtr doc,
1238 int *nelem, int *err)
1239 {
1240 int myerr = 0;
1241
1242 if (err == NULL) {
1243 err = &myerr;
1244 }
1245
1246 return gretl_xml_get_array(node, doc, GRETL_TYPE_DOUBLE_ARRAY,
1247 nelem, err);
1248 }
1249
1250 /**
1251 * gretl_xml_get_cmplx_array:
1252 * @node: XML node pointer.
1253 * @doc: XML document pointer.
1254 * @nelem: location to receive number of elements in array.
1255 * @err: location to receive error code.
1256 *
1257 * Returns: allocated array of cmplx (complex numbers) read from
1258 * @node, or %NULL on failure.
1259 */
1260
gretl_xml_get_cmplx_array(xmlNodePtr node,xmlDocPtr doc,int * nelem,int * err)1261 cmplx *gretl_xml_get_cmplx_array (xmlNodePtr node, xmlDocPtr doc,
1262 int *nelem, int *err)
1263 {
1264 return gretl_xml_get_array(node, doc, GRETL_TYPE_CMPLX_ARRAY,
1265 nelem, err);
1266 }
1267
chunk_strdup(const char * src,const char ** ptr,int * err)1268 static char *chunk_strdup (const char *src, const char **ptr, int *err)
1269 {
1270 char *targ = NULL;
1271
1272 if (*src == '\0') {
1273 *ptr = src;
1274 } else {
1275 const char *p;
1276 int len = 0;
1277
1278 src += strspn(src, " \n");
1279 p = src;
1280
1281 if (*src == '"') {
1282 p = ++src;
1283 while (*src && *src != '"') {
1284 len++;
1285 src++;
1286 }
1287 if (*src == '"') {
1288 src++;
1289 }
1290 } else {
1291 while (*src && !isspace(*src)) {
1292 len++;
1293 src++;
1294 }
1295 }
1296
1297 if (ptr != NULL) {
1298 *ptr = src;
1299 }
1300
1301 if (len > 0) {
1302 targ = gretl_strndup(p, len);
1303 if (targ == NULL) {
1304 *err = E_ALLOC;
1305 }
1306 }
1307 }
1308
1309 if (targ == NULL && !*err) {
1310 *err = E_DATA;
1311 }
1312
1313 return targ;
1314 }
1315
1316 /**
1317 * gretl_xml_get_strings_array:
1318 * @node: XML node pointer.
1319 * @doc: XML document pointer.
1320 * @nelem: location to receive number of elements in array.
1321 * @slop: if non-zero, allow the number of strings to fall
1322 * short of the recorded string count by one.
1323 * @err: location to receive error code.
1324 *
1325 * Returns: allocated array of strings read from @node, or
1326 * %NULL on failure.
1327 */
1328
gretl_xml_get_strings_array(xmlNodePtr node,xmlDocPtr doc,int * nelem,int slop,int * err)1329 char **gretl_xml_get_strings_array (xmlNodePtr node, xmlDocPtr doc,
1330 int *nelem, int slop, int *err)
1331 {
1332 xmlChar *tmp = xmlGetProp(node, (XUC) "count");
1333 char **S = NULL;
1334 const char *p;
1335 int i, n = 0;
1336
1337 if (tmp == NULL) {
1338 *err = E_DATA;
1339 return NULL;
1340 }
1341
1342 n = atoi((const char *) tmp);
1343 free(tmp);
1344
1345 #if GDT_DEBUG
1346 fprintf(stderr, "gretl_xml_get_strings_array: count=%d\n", n);
1347 #endif
1348
1349 if (n > 0) {
1350 S = strings_array_new(n);
1351 if (S == NULL) {
1352 *err = E_ALLOC;
1353 } else {
1354 tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
1355 if (tmp == NULL) {
1356 fprintf(stderr, "xmlNodeListGetString failed\n");
1357 *err = E_DATA;
1358 } else {
1359 p = (const char *) tmp;
1360 for (i=0; i<n && !*err; i++) {
1361 S[i] = chunk_strdup(p, &p, err);
1362 if (*err == E_DATA) {
1363 if (i == n - 1 && slop) {
1364 /* somebody got off by one somewhere? */
1365 *err = 0;
1366 n--;
1367 } else {
1368 /* treat as non-fatal? */
1369 fprintf(stderr, "repairing missing string value!\n");
1370 S[i] = gretl_strdup("empty string");
1371 *err = 0;
1372 }
1373 }
1374 }
1375 free(tmp);
1376 }
1377 }
1378 }
1379
1380 if (S != NULL && *err) {
1381 strings_array_free(S, n);
1382 S = NULL;
1383 }
1384
1385 if (!*err) {
1386 *nelem = n;
1387 }
1388
1389 return S;
1390 }
1391
1392 /**
1393 * gretl_xml_child_get_strings_array:
1394 * @node: XML node pointer.
1395 * @doc: XML document pointer.
1396 * @name: name of child node.
1397 * @pstrs: location to receive strings array.
1398 * @nstrs: location to receive number of strings.
1399 *
1400 * Returns: 1 if an array of strings is found and read successfully,
1401 * 0 otherwise.
1402 */
1403
gretl_xml_child_get_strings_array(xmlNodePtr node,xmlDocPtr doc,const char * name,char *** pstrs,int * nstrs)1404 int gretl_xml_child_get_strings_array (xmlNodePtr node, xmlDocPtr doc,
1405 const char *name, char ***pstrs,
1406 int *nstrs)
1407 {
1408 xmlNodePtr cur = node->xmlChildrenNode;
1409 int ret = 0;
1410
1411 while (cur != NULL) {
1412 if (!xmlStrcmp(cur->name, (XUC) name)) {
1413 int err = 0;
1414
1415 *pstrs = gretl_xml_get_strings_array(cur, doc, nstrs, 0, &err);
1416 ret = !err;
1417 break;
1418 }
1419 cur = cur->next;
1420 }
1421
1422 return ret;
1423 }
1424
get_matrix_values_via_file(gretl_matrix * m,const char * s)1425 static int get_matrix_values_via_file (gretl_matrix *m, const char *s)
1426 {
1427 gchar *fname;
1428 FILE *fp;
1429 int err = 0;
1430
1431 fname = gretl_make_dotpath("matrix.xml.XXXXXX");
1432 if (fname == NULL) {
1433 return E_ALLOC;
1434 }
1435
1436 fp = gretl_mktemp(fname, "wb");
1437 if (fp == NULL) {
1438 g_free(fname);
1439 return E_FOPEN;
1440 }
1441
1442 fputs(s, fp);
1443 fclose(fp);
1444 fp = gretl_fopen(fname, "r");
1445
1446 if (fp == NULL) {
1447 err = E_FOPEN;
1448 } else {
1449 double x;
1450 int i, j;
1451
1452 for (i=0; i<m->rows && !err; i++) {
1453 for (j=0; j<m->cols && !err; j++) {
1454 if (fscanf(fp, "%lf", &x) != 1) {
1455 #ifdef WIN32
1456 x = win32_fscan_nonfinite(fp, &err);
1457 if (!err) {
1458 gretl_matrix_set(m, i, j, x);
1459 }
1460 #else
1461 err = E_DATA;
1462 #endif
1463 } else {
1464 gretl_matrix_set(m, i, j, x);
1465 }
1466 }
1467 }
1468
1469 fclose(fp);
1470 }
1471
1472 remove(fname);
1473 g_free(fname);
1474
1475 return err;
1476 }
1477
maybe_add_matrix_labels(gretl_matrix * m,const char * s,int byrow)1478 static int maybe_add_matrix_labels (gretl_matrix *m,
1479 const char *s,
1480 int byrow)
1481 {
1482 int n, err = 0;
1483
1484 n = (byrow)? m->rows : m->cols;
1485
1486 if (s != NULL && *s != '\0') {
1487 char **S;
1488 int ns;
1489
1490 S = gretl_string_split(s, &ns, " \n\t");
1491 if (S == NULL) {
1492 err = E_ALLOC;
1493 } else if (ns != n) {
1494 err = E_NONCONF;
1495 strings_array_free(S, ns);
1496 } else if (byrow) {
1497 gretl_matrix_set_rownames(m, S);
1498 } else {
1499 gretl_matrix_set_colnames(m, S);
1500 }
1501 }
1502
1503 return err;
1504 }
1505
1506 /**
1507 * gretl_xml_get_matrix:
1508 * @node: XML node pointer.
1509 * @doc: XML document pointer.
1510 * @err: location to receive error code.
1511 *
1512 * Returns: allocated gretl matrix read from @node, or %NULL
1513 * on failure.
1514 */
1515
gretl_xml_get_matrix(xmlNodePtr node,xmlDocPtr doc,int * err)1516 gretl_matrix *gretl_xml_get_matrix (xmlNodePtr node,
1517 xmlDocPtr doc,
1518 int *err)
1519 {
1520 gretl_matrix *m = NULL;
1521 char *names = NULL;
1522 xmlChar *tmp = NULL;
1523 const char *p;
1524 double x;
1525 int rows = 0, cols = 0;
1526 int t1 = -1, t2 = -1;
1527 int is_complex = 0;
1528 int i, j;
1529
1530 if (!gretl_xml_get_prop_as_int(node, "rows", &rows) ||
1531 !gretl_xml_get_prop_as_int(node, "cols", &cols)) {
1532 *err = E_DATA;
1533 return NULL;
1534 }
1535
1536 if (gretl_xml_get_prop_as_bool(node, "complex")) {
1537 is_complex = 1;
1538 rows *= 2; /* 8-byte indexing */
1539 }
1540
1541 if (rows == 0 || cols == 0) {
1542 /* allow case of empty matrix */
1543 if (rows == 0 && cols == 0) {
1544 m = gretl_null_matrix_new();
1545 } else {
1546 m = gretl_matrix_alloc(rows, cols);
1547 }
1548 if (m == NULL) {
1549 *err = E_ALLOC;
1550 }
1551 return m;
1552 } else if (rows <= 0 || cols <= 0) {
1553 *err = E_DATA;
1554 return NULL;
1555 }
1556
1557 gretl_xml_get_prop_as_int(node, "t1", &t1);
1558 gretl_xml_get_prop_as_int(node, "t2", &t2);
1559
1560 m = gretl_matrix_alloc(rows, cols);
1561 if (m == NULL) {
1562 *err = E_ALLOC;
1563 return NULL;
1564 }
1565
1566 names = (char *) xmlGetProp(node, (XUC) "colnames");
1567 if (names != NULL) {
1568 maybe_add_matrix_labels(m, names, 0);
1569 }
1570
1571 names = (char *) xmlGetProp(node, (XUC) "rownames");
1572 if (names != NULL) {
1573 maybe_add_matrix_labels(m, names, 1);
1574 }
1575
1576 tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
1577 if (tmp == NULL) {
1578 gretl_matrix_free(m);
1579 *err = E_DATA;
1580 return NULL;
1581 }
1582
1583 p = (const char *) tmp;
1584
1585 gretl_push_c_numeric_locale();
1586
1587 if (rows * cols > 5000) {
1588 /* it's relatively slow to crawl along the string holding
1589 many matrix elements using sscanf plus str* functions
1590 */
1591 *err = get_matrix_values_via_file(m, p);
1592 } else {
1593 p += strspn(p, " \r\n");
1594 for (i=0; i<rows && !*err; i++) {
1595 for (j=0; j<cols && !*err; j++) {
1596 if (sscanf(p, "%lf", &x) != 1) {
1597 #ifdef WIN32
1598 x = win32_sscan_nonfinite(p, err);
1599 if (!*err) {
1600 gretl_matrix_set(m, i, j, x);
1601 }
1602 #else
1603 *err = E_DATA;
1604 #endif
1605 } else {
1606 gretl_matrix_set(m, i, j, x);
1607 }
1608 p += strspn(p, " \r\n");
1609 p += strcspn(p, " \r\n");
1610 }
1611 }
1612 }
1613
1614 gretl_pop_c_numeric_locale();
1615
1616 free(tmp);
1617
1618 if (*err) {
1619 gretl_matrix_free(m);
1620 m = NULL;
1621 } else {
1622 if (is_complex) {
1623 gretl_matrix_set_complex_full(m, 1);
1624 }
1625 if (t1 >= 0 && t2 >= t1) {
1626 gretl_matrix_set_t1(m, t1);
1627 gretl_matrix_set_t2(m, t2);
1628 }
1629 }
1630
1631 return m;
1632 }
1633
1634 /**
1635 * gretl_xml_get_submask:
1636 * @node: XML node pointer.
1637 * @doc: XML document pointer.
1638 * @pmask: location to receive allocated mask.
1639 *
1640 * Returns: 0 on success, non-zero on failure.
1641 */
1642
gretl_xml_get_submask(xmlNodePtr node,xmlDocPtr doc,char ** pmask)1643 int gretl_xml_get_submask (xmlNodePtr node, xmlDocPtr doc, char **pmask)
1644 {
1645 char *mask = NULL;
1646 int i, len;
1647 int err = 0;
1648
1649 if (!gretl_xml_get_prop_as_int(node, "length", &len)) {
1650 return 1;
1651 }
1652
1653 if (len == 0) {
1654 *pmask = RESAMPLED;
1655 return 0;
1656 }
1657
1658 mask = calloc(len, 1);
1659
1660 if (mask == NULL) {
1661 err = 1;
1662 } else {
1663 xmlChar *tmp = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
1664
1665 if (tmp == NULL) {
1666 err = 1;
1667 } else {
1668 char *s = (char *) tmp;
1669
1670 for (i=0; i<len; i++) {
1671 mask[i] = atoi(s);
1672 s += 2;
1673 }
1674 free(tmp);
1675 }
1676 }
1677
1678 if (!err) {
1679 *pmask = mask;
1680 }
1681
1682 return err;
1683 }
1684
gretl_xml_header(PRN * prn)1685 void gretl_xml_header (PRN *prn)
1686 {
1687 pputs(prn, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
1688 }
1689
1690 /**
1691 * gretl_matrix_write_as_gdt:
1692 * @fname: name of file to write.
1693 * @X: matrix, variable in columns.
1694 * @varnames: column names.
1695 * @labels: descriptive labels for the variables, or %NULL.
1696 *
1697 * Write out a .gdt data file containing the elements of
1698 * of the given matrix.
1699 *
1700 * Returns: 0 on successful completion, non-zero on error.
1701 */
1702
gretl_matrix_write_as_gdt(const char * fname,const gretl_matrix * X,const char ** varnames,const char ** labels)1703 int gretl_matrix_write_as_gdt (const char *fname,
1704 const gretl_matrix *X,
1705 const char **varnames,
1706 const char **labels)
1707 {
1708 gzFile fz = Z_NULL;
1709 char datname[MAXLEN];
1710 char *xmlbuf = NULL;
1711 int (*show_progress) (double, double, int) = NULL;
1712 double msize = 0;
1713 int T = X->rows;
1714 int k = X->cols;
1715 int in_c_locale = 0;
1716 int i, t, err = 0;
1717
1718 fz = gretl_gzopen(fname, "wb");
1719
1720 if (fz == Z_NULL) {
1721 gretl_errmsg_sprintf(_("Couldn't open %s for writing"), fname);
1722 return 1;
1723 }
1724
1725 msize = T * k * sizeof(double);
1726
1727 if (msize > 100000) {
1728 fprintf(stderr, "Writing %.0f Kbytes of data\n", msize / 1024);
1729 } else {
1730 msize = 0;
1731 }
1732
1733 if (msize > 0) {
1734 show_progress = get_plugin_function("show_progress");
1735 if (show_progress == NULL) {
1736 msize = 0;
1737 } else {
1738 (*show_progress)(0, msize, SP_SAVE_INIT);
1739 }
1740 }
1741
1742 simple_fname(datname, fname);
1743 xmlbuf = gretl_xml_encode(datname);
1744 if (xmlbuf == NULL) {
1745 err = 1;
1746 goto cleanup;
1747 }
1748
1749 gzprintf(fz, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
1750 "<!DOCTYPE gretldata SYSTEM \"gretldata.dtd\">\n\n"
1751 "<gretldata version=\"%s\" name=\"%s\" frequency=\"1\" "
1752 "startobs=\"1\" endobs=\"%d\" type=\"cross-section\">\n",
1753 GRETLDATA_VERSION, datname, T);
1754
1755 free(xmlbuf);
1756
1757 gretl_push_c_numeric_locale();
1758 in_c_locale = 1;
1759
1760 gzprintf(fz, "<variables count=\"%d\">\n", k);
1761
1762 for (i=0; i<k; i++) {
1763 gzprintf(fz, "<variable name=\"%s\"", varnames[i]);
1764 if (labels != NULL && labels[i] != NULL) {
1765 gzprintf(fz, "\n label=\"%s\"", labels[i]);
1766 }
1767 gzputs(fz, "\n/>\n");
1768 }
1769
1770 gzputs(fz, "</variables>\n");
1771
1772 gzprintf(fz, "<observations count=\"%d\" labels=\"false\">\n", T);
1773
1774 for (t=0; t<T; t++) {
1775 gzputs(fz, "<obs>");
1776 for (i=0; i<k; i++) {
1777 gzprintf(fz, "%.12g ", gretl_matrix_get(X, t, i));
1778 }
1779 gzputs(fz, "</obs>\n");
1780 if (msize > 0 && t && (t % 50 == 0)) {
1781 (*show_progress) (50, T, SP_NONE);
1782 }
1783 }
1784
1785 gzputs(fz, "</observations>\n</gretldata>\n");
1786
1787 cleanup:
1788
1789 if (in_c_locale) {
1790 gretl_pop_c_numeric_locale();
1791 }
1792
1793 if (msize > 0) {
1794 (*show_progress)(0, T, SP_FINISH);
1795 }
1796
1797 gzclose(fz);
1798
1799 return err;
1800 }
1801
string_table_count(const DATASET * dset,const int * list,int nvars)1802 static int string_table_count (const DATASET *dset,
1803 const int *list,
1804 int nvars)
1805 {
1806 int i, v, n = 0;
1807
1808 for (i=1; i<=nvars; i++) {
1809 v = savenum(list, i);
1810 if (is_string_valued(dset, v)) {
1811 n++;
1812 }
1813 }
1814
1815 return n;
1816 }
1817
maybe_print_panel_info(const DATASET * dset,int skip_padding,PRN * prn)1818 static void maybe_print_panel_info (const DATASET *dset,
1819 int skip_padding,
1820 PRN *prn)
1821 {
1822 int names = panel_group_names_ok(dset, 0);
1823 int pd = dset->panel_pd;
1824 double sd0 = dset->panel_sd0;
1825 int times = pd > 0 && sd0 > 0.0;
1826
1827 if (names || times || skip_padding) {
1828 pputs(prn, "<panel-info\n");
1829 if (names) {
1830 pprintf(prn, " group-names=\"%s\"\n", dset->pangrps);
1831 }
1832 if (times) {
1833 pprintf(prn, " time-frequency=\"%d\"\n", pd);
1834 pprintf(prn, " time-start=\"%.10g\"\n", sd0);
1835 }
1836 if (skip_padding) {
1837 pputs(prn, " skip-padding=\"1\"\n");
1838 }
1839 pputs(prn, "/>\n");
1840 }
1841 }
1842
row_is_padding(const DATASET * dset,int t,int vmax)1843 static int row_is_padding (const DATASET *dset, int t, int vmax)
1844 {
1845 int i;
1846
1847 for (i=1; i<vmax; i++) {
1848 if (!na(dset->Z[i][t])) {
1849 return 0;
1850 }
1851 }
1852
1853 return 1;
1854 }
1855
open_gdt_write_stream(const char * fname,gretlopt opt)1856 static PRN *open_gdt_write_stream (const char *fname,
1857 gretlopt opt)
1858 {
1859 int err = 0;
1860 PRN *prn;
1861
1862 if (opt & OPT_Z) {
1863 /* using zlib compression */
1864 int gzlevel = get_compression_option(STORE);
1865
1866 prn = gretl_gzip_print_new(fname, gzlevel, &err);
1867 } else {
1868 /* no compression */
1869 prn = gretl_print_new_with_filename(fname, &err);
1870 }
1871
1872 return prn;
1873 }
1874
1875 #define BIN_HDRLEN 24
1876
write_binary_header(FILE * fp)1877 static int write_binary_header (FILE *fp)
1878 {
1879 char header[BIN_HDRLEN] = {0};
1880 int err = 0;
1881
1882 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
1883 strcpy(header, "gretl-bin:little-endian");
1884 #else
1885 strcpy(header, "gretl-bin:big-endian");
1886 #endif
1887
1888 if (fwrite(header, 1, BIN_HDRLEN, fp) != BIN_HDRLEN) {
1889 err = E_DATA;
1890 }
1891
1892 return err;
1893 }
1894
write_binary_data(const char * fname,const DATASET * dset,const int * list,int nvars,int nrows,gretlopt opt)1895 static int write_binary_data (const char *fname, const DATASET *dset,
1896 const int *list, int nvars, int nrows,
1897 gretlopt opt)
1898 {
1899 char *bname;
1900 FILE *fp;
1901 int T = dset->t2 - dset->t1 + 1;
1902 size_t wrote;
1903 int i, v, err = 0;
1904
1905 bname = switch_ext_new(fname, "bin");
1906 fp = gretl_fopen(bname, "wb");
1907 free(bname);
1908
1909 if (fp == NULL) {
1910 return E_FOPEN;
1911 }
1912
1913 write_binary_header(fp);
1914
1915 if (nrows < T) {
1916 /* panel data with skip-padding in force */
1917 double *tmp = NULL;
1918 int uv = 0, tv = 0;
1919 int s, t, nv = 0;
1920
1921 /* add unit and time variables (initialized to zero) */
1922 err = dataset_add_series((DATASET *) dset, 2);
1923 if (!err) {
1924 tmp = malloc(nrows * sizeof *tmp);
1925 if (tmp == NULL) {
1926 err = E_ALLOC;
1927 }
1928 }
1929
1930 if (!err) {
1931 uv = dset->v - 2;
1932 tv = dset->v - 1;
1933 nv = dset->v - 2;
1934 }
1935
1936 for (t=dset->t1; t<=dset->t2 && !err; t++) {
1937 if (!row_is_padding(dset, t, nv)) {
1938 dset->Z[uv][t] = 1 + t / dset->pd;
1939 dset->Z[tv][t] = t % dset->pd + 1;
1940 }
1941 }
1942
1943 nv = nvars + 2;
1944 for (i=1; i<=nv && !err; i++) {
1945 if (i <= nvars) {
1946 v = savenum(list, i);
1947 } else {
1948 v = (i == nvars + 1)? uv : tv;
1949 }
1950 s = 0;
1951 for (t=dset->t1; t<=dset->t2 && !err; t++) {
1952 if (dset->Z[uv][t] != 0.0) {
1953 tmp[s++] = dset->Z[v][t];
1954 }
1955 }
1956 wrote = fwrite(tmp, sizeof(double), nrows, fp);
1957 if (wrote != nrows) {
1958 err = E_DATA;
1959 }
1960 }
1961
1962 free(tmp);
1963 if (uv > 0) {
1964 dataset_drop_last_variables((DATASET *) dset, 2);
1965 }
1966 } else {
1967 for (i=1; i<=nvars && !err; i++) {
1968 v = savenum(list, i);
1969 wrote = fwrite(dset->Z[v] + dset->t1, sizeof(double),
1970 T, fp);
1971 if (wrote != T) {
1972 err = E_DATA;
1973 }
1974 }
1975 }
1976
1977 fclose(fp);
1978
1979 return err;
1980 }
1981
gdt_swap_endianness(DATASET * dset)1982 static void gdt_swap_endianness (DATASET *dset)
1983 {
1984 int i, t;
1985
1986 for (i=1; i<dset->v; i++) {
1987 for (t=0; t<dset->n; t++) {
1988 reverse_double(dset->Z[i][t]);
1989 }
1990 }
1991 }
1992
read_binary_header(FILE * fp,int order)1993 static int read_binary_header (FILE *fp, int order)
1994 {
1995 char hdr[BIN_HDRLEN] = {0};
1996 unsigned chk;
1997 int err = 0;
1998
1999 chk = fread(hdr, 1, BIN_HDRLEN, fp);
2000
2001 if (chk != BIN_HDRLEN) {
2002 err = E_DATA;
2003 } else {
2004 int bin_order = 0;
2005
2006 if (strncmp(hdr, "gretl-bin:", 10)) {
2007 err = E_DATA;
2008 } else if (!strcmp(hdr + 10, "little-endian")) {
2009 bin_order = G_LITTLE_ENDIAN;
2010 } else if (!strcmp(hdr + 10, "big-endian")) {
2011 bin_order = G_BIG_ENDIAN;
2012 } else {
2013 err = E_DATA;
2014 }
2015 if (!err && bin_order != order) {
2016 err = E_DATA;
2017 }
2018 }
2019
2020 if (err) {
2021 gretl_errmsg_set("Error reading binary data file");
2022 }
2023
2024 return err;
2025 }
2026
na_convert(double * x,int n)2027 static void na_convert (double *x, int n)
2028 {
2029 int i;
2030
2031 for (i=0; i<n; i++) {
2032 if (x[i] == DBL_MAX) {
2033 x[i] = NADBL;
2034 }
2035 }
2036 }
2037
read_binary_data(const char * fname,DATASET * dset,int order,double gdtversion,int fullv,const int * vlist)2038 static int read_binary_data (const char *fname,
2039 DATASET *dset,
2040 int order,
2041 double gdtversion,
2042 int fullv,
2043 const int *vlist)
2044 {
2045 char *bname;
2046 FILE *fp;
2047 int err = 0;
2048
2049 bname = switch_ext_new(fname, "bin");
2050 fp = gretl_fopen(bname, "rb");
2051
2052 if (fp == NULL) {
2053 err = E_FOPEN;
2054 } else {
2055 int T = dset->n;
2056 long offset = T * sizeof(double);
2057 size_t got;
2058 int i, k = 1;
2059
2060 err = read_binary_header(fp, order);
2061
2062 for (i=1; i<fullv && !err; i++) {
2063 if (vlist == NULL || in_gretl_list(vlist, i)) {
2064 got = fread(dset->Z[k], sizeof(double), T, fp);
2065 if (got != T) {
2066 err = E_DATA;
2067 }
2068 if (!err && gdtversion < 1.4) {
2069 /* we need to convert old-style NAs */
2070 na_convert(dset->Z[k], dset->n);
2071 }
2072 k++;
2073 } else {
2074 fseek(fp, offset, SEEK_CUR);
2075 }
2076 }
2077 fclose(fp);
2078 }
2079
2080 free(bname);
2081
2082 if (!err && order != G_BYTE_ORDER) {
2083 gdt_swap_endianness(dset);
2084 }
2085
2086 return err;
2087 }
2088
write_binary_order(PRN * prn)2089 static void write_binary_order (PRN *prn)
2090 {
2091 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
2092 pputs(prn, " binary=\"little-endian\" ");
2093 #else
2094 pputs(prn, " binary=\"big-endian\" ");
2095 #endif
2096 }
2097
2098 /* Here we're testing a series to see if it can be represented in full
2099 precision (discarding any artifacts) using the format "%.15g". The
2100 criterion is that each value must have a least one trailing zero
2101 when printed using "% .14e". We don't necessarily check every
2102 element in the series; assuming a reasonable degree of homogeneity
2103 in the data it ought to be enough if 100 members pass the test.
2104 */
2105
p15_OK(const DATASET * dset,int v)2106 static int p15_OK (const DATASET *dset, int v)
2107 {
2108 const double *x = dset->Z[v];
2109 char s[32];
2110 int t, i, n_ok = 0;
2111 int ret = 1;
2112
2113 for (t=dset->t1; t<=dset->t2; t++) {
2114 if (!na(x[t])) {
2115 sprintf(s, "% .14e", x[t]);
2116 for (i=16; i>13; i--) {
2117 if (s[i] != '0') {
2118 break;
2119 }
2120 }
2121 if (i == 16) {
2122 ret = 0;
2123 break;
2124 }
2125 n_ok++;
2126 }
2127 if (n_ok > 100) {
2128 break;
2129 }
2130 }
2131
2132 return ret;
2133 }
2134
2135 /* apparatus from trimming string-values on gdt save,
2136 and restoration afterwards
2137 */
2138
2139 struct strval_saver_ {
2140 int *list;
2141 gretl_matrix *X;
2142 series_table **st;
2143 size_t sz;
2144 };
2145
2146 typedef struct strval_saver_ strval_saver;
2147
strval_saver_destroy(strval_saver * ss)2148 static void strval_saver_destroy (strval_saver *ss)
2149 {
2150 if (ss != NULL) {
2151 free(ss->list);
2152 gretl_matrix_free(ss->X);
2153 free(ss->st);
2154 free(ss);
2155 }
2156 }
2157
strval_saver_setup(DATASET * dset,int nvars,int nsv,int * list,int * err)2158 static strval_saver *strval_saver_setup (DATASET *dset,
2159 int nvars, int nsv,
2160 int *list, int *err)
2161 {
2162 strval_saver *ss;
2163 int n_changed = 0;
2164 int i, n;
2165
2166 ss = calloc(1, sizeof *ss);
2167 if (ss == NULL) {
2168 *err = E_ALLOC;
2169 return NULL;
2170 }
2171
2172 n = sample_size(dset);
2173 ss->list = gretl_list_new(nsv);
2174 ss->X = gretl_matrix_alloc(n, nsv);
2175 ss->st = calloc(nsv, sizeof *ss->st);
2176 if (ss->list == NULL || ss->X == NULL || ss->st == NULL) {
2177 *err = E_ALLOC;
2178 }
2179
2180 if (!*err) {
2181 double *x = ss->X->val;
2182 int changed;
2183 int v, j = 0;
2184
2185 ss->sz = n * sizeof *x;
2186
2187 for (i=1; i<=nvars && !*err; i++) {
2188 v = savenum(list, i);
2189 if (is_string_valued(dset, v)) {
2190 ss->list[j+1] = v;
2191 ss->st[j] = series_get_string_table(dset, v);
2192 memcpy(x, dset->Z[v] + dset->t1, ss->sz);
2193 *err = series_recode_strings(dset, v, OPT_P, &changed);
2194 if (changed) {
2195 n_changed++;
2196 } else {
2197 ss->st[j] = NULL;
2198 }
2199 j++;
2200 x += n;
2201 }
2202 }
2203 } else {
2204 strval_saver_destroy(ss);
2205 ss = NULL;
2206 }
2207
2208 if (ss != NULL && n_changed == 0) {
2209 /* we have no need for @ss */
2210 strval_saver_destroy(ss);
2211 ss = NULL;
2212 }
2213
2214 return ss;
2215 }
2216
strval_saver_restore(DATASET * dset,strval_saver * ss)2217 static void strval_saver_restore (DATASET *dset,
2218 strval_saver *ss)
2219 {
2220 double *x = ss->X->val;
2221 int n = ss->X->rows;
2222 int i, v;
2223
2224 for (i=1; i<=ss->list[0]; i++) {
2225 v = ss->list[i];
2226 if (ss->st[i-1] != NULL) {
2227 series_destroy_string_table(dset, v);
2228 series_attach_string_table(dset, v, ss->st[i-1]);
2229 memcpy(dset->Z[v] + dset->t1, x, ss->sz);
2230 }
2231 x += n;
2232 }
2233 }
2234
2235 /* end apparatus from trimming string-values */
2236
real_write_gdt(const char * fname,const int * inlist,const DATASET * dset,gretlopt opt,int progress)2237 static int real_write_gdt (const char *fname, const int *inlist,
2238 const DATASET *dset, gretlopt opt,
2239 int progress)
2240 {
2241 PRN *prn = NULL;
2242 int tsamp = dset->t2 - dset->t1 + 1;
2243 int *list = NULL;
2244 char *p15 = NULL;
2245 char startdate[OBSLEN], enddate[OBSLEN];
2246 char datname[MAXLEN], freqstr[32];
2247 char numstr[128], xmlbuf[1024];
2248 const char *gdtver;
2249 int (*show_progress) (double, double, int) = NULL;
2250 strval_saver *ss = NULL;
2251 double dsize = 0;
2252 int i, t, v, ntabs, nvars = 0;
2253 int have_markers, in_c_locale = 0;
2254 int binary = 0, skip_padding = 0;
2255 int gdt_digits = 17;
2256 int uerr = 0;
2257 int err = 0;
2258
2259 /* what are we supposed to be saving? */
2260 if (inlist != NULL) {
2261 int lzero[] = {1, 0};
2262
2263 list = gretl_list_drop(inlist, lzero, &err);
2264 if (err) {
2265 return err;
2266 }
2267 nvars = list[0];
2268 } else {
2269 nvars = dset->v - 1;
2270 }
2271
2272 if (nvars <= 0) {
2273 gretl_errmsg_set("No data to save!");
2274 free(list);
2275 return E_DATA;
2276 }
2277
2278 if (opt & OPT_B) {
2279 binary = G_BYTE_ORDER;
2280 progress = 0;
2281 prn = open_gdt_write_stream(fname, OPT_NONE);
2282 } else {
2283 const char *path = fname;
2284 gchar *fullname = NULL;
2285
2286 if (!has_suffix(fname, ".gdt")) {
2287 /* force use of .gdt extension for native XML data */
2288 path = fullname = g_strdup_printf("%s.gdt", fname);
2289 }
2290 prn = open_gdt_write_stream(path, opt);
2291 g_free(fullname);
2292 }
2293
2294 if (prn == NULL) {
2295 free(list);
2296 return E_FOPEN;
2297 }
2298
2299 dsize = tsamp * nvars * sizeof(double);
2300
2301 if (dsize > 100000) {
2302 fprintf(stderr, "Writing %.0f Kbytes of data\n", dsize / 1024);
2303 } else if (progress) {
2304 /* suppress progress bar for smaller data */
2305 progress = 0;
2306 }
2307
2308 if (!binary) {
2309 p15 = calloc(nvars, 1);
2310 if (p15 != NULL) {
2311 for (i=0; i<nvars; i++) {
2312 v = savenum(list, i+1);
2313 p15[i] = p15_OK(dset, v);
2314 }
2315 }
2316 }
2317
2318 if (progress) {
2319 show_progress = get_plugin_function("show_progress");
2320 if (show_progress == NULL) {
2321 progress = 0;
2322 } else {
2323 (*show_progress)(0, dsize, SP_SAVE_INIT);
2324 }
2325 }
2326
2327 ntolabel(startdate, dset->t1, dset);
2328 ntolabel(enddate, dset->t2, dset);
2329
2330 simple_fname(datname, fname);
2331 uerr = gretl_xml_encode_to_buf(xmlbuf, datname, sizeof xmlbuf);
2332 if (uerr) {
2333 strcpy(xmlbuf, "unknown");
2334 }
2335
2336 if (custom_time_series(dset)) {
2337 sprintf(freqstr, "special:%d", dset->pd);
2338 } else {
2339 sprintf(freqstr, "%d", dset->pd);
2340 }
2341
2342 gdtver = GRETLDATA_VERSION;
2343
2344 if (binary) {
2345 /* support --compat option */
2346 const char *s = get_optval_string(STORE, OPT_C);
2347
2348 if (s != NULL && !strcmp(s, "2018b")) {
2349 gdtver = GRETLDATA_COMPAT;
2350 }
2351 }
2352
2353 pprintf(prn, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
2354 "<!DOCTYPE gretldata SYSTEM \"gretldata.dtd\">\n\n"
2355 "<gretldata version=\"%s\" name=\"%s\" frequency=\"%s\" "
2356 "startobs=\"%s\" endobs=\"%s\" ",
2357 gdtver, xmlbuf, freqstr, startdate, enddate);
2358
2359 pprintf(prn, "type=\"%s\"", data_structure_string(dset->structure));
2360
2361 if (binary) {
2362 write_binary_order(prn);
2363 }
2364 if (dset->rseed > 0) {
2365 /* record resampling info */
2366 pprintf(prn, " rseed=\"%u\"", dset->rseed);
2367 }
2368 if (dset->mapfile != NULL) {
2369 /* record map link */
2370 uerr = gretl_xml_encode_to_buf(xmlbuf, dset->mapfile, sizeof xmlbuf);
2371 if (uerr == 0) {
2372 pprintf(prn, " mapfile=\"%s\"", xmlbuf);
2373 }
2374 }
2375
2376 pputs(prn, ">\n");
2377
2378 have_markers = dataset_has_markers(dset);
2379
2380 if (dataset_is_panel(dset) && !have_markers &&
2381 nvars == dset->v - 1 && dsize > 1024 * 1024 * 10) {
2382 /* we have more than 10 MB of panel data */
2383 int padrows = panel_padding_rows(dset);
2384
2385 if (padrows > 0.4 * dset->n) {
2386 fprintf(stderr, "skip-padding: dropping %d rows\n", padrows);
2387 skip_padding = 1;
2388 tsamp -= padrows;
2389 }
2390 }
2391
2392 /* deal with description, if any */
2393 if (dset->descrip != NULL) {
2394 char *dbuf = gretl_xml_encode(dset->descrip);
2395
2396 if (dbuf != NULL) {
2397 pputs(prn, "<description>");
2398 pputs(prn, dbuf);
2399 pputs(prn, "</description>\n");
2400 free(dbuf);
2401 }
2402 }
2403
2404 gretl_push_c_numeric_locale();
2405 in_c_locale = 1;
2406
2407 /* then listing of variable names and labels */
2408 if (skip_padding) {
2409 pprintf(prn, "<variables count=\"%d\">\n", nvars + 2);
2410 } else {
2411 pprintf(prn, "<variables count=\"%d\">\n", nvars);
2412 }
2413
2414 ntabs = string_table_count(dset, list, nvars);
2415
2416 if (ntabs > 0 && !(opt & OPT_P)) {
2417 /* trimming strvals */
2418 ss = strval_saver_setup((DATASET *) dset, nvars,
2419 ntabs, list, &err);
2420 }
2421
2422 for (i=1; i<=nvars; i++) {
2423 const char *vstr;
2424 int vprop, mpd;
2425
2426 v = savenum(list, i);
2427 gretl_xml_encode_to_buf(xmlbuf, dset->varname[v], sizeof xmlbuf);
2428 pprintf(prn, "<variable name=\"%s\"", xmlbuf);
2429
2430 vstr = series_get_label(dset, v);
2431 if (vstr != NULL && *vstr != '\0') {
2432 uerr = gretl_xml_encode_to_buf(xmlbuf, vstr, sizeof xmlbuf);
2433 if (!uerr) {
2434 pprintf(prn, "\n label=\"%s\"", xmlbuf);
2435 }
2436 }
2437
2438 vstr = series_get_display_name(dset, v);
2439 if (vstr != NULL && *vstr != '\0') {
2440 uerr = gretl_xml_encode_to_buf(xmlbuf, vstr, sizeof xmlbuf);
2441 if (!uerr) {
2442 pprintf(prn, "\n displayname=\"%s\"", xmlbuf);
2443 }
2444 }
2445
2446 vstr = series_get_parent_name(dset, v);
2447 if (vstr != NULL) {
2448 uerr = gretl_xml_encode_to_buf(xmlbuf, vstr, sizeof xmlbuf);
2449 if (!uerr) {
2450 pprintf(prn, "\n parent=\"%s\"", xmlbuf);
2451 }
2452 }
2453
2454 vprop = series_get_transform(dset, v);
2455 if (vprop != 0) {
2456 const char *tr = gretl_command_word(vprop);
2457
2458 pprintf(prn, "\n transform=\"%s\"", tr);
2459 }
2460
2461 vprop = series_get_lag(dset, v);
2462 if (vprop != 0) {
2463 pprintf(prn, "\n lag=\"%d\"", vprop);
2464 }
2465
2466 vprop = series_get_compact_method(dset, v);
2467 if (vprop != COMPACT_NONE) {
2468 const char *meth = compact_method_to_string(vprop);
2469
2470 pprintf(prn, "\n compact-method=\"%s\"", meth);
2471 }
2472
2473 if (series_is_discrete(dset, v)) {
2474 pputs(prn, "\n discrete=\"true\"");
2475 }
2476
2477 if (series_is_coded(dset, v)) {
2478 pputs(prn, "\n coded=\"true\"");
2479 }
2480
2481 if (series_is_midas_anchor(dset, v)) {
2482 pputs(prn, "\n hf-anchor=\"true\"");
2483 }
2484
2485 if ((mpd = series_get_midas_period(dset, v)) > 0) {
2486 pprintf(prn, "\n midas_period=\"%d\"", mpd);
2487 }
2488
2489 if ((mpd = series_get_midas_freq(dset, v)) > 0) {
2490 pprintf(prn, "\n midas_freq=\"%d\"", mpd);
2491 }
2492
2493 if ((mpd = series_get_orig_pd(dset, v)) > 0) {
2494 pprintf(prn, "\n orig_pd=\"%d\"", mpd);
2495 }
2496
2497 pputs(prn, "\n/>\n");
2498 }
2499
2500 if (skip_padding) {
2501 pputs(prn, "<variable name=\"unit__\"\n/>\n");
2502 pputs(prn, "<variable name=\"time__\"\n/>\n");
2503 }
2504
2505 pputs(prn, "</variables>\n");
2506
2507 /* then listing of observations */
2508 pputs(prn, "<observations ");
2509 pprintf(prn, "count=\"%d\" labels=\"%s\"",
2510 tsamp, (have_markers)? "true" : "false");
2511 pputs(prn, ">\n");
2512
2513 if (binary) {
2514 err = write_binary_data(fname, dset, list, nvars, tsamp, opt);
2515 if (!have_markers) {
2516 goto binary_done;
2517 }
2518 }
2519
2520 for (t=dset->t1; t<=dset->t2; t++) {
2521 if (skip_padding && row_is_padding(dset, t, dset->v)) {
2522 continue;
2523 }
2524 pputs(prn, "<obs");
2525 if (have_markers) {
2526 uerr = gretl_xml_encode_to_buf(xmlbuf, dset->S[t], sizeof xmlbuf);
2527 if (!uerr) {
2528 pprintf(prn, " label=\"%s\"", xmlbuf);
2529 }
2530 }
2531 if (binary) {
2532 pputs(prn, " />\n");
2533 continue;
2534 }
2535 pputs(prn, ">");
2536 for (i=1; i<=nvars; i++) {
2537 v = savenum(list, i);
2538 if (na(dset->Z[v][t])) {
2539 strcpy(numstr, "NA ");
2540 } else if (p15 == NULL || p15[i-1] == 0) {
2541 /* use full default precision if required */
2542 sprintf(numstr, "%.*g ", gdt_digits, dset->Z[v][t]);
2543 } else {
2544 sprintf(numstr, "%.15g ", dset->Z[v][t]);
2545 }
2546 pputs(prn, numstr);
2547 }
2548 if (skip_padding) {
2549 int unit = 1 + t / dset->pd;
2550 int time = t % dset->pd + 1;
2551
2552 sprintf(numstr, "%d %d ", unit, time);
2553 pputs(prn, numstr);
2554 }
2555 pputs(prn, "</obs>\n");
2556
2557 if (progress && t && ((t - dset->t1) % 50 == 0)) {
2558 (*show_progress) (50, tsamp, SP_NONE);
2559 }
2560 }
2561
2562 binary_done:
2563
2564 pputs(prn, "</observations>\n");
2565
2566 if (ntabs > 0) {
2567 char *sbuf, **strs;
2568 int j, n_strs;
2569
2570 pprintf(prn, "<string-tables count=\"%d\">\n", ntabs);
2571
2572 for (i=1; i<=nvars; i++) {
2573 v = savenum(list, i);
2574 if (!is_string_valued(dset, v)) {
2575 continue;
2576 }
2577 strs = series_get_string_vals(dset, v, &n_strs, 0);
2578 gretl_xml_encode_to_buf(xmlbuf, dset->varname[v], sizeof xmlbuf);
2579 pprintf(prn, "<valstrings owner=\"%s\" count=\"%d\">", xmlbuf, n_strs);
2580 for (j=0; j<n_strs; j++) {
2581 sbuf = gretl_xml_encode(strs[j]);
2582 if (sbuf == NULL || *sbuf == '\0') {
2583 fprintf(stderr, "string values for var %d: string %d is empty\n",
2584 i, j);
2585 pputs(prn, "\"empty string\" ");
2586 } else {
2587 pprintf(prn, "\"%s\" ", sbuf);
2588 }
2589 free(sbuf);
2590 }
2591 pputs(prn, "</valstrings>\n");
2592 }
2593 pputs(prn, "</string-tables>\n");
2594 }
2595
2596 if (dataset_is_panel(dset)) {
2597 maybe_print_panel_info(dset, skip_padding, prn);
2598 }
2599
2600 pputs(prn, "</gretldata>\n");
2601
2602 if (ss != NULL) {
2603 strval_saver_restore((DATASET *) dset, ss);
2604 strval_saver_destroy(ss);
2605 }
2606
2607 if (in_c_locale) {
2608 gretl_pop_c_numeric_locale();
2609 }
2610
2611 if (progress) {
2612 (*show_progress)(0, dset->t2 - dset->t1 + 1, SP_FINISH);
2613 }
2614
2615 if (p15 != NULL) {
2616 free(p15);
2617 }
2618 free(list);
2619
2620 gretl_print_destroy(prn);
2621
2622 return err;
2623 }
2624
write_purebin(const char * fname,const int * list,const DATASET * dset,gretlopt opt)2625 static int write_purebin (const char *fname, const int *list,
2626 const DATASET *dset, gretlopt opt)
2627 {
2628 int (*writer) (const char *, const int *, const DATASET *,
2629 gretlopt);
2630 int err = 0;
2631
2632 writer = get_plugin_function("purebin_write_data");
2633
2634 if (writer == NULL) {
2635 err = 1;
2636 } else {
2637 err = (*writer)(fname, list, dset, opt);
2638 }
2639
2640 return err;
2641 }
2642
2643 /* zipfile with gdt XML + binary */
2644
write_old_gdtb(const char * fname,const int * list,const DATASET * dset,gretlopt opt)2645 static int write_old_gdtb (const char *fname, const int *list,
2646 const DATASET *dset, gretlopt opt)
2647 {
2648 gchar *zdir;
2649 int err;
2650
2651 zdir = g_strdup_printf("%stmp-zip", gretl_dotdir());
2652 err = gretl_mkdir(zdir);
2653
2654 if (!err) {
2655 char xmlfile[FILENAME_MAX];
2656
2657 gretl_build_path(xmlfile, zdir, "data.xml", NULL);
2658 err = real_write_gdt(xmlfile, list, dset, opt | OPT_B, 0);
2659
2660 if (!err) {
2661 int level = get_compression_option(STORE);
2662
2663 err = gretl_zip_datafile(fname, zdir, level);
2664 if (err) {
2665 gretl_errmsg_ensure("Problem writing data file");
2666 }
2667 }
2668 gretl_deltree(zdir);
2669 }
2670
2671 g_free(zdir);
2672
2673 return err;
2674 }
2675
2676 /**
2677 * gretl_write_gdt:
2678 * @fname: name of file to write.
2679 * @list: list of variables to write (or %NULL to write all).
2680 * @dset: dataset struct.
2681 * @opt: if %OPT_Z write gzipped data, else uncompressed.
2682 * @progress: may be 1 when called from gui to display progress
2683 * bar in case of a large data write; generally should be 0.
2684 *
2685 * Write out in xml a data file containing the values of the given set
2686 * of variables.
2687 *
2688 * Returns: 0 on successful completion, non-zero on error.
2689 */
2690
gretl_write_gdt(const char * fname,const int * list,const DATASET * dset,gretlopt opt,int progress)2691 int gretl_write_gdt (const char *fname, const int *list,
2692 const DATASET *dset, gretlopt opt,
2693 int progress)
2694 {
2695 int gdtb = has_suffix(fname, ".gdtb");
2696 int compat = (opt & OPT_C);
2697 int err = 0;
2698
2699 if (gdtb && compat) {
2700 /* backward-compatible gdtb */
2701 err = write_old_gdtb(fname, list, dset, opt);
2702 } else if (gdtb) {
2703 /* default binary format for gretl >= 2020b */
2704 err = write_purebin(fname, list, dset, opt);
2705 } else {
2706 /* plain gdt file */
2707 err = real_write_gdt(fname, list, dset, opt, progress);
2708 }
2709
2710 return err;
2711 }
2712
transcribe_string(char * targ,const char * src,int maxlen)2713 static void transcribe_string (char *targ, const char *src, int maxlen)
2714 {
2715 *targ = '\0';
2716
2717 strncat(targ, src, maxlen - 1);
2718 }
2719
2720 /* Note: if @probe is non-zero, this means that we're really just
2721 scraping series names from the data file, and so we should not
2722 start allocating memory for dset->Z based on the number of
2723 series we find.
2724 */
2725
process_varlist(xmlNodePtr node,DATASET * dset,int probe)2726 static int process_varlist (xmlNodePtr node, DATASET *dset, int probe)
2727 {
2728 xmlNodePtr cur;
2729 xmlChar *tmp = xmlGetProp(node, (XUC) "count");
2730 int i, nv = 0, err = 0;
2731
2732 if (tmp != NULL) {
2733 if (sscanf((char *) tmp, "%d", &nv) == 1) {
2734 dset->v = nv + 1;
2735 } else {
2736 gretl_errmsg_set(_("Failed to parse count of variables"));
2737 err = E_DATA;
2738 }
2739 if (!err && dataset_allocate_varnames(dset)) {
2740 err = E_ALLOC;
2741 }
2742 if (!err && !probe) {
2743 dset->Z = doubles_array_new(dset->v, 0);
2744 if (dset->Z == NULL) {
2745 err = E_ALLOC;
2746 }
2747 }
2748 free(tmp);
2749 } else {
2750 gretl_errmsg_set(_("Got no variables"));
2751 err = E_DATA;
2752 }
2753
2754 if (err) {
2755 return err;
2756 } else if (nv == 0) {
2757 fprintf(stderr, "Empty dataset!\n");
2758 return 0;
2759 }
2760
2761 /* now get individual variable info: names and labels */
2762 cur = node->xmlChildrenNode;
2763 while (cur && xmlIsBlankNode(cur)) {
2764 cur = cur->next;
2765 }
2766
2767 if (cur == NULL) {
2768 gretl_errmsg_set(_("Got no variables"));
2769 return E_DATA;
2770 }
2771
2772 i = 1;
2773 while (cur != NULL) {
2774 if (!xmlStrcmp(cur->name, (XUC) "variable")) {
2775 tmp = xmlGetProp(cur, (XUC) "name");
2776 if (tmp != NULL) {
2777 if (strcmp((char *) tmp, "catch") && strcmp((char *) tmp, "const")) {
2778 /* temporary hack: allow 'catch', even though it's
2779 officially reserved, on account of Ramanathan
2780 data file data6-2.gdt
2781 */
2782 err = check_varname((const char *) tmp);
2783 }
2784 if (!err) {
2785 transcribe_string(dset->varname[i], (char *) tmp, VNAMELEN);
2786 }
2787 free(tmp);
2788 } else {
2789 gretl_errmsg_sprintf(_("Variable %d has no name"), i);
2790 err = E_DATA;
2791 }
2792 if (err) {
2793 return err;
2794 }
2795 tmp = xmlGetProp(cur, (XUC) "label");
2796 if (tmp != NULL) {
2797 series_set_label(dset, i, (char *) tmp);
2798 free(tmp);
2799 }
2800 tmp = xmlGetProp(cur, (XUC) "displayname");
2801 if (tmp != NULL) {
2802 series_set_display_name(dset, i, (char *) tmp);
2803 free(tmp);
2804 }
2805 tmp = xmlGetProp(cur, (XUC) "parent");
2806 if (tmp != NULL) {
2807 series_set_parent(dset, i, (char *) tmp);
2808 free(tmp);
2809 }
2810 tmp = xmlGetProp(cur, (XUC) "transform");
2811 if (tmp != NULL) {
2812 int ci = gretl_command_number((char *) tmp);
2813
2814 series_set_transform(dset, i, ci);
2815 free(tmp);
2816 }
2817 tmp = xmlGetProp(cur, (XUC) "lag");
2818 if (tmp != NULL) {
2819 series_set_lag(dset, i, atoi((char *) tmp));
2820 free(tmp);
2821 }
2822 tmp = xmlGetProp(cur, (XUC) "compact-method");
2823 if (tmp != NULL) {
2824 series_set_compact_method(dset, i, compact_string_to_int((char *) tmp));
2825 free(tmp);
2826 }
2827 tmp = xmlGetProp(cur, (XUC) "discrete");
2828 if (tmp != NULL) {
2829 if (!strcmp((char *) tmp, "true")) {
2830 series_set_flag(dset, i, VAR_DISCRETE);
2831 }
2832 free(tmp);
2833 }
2834 tmp = xmlGetProp(cur, (XUC) "coded");
2835 if (tmp != NULL) {
2836 if (!strcmp((char *) tmp, "true")) {
2837 series_set_flag(dset, i, VAR_CODED);
2838 }
2839 free(tmp);
2840 }
2841 tmp = xmlGetProp(cur, (XUC) "hf-anchor");
2842 if (tmp != NULL) {
2843 if (!strcmp((char *) tmp, "true")) {
2844 series_set_midas_anchor(dset, i);
2845 }
2846 free(tmp);
2847 }
2848 tmp = xmlGetProp(cur, (XUC) "midas_period");
2849 if (tmp != NULL) {
2850 int mpd = atoi((char *) tmp);
2851
2852 if (mpd > 0) {
2853 series_set_midas_period(dset, i, mpd);
2854 }
2855 free(tmp);
2856 }
2857 tmp = xmlGetProp(cur, (XUC) "midas_freq");
2858 if (tmp != NULL) {
2859 int mpc = atoi((char *) tmp);
2860
2861 if (mpc > 0) {
2862 series_set_midas_freq(dset, i, mpc);
2863 }
2864 free(tmp);
2865 }
2866 tmp = xmlGetProp(cur, (XUC) "orig_pd");
2867 if (tmp != NULL) {
2868 int opd = atoi((char *) tmp);
2869
2870 if (opd > 0) {
2871 series_set_orig_pd(dset, i, opd);
2872 }
2873 free(tmp);
2874 }
2875 i++;
2876 }
2877 cur = cur->next;
2878 }
2879
2880 if (i != dset->v) {
2881 gretl_errmsg_set(_("Number of variables does not match declaration"));
2882 err = E_DATA;
2883 }
2884
2885 return err;
2886 }
2887
process_varlist_subset(xmlNodePtr node,DATASET * dset,int * fullv,const int * vlist)2888 static int process_varlist_subset (xmlNodePtr node, DATASET *dset,
2889 int *fullv, const int *vlist)
2890 {
2891 xmlNodePtr cur;
2892 xmlChar *tmp = xmlGetProp(node, (XUC) "count");
2893 int nv = vlist[0];
2894 int i, k, err = 0;
2895
2896 if (tmp != NULL) {
2897 if (sscanf((char *) tmp, "%d", fullv) != 1) {
2898 err = E_DATA;
2899 }
2900 free(tmp);
2901 } else {
2902 err = E_DATA;
2903 }
2904
2905 if (err) {
2906 return err;
2907 }
2908
2909 *fullv += 1; /* allow for const */
2910
2911 #if GDT_DEBUG
2912 fprintf(stderr, "process_varlist_subset: fullv = %d\n", *fullv);
2913 #endif
2914
2915 cur = node->xmlChildrenNode;
2916 while (cur && xmlIsBlankNode(cur)) {
2917 cur = cur->next;
2918 }
2919
2920 if (cur == NULL) {
2921 gretl_errmsg_set(_("Got no variables"));
2922 return E_DATA;
2923 }
2924
2925 /* allocate the dataset content */
2926
2927 dset->v = nv + 1;
2928 if (!err && dataset_allocate_varnames(dset)) {
2929 err = E_ALLOC;
2930 }
2931 if (!err) {
2932 dset->Z = malloc(dset->v * sizeof *dset->Z);
2933 if (dset->Z == NULL) {
2934 err = E_ALLOC;
2935 }
2936 }
2937
2938 if (err) {
2939 return err;
2940 }
2941
2942 /* actually read the info */
2943
2944 k = i = 1;
2945
2946 while (cur != NULL) {
2947 if (!xmlStrcmp(cur->name, (XUC) "variable")) {
2948 int wanted = 0;
2949
2950 tmp = xmlGetProp(cur, (XUC) "name");
2951 if (tmp != NULL) {
2952 if (in_gretl_list(vlist, i)) {
2953 wanted = 1;
2954 transcribe_string(dset->varname[k], (char *) tmp, VNAMELEN);
2955 }
2956 free(tmp);
2957 } else {
2958 return E_DATA;
2959 }
2960
2961 if (!wanted) {
2962 i++;
2963 cur = cur->next;
2964 continue;
2965 }
2966
2967 tmp = xmlGetProp(cur, (XUC) "label");
2968 if (tmp != NULL) {
2969 series_set_label(dset, k, (char *) tmp);
2970 free(tmp);
2971 }
2972 tmp = xmlGetProp(cur, (XUC) "displayname");
2973 if (tmp != NULL) {
2974 series_set_display_name(dset, k, (char *) tmp);
2975 free(tmp);
2976 }
2977 tmp = xmlGetProp(cur, (XUC) "parent");
2978 if (tmp != NULL) {
2979 series_set_parent(dset, k, (char *) tmp);
2980 free(tmp);
2981 }
2982 tmp = xmlGetProp(cur, (XUC) "transform");
2983 if (tmp != NULL) {
2984 int ci = gretl_command_number((char *) tmp);
2985
2986 series_set_transform(dset, k, ci);
2987 free(tmp);
2988 }
2989 tmp = xmlGetProp(cur, (XUC) "lag");
2990 if (tmp != NULL) {
2991 series_set_lag(dset, k, atoi((char *) tmp));
2992 free(tmp);
2993 }
2994 tmp = xmlGetProp(cur, (XUC) "compact-method");
2995 if (tmp != NULL) {
2996 series_set_compact_method(dset, k, compact_string_to_int((char *) tmp));
2997 free(tmp);
2998 }
2999 tmp = xmlGetProp(cur, (XUC) "discrete");
3000 if (tmp != NULL) {
3001 if (!strcmp((char *) tmp, "true")) {
3002 series_set_flag(dset, k, VAR_DISCRETE);
3003 }
3004 free(tmp);
3005 }
3006 tmp = xmlGetProp(cur, (XUC) "coded");
3007 if (tmp != NULL) {
3008 if (!strcmp((char *) tmp, "true")) {
3009 series_set_flag(dset, k, VAR_CODED);
3010 }
3011 free(tmp);
3012 }
3013 tmp = xmlGetProp(cur, (XUC) "midas_period");
3014 if (tmp != NULL) {
3015 int mpd = atoi((char *) tmp);
3016
3017 if (mpd > 0) {
3018 series_set_midas_period(dset, k, mpd);
3019 }
3020 free(tmp);
3021 }
3022 tmp = xmlGetProp(cur, (XUC) "midas_freq");
3023 if (tmp != NULL) {
3024 int mpf = atoi((char *) tmp);
3025
3026 if (mpf > 0) {
3027 series_set_midas_freq(dset, k, mpf);
3028 }
3029 free(tmp);
3030 }
3031 tmp = xmlGetProp(cur, (XUC) "hf-anchor");
3032 if (tmp != NULL) {
3033 if (!strcmp((char *) tmp, "true")) {
3034 series_set_midas_anchor(dset, k);
3035 }
3036 free(tmp);
3037 }
3038
3039 i++;
3040 k++;
3041 }
3042
3043 cur = cur->next;
3044 }
3045
3046 return err;
3047 }
3048
set_underflow_warning(int n)3049 static void set_underflow_warning (int n)
3050 {
3051 gchar *msg;
3052
3053 msg = g_strdup_printf(_("Data file contains %d subnormal values"), n);
3054 gretl_warnmsg_set(msg);
3055 g_free(msg);
3056 }
3057
3058 /* Read the values for all (or selected) variables at
3059 observation @t */
3060
process_values(DATASET * dset,int t,char * s,int fullv,const int * vlist,int * n_uflow)3061 static int process_values (DATASET *dset,
3062 int t, char *s,
3063 int fullv,
3064 const int *vlist,
3065 int *n_uflow)
3066 {
3067 char *test;
3068 double x;
3069 int i, k = 1;
3070 int err = 0;
3071
3072 gretl_error_clear();
3073
3074 for (i=1; i<fullv && !err; i++) {
3075 while (isspace(*s)) s++;
3076 if (vlist != NULL && !in_gretl_list(vlist, i)) {
3077 s += strcspn(s, " \t\r\n");
3078 } else {
3079 x = strtod(s, &test);
3080 if (errno == ERANGE && SMALLVAL(x)) {
3081 errno = 0; /* underflow, treat as OK? */
3082 fprintf(stderr, "warning, underflow: %g for series %d (%s) at obs %d\n",
3083 x, i, dset->varname[i], t + 1);
3084 *n_uflow += 1;
3085 s = test;
3086 } else if (errno) {
3087 fprintf(stderr, "%s: %d: bad data\n", __FILE__, __LINE__);
3088 perror(NULL);
3089 err = E_DATA;
3090 } else if (!strncmp(test, "NA", 2)) {
3091 x = NADBL;
3092 s = test + 2;
3093 } else if (*test != '\0' && !isspace(*test)) {
3094 err = 1;
3095 } else {
3096 s = test;
3097 }
3098 if (!err && t < dset->n) {
3099 dset->Z[k++][t] = x;
3100 }
3101 }
3102 }
3103
3104 /* check for trailing junk in <obs> line */
3105 s += strspn(s, " \t");
3106 if (*s) {
3107 fprintf(stderr, "Warning: found trailing junk at obs %d:\n'%s'\n",
3108 t + 1, s);
3109 }
3110
3111 if (err && !gretl_errmsg_is_set()) {
3112 gretl_errmsg_sprintf(_("Failed to parse data values at obs %d"), t+1);
3113 }
3114
3115 return err;
3116 }
3117
read_observations(xmlDocPtr doc,xmlNodePtr node,DATASET * dset,double dsize,int binary,double gdtversion,const char * fname)3118 static int read_observations (xmlDocPtr doc, xmlNodePtr node,
3119 DATASET *dset, double dsize,
3120 int binary, double gdtversion,
3121 const char *fname)
3122 {
3123 xmlNodePtr cur;
3124 xmlChar *tmp;
3125 int n, i, t;
3126 int (*show_progress) (double, double, int) = NULL;
3127 int progbar = 0;
3128 int n_uflow = 0;
3129 int err = 0;
3130
3131 tmp = xmlGetProp(node, (XUC) "count");
3132 if (tmp == NULL) {
3133 return E_DATA;
3134 }
3135
3136 if (sscanf((char *) tmp, "%d", &n) == 1) {
3137 dset->n = n;
3138 free(tmp);
3139 } else {
3140 gretl_errmsg_set(_("Failed to parse number of observations"));
3141 free(tmp);
3142 return E_DATA;
3143 }
3144
3145 if (dsize > 100000 && !binary) {
3146 show_progress = get_plugin_function("show_progress");
3147 if (show_progress != NULL) {
3148 progbar = 1;
3149 }
3150 }
3151
3152 tmp = xmlGetProp(node, (XUC) "labels");
3153 if (tmp) {
3154 if (!strcmp((char *) tmp, "true")) {
3155 if (dataset_allocate_obs_markers(dset)) {
3156 return E_ALLOC;
3157 }
3158 }
3159 free(tmp);
3160 } else {
3161 return E_DATA;
3162 }
3163
3164 if (dset->endobs[0] == '\0') {
3165 sprintf(dset->endobs, "%d", dset->n);
3166 }
3167
3168 dset->t2 = dset->n - 1;
3169
3170 for (i=0; i<dset->v; i++) {
3171 dset->Z[i] = malloc(dset->n * sizeof **dset->Z);
3172 if (dset->Z[i] == NULL) {
3173 return E_ALLOC;
3174 }
3175 }
3176
3177 for (t=0; t<dset->n; t++) {
3178 dset->Z[0][t] = 1.0;
3179 }
3180
3181 if (binary) {
3182 err = read_binary_data(fname, dset, binary, gdtversion,
3183 dset->v, NULL);
3184 if (!dset->markers) {
3185 goto bailout;
3186 }
3187 }
3188
3189 /* now get individual obs info: labels and values */
3190 cur = node->xmlChildrenNode;
3191 while (cur && xmlIsBlankNode(cur)) {
3192 cur = cur->next;
3193 }
3194
3195 if (cur == NULL) {
3196 gretl_errmsg_set(_("Got no observations\n"));
3197 return E_DATA;
3198 }
3199
3200 if (progbar) {
3201 (*show_progress)(0, dsize, SP_LOAD_INIT);
3202 #if GDT_DEBUG
3203 fprintf(stderr, "read_observations: inited progess bar (n=%d)\n",
3204 dset->n);
3205 #endif
3206 }
3207
3208 t = 0;
3209 while (cur != NULL) {
3210 if (!xmlStrcmp(cur->name, (XUC) "obs")) {
3211 if (dset->markers) {
3212 tmp = xmlGetProp(cur, (XUC) "label");
3213 if (tmp) {
3214 transcribe_string(dset->S[t], (char *) tmp, OBSLEN);
3215 free(tmp);
3216 } else {
3217 gretl_errmsg_sprintf(_("Case marker missing at obs %d"), t+1);
3218 return E_DATA;
3219 }
3220 }
3221 if (!binary) {
3222 tmp = xmlNodeListGetRawString(doc, cur->xmlChildrenNode, 1);
3223 if (tmp) {
3224 err = process_values(dset, t, (char *) tmp, dset->v, NULL, &n_uflow);
3225 free(tmp);
3226 } else if (dset->v > 1) {
3227 gretl_errmsg_sprintf(_("Values missing at observation %d"), t+1);
3228 err = E_DATA;
3229 }
3230 }
3231 t++;
3232 }
3233
3234 if (err) {
3235 break;
3236 }
3237
3238 cur = cur->next;
3239
3240 if (cur != NULL && t == dset->n) {
3241 /* got too many observations */
3242 t = dset->n + 1;
3243 break;
3244 }
3245
3246 if (progbar && t > 0 && t % 50 == 0) {
3247 (*show_progress) (50, dset->n, SP_NONE);
3248 }
3249 }
3250
3251 bailout:
3252
3253 if (progbar) {
3254 #if GDT_DEBUG
3255 fprintf(stderr, "finalizing progress bar (n = %d)\n", dset->n);
3256 #endif
3257 (*show_progress)(0, dset->n, SP_FINISH);
3258 }
3259
3260 if (!err && t != dset->n) {
3261 gretl_errmsg_set(_("Number of observations does not match declaration"));
3262 err = E_DATA;
3263 }
3264
3265 if (!err && n_uflow > 0) {
3266 set_underflow_warning(n_uflow);
3267 }
3268
3269 return err;
3270 }
3271
read_observations_subset(xmlDocPtr doc,xmlNodePtr node,DATASET * dset,int binary,double gdtversion,const char * fname,int fullv,const int * vlist,gretlopt opt)3272 static int read_observations_subset (xmlDocPtr doc,
3273 xmlNodePtr node,
3274 DATASET *dset,
3275 int binary,
3276 double gdtversion,
3277 const char *fname,
3278 int fullv,
3279 const int *vlist,
3280 gretlopt opt)
3281 {
3282 xmlNodePtr cur;
3283 xmlChar *tmp;
3284 int n, i, t;
3285 int n_uflow = 0;
3286 int err = 0;
3287
3288 tmp = xmlGetProp(node, (XUC) "count");
3289 if (tmp == NULL) {
3290 return E_DATA;
3291 }
3292
3293 if (sscanf((char *) tmp, "%d", &n) == 1) {
3294 dset->n = n;
3295 free(tmp);
3296 } else {
3297 gretl_errmsg_set(_("Failed to parse number of observations"));
3298 free(tmp);
3299 return E_DATA;
3300 }
3301
3302 if (opt & OPT_M) {
3303 tmp = xmlGetProp(node, (XUC) "labels");
3304 if (tmp) {
3305 if (!strcmp((char *) tmp, "true")) {
3306 if (dataset_allocate_obs_markers(dset)) {
3307 return E_ALLOC;
3308 }
3309 }
3310 free(tmp);
3311 } else {
3312 return E_DATA;
3313 }
3314 }
3315
3316 if (dset->endobs[0] == '\0') {
3317 sprintf(dset->endobs, "%d", dset->n);
3318 }
3319
3320 dset->t2 = dset->n - 1;
3321
3322 for (i=0; i<dset->v; i++) {
3323 dset->Z[i] = malloc(dset->n * sizeof **dset->Z);
3324 if (dset->Z[i] == NULL) {
3325 return E_ALLOC;
3326 }
3327 }
3328
3329 for (t=0; t<dset->n; t++) {
3330 dset->Z[0][t] = 1.0;
3331 }
3332
3333 if (binary) {
3334 err = read_binary_data(fname, dset, binary, gdtversion,
3335 fullv, vlist);
3336 if (!dset->markers) {
3337 goto bailout;
3338 }
3339 }
3340
3341 /* now get individual obs info: labels and values */
3342 cur = node->xmlChildrenNode;
3343 while (cur && xmlIsBlankNode(cur)) {
3344 cur = cur->next;
3345 }
3346
3347 if (cur == NULL) {
3348 gretl_errmsg_set(_("Got no observations\n"));
3349 return E_DATA;
3350 }
3351
3352 t = 0;
3353 while (cur != NULL) {
3354 if (!xmlStrcmp(cur->name, (XUC) "obs")) {
3355 if (dset->markers) {
3356 tmp = xmlGetProp(cur, (XUC) "label");
3357 if (tmp) {
3358 transcribe_string(dset->S[t], (char *) tmp, OBSLEN);
3359 free(tmp);
3360 } else {
3361 gretl_errmsg_sprintf(_("Case marker missing at obs %d"), t+1);
3362 return E_DATA;
3363 }
3364 }
3365 if (!binary) {
3366 tmp = xmlNodeListGetRawString(doc, cur->xmlChildrenNode, 1);
3367 if (tmp) {
3368 err = process_values(dset, t, (char *) tmp, fullv, vlist, &n_uflow);
3369 free(tmp);
3370 } else if (dset->v > 1) {
3371 gretl_errmsg_sprintf(_("Values missing at observation %d"), t+1);
3372 err = E_DATA;
3373 }
3374 }
3375 t++;
3376 }
3377
3378 if (err) {
3379 break;
3380 }
3381
3382 cur = cur->next;
3383
3384 if (cur != NULL && t == dset->n) {
3385 /* got too many observations */
3386 t = dset->n + 1;
3387 goto bailout;
3388 }
3389 }
3390
3391 bailout:
3392
3393 if (!err && t != dset->n) {
3394 gretl_errmsg_set(_("Number of observations does not match declaration"));
3395 err = E_DATA;
3396 }
3397
3398 if (!err && n_uflow > 0) {
3399 set_underflow_warning(n_uflow);
3400 }
3401
3402 return err;
3403 }
3404
owner_id(const DATASET * dset,const char * s)3405 static int owner_id (const DATASET *dset, const char *s)
3406 {
3407 int i;
3408
3409 for (i=1; i<dset->v; i++) {
3410 if (!strcmp(s, dset->varname[i])) {
3411 return i;
3412 }
3413 }
3414
3415 return -1;
3416 }
3417
process_string_tables(xmlDocPtr doc,xmlNodePtr node,DATASET * dset,int subset)3418 static int process_string_tables (xmlDocPtr doc,
3419 xmlNodePtr node,
3420 DATASET *dset,
3421 int subset)
3422 {
3423 xmlNodePtr cur = NULL;
3424 xmlChar *tmp;
3425 int ntabs = 0;
3426 int err = 0;
3427
3428 tmp = xmlGetProp(node, (XUC) "count");
3429
3430 if (tmp == NULL) {
3431 err = E_DATA;
3432 } else {
3433 if (sscanf((char *) tmp, "%d", &ntabs) != 1) {
3434 err = E_DATA;
3435 }
3436 free(tmp);
3437 }
3438
3439 #if GDT_DEBUG
3440 fprintf(stderr, "process_string_tables: ntabs=%d, err=%d\n", ntabs, err);
3441 #endif
3442
3443 if (!err) {
3444 cur = node->xmlChildrenNode;
3445 while (cur && xmlIsBlankNode(cur)) {
3446 cur = cur->next;
3447 }
3448 if (cur == NULL) {
3449 err = E_DATA;
3450 }
3451 }
3452
3453 if (err) {
3454 fprintf(stderr, "process_string_tables: returning err = %d\n", err);
3455 return err;
3456 }
3457
3458 while (cur != NULL && !err) {
3459 if (!xmlStrcmp(cur->name, (XUC) "valstrings")) {
3460 xmlChar *owner = xmlGetProp(cur, (XUC) "owner");
3461 series_table *st;
3462 char **strs = NULL;
3463 int v = 0, n_strs = 0;
3464
3465 if (owner == NULL) {
3466 err = E_DATA;
3467 } else {
3468 v = owner_id(dset, (const char *) owner);
3469 if (v <= 0 && !subset) {
3470 fprintf(stderr, "process_string_tables: invalid owner_id\n");
3471 err = E_DATA;
3472 }
3473 }
3474 if (v > 0) {
3475 strs = gretl_xml_get_strings_array(cur, doc, &n_strs,
3476 0, &err);
3477 if (err) {
3478 fprintf(stderr, "process_string_tables: get_strings_array "
3479 "gave error %d\n", err);
3480 } else {
3481 st = series_table_new(strs, n_strs, &err);
3482 if (err) {
3483 strings_array_free(strs, n_strs);
3484 } else {
3485 series_attach_string_table(dset, v, st);
3486 }
3487 }
3488 }
3489 free(owner);
3490 }
3491 if (!err) {
3492 cur = cur->next;
3493 }
3494 }
3495
3496 return err;
3497 }
3498
process_panel_info(xmlNodePtr cur,DATASET * dset,int * repad)3499 static int process_panel_info (xmlNodePtr cur, DATASET *dset,
3500 int *repad)
3501 {
3502 xmlChar *tmp;
3503 double sd0 = 0.0;
3504 int pd = 0;
3505
3506 tmp = xmlGetProp(cur, (XUC) "group-names");
3507 if (tmp != NULL) {
3508 dset->pangrps = (char *) tmp;
3509 }
3510
3511 tmp = xmlGetProp(cur, (XUC) "time-frequency");
3512 if (tmp != NULL) {
3513 pd = atoi((const char *) tmp);
3514 free(tmp);
3515 }
3516
3517 tmp = xmlGetProp(cur, (XUC) "time-start");
3518 if (tmp != NULL) {
3519 sd0 = atof((const char *) tmp);
3520 free(tmp);
3521 }
3522
3523 tmp = xmlGetProp(cur, (XUC) "skip-padding");
3524 if (tmp != NULL) {
3525 *repad = 1;
3526 free(tmp);
3527 }
3528
3529 if (pd > 0 && sd0 > 0.0) {
3530 dset->panel_pd = pd;
3531 dset->panel_sd0 = sd0;
3532 }
3533
3534 return 0;
3535 }
3536
get_gdt_version(xmlNodePtr node)3537 static double get_gdt_version (xmlNodePtr node)
3538 {
3539 xmlChar *tmp = xmlGetProp(node, (XUC) "version");
3540 double v = 1.0;
3541
3542 if (tmp != NULL) {
3543 v = dot_atof((char *) tmp);
3544 free(tmp);
3545 }
3546
3547 return v;
3548 }
3549
xml_get_data_structure(xmlNodePtr node,int * dattype)3550 static int xml_get_data_structure (xmlNodePtr node, int *dattype)
3551 {
3552 xmlChar *tmp = xmlGetProp(node, (XUC) "type");
3553 int err = 0;
3554
3555 if (tmp == NULL) {
3556 gretl_errmsg_set(_("Required attribute 'type' is missing from data file"));
3557 err = 1;
3558 } else {
3559 if (!strcmp((char *) tmp, "cross-section")) {
3560 *dattype = CROSS_SECTION;
3561 } else if (!strcmp((char *) tmp, "time-series")) {
3562 *dattype = TIME_SERIES;
3563 } else if (!strcmp((char *) tmp, "stacked-time-series")) {
3564 *dattype = STACKED_TIME_SERIES;
3565 } else if (!strcmp((char *) tmp, "stacked-cross-section")) {
3566 *dattype = STACKED_CROSS_SECTION;
3567 } else {
3568 gretl_errmsg_set(_("Unrecognized type attribute for data file"));
3569 err = 1;
3570 }
3571 free(tmp);
3572 }
3573
3574 return err;
3575 }
3576
xml_get_data_frequency(xmlNodePtr node,int * pd,int * dattype)3577 static int xml_get_data_frequency (xmlNodePtr node, int *pd, int *dattype)
3578 {
3579 xmlChar *tmp = xmlGetProp(node, (XUC) "frequency");
3580 int err = 0;
3581
3582 *pd = 1;
3583
3584 if (tmp != NULL) {
3585 if (!strncmp((char *) tmp, "special", 7)) {
3586 *dattype = SPECIAL_TIME_SERIES;
3587 if (sscanf((char *) tmp + 7, ":%d", pd) == 1) {
3588 fprintf(stderr, "custom time series, frequency %d\n", *pd);
3589 } else {
3590 fprintf(stderr, "custom time series, using frequency 1\n");
3591 }
3592 } else if (sscanf((char *) tmp, "%d", pd) != 1) {
3593 gretl_errmsg_set(_("Failed to parse data frequency"));
3594 err = 1;
3595 }
3596 free(tmp);
3597 }
3598
3599 return err;
3600 }
3601
likely_calendar(const char * s)3602 static int likely_calendar (const char *s)
3603 {
3604 return strchr(s, '-') || strchr(s, '/');
3605 }
3606
xml_get_startobs(xmlNodePtr node,double * sd0,char * stobs,int caldata)3607 static int xml_get_startobs (xmlNodePtr node, double *sd0, char *stobs,
3608 int caldata)
3609 {
3610 xmlChar *tmp = xmlGetProp(node, (XUC) "startobs");
3611 int err = 0;
3612
3613 if (tmp != NULL) {
3614 char obstr[OBSLEN];
3615 int y, m, d;
3616
3617 obstr[0] = '\0';
3618 strncat(obstr, (char *) tmp, OBSLEN - 1);
3619 gretl_charsub(obstr, ':', '.');
3620
3621 if (sscanf(obstr, "%d/%d/%d", &y, &m, &d) == 3) {
3622 /* handle legacy gdt dates */
3623 gretl_charsub(obstr, '/', '-');
3624 }
3625
3626 if (likely_calendar(obstr) && caldata) {
3627 guint32 ed = get_epoch_day(obstr);
3628
3629 if (ed <= 0) {
3630 err = 1;
3631 } else {
3632 *sd0 = ed;
3633 }
3634 } else {
3635 double x;
3636
3637 if (sscanf(obstr, "%lf", &x) != 1) {
3638 err = 1;
3639 } else {
3640 *sd0 = x;
3641 }
3642 }
3643
3644 if (err) {
3645 gretl_errmsg_set(_("Failed to parse startobs"));
3646 } else {
3647 stobs[0] = '\0';
3648 strncat(stobs, obstr, OBSLEN - 1);
3649 colonize_obs(stobs);
3650 }
3651
3652 free(tmp);
3653 }
3654
3655 return err;
3656 }
3657
xml_get_endobs(xmlNodePtr node,char * endobs,int caldata)3658 static int xml_get_endobs (xmlNodePtr node, char *endobs, int caldata)
3659 {
3660 xmlChar *tmp = xmlGetProp(node, (XUC) "endobs");
3661 int err = 0;
3662
3663 if (tmp != NULL) {
3664 char obstr[OBSLEN];
3665
3666 obstr[0] = '\0';
3667 strncat(obstr, (char *) tmp, OBSLEN - 1);
3668 gretl_charsub(obstr, ':', '.');
3669
3670 if (caldata) {
3671 int y, m, d;
3672 guint32 ed;
3673
3674 if (sscanf(obstr, "%d/%d/%d", &y, &m, &d) == 3) {
3675 /* handle legacy gdt file */
3676 gretl_charsub(obstr, '/', '-');
3677 }
3678
3679 ed = get_epoch_day(obstr);
3680 if (ed <= 0) {
3681 err = 1;
3682 }
3683 } else {
3684 double x;
3685
3686 if (sscanf(obstr, "%lf", &x) != 1) {
3687 err = 1;
3688 }
3689 }
3690
3691 if (err) {
3692 gretl_errmsg_set(_("Failed to parse endobs"));
3693 } else {
3694 endobs[0] = '\0';
3695 strncat(endobs, obstr, OBSLEN - 1);
3696 colonize_obs(endobs);
3697 }
3698
3699 free(tmp);
3700 }
3701
3702 return err;
3703 }
3704
gdt_binary_order(xmlNodePtr node)3705 static int gdt_binary_order (xmlNodePtr node)
3706 {
3707 xmlChar *tmp = xmlGetProp(node, (XUC) "binary");
3708 int ret = 0;
3709
3710 if (tmp != NULL) {
3711 if (!strcmp((char *) tmp, "little-endian")) {
3712 ret = G_LITTLE_ENDIAN;
3713 } else if (!strcmp((char *) tmp, "big-endian")) {
3714 ret = G_BIG_ENDIAN;
3715 }
3716 free(tmp);
3717 }
3718
3719 return ret;
3720 }
3721
lag_from_label(int v,const DATASET * dset,int * lag)3722 static int lag_from_label (int v, const DATASET *dset, int *lag)
3723 {
3724 const char *test = series_get_label(dset, v);
3725 char pm, fmt[20], vname[VNAMELEN];
3726 int pv = 0;
3727
3728 if (test == NULL) {
3729 return 0;
3730 }
3731
3732 sprintf(fmt, "= %%%d[^(](t %%c %%d)", VNAMELEN - 1);
3733
3734 if (sscanf(test, fmt, vname, &pm, lag) == 3) {
3735 pv = series_index(dset, vname);
3736 pv = (pv < dset->v)? pv : 0;
3737 }
3738
3739 return pv;
3740 }
3741
dummy_child_from_label(int v,const DATASET * dset)3742 static int dummy_child_from_label (int v, const DATASET *dset)
3743 {
3744 const char *test = series_get_label(dset, v);
3745 char vname[VNAMELEN];
3746 double val;
3747 int pv = 0;
3748
3749 if (test == NULL) {
3750 return 0;
3751 }
3752
3753 if (sscanf(test, _("dummy for %s = %lf"), vname, &val) == 2 ||
3754 sscanf(test, "dummy for %s = %lf", vname, &val) == 2) {
3755 pv = series_index(dset, vname);
3756 pv = (pv < dset->v)? pv : 0;
3757 }
3758
3759 return pv;
3760 }
3761
record_transform_info(DATASET * dset,double version)3762 static void record_transform_info (DATASET *dset, double version)
3763 {
3764 int i, p, pv;
3765
3766 for (i=1; i<dset->v; i++) {
3767 if (series_get_transform(dset, i) == LAGS) {
3768 /* already handled */
3769 continue;
3770 }
3771 pv = lag_from_label(i, dset, &p);
3772 if (pv > 0) {
3773 series_set_parent(dset, i, dset->varname[pv]);
3774 series_set_transform(dset, i, LAGS);
3775 series_set_lag(dset, i, p);
3776 } else if (version < 1.1) {
3777 pv = dummy_child_from_label(i, dset);
3778 if (pv > 0) {
3779 series_set_parent(dset, i, dset->varname[pv]);
3780 series_set_transform(dset, i, DUMMIFY);
3781 }
3782 }
3783 }
3784 }
3785
data_read_message(const char * fname,DATASET * dset,PRN * prn)3786 static void data_read_message (const char *fname, DATASET *dset, PRN *prn)
3787 {
3788 pprintf(prn, _("\nRead datafile %s\n"), fname);
3789 pprintf(prn, _("periodicity: %d, maxobs: %d\n"
3790 "observations range: %s to %s\n"),
3791 (custom_time_series(dset))? 1 : dset->pd,
3792 dset->n, dset->stobs, dset->endobs);
3793 pputc(prn, '\n');
3794 }
3795
get_filesize(const char * fname)3796 static long get_filesize (const char *fname)
3797 {
3798 struct stat buf;
3799 int err;
3800
3801 err = gretl_stat(fname, &buf);
3802
3803 return (err)? -1 : buf.st_size;
3804 }
3805
remedy_empty_data(DATASET * dset)3806 static int remedy_empty_data (DATASET *dset)
3807 {
3808 int err = dataset_add_series(dset, 1);
3809
3810 if (!err) {
3811 int t;
3812
3813 strcpy(dset->varname[1], "index");
3814 series_set_label(dset, 1, _("index variable"));
3815 for (t=0; t<dset->n; t++) {
3816 dset->Z[1][t] = (double) (t + 1);
3817 }
3818 }
3819
3820 return err;
3821 }
3822
check_for_daily_date_strings(DATASET * dset)3823 static void check_for_daily_date_strings (DATASET *dset)
3824 {
3825 int m, d, n;
3826 int y1 = 0, y2 = 0;
3827 int oldfmt = 0;
3828
3829 n = sscanf(dset->S[0], YMD_READ_FMT, &y1, &m, &d);
3830 if (n != 3) {
3831 oldfmt = 1;
3832 n = sscanf(dset->S[0], "%d/%d/%d", &y1, &m, &d);
3833 }
3834
3835 if (n == 3) {
3836 int k = dset->n - 1;
3837
3838 if (oldfmt) {
3839 n = sscanf(dset->S[k], "%d/%d/%d", &y2, &m, &d);
3840 } else {
3841 n = sscanf(dset->S[k], YMD_READ_FMT, &y2, &m, &d);
3842 }
3843 }
3844
3845 if (n == 3 && y2 >= y1) {
3846 dset->markers = DAILY_DATE_STRINGS;
3847 }
3848 }
3849
replace_panel_padding(DATASET * dset)3850 static int replace_panel_padding (DATASET *dset)
3851 {
3852 int uv = dset->v - 2;
3853 int tv = dset->v - 1;
3854 int err = 0;
3855
3856 if (!strcmp(dset->varname[uv], "unit__") &&
3857 !strcmp(dset->varname[tv], "time__")) {
3858 err = set_panel_structure_from_vars(uv, tv, dset);
3859 if (!err) {
3860 dataset_drop_last_variables(dset, 2);
3861 }
3862 }
3863
3864 return err;
3865 }
3866
future_datafile_warning(double v1,double v2)3867 static void future_datafile_warning (double v1, double v2)
3868 {
3869 const char *fmt =
3870 _("The version of this datafile (%g) is higher than that fully\n"
3871 "supported by this build of gretl (%g). Some features may not be\n"
3872 "correctly recognized.\n");
3873
3874 gretl_warnmsg_sprintf(fmt, v1, v2);
3875 }
3876
real_read_gdt(const char * fname,const char * srcname,DATASET * dset,gretlopt opt,PRN * prn)3877 static int real_read_gdt (const char *fname, const char *srcname,
3878 DATASET *dset, gretlopt opt, PRN *prn)
3879 {
3880 DATASET *tmpset;
3881 xmlDocPtr doc = NULL;
3882 xmlNodePtr cur;
3883 int gotvars = 0, gotobs = 0, err = 0;
3884 int caldata = 0, repad = 0;
3885 double gdtversion = 1.0;
3886 double myversion;
3887 int in_c_locale = 0;
3888 int gz, binary = 0;
3889 long fsz;
3890
3891 gretl_error_clear();
3892 fsz = get_filesize(fname);
3893 gz = is_gzipped(fname);
3894
3895 if (fsz < 0) {
3896 return E_FOPEN;
3897 } else if (fsz > 100000) {
3898 fprintf(stderr, "%s %.0f Kbytes %s...\n",
3899 gz ? "Uncompressing" : "Reading",
3900 (double) fsz / 1024.0, "of data");
3901 }
3902
3903 tmpset = datainfo_new();
3904 if (tmpset == NULL) {
3905 err = E_ALLOC;
3906 goto bailout;
3907 }
3908
3909 err = gretl_xml_open_doc_root(fname, "gretldata",
3910 &doc, &cur);
3911 if (err) {
3912 goto bailout;
3913 }
3914
3915 gdtversion = get_gdt_version(cur);
3916 myversion = dot_atof(GRETLDATA_VERSION);
3917 if (gdtversion > myversion) {
3918 future_datafile_warning(gdtversion, myversion);
3919 }
3920
3921 /* optional */
3922 gretl_xml_get_prop_as_unsigned_int(cur, "rseed", &tmpset->rseed);
3923
3924 /* optional */
3925 gretl_xml_get_prop_as_string(cur, "mapfile", &tmpset->mapfile);
3926
3927 /* set some required datainfo parameters */
3928
3929 err = xml_get_data_structure(cur, &tmpset->structure);
3930 if (err) {
3931 goto bailout;
3932 }
3933
3934 err = xml_get_data_frequency(cur, &tmpset->pd, &tmpset->structure);
3935 if (err) {
3936 goto bailout;
3937 }
3938
3939 gretl_push_c_numeric_locale();
3940 in_c_locale = 1;
3941
3942 strcpy(tmpset->stobs, "1");
3943 caldata = dataset_is_daily(tmpset) || dataset_is_weekly(tmpset);
3944
3945 err = xml_get_startobs(cur, &tmpset->sd0, tmpset->stobs, caldata);
3946 if (err) {
3947 goto bailout;
3948 }
3949
3950 *tmpset->endobs = '\0';
3951 caldata = calendar_data(tmpset);
3952
3953 err = xml_get_endobs(cur, tmpset->endobs, caldata);
3954 if (err) {
3955 goto bailout;
3956 }
3957
3958 binary = gdt_binary_order(cur);
3959
3960 #if GDT_DEBUG
3961 fprintf(stderr, "starting to walk XML tree...\n");
3962 #endif
3963
3964 /* Now walk the tree */
3965 cur = cur->xmlChildrenNode;
3966 while (cur != NULL && !err) {
3967 if (!xmlStrcmp(cur->name, (XUC) "description")) {
3968 tmpset->descrip = (char *)
3969 xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
3970 } else if (!xmlStrcmp(cur->name, (XUC) "variables")) {
3971 err = process_varlist(cur, tmpset, 0);
3972 if (err) {
3973 fprintf(stderr, "error processing varlist\n");
3974 } else {
3975 gotvars = 1;
3976 }
3977 } else if (!xmlStrcmp(cur->name, (XUC) "observations")) {
3978 if (!gotvars) {
3979 gretl_errmsg_set(_("Variables information is missing"));
3980 err = 1;
3981 } else {
3982 double dsize = (opt & OPT_B)? (double) fsz : 0;
3983
3984 err = read_observations(doc, cur, tmpset, dsize,
3985 binary, gdtversion, fname);
3986 if (err) {
3987 fprintf(stderr, "error %d in read_observations\n", err);
3988 } else {
3989 gotobs = 1;
3990 }
3991 }
3992 } else if (!xmlStrcmp(cur->name, (XUC) "string-tables")) {
3993 if (!gotvars) {
3994 gretl_errmsg_set(_("Variables information is missing"));
3995 err = E_DATA;
3996 } else {
3997 err = process_string_tables(doc, cur, tmpset, 0);
3998 if (err) {
3999 fprintf(stderr, "error %d processing string tables\n", err);
4000 }
4001 }
4002 } else if (!xmlStrcmp(cur->name, (XUC) "panel-info")) {
4003 if (!gotvars) {
4004 gretl_errmsg_set(_("Variables information is missing"));
4005 err = E_DATA;
4006 } else {
4007 err = process_panel_info(cur, tmpset, &repad);
4008 if (err) {
4009 fprintf(stderr, "error %d processing panel info\n", err);
4010 }
4011 }
4012 }
4013 if (!err) {
4014 cur = cur->next;
4015 }
4016 }
4017
4018 #if GDT_DEBUG
4019 fprintf(stderr, "done walking XML tree, err = %d\n", err);
4020 #endif
4021
4022 if (!err && !gotvars) {
4023 gretl_errmsg_set(_("Variables information is missing"));
4024 err = 1;
4025 }
4026
4027 if (!err && !gotobs) {
4028 gretl_errmsg_set(_("No observations were found"));
4029 err = 1;
4030 }
4031
4032 if (!err && caldata && tmpset->S != NULL) {
4033 check_for_daily_date_strings(tmpset);
4034 }
4035
4036 if (!err && repad) {
4037 err = replace_panel_padding(tmpset);
4038 }
4039
4040 if (!err) {
4041 if (srcname == NULL) {
4042 srcname = fname;
4043 }
4044 data_read_message(srcname, tmpset, prn);
4045 err = merge_or_replace_data(dset, &tmpset,
4046 get_merge_opts(opt), prn);
4047 }
4048
4049 bailout:
4050
4051 if (in_c_locale) {
4052 gretl_pop_c_numeric_locale();
4053 }
4054
4055 if (doc != NULL) {
4056 xmlFreeDoc(doc);
4057 }
4058
4059 if (dset != NULL) {
4060 /* pre-process stacked cross-sectional panels: put into canonical
4061 stacked time series form
4062 */
4063 if (!err && dset->structure == STACKED_CROSS_SECTION) {
4064 err = switch_panel_orientation(dset);
4065 }
4066
4067 if (!err && dset->v == 1) {
4068 err = remedy_empty_data(dset);
4069 }
4070
4071 if (!err && gdtversion < 1.2) {
4072 record_transform_info(dset, gdtversion);
4073 }
4074 }
4075
4076 if (err && tmpset != NULL) {
4077 destroy_dataset(tmpset);
4078 }
4079
4080 if (err && err != E_ALLOC) {
4081 /* ensure we don't just show "unspecified error" */
4082 gchar *msg;
4083
4084 if (err == E_DATA) {
4085 msg = g_strdup_printf(_("'%s': malformed gdt file"), fname);
4086 } else {
4087 msg = g_strdup_printf(_("Couldn't read '%s'"), fname);
4088 }
4089
4090 gretl_errmsg_ensure(msg);
4091 g_free(msg);
4092 }
4093
4094 #if GDT_DEBUG
4095 fprintf(stderr, "gretl_read_gdt: returning %d\n", err);
4096 #endif
4097
4098 return err;
4099 }
4100
real_read_gdt_subset(const char * fname,DATASET * dset,const int * vlist,gretlopt opt)4101 static int real_read_gdt_subset (const char *fname,
4102 DATASET *dset,
4103 const int *vlist,
4104 gretlopt opt)
4105 {
4106 DATASET *tmpset;
4107 xmlDocPtr doc = NULL;
4108 xmlNodePtr cur;
4109 double gdtversion = 1.0;
4110 int gotvars = 0, gotobs = 0;
4111 int caldata = 0;
4112 int in_c_locale = 0;
4113 int binary = 0;
4114 int fullv = 0;
4115 int err = 0;
4116
4117 gretl_error_clear();
4118
4119 tmpset = datainfo_new();
4120 if (tmpset == NULL) {
4121 err = E_ALLOC;
4122 goto bailout;
4123 }
4124
4125 err = gretl_xml_open_doc_root(fname, "gretldata", &doc, &cur);
4126 if (err) {
4127 goto bailout;
4128 }
4129
4130 /* set some datainfo parameters */
4131
4132 err = xml_get_data_structure(cur, &tmpset->structure);
4133 if (err) {
4134 goto bailout;
4135 }
4136
4137 err = xml_get_data_frequency(cur, &tmpset->pd, &tmpset->structure);
4138 if (err) {
4139 goto bailout;
4140 }
4141
4142 gdtversion = get_gdt_version(cur);
4143
4144 gretl_push_c_numeric_locale();
4145 in_c_locale = 1;
4146
4147 strcpy(tmpset->stobs, "1");
4148 caldata = dataset_is_daily(tmpset) || dataset_is_weekly(tmpset);
4149
4150 err = xml_get_startobs(cur, &tmpset->sd0, tmpset->stobs, caldata);
4151 if (err) {
4152 goto bailout;
4153 }
4154
4155 *tmpset->endobs = '\0';
4156 caldata = calendar_data(tmpset);
4157
4158 err = xml_get_endobs(cur, tmpset->endobs, caldata);
4159 if (err) {
4160 goto bailout;
4161 }
4162
4163 binary = gdt_binary_order(cur);
4164
4165 #if GDT_DEBUG
4166 fprintf(stderr, "%s: starting to walk XML tree...\n", fname);
4167 #endif
4168
4169 /* Now walk the tree */
4170 cur = cur->xmlChildrenNode;
4171 while (cur != NULL && !err) {
4172 if (!xmlStrcmp(cur->name, (XUC) "variables")) {
4173 err = process_varlist_subset(cur, tmpset, &fullv, vlist);
4174 if (!err) {
4175 gotvars = 1;
4176 }
4177 } else if (!xmlStrcmp(cur->name, (XUC) "observations")) {
4178 if (!gotvars) {
4179 gretl_errmsg_set(_("Variables information is missing"));
4180 err = E_DATA;
4181 } else {
4182 err = read_observations_subset(doc, cur, tmpset,
4183 binary, gdtversion,
4184 fname, fullv, vlist,
4185 opt);
4186 }
4187 if (!err) {
4188 gotobs = 1;
4189 }
4190 } else if (!xmlStrcmp(cur->name, (XUC) "string-tables")) {
4191 if (!gotvars) {
4192 gretl_errmsg_set(_("Variables information is missing"));
4193 err = 1;
4194 } else {
4195 err = process_string_tables(doc, cur, tmpset, 1);
4196 }
4197 }
4198 if (!err) {
4199 cur = cur->next;
4200 }
4201 }
4202
4203 #if GDT_DEBUG
4204 fprintf(stderr, "done walking XML tree...\n");
4205 #endif
4206
4207 if (!err && !gotvars) {
4208 gretl_errmsg_set(_("Variables information is missing"));
4209 err = 1;
4210 }
4211
4212 if (!err && !gotobs) {
4213 gretl_errmsg_set(_("No observations were found"));
4214 err = 1;
4215 }
4216
4217 if (!err && caldata && tmpset->S != NULL) {
4218 check_for_daily_date_strings(tmpset);
4219 }
4220
4221 bailout:
4222
4223 if (in_c_locale) {
4224 gretl_pop_c_numeric_locale();
4225 }
4226
4227 if (doc != NULL) {
4228 xmlFreeDoc(doc);
4229 }
4230
4231 if (!err) {
4232 *dset = *tmpset;
4233 free(tmpset);
4234 } else {
4235 destroy_dataset(tmpset);
4236 }
4237
4238 #if GDT_DEBUG
4239 fprintf(stderr, "real_read_gdt_subset: returning %d\n", err);
4240 #endif
4241
4242 return err;
4243 }
4244
real_read_gdt_varnames(const char * fname,char *** vnames,int * nvars)4245 static int real_read_gdt_varnames (const char *fname,
4246 char ***vnames,
4247 int *nvars)
4248 {
4249 DATASET *tmpset;
4250 xmlDocPtr doc = NULL;
4251 xmlNodePtr cur;
4252 int gotvars = 0;
4253 int caldata = 0;
4254 int in_c_locale = 0;
4255 int err = 0;
4256
4257 gretl_error_clear();
4258
4259 tmpset = datainfo_new();
4260 if (tmpset == NULL) {
4261 err = E_ALLOC;
4262 goto bailout;
4263 }
4264
4265 err = gretl_xml_open_doc_root(fname, "gretldata", &doc, &cur);
4266 if (err) {
4267 goto bailout;
4268 }
4269
4270 /* set some datainfo parameters */
4271
4272 err = xml_get_data_structure(cur, &tmpset->structure);
4273 if (err) {
4274 goto bailout;
4275 }
4276
4277 err = xml_get_data_frequency(cur, &tmpset->pd, &tmpset->structure);
4278 if (err) {
4279 goto bailout;
4280 }
4281
4282 gretl_push_c_numeric_locale();
4283 in_c_locale = 1;
4284
4285 strcpy(tmpset->stobs, "1");
4286 caldata = dataset_is_daily(tmpset) || dataset_is_weekly(tmpset);
4287
4288 err = xml_get_startobs(cur, &tmpset->sd0, tmpset->stobs, caldata);
4289 if (err) {
4290 goto bailout;
4291 }
4292
4293 *tmpset->endobs = '\0';
4294 caldata = calendar_data(tmpset);
4295
4296 err = xml_get_endobs(cur, tmpset->endobs, caldata);
4297 if (err) {
4298 goto bailout;
4299 }
4300
4301 /* Now walk the tree */
4302 cur = cur->xmlChildrenNode;
4303 while (cur != NULL && !err) {
4304 if (!xmlStrcmp(cur->name, (XUC) "variables")) {
4305 err = process_varlist(cur, tmpset, 1);
4306 if (!err) {
4307 gotvars = 1;
4308 }
4309 break;
4310 }
4311 cur = cur->next;
4312 }
4313
4314 if (!err && !gotvars) {
4315 gretl_errmsg_set(_("Variables information is missing"));
4316 err = 1;
4317 }
4318
4319 bailout:
4320
4321 if (in_c_locale) {
4322 gretl_pop_c_numeric_locale();
4323 }
4324
4325 if (doc != NULL) {
4326 xmlFreeDoc(doc);
4327 }
4328
4329 if (!err) {
4330 *vnames = tmpset->varname;
4331 *nvars = tmpset->v;
4332 tmpset->varname = NULL;
4333 }
4334
4335 destroy_dataset(tmpset);
4336
4337 return err;
4338 }
4339
read_gbin(const char * fname,DATASET * dset,gretlopt opt,PRN * prn)4340 static int read_gbin (const char *fname, DATASET *dset,
4341 gretlopt opt, PRN *prn)
4342 {
4343 int (*reader) (const char *, DATASET *, gretlopt,
4344 PRN *prn);
4345 int err = 0;
4346
4347 reader = get_plugin_function("purebin_read_data");
4348
4349 if (reader == NULL) {
4350 err = 1;
4351 } else {
4352 err = (*reader)(fname, dset, opt, prn);
4353 }
4354
4355 return err;
4356 }
4357
read_gbin_subset(const char * fname,DATASET * dset,int * vlist,gretlopt opt)4358 static int read_gbin_subset (const char *fname, DATASET *dset,
4359 int *vlist, gretlopt opt)
4360 {
4361 int (*reader) (const char *, DATASET *, int *, gretlopt);
4362 int err = 0;
4363
4364 reader = get_plugin_function("purebin_read_subset");
4365
4366 if (reader == NULL) {
4367 err = 1;
4368 } else {
4369 err = (*reader)(fname, dset, vlist, opt);
4370 }
4371
4372 return err;
4373 }
4374
read_gbin_varnames(const char * fname,char *** pS,int * pns)4375 static int read_gbin_varnames (const char *fname, char ***pS,
4376 int *pns)
4377 {
4378 int (*reader) (const char *, char ***, int *);
4379 int err = 0;
4380
4381 reader = get_plugin_function("purebin_read_varnames");
4382
4383 if (reader == NULL) {
4384 err = 1;
4385 } else {
4386 err = (*reader)(fname, pS, pns);
4387 }
4388
4389 return err;
4390 }
4391
is_purebin_file(const char * fname)4392 static int is_purebin_file (const char *fname)
4393 {
4394 FILE *fp = gretl_fopen(fname, "rb");
4395 int ret = 0;
4396
4397 if (fp != NULL) {
4398 char buf[16];
4399
4400 if (fread(buf, 1, 14, fp) == 14 &&
4401 !strcmp(buf, "gretl-purebin")) {
4402 ret = 1;
4403 }
4404 fclose(fp);
4405 }
4406
4407 return ret;
4408 }
4409
4410 /**
4411 * gretl_read_gdt:
4412 * @fname: name of file to open for reading.
4413 * @dset: dataset struct.
4414 * @opt: use OPT_B to display gui progress bar; may also
4415 * use OPT_T when appending to panel data (see the "append"
4416 * command in the gretl manual). Otherwise use OPT_NONE.
4417 * @prn: where any messages should be written.
4418 *
4419 * Read data from native file into gretl's workspace.
4420 *
4421 * Returns: 0 on successful completion, non-zero otherwise.
4422 */
4423
gretl_read_gdt(const char * fname,DATASET * dset,gretlopt opt,PRN * prn)4424 int gretl_read_gdt (const char *fname, DATASET *dset,
4425 gretlopt opt, PRN *prn)
4426 {
4427 int gdtb = has_suffix(fname, ".gdtb");
4428
4429 if (gdtb && is_purebin_file(fname)) {
4430 return read_gbin(fname, dset, opt, prn);
4431 } else if (gdtb) {
4432 /* zipfile with gdt + binary */
4433 gchar *zdir;
4434 int id = -1;
4435 int err;
4436
4437 #ifdef HAVE_MPI
4438 if (gretl_mpi_initialized()) {
4439 id = gretl_mpi_rank();
4440 }
4441 #endif
4442 if (id >= 0) {
4443 zdir = g_strdup_printf("%stmp%d-unzip", gretl_dotdir(), id);
4444 } else {
4445 zdir = g_strdup_printf("%stmp-unzip", gretl_dotdir());
4446 }
4447 err = gretl_mkdir(zdir);
4448
4449 if (!err) {
4450 err = gretl_unzip_into(fname, zdir);
4451 if (err) {
4452 gretl_errmsg_ensure("Problem opening data file");
4453 } else {
4454 char xmlfile[FILENAME_MAX];
4455
4456 gretl_build_path(xmlfile, zdir, "data.xml", NULL);
4457 err = real_read_gdt(xmlfile, fname, dset, opt, prn);
4458 }
4459 gretl_deltree(zdir);
4460 }
4461
4462 g_free(zdir);
4463 return err;
4464 } else {
4465 /* plain XML file */
4466 return real_read_gdt(fname, NULL, dset, opt, prn);
4467 }
4468 }
4469
4470 /**
4471 * gretl_read_gdt_subset:
4472 * @fname: name of file to open for reading.
4473 * @dset: dataset struct.
4474 * @vlist: list of series to extract.
4475 * @opt: may include OPT_M to retrieve the observation
4476 * markers associated with the data, if any.
4477 *
4478 * Read specified series from native file into @dset,
4479 * which should be "empty" on input.
4480 *
4481 * Returns: 0 on successful completion, non-zero otherwise.
4482 */
4483
gretl_read_gdt_subset(const char * fname,DATASET * dset,int * vlist,gretlopt opt)4484 int gretl_read_gdt_subset (const char *fname, DATASET *dset,
4485 int *vlist, gretlopt opt)
4486 {
4487 int gdtb = has_suffix(fname, ".gdtb");
4488 int err = 0;
4489
4490 if (gdtb && is_purebin_file(fname)) {
4491 err = read_gbin_subset(fname, dset, vlist, opt);
4492 } else if (gdtb) {
4493 /* zipfile with gdt + binary */
4494 gchar *zdir;
4495 int err;
4496
4497 zdir = g_strdup_printf("%stmp-unzip", gretl_dotdir());
4498 err = gretl_mkdir(zdir);
4499
4500 if (!err) {
4501 err = gretl_unzip_into(fname, zdir);
4502 if (err) {
4503 gretl_errmsg_ensure("Problem opening data file");
4504 } else {
4505 char xmlfile[FILENAME_MAX];
4506
4507 gretl_build_path(xmlfile, zdir, "data.xml", NULL);
4508 err = real_read_gdt_subset(xmlfile, dset, vlist, opt);
4509 }
4510 gretl_deltree(zdir);
4511 }
4512
4513 g_free(zdir);
4514 } else {
4515 /* plain XML file */
4516 err = real_read_gdt_subset(fname, dset, vlist, opt);
4517 }
4518
4519 #if GDT_DEBUG
4520 fprintf(stderr, "gretl_read_gdt_subset: returning %d\n", err);
4521 #endif
4522
4523 return err;
4524 }
4525
4526 /**
4527 * gretl_read_gdt_varnames:
4528 * @fname: name of file to open for reading.
4529 * @vnames: location to receive array of series names.
4530 * @nvars: location to receive the number of series.
4531 *
4532 * Read the array of series names from the specified file.
4533 *
4534 * Returns: 0 on successful completion, non-zero otherwise.
4535 */
4536
gretl_read_gdt_varnames(const char * fname,char *** vnames,int * nvars)4537 int gretl_read_gdt_varnames (const char *fname,
4538 char ***vnames,
4539 int *nvars)
4540 {
4541 int gdtb = has_suffix(fname, ".gdtb");
4542 int err = 0;
4543
4544 if (gdtb && is_purebin_file(fname)) {
4545 err = read_gbin_varnames(fname, vnames, nvars);
4546 } else if (gdtb) {
4547 /* zipfile with gdt + binary */
4548 gchar *zdir;
4549 int err;
4550
4551 zdir = g_strdup_printf("%stmp-unzip", gretl_dotdir());
4552 err = gretl_mkdir(zdir);
4553
4554 if (!err) {
4555 err = gretl_unzip_into(fname, zdir);
4556 if (err) {
4557 gretl_errmsg_ensure("Problem opening data file");
4558 } else {
4559 char xmlfile[FILENAME_MAX];
4560
4561 gretl_build_path(xmlfile, zdir, "data.xml", NULL);
4562 err = real_read_gdt_varnames(xmlfile, vnames,
4563 nvars);
4564 }
4565 gretl_deltree(zdir);
4566 }
4567
4568 g_free(zdir);
4569 } else {
4570 /* plain XML file */
4571 err = real_read_gdt_varnames(fname, vnames, nvars);
4572 }
4573
4574 return err;
4575 }
4576
4577 /**
4578 * gretl_get_gdt_description:
4579 * @fname: name of file to try.
4580 *
4581 * Read data description for gretl native data file.
4582 *
4583 * Returns: buffer containing description, or NULL on failure.
4584 */
4585
gretl_get_gdt_description(const char * fname,int * err)4586 char *gretl_get_gdt_description (const char *fname, int *err)
4587 {
4588 xmlDocPtr doc;
4589 xmlNodePtr cur;
4590 int found = 0;
4591 xmlChar *buf = NULL;
4592
4593 gretl_error_clear();
4594
4595 if (has_suffix(fname, ".gdtb")) {
4596 gretl_errmsg_set("Binary data file, cannot access description");
4597 *err = E_DATA;
4598 return NULL;
4599 }
4600
4601 *err = gretl_xml_open_doc_root(fname, "gretldata", &doc, &cur);
4602 if (*err) {
4603 return NULL;
4604 }
4605
4606 cur = cur->xmlChildrenNode;
4607 while (cur != NULL && !found) {
4608 if (!xmlStrcmp(cur->name, (XUC) "description")) {
4609 buf = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
4610 found = 1;
4611 }
4612 cur = cur->next;
4613 }
4614
4615 if (!found) {
4616 gretl_errmsg_set("No description was found");
4617 *err = E_DATA;
4618 }
4619
4620 xmlFreeDoc(doc);
4621
4622 return (char *) buf;
4623 }
4624
gretl_xml_get_doc_type(const char * fname,int * err)4625 static char *gretl_xml_get_doc_type (const char *fname, int *err)
4626 {
4627 xmlDocPtr doc;
4628 xmlNodePtr node;
4629 char *ret = NULL;
4630
4631 doc = xmlParseFile(fname);
4632
4633 if (doc == NULL) {
4634 gretl_errmsg_sprintf(_("xmlParseFile failed on %s"), fname);
4635 *err = E_DATA;
4636 } else {
4637 node = xmlDocGetRootElement(doc);
4638 if (node == NULL) {
4639 gretl_errmsg_sprintf(_("%s: empty document"), fname);
4640 *err = E_DATA;
4641 } else {
4642 ret = gretl_strdup((char *) node->name);
4643 if (ret == NULL) {
4644 *err = E_ALLOC;
4645 }
4646 }
4647 }
4648
4649 if (doc != NULL) {
4650 xmlFreeDoc(doc);
4651 }
4652
4653 return ret;
4654 }
4655
4656 /* This is called in response to the "include" command in
4657 the CLI program, the GUI program, and in interact.c,
4658 if we detect that the named file is XML.
4659 */
4660
load_XML_functions_file(const char * fname,gretlopt opt,PRN * prn)4661 int load_XML_functions_file (const char *fname, gretlopt opt, PRN *prn)
4662 {
4663 char *rootname = NULL;
4664 int err = 0;
4665
4666 rootname = gretl_xml_get_doc_type(fname, &err);
4667 if (err) {
4668 return err;
4669 }
4670
4671 if (!strcmp(rootname, "gretl-functions")) {
4672 if (has_suffix(fname, ".gfn")) {
4673 err = include_gfn(fname, opt, prn);
4674 } else {
4675 err = read_session_functions_file(fname);
4676 }
4677 } else {
4678 err = E_DATA;
4679 }
4680
4681 free(rootname);
4682
4683 return err;
4684 }
4685
gretl_xml_init(void)4686 void gretl_xml_init (void)
4687 {
4688 xmlInitParser();
4689 }
4690
gretl_xml_cleanup(void)4691 void gretl_xml_cleanup (void)
4692 {
4693 xmlCleanupParser();
4694 }
4695