1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #define FULL_XML_HEADERS
21 #include "libgretl.h"
22 #include "version.h"
23 #include "gretl_xml.h"
24 #include "gretl_string_table.h"
25 #include "csvdata.h"
26 #include "importer.h"
27
28 #include <errno.h>
29
30 #ifdef WIN32
31 # include "gretl_win32.h"
32 #else
33 # include <unistd.h>
34 #endif
35
36 #include <gtk/gtk.h>
37
38 #define ODS_IMPORTER
39
40 #include "import_common.c"
41
42 #define ODEBUG 0
43
/* Classification of the content of an ODS table cell, derived
   from the cell's "value-type" attribute */

enum {
    ODS_NONE = 0,  /* no recognized value type: treated as blank */
    ODS_NUMERIC,   /* float, percentage or currency */
    ODS_DATE,      /* date */
    ODS_TIME,      /* time */
    ODS_BOOL,      /* boolean */
    ODS_STRING     /* string */
};
52
53 #define XOFF_UNDEF 999999
54
55 typedef struct ods_table_ ods_table;
56 typedef struct ods_sheet_ ods_sheet;
57
58 struct ods_table_ {
59 char *name;
60 xmlNodePtr node;
61 int rows; /* total rows defined */
62 int cols; /* total columns */
63 int xoffset; /* offset to first non-blank column */
64 int yoffset; /* offset to first non-blank row */
65 int empty; /* has any content (0) or not (1) */
66 };
67
68 struct ods_sheet_ {
69 int flags;
70 xmlDocPtr doc; /* document pointer */
71 int n_tables; /* number of tables */
72 ods_table **tables; /* pointers to table info */
73 int seltab; /* number of selected table */
74 int xoffset; /* col offset chosen by user */
75 int yoffset; /* row offset chosen by user */
76 DATASET *dset; /* dataset struct */
77 int *codelist; /* list of string-valued variables */
78 gretl_string_table *st; /* table for the above */
79 };
80
ods_table_new(xmlNodePtr node,int * err)81 static ods_table *ods_table_new (xmlNodePtr node, int *err)
82 {
83 ods_table *tab = NULL;
84 char *name;
85
86 name = (char *) xmlGetProp(node, (XUC) "name");
87 if (name == NULL) {
88 *err = E_DATA;
89 return NULL;
90 }
91
92 tab = malloc(sizeof *tab);
93
94 if (tab != NULL) {
95 tab->name = name;
96 tab->node = node;
97 tab->rows = tab->cols = 0;
98 tab->xoffset = XOFF_UNDEF;
99 tab->yoffset = 0;
100 tab->empty = 1;
101 } else {
102 *err = E_ALLOC;
103 free(name);
104 }
105
106 return tab;
107 }
108
109 static int
ods_sheet_add_table(ods_sheet * sheet,ods_table * tab)110 ods_sheet_add_table (ods_sheet *sheet, ods_table *tab)
111 {
112 int n = sheet->n_tables;
113 ods_table **tabs = NULL;
114
115 tabs = realloc(sheet->tables, (n+1) * sizeof *sheet->tables);
116 if (tabs == NULL) {
117 return E_ALLOC;
118 }
119
120 sheet->tables = tabs;
121 sheet->tables[n] = tab;
122 sheet->n_tables += 1;
123
124 return 0;
125 }
126
ods_sheet_new(xmlDocPtr doc,int * err)127 static ods_sheet *ods_sheet_new (xmlDocPtr doc, int *err)
128 {
129 ods_sheet *sheet;
130
131 sheet = malloc(sizeof *sheet);
132
133 if (sheet != NULL) {
134 sheet->flags = 0;
135 sheet->doc = doc;
136 sheet->n_tables = 0;
137 sheet->tables = NULL;
138 sheet->seltab = -1;
139 sheet->xoffset = 0;
140 sheet->yoffset = 0;
141 sheet->dset = NULL;
142 sheet->codelist = NULL;
143 sheet->st = NULL;
144 } else {
145 *err = E_ALLOC;
146 }
147
148 return sheet;
149 }
150
/* Free a table struct along with its name string; @tab must not
   be NULL. The XML node is not touched. */

static void ods_table_free (ods_table *tab)
{
    char *tname = tab->name;

    free(tname);
    free(tab);
}
156
/* Release everything held by @sheet: its tables, the XML document,
   the dataset, the list of string-valued variables and the string
   table, then the struct itself. A NULL @sheet is accepted.
*/

static void ods_sheet_free (ods_sheet *sheet)
{
    int k;

    if (sheet == NULL) {
        return;
    }

    for (k = 0; k < sheet->n_tables; k++) {
        ods_table_free(sheet->tables[k]);
    }
    free(sheet->tables);

    if (sheet->doc != NULL) {
        xmlFreeDoc(sheet->doc);
    }

    destroy_dataset(sheet->dset);
    free(sheet->codelist);

    if (sheet->st != NULL) {
        gretl_string_table_destroy(sheet->st);
    }

    free(sheet);
}
180
/* Write a one-line summary of @tab to stderr: its name, then either
   "(empty)" or its dimensions, offsets and data-area size. */

static void ods_table_print (ods_table *tab)
{
    int datarows, datacols;

    fprintf(stderr, "Table \"%s\": ", tab->name);

    if (tab->empty) {
        fputs("(empty)\n", stderr);
        return;
    }

    /* the data area is what remains after the leading blanks */
    datarows = tab->rows - tab->yoffset;
    datacols = tab->cols - tab->xoffset;

    fprintf(stderr, "%d x %d, xoff = %d, yoff = %d, data area %d x %d\n",
            tab->rows, tab->cols, tab->xoffset, tab->yoffset,
            datarows, datacols);
}
193
/* Write to stderr the number of tables in @sheet followed by a
   summary line for each table. Does nothing if @sheet is NULL. */

static void ods_sheet_print (ods_sheet *sheet)
{
    int k;

    if (sheet == NULL) {
        return;
    }

    fprintf(stderr, "Sheet: %d tables\n", sheet->n_tables);

    for (k = 0; k < sheet->n_tables; k++) {
        ods_table_print(sheet->tables[k]);
    }
}
206
/* Drop (and free) all tables flagged as empty, keeping the
   remaining tables in their original order. Returns E_DATA if
   @sheet is NULL or if no tables are left afterwards (in the
   latter case a message is written to @prn), otherwise 0.
*/

static int ods_sheet_prune (ods_sheet *sheet, PRN *prn)
{
    int src, kept = 0;

    if (sheet == NULL) {
        return E_DATA;
    }

    /* compact the array in place: non-empty tables slide down
       over the slots of the empty ones */
    for (src = 0; src < sheet->n_tables; src++) {
        ods_table *tab = sheet->tables[src];

        if (tab->empty) {
            ods_table_free(tab);
        } else {
            sheet->tables[kept++] = tab;
        }
    }

    sheet->n_tables = kept;

    if (kept == 0) {
        pputs(prn, "File contains no data");
        return E_DATA;
    }

    return 0;
}
235
/* Map the "value-type" attribute of the cell @node to one of the
   ODS_* codes. Returns ODS_NONE if the attribute is absent or its
   value is not recognized.
*/

static int get_ods_value_type (xmlNodePtr node)
{
    static const struct {
        const char *label;
        int code;
    } typemap[] = {
        { "float",      ODS_NUMERIC },
        { "percentage", ODS_NUMERIC },
        { "currency",   ODS_NUMERIC },
        { "date",       ODS_DATE },
        { "time",       ODS_TIME },
        { "boolean",    ODS_BOOL },
        { "string",     ODS_STRING }
    };
    char *attr = (char *) xmlGetProp(node, (XUC) "value-type");
    int vtype = ODS_NONE;
    size_t k;

    if (attr != NULL) {
        for (k = 0; k < sizeof typemap / sizeof typemap[0]; k++) {
            if (strcmp(attr, typemap[k].label) == 0) {
                vtype = typemap[k].code;
                break;
            }
        }
        free(attr);
    }

    return vtype;
}
264
/* Run non_numeric_check() on the sheet's dataset. On success the
   resulting list of variables and string table are attached to
   @sheet; on failure @sheet is left alone and the error code is
   returned.
*/

static int ods_non_numeric_check (ods_sheet *sheet, PRN *prn)
{
    gretl_string_table *strtab = NULL;
    int *strvars = NULL;
    int err;

    err = non_numeric_check(sheet->dset, &strvars, &strtab, prn);
    if (err) {
        return err;
    }

    sheet->codelist = strvars;
    sheet->st = strtab;

    return 0;
}
280
/* Returns 1 if the text content of the first "p" child element
   of @cur is exactly "#N/A", otherwise 0 (including the case
   where there's no such child). */

static int p_content_NA (xmlNodePtr cur)
{
    xmlNodePtr kid;

    for (kid = cur->xmlChildrenNode; kid != NULL; kid = kid->next) {
        if (xmlStrcmp(kid->name, (XUC) "p") == 0) {
            /* only the first "p" element is examined */
            char *text = (char *) xmlNodeGetContent(kid);
            int is_na = (text != NULL && strcmp(text, "#N/A") == 0);

            free(text);
            return is_na;
        }
    }

    return 0;
}
305
/* Get the string content of the cell @cur: the "string-value"
   attribute if present, otherwise the text content of the first
   "p" child element. Returns NULL if neither is available. The
   caller takes ownership of the result (callers in this file
   release it with free()).
*/

static char *get_ods_string_value (xmlNodePtr cur)
{
    char *attr = (char *) xmlGetProp(cur, (XUC) "string-value");
    xmlNodePtr kid;

    if (attr != NULL) {
        return attr;
    }

    for (kid = cur->xmlChildrenNode; kid != NULL; kid = kid->next) {
        if (xmlStrcmp(kid->name, (XUC) "p") == 0) {
            return (char *) xmlNodeGetContent(kid);
        }
    }

    return NULL;
}
327
/* Returns 1 if the "boolean-value" attribute of the cell @cur is
   "true"; returns 0 for any other value or if the attribute is
   missing. */

static int get_ods_bool_value (xmlNodePtr cur)
{
    char *attr = (char *) xmlGetProp(cur, (XUC) "boolean-value");
    int truth;

    if (attr == NULL) {
        return 0;
    }

    truth = !strcmp(attr, "true");
    free(attr);

    return truth;
}
341
/* Get the numerical value of the cell @cur from its "value"
   attribute, converted via atof(). Returns NADBL if the attribute
   is missing, or if the value is recorded as "0" while the cell's
   text content reads "#N/A".
*/

static double get_ods_numeric_value (xmlNodePtr cur)
{
    char *attr = (char *) xmlGetProp(cur, (XUC) "value");
    double x = NADBL;

    if (attr == NULL) {
        return x;
    }

    /* a "0" value displayed as "#N/A" is treated as missing:
       the displayed text is consulted only when the value is "0" */
    if (strcmp(attr, "0") != 0 || !p_content_NA(cur)) {
        x = atof(attr);
    }

    free(attr);

    return x;
}
359
/* A cell counts as having content if it carries a recognized
   value type. Returns 1 if so, otherwise 0. */

static int ods_cell_has_content (xmlNodePtr node)
{
    int vtype = get_ods_value_type(node);

    return vtype != ODS_NONE;
}
364
/* Returns the number of sheet rows represented by the row element
   @p, as given by its "number-rows-repeated" attribute; this is 1
   if the attribute is missing or empty.

   A value that converts to less than 1 (zero, negative or
   non-numeric) is not a valid repeat count and is treated as 1:
   the element itself always stands for at least one row, and the
   callers' row accounting relies on a positive height.
*/

static int ods_row_height (xmlNodePtr p)
{
    char *s;
    int h = 1;

    s = (char *) xmlGetProp(p, (XUC) "number-rows-repeated");
    if (s != NULL) {
        if (*s != '\0') {
            h = atoi(s);
            if (h < 1) {
                /* invalid repeat count: fall back to a single row */
                h = 1;
            }
        }
        free(s);
    }

    return h;
}
380
/* Returns the number of sheet columns represented by the cell
   element @p, as given by its "number-columns-repeated" attribute;
   this is 1 if the attribute is missing or empty.

   A value that converts to less than 1 (zero, negative or
   non-numeric) is not a valid repeat count and is treated as 1.
   The callers advance their column counters by this amount, so a
   non-positive width could stall them or drive them negative.
*/

static int ods_cell_width (xmlNodePtr p)
{
    char *s;
    int w = 1;

    s = (char *) xmlGetProp(p, (XUC) "number-columns-repeated");
    if (s != NULL) {
        if (*s != '\0') {
            w = atoi(s);
            if (w < 1) {
                /* invalid repeat count: fall back to a single column */
                w = 1;
            }
        }
        free(s);
    }

    return w;
}
396
ods_name(int t)397 static const char *ods_name (int t)
398 {
399 if (t == ODS_NONE)
400 return "blank";
401 if (t == ODS_NUMERIC)
402 return "numerical value";
403 if (t == ODS_DATE)
404 return "date string";
405 if (t == ODS_TIME)
406 return "time string";
407 if (t == ODS_BOOL)
408 return "boolean";
409 if (t == ODS_STRING)
410 return "string";
411
412 return "blank";
413 }
414
/* Report a type mismatch at read-area row @i, column @j (both
   zero-based): the message gives the position in one-based sheet
   terms and, where one is available, the name of the variable
   concerned, then states the expected type @etype and the type
   actually found, @vtype. Always returns E_DATA.
*/

static int ods_error (ods_sheet *sheet,
                      int i, int j,
                      int etype, int vtype,
                      PRN *prn)
{
    int srow = i + sheet->yoffset + 1;
    int scol = j + sheet->xoffset + 1;
    int v = 0; /* series ID, if applicable */

    pprintf(prn, _("Sheet row %d, column %d"), srow, scol);

    /* a variable name is relevant only for data rows when the
       names were read from the sheet */
    if (!(sheet->flags & BOOK_AUTO_VARNAMES) && i != 0) {
        v = (sheet->flags & BOOK_OBS_LABELS)? j : j + 1;
    }

    if (v > 0 && v < sheet->dset->v) {
        pprintf(prn, " (\"%s\"):\n", sheet->dset->varname[v]);
    } else {
        pputs(prn, ":\n");
    }

    pprintf(prn, _("expected %s but found %s"),
            ods_name(etype), ods_name(vtype));
    pputc(prn, '\n');

    return E_DATA;
}
443
/* Second-pass handling of a string-valued cell. If observation @t
   of series @i was marked NON_NUMERIC, look up the string @s in the
   sheet's string table for each of the (up to) @nr consecutive
   series covered by the cell, starting at @i, and record the
   resulting index as the data value. Returns E_DATA if a lookup
   fails to give a positive index, otherwise 0.
*/

static int ods_handle_stringval (ods_sheet *sheet,
                                 int i, int t,
                                 const char *s,
                                 int nr,
                                 PRN *prn)
{
    int k;

    if (sheet->dset->Z[i][t] != NON_NUMERIC) {
        /* nothing to be done */
        return 0;
    }

    for (k = 0; k < nr; k++) {
        int var = i + k;
        int idx;

        if (var >= sheet->dset->v) {
            /* the repeat runs off the end of the dataset */
            break;
        }

        idx = gretl_string_table_index(sheet->st, s, var, 0, prn);
        if (idx <= 0) {
            return E_DATA;
        }

        sheet->dset->Z[var][t] = (double) idx;
    }

    return 0;
}
468
/* Read the content of the cell element @cur.

   @iread is the zero-based row within the reading area, and
   *preadcol the zero-based column; on return *preadcol has been
   advanced by the width of the cell. On pass 1 the cell is treated
   as a variable name, an observation label or a data value,
   depending on its position and on the flags set on @sheet; string
   data are just marked as NON_NUMERIC. On pass 2 only string-valued
   data cells belonging to variables in the sheet's codelist are
   processed, via ods_handle_stringval().

   Returns 0 on success or E_DATA on failure: an index out of
   bounds, or a cell whose type doesn't fit its position.
*/

static int real_read_cell (xmlNodePtr cur,
                           ods_sheet *sheet, int pass,
                           int iread, int *preadcol,
                           PRN *prn)
{
#if ODEBUG
    int verbose = 1;
#else
    int verbose = 0;
#endif
    char *val = NULL;
    int jread = *preadcol;
    int obscol = (sheet->flags & BOOK_OBS_LABELS)? 1 : 0;
    int blank0 = (sheet->flags & BOOK_OBS_BLANK)? 1 : 0;
    int vnames = (sheet->flags & BOOK_AUTO_VARNAMES)? 0 : 1;
    int nr, j, v, vj, t, vtype;
    double x = NADBL;
    int err = 0;

    /* series ID and observation index implied by the position */
    v = jread + 1 - obscol;
    t = iread - vnames;

    /* @v is used as an array index below, so reject a negative
       value as well as one that's too big */
    if (v < 0 || v >= sheet->dset->v || t >= sheet->dset->n) {
        fprintf(stderr, "v = %d, t = %d: out of bounds?\n", v, t);
        return E_DATA;
    }

    vtype = get_ods_value_type(cur);
    nr = ods_cell_width(cur);

    *preadcol += nr;

#if ODEBUG
    fprintf(stderr, "real_read_cell: i=%d, j=%d, v=%d, t=%d\n",
            iread, jread, v, t);
#endif

    if (pass == 2) {
        /* just going after string-valued variables */
        if (iread == 0 && vnames) {
            return 0;
        } else if (jread == 0 && obscol) {
            return 0;
        } else if (vtype == ODS_STRING && in_gretl_list(sheet->codelist, v)) {
            val = get_ods_string_value(cur);
            err = ods_handle_stringval(sheet, v, t, val, nr, prn);
            free(val);
        }
        return err;
    }

    /* reading a variable name? */

    if (iread == 0 && vnames) {
        /* if the top-left cell was not found, shift by one */
        jread += blank0;
        v += blank0;
        if (jread == 0 && obscol) {
            return 0;
        }
        if (vtype == ODS_STRING) {
            val = get_ods_string_value(cur);
            if (val == NULL) {
                err = ods_error(sheet, iread, jread, ODS_STRING,
                                ODS_NONE, prn);
            } else if (v >= sheet->dset->v) {
                /* the shift above was applied after the bounds
                   check: don't write past the end of varname */
                fprintf(stderr, "v = %d, t = %d: out of bounds?\n", v, t);
                err = E_DATA;
                free(val);
            } else {
                *sheet->dset->varname[v] = '\0';
                strncat(sheet->dset->varname[v], val, VNAMELEN - 1);
                err = check_imported_varname(sheet->dset->varname[v],
                                             v, iread, jread, prn);
                free(val);
            }
        } else if (vtype != ODS_NONE) {
            err = ods_error(sheet, iread, jread, ODS_STRING,
                            vtype, prn);
        }
        return err;
    }

    /* reading an observation label? */

    if (jread == 0 && obscol) {
        if (vtype == ODS_STRING) {
            val = get_ods_string_value(cur);
            if (val != NULL) {
                if (verbose) {
                    fprintf(stderr, " obs string: '%s'\n", val);
                }
            } else {
                err = ods_error(sheet, iread, jread, ODS_STRING,
                                ODS_NONE, prn);
            }
        } else if (vtype == ODS_DATE) {
            val = (char *) xmlGetProp(cur, (XUC) "date-value");
            if (val != NULL) {
                if (verbose) {
                    fprintf(stderr, " date: '%s'\n", val);
                }
            } else {
                err = ods_error(sheet, iread, jread, ODS_DATE,
                                ODS_NONE, prn);
            }
        } else if (vtype == ODS_NUMERIC) {
            val = (char *) xmlGetProp(cur, (XUC) "value");
            if (val != NULL) {
                if (verbose) {
                    fprintf(stderr, " numeric obs: '%s'\n", val);
                }
            } else {
                err = ods_error(sheet, iread, jread, ODS_NUMERIC,
                                ODS_NONE, prn);
            }
        } else {
            err = ods_error(sheet, iread, jread, ODS_DATE,
                            vtype, prn);
        }

        if (!err) {
            gretl_utf8_strncat_trim(sheet->dset->S[t], val, OBSLEN - 1);
        }

        free(val);

        return err;
    }

    /* reading actual data */

    if (vtype == ODS_NUMERIC) {
        x = get_ods_numeric_value(cur);
#if ODEBUG
        fprintf(stderr, " float: %.15g\n", x);
#endif
    } else if (vtype == ODS_BOOL) {
        x = get_ods_bool_value(cur);
#if ODEBUG
        fprintf(stderr, " boolean: %g\n", x);
#endif
    } else if (vtype == ODS_NONE) {
        /* blank: the value stays as NADBL */
#if ODEBUG
        fprintf(stderr, " blank: NA?\n");
#endif
    } else if (vtype == ODS_STRING) {
        val = get_ods_string_value(cur);
        if (val != NULL && import_na_string(val)) {
            /* a string signifying "missing": leave as NADBL */
#if ODEBUG
            fprintf(stderr, " string: NA?\n");
#endif
        } else if (val != NULL && *val != '\0') {
            /* mark for processing on the second pass */
            x = NON_NUMERIC;
            sheet->flags |= BOOK_NON_NUMERIC;
        }
        free(val);
    } else {
        fprintf(stderr, " vtype = %d??\n", vtype);
        err = E_DATA;
    }

    if (err) {
        ods_error(sheet, iread, jread, ODS_NUMERIC, vtype, prn);
    } else {
        /* a repeated cell gives the same value to @nr
           consecutive series */
        for (j=0, vj=v; j<nr && vj<sheet->dset->v; j++, vj++) {
            sheet->dset->Z[vj][t] = x;
        }
    }

    return err;
}
636
/* Process the cells of the row element @cur, which corresponds to
   row @readrow of the reading area. Cells that start to the left
   of the column offset are skipped; reading stops at the first
   error or once the reading-area width has been covered.
   Returns 0 on success or the error code from real_read_cell().
*/

static int read_data_row (xmlNodePtr cur,
                          ods_table *tab,
                          ods_sheet *sheet,
                          int pass,
                          int readrow,
                          PRN *prn)
{
    int ncols = tab->cols - sheet->xoffset;
    int tabcol = 0; /* column position within the table */
    int readcol = 0; /* column position within the reading area */
    xmlNodePtr cell;
    int err = 0;

    for (cell = cur->xmlChildrenNode;
         cell != NULL && !err && readcol < ncols;
         cell = cell->next) {
        if (xmlStrcmp(cell->name, (XUC) "table-cell") != 0) {
            continue;
        }
        if (tabcol >= sheet->xoffset) {
            /* note: this advances readcol */
            err = real_read_cell(cell, sheet, pass,
                                 readrow, &readcol,
                                 prn);
        }
        tabcol += ods_cell_width(cell);
    }

    return err;
}
664
/* Create the dataset for @sheet, sized according to the dimensions
   of @tab, the chosen offsets, and whether the reading area holds
   a row of variable names and/or a column of observation labels.
   Each series from 1 upwards gets a fallback name of the form "vN".
   Returns E_DATA if there would be no observations or no series
   besides the constant, E_ALLOC on allocation failure, otherwise 0.
*/

static int sheet_allocate_data (ods_sheet *sheet,
                                ods_table *tab)
{
    int autonames = (sheet->flags & BOOK_AUTO_VARNAMES)? 1 : 0;
    int labels = (sheet->flags & BOOK_OBS_LABELS)? 1 : 0;
    /* one row is used up by variable names, unless automatic */
    int nobs = tab->rows - sheet->yoffset - 1 + autonames;
    /* allow for the constant; a labels column is not a series */
    int nvars = tab->cols - sheet->xoffset + 1 - labels;
    int k;

    if (nobs <= 0 || nvars <= 1) {
        return E_DATA;
    }

    fprintf(stderr, "sheet_allocate_data: n=%d, v=%d\n",
            nobs, nvars);

    sheet->dset = create_new_dataset(nvars, nobs, labels);
    if (sheet->dset == NULL) {
        return E_ALLOC;
    }

    /* write fallback variable names */
    for (k = 1; k < nvars; k++) {
        sprintf(sheet->dset->varname[k], "v%d", k);
    }

    return 0;
}
700
701 /* Look at the cells in the top left-hand corner of the reading area
702 of the table: try to determine (a) if we have an observations
703 column and (b) if we have a varnames row.
704 */
705
706 static int
analyse_top_left(ods_sheet * sheet,ods_table * tab)707 analyse_top_left (ods_sheet *sheet, ods_table *tab)
708 {
709 xmlNodePtr colp, rowp = tab->node->xmlChildrenNode;
710 xmlNodePtr p00 = NULL, p01 = NULL, p10 = NULL;
711 int readcols = tab->cols - sheet->xoffset;
712 int nr, tabcol, tabrow = 0;
713 int done = 0;
714 int err = 0;
715
716 fprintf(stderr, "analyse_top_left: sheet->xoffset = %d, readcols = %d\n",
717 sheet->xoffset, readcols);
718
719 while (!err && rowp != NULL && !done) {
720 if (!xmlStrcmp(rowp->name, (XUC) "table-row")) {
721 nr = ods_row_height(rowp);
722 if (tabrow == sheet->yoffset) {
723 colp = rowp->xmlChildrenNode;
724 tabcol = 0;
725 while (colp != NULL && !err) {
726 if (!xmlStrcmp(colp->name, (XUC) "table-cell")) {
727 if (tabcol == sheet->xoffset) {
728 p00 = colp;
729 } else if (tabcol == sheet->xoffset + 1) {
730 p01 = colp;
731 }
732 tabcol += ods_cell_width(colp);
733 }
734 colp = colp->next;
735 }
736 } else if (tabrow == sheet->yoffset + 1) {
737 colp = rowp->xmlChildrenNode;
738 tabcol = 0;
739 while (colp != NULL && !err) {
740 if (!xmlStrcmp(colp->name, (XUC) "table-cell")) {
741 if (tabcol == sheet->xoffset) {
742 p10 = colp;
743 }
744 tabcol += ods_cell_width(colp);
745 }
746 colp = colp->next;
747 }
748 }
749 tabrow += nr;
750 }
751 if (readcols > 1) {
752 done = (p01 != NULL && p10 != NULL);
753 } else {
754 done = (p10 != NULL);
755 }
756 rowp = rowp->next;
757 }
758
759 if (!done) {
760 fprintf(stderr, "analyse_top_left: failed\n");
761 err = E_DATA;
762 } else {
763 int vt10, vt00 = ODS_NONE, vt01 = ODS_NONE;
764
765 if (p00 != NULL) {
766 vt00 = get_ods_value_type(p00);
767 fprintf(stderr, "cell(0,0): type = %s\n", ods_name(vt00));
768 } else {
769 fprintf(stderr, "cell(0,0): blank\n");
770 sheet->flags |= BOOK_OBS_BLANK;
771 }
772
773 vt10 = get_ods_value_type(p10);
774 fprintf(stderr, "cell(1,0): type = %s\n", ods_name(vt10));
775
776 if (p01 == NULL) {
777 /* single column */
778 if (vt00 != ODS_STRING) {
779 sheet->flags |= BOOK_AUTO_VARNAMES;
780 }
781 } else {
782 vt01 = get_ods_value_type(p01);
783 fprintf(stderr, "cell(0,1): type = %s\n", ods_name(vt01));
784 if (vt01 != ODS_STRING) {
785 sheet->flags |= BOOK_AUTO_VARNAMES;
786 }
787 if (vt00 == ODS_NONE) {
788 sheet->flags |= BOOK_OBS_LABELS;
789 } else if (vt00 == ODS_STRING) {
790 char *val = get_ods_string_value(p00);
791
792 fprintf(stderr, "cell(0,0): val = '%s'\n", val);
793 if (import_obs_label(val)) {
794 fprintf(stderr, "looks like obs label\n");
795 sheet->flags |= BOOK_OBS_LABELS;
796 }
797 free(val);
798 }
799 }
800 }
801
802 fprintf(stderr, "analyse_top_left: vnames=%d, obscol=%d, returning %d\n",
803 (sheet->flags & BOOK_AUTO_VARNAMES)? 0 : 1,
804 (sheet->flags & BOOK_OBS_LABELS)? 1 : 0, err);
805
806 return err;
807 }
808
/* Handle a row that repeats its predecessor: copy the data values
   of all series (and the observation label, if labels are present)
   from the previous observation to the one corresponding to
   reading-area row @iread. Returns E_DATA, with a message on @prn,
   if there's no valid previous observation or the target is out
   of range; otherwise 0.
*/

static int repeat_data_row (ods_sheet *sheet, int iread,
                            PRN *prn)
{
    int t = iread - ((sheet->flags & BOOK_AUTO_VARNAMES)? 0 : 1);
    DATASET *dset = sheet->dset;
    int v;

    if (t < 1 || t >= dset->n) {
        pprintf(prn, "Found a repeated row in the wrong place\n");
        return E_DATA;
    }

    for (v = 1; v < dset->v; v++) {
        dset->Z[v][t] = dset->Z[v][t-1];
    }

    if (dset->S != NULL) {
        strcpy(dset->S[t], dset->S[t-1]);
    }

    return 0;
}
830
read_table_content(ods_sheet * sheet,PRN * prn)831 static int read_table_content (ods_sheet *sheet, PRN *prn)
832 {
833 ods_table *tab;
834 xmlNodePtr top, cur;
835 int i, nr, maxrow;
836 int tabrow = 0, readrow = 0;
837 int pass = 1;
838 int err = 0;
839
840 #if ODEBUG
841 fprintf(stderr, "\n*** read_table_content ***\n");
842 #endif
843
844 if (sheet->seltab < 0 || sheet->seltab >= sheet->n_tables) {
845 return E_DATA;
846 }
847
848 tab = sheet->tables[sheet->seltab];
849 err = analyse_top_left(sheet, tab);
850
851 if (!err) {
852 err = sheet_allocate_data(sheet, tab);
853 }
854
855 if (err) {
856 return err;
857 }
858
859 maxrow = tab->rows - sheet->yoffset;
860 top = cur = tab->node->xmlChildrenNode;
861
862 gretl_push_c_numeric_locale();
863
864 #if ODEBUG
865 fprintf(stderr, "starting read_data_row loop\n");
866 #endif
867
868 tryagain:
869
870 while (cur != NULL && !err && readrow < maxrow) {
871 if (!xmlStrcmp(cur->name, (XUC) "table-row")) {
872 nr = ods_row_height(cur);
873 if (tabrow >= sheet->yoffset) {
874 err = read_data_row(cur, tab, sheet, pass, readrow++, prn);
875 for (i=1; i<nr && !err; i++) {
876 err = repeat_data_row(sheet, readrow++, prn);
877 }
878 }
879 tabrow += nr;
880 }
881 cur = cur->next;
882 }
883
884 if (pass == 1 && (sheet->flags & BOOK_NON_NUMERIC)) {
885 err = ods_non_numeric_check(sheet, prn);
886 if (sheet->codelist != NULL) {
887 tabrow = readrow = 0;
888 cur = top;
889 pass = 2;
890 goto tryagain;
891 }
892 }
893
894 gretl_pop_c_numeric_locale();
895
896 #if ODEBUG
897 fprintf(stderr, "read_table_content, returning %d\n\n", err);
898 #endif
899
900 return err;
901 }
902
903 static int
get_table_dimensions(xmlNodePtr cur,ods_sheet * sheet)904 get_table_dimensions (xmlNodePtr cur, ods_sheet *sheet)
905 {
906 ods_table *tab = NULL;
907 xmlNodePtr rowp;
908 int hascont, nr, nc, row_empty;
909 int cols, xoffset, xtrail;
910 int rows, rchk;
911 int err = 0;
912 #if ODEBUG > 2
913 int i = 0;
914 #endif
915
916 #if ODEBUG
917 fprintf(stderr, "** get_table_dimensions *** \n");
918 #endif
919
920 tab = ods_table_new(cur, &err);
921 if (tab == NULL) {
922 return err;
923 }
924
925 err = ods_sheet_add_table(sheet, tab);
926 if (err) {
927 return err;
928 }
929
930 cur = cur->xmlChildrenNode;
931
932 rows = rchk = 0;
933
934 while (cur != NULL && !err) {
935 if (!xmlStrcmp(cur->name, (XUC) "table-row")) {
936 nr = ods_row_height(cur);
937 cols = xoffset = xtrail = 0;
938 row_empty = 1;
939 rowp = cur->xmlChildrenNode;
940 while (rowp != NULL && !err) {
941 if (!xmlStrcmp(rowp->name, (XUC) "table-cell")) {
942 hascont = ods_cell_has_content(rowp);
943 nc = ods_cell_width(rowp);
944 if (hascont) {
945 row_empty = 0;
946 tab->empty = 0;
947 xtrail = 0;
948 } else if (row_empty) {
949 xoffset += nc;
950 }
951 if (rowp->next == NULL) {
952 /* last cell(s) in row: ignore if blank */
953 cols += (hascont)? nc : 0;
954 } else {
955 cols += nc;
956 if (!hascont) {
957 xtrail += nc;
958 }
959 }
960 }
961 rowp = rowp->next;
962 }
963 #if ODEBUG > 2
964 fprintf(stderr, "row %d: cols = %d, trailing empty cols = %d\n",
965 ++i, cols, xtrail);
966 #endif
967 cols -= xtrail;
968 if (!err) {
969 rows += nr;
970 if (!row_empty) {
971 rchk = rows;
972 }
973 if (cols > tab->cols) {
974 tab->cols = cols;
975 }
976 if (tab->empty) {
977 tab->yoffset += nr;
978 }
979 if (!row_empty && xoffset < tab->xoffset) {
980 tab->xoffset = xoffset;
981 }
982 }
983 }
984 cur = cur->next;
985 }
986
987 tab->rows = rchk;
988
989 #if ODEBUG
990 fprintf(stderr, "get_table_dimensions, done: rows=%d, err=%d\n\n",
991 tab->rows, err);
992 #endif
993
994 return err;
995 }
996
ods_read_content(PRN * prn,int * err)997 static ods_sheet *ods_read_content (PRN *prn, int *err)
998 {
999 ods_sheet *sheet = NULL;
1000 xmlDocPtr doc = NULL;
1001 xmlNodePtr cur = NULL;
1002 xmlNodePtr c1, c2;
1003
1004 *err = gretl_xml_open_doc_root("content.xml",
1005 "document-content",
1006 &doc, &cur);
1007
1008 if (*err) {
1009 pprintf(prn, "didn't get office:document-content\n");
1010 pprintf(prn, "%s", gretl_errmsg_get());
1011 return NULL;
1012 }
1013
1014 sheet = ods_sheet_new(doc, err);
1015 if (sheet == NULL) {
1016 xmlFreeDoc(doc);
1017 return NULL;
1018 }
1019
1020 /* Now walk the tree */
1021 cur = cur->xmlChildrenNode;
1022 while (cur != NULL && !*err) {
1023 if (!xmlStrcmp(cur->name, (XUC) "body")) {
1024 c1 = cur->xmlChildrenNode;
1025 while (c1 != NULL && !*err) {
1026 if (!xmlStrcmp(c1->name, (XUC) "spreadsheet")) {
1027 c2 = c1->xmlChildrenNode;
1028 while (c2 != NULL && !*err) {
1029 if (!xmlStrcmp(c2->name, (XUC) "table")) {
1030 *err = get_table_dimensions(c2, sheet);
1031 }
1032 c2 = c2->next;
1033 }
1034 }
1035 c1 = c1->next;
1036 }
1037 }
1038 cur = cur->next;
1039 }
1040
1041 return sheet;
1042 }
1043
/* Check that the file "mimetype" in the current directory holds
   the mime-type string for an OpenDocument spreadsheet. Returns
   E_FOPEN if the file can't be opened, E_DATA if its content is
   not as expected, otherwise 0. A message is written to @prn on
   failure.
*/

static int check_mimetype (PRN *prn)
{
    const char *odsmime =
        "application/vnd.oasis.opendocument.spreadsheet";
    char buf[48] = {0};
    size_t nread;
    FILE *fp;
    int err = 0;

    fp = fopen("mimetype", "r");
    if (fp == NULL) {
        pprintf(prn, "Couldn't find mimetype\n");
        return E_FOPEN;
    }

    /* 46 is the length of the expected string; the buffer is
       zero-filled, which takes care of NUL-termination */
    nread = fread(buf, 1, 46, fp);
    if (nread != 46 || strcmp(buf, odsmime) != 0) {
        pprintf(prn, "Wrong or missing mime type,\n should be '%s'\n",
                odsmime);
        err = E_DATA;
    }

    fclose(fp);

    return err;
}
1068
/* Callback for the workbook struct: returns the minimum (one-based)
   column offset if @k is COL_OFFSET, else the minimum row offset,
   for the currently selected table. Returns 1 if no sheet data is
   attached to @book or the selection is out of range.
*/

static int ods_min_offset (wbook *book, int k)
{
    ods_sheet *sheet = book->data;
    ods_table *tab;
    int sel;

    if (sheet == NULL) {
        return 1;
    }

    sel = book->selected;
    if (sel < 0 || sel >= sheet->n_tables) {
        return 1;
    }

    tab = sheet->tables[sel];

    return (k == COL_OFFSET)? tab->xoffset + 1 : tab->yoffset + 1;
}
1090
/* Initialize @book for use with @sheet: record the names of the
   tables as sheet names, and attach both the minimum-offset
   callback and the sheet itself. The name strings are shared with
   the tables, not copied. Returns E_ALLOC if the array of names
   can't be allocated, otherwise 0.
*/

static int ods_book_init (wbook *book, ods_sheet *sheet, char *sheetname)
{
    int k, ntabs;

    wbook_init(book, NULL, sheetname);

    ntabs = sheet->n_tables;

    if (ntabs > 0) {
        book->sheetnames = strings_array_new(ntabs);
        if (book->sheetnames == NULL) {
            return E_ALLOC;
        }
        for (k = 0; k < ntabs; k++) {
            /* borrowed pointer: the table retains ownership */
            book->sheetnames[k] = sheet->tables[k]->name;
        }
        book->nsheets = ntabs;
    }

    book->get_min_offset = ods_min_offset;
    book->data = sheet;

    return 0;
}
1116
/* If @list is a gretl list of length 3, use it to record the
   import parameters: the one-based number of the selected table
   followed by the column and row offsets. Otherwise do nothing.
*/

static void record_ods_params (ods_sheet *sheet, int *list)
{
    if (list == NULL || list[0] != 3) {
        return;
    }

    list[1] = sheet->seltab + 1;
    list[2] = sheet->xoffset;
    list[3] = sheet->yoffset;
}
1125
/* Set the table selection and offsets on @sheet for a non-
   interactive import. @sheetname, if non-empty, selects a table by
   name or, failing that, by one-based number; @list, if it is a
   gretl list of length 3, gives the one-based table number (used
   only if no name was given and the number is positive) plus the
   column and row offsets. With neither specification the first
   table is selected, along with its own offsets.

   Returns 0 on success, or E_DATA if the resulting selection or
   offsets are invalid.
*/

static int set_ods_params_from_cli (ods_sheet *sheet,
                                    const int *list,
                                    char *sheetname)
{
    int have_name = (sheetname != NULL && *sheetname != '\0');
    int have_list = (list != NULL && list[0] == 3);
    int k;

    if (!have_name && !have_list) {
        /* no user specs: take the first table as is */
        sheet->seltab = 0;
        sheet->xoffset = sheet->tables[0]->xoffset;
        sheet->yoffset = sheet->tables[0]->yoffset;
        return 0;
    }

    /* default to the first table, but if the user has specified
       a table the selection is invalid until it's resolved */
    if (have_name || (have_list && list[1] > 0)) {
        sheet->seltab = -1;
    } else {
        sheet->seltab = 0;
    }

    if (have_name) {
        for (k = 0; k < sheet->n_tables; k++) {
            if (strcmp(sheetname, sheet->tables[k]->name) == 0) {
                sheet->seltab = k;
                break;
            }
        }
        if (sheet->seltab < 0 && integer_string(sheetname)) {
            /* not found as a name: try as a one-based number */
            k = atoi(sheetname);
            if (k >= 1 && k <= sheet->n_tables) {
                sheet->seltab = k - 1;
            }
        }
    }

    if (have_list) {
        if (!have_name && list[1] > 0) {
            /* convert to zero-based */
            sheet->seltab = list[1] - 1;
        }
        sheet->xoffset = list[2];
        sheet->yoffset = list[3];
    }

    if (sheet->seltab < 0 || sheet->seltab >= sheet->n_tables ||
        sheet->xoffset < 0 || sheet->yoffset < 0) {
        gretl_errmsg_set(_("Invalid argument for worksheet import"));
        fprintf(stderr, "seltab=%d, xoffset=%d, yoffset=%d\n",
                sheet->seltab, sheet->xoffset, sheet->yoffset);
        return E_DATA;
    }

    return 0;
}
1182
/* Put up the worksheet dialog, initialized with the offsets of
   the first table, and transcribe the table selection and offsets
   to @sheet. The table menu is offered only if there's more than
   one table. Returns the "selected" member of the workbook struct
   as left by the dialog, or -1 with *err set if the workbook
   could not be initialized.
*/

static int ods_sheet_dialog (ods_sheet *sheet, int *err)
{
    wbook book;
    int multi;

    *err = ods_book_init(&book, sheet, NULL);
    if (*err) {
        return -1;
    }

    book.col_offset = sheet->tables[0]->xoffset;
    book.row_offset = sheet->tables[0]->yoffset;

    multi = (book.nsheets > 1);
    wsheet_menu(&book, multi);
    sheet->seltab = multi ? book.selected : 0;

    sheet->xoffset = book.col_offset;
    sheet->yoffset = book.row_offset;

#if ODEBUG
    fprintf(stderr, "sheet->xoffset = %d, sheet->yoffset = %d\n",
            sheet->xoffset, sheet->yoffset);
#endif

    /* free the array only: the names belong to the tables */
    free(book.sheetnames);

    return book.selected;
}
1215
/* Final steps in importing: prune columns of the imported dataset,
   run the time-series check if observation labels are present,
   then merge the imported data into @dset or replace @dset's
   content. If @dset held no data beforehand, import info is added
   to it. Returns 0 on success or a non-zero error code.
*/

static int finalize_ods_import (DATASET *dset,
                                ods_sheet *sheet,
                                const char *fname,
                                gretlopt opt,
                                PRN *prn)
{
    int err, merge;

    err = import_prune_columns(sheet->dset);

    /* this must be determined before data are transferred */
    merge = (dset->Z != NULL);

    if (err) {
        return err;
    }

    if (sheet->dset->S != NULL) {
        import_ts_check(sheet->dset);
    }

    err = merge_or_replace_data(dset, &sheet->dset,
                                get_merge_opts(opt), prn);
    if (err) {
        return err;
    }

    if (!merge) {
        dataset_add_import_info(dset, fname, GRETL_ODS);
    }

    return 0;
}
1240
/* Driver function for importing data from the ODS file @fname.

   @list: if a gretl list of length 3, in non-GUI use it supplies
   the table number and offsets; in GUI use it receives the
   parameters chosen by the user, on success.
   @sheetname: name (or number) of the table to read, for non-GUI
   use; may be NULL.
   @dset: dataset to receive the imported data.
   @opt: may include OPT_G for GUI use, in which case a dialog is
   used to select the table and offsets.
   @prn: printer for messages.

   Returns 0 on success, -1 if the GUI dialog gave a negative
   response (canceled), or a positive error code on failure.
*/

int ods_get_data (const char *fname, int *list, char *sheetname,
                  DATASET *dset, gretlopt opt, PRN *prn)
{
    int gui = (opt & OPT_G);
    ods_sheet *sheet = NULL;
    char dname[32];
    int err;

    err = open_import_zipfile(fname, dname, prn);
    if (err) {
        return err;
    }

    err = check_mimetype(prn);

    if (!err) {
        sheet = ods_read_content(prn, &err);
    }

    /* the unzipped files are no longer needed at this point */
    remove_temp_dir(dname);

    ods_sheet_print(sheet);

    if (!err) {
        err = ods_sheet_prune(sheet, prn);
    }

    printlist(list, "ods list");
    /* @sheetname may be NULL: don't hand that to "%s" */
    fprintf(stderr, "sheetname='%s'\n",
            (sheetname != NULL)? sheetname : "(null)");

    if (!err) {
        if (gui) {
            int resp = ods_sheet_dialog(sheet, &err);

            if (resp < 0) {
                /* canceled */
                err = -1;
                goto bailout;
            }
        } else {
            err = set_ods_params_from_cli(sheet, list, sheetname);
        }
    }

    if (!err) {
        err = read_table_content(sheet, prn);
    }

    if (!err && sheet->st != NULL) {
        err = gretl_string_table_validate(sheet->st, OPT_S);
        if (err) {
            pputs(prn, _("Failed to interpret the data as numeric\n"));
        } else {
            gretl_string_table_print(sheet->st, sheet->dset, fname, prn);
        }
    }

    if (!err) {
        err = finalize_ods_import(dset, sheet, fname, opt, prn);
        if (!err && gui) {
            record_ods_params(sheet, list);
        }
    }

 bailout:

    ods_sheet_free(sheet);

    return err;
}
1313