1 /* PSPP - a program for statistical analysis. 2 Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013, 2016 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation, either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17 #include <config.h> 18 19 #include "data/gnumeric-reader.h" 20 #include "spreadsheet-reader.h" 21 22 #include <assert.h> 23 #include <stdbool.h> 24 #include <errno.h> 25 #include <libxml/xmlreader.h> 26 #include <zlib.h> 27 28 #include "data/case.h" 29 #include "data/casereader-provider.h" 30 #include "data/data-in.h" 31 #include "data/dictionary.h" 32 #include "data/format.h" 33 #include "data/identifier.h" 34 #include "data/value.h" 35 #include "data/variable.h" 36 #include "libpspp/i18n.h" 37 #include "libpspp/message.h" 38 #include "libpspp/misc.h" 39 #include "libpspp/str.h" 40 41 #include "gl/c-strtod.h" 42 #include "gl/minmax.h" 43 #include "gl/xalloc.h" 44 45 #include "gettext.h" 46 #define _(msgid) gettext (msgid) 47 #define N_(msgid) (msgid) 48 49 /* Shamelessly lifted from the Gnumeric sources: 50 https://git.gnome.org/browse/gnumeric/tree/src/value.h 51 */ 52 enum gnm_value_type 53 { 54 VALUE_EMPTY = 10, 55 VALUE_BOOLEAN = 20, 56 VALUE_INTEGER = 30, /* Note, this was removed from gnumeric in 2006 - old versions may of 57 course still be around. New ones are supposed to use float.*/ 58 VALUE_FLOAT = 40, 59 VALUE_ERROR = 50, 60 VALUE_STRING = 60, 61 VALUE_CELLRANGE = 70, 62 VALUE_ARRAY = 80 63 }; 64 65 66 67 static void gnm_file_casereader_destroy (struct casereader *, void *); 68 69 static struct ccase *gnm_file_casereader_read (struct casereader *, void *); 70 71 72 static const struct casereader_class gnm_file_casereader_class = 73 { 74 gnm_file_casereader_read, 75 gnm_file_casereader_destroy, 76 NULL, 77 NULL, 78 }; 79 80 enum reader_state 81 { 82 STATE_PRE_INIT = 0, /* Initial state */ 83 STATE_SHEET_COUNT, /* Found the sheet index */ 84 STATE_INIT , /* Other Initial state */ 85 STATE_SHEET_START, /* Found the start of a sheet */ 86 STATE_SHEET_NAME, /* Found the sheet name */ 87 STATE_MAXROW, 88 STATE_MAXCOL, 89 STATE_SHEET_FOUND, /* Found the sheet that we actually want */ 90 STATE_CELLS_START, /* Found the start of the cell array */ 91 STATE_CELL /* Found a cell */ 92 }; 93 94 struct sheet_detail 95 { 96 /* The name of the sheet (utf8 encoding) */ 97 char *name; 98 99 int start_col; 100 int stop_col; 101 int start_row; 102 int stop_row; 103 104 int maxcol; 105 int maxrow; 106 }; 107 108 struct state_data 109 { 110 /* The libxml reader for this instance */ 111 xmlTextReaderPtr xtr; 112 113 /* An internal state variable */ 114 enum reader_state state; 115 116 int node_type; 117 int current_sheet; 118 119 int row; 120 int col; 121 122 int min_col; 123 }; 124 125 126 static void 127 state_data_destroy (struct state_data *sd) 128 { 129 xmlFreeTextReader (sd->xtr); 130 } 131 132 133 struct gnumeric_reader 134 { 135 struct spreadsheet spreadsheet; 136 137 struct state_data rsd; 138 struct state_data msd; 139 140 int start_col; 141 int stop_col; 142 int start_row; 143 int stop_row; 144 145 struct sheet_detail *sheets; 146 147 const xmlChar *target_sheet; 148 int target_sheet_index; 149 150 struct caseproto *proto; 151 struct dictionary *dict; 152 struct ccase *first_case; 153 bool used_first_case; 154 155 enum gnm_value_type vtype; 156 }; 157 158 159 void 160 gnumeric_unref (struct spreadsheet *s) 161 { 162 struct gnumeric_reader *r = (struct gnumeric_reader *) s; 163 164 if (0 == --s->ref_cnt) 165 { 166 int i; 167 168 for (i = 0; i < s->n_sheets; ++i) 169 { 170 xmlFree (r->sheets[i].name); 171 } 172 173 174 free (r->sheets); 175 state_data_destroy (&r->msd); 176 177 dict_unref (r->dict); 178 179 free (s->file_name); 180 181 free (r); 182 } 183 } 184 185 186 const char * 187 gnumeric_get_sheet_name (struct spreadsheet *s, int n) 188 { 189 struct gnumeric_reader *gr = (struct gnumeric_reader *) s; 190 assert (n < s->n_sheets); 191 192 return gr->sheets[n].name; 193 } 194 195 196 static void process_node (struct gnumeric_reader *r, struct state_data *sd); 197 198 199 200 char * 201 gnumeric_get_sheet_range (struct spreadsheet *s, int n) 202 { 203 int ret; 204 struct gnumeric_reader *gr = (struct gnumeric_reader *) s; 205 206 assert (n < s->n_sheets); 207 208 while ( 209 (gr->sheets[n].stop_col == -1) 210 && 211 (1 == (ret = xmlTextReaderRead (gr->msd.xtr))) 212 ) 213 { 214 process_node (gr, &gr->msd); 215 } 216 217 return create_cell_range ( 218 gr->sheets[n].start_col, 219 gr->sheets[n].start_row, 220 gr->sheets[n].stop_col, 221 gr->sheets[n].stop_row); 222 } 223 224 225 static void 226 gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) 227 { 228 struct gnumeric_reader *r = r_; 229 230 if (r == NULL) 231 return ; 232 233 state_data_destroy (&r->rsd); 234 235 if (r->first_case && ! r->used_first_case) 236 case_unref (r->first_case); 237 238 if (r->proto) 239 caseproto_unref (r->proto); 240 241 gnumeric_unref (&r->spreadsheet); 242 } 243 244 245 static void 246 process_node (struct gnumeric_reader *r, struct state_data *sd) 247 { 248 xmlChar *name = xmlTextReaderName (sd->xtr); 249 if (name == NULL) 250 name = xmlStrdup (_xml ("--")); 251 252 sd->node_type = xmlTextReaderNodeType (sd->xtr); 253 254 switch (sd->state) 255 { 256 case STATE_PRE_INIT: 257 sd->current_sheet = -1; 258 if (0 == xmlStrcasecmp (name, _xml("gnm:SheetNameIndex")) && 259 XML_READER_TYPE_ELEMENT == sd->node_type) 260 { 261 sd->state = STATE_SHEET_COUNT; 262 } 263 break; 264 265 case STATE_SHEET_COUNT: 266 if (0 == xmlStrcasecmp (name, _xml("gnm:SheetName")) && 267 XML_READER_TYPE_ELEMENT == sd->node_type) 268 { 269 ++sd->current_sheet; 270 if (sd->current_sheet + 1 > r->spreadsheet.n_sheets) 271 { 272 struct sheet_detail *detail ; 273 r->sheets = xrealloc (r->sheets, (sd->current_sheet + 1) * sizeof *r->sheets); 274 detail = &r->sheets[sd->current_sheet]; 275 detail->start_col = detail->stop_col = detail->start_row = detail->stop_row = -1; 276 detail->name = NULL; 277 r->spreadsheet.n_sheets = sd->current_sheet + 1; 278 } 279 } 280 else if (0 == xmlStrcasecmp (name, _xml("gnm:SheetNameIndex")) && 281 XML_READER_TYPE_END_ELEMENT == sd->node_type) 282 { 283 sd->state = STATE_INIT; 284 sd->current_sheet = -1; 285 } 286 else if (XML_READER_TYPE_TEXT == sd->node_type) 287 { 288 if (r->sheets [r->spreadsheet.n_sheets - 1].name == NULL) 289 r->sheets [r->spreadsheet.n_sheets - 1].name = CHAR_CAST (char *, xmlTextReaderValue (sd->xtr)); 290 } 291 break; 292 293 case STATE_INIT: 294 if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && 295 XML_READER_TYPE_ELEMENT == sd->node_type) 296 { 297 ++sd->current_sheet; 298 sd->state = STATE_SHEET_START; 299 } 300 break; 301 case STATE_SHEET_START: 302 if (0 == xmlStrcasecmp (name, _xml("gnm:Name")) && 303 XML_READER_TYPE_ELEMENT == sd->node_type) 304 { 305 sd->state = STATE_SHEET_NAME; 306 } 307 break; 308 case STATE_SHEET_NAME: 309 if (0 == xmlStrcasecmp (name, _xml("gnm:Name")) && 310 XML_READER_TYPE_END_ELEMENT == sd->node_type) 311 { 312 sd->state = STATE_INIT; 313 } 314 else if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && 315 XML_READER_TYPE_END_ELEMENT == sd->node_type) 316 { 317 sd->state = STATE_INIT; 318 } 319 else if (XML_READER_TYPE_TEXT == sd->node_type) 320 { 321 if (r->target_sheet != NULL) 322 { 323 xmlChar *value = xmlTextReaderValue (sd->xtr); 324 if (0 == xmlStrcmp (value, r->target_sheet)) 325 sd->state = STATE_SHEET_FOUND; 326 free (value); 327 } 328 else if (r->target_sheet_index == sd->current_sheet + 1) 329 { 330 sd->state = STATE_SHEET_FOUND; 331 } 332 else if (r->target_sheet_index == -1) 333 { 334 sd->state = STATE_SHEET_FOUND; 335 } 336 } 337 break; 338 case STATE_SHEET_FOUND: 339 if (0 == xmlStrcasecmp (name, _xml("gnm:Cells")) && 340 XML_READER_TYPE_ELEMENT == sd->node_type) 341 { 342 sd->min_col = INT_MAX; 343 if (! xmlTextReaderIsEmptyElement (sd->xtr)) 344 sd->state = STATE_CELLS_START; 345 } 346 else if (0 == xmlStrcasecmp (name, _xml("gnm:MaxRow")) && 347 XML_READER_TYPE_ELEMENT == sd->node_type) 348 { 349 sd->state = STATE_MAXROW; 350 } 351 else if (0 == xmlStrcasecmp (name, _xml("gnm:MaxCol")) && 352 XML_READER_TYPE_ELEMENT == sd->node_type) 353 { 354 sd->state = STATE_MAXCOL; 355 } 356 else if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && 357 XML_READER_TYPE_END_ELEMENT == sd->node_type) 358 { 359 sd->state = STATE_INIT; 360 } 361 break; 362 case STATE_MAXROW: 363 if (0 == xmlStrcasecmp (name, _xml("gnm:MaxRow")) && 364 XML_READER_TYPE_END_ELEMENT == sd->node_type) 365 { 366 sd->state = STATE_SHEET_FOUND; 367 } 368 else if (sd->node_type == XML_READER_TYPE_TEXT) 369 { 370 xmlChar *value = xmlTextReaderValue (sd->xtr); 371 r->sheets[sd->current_sheet].maxrow = _xmlchar_to_int (value); 372 xmlFree (value); 373 } 374 break; 375 case STATE_MAXCOL: 376 if (0 == xmlStrcasecmp (name, _xml("gnm:MaxCol")) && 377 XML_READER_TYPE_END_ELEMENT == sd->node_type) 378 { 379 sd->state = STATE_SHEET_FOUND; 380 } 381 else if (sd->node_type == XML_READER_TYPE_TEXT) 382 { 383 xmlChar *value = xmlTextReaderValue (sd->xtr); 384 r->sheets[sd->current_sheet].maxcol = _xmlchar_to_int (value); 385 xmlFree (value); 386 } 387 break; 388 case STATE_CELLS_START: 389 if (0 == xmlStrcasecmp (name, _xml ("gnm:Cell")) && 390 XML_READER_TYPE_ELEMENT == sd->node_type) 391 { 392 xmlChar *attr = NULL; 393 394 attr = xmlTextReaderGetAttribute (sd->xtr, _xml ("Col")); 395 sd->col = _xmlchar_to_int (attr); 396 free (attr); 397 398 if (sd->col < sd->min_col) 399 sd->min_col = sd->col; 400 401 attr = xmlTextReaderGetAttribute (sd->xtr, _xml ("Row")); 402 sd->row = _xmlchar_to_int (attr); 403 free (attr); 404 405 if (r->sheets[sd->current_sheet].start_row == -1) 406 { 407 r->sheets[sd->current_sheet].start_row = sd->row; 408 } 409 410 if (r->sheets[sd->current_sheet].start_col == -1) 411 { 412 r->sheets[sd->current_sheet].start_col = sd->col; 413 } 414 if (! xmlTextReaderIsEmptyElement (sd->xtr)) 415 sd->state = STATE_CELL; 416 } 417 else if ((0 == xmlStrcasecmp (name, _xml("gnm:Cells"))) && (XML_READER_TYPE_END_ELEMENT == sd->node_type)) 418 { 419 r->sheets[sd->current_sheet].stop_col = sd->col; 420 r->sheets[sd->current_sheet].stop_row = sd->row; 421 sd->state = STATE_SHEET_NAME; 422 } 423 break; 424 case STATE_CELL: 425 if (0 == xmlStrcasecmp (name, _xml("gnm:Cell")) && XML_READER_TYPE_END_ELEMENT == sd->node_type) 426 { 427 sd->state = STATE_CELLS_START; 428 } 429 break; 430 default: 431 break; 432 }; 433 434 xmlFree (name); 435 } 436 437 438 /* 439 Sets the VAR of case C, to the value corresponding to the xml string XV 440 */ 441 static void 442 convert_xml_string_to_value (struct ccase *c, const struct variable *var, 443 const xmlChar *xv, enum gnm_value_type type, int col, int row) 444 { 445 union value *v = case_data_rw (c, var); 446 447 if (xv == NULL) 448 value_set_missing (v, var_get_width (var)); 449 else if (var_is_alpha (var)) 450 value_copy_str_rpad (v, var_get_width (var), xv, ' '); 451 else if (type == VALUE_FLOAT || type == VALUE_INTEGER) 452 { 453 const char *text = CHAR_CAST (const char *, xv); 454 char *endptr; 455 456 errno = 0; 457 v->f = c_strtod (text, &endptr); 458 if (errno != 0 || endptr == text) 459 v->f = SYSMIS; 460 } 461 else 462 { 463 const char *text = CHAR_CAST (const char *, xv); 464 465 const struct fmt_spec *fmt = var_get_write_format (var); 466 467 char *m = data_in (ss_cstr (text), "UTF-8", 468 fmt->type, 469 v, 470 var_get_width (var), 471 "UTF-8"); 472 473 if (m) 474 { 475 char buf [FMT_STRING_LEN_MAX + 1]; 476 char *cell = create_cell_ref (col, row); 477 478 msg (MW, _("Cannot convert the value in the spreadsheet cell %s to format (%s): %s"), 479 cell, fmt_to_string (fmt, buf), m); 480 free (cell); 481 } 482 free (m); 483 } 484 } 485 486 struct var_spec 487 { 488 char *name; 489 int width; 490 xmlChar *first_value; 491 int first_type; 492 }; 493 494 495 static void 496 gnumeric_error_handler (void *ctx, const char *mesg, 497 xmlParserSeverities sev UNUSED, 498 xmlTextReaderLocatorPtr loc) 499 { 500 struct gnumeric_reader *r = ctx; 501 502 msg (MW, _("There was a problem whilst reading the %s file `%s' (near line %d): `%s'"), 503 "Gnumeric", 504 r->spreadsheet.file_name, 505 xmlTextReaderLocatorLineNumber (loc), 506 mesg); 507 } 508 509 static struct gnumeric_reader * 510 gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_errors) 511 { 512 int ret = -1; 513 struct state_data *sd; 514 515 xmlTextReaderPtr xtr; 516 gzFile gz; 517 518 assert (r == NULL || filename == NULL); 519 520 if (filename) 521 { 522 gz = gzopen (filename, "r"); 523 } 524 else 525 { 526 gz = gzopen (r->spreadsheet.file_name, "r"); 527 } 528 529 if (NULL == gz) 530 return NULL; 531 532 533 xtr = xmlReaderForIO ((xmlInputReadCallback) gzread, 534 (xmlInputCloseCallback) gzclose, gz, 535 NULL, NULL, 536 show_errors ? 0 : (XML_PARSE_NOERROR | XML_PARSE_NOWARNING)); 537 538 if (xtr == NULL) 539 { 540 gzclose (gz); 541 return NULL; 542 } 543 544 if (r == NULL) 545 { 546 r = xzalloc (sizeof *r); 547 r->spreadsheet.n_sheets = -1; 548 r->spreadsheet.file_name = strdup (filename); 549 sd = &r->msd; 550 } 551 else 552 { 553 sd = &r->rsd; 554 } 555 556 if (show_errors) 557 xmlTextReaderSetErrorHandler (xtr, gnumeric_error_handler, r); 558 559 r->target_sheet = NULL; 560 r->target_sheet_index = -1; 561 562 sd->row = sd->col = -1; 563 sd->state = STATE_PRE_INIT; 564 sd->xtr = xtr; 565 r->spreadsheet.ref_cnt++; 566 567 568 /* Advance to the start of the workbook. 569 This gives us some confidence that we are actually dealing with a gnumeric 570 spreadsheet. 571 */ 572 while ((sd->state != STATE_INIT) 573 && 1 == (ret = xmlTextReaderRead (sd->xtr))) 574 { 575 process_node (r, sd); 576 } 577 578 579 if (ret != 1) 580 { 581 /* Does not seem to be a gnumeric file */ 582 gnumeric_unref (&r->spreadsheet); 583 return NULL; 584 } 585 586 r->spreadsheet.type = SPREADSHEET_GNUMERIC; 587 588 if (show_errors) 589 { 590 const xmlChar *enc = xmlTextReaderConstEncoding (sd->xtr); 591 xmlCharEncoding xce = xmlParseCharEncoding (CHAR_CAST (const char *, enc)); 592 593 if (XML_CHAR_ENCODING_UTF8 != xce) 594 { 595 /* I have been told that ALL gnumeric files are UTF8 encoded. If that is correct, this 596 can never happen. */ 597 msg (MW, _("The gnumeric file `%s' is encoded as %s instead of the usual UTF-8 encoding. " 598 "Any non-ascii characters will be incorrectly imported."), 599 r->spreadsheet.file_name, 600 enc); 601 } 602 } 603 604 return r; 605 } 606 607 608 struct spreadsheet * 609 gnumeric_probe (const char *filename, bool report_errors) 610 { 611 struct gnumeric_reader *r = gnumeric_reopen (NULL, filename, report_errors); 612 613 return &r->spreadsheet; 614 } 615 616 617 struct casereader * 618 gnumeric_make_reader (struct spreadsheet *spreadsheet, 619 const struct spreadsheet_read_options *opts) 620 { 621 int type = 0; 622 int x = 0; 623 struct gnumeric_reader *r = NULL; 624 unsigned long int vstart = 0; 625 int ret; 626 casenumber n_cases = CASENUMBER_MAX; 627 int i; 628 struct var_spec *var_spec = NULL; 629 int n_var_specs = 0; 630 631 r = (struct gnumeric_reader *) (spreadsheet); 632 633 r = gnumeric_reopen (r, NULL, true); 634 635 if (opts->cell_range) 636 { 637 if (! convert_cell_ref (opts->cell_range, 638 &r->start_col, &r->start_row, 639 &r->stop_col, &r->stop_row)) 640 { 641 msg (SE, _("Invalid cell range `%s'"), 642 opts->cell_range); 643 goto error; 644 } 645 } 646 else 647 { 648 r->start_col = -1; 649 r->start_row = 0; 650 r->stop_col = -1; 651 r->stop_row = -1; 652 } 653 654 r->target_sheet = BAD_CAST opts->sheet_name; 655 r->target_sheet_index = opts->sheet_index; 656 r->rsd.row = r->rsd.col = -1; 657 r->rsd.current_sheet = -1; 658 r->first_case = NULL; 659 r->proto = NULL; 660 661 /* Advance to the start of the cells for the target sheet */ 662 while ((r->rsd.state != STATE_CELL || r->rsd.row < r->start_row) 663 && 1 == (ret = xmlTextReaderRead (r->rsd.xtr))) 664 { 665 xmlChar *value ; 666 process_node (r, &r->rsd); 667 value = xmlTextReaderValue (r->rsd.xtr); 668 669 if (r->rsd.state == STATE_MAXROW && r->rsd.node_type == XML_READER_TYPE_TEXT) 670 { 671 n_cases = 1 + _xmlchar_to_int (value) ; 672 } 673 free (value); 674 } 675 676 /* If a range has been given, then use that to calculate the number 677 of cases */ 678 if (opts->cell_range) 679 { 680 n_cases = MIN (n_cases, r->stop_row - r->start_row + 1); 681 } 682 683 if (opts->read_names) 684 { 685 r->start_row++; 686 n_cases --; 687 } 688 689 690 /* Read in the first row of cells, 691 including the headers if read_names was set */ 692 while ( 693 ((r->rsd.state == STATE_CELLS_START && r->rsd.row <= r->start_row) || r->rsd.state == STATE_CELL) 694 && (ret = xmlTextReaderRead (r->rsd.xtr)) 695 ) 696 { 697 int idx; 698 699 if (r->rsd.state == STATE_CELL && r->rsd.node_type == XML_READER_TYPE_TEXT) 700 { 701 xmlChar *attr = 702 xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); 703 704 type = _xmlchar_to_int (attr); 705 706 xmlFree (attr); 707 } 708 709 process_node (r, &r->rsd); 710 711 if (r->rsd.row > r->start_row) 712 { 713 xmlChar *attr = 714 xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); 715 716 r->vtype = _xmlchar_to_int (attr); 717 718 xmlFree (attr); 719 break; 720 } 721 722 if (r->rsd.col < r->start_col || 723 (r->stop_col != -1 && r->rsd.col > r->stop_col)) 724 continue; 725 726 idx = r->rsd.col - r->start_col; 727 728 if (idx >= n_var_specs) 729 { 730 int i; 731 var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1)); 732 for (i = n_var_specs; i <= idx; ++i) 733 { 734 var_spec [i].name = NULL; 735 var_spec [i].width = -1; 736 var_spec [i].first_value = NULL; 737 var_spec [i].first_type = -1; 738 } 739 n_var_specs = idx + 1 ; 740 } 741 742 var_spec [idx].first_type = type; 743 744 if (r->rsd.node_type == XML_READER_TYPE_TEXT) 745 { 746 xmlChar *value = xmlTextReaderValue (r->rsd.xtr); 747 const char *text = CHAR_CAST (const char *, value); 748 749 if (r->rsd.row < r->start_row) 750 { 751 if (opts->read_names) 752 { 753 var_spec [idx].name = xstrdup (text); 754 } 755 } 756 else 757 { 758 var_spec [idx].first_value = xmlStrdup (value); 759 760 if (-1 == var_spec [idx].width) 761 var_spec [idx].width = (opts->asw == -1) ? 762 ROUND_UP (strlen(text), SPREADSHEET_DEFAULT_WIDTH) : opts->asw; 763 } 764 765 free (value); 766 } 767 else if (r->rsd.node_type == XML_READER_TYPE_ELEMENT 768 && r->rsd.state == STATE_CELL) 769 { 770 if (r->rsd.row == r->start_row) 771 { 772 xmlChar *attr = 773 xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); 774 775 if (NULL == attr || VALUE_STRING != _xmlchar_to_int (attr)) 776 var_spec [idx].width = 0; 777 778 free (attr); 779 } 780 } 781 } 782 783 { 784 const xmlChar *enc = xmlTextReaderConstEncoding (r->rsd.xtr); 785 if (enc == NULL) 786 goto error; 787 /* Create the dictionary and populate it */ 788 spreadsheet->dict = r->dict = dict_create (CHAR_CAST (const char *, enc)); 789 } 790 791 for (i = 0 ; i < n_var_specs ; ++i) 792 { 793 char *name; 794 795 if ((var_spec[i].name == NULL) && (var_spec[i].first_value == NULL)) 796 continue; 797 798 /* Probably no data exists for this variable, so allocate a 799 default width */ 800 if (var_spec[i].width == -1) 801 var_spec[i].width = SPREADSHEET_DEFAULT_WIDTH; 802 803 name = dict_make_unique_var_name (r->dict, var_spec[i].name, &vstart); 804 dict_create_var (r->dict, name, var_spec[i].width); 805 free (name); 806 } 807 808 /* Create the first case, and cache it */ 809 r->used_first_case = false; 810 811 if (n_var_specs == 0) 812 { 813 msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."), 814 spreadsheet->file_name); 815 goto error; 816 } 817 818 r->proto = caseproto_ref (dict_get_proto (r->dict)); 819 r->first_case = case_create (r->proto); 820 case_set_missing (r->first_case); 821 822 823 for (i = 0 ; i < n_var_specs ; ++i) 824 { 825 const struct variable *var; 826 827 if ((var_spec[i].name == NULL) && (var_spec[i].first_value == NULL)) 828 continue; 829 830 var = dict_get_var (r->dict, x++); 831 832 convert_xml_string_to_value (r->first_case, var, 833 var_spec[i].first_value, 834 var_spec[i].first_type, 835 r->rsd.col + i - 1, 836 r->rsd.row - 1); 837 } 838 839 for (i = 0 ; i < n_var_specs ; ++i) 840 { 841 free (var_spec[i].first_value); 842 free (var_spec[i].name); 843 } 844 845 free (var_spec); 846 847 848 return casereader_create_sequential 849 (NULL, 850 r->proto, 851 n_cases, 852 &gnm_file_casereader_class, r); 853 854 855 error: 856 for (i = 0 ; i < n_var_specs ; ++i) 857 { 858 free (var_spec[i].first_value); 859 free (var_spec[i].name); 860 } 861 862 free (var_spec); 863 864 gnm_file_casereader_destroy (NULL, r); 865 866 return NULL; 867 }; 868 869 870 /* Reads and returns one case from READER's file. Returns a null 871 pointer on failure. */ 872 static struct ccase * 873 gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_) 874 { 875 struct ccase *c; 876 int ret = 0; 877 878 struct gnumeric_reader *r = r_; 879 int current_row = r->rsd.row; 880 881 if (!r->used_first_case) 882 { 883 r->used_first_case = true; 884 return r->first_case; 885 } 886 887 c = case_create (r->proto); 888 case_set_missing (c); 889 890 if (r->start_col == -1) 891 r->start_col = r->rsd.min_col; 892 893 894 while ((r->rsd.state == STATE_CELL || r->rsd.state == STATE_CELLS_START) 895 && r->rsd.row == current_row && (ret = xmlTextReaderRead (r->rsd.xtr))) 896 { 897 process_node (r, &r->rsd); 898 899 if (r->rsd.state == STATE_CELL && r->rsd.node_type == XML_READER_TYPE_ELEMENT) 900 { 901 xmlChar *attr = 902 xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); 903 904 r->vtype = _xmlchar_to_int (attr); 905 906 xmlFree (attr); 907 } 908 909 if (r->rsd.col < r->start_col || (r->stop_col != -1 && 910 r->rsd.col > r->stop_col)) 911 continue; 912 913 if (r->rsd.col - r->start_col >= caseproto_get_n_widths (r->proto)) 914 continue; 915 916 if (r->stop_row != -1 && r->rsd.row > r->stop_row) 917 break; 918 919 920 if (r->rsd.node_type == XML_READER_TYPE_TEXT) 921 { 922 xmlChar *value = xmlTextReaderValue (r->rsd.xtr); 923 const int idx = r->rsd.col - r->start_col; 924 const struct variable *var = dict_get_var (r->dict, idx); 925 926 convert_xml_string_to_value (c, var, value, r->vtype, 927 r->rsd.col, r->rsd.row); 928 929 xmlFree (value); 930 } 931 } 932 933 if (ret == 1) 934 return c; 935 else 936 { 937 case_unref (c); 938 return NULL; 939 } 940 } 941