1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 *
3 * Copyright 2004 Komarov Valery
4 * Copyright 2006 Christophe Leitienne
5 * Copyright 2008-2017 David Hoerl
6 * Copyright 2013 Bob Colbert
7 * Copyright 2013-2018 Evan Miller
8 *
9 * This file is part of libxls -- A multiplatform, C/C++ library for parsing
10 * Excel(TM) files.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions are met:
14 *
15 * 1. Redistributions of source code must retain the above copyright notice,
16 * this list of conditions and the following disclaimer.
17 *
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS
23 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
26 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
29 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
30 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
31 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
32 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36 #include "config.h"
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <stddef.h>
41 #include <errno.h>
42
43 #include <memory.h>
44 #include <math.h>
45 #include <sys/types.h>
46 #include <string.h>
47 #include <wchar.h>
48
49 #include "libxls/endian.h"
50 #include "libxls/xls.h"
51
/* Fallback two-argument minimum, defined only when no `min` macro exists yet.
 * Each argument may be evaluated twice, so avoid arguments with side effects. */
#ifndef min
#define min(a,b) ((a) < (b) ? (a) : (b))
#endif

//#define DEBUG_DRAWINGS
/* Global debug level. Any non-zero value enables diagnostic output in this
 * file; a few very verbose dumps additionally require a value above 10. */
int xls_debug = 0;

/* Decodes a packed RK cell value into a double. */
static double NumFromRk(DWORD drk);
/* Optional callback; when non-NULL, xls_addCell invokes it for every FORMULA
 * record it processes. */
static xls_formula_handler formula_handler;

/* Forward declarations of the file-local helpers that populate workbook and
 * worksheet structures from raw records. */
static xls_error_t xls_addSST(xlsWorkBook* pWB, SST* sst, DWORD size);
static xls_error_t xls_appendSST(xlsWorkBook* pWB, BYTE* buf, DWORD size);
static xls_error_t xls_addFormat(xlsWorkBook* pWB, FORMAT* format, DWORD size);
static char* xls_addSheet(xlsWorkBook* pWB, BOUNDSHEET* bs, DWORD size);
static xls_error_t xls_addRow(xlsWorkSheet* pWS,ROW* row);
static xls_error_t xls_makeTable(xlsWorkSheet* pWS);
static struct st_cell_data *xls_addCell(xlsWorkSheet* pWS, BOF* bof, BYTE* buf);
static char *xls_addFont(xlsWorkBook* pWB, FONT* font, DWORD size);
static xls_error_t xls_addXF8(xlsWorkBook* pWB, XF8* xf);
static xls_error_t xls_addXF5(xlsWorkBook* pWB, XF5* xf);
static xls_error_t xls_addColinfo(xlsWorkSheet* pWS, COLINFO* colinfo);
static xls_error_t xls_mergedCells(xlsWorkSheet* pWS, BOF* bof, BYTE* buf);
static xls_error_t xls_preparseWorkSheet(xlsWorkSheet* pWS);
static xls_error_t xls_formatColumn(xlsWorkSheet* pWS);
static void xls_dumpSummary(char *buf, int isSummary, xlsSummaryInfo *pSI);
77
/* The structures below are declared with 1-byte packing so that no padding is
 * inserted between members. */
// NOTE(review): the AIX/Sun branch uses `#pragma pack(1)` without a push, yet
// the region is closed with `#pragma pack(pop)` on every platform -- confirm
// that the default packing is actually restored on those compilers.
// NOTE(review): these layouts presumably mirror the OLE property-set
// (summary information) stream consumed by xls_dumpSummary -- TODO confirm
// against that function, which is not visible here.
#if defined(_AIX) || defined(__sun)
#pragma pack(1)
#else
#pragma pack(push, 1)
#endif

/* One section-list entry: four 32-bit format words followed by an offset. */
typedef struct {
    uint32_t format[4];
    uint32_t offset;
} sectionList;

/* Stream header: signature, OS word, format words and a section count,
 * followed by the section list. secList is declared with one element;
 * presumably `count` entries follow in the stream -- TODO confirm. */
typedef struct {
    uint16_t sig;
    uint16_t _empty;
    uint32_t os;
    uint32_t format[4];
    uint32_t count;
    sectionList secList[1];
} header;

/* Pairs a property identifier with an offset inside its section. */
typedef struct {
    uint32_t propertyID;
    uint32_t sectionOffset;
} propertyList;

/* Section header: length and property count, followed by the property list.
 * properties is declared with one element; presumably `numProperties`
 * entries follow -- TODO confirm. */
typedef struct {
    uint32_t length;
    uint32_t numProperties;
    propertyList properties[1];
} sectionHeader;

/* A single property: its identifier followed by its 32-bit data words. */
typedef struct {
    uint32_t propertyID;
    uint32_t data[1];
} property;

#pragma pack(pop)
115
xls(int debug)116 int xls(int debug)
117 {
118 xls_debug = debug;
119 return 1;
120 }
121
xls_addSST(xlsWorkBook * pWB,SST * sst,DWORD size)122 static xls_error_t xls_addSST(xlsWorkBook* pWB,SST* sst,DWORD size)
123 {
124 verbose("xls_addSST");
125
126 pWB->sst.continued=0;
127 pWB->sst.lastln=0;
128 pWB->sst.lastid=0;
129 pWB->sst.lastrt=0;
130 pWB->sst.lastsz=0;
131
132 if (sst->num > (1<<24))
133 return LIBXLS_ERROR_MALLOC;
134
135 if (pWB->sst.string)
136 return LIBXLS_ERROR_PARSE;
137
138 if ((pWB->sst.string = calloc(pWB->sst.count = sst->num,
139 sizeof(struct str_sst_string))) == NULL)
140 return LIBXLS_ERROR_MALLOC;
141
142 return xls_appendSST(pWB, sst->strings, size - offsetof(SST, strings));
143 }
144
xls_appendSST(xlsWorkBook * pWB,BYTE * buf,DWORD size)145 static xls_error_t xls_appendSST(xlsWorkBook* pWB, BYTE* buf, DWORD size)
146 {
147 DWORD ln; // String character count
148 DWORD ofs; // Current offset in SST buffer
149 DWORD rt; // Count of rich text formatting runs
150 DWORD sz; // Size of asian phonetic settings block
151 BYTE flag; // String flags
152 char* ret = NULL;
153
154 if (xls_debug) {
155 printf("xls_appendSST %u\n", size);
156 }
157
158 sz = rt = ln = 0; // kch
159 ofs=0;
160
161 while(ofs<size)
162 {
163 int ln_toread;
164
165 // Restore state when we're in a continue record
166 // or read string length
167 if (pWB->sst.continued) {
168 ln=pWB->sst.lastln;
169 rt=pWB->sst.lastrt;
170 sz=pWB->sst.lastsz;
171 } else {
172 if (ofs + 2 > size) {
173 return LIBXLS_ERROR_PARSE;
174 }
175 ln = buf[ofs+0] + (buf[ofs+1] << 8);
176 rt = 0;
177 sz = 0;
178
179 ofs+=2;
180 }
181
182 if (xls_debug) {
183 printf("ln=%u\n", ln);
184 }
185
186 // Read flags
187 if ( !pWB->sst.continued || (pWB->sst.continued && ln != 0) ) {
188 if (ofs + sizeof(BYTE) > size) {
189 return LIBXLS_ERROR_PARSE;
190 }
191 flag=*(BYTE *)(buf+ofs);
192 ofs++;
193
194 // Count of rich text formatting runs
195 if (flag & 0x8) {
196 if (ofs + sizeof(WORD) > size) {
197 return LIBXLS_ERROR_PARSE;
198 }
199 rt = buf[ofs+0] + (buf[ofs+1] << 8);
200 ofs+=2;
201 }
202
203 // Size of asian phonetic settings block
204 if (flag & 0x4) {
205 if (ofs + sizeof(DWORD) > size) {
206 return LIBXLS_ERROR_PARSE;
207 }
208 sz = buf[ofs+0] + (buf[ofs+1] << 8) + (buf[ofs+2] << 16) + ((DWORD)buf[ofs+3] << 24);
209 ofs+=4;
210
211 if (xls_debug) {
212 printf("sz=%u\n", sz);
213 }
214 }
215 } else {
216 flag = 0;
217 }
218
219 // Read characters (compressed or not)
220 ln_toread = 0;
221 if (ln > 0) {
222 if (flag & 0x1) {
223 size_t new_len = 0;
224 ln_toread = min((size-ofs)/2, ln);
225 ret=unicode_decode((char *)buf+ofs,ln_toread*2,&new_len,pWB->charset);
226
227 if (ret == NULL)
228 {
229 ret = strdup("*failed to decode utf16*");
230 new_len = strlen(ret);
231 }
232
233 ret = realloc(ret,new_len+1);
234 ret[new_len]=0;
235
236 ln -= ln_toread;
237 ofs+=ln_toread*2;
238
239 if (xls_debug) {
240 printf("String16SST: %s(%lu)\n", ret, (unsigned long)new_len);
241 }
242 } else {
243 ln_toread = min((size-ofs), ln);
244
245 ret = utf8_decode((char *)buf+ofs, ln_toread, pWB->charset);
246
247 ln -= ln_toread;
248 ofs += ln_toread;
249
250 if (xls_debug) {
251 printf("String8SST: %s(%u) \n",ret,ln);
252 }
253 }
254 } else {
255 ret = strdup("");
256 }
257
258 if (ln_toread > 0 || !pWB->sst.continued) {
259 // Concat string if it's a continue, or add string in table
260 if (!pWB->sst.continued) {
261 if (pWB->sst.lastid >= pWB->sst.count) {
262 free(ret);
263 return LIBXLS_ERROR_PARSE;
264 }
265 pWB->sst.lastid++;
266 pWB->sst.string[pWB->sst.lastid-1].str=ret;
267 } else {
268 char *tmp = pWB->sst.string[pWB->sst.lastid-1].str;
269 if (tmp == NULL) {
270 free(ret);
271 return LIBXLS_ERROR_PARSE;
272 }
273 tmp = realloc(tmp, strlen(tmp)+strlen(ret)+1);
274 if (tmp == NULL) {
275 free(ret);
276 return LIBXLS_ERROR_MALLOC;
277 }
278 pWB->sst.string[pWB->sst.lastid-1].str=tmp;
279 memcpy(tmp+strlen(tmp), ret, strlen(ret)+1);
280 free(ret);
281 }
282
283 if (xls_debug) {
284 printf("String %4u: %s<end>\n", pWB->sst.lastid-1, pWB->sst.string[pWB->sst.lastid-1].str);
285 }
286 } else {
287 free(ret);
288 }
289
290 // Jump list of rich text formatting runs
291 if (ofs < size && rt > 0) {
292 int rt_toread = min((size-ofs)/4, rt);
293 rt -= rt_toread;
294 ofs += rt_toread*4;
295 }
296
297 // Jump asian phonetic settings block
298 if (ofs < size && sz > 0) {
299 int sz_toread = min((size-ofs), sz);
300 sz -= sz_toread;
301 ofs += sz_toread;
302 }
303
304 pWB->sst.continued=0;
305 }
306
307 // Save current character count and count of rich text formatting runs and size of asian phonetic settings block
308 if (ln > 0 || rt > 0 || sz > 0) {
309 pWB->sst.continued = 1;
310 pWB->sst.lastln = ln;
311 pWB->sst.lastrt = rt;
312 pWB->sst.lastsz = sz;
313
314 if (xls_debug) {
315 printf("continued: ln=%u, rt=%u, sz=%u\n", ln, rt, sz);
316 }
317 }
318
319 return LIBXLS_OK;
320 }
321
/*
 * Decode a packed RK number into a double.
 *
 * Bit 1 of drk selects the encoding of the upper 30 bits: when set they are
 * a signed integer, otherwise they are the most significant 30 bits of a
 * 64-bit double whose remaining bits are zero. Bit 0 indicates that the
 * decoded value must be divided by 100.
 */
static double NumFromRk(DWORD drk)
{
    double value;

    if ((drk & 0x02) == 0) {
        // Floating point: place the 30 significant bits at the top of a
        // 64-bit pattern and reinterpret that pattern as a double.
        unsigned64_t bits = drk & 0xfffffffc;
        bits <<= 32;
        memcpy(&value, &bits, sizeof(unsigned64_t));
    } else {
        // Integer: the cast to int before shifting keeps negative values negative.
        int whole = (int)drk >> 2;
        value = (double)whole;
    }

    // Bit 0 set means the value is stored multiplied by 100.
    return (drk & 0x01) ? value / 100.0 : value;
}
343
/*
 * Append a sheet described by a BOUNDSHEET record to pWB->sheets.
 *
 * pWB:  workbook receiving the sheet entry.
 * bs:   the BOUNDSHEET record (file position, visibility, type and name).
 * size: size of the record in bytes; bounds the name that get_string reads.
 *
 * Returns the decoded sheet name, which is stored in the new sheet entry.
 * Returns NULL when the sheet array cannot be grown; in that case the
 * existing sheets are left untouched and the decoded name is released.
 */
static char * xls_addSheet(xlsWorkBook* pWB, BOUNDSHEET *bs, DWORD size)
{
    char * name;
    DWORD filepos;
    BYTE visible, type;
    void *grown;

    filepos = bs->filepos;
    visible = bs->visible;
    type = bs->type;

    // printf("charset=%s uni=%d\n", pWB->charset, unicode);
    // printf("bs name %.*s\n", bs->name[0], bs->name+1);
    name = get_string(bs->name, size - offsetof(BOUNDSHEET, name), 0, pWB->is5ver, pWB->charset);
    // printf("name=%s\n", name);

    if(xls_debug) {
        printf ("xls_addSheet[0x%x]\n", type);
        switch (type & 0x0f)
        {
        case 0x00:
            /* worksheet or dialog sheet */
            printf ("85: Worksheet or dialog sheet\n");
            break;
        case 0x01:
            /* Microsoft Excel 4.0 macro sheet */
            printf ("85: Microsoft Excel 4.0 macro sheet\n");
            break;
        case 0x02:
            /* Chart */
            printf ("85: Chart sheet\n");
            break;
        case 0x06:
            /* Visual Basic module */
            printf ("85: Visual Basic sheet\n");
            break;
        default:
            printf ("???\n");
            break;
        }
        printf("visible: %x\n", visible);
        printf("    Pos: %Xh\n",filepos);
        printf("   type: %.4Xh\n",type);
        // Passing NULL for %s is undefined, so substitute a marker.
        printf("   name: %s\n", name ? name : "(null)");
    }

    // Grow through a temporary: on failure the old array must stay reachable
    // and the freshly decoded name must not leak.
    grown = realloc(pWB->sheets.sheet,(pWB->sheets.count+1)*sizeof (struct st_sheet_data));
    if (grown == NULL) {
        free(name);
        return NULL;
    }
    pWB->sheets.sheet = grown;

    pWB->sheets.sheet[pWB->sheets.count].name=name;
    pWB->sheets.sheet[pWB->sheets.count].filepos=filepos;
    pWB->sheets.sheet[pWB->sheets.count].visibility=visible;
    pWB->sheets.sheet[pWB->sheets.count].type=type;
    pWB->sheets.count++;

    return name;
}
401
402
xls_addRow(xlsWorkSheet * pWS,ROW * row)403 static xls_error_t xls_addRow(xlsWorkSheet* pWS,ROW* row)
404 {
405 struct st_row_data* tmp;
406
407 //verbose ("xls_addRow");
408
409 if (row->index > pWS->rows.lastrow)
410 return LIBXLS_ERROR_PARSE;
411
412 tmp=&pWS->rows.row[row->index];
413 tmp->height=row->height;
414 tmp->fcell=row->fcell;
415 tmp->lcell=row->lcell;
416 tmp->flags=row->flags;
417 tmp->xf=row->xf&0xfff;
418 tmp->xfflags=(row->xf >> 8)&0xf0;
419 if(xls_debug) xls_showROW(tmp);
420
421 return LIBXLS_OK;
422 }
423
xls_makeTable(xlsWorkSheet * pWS)424 static xls_error_t xls_makeTable(xlsWorkSheet* pWS)
425 {
426 DWORD i,t;
427 struct st_row_data* tmp;
428 verbose ("xls_makeTable");
429
430 if ((pWS->rows.row = calloc((pWS->rows.lastrow+1),sizeof(struct st_row_data))) == NULL)
431 return LIBXLS_ERROR_MALLOC;
432
433 // printf("ALLOC: rows=%d cols=%d\n", pWS->rows.lastrow, pWS->rows.lastcol);
434 for (t=0;t<=pWS->rows.lastrow;t++)
435 {
436 tmp=&pWS->rows.row[t];
437 tmp->index=t;
438 tmp->fcell=0;
439 tmp->lcell=pWS->rows.lastcol;
440
441 tmp->cells.count = pWS->rows.lastcol+1;
442 if ((tmp->cells.cell = calloc(tmp->cells.count, sizeof(struct st_cell_data))) == NULL)
443 return LIBXLS_ERROR_MALLOC;
444
445 for (i=0;i<=pWS->rows.lastcol;i++)
446 {
447 tmp->cells.cell[i].col = i;
448 tmp->cells.cell[i].row = t;
449 tmp->cells.cell[i].width = pWS->defcolwidth;
450 tmp->cells.cell[i].id = XLS_RECORD_BLANK;
451 }
452 }
453 return LIBXLS_OK;
454 }
455
/*
 * Tell whether a cell record is too short to be parsed safely.
 *
 * pWB: workbook; is5ver selects the BIFF5 or BIFF8 layout for labels.
 * bof: record header giving the record id and its size in bytes.
 * buf: record payload; only inspected for LABEL records, after the size has
 *      been confirmed to cover the bytes being read.
 *
 * Returns non-zero when bof->size is smaller than the minimum required for
 * the record type, and 0 when the record is large enough or its type has no
 * specific requirement beyond the common cell header.
 */
int xls_isCellTooSmall(xlsWorkBook* pWB, BOF* bof, BYTE* buf) {
    // Every cell record starts with the row/col/xf header.
    if (bof->size < sizeof(COL)) {
        return 1;
    }

    switch (bof->id) {
    case XLS_RECORD_FORMULA:
    case XLS_RECORD_FORMULA_ALT:
        return (bof->size < sizeof(FORMULA));

    case XLS_RECORD_MULRK:
        return (bof->size < offsetof(MULRK, rk));

    case XLS_RECORD_MULBLANK:
        return (bof->size < offsetof(MULBLANK, xf));

    case XLS_RECORD_LABELSST:
        // The string-table index is 2 bytes in BIFF5 and 4 bytes otherwise.
        return (bof->size < offsetof(LABEL, value) + (pWB->is5ver ? 2 : 4));

    case XLS_RECORD_LABEL: {
        const LABEL *label = (const LABEL *)buf;
        size_t label_len;

        // Need the 2-byte character count first.
        if (bof->size < offsetof(LABEL, value) + 2) {
            return 1;
        }
        label_len = label->value[0] + (label->value[1] << 8);

        if (pWB->is5ver) {
            return (bof->size < offsetof(LABEL, value) + 2 + label_len);
        }

        // Non-BIFF5 labels carry a flags byte after the count.
        if (bof->size < offsetof(LABEL, value) + 3) {
            return 1;
        }

        // Flag bit 0 set: two bytes per character; clear: one byte each.
        if (label->value[2] & 0x01) {
            return (bof->size < offsetof(LABEL, value) + 3 + 2 * label_len);
        }
        return (bof->size < offsetof(LABEL, value) + 3 + label_len);
    }

    case XLS_RECORD_RK:
        return (bof->size < sizeof(RK));

    case XLS_RECORD_NUMBER:
        return (bof->size < sizeof(BR_NUMBER));

    case XLS_RECORD_BOOLERR:
        return (bof->size < sizeof(BOOLERR));

    default:
        break;
    }

    return 0;
}
501
xls_cell_set_str(struct st_cell_data * cell,char * str)502 void xls_cell_set_str(struct st_cell_data *cell, char *str) {
503 if (cell->str) {
504 free(cell->str);
505 }
506 cell->str = str;
507 }
508
/*
 * Store the contents of one cell record in the worksheet grid.
 *
 * pWS: worksheet whose rows/cells were allocated by xls_makeTable.
 * bof: record header (id and size).
 * buf: record payload, starting with the row/col/xf header.
 *
 * The cell's id and xf are set from the record; the numeric value (d and/or
 * l) and the display string are then filled in according to the record type.
 * MULRK and MULBLANK records populate a run of consecutive cells in the row.
 *
 * Returns the last cell written, or NULL when the record is too small or
 * addresses a row or column outside the allocated grid.
 */
static struct st_cell_data *xls_addCell(xlsWorkSheet* pWS,BOF* bof,BYTE* buf)
{
    struct st_cell_data*	cell;
    struct st_row_data*		row;
    WORD                    row_index;
    WORD                    col;
    int						i;

    verbose ("xls_addCell");

    if (xls_isCellTooSmall(pWS->workbook, bof, buf))
        return NULL;

    // printf("ROW: %u COL: %u\n", xlsShortVal(((COL*)buf)->row), xlsShortVal(((COL*)buf)->col));
    // The row index comes straight from the file: validate it against the
    // allocated grid before indexing, exactly as xls_addRow does.
    row_index = xlsShortVal(((COL*)buf)->row);
    if (pWS->rows.row == NULL || row_index > pWS->rows.lastrow) {
        if (xls_debug) fprintf(stderr, "Error: Row index out of bounds\n");
        return NULL;
    }
    row=&pWS->rows.row[row_index];

    col = xlsShortVal(((COL*)buf)->col);
    if (col >= row->cells.count) {
        if (xls_debug) fprintf(stderr, "Error: Column index out of bounds\n");
        return NULL;
    }
    cell = &row->cells.cell[col];

    cell->id=bof->id;
    cell->xf=xlsShortVal(((COL*)buf)->xf);

    switch (bof->id)
    {
    case XLS_RECORD_FORMULA:
    case XLS_RECORD_FORMULA_ALT:
        xlsConvertFormula((FORMULA *)buf);
        cell->id=XLS_RECORD_FORMULA;
        if (((FORMULA*)buf)->res!=0xffff) {
            // if a double, then set double and clear l
            cell->l=0;
            memcpy(&cell->d, &((FORMULA*)buf)->resid, sizeof(double));	// Required for ARM
            cell->id = XLS_RECORD_NUMBER; // hack
            xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
            cell->id = bof->id;
        } else {
            double d = ((FORMULA*)buf)->resdata[1];
            cell->l = 0xFFFF;
            switch(((FORMULA*)buf)->resid) {
            case 0:		// String
                break;	// cell is half complete, get the STRING next record
            case 1:		// Boolean
                memcpy(&cell->d, &d, sizeof(double)); // Required for ARM
                xls_cell_set_str(cell, strdup("bool"));
                break;
            case 2:		// error
                memcpy(&cell->d, &d, sizeof(double)); // Required for ARM
                xls_cell_set_str(cell, strdup("error"));
                break;
            case 3:		// empty string
                xls_cell_set_str(cell, strdup(""));
                break;
            default:
                break;
            }
        }
        if(formula_handler) formula_handler(bof->id, bof->size, buf);
        break;
    case XLS_RECORD_MULRK:
        for (i = 0; i < (bof->size - 6)/6; i++)	// 6 == 2 row + 2 col + 2 trailing index
        {
            WORD index = col + i;
            if(index >= row->cells.count) {
                if (xls_debug) fprintf(stderr, "Error: MULTI-RK index out of bounds\n");
                return NULL;
            }
            cell=&row->cells.cell[index];
            cell->id=XLS_RECORD_RK;
            cell->xf=xlsShortVal(((MULRK*)buf)->rk[i].xf);
            cell->d=NumFromRk(xlsIntVal(((MULRK*)buf)->rk[i].value));
            xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
        }
        break;
    case XLS_RECORD_MULBLANK:
        for (i = 0; i < (bof->size - 6)/2; i++)	// 6 == 2 row + 2 col + 2 trailing index
        {
            WORD index = col + i;
            if(index >= row->cells.count) {
                if (xls_debug) fprintf(stderr, "Error: MULTI-BLANK index out of bounds\n");
                return NULL;
            }
            cell=&row->cells.cell[index];
            cell->id=XLS_RECORD_BLANK;
            cell->xf=xlsShortVal(((MULBLANK*)buf)->xf[i]);
            xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
        }
        break;
    case XLS_RECORD_LABELSST:
    case XLS_RECORD_LABEL:
        xls_cell_set_str(cell, xls_getfcell(pWS->workbook, cell, ((LABEL*)buf)->value));
        if (cell->str) {
            // Best effort: also expose the text as numbers when it parses as such.
            sscanf((char *)cell->str, "%d", &cell->l);
            sscanf((char *)cell->str, "%lf", &cell->d);
        }
        break;
    case XLS_RECORD_RK:
        cell->d=NumFromRk(xlsIntVal(((RK*)buf)->value));
        xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
        break;
    case XLS_RECORD_BLANK:
        break;
    case XLS_RECORD_NUMBER:
        xlsConvertDouble((BYTE *)&((BR_NUMBER*)buf)->value);
        memcpy(&cell->d, &((BR_NUMBER*)buf)->value, sizeof(double)); // Required for ARM
        xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
        break;
    case XLS_RECORD_BOOLERR:
        cell->d = ((BOOLERR *)buf)->value;
        if (((BOOLERR *)buf)->iserror) {
            xls_cell_set_str(cell, strdup("error"));
        } else {
            xls_cell_set_str(cell, strdup("bool"));
        }
        break;
    default:
        xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
        break;
    }
    if (xls_debug) xls_showCell(cell);

    return cell;
}
632
/*
 * Append a font described by a FONT record to pWB->fonts.
 *
 * pWB:  workbook receiving the font.
 * font: the FONT record.
 * size: size of the record in bytes; bounds the name that get_string reads.
 *
 * Returns the decoded font name stored in the new entry, or NULL when the
 * font array cannot be grown (the existing fonts are then left untouched).
 */
static char *xls_addFont(xlsWorkBook* pWB, FONT* font, DWORD size)
{
    struct st_font_data* tmp;
    void *grown;

    verbose("xls_addFont");

    // Grow through a temporary so a failed realloc does not lose the old array.
    grown = realloc(pWB->fonts.font,(pWB->fonts.count+1)*sizeof(struct st_font_data));
    if (grown == NULL)
        return NULL;
    pWB->fonts.font = grown;

    tmp=&pWB->fonts.font[pWB->fonts.count];

    tmp->name = get_string(font->name, size - offsetof(FONT, name), 0, pWB->is5ver, pWB->charset);

    tmp->height=font->height;
    tmp->flag=font->flag;
    tmp->color=font->color;
    tmp->bold=font->bold;
    tmp->escapement=font->escapement;
    tmp->underline=font->underline;
    tmp->family=font->family;
    tmp->charset=font->charset;

    //	xls_showFont(tmp);
    pWB->fonts.count++;

    return tmp->name;
}
661
xls_addFormat(xlsWorkBook * pWB,FORMAT * format,DWORD size)662 static xls_error_t xls_addFormat(xlsWorkBook* pWB, FORMAT* format, DWORD size)
663 {
664 struct st_format_data* tmp;
665
666 verbose("xls_addFormat");
667 pWB->formats.format = realloc(pWB->formats.format, (pWB->formats.count+1)*sizeof(struct st_format_data));
668 if (pWB->formats.format == NULL)
669 return LIBXLS_ERROR_MALLOC;
670
671 tmp = &pWB->formats.format[pWB->formats.count];
672 tmp->index = format->index;
673 tmp->value = get_string(format->value, size - offsetof(FORMAT, value), (BYTE)!pWB->is5ver, (BYTE)pWB->is5ver, pWB->charset);
674 if(xls_debug) xls_showFormat(tmp);
675 pWB->formats.count++;
676
677 return LIBXLS_OK;
678 }
679
xls_addXF8(xlsWorkBook * pWB,XF8 * xf)680 static xls_error_t xls_addXF8(xlsWorkBook* pWB,XF8* xf)
681 {
682 struct st_xf_data* tmp;
683
684 verbose("xls_addXF");
685 pWB->xfs.xf= realloc(pWB->xfs.xf, (pWB->xfs.count+1)*sizeof(struct st_xf_data));
686 if (pWB->xfs.xf == NULL)
687 return LIBXLS_ERROR_MALLOC;
688
689 tmp=&pWB->xfs.xf[pWB->xfs.count];
690
691 tmp->font=xf->font;
692 tmp->format=xf->format;
693 tmp->type=xf->type;
694 tmp->align=xf->align;
695 tmp->rotation=xf->rotation;
696 tmp->ident=xf->ident;
697 tmp->usedattr=xf->usedattr;
698 tmp->linestyle=xf->linestyle;
699 tmp->linecolor=xf->linecolor;
700 tmp->groundcolor=xf->groundcolor;
701
702 // xls_showXF(tmp);
703 pWB->xfs.count++;
704
705 return LIBXLS_OK;
706 }
707
xls_addXF5(xlsWorkBook * pWB,XF5 * xf)708 static xls_error_t xls_addXF5(xlsWorkBook* pWB,XF5* xf)
709 {
710 struct st_xf_data* tmp;
711
712 verbose("xls_addXF");
713 pWB->xfs.xf = realloc(pWB->xfs.xf, (pWB->xfs.count+1)*sizeof(struct st_xf_data));
714 if (pWB->xfs.xf == NULL)
715 return LIBXLS_ERROR_MALLOC;
716
717 tmp=&pWB->xfs.xf[pWB->xfs.count];
718
719 tmp->font=xf->font;
720 tmp->format=xf->format;
721 tmp->type=xf->type;
722 tmp->align=(BYTE)xf->align;
723 /*
724 tmp->rotation=xf->rotation;
725 tmp->ident=xf->ident;
726 tmp->usedattr=xf->usedattr;
727 tmp->linestyle=xf->linestyle;
728 tmp->linecolor=xf->linecolor;
729 tmp->groundcolor=xf->groundcolor;
730 */
731
732 // xls_showXF(tmp);
733 pWB->xfs.count++;
734 return LIBXLS_OK;
735 }
736
xls_addColinfo(xlsWorkSheet * pWS,COLINFO * colinfo)737 static xls_error_t xls_addColinfo(xlsWorkSheet* pWS,COLINFO* colinfo)
738 {
739 struct st_colinfo_data* tmp;
740
741 verbose("xls_addColinfo");
742 pWS->colinfo.col = realloc(pWS->colinfo.col,(pWS->colinfo.count+1)*sizeof(struct st_colinfo_data));
743 if (pWS->colinfo.col == NULL)
744 return LIBXLS_ERROR_MALLOC;
745
746 tmp=&pWS->colinfo.col[pWS->colinfo.count];
747 tmp->first=colinfo->first;
748 tmp->last=colinfo->last;
749 tmp->width=colinfo->width;
750 tmp->xf=colinfo->xf;
751 tmp->flags=colinfo->flags;
752
753 if(xls_debug) xls_showColinfo(tmp);
754 pWS->colinfo.count++;
755
756 return LIBXLS_OK;
757 }
758
xls_mergedCells(xlsWorkSheet * pWS,BOF * bof,BYTE * buf)759 static xls_error_t xls_mergedCells(xlsWorkSheet* pWS,BOF* bof,BYTE* buf)
760 {
761 if (bof->size < sizeof(WORD))
762 return LIBXLS_ERROR_PARSE;
763
764 int count = buf[0] + (buf[1] << 8);
765 DWORD limit = sizeof(WORD)+count*sizeof(struct MERGEDCELLS);
766 if(limit > (DWORD)bof->size) {
767 verbose("Merged Cells Count out of range");
768 return LIBXLS_ERROR_PARSE;
769 }
770 int i,c,r;
771 struct MERGEDCELLS *span;
772 verbose("Merged Cells");
773 for (i=0;i<count;i++)
774 {
775 span=(struct MERGEDCELLS*)(buf+(2+i*sizeof(struct MERGEDCELLS)));
776 xlsConvertMergedcells(span);
777 // printf("Merged Cells: [%i,%i] [%i,%i] \n",span->colf,span->rowf,span->coll,span->rowl);
778 // Sanity check:
779 if(!( span->rowf <= span->rowl &&
780 span->rowl <= pWS->rows.lastrow &&
781 span->colf <= span->coll &&
782 span->coll <= pWS->rows.lastcol
783 )) {
784 return LIBXLS_ERROR_PARSE;
785 }
786
787 for (r=span->rowf;r<=span->rowl;r++)
788 for (c=span->colf;c<=span->coll;c++)
789 pWS->rows.row[r].cells.cell[c].isHidden=1;
790 pWS->rows.row[span->rowf].cells.cell[span->colf].colspan=(span->coll-span->colf+1);
791 pWS->rows.row[span->rowf].cells.cell[span->colf].rowspan=(span->rowl-span->rowf+1);
792 pWS->rows.row[span->rowf].cells.cell[span->colf].isHidden=0;
793 }
794 return LIBXLS_OK;
795 }
796
/*
 * Tell whether a workbook-level record is too short to be parsed safely.
 *
 * pWB:  workbook; is5ver selects which XF layout applies.
 * bof1: record header giving the record id and its size in bytes.
 *
 * Returns non-zero when bof1->size is below the minimum for the record type,
 * and 0 otherwise (including for record types with no requirement here).
 */
int xls_isRecordTooSmall(xlsWorkBook *pWB, BOF *bof1) {
    size_t required = 0;    // stays 0 for record types without a minimum

    switch (bof1->id) {
    case XLS_RECORD_BOF:	// BIFF5-8
        required = 2 * sizeof(WORD);
        break;
    case XLS_RECORD_CODEPAGE:
        required = sizeof(WORD);
        break;
    case XLS_RECORD_WINDOW1:
        required = sizeof(WIND1);
        break;
    case XLS_RECORD_SST:
        required = offsetof(SST, strings);
        break;
    case XLS_RECORD_BOUNDSHEET:
        required = offsetof(BOUNDSHEET, name);
        break;
    case XLS_RECORD_XF:
        required = pWB->is5ver ? sizeof(XF5) : sizeof(XF8);
        break;
    case XLS_RECORD_FONT:
    case XLS_RECORD_FONT_ALT:
        required = offsetof(FONT, name);
        break;
    case XLS_RECORD_FORMAT:
        required = offsetof(FORMAT, value);
        break;
    case XLS_RECORD_1904:
        required = sizeof(BYTE);
        break;
    default:
        break;
    }

    return (bof1->size < required);
}
826
xls_parseWorkBook(xlsWorkBook * pWB)827 xls_error_t xls_parseWorkBook(xlsWorkBook* pWB)
828 {
829 BOF bof1 = { .id = 0, .size = 0 };
830 BOF bof2 = { .id = 0, .size = 0 };
831 BYTE* buf = NULL;
832 BYTE once = 0;
833 xls_error_t retval = LIBXLS_OK;
834
835 verbose ("xls_parseWorkBook");
836 do {
837 if(xls_debug > 10) {
838 printf("READ WORKBOOK filePos=%ld\n", (long)pWB->filepos);
839 printf(" OLE: start=%d pos=%u size=%u fatPos=%u\n",
840 pWB->olestr->start, (unsigned int)pWB->olestr->pos,
841 (unsigned int)pWB->olestr->size, (unsigned int)pWB->olestr->fatpos);
842 }
843
844 if (ole2_read(&bof1, 1, 4, pWB->olestr) != 4) {
845 retval = LIBXLS_ERROR_READ;
846 goto cleanup;
847 }
848 xlsConvertBof(&bof1);
849 if(xls_debug) xls_showBOF(&bof1);
850
851 if (bof1.size) {
852 if ((buf = realloc(buf, bof1.size)) == NULL) {
853 if (xls_debug) fprintf(stderr, "Error: failed to allocate buffer of size %d\n", (int)bof1.size);
854 retval = LIBXLS_ERROR_MALLOC;
855 goto cleanup;
856 }
857 if (ole2_read(buf, 1, bof1.size, pWB->olestr) != bof1.size) {
858 if (xls_debug) fprintf(stderr, "Error: failed to read OLE block\n");
859 retval = LIBXLS_ERROR_READ;
860 goto cleanup;
861 }
862 }
863
864 if (xls_isRecordTooSmall(pWB, &bof1)) {
865 retval = LIBXLS_ERROR_PARSE;
866 goto cleanup;
867 }
868
869 switch (bof1.id) {
870 case XLS_RECORD_EOF:
871 //verbose("EOF");
872 break;
873 case XLS_RECORD_BOF: // BIFF5-8
874 pWB->is5ver = (buf[0] + (buf[1] << 8) != 0x600);
875 pWB->type = buf[2] + (buf[3] << 8);
876
877 if(xls_debug) {
878 printf("version: %s\n", pWB->is5ver ? "BIFF5" : "BIFF8" );
879 printf(" type: %.2X\n", pWB->type);
880 }
881 break;
882
883 case XLS_RECORD_CODEPAGE:
884 pWB->codepage = buf[0] + (buf[1] << 8);
885 if(xls_debug) printf("codepage=%x\n", pWB->codepage);
886 break;
887
888 case XLS_RECORD_CONTINUE:
889 if(once) {
890 if (bof2.id==XLS_RECORD_SST) {
891 if ((retval = xls_appendSST(pWB,buf,bof1.size)) != LIBXLS_OK)
892 goto cleanup;
893 }
894 bof1=bof2;
895 }
896 break;
897
898 case XLS_RECORD_WINDOW1:
899 {
900 WIND1 *w = (WIND1*)buf;
901 xlsConvertWindow(w);
902 pWB->activeSheetIdx = w->itabCur;
903 if(xls_debug) {
904 printf("WINDOW1: ");
905 printf("xWn : %d\n", w->xWn/20);
906 printf("yWn : %d\n", w->yWn/20);
907 printf("dxWn : %d\n", w->dxWn/20);
908 printf("dyWn : %d\n", w->dyWn/20);
909 printf("grbit : %d\n", w->grbit);
910 printf("itabCur: %d\n", w->itabCur);
911 printf("itabFi : %d\n", w->itabFirst);
912 printf("ctabSel: %d\n", w->ctabSel);
913 printf("wTabRat: %d\n", w->wTabRatio);
914 }
915 }
916 break;
917
918 case XLS_RECORD_SST:
919 //printf("ADD SST\n");
920 xlsConvertSst((SST *)buf);
921 if ((retval = xls_addSST(pWB,(SST*)buf,bof1.size)) != LIBXLS_OK) {
922 goto cleanup;
923 }
924 break;
925
926 case XLS_RECORD_EXTSST:
927 break;
928
929 case XLS_RECORD_BOUNDSHEET:
930 {
931 //printf("ADD SHEET\n");
932 BOUNDSHEET *bs = (BOUNDSHEET *)buf;
933 xlsConvertBoundsheet(bs);
934 //char *s;
935 // different for BIFF5 and BIFF8
936 /*s = */ xls_addSheet(pWB, bs, bof1.size);
937 }
938 break;
939
940 case XLS_RECORD_XF:
941 if(pWB->is5ver) {
942 XF5 *xf;
943 xf = (XF5 *)buf;
944 xlsConvertXf5(xf);
945
946 if ((retval = xls_addXF5(pWB,xf)) != LIBXLS_OK) {
947 goto cleanup;
948 }
949 if(xls_debug) {
950 printf(" font: %d\n", xf->font);
951 printf(" format: %d\n", xf->format);
952 printf(" type: %.4x\n", xf->type);
953 printf(" align: %.4x\n", xf->align);
954 printf("rotatio: %.4x\n", xf->color);
955 printf(" ident: %.4x\n", xf->fill);
956 printf("usedatt: %.4x\n", xf->border);
957 printf("linesty: %.4x\n", xf->linestyle);
958 }
959 } else {
960 XF8 *xf;
961 xf = (XF8 *)buf;
962 xlsConvertXf8(xf);
963
964 if ((retval = xls_addXF8(pWB,xf)) != LIBXLS_OK) {
965 goto cleanup;
966 }
967
968 if(xls_debug) {
969 xls_showXF(xf);
970 }
971 }
972 break;
973
974 case XLS_RECORD_FONT:
975 case XLS_RECORD_FONT_ALT:
976 {
977 char *s;
978 FONT *f = (FONT*)buf;
979 xlsConvertFont(f);
980 s = xls_addFont(pWB,f, bof1.size);
981 if(xls_debug) {
982 printf(" height: %d\n", f->height);
983 printf(" flag: 0x%x\n", f->flag);
984 printf(" color: 0x%x\n", f->color);
985 printf(" weight: %d\n", f->bold);
986 printf("escapem: 0x%x\n", f->escapement);
987 printf("underln: 0x%x\n", f->underline);
988 printf(" family: 0x%x\n", f->family);
989 printf("charset: 0x%x\n", f->charset);
990 if(s) printf(" name: %s\n", s);
991 }
992 }
993 break;
994
995 case XLS_RECORD_FORMAT:
996 xlsConvertFormat((FORMAT *)buf);
997 if ((retval = xls_addFormat(pWB, (FORMAT*)buf, bof1.size)) != LIBXLS_OK) {
998 goto cleanup;
999 }
1000 break;
1001
1002 case XLS_RECORD_STYLE:
1003 if(xls_debug) {
1004 struct { unsigned short idx; unsigned char ident; unsigned char lvl; } *styl;
1005 styl = (void *)buf;
1006
1007 printf(" idx: 0x%x\n", styl->idx & 0x07FF);
1008 if(styl->idx & 0x8000) {
1009 printf(" ident: 0x%x\n", styl->ident);
1010 printf(" level: 0x%x\n", styl->lvl);
1011 } else {
1012 char *s = get_string((char *)&buf[2], bof1.size - 2, 1, pWB->is5ver, pWB->charset);
1013 printf(" name=%s\n", s);
1014 free(s);
1015 }
1016 }
1017 break;
1018
1019 case XLS_RECORD_PALETTE:
1020 if(xls_debug > 10) {
1021 unsigned char *p = buf + 2;
1022 int idx, len;
1023
1024 len = buf[0] + (buf[1] << 8);
1025 for(idx=0; idx<len; ++idx) {
1026 printf(" Index=0x%2.2x %2.2x%2.2x%2.2x\n", idx+8, p[0], p[1], p[2] );
1027 p += 4;
1028 }
1029 }
1030 break;
1031
1032 case XLS_RECORD_1904:
1033 pWB->is1904 = *(BYTE *)buf; // the field is a short, but with little endian the first byte is 0 or 1
1034 if(xls_debug) {
1035 printf(" mode: 0x%x\n", pWB->is1904);
1036 }
1037 break;
1038
1039 case XLS_RECORD_DEFINEDNAME:
1040 if(xls_debug) {
1041 int i;
1042 printf(" DEFINEDNAME: ");
1043 for(i=0; i<bof1.size; ++i) printf("%2.2x ", buf[i]);
1044 printf("\n");
1045 }
1046 break;
1047
1048 default:
1049 if(xls_debug)
1050 {
1051 //xls_showBOF(&bof1);
1052 printf(" Not Processed in parseWoorkBook(): BOF=0x%4.4X size=%d\n", bof1.id, bof1.size);
1053 }
1054 break;
1055 }
1056 bof2=bof1;
1057 once=1;
1058 }
1059 while ((!pWB->olestr->eof)&&(bof1.id!=XLS_RECORD_EOF));
1060
1061 cleanup:
1062 if (buf)
1063 free(buf);
1064
1065 return retval;
1066 }
1067
1068
xls_preparseWorkSheet(xlsWorkSheet * pWS)1069 static xls_error_t xls_preparseWorkSheet(xlsWorkSheet* pWS)
1070 {
1071 BOF tmp;
1072 BYTE* buf = NULL;
1073 xls_error_t retval = LIBXLS_OK;
1074
1075 verbose ("xls_preparseWorkSheet");
1076
1077 if (ole2_seek(pWS->workbook->olestr,pWS->filepos) == -1) {
1078 retval = LIBXLS_ERROR_SEEK;
1079 goto cleanup;
1080 }
1081 do
1082 {
1083 size_t read;
1084 if((read = ole2_read(&tmp, 1, 4, pWS->workbook->olestr)) != 4) {
1085 if (xls_debug) fprintf(stderr, "Error: failed to read OLE size\n");
1086 retval = LIBXLS_ERROR_READ;
1087 goto cleanup;
1088 }
1089 xlsConvertBof(&tmp);
1090 if (tmp.size) {
1091 if ((buf = realloc(buf, tmp.size)) == NULL) {
1092 if (xls_debug) fprintf(stderr, "Error: failed to allocate buffer of size %d\n", (int)tmp.size);
1093 retval = LIBXLS_ERROR_MALLOC;
1094 goto cleanup;
1095 }
1096 if((read = ole2_read(buf, 1, tmp.size, pWS->workbook->olestr)) != tmp.size) {
1097 if (xls_debug) fprintf(stderr, "Error: failed to read OLE block\n");
1098 retval = LIBXLS_ERROR_READ;
1099 goto cleanup;
1100 }
1101 }
1102
1103 switch (tmp.id)
1104 {
1105 case XLS_RECORD_DEFCOLWIDTH:
1106 if (tmp.size < sizeof(WORD)) {
1107 retval = LIBXLS_ERROR_PARSE;
1108 goto cleanup;
1109 }
1110 pWS->defcolwidth = (buf[0] << 8) + (buf[1] << 16);
1111 break;
1112 case XLS_RECORD_COLINFO:
1113 if (tmp.size < sizeof(COLINFO)) {
1114 retval = LIBXLS_ERROR_PARSE;
1115 goto cleanup;
1116 }
1117 xlsConvertColinfo((COLINFO*)buf);
1118 if ((retval = xls_addColinfo(pWS,(COLINFO*)buf)) != LIBXLS_OK)
1119 goto cleanup;
1120 break;
1121 case XLS_RECORD_ROW:
1122 if (tmp.size < sizeof(ROW)) {
1123 retval = LIBXLS_ERROR_PARSE;
1124 goto cleanup;
1125 }
1126 xlsConvertRow((ROW*)buf);
1127 if (pWS->rows.lastcol<((ROW*)buf)->lcell)
1128 pWS->rows.lastcol=((ROW*)buf)->lcell;
1129 if (pWS->rows.lastrow<((ROW*)buf)->index)
1130 pWS->rows.lastrow=((ROW*)buf)->index;
1131 break;
1132 /* If the ROW record is incorrect or missing, infer the information from
1133 * cell data. */
1134 case XLS_RECORD_MULRK:
1135 if (xls_isCellTooSmall(pWS->workbook, &tmp, buf)) {
1136 retval = LIBXLS_ERROR_PARSE;
1137 goto cleanup;
1138 }
1139 if (pWS->rows.lastcol<xlsShortVal(((MULRK*)buf)->col) + (tmp.size - 6)/6 - 1)
1140 pWS->rows.lastcol=xlsShortVal(((MULRK*)buf)->col) + (tmp.size - 6)/6 - 1;
1141 if (pWS->rows.lastrow<xlsShortVal(((MULRK*)buf)->row))
1142 pWS->rows.lastrow=xlsShortVal(((MULRK*)buf)->row);
1143 break;
1144 case XLS_RECORD_MULBLANK:
1145 if (xls_isCellTooSmall(pWS->workbook, &tmp, buf)) {
1146 retval = LIBXLS_ERROR_PARSE;
1147 goto cleanup;
1148 }
1149 if (pWS->rows.lastcol<xlsShortVal(((MULBLANK*)buf)->col) + (tmp.size - 6)/2 - 1)
1150 pWS->rows.lastcol=xlsShortVal(((MULBLANK*)buf)->col) + (tmp.size - 6)/2 - 1;
1151 if (pWS->rows.lastrow<xlsShortVal(((MULBLANK*)buf)->row))
1152 pWS->rows.lastrow=xlsShortVal(((MULBLANK*)buf)->row);
1153 break;
1154 case XLS_RECORD_NUMBER:
1155 case XLS_RECORD_RK:
1156 case XLS_RECORD_LABELSST:
1157 case XLS_RECORD_BLANK:
1158 case XLS_RECORD_LABEL:
1159 case XLS_RECORD_FORMULA:
1160 case XLS_RECORD_FORMULA_ALT:
1161 case XLS_RECORD_BOOLERR:
1162 if (xls_isCellTooSmall(pWS->workbook, &tmp, buf)) {
1163 retval = LIBXLS_ERROR_PARSE;
1164 goto cleanup;
1165 }
1166 if (pWS->rows.lastcol<xlsShortVal(((COL*)buf)->col))
1167 pWS->rows.lastcol=xlsShortVal(((COL*)buf)->col);
1168 if (pWS->rows.lastrow<xlsShortVal(((COL*)buf)->row))
1169 pWS->rows.lastrow=xlsShortVal(((COL*)buf)->row);
1170 break;
1171 }
1172 if (pWS->rows.lastcol > 256) {
1173 retval = LIBXLS_ERROR_PARSE;
1174 goto cleanup;
1175 }
1176 }
1177 while ((!pWS->workbook->olestr->eof)&&(tmp.id!=XLS_RECORD_EOF));
1178
1179 cleanup:
1180 if (buf)
1181 free(buf);
1182 return retval;
1183 }
1184
xls_formatColumn(xlsWorkSheet * pWS)1185 static xls_error_t xls_formatColumn(xlsWorkSheet* pWS)
1186 {
1187 DWORD i,t,ii;
1188 DWORD fcol,lcol;
1189 WORD width;
1190 BYTE isHidden;
1191
1192 for (i=0;i<pWS->colinfo.count;i++)
1193 {
1194 width = pWS->colinfo.col[i].width;
1195 isHidden = (pWS->colinfo.col[i].flags&1);
1196 if (pWS->colinfo.col[i].first<=pWS->rows.lastcol)
1197 fcol=pWS->colinfo.col[i].first;
1198 else
1199 fcol=pWS->rows.lastcol;
1200
1201 if (pWS->colinfo.col[i].last<=pWS->rows.lastcol)
1202 lcol=pWS->colinfo.col[i].last;
1203 else
1204 lcol=pWS->rows.lastcol;
1205
1206 for (ii=0;ii<=pWS->rows.lastrow;ii++) {
1207 for (t=fcol;t<=lcol;t++) {
1208 pWS->rows.row[ii].cells.cell[t].isHidden |= isHidden;
1209 pWS->rows.row[ii].cells.cell[t].width = width;
1210 }
1211 }
1212 }
1213 return LIBXLS_OK;
1214 }
1215
xls_parseWorkSheet(xlsWorkSheet * pWS)1216 xls_error_t xls_parseWorkSheet(xlsWorkSheet* pWS)
1217 {
1218 BOF tmp;
1219 BYTE* buf = NULL;
1220 long offset = pWS->filepos;
1221 size_t read;
1222 xls_error_t retval = 0;
1223
1224 struct st_cell_data *cell = NULL;
1225 xlsWorkBook *pWB = pWS->workbook;
1226
1227 verbose ("xls_parseWorkSheet");
1228
1229 if ((retval = xls_preparseWorkSheet(pWS)) != LIBXLS_OK) {
1230 goto cleanup;
1231 }
1232 // printf("size=%d fatpos=%d)\n", pWS->workbook->olestr->size, pWS->workbook->olestr->fatpos);
1233
1234 if ((retval = xls_makeTable(pWS)) != LIBXLS_OK) {
1235 goto cleanup;
1236 }
1237
1238 if ((retval = xls_formatColumn(pWS)) != LIBXLS_OK) {
1239 goto cleanup;
1240 }
1241
1242 if (ole2_seek(pWS->workbook->olestr,pWS->filepos) == -1) {
1243 retval = LIBXLS_ERROR_SEEK;
1244 goto cleanup;
1245 }
1246 do
1247 {
1248 long lastPos = offset;
1249
1250 if(xls_debug > 10) {
1251 printf("LASTPOS=%ld pos=%d filePos=%d filePos=%d\n", lastPos, (int)pWB->olestr->pos, pWS->filepos, pWB->filepos);
1252 }
1253 if((read = ole2_read(&tmp, 1, 4, pWS->workbook->olestr)) != 4) {
1254 if (xls_debug) fprintf(stderr, "Error: failed to read OLE size\n");
1255 retval = LIBXLS_ERROR_READ;
1256 goto cleanup;
1257 }
1258 xlsConvertBof((BOF *)&tmp);
1259 if (tmp.size) {
1260 if ((buf = realloc(buf, tmp.size)) == NULL) {
1261 if (xls_debug) fprintf(stderr, "Error: failed to allocate buffer of size %d\n", (int)tmp.size);
1262 retval = LIBXLS_ERROR_MALLOC;
1263 goto cleanup;
1264 }
1265 if((read = ole2_read(buf, 1, tmp.size, pWS->workbook->olestr)) != tmp.size) {
1266 if (xls_debug) fprintf(stderr, "Error: failed to read OLE block\n");
1267 retval = LIBXLS_ERROR_READ;
1268 goto cleanup;
1269 }
1270 }
1271 offset += 4 + tmp.size;
1272
1273 if(xls_debug)
1274 xls_showBOF(&tmp);
1275
1276 switch (tmp.id)
1277 {
1278 case XLS_RECORD_EOF:
1279 break;
1280 case XLS_RECORD_MERGEDCELLS:
1281 if ((retval = xls_mergedCells(pWS,&tmp,buf)) != LIBXLS_OK) {
1282 goto cleanup;
1283 }
1284 break;
1285 case XLS_RECORD_ROW:
1286 if (tmp.size < sizeof(ROW)) {
1287 retval = LIBXLS_ERROR_PARSE;
1288 goto cleanup;
1289 }
1290 if(xls_debug > 10) printf("ROW: %x at pos=%ld\n", tmp.id, lastPos);
1291 xlsConvertRow((ROW *)buf);
1292 if ((retval = xls_addRow(pWS,(ROW*)buf)) != LIBXLS_OK) {
1293 goto cleanup;
1294 }
1295 break;
1296 case XLS_RECORD_DEFCOLWIDTH:
1297 if (tmp.size < sizeof(WORD)) {
1298 retval = LIBXLS_ERROR_PARSE;
1299 goto cleanup;
1300 }
1301 if(xls_debug > 10) printf("DEFAULT COL WIDTH: %d\n", ((WORD *)buf)[0]);
1302 break;
1303 case XLS_RECORD_DEFAULTROWHEIGHT:
1304 if (tmp.size < 2 * sizeof(WORD)) {
1305 retval = LIBXLS_ERROR_PARSE;
1306 goto cleanup;
1307 }
1308 if(xls_debug > 10) printf("DEFAULT ROW Height: 0x%x %d\n", ((WORD *)buf)[0], ((WORD *)buf)[1]);
1309 break;
1310 case XLS_RECORD_DBCELL:
1311 if(xls_debug > 10) {
1312 DWORD *foo = (DWORD *)buf;
1313 WORD *goo;
1314 int i;
1315 printf("DBCELL: size %d\n", tmp.size);
1316 printf("DBCELL OFFSET=%4.4u -> ROW %ld\n", foo[0], lastPos-foo[0]);
1317 ++foo;
1318 goo = (WORD *)foo;
1319 for(i=0; i<5; ++i) printf("goo[%d]=%4.4x %u\n", i, goo[i], goo[i]);
1320 }
1321 break;
1322 case XLS_RECORD_INDEX:
1323 if(xls_debug > 10) {
1324 DWORD *foo = (DWORD *)buf;
1325 int i;
1326 printf("INDEX: size %d\n", tmp.size);
1327 for(i=0; i<5; ++i) printf("FOO[%d]=%4.4x %u\n", i, foo[i], foo[i]);
1328 }
1329 #if 0
1330 0 4 4 4 8 4
1331 12 4 16 4∙nm
1332 Not used Index to first used row (rf, 0-based) Index to first row of unused tail of sheet (rl, last used row + 1, 0-based)
1333 Absolute stream position of the DEFCOLWIDTH record (➜5.32) of the current sheet. If this record does not exist, the offset points to the record at the position where the DEFCOLWIDTH record would occur.
1334 Array of nm absolute stream positions to the DBCELL record (➜5.29) of each Row Block
1335 #endif
1336 break;
1337 case XLS_RECORD_MULRK:
1338 case XLS_RECORD_MULBLANK:
1339 case XLS_RECORD_NUMBER:
1340 case XLS_RECORD_BOOLERR:
1341 case XLS_RECORD_RK:
1342 case XLS_RECORD_LABELSST:
1343 case XLS_RECORD_BLANK:
1344 case XLS_RECORD_LABEL:
1345 case XLS_RECORD_FORMULA:
1346 case XLS_RECORD_FORMULA_ALT:
1347 if ((cell = xls_addCell(pWS, &tmp, buf)) == NULL) {
1348 retval = LIBXLS_ERROR_PARSE;
1349 goto cleanup;
1350 }
1351 break;
1352 case XLS_RECORD_ARRAY:
1353 if(formula_handler) formula_handler(tmp.id, tmp.size, buf);
1354 break;
1355
1356 case XLS_RECORD_STRING:
1357 if(cell && (cell->id == XLS_RECORD_FORMULA || cell->id == XLS_RECORD_FORMULA_ALT)) {
1358 xls_cell_set_str(cell, get_string((char *)buf, tmp.size,
1359 (BYTE)!pWB->is5ver, pWB->is5ver, pWB->charset));
1360 if (xls_debug) xls_showCell(cell);
1361 }
1362 break;
1363
1364 default:
1365 if(xls_debug)
1366 {
1367 //xls_showBOF(&tmp);
1368 if (tmp.size >= sizeof(COL)) {
1369 printf(" [%d:%d]: 0x%X at pos=%lu size=%u\n", xlsShortVal(((COL*)buf)->row), xlsShortVal(((COL*)buf)->col),
1370 tmp.id, lastPos, tmp.size);
1371 } else {
1372 printf(" 0x%X at pos=%lu size=%u\n", tmp.id, lastPos, tmp.size);
1373 }
1374 }
1375 break;
1376 }
1377 }
1378 while ((!pWS->workbook->olestr->eof)&&(tmp.id!=XLS_RECORD_EOF));
1379
1380 cleanup:
1381 if (buf)
1382 free(buf);
1383
1384 return retval;
1385 }
1386
xls_getWorkSheet(xlsWorkBook * pWB,int num)1387 xlsWorkSheet * xls_getWorkSheet(xlsWorkBook* pWB,int num)
1388 {
1389 xlsWorkSheet * pWS = NULL;
1390 verbose ("xls_getWorkSheet");
1391 if (num >= 0 && num < (int)pWB->sheets.count) {
1392 pWS = calloc(1, sizeof(xlsWorkSheet));
1393 pWS->filepos=pWB->sheets.sheet[num].filepos;
1394 pWS->workbook=pWB;
1395 pWS->rows.lastcol=0;
1396 pWS->rows.lastrow=0;
1397 pWS->colinfo.count=0;
1398 }
1399 return pWS;
1400 }
1401
xls_open_ole(OLE2 * ole,const char * charset,xls_error_t * outError)1402 static xlsWorkBook *xls_open_ole(OLE2 *ole, const char *charset, xls_error_t *outError) {
1403 xlsWorkBook* pWB;
1404 xls_error_t retval = LIBXLS_OK;
1405
1406 pWB = calloc(1, sizeof(xlsWorkBook));
1407 verbose ("xls_open_ole");
1408
1409 if ((pWB->olestr=ole2_fopen(ole, "\005SummaryInformation")))
1410 {
1411 pWB->summary = calloc(1,4096);
1412 if (ole2_read(pWB->summary, 4096, 1, pWB->olestr) == -1) {
1413 if (xls_debug) fprintf(stderr, "SummaryInformation not found\n");
1414 retval = LIBXLS_ERROR_READ;
1415 goto cleanup;
1416 }
1417 ole2_fclose(pWB->olestr);
1418 }
1419
1420 if ((pWB->olestr=ole2_fopen(ole, "\005DocumentSummaryInformation")))
1421 {
1422 pWB->docSummary = calloc(1, 4096);
1423 if (ole2_read(pWB->docSummary, 4096, 1, pWB->olestr) == -1) {
1424 if (xls_debug) fprintf(stderr, "DocumentSummaryInformation not found\n");
1425 retval = LIBXLS_ERROR_READ;
1426 goto cleanup;
1427 }
1428 ole2_fclose(pWB->olestr);
1429 }
1430
1431 #if 0
1432 if(xls_debug) {
1433 printf("summary=%d docsummary=%d\n", pWB->summary ? 1 : 0, pWB->docSummary ? 1 : 0);
1434 xlsSummaryInfo *si = xls_summaryInfo(pWB);
1435 printf("title=%s\n", si->title);
1436 printf("subject=%s\n", si->subject);
1437 printf("author=%s\n", si->author);
1438 printf("keywords=%s\n", si->keywords);
1439 printf("comment=%s\n", si->comment);
1440 printf("lastAuthor=%s\n", si->lastAuthor);
1441 printf("appName=%s\n", si->appName);
1442 printf("category=%s\n", si->category);
1443 printf("manager=%s\n", si->manager);
1444 printf("company=%s\n", si->company);
1445 }
1446 #endif
1447
1448 // open Workbook
1449 if (!(pWB->olestr=ole2_fopen(ole,"Workbook")) && !(pWB->olestr=ole2_fopen(ole,"Book")))
1450 {
1451 if(xls_debug) fprintf(stderr, "Workbook not found\n");
1452 retval = LIBXLS_ERROR_PARSE;
1453 goto cleanup;
1454 }
1455
1456 pWB->sheets.count=0;
1457 pWB->xfs.count=0;
1458 pWB->fonts.count=0;
1459 if (charset) {
1460 pWB->charset = malloc(strlen(charset) * sizeof(char)+1);
1461 strcpy(pWB->charset, charset);
1462 } else {
1463 pWB->charset = strdup("UTF-8");
1464 }
1465
1466 retval = xls_parseWorkBook(pWB);
1467
1468 cleanup:
1469 if (retval != LIBXLS_OK) {
1470 if (!pWB->olestr)
1471 ole2_close(ole);
1472 xls_close_WB(pWB);
1473 pWB = NULL;
1474 }
1475 if (outError)
1476 *outError = retval;
1477
1478 return pWB;
1479 }
1480
xls_open(const char * file,const char * charset)1481 xlsWorkBook* xls_open(const char *file, const char* charset)
1482 {
1483 return xls_open_file(file, charset, NULL);
1484 }
1485
xls_open_file(const char * file,const char * charset,xls_error_t * outError)1486 xlsWorkBook* xls_open_file(const char *file, const char* charset, xls_error_t *outError) {
1487 OLE2* ole = NULL;
1488
1489 if (!(ole=ole2_open_file(file)))
1490 {
1491 if (xls_debug) fprintf(stderr, "File \"%s\" not found\n",file);
1492 if (outError) *outError = LIBXLS_ERROR_OPEN;
1493 return NULL;
1494 }
1495
1496 return xls_open_ole(ole, charset, outError);
1497 }
1498
xls_open_buffer(const unsigned char * buffer,size_t len,const char * charset,xls_error_t * outError)1499 xlsWorkBook *xls_open_buffer(const unsigned char *buffer, size_t len,
1500 const char *charset, xls_error_t *outError) {
1501 OLE2* ole = NULL;
1502
1503 if (!(ole=ole2_open_buffer(buffer, len)))
1504 {
1505 if (outError) *outError = LIBXLS_ERROR_OPEN;
1506 return NULL;
1507 }
1508
1509 return xls_open_ole(ole, charset, outError);
1510 }
1511
xls_row(xlsWorkSheet * pWS,WORD cellRow)1512 xlsRow *xls_row(xlsWorkSheet* pWS, WORD cellRow)
1513 {
1514 if(cellRow > pWS->rows.lastrow)
1515 return NULL;
1516
1517 if (pWS->rows.row == NULL)
1518 return NULL;
1519
1520 return &pWS->rows.row[cellRow];
1521 }
1522
xls_cell(xlsWorkSheet * pWS,WORD cellRow,WORD cellCol)1523 xlsCell *xls_cell(xlsWorkSheet* pWS, WORD cellRow, WORD cellCol)
1524 {
1525 struct st_row_data *row;
1526
1527 if ((row = xls_row(pWS, cellRow)) == NULL)
1528 return NULL;
1529
1530 if(cellCol >= row->cells.count)
1531 return NULL;
1532
1533 return &row->cells.cell[cellCol];
1534 }
1535
/*
 * Release a workbook and everything it owns: the open OLE stream together
 * with its container, the charset name, sheet names, shared strings, XF
 * records, font names, format strings and the two summary buffers.
 * A NULL workbook is ignored.
 */
void xls_close_WB(xlsWorkBook* pWB)
{
    DWORD idx;

    verbose ("xls_close");

    if (pWB == NULL)
        return;

    // OLE first: the container is reached through the open stream.
    if (pWB->olestr != NULL) {
        OLE2 *container = pWB->olestr->ole;

        ole2_fclose(pWB->olestr);
        ole2_close(container);
    }

    // WorkBook
    free(pWB->charset);

    // Sheets
    for (idx = 0; idx < pWB->sheets.count; ++idx)
        free(pWB->sheets.sheet[idx].name);
    free(pWB->sheets.sheet);

    // SST
    for (idx = 0; idx < pWB->sst.count; ++idx)
        free(pWB->sst.string[idx].str);
    free(pWB->sst.string);

    // xfs
    free(pWB->xfs.xf);

    // fonts
    for (idx = 0; idx < pWB->fonts.count; ++idx)
        free(pWB->fonts.font[idx].name);
    free(pWB->fonts.font);

    // formats
    for (idx = 0; idx < pWB->formats.count; ++idx)
        free(pWB->formats.format[idx].value);
    free(pWB->formats.format);

    // buffers (free(NULL) is a no-op, so no guard is needed)
    free(pWB->summary);
    free(pWB->docSummary);

    // TODO - free other dynamically allocated objects like string table??
    free(pWB);
}
1602
/*
 * Release a worksheet: every cell string, every row's cell array, the row
 * table, the COLINFO array and the worksheet itself.
 * A NULL worksheet is ignored.
 */
void xls_close_WS(xlsWorkSheet* pWS)
{
    if (pWS == NULL)
        return;

    if (pWS->rows.row != NULL) {
        DWORD rowIdx;

        for (rowIdx = 0; rowIdx <= pWS->rows.lastrow; ++rowIdx) {
            struct st_row_data *rowData = &pWS->rows.row[rowIdx];
            DWORD cellIdx = 0;

            while (cellIdx < rowData->cells.count) {
                free(rowData->cells.cell[cellIdx].str);
                ++cellIdx;
            }
            free(rowData->cells.cell);
        }
        free(pWS->rows.row);
    }

    // COLINFO
    free(pWS->colinfo.col);

    free(pWS);
}
1625
xls_getVersion(void)1626 const char* xls_getVersion(void)
1627 {
1628 return PACKAGE_VERSION;
1629 }
1630
/*
 * Map a libxls error code to a static, human-readable English message.
 * Codes not listed here yield "Unknown error".
 */
const char* xls_getError(xls_error_t code) {
    const char *message;

    switch (code) {
    case LIBXLS_OK:
        message = "No error";
        break;
    case LIBXLS_ERROR_READ:
        message = "Unable to read from file";
        break;
    case LIBXLS_ERROR_OPEN:
        message = "Unable to open file";
        break;
    case LIBXLS_ERROR_SEEK:
        message = "Unable to seek within file";
        break;
    case LIBXLS_ERROR_MALLOC:
        message = "Unable to allocate memory";
        break;
    case LIBXLS_ERROR_PARSE:
        message = "Unable to parse file";
        break;
    default:
        message = "Unknown error";
        break;
    }

    return message;
}
1647
1648 //
1649 // http://poi.apache.org/hpsf/internals.html
1650 // or google "DocumentSummaryInformation and UserDefined Property Sets" and look for MSDN hits
1651 //
1652
xls_summaryInfo(xlsWorkBook * pWB)1653 xlsSummaryInfo *xls_summaryInfo(xlsWorkBook* pWB)
1654 {
1655 xlsSummaryInfo *pSI;
1656
1657 pSI = (xlsSummaryInfo *)calloc(1, sizeof(xlsSummaryInfo));
1658 xls_dumpSummary(pWB->summary, 1, pSI);
1659 xls_dumpSummary(pWB->docSummary, 0, pSI);
1660
1661 return pSI;
1662 }
1663
/*
 * Release a summary-information record and all of its strings.
 * A NULL record is ignored.
 */
void xls_close_summaryInfo(xlsSummaryInfo *pSI)
{
    if (pSI == NULL)
        return;

    // free(NULL) is a no-op, so unset fields need no guard.
    free(pSI->title);
    free(pSI->subject);
    free(pSI->author);
    free(pSI->keywords);
    free(pSI->comment);
    free(pSI->lastAuthor);
    free(pSI->appName);
    free(pSI->category);
    free(pSI->manager);
    free(pSI->company);

    free(pSI);
}
1681
/*
 * Extract the string properties of one property-set buffer into `pSI`.
 *
 * buf        raw property-set stream contents; NULL is ignored (the stream
 *            may be missing from the document).
 * isSummary  non-zero: property IDs are mapped to title, subject, author,
 *            keywords, comment, lastAuthor and appName;
 *            zero: property IDs are mapped to category, manager and company.
 * pSI        destination; each matched field receives a strdup()'d copy.
 *            Fields must be NULL or heap-allocated on entry, because an
 *            existing value is freed before being replaced.
 *
 * NOTE(review): section and property offsets are taken from the buffer as-is
 * and the string payload is assumed to be NUL-terminated; there is no length
 * parameter to validate them against. Bounds checking needs a signature
 * change in the callers.
 */
static void xls_dumpSummary(char *buf,int isSummary,xlsSummaryInfo *pSI) {
    header *head;
    uint32_t i, j;

    if(!buf) return; // perhaps the document was missing??

    head = (header *)buf;

    for(i=0; i<head->count; ++i) {
        sectionList *secList = &head->secList[i];
        sectionHeader *secHead = (sectionHeader *)((char *)head + secList->offset);

        for(j=0; j<secHead->numProperties; ++j) {
            propertyList *plist = &secHead->properties[j];
            property *prop = (property *)((char *)secHead + plist->sectionOffset);
            BYTE **s = NULL;

            // prop->propertyID holds the value type; only type 30 (a string
            // whose text starts 4 bytes into the data) is extracted. Types
            // 2, 3, 64 and 65 are recognised by the format but unused here.
            switch(prop->propertyID) {
            case 30:
                if(isSummary) {
                    switch(plist->propertyID) {
                    case 2:     s = &pSI->title;        break;
                    case 3:     s = &pSI->subject;      break;
                    case 4:     s = &pSI->author;       break;
                    case 5:     s = &pSI->keywords;     break;
                    case 6:     s = &pSI->comment;      break;
                    case 8:     s = &pSI->lastAuthor;   break;
                    case 18:    s = &pSI->appName;      break;
                    default:    s = NULL;               break;
                    }
                } else {
                    switch(plist->propertyID) {
                    case 2:     s = &pSI->category;     break;
                    case 14:    s = &pSI->manager;      break;
                    case 15:    s = &pSI->company;      break;
                    default:    s = NULL;               break;
                    }
                }
                if(s) {
                    // A repeated property ID must not leak the earlier copy.
                    free(*s);
                    *s = (BYTE *)strdup((char *)prop->data + 4);
                }
                break;
            case 2:
            case 3:
            case 64:
            case 65:
            default:
                break;
            }
        }
    }
}
1771
/*
 * Install the callback that xls_parseWorkSheet() invokes for ARRAY records.
 * Passing NULL disables the callback.
 */
void xls_set_formula_hander(xls_formula_handler handler)
{
    formula_handler = handler;
}
1776