1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  *
3  * Copyright 2004 Komarov Valery
4  * Copyright 2006 Christophe Leitienne
5  * Copyright 2008-2017 David Hoerl
6  * Copyright 2013 Bob Colbert
7  * Copyright 2013-2018 Evan Miller
8  *
9  * This file is part of libxls -- A multiplatform, C/C++ library for parsing
10  * Excel(TM) files.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions are met:
14  *
15  *    1. Redistributions of source code must retain the above copyright notice,
16  *    this list of conditions and the following disclaimer.
17  *
18  *    2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS
23  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
26  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
29  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
30  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
31  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
32  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  */
35 
36 #include "config.h"
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <stddef.h>
41 #include <errno.h>
42 
43 #include <memory.h>
44 #include <math.h>
45 #include <sys/types.h>
46 #include <string.h>
47 #include <wchar.h>
48 
49 #include "libxls/endian.h"
50 #include "libxls/xls.h"
51 
52 #ifndef min
53 #define min(a,b) ((a) < (b) ? (a) : (b))
54 #endif
55 
56 //#define DEBUG_DRAWINGS
57 int xls_debug = 0;
58 
59 static double NumFromRk(DWORD drk);
60 static xls_formula_handler formula_handler;
61 
62 static xls_error_t xls_addSST(xlsWorkBook* pWB, SST* sst, DWORD size);
63 static xls_error_t xls_appendSST(xlsWorkBook* pWB, BYTE* buf, DWORD size);
64 static xls_error_t xls_addFormat(xlsWorkBook* pWB, FORMAT* format, DWORD size);
65 static char* xls_addSheet(xlsWorkBook* pWB, BOUNDSHEET* bs, DWORD size);
66 static xls_error_t xls_addRow(xlsWorkSheet* pWS,ROW* row);
67 static xls_error_t xls_makeTable(xlsWorkSheet* pWS);
68 static struct st_cell_data *xls_addCell(xlsWorkSheet* pWS, BOF* bof, BYTE* buf);
69 static char *xls_addFont(xlsWorkBook* pWB, FONT* font, DWORD size);
70 static xls_error_t xls_addXF8(xlsWorkBook* pWB, XF8* xf);
71 static xls_error_t xls_addXF5(xlsWorkBook* pWB, XF5* xf);
72 static xls_error_t xls_addColinfo(xlsWorkSheet* pWS, COLINFO* colinfo);
73 static xls_error_t xls_mergedCells(xlsWorkSheet* pWS, BOF* bof, BYTE* buf);
74 static xls_error_t xls_preparseWorkSheet(xlsWorkSheet* pWS);
75 static xls_error_t xls_formatColumn(xlsWorkSheet* pWS);
76 static void xls_dumpSummary(char *buf, int isSummary, xlsSummaryInfo *pSI);
77 
78 #if defined(_AIX) || defined(__sun)
79 #pragma pack(1)
80 #else
81 #pragma pack(push, 1)
82 #endif
83 
// One fixed-size entry of the section table embedded in `header`:
// a 16-byte identifier (four 32-bit words) followed by a 32-bit offset.
// Declared inside the #pragma pack(1) region, so there is no padding.
// NOTE(review): presumably the format-ID/offset pair of an OLE property-set
// stream as consumed by xls_dumpSummary -- confirm against that function.
typedef struct {
	uint32_t		format[4];
	uint32_t		offset;
} sectionList;
88 
// Packed leading structure of a summary buffer: a 16-bit signature, a
// 16-bit unused field, a 32-bit OS field, a 16-byte identifier, an entry
// count and the first element of the section table.
// NOTE(review): secList[1] looks like the pre-C99 trailing-array idiom with
// `count` giving the real number of entries -- confirm in xls_dumpSummary.
typedef struct {
	uint16_t		sig;
	uint16_t		_empty;
	uint32_t		os;
	uint32_t		format[4];
	uint32_t		count;
	sectionList		secList[1];
} header;
97 
// One entry of the property table embedded in `sectionHeader`: a 32-bit
// property identifier and a 32-bit offset.
// NOTE(review): sectionOffset is presumably relative to the start of the
// owning section -- confirm against the code that walks the table.
typedef struct {
	uint32_t		propertyID;
	uint32_t		sectionOffset;
} propertyList;
102 
// Packed leading structure of one section: a 32-bit length, a 32-bit
// property count and the first element of the property table.
// NOTE(review): properties[1] looks like a trailing array sized by
// numProperties -- confirm in the consumer.
typedef struct {
	uint32_t		length;
	uint32_t		numProperties;
	propertyList	properties[1];
} sectionHeader;
108 
// A single property record: a 32-bit identifier followed by its payload,
// declared here as one 32-bit word.
// NOTE(review): data[1] presumably stands for variable-length payload whose
// interpretation depends on propertyID -- confirm in the consumer.
typedef struct {
	uint32_t		propertyID;
	uint32_t		data[1];
} property;
113 
114 #pragma pack(pop)
115 
xls(int debug)116 int xls(int debug)
117 {
118 	xls_debug = debug;
119     return 1;
120 }
121 
xls_addSST(xlsWorkBook * pWB,SST * sst,DWORD size)122 static xls_error_t xls_addSST(xlsWorkBook* pWB,SST* sst,DWORD size)
123 {
124     verbose("xls_addSST");
125 
126     pWB->sst.continued=0;
127     pWB->sst.lastln=0;
128     pWB->sst.lastid=0;
129     pWB->sst.lastrt=0;
130     pWB->sst.lastsz=0;
131 
132     if (sst->num > (1<<24))
133         return LIBXLS_ERROR_MALLOC;
134 
135     if (pWB->sst.string)
136         return LIBXLS_ERROR_PARSE;
137 
138     if ((pWB->sst.string = calloc(pWB->sst.count = sst->num,
139                     sizeof(struct str_sst_string))) == NULL)
140         return LIBXLS_ERROR_MALLOC;
141 
142     return xls_appendSST(pWB, sst->strings, size - offsetof(SST, strings));
143 }
144 
xls_appendSST(xlsWorkBook * pWB,BYTE * buf,DWORD size)145 static xls_error_t xls_appendSST(xlsWorkBook* pWB, BYTE* buf, DWORD size)
146 {
147     DWORD ln;	// String character count
148     DWORD ofs;	// Current offset in SST buffer
149     DWORD rt;	// Count of rich text formatting runs
150     DWORD sz;	// Size of asian phonetic settings block
151     BYTE flag;	// String flags
152     char* ret = NULL;
153 
154     if (xls_debug) {
155 	    printf("xls_appendSST %u\n", size);
156     }
157 
158 	sz = rt = ln = 0;	// kch
159     ofs=0;
160 
161 	while(ofs<size)
162     {
163         int ln_toread;
164 
165         // Restore state when we're in a continue record
166         // or read string length
167         if (pWB->sst.continued) {
168             ln=pWB->sst.lastln;
169             rt=pWB->sst.lastrt;
170             sz=pWB->sst.lastsz;
171         } else {
172             if (ofs + 2 > size) {
173                 return LIBXLS_ERROR_PARSE;
174             }
175             ln = buf[ofs+0] + (buf[ofs+1] << 8);
176             rt = 0;
177             sz = 0;
178 
179             ofs+=2;
180         }
181 
182 		if (xls_debug) {
183         	printf("ln=%u\n", ln);
184 		}
185 
186         // Read flags
187         if ( !pWB->sst.continued || (pWB->sst.continued && ln != 0) ) {
188             if (ofs + sizeof(BYTE) > size) {
189                 return LIBXLS_ERROR_PARSE;
190             }
191             flag=*(BYTE *)(buf+ofs);
192             ofs++;
193 
194             // Count of rich text formatting runs
195             if (flag & 0x8) {
196                 if (ofs + sizeof(WORD) > size) {
197                     return LIBXLS_ERROR_PARSE;
198                 }
199                 rt = buf[ofs+0] + (buf[ofs+1] << 8);
200                 ofs+=2;
201             }
202 
203             // Size of asian phonetic settings block
204             if (flag & 0x4) {
205                 if (ofs + sizeof(DWORD) > size) {
206                     return LIBXLS_ERROR_PARSE;
207                 }
208                 sz = buf[ofs+0] + (buf[ofs+1] << 8) + (buf[ofs+2] << 16) + ((DWORD)buf[ofs+3] << 24);
209                 ofs+=4;
210 
211 				if (xls_debug) {
212 					printf("sz=%u\n", sz);
213 				}
214             }
215         } else {
216             flag = 0;
217         }
218 
219 		// Read characters (compressed or not)
220         ln_toread = 0;
221         if (ln > 0) {
222             if (flag & 0x1) {
223                 size_t new_len = 0;
224                 ln_toread = min((size-ofs)/2, ln);
225                 ret=unicode_decode((char *)buf+ofs,ln_toread*2,&new_len,pWB->charset);
226 
227                 if (ret == NULL)
228                 {
229                     ret = strdup("*failed to decode utf16*");
230                     new_len = strlen(ret);
231                 }
232 
233                 ret = realloc(ret,new_len+1);
234                 ret[new_len]=0;
235 
236                 ln -= ln_toread;
237                 ofs+=ln_toread*2;
238 
239                 if (xls_debug) {
240 	                printf("String16SST: %s(%lu)\n", ret, (unsigned long)new_len);
241                 }
242             } else {
243                 ln_toread = min((size-ofs), ln);
244 
245 				ret = utf8_decode((char *)buf+ofs, ln_toread, pWB->charset);
246 
247                 ln  -= ln_toread;
248                 ofs += ln_toread;
249 
250                 if (xls_debug) {
251                 	printf("String8SST: %s(%u) \n",ret,ln);
252                 }
253             }
254         } else {
255             ret = strdup("");
256         }
257 
258         if (ln_toread > 0 || !pWB->sst.continued) {
259             // Concat string if it's a continue, or add string in table
260             if (!pWB->sst.continued) {
261                 if (pWB->sst.lastid >= pWB->sst.count) {
262                     free(ret);
263                     return LIBXLS_ERROR_PARSE;
264                 }
265                 pWB->sst.lastid++;
266                 pWB->sst.string[pWB->sst.lastid-1].str=ret;
267             } else {
268                 char *tmp = pWB->sst.string[pWB->sst.lastid-1].str;
269                 if (tmp == NULL) {
270                     free(ret);
271                     return LIBXLS_ERROR_PARSE;
272                 }
273                 tmp = realloc(tmp, strlen(tmp)+strlen(ret)+1);
274                 if (tmp == NULL)  {
275                     free(ret);
276                     return LIBXLS_ERROR_MALLOC;
277                 }
278                 pWB->sst.string[pWB->sst.lastid-1].str=tmp;
279                 memcpy(tmp+strlen(tmp), ret, strlen(ret)+1);
280 				free(ret);
281             }
282 
283 			if (xls_debug) {
284 	            printf("String %4u: %s<end>\n", pWB->sst.lastid-1, pWB->sst.string[pWB->sst.lastid-1].str);
285 			}
286         } else {
287             free(ret);
288 	}
289 
290 		// Jump list of rich text formatting runs
291         if (ofs < size && rt > 0) {
292             int rt_toread = min((size-ofs)/4, rt);
293             rt -= rt_toread;
294             ofs += rt_toread*4;
295         }
296 
297 		// Jump asian phonetic settings block
298         if (ofs < size && sz > 0) {
299             int sz_toread = min((size-ofs), sz);
300             sz -= sz_toread;
301             ofs += sz_toread;
302         }
303 
304         pWB->sst.continued=0;
305     }
306 
307     // Save current character count and count of rich text formatting runs and size of asian phonetic settings block
308 	if (ln > 0 || rt > 0 || sz > 0) {
309 		pWB->sst.continued = 1;
310 		pWB->sst.lastln = ln;
311 		pWB->sst.lastrt = rt;
312 		pWB->sst.lastsz = sz;
313 
314 		if (xls_debug) {
315 			printf("continued: ln=%u, rt=%u, sz=%u\n", ln, rt, sz);
316 		}
317 	}
318 
319     return LIBXLS_OK;
320 }
321 
/*
 * Decodes a 32-bit RK-encoded number into a double.
 *
 * Bit 1 selects the encoding: when set, the upper 30 bits are a signed
 * integer; when clear, they are the most significant 30 bits of an IEEE
 * double whose remaining low bits are zero.
 * Bit 0, when set, means the decoded value must be divided by 100.
 */
static double NumFromRk(DWORD drk)
{
	double value;

	if ((drk & 0x02) != 0) {
		// Integer form: shift on the signed value so negatives stay negative.
		const int as_int = (int)drk >> 2;
		value = (double)as_int;
	} else {
		// Floating point form: the payload is the high half of the double.
		const unsigned64_t bits = (unsigned64_t)(drk & 0xfffffffc) << 32;
		memcpy(&value, &bits, sizeof(unsigned64_t));
	}

	if ((drk & 0x01) != 0) {
		value /= 100.0;
	}

	return value;
}
343 
/*
 * Appends one entry to pWB->sheets from a BOUNDSHEET record.
 *
 * The sheet name is decoded with get_string; name, file position,
 * visibility and type are stored in the new entry. When xls_debug is set
 * the record is also printed.
 *
 * pWB   workbook receiving the sheet entry
 * bs    BOUNDSHEET record payload
 * size  size of the payload in bytes
 *
 * Returns the decoded name (now referenced by the sheet table), or NULL
 * when the table cannot be enlarged. In that case the existing entries
 * are left untouched.
 */
static char * xls_addSheet(xlsWorkBook* pWB, BOUNDSHEET *bs, DWORD size)
{
	char * name;
	DWORD filepos;
	BYTE visible, type;
	void *grown;

	filepos = bs->filepos;
	visible = bs->visible;
	type = bs->type;

	name = get_string(bs->name, size - offsetof(BOUNDSHEET, name), 0, pWB->is5ver, pWB->charset);

	if(xls_debug) {
		printf ("xls_addSheet[0x%x]\n", type);
		switch (type & 0x0f)
		{
		case 0x00:
			/* worksheet or dialog sheet */
			printf ("85: Worksheet or dialog sheet\n");
			break;
		case 0x01:
			/* Microsoft Excel 4.0 macro sheet */
			printf ("85: Microsoft Excel 4.0 macro sheet\n");
			break;
		case 0x02:
			/* Chart */
			printf ("85: Chart sheet\n");
			break;
		case 0x06:
			/* Visual Basic module */
			printf ("85: Visual Basic sheet\n");
			break;
		default:
			printf ("???\n");
			break;
		}
		printf("visible: %x\n", visible);
		printf("    Pos: %Xh\n",filepos);
		printf("   type: %.4Xh\n",type);
		printf("   name: %s\n", name ? name : "(null)");
	}

	// Grow through a temporary: assigning realloc's result straight back
	// would drop the only pointer to the table on failure while
	// sheets.count still claims entries exist.
	grown = realloc(pWB->sheets.sheet,(pWB->sheets.count+1)*sizeof (struct st_sheet_data));
	if (grown == NULL) {
		// The name was never stored anywhere, so release it here.
		// NOTE(review): assumes get_string returns malloc'd memory -- confirm.
		free(name);
		return NULL;
	}
	pWB->sheets.sheet = grown;

	pWB->sheets.sheet[pWB->sheets.count].name=name;
	pWB->sheets.sheet[pWB->sheets.count].filepos=filepos;
	pWB->sheets.sheet[pWB->sheets.count].visibility=visible;
	pWB->sheets.sheet[pWB->sheets.count].type=type;
	pWB->sheets.count++;

	return name;
}
401 
402 
xls_addRow(xlsWorkSheet * pWS,ROW * row)403 static xls_error_t xls_addRow(xlsWorkSheet* pWS,ROW* row)
404 {
405     struct st_row_data* tmp;
406 
407     //verbose ("xls_addRow");
408 
409     if (row->index > pWS->rows.lastrow)
410         return LIBXLS_ERROR_PARSE;
411 
412     tmp=&pWS->rows.row[row->index];
413     tmp->height=row->height;
414     tmp->fcell=row->fcell;
415     tmp->lcell=row->lcell;
416     tmp->flags=row->flags;
417     tmp->xf=row->xf&0xfff;
418     tmp->xfflags=(row->xf >> 8)&0xf0;
419     if(xls_debug) xls_showROW(tmp);
420 
421     return LIBXLS_OK;
422 }
423 
xls_makeTable(xlsWorkSheet * pWS)424 static xls_error_t xls_makeTable(xlsWorkSheet* pWS)
425 {
426     DWORD i,t;
427     struct st_row_data* tmp;
428     verbose ("xls_makeTable");
429 
430     if ((pWS->rows.row = calloc((pWS->rows.lastrow+1),sizeof(struct st_row_data))) == NULL)
431         return LIBXLS_ERROR_MALLOC;
432 
433 	// printf("ALLOC: rows=%d cols=%d\n", pWS->rows.lastrow, pWS->rows.lastcol);
434     for (t=0;t<=pWS->rows.lastrow;t++)
435     {
436         tmp=&pWS->rows.row[t];
437         tmp->index=t;
438         tmp->fcell=0;
439         tmp->lcell=pWS->rows.lastcol;
440 
441 		tmp->cells.count = pWS->rows.lastcol+1;
442         if ((tmp->cells.cell = calloc(tmp->cells.count, sizeof(struct st_cell_data))) == NULL)
443             return LIBXLS_ERROR_MALLOC;
444 
445         for (i=0;i<=pWS->rows.lastcol;i++)
446         {
447             tmp->cells.cell[i].col = i;
448             tmp->cells.cell[i].row = t;
449             tmp->cells.cell[i].width = pWS->defcolwidth;
450             tmp->cells.cell[i].id = XLS_RECORD_BLANK;
451         }
452     }
453     return LIBXLS_OK;
454 }
455 
/*
 * Reports whether a cell record is too short to be read safely.
 *
 * Every cell record must hold at least a COL header. Beyond that the
 * required size depends on the record id and, for LABEL records, on the
 * string length and encoding found in the payload itself.
 *
 * pWB  workbook (its is5ver flag selects the BIFF5 or BIFF8 layout)
 * bof  record header giving id and payload size
 * buf  record payload
 *
 * Returns non-zero when the record is too small, 0 otherwise (including
 * for record ids that have no specific rule here).
 */
int xls_isCellTooSmall(xlsWorkBook* pWB, BOF* bof, BYTE* buf) {
    const size_t value_ofs = offsetof(LABEL, value);

    if (bof->size < sizeof(COL)) {
        return 1;
    }

    switch (bof->id) {
    case XLS_RECORD_FORMULA:
    case XLS_RECORD_FORMULA_ALT:
        return (bof->size < sizeof(FORMULA));

    case XLS_RECORD_MULRK:
        return (bof->size < offsetof(MULRK, rk));

    case XLS_RECORD_MULBLANK:
        return (bof->size < offsetof(MULBLANK, xf));

    case XLS_RECORD_LABELSST:
        return (bof->size < value_ofs + (pWB->is5ver ? 2 : 4));

    case XLS_RECORD_LABEL: {
        const LABEL *label = (LABEL*)buf;
        size_t label_len;

        // The 16-bit length prefix must be present before it is read.
        if (bof->size < value_ofs + 2) {
            return 1;
        }
        label_len = label->value[0] + (label->value[1] << 8);

        if (pWB->is5ver) {
            return (bof->size < value_ofs + 2 + label_len);
        }

        // Otherwise a flags byte follows the length.
        if (bof->size < value_ofs + 3) {
            return 1;
        }

        // Flag bit 0 clear: one byte per character; set: two bytes.
        if ((label->value[2] & 0x01) == 0) {
            return (bof->size < value_ofs + 3 + label_len);
        }
        return (bof->size < value_ofs + 3 + 2 * label_len);
    }

    case XLS_RECORD_RK:
        return (bof->size < sizeof(RK));

    case XLS_RECORD_NUMBER:
        return (bof->size < sizeof(BR_NUMBER));

    case XLS_RECORD_BOOLERR:
        return (bof->size < sizeof(BOOLERR));

    default:
        return 0;
    }
}
501 
xls_cell_set_str(struct st_cell_data * cell,char * str)502 void xls_cell_set_str(struct st_cell_data *cell, char *str) {
503     if (cell->str) {
504         free(cell->str);
505     }
506     cell->str = str;
507 }
508 
/*
 * Stores the contents of one cell record in the worksheet grid.
 *
 * The target cell is located from the row/column in the record's COL
 * header. Its id and xf are set from the record, and the value fields
 * (d, l, str) are filled according to the record type. MULRK and MULBLANK
 * records fill a run of consecutive cells starting at that column.
 *
 * pWS  worksheet whose grid is updated
 * bof  record header (id and payload size)
 * buf  record payload; FORMULA and NUMBER payloads are converted in place
 *
 * Returns the last cell written, or NULL when the record is too small or
 * refers to a row or column outside the allocated grid.
 */
static struct st_cell_data *xls_addCell(xlsWorkSheet* pWS,BOF* bof,BYTE* buf)
{
    struct st_cell_data*	cell;
    struct st_row_data*		row;
    WORD                    row_index;
    WORD                    col;
    int						i;

    verbose ("xls_addCell");

    if (xls_isCellTooSmall(pWS->workbook, bof, buf))
        return NULL;

    // The row index comes straight from the file: validate it the same way
    // xls_addRow does before indexing the row table.
    row_index = xlsShortVal(((COL*)buf)->row);
    if (pWS->rows.row == NULL || row_index > pWS->rows.lastrow) {
        if (xls_debug) fprintf(stderr, "Error: Row index out of bounds\n");
        return NULL;
    }
    row=&pWS->rows.row[row_index];

    col = xlsShortVal(((COL*)buf)->col);
    if (col >= row->cells.count) {
        if (xls_debug) fprintf(stderr, "Error: Column index out of bounds\n");
        return NULL;
    }
    cell = &row->cells.cell[col];

    cell->id=bof->id;
    cell->xf=xlsShortVal(((COL*)buf)->xf);

    switch (bof->id)
    {
    case XLS_RECORD_FORMULA:
    case XLS_RECORD_FORMULA_ALT:
        xlsConvertFormula((FORMULA *)buf);
        cell->id=XLS_RECORD_FORMULA;
        if (((FORMULA*)buf)->res!=0xffff) {
            // if a double, then set double and clear l
            cell->l=0;
            memcpy(&cell->d, &((FORMULA*)buf)->resid, sizeof(double));	// Required for ARM
            cell->id = XLS_RECORD_NUMBER; // hack
            xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
            cell->id = bof->id;
        } else {
            double d = ((FORMULA*)buf)->resdata[1];
            cell->l = 0xFFFF;
            switch(((FORMULA*)buf)->resid) {
            case 0:		// String
                break;	// cell is half complete, get the STRING next record
            case 1:		// Boolean
                memcpy(&cell->d, &d, sizeof(double)); // Required for ARM
                xls_cell_set_str(cell, strdup("bool"));
                break;
            case 2:		// error
                memcpy(&cell->d, &d, sizeof(double)); // Required for ARM
                xls_cell_set_str(cell, strdup("error"));
                break;
            case 3:		// empty string
                xls_cell_set_str(cell, strdup(""));
                break;
            }
        }
        if(formula_handler) formula_handler(bof->id, bof->size, buf);
        break;
    case XLS_RECORD_MULRK:
        for (i = 0; i < (bof->size - 6)/6; i++)	// 6 == 2 row + 2 col + 2 trailing index
        {
            // Compute the column in a wide type: a 16-bit sum could wrap
            // around and slip past the bounds check below.
            DWORD index = (DWORD)col + (DWORD)i;
            if(index >= row->cells.count) {
                if (xls_debug) fprintf(stderr, "Error: MULTI-RK index out of bounds\n");
                return NULL;
            }
            cell=&row->cells.cell[index];
            cell->id=XLS_RECORD_RK;
            cell->xf=xlsShortVal(((MULRK*)buf)->rk[i].xf);
            cell->d=NumFromRk(xlsIntVal(((MULRK*)buf)->rk[i].value));
            xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
        }
        break;
    case XLS_RECORD_MULBLANK:
        for (i = 0; i < (bof->size - 6)/2; i++)	// 6 == 2 row + 2 col + 2 trailing index
        {
            // Wide type for the same wrap-around reason as in MULRK.
            DWORD index = (DWORD)col + (DWORD)i;
            if(index >= row->cells.count) {
                if (xls_debug) fprintf(stderr, "Error: MULTI-BLANK index out of bounds\n");
                return NULL;
            }
            cell=&row->cells.cell[index];
            cell->id=XLS_RECORD_BLANK;
            cell->xf=xlsShortVal(((MULBLANK*)buf)->xf[i]);
            xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
        }
        break;
    case XLS_RECORD_LABELSST:
    case XLS_RECORD_LABEL:
        xls_cell_set_str(cell, xls_getfcell(pWS->workbook, cell, ((LABEL*)buf)->value));
        if (cell->str) {
            // Best effort: also expose the text as numbers when it parses.
            sscanf((char *)cell->str, "%d", &cell->l);
            sscanf((char *)cell->str, "%lf", &cell->d);
        }
        break;
    case XLS_RECORD_RK:
        cell->d=NumFromRk(xlsIntVal(((RK*)buf)->value));
        xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
        break;
    case XLS_RECORD_BLANK:
        break;
    case XLS_RECORD_NUMBER:
        xlsConvertDouble((BYTE *)&((BR_NUMBER*)buf)->value);
        memcpy(&cell->d, &((BR_NUMBER*)buf)->value, sizeof(double)); // Required for ARM
        xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
        break;
    case XLS_RECORD_BOOLERR:
        cell->d = ((BOOLERR *)buf)->value;
        if (((BOOLERR *)buf)->iserror) {
            xls_cell_set_str(cell, strdup("error"));
        } else {
            xls_cell_set_str(cell, strdup("bool"));
        }
        break;
    default:
        xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL));
        break;
    }
    if (xls_debug) xls_showCell(cell);

    return cell;
}
632 
/*
 * Appends one entry to pWB->fonts from a FONT record.
 *
 * The font name is decoded with get_string; the remaining attributes are
 * copied field by field from the record.
 *
 * pWB   workbook receiving the font entry
 * font  FONT record payload
 * size  size of the payload in bytes
 *
 * Returns the decoded font name (referenced by the new entry), or NULL when
 * the font table cannot be enlarged. In that case the existing entries are
 * left untouched.
 */
static char *xls_addFont(xlsWorkBook* pWB, FONT* font, DWORD size)
{
    struct st_font_data* tmp;
    void *grown;

    verbose("xls_addFont");

    // Grow through a temporary so a failed realloc does not drop the only
    // pointer to the table while fonts.count still claims entries exist.
    grown = realloc(pWB->fonts.font,(pWB->fonts.count+1)*sizeof(struct st_font_data));
    if (grown == NULL)
        return NULL;
    pWB->fonts.font = grown;

    tmp=&pWB->fonts.font[pWB->fonts.count];

    tmp->name = get_string(font->name, size - offsetof(FONT, name), 0, pWB->is5ver, pWB->charset);

    tmp->height=font->height;
    tmp->flag=font->flag;
    tmp->color=font->color;
    tmp->bold=font->bold;
    tmp->escapement=font->escapement;
    tmp->underline=font->underline;
    tmp->family=font->family;
    tmp->charset=font->charset;

    pWB->fonts.count++;

    return tmp->name;
}
661 
xls_addFormat(xlsWorkBook * pWB,FORMAT * format,DWORD size)662 static xls_error_t xls_addFormat(xlsWorkBook* pWB, FORMAT* format, DWORD size)
663 {
664     struct st_format_data* tmp;
665 
666     verbose("xls_addFormat");
667     pWB->formats.format = realloc(pWB->formats.format, (pWB->formats.count+1)*sizeof(struct st_format_data));
668     if (pWB->formats.format == NULL)
669         return LIBXLS_ERROR_MALLOC;
670 
671     tmp = &pWB->formats.format[pWB->formats.count];
672     tmp->index = format->index;
673     tmp->value = get_string(format->value, size - offsetof(FORMAT, value), (BYTE)!pWB->is5ver, (BYTE)pWB->is5ver, pWB->charset);
674     if(xls_debug) xls_showFormat(tmp);
675     pWB->formats.count++;
676 
677     return LIBXLS_OK;
678 }
679 
xls_addXF8(xlsWorkBook * pWB,XF8 * xf)680 static xls_error_t xls_addXF8(xlsWorkBook* pWB,XF8* xf)
681 {
682     struct st_xf_data* tmp;
683 
684     verbose("xls_addXF");
685     pWB->xfs.xf= realloc(pWB->xfs.xf, (pWB->xfs.count+1)*sizeof(struct st_xf_data));
686     if (pWB->xfs.xf == NULL)
687         return LIBXLS_ERROR_MALLOC;
688 
689     tmp=&pWB->xfs.xf[pWB->xfs.count];
690 
691     tmp->font=xf->font;
692     tmp->format=xf->format;
693     tmp->type=xf->type;
694     tmp->align=xf->align;
695     tmp->rotation=xf->rotation;
696     tmp->ident=xf->ident;
697     tmp->usedattr=xf->usedattr;
698     tmp->linestyle=xf->linestyle;
699     tmp->linecolor=xf->linecolor;
700     tmp->groundcolor=xf->groundcolor;
701 
702     //	xls_showXF(tmp);
703     pWB->xfs.count++;
704 
705     return LIBXLS_OK;
706 }
707 
xls_addXF5(xlsWorkBook * pWB,XF5 * xf)708 static xls_error_t xls_addXF5(xlsWorkBook* pWB,XF5* xf)
709 {
710     struct st_xf_data* tmp;
711 
712     verbose("xls_addXF");
713     pWB->xfs.xf = realloc(pWB->xfs.xf, (pWB->xfs.count+1)*sizeof(struct st_xf_data));
714     if (pWB->xfs.xf == NULL)
715         return LIBXLS_ERROR_MALLOC;
716 
717     tmp=&pWB->xfs.xf[pWB->xfs.count];
718 
719     tmp->font=xf->font;
720     tmp->format=xf->format;
721     tmp->type=xf->type;
722     tmp->align=(BYTE)xf->align;
723 /*
724     tmp->rotation=xf->rotation;
725     tmp->ident=xf->ident;
726     tmp->usedattr=xf->usedattr;
727     tmp->linestyle=xf->linestyle;
728     tmp->linecolor=xf->linecolor;
729     tmp->groundcolor=xf->groundcolor;
730 */
731 
732     //	xls_showXF(tmp);
733     pWB->xfs.count++;
734     return LIBXLS_OK;
735 }
736 
xls_addColinfo(xlsWorkSheet * pWS,COLINFO * colinfo)737 static xls_error_t xls_addColinfo(xlsWorkSheet* pWS,COLINFO* colinfo)
738 {
739     struct st_colinfo_data* tmp;
740 
741     verbose("xls_addColinfo");
742     pWS->colinfo.col =  realloc(pWS->colinfo.col,(pWS->colinfo.count+1)*sizeof(struct st_colinfo_data));
743     if (pWS->colinfo.col == NULL)
744         return LIBXLS_ERROR_MALLOC;
745 
746     tmp=&pWS->colinfo.col[pWS->colinfo.count];
747     tmp->first=colinfo->first;
748     tmp->last=colinfo->last;
749     tmp->width=colinfo->width;
750     tmp->xf=colinfo->xf;
751     tmp->flags=colinfo->flags;
752 
753     if(xls_debug) xls_showColinfo(tmp);
754     pWS->colinfo.count++;
755 
756     return LIBXLS_OK;
757 }
758 
xls_mergedCells(xlsWorkSheet * pWS,BOF * bof,BYTE * buf)759 static xls_error_t xls_mergedCells(xlsWorkSheet* pWS,BOF* bof,BYTE* buf)
760 {
761     if (bof->size < sizeof(WORD))
762         return LIBXLS_ERROR_PARSE;
763 
764     int count = buf[0] + (buf[1] << 8);
765     DWORD limit = sizeof(WORD)+count*sizeof(struct MERGEDCELLS);
766     if(limit > (DWORD)bof->size) {
767         verbose("Merged Cells Count out of range");
768         return LIBXLS_ERROR_PARSE;
769     }
770     int i,c,r;
771     struct MERGEDCELLS *span;
772     verbose("Merged Cells");
773     for (i=0;i<count;i++)
774     {
775         span=(struct MERGEDCELLS*)(buf+(2+i*sizeof(struct MERGEDCELLS)));
776         xlsConvertMergedcells(span);
777         //		printf("Merged Cells: [%i,%i] [%i,%i] \n",span->colf,span->rowf,span->coll,span->rowl);
778         // Sanity check:
779         if(!(   span->rowf <= span->rowl &&
780                 span->rowl <= pWS->rows.lastrow &&
781                 span->colf <= span->coll &&
782                 span->coll <= pWS->rows.lastcol
783         )) {
784             return LIBXLS_ERROR_PARSE;
785         }
786 
787         for (r=span->rowf;r<=span->rowl;r++)
788             for (c=span->colf;c<=span->coll;c++)
789                 pWS->rows.row[r].cells.cell[c].isHidden=1;
790         pWS->rows.row[span->rowf].cells.cell[span->colf].colspan=(span->coll-span->colf+1);
791         pWS->rows.row[span->rowf].cells.cell[span->colf].rowspan=(span->rowl-span->rowf+1);
792         pWS->rows.row[span->rowf].cells.cell[span->colf].isHidden=0;
793     }
794     return LIBXLS_OK;
795 }
796 
/*
 * Reports whether a workbook-level record is shorter than the fixed part
 * that the parser reads for its record id.
 *
 * pWB   workbook (its is5ver flag selects XF5 or XF8 for XF records)
 * bof1  record header giving id and payload size
 *
 * Returns non-zero when the payload is too small, 0 otherwise (including
 * for record ids that have no minimum defined here).
 */
int xls_isRecordTooSmall(xlsWorkBook *pWB, BOF *bof1) {
    size_t min_size;

    switch (bof1->id) {
        case XLS_RECORD_BOF:	// BIFF5-8
            min_size = 2 * sizeof(WORD);
            break;
        case XLS_RECORD_CODEPAGE:
            min_size = sizeof(WORD);
            break;
        case XLS_RECORD_WINDOW1:
            min_size = sizeof(WIND1);
            break;
        case XLS_RECORD_SST:
            min_size = offsetof(SST, strings);
            break;
        case XLS_RECORD_BOUNDSHEET:
            min_size = offsetof(BOUNDSHEET, name);
            break;
        case XLS_RECORD_XF:
            min_size = pWB->is5ver ? sizeof(XF5) : sizeof(XF8);
            break;
        case XLS_RECORD_FONT:
        case XLS_RECORD_FONT_ALT:
            min_size = offsetof(FONT, name);
            break;
        case XLS_RECORD_FORMAT:
            min_size = offsetof(FORMAT, value);
            break;
        case XLS_RECORD_1904:
            min_size = sizeof(BYTE);
            break;
        default:
            return 0;
    }

    return (bof1->size < min_size);
}
826 
xls_parseWorkBook(xlsWorkBook * pWB)827 xls_error_t xls_parseWorkBook(xlsWorkBook* pWB)
828 {
829     BOF bof1 = { .id = 0, .size = 0 };
830     BOF bof2 = { .id = 0, .size = 0 };
831     BYTE* buf = NULL;
832 	BYTE once = 0;
833     xls_error_t retval = LIBXLS_OK;
834 
835     verbose ("xls_parseWorkBook");
836     do {
837 		if(xls_debug > 10) {
838 			printf("READ WORKBOOK filePos=%ld\n",  (long)pWB->filepos);
839 			printf("  OLE: start=%d pos=%u size=%u fatPos=%u\n",
840                     pWB->olestr->start, (unsigned int)pWB->olestr->pos,
841                     (unsigned int)pWB->olestr->size, (unsigned int)pWB->olestr->fatpos);
842 		}
843 
844         if (ole2_read(&bof1, 1, 4, pWB->olestr) != 4) {
845             retval = LIBXLS_ERROR_READ;
846             goto cleanup;
847         }
848         xlsConvertBof(&bof1);
849  		if(xls_debug) xls_showBOF(&bof1);
850 
851         if (bof1.size) {
852             if ((buf = realloc(buf, bof1.size)) == NULL) {
853                 if (xls_debug) fprintf(stderr, "Error: failed to allocate buffer of size %d\n", (int)bof1.size);
854                 retval = LIBXLS_ERROR_MALLOC;
855                 goto cleanup;
856             }
857             if (ole2_read(buf, 1, bof1.size, pWB->olestr) != bof1.size) {
858                 if (xls_debug) fprintf(stderr, "Error: failed to read OLE block\n");
859                 retval = LIBXLS_ERROR_READ;
860                 goto  cleanup;
861             }
862         }
863 
864         if (xls_isRecordTooSmall(pWB, &bof1)) {
865             retval = LIBXLS_ERROR_PARSE;
866             goto cleanup;
867         }
868 
869         switch (bof1.id) {
870         case XLS_RECORD_EOF:
871             //verbose("EOF");
872             break;
873         case XLS_RECORD_BOF:	// BIFF5-8
874             pWB->is5ver = (buf[0] + (buf[1] << 8) != 0x600);
875             pWB->type = buf[2] + (buf[3] << 8);
876 
877             if(xls_debug) {
878                 printf("version: %s\n", pWB->is5ver ? "BIFF5" : "BIFF8" );
879                 printf("   type: %.2X\n", pWB->type);
880             }
881             break;
882 
883         case XLS_RECORD_CODEPAGE:
884             pWB->codepage = buf[0] + (buf[1] << 8);
885 			if(xls_debug) printf("codepage=%x\n", pWB->codepage);
886             break;
887 
888         case XLS_RECORD_CONTINUE:
889 			if(once) {
890 				if (bof2.id==XLS_RECORD_SST) {
891 					if ((retval = xls_appendSST(pWB,buf,bof1.size)) != LIBXLS_OK)
892                         goto cleanup;
893                 }
894 				bof1=bof2;
895 			}
896             break;
897 
898 		case XLS_RECORD_WINDOW1:
899 			{
900 				WIND1 *w = (WIND1*)buf;
901                 xlsConvertWindow(w);
902 				pWB->activeSheetIdx = w->itabCur;
903 				if(xls_debug) {
904 					printf("WINDOW1: ");
905 					printf("xWn    : %d\n", w->xWn/20);
906 					printf("yWn    : %d\n", w->yWn/20);
907 					printf("dxWn   : %d\n", w->dxWn/20);
908 					printf("dyWn   : %d\n", w->dyWn/20);
909 					printf("grbit  : %d\n", w->grbit);
910 					printf("itabCur: %d\n", w->itabCur);
911 					printf("itabFi : %d\n", w->itabFirst);
912 					printf("ctabSel: %d\n", w->ctabSel);
913 					printf("wTabRat: %d\n", w->wTabRatio);
914 				}
915 			}
916 			break;
917 
918         case XLS_RECORD_SST:
919 			//printf("ADD SST\n");
920             xlsConvertSst((SST *)buf);
921             if ((retval = xls_addSST(pWB,(SST*)buf,bof1.size)) != LIBXLS_OK) {
922                 goto cleanup;
923             }
924             break;
925 
926         case XLS_RECORD_EXTSST:
927             break;
928 
929         case XLS_RECORD_BOUNDSHEET:
930 			{
931 				//printf("ADD SHEET\n");
932 				BOUNDSHEET *bs = (BOUNDSHEET *)buf;
933                 xlsConvertBoundsheet(bs);
934 				//char *s;
935 				// different for BIFF5 and BIFF8
936 				/*s = */ xls_addSheet(pWB, bs, bof1.size);
937 			}
938             break;
939 
940         case XLS_RECORD_XF:
941 			if(pWB->is5ver) {
942 				XF5 *xf;
943 				xf = (XF5 *)buf;
944                 xlsConvertXf5(xf);
945 
946 				if ((retval = xls_addXF5(pWB,xf)) != LIBXLS_OK) {
947                     goto cleanup;
948                 }
949 				if(xls_debug) {
950 					printf("   font: %d\n", xf->font);
951 					printf(" format: %d\n", xf->format);
952 					printf("   type: %.4x\n", xf->type);
953 					printf("  align: %.4x\n", xf->align);
954 					printf("rotatio: %.4x\n", xf->color);
955 					printf("  ident: %.4x\n", xf->fill);
956 					printf("usedatt: %.4x\n", xf->border);
957 					printf("linesty: %.4x\n", xf->linestyle);
958 				}
959 			} else {
960 				XF8 *xf;
961 				xf = (XF8 *)buf;
962                 xlsConvertXf8(xf);
963 
964 				if ((retval = xls_addXF8(pWB,xf)) != LIBXLS_OK) {
965                     goto cleanup;
966                 }
967 
968 				if(xls_debug) {
969 					xls_showXF(xf);
970 				}
971 			}
972             break;
973 
974         case XLS_RECORD_FONT:
975         case XLS_RECORD_FONT_ALT:
976 			{
977 				char *s;
978 				FONT *f = (FONT*)buf;
979                 xlsConvertFont(f);
980 				s = xls_addFont(pWB,f, bof1.size);
981 				if(xls_debug) {
982 					printf(" height: %d\n", f->height);
983 					printf("   flag: 0x%x\n", f->flag);
984 					printf("  color: 0x%x\n", f->color);
985 					printf(" weight: %d\n", f->bold);
986 					printf("escapem: 0x%x\n", f->escapement);
987 					printf("underln: 0x%x\n", f->underline);
988 					printf(" family: 0x%x\n", f->family);
989 					printf("charset: 0x%x\n", f->charset);
990 					if(s) printf("   name: %s\n", s);
991 				}
992 			}
993 			break;
994 
995         case XLS_RECORD_FORMAT:
996             xlsConvertFormat((FORMAT *)buf);
997             if ((retval = xls_addFormat(pWB, (FORMAT*)buf, bof1.size)) != LIBXLS_OK) {
998                 goto cleanup;
999             }
1000             break;
1001 
1002 		case XLS_RECORD_STYLE:
1003 			if(xls_debug) {
1004 				struct { unsigned short idx; unsigned char ident; unsigned char lvl; } *styl;
1005 				styl = (void *)buf;
1006 
1007 				printf("    idx: 0x%x\n", styl->idx & 0x07FF);
1008 				if(styl->idx & 0x8000) {
1009 					printf("  ident: 0x%x\n", styl->ident);
1010 					printf("  level: 0x%x\n", styl->lvl);
1011 				} else {
1012 					char *s = get_string((char *)&buf[2], bof1.size - 2, 1, pWB->is5ver, pWB->charset);
1013 					printf("  name=%s\n", s);
1014                     free(s);
1015 				}
1016 			}
1017 			break;
1018 
1019         case XLS_RECORD_PALETTE:
1020 			if(xls_debug > 10) {
1021 				unsigned char *p = buf + 2;
1022 				int idx, len;
1023 
1024 				len = buf[0] + (buf[1] << 8);
1025 				for(idx=0; idx<len; ++idx) {
1026 					printf("   Index=0x%2.2x %2.2x%2.2x%2.2x\n", idx+8, p[0], p[1], p[2] );
1027 					p += 4;
1028 				}
1029 			}
1030 			break;
1031 
1032 		case XLS_RECORD_1904:
1033 			pWB->is1904 = *(BYTE *)buf;	// the field is a short, but with little endian the first byte is 0 or 1
1034 			if(xls_debug) {
1035 				printf("   mode: 0x%x\n", pWB->is1904);
1036 			}
1037 			break;
1038 
1039 		case XLS_RECORD_DEFINEDNAME:
1040 			if(xls_debug) {
1041 				int i;
1042                 printf("   DEFINEDNAME: ");
1043 				for(i=0; i<bof1.size; ++i) printf("%2.2x ", buf[i]);
1044 				printf("\n");
1045 			}
1046 			break;
1047 
1048         default:
1049 			if(xls_debug)
1050 			{
1051 				//xls_showBOF(&bof1);
1052 				printf("    Not Processed in parseWoorkBook():  BOF=0x%4.4X size=%d\n", bof1.id, bof1.size);
1053 			}
1054             break;
1055         }
1056         bof2=bof1;
1057 		once=1;
1058     }
1059     while ((!pWB->olestr->eof)&&(bof1.id!=XLS_RECORD_EOF));
1060 
1061 cleanup:
1062     if (buf)
1063         free(buf);
1064 
1065     return retval;
1066 }
1067 
1068 
xls_preparseWorkSheet(xlsWorkSheet * pWS)1069 static xls_error_t xls_preparseWorkSheet(xlsWorkSheet* pWS)
1070 {
1071     BOF tmp;
1072     BYTE* buf = NULL;
1073     xls_error_t retval = LIBXLS_OK;
1074 
1075     verbose ("xls_preparseWorkSheet");
1076 
1077     if (ole2_seek(pWS->workbook->olestr,pWS->filepos) == -1) {
1078         retval = LIBXLS_ERROR_SEEK;
1079         goto cleanup;
1080     }
1081     do
1082     {
1083 		size_t read;
1084 		if((read = ole2_read(&tmp, 1, 4, pWS->workbook->olestr)) != 4) {
1085             if (xls_debug) fprintf(stderr, "Error: failed to read OLE size\n");
1086             retval = LIBXLS_ERROR_READ;
1087             goto cleanup;
1088         }
1089         xlsConvertBof(&tmp);
1090         if (tmp.size) {
1091             if ((buf = realloc(buf, tmp.size)) == NULL) {
1092                 if (xls_debug) fprintf(stderr, "Error: failed to allocate buffer of size %d\n", (int)tmp.size);
1093                 retval = LIBXLS_ERROR_MALLOC;
1094                 goto cleanup;
1095             }
1096             if((read = ole2_read(buf, 1, tmp.size, pWS->workbook->olestr)) != tmp.size) {
1097                 if (xls_debug) fprintf(stderr, "Error: failed to read OLE block\n");
1098                 retval = LIBXLS_ERROR_READ;
1099                 goto cleanup;
1100             }
1101         }
1102 
1103         switch (tmp.id)
1104         {
1105         case XLS_RECORD_DEFCOLWIDTH:
1106             if (tmp.size < sizeof(WORD)) {
1107                 retval = LIBXLS_ERROR_PARSE;
1108                 goto cleanup;
1109             }
1110             pWS->defcolwidth = (buf[0] << 8) + (buf[1] << 16);
1111             break;
1112         case XLS_RECORD_COLINFO:
1113             if (tmp.size < sizeof(COLINFO)) {
1114                 retval = LIBXLS_ERROR_PARSE;
1115                 goto cleanup;
1116             }
1117             xlsConvertColinfo((COLINFO*)buf);
1118             if ((retval = xls_addColinfo(pWS,(COLINFO*)buf)) != LIBXLS_OK)
1119                 goto cleanup;
1120             break;
1121         case XLS_RECORD_ROW:
1122             if (tmp.size < sizeof(ROW)) {
1123                 retval = LIBXLS_ERROR_PARSE;
1124                 goto cleanup;
1125             }
1126             xlsConvertRow((ROW*)buf);
1127             if (pWS->rows.lastcol<((ROW*)buf)->lcell)
1128                 pWS->rows.lastcol=((ROW*)buf)->lcell;
1129             if (pWS->rows.lastrow<((ROW*)buf)->index)
1130                 pWS->rows.lastrow=((ROW*)buf)->index;
1131             break;
1132         /* If the ROW record is incorrect or missing, infer the information from
1133          * cell data. */
1134         case XLS_RECORD_MULRK:
1135             if (xls_isCellTooSmall(pWS->workbook, &tmp, buf)) {
1136                 retval = LIBXLS_ERROR_PARSE;
1137                 goto cleanup;
1138             }
1139             if (pWS->rows.lastcol<xlsShortVal(((MULRK*)buf)->col) + (tmp.size - 6)/6 - 1)
1140                 pWS->rows.lastcol=xlsShortVal(((MULRK*)buf)->col) + (tmp.size - 6)/6 - 1;
1141             if (pWS->rows.lastrow<xlsShortVal(((MULRK*)buf)->row))
1142                 pWS->rows.lastrow=xlsShortVal(((MULRK*)buf)->row);
1143             break;
1144         case XLS_RECORD_MULBLANK:
1145             if (xls_isCellTooSmall(pWS->workbook, &tmp, buf)) {
1146                 retval = LIBXLS_ERROR_PARSE;
1147                 goto cleanup;
1148             }
1149             if (pWS->rows.lastcol<xlsShortVal(((MULBLANK*)buf)->col) + (tmp.size - 6)/2 - 1)
1150                 pWS->rows.lastcol=xlsShortVal(((MULBLANK*)buf)->col) + (tmp.size - 6)/2 - 1;
1151             if (pWS->rows.lastrow<xlsShortVal(((MULBLANK*)buf)->row))
1152                 pWS->rows.lastrow=xlsShortVal(((MULBLANK*)buf)->row);
1153             break;
1154         case XLS_RECORD_NUMBER:
1155         case XLS_RECORD_RK:
1156         case XLS_RECORD_LABELSST:
1157         case XLS_RECORD_BLANK:
1158         case XLS_RECORD_LABEL:
1159         case XLS_RECORD_FORMULA:
1160         case XLS_RECORD_FORMULA_ALT:
1161         case XLS_RECORD_BOOLERR:
1162             if (xls_isCellTooSmall(pWS->workbook, &tmp, buf)) {
1163                 retval = LIBXLS_ERROR_PARSE;
1164                 goto cleanup;
1165             }
1166             if (pWS->rows.lastcol<xlsShortVal(((COL*)buf)->col))
1167                 pWS->rows.lastcol=xlsShortVal(((COL*)buf)->col);
1168             if (pWS->rows.lastrow<xlsShortVal(((COL*)buf)->row))
1169                 pWS->rows.lastrow=xlsShortVal(((COL*)buf)->row);
1170             break;
1171         }
1172         if (pWS->rows.lastcol > 256) {
1173             retval = LIBXLS_ERROR_PARSE;
1174             goto cleanup;
1175         }
1176     }
1177     while ((!pWS->workbook->olestr->eof)&&(tmp.id!=XLS_RECORD_EOF));
1178 
1179 cleanup:
1180     if (buf)
1181         free(buf);
1182     return retval;
1183 }
1184 
xls_formatColumn(xlsWorkSheet * pWS)1185 static xls_error_t xls_formatColumn(xlsWorkSheet* pWS)
1186 {
1187     DWORD i,t,ii;
1188     DWORD fcol,lcol;
1189     WORD width;
1190     BYTE isHidden;
1191 
1192     for (i=0;i<pWS->colinfo.count;i++)
1193     {
1194         width = pWS->colinfo.col[i].width;
1195         isHidden = (pWS->colinfo.col[i].flags&1);
1196         if (pWS->colinfo.col[i].first<=pWS->rows.lastcol)
1197             fcol=pWS->colinfo.col[i].first;
1198         else
1199             fcol=pWS->rows.lastcol;
1200 
1201         if (pWS->colinfo.col[i].last<=pWS->rows.lastcol)
1202             lcol=pWS->colinfo.col[i].last;
1203         else
1204             lcol=pWS->rows.lastcol;
1205 
1206         for (ii=0;ii<=pWS->rows.lastrow;ii++) {
1207             for (t=fcol;t<=lcol;t++) {
1208                 pWS->rows.row[ii].cells.cell[t].isHidden |= isHidden;
1209                 pWS->rows.row[ii].cells.cell[t].width = width;
1210             }
1211         }
1212     }
1213     return LIBXLS_OK;
1214 }
1215 
xls_parseWorkSheet(xlsWorkSheet * pWS)1216 xls_error_t xls_parseWorkSheet(xlsWorkSheet* pWS)
1217 {
1218     BOF tmp;
1219     BYTE* buf = NULL;
1220 	long offset = pWS->filepos;
1221     size_t read;
1222     xls_error_t retval = 0;
1223 
1224 	struct st_cell_data *cell = NULL;
1225 	xlsWorkBook *pWB = pWS->workbook;
1226 
1227     verbose ("xls_parseWorkSheet");
1228 
1229     if ((retval = xls_preparseWorkSheet(pWS)) != LIBXLS_OK) {
1230         goto cleanup;
1231     }
1232 	// printf("size=%d fatpos=%d)\n", pWS->workbook->olestr->size, pWS->workbook->olestr->fatpos);
1233 
1234     if ((retval = xls_makeTable(pWS)) != LIBXLS_OK) {
1235         goto cleanup;
1236     }
1237 
1238     if ((retval = xls_formatColumn(pWS)) != LIBXLS_OK) {
1239         goto cleanup;
1240     }
1241 
1242     if (ole2_seek(pWS->workbook->olestr,pWS->filepos) == -1) {
1243         retval = LIBXLS_ERROR_SEEK;
1244         goto cleanup;
1245     }
1246     do
1247     {
1248 		long lastPos = offset;
1249 
1250 		if(xls_debug > 10) {
1251 			printf("LASTPOS=%ld pos=%d filePos=%d filePos=%d\n", lastPos, (int)pWB->olestr->pos, pWS->filepos, pWB->filepos);
1252 		}
1253 		if((read = ole2_read(&tmp, 1, 4, pWS->workbook->olestr)) != 4) {
1254             if (xls_debug) fprintf(stderr, "Error: failed to read OLE size\n");
1255             retval = LIBXLS_ERROR_READ;
1256             goto cleanup;
1257         }
1258         xlsConvertBof((BOF *)&tmp);
1259         if (tmp.size) {
1260             if ((buf = realloc(buf, tmp.size)) == NULL) {
1261                 if (xls_debug) fprintf(stderr, "Error: failed to allocate buffer of size %d\n", (int)tmp.size);
1262                 retval = LIBXLS_ERROR_MALLOC;
1263                 goto cleanup;
1264             }
1265             if((read = ole2_read(buf, 1, tmp.size, pWS->workbook->olestr)) != tmp.size) {
1266                 if (xls_debug) fprintf(stderr, "Error: failed to read OLE block\n");
1267                 retval = LIBXLS_ERROR_READ;
1268                 goto cleanup;
1269             }
1270         }
1271 		offset += 4 + tmp.size;
1272 
1273 		if(xls_debug)
1274 			xls_showBOF(&tmp);
1275 
1276         switch (tmp.id)
1277         {
1278         case XLS_RECORD_EOF:
1279             break;
1280         case XLS_RECORD_MERGEDCELLS:
1281             if ((retval = xls_mergedCells(pWS,&tmp,buf)) != LIBXLS_OK) {
1282                 goto cleanup;
1283             }
1284             break;
1285         case XLS_RECORD_ROW:
1286             if (tmp.size < sizeof(ROW)) {
1287                 retval = LIBXLS_ERROR_PARSE;
1288                 goto cleanup;
1289             }
1290 			if(xls_debug > 10) printf("ROW: %x at pos=%ld\n", tmp.id, lastPos);
1291             xlsConvertRow((ROW *)buf);
1292             if ((retval = xls_addRow(pWS,(ROW*)buf)) != LIBXLS_OK) {
1293                 goto cleanup;
1294             }
1295             break;
1296 		case XLS_RECORD_DEFCOLWIDTH:
1297             if (tmp.size < sizeof(WORD)) {
1298                 retval = LIBXLS_ERROR_PARSE;
1299                 goto cleanup;
1300             }
1301 			if(xls_debug > 10) printf("DEFAULT COL WIDTH: %d\n", ((WORD *)buf)[0]);
1302 			break;
1303 		case XLS_RECORD_DEFAULTROWHEIGHT:
1304             if (tmp.size < 2 * sizeof(WORD)) {
1305                 retval = LIBXLS_ERROR_PARSE;
1306                 goto cleanup;
1307             }
1308 			if(xls_debug > 10) printf("DEFAULT ROW Height: 0x%x %d\n", ((WORD *)buf)[0], ((WORD *)buf)[1]);
1309 			break;
1310 		case XLS_RECORD_DBCELL:
1311 			if(xls_debug > 10) {
1312 				DWORD *foo = (DWORD *)buf;
1313                 WORD *goo;
1314 				int i;
1315                 printf("DBCELL: size %d\n", tmp.size);
1316 				printf("DBCELL OFFSET=%4.4u -> ROW %ld\n", foo[0], lastPos-foo[0]);
1317 				++foo;
1318 				goo = (WORD *)foo;
1319 				for(i=0; i<5; ++i) printf("goo[%d]=%4.4x %u\n", i, goo[i], goo[i]);
1320 			}
1321 			break;
1322         case XLS_RECORD_INDEX:
1323 			if(xls_debug > 10) {
1324 				DWORD *foo = (DWORD *)buf;
1325                 int i;
1326 				printf("INDEX: size %d\n", tmp.size);
1327 				for(i=0; i<5; ++i) printf("FOO[%d]=%4.4x %u\n", i, foo[i], foo[i]);
1328 			}
1329 #if 0
1330 			0	4 4	4 8	4
1331 			12	4 16	4nm
1332 			Not used Index to first used row (rf, 0-based) Index to first row of unused tail of sheet (rl, last used row + 1, 0-based)
1333 			Absolute stream position of the DEFCOLWIDTH record (➜5.32) of the current sheet. If this record does not exist, the offset points to the record at the position where the DEFCOLWIDTH record would occur.
1334 			Array of nm absolute stream positions to the DBCELL record (➜5.29) of each Row Block
1335 #endif
1336             break;
1337         case XLS_RECORD_MULRK:
1338         case XLS_RECORD_MULBLANK:
1339         case XLS_RECORD_NUMBER:
1340         case XLS_RECORD_BOOLERR:
1341         case XLS_RECORD_RK:
1342         case XLS_RECORD_LABELSST:
1343         case XLS_RECORD_BLANK:
1344         case XLS_RECORD_LABEL:
1345         case XLS_RECORD_FORMULA:
1346         case XLS_RECORD_FORMULA_ALT:
1347             if ((cell = xls_addCell(pWS, &tmp, buf)) == NULL) {
1348                 retval = LIBXLS_ERROR_PARSE;
1349                 goto cleanup;
1350             }
1351             break;
1352 		case XLS_RECORD_ARRAY:
1353 			if(formula_handler) formula_handler(tmp.id, tmp.size, buf);
1354 			break;
1355 
1356 		case XLS_RECORD_STRING:
1357 			if(cell && (cell->id == XLS_RECORD_FORMULA || cell->id == XLS_RECORD_FORMULA_ALT)) {
1358                 xls_cell_set_str(cell, get_string((char *)buf, tmp.size,
1359                             (BYTE)!pWB->is5ver, pWB->is5ver, pWB->charset));
1360 				if (xls_debug) xls_showCell(cell);
1361 			}
1362 			break;
1363 
1364         default:
1365 			if(xls_debug)
1366 			{
1367 				//xls_showBOF(&tmp);
1368                 if (tmp.size >= sizeof(COL)) {
1369                     printf("   [%d:%d]: 0x%X at pos=%lu size=%u\n", xlsShortVal(((COL*)buf)->row), xlsShortVal(((COL*)buf)->col),
1370                             tmp.id, lastPos, tmp.size);
1371                 } else {
1372                     printf("   0x%X at pos=%lu size=%u\n", tmp.id, lastPos, tmp.size);
1373                 }
1374 			}
1375             break;
1376         }
1377     }
1378     while ((!pWS->workbook->olestr->eof)&&(tmp.id!=XLS_RECORD_EOF));
1379 
1380 cleanup:
1381     if (buf)
1382         free(buf);
1383 
1384     return retval;
1385 }
1386 
xls_getWorkSheet(xlsWorkBook * pWB,int num)1387 xlsWorkSheet * xls_getWorkSheet(xlsWorkBook* pWB,int num)
1388 {
1389     xlsWorkSheet * pWS = NULL;
1390     verbose ("xls_getWorkSheet");
1391     if (num >= 0 && num < (int)pWB->sheets.count) {
1392         pWS = calloc(1, sizeof(xlsWorkSheet));
1393         pWS->filepos=pWB->sheets.sheet[num].filepos;
1394         pWS->workbook=pWB;
1395         pWS->rows.lastcol=0;
1396         pWS->rows.lastrow=0;
1397         pWS->colinfo.count=0;
1398     }
1399     return pWS;
1400 }
1401 
xls_open_ole(OLE2 * ole,const char * charset,xls_error_t * outError)1402 static xlsWorkBook *xls_open_ole(OLE2 *ole, const char *charset, xls_error_t *outError) {
1403     xlsWorkBook* pWB;
1404     xls_error_t retval = LIBXLS_OK;
1405 
1406     pWB = calloc(1, sizeof(xlsWorkBook));
1407     verbose ("xls_open_ole");
1408 
1409     if ((pWB->olestr=ole2_fopen(ole, "\005SummaryInformation")))
1410     {
1411         pWB->summary = calloc(1,4096);
1412 		if (ole2_read(pWB->summary, 4096, 1, pWB->olestr) == -1) {
1413             if (xls_debug) fprintf(stderr, "SummaryInformation not found\n");
1414             retval = LIBXLS_ERROR_READ;
1415             goto cleanup;
1416         }
1417 		ole2_fclose(pWB->olestr);
1418 	}
1419 
1420     if ((pWB->olestr=ole2_fopen(ole, "\005DocumentSummaryInformation")))
1421     {
1422         pWB->docSummary = calloc(1, 4096);
1423 		if (ole2_read(pWB->docSummary, 4096, 1, pWB->olestr) == -1) {
1424             if (xls_debug) fprintf(stderr, "DocumentSummaryInformation not found\n");
1425             retval = LIBXLS_ERROR_READ;
1426             goto cleanup;
1427         }
1428 		ole2_fclose(pWB->olestr);
1429 	}
1430 
1431 #if 0
1432 	if(xls_debug) {
1433 		printf("summary=%d docsummary=%d\n", pWB->summary ? 1 : 0, pWB->docSummary ? 1 : 0);
1434 		xlsSummaryInfo *si = xls_summaryInfo(pWB);
1435 		printf("title=%s\n", si->title);
1436 		printf("subject=%s\n", si->subject);
1437 		printf("author=%s\n", si->author);
1438 		printf("keywords=%s\n", si->keywords);
1439 		printf("comment=%s\n", si->comment);
1440 		printf("lastAuthor=%s\n", si->lastAuthor);
1441 		printf("appName=%s\n", si->appName);
1442 		printf("category=%s\n", si->category);
1443 		printf("manager=%s\n", si->manager);
1444 		printf("company=%s\n", si->company);
1445 	}
1446 #endif
1447 
1448     // open Workbook
1449     if (!(pWB->olestr=ole2_fopen(ole,"Workbook")) && !(pWB->olestr=ole2_fopen(ole,"Book")))
1450     {
1451         if(xls_debug) fprintf(stderr, "Workbook not found\n");
1452         retval = LIBXLS_ERROR_PARSE;
1453         goto cleanup;
1454     }
1455 
1456     pWB->sheets.count=0;
1457     pWB->xfs.count=0;
1458     pWB->fonts.count=0;
1459     if (charset) {
1460         pWB->charset = malloc(strlen(charset) * sizeof(char)+1);
1461         strcpy(pWB->charset, charset);
1462     } else {
1463         pWB->charset = strdup("UTF-8");
1464     }
1465 
1466     retval = xls_parseWorkBook(pWB);
1467 
1468 cleanup:
1469     if (retval != LIBXLS_OK) {
1470         if (!pWB->olestr)
1471             ole2_close(ole);
1472         xls_close_WB(pWB);
1473         pWB = NULL;
1474     }
1475     if (outError)
1476         *outError = retval;
1477 
1478     return pWB;
1479 }
1480 
xls_open(const char * file,const char * charset)1481 xlsWorkBook* xls_open(const char *file, const char* charset)
1482 {
1483     return xls_open_file(file, charset, NULL);
1484 }
1485 
xls_open_file(const char * file,const char * charset,xls_error_t * outError)1486 xlsWorkBook* xls_open_file(const char *file, const char* charset, xls_error_t *outError) {
1487     OLE2* ole = NULL;
1488 
1489     if (!(ole=ole2_open_file(file)))
1490     {
1491         if (xls_debug) fprintf(stderr, "File \"%s\" not found\n",file);
1492         if (outError) *outError = LIBXLS_ERROR_OPEN;
1493         return NULL;
1494     }
1495 
1496     return xls_open_ole(ole, charset, outError);
1497 }
1498 
xls_open_buffer(const unsigned char * buffer,size_t len,const char * charset,xls_error_t * outError)1499 xlsWorkBook *xls_open_buffer(const unsigned char *buffer, size_t len,
1500         const char *charset, xls_error_t *outError) {
1501     OLE2* ole = NULL;
1502 
1503     if (!(ole=ole2_open_buffer(buffer, len)))
1504     {
1505         if (outError) *outError = LIBXLS_ERROR_OPEN;
1506         return NULL;
1507     }
1508 
1509     return xls_open_ole(ole, charset, outError);
1510 }
1511 
xls_row(xlsWorkSheet * pWS,WORD cellRow)1512 xlsRow *xls_row(xlsWorkSheet* pWS, WORD cellRow)
1513 {
1514     if(cellRow > pWS->rows.lastrow)
1515         return NULL;
1516 
1517     if (pWS->rows.row == NULL)
1518         return NULL;
1519 
1520     return &pWS->rows.row[cellRow];
1521 }
1522 
xls_cell(xlsWorkSheet * pWS,WORD cellRow,WORD cellCol)1523 xlsCell	*xls_cell(xlsWorkSheet* pWS, WORD cellRow, WORD cellCol)
1524 {
1525     struct st_row_data	*row;
1526 
1527     if ((row = xls_row(pWS, cellRow)) == NULL)
1528         return NULL;
1529 
1530     if(cellCol >= row->cells.count)
1531         return NULL;
1532 
1533     return &row->cells.cell[cellCol];
1534 }
1535 
/*
 * Release a workbook and everything it owns: the open OLE stream together
 * with its container, the charset name, sheet names, shared strings, XF
 * records, font names, format strings and the two summary buffers.
 * A NULL workbook is ignored.
 */
void xls_close_WB(xlsWorkBook* pWB)
{
    DWORD idx;

    verbose ("xls_close");

    if (pWB == NULL)
        return;

    /* OLE first: the container pointer must be fetched before the stream
     * that holds it is closed */
    if (pWB->olestr != NULL) {
        OLE2 *container = pWB->olestr->ole;
        ole2_fclose(pWB->olestr);
        ole2_close(container);
    }

    /* workbook-level data */
    free(pWB->charset);

    /* sheets */
    for (idx = 0; idx < pWB->sheets.count; idx++)
        free(pWB->sheets.sheet[idx].name);
    free(pWB->sheets.sheet);

    /* shared string table */
    for (idx = 0; idx < pWB->sst.count; idx++)
        free(pWB->sst.string[idx].str);
    free(pWB->sst.string);

    /* extended formats */
    free(pWB->xfs.xf);

    /* fonts */
    for (idx = 0; idx < pWB->fonts.count; idx++)
        free(pWB->fonts.font[idx].name);
    free(pWB->fonts.font);

    /* number formats */
    for (idx = 0; idx < pWB->formats.count; idx++)
        free(pWB->formats.format[idx].value);
    free(pWB->formats.format);

    /* summary buffers (free accepts NULL) */
    free(pWB->summary);
    free(pWB->docSummary);

    // TODO - free other dynamically allocated objects like string table??
    free(pWB);
}
1602 
/*
 * Release a worksheet: every cell string, every row's cell array, the row
 * table, the column info table and the worksheet object itself.
 * A NULL worksheet is ignored.
 */
void xls_close_WS(xlsWorkSheet* pWS)
{
    if (pWS == NULL)
        return;

    if (pWS->rows.row != NULL) {
        DWORD rowIdx;

        /* rows 0..lastrow inclusive */
        for (rowIdx = 0; rowIdx <= pWS->rows.lastrow; rowIdx++) {
            struct st_row_data *current = &pWS->rows.row[rowIdx];
            DWORD cellIdx;

            for (cellIdx = 0; cellIdx < current->cells.count; cellIdx++)
                free(current->cells.cell[cellIdx].str);
            free(current->cells.cell);
        }
        free(pWS->rows.row);
    }

    /* COLINFO */
    free(pWS->colinfo.col);

    free(pWS);
}
1625 
xls_getVersion(void)1626 const char* xls_getVersion(void)
1627 {
1628     return PACKAGE_VERSION;
1629 }
1630 
/*
 * Map a libxls result code to a human-readable message.
 * Codes not listed in the table yield "Unknown error". The returned string is
 * a constant and must not be freed.
 */
const char* xls_getError(xls_error_t code) {
    static const struct {
        xls_error_t code;
        const char *message;
    } knownCodes[] = {
        { LIBXLS_OK,           "No error" },
        { LIBXLS_ERROR_READ,   "Unable to read from file" },
        { LIBXLS_ERROR_OPEN,   "Unable to open file" },
        { LIBXLS_ERROR_SEEK,   "Unable to seek within file" },
        { LIBXLS_ERROR_MALLOC, "Unable to allocate memory" },
        { LIBXLS_ERROR_PARSE,  "Unable to parse file" },
    };
    size_t k;

    /* first match wins, in the order listed above */
    for (k = 0; k < sizeof knownCodes / sizeof knownCodes[0]; k++) {
        if (code == knownCodes[k].code)
            return knownCodes[k].message;
    }

    return "Unknown error";
}
1647 
1648 //
1649 // http://poi.apache.org/hpsf/internals.html
1650 // or google "DocumentSummaryInformation and UserDefined Property Sets" and look for MSDN hits
1651 //
1652 
xls_summaryInfo(xlsWorkBook * pWB)1653 xlsSummaryInfo *xls_summaryInfo(xlsWorkBook* pWB)
1654 {
1655 	xlsSummaryInfo	*pSI;
1656 
1657 	pSI = (xlsSummaryInfo *)calloc(1, sizeof(xlsSummaryInfo));
1658 	xls_dumpSummary(pWB->summary, 1, pSI);
1659 	xls_dumpSummary(pWB->docSummary, 0, pSI);
1660 
1661 	return pSI;
1662 }
1663 
/*
 * Release a summary-information object and all of its strings.
 * A NULL object is ignored; unset (NULL) fields are harmless because
 * free() accepts NULL.
 */
void xls_close_summaryInfo(xlsSummaryInfo *pSI)
{
    if (pSI == NULL)
        return;

    free(pSI->title);
    free(pSI->subject);
    free(pSI->author);
    free(pSI->keywords);
    free(pSI->comment);
    free(pSI->lastAuthor);
    free(pSI->appName);
    free(pSI->category);
    free(pSI->manager);
    free(pSI->company);

    free(pSI);
}
1681 
/*
 * Extract the string properties of interest from a raw property-set buffer
 * into *pSI.
 *
 *   buf       - raw stream contents, or NULL when the stream was missing.
 *               Assumed to be the 4096-byte zero-filled buffer allocated by
 *               xls_open_ole(); all offsets are validated against that size.
 *   isSummary - non-zero for SummaryInformation (title, subject, author,
 *               keywords, comment, lastAuthor, appName); zero for
 *               DocumentSummaryInformation (category, manager, company).
 *   pSI       - destination; matching fields receive heap-allocated copies.
 *
 * Only properties whose type code is 30 (an 8-bit string preceded by a
 * 4-byte length; VT_LPSTR in the OLE property-set format) are used. All other
 * types are ignored. The data comes straight from the file, so entries whose
 * offsets fall outside the buffer are skipped, unterminated strings are cut
 * at the end of the buffer, and a repeated property replaces (and frees) the
 * earlier value.
 */
static void xls_dumpSummary(char *buf,int isSummary,xlsSummaryInfo *pSI) {
    const size_t bufLen = 4096;	/* size allocated in xls_open_ole() */
    header *head;
    uint32_t i, j;

    if(!buf || !pSI) return;	// perhaps the document was missing??

    head = (header *)buf;

    for(i=0; i<head->count; ++i) {
        sectionList *secList;
        sectionHeader *secHead;
        size_t secOff, secAvail;

        /* stop once the i-th section-list entry would extend past the buffer */
        if(offsetof(header, secList) + ((size_t)i + 1) * sizeof(sectionList) > bufLen)
            break;
        secList = &head->secList[i];

        /* the section header's fixed part must lie inside the buffer */
        secOff = secList->offset;
        if(secOff > bufLen)
            continue;
        secAvail = bufLen - secOff;
        if(secAvail < offsetof(sectionHeader, properties))
            continue;
        secHead = (sectionHeader *)(buf + secOff);

        for(j=0; j<secHead->numProperties; ++j) {
            /* string bytes start 4 bytes into the property data */
            const size_t strOff = offsetof(property, data) + 4;
            propertyList *plist;
            property *prop;
            BYTE **s;
            size_t propOff, propAvail, maxLen, len;
            const char *str, *nul;
            char *copy;

            /* stop once the j-th property-list entry would extend past the buffer */
            if(offsetof(sectionHeader, properties) + ((size_t)j + 1) * sizeof(propertyList) > secAvail)
                break;
            plist = &secHead->properties[j];

            /* the property, including at least one string byte, must fit */
            propOff = plist->sectionOffset;
            if(propOff > secAvail)
                continue;
            propAvail = secAvail - propOff;
            if(propAvail <= strOff)
                continue;
            prop = (property *)((char *)secHead + propOff);

            /* prop->propertyID holds the value's type code here */
            if(prop->propertyID != 30)
                continue;

            if(isSummary) {
                switch(plist->propertyID) {
                case 2:		s = &pSI->title;		break;
                case 3:		s = &pSI->subject;		break;
                case 4:		s = &pSI->author;		break;
                case 5:		s = &pSI->keywords;		break;
                case 6:		s = &pSI->comment;		break;
                case 8:		s = &pSI->lastAuthor;	break;
                case 18:	s = &pSI->appName;		break;
                default:	s = NULL;				break;
                }
            } else {
                switch(plist->propertyID) {
                case 2:		s = &pSI->category;		break;
                case 14:	s = &pSI->manager;		break;
                case 15:	s = &pSI->company;		break;
                default:	s = NULL;				break;
                }
            }
            if(!s)
                continue;

            /* copy up to the terminator, or to the end of the buffer if none */
            str = (const char *)prop + strOff;
            maxLen = propAvail - strOff;
            nul = memchr(str, '\0', maxLen);
            len = nul ? (size_t)(nul - str) : maxLen;

            copy = malloc(len + 1);
            if(!copy)
                continue;
            memcpy(copy, str, len);
            copy[len] = '\0';

            free(*s);	/* drop the value of an earlier duplicate property */
            *s = (BYTE *)copy;
        }
    }
}
1771 
/*
 * Register the callback stored in the file-level `formula_handler` variable.
 * xls_parseWorkSheet() calls it for ARRAY records when it is non-NULL, so
 * passing NULL disables the callback.
 */
void xls_set_formula_hander(xls_formula_handler handler) {
    formula_handler = handler;
}
1776