1 /*
2 / freexl_internals.h
3 /
4 / internal declarations
5 /
6 / version  1.0, 2011 July 26
7 /
8 / Author: Sandro Furieri a.furieri@lqt.it
9 /
10 / ------------------------------------------------------------------------------
11 /
12 / Version: MPL 1.1/GPL 2.0/LGPL 2.1
13 /
14 / The contents of this file are subject to the Mozilla Public License Version
15 / 1.1 (the "License"); you may not use this file except in compliance with
16 / the License. You may obtain a copy of the License at
17 / http://www.mozilla.org/MPL/
18 /
19 / Software distributed under the License is distributed on an "AS IS" basis,
20 / WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
21 / for the specific language governing rights and limitations under the
22 / License.
23 /
24 / The Original Code is the FreeXL library
25 /
26 / The Initial Developer of the Original Code is Alessandro Furieri
27 /
28 / Portions created by the Initial Developer are Copyright (C) 2011
29 / the Initial Developer. All Rights Reserved.
30 /
31 / Contributor(s):
32 / Brad Hards
33 /
34 / Alternatively, the contents of this file may be used under the terms of
35 / either the GNU General Public License Version 2 or later (the "GPL"), or
36 / the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
37 / in which case the provisions of the GPL or the LGPL are applicable instead
38 / of those above. If you wish to allow use of your version of this file only
39 / under the terms of either the GPL or the LGPL, and not to allow others to
40 / use your version of this file under the terms of the MPL, indicate your
41 / decision by deleting the provisions above and replace them with the notice
42 / and other provisions required by the GPL or the LGPL. If you do not delete
43 / the provisions above, a recipient may use your version of this file under
44 / the terms of any one of the MPL, the GPL or the LGPL.
45 /
46 */
47 
48 #define BIFF_MAX_FORMAT	2048
49 #define BIFF_MAX_XF	8192
50 
51 #define FREEXL_MAGIC_INFO	1675437821
52 #define FREEXL_MAGIC_START	1675431287
53 #define FREEXL_MAGIC_END	178213456
54 
55 /* BIFF record types */
56 #define BIFF_EOF		0x000A
57 #define BIFF_DATEMODE		0x0022
58 #define BIFF_FILEPASS		0x002F
59 #define BIFF_CODEPAGE		0x0042
60 #define BIFF_SHEET		0x0085
61 #define BIFF_SST		0x00FC
62 #define BIFF_DIMENSION		0x0200
63 #define BIFF_SHEETSOFFSET	0x008E
64 #define BIFF_BOF_2		0x0009
65 #define BIFF_BOF_3		0x0209
66 #define BIFF_BOF_4		0x0409
67 #define BIFF_BOF		0x0809
68 #define BIFF_CONTINUE		0x003C
69 #define BIFF_FORMAT_2		0x001E
70 #define BIFF_FORMAT		0x041E
71 #define BIFF_XF_2		0x0043
72 #define BIFF_XF_3		0x0243
73 #define BIFF_XF_4		0x0443
74 #define BIFF_XF			0x00E0
75 #define BIFF_INTEGER_2		0x0002
76 #define BIFF_NUMBER_2		0x0003
77 #define BIFF_NUMBER		0x0203
78 #define BIFF_LABEL_2		0x0004
79 #define BIFF_LABEL		0x0204
80 #define BIFF_LABEL_SST		0x00FD
81 #define BIFF_RK			0x027E
82 #define BIFF_MULRK		0x00BD
83 #define BIFF_BOOLERR_2	0x0005
84 #define BIFF_BOOLERR	0x0205
85 
86 typedef union biff_word
87 {
88     unsigned char bytes[2];
89     unsigned short value;
90 } biff_word16;
91 
92 typedef union biff_double_word
93 {
94     unsigned char bytes[4];
95     unsigned int value;
96     int signed_value;
97 } biff_word32;
98 
99 typedef union biff_float_word
100 {
101     unsigned char bytes[8];
102     double value;
103 } biff_float;
104 
105 
106 typedef struct cfbf_header_struct
107 {
108 /*
109  * a struct wrapping the CFBF
110  * Compound File Binary Format
111  *
112  * this one is a Microsoft format used e.g. by Office,
113  * and more or less is intended to implement a multi-stream
114  * arch within a single file
115  * the whole struct is a file-system like FAT (File Allocation Table)
116  */
117     unsigned char signature[8];	/* magic signature */
118     unsigned char classid[16];	/* Classid [usually zero] */
119     biff_word16 minor_version;	/* minor version: unused */
120     biff_word16 major_version;	/* 3 [512 bytes/sector] or 4 [4096 bytes/sector] */
121     biff_word16 byte_order;	/* endiannes [always little-endian] */
122     biff_word16 sector_shift;	/* 9=512 bytes/sector or 12=4096 bytes/sector */
123     biff_word16 mini_sector_shift;	/* 6=64 bytes/sector */
124     biff_word16 reserved1;	/* unused, ZERO */
125     biff_word32 reserved2;	/* unused, ZERO */
126     biff_word32 directory_sectors;	/* usually ZERO */
127     biff_word32 fat_sectors;	/* #sectors in FAT chain */
128     biff_word32 directory_start;	/* sector index for directory */
129     biff_word32 transaction_signature;	/* usually ZERO */
130     biff_word32 mini_cutoff;	/* tipically 4096 */
131     biff_word32 mini_fat_start;	/* sector index for mini-FAT chain */
132     biff_word32 mini_fat_sectors;	/* #sectors in mini-FAT chain */
133     biff_word32 difat_start;	/* sector index for first DoubleIndirect-FAT */
134     biff_word32 difat_sectors;	/* #sectors for DIFAT chain */
135     unsigned char fat_sector_map[436];	/* first 109 FAT sectors */
136 } cfbf_header;
137 
138 typedef struct cfbf_dir_entry_struct
139 {
140 /* a struct representing a CFBF Directory entry */
141     char name[64];		/* file name */
142     biff_word16 name_size;	/* file name size */
143     unsigned char type;		/* 1=directory; 2=file, 5=root */
144     unsigned char node_color;	/* 0=red; 1=black */
145     biff_word32 previous;	/* previuous item index */
146     biff_word32 next;		/* next item index */
147     biff_word32 child;		/* first child index */
148     unsigned char classid[16];	/* Classid [unused] */
149     biff_word32 state;		/* state bits [unused] */
150     biff_word32 timestamp_1;	/* timestamp (1) [unused] */
151     biff_word32 timestamp_2;	/* timestamp (2) [unused] */
152     biff_word32 timestamp_3;	/* timestamp (3) [unused] */
153     biff_word32 timestamp_4;	/* timestamp (4) [unused] */
154     biff_word32 start_sector;	/* start sector */
155     biff_word32 size;		/* actual file-size */
156     biff_word32 extra_size;	/* extra size (> 2GB) [ignored] */
157 } cfbf_dir_entry;
158 
159 typedef struct biff_string_table_struct
160 {
161 /*
162  * in BIFF8 a Shared String Table (SST) exists:
163  * any Text String referenced by the whole Workbook will
164  * be stored only once on the SST, and then Text Cells will
165  * simply refer the corresponding SST entry
166  */
167     unsigned int string_count;	/* how many strings are into the SST */
168     char **utf8_strings;	/* the String Array [UTF-8] */
169     unsigned int current_index;	/* array index for currently parsed string */
170     char *current_utf16_buf;	/* current UTF-16 buffer */
171     unsigned int current_utf16_len;	/* current UTF-16 length */
172     unsigned int current_utf16_off;	/* current UTF-16 offset */
173     unsigned int current_utf16_skip;	/* bytes to be skipped after the current string */
174     unsigned int next_utf16_skip;	/* remaining bytes to be skipped in the next record */
175 } biff_string_table;
176 
177 typedef struct biff_cell_value_struct
178 {
179 /* a struct representing a Cell value */
180     unsigned char type;
181     union multivalue_cell
182     {
183 	int int_value;
184 	double dbl_value;
185 	char *text_value;
186 	const char *sst_value;
187     } value;
188 } biff_cell_value;
189 
190 typedef struct biff_sheet_struct
191 {
192 /* a strunct representing a BIFF Sheet */
193     unsigned int start_offset;	/* start offset within the stream */
194     unsigned char visible;	/* 0x00=visible; 0x01=hidden; 0x02=very-hidden; */
195     unsigned char type;		/* 0x00=work-sheet; 0x01=macro-sheet; 0x02=chart; 0x06=VB-module; */
196     char *utf8_name;		/* UTF8 name */
197     unsigned int rows;		/* number of rows */
198     unsigned short columns;	/* number of columns */
199     biff_cell_value *cell_values;	/* cell values array */
200     int valid_dimension;	/* set to 1=TRUE only when DIMENSION is surely known */
201     int already_done;		/* set to 1=TRUE if already loaded in pass #1 */
202     struct biff_sheet_struct *next;	/* linked-list pointer */
203 } biff_sheet;
204 
205 typedef struct biff_format_struct
206 {
207 /* a struct representing DATE/DATETIME/TIME formats */
208     unsigned int format_index;
209     int is_date;
210     int is_datetime;
211     int is_time;
212 } biff_format;
213 
214 typedef struct fat_entry_struct
215 {
216 /* a FAT entry */
217     unsigned int current_sector;
218     unsigned int next_sector;
219     struct fat_entry_struct *next;
220 } fat_entry;
221 
222 typedef struct fat_chain_struct
223 {
224 /* a struct representing the FAT chain */
225     int swap;			/* Endiannes; swap required */
226     unsigned short sector_size;	/* sector size */
227     unsigned int next_sector;
228     unsigned int directory_start;	/* sector index for directory */
229     fat_entry *first;
230     fat_entry *last;
231     fat_entry **fat_array;
232     unsigned int fat_array_count;
233     unsigned int miniCutOff;
234     unsigned int next_sectorMini;
235     fat_entry *firstMini;
236     fat_entry *lastMini;
237     fat_entry **miniFAT_array;
238     unsigned int miniFAT_array_count;
239     unsigned int miniFAT_start;
240     unsigned int miniFAT_len;
241     unsigned char *miniStream;	/* the whole mini-stream */
242 } fat_chain;
243 
244 typedef struct biff_workbook_struct
245 {
246 /*
247  * a struct representing a BIFF Workbook
248  * BIFF stands for: Binary Interchange File Format
249  * which is a "file format" often used by MS Office
250  *
251  * an Excel Spreadsheet (.xls) is stored within a CFBF
252  * as a Workbook stream [pseudo-file]
253  */
254     int magic1;			/* magic signature #1 */
255     FILE *xls;			/* file handle */
256     fat_chain *fat;		/* FAT chain */
257     unsigned short cfbf_version;	/* CFBF version */
258     unsigned short cfbf_sector_size;	/* CFBF sector size */
259     unsigned int start_sector;	/* starting sector for Workbook */
260     unsigned int size;		/* total size of the Workbook stream */
261     unsigned int current_sector;	/* currently bufferd sector */
262     unsigned int bytes_read;	/* total bytes read since start */
263     unsigned int current_offset;	/* current stream offset */
264     unsigned char sector_buf[8192];	/* currently buffered sector(s) */
265     unsigned char *p_in;	/* current buffer pointer */
266     unsigned short sector_end;	/* current sector end */
267     int sector_ready;		/* 1=yes; 0=no; */
268     int ok_bof;			/* valid BOF found (BeginOfFile): -1=expected; 1=yes; 0=no; */
269     unsigned short biff_version;	/* BIFF version number */
270     unsigned short biff_max_record_size;	/* 2080 or 8224 depending on version */
271     unsigned short biff_content_type;	/* the type of the current sub-stream */
272     unsigned short biff_code_page;	/* the code-page for the current sub-stream */
273     unsigned short biff_book_code_page;	/* the Workbook code-page */
274     unsigned short biff_date_mode;	/* the date-mode: 0=1900-Jan-01; 1=1904-Jan-02; */
275     int biff_obfuscated;	/* 0=no; 1=yes (encrypted file) */
276     iconv_t utf8_converter;	/* ICONV charset converter */
277     iconv_t utf16_converter;	/* ICONV charset converter (for UTF-16) */
278     unsigned char record[8224];	/* current record */
279     unsigned short record_type;	/* current record identifier */
280     unsigned short prev_record_type;	/* previous record identifier */
281     unsigned int record_size;	/* current record size */
282     biff_string_table shared_strings;	/* the SST */
283     biff_sheet *first_sheet;	/* SHEET linked list - first item */
284     biff_sheet *last_sheet;	/* SHEET linked list - last item */
285     biff_sheet *active_sheet;	/* currently active SHEET */
286     int second_pass;		/* set to 1=TRUE for pass #2 */
287     biff_format format_array[BIFF_MAX_FORMAT];	/* the array for DATE/DATETIME/TIME formats */
288     unsigned short max_format_index;	/* max array index [formats] */
289     unsigned short biff_xf_array[BIFF_MAX_XF];	/* the array for XF/Format association */
290     unsigned short biff_xf_next_index;	/* next XF index */
291     int magic2;			/* magic signature #2 */
292 } biff_workbook;
293