1 /*
2 * wordole.c
3 * Copyright (C) 1998-2004 A.J. van Os; Released under GPL
4 *
5 * Description:
6 * Deal with the OLE internals of a MS Word file
7 */
8
9 #include <string.h>
10 #include "antiword.h"
11
12 /* Private type for Property Set Storage entries */
13 typedef struct pps_entry_tag {
14 ULONG ulNext;
15 ULONG ulPrevious;
16 ULONG ulDir;
17 ULONG ulSB;
18 ULONG ulSize;
19 int iLevel;
20 char szName[32];
21 UCHAR ucType;
22 } pps_entry_type;
23
/*
 * Marks a PPS number or index as "not present"; such a value should
 * never be used to address an entry in the PPS list
 */
#define PPS_NUMBER_INVALID	0xffffffffUL
26
27
/*
 * Macro to make sure all cleanup statements will be identical.
 * It destroys the small block list and hands each of the five list
 * pointers to xfree, storing the result back into the pointer.
 * The pointers must be in scope at the place where the macro is used.
 */
#define FREE_ALL()	\
	do {\
		vDestroySmallBlockList();\
		aulBBD = xfree(aulBBD);\
		aulSBD = xfree(aulSBD);\
		aulBbdList = xfree(aulBbdList);\
		aulSbdList = xfree(aulSbdList);\
		aulRootList = xfree(aulRootList);\
	} while(0)
38
39
40 /*
41 * ulReadLong - read four bytes from the given file and offset
42 */
43 static ULONG
ulReadLong(FILE * pFile,ULONG ulOffset)44 ulReadLong(FILE *pFile, ULONG ulOffset)
45 {
46 UCHAR aucBytes[4];
47
48 fail(pFile == NULL);
49
50 if (!bReadBytes(aucBytes, 4, ulOffset, pFile)) {
51 werr(1, "Read long 0x%lx not possible", ulOffset);
52 }
53 return ulGetLong(0, aucBytes);
54 } /* end of ulReadLong */
55
56 /*
57 * vName2String - turn the name into a proper string.
58 */
59 static void
vName2String(char * szName,const UCHAR * aucBytes,size_t tNameSize)60 vName2String(char *szName, const UCHAR *aucBytes, size_t tNameSize)
61 {
62 char *pcChar;
63 size_t tIndex;
64
65 fail(aucBytes == NULL || szName == NULL);
66
67 if (tNameSize < 2) {
68 szName[0] = '\0';
69 return;
70 }
71 for (tIndex = 0, pcChar = szName;
72 tIndex < 2 * tNameSize;
73 tIndex += 2, pcChar++) {
74 *pcChar = (char)aucBytes[tIndex];
75 }
76 szName[tNameSize - 1] = '\0';
77 } /* end of vName2String */
78
79 /*
80 * tReadBlockIndices - read the Big/Small Block Depot indices
81 *
82 * Returns the number of indices read
83 */
84 static size_t
tReadBlockIndices(FILE * pFile,ULONG * aulBlockDepot,size_t tMaxRec,ULONG ulOffset)85 tReadBlockIndices(FILE *pFile, ULONG *aulBlockDepot,
86 size_t tMaxRec, ULONG ulOffset)
87 {
88 size_t tDone;
89 int iIndex;
90 UCHAR aucBytes[BIG_BLOCK_SIZE];
91
92 fail(pFile == NULL || aulBlockDepot == NULL);
93 fail(tMaxRec == 0);
94
95 /* Read a big block with BBD or SBD indices */
96 if (!bReadBytes(aucBytes, BIG_BLOCK_SIZE, ulOffset, pFile)) {
97 werr(0, "Reading big block from 0x%lx is not possible",
98 ulOffset);
99 return 0;
100 }
101 /* Split the big block into indices, an index is four bytes */
102 tDone = min(tMaxRec, (size_t)BIG_BLOCK_SIZE / 4);
103 for (iIndex = 0; iIndex < (int)tDone; iIndex++) {
104 aulBlockDepot[iIndex] = ulGetLong(4 * iIndex, aucBytes);
105 NO_DBG_DEC(aulBlockDepot[iIndex]);
106 }
107 return tDone;
108 } /* end of tReadBlockIndices */
109
110 /*
111 * bGetBBD - get the Big Block Depot indices from the index-blocks
112 */
113 static BOOL
bGetBBD(FILE * pFile,const ULONG * aulDepot,size_t tDepotLen,ULONG * aulBBD,size_t tBBDLen)114 bGetBBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
115 ULONG *aulBBD, size_t tBBDLen)
116 {
117 ULONG ulBegin;
118 size_t tToGo, tDone;
119 int iIndex;
120
121 fail(pFile == NULL || aulDepot == NULL || aulBBD == NULL);
122
123 DBG_MSG("bGetBBD");
124
125 tToGo = tBBDLen;
126 for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
127 ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
128 NO_DBG_HEX(ulBegin);
129 tDone = tReadBlockIndices(pFile, aulBBD, tToGo, ulBegin);
130 fail(tDone > tToGo);
131 if (tDone == 0) {
132 return FALSE;
133 }
134 aulBBD += tDone;
135 tToGo -= tDone;
136 }
137 return tToGo == 0;
138 } /* end of bGetBBD */
139
140 /*
141 * bGetSBD - get the Small Block Depot indices from the index-blocks
142 */
143 static BOOL
bGetSBD(FILE * pFile,const ULONG * aulDepot,size_t tDepotLen,ULONG * aulSBD,size_t tSBDLen)144 bGetSBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
145 ULONG *aulSBD, size_t tSBDLen)
146 {
147 ULONG ulBegin;
148 size_t tToGo, tDone;
149 int iIndex;
150
151 fail(pFile == NULL || aulDepot == NULL || aulSBD == NULL);
152
153 DBG_MSG("bGetSBD");
154
155 tToGo = tSBDLen;
156 for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
157 fail(aulDepot[iIndex] >= ULONG_MAX / BIG_BLOCK_SIZE);
158 ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
159 NO_DBG_HEX(ulBegin);
160 tDone = tReadBlockIndices(pFile, aulSBD, tToGo, ulBegin);
161 fail(tDone > tToGo);
162 if (tDone == 0) {
163 return FALSE;
164 }
165 aulSBD += tDone;
166 tToGo -= tDone;
167 }
168 return tToGo == 0;
169 } /* end of bGetSBD */
170
171 /*
172 * vComputePPSlevels - compute the levels of the Property Set Storage entries
173 */
174 static void
vComputePPSlevels(pps_entry_type * atPPSlist,pps_entry_type * pNode,int iLevel,int iRecursionLevel)175 vComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode,
176 int iLevel, int iRecursionLevel)
177 {
178 fail(atPPSlist == NULL || pNode == NULL);
179 fail(iLevel < 0 || iRecursionLevel < 0);
180
181 if (iRecursionLevel > 25) {
182 /* This removes the possibility of an infinite recursion */
183 DBG_DEC(iRecursionLevel);
184 return;
185 }
186 if (pNode->iLevel <= iLevel) {
187 /* Avoid entering a loop */
188 DBG_DEC(iLevel);
189 DBG_DEC(pNode->iLevel);
190 return;
191 }
192
193 pNode->iLevel = iLevel;
194
195 if (pNode->ulDir != PPS_NUMBER_INVALID) {
196 vComputePPSlevels(atPPSlist,
197 &atPPSlist[pNode->ulDir],
198 iLevel + 1,
199 iRecursionLevel + 1);
200 }
201 if (pNode->ulNext != PPS_NUMBER_INVALID) {
202 vComputePPSlevels(atPPSlist,
203 &atPPSlist[pNode->ulNext],
204 iLevel,
205 iRecursionLevel + 1);
206 }
207 if (pNode->ulPrevious != PPS_NUMBER_INVALID) {
208 vComputePPSlevels(atPPSlist,
209 &atPPSlist[pNode->ulPrevious],
210 iLevel,
211 iRecursionLevel + 1);
212 }
213 } /* end of vComputePPSlevels */
214
215 /*
216 * bGetPPS - search the Property Set Storage for three sets
217 *
218 * Return TRUE if the WordDocument PPS is found
219 */
220 static BOOL
bGetPPS(FILE * pFile,const ULONG * aulRootList,size_t tRootListLen,pps_info_type * pPPS)221 bGetPPS(FILE *pFile,
222 const ULONG *aulRootList, size_t tRootListLen, pps_info_type *pPPS)
223 {
224 pps_entry_type *atPPSlist;
225 ULONG ulBegin, ulOffset, ulTmp;
226 size_t tNbrOfPPS, tNameSize;
227 int iIndex, iStartBlock, iRootIndex;
228 BOOL bWord, bExcel;
229 UCHAR aucBytes[PROPERTY_SET_STORAGE_SIZE];
230
231 fail(pFile == NULL || aulRootList == NULL || pPPS == NULL);
232
233 DBG_MSG("bGetPPS");
234
235 NO_DBG_DEC(tRootListLen);
236
237 bWord = FALSE;
238 bExcel = FALSE;
239 (void)memset(pPPS, 0, sizeof(*pPPS));
240
241 /* Read and store all the Property Set Storage entries */
242
243 tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE;
244 atPPSlist = xcalloc(tNbrOfPPS, sizeof(pps_entry_type));
245 iRootIndex = 0;
246
247 for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
248 ulTmp = (ULONG)iIndex * PROPERTY_SET_STORAGE_SIZE;
249 iStartBlock = (int)(ulTmp / BIG_BLOCK_SIZE);
250 ulOffset = ulTmp % BIG_BLOCK_SIZE;
251 ulBegin = (aulRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE +
252 ulOffset;
253 NO_DBG_HEX(ulBegin);
254 if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE,
255 ulBegin, pFile)) {
256 werr(0, "Reading PPS %d is not possible", iIndex);
257 atPPSlist = xfree(atPPSlist);
258 return FALSE;
259 }
260 tNameSize = (size_t)usGetWord(0x40, aucBytes);
261 tNameSize = (tNameSize + 1) / 2;
262 vName2String(atPPSlist[iIndex].szName, aucBytes, tNameSize);
263 atPPSlist[iIndex].ucType = ucGetByte(0x42, aucBytes);
264 if (atPPSlist[iIndex].ucType == 5) {
265 iRootIndex = iIndex;
266 }
267 atPPSlist[iIndex].ulPrevious = ulGetLong(0x44, aucBytes);
268 atPPSlist[iIndex].ulNext = ulGetLong(0x48, aucBytes);
269 atPPSlist[iIndex].ulDir = ulGetLong(0x4c, aucBytes);
270 atPPSlist[iIndex].ulSB = ulGetLong(0x74, aucBytes);
271 atPPSlist[iIndex].ulSize = ulGetLong(0x78, aucBytes);
272 atPPSlist[iIndex].iLevel = INT_MAX;
273 if ((atPPSlist[iIndex].ulPrevious >= (ULONG)tNbrOfPPS &&
274 atPPSlist[iIndex].ulPrevious != PPS_NUMBER_INVALID) ||
275 (atPPSlist[iIndex].ulNext >= (ULONG)tNbrOfPPS &&
276 atPPSlist[iIndex].ulNext != PPS_NUMBER_INVALID) ||
277 (atPPSlist[iIndex].ulDir >= (ULONG)tNbrOfPPS &&
278 atPPSlist[iIndex].ulDir != PPS_NUMBER_INVALID)) {
279 DBG_DEC(iIndex);
280 DBG_DEC(atPPSlist[iIndex].ulPrevious);
281 DBG_DEC(atPPSlist[iIndex].ulNext);
282 DBG_DEC(atPPSlist[iIndex].ulDir);
283 DBG_DEC(tNbrOfPPS);
284 werr(0, "The Property Set Storage is damaged");
285 atPPSlist = xfree(atPPSlist);
286 return FALSE;
287 }
288 }
289
290 #if 0 /* defined(DEBUG) */
291 DBG_MSG("Before");
292 for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
293 DBG_MSG(atPPSlist[iIndex].szName);
294 DBG_HEX(atPPSlist[iIndex].ulDir);
295 DBG_HEX(atPPSlist[iIndex].ulPrevious);
296 DBG_HEX(atPPSlist[iIndex].ulNext);
297 DBG_DEC(atPPSlist[iIndex].ulSB);
298 DBG_HEX(atPPSlist[iIndex].ulSize);
299 DBG_DEC(atPPSlist[iIndex].iLevel);
300 }
301 #endif /* DEBUG */
302
303 /* Add level information to each entry */
304 vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0);
305
306 /* Check the entries on level 1 for the required information */
307 NO_DBG_MSG("After");
308 for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
309 #if 0 /* defined(DEBUG) */
310 DBG_MSG(atPPSlist[iIndex].szName);
311 DBG_HEX(atPPSlist[iIndex].ulDir);
312 DBG_HEX(atPPSlist[iIndex].ulPrevious);
313 DBG_HEX(atPPSlist[iIndex].ulNext);
314 DBG_DEC(atPPSlist[iIndex].ulSB);
315 DBG_HEX(atPPSlist[iIndex].ulSize);
316 DBG_DEC(atPPSlist[iIndex].iLevel);
317 #endif /* DEBUG */
318 if (atPPSlist[iIndex].iLevel != 1 ||
319 atPPSlist[iIndex].ucType != 2 ||
320 atPPSlist[iIndex].szName[0] == '\0' ||
321 atPPSlist[iIndex].ulSize == 0) {
322 /* This entry can be ignored */
323 continue;
324 }
325 if (pPPS->tWordDocument.ulSize == 0 &&
326 STREQ(atPPSlist[iIndex].szName, "WordDocument")) {
327 pPPS->tWordDocument.ulSB = atPPSlist[iIndex].ulSB;
328 pPPS->tWordDocument.ulSize = atPPSlist[iIndex].ulSize;
329 bWord = TRUE;
330 } else if (pPPS->tData.ulSize == 0 &&
331 STREQ(atPPSlist[iIndex].szName, "Data")) {
332 pPPS->tData.ulSB = atPPSlist[iIndex].ulSB;
333 pPPS->tData.ulSize = atPPSlist[iIndex].ulSize;
334 } else if (pPPS->t0Table.ulSize == 0 &&
335 STREQ(atPPSlist[iIndex].szName, "0Table")) {
336 pPPS->t0Table.ulSB = atPPSlist[iIndex].ulSB;
337 pPPS->t0Table.ulSize = atPPSlist[iIndex].ulSize;
338 } else if (pPPS->t1Table.ulSize == 0 &&
339 STREQ(atPPSlist[iIndex].szName, "1Table")) {
340 pPPS->t1Table.ulSB = atPPSlist[iIndex].ulSB;
341 pPPS->t1Table.ulSize = atPPSlist[iIndex].ulSize;
342 } else if (pPPS->tSummaryInfo.ulSize == 0 &&
343 STREQ(atPPSlist[iIndex].szName,
344 "\005SummaryInformation")) {
345 pPPS->tSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
346 pPPS->tSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
347 } else if (pPPS->tDocSummaryInfo.ulSize == 0 &&
348 STREQ(atPPSlist[iIndex].szName,
349 "\005DocumentSummaryInformation")) {
350 pPPS->tDocSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
351 pPPS->tDocSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
352 } else if (STREQ(atPPSlist[iIndex].szName, "Book") ||
353 STREQ(atPPSlist[iIndex].szName, "Workbook")) {
354 bExcel = TRUE;
355 }
356 }
357
358 /* Free the space for the Property Set Storage entries */
359 atPPSlist = xfree(atPPSlist);
360
361 /* Draw your conclusions */
362 if (bWord) {
363 return TRUE;
364 }
365
366 if (bExcel) {
367 werr(0, "Sorry, but this is an Excel spreadsheet");
368 } else {
369 werr(0, "This OLE file does not contain a Word document");
370 }
371 return FALSE;
372 } /* end of bGetPPS */
373
374 /*
375 * vGetBbdList - make a list of the places to find big blocks
376 */
377 static void
vGetBbdList(FILE * pFile,int iNbr,ULONG * aulBbdList,ULONG ulOffset)378 vGetBbdList(FILE *pFile, int iNbr, ULONG *aulBbdList, ULONG ulOffset)
379 {
380 int iIndex;
381
382 fail(pFile == NULL);
383 fail(iNbr > 127);
384 fail(aulBbdList == NULL);
385
386 NO_DBG_DEC(iNbr);
387 for (iIndex = 0; iIndex < iNbr; iIndex++) {
388 aulBbdList[iIndex] =
389 ulReadLong(pFile, ulOffset + 4 * (ULONG)iIndex);
390 NO_DBG_DEC(iIndex);
391 NO_DBG_HEX(aulBbdList[iIndex]);
392 }
393 } /* end of vGetBbdList */
394
395 /*
396 * bGetDocumentText - make a list of the text blocks of a Word document
397 *
398 * Return TRUE when succesful, otherwise FALSE
399 */
400 static BOOL
bGetDocumentText(FILE * pFile,const pps_info_type * pPPS,const ULONG * aulBBD,size_t tBBDLen,const ULONG * aulSBD,size_t tSBDLen,const UCHAR * aucHeader,int iWordVersion)401 bGetDocumentText(FILE *pFile, const pps_info_type *pPPS,
402 const ULONG *aulBBD, size_t tBBDLen,
403 const ULONG *aulSBD, size_t tSBDLen,
404 const UCHAR *aucHeader, int iWordVersion)
405 {
406 ULONG ulBeginOfText;
407 ULONG ulTextLen, ulFootnoteLen, ulEndnoteLen;
408 ULONG ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
409 ULONG ulTextBoxLen, ulHdrTextBoxLen;
410 UINT uiQuickSaves;
411 BOOL bFarEastWord, bTemplate, bFastSaved, bEncrypted, bSuccess;
412 USHORT usIdent, usDocStatus;
413
414 fail(pFile == NULL || pPPS == NULL);
415 fail(aulBBD == NULL);
416 fail(aulSBD == NULL);
417
418 DBG_MSG("bGetDocumentText");
419
420 /* Get the "magic number" from the header */
421 usIdent = usGetWord(0x00, aucHeader);
422 DBG_HEX(usIdent);
423 bFarEastWord = usIdent == 0x8098 || usIdent == 0x8099 ||
424 usIdent == 0xa697 || usIdent == 0xa699;
425 /* Get the status flags from the header */
426 usDocStatus = usGetWord(0x0a, aucHeader);
427 DBG_HEX(usDocStatus);
428 bTemplate = (usDocStatus & BIT(0)) != 0;
429 DBG_MSG_C(bTemplate, "This document is a Template");
430 bFastSaved = (usDocStatus & BIT(2)) != 0;
431 uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
432 DBG_MSG_C(bFastSaved, "This document is Fast Saved");
433 DBG_DEC_C(bFastSaved, uiQuickSaves);
434 bEncrypted = (usDocStatus & BIT(8)) != 0;
435 if (bEncrypted) {
436 werr(0, "Encrypted documents are not supported");
437 return FALSE;
438 }
439
440 /* Get length information */
441 ulBeginOfText = ulGetLong(0x18, aucHeader);
442 DBG_HEX(ulBeginOfText);
443 switch (iWordVersion) {
444 case 6:
445 case 7:
446 ulTextLen = ulGetLong(0x34, aucHeader);
447 ulFootnoteLen = ulGetLong(0x38, aucHeader);
448 ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
449 ulMacroLen = ulGetLong(0x40, aucHeader);
450 ulAnnotationLen = ulGetLong(0x44, aucHeader);
451 ulEndnoteLen = ulGetLong(0x48, aucHeader);
452 ulTextBoxLen = ulGetLong(0x4c, aucHeader);
453 ulHdrTextBoxLen = ulGetLong(0x50, aucHeader);
454 break;
455 case 8:
456 ulTextLen = ulGetLong(0x4c, aucHeader);
457 ulFootnoteLen = ulGetLong(0x50, aucHeader);
458 ulHdrFtrLen = ulGetLong(0x54, aucHeader);
459 ulMacroLen = ulGetLong(0x58, aucHeader);
460 ulAnnotationLen = ulGetLong(0x5c, aucHeader);
461 ulEndnoteLen = ulGetLong(0x60, aucHeader);
462 ulTextBoxLen = ulGetLong(0x64, aucHeader);
463 ulHdrTextBoxLen = ulGetLong(0x68, aucHeader);
464 break;
465 default:
466 werr(0, "This version of Word is not supported");
467 return FALSE;
468 }
469 DBG_DEC(ulTextLen);
470 DBG_DEC(ulFootnoteLen);
471 DBG_DEC(ulHdrFtrLen);
472 DBG_DEC(ulMacroLen);
473 DBG_DEC(ulAnnotationLen);
474 DBG_DEC(ulEndnoteLen);
475 DBG_DEC(ulTextBoxLen);
476 DBG_DEC(ulHdrTextBoxLen);
477
478 /* Make a list of the text blocks */
479 switch (iWordVersion) {
480 case 6:
481 case 7:
482 if (bFastSaved) {
483 bSuccess = bGet6DocumentText(pFile,
484 bFarEastWord,
485 pPPS->tWordDocument.ulSB,
486 aulBBD, tBBDLen,
487 aucHeader);
488 } else {
489 bSuccess = bAddTextBlocks(ulBeginOfText,
490 ulTextLen +
491 ulFootnoteLen +
492 ulHdrFtrLen +
493 ulMacroLen + ulAnnotationLen +
494 ulEndnoteLen +
495 ulTextBoxLen + ulHdrTextBoxLen,
496 bFarEastWord,
497 IGNORE_PROPMOD,
498 pPPS->tWordDocument.ulSB,
499 aulBBD, tBBDLen);
500 }
501 break;
502 case 8:
503 bSuccess = bGet8DocumentText(pFile,
504 pPPS,
505 aulBBD, tBBDLen, aulSBD, tSBDLen,
506 aucHeader);
507 break;
508 default:
509 werr(0, "This version of Word is not supported");
510 bSuccess = FALSE;
511 break;
512 }
513
514 if (bSuccess) {
515 vSplitBlockList(pFile,
516 ulTextLen,
517 ulFootnoteLen,
518 ulHdrFtrLen,
519 ulMacroLen,
520 ulAnnotationLen,
521 ulEndnoteLen,
522 ulTextBoxLen,
523 ulHdrTextBoxLen,
524 !bFastSaved && iWordVersion == 8);
525 } else {
526 vDestroyTextBlockList();
527 werr(0, "I can't find the text of this document");
528 }
529 return bSuccess;
530 } /* end of bGetDocumentText */
531
532 /*
533 * vGetDocumentData - make a list of the data blocks of a Word document
534 */
535 static void
vGetDocumentData(FILE * pFile,const pps_info_type * pPPS,const ULONG * aulBBD,size_t tBBDLen,const UCHAR * aucHeader,int iWordVersion)536 vGetDocumentData(FILE *pFile, const pps_info_type *pPPS,
537 const ULONG *aulBBD, size_t tBBDLen,
538 const UCHAR *aucHeader, int iWordVersion)
539 {
540 options_type tOptions;
541 ULONG ulBeginOfText;
542 BOOL bFastSaved, bHasImages, bSuccess;
543 USHORT usDocStatus;
544
545 fail(pFile == NULL);
546 fail(pPPS == NULL);
547 fail(aulBBD == NULL);
548
549 /* Get the options */
550 vGetOptions(&tOptions);
551
552 /* Get the status flags from the header */
553 usDocStatus = usGetWord(0x0a, aucHeader);
554 DBG_HEX(usDocStatus);
555 bFastSaved = (usDocStatus & BIT(2)) != 0;
556 bHasImages = (usDocStatus & BIT(3)) != 0;
557
558 if (!bHasImages ||
559 tOptions.eConversionType == conversion_text ||
560 tOptions.eConversionType == conversion_fmt_text ||
561 #if CR3_ANTIWORD_PATCH!=1
562 tOptions.eConversionType == conversion_xml ||
563 #endif
564 tOptions.eImageLevel == level_no_images) {
565 /*
566 * No images in the document or text-only output or
567 * no images wanted, so no data blocks will be needed
568 */
569 vDestroyDataBlockList();
570 return;
571 }
572
573 /* Get length information */
574 ulBeginOfText = ulGetLong(0x18, aucHeader);
575 DBG_HEX(ulBeginOfText);
576
577 /* Make a list of the data blocks */
578 switch (iWordVersion) {
579 case 6:
580 case 7:
581 /*
582 * The data blocks are in the text stream. The text stream
583 * is in "fast saved" format or "normal saved" format
584 */
585 if (bFastSaved) {
586 bSuccess = bGet6DocumentData(pFile,
587 pPPS->tWordDocument.ulSB,
588 aulBBD, tBBDLen,
589 aucHeader);
590 } else {
591 bSuccess = bAddDataBlocks(ulBeginOfText,
592 (ULONG)LONG_MAX,
593 pPPS->tWordDocument.ulSB,
594 aulBBD, tBBDLen);
595 }
596 break;
597 case 8:
598 /*
599 * The data blocks are in the data stream. The data stream
600 * is always in "normal saved" format
601 */
602 bSuccess = bAddDataBlocks(0, (ULONG)LONG_MAX,
603 pPPS->tData.ulSB, aulBBD, tBBDLen);
604 break;
605 default:
606 werr(0, "This version of Word is not supported");
607 bSuccess = FALSE;
608 break;
609 }
610
611 if (!bSuccess) {
612 vDestroyDataBlockList();
613 werr(0, "I can't find the data of this document");
614 }
615 } /* end of vGetDocumentData */
616
617 /*
618 * iInitDocumentOLE - initialize an OLE document
619 *
620 * Returns the version of Word that made the document or -1
621 */
622 int
iInitDocumentOLE(FILE * pFile,long lFilesize)623 iInitDocumentOLE(FILE *pFile, long lFilesize)
624 {
625 pps_info_type PPS_info;
626 ULONG *aulBBD, *aulSBD;
627 ULONG *aulRootList, *aulBbdList, *aulSbdList;
628 ULONG ulBdbListStart, ulAdditionalBBDlist;
629 ULONG ulRootStartblock, ulSbdStartblock, ulSBLstartblock;
630 ULONG ulStart, ulTmp;
631 long lMaxBlock;
632 size_t tBBDLen, tSBDLen, tNumBbdBlocks, tRootListLen;
633 int iWordVersion, iIndex, iToGo;
634 BOOL bSuccess;
635 USHORT usIdent, usDocStatus;
636 UCHAR aucHeader[HEADER_SIZE];
637
638 fail(pFile == NULL);
639
640 lMaxBlock = lFilesize / BIG_BLOCK_SIZE - 2;
641 DBG_DEC(lMaxBlock);
642 if (lMaxBlock < 1) {
643 return -1;
644 }
645 tBBDLen = (size_t)(lMaxBlock + 1);
646 tNumBbdBlocks = (size_t)ulReadLong(pFile, 0x2c);
647 DBG_DEC(tNumBbdBlocks);
648 ulRootStartblock = ulReadLong(pFile, 0x30);
649 DBG_DEC(ulRootStartblock);
650 ulSbdStartblock = ulReadLong(pFile, 0x3c);
651 DBG_DEC(ulSbdStartblock);
652 ulAdditionalBBDlist = ulReadLong(pFile, 0x44);
653 DBG_HEX(ulAdditionalBBDlist);
654 ulSBLstartblock = ulReadLong(pFile,
655 (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x74);
656 DBG_DEC(ulSBLstartblock);
657 tSBDLen = (size_t)(ulReadLong(pFile,
658 (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x78) /
659 SMALL_BLOCK_SIZE);
660 /* All to be xcalloc-ed pointers to NULL */
661 aulRootList = NULL;
662 aulSbdList = NULL;
663 aulBbdList = NULL;
664 aulSBD = NULL;
665 aulBBD = NULL;
666 /* Big Block Depot */
667 aulBbdList = xcalloc(tNumBbdBlocks, sizeof(ULONG));
668 aulBBD = xcalloc(tBBDLen, sizeof(ULONG));
669 iToGo = (int)tNumBbdBlocks;
670 vGetBbdList(pFile, min(iToGo, 109), aulBbdList, 0x4c);
671 ulStart = 109;
672 iToGo -= 109;
673 while (ulAdditionalBBDlist != END_OF_CHAIN && iToGo > 0) {
674 ulBdbListStart = (ulAdditionalBBDlist + 1) * BIG_BLOCK_SIZE;
675 vGetBbdList(pFile, min(iToGo, 127),
676 aulBbdList + ulStart, ulBdbListStart);
677 ulAdditionalBBDlist = ulReadLong(pFile,
678 ulBdbListStart + 4 * 127);
679 DBG_DEC(ulAdditionalBBDlist);
680 DBG_HEX(ulAdditionalBBDlist);
681 ulStart += 127;
682 iToGo -= 127;
683 }
684 if (!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) {
685 FREE_ALL();
686 return -1;
687 }
688 aulBbdList = xfree(aulBbdList);
689 /* Small Block Depot */
690 aulSbdList = xcalloc(tBBDLen, sizeof(ULONG));
691 aulSBD = xcalloc(tSBDLen, sizeof(ULONG));
692 for (iIndex = 0, ulTmp = ulSbdStartblock;
693 iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
694 iIndex++, ulTmp = aulBBD[ulTmp]) {
695 if (ulTmp >= (ULONG)tBBDLen) {
696 DBG_DEC(ulTmp);
697 DBG_DEC(tBBDLen);
698 werr(1, "The Big Block Depot is damaged");
699 }
700 aulSbdList[iIndex] = ulTmp;
701 NO_DBG_HEX(aulSbdList[iIndex]);
702 }
703 if (!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen)) {
704 FREE_ALL();
705 return -1;
706 }
707 aulSbdList = xfree(aulSbdList);
708 /* Root list */
709 for (tRootListLen = 0, ulTmp = ulRootStartblock;
710 tRootListLen < tBBDLen && ulTmp != END_OF_CHAIN;
711 tRootListLen++, ulTmp = aulBBD[ulTmp]) {
712 if (ulTmp >= (ULONG)tBBDLen) {
713 DBG_DEC(ulTmp);
714 DBG_DEC(tBBDLen);
715 werr(1, "The Big Block Depot is damaged");
716 }
717 }
718 if (tRootListLen == 0) {
719 werr(0, "No Rootlist found");
720 FREE_ALL();
721 return -1;
722 }
723 aulRootList = xcalloc(tRootListLen, sizeof(ULONG));
724 for (iIndex = 0, ulTmp = ulRootStartblock;
725 iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
726 iIndex++, ulTmp = aulBBD[ulTmp]) {
727 if (ulTmp >= (ULONG)tBBDLen) {
728 DBG_DEC(ulTmp);
729 DBG_DEC(tBBDLen);
730 werr(1, "The Big Block Depot is damaged");
731 }
732 aulRootList[iIndex] = ulTmp;
733 NO_DBG_DEC(aulRootList[iIndex]);
734 }
735 fail(tRootListLen != (size_t)iIndex);
736 bSuccess = bGetPPS(pFile, aulRootList, tRootListLen, &PPS_info);
737 aulRootList = xfree(aulRootList);
738 if (!bSuccess) {
739 FREE_ALL();
740 return -1;
741 }
742 /* Small block list */
743 if (!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) {
744 FREE_ALL();
745 return -1;
746 }
747
748 if (PPS_info.tWordDocument.ulSize < MIN_SIZE_FOR_BBD_USE) {
749 DBG_DEC(PPS_info.tWordDocument.ulSize);
750 FREE_ALL();
751 werr(0, "I'm afraid the text stream of this file "
752 "is too small to handle.");
753 return -1;
754 }
755 /* Read the headerblock */
756 if (!bReadBuffer(pFile, PPS_info.tWordDocument.ulSB,
757 aulBBD, tBBDLen, BIG_BLOCK_SIZE,
758 aucHeader, 0, HEADER_SIZE)) {
759 FREE_ALL();
760 return -1;
761 }
762 usIdent = usGetWord(0x00, aucHeader);
763 DBG_HEX(usIdent);
764 fail(usIdent != 0x8098 && /* Word 7 for oriental languages */
765 usIdent != 0x8099 && /* Word 7 for oriental languages */
766 usIdent != 0xa5dc && /* Word 6 & 7 */
767 usIdent != 0xa5ec && /* Word 7 & 97 & 98 */
768 usIdent != 0xa697 && /* Word 7 for oriental languages */
769 usIdent != 0xa699); /* Word 7 for oriental languages */
770 iWordVersion = iGetVersionNumber(aucHeader);
771 if (iWordVersion < 6) {
772 FREE_ALL();
773 werr(0, "This file is from a version of Word before Word 6.");
774 return -1;
775 }
776
777 /* Get the status flags from the header */
778 usDocStatus = usGetWord(0x0a, aucHeader);
779 if (usDocStatus & BIT(9)) {
780 PPS_info.tTable = PPS_info.t1Table;
781 } else {
782 PPS_info.tTable = PPS_info.t0Table;
783 }
784 /* Clean the entries that should not be used */
785 memset(&PPS_info.t0Table, 0, sizeof(PPS_info.t0Table));
786 memset(&PPS_info.t1Table, 0, sizeof(PPS_info.t1Table));
787
788 bSuccess = bGetDocumentText(pFile, &PPS_info,
789 aulBBD, tBBDLen, aulSBD, tSBDLen,
790 aucHeader, iWordVersion);
791 if (bSuccess) {
792 vGetDocumentData(pFile, &PPS_info,
793 aulBBD, tBBDLen, aucHeader, iWordVersion);
794 vGetPropertyInfo(pFile, &PPS_info,
795 aulBBD, tBBDLen, aulSBD, tSBDLen,
796 aucHeader, iWordVersion);
797 vSetDefaultTabWidth(pFile, &PPS_info,
798 aulBBD, tBBDLen, aulSBD, tSBDLen,
799 aucHeader, iWordVersion);
800 vGetNotesInfo(pFile, &PPS_info,
801 aulBBD, tBBDLen, aulSBD, tSBDLen,
802 aucHeader, iWordVersion);
803 }
804 FREE_ALL();
805 return bSuccess ? iWordVersion : -1;
806 } /* end of iInitDocumentOLE */
807