/*
 * wordole.c
 * Copyright (C) 1998-2004 A.J. van Os; Released under GPL
 *
 * Description:
 * Deal with the OLE internals of a MS Word file
 */
8 
9 #include <string.h>
10 #include "antiword.h"
11 
12 /* Private type for Property Set Storage entries */
13 typedef struct pps_entry_tag {
14 	ULONG	ulNext;
15 	ULONG	ulPrevious;
16 	ULONG	ulDir;
17 	ULONG	ulSB;
18 	ULONG	ulSize;
19 	int	iLevel;
20 	char	szName[32];
21 	UCHAR	ucType;
22 } pps_entry_type;
23 
/* Marks a PPS number or index that is absent and must not be followed */
#define PPS_NUMBER_INVALID	0xffffffffUL
26 
27 
/*
 * Release the small block list and every array iInitDocumentOLE works with.
 * Kept as a macro so that all exit paths clean up in exactly the same way.
 * It expects the local pointers of iInitDocumentOLE to be in scope and
 * stores the result of xfree back into each of them.
 */
#define FREE_ALL()					\
	do {						\
		vDestroySmallBlockList();		\
		aulRootList = xfree(aulRootList);	\
		aulSbdList = xfree(aulSbdList);		\
		aulBbdList = xfree(aulBbdList);		\
		aulSBD = xfree(aulSBD);			\
		aulBBD = xfree(aulBBD);			\
	} while (0)
38 
39 
40 /*
41  * ulReadLong - read four bytes from the given file and offset
42  */
43 static ULONG
ulReadLong(FILE * pFile,ULONG ulOffset)44 ulReadLong(FILE *pFile, ULONG ulOffset)
45 {
46 	UCHAR	aucBytes[4];
47 
48 	fail(pFile == NULL);
49 
50 	if (!bReadBytes(aucBytes, 4, ulOffset, pFile)) {
51 		werr(1, "Read long 0x%lx not possible", ulOffset);
52 	}
53 	return ulGetLong(0, aucBytes);
54 } /* end of ulReadLong */
55 
56 /*
57  * vName2String - turn the name into a proper string.
58  */
59 static void
vName2String(char * szName,const UCHAR * aucBytes,size_t tNameSize)60 vName2String(char *szName, const UCHAR *aucBytes, size_t tNameSize)
61 {
62 	char	*pcChar;
63 	size_t	tIndex;
64 
65 	fail(aucBytes == NULL || szName == NULL);
66 
67 	if (tNameSize < 2) {
68 		szName[0] = '\0';
69 		return;
70 	}
71 	for (tIndex = 0, pcChar = szName;
72 	     tIndex < 2 * tNameSize;
73 	     tIndex += 2, pcChar++) {
74 		*pcChar = (char)aucBytes[tIndex];
75 	}
76 	szName[tNameSize - 1] = '\0';
77 } /* end of vName2String */
78 
79 /*
80  * tReadBlockIndices - read the Big/Small Block Depot indices
81  *
82  * Returns the number of indices read
83  */
84 static size_t
tReadBlockIndices(FILE * pFile,ULONG * aulBlockDepot,size_t tMaxRec,ULONG ulOffset)85 tReadBlockIndices(FILE *pFile, ULONG *aulBlockDepot,
86 	size_t tMaxRec, ULONG ulOffset)
87 {
88 	size_t	tDone;
89 	int	iIndex;
90 	UCHAR	aucBytes[BIG_BLOCK_SIZE];
91 
92 	fail(pFile == NULL || aulBlockDepot == NULL);
93 	fail(tMaxRec == 0);
94 
95 	/* Read a big block with BBD or SBD indices */
96 	if (!bReadBytes(aucBytes, BIG_BLOCK_SIZE, ulOffset, pFile)) {
97 		werr(0, "Reading big block from 0x%lx is not possible",
98 			ulOffset);
99 		return 0;
100 	}
101 	/* Split the big block into indices, an index is four bytes */
102 	tDone = min(tMaxRec, (size_t)BIG_BLOCK_SIZE / 4);
103 	for (iIndex = 0; iIndex < (int)tDone; iIndex++) {
104 		aulBlockDepot[iIndex] = ulGetLong(4 * iIndex, aucBytes);
105 		NO_DBG_DEC(aulBlockDepot[iIndex]);
106 	}
107 	return tDone;
108 } /* end of tReadBlockIndices */
109 
110 /*
111  * bGetBBD - get the Big Block Depot indices from the index-blocks
112  */
113 static BOOL
bGetBBD(FILE * pFile,const ULONG * aulDepot,size_t tDepotLen,ULONG * aulBBD,size_t tBBDLen)114 bGetBBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
115 	ULONG *aulBBD, size_t tBBDLen)
116 {
117 	ULONG	ulBegin;
118 	size_t	tToGo, tDone;
119 	int	iIndex;
120 
121 	fail(pFile == NULL || aulDepot == NULL || aulBBD == NULL);
122 
123 	DBG_MSG("bGetBBD");
124 
125 	tToGo = tBBDLen;
126 	for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
127 		ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
128 		NO_DBG_HEX(ulBegin);
129 		tDone = tReadBlockIndices(pFile, aulBBD, tToGo, ulBegin);
130 		fail(tDone > tToGo);
131 		if (tDone == 0) {
132 			return FALSE;
133 		}
134 		aulBBD += tDone;
135 		tToGo -= tDone;
136 	}
137 	return tToGo == 0;
138 } /* end of bGetBBD */
139 
140 /*
141  * bGetSBD - get the Small Block Depot indices from the index-blocks
142  */
143 static BOOL
bGetSBD(FILE * pFile,const ULONG * aulDepot,size_t tDepotLen,ULONG * aulSBD,size_t tSBDLen)144 bGetSBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
145 	ULONG *aulSBD, size_t tSBDLen)
146 {
147 	ULONG	ulBegin;
148 	size_t	tToGo, tDone;
149 	int	iIndex;
150 
151 	fail(pFile == NULL || aulDepot == NULL || aulSBD == NULL);
152 
153 	DBG_MSG("bGetSBD");
154 
155 	tToGo = tSBDLen;
156 	for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
157 		fail(aulDepot[iIndex] >= ULONG_MAX / BIG_BLOCK_SIZE);
158 		ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
159 		NO_DBG_HEX(ulBegin);
160 		tDone = tReadBlockIndices(pFile, aulSBD, tToGo, ulBegin);
161 		fail(tDone > tToGo);
162 		if (tDone == 0) {
163 			return FALSE;
164 		}
165 		aulSBD += tDone;
166 		tToGo -= tDone;
167 	}
168 	return tToGo == 0;
169 } /* end of bGetSBD */
170 
171 /*
172  * vComputePPSlevels - compute the levels of the Property Set Storage entries
173  */
174 static void
vComputePPSlevels(pps_entry_type * atPPSlist,pps_entry_type * pNode,int iLevel,int iRecursionLevel)175 vComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode,
176 			int iLevel, int iRecursionLevel)
177 {
178 	fail(atPPSlist == NULL || pNode == NULL);
179 	fail(iLevel < 0 || iRecursionLevel < 0);
180 
181 	if (iRecursionLevel > 25) {
182 		/* This removes the possibility of an infinite recursion */
183 		DBG_DEC(iRecursionLevel);
184 		return;
185 	}
186 	if (pNode->iLevel <= iLevel) {
187 		/* Avoid entering a loop */
188 		DBG_DEC(iLevel);
189 		DBG_DEC(pNode->iLevel);
190 		return;
191 	}
192 
193 	pNode->iLevel = iLevel;
194 
195 	if (pNode->ulDir != PPS_NUMBER_INVALID) {
196 		vComputePPSlevels(atPPSlist,
197 				&atPPSlist[pNode->ulDir],
198 				iLevel + 1,
199 				iRecursionLevel + 1);
200 	}
201 	if (pNode->ulNext != PPS_NUMBER_INVALID) {
202 		vComputePPSlevels(atPPSlist,
203 				&atPPSlist[pNode->ulNext],
204 				iLevel,
205 				iRecursionLevel + 1);
206 	}
207 	if (pNode->ulPrevious != PPS_NUMBER_INVALID) {
208 		vComputePPSlevels(atPPSlist,
209 				&atPPSlist[pNode->ulPrevious],
210 				iLevel,
211 				iRecursionLevel + 1);
212 	}
213 } /* end of vComputePPSlevels */
214 
215 /*
216  * bGetPPS - search the Property Set Storage for three sets
217  *
218  * Return TRUE if the WordDocument PPS is found
219  */
220 static BOOL
bGetPPS(FILE * pFile,const ULONG * aulRootList,size_t tRootListLen,pps_info_type * pPPS)221 bGetPPS(FILE *pFile,
222 	const ULONG *aulRootList, size_t tRootListLen, pps_info_type *pPPS)
223 {
224 	pps_entry_type	*atPPSlist;
225 	ULONG	ulBegin, ulOffset, ulTmp;
226 	size_t	tNbrOfPPS, tNameSize;
227 	int	iIndex, iStartBlock, iRootIndex;
228 	BOOL	bWord, bExcel;
229 	UCHAR	aucBytes[PROPERTY_SET_STORAGE_SIZE];
230 
231 	fail(pFile == NULL || aulRootList == NULL || pPPS == NULL);
232 
233 	DBG_MSG("bGetPPS");
234 
235 	NO_DBG_DEC(tRootListLen);
236 
237 	bWord = FALSE;
238 	bExcel = FALSE;
239 	(void)memset(pPPS, 0, sizeof(*pPPS));
240 
241 	/* Read and store all the Property Set Storage entries */
242 
243 	tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE;
244 	atPPSlist = xcalloc(tNbrOfPPS, sizeof(pps_entry_type));
245 	iRootIndex = 0;
246 
247 	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
248 		ulTmp = (ULONG)iIndex * PROPERTY_SET_STORAGE_SIZE;
249 		iStartBlock = (int)(ulTmp / BIG_BLOCK_SIZE);
250 		ulOffset = ulTmp % BIG_BLOCK_SIZE;
251 		ulBegin = (aulRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE +
252 				ulOffset;
253 		NO_DBG_HEX(ulBegin);
254 		if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE,
255 							ulBegin, pFile)) {
256 			werr(0, "Reading PPS %d is not possible", iIndex);
257 			atPPSlist = xfree(atPPSlist);
258 			return FALSE;
259 		}
260 		tNameSize = (size_t)usGetWord(0x40, aucBytes);
261 		tNameSize = (tNameSize + 1) / 2;
262 		vName2String(atPPSlist[iIndex].szName, aucBytes, tNameSize);
263 		atPPSlist[iIndex].ucType = ucGetByte(0x42, aucBytes);
264 		if (atPPSlist[iIndex].ucType == 5) {
265 			iRootIndex = iIndex;
266 		}
267 		atPPSlist[iIndex].ulPrevious = ulGetLong(0x44, aucBytes);
268 		atPPSlist[iIndex].ulNext = ulGetLong(0x48, aucBytes);
269 		atPPSlist[iIndex].ulDir = ulGetLong(0x4c, aucBytes);
270 		atPPSlist[iIndex].ulSB = ulGetLong(0x74, aucBytes);
271 		atPPSlist[iIndex].ulSize = ulGetLong(0x78, aucBytes);
272 		atPPSlist[iIndex].iLevel = INT_MAX;
273 		if ((atPPSlist[iIndex].ulPrevious >= (ULONG)tNbrOfPPS &&
274 		     atPPSlist[iIndex].ulPrevious != PPS_NUMBER_INVALID) ||
275 		    (atPPSlist[iIndex].ulNext >= (ULONG)tNbrOfPPS &&
276 		     atPPSlist[iIndex].ulNext != PPS_NUMBER_INVALID) ||
277 		    (atPPSlist[iIndex].ulDir >= (ULONG)tNbrOfPPS &&
278 		     atPPSlist[iIndex].ulDir != PPS_NUMBER_INVALID)) {
279 			DBG_DEC(iIndex);
280 			DBG_DEC(atPPSlist[iIndex].ulPrevious);
281 			DBG_DEC(atPPSlist[iIndex].ulNext);
282 			DBG_DEC(atPPSlist[iIndex].ulDir);
283 			DBG_DEC(tNbrOfPPS);
284 			werr(0, "The Property Set Storage is damaged");
285 			atPPSlist = xfree(atPPSlist);
286 			return FALSE;
287 		}
288 	}
289 
290 #if 0 /* defined(DEBUG) */
291 	DBG_MSG("Before");
292 	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
293 		DBG_MSG(atPPSlist[iIndex].szName);
294 		DBG_HEX(atPPSlist[iIndex].ulDir);
295 		DBG_HEX(atPPSlist[iIndex].ulPrevious);
296 		DBG_HEX(atPPSlist[iIndex].ulNext);
297 		DBG_DEC(atPPSlist[iIndex].ulSB);
298 		DBG_HEX(atPPSlist[iIndex].ulSize);
299 		DBG_DEC(atPPSlist[iIndex].iLevel);
300 	}
301 #endif /* DEBUG */
302 
303 	/* Add level information to each entry */
304 	vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0);
305 
306 	/* Check the entries on level 1 for the required information */
307 	NO_DBG_MSG("After");
308 	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
309 #if 0 /* defined(DEBUG) */
310 		DBG_MSG(atPPSlist[iIndex].szName);
311 		DBG_HEX(atPPSlist[iIndex].ulDir);
312 		DBG_HEX(atPPSlist[iIndex].ulPrevious);
313 		DBG_HEX(atPPSlist[iIndex].ulNext);
314 		DBG_DEC(atPPSlist[iIndex].ulSB);
315 		DBG_HEX(atPPSlist[iIndex].ulSize);
316 		DBG_DEC(atPPSlist[iIndex].iLevel);
317 #endif /* DEBUG */
318 		if (atPPSlist[iIndex].iLevel != 1 ||
319 		    atPPSlist[iIndex].ucType != 2 ||
320 		    atPPSlist[iIndex].szName[0] == '\0' ||
321 		    atPPSlist[iIndex].ulSize == 0) {
322 			/* This entry can be ignored */
323 			continue;
324 		}
325 		if (pPPS->tWordDocument.ulSize == 0 &&
326 		    STREQ(atPPSlist[iIndex].szName, "WordDocument")) {
327 			pPPS->tWordDocument.ulSB = atPPSlist[iIndex].ulSB;
328 			pPPS->tWordDocument.ulSize = atPPSlist[iIndex].ulSize;
329 			bWord = TRUE;
330 		} else if (pPPS->tData.ulSize == 0 &&
331 			   STREQ(atPPSlist[iIndex].szName, "Data")) {
332 			pPPS->tData.ulSB = atPPSlist[iIndex].ulSB;
333 			pPPS->tData.ulSize = atPPSlist[iIndex].ulSize;
334 		} else if (pPPS->t0Table.ulSize == 0 &&
335 			   STREQ(atPPSlist[iIndex].szName, "0Table")) {
336 			pPPS->t0Table.ulSB = atPPSlist[iIndex].ulSB;
337 			pPPS->t0Table.ulSize = atPPSlist[iIndex].ulSize;
338 		} else if (pPPS->t1Table.ulSize == 0 &&
339 			   STREQ(atPPSlist[iIndex].szName, "1Table")) {
340 			pPPS->t1Table.ulSB = atPPSlist[iIndex].ulSB;
341 			pPPS->t1Table.ulSize = atPPSlist[iIndex].ulSize;
342 		} else if (pPPS->tSummaryInfo.ulSize == 0 &&
343 			   STREQ(atPPSlist[iIndex].szName,
344 						"\005SummaryInformation")) {
345 			pPPS->tSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
346 			pPPS->tSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
347 		} else if (pPPS->tDocSummaryInfo.ulSize == 0 &&
348 			   STREQ(atPPSlist[iIndex].szName,
349 					"\005DocumentSummaryInformation")) {
350 			pPPS->tDocSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
351 			pPPS->tDocSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
352 		} else if (STREQ(atPPSlist[iIndex].szName, "Book") ||
353 			   STREQ(atPPSlist[iIndex].szName, "Workbook")) {
354 			bExcel = TRUE;
355 		}
356 	}
357 
358 	/* Free the space for the Property Set Storage entries */
359 	atPPSlist = xfree(atPPSlist);
360 
361 	/* Draw your conclusions */
362 	if (bWord) {
363 		return TRUE;
364 	}
365 
366 	if (bExcel) {
367 		werr(0, "Sorry, but this is an Excel spreadsheet");
368 	} else {
369 		werr(0, "This OLE file does not contain a Word document");
370 	}
371 	return FALSE;
372 } /* end of bGetPPS */
373 
374 /*
375  * vGetBbdList - make a list of the places to find big blocks
376  */
377 static void
vGetBbdList(FILE * pFile,int iNbr,ULONG * aulBbdList,ULONG ulOffset)378 vGetBbdList(FILE *pFile, int iNbr, ULONG *aulBbdList, ULONG ulOffset)
379 {
380 	int	iIndex;
381 
382 	fail(pFile == NULL);
383 	fail(iNbr > 127);
384 	fail(aulBbdList == NULL);
385 
386 	NO_DBG_DEC(iNbr);
387 	for (iIndex = 0; iIndex < iNbr; iIndex++) {
388                 aulBbdList[iIndex] =
389                         ulReadLong(pFile, ulOffset + 4 * (ULONG)iIndex);
390 		NO_DBG_DEC(iIndex);
391                 NO_DBG_HEX(aulBbdList[iIndex]);
392         }
393 } /* end of vGetBbdList */
394 
395 /*
396  * bGetDocumentText - make a list of the text blocks of a Word document
397  *
398  * Return TRUE when succesful, otherwise FALSE
399  */
400 static BOOL
bGetDocumentText(FILE * pFile,const pps_info_type * pPPS,const ULONG * aulBBD,size_t tBBDLen,const ULONG * aulSBD,size_t tSBDLen,const UCHAR * aucHeader,int iWordVersion)401 bGetDocumentText(FILE *pFile, const pps_info_type *pPPS,
402 	const ULONG *aulBBD, size_t tBBDLen,
403 	const ULONG *aulSBD, size_t tSBDLen,
404 	const UCHAR *aucHeader, int iWordVersion)
405 {
406 	ULONG	ulBeginOfText;
407 	ULONG	ulTextLen, ulFootnoteLen, ulEndnoteLen;
408 	ULONG	ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
409 	ULONG	ulTextBoxLen, ulHdrTextBoxLen;
410 	UINT	uiQuickSaves;
411 	BOOL	bFarEastWord, bTemplate, bFastSaved, bEncrypted, bSuccess;
412 	USHORT	usIdent, usDocStatus;
413 
414 	fail(pFile == NULL || pPPS == NULL);
415 	fail(aulBBD == NULL);
416 	fail(aulSBD == NULL);
417 
418 	DBG_MSG("bGetDocumentText");
419 
420 	/* Get the "magic number" from the header */
421 	usIdent = usGetWord(0x00, aucHeader);
422 	DBG_HEX(usIdent);
423 	bFarEastWord = usIdent == 0x8098 || usIdent == 0x8099 ||
424 			usIdent == 0xa697 || usIdent == 0xa699;
425 	/* Get the status flags from the header */
426 	usDocStatus = usGetWord(0x0a, aucHeader);
427 	DBG_HEX(usDocStatus);
428 	bTemplate = (usDocStatus & BIT(0)) != 0;
429 	DBG_MSG_C(bTemplate, "This document is a Template");
430 	bFastSaved = (usDocStatus & BIT(2)) != 0;
431 	uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
432 	DBG_MSG_C(bFastSaved, "This document is Fast Saved");
433 	DBG_DEC_C(bFastSaved, uiQuickSaves);
434 	bEncrypted = (usDocStatus & BIT(8)) != 0;
435 	if (bEncrypted) {
436 		werr(0, "Encrypted documents are not supported");
437 		return FALSE;
438 	}
439 
440 	/* Get length information */
441 	ulBeginOfText = ulGetLong(0x18, aucHeader);
442 	DBG_HEX(ulBeginOfText);
443 	switch (iWordVersion) {
444 	case 6:
445 	case 7:
446 		ulTextLen = ulGetLong(0x34, aucHeader);
447 		ulFootnoteLen = ulGetLong(0x38, aucHeader);
448 		ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
449 		ulMacroLen = ulGetLong(0x40, aucHeader);
450 		ulAnnotationLen = ulGetLong(0x44, aucHeader);
451 		ulEndnoteLen = ulGetLong(0x48, aucHeader);
452 		ulTextBoxLen = ulGetLong(0x4c, aucHeader);
453 		ulHdrTextBoxLen = ulGetLong(0x50, aucHeader);
454 		break;
455 	case 8:
456 		ulTextLen = ulGetLong(0x4c, aucHeader);
457 		ulFootnoteLen = ulGetLong(0x50, aucHeader);
458 		ulHdrFtrLen = ulGetLong(0x54, aucHeader);
459 		ulMacroLen = ulGetLong(0x58, aucHeader);
460 		ulAnnotationLen = ulGetLong(0x5c, aucHeader);
461 		ulEndnoteLen = ulGetLong(0x60, aucHeader);
462 		ulTextBoxLen = ulGetLong(0x64, aucHeader);
463 		ulHdrTextBoxLen = ulGetLong(0x68, aucHeader);
464 		break;
465 	default:
466 		werr(0, "This version of Word is not supported");
467 		return FALSE;
468 	}
469 	DBG_DEC(ulTextLen);
470 	DBG_DEC(ulFootnoteLen);
471 	DBG_DEC(ulHdrFtrLen);
472 	DBG_DEC(ulMacroLen);
473 	DBG_DEC(ulAnnotationLen);
474 	DBG_DEC(ulEndnoteLen);
475 	DBG_DEC(ulTextBoxLen);
476 	DBG_DEC(ulHdrTextBoxLen);
477 
478 	/* Make a list of the text blocks */
479 	switch (iWordVersion) {
480 	case 6:
481 	case 7:
482 		if (bFastSaved) {
483 			bSuccess = bGet6DocumentText(pFile,
484 					bFarEastWord,
485 					pPPS->tWordDocument.ulSB,
486 					aulBBD, tBBDLen,
487 					aucHeader);
488 		} else {
489 		  	bSuccess = bAddTextBlocks(ulBeginOfText,
490 				ulTextLen +
491 				ulFootnoteLen +
492 				ulHdrFtrLen +
493 				ulMacroLen + ulAnnotationLen +
494 				ulEndnoteLen +
495 				ulTextBoxLen + ulHdrTextBoxLen,
496 				bFarEastWord,
497 				IGNORE_PROPMOD,
498 				pPPS->tWordDocument.ulSB,
499 				aulBBD, tBBDLen);
500 		}
501 		break;
502 	case 8:
503 		bSuccess = bGet8DocumentText(pFile,
504 				pPPS,
505 				aulBBD, tBBDLen, aulSBD, tSBDLen,
506 				aucHeader);
507 		break;
508 	default:
509 		werr(0, "This version of Word is not supported");
510 		bSuccess = FALSE;
511 		break;
512 	}
513 
514 	if (bSuccess) {
515 		vSplitBlockList(pFile,
516 				ulTextLen,
517 				ulFootnoteLen,
518 				ulHdrFtrLen,
519 				ulMacroLen,
520 				ulAnnotationLen,
521 				ulEndnoteLen,
522 				ulTextBoxLen,
523 				ulHdrTextBoxLen,
524 				!bFastSaved && iWordVersion == 8);
525 	} else {
526 		vDestroyTextBlockList();
527 		werr(0, "I can't find the text of this document");
528 	}
529 	return bSuccess;
530 } /* end of bGetDocumentText */
531 
532 /*
533  * vGetDocumentData - make a list of the data blocks of a Word document
534  */
535 static void
vGetDocumentData(FILE * pFile,const pps_info_type * pPPS,const ULONG * aulBBD,size_t tBBDLen,const UCHAR * aucHeader,int iWordVersion)536 vGetDocumentData(FILE *pFile, const pps_info_type *pPPS,
537 	const ULONG *aulBBD, size_t tBBDLen,
538 	const UCHAR *aucHeader, int iWordVersion)
539 {
540 	options_type	tOptions;
541 	ULONG	ulBeginOfText;
542 	BOOL	bFastSaved, bHasImages, bSuccess;
543 	USHORT	usDocStatus;
544 
545 	fail(pFile == NULL);
546 	fail(pPPS == NULL);
547 	fail(aulBBD == NULL);
548 
549 	/* Get the options */
550 	vGetOptions(&tOptions);
551 
552 	/* Get the status flags from the header */
553 	usDocStatus = usGetWord(0x0a, aucHeader);
554 	DBG_HEX(usDocStatus);
555 	bFastSaved = (usDocStatus & BIT(2)) != 0;
556 	bHasImages = (usDocStatus & BIT(3)) != 0;
557 
558 	if (!bHasImages ||
559 	    tOptions.eConversionType == conversion_text ||
560 	    tOptions.eConversionType == conversion_fmt_text ||
561 #if CR3_ANTIWORD_PATCH!=1
562         tOptions.eConversionType == conversion_xml ||
563 #endif
564 	    tOptions.eImageLevel == level_no_images) {
565 		/*
566 		 * No images in the document or text-only output or
567 		 * no images wanted, so no data blocks will be needed
568 		 */
569 		vDestroyDataBlockList();
570 		return;
571 	}
572 
573 	/* Get length information */
574 	ulBeginOfText = ulGetLong(0x18, aucHeader);
575 	DBG_HEX(ulBeginOfText);
576 
577 	/* Make a list of the data blocks */
578 	switch (iWordVersion) {
579 	case 6:
580 	case 7:
581 		/*
582 		 * The data blocks are in the text stream. The text stream
583 		 * is in "fast saved" format or "normal saved" format
584 		 */
585 		if (bFastSaved) {
586 			bSuccess = bGet6DocumentData(pFile,
587 					pPPS->tWordDocument.ulSB,
588 					aulBBD, tBBDLen,
589 					aucHeader);
590 		} else {
591 		  	bSuccess = bAddDataBlocks(ulBeginOfText,
592 					(ULONG)LONG_MAX,
593 					pPPS->tWordDocument.ulSB,
594 					aulBBD, tBBDLen);
595 		}
596 		break;
597 	case 8:
598 		/*
599 		 * The data blocks are in the data stream. The data stream
600 		 * is always in "normal saved" format
601 		 */
602 		bSuccess = bAddDataBlocks(0, (ULONG)LONG_MAX,
603 				pPPS->tData.ulSB, aulBBD, tBBDLen);
604 		break;
605 	default:
606 		werr(0, "This version of Word is not supported");
607 		bSuccess = FALSE;
608 		break;
609 	}
610 
611 	if (!bSuccess) {
612 		vDestroyDataBlockList();
613 		werr(0, "I can't find the data of this document");
614 	}
615 } /* end of vGetDocumentData */
616 
617 /*
618  * iInitDocumentOLE - initialize an OLE document
619  *
620  * Returns the version of Word that made the document or -1
621  */
622 int
iInitDocumentOLE(FILE * pFile,long lFilesize)623 iInitDocumentOLE(FILE *pFile, long lFilesize)
624 {
625 	pps_info_type	PPS_info;
626 	ULONG	*aulBBD, *aulSBD;
627 	ULONG	*aulRootList, *aulBbdList, *aulSbdList;
628 	ULONG	ulBdbListStart, ulAdditionalBBDlist;
629 	ULONG	ulRootStartblock, ulSbdStartblock, ulSBLstartblock;
630 	ULONG	ulStart, ulTmp;
631 	long	lMaxBlock;
632 	size_t	tBBDLen, tSBDLen, tNumBbdBlocks, tRootListLen;
633 	int	iWordVersion, iIndex, iToGo;
634 	BOOL	bSuccess;
635 	USHORT	usIdent, usDocStatus;
636 	UCHAR	aucHeader[HEADER_SIZE];
637 
638 	fail(pFile == NULL);
639 
640 	lMaxBlock = lFilesize / BIG_BLOCK_SIZE - 2;
641 	DBG_DEC(lMaxBlock);
642 	if (lMaxBlock < 1) {
643 		return -1;
644 	}
645 	tBBDLen = (size_t)(lMaxBlock + 1);
646 	tNumBbdBlocks = (size_t)ulReadLong(pFile, 0x2c);
647 	DBG_DEC(tNumBbdBlocks);
648 	ulRootStartblock = ulReadLong(pFile, 0x30);
649 	DBG_DEC(ulRootStartblock);
650 	ulSbdStartblock = ulReadLong(pFile, 0x3c);
651 	DBG_DEC(ulSbdStartblock);
652 	ulAdditionalBBDlist = ulReadLong(pFile, 0x44);
653 	DBG_HEX(ulAdditionalBBDlist);
654 	ulSBLstartblock = ulReadLong(pFile,
655 			(ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x74);
656 	DBG_DEC(ulSBLstartblock);
657 	tSBDLen = (size_t)(ulReadLong(pFile,
658 			(ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x78) /
659 			SMALL_BLOCK_SIZE);
660 	/* All to be xcalloc-ed pointers to NULL */
661 	aulRootList = NULL;
662 	aulSbdList = NULL;
663 	aulBbdList = NULL;
664 	aulSBD = NULL;
665 	aulBBD = NULL;
666 /* Big Block Depot */
667 	aulBbdList = xcalloc(tNumBbdBlocks, sizeof(ULONG));
668 	aulBBD = xcalloc(tBBDLen, sizeof(ULONG));
669 	iToGo = (int)tNumBbdBlocks;
670 	vGetBbdList(pFile, min(iToGo, 109),  aulBbdList, 0x4c);
671 	ulStart = 109;
672 	iToGo -= 109;
673 	while (ulAdditionalBBDlist != END_OF_CHAIN && iToGo > 0) {
674 		ulBdbListStart = (ulAdditionalBBDlist + 1) * BIG_BLOCK_SIZE;
675 		vGetBbdList(pFile, min(iToGo, 127),
676 					aulBbdList + ulStart, ulBdbListStart);
677 		ulAdditionalBBDlist = ulReadLong(pFile,
678 					ulBdbListStart + 4 * 127);
679 		DBG_DEC(ulAdditionalBBDlist);
680 		DBG_HEX(ulAdditionalBBDlist);
681 		ulStart += 127;
682 		iToGo -= 127;
683 	}
684 	if (!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) {
685 		FREE_ALL();
686 		return -1;
687 	}
688 	aulBbdList = xfree(aulBbdList);
689 /* Small Block Depot */
690 	aulSbdList = xcalloc(tBBDLen, sizeof(ULONG));
691 	aulSBD = xcalloc(tSBDLen, sizeof(ULONG));
692 	for (iIndex = 0, ulTmp = ulSbdStartblock;
693 	     iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
694 	     iIndex++, ulTmp = aulBBD[ulTmp]) {
695 		if (ulTmp >= (ULONG)tBBDLen) {
696 			DBG_DEC(ulTmp);
697 			DBG_DEC(tBBDLen);
698 			werr(1, "The Big Block Depot is damaged");
699 		}
700 		aulSbdList[iIndex] = ulTmp;
701 		NO_DBG_HEX(aulSbdList[iIndex]);
702 	}
703 	if (!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen)) {
704 		FREE_ALL();
705 		return -1;
706 	}
707 	aulSbdList = xfree(aulSbdList);
708 /* Root list */
709 	for (tRootListLen = 0, ulTmp = ulRootStartblock;
710 	     tRootListLen < tBBDLen && ulTmp != END_OF_CHAIN;
711 	     tRootListLen++, ulTmp = aulBBD[ulTmp]) {
712 		if (ulTmp >= (ULONG)tBBDLen) {
713 			DBG_DEC(ulTmp);
714 			DBG_DEC(tBBDLen);
715 			werr(1, "The Big Block Depot is damaged");
716 		}
717 	}
718 	if (tRootListLen == 0) {
719 		werr(0, "No Rootlist found");
720 		FREE_ALL();
721 		return -1;
722 	}
723 	aulRootList = xcalloc(tRootListLen, sizeof(ULONG));
724 	for (iIndex = 0, ulTmp = ulRootStartblock;
725 	     iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
726 	     iIndex++, ulTmp = aulBBD[ulTmp]) {
727 		if (ulTmp >= (ULONG)tBBDLen) {
728 			DBG_DEC(ulTmp);
729 			DBG_DEC(tBBDLen);
730 			werr(1, "The Big Block Depot is damaged");
731 		}
732 		aulRootList[iIndex] = ulTmp;
733 		NO_DBG_DEC(aulRootList[iIndex]);
734 	}
735 	fail(tRootListLen != (size_t)iIndex);
736 	bSuccess = bGetPPS(pFile, aulRootList, tRootListLen, &PPS_info);
737 	aulRootList = xfree(aulRootList);
738 	if (!bSuccess) {
739 		FREE_ALL();
740 		return -1;
741 	}
742 /* Small block list */
743 	if (!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) {
744 		FREE_ALL();
745 		return -1;
746 	}
747 
748 	if (PPS_info.tWordDocument.ulSize < MIN_SIZE_FOR_BBD_USE) {
749 		DBG_DEC(PPS_info.tWordDocument.ulSize);
750 		FREE_ALL();
751 		werr(0, "I'm afraid the text stream of this file "
752 			"is too small to handle.");
753 		return -1;
754 	}
755 	/* Read the headerblock */
756 	if (!bReadBuffer(pFile, PPS_info.tWordDocument.ulSB,
757 			aulBBD, tBBDLen, BIG_BLOCK_SIZE,
758 			aucHeader, 0, HEADER_SIZE)) {
759 		FREE_ALL();
760 		return -1;
761 	}
762 	usIdent = usGetWord(0x00, aucHeader);
763 	DBG_HEX(usIdent);
764 	fail(usIdent != 0x8098 &&	/* Word 7 for oriental languages */
765 	     usIdent != 0x8099 &&	/* Word 7 for oriental languages */
766 	     usIdent != 0xa5dc &&	/* Word 6 & 7 */
767 	     usIdent != 0xa5ec &&	/* Word 7 & 97 & 98 */
768 	     usIdent != 0xa697 &&	/* Word 7 for oriental languages */
769 	     usIdent != 0xa699);	/* Word 7 for oriental languages */
770 	iWordVersion = iGetVersionNumber(aucHeader);
771 	if (iWordVersion < 6) {
772 		FREE_ALL();
773 		werr(0, "This file is from a version of Word before Word 6.");
774 		return -1;
775 	}
776 
777 	/* Get the status flags from the header */
778 	usDocStatus = usGetWord(0x0a, aucHeader);
779         if (usDocStatus & BIT(9)) {
780 		PPS_info.tTable = PPS_info.t1Table;
781 	} else {
782 		PPS_info.tTable = PPS_info.t0Table;
783 	}
784 	/* Clean the entries that should not be used */
785 	memset(&PPS_info.t0Table, 0, sizeof(PPS_info.t0Table));
786 	memset(&PPS_info.t1Table, 0, sizeof(PPS_info.t1Table));
787 
788 	bSuccess = bGetDocumentText(pFile, &PPS_info,
789 			aulBBD, tBBDLen, aulSBD, tSBDLen,
790 			aucHeader, iWordVersion);
791 	if (bSuccess) {
792 		vGetDocumentData(pFile, &PPS_info,
793 			aulBBD, tBBDLen, aucHeader, iWordVersion);
794 		vGetPropertyInfo(pFile, &PPS_info,
795 			aulBBD, tBBDLen, aulSBD, tSBDLen,
796 			aucHeader, iWordVersion);
797 		vSetDefaultTabWidth(pFile, &PPS_info,
798 			aulBBD, tBBDLen, aulSBD, tSBDLen,
799 			aucHeader, iWordVersion);
800 		vGetNotesInfo(pFile, &PPS_info,
801 			aulBBD, tBBDLen, aulSBD, tSBDLen,
802 			aucHeader, iWordVersion);
803 	}
804 	FREE_ALL();
805 	return bSuccess ? iWordVersion : -1;
806 } /* end of iInitDocumentOLE */
807