1 /*
2  * wordlib.c
3  * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL
4  *
5  * Description:
6  * Deal with the internals of a MS Word file
7  */
8 
9 #include "antiword.h"
10 
11 static BOOL	bOldMacFile = FALSE;
12 
13 
14 /*
15  * Common part of the file checking functions
16  */
17 static BOOL
bCheckBytes(FILE * pFile,const UCHAR * aucBytes,size_t tBytes)18 bCheckBytes(FILE *pFile, const UCHAR *aucBytes, size_t tBytes)
19 {
20 	int	iIndex, iChar;
21 
22 	fail(pFile == NULL || aucBytes == NULL || tBytes == 0);
23 
24     aw_rewind(pFile);
25 
26 	for (iIndex = 0; iIndex < (int)tBytes; iIndex++) {
27         iChar = aw_getc(pFile);
28 		if (iChar == EOF || iChar != (int)aucBytes[iIndex]) {
29 			NO_DBG_HEX(iChar);
30 			NO_DBG_HEX(aucBytes[iIndex]);
31 			return FALSE;
32 		}
33 	}
34 	return TRUE;
35 } /* end of bCheckBytes */
36 
37 /*
38  * This function checks whether the given file is or is not a "Word for DOS"
39  * document
40  */
41 BOOL
bIsWordForDosFile(FILE * pFile,long lFilesize)42 bIsWordForDosFile(FILE *pFile, long lFilesize)
43 {
44 	static UCHAR	aucBytes[] =
45 		{ 0x31, 0xbe, 0x00, 0x00, 0x00, 0xab };	/* Word for DOS */
46 
47 	DBG_MSG("bIsWordForDosFile");
48 
49 	if (pFile == NULL || lFilesize < 0) {
50 		DBG_MSG("No proper file given");
51 		return FALSE;
52 	}
53 	if (lFilesize < 128) {
54 		DBG_MSG("File too small to be a Word document");
55 		return FALSE;
56 	}
57 	return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
58 } /* end of bIsWordForDosFile */
59 
60 /*
61  * This function checks whether the given file is or is not a file with an
62  * OLE envelope (That is a document made by Word 6 or later)
63  */
64 static BOOL
bIsWordFileWithOLE(FILE * pFile,long lFilesize)65 bIsWordFileWithOLE(FILE *pFile, long lFilesize)
66 {
67 	static UCHAR	aucBytes[] =
68 		{ 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
69 	int	iTailLen;
70 
71 	if (pFile == NULL || lFilesize < 0) {
72 		DBG_MSG("No proper file given");
73 		return FALSE;
74 	}
75 	if (lFilesize < (long)BIG_BLOCK_SIZE * 3) {
76 		DBG_MSG("This file is too small to be a Word document");
77 		return FALSE;
78 	}
79 
80 	iTailLen = (int)(lFilesize % BIG_BLOCK_SIZE);
81 	switch (iTailLen) {
82 	case 0:		/* No tail, as it should be */
83 		break;
84 	case 1:
85 	case 2:		/* Filesize mismatch or a buggy email program */
86 		if ((int)(lFilesize % 3) == iTailLen) {
87 			DBG_DEC(lFilesize);
88 			return FALSE;
89 		}
90 		/*
91 		 * Ignore extra bytes caused by buggy email programs.
92 		 * They have bugs in their base64 encoding or decoding.
93 		 * 3 bytes -> 4 ascii chars -> 3 bytes
94 		 */
95 		DBG_MSG("Document with extra bytes");
96 		break;
97 	default:	/* Wrong filesize for a Word document */
98 		DBG_DEC(lFilesize);
99 		DBG_DEC(iTailLen);
100 		return FALSE;
101 	}
102 	return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
103 } /* end of bIsWordFileWithOLE */
104 
105 /*
106  * This function checks whether the given file is or is not a RTF document
107  */
108 BOOL
bIsRtfFile(FILE * pFile)109 bIsRtfFile(FILE *pFile)
110 {
111 	static UCHAR	aucBytes[] =
112 		{ '{', '\\', 'r', 't', 'f', '1' };
113 
114 	DBG_MSG("bIsRtfFile");
115 
116 	return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
117 } /* end of bIsRtfFile */
118 
119 /*
120  * This function checks whether the given file is or is not a WP document
121  */
122 BOOL
bIsWordPerfectFile(FILE * pFile)123 bIsWordPerfectFile(FILE *pFile)
124 {
125 	static UCHAR	aucBytes[] =
126 		{ 0xff, 'W', 'P', 'C' };
127 
128 	DBG_MSG("bIsWordPerfectFile");
129 
130 	return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
131 } /* end of bIsWordPerfectFile */
132 
133 /*
134  * This function checks whether the given file is or is not a "Win Word 1 or 2"
135  * document
136  */
137 BOOL
bIsWinWord12File(FILE * pFile,long lFilesize)138 bIsWinWord12File(FILE *pFile, long lFilesize)
139 {
140 	static UCHAR	aucBytes[2][4] = {
141 		{ 0x9b, 0xa5, 0x21, 0x00 },	/* Win Word 1.x */
142 		{ 0xdb, 0xa5, 0x2d, 0x00 },	/* Win Word 2.0 */
143 	};
144 	int	iIndex;
145 
146 	DBG_MSG("bIsWinWord12File");
147 
148 	if (pFile == NULL || lFilesize < 0) {
149 		DBG_MSG("No proper file given");
150 		return FALSE;
151 	}
152 	if (lFilesize < 384) {
153 		DBG_MSG("This file is too small to be a Word document");
154 		return FALSE;
155 	}
156 
157 	for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
158 		if (bCheckBytes(pFile,
159 				aucBytes[iIndex],
160 				elementsof(aucBytes[iIndex]))) {
161 			return TRUE;
162 		}
163 	}
164 	return FALSE;
165 } /* end of bIsWinWord12File */
166 
167 /*
168  * This function checks whether the given file is or is not a "Mac Word 4 or 5"
169  * document
170  */
171 BOOL
bIsMacWord45File(FILE * pFile)172 bIsMacWord45File(FILE *pFile)
173 {
174 	static UCHAR	aucBytes[2][6] = {
175 		{ 0xfe, 0x37, 0x00, 0x1c, 0x00, 0x00 },	/* Mac Word 4 */
176 		{ 0xfe, 0x37, 0x00, 0x23, 0x00, 0x00 },	/* Mac Word 5 */
177 	};
178 	int	iIndex;
179 
180 	DBG_MSG("bIsMacWord45File");
181 
182 	for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
183 		if (bCheckBytes(pFile,
184 				aucBytes[iIndex],
185 				elementsof(aucBytes[iIndex]))) {
186 			return TRUE;
187 		}
188 	}
189 	return FALSE;
190 } /* end of bIsMacWord45File */
191 
/*
 * iGuessVersionNumber - guess the Word version number from first few bytes
 *
 * The detectors are tried in a fixed order: Word for DOS, Win Word 1 or 2,
 * Mac Word 4 or 5 and finally a file with an OLE envelope.
 *
 * Returns the guessed version number (0, 2, 5 or 6) or -1 when no guess
 * is possible
 */
int
iGuessVersionNumber(FILE *pFile, long lFilesize)
{
	int	iVersion;

	if (bIsWordForDosFile(pFile, lFilesize)) {
		iVersion = 0;
	} else if (bIsWinWord12File(pFile, lFilesize)) {
		iVersion = 2;
	} else if (bIsMacWord45File(pFile)) {
		iVersion = 5;
	} else if (bIsWordFileWithOLE(pFile, lFilesize)) {
		iVersion = 6;
	} else {
		iVersion = -1;
	}
	return iVersion;
} /* end of iGuessVersionNumber */
214 
215 /*
216  * iGetVersionNumber - get the Word version number from the header
217  *
218  * Returns the version number or -1 when unknown
219  */
220 int
iGetVersionNumber(const UCHAR * aucHeader)221 iGetVersionNumber(const UCHAR *aucHeader)
222 {
223 	USHORT	usFib, usChse;
224 
225 	usFib = usGetWord(0x02, aucHeader);
226 	if (usFib >= 0x1000) {
227 		/* To big: must be MacWord using Big Endian */
228 		DBG_HEX(usFib);
229 		usFib = usGetWordBE(0x02, aucHeader);
230 	}
231 	DBG_DEC(usFib);
232 	bOldMacFile = FALSE;
233 	switch (usFib) {
234 	case   0:
235 		DBG_MSG("Word for DOS");
236 		return 0;
237 	case  28:
238 		DBG_MSG("Word 4 for Macintosh");
239 		bOldMacFile = TRUE;
240 		return 4;
241 	case  33:
242 		DBG_MSG("Word 1.x for Windows");
243 		return 1;
244 	case  35:
245 		DBG_MSG("Word 5 for Macintosh");
246 		bOldMacFile = TRUE;
247 		return 5;
248 	case  45:
249 		DBG_MSG("Word 2 for Windows");
250 		return 2;
251 	case 101:
252 	case 102:
253 		DBG_MSG("Word 6 for Windows");
254 		return 6;
255 	case 103:
256 	case 104:
257 		usChse = usGetWord(0x14, aucHeader);
258 		DBG_DEC(usChse);
259 		switch (usChse) {
260 		case 0:
261 			DBG_MSG("Word 7 for Win95");
262 			return 7;
263 		case 256:
264 			DBG_MSG("Word 6 for Macintosh");
265 			bOldMacFile = TRUE;
266 			return 6;
267 		default:
268 			DBG_FIXME();
269 			if ((int)ucGetByte(0x05, aucHeader) == 0xe0) {
270 				DBG_MSG("Word 7 for Win95");
271 				return 7;
272 			}
273 			DBG_MSG("Word 6 for Macintosh");
274 			bOldMacFile = TRUE;
275 			return 6;
276 		}
277 	default:
278 		usChse = usGetWord(0x14, aucHeader);
279 		DBG_DEC(usChse);
280 		if (usFib < 192) {
281 			/* Unknown or unsupported version of Word */
282 			DBG_DEC(usFib);
283 			return -1;
284 		}
285 		DBG_MSG_C(usChse != 256, "Word97 for Win95/98/NT");
286 		DBG_MSG_C(usChse == 256, "Word98 for Macintosh");
287 		return 8;
288 	}
289 } /* end of iGetVersionNumber */
290 
291 /*
292  * TRUE if the current file was made by Word version 6 or older on an
293  * Apple Macintosh, otherwise FALSE.
294  * This function hides the methode of how to find out from the rest of the
295  * program.
296  */
297 BOOL
bIsOldMacFile(void)298 bIsOldMacFile(void)
299 {
300 	return bOldMacFile;
301 } /* end of bIsOldMacFile */
302 
/*
 * iInitDocument - initialize a document
 *
 * Guesses the kind of file with iGuessVersionNumber and hands the file to
 * the matching initialization function (DOS, WIN, MAC or OLE).
 *
 * Returns the version of Word that made the document or -1
 */
int
iInitDocument(FILE *pFile, long lFilesize)
{
	int	iGuess = iGuessVersionNumber(pFile, lFilesize);

	if (iGuess == 0) {
		return iInitDocumentDOS(pFile, lFilesize);
	}
	if (iGuess == 2) {
		return iInitDocumentWIN(pFile, lFilesize);
	}
	if (iGuess == 5) {
		return iInitDocumentMAC(pFile, lFilesize);
	}
	if (iGuess == 6) {
		return iInitDocumentOLE(pFile, lFilesize);
	}
	/* No usable guess, so there is nothing to initialize */
	DBG_DEC(iGuess);
	return -1;
} /* end of iInitDocument */
334 
/*
 * vFreeDocument - free a document by free-ing its parts
 *
 * Calls the destroy function of every part of the document, one after
 * the other. The order of the calls is kept as it was.
 */
void
vFreeDocument(void)
{
	DBG_MSG("vFreeDocument");

	/* Release the parts of the document */
	vDestroyTextBlockList();
	vDestroyDataBlockList();
	vDestroyListInfoList();
	vDestroyRowInfoList();
	vDestroyStyleInfoList();
	vDestroyFontInfoList();
	vDestroyStylesheetList();
	vDestroyPictInfoList();
	vDestroyDocumentInfoList();
	vDestroySectionInfoList();
	vDestroyHdrFtrInfoList();
	vDestroyPropModList();
	vDestroyNotesInfoLists();
	vDestroyFontTable();
	vDestroySummaryInfo();
} /* end of vFreeDocument */
360