1 /*
2  * wordwin.c
3  * Copyright (C) 2002-2005 A.J. van Os; Released under GPL
4  *
5  * Description:
6  * Deal with the WIN internals of a MS Word file
7  */
8 
9 #include "antiword.h"
10 
11 
12 /*
13  * bGetDocumentText - make a list of the text blocks of a Word document
14  *
15  * Return TRUE when succesful, otherwise FALSE
16  */
17 static BOOL
bGetDocumentText(FILE * pFile,const UCHAR * aucHeader)18 bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
19 {
20 	text_block_type	tTextBlock;
21 	ULONG	ulBeginOfText;
22 	ULONG	ulTextLen, ulFootnoteLen;
23 	ULONG	ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
24 	UINT	uiQuickSaves;
25 	USHORT	usDocStatus;
26 	BOOL	bTemplate, bFastSaved, bEncrypted, bSuccess;
27 
28 	fail(pFile == NULL);
29 	fail(aucHeader == NULL);
30 
31 	DBG_MSG("bGetDocumentText");
32 
33 	/* Get the status flags from the header */
34 	usDocStatus = usGetWord(0x0a, aucHeader);
35 	DBG_HEX(usDocStatus);
36 	bTemplate = (usDocStatus & BIT(0)) != 0;
37 	DBG_MSG_C(bTemplate, "This document is a Template");
38 	bFastSaved = (usDocStatus & BIT(2)) != 0;
39 	uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
40 	DBG_MSG_C(bFastSaved, "This document is Fast Saved");
41 	DBG_DEC_C(bFastSaved, uiQuickSaves);
42 	if (bFastSaved) {
43 		werr(0, "Word2: fast saved documents are not supported yet");
44 		return FALSE;
45 	}
46 	bEncrypted = (usDocStatus & BIT(8)) != 0;
47 	if (bEncrypted) {
48 		werr(0, "Encrypted documents are not supported");
49 		return FALSE;
50 	}
51 
52 	/* Get length information */
53 	ulBeginOfText = ulGetLong(0x18, aucHeader);
54 	DBG_HEX(ulBeginOfText);
55 	ulTextLen = ulGetLong(0x34, aucHeader);
56 	ulFootnoteLen = ulGetLong(0x38, aucHeader);
57 	ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
58 	ulMacroLen = ulGetLong(0x40, aucHeader);
59 	ulAnnotationLen = ulGetLong(0x44, aucHeader);
60 	DBG_DEC(ulTextLen);
61 	DBG_DEC(ulFootnoteLen);
62 	DBG_DEC(ulHdrFtrLen);
63 	DBG_DEC(ulMacroLen);
64 	DBG_DEC(ulAnnotationLen);
65 	if (bFastSaved) {
66 		bSuccess = FALSE;
67 	} else {
68 		tTextBlock.ulFileOffset = ulBeginOfText;
69 		tTextBlock.ulCharPos = ulBeginOfText;
70 		tTextBlock.ulLength = ulTextLen +
71 				ulFootnoteLen +
72 				ulHdrFtrLen + ulMacroLen + ulAnnotationLen;
73 		tTextBlock.bUsesUnicode = FALSE;
74 		tTextBlock.usPropMod = IGNORE_PROPMOD;
75 		bSuccess = bAdd2TextBlockList(&tTextBlock);
76 		DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
77 		DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
78 		DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
79 		DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
80 		DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);
81 	}
82 
83 	if (bSuccess) {
84 		vSplitBlockList(pFile,
85 				ulTextLen,
86 				ulFootnoteLen,
87 				ulHdrFtrLen,
88 				ulMacroLen,
89 				ulAnnotationLen,
90 				0,
91 				0,
92 				0,
93 				FALSE);
94 	} else {
95 		vDestroyTextBlockList();
96 		werr(0, "I can't find the text of this document");
97 	}
98 	return bSuccess;
99 } /* end of bGetDocumentText */
100 
101 /*
102  * vGetDocumentData - make a list of the data blocks of a Word document
103  */
104 static void
vGetDocumentData(FILE * pFile,const UCHAR * aucHeader)105 vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
106 {
107 	data_block_type	tDataBlock;
108 	options_type	tOptions;
109 	ULONG	ulEndOfText, ulBeginCharInfo;
110 	BOOL	bFastSaved, bHasImages, bSuccess;
111 	USHORT	usDocStatus;
112 
113 	/* Get the options */
114 	vGetOptions(&tOptions);
115 
116 	/* Get the status flags from the header */
117 	usDocStatus = usGetWord(0x0a, aucHeader);
118 	DBG_HEX(usDocStatus);
119 	bFastSaved = (usDocStatus & BIT(2)) != 0;
120 	bHasImages = (usDocStatus & BIT(3)) != 0;
121 
122 	if (!bHasImages ||
123 	    tOptions.eConversionType == conversion_text ||
124 	    tOptions.eConversionType == conversion_fmt_text ||
125 	    tOptions.eConversionType == conversion_xml ||
126 	    tOptions.eImageLevel == level_no_images) {
127 		/*
128 		 * No images in the document or text-only output or
129 		 * no images wanted, so no data blocks will be needed
130 		 */
131 		vDestroyDataBlockList();
132 		return;
133 	}
134 
135 	if (bFastSaved) {
136 		bSuccess = FALSE;
137 	} else {
138 		/* This datablock is too big, but it contains all images */
139 		ulEndOfText = ulGetLong(0x1c, aucHeader);
140 		DBG_HEX(ulEndOfText);
141 		ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
142 		DBG_HEX(ulBeginCharInfo);
143 		if (ulBeginCharInfo > ulEndOfText) {
144 			tDataBlock.ulFileOffset = ulEndOfText;
145 			tDataBlock.ulDataPos = ulEndOfText;
146 			tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
147 			bSuccess = bAdd2DataBlockList(&tDataBlock);
148 			DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
149 			DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
150 			DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
151 		} else {
152 			bSuccess = ulBeginCharInfo == ulEndOfText;
153 		}
154 	}
155 
156 	if (!bSuccess) {
157 		vDestroyDataBlockList();
158 		werr(0, "I can't find the data of this document");
159 	}
160 } /* end of vGetDocumentData */
161 
162 /*
163  * iInitDocumentWIN - initialize an WIN document
164  *
165  * Returns the version of Word that made the document or -1
166  */
167 int
iInitDocumentWIN(FILE * pFile,long lFilesize)168 iInitDocumentWIN(FILE *pFile, long lFilesize)
169 {
170 	int	iWordVersion;
171 	BOOL	bSuccess;
172 	USHORT	usIdent;
173 	UCHAR	aucHeader[384];
174 
175 	fail(pFile == NULL);
176 
177 	if (lFilesize < 384) {
178 		return -1;
179 	}
180 
181 	/* Read the headerblock */
182 	if (!bReadBytes(aucHeader, 384, 0x00, pFile)) {
183 		return -1;
184 	}
185 	/* Get the "magic number" from the header */
186 	usIdent = usGetWord(0x00, aucHeader);
187 	DBG_HEX(usIdent);
188 	fail(usIdent != 0xa59b &&	/* WinWord 1.x */
189 		usIdent != 0xa5db);	/* WinWord 2.0 */
190 	iWordVersion = iGetVersionNumber(aucHeader);
191 	if (iWordVersion != 1 && iWordVersion != 2) {
192 		werr(0, "This file is not from ''Win Word 1 or 2'.");
193 		return -1;
194 	}
195 	bSuccess = bGetDocumentText(pFile, aucHeader);
196 	if (bSuccess) {
197 		vGetDocumentData(pFile, aucHeader);
198 		vGetPropertyInfo(pFile, NULL,
199 				NULL, 0, NULL, 0,
200 				aucHeader, iWordVersion);
201 		vSetDefaultTabWidth(pFile, NULL,
202 				NULL, 0, NULL, 0,
203 				aucHeader, iWordVersion);
204 		vGetNotesInfo(pFile, NULL,
205 				NULL, 0, NULL, 0,
206 				aucHeader, iWordVersion);
207 	}
208 	return bSuccess ? iWordVersion : -1;
209 } /* end of iInitDocumentWIN */
210