1 /*
2 * wordwin.c
3 * Copyright (C) 2002-2005 A.J. van Os; Released under GPL
4 *
5 * Description:
6 * Deal with the WIN internals of a MS Word file
7 */
8
9 #include "antiword.h"
10
11
12 /*
13 * bGetDocumentText - make a list of the text blocks of a Word document
14 *
15 * Return TRUE when succesful, otherwise FALSE
16 */
17 static BOOL
bGetDocumentText(FILE * pFile,const UCHAR * aucHeader)18 bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
19 {
20 text_block_type tTextBlock;
21 ULONG ulBeginOfText;
22 ULONG ulTextLen, ulFootnoteLen;
23 ULONG ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
24 UINT uiQuickSaves;
25 USHORT usDocStatus;
26 BOOL bTemplate, bFastSaved, bEncrypted, bSuccess;
27
28 fail(pFile == NULL);
29 fail(aucHeader == NULL);
30
31 DBG_MSG("bGetDocumentText");
32
33 /* Get the status flags from the header */
34 usDocStatus = usGetWord(0x0a, aucHeader);
35 DBG_HEX(usDocStatus);
36 bTemplate = (usDocStatus & BIT(0)) != 0;
37 DBG_MSG_C(bTemplate, "This document is a Template");
38 bFastSaved = (usDocStatus & BIT(2)) != 0;
39 uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
40 DBG_MSG_C(bFastSaved, "This document is Fast Saved");
41 DBG_DEC_C(bFastSaved, uiQuickSaves);
42 if (bFastSaved) {
43 werr(0, "Word2: fast saved documents are not supported yet");
44 return FALSE;
45 }
46 bEncrypted = (usDocStatus & BIT(8)) != 0;
47 if (bEncrypted) {
48 werr(0, "Encrypted documents are not supported");
49 return FALSE;
50 }
51
52 /* Get length information */
53 ulBeginOfText = ulGetLong(0x18, aucHeader);
54 DBG_HEX(ulBeginOfText);
55 ulTextLen = ulGetLong(0x34, aucHeader);
56 ulFootnoteLen = ulGetLong(0x38, aucHeader);
57 ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
58 ulMacroLen = ulGetLong(0x40, aucHeader);
59 ulAnnotationLen = ulGetLong(0x44, aucHeader);
60 DBG_DEC(ulTextLen);
61 DBG_DEC(ulFootnoteLen);
62 DBG_DEC(ulHdrFtrLen);
63 DBG_DEC(ulMacroLen);
64 DBG_DEC(ulAnnotationLen);
65 if (bFastSaved) {
66 bSuccess = FALSE;
67 } else {
68 tTextBlock.ulFileOffset = ulBeginOfText;
69 tTextBlock.ulCharPos = ulBeginOfText;
70 tTextBlock.ulLength = ulTextLen +
71 ulFootnoteLen +
72 ulHdrFtrLen + ulMacroLen + ulAnnotationLen;
73 tTextBlock.bUsesUnicode = FALSE;
74 tTextBlock.usPropMod = IGNORE_PROPMOD;
75 bSuccess = bAdd2TextBlockList(&tTextBlock);
76 DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
77 DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
78 DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
79 DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
80 DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);
81 }
82
83 if (bSuccess) {
84 vSplitBlockList(pFile,
85 ulTextLen,
86 ulFootnoteLen,
87 ulHdrFtrLen,
88 ulMacroLen,
89 ulAnnotationLen,
90 0,
91 0,
92 0,
93 FALSE);
94 } else {
95 vDestroyTextBlockList();
96 werr(0, "I can't find the text of this document");
97 }
98 return bSuccess;
99 } /* end of bGetDocumentText */
100
101 /*
102 * vGetDocumentData - make a list of the data blocks of a Word document
103 */
104 static void
vGetDocumentData(FILE * pFile,const UCHAR * aucHeader)105 vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
106 {
107 data_block_type tDataBlock;
108 options_type tOptions;
109 ULONG ulEndOfText, ulBeginCharInfo;
110 BOOL bFastSaved, bHasImages, bSuccess;
111 USHORT usDocStatus;
112
113 /* Get the options */
114 vGetOptions(&tOptions);
115
116 /* Get the status flags from the header */
117 usDocStatus = usGetWord(0x0a, aucHeader);
118 DBG_HEX(usDocStatus);
119 bFastSaved = (usDocStatus & BIT(2)) != 0;
120 bHasImages = (usDocStatus & BIT(3)) != 0;
121
122 if (!bHasImages ||
123 tOptions.eConversionType == conversion_text ||
124 tOptions.eConversionType == conversion_fmt_text ||
125 tOptions.eConversionType == conversion_xml ||
126 tOptions.eImageLevel == level_no_images) {
127 /*
128 * No images in the document or text-only output or
129 * no images wanted, so no data blocks will be needed
130 */
131 vDestroyDataBlockList();
132 return;
133 }
134
135 if (bFastSaved) {
136 bSuccess = FALSE;
137 } else {
138 /* This datablock is too big, but it contains all images */
139 ulEndOfText = ulGetLong(0x1c, aucHeader);
140 DBG_HEX(ulEndOfText);
141 ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
142 DBG_HEX(ulBeginCharInfo);
143 if (ulBeginCharInfo > ulEndOfText) {
144 tDataBlock.ulFileOffset = ulEndOfText;
145 tDataBlock.ulDataPos = ulEndOfText;
146 tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
147 bSuccess = bAdd2DataBlockList(&tDataBlock);
148 DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
149 DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
150 DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
151 } else {
152 bSuccess = ulBeginCharInfo == ulEndOfText;
153 }
154 }
155
156 if (!bSuccess) {
157 vDestroyDataBlockList();
158 werr(0, "I can't find the data of this document");
159 }
160 } /* end of vGetDocumentData */
161
162 /*
163 * iInitDocumentWIN - initialize an WIN document
164 *
165 * Returns the version of Word that made the document or -1
166 */
167 int
iInitDocumentWIN(FILE * pFile,long lFilesize)168 iInitDocumentWIN(FILE *pFile, long lFilesize)
169 {
170 int iWordVersion;
171 BOOL bSuccess;
172 USHORT usIdent;
173 UCHAR aucHeader[384];
174
175 fail(pFile == NULL);
176
177 if (lFilesize < 384) {
178 return -1;
179 }
180
181 /* Read the headerblock */
182 if (!bReadBytes(aucHeader, 384, 0x00, pFile)) {
183 return -1;
184 }
185 /* Get the "magic number" from the header */
186 usIdent = usGetWord(0x00, aucHeader);
187 DBG_HEX(usIdent);
188 fail(usIdent != 0xa59b && /* WinWord 1.x */
189 usIdent != 0xa5db); /* WinWord 2.0 */
190 iWordVersion = iGetVersionNumber(aucHeader);
191 if (iWordVersion != 1 && iWordVersion != 2) {
192 werr(0, "This file is not from ''Win Word 1 or 2'.");
193 return -1;
194 }
195 bSuccess = bGetDocumentText(pFile, aucHeader);
196 if (bSuccess) {
197 vGetDocumentData(pFile, aucHeader);
198 vGetPropertyInfo(pFile, NULL,
199 NULL, 0, NULL, 0,
200 aucHeader, iWordVersion);
201 vSetDefaultTabWidth(pFile, NULL,
202 NULL, 0, NULL, 0,
203 aucHeader, iWordVersion);
204 vGetNotesInfo(pFile, NULL,
205 NULL, 0, NULL, 0,
206 aucHeader, iWordVersion);
207 }
208 return bSuccess ? iWordVersion : -1;
209 } /* end of iInitDocumentWIN */
210