1 /*
2  * prop0.c
3  * Copyright (C) 2002-2004 A.J. van Os; Released under GNU GPL
4  *
5  * Description:
6  * Read the property information from a Word for DOS file
7  */
8 
9 #include <string.h>
10 #include <time.h>
11 #include "antiword.h"
12 
13 
/*
 * iGetDateField - read a date field of one or two decimal digits
 *
 * ppcPos: in: start of the field, out: first character after the field
 *	(only updated on success)
 *
 * returns the value of the field (0-99) or -1 when there is no digit
 */
static int
iGetDateField(const char **ppcPos)
{
	const char	*pcTmp;
	int		iValue;

	pcTmp = *ppcPos;
	/* <ctype.h> functions need a value in the unsigned char range */
	if (!isdigit((unsigned char)*pcTmp)) {
		return -1;
	}
	iValue = (int)(*pcTmp - '0');
	pcTmp++;
	if (isdigit((unsigned char)*pcTmp)) {
		iValue = 10 * iValue + (int)(*pcTmp - '0');
		pcTmp++;
	}
	*ppcPos = pcTmp;
	return iValue;
} /* end of iGetDateField */

/*
 * iSkipDateSeparator - step over the single character between two fields
 *
 * Any character that is not alphanumeric is accepted as a separator, except
 * the terminating NUL: stepping over the terminator would make the caller
 * read beyond the end of the string.
 *
 * returns 1 when a separator was skipped, otherwise 0
 */
static int
iSkipDateSeparator(const char **ppcPos)
{
	const char	*pcTmp;

	pcTmp = *ppcPos;
	if (*pcTmp == '\0' || isalnum((unsigned char)*pcTmp)) {
		return 0;
	}
	*ppcPos = pcTmp + 1;
	return 1;
} /* end of iSkipDateSeparator */

/*
 * tConvertDosDate - convert DOS date format
 *
 * szDosDate: NUL-terminated text of the form month, separator, day,
 *	separator, year. Every number is one or two decimal digits.
 *	Whatever follows the year is ignored.
 *
 * A year below 80 is taken to be in the 2000s, any other year in the 1900s.
 * The time of day is left at zero and the result comes from mktime(), so it
 * is expressed in local time. Only the ranges 1-12 (month) and 1-31 (day)
 * are checked here; mktime() normalises days beyond the end of a month.
 *
 * returns Unix time_t or -1
 */
static time_t
tConvertDosDate(const char *szDosDate)
{
	struct tm	tTime;
	const char	*pcTmp;
	int		iMonth, iDay, iYear;

	if (szDosDate == NULL) {
		return (time_t)-1;
	}

	pcTmp = szDosDate;
	/* Get the month and the first separator */
	iMonth = iGetDateField(&pcTmp);
	if (iMonth < 0 || iSkipDateSeparator(&pcTmp) == 0) {
		return (time_t)-1;
	}
	/* Get the day and the second separator */
	iDay = iGetDateField(&pcTmp);
	if (iDay < 0 || iSkipDateSeparator(&pcTmp) == 0) {
		return (time_t)-1;
	}
	/* Get the year */
	iYear = iGetDateField(&pcTmp);
	if (iYear < 0) {
		return (time_t)-1;
	}
	/* Check the values */
	if (iMonth < 1 || iMonth > 12 || iDay < 1 || iDay > 31) {
		return (time_t)-1;
	}
	/* Correct the values */
	if (iYear < 80) {
		iYear += 100;	/* 00 means 2000 is 100 */
	}
	memset(&tTime, 0, sizeof(tTime));
	tTime.tm_year = iYear;
	tTime.tm_mon = iMonth - 1;	/* From 01-12 to 00-11 */
	tTime.tm_mday = iDay;
	tTime.tm_isdst = -1;	/* Let mktime decide about daylight saving */
	return mktime(&tTime);
} /* end of tConvertDosDate */
85 
86 /*
87  * Build the lists with Document Property Information for Word for DOS files
88  */
89 void
vGet0DopInfo(FILE * pFile,const UCHAR * aucHeader)90 vGet0DopInfo(FILE *pFile, const UCHAR *aucHeader)
91 {
92 	document_block_type	tDocument;
93 	UCHAR	*aucBuffer;
94 	ULONG	ulBeginSumdInfo, ulBeginNextBlock;
95 	size_t	tLen;
96 	USHORT	usOffset;
97 
98         tDocument.ucHdrFtrSpecification = 0;
99         tDocument.usDefaultTabWidth = usGetWord(0x70, aucHeader); /* dxaTab */
100         tDocument.tCreateDate = (time_t)-1;
101         tDocument.tRevisedDate = (time_t)-1;
102 
103 	ulBeginSumdInfo = 128 * (ULONG)usGetWord(0x1c, aucHeader);
104 	DBG_HEX(ulBeginSumdInfo);
105 	ulBeginNextBlock = 128 * (ULONG)usGetWord(0x6a, aucHeader);
106 	DBG_HEX(ulBeginNextBlock);
107 
108 	if (ulBeginSumdInfo < ulBeginNextBlock && ulBeginNextBlock != 0) {
109 		/* There is a summary information block */
110 		tLen = (size_t)(ulBeginNextBlock - ulBeginSumdInfo);
111 		aucBuffer = xmalloc(tLen);
112 		/* Read the summary information block */
113 		if (bReadBytes(aucBuffer, tLen, ulBeginSumdInfo, pFile)) {
114        			usOffset = usGetWord(12, aucBuffer);
115 			if (aucBuffer[usOffset] != 0) {
116 				NO_DBG_STRN(aucBuffer + usOffset, 8);
117 				tDocument.tRevisedDate =
118 				tConvertDosDate((char *)aucBuffer + usOffset);
119 			}
120 			usOffset = usGetWord(14, aucBuffer);
121 			if (aucBuffer[usOffset] != 0) {
122 				NO_DBG_STRN(aucBuffer + usOffset, 8);
123 				tDocument.tCreateDate =
124 				tConvertDosDate((char *)aucBuffer + usOffset);
125 			}
126 		}
127 		aucBuffer = xfree(aucBuffer);
128 	}
129         vCreateDocumentInfoList(&tDocument);
130 } /* end of vGet0DopInfo */
131 
132 /*
133  * Fill the section information block with information
134  * from a Word for DOS file.
135  */
136 static void
vGet0SectionInfo(const UCHAR * aucGrpprl,size_t tBytes,section_block_type * pSection)137 vGet0SectionInfo(const UCHAR *aucGrpprl, size_t tBytes,
138 		section_block_type *pSection)
139 {
140 	USHORT	usCcol;
141 	UCHAR	ucTmp;
142 
143 	fail(aucGrpprl == NULL || pSection == NULL);
144 
145 	if (tBytes < 2) {
146 		return;
147 	}
148 	/* bkc */
149 	ucTmp = ucGetByte(1, aucGrpprl);
150 	DBG_HEX(ucTmp);
151 	ucTmp &= 0x07;
152 	DBG_HEX(ucTmp);
153 	pSection->bNewPage = ucTmp != 0 && ucTmp != 1;
154 	if (tBytes < 18) {
155 		return;
156 	}
157 	/* ccolM1 */
158 	usCcol = (USHORT)ucGetByte(17, aucGrpprl);
159 	DBG_DEC(usCcol);
160 } /* end of vGet0SectionInfo */
161 
162 /*
163  * Build the lists with Section Property Information for Word for DOS files
164  */
165 void
vGet0SepInfo(FILE * pFile,const UCHAR * aucHeader)166 vGet0SepInfo(FILE *pFile, const UCHAR *aucHeader)
167 {
168 	section_block_type	tSection;
169 	UCHAR	*aucBuffer;
170 	ULONG	ulBeginOfText, ulTextOffset, ulBeginSectInfo;
171 	ULONG	ulCharPos, ulSectPage, ulBeginNextBlock;
172 	size_t	tSectInfoLen, tIndex, tSections, tBytes;
173 	UCHAR	aucTmp[2], aucFpage[35];
174 
175 	fail(pFile == NULL || aucHeader == NULL);
176 
177 	ulBeginOfText = 128;
178 	NO_DBG_HEX(ulBeginOfText);
179 	ulBeginSectInfo = 128 * (ULONG)usGetWord(0x18, aucHeader);
180 	DBG_HEX(ulBeginSectInfo);
181 	ulBeginNextBlock = 128 * (ULONG)usGetWord(0x1a, aucHeader);
182 	DBG_HEX(ulBeginNextBlock);
183 	if (ulBeginSectInfo == ulBeginNextBlock) {
184 		/* There is no section information block */
185 		return;
186 	}
187 
188 	/* Get the the number of sections */
189 	if (!bReadBytes(aucTmp, 2, ulBeginSectInfo, pFile)) {
190 		return;
191 	}
192 	tSections = (size_t)usGetWord(0, aucTmp);
193 	NO_DBG_DEC(tSections);
194 
195 	/* Read the Section Descriptors */
196 	tSectInfoLen = 10 * tSections;
197 	NO_DBG_DEC(tSectInfoLen);
198 	aucBuffer = xmalloc(tSectInfoLen);
199 	if (!bReadBytes(aucBuffer, tSectInfoLen, ulBeginSectInfo + 4, pFile)) {
200 		aucBuffer = xfree(aucBuffer);
201 		return;
202 	}
203 	NO_DBG_PRINT_BLOCK(aucBuffer, tSectInfoLen);
204 
205 	/* Read the Section Properties */
206 	for (tIndex = 0; tIndex < tSections; tIndex++) {
207 		ulTextOffset = ulGetLong(10 * tIndex, aucBuffer);
208 		NO_DBG_HEX(ulTextOffset);
209 		ulCharPos = ulBeginOfText + ulTextOffset;
210 		NO_DBG_HEX(ulTextOffset);
211 		ulSectPage = ulGetLong(10 * tIndex + 6, aucBuffer);
212 		NO_DBG_HEX(ulSectPage);
213 		if (ulSectPage == FC_INVALID ||		/* Must use defaults */
214 		    ulSectPage < 128 ||			/* Should not happen */
215 		    ulSectPage >= ulBeginSectInfo) {	/* Should not happen */
216 			DBG_HEX_C(ulSectPage != FC_INVALID, ulSectPage);
217 			vDefault2SectionInfoList(ulCharPos);
218 			continue;
219 		}
220 		/* Get the number of bytes to read */
221 		if (!bReadBytes(aucTmp, 1, ulSectPage, pFile)) {
222 			continue;
223 		}
224 		tBytes = 1 + (size_t)ucGetByte(0, aucTmp);
225 		NO_DBG_DEC(tBytes);
226 		if (tBytes > sizeof(aucFpage)) {
227 			DBG_DEC(tBytes);
228 			tBytes = sizeof(aucFpage);
229 		}
230 		/* Read the bytes */
231 		if (!bReadBytes(aucFpage, tBytes, ulSectPage, pFile)) {
232 			continue;
233 		}
234 		NO_DBG_PRINT_BLOCK(aucFpage, tBytes);
235 		/* Process the bytes */
236 		vGetDefaultSection(&tSection);
237 		vGet0SectionInfo(aucFpage + 1, tBytes - 1, &tSection);
238 		vAdd2SectionInfoList(&tSection, ulCharPos);
239 	}
240 	/* Clean up before you leave */
241 	aucBuffer = xfree(aucBuffer);
242 } /* end of vGet0SepInfo */
243 
244 /*
245  * Fill the style information block with information
246  * from a Word for DOS file.
247  */
248 static void
vGet0StyleInfo(int iFodo,const UCHAR * aucGrpprl,style_block_type * pStyle)249 vGet0StyleInfo(int iFodo, const UCHAR *aucGrpprl, style_block_type *pStyle)
250 {
251 	int	iBytes;
252 	UCHAR	ucTmp;
253 
254 	fail(iFodo <= 0 || aucGrpprl == NULL || pStyle == NULL);
255 
256 	pStyle->usIstdNext = ISTD_NORMAL;
257 
258 	iBytes = (int)ucGetByte(iFodo, aucGrpprl);
259 	if (iBytes < 1) {
260 		return;
261 	}
262 	/* stc if styled */
263 	ucTmp = ucGetByte(iFodo + 1, aucGrpprl);
264 	if ((ucTmp & BIT(0)) != 0) {
265 		ucTmp >>= 1;
266 		if (ucTmp >= 88 && ucTmp <= 94) {
267 			/* Header levels 1 through 7 */
268 			pStyle->usIstd = ucTmp - 87;
269 			pStyle->ucNumLevel = 1;
270 		}
271 	}
272 	if (iBytes < 2) {
273 		return;
274 	}
275 	/* jc */
276 	ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
277 	pStyle->ucAlignment = ucTmp & 0x02;
278 	if (iBytes < 3) {
279 		return;
280 	}
281 	/* stc */
282 	ucTmp = ucGetByte(iFodo + 3, aucGrpprl);
283 	ucTmp &= 0x7f;
284 	if (ucTmp >= 88 && ucTmp <= 94) {
285 		/* Header levels 1 through 7 */
286 		pStyle->usIstd = ucTmp - 87;
287 		pStyle->ucNumLevel = 1;
288 	}
289 	if (iBytes < 6) {
290 		return;
291 	}
292 	/* dxaRight */
293 	pStyle->sRightIndent = (short)usGetWord(iFodo + 5, aucGrpprl);
294 	NO_DBG_DEC(pStyle->sRightIndent);
295 	if (iBytes < 8) {
296 		return;
297 	}
298 	/* dxaLeft */
299 	pStyle->sLeftIndent = (short)usGetWord(iFodo + 7, aucGrpprl);
300 	NO_DBG_DEC(pStyle->sLeftIndent);
301 	if (iBytes < 10) {
302 		return;
303 	}
304 	/* dxaLeft1 */
305 	pStyle->sLeftIndent1 = (short)usGetWord(iFodo + 9, aucGrpprl);
306 	NO_DBG_DEC(pStyle->sLeftIndent1);
307 	if (iBytes < 14) {
308 		return;
309 	}
310 	/* dyaBefore */
311 	pStyle->usBeforeIndent = usGetWord(iFodo + 13, aucGrpprl);
312 	NO_DBG_DEC(pStyle->usBeforeIndent);
313 	if (iBytes < 16) {
314 		return;
315 	}
316 	/* dyaAfter */
317 	pStyle->usAfterIndent = usGetWord(iFodo + 15, aucGrpprl);
318 	NO_DBG_DEC(pStyle->usAfterIndent);
319 } /* end of vGet0StyleInfo */
320 
321 /*
322  * Build the lists with Paragraph Information for Word for DOS files
323  */
324 void
vGet0PapInfo(FILE * pFile,const UCHAR * aucHeader)325 vGet0PapInfo(FILE *pFile, const UCHAR *aucHeader)
326 {
327 	style_block_type	tStyle;
328 	ULONG	ulBeginParfInfo, ulCharPos, ulCharPosNext;
329 	int	iIndex, iRun, iFodo;
330 	UCHAR	aucFpage[128];
331 
332 	fail(pFile == NULL || aucHeader == NULL);
333 
334 	ulBeginParfInfo = 128 * (ULONG)usGetWord(0x12, aucHeader);
335 	NO_DBG_HEX(ulBeginParfInfo);
336 
337 	do {
338 		if (!bReadBytes(aucFpage, 128, ulBeginParfInfo, pFile)) {
339 			return;
340 		}
341 		NO_DBG_PRINT_BLOCK(aucFpage, 128);
342 		ulCharPosNext = ulGetLong(0, aucFpage);
343 		iRun = (int)ucGetByte(0x7f, aucFpage);
344 		NO_DBG_DEC(iRun);
345 		for (iIndex = 0; iIndex < iRun; iIndex++) {
346 			iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
347 			if (iFodo <= 0 || iFodo > 0x79) {
348 				DBG_DEC_C(iFodo != (int)0xffff, iFodo);
349 				continue;
350 			}
351 			vFillStyleFromStylesheet(0, &tStyle);
352 			vGet0StyleInfo(iFodo, aucFpage + 4, &tStyle);
353 			ulCharPos = ulCharPosNext;
354 			ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
355 			tStyle.ulFileOffset = ulCharPos;
356 			vAdd2StyleInfoList(&tStyle);
357 		}
358 		ulBeginParfInfo += 128;
359 	} while (ulCharPosNext == ulBeginParfInfo);
360 } /* end of vGet0PapInfo */
361 
362 /*
363  * Fill the font information block with information
364  * from a Word for DOS file.
365  */
366 static void
vGet0FontInfo(int iFodo,const UCHAR * aucGrpprl,font_block_type * pFont)367 vGet0FontInfo(int iFodo, const UCHAR *aucGrpprl, font_block_type *pFont)
368 {
369 	int	iBytes;
370 	UCHAR	ucTmp;
371 
372 	fail(iFodo <= 0 || aucGrpprl == NULL || pFont == NULL);
373 
374 	iBytes = (int)ucGetByte(iFodo, aucGrpprl);
375 	if (iBytes < 2) {
376 		return;
377 	}
378 	/* fBold, fItalic, cFtc */
379 	ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
380 	if ((ucTmp & BIT(0)) != 0) {
381 		pFont->usFontStyle |= FONT_BOLD;
382 	}
383 	if ((ucTmp & BIT(1)) != 0) {
384 		pFont->usFontStyle |= FONT_ITALIC;
385 	}
386 	pFont->ucFontNumber = ucTmp >> 2;
387 	NO_DBG_DEC(pFont->ucFontNumber);
388 	if (iBytes < 3) {
389 		return;
390 	}
391 	/* cHps */
392 	pFont->usFontSize = (USHORT)ucGetByte(iFodo + 3, aucGrpprl);
393 	NO_DBG_DEC(pFont->usFontSize);
394 	if (iBytes < 4) {
395 		return;
396 	}
397 	/* cKul, fStrike, fCaps, fSmallCaps, fVanish */
398 	ucTmp = ucGetByte(iFodo + 4, aucGrpprl);
399 	if ((ucTmp & BIT(0)) != 0 || (ucTmp & BIT(2)) != 0) {
400 		pFont->usFontStyle |= FONT_UNDERLINE;
401 	}
402 	if ((ucTmp & BIT(1)) != 0) {
403 		pFont->usFontStyle |= FONT_STRIKE;
404 	}
405 	if ((ucTmp & BIT(4)) != 0) {
406 		pFont->usFontStyle |= FONT_CAPITALS;
407 	}
408 	if ((ucTmp & BIT(5)) != 0) {
409 		pFont->usFontStyle |= FONT_SMALL_CAPITALS;
410 	}
411 	if ((ucTmp & BIT(7)) != 0) {
412 		pFont->usFontStyle |= FONT_HIDDEN;
413 	}
414 	DBG_HEX(pFont->usFontStyle);
415 	if (iBytes < 6) {
416 		return;
417 	}
418 	/* cIss */
419 	ucTmp = ucGetByte(iFodo + 6, aucGrpprl);
420 	if (ucTmp != 0) {
421 		if (ucTmp < 128) {
422 			pFont->usFontStyle |= FONT_SUPERSCRIPT;
423 			DBG_MSG("Superscript");
424 		} else {
425 			pFont->usFontStyle |= FONT_SUBSCRIPT;
426 			DBG_MSG("Subscript");
427 		}
428 	}
429 	if (iBytes < 7) {
430 		return;
431 	}
432 	/* cIco */
433 	ucTmp = ucGetByte(iFodo + 7, aucGrpprl);
434 	switch (ucTmp & 0x07) {
435 	case 0: pFont->ucFontColor = FONT_COLOR_BLACK; break;
436 	case 1: pFont->ucFontColor = FONT_COLOR_RED; break;
437 	case 2: pFont->ucFontColor = FONT_COLOR_GREEN; break;
438 	case 3: pFont->ucFontColor = FONT_COLOR_BLUE; break;
439 	case 4: pFont->ucFontColor = FONT_COLOR_CYAN; break;
440 	case 5: pFont->ucFontColor = FONT_COLOR_MAGENTA; break;
441 	case 6: pFont->ucFontColor = FONT_COLOR_YELLOW; break;
442 	case 7: pFont->ucFontColor = FONT_COLOR_WHITE; break;
443 	default:pFont->ucFontColor = FONT_COLOR_BLACK; break;
444 	}
445 	NO_DBG_DEC(pFont->ucFontColor);
446 } /* end of vGet0FontInfo */
447 
448 /*
449  * Build the lists with Character Information for Word for DOS files
450  */
451 void
vGet0ChrInfo(FILE * pFile,const UCHAR * aucHeader)452 vGet0ChrInfo(FILE *pFile, const UCHAR *aucHeader)
453 {
454 	font_block_type		tFont;
455 	ULONG	ulBeginCharInfo, ulCharPos, ulCharPosNext;
456 	int	iIndex, iRun, iFodo;
457 	UCHAR	aucFpage[128];
458 
459 	fail(pFile == NULL || aucHeader == NULL);
460 
461 	ulBeginCharInfo = ulGetLong(0x0e, aucHeader);
462 	NO_DBG_HEX(ulBeginCharInfo);
463 	ulBeginCharInfo = ROUND128(ulBeginCharInfo);
464 	NO_DBG_HEX(ulBeginCharInfo);
465 
466 	do {
467 		if (!bReadBytes(aucFpage, 128, ulBeginCharInfo, pFile)) {
468 			return;
469 		}
470 		NO_DBG_PRINT_BLOCK(aucFpage, 128);
471 		ulCharPosNext = ulGetLong(0, aucFpage);
472 		iRun = (int)ucGetByte(0x7f, aucFpage);
473 		NO_DBG_DEC(iRun);
474 		for (iIndex = 0; iIndex < iRun; iIndex++) {
475 			iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
476 			if (iFodo <= 0 || iFodo > 0x79) {
477 				DBG_DEC_C(iFodo != (int)0xffff, iFodo);
478 				continue;
479 			}
480 			vFillFontFromStylesheet(0, &tFont);
481 			vGet0FontInfo(iFodo, aucFpage + 4, &tFont);
482 			ulCharPos = ulCharPosNext;
483 			ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
484 			tFont.ulFileOffset = ulCharPos;
485 			vAdd2FontInfoList(&tFont);
486 		}
487 		ulBeginCharInfo += 128;
488 	} while (ulCharPosNext == ulBeginCharInfo);
489 } /* end of vGet0ChrInfo */
490