1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
2 /* libwps
3  * Version: MPL 2.0 / LGPLv2.1+
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * Major Contributor(s):
10  * Copyright (C) 2015 Sean Young <sean@mess.org>
11  *
12  * For minor contributions see the git repository.
13  *
14  * Alternatively, the contents of this file may be used under the terms
15  * of the GNU Lesser General Public License Version 2.1 or later
16  * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
17  * applicable instead of those above.
18  *
19  * For further information visit http://libwps.sourceforge.net
20  */
21 
22 #include <librevenge-stream/librevenge-stream.h>
23 #include "libwps_internal.h"
24 
25 #include "WPSContentListener.h"
26 #include "WPSParagraph.h"
27 #include "WPSFont.h"
28 #include "WPSHeader.h"
29 
30 #include "MSWrite.h"
31 #include "DosWord.h"
32 
33 #include <algorithm>
34 #include <cstring>
35 #include <cstdio>
36 
37 namespace DosWordParserInternal
38 {
39 
//! Character properties (CHP) record.
//!
//! DosWordParser::readCHP() copies the raw bytes of the record over this
//! structure, so neither the order nor the size of the members may change.
struct CHP
{
	//! bit 0: a style sheet entry is referenced, bits 1-7: its style code
	uint8_t m_fStyled = 0;
	//! bit 0: bold, bit 1: italic, bits 2-7: font code
	uint8_t m_fBold = 0;
	//! twice the font size (readCHP() halves it)
	uint8_t m_hps = 0;
	//! bit 0: underline, bit 1: strike-out, bit 2: double underline,
	//! bits 4-5: capitals (0x10 all caps, 0x30 small caps),
	//! bit 6: special character, bit 7: hidden
	uint8_t m_fUline = 0;
	//! not interpreted by the parser
	uint8_t m_unused = 0;
	//! non-zero: sub/superscript; bit 7 set means subscript
	uint8_t m_hpsPos = 0;
	//! bits 0-2: colour index given to DosWordParser::color()
	uint8_t m_clr = 0;
};
60 
//! Paragraph properties (PAP) record.
//!
//! DosWordParser::readPAP() copies the raw bytes of the record over this
//! structure, so neither the order nor the size of the members may change.
//! The 16-bit members hold the little-endian file representation and are
//! decoded with WPS_LE_GET_GUINT16 by the reader.
struct PAP
{
	//! bit 0: a style sheet entry is referenced, bits 1-7: its style code
	uint8_t m_style = 0;
	//! bits 0-1: justification (0 left, 1 centre, 2 right, 3 full),
	//! bit 2: no break, bit 3: no break with next, bit 5: header uses margin
	uint8_t m_justification = 0;
	//! not interpreted by the parser
	uint8_t m_reserved2[2] = { 0, 0 };
	//! right margin (readPAP() divides it by 1440)
	uint16_t m_dxaRight = 0;
	//! left margin (readPAP() divides it by 1440)
	uint16_t m_dxaLeft = 0;
	//! first line margin (readPAP() divides it by 1440)
	uint16_t m_dxaLeft1 = 0;
	//! line spacing (readPAP() divides it by 240; values <= 0 are ignored)
	uint16_t m_dyaLine = 0;
	//! spacing before the paragraph (readPAP() divides it by 240)
	uint16_t m_dyaBefore = 0;
	//! spacing after the paragraph (readPAP() divides it by 240)
	uint16_t m_dyaAfter = 0;
	//! bit 0: footer instead of header, bits 1-2: header/footer occurrence,
	//! bit 3: first page, bits 4-5: border selection, bits 6-7: border line
	uint8_t m_rhc = 0;
	//! bits 0-3: border mask, bits 4-6: border colour index
	uint8_t m_border = 0;
	//! bits 0-6: shading percentage
	uint8_t m_shade = 0;
	//! bits 4-6: shading colour index
	uint8_t m_pos = 0;
	//! not interpreted by the parser
	uint16_t m_dxaFromText = 0;

	//! Tab descriptor; the first entry with a zero position ends the list.
	struct TBD
	{
		//! tab position (readPAP() divides it by 1440)
		uint16_t m_dxa = 0;
		//! bits 0-1: alignment, bits 3-4: leader character index
		uint8_t m_jcTab = 0;
		//! not interpreted by the parser
		uint8_t m_chAlign = 0;
	};
	//! the tab descriptors of the paragraph
	TBD m_TBD[20];
};
104 
// Byte offsets of the file header fields, counted from the start of the file.
// The fields named _W_ are read as 16-bit values and those named _B_ as
// single bytes by this parser; _D_ is presumably a 32-bit value (not read
// in this file).
enum HeaderOffset
{
	HEADER_W_WIDENT   = 0x00, // not read in this file
	HEADER_W_DTY      = 0x02, // not read in this file
	HEADER_W_WTOOL    = 0x04, // not read in this file
	HEADER_D_FCMAC    = 0x0e, // not read in this file
	HEADER_W_PNPARA   = 0x12, // not read in this file
	HEADER_W_PNFNTB   = 0x14, // page (0x80 bytes) of the footnote table
	HEADER_W_PNBKMK   = 0x16, // page following the footnote table
	HEADER_W_PNSETB   = 0x18, // page (0x80 bytes) of the section table
	HEADER_W_PNBFTB   = 0x1a, // page following the section table
	HEADER_W_PNSUMD   = 0x1c, // page (0x80 bytes) of the summary
	HEADER_W_PNMAC    = 0x6a, // page following the summary
	HEADER_B_VERSION  = 0x74, // version byte, decoded by checkHeader()
	HEADER_B_ASV      = 0x75, // bit 1 set: the file is an autosave
	HEADER_W_CODEPAGE = 0x7e  // OEM code page, 0 when unknown
};
123 
124 }
125 
DosWordParser(RVNGInputStreamPtr & input,WPSHeaderPtr & header,libwps_tools_win::Font::Type encoding)126 DosWordParser::DosWordParser(RVNGInputStreamPtr &input, WPSHeaderPtr &header,
127                              libwps_tools_win::Font::Type encoding)
128 	: MSWriteParser(input, header, encoding)
129 {
130 	m_fontType = getFileEncoding(encoding);
131 }
132 
~DosWordParser()133 DosWordParser::~DosWordParser()
134 {
135 }
136 
readFFNTB()137 void DosWordParser::readFFNTB()
138 {
139 	// Microsoft Word for DOS does not have this section
140 	WPS_DEBUG_MSG(("DosWordParser::readFFNTB: unexpected\n"));
141 }
142 
143 // basic function to check if the header is ok
checkHeader(WPSHeader * header,bool)144 bool DosWordParser::checkHeader(WPSHeader *header, bool /*strict*/)
145 {
146 	RVNGInputStreamPtr input = getInput();
147 	if (!input || !checkFilePosition(0x100))
148 	{
149 		WPS_DEBUG_MSG(("DosWordParser::checkHeader: file is too short\n"));
150 		return false;
151 	}
152 
153 	input->seek(DosWordParserInternal::HEADER_B_ASV, librevenge::RVNG_SEEK_SET);
154 	if (libwps::readU8(input) & 2)
155 	{
156 		WPS_DEBUG_MSG(("DosWordParser::checkHeader: file is autosaved\n"));
157 		return false;
158 	}
159 
160 	input->seek(DosWordParserInternal::HEADER_B_VERSION, librevenge::RVNG_SEEK_SET);
161 	uint8_t ver = libwps::readU8(input);
162 
163 	switch (ver)
164 	{
165 	case 0:
166 		WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 4.0 or earlier\n"));
167 		header->setMajorVersion(4);
168 		break;
169 	case 3:
170 		WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 5 OEM\n"));
171 		header->setMajorVersion(5);
172 		break;
173 	case 4:
174 		WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 5\n"));
175 		header->setMajorVersion(5);
176 		break;
177 	case 7:
178 		WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 5.5\n"));
179 		header->setMajorVersion(5);
180 		break;
181 	case 9:
182 		WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 6.0\n"));
183 		header->setMajorVersion(6);
184 		break;
185 	default:
186 		WPS_DEBUG_MSG(("DosWordParser::checkHeader: unknown version %u\n", ver));
187 		break;
188 	}
189 
190 	input->seek(DosWordParserInternal::HEADER_W_CODEPAGE, librevenge::RVNG_SEEK_SET);
191 	uint16_t codepage = libwps::readU16(input);
192 
193 	if (!codepage)
194 		header->setNeedEncoding(true);
195 
196 	return true;
197 }
198 
getFileEncoding(libwps_tools_win::Font::Type encoding)199 libwps_tools_win::Font::Type DosWordParser::getFileEncoding(libwps_tools_win::Font::Type encoding)
200 {
201 	RVNGInputStreamPtr input = getInput();
202 
203 	input->seek(DosWordParserInternal::HEADER_W_CODEPAGE, librevenge::RVNG_SEEK_SET);
204 	uint16_t codepage = libwps::readU16(input);
205 
206 	WPS_DEBUG_MSG(("DosWordParser::getFileEncoding: codepage %u\n", codepage));
207 	if (codepage)
208 		encoding = libwps_tools_win::Font::getTypeForOEM(codepage);
209 
210 	if (encoding == libwps_tools_win::Font::UNKNOWN)
211 		encoding = libwps_tools_win::Font::CP_437;
212 
213 	return encoding;
214 }
215 
color(int clr)216 WPSColor DosWordParser::color(int clr)
217 {
218 	switch (clr)
219 	{
220 	default:
221 	case 0: // black (default)
222 		return WPSColor(0, 0, 0);
223 	case 1: // red
224 		return WPSColor(255, 0, 0);
225 	case 2: // green
226 		return WPSColor(0, 255, 0);
227 	case 3: // blue
228 		return WPSColor(0, 0, 255);
229 	case 4: // violet
230 		return WPSColor(127, 0, 255);
231 	case 5: // magenta
232 		return WPSColor(255, 0, 255);
233 	case 6: // yellow
234 		return WPSColor(0, 255, 255);
235 	case 7: // white
236 		return WPSColor(255, 255, 255);
237 	}
238 }
239 
readCHP(uint32_t fcFirst,uint32_t fcLim,unsigned cch)240 void DosWordParser::readCHP(uint32_t fcFirst, uint32_t fcLim, unsigned cch)
241 {
242 	RVNGInputStreamPtr input = getInput();
243 
244 	DosWordParserInternal::CHP chp;
245 
246 	chp.m_hps = 24;
247 
248 	if (cch)
249 	{
250 		if (cch > sizeof(chp))
251 			cch = sizeof(chp);
252 
253 		unsigned long read_bytes;
254 		const unsigned char *p = input->read(cch, read_bytes);
255 		if (read_bytes != cch)
256 		{
257 			WPS_DEBUG_MSG(("DosWordParser::readCHP failed to read CHP entry\n"));
258 			throw (libwps::ParseException());
259 		}
260 
261 		memcpy(&chp, p, cch);
262 	}
263 
264 	MSWriteParserInternal::Font font;
265 
266 	if (chp.m_fStyled & 1)
267 	{
268 		switch (chp.m_fStyled / 2)
269 		{
270 		case 13: // footnote reference
271 			font.m_footnote = true;
272 			break;
273 		case 26: // annotation reference
274 			font.m_annotation = true;
275 			break;
276 		default:
277 			WPS_DEBUG_MSG(("Style sheet stc=%u %x-%x\n", chp.m_fStyled / 2, fcFirst, fcLim));
278 			break;
279 		}
280 	}
281 
282 	unsigned ftc = (chp.m_fBold / 4);
283 
284 	// Note the font depends on the printer driver
285 	if (ftc <= 15)
286 		font.m_name.sprintf("modern %c", 'a' + ftc);
287 	else if (ftc <= 31)
288 		font.m_name.sprintf("roman %c", 'a' + (ftc - 16));
289 	else if (ftc <= 39)
290 		font.m_name.sprintf("script %c", 'a' + (ftc - 32));
291 	else if (ftc <= 47)
292 		font.m_name.sprintf("foreign %c", 'a' + (ftc - 40));
293 	else if (ftc <= 55)
294 		font.m_name.sprintf("decor %c", 'a' + (ftc - 48));
295 	else
296 		font.m_name.sprintf("symbol %c", 'a' + (ftc - 56));
297 
298 	font.m_size = chp.m_hps / 2.0;
299 	if (chp.m_fBold & 1)
300 		font.m_attributes |= WPS_BOLD_BIT;
301 	if (chp.m_fBold & 2)
302 		font.m_attributes |= WPS_ITALICS_BIT;
303 	if (chp.m_fUline & 1)
304 		font.m_attributes |= WPS_UNDERLINE_BIT;
305 	if (chp.m_fUline & 2)
306 		font.m_attributes |= WPS_STRIKEOUT_BIT;
307 	if (chp.m_fUline & 4)
308 		font.m_attributes |= WPS_DOUBLE_UNDERLINE_BIT;
309 	// FIXME: if (chp.m_fUline & 8) marks a text new (not accepted)
310 	if ((chp.m_fUline & 0x30) == 0x10)
311 		font.m_attributes |= WPS_ALL_CAPS_BIT;
312 	else if ((chp.m_fUline & 0x30) == 0x30)
313 		font.m_attributes |= WPS_SMALL_CAPS_BIT;
314 	if (chp.m_fUline & 0x40)
315 		font.m_special = true;
316 	if (chp.m_fUline & 0x80)
317 		font.m_attributes |= WPS_HIDDEN_BIT;
318 	if (chp.m_hpsPos)
319 	{
320 		if (chp.m_hpsPos & 0x80)
321 			font.m_attributes |= WPS_SUBSCRIPT_BIT;
322 		else
323 			font.m_attributes |= WPS_SUPERSCRIPT_BIT;
324 	}
325 
326 	font.m_fcFirst = fcFirst;
327 	font.m_fcLim = fcLim;
328 	font.m_encoding = libwps_tools_win::Font::getFontType(font.m_name);
329 	if (font.m_encoding == libwps_tools_win::Font::UNKNOWN)
330 		font.m_encoding = m_fontType;
331 
332 	font.m_color = color(chp.m_clr & 7);
333 
334 	m_fontList.push_back(font);
335 }
336 
readPAP(uint32_t fcFirst,uint32_t fcLim,unsigned cch)337 void DosWordParser::readPAP(uint32_t fcFirst, uint32_t fcLim, unsigned cch)
338 {
339 	RVNGInputStreamPtr input = getInput();
340 
341 	DosWordParserInternal::PAP pap;
342 
343 	WPS_LE_PUT_GUINT16(&pap.m_dyaLine, 240);
344 
345 	if (cch)
346 	{
347 		if (cch > sizeof(pap))
348 			cch = sizeof(pap);
349 
350 		unsigned long read_bytes;
351 		const unsigned char *p = input->read(cch, read_bytes);
352 		if (read_bytes != cch)
353 		{
354 			WPS_DEBUG_MSG(("DosWordParser::readPAP failed to read PAP\n"));
355 			throw (libwps::ParseException());
356 		}
357 
358 		memcpy(&pap, p, cch);
359 	}
360 
361 	auto dxaLeft = int16_t(WPS_LE_GET_GUINT16(&pap.m_dxaLeft));
362 	auto dxaLeft1 = int16_t(WPS_LE_GET_GUINT16(&pap.m_dxaLeft1));
363 	auto dxaRight = int16_t(WPS_LE_GET_GUINT16(&pap.m_dxaRight));
364 
365 	MSWriteParserInternal::Paragraph para;
366 	int i;
367 
368 	for (i=0; i<20; i++)
369 	{
370 		auto pos = WPS_LE_GET_GUINT16(&pap.m_TBD[i].m_dxa);
371 
372 		if (!pos)
373 			break;
374 
375 		WPSTabStop::Alignment align;
376 
377 		switch (pap.m_TBD[i].m_jcTab & 3)
378 		{
379 		default:
380 		case 0:
381 			align = WPSTabStop::LEFT;
382 			break;
383 		case 1:
384 			align = WPSTabStop::CENTER;
385 			break;
386 		case 2:
387 			align = WPSTabStop::RIGHT;
388 			break;
389 		case 3:
390 			align = WPSTabStop::DECIMAL;
391 			break;
392 		}
393 
394 		unsigned leader = (pap.m_TBD[i].m_jcTab >> 3) & 3;
395 		WPSTabStop tab(pos / 1440., align, uint16_t("\0.-_"[leader]));
396 
397 		para.m_tabs.push_back(tab);
398 
399 		if (dxaLeft + dxaLeft1 == pos)
400 			para.m_skiptab = true;
401 	}
402 
403 	switch (pap.m_justification & 3)
404 	{
405 	default:
406 	case 0:
407 		para.m_justify = libwps::JustificationLeft;
408 		break;
409 	case 1:
410 		para.m_justify = libwps::JustificationCenter;
411 		break;
412 	case 2:
413 		para.m_justify = libwps::JustificationRight;
414 		break;
415 	case 3:
416 		para.m_justify = libwps::JustificationFull;
417 		break;
418 	}
419 
420 	para.m_margins[0] = dxaLeft1 / 1440.0;
421 	para.m_margins[1] = dxaLeft / 1440.0;
422 	para.m_margins[2] = dxaRight / 1440.0;
423 
424 	// spacings
425 	auto dyaLine = int16_t(WPS_LE_GET_GUINT16(&pap.m_dyaLine));
426 	auto dyaBefore = WPS_LE_GET_GUINT16(&pap.m_dyaBefore);
427 	auto dyaAfter = WPS_LE_GET_GUINT16(&pap.m_dyaAfter);
428 	// dyaLine = -40 means "auto"
429 	if (dyaLine > 0)
430 		para.m_spacings[0] = dyaLine / 240.0;
431 	para.m_spacings[1] = dyaBefore / 240.0;
432 	para.m_spacings[2] = dyaAfter / 240.0;
433 
434 	para.m_fcFirst = fcFirst;
435 	para.m_fcLim = fcLim;
436 
437 	if (pap.m_rhc & 0xe)
438 	{
439 		if (pap.m_rhc & 1)
440 			para.m_Location = MSWriteParserInternal::Paragraph::FOOTER;
441 		else
442 			para.m_Location = MSWriteParserInternal::Paragraph::HEADER;
443 
444 		switch ((pap.m_rhc >> 1) & 3)
445 		{
446 		default:
447 		case 3: // all
448 			para.m_HeaderFooterOccurrence = WPSPageSpan::ALL;
449 			break;
450 		case 2: // even
451 			para.m_HeaderFooterOccurrence = WPSPageSpan::EVEN;
452 			break;
453 		case 1: // odd
454 			para.m_HeaderFooterOccurrence = WPSPageSpan::ODD;
455 			break;
456 		case 0: // never; however might be on first page
457 			para.m_HeaderFooterOccurrence = WPSPageSpan::NEVER;
458 			break;
459 		}
460 
461 		para.m_firstpage = (pap.m_rhc & 0x08) != 0;
462 	}
463 
464 	if (pap.m_justification & 0x20)
465 		para.m_headerUseMargin = true;
466 
467 	if (pap.m_style & 1)
468 	{
469 		switch (pap.m_style / 2)
470 		{
471 		case 39: // footnote
472 		case 87: // annotation
473 			para.m_Location = MSWriteParserInternal::Paragraph::FOOTNOTE;
474 			break;
475 		default:
476 			WPS_DEBUG_MSG(("DosWordParser::readPAP pap unknown style stc=%u %x-%x\n", pap.m_style / 2, fcFirst, fcLim));
477 			break;
478 		}
479 	}
480 
481 	// Borders
482 	if (pap.m_rhc & 0x30)
483 	{
484 		if ((pap.m_rhc & 0x30) == 0x10)
485 			para.m_border = 15;
486 		else
487 			para.m_border = pap.m_border & 15;
488 
489 		WPSBorder::Type type = WPSBorder::Single;
490 		int width = 1;
491 
492 		switch (pap.m_rhc & 0xc0)
493 		{
494 		default:
495 		case 0:	// normal
496 			break;
497 		case 0x40: // bold
498 			width = 2;
499 			break;
500 		case 0x80: // Double
501 			type = WPSBorder::Double;
502 			break;
503 		case 0xc0: // thick
504 			width = 8;
505 			break;
506 		}
507 
508 		para.m_borderStyle.m_type = type;
509 		para.m_borderStyle.m_width = width;
510 		para.m_borderStyle.m_color = color((pap.m_border / 16) & 7);
511 	}
512 
513 	if (pap.m_justification & 4)
514 		para.m_breakStatus |= libwps::NoBreakBit;
515 	if (pap.m_justification & 8)
516 		para.m_breakStatus |= libwps::NoBreakWithNextBit;
517 
518 	// paragraph shading
519 	if (pap.m_shade & 0x7f)
520 	{
521 		WPSColor c = color((pap.m_pos >> 4) & 7);
522 
523 		unsigned percent = std::min(pap.m_shade & 0x7fu, 100u);
524 
525 		// Use percent to increase brightness
526 		// 100% means color stays the same
527 		// 0% means white
528 
529 		unsigned add = (255 * (100 - percent)) / 100;
530 
531 		para.m_backgroundColor = WPSColor(
532 		                             static_cast<unsigned char>(std::min(c.getRed() + add, 255u)),
533 		                             static_cast<unsigned char>(std::min(c.getGreen() + add, 255u)),
534 		                             static_cast<unsigned char>(std::min(c.getBlue() + add, 255u)));
535 	}
536 
537 	// FIXME: side-by-side
538 	// FIXME: paragraph position
539 
540 	m_paragraphList.push_back(para);
541 }
542 
insertSpecial(uint8_t val,uint32_t fc,MSWriteParserInternal::Paragraph::Location location)543 void DosWordParser::insertSpecial(uint8_t val, uint32_t fc, MSWriteParserInternal::Paragraph::Location location)
544 {
545 	librevenge::RVNGString empty;
546 
547 	switch (val)
548 	{
549 	case 1: // page name
550 		m_listener->insertField(WPSField(WPSField::PageNumber));
551 		break;
552 	case 2: // current date
553 		m_listener->insertField(WPSField(WPSField::Date));
554 		break;
555 	case 3: // current time
556 		m_listener->insertField(WPSField(WPSField::Time));
557 		break;
558 	case 4: // annotation reference
559 		if (location == MSWriteParserInternal::Paragraph::MAIN)
560 			insertNote(true, fc, empty);
561 		break;
562 	case 5: // footnote reference
563 		if (location == MSWriteParserInternal::Paragraph::MAIN)
564 			insertNote(false, fc, empty);
565 		break;
566 	case 7: // sequence mark
567 		WPS_DEBUG_MSG(("Sequence mark\n"));
568 		break;
569 	case 8: // sequence reference mark
570 		WPS_DEBUG_MSG(("Sequence reference mark\n"));
571 		break;
572 	case 9: // next page
573 		m_listener->insertField(WPSField(WPSField::PageNumberNext));
574 		break;
575 	default:
576 		WPS_DEBUG_MSG(("DosWordParser::insertSpecial: unknown special %u encountered\n", val));
577 		break;
578 	}
579 }
580 
insertControl(uint8_t val,uint32_t fc)581 void DosWordParser::insertControl(uint8_t val, uint32_t fc)
582 {
583 	// 0xc4 = normal hyphen, 0xff = nbsp, already handled by cp437
584 	switch (val)
585 	{
586 	case 9:
587 		m_listener->insertTab();
588 		break;
589 	case 10:
590 	case 11:
591 		m_listener->insertEOL();
592 		break;
593 	case 12:
594 	{
595 		// each section is ended with 0x0c but no page break might
596 		// be required
597 		for (auto const &s : m_sections)
598 		{
599 			if (fc + 1 == s.m_fcLim)
600 			{
601 				switch (s.m_bkc)
602 				{
603 				case 0:
604 					break;
605 				case 1:
606 					m_listener->insertBreak(WPS_COLUMN_BREAK);
607 					break;
608 				default:
609 					m_listener->insertBreak(WPS_PAGE_BREAK);
610 					break;
611 				}
612 				return;
613 			}
614 		}
615 
616 		m_listener->insertBreak(WPS_PAGE_BREAK);
617 		break;
618 	}
619 	case 13: // carriage return
620 		break;
621 	case 14: // column break
622 		m_listener->insertBreak(WPS_COLUMN_BREAK);
623 		break;
624 	case 15: // em-hyphen
625 		m_listener->insertUnicode(0x8212);
626 		break;
627 	case 31: // soft hyphen
628 		m_listener->insertUnicode(0xad);
629 		break;
630 	default:
631 		WPS_DEBUG_MSG(("DosWordParser::insertControl: unexpected control %u\n", val));
632 		break;
633 	}
634 }
635 
readSUMD()636 void DosWordParser::readSUMD()
637 {
638 	RVNGInputStreamPtr input = getInput();
639 
640 	input->seek(DosWordParserInternal::HEADER_W_PNSUMD, librevenge::RVNG_SEEK_SET);
641 	uint16_t pnSumd = libwps::readU16(input);
642 
643 	input->seek(DosWordParserInternal::HEADER_W_PNMAC, librevenge::RVNG_SEEK_SET);
644 	uint16_t pnMac = libwps::readU16(input);
645 
646 	if (!pnSumd || pnSumd == pnMac)
647 	{
648 		// No summary page
649 		return;
650 	}
651 
652 	/*
653 	 * The page starts with 9 uint16_t values which are offsets. Sometimes
654 	 * the page contains garbage; when it does not, the first offset is 0.
655 	 */
656 	uint32_t fc = pnSumd * 0x80;
657 	int i;
658 
659 	if (!checkFilePosition(fc + 20))
660 	{
661 		WPS_DEBUG_MSG(("DosWordParser::readSUMD: summary missing\n"));
662 		return;
663 	}
664 
665 	input->seek(long(fc), librevenge::RVNG_SEEK_SET);
666 	if (libwps::readU16(input) >= 0x80)
667 	{
668 		WPS_DEBUG_MSG(("DosWordParser::readSUMD: garbage\n"));
669 		return;
670 	}
671 
672 	// Step over the offsets; it's packed together anyway.
673 	fc += 0x14;
674 
675 	static const char *sum_types[] =
676 	{
677 		"dc:title", // title
678 		"dc:creator", // author
679 		"dc:publisher", // operator
680 		"meta:keyword", // keywords
681 		"dc:description", // comments
682 		"librevenge:version-number", // version
683 		nullptr
684 	};
685 
686 	input->seek(long(fc), librevenge::RVNG_SEEK_SET);
687 
688 	for (i=0; sum_types[i]; i++)
689 	{
690 		std::string str;
691 
692 		for (;;)
693 		{
694 			if (!checkFilePosition(++fc))
695 			{
696 				WPS_DEBUG_MSG(("DosWordParser::readSUMD: summary missing\n"));
697 				return;
698 			}
699 
700 			auto ch = char(libwps::readU8(input));
701 			if (!ch)
702 				break;
703 			str.push_back(ch);
704 		}
705 
706 		if (str.size())
707 		{
708 			librevenge::RVNGString conv = libwps_tools_win::Font::unicodeString(str, m_fontType);
709 			WPS_DEBUG_MSG(("DosWordParser::readSUMD: %d %s\n", i, conv.cstr()));
710 			m_metaData.insert(sum_types[i], conv);
711 		}
712 	}
713 
714 	librevenge::RVNGString creationDate, revisionDate;
715 	int month, day, year;
716 
717 	for (i=0; i<8; i++)
718 	{
719 		if (!checkFilePosition(++fc))
720 		{
721 			WPS_DEBUG_MSG(("DosWordParser::readSUMD: summary missing\n"));
722 			return;
723 		}
724 
725 		auto ch = char(libwps::readU8(input));
726 		if (!ch)
727 			break;
728 		creationDate.append(ch);
729 	}
730 
731 	// Year is given in two decimals since 1900 so fudge any value
732 	// < 50 to be after 2000
733 	if (3 == sscanf(creationDate.cstr(), "%2d/%2d/%4d", &month, &day, &year))
734 	{
735 		librevenge::RVNGString str;
736 		if (year > 50)
737 			year += 1900;
738 		else
739 			year += 2000;
740 		str.sprintf("%d-%d-%d", year, month, day);
741 		m_metaData.insert("meta:creation-date", str);
742 	}
743 
744 	for (i=0; i<8; i++)
745 	{
746 		if (!checkFilePosition(++fc))
747 		{
748 			WPS_DEBUG_MSG(("DosWordParser::readSUMD: summary missing\n"));
749 			return;
750 		}
751 
752 		auto ch = char(libwps::readU8(input));
753 		if (!ch)
754 			break;
755 		revisionDate.append(ch);
756 	}
757 
758 	if (3 == sscanf(revisionDate.cstr(), "%2d/%2d/%4d", &month, &day, &year))
759 	{
760 		librevenge::RVNGString str;
761 		if (year > 50)
762 			year += 1900;
763 		else
764 			year += 2000;
765 		str.sprintf("%d-%d-%d", year, month, day);
766 		m_metaData.insert("dc:date", str);
767 	}
768 }
769 
readFNTB()770 void DosWordParser::readFNTB()
771 {
772 	RVNGInputStreamPtr input = getInput();
773 
774 	input->seek(DosWordParserInternal::HEADER_W_PNFNTB, librevenge::RVNG_SEEK_SET);
775 	uint16_t pnFntb = libwps::readU16(input);
776 
777 	input->seek(DosWordParserInternal::HEADER_W_PNBKMK, librevenge::RVNG_SEEK_SET);
778 	uint16_t pnBkmk = libwps::readU16(input);
779 
780 	if (pnFntb == 0 || pnFntb == pnBkmk)
781 		return;
782 
783 	uint32_t fc = pnFntb * 0x80;
784 
785 	if (!checkFilePosition(fc + 4))
786 	{
787 		WPS_DEBUG_MSG(("DosWordParser::readFNTB: footnote table missing\n"));
788 		return;
789 	}
790 
791 	input->seek(long(fc), librevenge::RVNG_SEEK_SET);
792 
793 	uint16_t noteCount = libwps::readU16(input);
794 	uint16_t noteCountAndDeleted = libwps::readU16(input);
795 
796 	if (noteCount > noteCountAndDeleted)
797 	{
798 		WPS_DEBUG_MSG(("DosWordParser::readFNTB: note counts do not makesense %u %u\n", noteCount, noteCountAndDeleted));
799 	}
800 
801 	for (unsigned note=0; note<noteCount; note++)
802 	{
803 		if (!checkFilePosition(fc + 8))
804 		{
805 			WPS_DEBUG_MSG(("DosWordParser::readFNTB: footnote %u missing\n", note));
806 			return;
807 		}
808 		fc += 8;
809 
810 		MSWriteParserInternal::Footnote footnote;
811 
812 		footnote.m_fcRef = libwps::readU32(input) + 0x80;
813 		footnote.m_fcFtn = libwps::readU32(input) + 0x80;
814 
815 		m_footnotes.push_back(footnote);
816 	}
817 }
818 
readSED()819 void DosWordParser::readSED()
820 {
821 	unsigned pnSetb, pnBftb;
822 	RVNGInputStreamPtr input = getInput();
823 
824 	input->seek(DosWordParserInternal::HEADER_W_PNSETB, librevenge::RVNG_SEEK_SET);
825 	pnSetb = libwps::readU16(input);
826 
827 	input->seek(DosWordParserInternal::HEADER_W_PNBFTB, librevenge::RVNG_SEEK_SET);
828 	pnBftb = libwps::readU16(input);
829 
830 	if (pnSetb && pnSetb != pnBftb)
831 	{
832 		if (!checkFilePosition(pnSetb * 0x80 + 4))
833 		{
834 			WPS_DEBUG_MSG(("Section is truncated\n"));
835 			throw (libwps::ParseException());
836 		}
837 
838 		input->seek(long(pnSetb) * 0x80, librevenge::RVNG_SEEK_SET);
839 		uint16_t cset = libwps::readU16(input);
840 
841 		// ignore csetMax
842 
843 		for (unsigned sed = 0; sed<cset; sed++)
844 		{
845 			if (!checkFilePosition(pnSetb * 0x80 + (sed + 1) * 10 + 4))
846 			{
847 				WPS_DEBUG_MSG(("is truncated\n"));
848 				throw (libwps::ParseException());
849 			}
850 
851 			input->seek(long(pnSetb * 0x80 + sed * 10 + 4), librevenge::RVNG_SEEK_SET);
852 
853 			uint32_t fcLim = libwps::readU32(input) + 0x80;
854 
855 			// unknown
856 			input->seek(2, librevenge::RVNG_SEEK_CUR);
857 
858 			uint32_t fcSep = libwps::readU32(input);
859 
860 			if (fcSep == 0xffffffff)
861 				break;
862 
863 			readSECT(fcSep, fcLim);
864 
865 			if (fcLim >= m_fcMac)
866 				break;
867 		}
868 	}
869 
870 	if (m_sections.empty() || m_sections.back().m_fcLim < m_fcMac)
871 	{
872 		// create default section by reading invalid fc
873 		readSECT(m_fileLength, m_fcMac);
874 	}
875 }
876 
readSECT(uint32_t fcSep,uint32_t fcLim)877 void DosWordParser::readSECT(uint32_t fcSep, uint32_t fcLim)
878 {
879 	RVNGInputStreamPtr input = getInput();
880 	MSWriteParserInternal::Section sep;
881 
882 	if (checkFilePosition(fcSep + 1))
883 	{
884 		input->seek(long(fcSep), librevenge::RVNG_SEEK_SET);
885 		uint8_t headerSize = libwps::readU8(input);
886 		if (headerSize<1 || !checkFilePosition(fcSep+1+headerSize))
887 		{
888 			WPS_DEBUG_MSG(("DosWordParser::readSECT: can not read the structure, using default\n"));
889 		}
890 		else
891 		{
892 			do
893 			{
894 				if (headerSize < 2) break;
895 				input->seek(1, librevenge::RVNG_SEEK_CUR); // skip style
896 				sep.m_bkc = libwps::readU8(input) & 7;
897 
898 				// read section
899 				if (headerSize < 4) break;
900 				sep.m_yaMac=double(libwps::readU16(input))/1440.;
901 				if (headerSize < 6) break;
902 				sep.m_xaMac=double(libwps::readU16(input))/1440.;
903 				if (headerSize < 8) break;
904 				sep.m_startPageNumber=libwps::readU16(input);
905 				if (headerSize < 10) break;
906 				sep.m_yaTop=double(libwps::readU16(input))/1440.;
907 				if (headerSize < 12) break;
908 				sep.m_dyaText=double(libwps::readU16(input))/1440.;
909 				if (headerSize < 14) break;
910 				sep.m_xaLeft=double(libwps::readU16(input))/1440.;
911 				if (headerSize < 16) break;
912 				sep.m_dxaText=double(libwps::readU16(input))/1440.;
913 
914 				if (headerSize < 17) break;
915 				sep.m_endFtns = (libwps::readU8(input) & 0x80) != 0;
916 				if (headerSize < 18) break;
917 				sep.m_columns = libwps::readU8(input);
918 
919 				if (headerSize < 20) break;
920 				sep.m_yaHeader=double(libwps::readU16(input))/1440.;
921 				if (headerSize < 22) break;
922 				sep.m_yaFooter=double(libwps::readU16(input))/1440.;
923 
924 				if (headerSize < 24) break;
925 				sep.m_dxaColumns=double(libwps::readU16(input))/1440.;
926 				if (headerSize < 26) break;
927 				sep.m_dxaGutter=double(libwps::readU16(input))/1440.;
928 			}
929 			while (0);
930 		}
931 	}
932 
933 	sep.m_fcLim = fcLim;
934 	m_sections.push_back(sep);
935 }
936 /* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */
937