1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
2 /* libwps
3 * Version: MPL 2.0 / LGPLv2.1+
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * Major Contributor(s):
10 * Copyright (C) 2015 Sean Young <sean@mess.org>
11 *
12 * For minor contributions see the git repository.
13 *
14 * Alternatively, the contents of this file may be used under the terms
15 * of the GNU Lesser General Public License Version 2.1 or later
16 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
17 * applicable instead of those above.
18 *
19 * For further information visit http://libwps.sourceforge.net
20 */
21
22 #include <librevenge-stream/librevenge-stream.h>
23 #include "libwps_internal.h"
24
25 #include "WPSContentListener.h"
26 #include "WPSParagraph.h"
27 #include "WPSFont.h"
28 #include "WPSHeader.h"
29
30 #include "MSWrite.h"
31 #include "DosWord.h"
32
33 #include <algorithm>
34 #include <cstring>
35 #include <cstdio>
36
37 namespace DosWordParserInternal
38 {
39
//! Raw character property record (CHP), filled byte for byte by
//! DosWordParser::readCHP; every field is a single byte and starts at zero.
struct CHP
{
	//! bit 0: a style sheet code is present; the code itself is the value / 2
	uint8_t m_fStyled{0};
	//! bit 0: bold, bit 1: italic, remaining bits (value / 4): font code
	uint8_t m_fBold{0};
	//! font size; readCHP halves it to obtain the size it stores
	uint8_t m_hps{0};
	//! bit 0: underline, bit 1: strikeout, bit 2: double underline,
	//! bits 4-5: capitals mode, bit 6: special character, bit 7: hidden
	uint8_t m_fUline{0};
	//! not interpreted by this parser
	uint8_t m_unused{0};
	//! non-zero when the text is shifted; bit 7 set means subscript
	uint8_t m_hpsPos{0};
	//! low three bits: colour index (see DosWordParser::color)
	uint8_t m_clr{0};
};
60
//! Raw paragraph property record (PAP), filled byte for byte by
//! DosWordParser::readPAP. The 16-bit fields hold the bytes as stored in
//! the file and are decoded with WPS_LE_GET_GUINT16 when they are used.
struct PAP
{
	//! one tab stop descriptor
	struct TBD
	{
		//! tab position (readPAP divides it by 1440); zero ends the list
		uint16_t m_dxa{0};
		//! bits 0-1: alignment, bits 3-4: leader character
		uint8_t m_jcTab{0};
		//! not interpreted by this parser
		uint8_t m_chAlign{0};
	};

	//! bit 0: a style sheet code is present; the code itself is the value / 2
	uint8_t m_style{0};
	//! bits 0-1: justification, bit 2: no break, bit 3: no break with next,
	//! bit 5: header/footer uses the margin
	uint8_t m_justification{0};
	//! not interpreted by this parser
	uint8_t m_reserved2[2] = {0, 0};
	//! right indent
	uint16_t m_dxaRight{0};
	//! left indent
	uint16_t m_dxaLeft{0};
	//! first line indent
	uint16_t m_dxaLeft1{0};
	//! line spacing; only positive values are used by readPAP
	uint16_t m_dyaLine{0};
	//! spacing before the paragraph
	uint16_t m_dyaBefore{0};
	//! spacing after the paragraph
	uint16_t m_dyaAfter{0};
	//! bit 0: footer (else header), bits 1-2: page occurrence,
	//! bit 3: first page, bits 4-5: border kind, bits 6-7: border line style
	uint8_t m_rhc{0};
	//! low four bits: border sides, bits 4-6: border colour index
	uint8_t m_border{0};
	//! low seven bits: shading percentage
	uint8_t m_shade{0};
	//! bits 4-6: shading colour index
	uint8_t m_pos{0};
	//! not interpreted by this parser
	uint16_t m_dxaFromText{0};
	//! the tab stops; the list ends at the first zero position
	TBD m_TBD[20];
};
104
//! Byte offsets of the fields of the file header.
//! The fields named HEADER_W_* used in this file are read with readU16 and
//! the HEADER_B_* ones with readU8. The "pn" fields are page numbers: the
//! readers multiply them by 0x80 to obtain a file position.
enum HeaderOffset
{
	HEADER_W_WIDENT   = 0x00, // not read in this file
	HEADER_W_DTY      = 0x02, // not read in this file
	HEADER_W_WTOOL    = 0x04, // not read in this file
	HEADER_D_FCMAC    = 0x0e, // not read in this file
	HEADER_W_PNPARA   = 0x12, // not read in this file
	HEADER_W_PNFNTB   = 0x14, // page of the footnote table (readFNTB)
	HEADER_W_PNBKMK   = 0x16, // page following the footnote table (readFNTB)
	HEADER_W_PNSETB   = 0x18, // page of the section table (readSED)
	HEADER_W_PNBFTB   = 0x1a, // page following the section table (readSED)
	HEADER_W_PNSUMD   = 0x1c, // page of the summary information (readSUMD)
	HEADER_W_PNMAC    = 0x6a, // page following the summary information (readSUMD)
	HEADER_B_VERSION  = 0x74, // version byte examined by checkHeader
	HEADER_B_ASV      = 0x75, // checkHeader rejects the file when bit 1 is set (autosave)
	HEADER_W_CODEPAGE = 0x7e  // OEM code page; zero means the file does not name one
};
123
124 }
125
DosWordParser(RVNGInputStreamPtr & input,WPSHeaderPtr & header,libwps_tools_win::Font::Type encoding)126 DosWordParser::DosWordParser(RVNGInputStreamPtr &input, WPSHeaderPtr &header,
127 libwps_tools_win::Font::Type encoding)
128 : MSWriteParser(input, header, encoding)
129 {
130 m_fontType = getFileEncoding(encoding);
131 }
132
~DosWordParser()133 DosWordParser::~DosWordParser()
134 {
135 }
136
readFFNTB()137 void DosWordParser::readFFNTB()
138 {
139 // Microsoft Word for DOS does not have this section
140 WPS_DEBUG_MSG(("DosWordParser::readFFNTB: unexpected\n"));
141 }
142
143 // basic function to check if the header is ok
checkHeader(WPSHeader * header,bool)144 bool DosWordParser::checkHeader(WPSHeader *header, bool /*strict*/)
145 {
146 RVNGInputStreamPtr input = getInput();
147 if (!input || !checkFilePosition(0x100))
148 {
149 WPS_DEBUG_MSG(("DosWordParser::checkHeader: file is too short\n"));
150 return false;
151 }
152
153 input->seek(DosWordParserInternal::HEADER_B_ASV, librevenge::RVNG_SEEK_SET);
154 if (libwps::readU8(input) & 2)
155 {
156 WPS_DEBUG_MSG(("DosWordParser::checkHeader: file is autosaved\n"));
157 return false;
158 }
159
160 input->seek(DosWordParserInternal::HEADER_B_VERSION, librevenge::RVNG_SEEK_SET);
161 uint8_t ver = libwps::readU8(input);
162
163 switch (ver)
164 {
165 case 0:
166 WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 4.0 or earlier\n"));
167 header->setMajorVersion(4);
168 break;
169 case 3:
170 WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 5 OEM\n"));
171 header->setMajorVersion(5);
172 break;
173 case 4:
174 WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 5\n"));
175 header->setMajorVersion(5);
176 break;
177 case 7:
178 WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 5.5\n"));
179 header->setMajorVersion(5);
180 break;
181 case 9:
182 WPS_DEBUG_MSG(("DosWordParser::checkHeader: version 6.0\n"));
183 header->setMajorVersion(6);
184 break;
185 default:
186 WPS_DEBUG_MSG(("DosWordParser::checkHeader: unknown version %u\n", ver));
187 break;
188 }
189
190 input->seek(DosWordParserInternal::HEADER_W_CODEPAGE, librevenge::RVNG_SEEK_SET);
191 uint16_t codepage = libwps::readU16(input);
192
193 if (!codepage)
194 header->setNeedEncoding(true);
195
196 return true;
197 }
198
getFileEncoding(libwps_tools_win::Font::Type encoding)199 libwps_tools_win::Font::Type DosWordParser::getFileEncoding(libwps_tools_win::Font::Type encoding)
200 {
201 RVNGInputStreamPtr input = getInput();
202
203 input->seek(DosWordParserInternal::HEADER_W_CODEPAGE, librevenge::RVNG_SEEK_SET);
204 uint16_t codepage = libwps::readU16(input);
205
206 WPS_DEBUG_MSG(("DosWordParser::getFileEncoding: codepage %u\n", codepage));
207 if (codepage)
208 encoding = libwps_tools_win::Font::getTypeForOEM(codepage);
209
210 if (encoding == libwps_tools_win::Font::UNKNOWN)
211 encoding = libwps_tools_win::Font::CP_437;
212
213 return encoding;
214 }
215
color(int clr)216 WPSColor DosWordParser::color(int clr)
217 {
218 switch (clr)
219 {
220 default:
221 case 0: // black (default)
222 return WPSColor(0, 0, 0);
223 case 1: // red
224 return WPSColor(255, 0, 0);
225 case 2: // green
226 return WPSColor(0, 255, 0);
227 case 3: // blue
228 return WPSColor(0, 0, 255);
229 case 4: // violet
230 return WPSColor(127, 0, 255);
231 case 5: // magenta
232 return WPSColor(255, 0, 255);
233 case 6: // yellow
234 return WPSColor(0, 255, 255);
235 case 7: // white
236 return WPSColor(255, 255, 255);
237 }
238 }
239
readCHP(uint32_t fcFirst,uint32_t fcLim,unsigned cch)240 void DosWordParser::readCHP(uint32_t fcFirst, uint32_t fcLim, unsigned cch)
241 {
242 RVNGInputStreamPtr input = getInput();
243
244 DosWordParserInternal::CHP chp;
245
246 chp.m_hps = 24;
247
248 if (cch)
249 {
250 if (cch > sizeof(chp))
251 cch = sizeof(chp);
252
253 unsigned long read_bytes;
254 const unsigned char *p = input->read(cch, read_bytes);
255 if (read_bytes != cch)
256 {
257 WPS_DEBUG_MSG(("DosWordParser::readCHP failed to read CHP entry\n"));
258 throw (libwps::ParseException());
259 }
260
261 memcpy(&chp, p, cch);
262 }
263
264 MSWriteParserInternal::Font font;
265
266 if (chp.m_fStyled & 1)
267 {
268 switch (chp.m_fStyled / 2)
269 {
270 case 13: // footnote reference
271 font.m_footnote = true;
272 break;
273 case 26: // annotation reference
274 font.m_annotation = true;
275 break;
276 default:
277 WPS_DEBUG_MSG(("Style sheet stc=%u %x-%x\n", chp.m_fStyled / 2, fcFirst, fcLim));
278 break;
279 }
280 }
281
282 unsigned ftc = (chp.m_fBold / 4);
283
284 // Note the font depends on the printer driver
285 if (ftc <= 15)
286 font.m_name.sprintf("modern %c", 'a' + ftc);
287 else if (ftc <= 31)
288 font.m_name.sprintf("roman %c", 'a' + (ftc - 16));
289 else if (ftc <= 39)
290 font.m_name.sprintf("script %c", 'a' + (ftc - 32));
291 else if (ftc <= 47)
292 font.m_name.sprintf("foreign %c", 'a' + (ftc - 40));
293 else if (ftc <= 55)
294 font.m_name.sprintf("decor %c", 'a' + (ftc - 48));
295 else
296 font.m_name.sprintf("symbol %c", 'a' + (ftc - 56));
297
298 font.m_size = chp.m_hps / 2.0;
299 if (chp.m_fBold & 1)
300 font.m_attributes |= WPS_BOLD_BIT;
301 if (chp.m_fBold & 2)
302 font.m_attributes |= WPS_ITALICS_BIT;
303 if (chp.m_fUline & 1)
304 font.m_attributes |= WPS_UNDERLINE_BIT;
305 if (chp.m_fUline & 2)
306 font.m_attributes |= WPS_STRIKEOUT_BIT;
307 if (chp.m_fUline & 4)
308 font.m_attributes |= WPS_DOUBLE_UNDERLINE_BIT;
309 // FIXME: if (chp.m_fUline & 8) marks a text new (not accepted)
310 if ((chp.m_fUline & 0x30) == 0x10)
311 font.m_attributes |= WPS_ALL_CAPS_BIT;
312 else if ((chp.m_fUline & 0x30) == 0x30)
313 font.m_attributes |= WPS_SMALL_CAPS_BIT;
314 if (chp.m_fUline & 0x40)
315 font.m_special = true;
316 if (chp.m_fUline & 0x80)
317 font.m_attributes |= WPS_HIDDEN_BIT;
318 if (chp.m_hpsPos)
319 {
320 if (chp.m_hpsPos & 0x80)
321 font.m_attributes |= WPS_SUBSCRIPT_BIT;
322 else
323 font.m_attributes |= WPS_SUPERSCRIPT_BIT;
324 }
325
326 font.m_fcFirst = fcFirst;
327 font.m_fcLim = fcLim;
328 font.m_encoding = libwps_tools_win::Font::getFontType(font.m_name);
329 if (font.m_encoding == libwps_tools_win::Font::UNKNOWN)
330 font.m_encoding = m_fontType;
331
332 font.m_color = color(chp.m_clr & 7);
333
334 m_fontList.push_back(font);
335 }
336
readPAP(uint32_t fcFirst,uint32_t fcLim,unsigned cch)337 void DosWordParser::readPAP(uint32_t fcFirst, uint32_t fcLim, unsigned cch)
338 {
339 RVNGInputStreamPtr input = getInput();
340
341 DosWordParserInternal::PAP pap;
342
343 WPS_LE_PUT_GUINT16(&pap.m_dyaLine, 240);
344
345 if (cch)
346 {
347 if (cch > sizeof(pap))
348 cch = sizeof(pap);
349
350 unsigned long read_bytes;
351 const unsigned char *p = input->read(cch, read_bytes);
352 if (read_bytes != cch)
353 {
354 WPS_DEBUG_MSG(("DosWordParser::readPAP failed to read PAP\n"));
355 throw (libwps::ParseException());
356 }
357
358 memcpy(&pap, p, cch);
359 }
360
361 auto dxaLeft = int16_t(WPS_LE_GET_GUINT16(&pap.m_dxaLeft));
362 auto dxaLeft1 = int16_t(WPS_LE_GET_GUINT16(&pap.m_dxaLeft1));
363 auto dxaRight = int16_t(WPS_LE_GET_GUINT16(&pap.m_dxaRight));
364
365 MSWriteParserInternal::Paragraph para;
366 int i;
367
368 for (i=0; i<20; i++)
369 {
370 auto pos = WPS_LE_GET_GUINT16(&pap.m_TBD[i].m_dxa);
371
372 if (!pos)
373 break;
374
375 WPSTabStop::Alignment align;
376
377 switch (pap.m_TBD[i].m_jcTab & 3)
378 {
379 default:
380 case 0:
381 align = WPSTabStop::LEFT;
382 break;
383 case 1:
384 align = WPSTabStop::CENTER;
385 break;
386 case 2:
387 align = WPSTabStop::RIGHT;
388 break;
389 case 3:
390 align = WPSTabStop::DECIMAL;
391 break;
392 }
393
394 unsigned leader = (pap.m_TBD[i].m_jcTab >> 3) & 3;
395 WPSTabStop tab(pos / 1440., align, uint16_t("\0.-_"[leader]));
396
397 para.m_tabs.push_back(tab);
398
399 if (dxaLeft + dxaLeft1 == pos)
400 para.m_skiptab = true;
401 }
402
403 switch (pap.m_justification & 3)
404 {
405 default:
406 case 0:
407 para.m_justify = libwps::JustificationLeft;
408 break;
409 case 1:
410 para.m_justify = libwps::JustificationCenter;
411 break;
412 case 2:
413 para.m_justify = libwps::JustificationRight;
414 break;
415 case 3:
416 para.m_justify = libwps::JustificationFull;
417 break;
418 }
419
420 para.m_margins[0] = dxaLeft1 / 1440.0;
421 para.m_margins[1] = dxaLeft / 1440.0;
422 para.m_margins[2] = dxaRight / 1440.0;
423
424 // spacings
425 auto dyaLine = int16_t(WPS_LE_GET_GUINT16(&pap.m_dyaLine));
426 auto dyaBefore = WPS_LE_GET_GUINT16(&pap.m_dyaBefore);
427 auto dyaAfter = WPS_LE_GET_GUINT16(&pap.m_dyaAfter);
428 // dyaLine = -40 means "auto"
429 if (dyaLine > 0)
430 para.m_spacings[0] = dyaLine / 240.0;
431 para.m_spacings[1] = dyaBefore / 240.0;
432 para.m_spacings[2] = dyaAfter / 240.0;
433
434 para.m_fcFirst = fcFirst;
435 para.m_fcLim = fcLim;
436
437 if (pap.m_rhc & 0xe)
438 {
439 if (pap.m_rhc & 1)
440 para.m_Location = MSWriteParserInternal::Paragraph::FOOTER;
441 else
442 para.m_Location = MSWriteParserInternal::Paragraph::HEADER;
443
444 switch ((pap.m_rhc >> 1) & 3)
445 {
446 default:
447 case 3: // all
448 para.m_HeaderFooterOccurrence = WPSPageSpan::ALL;
449 break;
450 case 2: // even
451 para.m_HeaderFooterOccurrence = WPSPageSpan::EVEN;
452 break;
453 case 1: // odd
454 para.m_HeaderFooterOccurrence = WPSPageSpan::ODD;
455 break;
456 case 0: // never; however might be on first page
457 para.m_HeaderFooterOccurrence = WPSPageSpan::NEVER;
458 break;
459 }
460
461 para.m_firstpage = (pap.m_rhc & 0x08) != 0;
462 }
463
464 if (pap.m_justification & 0x20)
465 para.m_headerUseMargin = true;
466
467 if (pap.m_style & 1)
468 {
469 switch (pap.m_style / 2)
470 {
471 case 39: // footnote
472 case 87: // annotation
473 para.m_Location = MSWriteParserInternal::Paragraph::FOOTNOTE;
474 break;
475 default:
476 WPS_DEBUG_MSG(("DosWordParser::readPAP pap unknown style stc=%u %x-%x\n", pap.m_style / 2, fcFirst, fcLim));
477 break;
478 }
479 }
480
481 // Borders
482 if (pap.m_rhc & 0x30)
483 {
484 if ((pap.m_rhc & 0x30) == 0x10)
485 para.m_border = 15;
486 else
487 para.m_border = pap.m_border & 15;
488
489 WPSBorder::Type type = WPSBorder::Single;
490 int width = 1;
491
492 switch (pap.m_rhc & 0xc0)
493 {
494 default:
495 case 0: // normal
496 break;
497 case 0x40: // bold
498 width = 2;
499 break;
500 case 0x80: // Double
501 type = WPSBorder::Double;
502 break;
503 case 0xc0: // thick
504 width = 8;
505 break;
506 }
507
508 para.m_borderStyle.m_type = type;
509 para.m_borderStyle.m_width = width;
510 para.m_borderStyle.m_color = color((pap.m_border / 16) & 7);
511 }
512
513 if (pap.m_justification & 4)
514 para.m_breakStatus |= libwps::NoBreakBit;
515 if (pap.m_justification & 8)
516 para.m_breakStatus |= libwps::NoBreakWithNextBit;
517
518 // paragraph shading
519 if (pap.m_shade & 0x7f)
520 {
521 WPSColor c = color((pap.m_pos >> 4) & 7);
522
523 unsigned percent = std::min(pap.m_shade & 0x7fu, 100u);
524
525 // Use percent to increase brightness
526 // 100% means color stays the same
527 // 0% means white
528
529 unsigned add = (255 * (100 - percent)) / 100;
530
531 para.m_backgroundColor = WPSColor(
532 static_cast<unsigned char>(std::min(c.getRed() + add, 255u)),
533 static_cast<unsigned char>(std::min(c.getGreen() + add, 255u)),
534 static_cast<unsigned char>(std::min(c.getBlue() + add, 255u)));
535 }
536
537 // FIXME: side-by-side
538 // FIXME: paragraph position
539
540 m_paragraphList.push_back(para);
541 }
542
insertSpecial(uint8_t val,uint32_t fc,MSWriteParserInternal::Paragraph::Location location)543 void DosWordParser::insertSpecial(uint8_t val, uint32_t fc, MSWriteParserInternal::Paragraph::Location location)
544 {
545 librevenge::RVNGString empty;
546
547 switch (val)
548 {
549 case 1: // page name
550 m_listener->insertField(WPSField(WPSField::PageNumber));
551 break;
552 case 2: // current date
553 m_listener->insertField(WPSField(WPSField::Date));
554 break;
555 case 3: // current time
556 m_listener->insertField(WPSField(WPSField::Time));
557 break;
558 case 4: // annotation reference
559 if (location == MSWriteParserInternal::Paragraph::MAIN)
560 insertNote(true, fc, empty);
561 break;
562 case 5: // footnote reference
563 if (location == MSWriteParserInternal::Paragraph::MAIN)
564 insertNote(false, fc, empty);
565 break;
566 case 7: // sequence mark
567 WPS_DEBUG_MSG(("Sequence mark\n"));
568 break;
569 case 8: // sequence reference mark
570 WPS_DEBUG_MSG(("Sequence reference mark\n"));
571 break;
572 case 9: // next page
573 m_listener->insertField(WPSField(WPSField::PageNumberNext));
574 break;
575 default:
576 WPS_DEBUG_MSG(("DosWordParser::insertSpecial: unknown special %u encountered\n", val));
577 break;
578 }
579 }
580
insertControl(uint8_t val,uint32_t fc)581 void DosWordParser::insertControl(uint8_t val, uint32_t fc)
582 {
583 // 0xc4 = normal hyphen, 0xff = nbsp, already handled by cp437
584 switch (val)
585 {
586 case 9:
587 m_listener->insertTab();
588 break;
589 case 10:
590 case 11:
591 m_listener->insertEOL();
592 break;
593 case 12:
594 {
595 // each section is ended with 0x0c but no page break might
596 // be required
597 for (auto const &s : m_sections)
598 {
599 if (fc + 1 == s.m_fcLim)
600 {
601 switch (s.m_bkc)
602 {
603 case 0:
604 break;
605 case 1:
606 m_listener->insertBreak(WPS_COLUMN_BREAK);
607 break;
608 default:
609 m_listener->insertBreak(WPS_PAGE_BREAK);
610 break;
611 }
612 return;
613 }
614 }
615
616 m_listener->insertBreak(WPS_PAGE_BREAK);
617 break;
618 }
619 case 13: // carriage return
620 break;
621 case 14: // column break
622 m_listener->insertBreak(WPS_COLUMN_BREAK);
623 break;
624 case 15: // em-hyphen
625 m_listener->insertUnicode(0x8212);
626 break;
627 case 31: // soft hyphen
628 m_listener->insertUnicode(0xad);
629 break;
630 default:
631 WPS_DEBUG_MSG(("DosWordParser::insertControl: unexpected control %u\n", val));
632 break;
633 }
634 }
635
readSUMD()636 void DosWordParser::readSUMD()
637 {
638 RVNGInputStreamPtr input = getInput();
639
640 input->seek(DosWordParserInternal::HEADER_W_PNSUMD, librevenge::RVNG_SEEK_SET);
641 uint16_t pnSumd = libwps::readU16(input);
642
643 input->seek(DosWordParserInternal::HEADER_W_PNMAC, librevenge::RVNG_SEEK_SET);
644 uint16_t pnMac = libwps::readU16(input);
645
646 if (!pnSumd || pnSumd == pnMac)
647 {
648 // No summary page
649 return;
650 }
651
652 /*
653 * The page starts with 9 uint16_t values which are offsets. Sometimes
654 * the page contains garbage; when it does not, the first offset is 0.
655 */
656 uint32_t fc = pnSumd * 0x80;
657 int i;
658
659 if (!checkFilePosition(fc + 20))
660 {
661 WPS_DEBUG_MSG(("DosWordParser::readSUMD: summary missing\n"));
662 return;
663 }
664
665 input->seek(long(fc), librevenge::RVNG_SEEK_SET);
666 if (libwps::readU16(input) >= 0x80)
667 {
668 WPS_DEBUG_MSG(("DosWordParser::readSUMD: garbage\n"));
669 return;
670 }
671
672 // Step over the offsets; it's packed together anyway.
673 fc += 0x14;
674
675 static const char *sum_types[] =
676 {
677 "dc:title", // title
678 "dc:creator", // author
679 "dc:publisher", // operator
680 "meta:keyword", // keywords
681 "dc:description", // comments
682 "librevenge:version-number", // version
683 nullptr
684 };
685
686 input->seek(long(fc), librevenge::RVNG_SEEK_SET);
687
688 for (i=0; sum_types[i]; i++)
689 {
690 std::string str;
691
692 for (;;)
693 {
694 if (!checkFilePosition(++fc))
695 {
696 WPS_DEBUG_MSG(("DosWordParser::readSUMD: summary missing\n"));
697 return;
698 }
699
700 auto ch = char(libwps::readU8(input));
701 if (!ch)
702 break;
703 str.push_back(ch);
704 }
705
706 if (str.size())
707 {
708 librevenge::RVNGString conv = libwps_tools_win::Font::unicodeString(str, m_fontType);
709 WPS_DEBUG_MSG(("DosWordParser::readSUMD: %d %s\n", i, conv.cstr()));
710 m_metaData.insert(sum_types[i], conv);
711 }
712 }
713
714 librevenge::RVNGString creationDate, revisionDate;
715 int month, day, year;
716
717 for (i=0; i<8; i++)
718 {
719 if (!checkFilePosition(++fc))
720 {
721 WPS_DEBUG_MSG(("DosWordParser::readSUMD: summary missing\n"));
722 return;
723 }
724
725 auto ch = char(libwps::readU8(input));
726 if (!ch)
727 break;
728 creationDate.append(ch);
729 }
730
731 // Year is given in two decimals since 1900 so fudge any value
732 // < 50 to be after 2000
733 if (3 == sscanf(creationDate.cstr(), "%2d/%2d/%4d", &month, &day, &year))
734 {
735 librevenge::RVNGString str;
736 if (year > 50)
737 year += 1900;
738 else
739 year += 2000;
740 str.sprintf("%d-%d-%d", year, month, day);
741 m_metaData.insert("meta:creation-date", str);
742 }
743
744 for (i=0; i<8; i++)
745 {
746 if (!checkFilePosition(++fc))
747 {
748 WPS_DEBUG_MSG(("DosWordParser::readSUMD: summary missing\n"));
749 return;
750 }
751
752 auto ch = char(libwps::readU8(input));
753 if (!ch)
754 break;
755 revisionDate.append(ch);
756 }
757
758 if (3 == sscanf(revisionDate.cstr(), "%2d/%2d/%4d", &month, &day, &year))
759 {
760 librevenge::RVNGString str;
761 if (year > 50)
762 year += 1900;
763 else
764 year += 2000;
765 str.sprintf("%d-%d-%d", year, month, day);
766 m_metaData.insert("dc:date", str);
767 }
768 }
769
readFNTB()770 void DosWordParser::readFNTB()
771 {
772 RVNGInputStreamPtr input = getInput();
773
774 input->seek(DosWordParserInternal::HEADER_W_PNFNTB, librevenge::RVNG_SEEK_SET);
775 uint16_t pnFntb = libwps::readU16(input);
776
777 input->seek(DosWordParserInternal::HEADER_W_PNBKMK, librevenge::RVNG_SEEK_SET);
778 uint16_t pnBkmk = libwps::readU16(input);
779
780 if (pnFntb == 0 || pnFntb == pnBkmk)
781 return;
782
783 uint32_t fc = pnFntb * 0x80;
784
785 if (!checkFilePosition(fc + 4))
786 {
787 WPS_DEBUG_MSG(("DosWordParser::readFNTB: footnote table missing\n"));
788 return;
789 }
790
791 input->seek(long(fc), librevenge::RVNG_SEEK_SET);
792
793 uint16_t noteCount = libwps::readU16(input);
794 uint16_t noteCountAndDeleted = libwps::readU16(input);
795
796 if (noteCount > noteCountAndDeleted)
797 {
798 WPS_DEBUG_MSG(("DosWordParser::readFNTB: note counts do not makesense %u %u\n", noteCount, noteCountAndDeleted));
799 }
800
801 for (unsigned note=0; note<noteCount; note++)
802 {
803 if (!checkFilePosition(fc + 8))
804 {
805 WPS_DEBUG_MSG(("DosWordParser::readFNTB: footnote %u missing\n", note));
806 return;
807 }
808 fc += 8;
809
810 MSWriteParserInternal::Footnote footnote;
811
812 footnote.m_fcRef = libwps::readU32(input) + 0x80;
813 footnote.m_fcFtn = libwps::readU32(input) + 0x80;
814
815 m_footnotes.push_back(footnote);
816 }
817 }
818
readSED()819 void DosWordParser::readSED()
820 {
821 unsigned pnSetb, pnBftb;
822 RVNGInputStreamPtr input = getInput();
823
824 input->seek(DosWordParserInternal::HEADER_W_PNSETB, librevenge::RVNG_SEEK_SET);
825 pnSetb = libwps::readU16(input);
826
827 input->seek(DosWordParserInternal::HEADER_W_PNBFTB, librevenge::RVNG_SEEK_SET);
828 pnBftb = libwps::readU16(input);
829
830 if (pnSetb && pnSetb != pnBftb)
831 {
832 if (!checkFilePosition(pnSetb * 0x80 + 4))
833 {
834 WPS_DEBUG_MSG(("Section is truncated\n"));
835 throw (libwps::ParseException());
836 }
837
838 input->seek(long(pnSetb) * 0x80, librevenge::RVNG_SEEK_SET);
839 uint16_t cset = libwps::readU16(input);
840
841 // ignore csetMax
842
843 for (unsigned sed = 0; sed<cset; sed++)
844 {
845 if (!checkFilePosition(pnSetb * 0x80 + (sed + 1) * 10 + 4))
846 {
847 WPS_DEBUG_MSG(("is truncated\n"));
848 throw (libwps::ParseException());
849 }
850
851 input->seek(long(pnSetb * 0x80 + sed * 10 + 4), librevenge::RVNG_SEEK_SET);
852
853 uint32_t fcLim = libwps::readU32(input) + 0x80;
854
855 // unknown
856 input->seek(2, librevenge::RVNG_SEEK_CUR);
857
858 uint32_t fcSep = libwps::readU32(input);
859
860 if (fcSep == 0xffffffff)
861 break;
862
863 readSECT(fcSep, fcLim);
864
865 if (fcLim >= m_fcMac)
866 break;
867 }
868 }
869
870 if (m_sections.empty() || m_sections.back().m_fcLim < m_fcMac)
871 {
872 // create default section by reading invalid fc
873 readSECT(m_fileLength, m_fcMac);
874 }
875 }
876
readSECT(uint32_t fcSep,uint32_t fcLim)877 void DosWordParser::readSECT(uint32_t fcSep, uint32_t fcLim)
878 {
879 RVNGInputStreamPtr input = getInput();
880 MSWriteParserInternal::Section sep;
881
882 if (checkFilePosition(fcSep + 1))
883 {
884 input->seek(long(fcSep), librevenge::RVNG_SEEK_SET);
885 uint8_t headerSize = libwps::readU8(input);
886 if (headerSize<1 || !checkFilePosition(fcSep+1+headerSize))
887 {
888 WPS_DEBUG_MSG(("DosWordParser::readSECT: can not read the structure, using default\n"));
889 }
890 else
891 {
892 do
893 {
894 if (headerSize < 2) break;
895 input->seek(1, librevenge::RVNG_SEEK_CUR); // skip style
896 sep.m_bkc = libwps::readU8(input) & 7;
897
898 // read section
899 if (headerSize < 4) break;
900 sep.m_yaMac=double(libwps::readU16(input))/1440.;
901 if (headerSize < 6) break;
902 sep.m_xaMac=double(libwps::readU16(input))/1440.;
903 if (headerSize < 8) break;
904 sep.m_startPageNumber=libwps::readU16(input);
905 if (headerSize < 10) break;
906 sep.m_yaTop=double(libwps::readU16(input))/1440.;
907 if (headerSize < 12) break;
908 sep.m_dyaText=double(libwps::readU16(input))/1440.;
909 if (headerSize < 14) break;
910 sep.m_xaLeft=double(libwps::readU16(input))/1440.;
911 if (headerSize < 16) break;
912 sep.m_dxaText=double(libwps::readU16(input))/1440.;
913
914 if (headerSize < 17) break;
915 sep.m_endFtns = (libwps::readU8(input) & 0x80) != 0;
916 if (headerSize < 18) break;
917 sep.m_columns = libwps::readU8(input);
918
919 if (headerSize < 20) break;
920 sep.m_yaHeader=double(libwps::readU16(input))/1440.;
921 if (headerSize < 22) break;
922 sep.m_yaFooter=double(libwps::readU16(input))/1440.;
923
924 if (headerSize < 24) break;
925 sep.m_dxaColumns=double(libwps::readU16(input))/1440.;
926 if (headerSize < 26) break;
927 sep.m_dxaGutter=double(libwps::readU16(input))/1440.;
928 }
929 while (0);
930 }
931 }
932
933 sep.m_fcLim = fcLim;
934 m_sections.push_back(sep);
935 }
936 /* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */
937