1 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
2
3 /* libmwaw
4 * Version: MPL 2.0 / LGPLv2+
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 2.0 (the "License"); you may not use this file except in compliance with
8 * the License or as specified alternatively below. You may obtain a copy of
9 * the License at http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Major Contributor(s):
17 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
18 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
19 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
20 * Copyright (C) 2006, 2007 Andrew Ziem
21 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
22 *
23 *
24 * All Rights Reserved.
25 *
26 * For minor contributions see the git repository.
27 *
28 * Alternatively, the contents of this file may be used under the terms of
29 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
30 * in which case the provisions of the LGPLv2+ are applicable
31 * instead of those above.
32 */
33
34 #include <iomanip>
35 #include <iostream>
36 #include <limits>
37 #include <sstream>
38
39 #include <librevenge/librevenge.h>
40
41 #include "MWAWTextListener.hxx"
42 #include "MWAWFont.hxx"
43 #include "MWAWFontConverter.hxx"
44 #include "MWAWHeader.hxx"
45 #include "MWAWParagraph.hxx"
46 #include "MWAWPictMac.hxx"
47 #include "MWAWPosition.hxx"
48 #include "MWAWPrinter.hxx"
49 #include "MWAWSection.hxx"
50 #include "MWAWSubDocument.hxx"
51
52 #include "MsWrd1Parser.hxx"
53
54 /** Internal: the structures of a MsWrd1Parser */
55 namespace MsWrd1ParserInternal
56 {
/** the different kinds of PLC
 *
 * - FONT: font
 * - RULER: ruler
 * - FOOTNOTE: footnote marker
 * - PAGE: page break
 * - ZONE: unknown(zone4)
 * - UNKNOWN: the default value, used when no type is given
 */
enum PLCType {
  FONT=0,
  RULER,
  FOOTNOTE,
  PAGE,
  ZONE,
  UNKNOWN
};
66
67 /** Internal: class to store the PLC: Pointer List Content ? */
68 struct PLC {
69 //! constructor
PLCMsWrd1ParserInternal::PLC70 explicit PLC(PLCType type=UNKNOWN)
71 : m_type(type)
72 , m_id(-1)
73 , m_extras("")
74 {
75 }
76 //! operator<<
77 friend std::ostream &operator<<(std::ostream &o, PLC const &plc);
78 //! the type
79 PLCType m_type;
80 //! the id
81 int m_id;
82 //! a string used to store the parsing extrass
83 std::string m_extras;
84 };
85
operator <<(std::ostream & o,PLC const & plc)86 std::ostream &operator<<(std::ostream &o, PLC const &plc)
87 {
88 switch (plc.m_type) {
89 case FONT:
90 o << "F";
91 break;
92 case RULER:
93 o << "P";
94 break;
95 case FOOTNOTE:
96 o << "Fn";
97 break;
98 case PAGE:
99 o << "Page";
100 break;
101 case ZONE:
102 o << "Z";
103 break;
104 case UNKNOWN:
105 #if !defined(__clang__)
106 default:
107 #endif
108 o << "#type" << int(plc.m_type);
109 break;
110 }
111 if (plc.m_id != -1) o << plc.m_id;
112 else o << "_";
113 if (!plc.m_extras.empty()) o << ":" << plc.m_extras;
114 return o;
115 }
116
117 ////////////////////////////////////////
118 //! Internal: the font of a MsWrd1Parser
119 struct Font {
120 //! constructor
FontMsWrd1ParserInternal::Font121 Font()
122 : m_font()
123 , m_type(0)
124 , m_extras("")
125 {
126 }
127 //! operator<<
128 friend std::ostream &operator<<(std::ostream &o, Font const &ft);
129 //! the basic font property
130 MWAWFont m_font;
131 //! a unknown int, maybe 0x80 means defined font
132 int m_type;
133 //! a string used to store the parsing extrass
134 std::string m_extras;
135 };
136
operator <<(std::ostream & o,Font const & ft)137 std::ostream &operator<<(std::ostream &o, Font const &ft)
138 {
139 if (ft.m_type) o << "type=" << std::hex << ft.m_type << std::dec << ",";
140 if (!ft.m_extras.empty()) o << ft.m_extras;
141 return o;
142 }
143
144 ////////////////////////////////////////
145 //! Internal: the paragraph of a MsWrd1Parser
146 struct Paragraph final : public MWAWParagraph {
147 //! constructor
ParagraphMsWrd1ParserInternal::Paragraph148 Paragraph()
149 : MWAWParagraph()
150 , m_type(0)
151 , m_type2(0)
152 {
153 }
154 Paragraph(Paragraph const &)=default;
155 Paragraph &operator=(Paragraph const &)=default;
156 Paragraph &operator=(Paragraph &&)=default;
157 //! destructor
158 ~Paragraph() final;
159 //! operator<<
160 friend std::ostream &operator<<(std::ostream &o, Paragraph const &ft);
161 //! the initial type
162 int m_type;
163 //! another type
164 int m_type2;
165 };
166
~Paragraph()167 Paragraph::~Paragraph()
168 {
169 }
170
operator <<(std::ostream & o,Paragraph const & para)171 std::ostream &operator<<(std::ostream &o, Paragraph const ¶)
172 {
173 o << static_cast<MWAWParagraph const &>(para);
174 // 0|80 frequent: means redefine paragraph? find also a7 in a footnote
175 if (para.m_type) o << "type=" << std::hex << para.m_type << std::dec << ",";
176
177 if (para.m_type2 & 0xF0) {
178 bool foot = (para.m_type2 & 0x10);
179 if (foot) o << "footer/footnote[";
180 else o << "header[";
181 if (para.m_type2 & 0x20) o << (foot ? "even," : "odd,");
182 if (para.m_type2 & 0x40) o << (foot ? "odd," : "even,");
183 if (para.m_type2 & 0x80) o << "first,";
184 o << "]";
185 }
186 if (para.m_type2 & 0xF)
187 o << "#type2=" << std::hex << (para.m_type2 & 0xF) << std::dec << ",";
188
189 return o;
190 }
191
192 ////////////////////////////////////////
193 //! Internal: the state of a MsWrd1Parser
194 struct State {
195 //! constructor
StateMsWrd1ParserInternal::State196 State()
197 : m_eot(-1)
198 , m_numColumns(1)
199 , m_columnsSep(0)
200 , m_textZonesList()
201 , m_mainTextZonesList()
202 , m_fontsList()
203 , m_paragraphsList()
204 , m_endNote(false)
205 , m_footnotesList()
206 , m_plcMap()
207 , m_actPage(0)
208 , m_numPages(1)
209 , m_headersId()
210 , m_footersId()
211 {
212 for (auto &limit : m_fileZonesLimit) limit = -1;
213 }
214
215 //! end of text
216 long m_eot;
217 //! the number of columns
218 int m_numColumns;
219 //! the column separator
220 float m_columnsSep;
221 //! the zones limits
222 int m_fileZonesLimit[7];
223 //! the list of text zones
224 std::vector<MWAWVec2l> m_textZonesList;
225 //! the list of main text zones
226 std::vector<int> m_mainTextZonesList;
227 //! the list of fonts
228 std::vector<Font> m_fontsList;
229 //! the list of paragraph
230 std::vector<Paragraph> m_paragraphsList;
231 //! a flag to know if we send endnote or footnote
232 bool m_endNote;
233 //! the footnote positions ( list of beginPos, endPos)
234 std::vector<MWAWVec2l> m_footnotesList;
235 //! the text correspondance zone ( filepos, plc )
236 std::multimap<long, PLC> m_plcMap;
237
238 int m_actPage /** the actual page */, m_numPages /** the number of page of the final document */;
239 /** the list of header id which corresponds to each page */
240 std::vector<int> m_headersId;
241 /** the list of footer id which corresponds to each page */
242 std::vector<int> m_footersId;
243 };
244
245 ////////////////////////////////////////
246 //! Internal: the subdocument of a MsWrdParser
247 class SubDocument final : public MWAWSubDocument
248 {
249 public:
250 //! constructor for footnote, header
SubDocument(MsWrd1Parser & pars,MWAWInputStreamPtr const & input,MWAWEntry const & position)251 SubDocument(MsWrd1Parser &pars, MWAWInputStreamPtr const &input, MWAWEntry const &position)
252 : MWAWSubDocument(&pars, input, position)
253 {
254 }
255
256 //! destructor
~SubDocument()257 ~SubDocument() final {}
258
259 //! operator!=
operator !=(MWAWSubDocument const & doc) const260 bool operator!=(MWAWSubDocument const &doc) const final
261 {
262 return MWAWSubDocument::operator!=(doc);
263 }
264
265 //! the parser function
266 void parse(MWAWListenerPtr &listener, libmwaw::SubDocumentType type) final;
267
268 protected:
269 };
270
parse(MWAWListenerPtr & listener,libmwaw::SubDocumentType)271 void SubDocument::parse(MWAWListenerPtr &listener, libmwaw::SubDocumentType)
272 {
273 if (!listener.get()) {
274 MWAW_DEBUG_MSG(("MsWrd1ParserInternal::SubDocument::parse: no listener\n"));
275 return;
276 }
277 auto *parser=dynamic_cast<MsWrd1Parser *>(m_parser);
278 if (!parser) {
279 MWAW_DEBUG_MSG(("MsWrd1ParserInternal::SubDocument::parse: no parser\n"));
280 return;
281 }
282
283 if (!m_zone.valid()) {
284 listener->insertChar(' ');
285 return;
286 }
287 long pos = m_input->tell();
288 parser->sendText(m_zone);
289 m_input->seek(pos, librevenge::RVNG_SEEK_SET);
290 }
291 }
292
293
294 ////////////////////////////////////////////////////////////
295 // constructor/destructor, ...
296 ////////////////////////////////////////////////////////////
MsWrd1Parser(MWAWInputStreamPtr const & input,MWAWRSRCParserPtr const & rsrcParser,MWAWHeader * header)297 MsWrd1Parser::MsWrd1Parser(MWAWInputStreamPtr const &input, MWAWRSRCParserPtr const &rsrcParser, MWAWHeader *header)
298 : MWAWTextParser(input, rsrcParser, header)
299 , m_state()
300 {
301 init();
302 }
303
~MsWrd1Parser()304 MsWrd1Parser::~MsWrd1Parser()
305 {
306 }
307
init()308 void MsWrd1Parser::init()
309 {
310 resetTextListener();
311 setAsciiName("main-1");
312
313 m_state.reset(new MsWrd1ParserInternal::State);
314
315 // reduce the margin (in case, the page is not defined)
316 getPageSpan().setMargins(0.1);
317 }
318
319 ////////////////////////////////////////////////////////////
320 // new page
321 ////////////////////////////////////////////////////////////
newPage(int number)322 void MsWrd1Parser::newPage(int number)
323 {
324 if (number <= m_state->m_actPage || number > m_state->m_numPages)
325 return;
326
327 while (m_state->m_actPage < number) {
328 m_state->m_actPage++;
329 if (!getTextListener() || m_state->m_actPage == 1)
330 continue;
331 getTextListener()->insertBreak(MWAWTextListener::PageBreak);
332 }
333 }
334
removeLastCharIfEOL(MWAWEntry & entry)335 void MsWrd1Parser::removeLastCharIfEOL(MWAWEntry &entry)
336 {
337 if (!entry.valid()) return;
338 MWAWInputStreamPtr input = getInput();
339 long actPos = input->tell();
340 input->seek(entry.end()-1, librevenge::RVNG_SEEK_SET);
341 if (input->readLong(1)==0xd)
342 entry.setLength(entry.length()-1);
343 input->seek(actPos, librevenge::RVNG_SEEK_SET);
344 }
345 ////////////////////////////////////////////////////////////
346 // the parser
347 ////////////////////////////////////////////////////////////
parse(librevenge::RVNGTextInterface * docInterface)348 void MsWrd1Parser::parse(librevenge::RVNGTextInterface *docInterface)
349 {
350 if (!getInput().get() || !checkHeader(nullptr)) throw(libmwaw::ParseException());
351 bool ok = true;
352 try {
353 // create the asciiFile
354 ascii().setStream(getInput());
355 ascii().open(asciiName());
356 checkHeader(nullptr);
357 ok = createZones();
358 if (ok) {
359 createDocument(docInterface);
360 sendMain();
361 }
362
363 ascii().reset();
364 }
365 catch (...) {
366 MWAW_DEBUG_MSG(("MsWrd1Parser::parse: exception catched when parsing\n"));
367 ok = false;
368 }
369
370 resetTextListener();
371 if (!ok) throw(libmwaw::ParseException());
372 }
373
374 ////////////////////////////////////////////////////////////
375 // send the main zone
376 ////////////////////////////////////////////////////////////
sendMain()377 void MsWrd1Parser::sendMain()
378 {
379 for (auto id : m_state->m_mainTextZonesList) {
380 if (id < 0 || id >= int(m_state->m_textZonesList.size()))
381 continue;
382 MWAWEntry entry;
383 entry.setBegin(m_state->m_textZonesList[size_t(id)][0]);
384 entry.setEnd(m_state->m_textZonesList[size_t(id)][1]);
385 sendText(entry, true);
386 }
387 // maybe need if we have no text ; if not, nobody will see it
388 if (getTextListener())
389 getTextListener()->insertChar(' ');
390 }
391
392 ////////////////////////////////////////////////////////////
393 // create the document
394 ////////////////////////////////////////////////////////////
createDocument(librevenge::RVNGTextInterface * documentInterface)395 void MsWrd1Parser::createDocument(librevenge::RVNGTextInterface *documentInterface)
396 {
397 if (!documentInterface) return;
398 if (getTextListener()) {
399 MWAW_DEBUG_MSG(("MsWrd1Parser::createDocument: listener already exist\n"));
400 return;
401 }
402
403 // update the page
404 m_state->m_actPage = 0;
405 // create the page list
406 std::vector<MWAWPageSpan> pageList;
407 auto numHeaders=int(m_state->m_headersId.size());
408 auto numFooters=int(m_state->m_footersId.size());
409 for (int i = 0; i <= m_state->m_numPages;) {
410 int numSim[2]= {1,1};
411 MWAWPageSpan ps(getPageSpan());
412 while (i < numHeaders) {
413 int id = m_state->m_headersId[size_t(i)];
414 if (id < 0 || id >= int(m_state->m_textZonesList.size()))
415 break;
416 MWAWEntry entry;
417 entry.setBegin(m_state->m_textZonesList[size_t(id)][0]);
418 entry.setEnd(m_state->m_textZonesList[size_t(id)][1]);
419 removeLastCharIfEOL(entry);
420 if (!entry.valid()) break;
421 MWAWHeaderFooter header(MWAWHeaderFooter::HEADER, MWAWHeaderFooter::ALL);
422 header.m_subDocument.reset
423 (new MsWrd1ParserInternal::SubDocument(*this, getInput(), entry));
424 ps.setHeaderFooter(header);
425 int j = i+1;
426 while (j < numHeaders && m_state->m_headersId[size_t(j)]==id) {
427 numSim[0]++;
428 j++;
429 }
430 break;
431 }
432 while (i < int(numFooters)) {
433 int id = m_state->m_footersId[size_t(i)];
434 if (id < 0 || id >= int(m_state->m_textZonesList.size()))
435 break;
436 MWAWEntry entry;
437 entry.setBegin(m_state->m_textZonesList[size_t(id)][0]);
438 entry.setEnd(m_state->m_textZonesList[size_t(id)][1]);
439 removeLastCharIfEOL(entry);
440 if (!entry.valid()) break;
441 MWAWHeaderFooter footer(MWAWHeaderFooter::FOOTER, MWAWHeaderFooter::ALL);
442 footer.m_subDocument.reset
443 (new MsWrd1ParserInternal::SubDocument(*this, getInput(), entry));
444 ps.setHeaderFooter(footer);
445 int j = i+1;
446 while (j < numFooters && m_state->m_footersId[size_t(j)]==id) {
447 numSim[1]++;
448 j++;
449 }
450 break;
451 }
452 if (numSim[1] < numSim[0]) numSim[0]=numSim[1];
453 if (numSim[0] < 1) numSim[0]=1;
454 ps.setPageSpan(numSim[0]);
455 i+=numSim[0];
456 pageList.push_back(ps);
457 }
458
459 //
460 MWAWTextListenerPtr listen(new MWAWTextListener(*getParserState(), pageList, documentInterface));
461 setTextListener(listen);
462 listen->startDocument();
463 }
464
465
466 ////////////////////////////////////////////////////////////
467 // Intermediate level
468 ////////////////////////////////////////////////////////////
469
470 // create the different zones
createZones()471 bool MsWrd1Parser::createZones()
472 {
473 libmwaw::DebugStream f;
474 if (m_state->m_eot < 0x80) return false;
475
476 ascii().addPos(0x80);
477 ascii().addNote("TextContent");
478
479 ascii().addPos(m_state->m_eot);
480 ascii().addNote("_");
481
482 MWAWInputStreamPtr input = getInput();
483 for (int z = 5; z >= 0; z--) {
484 if (m_state->m_fileZonesLimit[z] == m_state->m_fileZonesLimit[z+1])
485 continue;
486 if (!input->checkPosition(m_state->m_fileZonesLimit[z+1]*0x80) ||
487 m_state->m_fileZonesLimit[z] > m_state->m_fileZonesLimit[z+1]) {
488 f.str("");
489 f << "Entries(Zone" << z << "):###";
490 MWAW_DEBUG_MSG(("MsWrd1Parser::createZones: zone %d is too long\n",z));
491 ascii().addPos(m_state->m_fileZonesLimit[z]*0x80);
492 ascii().addNote(f.str().c_str());
493 break;
494 }
495 MWAWVec2i limit(m_state->m_fileZonesLimit[z],m_state->m_fileZonesLimit[z+1]);
496 bool done = false;
497 switch (z) {
498 case 0:
499 case 1:
500 done = readPLC(limit,z);
501 break;
502 case 2:
503 done = readFootnoteCorrespondance(limit);
504 break;
505 case 3:
506 done = readDocInfo(limit);
507 break;
508 case 4:
509 done = readZones(limit);
510 break;
511 case 5:
512 done = readPageBreak(limit);
513 break;
514 default:
515 break;
516 }
517 if (done) continue;
518 for (int p = m_state->m_fileZonesLimit[z], i=0; p < m_state->m_fileZonesLimit[z+1]; p++, i++) {
519 f.str("");
520 f << "Entries(Zone" << z << ")[" << i << "]:";
521 ascii().addPos(p*0x80);
522 ascii().addNote(f.str().c_str());
523 }
524 ascii().addPos(m_state->m_fileZonesLimit[z+1]*0x80);
525 ascii().addNote("_");
526 }
527 prepareTextZones();
528 return true;
529 }
530
531 // try to read retrieve the header/footer zones ...
prepareTextZones()532 bool MsWrd1Parser::prepareTextZones()
533 {
534 m_state->m_numPages = 1;
535 m_state->m_textZonesList.resize(0);
536 m_state->m_mainTextZonesList.resize(0);
537 m_state->m_headersId.resize(0);
538 m_state->m_footersId.resize(0);
539 long endMain = m_state->m_eot;
540 for (auto const &footnote : m_state->m_footnotesList) {
541 long pos = footnote[0];
542 if (pos >= 0x80 && pos < endMain)
543 endMain = pos;
544 }
545 if (endMain < 0x80) {
546 MWAW_DEBUG_MSG(("MsWrd1Parser::sendText: oops problem computing the limit of the main section"));
547 m_state->m_textZonesList.push_back(MWAWVec2l(0x80, m_state->m_eot));
548 m_state->m_mainTextZonesList.push_back(0);
549 return false;
550 }
551
552 auto plcIt = m_state->m_plcMap.begin();
553 long pos = 0x80, prevMainPos=pos;
554 int actPage = 1;
555 int actType = 0;
556 MWAWVec2i headerId(-1,-1), footerId(-1,-1);
557 int firstHeaderId=-1, firstFooterId=-1;
558 while (pos < endMain) {
559 int newType = 0;
560 if (plcIt == m_state->m_plcMap.end() || plcIt->first>=endMain) {
561 pos = endMain;
562 newType = -1;
563 }
564 else {
565 pos = plcIt->first;
566 MsWrd1ParserInternal::PLC const &plc = plcIt++->second;
567 if (plc.m_type==MsWrd1ParserInternal::PAGE && pos!=0x80) {
568 if (actPage> int(m_state->m_headersId.size())) {
569 m_state->m_headersId.resize(size_t(actPage),-1);
570 m_state->m_headersId[size_t(actPage)-1] = headerId[(actPage%2)];
571 }
572 if (actPage> int(m_state->m_footersId.size())) {
573 m_state->m_footersId.resize(size_t(actPage),-1);
574 m_state->m_footersId[size_t(actPage)-1] = footerId[(actPage%2)];
575 }
576 actPage++;
577 }
578 if (plc.m_type!=MsWrd1ParserInternal::RULER) continue;
579 if (plc.m_id >= 0 && plc.m_id < int(m_state->m_paragraphsList.size()))
580 newType = (m_state->m_paragraphsList[size_t(plc.m_id)].m_type2>>4);
581 if (newType == actType)
582 continue;
583 }
584 if (pos==prevMainPos) {
585 actType = newType;
586 continue;
587 }
588
589 auto id = int(m_state->m_textZonesList.size());
590 m_state->m_textZonesList.push_back(MWAWVec2l(prevMainPos, pos));
591 prevMainPos=pos;
592 if (actType==0) {
593 m_state->m_mainTextZonesList.push_back(id);
594 actType = newType;
595 continue;
596 }
597 if (actType&1) {
598 if (actType&2) footerId[1]=id;
599 if (actType&4) footerId[0]=id;
600 if (actType&8) firstFooterId=id;
601 m_state->m_footersId.resize(size_t(actPage),-1);
602 m_state->m_footersId[size_t(actPage)-1] =
603 (actPage==1 && firstFooterId >= 0) ? firstFooterId :
604 (actPage%2) ? footerId[1] : footerId[0];
605 }
606 else {
607 if (actType&2) headerId[0]=id;
608 if (actType&4) headerId[1]=id;
609 if (actType&8) firstHeaderId=id;
610 m_state->m_headersId.resize(size_t(actPage),-1);
611 m_state->m_headersId[size_t(actPage)-1] =
612 (actPage==1 && firstHeaderId >= 0) ? firstHeaderId :
613 (actPage%2) ? headerId[1] : headerId[0];
614 }
615 actType = newType;
616 }
617 if (actPage> int(m_state->m_headersId.size())) {
618 m_state->m_headersId.resize(size_t(actPage),-1);
619 m_state->m_headersId[size_t(actPage)-1] = headerId[(actPage%2)];
620 }
621 if (actPage> int(m_state->m_footersId.size())) {
622 m_state->m_footersId.resize(size_t(actPage),-1);
623 m_state->m_footersId[size_t(actPage)-1] = footerId[(actPage%2)];
624 }
625 m_state->m_numPages = actPage;
626 return true;
627 }
628
629 ////////////////////////////////////////////////////////////
630 // try to read the different zones
631 ////////////////////////////////////////////////////////////
632
633 // read the character property
readFont(long fPos,MsWrd1ParserInternal::Font & font)634 bool MsWrd1Parser::readFont(long fPos, MsWrd1ParserInternal::Font &font)
635 {
636 font = MsWrd1ParserInternal::Font();
637 libmwaw::DebugStream f;
638 MWAWInputStreamPtr input = getInput();
639 input->seek(fPos, librevenge::RVNG_SEEK_SET);
640 auto sz = static_cast<int>(input->readLong(1));
641 if (sz < 1 || sz > 0x7f || !input->checkPosition(fPos+1+sz)) {
642 MWAW_DEBUG_MSG(("MsWrd1Parser::readFont: the zone size seems bad\n"));
643 return false;
644 }
645 font.m_type = static_cast<int>(input->readULong(1));
646 int val;
647 uint32_t flags=0;
648 if (sz >= 2) {
649 val = static_cast<int>(input->readULong(1));
650 if (val & 0x80) flags |= MWAWFont::boldBit;
651 if (val & 0x40) flags |= MWAWFont::italicBit;
652 if (val & 0x3f)
653 font.m_font.setId((val & 0x3f));
654 }
655 if (sz >= 3) {
656 val = static_cast<int>(input->readULong(1));
657 if (val) font.m_font.setSize(float(val)/2.f);
658 }
659 if (sz >= 4) {
660 val = static_cast<int>(input->readULong(1));
661 if (val & 0x80) font.m_font.setUnderlineStyle(MWAWFont::Line::Simple);
662 switch ((val&0xc)>>2) {
663 case 0:
664 break;
665 case 3:
666 flags |= MWAWFont::uppercaseBit;
667 break;
668 default:
669 f << "#capBits=" << int((val&0xc)>>2) << ",";
670 }
671 // find also &2 for footnote
672 if (val & 0x73)
673 f << "#flags1=" << std::hex << (val & 0x73) << std::dec << ",";
674 }
675 if (sz >= 5) {
676 val = static_cast<int>(input->readULong(1));
677 if (val & 0x10) flags |= MWAWFont::embossBit;
678 if (val & 0x8) flags |= MWAWFont::shadowBit;
679 if (val & 0xe7)
680 f << "#flags2=" << std::hex << (val & 0xe7) << std::dec << ",";
681 }
682 if (sz >= 6) { // vdepl
683 val = static_cast<int>(input->readLong(1));
684 if (val > 0) font.m_font.set(MWAWFont::Script::super100());
685 else if (val < 0) font.m_font.set(MWAWFont::Script::sub100());
686 }
687 if (sz >= 7) {
688 f << "###";
689 ascii().addDelimiter(input->tell(),'|');
690 }
691 font.m_font.setFlags(flags);
692 font.m_extras = f.str();
693
694 return true;
695 }
696
697 /* read the paragraph property */
readParagraph(long fPos,MsWrd1ParserInternal::Paragraph & para)698 bool MsWrd1Parser::readParagraph(long fPos, MsWrd1ParserInternal::Paragraph ¶)
699 {
700 para = MsWrd1ParserInternal::Paragraph();
701 libmwaw::DebugStream f;
702 MWAWInputStreamPtr input = getInput();
703 input->seek(fPos, librevenge::RVNG_SEEK_SET);
704 auto sz = static_cast<int>(input->readLong(1));
705 if (sz < 1 || sz > 0x7f || !input->checkPosition(fPos+1+sz)) {
706 MWAW_DEBUG_MSG(("MsWrd1Parser::readParagraph: the zone size seems bad\n"));
707 return false;
708 }
709 para.m_type = static_cast<int>(input->readULong(1));
710 int val;
711 if (sz >= 2) {
712 val = static_cast<int>(input->readULong(1));
713 switch (val>>6) {
714 case 0:
715 break; // left
716 case 1:
717 para.m_justify = MWAWParagraph::JustificationCenter;
718 break;
719 case 2:
720 para.m_justify = MWAWParagraph::JustificationRight;
721 break;
722 case 3:
723 para.m_justify = MWAWParagraph::JustificationFull;
724 break;
725 default:
726 break;
727 }
728 if (val & 0x10) f << "dontbreak[para],";
729 if (val & 0x20) f << "dontbreak[line],";
730 if (val & 0xf)
731 f << "#justify=" << std::hex << (val & 0xf) << std::dec << ",";
732 }
733 if (sz >= 4) { // find always 0 here
734 val = static_cast<int>(input->readLong(2));
735 if (val) f << "#f0=" << val << ",";
736 }
737 if (sz >= 6) {
738 val = static_cast<int>(input->readLong(2));
739 if (val)
740 para.m_margins[2] = double(val)/1440.0;
741 }
742 if (sz >= 8) {
743 val = static_cast<int>(input->readLong(2));
744 if (val)
745 para.m_margins[0] = double(val)/1440.0;
746 }
747 if (sz >= 10) {
748 val = static_cast<int>(input->readLong(2));
749 if (val && !para.m_margins[0].isSet())
750 para.m_margins[1] = double(val)/1440.0;
751 else if (val)
752 para.m_margins[1] = para.m_margins[0].get()+double(val)/1440.0;
753 }
754 if (sz >= 12) {
755 val = static_cast<int>(input->readLong(2));
756 if (val)
757 para.setInterline(double(val)/1440.0, librevenge::RVNG_INCH);
758 }
759 if (sz >= 14) {
760 val = static_cast<int>(input->readLong(2));
761 if (val)
762 para.m_spacings[1] = double(val)/1440.0;
763 }
764 if (sz >= 16) {
765 val = static_cast<int>(input->readLong(2));
766 if (val)
767 para.m_spacings[2] = double(val)/1440.0;
768 }
769 if (sz >= 17)
770 para.m_type2 = static_cast<int>(input->readULong(1));
771 // checkme: not sure what is the exact decomposition of the following
772 if (sz >= 22) { // find always 0 here
773 for (int i = 0; i < 5; i++) {
774 val = static_cast<int>(input->readLong(1));
775 if (val) f << "#f" << i+1 << "=" << val << ",";
776 }
777 }
778 if (sz >= 26) {
779 int numTabs = (sz-26)/4;
780 for (int i = 0; i < numTabs; i++) {
781 MWAWTabStop newTab;
782 newTab.m_position = double(input->readLong(2))/1440.;
783 auto flags = static_cast<int>(input->readULong(1));
784 switch ((flags>>5)&3) {
785 case 0:
786 break;
787 case 1:
788 newTab.m_alignment = MWAWTabStop::CENTER;
789 break;
790 case 2:
791 newTab.m_alignment = MWAWTabStop::RIGHT;
792 break;
793 case 3:
794 newTab.m_alignment = MWAWTabStop::DECIMAL;
795 break;
796 default:
797 break;
798 }
799 switch ((flags>>2)&3) {
800 case 0:
801 break;
802 case 1:
803 newTab.m_leaderCharacter = '.';
804 break;
805 case 2:
806 newTab.m_leaderCharacter = '-';
807 break;
808 case 3:
809 newTab.m_leaderCharacter = '_';
810 break;
811 default:
812 break;
813 }
814 if (flags & 0x93)
815 f << "#tabs" << i << "[fl1=" << std::hex << (flags & 0x93) << std::dec << ",";
816 val = static_cast<int>(input->readULong(1));
817 if (val)
818 f << "#tabs" << i << "[fl2=" << std::hex << val << std::dec << ",";
819 para.m_tabs->push_back(newTab);
820 }
821 }
822 if (input->tell() != fPos+1+sz)
823 ascii().addDelimiter(input->tell(), '|');
824 para.m_extra = f.str();
825 return true;
826 }
827
828 /* read the page break separation */
readPageBreak(MWAWVec2i limits)829 bool MsWrd1Parser::readPageBreak(MWAWVec2i limits)
830 {
831 MWAWInputStreamPtr input = getInput();
832 if (limits[1] <= limits[0] || !input->checkPosition(limits[1]*0x80)) {
833 MWAW_DEBUG_MSG(("MsWrd1Parser::readPageBreak: the zone is not well defined\n"));
834 return false;
835 }
836 libmwaw::DebugStream f;
837 long pos = limits[0]*0x80;
838 input->seek(pos, librevenge::RVNG_SEEK_SET);
839 f << "Entries(PageBreak):";
840 auto N = static_cast<int>(input->readULong(2));
841 f << "N=" << N << ",";
842 if (N==0 || 4+6*N > (limits[1]-limits[0])*0x80) {
843 MWAW_DEBUG_MSG(("MsWrd1Parser::readPageBreak: the number of element seems odds\n"));
844 f << "###";
845 ascii().addPos(pos);
846 ascii().addNote(f.str().c_str());
847 return false;
848 }
849 long val = static_cast<int>(input->readULong(2)); // 1|a
850 f << "unkn=" << val << ",";
851 MsWrd1ParserInternal::PLC plc(MsWrd1ParserInternal::PAGE);
852 for (int i = 0; i < N; i++) {
853 auto pg = static_cast<int>(input->readULong(2));
854 long textPos = long(input->readULong(4))+0x80;
855 f << "Page" << i << "=" << std::hex << textPos << std::dec;
856 if (pg != i+1) f << "[page=" << pg << "]";
857 if (textPos < m_state->m_eot) {
858 plc.m_id = pg;
859 m_state->m_plcMap.insert
860 (std::multimap<long,MsWrd1ParserInternal::PLC>::value_type(textPos, plc));
861 }
862 else if (i != N-1)
863 f << "###";
864 f << ",";
865 }
866 if (input->tell() != limits[1]*0x80)
867 ascii().addDelimiter(input->tell(),'|');
868 ascii().addPos(pos);
869 ascii().addNote(f.str().c_str());
870 return true;
871 }
872
873 /* read the footnote zone */
readFootnoteCorrespondance(MWAWVec2i limits)874 bool MsWrd1Parser::readFootnoteCorrespondance(MWAWVec2i limits)
875 {
876 MWAWInputStreamPtr input = getInput();
877 if (limits[1] <= limits[0] || !input->checkPosition(limits[1]*0x80)) {
878 MWAW_DEBUG_MSG(("MsWrd1Parser::readFootnoteCorrespondance: the zone is not well defined\n"));
879 return false;
880 }
881 libmwaw::DebugStream f;
882
883 long textEnd = m_state->m_eot;
884 MsWrd1ParserInternal::PLC plc(MsWrd1ParserInternal::FOOTNOTE);
885 long pos = limits[0]*0x80;
886 input->seek(pos, librevenge::RVNG_SEEK_SET);
887 f << "Entries(Footnote):";
888 auto N = static_cast<int>(input->readULong(2));
889 auto N1 = static_cast<int>(input->readULong(2));
890 f << "N=" << N << ",";
891 if (N!=N1) f << "N1=" << N1 << ",";
892 if (N!=N1 || N==0 || 4+8*N > (limits[1]-limits[0])*0x80) {
893 MWAW_DEBUG_MSG(("MsWrd1Parser::readFootnoteCorrespondance: the number of element seems odds\n"));
894 f << "###";
895 ascii().addPos(pos);
896 ascii().addNote(f.str().c_str());
897 return false;
898 }
899 std::map<long, int> footnoteMap;
900 for (int i = 0; i < N; i++) {
901 long textPos = long(input->readULong(4))+0x80;
902 long notePos = long(input->readULong(4))+0x80;
903 bool ok = textPos <= textEnd && notePos <= textEnd;
904 f << "Fn" << i << ":" << std::hex << textPos << "<->" << notePos << std::dec << ",";
905 if (!ok) {
906 if (i==N-1) break;
907 f << "###";
908 continue;
909 }
910 plc.m_id = int(footnoteMap.size());
911 footnoteMap[notePos]=plc.m_id;
912 m_state->m_plcMap.insert
913 (std::multimap<long,MsWrd1ParserInternal::PLC>::value_type(textPos, plc));
914 m_state->m_plcMap.insert
915 (std::multimap<long,MsWrd1ParserInternal::PLC>::value_type(notePos, plc));
916 }
917 m_state->m_footnotesList.resize(footnoteMap.size(),MWAWVec2l(0,0));
918 for (auto fIt=footnoteMap.begin(); fIt!=footnoteMap.end();) {
919 MWAWVec2l fPos;
920 fPos[0] = fIt->first;
921 int id = fIt++->second;
922 fPos[1] = fIt==footnoteMap.end() ? m_state->m_eot : fIt->first;
923 if (id >= int(m_state->m_footnotesList.size()))
924 m_state->m_footnotesList.resize(size_t(id)+1,MWAWVec2l(0,0));
925 m_state->m_footnotesList[size_t(id)]=fPos;
926 }
927 ascii().addDelimiter(input->tell(),'|');
928 ascii().addPos(pos);
929 ascii().addNote(f.str().c_str());
930
931 return true;
932 }
933
934 /* read the zone4: a list of main zone ( headers, footers ) ? */
readZones(MWAWVec2i limits)935 bool MsWrd1Parser::readZones(MWAWVec2i limits)
936 {
937 MWAWInputStreamPtr input = getInput();
938 if (limits[1] <= limits[0] || !input->checkPosition(limits[1]*0x80)) {
939 MWAW_DEBUG_MSG(("MsWrd1Parser::readZones: the zone is not well defined\n"));
940 return false;
941 }
942 libmwaw::DebugStream f;
943
944 MsWrd1ParserInternal::PLC plc(MsWrd1ParserInternal::ZONE);
945 long pos = limits[0]*0x80;
946 input->seek(pos, librevenge::RVNG_SEEK_SET);
947 f << "Entries(Zones):";
948 auto N = static_cast<int>(input->readULong(2));
949 auto N1 = static_cast<int>(input->readULong(2));
950 f << "N=" << N << ",";
951 if (N!=N1) f << "N1=" << N1 << ",";
952 if (N!=N1 || N==0 || 4+10*N > (limits[1]-limits[0])*0x80) {
953 MWAW_DEBUG_MSG(("MsWrd1Parser::readZones: the number of element seems odds\n"));
954 f << "###";
955 ascii().addPos(pos);
956 ascii().addNote(f.str().c_str());
957 return false;
958 }
959 for (int i = 0; i < N; i++) {
960 long textPos = long(input->readULong(4))+0x80;
961 f << std::hex << textPos << std::dec;
962 f << ":f0=" << input->readLong(2); // find 1|2|3
963 auto val = static_cast<int>(input->readLong(4)); // find -1, 0x900, 0xa00
964 if (val!=-1) f << ":f1=" << std::hex << val << std::dec;
965 if (textPos < m_state->m_eot) {
966 plc.m_id = i;
967 m_state->m_plcMap.insert
968 (std::multimap<long,MsWrd1ParserInternal::PLC>::value_type(textPos, plc));
969 }
970 else if (textPos != m_state->m_eot && i != N-1)
971 f << "###";
972 f << ",";
973 }
974 ascii().addDelimiter(input->tell(),'|');
975 ascii().addPos(pos);
976 ascii().addNote(f.str().c_str());
977
978 return true;
979 }
980
981 /* read the document information */
readDocInfo(MWAWVec2i limits)982 bool MsWrd1Parser::readDocInfo(MWAWVec2i limits)
983 {
984 MWAWInputStreamPtr input = getInput();
985 if (limits[1] != limits[0]+1 || !input->checkPosition(limits[1]*0x80)) {
986 MWAW_DEBUG_MSG(("MsWrd1Parser::readDocInfo: the zone is not well defined\n"));
987 return false;
988 }
989
990 libmwaw::DebugStream f;
991 long pos = limits[0]*0x80;
992 input->seek(pos, librevenge::RVNG_SEEK_SET);
993 f << "Entries(DocInfo):";
994 int val;
995 for (int i=0; i < 2; i++) { // find 66|0
996 val = static_cast<int>(input->readULong(1));
997 if (val)
998 f << "f" << i << "=" << std::hex << val << std::dec << ",";
999 }
1000 auto flags = static_cast<int>(input->readULong(1));
1001 switch (flags>>5) {
1002 case 0:
1003 f << "division=no,";
1004 break;
1005 case 1:
1006 f << "division=columns,";
1007 break;
1008 case 2:
1009 f << "division=page,";
1010 break; // default
1011 case 3:
1012 f << "division=evenpage,";
1013 break;
1014 case 4:
1015 f << "division=oddpage,";
1016 break;
1017 default:
1018 f << "#division=" << (flags>>5) << ",";
1019 break;
1020 }
1021 switch ((flags>>2)&7) {
1022 case 0: // default (numeric)
1023 break;
1024 case 1:
1025 f << "numbering=roman[upper],";
1026 break;
1027 case 2:
1028 f << "numbering=roman[lower],";
1029 break;
1030 case 3:
1031 f << "numbering=alpha[upper],";
1032 break;
1033 case 4:
1034 f << "numbering=alpha[lower],";
1035 break;
1036 default:
1037 f << "#numbering[type]=" << ((flags>>2)&7) << ",";
1038 break;
1039 }
1040 if (flags&3) f << "flags=" << (flags&3) << ",";
1041
1042 float pageDim[2];
1043 for (auto &d : pageDim) d = float(input->readULong(2))/1440.f;
1044 f << "dim=[" << pageDim[1] << "x" << pageDim[0] << "],";
1045 val = static_cast<int>(input->readLong(2));
1046 if (val != -1) f << "firstPage=" << val << ",";
1047 // check me
1048 float pagePos[2][2]; // [Y|X][header|size]
1049 char const *wh[] = {"TopMargin", "Y[page]", "LeftMargin", "X[page]" };
1050 for (int i = 0; i < 2; i++) {
1051 for (int j = 0; j < 2; j++) {
1052 pagePos[i][j] = float(input->readULong(2))/1440.f;
1053 f << wh[i*2+j] << "=" << pagePos[i][j] << ",";
1054 }
1055 }
1056 flags = static_cast<int>(input->readULong(1));
1057 bool endNote = false;
1058 if (flags&1) {
1059 f << "endnote,";
1060 endNote = true;
1061 }
1062 if (flags&2)
1063 f << "autonumbering,";
1064 if (flags&0xFC)
1065 f << "flags2=" << std::hex << (flags&0xFC) << std::dec << ",";
1066 ascii().addPos(pos);
1067 ascii().addNote(f.str().c_str());
1068
1069 pos = input->tell();
1070 f.str("");
1071 f << "DocInfo(II):";
1072 auto numCols = static_cast<int>(input->readULong(1));
1073 if (numCols != 1) {
1074 f << "nCols=" << numCols << ",";
1075 if (numCols < 1 || numCols > 6) {
1076 f << "###";
1077 numCols = 1;
1078 }
1079 }
1080 float hfLength[2];
1081 for (auto &hf : hfLength) hf = float(input->readULong(2))/1440.f;
1082 hfLength[1]=pageDim[0]-hfLength[1];
1083
1084 f << "headerLength=" << hfLength[0] << ",";
1085 f << "footerLength=" << hfLength[1] << ",";
1086 float colSep = float(input->readULong(2))/1440.f;
1087 f << "colSep=" << colSep << ",";
1088 val = static_cast<int>(input->readLong(2));
1089 if (val)
1090 f << "f3=" << val << ",";
1091 f << "distToHeader=" << float(input->readULong(2))/1440.f << ",";
1092 f << "distToNote=" << float(input->readULong(2))/1440.f << ",";
1093 // probably follows by other distance
1094
1095 if (pageDim[0] > 0 && pageDim[1] > 0 &&
1096 pagePos[0][0]>=0 && pagePos[0][1]>=0 && pageDim[0] >= pagePos[0][0]+pagePos[0][1] &&
1097 pagePos[1][0]>=0 && pagePos[1][1]>=0 && pageDim[1] >= pagePos[1][0]+pagePos[1][1] &&
1098 pageDim[1] >= float(numCols)*pagePos[1][1]) {
1099 getPageSpan().setMarginTop(double(pagePos[0][0]));
1100 getPageSpan().setMarginLeft(double(pagePos[1][0]));
1101 getPageSpan().setFormLength(double(pageDim[0]));
1102 getPageSpan().setFormWidth(double(pageDim[1]));
1103 m_state->m_endNote = endNote;
1104 m_state->m_numColumns = numCols;
1105 m_state->m_columnsSep = colSep;
1106 }
1107 else {
1108 MWAW_DEBUG_MSG(("MsWrd1Parser::readDocInfo: some dimension do not look good\n"));
1109 }
1110 ascii().addDelimiter(input->tell(),'|');
1111 ascii().addPos(pos);
1112 ascii().addNote(f.str().c_str());
1113 ascii().addPos(pos+53);
1114 ascii().addNote("DocInfo(III)");
1115 return true;
1116 }
1117
1118 // read a plc zone (char or paragraph properties )
readPLC(MWAWVec2i limits,int wh)1119 bool MsWrd1Parser::readPLC(MWAWVec2i limits, int wh)
1120 {
1121 MWAWInputStreamPtr input = getInput();
1122 if (limits[1] <= limits[0] || !input->checkPosition(limits[1]*0x80)) {
1123 MWAW_DEBUG_MSG(("MsWrd1Parser::readPLC: the zone is not well defined\n"));
1124 return false;
1125 }
1126 libmwaw::DebugStream f, f2;
1127
1128 std::map<long, int> posIdMap;
1129 MsWrd1ParserInternal::PLC plc(wh==0 ? MsWrd1ParserInternal::FONT :
1130 MsWrd1ParserInternal::RULER);
1131 char const *what = wh==0 ? "Char" : "Para";
1132
1133 for (int z = limits[0], n=0; z < limits[1]; z++, n++) {
1134 f.str("");
1135 f << "Entries(" << what << ")[" << n << "]:";
1136 long pos = z*0x80;
1137 input->seek(pos+0x7f, librevenge::RVNG_SEEK_SET);
1138 auto N = static_cast<int>(input->readULong(1));
1139 f << "N=" << N << ",";
1140 if (4+N*6 > 0x7f) {
1141 f << "###";
1142 MWAW_DEBUG_MSG(("MsWrd1Parser::readPLC: the number of element seems to big\n"));
1143 ascii().addDelimiter(input->tell(),'|');
1144 ascii().addPos(pos);
1145 ascii().addNote(f.str().c_str());
1146 continue;
1147 }
1148 input->seek(pos, librevenge::RVNG_SEEK_SET);
1149 auto fPos = long(input->readULong(4));
1150
1151 for (int i = 0; i < N; i++) {
1152 f << "fPos=" << std::hex << fPos;
1153 auto newPos = long(input->readULong(4));
1154 f << "->" << newPos << std::dec;
1155
1156 auto depl = static_cast<int>(input->readLong(2));
1157 if (depl == -1)
1158 plc.m_id = -1;
1159 else if (depl < N*6 || 4+depl >= 0x7f) {
1160 f << "[###pos=" << std::hex << depl << std::dec << "]";
1161 plc.m_id = -1;
1162 }
1163 else {
1164 long dataPos = pos+depl+4;
1165 long actPos = input->tell();
1166 if (posIdMap.find(dataPos) == posIdMap.end()) {
1167 f2.str("");
1168 f2 << what << "-";
1169 if (wh == 0) {
1170 MsWrd1ParserInternal::Font font;
1171 if (readFont(dataPos, font)) {
1172 plc.m_id=int(m_state->m_fontsList.size());
1173 m_state->m_fontsList.push_back(font);
1174 f2 << plc.m_id << ":";
1175 #ifdef DEBUG
1176 f2 << font.m_font.getDebugString(getFontConverter()) << font;
1177 #endif
1178 }
1179 else {
1180 plc.m_id = -1;
1181 f2 << "###";
1182 }
1183 ascii().addPos(dataPos);
1184 ascii().addNote(f2.str().c_str());
1185 }
1186 else {
1187 MsWrd1ParserInternal::Paragraph para;
1188 if (readParagraph(dataPos, para)) {
1189 plc.m_id=int(m_state->m_paragraphsList.size());
1190 m_state->m_paragraphsList.push_back(para);
1191 f2 << plc.m_id << ":" << para;
1192 }
1193 else {
1194 plc.m_id = -1;
1195 f2 << "###";
1196 }
1197 ascii().addPos(dataPos);
1198 ascii().addNote(f2.str().c_str());
1199 }
1200 posIdMap[dataPos] = plc.m_id;
1201 }
1202 else
1203 plc.m_id = posIdMap.find(dataPos)->second;
1204 input->seek(actPos, librevenge::RVNG_SEEK_SET);
1205 }
1206 m_state->m_plcMap.insert
1207 (std::multimap<long,MsWrd1ParserInternal::PLC>::value_type(fPos, plc));
1208 fPos = newPos;
1209 f << ":" << plc << ",";
1210 }
1211 ascii().addDelimiter(input->tell(),'|');
1212 ascii().addPos(pos);
1213 ascii().addNote(f.str().c_str());
1214 }
1215
1216 return true;
1217 }
1218
1219 ////////////////////////////////////////////////////////////
1220 // try to read a text entry
1221 ////////////////////////////////////////////////////////////
sendText(MWAWEntry const & textEntry,bool isMain)1222 bool MsWrd1Parser::sendText(MWAWEntry const &textEntry, bool isMain)
1223 {
1224 if (!textEntry.valid()) return false;
1225 if (!getTextListener()) {
1226 MWAW_DEBUG_MSG(("MsWrd1Parser::sendText: can not find a listener!"));
1227 return true;
1228 }
1229 if (isMain) {
1230 int numCols = m_state->m_numColumns;
1231 if (numCols > 1 && !getTextListener()->isSectionOpened()) {
1232 MWAWSection sec;
1233 sec.setColumns(numCols, getPageWidth()/double(numCols), librevenge::RVNG_INCH, double(m_state->m_columnsSep));
1234 getTextListener()->openSection(sec);
1235 }
1236 }
1237 long pos = textEntry.begin();
1238 MWAWInputStreamPtr input = getInput();
1239
1240 input->seek(pos, librevenge::RVNG_SEEK_SET);
1241 libmwaw::DebugStream f;
1242 f << "TextContent:";
1243 int actFId=-1, actRId = -1, actPage=0;
1244 auto plcIt = m_state->m_plcMap.begin();
1245 while (plcIt != m_state->m_plcMap.end() && plcIt->first < pos) {
1246 MsWrd1ParserInternal::PLC const &plc = plcIt++->second;
1247 if (plc.m_type == MsWrd1ParserInternal::FONT)
1248 actFId = plc.m_id;
1249 else if (plc.m_type == MsWrd1ParserInternal::RULER)
1250 actRId = plc.m_id;
1251 else if (plc.m_type == MsWrd1ParserInternal::PAGE)
1252 actPage++;
1253 }
1254 // new page can be in header, ..., so sometimes we must force a new page...
1255 if (isMain && actPage > m_state->m_actPage)
1256 newPage(actPage);
1257 MsWrd1ParserInternal::Font actFont, defFont;
1258 defFont.m_font = MWAWFont(3,12);
1259 if (actFId>=0 && actFId < int(m_state->m_fontsList.size()))
1260 actFont = m_state->m_fontsList[size_t(actFId)];
1261 else
1262 actFont = defFont;
1263 bool rulerNotSent = actRId != -1, fontNotSent = true;
1264 while (!input->isEnd() && input->tell() < textEntry.end()) {
1265 long actPos = input->tell();
1266 bool firstPlc = true;
1267 while (plcIt != m_state->m_plcMap.end() && plcIt->first <= actPos) {
1268 if (firstPlc) {
1269 ascii().addPos(pos);
1270 ascii().addNote(f.str().c_str());
1271 pos = actPos;
1272 f.str("");
1273 f << "TextContent:";
1274 firstPlc = false;
1275 }
1276
1277 auto const &plc = plcIt++->second;
1278 switch (plc.m_type) {
1279 case MsWrd1ParserInternal::FONT:
1280 if (plc.m_id >= 0 && plc.m_id < int(m_state->m_fontsList.size()))
1281 getTextListener()->setFont(m_state->m_fontsList[size_t(plc.m_id)].m_font);
1282 else
1283 getTextListener()->setFont(defFont.m_font);
1284 actFont.m_font = getTextListener()->getFont();
1285 fontNotSent = false;
1286 break;
1287 case MsWrd1ParserInternal::RULER:
1288 actRId = plc.m_id;
1289 rulerNotSent = true;
1290 break;
1291 case MsWrd1ParserInternal::PAGE:
1292 if (isMain) newPage(++actPage);
1293 break;
1294 case MsWrd1ParserInternal::FOOTNOTE: {
1295 if (!isMain) break;
1296 if (plc.m_id < 0 || plc.m_id >= int(m_state->m_footnotesList.size())) {
1297 MWAW_DEBUG_MSG(("MsWrd1Parser::sendText: oops, can not find a footnote!\n"));
1298 break;
1299 }
1300 MWAWEntry entry;
1301 entry.setBegin(m_state->m_footnotesList[size_t(plc.m_id)][0]);
1302 entry.setEnd(m_state->m_footnotesList[size_t(plc.m_id)][1]);
1303 removeLastCharIfEOL(entry);
1304 std::shared_ptr<MWAWSubDocument> subdoc
1305 (new MsWrd1ParserInternal::SubDocument(*this, getInput(), entry));
1306 getTextListener()->insertNote(MWAWNote(m_state->m_endNote ? MWAWNote::EndNote : MWAWNote::FootNote), subdoc);
1307 break;
1308 }
1309 case MsWrd1ParserInternal::ZONE:
1310 case MsWrd1ParserInternal::UNKNOWN:
1311 #if !defined(__clang__)
1312 default:
1313 #endif
1314 break;
1315 }
1316 f << "[" << plc << "]";
1317 }
1318 if (rulerNotSent) {
1319 if (actRId >= 0 && actRId < int(m_state->m_paragraphsList.size()))
1320 setProperty(m_state->m_paragraphsList[size_t(actRId)]);
1321 else
1322 setProperty(MsWrd1ParserInternal::Paragraph());
1323 rulerNotSent = false;
1324 }
1325 if (fontNotSent) getTextListener()->setFont(actFont.m_font);
1326 auto c = static_cast<unsigned char>(input->readULong(1));
1327 f << char(c);
1328 switch (c) {
1329 case 1:
1330 getTextListener()->insertUnicodeString(librevenge::RVNGString("(picture)"));
1331 break;
1332 case 5: // footnote mark
1333 case 0xc: // end of file
1334 break;
1335 case 0x9:
1336 getTextListener()->insertTab();
1337 break;
1338 case 0xd:
1339 getTextListener()->insertEOL();
1340 break;
1341 default:
1342 getTextListener()->insertCharacter(static_cast<unsigned char>(c), input, textEntry.end());
1343 break;
1344 }
1345 }
1346 ascii().addPos(pos);
1347 ascii().addNote(f.str().c_str());
1348 return true;
1349 }
1350
1351 // send the ruler properties
setProperty(MsWrd1ParserInternal::Paragraph const & para)1352 void MsWrd1Parser::setProperty(MsWrd1ParserInternal::Paragraph const ¶)
1353 {
1354 if (!getTextListener()) return;
1355 getTextListener()->setParagraph(para);
1356 }
1357
1358 ////////////////////////////////////////////////////////////
1359 // Low level
1360 ////////////////////////////////////////////////////////////
1361
1362 // read the header
checkHeader(MWAWHeader * header,bool strict)1363 bool MsWrd1Parser::checkHeader(MWAWHeader *header, bool strict)
1364 {
1365 *m_state = MsWrd1ParserInternal::State();
1366 MWAWInputStreamPtr input = getInput();
1367 if (!input || !input->hasDataFork())
1368 return false;
1369
1370 libmwaw::DebugStream f;
1371 if (!input->checkPosition(0x80)) {
1372 MWAW_DEBUG_MSG(("MsWrd1Parser::checkHeader: file is too short\n"));
1373 return false;
1374 }
1375 long pos = 0;
1376 input->seek(pos, librevenge::RVNG_SEEK_SET);
1377 auto val = static_cast<int>(input->readULong(2));
1378 switch (val) {
1379 case 0xfe32:
1380 switch (input->readULong(2)) {
1381 case 0x0:
1382 setVersion(1);
1383 break;
1384 default:
1385 return false;
1386 }
1387 break;
1388 default:
1389 return false;
1390 }
1391
1392 f << "FileHeader:";
1393 val = static_cast<int>(input->readULong(1)); // v1: ab other 0 ?
1394 if (val) f << "f0=" << val << ",";
1395 for (int i = 1; i < 3; i++) { // always 0
1396 val = static_cast<int>(input->readLong(2));
1397 if (val) f << "f" << i << "=" << val << ",";
1398 }
1399 for (int i = 0; i < 5; i++) { // always 0 ?
1400 val = static_cast<int>(input->readLong(1));
1401 if (val) f << "g" << i << "=" << val << ",";
1402 }
1403
1404 m_state->m_eot = long(input->readULong(4));
1405 f << "text=" << std::hex << 0x80 << "<->" << m_state->m_eot << ",";
1406 if (0x80 > m_state->m_eot || !input->checkPosition(m_state->m_eot)) {
1407 MWAW_DEBUG_MSG(("MsWrd1Parser::checkHeader: problem with text position must stop\n"));
1408 return false;
1409 }
1410
1411 m_state->m_fileZonesLimit[0] = int((m_state->m_eot+0x7f)/0x80);
1412 f << "zonesPos=[" << std::hex;
1413 for (int i = 0; i < 6; i++) {
1414 m_state->m_fileZonesLimit[i+1] = static_cast<int>(input->readLong(2));
1415 if (m_state->m_fileZonesLimit[i]==m_state->m_fileZonesLimit[i+1]) {
1416 f << "_,";
1417 continue;
1418 }
1419 if (m_state->m_fileZonesLimit[i]<m_state->m_fileZonesLimit[i+1]) {
1420 f << m_state->m_fileZonesLimit[i]*0x80 << "<->"
1421 << m_state->m_fileZonesLimit[i+1]*0x80 << ",";
1422 continue;
1423 }
1424 MWAW_DEBUG_MSG(("MsWrd1Parser::checkHeader: problem reading the zones positions\n"));
1425 if (strict) return false;
1426 f << "###" << m_state->m_fileZonesLimit[i+1]*0x80 << ",";
1427 m_state->m_fileZonesLimit[i+1] = m_state->m_fileZonesLimit[i];
1428 }
1429 f << std::dec << "],";
1430 ascii().addPos(pos);
1431 ascii().addNote(f.str().c_str());
1432 pos = input->tell();
1433 f.str("");
1434 f << "FileHeader[A]:";
1435 for (int i = 0; i < 17; i++) {
1436 val = static_cast<int>(input->readLong(2));
1437 if (val) f << "f" << i << "=" << val << ",";
1438 }
1439 ascii().addPos(pos);
1440 ascii().addNote(f.str().c_str());
1441
1442 long textSize[2];
1443 for (auto &tSize : textSize) tSize = input->readLong(4);
1444 if (textSize[0] != textSize[1] || 0x80+textSize[0] != m_state->m_eot) {
1445 MWAW_DEBUG_MSG(("MsWrd1Parser::checkHeader: problem with text position length\n"));
1446 if (strict) return false;
1447 f << "##textSize=" << std::hex << textSize[0] << ":" << textSize[1] << std::dec << ",";
1448 if (textSize[1] > textSize[0]) textSize[0] = textSize[1];
1449 if (0x80+textSize[0] > m_state->m_eot && input->checkPosition(0x80+textSize[0]))
1450 m_state->m_eot = 0x80+textSize[0];
1451 }
1452 pos=input->tell();
1453 f.str("");
1454 f << "FileHeader[B]:";
1455 for (int i = 0; i < 28; i++) { // always 0
1456 val = static_cast<int>(input->readLong(2));
1457 if (val) f << "f" << i << "=" << val << ",";
1458 }
1459 ascii().addPos(pos);
1460 ascii().addNote(f.str().c_str());
1461 if (header)
1462 header->reset(MWAWDocument::MWAW_T_MICROSOFTWORD, 1);
1463 return true;
1464 }
1465
1466 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab:
1467