1 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
2 
3 /* libmwaw
4 * Version: MPL 2.0 / LGPLv2+
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 2.0 (the "License"); you may not use this file except in compliance with
8 * the License or as specified alternatively below. You may obtain a copy of
9 * the License at http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Major Contributor(s):
17 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
18 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
19 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
20 * Copyright (C) 2006, 2007 Andrew Ziem
21 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
22 *
23 *
24 * All Rights Reserved.
25 *
26 * For minor contributions see the git repository.
27 *
28 * Alternatively, the contents of this file may be used under the terms of
29 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
30 * in which case the provisions of the LGPLv2+ are applicable
31 * instead of those above.
32 */
33 
34 #include <iomanip>
35 #include <iostream>
36 #include <limits>
37 #include <sstream>
38 
39 #include <librevenge/librevenge.h>
40 
41 #include "MWAWTextListener.hxx"
42 #include "MWAWFont.hxx"
43 #include "MWAWFontConverter.hxx"
44 #include "MWAWHeader.hxx"
45 #include "MWAWParagraph.hxx"
46 #include "MWAWPictMac.hxx"
47 #include "MWAWPosition.hxx"
48 #include "MWAWPrinter.hxx"
49 #include "MWAWSection.hxx"
50 #include "MWAWSubDocument.hxx"
51 
52 #include "MsWrd1Parser.hxx"
53 
54 /** Internal: the structures of a MsWrd1Parser */
55 namespace MsWrd1ParserInternal
56 {
/** the different kinds of PLC
 *
 * - FONT: a font
 * - RULER: a ruler
 * - FOOTNOTE: a footnote marker
 * - PAGE: a page break
 * - ZONE: an unknown zone (zone4)
 * - UNKNOWN: an undetermined type
 */
enum PLCType {
  FONT=0,
  RULER=1,
  FOOTNOTE=2,
  PAGE=3,
  ZONE=4,
  UNKNOWN=5
};
66 
67 /** Internal: class to store the PLC: Pointer List Content ? */
68 struct PLC {
69   //! constructor
PLCMsWrd1ParserInternal::PLC70   explicit PLC(PLCType type=UNKNOWN)
71     : m_type(type)
72     , m_id(-1)
73     , m_extras("")
74   {
75   }
76   //! operator<<
77   friend std::ostream &operator<<(std::ostream &o, PLC const &plc);
78   //! the type
79   PLCType m_type;
80   //! the id
81   int m_id;
82   //! a string used to store the parsing extrass
83   std::string m_extras;
84 };
85 
operator <<(std::ostream & o,PLC const & plc)86 std::ostream &operator<<(std::ostream &o, PLC const &plc)
87 {
88   switch (plc.m_type) {
89   case FONT:
90     o << "F";
91     break;
92   case RULER:
93     o << "P";
94     break;
95   case FOOTNOTE:
96     o << "Fn";
97     break;
98   case PAGE:
99     o << "Page";
100     break;
101   case ZONE:
102     o << "Z";
103     break;
104   case UNKNOWN:
105 #if !defined(__clang__)
106   default:
107 #endif
108     o << "#type" << int(plc.m_type);
109     break;
110   }
111   if (plc.m_id != -1) o << plc.m_id;
112   else o << "_";
113   if (!plc.m_extras.empty()) o << ":" << plc.m_extras;
114   return o;
115 }
116 
117 ////////////////////////////////////////
118 //! Internal: the font of a MsWrd1Parser
119 struct Font {
120   //! constructor
FontMsWrd1ParserInternal::Font121   Font()
122     : m_font()
123     , m_type(0)
124     , m_extras("")
125   {
126   }
127   //! operator<<
128   friend std::ostream &operator<<(std::ostream &o, Font const &ft);
129   //! the basic font property
130   MWAWFont m_font;
131   //! a unknown int, maybe 0x80 means defined font
132   int m_type;
133   //! a string used to store the parsing extrass
134   std::string m_extras;
135 };
136 
operator <<(std::ostream & o,Font const & ft)137 std::ostream &operator<<(std::ostream &o, Font const &ft)
138 {
139   if (ft.m_type) o << "type=" << std::hex << ft.m_type << std::dec << ",";
140   if (!ft.m_extras.empty()) o << ft.m_extras;
141   return o;
142 }
143 
144 ////////////////////////////////////////
145 //! Internal: the paragraph of a MsWrd1Parser
146 struct Paragraph final : public MWAWParagraph {
147   //! constructor
ParagraphMsWrd1ParserInternal::Paragraph148   Paragraph()
149     : MWAWParagraph()
150     , m_type(0)
151     , m_type2(0)
152   {
153   }
154   Paragraph(Paragraph const &)=default;
155   Paragraph &operator=(Paragraph const &)=default;
156   Paragraph &operator=(Paragraph &&)=default;
157   //! destructor
158   ~Paragraph() final;
159   //! operator<<
160   friend std::ostream &operator<<(std::ostream &o, Paragraph const &ft);
161   //! the initial type
162   int m_type;
163   //! another type
164   int m_type2;
165 };
166 
~Paragraph()167 Paragraph::~Paragraph()
168 {
169 }
170 
operator <<(std::ostream & o,Paragraph const & para)171 std::ostream &operator<<(std::ostream &o, Paragraph const &para)
172 {
173   o << static_cast<MWAWParagraph const &>(para);
174   // 0|80 frequent: means redefine paragraph? find also a7 in a footnote
175   if (para.m_type) o << "type=" << std::hex << para.m_type << std::dec << ",";
176 
177   if (para.m_type2 & 0xF0) {
178     bool foot = (para.m_type2 & 0x10);
179     if (foot) o << "footer/footnote[";
180     else o << "header[";
181     if (para.m_type2 & 0x20) o << (foot ? "even," : "odd,");
182     if (para.m_type2 & 0x40) o << (foot ? "odd," : "even,");
183     if (para.m_type2 & 0x80) o << "first,";
184     o << "]";
185   }
186   if (para.m_type2 & 0xF)
187     o << "#type2=" << std::hex << (para.m_type2 & 0xF) << std::dec << ",";
188 
189   return o;
190 }
191 
192 ////////////////////////////////////////
193 //! Internal: the state of a MsWrd1Parser
194 struct State {
195   //! constructor
StateMsWrd1ParserInternal::State196   State()
197     : m_eot(-1)
198     , m_numColumns(1)
199     , m_columnsSep(0)
200     , m_textZonesList()
201     , m_mainTextZonesList()
202     , m_fontsList()
203     , m_paragraphsList()
204     , m_endNote(false)
205     , m_footnotesList()
206     , m_plcMap()
207     , m_actPage(0)
208     , m_numPages(1)
209     , m_headersId()
210     , m_footersId()
211   {
212     for (auto &limit : m_fileZonesLimit) limit = -1;
213   }
214 
215   //! end of text
216   long m_eot;
217   //! the number of columns
218   int m_numColumns;
219   //! the column separator
220   float m_columnsSep;
221   //! the zones limits
222   int m_fileZonesLimit[7];
223   //! the list of text zones
224   std::vector<MWAWVec2l> m_textZonesList;
225   //! the list of main text zones
226   std::vector<int> m_mainTextZonesList;
227   //! the list of fonts
228   std::vector<Font> m_fontsList;
229   //! the list of paragraph
230   std::vector<Paragraph> m_paragraphsList;
231   //! a flag to know if we send endnote or footnote
232   bool m_endNote;
233   //! the footnote positions ( list of beginPos, endPos)
234   std::vector<MWAWVec2l> m_footnotesList;
235   //! the text correspondance zone ( filepos, plc )
236   std::multimap<long, PLC> m_plcMap;
237 
238   int m_actPage /** the actual page */, m_numPages /** the number of page of the final document */;
239   /** the list of header id which corresponds to each page */
240   std::vector<int> m_headersId;
241   /** the list of footer id which corresponds to each page */
242   std::vector<int> m_footersId;
243 };
244 
245 ////////////////////////////////////////
246 //! Internal: the subdocument of a MsWrdParser
247 class SubDocument final : public MWAWSubDocument
248 {
249 public:
250   //! constructor for footnote, header
SubDocument(MsWrd1Parser & pars,MWAWInputStreamPtr const & input,MWAWEntry const & position)251   SubDocument(MsWrd1Parser &pars, MWAWInputStreamPtr const &input, MWAWEntry const &position)
252     : MWAWSubDocument(&pars, input, position)
253   {
254   }
255 
256   //! destructor
~SubDocument()257   ~SubDocument() final {}
258 
259   //! operator!=
operator !=(MWAWSubDocument const & doc) const260   bool operator!=(MWAWSubDocument const &doc) const final
261   {
262     return MWAWSubDocument::operator!=(doc);
263   }
264 
265   //! the parser function
266   void parse(MWAWListenerPtr &listener, libmwaw::SubDocumentType type) final;
267 
268 protected:
269 };
270 
parse(MWAWListenerPtr & listener,libmwaw::SubDocumentType)271 void SubDocument::parse(MWAWListenerPtr &listener, libmwaw::SubDocumentType)
272 {
273   if (!listener.get()) {
274     MWAW_DEBUG_MSG(("MsWrd1ParserInternal::SubDocument::parse: no listener\n"));
275     return;
276   }
277   auto *parser=dynamic_cast<MsWrd1Parser *>(m_parser);
278   if (!parser) {
279     MWAW_DEBUG_MSG(("MsWrd1ParserInternal::SubDocument::parse: no parser\n"));
280     return;
281   }
282 
283   if (!m_zone.valid()) {
284     listener->insertChar(' ');
285     return;
286   }
287   long pos = m_input->tell();
288   parser->sendText(m_zone);
289   m_input->seek(pos, librevenge::RVNG_SEEK_SET);
290 }
291 }
292 
293 
294 ////////////////////////////////////////////////////////////
295 // constructor/destructor, ...
296 ////////////////////////////////////////////////////////////
MsWrd1Parser(MWAWInputStreamPtr const & input,MWAWRSRCParserPtr const & rsrcParser,MWAWHeader * header)297 MsWrd1Parser::MsWrd1Parser(MWAWInputStreamPtr const &input, MWAWRSRCParserPtr const &rsrcParser, MWAWHeader *header)
298   : MWAWTextParser(input, rsrcParser, header)
299   , m_state()
300 {
301   init();
302 }
303 
~MsWrd1Parser()304 MsWrd1Parser::~MsWrd1Parser()
305 {
306 }
307 
init()308 void MsWrd1Parser::init()
309 {
310   resetTextListener();
311   setAsciiName("main-1");
312 
313   m_state.reset(new MsWrd1ParserInternal::State);
314 
315   // reduce the margin (in case, the page is not defined)
316   getPageSpan().setMargins(0.1);
317 }
318 
319 ////////////////////////////////////////////////////////////
320 // new page
321 ////////////////////////////////////////////////////////////
newPage(int number)322 void MsWrd1Parser::newPage(int number)
323 {
324   if (number <= m_state->m_actPage || number > m_state->m_numPages)
325     return;
326 
327   while (m_state->m_actPage < number) {
328     m_state->m_actPage++;
329     if (!getTextListener() || m_state->m_actPage == 1)
330       continue;
331     getTextListener()->insertBreak(MWAWTextListener::PageBreak);
332   }
333 }
334 
removeLastCharIfEOL(MWAWEntry & entry)335 void MsWrd1Parser::removeLastCharIfEOL(MWAWEntry &entry)
336 {
337   if (!entry.valid()) return;
338   MWAWInputStreamPtr input = getInput();
339   long actPos = input->tell();
340   input->seek(entry.end()-1, librevenge::RVNG_SEEK_SET);
341   if (input->readLong(1)==0xd)
342     entry.setLength(entry.length()-1);
343   input->seek(actPos, librevenge::RVNG_SEEK_SET);
344 }
345 ////////////////////////////////////////////////////////////
346 // the parser
347 ////////////////////////////////////////////////////////////
parse(librevenge::RVNGTextInterface * docInterface)348 void MsWrd1Parser::parse(librevenge::RVNGTextInterface *docInterface)
349 {
350   if (!getInput().get() || !checkHeader(nullptr))  throw(libmwaw::ParseException());
351   bool ok = true;
352   try {
353     // create the asciiFile
354     ascii().setStream(getInput());
355     ascii().open(asciiName());
356     checkHeader(nullptr);
357     ok = createZones();
358     if (ok) {
359       createDocument(docInterface);
360       sendMain();
361     }
362 
363     ascii().reset();
364   }
365   catch (...) {
366     MWAW_DEBUG_MSG(("MsWrd1Parser::parse: exception catched when parsing\n"));
367     ok = false;
368   }
369 
370   resetTextListener();
371   if (!ok) throw(libmwaw::ParseException());
372 }
373 
374 ////////////////////////////////////////////////////////////
375 // send the main zone
376 ////////////////////////////////////////////////////////////
sendMain()377 void MsWrd1Parser::sendMain()
378 {
379   for (auto id : m_state->m_mainTextZonesList) {
380     if (id < 0 || id >= int(m_state->m_textZonesList.size()))
381       continue;
382     MWAWEntry entry;
383     entry.setBegin(m_state->m_textZonesList[size_t(id)][0]);
384     entry.setEnd(m_state->m_textZonesList[size_t(id)][1]);
385     sendText(entry, true);
386   }
387   // maybe need if we have no text ; if not, nobody will see it
388   if (getTextListener())
389     getTextListener()->insertChar(' ');
390 }
391 
392 ////////////////////////////////////////////////////////////
393 // create the document
394 ////////////////////////////////////////////////////////////
createDocument(librevenge::RVNGTextInterface * documentInterface)395 void MsWrd1Parser::createDocument(librevenge::RVNGTextInterface *documentInterface)
396 {
397   if (!documentInterface) return;
398   if (getTextListener()) {
399     MWAW_DEBUG_MSG(("MsWrd1Parser::createDocument: listener already exist\n"));
400     return;
401   }
402 
403   // update the page
404   m_state->m_actPage = 0;
405   // create the page list
406   std::vector<MWAWPageSpan> pageList;
407   auto numHeaders=int(m_state->m_headersId.size());
408   auto numFooters=int(m_state->m_footersId.size());
409   for (int i = 0; i <= m_state->m_numPages;) {
410     int numSim[2]= {1,1};
411     MWAWPageSpan ps(getPageSpan());
412     while (i < numHeaders) {
413       int id = m_state->m_headersId[size_t(i)];
414       if (id < 0 || id >= int(m_state->m_textZonesList.size()))
415         break;
416       MWAWEntry entry;
417       entry.setBegin(m_state->m_textZonesList[size_t(id)][0]);
418       entry.setEnd(m_state->m_textZonesList[size_t(id)][1]);
419       removeLastCharIfEOL(entry);
420       if (!entry.valid()) break;
421       MWAWHeaderFooter header(MWAWHeaderFooter::HEADER, MWAWHeaderFooter::ALL);
422       header.m_subDocument.reset
423       (new MsWrd1ParserInternal::SubDocument(*this, getInput(), entry));
424       ps.setHeaderFooter(header);
425       int j = i+1;
426       while (j < numHeaders && m_state->m_headersId[size_t(j)]==id) {
427         numSim[0]++;
428         j++;
429       }
430       break;
431     }
432     while (i < int(numFooters)) {
433       int id = m_state->m_footersId[size_t(i)];
434       if (id < 0 || id >= int(m_state->m_textZonesList.size()))
435         break;
436       MWAWEntry entry;
437       entry.setBegin(m_state->m_textZonesList[size_t(id)][0]);
438       entry.setEnd(m_state->m_textZonesList[size_t(id)][1]);
439       removeLastCharIfEOL(entry);
440       if (!entry.valid()) break;
441       MWAWHeaderFooter footer(MWAWHeaderFooter::FOOTER, MWAWHeaderFooter::ALL);
442       footer.m_subDocument.reset
443       (new MsWrd1ParserInternal::SubDocument(*this, getInput(), entry));
444       ps.setHeaderFooter(footer);
445       int j = i+1;
446       while (j < numFooters && m_state->m_footersId[size_t(j)]==id) {
447         numSim[1]++;
448         j++;
449       }
450       break;
451     }
452     if (numSim[1] < numSim[0]) numSim[0]=numSim[1];
453     if (numSim[0] < 1) numSim[0]=1;
454     ps.setPageSpan(numSim[0]);
455     i+=numSim[0];
456     pageList.push_back(ps);
457   }
458 
459   //
460   MWAWTextListenerPtr listen(new MWAWTextListener(*getParserState(), pageList, documentInterface));
461   setTextListener(listen);
462   listen->startDocument();
463 }
464 
465 
466 ////////////////////////////////////////////////////////////
467 // Intermediate level
468 ////////////////////////////////////////////////////////////
469 
470 // create the different zones
createZones()471 bool MsWrd1Parser::createZones()
472 {
473   libmwaw::DebugStream f;
474   if (m_state->m_eot < 0x80) return false;
475 
476   ascii().addPos(0x80);
477   ascii().addNote("TextContent");
478 
479   ascii().addPos(m_state->m_eot);
480   ascii().addNote("_");
481 
482   MWAWInputStreamPtr input = getInput();
483   for (int z = 5; z >= 0; z--) {
484     if (m_state->m_fileZonesLimit[z] == m_state->m_fileZonesLimit[z+1])
485       continue;
486     if (!input->checkPosition(m_state->m_fileZonesLimit[z+1]*0x80) ||
487         m_state->m_fileZonesLimit[z] > m_state->m_fileZonesLimit[z+1]) {
488       f.str("");
489       f << "Entries(Zone" << z << "):###";
490       MWAW_DEBUG_MSG(("MsWrd1Parser::createZones: zone %d is too long\n",z));
491       ascii().addPos(m_state->m_fileZonesLimit[z]*0x80);
492       ascii().addNote(f.str().c_str());
493       break;
494     }
495     MWAWVec2i limit(m_state->m_fileZonesLimit[z],m_state->m_fileZonesLimit[z+1]);
496     bool done = false;
497     switch (z) {
498     case 0:
499     case 1:
500       done = readPLC(limit,z);
501       break;
502     case 2:
503       done = readFootnoteCorrespondance(limit);
504       break;
505     case 3:
506       done = readDocInfo(limit);
507       break;
508     case 4:
509       done = readZones(limit);
510       break;
511     case 5:
512       done = readPageBreak(limit);
513       break;
514     default:
515       break;
516     }
517     if (done) continue;
518     for (int p = m_state->m_fileZonesLimit[z], i=0; p < m_state->m_fileZonesLimit[z+1]; p++, i++) {
519       f.str("");
520       f << "Entries(Zone" << z << ")[" << i << "]:";
521       ascii().addPos(p*0x80);
522       ascii().addNote(f.str().c_str());
523     }
524     ascii().addPos(m_state->m_fileZonesLimit[z+1]*0x80);
525     ascii().addNote("_");
526   }
527   prepareTextZones();
528   return true;
529 }
530 
531 // try to read retrieve the header/footer zones ...
prepareTextZones()532 bool MsWrd1Parser::prepareTextZones()
533 {
534   m_state->m_numPages = 1;
535   m_state->m_textZonesList.resize(0);
536   m_state->m_mainTextZonesList.resize(0);
537   m_state->m_headersId.resize(0);
538   m_state->m_footersId.resize(0);
539   long endMain = m_state->m_eot;
540   for (auto const &footnote : m_state->m_footnotesList) {
541     long pos = footnote[0];
542     if (pos >= 0x80 && pos < endMain)
543       endMain = pos;
544   }
545   if (endMain < 0x80) {
546     MWAW_DEBUG_MSG(("MsWrd1Parser::sendText: oops problem computing the limit of the main section"));
547     m_state->m_textZonesList.push_back(MWAWVec2l(0x80, m_state->m_eot));
548     m_state->m_mainTextZonesList.push_back(0);
549     return false;
550   }
551 
552   auto plcIt = m_state->m_plcMap.begin();
553   long pos = 0x80, prevMainPos=pos;
554   int actPage = 1;
555   int actType = 0;
556   MWAWVec2i headerId(-1,-1), footerId(-1,-1);
557   int firstHeaderId=-1, firstFooterId=-1;
558   while (pos < endMain) {
559     int newType = 0;
560     if (plcIt == m_state->m_plcMap.end() || plcIt->first>=endMain) {
561       pos = endMain;
562       newType = -1;
563     }
564     else {
565       pos = plcIt->first;
566       MsWrd1ParserInternal::PLC const &plc = plcIt++->second;
567       if (plc.m_type==MsWrd1ParserInternal::PAGE && pos!=0x80) {
568         if (actPage> int(m_state->m_headersId.size())) {
569           m_state->m_headersId.resize(size_t(actPage),-1);
570           m_state->m_headersId[size_t(actPage)-1] = headerId[(actPage%2)];
571         }
572         if (actPage> int(m_state->m_footersId.size())) {
573           m_state->m_footersId.resize(size_t(actPage),-1);
574           m_state->m_footersId[size_t(actPage)-1] = footerId[(actPage%2)];
575         }
576         actPage++;
577       }
578       if (plc.m_type!=MsWrd1ParserInternal::RULER) continue;
579       if (plc.m_id >= 0 && plc.m_id < int(m_state->m_paragraphsList.size()))
580         newType = (m_state->m_paragraphsList[size_t(plc.m_id)].m_type2>>4);
581       if (newType == actType)
582         continue;
583     }
584     if (pos==prevMainPos) {
585       actType = newType;
586       continue;
587     }
588 
589     auto id = int(m_state->m_textZonesList.size());
590     m_state->m_textZonesList.push_back(MWAWVec2l(prevMainPos, pos));
591     prevMainPos=pos;
592     if (actType==0) {
593       m_state->m_mainTextZonesList.push_back(id);
594       actType = newType;
595       continue;
596     }
597     if (actType&1) {
598       if (actType&2) footerId[1]=id;
599       if (actType&4) footerId[0]=id;
600       if (actType&8) firstFooterId=id;
601       m_state->m_footersId.resize(size_t(actPage),-1);
602       m_state->m_footersId[size_t(actPage)-1] =
603         (actPage==1 && firstFooterId >= 0) ? firstFooterId :
604         (actPage%2) ? footerId[1] : footerId[0];
605     }
606     else {
607       if (actType&2) headerId[0]=id;
608       if (actType&4) headerId[1]=id;
609       if (actType&8) firstHeaderId=id;
610       m_state->m_headersId.resize(size_t(actPage),-1);
611       m_state->m_headersId[size_t(actPage)-1] =
612         (actPage==1 && firstHeaderId >= 0) ? firstHeaderId :
613         (actPage%2) ? headerId[1] : headerId[0];
614     }
615     actType = newType;
616   }
617   if (actPage> int(m_state->m_headersId.size())) {
618     m_state->m_headersId.resize(size_t(actPage),-1);
619     m_state->m_headersId[size_t(actPage)-1] = headerId[(actPage%2)];
620   }
621   if (actPage> int(m_state->m_footersId.size())) {
622     m_state->m_footersId.resize(size_t(actPage),-1);
623     m_state->m_footersId[size_t(actPage)-1] = footerId[(actPage%2)];
624   }
625   m_state->m_numPages = actPage;
626   return true;
627 }
628 
629 ////////////////////////////////////////////////////////////
630 // try to read the different zones
631 ////////////////////////////////////////////////////////////
632 
633 // read the character property
readFont(long fPos,MsWrd1ParserInternal::Font & font)634 bool MsWrd1Parser::readFont(long fPos, MsWrd1ParserInternal::Font &font)
635 {
636   font = MsWrd1ParserInternal::Font();
637   libmwaw::DebugStream f;
638   MWAWInputStreamPtr input = getInput();
639   input->seek(fPos, librevenge::RVNG_SEEK_SET);
640   auto sz = static_cast<int>(input->readLong(1));
641   if (sz < 1 || sz > 0x7f || !input->checkPosition(fPos+1+sz)) {
642     MWAW_DEBUG_MSG(("MsWrd1Parser::readFont: the zone size seems bad\n"));
643     return false;
644   }
645   font.m_type = static_cast<int>(input->readULong(1));
646   int val;
647   uint32_t flags=0;
648   if (sz >= 2) {
649     val = static_cast<int>(input->readULong(1));
650     if (val & 0x80) flags |= MWAWFont::boldBit;
651     if (val & 0x40) flags |= MWAWFont::italicBit;
652     if (val & 0x3f)
653       font.m_font.setId((val & 0x3f));
654   }
655   if (sz >= 3) {
656     val = static_cast<int>(input->readULong(1));
657     if (val) font.m_font.setSize(float(val)/2.f);
658   }
659   if (sz >= 4) {
660     val = static_cast<int>(input->readULong(1));
661     if (val & 0x80) font.m_font.setUnderlineStyle(MWAWFont::Line::Simple);
662     switch ((val&0xc)>>2) {
663     case 0:
664       break;
665     case 3:
666       flags |= MWAWFont::uppercaseBit;
667       break;
668     default:
669       f << "#capBits=" << int((val&0xc)>>2) << ",";
670     }
671     // find also &2 for footnote
672     if (val & 0x73)
673       f << "#flags1=" << std::hex << (val & 0x73) << std::dec << ",";
674   }
675   if (sz >= 5) {
676     val = static_cast<int>(input->readULong(1));
677     if (val & 0x10) flags |= MWAWFont::embossBit;
678     if (val & 0x8) flags |= MWAWFont::shadowBit;
679     if (val & 0xe7)
680       f << "#flags2=" << std::hex << (val & 0xe7) << std::dec << ",";
681   }
682   if (sz >= 6) { // vdepl
683     val = static_cast<int>(input->readLong(1));
684     if (val > 0) font.m_font.set(MWAWFont::Script::super100());
685     else if (val < 0) font.m_font.set(MWAWFont::Script::sub100());
686   }
687   if (sz >= 7) {
688     f << "###";
689     ascii().addDelimiter(input->tell(),'|');
690   }
691   font.m_font.setFlags(flags);
692   font.m_extras = f.str();
693 
694   return true;
695 }
696 
697 /* read the paragraph property */
readParagraph(long fPos,MsWrd1ParserInternal::Paragraph & para)698 bool MsWrd1Parser::readParagraph(long fPos, MsWrd1ParserInternal::Paragraph &para)
699 {
700   para = MsWrd1ParserInternal::Paragraph();
701   libmwaw::DebugStream f;
702   MWAWInputStreamPtr input = getInput();
703   input->seek(fPos, librevenge::RVNG_SEEK_SET);
704   auto sz = static_cast<int>(input->readLong(1));
705   if (sz < 1 || sz > 0x7f || !input->checkPosition(fPos+1+sz)) {
706     MWAW_DEBUG_MSG(("MsWrd1Parser::readParagraph: the zone size seems bad\n"));
707     return false;
708   }
709   para.m_type = static_cast<int>(input->readULong(1));
710   int val;
711   if (sz >= 2) {
712     val = static_cast<int>(input->readULong(1));
713     switch (val>>6) {
714     case 0:
715       break; // left
716     case 1:
717       para.m_justify = MWAWParagraph::JustificationCenter;
718       break;
719     case 2:
720       para.m_justify = MWAWParagraph::JustificationRight;
721       break;
722     case 3:
723       para.m_justify = MWAWParagraph::JustificationFull;
724       break;
725     default:
726       break;
727     }
728     if (val & 0x10) f << "dontbreak[para],";
729     if (val & 0x20) f << "dontbreak[line],";
730     if (val & 0xf)
731       f << "#justify=" << std::hex << (val & 0xf) << std::dec << ",";
732   }
733   if (sz >= 4) { // find always 0 here
734     val = static_cast<int>(input->readLong(2));
735     if (val) f << "#f0=" << val << ",";
736   }
737   if (sz >= 6) {
738     val = static_cast<int>(input->readLong(2));
739     if (val)
740       para.m_margins[2] = double(val)/1440.0;
741   }
742   if (sz >= 8) {
743     val = static_cast<int>(input->readLong(2));
744     if (val)
745       para.m_margins[0] = double(val)/1440.0;
746   }
747   if (sz >= 10) {
748     val = static_cast<int>(input->readLong(2));
749     if (val && !para.m_margins[0].isSet())
750       para.m_margins[1] = double(val)/1440.0;
751     else if (val)
752       para.m_margins[1] = para.m_margins[0].get()+double(val)/1440.0;
753   }
754   if (sz >= 12) {
755     val = static_cast<int>(input->readLong(2));
756     if (val)
757       para.setInterline(double(val)/1440.0, librevenge::RVNG_INCH);
758   }
759   if (sz >= 14) {
760     val = static_cast<int>(input->readLong(2));
761     if (val)
762       para.m_spacings[1] = double(val)/1440.0;
763   }
764   if (sz >= 16) {
765     val = static_cast<int>(input->readLong(2));
766     if (val)
767       para.m_spacings[2] = double(val)/1440.0;
768   }
769   if (sz >= 17)
770     para.m_type2 = static_cast<int>(input->readULong(1));
771   // checkme: not sure what is the exact decomposition of the following
772   if (sz >= 22) { // find always 0 here
773     for (int i = 0; i < 5; i++) {
774       val = static_cast<int>(input->readLong(1));
775       if (val) f << "#f" << i+1 << "=" << val << ",";
776     }
777   }
778   if (sz >= 26) {
779     int numTabs = (sz-26)/4;
780     for (int i = 0; i < numTabs; i++) {
781       MWAWTabStop newTab;
782       newTab.m_position = double(input->readLong(2))/1440.;
783       auto flags = static_cast<int>(input->readULong(1));
784       switch ((flags>>5)&3) {
785       case 0:
786         break;
787       case 1:
788         newTab.m_alignment = MWAWTabStop::CENTER;
789         break;
790       case 2:
791         newTab.m_alignment = MWAWTabStop::RIGHT;
792         break;
793       case 3:
794         newTab.m_alignment = MWAWTabStop::DECIMAL;
795         break;
796       default:
797         break;
798       }
799       switch ((flags>>2)&3) {
800       case 0:
801         break;
802       case 1:
803         newTab.m_leaderCharacter = '.';
804         break;
805       case 2:
806         newTab.m_leaderCharacter = '-';
807         break;
808       case 3:
809         newTab.m_leaderCharacter = '_';
810         break;
811       default:
812         break;
813       }
814       if (flags & 0x93)
815         f << "#tabs" << i << "[fl1=" << std::hex << (flags & 0x93) << std::dec << ",";
816       val = static_cast<int>(input->readULong(1));
817       if (val)
818         f << "#tabs" << i << "[fl2=" << std::hex << val << std::dec << ",";
819       para.m_tabs->push_back(newTab);
820     }
821   }
822   if (input->tell() != fPos+1+sz)
823     ascii().addDelimiter(input->tell(), '|');
824   para.m_extra = f.str();
825   return true;
826 }
827 
828 /* read the page break separation */
readPageBreak(MWAWVec2i limits)829 bool MsWrd1Parser::readPageBreak(MWAWVec2i limits)
830 {
831   MWAWInputStreamPtr input = getInput();
832   if (limits[1] <= limits[0] || !input->checkPosition(limits[1]*0x80)) {
833     MWAW_DEBUG_MSG(("MsWrd1Parser::readPageBreak: the zone is not well defined\n"));
834     return false;
835   }
836   libmwaw::DebugStream f;
837   long pos = limits[0]*0x80;
838   input->seek(pos, librevenge::RVNG_SEEK_SET);
839   f << "Entries(PageBreak):";
840   auto N = static_cast<int>(input->readULong(2));
841   f << "N=" << N << ",";
842   if (N==0 || 4+6*N > (limits[1]-limits[0])*0x80) {
843     MWAW_DEBUG_MSG(("MsWrd1Parser::readPageBreak: the number of element seems odds\n"));
844     f << "###";
845     ascii().addPos(pos);
846     ascii().addNote(f.str().c_str());
847     return false;
848   }
849   long val = static_cast<int>(input->readULong(2)); // 1|a
850   f << "unkn=" << val << ",";
851   MsWrd1ParserInternal::PLC plc(MsWrd1ParserInternal::PAGE);
852   for (int i = 0; i < N; i++) {
853     auto pg = static_cast<int>(input->readULong(2));
854     long textPos = long(input->readULong(4))+0x80;
855     f << "Page" << i << "=" << std::hex << textPos << std::dec;
856     if (pg != i+1) f << "[page=" << pg << "]";
857     if (textPos < m_state->m_eot) {
858       plc.m_id = pg;
859       m_state->m_plcMap.insert
860       (std::multimap<long,MsWrd1ParserInternal::PLC>::value_type(textPos, plc));
861     }
862     else if (i != N-1)
863       f << "###";
864     f << ",";
865   }
866   if (input->tell() != limits[1]*0x80)
867     ascii().addDelimiter(input->tell(),'|');
868   ascii().addPos(pos);
869   ascii().addNote(f.str().c_str());
870   return true;
871 }
872 
873 /* read the footnote zone */
readFootnoteCorrespondance(MWAWVec2i limits)874 bool MsWrd1Parser::readFootnoteCorrespondance(MWAWVec2i limits)
875 {
876   MWAWInputStreamPtr input = getInput();
877   if (limits[1] <= limits[0] || !input->checkPosition(limits[1]*0x80)) {
878     MWAW_DEBUG_MSG(("MsWrd1Parser::readFootnoteCorrespondance: the zone is not well defined\n"));
879     return false;
880   }
881   libmwaw::DebugStream f;
882 
883   long textEnd = m_state->m_eot;
884   MsWrd1ParserInternal::PLC plc(MsWrd1ParserInternal::FOOTNOTE);
885   long pos = limits[0]*0x80;
886   input->seek(pos, librevenge::RVNG_SEEK_SET);
887   f << "Entries(Footnote):";
888   auto N = static_cast<int>(input->readULong(2));
889   auto N1 = static_cast<int>(input->readULong(2));
890   f << "N=" << N << ",";
891   if (N!=N1) f << "N1=" << N1 << ",";
892   if (N!=N1 || N==0 || 4+8*N > (limits[1]-limits[0])*0x80) {
893     MWAW_DEBUG_MSG(("MsWrd1Parser::readFootnoteCorrespondance: the number of element seems odds\n"));
894     f << "###";
895     ascii().addPos(pos);
896     ascii().addNote(f.str().c_str());
897     return false;
898   }
899   std::map<long, int> footnoteMap;
900   for (int i = 0; i < N; i++) {
901     long textPos = long(input->readULong(4))+0x80;
902     long notePos = long(input->readULong(4))+0x80;
903     bool ok = textPos <= textEnd && notePos <= textEnd;
904     f << "Fn" << i << ":" << std::hex << textPos << "<->" << notePos << std::dec << ",";
905     if (!ok) {
906       if (i==N-1) break;
907       f << "###";
908       continue;
909     }
910     plc.m_id = int(footnoteMap.size());
911     footnoteMap[notePos]=plc.m_id;
912     m_state->m_plcMap.insert
913     (std::multimap<long,MsWrd1ParserInternal::PLC>::value_type(textPos, plc));
914     m_state->m_plcMap.insert
915     (std::multimap<long,MsWrd1ParserInternal::PLC>::value_type(notePos, plc));
916   }
917   m_state->m_footnotesList.resize(footnoteMap.size(),MWAWVec2l(0,0));
918   for (auto fIt=footnoteMap.begin(); fIt!=footnoteMap.end();) {
919     MWAWVec2l fPos;
920     fPos[0] = fIt->first;
921     int id = fIt++->second;
922     fPos[1] = fIt==footnoteMap.end() ? m_state->m_eot : fIt->first;
923     if (id >= int(m_state->m_footnotesList.size()))
924       m_state->m_footnotesList.resize(size_t(id)+1,MWAWVec2l(0,0));
925     m_state->m_footnotesList[size_t(id)]=fPos;
926   }
927   ascii().addDelimiter(input->tell(),'|');
928   ascii().addPos(pos);
929   ascii().addNote(f.str().c_str());
930 
931   return true;
932 }
933 
934 /* read the zone4: a list of main zone ( headers, footers ) ? */
readZones(MWAWVec2i limits)935 bool MsWrd1Parser::readZones(MWAWVec2i limits)
936 {
937   MWAWInputStreamPtr input = getInput();
938   if (limits[1] <= limits[0] || !input->checkPosition(limits[1]*0x80)) {
939     MWAW_DEBUG_MSG(("MsWrd1Parser::readZones: the zone is not well defined\n"));
940     return false;
941   }
942   libmwaw::DebugStream f;
943 
944   MsWrd1ParserInternal::PLC plc(MsWrd1ParserInternal::ZONE);
945   long pos = limits[0]*0x80;
946   input->seek(pos, librevenge::RVNG_SEEK_SET);
947   f << "Entries(Zones):";
948   auto N = static_cast<int>(input->readULong(2));
949   auto N1 = static_cast<int>(input->readULong(2));
950   f << "N=" << N << ",";
951   if (N!=N1) f << "N1=" << N1 << ",";
952   if (N!=N1 || N==0 || 4+10*N > (limits[1]-limits[0])*0x80) {
953     MWAW_DEBUG_MSG(("MsWrd1Parser::readZones: the number of element seems odds\n"));
954     f << "###";
955     ascii().addPos(pos);
956     ascii().addNote(f.str().c_str());
957     return false;
958   }
959   for (int i = 0; i < N; i++) {
960     long textPos = long(input->readULong(4))+0x80;
961     f << std::hex << textPos << std::dec;
962     f << ":f0=" << input->readLong(2); // find 1|2|3
963     auto val = static_cast<int>(input->readLong(4)); // find -1, 0x900, 0xa00
964     if (val!=-1) f << ":f1=" << std::hex << val << std::dec;
965     if (textPos < m_state->m_eot) {
966       plc.m_id = i;
967       m_state->m_plcMap.insert
968       (std::multimap<long,MsWrd1ParserInternal::PLC>::value_type(textPos, plc));
969     }
970     else if (textPos != m_state->m_eot && i != N-1)
971       f << "###";
972     f << ",";
973   }
974   ascii().addDelimiter(input->tell(),'|');
975   ascii().addPos(pos);
976   ascii().addNote(f.str().c_str());
977 
978   return true;
979 }
980 
981 /* read the document information */
readDocInfo(MWAWVec2i limits)982 bool MsWrd1Parser::readDocInfo(MWAWVec2i limits)
983 {
984   MWAWInputStreamPtr input = getInput();
985   if (limits[1] != limits[0]+1 || !input->checkPosition(limits[1]*0x80)) {
986     MWAW_DEBUG_MSG(("MsWrd1Parser::readDocInfo: the zone is not well defined\n"));
987     return false;
988   }
989 
990   libmwaw::DebugStream f;
991   long pos = limits[0]*0x80;
992   input->seek(pos, librevenge::RVNG_SEEK_SET);
993   f << "Entries(DocInfo):";
994   int val;
995   for (int i=0; i < 2; i++) { // find 66|0
996     val = static_cast<int>(input->readULong(1));
997     if (val)
998       f << "f" << i << "=" << std::hex << val << std::dec << ",";
999   }
1000   auto flags = static_cast<int>(input->readULong(1));
1001   switch (flags>>5) {
1002   case 0:
1003     f << "division=no,";
1004     break;
1005   case 1:
1006     f << "division=columns,";
1007     break;
1008   case 2:
1009     f << "division=page,";
1010     break; // default
1011   case 3:
1012     f << "division=evenpage,";
1013     break;
1014   case 4:
1015     f << "division=oddpage,";
1016     break;
1017   default:
1018     f << "#division=" << (flags>>5) << ",";
1019     break;
1020   }
1021   switch ((flags>>2)&7) {
1022   case 0: // default (numeric)
1023     break;
1024   case 1:
1025     f << "numbering=roman[upper],";
1026     break;
1027   case 2:
1028     f << "numbering=roman[lower],";
1029     break;
1030   case 3:
1031     f << "numbering=alpha[upper],";
1032     break;
1033   case 4:
1034     f << "numbering=alpha[lower],";
1035     break;
1036   default:
1037     f << "#numbering[type]=" << ((flags>>2)&7) << ",";
1038     break;
1039   }
1040   if (flags&3) f << "flags=" << (flags&3) << ",";
1041 
1042   float pageDim[2];
1043   for (auto &d : pageDim) d = float(input->readULong(2))/1440.f;
1044   f << "dim=[" << pageDim[1] << "x" << pageDim[0] << "],";
1045   val = static_cast<int>(input->readLong(2));
1046   if (val != -1) f << "firstPage=" << val << ",";
1047   // check me
1048   float pagePos[2][2]; // [Y|X][header|size]
1049   char const *wh[] = {"TopMargin", "Y[page]", "LeftMargin", "X[page]" };
1050   for (int i = 0; i < 2; i++) {
1051     for (int j = 0; j < 2; j++) {
1052       pagePos[i][j] = float(input->readULong(2))/1440.f;
1053       f << wh[i*2+j] << "=" << pagePos[i][j] << ",";
1054     }
1055   }
1056   flags = static_cast<int>(input->readULong(1));
1057   bool endNote = false;
1058   if (flags&1) {
1059     f << "endnote,";
1060     endNote = true;
1061   }
1062   if (flags&2)
1063     f << "autonumbering,";
1064   if (flags&0xFC)
1065     f << "flags2=" << std::hex << (flags&0xFC) << std::dec << ",";
1066   ascii().addPos(pos);
1067   ascii().addNote(f.str().c_str());
1068 
1069   pos = input->tell();
1070   f.str("");
1071   f << "DocInfo(II):";
1072   auto numCols = static_cast<int>(input->readULong(1));
1073   if (numCols != 1) {
1074     f << "nCols=" << numCols << ",";
1075     if (numCols < 1 || numCols > 6) {
1076       f << "###";
1077       numCols = 1;
1078     }
1079   }
1080   float hfLength[2];
1081   for (auto &hf : hfLength) hf = float(input->readULong(2))/1440.f;
1082   hfLength[1]=pageDim[0]-hfLength[1];
1083 
1084   f << "headerLength=" << hfLength[0] << ",";
1085   f << "footerLength=" << hfLength[1] << ",";
1086   float colSep = float(input->readULong(2))/1440.f;
1087   f << "colSep=" << colSep << ",";
1088   val = static_cast<int>(input->readLong(2));
1089   if (val)
1090     f << "f3=" << val << ",";
1091   f << "distToHeader=" << float(input->readULong(2))/1440.f << ",";
1092   f << "distToNote=" << float(input->readULong(2))/1440.f << ",";
1093   // probably follows by other distance
1094 
1095   if (pageDim[0] > 0 && pageDim[1] > 0 &&
1096       pagePos[0][0]>=0 && pagePos[0][1]>=0 && pageDim[0] >= pagePos[0][0]+pagePos[0][1] &&
1097       pagePos[1][0]>=0 && pagePos[1][1]>=0 && pageDim[1] >= pagePos[1][0]+pagePos[1][1] &&
1098       pageDim[1] >= float(numCols)*pagePos[1][1]) {
1099     getPageSpan().setMarginTop(double(pagePos[0][0]));
1100     getPageSpan().setMarginLeft(double(pagePos[1][0]));
1101     getPageSpan().setFormLength(double(pageDim[0]));
1102     getPageSpan().setFormWidth(double(pageDim[1]));
1103     m_state->m_endNote = endNote;
1104     m_state->m_numColumns = numCols;
1105     m_state->m_columnsSep = colSep;
1106   }
1107   else {
1108     MWAW_DEBUG_MSG(("MsWrd1Parser::readDocInfo: some dimension do not look good\n"));
1109   }
1110   ascii().addDelimiter(input->tell(),'|');
1111   ascii().addPos(pos);
1112   ascii().addNote(f.str().c_str());
1113   ascii().addPos(pos+53);
1114   ascii().addNote("DocInfo(III)");
1115   return true;
1116 }
1117 
1118 // read a plc zone (char or paragraph properties )
readPLC(MWAWVec2i limits,int wh)1119 bool MsWrd1Parser::readPLC(MWAWVec2i limits, int wh)
1120 {
1121   MWAWInputStreamPtr input = getInput();
1122   if (limits[1] <= limits[0] || !input->checkPosition(limits[1]*0x80)) {
1123     MWAW_DEBUG_MSG(("MsWrd1Parser::readPLC: the zone is not well defined\n"));
1124     return false;
1125   }
1126   libmwaw::DebugStream f, f2;
1127 
1128   std::map<long, int> posIdMap;
1129   MsWrd1ParserInternal::PLC plc(wh==0 ? MsWrd1ParserInternal::FONT :
1130                                 MsWrd1ParserInternal::RULER);
1131   char const *what = wh==0 ? "Char" : "Para";
1132 
1133   for (int z = limits[0], n=0; z < limits[1]; z++, n++) {
1134     f.str("");
1135     f << "Entries(" << what << ")[" << n << "]:";
1136     long pos = z*0x80;
1137     input->seek(pos+0x7f, librevenge::RVNG_SEEK_SET);
1138     auto N = static_cast<int>(input->readULong(1));
1139     f << "N=" << N << ",";
1140     if (4+N*6 > 0x7f) {
1141       f << "###";
1142       MWAW_DEBUG_MSG(("MsWrd1Parser::readPLC: the number of element seems to big\n"));
1143       ascii().addDelimiter(input->tell(),'|');
1144       ascii().addPos(pos);
1145       ascii().addNote(f.str().c_str());
1146       continue;
1147     }
1148     input->seek(pos, librevenge::RVNG_SEEK_SET);
1149     auto fPos = long(input->readULong(4));
1150 
1151     for (int i = 0; i < N; i++) {
1152       f << "fPos=" << std::hex << fPos;
1153       auto newPos = long(input->readULong(4));
1154       f << "->" << newPos << std::dec;
1155 
1156       auto depl = static_cast<int>(input->readLong(2));
1157       if (depl == -1)
1158         plc.m_id = -1;
1159       else if (depl < N*6 || 4+depl >= 0x7f) {
1160         f << "[###pos=" << std::hex << depl << std::dec << "]";
1161         plc.m_id = -1;
1162       }
1163       else {
1164         long dataPos = pos+depl+4;
1165         long actPos = input->tell();
1166         if (posIdMap.find(dataPos) == posIdMap.end()) {
1167           f2.str("");
1168           f2 << what << "-";
1169           if (wh == 0) {
1170             MsWrd1ParserInternal::Font font;
1171             if (readFont(dataPos, font)) {
1172               plc.m_id=int(m_state->m_fontsList.size());
1173               m_state->m_fontsList.push_back(font);
1174               f2 << plc.m_id << ":";
1175 #ifdef DEBUG
1176               f2 << font.m_font.getDebugString(getFontConverter()) << font;
1177 #endif
1178             }
1179             else {
1180               plc.m_id = -1;
1181               f2 << "###";
1182             }
1183             ascii().addPos(dataPos);
1184             ascii().addNote(f2.str().c_str());
1185           }
1186           else {
1187             MsWrd1ParserInternal::Paragraph para;
1188             if (readParagraph(dataPos, para)) {
1189               plc.m_id=int(m_state->m_paragraphsList.size());
1190               m_state->m_paragraphsList.push_back(para);
1191               f2 << plc.m_id << ":" << para;
1192             }
1193             else {
1194               plc.m_id = -1;
1195               f2 << "###";
1196             }
1197             ascii().addPos(dataPos);
1198             ascii().addNote(f2.str().c_str());
1199           }
1200           posIdMap[dataPos] = plc.m_id;
1201         }
1202         else
1203           plc.m_id = posIdMap.find(dataPos)->second;
1204         input->seek(actPos, librevenge::RVNG_SEEK_SET);
1205       }
1206       m_state->m_plcMap.insert
1207       (std::multimap<long,MsWrd1ParserInternal::PLC>::value_type(fPos, plc));
1208       fPos = newPos;
1209       f << ":" << plc << ",";
1210     }
1211     ascii().addDelimiter(input->tell(),'|');
1212     ascii().addPos(pos);
1213     ascii().addNote(f.str().c_str());
1214   }
1215 
1216   return true;
1217 }
1218 
1219 ////////////////////////////////////////////////////////////
1220 // try to read a text entry
1221 ////////////////////////////////////////////////////////////
sendText(MWAWEntry const & textEntry,bool isMain)1222 bool MsWrd1Parser::sendText(MWAWEntry const &textEntry, bool isMain)
1223 {
1224   if (!textEntry.valid()) return false;
1225   if (!getTextListener()) {
1226     MWAW_DEBUG_MSG(("MsWrd1Parser::sendText: can not find a listener!"));
1227     return true;
1228   }
1229   if (isMain) {
1230     int numCols = m_state->m_numColumns;
1231     if (numCols > 1 && !getTextListener()->isSectionOpened()) {
1232       MWAWSection sec;
1233       sec.setColumns(numCols, getPageWidth()/double(numCols), librevenge::RVNG_INCH, double(m_state->m_columnsSep));
1234       getTextListener()->openSection(sec);
1235     }
1236   }
1237   long pos = textEntry.begin();
1238   MWAWInputStreamPtr input = getInput();
1239 
1240   input->seek(pos, librevenge::RVNG_SEEK_SET);
1241   libmwaw::DebugStream f;
1242   f << "TextContent:";
1243   int actFId=-1, actRId = -1, actPage=0;
1244   auto plcIt = m_state->m_plcMap.begin();
1245   while (plcIt != m_state->m_plcMap.end() && plcIt->first < pos) {
1246     MsWrd1ParserInternal::PLC const &plc = plcIt++->second;
1247     if (plc.m_type == MsWrd1ParserInternal::FONT)
1248       actFId = plc.m_id;
1249     else if (plc.m_type == MsWrd1ParserInternal::RULER)
1250       actRId = plc.m_id;
1251     else if (plc.m_type == MsWrd1ParserInternal::PAGE)
1252       actPage++;
1253   }
1254   // new page can be in header, ..., so sometimes we must force a new page...
1255   if (isMain && actPage > m_state->m_actPage)
1256     newPage(actPage);
1257   MsWrd1ParserInternal::Font actFont, defFont;
1258   defFont.m_font = MWAWFont(3,12);
1259   if (actFId>=0 && actFId < int(m_state->m_fontsList.size()))
1260     actFont = m_state->m_fontsList[size_t(actFId)];
1261   else
1262     actFont = defFont;
1263   bool rulerNotSent = actRId != -1, fontNotSent = true;
1264   while (!input->isEnd() && input->tell() < textEntry.end()) {
1265     long actPos = input->tell();
1266     bool firstPlc = true;
1267     while (plcIt != m_state->m_plcMap.end() && plcIt->first <= actPos) {
1268       if (firstPlc) {
1269         ascii().addPos(pos);
1270         ascii().addNote(f.str().c_str());
1271         pos = actPos;
1272         f.str("");
1273         f << "TextContent:";
1274         firstPlc = false;
1275       }
1276 
1277       auto const &plc = plcIt++->second;
1278       switch (plc.m_type) {
1279       case MsWrd1ParserInternal::FONT:
1280         if (plc.m_id >= 0 && plc.m_id < int(m_state->m_fontsList.size()))
1281           getTextListener()->setFont(m_state->m_fontsList[size_t(plc.m_id)].m_font);
1282         else
1283           getTextListener()->setFont(defFont.m_font);
1284         actFont.m_font = getTextListener()->getFont();
1285         fontNotSent = false;
1286         break;
1287       case MsWrd1ParserInternal::RULER:
1288         actRId = plc.m_id;
1289         rulerNotSent = true;
1290         break;
1291       case MsWrd1ParserInternal::PAGE:
1292         if (isMain) newPage(++actPage);
1293         break;
1294       case MsWrd1ParserInternal::FOOTNOTE: {
1295         if (!isMain) break;
1296         if (plc.m_id < 0 || plc.m_id >= int(m_state->m_footnotesList.size())) {
1297           MWAW_DEBUG_MSG(("MsWrd1Parser::sendText: oops, can not find a footnote!\n"));
1298           break;
1299         }
1300         MWAWEntry entry;
1301         entry.setBegin(m_state->m_footnotesList[size_t(plc.m_id)][0]);
1302         entry.setEnd(m_state->m_footnotesList[size_t(plc.m_id)][1]);
1303         removeLastCharIfEOL(entry);
1304         std::shared_ptr<MWAWSubDocument> subdoc
1305         (new MsWrd1ParserInternal::SubDocument(*this, getInput(), entry));
1306         getTextListener()->insertNote(MWAWNote(m_state->m_endNote ? MWAWNote::EndNote : MWAWNote::FootNote), subdoc);
1307         break;
1308       }
1309       case MsWrd1ParserInternal::ZONE:
1310       case MsWrd1ParserInternal::UNKNOWN:
1311 #if !defined(__clang__)
1312       default:
1313 #endif
1314         break;
1315       }
1316       f << "[" << plc << "]";
1317     }
1318     if (rulerNotSent) {
1319       if (actRId >= 0 && actRId < int(m_state->m_paragraphsList.size()))
1320         setProperty(m_state->m_paragraphsList[size_t(actRId)]);
1321       else
1322         setProperty(MsWrd1ParserInternal::Paragraph());
1323       rulerNotSent = false;
1324     }
1325     if (fontNotSent) getTextListener()->setFont(actFont.m_font);
1326     auto c = static_cast<unsigned char>(input->readULong(1));
1327     f << char(c);
1328     switch (c) {
1329     case 1:
1330       getTextListener()->insertUnicodeString(librevenge::RVNGString("(picture)"));
1331       break;
1332     case 5: // footnote mark
1333     case 0xc: // end of file
1334       break;
1335     case 0x9:
1336       getTextListener()->insertTab();
1337       break;
1338     case 0xd:
1339       getTextListener()->insertEOL();
1340       break;
1341     default:
1342       getTextListener()->insertCharacter(static_cast<unsigned char>(c), input, textEntry.end());
1343       break;
1344     }
1345   }
1346   ascii().addPos(pos);
1347   ascii().addNote(f.str().c_str());
1348   return true;
1349 }
1350 
1351 // send the ruler properties
setProperty(MsWrd1ParserInternal::Paragraph const & para)1352 void MsWrd1Parser::setProperty(MsWrd1ParserInternal::Paragraph const &para)
1353 {
1354   if (!getTextListener()) return;
1355   getTextListener()->setParagraph(para);
1356 }
1357 
1358 ////////////////////////////////////////////////////////////
1359 // Low level
1360 ////////////////////////////////////////////////////////////
1361 
1362 // read the header
checkHeader(MWAWHeader * header,bool strict)1363 bool MsWrd1Parser::checkHeader(MWAWHeader *header, bool strict)
1364 {
1365   *m_state = MsWrd1ParserInternal::State();
1366   MWAWInputStreamPtr input = getInput();
1367   if (!input || !input->hasDataFork())
1368     return false;
1369 
1370   libmwaw::DebugStream f;
1371   if (!input->checkPosition(0x80)) {
1372     MWAW_DEBUG_MSG(("MsWrd1Parser::checkHeader: file is too short\n"));
1373     return false;
1374   }
1375   long pos = 0;
1376   input->seek(pos, librevenge::RVNG_SEEK_SET);
1377   auto val = static_cast<int>(input->readULong(2));
1378   switch (val) {
1379   case 0xfe32:
1380     switch (input->readULong(2)) {
1381     case 0x0:
1382       setVersion(1);
1383       break;
1384     default:
1385       return false;
1386     }
1387     break;
1388   default:
1389     return false;
1390   }
1391 
1392   f << "FileHeader:";
1393   val = static_cast<int>(input->readULong(1)); // v1: ab other 0 ?
1394   if (val) f << "f0=" << val << ",";
1395   for (int i = 1; i < 3; i++) { // always 0
1396     val = static_cast<int>(input->readLong(2));
1397     if (val) f << "f" << i << "=" << val << ",";
1398   }
1399   for (int i = 0; i < 5; i++) { // always 0 ?
1400     val = static_cast<int>(input->readLong(1));
1401     if (val) f << "g" << i << "=" << val << ",";
1402   }
1403 
1404   m_state->m_eot = long(input->readULong(4));
1405   f << "text=" << std::hex << 0x80 << "<->" << m_state->m_eot << ",";
1406   if (0x80 > m_state->m_eot || !input->checkPosition(m_state->m_eot)) {
1407     MWAW_DEBUG_MSG(("MsWrd1Parser::checkHeader: problem with text position must stop\n"));
1408     return false;
1409   }
1410 
1411   m_state->m_fileZonesLimit[0] = int((m_state->m_eot+0x7f)/0x80);
1412   f << "zonesPos=[" << std::hex;
1413   for (int i = 0; i < 6; i++) {
1414     m_state->m_fileZonesLimit[i+1] = static_cast<int>(input->readLong(2));
1415     if (m_state->m_fileZonesLimit[i]==m_state->m_fileZonesLimit[i+1]) {
1416       f << "_,";
1417       continue;
1418     }
1419     if (m_state->m_fileZonesLimit[i]<m_state->m_fileZonesLimit[i+1]) {
1420       f << m_state->m_fileZonesLimit[i]*0x80 << "<->"
1421         << m_state->m_fileZonesLimit[i+1]*0x80 << ",";
1422       continue;
1423     }
1424     MWAW_DEBUG_MSG(("MsWrd1Parser::checkHeader: problem reading the zones positions\n"));
1425     if (strict) return false;
1426     f << "###" << m_state->m_fileZonesLimit[i+1]*0x80 << ",";
1427     m_state->m_fileZonesLimit[i+1] = m_state->m_fileZonesLimit[i];
1428   }
1429   f << std::dec << "],";
1430   ascii().addPos(pos);
1431   ascii().addNote(f.str().c_str());
1432   pos = input->tell();
1433   f.str("");
1434   f << "FileHeader[A]:";
1435   for (int i = 0; i < 17; i++) {
1436     val = static_cast<int>(input->readLong(2));
1437     if (val) f << "f" << i << "=" << val << ",";
1438   }
1439   ascii().addPos(pos);
1440   ascii().addNote(f.str().c_str());
1441 
1442   long textSize[2];
1443   for (auto &tSize : textSize) tSize = input->readLong(4);
1444   if (textSize[0] != textSize[1] || 0x80+textSize[0] != m_state->m_eot) {
1445     MWAW_DEBUG_MSG(("MsWrd1Parser::checkHeader: problem with text position length\n"));
1446     if (strict) return false;
1447     f << "##textSize=" << std::hex << textSize[0] << ":" << textSize[1] << std::dec << ",";
1448     if (textSize[1] > textSize[0]) textSize[0] = textSize[1];
1449     if (0x80+textSize[0] > m_state->m_eot && input->checkPosition(0x80+textSize[0]))
1450       m_state->m_eot = 0x80+textSize[0];
1451   }
1452   pos=input->tell();
1453   f.str("");
1454   f << "FileHeader[B]:";
1455   for (int i = 0; i < 28; i++) { // always 0
1456     val = static_cast<int>(input->readLong(2));
1457     if (val) f << "f" << i << "=" << val << ",";
1458   }
1459   ascii().addPos(pos);
1460   ascii().addNote(f.str().c_str());
1461   if (header)
1462     header->reset(MWAWDocument::MWAW_T_MICROSOFTWORD, 1);
1463   return true;
1464 }
1465 
1466 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab:
1467