1 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
2 
3 /* libmwaw
4 * Version: MPL 2.0 / LGPLv2+
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 2.0 (the "License"); you may not use this file except in compliance with
8 * the License or as specified alternatively below. You may obtain a copy of
9 * the License at http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Major Contributor(s):
17 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
18 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
19 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
20 * Copyright (C) 2006, 2007 Andrew Ziem
21 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
22 *
23 *
24 * All Rights Reserved.
25 *
26 * For minor contributions see the git repository.
27 *
28 * Alternatively, the contents of this file may be used under the terms of
29 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
30 * in which case the provisions of the LGPLv2+ are applicable
31 * instead of those above.
32 */
33 
34 #include <algorithm>
35 #include <iomanip>
36 #include <iostream>
37 #include <limits>
38 #include <map>
39 #include <set>
40 #include <sstream>
41 
42 #if defined(DEBUG_WITH_FILES)
43 # include <fstream>
44 #endif
45 
46 #include <librevenge/librevenge.h>
47 
48 #include "MWAWCell.hxx"
49 #include "MWAWTextListener.hxx"
50 #include "MWAWFont.hxx"
51 #include "MWAWFontConverter.hxx"
52 #include "MWAWParagraph.hxx"
53 #include "MWAWPosition.hxx"
54 #include "MWAWTable.hxx"
55 
56 #include "MsWrdParser.hxx"
57 #include "MsWrdStruct.hxx"
58 
59 #include "MsWrdText.hxx"
60 
61 #define DEBUG_FONT 1
62 #define DEBUG_PLC 1
63 #define DEBUG_PAGE 1
64 #define DEBUG_PARAGRAPH 1
65 #define DEBUG_SECTION 1
66 #define DEBUG_PARAGRAPHINFO 1
67 
68 /** Internal: the structures of a MsWrdText */
69 namespace MsWrdTextInternal
70 {
71 ////////////////////////////////////////
72 //! Internal: the entry of MsWrdParser
73 struct TextStruct final : public MWAWEntry {
74   //! construtor
TextStructMsWrdTextInternal::TextStruct75   TextStruct()
76     : MWAWEntry()
77     , m_pos(-1)
78     , m_styleId(0)
79     , m_flags(0)
80     , m_paragraphId(-1)
81     , m_complex(false)
82   {
83   }
84   TextStruct(TextStruct const &)=default;
85   //! destructor
86   ~TextStruct() final;
87   //! operator<<
operator <<(std::ostream & o,TextStruct const & entry)88   friend std::ostream &operator<<(std::ostream &o, TextStruct const &entry)
89   {
90     if (entry.m_pos>=0) o << "textPos=" << entry.m_pos << ",";
91     o << "styleId?=" << entry.m_styleId << ",";
92     if (entry.m_complex) o << "complex,";
93     if (entry.m_paragraphId >= 0) o << "tP" << entry.m_paragraphId << ",";
94     // checkme
95     if (entry.m_flags&1)
96       o << "noEndPara,";
97     if (entry.m_flags&2)
98       o << "paphNil,";
99     if (entry.m_flags&4)
100       o << "dirty,";
101     switch (entry.m_flags&0xF8) { // fNoParaLast
102     case 0x80: // sameline
103       break;
104     case 0:
105       o << "newline,";
106       break;
107     default:
108       o << "#type=" << std::hex << entry.m_flags << std::dec << ",";
109       break;
110     }
111     if (entry.valid())
112       o << std::hex << "fPos=" << entry.begin() << ":" << entry.end() << std::dec << ",";
113     if (entry.extra().length())
114       o << entry.extra() << ",";
115     return o;
116   }
117 
118   //! returns the paragraph id ( or -1, if unknown )
getParagraphIdMsWrdTextInternal::TextStruct119   int getParagraphId() const
120   {
121     return m_paragraphId;
122   }
123   //! a struct used to compare file textpos
124   struct CompareFilePos {
125     //! comparaison function
operator ()MsWrdTextInternal::TextStruct::CompareFilePos126     bool operator()(TextStruct const *t1, TextStruct const *t2) const
127     {
128       long diff = t1->begin()-t2->begin();
129       return (diff < 0);
130     }
131   };
132   //! the text position
133   int m_pos;
134   //! some identificator(maybe style)
135   int m_styleId;
136   //! some flags
137   int m_flags;
138   //! the paragraph id
139   int m_paragraphId;
140   //! a flag to know if we read a complex or a simple PRM
141   bool m_complex;
142 };
143 
~TextStruct()144 TextStruct::~TextStruct()
145 {
146 }
147 
148 ////////////////////////////////////////
//! Internal: the data which define a page
struct Page {
  //! constructor: no identifier, no page number and all the values set to 0
  Page()
    : m_id(-1)
    , m_type(0)
    , m_page(-1)
    , m_paragraphId(-2)
    , m_error()
  {
    std::fill(std::begin(m_values), std::end(m_values), 0);
  }
  //! operator<<: prints the fields which differ from their expected values
  friend std::ostream &operator<<(std::ostream &o, Page const &page)
  {
    if (page.m_id < 0)
      o << "Pg_:";
    else
      o << "Pg" << page.m_id << ":";
    if (page.m_paragraphId >= 0)
      o << "P" << page.m_paragraphId << ",";
    // the page number is only printed when it differs from id+1
    if (page.m_page != page.m_id+1)
      o << "page=" << page.m_page << ",";
    if (page.m_type&0x10)
      o << "right,";
    // find also page.m_type&0x40 : pageDirty?
    int const otherFlags=page.m_type&0xEF;
    if (otherFlags)
      o << "type=" << std::hex << otherFlags << std::dec << ",";
    for (int i = 0; i < 4; ++i) {
      int const value=page.m_values[i];
      if (!value)
        continue;
      o << "f" << i << "=";
      if (i==3) // the last value is printed in hexadecimal
        o << std::hex << value << std::dec;
      else
        o << value;
      o << ",";
    }
    if (!page.m_error.empty())
      o << page.m_error << ",";
    return o;
  }
  //! the identifier
  int m_id;
  //! the type (a list of flags)
  int m_type;
  //! the page number
  int m_page;
  //! the paragraph id
  int m_paragraphId;
  //! some values ( 0, -1, 0, small number )
  int m_values[4];
  //! the errors
  std::string m_error;
};
195 
196 ////////////////////////////////////////
197 //! Internal: the footnote
198 struct Footnote {
199   //! constructor
FootnoteMsWrdTextInternal::Footnote200   Footnote()
201     : m_pos()
202     , m_id(-1)
203     , m_value(0)
204     , m_error("")
205   {
206   }
207   //! operator<<
operator <<(std::ostream & o,Footnote const & note)208   friend std::ostream &operator<<(std::ostream &o, Footnote const &note)
209   {
210     if (note.m_id >= 0) o << "Fn" << note.m_id << ":";
211     else o << "Fn_:";
212     if (note.m_pos.valid())
213       o << std::hex << note.m_pos.begin() << "-" << note.m_pos.end() << std::dec << ",";
214     if (note.m_value) o << "f0=" << note.m_value << ",";
215     if (note.m_error.length()) o << note.m_error << ",";
216     return o;
217   }
218   //! the footnote data
219   MWAWEntry m_pos;
220   //! the id
221   int m_id;
222   //! a value ( 1, 4)
223   int m_value;
224   /** the errors */
225   std::string m_error;
226 };
227 
228 ////////////////////////////////////////
//! Internal: the data which define a field: a text and an identifier
struct Field {
  //! constructor: an empty text and no identifier
  Field()
    : m_text()
    , m_id(-1)
    , m_error()
  {
  }
  //! operator<<: prints the text, followed by [id] if the id is defined and by the errors
  friend std::ostream &operator<<(std::ostream &o, Field const &field)
  {
    o << field.m_text;
    if (field.m_id >= 0)
      o << "[" << field.m_id << "]";
    if (!field.m_error.empty())
      o << "," << field.m_error << ",";
    return o;
  }
  //! the text
  std::string m_text;
  //! the identifier
  int m_id;
  //! the errors
  std::string m_error;
};
253 
254 ////////////////////////////////////////
255 //! Internal: a list of plc
256 struct Property {
PropertyMsWrdTextInternal::Property257   Property()
258     : m_fPos(-1)
259     , m_plcList()
260     , m_debugPrint(false)
261   {
262   }
263   //! the character position in the file
264   long m_fPos;
265   //! the list of plc
266   std::vector<MsWrdText::PLC> m_plcList;
267   //! a flag to know if we have print data
268   bool m_debugPrint;
269 };
270 
271 ////////////////////////////////////////
272 //! Internal and low level: a structure to store a line or a cell of a MsWrdText
273 struct Line {
274   //! an enum used to differentiate line and cell
275   enum Type { L_Line, L_Cell, L_LastLineCell, L_LastRowCell };
276   //! constructor
LineMsWrdTextInternal::Line277   Line()
278     : m_type(L_Line)
279     , m_cPos()
280   {
281   }
282   //! the line type
283   Type m_type;
284   //! the caracter position
285   MWAWVec2l m_cPos;
286 };
287 
288 ////////////////////////////////////////
289 //! Internal and low level: a structure to store a table of a MsWrdText
290 struct Table final : public MWAWTable {
291   //! constructor
TableMsWrdTextInternal::Table292   Table()
293     : MWAWTable(MWAWTable::TableDimBit)
294     , m_cellPos()
295     , m_delimiterPos()
296     , m_height(0)
297     , m_backgroundColor(MWAWColor::white())
298     , m_cells()
299   {
300   }
301   //! destructor
302   ~Table() final;
303   //! the list of cPos corresponding to cells limits
304   std::vector<long> m_cellPos;
305   //! the list of the delimiter cPos (ie. end of each cell)
306   std::vector<long> m_delimiterPos;
307   //! the row height
308   float m_height;
309   //! the background color
310   MWAWColor m_backgroundColor;
311   //! the table cells
312   std::vector<MWAWVariable<MsWrdStruct::Table::Cell> > m_cells;
313 };
314 
~Table()315 Table::~Table()
316 {
317 }
318 ////////////////////////////////////////
319 //! Internal: the state of a MsWrdParser
320 struct State {
321   //! constructor
StateMsWrdTextInternal::State322   State()
323     : m_version(-1)
324     , m_bot(0x100)
325     , m_headerFooterZones()
326     , m_textposList()
327     , m_plcMap()
328     , m_filePlcMap()
329     , m_lineList()
330     , m_paragraphLimitMap()
331     , m_sectionLimitList()
332     , m_fontMap()
333     , m_paragraphMap()
334     , m_propertyMap()
335     , m_tableCellPosSet()
336     , m_tableMap()
337     , m_paraInfoList()
338     , m_pageList()
339     , m_fieldList()
340     , m_footnoteList()
341     , m_actPage(0)
342     , m_numPages(-1)
343 #if defined(DEBUG_WITH_FILES)
344     , m_debugFile()
345 #endif
346   {
347     for (auto &tLength : m_textLength) tLength = 0;
348   }
349   //! returns the total text size
getTotalTextSizeMsWrdTextInternal::State350   long getTotalTextSize() const
351   {
352     long res=0;
353     for (auto tLength : m_textLength) res+=tLength;
354     return res;
355   }
356   //! returns the id of textpos corresponding to a cPos or -1
getTextStructIdMsWrdTextInternal::State357   int getTextStructId(long textPos) const
358   {
359     if (m_textposList.empty() || textPos < m_textposList[0].m_pos)
360       return -1;
361     int minVal = 0, maxVal = int(m_textposList.size())-1;
362     while (minVal != maxVal) {
363       int mid = (minVal+1+maxVal)/2;
364       if (m_textposList[size_t(mid)].m_pos == textPos)
365         return mid;
366       if (m_textposList[size_t(mid)].m_pos > textPos)
367         maxVal = mid-1;
368       else
369         minVal = mid;
370     }
371     return minVal;
372   }
373   //! returns the file position corresponding to a text entry
getFilePosMsWrdTextInternal::State374   long getFilePos(long textPos) const
375   {
376     int tId=getTextStructId(textPos);
377     if (tId==-1)
378       return m_bot+textPos;
379     return m_textposList[size_t(tId)].begin() + (textPos-m_textposList[size_t(tId)].m_pos);
380   }
381   //! try to return a table which begins at a character position
getTableMsWrdTextInternal::State382   std::shared_ptr<Table> getTable(long cPos) const
383   {
384     std::shared_ptr<Table> empty;
385     auto tableIt=m_tableMap.find(cPos);
386     if (tableIt==m_tableMap.end()||!tableIt->second) return empty;
387     auto table=tableIt->second;
388     if (table->m_cellPos.empty()||table->m_cellPos[0]!=cPos)
389       return empty;
390     return table;
391   }
392 
393 #if defined(DEBUG_WITH_FILES)
394   // use cut -c13- main-2.data|sort -n to retrieve the data
395   //! internal and low level: defined a second debug file
debugFile2MsWrdTextInternal::State396   std::fstream &debugFile2()
397   {
398     static bool init=false;
399     if (!init) {
400       init=true;
401       m_debugFile.open("main-2.data", std::ios_base::out | std::ios_base::trunc);
402     }
403     return m_debugFile;
404   }
405 #endif
406 
407   //! the file version
408   int m_version;
409 
410   //! the default text begin
411   long m_bot;
412 
413   //! the text length (main, footnote, header+footer)
414   long m_textLength[3];
415 
416   //! the header/footer zones
417   std::vector<MWAWEntry> m_headerFooterZones;
418   //! the text positions
419   std::vector<TextStruct> m_textposList;
420 
421   //! the text correspondance zone ( textpos, plc )
422   std::multimap<long, MsWrdText::PLC> m_plcMap;
423   //! the file correspondance zone ( filepos, plc )
424   std::multimap<long, MsWrdText::PLC> m_filePlcMap;
425 
426   //! the list of lines
427   std::vector<Line> m_lineList;
428   //! the paragraph limit -> textposition (or -1)
429   std::map<long, int> m_paragraphLimitMap;
430   //! the section cPos limit
431   std::vector<long> m_sectionLimitList;
432   //! the final correspondance font zone ( textpos, font)
433   std::map<long, MsWrdStruct::Font> m_fontMap;
434 
435   //! the final correspondance paragraph zone ( textpos, paragraph)
436   std::map<long, MsWrdStruct::Paragraph> m_paragraphMap;
437   //! the position where we have new data ( textpos -> [ we have done debug printing ])
438   std::map<long, Property> m_propertyMap;
439   //! a set of all begin cell position
440   std::set<long> m_tableCellPosSet;
441   //! the final correspondance table zone ( textpos, font)
442   std::map<long, std::shared_ptr<Table> > m_tableMap;
443   //! the list of paragraph info modifier
444   std::vector<MsWrdStruct::ParagraphInfo> m_paraInfoList;
445 
446   //! the list of pages
447   std::vector<Page> m_pageList;
448 
449   //! the list of fields
450   std::vector<Field> m_fieldList;
451 
452   //! the list of footnotes
453   std::vector<Footnote> m_footnoteList;
454 
455   int m_actPage/** the actual page*/, m_numPages /** the number of page of the final document */;
456 #if defined(DEBUG_WITH_FILES)
457   //! internal and low level: defined a second debug file
458   std::fstream m_debugFile;
459 #endif
460 };
461 
462 }
463 
464 ////////////////////////////////////////////////////////////
465 // constructor/destructor, ...
466 ////////////////////////////////////////////////////////////
MsWrdText(MsWrdParser & parser)467 MsWrdText::MsWrdText(MsWrdParser &parser)
468   : m_parserState(parser.getParserState())
469   , m_state(new MsWrdTextInternal::State)
470   , m_stylesManager()
471   , m_mainParser(&parser)
472 {
473   m_stylesManager.reset(new MsWrdTextStyles(*this));
474 }
475 
~MsWrdText()476 MsWrdText::~MsWrdText()
477 { }
478 
version() const479 int MsWrdText::version() const
480 {
481   if (m_state->m_version < 0)
482     m_state->m_version = m_parserState->m_version;
483   return m_state->m_version;
484 }
485 
numPages() const486 int MsWrdText::numPages() const
487 {
488   m_state->m_numPages = int(m_state->m_pageList.size());
489   return m_state->m_numPages;
490 }
491 
getMainTextLength() const492 long MsWrdText::getMainTextLength() const
493 {
494   return m_state->m_textLength[0];
495 }
496 
getHeader() const497 MWAWEntry MsWrdText::getHeader() const
498 {
499   if (m_state->m_headerFooterZones.size() == 0)
500     return MWAWEntry();
501   MWAWEntry entry=m_state->m_headerFooterZones[0];
502   bool ok=entry.valid();
503   if (ok && entry.length()<=2)  {
504     // small header, check if contains data
505     MWAWInputStreamPtr &input= m_parserState->m_input;
506     long pos = input->tell();
507     ok=false;
508     for (long cPos=entry.begin(); cPos<entry.end(); ++cPos) {
509       input->seek(m_state->getFilePos(cPos), librevenge::RVNG_SEEK_SET);
510       if (input->readLong(1)==0xd)
511         continue;
512       ok=true;
513       break;
514     }
515     input->seek(pos, librevenge::RVNG_SEEK_SET);
516   }
517   return ok ? entry : MWAWEntry();
518 }
519 
getFooter() const520 MWAWEntry MsWrdText::getFooter() const
521 {
522   if (m_state->m_headerFooterZones.size() < 2)
523     return MWAWEntry();
524   MWAWEntry entry=m_state->m_headerFooterZones[1];
525   bool ok=entry.valid();
526   if (ok && entry.length()<=2)  {
527     // check if it contains data
528     MWAWInputStreamPtr &input= m_parserState->m_input;
529     long pos = input->tell();
530     ok=false;
531     for (long cPos=entry.begin(); cPos<entry.end(); ++cPos) {
532       input->seek(m_state->getFilePos(cPos), librevenge::RVNG_SEEK_SET);
533       if (input->readLong(1)==0xd)
534         continue;
535       ok=true;
536       break;
537     }
538     input->seek(pos, librevenge::RVNG_SEEK_SET);
539   }
540   return ok ? entry : MWAWEntry();
541 }
542 
getTextPLCMap()543 std::multimap<long, MsWrdText::PLC> &MsWrdText::getTextPLCMap()
544 {
545   return m_state->m_plcMap;
546 }
547 
getFilePLCMap()548 std::multimap<long, MsWrdText::PLC> &MsWrdText::getFilePLCMap()
549 {
550   return m_state->m_filePlcMap;
551 }
552 
553 ////////////////////////////////////////////////////////////
554 // Intermediate level
555 ////////////////////////////////////////////////////////////
556 // PLC
operator <<(std::ostream & o,MsWrdText::PLC const & plc)557 std::ostream &operator<<(std::ostream &o, MsWrdText::PLC const &plc)
558 {
559   switch (plc.m_type) {
560   case MsWrdText::PLC::ParagraphInfo:
561     o << "Pi";
562     break;
563   case MsWrdText::PLC::Section:
564     o << "S";
565     break;
566   case MsWrdText::PLC::Footnote:
567     o << "Fn";
568     break;
569   case MsWrdText::PLC::FootnoteDef:
570     o << "vFn";
571     break;
572   case MsWrdText::PLC::Field:
573     o << "Field";
574     break;
575   case MsWrdText::PLC::Page:
576     o << "Pg";
577     break;
578   case MsWrdText::PLC::Font:
579     o << "F";
580     break;
581   case MsWrdText::PLC::Object:
582     o << "O";
583     break;
584   case MsWrdText::PLC::Paragraph:
585     o << "P";
586     break;
587   case MsWrdText::PLC::HeaderFooter:
588     o << "hfP";
589     break;
590   case MsWrdText::PLC::TextPosition:
591     o << "textPos";
592     break;
593 #if !defined(__clang__)
594   default:
595     o << "#type" << char('a'+int(plc.m_type));
596 #endif
597   }
598   if (plc.m_id < 0) o << "_";
599   else o << plc.m_id;
600   if (plc.m_extra.length()) o << "[" << plc.m_extra << "]";
601   return o;
602 }
603 
readHeaderTextLength()604 bool MsWrdText::readHeaderTextLength()
605 {
606   MWAWInputStreamPtr &input= m_parserState->m_input;
607   long pos = input->tell();
608   long endPos = pos+12;
609   if (!input->checkPosition(endPos))
610     return false;
611   for (auto &tLength  : m_state->m_textLength) {
612     const auto length = long(input->readULong(4));
613     // a minimal check that the lengths are not insanely out of bounds
614     tLength= input->checkPosition(length) ? length : 0;
615   }
616   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
617   libmwaw::DebugStream f;
618   f << "FileHeader(textLength):text="
619     << std::hex << m_state->m_textLength[0] << ",";
620   if (m_state->m_textLength[1])
621     f << "footnote=" << m_state->m_textLength[1] << ",";
622   if (m_state->m_textLength[2])
623     f << "headerFooter=" << m_state->m_textLength[2] << ",";
624   ascFile.addPos(pos);
625   ascFile.addNote(f.str().c_str());
626   ascFile.addPos(endPos);
627   ascFile.addNote("_");
628   return true;
629 }
630 
631 ////////////////////////////////////////////////////////////
// try to find the different zones
633 ////////////////////////////////////////////////////////////
createZones(long bot)634 bool MsWrdText::createZones(long bot)
635 {
636   // int const vers=version();
637   m_state->m_bot = bot;
638 
639   auto &entryMap = m_mainParser->m_entryMap;
640   // the fonts
641   auto it = entryMap.find("FontIds");
642   if (it != entryMap.end()) {
643     std::vector<long> list;
644     readLongZone(it->second, 2, list);
645   }
646   it = entryMap.find("FontNames");
647   if (it != entryMap.end())
648     readFontNames(it->second);
649   // the styles
650   it = entryMap.find("Styles");
651   long prevDeb = 0;
652   while (it != entryMap.end()) {
653     if (!it->second.hasType("Styles")) break;
654     MsWrdEntry &entry=it++->second;
655 #ifndef DEBUG
656     // first entry is often bad or share the same data than the second
657     if (entry.id() == 0)
658       continue;
659 #endif
660     if (entry.begin() == prevDeb) continue;
661     prevDeb = entry.begin();
662     m_stylesManager->readStyles(entry);
663   }
664   // read the text structure
665   it = entryMap.find("TextStruct");
666   if (it != entryMap.end())
667     readTextStruct(it->second);
668 
669   //! the break position
670   it = entryMap.find("PageBreak");
671   if (it != entryMap.end())
672     readPageBreak(it->second);
673   it = entryMap.find("ParaInfo");
674   if (it != entryMap.end())
675     readParagraphInfo(it->second);
676   it = entryMap.find("Section");
677   if (it != entryMap.end() &&
678       !m_stylesManager->readSection(it->second, m_state->m_sectionLimitList))
679     m_state->m_sectionLimitList.resize(0);
680 
681   //! read the header footer limit
682   it = entryMap.find("HeaderFooter");
683   std::vector<long> hfLimits;
684   if (it != entryMap.end()) {
685     readLongZone(it->second, 4, hfLimits);
686 
687     long debHeader = m_state->m_textLength[0]+m_state->m_textLength[1];
688     MsWrdText::PLC plc(MsWrdText::PLC::HeaderFooter);
689     // list Header0,Footer0,Header1,Footer1,...,Footern, 3
690     for (size_t i = 0; i+2 < hfLimits.size(); i++) {
691       plc.m_id = int(i);
692       m_state->m_plcMap.insert(std::multimap<long,MsWrdText::PLC>::value_type
693                                (hfLimits[i]+debHeader, plc));
694 
695       MWAWEntry entry;
696       entry.setBegin(debHeader+hfLimits[i]);
697       entry.setEnd(debHeader+hfLimits[i+1]);
698       m_state->m_headerFooterZones.push_back(entry);
699     }
700   }
701 
702   //! read the note
703   std::vector<long> fieldPos;
704   it = entryMap.find("FieldPos");
705   if (it != entryMap.end()) { // a list of text pos ( or a size from ? )
706     readLongZone(it->second, 4, fieldPos);
707   }
708   it = entryMap.find("FieldName");
709   if (it != entryMap.end())
710     readFields(it->second, fieldPos);
711 
712   //! read the footenote
713   std::vector<long> footnoteDef;
714   it = entryMap.find("FootnoteDef");
715   if (it != entryMap.end()) { // list of pos in footnote data
716     readLongZone(it->second, 4, footnoteDef);
717   }
718   it = entryMap.find("FootnotePos");
719   if (it != entryMap.end()) { // a list of text pos
720     readFootnotesPos(it->second, footnoteDef);
721   }
722   /* CHECKME: this zone seems presents only when FootnoteDef and FootnotePos,
723      but what does it means ?
724    */
725   it = entryMap.find("FootnoteData");
726   if (it != entryMap.end()) { // a list of text pos
727     readFootnotesData(it->second);
728   }
729 
730   it = entryMap.find("ParagList");
731   if (it != entryMap.end())
732     m_stylesManager->readPLCList(it->second);
733   it = entryMap.find("CharList");
734   if (it != entryMap.end())
735     m_stylesManager->readPLCList(it->second);
736 
737   prepareData();
738   return true;
739 }
740 
741 ////////////////////////////////////////////////////////////
742 // read the text structure ( the PieCe Descriptors : plcfpcd )
743 ////////////////////////////////////////////////////////////
readTextStruct(MsWrdEntry & entry)744 bool MsWrdText::readTextStruct(MsWrdEntry &entry)
745 {
746   if (entry.length() < 19) {
747     MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: the zone seems to short\n"));
748     return false;
749   }
750   if (!m_stylesManager->readTextStructList(entry))
751     return false;
752   MWAWInputStreamPtr &input= m_parserState->m_input;
753   long pos = input->tell();
754   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
755   libmwaw::DebugStream f;
756   auto type = static_cast<int>(input->readLong(1));
757   if (type != 2) {
758     MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: find odd type %d\n", type));
759     return false;
760   }
761   entry.setParsed(true);
762   f << "TextStruct-pos:";
763   auto sz = static_cast<int>(input->readULong(2));
764   long endPos = pos+3+sz;
765   if (endPos > entry.end() || (sz%12) != 4) {
766     f << "#";
767     ascFile.addPos(pos);
768     ascFile.addNote(f.str().c_str());
769     MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: can not read the position zone\n"));
770     return false;
771   }
772   int N=sz/12;
773   long textLength=m_state->getTotalTextSize();
774   std::vector<long> textPos; // checkme
775   textPos.resize(size_t(N+1));
776   f << "pos=[" << std::hex;
777   for (size_t i = 0; i <= size_t(N); i++) {
778     textPos[i] = static_cast<int>(input->readULong(4));
779     if (i && textPos[i] <= textPos[i-1]) {
780       MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: find backward text pos\n"));
781       f << "#" << textPos[i] << ",";
782       textPos[i]=textPos[i-1];
783     }
784     else {
785       if (i != size_t(N) && textPos[i] > textLength) {
786         MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: find a text position which is too big\n"));
787         f << "#";
788       }
789       f << textPos[i] << ",";
790     }
791   }
792   f << std::dec << "],";
793   ascFile.addPos(pos);
794   ascFile.addNote(f.str().c_str());
795   PLC plc(PLC::TextPosition);
796 
797   for (int i = 0; i < N; i++) {
798     pos = input->tell();
799     MsWrdTextInternal::TextStruct tEntry;
800     f.str("");
801     f<< "TextStruct-pos" << i << ":";
802     tEntry.m_pos = static_cast<int>(textPos[size_t(i)]);
803     tEntry.m_flags = static_cast<int>(input->readULong(1));
804     // fN internal...
805     tEntry.m_styleId = static_cast<int>(input->readULong(1));
806     auto ptr = long(input->readULong(4));
807     tEntry.setBegin(ptr);
808     tEntry.setLength(textPos[size_t(i)+1]-textPos[size_t(i)]);
809     std::string extra;
810     tEntry.m_paragraphId = m_stylesManager->readPropertyModifier(tEntry.m_complex, extra);
811     tEntry.setExtra(extra);
812     m_state->m_textposList.push_back(tEntry);
813     if (!input->checkPosition(ptr)) {
814       MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: find a bad file position \n"));
815       f << "#";
816     }
817     else {
818       plc.m_id = i;
819       m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
820                                (textPos[size_t(i)],plc));
821     }
822     f << tEntry;
823     input->seek(pos+8, librevenge::RVNG_SEEK_SET);
824     ascFile.addPos(pos);
825     ascFile.addNote(f.str().c_str());
826 #if defined(DEBUG_WITH_FILES)
827     f.str("");
828     f<< "TextContent[" << tEntry.m_pos << "]:" << tEntry << ",";
829     m_state->debugFile2() << f.str() << "\n";
830 #endif
831   }
832 
833   pos = input->tell();
834   if (pos != entry.end()) {
835     ascFile.addPos(pos);
836     ascFile.addNote("TextStruct-pos#");
837   }
838   ascFile.addPos(entry.end());
839   ascFile.addNote("_");
840   return true;
841 }
842 
843 ////////////////////////////////////////////////////////////
844 // read the font name
845 ////////////////////////////////////////////////////////////
readFontNames(MsWrdEntry & entry)846 bool MsWrdText::readFontNames(MsWrdEntry &entry)
847 {
848   if (entry.length() < 2) {
849     MWAW_DEBUG_MSG(("MsWrdText::readFontNames: the zone seems to short\n"));
850     return false;
851   }
852 
853   long pos = entry.begin();
854   MWAWInputStreamPtr &input= m_parserState->m_input;
855   input->seek(pos, librevenge::RVNG_SEEK_SET);
856   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
857   libmwaw::DebugStream f;
858   auto N = static_cast<int>(input->readULong(2));
859   if (N*5+2 > entry.length()) {
860     MWAW_DEBUG_MSG(("MsWrdText::readFontNames: the number of fonts seems bad\n"));
861     return false;
862   }
863   entry.setParsed(true);
864   f << "FontNames:" << N;
865   ascFile.addPos(pos);
866   ascFile.addNote(f.str().c_str());
867   for (int i = 0; i < N; i++) {
868     pos = input->tell();
869     if (pos+5 > entry.end()) {
870       input->seek(pos, librevenge::RVNG_SEEK_SET);
871       MWAW_DEBUG_MSG(("MsWrdText::readFontNames: the fonts %d seems bad\n", i));
872       break;
873     }
874     f.str("");
875     f << "FontNames-" << i << ":";
876     auto val = static_cast<int>(input->readLong(2));
877     if (val) f << "f0=" << val << ",";
878     auto fId = static_cast<int>(input->readULong(2));
879     f << "fId=" << fId << ",";
880     auto fSz = static_cast<int>(input->readULong(1));
881     if (pos +5 > entry.end()) {
882       input->seek(pos, librevenge::RVNG_SEEK_SET);
883       MWAW_DEBUG_MSG(("MsWrdText::readFontNames: the fonts name %d seems bad\n", i));
884       break;
885     }
886     std::string name("");
887     for (int j = 0; j < fSz; j++)
888       name += char(input->readLong(1));
889     if (name.length())
890       m_parserState->m_fontConverter->setCorrespondance(fId, name);
891     f << name;
892     ascFile.addPos(pos);
893     ascFile.addNote(f.str().c_str());
894   }
895   pos = input->tell();
896   if (pos != entry.end()) {
897     ascFile.addPos(pos);
898     ascFile.addNote("FontNames#");
899   }
900 
901   return true;
902 }
903 
904 ////////////////////////////////////////////////////////////
// read the paragraph info zone
906 ////////////////////////////////////////////////////////////
readParagraphInfo(MsWrdEntry & entry)907 bool MsWrdText::readParagraphInfo(MsWrdEntry &entry)
908 {
909   int vers=version();
910   if (vers<=3) {
911     MWAW_DEBUG_MSG(("MsWrdText::readParagraphInfo: does not know how to read a paragraphInfo in v3 or less\n"));
912     return false;
913   }
914   if (entry.length() < 4 || (entry.length()%10) != 4) {
915     MWAW_DEBUG_MSG(("MsWrdText::readParagraphInfo: the zone size seems odd\n"));
916     return false;
917   }
918   entry.setParsed(true);
919 
920   long pos = entry.begin();
921   MWAWInputStreamPtr &input= m_parserState->m_input;
922   input->seek(pos, librevenge::RVNG_SEEK_SET);
923   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
924   libmwaw::DebugStream f;
925   f << "ParaInfo:";
926   auto N=int(entry.length()/10);
927 
928   std::vector<long> textPositions;
929   f << "[";
930   for (int i = 0; i <= N; i++) {
931     auto textPos = long(input->readULong(4));
932     textPositions.push_back(textPos);
933     f << std::hex << textPos << std::dec << ",";
934   }
935   f << "],";
936   ascFile.addPos(pos);
937   ascFile.addNote(f.str().c_str());
938 
939   PLC plc(PLC::ParagraphInfo);
940   for (int i = 0; i < N; i++) {
941     pos = input->tell();
942     f.str("");
943     f << "ParaInfo-Pi" << i << ":" << std::hex << textPositions[size_t(i)] << std::dec << ",";
944     MsWrdStruct::ParagraphInfo paraMod;
945     if (!paraMod.read(input, pos+6, vers))
946       f << "###";
947     f << paraMod;
948     m_state->m_paraInfoList.push_back(paraMod);
949 
950     if (textPositions[size_t(i)] > m_state->m_textLength[0]) {
951       MWAW_DEBUG_MSG(("MsWrdText::readParagraphInfo: text positions is bad...\n"));
952       f << "#";
953     }
954     else {
955       plc.m_id=i;
956       m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
957                                (textPositions[size_t(i)],plc));
958     }
959     input->seek(pos+6, librevenge::RVNG_SEEK_SET);
960     ascFile.addPos(pos);
961     ascFile.addNote(f.str().c_str());
962   }
963 
964   ascFile.addPos(entry.end());
965   ascFile.addNote("_");
966   return true;
967 
968 }
969 
970 ////////////////////////////////////////////////////////////
971 // read the page break
972 ////////////////////////////////////////////////////////////
readPageBreak(MsWrdEntry & entry)973 bool MsWrdText::readPageBreak(MsWrdEntry &entry)
974 {
975   int const vers = version();
976   int const fSz = vers <= 3 ? 8 : 10;
977   if (entry.length() < fSz+8 || (entry.length()%(fSz+4)) != 4) {
978     MWAW_DEBUG_MSG(("MsWrdText::readPageBreak: the zone size seems odd\n"));
979     return false;
980   }
981   long pos = entry.begin();
982   entry.setParsed(true);
983   MWAWInputStreamPtr &input= m_parserState->m_input;
984   input->seek(pos, librevenge::RVNG_SEEK_SET);
985   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
986   libmwaw::DebugStream f;
987   f << "PageBreak:";
988   auto N=int(entry.length()/(fSz+4));
989   std::vector<long> textPos; // checkme
990   textPos.resize(size_t(N)+1);
991   for (auto &tPos : textPos) tPos = long(input->readULong(4));
992   PLC plc(PLC::Page);
993   int prevPage=-1;
994   for (int i = 0; i < N; i++) {
995     MsWrdTextInternal::Page page;
996     page.m_id = i;
997     page.m_type = static_cast<int>(input->readULong(1));
998     page.m_values[0] = static_cast<int>(input->readLong(1)); // always 0,1,2
999     for (int j = 1; j < 3; j++) // always -1, 0
1000       page.m_values[j] = static_cast<int>(input->readLong(2));
1001     page.m_page = static_cast<int>(input->readLong(2));
1002     if (vers > 3)
1003       page.m_values[3] = static_cast<int>(input->readLong(2));
1004     if (i && textPos[size_t(i)]==textPos[size_t(i)-1] && page.m_page==prevPage) {
1005       // find this one time in v3...
1006       MWAW_DEBUG_MSG(("MsWrdText::readPageBreak: page %d is duplicated...\n", i));
1007       f << "#dup,";
1008       continue;
1009     }
1010     prevPage=page.m_page;
1011     m_state->m_pageList.push_back(page);
1012 
1013     if (textPos[size_t(i)] > m_state->m_textLength[0]) {
1014       MWAW_DEBUG_MSG(("MsWrdText::readPageBreak: text positions is bad...\n"));
1015       f << "#";
1016     }
1017     else {
1018       plc.m_id = i;
1019       m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
1020                                (textPos[size_t(i)],plc));
1021     }
1022     f << "[pos=" << textPos[size_t(i)] << "," << page << "],";
1023   }
1024   f << "end=" << std::hex << textPos[size_t(N)] << std::dec << ",";
1025   ascFile.addPos(pos);
1026   ascFile.addNote(f.str().c_str());
1027 
1028   ascFile.addPos(entry.end());
1029   ascFile.addNote("_");
1030   return true;
1031 }
1032 
1033 ////////////////////////////////////////////////////////////
1034 // read the footnotes pos + val
1035 ////////////////////////////////////////////////////////////
readFootnotesPos(MsWrdEntry & entry,std::vector<long> const & noteDef)1036 bool MsWrdText::readFootnotesPos(MsWrdEntry &entry, std::vector<long> const &noteDef)
1037 {
1038   if (entry.length() < 4 || (entry.length()%6) != 4) {
1039     MWAW_DEBUG_MSG(("MsWrdText::readFootnotesPos: the zone size seems odd\n"));
1040     return false;
1041   }
1042   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1043   libmwaw::DebugStream f;
1044   auto N=int(entry.length()/6);
1045   if (N+2 != int(noteDef.size())) {
1046     MWAW_DEBUG_MSG(("MsWrdText::readFootnotesPos: the number N seems odd\n"));
1047     return false;
1048   }
1049   long pos = entry.begin();
1050   entry.setParsed(true);
1051   MWAWInputStreamPtr &input= m_parserState->m_input;
1052   input->seek(pos, librevenge::RVNG_SEEK_SET);
1053   f << "FootnotePos:";
1054 
1055   std::vector<long> textPos;
1056   textPos.resize(size_t(N)+1);
1057   for (auto &tPos : textPos) tPos = long(input->readULong(4));
1058   long debFootnote = m_state->m_textLength[0];
1059   PLC plc(PLC::Footnote);
1060   PLC defPlc(PLC::FootnoteDef);
1061   for (int i = 0; i < N; i++) {
1062     MsWrdTextInternal::Footnote note;
1063     note.m_id = i;
1064     note.m_pos.setBegin(debFootnote+noteDef[size_t(i)]);
1065     note.m_pos.setEnd(debFootnote+noteDef[size_t(i)+1]);
1066     note.m_value = static_cast<int>(input->readLong(2));
1067     m_state->m_footnoteList.push_back(note);
1068 
1069     if (textPos[size_t(i)] > m_state->getTotalTextSize()) {
1070       MWAW_DEBUG_MSG(("MsWrdText::readFootnotesPos: can not find text position\n"));
1071       f << "#";
1072     }
1073     else if (noteDef[size_t(i)+1] > m_state->m_textLength[1]) {
1074       MWAW_DEBUG_MSG(("MsWrdText::readFootnotesPos: can not find definition position\n"));
1075       f << "#";
1076     }
1077     else {
1078       defPlc.m_id = plc.m_id = i;
1079       m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
1080                                (textPos[size_t(i)], plc));
1081       m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
1082                                (note.m_pos.begin(), defPlc));
1083     }
1084     f << std::hex << textPos[size_t(i)] << std::dec << ":" << note;
1085   }
1086   f << "end=" << std::hex << textPos[size_t(N)] << std::dec << ",";
1087   ascFile.addPos(entry.begin());
1088   ascFile.addNote(f.str().c_str());
1089   ascFile.addPos(entry.end());
1090   ascFile.addNote("_");
1091   return true;
1092 }
1093 
1094 ////////////////////////////////////////////////////////////
1095 // read the footnotes pos?
1096 ////////////////////////////////////////////////////////////
readFootnotesData(MsWrdEntry & entry)1097 bool MsWrdText::readFootnotesData(MsWrdEntry &entry)
1098 {
1099   if (entry.length() < 4 || (entry.length()%14) != 4) {
1100     MWAW_DEBUG_MSG(("MsWrdText::readFootnotesData: the zone size seems odd\n"));
1101     return false;
1102   }
1103   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1104   libmwaw::DebugStream f;
1105   auto N=int(entry.length()/14);
1106   long pos = entry.begin();
1107   entry.setParsed(true);
1108   MWAWInputStreamPtr &input= m_parserState->m_input;
1109   input->seek(pos, librevenge::RVNG_SEEK_SET);
1110   f << "FootnoteData[" << N << "/" << m_state->m_footnoteList.size() << "]:";
1111 
1112   std::vector<long> textPos; // checkme
1113   textPos.resize(size_t(N)+1);
1114   for (auto &tPos : textPos) tPos = long(input->readULong(4));
1115   for (int i = 0; i < N; i++) {
1116     if (textPos[size_t(i)] > m_state->m_textLength[1]) {
1117       MWAW_DEBUG_MSG(("MsWrdText::readFootnotesData: textPositions seems bad\n"));
1118       f << "#";
1119     }
1120     f << "N" << i << "=[";
1121     if (textPos[size_t(i)])
1122       f << "pos=" << std::hex << textPos[size_t(i)] << std::dec << ",";
1123     for (int j = 0; j < 5; j++) { // always 0|4000, -1, 0, id, 0 ?
1124       auto val=static_cast<int>(input->readLong(2));
1125       if (val && j == 0)
1126         f << std::hex << val << std::dec << ",";
1127       else if (val)
1128         f << val << ",";
1129       else f << "_,";
1130     }
1131     f << "],";
1132   }
1133   f << "end=" << std::hex << textPos[size_t(N)] << std::dec << ",";
1134   ascFile.addPos(entry.begin());
1135   ascFile.addNote(f.str().c_str());
1136   ascFile.addPos(entry.end());
1137   ascFile.addNote("_");
1138   return true;
1139 }
1140 
1141 ////////////////////////////////////////////////////////////
// read the fields
1143 ////////////////////////////////////////////////////////////
readFields(MsWrdEntry & entry,std::vector<long> const & fieldPos)1144 bool MsWrdText::readFields(MsWrdEntry &entry, std::vector<long> const &fieldPos)
1145 {
1146   long pos = entry.begin();
1147   auto N = int(fieldPos.size());
1148   long textLength = m_state->getTotalTextSize();
1149   if (N==0) {
1150     MWAW_DEBUG_MSG(("MsWrdText::readFields: number of fields is 0\n"));
1151     return false;
1152   }
1153   N--;
1154   entry.setParsed(true);
1155   MWAWInputStreamPtr &input= m_parserState->m_input;
1156   input->seek(pos, librevenge::RVNG_SEEK_SET);
1157 
1158   auto sz = long(input->readULong(2));
1159   if (entry.length() != sz) {
1160     MWAW_DEBUG_MSG(("MsWrdText::readFields: the zone size seems odd\n"));
1161     return false;
1162   }
1163   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1164   libmwaw::DebugStream f, f2;
1165   f << "FieldName:";
1166   int const endSize = (version()==5) ? 2 : 1;
1167   PLC plc(PLC::Field);
1168   for (int n = 1; n < N; n++) {
1169     if (input->tell() >= entry.end()) {
1170       MWAW_DEBUG_MSG(("MsWrdText::readFields: can not find all field\n"));
1171       break;
1172     }
1173     pos = input->tell();
1174     auto fSz = static_cast<int>(input->readULong(1));
1175     if (pos+1+fSz > entry.end()) {
1176       MWAW_DEBUG_MSG(("MsWrdText::readFields: can not read a string\n"));
1177       input->seek(pos, librevenge::RVNG_SEEK_SET);
1178       f << "#";
1179       break;
1180     }
1181     int endSz = fSz < endSize ? 0 : endSize;
1182 
1183     f2.str("");
1184     std::string text("");
1185     for (int i = 0; i < fSz-endSz; i++) {
1186       auto c = char(input->readULong(1));
1187       if (c==0) f2 << '#';
1188       else text+=c;
1189     }
1190     MsWrdTextInternal::Field field;
1191     if (!endSz) ;
1192     else if (version()>=5 && input->readULong(1) != 0xc) {
1193       input->seek(-1, librevenge::RVNG_SEEK_CUR);
1194       for (int i = 0; i < 2; i++) text+=char(input->readULong(1));
1195     }
1196     else {
1197       auto id = static_cast<int>(input->readULong(1));
1198       if (id >= N) {
1199         if (version()>=5) {
1200           MWAW_DEBUG_MSG(("MsWrdText::readFields: find a strange id\n"));
1201           f2 << "#";
1202         }
1203         else
1204           text+=char(id);
1205       }
1206       else
1207         field.m_id = id;
1208     }
1209     field.m_text = text;
1210     field.m_error = f2.str();
1211     m_state->m_fieldList.push_back(field);
1212 
1213     f << "N" << n << "=" << field << ",";
1214     if (fieldPos[size_t(n)] >= textLength) {
1215       MWAW_DEBUG_MSG(("MsWrdText::readFields: text positions is bad...\n"));
1216       f << "#";
1217     }
1218     else {
1219       plc.m_id = n-1;
1220       m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
1221                                (fieldPos[size_t(n)], plc));
1222     }
1223   }
1224   if (long(input->tell()) != entry.end())
1225     ascFile.addDelimiter(input->tell(), '|');
1226   ascFile.addPos(entry.begin());
1227   ascFile.addNote(f.str().c_str());
1228   ascFile.addPos(entry.end());
1229   ascFile.addNote("_");
1230   return true;
1231 }
1232 
1233 ////////////////////////////////////////////////////////////
// read a zone which contains a list of longs
1235 ////////////////////////////////////////////////////////////
readLongZone(MsWrdEntry & entry,int sz,std::vector<long> & list)1236 bool MsWrdText::readLongZone(MsWrdEntry &entry, int sz, std::vector<long> &list)
1237 {
1238   list.resize(0);
1239   if (entry.length() < sz || (entry.length()%sz)) {
1240     MWAW_DEBUG_MSG(("MsWrdText::readIntsZone: the size of zone %s seems to odd\n", entry.type().c_str()));
1241     return false;
1242   }
1243 
1244   long pos = entry.begin();
1245   MWAWInputStreamPtr &input= m_parserState->m_input;
1246   input->seek(pos, librevenge::RVNG_SEEK_SET);
1247   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1248   libmwaw::DebugStream f;
1249   f << entry.type() << ":";
1250   auto N = int(entry.length()/sz);
1251   for (int i = 0; i < N; i++) {
1252     auto val = static_cast<int>(input->readLong(sz));
1253     if (input->checkPosition(val))
1254       list.push_back(val);
1255     f << std::hex << val << std::dec << ",";
1256   }
1257 
1258   if (long(input->tell()) != entry.end())
1259     ascFile.addDelimiter(input->tell(), '|');
1260 
1261   entry.setParsed(true);
1262 
1263   ascFile.addPos(entry.begin());
1264   ascFile.addNote(f.str().c_str());
1265 
1266   ascFile.addPos(entry.end());
1267   ascFile.addNote("_");
1268   return true;
1269 }
1270 
1271 ////////////////////////////////////////////////////////////
1272 // sort/prepare data
1273 ////////////////////////////////////////////////////////////
prepareLines()1274 void MsWrdText::prepareLines()
1275 {
1276   m_state->m_lineList.clear();
1277   long cPos = 0, cEnd = m_state->getTotalTextSize();
1278   if (cEnd <= 0) return;
1279 
1280   MWAWInputStreamPtr &input= m_parserState->m_input;
1281   input->seek(m_state->getFilePos(0), librevenge::RVNG_SEEK_SET);
1282 
1283   MsWrdTextInternal::Line line;
1284   line.m_cPos[0]=0;
1285   size_t numTextPos = m_state->m_textposList.size();
1286   while (!input->isEnd() && cPos < cEnd) {
1287     auto plcIt = m_state->m_plcMap.lower_bound(cPos);
1288     while (plcIt != m_state->m_plcMap.end() && plcIt->first==cPos) {
1289       auto const &plc = plcIt++->second;
1290       if (plc.m_type != PLC::TextPosition)
1291         continue;
1292       if (plc.m_id < 0 || plc.m_id >= static_cast<int>(numTextPos))
1293         continue;
1294       auto const &textEntry= m_state->m_textposList[size_t(plc.m_id)];
1295       input->seek(textEntry.begin(), librevenge::RVNG_SEEK_SET);
1296     }
1297     auto c=char(input->readLong(1));
1298     ++cPos;
1299     if (c!=0x7 && c!=0xd && cPos!=cEnd)
1300       continue;
1301     line.m_cPos[1]=cPos;
1302     if (c==0x7)
1303       line.m_type=MsWrdTextInternal::Line::L_LastLineCell;
1304     else
1305       line.m_type=MsWrdTextInternal::Line::L_Line;
1306     m_state->m_lineList.push_back(line);
1307 
1308     line.m_cPos[0]=cPos;
1309   }
1310 }
1311 
convertFilePLCPos()1312 void MsWrdText::convertFilePLCPos()
1313 {
1314   size_t numTextPos = m_state->m_textposList.size();
1315   auto &cMap=m_state->m_plcMap;
1316 
1317   // create the list of table delimiters
1318   std::set<long> tableSet;
1319   for (auto const &line : m_state->m_lineList) {
1320     if (line.m_type==MsWrdTextInternal::Line::L_Line)
1321       tableSet.insert(line.m_cPos[1]);
1322   }
1323 
1324   auto tableIt=tableSet.begin();
1325   MsWrdText::PLC resetParaPLC(PLC::Paragraph,-1);
1326   // simplest case
1327   if (!numTextPos) {
1328     long const bottom = m_state->m_bot;
1329     long pPos=bottom;
1330     for (auto it : m_state->m_filePlcMap) {
1331       long pos=it.first, prevPos=0;
1332       MsWrdText::PLC const &plc=it.second;
1333       if (plc.m_type==PLC::Paragraph) {
1334         while (tableIt!=tableSet.end() && *tableIt<=pos-bottom) {
1335           long resPos=*(tableIt++);
1336           if (resPos<pos-bottom) {
1337             m_state->m_paragraphLimitMap[pPos-bottom]=-1;
1338             cMap.insert(std::map<long, MsWrdText::PLC>::value_type(pPos-bottom, resetParaPLC));
1339             pPos=resPos;
1340           }
1341         }
1342         m_state->m_paragraphLimitMap[pPos-bottom]=-1;
1343         prevPos=pPos;
1344         pPos=pos;
1345       }
1346       else if (plc.m_type==PLC::Font)
1347         prevPos=pos;
1348       else {
1349         MWAW_DEBUG_MSG(("MsWrdText::convertFilePLCPos: unexpected plc type: %d\n", plc.m_type));
1350         continue;
1351       }
1352       cMap.insert(std::map<long, MsWrdText::PLC>::value_type(prevPos-bottom, plc));
1353     }
1354     return;
1355   }
1356 
1357   long cPos=0, pPos=0;
1358   int fontId=-1;
1359   for (size_t i=0; i < numTextPos; ++i) {
1360     auto const &tPos=m_state->m_textposList[i];
1361     long const begPos= tPos.begin();
1362     long const endPos=tPos.end();
1363     bool fontCheck=false;
1364     auto it=m_state->m_filePlcMap.lower_bound(begPos);
1365     while (it!=m_state->m_filePlcMap.end()) {
1366       long pos=it->first;
1367       if (!fontCheck && pos!=begPos) {
1368         // time to check if the font has changed
1369         auto fIt=m_state->m_filePlcMap.lower_bound(begPos);
1370         while (fIt!=m_state->m_filePlcMap.begin()) {
1371           if (fIt==m_state->m_filePlcMap.end()||fIt->first>=begPos)
1372             --fIt;
1373           else
1374             break;
1375         }
1376         while (fIt!=m_state->m_filePlcMap.end()) {
1377           if (fIt->first >= begPos)
1378             break;
1379           auto const &plc=fIt->second;
1380           if (plc.m_type==PLC::Font) {
1381             if (fontId!=plc.m_id) {
1382               fontId=plc.m_id;
1383               cMap.insert(std::map<long, MsWrdText::PLC>::value_type(cPos, plc));
1384             }
1385             break;
1386           }
1387           if (fIt==m_state->m_filePlcMap.begin())
1388             break;
1389           --fIt;
1390         }
1391         fontCheck=true;
1392       }
1393       if (pos>endPos)
1394         break;
1395       auto const &plc=it++->second;
1396       long newCPos=cPos+(pos-begPos), prevPos=0;
1397       if (plc.m_type==PLC::Paragraph) {
1398         if (pos==begPos)
1399           continue;
1400         while (tableIt!=tableSet.end() && *tableIt<=newCPos) {
1401           long resPos=*(tableIt++);
1402           if (resPos<newCPos) {
1403             m_state->m_paragraphLimitMap[pPos]=-1;
1404             cMap.insert(std::map<long, MsWrdText::PLC>::value_type(pPos, resetParaPLC));
1405             pPos=resPos;
1406           }
1407         }
1408         m_state->m_paragraphLimitMap[pPos]=int(i);
1409         prevPos=pPos;
1410         pPos=newCPos;
1411       }
1412       else if (plc.m_type==PLC::Font) {
1413         if (pos==endPos)
1414           continue;
1415         fontCheck=true;
1416         fontId=plc.m_id;
1417         prevPos=newCPos;
1418       }
1419       else {
1420         MWAW_DEBUG_MSG(("MsWrdText::convertFilePLCPos: unexpected plc type: %d\n", plc.m_type));
1421         continue;
1422       }
1423       cMap.insert(std::map<long, MsWrdText::PLC>::value_type(prevPos, plc));
1424     }
1425     cPos+=tPos.length();
1426   }
1427 }
1428 
prepareParagraphProperties()1429 void MsWrdText::prepareParagraphProperties()
1430 {
1431   int const vers=version();
1432   auto textposSize = int(m_state->m_textposList.size());
1433   MsWrdTextInternal::Line::Type lineType=MsWrdTextInternal::Line::L_Line;
1434   MsWrdStruct::Paragraph paragraph(vers), tablePara(vers);
1435   long cTableEndPos=-1;
1436   bool inTable=false;
1437   size_t numLines=m_state->m_lineList.size();
1438   for (int i=0; i<int(numLines); ++i) {
1439     MsWrdTextInternal::Line &line = m_state->m_lineList[size_t(i)];
1440 
1441     long cPos=line.m_cPos[0];
1442     if (inTable && cPos>=cTableEndPos) {
1443       inTable=false;
1444       lineType=MsWrdTextInternal::Line::L_Line;
1445     }
1446     auto pIt=m_state->m_paragraphLimitMap.lower_bound(cPos);
1447     if (pIt==m_state->m_paragraphLimitMap.end() || pIt->first!=cPos) {
1448       line.m_type=lineType;
1449       continue;
1450     }
1451     int textId=pIt->second;
1452 
1453     // first retrieve the paragraph
1454     auto plcIt=m_state->m_plcMap.lower_bound(cPos);
1455     while (plcIt != m_state->m_plcMap.end() && plcIt->first==cPos) {
1456       MsWrdText::PLC const &plc = plcIt++->second;
1457       if (plc.m_type != PLC::Paragraph)
1458         continue;
1459       if (plc.m_id>=0)
1460         m_stylesManager->getParagraph(MsWrdTextStyles::TextZone,
1461                                       plc.m_id, paragraph);
1462       else
1463         paragraph=MsWrdStruct::Paragraph(vers);
1464       if (inTable) {
1465         MsWrdStruct::Paragraph tmpPara=tablePara;
1466         tmpPara.insert(paragraph);
1467         paragraph=tmpPara;
1468       }
1469     }
1470 
1471     MsWrdStruct::Paragraph finalPara(paragraph);
1472     if (textId>=0 && textId < textposSize) {
1473       auto const &textEntry=m_state->m_textposList[size_t(textId)];
1474       int id=textEntry.getParagraphId();
1475       // checkme do we need to test (textEntry.m_flags&0x80)==0 here
1476       if (id>=0) {
1477         MsWrdStruct::Paragraph modifier(vers);
1478         m_stylesManager->getParagraph(MsWrdTextStyles::TextStructZone, id, modifier);
1479         finalPara.insert(modifier);
1480       }
1481     }
1482 
1483     if (finalPara.m_styleId.isSet()) {
1484       MsWrdStruct::Paragraph style(vers);
1485       m_stylesManager->getParagraph(MsWrdTextStyles::StyleZone,*finalPara.m_styleId, style);
1486       MsWrdStruct::Paragraph tmpPara(style);
1487       tmpPara.insert(finalPara);
1488       tmpPara.updateParagraphToFinalState(&style);
1489       finalPara=tmpPara;
1490     }
1491     else
1492       finalPara.updateParagraphToFinalState();
1493 
1494     if (!inTable && (finalPara.inTable()||line.m_type==MsWrdTextInternal::Line::L_LastLineCell) &&
1495         updateTableBeginnningAt(cPos, cTableEndPos) && cPos<cTableEndPos) {
1496       inTable=true;
1497       // ok, find the main table paragraph and loop
1498       tablePara=MsWrdStruct::Paragraph(vers);
1499       plcIt=m_state->m_plcMap.lower_bound(cTableEndPos-1);
1500       while (plcIt != m_state->m_plcMap.end() && plcIt->first==cTableEndPos-1) {
1501         MsWrdText::PLC const &plc = plcIt++->second;
1502         if (plc.m_type != PLC::Paragraph)
1503           continue;
1504         if (plc.m_id>=0)
1505           m_stylesManager->getParagraph(MsWrdTextStyles::TextZone, plc.m_id, tablePara);
1506       }
1507       paragraph=tablePara;
1508       --i;
1509       continue;
1510     }
1511     if (inTable && line.m_type==MsWrdTextInternal::Line::L_Line)
1512       line.m_type=MsWrdTextInternal::Line::L_Cell;
1513 
1514     // store the result
1515     m_state->m_paragraphMap.insert
1516     (std::map<long, MsWrdStruct::Paragraph>::value_type(cPos,finalPara));
1517     lineType=line.m_type;
1518   }
1519 }
1520 
prepareFontProperties()1521 void MsWrdText::prepareFontProperties()
1522 {
1523   int const vers = version();
1524   long cPos = 0, cEnd = m_state->getTotalTextSize();
1525   if (cEnd <= 0) return;
1526 
1527   auto &map = m_state->m_plcMap;
1528   auto textposSize = int(m_state->m_textposList.size());
1529   MsWrdStruct::Font font, modifier, paraFont, styleFont;
1530   int actStyle=-1;
1531   while (cPos < cEnd) {
1532     bool fontChanged=false;
1533     if (m_state->m_paragraphMap.find(cPos)!=m_state->m_paragraphMap.end()) {
1534       auto const &para= m_state->m_paragraphMap.find(cPos)->second;
1535       para.getFont(paraFont);
1536       if (para.m_styleId.isSet() && actStyle!=*para.m_styleId) {
1537         actStyle=*para.m_styleId;
1538         styleFont=MsWrdStruct::Font();
1539         m_stylesManager->getFont(MsWrdTextStyles::StyleZone, *para.m_styleId, styleFont);
1540       }
1541       fontChanged=true; // force a font change (even if no needed)
1542     }
1543 
1544     long cNextPos = cEnd;
1545     auto plcIt = map.lower_bound(cPos);
1546     int textPId=-2;
1547     while (plcIt != map.end()) {
1548       if (plcIt->first != cPos) {
1549         cNextPos=plcIt->first;
1550         break;
1551       }
1552       PLC const &plc = plcIt++->second;
1553       int pId = plc.m_id;
1554       switch (plc.m_type) {
1555       case PLC::TextPosition: {
1556         if (pId < 0 || pId > textposSize) {
1557           MWAW_DEBUG_MSG(("MsWrdText::prepareFontProperties: oops can not find textstruct!!!!\n"));
1558           break;
1559         }
1560         auto const &textEntry=m_state->m_textposList[size_t(pId)];
1561         textPId=textEntry.getParagraphId();
1562         break;
1563       }
1564       case PLC::Font:
1565         fontChanged=true;
1566         modifier=font=MsWrdStruct::Font();
1567         if (pId >= 0)
1568           m_stylesManager->getFont(MsWrdTextStyles::TextZone, pId, font);
1569         break;
1570       case PLC::Field:
1571       case PLC::Footnote:
1572       case PLC::FootnoteDef:
1573       case PLC::HeaderFooter:
1574       case PLC::Object:
1575       case PLC::Page:
1576       case PLC::Paragraph:
1577       case PLC::ParagraphInfo:
1578       case PLC::Section:
1579 #if !defined(__clang__)
1580       default:
1581 #endif
1582         break;
1583       }
1584     }
1585     if (textPId>=0) {
1586       MsWrdStruct::Paragraph para(vers);
1587       m_stylesManager->getParagraph(MsWrdTextStyles::TextStructZone, textPId, para);
1588       modifier=MsWrdStruct::Font();
1589       para.getFont(modifier);
1590       fontChanged=true;
1591     }
1592     else if (textPId==-1) {
1593       modifier=MsWrdStruct::Font();
1594       fontChanged=true;
1595     }
1596     if (fontChanged) {
1597       MsWrdStruct::Font final(paraFont); // or stylefont
1598       final.insert(font, &styleFont);
1599       final.insert(modifier, &styleFont);
1600       m_state->m_fontMap[cPos] = final;
1601     }
1602     cPos = cNextPos;
1603   }
1604 }
1605 
prepareTableLimits()1606 void MsWrdText::prepareTableLimits()
1607 {
1608   int const vers=version();
1609   size_t numLines=m_state->m_lineList.size();
1610   // first find the table delimiters
1611   std::map<long,size_t> cposToLineMap;
1612   for (size_t l=0; l < numLines; ++l) {
1613     MsWrdTextInternal::Line const &line = m_state->m_lineList[l];
1614     if (line.m_type != MsWrdTextInternal::Line::L_LastLineCell)
1615       continue;
1616     cposToLineMap[line.m_cPos[1]-1]=l;
1617   }
1618 
1619   size_t numTextpos=m_state->m_textposList.size();
1620   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1621   auto tPosIt=cposToLineMap.begin();
1622   while (tPosIt!=cposToLineMap.end()) {
1623     size_t lId=tPosIt->second;
1624     if (lId>=numLines) {
1625       MWAW_DEBUG_MSG(("MsWrdText::prepareTableLimits: lId is bad\n"));
1626       ++tPosIt;
1627       continue;
1628     }
1629     auto line = m_state->m_lineList[lId];
1630     std::vector<long> listDelimiterCells;
1631     bool ok=false;
1632     auto actTPosIt=tPosIt;
1633     while (tPosIt!=cposToLineMap.end()) {
1634       long cPos=tPosIt->first;
1635       lId=tPosIt++->second;
1636       listDelimiterCells.push_back(cPos);
1637       if (lId>=numLines) {
1638         MWAW_DEBUG_MSG(("MsWrdText::prepareTableLimits: lId is bad(II)\n"));
1639         break;
1640       }
1641       line=m_state->m_lineList[lId];
1642       MsWrdStruct::Paragraph para(vers);
1643       // try to retrieve the paragraph attributes
1644       auto plcIt=m_state->m_plcMap.lower_bound(cPos);
1645       while (plcIt != m_state->m_plcMap.end() && plcIt->first==cPos) {
1646         auto const &plc = plcIt++->second;
1647         if (plc.m_type != PLC::Paragraph)
1648           continue;
1649         if (plc.m_id>=0)
1650           m_stylesManager->getParagraph(MsWrdTextStyles::TextZone, plc.m_id, para);
1651         if (para.m_styleId.isSet()) {
1652           MsWrdStruct::Paragraph style(vers);
1653           m_stylesManager->getParagraph(MsWrdTextStyles::StyleZone,*para.m_styleId, style);
1654           style.insert(para);
1655           para=style;
1656         }
1657       }
1658       auto pIt=m_state->m_paragraphLimitMap.find(line.m_cPos[0]);
1659       if (pIt!=m_state->m_paragraphLimitMap.end() && pIt->second>0 && pIt->second<static_cast<int>(numTextpos)) {
1660         auto const &textEntry=m_state->m_textposList[size_t(pIt->second)];
1661         int id=textEntry.getParagraphId();
1662         if (id>=0) {
1663           MsWrdStruct::Paragraph modifier(vers);
1664           m_stylesManager->getParagraph(MsWrdTextStyles::TextStructZone, id, modifier);
1665           para.insert(modifier);
1666         }
1667       }
1668       if (!para.m_tableDef.get() || !para.m_table.isSet() || !para.m_table->m_columns.isSet())
1669         continue;
1670       m_state->m_lineList[lId].m_type=MsWrdTextInternal::Line::L_LastRowCell;
1671 
1672       // ok, we have find the end of the table
1673       auto const &table = para.m_table.get();
1674       size_t numCols=table.m_columns->size();
1675       if (!numCols || listDelimiterCells.size()!=numCols) {
1676         MWAW_DEBUG_MSG(("MsWrdText::prepareTableLimits: can not find the number of row for position %ld(%d,%d)\n", line.m_cPos[0], int(listDelimiterCells.size()), static_cast<int>(numCols)));
1677         break;
1678       }
1679 
1680       std::shared_ptr<MsWrdTextInternal::Table> finalTable(new MsWrdTextInternal::Table);
1681       finalTable->m_delimiterPos = listDelimiterCells;
1682       finalTable->m_cells = table.m_cells;
1683       if (table.m_height.isSet())
1684         finalTable->m_height=*table.m_height;
1685       std::vector<float> width(numCols-1);
1686       for (size_t c = 0; c < numCols-1; c++)
1687         width[c]=table.m_columns.get()[c+1]-table.m_columns.get()[c];
1688       finalTable->setColsSize(width);
1689       for (auto id : listDelimiterCells)
1690         m_state->m_tableMap[id]=finalTable;
1691       listDelimiterCells.clear();
1692       ok=true;
1693       break;
1694     }
1695     if (ok)
1696       continue;
1697 
1698     ascFile.addPos(m_state->getFilePos(listDelimiterCells[0]));
1699     ascFile.addNote("###table");
1700     m_state->m_tableMap[listDelimiterCells[0]]=std::shared_ptr<MsWrdTextInternal::Table>();
1701     tPosIt=++actTPosIt;
1702     MWAW_DEBUG_MSG(("MsWrdText::prepareTableLimits: problem finding some table limits\n"));
1703   }
1704 }
1705 
updateTableBeginnningAt(long cPos,long & nextCPos)1706 bool MsWrdText::updateTableBeginnningAt(long cPos, long &nextCPos)
1707 {
1708   auto tableIt=m_state->m_tableMap.lower_bound(cPos);
1709   if (tableIt==m_state->m_tableMap.end() || !tableIt->second ||
1710       tableIt->second->m_delimiterPos.empty() ||
1711       tableIt->second->m_delimiterPos[0] < cPos) {
1712     MWAW_DEBUG_MSG(("MsWrdText::updateTableBeginnningAt: can find no table at position %ld\n", cPos));
1713     return false;
1714   }
1715   auto table=tableIt->second;
1716   size_t numDelim=table->m_delimiterPos.size();
1717   table->m_cellPos.resize(numDelim);
1718   table->m_cellPos[0]=cPos;
1719   for (size_t c=0; c+1<numDelim; ++c)
1720     table->m_cellPos[c+1]=table->m_delimiterPos[c]+1;
1721   for (size_t c=0; c+1<table->m_cellPos.size(); ++c)
1722     m_state->m_tableCellPosSet.insert(table->m_cellPos[c]);
1723   if (table->m_delimiterPos[0]!=cPos)
1724     m_state->m_tableMap[cPos]=table;
1725   nextCPos=table->m_delimiterPos[numDelim-1]+1;
1726   return true;
1727 }
1728 
prepareData()1729 void MsWrdText::prepareData()
1730 {
1731 #if defined(DEBUG_WITH_FILES) && DEBUG_PARAGRAPH
1732   int const vers = version();
1733 #endif
1734   long cPos = 0, cEnd = m_state->getTotalTextSize();
1735   if (cEnd <= 0) return;
1736   prepareLines();
1737   convertFilePLCPos();
1738   prepareTableLimits();
1739 
1740   prepareParagraphProperties();
1741   prepareFontProperties();
1742 
1743   MsWrdStruct::Font defaultFont;
1744   long pos = m_state->getFilePos(cPos);
1745   auto textposSize = int(m_state->m_textposList.size());
1746 
1747   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1748   libmwaw::DebugStream f, f2;
1749   PLC::ltstr compare;
1750 
1751   auto &map = m_state->m_plcMap;
1752   while (cPos < cEnd) {
1753     f.str("");
1754     // first find the list of the plc
1755     long cNextPos = cEnd;
1756 
1757     std::set<PLC, PLC::ltstr> sortedPLC(compare);
1758     auto plcIt = map.lower_bound(cPos);
1759     while (plcIt != map.end()) {
1760       if (plcIt->first != cPos) {
1761         cNextPos=plcIt->first;
1762         break;
1763       }
1764       PLC const &plc = plcIt++->second;
1765       if (plc.m_type!=PLC::Paragraph&&plc.m_type!=PLC::Font)
1766         sortedPLC.insert(plc);
1767 #if DEBUG_PLC
1768       if (plc.m_type != PLC::TextPosition)
1769         f << "[" << plc << "],";
1770 #endif
1771 
1772       int pId = plc.m_id;
1773       switch (plc.m_type) {
1774       case PLC::TextPosition:
1775         if (pId < 0 || pId > textposSize) {
1776           MWAW_DEBUG_MSG(("MsWrdText::prepareData: oops can not find textstruct!!!!\n"));
1777           f << "[###tP" << pId << "]";
1778         }
1779         else {
1780           auto const &textEntry=m_state->m_textposList[size_t(pId)];
1781           pos = textEntry.begin();
1782 #if defined(DEBUG_WITH_FILES) && DEBUG_PARAGRAPH
1783           int paraId=textEntry.getParagraphId();
1784           if (paraId < 0)
1785             f << "tP_,";
1786           else {
1787             MsWrdStruct::Paragraph para(vers);
1788             m_stylesManager->getParagraph(MsWrdTextStyles::TextStructZone, paraId, para);
1789             f << "tP" << paraId << "=[";
1790             para.print(f, m_parserState->m_fontConverter);
1791             f << "],";
1792           }
1793 #endif
1794         }
1795         break;
1796       case PLC::Section:
1797 #if defined(DEBUG_WITH_FILES) && DEBUG_SECTION
1798         if (pId >= 0) {
1799           MsWrdStruct::Section sec;
1800           m_stylesManager->getSection(MsWrdTextStyles::TextZone, pId, sec);
1801           f << "S" << pId << "=[" << sec << "],";
1802         }
1803         else
1804           f << "S_,";
1805 #endif
1806         break;
1807       case PLC::ParagraphInfo:
1808 #if defined(DEBUG_WITH_FILES) && DEBUG_PARAGRAPHINFO
1809         if (pId >= 0 && pId < int(m_state->m_paraInfoList.size())) {
1810           MsWrdStruct::ParagraphInfo info=m_state->m_paraInfoList[size_t(pId)];
1811           f << "Pi" << pId  << "=[" << info << "],";
1812         }
1813         else
1814           f << "Pi_,";
1815 #endif
1816         break;
1817       case PLC::Page:
1818 #if defined(DEBUG_WITH_FILES) && DEBUG_PAGE
1819         if (pId  >= 0 && pId < int(m_state->m_pageList.size()))
1820           f << "Pg" << pId << "=[" << m_state->m_pageList[size_t(pId)] << "],";
1821         else
1822           f << "Pg_,";
1823 #endif
1824         break;
1825       case PLC::Paragraph:
1826 #if defined(DEBUG_WITH_FILES) && DEBUG_PARAGRAPH
1827         if (pId >= 0) {
1828           MsWrdStruct::Paragraph para(vers);
1829           m_stylesManager->getParagraph(MsWrdTextStyles::TextZone, pId, para);
1830           f << "P" << pId << "=[";
1831           para.print(f, m_parserState->m_fontConverter);
1832           f << "],";
1833         }
1834         else f << "P_,";
1835 #endif
1836         break;
1837       case PLC::Font: {
1838 #if defined(DEBUG_WITH_FILES) && DEBUG_FONT
1839         if (pId >= 0) {
1840           MsWrdStruct::Font font;
1841           m_stylesManager->getFont(MsWrdTextStyles::TextZone, pId, font);
1842           f << "F" << pId << "=[" << font.m_font->getDebugString(m_parserState->m_fontConverter) << font << "],";
1843         }
1844         else
1845           f << "F_,";
1846 #endif
1847         break;
1848       }
1849       case PLC::Field:
1850       case PLC::Footnote:
1851       case PLC::FootnoteDef:
1852       case PLC::HeaderFooter:
1853       case PLC::Object:
1854 #if !defined(__clang__)
1855       default:
1856 #endif
1857         break;
1858       }
1859     }
1860     MsWrdTextInternal::Property prop;
1861     prop.m_fPos = pos;
1862     prop.m_plcList=std::vector<PLC>(sortedPLC.begin(), sortedPLC.end());
1863 
1864     if (f.str().length()) {
1865       f2.str("");
1866       f2 << "TextContent["<<cPos<<"]:" << f.str();
1867       ascFile.addPos(pos);
1868       ascFile.addNote(f2.str().c_str());
1869 #if defined(DEBUG_WITH_FILES)
1870       m_state->debugFile2() << f2.str() << "\n";
1871 #endif
1872       prop.m_debugPrint = true;
1873     }
1874     m_state->m_propertyMap[cPos] = prop;
1875     pos+=(cNextPos-cPos);
1876     cPos = cNextPos;
1877   }
1878 }
1879 
1880 ////////////////////////////////////////////////////////////
1881 // try to read a text entry
1882 ////////////////////////////////////////////////////////////
sendText(MWAWEntry const & textEntry,bool mainZone,bool tableCell)1883 bool MsWrdText::sendText(MWAWEntry const &textEntry, bool mainZone, bool tableCell)
1884 {
1885   if (!textEntry.valid()) return false;
1886   MWAWTextListenerPtr listener=m_parserState->m_textListener;
1887   if (!listener) {
1888     MWAW_DEBUG_MSG(("MsWrdText::sendText: can not find a listener!"));
1889     return true;
1890   }
1891   long cPos = textEntry.begin();
1892   long debPos = m_state->getFilePos(cPos), pos=debPos;
1893   MWAWInputStreamPtr &input= m_parserState->m_input;
1894   input->seek(pos, librevenge::RVNG_SEEK_SET);
1895   long cEnd = textEntry.end();
1896 
1897   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1898   libmwaw::DebugStream f;
1899   f << "TextContent[" << cPos << "]:";
1900   long pictPos = -1;
1901   while (!input->isEnd() && cPos < cEnd) {
1902     bool newTable = false;
1903     long cEndPos = cEnd;
1904 
1905     MsWrdTextInternal::Property *prop = nullptr;
1906     auto propIt = m_state->m_propertyMap.upper_bound(cPos);
1907     if (propIt != m_state->m_propertyMap.end() && propIt->first < cEndPos && propIt->first > cPos)
1908       cEndPos = propIt->first;
1909 
1910     size_t numPLC = 0;
1911     propIt = m_state->m_propertyMap.find(cPos);
1912     if (propIt != m_state->m_propertyMap.end()) {
1913       prop = &propIt->second;
1914       pos = prop->m_fPos;
1915       newTable = !tableCell && m_state->getTable(cPos);
1916       input->seek(pos, librevenge::RVNG_SEEK_SET);
1917       numPLC = prop->m_plcList.size();
1918     }
1919     int newSectionId=-1;
1920     for (size_t i = 0; i < numPLC; i++) {
1921       PLC const &plc = prop->m_plcList[i];
1922       if (newTable && int(plc.m_type) >= int(PLC::ParagraphInfo)) continue;
1923       switch (plc.m_type) {
1924       case PLC::Page: {
1925         if (tableCell) break;
1926         if (mainZone) m_mainParser->newPage(++m_state->m_actPage);
1927         break;
1928       }
1929       case PLC::Section:
1930         if (tableCell) break;
1931         newSectionId=plc.m_id;
1932         break;
1933       case PLC::Field: // some fields ?
1934 #ifdef DEBUG
1935         m_mainParser->sendFieldComment(plc.m_id);
1936 #endif
1937         break;
1938       case PLC::Footnote:
1939         m_mainParser->sendFootnote(plc.m_id);
1940         break;
1941       case PLC::TextPosition:
1942       case PLC::Font:
1943       case PLC::FootnoteDef:
1944       case PLC::HeaderFooter:
1945       case PLC::Object:
1946       case PLC::Paragraph:
1947       case PLC::ParagraphInfo:
1948 #if !defined(__clang__)
1949       default:
1950 #endif
1951         break;
1952       }
1953     }
1954     if (newSectionId >= 0)
1955       sendSection(newSectionId);
1956     if ((prop && prop->m_debugPrint)  || newTable) {
1957       ascFile.addPos(debPos);
1958       ascFile.addNote(f.str().c_str());
1959 #if defined(DEBUG_WITH_FILES)
1960       m_state->debugFile2() << f.str() << "\n";
1961 #endif
1962       f.str("");
1963       f << "TextContent["<<cPos<<"]:";
1964       debPos = pos;
1965     }
1966     // time to send the table
1967     std::shared_ptr<MsWrdTextInternal::Table> table;
1968     if (newTable && (table=m_state->getTable(cPos))) {
1969       long actCPos = cPos;
1970       bool ok = sendTable(*table);
1971       cPos = ok ? table->m_cellPos.back()+1 : actCPos;
1972       pos=debPos=m_state->getFilePos(cPos);
1973       input->seek(pos, librevenge::RVNG_SEEK_SET);
1974       f.str("");
1975       f << "TextContent["<<cPos<<"]:";
1976       if (ok)
1977         continue;
1978     }
1979     if (m_state->m_paragraphMap.find(cPos) != m_state->m_paragraphMap.end())
1980       listener->setParagraph(m_state->m_paragraphMap.find(cPos)->second);
1981     if (m_state->m_fontMap.find(cPos) != m_state->m_fontMap.end()) {
1982       auto const &font = m_state->m_fontMap.find(cPos)->second;
1983       pictPos = font.m_picturePos.get();
1984       m_stylesManager->setProperty(font);
1985     }
1986     for (long p = cPos; p < cEndPos; p++) {
1987       auto c = static_cast<int>(input->readULong(1));
1988       cPos++;
1989       pos++;
1990       switch (c) {
1991       case 0x1:
1992         if (pictPos <= 0) {
1993           MWAW_DEBUG_MSG(("MsWrdText::sendText: can not find picture\n"));
1994           f << "###";
1995           break;
1996         }
1997         m_mainParser->sendPicture(pictPos, int(cPos), MWAWPosition::Char);
1998         break;
1999       case 0x7: // FIXME: cell end ?
2000         listener->insertEOL();
2001         break;
2002       case 0xc: // end section (ok)
2003         break;
2004       case 0x2:
2005         listener->insertField(MWAWField(MWAWField::PageNumber));
2006         break;
2007       case 0x6:
2008         listener->insertChar('\\');
2009         break;
2010       case 0x1e: // unbreaking - ?
2011         listener->insertChar('-');
2012         break;
2013       case 0x1f: // hyphen
2014         break;
2015       case 0x13: // month
2016       case 0x1a: // month abreviated
2017       case 0x1b: { // checkme month long
2018         MWAWField field(MWAWField::Date);
2019         field.m_DTFormat = "%m";
2020         listener->insertField(field);
2021         break;
2022       }
2023       case 0x10: // day
2024       case 0x16: // checkme: day abbreviated
2025       case 0x17: { // checkme: day long
2026         MWAWField field(MWAWField::Date);
2027         field.m_DTFormat = "%d";
2028         listener->insertField(field);
2029         break;
2030       }
2031       case 0x15: { // year
2032         MWAWField field(MWAWField::Date);
2033         field.m_DTFormat = "%y";
2034         listener->insertField(field);
2035         break;
2036       }
2037       case 0x1d: {
2038         MWAWField field(MWAWField::Date);
2039         field.m_DTFormat = "%b %d, %Y";
2040         listener->insertField(field);
2041         break;
2042       }
2043       case 0x18: // checkme hour
2044       case 0x19: { // checkme hour
2045         MWAWField field(MWAWField::Time);
2046         field.m_DTFormat = "%H";
2047         listener->insertField(field);
2048         break;
2049       }
2050       case 0x3: // v3
2051         listener->insertField(MWAWField(MWAWField::Date));
2052         break;
2053       case 0x4:
2054         listener->insertField(MWAWField(MWAWField::Time));
2055         break;
2056       case 0x5: // footnote mark (ok)
2057         break;
2058       case 0x9:
2059         listener->insertTab();
2060         break;
2061       case 0xb: // line break (simple but no a paragraph break ~soft)
2062         if (cPos!=cEnd)
2063           listener->insertEOL(true);
2064         break;
2065       case 0xd: // line break hard
2066         if (cPos!=cEnd)
2067           listener->insertEOL();
2068         break;
2069       case 0x11: // command key in help
2070         listener->insertUnicode(0x2318);
2071         break;
2072       case 0x14: // apple logo ( note only in private zone)
2073         listener->insertUnicode(0xf8ff);
2074         break;
2075       default:
2076         p+=listener->insertCharacter(static_cast<unsigned char>(c), input, input->tell()+(cEndPos-1-p));
2077         break;
2078       }
2079       if (c)
2080         f << char(c);
2081       else
2082         f << "###";
2083     }
2084   }
2085 
2086   ascFile.addPos(debPos);
2087   ascFile.addNote(f.str().c_str());
2088   ascFile.addPos(input->tell());
2089   ascFile.addNote("_");
2090   return true;
2091 }
2092 
sendSection(int secId)2093 bool MsWrdText::sendSection(int secId)
2094 {
2095   int textStructId=-1;
2096   if (!m_state->m_textposList.empty() &&
2097       secId>=0 && secId+1<static_cast<int>(m_state->m_sectionLimitList.size())) {
2098     int tId=m_state->getTextStructId
2099             (m_state->m_sectionLimitList[size_t(secId)+1]-1);
2100     if (tId>=0 && tId<static_cast<int>(m_state->m_textposList.size()))
2101       textStructId=m_state->m_textposList[size_t(tId)].getParagraphId();
2102   }
2103   return m_stylesManager->sendSection(secId, textStructId);
2104 }
2105 
2106 ////////////////////////////////////////////////////////////
2107 // try to read a table
2108 ////////////////////////////////////////////////////////////
sendTable(MsWrdTextInternal::Table const & table)2109 bool MsWrdText::sendTable(MsWrdTextInternal::Table const &table)
2110 {
2111   MWAWTextListenerPtr listener=m_parserState->m_textListener;
2112   if (!listener) {
2113     MWAW_DEBUG_MSG(("MsWrdText::sendTable: can not find a listener!\n"));
2114     return true;
2115   }
2116   size_t nCells = table.m_cellPos.size();
2117   if (nCells < 1) {
2118     MWAW_DEBUG_MSG(("MsWrdText::sendTable: numcols pos is bad\n"));
2119     return true;
2120   }
2121 
2122   size_t numCols = table.getColsSize().size()+1;
2123   size_t numRows = nCells/numCols;
2124 
2125   float height = table.m_height;
2126   if (height > 0) height*=-1;
2127 
2128   listener->openTable(table);
2129   size_t numCells = table.m_cells.size();
2130   for (size_t r = 0; r < numRows; r++) {
2131     listener->openTableRow(height, librevenge::RVNG_INCH);
2132     for (size_t c = 0; c < numCols-1; c++) {
2133       MWAWCell cell;
2134       size_t cellPos = r*numCols+c;
2135       if (cellPos < numCells && table.m_cells[cellPos].isSet()) {
2136         int const wh[] = { libmwaw::TopBit, libmwaw::LeftBit,
2137                            libmwaw::BottomBit, libmwaw::RightBit
2138                          };
2139         auto const &tCell = table.m_cells[cellPos].get();
2140         for (size_t i = 0; i < 4 && i < tCell.m_borders.size(); i++) {
2141           if (!tCell.m_borders[i].isSet() ||
2142               tCell.m_borders[i]->m_style==MWAWBorder::None) continue;
2143           cell.setBorders(wh[i], tCell.m_borders[i].get());
2144         }
2145         if (tCell.m_backColor.isSet()) {
2146           auto col = static_cast<unsigned char>(tCell.m_backColor.get()*255.f);
2147           cell.setBackgroundColor(MWAWColor(col,col,col));
2148         }
2149         else if (!table.m_backgroundColor.isWhite())
2150           cell.setBackgroundColor(table.m_backgroundColor);
2151       }
2152       cell.setPosition(MWAWVec2i(static_cast<int>(c),static_cast<int>(r)));
2153 
2154       listener->openTableCell(cell);
2155 
2156       MsWrdEntry textData;
2157       textData.setBegin(table.m_cellPos[cellPos]);
2158       long cEndPos = table.m_cellPos[cellPos+1]-1;
2159       textData.setEnd(cEndPos);
2160       if (textData.length()<=0)
2161         listener->insertChar(' ');
2162       else
2163         sendText(textData, false, true);
2164 #if defined(DEBUG_WITH_FILES)
2165       m_state->debugFile2() << "TextContent["<<cEndPos<<"]:" << char(7) << "\n";
2166 #endif
2167       listener->closeTableCell();
2168     }
2169     listener->closeTableRow();
2170   }
2171   listener->closeTable();
2172   return true;
2173 }
2174 
sendMainText()2175 bool MsWrdText::sendMainText()
2176 {
2177   MWAWEntry entry;
2178   entry.setBegin(0);
2179   entry.setLength(m_state->m_textLength[0]);
2180   sendText(entry, true);
2181   return true;
2182 }
2183 
sendFootnote(int id)2184 bool MsWrdText::sendFootnote(int id)
2185 {
2186   MWAWTextListenerPtr listener=m_parserState->m_textListener;
2187   if (!listener) return true;
2188   if (id < 0 || id >= int(m_state->m_footnoteList.size())) {
2189     MWAW_DEBUG_MSG(("MsWrdText::sendFootnote: can not find footnote %d\n", id));
2190     listener->insertChar(' ');
2191     return false;
2192   }
2193   auto const &footnote = m_state->m_footnoteList[size_t(id)];
2194   if (footnote.m_pos.isParsed())
2195     listener->insertChar(' ');
2196   else
2197     sendText(footnote.m_pos, false);
2198   footnote.m_pos.setParsed();
2199   return true;
2200 }
2201 
sendFieldComment(int id)2202 bool MsWrdText::sendFieldComment(int id)
2203 {
2204   MWAWTextListenerPtr listener=m_parserState->m_textListener;
2205   if (!listener) return true;
2206   if (id < 0 || id >= int(m_state->m_fieldList.size())) {
2207     MWAW_DEBUG_MSG(("MsWrdText::sendFieldComment: can not find field %d\n", id));
2208     listener->insertChar(' ');
2209     return false;
2210   }
2211   MsWrdStruct::Font defFont;
2212   defFont.m_font = m_stylesManager->getDefaultFont();
2213   m_stylesManager->setProperty(defFont);
2214   m_stylesManager->sendDefaultParagraph();
2215   std::string const &text = m_state->m_fieldList[size_t(id)].m_text;
2216   if (!text.length()) listener->insertChar(' ');
2217   for (char c : text)
2218     listener->insertCharacter(static_cast<unsigned char>(c));
2219   return true;
2220 }
2221 
flushExtra()2222 void MsWrdText::flushExtra()
2223 {
2224 #ifdef DEBUG
2225   if (m_state->m_textLength[1]) {
2226     for (auto footnote : m_state->m_footnoteList) {
2227       if (footnote.m_pos.isParsed()) continue;
2228       sendText(footnote.m_pos, false);
2229       footnote.m_pos.setParsed();
2230     }
2231   }
2232 #endif
2233 }
2234 
2235 
2236 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab:
2237