1 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
2
3 /* libmwaw
4 * Version: MPL 2.0 / LGPLv2+
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 2.0 (the "License"); you may not use this file except in compliance with
8 * the License or as specified alternatively below. You may obtain a copy of
9 * the License at http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Major Contributor(s):
17 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
18 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
19 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
20 * Copyright (C) 2006, 2007 Andrew Ziem
21 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
22 *
23 *
24 * All Rights Reserved.
25 *
26 * For minor contributions see the git repository.
27 *
28 * Alternatively, the contents of this file may be used under the terms of
29 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
30 * in which case the provisions of the LGPLv2+ are applicable
31 * instead of those above.
32 */
33
34 #include <algorithm>
35 #include <iomanip>
36 #include <iostream>
37 #include <limits>
38 #include <map>
39 #include <set>
40 #include <sstream>
41
42 #if defined(DEBUG_WITH_FILES)
43 # include <fstream>
44 #endif
45
46 #include <librevenge/librevenge.h>
47
48 #include "MWAWCell.hxx"
49 #include "MWAWTextListener.hxx"
50 #include "MWAWFont.hxx"
51 #include "MWAWFontConverter.hxx"
52 #include "MWAWParagraph.hxx"
53 #include "MWAWPosition.hxx"
54 #include "MWAWTable.hxx"
55
56 #include "MsWrdParser.hxx"
57 #include "MsWrdStruct.hxx"
58
59 #include "MsWrdText.hxx"
60
61 #define DEBUG_FONT 1
62 #define DEBUG_PLC 1
63 #define DEBUG_PAGE 1
64 #define DEBUG_PARAGRAPH 1
65 #define DEBUG_SECTION 1
66 #define DEBUG_PARAGRAPHINFO 1
67
68 /** Internal: the structures of a MsWrdText */
69 namespace MsWrdTextInternal
70 {
71 ////////////////////////////////////////
72 //! Internal: the entry of MsWrdParser
73 struct TextStruct final : public MWAWEntry {
74 //! construtor
TextStructMsWrdTextInternal::TextStruct75 TextStruct()
76 : MWAWEntry()
77 , m_pos(-1)
78 , m_styleId(0)
79 , m_flags(0)
80 , m_paragraphId(-1)
81 , m_complex(false)
82 {
83 }
84 TextStruct(TextStruct const &)=default;
85 //! destructor
86 ~TextStruct() final;
87 //! operator<<
operator <<(std::ostream & o,TextStruct const & entry)88 friend std::ostream &operator<<(std::ostream &o, TextStruct const &entry)
89 {
90 if (entry.m_pos>=0) o << "textPos=" << entry.m_pos << ",";
91 o << "styleId?=" << entry.m_styleId << ",";
92 if (entry.m_complex) o << "complex,";
93 if (entry.m_paragraphId >= 0) o << "tP" << entry.m_paragraphId << ",";
94 // checkme
95 if (entry.m_flags&1)
96 o << "noEndPara,";
97 if (entry.m_flags&2)
98 o << "paphNil,";
99 if (entry.m_flags&4)
100 o << "dirty,";
101 switch (entry.m_flags&0xF8) { // fNoParaLast
102 case 0x80: // sameline
103 break;
104 case 0:
105 o << "newline,";
106 break;
107 default:
108 o << "#type=" << std::hex << entry.m_flags << std::dec << ",";
109 break;
110 }
111 if (entry.valid())
112 o << std::hex << "fPos=" << entry.begin() << ":" << entry.end() << std::dec << ",";
113 if (entry.extra().length())
114 o << entry.extra() << ",";
115 return o;
116 }
117
118 //! returns the paragraph id ( or -1, if unknown )
getParagraphIdMsWrdTextInternal::TextStruct119 int getParagraphId() const
120 {
121 return m_paragraphId;
122 }
123 //! a struct used to compare file textpos
124 struct CompareFilePos {
125 //! comparaison function
operator ()MsWrdTextInternal::TextStruct::CompareFilePos126 bool operator()(TextStruct const *t1, TextStruct const *t2) const
127 {
128 long diff = t1->begin()-t2->begin();
129 return (diff < 0);
130 }
131 };
132 //! the text position
133 int m_pos;
134 //! some identificator(maybe style)
135 int m_styleId;
136 //! some flags
137 int m_flags;
138 //! the paragraph id
139 int m_paragraphId;
140 //! a flag to know if we read a complex or a simple PRM
141 bool m_complex;
142 };
143
~TextStruct()144 TextStruct::~TextStruct()
145 {
146 }
147
148 ////////////////////////////////////////
//! Internal: the data read for one page break
struct Page {
  //! constructor: unknown id and page number, all the values set to zero
  Page()
    : m_values{0, 0, 0, 0}
    , m_error("")
  {
  }
  //! operator<<: prints the fields which differ from their expected value
  friend std::ostream &operator<<(std::ostream &o, Page const &page)
  {
    if (page.m_id < 0) o << "Pg_:";
    else o << "Pg" << page.m_id << ":";
    if (page.m_paragraphId >= 0) o << "P" << page.m_paragraphId << ",";
    // the page number is only printed when it is not id+1
    if (page.m_page != page.m_id+1) o << "page=" << page.m_page << ",";
    if (page.m_type&0x10)
      o << "right,";
    // find also page.m_type&0x40 : pageDirty?
    int const otherBits = page.m_type&0xEF;
    if (otherBits)
      o << "type=" << std::hex << otherBits << std::dec << ",";
    // the first three values are printed in decimal, the last one in hexadecimal
    int idx = 0;
    for (; idx < 3; ++idx) {
      if (!page.m_values[idx]) continue;
      o << "f" << idx << "=" << page.m_values[idx] << ",";
    }
    if (page.m_values[idx])
      o << "f3=" << std::hex << page.m_values[idx] << std::dec << ",";
    if (page.m_error.length()) o << page.m_error << ",";
    return o;
  }
  //! the identifier
  int m_id = -1;
  //! the type
  int m_type = 0;
  //! the page number
  int m_page = -1;
  //! the paragraph id
  int m_paragraphId = -2;
  //! some values ( 0, -1, 0, small number )
  int m_values[4];
  //! the error messages
  std::string m_error;
};
195
196 ////////////////////////////////////////
197 //! Internal: the footnote
198 struct Footnote {
199 //! constructor
FootnoteMsWrdTextInternal::Footnote200 Footnote()
201 : m_pos()
202 , m_id(-1)
203 , m_value(0)
204 , m_error("")
205 {
206 }
207 //! operator<<
operator <<(std::ostream & o,Footnote const & note)208 friend std::ostream &operator<<(std::ostream &o, Footnote const ¬e)
209 {
210 if (note.m_id >= 0) o << "Fn" << note.m_id << ":";
211 else o << "Fn_:";
212 if (note.m_pos.valid())
213 o << std::hex << note.m_pos.begin() << "-" << note.m_pos.end() << std::dec << ",";
214 if (note.m_value) o << "f0=" << note.m_value << ",";
215 if (note.m_error.length()) o << note.m_error << ",";
216 return o;
217 }
218 //! the footnote data
219 MWAWEntry m_pos;
220 //! the id
221 int m_id;
222 //! a value ( 1, 4)
223 int m_value;
224 /** the errors */
225 std::string m_error;
226 };
227
228 ////////////////////////////////////////
//! Internal: the data read for one field
struct Field {
  //! constructor: empty text, unknown id
  Field()
    : m_text("")
    , m_error("")
  {
  }
  //! operator<<: prints the text, then the id between brackets and the errors if any
  friend std::ostream &operator<<(std::ostream &o, Field const &field)
  {
    o << field.m_text;
    if (field.m_id >= 0)
      o << "[" << field.m_id << "]";
    if (field.m_error.length())
      o << "," << field.m_error << ",";
    return o;
  }
  //! the text
  std::string m_text;
  //! the id
  int m_id = -1;
  //! the error messages
  std::string m_error;
};
253
254 ////////////////////////////////////////
255 //! Internal: a list of plc
256 struct Property {
PropertyMsWrdTextInternal::Property257 Property()
258 : m_fPos(-1)
259 , m_plcList()
260 , m_debugPrint(false)
261 {
262 }
263 //! the character position in the file
264 long m_fPos;
265 //! the list of plc
266 std::vector<MsWrdText::PLC> m_plcList;
267 //! a flag to know if we have print data
268 bool m_debugPrint;
269 };
270
271 ////////////////////////////////////////
272 //! Internal and low level: a structure to store a line or a cell of a MsWrdText
273 struct Line {
274 //! an enum used to differentiate line and cell
275 enum Type { L_Line, L_Cell, L_LastLineCell, L_LastRowCell };
276 //! constructor
LineMsWrdTextInternal::Line277 Line()
278 : m_type(L_Line)
279 , m_cPos()
280 {
281 }
282 //! the line type
283 Type m_type;
284 //! the caracter position
285 MWAWVec2l m_cPos;
286 };
287
288 ////////////////////////////////////////
289 //! Internal and low level: a structure to store a table of a MsWrdText
290 struct Table final : public MWAWTable {
291 //! constructor
TableMsWrdTextInternal::Table292 Table()
293 : MWAWTable(MWAWTable::TableDimBit)
294 , m_cellPos()
295 , m_delimiterPos()
296 , m_height(0)
297 , m_backgroundColor(MWAWColor::white())
298 , m_cells()
299 {
300 }
301 //! destructor
302 ~Table() final;
303 //! the list of cPos corresponding to cells limits
304 std::vector<long> m_cellPos;
305 //! the list of the delimiter cPos (ie. end of each cell)
306 std::vector<long> m_delimiterPos;
307 //! the row height
308 float m_height;
309 //! the background color
310 MWAWColor m_backgroundColor;
311 //! the table cells
312 std::vector<MWAWVariable<MsWrdStruct::Table::Cell> > m_cells;
313 };
314
~Table()315 Table::~Table()
316 {
317 }
318 ////////////////////////////////////////
319 //! Internal: the state of a MsWrdParser
320 struct State {
321 //! constructor
StateMsWrdTextInternal::State322 State()
323 : m_version(-1)
324 , m_bot(0x100)
325 , m_headerFooterZones()
326 , m_textposList()
327 , m_plcMap()
328 , m_filePlcMap()
329 , m_lineList()
330 , m_paragraphLimitMap()
331 , m_sectionLimitList()
332 , m_fontMap()
333 , m_paragraphMap()
334 , m_propertyMap()
335 , m_tableCellPosSet()
336 , m_tableMap()
337 , m_paraInfoList()
338 , m_pageList()
339 , m_fieldList()
340 , m_footnoteList()
341 , m_actPage(0)
342 , m_numPages(-1)
343 #if defined(DEBUG_WITH_FILES)
344 , m_debugFile()
345 #endif
346 {
347 for (auto &tLength : m_textLength) tLength = 0;
348 }
349 //! returns the total text size
getTotalTextSizeMsWrdTextInternal::State350 long getTotalTextSize() const
351 {
352 long res=0;
353 for (auto tLength : m_textLength) res+=tLength;
354 return res;
355 }
356 //! returns the id of textpos corresponding to a cPos or -1
getTextStructIdMsWrdTextInternal::State357 int getTextStructId(long textPos) const
358 {
359 if (m_textposList.empty() || textPos < m_textposList[0].m_pos)
360 return -1;
361 int minVal = 0, maxVal = int(m_textposList.size())-1;
362 while (minVal != maxVal) {
363 int mid = (minVal+1+maxVal)/2;
364 if (m_textposList[size_t(mid)].m_pos == textPos)
365 return mid;
366 if (m_textposList[size_t(mid)].m_pos > textPos)
367 maxVal = mid-1;
368 else
369 minVal = mid;
370 }
371 return minVal;
372 }
373 //! returns the file position corresponding to a text entry
getFilePosMsWrdTextInternal::State374 long getFilePos(long textPos) const
375 {
376 int tId=getTextStructId(textPos);
377 if (tId==-1)
378 return m_bot+textPos;
379 return m_textposList[size_t(tId)].begin() + (textPos-m_textposList[size_t(tId)].m_pos);
380 }
381 //! try to return a table which begins at a character position
getTableMsWrdTextInternal::State382 std::shared_ptr<Table> getTable(long cPos) const
383 {
384 std::shared_ptr<Table> empty;
385 auto tableIt=m_tableMap.find(cPos);
386 if (tableIt==m_tableMap.end()||!tableIt->second) return empty;
387 auto table=tableIt->second;
388 if (table->m_cellPos.empty()||table->m_cellPos[0]!=cPos)
389 return empty;
390 return table;
391 }
392
393 #if defined(DEBUG_WITH_FILES)
394 // use cut -c13- main-2.data|sort -n to retrieve the data
395 //! internal and low level: defined a second debug file
debugFile2MsWrdTextInternal::State396 std::fstream &debugFile2()
397 {
398 static bool init=false;
399 if (!init) {
400 init=true;
401 m_debugFile.open("main-2.data", std::ios_base::out | std::ios_base::trunc);
402 }
403 return m_debugFile;
404 }
405 #endif
406
407 //! the file version
408 int m_version;
409
410 //! the default text begin
411 long m_bot;
412
413 //! the text length (main, footnote, header+footer)
414 long m_textLength[3];
415
416 //! the header/footer zones
417 std::vector<MWAWEntry> m_headerFooterZones;
418 //! the text positions
419 std::vector<TextStruct> m_textposList;
420
421 //! the text correspondance zone ( textpos, plc )
422 std::multimap<long, MsWrdText::PLC> m_plcMap;
423 //! the file correspondance zone ( filepos, plc )
424 std::multimap<long, MsWrdText::PLC> m_filePlcMap;
425
426 //! the list of lines
427 std::vector<Line> m_lineList;
428 //! the paragraph limit -> textposition (or -1)
429 std::map<long, int> m_paragraphLimitMap;
430 //! the section cPos limit
431 std::vector<long> m_sectionLimitList;
432 //! the final correspondance font zone ( textpos, font)
433 std::map<long, MsWrdStruct::Font> m_fontMap;
434
435 //! the final correspondance paragraph zone ( textpos, paragraph)
436 std::map<long, MsWrdStruct::Paragraph> m_paragraphMap;
437 //! the position where we have new data ( textpos -> [ we have done debug printing ])
438 std::map<long, Property> m_propertyMap;
439 //! a set of all begin cell position
440 std::set<long> m_tableCellPosSet;
441 //! the final correspondance table zone ( textpos, font)
442 std::map<long, std::shared_ptr<Table> > m_tableMap;
443 //! the list of paragraph info modifier
444 std::vector<MsWrdStruct::ParagraphInfo> m_paraInfoList;
445
446 //! the list of pages
447 std::vector<Page> m_pageList;
448
449 //! the list of fields
450 std::vector<Field> m_fieldList;
451
452 //! the list of footnotes
453 std::vector<Footnote> m_footnoteList;
454
455 int m_actPage/** the actual page*/, m_numPages /** the number of page of the final document */;
456 #if defined(DEBUG_WITH_FILES)
457 //! internal and low level: defined a second debug file
458 std::fstream m_debugFile;
459 #endif
460 };
461
462 }
463
464 ////////////////////////////////////////////////////////////
465 // constructor/destructor, ...
466 ////////////////////////////////////////////////////////////
MsWrdText(MsWrdParser & parser)467 MsWrdText::MsWrdText(MsWrdParser &parser)
468 : m_parserState(parser.getParserState())
469 , m_state(new MsWrdTextInternal::State)
470 , m_stylesManager()
471 , m_mainParser(&parser)
472 {
473 m_stylesManager.reset(new MsWrdTextStyles(*this));
474 }
475
~MsWrdText()476 MsWrdText::~MsWrdText()
477 { }
478
version() const479 int MsWrdText::version() const
480 {
481 if (m_state->m_version < 0)
482 m_state->m_version = m_parserState->m_version;
483 return m_state->m_version;
484 }
485
numPages() const486 int MsWrdText::numPages() const
487 {
488 m_state->m_numPages = int(m_state->m_pageList.size());
489 return m_state->m_numPages;
490 }
491
getMainTextLength() const492 long MsWrdText::getMainTextLength() const
493 {
494 return m_state->m_textLength[0];
495 }
496
getHeader() const497 MWAWEntry MsWrdText::getHeader() const
498 {
499 if (m_state->m_headerFooterZones.size() == 0)
500 return MWAWEntry();
501 MWAWEntry entry=m_state->m_headerFooterZones[0];
502 bool ok=entry.valid();
503 if (ok && entry.length()<=2) {
504 // small header, check if contains data
505 MWAWInputStreamPtr &input= m_parserState->m_input;
506 long pos = input->tell();
507 ok=false;
508 for (long cPos=entry.begin(); cPos<entry.end(); ++cPos) {
509 input->seek(m_state->getFilePos(cPos), librevenge::RVNG_SEEK_SET);
510 if (input->readLong(1)==0xd)
511 continue;
512 ok=true;
513 break;
514 }
515 input->seek(pos, librevenge::RVNG_SEEK_SET);
516 }
517 return ok ? entry : MWAWEntry();
518 }
519
getFooter() const520 MWAWEntry MsWrdText::getFooter() const
521 {
522 if (m_state->m_headerFooterZones.size() < 2)
523 return MWAWEntry();
524 MWAWEntry entry=m_state->m_headerFooterZones[1];
525 bool ok=entry.valid();
526 if (ok && entry.length()<=2) {
527 // check if it contains data
528 MWAWInputStreamPtr &input= m_parserState->m_input;
529 long pos = input->tell();
530 ok=false;
531 for (long cPos=entry.begin(); cPos<entry.end(); ++cPos) {
532 input->seek(m_state->getFilePos(cPos), librevenge::RVNG_SEEK_SET);
533 if (input->readLong(1)==0xd)
534 continue;
535 ok=true;
536 break;
537 }
538 input->seek(pos, librevenge::RVNG_SEEK_SET);
539 }
540 return ok ? entry : MWAWEntry();
541 }
542
getTextPLCMap()543 std::multimap<long, MsWrdText::PLC> &MsWrdText::getTextPLCMap()
544 {
545 return m_state->m_plcMap;
546 }
547
getFilePLCMap()548 std::multimap<long, MsWrdText::PLC> &MsWrdText::getFilePLCMap()
549 {
550 return m_state->m_filePlcMap;
551 }
552
553 ////////////////////////////////////////////////////////////
554 // Intermediate level
555 ////////////////////////////////////////////////////////////
556 // PLC
operator <<(std::ostream & o,MsWrdText::PLC const & plc)557 std::ostream &operator<<(std::ostream &o, MsWrdText::PLC const &plc)
558 {
559 switch (plc.m_type) {
560 case MsWrdText::PLC::ParagraphInfo:
561 o << "Pi";
562 break;
563 case MsWrdText::PLC::Section:
564 o << "S";
565 break;
566 case MsWrdText::PLC::Footnote:
567 o << "Fn";
568 break;
569 case MsWrdText::PLC::FootnoteDef:
570 o << "vFn";
571 break;
572 case MsWrdText::PLC::Field:
573 o << "Field";
574 break;
575 case MsWrdText::PLC::Page:
576 o << "Pg";
577 break;
578 case MsWrdText::PLC::Font:
579 o << "F";
580 break;
581 case MsWrdText::PLC::Object:
582 o << "O";
583 break;
584 case MsWrdText::PLC::Paragraph:
585 o << "P";
586 break;
587 case MsWrdText::PLC::HeaderFooter:
588 o << "hfP";
589 break;
590 case MsWrdText::PLC::TextPosition:
591 o << "textPos";
592 break;
593 #if !defined(__clang__)
594 default:
595 o << "#type" << char('a'+int(plc.m_type));
596 #endif
597 }
598 if (plc.m_id < 0) o << "_";
599 else o << plc.m_id;
600 if (plc.m_extra.length()) o << "[" << plc.m_extra << "]";
601 return o;
602 }
603
readHeaderTextLength()604 bool MsWrdText::readHeaderTextLength()
605 {
606 MWAWInputStreamPtr &input= m_parserState->m_input;
607 long pos = input->tell();
608 long endPos = pos+12;
609 if (!input->checkPosition(endPos))
610 return false;
611 for (auto &tLength : m_state->m_textLength) {
612 const auto length = long(input->readULong(4));
613 // a minimal check that the lengths are not insanely out of bounds
614 tLength= input->checkPosition(length) ? length : 0;
615 }
616 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
617 libmwaw::DebugStream f;
618 f << "FileHeader(textLength):text="
619 << std::hex << m_state->m_textLength[0] << ",";
620 if (m_state->m_textLength[1])
621 f << "footnote=" << m_state->m_textLength[1] << ",";
622 if (m_state->m_textLength[2])
623 f << "headerFooter=" << m_state->m_textLength[2] << ",";
624 ascFile.addPos(pos);
625 ascFile.addNote(f.str().c_str());
626 ascFile.addPos(endPos);
627 ascFile.addNote("_");
628 return true;
629 }
630
631 ////////////////////////////////////////////////////////////
632 // try to find the different zone
633 ////////////////////////////////////////////////////////////
createZones(long bot)634 bool MsWrdText::createZones(long bot)
635 {
636 // int const vers=version();
637 m_state->m_bot = bot;
638
639 auto &entryMap = m_mainParser->m_entryMap;
640 // the fonts
641 auto it = entryMap.find("FontIds");
642 if (it != entryMap.end()) {
643 std::vector<long> list;
644 readLongZone(it->second, 2, list);
645 }
646 it = entryMap.find("FontNames");
647 if (it != entryMap.end())
648 readFontNames(it->second);
649 // the styles
650 it = entryMap.find("Styles");
651 long prevDeb = 0;
652 while (it != entryMap.end()) {
653 if (!it->second.hasType("Styles")) break;
654 MsWrdEntry &entry=it++->second;
655 #ifndef DEBUG
656 // first entry is often bad or share the same data than the second
657 if (entry.id() == 0)
658 continue;
659 #endif
660 if (entry.begin() == prevDeb) continue;
661 prevDeb = entry.begin();
662 m_stylesManager->readStyles(entry);
663 }
664 // read the text structure
665 it = entryMap.find("TextStruct");
666 if (it != entryMap.end())
667 readTextStruct(it->second);
668
669 //! the break position
670 it = entryMap.find("PageBreak");
671 if (it != entryMap.end())
672 readPageBreak(it->second);
673 it = entryMap.find("ParaInfo");
674 if (it != entryMap.end())
675 readParagraphInfo(it->second);
676 it = entryMap.find("Section");
677 if (it != entryMap.end() &&
678 !m_stylesManager->readSection(it->second, m_state->m_sectionLimitList))
679 m_state->m_sectionLimitList.resize(0);
680
681 //! read the header footer limit
682 it = entryMap.find("HeaderFooter");
683 std::vector<long> hfLimits;
684 if (it != entryMap.end()) {
685 readLongZone(it->second, 4, hfLimits);
686
687 long debHeader = m_state->m_textLength[0]+m_state->m_textLength[1];
688 MsWrdText::PLC plc(MsWrdText::PLC::HeaderFooter);
689 // list Header0,Footer0,Header1,Footer1,...,Footern, 3
690 for (size_t i = 0; i+2 < hfLimits.size(); i++) {
691 plc.m_id = int(i);
692 m_state->m_plcMap.insert(std::multimap<long,MsWrdText::PLC>::value_type
693 (hfLimits[i]+debHeader, plc));
694
695 MWAWEntry entry;
696 entry.setBegin(debHeader+hfLimits[i]);
697 entry.setEnd(debHeader+hfLimits[i+1]);
698 m_state->m_headerFooterZones.push_back(entry);
699 }
700 }
701
702 //! read the note
703 std::vector<long> fieldPos;
704 it = entryMap.find("FieldPos");
705 if (it != entryMap.end()) { // a list of text pos ( or a size from ? )
706 readLongZone(it->second, 4, fieldPos);
707 }
708 it = entryMap.find("FieldName");
709 if (it != entryMap.end())
710 readFields(it->second, fieldPos);
711
712 //! read the footenote
713 std::vector<long> footnoteDef;
714 it = entryMap.find("FootnoteDef");
715 if (it != entryMap.end()) { // list of pos in footnote data
716 readLongZone(it->second, 4, footnoteDef);
717 }
718 it = entryMap.find("FootnotePos");
719 if (it != entryMap.end()) { // a list of text pos
720 readFootnotesPos(it->second, footnoteDef);
721 }
722 /* CHECKME: this zone seems presents only when FootnoteDef and FootnotePos,
723 but what does it means ?
724 */
725 it = entryMap.find("FootnoteData");
726 if (it != entryMap.end()) { // a list of text pos
727 readFootnotesData(it->second);
728 }
729
730 it = entryMap.find("ParagList");
731 if (it != entryMap.end())
732 m_stylesManager->readPLCList(it->second);
733 it = entryMap.find("CharList");
734 if (it != entryMap.end())
735 m_stylesManager->readPLCList(it->second);
736
737 prepareData();
738 return true;
739 }
740
741 ////////////////////////////////////////////////////////////
742 // read the text structure ( the PieCe Descriptors : plcfpcd )
743 ////////////////////////////////////////////////////////////
readTextStruct(MsWrdEntry & entry)744 bool MsWrdText::readTextStruct(MsWrdEntry &entry)
745 {
746 if (entry.length() < 19) {
747 MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: the zone seems to short\n"));
748 return false;
749 }
750 if (!m_stylesManager->readTextStructList(entry))
751 return false;
752 MWAWInputStreamPtr &input= m_parserState->m_input;
753 long pos = input->tell();
754 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
755 libmwaw::DebugStream f;
756 auto type = static_cast<int>(input->readLong(1));
757 if (type != 2) {
758 MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: find odd type %d\n", type));
759 return false;
760 }
761 entry.setParsed(true);
762 f << "TextStruct-pos:";
763 auto sz = static_cast<int>(input->readULong(2));
764 long endPos = pos+3+sz;
765 if (endPos > entry.end() || (sz%12) != 4) {
766 f << "#";
767 ascFile.addPos(pos);
768 ascFile.addNote(f.str().c_str());
769 MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: can not read the position zone\n"));
770 return false;
771 }
772 int N=sz/12;
773 long textLength=m_state->getTotalTextSize();
774 std::vector<long> textPos; // checkme
775 textPos.resize(size_t(N+1));
776 f << "pos=[" << std::hex;
777 for (size_t i = 0; i <= size_t(N); i++) {
778 textPos[i] = static_cast<int>(input->readULong(4));
779 if (i && textPos[i] <= textPos[i-1]) {
780 MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: find backward text pos\n"));
781 f << "#" << textPos[i] << ",";
782 textPos[i]=textPos[i-1];
783 }
784 else {
785 if (i != size_t(N) && textPos[i] > textLength) {
786 MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: find a text position which is too big\n"));
787 f << "#";
788 }
789 f << textPos[i] << ",";
790 }
791 }
792 f << std::dec << "],";
793 ascFile.addPos(pos);
794 ascFile.addNote(f.str().c_str());
795 PLC plc(PLC::TextPosition);
796
797 for (int i = 0; i < N; i++) {
798 pos = input->tell();
799 MsWrdTextInternal::TextStruct tEntry;
800 f.str("");
801 f<< "TextStruct-pos" << i << ":";
802 tEntry.m_pos = static_cast<int>(textPos[size_t(i)]);
803 tEntry.m_flags = static_cast<int>(input->readULong(1));
804 // fN internal...
805 tEntry.m_styleId = static_cast<int>(input->readULong(1));
806 auto ptr = long(input->readULong(4));
807 tEntry.setBegin(ptr);
808 tEntry.setLength(textPos[size_t(i)+1]-textPos[size_t(i)]);
809 std::string extra;
810 tEntry.m_paragraphId = m_stylesManager->readPropertyModifier(tEntry.m_complex, extra);
811 tEntry.setExtra(extra);
812 m_state->m_textposList.push_back(tEntry);
813 if (!input->checkPosition(ptr)) {
814 MWAW_DEBUG_MSG(("MsWrdText::readTextStruct: find a bad file position \n"));
815 f << "#";
816 }
817 else {
818 plc.m_id = i;
819 m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
820 (textPos[size_t(i)],plc));
821 }
822 f << tEntry;
823 input->seek(pos+8, librevenge::RVNG_SEEK_SET);
824 ascFile.addPos(pos);
825 ascFile.addNote(f.str().c_str());
826 #if defined(DEBUG_WITH_FILES)
827 f.str("");
828 f<< "TextContent[" << tEntry.m_pos << "]:" << tEntry << ",";
829 m_state->debugFile2() << f.str() << "\n";
830 #endif
831 }
832
833 pos = input->tell();
834 if (pos != entry.end()) {
835 ascFile.addPos(pos);
836 ascFile.addNote("TextStruct-pos#");
837 }
838 ascFile.addPos(entry.end());
839 ascFile.addNote("_");
840 return true;
841 }
842
843 ////////////////////////////////////////////////////////////
844 // read the font name
845 ////////////////////////////////////////////////////////////
readFontNames(MsWrdEntry & entry)846 bool MsWrdText::readFontNames(MsWrdEntry &entry)
847 {
848 if (entry.length() < 2) {
849 MWAW_DEBUG_MSG(("MsWrdText::readFontNames: the zone seems to short\n"));
850 return false;
851 }
852
853 long pos = entry.begin();
854 MWAWInputStreamPtr &input= m_parserState->m_input;
855 input->seek(pos, librevenge::RVNG_SEEK_SET);
856 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
857 libmwaw::DebugStream f;
858 auto N = static_cast<int>(input->readULong(2));
859 if (N*5+2 > entry.length()) {
860 MWAW_DEBUG_MSG(("MsWrdText::readFontNames: the number of fonts seems bad\n"));
861 return false;
862 }
863 entry.setParsed(true);
864 f << "FontNames:" << N;
865 ascFile.addPos(pos);
866 ascFile.addNote(f.str().c_str());
867 for (int i = 0; i < N; i++) {
868 pos = input->tell();
869 if (pos+5 > entry.end()) {
870 input->seek(pos, librevenge::RVNG_SEEK_SET);
871 MWAW_DEBUG_MSG(("MsWrdText::readFontNames: the fonts %d seems bad\n", i));
872 break;
873 }
874 f.str("");
875 f << "FontNames-" << i << ":";
876 auto val = static_cast<int>(input->readLong(2));
877 if (val) f << "f0=" << val << ",";
878 auto fId = static_cast<int>(input->readULong(2));
879 f << "fId=" << fId << ",";
880 auto fSz = static_cast<int>(input->readULong(1));
881 if (pos +5 > entry.end()) {
882 input->seek(pos, librevenge::RVNG_SEEK_SET);
883 MWAW_DEBUG_MSG(("MsWrdText::readFontNames: the fonts name %d seems bad\n", i));
884 break;
885 }
886 std::string name("");
887 for (int j = 0; j < fSz; j++)
888 name += char(input->readLong(1));
889 if (name.length())
890 m_parserState->m_fontConverter->setCorrespondance(fId, name);
891 f << name;
892 ascFile.addPos(pos);
893 ascFile.addNote(f.str().c_str());
894 }
895 pos = input->tell();
896 if (pos != entry.end()) {
897 ascFile.addPos(pos);
898 ascFile.addNote("FontNames#");
899 }
900
901 return true;
902 }
903
904 ////////////////////////////////////////////////////////////
905 // read the zone info zone
906 ////////////////////////////////////////////////////////////
readParagraphInfo(MsWrdEntry & entry)907 bool MsWrdText::readParagraphInfo(MsWrdEntry &entry)
908 {
909 int vers=version();
910 if (vers<=3) {
911 MWAW_DEBUG_MSG(("MsWrdText::readParagraphInfo: does not know how to read a paragraphInfo in v3 or less\n"));
912 return false;
913 }
914 if (entry.length() < 4 || (entry.length()%10) != 4) {
915 MWAW_DEBUG_MSG(("MsWrdText::readParagraphInfo: the zone size seems odd\n"));
916 return false;
917 }
918 entry.setParsed(true);
919
920 long pos = entry.begin();
921 MWAWInputStreamPtr &input= m_parserState->m_input;
922 input->seek(pos, librevenge::RVNG_SEEK_SET);
923 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
924 libmwaw::DebugStream f;
925 f << "ParaInfo:";
926 auto N=int(entry.length()/10);
927
928 std::vector<long> textPositions;
929 f << "[";
930 for (int i = 0; i <= N; i++) {
931 auto textPos = long(input->readULong(4));
932 textPositions.push_back(textPos);
933 f << std::hex << textPos << std::dec << ",";
934 }
935 f << "],";
936 ascFile.addPos(pos);
937 ascFile.addNote(f.str().c_str());
938
939 PLC plc(PLC::ParagraphInfo);
940 for (int i = 0; i < N; i++) {
941 pos = input->tell();
942 f.str("");
943 f << "ParaInfo-Pi" << i << ":" << std::hex << textPositions[size_t(i)] << std::dec << ",";
944 MsWrdStruct::ParagraphInfo paraMod;
945 if (!paraMod.read(input, pos+6, vers))
946 f << "###";
947 f << paraMod;
948 m_state->m_paraInfoList.push_back(paraMod);
949
950 if (textPositions[size_t(i)] > m_state->m_textLength[0]) {
951 MWAW_DEBUG_MSG(("MsWrdText::readParagraphInfo: text positions is bad...\n"));
952 f << "#";
953 }
954 else {
955 plc.m_id=i;
956 m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
957 (textPositions[size_t(i)],plc));
958 }
959 input->seek(pos+6, librevenge::RVNG_SEEK_SET);
960 ascFile.addPos(pos);
961 ascFile.addNote(f.str().c_str());
962 }
963
964 ascFile.addPos(entry.end());
965 ascFile.addNote("_");
966 return true;
967
968 }
969
970 ////////////////////////////////////////////////////////////
971 // read the page break
972 ////////////////////////////////////////////////////////////
readPageBreak(MsWrdEntry & entry)973 bool MsWrdText::readPageBreak(MsWrdEntry &entry)
974 {
975 int const vers = version();
976 int const fSz = vers <= 3 ? 8 : 10;
977 if (entry.length() < fSz+8 || (entry.length()%(fSz+4)) != 4) {
978 MWAW_DEBUG_MSG(("MsWrdText::readPageBreak: the zone size seems odd\n"));
979 return false;
980 }
981 long pos = entry.begin();
982 entry.setParsed(true);
983 MWAWInputStreamPtr &input= m_parserState->m_input;
984 input->seek(pos, librevenge::RVNG_SEEK_SET);
985 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
986 libmwaw::DebugStream f;
987 f << "PageBreak:";
988 auto N=int(entry.length()/(fSz+4));
989 std::vector<long> textPos; // checkme
990 textPos.resize(size_t(N)+1);
991 for (auto &tPos : textPos) tPos = long(input->readULong(4));
992 PLC plc(PLC::Page);
993 int prevPage=-1;
994 for (int i = 0; i < N; i++) {
995 MsWrdTextInternal::Page page;
996 page.m_id = i;
997 page.m_type = static_cast<int>(input->readULong(1));
998 page.m_values[0] = static_cast<int>(input->readLong(1)); // always 0,1,2
999 for (int j = 1; j < 3; j++) // always -1, 0
1000 page.m_values[j] = static_cast<int>(input->readLong(2));
1001 page.m_page = static_cast<int>(input->readLong(2));
1002 if (vers > 3)
1003 page.m_values[3] = static_cast<int>(input->readLong(2));
1004 if (i && textPos[size_t(i)]==textPos[size_t(i)-1] && page.m_page==prevPage) {
1005 // find this one time in v3...
1006 MWAW_DEBUG_MSG(("MsWrdText::readPageBreak: page %d is duplicated...\n", i));
1007 f << "#dup,";
1008 continue;
1009 }
1010 prevPage=page.m_page;
1011 m_state->m_pageList.push_back(page);
1012
1013 if (textPos[size_t(i)] > m_state->m_textLength[0]) {
1014 MWAW_DEBUG_MSG(("MsWrdText::readPageBreak: text positions is bad...\n"));
1015 f << "#";
1016 }
1017 else {
1018 plc.m_id = i;
1019 m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
1020 (textPos[size_t(i)],plc));
1021 }
1022 f << "[pos=" << textPos[size_t(i)] << "," << page << "],";
1023 }
1024 f << "end=" << std::hex << textPos[size_t(N)] << std::dec << ",";
1025 ascFile.addPos(pos);
1026 ascFile.addNote(f.str().c_str());
1027
1028 ascFile.addPos(entry.end());
1029 ascFile.addNote("_");
1030 return true;
1031 }
1032
1033 ////////////////////////////////////////////////////////////
1034 // read the footnotes pos + val
1035 ////////////////////////////////////////////////////////////
readFootnotesPos(MsWrdEntry & entry,std::vector<long> const & noteDef)1036 bool MsWrdText::readFootnotesPos(MsWrdEntry &entry, std::vector<long> const ¬eDef)
1037 {
1038 if (entry.length() < 4 || (entry.length()%6) != 4) {
1039 MWAW_DEBUG_MSG(("MsWrdText::readFootnotesPos: the zone size seems odd\n"));
1040 return false;
1041 }
1042 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1043 libmwaw::DebugStream f;
1044 auto N=int(entry.length()/6);
1045 if (N+2 != int(noteDef.size())) {
1046 MWAW_DEBUG_MSG(("MsWrdText::readFootnotesPos: the number N seems odd\n"));
1047 return false;
1048 }
1049 long pos = entry.begin();
1050 entry.setParsed(true);
1051 MWAWInputStreamPtr &input= m_parserState->m_input;
1052 input->seek(pos, librevenge::RVNG_SEEK_SET);
1053 f << "FootnotePos:";
1054
1055 std::vector<long> textPos;
1056 textPos.resize(size_t(N)+1);
1057 for (auto &tPos : textPos) tPos = long(input->readULong(4));
1058 long debFootnote = m_state->m_textLength[0];
1059 PLC plc(PLC::Footnote);
1060 PLC defPlc(PLC::FootnoteDef);
1061 for (int i = 0; i < N; i++) {
1062 MsWrdTextInternal::Footnote note;
1063 note.m_id = i;
1064 note.m_pos.setBegin(debFootnote+noteDef[size_t(i)]);
1065 note.m_pos.setEnd(debFootnote+noteDef[size_t(i)+1]);
1066 note.m_value = static_cast<int>(input->readLong(2));
1067 m_state->m_footnoteList.push_back(note);
1068
1069 if (textPos[size_t(i)] > m_state->getTotalTextSize()) {
1070 MWAW_DEBUG_MSG(("MsWrdText::readFootnotesPos: can not find text position\n"));
1071 f << "#";
1072 }
1073 else if (noteDef[size_t(i)+1] > m_state->m_textLength[1]) {
1074 MWAW_DEBUG_MSG(("MsWrdText::readFootnotesPos: can not find definition position\n"));
1075 f << "#";
1076 }
1077 else {
1078 defPlc.m_id = plc.m_id = i;
1079 m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
1080 (textPos[size_t(i)], plc));
1081 m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
1082 (note.m_pos.begin(), defPlc));
1083 }
1084 f << std::hex << textPos[size_t(i)] << std::dec << ":" << note;
1085 }
1086 f << "end=" << std::hex << textPos[size_t(N)] << std::dec << ",";
1087 ascFile.addPos(entry.begin());
1088 ascFile.addNote(f.str().c_str());
1089 ascFile.addPos(entry.end());
1090 ascFile.addNote("_");
1091 return true;
1092 }
1093
////////////////////////////////////////////////////////////
// read the footnotes data
////////////////////////////////////////////////////////////
readFootnotesData(MsWrdEntry & entry)1097 bool MsWrdText::readFootnotesData(MsWrdEntry &entry)
1098 {
1099 if (entry.length() < 4 || (entry.length()%14) != 4) {
1100 MWAW_DEBUG_MSG(("MsWrdText::readFootnotesData: the zone size seems odd\n"));
1101 return false;
1102 }
1103 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1104 libmwaw::DebugStream f;
1105 auto N=int(entry.length()/14);
1106 long pos = entry.begin();
1107 entry.setParsed(true);
1108 MWAWInputStreamPtr &input= m_parserState->m_input;
1109 input->seek(pos, librevenge::RVNG_SEEK_SET);
1110 f << "FootnoteData[" << N << "/" << m_state->m_footnoteList.size() << "]:";
1111
1112 std::vector<long> textPos; // checkme
1113 textPos.resize(size_t(N)+1);
1114 for (auto &tPos : textPos) tPos = long(input->readULong(4));
1115 for (int i = 0; i < N; i++) {
1116 if (textPos[size_t(i)] > m_state->m_textLength[1]) {
1117 MWAW_DEBUG_MSG(("MsWrdText::readFootnotesData: textPositions seems bad\n"));
1118 f << "#";
1119 }
1120 f << "N" << i << "=[";
1121 if (textPos[size_t(i)])
1122 f << "pos=" << std::hex << textPos[size_t(i)] << std::dec << ",";
1123 for (int j = 0; j < 5; j++) { // always 0|4000, -1, 0, id, 0 ?
1124 auto val=static_cast<int>(input->readLong(2));
1125 if (val && j == 0)
1126 f << std::hex << val << std::dec << ",";
1127 else if (val)
1128 f << val << ",";
1129 else f << "_,";
1130 }
1131 f << "],";
1132 }
1133 f << "end=" << std::hex << textPos[size_t(N)] << std::dec << ",";
1134 ascFile.addPos(entry.begin());
1135 ascFile.addNote(f.str().c_str());
1136 ascFile.addPos(entry.end());
1137 ascFile.addNote("_");
1138 return true;
1139 }
1140
////////////////////////////////////////////////////////////
// read the fields
////////////////////////////////////////////////////////////
readFields(MsWrdEntry & entry,std::vector<long> const & fieldPos)1144 bool MsWrdText::readFields(MsWrdEntry &entry, std::vector<long> const &fieldPos)
1145 {
1146 long pos = entry.begin();
1147 auto N = int(fieldPos.size());
1148 long textLength = m_state->getTotalTextSize();
1149 if (N==0) {
1150 MWAW_DEBUG_MSG(("MsWrdText::readFields: number of fields is 0\n"));
1151 return false;
1152 }
1153 N--;
1154 entry.setParsed(true);
1155 MWAWInputStreamPtr &input= m_parserState->m_input;
1156 input->seek(pos, librevenge::RVNG_SEEK_SET);
1157
1158 auto sz = long(input->readULong(2));
1159 if (entry.length() != sz) {
1160 MWAW_DEBUG_MSG(("MsWrdText::readFields: the zone size seems odd\n"));
1161 return false;
1162 }
1163 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1164 libmwaw::DebugStream f, f2;
1165 f << "FieldName:";
1166 int const endSize = (version()==5) ? 2 : 1;
1167 PLC plc(PLC::Field);
1168 for (int n = 1; n < N; n++) {
1169 if (input->tell() >= entry.end()) {
1170 MWAW_DEBUG_MSG(("MsWrdText::readFields: can not find all field\n"));
1171 break;
1172 }
1173 pos = input->tell();
1174 auto fSz = static_cast<int>(input->readULong(1));
1175 if (pos+1+fSz > entry.end()) {
1176 MWAW_DEBUG_MSG(("MsWrdText::readFields: can not read a string\n"));
1177 input->seek(pos, librevenge::RVNG_SEEK_SET);
1178 f << "#";
1179 break;
1180 }
1181 int endSz = fSz < endSize ? 0 : endSize;
1182
1183 f2.str("");
1184 std::string text("");
1185 for (int i = 0; i < fSz-endSz; i++) {
1186 auto c = char(input->readULong(1));
1187 if (c==0) f2 << '#';
1188 else text+=c;
1189 }
1190 MsWrdTextInternal::Field field;
1191 if (!endSz) ;
1192 else if (version()>=5 && input->readULong(1) != 0xc) {
1193 input->seek(-1, librevenge::RVNG_SEEK_CUR);
1194 for (int i = 0; i < 2; i++) text+=char(input->readULong(1));
1195 }
1196 else {
1197 auto id = static_cast<int>(input->readULong(1));
1198 if (id >= N) {
1199 if (version()>=5) {
1200 MWAW_DEBUG_MSG(("MsWrdText::readFields: find a strange id\n"));
1201 f2 << "#";
1202 }
1203 else
1204 text+=char(id);
1205 }
1206 else
1207 field.m_id = id;
1208 }
1209 field.m_text = text;
1210 field.m_error = f2.str();
1211 m_state->m_fieldList.push_back(field);
1212
1213 f << "N" << n << "=" << field << ",";
1214 if (fieldPos[size_t(n)] >= textLength) {
1215 MWAW_DEBUG_MSG(("MsWrdText::readFields: text positions is bad...\n"));
1216 f << "#";
1217 }
1218 else {
1219 plc.m_id = n-1;
1220 m_state->m_plcMap.insert(std::multimap<long,PLC>::value_type
1221 (fieldPos[size_t(n)], plc));
1222 }
1223 }
1224 if (long(input->tell()) != entry.end())
1225 ascFile.addDelimiter(input->tell(), '|');
1226 ascFile.addPos(entry.begin());
1227 ascFile.addNote(f.str().c_str());
1228 ascFile.addPos(entry.end());
1229 ascFile.addNote("_");
1230 return true;
1231 }
1232
1233 ////////////////////////////////////////////////////////////
1234 // read a list of ints zone
1235 ////////////////////////////////////////////////////////////
readLongZone(MsWrdEntry & entry,int sz,std::vector<long> & list)1236 bool MsWrdText::readLongZone(MsWrdEntry &entry, int sz, std::vector<long> &list)
1237 {
1238 list.resize(0);
1239 if (entry.length() < sz || (entry.length()%sz)) {
1240 MWAW_DEBUG_MSG(("MsWrdText::readIntsZone: the size of zone %s seems to odd\n", entry.type().c_str()));
1241 return false;
1242 }
1243
1244 long pos = entry.begin();
1245 MWAWInputStreamPtr &input= m_parserState->m_input;
1246 input->seek(pos, librevenge::RVNG_SEEK_SET);
1247 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1248 libmwaw::DebugStream f;
1249 f << entry.type() << ":";
1250 auto N = int(entry.length()/sz);
1251 for (int i = 0; i < N; i++) {
1252 auto val = static_cast<int>(input->readLong(sz));
1253 if (input->checkPosition(val))
1254 list.push_back(val);
1255 f << std::hex << val << std::dec << ",";
1256 }
1257
1258 if (long(input->tell()) != entry.end())
1259 ascFile.addDelimiter(input->tell(), '|');
1260
1261 entry.setParsed(true);
1262
1263 ascFile.addPos(entry.begin());
1264 ascFile.addNote(f.str().c_str());
1265
1266 ascFile.addPos(entry.end());
1267 ascFile.addNote("_");
1268 return true;
1269 }
1270
1271 ////////////////////////////////////////////////////////////
1272 // sort/prepare data
1273 ////////////////////////////////////////////////////////////
prepareLines()1274 void MsWrdText::prepareLines()
1275 {
1276 m_state->m_lineList.clear();
1277 long cPos = 0, cEnd = m_state->getTotalTextSize();
1278 if (cEnd <= 0) return;
1279
1280 MWAWInputStreamPtr &input= m_parserState->m_input;
1281 input->seek(m_state->getFilePos(0), librevenge::RVNG_SEEK_SET);
1282
1283 MsWrdTextInternal::Line line;
1284 line.m_cPos[0]=0;
1285 size_t numTextPos = m_state->m_textposList.size();
1286 while (!input->isEnd() && cPos < cEnd) {
1287 auto plcIt = m_state->m_plcMap.lower_bound(cPos);
1288 while (plcIt != m_state->m_plcMap.end() && plcIt->first==cPos) {
1289 auto const &plc = plcIt++->second;
1290 if (plc.m_type != PLC::TextPosition)
1291 continue;
1292 if (plc.m_id < 0 || plc.m_id >= static_cast<int>(numTextPos))
1293 continue;
1294 auto const &textEntry= m_state->m_textposList[size_t(plc.m_id)];
1295 input->seek(textEntry.begin(), librevenge::RVNG_SEEK_SET);
1296 }
1297 auto c=char(input->readLong(1));
1298 ++cPos;
1299 if (c!=0x7 && c!=0xd && cPos!=cEnd)
1300 continue;
1301 line.m_cPos[1]=cPos;
1302 if (c==0x7)
1303 line.m_type=MsWrdTextInternal::Line::L_LastLineCell;
1304 else
1305 line.m_type=MsWrdTextInternal::Line::L_Line;
1306 m_state->m_lineList.push_back(line);
1307
1308 line.m_cPos[0]=cPos;
1309 }
1310 }
1311
convertFilePLCPos()1312 void MsWrdText::convertFilePLCPos()
1313 {
1314 size_t numTextPos = m_state->m_textposList.size();
1315 auto &cMap=m_state->m_plcMap;
1316
1317 // create the list of table delimiters
1318 std::set<long> tableSet;
1319 for (auto const &line : m_state->m_lineList) {
1320 if (line.m_type==MsWrdTextInternal::Line::L_Line)
1321 tableSet.insert(line.m_cPos[1]);
1322 }
1323
1324 auto tableIt=tableSet.begin();
1325 MsWrdText::PLC resetParaPLC(PLC::Paragraph,-1);
1326 // simplest case
1327 if (!numTextPos) {
1328 long const bottom = m_state->m_bot;
1329 long pPos=bottom;
1330 for (auto it : m_state->m_filePlcMap) {
1331 long pos=it.first, prevPos=0;
1332 MsWrdText::PLC const &plc=it.second;
1333 if (plc.m_type==PLC::Paragraph) {
1334 while (tableIt!=tableSet.end() && *tableIt<=pos-bottom) {
1335 long resPos=*(tableIt++);
1336 if (resPos<pos-bottom) {
1337 m_state->m_paragraphLimitMap[pPos-bottom]=-1;
1338 cMap.insert(std::map<long, MsWrdText::PLC>::value_type(pPos-bottom, resetParaPLC));
1339 pPos=resPos;
1340 }
1341 }
1342 m_state->m_paragraphLimitMap[pPos-bottom]=-1;
1343 prevPos=pPos;
1344 pPos=pos;
1345 }
1346 else if (plc.m_type==PLC::Font)
1347 prevPos=pos;
1348 else {
1349 MWAW_DEBUG_MSG(("MsWrdText::convertFilePLCPos: unexpected plc type: %d\n", plc.m_type));
1350 continue;
1351 }
1352 cMap.insert(std::map<long, MsWrdText::PLC>::value_type(prevPos-bottom, plc));
1353 }
1354 return;
1355 }
1356
1357 long cPos=0, pPos=0;
1358 int fontId=-1;
1359 for (size_t i=0; i < numTextPos; ++i) {
1360 auto const &tPos=m_state->m_textposList[i];
1361 long const begPos= tPos.begin();
1362 long const endPos=tPos.end();
1363 bool fontCheck=false;
1364 auto it=m_state->m_filePlcMap.lower_bound(begPos);
1365 while (it!=m_state->m_filePlcMap.end()) {
1366 long pos=it->first;
1367 if (!fontCheck && pos!=begPos) {
1368 // time to check if the font has changed
1369 auto fIt=m_state->m_filePlcMap.lower_bound(begPos);
1370 while (fIt!=m_state->m_filePlcMap.begin()) {
1371 if (fIt==m_state->m_filePlcMap.end()||fIt->first>=begPos)
1372 --fIt;
1373 else
1374 break;
1375 }
1376 while (fIt!=m_state->m_filePlcMap.end()) {
1377 if (fIt->first >= begPos)
1378 break;
1379 auto const &plc=fIt->second;
1380 if (plc.m_type==PLC::Font) {
1381 if (fontId!=plc.m_id) {
1382 fontId=plc.m_id;
1383 cMap.insert(std::map<long, MsWrdText::PLC>::value_type(cPos, plc));
1384 }
1385 break;
1386 }
1387 if (fIt==m_state->m_filePlcMap.begin())
1388 break;
1389 --fIt;
1390 }
1391 fontCheck=true;
1392 }
1393 if (pos>endPos)
1394 break;
1395 auto const &plc=it++->second;
1396 long newCPos=cPos+(pos-begPos), prevPos=0;
1397 if (plc.m_type==PLC::Paragraph) {
1398 if (pos==begPos)
1399 continue;
1400 while (tableIt!=tableSet.end() && *tableIt<=newCPos) {
1401 long resPos=*(tableIt++);
1402 if (resPos<newCPos) {
1403 m_state->m_paragraphLimitMap[pPos]=-1;
1404 cMap.insert(std::map<long, MsWrdText::PLC>::value_type(pPos, resetParaPLC));
1405 pPos=resPos;
1406 }
1407 }
1408 m_state->m_paragraphLimitMap[pPos]=int(i);
1409 prevPos=pPos;
1410 pPos=newCPos;
1411 }
1412 else if (plc.m_type==PLC::Font) {
1413 if (pos==endPos)
1414 continue;
1415 fontCheck=true;
1416 fontId=plc.m_id;
1417 prevPos=newCPos;
1418 }
1419 else {
1420 MWAW_DEBUG_MSG(("MsWrdText::convertFilePLCPos: unexpected plc type: %d\n", plc.m_type));
1421 continue;
1422 }
1423 cMap.insert(std::map<long, MsWrdText::PLC>::value_type(prevPos, plc));
1424 }
1425 cPos+=tPos.length();
1426 }
1427 }
1428
prepareParagraphProperties()1429 void MsWrdText::prepareParagraphProperties()
1430 {
1431 int const vers=version();
1432 auto textposSize = int(m_state->m_textposList.size());
1433 MsWrdTextInternal::Line::Type lineType=MsWrdTextInternal::Line::L_Line;
1434 MsWrdStruct::Paragraph paragraph(vers), tablePara(vers);
1435 long cTableEndPos=-1;
1436 bool inTable=false;
1437 size_t numLines=m_state->m_lineList.size();
1438 for (int i=0; i<int(numLines); ++i) {
1439 MsWrdTextInternal::Line &line = m_state->m_lineList[size_t(i)];
1440
1441 long cPos=line.m_cPos[0];
1442 if (inTable && cPos>=cTableEndPos) {
1443 inTable=false;
1444 lineType=MsWrdTextInternal::Line::L_Line;
1445 }
1446 auto pIt=m_state->m_paragraphLimitMap.lower_bound(cPos);
1447 if (pIt==m_state->m_paragraphLimitMap.end() || pIt->first!=cPos) {
1448 line.m_type=lineType;
1449 continue;
1450 }
1451 int textId=pIt->second;
1452
1453 // first retrieve the paragraph
1454 auto plcIt=m_state->m_plcMap.lower_bound(cPos);
1455 while (plcIt != m_state->m_plcMap.end() && plcIt->first==cPos) {
1456 MsWrdText::PLC const &plc = plcIt++->second;
1457 if (plc.m_type != PLC::Paragraph)
1458 continue;
1459 if (plc.m_id>=0)
1460 m_stylesManager->getParagraph(MsWrdTextStyles::TextZone,
1461 plc.m_id, paragraph);
1462 else
1463 paragraph=MsWrdStruct::Paragraph(vers);
1464 if (inTable) {
1465 MsWrdStruct::Paragraph tmpPara=tablePara;
1466 tmpPara.insert(paragraph);
1467 paragraph=tmpPara;
1468 }
1469 }
1470
1471 MsWrdStruct::Paragraph finalPara(paragraph);
1472 if (textId>=0 && textId < textposSize) {
1473 auto const &textEntry=m_state->m_textposList[size_t(textId)];
1474 int id=textEntry.getParagraphId();
1475 // checkme do we need to test (textEntry.m_flags&0x80)==0 here
1476 if (id>=0) {
1477 MsWrdStruct::Paragraph modifier(vers);
1478 m_stylesManager->getParagraph(MsWrdTextStyles::TextStructZone, id, modifier);
1479 finalPara.insert(modifier);
1480 }
1481 }
1482
1483 if (finalPara.m_styleId.isSet()) {
1484 MsWrdStruct::Paragraph style(vers);
1485 m_stylesManager->getParagraph(MsWrdTextStyles::StyleZone,*finalPara.m_styleId, style);
1486 MsWrdStruct::Paragraph tmpPara(style);
1487 tmpPara.insert(finalPara);
1488 tmpPara.updateParagraphToFinalState(&style);
1489 finalPara=tmpPara;
1490 }
1491 else
1492 finalPara.updateParagraphToFinalState();
1493
1494 if (!inTable && (finalPara.inTable()||line.m_type==MsWrdTextInternal::Line::L_LastLineCell) &&
1495 updateTableBeginnningAt(cPos, cTableEndPos) && cPos<cTableEndPos) {
1496 inTable=true;
1497 // ok, find the main table paragraph and loop
1498 tablePara=MsWrdStruct::Paragraph(vers);
1499 plcIt=m_state->m_plcMap.lower_bound(cTableEndPos-1);
1500 while (plcIt != m_state->m_plcMap.end() && plcIt->first==cTableEndPos-1) {
1501 MsWrdText::PLC const &plc = plcIt++->second;
1502 if (plc.m_type != PLC::Paragraph)
1503 continue;
1504 if (plc.m_id>=0)
1505 m_stylesManager->getParagraph(MsWrdTextStyles::TextZone, plc.m_id, tablePara);
1506 }
1507 paragraph=tablePara;
1508 --i;
1509 continue;
1510 }
1511 if (inTable && line.m_type==MsWrdTextInternal::Line::L_Line)
1512 line.m_type=MsWrdTextInternal::Line::L_Cell;
1513
1514 // store the result
1515 m_state->m_paragraphMap.insert
1516 (std::map<long, MsWrdStruct::Paragraph>::value_type(cPos,finalPara));
1517 lineType=line.m_type;
1518 }
1519 }
1520
prepareFontProperties()1521 void MsWrdText::prepareFontProperties()
1522 {
1523 int const vers = version();
1524 long cPos = 0, cEnd = m_state->getTotalTextSize();
1525 if (cEnd <= 0) return;
1526
1527 auto &map = m_state->m_plcMap;
1528 auto textposSize = int(m_state->m_textposList.size());
1529 MsWrdStruct::Font font, modifier, paraFont, styleFont;
1530 int actStyle=-1;
1531 while (cPos < cEnd) {
1532 bool fontChanged=false;
1533 if (m_state->m_paragraphMap.find(cPos)!=m_state->m_paragraphMap.end()) {
1534 auto const ¶= m_state->m_paragraphMap.find(cPos)->second;
1535 para.getFont(paraFont);
1536 if (para.m_styleId.isSet() && actStyle!=*para.m_styleId) {
1537 actStyle=*para.m_styleId;
1538 styleFont=MsWrdStruct::Font();
1539 m_stylesManager->getFont(MsWrdTextStyles::StyleZone, *para.m_styleId, styleFont);
1540 }
1541 fontChanged=true; // force a font change (even if no needed)
1542 }
1543
1544 long cNextPos = cEnd;
1545 auto plcIt = map.lower_bound(cPos);
1546 int textPId=-2;
1547 while (plcIt != map.end()) {
1548 if (plcIt->first != cPos) {
1549 cNextPos=plcIt->first;
1550 break;
1551 }
1552 PLC const &plc = plcIt++->second;
1553 int pId = plc.m_id;
1554 switch (plc.m_type) {
1555 case PLC::TextPosition: {
1556 if (pId < 0 || pId > textposSize) {
1557 MWAW_DEBUG_MSG(("MsWrdText::prepareFontProperties: oops can not find textstruct!!!!\n"));
1558 break;
1559 }
1560 auto const &textEntry=m_state->m_textposList[size_t(pId)];
1561 textPId=textEntry.getParagraphId();
1562 break;
1563 }
1564 case PLC::Font:
1565 fontChanged=true;
1566 modifier=font=MsWrdStruct::Font();
1567 if (pId >= 0)
1568 m_stylesManager->getFont(MsWrdTextStyles::TextZone, pId, font);
1569 break;
1570 case PLC::Field:
1571 case PLC::Footnote:
1572 case PLC::FootnoteDef:
1573 case PLC::HeaderFooter:
1574 case PLC::Object:
1575 case PLC::Page:
1576 case PLC::Paragraph:
1577 case PLC::ParagraphInfo:
1578 case PLC::Section:
1579 #if !defined(__clang__)
1580 default:
1581 #endif
1582 break;
1583 }
1584 }
1585 if (textPId>=0) {
1586 MsWrdStruct::Paragraph para(vers);
1587 m_stylesManager->getParagraph(MsWrdTextStyles::TextStructZone, textPId, para);
1588 modifier=MsWrdStruct::Font();
1589 para.getFont(modifier);
1590 fontChanged=true;
1591 }
1592 else if (textPId==-1) {
1593 modifier=MsWrdStruct::Font();
1594 fontChanged=true;
1595 }
1596 if (fontChanged) {
1597 MsWrdStruct::Font final(paraFont); // or stylefont
1598 final.insert(font, &styleFont);
1599 final.insert(modifier, &styleFont);
1600 m_state->m_fontMap[cPos] = final;
1601 }
1602 cPos = cNextPos;
1603 }
1604 }
1605
prepareTableLimits()1606 void MsWrdText::prepareTableLimits()
1607 {
1608 int const vers=version();
1609 size_t numLines=m_state->m_lineList.size();
1610 // first find the table delimiters
1611 std::map<long,size_t> cposToLineMap;
1612 for (size_t l=0; l < numLines; ++l) {
1613 MsWrdTextInternal::Line const &line = m_state->m_lineList[l];
1614 if (line.m_type != MsWrdTextInternal::Line::L_LastLineCell)
1615 continue;
1616 cposToLineMap[line.m_cPos[1]-1]=l;
1617 }
1618
1619 size_t numTextpos=m_state->m_textposList.size();
1620 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1621 auto tPosIt=cposToLineMap.begin();
1622 while (tPosIt!=cposToLineMap.end()) {
1623 size_t lId=tPosIt->second;
1624 if (lId>=numLines) {
1625 MWAW_DEBUG_MSG(("MsWrdText::prepareTableLimits: lId is bad\n"));
1626 ++tPosIt;
1627 continue;
1628 }
1629 auto line = m_state->m_lineList[lId];
1630 std::vector<long> listDelimiterCells;
1631 bool ok=false;
1632 auto actTPosIt=tPosIt;
1633 while (tPosIt!=cposToLineMap.end()) {
1634 long cPos=tPosIt->first;
1635 lId=tPosIt++->second;
1636 listDelimiterCells.push_back(cPos);
1637 if (lId>=numLines) {
1638 MWAW_DEBUG_MSG(("MsWrdText::prepareTableLimits: lId is bad(II)\n"));
1639 break;
1640 }
1641 line=m_state->m_lineList[lId];
1642 MsWrdStruct::Paragraph para(vers);
1643 // try to retrieve the paragraph attributes
1644 auto plcIt=m_state->m_plcMap.lower_bound(cPos);
1645 while (plcIt != m_state->m_plcMap.end() && plcIt->first==cPos) {
1646 auto const &plc = plcIt++->second;
1647 if (plc.m_type != PLC::Paragraph)
1648 continue;
1649 if (plc.m_id>=0)
1650 m_stylesManager->getParagraph(MsWrdTextStyles::TextZone, plc.m_id, para);
1651 if (para.m_styleId.isSet()) {
1652 MsWrdStruct::Paragraph style(vers);
1653 m_stylesManager->getParagraph(MsWrdTextStyles::StyleZone,*para.m_styleId, style);
1654 style.insert(para);
1655 para=style;
1656 }
1657 }
1658 auto pIt=m_state->m_paragraphLimitMap.find(line.m_cPos[0]);
1659 if (pIt!=m_state->m_paragraphLimitMap.end() && pIt->second>0 && pIt->second<static_cast<int>(numTextpos)) {
1660 auto const &textEntry=m_state->m_textposList[size_t(pIt->second)];
1661 int id=textEntry.getParagraphId();
1662 if (id>=0) {
1663 MsWrdStruct::Paragraph modifier(vers);
1664 m_stylesManager->getParagraph(MsWrdTextStyles::TextStructZone, id, modifier);
1665 para.insert(modifier);
1666 }
1667 }
1668 if (!para.m_tableDef.get() || !para.m_table.isSet() || !para.m_table->m_columns.isSet())
1669 continue;
1670 m_state->m_lineList[lId].m_type=MsWrdTextInternal::Line::L_LastRowCell;
1671
1672 // ok, we have find the end of the table
1673 auto const &table = para.m_table.get();
1674 size_t numCols=table.m_columns->size();
1675 if (!numCols || listDelimiterCells.size()!=numCols) {
1676 MWAW_DEBUG_MSG(("MsWrdText::prepareTableLimits: can not find the number of row for position %ld(%d,%d)\n", line.m_cPos[0], int(listDelimiterCells.size()), static_cast<int>(numCols)));
1677 break;
1678 }
1679
1680 std::shared_ptr<MsWrdTextInternal::Table> finalTable(new MsWrdTextInternal::Table);
1681 finalTable->m_delimiterPos = listDelimiterCells;
1682 finalTable->m_cells = table.m_cells;
1683 if (table.m_height.isSet())
1684 finalTable->m_height=*table.m_height;
1685 std::vector<float> width(numCols-1);
1686 for (size_t c = 0; c < numCols-1; c++)
1687 width[c]=table.m_columns.get()[c+1]-table.m_columns.get()[c];
1688 finalTable->setColsSize(width);
1689 for (auto id : listDelimiterCells)
1690 m_state->m_tableMap[id]=finalTable;
1691 listDelimiterCells.clear();
1692 ok=true;
1693 break;
1694 }
1695 if (ok)
1696 continue;
1697
1698 ascFile.addPos(m_state->getFilePos(listDelimiterCells[0]));
1699 ascFile.addNote("###table");
1700 m_state->m_tableMap[listDelimiterCells[0]]=std::shared_ptr<MsWrdTextInternal::Table>();
1701 tPosIt=++actTPosIt;
1702 MWAW_DEBUG_MSG(("MsWrdText::prepareTableLimits: problem finding some table limits\n"));
1703 }
1704 }
1705
updateTableBeginnningAt(long cPos,long & nextCPos)1706 bool MsWrdText::updateTableBeginnningAt(long cPos, long &nextCPos)
1707 {
1708 auto tableIt=m_state->m_tableMap.lower_bound(cPos);
1709 if (tableIt==m_state->m_tableMap.end() || !tableIt->second ||
1710 tableIt->second->m_delimiterPos.empty() ||
1711 tableIt->second->m_delimiterPos[0] < cPos) {
1712 MWAW_DEBUG_MSG(("MsWrdText::updateTableBeginnningAt: can find no table at position %ld\n", cPos));
1713 return false;
1714 }
1715 auto table=tableIt->second;
1716 size_t numDelim=table->m_delimiterPos.size();
1717 table->m_cellPos.resize(numDelim);
1718 table->m_cellPos[0]=cPos;
1719 for (size_t c=0; c+1<numDelim; ++c)
1720 table->m_cellPos[c+1]=table->m_delimiterPos[c]+1;
1721 for (size_t c=0; c+1<table->m_cellPos.size(); ++c)
1722 m_state->m_tableCellPosSet.insert(table->m_cellPos[c]);
1723 if (table->m_delimiterPos[0]!=cPos)
1724 m_state->m_tableMap[cPos]=table;
1725 nextCPos=table->m_delimiterPos[numDelim-1]+1;
1726 return true;
1727 }
1728
prepareData()1729 void MsWrdText::prepareData()
1730 {
1731 #if defined(DEBUG_WITH_FILES) && DEBUG_PARAGRAPH
1732 int const vers = version();
1733 #endif
1734 long cPos = 0, cEnd = m_state->getTotalTextSize();
1735 if (cEnd <= 0) return;
1736 prepareLines();
1737 convertFilePLCPos();
1738 prepareTableLimits();
1739
1740 prepareParagraphProperties();
1741 prepareFontProperties();
1742
1743 MsWrdStruct::Font defaultFont;
1744 long pos = m_state->getFilePos(cPos);
1745 auto textposSize = int(m_state->m_textposList.size());
1746
1747 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1748 libmwaw::DebugStream f, f2;
1749 PLC::ltstr compare;
1750
1751 auto &map = m_state->m_plcMap;
1752 while (cPos < cEnd) {
1753 f.str("");
1754 // first find the list of the plc
1755 long cNextPos = cEnd;
1756
1757 std::set<PLC, PLC::ltstr> sortedPLC(compare);
1758 auto plcIt = map.lower_bound(cPos);
1759 while (plcIt != map.end()) {
1760 if (plcIt->first != cPos) {
1761 cNextPos=plcIt->first;
1762 break;
1763 }
1764 PLC const &plc = plcIt++->second;
1765 if (plc.m_type!=PLC::Paragraph&&plc.m_type!=PLC::Font)
1766 sortedPLC.insert(plc);
1767 #if DEBUG_PLC
1768 if (plc.m_type != PLC::TextPosition)
1769 f << "[" << plc << "],";
1770 #endif
1771
1772 int pId = plc.m_id;
1773 switch (plc.m_type) {
1774 case PLC::TextPosition:
1775 if (pId < 0 || pId > textposSize) {
1776 MWAW_DEBUG_MSG(("MsWrdText::prepareData: oops can not find textstruct!!!!\n"));
1777 f << "[###tP" << pId << "]";
1778 }
1779 else {
1780 auto const &textEntry=m_state->m_textposList[size_t(pId)];
1781 pos = textEntry.begin();
1782 #if defined(DEBUG_WITH_FILES) && DEBUG_PARAGRAPH
1783 int paraId=textEntry.getParagraphId();
1784 if (paraId < 0)
1785 f << "tP_,";
1786 else {
1787 MsWrdStruct::Paragraph para(vers);
1788 m_stylesManager->getParagraph(MsWrdTextStyles::TextStructZone, paraId, para);
1789 f << "tP" << paraId << "=[";
1790 para.print(f, m_parserState->m_fontConverter);
1791 f << "],";
1792 }
1793 #endif
1794 }
1795 break;
1796 case PLC::Section:
1797 #if defined(DEBUG_WITH_FILES) && DEBUG_SECTION
1798 if (pId >= 0) {
1799 MsWrdStruct::Section sec;
1800 m_stylesManager->getSection(MsWrdTextStyles::TextZone, pId, sec);
1801 f << "S" << pId << "=[" << sec << "],";
1802 }
1803 else
1804 f << "S_,";
1805 #endif
1806 break;
1807 case PLC::ParagraphInfo:
1808 #if defined(DEBUG_WITH_FILES) && DEBUG_PARAGRAPHINFO
1809 if (pId >= 0 && pId < int(m_state->m_paraInfoList.size())) {
1810 MsWrdStruct::ParagraphInfo info=m_state->m_paraInfoList[size_t(pId)];
1811 f << "Pi" << pId << "=[" << info << "],";
1812 }
1813 else
1814 f << "Pi_,";
1815 #endif
1816 break;
1817 case PLC::Page:
1818 #if defined(DEBUG_WITH_FILES) && DEBUG_PAGE
1819 if (pId >= 0 && pId < int(m_state->m_pageList.size()))
1820 f << "Pg" << pId << "=[" << m_state->m_pageList[size_t(pId)] << "],";
1821 else
1822 f << "Pg_,";
1823 #endif
1824 break;
1825 case PLC::Paragraph:
1826 #if defined(DEBUG_WITH_FILES) && DEBUG_PARAGRAPH
1827 if (pId >= 0) {
1828 MsWrdStruct::Paragraph para(vers);
1829 m_stylesManager->getParagraph(MsWrdTextStyles::TextZone, pId, para);
1830 f << "P" << pId << "=[";
1831 para.print(f, m_parserState->m_fontConverter);
1832 f << "],";
1833 }
1834 else f << "P_,";
1835 #endif
1836 break;
1837 case PLC::Font: {
1838 #if defined(DEBUG_WITH_FILES) && DEBUG_FONT
1839 if (pId >= 0) {
1840 MsWrdStruct::Font font;
1841 m_stylesManager->getFont(MsWrdTextStyles::TextZone, pId, font);
1842 f << "F" << pId << "=[" << font.m_font->getDebugString(m_parserState->m_fontConverter) << font << "],";
1843 }
1844 else
1845 f << "F_,";
1846 #endif
1847 break;
1848 }
1849 case PLC::Field:
1850 case PLC::Footnote:
1851 case PLC::FootnoteDef:
1852 case PLC::HeaderFooter:
1853 case PLC::Object:
1854 #if !defined(__clang__)
1855 default:
1856 #endif
1857 break;
1858 }
1859 }
1860 MsWrdTextInternal::Property prop;
1861 prop.m_fPos = pos;
1862 prop.m_plcList=std::vector<PLC>(sortedPLC.begin(), sortedPLC.end());
1863
1864 if (f.str().length()) {
1865 f2.str("");
1866 f2 << "TextContent["<<cPos<<"]:" << f.str();
1867 ascFile.addPos(pos);
1868 ascFile.addNote(f2.str().c_str());
1869 #if defined(DEBUG_WITH_FILES)
1870 m_state->debugFile2() << f2.str() << "\n";
1871 #endif
1872 prop.m_debugPrint = true;
1873 }
1874 m_state->m_propertyMap[cPos] = prop;
1875 pos+=(cNextPos-cPos);
1876 cPos = cNextPos;
1877 }
1878 }
1879
1880 ////////////////////////////////////////////////////////////
1881 // try to read a text entry
1882 ////////////////////////////////////////////////////////////
sendText(MWAWEntry const & textEntry,bool mainZone,bool tableCell)1883 bool MsWrdText::sendText(MWAWEntry const &textEntry, bool mainZone, bool tableCell)
1884 {
1885 if (!textEntry.valid()) return false;
1886 MWAWTextListenerPtr listener=m_parserState->m_textListener;
1887 if (!listener) {
1888 MWAW_DEBUG_MSG(("MsWrdText::sendText: can not find a listener!"));
1889 return true;
1890 }
1891 long cPos = textEntry.begin();
1892 long debPos = m_state->getFilePos(cPos), pos=debPos;
1893 MWAWInputStreamPtr &input= m_parserState->m_input;
1894 input->seek(pos, librevenge::RVNG_SEEK_SET);
1895 long cEnd = textEntry.end();
1896
1897 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1898 libmwaw::DebugStream f;
1899 f << "TextContent[" << cPos << "]:";
1900 long pictPos = -1;
1901 while (!input->isEnd() && cPos < cEnd) {
1902 bool newTable = false;
1903 long cEndPos = cEnd;
1904
1905 MsWrdTextInternal::Property *prop = nullptr;
1906 auto propIt = m_state->m_propertyMap.upper_bound(cPos);
1907 if (propIt != m_state->m_propertyMap.end() && propIt->first < cEndPos && propIt->first > cPos)
1908 cEndPos = propIt->first;
1909
1910 size_t numPLC = 0;
1911 propIt = m_state->m_propertyMap.find(cPos);
1912 if (propIt != m_state->m_propertyMap.end()) {
1913 prop = &propIt->second;
1914 pos = prop->m_fPos;
1915 newTable = !tableCell && m_state->getTable(cPos);
1916 input->seek(pos, librevenge::RVNG_SEEK_SET);
1917 numPLC = prop->m_plcList.size();
1918 }
1919 int newSectionId=-1;
1920 for (size_t i = 0; i < numPLC; i++) {
1921 PLC const &plc = prop->m_plcList[i];
1922 if (newTable && int(plc.m_type) >= int(PLC::ParagraphInfo)) continue;
1923 switch (plc.m_type) {
1924 case PLC::Page: {
1925 if (tableCell) break;
1926 if (mainZone) m_mainParser->newPage(++m_state->m_actPage);
1927 break;
1928 }
1929 case PLC::Section:
1930 if (tableCell) break;
1931 newSectionId=plc.m_id;
1932 break;
1933 case PLC::Field: // some fields ?
1934 #ifdef DEBUG
1935 m_mainParser->sendFieldComment(plc.m_id);
1936 #endif
1937 break;
1938 case PLC::Footnote:
1939 m_mainParser->sendFootnote(plc.m_id);
1940 break;
1941 case PLC::TextPosition:
1942 case PLC::Font:
1943 case PLC::FootnoteDef:
1944 case PLC::HeaderFooter:
1945 case PLC::Object:
1946 case PLC::Paragraph:
1947 case PLC::ParagraphInfo:
1948 #if !defined(__clang__)
1949 default:
1950 #endif
1951 break;
1952 }
1953 }
1954 if (newSectionId >= 0)
1955 sendSection(newSectionId);
1956 if ((prop && prop->m_debugPrint) || newTable) {
1957 ascFile.addPos(debPos);
1958 ascFile.addNote(f.str().c_str());
1959 #if defined(DEBUG_WITH_FILES)
1960 m_state->debugFile2() << f.str() << "\n";
1961 #endif
1962 f.str("");
1963 f << "TextContent["<<cPos<<"]:";
1964 debPos = pos;
1965 }
1966 // time to send the table
1967 std::shared_ptr<MsWrdTextInternal::Table> table;
1968 if (newTable && (table=m_state->getTable(cPos))) {
1969 long actCPos = cPos;
1970 bool ok = sendTable(*table);
1971 cPos = ok ? table->m_cellPos.back()+1 : actCPos;
1972 pos=debPos=m_state->getFilePos(cPos);
1973 input->seek(pos, librevenge::RVNG_SEEK_SET);
1974 f.str("");
1975 f << "TextContent["<<cPos<<"]:";
1976 if (ok)
1977 continue;
1978 }
1979 if (m_state->m_paragraphMap.find(cPos) != m_state->m_paragraphMap.end())
1980 listener->setParagraph(m_state->m_paragraphMap.find(cPos)->second);
1981 if (m_state->m_fontMap.find(cPos) != m_state->m_fontMap.end()) {
1982 auto const &font = m_state->m_fontMap.find(cPos)->second;
1983 pictPos = font.m_picturePos.get();
1984 m_stylesManager->setProperty(font);
1985 }
1986 for (long p = cPos; p < cEndPos; p++) {
1987 auto c = static_cast<int>(input->readULong(1));
1988 cPos++;
1989 pos++;
1990 switch (c) {
1991 case 0x1:
1992 if (pictPos <= 0) {
1993 MWAW_DEBUG_MSG(("MsWrdText::sendText: can not find picture\n"));
1994 f << "###";
1995 break;
1996 }
1997 m_mainParser->sendPicture(pictPos, int(cPos), MWAWPosition::Char);
1998 break;
1999 case 0x7: // FIXME: cell end ?
2000 listener->insertEOL();
2001 break;
2002 case 0xc: // end section (ok)
2003 break;
2004 case 0x2:
2005 listener->insertField(MWAWField(MWAWField::PageNumber));
2006 break;
2007 case 0x6:
2008 listener->insertChar('\\');
2009 break;
2010 case 0x1e: // unbreaking - ?
2011 listener->insertChar('-');
2012 break;
2013 case 0x1f: // hyphen
2014 break;
2015 case 0x13: // month
2016 case 0x1a: // month abreviated
2017 case 0x1b: { // checkme month long
2018 MWAWField field(MWAWField::Date);
2019 field.m_DTFormat = "%m";
2020 listener->insertField(field);
2021 break;
2022 }
2023 case 0x10: // day
2024 case 0x16: // checkme: day abbreviated
2025 case 0x17: { // checkme: day long
2026 MWAWField field(MWAWField::Date);
2027 field.m_DTFormat = "%d";
2028 listener->insertField(field);
2029 break;
2030 }
2031 case 0x15: { // year
2032 MWAWField field(MWAWField::Date);
2033 field.m_DTFormat = "%y";
2034 listener->insertField(field);
2035 break;
2036 }
2037 case 0x1d: {
2038 MWAWField field(MWAWField::Date);
2039 field.m_DTFormat = "%b %d, %Y";
2040 listener->insertField(field);
2041 break;
2042 }
2043 case 0x18: // checkme hour
2044 case 0x19: { // checkme hour
2045 MWAWField field(MWAWField::Time);
2046 field.m_DTFormat = "%H";
2047 listener->insertField(field);
2048 break;
2049 }
2050 case 0x3: // v3
2051 listener->insertField(MWAWField(MWAWField::Date));
2052 break;
2053 case 0x4:
2054 listener->insertField(MWAWField(MWAWField::Time));
2055 break;
2056 case 0x5: // footnote mark (ok)
2057 break;
2058 case 0x9:
2059 listener->insertTab();
2060 break;
2061 case 0xb: // line break (simple but no a paragraph break ~soft)
2062 if (cPos!=cEnd)
2063 listener->insertEOL(true);
2064 break;
2065 case 0xd: // line break hard
2066 if (cPos!=cEnd)
2067 listener->insertEOL();
2068 break;
2069 case 0x11: // command key in help
2070 listener->insertUnicode(0x2318);
2071 break;
2072 case 0x14: // apple logo ( note only in private zone)
2073 listener->insertUnicode(0xf8ff);
2074 break;
2075 default:
2076 p+=listener->insertCharacter(static_cast<unsigned char>(c), input, input->tell()+(cEndPos-1-p));
2077 break;
2078 }
2079 if (c)
2080 f << char(c);
2081 else
2082 f << "###";
2083 }
2084 }
2085
2086 ascFile.addPos(debPos);
2087 ascFile.addNote(f.str().c_str());
2088 ascFile.addPos(input->tell());
2089 ascFile.addNote("_");
2090 return true;
2091 }
2092
sendSection(int secId)2093 bool MsWrdText::sendSection(int secId)
2094 {
2095 int textStructId=-1;
2096 if (!m_state->m_textposList.empty() &&
2097 secId>=0 && secId+1<static_cast<int>(m_state->m_sectionLimitList.size())) {
2098 int tId=m_state->getTextStructId
2099 (m_state->m_sectionLimitList[size_t(secId)+1]-1);
2100 if (tId>=0 && tId<static_cast<int>(m_state->m_textposList.size()))
2101 textStructId=m_state->m_textposList[size_t(tId)].getParagraphId();
2102 }
2103 return m_stylesManager->sendSection(secId, textStructId);
2104 }
2105
2106 ////////////////////////////////////////////////////////////
// try to send a table
2108 ////////////////////////////////////////////////////////////
sendTable(MsWrdTextInternal::Table const & table)2109 bool MsWrdText::sendTable(MsWrdTextInternal::Table const &table)
2110 {
2111 MWAWTextListenerPtr listener=m_parserState->m_textListener;
2112 if (!listener) {
2113 MWAW_DEBUG_MSG(("MsWrdText::sendTable: can not find a listener!\n"));
2114 return true;
2115 }
2116 size_t nCells = table.m_cellPos.size();
2117 if (nCells < 1) {
2118 MWAW_DEBUG_MSG(("MsWrdText::sendTable: numcols pos is bad\n"));
2119 return true;
2120 }
2121
2122 size_t numCols = table.getColsSize().size()+1;
2123 size_t numRows = nCells/numCols;
2124
2125 float height = table.m_height;
2126 if (height > 0) height*=-1;
2127
2128 listener->openTable(table);
2129 size_t numCells = table.m_cells.size();
2130 for (size_t r = 0; r < numRows; r++) {
2131 listener->openTableRow(height, librevenge::RVNG_INCH);
2132 for (size_t c = 0; c < numCols-1; c++) {
2133 MWAWCell cell;
2134 size_t cellPos = r*numCols+c;
2135 if (cellPos < numCells && table.m_cells[cellPos].isSet()) {
2136 int const wh[] = { libmwaw::TopBit, libmwaw::LeftBit,
2137 libmwaw::BottomBit, libmwaw::RightBit
2138 };
2139 auto const &tCell = table.m_cells[cellPos].get();
2140 for (size_t i = 0; i < 4 && i < tCell.m_borders.size(); i++) {
2141 if (!tCell.m_borders[i].isSet() ||
2142 tCell.m_borders[i]->m_style==MWAWBorder::None) continue;
2143 cell.setBorders(wh[i], tCell.m_borders[i].get());
2144 }
2145 if (tCell.m_backColor.isSet()) {
2146 auto col = static_cast<unsigned char>(tCell.m_backColor.get()*255.f);
2147 cell.setBackgroundColor(MWAWColor(col,col,col));
2148 }
2149 else if (!table.m_backgroundColor.isWhite())
2150 cell.setBackgroundColor(table.m_backgroundColor);
2151 }
2152 cell.setPosition(MWAWVec2i(static_cast<int>(c),static_cast<int>(r)));
2153
2154 listener->openTableCell(cell);
2155
2156 MsWrdEntry textData;
2157 textData.setBegin(table.m_cellPos[cellPos]);
2158 long cEndPos = table.m_cellPos[cellPos+1]-1;
2159 textData.setEnd(cEndPos);
2160 if (textData.length()<=0)
2161 listener->insertChar(' ');
2162 else
2163 sendText(textData, false, true);
2164 #if defined(DEBUG_WITH_FILES)
2165 m_state->debugFile2() << "TextContent["<<cEndPos<<"]:" << char(7) << "\n";
2166 #endif
2167 listener->closeTableCell();
2168 }
2169 listener->closeTableRow();
2170 }
2171 listener->closeTable();
2172 return true;
2173 }
2174
sendMainText()2175 bool MsWrdText::sendMainText()
2176 {
2177 MWAWEntry entry;
2178 entry.setBegin(0);
2179 entry.setLength(m_state->m_textLength[0]);
2180 sendText(entry, true);
2181 return true;
2182 }
2183
sendFootnote(int id)2184 bool MsWrdText::sendFootnote(int id)
2185 {
2186 MWAWTextListenerPtr listener=m_parserState->m_textListener;
2187 if (!listener) return true;
2188 if (id < 0 || id >= int(m_state->m_footnoteList.size())) {
2189 MWAW_DEBUG_MSG(("MsWrdText::sendFootnote: can not find footnote %d\n", id));
2190 listener->insertChar(' ');
2191 return false;
2192 }
2193 auto const &footnote = m_state->m_footnoteList[size_t(id)];
2194 if (footnote.m_pos.isParsed())
2195 listener->insertChar(' ');
2196 else
2197 sendText(footnote.m_pos, false);
2198 footnote.m_pos.setParsed();
2199 return true;
2200 }
2201
sendFieldComment(int id)2202 bool MsWrdText::sendFieldComment(int id)
2203 {
2204 MWAWTextListenerPtr listener=m_parserState->m_textListener;
2205 if (!listener) return true;
2206 if (id < 0 || id >= int(m_state->m_fieldList.size())) {
2207 MWAW_DEBUG_MSG(("MsWrdText::sendFieldComment: can not find field %d\n", id));
2208 listener->insertChar(' ');
2209 return false;
2210 }
2211 MsWrdStruct::Font defFont;
2212 defFont.m_font = m_stylesManager->getDefaultFont();
2213 m_stylesManager->setProperty(defFont);
2214 m_stylesManager->sendDefaultParagraph();
2215 std::string const &text = m_state->m_fieldList[size_t(id)].m_text;
2216 if (!text.length()) listener->insertChar(' ');
2217 for (char c : text)
2218 listener->insertCharacter(static_cast<unsigned char>(c));
2219 return true;
2220 }
2221
flushExtra()2222 void MsWrdText::flushExtra()
2223 {
2224 #ifdef DEBUG
2225 if (m_state->m_textLength[1]) {
2226 for (auto footnote : m_state->m_footnoteList) {
2227 if (footnote.m_pos.isParsed()) continue;
2228 sendText(footnote.m_pos, false);
2229 footnote.m_pos.setParsed();
2230 }
2231 }
2232 #endif
2233 }
2234
2235
2236 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab:
2237