1 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
2 
3 /* libmwaw
4 * Version: MPL 2.0 / LGPLv2+
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 2.0 (the "License"); you may not use this file except in compliance with
8 * the License or as specified alternatively below. You may obtain a copy of
9 * the License at http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Major Contributor(s):
17 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
18 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
19 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
20 * Copyright (C) 2006, 2007 Andrew Ziem
21 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
22 *
23 *
24 * All Rights Reserved.
25 *
26 * For minor contributions see the git repository.
27 *
28 * Alternatively, the contents of this file may be used under the terms of
29 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
30 * in which case the provisions of the LGPLv2+ are applicable
31 * instead of those above.
32 */
33 
34 #include <cstring>
35 #include <iomanip>
36 #include <iostream>
37 #include <limits>
38 #include <map>
39 #include <sstream>
40 
41 #include <librevenge/librevenge.h>
42 
43 #include "MWAWTextListener.hxx"
44 #include "MWAWDebug.hxx"
45 #include "MWAWFont.hxx"
46 #include "MWAWFontConverter.hxx"
47 #include "MWAWInputStream.hxx"
48 #include "MWAWPageSpan.hxx"
49 #include "MWAWParagraph.hxx"
50 #include "MWAWPosition.hxx"
51 #include "MWAWSection.hxx"
52 #include "MWAWSubDocument.hxx"
53 
54 #include "BeagleWksParser.hxx"
55 #include "BeagleWksStructManager.hxx"
56 
57 #include "BeagleWksText.hxx"
58 
59 /** Internal: the structures of a BeagleWksText */
60 namespace BeagleWksTextInternal
61 {
62 ////////////////////////////////////////
63 //! Internal: a class used to store the font data of a BeagleWksText
64 struct Font {
65   //! constructor
FontBeagleWksTextInternal::Font66   Font()
67     : m_id(0)
68     , m_size(12)
69     , m_flags(0)
70     , m_color(0)
71     , m_extra()
72   {
73   }
74   /** returns a MWAWFont.
75       \note the font id remains filled with the local id */
getFontBeagleWksTextInternal::Font76   MWAWFont getFont() const
77   {
78     MWAWFont res(m_id,float(m_size));
79     uint32_t flags=0;
80     if (m_flags&1) flags |= MWAWFont::boldBit;
81     if (m_flags&2) flags |= MWAWFont::italicBit;
82     if (m_flags&4) res.setUnderlineStyle(MWAWFont::Line::Simple);
83     if (m_flags&8) flags |= MWAWFont::embossBit;
84     if (m_flags&0x10) flags |= MWAWFont::shadowBit;
85     if (m_flags&0x100) res.set(MWAWFont::Script::super());
86     if (m_flags&0x200) res.set(MWAWFont::Script::sub());
87     if (m_flags&0x400) flags |= MWAWFont::uppercaseBit;
88     if (m_flags&0x800) flags |= MWAWFont::lowercaseBit;
89     res.setFlags(flags);
90     switch (m_color) {
91     case 63:
92       res.setColor(MWAWColor::white());
93       break;
94     case 100:
95       res.setColor(MWAWColor(0xFF,0xFF,0));
96       break;
97     case 168:
98       res.setColor(MWAWColor(0xFF,0,0xFF));
99       break;
100     case 236:
101       res.setColor(MWAWColor(0xFF,0,0));
102       break;
103     case 304:
104       res.setColor(MWAWColor(0,0xFF,0xFF));
105       break;
106     case 372:
107       res.setColor(MWAWColor(0,0xFF,0));
108       break;
109     case 440:
110       res.setColor(MWAWColor(0,0,0xFF));
111       break;
112     default:
113       break;
114     }
115     return res;
116   }
117   //! operator<<
operator <<(std::ostream & o,Font const & fnt)118   friend std::ostream &operator<<(std::ostream &o, Font const &fnt)
119   {
120     if (fnt.m_id) o << "id=" << fnt.m_id << ",";
121     if (fnt.m_size!=12) o << "sz=" << fnt.m_size << ",";
122     if (fnt.m_flags&1) o << "b,";
123     if (fnt.m_flags&2) o << "it,";
124     if (fnt.m_flags&4) o << "underline,";
125     if (fnt.m_flags&8) o << "outline,";
126     if (fnt.m_flags&0x10) o << "shadow,";
127     if (fnt.m_flags&0x100) o << "sup,";
128     if (fnt.m_flags&0x200) o << "sub,";
129     if (fnt.m_flags&0x400) o << "uppercase,";
130     if (fnt.m_flags&0x800) o << "lowercase,";
131     if (fnt.m_flags&0xF0E0)
132       o << "fl=" << std::hex << (fnt.m_flags&0xF0E0) << std::dec << ",";
133     switch (fnt.m_color) {
134     case 0: // black
135       break;
136     case 63:
137       o << "white,";
138       break;
139     case 100:
140       o << "yellow,";
141       break;
142     case 168:
143       o << "magenta,";
144       break;
145     case 236:
146       o << "red,";
147       break;
148     case 304:
149       o << "cyan,";
150       break;
151     case 372:
152       o << "green,";
153       break;
154     case 440:
155       o << "blue,";
156       break;
157     default:
158       o << "#color=" << fnt.m_color << ",";
159       break;
160     }
161     o << fnt.m_extra;
162     return o;
163   }
164 
165   //! the font id
166   int m_id;
167   //! the font size
168   int m_size;
169   //! the font flags
170   int m_flags;
171   //! the font color
172   int m_color;
173   //! extra data
174   std::string m_extra;
175 };
176 ////////////////////////////////////////
177 //! Internal: a class used to store the section data of a BeagleWksText
178 struct Section final : public MWAWSection {
179   //! constructor
SectionBeagleWksTextInternal::Section180   Section()
181     : MWAWSection()
182     , m_ruler()
183     , m_hasFirstPage(false)
184     , m_hasHeader(false)
185     , m_hasFooter(false)
186     , m_pageNumber(1)
187     , m_usePageNumber(false)
188     , m_extra("")
189   {
190     for (auto &i : m_limitPos) i=0;
191     for (auto &i : m_parsed) i=false;
192     m_heights[0]=m_heights[1]=0;
193     m_balanceText=true;
194   }
195   Section(Section const &)=default;
196   Section &operator=(Section const &)=default;
197   Section &operator=(Section &&)=default;
198   //! destructor
199   ~Section() final;
200   //! return the i^th entry
getEntryBeagleWksTextInternal::Section201   MWAWEntry getEntry(int i) const
202   {
203     MWAWEntry res;
204     if (i<0||i>=4) {
205       MWAW_DEBUG_MSG(("BeagleWksTextInternal::getEntry: called with bad id=%d\n",i));
206       return res;
207     }
208     if (m_limitPos[i]<=0)
209       return res;
210     res.setBegin(m_limitPos[i]);
211     res.setEnd(m_limitPos[i+1]-2);
212     return res;
213   }
214   //! return the header entry
getHeaderEntryBeagleWksTextInternal::Section215   MWAWEntry getHeaderEntry(bool fPage) const
216   {
217     return getEntry(fPage?0:2);
218   }
219   //! return true if we have a header
getFooterEntryBeagleWksTextInternal::Section220   MWAWEntry getFooterEntry(bool fPage) const
221   {
222     return getEntry(fPage?1:3);
223   }
224   //! operator<<
operator <<(std::ostream & o,Section const & sec)225   friend std::ostream &operator<<(std::ostream &o, Section const &sec)
226   {
227     o << static_cast<MWAWSection const &>(sec);
228     for (int i=0; i<4; ++i) {
229       if (sec.m_limitPos[i+1]<=sec.m_limitPos[i]+2)
230         continue;
231       static char const *wh[] = {"header[fP]", "footer[fP]", "header", "footer"};
232       o << wh[i] << "=" << std::hex << sec.m_limitPos[i]
233         << "->" << sec.m_limitPos[i+1] << std::hex << ",";
234     }
235     if (sec.m_hasFirstPage) o << "firstPage[special],";
236     if (!sec.m_hasHeader) o << "hide[header],";
237     else if (sec.m_heights[0]) o << "h[header]=" << sec.m_heights[0] << ",";
238     if (!sec.m_hasFooter) o << "hide[footer],";
239     else if (sec.m_heights[1]) o << "h[footer]=" << sec.m_heights[1] << ",";
240     if (sec.m_pageNumber != 1) o << "pagenumber=" << sec.m_pageNumber << ",";
241     if (sec.m_usePageNumber) o << "pagenumber[use],";
242     o << sec.m_extra;
243     return o;
244   }
245   //! the default section ruler
246   MWAWParagraph m_ruler;
247   //! a flag to know if the first page is special
248   bool m_hasFirstPage;
249   //! a flag to know if we need to print the header
250   bool m_hasHeader;
251   //! a flag to know if we need to print the footer
252   bool m_hasFooter;
253   //! the data limits ( first page header, first page footer, header, footer, end)
254   long m_limitPos[5];
255   //! true if the data are send to the listener
256   mutable bool m_parsed[4];
257   //! the header/footer height
258   int m_heights[2];
259   //! the page number
260   int m_pageNumber;
261   //! true if we need to use the page number
262   bool m_usePageNumber;
263   //! extra data
264   std::string m_extra;
265 };
266 
~Section()267 Section::~Section()
268 {
269 }
270 
271 ////////////////////////////////////////
272 //! Internal: the state of a BeagleWksText
273 struct State {
274   //! constructor
StateBeagleWksTextInternal::State275   State()
276     : m_textEntry()
277     , m_sectionList()
278     , m_numPagesBySectionList()
279     , m_version(-1)
280     , m_numPages(-1)
281     , m_actualPage(1)
282   {
283   }
284   //! the main text entry
285   MWAWEntry m_textEntry;
286   //! the section list
287   std::vector<Section> m_sectionList;
288   //! the number of page by section
289   std::vector<int> m_numPagesBySectionList;
290   //! the file version
291   mutable int m_version;
292   int m_numPages /* the number of pages */, m_actualPage /* the actual page */;
293 };
294 
295 ////////////////////////////////////////
296 //! Internal: the subdocument of a BeagleWksText
297 class SubDocument final : public MWAWSubDocument
298 {
299 public:
SubDocument(BeagleWksText & pars,MWAWInputStreamPtr & input,int hFId,int sId)300   SubDocument(BeagleWksText &pars, MWAWInputStreamPtr &input, int hFId, int sId)
301     : MWAWSubDocument(pars.m_mainParser, input, MWAWEntry())
302     , m_textParser(&pars)
303     , m_hfId(hFId)
304     , m_sectId(sId)
305   {
306   }
307 
308   //! destructor
~SubDocument()309   ~SubDocument() final {}
310 
311   //! operator!=
312   bool operator!=(MWAWSubDocument const &doc) const final;
313 
314   //! the parser function
315   void parse(MWAWListenerPtr &listener, libmwaw::SubDocumentType type) final;
316 
317 protected:
318   /** the text parser */
319   BeagleWksText *m_textParser;
320   //! the header/footer id
321   int m_hfId;
322   //! the section id
323   int m_sectId;
324 private:
325   SubDocument(SubDocument const &orig) = delete;
326   SubDocument &operator=(SubDocument const &orig) = delete;
327 };
328 
operator !=(MWAWSubDocument const & doc) const329 bool SubDocument::operator!=(MWAWSubDocument const &doc) const
330 {
331   if (MWAWSubDocument::operator!=(doc)) return true;
332   auto const *sDoc = dynamic_cast<SubDocument const *>(&doc);
333   if (!sDoc) return true;
334   if (m_textParser != sDoc->m_textParser) return true;
335   if (m_hfId != sDoc->m_hfId) return true;
336   if (m_sectId != sDoc->m_sectId) return true;
337   return false;
338 }
339 
parse(MWAWListenerPtr & listener,libmwaw::SubDocumentType)340 void SubDocument::parse(MWAWListenerPtr &listener, libmwaw::SubDocumentType /*type*/)
341 {
342   if (!listener.get()) {
343     MWAW_DEBUG_MSG(("BeagleWksTextInternal::SubDocument::parse: no listener\n"));
344     return;
345   }
346   if (!m_textParser) {
347     MWAW_DEBUG_MSG(("BeagleWksTextInternal::SubDocument::parse: no text parser\n"));
348     return;
349   }
350 
351   long pos = m_input->tell();
352   m_textParser->sendHF(m_hfId, m_sectId);
353   m_input->seek(pos, librevenge::RVNG_SEEK_SET);
354 }
355 }
356 
357 ////////////////////////////////////////////////////////////
358 // constructor/destructor, ...
359 ////////////////////////////////////////////////////////////
BeagleWksText(BeagleWksParser & parser)360 BeagleWksText::BeagleWksText(BeagleWksParser &parser)
361   : m_parserState(parser.getParserState())
362   , m_state(new BeagleWksTextInternal::State)
363   , m_structureManager(parser.m_structureManager)
364   , m_mainParser(&parser)
365 {
366 }
367 
~BeagleWksText()368 BeagleWksText::~BeagleWksText()
369 {
370 }
371 
version() const372 int BeagleWksText::version() const
373 {
374   if (m_state->m_version < 0)
375     m_state->m_version = m_parserState->m_version;
376   return m_state->m_version;
377 }
378 
numPages() const379 int BeagleWksText::numPages() const
380 {
381   if (m_state->m_numPages <= 0)
382     const_cast<BeagleWksText *>(this)->countPages();
383   return m_state->m_numPages;
384 }
385 
getFont(BeagleWksTextInternal::Font const & ft) const386 MWAWFont BeagleWksText::getFont(BeagleWksTextInternal::Font const &ft) const
387 {
388   MWAWFont font=ft.getFont();
389   font.setId(m_structureManager->getFontId(font.id()));
390   return font;
391 }
392 
getHeader(int page,int & numSimilar)393 std::shared_ptr<MWAWSubDocument> BeagleWksText::getHeader(int page, int &numSimilar)
394 {
395   numSimilar=1;
396   std::shared_ptr<MWAWSubDocument> res;
397   int actPage=0, newSectionPage=0;
398   size_t s=0;
399   for (; s < m_state->m_numPagesBySectionList.size(); s++) {
400     newSectionPage+=m_state->m_numPagesBySectionList[s];
401     if (newSectionPage>page)
402       break;
403     actPage=newSectionPage;
404   }
405   if (s >= m_state->m_sectionList.size()) {
406     if (m_state->m_numPages>page)
407       numSimilar=m_state->m_numPages-page+1;
408     return res;
409   }
410   auto const &sec=m_state->m_sectionList[s];
411   bool useFPage=page==actPage && sec.m_hasFirstPage;
412   if (!useFPage)
413     numSimilar=newSectionPage-page;
414   if (sec.getHeaderEntry(useFPage).valid())
415     res.reset(new BeagleWksTextInternal::SubDocument
416               (*this, m_parserState->m_input, useFPage?0:2, int(s)));
417   return res;
418 }
419 
getFooter(int page,int & numSimilar)420 std::shared_ptr<MWAWSubDocument> BeagleWksText::getFooter(int page, int &numSimilar)
421 {
422   numSimilar=1;
423   std::shared_ptr<MWAWSubDocument> res;
424   int actPage=0, newSectionPage=0;
425   size_t s=0;
426   for (; s < m_state->m_numPagesBySectionList.size(); s++) {
427     newSectionPage+=m_state->m_numPagesBySectionList[s];
428     if (newSectionPage>page)
429       break;
430     actPage=newSectionPage;
431   }
432   if (s >= m_state->m_sectionList.size()) {
433     if (m_state->m_numPages>page)
434       numSimilar=m_state->m_numPages-page+1;
435     return res;
436   }
437   auto const &sec=m_state->m_sectionList[s];
438   bool useFPage=page==actPage && sec.m_hasFirstPage;
439   if (!useFPage)
440     numSimilar=newSectionPage-page;
441   if (sec.getFooterEntry(useFPage).valid())
442     res.reset(new BeagleWksTextInternal::SubDocument
443               (*this, m_parserState->m_input, useFPage?1:3, int(s)));
444   return res;
445 }
446 
447 ////////////////////////////////////////////////////////////
448 // Intermediate level
449 ////////////////////////////////////////////////////////////
450 
451 //
452 // find the different zones
453 //
/* Reads the text zone header and the list of sub-entries stored at the end of
   the zone, then splits these sub-entries between the main text entry and the
   section list.

   \param entry the whole text zone (at least 22 bytes)
   \return true if a valid main text entry was found
   \note on exit (after the header checks), the input is positioned at entry.end() */
bool BeagleWksText::createZones(MWAWEntry &entry)
{
  if (!entry.valid() || entry.length()<22) {
    MWAW_DEBUG_MSG(("BeagleWksText::createZones: the entry seems bad\n"));
    return false;
  }

  MWAWInputStreamPtr &input= m_parserState->m_input;
  long pos=entry.begin();
  // the first 4 bytes of the zone are skipped
  input->seek(pos+4, librevenge::RVNG_SEEK_SET);
  libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
  libmwaw::DebugStream f;
  f << "Entries(THeader):";
  long val=input->readLong(4); // always 0x238
  if (val!=0x238)
    f << "f0=" << val << ",";
  val=input->readLong(2);
  if (val!=1)
    f << "f1=" << val << ",";
  // the offset (relative to the zone begin) of the list of sub-entries
  val=long(input->readULong(4));
  if (val >= entry.length())
    val = 0; // bad offset: will be rejected by the val<22 test
  // the list fills the end of the zone, each element uses 6 bytes
  auto nSections=int(entry.length()-val);
  if (val<22|| nSections<6 || (nSections%6)) {
    f << "###";
    MWAW_DEBUG_MSG(("BeagleWksText::createZones: the data size seems bad\n"));
    return false;
  }
  long endPos = pos+val;
  nSections/=6;
  for (int i=0; i<2; i++) { // f2=0, f3=6
    val=input->readLong(2);
    if (val)
      f << "f" << i+2 << "=" << val << ",";
  }
  // the stored number of elements: only checked, the computed value is used
  val=input->readLong(2);
  f << "nSect=" << val << ",";
  if (val!=nSections) {
    f << "###";
    MWAW_DEBUG_MSG(("BeagleWksText::createZones: the number of sections/pages seems bad\n"));
  }
  // checkme: after junk ?
  ascFile.addDelimiter(input->tell(),'|');
  ascFile.addPos(pos);
  ascFile.addNote(f.str().c_str());

  // now read the list of sub-entries: a 4 bytes offset followed by a 2 bytes length
  input->seek(endPos, librevenge::RVNG_SEEK_SET);
  std::vector<MWAWEntry> listEntries;
  listEntries.reserve(size_t(nSections));
  f.str("");
  f << "Entries(Text):";
  for (int i=0; i <nSections; ++i) {
    pos=input->tell();
    MWAWEntry pEntry;
    pEntry.setBegin(entry.begin()+long(input->readULong(4)));
    pEntry.setLength(long(input->readULong(2)));
    f << std::hex << pEntry.begin() << "<->" << pEntry.end() << std::dec << ",";
    // NOTE(review): pEntry.begin() is entry.begin() plus an unsigned offset, so
    // the test pEntry.begin()+16 < entry.begin() looks like it can never be true;
    // maybe pEntry.begin() < entry.begin()+16 was intended -- to be confirmed
    if (!pEntry.valid() || pEntry.begin()+16 < entry.begin()
        || pEntry.end()>endPos) {
      // a bad entry is replaced by an empty one, to keep the list indices
      pEntry=MWAWEntry();
      f << "###";
      MWAW_DEBUG_MSG(("BeagleWksText::createZones: the page entry %d seems bad\n", i));
    }
    listEntries.push_back(pEntry);
    input->seek(pos+6, librevenge::RVNG_SEEK_SET);
  }
  ascFile.addPos(endPos);
  ascFile.addNote(f.str().c_str());

  size_t p=0;
  // the main text begins with the first entry
  m_state->m_textEntry.setBegin(listEntries[0].begin());

  // extend the main text until the first entry (other than the first one)
  // which begins with the value 0xdc; the last entry is never added to the text
  for (p=0; p+1 < listEntries.size(); ++p) {
    if (!listEntries[p].valid())
      continue;
    if (p) {
      // use the section signature to differentiate text/section (changeme)
      input->seek(listEntries[p].begin(), librevenge::RVNG_SEEK_SET);
      if (input->readLong(2)==0xdc)
        break;
    }
    m_state->m_textEntry.setEnd(listEntries[p].end());
  }
  // the remaining entries are read as sections: an invalid entry or a
  // readSection failure stores a default section
  for (; p < listEntries.size(); ++p) {
    BeagleWksTextInternal::Section sec;
    if (listEntries[p].valid() && !readSection(listEntries[p], sec))
      sec = BeagleWksTextInternal::Section();
    m_state->m_sectionList.push_back(sec);
  }
  input->seek(entry.end(), librevenge::RVNG_SEEK_SET);
  return m_state->m_textEntry.valid();
}
546 
countPages()547 void BeagleWksText::countPages()
548 {
549   if (!m_state->m_textEntry.valid()) {
550     MWAW_DEBUG_MSG(("BeagleWksText::countPages: can not find the main entry\n"));
551     return;
552   }
553   MWAWInputStreamPtr &input= m_parserState->m_input;
554   long pos=m_state->m_textEntry.begin(), endPos=m_state->m_textEntry.end();
555   input->seek(pos, librevenge::RVNG_SEEK_SET);
556   int nSectPages=0, nPages=1;
557   while (!input->isEnd()) {
558     pos=input->tell();
559     if (pos>=endPos) break;
560     auto c = static_cast<unsigned char>(input->readULong(1));
561     if (c) continue;
562     c=static_cast<unsigned char>(input->readULong(1));
563     bool done=false;
564     input->seek(pos, librevenge::RVNG_SEEK_SET);
565     switch (c) {
566     case 0: {
567       BeagleWksTextInternal::Font font;
568       done=readFont(font,endPos);
569       break;
570     }
571     case 1: {
572       MWAWParagraph para;
573       done=readParagraph(para,endPos);
574       break;
575     }
576     case 2:
577       if (pos+6 > endPos)
578         break;
579       input->seek(4, librevenge::RVNG_SEEK_CUR);
580       done = input->readLong(2)==0x200;
581       break;
582     case 3: { // type 3:page 4:section
583       if (pos+6 > endPos)
584         break;
585       input->seek(2, librevenge::RVNG_SEEK_CUR);
586       auto type=static_cast<int>(input->readLong(2));
587       if (input->readLong(2)!=0x300)
588         break;
589       if (type==3) {
590         nSectPages++;
591         nPages++;
592       }
593       else if (type==4) {
594         m_state->m_numPagesBySectionList.push_back(nSectPages);
595         nSectPages=0;
596       }
597       done=true;
598       break;
599     }
600     case 4: // picture
601       if (pos+8 > endPos)
602         break;
603       input->seek(6, librevenge::RVNG_SEEK_CUR);
604       done = input->readLong(2)==0x400;
605       break;
606     case 5: // a field
607       if (pos+36 > endPos)
608         break;
609       input->seek(34, librevenge::RVNG_SEEK_CUR);
610       done=input->readLong(2)==0x500;
611       break;
612     default:
613       break;
614     }
615     if (!done)
616       break;
617   }
618 }
619 
620 //
621 // send the text
622 //
sendMainText()623 bool BeagleWksText::sendMainText()
624 {
625   return sendText(m_state->m_textEntry);
626 }
627 
sendHF(int hfId,int sectId)628 bool BeagleWksText::sendHF(int hfId, int sectId)
629 {
630   if (hfId<0||hfId>=4) {
631     MWAW_DEBUG_MSG(("BeagleWksText::sendHF: hfId=%d is bad\n", hfId));
632     return false;
633   }
634   if (sectId<0||sectId>=static_cast<int>(m_state->m_sectionList.size())) {
635     MWAW_DEBUG_MSG(("BeagleWksText::sendHF: can not find section %d\n", sectId));
636     return false;
637   }
638 
639   MWAWInputStreamPtr &input= m_parserState->m_input;
640   long pos=input->tell();
641   auto const &sec=m_state->m_sectionList[size_t(sectId)];
642   sec.m_parsed[hfId]=true;
643   bool ok=sendText(sec.getEntry(hfId));
644   input->seek(pos,librevenge::RVNG_SEEK_SET);
645   return ok;
646 }
647 
flushExtra()648 void BeagleWksText::flushExtra()
649 {
650   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
651   for (auto const &sec : m_state->m_sectionList) {
652     for (int j=0; j < 4; ++j) {
653       if (sec.m_parsed[j])
654         continue;
655       MWAWEntry hfEntry=sec.getEntry(j);
656       if (!hfEntry.valid()) {
657         if (hfEntry.begin()>0) {
658           ascFile.addPos(hfEntry.begin());
659           ascFile.addNote("_");
660         }
661         continue;
662       }
663       sendText(hfEntry);
664     }
665   }
666 }
667 
/* Sends the content of a text entry to the text listener.

   The zone is read byte by byte: a non zero byte is a character (with a
   special treatment for 0x1, 0x9 and 0xd), a zero byte begins a structure
   whose type is given by the following byte (0: font, 1: paragraph, 2: field,
   3: break, 4: picture, 5: database field). The parsing stops at the first
   structure which can not be read.

   \param entry the zone to send; it is considered as the main text if it
   begins at the same position as m_state->m_textEntry
   \return false if there is no listener or if the entry is not valid, true
   otherwise (even if some data can not be read)
   \note on exit (when true is returned), the input is positioned at entry.end() */
bool BeagleWksText::sendText(MWAWEntry const &entry)
{
  MWAWTextListenerPtr listener=m_parserState->m_textListener;
  if (!listener) {
    MWAW_DEBUG_MSG(("BeagleWksText::sendText: can not find the listener\n"));
    return false;
  }
  if (!entry.valid()) {
    MWAW_DEBUG_MSG(("BeagleWksText::sendText: can not find the entry\n"));
    return false;
  }

  MWAWInputStreamPtr &input= m_parserState->m_input;
  // debPos: the beginning of the current run of text (used for the debug file)
  long pos=entry.begin(), debPos=pos, endPos=entry.end();
  bool isMain=entry.begin()==m_state->m_textEntry.begin();
  // the sections are only opened when we send the main text
  size_t actSection=0, numSection=isMain ? m_state->m_sectionList.size() : 0;
  if (actSection<numSection) {
    if (listener->isSectionOpened())
      listener->closeSection();
    listener->openSection(m_state->m_sectionList[actSection++]);
  }

  input->seek(pos, librevenge::RVNG_SEEK_SET);
  libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
  libmwaw::DebugStream f;
  f << "Text:";
  // begin with the default font
  BeagleWksTextInternal::Font font;
  listener->setFont(getFont(font));
  int actPage = 1, sectPage=1;
  while (!input->isEnd()) {
    pos=input->tell();
    bool last=pos==endPos;
    // at the end of the zone, simulate a zero byte to flush the debug message
    unsigned char c = last ? static_cast<unsigned char>(0) :
                      static_cast<unsigned char>(input->readULong(1));
    if ((c==0 || c==0xd) && pos!=debPos) {
      // end of a run of text: store the debug message
      ascFile.addPos(debPos);
      ascFile.addNote(f.str().c_str());
      debPos=(c==0xd) ? pos+1:pos;
      f.str("");
      f << "Text:";
    }
    if (last) break;
    if (c) {
      // a character
      f << c;
      switch (c) {
      case 0x1: // end zone marker, probably safe to ignore
        break;
      case 0x9:
        listener->insertTab();
        break;
      case 0xd:
        listener->insertEOL();
        break;
      default:
        listener->insertCharacter(static_cast<unsigned char>(c));
        break;
      }
      continue;
    }
    // a structure: read its type and go back to its beginning
    c=static_cast<unsigned char>(input->readULong(1));
    bool done=false;
    input->seek(pos, librevenge::RVNG_SEEK_SET);
    switch (c) {
    case 0: // a font
      if (!readFont(font,endPos))
        break;
      done=true;
      listener->setFont(getFont(font));
      break;
    case 1: { // a paragraph
      MWAWParagraph para;
      if (!readParagraph(para,endPos))
        break;
      done=true;
      listener->setParagraph(para);
      break;
    }
    case 2: { // a field: 6 bytes which end with 0x200
      if (pos+6 > endPos)
        break;
      input->seek(2, librevenge::RVNG_SEEK_CUR);
      auto type=static_cast<int>(input->readLong(2));
      if (input->readLong(2)!=0x200)
        break;
      f.str("");
      f << "Entries(Field):";
      switch (type) {
      case 0:
      case 1: {
        // the page number in the section or the section number: sent as text
        std::stringstream s;
        if (type==0) {
          f << "pagenumber[section]";
          s << sectPage;
        }
        else {
          f << "section";
          s << actSection;
        }
        listener->insertUnicodeString(librevenge::RVNGString(s.str().c_str()));
        break;
      }
      case 2:
        listener->insertField(MWAWField(MWAWField::PageNumber));
        f << "pagenumber";
        break;
      case 3:
        listener->insertField(MWAWField(MWAWField::Date));
        f << "date";
        break;
      case 4: {
        MWAWField field(MWAWField::Time);
        field.m_DTFormat="%H:%M";
        listener->insertField(field);
        f << "time";
        break;
      }
      default:
        MWAW_DEBUG_MSG(("BeagleWksText::sendText: find unknown field type=%d\n", type));
        f << "#type=" << type << ",";
        break;
      }
      ascFile.addPos(pos);
      ascFile.addNote(f.str().c_str());
      done=true;
      break;
    }
    case 3: { // a break: 6 bytes which end with 0x300
      if (pos+6 > endPos)
        break;
      input->seek(2, librevenge::RVNG_SEEK_CUR);
      auto type=static_cast<int>(input->readLong(2));
      if (input->readLong(2)!=0x300)
        break;
      f.str("");
      f << "Entries(Break):";
      switch (type) {
      case 3:
        f << "pagebreak";
        sectPage++;
        // only the main text can create a new page
        if (!isMain) break;
        m_mainParser->newPage(++actPage);
        break;
      case 4:
        f << "sectionbreak";
        sectPage=1;
        // only the main text can open a new section
        if (!isMain) break;
        if (actSection<numSection) {
          if (listener->isSectionOpened())
            listener->closeSection();
          listener->openSection(m_state->m_sectionList[actSection++]);
        }
        else {
          MWAW_DEBUG_MSG(("BeagleWksText::sendText: can not find the new section\n"));
        }
        break;
      default:
        MWAW_DEBUG_MSG(("BeagleWksText::sendText: find unknown break type=%d\n", type));
        f << "#type=" << type << ",";
        break;
      }
      ascFile.addPos(pos);
      ascFile.addNote(f.str().c_str());
      done=true;
      break;
    }
    case 4: { // picture: 8 bytes which end with 0x400
      if (pos+8 > endPos)
        break;
      input->seek(2, librevenge::RVNG_SEEK_CUR);
      auto val=static_cast<int>(input->readLong(2));
      auto id=static_cast<int>(input->readULong(2));
      if (input->readLong(2)!=0x400)
        break;
      f.str("");
      f << "Entries(Picture):id?=" << id << ",";
      if (val) f << "f0=" << val << ",";
      m_mainParser->sendFrame(id);
      ascFile.addPos(pos);
      ascFile.addNote(f.str().c_str());
      done=true;
      break;
    }
    case 5: { // a field: 36 bytes which end with 0x500
      if (pos+36 > endPos)
        break;
      input->seek(2, librevenge::RVNG_SEEK_CUR);
      f.str("");
      f << "Entries(Database):";
      auto fl=static_cast<int>(input->readULong(1)); // find 40
      if (fl) f << "fl=" << std::hex << fl << std::dec << ",";
      // the field name: a size followed by the characters (at most 30)
      auto fSz=static_cast<int>(input->readULong(1));
      if (fSz>30) {
        MWAW_DEBUG_MSG(("BeagleWksText::sendText: field name size seems bad\n"));
        fSz=0;
        f << "###";
      }
      // the name is sent between the characters 0xab and 0xbb;
      // note that it is sent before the final 0x500 marker is checked
      std::string name("");
      listener->insertUnicode(0xab);
      for (int i=0; i < fSz; ++i) {
        auto ch=static_cast<unsigned char>(input->readULong(1));
        listener->insertCharacter(ch);
        name+=char(ch);
      }
      listener->insertUnicode(0xbb);
      f << name;
      input->seek(pos+34, librevenge::RVNG_SEEK_SET);
      if (input->readLong(2)!=0x500)
        break;
      ascFile.addPos(pos);
      ascFile.addNote(f.str().c_str());
      done=true;
      break;

    }
    default:
      break;
    }
    if (done) {
      // the structure was read: begin a new run of text
      debPos=input->tell();
      f.str("");
      f << "Text:";
      continue;
    }
    // unknown or bad structure: stop here
    input->seek(pos, librevenge::RVNG_SEEK_SET);
    break;
  }
  if (input->tell()!=endPos) {
    // some data remain unread: mark them in the debug file and skip them
    ascFile.addPos(input->tell());
    ascFile.addNote("Text:###");

    MWAW_DEBUG_MSG(("BeagleWksText::sendText: find extra data\n"));
    input->seek(endPos, librevenge::RVNG_SEEK_SET);
  }
  ascFile.addPos(endPos);
  ascFile.addNote("_");
  return true;
}
905 
906 //////////////////////////////////////////////
907 // Fonts
908 //////////////////////////////////////////////
readFont(BeagleWksTextInternal::Font & font,long endPos)909 bool BeagleWksText::readFont(BeagleWksTextInternal::Font &font, long endPos)
910 {
911   MWAWInputStreamPtr &input= m_parserState->m_input;
912   long pos=input->tell();
913   if (pos+12 > endPos || input->readLong(2)) {
914     input->seek(pos, librevenge::RVNG_SEEK_SET);
915     return false;
916   }
917 
918   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
919   libmwaw::DebugStream f;
920   font.m_size ^= static_cast<int>(input->readLong(2));
921   font.m_flags ^= static_cast<int>(input->readULong(2));
922   font.m_color ^= static_cast<int>(input->readLong(2));
923   auto val=static_cast<int>(input->readULong(1));
924   if (val) // find b1 and 20
925     f << "#f0=" << std::hex << val << std::dec << ",";
926   font.m_id ^= static_cast<int>(input->readULong(1));
927   font.m_extra=f.str();
928   f.str("");
929   f << "Entries(FontDef):" << font;
930   ascFile.addPos(pos);
931   ascFile.addNote(f.str().c_str());
932   // now the reverse header
933   if (input->readLong(2)) {
934     input->seek(pos, librevenge::RVNG_SEEK_SET);
935     return false;
936   }
937   input->seek(pos+12, librevenge::RVNG_SEEK_SET);
938   return true;
939 }
940 
941 //////////////////////////////////////////////
942 // Paragraph
943 //////////////////////////////////////////////
readParagraph(MWAWParagraph & para,long endPos,bool inSection)944 bool BeagleWksText::readParagraph(MWAWParagraph &para, long endPos, bool inSection)
945 {
946   para=MWAWParagraph();
947   MWAWInputStreamPtr &input= m_parserState->m_input;
948   long pos=input->tell();
949   if (pos+23 > endPos) return false;
950 
951   int fSz=0;
952   if (!inSection) {
953     bool ok= input->readLong(2)==1;
954     fSz=ok ? static_cast<int>(input->readULong(1)) : 0;
955     if (!ok || fSz < 19 || pos+4+fSz > endPos) {
956       input->seek(pos, librevenge::RVNG_SEEK_SET);
957       return false;
958     }
959   }
960 
961   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
962   libmwaw::DebugStream f;
963   para.setInterline(1.+double(input->readULong(1))/10., librevenge::RVNG_PERCENT);
964   // para spacing, before/after
965   para.m_spacings[1] = para.m_spacings[2] =
966                          (double(input->readULong(1))/10.)*6./72.;
967   auto fl=static_cast<int>(input->readULong(1));
968   switch (fl&0xf) {
969   case 1: // left
970     break;
971   case 2:
972     para.m_justify=MWAWParagraph::JustificationRight;
973     break;
974   case 4:
975     para.m_justify=MWAWParagraph::JustificationCenter;
976     break;
977   case 8:
978     para.m_justify=MWAWParagraph::JustificationFull;
979     break;
980   default:
981     f << "#align=" << (fl&0xf) << ",";
982     break;
983   }
984   fl &=0xFFF0; // find 60 or 70
985   if (fl) f << "flags=" << std::hex << fl << std::dec << ",";
986   para.m_marginsUnit = librevenge::RVNG_POINT;
987   for (int i=0; i<3; ++i) // left, right, indent
988     para.m_margins[i==2 ? 0 : i+1]=double(input->readLong(4))/65536.;
989   auto nTabs=static_cast<int>(input->readLong(2));
990   if ((inSection && (nTabs < 0 || nTabs>20)) ||
991       (!inSection && 19+nTabs*6!=fSz)) {
992     MWAW_DEBUG_MSG(("BeagleWksText::readParagraph: the number of tabs seems bad\n"));
993     f << "###numTabs=" << nTabs << ",";
994     nTabs=0;
995   }
996   for (int i=0; i<nTabs; ++i) {
997     MWAWTabStop tab;
998     tab.m_position=double(input->readLong(4))/65536./72;
999     auto val=static_cast<int>(input->readLong(1));
1000     switch (val) {
1001     case 1: // left
1002       break;
1003     case 2:
1004       tab.m_alignment=MWAWTabStop::RIGHT;
1005       break;
1006     case 3:
1007       tab.m_alignment=MWAWTabStop::CENTER;
1008       break;
1009     case 4:
1010       tab.m_alignment=MWAWTabStop::DECIMAL;
1011       break;
1012     case 5:
1013       tab.m_alignment=MWAWTabStop::BAR;
1014       break;
1015     default:
1016       MWAW_DEBUG_MSG(("BeagleWksText::readParagraph: find unknown tab align=%d\n", val));
1017       f << "tabs" << i << "[#align=" << tab.m_alignment << "],";
1018       break;
1019     }
1020     auto leader=static_cast<unsigned char>(input->readULong(1));
1021     if (leader) {
1022       int unicode= m_parserState->m_fontConverter->unicode(3, leader);
1023       if (unicode==-1)
1024         tab.m_leaderCharacter =static_cast<unsigned short>(leader);
1025       else
1026         tab.m_leaderCharacter =static_cast<unsigned short>(unicode);
1027     }
1028     para.m_tabs->push_back(tab);
1029   }
1030   para.m_extra=f.str();
1031   f.str("");
1032   f << "Entries(Ruler):" << para;
1033   ascFile.addPos(pos);
1034   ascFile.addNote(f.str().c_str());
1035   if (inSection)
1036     return true;
1037   // now the reverse header
1038   if (static_cast<int>(input->readULong(1))!=fSz || input->readLong(2)!=0x100) {
1039     input->seek(pos, librevenge::RVNG_SEEK_SET);
1040     return false;
1041   }
1042   input->seek(pos+4+fSz, librevenge::RVNG_SEEK_SET);
1043   return true;
1044 }
1045 
1046 //////////////////////////////////////////////
1047 // Section
1048 //////////////////////////////////////////////
readSection(MWAWEntry const & entry,BeagleWksTextInternal::Section & sec)1049 bool BeagleWksText::readSection(MWAWEntry const &entry, BeagleWksTextInternal::Section &sec)
1050 {
1051   sec=BeagleWksTextInternal::Section();
1052   if (entry.length()<0xdc) {
1053     MWAW_DEBUG_MSG(("BeagleWksText::readSection: the entry seems bad\n"));
1054     return false;
1055   }
1056   MWAWInputStreamPtr &input= m_parserState->m_input;
1057   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1058   libmwaw::DebugStream f;
1059 
1060   long pos=entry.begin();
1061   input->seek(pos, librevenge::RVNG_SEEK_SET);
1062   if (input->readULong(2)!=0xdc) {
1063     MWAW_DEBUG_MSG(("BeagleWksText::readSection: the section header seems bad\n"));
1064     ascFile.addPos(pos);
1065     ascFile.addNote("Entries(Section):###");
1066     return false;
1067   }
1068 
1069   sec.m_limitPos[0]=pos+0xdc;
1070   for (int i=1; i < 5; ++i) {
1071     sec.m_limitPos[i]=pos+long(input->readULong(2));
1072     if (sec.m_limitPos[i]>entry.end()) {
1073       MWAW_DEBUG_MSG(("BeagleWksText::readSection: some limits seem too big\n"));
1074       f << "###limit-" << i << "=" << std::hex << sec.m_limitPos[i-1] << std::dec << ",";
1075       sec.m_limitPos[i]=0;
1076     }
1077     if (sec.m_limitPos[i]<=sec.m_limitPos[i-1]) {
1078       MWAW_DEBUG_MSG(("BeagleWksText::readSection: some limits seem incoherent\n"));
1079       f << "###limit-" << i << "=" << std::hex << sec.m_limitPos[i-1] << "x"
1080         << sec.m_limitPos[i]  << std::dec << ",";
1081     }
1082   }
1083   auto nCols=static_cast<int>(input->readULong(1));
1084   if (nCols<0 || nCols>16) {
1085     MWAW_DEBUG_MSG(("BeagleWksText::readSection: the number of columns seems bad\n"));
1086     f << "###nCols=" << nCols << ",";
1087     nCols=1;
1088   }
1089   auto val=long(input->readULong(1)); // 0|1|6|1e
1090   if (val) f << "f0=" << std::hex << val << std::dec << ",";
1091   double colSep=double(input->readLong(4))/65536;
1092   if (colSep<48 || colSep>48)
1093     f << "colSep=" << colSep << ",";
1094   if (nCols>1)
1095     sec.setColumns(nCols, m_mainParser->getPageWidth()/double(nCols), librevenge::RVNG_INCH, colSep/72.);
1096   for (int st=0; st<2; ++st) {
1097     f << ((st==0) ? "header=[" : "footer=[");
1098     sec.m_heights[st]=static_cast<int>(input->readLong(2));
1099     val = input->readLong(2);
1100     if (val) f << "fl=" << val << ",";
1101     val = input->readLong(2); // right/left page ?
1102     if (val!=sec.m_heights[st]) f << "dim2=" << val << ",";
1103     f << "],";
1104   }
1105   sec.m_pageNumber=static_cast<int>(input->readLong(2));
1106   unsigned long flags= input->readULong(4);
1107   sec.m_hasFirstPage = (flags & 0x10000);
1108   if (flags & 0x20000) f << "newPage,";
1109   sec.m_hasHeader = (flags & 0x40000);
1110   sec.m_hasFooter = (flags & 0x80000);
1111   sec.m_usePageNumber = (flags & 0x100000);
1112   if (flags & 0x400000)
1113     sec.m_columnSeparator=MWAWBorder();
1114   flags &= 0xFFA0FFFF;
1115   if (val) f << "flags=" << std::hex << flags << std::dec << ",";
1116   val=input->readLong(2);
1117   if (val!=1) f << "page=" << val << ",";
1118   val=input->readLong(2);
1119   if (val) f << "yPos=" << val << ",";
1120   sec.m_extra=f.str();
1121   f.str("");
1122   f << "Entries(Section):" << sec;
1123   ascFile.addPos(pos);
1124   ascFile.addNote(f.str().c_str());
1125 
1126   pos=input->tell();
1127   ascFile.addPos(pos);
1128   ascFile.addNote("Section-II:");
1129 
1130   input->seek(entry.begin()+81,librevenge::RVNG_SEEK_SET);
1131   if (!readParagraph(sec.m_ruler, pos+0xda, true)) {
1132     sec.m_ruler=MWAWParagraph();
1133     MWAW_DEBUG_MSG(("BeagleWksText::readSection: can not read the section ruler\n"));
1134     ascFile.addPos(pos+81);
1135     ascFile.addNote("Section(Ruler):###");
1136   }
1137 
1138   input->seek(entry.begin()+0xda,librevenge::RVNG_SEEK_SET);
1139   pos=input->tell();
1140   f.str("");
1141   f << "Section-III:";
1142   val=long(input->readULong(2)); // find 3007, 4fef, 7006, fff9 ?
1143   if (val) f << "f0=" << std::hex << val << std::dec << ",";
1144   ascFile.addPos(pos);
1145   ascFile.addNote(f.str().c_str());
1146 
1147   input->seek(entry.end(),librevenge::RVNG_SEEK_SET);
1148   return true;
1149 }
1150 
1151 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab:
1152