1 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
2
3 /* libmwaw
4 * Version: MPL 2.0 / LGPLv2+
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 2.0 (the "License"); you may not use this file except in compliance with
8 * the License or as specified alternatively below. You may obtain a copy of
9 * the License at http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Major Contributor(s):
17 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
18 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
19 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
20 * Copyright (C) 2006, 2007 Andrew Ziem
21 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
22 *
23 *
24 * All Rights Reserved.
25 *
26 * For minor contributions see the git repository.
27 *
28 * Alternatively, the contents of this file may be used under the terms of
29 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
30 * in which case the provisions of the LGPLv2+ are applicable
31 * instead of those above.
32 */
33
34 #include <cstring>
35 #include <iomanip>
36 #include <iostream>
37 #include <limits>
38 #include <map>
39 #include <sstream>
40
41 #include <librevenge/librevenge.h>
42
43 #include "MWAWTextListener.hxx"
44 #include "MWAWDebug.hxx"
45 #include "MWAWFont.hxx"
46 #include "MWAWFontConverter.hxx"
47 #include "MWAWInputStream.hxx"
48 #include "MWAWPageSpan.hxx"
49 #include "MWAWParagraph.hxx"
50 #include "MWAWPosition.hxx"
51 #include "MWAWSection.hxx"
52 #include "MWAWSubDocument.hxx"
53
54 #include "BeagleWksParser.hxx"
55 #include "BeagleWksStructManager.hxx"
56
57 #include "BeagleWksText.hxx"
58
59 /** Internal: the structures of a BeagleWksText */
60 namespace BeagleWksTextInternal
61 {
62 ////////////////////////////////////////
63 //! Internal: a class used to store the font data of a BeagleWksText
64 struct Font {
65 //! constructor
FontBeagleWksTextInternal::Font66 Font()
67 : m_id(0)
68 , m_size(12)
69 , m_flags(0)
70 , m_color(0)
71 , m_extra()
72 {
73 }
74 /** returns a MWAWFont.
75 \note the font id remains filled with the local id */
getFontBeagleWksTextInternal::Font76 MWAWFont getFont() const
77 {
78 MWAWFont res(m_id,float(m_size));
79 uint32_t flags=0;
80 if (m_flags&1) flags |= MWAWFont::boldBit;
81 if (m_flags&2) flags |= MWAWFont::italicBit;
82 if (m_flags&4) res.setUnderlineStyle(MWAWFont::Line::Simple);
83 if (m_flags&8) flags |= MWAWFont::embossBit;
84 if (m_flags&0x10) flags |= MWAWFont::shadowBit;
85 if (m_flags&0x100) res.set(MWAWFont::Script::super());
86 if (m_flags&0x200) res.set(MWAWFont::Script::sub());
87 if (m_flags&0x400) flags |= MWAWFont::uppercaseBit;
88 if (m_flags&0x800) flags |= MWAWFont::lowercaseBit;
89 res.setFlags(flags);
90 switch (m_color) {
91 case 63:
92 res.setColor(MWAWColor::white());
93 break;
94 case 100:
95 res.setColor(MWAWColor(0xFF,0xFF,0));
96 break;
97 case 168:
98 res.setColor(MWAWColor(0xFF,0,0xFF));
99 break;
100 case 236:
101 res.setColor(MWAWColor(0xFF,0,0));
102 break;
103 case 304:
104 res.setColor(MWAWColor(0,0xFF,0xFF));
105 break;
106 case 372:
107 res.setColor(MWAWColor(0,0xFF,0));
108 break;
109 case 440:
110 res.setColor(MWAWColor(0,0,0xFF));
111 break;
112 default:
113 break;
114 }
115 return res;
116 }
117 //! operator<<
operator <<(std::ostream & o,Font const & fnt)118 friend std::ostream &operator<<(std::ostream &o, Font const &fnt)
119 {
120 if (fnt.m_id) o << "id=" << fnt.m_id << ",";
121 if (fnt.m_size!=12) o << "sz=" << fnt.m_size << ",";
122 if (fnt.m_flags&1) o << "b,";
123 if (fnt.m_flags&2) o << "it,";
124 if (fnt.m_flags&4) o << "underline,";
125 if (fnt.m_flags&8) o << "outline,";
126 if (fnt.m_flags&0x10) o << "shadow,";
127 if (fnt.m_flags&0x100) o << "sup,";
128 if (fnt.m_flags&0x200) o << "sub,";
129 if (fnt.m_flags&0x400) o << "uppercase,";
130 if (fnt.m_flags&0x800) o << "lowercase,";
131 if (fnt.m_flags&0xF0E0)
132 o << "fl=" << std::hex << (fnt.m_flags&0xF0E0) << std::dec << ",";
133 switch (fnt.m_color) {
134 case 0: // black
135 break;
136 case 63:
137 o << "white,";
138 break;
139 case 100:
140 o << "yellow,";
141 break;
142 case 168:
143 o << "magenta,";
144 break;
145 case 236:
146 o << "red,";
147 break;
148 case 304:
149 o << "cyan,";
150 break;
151 case 372:
152 o << "green,";
153 break;
154 case 440:
155 o << "blue,";
156 break;
157 default:
158 o << "#color=" << fnt.m_color << ",";
159 break;
160 }
161 o << fnt.m_extra;
162 return o;
163 }
164
165 //! the font id
166 int m_id;
167 //! the font size
168 int m_size;
169 //! the font flags
170 int m_flags;
171 //! the font color
172 int m_color;
173 //! extra data
174 std::string m_extra;
175 };
176 ////////////////////////////////////////
177 //! Internal: a class used to store the section data of a BeagleWksText
178 struct Section final : public MWAWSection {
179 //! constructor
SectionBeagleWksTextInternal::Section180 Section()
181 : MWAWSection()
182 , m_ruler()
183 , m_hasFirstPage(false)
184 , m_hasHeader(false)
185 , m_hasFooter(false)
186 , m_pageNumber(1)
187 , m_usePageNumber(false)
188 , m_extra("")
189 {
190 for (auto &i : m_limitPos) i=0;
191 for (auto &i : m_parsed) i=false;
192 m_heights[0]=m_heights[1]=0;
193 m_balanceText=true;
194 }
195 Section(Section const &)=default;
196 Section &operator=(Section const &)=default;
197 Section &operator=(Section &&)=default;
198 //! destructor
199 ~Section() final;
200 //! return the i^th entry
getEntryBeagleWksTextInternal::Section201 MWAWEntry getEntry(int i) const
202 {
203 MWAWEntry res;
204 if (i<0||i>=4) {
205 MWAW_DEBUG_MSG(("BeagleWksTextInternal::getEntry: called with bad id=%d\n",i));
206 return res;
207 }
208 if (m_limitPos[i]<=0)
209 return res;
210 res.setBegin(m_limitPos[i]);
211 res.setEnd(m_limitPos[i+1]-2);
212 return res;
213 }
214 //! return the header entry
getHeaderEntryBeagleWksTextInternal::Section215 MWAWEntry getHeaderEntry(bool fPage) const
216 {
217 return getEntry(fPage?0:2);
218 }
219 //! return true if we have a header
getFooterEntryBeagleWksTextInternal::Section220 MWAWEntry getFooterEntry(bool fPage) const
221 {
222 return getEntry(fPage?1:3);
223 }
224 //! operator<<
operator <<(std::ostream & o,Section const & sec)225 friend std::ostream &operator<<(std::ostream &o, Section const &sec)
226 {
227 o << static_cast<MWAWSection const &>(sec);
228 for (int i=0; i<4; ++i) {
229 if (sec.m_limitPos[i+1]<=sec.m_limitPos[i]+2)
230 continue;
231 static char const *wh[] = {"header[fP]", "footer[fP]", "header", "footer"};
232 o << wh[i] << "=" << std::hex << sec.m_limitPos[i]
233 << "->" << sec.m_limitPos[i+1] << std::hex << ",";
234 }
235 if (sec.m_hasFirstPage) o << "firstPage[special],";
236 if (!sec.m_hasHeader) o << "hide[header],";
237 else if (sec.m_heights[0]) o << "h[header]=" << sec.m_heights[0] << ",";
238 if (!sec.m_hasFooter) o << "hide[footer],";
239 else if (sec.m_heights[1]) o << "h[footer]=" << sec.m_heights[1] << ",";
240 if (sec.m_pageNumber != 1) o << "pagenumber=" << sec.m_pageNumber << ",";
241 if (sec.m_usePageNumber) o << "pagenumber[use],";
242 o << sec.m_extra;
243 return o;
244 }
245 //! the default section ruler
246 MWAWParagraph m_ruler;
247 //! a flag to know if the first page is special
248 bool m_hasFirstPage;
249 //! a flag to know if we need to print the header
250 bool m_hasHeader;
251 //! a flag to know if we need to print the footer
252 bool m_hasFooter;
253 //! the data limits ( first page header, first page footer, header, footer, end)
254 long m_limitPos[5];
255 //! true if the data are send to the listener
256 mutable bool m_parsed[4];
257 //! the header/footer height
258 int m_heights[2];
259 //! the page number
260 int m_pageNumber;
261 //! true if we need to use the page number
262 bool m_usePageNumber;
263 //! extra data
264 std::string m_extra;
265 };
266
~Section()267 Section::~Section()
268 {
269 }
270
271 ////////////////////////////////////////
272 //! Internal: the state of a BeagleWksText
273 struct State {
274 //! constructor
StateBeagleWksTextInternal::State275 State()
276 : m_textEntry()
277 , m_sectionList()
278 , m_numPagesBySectionList()
279 , m_version(-1)
280 , m_numPages(-1)
281 , m_actualPage(1)
282 {
283 }
284 //! the main text entry
285 MWAWEntry m_textEntry;
286 //! the section list
287 std::vector<Section> m_sectionList;
288 //! the number of page by section
289 std::vector<int> m_numPagesBySectionList;
290 //! the file version
291 mutable int m_version;
292 int m_numPages /* the number of pages */, m_actualPage /* the actual page */;
293 };
294
295 ////////////////////////////////////////
296 //! Internal: the subdocument of a BeagleWksText
297 class SubDocument final : public MWAWSubDocument
298 {
299 public:
SubDocument(BeagleWksText & pars,MWAWInputStreamPtr & input,int hFId,int sId)300 SubDocument(BeagleWksText &pars, MWAWInputStreamPtr &input, int hFId, int sId)
301 : MWAWSubDocument(pars.m_mainParser, input, MWAWEntry())
302 , m_textParser(&pars)
303 , m_hfId(hFId)
304 , m_sectId(sId)
305 {
306 }
307
308 //! destructor
~SubDocument()309 ~SubDocument() final {}
310
311 //! operator!=
312 bool operator!=(MWAWSubDocument const &doc) const final;
313
314 //! the parser function
315 void parse(MWAWListenerPtr &listener, libmwaw::SubDocumentType type) final;
316
317 protected:
318 /** the text parser */
319 BeagleWksText *m_textParser;
320 //! the header/footer id
321 int m_hfId;
322 //! the section id
323 int m_sectId;
324 private:
325 SubDocument(SubDocument const &orig) = delete;
326 SubDocument &operator=(SubDocument const &orig) = delete;
327 };
328
operator !=(MWAWSubDocument const & doc) const329 bool SubDocument::operator!=(MWAWSubDocument const &doc) const
330 {
331 if (MWAWSubDocument::operator!=(doc)) return true;
332 auto const *sDoc = dynamic_cast<SubDocument const *>(&doc);
333 if (!sDoc) return true;
334 if (m_textParser != sDoc->m_textParser) return true;
335 if (m_hfId != sDoc->m_hfId) return true;
336 if (m_sectId != sDoc->m_sectId) return true;
337 return false;
338 }
339
parse(MWAWListenerPtr & listener,libmwaw::SubDocumentType)340 void SubDocument::parse(MWAWListenerPtr &listener, libmwaw::SubDocumentType /*type*/)
341 {
342 if (!listener.get()) {
343 MWAW_DEBUG_MSG(("BeagleWksTextInternal::SubDocument::parse: no listener\n"));
344 return;
345 }
346 if (!m_textParser) {
347 MWAW_DEBUG_MSG(("BeagleWksTextInternal::SubDocument::parse: no text parser\n"));
348 return;
349 }
350
351 long pos = m_input->tell();
352 m_textParser->sendHF(m_hfId, m_sectId);
353 m_input->seek(pos, librevenge::RVNG_SEEK_SET);
354 }
355 }
356
357 ////////////////////////////////////////////////////////////
358 // constructor/destructor, ...
359 ////////////////////////////////////////////////////////////
BeagleWksText(BeagleWksParser & parser)360 BeagleWksText::BeagleWksText(BeagleWksParser &parser)
361 : m_parserState(parser.getParserState())
362 , m_state(new BeagleWksTextInternal::State)
363 , m_structureManager(parser.m_structureManager)
364 , m_mainParser(&parser)
365 {
366 }
367
~BeagleWksText()368 BeagleWksText::~BeagleWksText()
369 {
370 }
371
version() const372 int BeagleWksText::version() const
373 {
374 if (m_state->m_version < 0)
375 m_state->m_version = m_parserState->m_version;
376 return m_state->m_version;
377 }
378
numPages() const379 int BeagleWksText::numPages() const
380 {
381 if (m_state->m_numPages <= 0)
382 const_cast<BeagleWksText *>(this)->countPages();
383 return m_state->m_numPages;
384 }
385
getFont(BeagleWksTextInternal::Font const & ft) const386 MWAWFont BeagleWksText::getFont(BeagleWksTextInternal::Font const &ft) const
387 {
388 MWAWFont font=ft.getFont();
389 font.setId(m_structureManager->getFontId(font.id()));
390 return font;
391 }
392
getHeader(int page,int & numSimilar)393 std::shared_ptr<MWAWSubDocument> BeagleWksText::getHeader(int page, int &numSimilar)
394 {
395 numSimilar=1;
396 std::shared_ptr<MWAWSubDocument> res;
397 int actPage=0, newSectionPage=0;
398 size_t s=0;
399 for (; s < m_state->m_numPagesBySectionList.size(); s++) {
400 newSectionPage+=m_state->m_numPagesBySectionList[s];
401 if (newSectionPage>page)
402 break;
403 actPage=newSectionPage;
404 }
405 if (s >= m_state->m_sectionList.size()) {
406 if (m_state->m_numPages>page)
407 numSimilar=m_state->m_numPages-page+1;
408 return res;
409 }
410 auto const &sec=m_state->m_sectionList[s];
411 bool useFPage=page==actPage && sec.m_hasFirstPage;
412 if (!useFPage)
413 numSimilar=newSectionPage-page;
414 if (sec.getHeaderEntry(useFPage).valid())
415 res.reset(new BeagleWksTextInternal::SubDocument
416 (*this, m_parserState->m_input, useFPage?0:2, int(s)));
417 return res;
418 }
419
getFooter(int page,int & numSimilar)420 std::shared_ptr<MWAWSubDocument> BeagleWksText::getFooter(int page, int &numSimilar)
421 {
422 numSimilar=1;
423 std::shared_ptr<MWAWSubDocument> res;
424 int actPage=0, newSectionPage=0;
425 size_t s=0;
426 for (; s < m_state->m_numPagesBySectionList.size(); s++) {
427 newSectionPage+=m_state->m_numPagesBySectionList[s];
428 if (newSectionPage>page)
429 break;
430 actPage=newSectionPage;
431 }
432 if (s >= m_state->m_sectionList.size()) {
433 if (m_state->m_numPages>page)
434 numSimilar=m_state->m_numPages-page+1;
435 return res;
436 }
437 auto const &sec=m_state->m_sectionList[s];
438 bool useFPage=page==actPage && sec.m_hasFirstPage;
439 if (!useFPage)
440 numSimilar=newSectionPage-page;
441 if (sec.getFooterEntry(useFPage).valid())
442 res.reset(new BeagleWksTextInternal::SubDocument
443 (*this, m_parserState->m_input, useFPage?1:3, int(s)));
444 return res;
445 }
446
447 ////////////////////////////////////////////////////////////
448 // Intermediate level
449 ////////////////////////////////////////////////////////////
450
451 //
452 // find the different zones
453 //
createZones(MWAWEntry & entry)454 bool BeagleWksText::createZones(MWAWEntry &entry)
455 {
456 if (!entry.valid() || entry.length()<22) {
457 MWAW_DEBUG_MSG(("BeagleWksText::createZones: the entry seems bad\n"));
458 return false;
459 }
460
461 MWAWInputStreamPtr &input= m_parserState->m_input;
462 long pos=entry.begin();
463 input->seek(pos+4, librevenge::RVNG_SEEK_SET);
464 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
465 libmwaw::DebugStream f;
466 f << "Entries(THeader):";
467 long val=input->readLong(4); // always 0x238
468 if (val!=0x238)
469 f << "f0=" << val << ",";
470 val=input->readLong(2);
471 if (val!=1)
472 f << "f1=" << val << ",";
473 val=long(input->readULong(4));
474 if (val >= entry.length())
475 val = 0;
476 auto nSections=int(entry.length()-val);
477 if (val<22|| nSections<6 || (nSections%6)) {
478 f << "###";
479 MWAW_DEBUG_MSG(("BeagleWksText::createZones: the data size seems bad\n"));
480 return false;
481 }
482 long endPos = pos+val;
483 nSections/=6;
484 for (int i=0; i<2; i++) { // f2=0, f3=6
485 val=input->readLong(2);
486 if (val)
487 f << "f" << i+2 << "=" << val << ",";
488 }
489 val=input->readLong(2);
490 f << "nSect=" << val << ",";
491 if (val!=nSections) {
492 f << "###";
493 MWAW_DEBUG_MSG(("BeagleWksText::createZones: the number of sections/pages seems bad\n"));
494 }
495 // checkme: after junk ?
496 ascFile.addDelimiter(input->tell(),'|');
497 ascFile.addPos(pos);
498 ascFile.addNote(f.str().c_str());
499
500 input->seek(endPos, librevenge::RVNG_SEEK_SET);
501 std::vector<MWAWEntry> listEntries;
502 listEntries.reserve(size_t(nSections));
503 f.str("");
504 f << "Entries(Text):";
505 for (int i=0; i <nSections; ++i) {
506 pos=input->tell();
507 MWAWEntry pEntry;
508 pEntry.setBegin(entry.begin()+long(input->readULong(4)));
509 pEntry.setLength(long(input->readULong(2)));
510 f << std::hex << pEntry.begin() << "<->" << pEntry.end() << std::dec << ",";
511 if (!pEntry.valid() || pEntry.begin()+16 < entry.begin()
512 || pEntry.end()>endPos) {
513 pEntry=MWAWEntry();
514 f << "###";
515 MWAW_DEBUG_MSG(("BeagleWksText::createZones: the page entry %d seems bad\n", i));
516 }
517 listEntries.push_back(pEntry);
518 input->seek(pos+6, librevenge::RVNG_SEEK_SET);
519 }
520 ascFile.addPos(endPos);
521 ascFile.addNote(f.str().c_str());
522
523 size_t p=0;
524 m_state->m_textEntry.setBegin(listEntries[0].begin());
525
526 for (p=0; p+1 < listEntries.size(); ++p) {
527 if (!listEntries[p].valid())
528 continue;
529 if (p) {
530 // use the section signature to diffentiate text/section (changeme)
531 input->seek(listEntries[p].begin(), librevenge::RVNG_SEEK_SET);
532 if (input->readLong(2)==0xdc)
533 break;
534 }
535 m_state->m_textEntry.setEnd(listEntries[p].end());
536 }
537 for (; p < listEntries.size(); ++p) {
538 BeagleWksTextInternal::Section sec;
539 if (listEntries[p].valid() && !readSection(listEntries[p], sec))
540 sec = BeagleWksTextInternal::Section();
541 m_state->m_sectionList.push_back(sec);
542 }
543 input->seek(entry.end(), librevenge::RVNG_SEEK_SET);
544 return m_state->m_textEntry.valid();
545 }
546
countPages()547 void BeagleWksText::countPages()
548 {
549 if (!m_state->m_textEntry.valid()) {
550 MWAW_DEBUG_MSG(("BeagleWksText::countPages: can not find the main entry\n"));
551 return;
552 }
553 MWAWInputStreamPtr &input= m_parserState->m_input;
554 long pos=m_state->m_textEntry.begin(), endPos=m_state->m_textEntry.end();
555 input->seek(pos, librevenge::RVNG_SEEK_SET);
556 int nSectPages=0, nPages=1;
557 while (!input->isEnd()) {
558 pos=input->tell();
559 if (pos>=endPos) break;
560 auto c = static_cast<unsigned char>(input->readULong(1));
561 if (c) continue;
562 c=static_cast<unsigned char>(input->readULong(1));
563 bool done=false;
564 input->seek(pos, librevenge::RVNG_SEEK_SET);
565 switch (c) {
566 case 0: {
567 BeagleWksTextInternal::Font font;
568 done=readFont(font,endPos);
569 break;
570 }
571 case 1: {
572 MWAWParagraph para;
573 done=readParagraph(para,endPos);
574 break;
575 }
576 case 2:
577 if (pos+6 > endPos)
578 break;
579 input->seek(4, librevenge::RVNG_SEEK_CUR);
580 done = input->readLong(2)==0x200;
581 break;
582 case 3: { // type 3:page 4:section
583 if (pos+6 > endPos)
584 break;
585 input->seek(2, librevenge::RVNG_SEEK_CUR);
586 auto type=static_cast<int>(input->readLong(2));
587 if (input->readLong(2)!=0x300)
588 break;
589 if (type==3) {
590 nSectPages++;
591 nPages++;
592 }
593 else if (type==4) {
594 m_state->m_numPagesBySectionList.push_back(nSectPages);
595 nSectPages=0;
596 }
597 done=true;
598 break;
599 }
600 case 4: // picture
601 if (pos+8 > endPos)
602 break;
603 input->seek(6, librevenge::RVNG_SEEK_CUR);
604 done = input->readLong(2)==0x400;
605 break;
606 case 5: // a field
607 if (pos+36 > endPos)
608 break;
609 input->seek(34, librevenge::RVNG_SEEK_CUR);
610 done=input->readLong(2)==0x500;
611 break;
612 default:
613 break;
614 }
615 if (!done)
616 break;
617 }
618 }
619
620 //
621 // send the text
622 //
sendMainText()623 bool BeagleWksText::sendMainText()
624 {
625 return sendText(m_state->m_textEntry);
626 }
627
sendHF(int hfId,int sectId)628 bool BeagleWksText::sendHF(int hfId, int sectId)
629 {
630 if (hfId<0||hfId>=4) {
631 MWAW_DEBUG_MSG(("BeagleWksText::sendHF: hfId=%d is bad\n", hfId));
632 return false;
633 }
634 if (sectId<0||sectId>=static_cast<int>(m_state->m_sectionList.size())) {
635 MWAW_DEBUG_MSG(("BeagleWksText::sendHF: can not find section %d\n", sectId));
636 return false;
637 }
638
639 MWAWInputStreamPtr &input= m_parserState->m_input;
640 long pos=input->tell();
641 auto const &sec=m_state->m_sectionList[size_t(sectId)];
642 sec.m_parsed[hfId]=true;
643 bool ok=sendText(sec.getEntry(hfId));
644 input->seek(pos,librevenge::RVNG_SEEK_SET);
645 return ok;
646 }
647
flushExtra()648 void BeagleWksText::flushExtra()
649 {
650 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
651 for (auto const &sec : m_state->m_sectionList) {
652 for (int j=0; j < 4; ++j) {
653 if (sec.m_parsed[j])
654 continue;
655 MWAWEntry hfEntry=sec.getEntry(j);
656 if (!hfEntry.valid()) {
657 if (hfEntry.begin()>0) {
658 ascFile.addPos(hfEntry.begin());
659 ascFile.addNote("_");
660 }
661 continue;
662 }
663 sendText(hfEntry);
664 }
665 }
666 }
667
sendText(MWAWEntry const & entry)668 bool BeagleWksText::sendText(MWAWEntry const &entry)
669 {
670 MWAWTextListenerPtr listener=m_parserState->m_textListener;
671 if (!listener) {
672 MWAW_DEBUG_MSG(("BeagleWksText::sendText: can not find the listener\n"));
673 return false;
674 }
675 if (!entry.valid()) {
676 MWAW_DEBUG_MSG(("BeagleWksText::sendText: can not find the entry\n"));
677 return false;
678 }
679
680 MWAWInputStreamPtr &input= m_parserState->m_input;
681 long pos=entry.begin(), debPos=pos, endPos=entry.end();
682 bool isMain=entry.begin()==m_state->m_textEntry.begin();
683 size_t actSection=0, numSection=isMain ? m_state->m_sectionList.size() : 0;
684 if (actSection<numSection) {
685 if (listener->isSectionOpened())
686 listener->closeSection();
687 listener->openSection(m_state->m_sectionList[actSection++]);
688 }
689
690 input->seek(pos, librevenge::RVNG_SEEK_SET);
691 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
692 libmwaw::DebugStream f;
693 f << "Text:";
694 BeagleWksTextInternal::Font font;
695 listener->setFont(getFont(font));
696 int actPage = 1, sectPage=1;
697 while (!input->isEnd()) {
698 pos=input->tell();
699 bool last=pos==endPos;
700 unsigned char c = last ? static_cast<unsigned char>(0) :
701 static_cast<unsigned char>(input->readULong(1));
702 if ((c==0 || c==0xd) && pos!=debPos) {
703 ascFile.addPos(debPos);
704 ascFile.addNote(f.str().c_str());
705 debPos=(c==0xd) ? pos+1:pos;
706 f.str("");
707 f << "Text:";
708 }
709 if (last) break;
710 if (c) {
711 f << c;
712 switch (c) {
713 case 0x1: // end zone marker, probably save to ignore
714 break;
715 case 0x9:
716 listener->insertTab();
717 break;
718 case 0xd:
719 listener->insertEOL();
720 break;
721 default:
722 listener->insertCharacter(static_cast<unsigned char>(c));
723 break;
724 }
725 continue;
726 }
727 c=static_cast<unsigned char>(input->readULong(1));
728 bool done=false;
729 input->seek(pos, librevenge::RVNG_SEEK_SET);
730 switch (c) {
731 case 0:
732 if (!readFont(font,endPos))
733 break;
734 done=true;
735 listener->setFont(getFont(font));
736 break;
737 case 1: {
738 MWAWParagraph para;
739 if (!readParagraph(para,endPos))
740 break;
741 done=true;
742 listener->setParagraph(para);
743 break;
744 }
745 case 2: {
746 if (pos+6 > endPos)
747 break;
748 input->seek(2, librevenge::RVNG_SEEK_CUR);
749 auto type=static_cast<int>(input->readLong(2));
750 if (input->readLong(2)!=0x200)
751 break;
752 f.str("");
753 f << "Entries(Field):";
754 switch (type) {
755 case 0:
756 case 1: {
757 std::stringstream s;
758 if (type==0) {
759 f << "pagenumber[section]";
760 s << sectPage;
761 }
762 else {
763 f << "section";
764 s << actSection;
765 }
766 listener->insertUnicodeString(librevenge::RVNGString(s.str().c_str()));
767 break;
768 }
769 case 2:
770 listener->insertField(MWAWField(MWAWField::PageNumber));
771 f << "pagenumber";
772 break;
773 case 3:
774 listener->insertField(MWAWField(MWAWField::Date));
775 f << "date";
776 break;
777 case 4: {
778 MWAWField field(MWAWField::Time);
779 field.m_DTFormat="%H:%M";
780 listener->insertField(field);
781 f << "time";
782 break;
783 }
784 default:
785 MWAW_DEBUG_MSG(("BeagleWksText::sendText: find unknown field type=%d\n", type));
786 f << "#type=" << type << ",";
787 break;
788 }
789 ascFile.addPos(pos);
790 ascFile.addNote(f.str().c_str());
791 done=true;
792 break;
793 }
794 case 3: {
795 if (pos+6 > endPos)
796 break;
797 input->seek(2, librevenge::RVNG_SEEK_CUR);
798 auto type=static_cast<int>(input->readLong(2));
799 if (input->readLong(2)!=0x300)
800 break;
801 f.str("");
802 f << "Entries(Break):";
803 switch (type) {
804 case 3:
805 f << "pagebreak";
806 sectPage++;
807 if (!isMain) break;
808 m_mainParser->newPage(++actPage);
809 break;
810 case 4:
811 f << "sectionbreak";
812 sectPage=1;
813 if (!isMain) break;
814 if (actSection<numSection) {
815 if (listener->isSectionOpened())
816 listener->closeSection();
817 listener->openSection(m_state->m_sectionList[actSection++]);
818 }
819 else {
820 MWAW_DEBUG_MSG(("BeagleWksText::sendText: can not find the new section\n"));
821 }
822 break;
823 default:
824 MWAW_DEBUG_MSG(("BeagleWksText::sendText: find unknown break type=%d\n", type));
825 f << "#type=" << type << ",";
826 break;
827 }
828 ascFile.addPos(pos);
829 ascFile.addNote(f.str().c_str());
830 done=true;
831 break;
832 }
833 case 4: { // picture
834 if (pos+8 > endPos)
835 break;
836 input->seek(2, librevenge::RVNG_SEEK_CUR);
837 auto val=static_cast<int>(input->readLong(2));
838 auto id=static_cast<int>(input->readULong(2));
839 if (input->readLong(2)!=0x400)
840 break;
841 f.str("");
842 f << "Entries(Picture):id?=" << id << ",";
843 if (val) f << "f0=" << val << ",";
844 m_mainParser->sendFrame(id);
845 ascFile.addPos(pos);
846 ascFile.addNote(f.str().c_str());
847 done=true;
848 break;
849 }
850 case 5: { // a field
851 if (pos+36 > endPos)
852 break;
853 input->seek(2, librevenge::RVNG_SEEK_CUR);
854 f.str("");
855 f << "Entries(Database):";
856 auto fl=static_cast<int>(input->readULong(1)); // find 40
857 if (fl) f << "fl=" << std::hex << fl << std::dec << ",";
858 auto fSz=static_cast<int>(input->readULong(1));
859 if (fSz>30) {
860 MWAW_DEBUG_MSG(("BeagleWksText::sendText: field name size seems bad\n"));
861 fSz=0;
862 f << "###";
863 }
864 std::string name("");
865 listener->insertUnicode(0xab);
866 for (int i=0; i < fSz; ++i) {
867 auto ch=static_cast<unsigned char>(input->readULong(1));
868 listener->insertCharacter(ch);
869 name+=char(ch);
870 }
871 listener->insertUnicode(0xbb);
872 f << name;
873 input->seek(pos+34, librevenge::RVNG_SEEK_SET);
874 if (input->readLong(2)!=0x500)
875 break;
876 ascFile.addPos(pos);
877 ascFile.addNote(f.str().c_str());
878 done=true;
879 break;
880
881 }
882 default:
883 break;
884 }
885 if (done) {
886 debPos=input->tell();
887 f.str("");
888 f << "Text:";
889 continue;
890 }
891 input->seek(pos, librevenge::RVNG_SEEK_SET);
892 break;
893 }
894 if (input->tell()!=endPos) {
895 ascFile.addPos(input->tell());
896 ascFile.addNote("Text:###");
897
898 MWAW_DEBUG_MSG(("BeagleWksText::sendText: find extra data\n"));
899 input->seek(endPos, librevenge::RVNG_SEEK_SET);
900 }
901 ascFile.addPos(endPos);
902 ascFile.addNote("_");
903 return true;
904 }
905
906 //////////////////////////////////////////////
907 // Fonts
908 //////////////////////////////////////////////
readFont(BeagleWksTextInternal::Font & font,long endPos)909 bool BeagleWksText::readFont(BeagleWksTextInternal::Font &font, long endPos)
910 {
911 MWAWInputStreamPtr &input= m_parserState->m_input;
912 long pos=input->tell();
913 if (pos+12 > endPos || input->readLong(2)) {
914 input->seek(pos, librevenge::RVNG_SEEK_SET);
915 return false;
916 }
917
918 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
919 libmwaw::DebugStream f;
920 font.m_size ^= static_cast<int>(input->readLong(2));
921 font.m_flags ^= static_cast<int>(input->readULong(2));
922 font.m_color ^= static_cast<int>(input->readLong(2));
923 auto val=static_cast<int>(input->readULong(1));
924 if (val) // find b1 and 20
925 f << "#f0=" << std::hex << val << std::dec << ",";
926 font.m_id ^= static_cast<int>(input->readULong(1));
927 font.m_extra=f.str();
928 f.str("");
929 f << "Entries(FontDef):" << font;
930 ascFile.addPos(pos);
931 ascFile.addNote(f.str().c_str());
932 // now the reverse header
933 if (input->readLong(2)) {
934 input->seek(pos, librevenge::RVNG_SEEK_SET);
935 return false;
936 }
937 input->seek(pos+12, librevenge::RVNG_SEEK_SET);
938 return true;
939 }
940
941 //////////////////////////////////////////////
942 // Paragraph
943 //////////////////////////////////////////////
readParagraph(MWAWParagraph & para,long endPos,bool inSection)944 bool BeagleWksText::readParagraph(MWAWParagraph ¶, long endPos, bool inSection)
945 {
946 para=MWAWParagraph();
947 MWAWInputStreamPtr &input= m_parserState->m_input;
948 long pos=input->tell();
949 if (pos+23 > endPos) return false;
950
951 int fSz=0;
952 if (!inSection) {
953 bool ok= input->readLong(2)==1;
954 fSz=ok ? static_cast<int>(input->readULong(1)) : 0;
955 if (!ok || fSz < 19 || pos+4+fSz > endPos) {
956 input->seek(pos, librevenge::RVNG_SEEK_SET);
957 return false;
958 }
959 }
960
961 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
962 libmwaw::DebugStream f;
963 para.setInterline(1.+double(input->readULong(1))/10., librevenge::RVNG_PERCENT);
964 // para spacing, before/after
965 para.m_spacings[1] = para.m_spacings[2] =
966 (double(input->readULong(1))/10.)*6./72.;
967 auto fl=static_cast<int>(input->readULong(1));
968 switch (fl&0xf) {
969 case 1: // left
970 break;
971 case 2:
972 para.m_justify=MWAWParagraph::JustificationRight;
973 break;
974 case 4:
975 para.m_justify=MWAWParagraph::JustificationCenter;
976 break;
977 case 8:
978 para.m_justify=MWAWParagraph::JustificationFull;
979 break;
980 default:
981 f << "#align=" << (fl&0xf) << ",";
982 break;
983 }
984 fl &=0xFFF0; // find 60 or 70
985 if (fl) f << "flags=" << std::hex << fl << std::dec << ",";
986 para.m_marginsUnit = librevenge::RVNG_POINT;
987 for (int i=0; i<3; ++i) // left, right, indent
988 para.m_margins[i==2 ? 0 : i+1]=double(input->readLong(4))/65536.;
989 auto nTabs=static_cast<int>(input->readLong(2));
990 if ((inSection && (nTabs < 0 || nTabs>20)) ||
991 (!inSection && 19+nTabs*6!=fSz)) {
992 MWAW_DEBUG_MSG(("BeagleWksText::readParagraph: the number of tabs seems bad\n"));
993 f << "###numTabs=" << nTabs << ",";
994 nTabs=0;
995 }
996 for (int i=0; i<nTabs; ++i) {
997 MWAWTabStop tab;
998 tab.m_position=double(input->readLong(4))/65536./72;
999 auto val=static_cast<int>(input->readLong(1));
1000 switch (val) {
1001 case 1: // left
1002 break;
1003 case 2:
1004 tab.m_alignment=MWAWTabStop::RIGHT;
1005 break;
1006 case 3:
1007 tab.m_alignment=MWAWTabStop::CENTER;
1008 break;
1009 case 4:
1010 tab.m_alignment=MWAWTabStop::DECIMAL;
1011 break;
1012 case 5:
1013 tab.m_alignment=MWAWTabStop::BAR;
1014 break;
1015 default:
1016 MWAW_DEBUG_MSG(("BeagleWksText::readParagraph: find unknown tab align=%d\n", val));
1017 f << "tabs" << i << "[#align=" << tab.m_alignment << "],";
1018 break;
1019 }
1020 auto leader=static_cast<unsigned char>(input->readULong(1));
1021 if (leader) {
1022 int unicode= m_parserState->m_fontConverter->unicode(3, leader);
1023 if (unicode==-1)
1024 tab.m_leaderCharacter =static_cast<unsigned short>(leader);
1025 else
1026 tab.m_leaderCharacter =static_cast<unsigned short>(unicode);
1027 }
1028 para.m_tabs->push_back(tab);
1029 }
1030 para.m_extra=f.str();
1031 f.str("");
1032 f << "Entries(Ruler):" << para;
1033 ascFile.addPos(pos);
1034 ascFile.addNote(f.str().c_str());
1035 if (inSection)
1036 return true;
1037 // now the reverse header
1038 if (static_cast<int>(input->readULong(1))!=fSz || input->readLong(2)!=0x100) {
1039 input->seek(pos, librevenge::RVNG_SEEK_SET);
1040 return false;
1041 }
1042 input->seek(pos+4+fSz, librevenge::RVNG_SEEK_SET);
1043 return true;
1044 }
1045
1046 //////////////////////////////////////////////
1047 // Section
1048 //////////////////////////////////////////////
readSection(MWAWEntry const & entry,BeagleWksTextInternal::Section & sec)1049 bool BeagleWksText::readSection(MWAWEntry const &entry, BeagleWksTextInternal::Section &sec)
1050 {
1051 sec=BeagleWksTextInternal::Section();
1052 if (entry.length()<0xdc) {
1053 MWAW_DEBUG_MSG(("BeagleWksText::readSection: the entry seems bad\n"));
1054 return false;
1055 }
1056 MWAWInputStreamPtr &input= m_parserState->m_input;
1057 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
1058 libmwaw::DebugStream f;
1059
1060 long pos=entry.begin();
1061 input->seek(pos, librevenge::RVNG_SEEK_SET);
1062 if (input->readULong(2)!=0xdc) {
1063 MWAW_DEBUG_MSG(("BeagleWksText::readSection: the section header seems bad\n"));
1064 ascFile.addPos(pos);
1065 ascFile.addNote("Entries(Section):###");
1066 return false;
1067 }
1068
1069 sec.m_limitPos[0]=pos+0xdc;
1070 for (int i=1; i < 5; ++i) {
1071 sec.m_limitPos[i]=pos+long(input->readULong(2));
1072 if (sec.m_limitPos[i]>entry.end()) {
1073 MWAW_DEBUG_MSG(("BeagleWksText::readSection: some limits seem too big\n"));
1074 f << "###limit-" << i << "=" << std::hex << sec.m_limitPos[i-1] << std::dec << ",";
1075 sec.m_limitPos[i]=0;
1076 }
1077 if (sec.m_limitPos[i]<=sec.m_limitPos[i-1]) {
1078 MWAW_DEBUG_MSG(("BeagleWksText::readSection: some limits seem incoherent\n"));
1079 f << "###limit-" << i << "=" << std::hex << sec.m_limitPos[i-1] << "x"
1080 << sec.m_limitPos[i] << std::dec << ",";
1081 }
1082 }
1083 auto nCols=static_cast<int>(input->readULong(1));
1084 if (nCols<0 || nCols>16) {
1085 MWAW_DEBUG_MSG(("BeagleWksText::readSection: the number of columns seems bad\n"));
1086 f << "###nCols=" << nCols << ",";
1087 nCols=1;
1088 }
1089 auto val=long(input->readULong(1)); // 0|1|6|1e
1090 if (val) f << "f0=" << std::hex << val << std::dec << ",";
1091 double colSep=double(input->readLong(4))/65536;
1092 if (colSep<48 || colSep>48)
1093 f << "colSep=" << colSep << ",";
1094 if (nCols>1)
1095 sec.setColumns(nCols, m_mainParser->getPageWidth()/double(nCols), librevenge::RVNG_INCH, colSep/72.);
1096 for (int st=0; st<2; ++st) {
1097 f << ((st==0) ? "header=[" : "footer=[");
1098 sec.m_heights[st]=static_cast<int>(input->readLong(2));
1099 val = input->readLong(2);
1100 if (val) f << "fl=" << val << ",";
1101 val = input->readLong(2); // right/left page ?
1102 if (val!=sec.m_heights[st]) f << "dim2=" << val << ",";
1103 f << "],";
1104 }
1105 sec.m_pageNumber=static_cast<int>(input->readLong(2));
1106 unsigned long flags= input->readULong(4);
1107 sec.m_hasFirstPage = (flags & 0x10000);
1108 if (flags & 0x20000) f << "newPage,";
1109 sec.m_hasHeader = (flags & 0x40000);
1110 sec.m_hasFooter = (flags & 0x80000);
1111 sec.m_usePageNumber = (flags & 0x100000);
1112 if (flags & 0x400000)
1113 sec.m_columnSeparator=MWAWBorder();
1114 flags &= 0xFFA0FFFF;
1115 if (val) f << "flags=" << std::hex << flags << std::dec << ",";
1116 val=input->readLong(2);
1117 if (val!=1) f << "page=" << val << ",";
1118 val=input->readLong(2);
1119 if (val) f << "yPos=" << val << ",";
1120 sec.m_extra=f.str();
1121 f.str("");
1122 f << "Entries(Section):" << sec;
1123 ascFile.addPos(pos);
1124 ascFile.addNote(f.str().c_str());
1125
1126 pos=input->tell();
1127 ascFile.addPos(pos);
1128 ascFile.addNote("Section-II:");
1129
1130 input->seek(entry.begin()+81,librevenge::RVNG_SEEK_SET);
1131 if (!readParagraph(sec.m_ruler, pos+0xda, true)) {
1132 sec.m_ruler=MWAWParagraph();
1133 MWAW_DEBUG_MSG(("BeagleWksText::readSection: can not read the section ruler\n"));
1134 ascFile.addPos(pos+81);
1135 ascFile.addNote("Section(Ruler):###");
1136 }
1137
1138 input->seek(entry.begin()+0xda,librevenge::RVNG_SEEK_SET);
1139 pos=input->tell();
1140 f.str("");
1141 f << "Section-III:";
1142 val=long(input->readULong(2)); // find 3007, 4fef, 7006, fff9 ?
1143 if (val) f << "f0=" << std::hex << val << std::dec << ",";
1144 ascFile.addPos(pos);
1145 ascFile.addNote(f.str().c_str());
1146
1147 input->seek(entry.end(),librevenge::RVNG_SEEK_SET);
1148 return true;
1149 }
1150
1151 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab:
1152