1 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
2 
3 /* libmwaw
4 * Version: MPL 2.0 / LGPLv2+
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 2.0 (the "License"); you may not use this file except in compliance with
8 * the License or as specified alternatively below. You may obtain a copy of
9 * the License at http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Major Contributor(s):
17 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
18 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
19 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
20 * Copyright (C) 2006, 2007 Andrew Ziem
21 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
22 *
23 *
24 * All Rights Reserved.
25 *
26 * For minor contributions see the git repository.
27 *
28 * Alternatively, the contents of this file may be used under the terms of
29 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
30 * in which case the provisions of the LGPLv2+ are applicable
31 * instead of those above.
32 */
33 
34 #include <iomanip>
35 #include <iostream>
36 #include <limits>
37 #include <map>
38 #include <set>
39 #include <sstream>
40 
41 #include <librevenge/librevenge.h>
42 
43 #include "MWAWTextListener.hxx"
44 #include "MWAWFont.hxx"
45 #include "MWAWFontConverter.hxx"
46 #include "MWAWParagraph.hxx"
47 #include "MWAWPictMac.hxx"
48 #include "MWAWPosition.hxx"
49 
50 #include "RagTimeParser.hxx"
51 
52 #include "RagTimeText.hxx"
53 
54 /** Internal: the structures of a RagTimeText */
55 namespace RagTimeTextInternal
56 {
57 //! Internal: a token of a RagTimeText
58 struct Token {
59   //! the token's types
60   enum Type { List, Page, PageCount, PageAfter, Date, Time, Unknown };
61   //! constructor
TokenRagTimeTextInternal::Token62   Token()
63     : m_type(Unknown)
64     , m_listLevel(0)
65     , m_DTFormat("")
66     , m_extra("")
67   {
68     for (auto &id : m_listIndices) id=0;
69   }
70   //! operator<<
operator <<(std::ostream & o,Token const & tkn)71   friend std::ostream &operator<<(std::ostream &o, Token const &tkn)
72   {
73     switch (tkn.m_type) {
74     case Token::List:
75       o << "list[" << tkn.m_listLevel << "]=[";
76       for (int i=0; i<4 && i<tkn.m_listLevel; ++i)
77         o << tkn.m_listIndices[i] << ",";
78       o << "],";
79       break;
80     case Token::Page:
81       o << "page,";
82       break;
83     case Token::PageAfter:
84       o << "page+1,";
85       break;
86     case Token::PageCount:
87       o << "page[num],";
88       break;
89     case Token::Date:
90       o << "date[" << tkn.m_DTFormat << "],";
91       break;
92     case Token::Time:
93       o << "time[" << tkn.m_DTFormat << "],";
94       break;
95     case Token::Unknown:
96 #if !defined(__clang__)
97     default:
98 #endif
99       o << "#type[unkn],";
100       break;
101     }
102     o << tkn.m_extra;
103     return o;
104   }
105   //! returns a field corresponding to the token if possible
getFieldRagTimeTextInternal::Token106   bool getField(MWAWField &field) const
107   {
108     switch (m_type) {
109     case Page:
110       field=MWAWField(MWAWField::PageNumber);
111       break;
112     case PageCount:
113       field=MWAWField(MWAWField::PageCount);
114       break;
115     case Date:
116       field=MWAWField(MWAWField::Date);
117       field.m_DTFormat=m_DTFormat;
118       break;
119     case Time:
120       field=MWAWField(MWAWField::Time);
121       field.m_DTFormat=m_DTFormat;
122       break;
123     case PageAfter:
124     case Unknown:
125     case List:
126 #if !defined(__clang__)
127     default:
128 #endif
129       return false;
130     }
131     return true;
132   }
133   //! returns a string corresponding to the list indices
getIndicesStringRagTimeTextInternal::Token134   bool getIndicesString(std::string &str) const
135   {
136     if (m_type!=List) {
137       MWAW_DEBUG_MSG(("RagTimeTextInternal::Token::getIndicesString: must only be called on list token\n"));
138       return false;
139     }
140     std::stringstream s;
141     for (int i=0; i<4 && i<m_listLevel; ++i) {
142       s << m_listIndices[i];
143       if (i==0 || i+1<m_listLevel) s << ".";
144     }
145     str=s.str();
146     return true;
147   }
148 
149   //! the token type
150   Type m_type;
151   //! the list level(for a list)
152   int m_listLevel;
153   //! the four list indices
154   int m_listIndices[4];
155   //! the date time format
156   std::string m_DTFormat;
157   //! extra data
158   std::string m_extra;
159 };
160 
161 //! Internal: a text's zone of a RagTimeText
162 struct TextZone {
163   //! constructor
TextZoneRagTimeTextInternal::TextZone164   TextZone()
165     : m_textPos()
166     , m_fontPosList()
167     , m_fontList()
168     , m_paragraphPosList()
169     , m_paragraphList()
170     , m_tokenList()
171     , m_isSent(false)
172   {
173   }
174   //! the text zone
175   MWAWEntry m_textPos;
176   //! the beginning of character properties in the text zone
177   std::vector<long> m_fontPosList;
178   //! the list of character's properties
179   std::vector<MWAWFont> m_fontList;
180   //! the beginning of paragraph properties in the text zone
181   std::vector<long> m_paragraphPosList;
182   //! the list of paragraph's properties
183   std::vector<MWAWParagraph> m_paragraphList;
184   //! the list of tokens
185   std::vector<Token> m_tokenList;
186   //! true if the zone is sent to the listener
187   mutable bool m_isSent;
188 };
189 
190 ////////////////////////////////////////
191 //! Internal: the state of a RagTimeText
192 struct State {
193   //! constructor
StateRagTimeTextInternal::State194   State()
195     : m_version(-1)
196     , m_localFIdMap()
197     , m_charPropList()
198     , m_idTextMap()
199   {
200   }
201 
202   //! return a mac font id corresponding to a local id
getFontIdRagTimeTextInternal::State203   int getFontId(int localId) const
204   {
205     if (m_localFIdMap.find(localId)==m_localFIdMap.end())
206       return localId;
207     return m_localFIdMap.find(localId)->second;
208   }
209 
210   //! the file version
211   mutable int m_version;
212   //! a map local fontId->fontId
213   std::map<int, int> m_localFIdMap;
214   //! the character properties
215   std::vector<MWAWFont> m_charPropList;
216   //! a map entry id to text zone
217   std::map<int, std::shared_ptr<TextZone> > m_idTextMap;
218 };
219 
220 }
221 
222 ////////////////////////////////////////////////////////////
223 // constructor/destructor, ...
224 ////////////////////////////////////////////////////////////
RagTimeText(RagTimeParser & parser)225 RagTimeText::RagTimeText(RagTimeParser &parser)
226   : m_parserState(parser.getParserState())
227   , m_state(new RagTimeTextInternal::State)
228   , m_mainParser(&parser)
229 {
230 }
231 
~RagTimeText()232 RagTimeText::~RagTimeText()
233 { }
234 
version() const235 int RagTimeText::version() const
236 {
237   if (m_state->m_version < 0)
238     m_state->m_version = m_parserState->m_version;
239   return m_state->m_version;
240 }
241 
getFontId(int localId) const242 int RagTimeText::getFontId(int localId) const
243 {
244   return m_state->getFontId(localId);
245 }
246 
getCharStyle(int charId,MWAWFont & font) const247 bool RagTimeText::getCharStyle(int charId, MWAWFont &font) const
248 {
249   if (charId<0 || charId>=int(m_state->m_charPropList.size())) {
250     MWAW_DEBUG_MSG(("RagTimeText::readFontNames: can not find char style %d\n", charId));
251     return false;
252   }
253   font=m_state->m_charPropList[size_t(charId)];
254   return true;
255 }
256 
257 ////////////////////////////////////////////////////////////
258 // rsrc zone: fonts/character properties
259 ////////////////////////////////////////////////////////////
readFontNames(MWAWEntry & entry)260 bool RagTimeText::readFontNames(MWAWEntry &entry)
261 {
262   MWAWInputStreamPtr input = m_parserState->m_input;
263   long pos=entry.begin();
264   if (pos<=0 || !input->checkPosition(pos+2+0x26)) {
265     MWAW_DEBUG_MSG(("RagTimeText::readFontNames: the position seems bad\n"));
266     return false;
267   }
268   entry.setParsed(true);
269   input->seek(pos, librevenge::RVNG_SEEK_SET);
270   libmwaw::DebugFile &ascFile=m_parserState->m_asciiFile;
271   libmwaw::DebugStream f;
272   f << "Entries(" << entry.type() << ")[" << entry.id() << "]:";
273   auto dSz=static_cast<int>(input->readULong(2));
274   long endPos=pos+2+dSz;
275   auto headerSz=static_cast<int>(input->readULong(2));
276   auto fSz=static_cast<int>(input->readULong(2));
277   auto N=static_cast<int>(input->readULong(2));
278   f << "N=" << N << ",";
279   if (headerSz<0x20 || fSz<0x10 || dSz<headerSz+long(N+1)*fSz || !input->checkPosition(endPos)) {
280     MWAW_DEBUG_MSG(("RagTimeText::readFontNames: the size seems bad\n"));
281     f << "###";
282     ascFile.addPos(pos);
283     ascFile.addNote(f.str().c_str());
284     return false;
285   }
286   input->seek(pos+2+headerSz, librevenge::RVNG_SEEK_SET);
287   ascFile.addPos(pos);
288   ascFile.addNote(f.str().c_str());
289 
290   std::set<long> posSet;
291   std::map<int, long> fontIdPosMap;
292   posSet.insert(endPos);
293   for (int i=0; i<=N; ++i) {
294     pos=input->tell();
295     f.str("");
296     f << entry.type() << "-" << i << ":";
297     auto val=static_cast<int>(input->readLong(2)); // small number
298     if (val) f << "f0=" << val << ",";
299     val=static_cast<int>(input->readLong(2)); // always 0?
300     if (val) f << "f1=" << val << ",";
301     auto fPos=static_cast<int>(input->readULong(2));
302     f << "pos[name]=" << std::hex << entry.begin()+2+fPos << std::dec << ",";
303     posSet.insert(entry.begin()+2+fPos);
304     auto fId=static_cast<int>(input->readLong(2));
305     if (fId) f << "fId=" << fId << ",";
306     fontIdPosMap[i]=entry.begin()+2+fPos;
307     ascFile.addDelimiter(input->tell(),'|');
308     input->seek(pos+fSz, librevenge::RVNG_SEEK_SET);
309     ascFile.addPos(pos);
310     ascFile.addNote(f.str().c_str());
311   }
312 
313   for (auto it : fontIdPosMap) {
314     pos=it.second;
315     int fId=it.first;
316     if (pos>=endPos) continue;
317     f.str("");
318     f << entry.type() << "[name]:id=" << fId << ",";
319     auto pIt=posSet.find(pos);
320     if (pIt==posSet.end()|| ++pIt==posSet.end()) {
321       MWAW_DEBUG_MSG(("RagTimeText::readFontNames: can not find the end name position\n"));
322       f << "###";
323       ascFile.addPos(pos);
324       ascFile.addNote(f.str().c_str());
325       continue;
326     }
327     input->seek(pos, librevenge::RVNG_SEEK_SET);
328     std::string name("");
329     long nextPos=*pIt;
330     while (!input->isEnd() && input->tell()<nextPos) {
331       auto c=char(input->readULong(1));
332       if (c=='\0') break;
333       name+=c;
334     }
335     f << name;
336     // ok, let update the conversion map
337     m_state->m_localFIdMap[fId]=m_parserState->m_fontConverter->getId(name);
338     ascFile.addPos(pos);
339     ascFile.addNote(f.str().c_str());
340   }
341   return true;
342 }
343 
readCharProperties(MWAWEntry & entry)344 bool RagTimeText::readCharProperties(MWAWEntry &entry)
345 {
346   MWAWInputStreamPtr input = m_parserState->m_input;
347   long pos=entry.begin();
348   if (pos<=0 || !input->checkPosition(pos+2+0x26)) {
349     MWAW_DEBUG_MSG(("RagTimeText::readCharProperties: the position seems bad\n"));
350     return false;
351   }
352   entry.setParsed(true);
353   input->seek(pos, librevenge::RVNG_SEEK_SET);
354   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
355   libmwaw::DebugStream f;
356   f << "Entries(CharProp)[" << entry.id() << "]:";
357   auto dSz=static_cast<int>(input->readULong(2));
358   long endPos=pos+2+dSz;
359   auto headerSz=static_cast<int>(input->readULong(2));
360   auto fSz=static_cast<int>(input->readULong(2));
361   auto N=static_cast<int>(input->readULong(2));
362   f << "N=" << N << ",";
363   if (headerSz<0x2c || fSz<42 || dSz!=headerSz+long(N+1)*fSz || !input->checkPosition(endPos)) {
364     MWAW_DEBUG_MSG(("RagTimeText::readCharProperties: the data size seems bad\n"));
365     f << "###";
366     ascFile.addPos(pos);
367     ascFile.addNote(f.str().c_str());
368     return false;
369   }
370   if (fSz>42) {
371     MWAW_DEBUG_MSG(("RagTimeText::readCharProperties: the data size seems odds\n"));
372     f << "###";
373   }
374   input->seek(pos+2+headerSz, librevenge::RVNG_SEEK_SET);
375   ascFile.addPos(pos);
376   ascFile.addNote(f.str().c_str());
377 
378   for (int i=0; i<=N; ++i) {
379     pos=input->tell();
380     if (i==N) {
381       input->seek(pos+fSz, librevenge::RVNG_SEEK_SET);
382       ascFile.addPos(pos);
383       ascFile.addNote("CharProp[end]:");
384       break;
385     }
386     f.str("");
387     f << "CharProp-S" << i << ":";
388 
389 
390     auto val=static_cast<int>(input->readLong(2)); // always 0 or a small negative number
391     if (val) f << "f0=" << val;
392     val=static_cast<int>(input->readLong(2));
393     if (val) f << "used=" << val << ",";
394 
395     MWAWFont font;
396     font.setId(getFontId(static_cast<int>(input->readULong(2))-1));
397     auto size= static_cast<int>(input->readULong(2));
398     if (size>1000) {
399       MWAW_DEBUG_MSG(("RagTimeText::readCharProperties: the font size seems bad\n"));
400       f << "###sz=" << size << ",";
401     }
402     font.setSize(float(size));
403     val=static_cast<int>(input->readLong(2)); // always 0?
404     if (val) f << "f1=" << val;
405 
406     auto flag = static_cast<int>(input->readULong(2));
407     uint32_t flags=0;
408     if (flag&0x1) flags |= MWAWFont::boldBit;
409     if (flag&0x2) flags |= MWAWFont::italicBit;
410     if (flag&0x4) font.setUnderlineStyle(MWAWFont::Line::Simple);
411     if (flag&0x8) flags |= MWAWFont::embossBit;
412     if (flag&0x10) flags |= MWAWFont::shadowBit;
413     if (flag&0x20) font.setDeltaLetterSpacing(-1);
414     if (flag&0x40) font.setDeltaLetterSpacing(1);
415     if (flag&0x80) font.set(MWAWFont::Script::super100());
416     if (flag&0x100) font.set(MWAWFont::Script::sub100());
417     font.setFlags(flags);
418     // checkme: does the following contains interesting data ?
419     ascFile.addDelimiter(input->tell(), '|');
420     f << font.getDebugString(m_parserState->m_fontConverter);
421     m_state->m_charPropList.push_back(font);
422     input->seek(pos+fSz, librevenge::RVNG_SEEK_SET);
423     ascFile.addPos(pos);
424     ascFile.addNote(f.str().c_str());
425   }
426   return true;
427 }
428 
429 ////////////////////////////////////////////////////////////
430 // Intermediate level
431 ////////////////////////////////////////////////////////////
432 
433 ////////////////////////////////////////////////////////////
434 // read a zone of text
435 ////////////////////////////////////////////////////////////
readTextZone(MWAWEntry & entry,int width,MWAWColor const & color)436 bool RagTimeText::readTextZone(MWAWEntry &entry, int width, MWAWColor const &color)
437 {
438   MWAWInputStreamPtr input = m_parserState->m_input;
439   int const vers=version();
440   int dataFieldSize=(vers==1||entry.valid()) ? 2 : m_mainParser->getZoneDataFieldSize(entry.id());
441   long pos=entry.begin();
442   if (pos<=0 || !input->checkPosition(pos+5+dataFieldSize+2+6)) {
443     MWAW_DEBUG_MSG(("RagTimeText::readTextZone: the position seems bad\n"));
444     return false;
445   }
446   entry.setParsed(true);
447   input->seek(pos, librevenge::RVNG_SEEK_SET);
448   libmwaw::DebugFile &ascFile=m_parserState->m_asciiFile;
449   libmwaw::DebugStream f;
450   f << "Entries(TextZone):";
451   long endPos=entry.end();
452   if (!entry.valid()) {
453     auto dSz=static_cast<int>(input->readULong(dataFieldSize));
454     endPos=pos+dataFieldSize+dSz;
455   }
456   long begTextZonePos=input->tell();
457   auto numChar=static_cast<int>(input->readULong(2));
458   f << "N=" << numChar << ",";
459   if (!input->checkPosition(endPos) || begTextZonePos+numChar>endPos) {
460     MWAW_DEBUG_MSG(("RagTimeText::readTextZone: the numChar seems bad\n"));
461     f << "###";
462     ascFile.addPos(pos);
463     ascFile.addNote(f.str().c_str());
464     return false;
465   }
466   ascFile.addPos(pos);
467   ascFile.addNote(f.str().c_str());
468   ascFile.addPos(endPos);
469   ascFile.addNote("_");
470 
471   std::shared_ptr<RagTimeTextInternal::TextZone> zone(new RagTimeTextInternal::TextZone);
472   pos = input->tell();
473   zone->m_textPos.setBegin(pos);
474   zone->m_textPos.setLength(numChar);
475   if (vers>=2 && (numChar%2)==1)
476     ++numChar;
477   input->seek(pos+numChar, librevenge::RVNG_SEEK_SET);
478 
479   if (!readFonts(*zone, color, endPos))
480     return false;
481 
482   if (m_state->m_idTextMap.find(entry.id())!=m_state->m_idTextMap.end()) {
483     MWAW_DEBUG_MSG(("RagTimeText::readTextZone: a zone with id=%d already exists\n", entry.id()));
484   }
485   else
486     m_state->m_idTextMap[entry.id()]=zone;
487   if (input->tell()==endPos)
488     return true;
489 
490   if (!readParagraphs(*zone, width, endPos))
491     return false;
492   pos=input->tell();
493   if (vers==1) {
494     if (pos!=endPos) {
495       MWAW_DEBUG_MSG(("RagTimeText::readTextZone: find some extra data\n"));
496       ascFile.addPos(pos);
497       ascFile.addNote("TextZone[end]:###");
498     }
499     return true;
500   }
501   // checkme: can this size be a uint32 ?
502   auto dSz=static_cast<int>(input->readULong(2));
503   f.str("");
504   f << "TextZone[A]:";
505   if (pos+2+dSz>endPos) {
506     MWAW_DEBUG_MSG(("RagTimeText::readTextZone: the zoneA size seems bad\n"));
507     f << "###";
508     ascFile.addPos(pos);
509     ascFile.addNote(f.str().c_str());
510     return true;
511   }
512   if (dSz==0) {
513     ascFile.addPos(pos);
514     ascFile.addNote("_");
515   }
516   else {
517     // never seems
518     MWAW_DEBUG_MSG(("RagTimeText::readTextZone: find a zoneA zone!!!\n"));
519     f << "#";
520     ascFile.addPos(pos);
521     ascFile.addNote(f.str().c_str());
522     input->seek(pos+2+dSz, librevenge::RVNG_SEEK_SET);
523   }
524 
525   // now the token?
526   if (!readTokens(*zone, endPos))
527     return true;
528 
529   if (input->tell()!=endPos) {
530     MWAW_DEBUG_MSG(("RagTimeText::readTextZone: find extra data\n"));
531     ascFile.addPos(pos);
532     ascFile.addNote("TextZone[extra]:###");
533   }
534   return true;
535 }
536 
readFonts(RagTimeTextInternal::TextZone & zone,MWAWColor const & color,long endPos)537 bool RagTimeText::readFonts(RagTimeTextInternal::TextZone &zone, MWAWColor const &color, long endPos)
538 {
539   MWAWInputStreamPtr input = m_parserState->m_input;
540   int const vers=version();
541   long pos=input->tell();
542 
543   libmwaw::DebugFile &ascFile=m_parserState->m_asciiFile;
544   libmwaw::DebugStream f;
545   f << "Entries(TextChar):";
546   auto N=static_cast<int>(input->readULong(2));
547   f << "N=" << N << ",";
548   int const fSz=vers>=2 ? 10:8;
549   if (pos+2+fSz*N>endPos+2+4) {
550     MWAW_DEBUG_MSG(("RagTimeText::readFonts: the number of styles seems bad\n"));
551     f << "###";
552     ascFile.addPos(pos);
553     ascFile.addNote(f.str().c_str());
554     return false;
555   }
556   ascFile.addPos(pos);
557   ascFile.addNote(f.str().c_str());
558 
559   for (int i=0; i < N; ++i) {
560     pos = input->tell();
561     f.str("");
562     f << "TextChar-C" << i << ":";
563     auto textPos=long(input->readULong(2));
564     f << "pos=" << textPos << ",";
565     MWAWFont font;
566     if (vers <=1)  {
567       font.setColor(color);
568       auto size= static_cast<int>(input->readULong(1));
569       auto flag = static_cast<int>(input->readULong(1));
570       uint32_t flags=0;
571       if (flag&0x1) flags |= MWAWFont::boldBit;
572       if (flag&0x2) flags |= MWAWFont::italicBit;
573       if (flag&0x4) font.setUnderlineStyle(MWAWFont::Line::Simple);
574       if (flag&0x8) flags |= MWAWFont::embossBit;
575       if (flag&0x10) flags |= MWAWFont::shadowBit;
576       if (flag&0x20) font.setDeltaLetterSpacing(-1);
577       if (flag&0x40) font.setDeltaLetterSpacing(1);
578       if (flag&0x80) font.set(MWAWFont::Script::super100());
579       if (size&0x80) {
580         font.set(MWAWFont::Script::sub100());
581         size&=0x7f;
582       }
583       font.setSize(float(size));
584       font.setFlags(flags);
585       font.setId(getFontId(static_cast<int>(input->readULong(2))));
586       auto val=static_cast<int>(input->readLong(1));
587       if (val) font.setDeltaLetterSpacing(-float(val)/16.0f);
588       val=static_cast<int>(input->readLong(1));
589       if (val) font.set(MWAWFont::Script(-float(val),librevenge::RVNG_POINT));
590     }
591     else {
592       int id=static_cast<int>(input->readULong(2))-1;
593       if (id<0 || id>=static_cast<int>(m_state->m_charPropList.size())) {
594         MWAW_DEBUG_MSG(("RagTimeText::readFonts: the character id seems bad\n"));
595         f << "###";
596       }
597       else
598         font=m_state->m_charPropList[size_t(id)];
599       f << "S" << id << ",";
600       auto val=static_cast<int>(input->readLong(1));
601       if (val) font.setDeltaLetterSpacing(float(val));
602       val=static_cast<int>(input->readLong(1));
603       if (val) f << "f0=" << val << ",";
604       val=static_cast<int>(input->readLong(1));
605       if (val) font.set(MWAWFont::Script(-float(val),librevenge::RVNG_POINT));
606       val=static_cast<int>(input->readULong(1));
607       switch (val) {
608       case 0:
609         font.setLanguage("en_US");
610         break;
611       case 1:
612         font.setLanguage("fr_FR");
613         break;
614       case 2:
615         font.setLanguage("en_UK");
616         break;
617       case 3:
618         font.setLanguage("de_DE");
619         break;
620       case 4:
621         font.setLanguage("it_IT");
622         break;
623       case 5:
624         font.setLanguage("nl_NL");
625         break;
626       case 7:
627         font.setLanguage("sv_SE");
628         break;
629       case 8:
630         font.setLanguage("es_ES");
631         break;
632       case 9:
633         font.setLanguage("da_DK");
634         break;
635       case 10:
636         font.setLanguage("pt_PT");
637         break;
638       case 12:
639         font.setLanguage("nb_NO");
640         break;
641       case 19:
642         font.setLanguage("de_CH");
643         break;
644       case 20:
645         font.setLanguage("el_GR");
646         break;
647       case 24:
648         font.setLanguage("tr_TR");
649         break;
650       case 25:
651         font.setLanguage("hr_HR");
652         break;
653       case 49:
654         font.setLanguage("ru_RU");
655         break;
656       case 0xF4:
657         font.setLanguage("nn_NO");
658         break;
659       default: {
660         static bool first = true;
661         if (first) {
662           first=false;
663           MWAW_DEBUG_MSG(("RagTimeText::readFonts: find some unknown language\n"));
664         }
665         f << "#lang=" << val << ",";
666         break;
667       }
668       }
669       val=static_cast<int>(input->readULong(2))-1;
670       MWAWColor col;
671       if (val && m_mainParser->getColor(val, col))
672         font.setColor(col);
673       else if (val)
674         f << "#col=" << val << ",";
675     }
676     zone.m_fontPosList.push_back(textPos);
677     zone.m_fontList.push_back(font);
678     f << font.getDebugString(m_parserState->m_fontConverter);
679 
680     input->seek(pos+fSz, librevenge::RVNG_SEEK_SET);
681     ascFile.addPos(pos);
682     ascFile.addNote(f.str().c_str());
683   }
684   return true;
685 }
686 
readParagraphs(RagTimeTextInternal::TextZone & zone,int width,long endPos)687 bool RagTimeText::readParagraphs(RagTimeTextInternal::TextZone &zone, int width, long endPos)
688 {
689   MWAWInputStreamPtr input = m_parserState->m_input;
690   int const vers=version();
691   long pos=input->tell();
692 
693   libmwaw::DebugFile &ascFile=m_parserState->m_asciiFile;
694   libmwaw::DebugStream f;
695 
696   f << "Entries(TextPara):";
697   auto N=static_cast<int>(input->readULong(2));
698   f << "N=" << N << ",";
699   int const paraSz=vers>=2 ? 48 : 34;
700   if (pos+2+paraSz*N>endPos) {
701     MWAW_DEBUG_MSG(("RagTimeText::readParagraphs: the number of paragrphs seems bad\n"));
702     f << "###";
703     ascFile.addPos(pos);
704     ascFile.addNote(f.str().c_str());
705     return false;
706   }
707   ascFile.addPos(pos);
708   ascFile.addNote(f.str().c_str());
709   int maxNumTabs=vers==1 ? 10 : 16;
710   for (int i=0; i < N; ++i) {
711     pos = input->tell();
712     f.str("");
713     f << "TextPara-P" << i << ":";
714     auto textPos=long(input->readULong(2));
715     f << "pos=" << textPos << ",";
716     MWAWParagraph para;
717     para.m_marginsUnit=librevenge::RVNG_POINT;
718     // add a default border to mimick frame distance to text
719     double const borderSize=4;
720     para.m_margins[1]=borderSize+double(input->readLong(2));
721     para.m_margins[2]=double(width-static_cast<int>(input->readULong(2)))-2*borderSize;
722     if (*para.m_margins[2]<-borderSize) {
723       if (*para.m_margins[2]<-borderSize*2) {
724         MWAW_DEBUG_MSG(("RagTimeText::readParagraphs: the right margins seems bad\n"));
725         f << "##";
726       }
727       f << "margins[right]=" << *para.m_margins[2] << ",";
728       para.m_margins[2]=0;
729     }
730     auto align=static_cast<int>(input->readULong(1));
731     switch (align) {
732     case 0: // left
733       break;
734     case 1:
735       para.m_justify = MWAWParagraph::JustificationCenter;
736       break;
737     case 2:
738       para.m_justify = MWAWParagraph::JustificationRight;
739       break;
740     case 3: // in pratical, look like basic left justification
741       f << "justify,";
742       break;
743     default:
744       MWAW_DEBUG_MSG(("RagTimeText::readParagraphs: find unknown align value\n"));
745       f << "###align=" << align << ",";
746       break;
747     }
748     auto numTabs=static_cast<int>(input->readULong(1));
749     if (numTabs>maxNumTabs) {
750       MWAW_DEBUG_MSG(("RagTimeText::readParagraphs: the number of tabs seems odd\n"));
751       f << "###tabs[num]=" << numTabs << ",";
752       numTabs=0;
753     }
754     auto interlinePoint=static_cast<int>(input->readLong(1));
755     auto interline=static_cast<int>(input->readULong(1));
756     if (interline & 0xF8)
757       f << "interline[high]=" << std::hex << (interline & 0xFC) << std::dec << ",";
758     interline &= 0x7;
759     switch (interline) {
760     case 0:
761     case 1:
762     case 2:
763       para.setInterline(1.+interline*0.5, librevenge::RVNG_PERCENT);
764       break;
765     case 3: // 1line +/- nbPt
766       para.setInterline(1.+interlinePoint/12., librevenge::RVNG_PERCENT, MWAWParagraph::AtLeast);
767       break;
768     case 4:
769       para.setInterline(interlinePoint, librevenge::RVNG_POINT);
770       break;
771     default:
772       MWAW_DEBUG_MSG(("RagTimeText::readParagraphs: unknown interline type\n"));
773       f << "#interline=" << interline << ",";
774       break;
775     }
776 
777     para.m_margins[0]=double(input->readLong(2))-*para.m_margins[1];
778     for (int j=0; j<numTabs; ++j) {
779       auto tabPos=static_cast<int>(input->readLong(2));
780       MWAWTabStop tab;
781       if (tabPos<0) {
782         tab.m_alignment=MWAWTabStop::DECIMAL;
783         tabPos*=-1;
784       }
785       else if (tabPos&0x4000) {
786         tab.m_alignment=MWAWTabStop::CENTER;
787         tabPos &= 0x1FFF;
788       }
789       else if (tabPos&0x2000) {
790         tab.m_alignment=MWAWTabStop::RIGHT;
791         tabPos &= 0x1FFF;
792       }
793       tab.m_position=double(tabPos)/72.;
794       para.m_tabs->push_back(tab);
795     }
796     input->seek(pos+12+2*maxNumTabs, librevenge::RVNG_SEEK_SET);
797     auto prev=static_cast<int>(input->readULong(1));
798     auto next=static_cast<int>(input->readULong(1));
799     int wh=0;
800     if (prev&0x80) {
801       wh=2;
802       prev&=0x7f;
803     }
804     if (next&0x80) {
805       wh|=1;
806       next&=0x7f;
807     }
808     switch (wh) {
809     default:
810     case 0: // normal;
811       break;
812     case 1: // +0.5 interline
813     case 2: // +1 interline
814       para.m_spacings[1]=(wh-1)*0.5*12./72.;
815       break;
816     case 3:
817       para.m_spacings[1]=prev/72.;
818       para.m_spacings[2]=next/72.;
819       break;
820     }
821     if (vers>=2) {
822       auto tabSep=char(input->readULong(1));
823       if (tabSep!='.') // fixme: we need to update the decimal tab
824         f << "tab[sep]=" << tabSep << ",";
825       auto val=static_cast<int>(input->readLong(1)); // always 0?
826       if (val) f << "g0=" << val << ",";
827     }
828     f << para;
829     zone.m_paragraphPosList.push_back(textPos);
830     zone.m_paragraphList.push_back(para);
831 
832     input->seek(pos+paraSz, librevenge::RVNG_SEEK_SET);
833     ascFile.addPos(pos);
834     ascFile.addNote(f.str().c_str());
835   }
836   return true;
837 }
838 
839 
readTokens(RagTimeTextInternal::TextZone & zone,long endPos)840 bool RagTimeText::readTokens(RagTimeTextInternal::TextZone &zone, long endPos)
841 {
842   MWAWInputStreamPtr input = m_parserState->m_input;
843   int const vers=version();
844   long pos=input->tell();
845   libmwaw::DebugFile &ascFile=m_parserState->m_asciiFile;
846   libmwaw::DebugStream f;
847   f << "Entries(TextToken):";
848   auto dSz=static_cast<int>(input->readULong(2));
849   if (vers <= 1 || pos+2+dSz>endPos) {
850     MWAW_DEBUG_MSG(("RagTimeText::readTokens: the tokens size seems bad (or unexpected version)\n"));
851     f << "###";
852     ascFile.addPos(pos);
853     ascFile.addNote(f.str().c_str());
854     return false;
855   }
856   endPos=pos+2+dSz;
857   if (dSz==0) {
858     ascFile.addPos(pos);
859     ascFile.addNote("_");
860     return true;
861   }
862   ascFile.addPos(pos);
863   ascFile.addNote(f.str().c_str());
864 
865   int n=0;
866   while (!input->isEnd()) {
867     pos=input->tell();
868     if (pos>=endPos) break;
869     f.str("");
870     dSz=static_cast<int>(input->readULong(2));
871     long fEndPos=pos+dSz;
872     if (dSz<3 || fEndPos>endPos) {
873       MWAW_DEBUG_MSG(("RagTimeText::readTokens: the token zone size seems bad\n"));
874       f << "###TextToken";
875       input->seek(endPos, librevenge::RVNG_SEEK_SET);
876       ascFile.addPos(pos);
877       ascFile.addNote(f.str().c_str());
878       return true;
879     }
880     int val;
881     RagTimeTextInternal::Token token;
882     if (dSz==4) {
883       val=static_cast<int>(input->readLong(1));
884       if (val!=1) f << "f0=" << 1 << ",";
885       val=static_cast<int>(input->readLong(1));
886       switch (val) {
887       case 0x2c:
888         token.m_type=RagTimeTextInternal::Token::Page;
889         break;
890       case 0x2d:
891         token.m_type=RagTimeTextInternal::Token::PageAfter;
892         break;
893       case 0x2e:
894         token.m_type=RagTimeTextInternal::Token::PageCount;
895         break;
896       default:
897         MWAW_DEBUG_MSG(("RagTimeText::readTokens: find unknown field\n"));
898         f << "#f1=" << val << ",";
899       }
900     }
901     else if (dSz==6) {
902       val=static_cast<int>(input->readLong(2));
903       if (val!=100) f << "f0=" << val << ",";
904       int format=static_cast<int>(input->readLong(2))-1;
905       // using default, fixme: use file format type here,
906       token.m_type=(format==4 || format==5) ? RagTimeTextInternal::Token::Time :
907                    RagTimeTextInternal::Token::Date;
908       if (!m_mainParser->getDateTimeFormat(format, token.m_DTFormat))
909         f << "#";
910       f << "F" << format << ",";
911     }
912     else if (dSz>14) {
913       f << "id?=" << input->readLong(2) << ",";
914       token.m_type=RagTimeTextInternal::Token::List;
915       token.m_listLevel=0;
916       for (int i=0; i< 4; ++i) { // small number
917         token.m_listIndices[i]=static_cast<int>(input->readLong(2));
918         if (token.m_listIndices[i])
919           token.m_listLevel=i+1;
920       }
921       auto sSz=static_cast<int>(input->readULong(1));
922       if (sSz+13>dSz) {
923         MWAW_DEBUG_MSG(("RagTimeText::readTokens: can not find the item format name\n"));
924         f << "###";
925       }
926       else {
927         std::string text("");
928         for (int i=0; i<sSz; ++i)
929           text+=char(input->readULong(1));
930         f << "\"" << text << "\",";
931       }
932       // in 3.2 the size field seems constant
933       if (input->tell()!=fEndPos)
934         ascFile.addDelimiter(input->tell(),'|');
935     }
936     else {
937       MWAW_DEBUG_MSG(("RagTimeText::readTokens: can not determine the token type\n"));
938       f << "###";
939     }
940     token.m_extra=f.str();
941     zone.m_tokenList.push_back(token);
942     f.str("");
943     f << "TextToken-" << n++ << ":" << token;
944     input->seek(fEndPos, librevenge::RVNG_SEEK_SET);
945     ascFile.addPos(pos);
946     ascFile.addNote(f.str().c_str());
947   }
948   return true;
949 }
950 
951 ////////////////////////////////////////////////////////////
952 //
953 // Low level
954 //
955 ////////////////////////////////////////////////////////////
956 
957 ////////////////////////////////////////////////////////////
958 // send data to the listener
send(int zId,MWAWListenerPtr listener)959 bool RagTimeText::send(int zId, MWAWListenerPtr listener)
960 {
961   if (m_state->m_idTextMap.find(zId)==m_state->m_idTextMap.end() ||
962       !m_state->m_idTextMap.find(zId)->second) {
963     MWAW_DEBUG_MSG(("RagTimeText::send: can not find the text zone %d\n", zId));
964     return false;
965   }
966   return send(*m_state->m_idTextMap.find(zId)->second, listener);
967 }
968 
send(RagTimeTextInternal::TextZone const & zone,MWAWListenerPtr listener)969 bool RagTimeText::send(RagTimeTextInternal::TextZone const &zone, MWAWListenerPtr listener)
970 {
971   if (!listener)
972     listener=m_parserState->getMainListener();
973   if (!listener) {
974     MWAW_DEBUG_MSG(("RagTimeText::send: can not find the listener\n"));
975     return false;
976   }
977   zone.m_isSent=true;
978   MWAWEntry entry=zone.m_textPos;
979   if (!entry.valid()) {
980     MWAW_DEBUG_MSG(("RagTimeText::send: the text zone is empty\n"));
981     return false;
982   }
983 
984   MWAWInputStreamPtr input = m_parserState->m_input;
985   int const vers=version();
986   libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
987   libmwaw::DebugStream f;
988   long pos=entry.begin(), lPos=pos;
989   input->seek(pos, librevenge::RVNG_SEEK_SET);
990   size_t actFont=0, numFont=zone.m_fontPosList.size();
991   if (numFont>zone.m_fontList.size()) numFont=zone.m_fontList.size();
992   size_t actPara=0, numPara=zone.m_paragraphPosList.size();
993   if (numPara>zone.m_paragraphList.size()) numPara=zone.m_paragraphList.size();
994 
995   f << "TextZone:";
996   int actToken=0;
997   for (long tPos=0; tPos<entry.length(); ++tPos, ++pos) {
998     if (input->isEnd()) {
999       MWAW_DEBUG_MSG(("RagTimeText::send: oops, find end of file\n"));
1000       break;
1001     }
1002     if (actPara<numPara && zone.m_paragraphPosList[actPara]==tPos) {
1003       if (pos!=lPos) {
1004         ascFile.addPos(lPos);
1005         ascFile.addNote(f.str().c_str());
1006         lPos=pos;
1007         f.str("");
1008         f << "TextZone:";
1009       }
1010       f << "[P" << actPara << "]";
1011       listener->setParagraph(zone.m_paragraphList[actPara++]);
1012     }
1013     if (actFont<numFont && zone.m_fontPosList[actFont]==tPos) {
1014       if (pos!=lPos) {
1015         ascFile.addPos(lPos);
1016         ascFile.addNote(f.str().c_str());
1017         lPos=pos;
1018         f.str("");
1019         f << "TextZone:";
1020       }
1021       f << "[C" << actFont << "]";
1022       listener->setFont(zone.m_fontList[actFont++]);
1023     }
1024     auto c = static_cast<unsigned char>(input->readULong(1));
1025     switch (c) {
1026     case 0: // at the beginning of a zone of text: related to section?
1027       break;
1028     case 1: {
1029       if (vers>=2) {
1030         if (actToken>=int(zone.m_tokenList.size())) {
1031           MWAW_DEBUG_MSG(("RagTimeText::send: can not find token %d\n", actToken));
1032           f << "[#token]";
1033           break;
1034         }
1035         auto const &token=zone.m_tokenList[size_t(actToken++)];
1036         f << "[" << token << "]";
1037         MWAWField field(MWAWField::None);
1038         if (token.getField(field))
1039           listener->insertField(field);
1040         else if (token.m_type==RagTimeTextInternal::Token::PageAfter)
1041           listener->insertUnicodeString(librevenge::RVNGString("#P+1#"));
1042         else if (token.m_type==RagTimeTextInternal::Token::List) {
1043           std::string indices;
1044           if (token.getIndicesString(indices))
1045             listener->insertUnicodeString(librevenge::RVNGString(indices.c_str()));
1046         }
1047         else {
1048           MWAW_DEBUG_MSG(("RagTimeText::send: does not know how to send a token\n"));
1049           f << "##";
1050         }
1051         break;
1052       }
1053       f << "[date]";
1054       MWAWField date(MWAWField::Date);
1055       date.m_DTFormat = "%d/%m/%y";
1056       listener->insertField(date);
1057       break;
1058     }
1059     case 2: {
1060       if (vers>=2) {
1061         MWAW_DEBUG_MSG(("RagTimeText::send:  find unexpected char 2\n"));
1062         f << "[#2]";
1063         break;
1064       }
1065       f << "[time]";
1066       MWAWField time(MWAWField::Time);
1067       time.m_DTFormat="%H:%M";
1068       listener->insertField(time);
1069       break;
1070     }
1071     case 3:
1072       if (vers>=2) {
1073         MWAW_DEBUG_MSG(("RagTimeText::send:  find unexpected char 3\n"));
1074         f << "[#3]";
1075         break;
1076       }
1077       f << "[page]";
1078       listener->insertField(MWAWField(MWAWField::PageNumber));
1079       break;
1080     case 4:
1081       if (vers>=2) {
1082         MWAW_DEBUG_MSG(("RagTimeText::send:  find unexpected char 4\n"));
1083         f << "[#4]";
1084         break;
1085       }
1086       f << "[page+1]";
1087       listener->insertUnicodeString(librevenge::RVNGString("#P+1#"));
1088       break;
1089     case 5:
1090       if (vers>=2) {
1091         MWAW_DEBUG_MSG(("RagTimeText::send:  find unexpected char 5\n"));
1092         f << "[#5]";
1093         break;
1094       }
1095       f << "[section]";
1096       listener->insertUnicodeString(librevenge::RVNGString("#S#"));
1097       break;
1098     case 6: // ok, must be the end of the zone
1099       if (vers>=2) {
1100         MWAW_DEBUG_MSG(("RagTimeText::send:  find unexpected char 6\n"));
1101         f << "[#6]";
1102         break;
1103       }
1104       f << "[pagebreak]";
1105       break;
1106     case 9:
1107       listener->insertTab();
1108       f << c;
1109       break;
1110     case 0xb:
1111     case 0xd:
1112       listener->insertEOL(c==0xb);
1113       ascFile.addPos(lPos);
1114       ascFile.addNote(f.str().c_str());
1115       lPos=pos+1;
1116       f.str("");
1117       f << "TextZone:";
1118       break;
1119     case 0x1f: // soft hyphen
1120       break;
1121     default:
1122       if (c<=0x1f) {
1123         MWAW_DEBUG_MSG(("RagTimeText::send:  find an odd char %x\n", static_cast<unsigned int>(c)));
1124         f << "[#" << std::hex << int(c) << std::dec << "]";
1125         break;
1126       }
1127       listener->insertCharacter(c);
1128       f << c;
1129       break;
1130     }
1131 
1132   }
1133 
1134   if (lPos!=entry.end()) {
1135     ascFile.addPos(lPos);
1136     ascFile.addNote(f.str().c_str());
1137   }
1138   return true;
1139 }
1140 
flushExtra()1141 void RagTimeText::flushExtra()
1142 {
1143   MWAWListenerPtr listener=m_parserState->getMainListener();
1144   if (!listener) {
1145     MWAW_DEBUG_MSG(("RagTimeText::flushExtra: can not find the listener\n"));
1146     return;
1147   }
1148   for (auto it : m_state->m_idTextMap) {
1149     if (!it.second) continue;
1150     RagTimeTextInternal::TextZone const &zone=*it.second;
1151     if (zone.m_isSent) continue;
1152     static bool first=true;
1153     if (first) {
1154       MWAW_DEBUG_MSG(("RagTimeText::flushExtra: find some unsend zone\n"));
1155       first=false;
1156     }
1157     send(zone, listener);
1158     listener->insertEOL();
1159   }
1160 }
1161 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab:
1162