1 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
2
3 /* libmwaw
4 * Version: MPL 2.0 / LGPLv2+
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 2.0 (the "License"); you may not use this file except in compliance with
8 * the License or as specified alternatively below. You may obtain a copy of
9 * the License at http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Major Contributor(s):
17 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
18 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
19 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
20 * Copyright (C) 2006, 2007 Andrew Ziem
21 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
22 *
23 *
24 * All Rights Reserved.
25 *
26 * For minor contributions see the git repository.
27 *
28 * Alternatively, the contents of this file may be used under the terms of
29 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
30 * in which case the provisions of the LGPLv2+ are applicable
31 * instead of those above.
32 */
33
34 #include <iomanip>
35 #include <iostream>
36 #include <limits>
37 #include <map>
38 #include <set>
39 #include <sstream>
40
41 #include <librevenge/librevenge.h>
42
43 #include "MWAWTextListener.hxx"
44 #include "MWAWFont.hxx"
45 #include "MWAWFontConverter.hxx"
46 #include "MWAWParagraph.hxx"
47 #include "MWAWPictMac.hxx"
48 #include "MWAWPosition.hxx"
49
50 #include "RagTimeParser.hxx"
51
52 #include "RagTimeText.hxx"
53
54 /** Internal: the structures of a RagTimeText */
55 namespace RagTimeTextInternal
56 {
57 //! Internal: a token of a RagTimeText
58 struct Token {
59 //! the token's types
60 enum Type { List, Page, PageCount, PageAfter, Date, Time, Unknown };
61 //! constructor
TokenRagTimeTextInternal::Token62 Token()
63 : m_type(Unknown)
64 , m_listLevel(0)
65 , m_DTFormat("")
66 , m_extra("")
67 {
68 for (auto &id : m_listIndices) id=0;
69 }
70 //! operator<<
operator <<(std::ostream & o,Token const & tkn)71 friend std::ostream &operator<<(std::ostream &o, Token const &tkn)
72 {
73 switch (tkn.m_type) {
74 case Token::List:
75 o << "list[" << tkn.m_listLevel << "]=[";
76 for (int i=0; i<4 && i<tkn.m_listLevel; ++i)
77 o << tkn.m_listIndices[i] << ",";
78 o << "],";
79 break;
80 case Token::Page:
81 o << "page,";
82 break;
83 case Token::PageAfter:
84 o << "page+1,";
85 break;
86 case Token::PageCount:
87 o << "page[num],";
88 break;
89 case Token::Date:
90 o << "date[" << tkn.m_DTFormat << "],";
91 break;
92 case Token::Time:
93 o << "time[" << tkn.m_DTFormat << "],";
94 break;
95 case Token::Unknown:
96 #if !defined(__clang__)
97 default:
98 #endif
99 o << "#type[unkn],";
100 break;
101 }
102 o << tkn.m_extra;
103 return o;
104 }
105 //! returns a field corresponding to the token if possible
getFieldRagTimeTextInternal::Token106 bool getField(MWAWField &field) const
107 {
108 switch (m_type) {
109 case Page:
110 field=MWAWField(MWAWField::PageNumber);
111 break;
112 case PageCount:
113 field=MWAWField(MWAWField::PageCount);
114 break;
115 case Date:
116 field=MWAWField(MWAWField::Date);
117 field.m_DTFormat=m_DTFormat;
118 break;
119 case Time:
120 field=MWAWField(MWAWField::Time);
121 field.m_DTFormat=m_DTFormat;
122 break;
123 case PageAfter:
124 case Unknown:
125 case List:
126 #if !defined(__clang__)
127 default:
128 #endif
129 return false;
130 }
131 return true;
132 }
133 //! returns a string corresponding to the list indices
getIndicesStringRagTimeTextInternal::Token134 bool getIndicesString(std::string &str) const
135 {
136 if (m_type!=List) {
137 MWAW_DEBUG_MSG(("RagTimeTextInternal::Token::getIndicesString: must only be called on list token\n"));
138 return false;
139 }
140 std::stringstream s;
141 for (int i=0; i<4 && i<m_listLevel; ++i) {
142 s << m_listIndices[i];
143 if (i==0 || i+1<m_listLevel) s << ".";
144 }
145 str=s.str();
146 return true;
147 }
148
149 //! the token type
150 Type m_type;
151 //! the list level(for a list)
152 int m_listLevel;
153 //! the four list indices
154 int m_listIndices[4];
155 //! the date time format
156 std::string m_DTFormat;
157 //! extra data
158 std::string m_extra;
159 };
160
161 //! Internal: a text's zone of a RagTimeText
162 struct TextZone {
163 //! constructor
TextZoneRagTimeTextInternal::TextZone164 TextZone()
165 : m_textPos()
166 , m_fontPosList()
167 , m_fontList()
168 , m_paragraphPosList()
169 , m_paragraphList()
170 , m_tokenList()
171 , m_isSent(false)
172 {
173 }
174 //! the text zone
175 MWAWEntry m_textPos;
176 //! the beginning of character properties in the text zone
177 std::vector<long> m_fontPosList;
178 //! the list of character's properties
179 std::vector<MWAWFont> m_fontList;
180 //! the beginning of paragraph properties in the text zone
181 std::vector<long> m_paragraphPosList;
182 //! the list of paragraph's properties
183 std::vector<MWAWParagraph> m_paragraphList;
184 //! the list of tokens
185 std::vector<Token> m_tokenList;
186 //! true if the zone is sent to the listener
187 mutable bool m_isSent;
188 };
189
190 ////////////////////////////////////////
191 //! Internal: the state of a RagTimeText
192 struct State {
193 //! constructor
StateRagTimeTextInternal::State194 State()
195 : m_version(-1)
196 , m_localFIdMap()
197 , m_charPropList()
198 , m_idTextMap()
199 {
200 }
201
202 //! return a mac font id corresponding to a local id
getFontIdRagTimeTextInternal::State203 int getFontId(int localId) const
204 {
205 if (m_localFIdMap.find(localId)==m_localFIdMap.end())
206 return localId;
207 return m_localFIdMap.find(localId)->second;
208 }
209
210 //! the file version
211 mutable int m_version;
212 //! a map local fontId->fontId
213 std::map<int, int> m_localFIdMap;
214 //! the character properties
215 std::vector<MWAWFont> m_charPropList;
216 //! a map entry id to text zone
217 std::map<int, std::shared_ptr<TextZone> > m_idTextMap;
218 };
219
220 }
221
222 ////////////////////////////////////////////////////////////
223 // constructor/destructor, ...
224 ////////////////////////////////////////////////////////////
RagTimeText(RagTimeParser & parser)225 RagTimeText::RagTimeText(RagTimeParser &parser)
226 : m_parserState(parser.getParserState())
227 , m_state(new RagTimeTextInternal::State)
228 , m_mainParser(&parser)
229 {
230 }
231
~RagTimeText()232 RagTimeText::~RagTimeText()
233 { }
234
version() const235 int RagTimeText::version() const
236 {
237 if (m_state->m_version < 0)
238 m_state->m_version = m_parserState->m_version;
239 return m_state->m_version;
240 }
241
getFontId(int localId) const242 int RagTimeText::getFontId(int localId) const
243 {
244 return m_state->getFontId(localId);
245 }
246
getCharStyle(int charId,MWAWFont & font) const247 bool RagTimeText::getCharStyle(int charId, MWAWFont &font) const
248 {
249 if (charId<0 || charId>=int(m_state->m_charPropList.size())) {
250 MWAW_DEBUG_MSG(("RagTimeText::readFontNames: can not find char style %d\n", charId));
251 return false;
252 }
253 font=m_state->m_charPropList[size_t(charId)];
254 return true;
255 }
256
257 ////////////////////////////////////////////////////////////
258 // rsrc zone: fonts/character properties
259 ////////////////////////////////////////////////////////////
readFontNames(MWAWEntry & entry)260 bool RagTimeText::readFontNames(MWAWEntry &entry)
261 {
262 MWAWInputStreamPtr input = m_parserState->m_input;
263 long pos=entry.begin();
264 if (pos<=0 || !input->checkPosition(pos+2+0x26)) {
265 MWAW_DEBUG_MSG(("RagTimeText::readFontNames: the position seems bad\n"));
266 return false;
267 }
268 entry.setParsed(true);
269 input->seek(pos, librevenge::RVNG_SEEK_SET);
270 libmwaw::DebugFile &ascFile=m_parserState->m_asciiFile;
271 libmwaw::DebugStream f;
272 f << "Entries(" << entry.type() << ")[" << entry.id() << "]:";
273 auto dSz=static_cast<int>(input->readULong(2));
274 long endPos=pos+2+dSz;
275 auto headerSz=static_cast<int>(input->readULong(2));
276 auto fSz=static_cast<int>(input->readULong(2));
277 auto N=static_cast<int>(input->readULong(2));
278 f << "N=" << N << ",";
279 if (headerSz<0x20 || fSz<0x10 || dSz<headerSz+long(N+1)*fSz || !input->checkPosition(endPos)) {
280 MWAW_DEBUG_MSG(("RagTimeText::readFontNames: the size seems bad\n"));
281 f << "###";
282 ascFile.addPos(pos);
283 ascFile.addNote(f.str().c_str());
284 return false;
285 }
286 input->seek(pos+2+headerSz, librevenge::RVNG_SEEK_SET);
287 ascFile.addPos(pos);
288 ascFile.addNote(f.str().c_str());
289
290 std::set<long> posSet;
291 std::map<int, long> fontIdPosMap;
292 posSet.insert(endPos);
293 for (int i=0; i<=N; ++i) {
294 pos=input->tell();
295 f.str("");
296 f << entry.type() << "-" << i << ":";
297 auto val=static_cast<int>(input->readLong(2)); // small number
298 if (val) f << "f0=" << val << ",";
299 val=static_cast<int>(input->readLong(2)); // always 0?
300 if (val) f << "f1=" << val << ",";
301 auto fPos=static_cast<int>(input->readULong(2));
302 f << "pos[name]=" << std::hex << entry.begin()+2+fPos << std::dec << ",";
303 posSet.insert(entry.begin()+2+fPos);
304 auto fId=static_cast<int>(input->readLong(2));
305 if (fId) f << "fId=" << fId << ",";
306 fontIdPosMap[i]=entry.begin()+2+fPos;
307 ascFile.addDelimiter(input->tell(),'|');
308 input->seek(pos+fSz, librevenge::RVNG_SEEK_SET);
309 ascFile.addPos(pos);
310 ascFile.addNote(f.str().c_str());
311 }
312
313 for (auto it : fontIdPosMap) {
314 pos=it.second;
315 int fId=it.first;
316 if (pos>=endPos) continue;
317 f.str("");
318 f << entry.type() << "[name]:id=" << fId << ",";
319 auto pIt=posSet.find(pos);
320 if (pIt==posSet.end()|| ++pIt==posSet.end()) {
321 MWAW_DEBUG_MSG(("RagTimeText::readFontNames: can not find the end name position\n"));
322 f << "###";
323 ascFile.addPos(pos);
324 ascFile.addNote(f.str().c_str());
325 continue;
326 }
327 input->seek(pos, librevenge::RVNG_SEEK_SET);
328 std::string name("");
329 long nextPos=*pIt;
330 while (!input->isEnd() && input->tell()<nextPos) {
331 auto c=char(input->readULong(1));
332 if (c=='\0') break;
333 name+=c;
334 }
335 f << name;
336 // ok, let update the conversion map
337 m_state->m_localFIdMap[fId]=m_parserState->m_fontConverter->getId(name);
338 ascFile.addPos(pos);
339 ascFile.addNote(f.str().c_str());
340 }
341 return true;
342 }
343
readCharProperties(MWAWEntry & entry)344 bool RagTimeText::readCharProperties(MWAWEntry &entry)
345 {
346 MWAWInputStreamPtr input = m_parserState->m_input;
347 long pos=entry.begin();
348 if (pos<=0 || !input->checkPosition(pos+2+0x26)) {
349 MWAW_DEBUG_MSG(("RagTimeText::readCharProperties: the position seems bad\n"));
350 return false;
351 }
352 entry.setParsed(true);
353 input->seek(pos, librevenge::RVNG_SEEK_SET);
354 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
355 libmwaw::DebugStream f;
356 f << "Entries(CharProp)[" << entry.id() << "]:";
357 auto dSz=static_cast<int>(input->readULong(2));
358 long endPos=pos+2+dSz;
359 auto headerSz=static_cast<int>(input->readULong(2));
360 auto fSz=static_cast<int>(input->readULong(2));
361 auto N=static_cast<int>(input->readULong(2));
362 f << "N=" << N << ",";
363 if (headerSz<0x2c || fSz<42 || dSz!=headerSz+long(N+1)*fSz || !input->checkPosition(endPos)) {
364 MWAW_DEBUG_MSG(("RagTimeText::readCharProperties: the data size seems bad\n"));
365 f << "###";
366 ascFile.addPos(pos);
367 ascFile.addNote(f.str().c_str());
368 return false;
369 }
370 if (fSz>42) {
371 MWAW_DEBUG_MSG(("RagTimeText::readCharProperties: the data size seems odds\n"));
372 f << "###";
373 }
374 input->seek(pos+2+headerSz, librevenge::RVNG_SEEK_SET);
375 ascFile.addPos(pos);
376 ascFile.addNote(f.str().c_str());
377
378 for (int i=0; i<=N; ++i) {
379 pos=input->tell();
380 if (i==N) {
381 input->seek(pos+fSz, librevenge::RVNG_SEEK_SET);
382 ascFile.addPos(pos);
383 ascFile.addNote("CharProp[end]:");
384 break;
385 }
386 f.str("");
387 f << "CharProp-S" << i << ":";
388
389
390 auto val=static_cast<int>(input->readLong(2)); // always 0 or a small negative number
391 if (val) f << "f0=" << val;
392 val=static_cast<int>(input->readLong(2));
393 if (val) f << "used=" << val << ",";
394
395 MWAWFont font;
396 font.setId(getFontId(static_cast<int>(input->readULong(2))-1));
397 auto size= static_cast<int>(input->readULong(2));
398 if (size>1000) {
399 MWAW_DEBUG_MSG(("RagTimeText::readCharProperties: the font size seems bad\n"));
400 f << "###sz=" << size << ",";
401 }
402 font.setSize(float(size));
403 val=static_cast<int>(input->readLong(2)); // always 0?
404 if (val) f << "f1=" << val;
405
406 auto flag = static_cast<int>(input->readULong(2));
407 uint32_t flags=0;
408 if (flag&0x1) flags |= MWAWFont::boldBit;
409 if (flag&0x2) flags |= MWAWFont::italicBit;
410 if (flag&0x4) font.setUnderlineStyle(MWAWFont::Line::Simple);
411 if (flag&0x8) flags |= MWAWFont::embossBit;
412 if (flag&0x10) flags |= MWAWFont::shadowBit;
413 if (flag&0x20) font.setDeltaLetterSpacing(-1);
414 if (flag&0x40) font.setDeltaLetterSpacing(1);
415 if (flag&0x80) font.set(MWAWFont::Script::super100());
416 if (flag&0x100) font.set(MWAWFont::Script::sub100());
417 font.setFlags(flags);
418 // checkme: does the following contains interesting data ?
419 ascFile.addDelimiter(input->tell(), '|');
420 f << font.getDebugString(m_parserState->m_fontConverter);
421 m_state->m_charPropList.push_back(font);
422 input->seek(pos+fSz, librevenge::RVNG_SEEK_SET);
423 ascFile.addPos(pos);
424 ascFile.addNote(f.str().c_str());
425 }
426 return true;
427 }
428
429 ////////////////////////////////////////////////////////////
430 // Intermediate level
431 ////////////////////////////////////////////////////////////
432
433 ////////////////////////////////////////////////////////////
434 // read a zone of text
435 ////////////////////////////////////////////////////////////
readTextZone(MWAWEntry & entry,int width,MWAWColor const & color)436 bool RagTimeText::readTextZone(MWAWEntry &entry, int width, MWAWColor const &color)
437 {
438 MWAWInputStreamPtr input = m_parserState->m_input;
439 int const vers=version();
440 int dataFieldSize=(vers==1||entry.valid()) ? 2 : m_mainParser->getZoneDataFieldSize(entry.id());
441 long pos=entry.begin();
442 if (pos<=0 || !input->checkPosition(pos+5+dataFieldSize+2+6)) {
443 MWAW_DEBUG_MSG(("RagTimeText::readTextZone: the position seems bad\n"));
444 return false;
445 }
446 entry.setParsed(true);
447 input->seek(pos, librevenge::RVNG_SEEK_SET);
448 libmwaw::DebugFile &ascFile=m_parserState->m_asciiFile;
449 libmwaw::DebugStream f;
450 f << "Entries(TextZone):";
451 long endPos=entry.end();
452 if (!entry.valid()) {
453 auto dSz=static_cast<int>(input->readULong(dataFieldSize));
454 endPos=pos+dataFieldSize+dSz;
455 }
456 long begTextZonePos=input->tell();
457 auto numChar=static_cast<int>(input->readULong(2));
458 f << "N=" << numChar << ",";
459 if (!input->checkPosition(endPos) || begTextZonePos+numChar>endPos) {
460 MWAW_DEBUG_MSG(("RagTimeText::readTextZone: the numChar seems bad\n"));
461 f << "###";
462 ascFile.addPos(pos);
463 ascFile.addNote(f.str().c_str());
464 return false;
465 }
466 ascFile.addPos(pos);
467 ascFile.addNote(f.str().c_str());
468 ascFile.addPos(endPos);
469 ascFile.addNote("_");
470
471 std::shared_ptr<RagTimeTextInternal::TextZone> zone(new RagTimeTextInternal::TextZone);
472 pos = input->tell();
473 zone->m_textPos.setBegin(pos);
474 zone->m_textPos.setLength(numChar);
475 if (vers>=2 && (numChar%2)==1)
476 ++numChar;
477 input->seek(pos+numChar, librevenge::RVNG_SEEK_SET);
478
479 if (!readFonts(*zone, color, endPos))
480 return false;
481
482 if (m_state->m_idTextMap.find(entry.id())!=m_state->m_idTextMap.end()) {
483 MWAW_DEBUG_MSG(("RagTimeText::readTextZone: a zone with id=%d already exists\n", entry.id()));
484 }
485 else
486 m_state->m_idTextMap[entry.id()]=zone;
487 if (input->tell()==endPos)
488 return true;
489
490 if (!readParagraphs(*zone, width, endPos))
491 return false;
492 pos=input->tell();
493 if (vers==1) {
494 if (pos!=endPos) {
495 MWAW_DEBUG_MSG(("RagTimeText::readTextZone: find some extra data\n"));
496 ascFile.addPos(pos);
497 ascFile.addNote("TextZone[end]:###");
498 }
499 return true;
500 }
501 // checkme: can this size be a uint32 ?
502 auto dSz=static_cast<int>(input->readULong(2));
503 f.str("");
504 f << "TextZone[A]:";
505 if (pos+2+dSz>endPos) {
506 MWAW_DEBUG_MSG(("RagTimeText::readTextZone: the zoneA size seems bad\n"));
507 f << "###";
508 ascFile.addPos(pos);
509 ascFile.addNote(f.str().c_str());
510 return true;
511 }
512 if (dSz==0) {
513 ascFile.addPos(pos);
514 ascFile.addNote("_");
515 }
516 else {
517 // never seems
518 MWAW_DEBUG_MSG(("RagTimeText::readTextZone: find a zoneA zone!!!\n"));
519 f << "#";
520 ascFile.addPos(pos);
521 ascFile.addNote(f.str().c_str());
522 input->seek(pos+2+dSz, librevenge::RVNG_SEEK_SET);
523 }
524
525 // now the token?
526 if (!readTokens(*zone, endPos))
527 return true;
528
529 if (input->tell()!=endPos) {
530 MWAW_DEBUG_MSG(("RagTimeText::readTextZone: find extra data\n"));
531 ascFile.addPos(pos);
532 ascFile.addNote("TextZone[extra]:###");
533 }
534 return true;
535 }
536
readFonts(RagTimeTextInternal::TextZone & zone,MWAWColor const & color,long endPos)537 bool RagTimeText::readFonts(RagTimeTextInternal::TextZone &zone, MWAWColor const &color, long endPos)
538 {
539 MWAWInputStreamPtr input = m_parserState->m_input;
540 int const vers=version();
541 long pos=input->tell();
542
543 libmwaw::DebugFile &ascFile=m_parserState->m_asciiFile;
544 libmwaw::DebugStream f;
545 f << "Entries(TextChar):";
546 auto N=static_cast<int>(input->readULong(2));
547 f << "N=" << N << ",";
548 int const fSz=vers>=2 ? 10:8;
549 if (pos+2+fSz*N>endPos+2+4) {
550 MWAW_DEBUG_MSG(("RagTimeText::readFonts: the number of styles seems bad\n"));
551 f << "###";
552 ascFile.addPos(pos);
553 ascFile.addNote(f.str().c_str());
554 return false;
555 }
556 ascFile.addPos(pos);
557 ascFile.addNote(f.str().c_str());
558
559 for (int i=0; i < N; ++i) {
560 pos = input->tell();
561 f.str("");
562 f << "TextChar-C" << i << ":";
563 auto textPos=long(input->readULong(2));
564 f << "pos=" << textPos << ",";
565 MWAWFont font;
566 if (vers <=1) {
567 font.setColor(color);
568 auto size= static_cast<int>(input->readULong(1));
569 auto flag = static_cast<int>(input->readULong(1));
570 uint32_t flags=0;
571 if (flag&0x1) flags |= MWAWFont::boldBit;
572 if (flag&0x2) flags |= MWAWFont::italicBit;
573 if (flag&0x4) font.setUnderlineStyle(MWAWFont::Line::Simple);
574 if (flag&0x8) flags |= MWAWFont::embossBit;
575 if (flag&0x10) flags |= MWAWFont::shadowBit;
576 if (flag&0x20) font.setDeltaLetterSpacing(-1);
577 if (flag&0x40) font.setDeltaLetterSpacing(1);
578 if (flag&0x80) font.set(MWAWFont::Script::super100());
579 if (size&0x80) {
580 font.set(MWAWFont::Script::sub100());
581 size&=0x7f;
582 }
583 font.setSize(float(size));
584 font.setFlags(flags);
585 font.setId(getFontId(static_cast<int>(input->readULong(2))));
586 auto val=static_cast<int>(input->readLong(1));
587 if (val) font.setDeltaLetterSpacing(-float(val)/16.0f);
588 val=static_cast<int>(input->readLong(1));
589 if (val) font.set(MWAWFont::Script(-float(val),librevenge::RVNG_POINT));
590 }
591 else {
592 int id=static_cast<int>(input->readULong(2))-1;
593 if (id<0 || id>=static_cast<int>(m_state->m_charPropList.size())) {
594 MWAW_DEBUG_MSG(("RagTimeText::readFonts: the character id seems bad\n"));
595 f << "###";
596 }
597 else
598 font=m_state->m_charPropList[size_t(id)];
599 f << "S" << id << ",";
600 auto val=static_cast<int>(input->readLong(1));
601 if (val) font.setDeltaLetterSpacing(float(val));
602 val=static_cast<int>(input->readLong(1));
603 if (val) f << "f0=" << val << ",";
604 val=static_cast<int>(input->readLong(1));
605 if (val) font.set(MWAWFont::Script(-float(val),librevenge::RVNG_POINT));
606 val=static_cast<int>(input->readULong(1));
607 switch (val) {
608 case 0:
609 font.setLanguage("en_US");
610 break;
611 case 1:
612 font.setLanguage("fr_FR");
613 break;
614 case 2:
615 font.setLanguage("en_UK");
616 break;
617 case 3:
618 font.setLanguage("de_DE");
619 break;
620 case 4:
621 font.setLanguage("it_IT");
622 break;
623 case 5:
624 font.setLanguage("nl_NL");
625 break;
626 case 7:
627 font.setLanguage("sv_SE");
628 break;
629 case 8:
630 font.setLanguage("es_ES");
631 break;
632 case 9:
633 font.setLanguage("da_DK");
634 break;
635 case 10:
636 font.setLanguage("pt_PT");
637 break;
638 case 12:
639 font.setLanguage("nb_NO");
640 break;
641 case 19:
642 font.setLanguage("de_CH");
643 break;
644 case 20:
645 font.setLanguage("el_GR");
646 break;
647 case 24:
648 font.setLanguage("tr_TR");
649 break;
650 case 25:
651 font.setLanguage("hr_HR");
652 break;
653 case 49:
654 font.setLanguage("ru_RU");
655 break;
656 case 0xF4:
657 font.setLanguage("nn_NO");
658 break;
659 default: {
660 static bool first = true;
661 if (first) {
662 first=false;
663 MWAW_DEBUG_MSG(("RagTimeText::readFonts: find some unknown language\n"));
664 }
665 f << "#lang=" << val << ",";
666 break;
667 }
668 }
669 val=static_cast<int>(input->readULong(2))-1;
670 MWAWColor col;
671 if (val && m_mainParser->getColor(val, col))
672 font.setColor(col);
673 else if (val)
674 f << "#col=" << val << ",";
675 }
676 zone.m_fontPosList.push_back(textPos);
677 zone.m_fontList.push_back(font);
678 f << font.getDebugString(m_parserState->m_fontConverter);
679
680 input->seek(pos+fSz, librevenge::RVNG_SEEK_SET);
681 ascFile.addPos(pos);
682 ascFile.addNote(f.str().c_str());
683 }
684 return true;
685 }
686
readParagraphs(RagTimeTextInternal::TextZone & zone,int width,long endPos)687 bool RagTimeText::readParagraphs(RagTimeTextInternal::TextZone &zone, int width, long endPos)
688 {
689 MWAWInputStreamPtr input = m_parserState->m_input;
690 int const vers=version();
691 long pos=input->tell();
692
693 libmwaw::DebugFile &ascFile=m_parserState->m_asciiFile;
694 libmwaw::DebugStream f;
695
696 f << "Entries(TextPara):";
697 auto N=static_cast<int>(input->readULong(2));
698 f << "N=" << N << ",";
699 int const paraSz=vers>=2 ? 48 : 34;
700 if (pos+2+paraSz*N>endPos) {
701 MWAW_DEBUG_MSG(("RagTimeText::readParagraphs: the number of paragrphs seems bad\n"));
702 f << "###";
703 ascFile.addPos(pos);
704 ascFile.addNote(f.str().c_str());
705 return false;
706 }
707 ascFile.addPos(pos);
708 ascFile.addNote(f.str().c_str());
709 int maxNumTabs=vers==1 ? 10 : 16;
710 for (int i=0; i < N; ++i) {
711 pos = input->tell();
712 f.str("");
713 f << "TextPara-P" << i << ":";
714 auto textPos=long(input->readULong(2));
715 f << "pos=" << textPos << ",";
716 MWAWParagraph para;
717 para.m_marginsUnit=librevenge::RVNG_POINT;
718 // add a default border to mimick frame distance to text
719 double const borderSize=4;
720 para.m_margins[1]=borderSize+double(input->readLong(2));
721 para.m_margins[2]=double(width-static_cast<int>(input->readULong(2)))-2*borderSize;
722 if (*para.m_margins[2]<-borderSize) {
723 if (*para.m_margins[2]<-borderSize*2) {
724 MWAW_DEBUG_MSG(("RagTimeText::readParagraphs: the right margins seems bad\n"));
725 f << "##";
726 }
727 f << "margins[right]=" << *para.m_margins[2] << ",";
728 para.m_margins[2]=0;
729 }
730 auto align=static_cast<int>(input->readULong(1));
731 switch (align) {
732 case 0: // left
733 break;
734 case 1:
735 para.m_justify = MWAWParagraph::JustificationCenter;
736 break;
737 case 2:
738 para.m_justify = MWAWParagraph::JustificationRight;
739 break;
740 case 3: // in pratical, look like basic left justification
741 f << "justify,";
742 break;
743 default:
744 MWAW_DEBUG_MSG(("RagTimeText::readParagraphs: find unknown align value\n"));
745 f << "###align=" << align << ",";
746 break;
747 }
748 auto numTabs=static_cast<int>(input->readULong(1));
749 if (numTabs>maxNumTabs) {
750 MWAW_DEBUG_MSG(("RagTimeText::readParagraphs: the number of tabs seems odd\n"));
751 f << "###tabs[num]=" << numTabs << ",";
752 numTabs=0;
753 }
754 auto interlinePoint=static_cast<int>(input->readLong(1));
755 auto interline=static_cast<int>(input->readULong(1));
756 if (interline & 0xF8)
757 f << "interline[high]=" << std::hex << (interline & 0xFC) << std::dec << ",";
758 interline &= 0x7;
759 switch (interline) {
760 case 0:
761 case 1:
762 case 2:
763 para.setInterline(1.+interline*0.5, librevenge::RVNG_PERCENT);
764 break;
765 case 3: // 1line +/- nbPt
766 para.setInterline(1.+interlinePoint/12., librevenge::RVNG_PERCENT, MWAWParagraph::AtLeast);
767 break;
768 case 4:
769 para.setInterline(interlinePoint, librevenge::RVNG_POINT);
770 break;
771 default:
772 MWAW_DEBUG_MSG(("RagTimeText::readParagraphs: unknown interline type\n"));
773 f << "#interline=" << interline << ",";
774 break;
775 }
776
777 para.m_margins[0]=double(input->readLong(2))-*para.m_margins[1];
778 for (int j=0; j<numTabs; ++j) {
779 auto tabPos=static_cast<int>(input->readLong(2));
780 MWAWTabStop tab;
781 if (tabPos<0) {
782 tab.m_alignment=MWAWTabStop::DECIMAL;
783 tabPos*=-1;
784 }
785 else if (tabPos&0x4000) {
786 tab.m_alignment=MWAWTabStop::CENTER;
787 tabPos &= 0x1FFF;
788 }
789 else if (tabPos&0x2000) {
790 tab.m_alignment=MWAWTabStop::RIGHT;
791 tabPos &= 0x1FFF;
792 }
793 tab.m_position=double(tabPos)/72.;
794 para.m_tabs->push_back(tab);
795 }
796 input->seek(pos+12+2*maxNumTabs, librevenge::RVNG_SEEK_SET);
797 auto prev=static_cast<int>(input->readULong(1));
798 auto next=static_cast<int>(input->readULong(1));
799 int wh=0;
800 if (prev&0x80) {
801 wh=2;
802 prev&=0x7f;
803 }
804 if (next&0x80) {
805 wh|=1;
806 next&=0x7f;
807 }
808 switch (wh) {
809 default:
810 case 0: // normal;
811 break;
812 case 1: // +0.5 interline
813 case 2: // +1 interline
814 para.m_spacings[1]=(wh-1)*0.5*12./72.;
815 break;
816 case 3:
817 para.m_spacings[1]=prev/72.;
818 para.m_spacings[2]=next/72.;
819 break;
820 }
821 if (vers>=2) {
822 auto tabSep=char(input->readULong(1));
823 if (tabSep!='.') // fixme: we need to update the decimal tab
824 f << "tab[sep]=" << tabSep << ",";
825 auto val=static_cast<int>(input->readLong(1)); // always 0?
826 if (val) f << "g0=" << val << ",";
827 }
828 f << para;
829 zone.m_paragraphPosList.push_back(textPos);
830 zone.m_paragraphList.push_back(para);
831
832 input->seek(pos+paraSz, librevenge::RVNG_SEEK_SET);
833 ascFile.addPos(pos);
834 ascFile.addNote(f.str().c_str());
835 }
836 return true;
837 }
838
839
readTokens(RagTimeTextInternal::TextZone & zone,long endPos)840 bool RagTimeText::readTokens(RagTimeTextInternal::TextZone &zone, long endPos)
841 {
842 MWAWInputStreamPtr input = m_parserState->m_input;
843 int const vers=version();
844 long pos=input->tell();
845 libmwaw::DebugFile &ascFile=m_parserState->m_asciiFile;
846 libmwaw::DebugStream f;
847 f << "Entries(TextToken):";
848 auto dSz=static_cast<int>(input->readULong(2));
849 if (vers <= 1 || pos+2+dSz>endPos) {
850 MWAW_DEBUG_MSG(("RagTimeText::readTokens: the tokens size seems bad (or unexpected version)\n"));
851 f << "###";
852 ascFile.addPos(pos);
853 ascFile.addNote(f.str().c_str());
854 return false;
855 }
856 endPos=pos+2+dSz;
857 if (dSz==0) {
858 ascFile.addPos(pos);
859 ascFile.addNote("_");
860 return true;
861 }
862 ascFile.addPos(pos);
863 ascFile.addNote(f.str().c_str());
864
865 int n=0;
866 while (!input->isEnd()) {
867 pos=input->tell();
868 if (pos>=endPos) break;
869 f.str("");
870 dSz=static_cast<int>(input->readULong(2));
871 long fEndPos=pos+dSz;
872 if (dSz<3 || fEndPos>endPos) {
873 MWAW_DEBUG_MSG(("RagTimeText::readTokens: the token zone size seems bad\n"));
874 f << "###TextToken";
875 input->seek(endPos, librevenge::RVNG_SEEK_SET);
876 ascFile.addPos(pos);
877 ascFile.addNote(f.str().c_str());
878 return true;
879 }
880 int val;
881 RagTimeTextInternal::Token token;
882 if (dSz==4) {
883 val=static_cast<int>(input->readLong(1));
884 if (val!=1) f << "f0=" << 1 << ",";
885 val=static_cast<int>(input->readLong(1));
886 switch (val) {
887 case 0x2c:
888 token.m_type=RagTimeTextInternal::Token::Page;
889 break;
890 case 0x2d:
891 token.m_type=RagTimeTextInternal::Token::PageAfter;
892 break;
893 case 0x2e:
894 token.m_type=RagTimeTextInternal::Token::PageCount;
895 break;
896 default:
897 MWAW_DEBUG_MSG(("RagTimeText::readTokens: find unknown field\n"));
898 f << "#f1=" << val << ",";
899 }
900 }
901 else if (dSz==6) {
902 val=static_cast<int>(input->readLong(2));
903 if (val!=100) f << "f0=" << val << ",";
904 int format=static_cast<int>(input->readLong(2))-1;
905 // using default, fixme: use file format type here,
906 token.m_type=(format==4 || format==5) ? RagTimeTextInternal::Token::Time :
907 RagTimeTextInternal::Token::Date;
908 if (!m_mainParser->getDateTimeFormat(format, token.m_DTFormat))
909 f << "#";
910 f << "F" << format << ",";
911 }
912 else if (dSz>14) {
913 f << "id?=" << input->readLong(2) << ",";
914 token.m_type=RagTimeTextInternal::Token::List;
915 token.m_listLevel=0;
916 for (int i=0; i< 4; ++i) { // small number
917 token.m_listIndices[i]=static_cast<int>(input->readLong(2));
918 if (token.m_listIndices[i])
919 token.m_listLevel=i+1;
920 }
921 auto sSz=static_cast<int>(input->readULong(1));
922 if (sSz+13>dSz) {
923 MWAW_DEBUG_MSG(("RagTimeText::readTokens: can not find the item format name\n"));
924 f << "###";
925 }
926 else {
927 std::string text("");
928 for (int i=0; i<sSz; ++i)
929 text+=char(input->readULong(1));
930 f << "\"" << text << "\",";
931 }
932 // in 3.2 the size field seems constant
933 if (input->tell()!=fEndPos)
934 ascFile.addDelimiter(input->tell(),'|');
935 }
936 else {
937 MWAW_DEBUG_MSG(("RagTimeText::readTokens: can not determine the token type\n"));
938 f << "###";
939 }
940 token.m_extra=f.str();
941 zone.m_tokenList.push_back(token);
942 f.str("");
943 f << "TextToken-" << n++ << ":" << token;
944 input->seek(fEndPos, librevenge::RVNG_SEEK_SET);
945 ascFile.addPos(pos);
946 ascFile.addNote(f.str().c_str());
947 }
948 return true;
949 }
950
951 ////////////////////////////////////////////////////////////
952 //
953 // Low level
954 //
955 ////////////////////////////////////////////////////////////
956
957 ////////////////////////////////////////////////////////////
958 // send data to the listener
send(int zId,MWAWListenerPtr listener)959 bool RagTimeText::send(int zId, MWAWListenerPtr listener)
960 {
961 if (m_state->m_idTextMap.find(zId)==m_state->m_idTextMap.end() ||
962 !m_state->m_idTextMap.find(zId)->second) {
963 MWAW_DEBUG_MSG(("RagTimeText::send: can not find the text zone %d\n", zId));
964 return false;
965 }
966 return send(*m_state->m_idTextMap.find(zId)->second, listener);
967 }
968
send(RagTimeTextInternal::TextZone const & zone,MWAWListenerPtr listener)969 bool RagTimeText::send(RagTimeTextInternal::TextZone const &zone, MWAWListenerPtr listener)
970 {
971 if (!listener)
972 listener=m_parserState->getMainListener();
973 if (!listener) {
974 MWAW_DEBUG_MSG(("RagTimeText::send: can not find the listener\n"));
975 return false;
976 }
977 zone.m_isSent=true;
978 MWAWEntry entry=zone.m_textPos;
979 if (!entry.valid()) {
980 MWAW_DEBUG_MSG(("RagTimeText::send: the text zone is empty\n"));
981 return false;
982 }
983
984 MWAWInputStreamPtr input = m_parserState->m_input;
985 int const vers=version();
986 libmwaw::DebugFile &ascFile = m_parserState->m_asciiFile;
987 libmwaw::DebugStream f;
988 long pos=entry.begin(), lPos=pos;
989 input->seek(pos, librevenge::RVNG_SEEK_SET);
990 size_t actFont=0, numFont=zone.m_fontPosList.size();
991 if (numFont>zone.m_fontList.size()) numFont=zone.m_fontList.size();
992 size_t actPara=0, numPara=zone.m_paragraphPosList.size();
993 if (numPara>zone.m_paragraphList.size()) numPara=zone.m_paragraphList.size();
994
995 f << "TextZone:";
996 int actToken=0;
997 for (long tPos=0; tPos<entry.length(); ++tPos, ++pos) {
998 if (input->isEnd()) {
999 MWAW_DEBUG_MSG(("RagTimeText::send: oops, find end of file\n"));
1000 break;
1001 }
1002 if (actPara<numPara && zone.m_paragraphPosList[actPara]==tPos) {
1003 if (pos!=lPos) {
1004 ascFile.addPos(lPos);
1005 ascFile.addNote(f.str().c_str());
1006 lPos=pos;
1007 f.str("");
1008 f << "TextZone:";
1009 }
1010 f << "[P" << actPara << "]";
1011 listener->setParagraph(zone.m_paragraphList[actPara++]);
1012 }
1013 if (actFont<numFont && zone.m_fontPosList[actFont]==tPos) {
1014 if (pos!=lPos) {
1015 ascFile.addPos(lPos);
1016 ascFile.addNote(f.str().c_str());
1017 lPos=pos;
1018 f.str("");
1019 f << "TextZone:";
1020 }
1021 f << "[C" << actFont << "]";
1022 listener->setFont(zone.m_fontList[actFont++]);
1023 }
1024 auto c = static_cast<unsigned char>(input->readULong(1));
1025 switch (c) {
1026 case 0: // at the beginning of a zone of text: related to section?
1027 break;
1028 case 1: {
1029 if (vers>=2) {
1030 if (actToken>=int(zone.m_tokenList.size())) {
1031 MWAW_DEBUG_MSG(("RagTimeText::send: can not find token %d\n", actToken));
1032 f << "[#token]";
1033 break;
1034 }
1035 auto const &token=zone.m_tokenList[size_t(actToken++)];
1036 f << "[" << token << "]";
1037 MWAWField field(MWAWField::None);
1038 if (token.getField(field))
1039 listener->insertField(field);
1040 else if (token.m_type==RagTimeTextInternal::Token::PageAfter)
1041 listener->insertUnicodeString(librevenge::RVNGString("#P+1#"));
1042 else if (token.m_type==RagTimeTextInternal::Token::List) {
1043 std::string indices;
1044 if (token.getIndicesString(indices))
1045 listener->insertUnicodeString(librevenge::RVNGString(indices.c_str()));
1046 }
1047 else {
1048 MWAW_DEBUG_MSG(("RagTimeText::send: does not know how to send a token\n"));
1049 f << "##";
1050 }
1051 break;
1052 }
1053 f << "[date]";
1054 MWAWField date(MWAWField::Date);
1055 date.m_DTFormat = "%d/%m/%y";
1056 listener->insertField(date);
1057 break;
1058 }
1059 case 2: {
1060 if (vers>=2) {
1061 MWAW_DEBUG_MSG(("RagTimeText::send: find unexpected char 2\n"));
1062 f << "[#2]";
1063 break;
1064 }
1065 f << "[time]";
1066 MWAWField time(MWAWField::Time);
1067 time.m_DTFormat="%H:%M";
1068 listener->insertField(time);
1069 break;
1070 }
1071 case 3:
1072 if (vers>=2) {
1073 MWAW_DEBUG_MSG(("RagTimeText::send: find unexpected char 3\n"));
1074 f << "[#3]";
1075 break;
1076 }
1077 f << "[page]";
1078 listener->insertField(MWAWField(MWAWField::PageNumber));
1079 break;
1080 case 4:
1081 if (vers>=2) {
1082 MWAW_DEBUG_MSG(("RagTimeText::send: find unexpected char 4\n"));
1083 f << "[#4]";
1084 break;
1085 }
1086 f << "[page+1]";
1087 listener->insertUnicodeString(librevenge::RVNGString("#P+1#"));
1088 break;
1089 case 5:
1090 if (vers>=2) {
1091 MWAW_DEBUG_MSG(("RagTimeText::send: find unexpected char 5\n"));
1092 f << "[#5]";
1093 break;
1094 }
1095 f << "[section]";
1096 listener->insertUnicodeString(librevenge::RVNGString("#S#"));
1097 break;
1098 case 6: // ok, must be the end of the zone
1099 if (vers>=2) {
1100 MWAW_DEBUG_MSG(("RagTimeText::send: find unexpected char 6\n"));
1101 f << "[#6]";
1102 break;
1103 }
1104 f << "[pagebreak]";
1105 break;
1106 case 9:
1107 listener->insertTab();
1108 f << c;
1109 break;
1110 case 0xb:
1111 case 0xd:
1112 listener->insertEOL(c==0xb);
1113 ascFile.addPos(lPos);
1114 ascFile.addNote(f.str().c_str());
1115 lPos=pos+1;
1116 f.str("");
1117 f << "TextZone:";
1118 break;
1119 case 0x1f: // soft hyphen
1120 break;
1121 default:
1122 if (c<=0x1f) {
1123 MWAW_DEBUG_MSG(("RagTimeText::send: find an odd char %x\n", static_cast<unsigned int>(c)));
1124 f << "[#" << std::hex << int(c) << std::dec << "]";
1125 break;
1126 }
1127 listener->insertCharacter(c);
1128 f << c;
1129 break;
1130 }
1131
1132 }
1133
1134 if (lPos!=entry.end()) {
1135 ascFile.addPos(lPos);
1136 ascFile.addNote(f.str().c_str());
1137 }
1138 return true;
1139 }
1140
flushExtra()1141 void RagTimeText::flushExtra()
1142 {
1143 MWAWListenerPtr listener=m_parserState->getMainListener();
1144 if (!listener) {
1145 MWAW_DEBUG_MSG(("RagTimeText::flushExtra: can not find the listener\n"));
1146 return;
1147 }
1148 for (auto it : m_state->m_idTextMap) {
1149 if (!it.second) continue;
1150 RagTimeTextInternal::TextZone const &zone=*it.second;
1151 if (zone.m_isSent) continue;
1152 static bool first=true;
1153 if (first) {
1154 MWAW_DEBUG_MSG(("RagTimeText::flushExtra: find some unsend zone\n"));
1155 first=false;
1156 }
1157 send(zone, listener);
1158 listener->insertEOL();
1159 }
1160 }
1161 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab:
1162