1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
2 /* libwps
3 * Version: MPL 2.0 / LGPLv2.1+
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * Major Contributor(s):
10 * Copyright (C) 2009, 2011 Alonso Laurent (alonso@loria.fr)
11 * Copyright (C) 2006, 2007 Andrew Ziem
12 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
13 * Copyright (C) 2004 Marc Maurer (uwog@uwog.net)
14 * Copyright (C) 2003-2005 William Lachance (william.lachance@sympatico.ca)
15 *
16 * For minor contributions see the git repository.
17 *
18 * Alternatively, the contents of this file may be used under the terms
19 * of the GNU Lesser General Public License Version 2.1 or later
20 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
21 * applicable instead of those above.
22 *
23 * For further information visit http://libwps.sourceforge.net
24 */
25
26 #ifdef DEBUG_WITH_FILES
27 // set to 1 to debug the font property
28 # define DEBUG_FP 1
29 // set to 1 to debug the paragraph property
30 # define DEBUG_PP 1
31 // set to 1 to print the plc position
32 # define DEBUG_PLC_POS 1
33 #endif
34
35 #include <iomanip>
36 #include <iostream>
37
38 #include <map>
39 #include <vector>
40
41 #include <librevenge/librevenge.h>
42
43 #include "libwps_internal.h"
44 #include "libwps_tools_win.h"
45
46 #include "WPSContentListener.h"
47 #include "WPSFont.h"
48 #include "WPSPosition.h"
49 #include "WPSParagraph.h"
50
51 #include "WPS4.h"
52
53 #include "WPS4Text.h"
54
/** Internal and low level: the structures of a WPS4Text used to parse PLC */
namespace WPS4PLCInternal
{
/** Internal and low level: the PLC different types and their structures.
 *
 * Only forward-declared at this point. */
struct PLC;

/** A map of the known PLC, indexed by name.
 *
 * Only the interface is declared here. */
struct KnownPLC
{
public:
	//! constructor
	KnownPLC();
	//! destructor
	~KnownPLC();
	//! returns (by value) the PLC corresponding to the name \a name
	PLC get(std::string const &name);
protected:
	//! creates the map of known PLC
	void createMapping();
	//! map name -> known PLC
	std::map<std::string, PLC> m_knowns;
};
}
78
79 //////////////////////////////////////////////////////////////////////////////
80 // general enum
81 //////////////////////////////////////////////////////////////////////////////
82 namespace WPS4TextInternal
83 {
/** an enum used to type a text zone.
 *
 * The values 0, 1 and 2 are also used as plain indices when the
 * header/footer/main zones are created, so they must not change. */
enum ZoneType
{
	Z_String = -1,   //!< a basic string (sent without any property)
	Z_Header = 0,    //!< the header zone
	Z_Footer = 1,    //!< the footer zone
	Z_Main = 2,      //!< the main text zone
	Z_Note = 3,      //!< a note zone
	Z_Bookmark = 4,  //!< a bookmark zone
	Z_DLink = 5,     //!< a dos link zone
	Z_Unknown = 6    //!< an unclassified zone
};
86 /** Internal: class to store a font name: name with encoding type */
87 struct FontName
88 {
89 //! constructor with file's version to define the default encoding */
FontNameWPS4TextInternal::FontName90 FontName(int version=4) : m_name(""), m_type(libwps_tools_win::Font::WIN3_WEUROPE)
91 {
92 if (version <= 2) m_type = libwps_tools_win::Font::DOS_850;
93 }
94 //! operator<<
95 friend std::ostream &operator<<(std::ostream &o, FontName const &ft);
96 /** returns the default dos name corresponding to \a id th font */
97 static std::string getDosName(int id);
98
99 //! font name
100 std::string m_name;
101 //! font encoding type
102 libwps_tools_win::Font::Type m_type;
103 };
104 //! operator<< for a font name
operator <<(std::ostream & o,FontName const & ft)105 std::ostream &operator<<(std::ostream &o, FontName const &ft)
106 {
107 if (!ft.m_name.empty()) o << "name='" << ft.m_name << "'";
108 else o << "name='Unknown'";
109 if (ft.m_type!=libwps_tools_win::Font::WIN3_WEUROPE &&
110 ft.m_type!=libwps_tools_win::Font::DOS_850)
111 o << ",type=" << libwps_tools_win::Font::getTypeName(ft.m_type) << ",";
112 return o;
113 }
114
getDosName(int id)115 std::string FontName::getDosName(int id)
116 {
117 switch (id)
118 {
119 case 0:
120 return "Courier";
121 case 1:
122 return "Courier PC";
123 case 3:
124 return "Univers_Scale";
125 case 4:
126 return "Universe";
127 case 6:
128 return "LinePrinterPC";
129 case 7:
130 return "LinePrinter";
131 case 16:
132 return "CGTimes_Scale";
133 case 24:
134 return "CGTimes";
135 default:
136 break;
137 }
138
139 WPS_DEBUG_MSG(("WPS4TextInternal::FontName::getDosName: encountered unknown font %i\n", id));
140 return "Courier";
141 }
142 /** Internal: class to store font properties */
143 struct Font : public WPSFont
144 {
145 //! constructor with file's version to define the default encoding */
FontWPS4TextInternal::Font146 Font(int version=4) : WPSFont(), m_type(libwps_tools_win::Font::WIN3_WEUROPE),
147 m_backColor(0xFFFFFF), m_special(false), m_dlinkId(-1)
148 {
149 if (version <= 2) m_type = libwps_tools_win::Font::DOS_850;
150 }
151 //! returns a default font (Courier12) with file's version to define the default encoding */
getDefaultWPS4TextInternal::Font152 static Font getDefault(int version)
153 {
154 Font res(version);
155 if (version <= 2)
156 res.m_name="Courier";
157 else
158 res.m_name="Times New Roman";
159 res.m_size=12;
160 return res;
161 }
162
163 //! operator<<
164 friend std::ostream &operator<<(std::ostream &o, Font const &ft);
165
166 //! the font encoding type
167 libwps_tools_win::Font::Type m_type;
168 //! background color index
169 uint32_t m_backColor;
170 //! a flag to know if we have a special field (a note), ...
171 bool m_special;
172 //! a id to retrieve a file name ( dos )
173 int m_dlinkId;
174 };
175
176 //! operator<< for font properties
operator <<(std::ostream & o,Font const & ft)177 std::ostream &operator<<(std::ostream &o, Font const &ft)
178 {
179 o << static_cast<WPSFont const &>(ft) << ",";
180
181 if (ft.m_special)
182 {
183 if (ft.m_dlinkId >= 0)
184 o << "spec[" << ft.m_dlinkId << "],";
185 else
186 o << "spec,";
187 }
188
189 if (ft.m_backColor != 0xFFFFFF)
190 o << "bgCol=" << ft.m_backColor << ",";
191 return o;
192 }
193 /** Internal: class to store paragraph properties */
194 struct Paragraph : public WPSParagraph
195 {
196 //! constructor
ParagraphWPS4TextInternal::Paragraph197 Paragraph() : WPSParagraph() { }
198 };
199
200 /** Internal: class to store an note type */
201 struct Note : public WPSEntry
202 {
203 //! constructor
NoteWPS4TextInternal::Note204 Note() : WPSEntry(), m_label(""), m_error("") {}
isNumericWPS4TextInternal::Note205 bool isNumeric() const
206 {
207 return m_label.len()==0;
208 }
209 //! operator <<
operator <<(std::ostream & o,Note const & note)210 friend std::ostream &operator<<(std::ostream &o, Note const ¬e)
211 {
212 if (note.m_label.len())
213 o << "lab=" << note.m_label.cstr() << ",";
214 else
215 o << "numeric,";
216 if (!note.m_error.empty()) o << note.m_error << ",";
217 return o;
218 }
219 //! the label if not numeric
220 librevenge::RVNGString m_label;
221 //! a string used to store the parsing errors
222 std::string m_error;
223 };
224
225
226 /** Internal: class to store an object definition */
227 struct Object
228 {
229 //! constructor
ObjectWPS4TextInternal::Object230 Object() : m_id(-1), m_size(), m_pos(), m_unknown(0), m_extra("") {}
231 //! operator <<
232 friend std::ostream &operator<<(std::ostream &o, Object const &obj);
233
234 //! the object identificator
235 int m_id;
236 //! the object size in the document
237 Vec2f m_size;
238 //! an entry which indicates where the object is defined in the file
239 WPSEntry m_pos;
240 //! unknown data
241 long m_unknown;
242 //! a string used to store the parsing errors
243 std::string m_extra;
244 };
245 //! operator<< for an object
operator <<(std::ostream & o,Object const & obj)246 std::ostream &operator<<(std::ostream &o, Object const &obj)
247 {
248 if (obj.m_id > -1) o << "ole" << obj.m_id;
249 o <<": size(" << obj.m_size << ")";
250 if (obj.m_pos.valid()) o << std::hex << ", def=(0x" << obj.m_pos.begin() << "->" << obj.m_pos.end() << ")" << std::dec;
251 if (obj.m_unknown) o << std::hex << ", unkn=" << obj.m_unknown << std::dec;
252 if (!obj.m_extra.empty()) o << ", err=" << obj.m_extra;
253 return o;
254 }
255
256 /** Internal: class to store an object definition */
257 struct DosLink
258 {
259 //! constructor
DosLinkWPS4TextInternal::DosLink260 DosLink() : m_type(-1), m_width(-1), m_size(), m_name(""), m_pos(), m_extra("") {}
261 //! operator <<
262 friend std::ostream &operator<<(std::ostream &o, DosLink const &dlink);
263
264 //! the type
265 int m_type;
266 //! the width
267 float m_width;
268 //! the object size in the document
269 Vec2f m_size;
270 //! the file name
271 std::string m_name;
272 //! an entry which indicates where the object is defined in the file
273 WPSEntry m_pos;
274 //! a string used to store the parsing errors
275 std::string m_extra;
276 };
277 //! operator<< for an object
operator <<(std::ostream & o,DosLink const & dlink)278 std::ostream &operator<<(std::ostream &o, DosLink const &dlink)
279 {
280 switch (dlink.m_type)
281 {
282 case -1:
283 break;
284 case 1:
285 o << "chart,";
286 break;
287 case 0x81:
288 o << "pict,";
289 break;
290 case 0x40:
291 o << "spreadsheet,";
292 break;
293 default:
294 o << "#type=" << dlink.m_type << ",";
295 break;
296 }
297 if (dlink.m_width >= 0) o << "width?=" << dlink.m_width << ",";
298 if (dlink.m_size.x() >= 0 && (dlink.m_size.y()<0 || dlink.m_size.y()>0))
299 o <<"size=" << dlink.m_size << ",";
300 if (dlink.m_name.length()) o << "name='" << dlink.m_name << "',";
301 if (!dlink.m_extra.empty()) o << ", err=" << dlink.m_extra;
302 return o;
303 }
304
/** Internal: a date/time field format, identified by a type between 0 and 10
 * (-1 means undefined) */
struct DateTime
{
	//! constructor
	DateTime() :
		m_type(-1),
		m_extra("")
	{
	}
	//! returns a format to use with strftime (or an empty string if the type is unknown)
	std::string format() const;
	//! operator <<
	friend std::ostream &operator<<(std::ostream &o, DateTime const &dtime);

	//! the type
	int m_type;
	//! a string used to store the parsing errors
	std::string m_extra;
};

/** returns the strftime format corresponding to m_type: types 0 to 4 are
 * date formats, 5 and 6 date and time formats, 7 to 10 time formats.
 * Any other type gives an empty string. */
std::string DateTime::format() const
{
	// the formats, indexed by type
	static char const *const s_formats[] =
	{
		"%m/%d/%Y",
		"%m/%Y",
		"%d %B %Y",
		"%A %d %B %Y",
		"%B %Y",
		"%m/%d/%Y %I:%M",
		"%m/%d/%Y %I:%M:%S",
		"%I:%M:%S",
		"%I:%M",
		"%H:%M:%S",
		"%H:%M"
	};
	int const numFormats = int(sizeof(s_formats)/sizeof(s_formats[0]));
	if (m_type < 0 || m_type >= numFormats)
		return "";
	return s_formats[m_type];
}

/** operator<< for a date/time format: prints the family of the format (date,
 * date&time, time) with the format index inside this family, then the
 * parsing errors */
std::ostream &operator<<(std::ostream &o, DateTime const &dtime)
{
	int const type = dtime.m_type;
	if (type >= 0 && type <= 4)
		o << "date[F" << type << "],";
	else if (type == 5 || type == 6)
		o << "date&time[F" << type-5 << "],";
	else if (type >= 7 && type <= 10)
		o << "time[F" << type-7 << "],";
	else if (type != -1)
		o << "#type=" << type << ",";

	if (!dtime.m_extra.empty())
		o << ", err=" << dtime.m_extra;
	return o;
}
384
/** the different types of PLC
 *
 * - BTE: font/paragraph properties
 * - OBJECT: object properties
 * - FTNp, FTNd: footnote position in text and footnote content
 * - BKMK: comment field
 * - DTTM: field type: date/time/..
 */
enum PLCType
{
	BTE = 0,
	OBJECT = 1,
	FTNp = 2,
	FTNd = 3,
	BKMK = 4,
	DTTM = 5,
	Unknown = 6
};
394
395 /** Internal: class to store the PLC: Pointer List Content ? */
396 struct DataPLC
397 {
398 //! constructor
DataPLCWPS4TextInternal::DataPLC399 DataPLC(): m_name(""), m_type(Unknown), m_value(-1), m_extra() {}
400 //! operator<<
401 friend std::ostream &operator<<(std::ostream &o, DataPLC const &plc);
402 //! the entry field name
403 std::string m_name;
404 //! the plc type
405 PLCType m_type;
406 //! a potential value
407 long m_value;
408 //! a string used to store the parsing errors
409 std::string m_extra;
410 };
411 //! operator<< for a dataPLC
operator <<(std::ostream & o,DataPLC const & plc)412 std::ostream &operator<<(std::ostream &o, DataPLC const &plc)
413 {
414 o << "type=" << plc.m_name << ",";
415 if (plc.m_value != -1) o << "val=" << std::hex << plc.m_value << std::dec << ", ";
416 if (!plc.m_extra.empty()) o << "errors=(" << plc.m_extra << ")";
417 return o;
418 }
419
420 /** Internal: the state of a WPS4Text */
421 struct State
422 {
423 //! constructor
StateWPS4TextInternal::State424 State() : m_fontNames(), m_fontList(), m_paragraphList(),
425 m_FDPCs(), m_FDPPs(), m_footnoteList(), m_footnoteMap(), m_bookmarkMap(), m_dosLinkList(),
426 m_main(), m_header(), m_footer(), m_otherZones(),
427 m_objectMap(), m_dateTimeMap(), m_plcList(), m_knownPLC()
428 {}
429
430 //! the list of fonts names
431 std::map<int,FontName> m_fontNames;
432 //! the list of all font properties
433 std::vector<Font> m_fontList;
434 //! the list of all paragraph properties
435 std::vector<Paragraph> m_paragraphList;
436
437 //! the list of FDPC entries (ie list to find the font properties lists )
438 std::vector<WPSEntry> m_FDPCs;
439 //! the list of FDPP entries (ie list to find the paragraph properties lists )
440 std::vector<WPSEntry> m_FDPPs;
441
442 //! the footnote entries
443 std::vector<Note> m_footnoteList;
444 //! map: footnote in text -> footnote entry
445 std::map<long,Note const *> m_footnoteMap;
446 //! map: bookmark in text -> bookmark
447 std::map<long, WPSEntry> m_bookmarkMap;
448 //! the dos file links
449 std::vector<DosLink> m_dosLinkList;
450
451 WPSEntry m_main /** the main text zone entry*/,
452 m_header /** the header text entry*/, m_footer /** the footer text entry*/;
453
454 //! the entries which are not in main/header/footer text and in the footnotes
455 std::vector<WPSEntry> m_otherZones;
456 //! map: object in text -> object
457 std::map<long, Object> m_objectMap;
458 //! map: date field in text -> date time format
459 std::map<long, DateTime> m_dateTimeMap;
460 //! a list of all PLCs
461 std::vector<DataPLC> m_plcList;
462 //! the known plc
463 WPS4PLCInternal::KnownPLC m_knownPLC;
464 };
465 }
466
467 //////////////////////////////////////////////////////////////////////////////
468 //
469 // MAIN CODE
470 //
471 //////////////////////////////////////////////////////////////////////////////
472
473 // constructor/destructor
WPS4Text(WPS4Parser & parser,RVNGInputStreamPtr & input)474 WPS4Text::WPS4Text(WPS4Parser &parser, RVNGInputStreamPtr &input) :
475 WPSTextParser(parser, input), m_listener(), m_state()
476 {
477 m_state.reset(new WPS4TextInternal::State);
478 }
479
~WPS4Text()480 WPS4Text::~WPS4Text()
481 {
482 }
483
484 // number of page
numPages() const485 int WPS4Text::numPages() const
486 {
487 int numPage = 1;
488 m_input->seek(m_textPositions.begin(), librevenge::RVNG_SEEK_SET);
489 while (!m_input->isEnd() && m_input->tell() != m_textPositions.end())
490 {
491 if (libwps::readU8(m_input.get()) == 0x0C) numPage++;
492 }
493 return numPage;
494 }
495
496 // return main/header/footer/all entry
getHeaderEntry() const497 WPSEntry WPS4Text::getHeaderEntry() const
498 {
499 if (m_state->m_header.valid()) return m_state->m_header;
500 WPS4Parser::NameMultiMap const &nameMultiMap = getNameEntryMap();
501 WPS4Parser::NameMultiMap::const_iterator pos;
502 pos = nameMultiMap.find("SHdr");
503 if (pos == nameMultiMap.end()) return WPSEntry();
504 WPSEntry res = pos->second;
505 res.setType("TEXT");
506 res.setId(WPS4TextInternal::Z_String);
507 return res;
508 }
509
getFooterEntry() const510 WPSEntry WPS4Text::getFooterEntry() const
511 {
512 if (m_state->m_footer.valid()) return m_state->m_footer;
513 WPS4Parser::NameMultiMap const &nameMultiMap = getNameEntryMap();
514 WPS4Parser::NameMultiMap::const_iterator pos;
515 pos = nameMultiMap.find("SFtr");
516 if (pos == nameMultiMap.end()) return WPSEntry();
517 WPSEntry res = pos->second;
518 res.setType("TEXT");
519 res.setId(WPS4TextInternal::Z_String);
520 return res;
521 }
522
/** returns a copy of the main text zone entry stored in the state (no
 * validity check is done here) */
WPSEntry WPS4Text::getMainTextEntry() const
{
	return m_state->m_main;
}
527
getDefaultFontType() const528 libwps_tools_win::Font::Type WPS4Text::getDefaultFontType() const
529 {
530 if (version()<=2)
531 return libwps_tools_win::Font::DOS_850;
532 return libwps_tools_win::Font::WIN3_WEUROPE;
533 }
534
535 ////////////////////////////////////////////////////////////
536 // send the data
537 ////////////////////////////////////////////////////////////
flushExtra()538 void WPS4Text::flushExtra()
539 {
540 if (!m_listener)
541 {
542 WPS_DEBUG_MSG(("WPS4Text::flushExtra can not find the listener\n"));
543 return;
544 }
545 size_t numExtra = m_state->m_otherZones.size();
546 if (numExtra == 0) return;
547
548 m_listener->setFont(WPS4TextInternal::Font::getDefault(version()));
549 m_listener->setParagraph(WPS4TextInternal::Paragraph());
550 m_listener->insertEOL();
551 #ifdef DEBUG
552 librevenge::RVNGString message = "--------- extra text zone -------- ";
553 m_listener->insertUnicodeString(message);
554 #endif
555 for (size_t i = 0; i < numExtra; ++i)
556 readText(m_state->m_otherZones[i]);
557 }
558
/** sends the text of the zone \a zone to the listener.
 *
 * The zone id selects the way the zone is read:
 * - Z_String, Z_Bookmark and Z_DLink zones are sent as one basic string
 *   (the FOD list is not used); in such a string, '&' followed by
 *   p/d/t/f (any case) is converted into a page number/date/time/title field,
 * - for the other zones, the FOD list (m_FODList) is used to split the zone
 *   into runs and to update the font/paragraph properties before each run,
 * - for a Z_Main zone, a section is also opened when the document has more
 *   than one column and each 0x0C character creates a new page.
 * A Z_DLink zone is sent as "include " followed by the link text enclosed
 * between the unicode characters 0x226a and 0x226b.
 *
 * \return false if there is no listener or if the zone is not valid (in
 * this last case, a space is sent to the listener), true otherwise
 */
bool WPS4Text::readText(WPSEntry const &zone)
{
	bool bookmark = zone.id() == WPS4TextInternal::Z_Bookmark;
	bool dlink = zone.id() == WPS4TextInternal::Z_DLink;
	// a simple string is sent in one run, without looking at the FODs
	bool simpleString = zone.id() == WPS4TextInternal::Z_String || bookmark || dlink;
	bool mainZone = zone.id() == WPS4TextInternal::Z_Main;

	if (m_listener.get() == 0L)
	{
		WPS_DEBUG_MSG(("WPS4Text::readText can not find the listener\n"));
		return false;
	}
	if (!zone.valid())
	{
		WPS_DEBUG_MSG(("WPS4Text::readText invalid zone, must not happen\n"));
		m_listener->insertCharacter(' ');
		return false;
	}
	if (mainZone)
	{
		// multi-columns document: open a section with numCols columns of equal width
		int numCols = mainParser().numColumns();
		if (numCols > 1)
		{
			if (m_listener->isSectionOpened())
			{
				WPS_DEBUG_MSG(("WPS4Text::readText the section is already open\n"));
			}
			else
			{
				// column width in points (pageWidth() is multiplied by 72)
				int w = int(72.0*mainParser().pageWidth())/numCols;
				std::vector<int> width;
				width.resize(size_t(numCols), w);
				m_listener->openSection(width,librevenge::RVNG_POINT);
			}
		}
	}
	std::vector<DataFOD>::iterator FODs_iter = m_FODList.begin();

	// update the property to correspond to the text
	int prevFId = -1, prevPId = -1;
	if (simpleString) FODs_iter = m_FODList.end();
	else if (FODs_iter == m_FODList.end() && mainZone)
	{
		WPS_DEBUG_MSG(("WPS4Text::readText: CAN NOT FIND any FODs for main zone, REVERT to basic string!!!!!!!!!\n"));
		simpleString = true;
	}

	// skip the FODs which begin before the zone, keeping the ids of the last
	// font and of the last paragraph properties seen
	for (; FODs_iter!= m_FODList.end(); ++FODs_iter)
	{
		DataFOD const &fod = *(FODs_iter);
		if (fod.m_pos >= zone.begin()) break;

		int id = (*FODs_iter).m_id;
		if (fod.m_type == DataFOD::ATTR_TEXT) prevFId = id;
		else if (fod.m_type == DataFOD::ATTR_PARAG) prevPId = id;
	}

	// set the font which is active at the beginning of the zone
	WPS4TextInternal::Font actFont;
	if (prevFId != -1)
		actFont = m_state->m_fontList[size_t(prevFId)];
	else
	{
		actFont = WPS4TextInternal::Font::getDefault(version());
		actFont.m_type=getDefaultFontType();
	}
	m_listener->setFont(actFont);

	// set the paragraph which is active at the beginning of the zone
	if (prevPId != -1)
		m_listener->setParagraph(m_state->m_paragraphList[size_t(prevPId)]);
	else
		m_listener->setParagraph(WPS4TextInternal::Paragraph());

	if (dlink)
	{
		m_listener->insertUnicodeString("include ");
		m_listener->insertUnicode(0x226a);
	}
	bool first = true;
	int actPage = 1;
	// main loop: one iteration by run of text, ie. the whole zone for a simple
	// string, or the text between two consecutive FOD positions otherwise
	for (; simpleString || FODs_iter!= m_FODList.end(); ++FODs_iter)
	{
		long actPos;
		long lastPos;


		libwps::DebugStream f;
		f << "Text";

		if (simpleString)
		{
			actPos = zone.begin();
			lastPos = zone.end();
		}
		else
		{
			DataFOD fod = *(FODs_iter);
			// the first run always begins at the beginning of the zone
			actPos = first ? zone.begin() : fod.m_pos;
			if (long(actPos) >= zone.end()) break;
			first = false;

			// the run ends at the position of the next FOD (limited to the end
			// of the zone); the iterator is moved forward only to read this position
			if (++FODs_iter!= m_FODList.end())
			{
				lastPos = (*FODs_iter).m_pos;
				if (long(lastPos) >= zone.end()) lastPos = zone.end();
			}
			else
				lastPos = zone.end();
			--FODs_iter;
			int fId = fod.m_id;
			switch (fod.m_type)
			{
			case DataFOD::ATTR_TEXT:
				// a new font: a negative id means the default font
				if (fId >= 0)
					actFont = m_state->m_fontList[size_t(fId)];
				else
				{
					actFont = WPS4TextInternal::Font::getDefault(version());
					actFont.m_type=getDefaultFontType();
				}
				m_listener->setFont(actFont);
#if DEBUG_FP
				f << "[";
				if (fId >= 0) f << "C" << fId << ":" << actFont << "]";
				else f << "_]";
#endif
				break;
			case DataFOD::ATTR_PARAG:
				// a new paragraph: a negative id means the default paragraph
				if (fId >= 0)
					m_listener->setParagraph(m_state->m_paragraphList[size_t(fId)]);
				else
					m_listener->setParagraph(WPS4TextInternal::Paragraph());
#if DEBUG_PP
				f << "[";
				if (fId >= 0) f << "P" << fId << ":" << m_state->m_paragraphList[size_t(fId)] << "]";
				else f << "_]";
#endif
				break;
			case DataFOD::ATTR_PLC:
				// a bookmark: sent as a comment annotation (with a default
				// entry if no bookmark is stored at this position)
				if (fId >= 0 && m_state->m_plcList[size_t(fId)].m_type == WPS4TextInternal::BKMK)
				{
					WPSEntry bkmk;
					if (m_state->m_bookmarkMap.find(actPos) == m_state->m_bookmarkMap.end())
					{
						WPS_DEBUG_MSG(("WPS4Text::readText: can not find the bookmark entry\n"));
					}
					else
						bkmk = m_state->m_bookmarkMap.find(actPos)->second;
					bkmk.setType("TEXT");
					bkmk.setId(WPS4TextInternal::Z_Bookmark);
					mainParser().createDocument(bkmk, libwps::DOC_COMMENT_ANNOTATION);
				}
#if DEBUG_PLC_POS
				f << "[PLC";
				if (fId>= 0) f << m_state->m_plcList[size_t(fId)] << "]";
				else f << "_]";
#endif
				break;
			case DataFOD::ATTR_UNKN:
			default:
				WPS_DEBUG_MSG(("WPS4Text::readText: find unknown plc\n"));
#if DEBUG_PLC_POS
				f << "[DataFOD(###Unknown)]";
#endif
				break;
			}
		}
		// now read the characters of the run [actPos, lastPos[
		m_input->seek(actPos, librevenge::RVNG_SEEK_SET);
		// the raw characters, only used to create the debug note
		std::string chaine("");
		long len = lastPos-actPos;
		for (long i = len; i>0; i--)
		{
			long pos = m_input->tell();
			uint8_t readVal = libwps::readU8(m_input);
			if (0x00 == readVal)
			{
				// a null character is only accepted silently as last character of the run
				if (i != 1)
				{
					WPS_DEBUG_MSG(("WPS4Text::readText: find some unexpected 0 character\n"));
					// probably an error, but we can ignore it
					chaine += '#';
				}
				continue;
			}

			chaine += char(readVal);
			switch (readVal)
			{
			case 0x01: // chart ?
			case 0x08: // spreadsheet range
			case 0x0e: // picture
			{
				// a dos link: only sent if the current font is special and if
				// its link id is compatible with the dos link list
				if (!actFont.m_special || m_state->m_dosLinkList.empty() || actFont.m_dlinkId >= int(m_state->m_dosLinkList.size()))
				{
					WPS_DEBUG_MSG(("WPS4Text::readText: send DLINK can not find id\n"));
					break;
				}
				// a negative link id is replaced by 0
				int id = actFont.m_dlinkId >= 0 ? actFont.m_dlinkId : 0;
				WPSEntry ent = m_state->m_dosLinkList[size_t(id)].m_pos;
				ent.setType("TEXT");
				ent.setId(WPS4TextInternal::Z_DLink);
				// the link text is sent in a text box, followed by an end of line
				WPSPosition pos_(Vec2f(),Vec2f(3.0f,0.2f));
				pos_.setRelativePosition(WPSPosition::Paragraph, WPSPosition::XCenter);
				pos_.m_wrapping = WPSPosition::WNone;
				librevenge::RVNGPropertyList extras;
				mainParser().createTextBox(ent, pos_, extras);
				m_listener->insertEOL();
				break;
			}
			case 0x02:
				m_listener->insertField(WPSContentListener::PageNumber);
				break;
			case 0x03:
				m_listener->insertField(WPSContentListener::Date);
				break;
			case 0x04:
				m_listener->insertField(WPSContentListener::Time);
				break;
			case 0x05:
				m_listener->insertField(WPSContentListener::Title);
				break;
			case 0x07:
			{
				// an object: note that the object is searched with the position
				// of the beginning of the run
				if (m_state->m_objectMap.find(actPos) == m_state->m_objectMap.end())
				{
					WPS_DEBUG_MSG(("WPS4Text::readText: can not find object for position : %lX\n", actPos));
				}
				else
				{
					WPS4TextInternal::Object const &obj = m_state->m_objectMap[actPos];
					if (obj.m_id < 0) break;

					mainParser().sendObject(obj.m_size, obj.m_id);
				}
				break;
			}
			case 0x0f:
			{
				// a date/time field: also searched with the position of the
				// beginning of the run
				if (m_state->m_dateTimeMap.find(actPos) == m_state->m_dateTimeMap.end())
				{
					WPS_DEBUG_MSG(("WPS4Text::readText: can not find date/time for position : %lX\n", actPos));
				}
				else
				{
					WPS4TextInternal::DateTime const &form = m_state->m_dateTimeMap[actPos];
					std::string format = form.format();
					if (format.length())
						m_listener->insertDateTimeField(format.c_str());
					else
					{
						WPS_DEBUG_MSG(("WPS4Text::readText: unknown date/time format for position : %lX\n", actPos));
					}
				}
				break;
			}
			case 0x06: // footnote
			{
				// ok if this is the first character of the footnote definition
				if (zone.id() == WPS4TextInternal::Z_Note) break;
				// the footnote is searched with the position of the character itself
				if (m_state->m_footnoteMap.find(pos) == m_state->m_footnoteMap.end() ||
				        m_state->m_footnoteMap[pos] == 0L)
				{
					WPS_DEBUG_MSG(("WPS4Text::readText:do not find the footnote zone\n"));
					break;
				}
				WPS4TextInternal::Note const &note = *m_state->m_footnoteMap[pos];
				mainParser().createNote(note, note.m_label);
				break;
			}
			case 0x09:
				m_listener->insertTab();
				break;
			case 0x0C:
				// a page break: only used in the main zone
				if (mainZone) mainParser().newPage(++actPage);
				break;
			case 0x0d:
				break; // 0d0a = end of line
			case 0x0a:
				m_listener->insertEOL();
				break;
			case 0x0b: // check me
				m_listener->insertEOL(true);
				break;
			case 0x11: // insecable hyphen
				m_listener->insertUnicode(0x2011);
				break;
			case 0x12: // insecable space
				m_listener->insertUnicode(0xA0);
				break;
			case 0x1F: // optional hyphen
				break;
			case '&':
				// in a simple string, '&' followed by a known letter defines a field
				if (simpleString && pos+2 <= lastPos)
				{
					int nextVal = libwps::readU8(m_input);
					bool done = true;
					switch (nextVal) // check me
					{
					case 'p':
					case 'P':
						m_listener->insertField(WPSContentListener::PageNumber);
						break;
					case 'd':
					case 'D':
						m_listener->insertField(WPSContentListener::Date);
						break;
					case 't':
					case 'T':
						m_listener->insertField(WPSContentListener::Time);
						break;
					case 'f':
					case 'F':
						m_listener->insertField(WPSContentListener::Title);
						break;
					// case '&': check me does '&&'->'&' ?
					default:
						done = false;
						break;
					}
					if (done)
					{
						// the letter has been consumed: one character less to read
						i--;
						break;
					}
					// not a field: go back before the letter, the '&' is then
					// sent as a normal character
					m_input->seek(-1, librevenge::RVNG_SEEK_CUR);
				}
				// no break here: '&' falls through to the default case
			default:
				if (version()<=2)
				{
					// special character
					if (readVal==0xca) // not breaking space
					{
						m_listener->insertCharacter(0xA0);
						break;
					}
				}
				// a normal character: converted in unicode with the current font encoding
				m_listener->insertUnicode((uint32_t)libwps_tools_win::Font::unicode(readVal, actFont.m_type));
				break;
			}
		}

		// a simple string contains only one run (and creates no debug note)
		if (simpleString) break;

		f << "='"<<chaine<<"'";
		ascii().addPos(actPos);
		ascii().addNote(f.str().c_str());
	}

	if (dlink)
		m_listener->insertUnicode(0x226b);

	return true;
}
911
912 ////////////////////////////////////////////////////////////
913 // find all the text entries
914 ////////////////////////////////////////////////////////////
readEntries()915 bool WPS4Text::readEntries()
916 {
917 WPS4Parser::NameMultiMap &nameMultiMap = getNameEntryMap();
918 WPS4Parser::NameMultiMap::iterator pos;
919
920 libwps::DebugStream f;
921 long actPos = m_input->tell();
922 f << "ZZHeader-Text:Limit(";
923
924 int textLimits[4];
925 // look like begin of text : end of header/end of footer/end text
926 // but sometimes the zones overlaps !!!
927 for (int i = 0; i < 4; ++i) textLimits[i] = libwps::read32(m_input);
928
929 bool first = true, ok = true;
930 long lastPos = textLimits[0] < 0x100 ? 0x100 : textLimits[0];
931 for (int i = 0; i < 3; ++i)
932 {
933 long newPos = textLimits[i+1];
934 WPSEntry zone;
935 zone.setBegin(lastPos);
936 zone.setEnd(newPos);
937 zone.setType("TEXT");
938 zone.setId(i);
939
940 if (newPos >= lastPos)
941 lastPos = newPos;
942 if (!zone.valid() || zone.begin() < 0x100)
943 {
944 if (newPos != 0x100 && newPos != -1)
945 {
946 WPS_DEBUG_MSG(("WPS4Text::readEntries: find odd text limit\n"));
947 ok = false;
948 }
949 f << "_, ";
950 continue;
951 }
952
953 if (first)
954 {
955 m_textPositions.setBegin(zone.begin());
956 first = false;
957 }
958
959 m_textPositions.setEnd(zone.end());
960 nameMultiMap.insert(WPS4Parser::NameMultiMap::value_type(zone.type(), zone));
961
962 switch (i)
963 {
964 case 0:
965 m_state->m_header = zone;
966 break;
967 case 1:
968 m_state->m_footer = zone;
969 break;
970 case 2:
971 m_state->m_main = zone;
972 break;
973 default:
974 break;
975 }
976
977 f << "Text"<<i << "=" << std::hex << zone.begin() << "x" << zone.end() << ",";
978 ascii().addPos(zone.begin());
979 std::string name = "ZZ";
980 name+= zone.type();
981 name+=char('0'+i);
982 ascii().addNote(name.c_str());
983 ascii().addPos(zone.end());
984 ascii().addNote("_");
985 }
986 f << ")";
987 if (!ok)
988 {
989 m_state->m_header = m_state->m_footer = WPSEntry();
990 m_state->m_main = m_textPositions;
991 }
992 if (!m_textPositions.valid())
993 {
994 WPS_DEBUG_MSG(("WPS4Text::readEntries: textPosition is not valid"));
995 return false;
996 }
997
998 /* stream offset to end of file */
999 long eof = (long) libwps::readU32(m_input);
1000
1001 if (m_textPositions.end() > eof)
1002 {
1003 WPS_DEBUG_MSG(("WPS4Text:readEntries: can not find text positions\n"));
1004 return false;
1005 }
1006
1007 // check if fPositions.offset_eos
1008 long newPos = m_input->tell();
1009 if (m_input->seek(eof-1, librevenge::RVNG_SEEK_SET) != 0 || m_input->tell() != eof-1)
1010 {
1011 eof = m_input->tell();
1012 WPS_DEBUG_MSG(("WPS4Text:readEntries: incomplete file\n"));
1013 if (eof < m_textPositions.end()) return false;
1014 }
1015 mainParser().setSizeFile(eof);
1016
1017 f << ", endFile=" << eof;
1018 ascii().addPos(actPos);
1019 ascii().addNote(f.str().c_str());
1020
1021 m_input->seek(newPos, librevenge::RVNG_SEEK_SET);
1022
1023 static char const * (zName[]) =
1024 { "BTEC", "BTEP", "SHdr", "SFtr", "DLINK", "FTNp", "FTNd", "BKMK", "FONT" };
1025
1026 for (int i = 0; i < 9; ++i)
1027 mainParser().parseEntry(zName[i]);
1028
1029 return true;
1030 }
1031
1032 ////////////////////////////////////////////////////////////
1033 // find all the text structures
1034 ////////////////////////////////////////////////////////////
readStructures()1035 bool WPS4Text::readStructures()
1036 {
1037 WPS4Parser::NameMultiMap &nameMultiMap = getNameEntryMap();
1038 WPS4Parser::NameMultiMap::iterator pos;
1039
1040 // first find the font name
1041 pos = nameMultiMap.find("FONT");
1042 if (pos != nameMultiMap.end()) readFontNames(pos->second);
1043
1044 // now find the character and paragraph properties
1045 for (int i = 0; i < 2; ++i)
1046 {
1047 // we begin by i = 1 to create firsts the fdpc structure
1048 if (findFDPStructures(1-i)) continue;
1049 findFDPStructuresByHand(1-i);
1050 }
1051
1052 /* read character FODs (FOrmatting Descriptors) */
1053 size_t numFDP = m_state->m_FDPCs.size();
1054 std::vector<DataFOD> fdps;
1055 for (size_t i = 0; i < numFDP; ++i)
1056 readFDP(m_state->m_FDPCs[i], fdps, (FDPParser)&WPS4Text::readFont);
1057 m_FODList = mergeSortedFODLists(fdps, m_FODList);
1058
1059
1060 /* read paragraphs FODs (FOrmatting Descriptors) */
1061 fdps.resize(0);
1062 numFDP = m_state->m_FDPPs.size();
1063 for (size_t i = 0; i < numFDP; ++i)
1064 readFDP(m_state->m_FDPPs[i], fdps, (FDPParser)&WPS4Text::readParagraph);
1065 m_FODList = mergeSortedFODLists(fdps, m_FODList);
1066
1067 /* read the object structures */
1068 pos = nameMultiMap.find("EOBJ");
1069 if (pos != nameMultiMap.end())
1070 {
1071 std::vector<long> textPtrs, listValues;
1072 readPLC(pos->second, textPtrs, listValues, &WPS4Text::objectDataParser);
1073 }
1074
1075 // update the footnote
1076 WPSEntry ftnD, ftnP;
1077 pos = nameMultiMap.find("FTNd");
1078 if (pos != nameMultiMap.end()) ftnD = pos->second;
1079 pos = nameMultiMap.find("FTNp");
1080 if (pos != nameMultiMap.end()) ftnP = pos->second;
1081 readFootNotes(ftnD, ftnP);
1082
1083 // bookmark
1084 pos = nameMultiMap.find("BKMK");
1085 if (pos != nameMultiMap.end())
1086 {
1087 std::vector<long> textPtrs, listValues;
1088 readPLC(pos->second, textPtrs, listValues, &WPS4Text::bkmkDataParser);
1089 }
1090
1091 // the list of file
1092 pos = nameMultiMap.find("DLINK");
1093 if (pos != nameMultiMap.end())
1094 readDosLink(pos->second);
1095
1096 // date/time format
1097 pos = nameMultiMap.find("DTTM");
1098 if (pos != nameMultiMap.end())
1099 {
1100 WPSEntry const &zone = pos->second;
1101 std::vector<long> textPtrs, listValues;
1102 readPLC(zone, textPtrs, listValues, &WPS4Text::dttmDataParser);
1103 }
1104
1105 // finally, we must remove the footnote of textposition...
1106 long bot = m_state->m_main.begin();
1107 long endPos = m_state->m_main.end();
1108 size_t numFootNotes = m_state->m_footnoteList.size(), actNote = 0;
1109 bool textPosUpdated = false;
1110 while (bot < endPos)
1111 {
1112 if (actNote < numFootNotes &&
1113 m_state->m_footnoteList[actNote].begin()==bot)
1114 {
1115 bot = m_state->m_footnoteList[actNote].end();
1116 actNote++;
1117 continue;
1118 }
1119 long lastPos = actNote < numFootNotes ?
1120 m_state->m_footnoteList[actNote].begin() : endPos;
1121 if (lastPos > endPos) lastPos = endPos;
1122 WPSEntry mZone;
1123 mZone.setBegin(bot);
1124 mZone.setEnd(lastPos);
1125 mZone.setType("TEXT");
1126 if (!textPosUpdated)
1127 {
1128 mZone.setId(WPS4TextInternal::Z_Main);
1129 m_state->m_main = mZone;
1130 textPosUpdated = true;
1131 }
1132 else
1133 {
1134 if (m_state->m_otherZones.size() == 0)
1135 {
1136 WPS_DEBUG_MSG(("WPS4Text::readStructures: find unknown text zone\n"));
1137 }
1138 mZone.setId(WPS4TextInternal::Z_Unknown);
1139 m_state->m_otherZones.push_back(mZone);
1140 }
1141 bot = lastPos;
1142 }
1143
1144
1145 return true;
1146 }
1147
1148 ////////////////////////////////////////////////////////////
// find FDP zones (the normal method, followed by a fallback method
// which may work for some bad files)
1151 ////////////////////////////////////////////////////////////
findFDPStructures(int which)1152 bool WPS4Text::findFDPStructures(int which)
1153 {
1154 std::vector<WPSEntry> &zones = which ? m_state->m_FDPCs : m_state->m_FDPPs;
1155 zones.resize(0);
1156
1157 char const *indexName = which ? "BTEC" : "BTEP";
1158 char const *sIndexName = which ? "FDPC" : "FDPP";
1159
1160 WPS4Parser::NameMultiMap &nameMultiMap =getNameEntryMap();
1161 WPS4Parser::NameMultiMap::iterator pos = nameMultiMap.find(indexName);
1162 if (pos == nameMultiMap.end()) return false;
1163
1164 std::vector<long> textPtrs;
1165 std::vector<long> listValues;
1166
1167 if (!readPLC(pos->second, textPtrs, listValues)) return false;
1168
1169 size_t numV = listValues.size();
1170 if (textPtrs.size() != numV+1) return false;
1171
1172 WPSEntry zone;
1173 zone.setType(sIndexName);
1174
1175 for (size_t i = 0; i < numV; ++i)
1176 {
1177 long bPos = listValues[i];
1178 if (bPos <= 0) return false;
1179 zone.setBegin(bPos);
1180 zone.setLength(0x80);
1181
1182 zones.push_back(zone);
1183 }
1184
1185 return true;
1186 }
1187
findFDPStructuresByHand(int which)1188 bool WPS4Text::findFDPStructuresByHand(int which)
1189 {
1190 char const *indexName = which ? "FDPC" : "FDPP";
1191 WPS_DEBUG_MSG(("WPS4Text::findFDPStructuresByHand: need to create %s list by hand \n", indexName));
1192
1193 std::vector<WPSEntry> &zones = which ? m_state->m_FDPCs : m_state->m_FDPPs;
1194 zones.resize(0);
1195
1196 long debPos;
1197 if (which == 1)
1198 {
1199 // hack: each fdp block is aligned with 0x80,
1200 // and appears consecutively just after the text
1201 uint32_t pnChar = uint32_t((m_textPositions.end()+127)>>7);
1202 /* sanity check */
1203 if (0 == pnChar)
1204 {
1205 WPS_DEBUG_MSG(("WPS4Text::findFDPStructuresByHand: pnChar is 0, so file may be corrupt\n"));
1206 throw libwps::ParseException();
1207 }
1208 debPos = 0x80 * (long) pnChar;
1209 }
1210 else
1211 {
1212 size_t nFDPC = m_state->m_FDPCs.size();
1213 if (!nFDPC)
1214 {
1215 WPS_DEBUG_MSG(("WPS4Text::findFDPStructuresByHand: can not find last fdpc pos\n"));
1216 return false;
1217 }
1218 debPos = m_state->m_FDPCs[nFDPC-1].end();
1219 }
1220
1221 WPSEntry fdp;
1222 fdp.setType(indexName);
1223
1224 long lastPos = m_textPositions.begin();
1225 while (1)
1226 {
1227 m_input->seek(debPos+0x7f, librevenge::RVNG_SEEK_SET);
1228 if (m_input->tell() != debPos+0x7f)
1229 {
1230 WPS_DEBUG_MSG(("WPS4Text: find EOF while parsing the %s\n", indexName));
1231 return false;
1232 }
1233 int nbElt = libwps::readU8(m_input);
1234 if (5*nbElt+4 > 0x80)
1235 {
1236 WPS_DEBUG_MSG(("WPS4Text: find too big number of data while parsing the %s\n", indexName));
1237 return false;
1238 }
1239 m_input->seek(debPos, librevenge::RVNG_SEEK_SET);
1240 if (long(libwps::readU32(m_input)) != lastPos)
1241 {
1242 WPS_DEBUG_MSG(("WPS4Text: find incorrect linking while parsing the %s\n", indexName));
1243 return false;
1244 }
1245 if (nbElt != 1)
1246 m_input->seek(4*nbElt-4, librevenge::RVNG_SEEK_CUR);
1247
1248 long newPos = (long) libwps::readU32(m_input);
1249 if (newPos < lastPos || newPos > m_textPositions.end())
1250 {
1251 WPS_DEBUG_MSG(("WPS4Text: find incorrect linking while parsing the %s\n", indexName));
1252 return false;
1253 }
1254 fdp.setBegin(debPos);
1255 fdp.setLength(0x80);
1256 zones.push_back(fdp);
1257
1258 if (newPos == m_textPositions.end()) break;
1259
1260 lastPos = newPos;
1261 debPos = fdp.end();
1262 }
1263
1264 return true;
1265 }
1266
1267 // PLC Data: default parser
defDataParser(long,long,int,long endPos,std::string & mess)1268 bool WPS4Text::defDataParser(long , long , int , long endPos, std::string &mess)
1269 {
1270 mess = "";
1271 libwps::DebugStream f;
1272
1273 long actPos = m_input->tell();
1274 long length = endPos+1-actPos;
1275 int sz = (length%4)==0 ? 4 : (length%2)==0 ? 2 : 1;
1276 f << "unk["<< sz << "]=";
1277 while (m_input->tell() <= endPos+1-sz)
1278 {
1279 long val = 0;
1280 switch (sz)
1281 {
1282 case 1:
1283 val = libwps::readU8(m_input);
1284 break;
1285 case 2:
1286 val = libwps::readU16(m_input);
1287 break;
1288 case 4:
1289 val = (long) libwps::readU32(m_input);
1290 break;
1291 default:
1292 break;
1293 }
1294 f << std::hex << val << std::dec << ",";
1295 }
1296 mess = f.str();
1297 return true;
1298 }
1299
1300 ////////////////////////////////////////////////////////////
1301 // the fonts name zone (zone8)
1302 ////////////////////////////////////////////////////////////
readFontNames(WPSEntry const & entry)1303 bool WPS4Text::readFontNames(WPSEntry const &entry)
1304 {
1305 if (!entry.valid()) return false;
1306
1307 m_input->seek(entry.begin(), librevenge::RVNG_SEEK_SET);
1308
1309 long endPos = entry.end();
1310 int nFonts = 0;
1311 libwps_tools_win::Font::Type docType=getDefaultFontType();
1312 while (m_input->tell() < endPos)
1313 {
1314 long actPos;
1315 actPos = m_input->tell();
1316 libwps::DebugStream f;
1317
1318 /* Sometimes the font numbers start at 0 and increment nicely.
1319 However, other times the font numbers jump around. */
1320 uint8_t font_number = libwps::readU8(m_input);
1321 if (m_state->m_fontNames.find(font_number) != m_state->m_fontNames.end())
1322 {
1323 WPS_DEBUG_MSG(("WPS4Text::readFontNames: at position 0x%lx: font number %i duplicated\n",
1324 (m_input->tell())-2, font_number));
1325 throw libwps::ParseException();
1326 }
1327
1328 f << "Font" << nFonts++ << ": id=" << (int)font_number << ", ";
1329 //fixme: what is this byte? maybe a font class
1330 uint8_t unknown_byte = libwps::readU8(m_input);
1331 f << "unk=" << (int)unknown_byte << ", ";
1332
1333 std::string s;
1334 uint8_t nChar = libwps::readU8(m_input);
1335 for (uint8_t i = nChar; i>0; i--)
1336 {
1337 if (m_input->isEnd())
1338 {
1339 WPS_DEBUG_MSG(("WPS4Text::readFontNames: can not read the font number %i (end of file)\n",
1340 font_number));
1341 throw libwps::ParseException();
1342 }
1343 unsigned char val = libwps::readU8(m_input);
1344 // sanity check (because sometimes contains char > 0x80 .. )
1345 if (val >= ' ' && val <= 'z') s.append(1,char(val));
1346 else
1347 {
1348 static bool first = true;
1349 if (first)
1350 {
1351 first = false;
1352 WPS_DEBUG_MSG(("WPS4Text:readFontNames find odd caracters in font name : %d\n", (int) val));
1353 }
1354 f << "##oddC=" << (unsigned int) val << ", ";
1355 }
1356 }
1357 libwps_tools_win::Font::Type fType=libwps_tools_win::Font::getFontType(s);
1358 if (fType==libwps_tools_win::Font::UNKNOWN)
1359 fType=docType;
1360 WPS4TextInternal::FontName font;
1361 font.m_name = s;
1362 font.m_type = fType;
1363 f << font;
1364
1365 m_state->m_fontNames[font_number] = font;
1366
1367 ascii().addPos(actPos);
1368 ascii().addNote(f.str().c_str());
1369 ascii().addPos(m_input->tell());
1370 }
1371
1372 return true;
1373 }
1374
1375 ////////////////////////////////////////////////////////////
1376 // the font:
1377 ////////////////////////////////////////////////////////////
readFont(long endPos,int & id,std::string & mess)1378 bool WPS4Text::readFont(long endPos, int &id, std::string &mess)
1379 {
1380 WPS4TextInternal::Font font(version());
1381 font.m_size = 12;
1382
1383 libwps::DebugStream f;
1384
1385 int fl[4] = { 0, 0, 0, 0};
1386 if (m_input->tell() < endPos) fl[0] = libwps::readU8(m_input);
1387
1388 /* set difference from default properties */
1389 uint32_t attributes = 0;
1390 if (fl[0] & 0x01) attributes |= WPS_BOLD_BIT;
1391 if (fl[0] & 0x02) attributes |= WPS_ITALICS_BIT;
1392 if (fl[0] & 0x04) attributes |= WPS_STRIKEOUT_BIT;
1393 fl[0] &= 0xf8;
1394
1395 // what & 0x01 -> ???
1396 // what & 0x02 -> note
1397 // what & 0x04 -> ???
1398 // what & 0x08 -> fName
1399 // what & 0x10 -> size
1400 // what & 0x20 -> underline (fl[2])
1401 // what & 0x40 -> decalage
1402 // what & 0x80 -> color
1403 int what = 0;
1404 if (m_input->tell() < endPos) what = libwps::readU8(m_input);
1405
1406 font.m_special = ((what & 2) != 0);
1407 what &= 0xfd;
1408
1409 if (m_input->tell() < endPos)
1410 {
1411 // the fonts
1412 // FIXME: find some properties with size=3,
1413 // for which this character seems
1414 // related to size, not font
1415 uint8_t font_n = libwps::readU8(m_input);
1416
1417 if (m_state->m_fontNames.find(font_n) != m_state->m_fontNames.end())
1418 {
1419 font.m_name=m_state->m_fontNames[font_n].m_name;
1420 font.m_type=m_state->m_fontNames[font_n].m_type;
1421 }
1422 else if (version() <= 2)
1423 {
1424 font.m_name=WPS4TextInternal::FontName::getDosName(font_n);
1425 font.m_type=getDefaultFontType();
1426 }
1427 else
1428 {
1429 WPS_DEBUG_MSG(("WPS4Text: error: encountered font %i which is not indexed\n",
1430 font_n));
1431 }
1432
1433 if (font.m_name.empty()) f << "###nameId=" << int(font_n) << ",";
1434 }
1435
1436 if (m_input->tell() < endPos)
1437 {
1438 // underline, ...
1439 int underlinePos = libwps::readU8(m_input);
1440 if (underlinePos)
1441 {
1442 if (!(what & 0x20)) f << "undFl,";
1443 else what &= 0xdf;
1444 attributes |= WPS_UNDERLINE_BIT;
1445 }
1446 }
1447
1448 if (m_input->tell() < endPos) // font size * 2
1449 {
1450 int fSize = libwps::readU8(m_input);
1451 if (fSize)
1452 {
1453 if (!(what & 0x10)) f << "szFl,";
1454 else what &= 0xef;
1455 font.m_size = (fSize/2);
1456 }
1457 }
1458
1459 if (m_input->tell() < endPos) // height decalage -> sub/superscript
1460 {
1461 int fDec = libwps::read8(m_input);
1462 if (fDec)
1463 {
1464 if (!(what & 0x40)) f << "sub/supFl(val=" << fDec<<"),";
1465 else what &= 0xbf;
1466
1467 if (fDec > 0) attributes |= WPS_SUPERSCRIPT_BIT;
1468 else attributes |= WPS_SUBSCRIPT_BIT;
1469 }
1470 }
1471 if (m_input->tell()+2 <= endPos) // color field
1472 {
1473 int bkColor = libwps::readU8(m_input);
1474 int ftColor = libwps::readU8(m_input);
1475 bool setColor = !!(what & 0x80);
1476 what &= 0x7F;
1477
1478 if ((bkColor || ftColor) && !setColor)
1479 {
1480 setColor = true;
1481 f << "colorFl,";
1482 }
1483 if (setColor)
1484 {
1485 uint32_t color;
1486 if (mainParser().getColor(bkColor, color))
1487 font.m_backColor = color;
1488 if (mainParser().getColor(ftColor, color))
1489 font.m_color = color;
1490 }
1491 }
1492 if (m_input->tell() < endPos)
1493 font.m_dlinkId = libwps::readU8(m_input);
1494 if (what) f << "#what=" << std::hex << what << std::dec << ",";
1495 if (fl[0]) f << "unkn0=" << std::hex << fl[0] << std::dec << ",";
1496
1497 if (m_input->tell() != endPos)
1498 {
1499 f << "#unknEnd=(";
1500 while (m_input->tell() < endPos) f << std::hex << libwps::readU8(m_input) <<",";
1501 f << ")";
1502 }
1503
1504 font.m_attributes = attributes;
1505 font.m_extra = f.str();
1506
1507 id = int(m_state->m_fontList.size());
1508 m_state->m_fontList.push_back(font);
1509 f.str("");
1510 f << font;
1511 mess = f.str();
1512
1513 return true;
1514 }
1515
1516 ////////////////////////////////////////////////////////////
1517 // the file list: only in dos3 ?
1518 ////////////////////////////////////////////////////////////
readDosLink(WPSEntry const & entry)1519 bool WPS4Text::readDosLink(WPSEntry const &entry)
1520 {
1521 if (!entry.valid()) return false;
1522
1523 long length = entry.length();
1524 if (length%44)
1525 {
1526 WPS_DEBUG_MSG(("WPS4Text::readDosLink: length::=%ld seem odd\n", length));
1527 return false;
1528 }
1529
1530 m_input->seek(entry.begin(), librevenge::RVNG_SEEK_SET);
1531 libwps::DebugStream f;
1532 long numElt = length/44;
1533 long val;
1534 for (long n = 0; n < numElt; ++n)
1535 {
1536 WPS4TextInternal::DosLink link;
1537 long pos = m_input->tell();
1538 long endPos = pos+44;
1539 f.str("");
1540 for (int i = 0; i < 2; ++i) // always 0, 0
1541 {
1542 val = libwps::readU16(m_input);
1543 if (val) f << "unkn" << i << "=" << std::hex << val << std::dec << ",";
1544 }
1545 link.m_width = float(libwps::readU16(m_input)/1440.);
1546 for (int i = 2; i < 4; ++i) // always f0, f0
1547 {
1548 val = libwps::readU16(m_input);
1549 if (val != 0xf0) f << "unkn" << i << "=" << std::hex << val << std::dec << ",";
1550 }
1551 link.m_type = libwps::readU8(m_input);
1552 val = libwps::readU8(m_input);
1553 if (val) // find 0x18 for a spreadsheet
1554 f << "unk4=" << std::hex << val << std::dec << ",";
1555 switch (link.m_type)
1556 {
1557 case 0x81: // picture ?
1558 {
1559 long dim[2];
1560 for (int i = 0; i < 2; ++i) dim[i] = libwps::readU16(m_input);
1561 link.m_size = Vec2f(float(dim[0])/1440.f, float(dim[1])/1440.f);
1562 val = libwps::readU16(m_input); // always 0
1563 if (val) f << "g0=" << val << ",";
1564 val = libwps::readU16(m_input); // always 4
1565 if (val != 4) f << "g1=" << val << ",";
1566 }
1567 // fall-through intended
1568 case 0x40: // spreadsheet range
1569 case 0x01: // char ?
1570 {
1571 std::string name("");
1572 link.m_pos.setBegin(m_input->tell());
1573 while (!m_input->isEnd() && long(m_input->tell()) < endPos)
1574 {
1575 char c = char(libwps::readU8(m_input));
1576 if (!c)
1577 {
1578 m_input->seek(-1, librevenge::RVNG_SEEK_CUR);
1579 break;
1580 }
1581 name += c;
1582 }
1583 link.m_pos.setEnd(m_input->tell());
1584 link.m_pos.setId(WPS4TextInternal::Z_DLink);
1585 link.m_name = name;
1586 break;
1587 }
1588 default:
1589 break;
1590 }
1591 link.m_extra = f.str();
1592 m_state->m_dosLinkList.push_back(link);
1593 f.str("");
1594 f << "ZZDLINK-" << n << ":" << link;
1595 if (long(m_input->tell()) != endPos)
1596 ascii().addDelimiter(m_input->tell(),'|');
1597 ascii().addPos(pos);
1598 ascii().addNote(f.str().c_str());
1599 m_input->seek(endPos, librevenge::RVNG_SEEK_SET);
1600 }
1601 return true;
1602 }
1603
1604 ////////////////////////////////////////////////////////////
1605 // the paragraph properties:
1606 ////////////////////////////////////////////////////////////
readParagraph(long endPos,int & id,std::string & mess)1607 bool WPS4Text::readParagraph(long endPos, int &id, std::string &mess)
1608 {
1609 long actPos = m_input->tell();
1610 long size = endPos - actPos;
1611
1612 WPS4TextInternal::Paragraph pp;
1613 if (size && size < 3)
1614 {
1615 WPS_DEBUG_MSG(("WPS4Text:readParagraph:(sz=%ld)\n", size));
1616 return false;
1617 }
1618
1619 libwps::DebugStream f;
1620 for (int i = 0; i < 3; ++i)
1621 {
1622 int v = libwps::readU8(m_input);
1623 if (v != 0) f << "unkn"<<i<< "=" << v;
1624 }
1625
1626 while (m_input->tell() < endPos)
1627 {
1628 int v = libwps::readU8(m_input);
1629 long pos = m_input->tell();
1630 bool ok = true, done = true;
1631 int arg = -1;
1632 switch (v)
1633 {
1634 case 0x2:
1635 {
1636 if (pos+1 > endPos)
1637 {
1638 ok = false;
1639 break;
1640 }
1641 arg = libwps::readU8(m_input);
1642 f << "f2=" << arg << ",";
1643 break;
1644 }
1645 case 0x5:
1646 {
1647 if (pos+1 > endPos)
1648 {
1649 ok = false;
1650 break;
1651 }
1652 arg = libwps::readU8(m_input);
1653 switch (arg)
1654 {
1655 case 0:
1656 pp.m_justify = libwps::JustificationLeft;
1657 break;
1658 case 1:
1659 pp.m_justify = libwps::JustificationCenter;
1660 break;
1661 case 2:
1662 pp.m_justify = libwps::JustificationRight;
1663 break;
1664 case 3:
1665 pp.m_justify = libwps::JustificationFull;
1666 break;
1667 default:
1668 f << "#just=" << arg << ",";
1669 pp.m_justify = libwps::JustificationLeft;
1670 }
1671 break;
1672 }
1673 case 0x7: // 1: marked don't break paragraph
1674 case 0x8: // 1: marked keep paragraph with next
1675 {
1676 if (pos+1 > endPos)
1677 {
1678 ok = false;
1679 break;
1680 }
1681 arg = libwps::readU8(m_input);
1682 if (arg == 0) break;
1683 if (arg == 1) pp.m_breakStatus |= ((v == 7) ? libwps::NoBreakBit : libwps::NoBreakWithNextBit);
1684 else f << "#status=" << arg << ",";
1685 break;
1686 }
1687
1688 // BORDER
1689 case 0x9:
1690 {
1691 if (pos+1 > endPos)
1692 {
1693 ok = false;
1694 break;
1695 }
1696 arg = libwps::readU8(m_input);
1697 pp.m_borderStyle.m_style = WPSBorder::Single;
1698 pp.m_borderStyle.m_width = 1;
1699 int style = (arg&0xf);
1700 switch (style)
1701 {
1702 case 0:
1703 break;
1704 case 1:
1705 pp.m_borderStyle.m_width = 2;
1706 break;
1707 case 2:
1708 pp.m_borderStyle.m_style = WPSBorder::Double;
1709 break;
1710 case 3:
1711 pp.m_borderStyle.m_style = WPSBorder::Dot;
1712 break;
1713 case 4:
1714 pp.m_borderStyle.m_style = WPSBorder::LargeDot;
1715 break;
1716 case 5:
1717 pp.m_borderStyle.m_style = WPSBorder::Dash;
1718 break;
1719 case 6:
1720 case 7:
1721 case 8:
1722 pp.m_borderStyle.m_width = style-3;
1723 break;
1724 case 9:
1725 case 10:
1726 pp.m_borderStyle.m_width = style-7;
1727 pp.m_borderStyle.m_style = WPSBorder::Double;
1728 break;
1729 default:
1730 f << "#borderStyle=" << style << ",";
1731 WPS_DEBUG_MSG(("WPS4Text:readParagraph: unknown border style\n"));
1732 break;
1733 }
1734 int high = (arg>>4);
1735 if (version() < 3)
1736 {
1737 uint32_t color;
1738 if (high && mainParser().getColor(high, color))
1739 pp.m_borderStyle.m_color = color;
1740 else if (high)
1741 f << "#borderColor=" << high << ",";
1742 }
1743 else
1744 {
1745 switch (high)
1746 {
1747 case 0:
1748 break;
1749 case 4:
1750 pp.m_border = 0xf;
1751 break;
1752 case 8:
1753 pp.m_border = 0xf;
1754 f << "borderShaded,";
1755 break;
1756 default:
1757 f << "#borderStyle[high]=" << high << ",";
1758 break;
1759 }
1760 }
1761 break;
1762 }
1763 case 0xa: // 1: top border
1764 case 0xb: // : bottom border
1765 case 0xc: // : left border
1766 case 0xd: // : right border
1767 {
1768 if (pos+1 > endPos)
1769 {
1770 ok = false;
1771 break;
1772 }
1773 arg = libwps::readU8(m_input);
1774 if (arg == 0) break;
1775 if (arg == 1)
1776 {
1777 switch (v)
1778 {
1779 case 0xa:
1780 pp.m_border |= WPSBorder::TopBit;
1781 break;
1782 case 0xb:
1783 pp.m_border |= WPSBorder::BottomBit;
1784 break;
1785 case 0xc:
1786 pp.m_border |= WPSBorder::LeftBit;
1787 break;
1788 case 0xd:
1789 pp.m_border |= WPSBorder::RightBit;
1790 break;
1791 default:
1792 break;
1793 }
1794 }
1795 else f << "#border=" << arg << ",";
1796 break;
1797 }
1798 case 0x18: // border color
1799 {
1800 if (long(pos)==endPos)
1801 {
1802 ok = false;
1803 break;
1804 }
1805 int colorId = libwps::readU8(m_input);
1806 uint32_t color;
1807 if (mainParser().getColor(colorId, color))
1808 pp.m_borderStyle.m_color = color;
1809 else
1810 f << "#colorId=" << colorId << ",";
1811 break;
1812 }
1813 case 0xe: // 1: bullet
1814 {
1815 if (pos+1 > endPos)
1816 {
1817 ok = false;
1818 break;
1819 }
1820 arg = libwps::readU8(m_input);
1821 if (arg == 0) break;
1822
1823 pp.m_listLevelIndex = 1;
1824 pp.m_listLevel.m_type = libwps::BULLET;
1825 static const uint32_t bulletList[]=
1826 {
1827 0x2022, 0x3e, 0x25c6, 0x21d2, 0x25c7, 0x2605, /* 1-6 */
1828 0, 0, 0, 0, 0, 0, /* 7-12 unknown */
1829 0, 0, 0, 0, 0, 0x2750, /* 13-17 unknown and document... */
1830 0x2713, 0x261e, 0x2704, 0x2611, 0x2612, 0x270e /* 18-24 */
1831 };
1832 if (arg <= 24 && bulletList[arg-1])
1833 WPSContentListener::appendUnicode(bulletList[arg-1], pp.m_listLevel.m_bullet);
1834 else
1835 WPSContentListener::appendUnicode(0x2022, pp.m_listLevel.m_bullet);
1836 break;
1837 }
1838 case 0x1b:
1839 case 0x1a:
1840 case 0x10: // the bullet char : 0x18
1841 {
1842 if (pos+1 > endPos)
1843 {
1844 ok = false;
1845 break;
1846 }
1847 arg = libwps::readU8(m_input);
1848 done = true;
1849 switch (v)
1850 {
1851 case 0x1a:
1852 if (arg) f << "backPattern=" << arg << ",";
1853 break;
1854 case 0x1b:
1855 {
1856 if (arg==0) break;
1857 uint32_t color;
1858 if (mainParser().getColor(arg>>4, color))
1859 f << "backPatternBackColor=" << std::hex << color << std::dec << ";";
1860 else
1861 f << "#backPatternBackColor=" << (arg>>4) << ",";
1862 if (mainParser().getColor(arg&0xf, color))
1863 f << "backPatternFrontColor=" << std::hex << color << std::dec << ";";
1864 else
1865 f << "#backPatternFrontColor=" << (arg&0xf) << ",";
1866 break;
1867 }
1868 case 0x10:
1869 if (arg!=0x18) f << "bullet?=" << arg << ",";
1870 break;
1871 default:
1872 done = false;
1873 break;
1874 }
1875 break;
1876 }
1877 case 0xf: // tabs:
1878 {
1879 if (pos+1 > endPos)
1880 {
1881 ok = false;
1882 break;
1883 }
1884 int nVal = libwps::read8(m_input);
1885 if (nVal < 2 || pos + 1 + nVal > endPos)
1886 {
1887 ok = false;
1888 break;
1889 }
1890 int flag = libwps::readU8(m_input);
1891 if (flag) f << "#tabsFl=" << flag << ",";
1892 size_t nItem = libwps::readU8(m_input);
1893 if ((unsigned long)nVal != 2 + 3*nItem)
1894 {
1895 ok = false;
1896 break;
1897 }
1898 pp.m_tabs.resize(nItem);
1899 for (size_t i = 0; i < nItem; ++i)
1900 pp.m_tabs[i].m_position = libwps::read16(m_input)/1440.;
1901 for (size_t i = 0; i < nItem; ++i)
1902 {
1903 enum WPSTabStop::Alignment align = WPSTabStop::LEFT;
1904 int val = libwps::readU8(m_input);
1905 switch ((val & 0x3))
1906 {
1907 case 0:
1908 align = WPSTabStop::LEFT;
1909 break;
1910 case 1:
1911 align = WPSTabStop::CENTER;
1912 break;
1913 case 2:
1914 align = WPSTabStop::RIGHT;
1915 break;
1916 case 3:
1917 align = WPSTabStop::DECIMAL;
1918 break;
1919 default:
1920 break;
1921 }
1922 pp.m_tabs[i].m_alignment = align;
1923
1924 if (val&4) f << "#Tabbits3";
1925 val = (val>>3);
1926
1927 switch (val)
1928 {
1929 case 0:
1930 break;
1931 case 1:
1932 pp.m_tabs[i].m_leaderCharacter = '.';
1933 break;
1934 case 2:
1935 pp.m_tabs[i].m_leaderCharacter = '-';
1936 break;
1937 case 3:
1938 pp.m_tabs[i].m_leaderCharacter = '_';
1939 break;
1940 case 4:
1941 pp.m_tabs[i].m_leaderCharacter = '=';
1942 break;
1943 default:
1944 f << "#TabSep=" << val;
1945 }
1946 }
1947
1948 break;
1949 }
1950 case 0x11: // right margin : 1440*inches
1951 case 0x12: // left margin
1952 case 0x13: // another margin ( check me )
1953 case 0x14: // left text indent (relative to left margin)
1954 case 0x15: // line spacing (inter line) 240
1955 case 0x16: // line spacing before 240 = 1 line spacing
1956 case 0x17: // line spacing after
1957 {
1958 if (pos+2 > endPos)
1959 {
1960 ok = false;
1961 break;
1962 }
1963
1964 arg = libwps::read16(m_input);
1965 switch (v)
1966 {
1967 case 0x11:
1968 pp.m_margins[2] = arg/1440.;
1969 break;
1970 case 0x13: // seems another way to define the left margin
1971 f << "#left,";
1972 // fall-through intended
1973 case 0x12:
1974 pp.m_margins[1] = arg/1440.;
1975 break;
1976 case 0x14:
1977 pp.m_margins[0] = arg/1440.;
1978 break;
1979 case 0x15:
1980 {
1981 pp.m_spacings[0] = arg ? arg/240. : 1.0;
1982 if (pp.m_spacings[0] < 1.0 || pp.m_spacings[0] > 2.0)
1983 {
1984 f << "##interLineSpacing=" << pp.m_spacings[0] << ",";
1985 pp.m_spacings[0] = (pp.m_spacings[0] < 1.0) ? 1.0 : 2.0;
1986 }
1987 break;
1988 }
1989 case 0x16:
1990 pp.m_spacings[1] = arg/240.;
1991 break;
1992 case 0x17:
1993 pp.m_spacings[2] = arg/240.;
1994 break;
1995 default:
1996 done = false;
1997 }
1998 break;
1999 }
2000 default:
2001 ok = false;
2002 }
2003 if (!ok)
2004 {
2005 m_input->seek(pos, librevenge::RVNG_SEEK_SET);
2006 f << "###v" << v<<"=" <<std::hex;
2007 while (m_input->tell() < endPos)
2008 f << (int) libwps::readU8(m_input) << ",";
2009 break;
2010 }
2011
2012 if (done) continue;
2013
2014 f << "f" << v << "=" << std::hex << arg << std::dec << ",";
2015 }
2016 if (pp.m_listLevelIndex >= 1)
2017 pp.m_margins[0] += pp.m_margins[1];
2018 else if (pp.m_margins[0] + pp.m_margins[1] < 0.0)
2019 {
2020 // sanity check
2021 if (pp.m_margins[1] < 0.0) pp.m_margins[1] = 0.0;
2022 pp.m_margins[0] = -pp.m_margins[1];
2023 }
2024 pp.m_extra = f.str();
2025
2026 id = int(m_state->m_paragraphList.size());
2027 m_state->m_paragraphList.push_back(pp);
2028
2029 f.str("");
2030 f << pp;
2031 mess = f.str();
2032 return true;
2033 }
2034
2035 ////////////////////////////////////////////////////////////
2036 // the foot note properties:
2037 ////////////////////////////////////////////////////////////
readFootNotes(WPSEntry const & ftnD,WPSEntry const & ftnP)2038 bool WPS4Text::readFootNotes(WPSEntry const &ftnD, WPSEntry const &ftnP)
2039 {
2040 if (!ftnD.valid() && !ftnP.valid()) return true;
2041 if (!ftnD.valid() || !ftnP.valid())
2042 {
2043 WPS_DEBUG_MSG(("WPS4Text::readFootNotes: one of the two entry is not valid, footnote will be ignored\n"));
2044 return false;
2045 }
2046
2047 std::vector<long> footNotePos,footNoteDef, listValues;
2048 if (!readPLC(ftnP, footNotePos, listValues, &WPS4Text::footNotesDataParser))
2049 {
2050 WPS_DEBUG_MSG(("WPS4Text::readFootNotes: can not read positions\n"));
2051 return false;
2052 }
2053
2054 if (!readPLC(ftnD, footNoteDef, listValues))
2055 {
2056 WPS_DEBUG_MSG(("WPS4Text::readFootNotes: can not read definitions\n"));
2057 return false;
2058 }
2059
2060 int numFootNotes = int(footNotePos.size())-1;
2061 if (numFootNotes <= 0 || int(footNoteDef.size())-1 != numFootNotes)
2062 {
2063 WPS_DEBUG_MSG(("WPS4Text::readFootNotes: no footnotes\n"));
2064 return false;
2065 }
2066
2067 // save the actual type and create a list of footnote entries
2068 std::vector<WPS4TextInternal::Note> noteTypes=m_state->m_footnoteList;
2069 m_state->m_footnoteList.resize(0);
2070
2071 std::vector<int> corresp;
2072 for (size_t i = 0; i < size_t(numFootNotes); ++i)
2073 {
2074 WPS4TextInternal::Note fZone;
2075 fZone.setBegin(footNoteDef[i]);
2076 fZone.setEnd(footNoteDef[i+1]);
2077 fZone.setType("TEXT");
2078 fZone.setId(WPS4TextInternal::Z_Note);
2079 m_state->m_footnoteList.push_back(fZone);
2080 corresp.push_back(int(i));
2081
2082 // sort the footnote
2083 for (size_t j = i; j > 0; j--)
2084 {
2085 if (m_state->m_footnoteList[j].begin() >=
2086 m_state->m_footnoteList[j-1].end()) break;
2087
2088 if (m_state->m_footnoteList[j].end() >
2089 m_state->m_footnoteList[j-1].begin())
2090 {
2091 WPS_DEBUG_MSG
2092 (("WPS4Text: error: can not create footnotes zone, found %lx and %lx\n",
2093 m_state->m_footnoteList[j].end(),m_state->m_footnoteList[j-1].begin()));
2094
2095 m_state->m_footnoteList.resize(0);
2096 return false;
2097 }
2098
2099 WPS4TextInternal::Note tmpZ = m_state->m_footnoteList[j];
2100 m_state->m_footnoteList[j] = m_state->m_footnoteList[j-1];
2101 m_state->m_footnoteList[j-1] = tmpZ;
2102
2103 int pos = corresp[j];
2104 corresp[j] = corresp[j-1];
2105 corresp[j-1] = pos;
2106 }
2107 }
2108 // ok, we can create the map, ...
2109 for (size_t i = 0; i < size_t(numFootNotes); ++i)
2110 {
2111 size_t id = size_t(corresp[i]);
2112 WPS4TextInternal::Note &z = m_state->m_footnoteList[id];
2113 if (id < noteTypes.size())
2114 {
2115 z.m_label = noteTypes[id].m_label;
2116 z.m_error = noteTypes[id].m_error;
2117 }
2118 m_state->m_footnoteMap[footNotePos[id]] = &z;
2119 }
2120 return true;
2121 }
2122
footNotesDataParser(long,long,int id,long endPos,std::string & mess)2123 bool WPS4Text::footNotesDataParser(long /*bot*/, long /*eot*/, int id,
2124 long endPos, std::string &mess)
2125 {
2126 mess = "";
2127
2128 long actPos = m_input->tell();
2129 long length = endPos+1-actPos;
2130 if (length != 12)
2131 {
2132 WPS_DEBUG_MSG(("WPS4Text::footNotesDataParser: unknown size %ld for footdata data\n", length));
2133 return false;
2134 }
2135 libwps::DebugStream f;
2136 WPS4TextInternal::Note note;
2137 int type = libwps::readU16(m_input);
2138 if (type & 1)
2139 {
2140 if (type != 1)
2141 f << "###numeric=" << std::hex << type << std::dec << ",";
2142 }
2143 else if (type == 0 || type > 20)
2144 f << "###char,";
2145 else
2146 {
2147 int numC = type/2;
2148 librevenge::RVNGString label("");
2149 libwps_tools_win::Font::Type actType = getDefaultFontType();
2150 for (int i=0; i < numC; ++i)
2151 {
2152 unsigned char c = libwps::readU8(m_input);
2153 WPSContentListener::appendUnicode(uint32_t(libwps_tools_win::Font::unicode(c, actType)),label);
2154 if (c < 0x20)
2155 f << "#(" << std::hex << int(c) << std::dec << ")";
2156 }
2157 note.m_label = label;
2158 }
2159 note.m_error=f.str();
2160 if (id >= int(m_state->m_footnoteList.size()))
2161 m_state->m_footnoteList.resize(size_t(id+1));
2162 m_state->m_footnoteList[size_t(id)]=note;
2163 f.str("");
2164 f << note;
2165 mess = f.str();
2166 m_input->seek(endPos+1, librevenge::RVNG_SEEK_SET);
2167 return true;
2168 }
2169
2170 ////////////////////////////////////////////////////////////
2171 // the bookmark properties:
2172 ////////////////////////////////////////////////////////////
bkmkDataParser(long bot,long,int,long endPos,std::string & mess)2173 bool WPS4Text::bkmkDataParser(long bot, long /*eot*/, int /*id*/,
2174 long endPos, std::string &mess)
2175 {
2176 mess = "";
2177 if (m_state->m_bookmarkMap.find(bot) != m_state->m_bookmarkMap.end())
2178 {
2179 WPS_DEBUG_MSG(("WPS4Text:bkmkDataParser: bookmark already exists in this position\n"));
2180 return true;
2181 }
2182
2183 long actPos = m_input->tell();
2184 long length = endPos+1-actPos;
2185 if (length != 16)
2186 {
2187 WPS_DEBUG_MSG(("WPS4Text::bkmkDataParser: unknown size %ld for bkmkdata data\n", length));
2188 return false;
2189 }
2190
2191 for (int i = 0; i < 16; ++i)
2192 {
2193 char c = char(libwps::readU8(m_input));
2194 if (c == '\0') break;
2195 mess += c;
2196 }
2197 WPSEntry ent;
2198 ent.setBegin(actPos);
2199 ent.setEnd(m_input->tell());
2200 ent.setId(WPS4TextInternal::Z_String);
2201 m_state->m_bookmarkMap[bot] = ent;
2202 m_input->seek(endPos+1, librevenge::RVNG_SEEK_SET);
2203 return true;
2204 }
2205
2206 ////////////////////////////////////////////////////////////
2207 // the object properties:
2208 ////////////////////////////////////////////////////////////
objectDataParser(long bot,long,int id,long endPos,std::string & mess)2209 bool WPS4Text::objectDataParser(long bot, long /*eot*/, int id,
2210 long endPos, std::string &mess)
2211 {
2212 mess = "";
2213 if (m_state->m_objectMap.find(bot) != m_state->m_objectMap.end())
2214 {
2215 WPS_DEBUG_MSG(("WPS4Text:objectDataParser: object already exists in this position\n"));
2216 return true;
2217 }
2218
2219 libwps::DebugStream f;
2220
2221 long actPos = m_input->tell();
2222 long length = endPos+1-actPos;
2223 if (length != 36)
2224 {
2225 WPS_DEBUG_MSG(("WPS4Text:objectDataParser unknown size %ld for object data\n", length));
2226 return false;
2227 }
2228
2229 f << "type(?)=" <<libwps::read16(m_input) << ","; // 3->08 4->4f4d or 68->list?
2230 for (int i = 0; i < 2; ++i)
2231 {
2232 int v =libwps::read16(m_input);
2233 if (v) f << "unkn1:" << i << "=" << v << ",";
2234 }
2235 float dim[4];
2236 for (int i = 0; i < 4; ++i)
2237 dim[i] =float(libwps::read16(m_input)/1440.);
2238
2239 // CHECKME: the next two sizes are often simillar,
2240 // maybe the first one is the original size and the second
2241 // size in the document...
2242 f << "origSz?=[" << dim[0] << "," << dim[1] << "],";
2243
2244 WPS4TextInternal::Object obj;
2245 obj.m_size = Vec2f(dim[2], dim[3]); // CHECKME: unit
2246
2247 long size = (long) libwps::readU32(m_input);
2248 long pos = (long) libwps::readU32(m_input);
2249
2250 actPos = m_input->tell();
2251 if (pos >= 0 && size > 0 && mainParser().checkFilePosition(pos+size))
2252 {
2253 obj.m_pos.setBegin(pos);
2254 obj.m_pos.setLength(size);
2255 obj.m_pos.setId(id);
2256
2257 int objectId = mainParser().readObject(m_input, obj.m_pos);
2258 if (objectId == -1)
2259 {
2260 WPS_DEBUG_MSG(("WPS4Text::objectDataParser: can not find the object %d\n", id));
2261 }
2262 obj.m_id = objectId;
2263 m_state->m_objectMap[bot] = obj;
2264 }
2265 else
2266 {
2267 WPS_DEBUG_MSG(("WPS4Text::objectDataParser: bad object position\n"));
2268 }
2269
2270 m_input->seek(actPos, librevenge::RVNG_SEEK_SET);
2271
2272 for (int i = 0; i < 7; ++i)
2273 {
2274 long val =libwps::read16(m_input);
2275 if (val) f << "unkn2:" << i << "=" << val << ",";
2276 }
2277
2278 obj.m_extra = f.str();
2279 f.str("");
2280 f << obj;
2281
2282 mess = f.str();
2283 return true;
2284 }
2285
2286 ////////////////////////////////////////////////////////////
2287 // the dttm properties:
2288 ////////////////////////////////////////////////////////////
dttmDataParser(long bot,long,int,long endPos,std::string & mess)2289 bool WPS4Text::dttmDataParser(long bot, long /*eot*/, int /*id*/,
2290 long endPos, std::string &mess)
2291 {
2292 mess = "";
2293 if (m_state->m_dateTimeMap.find(bot) != m_state->m_dateTimeMap.end())
2294 {
2295 WPS_DEBUG_MSG(("WPS4Text:dttmDataParser: dttm already exists in this position\n"));
2296 return true;
2297 }
2298
2299 libwps::DebugStream f;
2300
2301 long actPos = m_input->tell();
2302 long length = endPos+1-actPos;
2303 if (length != 42)
2304 {
2305 WPS_DEBUG_MSG(("WPS4Text:dttmDataParser unknown size %ld for dttm data\n", length));
2306 return false;
2307 }
2308
2309 WPS4TextInternal::DateTime form;
2310 int val;
2311 for (int i = 0; i < 3; ++i) // always 0, 0, 0 ?
2312 {
2313 val =libwps::read16(m_input);
2314 if (val) f << "f" << i << "=" << val << ",";
2315 }
2316 form.m_type=libwps::read16(m_input);
2317 val =libwps::read16(m_input); // alway 0 ?
2318 if (val) f << "f3=" << val << ",";
2319 // end unknown
2320 for (int i = 0; i < 16; ++i)
2321 {
2322 val =libwps::readU16(m_input);
2323 if (val) f << "g" << i << "=" << std::hex << val << std::dec << ",";
2324 }
2325 form.m_extra = f.str();
2326 m_state->m_dateTimeMap[bot] = form;
2327 f.str("");
2328 f << form;
2329 mess = f.str();
2330 return true;
2331 }
2332
2333 ////////////////////////////////////////
2334 // VERY LOW LEVEL ( plc )
2335 ////////////////////////////////////////
2336 /** Internal and low level: the structures of a WPS4Text used to parse PLC*/
2337 namespace WPS4PLCInternal
2338 {
2339 /** Internal and low level: the PLC different types and their structures */
2340 struct PLC
2341 {
2342 /** the PLC types */
2343 typedef enum WPS4TextInternal::PLCType PLCType;
2344 /** the way to define the text positions
2345 *
2346 * - P_ABS: absolute position,
2347 * - P_REL: position are relative to the beginning text offset */
2348 typedef enum { P_ABS=0, P_REL, P_UNKNOWN} Position;
2349 /** the type of the content
2350 *
2351 * - T_CST: size is constant
2352 * - T_STRUCT: a structured type ( which unknown size) */
2353 typedef enum { T_CST=0, T_COMPLEX, T_UNKNOWN} Type;
2354
2355 //! constructor
PLCWPS4PLCInternal::PLC2356 PLC(PLCType w= WPS4TextInternal::Unknown, Position p=P_UNKNOWN, Type t=T_UNKNOWN, unsigned char tChar='\0', int f=1) :
2357 m_type(w), m_pos(p), m_contentType(t), m_textChar(tChar), m_cstFactor(f) {}
2358
2359 //! PLC type
2360 PLCType m_type;
2361 //! the way to define the text positions
2362 Position m_pos;
2363 //! the type of the content
2364 Type m_contentType;
2365 /** the character which appears in the text when this PLC is found
2366 *
2367 * '\\0' means that there is not default character */
2368 unsigned char m_textChar;
2369 //! some data are stored divided by some unit
2370 int m_cstFactor;
2371 };
2372
KnownPLC()2373 KnownPLC::KnownPLC() : m_knowns()
2374 {
2375 createMapping();
2376 }
2377
~KnownPLC()2378 KnownPLC::~KnownPLC()
2379 {
2380 }
2381
get(std::string const & name)2382 PLC KnownPLC::get(std::string const &name)
2383 {
2384 std::map<std::string, PLC>::iterator pos = m_knowns.find(name);
2385 if (pos == m_knowns.end()) return PLC();
2386 return pos->second;
2387 }
2388
createMapping()2389 void KnownPLC::createMapping()
2390 {
2391 m_knowns["BTEP"] =
2392 PLC(WPS4TextInternal::BTE, PLC::P_ABS, PLC::T_CST, '\0', 0x80);
2393 m_knowns["BTEC"] =
2394 PLC(WPS4TextInternal::BTE,PLC::P_ABS, PLC::T_CST, '\0', 0x80);
2395 m_knowns["EOBJ"] =
2396 PLC(WPS4TextInternal::OBJECT,PLC::P_UNKNOWN, PLC::T_COMPLEX, 0x7);
2397 m_knowns["FTNp"] =
2398 PLC(WPS4TextInternal::FTNp,PLC::P_REL, PLC::T_CST, 0x6);
2399 m_knowns["FTNd"] =
2400 PLC(WPS4TextInternal::FTNd,PLC::P_REL, PLC::T_COMPLEX, 0x6);
2401 m_knowns["BKMK"] =
2402 PLC(WPS4TextInternal::BKMK,PLC::P_REL, PLC::T_COMPLEX);
2403 m_knowns["DTTM"] =
2404 PLC(WPS4TextInternal::DTTM,PLC::P_REL, PLC::T_COMPLEX, 0xf);
2405 }
2406 }
2407
readPLC(WPSEntry const & zone,std::vector<long> & textPtrs,std::vector<long> & listValues,WPS4Text::DataParser parser)2408 bool WPS4Text::readPLC
2409 (WPSEntry const &zone,
2410 std::vector<long> &textPtrs, std::vector<long> &listValues, WPS4Text::DataParser parser)
2411 {
2412 textPtrs.resize(0);
2413 listValues.resize(0);
2414 long size = zone.length();
2415 if (zone.begin() <= 0 || size < 8) return false;
2416 WPS4PLCInternal::PLC plcType = m_state->m_knownPLC.get(zone.type());
2417
2418 libwps::DebugStream f;
2419 ascii().addPos(zone.begin());
2420 m_input->seek(zone.begin(), librevenge::RVNG_SEEK_SET);
2421
2422 long lastPos = 0;
2423 std::vector<DataFOD> fods;
2424 unsigned numElt = 0;
2425 f << "pos=(";
2426 while (numElt*4+4 <= unsigned(size))
2427 {
2428 long newPos = (long) libwps::readU32(m_input);
2429 if (plcType.m_pos == WPS4PLCInternal::PLC::P_UNKNOWN)
2430 {
2431 if (newPos < m_textPositions.begin())
2432 plcType.m_pos = WPS4PLCInternal::PLC::P_REL;
2433 else if (newPos+m_textPositions.begin() > m_textPositions.end())
2434 plcType.m_pos = WPS4PLCInternal::PLC::P_ABS;
2435 else if (plcType.m_textChar=='\0')
2436 {
2437 WPS_DEBUG_MSG(("WPS4Text:readPLC Can not decide position for PLC: %s\n", zone.type().c_str()));
2438 plcType.m_pos = WPS4PLCInternal::PLC::P_REL;
2439 }
2440 else
2441 {
2442 long actPos = m_input->tell();
2443 m_input->seek(newPos, librevenge::RVNG_SEEK_SET);
2444 if (libwps::readU8(m_input) == plcType.m_textChar)
2445 plcType.m_pos = WPS4PLCInternal::PLC::P_ABS;
2446 else plcType.m_pos = WPS4PLCInternal::PLC::P_REL;
2447 m_input->seek(actPos, librevenge::RVNG_SEEK_SET);
2448 }
2449 }
2450
2451 if (plcType.m_pos == WPS4PLCInternal::PLC::P_REL)
2452 newPos += m_textPositions.begin();
2453
2454 if (newPos < lastPos ||
2455 newPos > m_textPositions.end())
2456 {
2457 // sometimes the convertissor do not their jobs correctly
2458 // for the last element
2459 if (plcType.m_pos == WPS4PLCInternal::PLC::P_REL &&
2460 newPos == m_textPositions.end()+m_textPositions.begin())
2461 newPos = m_textPositions.end();
2462 else
2463 return false;
2464 }
2465
2466 textPtrs.push_back(newPos);
2467
2468 DataFOD fod;
2469 fod.m_type = DataFOD::ATTR_PLC;
2470 fod.m_pos = newPos;
2471
2472 f << std::hex << newPos << ", ";
2473 if (newPos == m_textPositions.end()) break;
2474
2475 numElt++;
2476 lastPos = newPos;
2477 fods.push_back(fod);
2478 }
2479 f << ")";
2480
2481 if (long(numElt) < 1) return false;
2482
2483 long dataSize = (size-4*long(numElt)-4)/long(numElt);
2484 if (dataSize > 100) return false;
2485 if (size!= long(numElt)*(4+dataSize)+4) return false;
2486
2487 ascii().addNote(f.str().c_str());
2488
2489 if (!dataSize)
2490 {
2491 for (size_t i = 0; i < numElt; ++i)
2492 {
2493 listValues.push_back(-1);
2494 fods[i].m_id = int(m_state->m_plcList.size());
2495 }
2496 WPS4TextInternal::DataPLC plc;
2497 plc.m_name = zone.type();
2498 plc.m_type = plcType.m_type;
2499 m_state->m_plcList.push_back(plc);
2500 m_FODList = mergeSortedFODLists(fods, m_FODList);
2501 return true;
2502 }
2503
2504 // ok we have some data
2505 bool ok = true;
2506 long pos = m_input->tell();
2507 WPS4Text::DataParser pars = parser;
2508 if ((dataSize == 3 || dataSize > 4) && !pars)
2509 pars = &WPS4Text::defDataParser;
2510
2511 for (size_t i = 0; i < numElt; ++i)
2512 {
2513 WPS4TextInternal::DataPLC plc;
2514
2515 if (!pars && dataSize <= 4)
2516 {
2517 switch (dataSize)
2518 {
2519 case 1:
2520 plc.m_value = libwps::readU8(m_input);
2521 break;
2522 case 2:
2523 plc.m_value = libwps::readU16(m_input);
2524 break;
2525 case 4:
2526 plc.m_value = (long) libwps::readU32(m_input);
2527 break;
2528 default:
2529 WPS_DEBUG_MSG(("WPS4Text:readPLC: unexpected PLC size\n"));
2530 // fallthrough intended
2531 case 0:
2532 plc.m_value = 0;
2533 }
2534 plc.m_value *=plcType.m_cstFactor;
2535 }
2536 else if (pars)
2537 {
2538 std::string mess;
2539 if (!(this->*pars)(textPtrs[i], textPtrs[i+1], int(i), pos+dataSize-1, mess))
2540 {
2541 ok = false;
2542 break;
2543 }
2544 plc.m_extra = mess;
2545 m_input->seek(pos+dataSize, librevenge::RVNG_SEEK_SET);
2546 }
2547
2548 listValues.push_back(plc.m_value);
2549
2550 fods[i].m_id = int(m_state->m_plcList.size());
2551 fods[i].m_defPos = pos;
2552
2553 plc.m_name = zone.type();
2554 plc.m_type = plcType.m_type;
2555 m_state->m_plcList.push_back(plc);
2556
2557 f.str("");
2558 f << "ZZ" << zone.type() << i << ":" << plc;
2559 ascii().addPos(pos);
2560 ascii().addNote(f.str().c_str());
2561
2562 pos += dataSize;
2563 }
2564
2565 if (ok) m_FODList = mergeSortedFODLists(fods, m_FODList);
2566 return true;
2567 }
2568
2569 /* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */
2570