1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
2 /* libwps
3  * Version: MPL 2.0 / LGPLv2.1+
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * Major Contributor(s):
10  * Copyright (C) 2009, 2011 Alonso Laurent (alonso@loria.fr)
11  * Copyright (C) 2006, 2007 Andrew Ziem
12  * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
13  * Copyright (C) 2004 Marc Maurer (uwog@uwog.net)
14  * Copyright (C) 2003-2005 William Lachance (william.lachance@sympatico.ca)
15  *
16  * For minor contributions see the git repository.
17  *
18  * Alternatively, the contents of this file may be used under the terms
19  * of the GNU Lesser General Public License Version 2.1 or later
20  * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
21  * applicable instead of those above.
22  *
23  * For further information visit http://libwps.sourceforge.net
24  */
25 
26 #ifdef DEBUG_WITH_FILES
27 // set to 1 to debug the font property
28 #  define DEBUG_FP 1
29 // set to 1 to debug the paragraph property
30 #  define DEBUG_PP 1
31 // set to 1 to print the plc position
32 #  define DEBUG_PLC_POS 1
33 #endif
34 
35 #include <iomanip>
36 #include <iostream>
37 
38 #include <map>
39 #include <vector>
40 
41 #include <librevenge/librevenge.h>
42 
43 #include "libwps_internal.h"
44 #include "libwps_tools_win.h"
45 
46 #include "WPSContentListener.h"
47 #include "WPSFont.h"
48 #include "WPSPosition.h"
49 #include "WPSParagraph.h"
50 
51 #include "WPS4.h"
52 
53 #include "WPS4Text.h"
54 
/** Internal and low level: the structures of a WPS4Text used to parse PLC */
namespace WPS4PLCInternal
{
/** Internal and low level: the PLC different types and their structures
 *
 * \note only forward-declared at this point
 */
struct PLC;

/** A table of the known PLC, indexed by name */
struct KnownPLC
{
	//! constructor
	KnownPLC();
	//! destructor
	~KnownPLC();
	//! returns the PLC corresponding to a name
	PLC get(std::string const &name);

protected:
	//! creates the map of known PLC
	void createMapping();

	//! map name -> known PLC
	std::map<std::string, PLC> m_knowns;
};
}
78 
79 //////////////////////////////////////////////////////////////////////////////
80 // general enum
81 //////////////////////////////////////////////////////////////////////////////
82 namespace WPS4TextInternal
83 {
/** an enum used to type a zone
 *
 * The header, footer and main zones use the values 0, 1 and 2, which are
 * also the indices of the three text limits read by WPS4Text::readEntries.
 * WPS4Text::readText sends the Z_String, Z_Bookmark and Z_DLink zones as
 * simple strings.
 */
enum ZoneType
{
	Z_String = -1,
	Z_Header = 0,
	Z_Footer = 1,
	Z_Main = 2,
	Z_Note = 3,
	Z_Bookmark = 4,
	Z_DLink = 5,
	Z_Unknown = 6
};
86 /** Internal: class to store a font name: name with encoding type */
87 struct FontName
88 {
89 	//! constructor with file's version to define the default encoding */
FontNameWPS4TextInternal::FontName90 	FontName(int version=4) : m_name(""), m_type(libwps_tools_win::Font::WIN3_WEUROPE)
91 	{
92 		if (version <= 2) m_type = libwps_tools_win::Font::DOS_850;
93 	}
94 	//! operator<<
95 	friend std::ostream &operator<<(std::ostream &o, FontName const &ft);
96 	/** returns the default dos name corresponding to \a id th font */
97 	static std::string getDosName(int id);
98 
99 	//! font name
100 	std::string m_name;
101 	//! font encoding type
102 	libwps_tools_win::Font::Type m_type;
103 };
104 //! operator<< for a font name
operator <<(std::ostream & o,FontName const & ft)105 std::ostream &operator<<(std::ostream &o, FontName const &ft)
106 {
107 	if (!ft.m_name.empty()) o << "name='" << ft.m_name << "'";
108 	else o << "name='Unknown'";
109 	if (ft.m_type!=libwps_tools_win::Font::WIN3_WEUROPE &&
110 	        ft.m_type!=libwps_tools_win::Font::DOS_850)
111 		o << ",type=" << libwps_tools_win::Font::getTypeName(ft.m_type) << ",";
112 	return o;
113 }
114 
getDosName(int id)115 std::string FontName::getDosName(int id)
116 {
117 	switch (id)
118 	{
119 	case 0:
120 		return "Courier";
121 	case 1:
122 		return "Courier PC";
123 	case 3:
124 		return "Univers_Scale";
125 	case 4:
126 		return "Universe";
127 	case 6:
128 		return "LinePrinterPC";
129 	case 7:
130 		return "LinePrinter";
131 	case 16:
132 		return "CGTimes_Scale";
133 	case 24:
134 		return "CGTimes";
135 	default:
136 		break;
137 	}
138 
139 	WPS_DEBUG_MSG(("WPS4TextInternal::FontName::getDosName: encountered unknown font %i\n", id));
140 	return "Courier";
141 }
142 /** Internal: class to store font properties */
143 struct Font : public WPSFont
144 {
145 	//! constructor with file's version to define the default encoding */
FontWPS4TextInternal::Font146 	Font(int version=4) : WPSFont(), m_type(libwps_tools_win::Font::WIN3_WEUROPE),
147 		m_backColor(0xFFFFFF), m_special(false), m_dlinkId(-1)
148 	{
149 		if (version <= 2) m_type = libwps_tools_win::Font::DOS_850;
150 	}
151 	//! returns a default font (Courier12) with file's version to define the default encoding */
getDefaultWPS4TextInternal::Font152 	static Font getDefault(int version)
153 	{
154 		Font res(version);
155 		if (version <= 2)
156 			res.m_name="Courier";
157 		else
158 			res.m_name="Times New Roman";
159 		res.m_size=12;
160 		return res;
161 	}
162 
163 	//! operator<<
164 	friend std::ostream &operator<<(std::ostream &o, Font const &ft);
165 
166 	//! the font encoding type
167 	libwps_tools_win::Font::Type m_type;
168 	//! background  color index
169 	uint32_t m_backColor;
170 	//! a flag to know if we have a special field (a note), ...
171 	bool m_special;
172 	//! a id to retrieve a file name ( dos )
173 	int m_dlinkId;
174 };
175 
176 //! operator<< for font properties
operator <<(std::ostream & o,Font const & ft)177 std::ostream &operator<<(std::ostream &o, Font const &ft)
178 {
179 	o << static_cast<WPSFont const &>(ft) << ",";
180 
181 	if (ft.m_special)
182 	{
183 		if (ft.m_dlinkId >= 0)
184 			o << "spec[" << ft.m_dlinkId << "],";
185 		else
186 			o << "spec,";
187 	}
188 
189 	if (ft.m_backColor != 0xFFFFFF)
190 		o << "bgCol=" << ft.m_backColor << ",";
191 	return o;
192 }
193 /** Internal: class to store paragraph properties */
194 struct Paragraph : public WPSParagraph
195 {
196 	//! constructor
ParagraphWPS4TextInternal::Paragraph197 	Paragraph() : WPSParagraph()  { }
198 };
199 
200 /** Internal: class to store an note type */
201 struct Note : public WPSEntry
202 {
203 	//! constructor
NoteWPS4TextInternal::Note204 	Note() : WPSEntry(), m_label(""), m_error("") {}
isNumericWPS4TextInternal::Note205 	bool isNumeric() const
206 	{
207 		return m_label.len()==0;
208 	}
209 	//! operator <<
operator <<(std::ostream & o,Note const & note)210 	friend std::ostream &operator<<(std::ostream &o, Note const &note)
211 	{
212 		if (note.m_label.len())
213 			o << "lab=" << note.m_label.cstr() << ",";
214 		else
215 			o << "numeric,";
216 		if (!note.m_error.empty()) o << note.m_error << ",";
217 		return o;
218 	}
219 	//! the label if not numeric
220 	librevenge::RVNGString m_label;
221 	//! a string used to store the parsing errors
222 	std::string m_error;
223 };
224 
225 
226 /** Internal: class to store an object definition */
227 struct Object
228 {
229 	//! constructor
ObjectWPS4TextInternal::Object230 	Object() : m_id(-1), m_size(), m_pos(), m_unknown(0), m_extra("") {}
231 	//! operator <<
232 	friend std::ostream &operator<<(std::ostream &o, Object const &obj);
233 
234 	//! the object identificator
235 	int m_id;
236 	//! the object size in the document
237 	Vec2f m_size;
238 	//! an entry which indicates where the object is defined in the file
239 	WPSEntry m_pos;
240 	//! unknown data
241 	long m_unknown;
242 	//! a string used to store the parsing errors
243 	std::string m_extra;
244 };
245 //! operator<< for an object
operator <<(std::ostream & o,Object const & obj)246 std::ostream &operator<<(std::ostream &o, Object const &obj)
247 {
248 	if (obj.m_id > -1) o << "ole" << obj.m_id;
249 	o <<": size(" << obj.m_size << ")";
250 	if (obj.m_pos.valid()) o << std::hex << ", def=(0x" << obj.m_pos.begin() << "->" << obj.m_pos.end() << ")" << std::dec;
251 	if (obj.m_unknown) o << std::hex << ", unkn=" << obj.m_unknown << std::dec;
252 	if (!obj.m_extra.empty()) o << ", err=" << obj.m_extra;
253 	return o;
254 }
255 
256 /** Internal: class to store an object definition */
257 struct DosLink
258 {
259 	//! constructor
DosLinkWPS4TextInternal::DosLink260 	DosLink() : m_type(-1), m_width(-1), m_size(), m_name(""), m_pos(), m_extra("") {}
261 	//! operator <<
262 	friend std::ostream &operator<<(std::ostream &o, DosLink const &dlink);
263 
264 	//! the type
265 	int m_type;
266 	//! the width
267 	float m_width;
268 	//! the object size in the document
269 	Vec2f m_size;
270 	//! the file name
271 	std::string m_name;
272 	//! an entry which indicates where the object is defined in the file
273 	WPSEntry m_pos;
274 	//! a string used to store the parsing errors
275 	std::string m_extra;
276 };
277 //! operator<< for an object
operator <<(std::ostream & o,DosLink const & dlink)278 std::ostream &operator<<(std::ostream &o, DosLink const &dlink)
279 {
280 	switch (dlink.m_type)
281 	{
282 	case -1:
283 		break;
284 	case 1:
285 		o << "chart,";
286 		break;
287 	case 0x81:
288 		o << "pict,";
289 		break;
290 	case 0x40:
291 		o << "spreadsheet,";
292 		break;
293 	default:
294 		o << "#type=" << dlink.m_type << ",";
295 		break;
296 	}
297 	if (dlink.m_width >= 0) o << "width?=" << dlink.m_width << ",";
298 	if (dlink.m_size.x() >= 0 && (dlink.m_size.y()<0 || dlink.m_size.y()>0))
299 		o <<"size=" << dlink.m_size << ",";
300 	if (dlink.m_name.length()) o << "name='" << dlink.m_name << "',";
301 	if (!dlink.m_extra.empty()) o << ", err=" << dlink.m_extra;
302 	return o;
303 }
304 
/** Internal: class to store a date/time format */
struct DateTime
{
	//! constructor: the type is unset (-1)
	DateTime() :
		m_type(-1),
		m_extra()
	{
	}
	//! returns a format to be used with strftime (an empty string if the type is unknown)
	std::string format() const;
	//! operator <<
	friend std::ostream &operator<<(std::ostream &o, DateTime const &dtime);

	//! the type: 0-4 date formats, 5-6 date and time formats, 7-10 time formats
	int m_type;
	//! a string used to store the parsing errors
	std::string m_extra;
};

std::string DateTime::format() const
{
	// the strftime formats, indexed by m_type
	static char const *const s_formats[] =
	{
		"%m/%d/%Y",
		"%m/%Y",
		"%d %B %Y",
		"%A %d %B %Y",
		"%B %Y",
		"%m/%d/%Y %I:%M",
		"%m/%d/%Y %I:%M:%S",
		"%I:%M:%S",
		"%I:%M",
		"%H:%M:%S",
		"%H:%M"
	};
	int const numFormats = int(sizeof(s_formats)/sizeof(s_formats[0]));
	if (m_type < 0 || m_type >= numFormats)
		return "";
	return s_formats[m_type];
}

/** operator<< for a date/time format
 *
 * Prints the family of the format (date, date&time or time) with the index
 * of the format in this family, then the parsing errors.
 */
std::ostream &operator<<(std::ostream &o, DateTime const &dtime)
{
	int const type = dtime.m_type;
	if (type >= 0 && type <= 4)
		o << "date[F"<<type<<"],";
	else if (type == 5 || type == 6)
		o << "date&time[F"<<type-5<<"],";
	else if (type >= 7 && type <= 10)
		o << "time[F"<<type-7<<"],";
	else if (type != -1)
		o << "#type=" << type << ",";

	if (dtime.m_extra.length())
		o << ", err=" << dtime.m_extra;
	return o;
}
384 
/** different types
 *
 * - BTE: font/paragraph properties
 * - OBJECT: object properties
 * - FTNp, FTNd: footnote position in text and footnote content
 * - BKMK: comment field
 * - DTTM: field type: date/time/..
 */
enum PLCType
{
	BTE = 0,
	OBJECT = 1,
	FTNp = 2,
	FTNd = 3,
	BKMK = 4,
	DTTM = 5,
	Unknown = 6
};

/** Internal: class to store the PLC: Pointer List Content ? */
struct DataPLC
{
	//! constructor: the name is empty, the type is Unknown and the value is unset (-1)
	DataPLC() :
		m_name(),
		m_type(Unknown),
		m_value(-1),
		m_extra()
	{
	}
	//! operator<<
	friend std::ostream &operator<<(std::ostream &o, DataPLC const &plc);
	//! the entry field name
	std::string m_name;
	//! the plc type
	PLCType m_type;
	//! a potential value (-1 means unset)
	long m_value;
	//! a string used to store the parsing errors
	std::string m_extra;
};
/** operator<< for a dataPLC
 *
 * Prints the name, then the value in hexadecimal (if it is set) and the
 * parsing errors (if any).
 */
std::ostream &operator<<(std::ostream &o, DataPLC const &plc)
{
	o << "type=" << plc.m_name << ",";
	bool const hasValue = plc.m_value != -1;
	if (hasValue)
	{
		o << "val=" << std::hex << plc.m_value << std::dec << ", ";
	}
	if (plc.m_extra.length())
	{
		o << "errors=(" << plc.m_extra << ")";
	}
	return o;
}
419 
420 /** Internal: the state of a WPS4Text */
421 struct State
422 {
423 	//! constructor
StateWPS4TextInternal::State424 	State() : m_fontNames(), m_fontList(),  m_paragraphList(),
425 		m_FDPCs(), m_FDPPs(), m_footnoteList(), m_footnoteMap(), m_bookmarkMap(), m_dosLinkList(),
426 		m_main(), m_header(), m_footer(), m_otherZones(),
427 		m_objectMap(), m_dateTimeMap(), m_plcList(), m_knownPLC()
428 	{}
429 
430 	//! the list of fonts names
431 	std::map<int,FontName> m_fontNames;
432 	//! the list of all font properties
433 	std::vector<Font> m_fontList;
434 	//! the list of all paragraph properties
435 	std::vector<Paragraph> m_paragraphList;
436 
437 	//! the list of FDPC entries (ie list to find the font properties lists )
438 	std::vector<WPSEntry> m_FDPCs;
439 	//! the list of FDPP entries (ie list to find the paragraph properties lists )
440 	std::vector<WPSEntry> m_FDPPs;
441 
442 	//! the footnote entries
443 	std::vector<Note> m_footnoteList;
444 	//! map: footnote in text -> footnote entry
445 	std::map<long,Note const *> m_footnoteMap;
446 	//! map: bookmark in text -> bookmark
447 	std::map<long, WPSEntry> m_bookmarkMap;
448 	//! the dos file links
449 	std::vector<DosLink> m_dosLinkList;
450 
451 	WPSEntry m_main /** the main text zone entry*/,
452 	         m_header /** the header text entry*/, m_footer /** the footer text entry*/;
453 
454 	//! the entries which are not in main/header/footer text and in the footnotes
455 	std::vector<WPSEntry> m_otherZones;
456 	//! map: object in text -> object
457 	std::map<long, Object> m_objectMap;
458 	//! map: date field in text -> date time format
459 	std::map<long, DateTime> m_dateTimeMap;
460 	//! a list of all PLCs
461 	std::vector<DataPLC> m_plcList;
462 	//! the known plc
463 	WPS4PLCInternal::KnownPLC m_knownPLC;
464 };
465 }
466 
467 //////////////////////////////////////////////////////////////////////////////
468 //
469 //   MAIN CODE
470 //
471 //////////////////////////////////////////////////////////////////////////////
472 
473 // constructor/destructor
WPS4Text(WPS4Parser & parser,RVNGInputStreamPtr & input)474 WPS4Text::WPS4Text(WPS4Parser &parser, RVNGInputStreamPtr &input) :
475 	WPSTextParser(parser, input), m_listener(), m_state()
476 {
477 	m_state.reset(new WPS4TextInternal::State);
478 }
479 
~WPS4Text()480 WPS4Text::~WPS4Text()
481 {
482 }
483 
484 // number of page
numPages() const485 int WPS4Text::numPages() const
486 {
487 	int numPage = 1;
488 	m_input->seek(m_textPositions.begin(), librevenge::RVNG_SEEK_SET);
489 	while (!m_input->isEnd() && m_input->tell() != m_textPositions.end())
490 	{
491 		if (libwps::readU8(m_input.get()) == 0x0C) numPage++;
492 	}
493 	return numPage;
494 }
495 
496 // return main/header/footer/all entry
getHeaderEntry() const497 WPSEntry WPS4Text::getHeaderEntry() const
498 {
499 	if (m_state->m_header.valid()) return m_state->m_header;
500 	WPS4Parser::NameMultiMap const &nameMultiMap = getNameEntryMap();
501 	WPS4Parser::NameMultiMap::const_iterator pos;
502 	pos = nameMultiMap.find("SHdr");
503 	if (pos == nameMultiMap.end()) return WPSEntry();
504 	WPSEntry res = pos->second;
505 	res.setType("TEXT");
506 	res.setId(WPS4TextInternal::Z_String);
507 	return res;
508 }
509 
getFooterEntry() const510 WPSEntry WPS4Text::getFooterEntry() const
511 {
512 	if (m_state->m_footer.valid()) return m_state->m_footer;
513 	WPS4Parser::NameMultiMap const &nameMultiMap = getNameEntryMap();
514 	WPS4Parser::NameMultiMap::const_iterator pos;
515 	pos = nameMultiMap.find("SFtr");
516 	if (pos == nameMultiMap.end()) return WPSEntry();
517 	WPSEntry res = pos->second;
518 	res.setType("TEXT");
519 	res.setId(WPS4TextInternal::Z_String);
520 	return res;
521 }
522 
getMainTextEntry() const523 WPSEntry WPS4Text::getMainTextEntry() const
524 {
525 	return m_state->m_main;
526 }
527 
getDefaultFontType() const528 libwps_tools_win::Font::Type WPS4Text::getDefaultFontType() const
529 {
530 	if (version()<=2)
531 		return libwps_tools_win::Font::DOS_850;
532 	return libwps_tools_win::Font::WIN3_WEUROPE;
533 }
534 
535 ////////////////////////////////////////////////////////////
536 // send the data
537 ////////////////////////////////////////////////////////////
flushExtra()538 void WPS4Text::flushExtra()
539 {
540 	if (!m_listener)
541 	{
542 		WPS_DEBUG_MSG(("WPS4Text::flushExtra can not find the listener\n"));
543 		return;
544 	}
545 	size_t numExtra = m_state->m_otherZones.size();
546 	if (numExtra == 0) return;
547 
548 	m_listener->setFont(WPS4TextInternal::Font::getDefault(version()));
549 	m_listener->setParagraph(WPS4TextInternal::Paragraph());
550 	m_listener->insertEOL();
551 #ifdef DEBUG
552 	librevenge::RVNGString message = "--------- extra text zone -------- ";
553 	m_listener->insertUnicodeString(message);
554 #endif
555 	for (size_t i = 0; i < numExtra; ++i)
556 		readText(m_state->m_otherZones[i]);
557 }
558 
readText(WPSEntry const & zone)559 bool WPS4Text::readText(WPSEntry const &zone)
560 {
561 	bool bookmark = zone.id() == WPS4TextInternal::Z_Bookmark;
562 	bool dlink = zone.id() == WPS4TextInternal::Z_DLink;
563 	bool simpleString = zone.id() == WPS4TextInternal::Z_String || bookmark || dlink;
564 	bool mainZone = zone.id() == WPS4TextInternal::Z_Main;
565 
566 	if (m_listener.get() == 0L)
567 	{
568 		WPS_DEBUG_MSG(("WPS4Text::readText can not find the listener\n"));
569 		return false;
570 	}
571 	if (!zone.valid())
572 	{
573 		WPS_DEBUG_MSG(("WPS4Text::readText invalid zone, must not happen\n"));
574 		m_listener->insertCharacter(' ');
575 		return false;
576 	}
577 	if (mainZone)
578 	{
579 		int numCols = mainParser().numColumns();
580 		if (numCols > 1)
581 		{
582 			if (m_listener->isSectionOpened())
583 			{
584 				WPS_DEBUG_MSG(("WPS4Text::readText the section is already open\n"));
585 			}
586 			else
587 			{
588 				int w = int(72.0*mainParser().pageWidth())/numCols;
589 				std::vector<int> width;
590 				width.resize(size_t(numCols), w);
591 				m_listener->openSection(width,librevenge::RVNG_POINT);
592 			}
593 		}
594 	}
595 	std::vector<DataFOD>::iterator FODs_iter = m_FODList.begin();
596 
597 	// update the property to correspond to the text
598 	int prevFId = -1, prevPId = -1;
599 	if (simpleString) FODs_iter = m_FODList.end();
600 	else if (FODs_iter == m_FODList.end() && mainZone)
601 	{
602 		WPS_DEBUG_MSG(("WPS4Text::readText: CAN NOT FIND any FODs for main zone, REVERT to basic string!!!!!!!!!\n"));
603 		simpleString = true;
604 	}
605 
606 	for (; FODs_iter!= m_FODList.end(); ++FODs_iter)
607 	{
608 		DataFOD const &fod = *(FODs_iter);
609 		if (fod.m_pos >= zone.begin()) break;
610 
611 		int id = (*FODs_iter).m_id;
612 		if (fod.m_type == DataFOD::ATTR_TEXT) prevFId = id;
613 		else if (fod.m_type == DataFOD::ATTR_PARAG) prevPId = id;
614 	}
615 
616 	WPS4TextInternal::Font actFont;
617 	if (prevFId != -1)
618 		actFont = m_state->m_fontList[size_t(prevFId)];
619 	else
620 	{
621 		actFont = WPS4TextInternal::Font::getDefault(version());
622 		actFont.m_type=getDefaultFontType();
623 	}
624 	m_listener->setFont(actFont);
625 
626 	if (prevPId != -1)
627 		m_listener->setParagraph(m_state->m_paragraphList[size_t(prevPId)]);
628 	else
629 		m_listener->setParagraph(WPS4TextInternal::Paragraph());
630 
631 	if (dlink)
632 	{
633 		m_listener->insertUnicodeString("include ");
634 		m_listener->insertUnicode(0x226a);
635 	}
636 	bool first = true;
637 	int actPage = 1;
638 	for (; simpleString || FODs_iter!= m_FODList.end(); ++FODs_iter)
639 	{
640 		long actPos;
641 		long lastPos;
642 
643 
644 		libwps::DebugStream f;
645 		f << "Text";
646 
647 		if (simpleString)
648 		{
649 			actPos = zone.begin();
650 			lastPos = zone.end();
651 		}
652 		else
653 		{
654 			DataFOD fod = *(FODs_iter);
655 			actPos = first ? zone.begin() : fod.m_pos;
656 			if (long(actPos) >= zone.end()) break;
657 			first = false;
658 
659 			if (++FODs_iter!= m_FODList.end())
660 			{
661 				lastPos = (*FODs_iter).m_pos;
662 				if (long(lastPos) >= zone.end()) lastPos = zone.end();
663 			}
664 			else
665 				lastPos = zone.end();
666 			--FODs_iter;
667 			int fId = fod.m_id;
668 			switch (fod.m_type)
669 			{
670 			case DataFOD::ATTR_TEXT:
671 				if (fId >= 0)
672 					actFont = m_state->m_fontList[size_t(fId)];
673 				else
674 				{
675 					actFont = WPS4TextInternal::Font::getDefault(version());
676 					actFont.m_type=getDefaultFontType();
677 				}
678 				m_listener->setFont(actFont);
679 #if DEBUG_FP
680 				f << "[";
681 				if (fId >= 0) f << "C" << fId << ":" << actFont << "]";
682 				else f << "_]";
683 #endif
684 				break;
685 			case DataFOD::ATTR_PARAG:
686 				if (fId >= 0)
687 					m_listener->setParagraph(m_state->m_paragraphList[size_t(fId)]);
688 				else
689 					m_listener->setParagraph(WPS4TextInternal::Paragraph());
690 #if DEBUG_PP
691 				f << "[";
692 				if (fId >= 0) f << "P" << fId << ":" << m_state->m_paragraphList[size_t(fId)] << "]";
693 				else f << "_]";
694 #endif
695 				break;
696 			case DataFOD::ATTR_PLC:
697 				if (fId >= 0 && m_state->m_plcList[size_t(fId)].m_type == WPS4TextInternal::BKMK)
698 				{
699 					WPSEntry bkmk;
700 					if (m_state->m_bookmarkMap.find(actPos) == m_state->m_bookmarkMap.end())
701 					{
702 						WPS_DEBUG_MSG(("WPS4Text::readText: can not find the bookmark entry\n"));
703 					}
704 					else
705 						bkmk = m_state->m_bookmarkMap.find(actPos)->second;
706 					bkmk.setType("TEXT");
707 					bkmk.setId(WPS4TextInternal::Z_Bookmark);
708 					mainParser().createDocument(bkmk, libwps::DOC_COMMENT_ANNOTATION);
709 				}
710 #if DEBUG_PLC_POS
711 				f << "[PLC";
712 				if (fId>= 0) f << m_state->m_plcList[size_t(fId)] << "]";
713 				else f << "_]";
714 #endif
715 				break;
716 			case DataFOD::ATTR_UNKN:
717 			default:
718 				WPS_DEBUG_MSG(("WPS4Text::readText: find unknown plc\n"));
719 #if DEBUG_PLC_POS
720 				f << "[DataFOD(###Unknown)]";
721 #endif
722 				break;
723 			}
724 		}
725 		m_input->seek(actPos, librevenge::RVNG_SEEK_SET);
726 		std::string chaine("");
727 		long len = lastPos-actPos;
728 		for (long i = len; i>0; i--)
729 		{
730 			long pos = m_input->tell();
731 			uint8_t readVal = libwps::readU8(m_input);
732 			if (0x00 == readVal)
733 			{
734 				if (i != 1)
735 				{
736 					WPS_DEBUG_MSG(("WPS4Text::readText: find some unexpected 0 character\n"));
737 					// probably an error, but we can ignore id
738 					chaine += '#';
739 				}
740 				continue;
741 			}
742 
743 			chaine += char(readVal);
744 			switch (readVal)
745 			{
746 			case 0x01: // chart ?
747 			case 0x08: // spreadsheet range
748 			case 0x0e: // picture
749 			{
750 				if (!actFont.m_special || m_state->m_dosLinkList.empty() || actFont.m_dlinkId >= int(m_state->m_dosLinkList.size()))
751 				{
752 					WPS_DEBUG_MSG(("WPS4Text::readText: send DLINK can not find id\n"));
753 					break;
754 				}
755 				int id = actFont.m_dlinkId >= 0 ? actFont.m_dlinkId : 0;
756 				WPSEntry ent = m_state->m_dosLinkList[size_t(id)].m_pos;
757 				ent.setType("TEXT");
758 				ent.setId(WPS4TextInternal::Z_DLink);
759 				WPSPosition pos_(Vec2f(),Vec2f(3.0f,0.2f));
760 				pos_.setRelativePosition(WPSPosition::Paragraph, WPSPosition::XCenter);
761 				pos_.m_wrapping = WPSPosition::WNone;
762 				librevenge::RVNGPropertyList extras;
763 				mainParser().createTextBox(ent, pos_, extras);
764 				m_listener->insertEOL();
765 				break;
766 			}
767 			case 0x02:
768 				m_listener->insertField(WPSContentListener::PageNumber);
769 				break;
770 			case 0x03:
771 				m_listener->insertField(WPSContentListener::Date);
772 				break;
773 			case 0x04:
774 				m_listener->insertField(WPSContentListener::Time);
775 				break;
776 			case 0x05:
777 				m_listener->insertField(WPSContentListener::Title);
778 				break;
779 			case 0x07:
780 			{
781 				if (m_state->m_objectMap.find(actPos) == m_state->m_objectMap.end())
782 				{
783 					WPS_DEBUG_MSG(("WPS4Text::readText: can not find object for position : %lX\n", actPos));
784 				}
785 				else
786 				{
787 					WPS4TextInternal::Object const &obj = m_state->m_objectMap[actPos];
788 					if (obj.m_id < 0) break;
789 
790 					mainParser().sendObject(obj.m_size, obj.m_id);
791 				}
792 				break;
793 			}
794 			case 0x0f:
795 			{
796 				if (m_state->m_dateTimeMap.find(actPos) == m_state->m_dateTimeMap.end())
797 				{
798 					WPS_DEBUG_MSG(("WPS4Text::readText: can not find date/time for position : %lX\n", actPos));
799 				}
800 				else
801 				{
802 					WPS4TextInternal::DateTime const &form = m_state->m_dateTimeMap[actPos];
803 					std::string format = form.format();
804 					if (format.length())
805 						m_listener->insertDateTimeField(format.c_str());
806 					else
807 					{
808 						WPS_DEBUG_MSG(("WPS4Text::readText: unknown date/time format for position : %lX\n", actPos));
809 					}
810 				}
811 				break;
812 			}
813 			case 0x06:   // footnote
814 			{
815 				// ok if this is the first character of the footnote definition
816 				if (zone.id() == WPS4TextInternal::Z_Note) break;
817 				if (m_state->m_footnoteMap.find(pos) == m_state->m_footnoteMap.end() ||
818 				        m_state->m_footnoteMap[pos] == 0L)
819 				{
820 					WPS_DEBUG_MSG(("WPS4Text::readText:do not find the footnote zone\n"));
821 					break;
822 				}
823 				WPS4TextInternal::Note const &note = *m_state->m_footnoteMap[pos];
824 				mainParser().createNote(note, note.m_label);
825 				break;
826 			}
827 			case 0x09:
828 				m_listener->insertTab();
829 				break;
830 			case 0x0C:
831 				if (mainZone) mainParser().newPage(++actPage);
832 				break;
833 			case 0x0d:
834 				break; // 0d0a = end of line
835 			case 0x0a:
836 				m_listener->insertEOL();
837 				break;
838 			case 0x0b: // check me
839 				m_listener->insertEOL(true);
840 				break;
841 			case 0x11: // insecable hyphen
842 				m_listener->insertUnicode(0x2011);
843 				break;
844 			case 0x12: // insecable space
845 				m_listener->insertUnicode(0xA0);
846 				break;
847 			case 0x1F: // optional hyphen
848 				break;
849 			case '&':
850 				if (simpleString && pos+2 <= lastPos)
851 				{
852 					int nextVal = libwps::readU8(m_input);
853 					bool done = true;
854 					switch (nextVal)   // check me
855 					{
856 					case 'p':
857 					case 'P':
858 						m_listener->insertField(WPSContentListener::PageNumber);
859 						break;
860 					case 'd':
861 					case 'D':
862 						m_listener->insertField(WPSContentListener::Date);
863 						break;
864 					case 't':
865 					case 'T':
866 						m_listener->insertField(WPSContentListener::Time);
867 						break;
868 					case 'f':
869 					case 'F':
870 						m_listener->insertField(WPSContentListener::Title);
871 						break;
872 					// case '&': check me does '&&'->'&' ?
873 					default:
874 						done = false;
875 						break;
876 					}
877 					if (done)
878 					{
879 						i--;
880 						break;
881 					}
882 					m_input->seek(-1, librevenge::RVNG_SEEK_CUR);
883 				}
884 			default:
885 				if (version()<=2)
886 				{
887 					// special caracter
888 					if (readVal==0xca) // not breaking space
889 					{
890 						m_listener->insertCharacter(0xA0);
891 						break;
892 					}
893 				}
894 				m_listener->insertUnicode((uint32_t)libwps_tools_win::Font::unicode(readVal, actFont.m_type));
895 				break;
896 			}
897 		}
898 
899 		if (simpleString) break;
900 
901 		f << "='"<<chaine<<"'";
902 		ascii().addPos(actPos);
903 		ascii().addNote(f.str().c_str());
904 	}
905 
906 	if (dlink)
907 		m_listener->insertUnicode(0x226b);
908 
909 	return true;
910 }
911 
912 ////////////////////////////////////////////////////////////
913 // find all the text entries
914 ////////////////////////////////////////////////////////////
readEntries()915 bool WPS4Text::readEntries()
916 {
917 	WPS4Parser::NameMultiMap &nameMultiMap = getNameEntryMap();
918 	WPS4Parser::NameMultiMap::iterator pos;
919 
920 	libwps::DebugStream f;
921 	long actPos = m_input->tell();
922 	f << "ZZHeader-Text:Limit(";
923 
924 	int textLimits[4];
925 	// look like begin of text : end of header/end of footer/end text
926 	// but sometimes the zones overlaps !!!
927 	for (int i = 0; i < 4; ++i) textLimits[i] = libwps::read32(m_input);
928 
929 	bool first = true, ok = true;
930 	long lastPos = textLimits[0] < 0x100 ? 0x100 : textLimits[0];
931 	for (int i = 0; i < 3; ++i)
932 	{
933 		long newPos = textLimits[i+1];
934 		WPSEntry zone;
935 		zone.setBegin(lastPos);
936 		zone.setEnd(newPos);
937 		zone.setType("TEXT");
938 		zone.setId(i);
939 
940 		if (newPos >= lastPos)
941 			lastPos = newPos;
942 		if (!zone.valid() || zone.begin() < 0x100)
943 		{
944 			if (newPos != 0x100 && newPos != -1)
945 			{
946 				WPS_DEBUG_MSG(("WPS4Text::readEntries: find odd text limit\n"));
947 				ok = false;
948 			}
949 			f << "_, ";
950 			continue;
951 		}
952 
953 		if (first)
954 		{
955 			m_textPositions.setBegin(zone.begin());
956 			first = false;
957 		}
958 
959 		m_textPositions.setEnd(zone.end());
960 		nameMultiMap.insert(WPS4Parser::NameMultiMap::value_type(zone.type(), zone));
961 
962 		switch (i)
963 		{
964 		case 0:
965 			m_state->m_header = zone;
966 			break;
967 		case 1:
968 			m_state->m_footer = zone;
969 			break;
970 		case 2:
971 			m_state->m_main = zone;
972 			break;
973 		default:
974 			break;
975 		}
976 
977 		f << "Text"<<i << "=" << std::hex << zone.begin() << "x" << zone.end() << ",";
978 		ascii().addPos(zone.begin());
979 		std::string name = "ZZ";
980 		name+= zone.type();
981 		name+=char('0'+i);
982 		ascii().addNote(name.c_str());
983 		ascii().addPos(zone.end());
984 		ascii().addNote("_");
985 	}
986 	f << ")";
987 	if (!ok)
988 	{
989 		m_state->m_header = m_state->m_footer = WPSEntry();
990 		m_state->m_main = m_textPositions;
991 	}
992 	if (!m_textPositions.valid())
993 	{
994 		WPS_DEBUG_MSG(("WPS4Text::readEntries: textPosition is not valid"));
995 		return false;
996 	}
997 
998 	/* stream offset to end of file */
999 	long eof = (long) libwps::readU32(m_input);
1000 
1001 	if (m_textPositions.end() > eof)
1002 	{
1003 		WPS_DEBUG_MSG(("WPS4Text:readEntries: can not find text positions\n"));
1004 		return false;
1005 	}
1006 
1007 	// check if fPositions.offset_eos
1008 	long newPos = m_input->tell();
1009 	if (m_input->seek(eof-1, librevenge::RVNG_SEEK_SET) != 0 || m_input->tell() != eof-1)
1010 	{
1011 		eof = m_input->tell();
1012 		WPS_DEBUG_MSG(("WPS4Text:readEntries: incomplete file\n"));
1013 		if (eof < m_textPositions.end()) return false;
1014 	}
1015 	mainParser().setSizeFile(eof);
1016 
1017 	f << ", endFile=" << eof;
1018 	ascii().addPos(actPos);
1019 	ascii().addNote(f.str().c_str());
1020 
1021 	m_input->seek(newPos, librevenge::RVNG_SEEK_SET);
1022 
1023 	static char const * (zName[]) =
1024 	{ "BTEC", "BTEP", "SHdr", "SFtr", "DLINK", "FTNp", "FTNd", "BKMK", "FONT" };
1025 
1026 	for (int i = 0; i < 9; ++i)
1027 		mainParser().parseEntry(zName[i]);
1028 
1029 	return true;
1030 }
1031 
1032 ////////////////////////////////////////////////////////////
1033 // find all the text structures
1034 ////////////////////////////////////////////////////////////
readStructures()1035 bool WPS4Text::readStructures()
1036 {
1037 	WPS4Parser::NameMultiMap &nameMultiMap = getNameEntryMap();
1038 	WPS4Parser::NameMultiMap::iterator pos;
1039 
1040 	// first find the font name
1041 	pos = nameMultiMap.find("FONT");
1042 	if (pos != nameMultiMap.end()) readFontNames(pos->second);
1043 
1044 	// now find the character and paragraph properties
1045 	for (int i = 0; i < 2; ++i)
1046 	{
1047 		// we begin by i = 1 to create firsts the fdpc structure
1048 		if (findFDPStructures(1-i)) continue;
1049 		findFDPStructuresByHand(1-i);
1050 	}
1051 
1052 	/* read character FODs (FOrmatting Descriptors) */
1053 	size_t numFDP = m_state->m_FDPCs.size();
1054 	std::vector<DataFOD> fdps;
1055 	for (size_t i = 0; i < numFDP; ++i)
1056 		readFDP(m_state->m_FDPCs[i], fdps, (FDPParser)&WPS4Text::readFont);
1057 	m_FODList = mergeSortedFODLists(fdps, m_FODList);
1058 
1059 
1060 	/* read paragraphs FODs (FOrmatting Descriptors) */
1061 	fdps.resize(0);
1062 	numFDP = m_state->m_FDPPs.size();
1063 	for (size_t i = 0; i < numFDP; ++i)
1064 		readFDP(m_state->m_FDPPs[i], fdps, (FDPParser)&WPS4Text::readParagraph);
1065 	m_FODList = mergeSortedFODLists(fdps, m_FODList);
1066 
1067 	/* read the object structures */
1068 	pos = nameMultiMap.find("EOBJ");
1069 	if (pos != nameMultiMap.end())
1070 	{
1071 		std::vector<long> textPtrs, listValues;
1072 		readPLC(pos->second, textPtrs, listValues, &WPS4Text::objectDataParser);
1073 	}
1074 
1075 	// update the footnote
1076 	WPSEntry ftnD, ftnP;
1077 	pos = nameMultiMap.find("FTNd");
1078 	if (pos != nameMultiMap.end()) ftnD = pos->second;
1079 	pos = nameMultiMap.find("FTNp");
1080 	if (pos != nameMultiMap.end()) ftnP = pos->second;
1081 	readFootNotes(ftnD, ftnP);
1082 
1083 	// bookmark
1084 	pos = nameMultiMap.find("BKMK");
1085 	if (pos != nameMultiMap.end())
1086 	{
1087 		std::vector<long> textPtrs, listValues;
1088 		readPLC(pos->second, textPtrs, listValues, &WPS4Text::bkmkDataParser);
1089 	}
1090 
1091 	// the list of file
1092 	pos = nameMultiMap.find("DLINK");
1093 	if (pos != nameMultiMap.end())
1094 		readDosLink(pos->second);
1095 
1096 	// date/time format
1097 	pos = nameMultiMap.find("DTTM");
1098 	if (pos != nameMultiMap.end())
1099 	{
1100 		WPSEntry const &zone = pos->second;
1101 		std::vector<long> textPtrs, listValues;
1102 		readPLC(zone, textPtrs, listValues, &WPS4Text::dttmDataParser);
1103 	}
1104 
1105 	// finally, we must remove the footnote of textposition...
1106 	long bot = m_state->m_main.begin();
1107 	long endPos = m_state->m_main.end();
1108 	size_t numFootNotes = m_state->m_footnoteList.size(), actNote = 0;
1109 	bool textPosUpdated = false;
1110 	while (bot < endPos)
1111 	{
1112 		if (actNote < numFootNotes &&
1113 		        m_state->m_footnoteList[actNote].begin()==bot)
1114 		{
1115 			bot = m_state->m_footnoteList[actNote].end();
1116 			actNote++;
1117 			continue;
1118 		}
1119 		long lastPos = actNote < numFootNotes ?
1120 		               m_state->m_footnoteList[actNote].begin() : endPos;
1121 		if (lastPos > endPos) lastPos = endPos;
1122 		WPSEntry mZone;
1123 		mZone.setBegin(bot);
1124 		mZone.setEnd(lastPos);
1125 		mZone.setType("TEXT");
1126 		if (!textPosUpdated)
1127 		{
1128 			mZone.setId(WPS4TextInternal::Z_Main);
1129 			m_state->m_main = mZone;
1130 			textPosUpdated = true;
1131 		}
1132 		else
1133 		{
1134 			if (m_state->m_otherZones.size() == 0)
1135 			{
1136 				WPS_DEBUG_MSG(("WPS4Text::readStructures: find unknown text zone\n"));
1137 			}
1138 			mZone.setId(WPS4TextInternal::Z_Unknown);
1139 			m_state->m_otherZones.push_back(mZone);
1140 		}
1141 		bot = lastPos;
1142 	}
1143 
1144 
1145 	return true;
1146 }
1147 
1148 ////////////////////////////////////////////////////////////
//  find FDP zones ( normal method followed by another method
//   which may work for some bad files )
1151 ////////////////////////////////////////////////////////////
findFDPStructures(int which)1152 bool WPS4Text::findFDPStructures(int which)
1153 {
1154 	std::vector<WPSEntry> &zones = which ? m_state->m_FDPCs : m_state->m_FDPPs;
1155 	zones.resize(0);
1156 
1157 	char const *indexName = which ? "BTEC" : "BTEP";
1158 	char const *sIndexName = which ? "FDPC" : "FDPP";
1159 
1160 	WPS4Parser::NameMultiMap &nameMultiMap =getNameEntryMap();
1161 	WPS4Parser::NameMultiMap::iterator pos = nameMultiMap.find(indexName);
1162 	if (pos == nameMultiMap.end()) return false;
1163 
1164 	std::vector<long> textPtrs;
1165 	std::vector<long> listValues;
1166 
1167 	if (!readPLC(pos->second, textPtrs, listValues)) return false;
1168 
1169 	size_t numV = listValues.size();
1170 	if (textPtrs.size() != numV+1) return false;
1171 
1172 	WPSEntry zone;
1173 	zone.setType(sIndexName);
1174 
1175 	for (size_t i = 0; i < numV; ++i)
1176 	{
1177 		long bPos = listValues[i];
1178 		if (bPos <= 0) return false;
1179 		zone.setBegin(bPos);
1180 		zone.setLength(0x80);
1181 
1182 		zones.push_back(zone);
1183 	}
1184 
1185 	return true;
1186 }
1187 
findFDPStructuresByHand(int which)1188 bool WPS4Text::findFDPStructuresByHand(int which)
1189 {
1190 	char const *indexName = which ? "FDPC" : "FDPP";
1191 	WPS_DEBUG_MSG(("WPS4Text::findFDPStructuresByHand: need to create %s list by hand \n", indexName));
1192 
1193 	std::vector<WPSEntry> &zones = which ? m_state->m_FDPCs : m_state->m_FDPPs;
1194 	zones.resize(0);
1195 
1196 	long debPos;
1197 	if (which == 1)
1198 	{
1199 		// hack: each fdp block is aligned with 0x80,
1200 		//       and appears consecutively just after the text
1201 		uint32_t pnChar = uint32_t((m_textPositions.end()+127)>>7);
1202 		/* sanity check */
1203 		if (0 == pnChar)
1204 		{
1205 			WPS_DEBUG_MSG(("WPS4Text::findFDPStructuresByHand: pnChar is 0, so file may be corrupt\n"));
1206 			throw libwps::ParseException();
1207 		}
1208 		debPos = 0x80 * (long) pnChar;
1209 	}
1210 	else
1211 	{
1212 		size_t nFDPC = m_state->m_FDPCs.size();
1213 		if (!nFDPC)
1214 		{
1215 			WPS_DEBUG_MSG(("WPS4Text::findFDPStructuresByHand: can not find last fdpc pos\n"));
1216 			return false;
1217 		}
1218 		debPos = m_state->m_FDPCs[nFDPC-1].end();
1219 	}
1220 
1221 	WPSEntry fdp;
1222 	fdp.setType(indexName);
1223 
1224 	long lastPos = m_textPositions.begin();
1225 	while (1)
1226 	{
1227 		m_input->seek(debPos+0x7f, librevenge::RVNG_SEEK_SET);
1228 		if (m_input->tell() != debPos+0x7f)
1229 		{
1230 			WPS_DEBUG_MSG(("WPS4Text: find EOF while parsing the %s\n", indexName));
1231 			return false;
1232 		}
1233 		int nbElt = libwps::readU8(m_input);
1234 		if (5*nbElt+4 > 0x80)
1235 		{
1236 			WPS_DEBUG_MSG(("WPS4Text: find too big number of data while parsing the %s\n", indexName));
1237 			return false;
1238 		}
1239 		m_input->seek(debPos, librevenge::RVNG_SEEK_SET);
1240 		if (long(libwps::readU32(m_input)) != lastPos)
1241 		{
1242 			WPS_DEBUG_MSG(("WPS4Text: find incorrect linking while parsing the %s\n", indexName));
1243 			return false;
1244 		}
1245 		if (nbElt != 1)
1246 			m_input->seek(4*nbElt-4, librevenge::RVNG_SEEK_CUR);
1247 
1248 		long newPos = (long) libwps::readU32(m_input);
1249 		if (newPos < lastPos || newPos > m_textPositions.end())
1250 		{
1251 			WPS_DEBUG_MSG(("WPS4Text: find incorrect linking while parsing the %s\n", indexName));
1252 			return false;
1253 		}
1254 		fdp.setBegin(debPos);
1255 		fdp.setLength(0x80);
1256 		zones.push_back(fdp);
1257 
1258 		if (newPos == m_textPositions.end()) break;
1259 
1260 		lastPos = newPos;
1261 		debPos = fdp.end();
1262 	}
1263 
1264 	return true;
1265 }
1266 
1267 // PLC Data: default parser
defDataParser(long,long,int,long endPos,std::string & mess)1268 bool WPS4Text::defDataParser(long , long , int , long endPos, std::string &mess)
1269 {
1270 	mess = "";
1271 	libwps::DebugStream f;
1272 
1273 	long actPos = m_input->tell();
1274 	long length = endPos+1-actPos;
1275 	int sz = (length%4)==0 ? 4 : (length%2)==0 ? 2 : 1;
1276 	f << "unk["<< sz << "]=";
1277 	while (m_input->tell() <= endPos+1-sz)
1278 	{
1279 		long val = 0;
1280 		switch (sz)
1281 		{
1282 		case 1:
1283 			val = libwps::readU8(m_input);
1284 			break;
1285 		case 2:
1286 			val = libwps::readU16(m_input);
1287 			break;
1288 		case 4:
1289 			val = (long) libwps::readU32(m_input);
1290 			break;
1291 		default:
1292 			break;
1293 		}
1294 		f << std::hex << val << std::dec << ",";
1295 	}
1296 	mess = f.str();
1297 	return true;
1298 }
1299 
1300 ////////////////////////////////////////////////////////////
1301 // the fonts name zone (zone8)
1302 ////////////////////////////////////////////////////////////
readFontNames(WPSEntry const & entry)1303 bool WPS4Text::readFontNames(WPSEntry const &entry)
1304 {
1305 	if (!entry.valid()) return false;
1306 
1307 	m_input->seek(entry.begin(), librevenge::RVNG_SEEK_SET);
1308 
1309 	long endPos = entry.end();
1310 	int nFonts = 0;
1311 	libwps_tools_win::Font::Type docType=getDefaultFontType();
1312 	while (m_input->tell() < endPos)
1313 	{
1314 		long actPos;
1315 		actPos = m_input->tell();
1316 		libwps::DebugStream f;
1317 
1318 		/* Sometimes the font numbers start at 0 and increment nicely.
1319 		   However, other times the font numbers jump around. */
1320 		uint8_t font_number = libwps::readU8(m_input);
1321 		if (m_state->m_fontNames.find(font_number) != m_state->m_fontNames.end())
1322 		{
1323 			WPS_DEBUG_MSG(("WPS4Text::readFontNames: at position 0x%lx: font number %i duplicated\n",
1324 			               (m_input->tell())-2, font_number));
1325 			throw libwps::ParseException();
1326 		}
1327 
1328 		f << "Font" << nFonts++ << ": id=" << (int)font_number << ", ";
1329 		//fixme: what is this byte? maybe a font class
1330 		uint8_t unknown_byte = libwps::readU8(m_input);
1331 		f << "unk=" << (int)unknown_byte << ", ";
1332 
1333 		std::string s;
1334 		uint8_t nChar = libwps::readU8(m_input);
1335 		for (uint8_t i = nChar; i>0; i--)
1336 		{
1337 			if (m_input->isEnd())
1338 			{
1339 				WPS_DEBUG_MSG(("WPS4Text::readFontNames: can not read the font number %i (end of file)\n",
1340 				               font_number));
1341 				throw libwps::ParseException();
1342 			}
1343 			unsigned char val = libwps::readU8(m_input);
1344 			// sanity check (because sometimes contains char > 0x80 .. )
1345 			if (val >= ' ' && val <= 'z') s.append(1,char(val));
1346 			else
1347 			{
1348 				static bool first = true;
1349 				if (first)
1350 				{
1351 					first = false;
1352 					WPS_DEBUG_MSG(("WPS4Text:readFontNames find odd caracters in font name : %d\n", (int) val));
1353 				}
1354 				f << "##oddC=" << (unsigned int) val << ", ";
1355 			}
1356 		}
1357 		libwps_tools_win::Font::Type fType=libwps_tools_win::Font::getFontType(s);
1358 		if (fType==libwps_tools_win::Font::UNKNOWN)
1359 			fType=docType;
1360 		WPS4TextInternal::FontName font;
1361 		font.m_name = s;
1362 		font.m_type = fType;
1363 		f << font;
1364 
1365 		m_state->m_fontNames[font_number] = font;
1366 
1367 		ascii().addPos(actPos);
1368 		ascii().addNote(f.str().c_str());
1369 		ascii().addPos(m_input->tell());
1370 	}
1371 
1372 	return true;
1373 }
1374 
1375 ////////////////////////////////////////////////////////////
1376 // the font:
1377 ////////////////////////////////////////////////////////////
readFont(long endPos,int & id,std::string & mess)1378 bool WPS4Text::readFont(long endPos, int &id, std::string &mess)
1379 {
1380 	WPS4TextInternal::Font font(version());
1381 	font.m_size = 12;
1382 
1383 	libwps::DebugStream f;
1384 
1385 	int fl[4] = { 0, 0, 0, 0};
1386 	if (m_input->tell() < endPos) fl[0] = libwps::readU8(m_input);
1387 
1388 	/* set difference from default properties */
1389 	uint32_t attributes = 0;
1390 	if (fl[0] & 0x01) attributes |= WPS_BOLD_BIT;
1391 	if (fl[0] & 0x02) attributes |= WPS_ITALICS_BIT;
1392 	if (fl[0] & 0x04) attributes |= WPS_STRIKEOUT_BIT;
1393 	fl[0] &= 0xf8;
1394 
1395 	// what & 0x01 -> ???
1396 	// what & 0x02 -> note
1397 	// what & 0x04 -> ???
1398 	// what & 0x08 -> fName
1399 	// what & 0x10 -> size
1400 	// what & 0x20 -> underline (fl[2])
1401 	// what & 0x40 -> decalage
1402 	// what & 0x80 -> color
1403 	int what = 0;
1404 	if (m_input->tell() < endPos) what = libwps::readU8(m_input);
1405 
1406 	font.m_special = ((what & 2) != 0);
1407 	what &= 0xfd;
1408 
1409 	if (m_input->tell() < endPos)
1410 	{
1411 		// the fonts
1412 		// FIXME: find some properties with size=3,
1413 		//        for which this character seems
1414 		//        related to size, not font
1415 		uint8_t font_n = libwps::readU8(m_input);
1416 
1417 		if (m_state->m_fontNames.find(font_n) != m_state->m_fontNames.end())
1418 		{
1419 			font.m_name=m_state->m_fontNames[font_n].m_name;
1420 			font.m_type=m_state->m_fontNames[font_n].m_type;
1421 		}
1422 		else if (version() <= 2)
1423 		{
1424 			font.m_name=WPS4TextInternal::FontName::getDosName(font_n);
1425 			font.m_type=getDefaultFontType();
1426 		}
1427 		else
1428 		{
1429 			WPS_DEBUG_MSG(("WPS4Text: error: encountered font %i which is not indexed\n",
1430 			               font_n));
1431 		}
1432 
1433 		if (font.m_name.empty()) f << "###nameId=" << int(font_n) << ",";
1434 	}
1435 
1436 	if (m_input->tell() < endPos)
1437 	{
1438 		// underline, ...
1439 		int underlinePos = libwps::readU8(m_input);
1440 		if (underlinePos)
1441 		{
1442 			if (!(what & 0x20)) f << "undFl,";
1443 			else what &= 0xdf;
1444 			attributes |= WPS_UNDERLINE_BIT;
1445 		}
1446 	}
1447 
1448 	if (m_input->tell() < endPos)   // font size * 2
1449 	{
1450 		int fSize = libwps::readU8(m_input);
1451 		if (fSize)
1452 		{
1453 			if (!(what & 0x10)) f << "szFl,";
1454 			else what &= 0xef;
1455 			font.m_size = (fSize/2);
1456 		}
1457 	}
1458 
1459 	if (m_input->tell() < endPos)   // height decalage -> sub/superscript
1460 	{
1461 		int fDec = libwps::read8(m_input);
1462 		if (fDec)
1463 		{
1464 			if (!(what & 0x40)) f << "sub/supFl(val=" << fDec<<"),";
1465 			else what &= 0xbf;
1466 
1467 			if (fDec > 0) attributes |= WPS_SUPERSCRIPT_BIT;
1468 			else attributes |= WPS_SUBSCRIPT_BIT;
1469 		}
1470 	}
1471 	if (m_input->tell()+2 <= endPos)   // color field
1472 	{
1473 		int bkColor = libwps::readU8(m_input);
1474 		int ftColor = libwps::readU8(m_input);
1475 		bool setColor = !!(what & 0x80);
1476 		what &= 0x7F;
1477 
1478 		if ((bkColor || ftColor) && !setColor)
1479 		{
1480 			setColor = true;
1481 			f << "colorFl,";
1482 		}
1483 		if (setColor)
1484 		{
1485 			uint32_t color;
1486 			if (mainParser().getColor(bkColor, color))
1487 				font.m_backColor = color;
1488 			if (mainParser().getColor(ftColor, color))
1489 				font.m_color = color;
1490 		}
1491 	}
1492 	if (m_input->tell() < endPos)
1493 		font.m_dlinkId = libwps::readU8(m_input);
1494 	if (what)  f << "#what=" << std::hex << what << std::dec << ",";
1495 	if (fl[0])  f << "unkn0=" << std::hex << fl[0] << std::dec << ",";
1496 
1497 	if (m_input->tell() != endPos)
1498 	{
1499 		f << "#unknEnd=(";
1500 		while (m_input->tell() < endPos) f << std::hex << libwps::readU8(m_input) <<",";
1501 		f << ")";
1502 	}
1503 
1504 	font.m_attributes = attributes;
1505 	font.m_extra = f.str();
1506 
1507 	id = int(m_state->m_fontList.size());
1508 	m_state->m_fontList.push_back(font);
1509 	f.str("");
1510 	f << font;
1511 	mess = f.str();
1512 
1513 	return true;
1514 }
1515 
1516 ////////////////////////////////////////////////////////////
1517 // the file list: only in dos3  ?
1518 ////////////////////////////////////////////////////////////
readDosLink(WPSEntry const & entry)1519 bool WPS4Text::readDosLink(WPSEntry const &entry)
1520 {
1521 	if (!entry.valid()) return false;
1522 
1523 	long length = entry.length();
1524 	if (length%44)
1525 	{
1526 		WPS_DEBUG_MSG(("WPS4Text::readDosLink: length::=%ld seem odd\n", length));
1527 		return false;
1528 	}
1529 
1530 	m_input->seek(entry.begin(), librevenge::RVNG_SEEK_SET);
1531 	libwps::DebugStream f;
1532 	long numElt = length/44;
1533 	long val;
1534 	for (long n = 0; n < numElt; ++n)
1535 	{
1536 		WPS4TextInternal::DosLink link;
1537 		long pos = m_input->tell();
1538 		long endPos = pos+44;
1539 		f.str("");
1540 		for (int i = 0; i < 2; ++i) // always 0, 0
1541 		{
1542 			val = libwps::readU16(m_input);
1543 			if (val) f << "unkn" << i << "=" << std::hex << val << std::dec << ",";
1544 		}
1545 		link.m_width = float(libwps::readU16(m_input)/1440.);
1546 		for (int i = 2; i < 4; ++i) // always f0, f0
1547 		{
1548 			val = libwps::readU16(m_input);
1549 			if (val != 0xf0) f << "unkn" << i << "=" << std::hex << val << std::dec << ",";
1550 		}
1551 		link.m_type = libwps::readU8(m_input);
1552 		val = libwps::readU8(m_input);
1553 		if (val) // find 0x18 for a spreadsheet
1554 			f << "unk4=" << std::hex << val << std::dec << ",";
1555 		switch (link.m_type)
1556 		{
1557 		case 0x81: // picture ?
1558 		{
1559 			long dim[2];
1560 			for (int i = 0; i < 2; ++i) dim[i] = libwps::readU16(m_input);
1561 			link.m_size = Vec2f(float(dim[0])/1440.f, float(dim[1])/1440.f);
1562 			val = libwps::readU16(m_input); // always 0
1563 			if (val) f << "g0=" << val << ",";
1564 			val = libwps::readU16(m_input); // always 4
1565 			if (val != 4) f << "g1=" << val << ",";
1566 		}
1567 		// fall-through intended
1568 		case 0x40: // spreadsheet range
1569 		case 0x01: // char ?
1570 		{
1571 			std::string name("");
1572 			link.m_pos.setBegin(m_input->tell());
1573 			while (!m_input->isEnd() && long(m_input->tell()) < endPos)
1574 			{
1575 				char c = char(libwps::readU8(m_input));
1576 				if (!c)
1577 				{
1578 					m_input->seek(-1, librevenge::RVNG_SEEK_CUR);
1579 					break;
1580 				}
1581 				name += c;
1582 			}
1583 			link.m_pos.setEnd(m_input->tell());
1584 			link.m_pos.setId(WPS4TextInternal::Z_DLink);
1585 			link.m_name = name;
1586 			break;
1587 		}
1588 		default:
1589 			break;
1590 		}
1591 		link.m_extra = f.str();
1592 		m_state->m_dosLinkList.push_back(link);
1593 		f.str("");
1594 		f << "ZZDLINK-" << n << ":" << link;
1595 		if (long(m_input->tell()) != endPos)
1596 			ascii().addDelimiter(m_input->tell(),'|');
1597 		ascii().addPos(pos);
1598 		ascii().addNote(f.str().c_str());
1599 		m_input->seek(endPos, librevenge::RVNG_SEEK_SET);
1600 	}
1601 	return true;
1602 }
1603 
1604 ////////////////////////////////////////////////////////////
1605 // the paragraph properties:
1606 ////////////////////////////////////////////////////////////
readParagraph(long endPos,int & id,std::string & mess)1607 bool WPS4Text::readParagraph(long endPos, int &id, std::string &mess)
1608 {
1609 	long actPos = m_input->tell();
1610 	long size = endPos - actPos;
1611 
1612 	WPS4TextInternal::Paragraph pp;
1613 	if (size && size < 3)
1614 	{
1615 		WPS_DEBUG_MSG(("WPS4Text:readParagraph:(sz=%ld)\n", size));
1616 		return false;
1617 	}
1618 
1619 	libwps::DebugStream f;
1620 	for (int i = 0; i < 3; ++i)
1621 	{
1622 		int v = libwps::readU8(m_input);
1623 		if (v != 0) f << "unkn"<<i<< "=" << v;
1624 	}
1625 
1626 	while (m_input->tell() < endPos)
1627 	{
1628 		int v = libwps::readU8(m_input);
1629 		long pos = m_input->tell();
1630 		bool ok = true, done = true;
1631 		int arg = -1;
1632 		switch (v)
1633 		{
1634 		case 0x2:
1635 		{
1636 			if (pos+1 > endPos)
1637 			{
1638 				ok = false;
1639 				break;
1640 			}
1641 			arg = libwps::readU8(m_input);
1642 			f << "f2=" << arg << ",";
1643 			break;
1644 		}
1645 		case 0x5:
1646 		{
1647 			if (pos+1 > endPos)
1648 			{
1649 				ok = false;
1650 				break;
1651 			}
1652 			arg = libwps::readU8(m_input);
1653 			switch (arg)
1654 			{
1655 			case 0:
1656 				pp.m_justify = libwps::JustificationLeft;
1657 				break;
1658 			case 1:
1659 				pp.m_justify = libwps::JustificationCenter;
1660 				break;
1661 			case 2:
1662 				pp.m_justify = libwps::JustificationRight;
1663 				break;
1664 			case 3:
1665 				pp.m_justify = libwps::JustificationFull;
1666 				break;
1667 			default:
1668 				f << "#just=" << arg << ",";
1669 				pp.m_justify = libwps::JustificationLeft;
1670 			}
1671 			break;
1672 		}
1673 		case 0x7:   // 1: marked don't break paragraph
1674 		case 0x8:   // 1: marked keep paragraph with next
1675 		{
1676 			if (pos+1 > endPos)
1677 			{
1678 				ok = false;
1679 				break;
1680 			}
1681 			arg = libwps::readU8(m_input);
1682 			if (arg == 0) break;
1683 			if (arg == 1) pp.m_breakStatus |= ((v == 7) ? libwps::NoBreakBit : libwps::NoBreakWithNextBit);
1684 			else f << "#status=" << arg << ",";
1685 			break;
1686 		}
1687 
1688 		// BORDER
1689 		case 0x9:
1690 		{
1691 			if (pos+1 > endPos)
1692 			{
1693 				ok = false;
1694 				break;
1695 			}
1696 			arg = libwps::readU8(m_input);
1697 			pp.m_borderStyle.m_style = WPSBorder::Single;
1698 			pp.m_borderStyle.m_width = 1;
1699 			int style = (arg&0xf);
1700 			switch (style)
1701 			{
1702 			case 0:
1703 				break;
1704 			case 1:
1705 				pp.m_borderStyle.m_width = 2;
1706 				break;
1707 			case 2:
1708 				pp.m_borderStyle.m_style = WPSBorder::Double;
1709 				break;
1710 			case 3:
1711 				pp.m_borderStyle.m_style = WPSBorder::Dot;
1712 				break;
1713 			case 4:
1714 				pp.m_borderStyle.m_style = WPSBorder::LargeDot;
1715 				break;
1716 			case 5:
1717 				pp.m_borderStyle.m_style = WPSBorder::Dash;
1718 				break;
1719 			case 6:
1720 			case 7:
1721 			case 8:
1722 				pp.m_borderStyle.m_width = style-3;
1723 				break;
1724 			case 9:
1725 			case 10:
1726 				pp.m_borderStyle.m_width = style-7;
1727 				pp.m_borderStyle.m_style = WPSBorder::Double;
1728 				break;
1729 			default:
1730 				f << "#borderStyle=" << style << ",";
1731 				WPS_DEBUG_MSG(("WPS4Text:readParagraph: unknown border style\n"));
1732 				break;
1733 			}
1734 			int high = (arg>>4);
1735 			if (version() < 3)
1736 			{
1737 				uint32_t color;
1738 				if (high && mainParser().getColor(high, color))
1739 					pp.m_borderStyle.m_color = color;
1740 				else if (high)
1741 					f << "#borderColor=" << high << ",";
1742 			}
1743 			else
1744 			{
1745 				switch (high)
1746 				{
1747 				case 0:
1748 					break;
1749 				case 4:
1750 					pp.m_border = 0xf;
1751 					break;
1752 				case 8:
1753 					pp.m_border = 0xf;
1754 					f << "borderShaded,";
1755 					break;
1756 				default:
1757 					f << "#borderStyle[high]=" << high << ",";
1758 					break;
1759 				}
1760 			}
1761 			break;
1762 		}
1763 		case 0xa: // 1: top border
1764 		case 0xb: //  : bottom border
1765 		case 0xc: //  : left border
1766 		case 0xd: //  : right border
1767 		{
1768 			if (pos+1 > endPos)
1769 			{
1770 				ok = false;
1771 				break;
1772 			}
1773 			arg = libwps::readU8(m_input);
1774 			if (arg == 0) break;
1775 			if (arg == 1)
1776 			{
1777 				switch (v)
1778 				{
1779 				case 0xa:
1780 					pp.m_border |= WPSBorder::TopBit;
1781 					break;
1782 				case 0xb:
1783 					pp.m_border |= WPSBorder::BottomBit;
1784 					break;
1785 				case 0xc:
1786 					pp.m_border |= WPSBorder::LeftBit;
1787 					break;
1788 				case 0xd:
1789 					pp.m_border |= WPSBorder::RightBit;
1790 					break;
1791 				default:
1792 					break;
1793 				}
1794 			}
1795 			else f << "#border=" << arg << ",";
1796 			break;
1797 		}
1798 		case 0x18:   // border color
1799 		{
1800 			if (long(pos)==endPos)
1801 			{
1802 				ok = false;
1803 				break;
1804 			}
1805 			int colorId = libwps::readU8(m_input);
1806 			uint32_t color;
1807 			if (mainParser().getColor(colorId, color))
1808 				pp.m_borderStyle.m_color = color;
1809 			else
1810 				f << "#colorId=" << colorId << ",";
1811 			break;
1812 		}
1813 		case 0xe:   // 1: bullet
1814 		{
1815 			if (pos+1 > endPos)
1816 			{
1817 				ok = false;
1818 				break;
1819 			}
1820 			arg = libwps::readU8(m_input);
1821 			if (arg == 0) break;
1822 
1823 			pp.m_listLevelIndex = 1;
1824 			pp.m_listLevel.m_type = libwps::BULLET;
1825 			static const uint32_t bulletList[]=
1826 			{
1827 				0x2022, 0x3e, 0x25c6, 0x21d2, 0x25c7, 0x2605, /* 1-6 */
1828 				0, 0, 0, 0, 0, 0, /* 7-12 unknown */
1829 				0, 0, 0, 0, 0, 0x2750, /* 13-17 unknown and document... */
1830 				0x2713, 0x261e, 0x2704, 0x2611, 0x2612, 0x270e /* 18-24 */
1831 			};
1832 			if (arg <= 24 && bulletList[arg-1])
1833 				WPSContentListener::appendUnicode(bulletList[arg-1], pp.m_listLevel.m_bullet);
1834 			else
1835 				WPSContentListener::appendUnicode(0x2022, pp.m_listLevel.m_bullet);
1836 			break;
1837 		}
1838 		case 0x1b:
1839 		case 0x1a:
1840 		case 0x10:   // the bullet char : 0x18
1841 		{
1842 			if (pos+1 > endPos)
1843 			{
1844 				ok = false;
1845 				break;
1846 			}
1847 			arg = libwps::readU8(m_input);
1848 			done = true;
1849 			switch (v)
1850 			{
1851 			case 0x1a:
1852 				if (arg) f << "backPattern=" << arg << ",";
1853 				break;
1854 			case 0x1b:
1855 			{
1856 				if (arg==0) break;
1857 				uint32_t color;
1858 				if (mainParser().getColor(arg>>4, color))
1859 					f << "backPatternBackColor=" << std::hex << color << std::dec << ";";
1860 				else
1861 					f << "#backPatternBackColor=" << (arg>>4) << ",";
1862 				if (mainParser().getColor(arg&0xf, color))
1863 					f << "backPatternFrontColor=" << std::hex << color << std::dec << ";";
1864 				else
1865 					f << "#backPatternFrontColor=" << (arg&0xf) << ",";
1866 				break;
1867 			}
1868 			case 0x10:
1869 				if (arg!=0x18) f << "bullet?=" << arg << ",";
1870 				break;
1871 			default:
1872 				done = false;
1873 				break;
1874 			}
1875 			break;
1876 		}
1877 		case 0xf:   // tabs:
1878 		{
1879 			if (pos+1 > endPos)
1880 			{
1881 				ok = false;
1882 				break;
1883 			}
1884 			int nVal = libwps::read8(m_input);
1885 			if (nVal < 2 || pos + 1 + nVal > endPos)
1886 			{
1887 				ok = false;
1888 				break;
1889 			}
1890 			int flag = libwps::readU8(m_input);
1891 			if (flag) f << "#tabsFl=" << flag << ",";
1892 			size_t nItem = libwps::readU8(m_input);
1893 			if ((unsigned long)nVal != 2 + 3*nItem)
1894 			{
1895 				ok = false;
1896 				break;
1897 			}
1898 			pp.m_tabs.resize(nItem);
1899 			for (size_t i = 0; i < nItem; ++i)
1900 				pp.m_tabs[i].m_position = libwps::read16(m_input)/1440.;
1901 			for (size_t i = 0; i < nItem; ++i)
1902 			{
1903 				enum WPSTabStop::Alignment align = WPSTabStop::LEFT;
1904 				int val = libwps::readU8(m_input);
1905 				switch ((val & 0x3))
1906 				{
1907 				case 0:
1908 					align = WPSTabStop::LEFT;
1909 					break;
1910 				case 1:
1911 					align = WPSTabStop::CENTER;
1912 					break;
1913 				case 2:
1914 					align = WPSTabStop::RIGHT;
1915 					break;
1916 				case 3:
1917 					align = WPSTabStop::DECIMAL;
1918 					break;
1919 				default:
1920 					break;
1921 				}
1922 				pp.m_tabs[i].m_alignment = align;
1923 
1924 				if (val&4) f << "#Tabbits3";
1925 				val = (val>>3);
1926 
1927 				switch (val)
1928 				{
1929 				case 0:
1930 					break;
1931 				case 1:
1932 					pp.m_tabs[i].m_leaderCharacter = '.';
1933 					break;
1934 				case 2:
1935 					pp.m_tabs[i].m_leaderCharacter = '-';
1936 					break;
1937 				case 3:
1938 					pp.m_tabs[i].m_leaderCharacter = '_';
1939 					break;
1940 				case 4:
1941 					pp.m_tabs[i].m_leaderCharacter = '=';
1942 					break;
1943 				default:
1944 					f << "#TabSep=" << val;
1945 				}
1946 			}
1947 
1948 			break;
1949 		}
1950 		case 0x11: // right margin : 1440*inches
1951 		case 0x12: // left margin
1952 		case 0x13: // another margin ( check me )
1953 		case 0x14: // left text indent (relative to left margin)
1954 		case 0x15: // line spacing (inter line) 240
1955 		case 0x16: // line spacing before 240 = 1 line spacing
1956 		case 0x17:   // line spacing after
1957 		{
1958 			if (pos+2 > endPos)
1959 			{
1960 				ok = false;
1961 				break;
1962 			}
1963 
1964 			arg = libwps::read16(m_input);
1965 			switch (v)
1966 			{
1967 			case 0x11:
1968 				pp.m_margins[2] = arg/1440.;
1969 				break;
1970 			case 0x13: // seems another way to define the left margin
1971 				f << "#left,";
1972 			// fall-through intended
1973 			case 0x12:
1974 				pp.m_margins[1] = arg/1440.;
1975 				break;
1976 			case 0x14:
1977 				pp.m_margins[0] = arg/1440.;
1978 				break;
1979 			case 0x15:
1980 			{
1981 				pp.m_spacings[0] = arg ? arg/240. : 1.0;
1982 				if (pp.m_spacings[0] < 1.0 || pp.m_spacings[0] > 2.0)
1983 				{
1984 					f << "##interLineSpacing=" << pp.m_spacings[0] << ",";
1985 					pp.m_spacings[0] = (pp.m_spacings[0] < 1.0) ? 1.0 : 2.0;
1986 				}
1987 				break;
1988 			}
1989 			case 0x16:
1990 				pp.m_spacings[1] = arg/240.;
1991 				break;
1992 			case 0x17:
1993 				pp.m_spacings[2] = arg/240.;
1994 				break;
1995 			default:
1996 				done = false;
1997 			}
1998 			break;
1999 		}
2000 		default:
2001 			ok = false;
2002 		}
2003 		if (!ok)
2004 		{
2005 			m_input->seek(pos, librevenge::RVNG_SEEK_SET);
2006 			f << "###v" << v<<"=" <<std::hex;
2007 			while (m_input->tell() < endPos)
2008 				f << (int) libwps::readU8(m_input) << ",";
2009 			break;
2010 		}
2011 
2012 		if (done) continue;
2013 
2014 		f << "f" << v << "=" << std::hex << arg << std::dec << ",";
2015 	}
2016 	if (pp.m_listLevelIndex >= 1)
2017 		pp.m_margins[0] +=  pp.m_margins[1];
2018 	else if (pp.m_margins[0] + pp.m_margins[1] < 0.0)
2019 	{
2020 		// sanity check
2021 		if (pp.m_margins[1] < 0.0) pp.m_margins[1] = 0.0;
2022 		pp.m_margins[0] = -pp.m_margins[1];
2023 	}
2024 	pp.m_extra = f.str();
2025 
2026 	id = int(m_state->m_paragraphList.size());
2027 	m_state->m_paragraphList.push_back(pp);
2028 
2029 	f.str("");
2030 	f << pp;
2031 	mess = f.str();
2032 	return true;
2033 }
2034 
2035 ////////////////////////////////////////////////////////////
2036 // the foot note properties:
2037 ////////////////////////////////////////////////////////////
readFootNotes(WPSEntry const & ftnD,WPSEntry const & ftnP)2038 bool WPS4Text::readFootNotes(WPSEntry const &ftnD, WPSEntry const &ftnP)
2039 {
2040 	if (!ftnD.valid() && !ftnP.valid()) return true;
2041 	if (!ftnD.valid() || !ftnP.valid())
2042 	{
2043 		WPS_DEBUG_MSG(("WPS4Text::readFootNotes: one of the two entry is not valid, footnote will be ignored\n"));
2044 		return false;
2045 	}
2046 
2047 	std::vector<long> footNotePos,footNoteDef, listValues;
2048 	if (!readPLC(ftnP, footNotePos, listValues, &WPS4Text::footNotesDataParser))
2049 	{
2050 		WPS_DEBUG_MSG(("WPS4Text::readFootNotes: can not read positions\n"));
2051 		return false;
2052 	}
2053 
2054 	if (!readPLC(ftnD, footNoteDef, listValues))
2055 	{
2056 		WPS_DEBUG_MSG(("WPS4Text::readFootNotes: can not read definitions\n"));
2057 		return false;
2058 	}
2059 
2060 	int numFootNotes = int(footNotePos.size())-1;
2061 	if (numFootNotes <= 0 || int(footNoteDef.size())-1 != numFootNotes)
2062 	{
2063 		WPS_DEBUG_MSG(("WPS4Text::readFootNotes: no footnotes\n"));
2064 		return false;
2065 	}
2066 
2067 	// save the actual type and create a list of footnote entries
2068 	std::vector<WPS4TextInternal::Note> noteTypes=m_state->m_footnoteList;
2069 	m_state->m_footnoteList.resize(0);
2070 
2071 	std::vector<int> corresp;
2072 	for (size_t i = 0; i < size_t(numFootNotes); ++i)
2073 	{
2074 		WPS4TextInternal::Note fZone;
2075 		fZone.setBegin(footNoteDef[i]);
2076 		fZone.setEnd(footNoteDef[i+1]);
2077 		fZone.setType("TEXT");
2078 		fZone.setId(WPS4TextInternal::Z_Note);
2079 		m_state->m_footnoteList.push_back(fZone);
2080 		corresp.push_back(int(i));
2081 
2082 		// sort the footnote
2083 		for (size_t j = i; j > 0; j--)
2084 		{
2085 			if (m_state->m_footnoteList[j].begin() >=
2086 			        m_state->m_footnoteList[j-1].end()) break;
2087 
2088 			if (m_state->m_footnoteList[j].end() >
2089 			        m_state->m_footnoteList[j-1].begin())
2090 			{
2091 				WPS_DEBUG_MSG
2092 				(("WPS4Text: error: can not create footnotes zone, found %lx and %lx\n",
2093 				  m_state->m_footnoteList[j].end(),m_state->m_footnoteList[j-1].begin()));
2094 
2095 				m_state->m_footnoteList.resize(0);
2096 				return false;
2097 			}
2098 
2099 			WPS4TextInternal::Note tmpZ = m_state->m_footnoteList[j];
2100 			m_state->m_footnoteList[j] = m_state->m_footnoteList[j-1];
2101 			m_state->m_footnoteList[j-1] = tmpZ;
2102 
2103 			int pos = corresp[j];
2104 			corresp[j] = corresp[j-1];
2105 			corresp[j-1] = pos;
2106 		}
2107 	}
2108 	// ok, we can create the map, ...
2109 	for (size_t i = 0; i < size_t(numFootNotes); ++i)
2110 	{
2111 		size_t id = size_t(corresp[i]);
2112 		WPS4TextInternal::Note &z = m_state->m_footnoteList[id];
2113 		if (id < noteTypes.size())
2114 		{
2115 			z.m_label = noteTypes[id].m_label;
2116 			z.m_error = noteTypes[id].m_error;
2117 		}
2118 		m_state->m_footnoteMap[footNotePos[id]] = &z;
2119 	}
2120 	return true;
2121 }
2122 
footNotesDataParser(long,long,int id,long endPos,std::string & mess)2123 bool WPS4Text::footNotesDataParser(long /*bot*/, long /*eot*/, int id,
2124                                    long endPos, std::string &mess)
2125 {
2126 	mess = "";
2127 
2128 	long actPos = m_input->tell();
2129 	long length = endPos+1-actPos;
2130 	if (length != 12)
2131 	{
2132 		WPS_DEBUG_MSG(("WPS4Text::footNotesDataParser: unknown size %ld for footdata data\n", length));
2133 		return false;
2134 	}
2135 	libwps::DebugStream f;
2136 	WPS4TextInternal::Note note;
2137 	int type = libwps::readU16(m_input);
2138 	if (type & 1)
2139 	{
2140 		if (type != 1)
2141 			f << "###numeric=" << std::hex << type << std::dec << ",";
2142 	}
2143 	else if (type == 0 || type > 20)
2144 		f << "###char,";
2145 	else
2146 	{
2147 		int numC = type/2;
2148 		librevenge::RVNGString label("");
2149 		libwps_tools_win::Font::Type actType = getDefaultFontType();
2150 		for (int i=0; i < numC; ++i)
2151 		{
2152 			unsigned char c = libwps::readU8(m_input);
2153 			WPSContentListener::appendUnicode(uint32_t(libwps_tools_win::Font::unicode(c, actType)),label);
2154 			if (c < 0x20)
2155 				f << "#(" << std::hex << int(c) << std::dec << ")";
2156 		}
2157 		note.m_label = label;
2158 	}
2159 	note.m_error=f.str();
2160 	if (id >= int(m_state->m_footnoteList.size()))
2161 		m_state->m_footnoteList.resize(size_t(id+1));
2162 	m_state->m_footnoteList[size_t(id)]=note;
2163 	f.str("");
2164 	f << note;
2165 	mess = f.str();
2166 	m_input->seek(endPos+1, librevenge::RVNG_SEEK_SET);
2167 	return true;
2168 }
2169 
2170 ////////////////////////////////////////////////////////////
2171 // the bookmark properties:
2172 ////////////////////////////////////////////////////////////
bkmkDataParser(long bot,long,int,long endPos,std::string & mess)2173 bool WPS4Text::bkmkDataParser(long bot, long /*eot*/, int /*id*/,
2174                               long endPos, std::string &mess)
2175 {
2176 	mess = "";
2177 	if (m_state->m_bookmarkMap.find(bot) != m_state->m_bookmarkMap.end())
2178 	{
2179 		WPS_DEBUG_MSG(("WPS4Text:bkmkDataParser: bookmark already exists in this position\n"));
2180 		return true;
2181 	}
2182 
2183 	long actPos = m_input->tell();
2184 	long length = endPos+1-actPos;
2185 	if (length != 16)
2186 	{
2187 		WPS_DEBUG_MSG(("WPS4Text::bkmkDataParser: unknown size %ld for bkmkdata data\n", length));
2188 		return false;
2189 	}
2190 
2191 	for (int i = 0; i < 16; ++i)
2192 	{
2193 		char c = char(libwps::readU8(m_input));
2194 		if (c == '\0') break;
2195 		mess += c;
2196 	}
2197 	WPSEntry ent;
2198 	ent.setBegin(actPos);
2199 	ent.setEnd(m_input->tell());
2200 	ent.setId(WPS4TextInternal::Z_String);
2201 	m_state->m_bookmarkMap[bot] = ent;
2202 	m_input->seek(endPos+1, librevenge::RVNG_SEEK_SET);
2203 	return true;
2204 }
2205 
2206 ////////////////////////////////////////////////////////////
2207 // the object properties:
2208 ////////////////////////////////////////////////////////////
objectDataParser(long bot,long,int id,long endPos,std::string & mess)2209 bool WPS4Text::objectDataParser(long bot, long /*eot*/, int id,
2210                                 long endPos, std::string &mess)
2211 {
2212 	mess = "";
2213 	if (m_state->m_objectMap.find(bot) != m_state->m_objectMap.end())
2214 	{
2215 		WPS_DEBUG_MSG(("WPS4Text:objectDataParser: object already exists in this position\n"));
2216 		return true;
2217 	}
2218 
2219 	libwps::DebugStream f;
2220 
2221 	long actPos = m_input->tell();
2222 	long length = endPos+1-actPos;
2223 	if (length != 36)
2224 	{
2225 		WPS_DEBUG_MSG(("WPS4Text:objectDataParser unknown size %ld for object data\n", length));
2226 		return false;
2227 	}
2228 
2229 	f << "type(?)=" <<libwps::read16(m_input) << ","; // 3->08 4->4f4d or 68->list?
2230 	for (int i = 0; i < 2; ++i)
2231 	{
2232 		int v =libwps::read16(m_input);
2233 		if (v) f << "unkn1:" << i << "=" << v << ",";
2234 	}
2235 	float dim[4];
2236 	for (int i = 0; i < 4; ++i)
2237 		dim[i] =float(libwps::read16(m_input)/1440.);
2238 
2239 	// CHECKME: the next two sizes are often simillar,
2240 	//         maybe the first one is the original size and the second
2241 	//         size in the document...
2242 	f << "origSz?=[" << dim[0] << "," << dim[1] << "],";
2243 
2244 	WPS4TextInternal::Object obj;
2245 	obj.m_size = Vec2f(dim[2], dim[3]); // CHECKME: unit
2246 
2247 	long size = (long) libwps::readU32(m_input);
2248 	long pos = (long) libwps::readU32(m_input);
2249 
2250 	actPos = m_input->tell();
2251 	if (pos >= 0 && size > 0 && mainParser().checkFilePosition(pos+size))
2252 	{
2253 		obj.m_pos.setBegin(pos);
2254 		obj.m_pos.setLength(size);
2255 		obj.m_pos.setId(id);
2256 
2257 		int objectId = mainParser().readObject(m_input, obj.m_pos);
2258 		if (objectId == -1)
2259 		{
2260 			WPS_DEBUG_MSG(("WPS4Text::objectDataParser: can not find the object %d\n", id));
2261 		}
2262 		obj.m_id = objectId;
2263 		m_state->m_objectMap[bot] = obj;
2264 	}
2265 	else
2266 	{
2267 		WPS_DEBUG_MSG(("WPS4Text::objectDataParser: bad object position\n"));
2268 	}
2269 
2270 	m_input->seek(actPos, librevenge::RVNG_SEEK_SET);
2271 
2272 	for (int i = 0; i < 7; ++i)
2273 	{
2274 		long val =libwps::read16(m_input);
2275 		if (val) f << "unkn2:" << i << "=" << val << ",";
2276 	}
2277 
2278 	obj.m_extra = f.str();
2279 	f.str("");
2280 	f << obj;
2281 
2282 	mess = f.str();
2283 	return true;
2284 }
2285 
2286 ////////////////////////////////////////////////////////////
2287 // the dttm properties:
2288 ////////////////////////////////////////////////////////////
dttmDataParser(long bot,long,int,long endPos,std::string & mess)2289 bool WPS4Text::dttmDataParser(long bot, long /*eot*/, int /*id*/,
2290                               long endPos, std::string &mess)
2291 {
2292 	mess = "";
2293 	if (m_state->m_dateTimeMap.find(bot) != m_state->m_dateTimeMap.end())
2294 	{
2295 		WPS_DEBUG_MSG(("WPS4Text:dttmDataParser: dttm already exists in this position\n"));
2296 		return true;
2297 	}
2298 
2299 	libwps::DebugStream f;
2300 
2301 	long actPos = m_input->tell();
2302 	long length = endPos+1-actPos;
2303 	if (length != 42)
2304 	{
2305 		WPS_DEBUG_MSG(("WPS4Text:dttmDataParser unknown size %ld for dttm data\n", length));
2306 		return false;
2307 	}
2308 
2309 	WPS4TextInternal::DateTime form;
2310 	int val;
2311 	for (int i = 0; i < 3; ++i) // always 0, 0, 0 ?
2312 	{
2313 		val =libwps::read16(m_input);
2314 		if (val) f << "f" << i << "=" << val << ",";
2315 	}
2316 	form.m_type=libwps::read16(m_input);
2317 	val =libwps::read16(m_input); // alway 0 ?
2318 	if (val) f << "f3=" << val << ",";
2319 	// end unknown
2320 	for (int i = 0; i < 16; ++i)
2321 	{
2322 		val =libwps::readU16(m_input);
2323 		if (val) f << "g" << i << "=" << std::hex << val << std::dec << ",";
2324 	}
2325 	form.m_extra = f.str();
2326 	m_state->m_dateTimeMap[bot] = form;
2327 	f.str("");
2328 	f << form;
2329 	mess = f.str();
2330 	return true;
2331 }
2332 
2333 ////////////////////////////////////////
2334 // VERY LOW LEVEL ( plc )
2335 ////////////////////////////////////////
2336 /** Internal and low level: the structures of a WPS4Text used to parse PLC*/
2337 namespace WPS4PLCInternal
2338 {
2339 /** Internal and low level: the PLC different types and their structures */
2340 struct PLC
2341 {
2342 	/** the PLC types */
2343 	typedef enum WPS4TextInternal::PLCType PLCType;
2344 	/** the way to define the text positions
2345 	 *
2346 	 * - P_ABS: absolute position,
2347 	 * - P_REL: position are relative to the beginning text offset */
2348 	typedef enum { P_ABS=0, P_REL, P_UNKNOWN} Position;
2349 	/** the type of the content
2350 	 *
2351 	 * - T_CST: size is constant
2352 	 * - T_STRUCT: a structured type ( which unknown size) */
2353 	typedef enum { T_CST=0, T_COMPLEX, T_UNKNOWN} Type;
2354 
2355 	//! constructor
PLCWPS4PLCInternal::PLC2356 	PLC(PLCType w= WPS4TextInternal::Unknown, Position p=P_UNKNOWN, Type t=T_UNKNOWN, unsigned char tChar='\0', int f=1) :
2357 		m_type(w), m_pos(p), m_contentType(t), m_textChar(tChar), m_cstFactor(f) {}
2358 
2359 	//! PLC type
2360 	PLCType m_type;
2361 	//! the way to define the text positions
2362 	Position m_pos;
2363 	//! the type of the content
2364 	Type m_contentType;
2365 	/** the character which appears in the text when this PLC is found
2366 	 *
2367 	 * '\\0' means that there is not default character */
2368 	unsigned char m_textChar;
2369 	//! some data are stored divided by some unit
2370 	int m_cstFactor;
2371 };
2372 
KnownPLC()2373 KnownPLC::KnownPLC() : m_knowns()
2374 {
2375 	createMapping();
2376 }
2377 
~KnownPLC()2378 KnownPLC::~KnownPLC()
2379 {
2380 }
2381 
get(std::string const & name)2382 PLC KnownPLC::get(std::string const &name)
2383 {
2384 	std::map<std::string, PLC>::iterator pos = m_knowns.find(name);
2385 	if (pos == m_knowns.end()) return PLC();
2386 	return pos->second;
2387 }
2388 
createMapping()2389 void KnownPLC::createMapping()
2390 {
2391 	m_knowns["BTEP"] =
2392 	    PLC(WPS4TextInternal::BTE, PLC::P_ABS, PLC::T_CST, '\0', 0x80);
2393 	m_knowns["BTEC"] =
2394 	    PLC(WPS4TextInternal::BTE,PLC::P_ABS, PLC::T_CST, '\0', 0x80);
2395 	m_knowns["EOBJ"] =
2396 	    PLC(WPS4TextInternal::OBJECT,PLC::P_UNKNOWN, PLC::T_COMPLEX, 0x7);
2397 	m_knowns["FTNp"] =
2398 	    PLC(WPS4TextInternal::FTNp,PLC::P_REL, PLC::T_CST, 0x6);
2399 	m_knowns["FTNd"] =
2400 	    PLC(WPS4TextInternal::FTNd,PLC::P_REL, PLC::T_COMPLEX, 0x6);
2401 	m_knowns["BKMK"] =
2402 	    PLC(WPS4TextInternal::BKMK,PLC::P_REL, PLC::T_COMPLEX);
2403 	m_knowns["DTTM"] =
2404 	    PLC(WPS4TextInternal::DTTM,PLC::P_REL, PLC::T_COMPLEX, 0xf);
2405 }
2406 }
2407 
readPLC(WPSEntry const & zone,std::vector<long> & textPtrs,std::vector<long> & listValues,WPS4Text::DataParser parser)2408 bool WPS4Text::readPLC
2409 (WPSEntry const &zone,
2410  std::vector<long> &textPtrs, std::vector<long> &listValues, WPS4Text::DataParser parser)
2411 {
2412 	textPtrs.resize(0);
2413 	listValues.resize(0);
2414 	long size = zone.length();
2415 	if (zone.begin() <= 0 || size < 8) return false;
2416 	WPS4PLCInternal::PLC plcType = m_state->m_knownPLC.get(zone.type());
2417 
2418 	libwps::DebugStream f;
2419 	ascii().addPos(zone.begin());
2420 	m_input->seek(zone.begin(), librevenge::RVNG_SEEK_SET);
2421 
2422 	long lastPos = 0;
2423 	std::vector<DataFOD> fods;
2424 	unsigned numElt = 0;
2425 	f << "pos=(";
2426 	while (numElt*4+4 <= unsigned(size))
2427 	{
2428 		long newPos = (long) libwps::readU32(m_input);
2429 		if (plcType.m_pos == WPS4PLCInternal::PLC::P_UNKNOWN)
2430 		{
2431 			if (newPos < m_textPositions.begin())
2432 				plcType.m_pos = WPS4PLCInternal::PLC::P_REL;
2433 			else if (newPos+m_textPositions.begin() > m_textPositions.end())
2434 				plcType.m_pos = WPS4PLCInternal::PLC::P_ABS;
2435 			else if (plcType.m_textChar=='\0')
2436 			{
2437 				WPS_DEBUG_MSG(("WPS4Text:readPLC Can not decide position for PLC: %s\n", zone.type().c_str()));
2438 				plcType.m_pos = WPS4PLCInternal::PLC::P_REL;
2439 			}
2440 			else
2441 			{
2442 				long actPos = m_input->tell();
2443 				m_input->seek(newPos, librevenge::RVNG_SEEK_SET);
2444 				if (libwps::readU8(m_input) == plcType.m_textChar)
2445 					plcType.m_pos = WPS4PLCInternal::PLC::P_ABS;
2446 				else plcType.m_pos = WPS4PLCInternal::PLC::P_REL;
2447 				m_input->seek(actPos, librevenge::RVNG_SEEK_SET);
2448 			}
2449 		}
2450 
2451 		if (plcType.m_pos == WPS4PLCInternal::PLC::P_REL)
2452 			newPos += m_textPositions.begin();
2453 
2454 		if (newPos < lastPos ||
2455 		        newPos > m_textPositions.end())
2456 		{
2457 			// sometimes the convertissor do not their jobs correctly
2458 			// for the last element
2459 			if (plcType.m_pos == WPS4PLCInternal::PLC::P_REL &&
2460 			        newPos == m_textPositions.end()+m_textPositions.begin())
2461 				newPos = m_textPositions.end();
2462 			else
2463 				return false;
2464 		}
2465 
2466 		textPtrs.push_back(newPos);
2467 
2468 		DataFOD fod;
2469 		fod.m_type = DataFOD::ATTR_PLC;
2470 		fod.m_pos = newPos;
2471 
2472 		f << std::hex << newPos << ", ";
2473 		if (newPos == m_textPositions.end()) break;
2474 
2475 		numElt++;
2476 		lastPos = newPos;
2477 		fods.push_back(fod);
2478 	}
2479 	f << ")";
2480 
2481 	if (long(numElt) < 1) return false;
2482 
2483 	long dataSize = (size-4*long(numElt)-4)/long(numElt);
2484 	if (dataSize > 100) return false;
2485 	if (size!= long(numElt)*(4+dataSize)+4) return false;
2486 
2487 	ascii().addNote(f.str().c_str());
2488 
2489 	if (!dataSize)
2490 	{
2491 		for (size_t i = 0; i < numElt; ++i)
2492 		{
2493 			listValues.push_back(-1);
2494 			fods[i].m_id = int(m_state->m_plcList.size());
2495 		}
2496 		WPS4TextInternal::DataPLC plc;
2497 		plc.m_name = zone.type();
2498 		plc.m_type = plcType.m_type;
2499 		m_state->m_plcList.push_back(plc);
2500 		m_FODList = mergeSortedFODLists(fods, m_FODList);
2501 		return true;
2502 	}
2503 
2504 	// ok we have some data
2505 	bool ok = true;
2506 	long pos = m_input->tell();
2507 	WPS4Text::DataParser pars = parser;
2508 	if ((dataSize == 3 || dataSize > 4) && !pars)
2509 		pars = &WPS4Text::defDataParser;
2510 
2511 	for (size_t i = 0; i < numElt; ++i)
2512 	{
2513 		WPS4TextInternal::DataPLC plc;
2514 
2515 		if (!pars && dataSize <= 4)
2516 		{
2517 			switch (dataSize)
2518 			{
2519 			case 1:
2520 				plc.m_value = libwps::readU8(m_input);
2521 				break;
2522 			case 2:
2523 				plc.m_value = libwps::readU16(m_input);
2524 				break;
2525 			case 4:
2526 				plc.m_value = (long) libwps::readU32(m_input);
2527 				break;
2528 			default:
2529 				WPS_DEBUG_MSG(("WPS4Text:readPLC: unexpected PLC size\n"));
2530 			// fallthrough intended
2531 			case 0:
2532 				plc.m_value = 0;
2533 			}
2534 			plc.m_value *=plcType.m_cstFactor;
2535 		}
2536 		else if (pars)
2537 		{
2538 			std::string mess;
2539 			if (!(this->*pars)(textPtrs[i], textPtrs[i+1], int(i), pos+dataSize-1, mess))
2540 			{
2541 				ok = false;
2542 				break;
2543 			}
2544 			plc.m_extra = mess;
2545 			m_input->seek(pos+dataSize, librevenge::RVNG_SEEK_SET);
2546 		}
2547 
2548 		listValues.push_back(plc.m_value);
2549 
2550 		fods[i].m_id = int(m_state->m_plcList.size());
2551 		fods[i].m_defPos = pos;
2552 
2553 		plc.m_name = zone.type();
2554 		plc.m_type = plcType.m_type;
2555 		m_state->m_plcList.push_back(plc);
2556 
2557 		f.str("");
2558 		f << "ZZ" << zone.type() << i << ":" << plc;
2559 		ascii().addPos(pos);
2560 		ascii().addNote(f.str().c_str());
2561 
2562 		pos += dataSize;
2563 	}
2564 
2565 	if (ok) m_FODList = mergeSortedFODLists(fods, m_FODList);
2566 	return true;
2567 }
2568 
2569 /* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */
2570