1 /* -*- c-basic-offset: 4; tab-width: 4; indent-tabs-mode: t -*- */
2 /* Copyright (C) 1999 AbiSource, Inc.
3  * Copyright (C) 2003 Tomas Frydrych <tomas@frydrych.uklinux.net>
4  * Copyright (C) 2004, 2009 Hubert Figuiere <hub@figuiere.net>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301 USA.
20  */
21 
22 /* RTF importer by Peter Arnold <petera@intrinsica.co.uk> */
23 
24 #ifndef IE_IMP_RTF_H
25 #define IE_IMP_RTF_H
26 
27 #include <stdio.h>
28 #include <string>
29 #include <vector>
30 #include "ie_imp.h"
31 #include "ut_growbuf.h"
32 #include "ut_stack.h"
33 #include "pt_Types.h"
34 #include "pd_Document.h"
35 #include "pp_Revision.h"
36 #include "ut_mbtowc.h"
37 #include "fl_AutoLists.h"
38 #include "fl_AutoNum.h"
39 #include "fl_BlockLayout.h"
40 #include  "ie_Table.h"
41 #include "ie_imp_RTFParse.h"
42 
43 class IE_Imp_RTF;
44 class RTF_msword97_list;
45 class ie_imp_cell;
46 class ie_imp_table;
47 class ie_imp_table_control;
48 class RTFProps_FrameProps;
49 class FG_Graphic;
50 
51 
52 // Font table entry
53 struct ABI_EXPORT RTFFontTableItem
54 {
55  public:
56 	enum FontFamilyEnum { ffNone, ffRoman, ffSwiss, ffModern, ffScript, ffDecorative, ffTechnical, ffBiDirectional};
57 	enum FontPitch { fpDefault, fpFixed, fpVariable};
58 
59 	RTFFontTableItem(FontFamilyEnum fontFamily, int charSet, int codepage, FontPitch pitch,
60 						const char* panose, const char* pFontName, const char* pAlternativeFontName);
61 	~RTFFontTableItem();
62 
63 	FontFamilyEnum m_family;
64 	int m_charSet;
65 	int m_codepage;
66 	const char* m_szEncoding;
67 	FontPitch m_pitch;
68 	unsigned char m_panose[10];
69 	char* m_pFontName;
70 	char* m_pAlternativeFontName;
71 };
72 
73 // Set true if Character properties have been changed in list structure.
74 class ABI_EXPORT RTFProps_CharProps
75 {
76 public:
77 	RTFProps_CharProps(void);
78 	~RTFProps_CharProps(void);
79 	bool	m_deleted;
80 	bool	m_bold;
81 	bool	m_italic;
82 	bool	m_underline;
83 	bool    m_overline;
84 	bool	m_strikeout;
85 	bool	m_topline;
86 	bool	m_botline;
87 	bool    m_superscript;
88 	double  m_superscript_pos;       // unit is pt. if 0.0, ignore
89 	bool    m_subscript;
90 	double  m_subscript_pos;         // unit is pt. if 0.0, ignore
91 	double  m_fontSize;			// font size in points
92 	UT_uint32    m_fontNumber;		// index into font table
93 	bool    m_hasColour;        // if false, ignore colour number
94     UT_uint32    m_colourNumber;	// index into colour table
95 	bool    m_hasBgColour; // if false, ignore colour number
96 	UT_uint32  m_bgcolourNumber; // index into colour table
97 	UT_sint32  m_styleNumber ; //index into the style table
98 	UT_uint32  m_listTag; // tag for lists to hang off
99 	const char * m_szLang;
100 	UT_BidiCharType m_dir;
101 	UT_BidiCharType m_dirOverride;
102 	bool    m_Hidden;
103 	PP_RevisionType m_eRevision;
104 	UT_uint32 m_iCurrentRevisionId;
105 };
106 
107 class ABI_EXPORT RTFProps_bCharProps
108 {
109 public:
110 	RTFProps_bCharProps(void);
111 	~RTFProps_bCharProps(void);
112 
113 	bool	bm_deleted;
114 	bool	bm_bold;
115 	bool	bm_italic;
116 	bool	bm_underline;
117 	bool    bm_overline;
118 	bool	bm_strikeout;
119 	bool	bm_topline;
120 	bool	bm_botline;
121 	bool bm_superscript;
122 	bool bm_superscript_pos;       // unit is pt. if 0.0, ignore
123 	bool bm_subscript;
124 	bool bm_subscript_pos;         // unit is pt. if 0.0, ignore
125 	bool bm_fontSize;			// font size in points
126 	bool bm_fontNumber;		// index into font table
127 	bool    bm_hasColour;        // if false, ignore colour number
128 	bool bm_colourNumber;	// index into colour table
129 	bool bm_hasBgColour; // if false, ignore colour number
130 	bool bm_bgcolourNumber; // index into colour table
131 	bool bm_listTag; // tag for lists to hanfg off
132 	bool bm_RTL;
133 	bool bm_dirOverride;
134 	bool bm_Hidden;
135 };
136 
137 struct ABI_EXPORT _rtfListTable
138 {
139 	UT_uint32 start_value;
140 	UT_uint32 level;
141 	bool bullet;
142 	bool simple;
143 	bool continueList;
144 	bool hangingIndent;
145 	FL_ListType type;
146 	bool bold;
147 	bool italic;
148 	bool caps;
149 	bool scaps;
150 	bool underline;
151 	bool nounderline;
152 	bool strike;
153 	bool isList;
154 	UT_uint32 forecolor;
155 	UT_uint32 font;
156 	UT_uint32 fontsize;
157 	UT_uint32 indent;
158 	bool prevlist;
159 	char textbefore[129];
160 	char textafter[129];
161 	UT_uint32 iWord97Override;
162 	UT_uint32 iWord97Level;
163 };
164 
// Identifies one border of a table cell (see RTFProps_CellProps::m_iCurBorder).
// Values are spelled out; they match the implicit numbering used before.
enum rtfCellBorder
{
	rtfCellBorderBot   = 0,
	rtfCellBorderLeft  = 1,
	rtfCellBorderRight = 2,
	rtfCellBorderTop   = 3
};
172 
173 
// Identifies one border side: bottom, left, right or top.
// Values are spelled out; they match the implicit numbering used before.
enum rtfBorder
{
	rtfBorderBot   = 0,
	rtfBorderLeft  = 1,
	rtfBorderRight = 2,
	rtfBorderTop   = 3
};
181 
182 struct ABI_EXPORT RTFProps_CellProps
183 {
184 	RTFProps_CellProps();
185 	RTFProps_CellProps & operator=(const RTFProps_CellProps&);
186 	bool      m_bVerticalMerged;
187 	bool      m_bVerticalMergedFirst;
188 	bool      m_bHorizontalMerged;
189 	bool      m_bHorizontalMergedFirst;
190 	std::string m_sCellProps;
191 	rtfCellBorder  m_iCurBorder;
192 	bool      m_bLeftBorder;
193 	bool      m_bRightBorder;
194 	bool      m_bTopBorder;
195 	bool      m_bBotBorder;
196 	UT_sint32 m_iCellx;
197 };
198 
199 struct ABI_EXPORT RTFProps_TableProps
200 {
201 	RTFProps_TableProps();
202 	RTFProps_TableProps& operator=(const RTFProps_TableProps&);
203 	bool      m_bAutoFit;
204 };
205 
206 // Paragraph properties
207 struct ABI_EXPORT RTFProps_ParaProps
208 {
209 	enum ParaJustification { pjLeft, pjCentre, pjRight, pjFull};
210 
211 	RTFProps_ParaProps();
212 	RTFProps_ParaProps& operator=(const RTFProps_ParaProps&);
213 
214 	ParaJustification	m_justification;
215 	UT_uint32	m_spaceBefore;	// space above paragraph in twips
216 	UT_uint32	m_spaceAfter;	// space above paragraph in twips
217     UT_sint32	m_indentLeft;	// left indent in twips
218     UT_sint32	m_indentRight;	// right indent in twips
219     UT_sint32	m_indentFirst;	// first line indent in twips
220 	double	m_lineSpaceVal;		// line spaceing value
221 	bool	m_lineSpaceExact;	// TRUE if m_lineSpaceVal is an exact value, FALSE if multiple
222 	std::vector<UT_sint32> m_tabStops;
223 	std::vector<eTabType> m_tabTypes;
224 	std::vector<eTabLeader> m_tabLeader;
225 	bool         m_isList;       // TRUE if para is an element of a list
226 	UT_sint32       m_level;        // Level of list in para
227 	char            m_pszStyle[30]; // Type of List
228 	UT_uint32       m_rawID;        // raw ID of list
229 	UT_uint32       m_rawParentID;        // raw Parent ID of list
230 	char            m_pszListDecimal[64]; // char between levels
231 	char            m_pszListDelim[64];   // char between levels
232 	char            m_pszFieldFont[64];   // field font name
233 	UT_uint32       m_startValue;         // Start value of the list
234 	eTabType        m_curTabType;        // Current Tab type
235 	eTabLeader      m_curTabLeader;       // Current Tab Leader
236 	UT_uint32       m_iOverride;          // 1's index to override table
237 	UT_uint32       m_iOverrideLevel;     // 0's index to the level
238 	_rtfListTable   m_rtfListTable;
239 	UT_sint32  m_styleNumber ; //index into the style table
240 	UT_BidiCharType m_dir;
241 	UT_sint32       m_tableLevel; //nesting level of the paragram in a table.
242 	bool            m_bInTable; // true if paragraph is in a table
243 	PP_RevisionType m_eRevision;
244 	UT_uint32 m_iCurrentRevisionId;
245 	// Borders & shading
246 	bool            m_bMergeBordersShading;
247 	bool            m_bBotBorder;
248 	UT_sint32       m_iBotBorderStyle; // Number to represent style of border
249 	UT_sint32       m_iBotBorderCol; // index to color table
250 	UT_sint32       m_iBotBorderWidth;  // Thickness in twips
251 	UT_sint32       m_iBotBorderSpacing; // Spacing to text in twips
252 	bool            m_bLeftBorder;
253 	UT_sint32       m_iLeftBorderStyle; // Number to represent style of border
254 	UT_sint32       m_iLeftBorderCol; // index to color table
255 	UT_sint32       m_iLeftBorderWidth;  // Thickness in twips
256 	UT_sint32       m_iLeftBorderSpacing; // Spacing to text in twips
257 	bool            m_bRightBorder;
258 	UT_sint32       m_iRightBorderStyle; // Number to represent style of border
259 	UT_sint32       m_iRightBorderCol; // index to color table
260 	UT_sint32       m_iRightBorderWidth;  // Thickness in twips
261 	UT_sint32       m_iRightBorderSpacing; // Spacing to text in twips
262 	bool            m_bTopBorder;
263 	UT_sint32       m_iTopBorderStyle; // Number to represent style of border
264 	                                   // 0 no border
265 	                                   // 1 solid
266                                        // 2 dotted
267                                        // 3 dashed
268 	UT_sint32       m_iTopBorderCol; // index to color table
269 	UT_sint32       m_iTopBorderWidth;  // Thickness in twips
270 	UT_sint32       m_iTopBorderSpacing; // Spacing to text in twips
271 	UT_sint32       m_iCurBorder; // 0=bot,1=left,2=right,3=top
272 	UT_sint32       m_iShadingPattern; // Number to represent the style of shading
273 	UT_sint32       m_iShadingForeCol; // The Foreground color
274 	UT_sint32       m_iShadingBackCol; // The Foreground color
275 
276 };
277 
278 // These are set true if changed in list definitions.
279 class ABI_EXPORT RTFProps_bParaProps
280 {
281 public:
282     RTFProps_bParaProps(void);
283     ~RTFProps_bParaProps(void);
284 
285 	bool        bm_justification;
286 	bool    	bm_spaceBefore;	// space above paragraph in twips
287 	bool    	bm_spaceAfter;	// space above paragraph in twips
288     bool    	bm_indentLeft;	// left indent in twips
289     bool    	bm_indentRight;	// right indent in twips
290     bool    	bm_indentFirst;	// first line indent in twips
291 	bool	    bm_lineSpaceVal;		// line spaceing value
292 	bool	    bm_lineSpaceExact;	// TRUE if m_lineSpaceVal is an exact value, FALSE if multiple
293 	bool        bm_tabStops;
294 	bool        bm_tabTypes;
295 	bool        bm_tabLeader;
296 	bool        bm_curTabType;        // Current Tab type
297 	bool        bm_curTabLeader;       // Current Tab Leader
298 	bool        bm_rtfListTable;
299 	bool        bm_dom_dir;
300 };
301 
302 
303 //typedef struct sect_prop
304 //{
305 //    int cCols;                  // number of columns
306 //    SBK sbk;                    // section break type
307 //    int xaPgn;                  // x position of page number in twips
308 //    int yaPgn;                  // y position of page number in twips
309 //    PGN pgnFormat;              // how the page number is formatted
310 //} SEP;                  // SEction Properties
311 
312 
313 // Lists Level class
314 class ABI_EXPORT RTF_msword97_level
315 {
316 public:
317     RTF_msword97_level(	RTF_msword97_list * pmsword97List, UT_uint32 level);
318 	~RTF_msword97_level();
319 	void buildAbiListProperties( const char ** szListID,
320 								 const char ** szParentID,
321 								 const char ** szLevel,
322 								 const char ** szStartat,
323 								 const char ** szFieldFont,
324 								 const char ** szListDelim,
325 								 const char ** szListDecimal,
326 								 const char ** szAlign,
327 								 const char ** szIndent,
328 								 const char ** szListStyle);
329 	bool ParseLevelText(const std::string & szLevelText,const std::string & szLevelNumbers, UT_uint32 iLevel);
330 	UT_sint32 m_levelStartAt;
331 	UT_uint32 m_AbiLevelID;
332 	static UT_uint32 m_sPreviousLevel;
333 	UT_uint32 m_RTFListType;
334 	std::string m_listDelim;
335 	char      m_cLevelFollow;
336 	bool m_bStartNewList;
337 	bool m_bRestart;
338     RTFProps_ParaProps * m_pParaProps;
339 	RTFProps_CharProps *  m_pCharProps;
340     RTFProps_bParaProps * m_pbParaProps;
341 	RTFProps_bCharProps *  m_pbCharProps;
342 private:
343 	UT_uint32 m_localLevel;
344 	RTF_msword97_list * m_pMSWord97_list ;
345 };
346 
347 // List Header Class
348 class ABI_EXPORT RTF_msword97_list
349 {
350 public:
351 	RTF_msword97_list(	IE_Imp_RTF * pie_rtf);
352     ~RTF_msword97_list();
353 	UT_uint32 m_RTF_listID;
354 	UT_uint32 m_RTF_listTemplateID;
355 	RTF_msword97_level * m_RTF_level[9];
356 	IE_Imp_RTF * m_pie_rtf;
357 };
358 
359 // List Header Override
360 class ABI_EXPORT RTF_msword97_listOverride
361 {
362 public:
363 	RTF_msword97_listOverride(	IE_Imp_RTF * pie_rtf);
364     ~RTF_msword97_listOverride();
365 	void buildAbiListProperties( const char ** szListID,
366 								 const char ** szParentID,
367 								 const char ** szLevel,
368 								 const char ** szStartat,
369 								 const char ** szFieldFont,
370 								 const char ** szListDelim,
371 								 const char ** szListDecimal,
372 								 const char ** szAlign,
373 								 const char ** szIndent,
374 								 const char ** szListStyle,
375 								 UT_uint32 iLevel);
376 	UT_uint32 m_RTF_listID;
377 	UT_uint32 m_OverrideCount;
378 	RTFProps_ParaProps * m_pParaProps;
379 	RTFProps_CharProps * m_pCharProps;
380 	RTFProps_bParaProps * m_pbParaProps;
381 	RTFProps_bCharProps * m_pbCharProps;
382 	bool setList(void);
383 	bool isTab(UT_uint32 iLevel);
384 	std::vector<UT_sint32>* getTabStopVect(UT_uint32 iLevel);
385 	std::vector<eTabType>* getTabTypeVect(UT_uint32 iLevel);
386 	std::vector<eTabLeader>* getTabLeaderVect(UT_uint32 iLevel);
387 	bool isDeletedChanged(UT_uint32 iLevel);
388 	bool getDeleted(UT_uint32 iLevel);
389 	bool isBoldChanged(UT_uint32 iLevel);
390 	bool getBold(UT_uint32 iLevel);
391 	bool isItalicChanged(UT_uint32 iLevel);
392 	bool getItalic(UT_uint32 iLevel);
393 	bool isUnderlineChanged(UT_uint32 iLevel);
394 	bool getUnderline(UT_uint32 iLevel);
395 	bool isStrikeoutChanged(UT_uint32 iLevel);
396 	bool getStrikeout(UT_uint32 iLevel);
397 	bool isSuperscriptChanged(UT_uint32 iLevel);
398 	bool getSuperscript(UT_uint32 iLevel);
399 	bool isSuperscriptPosChanged(UT_uint32 iLevel);
400 	double getSuperscriptPos(UT_uint32 iLevel);
401 	bool isSubscriptChanged(UT_uint32 iLevel);
402 	bool getSubscript(UT_uint32 iLevel);
403 	bool isSubscriptPosChanged(UT_uint32 iLevel);
404 	double getSubscriptPos(UT_uint32 iLevel);
405  	bool isFontSizeChanged(UT_uint32 iLevel);
406  	double getFontSize(UT_uint32 iLevel);
407  	bool isHasColourChanged(UT_uint32 iLevel);
408  	bool getHasColour(UT_uint32 iLevel);
409  	bool isColourNumberChanged(UT_uint32 iLevel);
410  	UT_uint32 getColourNumber(UT_uint32 iLevel);
411  	bool isHasBgColourChanged(UT_uint32 iLevel);
412  	bool getHasBgColour(UT_uint32 iLevel);
413 	bool isBgColourNumberChanged(UT_uint32 iLevel);
414  	UT_uint32 getBgColourNumber(UT_uint32 iLevel);
415  	bool isFontNumberChanged(UT_uint32 iLevel);
416  	UT_uint32 getFontNumber(UT_uint32 iLevel);
417 
418 private:
419 	IE_Imp_RTF * m_pie_rtf;
420 	RTF_msword97_list* m_pList;
421 };
422 
423 struct ABI_EXPORT RTFProps_ImageProps
424 {
425 	enum IPSizeType { ipstNone, ipstGoal, ipstScale };
426 
427 	RTFProps_ImageProps ();
428 	IPSizeType sizeType;
429 	UT_uint16 wGoal;
430 	UT_uint16 hGoal;
431 	UT_uint16 scaleX;
432 	UT_uint16 scaleY;
433 	UT_uint32 width;
434 	UT_uint32 height;
435 	bool bCrop;
436 	UT_sint32 cropt;
437 	UT_sint32 cropb;
438 	UT_sint32 cropl;
439 	UT_sint32 cropr;
440 };
441 
442 // Section properties
443 struct ABI_EXPORT RTFProps_SectionProps
444 {
445 	enum ESectionBreak {sbkNone, sbkColumn, sbkEven, sbkOdd, sbkPage};
446 	enum EPageNumber {pgDecimal, pgURoman, pgLRoman, pgULtr, pgLLtr};
447 
448 	RTFProps_SectionProps();
449 
450 	UT_uint32		m_numCols;
451 	bool m_bColumnLine;
452 	ESectionBreak	m_breakType;
453 	EPageNumber		m_pageNumFormat;
454 	UT_sint32       m_leftMargTwips;
455 	UT_sint32       m_rightMargTwips;
456 	UT_sint32       m_topMargTwips;
457 	UT_sint32       m_bottomMargTwips;
458 	UT_sint32       m_headerYTwips;
459 	UT_sint32       m_footerYTwips;
460 	UT_sint32       m_gutterTwips;
461 	UT_sint32       m_colSpaceTwips;
462 	UT_BidiCharType m_dir;
463 };
464 
465 
466 /*!
467   Stores a RTF header and footer.
468   headers and footer are NOT section properties. But they are defined
469   before any section data begins.
470   \todo add right and left headers and footer. Not yet supported by AbiWord
471  */
472 struct ABI_EXPORT RTFHdrFtr
473 {
474 	enum HdrFtrType {hftNone,
475 					 hftHeader,
476 					 hftHeaderEven,
477 					 hftHeaderFirst,
478 					 hftHeaderLast,
479 					 hftFooter,
480 					 hftFooterEven,
481 					 hftFooterFirst,
482 					 hftFooterLast };
483 
RTFHdrFtrRTFHdrFtr484 	RTFHdrFtr () : m_type(hftNone), m_id(0), m_buf(1024) {}
485 
486 	HdrFtrType      m_type;
487 	UT_uint32       m_id;
488 	UT_ByteBuf      m_buf;
489 };
490 
491 // RTFStateStore
492 struct ABI_EXPORT RTFStateStore
493 {
494 	RTFStateStore();
495     RTFStateStore * clone();
496 	// RTFStateStore& operator=(const RTFStateStore& s)
497 
498 	enum DestinationStateTypes { rdsNorm, rdsSkip, rdsFootnote, rdsHeader, rdsFooter, rdsField };
499 	enum InternalStateTypes { risNorm, risBin, risHex };
500 
501 	DestinationStateTypes	m_destinationState;		// Reading or skipping text
502 	InternalStateTypes		m_internalState;		// Normal, binary or hex
503 	RTFProps_CharProps		m_charProps;			// Character properties
504 	RTFProps_ParaProps		m_paraProps;			// Paragraph properties
505 	RTFProps_SectionProps	m_sectionProps;			// Section properties
506 	RTFProps_CellProps      m_cellProps;            // Cell properties
507     RTFProps_TableProps     m_tableProps;           // Table properties
508 	UT_uint32				m_unicodeAlternateSkipCount;	// value of N in "\ucN"
509 	UT_uint32				m_unicodeInAlternate;			// chars left in alternate "\u<u><A>"
510 	UT_UTF8String           m_revAttr;              // the revision attribute stored in abirevision
511 	bool                    m_bInKeywordStar;        // true if in star keyword
512 };
513 
514 
515 /*
516 typedef struct doc_prop
517 {
518     int xaPage;                 // page width in twips
519     int yaPage;                 // page height in twips
520     int xaLeft;                 // left margin in twips
521     int yaTop;                  // top margin in twips
522     int xaRight;                // right margin in twips
523     int yaBottom;               // bottom margin in twips
524     int pgnStart;               // starting page number in twips
525     char fFacingp;              // facing pages enabled?
526     char fLandscape;            // landscape or portrait??
527 } DOP;                  // DOcument Properties
528 */
529 
530 
531 //
532 // Little class to be used for tracking pasted table state.
533 //
534 class ABI_EXPORT ABI_Paste_Table
535 {
536 public:
537 	ABI_Paste_Table(void);
538     virtual ~ABI_Paste_Table(void);
539 	bool                  m_bHasPastedTableStrux;
540 	bool                  m_bHasPastedCellStrux;
541 	UT_sint32             m_iRowNumberAtPaste;
542 	bool                  m_bHasPastedBlockStrux;
543 	UT_sint32             m_iMaxRightCell;
544 	UT_sint32             m_iCurRightCell;
545 	UT_sint32             m_iCurTopCell;
546 	bool                  m_bPasteAfterRow;
547 	UT_sint32             m_iPrevPasteTop;
548 	UT_sint32             m_iNumRows;
549 };
550 
551 //
552 // Little class to be used for importing annotations
553 //
554 class ABI_EXPORT ABI_RTF_Annotation
555 {
556  public:
557 	ABI_RTF_Annotation();
~ABI_RTF_Annotation()558 	virtual ~ABI_RTF_Annotation() {}
559 	UT_sint32          m_iAnnNumber;
560 	UT_UTF8String      m_sAuthor;
561 	UT_UTF8String      m_sAuthorId;
562 	UT_UTF8String      m_sDate;
563 	UT_UTF8String      m_sTitle;
564 	pf_Frag *          m_pInsertFrag;
565 	PT_DocPosition     m_Annpos;
566 	UT_sint32          m_iRTFLevel;
567 	static UT_sint32   newNumber();
568 private:
569 	static UT_sint32 sAnnotationNumber;
570 };
571 
572 // The importer/reader for Rich Text Format files
573 
574 class ABI_EXPORT IE_Imp_RTF_Sniffer : public IE_ImpSniffer
575 {
576 	friend class IE_Imp;
577 
578 public:
579 	IE_Imp_RTF_Sniffer();
~IE_Imp_RTF_Sniffer()580 	virtual ~IE_Imp_RTF_Sniffer() {}
581 
582 	virtual const IE_SuffixConfidence * getSuffixConfidence ();
583 	virtual const IE_MimeConfidence * getMimeConfidence ();
584 	virtual UT_Confidence_t recognizeContents (const char * szBuf,
585 									UT_uint32 iNumbytes);
586 	virtual bool getDlgLabels (const char ** szDesc,
587 							   const char ** szSuffixList,
588 							   IEFileType * ft);
589 	virtual UT_Error constructImporter (PD_Document * pDocument,
590 										IE_Imp ** ppie);
591 
592 };
593 
594 class ABI_EXPORT IE_Imp_RTF : public IE_Imp
595 {
596 public:
597 	IE_Imp_RTF(PD_Document * pDocument);
598 	~IE_Imp_RTF();
599 
supportsLoadStylesOnly()600 	virtual bool supportsLoadStylesOnly() const {return true;}
601 	virtual bool		pasteFromBuffer(PD_DocumentRange * pDocRange,
602 										const unsigned char * pData,
603                                         UT_uint32 lenData,
604                                         const char * szEncoding = 0);
get_vecWord97ListsCount(void)605 	UT_sint32 get_vecWord97ListsCount(void) const
606         { return m_vecWord97Lists.size();}
get_vecWord97NthList(UT_sint32 i)607 	RTF_msword97_list *  get_vecWord97NthList(UT_sint32 i) const
608         { return m_vecWord97Lists.at(i);}
isWord97Lists(void)609     bool  isWord97Lists(void) const
610 		{ return !m_vecWord97Lists.empty();}
611 
612 	enum PictFormat {
613 		picNone,
614 		picPNG,
615 		picJPEG,
616 		picBMP,
617 		picWMF,
618 		picPICT,
619 		picEMF,
620 		picGIF,
621 		picSVG
622 	};
623 
624 protected:
625 	virtual UT_Error	_loadFile(GsfInput * input);
626 	UT_Error			_parseFile(GsfInput * fp);
627 	UT_Error			_writeHeader(GsfInput * fp);
628 	UT_Error            _parseHdrFtr ();
629 	UT_Error            _parseText();
630 
631 
632 // importer helper methods
633 private:
634 
635 
636 	typedef enum {
637 		RBT_START = 0,
638 		RBT_END
639 	} RTFBookmarkType;
640 
641 	bool AddChar(UT_UCSChar ch);
642 public:
643 	bool FlushStoredChars(bool forceInsertPara = false);
644 private:
645 	bool StartNewPara();
646 	bool HandleParKeyword();
647 	bool StartNewSection();
648 public:
649 	bool PushRTFState();
650 	bool PopRTFState();
651 	bool ParseChar(UT_UCSChar ch,bool no_convert=1);
setStruxImage(bool b)652 	void setStruxImage(bool b)
653 		{ m_bStruxImage = b;}
isStruxImage(void)654 	bool isStruxImage(void) const
655 		{ return m_bStruxImage;}
656 private:
657 	bool ParseRTFKeyword();
658 	bool ReadCharFromFileWithCRLF(unsigned char* pCh);
659 	bool ReadCharFromFile(unsigned char* pCh);
660 	bool ReadContentFromFile(UT_UTF8String & str);
661 	std::string s_unEscapeXMLString();
662 	UT_UCS4Char ReadHexChar(void);
663 	bool SkipBackChar(unsigned char ch);
664 	bool ReadKeyword(unsigned char* pKeyword, UT_sint32* pParam,
665                      bool* pParamUsed,
666 					 UT_uint32 keywordBuffLen);
667 	bool TranslateKeyword(unsigned char* pKeyword, UT_sint32 param,
668                           bool fParam);
669 public:
670 	bool TranslateKeywordID(RTF_KEYWORD_ID keywordID,
671 							UT_sint32 param, bool fParam);
672 	bool insertStrux(PTStruxType pts , const gchar ** attrs=NULL, const gchar ** props=NULL);
673 
674 	/*  Parser stuff */
675 	bool StandardKeywordParser(IE_Imp_RTFGroupParser *parser);
676 private:
677 	RTF_KEYWORD_ID KeywordToID(const char * keyword);
678 
679 	bool HandleStarKeyword();
680 
681 	bool HandlePCData(UT_UTF8String & str);
682 	bool HandlePCData(std::string& str);
683 
684 	bool ReadColourTable();
685 	bool ReadFontTable();
686 	bool RegisterFont(RTFFontTableItem::FontFamilyEnum fontFamily,
687 	                  RTFFontTableItem::FontPitch pitch,
688 	                  UT_uint16 fontIndex,
689 	                  int charSet, int codepage,
690 	                  UT_UTF8String sFontNames[]);
691 	bool PostProcessAndValidatePanose(UT_UTF8String &Panose);
692 	bool ReadRevisionTable();
693 	void setEncoding();
694 	bool ReadRDFTriples();
695 public:
696 	bool HandlePicture();
clearImageName(void)697 	void clearImageName(void)
698 		{ m_sImageName.clear();}
699 
700 private:
701 	bool HandleObject();
702 	bool HandleField();
703 	bool HandleStyleDefinition(void);
704 	bool HandleHeaderFooter(RTFHdrFtr::HdrFtrType hftype, UT_uint32 & headerID);
705 public:
706 	bool SkipCurrentGroup(bool bConsumeLastBrace = false);
707 private:
708 	bool StuffCurrentGroup(UT_ByteBuf & buf);
709 	bool LoadPictData(PictFormat format, const char * image_name,
710 					  struct RTFProps_ImageProps & imgProps,
711 					  bool isBinary = false, long binaryLen = 0);
712 	bool InsertImage (const FG_Graphic *, const char * image_name,
713 					  const struct RTFProps_ImageProps & imgProps);
714 
715 	RTFFontTableItem* GetNthTableFont(UT_sint32 fontNum);
716 	UT_uint32 GetNthTableColour(UT_sint32 colNum);
717 	UT_sint32 GetNthTableBgColour(UT_sint32 colNum);
718 
719 // ListTable handlers.
720 	bool ReadListTable(void);
721 	bool HandleListLevel(RTF_msword97_list * pList, UT_uint32 levelCount  );
722 	bool HandleTableList(void);
723 	char * getCharsInsideBrace(void);
724 	bool ParseCharParaProps(unsigned char * pKeyword,
725                             UT_sint32 param, bool fParam,
726                             RTFProps_CharProps * pChars,
727                             RTFProps_ParaProps * pParas,
728                             RTFProps_bCharProps * pbChars,
729                             RTFProps_bParaProps * pbParas);
730 	bool ReadListOverrideTable(void);
731 	bool HandleTableListOverride(void);
732 
733 	bool buildAllProps( std::string & s,  RTFProps_ParaProps * pParas,
734 					   RTFProps_CharProps * pChars,
735 					   RTFProps_bParaProps * pbParas,
736 					   RTFProps_bCharProps * pbChars);
737 
738 
739 	// Character property handlers
740 	bool ResetCharacterAttributes();
741 	bool buildCharacterProps(std::string & propBuffer);
742 	bool ApplyCharacterAttributes();
743 	bool HandleBoolCharacterProp(bool state, bool* pProp);
744 	bool HandleDeleted(bool state);
745 	bool HandleBold(bool state);
746 	bool HandleItalic(bool state);
747 	bool HandleHidden(bool state);
748 	bool HandleUnderline(bool state);
749 	bool HandleOverline(bool state);
750 	bool HandleStrikeout(bool state);
751 	bool HandleTopline(bool state);
752 	bool HandleBotline(bool state);
753 	bool HandleSuperscript(bool state);
754 	bool HandleSuperscriptPosition(UT_uint32 pos);
755 	bool HandleSubscript(bool state);
756 	bool HandleSubscriptPosition(UT_uint32 pos);
757 	bool HandleFontSize(long sizeInHalfPoints);
758 	bool HandleBookmark (RTFBookmarkType type);
759 	bool HandleRDFAnchor (RTFBookmarkType type);
760 	bool HandleListTag(long id);
761 
762 	bool HandleDeltaMoveID();
763 	bool HandleRevisedText(PP_RevisionType eType, UT_uint32 iId);
764 	bool HandleRevisedTextTimestamp(UT_uint32 iDttm);
765 
766 	// Generic handlers
767 	bool HandleFloatCharacterProp(double val, double* pProp);
768 	bool HandleU32CharacterProp(UT_uint32 val, UT_uint32* pProp);
769 	bool HandleFace(UT_uint32 fontNumber);
770 	bool HandleColour(UT_uint32 colourNumber);
771 	bool HandleBackgroundColour (UT_uint32 colourNumber);
772 
773 	// Paragraph property handlers
774 	bool ResetParagraphAttributes();
775 	bool ApplyParagraphAttributes(bool bDontInsert = false);
776 	bool SetParaJustification(RTFProps_ParaProps::ParaJustification just);
777 	bool AddTabstop(UT_sint32 stopDist, eTabType tabType, eTabLeader tableader);
778 	bool AddTabstop(UT_sint32 stopDist, eTabType tabType,
779                     eTabLeader tabLeader,  RTFProps_ParaProps * pParas);
780 
781 
782 // Paste AbiWord tables
783  public:
784 	bool HandleAbiTable(void);
785 	bool HandleAbiCell(void);
786 	bool HandleAbiEndTable(void);
787 	bool HandleAbiEndCell(void);
788 	bool HandleAbiLists(void);
789 	bool HandleAbiMathml(void);
790 	bool CreateDataItemfromStream(void);
791 	bool HandleAbiEmbed(void);
792 	bool isPastedTableOpen(void);
793 	bool markPasteBlock(void);
794 	bool isBlockNeededForPasteTable(void);
795 
796 	bool       HandleLists(_rtfListTable & rtfListTable );
797     UT_uint32  mapID(UT_uint32 id);
798 	UT_uint32  mapParentID(UT_uint32 id);
799 
800 // Table methods
801     bool           ResetCellAttributes(void);
802 	bool           ResetTableAttributes(void);
803     ie_imp_table * getTable(void);
804 	ie_imp_cell *  getCell(void);
805 	void           FlushCellProps(void);
806 	void           FlushTableProps(void);
807 	void           OpenTable(bool bDontFlush = false);
808 	void           CloseTable(bool bForceClose = false);
809 	void           SaveRowInfo(void);
810 	void           RemoveRowInfo(void);
811     void           HandleCell(void);
812 	void           HandleCellX(UT_sint32 cellx);
813     void           HandleRow(void);
814 	UT_sint32      getPasteDepth(void);
815 	void           closePastedTableIfNeeded(void);
816 
817  private:
818 
819 	void           HandleNote();
820 	void           StartAnnotation();
821 	void           EndAnnotation();
822 	void           HandleAnnotation();
823 	void           HandleNoteReference();
824 // Shape handlers in ie_imp_RTFObjectsAndPicts.cpp
825 	void           HandleShape(void);
826 	void           HandleShapePict(void);
827 
828 public:
829 	void           HandleShapeText(RTFProps_FrameProps & frame);
830 	void           addFrame(RTFProps_FrameProps & frame);
isFrameIn(void)831 	bool           isFrameIn(void)
832 		{ return m_bFrameStruxIn;}
833 
834 	bool           bUseInsertNotAppend(void);
835 
836 private:
837 //	void           HandleEndShape(void);
838 // Meta data
839 	bool           HandleInfoMetaData(void);
// Little convenience wrapper
841 	void           _setStringProperty(std::string & sPropString,
842                                       const char * szProp, const char * szVal);
843 
844 	// Section property handlers
845 	bool ApplySectionAttributes();
846 	bool ResetSectionAttributes();
847 	typedef enum {
848 	    RTF_TOKEN_NONE = 0,
849 	    RTF_TOKEN_OPEN_BRACE,
850 	    RTF_TOKEN_CLOSE_BRACE,
851 	    RTF_TOKEN_KEYWORD,
852 	    RTF_TOKEN_DATA,
853 	    RTF_TOKEN_ERROR = -1
854 	} RTFTokenType;
855 	RTFTokenType NextToken (unsigned char *pKeyword, UT_sint32* pParam,
856 							bool* pParamUsed, UT_uint32 len,
857                             bool bIgnoreWhiteSpace = false);
858 
859 	UT_Error _isBidiDocument();
860 	bool     _appendSpan();
861 	bool     _insertSpan();
862 	void     _formRevisionAttr(std::string & s, const std::string & props, const std::string & style);
863 
864 
865 private:
866 	// static helpers to decode
867 	static bool digVal(char ch, int& value, int base);
868 	static bool hexVal(char c, int& value);
869 
870 
871 // import member vars
872 private:
873 	/* keyword for new gen parser */
874 	static bool keywordSorted;
875 	static void _initialKeywordSort(void);
876 
877 	UT_GrowBuf m_gbBlock;
878 	char *m_szFileDirName;
879 
880 	int m_groupCount;
881 	bool m_newParaFlagged;
882 	bool m_newSectionFlagged;
883 	int m_cbBin;
884 
	// Headers and footers.
	// Headers and footers are NOT part of the state: they change each time
	// they are defined, and sections inherit them from the previous one.
	// They are not part of the section properties either, so they are not
	// reset by \sectd.
	// TODO: handle \titlepg and \facingpg cases.
	UT_uint32       m_currentHdrID;     // these are numbers.
	UT_uint32       m_currentFtrID;
	UT_uint32       m_currentHdrEvenID;     // these are numbers.
	UT_uint32       m_currentFtrEvenID;
	UT_uint32       m_currentHdrFirstID;     // these are numbers.
	UT_uint32       m_currentFtrFirstID;
	UT_uint32       m_currentHdrLastID;     // these are numbers.
	UT_uint32       m_currentFtrLastID;
898 
899 
	// Parser state and the tables collected during import.
	// NOTE(review): presumably m_stateStack holds saved RTFStateStore
	// objects while m_currentRTFState is the active one -- confirm.
	UT_Stack m_stateStack;
	RTFStateStore m_currentRTFState;

	// The font, header/footer and Word 97 list vectors hold raw pointers.
	// NOTE(review): ownership is not visible here; presumably the
	// importer frees the elements itself -- confirm.
	std::vector<RTFFontTableItem*> m_fontTable;
	std::vector<UT_sint32> m_colourTable;
	std::vector<RTFHdrFtr*> m_hdrFtrTable;
	std::vector<std::string> m_styleTable;
907 
	// Element type of m_vecAbiListTable (stored there by pointer).
	// The members have no initializers, so whoever creates an instance
	// must assign every field before reading it.
	// NOTE(review): the field names suggest a mapping from the list ids
	// found in the imported stream (orig_*) to the ids used in the
	// document (mapped_*), with hasBeenMapped telling whether the mapped_*
	// fields are valid yet -- confirm against the implementation.
	struct _rtfAbiListTable
	{
		UT_uint32 orig_id;
		UT_uint32 orig_parentid;
		UT_uint32 start_value;
		UT_uint32 level;
		bool hasBeenMapped;
		UT_uint32 mapped_id;
		UT_uint32 mapped_parentid;
	};
918 	std::vector<_rtfAbiListTable *> m_vecAbiListTable;
getAbiList(UT_uint32 i)919 	_rtfAbiListTable * getAbiList( UT_uint32 i) const
920         { return m_vecAbiListTable.at(i); }
921 
	// NOTE(review): presumably looks up the entry with the given id in
	// m_vecWord97ListOverride (declared further down) -- confirm, including
	// what is returned when no entry matches.
	RTF_msword97_listOverride* _getTableListOverride(UT_uint32 id);

	// List bookkeeping.
	UT_uint32 m_numLists;
	bool m_bisAbiList;
	bool m_bisNOTList; // true if the current stream does not have abi list extensions
	bool m_bParaHasRTFList;
	bool m_bParaHasRTFContinue;
929 
	// Input source.
	// NOTE(review): presumably either m_pImportFile (import from a file)
	// or the m_pPasteBuffer group (paste from memory) is in use -- confirm.
	GsfInput* m_pImportFile;

	const unsigned char *		m_pPasteBuffer;
	UT_uint32 			m_lenPasteBuffer;
	const unsigned char *		m_pCurrentCharInPasteBuffer;
	PT_DocPosition		m_dposPaste;
	PT_DocPosition		m_dOrigPos;
	// Note: unlike its neighbours, this member has no m_ prefix.
	UT_uint32		    deflangid;
	UT_UCS4_mbtowc		m_mbtowc;
	bool                m_parsingHdrFtr;
	UT_uint32           m_icurOverride;
	UT_uint32           m_icurOverrideLevel;
	// Word 97 list tables; the vectors hold raw pointers.
	std::vector<RTF_msword97_list *> m_vecWord97Lists;
	std::vector<RTF_msword97_listOverride*> m_vecWord97ListOverride;
944 	void _appendHdrFtr ();
945 	bool _appendField (const gchar *xmlField,
946                        const gchar ** pszAttribs=NULL);
947 	gchar *_parseFldinstBlock (UT_ByteBuf & buf, gchar *xmlField,
948                                   bool & isXML);
	// Section, table, footnote, hyperlink and frame bookkeeping.
	// NOTE(review): only the names are visible here; the exact meaning of
	// each flag should be confirmed against the implementation file.
	bool                m_bAppendAnyway;
	RTFProps_SectionProps m_sectdProps ;
	ie_imp_table_control  m_TableControl;
	pf_Frag_Strux*     m_lastCellSDH;
	bool                  m_bNestTableProps;
	bool                  m_bParaWrittenForSection;
	bool                  m_bCellBlank;
	bool                  m_bEndTableOpen;
	bool                  m_bInFootnote;
	UT_sint32             m_iDepthAtFootnote;
	UT_uint32             m_iLastFootnoteId;
	UT_uint32             m_iLastEndnoteId;
	std::string             m_hyperlinkBase;
	UT_uint32             m_iHyperlinkOpen;
	UT_uint32             m_iRDFAnchorOpen;
	bool                  m_bBidiMode;
	UT_Stack              m_pasteTableStack;
	bool                  m_bFootnotePending;
	bool                  m_bFtnReferencePending;
	bool                  m_bNoteIsFNote;
	bool                  m_bStyleImportDone;
	bool                  m_bCellHandled;
	bool                  m_bContentFlushed;
	bool                  m_bRowJustPassed;
	UT_sint32             m_iStackLevelAtRow;
	bool                  m_bDoCloseTable;
	UT_sint32             m_iNoCellsSinceLastRow;
	RTFStateStore         m_FootnoteRefState;
	bool                  m_bFieldRecognized;
	UT_sint32             m_iIsInHeaderFooter;
	bool                  m_bSectionHasPara;
	bool                  m_bStruxInserted;
	bool		          m_bStruxImage;
	UT_UTF8String	      m_sImageName;
	bool                  m_bFrameStruxIn; // value returned by isFrameIn()
984 
	// Bidirectional-text tracking.
	UT_BidiCharType       m_iAutoBidiOverride;
	UT_BidiCharType       m_iBidiLastType;
	UT_BidiCharType       m_iBidiNextType;

	const char* m_szDefaultEncoding; // Document default char encoding.
	int m_iDefaultFontNumber;        // Document default font.
	PT_DocPosition        m_dPosBeforeFootnote;
	bool                  m_bMovedPos;
	ABI_RTF_Annotation *  m_pAnnotation;
	pf_Frag *             m_pDelayedFrag; // insert before this frag if non-null
	PT_DocPosition        m_posSavedDocPosition;
	bool                  m_bInAnnotation;
	bool                  m_bFrameTextBox;
	bool                  m_bParaActive;
	bool                  m_bCellActive;
1000 
	std::string           m_ctMoveID;

	PD_XMLIDCreatorHandle  m_XMLIDCreatorHandle;
	// NOTE(review): std::map is used here, but <map> is not among the
	// standard headers this file includes directly (<stdio.h>, <string>,
	// <vector>); it presumably arrives through one of the project headers.
	// Consider including <map> explicitly.
	std::map< std::string, std::string > m_rdfAnchorCloseXMLIDs;
1005 
1006 };
1007 
1008 #endif /* IE_IMP_RTF_H */
1009 
1010