/* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
2 
/* AbiWord
 * Copyright (C) 2001 AbiSource, Inc.
 * Copyright (C) 2001 Dom Lachowicz <dominicl@seas.upenn.edu>
 * Copyright (C) 2001-2003 Tomas Frydrych
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA.
 */
23 
24 #ifndef IE_IMP_MSWORD_H
25 #define IE_IMP_MSWORD_H
26 
// The importer/reader for Microsoft Word documents
28 
29 #include "ie_imp.h"
30 #include "ut_string_class.h"
31 #include "fl_DocLayout.h"
32 #include "fl_AutoLists.h"
33 #include "ut_units.h"
//
// Forward declarations, so that we don't have to #include "wv.h" here.
// Everything declared in this group is used in this header only through
// pointers, except UT_Stack (see the note on it).
//
typedef struct _wvParseStruct wvParseStruct;
typedef struct _Blip Blip;
typedef struct _CHP CHP;
typedef struct _PAP PAP;
class PD_Document;
class pf_Frag;
// NOTE(review): IE_Imp_MsWord_97 holds a UT_Stack by value (m_stackField),
// which requires the complete type; presumably one of the headers included
// above provides it -- confirm, otherwise this header is not self-contained.
class UT_Stack;

// Field record; declared here, defined elsewhere.
struct field;
46 
47 struct bookmark
48 {
49 	gchar * name;
50 	UT_uint32  pos;
51 	bool	   start;
52 };
53 
54 struct footnote
55 {
56 	UT_uint32  type;
57 	UT_uint32  ref_pos;
58 	UT_uint32  txt_pos;
59 	UT_uint32  txt_len;
60 	UT_uint32  pid;
61 };
62 
63 
64 struct textbox
65 {
66 	UT_uint32  lid;
67 	UT_uint32  ref_pos;
68 	UT_uint32  txt_pos;
69 	UT_uint32  txt_len;
70 	UT_sint32  iLeft;
71 	UT_sint32  iWidth;
72 	UT_sint32  iTop;
73 	UT_sint32  iHeight;
74 	UT_sint32  iPosType;
75 	UT_sint32  iBorderWidth;
76 };
77 
78 struct textboxPos
79 {
80 	UT_uint32 lid;
81 	pf_Frag * endFrame;
82 };
83 
// Kinds of header/footer the importer distinguishes.
// The enumerators are numbered consecutively from 0; header and footer
// variants alternate (first page, odd pages, even pages), and
// HF_Unsupported is the last value.
typedef enum
	{
		HF_HeaderFirst = 0,
		HF_FooterFirst,
		HF_HeaderOdd,
		HF_FooterOdd,
		HF_HeaderEven,
		HF_FooterEven,
		HF_Unsupported
	}_headerTypes;
94 
95 
96 struct header
97 {
98 	_headerTypes type;
99 	UT_uint32    pos;
100 	UT_uint32    len;
101 	UT_uint32    pid;
102 
103 	struct _d
104 	{
105 		UT_Vector hdr;
106 		UT_Vector frag;
107 	}d;
108 };
109 
110 class ABI_EXPORT MsColSpan
111 {
112 public:
MsColSpan(void)113 	MsColSpan(void):iLeft(0),iRight(0),width(0){}
~MsColSpan(void)114 	virtual ~MsColSpan(void) {}
115 	UT_sint32 iLeft;
116 	UT_sint32 iRight;
117 	UT_sint32 width;
118 };
119 
120 class ABI_EXPORT emObject
121 {
122 public:
123 	UT_String props1;
124 	UT_String props2;
125 	PTObjectType objType;
126 };
127 
128 //
129 // The Sniffer/Manager/Creator Class for DOC
130 //
131 class ABI_EXPORT IE_Imp_MsWord_97_Sniffer : public IE_ImpSniffer
132 {
133 	friend class IE_Imp;
134 
135 public:
136 	IE_Imp_MsWord_97_Sniffer();
~IE_Imp_MsWord_97_Sniffer()137 	virtual ~IE_Imp_MsWord_97_Sniffer() {}
138 
139 	virtual const IE_SuffixConfidence * getSuffixConfidence ();
140 	virtual const IE_MimeConfidence * getMimeConfidence ();
141 	virtual UT_Confidence_t recognizeContents (const char * szBuf,
142 									UT_uint32 iNumbytes);
143 	virtual UT_Confidence_t recognizeContents (GsfInput * input);
144 	virtual bool getDlgLabels (const char ** szDesc,
145 							   const char ** szSuffixList,
146 							   IEFileType * ft);
147 	virtual UT_Error constructImporter (PD_Document * pDocument,
148 										IE_Imp ** ppie);
149 };
150 
// how many chars to buffer in our fields implementation
// (within this header it is referenced only by the disabled, #if 0,
// m_command / m_argument buffers of IE_Imp_MsWord_97)
#define FLD_SIZE 40000
153 
154 //
155 // The import class for the MSFT Word DOC format
156 //
157 class ABI_EXPORT IE_Imp_MsWord_97 : public IE_Imp
158 {
159 public:
160 	IE_Imp_MsWord_97 (PD_Document * pDocument);
161 	~IE_Imp_MsWord_97 ();
162 
supportsLoadStylesOnly()163 	virtual bool        supportsLoadStylesOnly() {return true;}
164 
165 	// wv's callbacks need access to these, so they have to be public
166 	int 			_specCharProc (wvParseStruct *ps, UT_uint16 eachchar,
167 								   CHP * achp);
168 	int 			_charProc (wvParseStruct *ps, UT_uint16 eachchar,
169 							   UT_Byte chartype,  UT_uint16 lid);
170 	int 			_docProc  (wvParseStruct *ps, UT_uint32 tag);
171 	int 			_eleProc  (wvParseStruct *ps, UT_uint32 tag,
172 							   void *props, int dirty);
173 
174 protected:
175 
176 	UT_Error			_loadFile (GsfInput * input);
177 
178 private:
179 
180 	void       _handleMetaData(wvParseStruct *ps);
181 
182 	int 	   _beginSect (wvParseStruct *ps, UT_uint32 tag,
183 						   void *props, int dirty);
184 	int 	   _endSect (wvParseStruct *ps, UT_uint32 tag,
185 						 void *props, int dirty);
186 
187 	int 	   _beginPara (wvParseStruct *ps, UT_uint32 tag,
188 						   void *props, int dirty);
189 	int 	   _endPara (wvParseStruct *ps, UT_uint32 tag,
190 						 void *props, int dirty);
191 
192 	int 	   _beginChar (wvParseStruct *ps, UT_uint32 tag,
193 						   void *props, int dirty);
194 	int 	   _endChar (wvParseStruct *ps, UT_uint32 tag,
195 						 void *props, int dirty);
196 	int 	   _beginComment (wvParseStruct *ps, UT_uint32 tag,
197 						   void *props, int dirty);
198 	int 	   _endComment (wvParseStruct *ps, UT_uint32 tag,
199 						 void *props, int dirty);
200 	gchar * _getBookmarkName(const wvParseStruct * ps, UT_uint32 pos);
201 	bool	   _insertBookmarkIfAppropriate(UT_uint32 iPos);
202 	bool	   _insertBookmark(bookmark * bm);
203 	UT_Error   _handleImage (Blip *, long width, long height, long cropt, long cropb, long cropl, long cropr);
204 	UT_Error   _handlePositionedImage (Blip *, UT_String & sImageName);
205 	bool	   _handleCommandField (char *command);
206 	bool	   _handleFieldEnd (char * command, UT_uint32 iPos);
207 	int 	   _fieldProc (wvParseStruct *ps, UT_uint16 eachchar,
208 						   UT_Byte chartype, UT_uint16 lid);
209 	void	   _appendChar (UT_UCSChar ch);
210 	void	   _flush ();
211 
212 	void		_table_open();
213 	void		_table_close(const wvParseStruct *ps, const PAP *apap);
214 	void		_row_open(const wvParseStruct *ps);
215 	void		_row_close();
216 	void		_cell_open(const wvParseStruct *ps, const PAP *apap);
217 	void		_cell_close();
218 	void        _handleStyleSheet(const wvParseStruct *ps);
219 	void        _generateCharProps(UT_String &s, const CHP * achp, wvParseStruct *ps);
220 	void        _generateParaProps(UT_String &s, const PAP * apap, wvParseStruct *ps);
221 	int         _handleBookmarks(const wvParseStruct *ps);
222 	void        _handleNotes(const wvParseStruct *ps);
223 	void        _handleTextBoxes(const wvParseStruct *ps);
224 	bool        _insertNoteIfAppropriate(UT_uint32 iDocPosition,UT_UCS4Char c);
225 	bool        _insertFootnote(const footnote * f, UT_UCS4Char c);
226 	bool        _insertEndnote(const footnote * f, UT_UCS4Char c);
227 	bool        _handleNotesText(UT_uint32 iPos);
228 	bool        _handleTextboxesText(UT_uint32 iPos);
229 	bool        _findNextTextboxSection();
230 	bool        _findNextFNoteSection();
231 	bool        _findNextENoteSection();
232 	bool        _shouldUseInsert()const;
233 	bool        _ensureInBlock();
234 	bool        _appendStrux(PTStruxType pts, const gchar ** attributes);
235 	bool        _appendObject(PTObjectType pto, const gchar ** attributes);
236 	bool        _appendSpan(const UT_UCSChar * p, UT_uint32 length);
237 	bool        _appendStruxHdrFtr(PTStruxType pts, const gchar ** attributes);
238 	bool        _appendObjectHdrFtr(PTObjectType pto, const gchar ** attributes);
239 	bool        _appendSpanHdrFtr(const UT_UCSChar * p, UT_uint32 length);
240 	bool		_appendFmt(const gchar ** attributes);
241 	void        _handleHeaders(const wvParseStruct *ps);
242 	bool        _handleHeadersText(UT_uint32 iPos, bool bDoBlockIns);
243 	bool        _insertHeaderSection(bool bDoBlockIns);
244 	bool        _build_ColumnWidths(UT_NumberVector & colWidths);
245 	bool        _isVectorFull(UT_NumberVector & vec);
246 	void        setNumberVector(UT_NumberVector & vec, UT_sint32 i, UT_sint32 val);
247 	bool        findMatchSpan(UT_sint32 iLeft,UT_sint32 iRight);
248 	bool        _ignorePosition(UT_uint32 pos);
249 
250 	bool        _isTOCsupported(field *f);
251 	bool        _insertTOC(field *f);
252 
253 
254 	UT_UCS4String		m_pTextRun;
255 	//UT_uint32			m_iImageCount;
256 	UT_uint32			m_nSections;
257 	bool				m_bSetPageSize;
258 #if 0
259 	UT_UCS2Char m_command [FLD_SIZE];
260 	UT_UCS2Char m_argument [FLD_SIZE];
261 	UT_UCS2Char *m_fieldWhich;
262 	UT_sint32	m_fieldI;
263 	char *		m_fieldC;
264 	UT_sint32	m_fieldRet;
265 	UT_sint32	m_fieldDepth;
266 #else
267 	UT_Stack    m_stackField;
268 #endif
269 	//char *	  m_fieldA;
270 	bool	   m_bIsLower;
271 
272 	bool m_bInSect;
273 	bool m_bInPara;
274 	bool m_bLTRCharContext;
275 	bool m_bLTRParaContext;
276 	UT_BidiCharType  m_iOverrideIssued;
277 	bool m_bBidiMode;
278 	bool m_bInLink;
279 	bookmark * m_pBookmarks;
280 	UT_uint32  m_iBookmarksCount;
281 	footnote * m_pFootnotes;
282 	UT_uint32  m_iFootnotesCount;
283 	footnote * m_pEndnotes;
284 	UT_uint32  m_iEndnotesCount;
285 	textbox *  m_pTextboxes;
286 	UT_sint32  m_iTextboxCount;
287 	UT_Vector  m_vLists;
288 	UT_uint32  m_iListIdIncrement[9];
289 	UT_uint32  m_iMSWordListId;
290 
291 	bool m_bEncounteredRevision;
292 	bool		m_bInTable;						// are we in a table ?
293 	int			m_iRowsRemaining;				// number of rows left to process
294 	int			m_iCellsRemaining;				// number of cells left to process in the current row
295 	int			m_iCurrentRow;					//
296 	int			m_iCurrentCell;					//
297 	bool		m_bRowOpen;						// row strux open ?
298 	bool		m_bCellOpen;					// cell strux open ?
299 	UT_NumberVector	m_vecColumnSpansForCurrentRow;	// placeholder for horizontal cell spans
300 	UT_GenericVector<MsColSpan *>	m_vecColumnWidths;
301 	UT_GenericVector<emObject*>   m_vecEmObjects;               // Objects between cell
302 											  // struxes
303 	UT_NumberVector m_vecColumnPositions;
304 	UT_String   m_charProps;
305 	UT_String   m_charRevs;
306 	UT_String   m_charStyle;
307 	UT_String   m_paraProps;
308 	UT_String   m_paraStyle;
309 
310 	UT_uint32   m_iFootnotesStart;
311 	UT_uint32   m_iFootnotesEnd;
312 	UT_uint32   m_iEndnotesStart;
313 	UT_uint32   m_iEndnotesEnd;
314 	UT_uint32   m_iNextFNote;
315 	UT_uint32   m_iNextENote;
316 	bool        m_bInFNotes;
317 	bool        m_bInENotes;
318 	pf_Frag *   m_pNotesEndSection;
319 	header *    m_pHeaders;
320 	UT_uint32   m_iHeadersCount;
321 	UT_uint32   m_iHeadersStart;
322 	UT_uint32   m_iHeadersEnd;
323 	UT_uint32   m_iCurrentHeader;
324 	bool        m_bInHeaders;
325 	UT_uint32   m_iCurrentSectId;
326 	UT_uint32   m_iAnnotationsStart;
327 	UT_uint32   m_iAnnotationsEnd;
328 	UT_uint32   m_iMacrosStart;
329 	UT_uint32   m_iMacrosEnd;
330 	UT_uint32   m_iTextStart;
331 	UT_uint32   m_iTextEnd;
332 	bool        m_bPageBreakPending;
333 	bool        m_bLineBreakPending;
334 	UT_NumberVector m_vListIdMap;
335 	bool        m_bSymbolFont;
336 	UT_Dimension m_dim;
337 	UT_sint32    m_iLeft;
338 	UT_sint32    m_iRight;
339 	UT_uint32    m_iTextboxesStart;
340 	UT_uint32    m_iTextboxesEnd;
341 	UT_sint32    m_iNextTextbox;
342 	UT_uint32    m_iPrevHeaderPosition;
343 	bool         m_bEvenOddHeaders;
344 
345 	UT_sint32    m_bInTOC;
346 	bool         m_bTOCsupported;
347 	bool         m_bInTextboxes;
348 	pf_Frag *    m_pTextboxEndSection;
349 	UT_GenericVector<textboxPos *> m_vecTextboxPos;
350 	UT_sint32    m_iLeftCellPos;
351 	UT_uint32    m_iLastAppendedHeader;
352 };
353 
354 #endif /* IE_IMP_MSWORD_H */
355