1 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */ 2 3 /* AbiWord 4 * Copyright (C) 2001 AbiSource, Inc. 5 * Copyright (C) 2001 Dom Lachowicz <dominicl@seas.upenn.edu> 6 * Copyright (C) 2001-2003 Tomas Frydrych 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 2 11 * of the License, or (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 21 * 02110-1301 USA. 22 */ 23 24 #ifndef IE_IMP_MSWORD_H 25 #define IE_IMP_MSWORD_H 26 27 // The importer/reader for Microsoft Word Documents 28 29 #include "ie_imp.h" 30 #include "ut_string_class.h" 31 #include "fl_DocLayout.h" 32 #include "fl_AutoLists.h" 33 #include "ut_units.h" 34 // 35 // forward decls so that we don't have to #include "wv.h" here 36 // 37 typedef struct _wvParseStruct wvParseStruct; 38 typedef struct _Blip Blip; 39 typedef struct _CHP CHP; 40 typedef struct _PAP PAP; 41 class PD_Document; 42 class pf_Frag; 43 class UT_Stack; 44 45 struct field; 46 47 struct bookmark 48 { 49 gchar * name; 50 UT_uint32 pos; 51 bool start; 52 }; 53 54 struct footnote 55 { 56 UT_uint32 type; 57 UT_uint32 ref_pos; 58 UT_uint32 txt_pos; 59 UT_uint32 txt_len; 60 UT_uint32 pid; 61 }; 62 63 64 struct textbox 65 { 66 UT_uint32 lid; 67 UT_uint32 ref_pos; 68 UT_uint32 txt_pos; 69 UT_uint32 txt_len; 70 UT_sint32 iLeft; 71 UT_sint32 iWidth; 72 UT_sint32 iTop; 73 UT_sint32 iHeight; 74 UT_sint32 iPosType; 75 UT_sint32 iBorderWidth; 76 }; 77 78 struct textboxPos 79 { 80 UT_uint32 lid; 81 pf_Frag * endFrame; 82 }; 83 84 typedef enum 85 { 86 HF_HeaderFirst = 0, 87 HF_FooterFirst, 88 HF_HeaderOdd, 89 HF_FooterOdd, 90 HF_HeaderEven, 91 HF_FooterEven, 92 HF_Unsupported 93 }_headerTypes; 94 95 96 struct header 97 { 98 _headerTypes type; 99 UT_uint32 pos; 100 UT_uint32 len; 101 UT_uint32 pid; 102 103 struct _d 104 { 105 UT_Vector hdr; 106 UT_Vector frag; 107 }d; 108 }; 109 110 class ABI_EXPORT MsColSpan 111 { 112 public: MsColSpan(void)113 MsColSpan(void):iLeft(0),iRight(0),width(0){} ~MsColSpan(void)114 virtual ~MsColSpan(void) {} 115 UT_sint32 iLeft; 116 UT_sint32 iRight; 117 UT_sint32 width; 118 }; 119 120 class ABI_EXPORT emObject 121 { 122 public: 123 UT_String props1; 124 UT_String props2; 125 PTObjectType objType; 126 }; 127 128 // 129 // The Sniffer/Manager/Creator Class for DOC 130 // 131 class ABI_EXPORT IE_Imp_MsWord_97_Sniffer : public IE_ImpSniffer 132 { 133 friend class IE_Imp; 134 135 public: 136 IE_Imp_MsWord_97_Sniffer(); ~IE_Imp_MsWord_97_Sniffer()137 virtual ~IE_Imp_MsWord_97_Sniffer() {} 138 139 virtual const IE_SuffixConfidence * getSuffixConfidence (); 140 virtual const IE_MimeConfidence * getMimeConfidence (); 141 virtual UT_Confidence_t recognizeContents (const char * szBuf, 142 UT_uint32 iNumbytes); 143 virtual UT_Confidence_t recognizeContents (GsfInput * input); 144 virtual bool getDlgLabels (const char ** szDesc, 145 const char ** szSuffixList, 146 IEFileType * ft); 147 virtual UT_Error constructImporter (PD_Document * pDocument, 148 IE_Imp ** ppie); 149 }; 150 151 // how many chars to buffer in our fields implementation 152 #define FLD_SIZE 40000 153 154 // 155 // The import class for the MSFT Word DOC format 156 // 157 class ABI_EXPORT IE_Imp_MsWord_97 : public IE_Imp 158 { 159 public: 160 IE_Imp_MsWord_97 (PD_Document * pDocument); 161 ~IE_Imp_MsWord_97 (); 162 supportsLoadStylesOnly()163 virtual bool supportsLoadStylesOnly() {return true;} 164 165 // wv's callbacks need access to these, so they have to be public 166 int _specCharProc (wvParseStruct *ps, UT_uint16 eachchar, 167 CHP * achp); 168 int _charProc (wvParseStruct *ps, UT_uint16 eachchar, 169 UT_Byte chartype, UT_uint16 lid); 170 int _docProc (wvParseStruct *ps, UT_uint32 tag); 171 int _eleProc (wvParseStruct *ps, UT_uint32 tag, 172 void *props, int dirty); 173 174 protected: 175 176 UT_Error _loadFile (GsfInput * input); 177 178 private: 179 180 void _handleMetaData(wvParseStruct *ps); 181 182 int _beginSect (wvParseStruct *ps, UT_uint32 tag, 183 void *props, int dirty); 184 int _endSect (wvParseStruct *ps, UT_uint32 tag, 185 void *props, int dirty); 186 187 int _beginPara (wvParseStruct *ps, UT_uint32 tag, 188 void *props, int dirty); 189 int _endPara (wvParseStruct *ps, UT_uint32 tag, 190 void *props, int dirty); 191 192 int _beginChar (wvParseStruct *ps, UT_uint32 tag, 193 void *props, int dirty); 194 int _endChar (wvParseStruct *ps, UT_uint32 tag, 195 void *props, int dirty); 196 int _beginComment (wvParseStruct *ps, UT_uint32 tag, 197 void *props, int dirty); 198 int _endComment (wvParseStruct *ps, UT_uint32 tag, 199 void *props, int dirty); 200 gchar * _getBookmarkName(const wvParseStruct * ps, UT_uint32 pos); 201 bool _insertBookmarkIfAppropriate(UT_uint32 iPos); 202 bool _insertBookmark(bookmark * bm); 203 UT_Error _handleImage (Blip *, long width, long height, long cropt, long cropb, long cropl, long cropr); 204 UT_Error _handlePositionedImage (Blip *, UT_String & sImageName); 205 bool _handleCommandField (char *command); 206 bool _handleFieldEnd (char * command, UT_uint32 iPos); 207 int _fieldProc (wvParseStruct *ps, UT_uint16 eachchar, 208 UT_Byte chartype, UT_uint16 lid); 209 void _appendChar (UT_UCSChar ch); 210 void _flush (); 211 212 void _table_open(); 213 void _table_close(const wvParseStruct *ps, const PAP *apap); 214 void _row_open(const wvParseStruct *ps); 215 void _row_close(); 216 void _cell_open(const wvParseStruct *ps, const PAP *apap); 217 void _cell_close(); 218 void _handleStyleSheet(const wvParseStruct *ps); 219 void _generateCharProps(UT_String &s, const CHP * achp, wvParseStruct *ps); 220 void _generateParaProps(UT_String &s, const PAP * apap, wvParseStruct *ps); 221 int _handleBookmarks(const wvParseStruct *ps); 222 void _handleNotes(const wvParseStruct *ps); 223 void _handleTextBoxes(const wvParseStruct *ps); 224 bool _insertNoteIfAppropriate(UT_uint32 iDocPosition,UT_UCS4Char c); 225 bool _insertFootnote(const footnote * f, UT_UCS4Char c); 226 bool _insertEndnote(const footnote * f, UT_UCS4Char c); 227 bool _handleNotesText(UT_uint32 iPos); 228 bool _handleTextboxesText(UT_uint32 iPos); 229 bool _findNextTextboxSection(); 230 bool _findNextFNoteSection(); 231 bool _findNextENoteSection(); 232 bool _shouldUseInsert()const; 233 bool _ensureInBlock(); 234 bool _appendStrux(PTStruxType pts, const gchar ** attributes); 235 bool _appendObject(PTObjectType pto, const gchar ** attributes); 236 bool _appendSpan(const UT_UCSChar * p, UT_uint32 length); 237 bool _appendStruxHdrFtr(PTStruxType pts, const gchar ** attributes); 238 bool _appendObjectHdrFtr(PTObjectType pto, const gchar ** attributes); 239 bool _appendSpanHdrFtr(const UT_UCSChar * p, UT_uint32 length); 240 bool _appendFmt(const gchar ** attributes); 241 void _handleHeaders(const wvParseStruct *ps); 242 bool _handleHeadersText(UT_uint32 iPos, bool bDoBlockIns); 243 bool _insertHeaderSection(bool bDoBlockIns); 244 bool _build_ColumnWidths(UT_NumberVector & colWidths); 245 bool _isVectorFull(UT_NumberVector & vec); 246 void setNumberVector(UT_NumberVector & vec, UT_sint32 i, UT_sint32 val); 247 bool findMatchSpan(UT_sint32 iLeft,UT_sint32 iRight); 248 bool _ignorePosition(UT_uint32 pos); 249 250 bool _isTOCsupported(field *f); 251 bool _insertTOC(field *f); 252 253 254 UT_UCS4String m_pTextRun; 255 //UT_uint32 m_iImageCount; 256 UT_uint32 m_nSections; 257 bool m_bSetPageSize; 258 #if 0 259 UT_UCS2Char m_command [FLD_SIZE]; 260 UT_UCS2Char m_argument [FLD_SIZE]; 261 UT_UCS2Char *m_fieldWhich; 262 UT_sint32 m_fieldI; 263 char * m_fieldC; 264 UT_sint32 m_fieldRet; 265 UT_sint32 m_fieldDepth; 266 #else 267 UT_Stack m_stackField; 268 #endif 269 //char * m_fieldA; 270 bool m_bIsLower; 271 272 bool m_bInSect; 273 bool m_bInPara; 274 bool m_bLTRCharContext; 275 bool m_bLTRParaContext; 276 UT_BidiCharType m_iOverrideIssued; 277 bool m_bBidiMode; 278 bool m_bInLink; 279 bookmark * m_pBookmarks; 280 UT_uint32 m_iBookmarksCount; 281 footnote * m_pFootnotes; 282 UT_uint32 m_iFootnotesCount; 283 footnote * m_pEndnotes; 284 UT_uint32 m_iEndnotesCount; 285 textbox * m_pTextboxes; 286 UT_sint32 m_iTextboxCount; 287 UT_Vector m_vLists; 288 UT_uint32 m_iListIdIncrement[9]; 289 UT_uint32 m_iMSWordListId; 290 291 bool m_bEncounteredRevision; 292 bool m_bInTable; // are we in a table ? 293 int m_iRowsRemaining; // number of rows left to process 294 int m_iCellsRemaining; // number of cells left to process in the current row 295 int m_iCurrentRow; // 296 int m_iCurrentCell; // 297 bool m_bRowOpen; // row strux open ? 298 bool m_bCellOpen; // cell strux open ? 299 UT_NumberVector m_vecColumnSpansForCurrentRow; // placeholder for horizontal cell spans 300 UT_GenericVector<MsColSpan *> m_vecColumnWidths; 301 UT_GenericVector<emObject*> m_vecEmObjects; // Objects between cell 302 // struxes 303 UT_NumberVector m_vecColumnPositions; 304 UT_String m_charProps; 305 UT_String m_charRevs; 306 UT_String m_charStyle; 307 UT_String m_paraProps; 308 UT_String m_paraStyle; 309 310 UT_uint32 m_iFootnotesStart; 311 UT_uint32 m_iFootnotesEnd; 312 UT_uint32 m_iEndnotesStart; 313 UT_uint32 m_iEndnotesEnd; 314 UT_uint32 m_iNextFNote; 315 UT_uint32 m_iNextENote; 316 bool m_bInFNotes; 317 bool m_bInENotes; 318 pf_Frag * m_pNotesEndSection; 319 header * m_pHeaders; 320 UT_uint32 m_iHeadersCount; 321 UT_uint32 m_iHeadersStart; 322 UT_uint32 m_iHeadersEnd; 323 UT_uint32 m_iCurrentHeader; 324 bool m_bInHeaders; 325 UT_uint32 m_iCurrentSectId; 326 UT_uint32 m_iAnnotationsStart; 327 UT_uint32 m_iAnnotationsEnd; 328 UT_uint32 m_iMacrosStart; 329 UT_uint32 m_iMacrosEnd; 330 UT_uint32 m_iTextStart; 331 UT_uint32 m_iTextEnd; 332 bool m_bPageBreakPending; 333 bool m_bLineBreakPending; 334 UT_NumberVector m_vListIdMap; 335 bool m_bSymbolFont; 336 UT_Dimension m_dim; 337 UT_sint32 m_iLeft; 338 UT_sint32 m_iRight; 339 UT_uint32 m_iTextboxesStart; 340 UT_uint32 m_iTextboxesEnd; 341 UT_sint32 m_iNextTextbox; 342 UT_uint32 m_iPrevHeaderPosition; 343 bool m_bEvenOddHeaders; 344 345 UT_sint32 m_bInTOC; 346 bool m_bTOCsupported; 347 bool m_bInTextboxes; 348 pf_Frag * m_pTextboxEndSection; 349 UT_GenericVector<textboxPos *> m_vecTextboxPos; 350 UT_sint32 m_iLeftCellPos; 351 UT_uint32 m_iLastAppendedHeader; 352 }; 353 354 #endif /* IE_IMP_MSWORD_H */ 355