1 //* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */ 2 3 /* AbiWord 4 * Copyright (C) 2001 AbiSource, Inc. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 * 02110-1301 USA. 20 */ 21 22 23 #ifndef IE_IMP_XML_H 24 #define IE_IMP_XML_H 25 26 #include <stdio.h> 27 #include <string> 28 #include <map> 29 30 #include "ut_xml.h" 31 32 #include "ut_vector.h" 33 #include "ut_stack.h" 34 #include "ie_imp.h" 35 #include "ut_bytebuf.h" 36 #include "ut_string_class.h" 37 38 class PD_Document; 39 40 #include <boost/shared_ptr.hpp> 41 class PD_DocumentRDFMutation; 42 typedef boost::shared_ptr<PD_DocumentRDFMutation> PD_DocumentRDFMutationHandle; 43 44 45 struct ABI_EXPORT xmlToIdMapping { 46 const char *m_name; 47 int m_type; 48 }; 49 50 // The importer/reader for reading generic 51 // XML documents. Currently, the following classes derive from this: 52 // 53 // ABW, AWT, GZABW 54 // DBK 55 // WML 56 // XHTML 57 // XSL-FO 58 // KWORD 1 && 2 (soon) 59 60 class ABI_EXPORT IE_Imp_XML : public IE_Imp, public UT_XML::Listener 61 { 62 public: 63 IE_Imp_XML(PD_Document * pDocument, bool whiteSignificant); 64 virtual ~IE_Imp_XML(); 65 virtual UT_Error importFile(const char * data, UT_uint32 length); 66 virtual UT_Error importFile(const UT_ByteBuf * data); 67 68 virtual bool pasteFromBuffer(PD_DocumentRange * pDocRange, 69 const unsigned char * pData, 70 UT_uint32 lenData, 71 const char * szEncoding = 0); 72 73 /* (Partial) Implementation of UT_XML::Listener 74 * 75 * You *must* override these next two methods: 76 */ 77 virtual void startElement (const gchar * name, const gchar ** atts); 78 virtual void endElement (const gchar * name); 79 /* 80 * but you get this one for free: 81 */ 82 virtual void charData (const gchar * buffer, int length); 83 84 /* If you don't wish the XML parser to use the standard/default file handler, you 85 * can provide your own via an implementation of UT_XML::Reader here: 86 */ 87 protected: setReader(UT_XML::Reader * pReader)88 void setReader (UT_XML::Reader * pReader) { m_pReader = pReader; } 89 private: 90 UT_XML::Reader * m_pReader; 91 92 /* If you wish to use a non-standard parser (e.g., for HTML), then maybe this 93 * is useful... 94 */ 95 protected: setParser(UT_XML * pParser)96 void setParser (UT_XML * pParser) { m_pParser = pParser; } stopParser(void)97 void stopParser(void) {if(m_pParser) m_pParser->stop();} 98 private: 99 UT_XML * m_pParser; 100 101 public: incOperationCount(void)102 void incOperationCount(void) { m_iOperationCount++; } getOperationCount(void)103 UT_uint32 getOperationCount(void) const { return m_iOperationCount; } 104 105 protected: 106 107 virtual UT_Error _loadFile(GsfInput * input); 108 int _mapNameToToken (const char * name, xmlToIdMapping * idlist, int len); 109 110 const gchar* _getXMLPropValue(const gchar *name, const gchar **atts); 111 112 UT_uint32 _getInlineDepth(void) const; 113 bool _pushInlineFmt(const gchar ** atts); 114 void _popInlineFmt(void); 115 116 typedef enum _parseState { _PS_Init, 117 _PS_Doc, 118 _PS_Sec, 119 _PS_Block, 120 _PS_DataSec, 121 _PS_DataItem, 122 _PS_StyleSec, 123 _PS_Style, 124 _PS_IgnoredWordsSec, 125 _PS_IgnoredWordsItem, 126 _PS_ListSec, 127 _PS_List, 128 _PS_Field, 129 _PS_PageSize, 130 _PS_MetaData, 131 _PS_Meta, 132 _PS_RevisionSec, 133 _PS_Revision, 134 _PS_AuthorSec, 135 _PS_Author, 136 _PS_HistorySec, 137 _PS_Table, 138 _PS_Cell, 139 _PS_Version, 140 _PS_RDFTriple, 141 _PS_RDFData, 142 } ParseState; 143 144 protected: 145 146 // TODO: make us private, refactor code 147 UT_Error m_error; 148 ParseState m_parseState; 149 150 gchar m_charDataSeen[4]; 151 UT_uint32 m_lenCharDataSeen; 152 UT_uint32 m_lenCharDataExpected; 153 UT_uint32 m_iOperationCount; 154 bool m_bSeenCR; 155 bool m_bWhiteSignificant; 156 bool m_bWasSpace; 157 158 UT_GenericVector<const gchar*> m_vecInlineFmt; 159 UT_NumberStack m_nstackFmtStartIndex; 160 161 UT_ByteBuf m_currentDataItem; 162 gchar * m_currentDataItemName; 163 std::string m_currentDataItemMimeType; 164 bool m_currentDataItemEncoded; 165 166 const char * m_szFileName; 167 168 std::string m_currentMetaDataName; 169 UT_uint32 m_currentRevisionId; 170 time_t m_currentRevisionTime; 171 UT_uint32 m_currentRevisionVersion; 172 173 // For reading RDF triples 174 std::string m_rdfSubject; 175 std::string m_rdfPredicate; 176 std::string m_rdfXSDType; 177 int m_rdfObjectType; 178 PD_DocumentRDFMutationHandle m_rdfMutation; 179 180 typedef std::map<std::string, UT_sint32> token_map_t; 181 token_map_t m_tokens; 182 183 private: 184 UT_uint32 m_iCharCount; 185 bool m_bStripLeading; 186 protected: _data_CharCount()187 UT_uint32 _data_CharCount () const { return m_iCharCount; } 188 void _data_NewBlock (); 189 }; 190 191 #endif /* IE_IMP_XML_H */ 192