1 //* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
2 
3 /* AbiWord
4  * Copyright (C) 2001 AbiSource, Inc.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301 USA.
20  */
21 
22 
23 #ifndef IE_IMP_XML_H
24 #define IE_IMP_XML_H
25 
26 #include <stdio.h>
27 #include <string>
28 #include <map>
29 
30 #include "ut_xml.h"
31 
32 #include "ut_vector.h"
33 #include "ut_stack.h"
34 #include "ie_imp.h"
35 #include "ut_bytebuf.h"
36 #include "ut_string_class.h"
37 
38 class PD_Document;
39 
40 #include <boost/shared_ptr.hpp>
41 class PD_DocumentRDFMutation;
42 typedef boost::shared_ptr<PD_DocumentRDFMutation> PD_DocumentRDFMutationHandle;
43 
44 
/**
 * One entry of a name-to-token table: a C string paired with an integer id.
 *
 * Arrays of these entries are what IE_Imp_XML::_mapNameToToken() receives
 * (together with the number of entries) when mapping a name to a token.
 * The struct is a plain aggregate, so tables can be written with brace
 * initialisers.  m_name is a non-owning pointer.
 */
struct ABI_EXPORT xmlToIdMapping {
    const char *m_name;   // name to match
    int m_type;           // integer token/id associated with m_name
};
49 
// The importer/reader for generic XML documents.
// Currently, the importers for the following formats derive from it:
//
// ABW, AWT, GZABW
// DBK
// WML
// XHTML
// XSL-FO
// KWORD 1 && 2 (soon)
59 
60 class ABI_EXPORT IE_Imp_XML : public IE_Imp, public UT_XML::Listener
61 {
62 public:
63     IE_Imp_XML(PD_Document * pDocument, bool whiteSignificant);
64     virtual ~IE_Imp_XML();
65     virtual UT_Error	importFile(const char * data, UT_uint32 length);
66 	virtual UT_Error    importFile(const UT_ByteBuf * data);
67 
68 	virtual bool		pasteFromBuffer(PD_DocumentRange * pDocRange,
69 										const unsigned char * pData,
70 										UT_uint32 lenData,
71 										const char * szEncoding = 0);
72 
73     /* (Partial) Implementation of UT_XML::Listener
74      *
75      * You *must* override these next two methods:
76      */
77     virtual void startElement (const gchar * name, const gchar ** atts);
78     virtual void endElement (const gchar * name);
79     /*
80      * but you get this one for free:
81      */
82     virtual void charData (const gchar * buffer, int length);
83 
84     /* If you don't wish the XML parser to use the standard/default file handler, you
85      * can provide your own via an implementation of UT_XML::Reader here:
86      */
87 protected:
setReader(UT_XML::Reader * pReader)88     void setReader (UT_XML::Reader * pReader) { m_pReader = pReader; }
89 private:
90     UT_XML::Reader * m_pReader;
91 
92     /* If you wish to use a non-standard parser (e.g., for HTML), then maybe this
93      * is useful...
94      */
95 protected:
setParser(UT_XML * pParser)96     void setParser (UT_XML * pParser) { m_pParser = pParser; }
stopParser(void)97 	void stopParser(void) {if(m_pParser) m_pParser->stop();}
98 private:
99     UT_XML * m_pParser;
100 
101 public:
incOperationCount(void)102     void		    incOperationCount(void) { m_iOperationCount++; }
getOperationCount(void)103     UT_uint32		getOperationCount(void) const { return m_iOperationCount; }
104 
105 protected:
106 
107     virtual UT_Error	_loadFile(GsfInput * input);
108     int             _mapNameToToken (const char * name, xmlToIdMapping * idlist, int len);
109 
110     const gchar* _getXMLPropValue(const gchar *name, const gchar **atts);
111 
112     UT_uint32		_getInlineDepth(void) const;
113     bool			_pushInlineFmt(const gchar ** atts);
114     void			_popInlineFmt(void);
115 
116     typedef enum _parseState { _PS_Init,
117 			       _PS_Doc,
118 			       _PS_Sec,
119 			       _PS_Block,
120 			       _PS_DataSec,
121 			       _PS_DataItem,
122 			       _PS_StyleSec,
123 			       _PS_Style,
124 			       _PS_IgnoredWordsSec,
125 			       _PS_IgnoredWordsItem,
126 			       _PS_ListSec,
127 			       _PS_List,
128 			       _PS_Field,
129 			       _PS_PageSize,
130 			       _PS_MetaData,
131 				   _PS_Meta,
132 				   _PS_RevisionSec,
133 				   _PS_Revision,
134 				   _PS_AuthorSec,
135 				   _PS_Author,
136 				   _PS_HistorySec,
137 				   _PS_Table,
138 				   _PS_Cell,
139 				   _PS_Version,
140 				   _PS_RDFTriple,
141 				   _PS_RDFData,
142     } ParseState;
143 
144  protected:
145 
146     // TODO: make us private, refactor code
147     UT_Error        m_error;
148     ParseState      m_parseState;
149 
150     gchar		m_charDataSeen[4];
151     UT_uint32		m_lenCharDataSeen;
152     UT_uint32		m_lenCharDataExpected;
153     UT_uint32		m_iOperationCount;
154     bool			m_bSeenCR;
155     bool            m_bWhiteSignificant;
156     bool            m_bWasSpace;
157 
158     UT_GenericVector<const gchar*> m_vecInlineFmt;
159     UT_NumberStack		m_nstackFmtStartIndex;
160 
161     UT_ByteBuf		m_currentDataItem;
162     gchar *		m_currentDataItemName;
163     std::string		m_currentDataItemMimeType;
164     bool			m_currentDataItemEncoded;
165 
166 	const char *	m_szFileName;
167 
168 	std::string		m_currentMetaDataName;
169 	UT_uint32       m_currentRevisionId;
170 	time_t          m_currentRevisionTime;
171 	UT_uint32       m_currentRevisionVersion;
172 
173     // For reading RDF triples
174     std::string     m_rdfSubject;
175     std::string     m_rdfPredicate;
176     std::string     m_rdfXSDType;
177     int             m_rdfObjectType;
178     PD_DocumentRDFMutationHandle m_rdfMutation;
179 
180 	typedef std::map<std::string, UT_sint32> token_map_t;
181 	token_map_t m_tokens;
182 
183 private:
184 	UT_uint32	m_iCharCount;
185 	bool		m_bStripLeading;
186 protected:
_data_CharCount()187 	UT_uint32	_data_CharCount () const { return m_iCharCount; }
188 	void		_data_NewBlock ();
189 };
190 
191 #endif /* IE_IMP_XML_H */
192