1 /*
2 * Copyright 2005-2007 Gerald Schmidt.
3 *
4 * This file is part of Xml Copy Editor.
5 *
6 * Xml Copy Editor is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * Xml Copy Editor is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Xml Copy Editor; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "wrapxerces.h"
22 #include "pathresolver.h"
23 #include "xercesnetaccessor.h"
24
25 #include <xercesc/parsers/XercesDOMParser.hpp>
26 #include <xercesc/sax2/XMLReaderFactory.hpp>
27 #include <xercesc/sax2/SAX2XMLReader.hpp>
28 #include <xercesc/sax2/DefaultHandler.hpp>
29 #include <xercesc/util/XMLUni.hpp>
30 #include <xercesc/framework/MemBufInputSource.hpp>
31 #include <xercesc/framework/LocalFileInputSource.hpp>
32 #include <xercesc/framework/URLInputSource.hpp>
33 #include <sstream>
34 #include <utility>
35 #include <stdexcept>
36 #include <boost/scoped_ptr.hpp>
37 #include <boost/static_assert.hpp>
38
39 using namespace xercesc;
40
41 XMLNetAccessor *WrapXerces::mOriginalNetAccessor = NULL;
42
Init(bool enableNetAccess)43 void WrapXerces::Init ( bool enableNetAccess ) throw()
44 {
45 static class Initializer
46 {
47 public:
48 Initializer ()
49 {
50 XMLPlatformUtils::Initialize();
51
52 mOriginalNetAccessor = XMLPlatformUtils::fgNetAccessor;
53 if ( mOriginalNetAccessor != NULL )
54 {
55 mOriginalNetAccessor = new XercesNetAccessor ( mOriginalNetAccessor );
56 }
57 }
58 ~Initializer()
59 {
60 if ( mOriginalNetAccessor != NULL ) {
61 XMLPlatformUtils::fgNetAccessor = ( ( XercesNetAccessor * ) mOriginalNetAccessor )->getNetAccessor();
62 delete mOriginalNetAccessor;
63 }
64 XMLPlatformUtils::Terminate();
65 }
66 } dummy;
67
68 enableNetwork ( enableNetAccess );
69 }
70
WrapXerces()71 WrapXerces::WrapXerces()
72 {
73 catalogResolver = new XercesCatalogResolver();
74 }
75
~WrapXerces()76 WrapXerces::~WrapXerces()
77 {
78 delete catalogResolver;
79 }
80
81 // Returns true if the file is valid. But there can be warnings
validate(const wxString & fileName)82 bool WrapXerces::validate ( const wxString& fileName )
83 {
84 return validateMemory ( NULL, 0, fileName );
85 }
86
87 // tbd: cache grammar
88 // Returns true if the content is valid. But there can be warnings
validateMemory(const char * utf8Buffer,size_t len,const wxString & fileName,wxThread * thread,bool forceGrammarCheck,const wxChar * messageEOL)89 bool WrapXerces::validateMemory (
90 const char *utf8Buffer,
91 size_t len,
92 const wxString &fileName,
93 wxThread *thread /*= NULL*/,
94 bool forceGrammarCheck /*= true*/,
95 const wxChar *messageEOL /*= _T("[br]")*/)
96 {
97 #if 0 // Test DOM parser
98 boost::scoped_ptr<XercesDOMParser> parser ( new XercesDOMParser() );
99
100 parser->setDoNamespaces(true);
101 parser->setExitOnFirstFatalError(true);
102 parser->setValidationConstraintFatal(true);
103 //parser->setCreateEntityReferenceNodes(true); // Default is true
104 parser->setValidationScheme(XercesDOMParser::Val_Auto);
105 parser->setDoSchema(true);
106 parser->setValidationSchemaFullChecking(true);
107 parser->setCreateCommentNodes(false);
108 #else
109 boost::scoped_ptr<SAX2XMLReader> parser ( XMLReaderFactory::createXMLReader() );
110
111 parser->setFeature ( XMLUni::fgSAX2CoreNameSpaces, true );
112 parser->setFeature ( XMLUni::fgSAX2CoreValidation, true );
113 parser->setFeature ( XMLUni::fgXercesDynamic, !forceGrammarCheck );
114 parser->setFeature ( XMLUni::fgXercesSchema, true );
115 parser->setFeature ( XMLUni::fgXercesSchemaFullChecking, true);
116 parser->setFeature ( XMLUni::fgXercesValidationErrorAsFatal, true );
117 parser->setFeature ( XMLUni::fgXercesLoadExternalDTD, true );
118
119 mySAX2Handler.reset();
120 parser->setContentHandler ( &mySAX2Handler );
121 #endif
122
123 parser->setErrorHandler ( &mySAX2Handler );
124 //parser->setEntityResolver ( &handler );
125 parser->setEntityResolver ( catalogResolver );
126
127 mySAX2Handler.setEOL ( messageEOL );
128
129 boost::scoped_ptr<InputSource> source;
130 if ( utf8Buffer != NULL )
131 {
132 source.reset ( new MemBufInputSource ( (XMLByte*) utf8Buffer, len,
133 (const XMLCh *) toString ( fileName ).GetData() ) );
134 wxString utf8 = _T("UTF-8");
135 source->setEncoding ( (const XMLCh *) toString ( utf8 ).GetData() );
136 }
137 else
138 {
139 source.reset ( new LocalFileInputSource (
140 (const XMLCh *) toString ( fileName ).GetData() ) );
141 }
142 try
143 {
144 if ( thread == NULL )
145 {
146 parser->parse ( *source );
147 }
148 else if ( !thread->TestDestroy() )
149 {
150 XMLPScanToken token;
151 if ( parser->parseFirst ( *source, token ) )
152 while ( (!thread->TestDestroy()) && parser->parseNext ( token ) )
153 continue;
154 }
155 }
156 catch ( XMLException& e )
157 {
158 wxString error = toString ( e.getMessage() );
159 int i = error.Find( _T("Message:") );
160 if ( i != wxNOT_FOUND )
161 error = error.substr( i );
162 mySAX2Handler.getErrors() << error;
163 return false;
164 }
165 catch ( SAXParseException& e )
166 {
167 // It has already been processed in mySAX2Handler
168 return false;
169 }
170 catch ( ... )
171 {
172 if ( thread != NULL && thread->TestDestroy() )
173 throw;
174 mySAX2Handler.getErrors() << _("Unexpected validation error");
175 return false;
176 }
177
178 return true;//mySAX2Handler.getErrors().empty();
179 }
180
getMBConv()181 const wxMBConv &WrapXerces::getMBConv()
182 {
183 switch ( sizeof ( XMLCh ) )
184 {
185 case 1:
186 return wxConvUTF8;
187 case 2:
188 {
189 const static wxMBConvUTF16 conv = wxMBConvUTF16();
190 return conv;
191 }
192 case 4:
193 {
194 const static wxMBConvUTF32 conv = wxMBConvUTF32();
195 return conv;
196 }
197 default:
198 #ifdef BOOST_STATIC_ASSERT_MSG
199 BOOST_STATIC_ASSERT_MSG ( sizeof ( XMLCh ) == 2
200 , "Xerces-C doesn't use UTF-16 strings any more");
201 #else
202 BOOST_STATIC_ASSERT ( sizeof ( XMLCh ) == 2 );
203 #endif
204 break;
205 }
206 return wxConvUTF8;
207 }
208
toString(const XMLCh * str)209 wxString WrapXerces::toString ( const XMLCh *str )
210 {
211 return wxString ( ( const char * ) str, getMBConv() );
212 }
213
toString(const wxString & str)214 wxMemoryBuffer WrapXerces::toString ( const wxString &str )
215 {
216 const static XMLCh chNull = '\0'; // Xerces-C crashes when the file name is NULL. We'd better return something other than NULL.
217 wxMemoryBuffer buffer ( 0 );
218 const size_t lenWC = str.length() + 1; // Plus '\0'. This is important. Otherwise we can call wxString::mb_str(getMBConv()).
219 size_t lenMB = getMBConv().FromWChar ( NULL, 0, str.c_str(), lenWC );
220 if ( lenMB == wxCONV_FAILED )
221 {
222 buffer.AppendData ( &chNull, sizeof chNull );
223 return buffer;
224 }
225
226 buffer.SetBufSize ( lenMB );
227 lenMB = getMBConv().FromWChar ( ( char * ) buffer.GetData(), lenMB, str.c_str(), lenWC );
228 buffer.SetDataLen ( lenMB );
229
230 return buffer;
231 }
232
enableNetwork(bool enable)233 bool WrapXerces::enableNetwork ( bool enable /*= true*/ )
234 {
235 bool ret = XMLPlatformUtils::fgNetAccessor != NULL;
236 if ( enable )
237 {
238 wxASSERT ( mOriginalNetAccessor != NULL );
239 XMLPlatformUtils::fgNetAccessor = mOriginalNetAccessor;
240 }
241 else
242 {
243 XMLPlatformUtils::fgNetAccessor = NULL;
244 }
245 return ret;
246 }
247
logError(const wxString & type,wxLogLevel level,const SAXParseException & e)248 void MySAX2Handler::logError ( const wxString &type, wxLogLevel level,
249 const SAXParseException& e )
250 {
251 mErrors << wxString::Format (
252 _("%s at line %llu, column %llu: %s%s"),
253 type.c_str(), e.getLineNumber(), e.getColumnNumber(),
254 WrapXerces::toString ( e.getMessage() ).c_str(), mEOL.c_str() );
255
256 // Only save the first error position
257 BOOST_STATIC_ASSERT ( wxLOG_Error < wxLOG_Warning );
258 if ( level < mLevel || ( level == mLevel && mErrorPosition.first == 1
259 && mErrorPosition.second == 1 ) )
260 {
261 mErrorPosition.first = e.getLineNumber();
262 mErrorPosition.second = e.getColumnNumber();
263 mLevel = level;
264 }
265 }
266
resolveEntity(const wxString & publicId,const wxString & systemId,const wxString & fileName)267 InputSource *WrapXerces::resolveEntity (
268 const wxString &publicId,
269 const wxString &systemId,
270 const wxString &fileName
271 )
272 {
273 XercesCatalogResolver cr;
274 InputSource *source = cr.resolveEntity
275 ( ( const XMLCh * ) WrapXerces::toString ( publicId ).GetData()
276 , ( const XMLCh * ) WrapXerces::toString ( systemId ).GetData()
277 );
278 if ( source )
279 return source;
280
281 BOOST_STATIC_ASSERT ( sizeof( xmlChar ) == sizeof ( char ) );
282
283 // Xerces-C++ can't open a file URL when there are multi-byte characters.
284 // Let's use the file name instead.
285 wxString file = PathResolver::run ( systemId, fileName );
286 if ( wxFileExists ( file ) )
287 return new LocalFileInputSource (
288 ( const XMLCh * ) WrapXerces::toString ( file ).GetData() );
289
290 if (systemId.empty() && publicId.empty())
291 return NULL;
292
293 wxString fileURL = WrapLibxml::FileNameToURL ( fileName );
294 return new URLInputSource
295 ( ( const XMLCh * ) WrapXerces::toString ( fileURL ).GetData()
296 , ( const XMLCh * ) WrapXerces::toString ( systemId ).GetData()
297 , ( const XMLCh * ) WrapXerces::toString ( publicId ).GetData()
298 );
299 }
300
// Returns the first element child of element, or NULL if there is none.
// With Xerces-C 3.1 or later this forwards to DOMElement itself. Older
// versions use the fallback below, which also looks inside entity reference
// children.
DOMElement *WrapXerces::getFirstElementChild ( const DOMElement &element )
{
#if _XERCES_VERSION >= 30100
	return element.getFirstElementChild();
#else
	// Copied from Xerces-C
	for ( DOMNode *child = element.getFirstChild(); child != NULL;
	        child = child->getNextSibling() )
	{
		const int kind = child->getNodeType();
		if ( kind == DOMNode::ELEMENT_NODE )
		{
			return static_cast<DOMElement *> ( child );
		}
		if ( kind == DOMNode::ENTITY_REFERENCE_NODE )
		{
			// Search the subtree of the entity reference
			DOMElement *const nested = getFirstElementChild ( child );
			if ( nested != NULL )
			{
				return nested;
			}
		}
	}
	return NULL;
#endif
}
331
getFirstElementChild(const DOMNode * n)332 DOMElement *WrapXerces::getFirstElementChild ( const DOMNode *n )
333 {
334 // Copied from Xerces-C
335 const DOMNode *top = n;
336 while ( n )
337 {
338 if ( n->getNodeType() == DOMNode::ELEMENT_NODE )
339 return ( DOMElement * ) n;
340
341 DOMNode *next = n->getFirstChild();
342 while ( !next )
343 {
344 if (top == n)
345 break;
346
347 next = n->getNextSibling();
348 if ( !next )
349 {
350 n = n->getParentNode();
351 if ( top == n || !n )
352 return NULL;
353 }
354 }
355 n = next;
356 }
357 return NULL;
358 }
359
// Returns the next element that follows element among its siblings, or NULL
// if there is none. With Xerces-C 3.1 or later this forwards to DOMElement
// itself. Older versions use the fallback below, which follows logical
// siblings and also looks inside entity reference nodes.
DOMElement *WrapXerces::getNextElementSibling (
    const DOMElement &element )
{
#if _XERCES_VERSION >= 30100
	return element.getNextElementSibling();
#else
	// Copied from Xerces-C
	for ( DOMNode *sibling = getNextLogicalSibling ( &element ); sibling != NULL;
	        sibling = getNextLogicalSibling ( sibling ) )
	{
		const int kind = sibling->getNodeType();
		if ( kind == DOMNode::ELEMENT_NODE )
		{
			return static_cast<DOMElement *> ( sibling );
		}
		if ( kind == DOMNode::ENTITY_REFERENCE_NODE )
		{
			// Search the subtree of the entity reference
			DOMElement *const nested = getFirstElementChild ( sibling );
			if ( nested != NULL )
			{
				return nested;
			}
		}
	}
	return NULL;
#endif
}
389
// Returns the node that logically follows n at the same level, or NULL if
// there is none. n must not be NULL.
DOMNode *WrapXerces::getNextLogicalSibling (
    const DOMNode* n )
{
	// Copied from Xerces-C
	DOMNode *const directSibling = n->getNextSibling();
	if ( directSibling != NULL )
	{
		return directSibling;
	}

	// "n" has no following sibling. While its ancestors are entity reference
	// nodes, their following siblings are logically siblings of "n", so the
	// search continues there.
	for ( DOMNode *ancestor = n->getParentNode();
	        ancestor != NULL
	        && ancestor->getNodeType() == DOMNode::ENTITY_REFERENCE_NODE;
	        ancestor = ancestor->getParentNode() )
	{
		DOMNode *const candidate = ancestor->getNextSibling();
		if ( candidate != NULL )
		{
			return candidate;
		}
	}
	return NULL;
}
412