1 /*
2  * Copyright 2005-2007 Gerald Schmidt.
3  *
4  * This file is part of Xml Copy Editor.
5  *
6  * Xml Copy Editor is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * Xml Copy Editor is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with Xml Copy Editor; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  */
20 
21 #include "wrapxerces.h"
22 #include "pathresolver.h"
23 #include "xercesnetaccessor.h"
24 
25 #include <xercesc/parsers/XercesDOMParser.hpp>
26 #include <xercesc/sax2/XMLReaderFactory.hpp>
27 #include <xercesc/sax2/SAX2XMLReader.hpp>
28 #include <xercesc/sax2/DefaultHandler.hpp>
29 #include <xercesc/util/XMLUni.hpp>
30 #include <xercesc/framework/MemBufInputSource.hpp>
31 #include <xercesc/framework/LocalFileInputSource.hpp>
32 #include <xercesc/framework/URLInputSource.hpp>
33 #include <sstream>
34 #include <utility>
35 #include <stdexcept>
36 #include <boost/scoped_ptr.hpp>
37 #include <boost/static_assert.hpp>
38 
39 using namespace xercesc;
40 
41 XMLNetAccessor *WrapXerces::mOriginalNetAccessor = NULL;
42 
Init(bool enableNetAccess)43 void WrapXerces::Init ( bool enableNetAccess ) throw()
44 {
45 	static class Initializer
46 	{
47 	public:
48 		Initializer ()
49 		{
50 			XMLPlatformUtils::Initialize();
51 
52 			mOriginalNetAccessor = XMLPlatformUtils::fgNetAccessor;
53 			if ( mOriginalNetAccessor != NULL )
54 			{
55 				mOriginalNetAccessor = new XercesNetAccessor ( mOriginalNetAccessor );
56 			}
57 		}
58 		~Initializer()
59 		{
60 			if ( mOriginalNetAccessor != NULL ) {
61 				XMLPlatformUtils::fgNetAccessor = ( ( XercesNetAccessor * ) mOriginalNetAccessor )->getNetAccessor();
62 				delete mOriginalNetAccessor;
63 			}
64 			XMLPlatformUtils::Terminate();
65 		}
66 	} dummy;
67 
68 	enableNetwork ( enableNetAccess );
69 }
70 
WrapXerces()71 WrapXerces::WrapXerces()
72 {
73 	catalogResolver = new XercesCatalogResolver();
74 }
75 
~WrapXerces()76 WrapXerces::~WrapXerces()
77 {
78 	delete catalogResolver;
79 }
80 
81 // Returns true if the file is valid. But there can be warnings
validate(const wxString & fileName)82 bool WrapXerces::validate ( const wxString& fileName )
83 {
84 	return validateMemory ( NULL, 0, fileName );
85 }
86 
87 // tbd: cache grammar
88 // Returns true if the content is valid. But there can be warnings
validateMemory(const char * utf8Buffer,size_t len,const wxString & fileName,wxThread * thread,bool forceGrammarCheck,const wxChar * messageEOL)89 bool WrapXerces::validateMemory (
90 	const char *utf8Buffer,
91 	size_t len,
92 	const wxString &fileName,
93 	wxThread *thread /*= NULL*/,
94 	bool forceGrammarCheck /*= true*/,
95 	const wxChar *messageEOL /*= _T("[br]")*/)
96 {
97 #if 0 // Test DOM parser
98 	boost::scoped_ptr<XercesDOMParser> parser ( new XercesDOMParser() );
99 
100 	parser->setDoNamespaces(true);
101 	parser->setExitOnFirstFatalError(true);
102 	parser->setValidationConstraintFatal(true);
103 	//parser->setCreateEntityReferenceNodes(true); // Default is true
104 	parser->setValidationScheme(XercesDOMParser::Val_Auto);
105 	parser->setDoSchema(true);
106 	parser->setValidationSchemaFullChecking(true);
107 	parser->setCreateCommentNodes(false);
108 #else
109 	boost::scoped_ptr<SAX2XMLReader> parser ( XMLReaderFactory::createXMLReader() );
110 
111 	parser->setFeature ( XMLUni::fgSAX2CoreNameSpaces, true );
112 	parser->setFeature ( XMLUni::fgSAX2CoreValidation, true );
113 	parser->setFeature ( XMLUni::fgXercesDynamic, !forceGrammarCheck );
114 	parser->setFeature ( XMLUni::fgXercesSchema, true );
115 	parser->setFeature ( XMLUni::fgXercesSchemaFullChecking, true);
116 	parser->setFeature ( XMLUni::fgXercesValidationErrorAsFatal, true );
117 	parser->setFeature ( XMLUni::fgXercesLoadExternalDTD, true );
118 
119 	mySAX2Handler.reset();
120 	parser->setContentHandler ( &mySAX2Handler );
121 #endif
122 
123 	parser->setErrorHandler ( &mySAX2Handler );
124 	//parser->setEntityResolver ( &handler );
125 	parser->setEntityResolver ( catalogResolver );
126 
127 	mySAX2Handler.setEOL ( messageEOL );
128 
129 	boost::scoped_ptr<InputSource> source;
130 	if ( utf8Buffer != NULL )
131 	{
132 		source.reset ( new MemBufInputSource ( (XMLByte*) utf8Buffer, len,
133 				(const XMLCh *) toString ( fileName ).GetData() ) );
134 		wxString utf8 = _T("UTF-8");
135 		source->setEncoding ( (const XMLCh *) toString ( utf8 ).GetData() );
136 	}
137 	else
138 	{
139 		source.reset ( new LocalFileInputSource (
140 				(const XMLCh *) toString ( fileName ).GetData() ) );
141 	}
142 	try
143 	{
144 		if ( thread == NULL )
145 		{
146 			parser->parse ( *source );
147 		}
148 		else if ( !thread->TestDestroy() )
149 		{
150 			XMLPScanToken token;
151 			if ( parser->parseFirst ( *source, token ) )
152 				while ( (!thread->TestDestroy()) && parser->parseNext ( token ) )
153 					continue;
154 		}
155 	}
156 	catch ( XMLException& e )
157 	{
158 		wxString error = toString ( e.getMessage() );
159 		int i = error.Find( _T("Message:") );
160 		if ( i != wxNOT_FOUND )
161 			error = error.substr( i );
162 		mySAX2Handler.getErrors() << error;
163 		return false;
164 	}
165 	catch ( SAXParseException& e )
166 	{
167 		// It has already been processed in mySAX2Handler
168 		return false;
169 	}
170 	catch ( ... )
171 	{
172 		if ( thread != NULL && thread->TestDestroy() )
173 			throw;
174 		mySAX2Handler.getErrors() << _("Unexpected validation error");
175 		return false;
176 	}
177 
178 	return true;//mySAX2Handler.getErrors().empty();
179 }
180 
getMBConv()181 const wxMBConv &WrapXerces::getMBConv()
182 {
183 	switch ( sizeof ( XMLCh ) )
184 	{
185 	case 1:
186 		return wxConvUTF8;
187 	case 2:
188 	{
189 		const static wxMBConvUTF16 conv = wxMBConvUTF16();
190 		return conv;
191 	}
192 	case 4:
193 	{
194 		const static wxMBConvUTF32 conv = wxMBConvUTF32();
195 		return conv;
196 	}
197 	default:
198 #ifdef BOOST_STATIC_ASSERT_MSG
199 		BOOST_STATIC_ASSERT_MSG ( sizeof ( XMLCh ) == 2
200 			, "Xerces-C doesn't use UTF-16 strings any more");
201 #else
202 		BOOST_STATIC_ASSERT ( sizeof ( XMLCh ) == 2 );
203 #endif
204 		break;
205 	}
206 	return wxConvUTF8;
207 }
208 
toString(const XMLCh * str)209 wxString WrapXerces::toString ( const XMLCh *str )
210 {
211 	return wxString ( ( const char * ) str, getMBConv() );
212 }
213 
toString(const wxString & str)214 wxMemoryBuffer WrapXerces::toString ( const wxString &str )
215 {
216 	const static XMLCh chNull = '\0'; // Xerces-C crashes when the file name is NULL. We'd better return something other than NULL.
217 	wxMemoryBuffer buffer ( 0 );
218 	const size_t lenWC = str.length() + 1; // Plus '\0'. This is important. Otherwise we can call wxString::mb_str(getMBConv()).
219 	size_t lenMB = getMBConv().FromWChar ( NULL, 0, str.c_str(), lenWC );
220 	if ( lenMB == wxCONV_FAILED )
221 	{
222 		buffer.AppendData ( &chNull, sizeof chNull );
223 		return buffer;
224 	}
225 
226 	buffer.SetBufSize ( lenMB );
227 	lenMB = getMBConv().FromWChar ( ( char * ) buffer.GetData(), lenMB, str.c_str(), lenWC );
228 	buffer.SetDataLen ( lenMB );
229 
230 	return buffer;
231 }
232 
enableNetwork(bool enable)233 bool WrapXerces::enableNetwork ( bool enable /*= true*/ )
234 {
235 	bool ret = XMLPlatformUtils::fgNetAccessor != NULL;
236 	if ( enable )
237 	{
238 		wxASSERT ( mOriginalNetAccessor != NULL );
239 		XMLPlatformUtils::fgNetAccessor = mOriginalNetAccessor;
240 	}
241 	else
242 	{
243 		XMLPlatformUtils::fgNetAccessor = NULL;
244 	}
245 	return ret;
246 }
247 
logError(const wxString & type,wxLogLevel level,const SAXParseException & e)248 void MySAX2Handler::logError ( const wxString &type, wxLogLevel level,
249 		const SAXParseException& e )
250 {
251 	mErrors << wxString::Format (
252 			_("%s at line %llu, column %llu: %s%s"),
253 			type.c_str(), e.getLineNumber(), e.getColumnNumber(),
254 			WrapXerces::toString ( e.getMessage() ).c_str(), mEOL.c_str() );
255 
256 	// Only save the first error position
257 	BOOST_STATIC_ASSERT ( wxLOG_Error < wxLOG_Warning );
258 	if ( level < mLevel	|| ( level == mLevel && mErrorPosition.first == 1
259 			&& mErrorPosition.second == 1 ) )
260 	{
261 		mErrorPosition.first = e.getLineNumber();
262 		mErrorPosition.second = e.getColumnNumber();
263 		mLevel = level;
264 	}
265 }
266 
resolveEntity(const wxString & publicId,const wxString & systemId,const wxString & fileName)267 InputSource *WrapXerces::resolveEntity (
268 	const wxString &publicId,
269 	const wxString &systemId,
270 	const wxString &fileName
271 )
272 {
273 	XercesCatalogResolver cr;
274 	InputSource *source = cr.resolveEntity
275 			( ( const XMLCh * ) WrapXerces::toString ( publicId ).GetData()
276 			, ( const XMLCh * ) WrapXerces::toString ( systemId ).GetData()
277 			);
278 	if ( source )
279 		return source;
280 
281 	BOOST_STATIC_ASSERT ( sizeof( xmlChar ) == sizeof ( char ) );
282 
283 	// Xerces-C++ can't open a file URL when there are multi-byte characters.
284 	// Let's use the file name instead.
285 	wxString file = PathResolver::run ( systemId, fileName );
286 	if ( wxFileExists ( file ) )
287 		return new LocalFileInputSource (
288 				( const XMLCh * ) WrapXerces::toString ( file ).GetData() );
289 
290 	if (systemId.empty() && publicId.empty())
291 		return NULL;
292 
293 	wxString fileURL = WrapLibxml::FileNameToURL ( fileName );
294 	return new URLInputSource
295 		( ( const XMLCh * ) WrapXerces::toString ( fileURL ).GetData()
296 		, ( const XMLCh * ) WrapXerces::toString ( systemId ).GetData()
297 		, ( const XMLCh * ) WrapXerces::toString ( publicId ).GetData()
298 		);
299 }
300 
// Returns the first child element of the given element, or NULL if there is
// none. Uses the Xerces-C implementation when _XERCES_VERSION >= 30100;
// otherwise the fallback below also searches inside entity reference nodes.
DOMElement *WrapXerces::getFirstElementChild ( const DOMElement &element )
{
#if _XERCES_VERSION >= 30100
	return element.getFirstElementChild();
#else
	// Copied from Xerces-C
	for ( DOMNode *child = element.getFirstChild();
			child != NULL;
			child = child->getNextSibling() )
	{
		const int kind = child->getNodeType();

		if ( kind == DOMNode::ELEMENT_NODE )
			return static_cast<DOMElement *> ( child );

		if ( kind == DOMNode::ENTITY_REFERENCE_NODE )
		{
			// Search the subtree of the entity reference
			DOMElement *nested = getFirstElementChild ( child );
			if ( nested != NULL )
				return nested;
		}
	}
	return NULL;
#endif
}
331 
getFirstElementChild(const DOMNode * n)332 DOMElement *WrapXerces::getFirstElementChild ( const DOMNode *n )
333 {
334 	// Copied from Xerces-C
335 	const DOMNode *top = n;
336 	while ( n )
337 	{
338 		if ( n->getNodeType() == DOMNode::ELEMENT_NODE )
339 			return ( DOMElement * ) n;
340 
341 		DOMNode *next = n->getFirstChild();
342 		while ( !next )
343 		{
344 			if (top == n)
345 				break;
346 
347 			next = n->getNextSibling();
348 			if ( !next )
349 			{
350 				n = n->getParentNode();
351 				if ( top == n || !n )
352 					return NULL;
353 			}
354 		}
355 		n = next;
356 	}
357 	return NULL;
358 }
359 
// Returns the next sibling element of the given element, or NULL if there
// is none. Uses the Xerces-C implementation when _XERCES_VERSION >= 30100;
// otherwise the fallback below follows logical siblings and also searches
// inside entity reference nodes.
DOMElement *WrapXerces::getNextElementSibling (
	const DOMElement &element )
{
#if _XERCES_VERSION >= 30100
	return element.getNextElementSibling();
#else
	// Copied from Xerces-C
	for ( DOMNode *candidate = getNextLogicalSibling ( &element );
			candidate != NULL;
			candidate = getNextLogicalSibling ( candidate ) )
	{
		const int kind = candidate->getNodeType();

		if ( kind == DOMNode::ELEMENT_NODE )
			return static_cast<DOMElement *> ( candidate );

		if ( kind == DOMNode::ENTITY_REFERENCE_NODE )
		{
			// Search the subtree of the entity reference
			DOMElement *nested = getFirstElementChild ( candidate );
			if ( nested != NULL )
				return nested;
		}
	}
	return NULL;
#endif
}
389 
// Returns the node that logically follows n among its siblings, or NULL if
// there is none. n must not be NULL.
DOMNode *WrapXerces::getNextLogicalSibling (
	const DOMNode* n )
{
	// Copied from Xerces-C
	DOMNode *sibling = n->getNextSibling();
	if ( sibling != NULL )
		return sibling;

	// "n" is the last child. While the ancestors are entity reference nodes,
	// their following siblings are logically siblings of "n" as well, so
	// continue the search there.
	for ( DOMNode *ancestor = n->getParentNode();
			ancestor != NULL
				&& ancestor->getNodeType() == DOMNode::ENTITY_REFERENCE_NODE;
			ancestor = ancestor->getParentNode() )
	{
		sibling = ancestor->getNextSibling();
		if ( sibling != NULL )
			return sibling;
	}
	return NULL;
}
412