1 // =================================================================================================
2 // Copyright 2005-2008 Adobe Systems Incorporated
3 // All Rights Reserved.
4 //
5 // NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
6 // of the Adobe license agreement accompanying it.
7 // =================================================================================================
8 
9 #include "XMP_Environment.h"	// ! Must be the first #include!
10 #include "XMPCore_Impl.hpp"
11 
12 #include "ExpatAdapter.hpp"
13 #include "XMPMeta.hpp"
14 
15 #include "expat.h"
16 
17 #include <string.h>
18 
19 using namespace std;
20 
21 #if XMP_WinBuild
22 	#pragma warning ( disable : 4996 )	// '...' was declared deprecated
23 #endif
24 
25 // *** Set memory handlers.
26 
// Define DumpXMLParseEvents as nonzero before this point to have debug builds log each Expat
// callback to the adapter's parseLog. It defaults to off.
#ifndef DumpXMLParseEvents
	#define DumpXMLParseEvents	0
#endif

// Separator character handed to XML_ParserCreateNS. Expat delivers full names as the namespace URI,
// this separator, then the local name; SetQualName splits the full name on it.
#define FullNameSeparator	'@'
32 
33 // =================================================================================================
34 
// Forward declarations of the file-local Expat callbacks. The ExpatAdapter constructor registers
// them with the parser and passes the adapter itself as the userData value.

// Namespace declaration callbacks.
static void StartNamespaceDeclHandler    ( void * userData, XMP_StringPtr prefix, XMP_StringPtr uri );
static void EndNamespaceDeclHandler      ( void * userData, XMP_StringPtr prefix );

// Element start and end callbacks.
static void StartElementHandler          ( void * userData, XMP_StringPtr name, XMP_StringPtr* attrs );
static void EndElementHandler            ( void * userData, XMP_StringPtr name );

// Character data and CDATA section callbacks.
static void CharacterDataHandler         ( void * userData, XMP_StringPtr cData, int len );
static void StartCdataSectionHandler     ( void * userData );
static void EndCdataSectionHandler       ( void * userData );

// Processing instruction and comment callbacks.
static void ProcessingInstructionHandler ( void * userData, XMP_StringPtr target, XMP_StringPtr data );
static void CommentHandler               ( void * userData, XMP_StringPtr comment );
47 
48 #if BanAllEntityUsage
49 
50 	// For now we do this by banning DOCTYPE entirely. This is easy and consistent with what is
51 	// available in recent Java XML parsers. Another, somewhat less drastic, approach would be to
52 	// ban all entity declarations. We can't allow declarations and ban references, Expat does not
53 	// call the SkippedEntityHandler for references in attribute values.
54 
55 	// ! Standard entities (&amp;, &lt;, &gt;, &quot;, &apos;, and numeric character references) are
56 	// ! not banned. Expat handles them transparently no matter what.
57 
58 	static void StartDoctypeDeclHandler ( void * userData, XMP_StringPtr doctypeName,
59 										  XMP_StringPtr sysid, XMP_StringPtr pubid, int has_internal_subset );
60 
61 #endif
62 
63 // =================================================================================================
64 
XMP_NewExpatAdapter()65 extern "C" ExpatAdapter * XMP_NewExpatAdapter()
66 {
67 	return new ExpatAdapter;
68 }	// XMP_NewExpatAdapter
69 
70 // =================================================================================================
71 
ExpatAdapter()72 ExpatAdapter::ExpatAdapter() : parser(0)
73 {
74 
75 	#if XMP_DebugBuild
76 		this->elemNesting = 0;
77 		#if DumpXMLParseEvents
78 			if ( this->parseLog == 0 ) this->parseLog = stdout;
79 		#endif
80 	#endif
81 
82 	this->parser = XML_ParserCreateNS ( 0, FullNameSeparator );
83 	if ( this->parser == 0 ) XMP_Throw ( "Failure creating Expat parser", kXMPErr_ExternalFailure );
84 
85 	XML_SetUserData ( this->parser, this );
86 
87 	XML_SetNamespaceDeclHandler ( this->parser, StartNamespaceDeclHandler, EndNamespaceDeclHandler );
88 	XML_SetElementHandler ( this->parser, StartElementHandler, EndElementHandler );
89 
90 	XML_SetCharacterDataHandler ( this->parser, CharacterDataHandler );
91 	XML_SetCdataSectionHandler ( this->parser, StartCdataSectionHandler, EndCdataSectionHandler );
92 
93 	XML_SetProcessingInstructionHandler ( this->parser, ProcessingInstructionHandler );
94 	XML_SetCommentHandler ( this->parser, CommentHandler );
95 
96 	#if BanAllEntityUsage
97 		XML_SetStartDoctypeDeclHandler ( this->parser, StartDoctypeDeclHandler );
98 		isAborted = false;
99 	#endif
100 
101 	this->parseStack.push_back ( &this->tree );	// Push the XML root node.
102 
103 }	// ExpatAdapter::ExpatAdapter
104 
105 // =================================================================================================
106 
~ExpatAdapter()107 ExpatAdapter::~ExpatAdapter()
108 {
109 
110 	if ( this->parser != 0 ) XML_ParserFree ( this->parser );
111 	this->parser = 0;
112 
113 }	// ExpatAdapter::~ExpatAdapter
114 
115 // =================================================================================================
116 
// Debug builds only: storage for the formatted Expat error text used by the detailed error
// reporting in ParseBuffer. That reporting code is currently compiled out with "#if 0".
#if XMP_DebugBuild
	static XMP_VarString sExpatMessage;
#endif

// One-byte stand-in that ParseBuffer gives to Expat when asked to finish with an empty buffer.
static const char * kOneSpace = " ";
122 
ParseBuffer(const void * buffer,size_t length,bool last)123 void ExpatAdapter::ParseBuffer ( const void * buffer, size_t length, bool last /* = true */ )
124 {
125 	enum XML_Status status;
126 
127 	if ( length == 0 ) {	// Expat does not like empty buffers.
128 		if ( ! last ) return;
129 		buffer = kOneSpace;
130 		length = 1;
131 	}
132 
133 	status = XML_Parse ( this->parser, (const char *)buffer, length, last );
134 
135 	#if BanAllEntityUsage
136 		if ( this->isAborted ) XMP_Throw ( "DOCTYPE is not allowed", kXMPErr_BadXML );
137 	#endif
138 
139 	if ( status != XML_STATUS_OK ) {
140 
141 		XMP_StringPtr errMsg = "XML parsing failure";
142 
143 		#if 0	// XMP_DebugBuild	// Disable for now to make test output uniform. Restore later with thread safety.
144 
145 			// *** This is a good candidate for a callback error notification mechanism.
146 			// *** This code is not thread safe, the sExpatMessage isn't locked. But that's OK for debug usage.
147 
148 			enum XML_Error expatErr = XML_GetErrorCode ( this->parser );
149 			const char *   expatMsg = XML_ErrorString ( expatErr );
150 			int errLine = XML_GetCurrentLineNumber ( this->parser );
151 
152 			char msgBuffer[1000];
153 			// AUDIT: Use of sizeof(msgBuffer) for snprintf length is safe.
154 			snprintf ( msgBuffer, sizeof(msgBuffer), "# Expat error %d at line %d, \"%s\"", expatErr, errLine, expatMsg );
155 			sExpatMessage = msgBuffer;
156 			errMsg = sExpatMessage.c_str();
157 
158 			#if  DumpXMLParseEvents
159 				if ( this->parseLog != 0 ) fprintf ( this->parseLog, "%s\n", errMsg, expatErr, errLine, expatMsg );
160 			#endif
161 
162 		#endif
163 
164 		XMP_Throw ( errMsg, kXMPErr_BadXML );
165 
166 	}
167 
168 }	// ExpatAdapter::ParseBuffer
169 
170 // =================================================================================================
171 // =================================================================================================
172 
173 #if XMP_DebugBuild & DumpXMLParseEvents
174 
PrintIndent(FILE * file,size_t count)175 	static inline void PrintIndent ( FILE * file, size_t count )
176 	{
177 		for ( ; count > 0; --count ) fprintf ( file, "  " );
178 	}
179 
180 #endif
181 
182 // =================================================================================================
183 
SetQualName(XMP_StringPtr fullName,XML_Node * node)184 static void SetQualName ( XMP_StringPtr fullName, XML_Node * node )
185 {
186 	// Expat delivers the full name as a catenation of namespace URI, separator, and local name.
187 
188 	// As a compatibility hack, an "about" or "ID" attribute of an rdf:Description element is
189 	// changed to "rdf:about" or rdf:ID. Easier done here than in the RDF recognizer.
190 
191 	// As a bug fix hack, change a URI of "http://purl.org/dc/1.1/" to ""http://purl.org/dc/elements/1.1/.
192 	// Early versions of Flash that put XMP in SWF used a bad URI for the dc: namespace.
193 
194 	// ! This code presumes the RDF namespace prefix is "rdf".
195 
196 	size_t sepPos = strlen(fullName);
197 	for ( --sepPos; sepPos > 0; --sepPos ) {
198 		if ( fullName[sepPos] == FullNameSeparator ) break;
199 	}
200 
201 	if ( fullName[sepPos] == FullNameSeparator ) {
202 
203 		XMP_StringPtr prefix;
204 		XMP_StringLen prefixLen;
205 		XMP_StringPtr localPart = fullName + sepPos + 1;
206 
207 		node->ns.assign ( fullName, sepPos );
208 		if ( node->ns == "http://purl.org/dc/1.1/" ) node->ns = "http://purl.org/dc/elements/1.1/";
209 
210 		bool found = XMPMeta::GetNamespacePrefix ( node->ns.c_str(), &prefix, &prefixLen );
211 		if ( ! found ) XMP_Throw ( "Unknown URI in Expat full name", kXMPErr_ExternalFailure );
212 		node->nsPrefixLen = prefixLen;	// ! Includes the ':'.
213 
214 		node->name = prefix;
215 		node->name += localPart;
216 
217 	} else {
218 
219 		node->name = fullName;	// The name is not in a namespace.
220 
221 		if ( node->parent->name == "rdf:Description" ) {
222 			if ( node->name == "about" ) {
223 				node->ns   = kXMP_NS_RDF;
224 				node->name = "rdf:about";
225 				node->nsPrefixLen = 4;	// ! Include the ':'.
226 			} else if ( node->name == "ID" ) {
227 				node->ns   = kXMP_NS_RDF;
228 				node->name = "rdf:ID";
229 				node->nsPrefixLen = 4;	// ! Include the ':'.
230 			}
231 		}
232 
233 	}
234 
235 }	// SetQualName
236 
237 // =================================================================================================
238 
StartNamespaceDeclHandler(void * userData,XMP_StringPtr prefix,XMP_StringPtr uri)239 static void StartNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix, XMP_StringPtr uri )
240 {
241 	IgnoreParam(userData);
242 
243 	// As a bug fix hack, change a URI of "http://purl.org/dc/1.1/" to ""http://purl.org/dc/elements/1.1/.
244 	// Early versions of Flash that put XMP in SWF used a bad URI for the dc: namespace.
245 
246 	#if XMP_DebugBuild & DumpXMLParseEvents		// Avoid unused variable warning.
247 		ExpatAdapter * thiz = (ExpatAdapter*)userData;
248 	#endif
249 
250 	if ( prefix == 0 ) prefix = "_dflt_";	// Have default namespace.
251 	if ( uri == 0 ) return;	// Ignore, have xmlns:pre="", no URI to register.
252 
253 	#if XMP_DebugBuild & DumpXMLParseEvents
254 		if ( thiz->parseLog != 0 ) {
255 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
256 			fprintf ( thiz->parseLog, "StartNamespace: %s - \"%s\"\n", prefix, uri );
257 		}
258 	#endif
259 
260 	if ( XMP_LitMatch ( uri, "http://purl.org/dc/1.1/" ) ) uri = "http://purl.org/dc/elements/1.1/";
261 	(void) XMPMeta::RegisterNamespace ( uri, prefix, &voidStringPtr, &voidStringLen );
262 
263 }	// StartNamespaceDeclHandler
264 
265 // =================================================================================================
266 
EndNamespaceDeclHandler(void * userData,XMP_StringPtr prefix)267 static void EndNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix )
268 {
269 	IgnoreParam(userData);
270 
271 	#if XMP_DebugBuild & DumpXMLParseEvents		// Avoid unused variable warning.
272 		ExpatAdapter * thiz = (ExpatAdapter*)userData;
273 	#endif
274 
275 	if ( prefix == 0 ) prefix = "_dflt_";	// Have default namespace.
276 
277 	#if XMP_DebugBuild & DumpXMLParseEvents
278 		if ( thiz->parseLog != 0 ) {
279 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
280 			fprintf ( thiz->parseLog, "EndNamespace: %s\n", prefix );
281 		}
282 	#endif
283 
284 	// ! Nothing to do, Expat has done all of the XML processing.
285 
286 }	// EndNamespaceDeclHandler
287 
288 // =================================================================================================
289 
StartElementHandler(void * userData,XMP_StringPtr name,XMP_StringPtr * attrs)290 static void StartElementHandler ( void * userData, XMP_StringPtr name, XMP_StringPtr* attrs )
291 {
292 	XMP_Assert ( attrs != 0 );
293 	ExpatAdapter * thiz = (ExpatAdapter*)userData;
294 
295 	size_t attrCount = 0;
296 	for ( XMP_StringPtr* a = attrs; *a != 0; ++a ) ++attrCount;
297 	if ( (attrCount & 1) != 0 )	XMP_Throw ( "Expat attribute info has odd length", kXMPErr_ExternalFailure );
298 	attrCount = attrCount/2;	// They are name/value pairs.
299 
300 	#if XMP_DebugBuild & DumpXMLParseEvents
301 		if ( thiz->parseLog != 0 ) {
302 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
303 			fprintf ( thiz->parseLog, "StartElement: %s, %d attrs", name, attrCount );
304 			for ( XMP_StringPtr* attr = attrs; *attr != 0; attr += 2 ) {
305 				XMP_StringPtr attrName = *attr;
306 				XMP_StringPtr attrValue = *(attr+1);
307 				fprintf ( thiz->parseLog, ", %s = \"%s\"", attrName, attrValue );
308 			}
309 			fprintf ( thiz->parseLog, "\n" );
310 		}
311 	#endif
312 
313 	XML_Node * parentNode = thiz->parseStack.back();
314 	XML_Node * elemNode   = new XML_Node ( parentNode, "", kElemNode );
315 
316 	SetQualName ( name, elemNode );
317 
318 	for ( XMP_StringPtr* attr = attrs; *attr != 0; attr += 2 ) {
319 
320 		XMP_StringPtr attrName = *attr;
321 		XMP_StringPtr attrValue = *(attr+1);
322 		XML_Node * attrNode = new XML_Node ( elemNode, "", kAttrNode );
323 
324 		SetQualName ( attrName, attrNode );
325 		attrNode->value = attrValue;
326 		if ( attrNode->name == "xml:lang" ) NormalizeLangValue ( &attrNode->value );
327 		elemNode->attrs.push_back ( attrNode );
328 
329 	}
330 
331 	parentNode->content.push_back ( elemNode );
332 	thiz->parseStack.push_back ( elemNode );
333 
334 	if ( elemNode->name == "rdf:RDF" ) {
335 		thiz->rootNode = elemNode;
336 		++thiz->rootCount;
337 	}
338 	#if XMP_DebugBuild
339 		++thiz->elemNesting;
340 	#endif
341 
342 }	// StartElementHandler
343 
344 // =================================================================================================
345 
EndElementHandler(void * userData,XMP_StringPtr name)346 static void EndElementHandler ( void * userData, XMP_StringPtr name )
347 {
348 	IgnoreParam(name);
349 
350 	ExpatAdapter * thiz = (ExpatAdapter*)userData;
351 
352 	#if XMP_DebugBuild
353 		--thiz->elemNesting;
354 	#endif
355 	(void) thiz->parseStack.pop_back();
356 
357 	#if XMP_DebugBuild & DumpXMLParseEvents
358 		if ( thiz->parseLog != 0 ) {
359 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
360 			fprintf ( thiz->parseLog, "EndElement: %s\n", name );
361 		}
362 	#endif
363 
364 }	// EndElementHandler
365 
366 // =================================================================================================
367 
CharacterDataHandler(void * userData,XMP_StringPtr cData,int len)368 static void CharacterDataHandler ( void * userData, XMP_StringPtr cData, int len )
369 {
370 	ExpatAdapter * thiz = (ExpatAdapter*)userData;
371 
372 	if ( (cData == 0) || (len == 0) ) { cData = ""; len = 0; }
373 
374 	#if XMP_DebugBuild & DumpXMLParseEvents
375 		if ( thiz->parseLog != 0 ) {
376 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
377 			fprintf ( thiz->parseLog, "CharContent: \"" );
378 			for ( int i = 0; i < len; ++i ) fprintf ( thiz->parseLog, "%c", cData[i] );
379 			fprintf ( thiz->parseLog, "\"\n" );
380 		}
381 	#endif
382 
383 	XML_Node * parentNode = thiz->parseStack.back();
384 	XML_Node * cDataNode  = new XML_Node ( parentNode, "", kCDataNode );
385 
386 	cDataNode->value.assign ( cData, len );
387 	parentNode->content.push_back ( cDataNode );
388 
389 }	// CharacterDataHandler
390 
391 // =================================================================================================
392 
StartCdataSectionHandler(void * userData)393 static void StartCdataSectionHandler ( void * userData )
394 {
395 	IgnoreParam(userData);
396 
397 	#if XMP_DebugBuild & DumpXMLParseEvents		// Avoid unused variable warning.
398 		ExpatAdapter * thiz = (ExpatAdapter*)userData;
399 	#endif
400 
401 	#if XMP_DebugBuild & DumpXMLParseEvents
402 		if ( thiz->parseLog != 0 ) {
403 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
404 			fprintf ( thiz->parseLog, "StartCDATA\n" );
405 		}
406 	#endif
407 
408 	// *** Since markup isn't recognized inside CDATA, this affects XMP's double escaping.
409 
410 }	// StartCdataSectionHandler
411 
412 // =================================================================================================
413 
EndCdataSectionHandler(void * userData)414 static void EndCdataSectionHandler ( void * userData )
415 {
416 	IgnoreParam(userData);
417 
418 	#if XMP_DebugBuild & DumpXMLParseEvents		// Avoid unused variable warning.
419 		ExpatAdapter * thiz = (ExpatAdapter*)userData;
420 	#endif
421 
422 	#if XMP_DebugBuild & DumpXMLParseEvents
423 		if ( thiz->parseLog != 0 ) {
424 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
425 			fprintf ( thiz->parseLog, "EndCDATA\n" );
426 		}
427 	#endif
428 
429 }	// EndCdataSectionHandler
430 
431 // =================================================================================================
432 
ProcessingInstructionHandler(void * userData,XMP_StringPtr target,XMP_StringPtr data)433 static void ProcessingInstructionHandler ( void * userData, XMP_StringPtr target, XMP_StringPtr data )
434 {
435 	XMP_Assert ( target != 0 );
436 	ExpatAdapter * thiz = (ExpatAdapter*)userData;
437 
438 	if ( ! XMP_LitMatch ( target, "xpacket" ) ) return;	// Ignore all PIs except the XMP packet wrapper.
439 	if ( data == 0 ) data = "";
440 
441 	#if XMP_DebugBuild & DumpXMLParseEvents
442 		if ( thiz->parseLog != 0 ) {
443 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
444 			fprintf ( thiz->parseLog, "PI: %s - \"%s\"\n", target, data );
445 		}
446 	#endif
447 
448 	XML_Node * parentNode = thiz->parseStack.back();
449 	XML_Node * piNode  = new XML_Node ( parentNode, target, kPINode );
450 
451 	piNode->value.assign ( data );
452 	parentNode->content.push_back ( piNode );
453 
454 }	// ProcessingInstructionHandler
455 
456 // =================================================================================================
457 
CommentHandler(void * userData,XMP_StringPtr comment)458 static void CommentHandler ( void * userData, XMP_StringPtr comment )
459 {
460 	IgnoreParam(userData);
461 
462 	#if XMP_DebugBuild & DumpXMLParseEvents		// Avoid unused variable warning.
463 		ExpatAdapter * thiz = (ExpatAdapter*)userData;
464 	#endif
465 
466 	if ( comment == 0 ) comment = "";
467 
468 	#if XMP_DebugBuild & DumpXMLParseEvents
469 		if ( thiz->parseLog != 0 ) {
470 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
471 			fprintf ( thiz->parseLog, "Comment: \"%s\"\n", comment );
472 		}
473 	#endif
474 
475 	// ! Comments are ignored.
476 
477 }	// CommentHandler
478 
479 // =================================================================================================
480 
481 #if BanAllEntityUsage
StartDoctypeDeclHandler(void * userData,XMP_StringPtr doctypeName,XMP_StringPtr sysid,XMP_StringPtr pubid,int has_internal_subset)482 static void StartDoctypeDeclHandler ( void * userData, XMP_StringPtr doctypeName,
483 									  XMP_StringPtr sysid, XMP_StringPtr pubid, int has_internal_subset )
484 {
485 	IgnoreParam(userData);
486 
487 	ExpatAdapter * thiz = (ExpatAdapter*)userData;
488 
489 	#if XMP_DebugBuild & DumpXMLParseEvents		// Avoid unused variable warning.
490 		if ( thiz->parseLog != 0 ) {
491 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
492 			fprintf ( thiz->parseLog, "DocType: \"%s\"\n", doctypeName );
493 		}
494 	#endif
495 
496 	thiz->isAborted = true;	// ! Can't throw an exception across the plain C Expat frames.
497 	(void) XML_StopParser ( thiz->parser, XML_FALSE /* not resumable */ );
498 
499 }	// StartDoctypeDeclHandler
500 #endif
501 
502 // =================================================================================================
503