1 // =================================================================================================
2 // Copyright 2005 Adobe Systems Incorporated
3 // All Rights Reserved.
4 //
5 // NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
6 // of the Adobe license agreement accompanying it.
7 // =================================================================================================
8 
9 #include "public/include/XMP_Environment.h"	// ! Must be the first #include!
10 #include "XMPCore/source/XMPCore_Impl.hpp"
11 
12 #include "source/ExpatAdapter.hpp"
13 #include "XMPCore/source/XMPMeta.hpp"
14 
15 #include "expat.h"
16 #include <string.h>
17 
18 using namespace std;
19 
20 #if XMP_WinBuild
21 	#pragma warning ( disable : 4996 )	// '...' was declared deprecated
22 #endif
23 
24 // *** Set memory handlers.
25 
26 #ifndef DumpXMLParseEvents
27 	#define DumpXMLParseEvents	0
28 #endif
29 
30 #define FullNameSeparator	'@'
31 
32 // =================================================================================================
33 
// Forward declarations of the Expat callback functions defined later in this file. The ExpatAdapter
// constructor registers them on the parser, and passes the adapter itself to XML_SetUserData, so
// the userData argument of each callback is the owning ExpatAdapter.

// Namespace declaration scope callbacks.
static void StartNamespaceDeclHandler    ( void * userData, XMP_StringPtr prefix, XMP_StringPtr uri );
static void EndNamespaceDeclHandler      ( void * userData, XMP_StringPtr prefix );

// Element callbacks. The attrs array is a null terminated list of name/value pairs.
static void StartElementHandler          ( void * userData, XMP_StringPtr name, XMP_StringPtr* attrs );
static void EndElementHandler            ( void * userData, XMP_StringPtr name );

// Character content callbacks. The cData text is length counted by len.
static void CharacterDataHandler         ( void * userData, XMP_StringPtr cData, int len );
static void StartCdataSectionHandler     ( void * userData );
static void EndCdataSectionHandler       ( void * userData );

// Processing instruction and comment callbacks.
static void ProcessingInstructionHandler ( void * userData, XMP_StringPtr target, XMP_StringPtr data );
static void CommentHandler               ( void * userData, XMP_StringPtr comment );

#if BanAllEntityUsage

	// For now we do this by banning DOCTYPE entirely. This is easy and consistent with what is
	// available in recent Java XML parsers. Another, somewhat less drastic, approach would be to
	// ban all entity declarations. We can't allow declarations and ban references, Expat does not
	// call the SkippedEntityHandler for references in attribute values.

	// ! Standard entities (&amp;, &lt;, &gt;, &quot;, &apos;, and numeric character references) are
	// ! not banned. Expat handles them transparently no matter what.

	// DOCTYPE callback, only declared and registered when BanAllEntityUsage is enabled.
	static void StartDoctypeDeclHandler ( void * userData, XMP_StringPtr doctypeName,
										  XMP_StringPtr sysid, XMP_StringPtr pubid, int has_internal_subset );

#endif
61 
62 // =================================================================================================
63 
XMP_NewExpatAdapter(bool useGlobalNamespaces)64 extern "C" ExpatAdapter * XMP_NewExpatAdapter ( bool useGlobalNamespaces )
65 {
66 
67 	return new ExpatAdapter ( useGlobalNamespaces );
68 
69 }	// XMP_NewExpatAdapter
70 
71 // =================================================================================================
72 
ExpatAdapter(bool useGlobalNamespaces)73 ExpatAdapter::ExpatAdapter ( bool useGlobalNamespaces ) : parser(0), registeredNamespaces(0)
74 {
75 
76 	#if XMP_DebugBuild
77 		this->elemNesting = 0;
78 		#if DumpXMLParseEvents
79 			if ( this->parseLog == 0 ) this->parseLog = stdout;
80 		#endif
81 	#endif
82 
83 	this->parser = XML_ParserCreateNS ( 0, FullNameSeparator );
84 	if ( this->parser == 0 ) {
85 		XMP_Error error(kXMPErr_NoMemory, "Failure creating Expat parser" );
86 		this->NotifyClient ( kXMPErrSev_ProcessFatal, error );
87 	}else{
88 		if ( useGlobalNamespaces ) {
89 			this->registeredNamespaces = sRegisteredNamespaces;
90 		} else {
91 			this->registeredNamespaces = new XMP_NamespaceTable ( *sRegisteredNamespaces );
92 		}
93 
94 		XML_SetUserData ( this->parser, this );
95 
96 		XML_SetNamespaceDeclHandler ( this->parser, StartNamespaceDeclHandler, EndNamespaceDeclHandler );
97 		XML_SetElementHandler ( this->parser, StartElementHandler, EndElementHandler );
98 
99 		XML_SetCharacterDataHandler ( this->parser, CharacterDataHandler );
100 		XML_SetCdataSectionHandler ( this->parser, StartCdataSectionHandler, EndCdataSectionHandler );
101 
102 		XML_SetProcessingInstructionHandler ( this->parser, ProcessingInstructionHandler );
103 		XML_SetCommentHandler ( this->parser, CommentHandler );
104 
105 		#if BanAllEntityUsage
106 			XML_SetStartDoctypeDeclHandler ( this->parser, StartDoctypeDeclHandler );
107 			isAborted = false;
108 		#endif
109 
110 		this->parseStack.push_back ( &this->tree );	// Push the XML root node.
111 	}
112 }	// ExpatAdapter::ExpatAdapter
113 
114 // =================================================================================================
115 
~ExpatAdapter()116 ExpatAdapter::~ExpatAdapter()
117 {
118 
119 	if ( this->parser != 0 ) XML_ParserFree ( this->parser );
120 	this->parser = 0;
121 
122 	if ( this->registeredNamespaces != sRegisteredNamespaces ) delete ( this->registeredNamespaces );
123 	this->registeredNamespaces = 0;
124 
125 }	// ExpatAdapter::~ExpatAdapter
126 
127 // =================================================================================================
128 
129 #if XMP_DebugBuild
130 	static XMP_VarString sExpatMessage;
131 #endif
132 
133 static const char * kOneSpace = " ";
134 
ParseBuffer(const void * buffer,size_t length,bool last)135 void ExpatAdapter::ParseBuffer ( const void * buffer, size_t length, bool last /* = true */ )
136 {
137 	enum XML_Status status;
138 
139 	if ( length == 0 ) {	// Expat does not like empty buffers.
140 		if ( ! last ) return;
141 		buffer = kOneSpace;
142 		length = 1;
143 	}
144 
145 	status = XML_Parse ( this->parser, (const char *)buffer, static_cast< XMP_StringLen >( length ), last );
146 
147 	#if BanAllEntityUsage
148 		if ( this->isAborted ) {
149 			XMP_Error error(kXMPErr_BadXML, "DOCTYPE is not allowed" );
150 			this->NotifyClient ( kXMPErrSev_Recoverable, error );
151 		}
152 	#endif
153 
154 	if ( status != XML_STATUS_OK ) {
155 
156 		XMP_StringPtr errMsg = "XML parsing failure";
157 
158 		#if 0	// XMP_DebugBuild	// Disable for now to make test output uniform. Restore later with thread safety.
159 
160 			// *** This is a good candidate for a callback error notification mechanism.
161 			// *** This code is not thread safe, the sExpatMessage isn't locked. But that's OK for debug usage.
162 
163 			enum XML_Error expatErr = XML_GetErrorCode ( this->parser );
164 			const char *   expatMsg = XML_ErrorString ( expatErr );
165 			int errLine = XML_GetCurrentLineNumber ( this->parser );
166 
167 			char msgBuffer[1000];
168 			// AUDIT: Use of sizeof(msgBuffer) for snprintf length is safe.
169 			snprintf ( msgBuffer, sizeof(msgBuffer), "# Expat error %d at line %d, \"%s\"", expatErr, errLine, expatMsg );
170 			sExpatMessage = msgBuffer;
171 			errMsg = sExpatMessage.c_str();
172 
173 			#if  DumpXMLParseEvents
174 				if ( this->parseLog != 0 ) fprintf ( this->parseLog, "%s\n", errMsg, expatErr, errLine, expatMsg );
175 			#endif
176 
177 		#endif
178 
179 		XMP_Error error(kXMPErr_BadXML, errMsg);
180 		this->NotifyClient ( kXMPErrSev_Recoverable, error );
181 
182 	}
183 
184 }	// ExpatAdapter::ParseBuffer
185 
186 // =================================================================================================
187 // =================================================================================================
188 
#if XMP_DebugBuild & DumpXMLParseEvents

	// Write count levels of indentation, two spaces per level, to the parse log file.
	static inline void PrintIndent ( FILE * file, size_t count )
	{
		size_t remaining = count;
		while ( remaining > 0 ) {
			fprintf ( file, "  " );
			--remaining;
		}
	}

#endif
197 
198 // =================================================================================================
199 
SetQualName(ExpatAdapter * thiz,XMP_StringPtr fullName,XML_Node * node)200 static void SetQualName ( ExpatAdapter * thiz, XMP_StringPtr fullName, XML_Node * node )
201 {
202 	// Expat delivers the full name as a catenation of namespace URI, separator, and local name.
203 
204 	// As a compatibility hack, an "about" or "ID" attribute of an rdf:Description element is
205 	// changed to "rdf:about" or rdf:ID. Easier done here than in the RDF recognizer.
206 
207 	// As a bug fix hack, change a URI of "http://purl.org/dc/1.1/" to ""http://purl.org/dc/elements/1.1/.
208 	// Early versions of Flash that put XMP in SWF used a bad URI for the dc: namespace.
209 
210 	// ! This code presumes the RDF namespace prefix is "rdf".
211 
212 	size_t sepPos = strlen(fullName);
213 	for ( --sepPos; sepPos > 0; --sepPos ) {
214 		if ( fullName[sepPos] == FullNameSeparator ) break;
215 	}
216 
217 	if ( fullName[sepPos] == FullNameSeparator ) {
218 
219 		XMP_StringPtr prefix;
220 		XMP_StringLen prefixLen;
221 		XMP_StringPtr localPart = fullName + sepPos + 1;
222 
223 		node->ns.assign ( fullName, sepPos );
224 		if ( node->ns == "http://purl.org/dc/1.1/" ) node->ns = "http://purl.org/dc/elements/1.1/";
225 
226 		bool found = thiz->registeredNamespaces->GetPrefix ( node->ns.c_str(), &prefix, &prefixLen );
227 		if ( ! found ) {
228 			XMP_Error error(kXMPErr_ExternalFailure, "Unknown URI in Expat full name" );
229 			thiz->NotifyClient ( kXMPErrSev_OperationFatal, error );
230 		}
231 		node->nsPrefixLen = prefixLen;	// ! Includes the ':'.
232 
233 		node->name = prefix;
234 		node->name += localPart;
235 
236 	} else {
237 
238 		node->name = fullName;	// The name is not in a namespace.
239 
240 		if ( node->parent->name == "rdf:Description" ) {
241 			if ( node->name == "about" ) {
242 				node->ns   = kXMP_NS_RDF;
243 				node->name = "rdf:about";
244 				node->nsPrefixLen = 4;	// ! Include the ':'.
245 			} else if ( node->name == "ID" ) {
246 				node->ns   = kXMP_NS_RDF;
247 				node->name = "rdf:ID";
248 				node->nsPrefixLen = 4;	// ! Include the ':'.
249 			}
250 		}
251 
252 	}
253 
254 }	// SetQualName
255 
256 // =================================================================================================
257 
StartNamespaceDeclHandler(void * userData,XMP_StringPtr prefix,XMP_StringPtr uri)258 static void StartNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix, XMP_StringPtr uri )
259 {
260 	IgnoreParam(userData);
261 
262 	// As a bug fix hack, change a URI of "http://purl.org/dc/1.1/" to ""http://purl.org/dc/elements/1.1/.
263 	// Early versions of Flash that put XMP in SWF used a bad URI for the dc: namespace.
264 
265 	ExpatAdapter * thiz = (ExpatAdapter*)userData;
266 
267 	if ( prefix == 0 ) prefix = "_dflt_";	// Have default namespace.
268 	if ( uri == 0 ) return;	// Ignore, have xmlns:pre="", no URI to register.
269 
270 	#if XMP_DebugBuild & DumpXMLParseEvents
271 		if ( thiz->parseLog != 0 ) {
272 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
273 			fprintf ( thiz->parseLog, "StartNamespace: %s - \"%s\"\n", prefix, uri );
274 		}
275 	#endif
276 
277 	if ( XMP_LitMatch ( uri, "http://purl.org/dc/1.1/" ) ) uri = "http://purl.org/dc/elements/1.1/";
278 	if (thiz->registeredNamespaces == sRegisteredNamespaces) {
279 		(void)XMPMeta::RegisterNamespace(uri, prefix, 0, 0);
280 	}
281 	else {
282 		(void)thiz->registeredNamespaces->Define(uri, prefix, 0, 0);
283 	}
284 
285 }	// StartNamespaceDeclHandler
286 
287 // =================================================================================================
288 
EndNamespaceDeclHandler(void * userData,XMP_StringPtr prefix)289 static void EndNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix )
290 {
291 	IgnoreParam(userData);
292 
293 	#if XMP_DebugBuild & DumpXMLParseEvents		// Avoid unused variable warning.
294 		ExpatAdapter * thiz = (ExpatAdapter*)userData;
295 	#endif
296 
297 	if ( prefix == 0 ) prefix = "_dflt_";	// Have default namespace.
298 
299 	#if XMP_DebugBuild & DumpXMLParseEvents
300 		if ( thiz->parseLog != 0 ) {
301 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
302 			fprintf ( thiz->parseLog, "EndNamespace: %s\n", prefix );
303 		}
304 	#endif
305 
306 	// ! Nothing to do, Expat has done all of the XML processing.
307 
308 }	// EndNamespaceDeclHandler
309 
310 // =================================================================================================
311 
StartElementHandler(void * userData,XMP_StringPtr name,XMP_StringPtr * attrs)312 static void StartElementHandler ( void * userData, XMP_StringPtr name, XMP_StringPtr* attrs )
313 {
314 	XMP_Assert ( attrs != 0 );
315 	ExpatAdapter * thiz = (ExpatAdapter*)userData;
316 
317 	size_t attrCount = 0;
318 	for ( XMP_StringPtr* a = attrs; *a != 0; ++a ) ++attrCount;
319 	if ( (attrCount & 1) != 0 ) {
320 		XMP_Error error(kXMPErr_ExternalFailure, "Expat attribute info has odd length");
321 		thiz->NotifyClient ( kXMPErrSev_OperationFatal, error );
322 	}
323 	attrCount = attrCount/2;	// They are name/value pairs.
324 
325 	#if XMP_DebugBuild & DumpXMLParseEvents
326 		if ( thiz->parseLog != 0 ) {
327 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
328 			fprintf ( thiz->parseLog, "StartElement: %s, %d attrs", name, attrCount );
329 			for ( XMP_StringPtr* attr = attrs; *attr != 0; attr += 2 ) {
330 				XMP_StringPtr attrName = *attr;
331 				XMP_StringPtr attrValue = *(attr+1);
332 				fprintf ( thiz->parseLog, ", %s = \"%s\"", attrName, attrValue );
333 			}
334 			fprintf ( thiz->parseLog, "\n" );
335 		}
336 	#endif
337 
338 	XML_Node * parentNode = thiz->parseStack.back();
339 	XML_Node * elemNode   = new XML_Node ( parentNode, "", kElemNode );
340 
341 	SetQualName ( thiz, name, elemNode );
342 
343 	for ( XMP_StringPtr* attr = attrs; *attr != 0; attr += 2 ) {
344 
345 		XMP_StringPtr attrName = *attr;
346 		XMP_StringPtr attrValue = *(attr+1);
347 		XML_Node * attrNode = new XML_Node ( elemNode, "", kAttrNode );
348 
349 		SetQualName ( thiz, attrName, attrNode );
350 		attrNode->value = attrValue;
351 		if ( attrNode->name == "xml:lang" ) NormalizeLangValue ( &attrNode->value );
352 		elemNode->attrs.push_back ( attrNode );
353 
354 	}
355 
356 	parentNode->content.push_back ( elemNode );
357 	thiz->parseStack.push_back ( elemNode );
358 
359 	if ( elemNode->name == "rdf:RDF" ) {
360 		thiz->rootNode = elemNode;
361 		++thiz->rootCount;
362 	}
363 	#if XMP_DebugBuild
364 		++thiz->elemNesting;
365 	#endif
366 
367 }	// StartElementHandler
368 
369 // =================================================================================================
370 
EndElementHandler(void * userData,XMP_StringPtr name)371 static void EndElementHandler ( void * userData, XMP_StringPtr name )
372 {
373 	IgnoreParam(name);
374 
375 	ExpatAdapter * thiz = (ExpatAdapter*)userData;
376 
377 	#if XMP_DebugBuild
378 		--thiz->elemNesting;
379 	#endif
380 	(void) thiz->parseStack.pop_back();
381 
382 	#if XMP_DebugBuild & DumpXMLParseEvents
383 		if ( thiz->parseLog != 0 ) {
384 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
385 			fprintf ( thiz->parseLog, "EndElement: %s\n", name );
386 		}
387 	#endif
388 
389 }	// EndElementHandler
390 
391 // =================================================================================================
392 
CharacterDataHandler(void * userData,XMP_StringPtr cData,int len)393 static void CharacterDataHandler ( void * userData, XMP_StringPtr cData, int len )
394 {
395 	ExpatAdapter * thiz = (ExpatAdapter*)userData;
396 
397 	if ( (cData == 0) || (len == 0) ) { cData = ""; len = 0; }
398 
399 	#if XMP_DebugBuild & DumpXMLParseEvents
400 		if ( thiz->parseLog != 0 ) {
401 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
402 			fprintf ( thiz->parseLog, "CharContent: \"" );
403 			for ( int i = 0; i < len; ++i ) fprintf ( thiz->parseLog, "%c", cData[i] );
404 			fprintf ( thiz->parseLog, "\"\n" );
405 		}
406 	#endif
407 
408 	XML_Node * parentNode = thiz->parseStack.back();
409 	XML_Node * cDataNode  = new XML_Node ( parentNode, "", kCDataNode );
410 
411 	cDataNode->value.assign ( cData, len );
412 	parentNode->content.push_back ( cDataNode );
413 
414 }	// CharacterDataHandler
415 
416 // =================================================================================================
417 
StartCdataSectionHandler(void * userData)418 static void StartCdataSectionHandler ( void * userData )
419 {
420 	IgnoreParam(userData);
421 
422 	#if XMP_DebugBuild & DumpXMLParseEvents		// Avoid unused variable warning.
423 		ExpatAdapter * thiz = (ExpatAdapter*)userData;
424 	#endif
425 
426 	#if XMP_DebugBuild & DumpXMLParseEvents
427 		if ( thiz->parseLog != 0 ) {
428 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
429 			fprintf ( thiz->parseLog, "StartCDATA\n" );
430 		}
431 	#endif
432 
433 	// *** Since markup isn't recognized inside CDATA, this affects XMP's double escaping.
434 
435 }	// StartCdataSectionHandler
436 
437 // =================================================================================================
438 
EndCdataSectionHandler(void * userData)439 static void EndCdataSectionHandler ( void * userData )
440 {
441 	IgnoreParam(userData);
442 
443 	#if XMP_DebugBuild & DumpXMLParseEvents		// Avoid unused variable warning.
444 		ExpatAdapter * thiz = (ExpatAdapter*)userData;
445 	#endif
446 
447 	#if XMP_DebugBuild & DumpXMLParseEvents
448 		if ( thiz->parseLog != 0 ) {
449 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
450 			fprintf ( thiz->parseLog, "EndCDATA\n" );
451 		}
452 	#endif
453 
454 }	// EndCdataSectionHandler
455 
456 // =================================================================================================
457 
ProcessingInstructionHandler(void * userData,XMP_StringPtr target,XMP_StringPtr data)458 static void ProcessingInstructionHandler ( void * userData, XMP_StringPtr target, XMP_StringPtr data )
459 {
460 	XMP_Assert ( target != 0 );
461 	ExpatAdapter * thiz = (ExpatAdapter*)userData;
462 
463 	if ( ! XMP_LitMatch ( target, "xpacket" ) ) return;	// Ignore all PIs except the XMP packet wrapper.
464 	if ( data == 0 ) data = "";
465 
466 	#if XMP_DebugBuild & DumpXMLParseEvents
467 		if ( thiz->parseLog != 0 ) {
468 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
469 			fprintf ( thiz->parseLog, "PI: %s - \"%s\"\n", target, data );
470 		}
471 	#endif
472 
473 	XML_Node * parentNode = thiz->parseStack.back();
474 	XML_Node * piNode  = new XML_Node ( parentNode, target, kPINode );
475 
476 	piNode->value.assign ( data );
477 	parentNode->content.push_back ( piNode );
478 
479 }	// ProcessingInstructionHandler
480 
481 // =================================================================================================
482 
CommentHandler(void * userData,XMP_StringPtr comment)483 static void CommentHandler ( void * userData, XMP_StringPtr comment )
484 {
485 	IgnoreParam(userData);
486 
487 	#if XMP_DebugBuild & DumpXMLParseEvents		// Avoid unused variable warning.
488 		ExpatAdapter * thiz = (ExpatAdapter*)userData;
489 	#endif
490 
491 	if ( comment == 0 ) comment = "";
492 
493 	#if XMP_DebugBuild & DumpXMLParseEvents
494 		if ( thiz->parseLog != 0 ) {
495 			PrintIndent ( thiz->parseLog, thiz->elemNesting );
496 			fprintf ( thiz->parseLog, "Comment: \"%s\"\n", comment );
497 		}
498 	#endif
499 
500 	// ! Comments are ignored.
501 
502 }	// CommentHandler
503 
504 // =================================================================================================
505 
#if BanAllEntityUsage
// Expat callback for the start of a DOCTYPE declaration. DOCTYPE is banned outright, so this marks
// the adapter as aborted and stops the parser without the option to resume. ParseBuffer checks the
// flag after XML_Parse returns and notifies the client from there.
static void StartDoctypeDeclHandler ( void * userData, XMP_StringPtr doctypeName,
									  XMP_StringPtr sysid, XMP_StringPtr pubid, int has_internal_subset )
{
	IgnoreParam(userData);

	ExpatAdapter * adapter = static_cast< ExpatAdapter * >( userData );

	#if XMP_DebugBuild & DumpXMLParseEvents
		if ( adapter->parseLog != 0 ) {
			PrintIndent ( adapter->parseLog, adapter->elemNesting );
			fprintf ( adapter->parseLog, "DocType: \"%s\"\n", doctypeName );
		}
	#endif

	// ! Can't throw an exception across the plain C Expat frames, so just record the abort.
	adapter->isAborted = true;
	(void) XML_StopParser ( adapter->parser, XML_FALSE /* not resumable */ );

}	// StartDoctypeDeclHandler
#endif
526 
527 // =================================================================================================
528