1 // =================================================================================================
2 // Copyright 2005 Adobe Systems Incorporated
3 // All Rights Reserved.
4 //
5 // NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms
6 // of the Adobe license agreement accompanying it.
7 // =================================================================================================
8
9 #include "public/include/XMP_Environment.h" // ! Must be the first #include!
10 #include "XMPCore/source/XMPCore_Impl.hpp"
11
12 #include "source/ExpatAdapter.hpp"
13 #include "XMPCore/source/XMPMeta.hpp"
14
15 #include "expat.h"
16 #include <string.h>
17
18 using namespace std;
19
20 #if XMP_WinBuild
21 #pragma warning ( disable : 4996 ) // '...' was declared deprecated
22 #endif
23
24 // *** Set memory handlers.
25
26 #ifndef DumpXMLParseEvents
27 #define DumpXMLParseEvents 0
28 #endif
29
30 #define FullNameSeparator '@'
31
32 // =================================================================================================
33
34 static void StartNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix, XMP_StringPtr uri );
35 static void EndNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix );
36
37 static void StartElementHandler ( void * userData, XMP_StringPtr name, XMP_StringPtr* attrs );
38 static void EndElementHandler ( void * userData, XMP_StringPtr name );
39
40 static void CharacterDataHandler ( void * userData, XMP_StringPtr cData, int len );
41 static void StartCdataSectionHandler ( void * userData );
42 static void EndCdataSectionHandler ( void * userData );
43
44 static void ProcessingInstructionHandler ( void * userData, XMP_StringPtr target, XMP_StringPtr data );
45 static void CommentHandler ( void * userData, XMP_StringPtr comment );
46
47 #if BanAllEntityUsage
48
49 // For now we do this by banning DOCTYPE entirely. This is easy and consistent with what is
50 // available in recent Java XML parsers. Another, somewhat less drastic, approach would be to
51 // ban all entity declarations. We can't allow declarations and ban references, Expat does not
52 // call the SkippedEntityHandler for references in attribute values.
53
// ! Standard entities (&amp;, &lt;, &gt;, &quot;, &apos;, and numeric character references) are
// ! not banned. Expat handles them transparently no matter what.
56
57 static void StartDoctypeDeclHandler ( void * userData, XMP_StringPtr doctypeName,
58 XMP_StringPtr sysid, XMP_StringPtr pubid, int has_internal_subset );
59
60 #endif
61
62 // =================================================================================================
63
XMP_NewExpatAdapter(bool useGlobalNamespaces)64 extern "C" ExpatAdapter * XMP_NewExpatAdapter ( bool useGlobalNamespaces )
65 {
66
67 return new ExpatAdapter ( useGlobalNamespaces );
68
69 } // XMP_NewExpatAdapter
70
71 // =================================================================================================
72
ExpatAdapter(bool useGlobalNamespaces)73 ExpatAdapter::ExpatAdapter ( bool useGlobalNamespaces ) : parser(0), registeredNamespaces(0)
74 {
75
76 #if XMP_DebugBuild
77 this->elemNesting = 0;
78 #if DumpXMLParseEvents
79 if ( this->parseLog == 0 ) this->parseLog = stdout;
80 #endif
81 #endif
82
83 this->parser = XML_ParserCreateNS ( 0, FullNameSeparator );
84 if ( this->parser == 0 ) {
85 XMP_Error error(kXMPErr_NoMemory, "Failure creating Expat parser" );
86 this->NotifyClient ( kXMPErrSev_ProcessFatal, error );
87 }else{
88 if ( useGlobalNamespaces ) {
89 this->registeredNamespaces = sRegisteredNamespaces;
90 } else {
91 this->registeredNamespaces = new XMP_NamespaceTable ( *sRegisteredNamespaces );
92 }
93
94 XML_SetUserData ( this->parser, this );
95
96 XML_SetNamespaceDeclHandler ( this->parser, StartNamespaceDeclHandler, EndNamespaceDeclHandler );
97 XML_SetElementHandler ( this->parser, StartElementHandler, EndElementHandler );
98
99 XML_SetCharacterDataHandler ( this->parser, CharacterDataHandler );
100 XML_SetCdataSectionHandler ( this->parser, StartCdataSectionHandler, EndCdataSectionHandler );
101
102 XML_SetProcessingInstructionHandler ( this->parser, ProcessingInstructionHandler );
103 XML_SetCommentHandler ( this->parser, CommentHandler );
104
105 #if BanAllEntityUsage
106 XML_SetStartDoctypeDeclHandler ( this->parser, StartDoctypeDeclHandler );
107 isAborted = false;
108 #endif
109
110 this->parseStack.push_back ( &this->tree ); // Push the XML root node.
111 }
112 } // ExpatAdapter::ExpatAdapter
113
114 // =================================================================================================
115
~ExpatAdapter()116 ExpatAdapter::~ExpatAdapter()
117 {
118
119 if ( this->parser != 0 ) XML_ParserFree ( this->parser );
120 this->parser = 0;
121
122 if ( this->registeredNamespaces != sRegisteredNamespaces ) delete ( this->registeredNamespaces );
123 this->registeredNamespaces = 0;
124
125 } // ExpatAdapter::~ExpatAdapter
126
127 // =================================================================================================
128
#if XMP_DebugBuild
    // Holds the formatted Expat error text for the detailed error report in
    // ExpatAdapter::ParseBuffer (that report is currently compiled out). File-level state
    // with no locking.
    static XMP_VarString sExpatMessage;
#endif

// Stand-in input that ExpatAdapter::ParseBuffer passes to Expat when the final call has an
// empty buffer.
static const char * kOneSpace = " ";
134
ParseBuffer(const void * buffer,size_t length,bool last)135 void ExpatAdapter::ParseBuffer ( const void * buffer, size_t length, bool last /* = true */ )
136 {
137 enum XML_Status status;
138
139 if ( length == 0 ) { // Expat does not like empty buffers.
140 if ( ! last ) return;
141 buffer = kOneSpace;
142 length = 1;
143 }
144
145 status = XML_Parse ( this->parser, (const char *)buffer, static_cast< XMP_StringLen >( length ), last );
146
147 #if BanAllEntityUsage
148 if ( this->isAborted ) {
149 XMP_Error error(kXMPErr_BadXML, "DOCTYPE is not allowed" );
150 this->NotifyClient ( kXMPErrSev_Recoverable, error );
151 }
152 #endif
153
154 if ( status != XML_STATUS_OK ) {
155
156 XMP_StringPtr errMsg = "XML parsing failure";
157
158 #if 0 // XMP_DebugBuild // Disable for now to make test output uniform. Restore later with thread safety.
159
160 // *** This is a good candidate for a callback error notification mechanism.
161 // *** This code is not thread safe, the sExpatMessage isn't locked. But that's OK for debug usage.
162
163 enum XML_Error expatErr = XML_GetErrorCode ( this->parser );
164 const char * expatMsg = XML_ErrorString ( expatErr );
165 int errLine = XML_GetCurrentLineNumber ( this->parser );
166
167 char msgBuffer[1000];
168 // AUDIT: Use of sizeof(msgBuffer) for snprintf length is safe.
169 snprintf ( msgBuffer, sizeof(msgBuffer), "# Expat error %d at line %d, \"%s\"", expatErr, errLine, expatMsg );
170 sExpatMessage = msgBuffer;
171 errMsg = sExpatMessage.c_str();
172
173 #if DumpXMLParseEvents
174 if ( this->parseLog != 0 ) fprintf ( this->parseLog, "%s\n", errMsg, expatErr, errLine, expatMsg );
175 #endif
176
177 #endif
178
179 XMP_Error error(kXMPErr_BadXML, errMsg);
180 this->NotifyClient ( kXMPErrSev_Recoverable, error );
181
182 }
183
184 } // ExpatAdapter::ParseBuffer
185
186 // =================================================================================================
187 // =================================================================================================
188
189 #if XMP_DebugBuild & DumpXMLParseEvents
190
// Writes one indentation unit to file for each of the count nesting levels.
static inline void PrintIndent ( FILE * file, size_t count )
{
    size_t remaining = count;
    while ( remaining > 0 ) {
        fprintf ( file, " " );
        --remaining;
    }
}
195
196 #endif
197
198 // =================================================================================================
199
SetQualName(ExpatAdapter * thiz,XMP_StringPtr fullName,XML_Node * node)200 static void SetQualName ( ExpatAdapter * thiz, XMP_StringPtr fullName, XML_Node * node )
201 {
202 // Expat delivers the full name as a catenation of namespace URI, separator, and local name.
203
204 // As a compatibility hack, an "about" or "ID" attribute of an rdf:Description element is
205 // changed to "rdf:about" or rdf:ID. Easier done here than in the RDF recognizer.
206
207 // As a bug fix hack, change a URI of "http://purl.org/dc/1.1/" to ""http://purl.org/dc/elements/1.1/.
208 // Early versions of Flash that put XMP in SWF used a bad URI for the dc: namespace.
209
210 // ! This code presumes the RDF namespace prefix is "rdf".
211
212 size_t sepPos = strlen(fullName);
213 for ( --sepPos; sepPos > 0; --sepPos ) {
214 if ( fullName[sepPos] == FullNameSeparator ) break;
215 }
216
217 if ( fullName[sepPos] == FullNameSeparator ) {
218
219 XMP_StringPtr prefix;
220 XMP_StringLen prefixLen;
221 XMP_StringPtr localPart = fullName + sepPos + 1;
222
223 node->ns.assign ( fullName, sepPos );
224 if ( node->ns == "http://purl.org/dc/1.1/" ) node->ns = "http://purl.org/dc/elements/1.1/";
225
226 bool found = thiz->registeredNamespaces->GetPrefix ( node->ns.c_str(), &prefix, &prefixLen );
227 if ( ! found ) {
228 XMP_Error error(kXMPErr_ExternalFailure, "Unknown URI in Expat full name" );
229 thiz->NotifyClient ( kXMPErrSev_OperationFatal, error );
230 }
231 node->nsPrefixLen = prefixLen; // ! Includes the ':'.
232
233 node->name = prefix;
234 node->name += localPart;
235
236 } else {
237
238 node->name = fullName; // The name is not in a namespace.
239
240 if ( node->parent->name == "rdf:Description" ) {
241 if ( node->name == "about" ) {
242 node->ns = kXMP_NS_RDF;
243 node->name = "rdf:about";
244 node->nsPrefixLen = 4; // ! Include the ':'.
245 } else if ( node->name == "ID" ) {
246 node->ns = kXMP_NS_RDF;
247 node->name = "rdf:ID";
248 node->nsPrefixLen = 4; // ! Include the ':'.
249 }
250 }
251
252 }
253
254 } // SetQualName
255
256 // =================================================================================================
257
StartNamespaceDeclHandler(void * userData,XMP_StringPtr prefix,XMP_StringPtr uri)258 static void StartNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix, XMP_StringPtr uri )
259 {
260 IgnoreParam(userData);
261
262 // As a bug fix hack, change a URI of "http://purl.org/dc/1.1/" to ""http://purl.org/dc/elements/1.1/.
263 // Early versions of Flash that put XMP in SWF used a bad URI for the dc: namespace.
264
265 ExpatAdapter * thiz = (ExpatAdapter*)userData;
266
267 if ( prefix == 0 ) prefix = "_dflt_"; // Have default namespace.
268 if ( uri == 0 ) return; // Ignore, have xmlns:pre="", no URI to register.
269
270 #if XMP_DebugBuild & DumpXMLParseEvents
271 if ( thiz->parseLog != 0 ) {
272 PrintIndent ( thiz->parseLog, thiz->elemNesting );
273 fprintf ( thiz->parseLog, "StartNamespace: %s - \"%s\"\n", prefix, uri );
274 }
275 #endif
276
277 if ( XMP_LitMatch ( uri, "http://purl.org/dc/1.1/" ) ) uri = "http://purl.org/dc/elements/1.1/";
278 if (thiz->registeredNamespaces == sRegisteredNamespaces) {
279 (void)XMPMeta::RegisterNamespace(uri, prefix, 0, 0);
280 }
281 else {
282 (void)thiz->registeredNamespaces->Define(uri, prefix, 0, 0);
283 }
284
285 } // StartNamespaceDeclHandler
286
287 // =================================================================================================
288
EndNamespaceDeclHandler(void * userData,XMP_StringPtr prefix)289 static void EndNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix )
290 {
291 IgnoreParam(userData);
292
293 #if XMP_DebugBuild & DumpXMLParseEvents // Avoid unused variable warning.
294 ExpatAdapter * thiz = (ExpatAdapter*)userData;
295 #endif
296
297 if ( prefix == 0 ) prefix = "_dflt_"; // Have default namespace.
298
299 #if XMP_DebugBuild & DumpXMLParseEvents
300 if ( thiz->parseLog != 0 ) {
301 PrintIndent ( thiz->parseLog, thiz->elemNesting );
302 fprintf ( thiz->parseLog, "EndNamespace: %s\n", prefix );
303 }
304 #endif
305
306 // ! Nothing to do, Expat has done all of the XML processing.
307
308 } // EndNamespaceDeclHandler
309
310 // =================================================================================================
311
StartElementHandler(void * userData,XMP_StringPtr name,XMP_StringPtr * attrs)312 static void StartElementHandler ( void * userData, XMP_StringPtr name, XMP_StringPtr* attrs )
313 {
314 XMP_Assert ( attrs != 0 );
315 ExpatAdapter * thiz = (ExpatAdapter*)userData;
316
317 size_t attrCount = 0;
318 for ( XMP_StringPtr* a = attrs; *a != 0; ++a ) ++attrCount;
319 if ( (attrCount & 1) != 0 ) {
320 XMP_Error error(kXMPErr_ExternalFailure, "Expat attribute info has odd length");
321 thiz->NotifyClient ( kXMPErrSev_OperationFatal, error );
322 }
323 attrCount = attrCount/2; // They are name/value pairs.
324
325 #if XMP_DebugBuild & DumpXMLParseEvents
326 if ( thiz->parseLog != 0 ) {
327 PrintIndent ( thiz->parseLog, thiz->elemNesting );
328 fprintf ( thiz->parseLog, "StartElement: %s, %d attrs", name, attrCount );
329 for ( XMP_StringPtr* attr = attrs; *attr != 0; attr += 2 ) {
330 XMP_StringPtr attrName = *attr;
331 XMP_StringPtr attrValue = *(attr+1);
332 fprintf ( thiz->parseLog, ", %s = \"%s\"", attrName, attrValue );
333 }
334 fprintf ( thiz->parseLog, "\n" );
335 }
336 #endif
337
338 XML_Node * parentNode = thiz->parseStack.back();
339 XML_Node * elemNode = new XML_Node ( parentNode, "", kElemNode );
340
341 SetQualName ( thiz, name, elemNode );
342
343 for ( XMP_StringPtr* attr = attrs; *attr != 0; attr += 2 ) {
344
345 XMP_StringPtr attrName = *attr;
346 XMP_StringPtr attrValue = *(attr+1);
347 XML_Node * attrNode = new XML_Node ( elemNode, "", kAttrNode );
348
349 SetQualName ( thiz, attrName, attrNode );
350 attrNode->value = attrValue;
351 if ( attrNode->name == "xml:lang" ) NormalizeLangValue ( &attrNode->value );
352 elemNode->attrs.push_back ( attrNode );
353
354 }
355
356 parentNode->content.push_back ( elemNode );
357 thiz->parseStack.push_back ( elemNode );
358
359 if ( elemNode->name == "rdf:RDF" ) {
360 thiz->rootNode = elemNode;
361 ++thiz->rootCount;
362 }
363 #if XMP_DebugBuild
364 ++thiz->elemNesting;
365 #endif
366
367 } // StartElementHandler
368
369 // =================================================================================================
370
EndElementHandler(void * userData,XMP_StringPtr name)371 static void EndElementHandler ( void * userData, XMP_StringPtr name )
372 {
373 IgnoreParam(name);
374
375 ExpatAdapter * thiz = (ExpatAdapter*)userData;
376
377 #if XMP_DebugBuild
378 --thiz->elemNesting;
379 #endif
380 (void) thiz->parseStack.pop_back();
381
382 #if XMP_DebugBuild & DumpXMLParseEvents
383 if ( thiz->parseLog != 0 ) {
384 PrintIndent ( thiz->parseLog, thiz->elemNesting );
385 fprintf ( thiz->parseLog, "EndElement: %s\n", name );
386 }
387 #endif
388
389 } // EndElementHandler
390
391 // =================================================================================================
392
CharacterDataHandler(void * userData,XMP_StringPtr cData,int len)393 static void CharacterDataHandler ( void * userData, XMP_StringPtr cData, int len )
394 {
395 ExpatAdapter * thiz = (ExpatAdapter*)userData;
396
397 if ( (cData == 0) || (len == 0) ) { cData = ""; len = 0; }
398
399 #if XMP_DebugBuild & DumpXMLParseEvents
400 if ( thiz->parseLog != 0 ) {
401 PrintIndent ( thiz->parseLog, thiz->elemNesting );
402 fprintf ( thiz->parseLog, "CharContent: \"" );
403 for ( int i = 0; i < len; ++i ) fprintf ( thiz->parseLog, "%c", cData[i] );
404 fprintf ( thiz->parseLog, "\"\n" );
405 }
406 #endif
407
408 XML_Node * parentNode = thiz->parseStack.back();
409 XML_Node * cDataNode = new XML_Node ( parentNode, "", kCDataNode );
410
411 cDataNode->value.assign ( cData, len );
412 parentNode->content.push_back ( cDataNode );
413
414 } // CharacterDataHandler
415
416 // =================================================================================================
417
StartCdataSectionHandler(void * userData)418 static void StartCdataSectionHandler ( void * userData )
419 {
420 IgnoreParam(userData);
421
422 #if XMP_DebugBuild & DumpXMLParseEvents // Avoid unused variable warning.
423 ExpatAdapter * thiz = (ExpatAdapter*)userData;
424 #endif
425
426 #if XMP_DebugBuild & DumpXMLParseEvents
427 if ( thiz->parseLog != 0 ) {
428 PrintIndent ( thiz->parseLog, thiz->elemNesting );
429 fprintf ( thiz->parseLog, "StartCDATA\n" );
430 }
431 #endif
432
433 // *** Since markup isn't recognized inside CDATA, this affects XMP's double escaping.
434
435 } // StartCdataSectionHandler
436
437 // =================================================================================================
438
EndCdataSectionHandler(void * userData)439 static void EndCdataSectionHandler ( void * userData )
440 {
441 IgnoreParam(userData);
442
443 #if XMP_DebugBuild & DumpXMLParseEvents // Avoid unused variable warning.
444 ExpatAdapter * thiz = (ExpatAdapter*)userData;
445 #endif
446
447 #if XMP_DebugBuild & DumpXMLParseEvents
448 if ( thiz->parseLog != 0 ) {
449 PrintIndent ( thiz->parseLog, thiz->elemNesting );
450 fprintf ( thiz->parseLog, "EndCDATA\n" );
451 }
452 #endif
453
454 } // EndCdataSectionHandler
455
456 // =================================================================================================
457
ProcessingInstructionHandler(void * userData,XMP_StringPtr target,XMP_StringPtr data)458 static void ProcessingInstructionHandler ( void * userData, XMP_StringPtr target, XMP_StringPtr data )
459 {
460 XMP_Assert ( target != 0 );
461 ExpatAdapter * thiz = (ExpatAdapter*)userData;
462
463 if ( ! XMP_LitMatch ( target, "xpacket" ) ) return; // Ignore all PIs except the XMP packet wrapper.
464 if ( data == 0 ) data = "";
465
466 #if XMP_DebugBuild & DumpXMLParseEvents
467 if ( thiz->parseLog != 0 ) {
468 PrintIndent ( thiz->parseLog, thiz->elemNesting );
469 fprintf ( thiz->parseLog, "PI: %s - \"%s\"\n", target, data );
470 }
471 #endif
472
473 XML_Node * parentNode = thiz->parseStack.back();
474 XML_Node * piNode = new XML_Node ( parentNode, target, kPINode );
475
476 piNode->value.assign ( data );
477 parentNode->content.push_back ( piNode );
478
479 } // ProcessingInstructionHandler
480
481 // =================================================================================================
482
CommentHandler(void * userData,XMP_StringPtr comment)483 static void CommentHandler ( void * userData, XMP_StringPtr comment )
484 {
485 IgnoreParam(userData);
486
487 #if XMP_DebugBuild & DumpXMLParseEvents // Avoid unused variable warning.
488 ExpatAdapter * thiz = (ExpatAdapter*)userData;
489 #endif
490
491 if ( comment == 0 ) comment = "";
492
493 #if XMP_DebugBuild & DumpXMLParseEvents
494 if ( thiz->parseLog != 0 ) {
495 PrintIndent ( thiz->parseLog, thiz->elemNesting );
496 fprintf ( thiz->parseLog, "Comment: \"%s\"\n", comment );
497 }
498 #endif
499
500 // ! Comments are ignored.
501
502 } // CommentHandler
503
504 // =================================================================================================
505
506 #if BanAllEntityUsage
StartDoctypeDeclHandler(void * userData,XMP_StringPtr doctypeName,XMP_StringPtr sysid,XMP_StringPtr pubid,int has_internal_subset)507 static void StartDoctypeDeclHandler ( void * userData, XMP_StringPtr doctypeName,
508 XMP_StringPtr sysid, XMP_StringPtr pubid, int has_internal_subset )
509 {
510 IgnoreParam(userData);
511
512 ExpatAdapter * thiz = (ExpatAdapter*)userData;
513
514 #if XMP_DebugBuild & DumpXMLParseEvents // Avoid unused variable warning.
515 if ( thiz->parseLog != 0 ) {
516 PrintIndent ( thiz->parseLog, thiz->elemNesting );
517 fprintf ( thiz->parseLog, "DocType: \"%s\"\n", doctypeName );
518 }
519 #endif
520
521 thiz->isAborted = true; // ! Can't throw an exception across the plain C Expat frames.
522 (void) XML_StopParser ( thiz->parser, XML_FALSE /* not resumable */ );
523
524 } // StartDoctypeDeclHandler
525 #endif
526
527 // =================================================================================================
528