1 // =================================================================================================
2 // Copyright 2005-2008 Adobe Systems Incorporated
3 // All Rights Reserved.
4 //
5 // NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms
6 // of the Adobe license agreement accompanying it.
7 // =================================================================================================
8
9 #include "XMP_Environment.h" // ! Must be the first #include!
10 #include "XMPCore_Impl.hpp"
11
12 #include "ExpatAdapter.hpp"
13 #include "XMPMeta.hpp"
14
15 #include "expat.h"
16
17 #include <string.h>
18
using namespace std;

// Silence the MSVC "declared deprecated" diagnostic for this translation unit.
#if XMP_WinBuild
	#pragma warning ( disable : 4996 )	// '...' was declared deprecated
#endif

// *** Set memory handlers.

// Parse event tracing is off unless the build defines DumpXMLParseEvents itself.
#if ! defined ( DumpXMLParseEvents )
	#define DumpXMLParseEvents 0
#endif

// Character handed to XML_ParserCreateNS; Expat places it between the namespace URI and the
// local name of every qualified name it reports.
#define FullNameSeparator '@'
32
33 // =================================================================================================
34
35 static void StartNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix, XMP_StringPtr uri );
36 static void EndNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix );
37
38 static void StartElementHandler ( void * userData, XMP_StringPtr name, XMP_StringPtr* attrs );
39 static void EndElementHandler ( void * userData, XMP_StringPtr name );
40
41 static void CharacterDataHandler ( void * userData, XMP_StringPtr cData, int len );
42 static void StartCdataSectionHandler ( void * userData );
43 static void EndCdataSectionHandler ( void * userData );
44
45 static void ProcessingInstructionHandler ( void * userData, XMP_StringPtr target, XMP_StringPtr data );
46 static void CommentHandler ( void * userData, XMP_StringPtr comment );
47
48 #if BanAllEntityUsage
49
50 // For now we do this by banning DOCTYPE entirely. This is easy and consistent with what is
51 // available in recent Java XML parsers. Another, somewhat less drastic, approach would be to
52 // ban all entity declarations. We can't allow declarations and ban references, Expat does not
53 // call the SkippedEntityHandler for references in attribute values.
54
55 // ! Standard entities (&, <, >, ", ', and numeric character references) are
56 // ! not banned. Expat handles them transparently no matter what.
57
58 static void StartDoctypeDeclHandler ( void * userData, XMP_StringPtr doctypeName,
59 XMP_StringPtr sysid, XMP_StringPtr pubid, int has_internal_subset );
60
61 #endif
62
63 // =================================================================================================
64
XMP_NewExpatAdapter()65 extern "C" ExpatAdapter * XMP_NewExpatAdapter()
66 {
67 return new ExpatAdapter;
68 } // XMP_NewExpatAdapter
69
70 // =================================================================================================
71
ExpatAdapter()72 ExpatAdapter::ExpatAdapter() : parser(0)
73 {
74
75 #if XMP_DebugBuild
76 this->elemNesting = 0;
77 #if DumpXMLParseEvents
78 if ( this->parseLog == 0 ) this->parseLog = stdout;
79 #endif
80 #endif
81
82 this->parser = XML_ParserCreateNS ( 0, FullNameSeparator );
83 if ( this->parser == 0 ) XMP_Throw ( "Failure creating Expat parser", kXMPErr_ExternalFailure );
84
85 XML_SetUserData ( this->parser, this );
86
87 XML_SetNamespaceDeclHandler ( this->parser, StartNamespaceDeclHandler, EndNamespaceDeclHandler );
88 XML_SetElementHandler ( this->parser, StartElementHandler, EndElementHandler );
89
90 XML_SetCharacterDataHandler ( this->parser, CharacterDataHandler );
91 XML_SetCdataSectionHandler ( this->parser, StartCdataSectionHandler, EndCdataSectionHandler );
92
93 XML_SetProcessingInstructionHandler ( this->parser, ProcessingInstructionHandler );
94 XML_SetCommentHandler ( this->parser, CommentHandler );
95
96 #if BanAllEntityUsage
97 XML_SetStartDoctypeDeclHandler ( this->parser, StartDoctypeDeclHandler );
98 isAborted = false;
99 #endif
100
101 this->parseStack.push_back ( &this->tree ); // Push the XML root node.
102
103 } // ExpatAdapter::ExpatAdapter
104
105 // =================================================================================================
106
~ExpatAdapter()107 ExpatAdapter::~ExpatAdapter()
108 {
109
110 if ( this->parser != 0 ) XML_ParserFree ( this->parser );
111 this->parser = 0;
112
113 } // ExpatAdapter::~ExpatAdapter
114
115 // =================================================================================================
116
117 #if XMP_DebugBuild
118 static XMP_VarString sExpatMessage;
119 #endif
120
121 static const char * kOneSpace = " ";
122
ParseBuffer(const void * buffer,size_t length,bool last)123 void ExpatAdapter::ParseBuffer ( const void * buffer, size_t length, bool last /* = true */ )
124 {
125 enum XML_Status status;
126
127 if ( length == 0 ) { // Expat does not like empty buffers.
128 if ( ! last ) return;
129 buffer = kOneSpace;
130 length = 1;
131 }
132
133 status = XML_Parse ( this->parser, (const char *)buffer, length, last );
134
135 #if BanAllEntityUsage
136 if ( this->isAborted ) XMP_Throw ( "DOCTYPE is not allowed", kXMPErr_BadXML );
137 #endif
138
139 if ( status != XML_STATUS_OK ) {
140
141 XMP_StringPtr errMsg = "XML parsing failure";
142
143 #if 0 // XMP_DebugBuild // Disable for now to make test output uniform. Restore later with thread safety.
144
145 // *** This is a good candidate for a callback error notification mechanism.
146 // *** This code is not thread safe, the sExpatMessage isn't locked. But that's OK for debug usage.
147
148 enum XML_Error expatErr = XML_GetErrorCode ( this->parser );
149 const char * expatMsg = XML_ErrorString ( expatErr );
150 int errLine = XML_GetCurrentLineNumber ( this->parser );
151
152 char msgBuffer[1000];
153 // AUDIT: Use of sizeof(msgBuffer) for snprintf length is safe.
154 snprintf ( msgBuffer, sizeof(msgBuffer), "# Expat error %d at line %d, \"%s\"", expatErr, errLine, expatMsg );
155 sExpatMessage = msgBuffer;
156 errMsg = sExpatMessage.c_str();
157
158 #if DumpXMLParseEvents
159 if ( this->parseLog != 0 ) fprintf ( this->parseLog, "%s\n", errMsg, expatErr, errLine, expatMsg );
160 #endif
161
162 #endif
163
164 XMP_Throw ( errMsg, kXMPErr_BadXML );
165
166 }
167
168 } // ExpatAdapter::ParseBuffer
169
170 // =================================================================================================
171 // =================================================================================================
172
173 #if XMP_DebugBuild & DumpXMLParseEvents
174
// Writes one space to file for each of the count nesting levels.
static inline void PrintIndent ( FILE * file, size_t count )
{
	while ( count != 0 ) {
		fputs ( " ", file );
		--count;
	}
}
179
180 #endif
181
182 // =================================================================================================
183
SetQualName(XMP_StringPtr fullName,XML_Node * node)184 static void SetQualName ( XMP_StringPtr fullName, XML_Node * node )
185 {
186 // Expat delivers the full name as a catenation of namespace URI, separator, and local name.
187
188 // As a compatibility hack, an "about" or "ID" attribute of an rdf:Description element is
189 // changed to "rdf:about" or rdf:ID. Easier done here than in the RDF recognizer.
190
191 // As a bug fix hack, change a URI of "http://purl.org/dc/1.1/" to ""http://purl.org/dc/elements/1.1/.
192 // Early versions of Flash that put XMP in SWF used a bad URI for the dc: namespace.
193
194 // ! This code presumes the RDF namespace prefix is "rdf".
195
196 size_t sepPos = strlen(fullName);
197 for ( --sepPos; sepPos > 0; --sepPos ) {
198 if ( fullName[sepPos] == FullNameSeparator ) break;
199 }
200
201 if ( fullName[sepPos] == FullNameSeparator ) {
202
203 XMP_StringPtr prefix;
204 XMP_StringLen prefixLen;
205 XMP_StringPtr localPart = fullName + sepPos + 1;
206
207 node->ns.assign ( fullName, sepPos );
208 if ( node->ns == "http://purl.org/dc/1.1/" ) node->ns = "http://purl.org/dc/elements/1.1/";
209
210 bool found = XMPMeta::GetNamespacePrefix ( node->ns.c_str(), &prefix, &prefixLen );
211 if ( ! found ) XMP_Throw ( "Unknown URI in Expat full name", kXMPErr_ExternalFailure );
212 node->nsPrefixLen = prefixLen; // ! Includes the ':'.
213
214 node->name = prefix;
215 node->name += localPart;
216
217 } else {
218
219 node->name = fullName; // The name is not in a namespace.
220
221 if ( node->parent->name == "rdf:Description" ) {
222 if ( node->name == "about" ) {
223 node->ns = kXMP_NS_RDF;
224 node->name = "rdf:about";
225 node->nsPrefixLen = 4; // ! Include the ':'.
226 } else if ( node->name == "ID" ) {
227 node->ns = kXMP_NS_RDF;
228 node->name = "rdf:ID";
229 node->nsPrefixLen = 4; // ! Include the ':'.
230 }
231 }
232
233 }
234
235 } // SetQualName
236
237 // =================================================================================================
238
StartNamespaceDeclHandler(void * userData,XMP_StringPtr prefix,XMP_StringPtr uri)239 static void StartNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix, XMP_StringPtr uri )
240 {
241 IgnoreParam(userData);
242
243 // As a bug fix hack, change a URI of "http://purl.org/dc/1.1/" to ""http://purl.org/dc/elements/1.1/.
244 // Early versions of Flash that put XMP in SWF used a bad URI for the dc: namespace.
245
246 #if XMP_DebugBuild & DumpXMLParseEvents // Avoid unused variable warning.
247 ExpatAdapter * thiz = (ExpatAdapter*)userData;
248 #endif
249
250 if ( prefix == 0 ) prefix = "_dflt_"; // Have default namespace.
251 if ( uri == 0 ) return; // Ignore, have xmlns:pre="", no URI to register.
252
253 #if XMP_DebugBuild & DumpXMLParseEvents
254 if ( thiz->parseLog != 0 ) {
255 PrintIndent ( thiz->parseLog, thiz->elemNesting );
256 fprintf ( thiz->parseLog, "StartNamespace: %s - \"%s\"\n", prefix, uri );
257 }
258 #endif
259
260 if ( XMP_LitMatch ( uri, "http://purl.org/dc/1.1/" ) ) uri = "http://purl.org/dc/elements/1.1/";
261 (void) XMPMeta::RegisterNamespace ( uri, prefix, &voidStringPtr, &voidStringLen );
262
263 } // StartNamespaceDeclHandler
264
265 // =================================================================================================
266
EndNamespaceDeclHandler(void * userData,XMP_StringPtr prefix)267 static void EndNamespaceDeclHandler ( void * userData, XMP_StringPtr prefix )
268 {
269 IgnoreParam(userData);
270
271 #if XMP_DebugBuild & DumpXMLParseEvents // Avoid unused variable warning.
272 ExpatAdapter * thiz = (ExpatAdapter*)userData;
273 #endif
274
275 if ( prefix == 0 ) prefix = "_dflt_"; // Have default namespace.
276
277 #if XMP_DebugBuild & DumpXMLParseEvents
278 if ( thiz->parseLog != 0 ) {
279 PrintIndent ( thiz->parseLog, thiz->elemNesting );
280 fprintf ( thiz->parseLog, "EndNamespace: %s\n", prefix );
281 }
282 #endif
283
284 // ! Nothing to do, Expat has done all of the XML processing.
285
286 } // EndNamespaceDeclHandler
287
288 // =================================================================================================
289
StartElementHandler(void * userData,XMP_StringPtr name,XMP_StringPtr * attrs)290 static void StartElementHandler ( void * userData, XMP_StringPtr name, XMP_StringPtr* attrs )
291 {
292 XMP_Assert ( attrs != 0 );
293 ExpatAdapter * thiz = (ExpatAdapter*)userData;
294
295 size_t attrCount = 0;
296 for ( XMP_StringPtr* a = attrs; *a != 0; ++a ) ++attrCount;
297 if ( (attrCount & 1) != 0 ) XMP_Throw ( "Expat attribute info has odd length", kXMPErr_ExternalFailure );
298 attrCount = attrCount/2; // They are name/value pairs.
299
300 #if XMP_DebugBuild & DumpXMLParseEvents
301 if ( thiz->parseLog != 0 ) {
302 PrintIndent ( thiz->parseLog, thiz->elemNesting );
303 fprintf ( thiz->parseLog, "StartElement: %s, %d attrs", name, attrCount );
304 for ( XMP_StringPtr* attr = attrs; *attr != 0; attr += 2 ) {
305 XMP_StringPtr attrName = *attr;
306 XMP_StringPtr attrValue = *(attr+1);
307 fprintf ( thiz->parseLog, ", %s = \"%s\"", attrName, attrValue );
308 }
309 fprintf ( thiz->parseLog, "\n" );
310 }
311 #endif
312
313 XML_Node * parentNode = thiz->parseStack.back();
314 XML_Node * elemNode = new XML_Node ( parentNode, "", kElemNode );
315
316 SetQualName ( name, elemNode );
317
318 for ( XMP_StringPtr* attr = attrs; *attr != 0; attr += 2 ) {
319
320 XMP_StringPtr attrName = *attr;
321 XMP_StringPtr attrValue = *(attr+1);
322 XML_Node * attrNode = new XML_Node ( elemNode, "", kAttrNode );
323
324 SetQualName ( attrName, attrNode );
325 attrNode->value = attrValue;
326 if ( attrNode->name == "xml:lang" ) NormalizeLangValue ( &attrNode->value );
327 elemNode->attrs.push_back ( attrNode );
328
329 }
330
331 parentNode->content.push_back ( elemNode );
332 thiz->parseStack.push_back ( elemNode );
333
334 if ( elemNode->name == "rdf:RDF" ) {
335 thiz->rootNode = elemNode;
336 ++thiz->rootCount;
337 }
338 #if XMP_DebugBuild
339 ++thiz->elemNesting;
340 #endif
341
342 } // StartElementHandler
343
344 // =================================================================================================
345
EndElementHandler(void * userData,XMP_StringPtr name)346 static void EndElementHandler ( void * userData, XMP_StringPtr name )
347 {
348 IgnoreParam(name);
349
350 ExpatAdapter * thiz = (ExpatAdapter*)userData;
351
352 #if XMP_DebugBuild
353 --thiz->elemNesting;
354 #endif
355 (void) thiz->parseStack.pop_back();
356
357 #if XMP_DebugBuild & DumpXMLParseEvents
358 if ( thiz->parseLog != 0 ) {
359 PrintIndent ( thiz->parseLog, thiz->elemNesting );
360 fprintf ( thiz->parseLog, "EndElement: %s\n", name );
361 }
362 #endif
363
364 } // EndElementHandler
365
366 // =================================================================================================
367
CharacterDataHandler(void * userData,XMP_StringPtr cData,int len)368 static void CharacterDataHandler ( void * userData, XMP_StringPtr cData, int len )
369 {
370 ExpatAdapter * thiz = (ExpatAdapter*)userData;
371
372 if ( (cData == 0) || (len == 0) ) { cData = ""; len = 0; }
373
374 #if XMP_DebugBuild & DumpXMLParseEvents
375 if ( thiz->parseLog != 0 ) {
376 PrintIndent ( thiz->parseLog, thiz->elemNesting );
377 fprintf ( thiz->parseLog, "CharContent: \"" );
378 for ( int i = 0; i < len; ++i ) fprintf ( thiz->parseLog, "%c", cData[i] );
379 fprintf ( thiz->parseLog, "\"\n" );
380 }
381 #endif
382
383 XML_Node * parentNode = thiz->parseStack.back();
384 XML_Node * cDataNode = new XML_Node ( parentNode, "", kCDataNode );
385
386 cDataNode->value.assign ( cData, len );
387 parentNode->content.push_back ( cDataNode );
388
389 } // CharacterDataHandler
390
391 // =================================================================================================
392
StartCdataSectionHandler(void * userData)393 static void StartCdataSectionHandler ( void * userData )
394 {
395 IgnoreParam(userData);
396
397 #if XMP_DebugBuild & DumpXMLParseEvents // Avoid unused variable warning.
398 ExpatAdapter * thiz = (ExpatAdapter*)userData;
399 #endif
400
401 #if XMP_DebugBuild & DumpXMLParseEvents
402 if ( thiz->parseLog != 0 ) {
403 PrintIndent ( thiz->parseLog, thiz->elemNesting );
404 fprintf ( thiz->parseLog, "StartCDATA\n" );
405 }
406 #endif
407
408 // *** Since markup isn't recognized inside CDATA, this affects XMP's double escaping.
409
410 } // StartCdataSectionHandler
411
412 // =================================================================================================
413
EndCdataSectionHandler(void * userData)414 static void EndCdataSectionHandler ( void * userData )
415 {
416 IgnoreParam(userData);
417
418 #if XMP_DebugBuild & DumpXMLParseEvents // Avoid unused variable warning.
419 ExpatAdapter * thiz = (ExpatAdapter*)userData;
420 #endif
421
422 #if XMP_DebugBuild & DumpXMLParseEvents
423 if ( thiz->parseLog != 0 ) {
424 PrintIndent ( thiz->parseLog, thiz->elemNesting );
425 fprintf ( thiz->parseLog, "EndCDATA\n" );
426 }
427 #endif
428
429 } // EndCdataSectionHandler
430
431 // =================================================================================================
432
ProcessingInstructionHandler(void * userData,XMP_StringPtr target,XMP_StringPtr data)433 static void ProcessingInstructionHandler ( void * userData, XMP_StringPtr target, XMP_StringPtr data )
434 {
435 XMP_Assert ( target != 0 );
436 ExpatAdapter * thiz = (ExpatAdapter*)userData;
437
438 if ( ! XMP_LitMatch ( target, "xpacket" ) ) return; // Ignore all PIs except the XMP packet wrapper.
439 if ( data == 0 ) data = "";
440
441 #if XMP_DebugBuild & DumpXMLParseEvents
442 if ( thiz->parseLog != 0 ) {
443 PrintIndent ( thiz->parseLog, thiz->elemNesting );
444 fprintf ( thiz->parseLog, "PI: %s - \"%s\"\n", target, data );
445 }
446 #endif
447
448 XML_Node * parentNode = thiz->parseStack.back();
449 XML_Node * piNode = new XML_Node ( parentNode, target, kPINode );
450
451 piNode->value.assign ( data );
452 parentNode->content.push_back ( piNode );
453
454 } // ProcessingInstructionHandler
455
456 // =================================================================================================
457
CommentHandler(void * userData,XMP_StringPtr comment)458 static void CommentHandler ( void * userData, XMP_StringPtr comment )
459 {
460 IgnoreParam(userData);
461
462 #if XMP_DebugBuild & DumpXMLParseEvents // Avoid unused variable warning.
463 ExpatAdapter * thiz = (ExpatAdapter*)userData;
464 #endif
465
466 if ( comment == 0 ) comment = "";
467
468 #if XMP_DebugBuild & DumpXMLParseEvents
469 if ( thiz->parseLog != 0 ) {
470 PrintIndent ( thiz->parseLog, thiz->elemNesting );
471 fprintf ( thiz->parseLog, "Comment: \"%s\"\n", comment );
472 }
473 #endif
474
475 // ! Comments are ignored.
476
477 } // CommentHandler
478
479 // =================================================================================================
480
#if BanAllEntityUsage
// Expat callback for the start of a DOCTYPE declaration. DOCTYPE is banned, so this marks the
// adapter as aborted and tells Expat to stop for good. ParseBuffer turns the flag into an
// exception once XML_Parse has returned.
static void StartDoctypeDeclHandler ( void * userData, XMP_StringPtr doctypeName,
									  XMP_StringPtr sysid, XMP_StringPtr pubid, int has_internal_subset )
{
	IgnoreParam(userData);

	ExpatAdapter * adapter = static_cast<ExpatAdapter*> ( userData );

	#if XMP_DebugBuild & DumpXMLParseEvents
	{
		FILE * log = adapter->parseLog;
		if ( log != 0 ) {
			PrintIndent ( log, adapter->elemNesting );
			fprintf ( log, "DocType: \"%s\"\n", doctypeName );
		}
	}
	#endif

	adapter->isAborted = true;	// ! Can't throw an exception across the plain C Expat frames.
	(void) XML_StopParser ( adapter->parser, XML_FALSE /* not resumable */ );

}	// StartDoctypeDeclHandler
#endif
501
502 // =================================================================================================
503