1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * raptor_sax2.c - Raptor SAX2 API
4  *
5  * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
6  * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7  *
8  * This package is Free Software and part of Redland http://librdf.org/
9  *
10  * It is licensed under the following three licenses as alternatives:
11  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12  *   2. GNU General Public License (GPL) V2 or any newer version
13  *   3. Apache License, V2.0 or any newer version
14  *
15  * You may not use this file except in compliance with at least one of
16  * the above three licenses.
17  *
18  * See LICENSE.html or LICENSE.txt at the top of this package for the
19  * complete terms and further detail along with the license texts for
20  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21  *
22  *
23  */
24 
25 
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29 
30 #ifdef WIN32
31 #include <win32_raptor_config.h>
32 #endif
33 
34 
35 #include <stdio.h>
36 #include <string.h>
37 #include <ctype.h>
38 #include <stdarg.h>
39 #ifdef HAVE_ERRNO_H
40 #include <errno.h>
41 #endif
42 #ifdef HAVE_STDLIB_H
43 #include <stdlib.h>
44 #endif
45 
46 /* Raptor includes */
47 #include "raptor.h"
48 #include "raptor_internal.h"
49 
50 
51 /* Define this for far too much output */
52 #undef RAPTOR_DEBUG_CDATA
53 
54 
55 int
raptor_sax2_init(raptor_world * world)56 raptor_sax2_init(raptor_world* world)
57 {
58   if(world->sax2_initialized++)
59     return 0;
60 
61 #ifdef RAPTOR_XML_LIBXML
62   xmlInitParser();
63 #endif
64   return 0;
65 }
66 
67 
68 void
raptor_sax2_finish(raptor_world * world)69 raptor_sax2_finish(raptor_world* world)
70 {
71   if(--world->sax2_initialized)
72     return;
73 
74 #ifdef RAPTOR_XML_LIBXML
75   /* Should call this after all uses of libxml are done.
76    * In particular after xmlSetStructuredErrorFunc() otherwise
77    * it has reportedly caused an access violation on windows.
78    */
79   xmlCleanupParser();
80 #endif
81 }
82 
83 
84 /**
85  * raptor_new_sax2:
86  * @user_data: pointer context information to pass to handlers
87  * @error_handlers: error handlers pointer
88  *
89  * Constructor - Create a new SAX2 with error handlers
90  *
91  * Return value: new #raptor_sax2 object or NULL on failure
92  */
93 raptor_sax2*
raptor_new_sax2(void * user_data,raptor_error_handlers * error_handlers)94 raptor_new_sax2(void* user_data, raptor_error_handlers* error_handlers)
95 {
96   raptor_sax2* sax2;
97   sax2=(raptor_sax2*)RAPTOR_CALLOC(raptor_sax2, 1, sizeof(raptor_sax2));
98   if(!sax2)
99     return NULL;
100 
101 #ifdef RAPTOR_XML_LIBXML
102   sax2->magic=RAPTOR_LIBXML_MAGIC;
103 #endif
104 
105   sax2->world=error_handlers->world;
106 
107   sax2->user_data=user_data;
108 
109   sax2->enabled = 1;
110 
111   sax2->locator=error_handlers->locator;
112 
113   sax2->error_handlers=error_handlers;
114 
115 #ifdef RAPTOR_XML_LIBXML
116   if(sax2->world->libxml_flags & RAPTOR_LIBXML_FLAGS_STRUCTURED_ERROR_SAVE) {
117     sax2->saved_structured_error_context = xmlGenericErrorContext;
118     sax2->saved_structured_error_handler = xmlStructuredError;
119     /* sets xmlGenericErrorContext and xmlStructuredError */
120     xmlSetStructuredErrorFunc(&sax2->error_handlers,
121                               (xmlStructuredErrorFunc)raptor_libxml_xmlStructuredErrorFunc);
122   }
123 
124   if(sax2->world->libxml_flags & RAPTOR_LIBXML_FLAGS_GENERIC_ERROR_SAVE) {
125     sax2->saved_generic_error_context = xmlGenericErrorContext;
126     sax2->saved_generic_error_handler = xmlGenericError;
127     /* sets xmlGenericErrorContext and xmlGenericError */
128     xmlSetGenericErrorFunc(&sax2->error_handlers,
129                            (xmlGenericErrorFunc)raptor_libxml_generic_error);
130   }
131 #endif
132 
133   return sax2;
134 }
135 
136 
137 /**
138  * raptor_free_sax2:
139  * @sax2: SAX2 object
140  *
141  * Destructor - destroy a SAX2 object
142  */
143 void
raptor_free_sax2(raptor_sax2 * sax2)144 raptor_free_sax2(raptor_sax2 *sax2)
145 {
146   raptor_xml_element *xml_element;
147 
148   RAPTOR_ASSERT_OBJECT_POINTER_RETURN(sax2, raptor_sax2);
149 
150 #ifdef RAPTOR_XML_EXPAT
151   if(sax2->xp) {
152     XML_ParserFree(sax2->xp);
153     sax2->xp=NULL;
154   }
155 #endif
156 
157 #ifdef RAPTOR_XML_LIBXML
158   if(sax2->xc) {
159     raptor_libxml_free(sax2->xc);
160     sax2->xc=NULL;
161   }
162 
163   if(sax2->world->libxml_flags & RAPTOR_LIBXML_FLAGS_STRUCTURED_ERROR_SAVE)
164     xmlSetStructuredErrorFunc(sax2->saved_structured_error_context,
165                               sax2->saved_structured_error_handler);
166 
167   if(sax2->world->libxml_flags & RAPTOR_LIBXML_FLAGS_GENERIC_ERROR_SAVE)
168     xmlSetGenericErrorFunc(sax2->saved_generic_error_context,
169                            sax2->saved_generic_error_handler);
170 #endif
171 
172   while( (xml_element=raptor_xml_element_pop(sax2)) )
173     raptor_free_xml_element(xml_element);
174 
175   raptor_namespaces_clear(&sax2->namespaces);
176 
177   if(sax2->base_uri)
178     raptor_free_uri_v2(sax2->world, sax2->base_uri);
179 
180   RAPTOR_FREE(raptor_sax2, sax2);
181 }
182 
183 
184 /**
185  * raptor_sax2_set_start_element_handler:
186  * @sax2: SAX2 object
187  * @handler: start element handler
188  *
189  * Set SAX2 start element handler.
190  */
191 void
raptor_sax2_set_start_element_handler(raptor_sax2 * sax2,raptor_sax2_start_element_handler handler)192 raptor_sax2_set_start_element_handler(raptor_sax2* sax2,
193                                       raptor_sax2_start_element_handler handler)
194 {
195   sax2->start_element_handler=handler;
196 }
197 
198 
199 /**
200  * raptor_sax2_set_end_element_handler:
201  * @sax2: SAX2 object
202  * @handler: end element handler
203  *
204  * Set SAX2 end element handler.
205  */
206 void
raptor_sax2_set_end_element_handler(raptor_sax2 * sax2,raptor_sax2_end_element_handler handler)207 raptor_sax2_set_end_element_handler(raptor_sax2* sax2,
208                                     raptor_sax2_end_element_handler handler)
209 {
210   sax2->end_element_handler=handler;
211 }
212 
213 
214 /**
215  * raptor_sax2_set_characters_handler:
216  * @sax2: SAX2 object
217  * @handler: characters handler
218  *
219  * Set SAX2 characters handler.
220  */
221 void
raptor_sax2_set_characters_handler(raptor_sax2 * sax2,raptor_sax2_characters_handler handler)222 raptor_sax2_set_characters_handler(raptor_sax2* sax2,
223                                    raptor_sax2_characters_handler handler)
224 {
225   sax2->characters_handler=handler;
226 }
227 
228 
229 /**
230  * raptor_sax2_set_cdata_handler:
231  * @sax2: SAX2 object
232  * @handler: CDATA handler
233  *
234  * Set SAX2 CDATA handler.
235  */
236 void
raptor_sax2_set_cdata_handler(raptor_sax2 * sax2,raptor_sax2_cdata_handler handler)237 raptor_sax2_set_cdata_handler(raptor_sax2* sax2,
238                               raptor_sax2_cdata_handler handler)
239 {
240   sax2->cdata_handler=handler;
241 }
242 
243 
244 /**
245  * raptor_sax2_set_comment_handler:
246  * @sax2: SAX2 object
247  * @handler: comment handler
248  *
249  * Set SAX2 XML comment handler.
250  */
251 void
raptor_sax2_set_comment_handler(raptor_sax2 * sax2,raptor_sax2_comment_handler handler)252 raptor_sax2_set_comment_handler(raptor_sax2* sax2,
253                                 raptor_sax2_comment_handler handler)
254 {
255   sax2->comment_handler=handler;
256 }
257 
258 
259 /**
260  * raptor_sax2_set_unparsed_entity_decl_handler:
261  * @sax2: SAX2 object
262  * @handler: unparsed entity declaration handler
263  *
264  * Set SAX2 XML unparsed entity declaration handler.
265  */
266 void
raptor_sax2_set_unparsed_entity_decl_handler(raptor_sax2 * sax2,raptor_sax2_unparsed_entity_decl_handler handler)267 raptor_sax2_set_unparsed_entity_decl_handler(raptor_sax2* sax2,
268                                              raptor_sax2_unparsed_entity_decl_handler handler)
269 {
270   sax2->unparsed_entity_decl_handler=handler;
271 }
272 
273 
274 /**
275  * raptor_sax2_set_external_entity_ref_handler:
276  * @sax2: SAX2 object
277  * @handler: entity reference handler
278  *
279  * Set SAX2 XML entity reference handler.
280  */
281 void
raptor_sax2_set_external_entity_ref_handler(raptor_sax2 * sax2,raptor_sax2_external_entity_ref_handler handler)282 raptor_sax2_set_external_entity_ref_handler(raptor_sax2* sax2,
283                                             raptor_sax2_external_entity_ref_handler handler)
284 {
285   sax2->external_entity_ref_handler=handler;
286 }
287 
288 
289 /**
290  * raptor_sax2_set_namespace_handler:
291  * @sax2: #raptor_sax2 object
292  * @handler: new namespace callback function
293  *
294  * Set the XML namespace handler function.
295  *
296  * When a prefix/namespace is seen in an XML parser, call the given
297  * @handler with the prefix string and the #raptor_uri namespace URI.
298  * Either can be NULL for the default prefix or default namespace.
299  *
300  * The handler function does not deal with duplicates so any
301  * namespace may be declared multiple times when a namespace is seen
302  * in different parts of a document.
303  *
304  */
305 void
raptor_sax2_set_namespace_handler(raptor_sax2 * sax2,raptor_namespace_handler handler)306 raptor_sax2_set_namespace_handler(raptor_sax2* sax2,
307                                   raptor_namespace_handler handler)
308 {
309   sax2->namespace_handler=handler;
310 }
311 
312 
313 raptor_xml_element*
raptor_xml_element_pop(raptor_sax2 * sax2)314 raptor_xml_element_pop(raptor_sax2 *sax2)
315 {
316   raptor_xml_element *element=sax2->current_element;
317 
318   if(!element)
319     return NULL;
320 
321   sax2->current_element=element->parent;
322   if(sax2->root_element == element) /* just deleted root */
323     sax2->root_element=NULL;
324 
325   return element;
326 }
327 
328 
329 void
raptor_xml_element_push(raptor_sax2 * sax2,raptor_xml_element * element)330 raptor_xml_element_push(raptor_sax2 *sax2, raptor_xml_element* element)
331 {
332   element->parent=sax2->current_element;
333   sax2->current_element=element;
334   if(!sax2->root_element)
335     sax2->root_element=element;
336 }
337 
338 
339 /**
340  * raptor_xml_element_is_empty:
341  * @xml_element: XML Element
342  *
343  * Check if an XML Element is empty.
344  *
345  * Return value: non-0 if the element is empty.
346  */
347 int
raptor_xml_element_is_empty(raptor_xml_element * xml_element)348 raptor_xml_element_is_empty(raptor_xml_element* xml_element)
349 {
350   return !xml_element->content_cdata_seen &&
351          !xml_element->content_element_seen;
352 }
353 
354 
355 /**
356  * raptor_sax2_inscope_xml_language:
357  * @sax2: SAX2 object
358  *
359  * Get the in-scope XML language
360  *
361  * Return value: the XML language or NULL if none is in scope.
362  */
363 const unsigned char*
raptor_sax2_inscope_xml_language(raptor_sax2 * sax2)364 raptor_sax2_inscope_xml_language(raptor_sax2 *sax2)
365 {
366   raptor_xml_element* xml_element;
367 
368   for(xml_element=sax2->current_element;
369       xml_element;
370       xml_element=xml_element->parent)
371     if(xml_element->xml_language) {
372       if(!*xml_element->xml_language)
373         return NULL;
374       return xml_element->xml_language;
375     }
376 
377 
378   return NULL;
379 }
380 
381 
382 /**
383  * raptor_sax2_inscope_base_uri:
384  * @sax2: SAX2 object
385  *
386  * Get the in-scope base URI
387  *
388  * Return value: the in-scope base URI shared object or NULL if none is in scope.
389  */
390 raptor_uri*
raptor_sax2_inscope_base_uri(raptor_sax2 * sax2)391 raptor_sax2_inscope_base_uri(raptor_sax2 *sax2)
392 {
393   raptor_xml_element *xml_element;
394 
395   for(xml_element=sax2->current_element;
396       xml_element;
397       xml_element=xml_element->parent)
398     if(xml_element->base_uri)
399       return xml_element->base_uri;
400 
401   return sax2->base_uri;
402 }
403 
404 
405 int
raptor_sax2_get_depth(raptor_sax2 * sax2)406 raptor_sax2_get_depth(raptor_sax2 *sax2)
407 {
408   return sax2->depth;
409 }
410 
411 void
raptor_sax2_inc_depth(raptor_sax2 * sax2)412 raptor_sax2_inc_depth(raptor_sax2 *sax2)
413 {
414   sax2->depth++;
415 }
416 
417 void
raptor_sax2_dec_depth(raptor_sax2 * sax2)418 raptor_sax2_dec_depth(raptor_sax2 *sax2)
419 {
420   sax2->depth--;
421 }
422 
423 
424 static void raptor_sax2_simple_error(void* user_data, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3);
425 
426 /*
427  * raptor_sax2_simple_error - Error from a sax2 - Internal
428  *
429  * Matches the raptor_simple_message_handler API but calls
430  * the sax2 error_handler
431  */
432 static void
raptor_sax2_simple_error(void * user_data,const char * message,...)433 raptor_sax2_simple_error(void* user_data, const char *message, ...)
434 {
435   raptor_sax2* sax2=(raptor_sax2*)user_data;
436   va_list arguments;
437 
438   va_start(arguments, message);
439 
440   if(sax2) {
441     raptor_log_level level=RAPTOR_LOG_LEVEL_ERROR;
442     raptor_message_handler_closure* cl;
443     cl=&sax2->error_handlers->handlers[level];
444     raptor_log_error_varargs(sax2->world,
445                              level, cl->handler, cl->user_data,
446                              sax2->locator,
447                              message, arguments);
448   }
449 
450   va_end(arguments);
451 }
452 
453 
454 
455 /**
456  * raptor_sax2_parse_start:
457  * @sax2: sax2 object
458  * @base_uri: base URI
459  *
460  * Start an XML SAX2 parse.
461  */
462 void
raptor_sax2_parse_start(raptor_sax2 * sax2,raptor_uri * base_uri)463 raptor_sax2_parse_start(raptor_sax2* sax2, raptor_uri *base_uri)
464 {
465   sax2->depth=0;
466   sax2->root_element=NULL;
467   sax2->current_element=NULL;
468 
469   if(sax2->base_uri)
470     raptor_free_uri_v2(sax2->world, sax2->base_uri);
471   if(base_uri)
472     sax2->base_uri=raptor_uri_copy_v2(sax2->world, base_uri);
473   else
474     sax2->base_uri=NULL;
475 
476 #ifdef RAPTOR_XML_EXPAT
477   if(sax2->xp) {
478     XML_ParserFree(sax2->xp);
479     sax2->xp=NULL;
480   }
481 
482   raptor_expat_init(sax2, base_uri);
483 #endif
484 
485 #ifdef RAPTOR_XML_LIBXML
486   raptor_libxml_init(sax2, base_uri);
487 
488   xmlSetStructuredErrorFunc(&sax2->error_handlers,
489                             raptor_libxml_xmlStructuredErrorFunc);
490 
491 #if LIBXML_VERSION < 20425
492   sax2->first_read=1;
493 #endif
494 
495   if(sax2->xc) {
496     raptor_libxml_free(sax2->xc);
497     sax2->xc=NULL;
498   }
499 #endif
500 
501   raptor_namespaces_clear(&sax2->namespaces);
502 
503   if(raptor_namespaces_init_v2(sax2->world,
504                                &sax2->namespaces,
505                                (raptor_simple_message_handler)raptor_sax2_simple_error, sax2,
506                                1)) {
507     /* log a fatal error and set sax2 to failed state
508        since the function signature does not currently support returning an error */
509     raptor_log_error_to_handlers(sax2->world,
510                                  sax2->error_handlers,
511                                  RAPTOR_LOG_LEVEL_FATAL, sax2->locator,
512                                  "raptor_namespaces_init_v2() failed");
513     sax2->failed = 1;
514   }
515 }
516 
517 
518 /**
519  * raptor_sax2_parse_chunk:
520  * @sax2: sax2 object
521  * @buffer: input buffer
522  * @len: input buffer lenght
523  * @is_end: non-0 if end of data
524  *
525  * Parse a chunk of XML data generating SAX2 events
526  *
527  * Return value: non-0 on failure
528  */
529 int
raptor_sax2_parse_chunk(raptor_sax2 * sax2,const unsigned char * buffer,size_t len,int is_end)530 raptor_sax2_parse_chunk(raptor_sax2* sax2, const unsigned char *buffer,
531                         size_t len, int is_end)
532 {
533 #ifdef RAPTOR_XML_EXPAT
534   XML_Parser xp=sax2->xp;
535   int rc;
536 #endif
537 #ifdef RAPTOR_XML_LIBXML
538   /* parser context */
539   xmlParserCtxtPtr xc=sax2->xc;
540   int rc;
541 #endif
542 
543 #ifdef RAPTOR_XML_LIBXML
544   if(!xc) {
545     int libxml_options = 0;
546 
547     if(!len) {
548       /* no data given at all - emit a similar message to expat */
549       raptor_sax2_update_document_locator(sax2, sax2->locator);
550       raptor_log_error_to_handlers(sax2->world,
551                                    sax2->error_handlers,
552                                    RAPTOR_LOG_LEVEL_ERROR, sax2->locator,
553                                    "XML Parsing failed - no element found");
554       return 1;
555     }
556 
557     xc = xmlCreatePushParserCtxt(&sax2->sax, sax2, /* user data */
558                                  (char*)buffer, len,
559                                  NULL);
560     if(!xc)
561       goto handle_error;
562 
563 #ifdef RAPTOR_LIBXML_XML_PARSE_NONET
564     if(sax2->feature_no_net)
565       libxml_options |= XML_PARSE_NONET;
566 #endif
567 #ifdef HAVE_XMLCTXTUSEOPTIONS
568     xmlCtxtUseOptions(xc, libxml_options);
569 #endif
570 
571     xc->userData = sax2; /* user data */
572     xc->vctxt.userData = sax2; /* user data */
573     xc->vctxt.error=(xmlValidityErrorFunc)raptor_libxml_validation_error;
574     xc->vctxt.warning=(xmlValidityWarningFunc)raptor_libxml_validation_warning;
575     xc->replaceEntities = 1;
576 
577     sax2->xc = xc;
578 
579     if(is_end)
580       len=0;
581     else
582       return 0;
583   }
584 #endif
585 
586   if(!len) {
587 #ifdef RAPTOR_XML_EXPAT
588     rc=XML_Parse(xp, (char*)buffer, 0, 1);
589     if(!rc) /* expat: 0 is failure */
590       goto handle_error;
591 #endif
592 #ifdef RAPTOR_XML_LIBXML
593     xmlParseChunk(xc, (char*)buffer, 0, 1);
594 #endif
595     return 0;
596   }
597 
598 
599 #ifdef RAPTOR_XML_EXPAT
600   rc=XML_Parse(xp, (char*)buffer, len, is_end);
601   if(!rc) /* expat: 0 is failure */
602     goto handle_error;
603   if(is_end)
604     return 0;
605 #endif
606 
607 #ifdef RAPTOR_XML_LIBXML
608 
609   /* This works around some libxml versions that fail to work
610    * if the buffer size is larger than the entire file
611    * and thus the entire parsing is done in one operation.
612    *
613    * The code below:
614    *   2.4.19 (oldest tested) to 2.4.24 - required
615    *   2.4.25                           - works with or without it
616    *   2.4.26 or later                  - fails with this code
617    */
618 
619 #if LIBXML_VERSION < 20425
620   if(sax2->first_read && is_end) {
621     /* parse all but the last character */
622     rc = xmlParseChunk(xc, (char*)buffer, len-1, 0);
623     if(rc && rc != XML_WAR_UNDECLARED_ENTITY)
624       goto handle_error;
625     /* last character */
626     rc = xmlParseChunk(xc, (char*)buffer + (len-1), 1, 0);
627     if(rc && rc != XML_WAR_UNDECLARED_ENTITY)
628       goto handle_error;
629     /* end */
630     xmlParseChunk(xc, (char*)buffer, 0, 1);
631     return 0;
632   }
633 #endif
634 
635 #if LIBXML_VERSION < 20425
636   sax2->first_read=0;
637 #endif
638 
639   rc = xmlParseChunk(xc, (char*)buffer, len, is_end);
640   if(rc && rc != XML_WAR_UNDECLARED_ENTITY) /* libxml: non 0 is failure */
641     goto handle_error;
642   if(is_end)
643     return 0;
644 #endif
645 
646   return 0;
647 
648 #if defined(RAPTOR_XML_EXPAT) || defined(RAPTOR_XML_LIBXML)
649   handle_error:
650 #endif
651 
652 #ifdef RAPTOR_XML_EXPAT
653 #ifdef EXPAT_UTF8_BOM_CRASH
654   if(sax2->tokens_count) {
655 #endif
656     /* Work around a bug with the expat 1.95.1 shipped with RedHat 7.2
657      * which dies here if the error is before <?xml?...
658      * The expat 1.95.1 source release version works fine.
659      */
660     if(sax2->locator)
661       raptor_sax2_update_document_locator(sax2, sax2->locator);
662 #ifdef EXPAT_UTF8_BOM_CRASH
663   }
664 #endif
665 #endif /* EXPAT */
666 
667 #ifdef RAPTOR_XML_EXPAT
668   if(1) {
669     const char *error_prefix="XML Parsing failed - "; /* 21 chars */
670     #define ERROR_PREFIX_LEN 21
671     const char *error_message=XML_ErrorString(XML_GetErrorCode(xp));
672     size_t error_length;
673     char *error_buffer;
674 
675     error_length=strlen(error_message);
676     error_buffer=(char*)RAPTOR_MALLOC(cstring,
677                                       ERROR_PREFIX_LEN + error_length+1);
678     if(error_buffer) {
679       strncpy(error_buffer, error_prefix, ERROR_PREFIX_LEN);
680       strncpy(error_buffer+ERROR_PREFIX_LEN, error_message, error_length+1);
681 
682       raptor_log_error_to_handlers(sax2->world,
683                                    sax2->error_handlers,
684                                    RAPTOR_LOG_LEVEL_ERROR,
685                                    sax2->locator, error_buffer);
686       RAPTOR_FREE(cstring, error_buffer);
687     } else
688       raptor_log_error_to_handlers(sax2->world,
689                                    sax2->error_handlers,
690                                    RAPTOR_LOG_LEVEL_ERROR,
691                                    sax2->locator, "XML Parsing failed");
692   }
693 #endif
694 
695   return 1;
696 }
697 
698 
699 /**
700  * raptor_sax2_set_feature:
701  * @sax2: #raptor_sax2 SAX2 object
702  * @feature: feature to set from enumerated #raptor_feature values
703  * @value: integer feature value (0 or larger)
704  *
705  * Set various SAX2 features.
706  *
707  * The allowed features are available via raptor_sax2_features_enumerate().
708  *
709  * Return value: non 0 on failure or if the feature is unknown
710  */
711 int
raptor_sax2_set_feature(raptor_sax2 * sax2,raptor_feature feature,int value)712 raptor_sax2_set_feature(raptor_sax2 *sax2, raptor_feature feature, int value)
713 {
714   if(value < 0)
715     return -1;
716 
717   switch(feature) {
718     case RAPTOR_FEATURE_NORMALIZE_LANGUAGE:
719       sax2->feature_normalize_language=value;
720       break;
721 
722     case RAPTOR_FEATURE_NO_NET:
723       sax2->feature_no_net=value;
724       break;
725 
726     case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES:
727       sax2->feature_load_external_entities=value;
728       break;
729 
730     case RAPTOR_FEATURE_SCANNING:
731     case RAPTOR_FEATURE_ASSUME_IS_RDF:
732     case RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES:
733     case RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES:
734     case RAPTOR_FEATURE_ALLOW_BAGID:
735     case RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST:
736     case RAPTOR_FEATURE_NON_NFC_FATAL:
737     case RAPTOR_FEATURE_WARN_OTHER_PARSETYPES:
738     case RAPTOR_FEATURE_CHECK_RDF_ID:
739     case RAPTOR_FEATURE_HTML_TAG_SOUP:
740     case RAPTOR_FEATURE_MICROFORMATS:
741     case RAPTOR_FEATURE_HTML_LINK:
742     case RAPTOR_FEATURE_WWW_TIMEOUT:
743     case RAPTOR_FEATURE_RELATIVE_URIS:
744     case RAPTOR_FEATURE_START_URI:
745     case RAPTOR_FEATURE_WRITER_AUTO_INDENT:
746     case RAPTOR_FEATURE_WRITER_AUTO_EMPTY:
747     case RAPTOR_FEATURE_WRITER_INDENT_WIDTH:
748     case RAPTOR_FEATURE_WRITER_XML_VERSION:
749     case RAPTOR_FEATURE_WRITER_XML_DECLARATION:
750 
751     /* DOT serializer features */
752     case RAPTOR_FEATURE_RESOURCE_BORDER:
753     case RAPTOR_FEATURE_LITERAL_BORDER:
754     case RAPTOR_FEATURE_BNODE_BORDER:
755     case RAPTOR_FEATURE_RESOURCE_FILL:
756     case RAPTOR_FEATURE_LITERAL_FILL:
757     case RAPTOR_FEATURE_BNODE_FILL:
758 
759     /* JSON serializer features */
760     case RAPTOR_FEATURE_JSON_CALLBACK:
761     case RAPTOR_FEATURE_JSON_EXTRA_DATA:
762     case RAPTOR_FEATURE_RSS_TRIPLES:
763     case RAPTOR_FEATURE_ATOM_ENTRY_URI:
764     case RAPTOR_FEATURE_PREFIX_ELEMENTS:
765 
766     /* Turtle serializer feature */
767     case RAPTOR_FEATURE_WRITE_BASE_URI:
768 
769     /* WWW feature */
770     case RAPTOR_FEATURE_WWW_HTTP_CACHE_CONTROL:
771     case RAPTOR_FEATURE_WWW_HTTP_USER_AGENT:
772 
773     default:
774       return -1;
775       break;
776   }
777 
778   return 0;
779 }
780 
781 
782 void
raptor_sax2_update_document_locator(raptor_sax2 * sax2,raptor_locator * locator)783 raptor_sax2_update_document_locator(raptor_sax2* sax2,
784                                     raptor_locator* locator)
785 {
786 #ifdef RAPTOR_XML_EXPAT
787   raptor_expat_update_document_locator(sax2, locator);
788 #endif
789 #ifdef RAPTOR_XML_LIBXML
790   raptor_libxml_update_document_locator(sax2, locator);
791 #endif
792 }
793 
794 
795 /* start of an element */
796 void
raptor_sax2_start_element(void * user_data,const unsigned char * name,const unsigned char ** atts)797 raptor_sax2_start_element(void* user_data, const unsigned char *name,
798                           const unsigned char **atts)
799 {
800   raptor_sax2* sax2=(raptor_sax2*)user_data;
801   raptor_qname* el_name;
802   unsigned char **xml_atts_copy=NULL;
803   size_t xml_atts_size=0;
804   int all_atts_count=0;
805   int ns_attributes_count=0;
806   raptor_qname** named_attrs=NULL;
807   raptor_xml_element* xml_element=NULL;
808   unsigned char *xml_language=NULL;
809   raptor_uri *xml_base=NULL;
810 
811   if(sax2->failed || !sax2->enabled)
812     return;
813 
814 #ifdef RAPTOR_XML_EXPAT
815 #ifdef EXPAT_UTF8_BOM_CRASH
816   sax2->tokens_count++;
817 #endif
818 #endif
819 
820 #ifdef RAPTOR_XML_LIBXML
821   if(atts) {
822     int i;
823 
824     /* Do XML attribute value normalization */
825     for (i = 0; atts[i]; i+=2) {
826       unsigned char *value=(unsigned char*)atts[i+1];
827       unsigned char *src = value;
828       unsigned char *dst = xmlStrdup(value);
829 
830       if(!dst) {
831         raptor_log_error_to_handlers(sax2->world,
832                                      sax2->error_handlers,
833                                      RAPTOR_LOG_LEVEL_FATAL,
834                                      sax2->locator, "Out of memory");
835         return;
836       }
837 
838       atts[i+1]=dst;
839 
840       while (*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09)
841         src++;
842       while (*src) {
843         if (*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09) {
844           while (*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09)
845             src++;
846           if (*src)
847             *dst++ = 0x20;
848         } else {
849           *dst++ = *src++;
850         }
851       }
852       *dst = '\0';
853       xmlFree(value);
854     }
855   }
856 #endif
857 
858   raptor_sax2_inc_depth(sax2);
859 
860   if(atts) {
861     int i;
862 
863     /* Save passed in XML attributes pointers so we can
864      * NULL the pointers when they get handled below (various atts[i]=NULL)
865      */
866     for (i = 0; atts[i]; i++) ;
867     xml_atts_size=sizeof(unsigned char*) * i;
868     if(xml_atts_size) {
869       xml_atts_copy=(unsigned char**)RAPTOR_MALLOC(cstringpointer,xml_atts_size);
870       if(!xml_atts_copy)
871         goto fail;
872       memcpy(xml_atts_copy, atts, xml_atts_size);
873     }
874 
875     /* XML attributes processing:
876      *   xmlns*   - XML namespaces (Namespaces in XML REC)
877      *     Deleted and used to synthesise namespaces declarations
878      *   xml:lang - XML language (XML REC)
879      *     Deleted and optionally normalised to lowercase
880      *   xml:base - XML Base (XML Base REC)
881      *     Deleted and used to set the in-scope base URI for this XML element
882      */
883     for (i = 0; atts[i]; i+= 2) {
884       all_atts_count++;
885 
886       if(strncmp((char*)atts[i], "xml", 3)) {
887         /* count and skip non xml* attributes */
888         ns_attributes_count++;
889         continue;
890       }
891 
892       /* synthesise the XML namespace events */
893       if(!memcmp((const char*)atts[i], "xmlns", 5)) {
894         const unsigned char *prefix=atts[i][5] ? &atts[i][6] : NULL;
895         const unsigned char *namespace_name=atts[i+1];
896 
897         raptor_namespace* nspace;
898         nspace=raptor_new_namespace(&sax2->namespaces,
899                                     prefix, namespace_name,
900                                     raptor_sax2_get_depth(sax2));
901 
902         if(nspace) {
903           raptor_namespaces_start_namespace(&sax2->namespaces, nspace);
904 
905           if(sax2->namespace_handler)
906             (*sax2->namespace_handler)(sax2->user_data, nspace);
907         }
908       } else if(!strcmp((char*)atts[i], "xml:lang")) {
909         xml_language=(unsigned char*)RAPTOR_MALLOC(cstring, strlen((char*)atts[i+1])+1);
910         if(!xml_language) {
911           raptor_log_error_to_handlers(sax2->world,
912                                        sax2->error_handlers,
913                                        RAPTOR_LOG_LEVEL_FATAL,
914                                        sax2->locator, "Out of memory");
915           goto fail;
916         }
917 
918         /* optionally normalize language to lowercase */
919         if(sax2->feature_normalize_language) {
920           unsigned char *from=(unsigned char*)atts[i+1];
921           unsigned char *to=xml_language;
922 
923           while(*from) {
924             if(isupper(*from))
925               *to++ =tolower(*from++);
926             else
927               *to++ =*from++;
928           }
929           *to='\0';
930         } else
931           strcpy((char*)xml_language, (char*)atts[i+1]);
932       } else if(!strcmp((char*)atts[i], "xml:base")) {
933         raptor_uri* base_uri;
934         raptor_uri* xuri;
935         base_uri=raptor_sax2_inscope_base_uri(sax2);
936         xuri=raptor_new_uri_relative_to_base_v2(sax2->world, base_uri, atts[i+1]);
937         xml_base=raptor_new_uri_for_xmlbase_v2(sax2->world, xuri);
938         raptor_free_uri_v2(sax2->world, xuri);
939       }
940 
941       /* delete all xml attributes whether processed above or not */
942       atts[i]=NULL;
943     }
944   }
945 
946 
947   /* Create new element structure */
948   el_name=raptor_new_qname(&sax2->namespaces, name, NULL,
949                            (raptor_simple_message_handler)raptor_sax2_simple_error, sax2);
950   if(!el_name)
951     goto fail;
952 
953   xml_element=raptor_new_xml_element(el_name, xml_language, xml_base);
954   if(!xml_element) {
955     raptor_free_qname(el_name);
956     goto fail;
957   }
958   /* xml_language,xml_base now owned by xml_element */
959   xml_language = NULL;
960   xml_base = NULL;
961 
962   /* Turn string attributes into namespaced-attributes */
963   if(ns_attributes_count) {
964     int i;
965     int offset = 0;
966 
967     /* Allocate new array to hold namespaced-attributes */
968     named_attrs=(raptor_qname**)RAPTOR_CALLOC(raptor_qname_array,
969                                               ns_attributes_count,
970                                               sizeof(raptor_qname*));
971     if(!named_attrs) {
972       raptor_log_error_to_handlers(sax2->world,
973                                    sax2->error_handlers,
974                                    RAPTOR_LOG_LEVEL_FATAL,
975                                    sax2->locator, "Out of memory");
976       goto fail;
977     }
978 
979     for (i = 0; i < all_atts_count; i++) {
980       raptor_qname* attr;
981 
982       /* Skip previously processed attributes */
983       if(!atts[i<<1])
984         continue;
985 
986       /* namespace-name[i] stored in named_attrs[i] */
987       attr=raptor_new_qname(&sax2->namespaces,
988                             atts[i<<1], atts[(i<<1)+1],
989                             (raptor_simple_message_handler)raptor_sax2_simple_error, sax2);
990       if(!attr) { /* failed - tidy up and return */
991         int j;
992 
993         for (j=0; j < i; j++)
994           RAPTOR_FREE(raptor_qname, named_attrs[j]);
995         RAPTOR_FREE(raptor_qname_array, named_attrs);
996         goto fail;
997       }
998 
999       named_attrs[offset++]=attr;
1000     }
1001   } /* end if ns_attributes_count */
1002 
1003 
1004   if(named_attrs)
1005     raptor_xml_element_set_attributes(xml_element,
1006                                       named_attrs, ns_attributes_count);
1007 
1008   raptor_xml_element_push(sax2, xml_element);
1009 
1010   if(sax2->start_element_handler)
1011     sax2->start_element_handler(sax2->user_data, xml_element);
1012 
1013   if(xml_atts_copy) {
1014     /* Restore passed in XML attributes, free the copy */
1015     memcpy((void*)atts, xml_atts_copy, xml_atts_size);
1016     RAPTOR_FREE(cstringpointer, xml_atts_copy);
1017   }
1018 
1019   return;
1020 
1021   fail:
1022   if(xml_atts_copy)
1023     RAPTOR_FREE(cstringpointer, xml_atts_copy);
1024   if(xml_base)
1025     raptor_free_uri_v2(sax2->world, xml_base);
1026   if(xml_language)
1027     RAPTOR_FREE(cstring, xml_language);
1028   if(xml_element)
1029     raptor_free_xml_element(xml_element);
1030 }
1031 
1032 
1033 /* end of an element */
1034 void
raptor_sax2_end_element(void * user_data,const unsigned char * name)1035 raptor_sax2_end_element(void* user_data, const unsigned char *name)
1036 {
1037   raptor_sax2* sax2=(raptor_sax2*)user_data;
1038   raptor_xml_element* xml_element;
1039 
1040   if(sax2->failed || !sax2->enabled)
1041     return;
1042 
1043 #ifdef RAPTOR_XML_EXPAT
1044 #ifdef EXPAT_UTF8_BOM_CRASH
1045   sax2->tokens_count++;
1046 #endif
1047 #endif
1048 
1049   xml_element=sax2->current_element;
1050   if(xml_element) {
1051 #ifdef RAPTOR_DEBUG_VERBOSE
1052     fprintf(stderr, "\nraptor_rdfxml_end_element_handler: End ns-element: ");
1053     raptor_qname_print(stderr, xml_element->name);
1054     fputc('\n', stderr);
1055 #endif
1056 
1057     if(sax2->end_element_handler)
1058       sax2->end_element_handler(sax2->user_data, xml_element);
1059   }
1060 
1061   raptor_namespaces_end_for_depth(&sax2->namespaces,
1062                                   raptor_sax2_get_depth(sax2));
1063   xml_element=raptor_xml_element_pop(sax2);
1064   if(xml_element)
1065     raptor_free_xml_element(xml_element);
1066 
1067   raptor_sax2_dec_depth(sax2);
1068 }
1069 
1070 
1071 
1072 
1073 /* characters */
1074 void
raptor_sax2_characters(void * user_data,const unsigned char * s,int len)1075 raptor_sax2_characters(void* user_data, const unsigned char *s, int len)
1076 {
1077   raptor_sax2* sax2=(raptor_sax2*)user_data;
1078 
1079   if(sax2->failed || !sax2->enabled)
1080     return;
1081 
1082   if(sax2->characters_handler)
1083     sax2->characters_handler(sax2->user_data, sax2->current_element, s, len);
1084 }
1085 
1086 
1087 /* like <![CDATA[...]> */
1088 void
raptor_sax2_cdata(void * user_data,const unsigned char * s,int len)1089 raptor_sax2_cdata(void* user_data, const unsigned char *s, int len)
1090 {
1091   raptor_sax2* sax2=(raptor_sax2*)user_data;
1092 #ifdef RAPTOR_XML_EXPAT
1093 #ifdef EXPAT_UTF8_BOM_CRASH
1094   sax2->tokens_count++;
1095 #endif
1096 #endif
1097 
1098   if(sax2->failed || !sax2->enabled)
1099     return;
1100 
1101   if(sax2->cdata_handler)
1102     sax2->cdata_handler(sax2->user_data, sax2->current_element, s, len);
1103 }
1104 
1105 
1106 /* comment */
1107 void
raptor_sax2_comment(void * user_data,const unsigned char * s)1108 raptor_sax2_comment(void* user_data, const unsigned char *s)
1109 {
1110   raptor_sax2* sax2=(raptor_sax2*)user_data;
1111 
1112   if(sax2->failed || !sax2->enabled)
1113     return;
1114 
1115   if(sax2->comment_handler)
1116     sax2->comment_handler(sax2->user_data, sax2->current_element, s);
1117 }
1118 
1119 
1120 /* unparsed (NDATA) entity */
1121 void
raptor_sax2_unparsed_entity_decl(void * user_data,const unsigned char * entityName,const unsigned char * base,const unsigned char * systemId,const unsigned char * publicId,const unsigned char * notationName)1122 raptor_sax2_unparsed_entity_decl(void* user_data,
1123                                  const unsigned char* entityName,
1124                                  const unsigned char* base,
1125                                  const unsigned char* systemId,
1126                                  const unsigned char* publicId,
1127                                  const unsigned char* notationName)
1128 {
1129   raptor_sax2* sax2=(raptor_sax2*)user_data;
1130 
1131   if(sax2->failed || !sax2->enabled)
1132     return;
1133 
1134   if(sax2->unparsed_entity_decl_handler)
1135     sax2->unparsed_entity_decl_handler(sax2->user_data,
1136                                        entityName, base, systemId,
1137                                        publicId, notationName);
1138 }
1139 
1140 
1141 /* external entity reference */
1142 int
raptor_sax2_external_entity_ref(void * user_data,const unsigned char * context,const unsigned char * base,const unsigned char * systemId,const unsigned char * publicId)1143 raptor_sax2_external_entity_ref(void* user_data,
1144                                 const unsigned char* context,
1145                                 const unsigned char* base,
1146                                 const unsigned char* systemId,
1147                                 const unsigned char* publicId)
1148 {
1149   raptor_sax2* sax2=(raptor_sax2*)user_data;
1150 
1151   if(sax2->failed || !sax2->enabled)
1152     return 0;
1153 
1154   if(sax2->external_entity_ref_handler)
1155     return sax2->external_entity_ref_handler(sax2->user_data,
1156                                              context, base, systemId, publicId);
1157 
1158   raptor_sax2_simple_error((void*)sax2,
1159                            "Failed to handle external entity reference with base %s systemId %s publicId %s",
1160                            (base ?  (const char*)base : "(None)"),
1161                            systemId,
1162                            (publicId ?  (const char*)publicId: "(None)"));
1163 
1164   /* Failed to handle external entity reference */
1165   return 0;
1166 }
1167