1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * raptor_sax2.c - Raptor SAX2 API
4 *
5 * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
6 * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7 *
8 * This package is Free Software and part of Redland http://librdf.org/
9 *
10 * It is licensed under the following three licenses as alternatives:
11 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12 * 2. GNU General Public License (GPL) V2 or any newer version
13 * 3. Apache License, V2.0 or any newer version
14 *
15 * You may not use this file except in compliance with at least one of
16 * the above three licenses.
17 *
18 * See LICENSE.html or LICENSE.txt at the top of this package for the
19 * complete terms and further detail along with the license texts for
20 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21 *
22 *
23 */
24
25
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29
30 #ifdef WIN32
31 #include <win32_raptor_config.h>
32 #endif
33
34
35 #include <stdio.h>
36 #include <string.h>
37 #include <ctype.h>
38 #include <stdarg.h>
39 #ifdef HAVE_ERRNO_H
40 #include <errno.h>
41 #endif
42 #ifdef HAVE_STDLIB_H
43 #include <stdlib.h>
44 #endif
45
46 /* Raptor includes */
47 #include "raptor.h"
48 #include "raptor_internal.h"
49
50
51 /* Define this for far too much output */
52 #undef RAPTOR_DEBUG_CDATA
53
54
55 int
raptor_sax2_init(raptor_world * world)56 raptor_sax2_init(raptor_world* world)
57 {
58 if(world->sax2_initialized++)
59 return 0;
60
61 #ifdef RAPTOR_XML_LIBXML
62 xmlInitParser();
63 #endif
64 return 0;
65 }
66
67
68 void
raptor_sax2_finish(raptor_world * world)69 raptor_sax2_finish(raptor_world* world)
70 {
71 if(--world->sax2_initialized)
72 return;
73
74 #ifdef RAPTOR_XML_LIBXML
75 /* Should call this after all uses of libxml are done.
76 * In particular after xmlSetStructuredErrorFunc() otherwise
77 * it has reportedly caused an access violation on windows.
78 */
79 xmlCleanupParser();
80 #endif
81 }
82
83
84 /**
85 * raptor_new_sax2:
86 * @user_data: pointer context information to pass to handlers
87 * @error_handlers: error handlers pointer
88 *
89 * Constructor - Create a new SAX2 with error handlers
90 *
91 * Return value: new #raptor_sax2 object or NULL on failure
92 */
93 raptor_sax2*
raptor_new_sax2(void * user_data,raptor_error_handlers * error_handlers)94 raptor_new_sax2(void* user_data, raptor_error_handlers* error_handlers)
95 {
96 raptor_sax2* sax2;
97 sax2=(raptor_sax2*)RAPTOR_CALLOC(raptor_sax2, 1, sizeof(raptor_sax2));
98 if(!sax2)
99 return NULL;
100
101 #ifdef RAPTOR_XML_LIBXML
102 sax2->magic=RAPTOR_LIBXML_MAGIC;
103 #endif
104
105 sax2->world=error_handlers->world;
106
107 sax2->user_data=user_data;
108
109 sax2->enabled = 1;
110
111 sax2->locator=error_handlers->locator;
112
113 sax2->error_handlers=error_handlers;
114
115 #ifdef RAPTOR_XML_LIBXML
116 if(sax2->world->libxml_flags & RAPTOR_LIBXML_FLAGS_STRUCTURED_ERROR_SAVE) {
117 sax2->saved_structured_error_context = xmlGenericErrorContext;
118 sax2->saved_structured_error_handler = xmlStructuredError;
119 /* sets xmlGenericErrorContext and xmlStructuredError */
120 xmlSetStructuredErrorFunc(&sax2->error_handlers,
121 (xmlStructuredErrorFunc)raptor_libxml_xmlStructuredErrorFunc);
122 }
123
124 if(sax2->world->libxml_flags & RAPTOR_LIBXML_FLAGS_GENERIC_ERROR_SAVE) {
125 sax2->saved_generic_error_context = xmlGenericErrorContext;
126 sax2->saved_generic_error_handler = xmlGenericError;
127 /* sets xmlGenericErrorContext and xmlGenericError */
128 xmlSetGenericErrorFunc(&sax2->error_handlers,
129 (xmlGenericErrorFunc)raptor_libxml_generic_error);
130 }
131 #endif
132
133 return sax2;
134 }
135
136
137 /**
138 * raptor_free_sax2:
139 * @sax2: SAX2 object
140 *
141 * Destructor - destroy a SAX2 object
142 */
143 void
raptor_free_sax2(raptor_sax2 * sax2)144 raptor_free_sax2(raptor_sax2 *sax2)
145 {
146 raptor_xml_element *xml_element;
147
148 RAPTOR_ASSERT_OBJECT_POINTER_RETURN(sax2, raptor_sax2);
149
150 #ifdef RAPTOR_XML_EXPAT
151 if(sax2->xp) {
152 XML_ParserFree(sax2->xp);
153 sax2->xp=NULL;
154 }
155 #endif
156
157 #ifdef RAPTOR_XML_LIBXML
158 if(sax2->xc) {
159 raptor_libxml_free(sax2->xc);
160 sax2->xc=NULL;
161 }
162
163 if(sax2->world->libxml_flags & RAPTOR_LIBXML_FLAGS_STRUCTURED_ERROR_SAVE)
164 xmlSetStructuredErrorFunc(sax2->saved_structured_error_context,
165 sax2->saved_structured_error_handler);
166
167 if(sax2->world->libxml_flags & RAPTOR_LIBXML_FLAGS_GENERIC_ERROR_SAVE)
168 xmlSetGenericErrorFunc(sax2->saved_generic_error_context,
169 sax2->saved_generic_error_handler);
170 #endif
171
172 while( (xml_element=raptor_xml_element_pop(sax2)) )
173 raptor_free_xml_element(xml_element);
174
175 raptor_namespaces_clear(&sax2->namespaces);
176
177 if(sax2->base_uri)
178 raptor_free_uri_v2(sax2->world, sax2->base_uri);
179
180 RAPTOR_FREE(raptor_sax2, sax2);
181 }
182
183
184 /**
185 * raptor_sax2_set_start_element_handler:
186 * @sax2: SAX2 object
187 * @handler: start element handler
188 *
189 * Set SAX2 start element handler.
190 */
191 void
raptor_sax2_set_start_element_handler(raptor_sax2 * sax2,raptor_sax2_start_element_handler handler)192 raptor_sax2_set_start_element_handler(raptor_sax2* sax2,
193 raptor_sax2_start_element_handler handler)
194 {
195 sax2->start_element_handler=handler;
196 }
197
198
199 /**
200 * raptor_sax2_set_end_element_handler:
201 * @sax2: SAX2 object
202 * @handler: end element handler
203 *
204 * Set SAX2 end element handler.
205 */
206 void
raptor_sax2_set_end_element_handler(raptor_sax2 * sax2,raptor_sax2_end_element_handler handler)207 raptor_sax2_set_end_element_handler(raptor_sax2* sax2,
208 raptor_sax2_end_element_handler handler)
209 {
210 sax2->end_element_handler=handler;
211 }
212
213
214 /**
215 * raptor_sax2_set_characters_handler:
216 * @sax2: SAX2 object
217 * @handler: characters handler
218 *
219 * Set SAX2 characters handler.
220 */
221 void
raptor_sax2_set_characters_handler(raptor_sax2 * sax2,raptor_sax2_characters_handler handler)222 raptor_sax2_set_characters_handler(raptor_sax2* sax2,
223 raptor_sax2_characters_handler handler)
224 {
225 sax2->characters_handler=handler;
226 }
227
228
229 /**
230 * raptor_sax2_set_cdata_handler:
231 * @sax2: SAX2 object
232 * @handler: CDATA handler
233 *
234 * Set SAX2 CDATA handler.
235 */
236 void
raptor_sax2_set_cdata_handler(raptor_sax2 * sax2,raptor_sax2_cdata_handler handler)237 raptor_sax2_set_cdata_handler(raptor_sax2* sax2,
238 raptor_sax2_cdata_handler handler)
239 {
240 sax2->cdata_handler=handler;
241 }
242
243
244 /**
245 * raptor_sax2_set_comment_handler:
246 * @sax2: SAX2 object
247 * @handler: comment handler
248 *
249 * Set SAX2 XML comment handler.
250 */
251 void
raptor_sax2_set_comment_handler(raptor_sax2 * sax2,raptor_sax2_comment_handler handler)252 raptor_sax2_set_comment_handler(raptor_sax2* sax2,
253 raptor_sax2_comment_handler handler)
254 {
255 sax2->comment_handler=handler;
256 }
257
258
259 /**
260 * raptor_sax2_set_unparsed_entity_decl_handler:
261 * @sax2: SAX2 object
262 * @handler: unparsed entity declaration handler
263 *
264 * Set SAX2 XML unparsed entity declaration handler.
265 */
266 void
raptor_sax2_set_unparsed_entity_decl_handler(raptor_sax2 * sax2,raptor_sax2_unparsed_entity_decl_handler handler)267 raptor_sax2_set_unparsed_entity_decl_handler(raptor_sax2* sax2,
268 raptor_sax2_unparsed_entity_decl_handler handler)
269 {
270 sax2->unparsed_entity_decl_handler=handler;
271 }
272
273
274 /**
275 * raptor_sax2_set_external_entity_ref_handler:
276 * @sax2: SAX2 object
277 * @handler: entity reference handler
278 *
279 * Set SAX2 XML entity reference handler.
280 */
281 void
raptor_sax2_set_external_entity_ref_handler(raptor_sax2 * sax2,raptor_sax2_external_entity_ref_handler handler)282 raptor_sax2_set_external_entity_ref_handler(raptor_sax2* sax2,
283 raptor_sax2_external_entity_ref_handler handler)
284 {
285 sax2->external_entity_ref_handler=handler;
286 }
287
288
289 /**
290 * raptor_sax2_set_namespace_handler:
291 * @sax2: #raptor_sax2 object
292 * @handler: new namespace callback function
293 *
294 * Set the XML namespace handler function.
295 *
296 * When a prefix/namespace is seen in an XML parser, call the given
297 * @handler with the prefix string and the #raptor_uri namespace URI.
298 * Either can be NULL for the default prefix or default namespace.
299 *
300 * The handler function does not deal with duplicates so any
301 * namespace may be declared multiple times when a namespace is seen
302 * in different parts of a document.
303 *
304 */
305 void
raptor_sax2_set_namespace_handler(raptor_sax2 * sax2,raptor_namespace_handler handler)306 raptor_sax2_set_namespace_handler(raptor_sax2* sax2,
307 raptor_namespace_handler handler)
308 {
309 sax2->namespace_handler=handler;
310 }
311
312
313 raptor_xml_element*
raptor_xml_element_pop(raptor_sax2 * sax2)314 raptor_xml_element_pop(raptor_sax2 *sax2)
315 {
316 raptor_xml_element *element=sax2->current_element;
317
318 if(!element)
319 return NULL;
320
321 sax2->current_element=element->parent;
322 if(sax2->root_element == element) /* just deleted root */
323 sax2->root_element=NULL;
324
325 return element;
326 }
327
328
329 void
raptor_xml_element_push(raptor_sax2 * sax2,raptor_xml_element * element)330 raptor_xml_element_push(raptor_sax2 *sax2, raptor_xml_element* element)
331 {
332 element->parent=sax2->current_element;
333 sax2->current_element=element;
334 if(!sax2->root_element)
335 sax2->root_element=element;
336 }
337
338
339 /**
340 * raptor_xml_element_is_empty:
341 * @xml_element: XML Element
342 *
343 * Check if an XML Element is empty.
344 *
345 * Return value: non-0 if the element is empty.
346 */
347 int
raptor_xml_element_is_empty(raptor_xml_element * xml_element)348 raptor_xml_element_is_empty(raptor_xml_element* xml_element)
349 {
350 return !xml_element->content_cdata_seen &&
351 !xml_element->content_element_seen;
352 }
353
354
355 /**
356 * raptor_sax2_inscope_xml_language:
357 * @sax2: SAX2 object
358 *
359 * Get the in-scope XML language
360 *
361 * Return value: the XML language or NULL if none is in scope.
362 */
363 const unsigned char*
raptor_sax2_inscope_xml_language(raptor_sax2 * sax2)364 raptor_sax2_inscope_xml_language(raptor_sax2 *sax2)
365 {
366 raptor_xml_element* xml_element;
367
368 for(xml_element=sax2->current_element;
369 xml_element;
370 xml_element=xml_element->parent)
371 if(xml_element->xml_language) {
372 if(!*xml_element->xml_language)
373 return NULL;
374 return xml_element->xml_language;
375 }
376
377
378 return NULL;
379 }
380
381
382 /**
383 * raptor_sax2_inscope_base_uri:
384 * @sax2: SAX2 object
385 *
386 * Get the in-scope base URI
387 *
388 * Return value: the in-scope base URI shared object or NULL if none is in scope.
389 */
390 raptor_uri*
raptor_sax2_inscope_base_uri(raptor_sax2 * sax2)391 raptor_sax2_inscope_base_uri(raptor_sax2 *sax2)
392 {
393 raptor_xml_element *xml_element;
394
395 for(xml_element=sax2->current_element;
396 xml_element;
397 xml_element=xml_element->parent)
398 if(xml_element->base_uri)
399 return xml_element->base_uri;
400
401 return sax2->base_uri;
402 }
403
404
405 int
raptor_sax2_get_depth(raptor_sax2 * sax2)406 raptor_sax2_get_depth(raptor_sax2 *sax2)
407 {
408 return sax2->depth;
409 }
410
411 void
raptor_sax2_inc_depth(raptor_sax2 * sax2)412 raptor_sax2_inc_depth(raptor_sax2 *sax2)
413 {
414 sax2->depth++;
415 }
416
417 void
raptor_sax2_dec_depth(raptor_sax2 * sax2)418 raptor_sax2_dec_depth(raptor_sax2 *sax2)
419 {
420 sax2->depth--;
421 }
422
423
424 static void raptor_sax2_simple_error(void* user_data, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3);
425
426 /*
427 * raptor_sax2_simple_error - Error from a sax2 - Internal
428 *
429 * Matches the raptor_simple_message_handler API but calls
430 * the sax2 error_handler
431 */
432 static void
raptor_sax2_simple_error(void * user_data,const char * message,...)433 raptor_sax2_simple_error(void* user_data, const char *message, ...)
434 {
435 raptor_sax2* sax2=(raptor_sax2*)user_data;
436 va_list arguments;
437
438 va_start(arguments, message);
439
440 if(sax2) {
441 raptor_log_level level=RAPTOR_LOG_LEVEL_ERROR;
442 raptor_message_handler_closure* cl;
443 cl=&sax2->error_handlers->handlers[level];
444 raptor_log_error_varargs(sax2->world,
445 level, cl->handler, cl->user_data,
446 sax2->locator,
447 message, arguments);
448 }
449
450 va_end(arguments);
451 }
452
453
454
455 /**
456 * raptor_sax2_parse_start:
457 * @sax2: sax2 object
458 * @base_uri: base URI
459 *
460 * Start an XML SAX2 parse.
461 */
462 void
raptor_sax2_parse_start(raptor_sax2 * sax2,raptor_uri * base_uri)463 raptor_sax2_parse_start(raptor_sax2* sax2, raptor_uri *base_uri)
464 {
465 sax2->depth=0;
466 sax2->root_element=NULL;
467 sax2->current_element=NULL;
468
469 if(sax2->base_uri)
470 raptor_free_uri_v2(sax2->world, sax2->base_uri);
471 if(base_uri)
472 sax2->base_uri=raptor_uri_copy_v2(sax2->world, base_uri);
473 else
474 sax2->base_uri=NULL;
475
476 #ifdef RAPTOR_XML_EXPAT
477 if(sax2->xp) {
478 XML_ParserFree(sax2->xp);
479 sax2->xp=NULL;
480 }
481
482 raptor_expat_init(sax2, base_uri);
483 #endif
484
485 #ifdef RAPTOR_XML_LIBXML
486 raptor_libxml_init(sax2, base_uri);
487
488 xmlSetStructuredErrorFunc(&sax2->error_handlers,
489 raptor_libxml_xmlStructuredErrorFunc);
490
491 #if LIBXML_VERSION < 20425
492 sax2->first_read=1;
493 #endif
494
495 if(sax2->xc) {
496 raptor_libxml_free(sax2->xc);
497 sax2->xc=NULL;
498 }
499 #endif
500
501 raptor_namespaces_clear(&sax2->namespaces);
502
503 if(raptor_namespaces_init_v2(sax2->world,
504 &sax2->namespaces,
505 (raptor_simple_message_handler)raptor_sax2_simple_error, sax2,
506 1)) {
507 /* log a fatal error and set sax2 to failed state
508 since the function signature does not currently support returning an error */
509 raptor_log_error_to_handlers(sax2->world,
510 sax2->error_handlers,
511 RAPTOR_LOG_LEVEL_FATAL, sax2->locator,
512 "raptor_namespaces_init_v2() failed");
513 sax2->failed = 1;
514 }
515 }
516
517
518 /**
519 * raptor_sax2_parse_chunk:
520 * @sax2: sax2 object
521 * @buffer: input buffer
522 * @len: input buffer lenght
523 * @is_end: non-0 if end of data
524 *
525 * Parse a chunk of XML data generating SAX2 events
526 *
527 * Return value: non-0 on failure
528 */
529 int
raptor_sax2_parse_chunk(raptor_sax2 * sax2,const unsigned char * buffer,size_t len,int is_end)530 raptor_sax2_parse_chunk(raptor_sax2* sax2, const unsigned char *buffer,
531 size_t len, int is_end)
532 {
533 #ifdef RAPTOR_XML_EXPAT
534 XML_Parser xp=sax2->xp;
535 int rc;
536 #endif
537 #ifdef RAPTOR_XML_LIBXML
538 /* parser context */
539 xmlParserCtxtPtr xc=sax2->xc;
540 int rc;
541 #endif
542
543 #ifdef RAPTOR_XML_LIBXML
544 if(!xc) {
545 int libxml_options = 0;
546
547 if(!len) {
548 /* no data given at all - emit a similar message to expat */
549 raptor_sax2_update_document_locator(sax2, sax2->locator);
550 raptor_log_error_to_handlers(sax2->world,
551 sax2->error_handlers,
552 RAPTOR_LOG_LEVEL_ERROR, sax2->locator,
553 "XML Parsing failed - no element found");
554 return 1;
555 }
556
557 xc = xmlCreatePushParserCtxt(&sax2->sax, sax2, /* user data */
558 (char*)buffer, len,
559 NULL);
560 if(!xc)
561 goto handle_error;
562
563 #ifdef RAPTOR_LIBXML_XML_PARSE_NONET
564 if(sax2->feature_no_net)
565 libxml_options |= XML_PARSE_NONET;
566 #endif
567 #ifdef HAVE_XMLCTXTUSEOPTIONS
568 xmlCtxtUseOptions(xc, libxml_options);
569 #endif
570
571 xc->userData = sax2; /* user data */
572 xc->vctxt.userData = sax2; /* user data */
573 xc->vctxt.error=(xmlValidityErrorFunc)raptor_libxml_validation_error;
574 xc->vctxt.warning=(xmlValidityWarningFunc)raptor_libxml_validation_warning;
575 xc->replaceEntities = 1;
576
577 sax2->xc = xc;
578
579 if(is_end)
580 len=0;
581 else
582 return 0;
583 }
584 #endif
585
586 if(!len) {
587 #ifdef RAPTOR_XML_EXPAT
588 rc=XML_Parse(xp, (char*)buffer, 0, 1);
589 if(!rc) /* expat: 0 is failure */
590 goto handle_error;
591 #endif
592 #ifdef RAPTOR_XML_LIBXML
593 xmlParseChunk(xc, (char*)buffer, 0, 1);
594 #endif
595 return 0;
596 }
597
598
599 #ifdef RAPTOR_XML_EXPAT
600 rc=XML_Parse(xp, (char*)buffer, len, is_end);
601 if(!rc) /* expat: 0 is failure */
602 goto handle_error;
603 if(is_end)
604 return 0;
605 #endif
606
607 #ifdef RAPTOR_XML_LIBXML
608
609 /* This works around some libxml versions that fail to work
610 * if the buffer size is larger than the entire file
611 * and thus the entire parsing is done in one operation.
612 *
613 * The code below:
614 * 2.4.19 (oldest tested) to 2.4.24 - required
615 * 2.4.25 - works with or without it
616 * 2.4.26 or later - fails with this code
617 */
618
619 #if LIBXML_VERSION < 20425
620 if(sax2->first_read && is_end) {
621 /* parse all but the last character */
622 rc = xmlParseChunk(xc, (char*)buffer, len-1, 0);
623 if(rc && rc != XML_WAR_UNDECLARED_ENTITY)
624 goto handle_error;
625 /* last character */
626 rc = xmlParseChunk(xc, (char*)buffer + (len-1), 1, 0);
627 if(rc && rc != XML_WAR_UNDECLARED_ENTITY)
628 goto handle_error;
629 /* end */
630 xmlParseChunk(xc, (char*)buffer, 0, 1);
631 return 0;
632 }
633 #endif
634
635 #if LIBXML_VERSION < 20425
636 sax2->first_read=0;
637 #endif
638
639 rc = xmlParseChunk(xc, (char*)buffer, len, is_end);
640 if(rc && rc != XML_WAR_UNDECLARED_ENTITY) /* libxml: non 0 is failure */
641 goto handle_error;
642 if(is_end)
643 return 0;
644 #endif
645
646 return 0;
647
648 #if defined(RAPTOR_XML_EXPAT) || defined(RAPTOR_XML_LIBXML)
649 handle_error:
650 #endif
651
652 #ifdef RAPTOR_XML_EXPAT
653 #ifdef EXPAT_UTF8_BOM_CRASH
654 if(sax2->tokens_count) {
655 #endif
656 /* Work around a bug with the expat 1.95.1 shipped with RedHat 7.2
657 * which dies here if the error is before <?xml?...
658 * The expat 1.95.1 source release version works fine.
659 */
660 if(sax2->locator)
661 raptor_sax2_update_document_locator(sax2, sax2->locator);
662 #ifdef EXPAT_UTF8_BOM_CRASH
663 }
664 #endif
665 #endif /* EXPAT */
666
667 #ifdef RAPTOR_XML_EXPAT
668 if(1) {
669 const char *error_prefix="XML Parsing failed - "; /* 21 chars */
670 #define ERROR_PREFIX_LEN 21
671 const char *error_message=XML_ErrorString(XML_GetErrorCode(xp));
672 size_t error_length;
673 char *error_buffer;
674
675 error_length=strlen(error_message);
676 error_buffer=(char*)RAPTOR_MALLOC(cstring,
677 ERROR_PREFIX_LEN + error_length+1);
678 if(error_buffer) {
679 strncpy(error_buffer, error_prefix, ERROR_PREFIX_LEN);
680 strncpy(error_buffer+ERROR_PREFIX_LEN, error_message, error_length+1);
681
682 raptor_log_error_to_handlers(sax2->world,
683 sax2->error_handlers,
684 RAPTOR_LOG_LEVEL_ERROR,
685 sax2->locator, error_buffer);
686 RAPTOR_FREE(cstring, error_buffer);
687 } else
688 raptor_log_error_to_handlers(sax2->world,
689 sax2->error_handlers,
690 RAPTOR_LOG_LEVEL_ERROR,
691 sax2->locator, "XML Parsing failed");
692 }
693 #endif
694
695 return 1;
696 }
697
698
699 /**
700 * raptor_sax2_set_feature:
701 * @sax2: #raptor_sax2 SAX2 object
702 * @feature: feature to set from enumerated #raptor_feature values
703 * @value: integer feature value (0 or larger)
704 *
705 * Set various SAX2 features.
706 *
707 * The allowed features are available via raptor_sax2_features_enumerate().
708 *
709 * Return value: non 0 on failure or if the feature is unknown
710 */
711 int
raptor_sax2_set_feature(raptor_sax2 * sax2,raptor_feature feature,int value)712 raptor_sax2_set_feature(raptor_sax2 *sax2, raptor_feature feature, int value)
713 {
714 if(value < 0)
715 return -1;
716
717 switch(feature) {
718 case RAPTOR_FEATURE_NORMALIZE_LANGUAGE:
719 sax2->feature_normalize_language=value;
720 break;
721
722 case RAPTOR_FEATURE_NO_NET:
723 sax2->feature_no_net=value;
724 break;
725
726 case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES:
727 sax2->feature_load_external_entities=value;
728 break;
729
730 case RAPTOR_FEATURE_SCANNING:
731 case RAPTOR_FEATURE_ASSUME_IS_RDF:
732 case RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES:
733 case RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES:
734 case RAPTOR_FEATURE_ALLOW_BAGID:
735 case RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST:
736 case RAPTOR_FEATURE_NON_NFC_FATAL:
737 case RAPTOR_FEATURE_WARN_OTHER_PARSETYPES:
738 case RAPTOR_FEATURE_CHECK_RDF_ID:
739 case RAPTOR_FEATURE_HTML_TAG_SOUP:
740 case RAPTOR_FEATURE_MICROFORMATS:
741 case RAPTOR_FEATURE_HTML_LINK:
742 case RAPTOR_FEATURE_WWW_TIMEOUT:
743 case RAPTOR_FEATURE_RELATIVE_URIS:
744 case RAPTOR_FEATURE_START_URI:
745 case RAPTOR_FEATURE_WRITER_AUTO_INDENT:
746 case RAPTOR_FEATURE_WRITER_AUTO_EMPTY:
747 case RAPTOR_FEATURE_WRITER_INDENT_WIDTH:
748 case RAPTOR_FEATURE_WRITER_XML_VERSION:
749 case RAPTOR_FEATURE_WRITER_XML_DECLARATION:
750
751 /* DOT serializer features */
752 case RAPTOR_FEATURE_RESOURCE_BORDER:
753 case RAPTOR_FEATURE_LITERAL_BORDER:
754 case RAPTOR_FEATURE_BNODE_BORDER:
755 case RAPTOR_FEATURE_RESOURCE_FILL:
756 case RAPTOR_FEATURE_LITERAL_FILL:
757 case RAPTOR_FEATURE_BNODE_FILL:
758
759 /* JSON serializer features */
760 case RAPTOR_FEATURE_JSON_CALLBACK:
761 case RAPTOR_FEATURE_JSON_EXTRA_DATA:
762 case RAPTOR_FEATURE_RSS_TRIPLES:
763 case RAPTOR_FEATURE_ATOM_ENTRY_URI:
764 case RAPTOR_FEATURE_PREFIX_ELEMENTS:
765
766 /* Turtle serializer feature */
767 case RAPTOR_FEATURE_WRITE_BASE_URI:
768
769 /* WWW feature */
770 case RAPTOR_FEATURE_WWW_HTTP_CACHE_CONTROL:
771 case RAPTOR_FEATURE_WWW_HTTP_USER_AGENT:
772
773 default:
774 return -1;
775 break;
776 }
777
778 return 0;
779 }
780
781
782 void
raptor_sax2_update_document_locator(raptor_sax2 * sax2,raptor_locator * locator)783 raptor_sax2_update_document_locator(raptor_sax2* sax2,
784 raptor_locator* locator)
785 {
786 #ifdef RAPTOR_XML_EXPAT
787 raptor_expat_update_document_locator(sax2, locator);
788 #endif
789 #ifdef RAPTOR_XML_LIBXML
790 raptor_libxml_update_document_locator(sax2, locator);
791 #endif
792 }
793
794
795 /* start of an element */
796 void
raptor_sax2_start_element(void * user_data,const unsigned char * name,const unsigned char ** atts)797 raptor_sax2_start_element(void* user_data, const unsigned char *name,
798 const unsigned char **atts)
799 {
800 raptor_sax2* sax2=(raptor_sax2*)user_data;
801 raptor_qname* el_name;
802 unsigned char **xml_atts_copy=NULL;
803 size_t xml_atts_size=0;
804 int all_atts_count=0;
805 int ns_attributes_count=0;
806 raptor_qname** named_attrs=NULL;
807 raptor_xml_element* xml_element=NULL;
808 unsigned char *xml_language=NULL;
809 raptor_uri *xml_base=NULL;
810
811 if(sax2->failed || !sax2->enabled)
812 return;
813
814 #ifdef RAPTOR_XML_EXPAT
815 #ifdef EXPAT_UTF8_BOM_CRASH
816 sax2->tokens_count++;
817 #endif
818 #endif
819
820 #ifdef RAPTOR_XML_LIBXML
821 if(atts) {
822 int i;
823
824 /* Do XML attribute value normalization */
825 for (i = 0; atts[i]; i+=2) {
826 unsigned char *value=(unsigned char*)atts[i+1];
827 unsigned char *src = value;
828 unsigned char *dst = xmlStrdup(value);
829
830 if(!dst) {
831 raptor_log_error_to_handlers(sax2->world,
832 sax2->error_handlers,
833 RAPTOR_LOG_LEVEL_FATAL,
834 sax2->locator, "Out of memory");
835 return;
836 }
837
838 atts[i+1]=dst;
839
840 while (*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09)
841 src++;
842 while (*src) {
843 if (*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09) {
844 while (*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09)
845 src++;
846 if (*src)
847 *dst++ = 0x20;
848 } else {
849 *dst++ = *src++;
850 }
851 }
852 *dst = '\0';
853 xmlFree(value);
854 }
855 }
856 #endif
857
858 raptor_sax2_inc_depth(sax2);
859
860 if(atts) {
861 int i;
862
863 /* Save passed in XML attributes pointers so we can
864 * NULL the pointers when they get handled below (various atts[i]=NULL)
865 */
866 for (i = 0; atts[i]; i++) ;
867 xml_atts_size=sizeof(unsigned char*) * i;
868 if(xml_atts_size) {
869 xml_atts_copy=(unsigned char**)RAPTOR_MALLOC(cstringpointer,xml_atts_size);
870 if(!xml_atts_copy)
871 goto fail;
872 memcpy(xml_atts_copy, atts, xml_atts_size);
873 }
874
875 /* XML attributes processing:
876 * xmlns* - XML namespaces (Namespaces in XML REC)
877 * Deleted and used to synthesise namespaces declarations
878 * xml:lang - XML language (XML REC)
879 * Deleted and optionally normalised to lowercase
880 * xml:base - XML Base (XML Base REC)
881 * Deleted and used to set the in-scope base URI for this XML element
882 */
883 for (i = 0; atts[i]; i+= 2) {
884 all_atts_count++;
885
886 if(strncmp((char*)atts[i], "xml", 3)) {
887 /* count and skip non xml* attributes */
888 ns_attributes_count++;
889 continue;
890 }
891
892 /* synthesise the XML namespace events */
893 if(!memcmp((const char*)atts[i], "xmlns", 5)) {
894 const unsigned char *prefix=atts[i][5] ? &atts[i][6] : NULL;
895 const unsigned char *namespace_name=atts[i+1];
896
897 raptor_namespace* nspace;
898 nspace=raptor_new_namespace(&sax2->namespaces,
899 prefix, namespace_name,
900 raptor_sax2_get_depth(sax2));
901
902 if(nspace) {
903 raptor_namespaces_start_namespace(&sax2->namespaces, nspace);
904
905 if(sax2->namespace_handler)
906 (*sax2->namespace_handler)(sax2->user_data, nspace);
907 }
908 } else if(!strcmp((char*)atts[i], "xml:lang")) {
909 xml_language=(unsigned char*)RAPTOR_MALLOC(cstring, strlen((char*)atts[i+1])+1);
910 if(!xml_language) {
911 raptor_log_error_to_handlers(sax2->world,
912 sax2->error_handlers,
913 RAPTOR_LOG_LEVEL_FATAL,
914 sax2->locator, "Out of memory");
915 goto fail;
916 }
917
918 /* optionally normalize language to lowercase */
919 if(sax2->feature_normalize_language) {
920 unsigned char *from=(unsigned char*)atts[i+1];
921 unsigned char *to=xml_language;
922
923 while(*from) {
924 if(isupper(*from))
925 *to++ =tolower(*from++);
926 else
927 *to++ =*from++;
928 }
929 *to='\0';
930 } else
931 strcpy((char*)xml_language, (char*)atts[i+1]);
932 } else if(!strcmp((char*)atts[i], "xml:base")) {
933 raptor_uri* base_uri;
934 raptor_uri* xuri;
935 base_uri=raptor_sax2_inscope_base_uri(sax2);
936 xuri=raptor_new_uri_relative_to_base_v2(sax2->world, base_uri, atts[i+1]);
937 xml_base=raptor_new_uri_for_xmlbase_v2(sax2->world, xuri);
938 raptor_free_uri_v2(sax2->world, xuri);
939 }
940
941 /* delete all xml attributes whether processed above or not */
942 atts[i]=NULL;
943 }
944 }
945
946
947 /* Create new element structure */
948 el_name=raptor_new_qname(&sax2->namespaces, name, NULL,
949 (raptor_simple_message_handler)raptor_sax2_simple_error, sax2);
950 if(!el_name)
951 goto fail;
952
953 xml_element=raptor_new_xml_element(el_name, xml_language, xml_base);
954 if(!xml_element) {
955 raptor_free_qname(el_name);
956 goto fail;
957 }
958 /* xml_language,xml_base now owned by xml_element */
959 xml_language = NULL;
960 xml_base = NULL;
961
962 /* Turn string attributes into namespaced-attributes */
963 if(ns_attributes_count) {
964 int i;
965 int offset = 0;
966
967 /* Allocate new array to hold namespaced-attributes */
968 named_attrs=(raptor_qname**)RAPTOR_CALLOC(raptor_qname_array,
969 ns_attributes_count,
970 sizeof(raptor_qname*));
971 if(!named_attrs) {
972 raptor_log_error_to_handlers(sax2->world,
973 sax2->error_handlers,
974 RAPTOR_LOG_LEVEL_FATAL,
975 sax2->locator, "Out of memory");
976 goto fail;
977 }
978
979 for (i = 0; i < all_atts_count; i++) {
980 raptor_qname* attr;
981
982 /* Skip previously processed attributes */
983 if(!atts[i<<1])
984 continue;
985
986 /* namespace-name[i] stored in named_attrs[i] */
987 attr=raptor_new_qname(&sax2->namespaces,
988 atts[i<<1], atts[(i<<1)+1],
989 (raptor_simple_message_handler)raptor_sax2_simple_error, sax2);
990 if(!attr) { /* failed - tidy up and return */
991 int j;
992
993 for (j=0; j < i; j++)
994 RAPTOR_FREE(raptor_qname, named_attrs[j]);
995 RAPTOR_FREE(raptor_qname_array, named_attrs);
996 goto fail;
997 }
998
999 named_attrs[offset++]=attr;
1000 }
1001 } /* end if ns_attributes_count */
1002
1003
1004 if(named_attrs)
1005 raptor_xml_element_set_attributes(xml_element,
1006 named_attrs, ns_attributes_count);
1007
1008 raptor_xml_element_push(sax2, xml_element);
1009
1010 if(sax2->start_element_handler)
1011 sax2->start_element_handler(sax2->user_data, xml_element);
1012
1013 if(xml_atts_copy) {
1014 /* Restore passed in XML attributes, free the copy */
1015 memcpy((void*)atts, xml_atts_copy, xml_atts_size);
1016 RAPTOR_FREE(cstringpointer, xml_atts_copy);
1017 }
1018
1019 return;
1020
1021 fail:
1022 if(xml_atts_copy)
1023 RAPTOR_FREE(cstringpointer, xml_atts_copy);
1024 if(xml_base)
1025 raptor_free_uri_v2(sax2->world, xml_base);
1026 if(xml_language)
1027 RAPTOR_FREE(cstring, xml_language);
1028 if(xml_element)
1029 raptor_free_xml_element(xml_element);
1030 }
1031
1032
1033 /* end of an element */
1034 void
raptor_sax2_end_element(void * user_data,const unsigned char * name)1035 raptor_sax2_end_element(void* user_data, const unsigned char *name)
1036 {
1037 raptor_sax2* sax2=(raptor_sax2*)user_data;
1038 raptor_xml_element* xml_element;
1039
1040 if(sax2->failed || !sax2->enabled)
1041 return;
1042
1043 #ifdef RAPTOR_XML_EXPAT
1044 #ifdef EXPAT_UTF8_BOM_CRASH
1045 sax2->tokens_count++;
1046 #endif
1047 #endif
1048
1049 xml_element=sax2->current_element;
1050 if(xml_element) {
1051 #ifdef RAPTOR_DEBUG_VERBOSE
1052 fprintf(stderr, "\nraptor_rdfxml_end_element_handler: End ns-element: ");
1053 raptor_qname_print(stderr, xml_element->name);
1054 fputc('\n', stderr);
1055 #endif
1056
1057 if(sax2->end_element_handler)
1058 sax2->end_element_handler(sax2->user_data, xml_element);
1059 }
1060
1061 raptor_namespaces_end_for_depth(&sax2->namespaces,
1062 raptor_sax2_get_depth(sax2));
1063 xml_element=raptor_xml_element_pop(sax2);
1064 if(xml_element)
1065 raptor_free_xml_element(xml_element);
1066
1067 raptor_sax2_dec_depth(sax2);
1068 }
1069
1070
1071
1072
1073 /* characters */
1074 void
raptor_sax2_characters(void * user_data,const unsigned char * s,int len)1075 raptor_sax2_characters(void* user_data, const unsigned char *s, int len)
1076 {
1077 raptor_sax2* sax2=(raptor_sax2*)user_data;
1078
1079 if(sax2->failed || !sax2->enabled)
1080 return;
1081
1082 if(sax2->characters_handler)
1083 sax2->characters_handler(sax2->user_data, sax2->current_element, s, len);
1084 }
1085
1086
1087 /* like <![CDATA[...]> */
1088 void
raptor_sax2_cdata(void * user_data,const unsigned char * s,int len)1089 raptor_sax2_cdata(void* user_data, const unsigned char *s, int len)
1090 {
1091 raptor_sax2* sax2=(raptor_sax2*)user_data;
1092 #ifdef RAPTOR_XML_EXPAT
1093 #ifdef EXPAT_UTF8_BOM_CRASH
1094 sax2->tokens_count++;
1095 #endif
1096 #endif
1097
1098 if(sax2->failed || !sax2->enabled)
1099 return;
1100
1101 if(sax2->cdata_handler)
1102 sax2->cdata_handler(sax2->user_data, sax2->current_element, s, len);
1103 }
1104
1105
1106 /* comment */
1107 void
raptor_sax2_comment(void * user_data,const unsigned char * s)1108 raptor_sax2_comment(void* user_data, const unsigned char *s)
1109 {
1110 raptor_sax2* sax2=(raptor_sax2*)user_data;
1111
1112 if(sax2->failed || !sax2->enabled)
1113 return;
1114
1115 if(sax2->comment_handler)
1116 sax2->comment_handler(sax2->user_data, sax2->current_element, s);
1117 }
1118
1119
1120 /* unparsed (NDATA) entity */
1121 void
raptor_sax2_unparsed_entity_decl(void * user_data,const unsigned char * entityName,const unsigned char * base,const unsigned char * systemId,const unsigned char * publicId,const unsigned char * notationName)1122 raptor_sax2_unparsed_entity_decl(void* user_data,
1123 const unsigned char* entityName,
1124 const unsigned char* base,
1125 const unsigned char* systemId,
1126 const unsigned char* publicId,
1127 const unsigned char* notationName)
1128 {
1129 raptor_sax2* sax2=(raptor_sax2*)user_data;
1130
1131 if(sax2->failed || !sax2->enabled)
1132 return;
1133
1134 if(sax2->unparsed_entity_decl_handler)
1135 sax2->unparsed_entity_decl_handler(sax2->user_data,
1136 entityName, base, systemId,
1137 publicId, notationName);
1138 }
1139
1140
1141 /* external entity reference */
1142 int
raptor_sax2_external_entity_ref(void * user_data,const unsigned char * context,const unsigned char * base,const unsigned char * systemId,const unsigned char * publicId)1143 raptor_sax2_external_entity_ref(void* user_data,
1144 const unsigned char* context,
1145 const unsigned char* base,
1146 const unsigned char* systemId,
1147 const unsigned char* publicId)
1148 {
1149 raptor_sax2* sax2=(raptor_sax2*)user_data;
1150
1151 if(sax2->failed || !sax2->enabled)
1152 return 0;
1153
1154 if(sax2->external_entity_ref_handler)
1155 return sax2->external_entity_ref_handler(sax2->user_data,
1156 context, base, systemId, publicId);
1157
1158 raptor_sax2_simple_error((void*)sax2,
1159 "Failed to handle external entity reference with base %s systemId %s publicId %s",
1160 (base ? (const char*)base : "(None)"),
1161 systemId,
1162 (publicId ? (const char*)publicId: "(None)"));
1163
1164 /* Failed to handle external entity reference */
1165 return 0;
1166 }
1167