1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * raptor_rdfxml.c - Raptor RDF/XML Parser
4 *
5 * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
6 * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7 *
8 * This package is Free Software and part of Redland http://librdf.org/
9 *
10 * It is licensed under the following three licenses as alternatives:
11 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12 * 2. GNU General Public License (GPL) V2 or any newer version
13 * 3. Apache License, V2.0 or any newer version
14 *
15 * You may not use this file except in compliance with at least one of
16 * the above three licenses.
17 *
18 * See LICENSE.html or LICENSE.txt at the top of this package for the
19 * complete terms and further detail along with the license texts for
20 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21 *
22 *
23 */
24
25
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <stdarg.h>
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_STDLIB_H
38 #include <stdlib.h>
39 #endif
40
41 /* Raptor includes */
42 #include "raptor2.h"
43 #include "raptor_internal.h"
44
45
46 /* Define these for far too much output */
47 #undef RAPTOR_DEBUG_VERBOSE
48 #undef RAPTOR_DEBUG_CDATA
49
50
51 /* Raptor structures */
52
/*
 * Grammar states of the RDF/XML parser.
 *
 * The numeric values are used to index raptor_state_names[] below, so
 * the two must be kept in the same order.
 */
typedef enum {
  /* Catch uninitialised state */
  RAPTOR_STATE_INVALID = 0,

  /* Skipping current tree of elements - used to recover from finding
   * illegal content, when parsing permissively.
   */
  RAPTOR_STATE_SKIPPING,

  /* Not in RDF grammar yet - searching for a start element.
   *
   * This can be <rdf:RDF> (goto NODE_ELEMENT_LIST) but since it is optional,
   * the start element can also be one of
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementURIs
   *
   * If RDF content is assumed, go straight to OBJ
   */
  RAPTOR_STATE_UNKNOWN,

  /* A list of node elements
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList
   */
  RAPTOR_STATE_NODE_ELEMENT_LIST,

  /* Found an <rdf:Description> */
  RAPTOR_STATE_DESCRIPTION,

  /* Found a property element
   *   http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
   */
  RAPTOR_STATE_PROPERTYELT,

  /* A property element that is an ordinal - rdf:li, rdf:_n
   */
  RAPTOR_STATE_MEMBER_PROPERTYELT,

  /* Found a node element
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement
   */
  RAPTOR_STATE_NODE_ELEMENT,

  /* A property element with rdf:parseType="Literal"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeLiteralPropertyElt
   */
  RAPTOR_STATE_PARSETYPE_LITERAL,

  /* A property element with rdf:parseType="Resource"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt
   */
  RAPTOR_STATE_PARSETYPE_RESOURCE,

  /* A property element with rdf:parseType="Collection"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt
   *
   * (This also handles daml:Collection)
   */
  RAPTOR_STATE_PARSETYPE_COLLECTION,

  /* A property element with a rdf:parseType attribute and a value
   * not "Literal" or "Resource"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
   */
  RAPTOR_STATE_PARSETYPE_OTHER,

  RAPTOR_STATE_PARSETYPE_LAST = RAPTOR_STATE_PARSETYPE_OTHER


} raptor_state;


/* Printable names of the grammar states, indexed by raptor_state value.
 *
 * There must be exactly one entry per enumerator, in enum order.
 */
static const char* const raptor_state_names[RAPTOR_STATE_PARSETYPE_LAST + 1] = {
  "INVALID",             /* RAPTOR_STATE_INVALID */
  "SKIPPING",            /* RAPTOR_STATE_SKIPPING */
  "UNKNOWN",             /* RAPTOR_STATE_UNKNOWN */
  "nodeElementList",     /* RAPTOR_STATE_NODE_ELEMENT_LIST */
  "Description",         /* RAPTOR_STATE_DESCRIPTION */
  "propertyElt",         /* RAPTOR_STATE_PROPERTYELT */
  "memberPropertyElt",   /* RAPTOR_STATE_MEMBER_PROPERTYELT */
  "nodeElement",         /* RAPTOR_STATE_NODE_ELEMENT */
  "parseTypeLiteral",    /* RAPTOR_STATE_PARSETYPE_LITERAL */
  "parseTypeResource",   /* RAPTOR_STATE_PARSETYPE_RESOURCE */
  "parseTypeCollection", /* RAPTOR_STATE_PARSETYPE_COLLECTION */
  "parseTypeOther"       /* RAPTOR_STATE_PARSETYPE_OTHER */
};


/*
 * raptor_rdfxml_state_as_string:
 * @state: grammar state
 *
 * Get a printable name for a grammar state.
 *
 * Return value: static string naming @state; "INVALID" is returned for
 * RAPTOR_STATE_INVALID and for any value outside the known range.
 */
static const char *
raptor_rdfxml_state_as_string(raptor_state state)
{
  /* compare as int so the range check works whatever integer type the
   * compiler chose for the enum
   */
  int idx = (int)state;

  if(idx < 1 || idx > (int)RAPTOR_STATE_PARSETYPE_LAST)
    idx = (int)RAPTOR_STATE_INVALID;

  return raptor_state_names[idx];
}
146
147
148 /*
149 * raptor_rdfxml_check_propertyElement_name:
150 * @name: rdf namespace term
151 *
152 * Check if an rdf namespace name is allowed to be used as a Node Element.
153 *
154 * Return value: < 0 if unknown rdf namespace term, 0 if known and not allowed, > 0 if known and allowed
155 */
156 static int
raptor_rdfxml_check_nodeElement_name(const char * name)157 raptor_rdfxml_check_nodeElement_name(const char *name)
158 {
159 int i;
160
161 if(*name == '_')
162 return 1;
163
164 for(i = 0; raptor_rdf_ns_terms_info[i].name; i++)
165 if(!strcmp(raptor_rdf_ns_terms_info[i].name, name))
166 return raptor_rdf_ns_terms_info[i].allowed_as_nodeElement;
167
168 return -1;
169 }
170
171
172 /*
173 * raptor_rdfxml_check_propertyElement_name:
174 * @name: rdf namespace term
175 *
176 * Check if an rdf namespace name is allowed to be used as a Property Element.
177 *
178 * Return value: < 0 if unknown rdf namespace term, 0 if known and not allowed, > 0 if known and allowed
179 */
180 static int
raptor_rdfxml_check_propertyElement_name(const char * name)181 raptor_rdfxml_check_propertyElement_name(const char *name)
182 {
183 int i;
184
185 if(*name == '_')
186 return 1;
187
188 for(i = 0; raptor_rdf_ns_terms_info[i].name; i++)
189 if(!strcmp(raptor_rdf_ns_terms_info[i].name, (const char*)name))
190 return raptor_rdf_ns_terms_info[i].allowed_as_propertyElement;
191
192 return -1;
193 }
194
195
196 static int
raptor_rdfxml_check_propertyAttribute_name(const char * name)197 raptor_rdfxml_check_propertyAttribute_name(const char *name)
198 {
199 int i;
200
201 if(*name == '_')
202 return 1;
203
204 for(i = 0; raptor_rdf_ns_terms_info[i].name; i++)
205 if(!strcmp(raptor_rdf_ns_terms_info[i].name, (const char*)name))
206 return raptor_rdf_ns_terms_info[i].allowed_as_propertyAttribute;
207
208 return -1;
209 }
210
211
/*
 * How the content inside an XML element is to be handled.
 *
 * The values index rdf_content_type_info[] below, so the two must be
 * kept in the same order.
 */
typedef enum {
  /* not determined yet - whitespace is kept */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN,

  /* literal content - no elements, cdata allowed, whitespace significant
   *   <propElement> blah </propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL,

  /* parseType literal content (WF XML) - all content preserved
   *   <propElement rdf:parseType="Literal"><em>blah</em></propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL,

  /* top-level nodes - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is an error
   *   only used for <rdf:RDF> or implicit <rdf:RDF>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES,

  /* properties - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is an error
   *   <nodeElement><prop1>blah</prop1> <prop2>blah</prop2> </nodeElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES,

  /* property content - all content preserved
   * the content type changes when the first non-whitespace is found
   *   <propElement>...
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT,

  /* resource URI given - no element, no cdata, whitespace ignored,
   * any non-whitespace cdata is an error
   *   <propElement rdf:resource="uri"/>
   *   <propElement rdf:resource="uri"></propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE,

  /* skipping content - all content is preserved
   * Used when skipping content for unknown parseType-s,
   * error recovery, some other reason
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED,

  /* parseType Collection - all content preserved
   * Parsing of this is determined by the RDF/XML (Revised) closed
   * collection rules
   *   <propElement rdf:parseType="Collection">...</propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION,

  /* Like above but handles "daml:collection" */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION,

  /* not a content type: number of entries in the table below */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST

} raptor_rdfxml_element_content_type;


/* Per content type properties, indexed by
 * raptor_rdfxml_element_content_type.
 *
 * NOTE(review): the enum comment for PROPERTIES says "no cdata" while
 * cdata_allowed is 1 in its row here - confirm which is intended.
 */
static const struct {
  /* printable name */
  const char * name;
  /* is whitespace significant? */
  int whitespace_significant;
  /* is non-blank cdata allowed? */
  int cdata_allowed;
  /* is XML element content allowed? */
  int element_allowed;
  /* Do RDF-specific processing? (property attributes, rdf: attributes, ...) */
  int rdf_processing;
} rdf_content_type_info[RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST] = {
  /* name                ws  cdata  elem  rdf */
  { "Unknown",           1,  1,     1,    0 },
  { "Literal",           1,  1,     0,    0 },
  { "XML Literal",       1,  1,     1,    0 },
  { "Nodes",             0,  0,     1,    1 },
  { "Properties",        0,  1,     1,    1 },
  { "Property Content",  1,  1,     1,    1 },
  { "Resource",          0,  0,     0,    0 },
  { "Preserved",         1,  1,     1,    0 },
  { "Collection",        1,  1,     1,    1 },
  { "DAML Collection",   1,  1,     1,    1 },
};


/*
 * raptor_rdfxml_element_content_type_as_string:
 * @type: element content type
 *
 * Get a printable name for an element content type.
 *
 * Return value: static string from rdf_content_type_info[], or "INVALID"
 * when @type is RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST or greater
 */
static const char *
raptor_rdfxml_element_content_type_as_string(raptor_rdfxml_element_content_type type)
{
  const char *label = "INVALID";

  if(type < RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST)
    label = rdf_content_type_info[type].name;

  return label;
}
304
305
306
307
308
/*
 * Raptor Element/attributes on stack
 *
 * One of these is allocated for every XML start element by
 * raptor_rdfxml_start_element_handler(), linked onto the parser's element
 * stack through @parent, and released by raptor_free_rdfxml_element()
 * when the matching end element is handled.
 */
struct raptor_rdfxml_element_s {
  /* world of the parser that created this element */
  raptor_world* world;

  /* the SAX2 XML element this structure describes */
  raptor_xml_element *xml_element;

  /* NULL at bottom of stack */
  struct raptor_rdfxml_element_s *parent;

  /* attributes declared in M&S
   *
   * Indexed by position of the attribute name in raptor_rdf_ns_terms_info[].
   * The values are taken over from the attribute qnames in the start
   * element handler and freed by raptor_free_rdfxml_element().
   */
  const unsigned char * rdf_attr[RDF_NS_LAST + 1];
  /* how many of above seen */
  int rdf_attr_count;

  /* state that this production matches */
  raptor_state state;

  /* how to handle the content inside this XML element */
  raptor_rdfxml_element_content_type content_type;


  /* starting state for children of this element */
  raptor_state child_state;

  /* starting content type for children of this element */
  raptor_rdfxml_element_content_type child_content_type;


  /* Reified statement identifier */
  raptor_term* reified;

  /* freed by raptor_free_rdfxml_element()
   * NOTE(review): presumably the rdf:ID string the reified term was
   * built from - confirm against the grammar code
   */
  unsigned const char* reified_id;

  /* Bag identifier */
  raptor_term* bag;
  int last_bag_ordinal; /* starts at 0, so first predicate is rdf:_1 */

  /* Subject identifier (URI/anon ID), type, source
   *
   * When the XML element represents a node, this is the identifier
   */
  raptor_term* subject;

  /* Predicate URI
   *
   * When the XML element represents a node or predicate,
   * this is the identifier of the predicate
   */
  raptor_term* predicate;

  /* Object identifier (URI/anon ID), type, source
   *
   * When this XML element generates a statement that needs an object,
   * possibly from a child element, this is the identifier of the object
   */
  raptor_term* object;

  /* URI of datatype of literal */
  raptor_uri *object_literal_datatype;

  /* last ordinal used, so initialising to 0 works, emitting rdf:_1 first */
  int last_ordinal;

  /* If this element's parseType is a Collection
   * this identifies the anon node of current tail of the collection(list).
   * Freed by raptor_free_rdfxml_element().
   */
  const unsigned char *tail_id;

  /* RDF/XML specific checks */

  /* all cdata so far is whitespace */
  unsigned int content_cdata_all_whitespace;
};

typedef struct raptor_rdfxml_element_s raptor_rdfxml_element;
386
387
/* Number of entries in raptor_rdfxml_parser_s.concepts[] */
#define RAPTOR_RDFXML_N_CONCEPTS 5

/*
 * Raptor parser object
 *
 * RDF/XML specific parser state; reached from a raptor_parser through
 * its context pointer.
 */
struct raptor_rdfxml_parser_s {
  /* SAX2 XML parser object, created in raptor_rdfxml_parse_init() */
  raptor_sax2 *sax2;

  /* stack of elements - elements add after current_element */
  raptor_rdfxml_element *root_element;
  raptor_rdfxml_element *current_element;

  /* DAML URIs allocated in raptor_rdfxml_parse_init(); accessed through
   * the RAPTOR_DAML_*_URI() macros
   */
  raptor_uri* concepts[RAPTOR_RDFXML_N_CONCEPTS];

  /* set of seen rdf:ID / rdf:bagID values (with in-scope base URI) */
  raptor_id_set* id_set;

  /* NOTE(review): presumably the buffer, its length and the iostream
   * used while collecting parseType="Literal" content - confirm against
   * the grammar code
   */
  void *xml_content;
  size_t xml_content_length;
  raptor_iostream* iostream;

  /* writer for building parseType="Literal" content */
  raptor_xml_writer* xml_writer;
};
412
413
414
415
/* Accessors for the DAML URIs kept in the concepts[] array of the
 * RDF/XML parser object.
 *
 * Each expands to an lvalue, so it can be both read and assigned.
 * The argument is parenthesised so that any pointer expression
 * (for example &object) may be passed.
 */

#define RAPTOR_DAML_NS_URI(rdf_xml_parser)    ((rdf_xml_parser)->concepts[0])

#define RAPTOR_DAML_List_URI(rdf_xml_parser)  ((rdf_xml_parser)->concepts[1])
#define RAPTOR_DAML_first_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[2])
#define RAPTOR_DAML_rest_URI(rdf_xml_parser)  ((rdf_xml_parser)->concepts[3])
#define RAPTOR_DAML_nil_URI(rdf_xml_parser)   ((rdf_xml_parser)->concepts[4])

/* RAPTOR_RDFXML_N_CONCEPTS defines size of array */
426
427
428 /* prototypes for element functions */
429 static raptor_rdfxml_element* raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_parser);
430 static void raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_parser, raptor_rdfxml_element* element);
431
432 static int raptor_rdfxml_record_ID(raptor_parser *rdf_parser, raptor_rdfxml_element *element, const unsigned char *id);
433
434 /* prototypes for grammar functions */
435 static void raptor_rdfxml_start_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
436 static void raptor_rdfxml_end_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
437 static void raptor_rdfxml_cdata_grammar(raptor_parser *parser, const unsigned char *s, int len, int is_cdata);
438
439
440 /* prototype for statement related functions */
441 static void raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, raptor_term *subject, raptor_uri *predicate_uri, raptor_term *object, raptor_term *reified, raptor_rdfxml_element *bag_element);
442
443
444
445 /* Prototypes for parsing data functions */
446 static int raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name);
447 static void raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser);
448 static int raptor_rdfxml_parse_start(raptor_parser* rdf_parser);
449 static int raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer, size_t len, int is_end);
450 static void raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser);
451
452 static raptor_uri* raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser);
453
454
455 static raptor_rdfxml_element*
raptor_rdfxml_element_pop(raptor_rdfxml_parser * rdf_xml_parser)456 raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_xml_parser)
457 {
458 raptor_rdfxml_element *element = rdf_xml_parser->current_element;
459
460 if(!element)
461 return NULL;
462
463 rdf_xml_parser->current_element = element->parent;
464 if(rdf_xml_parser->root_element == element) /* just deleted root */
465 rdf_xml_parser->root_element = NULL;
466
467 return element;
468 }
469
470
471 static void
raptor_rdfxml_element_push(raptor_rdfxml_parser * rdf_xml_parser,raptor_rdfxml_element * element)472 raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_xml_parser, raptor_rdfxml_element* element)
473 {
474 element->parent = rdf_xml_parser->current_element;
475 rdf_xml_parser->current_element = element;
476 if(!rdf_xml_parser->root_element)
477 rdf_xml_parser->root_element = element;
478 }
479
480
481 static void
raptor_free_rdfxml_element(raptor_rdfxml_element * element)482 raptor_free_rdfxml_element(raptor_rdfxml_element *element)
483 {
484 int i;
485
486 /* Free special RDF M&S attributes */
487 for(i = 0; i <= RDF_NS_LAST; i++)
488 if(element->rdf_attr[i])
489 RAPTOR_FREE(char*, element->rdf_attr[i]);
490
491 if(element->subject)
492 raptor_free_term(element->subject);
493 if(element->predicate)
494 raptor_free_term(element->predicate);
495 if(element->object)
496 raptor_free_term(element->object);
497 if(element->bag)
498 raptor_free_term(element->bag);
499 if(element->reified)
500 raptor_free_term(element->reified);
501
502 if(element->tail_id)
503 RAPTOR_FREE(char*, (char*)element->tail_id);
504 if(element->object_literal_datatype)
505 raptor_free_uri(element->object_literal_datatype);
506
507 if(element->reified_id)
508 RAPTOR_FREE(char*, (char*)element->reified_id);
509
510 RAPTOR_FREE(raptor_rdfxml_element, element);
511 }
512
513
514 static void
raptor_rdfxml_sax2_new_namespace_handler(void * user_data,raptor_namespace * nspace)515 raptor_rdfxml_sax2_new_namespace_handler(void *user_data,
516 raptor_namespace* nspace)
517 {
518 raptor_parser* rdf_parser;
519 const unsigned char* namespace_name;
520 size_t namespace_name_len;
521 raptor_uri* uri = raptor_namespace_get_uri(nspace);
522
523 rdf_parser = (raptor_parser*)user_data;
524 raptor_parser_start_namespace(rdf_parser, nspace);
525
526 if(!uri)
527 return;
528
529 namespace_name = raptor_uri_as_counted_string(uri, &namespace_name_len);
530
531 if(namespace_name_len == raptor_rdf_namespace_uri_len-1 &&
532 !strncmp((const char*)namespace_name,
533 (const char*)raptor_rdf_namespace_uri,
534 namespace_name_len)) {
535 const unsigned char *prefix = raptor_namespace_get_prefix(nspace);
536 raptor_parser_warning(rdf_parser,
537 "Declaring a namespace with prefix %s to URI %s - one letter short of the RDF namespace URI and probably a mistake.",
538 prefix, namespace_name);
539 }
540
541 if(namespace_name_len > raptor_rdf_namespace_uri_len &&
542 !strncmp((const char*)namespace_name,
543 (const char*)raptor_rdf_namespace_uri,
544 raptor_rdf_namespace_uri_len)) {
545 raptor_parser_error(rdf_parser,
546 "Declaring a namespace URI %s to which the RDF namespace URI is a prefix is forbidden.",
547 namespace_name);
548 }
549 }
550
551
552
553 static void
raptor_rdfxml_start_element_handler(void * user_data,raptor_xml_element * xml_element)554 raptor_rdfxml_start_element_handler(void *user_data,
555 raptor_xml_element* xml_element)
556 {
557 raptor_parser* rdf_parser;
558 raptor_rdfxml_parser* rdf_xml_parser;
559 raptor_rdfxml_element* element;
560 int ns_attributes_count = 0;
561 raptor_qname** named_attrs = NULL;
562 int i;
563 int count_bumped = 0;
564
565 rdf_parser = (raptor_parser*)user_data;
566 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
567
568 if(rdf_parser->failed)
569 return;
570
571 raptor_rdfxml_update_document_locator(rdf_parser);
572
573 /* Create new element structure */
574 element = RAPTOR_CALLOC(raptor_rdfxml_element*, 1, sizeof(*element));
575 if(!element) {
576 raptor_parser_fatal_error(rdf_parser, "Out of memory");
577 rdf_parser->failed = 1;
578 return;
579 }
580 element->world = rdf_parser->world;
581 element->xml_element = xml_element;
582
583 raptor_rdfxml_element_push(rdf_xml_parser, element);
584
585 named_attrs = raptor_xml_element_get_attributes(xml_element);
586 ns_attributes_count = raptor_xml_element_get_attributes_count(xml_element);
587
588 /* RDF-specific processing of attributes */
589 if(ns_attributes_count) {
590 raptor_qname** new_named_attrs;
591 int offset = 0;
592 raptor_rdfxml_element* parent_element;
593
594 parent_element = element->parent;
595
596 /* Allocate new array to move namespaced-attributes to if
597 * rdf processing is performed
598 */
599 new_named_attrs = RAPTOR_CALLOC(raptor_qname**, ns_attributes_count,
600 sizeof(raptor_qname*));
601 if(!new_named_attrs) {
602 raptor_parser_fatal_error(rdf_parser, "Out of memory");
603 rdf_parser->failed = 1;
604 return;
605 }
606
607 for(i = 0; i < ns_attributes_count; i++) {
608 raptor_qname* attr = named_attrs[i];
609
610 /* If:
611 * 1 We are handling RDF content and RDF processing is allowed on
612 * this element
613 * OR
614 * 2 We are not handling RDF content and
615 * this element is at the top level (top level Desc. / typedNode)
616 * i.e. we have no parent
617 * then handle the RDF attributes
618 */
619 if((parent_element &&
620 rdf_content_type_info[parent_element->child_content_type].rdf_processing) ||
621 !parent_element) {
622
623 /* Save pointers to some RDF M&S attributes */
624
625 /* If RDF namespace-prefixed attributes */
626 if(attr->nspace && attr->nspace->is_rdf_ms) {
627 const unsigned char *attr_name = attr->local_name;
628 int j;
629
630 for(j = 0; j <= RDF_NS_LAST; j++)
631 if(!strcmp((const char*)attr_name,
632 raptor_rdf_ns_terms_info[j].name)) {
633 element->rdf_attr[j] = attr->value;
634 element->rdf_attr_count++;
635 /* Delete it if it was stored elsewhere */
636 #ifdef RAPTOR_DEBUG_VERBOSE
637 RAPTOR_DEBUG3("Found RDF namespace attribute '%s' URI %s\n",
638 (char*)attr_name, attr->value);
639 #endif
640 /* make sure value isn't deleted from qname structure */
641 attr->value = NULL;
642 raptor_free_qname(attr);
643 attr = NULL;
644 break;
645 }
646 } /* end if RDF namespaced-prefixed attributes */
647
648 if(!attr)
649 continue;
650
651 /* If non namespace-prefixed RDF attributes found on an element */
652 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES) &&
653 !attr->nspace) {
654 const unsigned char *attr_name = attr->local_name;
655 int j;
656
657 for(j = 0; j <= RDF_NS_LAST; j++)
658 if(!strcmp((const char*)attr_name,
659 raptor_rdf_ns_terms_info[j].name)) {
660 element->rdf_attr[j] = attr->value;
661 element->rdf_attr_count++;
662 if(!raptor_rdf_ns_terms_info[j].allowed_unprefixed_on_attribute)
663 raptor_parser_warning(rdf_parser,
664 "Using rdf attribute '%s' without the RDF namespace has been deprecated.",
665 attr_name);
666
667 /* Delete it if it was stored elsewhere */
668 /* make sure value isn't deleted from qname structure */
669 attr->value = NULL;
670 raptor_free_qname(attr);
671 attr = NULL;
672 break;
673 }
674 } /* end if non-namespace prefixed RDF attributes */
675
676 if(!attr)
677 continue;
678
679 } /* end if leave literal XML alone */
680
681 if(attr)
682 new_named_attrs[offset++] = attr;
683 }
684
685 /* new attribute count is set from attributes that haven't been skipped */
686 ns_attributes_count = offset;
687 if(!ns_attributes_count) {
688 /* all attributes were deleted so delete the new array */
689 RAPTOR_FREE(raptor_qname_array, new_named_attrs);
690 new_named_attrs = NULL;
691 }
692
693 RAPTOR_FREE(raptor_qname_array, named_attrs);
694 named_attrs = new_named_attrs;
695 raptor_xml_element_set_attributes(xml_element,
696 named_attrs, ns_attributes_count);
697 } /* end if ns_attributes_count */
698
699
700 /* start from unknown; if we have a parent, it may set this */
701 element->state = RAPTOR_STATE_UNKNOWN;
702 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN;
703
704 if(element->parent &&
705 element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN) {
706 element->content_type = element->parent->child_content_type;
707
708 if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE &&
709 element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
710 element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
711 raptor_qname* parent_el_name;
712 parent_el_name = raptor_xml_element_get_name(element->parent->xml_element);
713 /* If parent has an rdf:resource, this element should not be here */
714 raptor_parser_error(rdf_parser,
715 "property element '%s' has multiple object node elements, skipping.",
716 parent_el_name->local_name);
717 element->state = RAPTOR_STATE_SKIPPING;
718 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
719
720 } else {
721 if(!element->parent->child_state) {
722 raptor_parser_fatal_error(rdf_parser,
723 "%s: Internal error: no parent element child_state set",
724 __FUNCTION__);
725 return;
726 }
727
728 element->state = element->parent->child_state;
729 element->parent->xml_element->content_element_seen++;
730 count_bumped++;
731
732 /* leave literal XML alone */
733 if(!rdf_content_type_info[element->content_type].cdata_allowed) {
734 if(element->parent->xml_element->content_element_seen &&
735 element->parent->xml_element->content_cdata_seen) {
736 raptor_qname* parent_el_name;
737
738 parent_el_name = raptor_xml_element_get_name(element->parent->xml_element);
739 /* Uh oh - mixed content, the parent element has cdata too */
740 raptor_parser_warning(rdf_parser, "element '%s' has mixed content.",
741 parent_el_name->local_name);
742 }
743
744 /* If there is some existing all-whitespace content cdata
745 * before this node element, delete it
746 */
747 if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES &&
748 element->parent->xml_element->content_element_seen &&
749 element->parent->content_cdata_all_whitespace &&
750 element->parent->xml_element->content_cdata_length) {
751
752 element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
753
754 raptor_free_stringbuffer(element->parent->xml_element->content_cdata_sb);
755 element->parent->xml_element->content_cdata_sb = NULL;
756 element->parent->xml_element->content_cdata_length = 0;
757 }
758
759 } /* end if leave literal XML alone */
760
761 } /* end if parent has no rdf:resource */
762
763 } /* end if element->parent */
764
765
766 #ifdef RAPTOR_DEBUG_VERBOSE
767 RAPTOR_DEBUG2("Using content type %s\n",
768 rdf_content_type_info[element->content_type].name);
769
770 fprintf(stderr, "raptor_rdfxml_start_element_handler: Start ns-element: ");
771 raptor_print_xml_element(xml_element, stderr);
772 #endif
773
774
775 /* Check for non namespaced stuff when not in a parseType literal, other */
776 if(rdf_content_type_info[element->content_type].rdf_processing) {
777 const raptor_namespace* ns;
778
779 ns = raptor_xml_element_get_name(xml_element)->nspace;
780 /* The element */
781
782 /* If has no namespace or the namespace has no name (xmlns="") */
783 if((!ns || (ns && !raptor_namespace_get_uri(ns))) && element->parent) {
784 raptor_qname* parent_el_name;
785
786 parent_el_name = raptor_xml_element_get_name(element->parent->xml_element);
787
788 raptor_parser_error(rdf_parser,
789 "Using an element '%s' without a namespace is forbidden.",
790 parent_el_name->local_name);
791 element->state = RAPTOR_STATE_SKIPPING;
792 /* Remove count above so that parent thinks this is empty */
793 if(count_bumped)
794 element->parent->xml_element->content_element_seen--;
795 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
796 }
797
798
799 /* Check for any remaining non-namespaced attributes */
800 if(named_attrs) {
801 for(i = 0; i < ns_attributes_count; i++) {
802 raptor_qname *attr = named_attrs[i];
803 /* Check if any attributes are non-namespaced */
804 if(!attr->nspace ||
805 (attr->nspace && !raptor_namespace_get_uri(attr->nspace))) {
806 raptor_parser_error(rdf_parser,
807 "Using an attribute '%s' without a namespace is forbidden.",
808 attr->local_name);
809 raptor_free_qname(attr);
810 named_attrs[i] = NULL;
811 }
812 }
813 }
814 }
815
816
817 if(element->rdf_attr[RDF_NS_aboutEach] ||
818 element->rdf_attr[RDF_NS_aboutEachPrefix]) {
819 raptor_parser_warning(rdf_parser,
820 "element '%s' has aboutEach / aboutEachPrefix, skipping.",
821 raptor_xml_element_get_name(xml_element)->local_name);
822 element->state = RAPTOR_STATE_SKIPPING;
823 /* Remove count above so that parent thinks this is empty */
824 if(count_bumped)
825 element->parent->xml_element->content_element_seen--;
826 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
827 }
828
829 /* Right, now ready to enter the grammar */
830 raptor_rdfxml_start_element_grammar(rdf_parser, element);
831
832 return;
833 }
834
835
836 static void
raptor_rdfxml_end_element_handler(void * user_data,raptor_xml_element * xml_element)837 raptor_rdfxml_end_element_handler(void *user_data,
838 raptor_xml_element* xml_element)
839 {
840 raptor_parser* rdf_parser;
841 raptor_rdfxml_parser* rdf_xml_parser;
842 raptor_rdfxml_element* element;
843
844 rdf_parser = (raptor_parser*)user_data;
845 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
846
847 if(!rdf_parser->failed) {
848 raptor_rdfxml_update_document_locator(rdf_parser);
849
850 raptor_rdfxml_end_element_grammar(rdf_parser,
851 rdf_xml_parser->current_element);
852 }
853
854 element = raptor_rdfxml_element_pop(rdf_xml_parser);
855 if(element) {
856 if(element->parent) {
857 /* Do not change this; PROPERTYELT will turn into MEMBER if necessary
858 * See the switch case for MEMBER / PROPERTYELT where the test is done.
859 *
860 * PARSETYPE_RESOURCE should never be propogated up since it
861 * will turn the next child (node) element into a property
862 */
863 if(element->state != RAPTOR_STATE_MEMBER_PROPERTYELT &&
864 element->state != RAPTOR_STATE_PARSETYPE_RESOURCE)
865 element->parent->child_state = element->state;
866 }
867
868 raptor_free_rdfxml_element(element);
869 }
870 }
871
872
873 /* cdata (and ignorable whitespace for libxml).
874 * s 0 terminated is for libxml
875 */
876 static void
raptor_rdfxml_characters_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s,int len)877 raptor_rdfxml_characters_handler(void *user_data,
878 raptor_xml_element* xml_element,
879 const unsigned char *s, int len)
880 {
881 raptor_parser* rdf_parser = (raptor_parser*)user_data;
882
883 raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 0);
884 }
885
886
887 /* cdata (and ignorable whitespace for libxml).
888 * s is 0 terminated for libxml2
889 */
890 static void
raptor_rdfxml_cdata_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s,int len)891 raptor_rdfxml_cdata_handler(void *user_data, raptor_xml_element* xml_element,
892 const unsigned char *s, int len)
893 {
894 raptor_parser* rdf_parser = (raptor_parser*)user_data;
895
896 raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 1);
897 }
898
899
900 /* comment handler
901 * s is 0 terminated
902 */
903 static void
raptor_rdfxml_comment_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s)904 raptor_rdfxml_comment_handler(void *user_data, raptor_xml_element* xml_element,
905 const unsigned char *s)
906 {
907 raptor_parser* rdf_parser = (raptor_parser*)user_data;
908 raptor_rdfxml_parser* rdf_xml_parser;
909 raptor_rdfxml_element* element;
910
911 if(rdf_parser->failed || !xml_element)
912 return;
913
914 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
915 element = rdf_xml_parser->current_element;
916
917 if(element) {
918 if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
919 raptor_xml_writer_comment(rdf_xml_parser->xml_writer, s);
920 }
921
922
923 #ifdef RAPTOR_DEBUG_VERBOSE
924 RAPTOR_DEBUG2("XML Comment '%s'\n", s);
925 #endif
926 }
927
928
/* DAML+OIL namespace URI string and its length in bytes (excluding the
 * terminating NUL).  The length is computed from the literal so the two
 * cannot get out of step.
 */
#define RAPTOR_DAML_NAMESPACE_URI_LITERAL "http://www.daml.org/2001/03/daml+oil#"

static const unsigned char* const daml_namespace_uri_string = (const unsigned char*)RAPTOR_DAML_NAMESPACE_URI_LITERAL;
static const int daml_namespace_uri_string_len = (int)(sizeof(RAPTOR_DAML_NAMESPACE_URI_LITERAL) - 1);
931
932
933 static int
raptor_rdfxml_parse_init(raptor_parser * rdf_parser,const char * name)934 raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name)
935 {
936 raptor_rdfxml_parser* rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
937 raptor_sax2* sax2;
938 raptor_world* world = rdf_parser->world;
939
940 /* Allocate sax2 object */
941 sax2 = raptor_new_sax2(rdf_parser->world, &rdf_parser->locator, rdf_parser);
942 rdf_xml_parser->sax2 = sax2;
943 if(!sax2)
944 return 1;
945
946 /* Initialize sax2 element handlers */
947 raptor_sax2_set_start_element_handler(sax2, raptor_rdfxml_start_element_handler);
948 raptor_sax2_set_end_element_handler(sax2, raptor_rdfxml_end_element_handler);
949 raptor_sax2_set_characters_handler(sax2, raptor_rdfxml_characters_handler);
950 raptor_sax2_set_cdata_handler(sax2, raptor_rdfxml_cdata_handler);
951 raptor_sax2_set_comment_handler(sax2, raptor_rdfxml_comment_handler);
952 raptor_sax2_set_namespace_handler(sax2, raptor_rdfxml_sax2_new_namespace_handler);
953
954 /* Allocate uris */
955 RAPTOR_DAML_NS_URI(rdf_xml_parser) = raptor_new_uri_from_counted_string(world,
956 daml_namespace_uri_string,
957 daml_namespace_uri_string_len);
958
959 RAPTOR_DAML_List_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"List");
960 RAPTOR_DAML_first_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser) ,(const unsigned char *)"first");
961 RAPTOR_DAML_rest_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"rest");
962 RAPTOR_DAML_nil_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"nil");
963
964 /* Check for uri allocation failures */
965 if(!RAPTOR_DAML_NS_URI(rdf_xml_parser) ||
966 !RAPTOR_DAML_List_URI(rdf_xml_parser) ||
967 !RAPTOR_DAML_first_URI(rdf_xml_parser) ||
968 !RAPTOR_DAML_rest_URI(rdf_xml_parser) ||
969 !RAPTOR_DAML_nil_URI(rdf_xml_parser))
970 return 1;
971
972 /* Everything succeeded */
973 return 0;
974 }
975
976
977 static int
raptor_rdfxml_parse_start(raptor_parser * rdf_parser)978 raptor_rdfxml_parse_start(raptor_parser* rdf_parser)
979 {
980 raptor_uri *uri = rdf_parser->base_uri;
981 raptor_rdfxml_parser* rdf_xml_parser;
982
983 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
984
985 /* base URI required for RDF/XML */
986 if(!uri)
987 return 1;
988
989 /* Optionally normalize language to lowercase
990 * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier
991 */
992 raptor_sax2_set_option(rdf_xml_parser->sax2,
993 RAPTOR_OPTION_NORMALIZE_LANGUAGE, NULL,
994 RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NORMALIZE_LANGUAGE));
995
996 /* Optionally forbid internal network and file requests in the XML parser */
997 raptor_sax2_set_option(rdf_xml_parser->sax2,
998 RAPTOR_OPTION_NO_NET, NULL,
999 RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET));
1000 raptor_sax2_set_option(rdf_xml_parser->sax2,
1001 RAPTOR_OPTION_NO_FILE, NULL,
1002 RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_FILE));
1003 raptor_sax2_set_option(rdf_xml_parser->sax2,
1004 RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES, NULL,
1005 RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES));
1006 if(rdf_parser->uri_filter)
1007 raptor_sax2_set_uri_filter(rdf_xml_parser->sax2, rdf_parser->uri_filter,
1008 rdf_parser->uri_filter_user_data);
1009
1010 raptor_sax2_parse_start(rdf_xml_parser->sax2, uri);
1011
1012 /* Delete any existing id_set */
1013 if(rdf_xml_parser->id_set) {
1014 raptor_free_id_set(rdf_xml_parser->id_set);
1015 rdf_xml_parser->id_set = NULL;
1016 }
1017
1018 /* Create a new id_set if needed */
1019 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID)) {
1020 rdf_xml_parser->id_set = raptor_new_id_set(rdf_parser->world);
1021 if(!rdf_xml_parser->id_set)
1022 return 1;
1023 }
1024
1025 return 0;
1026 }
1027
1028
1029 static void
raptor_rdfxml_parse_terminate(raptor_parser * rdf_parser)1030 raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser)
1031 {
1032 raptor_rdfxml_parser* rdf_xml_parser;
1033 raptor_rdfxml_element* element;
1034 int i;
1035
1036 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
1037
1038 if(rdf_xml_parser->sax2) {
1039 raptor_free_sax2(rdf_xml_parser->sax2);
1040 rdf_xml_parser->sax2 = NULL;
1041 }
1042
1043 while( (element = raptor_rdfxml_element_pop(rdf_xml_parser)) )
1044 raptor_free_rdfxml_element(element);
1045
1046
1047 for(i = 0; i < RAPTOR_RDFXML_N_CONCEPTS; i++) {
1048 raptor_uri* concept_uri = rdf_xml_parser->concepts[i];
1049 if(concept_uri) {
1050 raptor_free_uri(concept_uri);
1051 rdf_xml_parser->concepts[i] = NULL;
1052 }
1053 }
1054
1055 if(rdf_xml_parser->id_set) {
1056 raptor_free_id_set(rdf_xml_parser->id_set);
1057 rdf_xml_parser->id_set = NULL;
1058 }
1059
1060 if (rdf_xml_parser->xml_writer) {
1061 raptor_free_xml_writer(rdf_xml_parser->xml_writer);
1062 rdf_xml_parser->xml_writer = NULL;
1063 }
1064
1065 if (rdf_xml_parser->iostream) {
1066 raptor_free_iostream(rdf_xml_parser->iostream);
1067 rdf_xml_parser->iostream = NULL;
1068 }
1069
1070 if (rdf_xml_parser->xml_content) {
1071 RAPTOR_FREE(char*, rdf_xml_parser->xml_content);
1072 rdf_xml_parser->xml_content = NULL;
1073 rdf_xml_parser->xml_content_length = 0;
1074 }
1075 }
1076
1077
1078 static int
raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)1079 raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory* factory,
1080 const unsigned char *buffer, size_t len,
1081 const unsigned char *identifier,
1082 const unsigned char *suffix,
1083 const char *mime_type)
1084 {
1085 int score = 0;
1086
1087 if(suffix) {
1088 if(!strcmp((const char*)suffix, "rdf") ||
1089 !strcmp((const char*)suffix, "rdfs") ||
1090 !strcmp((const char*)suffix, "foaf") ||
1091 !strcmp((const char*)suffix, "doap") ||
1092 !strcmp((const char*)suffix, "owl") ||
1093 !strcmp((const char*)suffix, "daml"))
1094 score = 9;
1095 if(!strcmp((const char*)suffix, "rss"))
1096 score = 3;
1097 }
1098
1099 if(identifier) {
1100 if(strstr((const char*)identifier, "rss1"))
1101 score += 5;
1102 else if(!suffix && strstr((const char*)identifier, "rss"))
1103 score += 3;
1104 else if(!suffix && strstr((const char*)identifier, "rdf"))
1105 score += 2;
1106 else if(!suffix && strstr((const char*)identifier, "RDF"))
1107 score += 2;
1108 }
1109
1110 if(mime_type) {
1111 if(strstr((const char*)mime_type, "html"))
1112 score -= 4;
1113 else if(!strcmp((const char*)mime_type, "text/rdf"))
1114 score += 7;
1115 else if(!strcmp((const char*)mime_type, "application/xml"))
1116 score += 5;
1117 }
1118
1119 if(buffer && len) {
1120 /* Check it's an XML namespace declared and not N3 or Turtle which
1121 * mention the namespace URI but not in this form.
1122 */
1123 #define HAS_RDF_XMLNS1 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1124 #define HAS_RDF_XMLNS2 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1125 #define HAS_RDF_XMLNS3 (raptor_memstr((const char*)buffer, len, "xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1126 #define HAS_RDF_XMLNS4 (raptor_memstr((const char*)buffer, len, "xmlns='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1127 #define HAS_RDF_ENTITY1 (raptor_memstr((const char*)buffer, len, "!ENTITY rdf 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'") != NULL)
1128 #define HAS_RDF_ENTITY2 (raptor_memstr((const char*)buffer, len, "!ENTITY rdf \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"") != NULL)
1129 #define HAS_RDF_ENTITY3 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"&rdf;\"") != NULL)
1130 #define HAS_RDF_ENTITY4 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='&rdf;'") != NULL)
1131 #define HAS_HTML_NS (raptor_memstr((const char*)buffer, len, "http://www.w3.org/1999/xhtml") != NULL)
1132 #define HAS_HTML_ROOT (raptor_memstr((const char*)buffer, len, "<html") != NULL)
1133
1134 if(!HAS_HTML_NS && !HAS_HTML_ROOT &&
1135 (HAS_RDF_XMLNS1 || HAS_RDF_XMLNS2 || HAS_RDF_XMLNS3 || HAS_RDF_XMLNS4 ||
1136 HAS_RDF_ENTITY1 || HAS_RDF_ENTITY2 || HAS_RDF_ENTITY3 || HAS_RDF_ENTITY4)
1137 ) {
1138 int has_rdf_RDF = (raptor_memstr((const char*)buffer, len, "<rdf:RDF") != NULL);
1139 int has_rdf_Description = (raptor_memstr((const char*)buffer, len, "rdf:Description") != NULL);
1140 int has_rdf_about = (raptor_memstr((const char*)buffer, len, "rdf:about") != NULL);
1141
1142 score += 7;
1143 if(has_rdf_RDF)
1144 score++;
1145 if(has_rdf_Description)
1146 score++;
1147 if(has_rdf_about)
1148 score++;
1149 }
1150 }
1151
1152 return score;
1153 }
1154
1155
1156
1157 static int
raptor_rdfxml_parse_chunk(raptor_parser * rdf_parser,const unsigned char * buffer,size_t len,int is_end)1158 raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser,
1159 const unsigned char *buffer,
1160 size_t len, int is_end)
1161 {
1162 raptor_rdfxml_parser* rdf_xml_parser;
1163 int rc;
1164
1165 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
1166 if(rdf_parser->failed)
1167 return 1;
1168
1169 rc = raptor_sax2_parse_chunk(rdf_xml_parser->sax2, buffer, len, is_end);
1170
1171 if(is_end) {
1172 if(rdf_parser->emitted_default_graph) {
1173 raptor_parser_end_graph(rdf_parser, NULL, 0);
1174 rdf_parser->emitted_default_graph--;
1175 }
1176 }
1177
1178 return rc;
1179 }
1180
1181
1182 static void
raptor_rdfxml_generate_statement(raptor_parser * rdf_parser,raptor_term * subject_term,raptor_uri * predicate_uri,raptor_term * object_term,raptor_term * reified_term,raptor_rdfxml_element * bag_element)1183 raptor_rdfxml_generate_statement(raptor_parser *rdf_parser,
1184 raptor_term *subject_term,
1185 raptor_uri *predicate_uri,
1186 raptor_term *object_term,
1187 raptor_term *reified_term,
1188 raptor_rdfxml_element* bag_element)
1189 {
1190 raptor_statement *statement = &rdf_parser->statement;
1191 raptor_term* predicate_term = NULL;
1192 int free_reified_term = 0;
1193
1194 if(rdf_parser->failed)
1195 return;
1196
1197 #ifdef RAPTOR_DEBUG_VERBOSE
1198 if(!subject_term)
1199 RAPTOR_FATAL1("Statement has no subject\n");
1200
1201 if(!predicate_uri)
1202 RAPTOR_FATAL1("Statement has no predicate\n");
1203
1204 if(!object_term)
1205 RAPTOR_FATAL1("Statement has no object\n");
1206
1207 #endif
1208
1209 predicate_term = raptor_new_term_from_uri(rdf_parser->world, predicate_uri);
1210 if(!predicate_term)
1211 return;
1212
1213 statement->subject = subject_term;
1214 statement->predicate = predicate_term;
1215 statement->object = object_term;
1216
1217 #ifdef RAPTOR_DEBUG_VERBOSE
1218 fprintf(stderr, "raptor_rdfxml_generate_statement: Generating statement: ");
1219 raptor_statement_print(statement, stderr);
1220 fputc('\n', stderr);
1221 #endif
1222
1223 if(!rdf_parser->emitted_default_graph) {
1224 raptor_parser_start_graph(rdf_parser, NULL, 0);
1225 rdf_parser->emitted_default_graph++;
1226 }
1227
1228 if(!rdf_parser->statement_handler)
1229 goto generate_tidy;
1230
1231 /* Generate the statement; or is it a fact? */
1232 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1233
1234
1235 /* the bagID mess */
1236 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID) &&
1237 bag_element && bag_element->bag) {
1238 raptor_term* bag = bag_element->bag;
1239 raptor_uri* bag_predicate_uri = NULL;
1240 raptor_term* bag_predicate_term = NULL;
1241
1242 statement->subject = bag;
1243
1244 bag_element->last_bag_ordinal++;
1245
1246 /* new URI object */
1247 bag_predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world,
1248 bag_element->last_bag_ordinal);
1249 if(!bag_predicate_uri)
1250 goto generate_tidy;
1251
1252 bag_predicate_term = raptor_new_term_from_uri(rdf_parser->world,
1253 bag_predicate_uri);
1254 raptor_free_uri(bag_predicate_uri);
1255
1256 if(!bag_predicate_term)
1257 goto generate_tidy;
1258
1259 statement->predicate = bag_predicate_term;
1260
1261 if(!reified_term || !reified_term->value.blank.string) {
1262 unsigned char *reified_id = NULL;
1263
1264 /* reified_term is NULL so generate a bag ID */
1265 reified_id = raptor_world_generate_bnodeid(rdf_parser->world);
1266 if(!reified_id)
1267 goto generate_tidy;
1268
1269 reified_term = raptor_new_term_from_blank(rdf_parser->world, reified_id);
1270 RAPTOR_FREE(char*, reified_id);
1271
1272 if(!reified_term)
1273 goto generate_tidy;
1274 free_reified_term = 1;
1275 }
1276
1277 statement->object = reified_term;
1278 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1279
1280 if(bag_predicate_term)
1281 raptor_free_term(bag_predicate_term);
1282 }
1283
1284
1285 /* return if is there no reified ID (that is valid) */
1286 if(!reified_term || !reified_term->value.blank.string)
1287 goto generate_tidy;
1288
1289
1290 /* otherwise generate reified statements */
1291
1292 statement->subject = reified_term;
1293 statement->predicate = RAPTOR_RDF_type_term(rdf_parser->world);
1294 statement->object = RAPTOR_RDF_Statement_term(rdf_parser->world);
1295 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1296
1297 /* statement->subject = reified_term; */
1298 statement->predicate = RAPTOR_RDF_subject_term(rdf_parser->world);
1299 statement->object = subject_term;
1300 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1301
1302
1303 /* statement->subject = reified_term; */
1304 statement->predicate = RAPTOR_RDF_predicate_term(rdf_parser->world);
1305 statement->object = predicate_term;
1306 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1307
1308 /* statement->subject = reified_term; */
1309 statement->predicate = RAPTOR_RDF_object_term(rdf_parser->world);
1310 statement->object = object_term;
1311 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1312
1313
1314 generate_tidy:
1315 /* Tidy up things allocated here */
1316 if(predicate_term)
1317 raptor_free_term(predicate_term);
1318 if(free_reified_term && reified_term)
1319 raptor_free_term(reified_term);
1320 }
1321
1322
1323
1324 /**
1325 * raptor_rdfxml_element_has_property_attributes:
1326 * @element: element with the property attributes
1327 *
1328 * Return true if the element has at least one property attribute.
1329 *
1330 **/
1331 static int
raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element * element)1332 raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element *element)
1333 {
1334 int i;
1335
1336 if(element->xml_element->attribute_count > 0)
1337 return 1;
1338
1339 /* look for rdf: properties */
1340 for(i = 0; i <= RDF_NS_LAST; i++) {
1341 if(element->rdf_attr[i] &&
1342 raptor_rdf_ns_terms_info[i].type != RAPTOR_TERM_TYPE_UNKNOWN)
1343 return 1;
1344 }
1345 return 0;
1346 }
1347
1348
1349 /**
1350 * raptor_rdfxml_process_property_attributes:
1351 * @rdf_parser: Raptor parser object
1352 * @attributes_element: element with the property attributes
1353 * @resource_element: element that defines the resource URI
1354 * subject->value etc.
1355 * @property_node_identifier: Use this identifier for the resource URI
1356 * and count any ordinals for it locally
1357 *
1358 * Process the property attributes for an element for a given resource.
1359 *
1360 **/
1361 static int
raptor_rdfxml_process_property_attributes(raptor_parser * rdf_parser,raptor_rdfxml_element * attributes_element,raptor_rdfxml_element * resource_element,raptor_term * property_node_identifier)1362 raptor_rdfxml_process_property_attributes(raptor_parser *rdf_parser,
1363 raptor_rdfxml_element *attributes_element,
1364 raptor_rdfxml_element *resource_element,
1365 raptor_term *property_node_identifier)
1366 {
1367 unsigned int i;
1368 raptor_term *resource_identifier;
1369
1370 resource_identifier = property_node_identifier ? property_node_identifier : resource_element->subject;
1371
1372
1373 /* Process attributes as propAttr* = * (propName="string")*
1374 */
1375 for(i = 0; i < attributes_element->xml_element->attribute_count; i++) {
1376 raptor_qname* attr = attributes_element->xml_element->attributes[i];
1377 const unsigned char *name;
1378 const unsigned char *value;
1379 int handled = 0;
1380
1381 if(!attr)
1382 continue;
1383
1384 name = attr->local_name;
1385 value = attr->value;
1386
1387 if(!attr->nspace) {
1388 raptor_rdfxml_update_document_locator(rdf_parser);
1389 raptor_parser_error(rdf_parser,
1390 "Using property attribute '%s' without a namespace is forbidden.",
1391 name);
1392 continue;
1393 }
1394
1395
1396 if(!raptor_unicode_check_utf8_nfc_string(value, strlen((const char*)value),
1397 NULL)) {
1398 const char *message;
1399
1400 message = "Property attribute '%s' has a string not in Unicode Normal Form C: %s";
1401 raptor_rdfxml_update_document_locator(rdf_parser);
1402 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL))
1403 raptor_parser_error(rdf_parser, message, name, value);
1404 else
1405 raptor_parser_warning(rdf_parser, message, name, value);
1406 continue;
1407 }
1408
1409
1410 /* Generate the property statement using one of these properties:
1411 * 1) rdf:_n
1412 * 2) the URI from the rdf:* attribute where allowed
1413 * 3) otherwise forbidden (including rdf:li)
1414 */
1415 if(attr->nspace->is_rdf_ms) {
1416 /* is rdf: namespace */
1417
1418 if(*name == '_') {
1419 int ordinal;
1420
1421 /* recognise rdf:_ */
1422 name++;
1423 ordinal = raptor_check_ordinal(name);
1424 if(ordinal < 1) {
1425 raptor_rdfxml_update_document_locator(rdf_parser);
1426 raptor_parser_error(rdf_parser,
1427 "Illegal ordinal value %d in property attribute '%s' seen on containing element '%s'.",
1428 ordinal, attr->local_name, name);
1429 }
1430 } else {
1431 int rc;
1432
1433 raptor_rdfxml_update_document_locator(rdf_parser);
1434
1435 rc = raptor_rdfxml_check_propertyAttribute_name((const char*)name);
1436 if(!rc)
1437 raptor_parser_error(rdf_parser,
1438 "RDF term %s is forbidden as a property attribute.",
1439 name);
1440 else if(rc < 0)
1441 raptor_parser_warning(rdf_parser,
1442 "Unknown RDF namespace property attribute '%s'.",
1443 name);
1444 }
1445
1446 } /* end is RDF namespace property */
1447
1448
1449 if(!handled) {
1450 raptor_term* object_term;
1451
1452 object_term = raptor_new_term_from_literal(rdf_parser->world,
1453 (unsigned char*)value,
1454 NULL, NULL);
1455
1456 /* else not rdf: namespace or unknown in rdf: namespace so
1457 * generate a statement with a literal object
1458 */
1459 raptor_rdfxml_generate_statement(rdf_parser,
1460 resource_identifier,
1461 attr->uri,
1462 object_term,
1463 NULL, /* Property attributes are never reified*/
1464 resource_element);
1465
1466 raptor_free_term(object_term);
1467 }
1468
1469 } /* end for ... attributes */
1470
1471
1472 /* Handle rdf property attributes
1473 * (only rdf:type and rdf:value at present)
1474 */
1475 for(i = 0; i <= RDF_NS_LAST; i++) {
1476 const unsigned char *value = attributes_element->rdf_attr[i];
1477 size_t value_len;
1478 int object_is_literal;
1479 raptor_uri *property_uri;
1480 raptor_term* object_term;
1481
1482 if(!value)
1483 continue;
1484
1485 value_len = strlen((const char*)value);
1486
1487 object_is_literal = (raptor_rdf_ns_terms_info[i].type == RAPTOR_TERM_TYPE_LITERAL);
1488
1489 if(raptor_rdf_ns_terms_info[i].type == RAPTOR_TERM_TYPE_UNKNOWN) {
1490 const char *name = raptor_rdf_ns_terms_info[i].name;
1491 int rc = raptor_rdfxml_check_propertyAttribute_name(name);
1492 if(!rc) {
1493 raptor_rdfxml_update_document_locator(rdf_parser);
1494 raptor_parser_error(rdf_parser,
1495 "RDF term %s is forbidden as a property attribute.",
1496 name);
1497 continue;
1498 } else if(rc < 0)
1499 raptor_parser_warning(rdf_parser,
1500 "Unknown RDF namespace property attribute '%s'.",
1501 name);
1502 }
1503
1504 if(object_is_literal &&
1505 !raptor_unicode_check_utf8_nfc_string(value, value_len, NULL)) {
1506 const char *message;
1507 message = "Property attribute '%s' has a string not in Unicode Normal Form C: %s";
1508 raptor_rdfxml_update_document_locator(rdf_parser);
1509 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL))
1510 raptor_parser_error(rdf_parser, message,
1511 raptor_rdf_ns_terms_info[i].name, value);
1512 else
1513 raptor_parser_warning(rdf_parser, message,
1514 raptor_rdf_ns_terms_info[i].name, value);
1515 continue;
1516 }
1517
1518 property_uri = raptor_new_uri_for_rdf_concept(rdf_parser->world,
1519 (const unsigned char*)raptor_rdf_ns_terms_info[i].name);
1520
1521 if(object_is_literal) {
1522 object_term = raptor_new_term_from_literal(rdf_parser->world,
1523 (unsigned char*)value,
1524 NULL, NULL);
1525 } else {
1526 raptor_uri *base_uri;
1527 raptor_uri *object_uri;
1528 base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser);
1529 object_uri = raptor_new_uri_relative_to_base(rdf_parser->world,
1530 base_uri, value);
1531 object_term = raptor_new_term_from_uri(rdf_parser->world, object_uri);
1532 raptor_free_uri(object_uri);
1533 }
1534
1535 raptor_rdfxml_generate_statement(rdf_parser,
1536 resource_identifier,
1537 property_uri,
1538 object_term,
1539 NULL, /* Property attributes are never reified*/
1540 resource_element);
1541
1542 raptor_free_term(object_term);
1543
1544 raptor_free_uri(property_uri);
1545
1546 } /* end for rdf:property values */
1547
1548 return 0;
1549 }
1550
1551
1552 static void
raptor_rdfxml_start_element_grammar(raptor_parser * rdf_parser,raptor_rdfxml_element * element)1553 raptor_rdfxml_start_element_grammar(raptor_parser *rdf_parser,
1554 raptor_rdfxml_element *element)
1555 {
1556 raptor_rdfxml_parser *rdf_xml_parser;
1557 int finished;
1558 raptor_state state;
1559 raptor_xml_element* xml_element;
1560 raptor_qname* el_qname;
1561 const unsigned char *el_name;
1562 int element_in_rdf_ns;
1563 int rc = 0;
1564 raptor_uri* base_uri;
1565 raptor_uri* element_name_uri;
1566
1567 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
1568
1569 xml_element = element->xml_element;
1570 el_qname = raptor_xml_element_get_name(xml_element);
1571 el_name = el_qname->local_name;
1572 element_in_rdf_ns = (el_qname->nspace && el_qname->nspace->is_rdf_ms);
1573 base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser);
1574 element_name_uri = el_qname->uri;
1575
1576 state = element->state;
1577 #ifdef RAPTOR_DEBUG_VERBOSE
1578 RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state));
1579 #endif
1580
1581 finished = 0;
1582 while(!finished) {
1583
1584 switch(state) {
1585 case RAPTOR_STATE_SKIPPING:
1586 element->child_state = state;
1587 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
1588 finished = 1;
1589 break;
1590
1591 case RAPTOR_STATE_UNKNOWN:
1592 /* found <rdf:RDF> ? */
1593
1594 if(element_in_rdf_ns) {
1595 if(raptor_uri_equals(element_name_uri,
1596 RAPTOR_RDF_RDF_URI(rdf_parser->world))) {
1597 element->child_state = RAPTOR_STATE_NODE_ELEMENT_LIST;
1598 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES;
1599 /* Yes - need more content before can continue,
1600 * so wait for another element
1601 */
1602 finished = 1;
1603 break;
1604 }
1605 if(raptor_uri_equals(element_name_uri,
1606 RAPTOR_RDF_Description_URI(rdf_parser->world))) {
1607 state = RAPTOR_STATE_DESCRIPTION;
1608 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
1609 /* Yes - found something so move immediately to description */
1610 break;
1611 }
1612
1613 if(element_in_rdf_ns) {
1614 rc = raptor_rdfxml_check_nodeElement_name((const char*)el_name);
1615 if(!rc) {
1616 raptor_parser_error(rdf_parser,
1617 "rdf:%s is forbidden as a node element.",
1618 el_name);
1619 state = RAPTOR_STATE_SKIPPING;
1620 element->child_state = RAPTOR_STATE_SKIPPING;
1621 finished = 1;
1622 break;
1623 } else if(rc < 0) {
1624 raptor_parser_warning(rdf_parser,
1625 "rdf:%s is an unknown RDF namespaced element.",
1626 el_name);
1627 }
1628 }
1629 }
1630
1631 /* If scanning for element, can continue */
1632 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING)) {
1633 finished = 1;
1634 break;
1635 }
1636
1637 /* Otherwise the choice of the next state can be made
1638 * from the current element by the OBJ state
1639 */
1640 state = RAPTOR_STATE_NODE_ELEMENT_LIST;
1641 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES;
1642 break;
1643
1644
1645 case RAPTOR_STATE_NODE_ELEMENT_LIST:
1646 /* Handling
1647 * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList
1648 *
1649 * Everything goes to nodeElement
1650 */
1651
1652 state = RAPTOR_STATE_NODE_ELEMENT;
1653
1654 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
1655
1656 break;
1657
1658
1659
1660 case RAPTOR_STATE_DESCRIPTION:
1661 case RAPTOR_STATE_NODE_ELEMENT:
1662 case RAPTOR_STATE_PARSETYPE_RESOURCE:
1663 case RAPTOR_STATE_PARSETYPE_COLLECTION:
1664 /* Handling <rdf:Description> or other node element
1665 * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement
1666 *
1667 * or a property element acting as a node element for
1668 * rdf:parseType="Resource"
1669 * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt
1670 * or rdf:parseType="Collection" (and daml:Collection)
1671 * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt
1672 *
1673 * Only create a bag if bagID given
1674 */
1675
1676 if(!element_name_uri) {
1677 /* We cannot handle this */
1678 raptor_parser_warning(rdf_parser, "Using node element '%s' without a namespace is forbidden.",
1679 el_qname->local_name);
1680 raptor_rdfxml_update_document_locator(rdf_parser);
1681 element->state = RAPTOR_STATE_SKIPPING;
1682 element->child_state = RAPTOR_STATE_SKIPPING;
1683 finished = 1;
1684 break;
1685 }
1686
1687 if(element_in_rdf_ns) {
1688 rc = raptor_rdfxml_check_nodeElement_name((const char*)el_name);
1689 if(!rc) {
1690 raptor_parser_error(rdf_parser,
1691 "rdf:%s is forbidden as a node element.",
1692 el_name);
1693 state = RAPTOR_STATE_SKIPPING;
1694 element->state = RAPTOR_STATE_SKIPPING;
1695 element->child_state = RAPTOR_STATE_SKIPPING;
1696 finished = 1;
1697 break;
1698 } else if(rc < 0) {
1699 raptor_parser_warning(rdf_parser,
1700 "rdf:%s is an unknown RDF namespaced element.",
1701 el_name);
1702 }
1703 }
1704
1705 if(element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
1706 element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION &&
1707 element->parent &&
1708 (element->parent->state == RAPTOR_STATE_PROPERTYELT ||
1709 element->parent->state == RAPTOR_STATE_MEMBER_PROPERTYELT) &&
1710 element->parent->xml_element->content_element_seen > 1) {
1711 raptor_rdfxml_update_document_locator(rdf_parser);
1712 raptor_parser_error(rdf_parser, "The enclosing property already has an object");
1713 state = RAPTOR_STATE_SKIPPING;
1714 element->child_state = RAPTOR_STATE_SKIPPING;
1715 finished = 1;
1716 break;
1717 }
1718
1719 if(state == RAPTOR_STATE_NODE_ELEMENT ||
1720 state == RAPTOR_STATE_DESCRIPTION ||
1721 state == RAPTOR_STATE_PARSETYPE_COLLECTION) {
1722 if(element_in_rdf_ns &&
1723 raptor_uri_equals(element_name_uri,
1724 RAPTOR_RDF_Description_URI(rdf_parser->world)))
1725 state = RAPTOR_STATE_DESCRIPTION;
1726 else
1727 state = RAPTOR_STATE_NODE_ELEMENT;
1728 }
1729
1730
1731 if((element->rdf_attr[RDF_NS_ID]!=NULL) +
1732 (element->rdf_attr[RDF_NS_about]!=NULL) +
1733 (element->rdf_attr[RDF_NS_nodeID]!=NULL) > 1) {
1734 raptor_rdfxml_update_document_locator(rdf_parser);
1735 raptor_parser_error(rdf_parser, "Multiple attributes of rdf:ID, rdf:about and rdf:nodeID on element '%s' - only one allowed.", el_name);
1736 }
1737
1738 if(element->rdf_attr[RDF_NS_ID]) {
1739 unsigned char* subject_id;
1740 raptor_uri* subject_uri;
1741
1742 subject_id = (unsigned char*)element->rdf_attr[RDF_NS_ID];
1743
1744 if(!raptor_valid_xml_ID(rdf_parser, subject_id)) {
1745 raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'",
1746 subject_id);
1747 state = RAPTOR_STATE_SKIPPING;
1748 element->child_state = RAPTOR_STATE_SKIPPING;
1749 finished = 1;
1750 break;
1751 }
1752 if(raptor_rdfxml_record_ID(rdf_parser, element, subject_id)) {
1753 raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'",
1754 subject_id);
1755 state = RAPTOR_STATE_SKIPPING;
1756 element->child_state = RAPTOR_STATE_SKIPPING;
1757 finished = 1;
1758 break;
1759 }
1760
1761 /* after this, subject_id is the owner of the ID string */
1762 element->rdf_attr[RDF_NS_ID] = NULL;
1763
1764 subject_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri,
1765 subject_id);
1766 RAPTOR_FREE(char*, subject_id);
1767
1768 if(!subject_uri)
1769 goto oom;
1770 element->subject = raptor_new_term_from_uri(rdf_parser->world,
1771 subject_uri);
1772 raptor_free_uri(subject_uri);
1773
1774 if(!element->subject)
1775 goto oom;
1776
1777 } else if(element->rdf_attr[RDF_NS_about]) {
1778 raptor_uri* subject_uri;
1779
1780 subject_uri = raptor_new_uri_relative_to_base(rdf_parser->world,
1781 base_uri,
1782 (const unsigned char*)element->rdf_attr[RDF_NS_about]);
1783 if(!subject_uri)
1784 goto oom;
1785
1786 element->subject = raptor_new_term_from_uri(rdf_parser->world,
1787 subject_uri);
1788 raptor_free_uri(subject_uri);
1789
1790 RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_about]);
1791 element->rdf_attr[RDF_NS_about] = NULL;
1792 if(!element->subject)
1793 goto oom;
1794
1795 } else if(element->rdf_attr[RDF_NS_nodeID]) {
1796 unsigned char* subject_id;
1797 subject_id = raptor_world_internal_generate_id(rdf_parser->world,
1798 (unsigned char*)element->rdf_attr[RDF_NS_nodeID]);
1799 if(!subject_id)
1800 goto oom;
1801
1802 element->subject = raptor_new_term_from_blank(rdf_parser->world,
1803 subject_id);
1804 RAPTOR_FREE(char*, subject_id);
1805
1806 element->rdf_attr[RDF_NS_nodeID] = NULL;
1807 if(!element->subject)
1808 goto oom;
1809
1810 if(!raptor_valid_xml_ID(rdf_parser, element->subject->value.blank.string)) {
1811 raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'",
1812 (const char*)element->subject->value.blank.string);
1813 state = RAPTOR_STATE_SKIPPING;
1814 element->child_state = RAPTOR_STATE_SKIPPING;
1815 finished = 1;
1816 break;
1817 }
1818 } else if(element->parent &&
1819 element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
1820 element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION &&
1821 element->parent->object) {
1822 /* copy from parent (property element), it has a URI for us */
1823 element->subject = raptor_term_copy(element->parent->object);
1824 } else {
1825 unsigned char* subject_id;
1826 subject_id = raptor_world_generate_bnodeid(rdf_parser->world);
1827 if(!subject_id)
1828 goto oom;
1829
1830 element->subject = raptor_new_term_from_blank(rdf_parser->world,
1831 subject_id);
1832 RAPTOR_FREE(char*, subject_id);
1833
1834 if(!element->subject)
1835 goto oom;
1836 }
1837
1838
1839 if(element->rdf_attr[RDF_NS_bagID]) {
1840 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) {
1841 unsigned char* bag_id;
1842 raptor_uri* bag_uri = NULL;
1843
1844 bag_id = (unsigned char*)element->rdf_attr[RDF_NS_bagID];
1845 element->rdf_attr[RDF_NS_bagID] = NULL;
1846
1847 bag_uri = raptor_new_uri_from_id(rdf_parser->world,
1848 base_uri, bag_id);
1849 if(!bag_uri) {
1850 RAPTOR_FREE(char*, bag_id);
1851 goto oom;
1852 }
1853
1854 element->bag = raptor_new_term_from_uri(rdf_parser->world, bag_uri);
1855 raptor_free_uri(bag_uri);
1856
1857 if(!raptor_valid_xml_ID(rdf_parser, bag_id)) {
1858 raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'",
1859 bag_id);
1860 state = RAPTOR_STATE_SKIPPING;
1861 element->child_state = RAPTOR_STATE_SKIPPING;
1862 finished = 1;
1863 RAPTOR_FREE(char*, bag_id);
1864 break;
1865 }
1866 if(raptor_rdfxml_record_ID(rdf_parser, element, bag_id)) {
1867 raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'",
1868 bag_id);
1869 state = RAPTOR_STATE_SKIPPING;
1870 element->child_state = RAPTOR_STATE_SKIPPING;
1871 finished = 1;
1872 RAPTOR_FREE(char*, bag_id);
1873 break;
1874 }
1875
1876 RAPTOR_FREE(char*, bag_id);
1877 raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated.");
1878
1879
1880 raptor_rdfxml_generate_statement(rdf_parser,
1881 element->bag,
1882 RAPTOR_RDF_type_URI(rdf_parser->world),
1883 RAPTOR_RDF_Bag_term(rdf_parser->world),
1884 NULL,
1885 NULL);
1886 } else {
1887 /* bagID forbidden */
1888 raptor_parser_error(rdf_parser, "rdf:bagID is forbidden.");
1889 state = RAPTOR_STATE_SKIPPING;
1890 element->child_state = RAPTOR_STATE_SKIPPING;
1891 finished = 1;
1892 break;
1893 }
1894 }
1895
1896
1897 if(element->parent) {
1898
1899 /* In a rdf:parseType="Collection" the resources are appended
1900 * to the list at the genid element->parent->tail_id
1901 */
1902 if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION ||
1903 element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
1904 /* <idList> rdf:type rdf:List */
1905 const unsigned char * idList;
1906 raptor_uri *predicate_uri;
1907 raptor_term* idList_term;
1908 raptor_term* object_term;
1909
1910 idList = raptor_world_generate_bnodeid(rdf_parser->world);
1911 if(!idList)
1912 goto oom;
1913 /* idList string is saved below in element->parent->tail_id */
1914
1915 idList_term = raptor_new_term_from_blank(rdf_parser->world, idList);
1916 if(!idList_term) {
1917 RAPTOR_FREE(char*, idList);
1918 goto oom;
1919 }
1920
1921 if((element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ||
1922 RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST)) {
1923 raptor_uri* class_uri = NULL;
1924
1925 if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
1926 class_uri = RAPTOR_DAML_List_URI(rdf_xml_parser);
1927 object_term = raptor_new_term_from_uri(rdf_parser->world,
1928 class_uri);
1929 } else
1930 object_term = raptor_term_copy(RAPTOR_RDF_List_term(rdf_parser->world));
1931
1932 raptor_rdfxml_generate_statement(rdf_parser,
1933 idList_term,
1934 RAPTOR_RDF_type_URI(rdf_parser->world),
1935 object_term,
1936 NULL,
1937 element);
1938 raptor_free_term(object_term);
1939 }
1940
1941 predicate_uri = (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_first_URI(rdf_xml_parser) : RAPTOR_RDF_first_URI(rdf_parser->world);
1942
1943 /* <idList> rdf:first <element->uri> */
1944 raptor_rdfxml_generate_statement(rdf_parser,
1945 idList_term,
1946 predicate_uri,
1947 element->subject,
1948 NULL,
1949 NULL);
1950
1951 /* If there is no rdf:parseType="Collection" */
1952 if(!element->parent->tail_id) {
1953 /* Free any existing object still around.
1954 * I suspect this can never happen.
1955 */
1956 if(element->parent->object)
1957 raptor_free_term(element->parent->object);
1958
1959 element->parent->object = raptor_new_term_from_blank(rdf_parser->world,
1960 idList);
1961 } else {
1962 raptor_term* tail_id_term;
1963
1964 tail_id_term = raptor_new_term_from_blank(rdf_parser->world,
1965 element->parent->tail_id);
1966
1967 predicate_uri = (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_parser->world);
1968
1969 /* _:tail_id rdf:rest _:listRest */
1970 raptor_rdfxml_generate_statement(rdf_parser,
1971 tail_id_term,
1972 predicate_uri,
1973 idList_term,
1974 NULL,
1975 NULL);
1976
1977 raptor_free_term(tail_id_term);
1978 }
1979
1980 /* update new tail */
1981 if(element->parent->tail_id)
1982 RAPTOR_FREE(char*, (char*)element->parent->tail_id);
1983
1984 element->parent->tail_id = idList;
1985
1986 raptor_free_term(idList_term);
1987 } else if(element->parent->state != RAPTOR_STATE_UNKNOWN &&
1988 element->state != RAPTOR_STATE_PARSETYPE_RESOURCE) {
1989 /* If there is a parent element (property) containing this
1990 * element (node) and it has no object, set it from this subject
1991 */
1992
1993 if(element->parent->object) {
1994 raptor_rdfxml_update_document_locator(rdf_parser);
1995 raptor_parser_error(rdf_parser,
1996 "Tried to set multiple objects of a statement");
1997 } else {
1998 /* Store URI of this node in our parent as the property object */
1999 element->parent->object = raptor_term_copy(element->subject);
2000 element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2001 }
2002
2003 }
2004 }
2005
2006
2007 /* If this is a node element, generate the rdf:type statement
2008 * from this node
2009 */
2010 if(state == RAPTOR_STATE_NODE_ELEMENT) {
2011 raptor_term* el_name_term;
2012
2013 el_name_term = raptor_new_term_from_uri(rdf_parser->world,
2014 element_name_uri);
2015
2016 raptor_rdfxml_generate_statement(rdf_parser,
2017 element->subject,
2018 RAPTOR_RDF_type_URI(rdf_parser->world),
2019 el_name_term,
2020 element->reified,
2021 element);
2022
2023 raptor_free_term(el_name_term);
2024 }
2025
2026 if(raptor_rdfxml_process_property_attributes(rdf_parser, element,
2027 element, NULL))
2028 goto oom;
2029
2030 /* for both productions now need some more content or
2031 * property elements before can do any more work.
2032 */
2033
2034 element->child_state = RAPTOR_STATE_PROPERTYELT;
2035 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2036 finished = 1;
2037 break;
2038
2039
2040 case RAPTOR_STATE_PARSETYPE_OTHER:
2041 /* FALLTHROUGH */
2042
2043 case RAPTOR_STATE_PARSETYPE_LITERAL:
2044 raptor_xml_writer_start_element(rdf_xml_parser->xml_writer, xml_element);
2045 element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL;
2046 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2047
2048 finished = 1;
2049 break;
2050
2051 /* Handle all the detail of the various options of property element
2052 * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
2053 *
2054 * All the attributes must be scanned here to see what additional
2055 * property element work is needed. No triples are generated
2056 * until the end of this element, until it is clear if the
2057 * element was empty.
2058 */
2059 case RAPTOR_STATE_MEMBER_PROPERTYELT:
2060 case RAPTOR_STATE_PROPERTYELT:
2061
2062 if(!element_name_uri) {
2063 raptor_parser_error(rdf_parser, "Using property element '%s' without a namespace is forbidden.",
2064 raptor_xml_element_get_name(element->parent->xml_element)->local_name);
2065 raptor_rdfxml_update_document_locator(rdf_parser);
2066 element->state = RAPTOR_STATE_SKIPPING;
2067 element->child_state = RAPTOR_STATE_SKIPPING;
2068 finished = 1;
2069 break;
2070 }
2071
2072 /* Handling rdf:li as a property, noting special processing */
2073 if(element_in_rdf_ns &&
2074 raptor_uri_equals(element_name_uri,
2075 RAPTOR_RDF_li_URI(rdf_parser->world))) {
2076 state = RAPTOR_STATE_MEMBER_PROPERTYELT;
2077 }
2078
2079
2080 if(element_in_rdf_ns) {
2081 rc = raptor_rdfxml_check_propertyElement_name((const char*)el_name);
2082 if(!rc) {
2083 raptor_parser_error(rdf_parser,
2084 "rdf:%s is forbidden as a property element.",
2085 el_name);
2086 state = RAPTOR_STATE_SKIPPING;
2087 element->child_state = RAPTOR_STATE_SKIPPING;
2088 finished = 1;
2089 break;
2090 } else if(rc < 0) {
2091 raptor_parser_warning(rdf_parser,
2092 "rdf:%s is an unknown RDF namespaced element.",
2093 el_name);
2094 }
2095 }
2096
2097
2098 /* rdf:ID on a property element - reify a statement.
2099 * Allowed on all property element forms
2100 */
2101 if(element->rdf_attr[RDF_NS_ID]) {
2102 raptor_uri *reified_uri;
2103
2104 element->reified_id = element->rdf_attr[RDF_NS_ID];
2105 element->rdf_attr[RDF_NS_ID] = NULL;
2106 reified_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri,
2107 element->reified_id);
2108 if(!reified_uri)
2109 goto oom;
2110
2111 element->reified = raptor_new_term_from_uri(rdf_parser->world,
2112 reified_uri);
2113 raptor_free_uri(reified_uri);
2114
2115 if(!element->reified)
2116 goto oom;
2117
2118 if(!raptor_valid_xml_ID(rdf_parser, element->reified_id)) {
2119 raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'",
2120 element->reified_id);
2121 state = RAPTOR_STATE_SKIPPING;
2122 element->child_state = RAPTOR_STATE_SKIPPING;
2123 finished = 1;
2124 break;
2125 }
2126 if(raptor_rdfxml_record_ID(rdf_parser, element, element->reified_id)) {
2127 raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'",
2128 element->reified_id);
2129 state = RAPTOR_STATE_SKIPPING;
2130 element->child_state = RAPTOR_STATE_SKIPPING;
2131 finished = 1;
2132 break;
2133 }
2134 }
2135
2136 /* rdf:datatype on a property element.
2137 * Only allowed for
2138 * http://www.w3.org/TR/rdf-syntax-grammar/#literalPropertyElt
2139 */
2140 if(element->rdf_attr[RDF_NS_datatype]) {
2141 raptor_uri *datatype_uri;
2142
2143 datatype_uri = raptor_new_uri_relative_to_base(rdf_parser->world,
2144 base_uri,
2145 (const unsigned char*)element->rdf_attr[RDF_NS_datatype]);
2146 element->object_literal_datatype = datatype_uri;
2147 RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_datatype]);
2148 element->rdf_attr[RDF_NS_datatype] = NULL;
2149 if(!element->object_literal_datatype)
2150 goto oom;
2151 }
2152
2153 if(element->rdf_attr[RDF_NS_bagID]) {
2154
2155 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) {
2156
2157 if(element->rdf_attr[RDF_NS_resource] ||
2158 element->rdf_attr[RDF_NS_parseType]) {
2159
2160 raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on property element '%s' with an rdf:resource or rdf:parseType attribute.", el_name);
2161 /* prevent this being used later either */
2162 RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_bagID]);
2163 element->rdf_attr[RDF_NS_bagID] = NULL;
2164 } else {
2165 unsigned char* bag_id;
2166 raptor_uri* bag_uri;
2167
2168 bag_id = (unsigned char*)element->rdf_attr[RDF_NS_bagID];
2169 element->rdf_attr[RDF_NS_bagID] = NULL;
2170 bag_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri,
2171 bag_id);
2172 if(!bag_uri) {
2173 RAPTOR_FREE(char*, bag_id);
2174 goto oom;
2175 }
2176
2177 element->bag = raptor_new_term_from_uri(rdf_parser->world,
2178 bag_uri);
2179 raptor_free_uri(bag_uri);
2180
2181 if(!element->bag) {
2182 RAPTOR_FREE(char*, bag_id);
2183 goto oom;
2184 }
2185
2186 if(!raptor_valid_xml_ID(rdf_parser, bag_id)) {
2187 raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'",
2188 bag_id);
2189 state = RAPTOR_STATE_SKIPPING;
2190 element->child_state = RAPTOR_STATE_SKIPPING;
2191 finished = 1;
2192 RAPTOR_FREE(char*, bag_id);
2193 break;
2194 }
2195 if(raptor_rdfxml_record_ID(rdf_parser, element, bag_id)) {
2196 raptor_parser_error(rdf_parser,
2197 "Duplicated rdf:bagID value '%s'", bag_id);
2198 state = RAPTOR_STATE_SKIPPING;
2199 element->child_state = RAPTOR_STATE_SKIPPING;
2200 RAPTOR_FREE(char*, bag_id);
2201 finished = 1;
2202 break;
2203 }
2204
2205 RAPTOR_FREE(char*, bag_id);
2206 raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated.");
2207 }
2208 } else {
2209 /* bagID forbidden */
2210 raptor_parser_error(rdf_parser, "rdf:bagID is forbidden.");
2211 state = RAPTOR_STATE_SKIPPING;
2212 element->child_state = RAPTOR_STATE_SKIPPING;
2213 finished = 1;
2214 break;
2215 }
2216 } /* if rdf:bagID on property element */
2217
2218
2219 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT;
2220
2221 if(element->rdf_attr[RDF_NS_parseType]) {
2222 const unsigned char *parse_type;
2223 int i;
2224 int is_parseType_Literal = 0;
2225
2226 parse_type = element->rdf_attr[RDF_NS_parseType];
2227
2228 if(raptor_rdfxml_element_has_property_attributes(element)) {
2229 raptor_parser_error(rdf_parser, "Property attributes cannot be used with rdf:parseType='%s'", parse_type);
2230 state = RAPTOR_STATE_SKIPPING;
2231 element->child_state = RAPTOR_STATE_SKIPPING;
2232 finished = 1;
2233 break;
2234 }
2235
2236 /* Check for bad combinations of things with parseType */
2237 for(i = 0; i <= RDF_NS_LAST; i++)
2238 if(element->rdf_attr[i] && i != RDF_NS_parseType) {
2239 raptor_parser_error(rdf_parser, "Attribute '%s' cannot be used with rdf:parseType='%s'", raptor_rdf_ns_terms_info[i].name, parse_type);
2240 state = RAPTOR_STATE_SKIPPING;
2241 element->child_state = RAPTOR_STATE_SKIPPING;
2242 break;
2243 }
2244
2245
2246 if(!strcmp((char*)parse_type, "Literal"))
2247 is_parseType_Literal = 1;
2248 else if(!strcmp((char*)parse_type, "Resource")) {
2249 unsigned char* subject_id;
2250
2251 state = RAPTOR_STATE_PARSETYPE_RESOURCE;
2252 element->child_state = RAPTOR_STATE_PROPERTYELT;
2253 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2254
2255 /* create a node for the subject of the contained properties */
2256 subject_id = raptor_world_generate_bnodeid(rdf_parser->world);
2257 if(!subject_id)
2258 goto oom;
2259
2260 element->subject = raptor_new_term_from_blank(rdf_parser->world,
2261 subject_id);
2262 RAPTOR_FREE(char*, subject_id);
2263
2264 if(!element->subject)
2265 goto oom;
2266 } else if(!strcmp((char*)parse_type, "Collection")) {
2267 /* An rdf:parseType="Collection" appears as a single node */
2268 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2269 element->child_state = RAPTOR_STATE_PARSETYPE_COLLECTION;
2270 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION;
2271 } else {
2272 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES) &&
2273 !raptor_strcasecmp((char*)parse_type, "daml:collection")) {
2274 /* A DAML collection appears as a single node */
2275 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2276 element->child_state = RAPTOR_STATE_PARSETYPE_COLLECTION;
2277 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION;
2278 } else {
2279 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_WARN_OTHER_PARSETYPES)) {
2280 raptor_parser_warning(rdf_parser, "Unknown rdf:parseType value '%s' taken as 'Literal'", parse_type);
2281 }
2282 is_parseType_Literal = 1;
2283 }
2284
2285 }
2286
2287 if(is_parseType_Literal) {
2288 raptor_xml_writer* xml_writer;
2289
2290 /* rdf:parseType="Literal" - explicitly or default
2291 * if the parseType value is not recognised
2292 */
2293 rdf_xml_parser->xml_content = NULL;
2294 rdf_xml_parser->xml_content_length = 0;
2295 rdf_xml_parser->iostream =
2296 raptor_new_iostream_to_string(rdf_parser->world,
2297 &rdf_xml_parser->xml_content,
2298 &rdf_xml_parser->xml_content_length,
2299 raptor_alloc_memory);
2300 if(!rdf_xml_parser->iostream)
2301 goto oom;
2302 xml_writer = raptor_new_xml_writer(rdf_parser->world, NULL,
2303 rdf_xml_parser->iostream);
2304 rdf_xml_parser->xml_writer = xml_writer;
2305 if(!rdf_xml_parser->xml_writer)
2306 goto oom;
2307
2308 raptor_xml_writer_set_option(rdf_xml_parser->xml_writer,
2309 RAPTOR_OPTION_WRITER_XML_DECLARATION,
2310 NULL, 0);
2311
2312 element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL;
2313 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2314 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2315 }
2316 } else {
2317
2318 /* Can only be the empty property element case
2319 * http://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt
2320 */
2321
2322 /* The presence of the rdf:resource or rdf:nodeID
2323 * attributes is checked at element close time
2324 */
2325
2326 /*
2327 * Assign reified URI here so we don't reify property attributes
2328 * using this id
2329 */
2330 if(element->reified_id && !element->reified) {
2331 raptor_uri* reified_uri;
2332 reified_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri,
2333 element->reified_id);
2334 if(!reified_uri)
2335 goto oom;
2336 element->reified = raptor_new_term_from_uri(rdf_parser->world,
2337 reified_uri);
2338 raptor_free_uri(reified_uri);
2339
2340 if(!element->reified)
2341 goto oom;
2342 }
2343
2344 if(element->rdf_attr[RDF_NS_resource] ||
2345 element->rdf_attr[RDF_NS_nodeID]) {
2346 /* Done - wait for end of this element to end in order to
2347 * check the element was empty as expected */
2348 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2349 } else {
2350 /* Otherwise process content in obj (value) state */
2351 element->child_state = RAPTOR_STATE_NODE_ELEMENT_LIST;
2352 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT;
2353 }
2354 }
2355
2356 finished = 1;
2357
2358 break;
2359
2360
2361 case RAPTOR_STATE_INVALID:
2362 default:
2363 raptor_parser_fatal_error(rdf_parser,
2364 "%s Internal error - unexpected parser state %d - %s",
2365 __FUNCTION__,
2366 state, raptor_rdfxml_state_as_string(state));
2367 finished = 1;
2368
2369 } /* end switch */
2370
2371 if(state != element->state) {
2372 element->state = state;
2373 #ifdef RAPTOR_DEBUG_VERBOSE
2374 RAPTOR_DEBUG3("Moved to state %d - %s\n", state,
2375 raptor_rdfxml_state_as_string(state));
2376 #endif
2377 }
2378
2379 } /* end while */
2380
2381 #ifdef RAPTOR_DEBUG_VERBOSE
2382 RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
2383 #endif
2384
2385 return;
2386
2387 oom:
2388 raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping");
2389 element->state = RAPTOR_STATE_SKIPPING;
2390 }
2391
2392
2393 static void
raptor_rdfxml_end_element_grammar(raptor_parser * rdf_parser,raptor_rdfxml_element * element)2394 raptor_rdfxml_end_element_grammar(raptor_parser *rdf_parser,
2395 raptor_rdfxml_element *element)
2396 {
2397 raptor_rdfxml_parser *rdf_xml_parser;
2398 raptor_state state;
2399 int finished;
2400 raptor_xml_element* xml_element = element->xml_element;
2401 raptor_qname* el_qname;
2402 const unsigned char *el_name;
2403 int element_in_rdf_ns;
2404 raptor_uri* element_name_uri;
2405
2406 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
2407
2408 el_qname = raptor_xml_element_get_name(xml_element);
2409 el_name = el_qname->local_name;
2410 element_in_rdf_ns= (el_qname->nspace && el_qname->nspace->is_rdf_ms);
2411 element_name_uri = el_qname->uri;
2412
2413
2414 state = element->state;
2415 #ifdef RAPTOR_DEBUG_VERBOSE
2416 RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state));
2417 #endif
2418
2419 finished= 0;
2420 while(!finished) {
2421 switch(state) {
2422 case RAPTOR_STATE_SKIPPING:
2423 finished = 1;
2424 break;
2425
2426 case RAPTOR_STATE_UNKNOWN:
2427 finished = 1;
2428 break;
2429
2430 case RAPTOR_STATE_NODE_ELEMENT_LIST:
2431 if(element_in_rdf_ns &&
2432 raptor_uri_equals(element_name_uri,
2433 RAPTOR_RDF_RDF_URI(rdf_parser->world))) {
2434 /* end of RDF - boo hoo */
2435 state = RAPTOR_STATE_UNKNOWN;
2436 finished = 1;
2437 break;
2438 }
2439 /* When scanning, another element ending is outside the RDF
2440 * world so this can happen without further work
2441 */
2442 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING)) {
2443 state = RAPTOR_STATE_UNKNOWN;
2444 finished = 1;
2445 break;
2446 }
2447 /* otherwise found some junk after RDF content in an RDF-only
2448 * document (probably never get here since this would be
2449 * a mismatched XML tag and cause an error earlier)
2450 */
2451 raptor_rdfxml_update_document_locator(rdf_parser);
2452 raptor_parser_warning(rdf_parser,
2453 "Element '%s' ended, expected end of RDF element",
2454 el_name);
2455 state = RAPTOR_STATE_UNKNOWN;
2456 finished = 1;
2457 break;
2458
2459
2460 case RAPTOR_STATE_DESCRIPTION:
2461 case RAPTOR_STATE_NODE_ELEMENT:
2462 case RAPTOR_STATE_PARSETYPE_RESOURCE:
2463
2464 /* If there is a parent element containing this element and
2465 * the parent isn't a description, has an identifier,
2466 * create the statement between this node using parent property
2467 * (Need to check for identifier so that top-level typed nodes
2468 * don't get connect to <rdf:RDF> parent element)
2469 */
2470 if(state == RAPTOR_STATE_NODE_ELEMENT &&
2471 element->parent && element->parent->subject) {
2472 raptor_rdfxml_generate_statement(rdf_parser,
2473 element->parent->subject,
2474 element_name_uri,
2475 element->subject,
2476 NULL,
2477 element);
2478 } else if(state == RAPTOR_STATE_PARSETYPE_RESOURCE &&
2479 element->parent && element->parent->subject) {
2480 /* Handle rdf:li as the rdf:parseType="resource" property */
2481 if(element_in_rdf_ns &&
2482 raptor_uri_equals(element_name_uri,
2483 RAPTOR_RDF_li_URI(rdf_parser->world))) {
2484 raptor_uri* ordinal_predicate_uri;
2485
2486 element->parent->last_ordinal++;
2487 ordinal_predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, element->parent->last_ordinal);
2488
2489 raptor_rdfxml_generate_statement(rdf_parser,
2490 element->parent->subject,
2491 ordinal_predicate_uri,
2492 element->subject,
2493 element->reified,
2494 element->parent);
2495 raptor_free_uri(ordinal_predicate_uri);
2496 } else {
2497 raptor_rdfxml_generate_statement(rdf_parser,
2498 element->parent->subject,
2499 element_name_uri,
2500 element->subject,
2501 element->reified,
2502 element->parent);
2503 }
2504 }
2505 finished = 1;
2506 break;
2507
2508 case RAPTOR_STATE_PARSETYPE_COLLECTION:
2509
2510 finished = 1;
2511 break;
2512
2513 case RAPTOR_STATE_PARSETYPE_OTHER:
2514 /* FALLTHROUGH */
2515
2516 case RAPTOR_STATE_PARSETYPE_LITERAL:
2517 element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2518
2519 raptor_xml_writer_end_element(rdf_xml_parser->xml_writer, xml_element);
2520
2521 finished = 1;
2522 break;
2523
2524
2525 case RAPTOR_STATE_PROPERTYELT:
2526 case RAPTOR_STATE_MEMBER_PROPERTYELT:
2527 /* A property element
2528 * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
2529 *
2530 * Literal content part is handled here.
2531 * The element content is handled in the internal states
2532 * Empty content is checked here.
2533 */
2534
2535 if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
2536 if(xml_element->content_cdata_seen)
2537 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
2538 else if(xml_element->content_element_seen)
2539 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2540 else {
2541 /* Empty Literal */
2542 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
2543 }
2544
2545 }
2546
2547
2548 /* Handle terminating a rdf:parseType="Collection" list */
2549 if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION ||
2550 element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
2551 raptor_term* nil_term;
2552
2553 if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
2554 raptor_uri* nil_uri = RAPTOR_DAML_nil_URI(rdf_xml_parser);
2555 nil_term = raptor_new_term_from_uri(rdf_parser->world, nil_uri);
2556 } else {
2557 nil_term = raptor_term_copy(RAPTOR_RDF_nil_term(rdf_parser->world));
2558 }
2559
2560 if(!element->tail_id) {
2561 /* If No List: set object of statement to rdf:nil */
2562 element->object = raptor_term_copy(nil_term);
2563 } else {
2564 raptor_uri* rest_uri = NULL;
2565 raptor_term* tail_id_term;
2566
2567 if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION)
2568 rest_uri = RAPTOR_DAML_rest_URI(rdf_xml_parser);
2569 else
2570 rest_uri = RAPTOR_RDF_rest_URI(rdf_parser->world);
2571
2572 tail_id_term = raptor_new_term_from_blank(rdf_parser->world,
2573 element->tail_id);
2574
2575 /* terminate the list */
2576 raptor_rdfxml_generate_statement(rdf_parser,
2577 tail_id_term,
2578 rest_uri,
2579 nil_term,
2580 NULL,
2581 NULL);
2582
2583 raptor_free_term(tail_id_term);
2584 }
2585
2586 raptor_free_term(nil_term);
2587
2588 } /* end rdf:parseType="Collection" termination */
2589
2590
2591 #ifdef RAPTOR_DEBUG_VERBOSE
2592 RAPTOR_DEBUG3("Content type %s (%d)\n",
2593 raptor_rdfxml_element_content_type_as_string(element->content_type),
2594 element->content_type);
2595 #endif
2596
2597 switch(element->content_type) {
2598 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE:
2599
2600 if(raptor_rdfxml_element_has_property_attributes(element) &&
2601 element->child_state == RAPTOR_STATE_DESCRIPTION) {
2602 raptor_parser_error(rdf_parser,
2603 "Property element '%s' has both property attributes and a node element content",
2604 el_name);
2605 state = RAPTOR_STATE_SKIPPING;
2606 element->child_state = RAPTOR_STATE_SKIPPING;
2607 break;
2608 }
2609
2610 if(!element->object) {
2611 if(element->rdf_attr[RDF_NS_resource]) {
2612 raptor_uri* resource_uri;
2613 resource_uri = raptor_new_uri_relative_to_base(rdf_parser->world,
2614 raptor_rdfxml_inscope_base_uri(rdf_parser),
2615 (const unsigned char*)element->rdf_attr[RDF_NS_resource]);
2616 if(!resource_uri)
2617 goto oom;
2618
2619 element->object = raptor_new_term_from_uri(rdf_parser->world,
2620 resource_uri);
2621 raptor_free_uri(resource_uri);
2622
2623 RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_resource]);
2624 element->rdf_attr[RDF_NS_resource] = NULL;
2625 if(!element->object)
2626 goto oom;
2627 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2628 } else if(element->rdf_attr[RDF_NS_nodeID]) {
2629 unsigned char* resource_id;
2630 resource_id = raptor_world_internal_generate_id(rdf_parser->world,
2631 (unsigned char*)element->rdf_attr[RDF_NS_nodeID]);
2632 if(!resource_id)
2633 goto oom;
2634
2635 element->object = raptor_new_term_from_blank(rdf_parser->world,
2636 resource_id);
2637 RAPTOR_FREE(char*, resource_id);
2638 element->rdf_attr[RDF_NS_nodeID] = NULL;
2639 if(!element->object)
2640 goto oom;
2641
2642 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2643 if(!raptor_valid_xml_ID(rdf_parser,
2644 element->object->value.blank.string)) {
2645 raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", (const char*)element->object->value.blank.string);
2646 state = RAPTOR_STATE_SKIPPING;
2647 element->child_state = RAPTOR_STATE_SKIPPING;
2648 break;
2649 }
2650 } else {
2651 unsigned char* resource_id;
2652 resource_id = raptor_world_generate_bnodeid(rdf_parser->world);
2653 if(!resource_id)
2654 goto oom;
2655
2656 element->object = raptor_new_term_from_blank(rdf_parser->world,
2657 resource_id);
2658 RAPTOR_FREE(char*, resource_id);
2659
2660 if(!element->object)
2661 goto oom;
2662 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2663 }
2664
2665 if(raptor_rdfxml_process_property_attributes(rdf_parser, element,
2666 element->parent,
2667 element->object))
2668 goto oom;
2669
2670 }
2671
2672 /* We know object is a resource, so delete any unsignficant
2673 * whitespace so that FALLTHROUGH code below finds the object.
2674 */
2675 if(xml_element->content_cdata_length) {
2676 raptor_free_stringbuffer(xml_element->content_cdata_sb);
2677 xml_element->content_cdata_sb = NULL;
2678 xml_element->content_cdata_length = 0;
2679 }
2680
2681 /* FALLTHROUGH */
2682 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL:
2683
2684 if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {
2685
2686 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) {
2687 /* Only an empty literal can have a rdf:bagID */
2688 if(element->bag) {
2689 if(xml_element->content_cdata_length > 0) {
2690 raptor_parser_error(rdf_parser,
2691 "rdf:bagID is forbidden on a literal property element '%s'.",
2692 el_name);
2693
2694 /* prevent this being used later either */
2695 element->rdf_attr[RDF_NS_bagID] = NULL;
2696 } else {
2697 raptor_rdfxml_generate_statement(rdf_parser,
2698 element->bag,
2699 RAPTOR_RDF_type_URI(rdf_parser->world),
2700 RAPTOR_RDF_Bag_term(rdf_parser->world),
2701 NULL,
2702 NULL);
2703 }
2704 }
2705 } /* if rdf:bagID */
2706
2707 /* If there is empty literal content with properties
2708 * generate a node to hang properties off
2709 */
2710 if(raptor_rdfxml_element_has_property_attributes(element) &&
2711 xml_element->content_cdata_length > 0) {
2712 raptor_parser_error(rdf_parser,
2713 "Literal property element '%s' has property attributes",
2714 el_name);
2715 state = RAPTOR_STATE_SKIPPING;
2716 element->child_state = RAPTOR_STATE_SKIPPING;
2717 break;
2718 }
2719
2720 if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL &&
2721 raptor_rdfxml_element_has_property_attributes(element) &&
2722 !element->object) {
2723 unsigned char* object_id;
2724 object_id = raptor_world_generate_bnodeid(rdf_parser->world);
2725 if(!object_id)
2726 goto oom;
2727
2728 element->object = raptor_new_term_from_blank(rdf_parser->world,
2729 object_id);
2730 RAPTOR_FREE(char*, object_id);
2731
2732 if(!element->object)
2733 goto oom;
2734 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2735 }
2736
2737 if(raptor_rdfxml_process_property_attributes(rdf_parser, element,
2738 element,
2739 element->object))
2740 goto oom;
2741 }
2742
2743
2744 /* just be friendly to older compilers and don't declare
2745 * variables in the middle of a block
2746 */
2747 if(1) {
2748 raptor_uri *predicate_uri = NULL;
2749 int predicate_ordinal = -1;
2750 raptor_term* object_term = NULL;
2751
2752 if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
2753 predicate_ordinal = ++element->parent->last_ordinal;
2754 predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world,
2755 predicate_ordinal);
2756
2757 } else {
2758 predicate_uri = element_name_uri;
2759 }
2760
2761
2762 if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {
2763 unsigned char* literal = NULL;
2764 raptor_uri* literal_datatype;
2765 unsigned char* literal_language = NULL;
2766
2767 /* an empty stringbuffer - empty CDATA - is OK */
2768 if(raptor_stringbuffer_length(xml_element->content_cdata_sb)) {
2769 literal = raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
2770 if(!literal)
2771 goto oom;
2772 }
2773
2774 literal_datatype = element->object_literal_datatype;
2775 if(!literal_datatype)
2776 literal_language = (unsigned char*)raptor_sax2_inscope_xml_language(rdf_xml_parser->sax2);
2777
2778 if(!literal_datatype && literal &&
2779 !raptor_unicode_check_utf8_nfc_string(literal,
2780 xml_element->content_cdata_length,
2781 NULL)) {
2782 const char *message;
2783 message = "Property element '%s' has a string not in Unicode Normal Form C: %s";
2784 raptor_rdfxml_update_document_locator(rdf_parser);
2785 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL))
2786 raptor_parser_error(rdf_parser, message, el_name, literal);
2787 else
2788 raptor_parser_warning(rdf_parser, message, el_name, literal);
2789 }
2790
2791 object_term = raptor_new_term_from_literal(rdf_parser->world,
2792 literal,
2793 literal_datatype,
2794 literal_language);
2795 } else {
2796 object_term = raptor_term_copy(element->object);
2797 }
2798
2799 raptor_rdfxml_generate_statement(rdf_parser,
2800 element->parent->subject,
2801 predicate_uri,
2802 object_term,
2803 element->reified,
2804 element->parent);
2805
2806 if(predicate_ordinal >= 0)
2807 raptor_free_uri(predicate_uri);
2808
2809 raptor_free_term(object_term);
2810 }
2811
2812 break;
2813
2814 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED:
2815 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL:
2816 {
2817 unsigned char *buffer;
2818 size_t length;
2819 raptor_term* xmlliteral_term = NULL;
2820
2821 if(rdf_xml_parser->xml_writer) {
2822 raptor_xml_writer_flush(rdf_xml_parser->xml_writer);
2823
2824 raptor_free_iostream(rdf_xml_parser->iostream);
2825 rdf_xml_parser->iostream = NULL;
2826
2827 buffer = (unsigned char*)rdf_xml_parser->xml_content;
2828 length = rdf_xml_parser->xml_content_length;
2829 } else {
2830 buffer = raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
2831 length = xml_element->content_cdata_length;
2832 }
2833
2834 if(!raptor_unicode_check_utf8_nfc_string(buffer, length, NULL)) {
2835 const char *message;
2836 message = "Property element '%s' has XML literal content not in Unicode Normal Form C: %s";
2837 raptor_rdfxml_update_document_locator(rdf_parser);
2838 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL))
2839 raptor_parser_error(rdf_parser, message, el_name, buffer);
2840 else
2841 raptor_parser_warning(rdf_parser, message, el_name, buffer);
2842 }
2843
2844 xmlliteral_term = raptor_new_term_from_literal(rdf_parser->world,
2845 buffer,
2846 RAPTOR_RDF_XMLLiteral_URI(rdf_parser->world),
2847 NULL);
2848
2849 if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
2850 raptor_uri* predicate_uri;
2851
2852 element->parent->last_ordinal++;
2853 predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, element->parent->last_ordinal);
2854
2855 raptor_rdfxml_generate_statement(rdf_parser,
2856 element->parent->subject,
2857 predicate_uri,
2858 xmlliteral_term,
2859 element->reified,
2860 element->parent);
2861
2862 raptor_free_uri(predicate_uri);
2863 } else {
2864 raptor_rdfxml_generate_statement(rdf_parser,
2865 element->parent->subject,
2866 element_name_uri,
2867 xmlliteral_term,
2868 element->reified,
2869 element->parent);
2870 }
2871
2872 raptor_free_term(xmlliteral_term);
2873
2874 /* Finish the xml writer iostream for parseType="Literal" */
2875 if(rdf_xml_parser->xml_writer) {
2876 raptor_free_xml_writer(rdf_xml_parser->xml_writer);
2877 rdf_xml_parser->xml_writer = NULL;
2878 RAPTOR_FREE(char*, rdf_xml_parser->xml_content);
2879 rdf_xml_parser->xml_content = NULL;
2880 rdf_xml_parser->xml_content_length = 0;
2881 }
2882 }
2883
2884 break;
2885
2886 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION:
2887 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION:
2888
2889 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES:
2890 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES:
2891 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT:
2892
2893 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN:
2894 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST:
2895 default:
2896 raptor_parser_fatal_error(rdf_parser,
2897 "%s: Internal error in state RAPTOR_STATE_PROPERTYELT - got unexpected content type %s (%d)",
2898 __FUNCTION__,
2899 raptor_rdfxml_element_content_type_as_string(element->content_type),
2900 element->content_type);
2901 } /* end switch */
2902
2903 finished = 1;
2904 break;
2905
2906 case RAPTOR_STATE_INVALID:
2907 default:
2908 raptor_parser_fatal_error(rdf_parser,
2909 "%s: Internal error - unexpected parser state %d - %s",
2910 __FUNCTION__,
2911 state,
2912 raptor_rdfxml_state_as_string(state));
2913 finished = 1;
2914
2915 } /* end switch */
2916
2917 if(state != element->state) {
2918 element->state = state;
2919 #ifdef RAPTOR_DEBUG_VERBOSE
2920 RAPTOR_DEBUG3("Moved to state %d - %s\n", state,
2921 raptor_rdfxml_state_as_string(state));
2922 #endif
2923 }
2924
2925 } /* end while */
2926
2927 #ifdef RAPTOR_DEBUG_VERBOSE
2928 RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
2929 #endif
2930
2931 return;
2932
2933 oom:
2934 raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping");
2935 element->state = RAPTOR_STATE_SKIPPING;
2936 }
2937
2938
2939
2940 static void
raptor_rdfxml_cdata_grammar(raptor_parser * rdf_parser,const unsigned char * s,int len,int is_cdata)2941 raptor_rdfxml_cdata_grammar(raptor_parser *rdf_parser,
2942 const unsigned char *s, int len,
2943 int is_cdata)
2944 {
2945 raptor_rdfxml_parser* rdf_xml_parser;
2946 raptor_rdfxml_element* element;
2947 raptor_xml_element* xml_element;
2948 raptor_state state;
2949 int all_whitespace = 1;
2950 int i;
2951
2952 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
2953
2954 if(rdf_parser->failed)
2955 return;
2956
2957 #ifdef RAPTOR_DEBUG_CDATA
2958 RAPTOR_DEBUG2("Adding characters (is_cdata=%d): '", is_cdata);
2959 (void)fwrite(s, 1, len, stderr);
2960 fprintf(stderr, "' (%d bytes)\n", len);
2961 #endif
2962
2963 for(i = 0; i < len; i++)
2964 if(!isspace(s[i])) {
2965 all_whitespace = 0;
2966 break;
2967 }
2968
2969 element = rdf_xml_parser->current_element;
2970
2971 /* this file is very broke - probably not XML, whatever */
2972 if(!element)
2973 return;
2974
2975 xml_element = element->xml_element;
2976
2977 raptor_rdfxml_update_document_locator(rdf_parser);
2978
2979 /* cdata never changes the parser state
2980 * and the containing element state always determines what to do.
2981 * Use the child_state first if there is one, since that applies
2982 */
2983 state = element->child_state;
2984 #ifdef RAPTOR_DEBUG_VERBOSE
2985 RAPTOR_DEBUG2("Working in state %s\n", raptor_rdfxml_state_as_string(state));
2986 #endif
2987
2988
2989 #ifdef RAPTOR_DEBUG_VERBOSE
2990 RAPTOR_DEBUG3("Content type %s (%d)\n",
2991 raptor_rdfxml_element_content_type_as_string(element->content_type),
2992 element->content_type);
2993 #endif
2994
2995
2996
2997 if(state == RAPTOR_STATE_SKIPPING)
2998 return;
2999
3000 if(state == RAPTOR_STATE_UNKNOWN) {
3001 /* Ignore all cdata if still looking for RDF */
3002 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING))
3003 return;
3004
3005 /* Ignore all whitespace cdata before first element */
3006 if(all_whitespace)
3007 return;
3008
3009 /* This probably will never happen since that would make the
3010 * XML not be well-formed
3011 */
3012 raptor_parser_warning(rdf_parser, "Character data before RDF element.");
3013 }
3014
3015
3016 if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES) {
3017 /* If found non-whitespace content, move to literal content */
3018 if(!all_whitespace)
3019 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
3020 }
3021
3022
3023 if(!rdf_content_type_info[element->child_content_type].whitespace_significant) {
3024
3025 /* Whitespace is ignored except for literal or preserved content types */
3026 if(all_whitespace) {
3027 #ifdef RAPTOR_DEBUG_CDATA
3028 RAPTOR_DEBUG2("Ignoring whitespace cdata inside element '%s'\n",
3029 raptor_xml_element_get_name(element->parent->xml_element)->local_name);
3030 #endif
3031 return;
3032 }
3033
3034 if(xml_element->content_cdata_seen && xml_element->content_element_seen) {
3035 raptor_qname* parent_el_name;
3036
3037 parent_el_name = raptor_xml_element_get_name(element->parent->xml_element);
3038 /* Uh oh - mixed content, this element has elements too */
3039 raptor_parser_warning(rdf_parser, "element '%s' has mixed content.",
3040 parent_el_name->local_name);
3041 }
3042 }
3043
3044
3045 if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
3046 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
3047 #ifdef RAPTOR_DEBUG_VERBOSE
3048 RAPTOR_DEBUG3("Content type changed to %s (%d)\n",
3049 raptor_rdfxml_element_content_type_as_string(element->content_type),
3050 element->content_type);
3051 #endif
3052 }
3053
3054 if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
3055 raptor_xml_writer_cdata_counted(rdf_xml_parser->xml_writer, s, len);
3056 else {
3057 raptor_stringbuffer_append_counted_string(xml_element->content_cdata_sb,
3058 s, len, 1);
3059 element->content_cdata_all_whitespace &= all_whitespace;
3060
3061 /* adjust stored length */
3062 xml_element->content_cdata_length += len;
3063 }
3064
3065
3066 #ifdef RAPTOR_DEBUG_CDATA
3067 RAPTOR_DEBUG3("Content cdata now: %d bytes\n",
3068 xml_element->content_cdata_length);
3069 #endif
3070 #ifdef RAPTOR_DEBUG_VERBOSE
3071 RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
3072 #endif
3073 }
3074
3075
3076
3077 /**
3078 * raptor_rdfxml_inscope_base_uri:
3079 * @rdf_parser: Raptor parser object
3080 *
3081 * Return the in-scope base URI.
3082 *
3083 * Looks for the innermost xml:base on an element or document URI
3084 *
3085 * Return value: The URI string value or NULL on failure.
3086 **/
3087 static raptor_uri*
raptor_rdfxml_inscope_base_uri(raptor_parser * rdf_parser)3088 raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser)
3089 {
3090 raptor_rdfxml_parser* rdf_xml_parser;
3091 raptor_uri* base_uri;
3092
3093 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
3094
3095 base_uri = raptor_sax2_inscope_base_uri(rdf_xml_parser->sax2);
3096 if(!base_uri)
3097 base_uri = rdf_parser->base_uri;
3098
3099 return base_uri;
3100 }
3101
3102
3103 /**
3104 * raptor_rdfxml_record_ID:
3105 * @rdf_parser: Raptor parser object
3106 * @element: Current element
3107 * @id: ID string
3108 *
3109 * Record an rdf:ID / rdf:bagID value (with xml base) and check it hasn't been seen already.
3110 *
3111 * Record and check the ID values, if they have been seen already.
3112 * per in-scope-base URI.
3113 *
3114 * Return value: non-zero if already seen, or failure
3115 **/
3116 static int
raptor_rdfxml_record_ID(raptor_parser * rdf_parser,raptor_rdfxml_element * element,const unsigned char * id)3117 raptor_rdfxml_record_ID(raptor_parser *rdf_parser,
3118 raptor_rdfxml_element *element,
3119 const unsigned char *id)
3120 {
3121 raptor_rdfxml_parser *rdf_xml_parser;
3122 raptor_uri* base_uri;
3123 size_t id_len;
3124 int rc;
3125
3126 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
3127
3128 if(!RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID))
3129 return 0;
3130
3131 base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser);
3132
3133 id_len = strlen((const char*)id);
3134
3135 rc = raptor_id_set_add(rdf_xml_parser->id_set, base_uri, id, id_len);
3136
3137 return (rc != 0);
3138 }
3139
3140
3141
3142 static void
raptor_rdfxml_update_document_locator(raptor_parser * rdf_parser)3143 raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser)
3144 {
3145 raptor_rdfxml_parser *rdf_xml_parser;
3146
3147 rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
3148
3149 raptor_sax2_update_document_locator(rdf_xml_parser->sax2,
3150 &rdf_parser->locator);
3151 }
3152
3153
3154
3155 static void
raptor_rdfxml_parse_finish_factory(raptor_parser_factory * factory)3156 raptor_rdfxml_parse_finish_factory(raptor_parser_factory* factory)
3157 {
3158 }
3159
3160
/* Names for the RDF/XML syntax; NULL terminated */
static const char* const rdfxml_names[3] = {
  "rdfxml",
  "raptor",
  NULL
};

/* URI strings for the RDF/XML syntax; NULL terminated */
static const char* const rdfxml_uri_strings[3] = {
  "http://www.w3.org/ns/formats/RDF_XML",
  "http://www.w3.org/TR/rdf-syntax-grammar",
  NULL
};
3168
3169 #define RDFXML_TYPES_COUNT 2
3170 static const raptor_type_q rdfxml_types[RDFXML_TYPES_COUNT + 1] = {
3171 { "application/rdf+xml", 19, 10},
3172 { "text/rdf", 8, 6},
3173 { NULL, 0, 0}
3174 };
3175
3176 static int
raptor_rdfxml_parser_register_factory(raptor_parser_factory * factory)3177 raptor_rdfxml_parser_register_factory(raptor_parser_factory *factory)
3178 {
3179 int rc = 0;
3180
3181 factory->desc.names = rdfxml_names;
3182
3183 factory->desc.mime_types = rdfxml_types;
3184
3185 factory->desc.label = "RDF/XML";
3186 factory->desc.uri_strings = rdfxml_uri_strings;
3187
3188 factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI;
3189
3190 factory->context_length = sizeof(raptor_rdfxml_parser);
3191
3192 factory->init = raptor_rdfxml_parse_init;
3193 factory->terminate = raptor_rdfxml_parse_terminate;
3194 factory->start = raptor_rdfxml_parse_start;
3195 factory->chunk = raptor_rdfxml_parse_chunk;
3196 factory->finish_factory = raptor_rdfxml_parse_finish_factory;
3197 factory->recognise_syntax = raptor_rdfxml_parse_recognise_syntax;
3198
3199 return rc;
3200 }
3201
3202
3203 int
raptor_init_parser_rdfxml(raptor_world * world)3204 raptor_init_parser_rdfxml(raptor_world* world)
3205 {
3206 return !raptor_world_register_parser_factory(world,
3207 &raptor_rdfxml_parser_register_factory);
3208 }
3209
3210
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
/*
 * Only compiled when RAPTOR_DEBUG is greater than 1.
 *
 * Writes the heading "rdf:ID set " to @stream and then lets
 * raptor_id_set_stats_print() print the parser's ID set to the same
 * stream.
 */
void
raptor_rdfxml_parser_stats_print(raptor_rdfxml_parser* rdf_xml_parser,
                                 FILE *stream)
{
  static const char heading[] = "rdf:ID set ";

  fputs(heading, stream);
  raptor_id_set_stats_print(rdf_xml_parser->id_set, stream);
}
#endif
3220