1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * raptor_rdfxml.c - Raptor RDF/XML Parser
4  *
5  * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
6  * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7  *
8  * This package is Free Software and part of Redland http://librdf.org/
9  *
10  * It is licensed under the following three licenses as alternatives:
11  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12  *   2. GNU General Public License (GPL) V2 or any newer version
13  *   3. Apache License, V2.0 or any newer version
14  *
15  * You may not use this file except in compliance with at least one of
16  * the above three licenses.
17  *
18  * See LICENSE.html or LICENSE.txt at the top of this package for the
19  * complete terms and further detail along with the license texts for
20  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21  *
22  *
23  */
24 
25 
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29 
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <stdarg.h>
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_STDLIB_H
38 #include <stdlib.h>
39 #endif
40 
41 /* Raptor includes */
42 #include "raptor2.h"
43 #include "raptor_internal.h"
44 
45 
46 /* Define these for far too much output */
47 #undef RAPTOR_DEBUG_VERBOSE
48 #undef RAPTOR_DEBUG_CDATA
49 
50 
51 /* Raptor structures */
52 
/*
 * raptor_state:
 *
 * Grammar states of the RDF/XML parser.  Every element on the element
 * stack records the state of the production it matches.
 *
 * The numeric values are used directly as indexes into
 * raptor_state_names[], so both must be kept in the same order.
 */
typedef enum {
  /* Catch uninitialised state */
  RAPTOR_STATE_INVALID              = 0,

  /* Skipping the current tree of elements - used to recover after
   * finding illegal content, when parsing permissively.
   */
  RAPTOR_STATE_SKIPPING             = 1,

  /* Not in RDF grammar yet - searching for a start element.
   *
   * This can be <rdf:RDF> (goto NODE_ELEMENT_LIST) but since it is
   * optional, the start element can also be one of
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementURIs
   *
   * If RDF content is assumed, go straight to OBJ
   */
  RAPTOR_STATE_UNKNOWN              = 2,

  /* A list of node elements
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList
   */
  RAPTOR_STATE_NODE_ELEMENT_LIST    = 3,

  /* Found an <rdf:Description> */
  RAPTOR_STATE_DESCRIPTION          = 4,

  /* Found a property element
   *   http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
   */
  RAPTOR_STATE_PROPERTYELT          = 5,

  /* A property element that is an ordinal - rdf:li, rdf:_n */
  RAPTOR_STATE_MEMBER_PROPERTYELT   = 6,

  /* Found a node element
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement
   */
  RAPTOR_STATE_NODE_ELEMENT         = 7,

  /* A property element with rdf:parseType="Literal"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeLiteralPropertyElt
   */
  RAPTOR_STATE_PARSETYPE_LITERAL    = 8,

  /* A property element with rdf:parseType="Resource"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt
   */
  RAPTOR_STATE_PARSETYPE_RESOURCE   = 9,

  /* A property element with rdf:parseType="Collection"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt
   *
   * (This also handles daml:Collection)
   */
  RAPTOR_STATE_PARSETYPE_COLLECTION = 10,

  /* A property element with a rdf:parseType attribute and a value
   * not "Literal" or "Resource"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
   */
  RAPTOR_STATE_PARSETYPE_OTHER      = 11,

  /* Alias for the highest valid state */
  RAPTOR_STATE_PARSETYPE_LAST       = RAPTOR_STATE_PARSETYPE_OTHER
} raptor_state;
121 
122 
123 static const char* const raptor_state_names[RAPTOR_STATE_PARSETYPE_LAST+2] = {
124   "INVALID",
125   "SKIPPING",
126   "UNKNOWN",
127   "nodeElementList",
128   "propertyElt",
129   "Description",
130   "propertyElt",
131   "memberPropertyElt",
132   "nodeElement",
133   "parseTypeLiteral",
134   "parseTypeResource",
135   "parseTypeCollection",
136   "parseTypeOther"
137 };
138 
139 
/*
 * raptor_rdfxml_state_as_string:
 * @state: parser state
 *
 * Get a printable name for a parser state.
 *
 * Return value: the raptor_state_names[] entry for @state; any value
 * outside 1..RAPTOR_STATE_PARSETYPE_LAST yields entry 0.
 */
static const char *
raptor_rdfxml_state_as_string(raptor_state state)
{
  const int in_range = (state >= 1 && state <= RAPTOR_STATE_PARSETYPE_LAST);

  return raptor_state_names[in_range ? (int)state : 0];
}
146 
147 
148 /*
149  * raptor_rdfxml_check_propertyElement_name:
150  * @name: rdf namespace term
151  *
152  * Check if an rdf namespace name is allowed to be used as a Node Element.
153  *
154  * Return value: < 0 if unknown rdf namespace term, 0 if known and not allowed, > 0 if known and allowed
155  */
156 static int
raptor_rdfxml_check_nodeElement_name(const char * name)157 raptor_rdfxml_check_nodeElement_name(const char *name)
158 {
159   int i;
160 
161   if(*name == '_')
162     return 1;
163 
164   for(i = 0; raptor_rdf_ns_terms_info[i].name; i++)
165     if(!strcmp(raptor_rdf_ns_terms_info[i].name, name))
166       return raptor_rdf_ns_terms_info[i].allowed_as_nodeElement;
167 
168   return -1;
169 }
170 
171 
172 /*
173  * raptor_rdfxml_check_propertyElement_name:
174  * @name: rdf namespace term
175  *
176  * Check if an rdf namespace name is allowed to be used as a Property Element.
177  *
178  * Return value: < 0 if unknown rdf namespace term, 0 if known and not allowed, > 0 if known and allowed
179  */
180 static int
raptor_rdfxml_check_propertyElement_name(const char * name)181 raptor_rdfxml_check_propertyElement_name(const char *name)
182 {
183   int i;
184 
185   if(*name == '_')
186     return 1;
187 
188   for(i = 0; raptor_rdf_ns_terms_info[i].name; i++)
189     if(!strcmp(raptor_rdf_ns_terms_info[i].name, (const char*)name))
190       return raptor_rdf_ns_terms_info[i].allowed_as_propertyElement;
191 
192   return -1;
193 }
194 
195 
196 static int
raptor_rdfxml_check_propertyAttribute_name(const char * name)197 raptor_rdfxml_check_propertyAttribute_name(const char *name)
198 {
199   int i;
200 
201   if(*name == '_')
202     return 1;
203 
204   for(i = 0; raptor_rdf_ns_terms_info[i].name; i++)
205     if(!strcmp(raptor_rdf_ns_terms_info[i].name, (const char*)name))
206       return raptor_rdf_ns_terms_info[i].allowed_as_propertyAttribute;
207 
208   return -1;
209 }
210 
211 
/*
 * raptor_rdfxml_element_content_type:
 *
 * How the content inside an XML element is to be handled.
 *
 * The numeric values are used directly as indexes into
 * rdf_content_type_info[], so both must be kept in the same order.
 */
typedef enum {
  /* undetermined yet - whitespace is stored */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN          = 0,

  /* literal content - no elements, cdata allowed, whitespace significant
   * <propElement> blah </propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL          = 1,

  /* parseType literal content (WF XML) - all content preserved
   * <propElement rdf:parseType="Literal"><em>blah</em></propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL      = 2,

  /* top-level nodes - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * only used for <rdf:RDF> or implicit <rdf:RDF>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES            = 3,

  /* properties - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * <nodeElement><prop1>blah</prop1> <prop2>blah</prop2> </nodeElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES       = 4,

  /* property content - all content preserved
   * any content type changes when first non-whitespace found
   * <propElement>...
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT = 5,

  /* resource URI given - no element, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * <propElement rdf:resource="uri"/>
   * <propElement rdf:resource="uri"></propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE         = 6,

  /* skipping content - all content is preserved
   * Used when skipping content for unknown parseType-s,
   * error recovery, some other reason
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED        = 7,

  /* parseType Collection - all content preserved
   * Parsing of this determined by RDF/XML (Revised) closed collection rules
   * <propElement rdf:parseType="Collection">...</propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION       = 8,

  /* Like above but handles "daml:collection" */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION  = 9,

  /* not a content type: the number of content types, used to size
   * rdf_content_type_info[]
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST             = 10
} raptor_rdfxml_element_content_type;
270 
271 
/*
 * Handling flags for each content type, indexed by
 * #raptor_rdfxml_element_content_type value; the rows must stay in the
 * same order as the enum.
 */
static const struct {
  /* printable name of the content type */
  const char *name;

  int whitespace_significant;

  /* non-blank cdata */
  int cdata_allowed;

  /* XML element content */
  int element_allowed;

  /* Do RDF-specific processing? (property attributes, rdf: attributes, ...) */
  int rdf_processing;
} rdf_content_type_info[RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST] = {
  /* name                whitespace  cdata  element  rdf */
  { "Unknown",           1,          1,     1,       0 }, /* ..._UNKNOWN */
  { "Literal",           1,          1,     0,       0 }, /* ..._LITERAL */
  { "XML Literal",       1,          1,     1,       0 }, /* ..._XML_LITERAL */
  { "Nodes",             0,          0,     1,       1 }, /* ..._NODES */
  { "Properties",        0,          1,     1,       1 }, /* ..._PROPERTIES */
  { "Property Content",  1,          1,     1,       1 }, /* ..._PROPERTY_CONTENT */
  { "Resource",          0,          0,     0,       0 }, /* ..._RESOURCE */
  { "Preserved",         1,          1,     1,       0 }, /* ..._PRESERVED */
  { "Collection",        1,          1,     1,       1 }, /* ..._COLLECTION */
  { "DAML Collection",   1,          1,     1,       1 }  /* ..._DAML_COLLECTION */
};
293 
294 
295 
296 static const char *
raptor_rdfxml_element_content_type_as_string(raptor_rdfxml_element_content_type type)297 raptor_rdfxml_element_content_type_as_string(raptor_rdfxml_element_content_type type)
298 {
299   if(type >= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST)
300     return "INVALID";
301 
302   return rdf_content_type_info[type].name;
303 }
304 
305 
306 
307 
308 
309 /*
310  * Raptor Element/attributes on stack
311  */
312 struct raptor_rdfxml_element_s {
313   raptor_world* world;
314 
315   raptor_xml_element *xml_element;
316 
317   /* NULL at bottom of stack */
318   struct raptor_rdfxml_element_s *parent;
319 
320   /* attributes declared in M&S */
321   const unsigned char * rdf_attr[RDF_NS_LAST + 1];
322   /* how many of above seen */
323   int rdf_attr_count;
324 
325   /* state that this production matches */
326   raptor_state state;
327 
328   /* how to handle the content inside this XML element */
329   raptor_rdfxml_element_content_type content_type;
330 
331 
332   /* starting state for children of this element */
333   raptor_state child_state;
334 
335   /* starting content type for children of this element */
336   raptor_rdfxml_element_content_type child_content_type;
337 
338 
339   /* Reified statement identifier */
340   raptor_term* reified;
341 
342   unsigned const char* reified_id;
343 
344   /* Bag identifier */
345   raptor_term* bag;
346   int last_bag_ordinal; /* starts at 0, so first predicate is rdf:_1 */
347 
348   /* Subject identifier (URI/anon ID), type, source
349    *
350    * When the XML element represents a node, this is the identifier
351    */
352   raptor_term* subject;
353 
354   /* Predicate URI
355    *
356    * When the XML element represents a node or predicate,
357    * this is the identifier of the predicate
358    */
359   raptor_term* predicate;
360 
361   /* Object identifier (URI/anon ID), type, source
362    *
363    * When this XML element generates a statement that needs an object,
364    * possibly from a child element, this is the identifier of the object
365    */
366   raptor_term* object;
367 
368   /* URI of datatype of literal */
369   raptor_uri *object_literal_datatype;
370 
371   /* last ordinal used, so initialising to 0 works, emitting rdf:_1 first */
372   int last_ordinal;
373 
374   /* If this element's parseType is a Collection
375    * this identifies the anon node of current tail of the collection(list).
376    */
377   const unsigned char *tail_id;
378 
379   /* RDF/XML specific checks */
380 
381   /* all cdata so far is whitespace */
382   unsigned int content_cdata_all_whitespace;
383 };
384 
385 typedef struct raptor_rdfxml_element_s raptor_rdfxml_element;
386 
387 
388 #define RAPTOR_RDFXML_N_CONCEPTS 5
389 
390 /*
391  * Raptor parser object
392  */
393 struct raptor_rdfxml_parser_s {
394   raptor_sax2 *sax2;
395 
396   /* stack of elements - elements add after current_element */
397   raptor_rdfxml_element *root_element;
398   raptor_rdfxml_element *current_element;
399 
400   raptor_uri* concepts[RAPTOR_RDFXML_N_CONCEPTS];
401 
402   /* set of seen rdf:ID / rdf:bagID values (with in-scope base URI) */
403   raptor_id_set* id_set;
404 
405   void *xml_content;
406   size_t xml_content_length;
407   raptor_iostream* iostream;
408 
409   /* writer for building parseType="Literal" content */
410   raptor_xml_writer* xml_writer;
411 };
412 
413 
414 
415 
/* Accessors for the entries of the parser's concepts[] array.
 *
 * @rdf_xml_parser is a pointer to the parser object.  The argument and
 * the whole expansion are parenthesised so that any pointer expression
 * (for example "parsers + 1" or "&parser") can be passed and the result
 * can be used inside a larger expression.  The expansions are still
 * lvalues.
 */
#define RAPTOR_DAML_NS_URI(rdf_xml_parser)    ((rdf_xml_parser)->concepts[0])

#define RAPTOR_DAML_List_URI(rdf_xml_parser)  ((rdf_xml_parser)->concepts[1])
#define RAPTOR_DAML_first_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[2])
#define RAPTOR_DAML_rest_URI(rdf_xml_parser)  ((rdf_xml_parser)->concepts[3])
#define RAPTOR_DAML_nil_URI(rdf_xml_parser)   ((rdf_xml_parser)->concepts[4])

/* RAPTOR_RDFXML_N_CONCEPTS defines size of array */
426 
427 
428 /* prototypes for element functions */
429 static raptor_rdfxml_element* raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_parser);
430 static void raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_parser, raptor_rdfxml_element* element);
431 
432 static int raptor_rdfxml_record_ID(raptor_parser *rdf_parser, raptor_rdfxml_element *element, const unsigned char *id);
433 
434 /* prototypes for grammar functions */
435 static void raptor_rdfxml_start_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
436 static void raptor_rdfxml_end_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
437 static void raptor_rdfxml_cdata_grammar(raptor_parser *parser, const unsigned char *s, int len, int is_cdata);
438 
439 
440 /* prototype for statement related functions */
441 static void raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, raptor_term *subject,  raptor_uri *predicate_uri, raptor_term *object, raptor_term *reified, raptor_rdfxml_element *bag_element);
442 
443 
444 
445 /* Prototypes for parsing data functions */
446 static int raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name);
447 static void raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser);
448 static int raptor_rdfxml_parse_start(raptor_parser* rdf_parser);
449 static int raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer, size_t len, int is_end);
450 static void raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser);
451 
452 static raptor_uri* raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser);
453 
454 
455 static raptor_rdfxml_element*
raptor_rdfxml_element_pop(raptor_rdfxml_parser * rdf_xml_parser)456 raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_xml_parser)
457 {
458   raptor_rdfxml_element *element = rdf_xml_parser->current_element;
459 
460   if(!element)
461     return NULL;
462 
463   rdf_xml_parser->current_element = element->parent;
464   if(rdf_xml_parser->root_element == element) /* just deleted root */
465     rdf_xml_parser->root_element = NULL;
466 
467   return element;
468 }
469 
470 
471 static void
raptor_rdfxml_element_push(raptor_rdfxml_parser * rdf_xml_parser,raptor_rdfxml_element * element)472 raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_xml_parser, raptor_rdfxml_element* element)
473 {
474   element->parent = rdf_xml_parser->current_element;
475   rdf_xml_parser->current_element = element;
476   if(!rdf_xml_parser->root_element)
477     rdf_xml_parser->root_element = element;
478 }
479 
480 
481 static void
raptor_free_rdfxml_element(raptor_rdfxml_element * element)482 raptor_free_rdfxml_element(raptor_rdfxml_element *element)
483 {
484   int i;
485 
486   /* Free special RDF M&S attributes */
487   for(i = 0; i <= RDF_NS_LAST; i++)
488     if(element->rdf_attr[i])
489       RAPTOR_FREE(char*, element->rdf_attr[i]);
490 
491   if(element->subject)
492     raptor_free_term(element->subject);
493   if(element->predicate)
494     raptor_free_term(element->predicate);
495   if(element->object)
496     raptor_free_term(element->object);
497   if(element->bag)
498     raptor_free_term(element->bag);
499   if(element->reified)
500     raptor_free_term(element->reified);
501 
502   if(element->tail_id)
503     RAPTOR_FREE(char*, (char*)element->tail_id);
504   if(element->object_literal_datatype)
505     raptor_free_uri(element->object_literal_datatype);
506 
507   if(element->reified_id)
508     RAPTOR_FREE(char*, (char*)element->reified_id);
509 
510   RAPTOR_FREE(raptor_rdfxml_element, element);
511 }
512 
513 
514 static void
raptor_rdfxml_sax2_new_namespace_handler(void * user_data,raptor_namespace * nspace)515 raptor_rdfxml_sax2_new_namespace_handler(void *user_data,
516                                          raptor_namespace* nspace)
517 {
518   raptor_parser* rdf_parser;
519   const unsigned char* namespace_name;
520   size_t namespace_name_len;
521   raptor_uri* uri = raptor_namespace_get_uri(nspace);
522 
523   rdf_parser = (raptor_parser*)user_data;
524   raptor_parser_start_namespace(rdf_parser, nspace);
525 
526   if(!uri)
527     return;
528 
529   namespace_name = raptor_uri_as_counted_string(uri, &namespace_name_len);
530 
531   if(namespace_name_len == raptor_rdf_namespace_uri_len-1 &&
532      !strncmp((const char*)namespace_name,
533               (const char*)raptor_rdf_namespace_uri,
534               namespace_name_len)) {
535     const unsigned char *prefix = raptor_namespace_get_prefix(nspace);
536     raptor_parser_warning(rdf_parser,
537                           "Declaring a namespace with prefix %s to URI %s - one letter short of the RDF namespace URI and probably a mistake.",
538                           prefix, namespace_name);
539   }
540 
541   if(namespace_name_len > raptor_rdf_namespace_uri_len &&
542      !strncmp((const char*)namespace_name,
543               (const char*)raptor_rdf_namespace_uri,
544               raptor_rdf_namespace_uri_len)) {
545     raptor_parser_error(rdf_parser,
546                         "Declaring a namespace URI %s to which the RDF namespace URI is a prefix is forbidden.",
547                         namespace_name);
548   }
549 }
550 
551 
552 
553 static void
raptor_rdfxml_start_element_handler(void * user_data,raptor_xml_element * xml_element)554 raptor_rdfxml_start_element_handler(void *user_data,
555                                     raptor_xml_element* xml_element)
556 {
557   raptor_parser* rdf_parser;
558   raptor_rdfxml_parser* rdf_xml_parser;
559   raptor_rdfxml_element* element;
560   int ns_attributes_count = 0;
561   raptor_qname** named_attrs = NULL;
562   int i;
563   int count_bumped = 0;
564 
565   rdf_parser = (raptor_parser*)user_data;
566   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
567 
568   if(rdf_parser->failed)
569     return;
570 
571   raptor_rdfxml_update_document_locator(rdf_parser);
572 
573   /* Create new element structure */
574   element = RAPTOR_CALLOC(raptor_rdfxml_element*, 1, sizeof(*element));
575   if(!element) {
576     raptor_parser_fatal_error(rdf_parser, "Out of memory");
577     rdf_parser->failed = 1;
578     return;
579   }
580   element->world = rdf_parser->world;
581   element->xml_element = xml_element;
582 
583   raptor_rdfxml_element_push(rdf_xml_parser, element);
584 
585   named_attrs = raptor_xml_element_get_attributes(xml_element);
586   ns_attributes_count = raptor_xml_element_get_attributes_count(xml_element);
587 
588   /* RDF-specific processing of attributes */
589   if(ns_attributes_count) {
590     raptor_qname** new_named_attrs;
591     int offset = 0;
592     raptor_rdfxml_element* parent_element;
593 
594     parent_element = element->parent;
595 
596     /* Allocate new array to move namespaced-attributes to if
597      * rdf processing is performed
598      */
599     new_named_attrs = RAPTOR_CALLOC(raptor_qname**, ns_attributes_count,
600                                     sizeof(raptor_qname*));
601     if(!new_named_attrs) {
602       raptor_parser_fatal_error(rdf_parser, "Out of memory");
603       rdf_parser->failed = 1;
604       return;
605     }
606 
607     for(i = 0; i < ns_attributes_count; i++) {
608       raptor_qname* attr = named_attrs[i];
609 
610       /* If:
611        *  1 We are handling RDF content and RDF processing is allowed on
612        *    this element
613        * OR
614        *  2 We are not handling RDF content and
615        *    this element is at the top level (top level Desc. / typedNode)
616        *    i.e. we have no parent
617        * then handle the RDF attributes
618        */
619       if((parent_element &&
620           rdf_content_type_info[parent_element->child_content_type].rdf_processing) ||
621          !parent_element) {
622 
623         /* Save pointers to some RDF M&S attributes */
624 
625         /* If RDF namespace-prefixed attributes */
626         if(attr->nspace && attr->nspace->is_rdf_ms) {
627           const unsigned char *attr_name = attr->local_name;
628           int j;
629 
630           for(j = 0; j <= RDF_NS_LAST; j++)
631             if(!strcmp((const char*)attr_name,
632                        raptor_rdf_ns_terms_info[j].name)) {
633               element->rdf_attr[j] = attr->value;
634               element->rdf_attr_count++;
635               /* Delete it if it was stored elsewhere */
636 #ifdef RAPTOR_DEBUG_VERBOSE
637               RAPTOR_DEBUG3("Found RDF namespace attribute '%s' URI %s\n",
638                             (char*)attr_name, attr->value);
639 #endif
640               /* make sure value isn't deleted from qname structure */
641               attr->value = NULL;
642               raptor_free_qname(attr);
643               attr = NULL;
644               break;
645             }
646         } /* end if RDF namespaced-prefixed attributes */
647 
648         if(!attr)
649           continue;
650 
651         /* If non namespace-prefixed RDF attributes found on an element */
652         if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES) &&
653            !attr->nspace) {
654           const unsigned char *attr_name = attr->local_name;
655           int j;
656 
657           for(j = 0; j <= RDF_NS_LAST; j++)
658             if(!strcmp((const char*)attr_name,
659                        raptor_rdf_ns_terms_info[j].name)) {
660               element->rdf_attr[j] = attr->value;
661               element->rdf_attr_count++;
662               if(!raptor_rdf_ns_terms_info[j].allowed_unprefixed_on_attribute)
663                 raptor_parser_warning(rdf_parser,
664                                       "Using rdf attribute '%s' without the RDF namespace has been deprecated.",
665                                       attr_name);
666 
667               /* Delete it if it was stored elsewhere */
668               /* make sure value isn't deleted from qname structure */
669               attr->value = NULL;
670               raptor_free_qname(attr);
671               attr = NULL;
672               break;
673             }
674         } /* end if non-namespace prefixed RDF attributes */
675 
676         if(!attr)
677           continue;
678 
679       } /* end if leave literal XML alone */
680 
681       if(attr)
682         new_named_attrs[offset++] = attr;
683     }
684 
685     /* new attribute count is set from attributes that haven't been skipped */
686     ns_attributes_count = offset;
687     if(!ns_attributes_count) {
688       /* all attributes were deleted so delete the new array */
689       RAPTOR_FREE(raptor_qname_array, new_named_attrs);
690       new_named_attrs = NULL;
691     }
692 
693     RAPTOR_FREE(raptor_qname_array, named_attrs);
694     named_attrs = new_named_attrs;
695     raptor_xml_element_set_attributes(xml_element,
696                                       named_attrs, ns_attributes_count);
697   } /* end if ns_attributes_count */
698 
699 
700   /* start from unknown; if we have a parent, it may set this */
701   element->state = RAPTOR_STATE_UNKNOWN;
702   element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN;
703 
704   if(element->parent &&
705      element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN) {
706     element->content_type = element->parent->child_content_type;
707 
708     if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE &&
709        element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
710        element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
711       raptor_qname* parent_el_name;
712       parent_el_name = raptor_xml_element_get_name(element->parent->xml_element);
713       /* If parent has an rdf:resource, this element should not be here */
714       raptor_parser_error(rdf_parser,
715                           "property element '%s' has multiple object node elements, skipping.",
716                           parent_el_name->local_name);
717       element->state = RAPTOR_STATE_SKIPPING;
718       element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
719 
720     } else {
721       if(!element->parent->child_state) {
722         raptor_parser_fatal_error(rdf_parser,
723                                   "%s: Internal error: no parent element child_state set",
724                                   __FUNCTION__);
725         return;
726       }
727 
728       element->state = element->parent->child_state;
729       element->parent->xml_element->content_element_seen++;
730       count_bumped++;
731 
732       /* leave literal XML alone */
733       if(!rdf_content_type_info[element->content_type].cdata_allowed) {
734         if(element->parent->xml_element->content_element_seen &&
735            element->parent->xml_element->content_cdata_seen) {
736           raptor_qname* parent_el_name;
737 
738           parent_el_name = raptor_xml_element_get_name(element->parent->xml_element);
739           /* Uh oh - mixed content, the parent element has cdata too */
740           raptor_parser_warning(rdf_parser, "element '%s' has mixed content.",
741                                 parent_el_name->local_name);
742         }
743 
744         /* If there is some existing all-whitespace content cdata
745          * before this node element, delete it
746          */
747         if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES &&
748            element->parent->xml_element->content_element_seen &&
749            element->parent->content_cdata_all_whitespace &&
750            element->parent->xml_element->content_cdata_length) {
751 
752           element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
753 
754           raptor_free_stringbuffer(element->parent->xml_element->content_cdata_sb);
755           element->parent->xml_element->content_cdata_sb = NULL;
756           element->parent->xml_element->content_cdata_length = 0;
757         }
758 
759       } /* end if leave literal XML alone */
760 
761     } /* end if parent has no rdf:resource */
762 
763   } /* end if element->parent */
764 
765 
766 #ifdef RAPTOR_DEBUG_VERBOSE
767   RAPTOR_DEBUG2("Using content type %s\n",
768                 rdf_content_type_info[element->content_type].name);
769 
770   fprintf(stderr, "raptor_rdfxml_start_element_handler: Start ns-element: ");
771   raptor_print_xml_element(xml_element, stderr);
772 #endif
773 
774 
775   /* Check for non namespaced stuff when not in a parseType literal, other */
776   if(rdf_content_type_info[element->content_type].rdf_processing) {
777     const raptor_namespace* ns;
778 
779     ns = raptor_xml_element_get_name(xml_element)->nspace;
780     /* The element */
781 
782     /* If has no namespace or the namespace has no name (xmlns="") */
783     if((!ns || (ns && !raptor_namespace_get_uri(ns))) && element->parent) {
784       raptor_qname* parent_el_name;
785 
786       parent_el_name = raptor_xml_element_get_name(element->parent->xml_element);
787 
788       raptor_parser_error(rdf_parser,
789                           "Using an element '%s' without a namespace is forbidden.",
790                           parent_el_name->local_name);
791       element->state = RAPTOR_STATE_SKIPPING;
792       /* Remove count above so that parent thinks this is empty */
793       if(count_bumped)
794         element->parent->xml_element->content_element_seen--;
795       element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
796     }
797 
798 
799     /* Check for any remaining non-namespaced attributes */
800     if(named_attrs) {
801       for(i = 0; i < ns_attributes_count; i++) {
802         raptor_qname *attr = named_attrs[i];
803         /* Check if any attributes are non-namespaced */
804         if(!attr->nspace ||
805            (attr->nspace && !raptor_namespace_get_uri(attr->nspace))) {
806           raptor_parser_error(rdf_parser,
807                               "Using an attribute '%s' without a namespace is forbidden.",
808                               attr->local_name);
809           raptor_free_qname(attr);
810           named_attrs[i] = NULL;
811         }
812       }
813     }
814   }
815 
816 
817   if(element->rdf_attr[RDF_NS_aboutEach] ||
818      element->rdf_attr[RDF_NS_aboutEachPrefix]) {
819     raptor_parser_warning(rdf_parser,
820                           "element '%s' has aboutEach / aboutEachPrefix, skipping.",
821                           raptor_xml_element_get_name(xml_element)->local_name);
822     element->state = RAPTOR_STATE_SKIPPING;
823     /* Remove count above so that parent thinks this is empty */
824     if(count_bumped)
825       element->parent->xml_element->content_element_seen--;
826     element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
827   }
828 
829   /* Right, now ready to enter the grammar */
830   raptor_rdfxml_start_element_grammar(rdf_parser, element);
831 
832   return;
833 }
834 
835 
836 static void
raptor_rdfxml_end_element_handler(void * user_data,raptor_xml_element * xml_element)837 raptor_rdfxml_end_element_handler(void *user_data,
838                                   raptor_xml_element* xml_element)
839 {
840   raptor_parser* rdf_parser;
841   raptor_rdfxml_parser* rdf_xml_parser;
842   raptor_rdfxml_element* element;
843 
844   rdf_parser = (raptor_parser*)user_data;
845   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
846 
847   if(!rdf_parser->failed) {
848     raptor_rdfxml_update_document_locator(rdf_parser);
849 
850     raptor_rdfxml_end_element_grammar(rdf_parser,
851                                       rdf_xml_parser->current_element);
852   }
853 
854   element = raptor_rdfxml_element_pop(rdf_xml_parser);
855   if(element) {
856     if(element->parent) {
857       /* Do not change this; PROPERTYELT will turn into MEMBER if necessary
858        * See the switch case for MEMBER / PROPERTYELT where the test is done.
859        *
860        * PARSETYPE_RESOURCE should never be propogated up since it
861        * will turn the next child (node) element into a property
862        */
863       if(element->state != RAPTOR_STATE_MEMBER_PROPERTYELT &&
864          element->state != RAPTOR_STATE_PARSETYPE_RESOURCE)
865         element->parent->child_state = element->state;
866     }
867 
868     raptor_free_rdfxml_element(element);
869   }
870 }
871 
872 
873 /* cdata (and ignorable whitespace for libxml).
874  * s 0 terminated is for libxml
875  */
876 static void
raptor_rdfxml_characters_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s,int len)877 raptor_rdfxml_characters_handler(void *user_data,
878                                  raptor_xml_element* xml_element,
879                                  const unsigned char *s, int len)
880 {
881   raptor_parser* rdf_parser = (raptor_parser*)user_data;
882 
883   raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 0);
884 }
885 
886 
887 /* cdata (and ignorable whitespace for libxml).
888  * s is 0 terminated for libxml2
889  */
890 static void
raptor_rdfxml_cdata_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s,int len)891 raptor_rdfxml_cdata_handler(void *user_data, raptor_xml_element* xml_element,
892                             const unsigned char *s, int len)
893 {
894   raptor_parser* rdf_parser = (raptor_parser*)user_data;
895 
896   raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 1);
897 }
898 
899 
900 /* comment handler
901  * s is 0 terminated
902  */
903 static void
raptor_rdfxml_comment_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s)904 raptor_rdfxml_comment_handler(void *user_data, raptor_xml_element* xml_element,
905                               const unsigned char *s)
906 {
907   raptor_parser* rdf_parser = (raptor_parser*)user_data;
908   raptor_rdfxml_parser* rdf_xml_parser;
909   raptor_rdfxml_element* element;
910 
911   if(rdf_parser->failed || !xml_element)
912     return;
913 
914   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
915   element = rdf_xml_parser->current_element;
916 
917   if(element) {
918     if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
919       raptor_xml_writer_comment(rdf_xml_parser->xml_writer, s);
920   }
921 
922 
923 #ifdef RAPTOR_DEBUG_VERBOSE
924   RAPTOR_DEBUG2("XML Comment '%s'\n", s);
925 #endif
926 }
927 
928 
/* DAML+OIL namespace URI and its length in bytes (no terminating NUL).
 * The length is computed from the literal itself so the two cannot
 * drift apart.
 */
#define DAML_NAMESPACE_URI_LITERAL "http://www.daml.org/2001/03/daml+oil#"
static const unsigned char* const daml_namespace_uri_string = (const unsigned char*)DAML_NAMESPACE_URI_LITERAL;
static const int daml_namespace_uri_string_len = (int)(sizeof(DAML_NAMESPACE_URI_LITERAL) - 1);
931 
932 
933 static int
raptor_rdfxml_parse_init(raptor_parser * rdf_parser,const char * name)934 raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name)
935 {
936   raptor_rdfxml_parser* rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
937   raptor_sax2* sax2;
938   raptor_world* world = rdf_parser->world;
939 
940   /* Allocate sax2 object */
941   sax2 = raptor_new_sax2(rdf_parser->world, &rdf_parser->locator, rdf_parser);
942   rdf_xml_parser->sax2 = sax2;
943   if(!sax2)
944     return 1;
945 
946   /* Initialize sax2 element handlers */
947   raptor_sax2_set_start_element_handler(sax2, raptor_rdfxml_start_element_handler);
948   raptor_sax2_set_end_element_handler(sax2, raptor_rdfxml_end_element_handler);
949   raptor_sax2_set_characters_handler(sax2, raptor_rdfxml_characters_handler);
950   raptor_sax2_set_cdata_handler(sax2, raptor_rdfxml_cdata_handler);
951   raptor_sax2_set_comment_handler(sax2, raptor_rdfxml_comment_handler);
952   raptor_sax2_set_namespace_handler(sax2, raptor_rdfxml_sax2_new_namespace_handler);
953 
954   /* Allocate uris */
955   RAPTOR_DAML_NS_URI(rdf_xml_parser) = raptor_new_uri_from_counted_string(world,
956                                                                           daml_namespace_uri_string,
957                                                                           daml_namespace_uri_string_len);
958 
959   RAPTOR_DAML_List_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"List");
960   RAPTOR_DAML_first_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser) ,(const unsigned char *)"first");
961   RAPTOR_DAML_rest_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"rest");
962   RAPTOR_DAML_nil_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"nil");
963 
964   /* Check for uri allocation failures */
965   if(!RAPTOR_DAML_NS_URI(rdf_xml_parser) ||
966      !RAPTOR_DAML_List_URI(rdf_xml_parser) ||
967      !RAPTOR_DAML_first_URI(rdf_xml_parser) ||
968      !RAPTOR_DAML_rest_URI(rdf_xml_parser) ||
969      !RAPTOR_DAML_nil_URI(rdf_xml_parser))
970     return 1;
971 
972   /* Everything succeeded */
973   return 0;
974 }
975 
976 
977 static int
raptor_rdfxml_parse_start(raptor_parser * rdf_parser)978 raptor_rdfxml_parse_start(raptor_parser* rdf_parser)
979 {
980   raptor_uri *uri = rdf_parser->base_uri;
981   raptor_rdfxml_parser* rdf_xml_parser;
982 
983   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
984 
985   /* base URI required for RDF/XML */
986   if(!uri)
987     return 1;
988 
989   /* Optionally normalize language to lowercase
990    * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier
991    */
992   raptor_sax2_set_option(rdf_xml_parser->sax2,
993                          RAPTOR_OPTION_NORMALIZE_LANGUAGE, NULL,
994                          RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NORMALIZE_LANGUAGE));
995 
996   /* Optionally forbid internal network and file requests in the XML parser */
997   raptor_sax2_set_option(rdf_xml_parser->sax2,
998                          RAPTOR_OPTION_NO_NET, NULL,
999                          RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET));
1000   raptor_sax2_set_option(rdf_xml_parser->sax2,
1001                          RAPTOR_OPTION_NO_FILE, NULL,
1002                          RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_FILE));
1003   raptor_sax2_set_option(rdf_xml_parser->sax2,
1004                          RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES, NULL,
1005                          RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES));
1006   if(rdf_parser->uri_filter)
1007     raptor_sax2_set_uri_filter(rdf_xml_parser->sax2, rdf_parser->uri_filter,
1008                                rdf_parser->uri_filter_user_data);
1009 
1010   raptor_sax2_parse_start(rdf_xml_parser->sax2, uri);
1011 
1012   /* Delete any existing id_set */
1013   if(rdf_xml_parser->id_set) {
1014     raptor_free_id_set(rdf_xml_parser->id_set);
1015     rdf_xml_parser->id_set = NULL;
1016   }
1017 
1018   /* Create a new id_set if needed */
1019   if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID)) {
1020     rdf_xml_parser->id_set = raptor_new_id_set(rdf_parser->world);
1021     if(!rdf_xml_parser->id_set)
1022       return 1;
1023   }
1024 
1025   return 0;
1026 }
1027 
1028 
1029 static void
raptor_rdfxml_parse_terminate(raptor_parser * rdf_parser)1030 raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser)
1031 {
1032   raptor_rdfxml_parser* rdf_xml_parser;
1033   raptor_rdfxml_element* element;
1034   int i;
1035 
1036   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
1037 
1038   if(rdf_xml_parser->sax2) {
1039     raptor_free_sax2(rdf_xml_parser->sax2);
1040     rdf_xml_parser->sax2 = NULL;
1041   }
1042 
1043   while( (element = raptor_rdfxml_element_pop(rdf_xml_parser)) )
1044     raptor_free_rdfxml_element(element);
1045 
1046 
1047   for(i = 0; i < RAPTOR_RDFXML_N_CONCEPTS; i++) {
1048     raptor_uri* concept_uri = rdf_xml_parser->concepts[i];
1049     if(concept_uri) {
1050       raptor_free_uri(concept_uri);
1051       rdf_xml_parser->concepts[i] = NULL;
1052     }
1053   }
1054 
1055   if(rdf_xml_parser->id_set) {
1056     raptor_free_id_set(rdf_xml_parser->id_set);
1057     rdf_xml_parser->id_set = NULL;
1058   }
1059 
1060   if (rdf_xml_parser->xml_writer) {
1061     raptor_free_xml_writer(rdf_xml_parser->xml_writer);
1062     rdf_xml_parser->xml_writer = NULL;
1063   }
1064 
1065   if (rdf_xml_parser->iostream) {
1066     raptor_free_iostream(rdf_xml_parser->iostream);
1067     rdf_xml_parser->iostream = NULL;
1068   }
1069 
1070   if (rdf_xml_parser->xml_content) {
1071     RAPTOR_FREE(char*, rdf_xml_parser->xml_content);
1072     rdf_xml_parser->xml_content = NULL;
1073     rdf_xml_parser->xml_content_length = 0;
1074   }
1075 }
1076 
1077 
1078 static int
raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)1079 raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory* factory,
1080                                      const unsigned char *buffer, size_t len,
1081                                      const unsigned char *identifier,
1082                                      const unsigned char *suffix,
1083                                      const char *mime_type)
1084 {
1085   int score = 0;
1086 
1087   if(suffix) {
1088     if(!strcmp((const char*)suffix, "rdf") ||
1089        !strcmp((const char*)suffix, "rdfs") ||
1090        !strcmp((const char*)suffix, "foaf") ||
1091        !strcmp((const char*)suffix, "doap") ||
1092        !strcmp((const char*)suffix, "owl") ||
1093        !strcmp((const char*)suffix, "daml"))
1094       score = 9;
1095     if(!strcmp((const char*)suffix, "rss"))
1096       score = 3;
1097   }
1098 
1099   if(identifier) {
1100     if(strstr((const char*)identifier, "rss1"))
1101       score += 5;
1102     else if(!suffix && strstr((const char*)identifier, "rss"))
1103       score += 3;
1104     else if(!suffix && strstr((const char*)identifier, "rdf"))
1105       score += 2;
1106     else if(!suffix && strstr((const char*)identifier, "RDF"))
1107       score += 2;
1108   }
1109 
1110   if(mime_type) {
1111     if(strstr((const char*)mime_type, "html"))
1112       score -= 4;
1113     else if(!strcmp((const char*)mime_type, "text/rdf"))
1114       score += 7;
1115     else if(!strcmp((const char*)mime_type, "application/xml"))
1116       score += 5;
1117   }
1118 
1119   if(buffer && len) {
1120     /* Check it's an XML namespace declared and not N3 or Turtle which
1121      * mention the namespace URI but not in this form.
1122      */
1123 #define  HAS_RDF_XMLNS1 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1124 #define  HAS_RDF_XMLNS2 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1125 #define  HAS_RDF_XMLNS3 (raptor_memstr((const char*)buffer, len, "xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1126 #define  HAS_RDF_XMLNS4 (raptor_memstr((const char*)buffer, len, "xmlns='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1127 #define  HAS_RDF_ENTITY1 (raptor_memstr((const char*)buffer, len, "!ENTITY rdf 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'") != NULL)
1128 #define  HAS_RDF_ENTITY2 (raptor_memstr((const char*)buffer, len, "!ENTITY rdf \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"") != NULL)
1129 #define  HAS_RDF_ENTITY3 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"&rdf;\"") != NULL)
1130 #define  HAS_RDF_ENTITY4 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='&rdf;'") != NULL)
1131 #define  HAS_HTML_NS (raptor_memstr((const char*)buffer, len, "http://www.w3.org/1999/xhtml") != NULL)
1132 #define  HAS_HTML_ROOT (raptor_memstr((const char*)buffer, len, "<html") != NULL)
1133 
1134     if(!HAS_HTML_NS && !HAS_HTML_ROOT &&
1135        (HAS_RDF_XMLNS1 || HAS_RDF_XMLNS2 || HAS_RDF_XMLNS3 || HAS_RDF_XMLNS4 ||
1136         HAS_RDF_ENTITY1 || HAS_RDF_ENTITY2 || HAS_RDF_ENTITY3 || HAS_RDF_ENTITY4)
1137       ) {
1138       int has_rdf_RDF = (raptor_memstr((const char*)buffer, len, "<rdf:RDF") != NULL);
1139       int has_rdf_Description = (raptor_memstr((const char*)buffer, len, "rdf:Description") != NULL);
1140       int has_rdf_about = (raptor_memstr((const char*)buffer, len, "rdf:about") != NULL);
1141 
1142       score += 7;
1143       if(has_rdf_RDF)
1144         score++;
1145       if(has_rdf_Description)
1146         score++;
1147       if(has_rdf_about)
1148         score++;
1149     }
1150   }
1151 
1152   return score;
1153 }
1154 
1155 
1156 
1157 static int
raptor_rdfxml_parse_chunk(raptor_parser * rdf_parser,const unsigned char * buffer,size_t len,int is_end)1158 raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser,
1159                           const unsigned char *buffer,
1160                           size_t len, int is_end)
1161 {
1162   raptor_rdfxml_parser* rdf_xml_parser;
1163   int rc;
1164 
1165   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
1166   if(rdf_parser->failed)
1167     return 1;
1168 
1169   rc = raptor_sax2_parse_chunk(rdf_xml_parser->sax2, buffer, len, is_end);
1170 
1171   if(is_end) {
1172     if(rdf_parser->emitted_default_graph) {
1173       raptor_parser_end_graph(rdf_parser, NULL, 0);
1174       rdf_parser->emitted_default_graph--;
1175     }
1176   }
1177 
1178   return rc;
1179 }
1180 
1181 
1182 static void
raptor_rdfxml_generate_statement(raptor_parser * rdf_parser,raptor_term * subject_term,raptor_uri * predicate_uri,raptor_term * object_term,raptor_term * reified_term,raptor_rdfxml_element * bag_element)1183 raptor_rdfxml_generate_statement(raptor_parser *rdf_parser,
1184                                  raptor_term *subject_term,
1185                                  raptor_uri *predicate_uri,
1186                                  raptor_term *object_term,
1187                                  raptor_term *reified_term,
1188                                  raptor_rdfxml_element* bag_element)
1189 {
1190   raptor_statement *statement = &rdf_parser->statement;
1191   raptor_term* predicate_term = NULL;
1192   int free_reified_term = 0;
1193 
1194   if(rdf_parser->failed)
1195     return;
1196 
1197 #ifdef RAPTOR_DEBUG_VERBOSE
1198   if(!subject_term)
1199     RAPTOR_FATAL1("Statement has no subject\n");
1200 
1201   if(!predicate_uri)
1202     RAPTOR_FATAL1("Statement has no predicate\n");
1203 
1204   if(!object_term)
1205     RAPTOR_FATAL1("Statement has no object\n");
1206 
1207 #endif
1208 
1209   predicate_term = raptor_new_term_from_uri(rdf_parser->world, predicate_uri);
1210   if(!predicate_term)
1211     return;
1212 
1213   statement->subject = subject_term;
1214   statement->predicate = predicate_term;
1215   statement->object = object_term;
1216 
1217 #ifdef RAPTOR_DEBUG_VERBOSE
1218   fprintf(stderr, "raptor_rdfxml_generate_statement: Generating statement: ");
1219   raptor_statement_print(statement, stderr);
1220   fputc('\n', stderr);
1221 #endif
1222 
1223   if(!rdf_parser->emitted_default_graph) {
1224     raptor_parser_start_graph(rdf_parser, NULL, 0);
1225     rdf_parser->emitted_default_graph++;
1226   }
1227 
1228   if(!rdf_parser->statement_handler)
1229     goto generate_tidy;
1230 
1231   /* Generate the statement; or is it a fact? */
1232   (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1233 
1234 
1235   /* the bagID mess */
1236   if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID) &&
1237      bag_element && bag_element->bag) {
1238     raptor_term* bag = bag_element->bag;
1239     raptor_uri* bag_predicate_uri = NULL;
1240     raptor_term* bag_predicate_term = NULL;
1241 
1242     statement->subject = bag;
1243 
1244     bag_element->last_bag_ordinal++;
1245 
1246     /* new URI object */
1247     bag_predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world,
1248                                                         bag_element->last_bag_ordinal);
1249     if(!bag_predicate_uri)
1250       goto generate_tidy;
1251 
1252     bag_predicate_term = raptor_new_term_from_uri(rdf_parser->world,
1253                                                   bag_predicate_uri);
1254     raptor_free_uri(bag_predicate_uri);
1255 
1256     if(!bag_predicate_term)
1257       goto generate_tidy;
1258 
1259     statement->predicate = bag_predicate_term;
1260 
1261     if(!reified_term || !reified_term->value.blank.string) {
1262       unsigned char *reified_id = NULL;
1263 
1264       /* reified_term is NULL so generate a bag ID */
1265       reified_id = raptor_world_generate_bnodeid(rdf_parser->world);
1266       if(!reified_id)
1267         goto generate_tidy;
1268 
1269       reified_term = raptor_new_term_from_blank(rdf_parser->world, reified_id);
1270       RAPTOR_FREE(char*, reified_id);
1271 
1272       if(!reified_term)
1273         goto generate_tidy;
1274       free_reified_term = 1;
1275     }
1276 
1277     statement->object = reified_term;
1278     (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1279 
1280     if(bag_predicate_term)
1281       raptor_free_term(bag_predicate_term);
1282   }
1283 
1284 
1285   /* return if is there no reified ID (that is valid) */
1286   if(!reified_term || !reified_term->value.blank.string)
1287     goto generate_tidy;
1288 
1289 
1290   /* otherwise generate reified statements */
1291 
1292   statement->subject = reified_term;
1293   statement->predicate = RAPTOR_RDF_type_term(rdf_parser->world);
1294   statement->object = RAPTOR_RDF_Statement_term(rdf_parser->world);
1295   (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1296 
1297   /* statement->subject = reified_term; */
1298   statement->predicate = RAPTOR_RDF_subject_term(rdf_parser->world);
1299   statement->object = subject_term;
1300   (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1301 
1302 
1303   /* statement->subject = reified_term; */
1304   statement->predicate = RAPTOR_RDF_predicate_term(rdf_parser->world);
1305   statement->object = predicate_term;
1306   (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1307 
1308   /* statement->subject = reified_term; */
1309   statement->predicate = RAPTOR_RDF_object_term(rdf_parser->world);
1310   statement->object = object_term;
1311   (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1312 
1313 
1314  generate_tidy:
1315   /* Tidy up things allocated here */
1316   if(predicate_term)
1317     raptor_free_term(predicate_term);
1318   if(free_reified_term && reified_term)
1319     raptor_free_term(reified_term);
1320 }
1321 
1322 
1323 
1324 /**
1325  * raptor_rdfxml_element_has_property_attributes:
1326  * @element: element with the property attributes
1327  *
1328  * Return true if the element has at least one property attribute.
1329  *
1330  **/
1331 static int
raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element * element)1332 raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element *element)
1333 {
1334   int i;
1335 
1336   if(element->xml_element->attribute_count > 0)
1337     return 1;
1338 
1339   /* look for rdf: properties */
1340   for(i = 0; i <= RDF_NS_LAST; i++) {
1341     if(element->rdf_attr[i] &&
1342        raptor_rdf_ns_terms_info[i].type != RAPTOR_TERM_TYPE_UNKNOWN)
1343       return 1;
1344   }
1345   return 0;
1346 }
1347 
1348 
1349 /**
1350  * raptor_rdfxml_process_property_attributes:
1351  * @rdf_parser: Raptor parser object
1352  * @attributes_element: element with the property attributes
1353  * @resource_element: element that defines the resource URI
1354  *                    subject->value etc.
1355  * @property_node_identifier: Use this identifier for the resource URI
1356  *   and count any ordinals for it locally
1357  *
1358  * Process the property attributes for an element for a given resource.
1359  *
1360  **/
1361 static int
raptor_rdfxml_process_property_attributes(raptor_parser * rdf_parser,raptor_rdfxml_element * attributes_element,raptor_rdfxml_element * resource_element,raptor_term * property_node_identifier)1362 raptor_rdfxml_process_property_attributes(raptor_parser *rdf_parser,
1363                                           raptor_rdfxml_element *attributes_element,
1364                                           raptor_rdfxml_element *resource_element,
1365                                           raptor_term *property_node_identifier)
1366 {
1367   unsigned int i;
1368   raptor_term *resource_identifier;
1369 
1370   resource_identifier = property_node_identifier ? property_node_identifier : resource_element->subject;
1371 
1372 
1373   /* Process attributes as propAttr* = * (propName="string")*
1374    */
1375   for(i = 0; i < attributes_element->xml_element->attribute_count; i++) {
1376     raptor_qname* attr = attributes_element->xml_element->attributes[i];
1377     const unsigned char *name;
1378     const unsigned char *value;
1379     int handled = 0;
1380 
1381     if(!attr)
1382       continue;
1383 
1384     name = attr->local_name;
1385     value = attr->value;
1386 
1387     if(!attr->nspace) {
1388       raptor_rdfxml_update_document_locator(rdf_parser);
1389       raptor_parser_error(rdf_parser,
1390                           "Using property attribute '%s' without a namespace is forbidden.",
1391                           name);
1392       continue;
1393     }
1394 
1395 
1396     if(!raptor_unicode_check_utf8_nfc_string(value, strlen((const char*)value),
1397                                              NULL)) {
1398       const char *message;
1399 
1400       message = "Property attribute '%s' has a string not in Unicode Normal Form C: %s";
1401       raptor_rdfxml_update_document_locator(rdf_parser);
1402       if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL))
1403         raptor_parser_error(rdf_parser, message, name, value);
1404       else
1405         raptor_parser_warning(rdf_parser, message, name, value);
1406       continue;
1407     }
1408 
1409 
1410     /* Generate the property statement using one of these properties:
1411      * 1) rdf:_n
1412      * 2) the URI from the rdf:* attribute where allowed
1413      * 3) otherwise forbidden (including rdf:li)
1414      */
1415     if(attr->nspace->is_rdf_ms) {
1416       /* is rdf: namespace */
1417 
1418       if(*name == '_') {
1419         int ordinal;
1420 
1421         /* recognise rdf:_ */
1422         name++;
1423         ordinal = raptor_check_ordinal(name);
1424         if(ordinal < 1) {
1425           raptor_rdfxml_update_document_locator(rdf_parser);
1426           raptor_parser_error(rdf_parser,
1427                               "Illegal ordinal value %d in property attribute '%s' seen on containing element '%s'.",
1428                               ordinal, attr->local_name, name);
1429         }
1430       } else {
1431         int rc;
1432 
1433         raptor_rdfxml_update_document_locator(rdf_parser);
1434 
1435         rc = raptor_rdfxml_check_propertyAttribute_name((const char*)name);
1436         if(!rc)
1437           raptor_parser_error(rdf_parser,
1438                               "RDF term %s is forbidden as a property attribute.",
1439                               name);
1440         else if(rc < 0)
1441           raptor_parser_warning(rdf_parser,
1442                                 "Unknown RDF namespace property attribute '%s'.",
1443                                 name);
1444       }
1445 
1446     } /* end is RDF namespace property */
1447 
1448 
1449     if(!handled) {
1450       raptor_term* object_term;
1451 
1452       object_term = raptor_new_term_from_literal(rdf_parser->world,
1453                                                  (unsigned char*)value,
1454                                                  NULL, NULL);
1455 
1456       /* else not rdf: namespace or unknown in rdf: namespace so
1457        * generate a statement with a literal object
1458        */
1459       raptor_rdfxml_generate_statement(rdf_parser,
1460                                        resource_identifier,
1461                                        attr->uri,
1462                                        object_term,
1463                                        NULL, /* Property attributes are never reified*/
1464                                        resource_element);
1465 
1466       raptor_free_term(object_term);
1467     }
1468 
1469   } /* end for ... attributes */
1470 
1471 
1472   /* Handle rdf property attributes
1473    * (only rdf:type and rdf:value at present)
1474    */
1475   for(i = 0; i <= RDF_NS_LAST; i++) {
1476     const unsigned char *value = attributes_element->rdf_attr[i];
1477     size_t value_len;
1478     int object_is_literal;
1479     raptor_uri *property_uri;
1480     raptor_term* object_term;
1481 
1482     if(!value)
1483       continue;
1484 
1485     value_len = strlen((const char*)value);
1486 
1487     object_is_literal = (raptor_rdf_ns_terms_info[i].type == RAPTOR_TERM_TYPE_LITERAL);
1488 
1489     if(raptor_rdf_ns_terms_info[i].type == RAPTOR_TERM_TYPE_UNKNOWN) {
1490       const char *name = raptor_rdf_ns_terms_info[i].name;
1491       int rc = raptor_rdfxml_check_propertyAttribute_name(name);
1492       if(!rc) {
1493         raptor_rdfxml_update_document_locator(rdf_parser);
1494         raptor_parser_error(rdf_parser,
1495                             "RDF term %s is forbidden as a property attribute.",
1496                             name);
1497         continue;
1498       } else if(rc < 0)
1499         raptor_parser_warning(rdf_parser,
1500                               "Unknown RDF namespace property attribute '%s'.",
1501                               name);
1502     }
1503 
1504     if(object_is_literal &&
1505        !raptor_unicode_check_utf8_nfc_string(value, value_len, NULL)) {
1506       const char *message;
1507       message = "Property attribute '%s' has a string not in Unicode Normal Form C: %s";
1508       raptor_rdfxml_update_document_locator(rdf_parser);
1509       if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL))
1510         raptor_parser_error(rdf_parser, message,
1511                             raptor_rdf_ns_terms_info[i].name, value);
1512       else
1513         raptor_parser_warning(rdf_parser, message,
1514                               raptor_rdf_ns_terms_info[i].name, value);
1515       continue;
1516     }
1517 
1518     property_uri = raptor_new_uri_for_rdf_concept(rdf_parser->world,
1519                                                   (const unsigned char*)raptor_rdf_ns_terms_info[i].name);
1520 
1521     if(object_is_literal) {
1522       object_term = raptor_new_term_from_literal(rdf_parser->world,
1523                                                  (unsigned char*)value,
1524                                                  NULL, NULL);
1525     } else {
1526       raptor_uri *base_uri;
1527       raptor_uri *object_uri;
1528       base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser);
1529       object_uri = raptor_new_uri_relative_to_base(rdf_parser->world,
1530                                                    base_uri, value);
1531       object_term = raptor_new_term_from_uri(rdf_parser->world, object_uri);
1532       raptor_free_uri(object_uri);
1533     }
1534 
1535     raptor_rdfxml_generate_statement(rdf_parser,
1536                                      resource_identifier,
1537                                      property_uri,
1538                                      object_term,
1539                                      NULL, /* Property attributes are never reified*/
1540                                      resource_element);
1541 
1542     raptor_free_term(object_term);
1543 
1544     raptor_free_uri(property_uri);
1545 
1546   } /* end for rdf:property values */
1547 
1548   return 0;
1549 }
1550 
1551 
1552 static void
raptor_rdfxml_start_element_grammar(raptor_parser * rdf_parser,raptor_rdfxml_element * element)1553 raptor_rdfxml_start_element_grammar(raptor_parser *rdf_parser,
1554                                     raptor_rdfxml_element *element)
1555 {
1556   raptor_rdfxml_parser *rdf_xml_parser;
1557   int finished;
1558   raptor_state state;
1559   raptor_xml_element* xml_element;
1560   raptor_qname* el_qname;
1561   const unsigned char *el_name;
1562   int element_in_rdf_ns;
1563   int rc = 0;
1564   raptor_uri* base_uri;
1565   raptor_uri* element_name_uri;
1566 
1567   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
1568 
1569   xml_element = element->xml_element;
1570   el_qname = raptor_xml_element_get_name(xml_element);
1571   el_name = el_qname->local_name;
1572   element_in_rdf_ns = (el_qname->nspace && el_qname->nspace->is_rdf_ms);
1573   base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser);
1574   element_name_uri = el_qname->uri;
1575 
1576   state = element->state;
1577 #ifdef RAPTOR_DEBUG_VERBOSE
1578   RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state));
1579 #endif
1580 
1581   finished = 0;
1582   while(!finished) {
1583 
1584     switch(state) {
1585       case RAPTOR_STATE_SKIPPING:
1586         element->child_state = state;
1587         element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
1588         finished = 1;
1589         break;
1590 
1591       case RAPTOR_STATE_UNKNOWN:
1592         /* found <rdf:RDF> ? */
1593 
1594         if(element_in_rdf_ns) {
1595           if(raptor_uri_equals(element_name_uri,
1596                                RAPTOR_RDF_RDF_URI(rdf_parser->world))) {
1597             element->child_state = RAPTOR_STATE_NODE_ELEMENT_LIST;
1598             element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES;
1599             /* Yes - need more content before can continue,
1600              * so wait for another element
1601              */
1602             finished = 1;
1603             break;
1604           }
1605           if(raptor_uri_equals(element_name_uri,
1606                                RAPTOR_RDF_Description_URI(rdf_parser->world))) {
1607             state = RAPTOR_STATE_DESCRIPTION;
1608             element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
1609             /* Yes - found something so move immediately to description */
1610             break;
1611           }
1612 
1613           if(element_in_rdf_ns) {
1614             rc = raptor_rdfxml_check_nodeElement_name((const char*)el_name);
1615             if(!rc) {
1616               raptor_parser_error(rdf_parser,
1617                                   "rdf:%s is forbidden as a node element.",
1618                                   el_name);
1619               state = RAPTOR_STATE_SKIPPING;
1620               element->child_state = RAPTOR_STATE_SKIPPING;
1621               finished = 1;
1622               break;
1623             } else if(rc < 0) {
1624               raptor_parser_warning(rdf_parser,
1625                                     "rdf:%s is an unknown RDF namespaced element.",
1626                                     el_name);
1627             }
1628           }
1629         }
1630 
1631         /* If scanning for element, can continue */
1632         if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING)) {
1633           finished = 1;
1634           break;
1635         }
1636 
1637         /* Otherwise the choice of the next state can be made
1638          * from the current element by the OBJ state
1639          */
1640         state = RAPTOR_STATE_NODE_ELEMENT_LIST;
1641         element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES;
1642         break;
1643 
1644 
1645       case RAPTOR_STATE_NODE_ELEMENT_LIST:
1646         /* Handling
1647          *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList
1648          *
1649          * Everything goes to nodeElement
1650          */
1651 
1652         state = RAPTOR_STATE_NODE_ELEMENT;
1653 
1654         element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
1655 
1656         break;
1657 
1658 
1659 
1660       case RAPTOR_STATE_DESCRIPTION:
1661       case RAPTOR_STATE_NODE_ELEMENT:
1662       case RAPTOR_STATE_PARSETYPE_RESOURCE:
1663       case RAPTOR_STATE_PARSETYPE_COLLECTION:
1664         /* Handling <rdf:Description> or other node element
1665          *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement
1666          *
1667          * or a property element acting as a node element for
1668          * rdf:parseType="Resource"
1669          *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt
1670          * or rdf:parseType="Collection" (and daml:Collection)
1671          *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt
1672          *
1673          * Only create a bag if bagID given
1674          */
1675 
1676         if(!element_name_uri) {
1677           /* We cannot handle this */
1678           raptor_parser_warning(rdf_parser, "Using node element '%s' without a namespace is forbidden.",
1679                                 el_qname->local_name);
1680           raptor_rdfxml_update_document_locator(rdf_parser);
1681           element->state = RAPTOR_STATE_SKIPPING;
1682           element->child_state = RAPTOR_STATE_SKIPPING;
1683           finished = 1;
1684           break;
1685         }
1686 
1687         if(element_in_rdf_ns) {
1688           rc = raptor_rdfxml_check_nodeElement_name((const char*)el_name);
1689           if(!rc) {
1690             raptor_parser_error(rdf_parser,
1691                                 "rdf:%s is forbidden as a node element.",
1692                                 el_name);
1693             state = RAPTOR_STATE_SKIPPING;
1694             element->state = RAPTOR_STATE_SKIPPING;
1695             element->child_state = RAPTOR_STATE_SKIPPING;
1696             finished = 1;
1697             break;
1698           } else if(rc < 0) {
1699             raptor_parser_warning(rdf_parser,
1700                                   "rdf:%s is an unknown RDF namespaced element.",
1701                                   el_name);
1702           }
1703         }
1704 
1705         if(element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
1706            element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION &&
1707            element->parent &&
1708            (element->parent->state == RAPTOR_STATE_PROPERTYELT ||
1709             element->parent->state == RAPTOR_STATE_MEMBER_PROPERTYELT) &&
1710            element->parent->xml_element->content_element_seen > 1) {
1711           raptor_rdfxml_update_document_locator(rdf_parser);
1712           raptor_parser_error(rdf_parser, "The enclosing property already has an object");
1713           state = RAPTOR_STATE_SKIPPING;
1714           element->child_state = RAPTOR_STATE_SKIPPING;
1715           finished = 1;
1716           break;
1717         }
1718 
1719         if(state == RAPTOR_STATE_NODE_ELEMENT ||
1720            state == RAPTOR_STATE_DESCRIPTION ||
1721            state == RAPTOR_STATE_PARSETYPE_COLLECTION) {
1722           if(element_in_rdf_ns &&
1723              raptor_uri_equals(element_name_uri,
1724                                RAPTOR_RDF_Description_URI(rdf_parser->world)))
1725             state = RAPTOR_STATE_DESCRIPTION;
1726           else
1727             state = RAPTOR_STATE_NODE_ELEMENT;
1728         }
1729 
1730 
1731         if((element->rdf_attr[RDF_NS_ID]!=NULL) +
1732            (element->rdf_attr[RDF_NS_about]!=NULL) +
1733            (element->rdf_attr[RDF_NS_nodeID]!=NULL) > 1) {
1734           raptor_rdfxml_update_document_locator(rdf_parser);
1735           raptor_parser_error(rdf_parser, "Multiple attributes of rdf:ID, rdf:about and rdf:nodeID on element '%s' - only one allowed.", el_name);
1736         }
1737 
1738         if(element->rdf_attr[RDF_NS_ID]) {
1739           unsigned char* subject_id;
1740           raptor_uri* subject_uri;
1741 
1742           subject_id = (unsigned char*)element->rdf_attr[RDF_NS_ID];
1743 
1744           if(!raptor_valid_xml_ID(rdf_parser, subject_id)) {
1745             raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'",
1746                                 subject_id);
1747             state = RAPTOR_STATE_SKIPPING;
1748             element->child_state = RAPTOR_STATE_SKIPPING;
1749             finished = 1;
1750             break;
1751           }
1752           if(raptor_rdfxml_record_ID(rdf_parser, element, subject_id)) {
1753             raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'",
1754                                 subject_id);
1755             state = RAPTOR_STATE_SKIPPING;
1756             element->child_state = RAPTOR_STATE_SKIPPING;
1757             finished = 1;
1758             break;
1759           }
1760 
1761           /* after this, subject_id is the owner of the ID string */
1762           element->rdf_attr[RDF_NS_ID] = NULL;
1763 
1764           subject_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri,
1765                                                subject_id);
1766           RAPTOR_FREE(char*, subject_id);
1767 
1768           if(!subject_uri)
1769             goto oom;
1770           element->subject = raptor_new_term_from_uri(rdf_parser->world,
1771                                                       subject_uri);
1772           raptor_free_uri(subject_uri);
1773 
1774           if(!element->subject)
1775             goto oom;
1776 
1777         } else if(element->rdf_attr[RDF_NS_about]) {
1778           raptor_uri* subject_uri;
1779 
1780           subject_uri = raptor_new_uri_relative_to_base(rdf_parser->world,
1781                                                         base_uri,
1782                                                         (const unsigned char*)element->rdf_attr[RDF_NS_about]);
1783           if(!subject_uri)
1784             goto oom;
1785 
1786           element->subject = raptor_new_term_from_uri(rdf_parser->world,
1787                                                       subject_uri);
1788           raptor_free_uri(subject_uri);
1789 
1790           RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_about]);
1791           element->rdf_attr[RDF_NS_about] = NULL;
1792           if(!element->subject)
1793             goto oom;
1794 
1795         } else if(element->rdf_attr[RDF_NS_nodeID]) {
1796           unsigned char* subject_id;
1797           subject_id = raptor_world_internal_generate_id(rdf_parser->world,
1798                                                          (unsigned char*)element->rdf_attr[RDF_NS_nodeID]);
1799           if(!subject_id)
1800             goto oom;
1801 
1802           element->subject = raptor_new_term_from_blank(rdf_parser->world,
1803                                                         subject_id);
1804           RAPTOR_FREE(char*, subject_id);
1805 
1806           element->rdf_attr[RDF_NS_nodeID] = NULL;
1807           if(!element->subject)
1808             goto oom;
1809 
1810           if(!raptor_valid_xml_ID(rdf_parser, element->subject->value.blank.string)) {
1811             raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'",
1812                                 (const char*)element->subject->value.blank.string);
1813             state = RAPTOR_STATE_SKIPPING;
1814             element->child_state = RAPTOR_STATE_SKIPPING;
1815             finished = 1;
1816             break;
1817           }
1818         } else if(element->parent &&
1819                    element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
1820                    element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION &&
1821                    element->parent->object) {
1822           /* copy from parent (property element), it has a URI for us */
1823           element->subject = raptor_term_copy(element->parent->object);
1824         } else {
1825           unsigned char* subject_id;
1826           subject_id = raptor_world_generate_bnodeid(rdf_parser->world);
1827           if(!subject_id)
1828             goto oom;
1829 
1830           element->subject = raptor_new_term_from_blank(rdf_parser->world,
1831                                                         subject_id);
1832           RAPTOR_FREE(char*, subject_id);
1833 
1834           if(!element->subject)
1835             goto oom;
1836         }
1837 
1838 
1839         if(element->rdf_attr[RDF_NS_bagID]) {
1840           if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) {
1841             unsigned char* bag_id;
1842             raptor_uri* bag_uri = NULL;
1843 
1844             bag_id = (unsigned char*)element->rdf_attr[RDF_NS_bagID];
1845             element->rdf_attr[RDF_NS_bagID] = NULL;
1846 
1847             bag_uri = raptor_new_uri_from_id(rdf_parser->world,
1848                                              base_uri, bag_id);
1849             if(!bag_uri) {
1850               RAPTOR_FREE(char*, bag_id);
1851               goto oom;
1852             }
1853 
1854             element->bag = raptor_new_term_from_uri(rdf_parser->world, bag_uri);
1855             raptor_free_uri(bag_uri);
1856 
1857             if(!raptor_valid_xml_ID(rdf_parser, bag_id)) {
1858               raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'",
1859                                   bag_id);
1860               state = RAPTOR_STATE_SKIPPING;
1861               element->child_state = RAPTOR_STATE_SKIPPING;
1862               finished = 1;
1863               RAPTOR_FREE(char*, bag_id);
1864               break;
1865             }
1866             if(raptor_rdfxml_record_ID(rdf_parser, element, bag_id)) {
1867               raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'",
1868                                   bag_id);
1869               state = RAPTOR_STATE_SKIPPING;
1870               element->child_state = RAPTOR_STATE_SKIPPING;
1871               finished = 1;
1872               RAPTOR_FREE(char*, bag_id);
1873               break;
1874             }
1875 
1876             RAPTOR_FREE(char*, bag_id);
1877             raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated.");
1878 
1879 
1880             raptor_rdfxml_generate_statement(rdf_parser,
1881                                              element->bag,
1882                                              RAPTOR_RDF_type_URI(rdf_parser->world),
1883                                              RAPTOR_RDF_Bag_term(rdf_parser->world),
1884                                              NULL,
1885                                              NULL);
1886           } else {
1887             /* bagID forbidden */
1888             raptor_parser_error(rdf_parser, "rdf:bagID is forbidden.");
1889             state = RAPTOR_STATE_SKIPPING;
1890             element->child_state = RAPTOR_STATE_SKIPPING;
1891             finished = 1;
1892             break;
1893           }
1894         }
1895 
1896 
1897         if(element->parent) {
1898 
1899           /* In a rdf:parseType="Collection" the resources are appended
1900            * to the list at the genid element->parent->tail_id
1901            */
1902           if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION ||
1903              element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
1904             /* <idList> rdf:type rdf:List */
1905             const unsigned char * idList;
1906             raptor_uri *predicate_uri;
1907             raptor_term* idList_term;
1908             raptor_term* object_term;
1909 
1910             idList = raptor_world_generate_bnodeid(rdf_parser->world);
1911             if(!idList)
1912               goto oom;
1913             /* idList string is saved below in element->parent->tail_id */
1914 
1915             idList_term = raptor_new_term_from_blank(rdf_parser->world, idList);
1916             if(!idList_term) {
1917               RAPTOR_FREE(char*, idList);
1918               goto oom;
1919             }
1920 
1921             if((element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ||
1922                RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST)) {
1923               raptor_uri* class_uri = NULL;
1924 
1925               if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
1926                 class_uri = RAPTOR_DAML_List_URI(rdf_xml_parser);
1927                 object_term = raptor_new_term_from_uri(rdf_parser->world,
1928                                                        class_uri);
1929               } else
1930                 object_term = raptor_term_copy(RAPTOR_RDF_List_term(rdf_parser->world));
1931 
1932               raptor_rdfxml_generate_statement(rdf_parser,
1933                                                idList_term,
1934                                                RAPTOR_RDF_type_URI(rdf_parser->world),
1935                                                object_term,
1936                                                NULL,
1937                                                element);
1938               raptor_free_term(object_term);
1939             }
1940 
1941             predicate_uri = (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_first_URI(rdf_xml_parser) : RAPTOR_RDF_first_URI(rdf_parser->world);
1942 
1943             /* <idList> rdf:first <element->uri> */
1944             raptor_rdfxml_generate_statement(rdf_parser,
1945                                              idList_term,
1946                                              predicate_uri,
1947                                              element->subject,
1948                                              NULL,
1949                                              NULL);
1950 
1951             /* If there is no rdf:parseType="Collection" */
1952             if(!element->parent->tail_id) {
1953               /* Free any existing object still around.
1954                * I suspect this can never happen.
1955                */
1956               if(element->parent->object)
1957                 raptor_free_term(element->parent->object);
1958 
1959               element->parent->object = raptor_new_term_from_blank(rdf_parser->world,
1960                                                                    idList);
1961             } else {
1962               raptor_term* tail_id_term;
1963 
1964               tail_id_term = raptor_new_term_from_blank(rdf_parser->world,
1965                                                         element->parent->tail_id);
1966 
1967               predicate_uri = (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_parser->world);
1968 
1969               /* _:tail_id rdf:rest _:listRest */
1970               raptor_rdfxml_generate_statement(rdf_parser,
1971                                                tail_id_term,
1972                                                predicate_uri,
1973                                                idList_term,
1974                                                NULL,
1975                                                NULL);
1976 
1977               raptor_free_term(tail_id_term);
1978             }
1979 
1980             /* update new tail */
1981             if(element->parent->tail_id)
1982               RAPTOR_FREE(char*, (char*)element->parent->tail_id);
1983 
1984             element->parent->tail_id = idList;
1985 
1986             raptor_free_term(idList_term);
1987           } else if(element->parent->state != RAPTOR_STATE_UNKNOWN &&
1988                     element->state != RAPTOR_STATE_PARSETYPE_RESOURCE) {
1989             /* If there is a parent element (property) containing this
1990              * element (node) and it has no object, set it from this subject
1991              */
1992 
1993             if(element->parent->object) {
1994               raptor_rdfxml_update_document_locator(rdf_parser);
1995               raptor_parser_error(rdf_parser,
1996                                   "Tried to set multiple objects of a statement");
1997             } else {
1998               /* Store URI of this node in our parent as the property object */
1999               element->parent->object = raptor_term_copy(element->subject);
2000               element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2001             }
2002 
2003           }
2004         }
2005 
2006 
2007         /* If this is a node element, generate the rdf:type statement
2008          * from this node
2009          */
2010         if(state == RAPTOR_STATE_NODE_ELEMENT) {
2011           raptor_term* el_name_term;
2012 
2013           el_name_term = raptor_new_term_from_uri(rdf_parser->world,
2014                                                   element_name_uri);
2015 
2016           raptor_rdfxml_generate_statement(rdf_parser,
2017                                            element->subject,
2018                                            RAPTOR_RDF_type_URI(rdf_parser->world),
2019                                            el_name_term,
2020                                            element->reified,
2021                                            element);
2022 
2023           raptor_free_term(el_name_term);
2024         }
2025 
2026         if(raptor_rdfxml_process_property_attributes(rdf_parser, element,
2027                                                      element, NULL))
2028           goto oom;
2029 
2030         /* Both productions now need some more content or property
2031          * elements before any more work can be done.
2032          */
2033 
2034         element->child_state = RAPTOR_STATE_PROPERTYELT;
2035         element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2036         finished = 1;
2037         break;
2038 
2039 
2040       case RAPTOR_STATE_PARSETYPE_OTHER:
2041         /* FALLTHROUGH */
2042 
2043       case RAPTOR_STATE_PARSETYPE_LITERAL:
2044         raptor_xml_writer_start_element(rdf_xml_parser->xml_writer, xml_element);
2045         element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL;
2046         element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2047 
2048         finished = 1;
2049         break;
2050 
2051         /* Handle all the detail of the various options of property element
2052          *   http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
2053          *
2054          * All the attributes must be scanned here to see what additional
2055          * property element work is needed.  No triples are generated
2056          * until the end of this element, until it is clear if the
2057          * element was empty.
2058          */
2059       case RAPTOR_STATE_MEMBER_PROPERTYELT:
2060       case RAPTOR_STATE_PROPERTYELT:
2061 
2062         if(!element_name_uri) {
2063           raptor_parser_error(rdf_parser, "Using property element '%s' without a namespace is forbidden.",
2064                               raptor_xml_element_get_name(element->parent->xml_element)->local_name);
2065           raptor_rdfxml_update_document_locator(rdf_parser);
2066           element->state = RAPTOR_STATE_SKIPPING;
2067           element->child_state = RAPTOR_STATE_SKIPPING;
2068           finished = 1;
2069           break;
2070         }
2071 
2072         /* Handling rdf:li as a property, noting special processing */
2073         if(element_in_rdf_ns &&
2074            raptor_uri_equals(element_name_uri,
2075                              RAPTOR_RDF_li_URI(rdf_parser->world))) {
2076           state = RAPTOR_STATE_MEMBER_PROPERTYELT;
2077         }
2078 
2079 
2080         if(element_in_rdf_ns) {
2081           rc = raptor_rdfxml_check_propertyElement_name((const char*)el_name);
2082           if(!rc) {
2083             raptor_parser_error(rdf_parser,
2084                                 "rdf:%s is forbidden as a property element.",
2085                                 el_name);
2086             state = RAPTOR_STATE_SKIPPING;
2087             element->child_state = RAPTOR_STATE_SKIPPING;
2088             finished = 1;
2089             break;
2090           } else if(rc < 0) {
2091             raptor_parser_warning(rdf_parser,
2092                                   "rdf:%s is an unknown RDF namespaced element.",
2093                                   el_name);
2094           }
2095         }
2096 
2097 
2098         /* rdf:ID on a property element - reify a statement.
2099          * Allowed on all property element forms
2100          */
2101         if(element->rdf_attr[RDF_NS_ID]) {
2102           raptor_uri *reified_uri;
2103 
2104           element->reified_id = element->rdf_attr[RDF_NS_ID];
2105           element->rdf_attr[RDF_NS_ID] = NULL;
2106           reified_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri,
2107                                                element->reified_id);
2108           if(!reified_uri)
2109             goto oom;
2110 
2111           element->reified = raptor_new_term_from_uri(rdf_parser->world,
2112                                                       reified_uri);
2113           raptor_free_uri(reified_uri);
2114 
2115           if(!element->reified)
2116             goto oom;
2117 
2118           if(!raptor_valid_xml_ID(rdf_parser, element->reified_id)) {
2119             raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'",
2120                                 element->reified_id);
2121             state = RAPTOR_STATE_SKIPPING;
2122             element->child_state = RAPTOR_STATE_SKIPPING;
2123             finished = 1;
2124             break;
2125           }
2126           if(raptor_rdfxml_record_ID(rdf_parser, element, element->reified_id)) {
2127             raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'",
2128                                 element->reified_id);
2129             state = RAPTOR_STATE_SKIPPING;
2130             element->child_state = RAPTOR_STATE_SKIPPING;
2131             finished = 1;
2132             break;
2133           }
2134         }
2135 
2136         /* rdf:datatype on a property element.
2137          * Only allowed for
2138          *   http://www.w3.org/TR/rdf-syntax-grammar/#literalPropertyElt
2139          */
2140         if(element->rdf_attr[RDF_NS_datatype]) {
2141           raptor_uri *datatype_uri;
2142 
2143           datatype_uri = raptor_new_uri_relative_to_base(rdf_parser->world,
2144                                                          base_uri,
2145                                                          (const unsigned char*)element->rdf_attr[RDF_NS_datatype]);
2146           element->object_literal_datatype = datatype_uri;
2147           RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_datatype]);
2148           element->rdf_attr[RDF_NS_datatype] = NULL;
2149           if(!element->object_literal_datatype)
2150             goto oom;
2151         }
2152 
2153         if(element->rdf_attr[RDF_NS_bagID]) {
2154 
2155           if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) {
2156 
2157             if(element->rdf_attr[RDF_NS_resource] ||
2158                element->rdf_attr[RDF_NS_parseType]) {
2159 
2160               raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on property element '%s' with an rdf:resource or rdf:parseType attribute.", el_name);
2161               /* prevent this being used later either */
2162               RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_bagID]);
2163               element->rdf_attr[RDF_NS_bagID] = NULL;
2164             } else {
2165               unsigned char* bag_id;
2166               raptor_uri* bag_uri;
2167 
2168               bag_id = (unsigned char*)element->rdf_attr[RDF_NS_bagID];
2169               element->rdf_attr[RDF_NS_bagID] = NULL;
2170               bag_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri,
2171                                                bag_id);
2172               if(!bag_uri) {
2173                 RAPTOR_FREE(char*, bag_id);
2174                 goto oom;
2175               }
2176 
2177               element->bag = raptor_new_term_from_uri(rdf_parser->world,
2178                                                       bag_uri);
2179               raptor_free_uri(bag_uri);
2180 
2181               if(!element->bag) {
2182                 RAPTOR_FREE(char*, bag_id);
2183                 goto oom;
2184               }
2185 
2186               if(!raptor_valid_xml_ID(rdf_parser, bag_id)) {
2187                 raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'",
2188                                     bag_id);
2189                 state = RAPTOR_STATE_SKIPPING;
2190                 element->child_state = RAPTOR_STATE_SKIPPING;
2191                 finished = 1;
2192                 RAPTOR_FREE(char*, bag_id);
2193                 break;
2194               }
2195               if(raptor_rdfxml_record_ID(rdf_parser, element, bag_id)) {
2196                 raptor_parser_error(rdf_parser,
2197                                     "Duplicated rdf:bagID value '%s'", bag_id);
2198                 state = RAPTOR_STATE_SKIPPING;
2199                 element->child_state = RAPTOR_STATE_SKIPPING;
2200                 RAPTOR_FREE(char*, bag_id);
2201                 finished = 1;
2202                 break;
2203               }
2204 
2205               RAPTOR_FREE(char*, bag_id);
2206               raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated.");
2207             }
2208           } else {
2209             /* bagID forbidden */
2210             raptor_parser_error(rdf_parser, "rdf:bagID is forbidden.");
2211             state = RAPTOR_STATE_SKIPPING;
2212             element->child_state = RAPTOR_STATE_SKIPPING;
2213             finished = 1;
2214             break;
2215           }
2216         } /* if rdf:bagID on property element */
2217 
2218 
2219         element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT;
2220 
2221         if(element->rdf_attr[RDF_NS_parseType]) {
2222           const unsigned char *parse_type;
2223           int i;
2224           int is_parseType_Literal = 0;
2225 
2226           parse_type = element->rdf_attr[RDF_NS_parseType];
2227 
2228           if(raptor_rdfxml_element_has_property_attributes(element)) {
2229             raptor_parser_error(rdf_parser, "Property attributes cannot be used with rdf:parseType='%s'", parse_type);
2230             state = RAPTOR_STATE_SKIPPING;
2231             element->child_state = RAPTOR_STATE_SKIPPING;
2232             finished = 1;
2233             break;
2234           }
2235 
2236           /* Check for bad combinations of things with parseType */
2237           for(i = 0; i <= RDF_NS_LAST; i++)
2238             if(element->rdf_attr[i] && i != RDF_NS_parseType) {
2239               raptor_parser_error(rdf_parser, "Attribute '%s' cannot be used with rdf:parseType='%s'", raptor_rdf_ns_terms_info[i].name, parse_type);
2240               state = RAPTOR_STATE_SKIPPING;
2241               element->child_state = RAPTOR_STATE_SKIPPING;
2242               break;
2243             }
2244 
2245 
2246           if(!strcmp((char*)parse_type, "Literal"))
2247             is_parseType_Literal = 1;
2248           else if(!strcmp((char*)parse_type, "Resource")) {
2249             unsigned char* subject_id;
2250 
2251             state = RAPTOR_STATE_PARSETYPE_RESOURCE;
2252             element->child_state = RAPTOR_STATE_PROPERTYELT;
2253             element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2254 
2255             /* create a node for the subject of the contained properties */
2256             subject_id = raptor_world_generate_bnodeid(rdf_parser->world);
2257             if(!subject_id)
2258               goto oom;
2259 
2260             element->subject = raptor_new_term_from_blank(rdf_parser->world,
2261                                                           subject_id);
2262             RAPTOR_FREE(char*, subject_id);
2263 
2264             if(!element->subject)
2265               goto oom;
2266           } else if(!strcmp((char*)parse_type, "Collection")) {
2267             /* An rdf:parseType="Collection" appears as a single node */
2268             element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2269             element->child_state = RAPTOR_STATE_PARSETYPE_COLLECTION;
2270             element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION;
2271           } else {
2272             if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES) &&
2273                !raptor_strcasecmp((char*)parse_type, "daml:collection")) {
2274                 /* A DAML collection appears as a single node */
2275                 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2276                 element->child_state = RAPTOR_STATE_PARSETYPE_COLLECTION;
2277                 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION;
2278             } else {
2279               if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_WARN_OTHER_PARSETYPES)) {
2280                 raptor_parser_warning(rdf_parser, "Unknown rdf:parseType value '%s' taken as 'Literal'", parse_type);
2281               }
2282               is_parseType_Literal = 1;
2283             }
2284 
2285           }
2286 
2287           if(is_parseType_Literal) {
2288             raptor_xml_writer* xml_writer;
2289 
2290             /* rdf:parseType="Literal" - explicitly or default
2291              * if the parseType value is not recognised
2292              */
2293             rdf_xml_parser->xml_content = NULL;
2294             rdf_xml_parser->xml_content_length = 0;
2295             rdf_xml_parser->iostream =
2296               raptor_new_iostream_to_string(rdf_parser->world,
2297                                             &rdf_xml_parser->xml_content,
2298                                             &rdf_xml_parser->xml_content_length,
2299                                             raptor_alloc_memory);
2300             if(!rdf_xml_parser->iostream)
2301               goto oom;
2302             xml_writer = raptor_new_xml_writer(rdf_parser->world, NULL,
2303                                                rdf_xml_parser->iostream);
2304             rdf_xml_parser->xml_writer = xml_writer;
2305             if(!rdf_xml_parser->xml_writer)
2306               goto oom;
2307 
2308             raptor_xml_writer_set_option(rdf_xml_parser->xml_writer,
2309                                          RAPTOR_OPTION_WRITER_XML_DECLARATION,
2310                                          NULL, 0);
2311 
2312             element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL;
2313             element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2314             element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2315           }
2316         } else {
2317 
2318           /* Can only be the empty property element case
2319            *   http://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt
2320            */
2321 
2322           /* The presence of the rdf:resource or rdf:nodeID
2323            * attributes is checked at element close time
2324            */
2325 
2326           /*
2327            * Assign reified URI here so we don't reify property attributes
2328            * using this id
2329            */
2330           if(element->reified_id && !element->reified) {
2331             raptor_uri* reified_uri;
2332             reified_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri,
2333                                                  element->reified_id);
2334             if(!reified_uri)
2335               goto oom;
2336             element->reified = raptor_new_term_from_uri(rdf_parser->world,
2337                                                         reified_uri);
2338             raptor_free_uri(reified_uri);
2339 
2340             if(!element->reified)
2341               goto oom;
2342           }
2343 
2344           if(element->rdf_attr[RDF_NS_resource] ||
2345              element->rdf_attr[RDF_NS_nodeID]) {
2346             /* Done - wait for the end of this element in order to
2347              * check the element was empty as expected */
2348             element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2349           } else {
2350             /* Otherwise process content in obj (value) state */
2351             element->child_state = RAPTOR_STATE_NODE_ELEMENT_LIST;
2352             element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT;
2353           }
2354         }
2355 
2356         finished = 1;
2357 
2358         break;
2359 
2360 
2361       case RAPTOR_STATE_INVALID:
2362       default:
2363         raptor_parser_fatal_error(rdf_parser,
2364                                   "%s Internal error - unexpected parser state %d - %s",
2365                                   __FUNCTION__,
2366                                   state, raptor_rdfxml_state_as_string(state));
2367         finished = 1;
2368 
2369     } /* end switch */
2370 
2371     if(state != element->state) {
2372       element->state = state;
2373 #ifdef RAPTOR_DEBUG_VERBOSE
2374       RAPTOR_DEBUG3("Moved to state %d - %s\n", state,
2375                     raptor_rdfxml_state_as_string(state));
2376 #endif
2377     }
2378 
2379   } /* end while */
2380 
2381 #ifdef RAPTOR_DEBUG_VERBOSE
2382   RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
2383 #endif
2384 
2385   return;
2386 
2387   oom:
2388   raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping");
2389   element->state = RAPTOR_STATE_SKIPPING;
2390 }
2391 
2392 
2393 static void
raptor_rdfxml_end_element_grammar(raptor_parser * rdf_parser,raptor_rdfxml_element * element)2394 raptor_rdfxml_end_element_grammar(raptor_parser *rdf_parser,
2395                                   raptor_rdfxml_element *element)
2396 {
2397   raptor_rdfxml_parser *rdf_xml_parser;
2398   raptor_state state;
2399   int finished;
2400   raptor_xml_element* xml_element = element->xml_element;
2401   raptor_qname* el_qname;
2402   const unsigned char *el_name;
2403   int element_in_rdf_ns;
2404   raptor_uri* element_name_uri;
2405 
2406   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
2407 
2408   el_qname = raptor_xml_element_get_name(xml_element);
2409   el_name = el_qname->local_name;
2410   element_in_rdf_ns= (el_qname->nspace && el_qname->nspace->is_rdf_ms);
2411   element_name_uri = el_qname->uri;
2412 
2413 
2414   state = element->state;
2415 #ifdef RAPTOR_DEBUG_VERBOSE
2416   RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state));
2417 #endif
2418 
2419   finished= 0;
2420   while(!finished) {
2421     switch(state) {
2422       case RAPTOR_STATE_SKIPPING:
2423         finished = 1;
2424         break;
2425 
2426       case RAPTOR_STATE_UNKNOWN:
2427         finished = 1;
2428         break;
2429 
2430       case RAPTOR_STATE_NODE_ELEMENT_LIST:
2431         if(element_in_rdf_ns &&
2432            raptor_uri_equals(element_name_uri,
2433                              RAPTOR_RDF_RDF_URI(rdf_parser->world))) {
2434           /* end of RDF - boo hoo */
2435           state = RAPTOR_STATE_UNKNOWN;
2436           finished = 1;
2437           break;
2438         }
2439         /* When scanning, another element ending is outside the RDF
2440          * world so this can happen without further work
2441          */
2442         if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING)) {
2443           state = RAPTOR_STATE_UNKNOWN;
2444           finished = 1;
2445           break;
2446         }
2447         /* otherwise found some junk after RDF content in an RDF-only
2448          * document (probably never get here since this would be
2449          * a mismatched XML tag and cause an error earlier)
2450          */
2451         raptor_rdfxml_update_document_locator(rdf_parser);
2452         raptor_parser_warning(rdf_parser,
2453                               "Element '%s' ended, expected end of RDF element",
2454                               el_name);
2455         state = RAPTOR_STATE_UNKNOWN;
2456         finished = 1;
2457         break;
2458 
2459 
2460       case RAPTOR_STATE_DESCRIPTION:
2461       case RAPTOR_STATE_NODE_ELEMENT:
2462       case RAPTOR_STATE_PARSETYPE_RESOURCE:
2463 
2464         /* If there is a parent element containing this element and
2465          * the parent isn't a description, has an identifier,
2466          * create the statement between this node using parent property
2467          * (Need to check for identifier so that top-level typed nodes
2468          * don't get connect to <rdf:RDF> parent element)
2469          */
2470         if(state == RAPTOR_STATE_NODE_ELEMENT &&
2471            element->parent && element->parent->subject) {
2472           raptor_rdfxml_generate_statement(rdf_parser,
2473                                            element->parent->subject,
2474                                            element_name_uri,
2475                                            element->subject,
2476                                            NULL,
2477                                            element);
2478         } else if(state == RAPTOR_STATE_PARSETYPE_RESOURCE &&
2479                   element->parent && element->parent->subject) {
2480           /* Handle rdf:li as the rdf:parseType="resource" property */
2481           if(element_in_rdf_ns &&
2482              raptor_uri_equals(element_name_uri,
2483                                RAPTOR_RDF_li_URI(rdf_parser->world))) {
2484             raptor_uri* ordinal_predicate_uri;
2485 
2486             element->parent->last_ordinal++;
2487             ordinal_predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, element->parent->last_ordinal);
2488 
2489             raptor_rdfxml_generate_statement(rdf_parser,
2490                                              element->parent->subject,
2491                                              ordinal_predicate_uri,
2492                                              element->subject,
2493                                              element->reified,
2494                                              element->parent);
2495             raptor_free_uri(ordinal_predicate_uri);
2496           } else {
2497             raptor_rdfxml_generate_statement(rdf_parser,
2498                                              element->parent->subject,
2499                                              element_name_uri,
2500                                              element->subject,
2501                                              element->reified,
2502                                              element->parent);
2503           }
2504         }
2505         finished = 1;
2506         break;
2507 
2508       case RAPTOR_STATE_PARSETYPE_COLLECTION:
2509 
2510         finished = 1;
2511         break;
2512 
2513       case RAPTOR_STATE_PARSETYPE_OTHER:
2514         /* FALLTHROUGH */
2515 
2516       case RAPTOR_STATE_PARSETYPE_LITERAL:
2517         element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2518 
2519         raptor_xml_writer_end_element(rdf_xml_parser->xml_writer, xml_element);
2520 
2521         finished = 1;
2522         break;
2523 
2524 
2525       case RAPTOR_STATE_PROPERTYELT:
2526       case RAPTOR_STATE_MEMBER_PROPERTYELT:
2527         /* A property element
2528          *   http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
2529          *
2530          * Literal content part is handled here.
2531          * The element content is handled in the internal states
2532          * Empty content is checked here.
2533          */
2534 
2535         if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
2536           if(xml_element->content_cdata_seen)
2537             element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
2538           else if(xml_element->content_element_seen)
2539             element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2540           else {
2541             /* Empty Literal */
2542             element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
2543           }
2544 
2545         }
2546 
2547 
2548         /* Handle terminating a rdf:parseType="Collection" list */
2549         if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION ||
2550            element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
2551           raptor_term* nil_term;
2552 
2553           if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
2554             raptor_uri* nil_uri = RAPTOR_DAML_nil_URI(rdf_xml_parser);
2555             nil_term = raptor_new_term_from_uri(rdf_parser->world, nil_uri);
2556           } else {
2557             nil_term = raptor_term_copy(RAPTOR_RDF_nil_term(rdf_parser->world));
2558           }
2559 
2560           if(!element->tail_id) {
2561             /* If No List: set object of statement to rdf:nil */
2562             element->object = raptor_term_copy(nil_term);
2563           } else {
2564             raptor_uri* rest_uri = NULL;
2565             raptor_term* tail_id_term;
2566 
2567             if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION)
2568               rest_uri =  RAPTOR_DAML_rest_URI(rdf_xml_parser);
2569             else
2570               rest_uri = RAPTOR_RDF_rest_URI(rdf_parser->world);
2571 
2572             tail_id_term = raptor_new_term_from_blank(rdf_parser->world,
2573                                                       element->tail_id);
2574 
2575             /* terminate the list */
2576             raptor_rdfxml_generate_statement(rdf_parser,
2577                                              tail_id_term,
2578                                              rest_uri,
2579                                              nil_term,
2580                                              NULL,
2581                                              NULL);
2582 
2583             raptor_free_term(tail_id_term);
2584           }
2585 
2586           raptor_free_term(nil_term);
2587 
2588         } /* end rdf:parseType="Collection" termination */
2589 
2590 
2591 #ifdef RAPTOR_DEBUG_VERBOSE
2592         RAPTOR_DEBUG3("Content type %s (%d)\n",
2593                       raptor_rdfxml_element_content_type_as_string(element->content_type),
2594                       element->content_type);
2595 #endif
2596 
2597         switch(element->content_type) {
2598           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE:
2599 
2600             if(raptor_rdfxml_element_has_property_attributes(element) &&
2601                element->child_state == RAPTOR_STATE_DESCRIPTION) {
2602               raptor_parser_error(rdf_parser,
2603                                   "Property element '%s' has both property attributes and a node element content",
2604                                   el_name);
2605               state = RAPTOR_STATE_SKIPPING;
2606               element->child_state = RAPTOR_STATE_SKIPPING;
2607               break;
2608             }
2609 
2610             if(!element->object) {
2611               if(element->rdf_attr[RDF_NS_resource]) {
2612                 raptor_uri* resource_uri;
2613                 resource_uri = raptor_new_uri_relative_to_base(rdf_parser->world,
2614                                                                raptor_rdfxml_inscope_base_uri(rdf_parser),
2615                                                                (const unsigned char*)element->rdf_attr[RDF_NS_resource]);
2616                 if(!resource_uri)
2617                   goto oom;
2618 
2619                 element->object = raptor_new_term_from_uri(rdf_parser->world,
2620                                                            resource_uri);
2621                 raptor_free_uri(resource_uri);
2622 
2623                 RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_resource]);
2624                 element->rdf_attr[RDF_NS_resource] = NULL;
2625                 if(!element->object)
2626                   goto oom;
2627                 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2628               } else if(element->rdf_attr[RDF_NS_nodeID]) {
2629                 unsigned char* resource_id;
2630                 resource_id = raptor_world_internal_generate_id(rdf_parser->world,
2631                                                                 (unsigned char*)element->rdf_attr[RDF_NS_nodeID]);
2632                 if(!resource_id)
2633                   goto oom;
2634 
2635                 element->object = raptor_new_term_from_blank(rdf_parser->world,
2636                                                              resource_id);
2637                 RAPTOR_FREE(char*, resource_id);
2638                 element->rdf_attr[RDF_NS_nodeID] = NULL;
2639                 if(!element->object)
2640                   goto oom;
2641 
2642                 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2643                 if(!raptor_valid_xml_ID(rdf_parser,
2644                                         element->object->value.blank.string)) {
2645                   raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", (const char*)element->object->value.blank.string);
2646                   state = RAPTOR_STATE_SKIPPING;
2647                   element->child_state = RAPTOR_STATE_SKIPPING;
2648                   break;
2649                 }
2650               } else {
2651                 unsigned char* resource_id;
2652                 resource_id = raptor_world_generate_bnodeid(rdf_parser->world);
2653                 if(!resource_id)
2654                   goto oom;
2655 
2656                 element->object = raptor_new_term_from_blank(rdf_parser->world,
2657                                                              resource_id);
2658                 RAPTOR_FREE(char*, resource_id);
2659 
2660                 if(!element->object)
2661                   goto oom;
2662                 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2663               }
2664 
2665               if(raptor_rdfxml_process_property_attributes(rdf_parser, element,
2666                                                            element->parent,
2667                                                            element->object))
2668                  goto oom;
2669 
2670             }
2671 
2672             /* We know object is a resource, so delete any unsignficant
2673              * whitespace so that FALLTHROUGH code below finds the object.
2674              */
2675             if(xml_element->content_cdata_length) {
2676               raptor_free_stringbuffer(xml_element->content_cdata_sb);
2677               xml_element->content_cdata_sb = NULL;
2678               xml_element->content_cdata_length = 0;
2679             }
2680 
2681             /* FALLTHROUGH */
2682           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL:
2683 
2684             if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {
2685 
2686               if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) {
2687                 /* Only an empty literal can have a rdf:bagID */
2688                 if(element->bag) {
2689                   if(xml_element->content_cdata_length > 0) {
2690                     raptor_parser_error(rdf_parser,
2691                                         "rdf:bagID is forbidden on a literal property element '%s'.",
2692                                         el_name);
2693 
2694                     /* prevent this being used later either */
2695                     element->rdf_attr[RDF_NS_bagID] = NULL;
2696                   } else {
2697                     raptor_rdfxml_generate_statement(rdf_parser,
2698                                                      element->bag,
2699                                                      RAPTOR_RDF_type_URI(rdf_parser->world),
2700                                                      RAPTOR_RDF_Bag_term(rdf_parser->world),
2701                                                      NULL,
2702                                                      NULL);
2703                   }
2704                 }
2705               } /* if rdf:bagID */
2706 
2707               /* If there is empty literal content with properties
2708                * generate a node to hang properties off
2709                */
2710               if(raptor_rdfxml_element_has_property_attributes(element) &&
2711                  xml_element->content_cdata_length > 0) {
2712                 raptor_parser_error(rdf_parser,
2713                                     "Literal property element '%s' has property attributes",
2714                                     el_name);
2715                 state = RAPTOR_STATE_SKIPPING;
2716                 element->child_state = RAPTOR_STATE_SKIPPING;
2717                 break;
2718               }
2719 
2720               if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL &&
2721                  raptor_rdfxml_element_has_property_attributes(element) &&
2722                  !element->object) {
2723                 unsigned char* object_id;
2724                 object_id = raptor_world_generate_bnodeid(rdf_parser->world);
2725                 if(!object_id)
2726                   goto oom;
2727 
2728                 element->object = raptor_new_term_from_blank(rdf_parser->world,
2729                                                              object_id);
2730                 RAPTOR_FREE(char*, object_id);
2731 
2732                 if(!element->object)
2733                   goto oom;
2734                 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2735               }
2736 
2737               if(raptor_rdfxml_process_property_attributes(rdf_parser, element,
2738                                                            element,
2739                                                            element->object))
2740                  goto oom;
2741             }
2742 
2743 
2744             /* just be friendly to older compilers and don't declare
2745              * variables in the middle of a block
2746              */
2747             if(1) {
2748               raptor_uri *predicate_uri = NULL;
2749               int predicate_ordinal = -1;
2750               raptor_term* object_term = NULL;
2751 
2752               if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
2753                 predicate_ordinal = ++element->parent->last_ordinal;
2754                 predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world,
2755                                                                 predicate_ordinal);
2756 
2757               } else {
2758                 predicate_uri = element_name_uri;
2759               }
2760 
2761 
2762               if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {
2763                 unsigned char* literal = NULL;
2764                 raptor_uri* literal_datatype;
2765                 unsigned char* literal_language = NULL;
2766 
2767                 /* an empty stringbuffer - empty CDATA - is OK */
2768                 if(raptor_stringbuffer_length(xml_element->content_cdata_sb)) {
2769                   literal = raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
2770                   if(!literal)
2771                     goto oom;
2772                 }
2773 
2774                 literal_datatype = element->object_literal_datatype;
2775                 if(!literal_datatype)
2776                   literal_language = (unsigned char*)raptor_sax2_inscope_xml_language(rdf_xml_parser->sax2);
2777 
2778                 if(!literal_datatype && literal &&
2779                    !raptor_unicode_check_utf8_nfc_string(literal,
2780                                                          xml_element->content_cdata_length,
2781                                                          NULL)) {
2782                   const char *message;
2783                   message = "Property element '%s' has a string not in Unicode Normal Form C: %s";
2784                   raptor_rdfxml_update_document_locator(rdf_parser);
2785                   if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL))
2786                     raptor_parser_error(rdf_parser, message, el_name, literal);
2787                   else
2788                     raptor_parser_warning(rdf_parser, message, el_name, literal);
2789                 }
2790 
2791                 object_term = raptor_new_term_from_literal(rdf_parser->world,
2792                                                            literal,
2793                                                            literal_datatype,
2794                                                            literal_language);
2795               } else {
2796                 object_term = raptor_term_copy(element->object);
2797               }
2798 
2799               raptor_rdfxml_generate_statement(rdf_parser,
2800                                                element->parent->subject,
2801                                                predicate_uri,
2802                                                object_term,
2803                                                element->reified,
2804                                                element->parent);
2805 
2806               if(predicate_ordinal >= 0)
2807                 raptor_free_uri(predicate_uri);
2808 
2809               raptor_free_term(object_term);
2810             }
2811 
2812             break;
2813 
2814         case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED:
2815         case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL:
2816             {
2817               unsigned char *buffer;
2818               size_t length;
2819               raptor_term* xmlliteral_term = NULL;
2820 
2821               if(rdf_xml_parser->xml_writer) {
2822                 raptor_xml_writer_flush(rdf_xml_parser->xml_writer);
2823 
2824                 raptor_free_iostream(rdf_xml_parser->iostream);
2825                 rdf_xml_parser->iostream = NULL;
2826 
2827                 buffer = (unsigned char*)rdf_xml_parser->xml_content;
2828                 length = rdf_xml_parser->xml_content_length;
2829               } else {
2830                 buffer = raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
2831                 length = xml_element->content_cdata_length;
2832               }
2833 
2834               if(!raptor_unicode_check_utf8_nfc_string(buffer, length, NULL)) {
2835                 const char *message;
2836                 message = "Property element '%s' has XML literal content not in Unicode Normal Form C: %s";
2837                 raptor_rdfxml_update_document_locator(rdf_parser);
2838                 if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL))
2839                   raptor_parser_error(rdf_parser, message, el_name, buffer);
2840                 else
2841                   raptor_parser_warning(rdf_parser, message, el_name, buffer);
2842               }
2843 
2844               xmlliteral_term = raptor_new_term_from_literal(rdf_parser->world,
2845                                                              buffer,
2846                                                              RAPTOR_RDF_XMLLiteral_URI(rdf_parser->world),
2847                                                              NULL);
2848 
2849               if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
2850                 raptor_uri* predicate_uri;
2851 
2852                 element->parent->last_ordinal++;
2853                 predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, element->parent->last_ordinal);
2854 
2855                 raptor_rdfxml_generate_statement(rdf_parser,
2856                                                  element->parent->subject,
2857                                                  predicate_uri,
2858                                                  xmlliteral_term,
2859                                                  element->reified,
2860                                                  element->parent);
2861 
2862                 raptor_free_uri(predicate_uri);
2863               } else {
2864                 raptor_rdfxml_generate_statement(rdf_parser,
2865                                                  element->parent->subject,
2866                                                  element_name_uri,
2867                                                  xmlliteral_term,
2868                                                  element->reified,
2869                                                  element->parent);
2870               }
2871 
2872               raptor_free_term(xmlliteral_term);
2873 
2874               /* Finish the xml writer iostream for parseType="Literal" */
2875               if(rdf_xml_parser->xml_writer) {
2876                 raptor_free_xml_writer(rdf_xml_parser->xml_writer);
2877                 rdf_xml_parser->xml_writer = NULL;
2878                 RAPTOR_FREE(char*, rdf_xml_parser->xml_content);
2879                 rdf_xml_parser->xml_content = NULL;
2880                 rdf_xml_parser->xml_content_length = 0;
2881               }
2882             }
2883 
2884           break;
2885 
2886           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION:
2887           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION:
2888 
2889           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES:
2890           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES:
2891           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT:
2892 
2893           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN:
2894           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST:
2895           default:
2896             raptor_parser_fatal_error(rdf_parser,
2897                                       "%s: Internal error in state RAPTOR_STATE_PROPERTYELT - got unexpected content type %s (%d)",
2898                                       __FUNCTION__,
2899                                       raptor_rdfxml_element_content_type_as_string(element->content_type),
2900                                       element->content_type);
2901         } /* end switch */
2902 
2903       finished = 1;
2904       break;
2905 
2906       case RAPTOR_STATE_INVALID:
2907       default:
2908         raptor_parser_fatal_error(rdf_parser,
2909                                   "%s: Internal error - unexpected parser state %d - %s",
2910                                   __FUNCTION__,
2911                                   state,
2912                                   raptor_rdfxml_state_as_string(state));
2913         finished = 1;
2914 
2915     } /* end switch */
2916 
2917     if(state != element->state) {
2918       element->state = state;
2919 #ifdef RAPTOR_DEBUG_VERBOSE
2920       RAPTOR_DEBUG3("Moved to state %d - %s\n", state,
2921                     raptor_rdfxml_state_as_string(state));
2922 #endif
2923     }
2924 
2925   } /* end while */
2926 
2927 #ifdef RAPTOR_DEBUG_VERBOSE
2928   RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
2929 #endif
2930 
2931   return;
2932 
2933   oom:
2934   raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping");
2935   element->state = RAPTOR_STATE_SKIPPING;
2936 }
2937 
2938 
2939 
2940 static void
raptor_rdfxml_cdata_grammar(raptor_parser * rdf_parser,const unsigned char * s,int len,int is_cdata)2941 raptor_rdfxml_cdata_grammar(raptor_parser *rdf_parser,
2942                             const unsigned char *s, int len,
2943                             int is_cdata)
2944 {
2945   raptor_rdfxml_parser* rdf_xml_parser;
2946   raptor_rdfxml_element* element;
2947   raptor_xml_element* xml_element;
2948   raptor_state state;
2949   int all_whitespace = 1;
2950   int i;
2951 
2952   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
2953 
2954   if(rdf_parser->failed)
2955     return;
2956 
2957 #ifdef RAPTOR_DEBUG_CDATA
2958   RAPTOR_DEBUG2("Adding characters (is_cdata=%d): '", is_cdata);
2959   (void)fwrite(s, 1, len, stderr);
2960   fprintf(stderr, "' (%d bytes)\n", len);
2961 #endif
2962 
2963   for(i = 0; i < len; i++)
2964     if(!isspace(s[i])) {
2965       all_whitespace = 0;
2966       break;
2967     }
2968 
2969   element = rdf_xml_parser->current_element;
2970 
2971   /* this file is very broke - probably not XML, whatever */
2972   if(!element)
2973     return;
2974 
2975   xml_element = element->xml_element;
2976 
2977   raptor_rdfxml_update_document_locator(rdf_parser);
2978 
2979   /* cdata never changes the parser state
2980    * and the containing element state always determines what to do.
2981    * Use the child_state first if there is one, since that applies
2982    */
2983   state = element->child_state;
2984 #ifdef RAPTOR_DEBUG_VERBOSE
2985   RAPTOR_DEBUG2("Working in state %s\n", raptor_rdfxml_state_as_string(state));
2986 #endif
2987 
2988 
2989 #ifdef RAPTOR_DEBUG_VERBOSE
2990   RAPTOR_DEBUG3("Content type %s (%d)\n",
2991                 raptor_rdfxml_element_content_type_as_string(element->content_type),
2992                 element->content_type);
2993 #endif
2994 
2995 
2996 
2997   if(state == RAPTOR_STATE_SKIPPING)
2998     return;
2999 
3000   if(state == RAPTOR_STATE_UNKNOWN) {
3001     /* Ignore all cdata if still looking for RDF */
3002     if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING))
3003       return;
3004 
3005     /* Ignore all whitespace cdata before first element */
3006     if(all_whitespace)
3007       return;
3008 
3009     /* This probably will never happen since that would make the
3010      * XML not be well-formed
3011      */
3012     raptor_parser_warning(rdf_parser, "Character data before RDF element.");
3013   }
3014 
3015 
3016   if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES) {
3017     /* If found non-whitespace content, move to literal content */
3018     if(!all_whitespace)
3019       element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
3020   }
3021 
3022 
3023   if(!rdf_content_type_info[element->child_content_type].whitespace_significant) {
3024 
3025     /* Whitespace is ignored except for literal or preserved content types */
3026     if(all_whitespace) {
3027 #ifdef RAPTOR_DEBUG_CDATA
3028       RAPTOR_DEBUG2("Ignoring whitespace cdata inside element '%s'\n",
3029                     raptor_xml_element_get_name(element->parent->xml_element)->local_name);
3030 #endif
3031       return;
3032     }
3033 
3034     if(xml_element->content_cdata_seen && xml_element->content_element_seen) {
3035       raptor_qname* parent_el_name;
3036 
3037       parent_el_name = raptor_xml_element_get_name(element->parent->xml_element);
3038       /* Uh oh - mixed content, this element has elements too */
3039       raptor_parser_warning(rdf_parser, "element '%s' has mixed content.",
3040                             parent_el_name->local_name);
3041     }
3042   }
3043 
3044 
3045   if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
3046     element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
3047 #ifdef RAPTOR_DEBUG_VERBOSE
3048     RAPTOR_DEBUG3("Content type changed to %s (%d)\n",
3049                   raptor_rdfxml_element_content_type_as_string(element->content_type),
3050                   element->content_type);
3051 #endif
3052   }
3053 
3054   if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
3055     raptor_xml_writer_cdata_counted(rdf_xml_parser->xml_writer, s, len);
3056   else {
3057     raptor_stringbuffer_append_counted_string(xml_element->content_cdata_sb,
3058                                               s, len, 1);
3059     element->content_cdata_all_whitespace &= all_whitespace;
3060 
3061     /* adjust stored length */
3062     xml_element->content_cdata_length += len;
3063   }
3064 
3065 
3066 #ifdef RAPTOR_DEBUG_CDATA
3067   RAPTOR_DEBUG3("Content cdata now: %d bytes\n",
3068                 xml_element->content_cdata_length);
3069 #endif
3070 #ifdef RAPTOR_DEBUG_VERBOSE
3071   RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
3072 #endif
3073 }
3074 
3075 
3076 
3077 /**
3078  * raptor_rdfxml_inscope_base_uri:
3079  * @rdf_parser: Raptor parser object
3080  *
3081  * Return the in-scope base URI.
3082  *
3083  * Looks for the innermost xml:base on an element or document URI
3084  *
3085  * Return value: The URI string value or NULL on failure.
3086  **/
3087 static raptor_uri*
raptor_rdfxml_inscope_base_uri(raptor_parser * rdf_parser)3088 raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser)
3089 {
3090   raptor_rdfxml_parser* rdf_xml_parser;
3091   raptor_uri* base_uri;
3092 
3093   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
3094 
3095   base_uri = raptor_sax2_inscope_base_uri(rdf_xml_parser->sax2);
3096   if(!base_uri)
3097     base_uri = rdf_parser->base_uri;
3098 
3099   return base_uri;
3100 }
3101 
3102 
3103 /**
3104  * raptor_rdfxml_record_ID:
3105  * @rdf_parser: Raptor parser object
3106  * @element: Current element
3107  * @id: ID string
3108  *
3109  * Record an rdf:ID / rdf:bagID value (with xml base) and check it hasn't been seen already.
3110  *
3111  * Record and check the ID values, if they have been seen already.
3112  * per in-scope-base URI.
3113  *
3114  * Return value: non-zero if already seen, or failure
3115  **/
3116 static int
raptor_rdfxml_record_ID(raptor_parser * rdf_parser,raptor_rdfxml_element * element,const unsigned char * id)3117 raptor_rdfxml_record_ID(raptor_parser *rdf_parser,
3118                         raptor_rdfxml_element *element,
3119                         const unsigned char *id)
3120 {
3121   raptor_rdfxml_parser *rdf_xml_parser;
3122   raptor_uri* base_uri;
3123   size_t id_len;
3124   int rc;
3125 
3126   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
3127 
3128   if(!RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID))
3129     return 0;
3130 
3131   base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser);
3132 
3133   id_len = strlen((const char*)id);
3134 
3135   rc = raptor_id_set_add(rdf_xml_parser->id_set, base_uri, id, id_len);
3136 
3137   return (rc != 0);
3138 }
3139 
3140 
3141 
3142 static void
raptor_rdfxml_update_document_locator(raptor_parser * rdf_parser)3143 raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser)
3144 {
3145   raptor_rdfxml_parser *rdf_xml_parser;
3146 
3147   rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
3148 
3149   raptor_sax2_update_document_locator(rdf_xml_parser->sax2,
3150                                       &rdf_parser->locator);
3151 }
3152 
3153 
3154 
3155 static void
raptor_rdfxml_parse_finish_factory(raptor_parser_factory * factory)3156 raptor_rdfxml_parse_finish_factory(raptor_parser_factory* factory)
3157 {
3158 }
3159 
3160 
/* Names this syntax is registered under; NULL-terminated list */
static const char* const rdfxml_names[3] = {
  "rdfxml",
  "raptor",
  NULL
};
3162 
/* URIs identifying the RDF/XML syntax; NULL-terminated list */
static const char* const rdfxml_uri_strings[3] = {
  /* W3C formats registry entry */
  "http://www.w3.org/ns/formats/RDF_XML",
  /* RDF/XML syntax specification */
  "http://www.w3.org/TR/rdf-syntax-grammar",
  /* end of list */
  NULL
};
3168 
3169 #define RDFXML_TYPES_COUNT 2
3170 static const raptor_type_q rdfxml_types[RDFXML_TYPES_COUNT + 1] = {
3171   { "application/rdf+xml", 19, 10},
3172   { "text/rdf", 8, 6},
3173   { NULL, 0, 0}
3174 };
3175 
3176 static int
raptor_rdfxml_parser_register_factory(raptor_parser_factory * factory)3177 raptor_rdfxml_parser_register_factory(raptor_parser_factory *factory)
3178 {
3179   int rc = 0;
3180 
3181   factory->desc.names = rdfxml_names;
3182 
3183   factory->desc.mime_types = rdfxml_types;
3184 
3185   factory->desc.label = "RDF/XML";
3186   factory->desc.uri_strings = rdfxml_uri_strings;
3187 
3188   factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI;
3189 
3190   factory->context_length     = sizeof(raptor_rdfxml_parser);
3191 
3192   factory->init      = raptor_rdfxml_parse_init;
3193   factory->terminate = raptor_rdfxml_parse_terminate;
3194   factory->start     = raptor_rdfxml_parse_start;
3195   factory->chunk     = raptor_rdfxml_parse_chunk;
3196   factory->finish_factory = raptor_rdfxml_parse_finish_factory;
3197   factory->recognise_syntax = raptor_rdfxml_parse_recognise_syntax;
3198 
3199   return rc;
3200 }
3201 
3202 
3203 int
raptor_init_parser_rdfxml(raptor_world * world)3204 raptor_init_parser_rdfxml(raptor_world* world)
3205 {
3206   return !raptor_world_register_parser_factory(world,
3207                                                &raptor_rdfxml_parser_register_factory);
3208 }
3209 
3210 
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
/*
 * Debug builds only (RAPTOR_DEBUG > 1): write a label followed by
 * the statistics of the parser's rdf:ID set to @stream.
 */
void
raptor_rdfxml_parser_stats_print(raptor_rdfxml_parser* rdf_xml_parser,
                                 FILE *stream)
{
  static const char heading[] = "rdf:ID set ";

  fputs(heading, stream);
  raptor_id_set_stats_print(rdf_xml_parser->id_set, stream);
}
#endif
3220