1 /* -*- Mode: c; c-basic-offset: 2 -*-
2  *
3  * raptor_rdfxml.c - Raptor RDF/XML Parser
4  *
5  * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
6  * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7  *
8  * This package is Free Software and part of Redland http://librdf.org/
9  *
10  * It is licensed under the following three licenses as alternatives:
11  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12  *   2. GNU General Public License (GPL) V2 or any newer version
13  *   3. Apache License, V2.0 or any newer version
14  *
15  * You may not use this file except in compliance with at least one of
16  * the above three licenses.
17  *
18  * See LICENSE.html or LICENSE.txt at the top of this package for the
19  * complete terms and further detail along with the license texts for
20  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21  *
22  *
23  */
24 
25 
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29 
30 #ifdef WIN32
31 #include <win32_raptor_config.h>
32 #endif
33 
34 
35 #include <stdio.h>
36 #include <string.h>
37 #include <ctype.h>
38 #include <stdarg.h>
39 #ifdef HAVE_ERRNO_H
40 #include <errno.h>
41 #endif
42 #ifdef HAVE_STDLIB_H
43 #include <stdlib.h>
44 #endif
45 
46 /* Raptor includes */
47 #include "raptor.h"
48 #include "raptor_internal.h"
49 
50 
51 /* Define these for far too much output */
52 #undef RAPTOR_DEBUG_VERBOSE
53 #undef RAPTOR_DEBUG_CDATA
54 
55 
56 /* Raptor structures */
57 
/*
 * Grammar states of the RDF/XML parser.
 *
 * The values are consecutive from 0 and are used to index
 * raptor_state_names[] below, so the two must be kept in the same order.
 */
typedef enum {
  /* Catch uninitialised state */
  RAPTOR_STATE_INVALID = 0,

  /* Skipping current tree of elements - used to recover from finding
   * illegal content, when parsing permissively.
   */
  RAPTOR_STATE_SKIPPING,

  /* Not in RDF grammar yet - searching for a start element.
   *
   * This can be <rdf:RDF> (goto NODE_ELEMENT_LIST) but since it is optional,
   * the start element can also be one of
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementURIs
   *
   * If RDF content is assumed, go straight to OBJ
   */
  RAPTOR_STATE_UNKNOWN,

  /* A list of node elements
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList
   */
  RAPTOR_STATE_NODE_ELEMENT_LIST,

  /* Found an <rdf:Description> */
  RAPTOR_STATE_DESCRIPTION,

  /* Found a property element
   *   http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
   */
  RAPTOR_STATE_PROPERTYELT,

  /* A property element that is an ordinal - rdf:li, rdf:_n
   */
  RAPTOR_STATE_MEMBER_PROPERTYELT,

  /* Found a node element
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement
   */
  RAPTOR_STATE_NODE_ELEMENT,

  /* A property element with rdf:parseType="Literal"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeLiteralPropertyElt
   */
  RAPTOR_STATE_PARSETYPE_LITERAL,

  /* A property element with rdf:parseType="Resource"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt
   */
  RAPTOR_STATE_PARSETYPE_RESOURCE,

  /* A property element with rdf:parseType="Collection"
   *  http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt
   *
   * (This also handles daml:Collection)
   */
  RAPTOR_STATE_PARSETYPE_COLLECTION,

  /* A property element with a rdf:parseType attribute and a value
   * not "Literal" or "Resource"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
   */
  RAPTOR_STATE_PARSETYPE_OTHER,

  /* Highest valid state value */
  RAPTOR_STATE_PARSETYPE_LAST = RAPTOR_STATE_PARSETYPE_OTHER
} raptor_state;


/*
 * Printable names of the parser states, indexed by raptor_state value.
 *
 * There is exactly one entry per state, in enum order.  (A stray extra
 * "propertyElt" entry used to sit before "Description", which shifted
 * the name of every later state by one.)
 */
static const char * const raptor_state_names[RAPTOR_STATE_PARSETYPE_LAST+1]={
  "INVALID",              /* RAPTOR_STATE_INVALID */
  "SKIPPING",             /* RAPTOR_STATE_SKIPPING */
  "UNKNOWN",              /* RAPTOR_STATE_UNKNOWN */
  "nodeElementList",      /* RAPTOR_STATE_NODE_ELEMENT_LIST */
  "Description",          /* RAPTOR_STATE_DESCRIPTION */
  "propertyElt",          /* RAPTOR_STATE_PROPERTYELT */
  "memberPropertyElt",    /* RAPTOR_STATE_MEMBER_PROPERTYELT */
  "nodeElement",          /* RAPTOR_STATE_NODE_ELEMENT */
  "parseTypeLiteral",     /* RAPTOR_STATE_PARSETYPE_LITERAL */
  "parseTypeResource",    /* RAPTOR_STATE_PARSETYPE_RESOURCE */
  "parseTypeCollection",  /* RAPTOR_STATE_PARSETYPE_COLLECTION */
  "parseTypeOther"        /* RAPTOR_STATE_PARSETYPE_OTHER */
};


/*
 * raptor_rdfxml_state_as_string:
 * @state: parser state
 *
 * Get a printable name for a parser state.
 *
 * Return value: the static name of @state, or "INVALID" when @state is
 * outside the range 1..RAPTOR_STATE_PARSETYPE_LAST.
 */
static const char * raptor_rdfxml_state_as_string(raptor_state state)
{
  /* anything out of range is reported as the INVALID state */
  if(state<1 || state > RAPTOR_STATE_PARSETYPE_LAST)
    state=(raptor_state)0;
  return raptor_state_names[(int)state];
}
151 
152 
/*
 * RDF/XML syntax terms, properties, classes and resources in the RDF
 * namespace.
 *
 * The values are consecutive, starting at 0, and are used as indexes
 * into rdf_syntax_terms_info[] below and into the per-element rdf_attr[]
 * array, so the order here must match the order of the names in
 * rdf_syntax_terms_info.
 */
typedef enum {
  /* syntax terms */
  RDF_ATTR_RDF = 0,          /* rdf:RDF */
  RDF_ATTR_Description,      /* rdf:Description */
  RDF_ATTR_li,               /* rdf:li */
  RDF_ATTR_about,            /* value of rdf:about attribute */
  RDF_ATTR_aboutEach,        /* value of rdf:aboutEach attribute */
  RDF_ATTR_aboutEachPrefix,  /* value of rdf:aboutEachPrefix attribute */
  RDF_ATTR_ID,               /* value of rdf:ID attribute */
  RDF_ATTR_bagID,            /* value of rdf:bagID attribute */
  RDF_ATTR_resource,         /* value of rdf:resource attribute */
  RDF_ATTR_parseType,        /* value of rdf:parseType attribute */
  RDF_ATTR_nodeID,           /* value of rdf:nodeID attribute */
  RDF_ATTR_datatype,         /* value of rdf:datatype attribute */

  /* rdf:Property-s */
  RDF_ATTR_type,             /* rdf:type -- a property in RDF model */
  RDF_ATTR_value,            /* rdf:value -- a property in RDF model */
  RDF_ATTR_subject,          /* rdf:subject -- a property in RDF model */
  RDF_ATTR_predicate,        /* rdf:predicate -- a property in RDF model */
  RDF_ATTR_object,           /* rdf:object -- a property in RDF model */
  RDF_ATTR_first,            /* rdf:first -- a property in RDF model */
  RDF_ATTR_rest,             /* rdf:rest -- a property in RDF model */

  /* rdfs:Class-s */
  RDF_ATTR_Seq,              /* rdf:Seq -- a class in RDF model */
  RDF_ATTR_Bag,              /* rdf:Bag -- a class in RDF model */
  RDF_ATTR_Alt,              /* rdf:Alt -- a class in RDF model */
  RDF_ATTR_Statement,        /* rdf:Statement -- a class in RDF model */
  RDF_ATTR_Property,         /* rdf:Property -- a class in RDF model */
  RDF_ATTR_List,             /* rdf:List -- a class in RDF model */
  RDF_ATTR_XMLLiteral,       /* rdf:XMLLiteral -- a class in RDF graph */

  /* rdfs:Resource-s */
  RDF_ATTR_nil,              /* rdf:nil -- a resource in RDF graph */

  /* highest valid term index */
  RDF_ATTR_LAST = RDF_ATTR_nil
} rdf_attr;
191 
192 
193 /*
194  * http://www.w3.org/TR/rdf-syntax-grammar/#section-grammar-summary
195  *
 * coreSyntaxTerms := rdf:RDF | rdf:ID | rdf:about | rdf:bagID |
 *                    rdf:parseType | rdf:resource | rdf:nodeID | rdf:datatype
198  * syntaxTerms     := coreSyntaxTerms | rdf:Description | rdf:li
199  * oldTerms        := rdf:aboutEach | rdf:aboutEachPrefix | rdf:bagID
200  *
201  * nodeElementURIs       := anyURI - ( coreSyntaxTerms | rdf:li | oldTerms )
202  * propertyElementURIs   := anyURI - ( coreSyntaxTerms | rdf:Description | oldTerms )
203  * propertyAttributeURIs := anyURI - ( coreSyntaxTerms | rdf:Description | rdf:li | oldTerms )
204  *
205  * So, forbidden terms in the RDF namespace are:
206  * nodeElements
207  *   RDF | ID | about | bagID | parseType | resource | nodeID | datatype |
208  *   li | aboutEach | aboutEachPrefix | bagID
209  *
210  * propertyElements
211  *   RDF | ID | about | bagID | parseType | resource | nodeID | datatype |
212  *   Description | aboutEach | aboutEachPrefix | bagID
213  *
214  * propertyAttributes
215  *   RDF | ID | about | bagID | parseType | resource | nodeID | datatype |
216  *   Description | li | aboutEach | aboutEachPrefix | bagID
217  *
218  * Information about rdf attributes:
219  *   raptor_identifier_type type
220  *     Set when the attribute is a property rather than just syntax
 *     NOTE: raptor_rdfxml_process_property_attributes() expects only
 *       RAPTOR_IDENTIFIER_TYPE_UNKNOWN,
 *       RAPTOR_IDENTIFIER_TYPE_LITERAL or RAPTOR_IDENTIFIER_TYPE_RESOURCE
224  *   allowed_unprefixed_on_attribute
225  *     If allowed for legacy reasons to be unprefixed as an attribute.
226  *
227  */
228 
229 static const struct {
230   const char *name;            /* term name */
231   int forbidden_as_nodeElement;
232   int forbidden_as_propertyElement;
233   int forbidden_as_propertyAttribute;
234   raptor_identifier_type type;  /* statement value */
235   int allowed_unprefixed_on_attribute;
236 } rdf_syntax_terms_info[]={
237   /* syntax only */
238   { "RDF",             1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
239   { "Description",     0, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
240   { "li",              1, 0, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
241   { "about",           1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
242   { "aboutEach",       1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
243   { "aboutEachPrefix", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
244   { "ID",              1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
245   { "bagID",           1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
246   { "resource",        1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
247   { "parseType",       1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
248   { "nodeID",          1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
249   { "datatype",        1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
250   /* rdf:Property-s */
251   { "type",            0, 0, 0, RAPTOR_IDENTIFIER_TYPE_RESOURCE, 1 },
252   { "value",           0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
253   { "subject",         0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
254   { "predicate",       0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
255   { "object",          0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
256   { "first",           0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
257   { "rest",            0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
258   /* rdfs:Class-s */
259   { "Seq",             0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
260   { "Bag",             0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
261   { "Alt",             0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
262   { "Statement",       0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
263   { "Property",        0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
264   { "List",            0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
265   { "XMLLiteral",      0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
266   /* rdfs:Resource-s */
267   { "nil",             0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
268   { NULL ,             0, 0, 0, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 }
269 };
270 
271 
272 static int
raptor_rdfxml_forbidden_nodeElement_name(const char * name)273 raptor_rdfxml_forbidden_nodeElement_name(const char *name)
274 {
275   int i;
276 
277   if(*name == '_')
278     return 0;
279 
280   for(i=0; rdf_syntax_terms_info[i].name; i++)
281     if(!strcmp(rdf_syntax_terms_info[i].name, name))
282       return rdf_syntax_terms_info[i].forbidden_as_nodeElement;
283 
284   return -1;
285 }
286 
287 
288 static int
raptor_rdfxml_forbidden_propertyElement_name(const char * name)289 raptor_rdfxml_forbidden_propertyElement_name(const char *name)
290 {
291   int i;
292 
293   if(*name == '_')
294     return 0;
295 
296   for(i=0; rdf_syntax_terms_info[i].name; i++)
297     if(!strcmp(rdf_syntax_terms_info[i].name, (const char*)name))
298       return rdf_syntax_terms_info[i].forbidden_as_propertyElement;
299 
300   return -1;
301 }
302 
303 
304 static int
raptor_rdfxml_forbidden_propertyAttribute_name(const char * name)305 raptor_rdfxml_forbidden_propertyAttribute_name(const char *name)
306 {
307   int i;
308 
309   if(*name == '_')
310     return 0;
311 
312   for(i=0; rdf_syntax_terms_info[i].name; i++)
313     if(!strcmp(rdf_syntax_terms_info[i].name, (const char*)name))
314       return rdf_syntax_terms_info[i].forbidden_as_propertyAttribute;
315 
316   return -1;
317 }
318 
319 
/*
 * How the content inside an XML element is to be handled.
 *
 * The values are consecutive from 0 and index rdf_content_type_info[]
 * below, which must be kept in the same order.
 */
typedef enum {
  /* undetermined yet - whitespace is stored */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN,

  /* literal content - no elements, cdata allowed, whitespace significant
   * <propElement> blah </propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL,

  /* parseType literal content (WF XML) - all content preserved
   * <propElement rdf:parseType="Literal"><em>blah</em></propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL,

  /* top-level nodes - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * only used for <rdf:RDF> or implicit <rdf:RDF>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES,

  /* properties - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * <nodeElement><prop1>blah</prop1> <prop2>blah</prop2> </nodeElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES,

  /* property content - all content preserved
   * any content type changes when first non-whitespace found
   * <propElement>...
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT,

  /* resource URI given - no element, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * <propElement rdf:resource="uri"/>
   * <propElement rdf:resource="uri"></propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE,

  /* skipping content - all content is preserved
   * Used when skipping content for unknown parseType-s,
   * error recovery, some other reason
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED,

  /* parseType Collection - all content preserved
   * Parsing of this determined by RDF/XML (Revised) closed collection rules
   * <propElement rdf:parseType="Collection">...</propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION,

  /* Like above but handles "daml:collection" */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION,

  /* Not a content type: the number of content types, used to size
   * rdf_content_type_info[] below
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST

} raptor_rdfxml_element_content_type;


/*
 * Properties of each content type, indexed by
 * raptor_rdfxml_element_content_type.  Valid indexes are
 * 0 .. RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST-1.
 */
static const struct {
  /* printable name of the content type */
  const char * name;
  /* whitespace-only cdata is significant */
  int whitespace_significant;
  /* non-blank cdata */
  int cdata_allowed;
  /* XML element content */
  int element_allowed;
  /* Do RDF-specific processing? (property attributes, rdf: attributes, ...) */
  int rdf_processing;
} rdf_content_type_info[RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST]={
  {"Unknown",         1, 1, 1, 0 },
  {"Literal",         1, 1, 0, 0 },
  {"XML Literal",     1, 1, 1, 0 },
  {"Nodes",           0, 0, 1, 1 },
  /* NOTE(review): the enum comment says "no cdata" for properties but
   * cdata_allowed is 1 here - confirm which is intended
   */
  {"Properties",      0, 1, 1, 1 },
  {"Property Content",1, 1, 1, 1 },
  {"Resource",        0, 0, 0, 0 },
  {"Preserved",       1, 1, 1, 0 },
  {"Collection",      1, 1, 1, 1 },
  {"DAML Collection", 1, 1, 1, 1 },
};



/*
 * raptor_rdfxml_element_content_type_as_string:
 * @type: element content type
 *
 * Get a printable name for an element content type.
 *
 * Return value: the static name of @type, or "INVALID" when @type is not
 * a valid index into rdf_content_type_info[].
 */
static const char *
raptor_rdfxml_element_content_type_as_string(raptor_rdfxml_element_content_type type)
{
  /* ..._LAST itself is one past the final table entry, so it must be
   * rejected too; the (int) cast catches negative values whatever the
   * signedness of the enum type is
   */
  if((int)type < 0 || type >= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST)
    return "INVALID";
  return rdf_content_type_info[type].name;
}
411 
412 
413 
414 
415 
416 /*
417  * Raptor Element/attributes on stack
418  */
419 struct raptor_rdfxml_element_s {
420   raptor_world* world;
421 
422   raptor_xml_element *xml_element;
423 
424   /* NULL at bottom of stack */
425   struct raptor_rdfxml_element_s *parent;
426 
427   /* attributes declared in M&S */
428   const unsigned char * rdf_attr[RDF_ATTR_LAST+1];
429   /* how many of above seen */
430   int rdf_attr_count;
431 
432   /* state that this production matches */
433   raptor_state state;
434 
435   /* how to handle the content inside this XML element */
436   raptor_rdfxml_element_content_type content_type;
437 
438 
439   /* starting state for children of this element */
440   raptor_state child_state;
441 
442   /* starting content type for children of this element */
443   raptor_rdfxml_element_content_type child_content_type;
444 
445 
446   /* STATIC Reified statement identifier */
447   raptor_identifier reified;
448 
449   /* STATIC Bag identifier */
450   raptor_identifier bag;
451   int last_bag_ordinal; /* starts at 0, so first predicate is rdf:_1 */
452 
453   /* STATIC Subject identifier (URI/anon ID), type, source
454    *
455    * When the XML element represents a node, this is the identifier
456    */
457   raptor_identifier subject;
458 
459   /* STATIC Predicate URI, source is either
460    * RAPTOR_URI_SOURCE_ELEMENT or RAPTOR_URI_SOURCE_ATTRIBUTE
461    *
462    * When the XML element represents a node or predicate,
463    * this is the identifier of the predicate
464    */
465   raptor_identifier predicate;
466 
467   /* STATIC Object identifier (URI/anon ID), type, source
468    *
469    * When this XML element generates a statement that needs an object,
470    * possibly from a child element, this is the identifier of the object
471    */
472   raptor_identifier object;
473 
474   /* URI of datatype of literal */
475   raptor_uri *object_literal_datatype;
476 
477   /* last ordinal used, so initialising to 0 works, emitting rdf:_1 first */
478   int last_ordinal;
479 
480   /* If this element's parseType is a Collection
481    * this identifies the anon node of current tail of the collection(list).
482    */
483   const unsigned char *tail_id;
484 
485   /* RDF/XML specific checks */
486 
487   /* all cdata so far is whitespace */
488   unsigned int content_cdata_all_whitespace;
489 };
490 
491 typedef struct raptor_rdfxml_element_s raptor_rdfxml_element;
492 
493 
494 #define RAPTOR_RDFXML_N_CONCEPTS 22
495 
496 /*
497  * Raptor parser object
498  */
499 struct raptor_rdfxml_parser_s {
500   raptor_sax2 *sax2;
501 
502   /* stack of elements - elements add after current_element */
503   raptor_rdfxml_element *root_element;
504   raptor_rdfxml_element *current_element;
505 
506   raptor_uri* concepts[RAPTOR_RDFXML_N_CONCEPTS];
507 
508   /* set of seen rdf:ID / rdf:bagID values (with in-scope base URI) */
509   raptor_id_set* id_set;
510 
511   void *xml_content;
512   size_t xml_content_length;
513   raptor_iostream* iostream;
514 
515   /* writer for building parseType="Literal" content */
516   raptor_xml_writer* xml_writer;
517 };
518 
519 
520 
521 
/*
 * Accessors for the concept URIs held in the concepts[] array of the
 * RDF/XML parser context.
 *
 * Each macro takes a pointer to the parser context and expands to the
 * matching concepts[] slot.  The argument and the whole expansion are
 * parenthesized so that any pointer expression (e.g. &ctx) can be passed
 * and the result can be used inside a larger expression.
 */

#define RAPTOR_RDF_type_URI(rdf_xml_parser)      ((rdf_xml_parser)->concepts[0])
#define RAPTOR_RDF_value_URI(rdf_xml_parser)     ((rdf_xml_parser)->concepts[1])
#define RAPTOR_RDF_subject_URI(rdf_xml_parser)   ((rdf_xml_parser)->concepts[2])
#define RAPTOR_RDF_predicate_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[3])
#define RAPTOR_RDF_object_URI(rdf_xml_parser)    ((rdf_xml_parser)->concepts[4])
#define RAPTOR_RDF_Statement_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[5])

#define RAPTOR_RDF_Seq_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[6])
#define RAPTOR_RDF_Bag_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[7])
#define RAPTOR_RDF_Alt_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[8])

#define RAPTOR_RDF_List_URI(rdf_xml_parser)  ((rdf_xml_parser)->concepts[9])
#define RAPTOR_RDF_first_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[10])
#define RAPTOR_RDF_rest_URI(rdf_xml_parser)  ((rdf_xml_parser)->concepts[11])
#define RAPTOR_RDF_nil_URI(rdf_xml_parser)   ((rdf_xml_parser)->concepts[12])

#define RAPTOR_DAML_NS_URI(rdf_xml_parser)   ((rdf_xml_parser)->concepts[13])

#define RAPTOR_DAML_List_URI(rdf_xml_parser)  ((rdf_xml_parser)->concepts[14])
#define RAPTOR_DAML_first_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[15])
#define RAPTOR_DAML_rest_URI(rdf_xml_parser)  ((rdf_xml_parser)->concepts[16])
#define RAPTOR_DAML_nil_URI(rdf_xml_parser)   ((rdf_xml_parser)->concepts[17])

#define RAPTOR_RDF_RDF_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[18])
#define RAPTOR_RDF_Description_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[19])
#define RAPTOR_RDF_li_URI(rdf_xml_parser)          ((rdf_xml_parser)->concepts[20])

#define RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser)  ((rdf_xml_parser)->concepts[21])

/* The highest index used above is 21: RAPTOR_RDFXML_N_CONCEPTS, which
 * defines the size of the array, must be at least 22
 */
554 
555 
556 /* prototypes for element functions */
557 static raptor_rdfxml_element* raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_parser);
558 static void raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_parser, raptor_rdfxml_element* element);
559 
560 static int raptor_rdfxml_record_ID(raptor_parser *rdf_parser, raptor_rdfxml_element *element, const unsigned char *id);
561 
562 /* prototypes for grammar functions */
563 static void raptor_rdfxml_start_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
564 static void raptor_rdfxml_end_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
565 static void raptor_rdfxml_cdata_grammar(raptor_parser *parser, const unsigned char *s, int len, int is_cdata);
566 
567 
568 /* prototype for statement related functions */
569 static void raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, raptor_uri *subject_uri, const unsigned char *subject_id, const raptor_identifier_type subject_type, const raptor_uri_source subject_uri_source, raptor_uri *predicate_uri, const unsigned char *predicate_id, const raptor_identifier_type predicate_type, const raptor_uri_source predicate_uri_source, int predicate_ordinal, raptor_uri *object_uri, const unsigned char *object_id, const raptor_identifier_type object_type, const raptor_uri_source object_uri_source, raptor_uri *literal_datatype, raptor_identifier *reified, raptor_rdfxml_element *bag_element);
570 
571 
572 
573 /* Prototypes for parsing data functions */
574 static int raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name);
575 static void raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser);
576 static int raptor_rdfxml_parse_start(raptor_parser* rdf_parser);
577 static int raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer, size_t len, int is_end);
578 static void raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser);
579 
580 static raptor_uri* raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser);
581 
582 
583 static raptor_rdfxml_element*
raptor_rdfxml_element_pop(raptor_rdfxml_parser * rdf_xml_parser)584 raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_xml_parser)
585 {
586   raptor_rdfxml_element *element=rdf_xml_parser->current_element;
587 
588   if(!element)
589     return NULL;
590 
591   rdf_xml_parser->current_element=element->parent;
592   if(rdf_xml_parser->root_element == element) /* just deleted root */
593     rdf_xml_parser->root_element=NULL;
594 
595   return element;
596 }
597 
598 
599 static void
raptor_rdfxml_element_push(raptor_rdfxml_parser * rdf_xml_parser,raptor_rdfxml_element * element)600 raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_xml_parser, raptor_rdfxml_element* element)
601 {
602   element->parent=rdf_xml_parser->current_element;
603   rdf_xml_parser->current_element=element;
604   if(!rdf_xml_parser->root_element)
605     rdf_xml_parser->root_element=element;
606 }
607 
608 
609 static void
raptor_free_rdfxml_element(raptor_rdfxml_element * element)610 raptor_free_rdfxml_element(raptor_rdfxml_element *element)
611 {
612   int i;
613 
614   /* Free special RDF M&S attributes */
615   for(i=0; i<= RDF_ATTR_LAST; i++)
616     if(element->rdf_attr[i])
617       RAPTOR_FREE(cstring, (void*)element->rdf_attr[i]);
618 
619   raptor_free_identifier(&element->subject);
620   raptor_free_identifier(&element->predicate);
621   raptor_free_identifier(&element->object);
622   raptor_free_identifier(&element->bag);
623   raptor_free_identifier(&element->reified);
624 
625   if(element->tail_id)
626     RAPTOR_FREE(cstring, (char*)element->tail_id);
627   if(element->object_literal_datatype)
628     raptor_free_uri_v2(element->world, element->object_literal_datatype);
629 
630   RAPTOR_FREE(raptor_rdfxml_element, element);
631 }
632 
633 
634 static void
raptor_rdfxml_sax2_new_namespace_handler(void * user_data,raptor_namespace * nspace)635 raptor_rdfxml_sax2_new_namespace_handler(void *user_data,
636                                          raptor_namespace* nspace)
637 {
638   raptor_parser* rdf_parser;
639   const unsigned char* namespace_name;
640   size_t namespace_name_len;
641   raptor_uri* uri=raptor_namespace_get_uri(nspace);
642 
643   rdf_parser=(raptor_parser*)user_data;
644   raptor_parser_start_namespace(rdf_parser, nspace);
645 
646   if(!uri)
647     return;
648 
649   namespace_name=raptor_uri_as_counted_string_v2(nspace->nstack->world, uri, &namespace_name_len);
650 
651   if(namespace_name_len == raptor_rdf_namespace_uri_len-1 &&
652      !strncmp((const char*)namespace_name,
653               (const char*)raptor_rdf_namespace_uri,
654               namespace_name_len)) {
655     const unsigned char *prefix=raptor_namespace_get_prefix(nspace);
656     raptor_parser_warning(rdf_parser, "Declaring a namespace with prefix %s to URI %s - one letter short of the RDF namespace URI and probably a mistake.", prefix, namespace_name);
657   }
658 
659   if(namespace_name_len > raptor_rdf_namespace_uri_len &&
660      !strncmp((const char*)namespace_name,
661               (const char*)raptor_rdf_namespace_uri,
662               raptor_rdf_namespace_uri_len)) {
663     raptor_parser_error(rdf_parser, "Declaring a namespace URI %s to which the RDF namespace URI is a prefix is forbidden.", namespace_name);
664   }
665 }
666 
667 
668 
669 static void
raptor_rdfxml_start_element_handler(void * user_data,raptor_xml_element * xml_element)670 raptor_rdfxml_start_element_handler(void *user_data,
671                                     raptor_xml_element* xml_element)
672 {
673   raptor_parser* rdf_parser;
674   raptor_rdfxml_parser* rdf_xml_parser;
675   raptor_rdfxml_element* element;
676   int ns_attributes_count=0;
677   raptor_qname** named_attrs=NULL;
678   int i;
679   int count_bumped=0;
680 
681   rdf_parser=(raptor_parser*)user_data;
682   rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
683 
684   if(rdf_parser->failed)
685     return;
686 
687   raptor_rdfxml_update_document_locator(rdf_parser);
688 
689   /* Create new element structure */
690   element=(raptor_rdfxml_element*)RAPTOR_CALLOC(raptor_rdfxml_element, 1,
691                                          sizeof(raptor_rdfxml_element));
692   if(!element) {
693     raptor_parser_fatal_error(rdf_parser, "Out of memory");
694     rdf_parser->failed=1;
695     return;
696   }
697   element->world=rdf_parser->world;
698   element->xml_element=xml_element;
699 
700   /* init world fields in identifiers not created with raptor_new_identifier() */
701   element->reified.world=
702     element->bag.world=
703     element->subject.world=
704     element->predicate.world=
705     element->object.world=
706     rdf_parser->world;
707 
708   raptor_rdfxml_element_push(rdf_xml_parser, element);
709 
710   named_attrs=raptor_xml_element_get_attributes(xml_element);
711   ns_attributes_count=raptor_xml_element_get_attributes_count(xml_element);
712 
713   /* RDF-specific processing of attributes */
714   if(ns_attributes_count) {
715     raptor_qname** new_named_attrs;
716     int offset = 0;
717     raptor_rdfxml_element* parent_element;
718 
719     parent_element=element->parent;
720 
721     /* Allocate new array to move namespaced-attributes to if
722      * rdf processing is performed
723      */
724     new_named_attrs=(raptor_qname**)RAPTOR_CALLOC(raptor_qname_array,
725                                                   ns_attributes_count,
726                                                   sizeof(raptor_qname*));
727     if(!new_named_attrs) {
728       raptor_parser_fatal_error(rdf_parser, "Out of memory");
729       rdf_parser->failed=1;
730       return;
731     }
732 
733     for (i = 0; i < ns_attributes_count; i++) {
734       raptor_qname* attr=named_attrs[i];
735 
736       /* If:
737        *  1 We are handling RDF content and RDF processing is allowed on
738        *    this element
739        * OR
740        *  2 We are not handling RDF content and
741        *    this element is at the top level (top level Desc. / typedNode)
742        *    i.e. we have no parent
743        * then handle the RDF attributes
744        */
745       if((parent_element &&
746           rdf_content_type_info[parent_element->child_content_type].rdf_processing) ||
747          !parent_element) {
748 
749         /* Save pointers to some RDF M&S attributes */
750 
751         /* If RDF namespace-prefixed attributes */
752         if(attr->nspace && attr->nspace->is_rdf_ms) {
753           const unsigned char *attr_name=attr->local_name;
754           int j;
755 
756           for(j=0; j<= RDF_ATTR_LAST; j++)
757             if(!strcmp((const char*)attr_name, rdf_syntax_terms_info[j].name)) {
758               element->rdf_attr[j]=attr->value;
759               element->rdf_attr_count++;
760               /* Delete it if it was stored elsewhere */
761 #ifdef RAPTOR_DEBUG_VERBOSE
762               RAPTOR_DEBUG3("Found RDF namespace attribute '%s' URI %s\n", (char*)attr_name, attr->value);
763 #endif
764               /* make sure value isn't deleted from qname structure */
765               attr->value=NULL;
766               raptor_free_qname(attr);
767               attr=NULL;
768               break;
769             }
770         } /* end if RDF namespaced-prefixed attributes */
771 
772         if(!attr)
773           continue;
774 
775         /* If non namespace-prefixed RDF attributes found on an element */
776         if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES] &&
777            !attr->nspace) {
778           const unsigned char *attr_name=attr->local_name;
779           int j;
780 
781           for(j=0; j<= RDF_ATTR_LAST; j++)
782             if(!strcmp((const char*)attr_name, rdf_syntax_terms_info[j].name)) {
783               element->rdf_attr[j]=attr->value;
784               element->rdf_attr_count++;
785               if(!rdf_syntax_terms_info[j].allowed_unprefixed_on_attribute)
786                 raptor_parser_warning(rdf_parser, "Using rdf attribute '%s' without the RDF namespace has been deprecated.", attr_name);
787               /* Delete it if it was stored elsewhere */
788               /* make sure value isn't deleted from qname structure */
789               attr->value=NULL;
790               raptor_free_qname(attr);
791               attr=NULL;
792               break;
793             }
794         } /* end if non-namespace prefixed RDF attributes */
795 
796         if(!attr)
797           continue;
798 
799       } /* end if leave literal XML alone */
800 
801       if(attr)
802         new_named_attrs[offset++]=attr;
803     }
804 
805     /* new attribute count is set from attributes that haven't been skipped */
806     ns_attributes_count=offset;
807     if(!ns_attributes_count) {
808       /* all attributes were deleted so delete the new array */
809       RAPTOR_FREE(raptor_qname_array, new_named_attrs);
810       new_named_attrs=NULL;
811     }
812 
813     RAPTOR_FREE(raptor_qname_array, named_attrs);
814     named_attrs=new_named_attrs;
815     raptor_xml_element_set_attributes(xml_element,
816                                       named_attrs, ns_attributes_count);
817   } /* end if ns_attributes_count */
818 
819 
820   /* start from unknown; if we have a parent, it may set this */
821   element->state=RAPTOR_STATE_UNKNOWN;
822   element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN;
823 
824   if(element->parent &&
825      element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN) {
826     element->content_type=element->parent->child_content_type;
827 
828     if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE &&
829        element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
830        element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
831       /* If parent has an rdf:resource, this element should not be here */
832       raptor_parser_error(rdf_parser, "property element '%s' has multiple object node elements, skipping.",
833                             raptor_xml_element_get_name(element->parent->xml_element)->local_name);
834       element->state=RAPTOR_STATE_SKIPPING;
835       element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
836 
837     } else {
838       if(!element->parent->child_state) {
839         raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_start_element_handler: no parent element child_state set");
840         return;
841       }
842 
843       element->state=element->parent->child_state;
844       element->parent->xml_element->content_element_seen++;
845       count_bumped++;
846 
847       /* leave literal XML alone */
848       if (!rdf_content_type_info[element->content_type].cdata_allowed) {
849         if(element->parent->xml_element->content_element_seen &&
850            element->parent->xml_element->content_cdata_seen) {
851           /* Uh oh - mixed content, the parent element has cdata too */
852           raptor_parser_warning(rdf_parser, "element '%s' has mixed content.",
853                                 raptor_xml_element_get_name(element->parent->xml_element)->local_name);
854         }
855 
856         /* If there is some existing all-whitespace content cdata
857          * before this node element, delete it
858          */
859         if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES &&
860            element->parent->xml_element->content_element_seen &&
861            element->parent->content_cdata_all_whitespace &&
862            element->parent->xml_element->content_cdata_length) {
863 
864           element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
865 
866           raptor_free_stringbuffer(element->parent->xml_element->content_cdata_sb);
867           element->parent->xml_element->content_cdata_sb=NULL;
868           element->parent->xml_element->content_cdata_length=0;
869         }
870 
871       } /* end if leave literal XML alone */
872 
873     } /* end if parent has no rdf:resource */
874 
875   } /* end if element->parent */
876 
877 
878 #ifdef RAPTOR_DEBUG_VERBOSE
879   RAPTOR_DEBUG2("Using content type %s\n", rdf_content_type_info[element->content_type].name);
880 
881   fprintf(stderr, "raptor_rdfxml_start_element_handler: Start ns-element: ");
882   raptor_print_xml_element(xml_element, stderr);
883 #endif
884 
885 
886   /* Check for non namespaced stuff when not in a parseType literal, other */
887   if (rdf_content_type_info[element->content_type].rdf_processing) {
888 
889     /* The element */
890     /* If has no namespace or the namespace has no name (xmlns="") */
891     if(!raptor_xml_element_get_name(xml_element)->nspace ||
892        (raptor_xml_element_get_name(xml_element)->nspace &&
893         !raptor_namespace_get_uri(raptor_xml_element_get_name(xml_element)->nspace))) {
894       raptor_parser_error(rdf_parser, "Using an element '%s' without a namespace is forbidden.",
895                           raptor_xml_element_get_name(element->parent->xml_element)->local_name);
896       element->state=RAPTOR_STATE_SKIPPING;
897       /* Remove count above so that parent thinks this is empty */
898       if(count_bumped)
899         element->parent->xml_element->content_element_seen--;
900       element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
901     }
902 
903 
904     /* Check for any remaining non-namespaced attributes */
905     if (named_attrs) {
906       for(i=0; i < ns_attributes_count; i++) {
907         raptor_qname *attr=named_attrs[i];
908         /* Check if any attributes are non-namespaced */
909         if(!attr->nspace ||
910            (attr->nspace && !raptor_namespace_get_uri(attr->nspace))) {
911           raptor_parser_error(rdf_parser, "Using an attribute '%s' without a namespace is forbidden.", attr->local_name);
912           raptor_free_qname(attr);
913           named_attrs[i]=NULL;
914         }
915       }
916     }
917   }
918 
919 
920   if (element->rdf_attr[RDF_ATTR_aboutEach] ||
921       element->rdf_attr[RDF_ATTR_aboutEachPrefix]) {
922     raptor_parser_warning(rdf_parser, "element '%s' has aboutEach / aboutEachPrefix, skipping.",
923                           raptor_xml_element_get_name(xml_element)->local_name);
924     element->state=RAPTOR_STATE_SKIPPING;
925     /* Remove count above so that parent thinks this is empty */
926     if(count_bumped)
927       element->parent->xml_element->content_element_seen--;
928     element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
929   }
930 
931   /* Right, now ready to enter the grammar */
932   raptor_rdfxml_start_element_grammar(rdf_parser, element);
933 
934   return;
935 }
936 
937 
938 static void
raptor_rdfxml_end_element_handler(void * user_data,raptor_xml_element * xml_element)939 raptor_rdfxml_end_element_handler(void *user_data,
940                                   raptor_xml_element* xml_element)
941 {
942   raptor_parser* rdf_parser;
943   raptor_rdfxml_parser* rdf_xml_parser;
944   raptor_rdfxml_element* element;
945 
946   rdf_parser=(raptor_parser*)user_data;
947   rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
948 
949   if(!rdf_parser->failed) {
950     raptor_rdfxml_update_document_locator(rdf_parser);
951 
952     raptor_rdfxml_end_element_grammar(rdf_parser, rdf_xml_parser->current_element);
953   }
954 
955   element=raptor_rdfxml_element_pop(rdf_xml_parser);
956   if(element) {
957     if(element->parent) {
958       /* Do not change this; PROPERTYELT will turn into MEMBER if necessary
959        * See the switch case for MEMBER / PROPERTYELT where the test is done.
960        *
961        * PARSETYPE_RESOURCE should never be propogated up since it
962        * will turn the next child (node) element into a property
963        */
964       if(element->state != RAPTOR_STATE_MEMBER_PROPERTYELT &&
965          element->state != RAPTOR_STATE_PARSETYPE_RESOURCE)
966         element->parent->child_state=element->state;
967     }
968 
969     raptor_free_rdfxml_element(element);
970   }
971 }
972 
973 
974 /* cdata (and ignorable whitespace for libxml).
975  * s is not 0 terminated for expat, is for libxml - grrrr.
976  */
977 static void
raptor_rdfxml_characters_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s,int len)978 raptor_rdfxml_characters_handler(void *user_data,
979                                  raptor_xml_element* xml_element,
980                                  const unsigned char *s, int len)
981 {
982   raptor_parser* rdf_parser=(raptor_parser*)user_data;
983 
984   raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 0);
985 }
986 
987 
988 /* cdata (and ignorable whitespace for libxml).
989  * s is not 0 terminated for expat, is for libxml - grrrr.
990  */
991 static void
raptor_rdfxml_cdata_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s,int len)992 raptor_rdfxml_cdata_handler(void *user_data, raptor_xml_element* xml_element,
993                             const unsigned char *s, int len)
994 {
995   raptor_parser* rdf_parser=(raptor_parser*)user_data;
996 
997   raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 1);
998 }
999 
1000 
1001 /* comment handler
1002  * s is 0 terminated
1003  */
1004 static void
raptor_rdfxml_comment_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s)1005 raptor_rdfxml_comment_handler(void *user_data, raptor_xml_element* xml_element,
1006                               const unsigned char *s)
1007 {
1008   raptor_parser* rdf_parser=(raptor_parser*)user_data;
1009   raptor_rdfxml_parser* rdf_xml_parser;
1010   raptor_rdfxml_element* element;
1011 
1012   if(rdf_parser->failed || !xml_element)
1013     return;
1014 
1015   rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1016   element=rdf_xml_parser->current_element;
1017 
1018   if(element) {
1019     if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
1020       raptor_xml_writer_comment(rdf_xml_parser->xml_writer, s);
1021   }
1022 
1023 
1024 #ifdef RAPTOR_DEBUG_VERBOSE
1025   RAPTOR_DEBUG2("XML Comment '%s'\n", s);
1026 #endif
1027 }
1028 
1029 
1030 
1031 static int
raptor_rdfxml_parse_init(raptor_parser * rdf_parser,const char * name)1032 raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name)
1033 {
1034   raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1035   raptor_sax2* sax2;
1036   raptor_world* world=rdf_parser->world;
1037 
1038   /* Allocate sax2 object */
1039   sax2=raptor_new_sax2(rdf_parser, &rdf_parser->error_handlers);
1040   rdf_xml_parser->sax2=sax2;
1041   if(!sax2)
1042     return 1;
1043 
1044   /* Initialize sax2 element handlers */
1045   raptor_sax2_set_start_element_handler(sax2, raptor_rdfxml_start_element_handler);
1046   raptor_sax2_set_end_element_handler(sax2, raptor_rdfxml_end_element_handler);
1047   raptor_sax2_set_characters_handler(sax2, raptor_rdfxml_characters_handler);
1048   raptor_sax2_set_cdata_handler(sax2, raptor_rdfxml_cdata_handler);
1049   raptor_sax2_set_comment_handler(sax2, raptor_rdfxml_comment_handler);
1050   raptor_sax2_set_namespace_handler(sax2, raptor_rdfxml_sax2_new_namespace_handler);
1051 
1052   /* Allocate uris */
1053   RAPTOR_RDF_type_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "type");
1054   RAPTOR_RDF_value_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "value");
1055   RAPTOR_RDF_subject_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "subject");
1056   RAPTOR_RDF_predicate_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "predicate");
1057   RAPTOR_RDF_object_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "object");
1058   RAPTOR_RDF_Statement_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Statement");
1059 
1060   RAPTOR_RDF_Seq_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Seq");
1061   RAPTOR_RDF_Bag_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Bag");
1062   RAPTOR_RDF_Alt_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Alt");
1063 
1064   RAPTOR_RDF_List_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "List");
1065   RAPTOR_RDF_first_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "first");
1066   RAPTOR_RDF_rest_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "rest");
1067   RAPTOR_RDF_nil_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "nil");
1068 
1069   RAPTOR_DAML_NS_URI(rdf_xml_parser)=raptor_new_uri_v2(world, (const unsigned char*)"http://www.daml.org/2001/03/daml+oil#");
1070 
1071   RAPTOR_DAML_List_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"List");
1072   RAPTOR_DAML_first_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser) ,(const unsigned char *)"first");
1073   RAPTOR_DAML_rest_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"rest");
1074   RAPTOR_DAML_nil_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"nil");
1075 
1076   RAPTOR_RDF_RDF_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "RDF");
1077   RAPTOR_RDF_Description_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Description");
1078   RAPTOR_RDF_li_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "li");
1079 
1080   RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser)=raptor_new_uri_v2(world, raptor_xml_literal_datatype_uri_string);
1081 
1082   /* Check for uri allocation failures */
1083   if(!RAPTOR_RDF_type_URI(rdf_xml_parser) ||
1084      !RAPTOR_RDF_value_URI(rdf_xml_parser) ||
1085      !RAPTOR_RDF_subject_URI(rdf_xml_parser) ||
1086      !RAPTOR_RDF_predicate_URI(rdf_xml_parser) ||
1087      !RAPTOR_RDF_object_URI(rdf_xml_parser) ||
1088      !RAPTOR_RDF_Statement_URI(rdf_xml_parser) ||
1089      !RAPTOR_RDF_Seq_URI(rdf_xml_parser) ||
1090      !RAPTOR_RDF_Bag_URI(rdf_xml_parser) ||
1091      !RAPTOR_RDF_Alt_URI(rdf_xml_parser) ||
1092      !RAPTOR_RDF_List_URI(rdf_xml_parser) ||
1093      !RAPTOR_RDF_first_URI(rdf_xml_parser) ||
1094      !RAPTOR_RDF_rest_URI(rdf_xml_parser) ||
1095      !RAPTOR_RDF_nil_URI(rdf_xml_parser) ||
1096      !RAPTOR_DAML_NS_URI(rdf_xml_parser) ||
1097      !RAPTOR_DAML_List_URI(rdf_xml_parser) ||
1098      !RAPTOR_DAML_first_URI(rdf_xml_parser) ||
1099      !RAPTOR_DAML_rest_URI(rdf_xml_parser) ||
1100      !RAPTOR_DAML_nil_URI(rdf_xml_parser) ||
1101      !RAPTOR_RDF_RDF_URI(rdf_xml_parser) ||
1102      !RAPTOR_RDF_Description_URI(rdf_xml_parser) ||
1103      !RAPTOR_RDF_li_URI(rdf_xml_parser) ||
1104      !RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser))
1105     return 1;
1106 
1107   /* Everything succeeded */
1108   return 0;
1109 }
1110 
1111 
1112 static int
raptor_rdfxml_parse_start(raptor_parser * rdf_parser)1113 raptor_rdfxml_parse_start(raptor_parser* rdf_parser)
1114 {
1115   raptor_uri *uri=rdf_parser->base_uri;
1116   raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1117 
1118   /* base URI required for RDF/XML */
1119   if(!uri)
1120     return 1;
1121 
1122   /* Optionally normalize language to lowercase
1123    * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier
1124    */
1125   raptor_sax2_set_feature(rdf_xml_parser->sax2,
1126                           RAPTOR_FEATURE_NORMALIZE_LANGUAGE,
1127                           rdf_parser->features[RAPTOR_FEATURE_NORMALIZE_LANGUAGE]);
1128 
1129   /* Optionally forbid network requests in the XML parser */
1130   raptor_sax2_set_feature(rdf_xml_parser->sax2,
1131                           RAPTOR_FEATURE_NO_NET,
1132                           rdf_parser->features[RAPTOR_FEATURE_NO_NET]);
1133   raptor_sax2_set_feature(rdf_xml_parser->sax2,
1134                           RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES,
1135                           rdf_parser->features[RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES]);
1136 
1137   raptor_sax2_parse_start(rdf_xml_parser->sax2, uri);
1138 
1139   /* Delete any existing id_set */
1140   if(rdf_xml_parser->id_set) {
1141     raptor_free_id_set(rdf_xml_parser->id_set);
1142     rdf_xml_parser->id_set = NULL;
1143   }
1144 
1145   /* Create a new id_set if needed */
1146   if(rdf_parser->features[RAPTOR_FEATURE_CHECK_RDF_ID]) {
1147     rdf_xml_parser->id_set = raptor_new_id_set(rdf_parser->world);
1148     if(!rdf_xml_parser->id_set)
1149       return 1;
1150   }
1151 
1152   return 0;
1153 }
1154 
1155 
1156 static void
raptor_rdfxml_parse_terminate(raptor_parser * rdf_parser)1157 raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser)
1158 {
1159   raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1160   raptor_rdfxml_element* element;
1161   int i;
1162 
1163   if(rdf_xml_parser->sax2) {
1164     raptor_free_sax2(rdf_xml_parser->sax2);
1165     rdf_xml_parser->sax2=NULL;
1166   }
1167 
1168   while( (element=raptor_rdfxml_element_pop(rdf_xml_parser)) )
1169     raptor_free_rdfxml_element(element);
1170 
1171 
1172   for(i=0; i< RAPTOR_RDFXML_N_CONCEPTS; i++) {
1173     raptor_uri* concept_uri=rdf_xml_parser->concepts[i];
1174     if(concept_uri) {
1175       raptor_free_uri_v2(rdf_parser->world, concept_uri);
1176       rdf_xml_parser->concepts[i]=NULL;
1177     }
1178   }
1179 
1180   if(rdf_xml_parser->id_set) {
1181     raptor_free_id_set(rdf_xml_parser->id_set);
1182     rdf_xml_parser->id_set=NULL;
1183   }
1184 
1185 }
1186 
1187 
1188 static int
raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)1189 raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory* factory,
1190                                      const unsigned char *buffer, size_t len,
1191                                      const unsigned char *identifier,
1192                                      const unsigned char *suffix,
1193                                      const char *mime_type)
1194 {
1195   int score= 0;
1196 
1197   if(suffix) {
1198     if(!strcmp((const char*)suffix, "rdf") ||
1199        !strcmp((const char*)suffix, "rdfs") ||
1200        !strcmp((const char*)suffix, "foaf") ||
1201        !strcmp((const char*)suffix, "doap") ||
1202        !strcmp((const char*)suffix, "owl") ||
1203        !strcmp((const char*)suffix, "daml"))
1204       score=9;
1205     if(!strcmp((const char*)suffix, "rss"))
1206       score=3;
1207   }
1208 
1209   if(identifier) {
1210     if(strstr((const char*)identifier, "rss1"))
1211       score+=5;
1212     else if(!suffix && strstr((const char*)identifier, "rss"))
1213       score+=3;
1214     else if(!suffix && strstr((const char*)identifier, "rdf"))
1215       score+=2;
1216     else if(!suffix && strstr((const char*)identifier, "RDF"))
1217       score+=2;
1218   }
1219 
1220   if(mime_type) {
1221     if(strstr((const char*)mime_type, "html"))
1222       score-= 4;
1223     else if(!strcmp((const char*)mime_type, "text/rdf"))
1224       score+= 7;
1225     else if(!strcmp((const char*)mime_type, "application/xml"))
1226       score+= 5;
1227   }
1228 
1229   if(buffer && len) {
1230     /* Check it's an XML namespace declared and not N3 or Turtle which
1231      * mention the namespace URI but not in this form.
1232      */
1233 #define  HAS_RDF_XMLNS1 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1234 #define  HAS_RDF_XMLNS2 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1235 #define  HAS_RDF_XMLNS3 (raptor_memstr((const char*)buffer, len, "xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1236 #define  HAS_RDF_XMLNS4 (raptor_memstr((const char*)buffer, len, "xmlns='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1237 #define  HAS_RDF_ENTITY1 (raptor_memstr((const char*)buffer, len, "<!ENTITY rdf 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'>") != NULL)
1238 #define  HAS_RDF_ENTITY2 (raptor_memstr((const char*)buffer, len, "<!ENTITY rdf \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">") != NULL)
1239 #define  HAS_RDF_ENTITY3 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"&rdf;\"") != NULL)
1240 #define  HAS_RDF_ENTITY4 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='&rdf;'") != NULL)
1241 #define  HAS_HTML_NS (raptor_memstr((const char*)buffer, len, "http://www.w3.org/1999/xhtml") != NULL)
1242 #define  HAS_HTML_ROOT (raptor_memstr((const char*)buffer, len, "<html") != NULL)
1243 
1244     if(!HAS_HTML_NS && !HAS_HTML_ROOT &&
1245        (HAS_RDF_XMLNS1 || HAS_RDF_XMLNS2 || HAS_RDF_XMLNS3 || HAS_RDF_XMLNS4 ||
1246         HAS_RDF_ENTITY1 || HAS_RDF_ENTITY2 || HAS_RDF_ENTITY3 || HAS_RDF_ENTITY4)
1247       ) {
1248       int has_rdf_RDF=(raptor_memstr((const char*)buffer, len, "<rdf:RDF") != NULL);
1249       int has_rdf_Description=(raptor_memstr((const char*)buffer, len, "rdf:Description") != NULL);
1250       int has_rdf_about=(raptor_memstr((const char*)buffer, len, "rdf:about") != NULL);
1251 
1252       score+= 7;
1253       if(has_rdf_RDF)
1254         score++;
1255       if(has_rdf_Description)
1256         score++;
1257       if(has_rdf_about)
1258         score++;
1259     }
1260   }
1261 
1262   return score;
1263 }
1264 
1265 
1266 
1267 static int
raptor_rdfxml_parse_chunk(raptor_parser * rdf_parser,const unsigned char * buffer,size_t len,int is_end)1268 raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer,
1269                        size_t len, int is_end)
1270 {
1271   raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1272   if(rdf_parser->failed)
1273     return 1;
1274 
1275   return raptor_sax2_parse_chunk(rdf_xml_parser->sax2, buffer, len, is_end);
1276 }
1277 
1278 
1279 static void
raptor_rdfxml_generate_statement(raptor_parser * rdf_parser,raptor_uri * subject_uri,const unsigned char * subject_id,const raptor_identifier_type subject_type,const raptor_uri_source subject_uri_source,raptor_uri * predicate_uri,const unsigned char * predicate_id,raptor_identifier_type predicate_type,const raptor_uri_source predicate_uri_source,int predicate_ordinal,raptor_uri * object_uri,const unsigned char * object_id,const raptor_identifier_type object_type,const raptor_uri_source object_uri_source,raptor_uri * literal_datatype,raptor_identifier * reified,raptor_rdfxml_element * bag_element)1280 raptor_rdfxml_generate_statement(raptor_parser *rdf_parser,
1281                           raptor_uri *subject_uri,
1282                           const unsigned char *subject_id,
1283                           const raptor_identifier_type subject_type,
1284                           const raptor_uri_source subject_uri_source,
1285                           raptor_uri *predicate_uri,
1286                           const unsigned char *predicate_id,
1287                           raptor_identifier_type predicate_type,
1288                           const raptor_uri_source predicate_uri_source,
1289                           int predicate_ordinal,
1290                           raptor_uri *object_uri,
1291                           const unsigned char *object_id,
1292                           const raptor_identifier_type object_type,
1293                           const raptor_uri_source object_uri_source,
1294                           raptor_uri *literal_datatype,
1295                           raptor_identifier *reified,
1296                           raptor_rdfxml_element* bag_element)
1297 {
1298   raptor_statement *statement=&rdf_parser->statement;
1299   const unsigned char *language=NULL;
1300   static const char empty_literal[1]="";
1301   raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1302   char *reified_id=NULL;
1303   raptor_uri* uri1=NULL;
1304   raptor_uri* uri2=NULL;
1305 
1306   if(rdf_parser->failed)
1307     return;
1308 
1309   if((object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL ||
1310       object_type == RAPTOR_IDENTIFIER_TYPE_XML_LITERAL) &&
1311      !literal_datatype) {
1312     language=raptor_sax2_inscope_xml_language(rdf_xml_parser->sax2);
1313     if(!object_uri)
1314       object_uri=(raptor_uri*)empty_literal;
1315   }
1316 
1317   statement->subject=subject_uri ? (void*)subject_uri : (void*)subject_id;
1318   statement->subject_type=subject_type;
1319 
1320   statement->predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1321   if(predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
1322     /* new URI object */
1323     uri1=raptor_new_uri_from_rdf_ordinal(rdf_parser->world, predicate_ordinal);
1324     predicate_uri=uri1;
1325     predicate_id=NULL;
1326     predicate_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1327   }
1328   statement->predicate=predicate_uri;
1329 
1330   statement->object=object_uri ? (void*)object_uri : (void*)object_id;
1331   statement->object_type=object_type;
1332 
1333   statement->object_literal_language=language;
1334   statement->object_literal_datatype=literal_datatype;
1335 
1336 
1337 #ifdef RAPTOR_DEBUG_VERBOSE
1338   fprintf(stderr, "raptor_rdfxml_generate_statement: Generating statement: ");
1339   raptor_print_statement(statement, stderr);
1340   fputc('\n', stderr);
1341 
1342   if(!(subject_uri||subject_id))
1343     RAPTOR_FATAL1("Statement has no subject\n");
1344 
1345   if(!(predicate_uri||predicate_id))
1346     RAPTOR_FATAL1("Statement has no predicate\n");
1347 
1348   if(!(object_uri||object_id))
1349     RAPTOR_FATAL1("Statement has no object\n");
1350 
1351 #endif
1352 
1353   if(!rdf_parser->statement_handler)
1354     goto generate_tidy;
1355 
1356   /* Generate the statement; or is it fact? */
1357   (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1358 
1359 
1360   /* the bagID mess */
1361   if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID] &&
1362      bag_element && (bag_element->bag.uri || bag_element->bag.id)) {
1363     raptor_identifier* bag=&bag_element->bag;
1364 
1365     statement->subject=bag->uri ? (void*)bag->uri : (void*)bag->id;
1366     statement->subject_type=bag->type;
1367 
1368     bag_element->last_bag_ordinal++;
1369 
1370     /* new URI object */
1371     uri2=raptor_new_uri_from_rdf_ordinal(rdf_parser->world, bag_element->last_bag_ordinal);
1372     statement->predicate=uri2;
1373 
1374     if(reified && (reified->uri || reified->id)) {
1375       statement->object=reified->uri ? (void*)reified->uri : (void*)reified->id;
1376       statement->object_type=reified->type;
1377     } else {
1378       /* reified may be NULL so do not use it */
1379       reified_id=(char*)raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
1380       statement->object=reified_id;
1381       statement->object_type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
1382     }
1383 
1384     (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1385 
1386   } else if(!reified || (!reified->uri && !reified->id))
1387     goto generate_tidy;
1388 
1389   /* generate reified statements */
1390   statement->subject_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1391   statement->predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1392   statement->object_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1393 
1394   statement->object_literal_language=NULL;
1395 
1396   if(reified_id) {
1397     /* reified may be NULL so do not use it */
1398     statement->subject=reified_id;
1399     statement->subject_type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
1400   } else {
1401     statement->subject=reified->uri ? (void*)reified->uri : (void*)reified->id;
1402     statement->subject_type=reified->type;
1403   }
1404 
1405   statement->predicate=RAPTOR_RDF_type_URI(rdf_xml_parser);
1406   statement->object=RAPTOR_RDF_Statement_URI(rdf_xml_parser);
1407   (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1408 
1409   statement->predicate=RAPTOR_RDF_subject_URI(rdf_xml_parser);
1410   statement->object=subject_uri ? (void*)subject_uri : (void*)subject_id;
1411   statement->object_type=subject_type;
1412   (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1413 
1414   statement->predicate=RAPTOR_RDF_predicate_URI(rdf_xml_parser);
1415   statement->object=predicate_uri ? (void*)predicate_uri : (void*)predicate_id;
1416   statement->object_type=predicate_type;
1417   (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1418 
1419   statement->predicate=RAPTOR_RDF_object_URI(rdf_xml_parser);
1420   statement->object=object_uri ? (void*)object_uri : (void*)object_id;
1421   statement->object_type=object_type;
1422   statement->object_literal_language=language;
1423 
1424   (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1425 
1426  generate_tidy:
1427   /* Tidy up things allocated here */
1428   if(reified_id)
1429     RAPTOR_FREE(cstring, reified_id);
1430   if(uri1)
1431     raptor_free_uri_v2(rdf_parser->world, uri1);
1432   if(uri2)
1433     raptor_free_uri_v2(rdf_parser->world, uri2);
1434 }
1435 
1436 
1437 
1438 /**
1439  * raptor_rdfxml_element_has_property_attributes:
1440  * @element: element with the property attributes
1441  *
1442  * Return true if the element has at least one property attribute.
1443  *
1444  **/
1445 static int
raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element * element)1446 raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element *element)
1447 {
1448   int i;
1449 
1450   if(element->xml_element->attribute_count >0)
1451     return 1;
1452 
1453   /* look for rdf: properties */
1454   for(i=0; i<= RDF_ATTR_LAST; i++) {
1455     if(element->rdf_attr[i] &&
1456        rdf_syntax_terms_info[i].type != RAPTOR_IDENTIFIER_TYPE_UNKNOWN)
1457       return 1;
1458   }
1459   return 0;
1460 }
1461 
1462 
1463 /**
1464  * raptor_rdfxml_process_property_attributes:
1465  * @rdf_parser: Raptor parser object
1466  * @attributes_element: element with the property attributes
1467  * @resource_element: element that defines the resource URI
1468  *                    subject_uri, subject_uri_source etc.
1469  * @property_node_identifier: Use this identifier for the resource URI
1470  *   and count any ordinals for it locally
1471  *
1472  * Process the property attributes for an element for a given resource.
1473  *
1474  **/
1475 static void
raptor_rdfxml_process_property_attributes(raptor_parser * rdf_parser,raptor_rdfxml_element * attributes_element,raptor_rdfxml_element * resource_element,raptor_identifier * property_node_identifier)1476 raptor_rdfxml_process_property_attributes(raptor_parser *rdf_parser,
1477                                           raptor_rdfxml_element *attributes_element,
1478                                           raptor_rdfxml_element *resource_element,
1479                                           raptor_identifier *property_node_identifier)
1480 {
1481   unsigned int i;
1482   raptor_identifier *resource_identifier;
1483 
1484   resource_identifier=property_node_identifier ? property_node_identifier : &resource_element->subject;
1485 
1486 
1487   /* Process attributes as propAttr* = * (propName="string")*
1488    */
1489   for(i=0; i < attributes_element->xml_element->attribute_count; i++) {
1490     raptor_qname* attr=attributes_element->xml_element->attributes[i];
1491     const unsigned char *name;
1492     const unsigned char *value;
1493     int handled=0;
1494 
1495     if(!attr)
1496       continue;
1497 
1498     name=attr->local_name;
1499     value = attr->value;
1500 
1501     if(!attr->nspace) {
1502       raptor_rdfxml_update_document_locator(rdf_parser);
1503       raptor_parser_error(rdf_parser, "Using property attribute '%s' without a namespace is forbidden.", name);
1504       continue;
1505     }
1506 
1507 
1508     if(!raptor_utf8_is_nfc(value, strlen((const char*)value))) {
1509       const char *message="Property attribute '%s' has a string not in Unicode Normal Form C: %s";
1510       raptor_rdfxml_update_document_locator(rdf_parser);
1511       if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
1512         raptor_parser_error(rdf_parser, message, name, value);
1513       else
1514         raptor_parser_warning(rdf_parser, message, name, value);
1515       continue;
1516     }
1517 
1518 
1519     /* Generate the property statement using one of these properties:
1520      * 1) rdf:_n
1521      * 2) the URI from the rdf:* attribute where allowed
1522      * 3) otherwise forbidden (including rdf:li)
1523      */
1524     if(attr->nspace->is_rdf_ms) {
1525       /* is rdf: namespace */
1526       int ordinal=0;
1527 
1528       if(*name == '_') {
1529         /* recognise rdf:_ */
1530         name++;
1531         ordinal=raptor_check_ordinal(name);
1532         if(ordinal < 1) {
1533           raptor_rdfxml_update_document_locator(rdf_parser);
1534           raptor_parser_error(rdf_parser, "Illegal ordinal value %d in property attribute '%s' seen on containing element '%s'.", ordinal, attr->local_name, name);
1535           ordinal=1;
1536         }
1537       } else {
1538         raptor_rdfxml_update_document_locator(rdf_parser);
1539         if(raptor_rdfxml_forbidden_propertyAttribute_name((const char*)name) > 0)
1540           raptor_parser_error(rdf_parser, "RDF term %s is forbidden as a property attribute.", name);
1541         else
1542           raptor_parser_warning(rdf_parser, "Unknown RDF namespace property attribute '%s'.",
1543                                 name);
1544       }
1545 
1546       if(ordinal >= 1) {
1547         /* Generate an ordinal property when there are no problems */
1548         raptor_rdfxml_generate_statement(rdf_parser,
1549                                   resource_identifier->uri,
1550                                   resource_identifier->id,
1551                                   resource_identifier->type,
1552                                   resource_identifier->uri_source,
1553 
1554                                   NULL,
1555                                   NULL,
1556                                   RAPTOR_IDENTIFIER_TYPE_ORDINAL,
1557                                   RAPTOR_URI_SOURCE_NOT_URI,
1558                                   ordinal,
1559 
1560                                   (raptor_uri*)value,
1561                                   NULL,
1562                                   RAPTOR_IDENTIFIER_TYPE_LITERAL,
1563                                   RAPTOR_URI_SOURCE_NOT_URI,
1564                                   NULL,
1565 
1566                                   NULL, /* Property attributes are never reified*/
1567                                   resource_element);
1568         handled=1;
1569       }
1570 
1571     } /* end is RDF namespace property */
1572 
1573 
1574     if(!handled)
1575       /* else not rdf: namespace or unknown in rdf: namespace so
1576        * generate a statement with a literal object
1577        */
1578       raptor_rdfxml_generate_statement(rdf_parser,
1579                                 resource_identifier->uri,
1580                                 resource_identifier->id,
1581                                 resource_identifier->type,
1582                                 resource_identifier->uri_source,
1583 
1584                                 attr->uri,
1585                                 NULL,
1586                                 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1587                                 RAPTOR_URI_SOURCE_ATTRIBUTE,
1588                                 0,
1589 
1590                                 (raptor_uri*)value,
1591                                 NULL,
1592                                 RAPTOR_IDENTIFIER_TYPE_LITERAL,
1593                                 RAPTOR_URI_SOURCE_NOT_URI,
1594                                 NULL,
1595 
1596                                 NULL, /* Property attributes are never reified*/
1597                                 resource_element);
1598 
1599   } /* end for ... attributes */
1600 
1601 
1602   /* Handle rdf property attributes
1603    * (only rdf:type and rdf:value at present)
1604    */
1605   for(i=0; i<= RDF_ATTR_LAST; i++) {
1606     const unsigned char *value=attributes_element->rdf_attr[i];
1607     int object_is_literal=(rdf_syntax_terms_info[i].type == RAPTOR_IDENTIFIER_TYPE_LITERAL);
1608     raptor_uri *property_uri, *object_uri;
1609     raptor_identifier_type object_type;
1610 
1611     if(!value)
1612       continue;
1613 
1614     if(rdf_syntax_terms_info[i].type == RAPTOR_IDENTIFIER_TYPE_UNKNOWN) {
1615       const char *name=rdf_syntax_terms_info[i].name;
1616       if(raptor_rdfxml_forbidden_propertyAttribute_name(name)) {
1617         raptor_rdfxml_update_document_locator(rdf_parser);
1618         raptor_parser_error(rdf_parser, "RDF term %s is forbidden as a property attribute.", name);
1619         continue;
1620       }
1621     }
1622 
1623     if(object_is_literal && !raptor_utf8_is_nfc(value, strlen((const char*)value))) {
1624       const char *message="Property attribute '%s' has a string not in Unicode Normal Form C: %s";
1625       raptor_rdfxml_update_document_locator(rdf_parser);
1626       if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
1627         raptor_parser_error(rdf_parser, message, rdf_syntax_terms_info[i].name, value);
1628       else
1629         raptor_parser_warning(rdf_parser, message, rdf_syntax_terms_info[i].name, value);
1630       continue;
1631     }
1632 
1633     property_uri=raptor_new_uri_for_rdf_concept_v2(rdf_parser->world, (rdf_syntax_terms_info[i].name));
1634 
1635     object_uri=object_is_literal ? (raptor_uri*)value : raptor_new_uri_relative_to_base_v2(rdf_parser->world, raptor_rdfxml_inscope_base_uri(rdf_parser), value);
1636     object_type=object_is_literal ? RAPTOR_IDENTIFIER_TYPE_LITERAL : RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1637 
1638     raptor_rdfxml_generate_statement(rdf_parser,
1639                               resource_identifier->uri,
1640                               resource_identifier->id,
1641                               resource_identifier->type,
1642                               resource_identifier->uri_source,
1643 
1644                               property_uri,
1645                               NULL,
1646                               RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1647                               RAPTOR_URI_SOURCE_ATTRIBUTE,
1648                               0,
1649 
1650                               object_uri,
1651                               NULL,
1652                               object_type,
1653                               RAPTOR_URI_SOURCE_NOT_URI,
1654                               NULL,
1655 
1656                               NULL, /* Property attributes are never reified*/
1657                               resource_element);
1658     if(!object_is_literal)
1659       raptor_free_uri_v2(rdf_parser->world, object_uri);
1660 
1661     raptor_free_uri_v2(rdf_parser->world, property_uri);
1662 
1663   } /* end for rdf:property values */
1664 
1665 }
1666 
1667 
1668 static void
1669 raptor_rdfxml_start_element_grammar(raptor_parser *rdf_parser,
1670                                     raptor_rdfxml_element *element)
1671 {
1672   int finished;
1673   raptor_state state;
1674   raptor_xml_element* xml_element=element->xml_element;
1675   const unsigned char *el_name=raptor_xml_element_get_name(xml_element)->local_name;
1676   int element_in_rdf_ns=(raptor_xml_element_get_name(xml_element)->nspace &&
1677                          raptor_xml_element_get_name(xml_element)->nspace->is_rdf_ms);
1678   raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1679   int rc=0;
1680   raptor_uri* base_uri;
1681 
1682   state=element->state;
1683 #ifdef RAPTOR_DEBUG_VERBOSE
1684   RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state));
1685 #endif
1686 
1687   base_uri=raptor_rdfxml_inscope_base_uri(rdf_parser);
1688 
1689   finished= 0;
1690   while(!finished) {
1691     switch(state) {
1692       case RAPTOR_STATE_SKIPPING:
1693         element->child_state=state;
1694         element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
1695         finished=1;
1696         break;
1697 
1698       case RAPTOR_STATE_UNKNOWN:
1699         /* found <rdf:RDF> ? */
1700 
1701         if(element_in_rdf_ns) {
1702           if(raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_RDF_URI(rdf_xml_parser))) {
1703             element->child_state=RAPTOR_STATE_NODE_ELEMENT_LIST;
1704             element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES;
1705             /* Yes - need more content before can continue,
1706              * so wait for another element
1707              */
1708             finished=1;
1709             break;
1710           }
1711           if(raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_Description_URI(rdf_xml_parser))) {
1712             state=RAPTOR_STATE_DESCRIPTION;
1713             element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
1714             /* Yes - found something so move immediately to description */
1715             break;
1716           }
1717 
1718           if(element_in_rdf_ns && (rc=raptor_rdfxml_forbidden_nodeElement_name((const char*)el_name))) {
1719             if(rc > 0) {
1720               raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a node element.", el_name);
1721               state=RAPTOR_STATE_SKIPPING;
1722               element->child_state=RAPTOR_STATE_SKIPPING;
1723               finished=1;
1724               break;
1725             } else
1726               raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name);
1727           }
1728         }
1729 
1730         /* If scanning for element, can continue */
1731         if(rdf_parser->features[RAPTOR_FEATURE_SCANNING]) {
1732           finished=1;
1733           break;
1734         }
1735 
1736         /* Otherwise the choice of the next state can be made
1737          * from the current element by the OBJ state
1738          */
1739         state=RAPTOR_STATE_NODE_ELEMENT_LIST;
1740         element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES;
1741         break;
1742 
1743 
1744       case RAPTOR_STATE_NODE_ELEMENT_LIST:
1745         /* Handling
1746          *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList
1747          *
1748          * Everything goes to nodeElement
1749          */
1750 
1751         state=RAPTOR_STATE_NODE_ELEMENT;
1752 
1753         element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
1754 
1755         break;
1756 
1757 
1758 
1759       case RAPTOR_STATE_DESCRIPTION:
1760       case RAPTOR_STATE_NODE_ELEMENT:
1761       case RAPTOR_STATE_PARSETYPE_RESOURCE:
1762       case RAPTOR_STATE_PARSETYPE_COLLECTION:
1763         /* Handling <rdf:Description> or other node element
1764          *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement
1765          *
1766          * or a property element acting as a node element for
1767          * rdf:parseType="Resource"
1768          *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt
1769          * or rdf:parseType="Collection" (and daml:Collection)
1770          *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt
1771          *
1772          * Only create a bag if bagID given
1773          */
1774 
1775         if(!raptor_xml_element_get_name(xml_element)->uri) {
1776           /* We cannot handle this */
1777           raptor_parser_warning(rdf_parser, "Using node element '%s' without a namespace is forbidden.",
1778                                 raptor_xml_element_get_name(xml_element)->local_name);
1779           raptor_rdfxml_update_document_locator(rdf_parser);
1780           element->state=RAPTOR_STATE_SKIPPING;
1781           element->child_state=RAPTOR_STATE_SKIPPING;
1782           finished=1;
1783           break;
1784         }
1785 
1786         if(element_in_rdf_ns &&
1787            (rc = raptor_rdfxml_forbidden_nodeElement_name((const char*)el_name))) {
1788           if(rc > 0) {
1789             raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a node element.", el_name);
1790             state=RAPTOR_STATE_SKIPPING;
1791             element->state=RAPTOR_STATE_SKIPPING;
1792             element->child_state=RAPTOR_STATE_SKIPPING;
1793             finished=1;
1794             break;
1795           } else
1796             raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name);
1797         }
1798 
1799         if(element->content_type !=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
1800            element->content_type !=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION &&
1801            element->parent &&
1802            (element->parent->state == RAPTOR_STATE_PROPERTYELT ||
1803             element->parent->state == RAPTOR_STATE_MEMBER_PROPERTYELT) &&
1804            element->parent->xml_element->content_element_seen > 1) {
1805           raptor_rdfxml_update_document_locator(rdf_parser);
1806           raptor_parser_error(rdf_parser, "The enclosing property already has an object");
1807           state=RAPTOR_STATE_SKIPPING;
1808           element->child_state=RAPTOR_STATE_SKIPPING;
1809           finished=1;
1810           break;
1811         }
1812 
1813         if(state == RAPTOR_STATE_NODE_ELEMENT ||
1814            state == RAPTOR_STATE_DESCRIPTION ||
1815            state == RAPTOR_STATE_PARSETYPE_COLLECTION) {
1816           if(element_in_rdf_ns &&
1817              raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_Description_URI(rdf_xml_parser)))
1818             state=RAPTOR_STATE_DESCRIPTION;
1819           else
1820             state=RAPTOR_STATE_NODE_ELEMENT;
1821         }
1822 
1823 
1824         if((element->rdf_attr[RDF_ATTR_ID]!=NULL) +
1825            (element->rdf_attr[RDF_ATTR_about]!=NULL) +
1826            (element->rdf_attr[RDF_ATTR_nodeID]!=NULL)>1) {
1827           raptor_rdfxml_update_document_locator(rdf_parser);
1828           raptor_parser_error(rdf_parser, "Multiple attributes of rdf:ID, rdf:about and rdf:nodeID on element '%s' - only one allowed.", el_name);
1829         }
1830 
1831         if(element->rdf_attr[RDF_ATTR_ID]) {
1832           element->subject.id=element->rdf_attr[RDF_ATTR_ID];
1833           element->rdf_attr[RDF_ATTR_ID]=NULL;
1834           element->subject.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->subject.id);
1835           if(!element->subject.uri)
1836             goto oom;
1837           element->subject.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1838           element->subject.uri_source=RAPTOR_URI_SOURCE_ID;
1839           if(!raptor_valid_xml_ID(rdf_parser, element->subject.id)) {
1840             raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", element->subject.id);
1841             state=RAPTOR_STATE_SKIPPING;
1842             element->child_state=RAPTOR_STATE_SKIPPING;
1843             finished=1;
1844             break;
1845           }
1846           if(raptor_rdfxml_record_ID(rdf_parser, element, element->subject.id)) {
1847             raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", element->subject.id);
1848             state=RAPTOR_STATE_SKIPPING;
1849             element->child_state=RAPTOR_STATE_SKIPPING;
1850             finished=1;
1851             break;
1852           }
1853         } else if (element->rdf_attr[RDF_ATTR_about]) {
1854           element->subject.uri=raptor_new_uri_relative_to_base_v2(rdf_parser->world, base_uri, (const unsigned char*)element->rdf_attr[RDF_ATTR_about]);
1855           RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_about]);
1856           element->rdf_attr[RDF_ATTR_about]=NULL;
1857           if(!element->subject.uri)
1858             goto oom;
1859           element->subject.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1860           element->subject.uri_source=RAPTOR_URI_SOURCE_URI;
1861         } else if (element->rdf_attr[RDF_ATTR_nodeID]) {
1862           element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, (unsigned char*)element->rdf_attr[RDF_ATTR_nodeID]);
1863           element->rdf_attr[RDF_ATTR_nodeID]=NULL;
1864           if(!element->subject.id)
1865             goto oom;
1866           element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
1867           element->subject.uri_source=RAPTOR_URI_SOURCE_BLANK_ID;
1868           if(!raptor_valid_xml_ID(rdf_parser, element->subject.id)) {
1869             raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", element->subject.id);
1870             state=RAPTOR_STATE_SKIPPING;
1871             element->child_state=RAPTOR_STATE_SKIPPING;
1872             finished=1;
1873             break;
1874           }
1875         } else if (element->parent &&
1876                    element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
1877                    element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION &&
1878                    (element->parent->object.uri || element->parent->object.id)) {
1879           /* copy from parent (property element), it has a URI for us */
1880           raptor_copy_identifier(&element->subject, &element->parent->object);
1881         } else {
1882           element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
1883           if(!element->subject.id)
1884             goto oom;
1885           element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
1886           element->subject.uri_source=RAPTOR_URI_SOURCE_GENERATED;
1887         }
1888 
1889 
1890         if(element->rdf_attr[RDF_ATTR_bagID]) {
1891           if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) {
1892             element->bag.id=element->rdf_attr[RDF_ATTR_bagID];
1893             element->rdf_attr[RDF_ATTR_bagID]=NULL;
1894             element->bag.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->bag.id);
1895             if(!element->bag.uri)
1896               goto oom;
1897             element->bag.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1898             element->bag.uri_source=RAPTOR_URI_SOURCE_GENERATED;
1899 
1900             if(!raptor_valid_xml_ID(rdf_parser, element->bag.id)) {
1901               raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", element->bag.id);
1902               state=RAPTOR_STATE_SKIPPING;
1903               element->child_state=RAPTOR_STATE_SKIPPING;
1904               finished=1;
1905               break;
1906             }
1907             if(raptor_rdfxml_record_ID(rdf_parser, element, element->bag.id)) {
1908               raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'", element->bag.id);
1909               state=RAPTOR_STATE_SKIPPING;
1910               element->child_state=RAPTOR_STATE_SKIPPING;
1911               finished=1;
1912               break;
1913             }
1914 
1915             raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated.");
1916 
1917             raptor_rdfxml_generate_statement(rdf_parser,
1918                                       element->bag.uri,
1919                                       element->bag.id,
1920                                       element->bag.type,
1921                                       element->bag.uri_source,
1922 
1923                                       RAPTOR_RDF_type_URI(rdf_xml_parser),
1924                                       NULL,
1925                                       RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1926                                       RAPTOR_URI_SOURCE_URI,
1927                                       0,
1928 
1929                                       RAPTOR_RDF_Bag_URI(rdf_xml_parser),
1930                                       NULL,
1931                                       RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1932                                       RAPTOR_URI_SOURCE_NOT_URI,
1933                                       NULL,
1934 
1935                                       NULL,
1936                                       NULL);
1937           } else {
1938             /* bagID forbidden */
1939             raptor_parser_error(rdf_parser, "rdf:bagID is forbidden.");
1940             state=RAPTOR_STATE_SKIPPING;
1941             element->child_state=RAPTOR_STATE_SKIPPING;
1942             finished=1;
1943             break;
1944           }
1945         }
1946 
1947 
1948         if(element->parent) {
1949 
1950           /* In a rdf:parseType="Collection" the resources are appended
1951            * to the list at the genid element->parent->tail_id
1952            */
1953           if (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION ||
1954               element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
1955             const unsigned char * idList = raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
1956 
1957             /* <idList> rdf:type rdf:List */
1958             raptor_uri *collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_List_URI(rdf_xml_parser) : RAPTOR_RDF_List_URI(rdf_xml_parser);
1959 
1960             if(!idList)
1961               goto oom;
1962 
1963             if((element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ||
1964                rdf_parser->features[RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST])
1965               raptor_rdfxml_generate_statement(rdf_parser,
1966                                         NULL,
1967                                         idList,
1968                                         RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
1969                                         RAPTOR_URI_SOURCE_ID,
1970 
1971                                         RAPTOR_RDF_type_URI(rdf_xml_parser),
1972                                         NULL,
1973                                         RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1974                                         RAPTOR_URI_SOURCE_URI,
1975                                         0,
1976 
1977                                         collection_uri,
1978                                         NULL,
1979                                         RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1980                                         RAPTOR_URI_SOURCE_URI,
1981                                         NULL,
1982 
1983                                         NULL,
1984                                         element);
1985 
1986             collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_first_URI(rdf_xml_parser) : RAPTOR_RDF_first_URI(rdf_xml_parser);
1987 
1988             /* <idList> rdf:first <element->uri> */
1989             raptor_rdfxml_generate_statement(rdf_parser,
1990                                       NULL,
1991                                       idList,
1992                                       RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
1993                                       RAPTOR_URI_SOURCE_ID,
1994 
1995                                       collection_uri,
1996                                       NULL,
1997                                       RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1998                                       RAPTOR_URI_SOURCE_URI,
1999                                       0,
2000 
2001                                       element->subject.uri,
2002                                       element->subject.id,
2003                                       element->subject.type,
2004                                       element->subject.uri_source,
2005                                       NULL,
2006 
2007                                       NULL,
2008                                       NULL);
2009 
2010             /* First item of the collection: the parent has no list tail yet */
2011             if (!element->parent->tail_id) {
2012               int len;
2013               unsigned char *new_id;
2014 
2015               /* Free any existing object URI still around
2016                * I suspect this can never happen.
2017                */
2018               if(element->parent->object.uri)
2019                 raptor_free_uri_v2(rdf_parser->world, element->parent->object.uri);
2020 
2021               len=strlen((char*)idList);
2022               new_id=(unsigned char*)RAPTOR_MALLOC(cstring, len+1);
2023               if(!len) {
2024                 if(new_id)
2025                   RAPTOR_FREE(cstring, new_id);
2026                 return;
2027               }
2028               strncpy((char*)new_id, (char*)idList, len+1);
2029 
2030               element->parent->object.id=new_id;
2031               element->parent->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
2032               element->parent->object.uri_source=RAPTOR_URI_SOURCE_ID;
2033             } else {
2034               collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_xml_parser);
2035               /* _:tail_id rdf:rest _:listRest */
2036               raptor_rdfxml_generate_statement(rdf_parser,
2037                                         NULL,
2038                                         element->parent->tail_id,
2039                                         RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
2040                                         RAPTOR_URI_SOURCE_ID,
2041 
2042                                         collection_uri,
2043                                         NULL,
2044                                         RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2045                                         RAPTOR_URI_SOURCE_URI,
2046                                         0,
2047 
2048                                         NULL,
2049                                         idList,
2050                                         RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
2051                                         RAPTOR_URI_SOURCE_ID,
2052                                         NULL,
2053 
2054                                         NULL,
2055                                         NULL);
2056             }
2057 
2058             /* update new tail */
2059             if(element->parent->tail_id)
2060               RAPTOR_FREE(cstring, (char*)element->parent->tail_id);
2061 
2062             element->parent->tail_id=idList;
2063 
2064           } else if(element->parent->state != RAPTOR_STATE_UNKNOWN &&
2065                     element->state != RAPTOR_STATE_PARSETYPE_RESOURCE) {
2066             /* If there is a parent element (property) containing this
2067              * element (node) and it has no object, set it from this subject
2068              */
2069 
2070             if(element->parent->object.uri) {
2071               raptor_rdfxml_update_document_locator(rdf_parser);
2072               raptor_parser_error(rdf_parser, "Tried to set multiple objects of a statement");
2073             } else {
2074               /* Store URI of this node in our parent as the property object */
2075               raptor_copy_identifier(&element->parent->object, &element->subject);
2076               element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2077             }
2078 
2079           }
2080         }
2081 
2082 
2083         /* If this is a node element, generate the rdf:type statement
2084          * from this node
2085          */
2086         if(state == RAPTOR_STATE_NODE_ELEMENT)
2087           raptor_rdfxml_generate_statement(rdf_parser,
2088                                     element->subject.uri,
2089                                     element->subject.id,
2090                                     element->subject.type,
2091                                     element->subject.uri_source,
2092 
2093                                     RAPTOR_RDF_type_URI(rdf_xml_parser),
2094                                     NULL,
2095                                     RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2096                                     RAPTOR_URI_SOURCE_URI,
2097                                     0,
2098 
2099                                     raptor_xml_element_get_name(xml_element)->uri,
2100                                     NULL,
2101                                     RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2102                                     element->object.uri_source,
2103                                     NULL,
2104 
2105                                     &element->reified,
2106                                     element);
2107 
2108         raptor_rdfxml_process_property_attributes(rdf_parser, element, element, NULL);
2109 
2110         /* for both productions now need some more content or
2111          * property elements before can do any more work.
2112          */
2113 
2114         element->child_state=RAPTOR_STATE_PROPERTYELT;
2115         element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2116         finished=1;
2117         break;
2118 
2119 
2120       case RAPTOR_STATE_PARSETYPE_OTHER:
2121         /* FALLTHROUGH */
2122 
2123       case RAPTOR_STATE_PARSETYPE_LITERAL:
2124         raptor_xml_writer_start_element(rdf_xml_parser->xml_writer, xml_element);
2125         element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL;
2126         element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2127 
2128         finished=1;
2129         break;
2130 
2131         /* Handle all the detail of the various options of property element
2132          *   http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
2133          *
2134          * All the attributes must be scanned here to see what additional
2135          * property element work is needed.  No triples are generated
2136          * until the end of this element, until it is clear if the
2137          * element was empty.
2138          */
2139       case RAPTOR_STATE_MEMBER_PROPERTYELT:
2140       case RAPTOR_STATE_PROPERTYELT:
2141 
2142         if(!raptor_xml_element_get_name(xml_element)->uri) {
2143           raptor_parser_error(rdf_parser, "Using property element '%s' without a namespace is forbidden.",
2144                               raptor_xml_element_get_name(element->parent->xml_element)->local_name);
2145           raptor_rdfxml_update_document_locator(rdf_parser);
2146           element->state=RAPTOR_STATE_SKIPPING;
2147           element->child_state=RAPTOR_STATE_SKIPPING;
2148           finished=1;
2149           break;
2150         }
2151 
2152         /* Handling rdf:li as a property, noting special processing */
2153         if(element_in_rdf_ns &&
2154            raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_li_URI(rdf_xml_parser))) {
2155           state=RAPTOR_STATE_MEMBER_PROPERTYELT;
2156         }
2157 
2158 
2159         if(element_in_rdf_ns &&
2160            (rc = raptor_rdfxml_forbidden_propertyElement_name((const char*)el_name))) {
2161           if(rc > 0) {
2162             raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a property element.", el_name);
2163             state=RAPTOR_STATE_SKIPPING;
2164             element->child_state=RAPTOR_STATE_SKIPPING;
2165             finished=1;
2166             break;
2167           } else
2168             raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name);
2169         }
2170 
2171 
2172         /* rdf:ID on a property element - reify a statement.
2173          * Allowed on all property element forms
2174          */
2175         if(element->rdf_attr[RDF_ATTR_ID]) {
2176           element->reified.id=element->rdf_attr[RDF_ATTR_ID];
2177           element->rdf_attr[RDF_ATTR_ID]=NULL;
2178           element->reified.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->reified.id);
2179           if(!element->reified.uri)
2180             goto oom;
2181           element->reified.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2182           element->reified.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2183 
2184           if(!raptor_valid_xml_ID(rdf_parser, element->reified.id)) {
2185             raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", element->reified.id);
2186             state=RAPTOR_STATE_SKIPPING;
2187             element->child_state=RAPTOR_STATE_SKIPPING;
2188             finished=1;
2189             break;
2190           }
2191           if(raptor_rdfxml_record_ID(rdf_parser, element, element->reified.id)) {
2192             raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", element->reified.id);
2193             state=RAPTOR_STATE_SKIPPING;
2194             element->child_state=RAPTOR_STATE_SKIPPING;
2195             finished=1;
2196             break;
2197           }
2198         }
2199 
2200         /* rdf:datatype on a property element.
2201          * Only allowed for
2202          *   http://www.w3.org/TR/rdf-syntax-grammar/#literalPropertyElt
2203          */
2204         if (element->rdf_attr[RDF_ATTR_datatype]) {
2205           element->object_literal_datatype=raptor_new_uri_relative_to_base_v2(rdf_parser->world, base_uri, (const unsigned char*)element->rdf_attr[RDF_ATTR_datatype]);
2206           RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_datatype]);
2207           element->rdf_attr[RDF_ATTR_datatype]=NULL;
2208           if(!element->object_literal_datatype)
2209             goto oom;
2210         }
2211 
2212         if(element->rdf_attr[RDF_ATTR_bagID]) {
2213 
2214           if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) {
2215 
2216             if(element->rdf_attr[RDF_ATTR_resource] ||
2217                element->rdf_attr[RDF_ATTR_parseType]) {
2218 
2219               raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on property element '%s' with an rdf:resource or rdf:parseType attribute.", el_name);
2220               /* prevent this being used later either */
2221               element->rdf_attr[RDF_ATTR_bagID]=NULL;
2222             } else {
2223               element->bag.id=element->rdf_attr[RDF_ATTR_bagID];
2224               element->rdf_attr[RDF_ATTR_bagID]=NULL;
2225               element->bag.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->bag.id);
2226               if(!element->bag.uri)
2227                 goto oom;
2228               element->bag.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2229               element->bag.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2230 
2231               if(!raptor_valid_xml_ID(rdf_parser, element->bag.id)) {
2232                 raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", element->bag.id);
2233                 state=RAPTOR_STATE_SKIPPING;
2234                 element->child_state=RAPTOR_STATE_SKIPPING;
2235                 finished=1;
2236                 break;
2237               }
2238               if(raptor_rdfxml_record_ID(rdf_parser, element, element->bag.id)) {
2239                 raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'", element->bag.id);
2240                 state=RAPTOR_STATE_SKIPPING;
2241                 element->child_state=RAPTOR_STATE_SKIPPING;
2242                 finished=1;
2243                 break;
2244               }
2245 
2246               raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated.");
2247             }
2248           } else {
2249             /* bagID forbidden */
2250             raptor_parser_error(rdf_parser, "rdf:bagID is forbidden.");
2251             state=RAPTOR_STATE_SKIPPING;
2252             element->child_state=RAPTOR_STATE_SKIPPING;
2253             finished=1;
2254             break;
2255           }
2256         } /* if rdf:bagID on property element */
2257 
2258 
2259         element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT;
2260 
2261         if (element->rdf_attr[RDF_ATTR_parseType]) {
2262           const unsigned char *parse_type=element->rdf_attr[RDF_ATTR_parseType];
2263           int i;
2264           int is_parseType_Literal=0;
2265 
2266           if(raptor_rdfxml_element_has_property_attributes(element)) {
2267             raptor_parser_error(rdf_parser, "Property attributes cannot be used with rdf:parseType='%s'", parse_type);
2268             state=RAPTOR_STATE_SKIPPING;
2269             element->child_state=RAPTOR_STATE_SKIPPING;
2270             finished=1;
2271             break;
2272           }
2273 
2274           /* Check for bad combinations of things with parseType */
2275           for(i=0; i<= RDF_ATTR_LAST; i++)
2276             if(element->rdf_attr[i] && i != RDF_ATTR_parseType) {
2277               raptor_parser_error(rdf_parser, "Attribute '%s' cannot be used with rdf:parseType='%s'", rdf_syntax_terms_info[i].name, parse_type);
2278               state=RAPTOR_STATE_SKIPPING;
2279               element->child_state=RAPTOR_STATE_SKIPPING;
2280               finished=1;
2281               break;
2282             }
2283 
2284 
2285           if(!strcmp((char*)parse_type, "Literal"))
2286             is_parseType_Literal=1;
2287           else if (!strcmp((char*)parse_type, "Resource")) {
2288             state=RAPTOR_STATE_PARSETYPE_RESOURCE;
2289             element->child_state=RAPTOR_STATE_PROPERTYELT;
2290             element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2291 
2292             /* create a node for the subject of the contained properties */
2293             element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
2294             if(!element->subject.id)
2295               goto oom;
2296             element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
2297             element->subject.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2298           } else if(!strcmp((char*)parse_type, "Collection")) {
2299             /* An rdf:parseType="Collection" appears as a single node */
2300             element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2301             element->child_state=RAPTOR_STATE_PARSETYPE_COLLECTION;
2302             element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION;
2303           } else {
2304             if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES] &&
2305                !raptor_strcasecmp((char*)parse_type, "daml:collection")) {
2306                 /* A DAML collection appears as a single node */
2307                 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2308                 element->child_state=RAPTOR_STATE_PARSETYPE_COLLECTION;
2309                 element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION;
2310             } else {
2311               if(rdf_parser->features[RAPTOR_FEATURE_WARN_OTHER_PARSETYPES]) {
2312                 raptor_parser_warning(rdf_parser, "Unknown rdf:parseType value '%s' taken as 'Literal'", parse_type);
2313               }
2314               is_parseType_Literal=1;
2315             }
2316 
2317           }
2318 
2319           if(is_parseType_Literal) {
2320             /* rdf:parseType="Literal" - explicitly or default
2321              * if the parseType value is not recognised
2322              */
2323             rdf_xml_parser->xml_content=NULL;
2324             rdf_xml_parser->xml_content_length=0;
2325             rdf_xml_parser->iostream=raptor_new_iostream_to_string(&rdf_xml_parser->xml_content, &rdf_xml_parser->xml_content_length, raptor_alloc_memory);
2326             if(!rdf_xml_parser->iostream)
2327               goto oom;
2328             rdf_xml_parser->xml_writer=raptor_new_xml_writer_v2(rdf_parser->world,
2329                                                                 NULL,
2330                                                                 rdf_xml_parser->iostream,
2331                                                                 (raptor_simple_message_handler)raptor_parser_simple_error, rdf_parser,
2332                                                                 1);
2333             if(!rdf_xml_parser->xml_writer)
2334               goto oom;
2335 
2336             raptor_xml_writer_set_feature(rdf_xml_parser->xml_writer,
2337                                           RAPTOR_FEATURE_WRITER_XML_DECLARATION, 0);
2338 
2339             element->child_state=RAPTOR_STATE_PARSETYPE_LITERAL;
2340             element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2341             element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2342           }
2343         } else {
2344 
2345           /* Can only be the empty property element case
2346            *   http://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt
2347            */
2348 
2349           /* The presence of the rdf:resource or rdf:nodeID
2350            * attributes is checked at element close time
2351            */
2352 
2353           /*
2354            * Assign reified URI here so we don't reify property attributes
2355            * using this id
2356            */
2357           if(element->reified.id && !element->reified.uri) {
2358             element->reified.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->reified.id);
2359             if(!element->reified.uri)
2360               goto oom;
2361             element->reified.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2362             element->reified.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2363           }
2364 
2365           if(element->rdf_attr[RDF_ATTR_resource] ||
2366              element->rdf_attr[RDF_ATTR_nodeID]) {
2367             /* Done - wait for the end of this element in order to
2368              * check the element was empty as expected */
2369             element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2370           } else {
2371             /* Otherwise process content in obj (value) state */
2372             element->child_state=RAPTOR_STATE_NODE_ELEMENT_LIST;
2373             element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT;
2374           }
2375         }
2376 
2377         finished=1;
2378 
2379         break;
2380 
2381 
2382       case RAPTOR_STATE_INVALID:
2383       default:
2384         raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_start_element_grammar: Unexpected parser state %d - %s", state, raptor_rdfxml_state_as_string(state));
2385         finished=1;
2386 
2387     } /* end switch */
2388 
2389     if(state != element->state) {
2390       element->state=state;
2391 #ifdef RAPTOR_DEBUG_VERBOSE
2392       RAPTOR_DEBUG3("Moved to state %d - %s\n", state, raptor_rdfxml_state_as_string(state));
2393 #endif
2394     }
2395 
2396   } /* end while */
2397 
2398 #ifdef RAPTOR_DEBUG_VERBOSE
2399   RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
2400 #endif
2401 
2402   return;
2403 
2404   oom:
2405   raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping");
2406   element->state=RAPTOR_STATE_SKIPPING;
2407 }
2408 
2409 
2410 static void
raptor_rdfxml_end_element_grammar(raptor_parser * rdf_parser,raptor_rdfxml_element * element)2411 raptor_rdfxml_end_element_grammar(raptor_parser *rdf_parser,
2412                                   raptor_rdfxml_element *element)
2413 {
2414   raptor_state state;
2415   int finished;
2416   raptor_xml_element* xml_element=element->xml_element;
2417   const unsigned char *el_name=raptor_xml_element_get_name(xml_element)->local_name;
2418   int element_in_rdf_ns=(raptor_xml_element_get_name(xml_element)->nspace &&
2419                          raptor_xml_element_get_name(xml_element)->nspace->is_rdf_ms);
2420   raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
2421 
2422 
2423   state=element->state;
2424 #ifdef RAPTOR_DEBUG_VERBOSE
2425   RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state));
2426 #endif
2427 
2428   finished= 0;
2429   while(!finished) {
2430     switch(state) {
2431       case RAPTOR_STATE_SKIPPING:
2432         finished=1;
2433         break;
2434 
2435       case RAPTOR_STATE_UNKNOWN:
2436         finished=1;
2437         break;
2438 
2439       case RAPTOR_STATE_NODE_ELEMENT_LIST:
2440         if(element_in_rdf_ns &&
2441            raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_RDF_URI(rdf_xml_parser))) {
2442           /* end of RDF - boo hoo */
2443           state=RAPTOR_STATE_UNKNOWN;
2444           finished=1;
2445           break;
2446         }
2447         /* When scanning, another element ending is outside the RDF
2448          * world so this can happen without further work
2449          */
2450         if(rdf_parser->features[RAPTOR_FEATURE_SCANNING]) {
2451           state=RAPTOR_STATE_UNKNOWN;
2452           finished=1;
2453           break;
2454         }
2455         /* otherwise found some junk after RDF content in an RDF-only
2456          * document (probably never get here since this would be
2457          * a mismatched XML tag and cause an error earlier)
2458          */
2459         raptor_rdfxml_update_document_locator(rdf_parser);
2460         raptor_parser_warning(rdf_parser, "Element '%s' ended, expected end of RDF element", el_name);
2461         state=RAPTOR_STATE_UNKNOWN;
2462         finished=1;
2463         break;
2464 
2465 
2466       case RAPTOR_STATE_DESCRIPTION:
2467       case RAPTOR_STATE_NODE_ELEMENT:
2468       case RAPTOR_STATE_PARSETYPE_RESOURCE:
2469 
2470         /* If there is a parent element containing this element and
2471          * the parent isn't a description, has an identifier,
2472          * create the statement between this node using parent property
2473          * (Need to check for identifier so that top-level typed nodes
2474          * don't get connected to the <rdf:RDF> parent element)
2475          */
2476         if(state == RAPTOR_STATE_NODE_ELEMENT &&
2477            element->parent &&
2478            (element->parent->subject.uri || element->parent->subject.id))
2479           raptor_rdfxml_generate_statement(rdf_parser,
2480                                     element->parent->subject.uri,
2481                                     element->parent->subject.id,
2482                                     element->parent->subject.type,
2483                                     element->parent->subject.uri_source,
2484 
2485                                     raptor_xml_element_get_name(element->parent->xml_element)->uri,
2486                                     NULL,
2487                                     RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2488                                     RAPTOR_URI_SOURCE_ELEMENT,
2489                                     0,
2490 
2491                                     element->subject.uri,
2492                                     element->subject.id,
2493                                     element->subject.type,
2494                                     element->subject.uri_source,
2495                                     NULL,
2496 
2497                                     NULL,
2498                                     element);
2499         else if(state == RAPTOR_STATE_PARSETYPE_RESOURCE &&
2500                 element->parent &&
2501                 (element->parent->subject.uri || element->parent->subject.id)) {
2502           /* Handle rdf:li as the rdf:parseType="Resource" property */
2503           if(element_in_rdf_ns &&
2504              raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_li_URI(rdf_xml_parser))) {
2505             element->parent->last_ordinal++;
2506             raptor_rdfxml_generate_statement(rdf_parser,
2507                                       element->parent->subject.uri,
2508                                       element->parent->subject.id,
2509                                       element->parent->subject.type,
2510                                       element->parent->subject.uri_source,
2511 
2512                                       NULL,
2513                                       NULL,
2514                                       RAPTOR_IDENTIFIER_TYPE_ORDINAL,
2515                                       RAPTOR_URI_SOURCE_NOT_URI,
2516                                       element->parent->last_ordinal,
2517 
2518                                       element->subject.uri,
2519                                       element->subject.id,
2520                                       element->subject.type,
2521                                       element->subject.uri_source,
2522                                       NULL,
2523 
2524                                       &element->reified,
2525                                       element->parent);
2526           } else {
2527             raptor_rdfxml_generate_statement(rdf_parser,
2528                                       element->parent->subject.uri,
2529                                       element->parent->subject.id,
2530                                       element->parent->subject.type,
2531                                       element->parent->subject.uri_source,
2532 
2533                                       raptor_xml_element_get_name(xml_element)->uri,
2534                                       NULL,
2535                                       RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2536                                       RAPTOR_URI_SOURCE_ELEMENT,
2537                                       0,
2538 
2539                                       element->subject.uri,
2540                                       element->subject.id,
2541                                       element->subject.type,
2542                                       element->subject.uri_source,
2543                                       NULL,
2544 
2545                                       &element->reified,
2546                                       element->parent);
2547           }
2548         }
2549         finished=1;
2550         break;
2551 
2552       case RAPTOR_STATE_PARSETYPE_COLLECTION:
2553 
2554         finished=1;
2555         break;
2556 
2557       case RAPTOR_STATE_PARSETYPE_OTHER:
2558         /* FALLTHROUGH */
2559 
2560       case RAPTOR_STATE_PARSETYPE_LITERAL:
2561         element->parent->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2562 
2563         raptor_xml_writer_end_element(rdf_xml_parser->xml_writer, xml_element);
2564 
2565         finished=1;
2566         break;
2567 
2568 
2569       case RAPTOR_STATE_PROPERTYELT:
2570       case RAPTOR_STATE_MEMBER_PROPERTYELT:
2571         /* A property element
2572          *   http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
2573          *
2574          * Literal content part is handled here.
2575          * The element content is handled in the internal states
2576          * Empty content is checked here.
2577          */
2578 
2579         if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
2580           if(xml_element->content_cdata_seen)
2581             element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
2582           else if (xml_element->content_element_seen)
2583             element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2584           else { /* Empty Literal */
2585             element->object.type= RAPTOR_IDENTIFIER_TYPE_LITERAL;
2586             element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
2587           }
2588 
2589         }
2590 
2591 
2592         /* Handle terminating a rdf:parseType="Collection" list */
2593         if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION ||
2594            element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
2595           raptor_uri* nil_uri=(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_nil_URI(rdf_xml_parser) : RAPTOR_RDF_nil_URI(rdf_xml_parser);
2596           if (!element->tail_id) {
2597             /* If No List: set object of statement to rdf:nil */
2598             element->object.uri= raptor_uri_copy_v2(rdf_parser->world, nil_uri);
2599             element->object.id= NULL;
2600             element->object.type= RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2601             element->object.uri_source= RAPTOR_URI_SOURCE_URI;
2602           } else {
2603             raptor_uri* rest_uri=(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_xml_parser);
2604             /* terminate the list */
2605             raptor_rdfxml_generate_statement(rdf_parser,
2606                                       NULL,
2607                                       element->tail_id,
2608                                       RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
2609                                       RAPTOR_URI_SOURCE_ID,
2610 
2611                                       rest_uri,
2612                                       NULL,
2613                                       RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2614                                       RAPTOR_URI_SOURCE_URI,
2615                                       0,
2616 
2617                                       nil_uri,
2618                                       NULL,
2619                                       RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2620                                       RAPTOR_URI_SOURCE_URI,
2621                                       NULL,
2622 
2623                                       NULL,
2624                                       NULL);
2625           }
2626 
2627         } /* end rdf:parseType="Collection" termination */
2628 
2629 
2630 #ifdef RAPTOR_DEBUG_VERBOSE
2631         RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
2632 #endif
2633 
2634         switch(element->content_type) {
2635           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE:
2636 
2637             if(raptor_rdfxml_element_has_property_attributes(element) &&
2638                element->child_state == RAPTOR_STATE_DESCRIPTION) {
2639               raptor_parser_error(rdf_parser, "Property element '%s' has both property attributes and a node element content", el_name);
2640               state=RAPTOR_STATE_SKIPPING;
2641               element->child_state=RAPTOR_STATE_SKIPPING;
2642               finished=1;
2643               break;
2644             }
2645 
2646             if(element->object.type == RAPTOR_IDENTIFIER_TYPE_UNKNOWN) {
2647               if(element->rdf_attr[RDF_ATTR_resource]) {
2648                 element->object.uri=raptor_new_uri_relative_to_base_v2(rdf_parser->world,
2649                                                                        raptor_rdfxml_inscope_base_uri(rdf_parser),
2650                                                                        (const unsigned char*)element->rdf_attr[RDF_ATTR_resource]);
2651                 RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_resource]);
2652                 element->rdf_attr[RDF_ATTR_resource]=NULL;
2653                 if(!element->object.uri)
2654                   goto oom;
2655                 element->object.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2656                 element->object.uri_source=RAPTOR_URI_SOURCE_URI;
2657                 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2658               } else if(element->rdf_attr[RDF_ATTR_nodeID]) {
2659                 element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, (unsigned char*)element->rdf_attr[RDF_ATTR_nodeID]);
2660                 element->rdf_attr[RDF_ATTR_nodeID]=NULL;
2661                 if(!element->object.id)
2662                   goto oom;
2663                 element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
2664                 element->object.uri_source=RAPTOR_URI_SOURCE_BLANK_ID;
2665                 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2666                 if(!raptor_valid_xml_ID(rdf_parser, element->object.id)) {
2667                   raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", element->object.id);
2668                   state=RAPTOR_STATE_SKIPPING;
2669                   element->child_state=RAPTOR_STATE_SKIPPING;
2670                   finished=1;
2671                   break;
2672                 }
2673               } else {
2674                 element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
2675                 if(!element->object.id)
2676                   goto oom;
2677                 element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
2678                 element->object.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2679                 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2680               }
2681 
2682               raptor_rdfxml_process_property_attributes(rdf_parser, element,
2683                                                         element->parent,
2684                                                         &element->object);
2685 
2686             }
2687 
2688             /* We know object is a resource, so delete any insignificant
2689              * whitespace so that FALLTHROUGH code below finds the object.
2690              */
2691             if(xml_element->content_cdata_length) {
2692               raptor_free_stringbuffer(xml_element->content_cdata_sb);
2693               xml_element->content_cdata_sb=NULL;
2694               xml_element->content_cdata_length=0;
2695             }
2696 
2697             /* FALLTHROUGH */
2698           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL:
2699 
2700             if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {
2701 
2702               if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) {
2703                 /* Only an empty literal can have a rdf:bagID */
2704                 if(element->bag.uri || element->bag.id) {
2705                   if(xml_element->content_cdata_length > 0) {
2706                     raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on a literal property element '%s'.", el_name);
2707                     /* prevent this being used later either */
2708                     element->rdf_attr[RDF_ATTR_bagID]=NULL;
2709                   } else
2710                     raptor_rdfxml_generate_statement(rdf_parser,
2711                                               element->bag.uri,
2712                                               element->bag.id,
2713                                               element->bag.type,
2714                                               element->bag.uri_source,
2715 
2716                                               RAPTOR_RDF_type_URI(rdf_xml_parser),
2717                                               NULL,
2718                                               RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2719                                               RAPTOR_URI_SOURCE_URI,
2720                                               0,
2721 
2722                                               RAPTOR_RDF_Bag_URI(rdf_xml_parser),
2723                                               NULL,
2724                                               RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2725                                               RAPTOR_URI_SOURCE_NOT_URI,
2726                                               NULL,
2727 
2728                                               NULL,
2729                                               NULL);
2730                 }
2731               } /* if rdf:bagID */
2732 
2733               /* If there is empty literal content with properties
2734                * generate a node to hang properties off
2735                */
2736               if(raptor_rdfxml_element_has_property_attributes(element) &&
2737                  xml_element->content_cdata_length > 0) {
2738                 raptor_parser_error(rdf_parser, "Literal property element '%s' has property attributes", el_name);
2739                 state=RAPTOR_STATE_SKIPPING;
2740                 element->child_state=RAPTOR_STATE_SKIPPING;
2741                 finished=1;
2742                 break;
2743               }
2744 
2745               if(element->object.type == RAPTOR_IDENTIFIER_TYPE_LITERAL &&
2746                  raptor_rdfxml_element_has_property_attributes(element) &&
2747                  !element->object.uri) {
2748                 element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
2749                 if(!element->object.id)
2750                   goto oom;
2751                 element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
2752                 element->object.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2753                 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2754               }
2755 
2756               raptor_rdfxml_process_property_attributes(rdf_parser, element,
2757                                                         element,
2758                                                         &element->object);
2759             }
2760 
2761 
2762             /* just be friendly to older compilers and don't declare
2763              * variables in the middle of a block
2764              */
2765             if(1) {
2766               raptor_uri *predicate_uri=NULL;
2767               raptor_identifier_type predicate_type;
2768               int predicate_ordinal=0;
2769               raptor_uri *object_uri;
2770               raptor_identifier_type object_type;
2771               raptor_uri *literal_datatype=NULL;
2772               const unsigned char* empty_literal=(const unsigned char*)"";
2773 
2774               if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
2775                 element->parent->last_ordinal++;
2776                 predicate_ordinal=element->parent->last_ordinal;
2777                 predicate_type=RAPTOR_IDENTIFIER_TYPE_ORDINAL;
2778 
2779               } else {
2780                 predicate_uri=raptor_xml_element_get_name(xml_element)->uri;
2781                 predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2782               }
2783 
2784 
2785               if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {
2786                 unsigned char* literal;
2787 
2788                 object_type=RAPTOR_IDENTIFIER_TYPE_LITERAL;
2789                 literal=raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
2790                 literal_datatype=element->object_literal_datatype;
2791 
2792                 if(!literal_datatype && literal &&
2793                    !raptor_utf8_is_nfc(literal, xml_element->content_cdata_length)) {
2794                   const char *message="Property element '%s' has a string not in Unicode Normal Form C: %s";
2795                   raptor_rdfxml_update_document_locator(rdf_parser);
2796                   if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
2797                     raptor_parser_error(rdf_parser, message, el_name, literal);
2798                   else
2799                     raptor_parser_warning(rdf_parser, message, el_name, literal);
2800                 }
2801 
2802                 if(!literal)
2803                   /* empty literal */
2804                   literal=(unsigned char*)empty_literal;
2805 
2806                 object_uri=(raptor_uri*)literal;
2807               } else {
2808                 object_type=element->object.type;
2809                 object_uri=element->object.uri;
2810               }
2811 
2812               raptor_rdfxml_generate_statement(rdf_parser,
2813                                         element->parent->subject.uri,
2814                                         element->parent->subject.id,
2815                                         element->parent->subject.type,
2816                                         RAPTOR_URI_SOURCE_ELEMENT,
2817 
2818                                         predicate_uri,
2819                                         NULL,
2820                                         predicate_type,
2821                                         RAPTOR_URI_SOURCE_NOT_URI,
2822                                         predicate_ordinal,
2823 
2824                                         object_uri,
2825                                         element->object.id,
2826                                         object_type,
2827                                         element->object.uri_source,
2828                                         literal_datatype,
2829 
2830                                         &element->reified,
2831                                         element->parent);
2832 
2833             }
2834 
2835             break;
2836 
2837         case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED:
2838         case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL:
2839             {
2840               unsigned char *buffer;
2841               unsigned int length;
2842 
2843               if(rdf_xml_parser->xml_writer) {
2844                 raptor_xml_writer_flush(rdf_xml_parser->xml_writer);
2845 
2846                 raptor_free_iostream(rdf_xml_parser->iostream);
2847                 rdf_xml_parser->iostream=NULL;
2848 
2849                 buffer=(unsigned char*)rdf_xml_parser->xml_content;
2850                 length=rdf_xml_parser->xml_content_length;
2851               } else {
2852                 buffer=raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
2853                 length=xml_element->content_cdata_length;
2854               }
2855 
2856               if(!raptor_utf8_is_nfc(buffer, length)) {
2857                 const char *message="Property element '%s' has XML literal content not in Unicode Normal Form C: %s";
2858                 raptor_rdfxml_update_document_locator(rdf_parser);
2859                 if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
2860                   raptor_parser_error(rdf_parser, message, el_name, buffer);
2861                 else
2862                   raptor_parser_warning(rdf_parser, message, el_name, buffer);
2863               }
2864 
2865 
2866               if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
2867                 element->parent->last_ordinal++;
2868                 raptor_rdfxml_generate_statement(rdf_parser,
2869                                           element->parent->subject.uri,
2870                                           element->parent->subject.id,
2871                                           element->parent->subject.type,
2872                                           element->parent->subject.uri_source,
2873 
2874                                           NULL,
2875                                           NULL,
2876                                           RAPTOR_IDENTIFIER_TYPE_ORDINAL,
2877                                           RAPTOR_URI_SOURCE_NOT_URI,
2878                                           element->parent->last_ordinal,
2879 
2880                                           (raptor_uri*)buffer,
2881                                           NULL,
2882                                           RAPTOR_IDENTIFIER_TYPE_LITERAL,
2883                                           RAPTOR_URI_SOURCE_NOT_URI,
2884                                           RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser),
2885 
2886                                           &element->reified,
2887                                           element->parent);
2888               } else {
2889                 raptor_rdfxml_generate_statement(rdf_parser,
2890                                           element->parent->subject.uri,
2891                                           element->parent->subject.id,
2892                                           element->parent->subject.type,
2893                                           element->parent->subject.uri_source,
2894 
2895                                           raptor_xml_element_get_name(xml_element)->uri,
2896                                           NULL,
2897                                           RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2898                                           RAPTOR_URI_SOURCE_ELEMENT,
2899                                           0,
2900 
2901                                           (raptor_uri*)buffer,
2902                                           NULL,
2903                                           RAPTOR_IDENTIFIER_TYPE_LITERAL,
2904                                           RAPTOR_URI_SOURCE_NOT_URI,
2905                                           RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser),
2906 
2907                                           &element->reified,
2908                                           element->parent);
2909               }
2910 
2911               /* Finish the xml writer iostream for parseType="Literal" */
2912               if(rdf_xml_parser->xml_writer) {
2913                 raptor_free_xml_writer(rdf_xml_parser->xml_writer);
2914                 RAPTOR_FREE(cstring, rdf_xml_parser->xml_content);
2915                 rdf_xml_parser->xml_content=NULL;
2916                 rdf_xml_parser->xml_content_length=0;
2917               }
2918             }
2919 
2920           break;
2921 
2922           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION:
2923           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION:
2924 
2925           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES:
2926           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES:
2927           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT:
2928 
2929           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN:
2930           case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST:
2931           default:
2932             raptor_parser_fatal_error(rdf_parser, "%s: Internal error in state RAPTOR_STATE_PROPERTYELT - got unexpected content type %s (%d)", __func__, raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
2933         } /* end switch */
2934 
2935       finished=1;
2936       break;
2937 
2938       case RAPTOR_STATE_INVALID:
2939       default:
2940         raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_end_element_grammar: Unexpected parser state %d - %s", state, raptor_rdfxml_state_as_string(state));
2941         finished=1;
2942 
2943     } /* end switch */
2944 
2945     if(state != element->state) {
2946       element->state=state;
2947 #ifdef RAPTOR_DEBUG_VERBOSE
2948       RAPTOR_DEBUG3("Moved to state %d - %s\n", state, raptor_rdfxml_state_as_string(state));
2949 #endif
2950     }
2951 
2952   } /* end while */
2953 
2954 #ifdef RAPTOR_DEBUG_VERBOSE
2955   RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
2956 #endif
2957 
2958   return;
2959 
2960   oom:
2961   raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping");
2962   element->state=RAPTOR_STATE_SKIPPING;
2963 }
2964 
2965 
2966 
2967 static void
raptor_rdfxml_cdata_grammar(raptor_parser * rdf_parser,const unsigned char * s,int len,int is_cdata)2968 raptor_rdfxml_cdata_grammar(raptor_parser *rdf_parser,
2969                             const unsigned char *s, int len,
2970                             int is_cdata)
2971 {
2972   raptor_rdfxml_parser* rdf_xml_parser;
2973   raptor_rdfxml_element* element;
2974   raptor_xml_element* xml_element;
2975   raptor_state state;
2976   int all_whitespace=1;
2977   int i;
2978 
2979   rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
2980 
2981   if(rdf_parser->failed)
2982     return;
2983 
2984 #ifdef RAPTOR_DEBUG_CDATA
2985   RAPTOR_DEBUG2("Adding characters (is_cdata=%d): '", is_cdata);
2986   (void)fwrite(s, 1, len, stderr);
2987   fprintf(stderr, "' (%d bytes)\n", len);
2988 #endif
2989 
2990   for(i=0; i<len; i++)
2991     if(!isspace(s[i])) {
2992       all_whitespace=0;
2993       break;
2994     }
2995 
2996   element=rdf_xml_parser->current_element;
2997 
2998   /* this file is very broke - probably not XML, whatever */
2999   if(!element)
3000     return;
3001 
3002   xml_element=element->xml_element;
3003 
3004   raptor_rdfxml_update_document_locator(rdf_parser);
3005 
3006   /* cdata never changes the parser state
3007    * and the containing element state always determines what to do.
3008    * Use the child_state first if there is one, since that applies
3009    */
3010   state=element->child_state;
3011 #ifdef RAPTOR_DEBUG_VERBOSE
3012   RAPTOR_DEBUG2("Working in state %s\n", raptor_rdfxml_state_as_string(state));
3013 #endif
3014 
3015 
3016 #ifdef RAPTOR_DEBUG_VERBOSE
3017   RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
3018 #endif
3019 
3020 
3021 
3022   if(state == RAPTOR_STATE_SKIPPING)
3023     return;
3024 
3025   if(state == RAPTOR_STATE_UNKNOWN) {
3026     /* Ignore all cdata if still looking for RDF */
3027     if(rdf_parser->features[RAPTOR_FEATURE_SCANNING])
3028       return;
3029 
3030     /* Ignore all whitespace cdata before first element */
3031     if(all_whitespace)
3032       return;
3033 
3034     /* This probably will never happen since that would make the
3035      * XML not be well-formed
3036      */
3037     raptor_parser_warning(rdf_parser, "Character data before RDF element.");
3038   }
3039 
3040 
3041   if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES) {
3042     /* If found non-whitespace content, move to literal content */
3043     if(!all_whitespace)
3044       element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
3045   }
3046 
3047 
3048   if(!rdf_content_type_info[element->child_content_type].whitespace_significant) {
3049 
3050     /* Whitespace is ignored except for literal or preserved content types */
3051     if(all_whitespace) {
3052 #ifdef RAPTOR_DEBUG_CDATA
3053       RAPTOR_DEBUG2("Ignoring whitespace cdata inside element '%s'\n", raptor_xml_element_get_name(element->parent->xml_element)->local_name);
3054 #endif
3055       return;
3056     }
3057 
3058     if(xml_element->content_cdata_seen && xml_element->content_element_seen) {
3059       /* Uh oh - mixed content, this element has elements too */
3060       raptor_parser_warning(rdf_parser, "element '%s' has mixed content.",
3061                             raptor_xml_element_get_name(element->parent->xml_element)->local_name);
3062     }
3063   }
3064 
3065 
3066   if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
3067     element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
3068 #ifdef RAPTOR_DEBUG_VERBOSE
3069     RAPTOR_DEBUG3("Content type changed to %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
3070 #endif
3071   }
3072 
3073   if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
3074     raptor_xml_writer_cdata_counted(rdf_xml_parser->xml_writer, s, len);
3075   else {
3076     raptor_stringbuffer_append_counted_string(xml_element->content_cdata_sb,
3077                                               s, len, 1);
3078     element->content_cdata_all_whitespace &= all_whitespace;
3079 
3080     /* adjust stored length */
3081     xml_element->content_cdata_length += len;
3082   }
3083 
3084 
3085 #ifdef RAPTOR_DEBUG_CDATA
3086   RAPTOR_DEBUG3("Content cdata now: %d bytes\n", xml_element->content_cdata_length);
3087 #endif
3088 #ifdef RAPTOR_DEBUG_VERBOSE
3089   RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
3090 #endif
3091 }
3092 
3093 
3094 
3095 /**
3096  * raptor_rdfxml_inscope_base_uri:
3097  * @rdf_parser: Raptor parser object
3098  *
3099  * Return the in-scope base URI.
3100  *
3101  * Looks for the innermost xml:base on an element or document URI
3102  *
3103  * Return value: The URI string value or NULL on failure.
3104  **/
3105 static raptor_uri*
raptor_rdfxml_inscope_base_uri(raptor_parser * rdf_parser)3106 raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser)
3107 {
3108   raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
3109   raptor_uri* base_uri;
3110 
3111   base_uri=raptor_sax2_inscope_base_uri(rdf_xml_parser->sax2);
3112   if(!base_uri)
3113     base_uri=rdf_parser->base_uri;
3114 
3115   return base_uri;
3116 }
3117 
3118 
3119 /**
3120  * raptor_rdfxml_record_ID:
3121  * @rdf_parser: Raptor parser object
3122  * @element: Current element
3123  * @id: ID string
3124  *
3125  * Record an rdf:ID / rdf:bagID value (with xml base) and check it hasn't been seen already.
3126  *
3127  * Record and check the ID values, if they have been seen already.
3128  * per in-scope-base URI.
3129  *
3130  * Return value: non-zero if already seen, or failure
3131  **/
3132 static int
raptor_rdfxml_record_ID(raptor_parser * rdf_parser,raptor_rdfxml_element * element,const unsigned char * id)3133 raptor_rdfxml_record_ID(raptor_parser *rdf_parser,
3134                         raptor_rdfxml_element *element,
3135                         const unsigned char *id)
3136 {
3137   raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
3138   raptor_uri* base_uri=raptor_rdfxml_inscope_base_uri(rdf_parser);
3139   size_t id_len=strlen((const char*)id);
3140   int rc;
3141 
3142   if(!rdf_parser->features[RAPTOR_FEATURE_CHECK_RDF_ID])
3143     return 0;
3144 
3145   rc=raptor_id_set_add(rdf_xml_parser->id_set, base_uri, id, id_len);
3146 
3147   return (rc != 0);
3148 }
3149 
3150 
3151 
3152 static void
raptor_rdfxml_update_document_locator(raptor_parser * rdf_parser)3153 raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser)
3154 {
3155   raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
3156   raptor_sax2_update_document_locator(rdf_xml_parser->sax2,
3157                                       &rdf_parser->locator);
3158 }
3159 
3160 
3161 
3162 static void
raptor_rdfxml_parse_finish_factory(raptor_parser_factory * factory)3163 raptor_rdfxml_parse_finish_factory(raptor_parser_factory* factory)
3164 {
3165 }
3166 
3167 
3168 static int
raptor_rdfxml_parser_register_factory(raptor_parser_factory * factory)3169 raptor_rdfxml_parser_register_factory(raptor_parser_factory *factory)
3170 {
3171   int rc=0;
3172 
3173   factory->context_length     = sizeof(raptor_rdfxml_parser);
3174 
3175   factory->need_base_uri = 1;
3176 
3177   factory->init      = raptor_rdfxml_parse_init;
3178   factory->terminate = raptor_rdfxml_parse_terminate;
3179   factory->start     = raptor_rdfxml_parse_start;
3180   factory->chunk     = raptor_rdfxml_parse_chunk;
3181   factory->finish_factory = raptor_rdfxml_parse_finish_factory;
3182   factory->recognise_syntax = raptor_rdfxml_parse_recognise_syntax;
3183 
3184   rc+= raptor_parser_factory_add_alias(factory, "raptor") != 0;
3185 
3186   rc+= raptor_parser_factory_add_uri(factory,
3187                                      (const unsigned char*)"http://www.w3.org/TR/rdf-syntax-grammar") != 0;
3188 
3189   rc+= raptor_parser_factory_add_mime_type(factory, "application/rdf+xml", 10) != 0;
3190   rc+= raptor_parser_factory_add_mime_type(factory, "text/rdf", 6) != 0;
3191 
3192   return rc;
3193 }
3194 
3195 
3196 int
raptor_init_parser_rdfxml(raptor_world * world)3197 raptor_init_parser_rdfxml(raptor_world* world)
3198 {
3199   return !raptor_parser_register_factory(world, "rdfxml", "RDF/XML",
3200                                          &raptor_rdfxml_parser_register_factory);
3201 }
3202 
3203 
3204 #if RAPTOR_DEBUG > 1
3205 void
raptor_rdfxml_parser_stats_print(raptor_rdfxml_parser * rdf_xml_parser,FILE * stream)3206 raptor_rdfxml_parser_stats_print(raptor_rdfxml_parser* rdf_xml_parser,
3207                                  FILE *stream)
3208 {
3209   fputs("rdf:ID set ", stream);
3210   raptor_id_set_stats_print(rdf_xml_parser->id_set, stream);
3211 }
3212 #endif
3213