1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * raptor_rdfxml.c - Raptor RDF/XML Parser
4 *
5 * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
6 * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
7 *
8 * This package is Free Software and part of Redland http://librdf.org/
9 *
10 * It is licensed under the following three licenses as alternatives:
11 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12 * 2. GNU General Public License (GPL) V2 or any newer version
13 * 3. Apache License, V2.0 or any newer version
14 *
15 * You may not use this file except in compliance with at least one of
16 * the above three licenses.
17 *
18 * See LICENSE.html or LICENSE.txt at the top of this package for the
19 * complete terms and further detail along with the license texts for
20 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21 *
22 *
23 */
24
25
26 #ifdef HAVE_CONFIG_H
27 #include <raptor_config.h>
28 #endif
29
30 #ifdef WIN32
31 #include <win32_raptor_config.h>
32 #endif
33
34
35 #include <stdio.h>
36 #include <string.h>
37 #include <ctype.h>
38 #include <stdarg.h>
39 #ifdef HAVE_ERRNO_H
40 #include <errno.h>
41 #endif
42 #ifdef HAVE_STDLIB_H
43 #include <stdlib.h>
44 #endif
45
46 /* Raptor includes */
47 #include "raptor.h"
48 #include "raptor_internal.h"
49
50
51 /* Define these for far too much output */
52 #undef RAPTOR_DEBUG_VERBOSE
53 #undef RAPTOR_DEBUG_CDATA
54
55
56 /* Raptor structures */
57
/*
 * Grammar states of the RDF/XML parser.
 *
 * The numeric value of each state is used as an index into
 * raptor_state_names[] below, so the two must be kept in the same order.
 */
typedef enum {
  /* Catch uninitialised state */
  RAPTOR_STATE_INVALID = 0,

  /* Skipping current tree of elements - used to recover from finding
   * illegal content, when parsing permissively.
   */
  RAPTOR_STATE_SKIPPING,

  /* Not in RDF grammar yet - searching for a start element.
   *
   * This can be <rdf:RDF> (goto NODE_ELEMENT_LIST) but since it is optional,
   * the start element can also be one of
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementURIs
   *
   * If RDF content is assumed, go straight to OBJ
   */
  RAPTOR_STATE_UNKNOWN,

  /* A list of node elements
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList
   */
  RAPTOR_STATE_NODE_ELEMENT_LIST,

  /* Found an <rdf:Description> */
  RAPTOR_STATE_DESCRIPTION,

  /* Found a property element
   *   http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
   */
  RAPTOR_STATE_PROPERTYELT,

  /* A property element that is an ordinal - rdf:li, rdf:_n
   */
  RAPTOR_STATE_MEMBER_PROPERTYELT,

  /* Found a node element
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement
   */
  RAPTOR_STATE_NODE_ELEMENT,

  /* A property element with rdf:parseType="Literal"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeLiteralPropertyElt
   */
  RAPTOR_STATE_PARSETYPE_LITERAL,

  /* A property element with rdf:parseType="Resource"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt
   */
  RAPTOR_STATE_PARSETYPE_RESOURCE,

  /* A property element with rdf:parseType="Collection"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt
   *
   * (This also handles daml:Collection)
   */
  RAPTOR_STATE_PARSETYPE_COLLECTION,

  /* A property element with a rdf:parseType attribute and a value
   * not "Literal" or "Resource"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
   */
  RAPTOR_STATE_PARSETYPE_OTHER,

  RAPTOR_STATE_PARSETYPE_LAST = RAPTOR_STATE_PARSETYPE_OTHER

} raptor_state;


/*
 * Printable name of each raptor_state, indexed by the enum value.
 *
 * There is exactly one entry per state, in enum order.  (A stray extra
 * "propertyElt" entry used to sit before "Description", which made every
 * state from RAPTOR_STATE_DESCRIPTION onwards print the wrong name.)
 */
static const char * const raptor_state_names[RAPTOR_STATE_PARSETYPE_LAST+1]={
  "INVALID",              /* RAPTOR_STATE_INVALID */
  "SKIPPING",             /* RAPTOR_STATE_SKIPPING */
  "UNKNOWN",              /* RAPTOR_STATE_UNKNOWN */
  "nodeElementList",      /* RAPTOR_STATE_NODE_ELEMENT_LIST */
  "Description",          /* RAPTOR_STATE_DESCRIPTION */
  "propertyElt",          /* RAPTOR_STATE_PROPERTYELT */
  "memberPropertyElt",    /* RAPTOR_STATE_MEMBER_PROPERTYELT */
  "nodeElement",          /* RAPTOR_STATE_NODE_ELEMENT */
  "parseTypeLiteral",     /* RAPTOR_STATE_PARSETYPE_LITERAL */
  "parseTypeResource",    /* RAPTOR_STATE_PARSETYPE_RESOURCE */
  "parseTypeCollection",  /* RAPTOR_STATE_PARSETYPE_COLLECTION */
  "parseTypeOther"        /* RAPTOR_STATE_PARSETYPE_OTHER */
};


/*
 * Return the printable name of a parser state.
 *
 * state: the state to name
 *
 * Returns a pointer to a static string.  Any value outside
 * RAPTOR_STATE_SKIPPING .. RAPTOR_STATE_PARSETYPE_LAST (including
 * RAPTOR_STATE_INVALID itself) yields "INVALID".
 */
static const char *
raptor_rdfxml_state_as_string(raptor_state state)
{
  /* compare as int so an out-of-range value cannot slip past the check
   * whatever signedness the compiler picked for the enum type
   */
  int index=(int)state;

  if(index < 1 || index > (int)RAPTOR_STATE_PARSETYPE_LAST)
    index=(int)RAPTOR_STATE_INVALID;

  return raptor_state_names[index];
}
151
152
153 /*
154 * RDF/XML syntax terms, properties and classes.
155 * Must match names in rdf_syntax_terms_info below.
156 */
typedef enum {
  /* Values count up from 0 in declaration order; the number in each
   * comment is the resulting value, which is also the index of the
   * matching row in rdf_syntax_terms_info.
   */

  /* syntax-only terms */
  RDF_ATTR_RDF = 0,           /*  0: rdf:RDF */
  RDF_ATTR_Description,       /*  1: rdf:Description */
  RDF_ATTR_li,                /*  2: rdf:li */
  RDF_ATTR_about,             /*  3: value of rdf:about attribute */
  RDF_ATTR_aboutEach,         /*  4: value of rdf:aboutEach attribute */
  RDF_ATTR_aboutEachPrefix,   /*  5: value of rdf:aboutEachPrefix attribute */
  RDF_ATTR_ID,                /*  6: value of rdf:ID attribute */
  RDF_ATTR_bagID,             /*  7: value of rdf:bagID attribute */
  RDF_ATTR_resource,          /*  8: value of rdf:resource attribute */
  RDF_ATTR_parseType,         /*  9: value of rdf:parseType attribute */
  RDF_ATTR_nodeID,            /* 10: value of rdf:nodeID attribute */
  RDF_ATTR_datatype,          /* 11: value of rdf:datatype attribute */

  /* rdf:Property-s */
  RDF_ATTR_type,              /* 12: rdf:type -- a property in RDF model */
  RDF_ATTR_value,             /* 13: rdf:value -- a property in RDF model */
  RDF_ATTR_subject,           /* 14: rdf:subject -- a property in RDF model */
  RDF_ATTR_predicate,         /* 15: rdf:predicate -- a property in RDF model */
  RDF_ATTR_object,            /* 16: rdf:object -- a property in RDF model */
  RDF_ATTR_first,             /* 17: rdf:first -- a property in RDF model */
  RDF_ATTR_rest,              /* 18: rdf:rest -- a property in RDF model */

  /* rdfs:Class-s */
  RDF_ATTR_Seq,               /* 19: rdf:Seq -- a class in RDF model */
  RDF_ATTR_Bag,               /* 20: rdf:Bag -- a class in RDF model */
  RDF_ATTR_Alt,               /* 21: rdf:Alt -- a class in RDF model */
  RDF_ATTR_Statement,         /* 22: rdf:Statement -- a class in RDF model */
  RDF_ATTR_Property,          /* 23: rdf:Property -- a class in RDF model */
  RDF_ATTR_List,              /* 24: rdf:List -- a class in RDF model */
  RDF_ATTR_XMLLiteral,        /* 25: rdf:XMLLiteral -- a class in RDF graph */

  /* rdfs:Resource-s */
  RDF_ATTR_nil,               /* 26: rdf:nil -- a resource in RDF graph */

  /* highest valid term; arrays indexed by rdf_attr need LAST+1 slots */
  RDF_ATTR_LAST = RDF_ATTR_nil
} rdf_attr;
191
192
193 /*
194 * http://www.w3.org/TR/rdf-syntax-grammar/#section-grammar-summary
195 *
196 * coreSyntaxTerms := rdf:RDF | rdf:ID | rdf:about | rdf:bagID |
197 rdf:parseType | rdf:resource | rdf:nodeID | rdf:datatype
198 * syntaxTerms := coreSyntaxTerms | rdf:Description | rdf:li
199 * oldTerms := rdf:aboutEach | rdf:aboutEachPrefix | rdf:bagID
200 *
201 * nodeElementURIs := anyURI - ( coreSyntaxTerms | rdf:li | oldTerms )
202 * propertyElementURIs := anyURI - ( coreSyntaxTerms | rdf:Description | oldTerms )
203 * propertyAttributeURIs := anyURI - ( coreSyntaxTerms | rdf:Description | rdf:li | oldTerms )
204 *
205 * So, forbidden terms in the RDF namespace are:
206 * nodeElements
207 * RDF | ID | about | bagID | parseType | resource | nodeID | datatype |
208 * li | aboutEach | aboutEachPrefix | bagID
209 *
210 * propertyElements
211 * RDF | ID | about | bagID | parseType | resource | nodeID | datatype |
212 * Description | aboutEach | aboutEachPrefix | bagID
213 *
214 * propertyAttributes
215 * RDF | ID | about | bagID | parseType | resource | nodeID | datatype |
216 * Description | li | aboutEach | aboutEachPrefix | bagID
217 *
218 * Information about rdf attributes:
219 * raptor_identifier_type type
220 * Set when the attribute is a property rather than just syntax
221 * NOTE: raptor_rdfxml_process_property_attributes() expects only
222 * RAPTOR_IDENTIFIER_TYPE_NONE,
223 * RAPTOR_IDENTIFIER_TYPE_LITERAL or RAPTOR_IDENTIFIER_TYPE_RESOURCE
224 * allowed_unprefixed_on_attribute
225 * If allowed for legacy reasons to be unprefixed as an attribute.
226 *
227 */
228
229 static const struct {
230 const char *name; /* term name */
231 int forbidden_as_nodeElement;
232 int forbidden_as_propertyElement;
233 int forbidden_as_propertyAttribute;
234 raptor_identifier_type type; /* statement value */
235 int allowed_unprefixed_on_attribute;
236 } rdf_syntax_terms_info[]={
237 /* syntax only */
238 { "RDF", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
239 { "Description", 0, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
240 { "li", 1, 0, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
241 { "about", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
242 { "aboutEach", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
243 { "aboutEachPrefix", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
244 { "ID", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
245 { "bagID", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
246 { "resource", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
247 { "parseType", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
248 { "nodeID", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
249 { "datatype", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
250 /* rdf:Property-s */
251 { "type", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_RESOURCE, 1 },
252 { "value", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
253 { "subject", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
254 { "predicate", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
255 { "object", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
256 { "first", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
257 { "rest", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
258 /* rdfs:Class-s */
259 { "Seq", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
260 { "Bag", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
261 { "Alt", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
262 { "Statement", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
263 { "Property", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
264 { "List", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
265 { "XMLLiteral", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
266 /* rdfs:Resource-s */
267 { "nil", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
268 { NULL , 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 }
269 };
270
271
272 static int
raptor_rdfxml_forbidden_nodeElement_name(const char * name)273 raptor_rdfxml_forbidden_nodeElement_name(const char *name)
274 {
275 int i;
276
277 if(*name == '_')
278 return 0;
279
280 for(i=0; rdf_syntax_terms_info[i].name; i++)
281 if(!strcmp(rdf_syntax_terms_info[i].name, name))
282 return rdf_syntax_terms_info[i].forbidden_as_nodeElement;
283
284 return -1;
285 }
286
287
288 static int
raptor_rdfxml_forbidden_propertyElement_name(const char * name)289 raptor_rdfxml_forbidden_propertyElement_name(const char *name)
290 {
291 int i;
292
293 if(*name == '_')
294 return 0;
295
296 for(i=0; rdf_syntax_terms_info[i].name; i++)
297 if(!strcmp(rdf_syntax_terms_info[i].name, (const char*)name))
298 return rdf_syntax_terms_info[i].forbidden_as_propertyElement;
299
300 return -1;
301 }
302
303
304 static int
raptor_rdfxml_forbidden_propertyAttribute_name(const char * name)305 raptor_rdfxml_forbidden_propertyAttribute_name(const char *name)
306 {
307 int i;
308
309 if(*name == '_')
310 return 0;
311
312 for(i=0; rdf_syntax_terms_info[i].name; i++)
313 if(!strcmp(rdf_syntax_terms_info[i].name, (const char*)name))
314 return rdf_syntax_terms_info[i].forbidden_as_propertyAttribute;
315
316 return -1;
317 }
318
319
/*
 * How the content inside an XML element is to be handled.
 *
 * The numeric value of each type is used as an index into
 * rdf_content_type_info[] below, so the two must be kept in the same order.
 */
typedef enum {
  /* undetermined yet - whitespace is stored */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN,

  /* literal content - no elements, cdata allowed, whitespace significant
   *   <propElement> blah </propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL,

  /* parseType literal content (WF XML) - all content preserved
   *   <propElement rdf:parseType="Literal"><em>blah</em></propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL,

  /* top-level nodes - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * only used for <rdf:RDF> or implicit <rdf:RDF>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES,

  /* properties - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   *   <nodeElement><prop1>blah</prop1> <prop2>blah</prop2> </nodeElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES,

  /* property content - all content preserved
   * any content type changes when first non-whitespace found
   *   <propElement>...
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT,

  /* resource URI given - no element, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   *   <propElement rdf:resource="uri"/>
   *   <propElement rdf:resource="uri"></propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE,

  /* skipping content - all content is preserved
   * Used when skipping content for unknown parseType-s,
   * error recovery, some other reason
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED,

  /* parseType Collection - all content preserved
   * Parsing of this determined by RDF/XML (Revised) closed collection rules
   *   <propElement rdf:parseType="Collection">...</propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION,

  /* Like above but handles "daml:collection" */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION,

  /* dummy for use in strings below: the number of real content types,
   * NOT itself a valid index into rdf_content_type_info[]
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST

} raptor_rdfxml_element_content_type;


/*
 * Per-content-type properties, indexed by
 * raptor_rdfxml_element_content_type.  Valid indices are
 * 0 .. RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST-1.
 */
static const struct {
  const char * name;
  int whitespace_significant;
  /* non-blank cdata */
  int cdata_allowed;
  /* XML element content */
  int element_allowed;
  /* Do RDF-specific processing? (property attributes, rdf: attributes, ...) */
  int rdf_processing;
} rdf_content_type_info[RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST]={
  {"Unknown",         1, 1, 1, 0 },
  {"Literal",         1, 1, 0, 0 },
  {"XML Literal",     1, 1, 1, 0 },
  {"Nodes",           0, 0, 1, 1 },
  {"Properties",      0, 1, 1, 1 },
  {"Property Content",1, 1, 1, 1 },
  {"Resource",        0, 0, 0, 0 },
  {"Preserved",       1, 1, 1, 0 },
  {"Collection",      1, 1, 1, 1 },
  {"DAML Collection", 1, 1, 1, 1 },
};


/*
 * Return the printable name of an element content type.
 *
 * type: the content type to name
 *
 * Returns a pointer to a static string; "INVALID" for any value that is
 * not a real content type.  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST is
 * the table size, not a table entry, so it is rejected too - the check
 * used to be '>' which read one element past the end of the table.
 */
static const char *
raptor_rdfxml_element_content_type_as_string(raptor_rdfxml_element_content_type type)
{
  /* compare as int so a negative value is caught whatever signedness
   * the compiler picked for the enum type
   */
  int index=(int)type;

  if(index < 0 || index >= (int)RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST)
    return "INVALID";

  return rdf_content_type_info[index].name;
}
411
412
413
414
415
416 /*
417 * Raptor Element/attributes on stack
418 */
419 struct raptor_rdfxml_element_s {
420 raptor_world* world;
421
422 raptor_xml_element *xml_element;
423
424 /* NULL at bottom of stack */
425 struct raptor_rdfxml_element_s *parent;
426
427 /* attributes declared in M&S */
428 const unsigned char * rdf_attr[RDF_ATTR_LAST+1];
429 /* how many of above seen */
430 int rdf_attr_count;
431
432 /* state that this production matches */
433 raptor_state state;
434
435 /* how to handle the content inside this XML element */
436 raptor_rdfxml_element_content_type content_type;
437
438
439 /* starting state for children of this element */
440 raptor_state child_state;
441
442 /* starting content type for children of this element */
443 raptor_rdfxml_element_content_type child_content_type;
444
445
446 /* STATIC Reified statement identifier */
447 raptor_identifier reified;
448
449 /* STATIC Bag identifier */
450 raptor_identifier bag;
451 int last_bag_ordinal; /* starts at 0, so first predicate is rdf:_1 */
452
453 /* STATIC Subject identifier (URI/anon ID), type, source
454 *
455 * When the XML element represents a node, this is the identifier
456 */
457 raptor_identifier subject;
458
459 /* STATIC Predicate URI, source is either
460 * RAPTOR_URI_SOURCE_ELEMENT or RAPTOR_URI_SOURCE_ATTRIBUTE
461 *
462 * When the XML element represents a node or predicate,
463 * this is the identifier of the predicate
464 */
465 raptor_identifier predicate;
466
467 /* STATIC Object identifier (URI/anon ID), type, source
468 *
469 * When this XML element generates a statement that needs an object,
470 * possibly from a child element, this is the identifier of the object
471 */
472 raptor_identifier object;
473
474 /* URI of datatype of literal */
475 raptor_uri *object_literal_datatype;
476
477 /* last ordinal used, so initialising to 0 works, emitting rdf:_1 first */
478 int last_ordinal;
479
480 /* If this element's parseType is a Collection
481 * this identifies the anon node of current tail of the collection(list).
482 */
483 const unsigned char *tail_id;
484
485 /* RDF/XML specific checks */
486
487 /* all cdata so far is whitespace */
488 unsigned int content_cdata_all_whitespace;
489 };
490
491 typedef struct raptor_rdfxml_element_s raptor_rdfxml_element;
492
493
494 #define RAPTOR_RDFXML_N_CONCEPTS 22
495
496 /*
497 * Raptor parser object
498 */
499 struct raptor_rdfxml_parser_s {
500 raptor_sax2 *sax2;
501
502 /* stack of elements - elements add after current_element */
503 raptor_rdfxml_element *root_element;
504 raptor_rdfxml_element *current_element;
505
506 raptor_uri* concepts[RAPTOR_RDFXML_N_CONCEPTS];
507
508 /* set of seen rdf:ID / rdf:bagID values (with in-scope base URI) */
509 raptor_id_set* id_set;
510
511 void *xml_content;
512 size_t xml_content_length;
513 raptor_iostream* iostream;
514
515 /* writer for building parseType="Literal" content */
516 raptor_xml_writer* xml_writer;
517 };
518
519
520
521
522 /* static variables */
523
/*
 * Accessors for the URIs held in the parser's concepts[] array.
 *
 * Each macro takes a pointer to the RDF/XML parser object and expands to
 * the matching concepts[] slot.  The argument and the whole expansion are
 * parenthesised so that any pointer expression (e.g. "p + 1", "&obj") can
 * be passed safely; the result is still an lvalue, so it can be assigned.
 */
#define RAPTOR_RDF_type_URI(rdf_xml_parser)        ((rdf_xml_parser)->concepts[0])
#define RAPTOR_RDF_value_URI(rdf_xml_parser)       ((rdf_xml_parser)->concepts[1])
#define RAPTOR_RDF_subject_URI(rdf_xml_parser)     ((rdf_xml_parser)->concepts[2])
#define RAPTOR_RDF_predicate_URI(rdf_xml_parser)   ((rdf_xml_parser)->concepts[3])
#define RAPTOR_RDF_object_URI(rdf_xml_parser)      ((rdf_xml_parser)->concepts[4])
#define RAPTOR_RDF_Statement_URI(rdf_xml_parser)   ((rdf_xml_parser)->concepts[5])

#define RAPTOR_RDF_Seq_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[6])
#define RAPTOR_RDF_Bag_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[7])
#define RAPTOR_RDF_Alt_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[8])

#define RAPTOR_RDF_List_URI(rdf_xml_parser)        ((rdf_xml_parser)->concepts[9])
#define RAPTOR_RDF_first_URI(rdf_xml_parser)       ((rdf_xml_parser)->concepts[10])
#define RAPTOR_RDF_rest_URI(rdf_xml_parser)        ((rdf_xml_parser)->concepts[11])
#define RAPTOR_RDF_nil_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[12])

#define RAPTOR_DAML_NS_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[13])

#define RAPTOR_DAML_List_URI(rdf_xml_parser)       ((rdf_xml_parser)->concepts[14])
#define RAPTOR_DAML_first_URI(rdf_xml_parser)      ((rdf_xml_parser)->concepts[15])
#define RAPTOR_DAML_rest_URI(rdf_xml_parser)       ((rdf_xml_parser)->concepts[16])
#define RAPTOR_DAML_nil_URI(rdf_xml_parser)        ((rdf_xml_parser)->concepts[17])

#define RAPTOR_RDF_RDF_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[18])
#define RAPTOR_RDF_Description_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[19])
#define RAPTOR_RDF_li_URI(rdf_xml_parser)          ((rdf_xml_parser)->concepts[20])

#define RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser)  ((rdf_xml_parser)->concepts[21])
552
553 /* RAPTOR_RDFXML_N_CONCEPTS defines size of array */
554
555
556 /* prototypes for element functions */
557 static raptor_rdfxml_element* raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_parser);
558 static void raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_parser, raptor_rdfxml_element* element);
559
560 static int raptor_rdfxml_record_ID(raptor_parser *rdf_parser, raptor_rdfxml_element *element, const unsigned char *id);
561
562 /* prototypes for grammar functions */
563 static void raptor_rdfxml_start_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
564 static void raptor_rdfxml_end_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
565 static void raptor_rdfxml_cdata_grammar(raptor_parser *parser, const unsigned char *s, int len, int is_cdata);
566
567
568 /* prototype for statement related functions */
569 static void raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, raptor_uri *subject_uri, const unsigned char *subject_id, const raptor_identifier_type subject_type, const raptor_uri_source subject_uri_source, raptor_uri *predicate_uri, const unsigned char *predicate_id, const raptor_identifier_type predicate_type, const raptor_uri_source predicate_uri_source, int predicate_ordinal, raptor_uri *object_uri, const unsigned char *object_id, const raptor_identifier_type object_type, const raptor_uri_source object_uri_source, raptor_uri *literal_datatype, raptor_identifier *reified, raptor_rdfxml_element *bag_element);
570
571
572
573 /* Prototypes for parsing data functions */
574 static int raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name);
575 static void raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser);
576 static int raptor_rdfxml_parse_start(raptor_parser* rdf_parser);
577 static int raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer, size_t len, int is_end);
578 static void raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser);
579
580 static raptor_uri* raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser);
581
582
583 static raptor_rdfxml_element*
raptor_rdfxml_element_pop(raptor_rdfxml_parser * rdf_xml_parser)584 raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_xml_parser)
585 {
586 raptor_rdfxml_element *element=rdf_xml_parser->current_element;
587
588 if(!element)
589 return NULL;
590
591 rdf_xml_parser->current_element=element->parent;
592 if(rdf_xml_parser->root_element == element) /* just deleted root */
593 rdf_xml_parser->root_element=NULL;
594
595 return element;
596 }
597
598
599 static void
raptor_rdfxml_element_push(raptor_rdfxml_parser * rdf_xml_parser,raptor_rdfxml_element * element)600 raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_xml_parser, raptor_rdfxml_element* element)
601 {
602 element->parent=rdf_xml_parser->current_element;
603 rdf_xml_parser->current_element=element;
604 if(!rdf_xml_parser->root_element)
605 rdf_xml_parser->root_element=element;
606 }
607
608
609 static void
raptor_free_rdfxml_element(raptor_rdfxml_element * element)610 raptor_free_rdfxml_element(raptor_rdfxml_element *element)
611 {
612 int i;
613
614 /* Free special RDF M&S attributes */
615 for(i=0; i<= RDF_ATTR_LAST; i++)
616 if(element->rdf_attr[i])
617 RAPTOR_FREE(cstring, (void*)element->rdf_attr[i]);
618
619 raptor_free_identifier(&element->subject);
620 raptor_free_identifier(&element->predicate);
621 raptor_free_identifier(&element->object);
622 raptor_free_identifier(&element->bag);
623 raptor_free_identifier(&element->reified);
624
625 if(element->tail_id)
626 RAPTOR_FREE(cstring, (char*)element->tail_id);
627 if(element->object_literal_datatype)
628 raptor_free_uri_v2(element->world, element->object_literal_datatype);
629
630 RAPTOR_FREE(raptor_rdfxml_element, element);
631 }
632
633
634 static void
raptor_rdfxml_sax2_new_namespace_handler(void * user_data,raptor_namespace * nspace)635 raptor_rdfxml_sax2_new_namespace_handler(void *user_data,
636 raptor_namespace* nspace)
637 {
638 raptor_parser* rdf_parser;
639 const unsigned char* namespace_name;
640 size_t namespace_name_len;
641 raptor_uri* uri=raptor_namespace_get_uri(nspace);
642
643 rdf_parser=(raptor_parser*)user_data;
644 raptor_parser_start_namespace(rdf_parser, nspace);
645
646 if(!uri)
647 return;
648
649 namespace_name=raptor_uri_as_counted_string_v2(nspace->nstack->world, uri, &namespace_name_len);
650
651 if(namespace_name_len == raptor_rdf_namespace_uri_len-1 &&
652 !strncmp((const char*)namespace_name,
653 (const char*)raptor_rdf_namespace_uri,
654 namespace_name_len)) {
655 const unsigned char *prefix=raptor_namespace_get_prefix(nspace);
656 raptor_parser_warning(rdf_parser, "Declaring a namespace with prefix %s to URI %s - one letter short of the RDF namespace URI and probably a mistake.", prefix, namespace_name);
657 }
658
659 if(namespace_name_len > raptor_rdf_namespace_uri_len &&
660 !strncmp((const char*)namespace_name,
661 (const char*)raptor_rdf_namespace_uri,
662 raptor_rdf_namespace_uri_len)) {
663 raptor_parser_error(rdf_parser, "Declaring a namespace URI %s to which the RDF namespace URI is a prefix is forbidden.", namespace_name);
664 }
665 }
666
667
668
669 static void
raptor_rdfxml_start_element_handler(void * user_data,raptor_xml_element * xml_element)670 raptor_rdfxml_start_element_handler(void *user_data,
671 raptor_xml_element* xml_element)
672 {
673 raptor_parser* rdf_parser;
674 raptor_rdfxml_parser* rdf_xml_parser;
675 raptor_rdfxml_element* element;
676 int ns_attributes_count=0;
677 raptor_qname** named_attrs=NULL;
678 int i;
679 int count_bumped=0;
680
681 rdf_parser=(raptor_parser*)user_data;
682 rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
683
684 if(rdf_parser->failed)
685 return;
686
687 raptor_rdfxml_update_document_locator(rdf_parser);
688
689 /* Create new element structure */
690 element=(raptor_rdfxml_element*)RAPTOR_CALLOC(raptor_rdfxml_element, 1,
691 sizeof(raptor_rdfxml_element));
692 if(!element) {
693 raptor_parser_fatal_error(rdf_parser, "Out of memory");
694 rdf_parser->failed=1;
695 return;
696 }
697 element->world=rdf_parser->world;
698 element->xml_element=xml_element;
699
700 /* init world fields in identifiers not created with raptor_new_identifier() */
701 element->reified.world=
702 element->bag.world=
703 element->subject.world=
704 element->predicate.world=
705 element->object.world=
706 rdf_parser->world;
707
708 raptor_rdfxml_element_push(rdf_xml_parser, element);
709
710 named_attrs=raptor_xml_element_get_attributes(xml_element);
711 ns_attributes_count=raptor_xml_element_get_attributes_count(xml_element);
712
713 /* RDF-specific processing of attributes */
714 if(ns_attributes_count) {
715 raptor_qname** new_named_attrs;
716 int offset = 0;
717 raptor_rdfxml_element* parent_element;
718
719 parent_element=element->parent;
720
721 /* Allocate new array to move namespaced-attributes to if
722 * rdf processing is performed
723 */
724 new_named_attrs=(raptor_qname**)RAPTOR_CALLOC(raptor_qname_array,
725 ns_attributes_count,
726 sizeof(raptor_qname*));
727 if(!new_named_attrs) {
728 raptor_parser_fatal_error(rdf_parser, "Out of memory");
729 rdf_parser->failed=1;
730 return;
731 }
732
733 for (i = 0; i < ns_attributes_count; i++) {
734 raptor_qname* attr=named_attrs[i];
735
736 /* If:
737 * 1 We are handling RDF content and RDF processing is allowed on
738 * this element
739 * OR
740 * 2 We are not handling RDF content and
741 * this element is at the top level (top level Desc. / typedNode)
742 * i.e. we have no parent
743 * then handle the RDF attributes
744 */
745 if((parent_element &&
746 rdf_content_type_info[parent_element->child_content_type].rdf_processing) ||
747 !parent_element) {
748
749 /* Save pointers to some RDF M&S attributes */
750
751 /* If RDF namespace-prefixed attributes */
752 if(attr->nspace && attr->nspace->is_rdf_ms) {
753 const unsigned char *attr_name=attr->local_name;
754 int j;
755
756 for(j=0; j<= RDF_ATTR_LAST; j++)
757 if(!strcmp((const char*)attr_name, rdf_syntax_terms_info[j].name)) {
758 element->rdf_attr[j]=attr->value;
759 element->rdf_attr_count++;
760 /* Delete it if it was stored elsewhere */
761 #ifdef RAPTOR_DEBUG_VERBOSE
762 RAPTOR_DEBUG3("Found RDF namespace attribute '%s' URI %s\n", (char*)attr_name, attr->value);
763 #endif
764 /* make sure value isn't deleted from qname structure */
765 attr->value=NULL;
766 raptor_free_qname(attr);
767 attr=NULL;
768 break;
769 }
770 } /* end if RDF namespaced-prefixed attributes */
771
772 if(!attr)
773 continue;
774
775 /* If non namespace-prefixed RDF attributes found on an element */
776 if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES] &&
777 !attr->nspace) {
778 const unsigned char *attr_name=attr->local_name;
779 int j;
780
781 for(j=0; j<= RDF_ATTR_LAST; j++)
782 if(!strcmp((const char*)attr_name, rdf_syntax_terms_info[j].name)) {
783 element->rdf_attr[j]=attr->value;
784 element->rdf_attr_count++;
785 if(!rdf_syntax_terms_info[j].allowed_unprefixed_on_attribute)
786 raptor_parser_warning(rdf_parser, "Using rdf attribute '%s' without the RDF namespace has been deprecated.", attr_name);
787 /* Delete it if it was stored elsewhere */
788 /* make sure value isn't deleted from qname structure */
789 attr->value=NULL;
790 raptor_free_qname(attr);
791 attr=NULL;
792 break;
793 }
794 } /* end if non-namespace prefixed RDF attributes */
795
796 if(!attr)
797 continue;
798
799 } /* end if leave literal XML alone */
800
801 if(attr)
802 new_named_attrs[offset++]=attr;
803 }
804
805 /* new attribute count is set from attributes that haven't been skipped */
806 ns_attributes_count=offset;
807 if(!ns_attributes_count) {
808 /* all attributes were deleted so delete the new array */
809 RAPTOR_FREE(raptor_qname_array, new_named_attrs);
810 new_named_attrs=NULL;
811 }
812
813 RAPTOR_FREE(raptor_qname_array, named_attrs);
814 named_attrs=new_named_attrs;
815 raptor_xml_element_set_attributes(xml_element,
816 named_attrs, ns_attributes_count);
817 } /* end if ns_attributes_count */
818
819
820 /* start from unknown; if we have a parent, it may set this */
821 element->state=RAPTOR_STATE_UNKNOWN;
822 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN;
823
824 if(element->parent &&
825 element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN) {
826 element->content_type=element->parent->child_content_type;
827
828 if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE &&
829 element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
830 element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
831 /* If parent has an rdf:resource, this element should not be here */
832 raptor_parser_error(rdf_parser, "property element '%s' has multiple object node elements, skipping.",
833 raptor_xml_element_get_name(element->parent->xml_element)->local_name);
834 element->state=RAPTOR_STATE_SKIPPING;
835 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
836
837 } else {
838 if(!element->parent->child_state) {
839 raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_start_element_handler: no parent element child_state set");
840 return;
841 }
842
843 element->state=element->parent->child_state;
844 element->parent->xml_element->content_element_seen++;
845 count_bumped++;
846
847 /* leave literal XML alone */
848 if (!rdf_content_type_info[element->content_type].cdata_allowed) {
849 if(element->parent->xml_element->content_element_seen &&
850 element->parent->xml_element->content_cdata_seen) {
851 /* Uh oh - mixed content, the parent element has cdata too */
852 raptor_parser_warning(rdf_parser, "element '%s' has mixed content.",
853 raptor_xml_element_get_name(element->parent->xml_element)->local_name);
854 }
855
856 /* If there is some existing all-whitespace content cdata
857 * before this node element, delete it
858 */
859 if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES &&
860 element->parent->xml_element->content_element_seen &&
861 element->parent->content_cdata_all_whitespace &&
862 element->parent->xml_element->content_cdata_length) {
863
864 element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
865
866 raptor_free_stringbuffer(element->parent->xml_element->content_cdata_sb);
867 element->parent->xml_element->content_cdata_sb=NULL;
868 element->parent->xml_element->content_cdata_length=0;
869 }
870
871 } /* end if leave literal XML alone */
872
873 } /* end if parent has no rdf:resource */
874
875 } /* end if element->parent */
876
877
878 #ifdef RAPTOR_DEBUG_VERBOSE
879 RAPTOR_DEBUG2("Using content type %s\n", rdf_content_type_info[element->content_type].name);
880
881 fprintf(stderr, "raptor_rdfxml_start_element_handler: Start ns-element: ");
882 raptor_print_xml_element(xml_element, stderr);
883 #endif
884
885
886 /* Check for non namespaced stuff when not in a parseType literal, other */
887 if (rdf_content_type_info[element->content_type].rdf_processing) {
888
889 /* The element */
890 /* If has no namespace or the namespace has no name (xmlns="") */
891 if(!raptor_xml_element_get_name(xml_element)->nspace ||
892 (raptor_xml_element_get_name(xml_element)->nspace &&
893 !raptor_namespace_get_uri(raptor_xml_element_get_name(xml_element)->nspace))) {
894 raptor_parser_error(rdf_parser, "Using an element '%s' without a namespace is forbidden.",
895 raptor_xml_element_get_name(element->parent->xml_element)->local_name);
896 element->state=RAPTOR_STATE_SKIPPING;
897 /* Remove count above so that parent thinks this is empty */
898 if(count_bumped)
899 element->parent->xml_element->content_element_seen--;
900 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
901 }
902
903
904 /* Check for any remaining non-namespaced attributes */
905 if (named_attrs) {
906 for(i=0; i < ns_attributes_count; i++) {
907 raptor_qname *attr=named_attrs[i];
908 /* Check if any attributes are non-namespaced */
909 if(!attr->nspace ||
910 (attr->nspace && !raptor_namespace_get_uri(attr->nspace))) {
911 raptor_parser_error(rdf_parser, "Using an attribute '%s' without a namespace is forbidden.", attr->local_name);
912 raptor_free_qname(attr);
913 named_attrs[i]=NULL;
914 }
915 }
916 }
917 }
918
919
920 if (element->rdf_attr[RDF_ATTR_aboutEach] ||
921 element->rdf_attr[RDF_ATTR_aboutEachPrefix]) {
922 raptor_parser_warning(rdf_parser, "element '%s' has aboutEach / aboutEachPrefix, skipping.",
923 raptor_xml_element_get_name(xml_element)->local_name);
924 element->state=RAPTOR_STATE_SKIPPING;
925 /* Remove count above so that parent thinks this is empty */
926 if(count_bumped)
927 element->parent->xml_element->content_element_seen--;
928 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
929 }
930
931 /* Right, now ready to enter the grammar */
932 raptor_rdfxml_start_element_grammar(rdf_parser, element);
933
934 return;
935 }
936
937
938 static void
raptor_rdfxml_end_element_handler(void * user_data,raptor_xml_element * xml_element)939 raptor_rdfxml_end_element_handler(void *user_data,
940 raptor_xml_element* xml_element)
941 {
942 raptor_parser* rdf_parser;
943 raptor_rdfxml_parser* rdf_xml_parser;
944 raptor_rdfxml_element* element;
945
946 rdf_parser=(raptor_parser*)user_data;
947 rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
948
949 if(!rdf_parser->failed) {
950 raptor_rdfxml_update_document_locator(rdf_parser);
951
952 raptor_rdfxml_end_element_grammar(rdf_parser, rdf_xml_parser->current_element);
953 }
954
955 element=raptor_rdfxml_element_pop(rdf_xml_parser);
956 if(element) {
957 if(element->parent) {
958 /* Do not change this; PROPERTYELT will turn into MEMBER if necessary
959 * See the switch case for MEMBER / PROPERTYELT where the test is done.
960 *
961 * PARSETYPE_RESOURCE should never be propogated up since it
962 * will turn the next child (node) element into a property
963 */
964 if(element->state != RAPTOR_STATE_MEMBER_PROPERTYELT &&
965 element->state != RAPTOR_STATE_PARSETYPE_RESOURCE)
966 element->parent->child_state=element->state;
967 }
968
969 raptor_free_rdfxml_element(element);
970 }
971 }
972
973
974 /* cdata (and ignorable whitespace for libxml).
975 * s is not 0 terminated for expat, is for libxml - grrrr.
976 */
977 static void
raptor_rdfxml_characters_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s,int len)978 raptor_rdfxml_characters_handler(void *user_data,
979 raptor_xml_element* xml_element,
980 const unsigned char *s, int len)
981 {
982 raptor_parser* rdf_parser=(raptor_parser*)user_data;
983
984 raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 0);
985 }
986
987
988 /* cdata (and ignorable whitespace for libxml).
989 * s is not 0 terminated for expat, is for libxml - grrrr.
990 */
991 static void
raptor_rdfxml_cdata_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s,int len)992 raptor_rdfxml_cdata_handler(void *user_data, raptor_xml_element* xml_element,
993 const unsigned char *s, int len)
994 {
995 raptor_parser* rdf_parser=(raptor_parser*)user_data;
996
997 raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 1);
998 }
999
1000
1001 /* comment handler
1002 * s is 0 terminated
1003 */
1004 static void
raptor_rdfxml_comment_handler(void * user_data,raptor_xml_element * xml_element,const unsigned char * s)1005 raptor_rdfxml_comment_handler(void *user_data, raptor_xml_element* xml_element,
1006 const unsigned char *s)
1007 {
1008 raptor_parser* rdf_parser=(raptor_parser*)user_data;
1009 raptor_rdfxml_parser* rdf_xml_parser;
1010 raptor_rdfxml_element* element;
1011
1012 if(rdf_parser->failed || !xml_element)
1013 return;
1014
1015 rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1016 element=rdf_xml_parser->current_element;
1017
1018 if(element) {
1019 if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
1020 raptor_xml_writer_comment(rdf_xml_parser->xml_writer, s);
1021 }
1022
1023
1024 #ifdef RAPTOR_DEBUG_VERBOSE
1025 RAPTOR_DEBUG2("XML Comment '%s'\n", s);
1026 #endif
1027 }
1028
1029
1030
1031 static int
raptor_rdfxml_parse_init(raptor_parser * rdf_parser,const char * name)1032 raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name)
1033 {
1034 raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1035 raptor_sax2* sax2;
1036 raptor_world* world=rdf_parser->world;
1037
1038 /* Allocate sax2 object */
1039 sax2=raptor_new_sax2(rdf_parser, &rdf_parser->error_handlers);
1040 rdf_xml_parser->sax2=sax2;
1041 if(!sax2)
1042 return 1;
1043
1044 /* Initialize sax2 element handlers */
1045 raptor_sax2_set_start_element_handler(sax2, raptor_rdfxml_start_element_handler);
1046 raptor_sax2_set_end_element_handler(sax2, raptor_rdfxml_end_element_handler);
1047 raptor_sax2_set_characters_handler(sax2, raptor_rdfxml_characters_handler);
1048 raptor_sax2_set_cdata_handler(sax2, raptor_rdfxml_cdata_handler);
1049 raptor_sax2_set_comment_handler(sax2, raptor_rdfxml_comment_handler);
1050 raptor_sax2_set_namespace_handler(sax2, raptor_rdfxml_sax2_new_namespace_handler);
1051
1052 /* Allocate uris */
1053 RAPTOR_RDF_type_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "type");
1054 RAPTOR_RDF_value_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "value");
1055 RAPTOR_RDF_subject_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "subject");
1056 RAPTOR_RDF_predicate_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "predicate");
1057 RAPTOR_RDF_object_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "object");
1058 RAPTOR_RDF_Statement_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Statement");
1059
1060 RAPTOR_RDF_Seq_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Seq");
1061 RAPTOR_RDF_Bag_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Bag");
1062 RAPTOR_RDF_Alt_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Alt");
1063
1064 RAPTOR_RDF_List_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "List");
1065 RAPTOR_RDF_first_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "first");
1066 RAPTOR_RDF_rest_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "rest");
1067 RAPTOR_RDF_nil_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "nil");
1068
1069 RAPTOR_DAML_NS_URI(rdf_xml_parser)=raptor_new_uri_v2(world, (const unsigned char*)"http://www.daml.org/2001/03/daml+oil#");
1070
1071 RAPTOR_DAML_List_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"List");
1072 RAPTOR_DAML_first_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser) ,(const unsigned char *)"first");
1073 RAPTOR_DAML_rest_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"rest");
1074 RAPTOR_DAML_nil_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name_v2(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"nil");
1075
1076 RAPTOR_RDF_RDF_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "RDF");
1077 RAPTOR_RDF_Description_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "Description");
1078 RAPTOR_RDF_li_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept_v2(world, "li");
1079
1080 RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser)=raptor_new_uri_v2(world, raptor_xml_literal_datatype_uri_string);
1081
1082 /* Check for uri allocation failures */
1083 if(!RAPTOR_RDF_type_URI(rdf_xml_parser) ||
1084 !RAPTOR_RDF_value_URI(rdf_xml_parser) ||
1085 !RAPTOR_RDF_subject_URI(rdf_xml_parser) ||
1086 !RAPTOR_RDF_predicate_URI(rdf_xml_parser) ||
1087 !RAPTOR_RDF_object_URI(rdf_xml_parser) ||
1088 !RAPTOR_RDF_Statement_URI(rdf_xml_parser) ||
1089 !RAPTOR_RDF_Seq_URI(rdf_xml_parser) ||
1090 !RAPTOR_RDF_Bag_URI(rdf_xml_parser) ||
1091 !RAPTOR_RDF_Alt_URI(rdf_xml_parser) ||
1092 !RAPTOR_RDF_List_URI(rdf_xml_parser) ||
1093 !RAPTOR_RDF_first_URI(rdf_xml_parser) ||
1094 !RAPTOR_RDF_rest_URI(rdf_xml_parser) ||
1095 !RAPTOR_RDF_nil_URI(rdf_xml_parser) ||
1096 !RAPTOR_DAML_NS_URI(rdf_xml_parser) ||
1097 !RAPTOR_DAML_List_URI(rdf_xml_parser) ||
1098 !RAPTOR_DAML_first_URI(rdf_xml_parser) ||
1099 !RAPTOR_DAML_rest_URI(rdf_xml_parser) ||
1100 !RAPTOR_DAML_nil_URI(rdf_xml_parser) ||
1101 !RAPTOR_RDF_RDF_URI(rdf_xml_parser) ||
1102 !RAPTOR_RDF_Description_URI(rdf_xml_parser) ||
1103 !RAPTOR_RDF_li_URI(rdf_xml_parser) ||
1104 !RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser))
1105 return 1;
1106
1107 /* Everything succeeded */
1108 return 0;
1109 }
1110
1111
1112 static int
raptor_rdfxml_parse_start(raptor_parser * rdf_parser)1113 raptor_rdfxml_parse_start(raptor_parser* rdf_parser)
1114 {
1115 raptor_uri *uri=rdf_parser->base_uri;
1116 raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1117
1118 /* base URI required for RDF/XML */
1119 if(!uri)
1120 return 1;
1121
1122 /* Optionally normalize language to lowercase
1123 * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier
1124 */
1125 raptor_sax2_set_feature(rdf_xml_parser->sax2,
1126 RAPTOR_FEATURE_NORMALIZE_LANGUAGE,
1127 rdf_parser->features[RAPTOR_FEATURE_NORMALIZE_LANGUAGE]);
1128
1129 /* Optionally forbid network requests in the XML parser */
1130 raptor_sax2_set_feature(rdf_xml_parser->sax2,
1131 RAPTOR_FEATURE_NO_NET,
1132 rdf_parser->features[RAPTOR_FEATURE_NO_NET]);
1133 raptor_sax2_set_feature(rdf_xml_parser->sax2,
1134 RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES,
1135 rdf_parser->features[RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES]);
1136
1137 raptor_sax2_parse_start(rdf_xml_parser->sax2, uri);
1138
1139 /* Delete any existing id_set */
1140 if(rdf_xml_parser->id_set) {
1141 raptor_free_id_set(rdf_xml_parser->id_set);
1142 rdf_xml_parser->id_set = NULL;
1143 }
1144
1145 /* Create a new id_set if needed */
1146 if(rdf_parser->features[RAPTOR_FEATURE_CHECK_RDF_ID]) {
1147 rdf_xml_parser->id_set = raptor_new_id_set(rdf_parser->world);
1148 if(!rdf_xml_parser->id_set)
1149 return 1;
1150 }
1151
1152 return 0;
1153 }
1154
1155
1156 static void
raptor_rdfxml_parse_terminate(raptor_parser * rdf_parser)1157 raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser)
1158 {
1159 raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1160 raptor_rdfxml_element* element;
1161 int i;
1162
1163 if(rdf_xml_parser->sax2) {
1164 raptor_free_sax2(rdf_xml_parser->sax2);
1165 rdf_xml_parser->sax2=NULL;
1166 }
1167
1168 while( (element=raptor_rdfxml_element_pop(rdf_xml_parser)) )
1169 raptor_free_rdfxml_element(element);
1170
1171
1172 for(i=0; i< RAPTOR_RDFXML_N_CONCEPTS; i++) {
1173 raptor_uri* concept_uri=rdf_xml_parser->concepts[i];
1174 if(concept_uri) {
1175 raptor_free_uri_v2(rdf_parser->world, concept_uri);
1176 rdf_xml_parser->concepts[i]=NULL;
1177 }
1178 }
1179
1180 if(rdf_xml_parser->id_set) {
1181 raptor_free_id_set(rdf_xml_parser->id_set);
1182 rdf_xml_parser->id_set=NULL;
1183 }
1184
1185 }
1186
1187
1188 static int
raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory * factory,const unsigned char * buffer,size_t len,const unsigned char * identifier,const unsigned char * suffix,const char * mime_type)1189 raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory* factory,
1190 const unsigned char *buffer, size_t len,
1191 const unsigned char *identifier,
1192 const unsigned char *suffix,
1193 const char *mime_type)
1194 {
1195 int score= 0;
1196
1197 if(suffix) {
1198 if(!strcmp((const char*)suffix, "rdf") ||
1199 !strcmp((const char*)suffix, "rdfs") ||
1200 !strcmp((const char*)suffix, "foaf") ||
1201 !strcmp((const char*)suffix, "doap") ||
1202 !strcmp((const char*)suffix, "owl") ||
1203 !strcmp((const char*)suffix, "daml"))
1204 score=9;
1205 if(!strcmp((const char*)suffix, "rss"))
1206 score=3;
1207 }
1208
1209 if(identifier) {
1210 if(strstr((const char*)identifier, "rss1"))
1211 score+=5;
1212 else if(!suffix && strstr((const char*)identifier, "rss"))
1213 score+=3;
1214 else if(!suffix && strstr((const char*)identifier, "rdf"))
1215 score+=2;
1216 else if(!suffix && strstr((const char*)identifier, "RDF"))
1217 score+=2;
1218 }
1219
1220 if(mime_type) {
1221 if(strstr((const char*)mime_type, "html"))
1222 score-= 4;
1223 else if(!strcmp((const char*)mime_type, "text/rdf"))
1224 score+= 7;
1225 else if(!strcmp((const char*)mime_type, "application/xml"))
1226 score+= 5;
1227 }
1228
1229 if(buffer && len) {
1230 /* Check it's an XML namespace declared and not N3 or Turtle which
1231 * mention the namespace URI but not in this form.
1232 */
1233 #define HAS_RDF_XMLNS1 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1234 #define HAS_RDF_XMLNS2 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1235 #define HAS_RDF_XMLNS3 (raptor_memstr((const char*)buffer, len, "xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1236 #define HAS_RDF_XMLNS4 (raptor_memstr((const char*)buffer, len, "xmlns='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL)
1237 #define HAS_RDF_ENTITY1 (raptor_memstr((const char*)buffer, len, "<!ENTITY rdf 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'>") != NULL)
1238 #define HAS_RDF_ENTITY2 (raptor_memstr((const char*)buffer, len, "<!ENTITY rdf \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">") != NULL)
1239 #define HAS_RDF_ENTITY3 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"&rdf;\"") != NULL)
1240 #define HAS_RDF_ENTITY4 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='&rdf;'") != NULL)
1241 #define HAS_HTML_NS (raptor_memstr((const char*)buffer, len, "http://www.w3.org/1999/xhtml") != NULL)
1242 #define HAS_HTML_ROOT (raptor_memstr((const char*)buffer, len, "<html") != NULL)
1243
1244 if(!HAS_HTML_NS && !HAS_HTML_ROOT &&
1245 (HAS_RDF_XMLNS1 || HAS_RDF_XMLNS2 || HAS_RDF_XMLNS3 || HAS_RDF_XMLNS4 ||
1246 HAS_RDF_ENTITY1 || HAS_RDF_ENTITY2 || HAS_RDF_ENTITY3 || HAS_RDF_ENTITY4)
1247 ) {
1248 int has_rdf_RDF=(raptor_memstr((const char*)buffer, len, "<rdf:RDF") != NULL);
1249 int has_rdf_Description=(raptor_memstr((const char*)buffer, len, "rdf:Description") != NULL);
1250 int has_rdf_about=(raptor_memstr((const char*)buffer, len, "rdf:about") != NULL);
1251
1252 score+= 7;
1253 if(has_rdf_RDF)
1254 score++;
1255 if(has_rdf_Description)
1256 score++;
1257 if(has_rdf_about)
1258 score++;
1259 }
1260 }
1261
1262 return score;
1263 }
1264
1265
1266
1267 static int
raptor_rdfxml_parse_chunk(raptor_parser * rdf_parser,const unsigned char * buffer,size_t len,int is_end)1268 raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer,
1269 size_t len, int is_end)
1270 {
1271 raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1272 if(rdf_parser->failed)
1273 return 1;
1274
1275 return raptor_sax2_parse_chunk(rdf_xml_parser->sax2, buffer, len, is_end);
1276 }
1277
1278
1279 static void
raptor_rdfxml_generate_statement(raptor_parser * rdf_parser,raptor_uri * subject_uri,const unsigned char * subject_id,const raptor_identifier_type subject_type,const raptor_uri_source subject_uri_source,raptor_uri * predicate_uri,const unsigned char * predicate_id,raptor_identifier_type predicate_type,const raptor_uri_source predicate_uri_source,int predicate_ordinal,raptor_uri * object_uri,const unsigned char * object_id,const raptor_identifier_type object_type,const raptor_uri_source object_uri_source,raptor_uri * literal_datatype,raptor_identifier * reified,raptor_rdfxml_element * bag_element)1280 raptor_rdfxml_generate_statement(raptor_parser *rdf_parser,
1281 raptor_uri *subject_uri,
1282 const unsigned char *subject_id,
1283 const raptor_identifier_type subject_type,
1284 const raptor_uri_source subject_uri_source,
1285 raptor_uri *predicate_uri,
1286 const unsigned char *predicate_id,
1287 raptor_identifier_type predicate_type,
1288 const raptor_uri_source predicate_uri_source,
1289 int predicate_ordinal,
1290 raptor_uri *object_uri,
1291 const unsigned char *object_id,
1292 const raptor_identifier_type object_type,
1293 const raptor_uri_source object_uri_source,
1294 raptor_uri *literal_datatype,
1295 raptor_identifier *reified,
1296 raptor_rdfxml_element* bag_element)
1297 {
1298 raptor_statement *statement=&rdf_parser->statement;
1299 const unsigned char *language=NULL;
1300 static const char empty_literal[1]="";
1301 raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1302 char *reified_id=NULL;
1303 raptor_uri* uri1=NULL;
1304 raptor_uri* uri2=NULL;
1305
1306 if(rdf_parser->failed)
1307 return;
1308
1309 if((object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL ||
1310 object_type == RAPTOR_IDENTIFIER_TYPE_XML_LITERAL) &&
1311 !literal_datatype) {
1312 language=raptor_sax2_inscope_xml_language(rdf_xml_parser->sax2);
1313 if(!object_uri)
1314 object_uri=(raptor_uri*)empty_literal;
1315 }
1316
1317 statement->subject=subject_uri ? (void*)subject_uri : (void*)subject_id;
1318 statement->subject_type=subject_type;
1319
1320 statement->predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1321 if(predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
1322 /* new URI object */
1323 uri1=raptor_new_uri_from_rdf_ordinal(rdf_parser->world, predicate_ordinal);
1324 predicate_uri=uri1;
1325 predicate_id=NULL;
1326 predicate_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1327 }
1328 statement->predicate=predicate_uri;
1329
1330 statement->object=object_uri ? (void*)object_uri : (void*)object_id;
1331 statement->object_type=object_type;
1332
1333 statement->object_literal_language=language;
1334 statement->object_literal_datatype=literal_datatype;
1335
1336
1337 #ifdef RAPTOR_DEBUG_VERBOSE
1338 fprintf(stderr, "raptor_rdfxml_generate_statement: Generating statement: ");
1339 raptor_print_statement(statement, stderr);
1340 fputc('\n', stderr);
1341
1342 if(!(subject_uri||subject_id))
1343 RAPTOR_FATAL1("Statement has no subject\n");
1344
1345 if(!(predicate_uri||predicate_id))
1346 RAPTOR_FATAL1("Statement has no predicate\n");
1347
1348 if(!(object_uri||object_id))
1349 RAPTOR_FATAL1("Statement has no object\n");
1350
1351 #endif
1352
1353 if(!rdf_parser->statement_handler)
1354 goto generate_tidy;
1355
1356 /* Generate the statement; or is it fact? */
1357 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1358
1359
1360 /* the bagID mess */
1361 if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID] &&
1362 bag_element && (bag_element->bag.uri || bag_element->bag.id)) {
1363 raptor_identifier* bag=&bag_element->bag;
1364
1365 statement->subject=bag->uri ? (void*)bag->uri : (void*)bag->id;
1366 statement->subject_type=bag->type;
1367
1368 bag_element->last_bag_ordinal++;
1369
1370 /* new URI object */
1371 uri2=raptor_new_uri_from_rdf_ordinal(rdf_parser->world, bag_element->last_bag_ordinal);
1372 statement->predicate=uri2;
1373
1374 if(reified && (reified->uri || reified->id)) {
1375 statement->object=reified->uri ? (void*)reified->uri : (void*)reified->id;
1376 statement->object_type=reified->type;
1377 } else {
1378 /* reified may be NULL so do not use it */
1379 reified_id=(char*)raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
1380 statement->object=reified_id;
1381 statement->object_type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
1382 }
1383
1384 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1385
1386 } else if(!reified || (!reified->uri && !reified->id))
1387 goto generate_tidy;
1388
1389 /* generate reified statements */
1390 statement->subject_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1391 statement->predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1392 statement->object_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1393
1394 statement->object_literal_language=NULL;
1395
1396 if(reified_id) {
1397 /* reified may be NULL so do not use it */
1398 statement->subject=reified_id;
1399 statement->subject_type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
1400 } else {
1401 statement->subject=reified->uri ? (void*)reified->uri : (void*)reified->id;
1402 statement->subject_type=reified->type;
1403 }
1404
1405 statement->predicate=RAPTOR_RDF_type_URI(rdf_xml_parser);
1406 statement->object=RAPTOR_RDF_Statement_URI(rdf_xml_parser);
1407 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1408
1409 statement->predicate=RAPTOR_RDF_subject_URI(rdf_xml_parser);
1410 statement->object=subject_uri ? (void*)subject_uri : (void*)subject_id;
1411 statement->object_type=subject_type;
1412 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1413
1414 statement->predicate=RAPTOR_RDF_predicate_URI(rdf_xml_parser);
1415 statement->object=predicate_uri ? (void*)predicate_uri : (void*)predicate_id;
1416 statement->object_type=predicate_type;
1417 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1418
1419 statement->predicate=RAPTOR_RDF_object_URI(rdf_xml_parser);
1420 statement->object=object_uri ? (void*)object_uri : (void*)object_id;
1421 statement->object_type=object_type;
1422 statement->object_literal_language=language;
1423
1424 (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
1425
1426 generate_tidy:
1427 /* Tidy up things allocated here */
1428 if(reified_id)
1429 RAPTOR_FREE(cstring, reified_id);
1430 if(uri1)
1431 raptor_free_uri_v2(rdf_parser->world, uri1);
1432 if(uri2)
1433 raptor_free_uri_v2(rdf_parser->world, uri2);
1434 }
1435
1436
1437
1438 /**
1439 * raptor_rdfxml_element_has_property_attributes:
1440 * @element: element with the property attributes
1441 *
1442 * Return true if the element has at least one property attribute.
1443 *
1444 **/
1445 static int
raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element * element)1446 raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element *element)
1447 {
1448 int i;
1449
1450 if(element->xml_element->attribute_count >0)
1451 return 1;
1452
1453 /* look for rdf: properties */
1454 for(i=0; i<= RDF_ATTR_LAST; i++) {
1455 if(element->rdf_attr[i] &&
1456 rdf_syntax_terms_info[i].type != RAPTOR_IDENTIFIER_TYPE_UNKNOWN)
1457 return 1;
1458 }
1459 return 0;
1460 }
1461
1462
1463 /**
1464 * raptor_rdfxml_process_property_attributes:
1465 * @rdf_parser: Raptor parser object
1466 * @attributes_element: element with the property attributes
1467 * @resource_element: element that defines the resource URI
1468 * subject_uri, subject_uri_source etc.
1469 * @property_node_identifier: Use this identifier for the resource URI
1470 * and count any ordinals for it locally
1471 *
1472 * Process the property attributes for an element for a given resource.
1473 *
1474 **/
1475 static void
raptor_rdfxml_process_property_attributes(raptor_parser * rdf_parser,raptor_rdfxml_element * attributes_element,raptor_rdfxml_element * resource_element,raptor_identifier * property_node_identifier)1476 raptor_rdfxml_process_property_attributes(raptor_parser *rdf_parser,
1477 raptor_rdfxml_element *attributes_element,
1478 raptor_rdfxml_element *resource_element,
1479 raptor_identifier *property_node_identifier)
1480 {
1481 unsigned int i;
1482 raptor_identifier *resource_identifier;
1483
1484 resource_identifier=property_node_identifier ? property_node_identifier : &resource_element->subject;
1485
1486
1487 /* Process attributes as propAttr* = * (propName="string")*
1488 */
1489 for(i=0; i < attributes_element->xml_element->attribute_count; i++) {
1490 raptor_qname* attr=attributes_element->xml_element->attributes[i];
1491 const unsigned char *name;
1492 const unsigned char *value;
1493 int handled=0;
1494
1495 if(!attr)
1496 continue;
1497
1498 name=attr->local_name;
1499 value = attr->value;
1500
1501 if(!attr->nspace) {
1502 raptor_rdfxml_update_document_locator(rdf_parser);
1503 raptor_parser_error(rdf_parser, "Using property attribute '%s' without a namespace is forbidden.", name);
1504 continue;
1505 }
1506
1507
1508 if(!raptor_utf8_is_nfc(value, strlen((const char*)value))) {
1509 const char *message="Property attribute '%s' has a string not in Unicode Normal Form C: %s";
1510 raptor_rdfxml_update_document_locator(rdf_parser);
1511 if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
1512 raptor_parser_error(rdf_parser, message, name, value);
1513 else
1514 raptor_parser_warning(rdf_parser, message, name, value);
1515 continue;
1516 }
1517
1518
1519 /* Generate the property statement using one of these properties:
1520 * 1) rdf:_n
1521 * 2) the URI from the rdf:* attribute where allowed
1522 * 3) otherwise forbidden (including rdf:li)
1523 */
1524 if(attr->nspace->is_rdf_ms) {
1525 /* is rdf: namespace */
1526 int ordinal=0;
1527
1528 if(*name == '_') {
1529 /* recognise rdf:_ */
1530 name++;
1531 ordinal=raptor_check_ordinal(name);
1532 if(ordinal < 1) {
1533 raptor_rdfxml_update_document_locator(rdf_parser);
1534 raptor_parser_error(rdf_parser, "Illegal ordinal value %d in property attribute '%s' seen on containing element '%s'.", ordinal, attr->local_name, name);
1535 ordinal=1;
1536 }
1537 } else {
1538 raptor_rdfxml_update_document_locator(rdf_parser);
1539 if(raptor_rdfxml_forbidden_propertyAttribute_name((const char*)name) > 0)
1540 raptor_parser_error(rdf_parser, "RDF term %s is forbidden as a property attribute.", name);
1541 else
1542 raptor_parser_warning(rdf_parser, "Unknown RDF namespace property attribute '%s'.",
1543 name);
1544 }
1545
1546 if(ordinal >= 1) {
1547 /* Generate an ordinal property when there are no problems */
1548 raptor_rdfxml_generate_statement(rdf_parser,
1549 resource_identifier->uri,
1550 resource_identifier->id,
1551 resource_identifier->type,
1552 resource_identifier->uri_source,
1553
1554 NULL,
1555 NULL,
1556 RAPTOR_IDENTIFIER_TYPE_ORDINAL,
1557 RAPTOR_URI_SOURCE_NOT_URI,
1558 ordinal,
1559
1560 (raptor_uri*)value,
1561 NULL,
1562 RAPTOR_IDENTIFIER_TYPE_LITERAL,
1563 RAPTOR_URI_SOURCE_NOT_URI,
1564 NULL,
1565
1566 NULL, /* Property attributes are never reified*/
1567 resource_element);
1568 handled=1;
1569 }
1570
1571 } /* end is RDF namespace property */
1572
1573
1574 if(!handled)
1575 /* else not rdf: namespace or unknown in rdf: namespace so
1576 * generate a statement with a literal object
1577 */
1578 raptor_rdfxml_generate_statement(rdf_parser,
1579 resource_identifier->uri,
1580 resource_identifier->id,
1581 resource_identifier->type,
1582 resource_identifier->uri_source,
1583
1584 attr->uri,
1585 NULL,
1586 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1587 RAPTOR_URI_SOURCE_ATTRIBUTE,
1588 0,
1589
1590 (raptor_uri*)value,
1591 NULL,
1592 RAPTOR_IDENTIFIER_TYPE_LITERAL,
1593 RAPTOR_URI_SOURCE_NOT_URI,
1594 NULL,
1595
1596 NULL, /* Property attributes are never reified*/
1597 resource_element);
1598
1599 } /* end for ... attributes */
1600
1601
1602 /* Handle rdf property attributes
1603 * (only rdf:type and rdf:value at present)
1604 */
1605 for(i=0; i<= RDF_ATTR_LAST; i++) {
1606 const unsigned char *value=attributes_element->rdf_attr[i];
1607 int object_is_literal=(rdf_syntax_terms_info[i].type == RAPTOR_IDENTIFIER_TYPE_LITERAL);
1608 raptor_uri *property_uri, *object_uri;
1609 raptor_identifier_type object_type;
1610
1611 if(!value)
1612 continue;
1613
1614 if(rdf_syntax_terms_info[i].type == RAPTOR_IDENTIFIER_TYPE_UNKNOWN) {
1615 const char *name=rdf_syntax_terms_info[i].name;
1616 if(raptor_rdfxml_forbidden_propertyAttribute_name(name)) {
1617 raptor_rdfxml_update_document_locator(rdf_parser);
1618 raptor_parser_error(rdf_parser, "RDF term %s is forbidden as a property attribute.", name);
1619 continue;
1620 }
1621 }
1622
1623 if(object_is_literal && !raptor_utf8_is_nfc(value, strlen((const char*)value))) {
1624 const char *message="Property attribute '%s' has a string not in Unicode Normal Form C: %s";
1625 raptor_rdfxml_update_document_locator(rdf_parser);
1626 if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
1627 raptor_parser_error(rdf_parser, message, rdf_syntax_terms_info[i].name, value);
1628 else
1629 raptor_parser_warning(rdf_parser, message, rdf_syntax_terms_info[i].name, value);
1630 continue;
1631 }
1632
1633 property_uri=raptor_new_uri_for_rdf_concept_v2(rdf_parser->world, (rdf_syntax_terms_info[i].name));
1634
1635 object_uri=object_is_literal ? (raptor_uri*)value : raptor_new_uri_relative_to_base_v2(rdf_parser->world, raptor_rdfxml_inscope_base_uri(rdf_parser), value);
1636 object_type=object_is_literal ? RAPTOR_IDENTIFIER_TYPE_LITERAL : RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1637
1638 raptor_rdfxml_generate_statement(rdf_parser,
1639 resource_identifier->uri,
1640 resource_identifier->id,
1641 resource_identifier->type,
1642 resource_identifier->uri_source,
1643
1644 property_uri,
1645 NULL,
1646 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1647 RAPTOR_URI_SOURCE_ATTRIBUTE,
1648 0,
1649
1650 object_uri,
1651 NULL,
1652 object_type,
1653 RAPTOR_URI_SOURCE_NOT_URI,
1654 NULL,
1655
1656 NULL, /* Property attributes are never reified*/
1657 resource_element);
1658 if(!object_is_literal)
1659 raptor_free_uri_v2(rdf_parser->world, object_uri);
1660
1661 raptor_free_uri_v2(rdf_parser->world, property_uri);
1662
1663 } /* end for rdf:property values */
1664
1665 }
1666
1667
1668 static void
raptor_rdfxml_start_element_grammar(raptor_parser * rdf_parser,raptor_rdfxml_element * element)1669 raptor_rdfxml_start_element_grammar(raptor_parser *rdf_parser,
1670 raptor_rdfxml_element *element)
1671 {
1672 int finished;
1673 raptor_state state;
1674 raptor_xml_element* xml_element=element->xml_element;
1675 const unsigned char *el_name=raptor_xml_element_get_name(xml_element)->local_name;
1676 int element_in_rdf_ns=(raptor_xml_element_get_name(xml_element)->nspace &&
1677 raptor_xml_element_get_name(xml_element)->nspace->is_rdf_ms);
1678 raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
1679 int rc=0;
1680 raptor_uri* base_uri;
1681
1682 state=element->state;
1683 #ifdef RAPTOR_DEBUG_VERBOSE
1684 RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state));
1685 #endif
1686
1687 base_uri=raptor_rdfxml_inscope_base_uri(rdf_parser);
1688
1689 finished= 0;
1690 while(!finished) {
1691 switch(state) {
1692 case RAPTOR_STATE_SKIPPING:
1693 element->child_state=state;
1694 element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
1695 finished=1;
1696 break;
1697
1698 case RAPTOR_STATE_UNKNOWN:
1699 /* found <rdf:RDF> ? */
1700
1701 if(element_in_rdf_ns) {
1702 if(raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_RDF_URI(rdf_xml_parser))) {
1703 element->child_state=RAPTOR_STATE_NODE_ELEMENT_LIST;
1704 element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES;
1705 /* Yes - need more content before can continue,
1706 * so wait for another element
1707 */
1708 finished=1;
1709 break;
1710 }
1711 if(raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_Description_URI(rdf_xml_parser))) {
1712 state=RAPTOR_STATE_DESCRIPTION;
1713 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
1714 /* Yes - found something so move immediately to description */
1715 break;
1716 }
1717
1718 if(element_in_rdf_ns && (rc=raptor_rdfxml_forbidden_nodeElement_name((const char*)el_name))) {
1719 if(rc > 0) {
1720 raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a node element.", el_name);
1721 state=RAPTOR_STATE_SKIPPING;
1722 element->child_state=RAPTOR_STATE_SKIPPING;
1723 finished=1;
1724 break;
1725 } else
1726 raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name);
1727 }
1728 }
1729
1730 /* If scanning for element, can continue */
1731 if(rdf_parser->features[RAPTOR_FEATURE_SCANNING]) {
1732 finished=1;
1733 break;
1734 }
1735
1736 /* Otherwise the choice of the next state can be made
1737 * from the current element by the OBJ state
1738 */
1739 state=RAPTOR_STATE_NODE_ELEMENT_LIST;
1740 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES;
1741 break;
1742
1743
1744 case RAPTOR_STATE_NODE_ELEMENT_LIST:
1745 /* Handling
1746 * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList
1747 *
1748 * Everything goes to nodeElement
1749 */
1750
1751 state=RAPTOR_STATE_NODE_ELEMENT;
1752
1753 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
1754
1755 break;
1756
1757
1758
1759 case RAPTOR_STATE_DESCRIPTION:
1760 case RAPTOR_STATE_NODE_ELEMENT:
1761 case RAPTOR_STATE_PARSETYPE_RESOURCE:
1762 case RAPTOR_STATE_PARSETYPE_COLLECTION:
1763 /* Handling <rdf:Description> or other node element
1764 * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement
1765 *
1766 * or a property element acting as a node element for
1767 * rdf:parseType="Resource"
1768 * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt
1769 * or rdf:parseType="Collection" (and daml:Collection)
1770 * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt
1771 *
1772 * Only create a bag if bagID given
1773 */
1774
1775 if(!raptor_xml_element_get_name(xml_element)->uri) {
1776 /* We cannot handle this */
1777 raptor_parser_warning(rdf_parser, "Using node element '%s' without a namespace is forbidden.",
1778 raptor_xml_element_get_name(xml_element)->local_name);
1779 raptor_rdfxml_update_document_locator(rdf_parser);
1780 element->state=RAPTOR_STATE_SKIPPING;
1781 element->child_state=RAPTOR_STATE_SKIPPING;
1782 finished=1;
1783 break;
1784 }
1785
1786 if(element_in_rdf_ns &&
1787 (rc = raptor_rdfxml_forbidden_nodeElement_name((const char*)el_name))) {
1788 if(rc > 0) {
1789 raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a node element.", el_name);
1790 state=RAPTOR_STATE_SKIPPING;
1791 element->state=RAPTOR_STATE_SKIPPING;
1792 element->child_state=RAPTOR_STATE_SKIPPING;
1793 finished=1;
1794 break;
1795 } else
1796 raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name);
1797 }
1798
1799 if(element->content_type !=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
1800 element->content_type !=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION &&
1801 element->parent &&
1802 (element->parent->state == RAPTOR_STATE_PROPERTYELT ||
1803 element->parent->state == RAPTOR_STATE_MEMBER_PROPERTYELT) &&
1804 element->parent->xml_element->content_element_seen > 1) {
1805 raptor_rdfxml_update_document_locator(rdf_parser);
1806 raptor_parser_error(rdf_parser, "The enclosing property already has an object");
1807 state=RAPTOR_STATE_SKIPPING;
1808 element->child_state=RAPTOR_STATE_SKIPPING;
1809 finished=1;
1810 break;
1811 }
1812
1813 if(state == RAPTOR_STATE_NODE_ELEMENT ||
1814 state == RAPTOR_STATE_DESCRIPTION ||
1815 state == RAPTOR_STATE_PARSETYPE_COLLECTION) {
1816 if(element_in_rdf_ns &&
1817 raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_Description_URI(rdf_xml_parser)))
1818 state=RAPTOR_STATE_DESCRIPTION;
1819 else
1820 state=RAPTOR_STATE_NODE_ELEMENT;
1821 }
1822
1823
1824 if((element->rdf_attr[RDF_ATTR_ID]!=NULL) +
1825 (element->rdf_attr[RDF_ATTR_about]!=NULL) +
1826 (element->rdf_attr[RDF_ATTR_nodeID]!=NULL)>1) {
1827 raptor_rdfxml_update_document_locator(rdf_parser);
1828 raptor_parser_error(rdf_parser, "Multiple attributes of rdf:ID, rdf:about and rdf:nodeID on element '%s' - only one allowed.", el_name);
1829 }
1830
1831 if(element->rdf_attr[RDF_ATTR_ID]) {
1832 element->subject.id=element->rdf_attr[RDF_ATTR_ID];
1833 element->rdf_attr[RDF_ATTR_ID]=NULL;
1834 element->subject.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->subject.id);
1835 if(!element->subject.uri)
1836 goto oom;
1837 element->subject.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1838 element->subject.uri_source=RAPTOR_URI_SOURCE_ID;
1839 if(!raptor_valid_xml_ID(rdf_parser, element->subject.id)) {
1840 raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", element->subject.id);
1841 state=RAPTOR_STATE_SKIPPING;
1842 element->child_state=RAPTOR_STATE_SKIPPING;
1843 finished=1;
1844 break;
1845 }
1846 if(raptor_rdfxml_record_ID(rdf_parser, element, element->subject.id)) {
1847 raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", element->subject.id);
1848 state=RAPTOR_STATE_SKIPPING;
1849 element->child_state=RAPTOR_STATE_SKIPPING;
1850 finished=1;
1851 break;
1852 }
1853 } else if (element->rdf_attr[RDF_ATTR_about]) {
1854 element->subject.uri=raptor_new_uri_relative_to_base_v2(rdf_parser->world, base_uri, (const unsigned char*)element->rdf_attr[RDF_ATTR_about]);
1855 RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_about]);
1856 element->rdf_attr[RDF_ATTR_about]=NULL;
1857 if(!element->subject.uri)
1858 goto oom;
1859 element->subject.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1860 element->subject.uri_source=RAPTOR_URI_SOURCE_URI;
1861 } else if (element->rdf_attr[RDF_ATTR_nodeID]) {
1862 element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, (unsigned char*)element->rdf_attr[RDF_ATTR_nodeID]);
1863 element->rdf_attr[RDF_ATTR_nodeID]=NULL;
1864 if(!element->subject.id)
1865 goto oom;
1866 element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
1867 element->subject.uri_source=RAPTOR_URI_SOURCE_BLANK_ID;
1868 if(!raptor_valid_xml_ID(rdf_parser, element->subject.id)) {
1869 raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", element->subject.id);
1870 state=RAPTOR_STATE_SKIPPING;
1871 element->child_state=RAPTOR_STATE_SKIPPING;
1872 finished=1;
1873 break;
1874 }
1875 } else if (element->parent &&
1876 element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
1877 element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION &&
1878 (element->parent->object.uri || element->parent->object.id)) {
1879 /* copy from parent (property element), it has a URI for us */
1880 raptor_copy_identifier(&element->subject, &element->parent->object);
1881 } else {
1882 element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
1883 if(!element->subject.id)
1884 goto oom;
1885 element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
1886 element->subject.uri_source=RAPTOR_URI_SOURCE_GENERATED;
1887 }
1888
1889
1890 if(element->rdf_attr[RDF_ATTR_bagID]) {
1891 if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) {
1892 element->bag.id=element->rdf_attr[RDF_ATTR_bagID];
1893 element->rdf_attr[RDF_ATTR_bagID]=NULL;
1894 element->bag.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->bag.id);
1895 if(!element->bag.uri)
1896 goto oom;
1897 element->bag.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
1898 element->bag.uri_source=RAPTOR_URI_SOURCE_GENERATED;
1899
1900 if(!raptor_valid_xml_ID(rdf_parser, element->bag.id)) {
1901 raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", element->bag.id);
1902 state=RAPTOR_STATE_SKIPPING;
1903 element->child_state=RAPTOR_STATE_SKIPPING;
1904 finished=1;
1905 break;
1906 }
1907 if(raptor_rdfxml_record_ID(rdf_parser, element, element->bag.id)) {
1908 raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'", element->bag.id);
1909 state=RAPTOR_STATE_SKIPPING;
1910 element->child_state=RAPTOR_STATE_SKIPPING;
1911 finished=1;
1912 break;
1913 }
1914
1915 raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated.");
1916
1917 raptor_rdfxml_generate_statement(rdf_parser,
1918 element->bag.uri,
1919 element->bag.id,
1920 element->bag.type,
1921 element->bag.uri_source,
1922
1923 RAPTOR_RDF_type_URI(rdf_xml_parser),
1924 NULL,
1925 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1926 RAPTOR_URI_SOURCE_URI,
1927 0,
1928
1929 RAPTOR_RDF_Bag_URI(rdf_xml_parser),
1930 NULL,
1931 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1932 RAPTOR_URI_SOURCE_NOT_URI,
1933 NULL,
1934
1935 NULL,
1936 NULL);
1937 } else {
1938 /* bagID forbidden */
1939 raptor_parser_error(rdf_parser, "rdf:bagID is forbidden.");
1940 state=RAPTOR_STATE_SKIPPING;
1941 element->child_state=RAPTOR_STATE_SKIPPING;
1942 finished=1;
1943 break;
1944 }
1945 }
1946
1947
1948 if(element->parent) {
1949
1950 /* In a rdf:parseType="Collection" the resources are appended
1951 * to the list at the genid element->parent->tail_id
1952 */
1953 if (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION ||
1954 element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
1955 const unsigned char * idList = raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
1956
1957 /* <idList> rdf:type rdf:List */
1958 raptor_uri *collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_List_URI(rdf_xml_parser) : RAPTOR_RDF_List_URI(rdf_xml_parser);
1959
1960 if(!idList)
1961 goto oom;
1962
1963 if((element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ||
1964 rdf_parser->features[RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST])
1965 raptor_rdfxml_generate_statement(rdf_parser,
1966 NULL,
1967 idList,
1968 RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
1969 RAPTOR_URI_SOURCE_ID,
1970
1971 RAPTOR_RDF_type_URI(rdf_xml_parser),
1972 NULL,
1973 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1974 RAPTOR_URI_SOURCE_URI,
1975 0,
1976
1977 collection_uri,
1978 NULL,
1979 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1980 RAPTOR_URI_SOURCE_URI,
1981 NULL,
1982
1983 NULL,
1984 element);
1985
1986 collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_first_URI(rdf_xml_parser) : RAPTOR_RDF_first_URI(rdf_xml_parser);
1987
1988 /* <idList> rdf:first <element->uri> */
1989 raptor_rdfxml_generate_statement(rdf_parser,
1990 NULL,
1991 idList,
1992 RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
1993 RAPTOR_URI_SOURCE_ID,
1994
1995 collection_uri,
1996 NULL,
1997 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
1998 RAPTOR_URI_SOURCE_URI,
1999 0,
2000
2001 element->subject.uri,
2002 element->subject.id,
2003 element->subject.type,
2004 element->subject.uri_source,
2005 NULL,
2006
2007 NULL,
2008 NULL);
2009
2010 /* If there is no rdf:parseType="Collection" */
2011 if (!element->parent->tail_id) {
2012 int len;
2013 unsigned char *new_id;
2014
2015 /* Free any existing object URI still around
2016 * I suspect this can never happen.
2017 */
2018 if(element->parent->object.uri)
2019 raptor_free_uri_v2(rdf_parser->world, element->parent->object.uri);
2020
2021 len=strlen((char*)idList);
2022 new_id=(unsigned char*)RAPTOR_MALLOC(cstring, len+1);
2023 if(!len) {
2024 if(new_id)
2025 RAPTOR_FREE(cstring, new_id);
2026 return;
2027 }
2028 strncpy((char*)new_id, (char*)idList, len+1);
2029
2030 element->parent->object.id=new_id;
2031 element->parent->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
2032 element->parent->object.uri_source=RAPTOR_URI_SOURCE_ID;
2033 } else {
2034 collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_xml_parser);
2035 /* _:tail_id rdf:rest _:listRest */
2036 raptor_rdfxml_generate_statement(rdf_parser,
2037 NULL,
2038 element->parent->tail_id,
2039 RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
2040 RAPTOR_URI_SOURCE_ID,
2041
2042 collection_uri,
2043 NULL,
2044 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2045 RAPTOR_URI_SOURCE_URI,
2046 0,
2047
2048 NULL,
2049 idList,
2050 RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
2051 RAPTOR_URI_SOURCE_ID,
2052 NULL,
2053
2054 NULL,
2055 NULL);
2056 }
2057
2058 /* update new tail */
2059 if(element->parent->tail_id)
2060 RAPTOR_FREE(cstring, (char*)element->parent->tail_id);
2061
2062 element->parent->tail_id=idList;
2063
2064 } else if(element->parent->state != RAPTOR_STATE_UNKNOWN &&
2065 element->state != RAPTOR_STATE_PARSETYPE_RESOURCE) {
2066 /* If there is a parent element (property) containing this
2067 * element (node) and it has no object, set it from this subject
2068 */
2069
2070 if(element->parent->object.uri) {
2071 raptor_rdfxml_update_document_locator(rdf_parser);
2072 raptor_parser_error(rdf_parser, "Tried to set multiple objects of a statement");
2073 } else {
2074 /* Store URI of this node in our parent as the property object */
2075 raptor_copy_identifier(&element->parent->object, &element->subject);
2076 element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2077 }
2078
2079 }
2080 }
2081
2082
2083 /* If this is a node element, generate the rdf:type statement
2084 * from this node
2085 */
2086 if(state == RAPTOR_STATE_NODE_ELEMENT)
2087 raptor_rdfxml_generate_statement(rdf_parser,
2088 element->subject.uri,
2089 element->subject.id,
2090 element->subject.type,
2091 element->subject.uri_source,
2092
2093 RAPTOR_RDF_type_URI(rdf_xml_parser),
2094 NULL,
2095 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2096 RAPTOR_URI_SOURCE_URI,
2097 0,
2098
2099 raptor_xml_element_get_name(xml_element)->uri,
2100 NULL,
2101 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2102 element->object.uri_source,
2103 NULL,
2104
2105 &element->reified,
2106 element);
2107
2108 raptor_rdfxml_process_property_attributes(rdf_parser, element, element, NULL);
2109
2110 /* for both productions now need some more content or
2111 * property elements before can do any more work.
2112 */
2113
2114 element->child_state=RAPTOR_STATE_PROPERTYELT;
2115 element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2116 finished=1;
2117 break;
2118
2119
2120 case RAPTOR_STATE_PARSETYPE_OTHER:
2121 /* FALLTHROUGH */
2122
2123 case RAPTOR_STATE_PARSETYPE_LITERAL:
2124 raptor_xml_writer_start_element(rdf_xml_parser->xml_writer, xml_element);
2125 element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL;
2126 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2127
2128 finished=1;
2129 break;
2130
2131 /* Handle all the detail of the various options of property element
2132 * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
2133 *
2134 * All the attributes must be scanned here to see what additional
2135 * property element work is needed. No triples are generated
2136 * until the end of this element, until it is clear if the
2137 * element was empty.
2138 */
2139 case RAPTOR_STATE_MEMBER_PROPERTYELT:
2140 case RAPTOR_STATE_PROPERTYELT:
2141
2142 if(!raptor_xml_element_get_name(xml_element)->uri) {
2143 raptor_parser_error(rdf_parser, "Using property element '%s' without a namespace is forbidden.",
2144 raptor_xml_element_get_name(element->parent->xml_element)->local_name);
2145 raptor_rdfxml_update_document_locator(rdf_parser);
2146 element->state=RAPTOR_STATE_SKIPPING;
2147 element->child_state=RAPTOR_STATE_SKIPPING;
2148 finished=1;
2149 break;
2150 }
2151
2152 /* Handling rdf:li as a property, noting special processing */
2153 if(element_in_rdf_ns &&
2154 raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_li_URI(rdf_xml_parser))) {
2155 state=RAPTOR_STATE_MEMBER_PROPERTYELT;
2156 }
2157
2158
2159 if(element_in_rdf_ns &&
2160 (rc = raptor_rdfxml_forbidden_propertyElement_name((const char*)el_name))) {
2161 if(rc > 0) {
2162 raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a property element.", el_name);
2163 state=RAPTOR_STATE_SKIPPING;
2164 element->child_state=RAPTOR_STATE_SKIPPING;
2165 finished=1;
2166 break;
2167 } else
2168 raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name);
2169 }
2170
2171
2172 /* rdf:ID on a property element - reify a statement.
2173 * Allowed on all property element forms
2174 */
2175 if(element->rdf_attr[RDF_ATTR_ID]) {
2176 element->reified.id=element->rdf_attr[RDF_ATTR_ID];
2177 element->rdf_attr[RDF_ATTR_ID]=NULL;
2178 element->reified.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->reified.id);
2179 if(!element->reified.uri)
2180 goto oom;
2181 element->reified.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2182 element->reified.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2183
2184 if(!raptor_valid_xml_ID(rdf_parser, element->reified.id)) {
2185 raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", element->reified.id);
2186 state=RAPTOR_STATE_SKIPPING;
2187 element->child_state=RAPTOR_STATE_SKIPPING;
2188 finished=1;
2189 break;
2190 }
2191 if(raptor_rdfxml_record_ID(rdf_parser, element, element->reified.id)) {
2192 raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", element->reified.id);
2193 state=RAPTOR_STATE_SKIPPING;
2194 element->child_state=RAPTOR_STATE_SKIPPING;
2195 finished=1;
2196 break;
2197 }
2198 }
2199
2200 /* rdf:datatype on a property element.
2201 * Only allowed for
2202 * http://www.w3.org/TR/rdf-syntax-grammar/#literalPropertyElt
2203 */
2204 if (element->rdf_attr[RDF_ATTR_datatype]) {
2205 element->object_literal_datatype=raptor_new_uri_relative_to_base_v2(rdf_parser->world, base_uri, (const unsigned char*)element->rdf_attr[RDF_ATTR_datatype]);
2206 RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_datatype]);
2207 element->rdf_attr[RDF_ATTR_datatype]=NULL;
2208 if(!element->object_literal_datatype)
2209 goto oom;
2210 }
2211
2212 if(element->rdf_attr[RDF_ATTR_bagID]) {
2213
2214 if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) {
2215
2216 if(element->rdf_attr[RDF_ATTR_resource] ||
2217 element->rdf_attr[RDF_ATTR_parseType]) {
2218
2219 raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on property element '%s' with an rdf:resource or rdf:parseType attribute.", el_name);
2220 /* prevent this being used later either */
2221 element->rdf_attr[RDF_ATTR_bagID]=NULL;
2222 } else {
2223 element->bag.id=element->rdf_attr[RDF_ATTR_bagID];
2224 element->rdf_attr[RDF_ATTR_bagID]=NULL;
2225 element->bag.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->bag.id);
2226 if(!element->bag.uri)
2227 goto oom;
2228 element->bag.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2229 element->bag.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2230
2231 if(!raptor_valid_xml_ID(rdf_parser, element->bag.id)) {
2232 raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", element->bag.id);
2233 state=RAPTOR_STATE_SKIPPING;
2234 element->child_state=RAPTOR_STATE_SKIPPING;
2235 finished=1;
2236 break;
2237 }
2238 if(raptor_rdfxml_record_ID(rdf_parser, element, element->bag.id)) {
2239 raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'", element->bag.id);
2240 state=RAPTOR_STATE_SKIPPING;
2241 element->child_state=RAPTOR_STATE_SKIPPING;
2242 finished=1;
2243 break;
2244 }
2245
2246 raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated.");
2247 }
2248 } else {
2249 /* bagID forbidden */
2250 raptor_parser_error(rdf_parser, "rdf:bagID is forbidden.");
2251 state=RAPTOR_STATE_SKIPPING;
2252 element->child_state=RAPTOR_STATE_SKIPPING;
2253 finished=1;
2254 break;
2255 }
2256 } /* if rdf:bagID on property element */
2257
2258
2259 element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT;
2260
2261 if (element->rdf_attr[RDF_ATTR_parseType]) {
2262 const unsigned char *parse_type=element->rdf_attr[RDF_ATTR_parseType];
2263 int i;
2264 int is_parseType_Literal=0;
2265
2266 if(raptor_rdfxml_element_has_property_attributes(element)) {
2267 raptor_parser_error(rdf_parser, "Property attributes cannot be used with rdf:parseType='%s'", parse_type);
2268 state=RAPTOR_STATE_SKIPPING;
2269 element->child_state=RAPTOR_STATE_SKIPPING;
2270 finished=1;
2271 break;
2272 }
2273
2274 /* Check for bad combinations of things with parseType */
2275 for(i=0; i<= RDF_ATTR_LAST; i++)
2276 if(element->rdf_attr[i] && i != RDF_ATTR_parseType) {
2277 raptor_parser_error(rdf_parser, "Attribute '%s' cannot be used with rdf:parseType='%s'", rdf_syntax_terms_info[i].name, parse_type);
2278 state=RAPTOR_STATE_SKIPPING;
2279 element->child_state=RAPTOR_STATE_SKIPPING;
2280 finished=1;
2281 break;
2282 }
2283
2284
2285 if(!strcmp((char*)parse_type, "Literal"))
2286 is_parseType_Literal=1;
2287 else if (!strcmp((char*)parse_type, "Resource")) {
2288 state=RAPTOR_STATE_PARSETYPE_RESOURCE;
2289 element->child_state=RAPTOR_STATE_PROPERTYELT;
2290 element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2291
2292 /* create a node for the subject of the contained properties */
2293 element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
2294 if(!element->subject.id)
2295 goto oom;
2296 element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
2297 element->subject.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2298 } else if(!strcmp((char*)parse_type, "Collection")) {
2299 /* An rdf:parseType="Collection" appears as a single node */
2300 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2301 element->child_state=RAPTOR_STATE_PARSETYPE_COLLECTION;
2302 element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION;
2303 } else {
2304 if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES] &&
2305 !raptor_strcasecmp((char*)parse_type, "daml:collection")) {
2306 /* A DAML collection appears as a single node */
2307 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2308 element->child_state=RAPTOR_STATE_PARSETYPE_COLLECTION;
2309 element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION;
2310 } else {
2311 if(rdf_parser->features[RAPTOR_FEATURE_WARN_OTHER_PARSETYPES]) {
2312 raptor_parser_warning(rdf_parser, "Unknown rdf:parseType value '%s' taken as 'Literal'", parse_type);
2313 }
2314 is_parseType_Literal=1;
2315 }
2316
2317 }
2318
2319 if(is_parseType_Literal) {
2320 /* rdf:parseType="Literal" - explicitly or default
2321 * if the parseType value is not recognised
2322 */
2323 rdf_xml_parser->xml_content=NULL;
2324 rdf_xml_parser->xml_content_length=0;
2325 rdf_xml_parser->iostream=raptor_new_iostream_to_string(&rdf_xml_parser->xml_content, &rdf_xml_parser->xml_content_length, raptor_alloc_memory);
2326 if(!rdf_xml_parser->iostream)
2327 goto oom;
2328 rdf_xml_parser->xml_writer=raptor_new_xml_writer_v2(rdf_parser->world,
2329 NULL,
2330 rdf_xml_parser->iostream,
2331 (raptor_simple_message_handler)raptor_parser_simple_error, rdf_parser,
2332 1);
2333 if(!rdf_xml_parser->xml_writer)
2334 goto oom;
2335
2336 raptor_xml_writer_set_feature(rdf_xml_parser->xml_writer,
2337 RAPTOR_FEATURE_WRITER_XML_DECLARATION, 0);
2338
2339 element->child_state=RAPTOR_STATE_PARSETYPE_LITERAL;
2340 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2341 element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2342 }
2343 } else {
2344
2345 /* Can only be the empty property element case
2346 * http://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt
2347 */
2348
2349 /* The presence of the rdf:resource or rdf:nodeID
2350 * attributes is checked at element close time
2351 */
2352
2353 /*
2354 * Assign reified URI here so we don't reify property attributes
2355 * using this id
2356 */
2357 if(element->reified.id && !element->reified.uri) {
2358 element->reified.uri=raptor_new_uri_from_id_v2(rdf_parser->world, base_uri, element->reified.id);
2359 if(!element->reified.uri)
2360 goto oom;
2361 element->reified.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2362 element->reified.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2363 }
2364
2365 if(element->rdf_attr[RDF_ATTR_resource] ||
2366 element->rdf_attr[RDF_ATTR_nodeID]) {
2367 /* Done - wait for end of this element to end in order to
2368 * check the element was empty as expected */
2369 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2370 } else {
2371 /* Otherwise process content in obj (value) state */
2372 element->child_state=RAPTOR_STATE_NODE_ELEMENT_LIST;
2373 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT;
2374 }
2375 }
2376
2377 finished=1;
2378
2379 break;
2380
2381
2382 case RAPTOR_STATE_INVALID:
2383 default:
2384 raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_start_element_grammar: Unexpected parser state %d - %s", state, raptor_rdfxml_state_as_string(state));
2385 finished=1;
2386
2387 } /* end switch */
2388
2389 if(state != element->state) {
2390 element->state=state;
2391 #ifdef RAPTOR_DEBUG_VERBOSE
2392 RAPTOR_DEBUG3("Moved to state %d - %s\n", state, raptor_rdfxml_state_as_string(state));
2393 #endif
2394 }
2395
2396 } /* end while */
2397
2398 #ifdef RAPTOR_DEBUG_VERBOSE
2399 RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
2400 #endif
2401
2402 return;
2403
2404 oom:
2405 raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping");
2406 element->state=RAPTOR_STATE_SKIPPING;
2407 }
2408
2409
2410 static void
raptor_rdfxml_end_element_grammar(raptor_parser * rdf_parser,raptor_rdfxml_element * element)2411 raptor_rdfxml_end_element_grammar(raptor_parser *rdf_parser,
2412 raptor_rdfxml_element *element)
2413 {
2414 raptor_state state;
2415 int finished;
2416 raptor_xml_element* xml_element=element->xml_element;
2417 const unsigned char *el_name=raptor_xml_element_get_name(xml_element)->local_name;
2418 int element_in_rdf_ns=(raptor_xml_element_get_name(xml_element)->nspace &&
2419 raptor_xml_element_get_name(xml_element)->nspace->is_rdf_ms);
2420 raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
2421
2422
2423 state=element->state;
2424 #ifdef RAPTOR_DEBUG_VERBOSE
2425 RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state));
2426 #endif
2427
2428 finished= 0;
2429 while(!finished) {
2430 switch(state) {
2431 case RAPTOR_STATE_SKIPPING:
2432 finished=1;
2433 break;
2434
2435 case RAPTOR_STATE_UNKNOWN:
2436 finished=1;
2437 break;
2438
2439 case RAPTOR_STATE_NODE_ELEMENT_LIST:
2440 if(element_in_rdf_ns &&
2441 raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_RDF_URI(rdf_xml_parser))) {
2442 /* end of RDF - boo hoo */
2443 state=RAPTOR_STATE_UNKNOWN;
2444 finished=1;
2445 break;
2446 }
2447 /* When scanning, another element ending is outside the RDF
2448 * world so this can happen without further work
2449 */
2450 if(rdf_parser->features[RAPTOR_FEATURE_SCANNING]) {
2451 state=RAPTOR_STATE_UNKNOWN;
2452 finished=1;
2453 break;
2454 }
2455 /* otherwise found some junk after RDF content in an RDF-only
2456 * document (probably never get here since this would be
2457 * a mismatched XML tag and cause an error earlier)
2458 */
2459 raptor_rdfxml_update_document_locator(rdf_parser);
2460 raptor_parser_warning(rdf_parser, "Element '%s' ended, expected end of RDF element", el_name);
2461 state=RAPTOR_STATE_UNKNOWN;
2462 finished=1;
2463 break;
2464
2465
2466 case RAPTOR_STATE_DESCRIPTION:
2467 case RAPTOR_STATE_NODE_ELEMENT:
2468 case RAPTOR_STATE_PARSETYPE_RESOURCE:
2469
2470 /* If there is a parent element containing this element and
2471 * the parent isn't a description, has an identifier,
2472 * create the statement between this node using parent property
2473 * (Need to check for identifier so that top-level typed nodes
2474 * don't get connect to <rdf:RDF> parent element)
2475 */
2476 if(state == RAPTOR_STATE_NODE_ELEMENT &&
2477 element->parent &&
2478 (element->parent->subject.uri || element->parent->subject.id))
2479 raptor_rdfxml_generate_statement(rdf_parser,
2480 element->parent->subject.uri,
2481 element->parent->subject.id,
2482 element->parent->subject.type,
2483 element->parent->subject.uri_source,
2484
2485 raptor_xml_element_get_name(element->parent->xml_element)->uri,
2486 NULL,
2487 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2488 RAPTOR_URI_SOURCE_ELEMENT,
2489 0,
2490
2491 element->subject.uri,
2492 element->subject.id,
2493 element->subject.type,
2494 element->subject.uri_source,
2495 NULL,
2496
2497 NULL,
2498 element);
2499 else if(state == RAPTOR_STATE_PARSETYPE_RESOURCE &&
2500 element->parent &&
2501 (element->parent->subject.uri || element->parent->subject.id)) {
2502 /* Handle rdf:li as the rdf:parseType="resource" property */
2503 if(element_in_rdf_ns &&
2504 raptor_uri_equals_v2(rdf_parser->world, raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_li_URI(rdf_xml_parser))) {
2505 element->parent->last_ordinal++;
2506 raptor_rdfxml_generate_statement(rdf_parser,
2507 element->parent->subject.uri,
2508 element->parent->subject.id,
2509 element->parent->subject.type,
2510 element->parent->subject.uri_source,
2511
2512 NULL,
2513 NULL,
2514 RAPTOR_IDENTIFIER_TYPE_ORDINAL,
2515 RAPTOR_URI_SOURCE_NOT_URI,
2516 element->parent->last_ordinal,
2517
2518 element->subject.uri,
2519 element->subject.id,
2520 element->subject.type,
2521 element->subject.uri_source,
2522 NULL,
2523
2524 &element->reified,
2525 element->parent);
2526 } else {
2527 raptor_rdfxml_generate_statement(rdf_parser,
2528 element->parent->subject.uri,
2529 element->parent->subject.id,
2530 element->parent->subject.type,
2531 element->parent->subject.uri_source,
2532
2533 raptor_xml_element_get_name(xml_element)->uri,
2534 NULL,
2535 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2536 RAPTOR_URI_SOURCE_ELEMENT,
2537 0,
2538
2539 element->subject.uri,
2540 element->subject.id,
2541 element->subject.type,
2542 element->subject.uri_source,
2543 NULL,
2544
2545 &element->reified,
2546 element->parent);
2547 }
2548 }
2549 finished=1;
2550 break;
2551
2552 case RAPTOR_STATE_PARSETYPE_COLLECTION:
2553
2554 finished=1;
2555 break;
2556
2557 case RAPTOR_STATE_PARSETYPE_OTHER:
2558 /* FALLTHROUGH */
2559
2560 case RAPTOR_STATE_PARSETYPE_LITERAL:
2561 element->parent->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
2562
2563 raptor_xml_writer_end_element(rdf_xml_parser->xml_writer, xml_element);
2564
2565 finished=1;
2566 break;
2567
2568
2569 case RAPTOR_STATE_PROPERTYELT:
2570 case RAPTOR_STATE_MEMBER_PROPERTYELT:
2571 /* A property element
2572 * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
2573 *
2574 * Literal content part is handled here.
2575 * The element content is handled in the internal states
2576 * Empty content is checked here.
2577 */
2578
2579 if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
2580 if(xml_element->content_cdata_seen)
2581 element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
2582 else if (xml_element->content_element_seen)
2583 element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
2584 else { /* Empty Literal */
2585 element->object.type= RAPTOR_IDENTIFIER_TYPE_LITERAL;
2586 element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
2587 }
2588
2589 }
2590
2591
2592 /* Handle terminating a rdf:parseType="Collection" list */
2593 if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION ||
2594 element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
2595 raptor_uri* nil_uri=(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_nil_URI(rdf_xml_parser) : RAPTOR_RDF_nil_URI(rdf_xml_parser);
2596 if (!element->tail_id) {
2597 /* If No List: set object of statement to rdf:nil */
2598 element->object.uri= raptor_uri_copy_v2(rdf_parser->world, nil_uri);
2599 element->object.id= NULL;
2600 element->object.type= RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2601 element->object.uri_source= RAPTOR_URI_SOURCE_URI;
2602 } else {
2603 raptor_uri* rest_uri=(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_xml_parser);
2604 /* terminate the list */
2605 raptor_rdfxml_generate_statement(rdf_parser,
2606 NULL,
2607 element->tail_id,
2608 RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
2609 RAPTOR_URI_SOURCE_ID,
2610
2611 rest_uri,
2612 NULL,
2613 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2614 RAPTOR_URI_SOURCE_URI,
2615 0,
2616
2617 nil_uri,
2618 NULL,
2619 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2620 RAPTOR_URI_SOURCE_URI,
2621 NULL,
2622
2623 NULL,
2624 NULL);
2625 }
2626
2627 } /* end rdf:parseType="Collection" termination */
2628
2629
2630 #ifdef RAPTOR_DEBUG_VERBOSE
2631 RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
2632 #endif
2633
2634 switch(element->content_type) {
2635 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE:
2636
2637 if(raptor_rdfxml_element_has_property_attributes(element) &&
2638 element->child_state == RAPTOR_STATE_DESCRIPTION) {
2639 raptor_parser_error(rdf_parser, "Property element '%s' has both property attributes and a node element content", el_name);
2640 state=RAPTOR_STATE_SKIPPING;
2641 element->child_state=RAPTOR_STATE_SKIPPING;
2642 finished=1;
2643 break;
2644 }
2645
2646 if(element->object.type == RAPTOR_IDENTIFIER_TYPE_UNKNOWN) {
2647 if(element->rdf_attr[RDF_ATTR_resource]) {
2648 element->object.uri=raptor_new_uri_relative_to_base_v2(rdf_parser->world,
2649 raptor_rdfxml_inscope_base_uri(rdf_parser),
2650 (const unsigned char*)element->rdf_attr[RDF_ATTR_resource]);
2651 RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_resource]);
2652 element->rdf_attr[RDF_ATTR_resource]=NULL;
2653 if(!element->object.uri)
2654 goto oom;
2655 element->object.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2656 element->object.uri_source=RAPTOR_URI_SOURCE_URI;
2657 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2658 } else if(element->rdf_attr[RDF_ATTR_nodeID]) {
2659 element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, (unsigned char*)element->rdf_attr[RDF_ATTR_nodeID]);
2660 element->rdf_attr[RDF_ATTR_nodeID]=NULL;
2661 if(!element->object.id)
2662 goto oom;
2663 element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
2664 element->object.uri_source=RAPTOR_URI_SOURCE_BLANK_ID;
2665 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2666 if(!raptor_valid_xml_ID(rdf_parser, element->object.id)) {
2667 raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", element->object.id);
2668 state=RAPTOR_STATE_SKIPPING;
2669 element->child_state=RAPTOR_STATE_SKIPPING;
2670 finished=1;
2671 break;
2672 }
2673 } else {
2674 element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
2675 if(!element->object.id)
2676 goto oom;
2677 element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
2678 element->object.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2679 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2680 }
2681
2682 raptor_rdfxml_process_property_attributes(rdf_parser, element,
2683 element->parent,
2684 &element->object);
2685
2686 }
2687
2688 /* We know object is a resource, so delete any insignificant
2689 * whitespace so that FALLTHROUGH code below finds the object.
2690 */
2691 if(xml_element->content_cdata_length) {
2692 raptor_free_stringbuffer(xml_element->content_cdata_sb);
2693 xml_element->content_cdata_sb=NULL;
2694 xml_element->content_cdata_length=0;
2695 }
2696
2697 /* FALLTHROUGH */
2698 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL:
2699
2700 if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {
2701
2702 if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) {
2703 /* Only an empty literal can have a rdf:bagID */
2704 if(element->bag.uri || element->bag.id) {
2705 if(xml_element->content_cdata_length > 0) {
2706 raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on a literal property element '%s'.", el_name);
2707 /* prevent this being used later either */
2708 element->rdf_attr[RDF_ATTR_bagID]=NULL;
2709 } else
2710 raptor_rdfxml_generate_statement(rdf_parser,
2711 element->bag.uri,
2712 element->bag.id,
2713 element->bag.type,
2714 element->bag.uri_source,
2715
2716 RAPTOR_RDF_type_URI(rdf_xml_parser),
2717 NULL,
2718 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2719 RAPTOR_URI_SOURCE_URI,
2720 0,
2721
2722 RAPTOR_RDF_Bag_URI(rdf_xml_parser),
2723 NULL,
2724 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2725 RAPTOR_URI_SOURCE_NOT_URI,
2726 NULL,
2727
2728 NULL,
2729 NULL);
2730 }
2731 } /* if rdf:bagID */
2732
2733 /* If there is empty literal content with properties
2734 * generate a node to hang properties off
2735 */
2736 if(raptor_rdfxml_element_has_property_attributes(element) &&
2737 xml_element->content_cdata_length > 0) {
2738 raptor_parser_error(rdf_parser, "Literal property element '%s' has property attributes", el_name);
2739 state=RAPTOR_STATE_SKIPPING;
2740 element->child_state=RAPTOR_STATE_SKIPPING;
2741 finished=1;
2742 break;
2743 }
2744
2745 if(element->object.type == RAPTOR_IDENTIFIER_TYPE_LITERAL &&
2746 raptor_rdfxml_element_has_property_attributes(element) &&
2747 !element->object.uri) {
2748 element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
2749 if(!element->object.id)
2750 goto oom;
2751 element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
2752 element->object.uri_source=RAPTOR_URI_SOURCE_GENERATED;
2753 element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
2754 }
2755
2756 raptor_rdfxml_process_property_attributes(rdf_parser, element,
2757 element,
2758 &element->object);
2759 }
2760
2761
2762 /* just be friendly to older compilers and don't declare
2763 * variables in the middle of a block
2764 */
2765 if(1) {
2766 raptor_uri *predicate_uri=NULL;
2767 raptor_identifier_type predicate_type;
2768 int predicate_ordinal=0;
2769 raptor_uri *object_uri;
2770 raptor_identifier_type object_type;
2771 raptor_uri *literal_datatype=NULL;
2772 const unsigned char* empty_literal=(const unsigned char*)"";
2773
2774 if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
2775 element->parent->last_ordinal++;
2776 predicate_ordinal=element->parent->last_ordinal;
2777 predicate_type=RAPTOR_IDENTIFIER_TYPE_ORDINAL;
2778
2779 } else {
2780 predicate_uri=raptor_xml_element_get_name(xml_element)->uri;
2781 predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
2782 }
2783
2784
2785 if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {
2786 unsigned char* literal;
2787
2788 object_type=RAPTOR_IDENTIFIER_TYPE_LITERAL;
2789 literal=raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
2790 literal_datatype=element->object_literal_datatype;
2791
2792 if(!literal_datatype && literal &&
2793 !raptor_utf8_is_nfc(literal, xml_element->content_cdata_length)) {
2794 const char *message="Property element '%s' has a string not in Unicode Normal Form C: %s";
2795 raptor_rdfxml_update_document_locator(rdf_parser);
2796 if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
2797 raptor_parser_error(rdf_parser, message, el_name, literal);
2798 else
2799 raptor_parser_warning(rdf_parser, message, el_name, literal);
2800 }
2801
2802 if(!literal)
2803 /* empty literal */
2804 literal=(unsigned char*)empty_literal;
2805
2806 object_uri=(raptor_uri*)literal;
2807 } else {
2808 object_type=element->object.type;
2809 object_uri=element->object.uri;
2810 }
2811
2812 raptor_rdfxml_generate_statement(rdf_parser,
2813 element->parent->subject.uri,
2814 element->parent->subject.id,
2815 element->parent->subject.type,
2816 RAPTOR_URI_SOURCE_ELEMENT,
2817
2818 predicate_uri,
2819 NULL,
2820 predicate_type,
2821 RAPTOR_URI_SOURCE_NOT_URI,
2822 predicate_ordinal,
2823
2824 object_uri,
2825 element->object.id,
2826 object_type,
2827 element->object.uri_source,
2828 literal_datatype,
2829
2830 &element->reified,
2831 element->parent);
2832
2833 }
2834
2835 break;
2836
2837 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED:
2838 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL:
2839 {
2840 unsigned char *buffer;
2841 unsigned int length;
2842
2843 if(rdf_xml_parser->xml_writer) {
2844 raptor_xml_writer_flush(rdf_xml_parser->xml_writer);
2845
2846 raptor_free_iostream(rdf_xml_parser->iostream);
2847 rdf_xml_parser->iostream=NULL;
2848
2849 buffer=(unsigned char*)rdf_xml_parser->xml_content;
2850 length=rdf_xml_parser->xml_content_length;
2851 } else {
2852 buffer=raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
2853 length=xml_element->content_cdata_length;
2854 }
2855
2856 if(!raptor_utf8_is_nfc(buffer, length)) {
2857 const char *message="Property element '%s' has XML literal content not in Unicode Normal Form C: %s";
2858 raptor_rdfxml_update_document_locator(rdf_parser);
2859 if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
2860 raptor_parser_error(rdf_parser, message, el_name, buffer);
2861 else
2862 raptor_parser_warning(rdf_parser, message, el_name, buffer);
2863 }
2864
2865
2866 if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
2867 element->parent->last_ordinal++;
2868 raptor_rdfxml_generate_statement(rdf_parser,
2869 element->parent->subject.uri,
2870 element->parent->subject.id,
2871 element->parent->subject.type,
2872 element->parent->subject.uri_source,
2873
2874 NULL,
2875 NULL,
2876 RAPTOR_IDENTIFIER_TYPE_ORDINAL,
2877 RAPTOR_URI_SOURCE_NOT_URI,
2878 element->parent->last_ordinal,
2879
2880 (raptor_uri*)buffer,
2881 NULL,
2882 RAPTOR_IDENTIFIER_TYPE_LITERAL,
2883 RAPTOR_URI_SOURCE_NOT_URI,
2884 RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser),
2885
2886 &element->reified,
2887 element->parent);
2888 } else {
2889 raptor_rdfxml_generate_statement(rdf_parser,
2890 element->parent->subject.uri,
2891 element->parent->subject.id,
2892 element->parent->subject.type,
2893 element->parent->subject.uri_source,
2894
2895 raptor_xml_element_get_name(xml_element)->uri,
2896 NULL,
2897 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
2898 RAPTOR_URI_SOURCE_ELEMENT,
2899 0,
2900
2901 (raptor_uri*)buffer,
2902 NULL,
2903 RAPTOR_IDENTIFIER_TYPE_LITERAL,
2904 RAPTOR_URI_SOURCE_NOT_URI,
2905 RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser),
2906
2907 &element->reified,
2908 element->parent);
2909 }
2910
2911 /* Finish the xml writer iostream for parseType="Literal" */
2912 if(rdf_xml_parser->xml_writer) {
2913 raptor_free_xml_writer(rdf_xml_parser->xml_writer);
2914 RAPTOR_FREE(cstring, rdf_xml_parser->xml_content);
2915 rdf_xml_parser->xml_content=NULL;
2916 rdf_xml_parser->xml_content_length=0;
2917 }
2918 }
2919
2920 break;
2921
2922 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION:
2923 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION:
2924
2925 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES:
2926 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES:
2927 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT:
2928
2929 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN:
2930 case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST:
2931 default:
2932 raptor_parser_fatal_error(rdf_parser, "%s: Internal error in state RAPTOR_STATE_PROPERTYELT - got unexpected content type %s (%d)", __func__, raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
2933 } /* end switch */
2934
2935 finished=1;
2936 break;
2937
2938 case RAPTOR_STATE_INVALID:
2939 default:
2940 raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_end_element_grammar: Unexpected parser state %d - %s", state, raptor_rdfxml_state_as_string(state));
2941 finished=1;
2942
2943 } /* end switch */
2944
2945 if(state != element->state) {
2946 element->state=state;
2947 #ifdef RAPTOR_DEBUG_VERBOSE
2948 RAPTOR_DEBUG3("Moved to state %d - %s\n", state, raptor_rdfxml_state_as_string(state));
2949 #endif
2950 }
2951
2952 } /* end while */
2953
2954 #ifdef RAPTOR_DEBUG_VERBOSE
2955 RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
2956 #endif
2957
2958 return;
2959
2960 oom:
2961 raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping");
2962 element->state=RAPTOR_STATE_SKIPPING;
2963 }
2964
2965
2966
2967 static void
raptor_rdfxml_cdata_grammar(raptor_parser * rdf_parser,const unsigned char * s,int len,int is_cdata)2968 raptor_rdfxml_cdata_grammar(raptor_parser *rdf_parser,
2969 const unsigned char *s, int len,
2970 int is_cdata)
2971 {
2972 raptor_rdfxml_parser* rdf_xml_parser;
2973 raptor_rdfxml_element* element;
2974 raptor_xml_element* xml_element;
2975 raptor_state state;
2976 int all_whitespace=1;
2977 int i;
2978
2979 rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
2980
2981 if(rdf_parser->failed)
2982 return;
2983
2984 #ifdef RAPTOR_DEBUG_CDATA
2985 RAPTOR_DEBUG2("Adding characters (is_cdata=%d): '", is_cdata);
2986 (void)fwrite(s, 1, len, stderr);
2987 fprintf(stderr, "' (%d bytes)\n", len);
2988 #endif
2989
2990 for(i=0; i<len; i++)
2991 if(!isspace(s[i])) {
2992 all_whitespace=0;
2993 break;
2994 }
2995
2996 element=rdf_xml_parser->current_element;
2997
2998 /* this file is very broke - probably not XML, whatever */
2999 if(!element)
3000 return;
3001
3002 xml_element=element->xml_element;
3003
3004 raptor_rdfxml_update_document_locator(rdf_parser);
3005
3006 /* cdata never changes the parser state
3007 * and the containing element state always determines what to do.
3008 * Use the child_state first if there is one, since that applies
3009 */
3010 state=element->child_state;
3011 #ifdef RAPTOR_DEBUG_VERBOSE
3012 RAPTOR_DEBUG2("Working in state %s\n", raptor_rdfxml_state_as_string(state));
3013 #endif
3014
3015
3016 #ifdef RAPTOR_DEBUG_VERBOSE
3017 RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
3018 #endif
3019
3020
3021
3022 if(state == RAPTOR_STATE_SKIPPING)
3023 return;
3024
3025 if(state == RAPTOR_STATE_UNKNOWN) {
3026 /* Ignore all cdata if still looking for RDF */
3027 if(rdf_parser->features[RAPTOR_FEATURE_SCANNING])
3028 return;
3029
3030 /* Ignore all whitespace cdata before first element */
3031 if(all_whitespace)
3032 return;
3033
3034 /* This probably will never happen since that would make the
3035 * XML not be well-formed
3036 */
3037 raptor_parser_warning(rdf_parser, "Character data before RDF element.");
3038 }
3039
3040
3041 if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES) {
3042 /* If found non-whitespace content, move to literal content */
3043 if(!all_whitespace)
3044 element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
3045 }
3046
3047
3048 if(!rdf_content_type_info[element->child_content_type].whitespace_significant) {
3049
3050 /* Whitespace is ignored except for literal or preserved content types */
3051 if(all_whitespace) {
3052 #ifdef RAPTOR_DEBUG_CDATA
3053 RAPTOR_DEBUG2("Ignoring whitespace cdata inside element '%s'\n", raptor_xml_element_get_name(element->parent->xml_element)->local_name);
3054 #endif
3055 return;
3056 }
3057
3058 if(xml_element->content_cdata_seen && xml_element->content_element_seen) {
3059 /* Uh oh - mixed content, this element has elements too */
3060 raptor_parser_warning(rdf_parser, "element '%s' has mixed content.",
3061 raptor_xml_element_get_name(element->parent->xml_element)->local_name);
3062 }
3063 }
3064
3065
3066 if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
3067 element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
3068 #ifdef RAPTOR_DEBUG_VERBOSE
3069 RAPTOR_DEBUG3("Content type changed to %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
3070 #endif
3071 }
3072
3073 if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
3074 raptor_xml_writer_cdata_counted(rdf_xml_parser->xml_writer, s, len);
3075 else {
3076 raptor_stringbuffer_append_counted_string(xml_element->content_cdata_sb,
3077 s, len, 1);
3078 element->content_cdata_all_whitespace &= all_whitespace;
3079
3080 /* adjust stored length */
3081 xml_element->content_cdata_length += len;
3082 }
3083
3084
3085 #ifdef RAPTOR_DEBUG_CDATA
3086 RAPTOR_DEBUG3("Content cdata now: %d bytes\n", xml_element->content_cdata_length);
3087 #endif
3088 #ifdef RAPTOR_DEBUG_VERBOSE
3089 RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
3090 #endif
3091 }
3092
3093
3094
3095 /**
3096 * raptor_rdfxml_inscope_base_uri:
3097 * @rdf_parser: Raptor parser object
3098 *
3099 * Return the in-scope base URI.
3100 *
3101 * Looks for the innermost xml:base on an element or document URI
3102 *
3103 * Return value: The URI string value or NULL on failure.
3104 **/
3105 static raptor_uri*
raptor_rdfxml_inscope_base_uri(raptor_parser * rdf_parser)3106 raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser)
3107 {
3108 raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
3109 raptor_uri* base_uri;
3110
3111 base_uri=raptor_sax2_inscope_base_uri(rdf_xml_parser->sax2);
3112 if(!base_uri)
3113 base_uri=rdf_parser->base_uri;
3114
3115 return base_uri;
3116 }
3117
3118
3119 /**
3120 * raptor_rdfxml_record_ID:
3121 * @rdf_parser: Raptor parser object
3122 * @element: Current element
3123 * @id: ID string
3124 *
3125 * Record an rdf:ID / rdf:bagID value (with xml base) and check it hasn't been seen already.
3126 *
3127 * Record and check the ID values, if they have been seen already.
3128 * per in-scope-base URI.
3129 *
3130 * Return value: non-zero if already seen, or failure
3131 **/
3132 static int
raptor_rdfxml_record_ID(raptor_parser * rdf_parser,raptor_rdfxml_element * element,const unsigned char * id)3133 raptor_rdfxml_record_ID(raptor_parser *rdf_parser,
3134 raptor_rdfxml_element *element,
3135 const unsigned char *id)
3136 {
3137 raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
3138 raptor_uri* base_uri=raptor_rdfxml_inscope_base_uri(rdf_parser);
3139 size_t id_len=strlen((const char*)id);
3140 int rc;
3141
3142 if(!rdf_parser->features[RAPTOR_FEATURE_CHECK_RDF_ID])
3143 return 0;
3144
3145 rc=raptor_id_set_add(rdf_xml_parser->id_set, base_uri, id, id_len);
3146
3147 return (rc != 0);
3148 }
3149
3150
3151
3152 static void
raptor_rdfxml_update_document_locator(raptor_parser * rdf_parser)3153 raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser)
3154 {
3155 raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
3156 raptor_sax2_update_document_locator(rdf_xml_parser->sax2,
3157 &rdf_parser->locator);
3158 }
3159
3160
3161
3162 static void
raptor_rdfxml_parse_finish_factory(raptor_parser_factory * factory)3163 raptor_rdfxml_parse_finish_factory(raptor_parser_factory* factory)
3164 {
3165 }
3166
3167
3168 static int
raptor_rdfxml_parser_register_factory(raptor_parser_factory * factory)3169 raptor_rdfxml_parser_register_factory(raptor_parser_factory *factory)
3170 {
3171 int rc=0;
3172
3173 factory->context_length = sizeof(raptor_rdfxml_parser);
3174
3175 factory->need_base_uri = 1;
3176
3177 factory->init = raptor_rdfxml_parse_init;
3178 factory->terminate = raptor_rdfxml_parse_terminate;
3179 factory->start = raptor_rdfxml_parse_start;
3180 factory->chunk = raptor_rdfxml_parse_chunk;
3181 factory->finish_factory = raptor_rdfxml_parse_finish_factory;
3182 factory->recognise_syntax = raptor_rdfxml_parse_recognise_syntax;
3183
3184 rc+= raptor_parser_factory_add_alias(factory, "raptor") != 0;
3185
3186 rc+= raptor_parser_factory_add_uri(factory,
3187 (const unsigned char*)"http://www.w3.org/TR/rdf-syntax-grammar") != 0;
3188
3189 rc+= raptor_parser_factory_add_mime_type(factory, "application/rdf+xml", 10) != 0;
3190 rc+= raptor_parser_factory_add_mime_type(factory, "text/rdf", 6) != 0;
3191
3192 return rc;
3193 }
3194
3195
3196 int
raptor_init_parser_rdfxml(raptor_world * world)3197 raptor_init_parser_rdfxml(raptor_world* world)
3198 {
3199 return !raptor_parser_register_factory(world, "rdfxml", "RDF/XML",
3200 &raptor_rdfxml_parser_register_factory);
3201 }
3202
3203
#if RAPTOR_DEBUG > 1
/*
 * raptor_rdfxml_parser_stats_print:
 * @rdf_xml_parser: RDF/XML parser context
 * @stream: stream to write to
 *
 * Write a label followed by the ID set statistics to @stream.
 * Only compiled when RAPTOR_DEBUG is greater than 1.
 */
void
raptor_rdfxml_parser_stats_print(raptor_rdfxml_parser *rdf_xml_parser,
                                 FILE *stream)
{
  static const char id_set_label[]="rdf:ID set ";

  fputs(id_set_label, stream);
  raptor_id_set_stats_print(rdf_xml_parser->id_set, stream);
}
#endif
3213