1 /* -*- Mode: c; c-basic-offset: 2 -*- 2 * 3 * raptor_internal.h - Redland Parser Toolkit for RDF (Raptor) internals 4 * 5 * Copyright (C) 2002-2010, David Beckett http://www.dajobe.org/ 6 * Copyright (C) 2002-2004, University of Bristol, UK http://www.bristol.ac.uk/ 7 * 8 * This package is Free Software and part of Redland http://librdf.org/ 9 * 10 * It is licensed under the following three licenses as alternatives: 11 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version 12 * 2. GNU General Public License (GPL) V2 or any newer version 13 * 3. Apache License, V2.0 or any newer version 14 * 15 * You may not use this file except in compliance with at least one of 16 * the above three licenses. 17 * 18 * See LICENSE.html or LICENSE.txt at the top of this package for the 19 * complete terms and further detail along with the license texts for 20 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 21 * 22 * 23 */ 24 25 26 27 #ifndef RAPTOR_INTERNAL_H 28 #define RAPTOR_INTERNAL_H 29 30 #ifdef __cplusplus 31 extern "C" { 32 #define RAPTOR_EXTERN_C extern "C" 33 #else 34 #define RAPTOR_EXTERN_C 35 #endif 36 37 #ifdef RAPTOR_INTERNAL 38 39 /* for the memory allocation functions */ 40 #ifdef HAVE_STDLIB_H 41 #include <stdlib.h> 42 #undef HAVE_STDLIB_H 43 #endif 44 45 /* Some internal functions are needed by the test programs */ 46 #ifndef RAPTOR_INTERNAL_API 47 #define RAPTOR_INTERNAL_API RAPTOR_API 48 #endif 49 50 /* Can be over-ridden or undefined in a config.h file or -Ddefine */ 51 #ifndef RAPTOR_INLINE 52 #define RAPTOR_INLINE inline 53 #endif 54 55 #ifdef LIBRDF_DEBUG 56 #define RAPTOR_DEBUG 1 57 #endif 58 59 #if defined(RAPTOR_MEMORY_SIGN) 60 #define RAPTOR_SIGN_KEY 0x08A61080 61 void* raptor_sign_malloc(size_t size); 62 void* raptor_sign_calloc(size_t nmemb, size_t size); 63 void* raptor_sign_realloc(void *ptr, size_t size); 64 void raptor_sign_free(void *ptr); 65 66 #define RAPTOR_MALLOC(type, size) 
(type)raptor_sign_malloc(size) 67 #define RAPTOR_CALLOC(type, nmemb, size) (type)raptor_sign_calloc(nmemb, size) 68 #define RAPTOR_REALLOC(type, ptr, size) (type)raptor_sign_realloc(ptr, size) 69 #define RAPTOR_FREE(type, ptr) raptor_sign_free((void*)ptr) 70 71 #else 72 #define RAPTOR_MALLOC(type, size) (type)malloc(size) 73 #define RAPTOR_CALLOC(type, nmemb, size) (type)calloc(nmemb, size) 74 #define RAPTOR_REALLOC(type, ptr, size) (type)realloc(ptr, size) 75 #define RAPTOR_FREE(type, ptr) free((void*)ptr) 76 77 #endif 78 79 #ifdef HAVE___FUNCTION__ 80 #else 81 #define __FUNCTION__ "???" 82 #endif 83 84 #ifndef RAPTOR_DEBUG_FH 85 #define RAPTOR_DEBUG_FH stderr 86 #endif 87 88 #ifdef RAPTOR_DEBUG 89 /* Debugging messages */ 90 #define RAPTOR_DEBUG1(msg) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__); } while(0) 91 #define RAPTOR_DEBUG2(msg, arg1) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__, arg1);} while(0) 92 #define RAPTOR_DEBUG3(msg, arg1, arg2) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__, arg1, arg2);} while(0) 93 #define RAPTOR_DEBUG4(msg, arg1, arg2, arg3) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__, arg1, arg2, arg3);} while(0) 94 #define RAPTOR_DEBUG5(msg, arg1, arg2, arg3, arg4) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__, arg1, arg2, arg3, arg4);} while(0) 95 #define RAPTOR_DEBUG6(msg, arg1, arg2, arg3, arg4, arg5) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__, arg1, arg2, arg3, arg4, arg5);} while(0) 96 97 #ifndef RAPTOR_ASSERT_DIE 98 #define RAPTOR_ASSERT_DIE abort(); 99 #endif 100 101 #else 102 /* DEBUGGING TURNED OFF */ 103 104 /* No debugging messages */ 105 #define RAPTOR_DEBUG1(msg) 106 #define RAPTOR_DEBUG2(msg, arg1) 107 #define RAPTOR_DEBUG3(msg, arg1, arg2) 108 #define RAPTOR_DEBUG4(msg, arg1, arg2, arg3) 109 #define RAPTOR_DEBUG5(msg, 
arg1, arg2, arg3, arg4) 110 #define RAPTOR_DEBUG6(msg, arg1, arg2, arg3, arg4, arg5) 111 112 #define SYSTEM_MALLOC(size) malloc(size) 113 #define SYSTEM_FREE(ptr) free(ptr) 114 115 #ifndef RAPTOR_ASSERT_DIE 116 #define RAPTOR_ASSERT_DIE 117 #endif 118 119 #endif 120 121 122 #ifdef RAPTOR_DISABLE_ASSERT_MESSAGES 123 #define RAPTOR_ASSERT_REPORT(line) 124 #else 125 #define RAPTOR_ASSERT_REPORT(msg) fprintf(RAPTOR_DEBUG_FH, "%s:%d: (%s) assertion failed: " msg "\n", __FILE__, __LINE__, __FUNCTION__); 126 #endif 127 128 129 #ifdef RAPTOR_DISABLE_ASSERT 130 131 #define RAPTOR_ASSERT(condition, msg) 132 #define RAPTOR_ASSERT_RETURN(condition, msg, ret) 133 #define RAPTOR_ASSERT_OBJECT_POINTER_RETURN(pointer, type) do { \ 134 if(!pointer) \ 135 return; \ 136 } while(0) 137 #define RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(pointer, type, ret) 138 139 #else 140 141 #define RAPTOR_ASSERT(condition, msg) do { \ 142 if(condition) { \ 143 RAPTOR_ASSERT_REPORT(msg) \ 144 RAPTOR_ASSERT_DIE \ 145 } \ 146 } while(0) 147 148 #define RAPTOR_ASSERT_RETURN(condition, msg, ret) do { \ 149 if(condition) { \ 150 RAPTOR_ASSERT_REPORT(msg) \ 151 RAPTOR_ASSERT_DIE \ 152 return ret; \ 153 } \ 154 } while(0) 155 156 #define RAPTOR_ASSERT_OBJECT_POINTER_RETURN(pointer, type) do { \ 157 if(!pointer) { \ 158 RAPTOR_ASSERT_REPORT("object pointer of type " #type " is NULL.") \ 159 RAPTOR_ASSERT_DIE \ 160 return; \ 161 } \ 162 } while(0) 163 164 #define RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(pointer, type, ret) do { \ 165 if(!pointer) { \ 166 RAPTOR_ASSERT_REPORT("object pointer of type " #type " is NULL.") \ 167 RAPTOR_ASSERT_DIE \ 168 return ret; \ 169 } \ 170 } while(0) 171 172 #endif 173 174 175 /* Fatal errors - always happen */ 176 #define RAPTOR_FATAL1(msg) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: fatal error: " msg, __FILE__, __LINE__ , __FUNCTION__); abort();} while(0) 177 #define RAPTOR_FATAL2(msg,arg) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: fatal error: " msg, __FILE__, __LINE__ , 
__FUNCTION__, arg); abort();} while(0) 178 #define RAPTOR_FATAL3(msg,arg1,arg2) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: fatal error: " msg, __FILE__, __LINE__ , __FUNCTION__, arg1, arg2); abort();} while(0) 179 #define RAPTOR_FATAL4(msg,arg1,arg2,arg3) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: fatal error: " msg, __FILE__, __LINE__ , __FUNCTION__, arg1, arg2, arg3); abort();} while(0) 180 181 #define MAX_ASCII_INT_SIZE 13 182 183 /* XML parser includes */ 184 185 #ifdef RAPTOR_XML_LIBXML 186 187 #include <libxml/parser.h> 188 189 190 /* libxml-only prototypes */ 191 192 193 /* raptor_libxml.c exports */ 194 extern void raptor_libxml_sax_init(raptor_sax2* sax2); 195 extern void raptor_libxml_generic_error(void* user_data, const char *msg, ...) RAPTOR_PRINTF_FORMAT(2, 3); 196 197 extern int raptor_libxml_init(raptor_world* world); 198 extern void raptor_libxml_finish(raptor_world* world); 199 200 extern void raptor_libxml_validation_error(void *context, const char *msg, ...) RAPTOR_PRINTF_FORMAT(2, 3); 201 extern void raptor_libxml_validation_warning(void *context, const char *msg, ...) 
RAPTOR_PRINTF_FORMAT(2, 3); 202 void raptor_libxml_free(xmlParserCtxtPtr xc); 203 204 /* raptor_parse.c - exported to libxml part */ 205 extern void raptor_libxml_update_document_locator(raptor_sax2* sax2, raptor_locator* locator); 206 207 /* end of libxml-only */ 208 #endif 209 210 211 typedef struct raptor_parser_factory_s raptor_parser_factory; 212 typedef struct raptor_serializer_factory_s raptor_serializer_factory; 213 typedef struct raptor_id_set_s raptor_id_set; 214 typedef struct raptor_uri_detail_s raptor_uri_detail; 215 216 217 /* raptor_option.c */ 218 219 /* These are bits and may be bit-ORed */ 220 /** 221 * raptor_option_area: 222 * @RAPTOR_OPTION_AREA_NONE: internal 223 * @RAPTOR_OPTION_AREA_PARSER: #raptor_parser (public) 224 * @RAPTOR_OPTION_AREA_SERIALIZER: #raptor_serializer (public) 225 * @RAPTOR_OPTION_AREA_XML_WRITER: #raptor_xml_writer (public) 226 * @RAPTOR_OPTION_AREA_TURTLE_WRITER: #raptor_turtle_writer (internal) 227 * @RAPTOR_OPTION_AREA_SAX2: #raptor_sax2 (public) 228 * 229 * Internal - raptor option areas 230 */ 231 typedef enum { 232 RAPTOR_OPTION_AREA_NONE = 0, 233 RAPTOR_OPTION_AREA_PARSER = 1, 234 RAPTOR_OPTION_AREA_SERIALIZER = 2, 235 RAPTOR_OPTION_AREA_XML_WRITER = 4, 236 RAPTOR_OPTION_AREA_TURTLE_WRITER = 8, 237 RAPTOR_OPTION_AREA_SAX2 = 16 238 } raptor_option_area; 239 240 typedef union 241 { 242 char* string; 243 int integer; 244 } raptor_str_int; 245 246 typedef struct 247 { 248 raptor_option_area area; 249 raptor_str_int options[RAPTOR_OPTION_LAST+1]; 250 } raptor_object_options; 251 252 253 #define RAPTOR_OPTIONS_GET_NUMERIC(object, option) \ 254 ((object)->options.options[(int)option].integer) 255 #define RAPTOR_OPTIONS_GET_STRING(object, option) \ 256 ((object)->options.options[(int)option].string) 257 258 #define RAPTOR_OPTIONS_SET_NUMERIC(object, option, value) do { \ 259 (object)->options.options[(int)option].integer = value; \ 260 } while(0) 261 #define RAPTOR_OPTIONS_SET_STRING(object, option, value) do { \ 262 
(object)->options.options[(int)option].string = value; \ 263 } while(0) 264 265 int raptor_option_value_is_numeric(const raptor_option option); 266 int raptor_option_is_valid_for_area(const raptor_option option, raptor_option_area area); 267 268 void raptor_object_options_init(raptor_object_options* options, raptor_option_area area); 269 void raptor_object_options_clear(raptor_object_options* options); 270 int raptor_object_options_copy_state(raptor_object_options* to, raptor_object_options* from); 271 int raptor_object_options_get_option(raptor_object_options *options, raptor_option option, char** string_p, int* integer_p); 272 int raptor_object_options_set_option(raptor_object_options *options, raptor_option option, const char* string, int integer); 273 274 275 276 277 /* raptor_concepts.c */ 278 279 /* 280 * raptor_rdf_ns_term_id: 281 * 282 * RDF namespace syntax terms, properties and classes. 283 * 284 * The order must match names in the raptor_rdf_ns_terms_info table 285 * 286 */ 287 typedef enum { 288 /* These terms are used only in the RDF/XML syntax; never in RDF graph */ 289 RDF_NS_RDF = 0, 290 RDF_NS_Description = 1, 291 RDF_NS_li = 2, 292 RDF_NS_about = 3, 293 RDF_NS_aboutEach = 4, 294 RDF_NS_aboutEachPrefix = 5, 295 RDF_NS_ID = 6, 296 RDF_NS_bagID = 7, 297 RDF_NS_resource = 8, 298 RDF_NS_parseType = 9, 299 RDF_NS_nodeID = 10, 300 RDF_NS_datatype = 11, 301 /* These terms are all properties in RDF model (of type rdf:Property) */ 302 RDF_NS_type = 12, 303 RDF_NS_value = 13, 304 RDF_NS_subject = 14, 305 RDF_NS_predicate = 15, 306 RDF_NS_object = 16, 307 RDF_NS_first = 17, 308 RDF_NS_rest = 18, 309 /* These terms are all classes in the RDF model (of type rdfs:Class) */ 310 RDF_NS_Seq = 19, 311 RDF_NS_Bag = 20, 312 RDF_NS_Alt = 21, 313 RDF_NS_Statement = 22, 314 RDF_NS_Property = 23, 315 RDF_NS_List = 24, 316 /* These terms are all resources in the RDF model (of type rdfs:Resource) */ 317 RDF_NS_nil = 25, 318 319 /* These terms are datatypes (used as a 
literal datatype URI) */ 320 RDF_NS_XMLLiteral = 26, 321 RDF_NS_PlainLiteral = 27, /* http://www.w3.org/TR/rdf-text/ */ 322 /* RDF 1.1 datatypes */ 323 RDF_NS_HTML = 28, 324 RDF_NS_langString = 29, 325 326 /* These terms are internal */ 327 RDF_NS_LAST_SYNTAX_TERM = RDF_NS_datatype, 328 329 RDF_NS_LAST = RDF_NS_langString 330 } raptor_rdf_ns_term_id; 331 332 333 typedef struct { 334 /* term name */ 335 const char *name; 336 337 /* RDF/XML: the statement object type of this when used as an attribute */ 338 raptor_term_type type; 339 340 /* RDF/XML: name restrictions */ 341 unsigned int allowed_as_nodeElement : 1; 342 unsigned int allowed_as_propertyElement : 1; 343 unsigned int allowed_as_propertyAttribute : 1; 344 unsigned int allowed_unprefixed_on_attribute : 1; 345 } raptor_rdf_ns_term_info; 346 347 348 extern const raptor_rdf_ns_term_info raptor_rdf_ns_terms_info[(RDF_NS_LAST + 1) + 1]; 349 350 #define RAPTOR_RDF_RDF_URI(world) world->concepts[RDF_NS_RDF] 351 #define RAPTOR_RDF_Description_URI(world) world->concepts[RDF_NS_Description] 352 #define RAPTOR_RDF_li_URI(world) world->concepts[RDF_NS_li] 353 #define RAPTOR_RDF_about(world) world->concepts[RDF_NS_about] 354 #define RAPTOR_RDF_aboutEach(world) world->concepts[RDF_NS_aboutEach] 355 #define RAPTOR_RDF_aboutEachPrefix(world) world->concepts[RDF_NS_aboutEachPrefix] 356 #define RAPTOR_RDF_ID_URI(world) world->concepts[RDF_NS_ID] 357 #define RAPTOR_RDF_bagID_URI(world) world->concepts[RDF_NS_bagID] 358 #define RAPTOR_RDF_resource_URI(world) world->concepts[RDF_NS_resource] 359 #define RAPTOR_RDF_parseType_URI(world) world->concepts[RDF_NS_parseType] 360 #define RAPTOR_RDF_nodeID_URI(world) world->concepts[RDF_NS_nodeID] 361 #define RAPTOR_RDF_datatype_URI(world) world->concepts[RDF_NS_datatype] 362 363 #define RAPTOR_RDF_type_URI(world) world->concepts[RDF_NS_type] 364 #define RAPTOR_RDF_value_URI(world) world->concepts[RDF_NS_value] 365 #define RAPTOR_RDF_subject_URI(world) world->concepts[RDF_NS_subject] 
/*
 * Accessors into world->concepts[] for the RDF property, class, resource and
 * datatype terms, indexed by raptor_rdf_ns_term_id.  The 'world' argument is
 * parenthesised so that expressions such as *wpp or (a ? b : c) can be
 * passed safely.
 */
#define RAPTOR_RDF_predicate_URI(world)   (world)->concepts[RDF_NS_predicate]
#define RAPTOR_RDF_object_URI(world)      (world)->concepts[RDF_NS_object]
#define RAPTOR_RDF_first_URI(world)       (world)->concepts[RDF_NS_first]
#define RAPTOR_RDF_rest_URI(world)        (world)->concepts[RDF_NS_rest]

#define RAPTOR_RDF_Seq_URI(world)         (world)->concepts[RDF_NS_Seq]
#define RAPTOR_RDF_Bag_URI(world)         (world)->concepts[RDF_NS_Bag]
#define RAPTOR_RDF_Alt_URI(world)         (world)->concepts[RDF_NS_Alt]
#define RAPTOR_RDF_Statement_URI(world)   (world)->concepts[RDF_NS_Statement]
#define RAPTOR_RDF_Property_URI(world)    (world)->concepts[RDF_NS_Property]
#define RAPTOR_RDF_List_URI(world)        (world)->concepts[RDF_NS_List]

#define RAPTOR_RDF_nil_URI(world)         (world)->concepts[RDF_NS_nil]
#define RAPTOR_RDF_XMLLiteral_URI(world)  (world)->concepts[RDF_NS_XMLLiteral]
#define RAPTOR_RDF_PlainLiteral_URI(world) (world)->concepts[RDF_NS_PlainLiteral]


/* syntax only (RDF:RDF ... RDF:datatype) are not provided as terms */

/* Accessors into world->terms[], indexed by raptor_rdf_ns_term_id */
#define RAPTOR_RDF_type_term(world)        (world)->terms[RDF_NS_type]
#define RAPTOR_RDF_value_term(world)       (world)->terms[RDF_NS_value]
#define RAPTOR_RDF_subject_term(world)     (world)->terms[RDF_NS_subject]
#define RAPTOR_RDF_predicate_term(world)   (world)->terms[RDF_NS_predicate]
#define RAPTOR_RDF_object_term(world)      (world)->terms[RDF_NS_object]
#define RAPTOR_RDF_first_term(world)       (world)->terms[RDF_NS_first]
#define RAPTOR_RDF_rest_term(world)        (world)->terms[RDF_NS_rest]

#define RAPTOR_RDF_Seq_term(world)         (world)->terms[RDF_NS_Seq]
#define RAPTOR_RDF_Bag_term(world)         (world)->terms[RDF_NS_Bag]
#define RAPTOR_RDF_Alt_term(world)         (world)->terms[RDF_NS_Alt]
#define RAPTOR_RDF_Statement_term(world)   (world)->terms[RDF_NS_Statement]
#define RAPTOR_RDF_Property_term(world)    (world)->terms[RDF_NS_Property]
#define RAPTOR_RDF_List_term(world)        (world)->terms[RDF_NS_List]

#define RAPTOR_RDF_nil_term(world)         (world)->terms[RDF_NS_nil]
401 #define RAPTOR_RDF_XMLLiteral_term(world) world->terms[RDF_NS_XMLLiteral] 402 #define RAPTOR_RDF_PlainLiteral_term(world) world->terms[RDF_NS_PlainLiteral] 403 404 405 int raptor_concepts_init(raptor_world* world); 406 void raptor_concepts_finish(raptor_world* world); 407 408 409 410 /* raptor_iostream.c */ 411 raptor_world* raptor_iostream_get_world(raptor_iostream *iostr); 412 413 414 /* Raptor Namespace Stack node */ 415 struct raptor_namespace_stack_s { 416 raptor_world* world; 417 int size; 418 419 int table_size; 420 raptor_namespace** table; 421 raptor_namespace* def_namespace; 422 423 raptor_uri *rdf_ms_uri; 424 raptor_uri *rdf_schema_uri; 425 }; 426 427 428 /* Forms: 429 * 1) prefix=NULL uri=<URI> - default namespace defined 430 * 2) prefix=NULL, uri=NULL - no default namespace 431 * 3) prefix=<prefix>, uri=<URI> - regular pair defined <prefix>:<URI> 432 */ 433 struct raptor_namespace_s { 434 /* next down the stack, NULL at bottom */ 435 struct raptor_namespace_s* next; 436 437 raptor_namespace_stack *nstack; 438 439 /* NULL means is the default namespace */ 440 const unsigned char *prefix; 441 /* needed to safely compare prefixed-names */ 442 unsigned int prefix_length; 443 /* URI of namespace or NULL for default */ 444 raptor_uri *uri; 445 /* parsing depth that this ns was added. 
It will 446 * be deleted when the parser leaves this depth 447 */ 448 int depth; 449 /* Non 0 if is xml: prefixed name */ 450 int is_xml; 451 /* Non 0 if is RDF M&S Namespace */ 452 int is_rdf_ms; 453 /* Non 0 if is RDF Schema Namespace */ 454 int is_rdf_schema; 455 }; 456 457 raptor_namespace** raptor_namespace_stack_to_array(raptor_namespace_stack *nstack, size_t *size_p); 458 459 #ifdef RAPTOR_XML_LIBXML 460 #define RAPTOR_LIBXML_MAGIC 0x8AF108 461 #endif 462 463 464 /* Size of buffer to use when reading from a file */ 465 #if defined(BUFSIZ) && BUFSIZ > 4096 466 #define RAPTOR_READ_BUFFER_SIZE BUFSIZ 467 #else 468 #define RAPTOR_READ_BUFFER_SIZE 4096 469 #endif 470 471 472 /* 473 * Raptor parser object 474 */ 475 struct raptor_parser_s { 476 raptor_world* world; 477 478 #ifdef RAPTOR_XML_LIBXML 479 int magic; 480 #endif 481 482 /* can be filled with error location information */ 483 raptor_locator locator; 484 485 /* non-0 if parser had fatal error and cannot continue */ 486 int failed : 1; 487 488 /* non-0 to enable emitting graph marks (default set). Intended 489 * for use by GRDDL the parser on it's child parsers to prevent 490 * multiple start/end marks on the default graph. 
491 */ 492 int emit_graph_marks : 1; 493 494 /* non-0 if have emitted start default graph mark */ 495 int emitted_default_graph : 1; 496 497 /* generated ID counter */ 498 int genid; 499 500 /* base URI of RDF/XML */ 501 raptor_uri *base_uri; 502 503 /* static statement for use in passing to user code */ 504 raptor_statement statement; 505 506 /* Options (per-object) */ 507 raptor_object_options options; 508 509 /* stuff for our user */ 510 void *user_data; 511 512 /* parser callbacks */ 513 raptor_statement_handler statement_handler; 514 515 raptor_graph_mark_handler graph_mark_handler; 516 517 void* uri_filter_user_data; 518 raptor_uri_filter_func uri_filter; 519 520 /* parser specific stuff */ 521 void *context; 522 523 struct raptor_parser_factory_s* factory; 524 525 /* namespace callback */ 526 raptor_namespace_handler namespace_handler; 527 528 void* namespace_handler_user_data; 529 530 raptor_stringbuffer* sb; 531 532 /* raptor_www pointer stored here to allow cleanup on error */ 533 raptor_www* www; 534 535 /* internal data for lexers */ 536 void* lexer_user_data; 537 538 /* internal read buffer */ 539 unsigned char buffer[RAPTOR_READ_BUFFER_SIZE + 1]; 540 }; 541 542 543 /** A Parser Factory */ 544 struct raptor_parser_factory_s { 545 raptor_world* world; 546 547 struct raptor_parser_factory_s* next; 548 549 /* the rest of this structure is populated by the 550 parser-specific register function */ 551 552 size_t context_length; 553 554 /* static desc that the parser registration initialises */ 555 raptor_syntax_description desc; 556 557 /* create a new parser */ 558 int (*init)(raptor_parser* parser, const char *name); 559 560 /* destroy a parser */ 561 void (*terminate)(raptor_parser* parser); 562 563 /* start a parse */ 564 int (*start)(raptor_parser* parser); 565 566 /* parse a chunk of memory */ 567 int (*chunk)(raptor_parser* parser, const unsigned char *buffer, size_t len, int is_end); 568 569 /* finish the parser factory */ 570 void 
(*finish_factory)(raptor_parser_factory* factory); 571 572 /* score recognition of the syntax by a block of characters, the 573 * content identifier or it's suffix or a mime type 574 * (different from the factory-registered one) 575 */ 576 int (*recognise_syntax)(raptor_parser_factory* factory, const unsigned char *buffer, size_t len, const unsigned char *identifier, const unsigned char *suffix, const char *mime_type); 577 578 /* get the Content-Type value of a URI request */ 579 void (*content_type_handler)(raptor_parser* rdf_parser, const char* content_type); 580 581 /* get the Accept header of a URI request (OPTIONAL) */ 582 const char* (*accept_header)(raptor_parser* rdf_parser); 583 584 /* get the name (OPTIONAL) */ 585 const char* (*get_name)(raptor_parser* rdf_parser); 586 587 /* get the description (OPTIONAL) */ 588 const raptor_syntax_description* (*get_description)(raptor_parser* rdf_parser); 589 590 /* get the current graph (OPTIONAL) - if not implemented, the current graph is always the default (NULL) and start/end graph marks are synthesised */ 591 raptor_uri* (*get_graph)(raptor_parser* rdf_parser); 592 593 /* get the locator (OPTIONAL) */ 594 raptor_locator* (*get_locator)(raptor_parser* rdf_parser); 595 }; 596 597 598 /* 599 * Raptor serializer object 600 */ 601 struct raptor_serializer_s { 602 raptor_world* world; 603 604 /* can be filled with error location information */ 605 raptor_locator locator; 606 607 /* non 0 if serializer had fatal error and cannot continue */ 608 int failed; 609 610 /* base URI of RDF/XML */ 611 raptor_uri *base_uri; 612 613 /* serializer specific stuff */ 614 void *context; 615 616 /* destination stream for the serialization */ 617 raptor_iostream *iostream; 618 619 /* if true, iostream was made here so free it */ 620 int free_iostream_on_end; 621 622 struct raptor_serializer_factory_s* factory; 623 624 /* Options (per-object) */ 625 raptor_object_options options; 626 }; 627 628 629 /** A Serializer Factory for a syntax 
*/ 630 struct raptor_serializer_factory_s { 631 raptor_world* world; 632 633 struct raptor_serializer_factory_s* next; 634 635 /* the rest of this structure is populated by the 636 serializer-specific register function */ 637 size_t context_length; 638 639 /* static desc that the parser registration initialises */ 640 raptor_syntax_description desc; 641 642 /* create a new serializer */ 643 int (*init)(raptor_serializer* serializer, const char *name); 644 645 /* destroy a serializer */ 646 void (*terminate)(raptor_serializer* serializer); 647 648 /* add a namespace */ 649 int (*declare_namespace)(raptor_serializer* serializer, raptor_uri *uri, const unsigned char *prefix); 650 651 /* start a serialization */ 652 int (*serialize_start)(raptor_serializer* serializer); 653 654 /* serialize a statement */ 655 int (*serialize_statement)(raptor_serializer* serializer, raptor_statement *statment); 656 657 /* end a serialization */ 658 int (*serialize_end)(raptor_serializer* serializer); 659 660 /* finish the serializer factory */ 661 void (*finish_factory)(raptor_serializer_factory* factory); 662 663 /* add a namespace using an existing namespace */ 664 int (*declare_namespace_from_namespace)(raptor_serializer* serializer, raptor_namespace *nspace); 665 666 /* flush current serialization state */ 667 int (*serialize_flush)(raptor_serializer* serializer); 668 }; 669 670 671 /* for raptor_parser_parse_uri_write_bytes() when used as a handler for 672 * raptor_www_set_write_bytes_handler() 673 */ 674 typedef struct 675 { 676 raptor_parser* rdf_parser; 677 raptor_uri* base_uri; 678 raptor_uri* final_uri; 679 int started; 680 } raptor_parse_bytes_context; 681 682 683 /* raptor_serialize.c */ 684 raptor_serializer_factory* raptor_serializer_register_factory(raptor_world* world, int (*factory) (raptor_serializer_factory*)); 685 686 687 /* raptor_general.c */ 688 689 raptor_parser_factory* raptor_world_register_parser_factory(raptor_world* world, int (*factory) 
(raptor_parser_factory*)); 690 int raptor_parser_factory_add_mime_type(raptor_parser_factory* factory, const char* mime_type, int q); 691 692 unsigned char* raptor_world_internal_generate_id(raptor_world *world, unsigned char *user_bnodeid); 693 694 #ifdef RAPTOR_DEBUG 695 void raptor_stats_print(raptor_parser *rdf_parser, FILE *stream); 696 #endif 697 RAPTOR_INTERNAL_API const char* raptor_basename(const char *name); 698 int raptor_term_print_as_ntriples(const raptor_term *term, FILE* stream); 699 700 /* raptor_ntriples.c */ 701 size_t raptor_ntriples_parse_term(raptor_world* world, raptor_locator* locator, unsigned char *string, size_t *len_p, raptor_term** term_p, int allow_turtle); 702 703 /* raptor_parse.c */ 704 raptor_parser_factory* raptor_world_get_parser_factory(raptor_world* world, const char *name); 705 void raptor_delete_parser_factories(void); 706 RAPTOR_INTERNAL_API const char* raptor_parser_get_accept_header_all(raptor_world* world); 707 int raptor_parser_set_uri_filter_no_net(void *user_data, raptor_uri* uri); 708 void raptor_parser_parse_uri_write_bytes(raptor_www* www, void *userdata, const void *ptr, size_t size, size_t nmemb); 709 void raptor_parser_fatal_error(raptor_parser* parser, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); 710 void raptor_parser_error(raptor_parser* parser, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); 711 RAPTOR_INTERNAL_API void raptor_parser_log_error_varargs(raptor_parser* parser, raptor_log_level level, const char *message, va_list arguments) RAPTOR_PRINTF_FORMAT(3, 0); 712 void raptor_parser_warning(raptor_parser* parser, const char *message, ...) 
RAPTOR_PRINTF_FORMAT(2, 3); 713 714 /* logging */ 715 void raptor_world_internal_set_ignore_errors(raptor_world* world, int flag); 716 void raptor_log_error_varargs(raptor_world* world, raptor_log_level level, raptor_locator* locator, const char* message, va_list arguments) RAPTOR_PRINTF_FORMAT(4, 0); 717 RAPTOR_INTERNAL_API void raptor_log_error_formatted(raptor_world* world, raptor_log_level level, raptor_locator* locator, const char* message, ...) RAPTOR_PRINTF_FORMAT(4, 5); 718 void raptor_log_error(raptor_world* world, raptor_log_level level, raptor_locator* locator, const char* message); 719 720 721 /* raptor_parse.c */ 722 723 typedef struct raptor_rdfxml_parser_s raptor_rdfxml_parser; 724 725 /* Prototypes for common libxml parsing event-handling functions */ 726 extern void raptor_xml_start_element_handler(void *user_data, const unsigned char *name, const unsigned char **atts); 727 extern void raptor_xml_end_element_handler(void *user_data, const unsigned char *name); 728 /* s is not 0 terminated. 
*/ 729 extern void raptor_xml_characters_handler(void *user_data, const unsigned char *s, int len); 730 extern void raptor_xml_cdata_handler(void *user_data, const unsigned char *s, int len); 731 void raptor_xml_comment_handler(void *user_data, const unsigned char *s); 732 733 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 734 void raptor_rdfxml_parser_stats_print(raptor_rdfxml_parser* rdf_xml_parser, FILE *stream); 735 #endif 736 737 void raptor_parser_copy_flags_state(raptor_parser *to_parser, raptor_parser *from_parser); 738 int raptor_parser_copy_user_state(raptor_parser *to_parser, raptor_parser *from_parser); 739 740 /* raptor_general.c */ 741 extern int raptor_valid_xml_ID(raptor_parser *rdf_parser, const unsigned char *string); 742 int raptor_check_ordinal(const unsigned char *name); 743 744 /* raptor_locator.c */ 745 746 747 #ifdef HAVE_STRCASECMP 748 #define raptor_strcasecmp strcasecmp 749 #define raptor_strncasecmp strncasecmp 750 #else 751 #ifdef HAVE_STRICMP 752 #define raptor_strcasecmp stricmp 753 #define raptor_strncasecmp strnicmp 754 #endif 755 #endif 756 757 758 /* raptor_nfc_icu.c */ 759 int raptor_nfc_icu_check (const unsigned char* string, size_t len, int *error); 760 761 762 /* raptor_namespace.c */ 763 764 #ifdef RAPTOR_DEBUG 765 void raptor_namespace_print(FILE *stream, raptor_namespace* ns); 766 #endif 767 768 void raptor_parser_start_namespace(raptor_parser* rdf_parser, raptor_namespace* nspace); 769 770 771 /* 772 * Raptor XML-namespace qualified name (qname), for elements or attributes 773 * 774 * namespace is only defined when the XML name has a namespace and 775 * only then is uri also given. 
776 */ 777 struct raptor_qname_s { 778 raptor_world* world; 779 /* Name - always present */ 780 const unsigned char *local_name; 781 int local_name_length; 782 /* Namespace or NULL if not in a namespace */ 783 const raptor_namespace *nspace; 784 /* URI of namespace+local_name or NULL if not defined */ 785 raptor_uri *uri; 786 /* optional value - used when name is an attribute */ 787 const unsigned char *value; 788 size_t value_length; 789 }; 790 791 792 793 /* raptor_qname.c */ 794 #ifdef RAPTOR_DEBUG 795 void raptor_qname_print(FILE *stream, raptor_qname* name); 796 #endif 797 798 799 /* raptor_uri.c */ 800 801 int raptor_uri_init(raptor_world* world); 802 void raptor_uri_finish(raptor_world* world); 803 raptor_uri* raptor_new_uri_from_rdf_ordinal(raptor_world* world, int ordinal); 804 size_t raptor_uri_normalize_path(unsigned char* path_buffer, size_t path_len); 805 806 /* parsers */ 807 int raptor_init_parser_rdfxml(raptor_world* world); 808 int raptor_init_parser_ntriples(raptor_world* world); 809 int raptor_init_parser_turtle(raptor_world* world); 810 int raptor_init_parser_trig(raptor_world* world); 811 int raptor_init_parser_n3(raptor_world* world); 812 int raptor_init_parser_grddl_common(raptor_world* world); 813 int raptor_init_parser_grddl(raptor_world* world); 814 int raptor_init_parser_guess(raptor_world* world); 815 int raptor_init_parser_rss(raptor_world* world); 816 int raptor_init_parser_rdfa(raptor_world* world); 817 int raptor_init_parser_json(raptor_world* world); 818 int raptor_init_parser_nquads(raptor_world* world); 819 820 void raptor_terminate_parser_grddl_common(raptor_world *world); 821 822 #ifdef RAPTOR_PARSER_RDFA 823 #define rdfa_add_item raptor_librdfa_rdfa_add_item 824 #define rdfa_append_to_list_mapping raptor_librdfa_rdfa_append_to_list_mapping 825 #define rdfa_canonicalize_string raptor_librdfa_rdfa_canonicalize_string 826 #define rdfa_complete_current_property_value_triples 
raptor_librdfa_rdfa_complete_current_property_value_triples 827 #define rdfa_complete_incomplete_triples raptor_librdfa_rdfa_complete_incomplete_triples 828 #define rdfa_complete_list_triples raptor_librdfa_rdfa_complete_list_triples 829 #define rdfa_complete_object_literal_triples raptor_librdfa_rdfa_complete_object_literal_triples 830 #define rdfa_complete_relrev_triples raptor_librdfa_rdfa_complete_relrev_triples 831 #define rdfa_complete_type_triples raptor_librdfa_rdfa_complete_type_triples 832 #define rdfa_copy_list raptor_librdfa_rdfa_copy_list 833 #define rdfa_copy_mapping raptor_librdfa_rdfa_copy_mapping 834 #define rdfa_create_bnode raptor_librdfa_rdfa_create_bnode 835 #define rdfa_create_context raptor_librdfa_rdfa_create_context 836 #define rdfa_create_list raptor_librdfa_rdfa_create_list 837 #define rdfa_create_list_mapping raptor_librdfa_rdfa_create_list_mapping 838 #define rdfa_create_mapping raptor_librdfa_rdfa_create_mapping 839 #define rdfa_create_new_element_context raptor_librdfa_rdfa_create_new_element_context 840 #define rdfa_create_triple raptor_librdfa_rdfa_create_triple 841 #define rdfa_establish_new_1_0_subject raptor_librdfa_rdfa_establish_new_1_0_subject 842 #define rdfa_establish_new_1_0_subject_with_relrev raptor_librdfa_ablish_new_1_0_subject_with_relrev 843 #define rdfa_establish_new_1_1_subject raptor_librdfa_ablish_new_1_1_subject 844 #define rdfa_establish_new_1_1_subject_with_relrev raptor_librdfa_ablish_new_1_1_subject_with_relrev 845 #define rdfa_establish_new_inlist_triples raptor_librdfa_ablish_new_inlist_triples 846 #define rdfa_free_context raptor_librdfa_free_context 847 #define rdfa_free_context_stack raptor_librdfa_free_context_stack 848 #define rdfa_free_list raptor_librdfa_rdfa_free_list 849 #define rdfa_free_mapping raptor_librdfa_rdfa_free_mapping 850 #define rdfa_free_triple raptor_librdfa_rdfa_free_triple 851 #define rdfa_get_buffer raptor_librdfa_rdfa_get_buffer 852 #define rdfa_get_curie_type 
raptor_librdfa_rdfa_get_curie_type 853 #define rdfa_get_list_mapping raptor_librdfa_rdfa_get_list_mapping 854 #define rdfa_get_mapping raptor_librdfa_rdfa_get_mapping 855 #define rdfa_init_base raptor_librdfa_rdfa_init_base 856 #define rdfa_init_context raptor_librdfa_rdfa_init_context 857 #define rdfa_iri_get_base raptor_librdfa_rdfa_iri_get_base 858 #define rdfa_join_string raptor_librdfa_rdfa_join_string 859 #define rdfa_n_append_string raptor_librdfa_rdfa_n_append_string 860 #define rdfa_names raptor_librdfa_rdfa_names 861 #define rdfa_next_mapping raptor_librdfa_rdfa_next_mapping 862 #define rdfa_parse raptor_librdfa_rdfa_parse 863 #define rdfa_parse_buffer raptor_librdfa_rdfa_parse_buffer 864 #define rdfa_parse_chunk raptor_librdfa_rdfa_parse_chunk 865 #define rdfa_parse_end raptor_librdfa_rdfa_parse_end 866 #define rdfa_parse_start raptor_librdfa_rdfa_parse_start 867 #define rdfa_pop_item raptor_librdfa_rdfa_pop_item 868 #define rdfa_print_list raptor_librdfa_rdfa_print_list 869 #define rdfa_print_mapping raptor_librdfa_rdfa_print_mapping 870 #define rdfa_print_string raptor_librdfa_rdfa_print_string 871 #define rdfa_print_triple raptor_librdfa_rdfa_print_triple 872 #define rdfa_print_triple_list raptor_librdfa_rdfa_print_triple_list 873 #define rdfa_push_item raptor_librdfa_rdfa_push_item 874 #define rdfa_replace_list raptor_librdfa_rdfa_replace_list 875 #define rdfa_replace_string raptor_librdfa_rdfa_replace_string 876 #define rdfa_resolve_curie raptor_librdfa_rdfa_resolve_curie 877 #define rdfa_resolve_curie_list raptor_librdfa_rdfa_resolve_curie_list 878 #define rdfa_resolve_relrev_curie raptor_librdfa_rdfa_resolve_relrev_curie 879 #define rdfa_resolve_uri raptor_librdfa_rdfa_resolve_uri 880 #define rdfa_save_incomplete_list_triples raptor_librdfa_rdfa_save_incomplete_list_triples 881 #define rdfa_save_incomplete_triples raptor_librdfa_rdfa_save_incomplete_triples 882 #define rdfa_set_buffer_filler raptor_librdfa_rdfa_set_buffer_filler 883 #define 
rdfa_set_default_graph_triple_handler raptor_librdfa_rdfa_set_default_graph_triple_handler 884 #define rdfa_set_processor_graph_triple_handler raptor_librdfa_rdfa_set_processor_graph_triple_handler 885 #define rdfa_setup_initial_context raptor_librdfa_rdfa_setup_initial_context 886 #define rdfa_update_language raptor_librdfa_rdfa_update_language 887 #define rdfa_update_mapping raptor_librdfa_rdfa_update_mapping 888 #define rdfa_update_uri_mappings raptor_librdfa_rdfa_update_uri_mappings 889 #define rdfa_uri_strings raptor_librdfa_rdfa_uri_strings 890 #endif 891 892 /* raptor_parse.c */ 893 int raptor_parsers_init(raptor_world* world); 894 void raptor_parsers_finish(raptor_world *world); 895 896 void raptor_parser_save_content(raptor_parser* rdf_parser, int save); 897 const unsigned char* raptor_parser_get_content(raptor_parser* rdf_parser, size_t* length_p); 898 void raptor_parser_start_graph(raptor_parser* parser, raptor_uri* uri, int is_declared); 899 void raptor_parser_end_graph(raptor_parser* parser, raptor_uri* uri, int is_declared); 900 901 /* raptor_rss.c */ 902 int raptor_init_serializer_rss10(raptor_world* world); 903 int raptor_init_serializer_atom(raptor_world* world); 904 905 extern const unsigned char * const raptor_atom_namespace_uri; 906 907 /* raptor_rfc2396.c */ 908 RAPTOR_INTERNAL_API raptor_uri_detail* raptor_new_uri_detail(const unsigned char *uri_string); 909 RAPTOR_INTERNAL_API void raptor_free_uri_detail(raptor_uri_detail* uri_detail); 910 unsigned char* raptor_uri_detail_to_string(raptor_uri_detail *ud, size_t* len_p); 911 912 /* serializers */ 913 /* raptor_serializer.c */ 914 int raptor_serializers_init(raptor_world* world); 915 void raptor_serializers_finish(raptor_world* world); 916 917 /* raptor_serializer_dot.c */ 918 int raptor_init_serializer_dot(raptor_world* world); 919 920 /* raptor_serializer_ntriples.c */ 921 int raptor_init_serializer_ntriples(raptor_world* world); 922 int raptor_init_serializer_nquads(raptor_world* world); 923 
924 /* raptor_serialize_rdfxml.c */ 925 int raptor_init_serializer_rdfxml(raptor_world* world); 926 927 /* raptor_serialize_rdfxmla.c */ 928 int raptor_init_serializer_rdfxmla(raptor_world* world); 929 930 /* raptor_serialize_turtle.c */ 931 int raptor_init_serializer_turtle(raptor_world* world); 932 933 /* raptor_serialize_html.c */ 934 int raptor_init_serializer_html(raptor_world* world); 935 936 /* raptor_serialize_json.c */ 937 int raptor_init_serializer_json(raptor_world* world); 938 939 /* raptor_unicode.c */ 940 extern const raptor_unichar raptor_unicode_max_codepoint; 941 942 int raptor_unicode_is_namestartchar(raptor_unichar c); 943 int raptor_unicode_is_namechar(raptor_unichar c); 944 int raptor_unicode_check_utf8_nfc_string(const unsigned char *input, size_t length, int* error); 945 946 /* raptor_www*.c */ 947 #ifdef RAPTOR_WWW_LIBXML 948 #include <libxml/parser.h> 949 #include <libxml/xmlerror.h> 950 #include <libxml/nanohttp.h> 951 #endif 952 953 #ifdef RAPTOR_WWW_LIBCURL 954 #include <curl/curl.h> 955 #include <curl/easy.h> 956 #endif 957 958 /* Size of buffer used in various raptor_www places for I/O */ 959 #ifndef RAPTOR_WWW_BUFFER_SIZE 960 #define RAPTOR_WWW_BUFFER_SIZE 4096 961 #endif 962 963 /* WWW library state */ 964 struct raptor_www_s { 965 raptor_world* world; 966 char *type; 967 int free_type; 968 size_t total_bytes; 969 int failed; 970 int status_code; 971 972 raptor_uri *uri; 973 974 #ifdef RAPTOR_WWW_LIBCURL 975 CURL* curl_handle; 976 char error_buffer[CURL_ERROR_SIZE]; 977 int curl_init_here; 978 int checked_status; 979 #endif 980 981 #ifdef RAPTOR_WWW_LIBXML 982 void *ctxt; 983 int is_end; 984 void *old_xmlGenericErrorContext; 985 #endif 986 987 char buffer[RAPTOR_WWW_BUFFER_SIZE + 1]; 988 989 char *user_agent; 990 991 /* proxy URL string or NULL for none */ 992 char *proxy; 993 994 void *write_bytes_userdata; 995 raptor_www_write_bytes_handler write_bytes; 996 void *content_type_userdata; 997 raptor_www_content_type_handler 
content_type; 998 999 void* uri_filter_user_data; 1000 raptor_uri_filter_func uri_filter; 1001 1002 /* can be filled with error location information */ 1003 raptor_locator locator; 1004 1005 char *http_accept; 1006 1007 FILE* handle; 1008 1009 int connection_timeout; 1010 1011 /* The URI returned after any redirections */ 1012 raptor_uri* final_uri; 1013 1014 void *final_uri_userdata; 1015 raptor_www_final_uri_handler final_uri_handler; 1016 1017 char* cache_control; 1018 }; 1019 1020 1021 1022 /* internal */ 1023 void raptor_www_libxml_init(raptor_www *www); 1024 void raptor_www_libxml_free(raptor_www *www); 1025 int raptor_www_libxml_fetch(raptor_www *www); 1026 1027 void raptor_www_error(raptor_www *www, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); 1028 1029 void raptor_www_curl_init(raptor_www *www); 1030 void raptor_www_curl_free(raptor_www *www); 1031 int raptor_www_curl_fetch(raptor_www *www); 1032 int raptor_www_curl_set_ssl_cert_options(raptor_www* www, const char* cert_filename, const char* cert_type, const char* cert_passphrase); 1033 int raptor_www_curl_set_ssl_verify_options(raptor_www* www, int verify_peer, int verify_host); 1034 1035 void raptor_www_libfetch_init(raptor_www *www); 1036 void raptor_www_libfetch_free(raptor_www *www); 1037 int raptor_www_libfetch_fetch(raptor_www *www); 1038 1039 /* raptor_set.c */ 1040 RAPTOR_INTERNAL_API raptor_id_set* raptor_new_id_set(raptor_world* world); 1041 RAPTOR_INTERNAL_API void raptor_free_id_set(raptor_id_set* set); 1042 RAPTOR_INTERNAL_API int raptor_id_set_add(raptor_id_set* set, raptor_uri* base_uri, const unsigned char *item, size_t item_len); 1043 #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 1044 void raptor_id_set_stats_print(raptor_id_set* set, FILE *stream); 1045 #endif 1046 1047 /* raptor_sax2.c */ 1048 /* 1049 * SAX2 elements/attributes on stack 1050 */ 1051 struct raptor_xml_element_s { 1052 /* NULL at bottom of stack */ 1053 struct raptor_xml_element_s *parent; 1054 raptor_qname 
*name; 1055 raptor_qname **attributes; 1056 unsigned int attribute_count; 1057 1058 /* value of xml:lang attribute on this element or NULL */ 1059 const unsigned char *xml_language; 1060 1061 /* URI of xml:base attribute value on this element or NULL */ 1062 raptor_uri *base_uri; 1063 1064 /* CDATA content of element and checks for mixed content */ 1065 raptor_stringbuffer* content_cdata_sb; 1066 unsigned int content_cdata_length; 1067 /* how many cdata blocks seen */ 1068 unsigned int content_cdata_seen; 1069 /* how many contained elements seen */ 1070 unsigned int content_element_seen; 1071 1072 raptor_sequence *declared_nspaces; 1073 1074 void* user_data; 1075 }; 1076 1077 1078 struct raptor_sax2_s { 1079 #ifdef RAPTOR_XML_LIBXML 1080 int magic; 1081 #endif 1082 raptor_world* world; 1083 void* user_data; 1084 1085 #ifdef RAPTOR_XML_LIBXML 1086 /* structure holding sax event handlers */ 1087 xmlSAXHandler sax; 1088 /* parser context */ 1089 xmlParserCtxtPtr xc; 1090 /* pointer to SAX document locator */ 1091 xmlSAXLocatorPtr loc; 1092 1093 #if LIBXML_VERSION < 20425 1094 /* flag for some libxml eversions*/ 1095 int first_read; 1096 #endif 1097 1098 #endif 1099 1100 /* element depth */ 1101 int depth; 1102 1103 /* stack of elements - elements add after current_element */ 1104 raptor_xml_element *root_element; 1105 raptor_xml_element *current_element; 1106 1107 /* start of an element */ 1108 raptor_sax2_start_element_handler start_element_handler; 1109 /* end of an element */ 1110 raptor_sax2_end_element_handler end_element_handler; 1111 /* characters */ 1112 raptor_sax2_characters_handler characters_handler; 1113 /* like <![CDATA[...]> */ 1114 raptor_sax2_cdata_handler cdata_handler; 1115 /* comment */ 1116 raptor_sax2_comment_handler comment_handler; 1117 /* unparsed (NDATA) entity */ 1118 raptor_sax2_unparsed_entity_decl_handler unparsed_entity_decl_handler; 1119 /* external entity reference */ 1120 raptor_sax2_external_entity_ref_handler 
external_entity_ref_handler; 1121 1122 raptor_locator *locator; 1123 1124 /* New XML namespace callback */ 1125 raptor_namespace_handler namespace_handler; 1126 1127 raptor_object_options options; 1128 1129 /* stack of namespaces, most recently added at top */ 1130 raptor_namespace_stack namespaces; /* static */ 1131 1132 /* base URI for resolving relative URIs or xml:base URIs */ 1133 raptor_uri* base_uri; 1134 1135 /* sax2 init failed - do not try to do anything with it */ 1136 int failed; 1137 1138 /* call SAX2 handlers if non-0 */ 1139 int enabled; 1140 1141 void* uri_filter_user_data; 1142 raptor_uri_filter_func uri_filter; 1143 }; 1144 1145 int raptor_sax2_init(raptor_world* world); 1146 void raptor_sax2_finish(raptor_world* world); 1147 1148 1149 raptor_xml_element* raptor_xml_element_pop(raptor_sax2* sax2); 1150 void raptor_xml_element_push(raptor_sax2* sax2, raptor_xml_element* element); 1151 int raptor_sax2_get_depth(raptor_sax2* sax2); 1152 void raptor_sax2_inc_depth(raptor_sax2* sax2); 1153 void raptor_sax2_dec_depth(raptor_sax2* sax2); 1154 void raptor_sax2_update_document_locator(raptor_sax2* sax2, raptor_locator* locator); 1155 int raptor_sax2_set_option(raptor_sax2 *sax2, raptor_option option, char* string, int integer); 1156 1157 #ifdef RAPTOR_DEBUG 1158 void raptor_print_xml_element(raptor_xml_element *element, FILE* stream); 1159 #endif 1160 1161 void raptor_sax2_start_element(void* user_data, const unsigned char *name, const unsigned char **atts); 1162 void raptor_sax2_end_element(void* user_data, const unsigned char *name); 1163 void raptor_sax2_characters(void* user_data, const unsigned char *s, int len); 1164 void raptor_sax2_cdata(void* user_data, const unsigned char *s, int len); 1165 void raptor_sax2_comment(void* user_data, const unsigned char *s); 1166 void raptor_sax2_unparsed_entity_decl(void* user_data, const unsigned char* entityName, const unsigned char* base, const unsigned char* systemId, const unsigned char* publicId, const 
unsigned char* notationName); 1167 int raptor_sax2_external_entity_ref(void* user_data, const unsigned char* context, const unsigned char* base, const unsigned char* systemId, const unsigned char* publicId); 1168 int raptor_sax2_check_load_uri_string(raptor_sax2* sax2, const unsigned char* uri_string); 1169 1170 /* turtle_parser.y and turtle_lexer.l */ 1171 typedef struct raptor_turtle_parser_s raptor_turtle_parser; 1172 1173 /* n3_parser.y and n3_lexer.l */ 1174 typedef struct raptor_n3_parser_s raptor_n3_parser; 1175 1176 /* raptor_rfc2396.c */ 1177 struct raptor_uri_detail_s 1178 { 1179 size_t uri_len; 1180 /* buffer is the same size as the original uri_len */ 1181 unsigned char *buffer; 1182 1183 /* URI Components. These all point into buffer */ 1184 unsigned char *scheme; 1185 unsigned char *authority; 1186 unsigned char *path; 1187 unsigned char *query; 1188 unsigned char *fragment; 1189 1190 /* Lengths of the URI Components */ 1191 size_t scheme_len; 1192 size_t authority_len; 1193 size_t path_len; 1194 size_t query_len; 1195 size_t fragment_len; 1196 1197 /* Flags */ 1198 int is_hierarchical; 1199 }; 1200 1201 1202 /* for time_t */ 1203 #ifdef TIME_WITH_SYS_TIME 1204 # include <sys/time.h> 1205 # include <time.h> 1206 #else 1207 # ifdef HAVE_SYS_TIME_H 1208 # include <sys/time.h> 1209 # else 1210 # include <time.h> 1211 # endif 1212 #endif 1213 1214 /* parsedate.c */ 1215 #ifdef HAVE_INN_PARSEDATE 1216 #include <libinn.h> 1217 #define RAPTOR_PARSEDATE_FUNCTION parsedate 1218 #else 1219 #ifdef HAVE_RAPTOR_PARSE_DATE 1220 time_t raptor_parse_date(const char *p, time_t *now); 1221 #define RAPTOR_PARSEDATE_FUNCTION raptor_parse_date 1222 #else 1223 #ifdef HAVE_CURL_CURL_H 1224 #include <curl/curl.h> 1225 #define RAPTOR_PARSEDATE_FUNCTION curl_getdate 1226 #endif 1227 #endif 1228 #endif 1229 1230 /* only used internally now */ 1231 typedef void (*raptor_simple_message_handler)(void *user_data, const char *message, ...); 1232 1233 1234 /* turtle_common.c */ 1235 
RAPTOR_INTERNAL_API int raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer, const unsigned char *text, size_t len, int delim, raptor_simple_message_handler error_handler, void *error_data, int is_uri); 1236 1237 1238 /* raptor_abbrev.c */ 1239 1240 typedef struct { 1241 raptor_world* world; 1242 int ref_count; /* count of references to this node */ 1243 int count_as_subject; /* count of this blank/resource node as subject */ 1244 int count_as_object; /* count of this blank/resource node as object */ 1245 1246 raptor_term* term; 1247 } raptor_abbrev_node; 1248 1249 #ifdef RAPTOR_DEBUG 1250 #define RAPTOR_DEBUG_ABBREV_NODE(label, node) \ 1251 do { \ 1252 RAPTOR_DEBUG1(label " "); \ 1253 raptor_term_print_as_ntriples(node->term, RAPTOR_DEBUG_FH); \ 1254 fprintf(RAPTOR_DEBUG_FH, " (refcount %d subject %d object %d)\n", \ 1255 node->ref_count, \ 1256 node->count_as_subject, \ 1257 node->count_as_object); \ 1258 } while(0) 1259 #else 1260 #define RAPTOR_DEBUG_ABBREV_NODE(label, node) 1261 #endif 1262 1263 typedef struct { 1264 raptor_abbrev_node* node; /* node representing the subject of 1265 * this resource */ 1266 raptor_abbrev_node* node_type; /* the rdf:type of this resource */ 1267 raptor_avltree *properties; /* list of properties 1268 * (predicate/object pair) of this 1269 * subject */ 1270 raptor_sequence *list_items; /* list of container elements if 1271 * is rdf container */ 1272 int valid; /* set 0 for blank nodes that do not 1273 * need to be referred to again */ 1274 } raptor_abbrev_subject; 1275 1276 1277 raptor_abbrev_node* raptor_new_abbrev_node(raptor_world* world, raptor_term* term); 1278 void raptor_free_abbrev_node(raptor_abbrev_node* node); 1279 int raptor_abbrev_node_compare(raptor_abbrev_node* node1, raptor_abbrev_node* node2); 1280 int raptor_abbrev_node_equals(raptor_abbrev_node* node1, raptor_abbrev_node* node2); 1281 raptor_abbrev_node* raptor_abbrev_node_lookup(raptor_avltree* nodes, raptor_term* term); 1282 1283 void 
raptor_free_abbrev_subject(raptor_abbrev_subject* subject); 1284 int raptor_abbrev_subject_add_property(raptor_abbrev_subject* subject, raptor_abbrev_node* predicate, raptor_abbrev_node* object); 1285 int raptor_abbrev_subject_compare(raptor_abbrev_subject* subject1, raptor_abbrev_subject* subject2); 1286 raptor_abbrev_subject* raptor_abbrev_subject_find(raptor_avltree *subjects, raptor_term* node); 1287 raptor_abbrev_subject* raptor_abbrev_subject_lookup(raptor_avltree* nodes, raptor_avltree* subjects, raptor_avltree* blanks, raptor_term* term); 1288 int raptor_abbrev_subject_valid(raptor_abbrev_subject *subject); 1289 int raptor_abbrev_subject_invalidate(raptor_abbrev_subject *subject); 1290 1291 1292 /* avltree */ 1293 #ifdef RAPTOR_DEBUG 1294 int raptor_avltree_dump(raptor_avltree* tree, FILE* stream); 1295 void raptor_avltree_check(raptor_avltree* tree); 1296 #endif 1297 1298 1299 raptor_qname* raptor_new_qname_from_resource(raptor_sequence* namespaces, raptor_namespace_stack* nstack, int* namespace_count, raptor_abbrev_node* node); 1300 1301 1302 /** 1303 * raptor_turtle_writer: 1304 * 1305 * Raptor Turtle Writer class 1306 */ 1307 typedef struct raptor_turtle_writer_s raptor_turtle_writer; 1308 1309 /* Turtle Writer Class (raptor_turtle_writer) */ 1310 RAPTOR_INTERNAL_API raptor_turtle_writer* raptor_new_turtle_writer(raptor_world* world, raptor_uri* base_uri, int write_base_uri, raptor_namespace_stack *nstack, raptor_iostream* iostr); 1311 RAPTOR_INTERNAL_API void raptor_free_turtle_writer(raptor_turtle_writer* turtle_writer); 1312 RAPTOR_INTERNAL_API void raptor_turtle_writer_raw(raptor_turtle_writer* turtle_writer, const unsigned char *s); 1313 RAPTOR_INTERNAL_API void raptor_turtle_writer_raw_counted(raptor_turtle_writer* turtle_writer, const unsigned char *s, unsigned int len); 1314 RAPTOR_INTERNAL_API void raptor_turtle_writer_namespace_prefix(raptor_turtle_writer* turtle_writer, raptor_namespace* ns); 1315 void 
raptor_turtle_writer_base(raptor_turtle_writer* turtle_writer, raptor_uri* base_uri); 1316 RAPTOR_INTERNAL_API void raptor_turtle_writer_increase_indent(raptor_turtle_writer *turtle_writer); 1317 RAPTOR_INTERNAL_API void raptor_turtle_writer_decrease_indent(raptor_turtle_writer *turtle_writer); 1318 RAPTOR_INTERNAL_API void raptor_turtle_writer_newline(raptor_turtle_writer *turtle_writer); 1319 RAPTOR_INTERNAL_API int raptor_turtle_writer_reference(raptor_turtle_writer* turtle_writer, raptor_uri* uri); 1320 RAPTOR_INTERNAL_API int raptor_turtle_writer_literal(raptor_turtle_writer* turtle_writer, raptor_namespace_stack *nstack, const unsigned char *s, const unsigned char* lang, raptor_uri* datatype); 1321 RAPTOR_INTERNAL_API void raptor_turtle_writer_qname(raptor_turtle_writer* turtle_writer, raptor_qname* qname); 1322 RAPTOR_INTERNAL_API int raptor_turtle_writer_quoted_counted_string(raptor_turtle_writer* turtle_writer, const unsigned char *s, size_t length); 1323 void raptor_turtle_writer_comment(raptor_turtle_writer* turtle_writer, const unsigned char *s); 1324 RAPTOR_INTERNAL_API int raptor_turtle_writer_set_option(raptor_turtle_writer *turtle_writer, raptor_option option, int value); 1325 int raptor_turtle_writer_set_option_string(raptor_turtle_writer *turtle_writer, raptor_option option, const unsigned char *value); 1326 int raptor_turtle_writer_get_option(raptor_turtle_writer *turtle_writer, raptor_option option); 1327 const unsigned char *raptor_turtle_writer_get_option_string(raptor_turtle_writer *turtle_writer, raptor_option option); 1328 void raptor_turtle_writer_bnodeid(raptor_turtle_writer* turtle_writer, const unsigned char *bnodeid, size_t len); 1329 int raptor_turtle_writer_uri(raptor_turtle_writer* turtle_writer, raptor_uri* uri); 1330 int raptor_turtle_writer_term(raptor_turtle_writer* turtle_writer, raptor_term* term); 1331 int raptor_turtle_is_legal_turtle_qname(raptor_qname* qname); 1332 1333 /** 1334 * raptor_json_writer: 1335 * 1336 * Raptor 
JSON Writer class 1337 */ 1338 typedef struct raptor_json_writer_s raptor_json_writer; 1339 1340 /* raptor_json_writer.c */ 1341 raptor_json_writer* raptor_new_json_writer(raptor_world* world, raptor_uri* base_uri, raptor_iostream* iostr); 1342 void raptor_free_json_writer(raptor_json_writer* json_writer); 1343 1344 int raptor_json_writer_newline(raptor_json_writer* json_writer); 1345 int raptor_json_writer_key_value(raptor_json_writer* json_writer, const char* key, size_t key_len, const char* value, size_t value_len); 1346 int raptor_json_writer_start_block(raptor_json_writer* json_writer, char c); 1347 int raptor_json_writer_end_block(raptor_json_writer* json_writer, char c); 1348 int raptor_json_writer_literal_object(raptor_json_writer* json_writer, unsigned char* s, size_t s_len, unsigned char* lang, raptor_uri* datatype); 1349 int raptor_json_writer_blank_object(raptor_json_writer* json_writer, const unsigned char* blank, size_t blank_len); 1350 int raptor_json_writer_uri_object(raptor_json_writer* json_writer, raptor_uri* uri); 1351 int raptor_json_writer_term(raptor_json_writer* json_writer, raptor_term *term); 1352 int raptor_json_writer_key_uri_value(raptor_json_writer* json_writer, const char* key, size_t key_len, raptor_uri* uri); 1353 1354 /* raptor_memstr.c */ 1355 const char* raptor_memstr(const char *haystack, size_t haystack_len, const char *needle); 1356 1357 /* raptor_serialize_rdfxmla.c special functions for embedding rdf/xml */ 1358 int raptor_rdfxmla_serialize_set_write_rdf_RDF(raptor_serializer* serializer, int value); 1359 int raptor_rdfxmla_serialize_set_xml_writer(raptor_serializer* serializer, raptor_xml_writer* xml_writer, raptor_namespace_stack *nstack); 1360 int raptor_rdfxmla_serialize_set_single_node(raptor_serializer* serializer, raptor_uri* uri); 1361 int raptor_rdfxmla_serialize_set_write_typed_nodes(raptor_serializer* serializer, int value); 1362 1363 /* snprintf.c */ 1364 size_t raptor_format_integer(char* buffer, size_t bufsize, 
int integer, unsigned int base, int width, char padding); 1365 1366 /* raptor_world structure */ 1367 #define RAPTOR1_WORLD_MAGIC_1 0 1368 #define RAPTOR1_WORLD_MAGIC_2 1 1369 #define RAPTOR2_WORLD_MAGIC 0xC4129CEF 1370 1371 #define RAPTOR_CHECK_CONSTRUCTOR_WORLD(world) \ 1372 do { \ 1373 if(raptor_check_world_internal(world, __FUNCTION__)) \ 1374 return NULL; \ 1375 } while(0) 1376 1377 1378 RAPTOR_INTERNAL_API int raptor_check_world_internal(raptor_world* world, const char* name); 1379 1380 1381 1382 struct raptor_world_s { 1383 /* signature to check this is a world object */ 1384 unsigned int magic; 1385 1386 /* world has been initialized with raptor_world_open() */ 1387 int opened; 1388 1389 /* internal flag used to ignore errors for e.g. child GRDDL parsers */ 1390 int internal_ignore_errors; 1391 1392 void* message_handler_user_data; 1393 raptor_log_handler message_handler; 1394 1395 /* sequence of parser factories */ 1396 raptor_sequence *parsers; 1397 1398 /* sequence of serializer factories */ 1399 raptor_sequence *serializers; 1400 1401 /* raptor_rss_common initialisation counter */ 1402 int rss_common_initialised; 1403 1404 /* raptor_rss_{namespaces,types,fields}_info const data initialized to raptor_uri,raptor_qname objects */ 1405 raptor_uri **rss_namespaces_info_uris; 1406 raptor_uri **rss_types_info_uris; 1407 raptor_qname **rss_types_info_qnames; 1408 raptor_uri **rss_fields_info_uris; 1409 raptor_qname **rss_fields_info_qnames; 1410 1411 /* raptor_www v2 flags */ 1412 int www_skip_www_init_finish; 1413 int www_initialized; 1414 1415 /* This is used to store a #xsltSecurityPrefsPtr typed object 1416 * pointer when libxslt is compiled in. 
1417 */ 1418 void* xslt_security_preferences; 1419 /* 0 raptor owns the above object and should free it with 1420 * xsltFreeSecurityPrefs() on exit 1421 * 1 user set the above object and raptor does not own it 1422 */ 1423 int xslt_security_preferences_policy; 1424 1425 /* Flags for libxml set by raptor_world_set_libxml_flags(). 1426 * See #raptor_libxml_flags for meanings 1427 */ 1428 int libxml_flags; 1429 1430 #ifdef RAPTOR_XML_LIBXML 1431 void *libxml_saved_structured_error_context; 1432 xmlStructuredErrorFunc libxml_saved_structured_error_handler; 1433 1434 void *libxml_saved_generic_error_context; 1435 xmlGenericErrorFunc libxml_saved_generic_error_handler; 1436 #endif 1437 1438 raptor_avltree *uris_tree; 1439 1440 raptor_uri* concepts[RDF_NS_LAST + 1]; 1441 1442 raptor_term* terms[RDF_NS_LAST + 1]; 1443 1444 /* last log message - points to data it does not own */ 1445 raptor_log_message message; 1446 1447 /* should */ 1448 int uri_interning; 1449 1450 /* generate blank node ID policy */ 1451 void *generate_bnodeid_handler_user_data; 1452 raptor_generate_bnodeid_handler generate_bnodeid_handler; 1453 1454 int default_generate_bnodeid_handler_base; 1455 char *default_generate_bnodeid_handler_prefix; 1456 unsigned int default_generate_bnodeid_handler_prefix_length; 1457 1458 raptor_uri* xsd_namespace_uri; 1459 raptor_uri* xsd_boolean_uri; 1460 raptor_uri* xsd_decimal_uri; 1461 raptor_uri* xsd_double_uri; 1462 raptor_uri* xsd_integer_uri; 1463 }; 1464 1465 /* raptor_www.c */ 1466 int raptor_www_init(raptor_world* world); 1467 void raptor_www_finish(raptor_world* world); 1468 1469 1470 1471 #define RAPTOR_LANG_LEN_FROM_INT(len) (int)(len) 1472 #define RAPTOR_LANG_LEN_TO_SIZE_T(len) (size_t)(len) 1473 1474 /* Safe casts: widening a value */ 1475 #define RAPTOR_GOOD_CAST(t, v) (t)(v) 1476 1477 /* Unsafe casts: narrowing a value */ 1478 #define RAPTOR_BAD_CAST(t, v) (t)(v) 1479 1480 /* end of RAPTOR_INTERNAL */ 1481 #endif 1482 1483 1484 #ifdef __cplusplus 1485 } 
1486 #endif 1487 1488 #endif 1489