1 /* 2 Copyright 2011-2017 David Robillard <http://drobilla.net> 3 4 Permission to use, copy, modify, and/or distribute this software for any 5 purpose with or without fee is hereby granted, provided that the above 6 copyright notice and this permission notice appear in all copies. 7 8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 /** 18 @file serd.h API for Serd, a lightweight RDF syntax library. 19 */ 20 21 #ifndef SERD_SERD_H 22 #define SERD_SERD_H 23 24 #include <stdarg.h> 25 #include <stddef.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 /* SERD_API marks the public symbols declared below: with SERD_SHARED defined it expands to SERD_LIB_EXPORT when SERD_INTERNAL is also defined and to SERD_LIB_IMPORT otherwise; without SERD_SHARED it expands to nothing. */ 29 #ifdef SERD_SHARED 30 # ifdef _WIN32 31 # define SERD_LIB_IMPORT __declspec(dllimport) 32 # define SERD_LIB_EXPORT __declspec(dllexport) 33 # else 34 # define SERD_LIB_IMPORT __attribute__((visibility("default"))) 35 # define SERD_LIB_EXPORT __attribute__((visibility("default"))) 36 # endif 37 # ifdef SERD_INTERNAL 38 # define SERD_API SERD_LIB_EXPORT 39 # else 40 # define SERD_API SERD_LIB_IMPORT 41 # endif 42 #else 43 # define SERD_API 44 #endif 45 46 #ifdef __cplusplus 47 extern "C" { 48 #else 49 # include <stdbool.h> 50 #endif 51 52 /** 53 @defgroup serd Serd 54 A lightweight RDF syntax library. 55 @{ 56 */ 57 58 /** 59 Environment. 60 61 Represents the state required to resolve a CURIE or relative URI, e.g. the 62 base URI and set of namespace prefixes at a particular point. 63 */ 64 typedef struct SerdEnvImpl SerdEnv; 65 66 /** 67 RDF reader. 68 69 Parses RDF by calling user-provided sink functions as input is consumed 70 (much like an XML SAX parser). 
71 */ 72 typedef struct SerdReaderImpl SerdReader; 73 74 /** 75 RDF writer. 76 77 Provides a number of functions to allow writing RDF syntax out to some 78 stream. These functions are deliberately compatible with the sink functions 79 used by SerdReader, so a reader can be directly connected to a writer to 80 re-serialise a document with minimal overhead. 81 */ 82 typedef struct SerdWriterImpl SerdWriter; 83 84 /** 85 Return status code. 86 */ 87 typedef enum { 88 SERD_SUCCESS, /**< No error */ 89 SERD_FAILURE, /**< Non-fatal failure */ 90 SERD_ERR_UNKNOWN, /**< Unknown error */ 91 SERD_ERR_BAD_SYNTAX, /**< Invalid syntax */ 92 SERD_ERR_BAD_ARG, /**< Invalid argument */ 93 SERD_ERR_NOT_FOUND, /**< Not found */ 94 SERD_ERR_ID_CLASH, /**< Encountered clashing blank node IDs */ 95 SERD_ERR_BAD_CURIE, /**< Invalid CURIE (e.g. prefix does not exist) */ 96 SERD_ERR_INTERNAL /**< Unexpected internal error (should not happen) */ 97 } SerdStatus; 98 99 /** 100 RDF syntax type. 101 */ 102 typedef enum { 103 /** 104 Turtle - Terse RDF Triple Language (UTF-8). 105 @see <a href="http://www.w3.org/TeamSubmission/turtle/">Turtle</a> 106 */ 107 SERD_TURTLE = 1, 108 109 /** 110 NTriples - Line-based RDF triples (ASCII). 111 @see <a href="http://www.w3.org/TR/rdf-testcases#ntriples">NTriples</a> 112 */ 113 SERD_NTRIPLES = 2, 114 115 /** 116 NQuads - Line-based RDF quads (UTF-8). 117 @see <a href="https://www.w3.org/TR/n-quads/">NQuads</a> 118 */ 119 SERD_NQUADS = 3, 120 121 /** 122 TriG - Terse RDF quads (UTF-8). 123 @see <a href="https://www.w3.org/TR/trig/">TriG</a> 124 */ 125 SERD_TRIG = 4 126 } SerdSyntax; 127 128 /** 129 Flags indicating inline abbreviation information for a statement. 
130 */ 131 typedef enum { 132 SERD_EMPTY_S = 1 << 1, /**< Empty blank node subject */ 133 SERD_EMPTY_O = 1 << 2, /**< Empty blank node object */ 134 SERD_ANON_S_BEGIN = 1 << 3, /**< Start of anonymous subject */ 135 SERD_ANON_O_BEGIN = 1 << 4, /**< Start of anonymous object */ 136 SERD_ANON_CONT = 1 << 5, /**< Continuation of anonymous node */ 137 SERD_LIST_S_BEGIN = 1 << 6, /**< Start of list subject */ 138 SERD_LIST_O_BEGIN = 1 << 7, /**< Start of list object */ 139 SERD_LIST_CONT = 1 << 8 /**< Continuation of list */ 140 } SerdStatementFlag; 141 142 /** 143 Bitwise OR of SerdStatementFlag values. 144 */ 145 typedef uint32_t SerdStatementFlags; 146 147 /** 148 Type of a syntactic RDF node. 149 150 This is more precise than the type of an abstract RDF node. An abstract 151 node is either a resource, literal, or blank. In syntax there are two ways 152 to refer to a resource (by URI or CURIE) and two ways to refer to a blank 153 (by ID or anonymously). Anonymous (inline) blank nodes are expressed using 154 SerdStatementFlags rather than this type. 155 */ 156 typedef enum { 157 /** 158 The type of a nonexistent node. 159 160 This type is useful as a sentinel, but is never emitted by the reader. 161 */ 162 SERD_NOTHING = 0, 163 164 /** 165 Literal value. 166 167 A literal optionally has either a language, or a datatype (not both). 168 */ 169 SERD_LITERAL = 1, 170 171 /** 172 URI (absolute or relative). 173 174 Value is an unquoted URI string, which is either a relative reference 175 with respect to the current base URI (e.g. "foo/bar"), or an absolute 176 URI (e.g. "http://example.org/foo"). 177 @see <a href="http://tools.ietf.org/html/rfc3986">RFC3986</a>. 178 */ 179 SERD_URI = 2, 180 181 /** 182 CURIE, a shortened URI. 183 184 Value is an unquoted CURIE string relative to the current environment, 185 e.g. "rdf:type". 186 @see <a href="http://www.w3.org/TR/curie">CURIE Syntax 1.0</a> 187 */ 188 SERD_CURIE = 3, 189 190 /** 191 A blank node. 
192 193 Value is a blank node ID, e.g. "id3", which is meaningful only within 194 this serialisation. 195 @see <a href="http://www.w3.org/TeamSubmission/turtle#nodeID">Turtle 196 <tt>nodeID</tt></a> 197 */ 198 SERD_BLANK = 4 199 } SerdType; 200 201 /** 202 Flags indicating certain string properties relevant to serialisation. 203 */ 204 typedef enum { 205 SERD_HAS_NEWLINE = 1, /**< Contains line breaks ('\\n' or '\\r') */ 206 SERD_HAS_QUOTE = 1 << 1 /**< Contains quotes ('"') */ 207 } SerdNodeFlag; 208 209 /** 210 Bitwise OR of SerdNodeFlag values. 211 */ 212 typedef uint32_t SerdNodeFlags; 213 214 /** 215 A syntactic RDF node. 216 */ 217 typedef struct { 218 const uint8_t* buf; /**< Value string */ 219 size_t n_bytes; /**< Size in bytes (not including null) */ 220 size_t n_chars; /**< Length in characters (not including null) */ 221 SerdNodeFlags flags; /**< Node flags (e.g. string properties) */ 222 SerdType type; /**< Node type */ 223 } SerdNode; 224 225 /** 226 An unterminated string fragment. 227 */ 228 typedef struct { 229 const uint8_t* buf; /**< Start of chunk */ 230 size_t len; /**< Length of chunk in bytes */ 231 } SerdChunk; 232 233 /** 234 An error description. 235 */ 236 typedef struct { 237 SerdStatus status; /**< Error code */ 238 const uint8_t* filename; /**< File where error was encountered, or NULL */ 239 unsigned line; /**< Line where error was encountered, or 0 */ 240 unsigned col; /**< Column where error was encountered */ 241 const char* fmt; /**< Message format string (printf style) */ 242 va_list* args; /**< Arguments for fmt */ 243 } SerdError; 244 245 /** 246 A parsed URI. 247 248 This struct directly refers to chunks in other strings; it does not own any 249 memory itself. Thus, URIs can be parsed and/or resolved against a base URI 250 in-place without allocating memory. 
251 */ 252 typedef struct { 253 SerdChunk scheme; /**< Scheme */ 254 SerdChunk authority; /**< Authority */ 255 SerdChunk path_base; /**< Path prefix if relative */ 256 SerdChunk path; /**< Path suffix */ 257 SerdChunk query; /**< Query */ 258 SerdChunk fragment; /**< Fragment */ 259 } SerdURI; 260 261 /** 262 Syntax style options. 263 264 The style of the writer output can be controlled by ORing together 265 values from this enumeration. Note that some options are only supported 266 for some syntaxes (e.g. NTriples does not support abbreviation and is 267 always ASCII). 268 */ 269 typedef enum { 270 SERD_STYLE_ABBREVIATED = 1, /**< Abbreviate triples when possible. */ 271 SERD_STYLE_ASCII = 1 << 1, /**< Escape all non-ASCII characters. */ 272 SERD_STYLE_RESOLVED = 1 << 2, /**< Resolve URIs against base URI. */ 273 SERD_STYLE_CURIED = 1 << 3, /**< Shorten URIs into CURIEs. */ 274 SERD_STYLE_BULK = 1 << 4 /**< Write output in pages. */ 275 } SerdStyle; 276 277 /** 278 @name String Utilities 279 @{ 280 */ 281 282 /** 283 Return a string describing a status code. 284 */ 285 SERD_API 286 const uint8_t* 287 serd_strerror(SerdStatus status); 288 289 /** 290 Measure a UTF-8 string. 291 @return Length of `str` in characters (not including the null terminator). 292 @param str A null-terminated UTF-8 string. 293 @param n_bytes (Output) Set to the size of `str` in bytes (not including the null terminator). 294 @param flags (Output) Set to the applicable flags. 295 */ 296 SERD_API 297 size_t 298 serd_strlen(const uint8_t* str, size_t* n_bytes, SerdNodeFlags* flags); 299 300 /** 301 Parse a string to a double. 302 303 The API of this function is identical to the standard C strtod function, 304 except this function is locale-independent and always matches the lexical 305 format used in the Turtle grammar (the decimal point is always "."). 306 */ 307 SERD_API 308 double 309 serd_strtod(const char* str, char** endptr); 310 311 /** 312 Decode a base64 string. 
313 This function can be used to deserialise a blob node created with 314 serd_node_new_blob(). 315 316 @param str Base64 string to decode. 317 @param len The length of `str`. 318 @param size Set to the size of the returned blob in bytes. 319 @return A newly allocated blob which must be freed with free(). 320 */ 321 SERD_API 322 void* 323 serd_base64_decode(const uint8_t* str, size_t len, size_t* size); 324 325 /** 326 @} 327 @name URI 328 @{ 329 */ 330 331 static const SerdURI SERD_URI_NULL = { 332 {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0} 333 }; 334 335 /** 336 Return the local path for `uri`, or NULL if `uri` is not a file URI. 337 Note this (inappropriately named) function only removes the file scheme if 338 necessary, and returns `uri` unmodified if it is an absolute path. Percent 339 encoding and other issues are not handled; to properly convert a file URI to 340 a path, use serd_file_uri_parse(). 341 */ 342 SERD_API 343 const uint8_t* 344 serd_uri_to_path(const uint8_t* uri); 345 346 /** 347 Get the unescaped path and hostname from a file URI. 348 @param uri A file URI. 349 @param hostname If non-NULL, set to the hostname, if present. 350 @return The path component of the URI. 351 352 The returned path and `*hostname` must be freed with free(). 353 */ 354 SERD_API 355 uint8_t* 356 serd_file_uri_parse(const uint8_t* uri, uint8_t** hostname); 357 358 /** 359 Return true iff `utf8` starts with a valid URI scheme. 360 */ 361 SERD_API 362 bool 363 serd_uri_string_has_scheme(const uint8_t* utf8); 364 365 /** 366 Parse `utf8`, writing result to `out`. 367 */ 368 SERD_API 369 SerdStatus 370 serd_uri_parse(const uint8_t* utf8, SerdURI* out); 371 372 /** 373 Set `out` to `uri` resolved against `base`. 374 */ 375 SERD_API 376 void 377 serd_uri_resolve(const SerdURI* uri, const SerdURI* base, SerdURI* out); 378 379 /** 380 Function to detect I/O stream errors. 381 382 Identical semantics to `ferror`. 
383 384 @return Non-zero if `stream` has encountered an error. 385 */ 386 typedef int (*SerdStreamErrorFunc)(void* stream); 387 388 /** 389 Source function for raw string input. 390 391 Identical semantics to `fread`, but may set errno for more informative error 392 reporting than supported by SerdStreamErrorFunc. 393 394 @param buf Output buffer. 395 @param size Size of a single element of data in bytes (always 1). 396 @param nmemb Number of elements to read. 397 @param stream Stream to read from (FILE* for fread). 398 @return Number of elements (bytes) read. 399 */ 400 typedef size_t (*SerdSource)(void* buf, 401 size_t size, 402 size_t nmemb, 403 void* stream); 404 405 /** 406 Sink function for raw string output. 407 */ 408 typedef size_t (*SerdSink)(const void* buf, size_t len, void* stream); 409 410 /** 411 Serialise `uri` with a series of calls to `sink`. 412 */ 413 SERD_API 414 size_t 415 serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream); 416 417 /** 418 Serialise `uri` relative to `base` with a series of calls to `sink`. 419 420 The `uri` is written as a relative URI iff it is a child of `base` and @c 421 root. The optional `root` parameter must be a prefix of `base` and can be 422 used to keep up-references ("../") within a certain namespace. 423 */ 424 SERD_API 425 size_t 426 serd_uri_serialise_relative(const SerdURI* uri, 427 const SerdURI* base, 428 const SerdURI* root, 429 SerdSink sink, 430 void* stream); 431 432 /** 433 @} 434 @name Node 435 @{ 436 */ 437 438 static const SerdNode SERD_NODE_NULL = { NULL, 0, 0, 0, SERD_NOTHING }; 439 440 /** 441 Make a (shallow) node from `str`. 442 443 This measures, but does not copy, `str`. No memory is allocated. 444 */ 445 SERD_API 446 SerdNode 447 serd_node_from_string(SerdType type, const uint8_t* str); 448 449 /** 450 Make a deep copy of `node`. 451 452 @return a node that the caller must free with serd_node_free(). 
453 */ 454 SERD_API 455 SerdNode 456 serd_node_copy(const SerdNode* node); 457 458 /** 459 Return true iff `a` is equal to `b`. 460 */ 461 SERD_API 462 bool 463 serd_node_equals(const SerdNode* a, const SerdNode* b); 464 465 /** 466 Simple wrapper for serd_node_new_uri() to resolve a URI node. 467 */ 468 SERD_API 469 SerdNode 470 serd_node_new_uri_from_node(const SerdNode* uri_node, 471 const SerdURI* base, 472 SerdURI* out); 473 474 /** 475 Simple wrapper for serd_node_new_uri() to resolve a URI string. 476 */ 477 SERD_API 478 SerdNode 479 serd_node_new_uri_from_string(const uint8_t* str, 480 const SerdURI* base, 481 SerdURI* out); 482 483 /** 484 Create a new file URI node from a file system path and optional hostname. 485 486 Backslashes in Windows paths will be converted and '%' will always be 487 percent encoded. If `escape` is true, all other invalid characters will be 488 percent encoded as well. 489 490 If `path` is relative, `hostname` is ignored. 491 If `out` is not NULL, it will be set to the parsed URI. 492 */ 493 SERD_API 494 SerdNode 495 serd_node_new_file_uri(const uint8_t* path, 496 const uint8_t* hostname, 497 SerdURI* out, 498 bool escape); 499 500 /** 501 Create a new node by serialising `uri` into a new string. 502 503 @param uri The URI to serialise. 504 505 @param base Base URI to resolve `uri` against (or NULL for no resolution). 506 507 @param out Set to the parsing of the new URI (i.e. points only to 508 memory owned by the new returned node). 509 */ 510 SERD_API 511 SerdNode 512 serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out); 513 514 /** 515 Create a new node by serialising `uri` into a new relative URI. 516 517 @param uri The URI to serialise. 518 519 @param base Base URI to make `uri` relative to, if possible. 520 521 @param root Root URI for resolution (see serd_uri_serialise_relative()). 522 523 @param out Set to the parsing of the new URI (i.e. points only to 524 memory owned by the new returned node). 
525 */ 526 SERD_API 527 SerdNode 528 serd_node_new_relative_uri(const SerdURI* uri, 529 const SerdURI* base, 530 const SerdURI* root, 531 SerdURI* out); 532 533 /** 534 Create a new node by serialising `d` into an xsd:decimal string. 535 536 The resulting node will always contain a '.', start with a digit, and end 537 with a digit (i.e. will have a leading and/or trailing '0' if necessary). 538 It will never be in scientific notation. A maximum of `frac_digits` digits 539 will be written after the decimal point, but trailing zeros will 540 automatically be omitted (except one if `d` is a round integer). 541 542 Note that about 16 and 8 fractional digits are required to precisely 543 represent a double and float, respectively. 544 545 @param d The value for the new node. 546 @param frac_digits The maximum number of digits after the decimal point. 547 */ 548 SERD_API 549 SerdNode 550 serd_node_new_decimal(double d, unsigned frac_digits); 551 552 /** 553 Create a new node by serialising `i` into an xsd:integer string. 554 */ 555 SERD_API 556 SerdNode 557 serd_node_new_integer(int64_t i); 558 559 /** 560 Create a node by serialising `buf` into an xsd:base64Binary string. 561 This function can be used to make a serialisable node out of arbitrary 562 binary data, which can be decoded using serd_base64_decode(). 563 564 @param buf Raw binary input data. 565 @param size Size of `buf`. 566 @param wrap_lines Wrap lines at 76 characters to conform to RFC 2045. 567 */ 568 SERD_API 569 SerdNode 570 serd_node_new_blob(const void* buf, size_t size, bool wrap_lines); 571 572 /** 573 Free any data owned by `node`. 574 575 Note that if `node` is itself dynamically allocated (which is not the case 576 for nodes created internally by serd), it will not be freed. 577 */ 578 SERD_API 579 void 580 serd_node_free(SerdNode* node); 581 582 /** 583 @} 584 @name Event Handlers 585 @{ 586 */ 587 588 /** 589 Sink (callback) for errors. 590 591 @param handle Handle for user data. 
592 @param error Error description. 593 */ 594 typedef SerdStatus (*SerdErrorSink)(void* handle, 595 const SerdError* error); 596 597 /** 598 Sink (callback) for base URI changes. 599 600 Called whenever the base URI of the serialisation changes. 601 */ 602 typedef SerdStatus (*SerdBaseSink)(void* handle, 603 const SerdNode* uri); 604 605 /** 606 Sink (callback) for namespace definitions. 607 608 Called whenever a prefix is defined in the serialisation. 609 */ 610 typedef SerdStatus (*SerdPrefixSink)(void* handle, 611 const SerdNode* name, 612 const SerdNode* uri); 613 614 /** 615 Sink (callback) for statements. 616 617 Called for every RDF statement in the serialisation. 618 */ 619 typedef SerdStatus (*SerdStatementSink)(void* handle, 620 SerdStatementFlags flags, 621 const SerdNode* graph, 622 const SerdNode* subject, 623 const SerdNode* predicate, 624 const SerdNode* object, 625 const SerdNode* object_datatype, 626 const SerdNode* object_lang); 627 628 /** 629 Sink (callback) for anonymous node end markers. 630 631 This is called to indicate that the anonymous node given by 632 `node` will no longer be referred to by any future statements 633 (i.e. the anonymous serialisation of the node is finished). 634 */ 635 typedef SerdStatus (*SerdEndSink)(void* handle, 636 const SerdNode* node); 637 638 /** 639 @} 640 @name Environment 641 @{ 642 */ 643 644 /** 645 Create a new environment. 646 */ 647 SERD_API 648 SerdEnv* 649 serd_env_new(const SerdNode* base_uri); 650 651 /** 652 Free `env`. 653 */ 654 SERD_API 655 void 656 serd_env_free(SerdEnv* env); 657 658 /** 659 Get the current base URI. 660 */ 661 SERD_API 662 const SerdNode* 663 serd_env_get_base_uri(const SerdEnv* env, 664 SerdURI* out); 665 666 /** 667 Set the current base URI. 668 */ 669 SERD_API 670 SerdStatus 671 serd_env_set_base_uri(SerdEnv* env, 672 const SerdNode* uri); 673 674 /** 675 Set a namespace prefix. 
676 */ 677 SERD_API 678 SerdStatus 679 serd_env_set_prefix(SerdEnv* env, 680 const SerdNode* name, 681 const SerdNode* uri); 682 683 /** 684 Set a namespace prefix, given the name and URI as strings rather than nodes. 685 */ 686 SERD_API 687 SerdStatus 688 serd_env_set_prefix_from_strings(SerdEnv* env, 689 const uint8_t* name, 690 const uint8_t* uri); 691 692 /** 693 Qualify `uri` into a CURIE if possible. 694 */ 695 SERD_API 696 bool 697 serd_env_qualify(const SerdEnv* env, 698 const SerdNode* uri, 699 SerdNode* prefix, 700 SerdChunk* suffix); 701 702 /** 703 Expand `curie`. 704 */ 705 SERD_API 706 SerdStatus 707 serd_env_expand(const SerdEnv* env, 708 const SerdNode* curie, 709 SerdChunk* uri_prefix, 710 SerdChunk* uri_suffix); 711 712 /** 713 Expand `node`, which must be a CURIE or URI, to a full URI. 714 */ 715 SERD_API 716 SerdNode 717 serd_env_expand_node(const SerdEnv* env, 718 const SerdNode* node); 719 720 /** 721 Call `func` for each prefix defined in `env`. 722 */ 723 SERD_API 724 void 725 serd_env_foreach(const SerdEnv* env, 726 SerdPrefixSink func, 727 void* handle); 728 729 /** 730 @} 731 @name Reader 732 @{ 733 */ 734 735 /** 736 Create a new RDF reader. 737 */ 738 SERD_API 739 SerdReader* 740 serd_reader_new(SerdSyntax syntax, 741 void* handle, 742 void (*free_handle)(void*), 743 SerdBaseSink base_sink, 744 SerdPrefixSink prefix_sink, 745 SerdStatementSink statement_sink, 746 SerdEndSink end_sink); 747 748 /** 749 Enable or disable strict parsing. 750 751 The reader is non-strict (lax) by default, which will tolerate URIs with 752 invalid characters. Setting strict will fail when parsing such files. An 753 error is printed for invalid input in either case. 754 */ 755 SERD_API 756 void 757 serd_reader_set_strict(SerdReader* reader, bool strict); 758 759 /** 760 Set a function to be called when errors occur during reading. 761 762 The `error_sink` will be called with `handle` as its first argument. If 763 no error function is set, errors are printed to stderr in GCC style. 
764 */ 765 SERD_API 766 void 767 serd_reader_set_error_sink(SerdReader* reader, 768 SerdErrorSink error_sink, 769 void* handle); 770 771 /** 772 Return the `handle` passed to serd_reader_new(). 773 */ 774 SERD_API 775 void* 776 serd_reader_get_handle(const SerdReader* reader); 777 778 /** 779 Set a prefix to be added to all blank node identifiers. 780 781 This is useful when multiple files are to be parsed into the same output 782 (e.g. a store, or other files). Since Serd preserves blank node IDs, this 783 could cause conflicts where two non-equivalent blank nodes are merged, 784 resulting in corrupt data. By setting a unique blank node prefix for each 785 parsed file, this can be avoided, while preserving blank node names. 786 */ 787 SERD_API 788 void 789 serd_reader_add_blank_prefix(SerdReader* reader, 790 const uint8_t* prefix); 791 792 /** 793 Set the URI of the default graph. 794 795 If this is set, the reader will emit quads with the graph set to the given 796 node for any statements that are not in a named graph (which is 797 all of them for the triple syntaxes Turtle and NTriples). 798 */ 799 SERD_API 800 void 801 serd_reader_set_default_graph(SerdReader* reader, 802 const SerdNode* graph); 803 804 /** 805 Read a file at a given `uri`. 806 */ 807 SERD_API 808 SerdStatus 809 serd_reader_read_file(SerdReader* reader, 810 const uint8_t* uri); 811 812 /** 813 Start an incremental read from a file handle. 814 815 Iff `bulk` is true, `file` will be read a page at a time. This is more 816 efficient, but uses a page of memory and means that an entire page of input 817 must be ready before any callbacks will fire. To react as soon as input 818 arrives, set `bulk` to false. 819 */ 820 SERD_API 821 SerdStatus 822 serd_reader_start_stream(SerdReader* me, 823 FILE* file, 824 const uint8_t* name, 825 bool bulk); 826 827 /** 828 Start an incremental read from a user-specified source. 
829 830 The `read_func` is guaranteed to only be called for `page_size` elements 831 with size 1 (i.e. `page_size` bytes). 832 */ 833 SERD_API 834 SerdStatus 835 serd_reader_start_source_stream(SerdReader* me, 836 SerdSource read_func, 837 SerdStreamErrorFunc error_func, 838 void* stream, 839 const uint8_t* name, 840 size_t page_size); 841 842 /** 843 Read a single "chunk" of data during an incremental read. 844 845 This function will read a single top level description, and return. This 846 may be a directive, statement, or several statements; essentially it reads 847 until a '.' is encountered. This is particularly useful for reading 848 directly from a pipe or socket. 849 */ 850 SERD_API 851 SerdStatus 852 serd_reader_read_chunk(SerdReader* me); 853 854 /** 855 Finish an incremental read from a file handle. 856 */ 857 SERD_API 858 SerdStatus 859 serd_reader_end_stream(SerdReader* me); 860 861 /** 862 Read `file`. 863 */ 864 SERD_API 865 SerdStatus 866 serd_reader_read_file_handle(SerdReader* reader, 867 FILE* file, 868 const uint8_t* name); 869 870 /** 871 Read a user-specified byte source. 872 */ 873 SERD_API 874 SerdStatus 875 serd_reader_read_source(SerdReader* reader, 876 SerdSource source, 877 SerdStreamErrorFunc error, 878 void* stream, 879 const uint8_t* name, 880 size_t page_size); 881 882 /** 883 Read `utf8`. 884 */ 885 SERD_API 886 SerdStatus 887 serd_reader_read_string(SerdReader* me, const uint8_t* utf8); 888 889 /** 890 Free `reader`. 891 */ 892 SERD_API 893 void 894 serd_reader_free(SerdReader* reader); 895 896 /** 897 @} 898 @name Writer 899 @{ 900 */ 901 902 /** 903 Create a new RDF writer. 904 */ 905 SERD_API 906 SerdWriter* 907 serd_writer_new(SerdSyntax syntax, 908 SerdStyle style, 909 SerdEnv* env, 910 const SerdURI* base_uri, 911 SerdSink sink, 912 void* stream); 913 914 /** 915 Free `writer`. 916 */ 917 SERD_API 918 void 919 serd_writer_free(SerdWriter* writer); 920 921 /** 922 Return the env used by `writer`. 
923 */ 924 SERD_API 925 SerdEnv* 926 serd_writer_get_env(SerdWriter* writer); 927 928 /** 929 A convenience sink function for writing to a FILE*. 930 931 This function can be used as a SerdSink when writing to a FILE*. The 932 `stream` parameter must be a FILE* opened for writing. 933 */ 934 SERD_API 935 size_t 936 serd_file_sink(const void* buf, size_t len, void* stream); 937 938 /** 939 A convenience sink function for writing to a string. 940 941 This function can be used as a SerdSink to write to a SerdChunk which is 942 resized as necessary with realloc(). The `stream` parameter must point to 943 an initialized SerdChunk. When the write is finished, the string should be 944 retrieved with serd_chunk_sink_finish(). 945 */ 946 SERD_API 947 size_t 948 serd_chunk_sink(const void* buf, size_t len, void* stream); 949 950 /** 951 Finish a serialisation to a chunk with serd_chunk_sink(). 952 953 The returned string is the result of the serialisation, which is NULL 954 terminated (by this function) and owned by the caller. 955 */ 956 SERD_API 957 uint8_t* 958 serd_chunk_sink_finish(SerdChunk* stream); 959 960 /** 961 Set a function to be called when errors occur during writing. 962 963 The `error_sink` will be called with `handle` as its first argument. If 964 no error function is set, errors are printed to stderr. 965 */ 966 SERD_API 967 void 968 serd_writer_set_error_sink(SerdWriter* writer, 969 SerdErrorSink error_sink, 970 void* handle); 971 972 /** 973 Set a prefix to be removed from matching blank node identifiers. 974 */ 975 SERD_API 976 void 977 serd_writer_chop_blank_prefix(SerdWriter* writer, 978 const uint8_t* prefix); 979 980 /** 981 Set the current output base URI (and emit directive if applicable). 982 983 Note this function can be safely cast to SerdBaseSink. 984 */ 985 SERD_API 986 SerdStatus 987 serd_writer_set_base_uri(SerdWriter* writer, 988 const SerdNode* uri); 989 990 /** 991 Set the current root URI. 
992 993 The root URI should be a prefix of the base URI. The path of the root URI 994 is the highest path any relative up-reference can refer to. For example, 995 with root <file:///foo/root> and base <file:///foo/root/base>, 996 <file:///foo/root> will be written as <../>, but <file:///foo> will be 997 written non-relatively as <file:///foo>. If the root is not explicitly set, 998 it defaults to the base URI, so no up-references will be created at all. 999 */ 1000 SERD_API 1001 SerdStatus 1002 serd_writer_set_root_uri(SerdWriter* writer, 1003 const SerdNode* uri); 1004 1005 /** 1006 Set a namespace prefix (and emit directive if applicable). 1007 1008 Note this function can be safely cast to SerdPrefixSink. 1009 */ 1010 SERD_API 1011 SerdStatus 1012 serd_writer_set_prefix(SerdWriter* writer, 1013 const SerdNode* name, 1014 const SerdNode* uri); 1015 1016 /** 1017 Write a statement. 1018 1019 Note this function can be safely cast to SerdStatementSink. 1020 */ 1021 SERD_API 1022 SerdStatus 1023 serd_writer_write_statement(SerdWriter* writer, 1024 SerdStatementFlags flags, 1025 const SerdNode* graph, 1026 const SerdNode* subject, 1027 const SerdNode* predicate, 1028 const SerdNode* object, 1029 const SerdNode* object_datatype, 1030 const SerdNode* object_lang); 1031 1032 /** 1033 Mark the end of an anonymous node's description. 1034 1035 Note this function can be safely cast to SerdEndSink. 1036 */ 1037 SERD_API 1038 SerdStatus 1039 serd_writer_end_anon(SerdWriter* writer, 1040 const SerdNode* node); 1041 1042 /** 1043 Finish a write. 1044 */ 1045 SERD_API 1046 SerdStatus 1047 serd_writer_finish(SerdWriter* writer); 1048 1049 /** 1050 @} 1051 @} 1052 */ 1053 1054 #ifdef __cplusplus 1055 } /* extern "C" */ 1056 #endif 1057 1058 #endif /* SERD_SERD_H */ 1059