1 /*
2 Copyright 2011-2017 David Robillard <http://drobilla.net>
3
4 Permission to use, copy, modify, and/or distribute this software for any
5 purpose with or without fee is hereby granted, provided that the above
6 copyright notice and this permission notice appear in all copies.
7
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include "serd_internal.h"
18
19 #include <assert.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 typedef struct {
25 SerdNode graph;
26 SerdNode subject;
27 SerdNode predicate;
28 } WriteContext;
29
30 static const WriteContext WRITE_CONTEXT_NULL = {
31 { 0, 0, 0, 0, SERD_NOTHING },
32 { 0, 0, 0, 0, SERD_NOTHING },
33 { 0, 0, 0, 0, SERD_NOTHING }
34 };
35
/**
   Separators that can be written between nodes.

   The values are used directly as indices into the `rules` table, so the
   order here must match the order of that table.
*/
typedef enum {
	SEP_NONE,         ///< No separator (value 0, tested as "false")
	SEP_END_S,        ///< End of a subject ('.')
	SEP_END_P,        ///< End of a predicate (';')
	SEP_END_O,        ///< End of an object (',')
	SEP_S_P,          ///< Between a subject and predicate (whitespace)
	SEP_P_O,          ///< Between a predicate and object (whitespace)
	SEP_ANON_BEGIN,   ///< Start of anonymous node ('[')
	SEP_ANON_END,     ///< End of anonymous node (']')
	SEP_LIST_BEGIN,   ///< Start of list ('(')
	SEP_LIST_SEP,     ///< List separator (whitespace)
	SEP_LIST_END,     ///< End of list (')')
	SEP_GRAPH_BEGIN,  ///< Start of graph ('{')
	SEP_GRAPH_END,    ///< End of graph ('}')
	SEP_URI_BEGIN,    ///< URI start quote ('<')
	SEP_URI_END       ///< URI end quote ('>')
} Sep;
53
/// Describes how one separator is written (see write_sep())
typedef struct {
	const char* str;               ///< Sep string (NULL if nothing is written)
	uint8_t     len;               ///< Length of sep string
	uint8_t     space_before;      ///< Newline before sep
	uint8_t     space_after_node;  ///< Newline after sep if after node
	uint8_t     space_after_sep;   ///< Newline after sep if after sep
} SepRule;
61
62 static const SepRule rules[] = {
63 { NULL, 0, 0, 0, 0 },
64 { " .\n\n", 4, 0, 0, 0 },
65 { " ;", 2, 0, 1, 1 },
66 { " ,", 2, 0, 1, 0 },
67 { NULL, 0, 0, 1, 0 },
68 { " ", 1, 0, 0, 0 },
69 { "[", 1, 0, 1, 1 },
70 { "]", 1, 1, 0, 0 },
71 { "(", 1, 0, 0, 0 },
72 { NULL, 1, 0, 1, 0 },
73 { ")", 1, 1, 0, 0 },
74 { " {", 2, 0, 1, 1 },
75 { " }", 2, 0, 1, 1 },
76 { "<", 1, 0, 0, 0 },
77 { ">", 1, 0, 0, 0 },
78 { "\n", 1, 0, 1, 0 }
79 };
80
81 struct SerdWriterImpl {
82 SerdSyntax syntax;
83 SerdStyle style;
84 SerdEnv* env;
85 SerdNode root_node;
86 SerdURI root_uri;
87 SerdURI base_uri;
88 SerdStack anon_stack;
89 SerdByteSink byte_sink;
90 SerdErrorSink error_sink;
91 void* error_handle;
92 WriteContext context;
93 SerdNode list_subj;
94 unsigned list_depth;
95 unsigned indent;
96 uint8_t* bprefix;
97 size_t bprefix_len;
98 Sep last_sep;
99 bool empty;
100 };
101
/// Kind of quoted string being written by write_text()
typedef enum {
	WRITE_STRING,      ///< Short string, delimited by single quotes (")
	WRITE_LONG_STRING  ///< Long string, delimited by triple quotes (""")
} TextContext;
106
107 static bool
108 write_node(SerdWriter* writer,
109 const SerdNode* node,
110 const SerdNode* datatype,
111 const SerdNode* lang,
112 Field field,
113 SerdStatementFlags flags);
114
115 static bool
supports_abbrev(const SerdWriter * writer)116 supports_abbrev(const SerdWriter* writer)
117 {
118 return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG;
119 }
120
121 static void
w_err(SerdWriter * writer,SerdStatus st,const char * fmt,...)122 w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...)
123 {
124 /* TODO: This results in errors with no file information, which is not
125 helpful when re-serializing a file (particularly for "undefined
126 namespace prefix" errors. The statement sink API needs to be changed to
127 add a Cursor parameter so the source can notify the writer of the
128 statement origin for better error reporting. */
129
130 va_list args;
131 va_start(args, fmt);
132 const SerdError e = { st, NULL, 0, 0, fmt, &args };
133 serd_error(writer->error_sink, writer->error_handle, &e);
134 va_end(args);
135 }
136
137 static inline WriteContext*
anon_stack_top(SerdWriter * writer)138 anon_stack_top(SerdWriter* writer)
139 {
140 assert(!serd_stack_is_empty(&writer->anon_stack));
141 return (WriteContext*)(writer->anon_stack.buf
142 + writer->anon_stack.size - sizeof(WriteContext));
143 }
144
145 static void
copy_node(SerdNode * dst,const SerdNode * src)146 copy_node(SerdNode* dst, const SerdNode* src)
147 {
148 if (src) {
149 dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1);
150 dst->n_bytes = src->n_bytes;
151 dst->n_chars = src->n_chars;
152 dst->flags = src->flags;
153 dst->type = src->type;
154 memcpy((char*)dst->buf, src->buf, src->n_bytes + 1);
155 } else {
156 dst->type = SERD_NOTHING;
157 }
158 }
159
160 static inline size_t
sink(const void * buf,size_t len,SerdWriter * writer)161 sink(const void* buf, size_t len, SerdWriter* writer)
162 {
163 return serd_byte_sink_write(buf, len, &writer->byte_sink);
164 }
165
166 // Write a single character, as an escape for single byte characters
167 // (Caller prints any single byte characters that don't need escaping)
168 static size_t
write_character(SerdWriter * writer,const uint8_t * utf8,size_t * size)169 write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size)
170 {
171 char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
172 const uint32_t c = parse_utf8_char(utf8, size);
173 switch (*size) {
174 case 0:
175 w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]);
176 return sink(replacement_char, sizeof(replacement_char), writer);
177 case 1:
178 snprintf(escape, sizeof(escape), "\\u%04X", utf8[0]);
179 return sink(escape, 6, writer);
180 default:
181 break;
182 }
183
184 if (!(writer->style & SERD_STYLE_ASCII)) {
185 // Write UTF-8 character directly to UTF-8 output
186 return sink(utf8, *size, writer);
187 }
188
189 if (c <= 0xFFFF) {
190 snprintf(escape, sizeof(escape), "\\u%04X", c);
191 return sink(escape, 6, writer);
192 } else {
193 snprintf(escape, sizeof(escape), "\\U%08X", c);
194 return sink(escape, 10, writer);
195 }
196 }
197
/// Return true iff byte `c` can not be written as-is inside a URI
static inline bool
uri_must_escape(const uint8_t c)
{
	bool escape = false;

	switch (c) {
	case ' ':
	case '"':
	case '<':
	case '>':
	case '\\':
	case '^':
	case '`':
	case '{':
	case '|':
	case '}':
		escape = true;
		break;
	default:
		// Anything outside the range 0x20..0x7E must be escaped
		escape = !in_range(c, 0x20, 0x7E);
		break;
	}

	return escape;
}
209
210 static size_t
write_uri(SerdWriter * writer,const uint8_t * utf8,size_t n_bytes)211 write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
212 {
213 size_t len = 0;
214 for (size_t i = 0; i < n_bytes;) {
215 size_t j = i; // Index of next character that must be escaped
216 for (; j < n_bytes; ++j) {
217 if (uri_must_escape(utf8[j])) {
218 break;
219 }
220 }
221
222 // Bulk write all characters up to this special one
223 len += sink(&utf8[i], j - i, writer);
224 if ((i = j) == n_bytes) {
225 break; // Reached end
226 }
227
228 // Write UTF-8 character
229 size_t size = 0;
230 len += write_character(writer, utf8 + i, &size);
231 i += size;
232 if (size == 0) {
233 // Corrupt input, scan to start of next character
234 for (++i; i < n_bytes && (utf8[i] & 0x80); ++i) {}
235 }
236 }
237 return len;
238 }
239
/// Return true iff byte `c` must be backslash-escaped in a local name
static bool
lname_must_escape(const uint8_t c)
{
	/* This arbitrary list of characters, most of which have nothing to do
	   with Turtle, must be handled as special cases here because the RDF and
	   SPARQL WGs are apparently intent on making the once elegant Turtle a
	   baroque and inconsistent mess, throwing elegance and extensibility
	   completely out the window for no good reason.

	   Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid
	   unescaped in local names, so they are not escaped here. */

	static const char escaped[] = "'!#$%&()*+,/;=?@~";

	// The terminating null of the table is excluded from the search
	return memchr(escaped, c, sizeof(escaped) - 1) != NULL;
}
260
261 static size_t
write_lname(SerdWriter * writer,const uint8_t * utf8,size_t n_bytes)262 write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
263 {
264 size_t len = 0;
265 for (size_t i = 0; i < n_bytes; ++i) {
266 size_t j = i; // Index of next character that must be escaped
267 for (; j < n_bytes; ++j) {
268 if (lname_must_escape(utf8[j])) {
269 break;
270 }
271 }
272
273 // Bulk write all characters up to this special one
274 len += sink(&utf8[i], j - i, writer);
275 if ((i = j) == n_bytes) {
276 break; // Reached end
277 }
278
279 // Write escape
280 len += sink("\\", 1, writer);
281 len += sink(&utf8[i], 1, writer);
282 }
283 return len;
284 }
285
286 static size_t
write_text(SerdWriter * writer,TextContext ctx,const uint8_t * utf8,size_t n_bytes)287 write_text(SerdWriter* writer, TextContext ctx,
288 const uint8_t* utf8, size_t n_bytes)
289 {
290 size_t len = 0;
291 for (size_t i = 0; i < n_bytes;) {
292 // Fast bulk write for long strings of printable ASCII
293 size_t j = i;
294 for (; j < n_bytes; ++j) {
295 if (utf8[j] == '\\' || utf8[j] == '"'
296 || (!in_range(utf8[j], 0x20, 0x7E))) {
297 break;
298 }
299 }
300
301 len += sink(&utf8[i], j - i, writer);
302 if ((i = j) == n_bytes) {
303 break; // Reached end
304 }
305
306 const uint8_t in = utf8[i++];
307 if (ctx == WRITE_LONG_STRING) {
308 switch (in) {
309 case '\\': len += sink("\\\\", 2, writer); continue;
310 case '\b': len += sink("\\b", 2, writer); continue;
311 case '\n': case '\r': case '\t': case '\f':
312 len += sink(&in, 1, writer); // Write character as-is
313 continue;
314 case '\"':
315 if (i == n_bytes) { // '"' at string end
316 len += sink("\\\"", 2, writer);
317 } else {
318 len += sink(&in, 1, writer);
319 }
320 continue;
321 default: break;
322 }
323 } else if (ctx == WRITE_STRING) {
324 switch (in) {
325 case '\\': len += sink("\\\\", 2, writer); continue;
326 case '\n': len += sink("\\n", 2, writer); continue;
327 case '\r': len += sink("\\r", 2, writer); continue;
328 case '\t': len += sink("\\t", 2, writer); continue;
329 case '"': len += sink("\\\"", 2, writer); continue;
330 default: break;
331 }
332 if (writer->syntax == SERD_TURTLE) {
333 switch (in) {
334 case '\b': len += sink("\\b", 2, writer); continue;
335 case '\f': len += sink("\\f", 2, writer); continue;
336 }
337 }
338 }
339
340 // Write UTF-8 character
341 size_t size = 0;
342 len += write_character(writer, utf8 + i - 1, &size);
343 if (size == 0) {
344 // Corrupt input, scan to start of next character
345 for (; i < n_bytes && (utf8[i] & 0x80); ++i) {}
346 } else {
347 i += size - 1;
348 }
349 }
350 return len;
351 }
352
353 static size_t
uri_sink(const void * buf,size_t len,void * stream)354 uri_sink(const void* buf, size_t len, void* stream)
355 {
356 return write_uri((SerdWriter*)stream, (const uint8_t*)buf, len);
357 }
358
359 static void
write_newline(SerdWriter * writer)360 write_newline(SerdWriter* writer)
361 {
362 sink("\n", 1, writer);
363 for (unsigned i = 0; i < writer->indent; ++i) {
364 sink("\t", 1, writer);
365 }
366 }
367
368 static bool
write_sep(SerdWriter * writer,const Sep sep)369 write_sep(SerdWriter* writer, const Sep sep)
370 {
371 const SepRule* rule = &rules[sep];
372 if (rule->space_before) {
373 write_newline(writer);
374 }
375 if (rule->str) {
376 sink(rule->str, rule->len, writer);
377 }
378 if ((writer->last_sep && rule->space_after_sep) ||
379 (!writer->last_sep && rule->space_after_node)) {
380 write_newline(writer);
381 } else if (writer->last_sep && rule->space_after_node) {
382 sink(" ", 1, writer);
383 }
384 writer->last_sep = sep;
385 return true;
386 }
387
388 static SerdStatus
reset_context(SerdWriter * writer,bool graph)389 reset_context(SerdWriter* writer, bool graph)
390 {
391 if (graph) {
392 writer->context.graph.type = SERD_NOTHING;
393 }
394 writer->context.subject.type = SERD_NOTHING;
395 writer->context.predicate.type = SERD_NOTHING;
396 writer->empty = false;
397 return SERD_SUCCESS;
398 }
399
400 static SerdStatus
free_context(SerdWriter * writer)401 free_context(SerdWriter* writer)
402 {
403 serd_node_free(&writer->context.graph);
404 serd_node_free(&writer->context.subject);
405 serd_node_free(&writer->context.predicate);
406 return reset_context(writer, true);
407 }
408
409 static bool
is_inline_start(const SerdWriter * writer,Field field,SerdStatementFlags flags)410 is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags)
411 {
412 return (supports_abbrev(writer) &&
413 ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) ||
414 (field == FIELD_OBJECT && (flags & SERD_ANON_O_BEGIN))));
415 }
416
417 static bool
write_literal(SerdWriter * writer,const SerdNode * node,const SerdNode * datatype,const SerdNode * lang,SerdStatementFlags flags)418 write_literal(SerdWriter* writer,
419 const SerdNode* node,
420 const SerdNode* datatype,
421 const SerdNode* lang,
422 SerdStatementFlags flags)
423 {
424 if (supports_abbrev(writer) && datatype && datatype->buf) {
425 const char* type_uri = (const char*)datatype->buf;
426 if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && (
427 !strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") ||
428 !strcmp(type_uri + sizeof(NS_XSD) - 1, "integer"))) {
429 sink(node->buf, node->n_bytes, writer);
430 return true;
431 } else if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) &&
432 !strcmp(type_uri + sizeof(NS_XSD) - 1, "decimal") &&
433 strchr((const char*)node->buf, '.') &&
434 node->buf[node->n_bytes - 1] != '.') {
435 /* xsd:decimal literals without trailing digits, e.g. "5.", can
436 not be written bare in Turtle. We could add a 0 which is
437 prettier, but changes the text and breaks round tripping.
438 */
439 sink(node->buf, node->n_bytes, writer);
440 return true;
441 }
442 }
443
444 if (supports_abbrev(writer)
445 && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) {
446 sink("\"\"\"", 3, writer);
447 write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes);
448 sink("\"\"\"", 3, writer);
449 } else {
450 sink("\"", 1, writer);
451 write_text(writer, WRITE_STRING, node->buf, node->n_bytes);
452 sink("\"", 1, writer);
453 }
454 if (lang && lang->buf) {
455 sink("@", 1, writer);
456 sink(lang->buf, lang->n_bytes, writer);
457 } else if (datatype && datatype->buf) {
458 sink("^^", 2, writer);
459 return write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags);
460 }
461 return true;
462 }
463
// Return true iff `buf` is a valid prefixed name suffix
static inline bool
is_name(const uint8_t* buf, const size_t len)
{
	// TODO: This is more strict than it should be.
	size_t i = 0;
	while (i < len && (is_alpha(buf[i]) || is_digit(buf[i]))) {
		++i;
	}

	// Valid only if no other kind of character stopped the scan
	return i == len;
}
476
477 static bool
write_uri_node(SerdWriter * const writer,const SerdNode * node,const Field field,const SerdStatementFlags flags)478 write_uri_node(SerdWriter* const writer,
479 const SerdNode* node,
480 const Field field,
481 const SerdStatementFlags flags)
482 {
483 SerdNode prefix;
484 SerdChunk suffix;
485
486 if (is_inline_start(writer, field, flags)) {
487 ++writer->indent;
488 write_sep(writer, SEP_ANON_BEGIN);
489 sink("== ", 3, writer);
490 }
491
492 const bool has_scheme = serd_uri_string_has_scheme(node->buf);
493 if (field == FIELD_PREDICATE && supports_abbrev(writer)
494 && !strcmp((const char*)node->buf, NS_RDF "type")) {
495 return sink("a", 1, writer) == 1;
496 } else if (supports_abbrev(writer)
497 && !strcmp((const char*)node->buf, NS_RDF "nil")) {
498 return sink("()", 2, writer) == 2;
499 } else if (has_scheme && (writer->style & SERD_STYLE_CURIED) &&
500 serd_env_qualify(writer->env, node, &prefix, &suffix) &&
501 is_name(suffix.buf, suffix.len)) {
502 write_uri(writer, prefix.buf, prefix.n_bytes);
503 sink(":", 1, writer);
504 write_uri(writer, suffix.buf, suffix.len);
505 return true;
506 }
507
508 write_sep(writer, SEP_URI_BEGIN);
509 if (writer->style & SERD_STYLE_RESOLVED) {
510 SerdURI in_base_uri, uri, abs_uri;
511 serd_env_get_base_uri(writer->env, &in_base_uri);
512 serd_uri_parse(node->buf, &uri);
513 serd_uri_resolve(&uri, &in_base_uri, &abs_uri);
514 bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri);
515 SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri;
516 if (!uri_is_under(&abs_uri, root) ||
517 writer->syntax == SERD_NTRIPLES ||
518 writer->syntax == SERD_NQUADS) {
519 serd_uri_serialise(&abs_uri, uri_sink, writer);
520 } else {
521 serd_uri_serialise_relative(
522 &uri, &writer->base_uri, root, uri_sink, writer);
523 }
524 } else {
525 write_uri(writer, node->buf, node->n_bytes);
526 }
527 write_sep(writer, SEP_URI_END);
528 if (is_inline_start(writer, field, flags)) {
529 sink(" ;", 2, writer);
530 write_newline(writer);
531 }
532 return true;
533 }
534
535 static bool
write_curie(SerdWriter * const writer,const SerdNode * node,const Field field,const SerdStatementFlags flags)536 write_curie(SerdWriter* const writer,
537 const SerdNode* node,
538 const Field field,
539 const SerdStatementFlags flags)
540 {
541 SerdChunk prefix;
542 SerdChunk suffix;
543 SerdStatus st;
544 switch (writer->syntax) {
545 case SERD_NTRIPLES:
546 case SERD_NQUADS:
547 if ((st = serd_env_expand(writer->env, node, &prefix, &suffix))) {
548 w_err(writer, st, "undefined namespace prefix `%s'\n", node->buf);
549 return false;
550 }
551 write_sep(writer, SEP_URI_BEGIN);
552 write_uri(writer, prefix.buf, prefix.len);
553 write_uri(writer, suffix.buf, suffix.len);
554 write_sep(writer, SEP_URI_END);
555 break;
556 case SERD_TURTLE:
557 case SERD_TRIG:
558 if (is_inline_start(writer, field, flags)) {
559 ++writer->indent;
560 write_sep(writer, SEP_ANON_BEGIN);
561 sink("== ", 3, writer);
562 }
563 write_lname(writer, node->buf, node->n_bytes);
564 if (is_inline_start(writer, field, flags)) {
565 sink(" ;", 2, writer);
566 write_newline(writer);
567 }
568 }
569 return true;
570 }
571
572 static bool
write_blank(SerdWriter * const writer,const SerdNode * node,const Field field,const SerdStatementFlags flags)573 write_blank(SerdWriter* const writer,
574 const SerdNode* node,
575 const Field field,
576 const SerdStatementFlags flags)
577 {
578 if (supports_abbrev(writer)) {
579 if (is_inline_start(writer, field, flags)) {
580 ++writer->indent;
581 return write_sep(writer, SEP_ANON_BEGIN);
582 } else if (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN)) {
583 assert(writer->list_depth == 0);
584 copy_node(&writer->list_subj, node);
585 ++writer->list_depth;
586 ++writer->indent;
587 return write_sep(writer, SEP_LIST_BEGIN);
588 } else if (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN)) {
589 ++writer->indent;
590 ++writer->list_depth;
591 return write_sep(writer, SEP_LIST_BEGIN);
592 } else if ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) ||
593 (field == FIELD_OBJECT && (flags & SERD_EMPTY_O))) {
594 return sink("[]", 2, writer) == 2;
595 }
596 }
597
598 sink("_:", 2, writer);
599 if (writer->bprefix && !strncmp((const char*)node->buf,
600 (const char*)writer->bprefix,
601 writer->bprefix_len)) {
602 sink(node->buf + writer->bprefix_len,
603 node->n_bytes - writer->bprefix_len,
604 writer);
605 } else {
606 sink(node->buf, node->n_bytes, writer);
607 }
608
609 return true;
610 }
611
612 static bool
write_node(SerdWriter * writer,const SerdNode * node,const SerdNode * datatype,const SerdNode * lang,Field field,SerdStatementFlags flags)613 write_node(SerdWriter* writer,
614 const SerdNode* node,
615 const SerdNode* datatype,
616 const SerdNode* lang,
617 Field field,
618 SerdStatementFlags flags)
619 {
620 bool ret = false;
621 switch (node->type) {
622 case SERD_LITERAL:
623 ret = write_literal(writer, node, datatype, lang, flags);
624 break;
625 case SERD_URI:
626 ret = write_uri_node(writer, node, field, flags);
627 break;
628 case SERD_CURIE:
629 ret = write_curie(writer, node, field, flags);
630 break;
631 case SERD_BLANK:
632 ret = write_blank(writer, node, field, flags);
633 default: break;
634 }
635 writer->last_sep = SEP_NONE;
636 return ret;
637 }
638
639 static inline bool
is_resource(const SerdNode * node)640 is_resource(const SerdNode* node)
641 {
642 return node->type > SERD_LITERAL;
643 }
644
645 static void
write_pred(SerdWriter * writer,SerdStatementFlags flags,const SerdNode * pred)646 write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred)
647 {
648 write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags);
649 write_sep(writer, SEP_P_O);
650 copy_node(&writer->context.predicate, pred);
651 }
652
653 static bool
write_list_obj(SerdWriter * writer,SerdStatementFlags flags,const SerdNode * predicate,const SerdNode * object,const SerdNode * datatype,const SerdNode * lang)654 write_list_obj(SerdWriter* writer,
655 SerdStatementFlags flags,
656 const SerdNode* predicate,
657 const SerdNode* object,
658 const SerdNode* datatype,
659 const SerdNode* lang)
660 {
661 if (!strcmp((const char*)object->buf, NS_RDF "nil")) {
662 --writer->indent;
663 write_sep(writer, SEP_LIST_END);
664 return true;
665 } else if (!strcmp((const char*)predicate->buf, NS_RDF "first")) {
666 write_sep(writer, SEP_LIST_SEP);
667 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
668 }
669 return false;
670 }
671
672 SerdStatus
serd_writer_write_statement(SerdWriter * writer,SerdStatementFlags flags,const SerdNode * graph,const SerdNode * subject,const SerdNode * predicate,const SerdNode * object,const SerdNode * datatype,const SerdNode * lang)673 serd_writer_write_statement(SerdWriter* writer,
674 SerdStatementFlags flags,
675 const SerdNode* graph,
676 const SerdNode* subject,
677 const SerdNode* predicate,
678 const SerdNode* object,
679 const SerdNode* datatype,
680 const SerdNode* lang)
681 {
682 if (!subject || !predicate || !object
683 || !subject->buf || !predicate->buf || !object->buf
684 || !is_resource(subject) || !is_resource(predicate)) {
685 return SERD_ERR_BAD_ARG;
686 }
687
688 #define TRY(write_result) \
689 if (!(write_result)) { \
690 return SERD_ERR_UNKNOWN; \
691 }
692
693 switch (writer->syntax) {
694 case SERD_NTRIPLES:
695 case SERD_NQUADS:
696 TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags));
697 sink(" ", 1, writer);
698 TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags));
699 sink(" ", 1, writer);
700 TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags));
701 if (writer->syntax == SERD_NQUADS && graph) {
702 sink(" ", 1, writer);
703 TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags));
704 }
705 sink(" .\n", 3, writer);
706 return SERD_SUCCESS;
707 default:
708 break;
709 }
710
711 if ((graph && !serd_node_equals(graph, &writer->context.graph)) ||
712 (!graph && writer->context.graph.type)) {
713 writer->indent = 0;
714 if (writer->context.subject.type) {
715 write_sep(writer, SEP_END_S);
716 }
717 if (writer->context.graph.type) {
718 write_sep(writer, SEP_GRAPH_END);
719 }
720
721 reset_context(writer, true);
722 if (graph) {
723 TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags));
724 ++writer->indent;
725 write_sep(writer, SEP_GRAPH_BEGIN);
726 copy_node(&writer->context.graph, graph);
727 }
728 }
729
730 if ((flags & SERD_LIST_CONT)) {
731 if (write_list_obj(writer, flags, predicate, object, datatype, lang)) {
732 // Reached end of list
733 if (--writer->list_depth == 0 && writer->list_subj.type) {
734 reset_context(writer, false);
735 serd_node_free(&writer->context.subject);
736 writer->context.subject = writer->list_subj;
737 writer->list_subj = SERD_NODE_NULL;
738 }
739 return SERD_SUCCESS;
740 }
741 } else if (serd_node_equals(subject, &writer->context.subject)) {
742 if (serd_node_equals(predicate, &writer->context.predicate)) {
743 // Abbreviate S P
744 if (!(flags & SERD_ANON_O_BEGIN)) {
745 ++writer->indent;
746 }
747 write_sep(writer, SEP_END_O);
748 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
749 if (!(flags & SERD_ANON_O_BEGIN)) {
750 --writer->indent;
751 }
752 } else {
753 // Abbreviate S
754 Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P;
755 write_sep(writer, sep);
756 write_pred(writer, flags, predicate);
757 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
758 }
759 } else {
760 // No abbreviation
761 if (writer->context.subject.type) {
762 assert(writer->indent > 0);
763 --writer->indent;
764 if (serd_stack_is_empty(&writer->anon_stack)) {
765 write_sep(writer, SEP_END_S);
766 }
767 } else if (!writer->empty) {
768 write_sep(writer, SEP_S_P);
769 }
770
771 if (!(flags & SERD_ANON_CONT)) {
772 write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags);
773 ++writer->indent;
774 write_sep(writer, SEP_S_P);
775 } else {
776 ++writer->indent;
777 }
778
779 reset_context(writer, false);
780 copy_node(&writer->context.subject, subject);
781
782 if (!(flags & SERD_LIST_S_BEGIN)) {
783 write_pred(writer, flags, predicate);
784 }
785
786 write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
787 }
788
789 if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) {
790 WriteContext* ctx = (WriteContext*)serd_stack_push(
791 &writer->anon_stack, sizeof(WriteContext));
792 *ctx = writer->context;
793 WriteContext new_context = {
794 serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL };
795 if ((flags & SERD_ANON_S_BEGIN)) {
796 new_context.predicate = serd_node_copy(predicate);
797 }
798 writer->context = new_context;
799 } else {
800 copy_node(&writer->context.graph, graph);
801 copy_node(&writer->context.subject, subject);
802 copy_node(&writer->context.predicate, predicate);
803 }
804
805 return SERD_SUCCESS;
806 }
807
808 SerdStatus
serd_writer_end_anon(SerdWriter * writer,const SerdNode * node)809 serd_writer_end_anon(SerdWriter* writer,
810 const SerdNode* node)
811 {
812 if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) {
813 return SERD_SUCCESS;
814 }
815 if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) {
816 w_err(writer, SERD_ERR_UNKNOWN,
817 "unexpected end of anonymous node\n");
818 return SERD_ERR_UNKNOWN;
819 }
820 --writer->indent;
821 write_sep(writer, SEP_ANON_END);
822 free_context(writer);
823 writer->context = *anon_stack_top(writer);
824 serd_stack_pop(&writer->anon_stack, sizeof(WriteContext));
825 const bool is_subject = serd_node_equals(node, &writer->context.subject);
826 if (is_subject) {
827 copy_node(&writer->context.subject, node);
828 writer->context.predicate.type = SERD_NOTHING;
829 }
830 return SERD_SUCCESS;
831 }
832
833 SerdStatus
serd_writer_finish(SerdWriter * writer)834 serd_writer_finish(SerdWriter* writer)
835 {
836 if (writer->context.subject.type) {
837 write_sep(writer, SEP_END_S);
838 }
839 if (writer->context.graph.type) {
840 write_sep(writer, SEP_GRAPH_END);
841 }
842 serd_byte_sink_flush(&writer->byte_sink);
843 writer->indent = 0;
844 return free_context(writer);
845 }
846
847 SerdWriter*
serd_writer_new(SerdSyntax syntax,SerdStyle style,SerdEnv * env,const SerdURI * base_uri,SerdSink ssink,void * stream)848 serd_writer_new(SerdSyntax syntax,
849 SerdStyle style,
850 SerdEnv* env,
851 const SerdURI* base_uri,
852 SerdSink ssink,
853 void* stream)
854 {
855 const WriteContext context = WRITE_CONTEXT_NULL;
856 SerdWriter* writer = (SerdWriter*)calloc(1, sizeof(SerdWriter));
857 writer->syntax = syntax;
858 writer->style = style;
859 writer->env = env;
860 writer->root_node = SERD_NODE_NULL;
861 writer->root_uri = SERD_URI_NULL;
862 writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL;
863 writer->anon_stack = serd_stack_new(4 * sizeof(WriteContext));
864 writer->context = context;
865 writer->list_subj = SERD_NODE_NULL;
866 writer->empty = true;
867 writer->byte_sink = serd_byte_sink_new(
868 ssink, stream, (style & SERD_STYLE_BULK) ? SERD_PAGE_SIZE : 1);
869 return writer;
870 }
871
872 void
serd_writer_set_error_sink(SerdWriter * writer,SerdErrorSink error_sink,void * error_handle)873 serd_writer_set_error_sink(SerdWriter* writer,
874 SerdErrorSink error_sink,
875 void* error_handle)
876 {
877 writer->error_sink = error_sink;
878 writer->error_handle = error_handle;
879 }
880
881 void
serd_writer_chop_blank_prefix(SerdWriter * writer,const uint8_t * prefix)882 serd_writer_chop_blank_prefix(SerdWriter* writer,
883 const uint8_t* prefix)
884 {
885 free(writer->bprefix);
886 writer->bprefix_len = 0;
887 writer->bprefix = NULL;
888 if (prefix) {
889 writer->bprefix_len = strlen((const char*)prefix);
890 writer->bprefix = (uint8_t*)malloc(writer->bprefix_len + 1);
891 memcpy(writer->bprefix, prefix, writer->bprefix_len + 1);
892 }
893 }
894
895 SerdStatus
serd_writer_set_base_uri(SerdWriter * writer,const SerdNode * uri)896 serd_writer_set_base_uri(SerdWriter* writer,
897 const SerdNode* uri)
898 {
899 if (!serd_env_set_base_uri(writer->env, uri)) {
900 serd_env_get_base_uri(writer->env, &writer->base_uri);
901
902 if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
903 if (writer->context.graph.type || writer->context.subject.type) {
904 sink(" .\n\n", 4, writer);
905 reset_context(writer, true);
906 }
907 sink("@base <", 7, writer);
908 sink(uri->buf, uri->n_bytes, writer);
909 sink("> .\n", 4, writer);
910 }
911 writer->indent = 0;
912 return reset_context(writer, true);
913 }
914 return SERD_ERR_UNKNOWN;
915 }
916
917 SerdStatus
serd_writer_set_root_uri(SerdWriter * writer,const SerdNode * uri)918 serd_writer_set_root_uri(SerdWriter* writer,
919 const SerdNode* uri)
920 {
921 serd_node_free(&writer->root_node);
922 if (uri && uri->buf) {
923 writer->root_node = serd_node_copy(uri);
924 serd_uri_parse(uri->buf, &writer->root_uri);
925 } else {
926 writer->root_node = SERD_NODE_NULL;
927 writer->root_uri = SERD_URI_NULL;
928 }
929 return SERD_SUCCESS;
930 }
931
932 SerdStatus
serd_writer_set_prefix(SerdWriter * writer,const SerdNode * name,const SerdNode * uri)933 serd_writer_set_prefix(SerdWriter* writer,
934 const SerdNode* name,
935 const SerdNode* uri)
936 {
937 if (!serd_env_set_prefix(writer->env, name, uri)) {
938 if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
939 if (writer->context.graph.type || writer->context.subject.type) {
940 sink(" .\n\n", 4, writer);
941 reset_context(writer, true);
942 }
943 sink("@prefix ", 8, writer);
944 sink(name->buf, name->n_bytes, writer);
945 sink(": <", 3, writer);
946 write_uri(writer, uri->buf, uri->n_bytes);
947 sink("> .\n", 4, writer);
948 }
949 writer->indent = 0;
950 return reset_context(writer, true);
951 }
952 return SERD_ERR_UNKNOWN;
953 }
954
955 void
serd_writer_free(SerdWriter * writer)956 serd_writer_free(SerdWriter* writer)
957 {
958 serd_writer_finish(writer);
959 serd_stack_free(&writer->anon_stack);
960 free(writer->bprefix);
961 serd_byte_sink_free(&writer->byte_sink);
962 serd_node_free(&writer->root_node);
963 free(writer);
964 }
965
966 SerdEnv*
serd_writer_get_env(SerdWriter * writer)967 serd_writer_get_env(SerdWriter* writer)
968 {
969 return writer->env;
970 }
971
/**
   Sink function that writes to a FILE.

   @param buf Bytes to write.
   @param len Number of bytes in `buf`.
   @param stream A FILE* to write to.
   @return The number of bytes written, as returned by fwrite().
*/
size_t
serd_file_sink(const void* buf, size_t len, void* stream)
{
	FILE* const file = (FILE*)stream;

	return fwrite(buf, 1, len, file);
}
977
978 size_t
serd_chunk_sink(const void * buf,size_t len,void * stream)979 serd_chunk_sink(const void* buf, size_t len, void* stream)
980 {
981 SerdChunk* chunk = (SerdChunk*)stream;
982 chunk->buf = (uint8_t*)realloc((uint8_t*)chunk->buf, chunk->len + len);
983 memcpy((uint8_t*)chunk->buf + chunk->len, buf, len);
984 chunk->len += len;
985 return len;
986 }
987
988 uint8_t*
serd_chunk_sink_finish(SerdChunk * stream)989 serd_chunk_sink_finish(SerdChunk* stream)
990 {
991 serd_chunk_sink("", 1, stream);
992 return (uint8_t*)stream->buf;
993 }
994