1 /*
2   Copyright 2011-2017 David Robillard <http://drobilla.net>
3 
4   Permission to use, copy, modify, and/or distribute this software for any
5   purpose with or without fee is hereby granted, provided that the above
6   copyright notice and this permission notice appear in all copies.
7 
8   THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16 
17 #include "serd_internal.h"
18 
19 #include <assert.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 
/**
   The graph, subject, and predicate most recently written.

   The writer compares each incoming statement against this context to decide
   which fields can be abbreviated, and pushes copies of it on the anonymous
   node stack when an anonymous node begins.
*/
typedef struct {
	SerdNode graph;      ///< Graph of the last statement written
	SerdNode subject;    ///< Subject of the last statement written
	SerdNode predicate;  ///< Predicate of the last statement written
} WriteContext;
29 
/// Empty context: every node is all-zero with type SERD_NOTHING
static const WriteContext WRITE_CONTEXT_NULL = {
	{ 0, 0, 0, 0, SERD_NOTHING },  // graph
	{ 0, 0, 0, 0, SERD_NOTHING },  // subject
	{ 0, 0, 0, 0, SERD_NOTHING }   // predicate
};
35 
/**
   Separators (punctuation and whitespace) that can be written between nodes.

   The numeric value of each enumerator is used as an index into the `rules`
   table, so the order here must match the order of the rows there.
*/
typedef enum {
	SEP_NONE,        ///< No separator (also the "last sep" after any node)
	SEP_END_S,       ///< End of a subject ('.')
	SEP_END_P,       ///< End of a predicate (';')
	SEP_END_O,       ///< End of an object (',')
	SEP_S_P,         ///< Between a subject and predicate (whitespace)
	SEP_P_O,         ///< Between a predicate and object (whitespace)
	SEP_ANON_BEGIN,  ///< Start of anonymous node ('[')
	SEP_ANON_END,    ///< End of anonymous node (']')
	SEP_LIST_BEGIN,  ///< Start of list ('(')
	SEP_LIST_SEP,    ///< List separator (whitespace)
	SEP_LIST_END,    ///< End of list (')')
	SEP_GRAPH_BEGIN, ///< Start of graph ('{')
	SEP_GRAPH_END,   ///< End of graph ('}')
	SEP_URI_BEGIN,   ///< URI start quote ('<')
	SEP_URI_END      ///< URI end quote ('>')
} Sep;
53 
/**
   Formatting rule for one separator.

   The three `space_*` members are used as booleans by write_sep(): they
   select whether a newline (plus indentation) is written around the
   separator text.
*/
typedef struct {
	const char* str;               ///< Sep string (NULL if nothing is written)
	uint8_t     len;               ///< Length of sep string
	uint8_t     space_before;      ///< Newline before sep
	uint8_t     space_after_node;  ///< Newline after sep if after node
	uint8_t     space_after_sep;   ///< Newline after sep if after sep
} SepRule;
61 
/**
   Formatting rules, indexed by `Sep` value (see write_sep()).

   Columns are: str, len, space_before, space_after_node, space_after_sep.
   Rows must stay in the same order as the `Sep` enumerators.
*/
static const SepRule rules[] = {
	{ NULL,     0, 0, 0, 0 },  // SEP_NONE
	{ " .\n\n", 4, 0, 0, 0 },  // SEP_END_S
	{ " ;",     2, 0, 1, 1 },  // SEP_END_P
	{ " ,",     2, 0, 1, 0 },  // SEP_END_O
	{ NULL,     0, 0, 1, 0 },  // SEP_S_P
	{ " ",      1, 0, 0, 0 },  // SEP_P_O
	{ "[",      1, 0, 1, 1 },  // SEP_ANON_BEGIN
	{ "]",      1, 1, 0, 0 },  // SEP_ANON_END
	{ "(",      1, 0, 0, 0 },  // SEP_LIST_BEGIN
	{ NULL,     1, 0, 1, 0 },  // SEP_LIST_SEP (len is ignored since str is NULL)
	{ ")",      1, 1, 0, 0 },  // SEP_LIST_END
	{ " {",     2, 0, 1, 1 },  // SEP_GRAPH_BEGIN
	{ " }",     2, 0, 1, 1 },  // SEP_GRAPH_END
	{ "<",      1, 0, 0, 0 },  // SEP_URI_BEGIN
	{ ">",      1, 0, 0, 0 },  // SEP_URI_END
	{ "\n",     1, 0, 1, 0 }   // NOTE(review): no Sep enumerator maps to this row
};
80 
/// Complete state of a writer
struct SerdWriterImpl {
	SerdSyntax    syntax;        ///< Output syntax (Turtle, TriG, NTriples, NQuads)
	SerdStyle     style;         ///< Bitwise OR of SERD_STYLE_* flags
	SerdEnv*      env;           ///< Environment used for prefixes and base URI
	SerdNode      root_node;     ///< Copy of the root URI node (owned)
	SerdURI       root_uri;      ///< Parsed root URI
	SerdURI       base_uri;      ///< Base URI used when writing relative URIs
	SerdStack     anon_stack;    ///< Saved contexts of enclosing anonymous nodes
	SerdByteSink  byte_sink;     ///< Destination for all written bytes
	SerdErrorSink error_sink;    ///< Error callback
	void*         error_handle;  ///< Opaque handle passed to error_sink
	WriteContext  context;       ///< Context of the last statement written
	SerdNode      list_subj;     ///< Subject node of a list begun as a subject
	unsigned      list_depth;    ///< Number of lists currently open
	unsigned      indent;        ///< Number of tabs written after a newline
	uint8_t*      bprefix;       ///< Blank node label prefix to strip (owned)
	size_t        bprefix_len;   ///< Length of bprefix in bytes
	Sep           last_sep;      ///< Last separator written, SEP_NONE after a node
	bool          empty;         ///< True until the context is first reset
};
101 
/// Kind of string literal being written, which selects the escaping rules
typedef enum {
	WRITE_STRING,      ///< Short string, delimited by single double-quotes
	WRITE_LONG_STRING  ///< Long string, delimited by triple double-quotes
} TextContext;
106 
// Forward declaration: write_literal() calls write_node() to write datatypes
static bool
write_node(SerdWriter*        writer,
           const SerdNode*    node,
           const SerdNode*    datatype,
           const SerdNode*    lang,
           Field              field,
           SerdStatementFlags flags);
114 
115 static bool
supports_abbrev(const SerdWriter * writer)116 supports_abbrev(const SerdWriter* writer)
117 {
118 	return writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG;
119 }
120 
121 static void
w_err(SerdWriter * writer,SerdStatus st,const char * fmt,...)122 w_err(SerdWriter* writer, SerdStatus st, const char* fmt, ...)
123 {
124 	/* TODO: This results in errors with no file information, which is not
125 	   helpful when re-serializing a file (particularly for "undefined
126 	   namespace prefix" errors.  The statement sink API needs to be changed to
127 	   add a Cursor parameter so the source can notify the writer of the
128 	   statement origin for better error reporting. */
129 
130 	va_list args;
131 	va_start(args, fmt);
132 	const SerdError e = { st, NULL, 0, 0, fmt, &args };
133 	serd_error(writer->error_sink, writer->error_handle, &e);
134 	va_end(args);
135 }
136 
137 static inline WriteContext*
anon_stack_top(SerdWriter * writer)138 anon_stack_top(SerdWriter* writer)
139 {
140 	assert(!serd_stack_is_empty(&writer->anon_stack));
141 	return (WriteContext*)(writer->anon_stack.buf
142 	                       + writer->anon_stack.size - sizeof(WriteContext));
143 }
144 
145 static void
copy_node(SerdNode * dst,const SerdNode * src)146 copy_node(SerdNode* dst, const SerdNode* src)
147 {
148 	if (src) {
149 		dst->buf = (uint8_t*)realloc((char*)dst->buf, src->n_bytes + 1);
150 		dst->n_bytes = src->n_bytes;
151 		dst->n_chars = src->n_chars;
152 		dst->flags   = src->flags;
153 		dst->type    = src->type;
154 		memcpy((char*)dst->buf, src->buf, src->n_bytes + 1);
155 	} else {
156 		dst->type = SERD_NOTHING;
157 	}
158 }
159 
160 static inline size_t
sink(const void * buf,size_t len,SerdWriter * writer)161 sink(const void* buf, size_t len, SerdWriter* writer)
162 {
163 	return serd_byte_sink_write(buf, len, &writer->byte_sink);
164 }
165 
166 // Write a single character, as an escape for single byte characters
167 // (Caller prints any single byte characters that don't need escaping)
168 static size_t
write_character(SerdWriter * writer,const uint8_t * utf8,size_t * size)169 write_character(SerdWriter* writer, const uint8_t* utf8, size_t* size)
170 {
171 	char           escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
172 	const uint32_t c          = parse_utf8_char(utf8, size);
173 	switch (*size) {
174 	case 0:
175 		w_err(writer, SERD_ERR_BAD_ARG, "invalid UTF-8: %X\n", utf8[0]);
176 		return sink(replacement_char, sizeof(replacement_char), writer);
177 	case 1:
178 		snprintf(escape, sizeof(escape), "\\u%04X", utf8[0]);
179 		return sink(escape, 6, writer);
180 	default:
181 		break;
182 	}
183 
184 	if (!(writer->style & SERD_STYLE_ASCII)) {
185 		// Write UTF-8 character directly to UTF-8 output
186 		return sink(utf8, *size, writer);
187 	}
188 
189 	if (c <= 0xFFFF) {
190 		snprintf(escape, sizeof(escape), "\\u%04X", c);
191 		return sink(escape, 6, writer);
192 	} else {
193 		snprintf(escape, sizeof(escape), "\\U%08X", c);
194 		return sink(escape, 10, writer);
195 	}
196 }
197 
/// Return true iff byte `c` can not be written verbatim inside a URI
static inline bool
uri_must_escape(const uint8_t c)
{
	// Printable ASCII characters that are nonetheless forbidden
	static const char forbidden[] = " \"<>\\^`{|}";

	if (memchr(forbidden, c, sizeof(forbidden) - 1)) {
		return true;
	}

	// Everything outside printable ASCII must be escaped as well
	return !in_range(c, 0x20, 0x7E);
}
209 
210 static size_t
write_uri(SerdWriter * writer,const uint8_t * utf8,size_t n_bytes)211 write_uri(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
212 {
213 	size_t len = 0;
214 	for (size_t i = 0; i < n_bytes;) {
215 		size_t j = i;  // Index of next character that must be escaped
216 		for (; j < n_bytes; ++j) {
217 			if (uri_must_escape(utf8[j])) {
218 				break;
219 			}
220 		}
221 
222 		// Bulk write all characters up to this special one
223 		len += sink(&utf8[i], j - i, writer);
224 		if ((i = j) == n_bytes) {
225 			break;  // Reached end
226 		}
227 
228 		// Write UTF-8 character
229 		size_t size = 0;
230 		len += write_character(writer, utf8 + i, &size);
231 		i   += size;
232 		if (size == 0) {
233 			// Corrupt input, scan to start of next character
234 			for (++i; i < n_bytes && (utf8[i] & 0x80); ++i) {}
235 		}
236 	}
237 	return len;
238 }
239 
/**
   Return true iff byte `c` must be backslash-escaped in a local name.

   This arbitrary list of characters, most of which have nothing to do with
   Turtle, must be handled as special cases here because the RDF and SPARQL
   WGs are apparently intent on making the once elegant Turtle a baroque
   and inconsistent mess, throwing elegance and extensibility completely
   out the window for no good reason.

   Note '-', '.', and '_' are also in PN_LOCAL_ESC, but are valid unescaped
   in local names, so they are not escaped here.
*/
static bool
lname_must_escape(const uint8_t c)
{
	static const char escaped[] = "'!#$%&()*+,/;=?@~";

	// Search excludes the terminator so that 0 is not considered a match
	return memchr(escaped, c, sizeof(escaped) - 1) != NULL;
}
260 
261 static size_t
write_lname(SerdWriter * writer,const uint8_t * utf8,size_t n_bytes)262 write_lname(SerdWriter* writer, const uint8_t* utf8, size_t n_bytes)
263 {
264 	size_t len = 0;
265 	for (size_t i = 0; i < n_bytes; ++i) {
266 		size_t j = i;  // Index of next character that must be escaped
267 		for (; j < n_bytes; ++j) {
268 			if (lname_must_escape(utf8[j])) {
269 				break;
270 			}
271 		}
272 
273 		// Bulk write all characters up to this special one
274 		len += sink(&utf8[i], j - i, writer);
275 		if ((i = j) == n_bytes) {
276 			break;  // Reached end
277 		}
278 
279 		// Write escape
280 		len += sink("\\", 1, writer);
281 		len += sink(&utf8[i], 1, writer);
282 	}
283 	return len;
284 }
285 
286 static size_t
write_text(SerdWriter * writer,TextContext ctx,const uint8_t * utf8,size_t n_bytes)287 write_text(SerdWriter* writer, TextContext ctx,
288            const uint8_t* utf8, size_t n_bytes)
289 {
290 	size_t len = 0;
291 	for (size_t i = 0; i < n_bytes;) {
292 		// Fast bulk write for long strings of printable ASCII
293 		size_t j = i;
294 		for (; j < n_bytes; ++j) {
295 			if (utf8[j] == '\\' || utf8[j] == '"'
296 			    || (!in_range(utf8[j], 0x20, 0x7E))) {
297 				break;
298 			}
299 		}
300 
301 		len += sink(&utf8[i], j - i, writer);
302 		if ((i = j) == n_bytes) {
303 			break;  // Reached end
304 		}
305 
306 		const uint8_t in = utf8[i++];
307 		if (ctx == WRITE_LONG_STRING) {
308 			switch (in) {
309 			case '\\': len += sink("\\\\", 2, writer); continue;
310 			case '\b': len += sink("\\b", 2, writer);  continue;
311 			case '\n': case '\r': case '\t': case '\f':
312 				len += sink(&in, 1, writer);  // Write character as-is
313 				continue;
314 			case '\"':
315 				if (i == n_bytes) {  // '"' at string end
316 					len += sink("\\\"", 2, writer);
317 				} else {
318 					len += sink(&in, 1, writer);
319 				}
320 				continue;
321 			default: break;
322 			}
323 		} else if (ctx == WRITE_STRING) {
324 			switch (in) {
325 			case '\\': len += sink("\\\\", 2, writer); continue;
326 			case '\n': len += sink("\\n", 2, writer);  continue;
327 			case '\r': len += sink("\\r", 2, writer);  continue;
328 			case '\t': len += sink("\\t", 2, writer);  continue;
329 			case '"':  len += sink("\\\"", 2, writer); continue;
330 			default: break;
331 			}
332 			if (writer->syntax == SERD_TURTLE) {
333 				switch (in) {
334 				case '\b': len += sink("\\b", 2, writer); continue;
335 				case '\f': len += sink("\\f", 2, writer); continue;
336 				}
337 			}
338 		}
339 
340 		// Write UTF-8 character
341 		size_t size = 0;
342 		len += write_character(writer, utf8 + i - 1, &size);
343 		if (size == 0) {
344 			// Corrupt input, scan to start of next character
345 			for (; i < n_bytes && (utf8[i] & 0x80); ++i) {}
346 		} else {
347 			i += size - 1;
348 		}
349 	}
350 	return len;
351 }
352 
353 static size_t
uri_sink(const void * buf,size_t len,void * stream)354 uri_sink(const void* buf, size_t len, void* stream)
355 {
356 	return write_uri((SerdWriter*)stream, (const uint8_t*)buf, len);
357 }
358 
359 static void
write_newline(SerdWriter * writer)360 write_newline(SerdWriter* writer)
361 {
362 	sink("\n", 1, writer);
363 	for (unsigned i = 0; i < writer->indent; ++i) {
364 		sink("\t", 1, writer);
365 	}
366 }
367 
368 static bool
write_sep(SerdWriter * writer,const Sep sep)369 write_sep(SerdWriter* writer, const Sep sep)
370 {
371 	const SepRule* rule = &rules[sep];
372 	if (rule->space_before) {
373 		write_newline(writer);
374 	}
375 	if (rule->str) {
376 		sink(rule->str, rule->len, writer);
377 	}
378 	if ((writer->last_sep && rule->space_after_sep) ||
379 	    (!writer->last_sep && rule->space_after_node)) {
380 		write_newline(writer);
381 	} else if (writer->last_sep && rule->space_after_node) {
382 		sink(" ", 1, writer);
383 	}
384 	writer->last_sep = sep;
385 	return true;
386 }
387 
388 static SerdStatus
reset_context(SerdWriter * writer,bool graph)389 reset_context(SerdWriter* writer, bool graph)
390 {
391 	if (graph) {
392 		writer->context.graph.type = SERD_NOTHING;
393 	}
394 	writer->context.subject.type   = SERD_NOTHING;
395 	writer->context.predicate.type = SERD_NOTHING;
396 	writer->empty                  = false;
397 	return SERD_SUCCESS;
398 }
399 
400 static SerdStatus
free_context(SerdWriter * writer)401 free_context(SerdWriter* writer)
402 {
403 	serd_node_free(&writer->context.graph);
404 	serd_node_free(&writer->context.subject);
405 	serd_node_free(&writer->context.predicate);
406 	return reset_context(writer, true);
407 }
408 
409 static bool
is_inline_start(const SerdWriter * writer,Field field,SerdStatementFlags flags)410 is_inline_start(const SerdWriter* writer, Field field, SerdStatementFlags flags)
411 {
412 	return (supports_abbrev(writer) &&
413 	        ((field == FIELD_SUBJECT && (flags & SERD_ANON_S_BEGIN)) ||
414 	         (field == FIELD_OBJECT &&  (flags & SERD_ANON_O_BEGIN))));
415 }
416 
417 static bool
write_literal(SerdWriter * writer,const SerdNode * node,const SerdNode * datatype,const SerdNode * lang,SerdStatementFlags flags)418 write_literal(SerdWriter*        writer,
419               const SerdNode*    node,
420               const SerdNode*    datatype,
421               const SerdNode*    lang,
422               SerdStatementFlags flags)
423 {
424 	if (supports_abbrev(writer) && datatype && datatype->buf) {
425 		const char* type_uri = (const char*)datatype->buf;
426 		if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) && (
427 			    !strcmp(type_uri + sizeof(NS_XSD) - 1, "boolean") ||
428 			    !strcmp(type_uri + sizeof(NS_XSD) - 1, "integer"))) {
429 			sink(node->buf, node->n_bytes, writer);
430 			return true;
431 		} else if (!strncmp(type_uri, NS_XSD, sizeof(NS_XSD) - 1) &&
432 		           !strcmp(type_uri + sizeof(NS_XSD) - 1, "decimal") &&
433 		           strchr((const char*)node->buf, '.') &&
434 		           node->buf[node->n_bytes - 1] != '.') {
435 			/* xsd:decimal literals without trailing digits, e.g. "5.", can
436 			   not be written bare in Turtle.  We could add a 0 which is
437 			   prettier, but changes the text and breaks round tripping.
438 			*/
439 			sink(node->buf, node->n_bytes, writer);
440 			return true;
441 		}
442 	}
443 
444 	if (supports_abbrev(writer)
445 	    && (node->flags & (SERD_HAS_NEWLINE|SERD_HAS_QUOTE))) {
446 		sink("\"\"\"", 3, writer);
447 		write_text(writer, WRITE_LONG_STRING, node->buf, node->n_bytes);
448 		sink("\"\"\"", 3, writer);
449 	} else {
450 		sink("\"", 1, writer);
451 		write_text(writer, WRITE_STRING, node->buf, node->n_bytes);
452 		sink("\"", 1, writer);
453 	}
454 	if (lang && lang->buf) {
455 		sink("@", 1, writer);
456 		sink(lang->buf, lang->n_bytes, writer);
457 	} else if (datatype && datatype->buf) {
458 		sink("^^", 2, writer);
459 		return write_node(writer, datatype, NULL, NULL, FIELD_NONE, flags);
460 	}
461 	return true;
462 }
463 
/// Return true iff `buf` is a valid prefixed name suffix
static inline bool
is_name(const uint8_t* buf, const size_t len)
{
	// TODO: This is more strict than it should be.
	const uint8_t* const end = buf + len;
	for (const uint8_t* c = buf; c != end; ++c) {
		const bool alnum = is_alpha(*c) || is_digit(*c);
		if (!alnum) {
			return false;
		}
	}

	return true;
}
476 
477 static bool
write_uri_node(SerdWriter * const writer,const SerdNode * node,const Field field,const SerdStatementFlags flags)478 write_uri_node(SerdWriter* const        writer,
479                const SerdNode*          node,
480                const Field              field,
481                const SerdStatementFlags flags)
482 {
483 	SerdNode  prefix;
484 	SerdChunk suffix;
485 
486 	if (is_inline_start(writer, field, flags)) {
487 		++writer->indent;
488 		write_sep(writer, SEP_ANON_BEGIN);
489 		sink("== ", 3, writer);
490 	}
491 
492 	const bool has_scheme = serd_uri_string_has_scheme(node->buf);
493 	if (field == FIELD_PREDICATE && supports_abbrev(writer)
494 	    && !strcmp((const char*)node->buf, NS_RDF "type")) {
495 		return sink("a", 1, writer) == 1;
496 	} else if (supports_abbrev(writer)
497 	           && !strcmp((const char*)node->buf, NS_RDF "nil")) {
498 		return sink("()", 2, writer) == 2;
499 	} else if (has_scheme && (writer->style & SERD_STYLE_CURIED) &&
500 	           serd_env_qualify(writer->env, node, &prefix, &suffix) &&
501 	           is_name(suffix.buf, suffix.len)) {
502 		write_uri(writer, prefix.buf, prefix.n_bytes);
503 		sink(":", 1, writer);
504 		write_uri(writer, suffix.buf, suffix.len);
505 		return true;
506 	}
507 
508 	write_sep(writer, SEP_URI_BEGIN);
509 	if (writer->style & SERD_STYLE_RESOLVED) {
510 		SerdURI in_base_uri, uri, abs_uri;
511 		serd_env_get_base_uri(writer->env, &in_base_uri);
512 		serd_uri_parse(node->buf, &uri);
513 		serd_uri_resolve(&uri, &in_base_uri, &abs_uri);
514 		bool rooted = uri_is_under(&writer->base_uri, &writer->root_uri);
515 		SerdURI* root = rooted ? &writer->root_uri : & writer->base_uri;
516 		if (!uri_is_under(&abs_uri, root) ||
517 		    writer->syntax == SERD_NTRIPLES ||
518 		    writer->syntax == SERD_NQUADS) {
519 			serd_uri_serialise(&abs_uri, uri_sink, writer);
520 		} else {
521 			serd_uri_serialise_relative(
522 				&uri, &writer->base_uri, root, uri_sink, writer);
523 		}
524 	} else {
525 		write_uri(writer, node->buf, node->n_bytes);
526 	}
527 	write_sep(writer, SEP_URI_END);
528 	if (is_inline_start(writer, field, flags)) {
529 		sink(" ;", 2, writer);
530 		write_newline(writer);
531 	}
532 	return true;
533 }
534 
535 static bool
write_curie(SerdWriter * const writer,const SerdNode * node,const Field field,const SerdStatementFlags flags)536 write_curie(SerdWriter* const        writer,
537             const SerdNode*          node,
538             const Field              field,
539             const SerdStatementFlags flags)
540 {
541 	SerdChunk  prefix;
542 	SerdChunk  suffix;
543 	SerdStatus st;
544 	switch (writer->syntax) {
545 	case SERD_NTRIPLES:
546 	case SERD_NQUADS:
547 		if ((st = serd_env_expand(writer->env, node, &prefix, &suffix))) {
548 			w_err(writer, st, "undefined namespace prefix `%s'\n", node->buf);
549 			return false;
550 		}
551 		write_sep(writer, SEP_URI_BEGIN);
552 		write_uri(writer, prefix.buf, prefix.len);
553 		write_uri(writer, suffix.buf, suffix.len);
554 		write_sep(writer, SEP_URI_END);
555 		break;
556 	case SERD_TURTLE:
557 	case SERD_TRIG:
558 		if (is_inline_start(writer, field, flags)) {
559 			++writer->indent;
560 			write_sep(writer, SEP_ANON_BEGIN);
561 			sink("== ", 3, writer);
562 		}
563 		write_lname(writer, node->buf, node->n_bytes);
564 		if (is_inline_start(writer, field, flags)) {
565 			sink(" ;", 2, writer);
566 			write_newline(writer);
567 		}
568 	}
569 	return true;
570 }
571 
572 static bool
write_blank(SerdWriter * const writer,const SerdNode * node,const Field field,const SerdStatementFlags flags)573 write_blank(SerdWriter* const        writer,
574             const SerdNode*          node,
575             const Field              field,
576             const SerdStatementFlags flags)
577 {
578 	if (supports_abbrev(writer)) {
579 		if (is_inline_start(writer, field, flags)) {
580 			++writer->indent;
581 			return write_sep(writer, SEP_ANON_BEGIN);
582 		} else if (field == FIELD_SUBJECT && (flags & SERD_LIST_S_BEGIN)) {
583 			assert(writer->list_depth == 0);
584 			copy_node(&writer->list_subj, node);
585 			++writer->list_depth;
586 			++writer->indent;
587 			return write_sep(writer, SEP_LIST_BEGIN);
588 		} else if (field == FIELD_OBJECT && (flags & SERD_LIST_O_BEGIN)) {
589 			++writer->indent;
590 			++writer->list_depth;
591 			return write_sep(writer, SEP_LIST_BEGIN);
592 		} else if ((field == FIELD_SUBJECT && (flags & SERD_EMPTY_S)) ||
593 		           (field == FIELD_OBJECT && (flags & SERD_EMPTY_O))) {
594 			return sink("[]", 2, writer) == 2;
595 		}
596 	}
597 
598 	sink("_:", 2, writer);
599 	if (writer->bprefix && !strncmp((const char*)node->buf,
600 	                                (const char*)writer->bprefix,
601 	                                writer->bprefix_len)) {
602 		sink(node->buf + writer->bprefix_len,
603 		     node->n_bytes - writer->bprefix_len,
604 		     writer);
605 	} else {
606 		sink(node->buf, node->n_bytes, writer);
607 	}
608 
609 	return true;
610 }
611 
612 static bool
write_node(SerdWriter * writer,const SerdNode * node,const SerdNode * datatype,const SerdNode * lang,Field field,SerdStatementFlags flags)613 write_node(SerdWriter*        writer,
614            const SerdNode*    node,
615            const SerdNode*    datatype,
616            const SerdNode*    lang,
617            Field              field,
618            SerdStatementFlags flags)
619 {
620 	bool ret = false;
621 	switch (node->type) {
622 	case SERD_LITERAL:
623 		ret = write_literal(writer, node, datatype, lang, flags);
624 		break;
625 	case SERD_URI:
626 		ret = write_uri_node(writer, node, field, flags);
627 		break;
628 	case SERD_CURIE:
629 		ret = write_curie(writer, node, field, flags);
630 		break;
631 	case SERD_BLANK:
632 		ret = write_blank(writer, node, field, flags);
633 	default: break;
634 	}
635 	writer->last_sep = SEP_NONE;
636 	return ret;
637 }
638 
639 static inline bool
is_resource(const SerdNode * node)640 is_resource(const SerdNode* node)
641 {
642 	return node->type > SERD_LITERAL;
643 }
644 
645 static void
write_pred(SerdWriter * writer,SerdStatementFlags flags,const SerdNode * pred)646 write_pred(SerdWriter* writer, SerdStatementFlags flags, const SerdNode* pred)
647 {
648 	write_node(writer, pred, NULL, NULL, FIELD_PREDICATE, flags);
649 	write_sep(writer, SEP_P_O);
650 	copy_node(&writer->context.predicate, pred);
651 }
652 
653 static bool
write_list_obj(SerdWriter * writer,SerdStatementFlags flags,const SerdNode * predicate,const SerdNode * object,const SerdNode * datatype,const SerdNode * lang)654 write_list_obj(SerdWriter*        writer,
655                SerdStatementFlags flags,
656                const SerdNode*    predicate,
657                const SerdNode*    object,
658                const SerdNode*    datatype,
659                const SerdNode*    lang)
660 {
661 	if (!strcmp((const char*)object->buf, NS_RDF "nil")) {
662 		--writer->indent;
663 		write_sep(writer, SEP_LIST_END);
664 		return true;
665 	} else if (!strcmp((const char*)predicate->buf, NS_RDF "first")) {
666 		write_sep(writer, SEP_LIST_SEP);
667 		write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
668 	}
669 	return false;
670 }
671 
/**
   Write a statement.

   In NTriples and NQuads, the statement is written on a line of its own.
   In Turtle and TriG, the statement is compared with the current context
   (the graph, subject, and predicate last written) and abbreviated where
   possible, and `flags` may open anonymous nodes or lists.

   @param writer Writer to write to.
   @param flags Flags describing abbreviation structure (anon/list begin etc).
   @param graph Graph of the statement, or NULL.
   @param subject Subject, which must be a resource with a buffer.
   @param predicate Predicate, which must be a resource with a buffer.
   @param object Object, which must have a buffer.
   @param datatype Datatype of the object, or NULL.
   @param lang Language of the object, or NULL.
   @return SERD_SUCCESS on success, SERD_ERR_BAD_ARG if the subject,
   predicate, or object is unusable, or SERD_ERR_UNKNOWN if a node that is
   checked via TRY() could not be written.
*/
SerdStatus
serd_writer_write_statement(SerdWriter*        writer,
                            SerdStatementFlags flags,
                            const SerdNode*    graph,
                            const SerdNode*    subject,
                            const SerdNode*    predicate,
                            const SerdNode*    object,
                            const SerdNode*    datatype,
                            const SerdNode*    lang)
{
	// Reject missing nodes, and literal subjects or predicates
	if (!subject || !predicate || !object
	    || !subject->buf || !predicate->buf || !object->buf
	    || !is_resource(subject) || !is_resource(predicate)) {
		return SERD_ERR_BAD_ARG;
	}

// Return SERD_ERR_UNKNOWN from this function if a write fails.
// NOTE(review): this macro is never #undef'd, so it stays defined for the
// remainder of the file.
#define TRY(write_result) \
	if (!(write_result)) { \
		return SERD_ERR_UNKNOWN; \
	}

	// Line-based syntaxes: write the statement in full, with no context
	switch (writer->syntax) {
	case SERD_NTRIPLES:
	case SERD_NQUADS:
		TRY(write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags));
		sink(" ", 1, writer);
		TRY(write_node(writer, predicate, NULL, NULL, FIELD_PREDICATE, flags));
		sink(" ", 1, writer);
		TRY(write_node(writer, object, datatype, lang, FIELD_OBJECT, flags));
		if (writer->syntax == SERD_NQUADS && graph) {
			sink(" ", 1, writer);
			TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags));
		}
		sink(" .\n", 3, writer);
		return SERD_SUCCESS;
	default:
		break;
	}

	// Graph changed: close the current subject and graph, then open the new one
	if ((graph && !serd_node_equals(graph, &writer->context.graph)) ||
	    (!graph && writer->context.graph.type)) {
		writer->indent = 0;
		if (writer->context.subject.type) {
			write_sep(writer, SEP_END_S);
		}
		if (writer->context.graph.type) {
			write_sep(writer, SEP_GRAPH_END);
		}

		reset_context(writer, true);
		if (graph) {
			TRY(write_node(writer, graph, datatype, lang, FIELD_GRAPH, flags));
			++writer->indent;
			write_sep(writer, SEP_GRAPH_BEGIN);
			copy_node(&writer->context.graph, graph);
		}
	}

	if ((flags & SERD_LIST_CONT)) {
		if (write_list_obj(writer, flags, predicate, object, datatype, lang)) {
			// Reached end of list
			if (--writer->list_depth == 0 && writer->list_subj.type) {
				// Outermost list was a subject: make it the current subject.
				// Ownership of the list_subj buffer moves to the context.
				reset_context(writer, false);
				serd_node_free(&writer->context.subject);
				writer->context.subject = writer->list_subj;
				writer->list_subj       = SERD_NODE_NULL;
			}
			return SERD_SUCCESS;
		}
	} else if (serd_node_equals(subject, &writer->context.subject)) {
		if (serd_node_equals(predicate, &writer->context.predicate)) {
			// Abbreviate S P
			if (!(flags & SERD_ANON_O_BEGIN)) {
				++writer->indent;
			}
			write_sep(writer, SEP_END_O);
			write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
			if (!(flags & SERD_ANON_O_BEGIN)) {
				--writer->indent;
			}
		} else {
			// Abbreviate S
			Sep sep = writer->context.predicate.type ? SEP_END_P : SEP_S_P;
			write_sep(writer, sep);
			write_pred(writer, flags, predicate);
			write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
		}
	} else {
		// No abbreviation
		if (writer->context.subject.type) {
			// End the previous subject, unless inside an anonymous node
			assert(writer->indent > 0);
			--writer->indent;
			if (serd_stack_is_empty(&writer->anon_stack)) {
				write_sep(writer, SEP_END_S);
			}
		} else if (!writer->empty) {
			write_sep(writer, SEP_S_P);
		}

		if (!(flags & SERD_ANON_CONT)) {
			write_node(writer, subject, NULL, NULL, FIELD_SUBJECT, flags);
			++writer->indent;
			write_sep(writer, SEP_S_P);
		} else {
			// Continuing an anonymous node: the subject itself is not written
			++writer->indent;
		}

		reset_context(writer, false);
		copy_node(&writer->context.subject, subject);

		if (!(flags & SERD_LIST_S_BEGIN)) {
			write_pred(writer, flags, predicate);
		}

		write_node(writer, object, datatype, lang, FIELD_OBJECT, flags);
	}

	if (flags & (SERD_ANON_S_BEGIN|SERD_ANON_O_BEGIN)) {
		// Save the current context on the stack and start a fresh one, which
		// serd_writer_end_anon() will free before restoring the saved one
		WriteContext* ctx = (WriteContext*)serd_stack_push(
			&writer->anon_stack, sizeof(WriteContext));
		*ctx = writer->context;
		WriteContext new_context = {
			serd_node_copy(graph), serd_node_copy(subject), SERD_NODE_NULL };
		if ((flags & SERD_ANON_S_BEGIN)) {
			new_context.predicate = serd_node_copy(predicate);
		}
		writer->context = new_context;
	} else {
		// Remember this statement as the context for the next one
		copy_node(&writer->context.graph, graph);
		copy_node(&writer->context.subject, subject);
		copy_node(&writer->context.predicate, predicate);
	}

	return SERD_SUCCESS;
}
807 
808 SerdStatus
serd_writer_end_anon(SerdWriter * writer,const SerdNode * node)809 serd_writer_end_anon(SerdWriter*     writer,
810                      const SerdNode* node)
811 {
812 	if (writer->syntax == SERD_NTRIPLES || writer->syntax == SERD_NQUADS) {
813 		return SERD_SUCCESS;
814 	}
815 	if (serd_stack_is_empty(&writer->anon_stack) || writer->indent == 0) {
816 		w_err(writer, SERD_ERR_UNKNOWN,
817 		      "unexpected end of anonymous node\n");
818 		return SERD_ERR_UNKNOWN;
819 	}
820 	--writer->indent;
821 	write_sep(writer, SEP_ANON_END);
822 	free_context(writer);
823 	writer->context = *anon_stack_top(writer);
824 	serd_stack_pop(&writer->anon_stack, sizeof(WriteContext));
825 	const bool is_subject = serd_node_equals(node, &writer->context.subject);
826 	if (is_subject) {
827 		copy_node(&writer->context.subject, node);
828 		writer->context.predicate.type = SERD_NOTHING;
829 	}
830 	return SERD_SUCCESS;
831 }
832 
833 SerdStatus
serd_writer_finish(SerdWriter * writer)834 serd_writer_finish(SerdWriter* writer)
835 {
836 	if (writer->context.subject.type) {
837 		write_sep(writer, SEP_END_S);
838 	}
839 	if (writer->context.graph.type) {
840 		write_sep(writer, SEP_GRAPH_END);
841 	}
842 	serd_byte_sink_flush(&writer->byte_sink);
843 	writer->indent = 0;
844 	return free_context(writer);
845 }
846 
847 SerdWriter*
serd_writer_new(SerdSyntax syntax,SerdStyle style,SerdEnv * env,const SerdURI * base_uri,SerdSink ssink,void * stream)848 serd_writer_new(SerdSyntax     syntax,
849                 SerdStyle      style,
850                 SerdEnv*       env,
851                 const SerdURI* base_uri,
852                 SerdSink       ssink,
853                 void*          stream)
854 {
855 	const WriteContext context = WRITE_CONTEXT_NULL;
856 	SerdWriter*        writer  = (SerdWriter*)calloc(1, sizeof(SerdWriter));
857 	writer->syntax       = syntax;
858 	writer->style        = style;
859 	writer->env          = env;
860 	writer->root_node    = SERD_NODE_NULL;
861 	writer->root_uri     = SERD_URI_NULL;
862 	writer->base_uri     = base_uri ? *base_uri : SERD_URI_NULL;
863 	writer->anon_stack   = serd_stack_new(4 * sizeof(WriteContext));
864 	writer->context      = context;
865 	writer->list_subj    = SERD_NODE_NULL;
866 	writer->empty        = true;
867 	writer->byte_sink    = serd_byte_sink_new(
868 		ssink, stream, (style & SERD_STYLE_BULK) ? SERD_PAGE_SIZE : 1);
869 	return writer;
870 }
871 
872 void
serd_writer_set_error_sink(SerdWriter * writer,SerdErrorSink error_sink,void * error_handle)873 serd_writer_set_error_sink(SerdWriter*   writer,
874                            SerdErrorSink error_sink,
875                            void*         error_handle)
876 {
877 	writer->error_sink   = error_sink;
878 	writer->error_handle = error_handle;
879 }
880 
881 void
serd_writer_chop_blank_prefix(SerdWriter * writer,const uint8_t * prefix)882 serd_writer_chop_blank_prefix(SerdWriter*    writer,
883                               const uint8_t* prefix)
884 {
885 	free(writer->bprefix);
886 	writer->bprefix_len = 0;
887 	writer->bprefix     = NULL;
888 	if (prefix) {
889 		writer->bprefix_len = strlen((const char*)prefix);
890 		writer->bprefix     = (uint8_t*)malloc(writer->bprefix_len + 1);
891 		memcpy(writer->bprefix, prefix, writer->bprefix_len + 1);
892 	}
893 }
894 
895 SerdStatus
serd_writer_set_base_uri(SerdWriter * writer,const SerdNode * uri)896 serd_writer_set_base_uri(SerdWriter*     writer,
897                          const SerdNode* uri)
898 {
899 	if (!serd_env_set_base_uri(writer->env, uri)) {
900 		serd_env_get_base_uri(writer->env, &writer->base_uri);
901 
902 		if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
903 			if (writer->context.graph.type || writer->context.subject.type) {
904 				sink(" .\n\n", 4, writer);
905 				reset_context(writer, true);
906 			}
907 			sink("@base <", 7, writer);
908 			sink(uri->buf, uri->n_bytes, writer);
909 			sink("> .\n", 4, writer);
910 		}
911 		writer->indent = 0;
912 		return reset_context(writer, true);
913 	}
914 	return SERD_ERR_UNKNOWN;
915 }
916 
917 SerdStatus
serd_writer_set_root_uri(SerdWriter * writer,const SerdNode * uri)918 serd_writer_set_root_uri(SerdWriter*     writer,
919                          const SerdNode* uri)
920 {
921 	serd_node_free(&writer->root_node);
922 	if (uri && uri->buf) {
923 		writer->root_node = serd_node_copy(uri);
924 		serd_uri_parse(uri->buf, &writer->root_uri);
925 	} else {
926 		writer->root_node = SERD_NODE_NULL;
927 		writer->root_uri  = SERD_URI_NULL;
928 	}
929 	return SERD_SUCCESS;
930 }
931 
932 SerdStatus
serd_writer_set_prefix(SerdWriter * writer,const SerdNode * name,const SerdNode * uri)933 serd_writer_set_prefix(SerdWriter*     writer,
934                        const SerdNode* name,
935                        const SerdNode* uri)
936 {
937 	if (!serd_env_set_prefix(writer->env, name, uri)) {
938 		if (writer->syntax == SERD_TURTLE || writer->syntax == SERD_TRIG) {
939 			if (writer->context.graph.type || writer->context.subject.type) {
940 				sink(" .\n\n", 4, writer);
941 				reset_context(writer, true);
942 			}
943 			sink("@prefix ", 8, writer);
944 			sink(name->buf, name->n_bytes, writer);
945 			sink(": <", 3, writer);
946 			write_uri(writer, uri->buf, uri->n_bytes);
947 			sink("> .\n", 4, writer);
948 		}
949 		writer->indent = 0;
950 		return reset_context(writer, true);
951 	}
952 	return SERD_ERR_UNKNOWN;
953 }
954 
955 void
serd_writer_free(SerdWriter * writer)956 serd_writer_free(SerdWriter* writer)
957 {
958 	serd_writer_finish(writer);
959 	serd_stack_free(&writer->anon_stack);
960 	free(writer->bprefix);
961 	serd_byte_sink_free(&writer->byte_sink);
962 	serd_node_free(&writer->root_node);
963 	free(writer);
964 }
965 
966 SerdEnv*
serd_writer_get_env(SerdWriter * writer)967 serd_writer_get_env(SerdWriter* writer)
968 {
969 	return writer->env;
970 }
971 
/**
   Sink function that writes to a standard C file.

   @param buf Bytes to write.
   @param len Number of bytes in `buf`.
   @param stream A `FILE*` open for writing.
   @return The number of bytes written, which is less than `len` on error.
*/
size_t
serd_file_sink(const void* buf, size_t len, void* stream)
{
	FILE* const file = (FILE*)stream;

	return fwrite(buf, sizeof(char), len, file);
}
977 
978 size_t
serd_chunk_sink(const void * buf,size_t len,void * stream)979 serd_chunk_sink(const void* buf, size_t len, void* stream)
980 {
981 	SerdChunk* chunk = (SerdChunk*)stream;
982 	chunk->buf = (uint8_t*)realloc((uint8_t*)chunk->buf, chunk->len + len);
983 	memcpy((uint8_t*)chunk->buf + chunk->len, buf, len);
984 	chunk->len += len;
985 	return len;
986 }
987 
988 uint8_t*
serd_chunk_sink_finish(SerdChunk * stream)989 serd_chunk_sink_finish(SerdChunk* stream)
990 {
991 	serd_chunk_sink("", 1, stream);
992 	return (uint8_t*)stream->buf;
993 }
994