1 /*
2 Copyright 2011-2016 David Robillard <http://drobilla.net>
3
4 Permission to use, copy, modify, and/or distribute this software for any
5 purpose with or without fee is hereby granted, provided that the above
6 copyright notice and this permission notice appear in all copies.
7
8 THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 // C99
18 #include <assert.h>
19 #include <errno.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24
25 #define ZIX_INLINE
26 #include "zix/digest.c"
27 #include "zix/hash.c"
28 #include "zix/btree.c"
29
30 #include "sord_config.h"
31 #include "sord_internal.h"
32
33 #define SORD_LOG(prefix, ...) fprintf(stderr, "[Sord::" prefix "] " __VA_ARGS__)
34
35 #ifdef SORD_DEBUG_ITER
36 # define SORD_ITER_LOG(...) SORD_LOG("iter", __VA_ARGS__)
37 #else
38 # define SORD_ITER_LOG(...)
39 #endif
40 #ifdef SORD_DEBUG_SEARCH
41 # define SORD_FIND_LOG(...) SORD_LOG("search", __VA_ARGS__)
42 #else
43 # define SORD_FIND_LOG(...)
44 #endif
45 #ifdef SORD_DEBUG_WRITE
46 # define SORD_WRITE_LOG(...) SORD_LOG("write", __VA_ARGS__)
47 #else
48 # define SORD_WRITE_LOG(...)
49 #endif
50
51 #define NUM_ORDERS 12
52 #define STATEMENT_LEN 3
53 #define TUP_LEN (STATEMENT_LEN + 1)
54 #define DEFAULT_ORDER SPO
55 #define DEFAULT_GRAPH_ORDER GSPO
56
57 #define TUP_FMT "(%s %s %s %s)"
58 #define TUP_FMT_ELEM(e) ((e) ? sord_node_get_string(e) : (const uint8_t*)"*")
59 #define TUP_FMT_ARGS(t) \
60 TUP_FMT_ELEM((t)[0]), \
61 TUP_FMT_ELEM((t)[1]), \
62 TUP_FMT_ELEM((t)[2]), \
63 TUP_FMT_ELEM((t)[3])
64
65 #define TUP_S 0
66 #define TUP_P 1
67 #define TUP_O 2
68 #define TUP_G 3
69
70 /** Triple ordering */
71 typedef enum {
72 SPO, ///< Subject, Predicate, Object
73 SOP, ///< Subject, Object, Predicate
74 OPS, ///< Object, Predicate, Subject
75 OSP, ///< Object, Subject, Predicate
76 PSO, ///< Predicate, Subject, Object
77 POS, ///< Predicate, Object, Subject
78 GSPO, ///< Graph, Subject, Predicate, Object
79 GSOP, ///< Graph, Subject, Object, Predicate
80 GOPS, ///< Graph, Object, Predicate, Subject
81 GOSP, ///< Graph, Object, Subject, Predicate
82 GPSO, ///< Graph, Predicate, Subject, Object
83 GPOS ///< Graph, Predicate, Object, Subject
84 } SordOrder;
85
86 #ifdef SORD_DEBUG_SEARCH
87 /** String name of each ordering (array indexed by SordOrder) */
88 static const char* const order_names[NUM_ORDERS] = {
89 "spo", "sop", "ops", "osp", "pso", "pos",
90 "gspo", "gsop", "gops", "gosp", "gpso", "gpos"
91 };
92 #endif
93
94 /**
95 Quads of indices for each order, from most to least significant
96 (array indexed by SordOrder)
97 */
98 static const int orderings[NUM_ORDERS][TUP_LEN] = {
99 { 0, 1, 2, 3 }, { 0, 2, 1, 3 }, // SPO, SOP
100 { 2, 1, 0, 3 }, { 2, 0, 1, 3 }, // OPS, OSP
101 { 1, 0, 2, 3 }, { 1, 2, 0, 3 }, // PSO, POS
102 { 3, 0, 1, 2 }, { 3, 0, 2, 1 }, // GSPO, GSOP
103 { 3, 2, 1, 0 }, { 3, 2, 0, 1 }, // GOPS, GOSP
104 { 3, 1, 0, 2 }, { 3, 1, 2, 0 } // GPSO, GPOS
105 };
106
107 /** World */
108 struct SordWorldImpl {
109 ZixHash* nodes;
110 SerdErrorSink error_sink;
111 void* error_handle;
112 };
113
114 /** Store */
115 struct SordModelImpl {
116 SordWorld* world;
117
118 /** Index for each possible triple ordering (may or may not exist).
119 * Each index is a tree of SordQuad with the appropriate ordering.
120 */
121 ZixBTree* indices[NUM_ORDERS];
122
123 size_t n_quads;
124 size_t n_iters;
125 };
126
127 /** Mode for searching or iteration */
128 typedef enum {
129 ALL, ///< Iterate over entire store
130 SINGLE, ///< Iteration over a single element (exact search)
131 RANGE, ///< Iterate over range with equal prefix
132 FILTER_RANGE, ///< Iterate over range with equal prefix, filtering
133 FILTER_ALL ///< Iterate to end of store, filtering
134 } SearchMode;
135
136 /** Iterator over some range of a store */
137 struct SordIterImpl {
138 const SordModel* sord; ///< Model being iterated over
139 ZixBTreeIter* cur; ///< Current DB cursor
140 SordQuad pat; ///< Pattern (in ordering order)
141 SordOrder order; ///< Store order (which index)
142 SearchMode mode; ///< Iteration mode
143 int n_prefix; ///< Prefix for RANGE and FILTER_RANGE
144 bool end; ///< True iff reached end
145 bool skip_graphs; ///< Iteration should ignore graphs
146 };
147
148 static uint32_t
sord_node_hash(const void * n)149 sord_node_hash(const void* n)
150 {
151 const SordNode* node = (const SordNode*)n;
152 uint32_t hash = zix_digest_start();
153 hash = zix_digest_add(hash, node->node.buf, node->node.n_bytes);
154 hash = zix_digest_add(hash, &node->node.type, sizeof(node->node.type));
155 if (node->node.type == SERD_LITERAL) {
156 hash = zix_digest_add(hash, &node->meta.lit, sizeof(node->meta.lit));
157 }
158 return hash;
159 }
160
161 static bool
sord_node_hash_equal(const void * a,const void * b)162 sord_node_hash_equal(const void* a, const void* b)
163 {
164 const SordNode* a_node = (const SordNode*)a;
165 const SordNode* b_node = (const SordNode*)b;
166 return (a_node == b_node)
167 || ((a_node->node.type == b_node->node.type) &&
168 (a_node->node.type != SERD_LITERAL ||
169 (a_node->meta.lit.datatype == b_node->meta.lit.datatype &&
170 !strncmp(a_node->meta.lit.lang,
171 b_node->meta.lit.lang,
172 sizeof(a_node->meta.lit.lang)))) &&
173 (serd_node_equals(&a_node->node, &b_node->node)));
174 }
175
176 static void
error(SordWorld * world,SerdStatus st,const char * fmt,...)177 error(SordWorld* world, SerdStatus st, const char* fmt, ...)
178 {
179 va_list args;
180 va_start(args, fmt);
181 const SerdError e = { st, NULL, 0, 0, fmt, &args };
182 if (world->error_sink) {
183 world->error_sink(world->error_handle, &e);
184 } else {
185 fprintf(stderr, "error: ");
186 vfprintf(stderr, fmt, args);
187 }
188 va_end(args);
189 }
190
191 SordWorld*
sord_world_new(void)192 sord_world_new(void)
193 {
194 SordWorld* world = (SordWorld*)malloc(sizeof(SordWorld));
195 world->error_sink = NULL;
196 world->error_handle = NULL;
197
198 world->nodes = zix_hash_new(
199 sord_node_hash, sord_node_hash_equal, sizeof(SordNode));
200
201 return world;
202 }
203
204 static void
free_node_entry(void * value,void * user_data)205 free_node_entry(void* value, void* user_data)
206 {
207 SordNode* node = (SordNode*)value;
208 if (node->node.type == SERD_LITERAL) {
209 sord_node_free((SordWorld*)user_data, node->meta.lit.datatype);
210 }
211 free((uint8_t*)node->node.buf);
212 }
213
214 void
sord_world_free(SordWorld * world)215 sord_world_free(SordWorld* world)
216 {
217 zix_hash_foreach(world->nodes, free_node_entry, world);
218 zix_hash_free(world->nodes);
219 free(world);
220 }
221
222 void
sord_world_set_error_sink(SordWorld * world,SerdErrorSink error_sink,void * handle)223 sord_world_set_error_sink(SordWorld* world,
224 SerdErrorSink error_sink,
225 void* handle)
226 {
227 world->error_sink = error_sink;
228 world->error_handle = handle;
229 }
230
231 /** Compare nodes, considering NULL a wildcard match. */
232 static inline int
sord_node_compare(const SordNode * a,const SordNode * b)233 sord_node_compare(const SordNode* a, const SordNode* b)
234 {
235 if (a == b || !a || !b) {
236 return 0; // Exact or wildcard match
237 } else if (a->node.type != b->node.type) {
238 return a->node.type - b->node.type;
239 }
240
241 int cmp = 0;
242 switch (a->node.type) {
243 case SERD_URI:
244 case SERD_BLANK:
245 return strcmp((const char*)a->node.buf, (const char*)b->node.buf);
246 case SERD_LITERAL:
247 cmp = strcmp((const char*)sord_node_get_string(a),
248 (const char*)sord_node_get_string(b));
249 if (cmp == 0) {
250 // Note: Can't use sord_node_compare here since it does wildcards
251 if (!a->meta.lit.datatype || !b->meta.lit.datatype) {
252 cmp = a->meta.lit.datatype - b->meta.lit.datatype;
253 } else {
254 cmp = strcmp((const char*)a->meta.lit.datatype->node.buf,
255 (const char*)b->meta.lit.datatype->node.buf);
256 }
257 }
258 if (cmp == 0) {
259 cmp = strcmp(a->meta.lit.lang, b->meta.lit.lang);
260 }
261 default:
262 break;
263 }
264 return cmp;
265 }
266
267 bool
sord_node_equals(const SordNode * a,const SordNode * b)268 sord_node_equals(const SordNode* a, const SordNode* b)
269 {
270 return a == b; // Nodes are interned
271 }
272
273 /** Return true iff IDs are equivalent, or one is a wildcard */
274 static inline bool
sord_id_match(const SordNode * a,const SordNode * b)275 sord_id_match(const SordNode* a, const SordNode* b)
276 {
277 return !a || !b || (a == b);
278 }
279
280 static inline bool
sord_quad_match_inline(const SordQuad x,const SordQuad y)281 sord_quad_match_inline(const SordQuad x, const SordQuad y)
282 {
283 return sord_id_match(x[0], y[0])
284 && sord_id_match(x[1], y[1])
285 && sord_id_match(x[2], y[2])
286 && sord_id_match(x[3], y[3]);
287 }
288
289 bool
sord_quad_match(const SordQuad x,const SordQuad y)290 sord_quad_match(const SordQuad x, const SordQuad y)
291 {
292 return sord_quad_match_inline(x, y);
293 }
294
295 /**
296 Compare two quad IDs lexicographically.
297 NULL IDs (equal to 0) are treated as wildcards, always less than every
298 other possible ID, except itself.
299 */
300 static int
sord_quad_compare(const void * x_ptr,const void * y_ptr,void * user_data)301 sord_quad_compare(const void* x_ptr, const void* y_ptr, void* user_data)
302 {
303 const int* const ordering = (const int*)user_data;
304 const SordNode*const*const x = (const SordNode*const*)x_ptr;
305 const SordNode*const*const y = (const SordNode*const*)y_ptr;
306
307 for (int i = 0; i < TUP_LEN; ++i) {
308 const int idx = ordering[i];
309 const int cmp = sord_node_compare(x[idx], y[idx]);
310 if (cmp) {
311 return cmp;
312 }
313 }
314
315 return 0;
316 }
317
318 static inline bool
sord_iter_forward(SordIter * iter)319 sord_iter_forward(SordIter* iter)
320 {
321 if (!iter->skip_graphs) {
322 zix_btree_iter_increment(iter->cur);
323 return zix_btree_iter_is_end(iter->cur);
324 }
325
326 SordNode** key = (SordNode**)zix_btree_get(iter->cur);
327 const SordQuad initial = { key[0], key[1], key[2], key[3] };
328 zix_btree_iter_increment(iter->cur);
329 while (!zix_btree_iter_is_end(iter->cur)) {
330 key = (SordNode**)zix_btree_get(iter->cur);
331 for (int i = 0; i < 3; ++i) {
332 if (key[i] != initial[i]) {
333 return false;
334 }
335 }
336
337 zix_btree_iter_increment(iter->cur);
338 }
339
340 return true;
341 }
342
343 /**
344 Seek forward as necessary until `iter` points at a match.
345 @return true iff iterator reached end of valid range.
346 */
347 static inline bool
sord_iter_seek_match(SordIter * iter)348 sord_iter_seek_match(SordIter* iter)
349 {
350 for (iter->end = true;
351 !zix_btree_iter_is_end(iter->cur);
352 sord_iter_forward(iter)) {
353 const SordNode** const key = (const SordNode**)zix_btree_get(iter->cur);
354 if (sord_quad_match_inline(key, iter->pat)) {
355 return (iter->end = false);
356 }
357 }
358 return true;
359 }
360
361 /**
362 Seek forward as necessary until `iter` points at a match, or the prefix
363 no longer matches iter->pat.
364 @return true iff iterator reached end of valid range.
365 */
366 static inline bool
sord_iter_seek_match_range(SordIter * iter)367 sord_iter_seek_match_range(SordIter* iter)
368 {
369 assert(!iter->end);
370
371 do {
372 const SordNode** key = (const SordNode**)zix_btree_get(iter->cur);
373
374 if (sord_quad_match_inline(key, iter->pat)) {
375 return false; // Found match
376 }
377
378 for (int i = 0; i < iter->n_prefix; ++i) {
379 const int idx = orderings[iter->order][i];
380 if (!sord_id_match(key[idx], iter->pat[idx])) {
381 iter->end = true; // Reached end of valid range
382 return true;
383 }
384 }
385 } while (!sord_iter_forward(iter));
386
387 return (iter->end = true); // Reached end
388 }
389
390 static SordIter*
sord_iter_new(const SordModel * sord,ZixBTreeIter * cur,const SordQuad pat,SordOrder order,SearchMode mode,int n_prefix)391 sord_iter_new(const SordModel* sord, ZixBTreeIter* cur, const SordQuad pat,
392 SordOrder order, SearchMode mode, int n_prefix)
393 {
394 SordIter* iter = (SordIter*)malloc(sizeof(SordIter));
395 iter->sord = sord;
396 iter->cur = cur;
397 iter->order = order;
398 iter->mode = mode;
399 iter->n_prefix = n_prefix;
400 iter->end = false;
401 iter->skip_graphs = order < GSPO;
402 for (int i = 0; i < TUP_LEN; ++i) {
403 iter->pat[i] = pat[i];
404 }
405
406 switch (iter->mode) {
407 case ALL:
408 case SINGLE:
409 case RANGE:
410 assert(
411 sord_quad_match_inline((const SordNode**)zix_btree_get(iter->cur),
412 iter->pat));
413 break;
414 case FILTER_RANGE:
415 sord_iter_seek_match_range(iter);
416 break;
417 case FILTER_ALL:
418 sord_iter_seek_match(iter);
419 break;
420 }
421
422 #ifdef SORD_DEBUG_ITER
423 SordQuad value;
424 sord_iter_get(iter, value);
425 SORD_ITER_LOG("New %p pat=" TUP_FMT " cur=" TUP_FMT " end=%d skip=%d\n",
426 (void*)iter, TUP_FMT_ARGS(pat), TUP_FMT_ARGS(value),
427 iter->end, iter->skip_graphs);
428 #endif
429
430 ++((SordModel*)sord)->n_iters;
431 return iter;
432 }
433
434 const SordModel*
sord_iter_get_model(SordIter * iter)435 sord_iter_get_model(SordIter* iter)
436 {
437 return iter->sord;
438 }
439
440 void
sord_iter_get(const SordIter * iter,SordQuad tup)441 sord_iter_get(const SordIter* iter, SordQuad tup)
442 {
443 SordNode** key = (SordNode**)zix_btree_get(iter->cur);
444 for (int i = 0; i < TUP_LEN; ++i) {
445 tup[i] = key[i];
446 }
447 }
448
449 const SordNode*
sord_iter_get_node(const SordIter * iter,SordQuadIndex index)450 sord_iter_get_node(const SordIter* iter, SordQuadIndex index)
451 {
452 return (!sord_iter_end(iter)
453 ? ((SordNode**)zix_btree_get(iter->cur))[index]
454 : NULL);
455 }
456
457 static bool
sord_iter_scan_next(SordIter * iter)458 sord_iter_scan_next(SordIter* iter)
459 {
460 if (iter->end) {
461 return true;
462 }
463
464 const SordNode** key;
465 if (!iter->end) {
466 switch (iter->mode) {
467 case ALL:
468 // At the end if the cursor is (assigned above)
469 break;
470 case SINGLE:
471 iter->end = true;
472 SORD_ITER_LOG("%p reached single end\n", (void*)iter);
473 break;
474 case RANGE:
475 SORD_ITER_LOG("%p range next\n", (void*)iter);
476 // At the end if the MSNs no longer match
477 key = (const SordNode**)zix_btree_get(iter->cur);
478 assert(key);
479 for (int i = 0; i < iter->n_prefix; ++i) {
480 const int idx = orderings[iter->order][i];
481 if (!sord_id_match(key[idx], iter->pat[idx])) {
482 iter->end = true;
483 SORD_ITER_LOG("%p reached non-match end\n", (void*)iter);
484 break;
485 }
486 }
487 break;
488 case FILTER_RANGE:
489 // Seek forward to next match, stopping if prefix changes
490 sord_iter_seek_match_range(iter);
491 break;
492 case FILTER_ALL:
493 // Seek forward to next match
494 sord_iter_seek_match(iter);
495 break;
496 }
497 } else {
498 SORD_ITER_LOG("%p reached index end\n", (void*)iter);
499 }
500
501 if (iter->end) {
502 SORD_ITER_LOG("%p Reached end\n", (void*)iter);
503 return true;
504 } else {
505 #ifdef SORD_DEBUG_ITER
506 SordQuad tup;
507 sord_iter_get(iter, tup);
508 SORD_ITER_LOG("%p Increment to " TUP_FMT "\n",
509 (void*)iter, TUP_FMT_ARGS(tup));
510 #endif
511 return false;
512 }
513 }
514
515 bool
sord_iter_next(SordIter * iter)516 sord_iter_next(SordIter* iter)
517 {
518 if (iter->end) {
519 return true;
520 }
521
522 iter->end = sord_iter_forward(iter);
523 return sord_iter_scan_next(iter);
524 }
525
526 bool
sord_iter_end(const SordIter * iter)527 sord_iter_end(const SordIter* iter)
528 {
529 return !iter || iter->end;
530 }
531
532 void
sord_iter_free(SordIter * iter)533 sord_iter_free(SordIter* iter)
534 {
535 SORD_ITER_LOG("%p Free\n", (void*)iter);
536 if (iter) {
537 --((SordModel*)iter->sord)->n_iters;
538 zix_btree_iter_free(iter->cur);
539 free(iter);
540 }
541 }
542
543 /**
544 Return true iff `sord` has an index for `order`.
545 If `graphs` is true, `order` will be modified to be the
546 corresponding order with a G prepended (so G will be the MSN).
547 */
548 static inline bool
sord_has_index(SordModel * model,SordOrder * order,int * n_prefix,bool graphs)549 sord_has_index(SordModel* model, SordOrder* order, int* n_prefix, bool graphs)
550 {
551 if (graphs) {
552 *order = (SordOrder)(*order + GSPO);
553 *n_prefix += 1;
554 }
555
556 return model->indices[*order];
557 }
558
559 /**
560 Return the best available index for a pattern.
561 @param pat Pattern in standard (S P O G) order
562 @param mode Set to the (best) iteration mode for iterating over results
563 @param n_prefix Set to the length of the range prefix
564 (for `mode` == RANGE and `mode` == FILTER_RANGE)
565 */
566 static inline SordOrder
sord_best_index(SordModel * sord,const SordQuad pat,SearchMode * mode,int * n_prefix)567 sord_best_index(SordModel* sord,
568 const SordQuad pat,
569 SearchMode* mode,
570 int* n_prefix)
571 {
572 const bool graph_search = (pat[TUP_G] != 0);
573
574 const unsigned sig
575 = (pat[0] ? 1 : 0) * 0x100
576 + (pat[1] ? 1 : 0) * 0x010
577 + (pat[2] ? 1 : 0) * 0x001;
578
579 SordOrder good[2] = { (SordOrder)-1, (SordOrder)-1 };
580
581 #define PAT_CASE(sig, m, g0, g1, np) \
582 case sig: \
583 *mode = m; \
584 good[0] = g0; \
585 good[1] = g1; \
586 *n_prefix = np; \
587 break
588
589 // Good orderings that don't require filtering
590 *mode = RANGE;
591 *n_prefix = 0;
592 switch (sig) {
593 case 0x000:
594 assert(graph_search);
595 *mode = RANGE;
596 *n_prefix = 1;
597 return DEFAULT_GRAPH_ORDER;
598 case 0x111:
599 *mode = SINGLE;
600 return graph_search ? DEFAULT_GRAPH_ORDER : DEFAULT_ORDER;
601
602 PAT_CASE(0x001, RANGE, OPS, OSP, 1);
603 PAT_CASE(0x010, RANGE, POS, PSO, 1);
604 PAT_CASE(0x011, RANGE, OPS, POS, 2);
605 PAT_CASE(0x100, RANGE, SPO, SOP, 1);
606 PAT_CASE(0x101, RANGE, SOP, OSP, 2);
607 PAT_CASE(0x110, RANGE, SPO, PSO, 2);
608 }
609
610 if (*mode == RANGE) {
611 if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
612 return good[0];
613 } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
614 return good[1];
615 }
616 }
617
618 // Not so good orderings that require filtering, but can
619 // still be constrained to a range
620 switch (sig) {
621 PAT_CASE(0x011, FILTER_RANGE, OSP, PSO, 1);
622 PAT_CASE(0x101, FILTER_RANGE, SPO, OPS, 1);
623 // SPO is always present, so 0x110 is never reached here
624 default: break;
625 }
626
627 if (*mode == FILTER_RANGE) {
628 if (sord_has_index(sord, &good[0], n_prefix, graph_search)) {
629 return good[0];
630 } else if (sord_has_index(sord, &good[1], n_prefix, graph_search)) {
631 return good[1];
632 }
633 }
634
635 if (graph_search) {
636 *mode = FILTER_RANGE;
637 *n_prefix = 1;
638 return DEFAULT_GRAPH_ORDER;
639 } else {
640 *mode = FILTER_ALL;
641 return DEFAULT_ORDER;
642 }
643 }
644
645 SordModel*
sord_new(SordWorld * world,unsigned indices,bool graphs)646 sord_new(SordWorld* world, unsigned indices, bool graphs)
647 {
648 SordModel* model = (SordModel*)malloc(sizeof(struct SordModelImpl));
649 model->world = world;
650 model->n_quads = 0;
651 model->n_iters = 0;
652
653 for (unsigned i = 0; i < (NUM_ORDERS / 2); ++i) {
654 const int* const ordering = orderings[i];
655 const int* const g_ordering = orderings[i + (NUM_ORDERS / 2)];
656
657 if (indices & (1 << i)) {
658 model->indices[i] = zix_btree_new(
659 sord_quad_compare, (void*)ordering, NULL);
660 if (graphs) {
661 model->indices[i + (NUM_ORDERS / 2)] = zix_btree_new(
662 sord_quad_compare, (void*)g_ordering, NULL);
663 } else {
664 model->indices[i + (NUM_ORDERS / 2)] = NULL;
665 }
666 } else {
667 model->indices[i] = NULL;
668 model->indices[i + (NUM_ORDERS / 2)] = NULL;
669 }
670 }
671
672 if (!model->indices[DEFAULT_ORDER]) {
673 model->indices[DEFAULT_ORDER] = zix_btree_new(
674 sord_quad_compare, (void*)orderings[DEFAULT_ORDER], NULL);
675 }
676 if (graphs && !model->indices[DEFAULT_GRAPH_ORDER]) {
677 model->indices[DEFAULT_GRAPH_ORDER] = zix_btree_new(
678 sord_quad_compare, (void*)orderings[DEFAULT_GRAPH_ORDER], NULL);
679 }
680
681 return model;
682 }
683
684 static void
sord_node_free_internal(SordWorld * world,SordNode * node)685 sord_node_free_internal(SordWorld* world, SordNode* node)
686 {
687 assert(node->refs == 0);
688
689 // Cache pointer to buffer to free after node removal and destruction
690 const uint8_t* const buf = node->node.buf;
691
692 // Remove node from hash (which frees the node)
693 if (zix_hash_remove(world->nodes, node)) {
694 error(world, SERD_ERR_INTERNAL, "failed to remove node from hash\n");
695 }
696
697 // Free buffer
698 free((uint8_t*)buf);
699 }
700
701 static void
sord_add_quad_ref(SordModel * model,const SordNode * node,SordQuadIndex i)702 sord_add_quad_ref(SordModel* model, const SordNode* node, SordQuadIndex i)
703 {
704 if (node) {
705 assert(node->refs > 0);
706 ++((SordNode*)node)->refs;
707 if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) {
708 ++((SordNode*)node)->meta.res.refs_as_obj;
709 }
710 }
711 }
712
713 static void
sord_drop_quad_ref(SordModel * model,const SordNode * node,SordQuadIndex i)714 sord_drop_quad_ref(SordModel* model, const SordNode* node, SordQuadIndex i)
715 {
716 if (!node) {
717 return;
718 }
719
720 assert(node->refs > 0);
721 if (node->node.type != SERD_LITERAL && i == SORD_OBJECT) {
722 assert(node->meta.res.refs_as_obj > 0);
723 --((SordNode*)node)->meta.res.refs_as_obj;
724 }
725 if (--((SordNode*)node)->refs == 0) {
726 sord_node_free_internal(sord_get_world(model), (SordNode*)node);
727 }
728 }
729
730 void
sord_free(SordModel * model)731 sord_free(SordModel* model)
732 {
733 if (!model) {
734 return;
735 }
736
737 // Free nodes
738 SordQuad tup;
739 SordIter* i = sord_begin(model);
740 for (; !sord_iter_end(i); sord_iter_next(i)) {
741 sord_iter_get(i, tup);
742 for (int t = 0; t < TUP_LEN; ++t) {
743 sord_drop_quad_ref(model, tup[t], (SordQuadIndex)t);
744 }
745 }
746 sord_iter_free(i);
747
748 // Free quads
749 ZixBTreeIter* t = zix_btree_begin(model->indices[DEFAULT_ORDER]);
750 for (; !zix_btree_iter_is_end(t); zix_btree_iter_increment(t)) {
751 free(zix_btree_get(t));
752 }
753 zix_btree_iter_free(t);
754
755 // Free indices
756 for (unsigned o = 0; o < NUM_ORDERS; ++o) {
757 if (model->indices[o]) {
758 zix_btree_free(model->indices[o]);
759 }
760 }
761
762 free(model);
763 }
764
765 SordWorld*
sord_get_world(SordModel * model)766 sord_get_world(SordModel* model)
767 {
768 return model->world;
769 }
770
771 size_t
sord_num_quads(const SordModel * model)772 sord_num_quads(const SordModel* model)
773 {
774 return model->n_quads;
775 }
776
777 size_t
sord_num_nodes(const SordWorld * world)778 sord_num_nodes(const SordWorld* world)
779 {
780 return zix_hash_size(world->nodes);
781 }
782
783 SordIter*
sord_begin(const SordModel * model)784 sord_begin(const SordModel* model)
785 {
786 if (sord_num_quads(model) == 0) {
787 return NULL;
788 } else {
789 ZixBTreeIter* cur = zix_btree_begin(model->indices[DEFAULT_ORDER]);
790 SordQuad pat = { 0, 0, 0, 0 };
791 return sord_iter_new(model, cur, pat, DEFAULT_ORDER, ALL, 0);
792 }
793 }
794
795 SordIter*
sord_find(SordModel * model,const SordQuad pat)796 sord_find(SordModel* model, const SordQuad pat)
797 {
798 if (!pat[0] && !pat[1] && !pat[2] && !pat[3]) {
799 return sord_begin(model);
800 }
801
802 SearchMode mode;
803 int n_prefix;
804 const SordOrder index_order = sord_best_index(model, pat, &mode, &n_prefix);
805
806 SORD_FIND_LOG("Find " TUP_FMT " index=%s mode=%d n_prefix=%d\n",
807 TUP_FMT_ARGS(pat), order_names[index_order], mode, n_prefix);
808
809 if (pat[0] && pat[1] && pat[2] && pat[3]) {
810 mode = SINGLE; // No duplicate quads (Sord is a set)
811 }
812
813 ZixBTree* const db = model->indices[index_order];
814 ZixBTreeIter* cur = NULL;
815 zix_btree_lower_bound(db, pat, &cur);
816 if (zix_btree_iter_is_end(cur)) {
817 SORD_FIND_LOG("No match found\n");
818 zix_btree_iter_free(cur);
819 return NULL;
820 }
821 const SordNode** const key = (const SordNode**)zix_btree_get(cur);
822 if (!key || ( (mode == RANGE || mode == SINGLE)
823 && !sord_quad_match_inline(pat, key) )) {
824 SORD_FIND_LOG("No match found\n");
825 zix_btree_iter_free(cur);
826 return NULL;
827 }
828
829 return sord_iter_new(model, cur, pat, index_order, mode, n_prefix);
830 }
831
832 SordIter*
sord_search(SordModel * model,const SordNode * s,const SordNode * p,const SordNode * o,const SordNode * g)833 sord_search(SordModel* model,
834 const SordNode* s,
835 const SordNode* p,
836 const SordNode* o,
837 const SordNode* g)
838 {
839 SordQuad pat = { s, p, o, g };
840 return sord_find(model, pat);
841 }
842
843 SordNode*
sord_get(SordModel * model,const SordNode * s,const SordNode * p,const SordNode * o,const SordNode * g)844 sord_get(SordModel* model,
845 const SordNode* s,
846 const SordNode* p,
847 const SordNode* o,
848 const SordNode* g)
849 {
850 if ((bool)s + (bool)p + (bool)o != 2) {
851 return NULL;
852 }
853
854 SordIter* i = sord_search(model, s, p, o, g);
855 SordNode* ret = NULL;
856 if (!s) {
857 ret = sord_node_copy(sord_iter_get_node(i, SORD_SUBJECT));
858 } else if (!p) {
859 ret = sord_node_copy(sord_iter_get_node(i, SORD_PREDICATE));
860 } else if (!o) {
861 ret = sord_node_copy(sord_iter_get_node(i, SORD_OBJECT));
862 }
863
864 sord_iter_free(i);
865 return ret;
866 }
867
868 bool
sord_ask(SordModel * model,const SordNode * s,const SordNode * p,const SordNode * o,const SordNode * g)869 sord_ask(SordModel* model,
870 const SordNode* s,
871 const SordNode* p,
872 const SordNode* o,
873 const SordNode* g)
874 {
875 SordQuad pat = { s, p, o, g };
876 return sord_contains(model, pat);
877 }
878
879 uint64_t
sord_count(SordModel * model,const SordNode * s,const SordNode * p,const SordNode * o,const SordNode * g)880 sord_count(SordModel* model,
881 const SordNode* s,
882 const SordNode* p,
883 const SordNode* o,
884 const SordNode* g)
885 {
886 SordIter* i = sord_search(model, s, p, o, g);
887 uint64_t n = 0;
888 for (; !sord_iter_end(i); sord_iter_next(i)) {
889 ++n;
890 }
891 sord_iter_free(i);
892 return n;
893 }
894
895 bool
sord_contains(SordModel * model,const SordQuad pat)896 sord_contains(SordModel* model, const SordQuad pat)
897 {
898 SordIter* iter = sord_find(model, pat);
899 bool ret = (iter != NULL);
900 sord_iter_free(iter);
901 return ret;
902 }
903
904 static uint8_t*
sord_strndup(const uint8_t * str,size_t len)905 sord_strndup(const uint8_t* str, size_t len)
906 {
907 uint8_t* dup = (uint8_t*)malloc(len + 1);
908 memcpy(dup, str, len + 1);
909 return dup;
910 }
911
912 SordNodeType
sord_node_get_type(const SordNode * node)913 sord_node_get_type(const SordNode* node)
914 {
915 switch (node->node.type) {
916 case SERD_URI:
917 return SORD_URI;
918 case SERD_BLANK:
919 return SORD_BLANK;
920 default:
921 return SORD_LITERAL;
922 }
923 SORD_UNREACHABLE();
924 }
925
926 const uint8_t*
sord_node_get_string(const SordNode * node)927 sord_node_get_string(const SordNode* node)
928 {
929 return node->node.buf;
930 }
931
932 const uint8_t*
sord_node_get_string_counted(const SordNode * node,size_t * bytes)933 sord_node_get_string_counted(const SordNode* node, size_t* bytes)
934 {
935 *bytes = node->node.n_bytes;
936 return node->node.buf;
937 }
938
939 const uint8_t*
sord_node_get_string_measured(const SordNode * node,size_t * bytes,size_t * chars)940 sord_node_get_string_measured(const SordNode* node,
941 size_t* bytes,
942 size_t* chars)
943 {
944 *bytes = node->node.n_bytes;
945 *chars = node->node.n_chars;
946 return node->node.buf;
947 }
948
949 const char*
sord_node_get_language(const SordNode * node)950 sord_node_get_language(const SordNode* node)
951 {
952 if (node->node.type != SERD_LITERAL || !node->meta.lit.lang[0]) {
953 return NULL;
954 }
955 return node->meta.lit.lang;
956 }
957
958 SordNode*
sord_node_get_datatype(const SordNode * node)959 sord_node_get_datatype(const SordNode* node)
960 {
961 return (node->node.type == SERD_LITERAL) ? node->meta.lit.datatype : NULL;
962 }
963
964 SerdNodeFlags
sord_node_get_flags(const SordNode * node)965 sord_node_get_flags(const SordNode* node)
966 {
967 return node->node.flags;
968 }
969
970 bool
sord_node_is_inline_object(const SordNode * node)971 sord_node_is_inline_object(const SordNode* node)
972 {
973 return (node->node.type == SERD_BLANK) && (node->meta.res.refs_as_obj == 1);
974 }
975
976 static SordNode*
sord_insert_node(SordWorld * world,const SordNode * key,bool copy)977 sord_insert_node(SordWorld* world, const SordNode* key, bool copy)
978 {
979 SordNode* node = NULL;
980 ZixStatus st = zix_hash_insert(world->nodes, key, (const void**)&node);
981 switch (st) {
982 case ZIX_STATUS_EXISTS:
983 ++node->refs;
984 break;
985 case ZIX_STATUS_SUCCESS:
986 assert(node->refs == 1);
987 if (copy) {
988 node->node.buf = sord_strndup(node->node.buf, node->node.n_bytes);
989 }
990 if (node->node.type == SERD_LITERAL) {
991 node->meta.lit.datatype = sord_node_copy(node->meta.lit.datatype);
992 }
993 return node;
994 default:
995 error(world, SERD_ERR_INTERNAL,
996 "error inserting node `%s'\n", key->node.buf);
997 }
998
999 if (!copy) {
1000 // Free the buffer we would have copied if a new node was created
1001 free((uint8_t*)key->node.buf);
1002 }
1003
1004 return node;
1005 }
1006
1007 static SordNode*
sord_new_uri_counted(SordWorld * world,const uint8_t * str,size_t n_bytes,size_t n_chars,bool copy)1008 sord_new_uri_counted(SordWorld* world, const uint8_t* str,
1009 size_t n_bytes, size_t n_chars, bool copy)
1010 {
1011 if (!serd_uri_string_has_scheme(str)) {
1012 error(world, SERD_ERR_BAD_ARG,
1013 "attempt to map invalid URI `%s'\n", str);
1014 return NULL; // Can't intern relative URIs
1015 }
1016
1017 const SordNode key = {
1018 { str, n_bytes, n_chars, 0, SERD_URI }, 1, { { 0 } }
1019 };
1020
1021 return sord_insert_node(world, &key, copy);
1022 }
1023
1024 SordNode*
sord_new_uri(SordWorld * world,const uint8_t * uri)1025 sord_new_uri(SordWorld* world, const uint8_t* uri)
1026 {
1027 const SerdNode node = serd_node_from_string(SERD_URI, uri);
1028 return sord_new_uri_counted(world, uri, node.n_bytes, node.n_chars, true);
1029 }
1030
1031 SordNode*
sord_new_relative_uri(SordWorld * world,const uint8_t * uri,const uint8_t * base_uri)1032 sord_new_relative_uri(SordWorld* world,
1033 const uint8_t* uri,
1034 const uint8_t* base_uri)
1035 {
1036 if (serd_uri_string_has_scheme(uri)) {
1037 return sord_new_uri(world, uri);
1038 }
1039 SerdURI buri = SERD_URI_NULL;
1040 SerdNode base = serd_node_new_uri_from_string(base_uri, NULL, &buri);
1041 SerdNode node = serd_node_new_uri_from_string(uri, &buri, NULL);
1042
1043 SordNode* ret = sord_new_uri_counted(
1044 world, node.buf, node.n_bytes, node.n_chars, false);
1045
1046 serd_node_free(&base);
1047 return ret;
1048 }
1049
1050 static SordNode*
sord_new_blank_counted(SordWorld * world,const uint8_t * str,size_t n_bytes,size_t n_chars)1051 sord_new_blank_counted(SordWorld* world, const uint8_t* str,
1052 size_t n_bytes, size_t n_chars)
1053 {
1054 const SordNode key = {
1055 { str, n_bytes, n_chars, 0, SERD_BLANK }, 1, { { 0 } }
1056 };
1057
1058 return sord_insert_node(world, &key, true);
1059 }
1060
1061 SordNode*
sord_new_blank(SordWorld * world,const uint8_t * str)1062 sord_new_blank(SordWorld* world, const uint8_t* str)
1063 {
1064 const SerdNode node = serd_node_from_string(SERD_URI, str);
1065 return sord_new_blank_counted(world, str, node.n_bytes, node.n_chars);
1066 }
1067
1068 static SordNode*
sord_new_literal_counted(SordWorld * world,SordNode * datatype,const uint8_t * str,size_t n_bytes,size_t n_chars,SerdNodeFlags flags,const char * lang)1069 sord_new_literal_counted(SordWorld* world,
1070 SordNode* datatype,
1071 const uint8_t* str,
1072 size_t n_bytes,
1073 size_t n_chars,
1074 SerdNodeFlags flags,
1075 const char* lang)
1076 {
1077 SordNode key = {
1078 { str, n_bytes, n_chars, flags, SERD_LITERAL }, 1, { { 0 } }
1079 };
1080 key.meta.lit.datatype = sord_node_copy(datatype);
1081 memset(key.meta.lit.lang, 0, sizeof(key.meta.lit.lang));
1082 if (lang) {
1083 strncpy(key.meta.lit.lang, lang, sizeof(key.meta.lit.lang));
1084 }
1085
1086 return sord_insert_node(world, &key, true);
1087 }
1088
1089 SordNode*
sord_new_literal(SordWorld * world,SordNode * datatype,const uint8_t * str,const char * lang)1090 sord_new_literal(SordWorld* world, SordNode* datatype,
1091 const uint8_t* str, const char* lang)
1092 {
1093 SerdNodeFlags flags = 0;
1094 size_t n_bytes = 0;
1095 size_t n_chars = serd_strlen(str, &n_bytes, &flags);
1096 return sord_new_literal_counted(world, datatype,
1097 str, n_bytes, n_chars, flags,
1098 lang);
1099 }
1100
1101 SordNode*
sord_node_from_serd_node(SordWorld * world,SerdEnv * env,const SerdNode * node,const SerdNode * datatype,const SerdNode * lang)1102 sord_node_from_serd_node(SordWorld* world,
1103 SerdEnv* env,
1104 const SerdNode* node,
1105 const SerdNode* datatype,
1106 const SerdNode* lang)
1107 {
1108 if (!node) {
1109 return NULL;
1110 }
1111
1112 SordNode* datatype_node = NULL;
1113 SordNode* ret = NULL;
1114 switch (node->type) {
1115 case SERD_NOTHING:
1116 return NULL;
1117 case SERD_LITERAL:
1118 datatype_node = sord_node_from_serd_node(
1119 world, env, datatype, NULL, NULL),
1120 ret = sord_new_literal_counted(
1121 world,
1122 datatype_node,
1123 node->buf,
1124 node->n_bytes,
1125 node->n_chars,
1126 node->flags,
1127 lang ? (const char*)lang->buf : NULL);
1128 sord_node_free(world, datatype_node);
1129 return ret;
1130 case SERD_URI:
1131 if (serd_uri_string_has_scheme(node->buf)) {
1132 return sord_new_uri_counted(
1133 world, node->buf, node->n_bytes, node->n_chars, true);
1134 } else {
1135 SerdURI base_uri;
1136 serd_env_get_base_uri(env, &base_uri);
1137 SerdURI abs_uri;
1138 SerdNode abs_uri_node = serd_node_new_uri_from_node(
1139 node, &base_uri, &abs_uri);
1140 ret = sord_new_uri_counted(world,
1141 abs_uri_node.buf,
1142 abs_uri_node.n_bytes,
1143 abs_uri_node.n_chars,
1144 true);
1145 serd_node_free(&abs_uri_node);
1146 return ret;
1147 }
1148 case SERD_CURIE: {
1149 SerdChunk uri_prefix;
1150 SerdChunk uri_suffix;
1151 if (serd_env_expand(env, node, &uri_prefix, &uri_suffix)) {
1152 error(world, SERD_ERR_BAD_CURIE,
1153 "failed to expand CURIE `%s'\n", node->buf);
1154 return NULL;
1155 }
1156 const size_t uri_len = uri_prefix.len + uri_suffix.len;
1157 uint8_t* buf = (uint8_t*)malloc(uri_len + 1);
1158 memcpy(buf, uri_prefix.buf, uri_prefix.len);
1159 memcpy(buf + uri_prefix.len, uri_suffix.buf, uri_suffix.len);
1160 buf[uri_len] = '\0';
1161 ret = sord_new_uri_counted(
1162 world, buf, uri_len, serd_strlen(buf, NULL, NULL), false);
1163 return ret;
1164 }
1165 case SERD_BLANK:
1166 return sord_new_blank_counted(
1167 world, node->buf, node->n_bytes, node->n_chars);
1168 }
1169 return NULL;
1170 }
1171
1172 const SerdNode*
sord_node_to_serd_node(const SordNode * node)1173 sord_node_to_serd_node(const SordNode* node)
1174 {
1175 return node ? &node->node : &SERD_NODE_NULL;
1176 }
1177
1178 void
sord_node_free(SordWorld * world,SordNode * node)1179 sord_node_free(SordWorld* world, SordNode* node)
1180 {
1181 if (!node) {
1182 return;
1183 } else if (node->refs == 0) {
1184 error(world, SERD_ERR_BAD_ARG, "attempt to free garbage node\n");
1185 } else if (--node->refs == 0) {
1186 sord_node_free_internal(world, node);
1187 }
1188 }
1189
1190 SordNode*
sord_node_copy(const SordNode * node)1191 sord_node_copy(const SordNode* node)
1192 {
1193 SordNode* copy = (SordNode*)node;
1194 if (copy) {
1195 ++copy->refs;
1196 }
1197 return copy;
1198 }
1199
1200 static inline bool
sord_add_to_index(SordModel * model,const SordNode ** tup,SordOrder order)1201 sord_add_to_index(SordModel* model, const SordNode** tup, SordOrder order)
1202 {
1203 return !zix_btree_insert(model->indices[order], tup);
1204 }
1205
1206 bool
sord_add(SordModel * model,const SordQuad tup)1207 sord_add(SordModel* model, const SordQuad tup)
1208 {
1209 SORD_WRITE_LOG("Add " TUP_FMT "\n", TUP_FMT_ARGS(tup));
1210 if (!tup[0] || !tup[1] || !tup[2]) {
1211 error(model->world, SERD_ERR_BAD_ARG,
1212 "attempt to add quad with NULL field\n");
1213 return false;
1214 } else if (model->n_iters > 0) {
1215 error(model->world, SERD_ERR_BAD_ARG, "added tuple during iteration\n");
1216 }
1217
1218 const SordNode** quad = (const SordNode**)malloc(sizeof(SordQuad));
1219 memcpy(quad, tup, sizeof(SordQuad));
1220
1221 for (unsigned i = 0; i < NUM_ORDERS; ++i) {
1222 if (model->indices[i] && (i < GSPO || tup[3])) {
1223 if (!sord_add_to_index(model, quad, (SordOrder)i)) {
1224 assert(i == 0); // Assuming index coherency
1225 free(quad);
1226 return false; // Quad already stored, do nothing
1227 }
1228 }
1229 }
1230
1231 for (int i = 0; i < TUP_LEN; ++i) {
1232 sord_add_quad_ref(model, tup[i], (SordQuadIndex)i);
1233 }
1234
1235 ++model->n_quads;
1236 return true;
1237 }
1238
1239 void
sord_remove(SordModel * model,const SordQuad tup)1240 sord_remove(SordModel* model, const SordQuad tup)
1241 {
1242 SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
1243 if (model->n_iters > 0) {
1244 error(model->world, SERD_ERR_BAD_ARG, "remove with iterator\n");
1245 }
1246
1247 SordNode* quad = NULL;
1248 for (unsigned i = 0; i < NUM_ORDERS; ++i) {
1249 if (model->indices[i] && (i < GSPO || tup[3])) {
1250 if (zix_btree_remove(model->indices[i], tup, (void**)&quad, NULL)) {
1251 assert(i == 0); // Assuming index coherency
1252 return; // Quad not found, do nothing
1253 }
1254 }
1255 }
1256
1257 free(quad);
1258
1259 for (int i = 0; i < TUP_LEN; ++i) {
1260 sord_drop_quad_ref(model, tup[i], (SordQuadIndex)i);
1261 }
1262
1263 --model->n_quads;
1264 }
1265
1266 SerdStatus
sord_erase(SordModel * model,SordIter * iter)1267 sord_erase(SordModel* model, SordIter* iter)
1268 {
1269 if (model->n_iters > 1) {
1270 error(model->world, SERD_ERR_BAD_ARG, "erased with many iterators\n");
1271 return SERD_ERR_BAD_ARG;
1272 }
1273
1274 SordQuad tup;
1275 sord_iter_get(iter, tup);
1276
1277 SORD_WRITE_LOG("Remove " TUP_FMT "\n", TUP_FMT_ARGS(tup));
1278
1279 SordNode* quad = NULL;
1280 for (unsigned i = 0; i < NUM_ORDERS; ++i) {
1281 if (model->indices[i] && (i < GSPO || tup[3])) {
1282 if (zix_btree_remove(model->indices[i], tup, (void**)&quad,
1283 i == iter->order ? &iter->cur : NULL)) {
1284 return (i == 0) ? SERD_ERR_NOT_FOUND : SERD_ERR_INTERNAL;
1285 }
1286 }
1287 }
1288 iter->end = zix_btree_iter_is_end(iter->cur);
1289 sord_iter_scan_next(iter);
1290
1291 free(quad);
1292
1293 for (int i = 0; i < TUP_LEN; ++i) {
1294 sord_drop_quad_ref(model, tup[i], (SordQuadIndex)i);
1295 }
1296
1297 --model->n_quads;
1298 return SERD_SUCCESS;
1299 }
1300