1 /* Part of SWI-Prolog
2
3 Author: Jan Wielemaker
4 E-mail: J.Wielemaker@vu.nl
5 WWW: http://www.swi-prolog.org
6 Copyright (c) 2003-2020, University of Amsterdam
7 VU University Amsterdam
8 All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13
14 1. Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
16
17 2. Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in
19 the documentation and/or other materials provided with the
20 distribution.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #define WITH_PL_MUTEX 1
41
42 #ifdef __WINDOWS__
43 #include <malloc.h>
44 #define inline __inline
45 #ifndef SIZEOF_LONG
46 #define SIZEOF_LONG 4
47 #endif
48 #endif
49
50 #include "rdf_db.h"
51 #include <wctype.h>
52 #include <ctype.h>
53 #include "murmur.h"
54 #include "memory.h"
55 #include "buffer.h"
56 #ifdef WITH_MD5
57 #include "md5.h"
58
59 #undef ERROR /* also in wingdi.h; we do not care */
60 #define ERROR -1
61
62 static void md5_triple(triple *t, md5_byte_t *digest);
63 static void sum_digest(md5_byte_t *digest, md5_byte_t *add);
64 static void dec_digest(md5_byte_t *digest, md5_byte_t *add);
65 static int md5_unify_digest(term_t t, md5_byte_t digest[16]);
66 #endif
67
68 void *
rdf_malloc(rdf_db * db,size_t size)69 rdf_malloc(rdf_db *db, size_t size)
70 { return malloc(size);
71 }
72
73 void
rdf_free(rdf_db * db,void * ptr,size_t size)74 rdf_free(rdf_db *db, void *ptr, size_t size)
75 { free(ptr);
76 }
77
/* File-scope functor handles.  By the naming used here the trailing
   digits give the arity (e.g. FUNCTOR_literal2 is literal/2).
   NOTE(review): the code that initialises these handles is not part of
   this chunk; presumably they are filled at module install time.
*/
static functor_t FUNCTOR_literal1;
static functor_t FUNCTOR_literal2;
static functor_t FUNCTOR_colon2;
static functor_t FUNCTOR_plus2;

static functor_t FUNCTOR_triples1;
static functor_t FUNCTOR_triples2;
static functor_t FUNCTOR_resources1;
static functor_t FUNCTOR_predicates1;
static functor_t FUNCTOR_duplicates1;
static functor_t FUNCTOR_lingering1;
static functor_t FUNCTOR_literals1;
static functor_t FUNCTOR_subject1;
static functor_t FUNCTOR_predicate1;
static functor_t FUNCTOR_object1;
static functor_t FUNCTOR_graph1;
static functor_t FUNCTOR_indexed16;
static functor_t FUNCTOR_hash_quality1;
static functor_t FUNCTOR_hash3;
static functor_t FUNCTOR_hash4;

static functor_t FUNCTOR_exact1;
static functor_t FUNCTOR_icase1;
static functor_t FUNCTOR_plain1;
static functor_t FUNCTOR_substring1;
static functor_t FUNCTOR_word1;
static functor_t FUNCTOR_prefix1;
static functor_t FUNCTOR_like1;
static functor_t FUNCTOR_lt1;
static functor_t FUNCTOR_le1;
static functor_t FUNCTOR_eq1;
static functor_t FUNCTOR_between2;
static functor_t FUNCTOR_ge1;
static functor_t FUNCTOR_gt1;

static functor_t FUNCTOR_symmetric1;
static functor_t FUNCTOR_inverse_of1;
static functor_t FUNCTOR_transitive1;
static functor_t FUNCTOR_rdf_subject_branch_factor1; /* S --> BF*O */
static functor_t FUNCTOR_rdf_object_branch_factor1; /* O --> BF*S */
static functor_t FUNCTOR_rdfs_subject_branch_factor1; /* S --> BF*O */
static functor_t FUNCTOR_rdfs_object_branch_factor1; /* O --> BF*S */

static functor_t FUNCTOR_searched_nodes1;
static functor_t FUNCTOR_lang2;
static functor_t FUNCTOR_type2;

static functor_t FUNCTOR_gc4;
static functor_t FUNCTOR_graphs1;

static functor_t FUNCTOR_assert4;
static functor_t FUNCTOR_retract4;
static functor_t FUNCTOR_update5;
static functor_t FUNCTOR_new_literal1;
static functor_t FUNCTOR_old_literal1;
static functor_t FUNCTOR_transaction2;
static functor_t FUNCTOR_load2;
static functor_t FUNCTOR_begin1;
static functor_t FUNCTOR_end1;
static functor_t FUNCTOR_create_graph1;
138
139 static atom_t ATOM_user;
140 static atom_t ATOM_exact;
141 static atom_t ATOM_icase;
142 static atom_t ATOM_plain;
143 static atom_t ATOM_prefix;
144 static atom_t ATOM_substring;
145 static atom_t ATOM_word;
146 static atom_t ATOM_like;
147 static atom_t ATOM_error;
148 static atom_t ATOM_begin;
149 static atom_t ATOM_end;
150 static atom_t ATOM_error;
151 static atom_t ATOM_infinite;
152 static atom_t ATOM_snapshot;
153 static atom_t ATOM_true;
154 static atom_t ATOM_size;
155 static atom_t ATOM_optimize_threshold;
156 static atom_t ATOM_average_chain_len;
157 static atom_t ATOM_reset;
158 static atom_t ATOM_lt; /* < */
159 static atom_t ATOM_eq; /* = */
160 static atom_t ATOM_gt; /* > */
161 static atom_t ATOM_XSDString;
162
163 static atom_t ATOM_subPropertyOf;
164 static atom_t ATOM_xsdString;
165 static atom_t ATOM_xsdDouble;
166
167 static predicate_t PRED_call1;
168
/* Bit flags for the `flags` argument of match_triples().  The flags
   can be or-ed together; MATCH_DUPLICATE is the combination that
   add_tripleset() uses to detect duplicate triples.
*/
#define MATCH_EXACT 0x01 /* exact triple match */
#define MATCH_SUBPROPERTY 0x02 /* Use subPropertyOf relations */
#define MATCH_SRC 0x04 /* Match graph location */
#define MATCH_INVERSE 0x08 /* use symmetric match too */
#define MATCH_QUAL 0x10 /* Match qualifiers too */
#define MATCH_NUMERIC 0x20 /* Match typed objects numerically */
#define MATCH_DUPLICATE (MATCH_EXACT|MATCH_QUAL)
176
177 static int match_triples(rdf_db *db, triple *t, triple *p,
178 query *q, unsigned flags);
179 static void unlock_atoms(rdf_db *db, triple *t);
180 static void lock_atoms(rdf_db *db, triple *t);
181 static void unlock_atoms_literal(literal *lit);
182
183 static size_t triple_hash_key(triple *t, int which);
184 static size_t object_hash(triple *t);
185 static void mark_duplicate(rdf_db *db, triple *t, query *q);
186 static void link_triple_hash(rdf_db *db, triple *t);
187 static void free_triple(rdf_db *db, triple *t, int linger);
188
189 static sub_p_matrix *create_reachability_matrix(rdf_db *db,
190 predicate_cloud *cloud,
191 query *q);
192 static void free_reachability_matrix(rdf_db *db, sub_p_matrix *rm);
193 static void gc_is_leaf(rdf_db *db, predicate *p, gen_t gen);
194 static int get_predicate(rdf_db *db, term_t t, predicate **p, query *q);
195 static int get_existing_predicate(rdf_db *db, term_t t, predicate **p);
196 static void free_bitmatrix(rdf_db *db, bitmatrix *bm);
197 static predicate_cloud *new_predicate_cloud(rdf_db *db,
198 predicate **p, size_t count);
199 static int unify_literal(term_t lit, literal *l);
200 static int free_literal(rdf_db *db, literal *lit);
201 static int check_predicate_cloud(predicate_cloud *c);
202 static void invalidate_is_leaf(predicate *p, query *q, int add);
203 static void create_triple_hashes(rdf_db *db, int count, int *ic);
204 static void free_literal_value(rdf_db *db, literal *lit);
205 static void finalize_graph(void *g, void *db);
206
207
208 /*******************************
209 * LOCKING *
210 *******************************/
211
212 static void
INIT_LOCK(rdf_db * db)213 INIT_LOCK(rdf_db *db)
214 { simpleMutexInit(&db->locks.literal);
215 simpleMutexInit(&db->locks.misc);
216 simpleMutexInit(&db->locks.gc);
217 simpleMutexInit(&db->locks.duplicates);
218 simpleMutexInit(&db->locks.erase);
219 simpleMutexInit(&db->locks.prefixes);
220 }
221
222 static simpleMutex rdf_lock;
223
224
225 /*******************************
226 * DEBUG SUPPORT *
227 *******************************/
228
229 #ifdef O_DEBUG
230
231 #define PRT_SRC 0x1 /* print source */
232 #define PRT_NL 0x2 /* add newline */
233 #define PRT_GEN 0x4 /* print generation info */
234 #define PRT_ADR 0x8 /* print address */
235
236 static void
print_literal(literal * lit)237 print_literal(literal *lit)
238 { switch(lit->objtype)
239 { case OBJ_STRING:
240 switch(lit->qualifier)
241 { case Q_TYPE:
242 Sdprintf("%s^^\"%s\"",
243 PL_atom_chars(lit->value.string),
244 PL_atom_chars(ID_ATOM(lit->type_or_lang)));
245 break;
246 case Q_LANG:
247 Sdprintf("%s@\"%s\"",
248 PL_atom_chars(lit->value.string),
249 PL_atom_chars(ID_ATOM(lit->type_or_lang)));
250 break;
251 default:
252 { size_t len;
253 const char *s;
254 const wchar_t *w;
255
256 if ( (s = PL_atom_nchars(lit->value.string, &len)) )
257 { if ( strlen(s) == len )
258 Sdprintf("\"%s\"", s);
259 else
260 Sdprintf("\"%s\" (len=%d)", s, len);
261 } else if ( (w = PL_atom_wchars(lit->value.string, &len)) )
262 { unsigned int i;
263 Sputc('L', Serror);
264 Sputc('"', Serror);
265 for(i=0; i<len; i++)
266 { if ( w[i] < 0x7f )
267 Sputc(w[i], Serror);
268 else
269 Sfprintf(Serror, "\\\\u%04x", w[i]);
270 }
271 Sputc('"', Serror);
272 }
273 break;
274 }
275 }
276 break;
277 case OBJ_INTEGER:
278 Sdprintf("%ld", lit->value.integer);
279 break;
280 case OBJ_DOUBLE:
281 Sdprintf("%f", lit->value.real);
282 break;
283 case OBJ_TERM:
284 { fid_t fid = PL_open_foreign_frame();
285 term_t term = PL_new_term_ref();
286
287 PL_recorded_external(lit->value.term.record, term);
288 PL_write_term(Serror, term, 1200,
289 PL_WRT_QUOTED|PL_WRT_NUMBERVARS|PL_WRT_PORTRAY);
290 PL_discard_foreign_frame(fid);
291 break;
292 }
293 default:
294 assert(0);
295 }
296 }
297
298
299 static void
print_object(triple * t)300 print_object(triple *t)
301 { if ( t->object_is_literal )
302 { print_literal(t->object.literal);
303 } else
304 { Sdprintf("%s", t->object.resource ? PL_atom_chars(t->object.resource) : "?o");
305 }
306 }
307
308
309 static void
print_src(triple * t)310 print_src(triple *t)
311 { if ( t->graph_id )
312 { if ( t->line == NO_LINE )
313 Sdprintf(" [%s]", PL_atom_chars(ID_ATOM(t->graph_id)));
314 else
315 Sdprintf(" [%s:%ld]", PL_atom_chars(ID_ATOM(t->graph_id)), t->line);
316 } else
317 { Sdprintf(" ?g");
318 }
319 }
320
321
322 static char *
triple_status_flags(triple * t,char * buf)323 triple_status_flags(triple *t, char *buf)
324 { char *o = buf;
325
326 *o++ = ' ';
327 if ( t->atoms_locked )
328 *o++ = 'L';
329 if ( t->is_duplicate )
330 *o++ = 'D';
331
332 if ( o > buf+1 )
333 *o = '\0';
334 else
335 buf[0] = '\0';
336
337 return buf;
338 }
339
340
341 static void
print_gen(triple * t)342 print_gen(triple *t)
343 { char buf[3][24];
344
345 Sdprintf(" (%s..%s%s)",
346 gen_name(t->lifespan.born, buf[0]),
347 gen_name(t->lifespan.died, buf[1]),
348 triple_status_flags(t, buf[2]));
349 }
350
351
352 static void
print_triple(triple * t,int flags)353 print_triple(triple *t, int flags)
354 { Sdprintf("<%s %s ",
355 t->subject_id ? PL_atom_chars(ID_ATOM(t->subject_id)) : "?s",
356 t->predicate.r->name ? PL_atom_chars(t->predicate.r->name) : "?p");
357 print_object(t);
358 if ( (flags & PRT_SRC) )
359 print_src(t);
360 if ( (flags & PRT_GEN) )
361 print_gen(t);
362 if ( (flags & PRT_ADR) )
363 Sdprintf(" &%p", t);
364 Sdprintf((flags & PRT_NL) ? ">\n" : ">");
365 }
366
367 #endif
368
369 /*******************************
370 * STORAGE *
371 *******************************/
372
373 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Tables that allow finding the hash-chains for a particular index. They
are currently crafted by hand, such that the compiler knows the mapping
is constant. check_index_tables() verifies that the tables are
consistent. To add an index:
378
379 * Increment INDEX_TABLES in rdf_db.h
380 * Add the index to col_index[]
381 * Assign it a (consistent) position in index_col[]
  * Decide which unindexed queries are best mapped
    to the new index and add them to alt_index[]
384 * Add entries to col_name[], col_avg_len[], col_opt_threshold[]
385 * Deal with the new index in consider_triple_rehash() and
386 initial_size_triple_hash()
387
388 Make sure you compile with support for assert(). If you make a mistake
389 in the above, you are likely to get an assertion failure. Thanks to
390 Haitao Zhang for debugging these notes.
391 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
392
/* ICOL(i): column (position in the db->hash[] array) for index i */
#define ICOL(i) (index_col[i])

/* Maps each of the 16 BY_* index combinations to the column that holds
   its hash table.  ~0 marks combinations for which no table exists.
*/
static const int index_col[16] =
{ 0, /* BY_NONE */
  1, /* BY_S */
  2, /* BY_P */
  3, /* BY_SP */
  4, /* BY_O */
  ~0, /* BY_SO */
  5, /* BY_PO */
  6, /* BY_SPO */

  7, /* BY_G */
  8, /* BY_SG */
  9, /* BY_PG */
  ~0, /* BY_SPG */
  ~0, /* BY_OG */
  ~0, /* BY_SOG */
  ~0, /* BY_POG */
  ~0 /* BY_SPOG */
};

/* Inverse of index_col[]: the BY_* index stored in each column */
static int col_index[INDEX_TABLES] =
{ BY_NONE,
  BY_S,
  BY_P,
  BY_SP,
  BY_O,
  BY_PO,
  BY_SPO,
  BY_G,
  BY_SG,
  BY_PG
};

/* Short textual name of each column */
static const char *col_name[INDEX_TABLES] =
{ "-",
  "s",
  "p",
  "sp",
  "o",
  "po",
  "spo",
  "g",
  "sg",
  "pg"
};

/* Per-column average chain length setting.
   NOTE(review): consumers are outside this chunk; presumably the
   default for the `average_chain_len` option -- confirm.
*/
static const int col_avg_len[INDEX_TABLES] =
{ 0, /*BY_NONE*/
  2, /*BY_S*/
  2, /*BY_P*/
  2, /*BY_SP*/
  4, /*BY_O*/
  2, /*BY_PO*/
  2, /*BY_SPO*/
  1, /*BY_G*/
  2, /*BY_SG*/
  2 /*BY_PG*/
};

/* Per-column optimize threshold setting.
   NOTE(review): consumers are outside this chunk; presumably the
   default for the `optimize_threshold` option -- confirm.
*/
static const int col_opt_threshold[INDEX_TABLES] =
{ 0, /*BY_NONE*/
  2, /*BY_S*/
  2, /*BY_P*/
  2, /*BY_SP*/
  2, /*BY_O*/
  2, /*BY_PO*/
  2, /*BY_SPO*/
  2, /*BY_G*/
  2, /*BY_SG*/
  2 /*BY_PG*/
};

/* Maps each of the 16 BY_* combinations to an index that does have a
   hash table (index_col[] != ~0).  Combinations that have their own
   table map to themselves; check_index_tables() asserts both.
*/
static const int alt_index[16] =
{ BY_NONE, /* BY_NONE */
  BY_S, /* BY_S */
  BY_P, /* BY_P */
  BY_SP, /* BY_SP */
  BY_O, /* BY_O */
  BY_S, /* BY_SO */
  BY_PO, /* BY_PO */
  BY_SPO, /* BY_SPO */

  BY_G, /* BY_G */
  BY_SG, /* BY_SG */
  BY_PG, /* BY_PG */
  BY_SP, /* BY_SPG */
  BY_O, /* BY_OG */
  BY_S, /* BY_SOG */
  BY_PO, /* BY_POG */
  BY_SPO /* BY_SPOG */
};
486
487
488 static void
check_index_tables(void)489 check_index_tables(void)
490 {
491 #ifndef NDEBUG
492 int i, ic;
493
494 for(i=0; i<16; i++)
495 { if ( (ic=index_col[i]) != ~0 )
496 { assert(col_index[ic] == i);
497 }
498 }
499
500 for(i=0; i<16; i++)
501 { int ai = alt_index[i];
502
503 assert(index_col[ai] != ~0);
504 }
505
506 for(i=0; i<INDEX_TABLES; i++)
507 { ic = col_index[i];
508 assert(alt_index[ic] == ic);
509 }
510 #endif
511 }
512
513
514 /*******************************
515 * LISTS *
516 *******************************/
517
518 static int
add_list(rdf_db * db,list * list,void * value)519 add_list(rdf_db *db, list *list, void *value)
520 { cell *c;
521
522 for(c=list->head; c; c=c->next)
523 { if ( c->value == value )
524 return FALSE; /* already a member */
525 }
526
527 c = rdf_malloc(db, sizeof(*c));
528 c->value = value;
529 c->next = NULL;
530
531 if ( list->tail )
532 list->tail->next = c;
533 else
534 list->head = c;
535
536 list->tail = c;
537
538 return TRUE;
539 }
540
541
542 static int
del_list(rdf_db * db,list * list,void * value)543 del_list(rdf_db *db, list *list, void *value)
544 { cell *c, *p = NULL;
545
546 for(c=list->head; c; p=c, c=c->next)
547 { if ( c->value == value )
548 { if ( p )
549 p->next = c->next;
550 else
551 list->head = c->next;
552
553 if ( !c->next )
554 list->tail = p;
555
556 rdf_free(db, c, sizeof(*c));
557
558 return TRUE;
559 }
560 }
561
562 return FALSE; /* not a member */
563 }
564
565
566 static void
free_list(rdf_db * db,list * list)567 free_list(rdf_db *db, list *list)
568 { cell *c, *n;
569
570 for(c=list->head; c; c=n)
571 { n = c->next;
572 rdf_free(db, c, sizeof(*c));
573 }
574
575 list->head = list->tail = NULL;
576 }
577
578
579 /*******************************
580 * TMP STORE *
581 *******************************/
582
583 static void
init_tmp_store(tmp_store * s)584 init_tmp_store(tmp_store *s)
585 { s->chunks = &s->store0;
586 s->chunks->next = NULL;
587 s->chunks->used = 0;
588 }
589
590
591 static void *
alloc_tmp_store(tmp_store * s,size_t size)592 alloc_tmp_store(tmp_store *s, size_t size)
593 { void *p;
594
595 assert(size < CHUNKSIZE);
596
597 if ( s->chunks->used + size > CHUNKSIZE )
598 { mchunk *ch = malloc(sizeof(mchunk));
599
600 ch->used = 0;
601 ch->next = s->chunks;
602 s->chunks = ch;
603 }
604
605 p = &s->chunks->buf[s->chunks->used];
606 s->chunks->used += size;
607
608 return p;
609 }
610
611
612 static void
destroy_tmp_store(tmp_store * s)613 destroy_tmp_store(tmp_store *s)
614 { mchunk *ch, *next;
615
616 for(ch=s->chunks; ch != &s->store0; ch = next)
617 { next = ch->next;
618 free(ch);
619 }
620 }
621
622
623 /*******************************
624 * ATOM SETS *
625 *******************************/
626
/* Number of buckets in the bucket array embedded in an atomset */
#define ATOMSET_INITIAL_ENTRIES 16

/* Cell in a bucket chain of an atomset */
typedef struct atom_cell
{ struct atom_cell *next;               /* next cell in the same bucket */
  atom_t atom;                          /* the stored atom */
} atom_cell;

/* Hash set of atoms.  Buckets are selected with hash&(size-1), so size
   is a power of two: it starts at ATOMSET_INITIAL_ENTRIES and is
   doubled by rehash_atom_set().  `entries` points at entries0 until
   the first rehash.  Cells are allocated from `store`.
*/
typedef struct
{ atom_cell **entries; /* Hash entries */
  size_t size; /* Hash-table size */
  size_t count; /* # atoms stored */
  tmp_store store; /* Temporary storage */
  atom_cell *entries0[ATOMSET_INITIAL_ENTRIES];
} atomset;
641
642
643 static void *
alloc_atomset(atomset * as,size_t size)644 alloc_atomset(atomset *as, size_t size)
645 { return alloc_tmp_store(&as->store, size);
646 }
647
648
649 static void
init_atomset(atomset * as)650 init_atomset(atomset *as)
651 { init_tmp_store(&as->store);
652 memset(as->entries0, 0, sizeof(as->entries0));
653 as->entries = as->entries0;
654 as->size = ATOMSET_INITIAL_ENTRIES;
655 as->count = 0;
656 }
657
658
659 static void
destroy_atomset(atomset * as)660 destroy_atomset(atomset *as)
661 { destroy_tmp_store(&as->store);
662
663 if ( as->entries != as->entries0 )
664 free(as->entries);
665 }
666
667
668 static void
rehash_atom_set(atomset * as)669 rehash_atom_set(atomset *as)
670 { size_t newsize = as->size*2;
671 atom_cell **new = malloc(newsize*sizeof(atom_cell*));
672 int i;
673
674 memset(new, 0, newsize*sizeof(atom_cell*));
675
676 for(i=0; i<as->size; i++)
677 { atom_cell *c, *n;
678
679 for(c=as->entries[i]; c; c=n)
680 { size_t inew = atom_hash(c->atom, MURMUR_SEED)&(newsize-1);
681
682 n = c->next;
683 c->next = new[inew];
684 new[inew] = c;
685 }
686 }
687
688 if ( as->entries == as->entries0 )
689 { as->entries = new;
690 } else
691 { atom_cell **old = as->entries;
692 as->entries = new;
693 free(old);
694 }
695
696 as->size = newsize;
697 }
698
699
700 static int
add_atomset(atomset * as,atom_t atom)701 add_atomset(atomset *as, atom_t atom)
702 { size_t i = atom_hash(atom, MURMUR_SEED)&(as->size-1);
703 atom_cell *c;
704
705 for(c=as->entries[i]; c; c=c->next)
706 { if ( c->atom == atom )
707 return 0;
708 }
709
710 if ( ++as->count > 2*as->size )
711 { rehash_atom_set(as);
712 i = atom_hash(atom, MURMUR_SEED)&(as->size-1);
713 }
714
715 c = alloc_atomset(as, sizeof(*c));
716 c->atom = atom;
717 c->next = as->entries[i];
718 as->entries[i] = c;
719
720 return 1;
721 }
722
723
724 static int
for_atomset(atomset * as,int (* func)(atom_t a,void * closure),void * closure)725 for_atomset(atomset *as,
726 int (*func)(atom_t a, void *closure),
727 void *closure)
728 { int key;
729
730 for(key=0; key < as->size; key++)
731 { atom_cell *c;
732
733 for(c=as->entries[key]; c; c=c->next)
734 { if ( !(*func)(c->atom, closure) )
735 return FALSE;
736 }
737 }
738
739 return TRUE;
740 }
741
742
743 /*******************************
744 * TRIPLE SETS *
745 *******************************/
746
/* Note that only ->entries needs to be NULL to consider the set empty.
   The remainder of the initialization is done lazily.
*/
750
751 static void *
alloc_tripleset(void * ptr,size_t size)752 alloc_tripleset(void *ptr, size_t size)
753 { tripleset *ts = ptr;
754
755 return alloc_tmp_store(&ts->store, size);
756 }
757
758
759 static void
init_tripleset(tripleset * ts)760 init_tripleset(tripleset *ts)
761 { init_tmp_store(&ts->store);
762 memset(ts->entries0, 0, sizeof(ts->entries0));
763 ts->entries = ts->entries0;
764 ts->size = TRIPLESET_INITIAL_ENTRIES;
765 ts->count = 0;
766 }
767
768
769 static void
destroy_tripleset(tripleset * ts)770 destroy_tripleset(tripleset *ts)
771 { if ( ts->entries )
772 { destroy_tmp_store(&ts->store);
773
774 if ( ts->entries != ts->entries0 )
775 free(ts->entries);
776 }
777 }
778
779
780 static void
rehash_triple_set(tripleset * ts)781 rehash_triple_set(tripleset *ts)
782 { size_t newsize = ts->size*2;
783 triple_cell **new = malloc(newsize*sizeof(triple_cell*));
784 int i;
785
786 memset(new, 0, newsize*sizeof(triple_cell*));
787
788 for(i=0; i<ts->size; i++)
789 { triple_cell *c, *n;
790
791 for(c=ts->entries[i]; c; c=n)
792 { size_t inew = triple_hash_key(c->triple, BY_SPO)&(newsize-1);
793
794 n = c->next;
795 c->next = new[inew];
796 new[inew] = c;
797 }
798 }
799
800 if ( ts->entries == ts->entries0 )
801 { ts->entries = new;
802 } else
803 { triple_cell **old = ts->entries;
804 ts->entries = new;
805 free(old);
806 }
807
808 ts->size = newsize;
809 }
810
811
812 static int
add_tripleset(search_state * state,tripleset * ts,triple * triple)813 add_tripleset(search_state *state, tripleset *ts, triple *triple)
814 { size_t i;
815 triple_cell *c;
816
817 if ( !ts->entries )
818 init_tripleset(ts);
819
820 i = triple_hash_key(triple, BY_SPO)&(ts->size-1);
821 for(c=ts->entries[i]; c; c=c->next)
822 { if ( match_triples(state->db,
823 triple, c->triple,
824 state->query, MATCH_DUPLICATE) )
825 return 0;
826 }
827
828 if ( ++ts->count > 2*ts->size )
829 { rehash_triple_set(ts);
830 i = triple_hash_key(triple, BY_SPO)&(ts->size-1);
831 }
832
833 c = alloc_tripleset(ts, sizeof(*c));
834 c->triple = triple;
835 c->next = ts->entries[i];
836 ts->entries[i] = c;
837
838 return 1;
839 }
840
841
842 /*******************************
843 * PREFIXES *
844 *******************************/
845
846 static prefix_table *
new_prefix_table(void)847 new_prefix_table(void)
848 { prefix_table *t = malloc(sizeof(*t));
849
850 if ( t )
851 { memset(t, 0, sizeof(*t));
852 t->size = PREFIX_INITIAL_ENTRIES;
853 t->entries = malloc(t->size*sizeof(*t->entries));
854 if ( t->entries )
855 { memset(t->entries, 0, t->size*sizeof(*t->entries));
856 } else
857 { free(t);
858 t = NULL;
859 }
860 }
861
862 return t;
863 }
864
865
866 static void
empty_prefix_table(rdf_db * db)867 empty_prefix_table(rdf_db *db)
868 { int i;
869 prefix_table *t = db->prefixes;
870
871 simpleMutexLock(&db->locks.prefixes);
872 for(i=0; i<t->size; i++)
873 { prefix *p, *next;
874
875 p = t->entries[i];
876 t->entries[i] = NULL;
877 for(; p; p = next)
878 { next = p->next;
879
880 PL_unregister_atom(p->alias);
881 PL_unregister_atom(p->uri.handle);
882 free(p);
883 }
884 }
885 simpleMutexUnlock(&db->locks.prefixes);
886 t->count = 0;
887
888 flush_prefix_cache();
889 }
890
891
892 static void
resize_prefix_table(prefix_table * t)893 resize_prefix_table(prefix_table *t)
894 { size_t new_size = t->size*2;
895 prefix **new_entries = malloc(new_size*sizeof(*new_entries));
896
897 if ( new_entries )
898 { int i;
899
900 memset(new_entries, 0, new_size*sizeof(*new_entries));
901 for(i=0; i<t->size; i++)
902 { prefix *p, *next;
903
904 for(p=t->entries[i]; p; p = next)
905 { unsigned key = atom_hash(p->alias, MURMUR_SEED) & (new_size-1);
906
907 next = p->next;
908 p->next = new_entries[key];
909 new_entries[key] = p;
910 }
911 }
912
913 t->size = new_size;
914 free(t->entries);
915 t->entries = new_entries;
916 }
917 }
918
919
920
921 static prefix *
add_prefix(rdf_db * db,atom_t alias,atom_t uri)922 add_prefix(rdf_db *db, atom_t alias, atom_t uri)
923 { prefix_table *t = db->prefixes;
924 unsigned key = atom_hash(alias, MURMUR_SEED) & (t->size-1);
925 prefix *p = malloc(sizeof(*p));
926
927 if ( !p )
928 { PL_resource_error("memory");
929 return NULL;
930 }
931
932 if ( t->count > t->size )
933 resize_prefix_table(t);
934
935 memset(p, 0, sizeof(*p));
936 p->alias = alias;
937 p->uri.handle = uri;
938 PL_register_atom(alias);
939 PL_register_atom(uri);
940 fill_atom_info(&p->uri);
941
942 p->next = t->entries[key];
943 t->entries[key] = p;
944 t->count++;
945
946 return p;
947 }
948
949
950 static prefix *
lookup_prefix(rdf_db * db,atom_t a)951 lookup_prefix(rdf_db *db, atom_t a)
952 { prefix_table *t;
953 prefix *pl;
954 fid_t fid;
955 static predicate_t pred = NULL;
956
957 simpleMutexLock(&db->locks.prefixes);
958 t = db->prefixes;
959 for(pl = t->entries[atom_hash(a, MURMUR_SEED)&(t->size-1)]; pl; pl=pl->next)
960 { if ( pl->alias == a )
961 { simpleMutexUnlock(&db->locks.prefixes);
962 return pl;
963 }
964 }
965
966 if ( !pred )
967 pred = PL_predicate("rdf_current_prefix", 2, "rdf_db");
968
969 assert(pl == NULL);
970 if ( (fid = PL_open_foreign_frame()) )
971 { term_t av = PL_new_term_refs(2);
972 atom_t uri_atom;
973
974 PL_put_atom(av+0, a);
975 if ( PL_call_predicate(NULL, PL_Q_PASS_EXCEPTION, pred, av) &&
976 PL_get_atom_ex(av+1, &uri_atom) )
977 pl = add_prefix(db, a, uri_atom);
978 else if ( !PL_exception(0) )
979 PL_existence_error("rdf_prefix", av+0);
980
981 PL_close_foreign_frame(fid);
982 }
983
984 simpleMutexUnlock(&db->locks.prefixes);
985
986 return pl;
987 }
988
989
990 static wchar_t *
add_text(wchar_t * w,const text * t)991 add_text(wchar_t *w, const text *t)
992 { if ( t->a )
993 { const unsigned char *a = t->a;
994 const unsigned char *e = &a[t->length];
995
996 for(; a<e; a++)
997 *w++ = *a;
998 } else
999 { const wchar_t *a = t->w;
1000 const wchar_t *e = &a[t->length];
1001
1002 for(; a<e; a++)
1003 *w++ = *a;
1004 }
1005
1006 return w;
1007 }
1008
1009
1010 atom_t
expand_prefix(rdf_db * db,atom_t alias,atom_t local)1011 expand_prefix(rdf_db *db, atom_t alias, atom_t local)
1012 { prefix *p = lookup_prefix(db, alias);
1013
1014 if ( p )
1015 { atom_info ai = {0};
1016 ai.handle = local;
1017 fill_atom_info(&ai);
1018 atom_t uri;
1019
1020 if ( ai.text.a && p->uri.text.a )
1021 { char buf[256];
1022 size_t len = ai.text.length + p->uri.text.length;
1023 char *a = len <= sizeof(buf) ? buf : malloc(len);
1024
1025 if ( !len )
1026 return (atom_t)0;
1027 memcpy(a, p->uri.text.a, p->uri.text.length);
1028 memcpy(&a[p->uri.text.length], ai.text.a, ai.text.length);
1029
1030 uri = PL_new_atom_nchars(len, a);
1031 if ( a != buf )
1032 free(a);
1033 } else
1034 { wchar_t buf[256];
1035 size_t len = ai.text.length + p->uri.text.length;
1036 wchar_t *w = len <= sizeof(buf)/sizeof(wchar_t)
1037 ? buf
1038 : malloc(len*sizeof(wchar_t));
1039
1040 if ( !len )
1041 return (atom_t)0;
1042 w = add_text(w, &p->uri.text);
1043 w = add_text(w, &ai.text);
1044
1045 uri = PL_new_atom_wchars(len, w);
1046 if ( w != buf )
1047 free(w);
1048 }
1049
1050 return uri;
1051 }
1052
1053 return (atom_t)0;
1054 }
1055
1056
1057
1058 #ifdef COMPACT
1059
1060 /*******************************
1061 * TRIPLE ARRAY *
1062 *******************************/
1063
1064 static triple_element *
alloc_array_slice(size_t count,triple_element ** last)1065 alloc_array_slice(size_t count, triple_element **last)
1066 { size_t bytes = count*sizeof(triple_element);
1067 triple_element *slice = malloc(bytes);
1068
1069 if ( slice )
1070 { triple_element *end = slice+count-1;
1071 triple_element *e, *n;
1072
1073 for(e=slice; e<end; e=n)
1074 { n = e+1;
1075 e->fnext = n;
1076 }
1077 e->fnext = NULL;
1078
1079 if ( last )
1080 *last = e;
1081 }
1082
1083 return slice;
1084 }
1085
1086 static void
free_array_slice(triple_array * a,triple_element * list,triple_element * last)1087 free_array_slice(triple_array *a, triple_element *list, triple_element *last)
1088 { triple_element *o;
1089
1090 do
1091 { o = a->freelist;
1092 last->fnext = o;
1093 } while ( !COMPARE_AND_SWAP_PTR(&a->freelist, o, list) );
1094 }
1095
1096 static int
init_triple_array(rdf_db * db)1097 init_triple_array(rdf_db *db)
1098 { triple_array *a = &db->triple_array;
1099 triple_element *slice = alloc_array_slice(TRIPLE_ARRAY_PREINIT, NULL);
1100 int i;
1101
1102 for(i=0; i<MSB(TRIPLE_ARRAY_PREINIT); i++)
1103 a->blocks[i] = slice;
1104
1105 a->freelist = slice->fnext; /* simply ignore the first for id>0 */
1106 a->preinit = TRIPLE_ARRAY_PREINIT;
1107 a->size = TRIPLE_ARRAY_PREINIT;
1108
1109 return TRUE;
1110 }
1111
1112 static void
destroy_triple_array(rdf_db * db)1113 destroy_triple_array(rdf_db *db)
1114 { triple_array *a = &db->triple_array;
1115 int i;
1116
1117 free(a->blocks[0]);
1118 for(i=MSB(a->preinit); i<MSB(a->size); i++)
1119 { triple_element *e = a->blocks[i];
1120
1121 e += 1<<(i-1);
1122 free(e);
1123 }
1124 memset(a, 0, sizeof(*a));
1125 }
1126
1127 static void
reset_triple_array(rdf_db * db)1128 reset_triple_array(rdf_db *db)
1129 { destroy_triple_array(db);
1130 init_triple_array(db);
1131 }
1132
1133 static void
resize_triple_array(rdf_db * db)1134 resize_triple_array(rdf_db *db)
1135 { triple_array *a = &db->triple_array;
1136 int i = MSB(a->size);
1137 triple_element *last;
1138 triple_element *slice = alloc_array_slice(a->size, &last);
1139
1140 if ( slice )
1141 { a->blocks[i] = slice - a->size;
1142 a->size *= 2;
1143 free_array_slice(a, slice, last);
1144 }
1145 }
1146
1147 static triple_element *
fetch_triple_element(rdf_db * db,triple_id id)1148 fetch_triple_element(rdf_db *db, triple_id id)
1149 { return &db->triple_array.blocks[MSB(id)][id];
1150 }
1151
1152 /* assign a new triple a place in the triple array
1153 */
1154
1155 static triple_id
register_triple(rdf_db * db,triple * t)1156 register_triple(rdf_db *db, triple *t)
1157 { triple_array *a = &db->triple_array;
1158 triple_element *e;
1159 size_t slice_size;
1160 int i;
1161
1162 do
1163 { if ( !(e=a->freelist) )
1164 { simpleMutexLock(&db->locks.misc);
1165 while ( !(e=a->freelist) )
1166 resize_triple_array(db);
1167 simpleMutexUnlock(&db->locks.misc);
1168 }
1169 } while ( !COMPARE_AND_SWAP_PTR(&a->freelist, e, e->fnext) );
1170
1171 e->triple = t;
1172
1173 for(i=1,slice_size=1; i<MAX_TBLOCKS; i++,slice_size*=2)
1174 { if ( e >= a->blocks[i]+slice_size &&
1175 e < a->blocks[i]+slice_size*2 )
1176 { t->id = e - a->blocks[i];
1177
1178 assert(fetch_triple(db, t->id) == t);
1179 return t->id;
1180 }
1181 }
1182
1183 assert(0);
1184 return 0;
1185 }
1186
1187 static void
unregister_triple(rdf_db * db,triple * t)1188 unregister_triple(rdf_db *db, triple *t)
1189 { if ( t->id != TRIPLE_NO_ID )
1190 { triple_element *e = fetch_triple_element(db, t->id);
1191
1192 t->id = TRIPLE_NO_ID;
1193 free_array_slice(&db->triple_array, e, e);
1194 }
1195 }
1196
1197 static triple *
triple_follow_hash(rdf_db * db,triple * t,int icol)1198 triple_follow_hash(rdf_db *db, triple *t, int icol)
1199 { triple_id nid = t->tp.next[icol];
1200
1201 return fetch_triple(db, nid);
1202 }
1203
/* Id of triple t, or 0 if t is NULL */
#define T_ID(t) ((t) ? (t)->id : 0)

#else /*COMPACT*/

/* Without COMPACT there is no triple array: the array operations are
   no-ops, the next field of a hash chain is used as is, and T_ID() is
   the triple pointer itself.
*/
#define init_triple_array(db) (void)0
#define reset_triple_array(db) (void)0
#define register_triple(db, t) (void)0
#define unregister_triple(db, t) (void)0
#define triple_follow_hash(db, t, icol) ((t)->tp.next[icol])
#define T_ID(t) (t)
1214
1215 #endif /*COMPACT*/
1216
1217 static void
finalize_triple(void * data,void * client)1218 finalize_triple(void *data, void *client)
1219 { triple *t = data;
1220 rdf_db *db = client;
1221
1222 if ( !db->resetting )
1223 { unlock_atoms(db, t);
1224 if ( t->object_is_literal && t->object.literal )
1225 free_literal(db, t->object.literal);
1226 #ifdef COMPACT
1227 unregister_triple(db, t);
1228 #endif
1229 }
1230 SECURE(memset(t, 0, sizeof(*t)));
1231 TMAGIC(t, T_FREED);
1232 ATOMIC_SUB(&db->lingering, 1);
1233 }
1234
1235
1236 /*******************************
1237 * TRIPLE WALKER *
1238 *******************************/
1239
/* init_triple_walker() and next_triple() are the primitives to walk indexed
   triples. The pattern is:

	triple_walker tw;

	init_triple_walker(&tw, db, pattern, index);
	while((t=next_triple(&tw)))
	  <do your job>

   TBD: Get the generation into this story. Most likely it is better to
   deal with this in this low-level loop than outside. We will handle
   this in the next cycle.
*/
1253
1254 static void
init_triple_walker(triple_walker * tw,rdf_db * db,triple * pattern,int which)1255 init_triple_walker(triple_walker *tw, rdf_db *db, triple *pattern, int which)
1256 { tw->unbounded_hash = triple_hash_key(pattern, which);
1257 tw->current = NULL;
1258 tw->icol = ICOL(which);
1259 tw->db = db;
1260 if ( !tw->db->hash[tw->icol].created )
1261 create_triple_hashes(db, 1, &tw->icol);
1262 tw->bcount = tw->db->hash[tw->icol].bucket_count_epoch;
1263 }
1264
1265
1266 static void
init_triple_literal_walker(triple_walker * tw,rdf_db * db,triple * pattern,int which,unsigned int hash)1267 init_triple_literal_walker(triple_walker *tw, rdf_db *db,
1268 triple *pattern, int which, unsigned int hash)
1269 { tw->unbounded_hash = hash;
1270 tw->current = NULL;
1271 tw->icol = ICOL(which);
1272 tw->db = db;
1273 if ( !tw->db->hash[tw->icol].created )
1274 create_triple_hashes(db, 1, &tw->icol);
1275 tw->bcount = tw->db->hash[tw->icol].bucket_count_epoch;
1276 }
1277
1278
1279 static void
rewind_triple_walker(triple_walker * tw)1280 rewind_triple_walker(triple_walker *tw)
1281 { tw->bcount = tw->db->hash[tw->icol].bucket_count_epoch;
1282 tw->current = NULL;
1283 }
1284
1285
/* Move the walker to the next bucket that can hold triples for
   tw->unbounded_hash and return the head of that bucket's chain, or
   NULL if all bucket counts up to hash->bucket_count have been tried.
   On success tw->current is set to the triple following the returned
   one, so next_triple() continues in the same chain.
*/

static triple *
next_hash_triple(triple_walker *tw)
{ triple *rc;
  triple_hash *hash = &tw->db->hash[tw->icol];

  if ( tw->bcount <= hash->bucket_count )
  { do
    { int entry = tw->unbounded_hash % tw->bcount;
      triple_bucket *bucket = &hash->blocks[MSB(entry)][entry];

      rc = fetch_triple(tw->db, bucket->head);
      do			/* double bcount until the key maps to */
      { tw->bcount *= 2;	/* another entry, so the same bucket */
      } while ( tw->bcount <= hash->bucket_count &&	/* is not revisited */
		tw->unbounded_hash % tw->bcount == entry );
    } while(!rc && tw->bcount <= hash->bucket_count );	/* skip empty buckets */

    if ( rc )
      tw->current = triple_follow_hash(tw->db, rc, tw->icol);
  } else
  { rc = NULL;
  }

  return rc;
}
1311
1312
1313 static inline triple *
next_triple(triple_walker * tw)1314 next_triple(triple_walker *tw)
1315 { triple *rc;
1316
1317 if ( (rc=tw->current) )
1318 { tw->current = triple_follow_hash(tw->db, rc, tw->icol);
1319
1320 return rc;
1321 } else
1322 { return next_hash_triple(tw);
1323 }
1324 }
1325
1326
1327 static inline void
destroy_triple_walker(rdf_db * db,triple_walker * tw)1328 destroy_triple_walker(rdf_db *db, triple_walker *tw)
1329 {
1330 }
1331
1332
1333 /*******************************
1334 * PREDICATES *
1335 *******************************/
1336
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Predicates are represented as first class citizens for three reasons:
quickly answer on the transitive rdfs:subPropertyOf relation for
rdf_has/3, keep track of statistics that are useful for query
optimization (#triples, branching factor) and keep properties
(inverse/transitive).

To answer the rdfs:subPropertyOf quickly, predicates are organised in
`clouds', where a cloud defines a set of predicates connected through
rdfs:subPropertyOf triples. The cloud numbers its members and maintains
a bit-matrix that contains the closure of the reachability. Initially a
predicate has a simple cloud of size 1. merge_clouds() and split_cloud()
deal with adding and deleting rdfs:subPropertyOf relations. These
operations try to modify the clouds that have no triples, so it can be
done without a rehash. If this fails, the predicates keep their own hash
to make search without rdfs:subPropertyOf still possible (so we can
avoid frequent updates while loading triples), and the cloud `dirty'
flag and the DB's need_update flag are set. Queries that need
rdfs:subPropertyOf find the need_update flag and call
organise_predicates(), which causes a rehash if some predicates have
changed hash-code to that of the new cloud they have become part of.

TBD: We can do a partial re-hash in that case!
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1361
1362 static int
init_pred_table(rdf_db * db)1363 init_pred_table(rdf_db *db)
1364 { size_t bytes = sizeof(predicate**)*INITIAL_PREDICATE_TABLE_SIZE;
1365 predicate **p = PL_malloc_uncollectable(bytes);
1366 int i, count = INITIAL_PREDICATE_TABLE_SIZE;
1367
1368 memset(p, 0, bytes);
1369 for(i=0; i<MSB(count); i++)
1370 db->predicates.blocks[i] = p;
1371
1372 db->predicates.bucket_count = count;
1373 db->predicates.bucket_count_epoch = count;
1374 db->predicates.count = 0;
1375
1376 return TRUE;
1377 }
1378
1379
/* Double the number of buckets of the predicate table.  A new zeroed
   block holding as many bucket heads as the table currently has is
   allocated and stored in the next blocks[] slot.  Always returns TRUE.
*/

static int
resize_pred_table(rdf_db *db)
{ int i = MSB(db->predicates.bucket_count);
  size_t bytes = sizeof(predicate**)*db->predicates.bucket_count;
  predicate **p = PL_malloc_uncollectable(bytes);

  memset(p, 0, bytes);
					/* bias the pointer such that the block */
					/* is indexed by the global entry number */
  db->predicates.blocks[i] = p-db->predicates.bucket_count;
  db->predicates.bucket_count *= 2;	/* set after the block is in place */
  DEBUG(1, Sdprintf("Resized predicate table to %ld\n",
		    (long)db->predicates.bucket_count));

  return TRUE;
}
1394
1395
/* State for enumerating the predicates that hash to the same key as a
   given name; see init_predicate_walker() and next_predicate().
*/

typedef struct pred_walker
{ rdf_db *db;				/* RDF DB */
  atom_t name;				/* Name of the predicate */
  size_t unbounded_hash;		/* Atom's hash (before taking modulo) */
  size_t bcount;			/* current bucket count; doubled */
					/* by next_predicate() */
  predicate *current;			/* next predicate in the chain to */
					/* return, or NULL */
} pred_walker;
1403
1404
1405 static void
init_predicate_walker(pred_walker * pw,rdf_db * db,atom_t name)1406 init_predicate_walker(pred_walker *pw, rdf_db *db, atom_t name)
1407 { pw->db = db;
1408 pw->name = name;
1409 pw->unbounded_hash = atom_hash(name, MURMUR_SEED);
1410 pw->bcount = db->predicates.bucket_count_epoch;
1411 pw->current = NULL;
1412 }
1413
1414 static predicate*
next_predicate(pred_walker * pw)1415 next_predicate(pred_walker *pw)
1416 { predicate *p;
1417
1418 if ( pw->current )
1419 { p = pw->current;
1420 pw->current = p->next;
1421 } else if ( pw->bcount <= pw->db->predicates.bucket_count )
1422 { do
1423 { int entry = pw->unbounded_hash % pw->bcount;
1424 p = pw->db->predicates.blocks[MSB(entry)][entry];
1425 pw->bcount *= 2;
1426 } while(!p && pw->bcount <= pw->db->predicates.bucket_count );
1427
1428 if ( p )
1429 pw->current = p->next;
1430 } else
1431 return NULL;
1432
1433 return p;
1434 }
1435
1436
1437 static predicate *
existing_predicate(rdf_db * db,atom_t name)1438 existing_predicate(rdf_db *db, atom_t name)
1439 { pred_walker pw;
1440 predicate *p;
1441
1442 init_predicate_walker(&pw, db, name);
1443 while((p=next_predicate(&pw)))
1444 { if ( p->name == name )
1445 return p;
1446 }
1447
1448 return NULL;
1449 }
1450
1451
/* Return the predicate named `name`, creating it if it does not exist.
   The table is first searched without locking.  If that fails, the
   search is repeated while holding LOCK_MISC before a new predicate is
   allocated, given its own cloud of size 1 and pushed on the head of
   its bucket chain.  The atom is registered for a new predicate.
*/

predicate *
lookup_predicate(rdf_db *db, atom_t name)
{ predicate *p, **pp;
  predicate_cloud *cp;
  int entry;

  if ( (p=existing_predicate(db, name)) )	/* unlocked lookup */
    return p;

  LOCK_MISC(db);
  if ( (p=existing_predicate(db, name)) )	/* retry while holding the lock */
  { UNLOCK_MISC(db);
    return p;
  }

  p = rdf_malloc(db, sizeof(*p));
  memset(p, 0, sizeof(*p));
  p->name = name;
  cp = new_predicate_cloud(db, &p, 1);		/* cloud with p as only member */
  p->hash = cp->hash;
  PL_register_atom(name);
  if ( db->predicates.count > db->predicates.bucket_count )
    resize_pred_table(db);
  entry = atom_hash(name, MURMUR_SEED) % db->predicates.bucket_count;
  pp = &db->predicates.blocks[MSB(entry)][entry];
  p->next = *pp;				/* link p before publishing it */
  *pp = p;
  db->predicates.count++;
  DEBUG(5, Sdprintf("Pred %s (count = %d)\n",
		    PL_atom_chars(name), db->predicates.count));
  UNLOCK_MISC(db);

  return p;
}
1486
1487
1488 static const char *
pname(predicate * p)1489 pname(predicate *p)
1490 { if ( p->name )
1491 return PL_atom_chars(p->name);
1492 else
1493 { static char *ring[10];
1494 static int ri = 0;
1495 char buf[25];
1496 char *r;
1497
1498 Ssprintf(buf, "__D%p", p);
1499 ring[ri++] = r = strdup(buf);
1500 if ( ri == 10 )
1501 { ri = 0;
1502 free(ring[ri]);
1503 }
1504
1505 return (const char*)r;
1506 }
1507 }
1508
1509
1510 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1511 Keep track of the triple count.
1512 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1513
1514 static inline void
register_predicate(rdf_db * db,triple * t)1515 register_predicate(rdf_db *db, triple *t)
1516 { ATOMIC_ADD(&t->predicate.r->triple_count, 1);
1517 }
1518
1519
1520 static inline void
unregister_predicate(rdf_db * db,triple * t)1521 unregister_predicate(rdf_db *db, triple *t)
1522 { ATOMIC_SUB(&t->predicate.r->triple_count, 1);
1523 }
1524
1525
1526 /*******************************
1527 * PREDICATE CLOUDS *
1528 *******************************/
1529
1530 static predicate_cloud *
new_predicate_cloud(rdf_db * db,predicate ** p,size_t count)1531 new_predicate_cloud(rdf_db *db, predicate **p, size_t count)
1532 { predicate_cloud *cloud = rdf_malloc(db, sizeof(*cloud));
1533
1534 memset(cloud, 0, sizeof(*cloud));
1535 cloud->hash = rdf_murmer_hash(&cloud, sizeof(cloud), PRED_MURMUR_SEED);
1536 if ( count )
1537 { int i;
1538 predicate **p2;
1539
1540 cloud->size = count;
1541 cloud->members = rdf_malloc(db, sizeof(predicate*)*count);
1542 memcpy(cloud->members, p, sizeof(predicate*)*count);
1543
1544 for(i=0, p2=cloud->members; i<cloud->size; i++, p2++)
1545 { (*p2)->cloud = cloud;
1546 (*p2)->label = i;
1547 }
1548 }
1549
1550 return cloud;
1551 }
1552
1553
1554 static void
finalize_cloud(void * data,void * client)1555 finalize_cloud(void *data, void *client)
1556 { rdf_db *db = client;
1557 predicate_cloud *cloud = data;
1558 sub_p_matrix *rm, *rm2;
1559
1560 if ( cloud->members )
1561 rdf_free(db, cloud->members, sizeof(predicate*)*cloud->size);
1562
1563 for(rm=cloud->reachable; rm; rm=rm2)
1564 { rm2 = rm->older;
1565
1566 free_reachability_matrix(db, rm);
1567 }
1568 }
1569
1570
1571 static void
free_predicate_cloud(rdf_db * db,predicate_cloud * cloud)1572 free_predicate_cloud(rdf_db *db, predicate_cloud *cloud)
1573 { finalize_cloud(cloud, db);
1574
1575 rdf_free(db, cloud, sizeof(*cloud));
1576 }
1577
1578
1579 static size_t
triples_in_predicate_cloud(predicate_cloud * cloud)1580 triples_in_predicate_cloud(predicate_cloud *cloud)
1581 { size_t triples = 0;
1582 predicate **p;
1583 int i;
1584
1585 for(i=0, p=cloud->members; i<cloud->size; i++, p++)
1586 triples += (*p)->triple_count;
1587
1588 return triples;
1589 }
1590
1591
1592 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1593 gc_cloud() removes old reachability matrices. As the query generation
1594 has passed, we can immediately remove the old bitmap. We must leave the
1595 sub_p_matrix struct to GC as someone might be walking the chain.
1596 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1597
/* Remove all reachability matrices of `cloud` that died before
   generation `gen`.  The bitmatrix is freed immediately; the
   sub_p_matrix node is unlinked and handed to deferred_free().
*/

static void
gc_cloud(rdf_db *db, predicate_cloud *cloud, gen_t gen)
{ sub_p_matrix *rm, *older;
  sub_p_matrix *prev = NULL;		/* last node we kept */

  for(rm=cloud->reachable; rm; rm=older)
  { older = rm->older;			/* fetch before rm is discarded */

    if ( rm->lifespan.died < gen )
    { if ( prev )
      { prev->older = older;
      } else
      { simpleMutexLock(&db->locks.misc); /* sync with */
	cloud->reachable = older;	  /* create_reachability_matrix() */
	simpleMutexUnlock(&db->locks.misc);
      }

      free_bitmatrix(db, rm->matrix);
      rm->matrix = NULL;		/* Clean to avoid false pointers */
      memset(&rm->lifespan, 0, sizeof(rm->lifespan));
      deferred_free(&db->defer_clouds, rm);
    } else
    { prev = rm;
    }
  }
}
1624
1625
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
GC all clouds. We walk the predicates and keep a flag on the cloud in
which GC run it was collected to avoid collecting a cloud multiple times
in the same GC run. Alternatively, we could keep a list of possibly
dirty clouds, but that is more complicated and most likely not worth the
trouble. After all, we might walk many predicates for few clouds, but
generally the number of predicates is still small compared to the number
of triples and thus the total cost in the GC process will be small.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1635
1636 static int
gc_clouds(rdf_db * db,gen_t gen)1637 gc_clouds(rdf_db *db, gen_t gen)
1638 { int i;
1639 int gc_id = db->gc.count+1;
1640
1641 enter_scan(&db->defer_all);
1642 for(i=0; i<db->predicates.bucket_count; i++)
1643 { predicate *p = db->predicates.blocks[MSB(i)][i];
1644
1645 for( ; p; p = p->next )
1646 { if ( p->cloud->last_gc != gc_id )
1647 { p->cloud->last_gc = gc_id;
1648
1649 gc_cloud(db, p->cloud, gen);
1650 if ( PL_handle_signals() < 0 )
1651 return -1;
1652 }
1653 gc_is_leaf(db, p, gen);
1654 }
1655 }
1656 exit_scan(&db->defer_all);
1657
1658 return 0;
1659 }
1660
1661
1662 static void
invalidateReachability(predicate_cloud * cloud,query * q)1663 invalidateReachability(predicate_cloud *cloud, query *q)
1664 { sub_p_matrix *rm;
1665 gen_t gen_max = query_max_gen(q);
1666
1667 for(rm=cloud->reachable; rm; rm=rm->older)
1668 { if ( rm->lifespan.died == gen_max )
1669 rm->lifespan.died = queryWriteGen(q);
1670 }
1671 }
1672
1673
1674 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1675 Append the predicates from cloud C2 to those of cloud C1. There are two
1676 scenarios:
1677
1678 - C2 has no triples. We are in a writer lock. As there are no
1679 triples for C2, queries cannot go wrong.
1680 - C2 has triples. It is possible that queries with the predicate
1681 hash of C2 are in progress. See comment at merge_clouds() for
1682 how this is handled.
1683 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1684
1685 static predicate_cloud *
append_clouds(rdf_db * db,predicate_cloud * c1,predicate_cloud * c2,int update_hash)1686 append_clouds(rdf_db *db,
1687 predicate_cloud *c1, predicate_cloud *c2,
1688 int update_hash)
1689 { int i;
1690 predicate **new_members;
1691 predicate **old_members = c1->members;
1692
1693 new_members = rdf_malloc(db, (c1->size+c2->size)*sizeof(predicate*));
1694 memcpy(&new_members[0], c1->members, c1->size*sizeof(predicate*));
1695 memcpy(&new_members[c1->size], c2->members, c2->size*sizeof(predicate*));
1696 c1->members = new_members;
1697 deferred_free(&db->defer_clouds, old_members);
1698
1699 /* re-label the new ones */
1700 for(i=c1->size; i<c1->size+c2->size; i++)
1701 { predicate *p = c1->members[i];
1702
1703 p->cloud = c1;
1704 p->label = i;
1705 if ( update_hash )
1706 p->hash = c1->hash;
1707 }
1708 c1->size += c2->size;
1709
1710 if ( !update_hash )
1711 { size_t newc = 0;
1712
1713 if ( c1->alt_hash_count )
1714 newc += c1->alt_hash_count;
1715 else
1716 newc++;
1717
1718 if ( c2->alt_hash_count )
1719 newc += c2->alt_hash_count;
1720 else
1721 newc++;
1722
1723 DEBUG(1, Sdprintf("Cloud %p: %d alt-hashes\n", c1, newc));
1724
1725 if ( c1->alt_hashes )
1726 { unsigned int *new_hashes;
1727 unsigned int *old_hashes = c1->alt_hashes;
1728
1729 new_hashes = rdf_malloc(db, newc*sizeof(unsigned int));
1730 memcpy(&new_hashes[0], c1->alt_hashes,
1731 c1->alt_hash_count*sizeof(unsigned int));
1732 MEMORY_BARRIER();
1733 c1->alt_hashes = new_hashes;
1734 deferred_free(&db->defer_clouds, old_hashes);
1735 } else
1736 { c1->alt_hashes = rdf_malloc(db, newc*sizeof(unsigned int));
1737 c1->alt_hashes[0] = c1->hash;
1738 MEMORY_BARRIER();
1739 c1->alt_hash_count = 1;
1740 }
1741
1742 if ( c2->alt_hash_count )
1743 { memcpy(&c1->alt_hashes[c1->alt_hash_count],
1744 c2->alt_hashes, c2->alt_hash_count*sizeof(unsigned int));
1745 } else
1746 { c1->alt_hashes[c1->alt_hash_count] = c2->hash;
1747 }
1748 MEMORY_BARRIER();
1749 c1->alt_hash_count = newc;
1750 }
1751
1752 deferred_finalize(&db->defer_clouds, c2,
1753 finalize_cloud, db);
1754
1755 return c1;
1756 }
1757
1758
1759 /* merge two predicate clouds. Note that this code is only called
1760 from addSubPropertyOf(). If c1==c2, we added an rdfs:subPropertyOf
1761 between two predicates in the same cloud. we must still invalidate
1762 the matrix.
1763 */
1764
1765 static predicate_cloud *
merge_clouds(rdf_db * db,predicate_cloud * c1,predicate_cloud * c2,query * q)1766 merge_clouds(rdf_db *db, predicate_cloud *c1, predicate_cloud *c2, query *q)
1767 { predicate_cloud *cloud;
1768
1769 if ( c1 != c2 )
1770 { size_t tc1, tc2;
1771
1772 if ( (tc1=triples_in_predicate_cloud(c1)) == 0 )
1773 { cloud = append_clouds(db, c2, c1, TRUE);
1774 } else if ( (tc2=triples_in_predicate_cloud(c2)) == 0 )
1775 { cloud = append_clouds(db, c1, c2, TRUE);
1776 } else
1777 { predicate_cloud *reindex;
1778
1779 if ( tc2 < tc1 )
1780 { cloud = c1;
1781 reindex = c2;
1782 } else
1783 { cloud = c2;
1784 reindex = c1;
1785 }
1786
1787 cloud = append_clouds(db, cloud, reindex, FALSE);
1788 }
1789 } else
1790 { cloud = c1;
1791 }
1792
1793 invalidateReachability(cloud, q);
1794
1795 return cloud;
1796 }
1797
1798
1799 static size_t
predicate_hash(predicate * p)1800 predicate_hash(predicate *p)
1801 { return p->hash;
1802 }
1803
1804
1805 static void
addSubPropertyOf(rdf_db * db,triple * t,query * q)1806 addSubPropertyOf(rdf_db *db, triple *t, query *q)
1807 { predicate *sub = lookup_predicate(db, ID_ATOM(t->subject_id));
1808 predicate *super = lookup_predicate(db, t->object.resource);
1809
1810 DEBUG(3, Sdprintf("addSubPropertyOf(%s, %s)\n",
1811 pname(sub), pname(super)));
1812
1813 invalidate_is_leaf(super, q, TRUE);
1814
1815 if ( add_list(db, &sub->subPropertyOf, super) )
1816 { add_list(db, &super->siblings, sub);
1817 merge_clouds(db, sub->cloud, super->cloud, q);
1818 } else
1819 { predicate_cloud *cloud;
1820
1821 cloud = super->cloud;
1822 assert(cloud == sub->cloud);
1823
1824 invalidateReachability(cloud, q);
1825 }
1826 }
1827
1828
1829 /* deleting an rdfs:subPropertyOf. This is a bit naughty. If the
1830 cloud is still connected we only need to refresh the reachability
1831 matrix. Otherwise the cloud breaks in maximum two clusters. We
1832 can decide to leave it as is, which is simpler to implement
1833 but harms indexing.
1834
1835 TBD: If the cloud becomes disconnected, it may be split.
1836 */
1837
1838 static void
delSubPropertyOf(rdf_db * db,triple * t,query * q)1839 delSubPropertyOf(rdf_db *db, triple *t, query *q)
1840 { predicate *sub = lookup_predicate(db, ID_ATOM(t->subject_id));
1841 predicate *super = lookup_predicate(db, t->object.resource);
1842 predicate_cloud *cloud;
1843
1844 DEBUG(3, Sdprintf("delSubPropertyOf(%s, %s)\n",
1845 pname(sub), pname(super)));
1846
1847 invalidate_is_leaf(super, q, FALSE);
1848
1849 if ( del_list(db, &sub->subPropertyOf, super) )
1850 { del_list(db, &super->siblings, sub);
1851 }
1852
1853 cloud = super->cloud;
1854 assert(cloud == sub->cloud);
1855
1856 invalidateReachability(cloud, q);
1857 }
1858
1859
1860 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1861 Reachability matrix.
1862 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1863
/* Number of bits in an int; used to address bits in a bitmatrix */
#define WBITSIZE (sizeof(int)*8)
1865
1866 static size_t
byte_size_bitmatrix(size_t w,size_t h)1867 byte_size_bitmatrix(size_t w, size_t h)
1868 { size_t wsize = ((w*h)+WBITSIZE-1)/WBITSIZE;
1869
1870 return (size_t)(intptr_t)&((bitmatrix*)NULL)->bits[wsize];
1871 }
1872
1873
1874 static bitmatrix *
alloc_bitmatrix(rdf_db * db,size_t w,size_t h)1875 alloc_bitmatrix(rdf_db *db, size_t w, size_t h)
1876 { size_t size = byte_size_bitmatrix(w, h);
1877 bitmatrix *m = rdf_malloc(db, size);
1878
1879 memset(m, 0, size);
1880 m->width = w;
1881 m->heigth = h;
1882
1883 return m;
1884 }
1885
1886
1887 static void
free_bitmatrix(rdf_db * db,bitmatrix * bm)1888 free_bitmatrix(rdf_db *db, bitmatrix *bm)
1889 { size_t size = byte_size_bitmatrix(bm->width, bm->heigth);
1890
1891 rdf_free(db, bm, size);
1892 }
1893
1894
1895 #undef setbit /* conflict in HPUX 11.23 */
1896
1897 static void
setbit(bitmatrix * m,int i,int j)1898 setbit(bitmatrix *m, int i, int j)
1899 { size_t ij = m->width*i+j;
1900 size_t word = ij/WBITSIZE;
1901 int bit = ij%WBITSIZE;
1902
1903 m->bits[word] |= 1<<bit;
1904 }
1905
1906
1907 static int
testbit(bitmatrix * m,int i,int j)1908 testbit(bitmatrix *m, int i, int j)
1909 { size_t ij = m->width*i+j;
1910 size_t word = ij/WBITSIZE;
1911 int bit = ij%WBITSIZE;
1912
1913 return ((m->bits[word] & (1<<bit)) != 0);
1914 }
1915
1916
1917 static int
check_labels_predicate_cloud(predicate_cloud * cloud)1918 check_labels_predicate_cloud(predicate_cloud *cloud)
1919 { predicate **p;
1920 int i;
1921
1922 for(i=0, p=cloud->members; i<cloud->size; i++, p++)
1923 assert((*p)->label == i);
1924
1925 return i;
1926 }
1927
1928 static void
update_valid(lifespan * valid,gen_t change)1929 update_valid(lifespan *valid, gen_t change)
1930 { if ( change < valid->died )
1931 { if ( valid->died <= GEN_MAX || /* both non-transaction */
1932 change > GEN_MAX ) /* both in transaction */
1933 valid->died = change;
1934 }
1935 }
1936
1937
1938 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1939 Match triple t against pattern p in query q. Update the died-property of
1940 valid if the triple matches now, but will not after some generation
1941 (i.e., it will die) or the triple must still be born.
1942 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1943
1944 static triple *
matching_object_triple_until(rdf_db * db,triple * t,triple * p,query * q,unsigned flags,lifespan * valid)1945 matching_object_triple_until(rdf_db *db, triple *t, triple *p, query *q,
1946 unsigned flags, lifespan *valid)
1947 { triple *t2;
1948
1949 if ( (t2=alive_triple(q, t)) )
1950 { if ( match_triples(db, t2, p, q, 0) &&
1951 !t2->object_is_literal ) /* object properties only */
1952 { if ( t2->lifespan.died != query_max_gen(q) )
1953 { DEBUG(1, Sdprintf("Limit lifespan due to dead: ");
1954 print_triple(t2, PRT_GEN|PRT_NL));
1955 update_valid(valid, t2->lifespan.died);
1956 }
1957
1958 return t2;
1959 }
1960 } else
1961 { t2 = deref_triple(db, t); /* Dubious */
1962
1963 if ( match_triples(db, t2, p, q, 0) &&
1964 !t2->object_is_literal )
1965 { if ( !t2->erased &&
1966 !born_lifespan(q, &t2->lifespan) )
1967 { DEBUG(1, Sdprintf("Limit lifespan due to new born: ");
1968 print_triple(t2, PRT_GEN|PRT_NL));
1969 update_valid(valid, t2->lifespan.born);
1970 }
1971 }
1972 }
1973
1974 return NULL;
1975 }
1976
1977
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fill_reachable() computes the transitive closure of the
rdfs:subPropertyOf relation. In addition, it maintains the generation
valid_until, which expresses the maximum generation until when the
reachability matrix is valid. This is needed if we compute a
reachability matrix for an older generation.

TBD: The code below probably doesn't work properly inside a transaction
due to the complicated generation reasoning there. This must be
clarified and cleaned.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1989
1990 static void
fill_reachable(rdf_db * db,predicate_cloud * cloud,bitmatrix * bm,predicate * p0,predicate * p,query * q,lifespan * valid)1991 fill_reachable(rdf_db *db,
1992 predicate_cloud *cloud,
1993 bitmatrix *bm,
1994 predicate *p0, predicate *p,
1995 query *q,
1996 lifespan *valid)
1997 { if ( !testbit(bm, p0->label, p->label) )
1998 { triple pattern;
1999 triple *t;
2000 triple_walker tw;
2001
2002 memset(&pattern, 0, sizeof(pattern));
2003
2004 DEBUG(3, Sdprintf(" Reachable [%s (%d)]\n", pname(p), p->label));
2005 setbit(bm, p0->label, p->label);
2006 pattern.subject_id = ATOM_ID(p->name);
2007 pattern.predicate.r = existing_predicate(db, ATOM_subPropertyOf);
2008 init_triple_walker(&tw, db, &pattern, BY_SP);
2009 while((t=next_triple(&tw)))
2010 { triple *t2;
2011
2012 if ( (t2=matching_object_triple_until(db, t, &pattern, q, 0, valid)) )
2013 { predicate *super;
2014
2015 super = lookup_predicate(db, t2->object.resource);
2016 assert(super->cloud == cloud);
2017 fill_reachable(db, cloud, bm, p0, super, q, valid);
2018 }
2019 }
2020 }
2021 }
2022
2023
2024 static int
is_transaction_start_gen(gen_t gen)2025 is_transaction_start_gen(gen_t gen)
2026 { return (gen-GEN_TBASE)%GEN_TNEST == 0;
2027 }
2028
2029
2030 static void
init_valid_lifespan(rdf_db * db,lifespan * span,query * q)2031 init_valid_lifespan(rdf_db *db, lifespan *span, query *q)
2032 { if ( q->transaction && !is_transaction_start_gen(q->tr_gen) )
2033 { span->born = q->tr_gen;
2034 span->died = query_max_gen(q);
2035 add_list(db, &q->transaction->transaction_data.lifespans, span);
2036 } else
2037 { span->born = q->rd_gen;
2038 span->died = GEN_MAX;
2039 }
2040 }
2041
2042
2043
/* Compute a new reachability matrix for `cloud` as seen by query `q`
   and push it on the head of cloud->reachable.  The lifespan of the
   matrix is initialised from `q` and narrowed while filling the
   matrix.  Returns the new sub_p_matrix.
*/

static sub_p_matrix *
create_reachability_matrix(rdf_db *db, predicate_cloud *cloud, query *q)
{ bitmatrix *m = alloc_bitmatrix(db, cloud->size, cloud->size);
  sub_p_matrix *rm = rdf_malloc(db, sizeof(*rm));
  predicate **p;
  int i;

  init_valid_lifespan(db, &rm->lifespan, q);

  DEBUG(1, { char buf[4][24];
	     Sdprintf("Create matrix for q at %s/%s, valid %s..%s\n",
		      gen_name(q->rd_gen, buf[0]),
		      gen_name(q->tr_gen, buf[1]),
		      gen_name(rm->lifespan.born, buf[2]),
		      gen_name(rm->lifespan.died, buf[3]));
	   });

  check_labels_predicate_cloud(cloud);
  for(i=0, p=cloud->members; i<cloud->size; i++, p++)	/* one row per member */
  { DEBUG(2, Sdprintf("Reachability for %s (%d)\n", pname(*p), (*p)->label));

    fill_reachable(db, cloud, m, *p, *p, q, &rm->lifespan);
  }

  DEBUG(1, { char buf[2][24];
	     Sdprintf("Created matrix, valid %s..%s\n",
		      gen_name(rm->lifespan.born, buf[0]),
		      gen_name(rm->lifespan.died, buf[1]));
	   });

  rm->matrix = m;
  simpleMutexLock(&db->locks.misc);	/* sync with gc_cloud() */
  rm->older = cloud->reachable;
  MEMORY_BARRIER();			/* rm is complete before it is linked */
  cloud->reachable = rm;
  simpleMutexUnlock(&db->locks.misc);

  return rm;
}
2083
2084
2085 /* FIXME: we probably cannot guarantee these are not being
2086 accessed. I.e., we must use GC lingering on them
2087 */
2088
2089 static void
free_reachability_matrix(rdf_db * db,sub_p_matrix * rm)2090 free_reachability_matrix(rdf_db *db, sub_p_matrix *rm)
2091 { free_bitmatrix(db, rm->matrix);
2092
2093 rdf_free(db, rm, sizeof(*rm));
2094 }
2095
2096
2097 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2098 isSubPropertyOf() is true if sub is an rdfs:subPropertyOf p (transitive)
2099 for the given query q. If two predicates are connected through
2100 rdfs:subPropertyOf, they belong to the same `cloud'. The cloud keeps one
2101 or more bitmatrices with the entailment of all rdfs:subPropertyOf
2102 triples. Each bitmatrix is valid during a certain lifespan (set of
2103 generations).
2104
2105 isSubPropertyOf() runs concurrently with updates and must be careful in
2106 its processing to deal with the modifications realised by
2107 addSubPropertyOf() and delSubPropertyOf(). The critical path is if
2108 addSubPropertyOf() connects two clouds, both having multiple predicates
2109 and both clouds have triples.
2110
2111 It is solved as follows. Suppose cloud C2 is merged into cloud C1, we
2112 take the following steps:
2113
2114 - The predicates from C2 are added at the end of the ->members of C1.
2115 C1->size is updated.
2116 - This has no consequences for running queries that need the old
2117 entailment of the subPropertyOf anyway.
2118 - The cloud C2 gets ->merged_into set to C1
2119 - The cloud of a predicate is reached by following the ->merged_into
2120 chain. If such a link is followed, predicate->label (the index in
2121 the predicate cloud) is invalid and we must compute it.
2122 - For each member of C2
2123 - update <-label to the label in C1
2124 update <-cloud to C1
2125 - Leave C2 to Boehm-GC
  - Add the hash-key of C2 to the alt-hashes of C1.  Queries that
    involve sub-property on C1 must re-run the query with each
    alt-hash that has a predicate that is a sub-property of
    the target.  TBD: find a good compromise between computing and
    storing yet additional closures.
2131 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2132
2133 static predicate_cloud *
cloud_of(predicate * p,int * labelp)2134 cloud_of(predicate *p, int *labelp)
2135 { predicate_cloud *pc = p->cloud;
2136 int i;
2137
2138 if ( !pc->merged_into )
2139 { *labelp = p->label;
2140 return pc;
2141 }
2142
2143 while(!pc->merged_into)
2144 pc = pc->merged_into;
2145
2146 for(i=0; i<pc->size; i++)
2147 { if ( pc->members[i] == p )
2148 { *labelp = i;
2149 return pc;
2150 }
2151 }
2152
2153 assert(0);
2154 return 0;
2155 }
2156
2157
2158 static int
isSubPropertyOf(rdf_db * db,predicate * sub,predicate * p,query * q)2159 isSubPropertyOf(rdf_db *db, predicate *sub, predicate *p, query *q)
2160 { predicate_cloud *pc;
2161 int sub_label, p_label;
2162
2163 assert(sub != p);
2164
2165 pc = cloud_of(sub, &sub_label);
2166 if ( pc == cloud_of(p, &p_label) )
2167 { sub_p_matrix *rm;
2168 int max_label = (sub_label > p_label ? sub_label : p_label);
2169
2170 for(rm=pc->reachable; rm; rm=rm->older)
2171 { if ( alive_lifespan(q, &rm->lifespan) &&
2172 max_label < rm->matrix->width )
2173 return testbit(rm->matrix, sub_label, p_label);
2174 }
2175
2176 if ( (rm = create_reachability_matrix(db, pc, q)) )
2177 { assert(alive_lifespan(q, &rm->lifespan));
2178 return testbit(rm->matrix, sub_label, p_label);
2179 } else
2180 assert(0);
2181 }
2182
2183 return FALSE;
2184 }
2185
2186
2187 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2188 is_leaf_predicate() is true if p has no children in the
2189 rdfs:subPropertyOf tree at query q. We cache this information.
2190
2191 FIXME: Note that this code is subject to race conditions. If we want to
2192 avoid that without using locks, we must put the validity information in
2193 a separate object that is not modified.
2194 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2195
2196 static int
is_leaf_predicate(rdf_db * db,predicate * p,query * q)2197 is_leaf_predicate(rdf_db *db, predicate *p, query *q)
2198 { is_leaf *data;
2199 triple pattern;
2200 triple_walker tw;
2201 triple *t;
2202
2203 memset(&pattern, 0, sizeof(pattern));
2204
2205 for( data=p->is_leaf; data; data=data->older )
2206 { if ( alive_lifespan(q, &data->lifespan) )
2207 return data->is_leaf;
2208 }
2209
2210 data = rdf_malloc(db, sizeof(*data));
2211 init_valid_lifespan(db, &data->lifespan, q);
2212
2213 if ( (pattern.predicate.r = existing_predicate(db, ATOM_subPropertyOf)) )
2214 { pattern.object.resource = p->name;
2215
2216 init_triple_walker(&tw, db, &pattern, BY_PO);
2217 while((t=next_triple(&tw)))
2218 { triple *t2;
2219
2220 if ( (t2=matching_object_triple_until(db, t, &pattern, q, 0,
2221 &data->lifespan)) )
2222 { data->is_leaf = FALSE;
2223 break;
2224 } else
2225 data->is_leaf = TRUE;
2226 }
2227 } else /* rdfs:subPropertyOf doesn't exist */
2228 { data->is_leaf = TRUE; /* so all preds are leafs */
2229 }
2230
2231 simpleMutexLock(&db->locks.misc);
2232 data->older = p->is_leaf;
2233 MEMORY_BARRIER();
2234 p->is_leaf = data;
2235 simpleMutexUnlock(&db->locks.misc);
2236
2237 return data->is_leaf;
2238 }
2239
2240
2241 /* invalidate the is_leaf status if a sub-property is added/deleted.
2242 no need to do so if we add a child to a non-leaf.
2243 */
2244
2245 static void
invalidate_is_leaf(predicate * p,query * q,int add)2246 invalidate_is_leaf(predicate *p, query *q, int add)
2247 { gen_t gen_max = query_max_gen(q);
2248 is_leaf *il;
2249
2250 for(il=p->is_leaf; il; il=il->older)
2251 { if ( il->lifespan.died == gen_max )
2252 { if ( !(add && !il->is_leaf) )
2253 il->lifespan.died = queryWriteGen(q);
2254 }
2255 }
2256 }
2257
2258
2259 static void
gc_is_leaf(rdf_db * db,predicate * p,gen_t gen)2260 gc_is_leaf(rdf_db *db, predicate *p, gen_t gen)
2261 { is_leaf *il, *older;
2262 is_leaf *prev = NULL;
2263
2264 for(il = p->is_leaf; il; il=older)
2265 { older = il->older;
2266
2267 if ( il->lifespan.died < gen )
2268 { if ( prev )
2269 { prev->older = older;
2270 } else
2271 { simpleMutexLock(&db->locks.misc); /* sync with */
2272 p->is_leaf = older; /* is_leaf_predicate() */
2273 simpleMutexUnlock(&db->locks.misc);
2274 }
2275
2276 memset(&il->lifespan, 0, sizeof(il->lifespan));
2277 deferred_free(&db->defer_clouds, il);
2278 } else
2279 { prev = il;
2280 }
2281 }
2282 }
2283
2284
2285 static void
free_is_leaf(rdf_db * db,predicate * p)2286 free_is_leaf(rdf_db *db, predicate *p)
2287 { is_leaf *il, *older;
2288
2289 for(il = p->is_leaf; il; il=older)
2290 { older = il->older;
2291
2292 rdf_free(db, il, sizeof(*il));
2293 }
2294
2295 p->is_leaf = NULL;
2296 }
2297
2298
2299 /*******************************
2300 * PRINT PREDICATE HIERARCHY *
2301 *******************************/
2302
2303 static int
check_predicate_cloud(predicate_cloud * c)2304 check_predicate_cloud(predicate_cloud *c)
2305 { predicate **pp;
2306 int errors = 0;
2307 int i;
2308
2309 for(i=0, pp=c->members; i<c->size; i++, pp++)
2310 { predicate *p = *pp;
2311
2312 if ( p->label != i )
2313 { Sdprintf("Wrong label for %s (%d != %d\n", pname(p), i, p->label);
2314 errors++;
2315 }
2316 if ( p->hash != c->hash )
2317 { Sdprintf("Hash of %s doesn't match cloud hash\n", pname(p));
2318 errors++; /* this is now normal! */
2319 }
2320 if ( p->cloud != c )
2321 { Sdprintf("Wrong cloud of %s\n", pname(p));
2322 errors++;
2323 }
2324 }
2325
2326 return errors;
2327 }
2328
2329
2330 static void
print_reachability_cloud(rdf_db * db,predicate * p,int all)2331 print_reachability_cloud(rdf_db *db, predicate *p, int all)
2332 { int x, y;
2333 predicate_cloud *cloud = p->cloud;
2334 sub_p_matrix *rm;
2335 query *q;
2336
2337 Sdprintf("Cloud has %d members, hash = 0x%x\n", cloud->size, cloud->hash);
2338 check_predicate_cloud(cloud);
2339
2340 if ( !(q = open_query(db)) )
2341 { Sdprintf("No more open queries\n");
2342 return;
2343 }
2344
2345 for(rm=cloud->reachable; rm; rm=rm->older)
2346 { char b[2][24];
2347
2348 if ( !all && !alive_lifespan(q, &rm->lifespan) )
2349 continue;
2350
2351 Sdprintf("\nReachability matrix: %s..%s (%s)\n ",
2352 gen_name(rm->lifespan.born, b[0]),
2353 gen_name(rm->lifespan.died, b[1]),
2354 alive_lifespan(q, &rm->lifespan) ? "alive" : "dead");
2355
2356 for(x=0; x<rm->matrix->width; x++)
2357 Sdprintf("%d", x%10);
2358 Sdprintf("\n ");
2359 for(y=0; y<rm->matrix->heigth; y++)
2360 { predicate *yp = cloud->members[y];
2361
2362 for(x=0; x<rm->matrix->width; x++)
2363 { if ( testbit(rm->matrix, x, y) )
2364 Sdprintf("X");
2365 else
2366 Sdprintf(".");
2367 }
2368
2369 if ( predicate_hash(yp) == cloud->hash )
2370 Sdprintf(" %2d %s\n ", y, pname(yp));
2371 else
2372 Sdprintf(" %2d %s (hash=0x%x)\n ", y, pname(yp), predicate_hash(yp));
2373 assert(cloud->members[y]->label == y);
2374 }
2375 }
2376 close_query(q);
2377 }
2378
2379
2380 static foreign_t
rdf_print_predicate_cloud(term_t t,term_t all)2381 rdf_print_predicate_cloud(term_t t, term_t all)
2382 { predicate *p;
2383 rdf_db *db = rdf_current_db();
2384 int print_all;
2385
2386 if ( !get_existing_predicate(db, t, &p) ||
2387 !PL_get_bool_ex(all, &print_all) )
2388 return FALSE; /* error or no predicate */
2389
2390 print_reachability_cloud(db, p, print_all);
2391
2392 return TRUE;
2393 }
2394
2395
2396
2397 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2398 Branching factors are crucial in ordering the statements of a
2399 conjunction. These functions compute the average branching factor in
2400 both directions ("subject --> P --> object" and "object --> P -->
2401 subject") by determining the number of unique values at either side of
2402 the predicate. This number is only recomputed if it is considered
2403 `dirty'.
2404 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2405
2406 static int
update_predicate_counts(rdf_db * db,predicate * p,int which,query * q)2407 update_predicate_counts(rdf_db *db, predicate *p, int which, query *q)
2408 { size_t total = 0;
2409
2410 if ( which == DISTINCT_DIRECT )
2411 { size_t changed;
2412
2413 if ( p->triple_count >= p->distinct_updated[DISTINCT_DIRECT] )
2414 changed = p->triple_count - p->distinct_updated[DISTINCT_DIRECT];
2415 else
2416 changed = p->distinct_updated[DISTINCT_DIRECT] - p->triple_count;
2417
2418 if ( changed < p->distinct_updated[DISTINCT_DIRECT] )
2419 return TRUE;
2420
2421 if ( p->triple_count == 0 )
2422 { p->distinct_count[which] = 0;
2423 p->distinct_subjects[which] = 0;
2424 p->distinct_objects[which] = 0;
2425
2426 return TRUE;
2427 }
2428 } else
2429 { size_t changed = db->queries.generation - p->distinct_updated[DISTINCT_SUB];
2430
2431 if ( changed < p->distinct_count[DISTINCT_SUB] )
2432 return TRUE;
2433 }
2434
2435 { atomset subject_set;
2436 atomset object_set;
2437 triple t;
2438 triple *byp;
2439 triple_walker tw;
2440
2441 memset(&t, 0, sizeof(t));
2442 t.predicate.r = p;
2443 t.indexed |= BY_P;
2444
2445 init_atomset(&subject_set);
2446 init_atomset(&object_set);
2447 init_triple_walker(&tw, db, &t, t.indexed);
2448 while((byp=next_triple(&tw)))
2449 { if ( byp->lifespan.died == GEN_MAX && !byp->is_duplicate )
2450 { if ( byp->predicate.r == p ||
2451 (which != DISTINCT_DIRECT &&
2452 isSubPropertyOf(db, byp->predicate.r, p, q)) )
2453 { total++;
2454 add_atomset(&subject_set, ID_ATOM(byp->subject_id));
2455 add_atomset(&object_set, object_hash(byp)); /* NOTE: not exact! */
2456 }
2457 }
2458 }
2459
2460 p->distinct_count[which] = total;
2461 p->distinct_subjects[which] = subject_set.count;
2462 p->distinct_objects[which] = object_set.count;
2463
2464 destroy_atomset(&subject_set);
2465 destroy_atomset(&object_set);
2466
2467 if ( which == DISTINCT_DIRECT )
2468 p->distinct_updated[DISTINCT_DIRECT] = total;
2469 else
2470 p->distinct_updated[DISTINCT_SUB] = db->queries.generation;
2471
2472 DEBUG(1, Sdprintf("%s: distinct subjects (%s): %ld, objects: %ld\n",
2473 PL_atom_chars(p->name),
2474 (which == DISTINCT_DIRECT ? "rdf" : "rdfs"),
2475 p->distinct_subjects[which],
2476 p->distinct_objects[which]));
2477 }
2478
2479 return TRUE;
2480 }
2481
2482
2483 static void
invalidate_distinct_counts(rdf_db * db)2484 invalidate_distinct_counts(rdf_db *db)
2485 { int i;
2486
2487 for(i=0; i<db->predicates.bucket_count; i++)
2488 { predicate *p = db->predicates.blocks[MSB(i)][i];
2489
2490 for( ; p; p = p->next )
2491 { p->distinct_updated[DISTINCT_SUB] = 0;
2492 p->distinct_count[DISTINCT_SUB] = 0;
2493 p->distinct_subjects[DISTINCT_SUB] = 0;
2494 p->distinct_objects[DISTINCT_SUB] = 0;
2495 }
2496 }
2497 }
2498
2499
2500 static double
subject_branch_factor(rdf_db * db,predicate * p,query * q,int which)2501 subject_branch_factor(rdf_db *db, predicate *p, query *q, int which)
2502 { if ( !update_predicate_counts(db, p, which, q) )
2503 return FALSE;
2504
2505 if ( p->distinct_subjects[which] == 0 )
2506 return 0.0; /* 0 --> 0 */
2507
2508 return (double)p->distinct_count[which] /
2509 (double)p->distinct_subjects[which];
2510 }
2511
2512
2513 static double
object_branch_factor(rdf_db * db,predicate * p,query * q,int which)2514 object_branch_factor(rdf_db *db, predicate *p, query *q, int which)
2515 { if ( !update_predicate_counts(db, p, which, q) )
2516 return FALSE;
2517
2518 if ( p->distinct_objects[which] == 0 )
2519 return 0.0; /* 0 --> 0 */
2520
2521 return (double)p->distinct_count[which] /
2522 (double)p->distinct_objects[which];
2523 }
2524
2525
2526
2527
2528 /*******************************
2529 * NAMED GRAPHS *
2530 *******************************/
2531
2532 /* MT: all calls must be locked
2533 */
2534
2535 static int
init_graph_table(rdf_db * db)2536 init_graph_table(rdf_db *db)
2537 { size_t bytes = sizeof(graph**)*INITIAL_GRAPH_TABLE_SIZE;
2538 graph **p = PL_malloc_uncollectable(bytes);
2539 int i, count = INITIAL_GRAPH_TABLE_SIZE;
2540
2541 memset(p, 0, bytes);
2542 for(i=0; i<MSB(count); i++)
2543 db->graphs.blocks[i] = p;
2544
2545 db->graphs.bucket_count = count;
2546 db->graphs.bucket_count_epoch = count;
2547 db->graphs.count = 0;
2548 db->graphs.erased = 0;
2549
2550 return TRUE;
2551 }
2552
2553
2554 static int
resize_graph_table(rdf_db * db)2555 resize_graph_table(rdf_db *db)
2556 { int i = MSB(db->graphs.bucket_count);
2557 size_t bytes = sizeof(graph**)*db->graphs.bucket_count;
2558 graph **p = PL_malloc_uncollectable(bytes);
2559
2560 memset(p, 0, bytes);
2561 db->graphs.blocks[i] = p-db->graphs.bucket_count;
2562 db->graphs.bucket_count *= 2;
2563 DEBUG(1, Sdprintf("Resized graph table to %ld\n",
2564 (long)db->graphs.bucket_count));
2565
2566 return TRUE;
2567 }
2568
2569
2570 typedef struct graph_walker
2571 { rdf_db *db; /* RDF DB */
2572 atom_t name; /* Name of the graph */
2573 size_t unbounded_hash; /* Atom's hash */
2574 size_t bcount; /* current bucket count */
2575 graph *current; /* current location */
2576 } graph_walker;
2577
2578
2579 static void
init_graph_walker(graph_walker * gw,rdf_db * db,atom_t name)2580 init_graph_walker(graph_walker *gw, rdf_db *db, atom_t name)
2581 { gw->db = db;
2582 gw->name = name;
2583 gw->unbounded_hash = atom_hash(name, MURMUR_SEED);
2584 gw->bcount = db->graphs.bucket_count_epoch;
2585 gw->current = NULL;
2586 }
2587
2588 static graph*
next_graph(graph_walker * gw)2589 next_graph(graph_walker *gw)
2590 { graph *g;
2591
2592 if ( gw->current )
2593 { g = gw->current;
2594 gw->current = g->next;
2595 } else if ( gw->bcount <= gw->db->graphs.bucket_count )
2596 { do
2597 { int entry = gw->unbounded_hash % gw->bcount;
2598 g = gw->db->graphs.blocks[MSB(entry)][entry];
2599 gw->bcount *= 2;
2600 } while(!g && gw->bcount <= gw->db->graphs.bucket_count );
2601
2602 if ( g )
2603 gw->current = g->next;
2604 } else
2605 return NULL;
2606
2607 return g;
2608 }
2609
2610
2611 static graph *
existing_graph(rdf_db * db,atom_t name)2612 existing_graph(rdf_db *db, atom_t name)
2613 { graph_walker gw;
2614 graph *g;
2615
2616 init_graph_walker(&gw, db, name);
2617 while((g=next_graph(&gw)))
2618 { if ( g->name == name )
2619 return g;
2620 }
2621
2622 return g;
2623 }
2624
2625
2626 static graph *
lookup_graph(rdf_db * db,atom_t name)2627 lookup_graph(rdf_db *db, atom_t name)
2628 { graph *g, **gp;
2629 int entry;
2630
2631 if ( (g=existing_graph(db, name)) && !g->erased )
2632 return g;
2633
2634 LOCK_MISC(db);
2635 if ( (g=existing_graph(db, name)) )
2636 { if ( g->erased )
2637 { memset(g->digest, 0, sizeof(g->digest));
2638 memset(g->unmodified_digest, 0, sizeof(g->unmodified_digest));
2639 g->md5 = TRUE;
2640 g->erased = FALSE;
2641 db->graphs.erased--;
2642 }
2643
2644 UNLOCK_MISC(db);
2645 return g;
2646 }
2647
2648 g = rdf_malloc(db, sizeof(*g));
2649 memset(g, 0, sizeof(*g));
2650 g->name = name;
2651 g->md5 = TRUE;
2652 PL_register_atom(name);
2653 if ( db->graphs.count > db->graphs.bucket_count )
2654 resize_graph_table(db);
2655 entry = atom_hash(name, MURMUR_SEED) % db->graphs.bucket_count;
2656 gp = &db->graphs.blocks[MSB(entry)][entry];
2657 g->next = *gp;
2658 *gp = g;
2659 db->graphs.count++;
2660 UNLOCK_MISC(db);
2661
2662 return g;
2663 }
2664
2665
2666 static void
erase_graphs(rdf_db * db)2667 erase_graphs(rdf_db *db)
2668 { int i;
2669
2670 for(i=0; i<db->graphs.bucket_count; i++)
2671 { graph *n, *g = db->graphs.blocks[MSB(i)][i];
2672
2673 db->graphs.blocks[MSB(i)][i] = NULL;
2674
2675 for( ; g; g = n )
2676 { n = g->next;
2677
2678 PL_unregister_atom(g->name);
2679 if ( g->source )
2680 PL_unregister_atom(g->source);
2681 rdf_free(db, g, sizeof(*g));
2682 }
2683 }
2684
2685 db->graphs.count = 0;
2686 db->graphs.erased = 0;
2687 db->last_graph = NULL;
2688 }
2689
2690
2691 static int
gc_graphs(rdf_db * db,gen_t gen)2692 gc_graphs(rdf_db *db, gen_t gen)
2693 { int reclaimed = 0;
2694
2695 if ( db->graphs.erased > 10 + db->graphs.count/2 )
2696 { int i;
2697
2698 LOCK_MISC(db);
2699 for(i=0; i<db->graphs.bucket_count; i++)
2700 { graph *p, *n, *g;
2701
2702 p = NULL;
2703 g = db->graphs.blocks[MSB(i)][i];
2704
2705 for( ; g; g = n )
2706 { n = g->next;
2707
2708 if ( g->erased && g->triple_count == 0 )
2709 { if ( p )
2710 p->next = g->next;
2711 else
2712 db->graphs.blocks[MSB(i)][i] = g->next;
2713
2714 if ( db->last_graph == g )
2715 db->last_graph = NULL;
2716 db->graphs.count--;
2717 db->graphs.erased--;
2718 reclaimed++;
2719 deferred_finalize(&db->defer_all, g,
2720 finalize_graph, db);
2721 } else
2722 p = g;
2723 }
2724 }
2725 UNLOCK_MISC(db);
2726 }
2727
2728 return reclaimed;
2729 }
2730
2731
2732 static void
register_graph(rdf_db * db,triple * t)2733 register_graph(rdf_db *db, triple *t)
2734 { graph *src;
2735
2736 if ( !t->graph_id )
2737 return;
2738
2739 if ( !((src=db->last_graph) && src->name == ID_ATOM(t->graph_id)) )
2740 { src = lookup_graph(db, ID_ATOM(t->graph_id));
2741 db->last_graph = src;
2742 }
2743
2744 ATOMIC_ADD(&src->triple_count, 1);
2745 #ifdef WITH_MD5
2746 if ( src->md5 )
2747 { md5_byte_t digest[16];
2748 md5_triple(t, digest);
2749 sum_digest(src->digest, digest);
2750 }
2751 #endif
2752 }
2753
2754
2755 static void
unregister_graph(rdf_db * db,triple * t)2756 unregister_graph(rdf_db *db, triple *t)
2757 { graph *src;
2758
2759 if ( !t->graph_id )
2760 return;
2761
2762 if ( db->last_graph && db->last_graph->name == ID_ATOM(t->graph_id) )
2763 { src = db->last_graph;
2764 } else
2765 { src = existing_graph(db, ID_ATOM(t->graph_id));
2766 }
2767
2768 if ( src )
2769 { ATOMIC_SUB(&src->triple_count, 1);
2770 #ifdef WITH_MD5
2771 if ( src->md5 )
2772 { md5_byte_t digest[16];
2773 md5_triple(t, digest);
2774 dec_digest(src->digest, digest);
2775 }
2776 #endif
2777 }
2778 }
2779
2780
2781 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2782 rdf_graph_(?Graph, ?TripleCount) is nondet.
2783
2784 True when Graph is a current graph with TripleCount triples.
2785 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2786
2787 typedef struct enum_graph
2788 { graph *g;
2789 int i;
2790 } enum_graph;
2791
2792
2793 static graph *
advance_graph_enum(rdf_db * db,enum_graph * eg)2794 advance_graph_enum(rdf_db *db, enum_graph *eg)
2795 { if ( eg->g )
2796 eg->g = eg->g->next;
2797
2798 while ( !eg->g || (eg->g->erased && eg->g->triple_count == 0) )
2799 { if ( !eg->g )
2800 { while ( ++eg->i < db->graphs.bucket_count &&
2801 !(eg->g = db->graphs.blocks[MSB(eg->i)][eg->i]) )
2802 ;
2803 if ( !eg->g )
2804 return NULL;
2805 } else
2806 eg->g = eg->g->next;
2807 }
2808
2809 return eg->g;
2810 }
2811
2812
2813 static foreign_t
rdf_graph(term_t name,term_t triple_count,control_t h)2814 rdf_graph(term_t name, term_t triple_count, control_t h)
2815 { rdf_db *db = rdf_current_db();
2816 enum_graph *eg;
2817 atom_t a;
2818
2819 switch( PL_foreign_control(h) )
2820 { case PL_FIRST_CALL:
2821 if ( PL_is_variable(name) )
2822 { eg = rdf_malloc(db, sizeof(*eg));
2823 eg->i = -1;
2824 eg->g = NULL;
2825 advance_graph_enum(db, eg);
2826 goto next;
2827 } else if ( PL_get_atom_ex(name, &a) )
2828 { graph *g;
2829
2830 if ( (g=existing_graph(db, a)) && !(g->erased && g->triple_count == 0) )
2831 return PL_unify_int64(triple_count, g->triple_count);
2832 }
2833 return FALSE;
2834 case PL_REDO:
2835 eg = PL_foreign_context_address(h);
2836 goto next;
2837 case PL_PRUNED:
2838 eg = PL_foreign_context_address(h);
2839 rdf_free(db, eg, sizeof(*eg));
2840 return TRUE;
2841 default:
2842 assert(0);
2843 return FALSE;
2844 }
2845
2846 next:
2847 if ( !eg->g ||
2848 !PL_unify_atom(name, eg->g->name) ||
2849 !PL_unify_int64(triple_count, eg->g->triple_count) )
2850 { rdf_free(db, eg, sizeof(*eg));
2851 return FALSE;
2852 }
2853
2854 if ( advance_graph_enum(db, eg) )
2855 { PL_retry_address(eg);
2856 } else
2857 { rdf_free(db, eg, sizeof(*eg));
2858 return TRUE;
2859 }
2860 }
2861
2862
2863 static foreign_t
rdf_graph_source(term_t graph_name,term_t source,term_t modified)2864 rdf_graph_source(term_t graph_name, term_t source, term_t modified)
2865 { atom_t gn;
2866 rdf_db *db = rdf_current_db();
2867
2868 if ( !get_atom_or_var_ex(graph_name, &gn) )
2869 return FALSE;
2870
2871 if ( gn )
2872 { graph *s;
2873
2874 if ( (s = existing_graph(db, gn)) &&
2875 !(s->erased && s->triple_count == 0) &&
2876 s->source)
2877 { return ( PL_unify_atom(source, s->source) &&
2878 PL_unify_float(modified, s->modified) );
2879 }
2880 } else
2881 { atom_t src;
2882
2883 if ( PL_get_atom_ex(source, &src) )
2884 { int i;
2885
2886 for(i=0; i<db->graphs.bucket_count; i++)
2887 { graph *g = db->graphs.blocks[MSB(i)][i];
2888
2889 for(; g; g=g->next)
2890 { if ( g->source == src )
2891 { return ( PL_unify_atom(graph_name, g->name) &&
2892 PL_unify_float(modified, g->modified) );
2893 }
2894 }
2895 }
2896 }
2897 }
2898
2899 return FALSE;
2900 }
2901
2902
2903 static foreign_t
rdf_set_graph_source(term_t graph_name,term_t source,term_t modified)2904 rdf_set_graph_source(term_t graph_name, term_t source, term_t modified)
2905 { atom_t gn, src;
2906 int rc = FALSE;
2907 rdf_db *db = rdf_current_db();
2908 graph *s;
2909 double mtime;
2910
2911 if ( !PL_get_atom_ex(graph_name, &gn) ||
2912 !PL_get_atom_ex(source, &src) ||
2913 !PL_get_float_ex(modified, &mtime) )
2914 return FALSE;
2915
2916 if ( (s = lookup_graph(db, gn)) )
2917 { LOCK_MISC(db);
2918 if ( s->source != src )
2919 { if ( s->source )
2920 PL_unregister_atom(s->source);
2921 s->source = src;
2922 PL_register_atom(s->source);
2923 }
2924 s->modified = mtime;
2925 UNLOCK_MISC(db);
2926 rc = TRUE;
2927 }
2928
2929 return rc;
2930 }
2931
2932
2933 static foreign_t
rdf_create_graph(term_t graph_name)2934 rdf_create_graph(term_t graph_name)
2935 { atom_t gn;
2936 rdf_db *db = rdf_current_db();
2937 graph *g;
2938
2939 if ( !PL_get_atom_ex(graph_name, &gn) )
2940 return FALSE;
2941
2942 if ( (g = existing_graph(db, gn)) && !g->erased )
2943 return TRUE; /* already exists */
2944 if ( (g = lookup_graph(db, gn)) )
2945 { rdf_broadcast(EV_CREATE_GRAPH, g, NULL);
2946
2947 return TRUE;
2948 }
2949
2950 return FALSE;
2951 }
2952
2953
2954 static void
clean_atom(atom_t * ap)2955 clean_atom(atom_t *ap)
2956 { atom_t old;
2957
2958 if ( (old=*ap) )
2959 { *ap = 0;
2960 PL_unregister_atom(old);
2961 }
2962 }
2963
2964
2965 static void
finalize_graph(void * mem,void * clientdata)2966 finalize_graph(void *mem, void *clientdata)
2967 { graph *g = mem;
2968 (void)clientdata;
2969
2970 clean_atom(&g->name);
2971 }
2972
2973
2974 static foreign_t
rdf_destroy_graph(term_t graph_name)2975 rdf_destroy_graph(term_t graph_name)
2976 { atom_t gn;
2977 rdf_db *db = rdf_current_db();
2978 graph *g;
2979
2980 if ( !PL_get_atom_ex(graph_name, &gn) )
2981 return FALSE;
2982
2983 if ( (g = existing_graph(db, gn)) )
2984 { LOCK_MISC(db);
2985 g->md5 = FALSE;
2986 memset(g->digest, 0, sizeof(g->digest));
2987 memset(g->unmodified_digest, 0, sizeof(g->unmodified_digest));
2988 clean_atom(&g->source);
2989 g->modified = 0.0;
2990 g->erased = TRUE;
2991 db->graphs.erased++;
2992 if ( db->last_graph == g )
2993 db->last_graph = NULL;
2994 UNLOCK_MISC(db);
2995 }
2996
2997 return TRUE;
2998 }
2999
3000
3001 #ifdef WITH_MD5
3002 /** rdf_graph_modified_(+Graph, -IsModified, -UnmodifiedHash)
3003
3004 True when IsModified reflects the modified status relative to the
3005 `unmodified' digest.
3006 */
3007
3008 static foreign_t
rdf_graph_modified_(term_t graph_name,term_t ismodified,term_t hash)3009 rdf_graph_modified_(term_t graph_name, term_t ismodified, term_t hash)
3010 { atom_t gn;
3011 rdf_db *db = rdf_current_db();
3012 graph *g;
3013 int rc;
3014
3015 if ( !PL_get_atom_ex(graph_name, &gn) )
3016 return FALSE;
3017
3018 if ( (g = lookup_graph(db, gn)) )
3019 { int ismod = (memcmp(g->digest, g->unmodified_digest, 16) != 0);
3020
3021 rc = ( PL_unify_bool(ismodified, ismod) &&
3022 md5_unify_digest(hash, g->unmodified_digest)
3023 );
3024 } else
3025 rc = FALSE;
3026
3027 return rc;
3028 }
3029
3030
3031 static int
clear_modified(graph * g)3032 clear_modified(graph *g)
3033 { if ( g->md5 )
3034 { memcpy(g->unmodified_digest, g->digest, 16);
3035 return TRUE;
3036 }
3037
3038 return FALSE;
3039 }
3040
3041
3042 static foreign_t
rdf_graph_clear_modified_(term_t graph_name)3043 rdf_graph_clear_modified_(term_t graph_name)
3044 { atom_t gn;
3045 rdf_db *db = rdf_current_db();
3046 graph *g;
3047
3048 if ( !PL_get_atom_ex(graph_name, &gn) )
3049 return FALSE;
3050
3051 if ( (g = lookup_graph(db, gn)) )
3052 return clear_modified(g);
3053
3054 return FALSE;
3055 }
3056
3057
3058 #endif /*WITH_MD5*/
3059
3060
3061 /*******************************
3062 * LITERALS *
3063 *******************************/
3064
3065 static inline void
prepare_literal_ex(literal_ex * lex)3066 prepare_literal_ex(literal_ex *lex)
3067 {
3068 #ifdef LITERAL_EX_MAGIC
3069 lex->magic = LITERAL_EX_MAGIC;
3070 #endif
3071
3072 if ( lex->literal->objtype == OBJ_STRING )
3073 { lex->atom.handle = lex->literal->value.string;
3074 lex->atom.resolved = FALSE;
3075 }
3076 }
3077
3078
3079 static literal *
new_literal(rdf_db * db)3080 new_literal(rdf_db *db)
3081 { literal *lit = rdf_malloc(db, sizeof(*lit));
3082 memset(lit, 0, sizeof(*lit));
3083 lit->references = 1;
3084
3085 return lit;
3086 }
3087
3088
3089 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3090 free_literal_value() gets rid of atoms or term that forms the value of
3091 the literal. We cannot dispose of these immediately as they might be
3092 needed by an ongoing scan of the literal skiplist for comparison.
3093 Therefore, we use deferred_finalize() and dispose of the triple later.
3094
Return TRUE if the triple value could be destroyed and FALSE if the
3096 destruction has been deferred. That will eventually call
3097 finalize_literal_ptr(), which calls free_literal_value() again, but now
3098 as not shared literal so it can do its work unconditionally.
3099 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3100
3101 static void
finalize_literal_ptr(void * mem,void * clientdata)3102 finalize_literal_ptr(void *mem, void *clientdata)
3103 { literal **litp = mem;
3104 rdf_db *db = clientdata;
3105 literal *lit = *litp;
3106
3107 free_literal_value(db, lit);
3108 rdf_free(db, lit, sizeof(*lit));
3109 }
3110
3111
3112 static literal **
unlink_literal(rdf_db * db,literal * lit)3113 unlink_literal(rdf_db *db, literal *lit)
3114 { if ( lit->shared && !db->resetting )
3115 { literal_ex lex;
3116 literal **data;
3117
3118 lit->shared = FALSE;
3119 DEBUG(2,
3120 Sdprintf("Delete %p from literal table: ", lit);
3121 print_literal(lit);
3122 Sdprintf("\n"));
3123
3124 lex.literal = lit;
3125 prepare_literal_ex(&lex);
3126
3127 if ( (data=skiplist_delete(&db->literals, &lex)) )
3128 { return data;
3129 } else
3130 { Sdprintf("Failed to delete %p (size=%ld): ", lit, db->literals.count);
3131 print_literal(lit);
3132 Sdprintf("\n");
3133 assert(0);
3134 }
3135 }
3136
3137 return NULL;
3138 }
3139
3140
3141 static void
free_literal_value(rdf_db * db,literal * lit)3142 free_literal_value(rdf_db *db, literal *lit)
3143 { unlock_atoms_literal(lit);
3144 if ( lit->objtype == OBJ_TERM &&
3145 lit->value.term.record )
3146 { if ( lit->term_loaded )
3147 rdf_free(db, lit->value.term.record, lit->value.term.len);
3148 else
3149 PL_erase_external(lit->value.term.record);
3150 }
3151 lit->objtype = OBJ_UNTYPED; /* debugging: trap errors early */
3152 }
3153
3154
3155 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3156 free_literal() frees a literal, normally referenced from a triple. The
3157 triple may be shared or not. Triples that are part of the database are
3158 always shared. Unshared triples are typically search patterns, or
3159 created triples that are deleted because some part of the operation
3160 fails.
3161 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3162
3163 static int
free_literal(rdf_db * db,literal * lit)3164 free_literal(rdf_db *db, literal *lit)
3165 { int rc = TRUE;
3166
3167 if ( lit->shared )
3168 { simpleMutexLock(&db->locks.literal);
3169 if ( --lit->references == 0 )
3170 { literal **data = unlink_literal(db, lit);
3171 simpleMutexUnlock(&db->locks.literal);
3172
3173 if ( data ) /* unlinked */
3174 { rc = rdf_broadcast(EV_OLD_LITERAL, lit, NULL);
3175 deferred_finalize(&db->defer_literals, data,
3176 finalize_literal_ptr, db);
3177 } else
3178 { free_literal_value(db, lit);
3179 rdf_free(db, lit, sizeof(*lit));
3180 }
3181 } else
3182 { simpleMutexUnlock(&db->locks.literal);
3183 }
3184 } else /* not shared; no locking needed */
3185 { if ( --lit->references == 0 )
3186 { free_literal_value(db, lit);
3187 rdf_free(db, lit, sizeof(*lit));
3188 }
3189 }
3190
3191 return rc;
3192 }
3193
3194
3195 static literal *
copy_literal(rdf_db * db,literal * lit)3196 copy_literal(rdf_db *db, literal *lit)
3197 { lit->references++;
3198 assert(lit->references != 0);
3199 return lit;
3200 }
3201
3202
3203 static void
alloc_literal_triple(rdf_db * db,triple * t)3204 alloc_literal_triple(rdf_db *db, triple *t)
3205 { if ( !t->object_is_literal )
3206 { t->object.literal = new_literal(db);
3207 t->object_is_literal = TRUE;
3208 }
3209 }
3210
3211
3212 static void
lock_atoms_literal(literal * lit)3213 lock_atoms_literal(literal *lit)
3214 { if ( !lit->atoms_locked )
3215 { lit->atoms_locked = TRUE;
3216
3217 switch(lit->objtype)
3218 { case OBJ_STRING:
3219 PL_register_atom(lit->value.string);
3220 if ( lit->qualifier )
3221 PL_register_atom(ID_ATOM(lit->type_or_lang));
3222 break;
3223 }
3224 }
3225 }
3226
3227
3228 static void
unlock_atoms_literal(literal * lit)3229 unlock_atoms_literal(literal *lit)
3230 { if ( lit->atoms_locked )
3231 { lit->atoms_locked = FALSE;
3232
3233 switch(lit->objtype)
3234 { case OBJ_STRING:
3235 PL_unregister_atom(lit->value.string);
3236 if ( lit->qualifier )
3237 PL_unregister_atom(ID_ATOM(lit->type_or_lang));
3238 break;
3239 }
3240 }
3241 }
3242
3243
3244 /*******************************
3245 * LITERAL DB *
3246 *******************************/
3247
3248 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3249 compare_literals() sorts literals. Ordering is defined as:
3250
3251 * Numeric literals < string literals < term literals
3252 * Numeric literals (int and float) are sorted by value
* String literals are sorted alphabetically
- case independent, but uppercase before lowercase
- locale (strcoll) sorting?
- delete diacritics
3257 - first on string, then on type, then on language
3258 * Terms are sorted on Prolog standard order of terms
3259 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3260
3261 static int
cmp_qualifier(const literal * l1,const literal * l2)3262 cmp_qualifier(const literal *l1, const literal *l2)
3263 { int q1 = l1->qualifier;
3264 int q2 = l2->qualifier;
3265 atom_t tl1 = ID_ATOM(l1->type_or_lang);
3266 atom_t tl2 = ID_ATOM(l2->type_or_lang);
3267
3268 if ( q1 == Q_NONE )
3269 { q1 = Q_TYPE;
3270 tl1 = ATOM_XSDString;
3271 }
3272 if ( q2 == Q_NONE )
3273 { q2 = Q_TYPE;
3274 tl2 = ATOM_XSDString;
3275 }
3276
3277 if ( q1 == q2 )
3278 { if ( tl1 )
3279 return cmp_atoms(tl1, tl2);
3280 return -1;
3281 }
3282
3283 return q1 - q2;
3284 }
3285
3286 static xsd_primary
is_numerical_string(const literal * lit)3287 is_numerical_string(const literal *lit)
3288 { if ( lit->objtype == OBJ_STRING &&
3289 lit->qualifier == Q_TYPE )
3290 return is_numeric_type(ID_ATOM(lit->type_or_lang));
3291
3292 return XSD_NONNUMERIC;
3293 }
3294
3295
3296 static int
same_type(atom_id id1,atom_id id2)3297 same_type(atom_id id1, atom_id id2)
3298 { if ( id1 == id2 )
3299 return TRUE;
3300 if ( id2 == 0 && ID_ATOM(id1) == ATOM_XSDString )
3301 return TRUE;
3302 if ( id1 == 0 && ID_ATOM(id2) == ATOM_XSDString )
3303 return TRUE;
3304
3305 return FALSE;
3306 }
3307
3308
3309 static int
compare_literals(literal_ex * lex,literal * l2)3310 compare_literals(literal_ex *lex, literal *l2)
3311 { literal *l1 = lex->literal;
3312
3313 #ifdef LITERAL_EX_MAGIC
3314 assert(lex->magic == LITERAL_EX_MAGIC);
3315 #endif
3316
3317 if ( l1->objtype == l2->objtype )
3318 { int rc;
3319
3320 switch(l1->objtype)
3321 { case OBJ_INTEGER:
3322 { int64_t v1 = l1->value.integer;
3323 int64_t v2 = l2->value.integer;
3324 rc = v1 < v2 ? -1 : v1 > v2 ? 1 : 0;
3325 break;
3326 }
3327 case OBJ_DOUBLE:
3328 { double v1 = l1->value.real;
3329 double v2 = l2->value.real;
3330 rc = v1 < v2 ? -1 : v1 > v2 ? 1 : 0;
3331 break;
3332 }
3333 case OBJ_STRING:
3334 { if ( lex->atom.handle == l2->value.string &&
3335 same_type(l1->type_or_lang, l2->type_or_lang) )
3336 { rc = 0;
3337 } else
3338 { xsd_primary nt1 = is_numerical_string(l1);
3339 xsd_primary nt2 = is_numerical_string(l2);
3340
3341 if ( nt1 || nt2 )
3342 { if ( nt1 && nt2 )
3343 { rc = cmp_xsd_info(nt1, &lex->atom, nt2, l2->value.string);
3344 if ( rc == 0 && nt1 != nt2 )
3345 rc = nt1 < nt2 ? 1 : -1;
3346 } else
3347 { rc = nt1 ? -1 : 1;
3348 }
3349 } else
3350 { rc = cmp_atom_info(&lex->atom, l2->value.string);
3351 }
3352 }
3353 break;
3354 }
3355 case OBJ_TERM:
3356 { fid_t fid = PL_open_foreign_frame();
3357 term_t t1 = PL_new_term_ref();
3358 term_t t2 = PL_new_term_ref();
3359 /* can also be handled in literal_ex */
3360 PL_recorded_external(l1->value.term.record, t1);
3361 PL_recorded_external(l2->value.term.record, t2);
3362 rc = PL_compare(t1, t2);
3363
3364 PL_discard_foreign_frame(fid);
3365 break;
3366 }
3367 default:
3368 assert(0);
3369 return 0;
3370 }
3371
3372 if ( rc != 0 )
3373 return rc;
3374 return cmp_qualifier(l1, l2);
3375 } else if ( l1->objtype == OBJ_INTEGER && l2->objtype == OBJ_DOUBLE )
3376 { double v1 = (double)l1->value.integer;
3377 double v2 = l2->value.real;
3378 return v1 < v2 ? -1 : v1 > v2 ? 1 : -1;
3379 } else if ( l1->objtype == OBJ_DOUBLE && l2->objtype == OBJ_INTEGER )
3380 { double v1 = l1->value.real;
3381 double v2 = (double)l2->value.integer;
3382 return v1 < v2 ? -1 : v1 > v2 ? 1 : 1;
3383 } else
3384 { return l1->objtype - l2->objtype;
3385 }
3386 }
3387
3388 #ifdef SL_CHECK
3389 static int sl_checking = FALSE;
3390 #endif
3391
3392 static int
sl_compare_literals(void * p1,void * p2,void * cd)3393 sl_compare_literals(void *p1, void *p2, void *cd)
3394 { literal *l2 = *(literal**)p2;
3395 (void)cd;
3396
3397 #ifdef SL_CHECK
3398 if ( sl_checking )
3399 { literal *l1 = *(literal**)p1;
3400 literal_ex lex;
3401
3402 lex.literal = l1;
3403 prepare_literal_ex(&lex);
3404 return compare_literals(&lex, l2);
3405 } else
3406 #endif
3407 { literal_ex *lex = p1;
3408
3409 assert(l2->objtype != OBJ_UNTYPED);
3410 return compare_literals(lex, l2);
3411 }
3412 }
3413
3414
/* sl_check() validates the literal skiplist using skiplist_check().
   The check itself is wrapped in DEBUG(2, ...) and runs with sl_checking
   set, which changes how sl_compare_literals() interprets its first
   argument.  Without SL_CHECK, sl_check() expands to a no-op.
*/

#ifdef SL_CHECK
static int
sl_check(rdf_db *db, int print)
{ int ok = TRUE;

  DEBUG(2,
        { assert(sl_checking == FALSE);
          sl_checking = TRUE;
          ok = skiplist_check(&db->literals, print);
          sl_checking = FALSE;
        });

  return ok;
}
#else
#define sl_check(db, print) (void)0
#endif
3431
3432
/* Allocation function handed to the literal skiplist: adapts the
   (bytes, client data) calling convention to rdf_malloc(), where the
   client data is passed as the database.
*/

static void *
sl_rdf_malloc(size_t bytes, void *cd)
{ void *mem = rdf_malloc(cd, bytes);

  return mem;
}
3437
3438
3439 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3440 Create the sorted literal tree. Note that we do not register a free
3441 handler for the tree as nodes are either already destroyed by
3442 free_literal() or by rdf_reset_db().
3443 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3444
3445 static int
init_literal_table(rdf_db * db)3446 init_literal_table(rdf_db *db)
3447 { skiplist_init(&db->literals,
3448 sizeof(literal*), /* Payload size */
3449 db, /* Client data */
3450 sl_compare_literals, /* Compare */
3451 sl_rdf_malloc, /* Allocate */
3452 NULL); /* Destroy */
3453
3454 return TRUE;
3455 }
3456
3457
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
share_literal() takes a  literal  and  replaces  it  with  one  from the
literal database if there is a match. On a match, the argument literal
is destroyed. Without a match it adds   the  literal to the database and
returns it.

Called from add_triples() and update_triples() outside the locked areas.
We must hold db->locks.literal for updating the literal database.

The two static floats are decaying counters (value*0.99+1.0): `existing'
is bumped when a lookup finds an entry, `new' when a literal is inserted.
The skiplist_find() fast path is only tried while existing*2 > new.

Returns the shared literal; `from' itself if it was already shared or
has just been inserted.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static literal *
share_literal(rdf_db *db, literal *from)
{ literal **data, *shared;
  literal_ex lex;
  int is_new;
  static float existing = 0.0;
  static float new = 0.0;

  if ( from->shared )
    return from;                                /* already shared */

  lex.literal = from;
  prepare_literal_ex(&lex);

                                        /* fast path: find before locking */
  if ( existing*2 > new &&
       (data = skiplist_find(&db->literals, &lex)) )
  { simpleMutexLock(&db->locks.literal);
    existing = existing*0.99+1.0;
                                        /* re-validate under the lock */
    if ( !skiplist_erased_payload(&db->literals, data) )
    { shared = *data;
      shared->references++;
      assert(shared->references != 0);          /* reference count wrapped */

      simpleMutexUnlock(&db->locks.literal);
      free_literal(db, from);

      return shared;
    }
    simpleMutexUnlock(&db->locks.literal);      /* erased: fall through */
  }

                                        /* slow path: insert-or-find, locked */
  simpleMutexLock(&db->locks.literal);
  sl_check(db, FALSE);
  data = skiplist_insert(&db->literals, &lex, &is_new);
  sl_check(db, FALSE);
  if ( is_new )
  { new = new*0.99+1.0;
    from->shared = TRUE;
    shared = from;
    assert(from->references==1);
    assert(from->atoms_locked==1);
  } else
  { existing = existing*0.99+1.0;
    shared = *data;
    shared->references++;
    assert(shared->references != 0);
  }
  simpleMutexUnlock(&db->locks.literal);

                                        /* debug output, free and broadcast */
                                        /* happen after releasing the lock */
  if ( !is_new )
  { DEBUG(2,
          Sdprintf("Replace %p by %p:\n", from, shared);
          Sdprintf("\tfrom: "); print_literal(from);
          Sdprintf("\n\tto: "); print_literal(shared);
          Sdprintf("\n"));

    free_literal(db, from);
  } else
  { DEBUG(2,
          Sdprintf("Insert %p into literal table: ", from);
          print_literal(from);
          Sdprintf("\n"));

    rdf_broadcast(EV_NEW_LITERAL, from, NULL);
  }

  return shared;
}
3536
3537
3538 /*******************************
3539 * TRIPLES *
3540 *******************************/
3541
3542 static triple *
alloc_triple(void)3543 alloc_triple(void)
3544 { triple *t = malloc(sizeof(*t));
3545
3546 if ( t )
3547 { memset(t, 0, sizeof(*t));
3548 #ifdef COMPACT
3549 t->id = TRIPLE_NO_ID;
3550 #endif
3551 }
3552
3553 return t;
3554 }
3555
3556
/* Dispose of a triple; a NULL `t' is ignored.

   linger != 0: the triple is tagged T_LINGERING and handed to
   deferred_finalize() on db->defer_triples with finalize_triple as
   callback, and db->lingering is incremented.  Under COMPACT the
   deferred_finalize() call is skipped for triples whose id is still
   TRIPLE_NO_ID; the counter is incremented regardless.

   linger == 0: atoms are unlocked, a literal object (if any) is released
   through free_literal() and the memory is passed to free() at once. */
static void
unalloc_triple(rdf_db *db, triple *t, int linger)
{ if ( t )
  { if ( linger )
    { TMAGIC(t, T_LINGERING);
#ifdef COMPACT
      if ( t->id != TRIPLE_NO_ID )      /* guards only the next statement */
#endif
      deferred_finalize(&db->defer_triples, t,
                        finalize_triple, db);
      ATOMIC_ADD(&db->lingering, 1);
    } else
    { unlock_atoms(db, t);
      if ( t->object_is_literal && t->object.literal )
        free_literal(db, t->object.literal);
      SECURE(memset(t, 0, sizeof(*t)));
      TMAGIC(t, T_FREED);
      free(t);
    }
  }
}
3578
3579
3580 /*******************************
3581 * TRIPLE HASH *
3582 *******************************/
3583
3584 static int
init_triple_hash(rdf_db * db,int index,size_t count)3585 init_triple_hash(rdf_db *db, int index, size_t count)
3586 { triple_hash *h = &db->hash[index];
3587 size_t bytes = sizeof(triple_bucket)*count;
3588 triple_bucket *t = PL_malloc_uncollectable(bytes);
3589 int i;
3590
3591 memset(t, 0, bytes);
3592 memset(h, 0, sizeof(*h));
3593
3594 h->optimize_threshold = col_opt_threshold[index];
3595 h->avg_chain_len = col_avg_len[index];
3596 h->icol = index;
3597
3598 for(i=0; i<MSB(count); i++)
3599 h->blocks[i] = t;
3600
3601 h->bucket_preinit = h->bucket_count_epoch = h->bucket_count = count;
3602
3603 return TRUE;
3604 }
3605
3606
3607 static int
size_triple_hash(rdf_db * db,int index,size_t size)3608 size_triple_hash(rdf_db *db, int index, size_t size)
3609 { triple_hash *hash = &db->hash[index];
3610 int extra;
3611
3612 if ( hash->created )
3613 rdf_create_gc_thread(db);
3614
3615 simpleMutexLock(&db->queries.write.lock);
3616 extra = MSB(size) - MSB(hash->bucket_count);
3617 while( extra-- > 0 )
3618 { int i = MSB(hash->bucket_count);
3619 size_t bytes = sizeof(triple_bucket)*hash->bucket_count;
3620 triple_bucket *t = PL_malloc_uncollectable(bytes);
3621
3622 memset(t, 0, bytes);
3623 hash->blocks[i] = t-hash->bucket_count;
3624 hash->bucket_count *= 2;
3625 if ( !hash->created )
3626 hash->bucket_count_epoch = hash->bucket_count;
3627 DEBUG(1, Sdprintf("Resized triple index %s=%d to %ld at %d\n",
3628 col_name[index], index, (long)hash->bucket_count, i));
3629 }
3630 simpleMutexUnlock(&db->queries.write.lock);
3631
3632 return TRUE;
3633 }
3634
3635
3636 static void
reset_triple_hash(rdf_db * db,triple_hash * hash)3637 reset_triple_hash(rdf_db *db, triple_hash *hash)
3638 { size_t bytes = sizeof(triple_bucket)*hash->bucket_preinit;
3639 int i;
3640
3641 memset(hash->blocks[0], 0, bytes); /* clear first block */
3642 for(i=MSB(hash->bucket_preinit); i<MAX_TBLOCKS; i++)
3643 { if ( hash->blocks[i] )
3644 { triple_bucket *t = hash->blocks[i];
3645
3646 hash->blocks[i] = NULL;
3647 t += 1<<(i-1);
3648 PL_free(t);
3649 } else
3650 break;
3651 }
3652 hash->bucket_count = hash->bucket_count_epoch = hash->bucket_preinit;
3653 hash->created = FALSE;
3654 }
3655
3656
3657 /* count_different() returns the number of elements in a hash bucket
3658 that have a different unbounded hash. That is, the bucket might
3659 split if we resize the table.
3660
3661 *count is assigned with the size. That is merely consistency because
3662 we also keep track of this value.
3663 */
3664
3665 #define COUNT_DIFF_NOHASH 5
3666
3667 static int
count_different(rdf_db * db,triple_bucket * tb,int index,int * count)3668 count_different(rdf_db *db, triple_bucket *tb, int index, int *count)
3669 { triple *t;
3670 int rc;
3671
3672 if ( tb->count < COUNT_DIFF_NOHASH )
3673 { if ( tb->count <= 1 )
3674 { *count = tb->count;
3675
3676 return tb->count;
3677 } else
3678 { size_t hashes[COUNT_DIFF_NOHASH];
3679 int different = 0;
3680 int found = 0;
3681
3682 for(t = fetch_triple(db, tb->head);
3683 t && different < COUNT_DIFF_NOHASH; /* be careful with concurrently */
3684 t = triple_follow_hash(db, t, ICOL(index))) /* added triples */
3685 { size_t hash = triple_hash_key(t, index);
3686 int i;
3687
3688 found++;
3689 for(i=0; i<different; i++)
3690 { if ( hashes[i] == hash )
3691 goto next;
3692 }
3693 hashes[different++] = hash;
3694
3695 next:;
3696 }
3697
3698 *count = found;
3699
3700 return different;
3701 }
3702 } else
3703 { atomset hash_set;
3704 int c = 0;
3705
3706 init_atomset(&hash_set);
3707 for(t=fetch_triple(db, tb->head); t; t=triple_follow_hash(db, t, ICOL(index)))
3708 { c++;
3709 add_atomset(&hash_set, (atom_t)triple_hash_key(t, index));
3710 }
3711 rc = hash_set.count;
3712 destroy_atomset(&hash_set);
3713
3714 *count = c;
3715 }
3716
3717 return rc;
3718 }
3719
3720
3721 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3722 triple_hash_quality() computes the quality of the triple hash index. The
3723 return is 1.0 if the unbounded hashkey for all objects in each bucket is
3724 the same, and < 1.0 if there are buckets holding objects with different
3725 unbounded keys.
3726 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3727
3728 static float
triple_hash_quality(rdf_db * db,int index,int sample)3729 triple_hash_quality(rdf_db *db, int index, int sample)
3730 { triple_hash *hash = &db->hash[index];
3731 int i, step;
3732 float q = 0;
3733 size_t total = 0;
3734
3735 if ( index == 0 )
3736 return 1.0;
3737
3738 if ( sample > 0 )
3739 step = (hash->bucket_count+sample)/sample; /* step >= 1 */
3740 else
3741 step = 1;
3742
3743 for(i=0; i<hash->bucket_count; i += step)
3744 { int entry = MSB(i);
3745 triple_bucket *tb = &hash->blocks[entry][i];
3746 int count;
3747 int different = count_different(db, tb, col_index[index], &count);
3748
3749 DEBUG(1, /* inconsistency is normal due to concurrency */
3750 if ( count != tb->count )
3751 Sdprintf("Inconsistent count in index=%d, bucket=%d, %d != %d\n",
3752 index, i, count, tb->count));
3753
3754 if ( count )
3755 { q += (float)count/(float)different;
3756 total += count;
3757 }
3758 }
3759
3760 return total == 0 ? 1.0 : q/(float)total;
3761 }
3762
3763
#ifdef O_DEBUG
/* Debugging aid: dump the non-empty buckets of index table `index' using
   Sdprintf().  For each such bucket the bucket number, the triple count
   and the number of distinct hash keys are printed, followed by the
   triples on the chain.  `sample' selects the stride in the same way as
   in triple_hash_quality(): > 0 inspects roughly that many buckets. */
void
print_triple_hash(rdf_db *db, int index, int sample)
{ triple_hash *th = &db->hash[index];
  int b, stride;

  if ( sample <= 0 )
    stride = 1;
  else
    stride = (th->bucket_count+sample)/sample;  /* stride >= 1 */

  for(b=0; b<th->bucket_count; b += stride)
  { triple_bucket *tb = &th->blocks[MSB(b)][b];
    int n;
    int distinct = count_different(db, tb, col_index[index], &n);
    triple *t;

    if ( n == 0 )
      continue;

    Sdprintf("%d: c=%d; d=%d", b, n, distinct);
    t = fetch_triple(db, tb->head);
    while ( t )
    { Sdprintf("\n\t");
      print_triple(t, 0);
      t = triple_follow_hash(db, t, index);
    }
  }
}
#endif /*O_DEBUG*/
3793
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Consider resizing the hash-tables. The argument 'extra' gives the number
of triples that will  be  added.  This   is  used  to  guess  the   hash
requirements of the table  and  thus   avoid  duplicating  triples in
optimize_triple_hashes().

Nothing happens unless the SPO table would exceed its average chain
length once `extra' triples are added. Otherwise each index table from 1
upward that is created and has no user-set size is enlarged by the
smallest number of doublings that satisfies the estimate for its column
set. If any table was resized, invalidate_distinct_counts() is called.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

void
consider_triple_rehash(rdf_db *db, size_t extra)
{ size_t triples = db->created - db->erased;
  triple_hash *spo = &db->hash[ICOL(BY_SPO)];

  if ( (extra + triples)/spo->avg_chain_len > spo->bucket_count )
  { int i;
    int resized = 0;
                                        /* growth ratio in 1/16 units */
    int factor = ((extra+triples+100000)*16)/(triples+100000);

                                        /* scale n by the growth ratio and */
                                        /* divide by table i's chain length */
#define SCALE(n) (((n)*factor)/(16*db->hash[i].avg_chain_len))
#define SCALEF(n) (((n)*(float)factor)/(16.0*(float)db->hash[i].avg_chain_len))

    for(i=1; i<INDEX_TABLES; i++)
    { int resize = 0;                   /* number of doublings needed */
      size_t sizenow = db->hash[i].bucket_count;

      if ( db->hash[i].user_size || db->hash[i].created == FALSE )
        continue;                       /* user set size */

      switch(col_index[i])
      { case BY_S:
        case BY_SG:
        case BY_SP:
          while ( SCALE(db->resources.hash.count) > sizenow<<resize )
            resize++;
          break;
        case BY_P:
          while ( SCALE(db->predicates.count) > sizenow<<resize )
            resize++;
          break;
        case BY_O:
        case BY_PO:
        { size_t setsize = SCALE(db->resources.hash.count + db->literals.count);

          if ( setsize > triples )      /* capped by the triple count */
            setsize = triples;
          while ( setsize > sizenow<<resize )
            resize++;
          break;
        }
        case BY_SPO:
          while ( (extra+triples)/spo->avg_chain_len > sizenow<<resize )
            resize++;
          break;
        case BY_G:
          while ( SCALE(db->graphs.count) > sizenow<<resize )
            resize++;
          break;
        case BY_PG:
        { size_t s;

                                        /* the larger of the two counts */
          s = (db->graphs.count < db->predicates.count ?
                        db->predicates.count : db->graphs.count);

          while ( SCALE(s) > sizenow<<resize )
            resize++;
          break;
        }
        default:
          assert(0);
      }

      if ( resize )
      { resized++;
        size_triple_hash(db, i, sizenow<<resize);
      }
    }

#undef SCALE
#undef SCALEF

    if ( resized )
      invalidate_distinct_counts(db);
  }
}
3877
3878
3879 static size_t
distinct_hash_values(rdf_db * db,int icol)3880 distinct_hash_values(rdf_db *db, int icol)
3881 { triple *t;
3882 size_t count;
3883 atomset hash_set;
3884 int byx = col_index[icol];
3885
3886 init_atomset(&hash_set);
3887 for(t=fetch_triple(db, db->by_none.head);
3888 t;
3889 t=triple_follow_hash(db, t, ICOL(BY_NONE)))
3890 { add_atomset(&hash_set, (atom_t)triple_hash_key(t, byx));
3891 }
3892 count = hash_set.count;
3893 destroy_atomset(&hash_set);
3894
3895 return count;
3896 }
3897
3898
3899 static void
initial_size_triple_hash(rdf_db * db,int icol)3900 initial_size_triple_hash(rdf_db *db, int icol)
3901 { triple_hash *hash = &db->hash[icol];
3902 size_t size;
3903
3904 switch(col_index[icol])
3905 { case BY_S:
3906 size = db->resources.hash.count;
3907 break;
3908 case BY_P:
3909 size = db->predicates.count;
3910 break;
3911 case BY_O:
3912 size = db->resources.hash.count + db->literals.count;
3913 break;
3914 case BY_SPO:
3915 size = db->created - db->erased;
3916 break;
3917 case BY_G:
3918 size = db->graphs.count;
3919 break;
3920 case BY_PO:
3921 case BY_SG:
3922 case BY_SP:
3923 case BY_PG:
3924 size = distinct_hash_values(db, icol);
3925 break;
3926 default:
3927 assert(0);
3928 return;
3929 }
3930
3931 size /= hash->avg_chain_len;
3932 size_triple_hash(db, icol, size);
3933 }
3934
3935
3936 static int
init_tables(rdf_db * db)3937 init_tables(rdf_db *db)
3938 { int ic;
3939 triple_hash *by_none = &db->hash[ICOL(BY_NONE)];
3940
3941 by_none->blocks[0] = &db->by_none;
3942 by_none->bucket_count_epoch = 1;
3943 by_none->bucket_count = 1;
3944 by_none->created = TRUE;
3945
3946 for(ic=BY_S; ic<INDEX_TABLES; ic++)
3947 { if ( !init_triple_hash(db, ic, INITIAL_TABLE_SIZE) )
3948 return FALSE;
3949 }
3950
3951 return (init_resource_db(db, &db->resources) &&
3952 init_pred_table(db) &&
3953 init_graph_table(db) &&
3954 init_literal_table(db));
3955 }
3956
3957
3958 /*******************************
3959 * INDEX OPTIMIZATION *
3960 *******************************/
3961
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Index optimization copies triples that  have   been  indexed  while  the
hash-table was small to the current table.   This adds a copy of the
triple to the index (at the new place). The old triple gets a pointer
->reindexed pointing to the new version. deref_triple() finds the real
triple.

The next thing we need to do is  reclaim this in gc_hash_chain(). To do
that, we replace old->lifespan.died with db->reindexed++. The logic that
finds old queries also finds the query with the oldest reindexed
counter. Triples that have yet older old->lifespan.died can safely be
removed.

TBD: To preserve order, we must insert the new triples before the old
ones. This is significantly more complex, notably because they must be
re-indexed in reverse order in this case.   Probably the best way to
implement this is to collect the triples that must be reindexed in a
triple buffer and then use a version of link_triple_hash() that prepends
the triples, calling on the triples from the buffer in reverse order. We
will ignore this for now: triple ordering has no semantics.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static void
reindex_triple(rdf_db *db, triple *t)
{ triple *t2 = alloc_triple();          /* NOTE(review): alloc_triple() may */
                                        /* return NULL; not checked here */

  *t2 = *t;                             /* start from a full copy */
  t2->has_reindex_prev = TRUE;          /* is_garbage_triple() keeps these */
  memset(&t2->tp, 0, sizeof(t2->tp));   /* copy starts without links */
  register_triple(db, t2);
  simpleMutexLock(&db->queries.write.lock);
  link_triple_hash(db, t2);
  TMAGIC(t2, T_CHAINED2);
  t->reindexed = T_ID(t2);              /* old triple refers to the copy */
  TMAGIC(t, T_REINDEXED);
  t->lifespan.died = db->reindexed++;   /* died now holds a reindex stamp */
  if ( t2->object_is_literal )          /* do not deallocate lit twice */
  { simpleMutexLock(&db->locks.literal);
    t2->object.literal->references++;
    assert(t2->object.literal->references != 0);
    simpleMutexUnlock(&db->locks.literal);
  }
  t->atoms_locked = FALSE;              /* same for unlock_atoms() */
  simpleMutexUnlock(&db->queries.write.lock);
}
4007
4008
4009 static int
optimizable_triple_hash(rdf_db * db,int icol)4010 optimizable_triple_hash(rdf_db *db, int icol)
4011 { triple_hash *hash = &db->hash[icol];
4012 int opt = 0;
4013 size_t epoch;
4014
4015 if ( hash->created == FALSE )
4016 return FALSE;
4017
4018 for ( epoch=hash->bucket_count_epoch; epoch < hash->bucket_count; epoch*=2 )
4019 opt++;
4020
4021 opt -= hash->optimize_threshold;
4022 if ( opt < 0 )
4023 opt = 0;
4024
4025 return opt;
4026 }
4027
4028
4029 static int
optimizable_hashes(rdf_db * db)4030 optimizable_hashes(rdf_db *db)
4031 { int icol;
4032 int optimizable = 0;
4033
4034 for(icol=1; icol<INDEX_TABLES; icol++)
4035 optimizable += optimizable_triple_hash(db, icol);
4036
4037 return optimizable;
4038 }
4039
4040
4041 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4042 optimize_triple_hash() only doubles hash->bucket_count_epoch! It may be
4043 necessary to call it multiple times, but reindexing one step at a time
4044 is not slower than doing it all at once (is this true?)
4045
4046 Note that there is another reason to do only a little of the work
4047 because copying the triples temporarily costs memory.
4048
4049 (*) We have already done the reindexing from another index. It may also
4050 mean that this triple was reindexed in a previous pass, but that GC has
4051 not yet reclaimed the triple. I think that should be fine because it is
4052 old and burried anyway, but still accessible for old queries.
4053 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4054
4055 static int
optimize_triple_hash(rdf_db * db,int icol,gen_t gen)4056 optimize_triple_hash(rdf_db *db, int icol, gen_t gen)
4057 { triple_hash *hash = &db->hash[icol];
4058
4059 if ( optimizable_triple_hash(db, icol) )
4060 { size_t b_no = 0;
4061 size_t upto = hash->bucket_count_epoch;
4062 size_t copied = 0;
4063
4064 for( ; b_no < upto; b_no++ )
4065 { triple_bucket *bucket = &hash->blocks[MSB(b_no)][b_no];
4066 triple *t;
4067
4068 for(t=fetch_triple(db, bucket->head); t; t=triple_follow_hash(db, t, icol))
4069 { if ( t->lifespan.died >= gen &&
4070 !t->reindexed && /* see (*) */
4071 triple_hash_key(t, col_index[icol]) % hash->bucket_count != b_no )
4072 { reindex_triple(db, t);
4073 copied++;
4074 }
4075 }
4076 }
4077
4078 hash->bucket_count_epoch = upto*2;
4079 DEBUG(1, Sdprintf("Optimized hash %s (epoch=%ld; size=%ld; copied=%ld)\n",
4080 col_name[icol],
4081 (long)hash->bucket_count_epoch,
4082 (long)hash->bucket_count,
4083 (long)copied));
4084
4085 return 1;
4086 }
4087
4088 return 0;
4089 }
4090
4091
4092 static int
optimize_triple_hashes(rdf_db * db,gen_t gen)4093 optimize_triple_hashes(rdf_db *db, gen_t gen)
4094 { int icol;
4095 int optimized = 0;
4096
4097 for(icol=1; icol<INDEX_TABLES; icol++)
4098 { enter_scan(&db->defer_all);
4099 optimized += optimize_triple_hash(db, icol, gen);
4100 exit_scan(&db->defer_all);
4101 if ( PL_handle_signals() < 0 )
4102 return -1;
4103 }
4104
4105 return optimized; /* # hashes optimized */
4106 }
4107
4108
4109 /*******************************
4110 * GARBAGE COLLECTION *
4111 *******************************/
4112
4113 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4114 Garbage collect triples, given that the oldest running query reads at
4115 generation gen. There are two thing we can do:
4116
4117 - Remove any triple that died before gen. These triples must be left
4118 to GC. See also alloc.c.
4119
4120 We count `uncollectable' triples: erased triples that still have queries
4121 that depend on them. If no such triples exist there is no point in
4122 running GC.
4123
4124 Should do something similar with reindexed triples that cannot yet be
4125 collected? The problem is less likely, because they become ready after
4126 all active _queries_ started before the reindexing have died, wereas the
4127 generation stuff depends on longer lived objects which as snapshots and
4128 transactions.
4129
4130 t->linked is managed at three placed: link_triple_hash(), where we are
4131 sure that the triple is not garbage (are we, reindex_triple()?), when a
4132 new index is created and when the triple has been removed from the index
4133 links (below).
4134 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4135
4136 static inline int
is_garbage_triple(triple * t,gen_t old_query_gen,gen_t old_reindex_gen)4137 is_garbage_triple(triple *t, gen_t old_query_gen, gen_t old_reindex_gen)
4138 { if ( t->has_reindex_prev )
4139 return FALSE;
4140
4141 if ( t->reindexed ) /* Safe: reindex_triple() */
4142 return t->lifespan.died < old_reindex_gen; /* is also part of GC */
4143 else
4144 return t->lifespan.died < old_query_gen;
4145 }
4146
4147
4148 static size_t
gc_hash_chain(rdf_db * db,size_t bucket_no,int icol,gen_t gen,gen_t reindex_gen)4149 gc_hash_chain(rdf_db *db, size_t bucket_no, int icol,
4150 gen_t gen, gen_t reindex_gen)
4151 { triple_bucket *bucket = &db->hash[icol].blocks[MSB(bucket_no)][bucket_no];
4152 triple *prev = NULL;
4153 triple *t;
4154 size_t collected = 0;
4155 size_t uncollectable = 0;
4156
4157 for(t = fetch_triple(db, bucket->head); t; t=triple_follow_hash(db, t, icol))
4158 { if ( is_garbage_triple(t, gen, reindex_gen) )
4159 { simpleMutexLock(&db->queries.write.lock);
4160
4161 if ( prev )
4162 prev->tp.next[icol] = t->tp.next[icol];
4163 else
4164 bucket->head = t->tp.next[icol];
4165 if ( T_ID(t) == bucket->tail )
4166 bucket->tail = T_ID(prev);
4167
4168 collected++;
4169
4170 if ( --t->linked == 0 )
4171 { DEBUG(2, { char buf[2][64];
4172 Sdprintf("GC at gen=%s..%s: ",
4173 gen_name(t->lifespan.born, buf[0]),
4174 gen_name(t->lifespan.died, buf[1]));
4175 print_triple(t, PRT_NL);
4176 });
4177
4178 if ( t->reindexed )
4179 { triple *t2 = fetch_triple(db, t->reindexed);
4180
4181 db->gc.reclaimed_reindexed++;
4182 t2->has_reindex_prev = FALSE;
4183 } else
4184 db->gc.reclaimed_triples++;
4185
4186 simpleMutexUnlock(&db->queries.write.lock);
4187 free_triple(db, t, TRUE);
4188 } else
4189 { simpleMutexUnlock(&db->queries.write.lock);
4190 }
4191 } else
4192 { prev=t;
4193 if ( icol == 0 && t->erased && !t->reindexed &&
4194 t->lifespan.died >= gen )
4195 uncollectable++;
4196 }
4197 }
4198
4199 if ( collected && icol > 0 ) /* concurrent with hashing new ones */
4200 ATOMIC_SUB(&bucket->count, collected);
4201
4202 if ( icol == 0 )
4203 { char buf[64];
4204
4205 DEBUG(4, Sdprintf("At %s: %lld uncollectable\n",
4206 gen_name(gen, buf),
4207 uncollectable));
4208 db->gc.uncollectable = uncollectable;
4209 }
4210
4211 return collected;
4212 }
4213
4214
4215 static size_t
gc_hash(rdf_db * db,int icol,gen_t gen,gen_t reindex_gen)4216 gc_hash(rdf_db *db, int icol, gen_t gen, gen_t reindex_gen)
4217 { size_t mb = db->hash[icol].bucket_count;
4218 size_t b;
4219 size_t collected = 0;
4220
4221 for(b=0; b<mb; b++)
4222 collected += gc_hash_chain(db, b, icol, gen, reindex_gen);
4223
4224 return collected;
4225 }
4226
4227
4228 static int
gc_hashes(rdf_db * db,gen_t gen,gen_t reindex_gen)4229 gc_hashes(rdf_db *db, gen_t gen, gen_t reindex_gen)
4230 { size_t garbage = db->erased - db->gc.reclaimed_triples;
4231 size_t reindex = db->reindexed - db->gc.reclaimed_reindexed;
4232
4233 if ( garbage + reindex > 0 )
4234 { int icol;
4235
4236 for(icol=0; icol<INDEX_TABLES; icol++)
4237 { size_t collected;
4238
4239 if ( db->hash[icol].created )
4240 { enter_scan(&db->defer_all);
4241 collected = gc_hash(db, icol, gen, reindex_gen);
4242 exit_scan(&db->defer_all);
4243
4244 if ( PL_handle_signals() < 0 )
4245 return -1;
4246 } else
4247 collected = 0;
4248
4249 if ( icol == 0 && collected == 0 )
4250 break;
4251 }
4252 }
4253
4254 return 0;
4255 }
4256
4257
4258 static int
gc_set_busy(rdf_db * db)4259 gc_set_busy(rdf_db *db)
4260 { int busy;
4261
4262 simpleMutexLock(&db->locks.misc);
4263 if ( !(busy = db->gc.busy) )
4264 db->gc.busy = TRUE;
4265 simpleMutexUnlock(&db->locks.misc);
4266
4267 return !busy;
4268 }
4269
4270
4271 static void
gc_clear_busy(rdf_db * db)4272 gc_clear_busy(rdf_db *db)
4273 { simpleMutexLock(&db->locks.misc);
4274 db->gc.busy = FALSE;
4275 simpleMutexUnlock(&db->locks.misc);
4276 }
4277
4278
4279 static int
gc_db(rdf_db * db,gen_t gen,gen_t reindex_gen)4280 gc_db(rdf_db *db, gen_t gen, gen_t reindex_gen)
4281 { char buf[64];
4282 int rc;
4283
4284 if ( !gc_set_busy(db) )
4285 return FALSE;
4286 simpleMutexLock(&db->locks.gc);
4287 DEBUG(10, Sdprintf("RDF GC; gen = %s\n", gen_name(gen, buf)));
4288 if ( optimize_triple_hashes(db, gen) >= 0 &&
4289 gc_hashes(db, gen, reindex_gen) >= 0 &&
4290 gc_clouds(db, gen) >= 0 &&
4291 gc_graphs(db, gen) >= 0 )
4292 { db->gc.count++;
4293 db->gc.last_gen = gen;
4294 db->gc.last_reindex_gen = reindex_gen;
4295 rc = TRUE;
4296 } else
4297 rc = FALSE;
4298 gc_clear_busy(db);
4299 simpleMutexUnlock(&db->locks.gc);
4300
4301 return rc;
4302 }
4303
4304
/* Acquire db->locks.gc and reset the GC statistics and db->reindexed to
   zero.  Acquiring the lock waits for a collection that holds it (gc_db()
   runs with this lock held).  The function returns with db->locks.gc
   still locked; resume_gc() releases it.  Always returns TRUE.

   NOTE(review): gc.busy is written here without taking db->locks.misc,
   and gc.last_reindex_gen is not reset along with gc.last_gen -- confirm
   both are intended. */
static int
suspend_gc(rdf_db *db)
{ int was_busy = db->gc.busy;           /* only used for debug messages */

  DEBUG(2, if ( was_busy )
             Sdprintf("Reset: GC in progress, waiting ...\n"));

  simpleMutexLock(&db->locks.gc);
  DEBUG(2, if ( was_busy )
             Sdprintf("Reset: GC finished\n"));
  db->gc.busy                = TRUE;
  db->gc.count               = 0;
  db->gc.time                = 0.0;
  db->gc.reclaimed_triples   = 0;
  db->gc.reclaimed_reindexed = 0;
  db->reindexed              = 0;
  db->gc.uncollectable       = 0;
  db->gc.last_gen            = 0;
  db->gc.busy                = FALSE;

  return TRUE;                          /* db->locks.gc is still held */
}
4327
4328
4329 static void
resume_gc(rdf_db * db)4330 resume_gc(rdf_db *db)
4331 { simpleMutexUnlock(&db->locks.gc);
4332 }
4333
4334
4335
4336 /** rdf_gc_(-Done) is semidet.
4337
4338 Run the RDF-DB garbage collector. The collector is typically ran in a
4339 separate thread. Its execution does not interfere with readers and only
4340 synchronizes with writers using short-held locks.
4341
4342 Fails without any action if there is already a GC in progress.
4343 */
4344
4345 static foreign_t
rdf_gc(void)4346 rdf_gc(void)
4347 { rdf_db *db = rdf_current_db();
4348 gen_t reindex_gen;
4349 gen_t gen = oldest_query_geneneration(db, &reindex_gen);
4350
4351 return gc_db(db, gen, reindex_gen);
4352 }
4353
4354
4355 /** rdf_add_gc_time(+Time:double) is det.
4356
4357 Add CPU time to GC statistics. This is left to Prolog
4358
4359 */
4360
4361 static foreign_t
rdf_add_gc_time(term_t time)4362 rdf_add_gc_time(term_t time)
4363 { double t;
4364
4365 if ( PL_get_float_ex(time, &t) )
4366 { rdf_db *db = rdf_current_db();
4367
4368 db->gc.time += t;
4369 return TRUE;
4370 }
4371
4372 return FALSE;
4373 }
4374
4375 /** rdf_gc_info(-Info) is det.
4376
4377 Return info to help deciding on whether or not to call rdf_gc. Info is a
4378 record with the following members:
4379
4380 1. Total number of triples in hash (dead or alive)
4381 2. Total dead triples in hash (deleted or reindexed)
4382 3. Total reindexed but not reclaimed triples
4383 4. Total number of possible optimizations to hash-tables.
4384 5. Oldest generation we must keep
4385 6. Oldest generation at last GC
4386 7. Oldest reindexed triple we must keep
4387 8. Oldest reindexed at last GC
4388 */
4389
4390 #define INT_ARG(val) PL_INT64, (int64_t)(val)
4391
4392 static foreign_t
rdf_gc_info(term_t info)4393 rdf_gc_info(term_t info)
4394 { rdf_db *db = rdf_current_db();
4395 size_t life = db->created - db->gc.reclaimed_triples;
4396 size_t garbage = db->erased - db->gc.reclaimed_triples;
4397 size_t reindex = db->reindexed - db->gc.reclaimed_reindexed;
4398 gen_t keep_reindex;
4399 gen_t keep_gen = oldest_query_geneneration(db, &keep_reindex);
4400
4401 if ( keep_gen == db->gc.last_gen )
4402 { garbage -= db->gc.uncollectable;
4403 assert((int64_t)garbage >= 0);
4404 }
4405
4406 return PL_unify_term(info,
4407 PL_FUNCTOR_CHARS, "gc_info", 8,
4408 INT_ARG(life),
4409 INT_ARG(garbage),
4410 INT_ARG(reindex),
4411 INT_ARG(optimizable_hashes(db)),
4412 INT_ARG(keep_gen),
4413 INT_ARG(db->gc.last_gen),
4414 INT_ARG(keep_reindex),
4415 INT_ARG(db->gc.last_reindex_gen));
4416 }
4417
4418
4419 /*******************************
4420 * GC THREAD *
4421 *******************************/
4422
4423 int
rdf_create_gc_thread(rdf_db * db)4424 rdf_create_gc_thread(rdf_db *db)
4425 { if ( db->gc.thread_started )
4426 return TRUE;
4427
4428 simpleMutexLock(&db->locks.misc);
4429 if ( !db->gc.thread_started )
4430 { db->gc.thread_started = TRUE;
4431
4432 PL_call_predicate(NULL, PL_Q_NORMAL,
4433 PL_predicate("rdf_create_gc_thread", 0, "rdf_db"), 0);
4434 }
4435 simpleMutexUnlock(&db->locks.misc);
4436
4437 return TRUE;
4438 }
4439
4440
4441 /*******************************
4442 * OVERALL DATABASE *
4443 *******************************/
4444
/* Allocate and initialise a database object.  The structure is zeroed,
   its locks, tables, triple array and query administration are set up, a
   prefix table is attached and the initial thresholds/generations are
   filled in.

   NOTE(review): the result of init_tables() is not checked here. */
static rdf_db *
new_db(void)
{ rdf_db *db = PL_malloc_uncollectable(sizeof(*db));

  memset(db, 0, sizeof(*db));           /* all fields start out as zero */
  INIT_LOCK(db);
  init_tables(db);
  init_triple_array(db);
  init_query_admin(db);
  db->prefixes = new_prefix_table();

  db->duplicate_admin_threshold = DUPLICATE_ADMIN_THRESHOLD;
  db->snapshots.keep = GEN_MAX;
  db->queries.generation = GEN_EPOCH;

  return db;
}
4462
4463
4464 static rdf_db *RDF_DB;
4465
4466 rdf_db *
rdf_current_db(void)4467 rdf_current_db(void)
4468 { if ( RDF_DB )
4469 return RDF_DB;
4470
4471 simpleMutexLock(&rdf_lock);
4472 if ( !RDF_DB )
4473 RDF_DB = new_db();
4474 simpleMutexUnlock(&rdf_lock);
4475
4476 return RDF_DB;
4477 }
4478
4479
4480 static triple *
new_triple(rdf_db * db)4481 new_triple(rdf_db *db)
4482 { triple *t = alloc_triple();
4483 t->allocated = TRUE;
4484
4485 return t;
4486 }
4487
4488
4489 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4490 free_triple() is called in two scenarios. One is from the garbage
4491 collector after a triple is deleted from all hash chains. In this case
4492 the linger argument is TRUE and the next-pointers of the triples are
4493 still in place because search may be scanning the triple. See alloc.c
4494 for details on the triple memory management. The second case is deletion
4495 of temporary triples, something that may happen from many threads. In
4496 either case, this is typically called unlocked.
4497 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4498
4499 static void
free_triple(rdf_db * db,triple * t,int linger)4500 free_triple(rdf_db *db, triple *t, int linger)
4501 { if ( t->match == STR_MATCH_BETWEEN )
4502 free_literal_value(db, &t->tp.end);
4503
4504 if ( !t->allocated )
4505 { unlock_atoms(db, t);
4506 if ( t->object_is_literal && t->object.literal )
4507 { free_literal(db, t->object.literal);
4508 t->object_is_literal = FALSE;
4509 }
4510 } else
4511 { unalloc_triple(db, t, linger);
4512 }
4513 }
4514
4515
4516 static size_t
literal_hash(literal * lit)4517 literal_hash(literal *lit)
4518 { if ( lit->hash )
4519 { return lit->hash;
4520 } else
4521 { unsigned int hash;
4522
4523 switch(lit->objtype)
4524 { case OBJ_STRING:
4525 hash = atom_hash_case(lit->value.string);
4526 break;
4527 case OBJ_INTEGER:
4528 case OBJ_DOUBLE:
4529 hash = rdf_murmer_hash(&lit->value.integer,
4530 sizeof(lit->value.integer),
4531 MURMUR_SEED);
4532 break;
4533 case OBJ_TERM:
4534 hash = rdf_murmer_hash(lit->value.term.record,
4535 (int)lit->value.term.len,
4536 MURMUR_SEED);
4537 break;
4538 default:
4539 assert(0);
4540 return 0;
4541 }
4542
4543 if ( !hash )
4544 hash = 0x1; /* cannot be 0 */
4545
4546 lit->hash = hash;
4547 return lit->hash;
4548 }
4549 }
4550
4551
4552 static size_t
object_hash(triple * t)4553 object_hash(triple *t)
4554 { if ( t->object_is_literal )
4555 { return literal_hash(t->object.literal);
4556 } else
4557 { return atom_hash(t->object.resource, OBJ_MURMUR_SEED);
4558 }
4559 }
4560
4561
4562 static size_t
subject_hash(triple * t)4563 subject_hash(triple *t)
4564 { return atom_hash(t->subject_id, SUBJ_MURMUR_SEED);
4565 }
4566
4567 static size_t
graph_hash(triple * t)4568 graph_hash(triple *t)
4569 { return atom_hash(t->graph_id, GRAPH_MURMUR_SEED);
4570 }
4571
4572
4573 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4574 triple_hash_key() computes the hash for a triple on a given index. It
4575 can only be called for indices defined in the col_index-array. Note that
4576 the returned value is unconstrained and needs to be taken modulo the
4577 table-size.
4578
4579 If you change anything here, you might need to update
4580 init_cursor_from_literal().
4581 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4582
4583 static size_t
triple_hash_key(triple * t,int which)4584 triple_hash_key(triple *t, int which)
4585 { size_t v = 0;
4586
4587 assert(t->resolve_pred == FALSE);
4588
4589 if ( which&BY_S ) v ^= subject_hash(t);
4590 if ( which&BY_P ) v ^= predicate_hash(t->predicate.r);
4591 if ( which&BY_O ) v ^= object_hash(t);
4592 if ( which&BY_G ) v ^= graph_hash(t);
4593
4594 return v;
4595 }
4596
4597
4598 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4599 by_inverse[] returns the index key to use for inverse search as needed
4600 to realise symmetric and inverse predicates.
4601
4602 Note that this only deals with the non-G(graph) indices because it is
4603 only used by rdf_has/3 and rdf_reachable/3.
4604 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4605
4606 static int by_inverse[8] =
4607 { BY_NONE, /* BY_NONE = 0 */
4608 BY_O, /* BY_S = 1 */
4609 BY_P, /* BY_P = 2 */
4610 BY_PO, /* BY_SP = 3 */
4611 BY_S, /* BY_O = 4 */
4612 BY_SO, /* BY_SO = 5 */
4613 BY_SP, /* BY_PO = 6 */
4614 BY_SPO, /* BY_SPO = 7 */
4615 };
4616
4617
4618 static inline void
append_triple_bucket(rdf_db * db,triple_bucket * bucket,int icol,triple * t)4619 append_triple_bucket(rdf_db *db, triple_bucket *bucket, int icol, triple *t)
4620 { if ( bucket->tail )
4621 { fetch_triple(db, bucket->tail)->tp.next[icol] = T_ID(t);
4622 } else
4623 { bucket->head = T_ID(t);
4624 }
4625 bucket->tail = T_ID(t);
4626 ATOMIC_INC(&bucket->count);
4627 }
4628
4629
4630 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4631 (*) ->linked is decremented in gc_hash_chain() for garbage triples. This
4632 can conflict. We must use some sort of synchronization with GC if the
4633 died generation is not the maximum and the triple might thus be garbage.
4634 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4635
4636 static void
create_triple_hashes(rdf_db * db,int count,int * ic)4637 create_triple_hashes(rdf_db *db, int count, int *ic)
4638 { triple_hash *hashes[16];
4639 int i, mx=0;
4640
4641 for(i=0; i<count; i++)
4642 { hashes[mx] = &db->hash[ic[i]];
4643 if ( !hashes[mx]->created )
4644 { initial_size_triple_hash(db, hashes[mx]->icol);
4645 mx++;
4646 }
4647 }
4648 hashes[mx] = NULL;
4649
4650 if ( mx > 0 )
4651 { simpleMutexLock(&db->queries.write.lock);
4652
4653 for(i=0; i<mx; i++)
4654 { if ( hashes[i]->created )
4655 { mx--;
4656 memmove(&hashes[i], &hashes[i+1], sizeof(hashes[0])*(mx-i));
4657 } else
4658 { DEBUG(1, Sdprintf("Creating hash %s\n", col_name[hashes[i]->icol]));
4659 }
4660 }
4661
4662 if ( mx > 0 )
4663 { triple *t;
4664
4665 for(t=fetch_triple(db, db->by_none.head);
4666 t;
4667 t=triple_follow_hash(db, t, ICOL(BY_NONE)))
4668 { for(i=0; i<mx; i++)
4669 { triple_hash *hash = hashes[i];
4670 int i = col_index[hash->icol];
4671 int key = triple_hash_key(t, i) % hash->bucket_count;
4672 triple_bucket *bucket = &hash->blocks[MSB(key)][key];
4673
4674 append_triple_bucket(db, bucket, hash->icol, t);
4675 t->linked++; /* (*) atomic? */
4676 }
4677 }
4678
4679 for(i=0; i<mx; i++)
4680 { triple_hash *hash = hashes[i];
4681 hash->created = TRUE;
4682 }
4683 }
4684 simpleMutexUnlock(&db->queries.write.lock);
4685 }
4686 }
4687
4688
4689 /* called with queries.write.lock held */
4690
4691 static void
link_triple_hash(rdf_db * db,triple * t)4692 link_triple_hash(rdf_db *db, triple *t)
4693 { int ic;
4694 int linked = 1;
4695
4696 append_triple_bucket(db, &db->by_none, ICOL(BY_NONE), t);
4697
4698 for(ic=1; ic<INDEX_TABLES; ic++)
4699 { triple_hash *hash = &db->hash[ic];
4700
4701 if ( hash->created )
4702 { int i = col_index[ic];
4703 int key = triple_hash_key(t, i) % hash->bucket_count;
4704 triple_bucket *bucket = &hash->blocks[MSB(key)][key];
4705
4706 append_triple_bucket(db, bucket, ic, t);
4707 linked++;
4708 }
4709 }
4710
4711 t->linked = linked; /* safe: never garbage */
4712 }
4713
4714
4715 /* prelink_triple() performs that part of the triple loading that does
4716 not require locking.
4717 */
4718
4719 int
prelink_triple(rdf_db * db,triple * t,query * q)4720 prelink_triple(rdf_db *db, triple *t, query *q)
4721 { register_triple(db, t);
4722 if ( t->resolve_pred )
4723 { t->predicate.r = lookup_predicate(db, t->predicate.u);
4724 t->resolve_pred = FALSE;
4725 }
4726 if ( t->object_is_literal )
4727 t->object.literal = share_literal(db, t->object.literal);
4728 if ( db->maintain_duplicates )
4729 mark_duplicate(db, t, q);
4730
4731 return TRUE;
4732 }
4733
4734
/* MT: Caller must hold db->queries.write.lock
4736
4737 Return: FALSE if nothing changed; TRUE if the database has changed
4738 TBD: Not all of this requires locking. Most should be moved out of
4739 the lock:
4740
4741 - Check for duplicates (?)
4742 - Consider re-hash
4743 - subProperty admin
4744 */
4745
4746 void
add_triple_consequences(rdf_db * db,triple * t,query * q)4747 add_triple_consequences(rdf_db *db, triple *t, query *q)
4748 { if ( t->predicate.r->name == ATOM_subPropertyOf &&
4749 t->object_is_literal == FALSE )
4750 { addSubPropertyOf(db, t, q);
4751 }
4752 }
4753
4754
4755 /* Called with queries.write.lock held */
4756
4757 int
link_triple(rdf_db * db,triple * t,query * q)4758 link_triple(rdf_db *db, triple *t, query *q)
4759 { assert(!t->linked);
4760
4761 link_triple_hash(db, t);
4762 TMAGIC(t, T_CHAINED1);
4763 add_triple_consequences(db, t, q);
4764 db->created++;
4765
4766 return TRUE;
4767 }
4768
4769
4770 int
postlink_triple(rdf_db * db,triple * t,query * q)4771 postlink_triple(rdf_db *db, triple *t, query *q)
4772 { register_predicate(db, t);
4773 register_graph(db, t);
4774
4775 return TRUE;
4776 }
4777
4778
4779 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4780 Erase a triple from the DB.
4781
MT: Caller must hold db->queries.write.lock
4783 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4784
4785 void
del_triple_consequences(rdf_db * db,triple * t,query * q)4786 del_triple_consequences(rdf_db *db, triple *t, query *q)
4787 { if ( t->predicate.r->name == ATOM_subPropertyOf &&
4788 t->object_is_literal == FALSE )
4789 delSubPropertyOf(db, t, q);
4790 }
4791
4792
4793 void
erase_triple(rdf_db * db,triple * t,query * q)4794 erase_triple(rdf_db *db, triple *t, query *q)
4795 { if ( t->erased )
4796 return;
4797
4798 simpleMutexLock(&db->locks.erase);
4799 if ( !t->erased )
4800 { db->erased++; /* incr. must be before setting erased */
4801 t->erased = TRUE; /* to make sure #garbage >= 0 */
4802 simpleMutexUnlock(&db->locks.erase);
4803
4804 unregister_graph(db, t); /* Updates count and MD5 */
4805 unregister_predicate(db, t); /* Updates count */
4806 if ( t->is_duplicate )
4807 ATOMIC_SUB(&db->duplicates, 1);
4808 } else
4809 { simpleMutexUnlock(&db->locks.erase);
4810 }
4811 }
4812
4813
4814 static int
match_literals(int how,literal * p,literal * e,literal * v)4815 match_literals(int how, literal *p, literal *e, literal *v)
4816 { literal_ex lex;
4817
4818 lex.literal = p;
4819 prepare_literal_ex(&lex);
4820
4821 DEBUG(2, { Sdprintf("match_literals(");
4822 print_literal(p);
4823 Sdprintf(", ");
4824 print_literal(v);
4825 Sdprintf(")\n"); });
4826
4827 switch(how)
4828 { case STR_MATCH_LT:
4829 return compare_literals(&lex, v) > 0;
4830 case STR_MATCH_LE:
4831 return compare_literals(&lex, v) >= 0;
4832 case STR_MATCH_EQ:
4833 return compare_literals(&lex, v) == 0;
4834 case STR_MATCH_GE:
4835 return compare_literals(&lex, v) <= 0;
4836 case STR_MATCH_GT:
4837 return compare_literals(&lex, v) < 0;
4838 case STR_MATCH_BETWEEN:
4839 if ( compare_literals(&lex, v) <= 0 )
4840 { lex.literal = e;
4841 prepare_literal_ex(&lex);
4842
4843 if ( compare_literals(&lex, v) >= 0 )
4844 return TRUE;
4845 }
4846 return FALSE;
4847 default:
4848 return match_atoms(how, p->value.string, v->value.string);
4849 }
4850 }
4851
4852
4853 static int
match_numerical(int how,literal * p,literal * e,literal * v)4854 match_numerical(int how, literal *p, literal *e, literal *v)
4855 { xsd_primary nv, np;
4856 literal_ex lex;
4857
4858 if ( !(nv=is_numerical_string(v)) )
4859 return FALSE;
4860 if ( !p->value.string ) /* literal(eq(type(<numeric>,_)),_) */
4861 return TRUE;
4862
4863 np = is_numerical_string(p);
4864 assert(np);
4865
4866 lex.literal = p;
4867 prepare_literal_ex(&lex);
4868
4869 switch(how)
4870 { case STR_MATCH_LT:
4871 return cmp_xsd_info(np, &lex.atom, nv, v->value.string) > 0;
4872 case STR_MATCH_LE:
4873 return cmp_xsd_info(np, &lex.atom, nv, v->value.string) >= 0;
4874 case STR_MATCH_GE:
4875 return cmp_xsd_info(np, &lex.atom, nv, v->value.string) <= 0;
4876 case STR_MATCH_GT:
4877 return cmp_xsd_info(np, &lex.atom, nv, v->value.string) < 0;
4878 case STR_MATCH_BETWEEN:
4879 if ( cmp_xsd_info(np, &lex.atom, nv, v->value.string) <= 0 )
4880 { lex.literal = e;
4881 prepare_literal_ex(&lex);
4882
4883 if ( cmp_xsd_info(np, &lex.atom, nv, v->value.string) >= 0 )
4884 return TRUE;
4885 }
4886 return FALSE;
4887 case STR_MATCH_EQ:
4888 default:
4889 return cmp_xsd_info(np, &lex.atom, nv, v->value.string) == 0;
4890 }
4891 }
4892
4893
4894 static int
match_object(triple * t,triple * p,unsigned flags)4895 match_object(triple *t, triple *p, unsigned flags)
4896 { if ( p->object_is_literal )
4897 { if ( t->object_is_literal )
4898 { literal *plit = p->object.literal;
4899 literal *tlit = t->object.literal;
4900
4901 if ( !plit->objtype && !plit->qualifier )
4902 return TRUE;
4903
4904 if ( plit->objtype && plit->objtype != tlit->objtype )
4905 return FALSE;
4906
4907 switch( plit->objtype )
4908 { case 0:
4909 if ( plit->type_or_lang == ATOM_ID(ATOM_xsdString) &&
4910 tlit->qualifier == Q_NONE )
4911 return TRUE;
4912 if ( plit->qualifier &&
4913 tlit->qualifier != plit->qualifier )
4914 return FALSE;
4915 if ( plit->type_or_lang &&
4916 tlit->type_or_lang != plit->type_or_lang )
4917 return FALSE;
4918 return TRUE;
4919 case OBJ_STRING:
4920 /* numeric match */
4921 if ( (flags&MATCH_NUMERIC) )
4922 return match_numerical(p->match, plit, &p->tp.end, tlit);
4923 /* qualifier match */
4924 if ( !( plit->type_or_lang == ATOM_ID(ATOM_xsdString) &&
4925 tlit->qualifier == Q_NONE ) )
4926 { if ( (flags & MATCH_QUAL) ||
4927 p->match == STR_MATCH_PLAIN )
4928 { if ( tlit->qualifier != plit->qualifier )
4929 return FALSE;
4930 } else
4931 { if ( plit->qualifier && tlit->qualifier &&
4932 tlit->qualifier != plit->qualifier )
4933 return FALSE;
4934 }
4935 if ( plit->type_or_lang &&
4936 tlit->type_or_lang != plit->type_or_lang )
4937 return FALSE;
4938 }
4939 /* lexical match */
4940 if ( plit->value.string )
4941 { if ( tlit->value.string != plit->value.string ||
4942 p->match == STR_MATCH_LT || p->match == STR_MATCH_GT )
4943 { if ( p->match >= STR_MATCH_ICASE )
4944 { return match_literals(p->match, plit, &p->tp.end, tlit);
4945 } else
4946 { return FALSE;
4947 }
4948 }
4949 }
4950 return TRUE;
4951 case OBJ_INTEGER:
4952 if ( p->match >= STR_MATCH_LT )
4953 return match_literals(p->match, plit, &p->tp.end, tlit);
4954 return tlit->value.integer == plit->value.integer;
4955 case OBJ_DOUBLE:
4956 if ( p->match >= STR_MATCH_LT )
4957 return match_literals(p->match, plit, &p->tp.end, tlit);
4958 return tlit->value.real == plit->value.real;
4959 case OBJ_TERM:
4960 if ( p->match >= STR_MATCH_LT )
4961 return match_literals(p->match, plit, &p->tp.end, tlit);
4962 if ( plit->value.term.record &&
4963 plit->value.term.len != tlit->value.term.len )
4964 return FALSE;
4965 return memcmp(tlit->value.term.record, plit->value.term.record,
4966 plit->value.term.len) == 0;
4967 default:
4968 assert(0);
4969 }
4970 }
4971 return FALSE;
4972 } else
4973 { if ( p->object.resource )
4974 { if ( t->object_is_literal ||
4975 (p->object.resource != t->object.resource) )
4976 return FALSE;
4977 }
4978 }
4979
4980 return TRUE;
4981 }
4982
4983
4984
4985 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4986 match_triples() is TRUE if the triple t matches the pattern p. This
4987 function does not consider whether or not the triple is visible.
4988 Matching is controlled by flags:
4989
4990 - MATCH_SUBPROPERTY Perform rdfs:subPropertyOf matching
4991 - MATCH_SRC Also match the source
4992 - MATCH_QUAL Match language/type qualifiers
4993 - STR_MATCH_* Additional string matching
4994 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4995
4996 static int
match_triples(rdf_db * db,triple * t,triple * p,query * q,unsigned flags)4997 match_triples(rdf_db *db, triple *t, triple *p, query *q, unsigned flags)
4998 { /* DEBUG(3, Sdprintf("match_triple(");
4999 print_triple(t, 0);
5000 Sdprintf(")\n"));
5001 */
5002
5003 if ( p->subject_id && t->subject_id != p->subject_id )
5004 return FALSE;
5005 if ( !match_object(t, p, flags) )
5006 return FALSE;
5007 if ( flags & MATCH_SRC )
5008 { if ( p->graph_id && t->graph_id != p->graph_id )
5009 return FALSE;
5010 if ( p->line && t->line != p->line )
5011 return FALSE;
5012 }
5013 /* last; may be expensive */
5014 if ( p->predicate.r && t->predicate.r != p->predicate.r )
5015 { if ( (flags & MATCH_SUBPROPERTY) )
5016 return isSubPropertyOf(db, t->predicate.r, p->predicate.r, q);
5017 else
5018 return FALSE;
5019 }
5020 return TRUE;
5021 }
5022
5023
5024 /*******************************
5025 * SAVE/LOAD *
5026 *******************************/
5027
5028 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5029 The RDF triple format. This format is intended for quick save and load
5030 and not for readability or exchange. Parts are based on the SWI-Prolog
5031 Quick Load Format (implemented in pl-wic.c).
5032
5033 <file> ::= <magic>
5034 <version>
5035 ['S' <graph-name>]
5036 ['F' <graph-source>]
5037 ['t' <modified>]
5038 ['M' <md5>]
5039 {<triple>}
5040 'E'
5041
5042 <magic> ::= "RDF-dump\n"
5043 <version> ::= <integer>
5044
5045 <md5> ::= <byte>* (16 bytes digest)
5046
5047 <triple> ::= 'T'
5048 <subject>
5049 <predicate>
5050 <object>
5051 <graph>
5052
5053 <subject> ::= <resource>
5054 <predicate> ::= <resource>
5055
5056 <object> ::= "R" <resource>
5057 | "L" <atom>
5058
5059 <resource> ::= <atom>
5060
<atom> ::= "X" <integer>
         | "A" <string>
         | "W" <utf-8 string>
5064
5065 <string> ::= <integer><bytes>
5066
5067 <graph-name> ::= <atom>
5068 <graph-source> ::= <atom>
5069
5070 <graph> ::= <graph-file>
5071 <line>
5072 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5073
/* Magic string written at the very start of a dump by save_db() */
#define SAVE_MAGIC "RDF-dump\n"
/* NOTE(review): rdf_save_db() takes the version as an argument and
   accepts 2 and 3; presumably this is the default -- confirm */
#define SAVE_VERSION 2
5076
/* One entry of a saved_table: an object that was already written to
   the dump together with the number it was given.
*/
typedef struct saved
{ union
  { atom_t atom;
    predicate *pred;
    literal *lit;
    void *any;                  /* generic view; used for hashing/compare */
  } value;
  size_t as;                    /* number written after 'X' on re-use */
  struct saved *next;           /* next entry in the same hash bucket */
} saved;
5087
5088
/* Hash table mapping already saved objects (by pointer value) to the
   number they were saved as.
*/
typedef struct saved_table
{ saved ** saved_table;         /* array of bucket chains */
  size_t saved_size;            /* number of buckets */
  size_t saved_id;              /* number handed to the next new entry */
  tmp_store *store;             /* entries are allocated from here */
} saved_table;
5095
5096
/* Hash the pointer value itself (not what it points to) */

static inline int
saved_hash(void *value, unsigned int seed)
{ void *key = value;

  return rdf_murmer_hash(&key, sizeof(key), seed);
}
5101
5102
5103 static void
init_saved_table(rdf_db * db,saved_table * tab,tmp_store * store)5104 init_saved_table(rdf_db *db, saved_table *tab, tmp_store *store)
5105 { size_t size = 64;
5106 size_t bytes = size * sizeof(*tab->saved_table);
5107
5108 tab->saved_table = rdf_malloc(db, bytes);
5109 memset(tab->saved_table, 0, bytes);
5110 tab->saved_size = size;
5111 tab->saved_id = 0;
5112 tab->store = store;
5113 }
5114
5115 static void
resize_saved(rdf_db * db,saved_table * tab)5116 resize_saved(rdf_db *db, saved_table *tab)
5117 { size_t newsize = tab->saved_size * 2;
5118 size_t newbytes = sizeof(*tab->saved_table) * newsize;
5119 saved **newt = rdf_malloc(db, newbytes);
5120 saved **s = tab->saved_table;
5121 int i;
5122
5123 memset(newt, 0, newbytes);
5124 for(i=0; i<tab->saved_size; i++, s++)
5125 { saved *c, *n;
5126
5127 for(c=*s; c; c = n)
5128 { int hash = saved_hash(c->value.any, MURMUR_SEED) % newsize;
5129
5130 n = c->next;
5131 c->next = newt[hash];
5132 newt[hash] = c;
5133 }
5134 }
5135
5136 rdf_free(db, tab->saved_table, tab->saved_size*sizeof(*tab->saved_table));
5137 tab->saved_table = newt;
5138 tab->saved_size = newsize;
5139 }
5140
5141
5142 static void
destroy_saved_table(rdf_db * db,saved_table * tab)5143 destroy_saved_table(rdf_db *db, saved_table *tab)
5144 { if ( tab->saved_table )
5145 rdf_free(db, tab->saved_table, tab->saved_size*sizeof(*tab->saved_table));
5146 }
5147
5148 static saved *
lookup_saved(saved_table * tab,void * value)5149 lookup_saved(saved_table *tab, void *value)
5150 { int hash = saved_hash(value, MURMUR_SEED) % tab->saved_size;
5151 saved *s;
5152
5153 for(s=tab->saved_table[hash]; s; s= s->next)
5154 { if ( s->value.any == value )
5155 return s;
5156 }
5157
5158 return NULL;
5159 }
5160
5161 static saved *
add_saved(rdf_db * db,saved_table * tab,void * value)5162 add_saved(rdf_db *db, saved_table *tab, void *value)
5163 { int hash;
5164 saved *s;
5165
5166 if ( tab->saved_id/4 > tab->saved_size )
5167 resize_saved(db, tab);
5168
5169 hash = saved_hash(value, MURMUR_SEED) % tab->saved_size;
5170 if ( (s = alloc_tmp_store(tab->store, sizeof(*s))) )
5171 { s->value.any = value;
5172 s->as = tab->saved_id++;
5173 s->next = tab->saved_table[hash];
5174 tab->saved_table[hash] = s;
5175 }
5176
5177 return s;
5178 }
5179
5180
/* State kept while writing one dump */
typedef struct save_context
{ saved_table atoms;            /* atoms written so far */
  saved_table literals;         /* only initialised if version > 2 */
  saved_table predicates;       /* only initialised if version > 2 */
  tmp_store store;              /* memory for the table entries */
  int version; /* current save version */
} save_context;
5188
5189 static void
init_saved(rdf_db * db,save_context * ctx,int version)5190 init_saved(rdf_db *db, save_context *ctx, int version)
5191 { init_tmp_store(&ctx->store);
5192 init_saved_table(db, &ctx->atoms, &ctx->store);
5193 if ( version > 2 )
5194 { init_saved_table(db, &ctx->literals, &ctx->store);
5195 init_saved_table(db, &ctx->predicates, &ctx->store);
5196 }
5197 ctx->version = version;
5198 }
5199
5200 static void
destroy_saved(rdf_db * db,save_context * ctx)5201 destroy_saved(rdf_db *db, save_context *ctx)
5202 { destroy_saved_table(db, &ctx->atoms);
5203 if ( ctx->version > 2 )
5204 { destroy_saved_table(db, &ctx->literals);
5205 destroy_saved_table(db, &ctx->predicates);
5206 }
5207 destroy_tmp_store(&ctx->store);
5208 }
5209
5210 static saved *
lookup_saved_atom(save_context * ctx,atom_t a)5211 lookup_saved_atom(save_context *ctx, atom_t a)
5212 { return lookup_saved(&ctx->atoms, (void*)a);
5213 }
5214
5215 static saved *
add_saved_atom(rdf_db * db,save_context * ctx,atom_t a)5216 add_saved_atom(rdf_db *db, save_context *ctx, atom_t a)
5217 { return add_saved(db, &ctx->atoms, (void*)a);
5218 }
5219
5220 static saved *
lookup_saved_literal(save_context * ctx,literal * l)5221 lookup_saved_literal(save_context *ctx, literal *l)
5222 { return lookup_saved(&ctx->literals, l);
5223 }
5224
5225 static saved *
add_saved_literal(rdf_db * db,save_context * ctx,literal * l)5226 add_saved_literal(rdf_db *db, save_context *ctx, literal *l)
5227 { return add_saved(db, &ctx->literals, l);
5228 }
5229
5230 static saved *
lookup_saved_predicate(save_context * ctx,predicate * p)5231 lookup_saved_predicate(save_context *ctx, predicate *p)
5232 { return lookup_saved(&ctx->predicates, p);
5233 }
5234
5235 static saved *
add_saved_predicate(rdf_db * db,save_context * ctx,predicate * p)5236 add_saved_predicate(rdf_db *db, save_context *ctx, predicate *p)
5237 { return add_saved(db, &ctx->predicates, p);
5238 }
5239
5240
/* Number of bits in an int64_t */
#define INT64BITSIZE (sizeof(int64_t)*8)
/* int64_t value with only the top bit set, i.e. the most negative one */
#define PLMINLONG ((int64_t)((uint64_t)1<<(INT64BITSIZE-1)))
5243
5244 static void
save_int(IOSTREAM * fd,int64_t n)5245 save_int(IOSTREAM *fd, int64_t n)
5246 { int m;
5247 int64_t absn = (n >= 0 ? n : -n);
5248
5249 if ( n != PLMINLONG )
5250 { if ( absn < ((intptr_t)1 << 5) )
5251 { Sputc((int)(n & 0x3f), fd);
5252 return;
5253 } else if ( absn < ((intptr_t)1 << 13) )
5254 { Sputc((int)(((n >> 8) & 0x3f) | (1 << 6)), fd);
5255 Sputc((int)(n & 0xff), fd);
5256 return;
5257 } else if ( absn < ((intptr_t)1 << 21) )
5258 { Sputc((int)(((n >> 16) & 0x3f) | (2 << 6)), fd);
5259 Sputc((int)((n >> 8) & 0xff), fd);
5260 Sputc((int)(n & 0xff), fd);
5261 return;
5262 }
5263 }
5264
5265 for(m = sizeof(n); ; m--)
5266 { int b = (int)((absn >> (((m-1)*8)-1)) & 0x1ff);
5267
5268 if ( b == 0 )
5269 continue;
5270 break;
5271 }
5272
5273 Sputc(m | (3 << 6), fd);
5274
5275 for( ; m > 0; m--)
5276 { int b = (int)((n >> ((m-1)*8)) & 0xff);
5277
5278 Sputc(b, fd);
5279 }
5280 }
5281
5282
/* Doubles are saved as their raw bytes.  double_byte_order[i] is the
   index in the in-memory double of the i-th byte in the file; the
   order is reversed if WORDS_BIGENDIAN is defined.
   NOTE(review): the table has 8 entries, which assumes
   sizeof(double) == 8.
*/
#define BYTES_PER_DOUBLE sizeof(double)
#ifdef WORDS_BIGENDIAN
static const int double_byte_order[] = { 7,6,5,4,3,2,1,0 };
#else
static const int double_byte_order[] = { 0,1,2,3,4,5,6,7 };
#endif
5289
5290 static int
save_double(IOSTREAM * fd,double f)5291 save_double(IOSTREAM *fd, double f)
5292 { unsigned char *cl = (unsigned char *)&f;
5293 unsigned int i;
5294
5295 for(i=0; i<BYTES_PER_DOUBLE; i++)
5296 Sputc(cl[double_byte_order[i]], fd);
5297
5298 return TRUE;
5299 }
5300
5301
5302 static int
save_atom(rdf_db * db,IOSTREAM * out,atom_t a,save_context * ctx)5303 save_atom(rdf_db *db, IOSTREAM *out, atom_t a, save_context *ctx)
5304 { saved *s;
5305 size_t len;
5306 const char *chars;
5307 unsigned int i;
5308 const wchar_t *wchars;
5309
5310 if ( (s=lookup_saved_atom(ctx, a)) )
5311 { Sputc('X', out);
5312 save_int(out, s->as);
5313
5314 return TRUE;
5315 } else
5316 { s = add_saved_atom(db, ctx, a);
5317 }
5318
5319 if ( (chars = PL_atom_nchars(a, &len)) )
5320 { Sputc('A', out);
5321 save_int(out, len);
5322 for(i=0; i<len; i++, chars++)
5323 Sputc(*chars&0xff, out);
5324 } else if ( (wchars = PL_atom_wchars(a, &len)) )
5325 { IOENC enc = out->encoding;
5326
5327 Sputc('W', out);
5328 save_int(out, len);
5329 out->encoding = ENC_UTF8;
5330 for(i=0; i<len; i++, wchars++)
5331 { wint_t c = *wchars;
5332
5333 SECURE(assert(c>=0 && c <= 0x10ffff));
5334 Sputcode(c, out);
5335 }
5336 out->encoding = enc;
5337 } else
5338 return FALSE;
5339
5340 return TRUE;
5341 }
5342
5343
5344 static int
save_predicate(rdf_db * db,IOSTREAM * out,predicate * p,save_context * ctx)5345 save_predicate(rdf_db *db, IOSTREAM *out, predicate *p, save_context *ctx)
5346 { if ( ctx->version > 2 )
5347 { saved *s;
5348
5349 if ( (s=lookup_saved_predicate(ctx, p)) )
5350 { Sputc('X', out);
5351 save_int(out, s->as);
5352
5353 return TRUE;
5354 } else
5355 { s = add_saved_predicate(db, ctx, p);
5356 Sputc('P', out);
5357 }
5358 }
5359
5360 return save_atom(db, out, p->name, ctx);
5361 }
5362
5363 static int
save_literal(rdf_db * db,IOSTREAM * out,literal * lit,save_context * ctx)5364 save_literal(rdf_db *db, IOSTREAM *out, literal *lit, save_context *ctx)
5365 { if ( ctx->version > 2 )
5366 { saved *s;
5367
5368 if ( (s=lookup_saved_literal(ctx, lit)) )
5369 { Sputc('X', out);
5370 save_int(out, s->as);
5371
5372 return TRUE;
5373 } else
5374 { s = add_saved_literal(db, ctx, lit);
5375 }
5376 }
5377
5378 if ( lit->qualifier )
5379 { assert(lit->type_or_lang);
5380 Sputc(lit->qualifier == Q_LANG ? 'l' : 't', out);
5381 save_atom(db, out, ID_ATOM(lit->type_or_lang), ctx);
5382 }
5383
5384 switch(lit->objtype)
5385 { case OBJ_STRING:
5386 Sputc('L', out);
5387 save_atom(db, out, lit->value.string, ctx);
5388 break;
5389 case OBJ_INTEGER:
5390 Sputc('I', out);
5391 save_int(out, lit->value.integer);
5392 break;
5393 case OBJ_DOUBLE:
5394 { Sputc('F', out);
5395 save_double(out, lit->value.real);
5396 break;
5397 }
5398 case OBJ_TERM:
5399 { const char *s = lit->value.term.record;
5400 size_t len = lit->value.term.len;
5401
5402 Sputc('T', out);
5403 save_int(out, len);
5404 while(len-- > 0)
5405 Sputc(*s++, out);
5406
5407 break;
5408 }
5409 default:
5410 assert(0);
5411 }
5412
5413 return TRUE;
5414 }
5415
5416
5417
5418 static void
write_triple(rdf_db * db,IOSTREAM * out,triple * t,save_context * ctx)5419 write_triple(rdf_db *db, IOSTREAM *out, triple *t, save_context *ctx)
5420 { Sputc('T', out);
5421
5422 save_atom(db, out, ID_ATOM(t->subject_id), ctx);
5423 save_predicate(db, out, t->predicate.r, ctx);
5424
5425 if ( t->object_is_literal )
5426 { save_literal(db, out, t->object.literal, ctx);
5427 } else
5428 { Sputc('R', out);
5429 save_atom(db, out, t->object.resource, ctx);
5430 }
5431
5432 save_atom(db, out, ID_ATOM(t->graph_id), ctx);
5433 save_int(out, t->line);
5434 }
5435
5436
5437 static void
write_source(rdf_db * db,IOSTREAM * out,atom_t src,save_context * ctx)5438 write_source(rdf_db *db, IOSTREAM *out, atom_t src, save_context *ctx)
5439 { graph *s = existing_graph(db, src);
5440
5441 if ( s && s->source )
5442 { Sputc('F', out);
5443 save_atom(db, out, s->source, ctx);
5444 Sputc('t', out);
5445 save_double(out, s->modified);
5446 }
5447 }
5448
5449
5450 static void
write_md5(rdf_db * db,IOSTREAM * out,atom_t src)5451 write_md5(rdf_db *db, IOSTREAM *out, atom_t src)
5452 { graph *s = existing_graph(db, src);
5453
5454 if ( s )
5455 { md5_byte_t *p = s->digest;
5456 int i;
5457
5458 Sputc('M', out);
5459 for(i=0; i<16; i++)
5460 Sputc(*p++, out);
5461 }
5462 }
5463
5464
5465 static int
save_db(query * q,IOSTREAM * out,atom_t src,int version)5466 save_db(query *q, IOSTREAM *out, atom_t src, int version)
5467 { rdf_db *db = q->db;
5468 triple *t, p;
5469 save_context ctx;
5470 triple_walker tw;
5471
5472 memset(&p, 0, sizeof(p));
5473 init_saved(db, &ctx, version);
5474
5475 Sfprintf(out, "%s", SAVE_MAGIC);
5476 save_int(out, version);
5477 if ( src )
5478 { Sputc('S', out); /* start of graph header */
5479 save_atom(db, out, src, &ctx);
5480 write_source(db, out, src, &ctx);
5481 write_md5(db, out, src);
5482 p.graph_id = ATOM_ID(src);
5483 p.indexed = BY_G;
5484 } else
5485 { p.indexed = BY_NONE;
5486 }
5487 if ( Sferror(out) )
5488 return FALSE;
5489
5490 init_triple_walker(&tw, db, &p, p.indexed);
5491 while((t=next_triple(&tw)))
5492 { triple *t2;
5493
5494 if ( (t2=alive_triple(q, t)) &&
5495 (!src || ID_ATOM(t2->graph_id) == src) )
5496 { write_triple(db, out, t2, &ctx);
5497 if ( Sferror(out) )
5498 return FALSE;
5499 }
5500 }
5501 Sputc('E', out);
5502 if ( Sferror(out) )
5503 return FALSE;
5504
5505 destroy_saved(db, &ctx);
5506
5507 return TRUE;
5508 }
5509
5510
5511 static foreign_t
rdf_save_db(term_t stream,term_t graph,term_t version)5512 rdf_save_db(term_t stream, term_t graph, term_t version)
5513 { rdf_db *db = rdf_current_db();
5514 query *q;
5515 IOSTREAM *out;
5516 atom_t src;
5517 int rc;
5518 int v;
5519
5520 if ( !PL_get_stream_handle(stream, &out) )
5521 return PL_type_error("stream", stream);
5522 if ( !get_atom_or_var_ex(graph, &src) )
5523 return FALSE;
5524 if ( !PL_get_integer(version, &v) )
5525 return FALSE;
5526 if ( v < 2 || v > 3 )
5527 return PL_domain_error("rdf_db_save_version", version);
5528
5529 if ( (q = open_query(db)) )
5530 { rc = save_db(q, out, src, v);
5531 close_query(q);
5532 return rc;
5533 } else
5534 return FALSE;
5535 }
5536
5537
5538 static int64_t
load_int(IOSTREAM * fd)5539 load_int(IOSTREAM *fd)
5540 { int64_t first = Sgetc(fd);
5541 int bytes, shift, b;
5542
5543 if ( !(first & 0xc0) ) /* 99% of them: speed up a bit */
5544 { first <<= (INT64BITSIZE-6);
5545 first >>= (INT64BITSIZE-6);
5546
5547 return first;
5548 }
5549
5550 bytes = (int) ((first >> 6) & 0x3);
5551 first &= 0x3f;
5552
5553 if ( bytes <= 2 )
5554 { for( b = 0; b < bytes; b++ )
5555 { first <<= 8;
5556 first |= Sgetc(fd) & 0xff;
5557 }
5558
5559 shift = (sizeof(first)-1-bytes)*8 + 2;
5560 } else
5561 { int m;
5562
5563 bytes = (int)first;
5564 first = 0L;
5565
5566 for(m=0; m<bytes; m++)
5567 { first <<= 8;
5568 first |= Sgetc(fd) & 0xff;
5569 }
5570 shift = (sizeof(first)-bytes)*8;
5571 }
5572
5573 first <<= shift;
5574 first >>= shift;
5575
5576 return first;
5577 }
5578
5579
5580 static int
load_double(IOSTREAM * fd,double * fp)5581 load_double(IOSTREAM *fd, double *fp)
5582 { double f;
5583 unsigned char *cl = (unsigned char *)&f;
5584 unsigned int i;
5585
5586 for(i=0; i<BYTES_PER_DOUBLE; i++)
5587 { int c = Sgetc(fd);
5588
5589 if ( c == -1 )
5590 { *fp = 0.0;
5591 return FALSE;
5592 }
5593 cl[double_byte_order[i]] = c;
5594 }
5595
5596 *fp = f;
5597 return TRUE;
5598 }
5599
5600
/* Growable array of objects in the order in which they were loaded */
typedef struct ld_array
{ size_t loaded_id;             /* number of objects stored so far */
  size_t allocated_size;        /* capacity of loaded_objects (elements) */
  void **loaded_objects;        /* the objects; grown by add_object() */
} ld_array;
5606
/* State kept while loading a dump.  The three arrays hold the objects
   loaded so far; an 'X' <number> reference in the file is resolved by
   indexing the corresponding array.
*/
typedef struct ld_context
{ ld_array atoms;
  ld_array predicates;
  ld_array literals;
  atom_t graph_name; /* for single-graph files */
  graph *graph;
  atom_t graph_source;
  double modified;
  int has_digest;
  int version;                  /* version of the file being loaded */
  md5_byte_t digest[16];
  atomset graph_table; /* multi-graph file */
  triple_buffer triples;
} ld_context;
5621
5622
5623 static int
add_object(rdf_db * db,void * obj,ld_array * ar)5624 add_object(rdf_db *db, void *obj, ld_array *ar)
5625 { if ( ar->loaded_id >= ar->allocated_size )
5626 { if ( ar->allocated_size == 0 )
5627 { ar->allocated_size = 1024;
5628 ar->loaded_objects = malloc(sizeof(void*)*ar->allocated_size);
5629 } else
5630 { size_t bytes;
5631 void *new;
5632
5633 ar->allocated_size *= 2;
5634 bytes = sizeof(void*)*ar->allocated_size;
5635 if ( (new = realloc(ar->loaded_objects, bytes)) )
5636 ar->loaded_objects = new;
5637 else
5638 return FALSE;
5639 }
5640 }
5641
5642 ar->loaded_objects[ar->loaded_id++] = obj;
5643 return TRUE;
5644 }
5645
5646 static int
add_atom(rdf_db * db,atom_t a,ld_context * ctx)5647 add_atom(rdf_db *db, atom_t a, ld_context *ctx)
5648 { return add_object(db, (void*)a, &ctx->atoms);
5649 }
5650
5651 static atom_t
fetch_atom(ld_context * ctx,size_t idx)5652 fetch_atom(ld_context *ctx, size_t idx)
5653 { if ( idx < ctx->atoms.loaded_id )
5654 return (atom_t)ctx->atoms.loaded_objects[idx];
5655
5656 return (atom_t)0;
5657 }
5658
5659 static atom_t
load_atom(rdf_db * db,IOSTREAM * in,ld_context * ctx)5660 load_atom(rdf_db *db, IOSTREAM *in, ld_context *ctx)
5661 { switch(Sgetc(in))
5662 { case 'X':
5663 { size_t idx = (size_t)load_int(in);
5664 return fetch_atom(ctx, idx);
5665 }
5666 case 'A':
5667 { size_t len = (size_t)load_int(in);
5668 atom_t a;
5669
5670 if ( len < 1024 )
5671 { char buf[1024];
5672 Sfread(buf, 1, len, in);
5673 a = PL_new_atom_nchars(len, buf);
5674 } else
5675 { char *buf = rdf_malloc(db, len);
5676 Sfread(buf, 1, len, in);
5677 a = PL_new_atom_nchars(len, buf);
5678 rdf_free(db, buf, len);
5679 }
5680
5681 add_atom(db, a, ctx);
5682 return a;
5683 }
5684 case 'W':
5685 { int len = (int)load_int(in);
5686 atom_t a;
5687 wchar_t buf[1024];
5688 wchar_t *w;
5689 IOENC enc = in->encoding;
5690 int i;
5691
5692 if ( len < 1024 )
5693 w = buf;
5694 else
5695 w = rdf_malloc(db, len*sizeof(wchar_t));
5696
5697 in->encoding = ENC_UTF8;
5698 for(i=0; i<len; i++)
5699 { w[i] = Sgetcode(in);
5700 SECURE(assert(w[i]>=0 && w[i] <= 0x10ffff));
5701 }
5702 in->encoding = enc;
5703
5704 a = PL_new_atom_wchars(len, w);
5705 if ( w != buf )
5706 rdf_free(db, w, len*sizeof(wchar_t));
5707
5708 add_atom(db, a, ctx);
5709 return a;
5710 }
5711 default:
5712 { assert(0);
5713 return 0;
5714 }
5715 }
5716 }
5717
5718
5719 static int
add_predicate(rdf_db * db,predicate * p,ld_context * ctx)5720 add_predicate(rdf_db *db, predicate *p, ld_context *ctx)
5721 { return add_object(db, p, &ctx->predicates);
5722 }
5723
5724 static predicate *
fetch_predicate(ld_context * ctx,size_t idx)5725 fetch_predicate(ld_context *ctx, size_t idx)
5726 { if ( idx < ctx->predicates.loaded_id )
5727 return ctx->predicates.loaded_objects[idx];
5728
5729 return NULL;
5730 }
5731
5732 static predicate *
load_predicate(rdf_db * db,IOSTREAM * in,ld_context * ctx)5733 load_predicate(rdf_db *db, IOSTREAM *in, ld_context *ctx)
5734 { switch(Sgetc(in))
5735 { case 'X':
5736 { size_t idx = (size_t)load_int(in);
5737 return fetch_predicate(ctx, idx);
5738 }
5739 case 'P':
5740 { atom_t a;
5741
5742 if ( (a=load_atom(db, in, ctx)) )
5743 { predicate *p;
5744
5745 if ( (p=lookup_predicate(db, a)) &&
5746 add_predicate(db, p, ctx) )
5747 return p;
5748 }
5749 return NULL; /* no memory */
5750 }
5751 default:
5752 assert(0);
5753 return NULL;
5754 }
5755 }
5756
5757
5758 static int
add_literal(rdf_db * db,literal * lit,ld_context * ctx)5759 add_literal(rdf_db *db, literal *lit, ld_context *ctx)
5760 { return add_object(db, lit, &ctx->literals);
5761 }
5762
5763 static literal *
fetch_literal(ld_context * ctx,size_t idx)5764 fetch_literal(ld_context *ctx, size_t idx)
5765 { if ( idx < ctx->literals.loaded_id )
5766 return ctx->literals.loaded_objects[idx];
5767
5768 return NULL;
5769 }
5770
5771 static literal *
load_literal(rdf_db * db,IOSTREAM * in,ld_context * ctx,int c)5772 load_literal(rdf_db *db, IOSTREAM *in, ld_context *ctx, int c)
5773 { literal *lit;
5774
5775 if ( c == 'X' && ctx->version >= 3 )
5776 { size_t idx = (size_t)load_int(in);
5777 lit = fetch_literal(ctx, idx);
5778 simpleMutexLock(&db->locks.literal);
5779 lit->references++;
5780 assert(lit->references != 0);
5781 simpleMutexUnlock(&db->locks.literal);
5782 } else if ( (lit=new_literal(db)) )
5783 {
5784 value:
5785 switch(c)
5786 { case 'L':
5787 lit->objtype = OBJ_STRING;
5788 lit->value.string = load_atom(db, in, ctx);
5789 break;
5790 case 'I':
5791 lit->objtype = OBJ_INTEGER;
5792 lit->value.integer = load_int(in);
5793 break;
5794 case 'F':
5795 lit->objtype = OBJ_DOUBLE;
5796 load_double(in, &lit->value.real);
5797 break;
5798 case 'T':
5799 { unsigned int i;
5800 char *s;
5801
5802 lit->objtype = OBJ_TERM;
5803 lit->value.term.len = (size_t)load_int(in);
5804 lit->value.term.record = rdf_malloc(db, lit->value.term.len);
5805 lit->term_loaded = TRUE; /* see free_literal() */
5806 s = (char *)lit->value.term.record;
5807
5808 for(i=0; i<lit->value.term.len; i++)
5809 s[i] = Sgetc(in);
5810
5811 break;
5812 }
5813 case 'l':
5814 lit->qualifier = Q_LANG;
5815 lit->type_or_lang = ATOM_ID(load_atom(db, in, ctx));
5816 c = Sgetc(in);
5817 goto value;
5818 case 't':
5819 lit->qualifier = Q_TYPE;
5820 lit->type_or_lang = ATOM_ID(load_atom(db, in, ctx));
5821 c = Sgetc(in);
5822 goto value;
5823 default:
5824 assert(0);
5825 return NULL;
5826 }
5827
5828 if ( ctx->version >= 3 )
5829 { lock_atoms_literal(lit);
5830 lit = share_literal(db, lit);
5831
5832 add_literal(db, lit, ctx);
5833 }
5834 }
5835
5836 return lit;
5837 }
5838
5839
5840 static triple *
load_triple(rdf_db * db,IOSTREAM * in,ld_context * ctx)5841 load_triple(rdf_db *db, IOSTREAM *in, ld_context *ctx)
5842 { triple *t = new_triple(db);
5843 int c;
5844
5845 t->subject_id = ATOM_ID(load_atom(db, in, ctx));
5846 if ( ctx->version < 3 )
5847 { t->resolve_pred = TRUE;
5848 t->predicate.u = load_atom(db, in, ctx);
5849 } else
5850 { t->predicate.r = load_predicate(db, in, ctx);
5851 }
5852 if ( (c=Sgetc(in)) == 'R' )
5853 { t->object.resource = load_atom(db, in, ctx);
5854 } else
5855 { t->object_is_literal = TRUE;
5856 t->object.literal = load_literal(db, in, ctx, c);
5857 }
5858 t->graph_id = ATOM_ID(load_atom(db, in, ctx));
5859 t->line = (unsigned long)load_int(in);
5860 if ( !ctx->graph )
5861 add_atomset(&ctx->graph_table, ID_ATOM(t->graph_id));
5862
5863 return t;
5864 }
5865
5866
5867 static int
load_magic(IOSTREAM * in)5868 load_magic(IOSTREAM *in)
5869 { char *s = SAVE_MAGIC;
5870
5871 for( ; *s; s++)
5872 { if ( Sgetc(in) != *s )
5873 return FALSE;
5874 }
5875
5876 return TRUE;
5877 }
5878
5879
5880 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5881 Note that we have two types of saved states. One holding many named
5882 graphs and one holding the content of exactly one named graph.
5883 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5884
5885 static int
load_db(rdf_db * db,IOSTREAM * in,ld_context * ctx)5886 load_db(rdf_db *db, IOSTREAM *in, ld_context *ctx)
5887 { int c;
5888
5889 if ( !load_magic(in) )
5890 return FALSE;
5891 ctx->version = (int)load_int(in);
5892 if ( ctx->version < 2 || ctx->version > 3 )
5893 { term_t v = PL_new_term_ref();
5894
5895 if ( PL_put_integer(v, ctx->version) )
5896 return PL_domain_error("rdf_db_save_version", v);
5897 else
5898 return FALSE;
5899 }
5900
5901 while((c=Sgetc(in)) != EOF)
5902 { switch(c)
5903 { case 'T':
5904 { triple *t;
5905
5906 if ( !(t=load_triple(db, in, ctx)) )
5907 return FALSE;
5908 t->loaded = TRUE;
5909 buffer_triple(&ctx->triples, t);
5910 break;
5911 }
5912 /* file holding exactly one graph */
5913 case 'S': /* name of the graph */
5914 { ctx->graph_name = load_atom(db, in, ctx);
5915 break;
5916 }
5917 case 'M': /* MD5 of the graph */
5918 { int i;
5919
5920 for(i=0; i<16; i++)
5921 ctx->digest[i] = Sgetc(in);
5922 ctx->has_digest = TRUE;
5923
5924 break;
5925 }
5926 case 'F': /* file of the graph */
5927 ctx->graph_source = load_atom(db, in, ctx);
5928 break; /* end of one-graph handling */
5929 case 't':
5930 load_double(in, &ctx->modified);
5931 break;
5932 case 'E': /* end of file */
5933 return TRUE;
5934 default:
5935 break;
5936 }
5937 }
5938
5939 return PL_warning("Illegal RDF triple file");
5940 }
5941
5942
5943 static int
prepare_loaded_triples(rdf_db * db,ld_context * ctx)5944 prepare_loaded_triples(rdf_db *db, ld_context *ctx)
5945 { triple **t;
5946
5947 if ( ctx->graph_name ) /* lookup named graph */
5948 { ctx->graph = lookup_graph(db, ctx->graph_name);
5949 if ( ctx->graph_source && ctx->graph->source != ctx->graph_source )
5950 { if ( ctx->graph->source )
5951 PL_unregister_atom(ctx->graph->source);
5952 ctx->graph->source = ctx->graph_source;
5953 PL_register_atom(ctx->graph->source);
5954 ctx->graph->modified = ctx->modified;
5955 }
5956
5957 if ( ctx->has_digest )
5958 { if ( ctx->graph->md5 )
5959 { ctx->graph->md5 = FALSE; /* kill repetitive MD5 update */
5960 } else
5961 { ctx->has_digest = FALSE;
5962 }
5963 }
5964 } else
5965 { ctx->graph = NULL;
5966 }
5967
5968 for(t=ctx->triples.base; t<ctx->triples.top; t++)
5969 lock_atoms(db, *t);
5970
5971 return TRUE;
5972 }
5973
5974
5975 static void
destroy_load_context(rdf_db * db,ld_context * ctx,int delete_triples)5976 destroy_load_context(rdf_db *db, ld_context *ctx, int delete_triples)
5977 { if ( delete_triples )
5978 { triple **tp;
5979
5980 for(tp=ctx->triples.base;
5981 tp<ctx->triples.top;
5982 tp++)
5983 { triple *t = *tp;
5984
5985 free_triple(db, t, FALSE);
5986 }
5987 }
5988
5989 free_triple_buffer(&ctx->triples);
5990
5991 if ( ctx->atoms.loaded_objects )
5992 { atom_t *ap, *ep;
5993
5994 for( ap=(atom_t*)ctx->atoms.loaded_objects, ep=ap+ctx->atoms.loaded_id;
5995 ap<ep;
5996 ap++)
5997 { PL_unregister_atom(*ap);
5998 }
5999
6000 free(ctx->atoms.loaded_objects);
6001 }
6002 if ( ctx->predicates.loaded_objects )
6003 free(ctx->predicates.loaded_objects);
6004 if ( ctx->literals.loaded_objects )
6005 free(ctx->literals.loaded_objects);
6006 }
6007
/* Closure passed to append_graph_to_list(): the term references it
   uses to extend a Prolog list one cell at a time.
*/
typedef struct
{ term_t tail;				/* tail of the list built so far */
  term_t head;				/* receives the head of each new cell */
} add_graph_context;
6012
6013 static int
append_graph_to_list(atom_t graph,void * closure)6014 append_graph_to_list(atom_t graph, void *closure)
6015 { add_graph_context *ctx = closure;
6016
6017 return ( PL_unify_list(ctx->tail, ctx->head, ctx->tail) &&
6018 PL_unify_atom(ctx->head, graph)
6019 );
6020 }
6021
6022
6023 static foreign_t
rdf_load_db(term_t stream,term_t id,term_t graphs)6024 rdf_load_db(term_t stream, term_t id, term_t graphs)
6025 { ld_context ctx;
6026 rdf_db *db = rdf_current_db();
6027 IOSTREAM *in;
6028 int rc;
6029 term_t ba_arg2;
6030
6031 if ( !(ba_arg2 = PL_new_term_ref()) )
6032 return FALSE;
6033
6034 if ( !PL_get_stream_handle(stream, &in) )
6035 return PL_type_error("stream", stream);
6036
6037 memset(&ctx, 0, sizeof(ctx));
6038 init_atomset(&ctx.graph_table);
6039 init_triple_buffer(&ctx.triples);
6040 rc = load_db(db, in, &ctx);
6041 PL_release_stream(in);
6042
6043 if ( !rc ||
6044 !PL_put_atom(ba_arg2, ATOM_begin) ||
6045 !rdf_broadcast(EV_LOAD, (void*)id, (void*)ba_arg2) )
6046 { destroy_load_context(db, &ctx, TRUE);
6047 return FALSE;
6048 }
6049
6050 if ( (rc=prepare_loaded_triples(db, &ctx)) )
6051 { add_graph_context gctx;
6052
6053 gctx.tail = PL_copy_term_ref(graphs);
6054 gctx.head = PL_new_term_ref();
6055
6056 rc = ( for_atomset(&ctx.graph_table, append_graph_to_list, &gctx) &&
6057 PL_unify_nil(gctx.tail) );
6058
6059 destroy_atomset(&ctx.graph_table);
6060 }
6061
6062 if ( rc )
6063 { query *q;
6064
6065 if ( (q=open_query(db)) )
6066 { add_triples(q, ctx.triples.base, ctx.triples.top - ctx.triples.base);
6067 close_query(q);
6068 } else
6069 { goto error;
6070 }
6071 if ( ctx.graph )
6072 { if ( ctx.has_digest )
6073 { sum_digest(ctx.graph->digest, ctx.digest);
6074 ctx.graph->md5 = TRUE;
6075 }
6076 clear_modified(ctx.graph);
6077 }
6078 if ( (rc=PL_cons_functor(ba_arg2, FUNCTOR_end1, graphs)) )
6079 rc = rdf_broadcast(EV_LOAD, (void*)id, (void*)ba_arg2);
6080 destroy_load_context(db, &ctx, FALSE);
6081
6082 return rc;
6083 }
6084
6085 error:
6086 rdf_broadcast(EV_LOAD, (void*)id, (void*)ATOM_error);
6087 destroy_load_context(db, &ctx, TRUE);
6088 return FALSE;
6089 }
6090
6091
6092 #ifdef WITH_MD5
6093 /*******************************
6094 * MD5 SUPPORT *
6095 *******************************/
6096
/* md5_type is used to keep the MD5 independent from the internal
   numbers.  md5_triple() indexes this table with the objtype of a
   literal and hashes the resulting byte; resource objects use 0x1
   there.  The per-entry comments give the objtype each slot is meant
   for.
   NOTE(review): slot 0 is labelled OBJ_UNKNOWN while other code in
   this file uses OBJ_UNTYPED -- presumably the same value; confirm.
*/
static const char md5_type[] =
{ 0x0,					/* OBJ_UNKNOWN */
  0x3,					/* OBJ_INTEGER */
  0x4,					/* OBJ_DOUBLE */
  0x2,					/* OBJ_STRING */
  0x5					/* OBJ_TERM */
};
6107
6108 static void
md5_triple(triple * t,md5_byte_t * digest)6109 md5_triple(triple *t, md5_byte_t *digest)
6110 { md5_state_t state;
6111 size_t len;
6112 md5_byte_t tmp[2];
6113 const char *s;
6114 literal *lit;
6115
6116 md5_init(&state);
6117 s = PL_blob_data(ID_ATOM(t->subject_id), &len, NULL);
6118 md5_append(&state, (const md5_byte_t *)s, (int)len);
6119 md5_append(&state, (const md5_byte_t *)"P", 1);
6120 s = PL_blob_data(t->predicate.r->name, &len, NULL);
6121 md5_append(&state, (const md5_byte_t *)s, (int)len);
6122 tmp[0] = 'O';
6123 if ( t->object_is_literal )
6124 { lit = t->object.literal;
6125 tmp[1] = md5_type[lit->objtype];
6126
6127 switch(lit->objtype)
6128 { case OBJ_STRING:
6129 s = PL_blob_data(lit->value.string, &len, NULL);
6130 break;
6131 case OBJ_INTEGER: /* TBD: byte order issues */
6132 s = (const char *)&lit->value.integer;
6133 len = sizeof(lit->value.integer);
6134 break;
6135 case OBJ_DOUBLE:
6136 s = (const char *)&lit->value.real;
6137 len = sizeof(lit->value.real);
6138 break;
6139 case OBJ_TERM:
6140 s = (const char *)lit->value.term.record;
6141 len = lit->value.term.len;
6142 break;
6143 default:
6144 assert(0);
6145 }
6146 } else
6147 { s = PL_blob_data(t->object.resource, &len, NULL);
6148 tmp[1] = 0x1; /* old OBJ_RESOURCE */
6149 lit = NULL;
6150 }
6151 md5_append(&state, tmp, 2);
6152 md5_append(&state, (const md5_byte_t *)s, (int)len);
6153 if ( lit && lit->qualifier )
6154 { assert(lit->type_or_lang);
6155 md5_append(&state,
6156 (const md5_byte_t *)(lit->qualifier == Q_LANG ? "l" : "t"),
6157 1);
6158 s = PL_blob_data(ID_ATOM(lit->type_or_lang), &len, NULL);
6159 md5_append(&state, (const md5_byte_t *)s, (int)len);
6160 }
6161 if ( t->graph_id )
6162 { md5_append(&state, (const md5_byte_t *)"S", 1);
6163 s = PL_blob_data(ID_ATOM(t->graph_id), &len, NULL);
6164 md5_append(&state, (const md5_byte_t *)s, (int)len);
6165 }
6166
6167 md5_finish(&state, digest);
6168 }
6169
6170
6171 static void
sum_digest(md5_byte_t * digest,md5_byte_t * add)6172 sum_digest(md5_byte_t *digest, md5_byte_t *add)
6173 { md5_byte_t *p, *q;
6174 int n;
6175
6176 for(p=digest, q=add, n=16; --n>=0; )
6177 *p++ += *q++;
6178 }
6179
6180
6181 static void
dec_digest(md5_byte_t * digest,md5_byte_t * add)6182 dec_digest(md5_byte_t *digest, md5_byte_t *add)
6183 { md5_byte_t *p, *q;
6184 int n;
6185
6186 for(p=digest, q=add, n=16; --n>=0; )
6187 *p++ -= *q++;
6188 }
6189
6190
6191 static int
md5_unify_digest(term_t t,md5_byte_t digest[16])6192 md5_unify_digest(term_t t, md5_byte_t digest[16])
6193 { char hex_output[16*2];
6194 int di;
6195 char *pi;
6196 static char hexd[] = "0123456789abcdef";
6197
6198 for(pi=hex_output, di = 0; di < 16; ++di)
6199 { *pi++ = hexd[(digest[di] >> 4) & 0x0f];
6200 *pi++ = hexd[digest[di] & 0x0f];
6201 }
6202
6203 return PL_unify_atom_nchars(t, 16*2, hex_output);
6204 }
6205
6206
6207 static foreign_t
rdf_md5(term_t graph_name,term_t md5)6208 rdf_md5(term_t graph_name, term_t md5)
6209 { atom_t src;
6210 int rc;
6211 rdf_db *db = rdf_current_db();
6212
6213 if ( !get_atom_or_var_ex(graph_name, &src) )
6214 return FALSE;
6215
6216 if ( src )
6217 { graph *s;
6218
6219 if ( (s = existing_graph(db, src)) && !s->erased )
6220 { rc = md5_unify_digest(md5, s->digest);
6221 } else
6222 { md5_byte_t digest[16];
6223
6224 memset(digest, 0, sizeof(digest));
6225 rc = md5_unify_digest(md5, digest);
6226 }
6227 } else
6228 { md5_byte_t digest[16];
6229 int i;
6230
6231 memset(&digest, 0, sizeof(digest));
6232 for(i=0; i<db->graphs.bucket_count; i++)
6233 { graph *g = db->graphs.blocks[MSB(i)][i];
6234
6235 for( ; g; g = g->next )
6236 sum_digest(digest, g->digest);
6237 }
6238
6239 return md5_unify_digest(md5, digest);
6240 }
6241
6242 return rc;
6243 }
6244
6245
6246 static foreign_t
rdf_atom_md5(term_t text,term_t times,term_t md5)6247 rdf_atom_md5(term_t text, term_t times, term_t md5)
6248 { char *s;
6249 int n, i;
6250 size_t len;
6251 md5_byte_t digest[16];
6252
6253 if ( !PL_get_nchars(text, &len, &s, CVT_ALL|REP_UTF8|CVT_EXCEPTION) )
6254 return FALSE;
6255 if ( !PL_get_integer_ex(times, &n) )
6256 return FALSE;
6257 if ( n < 1 )
6258 return PL_domain_error("positive_integer", times);
6259
6260 for(i=0; i<n; i++)
6261 { md5_state_t state;
6262 md5_init(&state);
6263 md5_append(&state, (const md5_byte_t *)s, (int)len);
6264 md5_finish(&state, digest);
6265 s = (char *)digest;
6266 len = sizeof(digest);
6267 }
6268
6269 return md5_unify_digest(md5, digest);
6270 }
6271
6272
6273
6274 #endif /*WITH_MD5*/
6275
6276
6277 /*******************************
6278 * ATOMS *
6279 *******************************/
6280
6281 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6282 Lock atoms in triple against AGC. Note that the predicate name is locked
6283 in the predicate structure.
6284 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6285
6286 static void
lock_atoms(rdf_db * db,triple * t)6287 lock_atoms(rdf_db *db, triple *t)
6288 { if ( !t->atoms_locked )
6289 { t->atoms_locked = TRUE;
6290
6291 register_resource(&db->resources, ID_ATOM(t->subject_id));
6292 if ( t->object_is_literal )
6293 { lock_atoms_literal(t->object.literal);
6294 } else
6295 { register_resource(&db->resources, t->object.resource);
6296 }
6297 }
6298 }
6299
6300
6301 static void
unlock_atoms(rdf_db * db,triple * t)6302 unlock_atoms(rdf_db *db, triple *t)
6303 { if ( t->atoms_locked )
6304 { t->atoms_locked = FALSE;
6305
6306 unregister_resource(&db->resources, ID_ATOM(t->subject_id));
6307 if ( t->object_is_literal )
6308 { if ( !t->object.literal->shared )
6309 unlock_atoms_literal(t->object.literal);
6310 } else
6311 { unregister_resource(&db->resources, t->object.resource);
6312 }
6313 }
6314 }
6315
6316
6317 /*******************************
6318 * PROLOG CONVERSION *
6319 *******************************/
6320
/* Flags for get_literal() and get_lit_atom_ex() */
#define LIT_TYPED 0x1			/* inside the value of a type/2 term:
					   do not unpack a nested type/2 */
#define LIT_NOERROR 0x2			/* NOTE(review): not referenced by the
					   functions nearby; confirm usage */
#define LIT_PARTIAL 0x4			/* accept unbound (parts of) literals */
6324
6325 static int
get_lit_atom_ex(term_t t,atom_t * a,int flags)6326 get_lit_atom_ex(term_t t, atom_t *a, int flags)
6327 { if ( PL_get_atom(t, a) )
6328 return TRUE;
6329 if ( (flags & LIT_PARTIAL) && PL_is_variable(t) )
6330 { *a = 0L;
6331 return TRUE;
6332 }
6333
6334 return PL_type_error("atom", t);
6335 }
6336
6337
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
get_literal() processes the argument of a literal/1 term passed as
object.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6342
6343 static int
get_literal(rdf_db * db,term_t litt,literal * lit,int flags)6344 get_literal(rdf_db *db, term_t litt, literal *lit, int flags)
6345 { if ( PL_get_atom(litt, &lit->value.string) )
6346 { lit->objtype = OBJ_STRING;
6347 } else if ( PL_is_integer(litt) && PL_get_int64(litt, &lit->value.integer) )
6348 { lit->objtype = OBJ_INTEGER;
6349 } else if ( PL_get_float(litt, &lit->value.real) )
6350 { lit->objtype = OBJ_DOUBLE;
6351 } else if ( PL_is_functor(litt, FUNCTOR_lang2) )
6352 { term_t a = PL_new_term_ref();
6353 atom_t tol;
6354
6355 _PL_get_arg(1, litt, a);
6356 if ( !get_lit_atom_ex(a, &tol, flags) )
6357 return FALSE;
6358 lit->type_or_lang = ATOM_ID(tol);
6359 _PL_get_arg(2, litt, a);
6360 if ( !get_lit_atom_ex(a, &lit->value.string, flags) )
6361 return FALSE;
6362
6363 lit->qualifier = Q_LANG;
6364 lit->objtype = OBJ_STRING;
6365 } else if ( PL_is_functor(litt, FUNCTOR_type2) &&
6366 !(flags & LIT_TYPED) ) /* avoid recursion */
6367 { term_t a = PL_new_term_ref();
6368 atom_t tol;
6369
6370 _PL_get_arg(1, litt, a);
6371 if ( !get_lit_atom_ex(a, &tol, flags) )
6372 return FALSE;
6373 lit->type_or_lang = ATOM_ID(tol);
6374 lit->qualifier = Q_TYPE;
6375 _PL_get_arg(2, litt, a);
6376
6377 return get_literal(db, a, lit, LIT_TYPED|flags);
6378 } else if ( !PL_is_ground(litt) )
6379 { if ( !(flags & LIT_PARTIAL) )
6380 return PL_type_error("rdf_object", litt);
6381 if ( !PL_is_variable(litt) )
6382 lit->objtype = OBJ_TERM;
6383 } else
6384 { lit->value.term.record = PL_record_external(litt, &lit->value.term.len);
6385 lit->objtype = OBJ_TERM;
6386 }
6387
6388 return TRUE;
6389 }
6390
6391
6392 static int
get_object(rdf_db * db,term_t object,triple * t)6393 get_object(rdf_db *db, term_t object, triple *t)
6394 { if ( PL_get_atom(object, &t->object.resource) )
6395 { assert(!t->object_is_literal);
6396 } else if ( PL_is_functor(object, FUNCTOR_literal1) )
6397 { term_t a = PL_new_term_ref();
6398
6399 _PL_get_arg(1, object, a);
6400 alloc_literal_triple(db, t);
6401 return get_literal(db, a, t->object.literal, 0);
6402 } else if ( get_prefixed_iri(db, object, &t->object.resource) )
6403 { assert(!t->object_is_literal);
6404 } else
6405 return PL_type_error("rdf_object", object);
6406
6407 return TRUE;
6408 }
6409
6410
6411 static int
get_src(term_t src,triple * t)6412 get_src(term_t src, triple *t)
6413 { if ( src && !PL_is_variable(src) )
6414 { atom_t at;
6415
6416 if ( PL_get_atom(src, &at) )
6417 { t->graph_id = ATOM_ID(at);
6418 t->line = NO_LINE;
6419 } else if ( PL_is_functor(src, FUNCTOR_colon2) )
6420 { term_t a = PL_new_term_ref();
6421 long line;
6422
6423 _PL_get_arg(1, src, a);
6424 if ( !get_atom_or_var_ex(a, &at) )
6425 return FALSE;
6426 t->graph_id = ATOM_ID(at);
6427 _PL_get_arg(2, src, a);
6428 if ( PL_get_long(a, &line) )
6429 t->line = line;
6430 else if ( !PL_is_variable(a) )
6431 return PL_type_error("integer", a);
6432 } else
6433 return PL_type_error("rdf_graph", src);
6434 }
6435
6436 return TRUE;
6437 }
6438
6439
6440 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6441 Return values:
6442 -1: exception
6443 0: no predicate
6444 1: the predicate
6445 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6446
6447 static int
get_existing_predicate(rdf_db * db,term_t t,predicate ** p)6448 get_existing_predicate(rdf_db *db, term_t t, predicate **p)
6449 { atom_t name;
6450
6451 if ( !PL_get_atom(t, &name ) )
6452 { if ( PL_is_functor(t, FUNCTOR_literal1) )
6453 return 0; /* rdf(_, literal(_), _) */
6454 if ( get_prefixed_iri(db, t, &name) )
6455 goto ok;
6456 PL_type_error("rdf_predicate", t);
6457 return -1;
6458 }
6459
6460 ok:
6461 if ( (*p = existing_predicate(db, name)) )
6462 return 1;
6463
6464 DEBUG(5, Sdprintf("No predicate %s\n", PL_atom_chars(name)));
6465 return 0; /* no predicate */
6466 }
6467
6468
6469 static int
get_predicate(rdf_db * db,term_t t,predicate ** p,query * q)6470 get_predicate(rdf_db *db, term_t t, predicate **p, query *q)
6471 { atom_t name;
6472
6473 if ( !get_iri_ex(db, t, &name ) )
6474 return FALSE;
6475
6476 *p = lookup_predicate(db, name);
6477 return TRUE;
6478 }
6479
6480
6481 static int
get_triple(rdf_db * db,term_t subject,term_t predicate,term_t object,triple * t,query * q)6482 get_triple(rdf_db *db,
6483 term_t subject, term_t predicate, term_t object,
6484 triple *t, query *q)
6485 { atom_t at;
6486
6487 if ( !get_iri_ex(db, subject, &at) ||
6488 !get_predicate(db, predicate, &t->predicate.r, q) ||
6489 !get_object(db, object, t) )
6490 return FALSE;
6491
6492 t->subject_id = ATOM_ID(at);
6493
6494 return TRUE;
6495 }
6496
6497
6498 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6499 get_partial_triple() creates a triple for matching purposes. It can
6500 return FALSE for two reasons. Mostly (type) errors, but also if
6501 resources are accessed that do not exist and therefore the subsequent
6502 matching will always fail. This is notably the case for predicates,
6503 which are first class citizens to this library.
6504
6505 Return values:
6506 1: ok
6507 0: no predicate
6508 -1: error
6509 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6510
6511 static int
get_partial_triple(rdf_db * db,term_t subject,term_t predicate,term_t object,term_t src,triple * t)6512 get_partial_triple(rdf_db *db,
6513 term_t subject, term_t predicate, term_t object,
6514 term_t src, triple *t)
6515 { int rc;
6516 int ipat = 0;
6517
6518 if ( subject )
6519 { atom_t at;
6520
6521 if ( !get_resource_or_var_ex(db, subject, &at) )
6522 return FALSE;
6523 t->subject_id = ATOM_ID(at);
6524 }
6525 if ( !PL_is_variable(predicate) &&
6526 (rc=get_existing_predicate(db, predicate, &t->predicate.r)) != 1 )
6527 return rc;
6528 /* the object */
6529 if ( object && !PL_is_variable(object) )
6530 { if ( PL_get_atom(object, &t->object.resource) )
6531 { assert(!t->object_is_literal);
6532 } else if ( PL_is_functor(object, FUNCTOR_literal1) )
6533 { term_t a = PL_new_term_ref();
6534
6535 _PL_get_arg(1, object, a);
6536 alloc_literal_triple(db, t);
6537 if ( !get_literal(db, a, t->object.literal, LIT_PARTIAL) )
6538 return FALSE;
6539 } else if ( PL_is_functor(object, FUNCTOR_literal2) )
6540 { term_t a = PL_new_term_ref();
6541 literal *lit;
6542
6543 alloc_literal_triple(db, t);
6544 lit = t->object.literal;
6545
6546 _PL_get_arg(1, object, a);
6547 if ( PL_is_functor(a, FUNCTOR_exact1) )
6548 t->match = STR_MATCH_ICASE;
6549 else if ( PL_is_functor(a, FUNCTOR_icase1) )
6550 t->match = STR_MATCH_ICASE;
6551 else if ( PL_is_functor(a, FUNCTOR_plain1) )
6552 t->match = STR_MATCH_PLAIN;
6553 else if ( PL_is_functor(a, FUNCTOR_substring1) )
6554 t->match = STR_MATCH_SUBSTRING;
6555 else if ( PL_is_functor(a, FUNCTOR_word1) )
6556 t->match = STR_MATCH_WORD;
6557 else if ( PL_is_functor(a, FUNCTOR_prefix1) )
6558 t->match = STR_MATCH_PREFIX;
6559 else if ( PL_is_functor(a, FUNCTOR_like1) )
6560 t->match = STR_MATCH_LIKE;
6561 else if ( PL_is_functor(a, FUNCTOR_lt1) )
6562 t->match = STR_MATCH_LT;
6563 else if ( PL_is_functor(a, FUNCTOR_le1) )
6564 t->match = STR_MATCH_LE;
6565 else if ( PL_is_functor(a, FUNCTOR_eq1) )
6566 t->match = STR_MATCH_EQ;
6567 else if ( PL_is_functor(a, FUNCTOR_ge1) )
6568 t->match = STR_MATCH_GE;
6569 else if ( PL_is_functor(a, FUNCTOR_gt1) )
6570 t->match = STR_MATCH_GT;
6571 else if ( PL_is_functor(a, FUNCTOR_between2) )
6572 { term_t e = PL_new_term_ref();
6573
6574 _PL_get_arg(2, a, e);
6575 memset(&t->tp.end, 0, sizeof(t->tp.end));
6576 if ( !get_literal(db, e, &t->tp.end, 0) )
6577 return FALSE;
6578 t->match = STR_MATCH_BETWEEN;
6579 } else
6580 return PL_domain_error("match_type", a);
6581
6582 _PL_get_arg(1, a, a);
6583 if ( t->match >= STR_MATCH_LT )
6584 { if ( !get_literal(db, a, lit, 0) )
6585 return FALSE;
6586 } else
6587 { if ( !PL_get_atom_ex(a, &lit->value.string) )
6588 return FALSE;
6589 lit->objtype = OBJ_STRING;
6590 }
6591 } else
6592 return PL_type_error("rdf_object", object);
6593 }
6594 /* the graph */
6595 if ( !get_src(src, t) )
6596 return FALSE;
6597
6598 if ( t->subject_id )
6599 ipat |= BY_S;
6600 if ( t->predicate.r )
6601 ipat |= BY_P;
6602 if ( t->object_is_literal )
6603 { literal *lit = t->object.literal;
6604
6605 switch( lit->objtype )
6606 { case OBJ_UNTYPED:
6607 break;
6608 case OBJ_STRING:
6609 if ( lit->objtype == OBJ_STRING )
6610 { if ( lit->value.string &&
6611 t->match <= STR_MATCH_ICASE )
6612 ipat |= BY_O;
6613 }
6614 break;
6615 case OBJ_INTEGER:
6616 case OBJ_DOUBLE:
6617 ipat |= BY_O;
6618 break;
6619 case OBJ_TERM:
6620 if ( PL_is_ground(object) )
6621 ipat |= BY_O;
6622 break;
6623 default:
6624 assert(0);
6625 }
6626 } else if ( t->object.resource )
6627 { ipat |= BY_O;
6628 }
6629 if ( t->graph_id )
6630 ipat |= BY_G;
6631
6632 db->indexed[ipat]++; /* statistics */
6633 t->indexed = alt_index[ipat];
6634
6635 return TRUE;
6636 }
6637
6638
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
inverse_partial_triple(triple *t) inverts a triple by swapping object
and subject and replacing the predicate with its inverse.

TBD: In many cases we can compute the hash more efficiently than by
simply recomputing it:

- Change predicate: x-or with old and new predicate hash
- Swapping S<->O when both are known is a no-op wrt the hash.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6649
6650 static int
inverse_partial_triple(triple * t)6651 inverse_partial_triple(triple *t)
6652 { predicate *i;
6653
6654 if ( !t->inversed &&
6655 (!(i=t->predicate.r) || (i=t->predicate.r->inverse_of)) &&
6656 !t->object_is_literal )
6657 { atom_t o = t->object.resource;
6658
6659 t->object.resource = t->subject_id ? ID_ATOM(t->subject_id) : 0;
6660 t->subject_id = o ? ATOM_ID(o) : 0;
6661
6662 if ( t->predicate.r )
6663 t->predicate.r = i;
6664
6665 t->indexed = by_inverse[t->indexed];
6666 t->inversed = TRUE;
6667
6668 return TRUE;
6669 }
6670
6671 return FALSE;
6672 }
6673
6674
6675 static int
get_graph(term_t src,triple * t)6676 get_graph(term_t src, triple *t)
6677 { atom_t at;
6678
6679 if ( PL_get_atom(src, &at) )
6680 { t->line = NO_LINE;
6681 t->graph_id = ATOM_ID(at);
6682 return TRUE;
6683 }
6684
6685 if ( PL_is_functor(src, FUNCTOR_colon2) )
6686 { term_t a = PL_new_term_ref();
6687 long line;
6688
6689 _PL_get_arg(1, src, a);
6690 if ( !PL_get_atom_ex(a, &at) )
6691 return FALSE;
6692 t->graph_id = ATOM_ID(at);
6693 _PL_get_arg(2, src, a);
6694 if ( !PL_get_long_ex(a, &line) )
6695 return FALSE;
6696 t->line = line;
6697
6698 return TRUE;
6699 }
6700
6701 return PL_type_error("rdf_graph", src);
6702 }
6703
6704
6705 static int
unify_graph(term_t src,triple * t)6706 unify_graph(term_t src, triple *t)
6707 { switch( PL_term_type(src) )
6708 { case PL_VARIABLE:
6709 { if ( t->line == NO_LINE )
6710 return PL_unify_atom(src, ID_ATOM(t->graph_id));
6711 else
6712 goto full_term;
6713 }
6714 case PL_ATOM:
6715 { atom_t a;
6716 return (PL_get_atom(src, &a) &&
6717 a == ID_ATOM(t->graph_id));
6718 }
6719 case PL_TERM:
6720 { if ( t->line == NO_LINE )
6721 { return PL_unify_term(src,
6722 PL_FUNCTOR, FUNCTOR_colon2,
6723 PL_ATOM, ID_ATOM(t->graph_id),
6724 PL_VARIABLE);
6725 } else
6726 { full_term:
6727 return PL_unify_term(src,
6728 PL_FUNCTOR, FUNCTOR_colon2,
6729 PL_ATOM, ID_ATOM(t->graph_id),
6730 PL_INT64, (int64_t)t->line); /* line is uint32_t */
6731 }
6732 }
6733 default:
6734 return PL_type_error("rdf_graph", src);
6735 }
6736 }
6737
6738
6739 static int
same_graph(triple * t1,triple * t2)6740 same_graph(triple *t1, triple *t2)
6741 { return t1->line == t2->line &&
6742 t1->graph_id == t2->graph_id;
6743 }
6744
6745
6746
6747 static int
put_literal_value(term_t v,literal * lit)6748 put_literal_value(term_t v, literal *lit)
6749 { switch(lit->objtype)
6750 { case OBJ_STRING:
6751 PL_put_atom(v, lit->value.string);
6752 break;
6753 case OBJ_INTEGER:
6754 PL_put_variable(v);
6755 return PL_unify_int64(v, lit->value.integer);
6756 case OBJ_DOUBLE:
6757 return PL_put_float(v, lit->value.real);
6758 case OBJ_TERM:
6759 return PL_recorded_external(lit->value.term.record, v);
6760 default:
6761 assert(0);
6762 return FALSE;
6763 }
6764
6765 return TRUE;
6766 }
6767
6768
6769 static int
unify_literal(term_t lit,literal * l)6770 unify_literal(term_t lit, literal *l)
6771 { term_t v = PL_new_term_ref();
6772
6773 if ( !put_literal_value(v, l) )
6774 return FALSE;
6775
6776 if ( l->qualifier )
6777 { functor_t qf;
6778
6779 assert(l->type_or_lang);
6780
6781 if ( l->qualifier == Q_LANG )
6782 qf = FUNCTOR_lang2;
6783 else
6784 qf = FUNCTOR_type2;
6785
6786 if ( PL_unify_term(lit, PL_FUNCTOR, qf,
6787 PL_ATOM, ID_ATOM(l->type_or_lang),
6788 PL_TERM, v) )
6789 return TRUE;
6790
6791 if ( PL_exception(0) )
6792 return FALSE;
6793
6794 return PL_unify(lit, v); /* allow rdf(X, Y, literal(foo)) */
6795 } else if ( PL_unify(lit, v) )
6796 { return TRUE;
6797 } else if ( PL_is_functor(lit, FUNCTOR_lang2) &&
6798 l->objtype == OBJ_STRING )
6799 { term_t a = PL_new_term_ref();
6800 _PL_get_arg(2, lit, a);
6801 return PL_unify(a, v);
6802 } else if ( PL_is_functor(lit, FUNCTOR_type2) )
6803 { term_t a = PL_new_term_ref();
6804 _PL_get_arg(2, lit, a);
6805 return PL_unify(a, v);
6806 } else
6807 return FALSE;
6808 }
6809
6810
6811
6812 static int
unify_object(term_t object,triple * t)6813 unify_object(term_t object, triple *t)
6814 { if ( t->object_is_literal )
6815 { term_t lit = PL_new_term_ref();
6816
6817 if ( PL_unify_functor(object, FUNCTOR_literal1) )
6818 _PL_get_arg(1, object, lit);
6819 else if ( PL_is_functor(object, FUNCTOR_literal2) )
6820 _PL_get_arg(2, object, lit);
6821 else
6822 return FALSE;
6823
6824 return unify_literal(lit, t->object.literal);
6825 } else
6826 { return PL_unify_atom(object, t->object.resource);
6827 }
6828 }
6829
6830
6831 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6832 TRUE: ok
6833 FALSE: failure
6834 ERROR: error
6835 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6836
6837 static int
unify_triple(term_t subject,term_t pred,term_t object,term_t src,triple * t,int inversed)6838 unify_triple(term_t subject, term_t pred, term_t object,
6839 term_t src, triple *t, int inversed)
6840 { predicate *p = t->predicate.r;
6841 fid_t fid = PL_open_foreign_frame();
6842 int rc;
6843
6844 if ( inversed )
6845 { term_t tmp = object;
6846 object = subject;
6847 subject = tmp;
6848
6849 rc = !pred || PL_unify_term(pred,
6850 PL_FUNCTOR, FUNCTOR_inverse_of1,
6851 PL_ATOM, p->name);
6852 } else
6853 { rc = !pred || PL_unify_atom(pred, p->name);
6854 }
6855
6856 if ( !rc ||
6857 !PL_unify_atom(subject, ID_ATOM(t->subject_id)) ||
6858 !unify_object(object, t) ||
6859 (src && !unify_graph(src, t)) )
6860 { if ( PL_exception(0) )
6861 { PL_close_foreign_frame(fid);
6862 return ERROR;
6863 }
6864
6865 PL_discard_foreign_frame(fid);
6866 return FALSE;
6867 } else
6868 { PL_close_foreign_frame(fid);
6869 return TRUE;
6870 }
6871 }
6872
6873
6874 /*******************************
6875 * DUPLICATE HANDLING *
6876 *******************************/
6877
6878 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6879 According to the RDF specs, duplicate triples have no meaning, but they
6880 slow down search and often produce duplicate results in search. Worse,
6881 some coding styles proposed in the OWL documents introduce huge amounts
6882 of duplicate triples. We cannot simply ignore a triple if it is a
6883 duplicate as a subsequent retract would delete the final triple. For
6884 example, after loading two files that contain the same triple and
6885 unloading one of these files the database would be left without triples.
6886
6887 mark_duplicate() searches the DB for a duplicate triple and sets the
6888 flag is_duplicate on both. This flag is used by rdf/3, where duplicate
6889 triples are stored into a temporary table to be filtered from the
6890 results by new_answer().
6891
(*) We pick the write generation of the current query. This may still be
set higher, but that may only lead to triples being marked as duplicates
that are not. By using this conservative approach, we can move
mark_duplicate() into prelink_triple().
6896
6897 TBD: Duplicate marks may be removed by GC: walk over all triples that
6898 are marked as duplicates and try to find the duplicate.
6899 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6900
6901 static void
mark_duplicate(rdf_db * db,triple * t,query * q)6902 mark_duplicate(rdf_db *db, triple *t, query *q)
6903 { triple_walker tw;
6904 triple *d;
6905 const int indexed = BY_SPO;
6906 lifespan qls;
6907 lifespan *ls;
6908
6909 if ( q )
6910 { qls.born = queryWriteGen(q) + 1; /* (*) */
6911 qls.died = query_max_gen(q);
6912 ls = &qls;
6913 } else
6914 { ls = &t->lifespan;
6915 }
6916
6917 init_triple_walker(&tw, db, t, indexed);
6918 while((d=next_triple(&tw)) && d != t)
6919 { d = deref_triple(db, d);
6920 DEBUG(3, Sdprintf("Possible duplicate: ");
6921 print_triple(d, PRT_NL|PRT_ADR));
6922
6923 if ( !overlap_lifespan(&d->lifespan, ls) )
6924 continue;
6925
6926 if ( match_triples(db, d, t, q, MATCH_DUPLICATE) )
6927 { if ( !t->is_duplicate )
6928 { t->is_duplicate = TRUE;
6929 db->duplicates++;
6930 }
6931 if ( !d->is_duplicate )
6932 { d->is_duplicate = TRUE;
6933 db->duplicates++;
6934 }
6935 }
6936 }
6937 destroy_triple_walker(db, &tw);
6938 }
6939
6940
/* Recompute the is_duplicate flag of all triples in `db`.

   Runs with db->locks.duplicates held.  If db->duplicates is non-zero,
   a first pass over the by_none chain clears every is_duplicate flag
   and resets the counter.  A second pass calls mark_duplicate() on
   every triple.  Both passes poll PL_handle_signals() periodically so
   the scan can be interrupted.

   Returns TRUE if both passes completed (db->duplicates_up_to_date is
   then TRUE) and FALSE if the scan was aborted.
*/
static int
update_duplicates(rdf_db *db)
{ triple *t;
  int count = 0;                        /* shared by both loops; not reset */

  simpleMutexLock(&db->locks.duplicates);
  db->duplicates_up_to_date = FALSE;
  db->maintain_duplicates = FALSE;

  if ( db->duplicates )                 /* pass 1: wipe the old marks */
  { enter_scan(&db->defer_all);
    for(t=fetch_triple(db, db->by_none.head);
        t;
        t=triple_follow_hash(db, t, ICOL(BY_NONE)))
    { if ( ++count % 10240 == 0 &&
           (PL_handle_signals() < 0 || db->resetting) )

      { exit_scan(&db->defer_all);
        simpleMutexUnlock(&db->locks.duplicates);
        return FALSE;                   /* aborted */
      }
      t->is_duplicate = FALSE;
    }
    exit_scan(&db->defer_all);

    db->duplicates = 0;
  }

  db->maintain_duplicates = TRUE;       /* set before re-marking starts */

  enter_scan(&db->defer_all);           /* pass 2: mark duplicates again */
  for(t=fetch_triple(db, db->by_none.head);
      t;
      t=triple_follow_hash(db, t, ICOL(BY_NONE)))
  { if ( ++count % 1024 == 0 &&
         PL_handle_signals() < 0 )
    { exit_scan(&db->defer_all);
      db->maintain_duplicates = FALSE;  /* no point anymore */
      simpleMutexUnlock(&db->locks.duplicates);
      return FALSE;
    }
    mark_duplicate(db, t, NULL);        /* NULL query: use the triple's own lifespan */
  }
  exit_scan(&db->defer_all);

  db->duplicates_up_to_date = TRUE;
  simpleMutexUnlock(&db->locks.duplicates);

  return TRUE;
}
6991
6992
6993 static void
start_duplicate_admin(rdf_db * db)6994 start_duplicate_admin(rdf_db *db)
6995 { db->maintain_duplicates = TRUE;
6996
6997 PL_call_predicate(NULL, PL_Q_NORMAL,
6998 PL_predicate("rdf_update_duplicates_thread", 0, "rdf_db"), 0);
6999 }
7000
7001
7002
7003 /*******************************
7004 * TRANSACTIONS *
7005 *******************************/
7006
7007 static int
put_begin_end(term_t t,functor_t be,int level)7008 put_begin_end(term_t t, functor_t be, int level)
7009 { term_t av;
7010
7011 return ( (av = PL_new_term_ref()) &&
7012 PL_put_integer(av, level) &&
7013 PL_cons_functor_v(t, be, av) );
7014 }
7015
7016
7017 /** rdf_transaction(:Goal, +Id, +Options)
7018
7019 Options:
7020
7021 * generation(+Generation)
7022 Determines query generation
7023 */
7024
7025 static int
transaction_depth(const query * q)7026 transaction_depth(const query *q)
7027 { int depth = 0;
7028
7029 for(q=q->transaction; q; q=q->transaction)
7030 depth++;
7031
7032 return depth;
7033 }
7034
7035
7036 static foreign_t
rdf_transaction(term_t goal,term_t id,term_t options)7037 rdf_transaction(term_t goal, term_t id, term_t options)
7038 { int rc;
7039 rdf_db *db = rdf_current_db();
7040 query *q;
7041 triple_buffer added;
7042 triple_buffer deleted;
7043 triple_buffer updated;
7044 snapshot *ss = NULL;
7045
7046 if ( !PL_get_nil(options) )
7047 { term_t tail = PL_copy_term_ref(options);
7048 term_t head = PL_new_term_ref();
7049 term_t arg = PL_new_term_ref();
7050
7051 while( PL_get_list(tail, head, tail) )
7052 { size_t arity;
7053 atom_t name;
7054
7055 if ( !PL_get_name_arity(head, &name, &arity) || arity != 1 )
7056 return PL_type_error("option", head);
7057 _PL_get_arg(1, head, arg);
7058
7059 if ( name == ATOM_snapshot )
7060 { if ( get_snapshot(arg, &ss) )
7061 { int ss_tid = snapshot_thread(ss);
7062
7063 if ( ss_tid && ss_tid != PL_thread_self() )
7064 PL_permission_error("access", "rdf-snapshot", arg);
7065 } else
7066 { atom_t a;
7067
7068 if ( PL_get_atom(arg, &a) && a == ATOM_true )
7069 ss = SNAPSHOT_ANONYMOUS;
7070 else
7071 return PL_type_error("rdf_snapshot", arg);
7072 }
7073 }
7074 }
7075 if ( !PL_get_nil_ex(tail) )
7076 return FALSE;
7077 }
7078
7079 if ( !(q = open_transaction(db, &added, &deleted, &updated, ss)) )
7080 return FALSE;
7081 q->transaction_data.prolog_id = id;
7082 rc = PL_call_predicate(NULL, PL_Q_PASS_EXCEPTION, PRED_call1, goal);
7083
7084 if ( rc )
7085 { if ( !empty_transaction(q) )
7086 { if ( ss )
7087 { discard_transaction(q);
7088 } else
7089 { term_t be;
7090 int depth = transaction_depth(q);
7091
7092 if ( !(be=PL_new_term_ref()) ||
7093 !put_begin_end(be, FUNCTOR_begin1, depth) ||
7094 !rdf_broadcast(EV_TRANSACTION, (void*)id, (void*)be) ||
7095 !put_begin_end(be, FUNCTOR_end1, depth) )
7096 return FALSE;
7097
7098 commit_transaction(q);
7099
7100 if ( !rdf_broadcast(EV_TRANSACTION, (void*)id, (void*)be) )
7101 return FALSE;
7102 }
7103 } else
7104 { close_transaction(q);
7105 }
7106 } else
7107 { discard_transaction(q);
7108 }
7109
7110 return rc;
7111 }
7112
7113 /*******************************
7114 * PREDICATES *
7115 *******************************/
7116
7117 /** rdf_active_transactions_(-List)
7118
7119 Provides list of parent transactions in the calling thread
7120 */
7121
7122 static foreign_t
rdf_active_transactions(term_t list)7123 rdf_active_transactions(term_t list)
7124 { rdf_db *db = rdf_current_db();
7125 query *q = open_query(db);
7126 term_t tail = PL_copy_term_ref(list);
7127 term_t head = PL_new_term_ref();
7128 query *t;
7129
7130 if ( !q ) return FALSE;
7131 for(t = q->transaction; t; t=t->transaction)
7132 { if ( !PL_unify_list(tail, head, tail) ||
7133 !PL_unify(head, t->transaction_data.prolog_id) )
7134 { close_query(q);
7135 return FALSE;
7136 }
7137 }
7138
7139 close_query(q);
7140
7141 return PL_unify_nil(tail);
7142 }
7143
7144 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7145 (*) rdf_assert(S,P,O,G) adds a triple, but does not do so if exactly the
7146 same quintuple is visible and not yet erased. Adding would not make
7147 sense as this would be a complete duplicate that cannot be distinguished
7148 from the original and rdf_retractall/4 will erase both.
7149
7150 Note that full duplicates are quite common as a result of forward
7151 reasoning.
7152 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7153
7154 static foreign_t
rdf_assert4(term_t subject,term_t predicate,term_t object,term_t src)7155 rdf_assert4(term_t subject, term_t predicate, term_t object, term_t src)
7156 { rdf_db *db = rdf_current_db();
7157 query *q = open_query(db);
7158 triple *t, *d;
7159 triple_walker tw;
7160
7161 if ( !q ) return FALSE;
7162 t = new_triple(db);
7163 if ( !get_triple(db, subject, predicate, object, t, q) )
7164 { error:
7165 free_triple(db, t, FALSE);
7166 close_query(q);
7167 return FALSE;
7168 }
7169 if ( src )
7170 { if ( !get_graph(src, t) )
7171 goto error;
7172 } else
7173 { t->graph_id = ATOM_ID(ATOM_user);
7174 t->line = NO_LINE;
7175 }
7176
7177 init_triple_walker(&tw, db, t, BY_SPO);
7178 while((d=next_triple(&tw)))
7179 { if ( (d=alive_triple(q, d)) && !d->erased ) /* (*) */
7180 { if ( match_triples(db, d, t, q, MATCH_DUPLICATE|MATCH_SRC) &&
7181 d->line == t->line )
7182 { destroy_triple_walker(db, &tw);
7183 free_triple(db, t, FALSE);
7184 close_query(q);
7185
7186 return TRUE;
7187 }
7188 }
7189 }
7190 destroy_triple_walker(db, &tw);
7191
7192 lock_atoms(db, t);
7193
7194 add_triples(q, &t, 1);
7195 close_query(q);
7196
7197 return TRUE;
7198 }
7199
7200
7201 static foreign_t
rdf_assert3(term_t subject,term_t predicate,term_t object)7202 rdf_assert3(term_t subject, term_t predicate, term_t object)
7203 { return rdf_assert4(subject, predicate, object, 0);
7204 }
7205
7206
7207 static void free_search_state(search_state *state);
7208
7209 static int
init_cursor_from_literal(search_state * state,literal * cursor)7210 init_cursor_from_literal(search_state *state, literal *cursor)
7211 { triple *p = &state->pattern;
7212 size_t iv;
7213
7214 DEBUG(3,
7215 Sdprintf("Trying literal search for ");
7216 print_literal(cursor);
7217 Sdprintf("\n"));
7218
7219 p->indexed |= BY_O;
7220 p->indexed &= ~BY_G; /* No graph indexing supported */
7221 if ( p->indexed == BY_SO )
7222 { p->indexed = BY_S; /* we do not have index BY_SO */
7223 init_triple_walker(&state->cursor, state->db, p, p->indexed);
7224 return FALSE;
7225 }
7226
7227 iv = literal_hash(cursor); /* see also triple_hash_key() */
7228 if ( p->indexed&BY_S ) iv ^= subject_hash(p);
7229 if ( p->indexed&BY_P ) iv ^= predicate_hash(p->predicate.r);
7230
7231 init_triple_literal_walker(&state->cursor, state->db, p, p->indexed, iv);
7232 state->has_literal_state = TRUE;
7233 state->literal_cursor = cursor;
7234
7235 return TRUE;
7236 }
7237
7238
7239 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7240 init_search_state(search_state *state, query *q)
7241 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7242
7243 static int
init_search_state(search_state * state,query * query)7244 init_search_state(search_state *state, query *query)
7245 { triple *p = &state->pattern;
7246
7247 if ( get_partial_triple(state->db,
7248 state->subject, state->predicate, state->object,
7249 state->src, p) != TRUE )
7250 { free_search_state(state);
7251 return FALSE;
7252 }
7253
7254 if ( p->object_is_literal && !is_numerical_string(p->object.literal) )
7255 state->flags &= ~MATCH_NUMERIC;
7256
7257 if ( (p->match == STR_MATCH_PREFIX || p->match == STR_MATCH_LIKE) &&
7258 p->indexed != BY_SP &&
7259 (state->prefix = first_atom(p->object.literal->value.string, p->match)))
7260 { literal lit;
7261 literal **rlitp;
7262
7263 lit = *p->object.literal;
7264 lit.value.string = state->prefix;
7265 state->lit_ex.literal = &lit;
7266 prepare_literal_ex(&state->lit_ex);
7267 rlitp = skiplist_find_first(&state->db->literals,
7268 &state->lit_ex, &state->literal_state);
7269 if ( rlitp )
7270 { if ( init_cursor_from_literal(state, *rlitp) )
7271 { state->restart_lit = *rlitp;
7272 state->restart_lit_state = state->literal_state;
7273 }
7274 } else
7275 { free_search_state(state);
7276 return FALSE;
7277 }
7278 } else if ( p->indexed != BY_SP && p->match >= STR_MATCH_LT )
7279 { literal **rlitp;
7280
7281 state->lit_ex.literal = p->object.literal;
7282 prepare_literal_ex(&state->lit_ex);
7283
7284 switch(p->match)
7285 { case STR_MATCH_LT:
7286 case STR_MATCH_LE:
7287 rlitp = skiplist_find_first(&state->db->literals,
7288 NULL, &state->literal_state);
7289 break;
7290 case STR_MATCH_GT:
7291 rlitp = skiplist_find_first(&state->db->literals,
7292 &state->lit_ex, &state->literal_state);
7293 break;
7294 case STR_MATCH_GE:
7295 case STR_MATCH_EQ:
7296 if ( (state->flags&MATCH_NUMERIC) ) /* xsd:double is lowest type */
7297 p->object.literal->type_or_lang = ATOM_ID(ATOM_xsdDouble);
7298 rlitp = skiplist_find_first(&state->db->literals,
7299 &state->lit_ex, &state->literal_state);
7300 break;
7301 case STR_MATCH_BETWEEN:
7302 if ( (state->flags&MATCH_NUMERIC) )
7303 p->object.literal->type_or_lang = ATOM_ID(ATOM_xsdDouble);
7304 rlitp = skiplist_find_first(&state->db->literals,
7305 &state->lit_ex, &state->literal_state);
7306 state->lit_ex.literal = &p->tp.end;
7307 prepare_literal_ex(&state->lit_ex);
7308 break;
7309 default:
7310 assert(0);
7311 return FALSE;
7312 }
7313
7314 if ( rlitp )
7315 { if ( init_cursor_from_literal(state, *rlitp) )
7316 { state->restart_lit = *rlitp;
7317 state->restart_lit_state = state->literal_state;
7318 }
7319 } else
7320 { free_search_state(state);
7321 return FALSE;
7322 }
7323 } else
7324 { init_triple_walker(&state->cursor, state->db, p, p->indexed);
7325 }
7326
7327 return TRUE;
7328 }
7329
7330
7331 static void
free_search_state(search_state * state)7332 free_search_state(search_state *state)
7333 { if ( state->query )
7334 close_query(state->query);
7335
7336 free_triple(state->db, &state->pattern, FALSE);
7337 destroy_triple_walker(state->db, &state->cursor);
7338 if ( !state->db->maintain_duplicates &&
7339 state->dup_answers.count > state->db->duplicate_admin_threshold )
7340 start_duplicate_admin(state->db);
7341 destroy_tripleset(&state->dup_answers);
7342
7343 if ( state->prefix )
7344 PL_unregister_atom(state->prefix);
7345 }
7346
7347
7348 static foreign_t
allow_retry_state(search_state * state)7349 allow_retry_state(search_state *state)
7350 { PL_retry_address(state);
7351 }
7352
7353
7354 static int
new_answer(search_state * state,triple * t)7355 new_answer(search_state *state, triple *t)
7356 { if ( !t->is_duplicate && state->db->duplicates_up_to_date )
7357 return TRUE;
7358
7359 return add_tripleset(state, &state->dup_answers, t);
7360 }
7361
7362
7363 static triple *
is_candidate(search_state * state,triple * t)7364 is_candidate(search_state *state, triple *t)
7365 { if ( !(t=alive_triple(state->query, t)) )
7366 return NULL;
7367 /* hash-collision, skip */
7368 if ( state->has_literal_state )
7369 { if ( !(t->object_is_literal &&
7370 t->object.literal == state->literal_cursor) )
7371 return NULL;
7372 }
7373
7374 if ( !match_triples(state->db, t, &state->pattern, state->query, state->flags) )
7375 return NULL;
7376
7377 if ( !state->src ) /* with source, we report */
7378 { if ( !new_answer(state, t) ) /* duplicates */
7379 return NULL;
7380 }
7381
7382 return t;
7383 }
7384
7385
7386 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7387 next_sub_property() advances the triple-walker to walk over an alternate
7388 hash of the cloud.
7389
7390 - If the cloud doesn't have ->alt_hashes, all related predicates have
7391 the same hash, and we are done.
7392 - If the cloud does have ->alt_hashes, we must walk the
7393 alt-hashes. We do not need to walk hashes that do not use
7394 sub-properties of the target. This is implemented using
7395 hash_holds_candidates().
7396
7397 TBD: How expensive is hash_holds_candidates(). Maybe we should only try
7398 that if there are many candidates in the hash-chains? Alternatively, we
7399 can keep a list of predicates that uses a particular alt-hash, so we do
7400 not have to scan the whole cloud each time.
7401 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7402
7403 static int
hash_holds_candidates(rdf_db * db,unsigned int hash,predicate * p,predicate_cloud * pc,query * q)7404 hash_holds_candidates(rdf_db *db, unsigned int hash,
7405 predicate *p, predicate_cloud *pc,
7406 query *q)
7407 { predicate **pp = pc->members;
7408 predicate **end = &pp[pc->size];
7409
7410 for(; pp<end; pp++)
7411 { predicate *p2 = *pp;
7412
7413 if ( p2->hash == hash && isSubPropertyOf(db, p2, p, q) )
7414 { DEBUG(1, Sdprintf("\thash 0x%x: <%s rdfs:subPropertyOf %s>\n",
7415 hash, pname(p2), pname(p)));
7416 return TRUE;
7417 }
7418 }
7419
7420 return FALSE;
7421 }
7422
7423
/* Move the cursor of `state` to the next alternate predicate hash.

   Does nothing unless MATCH_SUBPROPERTY is set.  The predicate part of
   the walker's hash is swapped by XOR-ing the old hash out of
   tw->unbounded_hash and the new one in.  state->p_cloud is NULL until
   the first call that finds alternate hashes; state->alt_hash_cursor
   indexes the hash currently in use.

   Returns TRUE if the walker was rewound onto a new hash that must be
   scanned and FALSE if there is nothing (more) to try.
*/
static int
next_sub_property(search_state *state)
{ if ( (state->flags & MATCH_SUBPROPERTY) )
  { triple *p = &state->pattern;
    triple_walker *tw = &state->cursor;
    predicate_cloud *pc;

    if ( !(pc=state->p_cloud) )         /* first call for this pattern */
    { if ( !p->predicate.r )            /* no pred on rdf_has(?,-,?) */
        return FALSE;

      if ( is_leaf_predicate(state->db, p->predicate.r, state->query) )
        return FALSE;

      if ( p->predicate.r->cloud->alt_hash_count )
      { pc = state->p_cloud = p->predicate.r->cloud;

        DEBUG(1, Sdprintf("%d alt hashes; first was 0x%x\n",
                          p->predicate.r->cloud->alt_hash_count,
                          predicate_hash(p->predicate.r)));
        /* remove the pattern predicate's own hash */
        tw->unbounded_hash ^= predicate_hash(p->predicate.r);
        state->alt_hash_cursor = 0;
      } else
        return FALSE;                   /* Cloud has only one hash */
    } else
    { /* remove the alternate hash used by the previous scan */
      tw->unbounded_hash ^= pc->alt_hashes[state->alt_hash_cursor];
      state->alt_hash_cursor++;
    }

    /* Find the next alternate hash that differs from the pattern
       predicate's own hash and holds sub properties of it. */
    for( ; state->alt_hash_cursor < pc->alt_hash_count; state->alt_hash_cursor++)
    { unsigned new_hash = pc->alt_hashes[state->alt_hash_cursor];

      if ( new_hash != predicate_hash(p->predicate.r) &&
           hash_holds_candidates(state->db, new_hash,
                                 p->predicate.r, pc, state->query) )
      { DEBUG(1, Sdprintf("Retrying with alt-hash %d (0x%x)\n",
                          state->alt_hash_cursor, new_hash));
        tw->unbounded_hash ^= new_hash; /* install the new hash */
        rewind_triple_walker(tw);

        return TRUE;
      }
    }
  }

  return FALSE;
}
7471
7472
7473 /* next_pattern() advances the pattern for the next query. This is done
7474 for matches that deal with matching inverse properties and matches
7475 that deal with literal ranges (prefix, between, etc.)
7476
7477 Note that inverse and literal enumeration are mutually exclusive (as
7478 long as we do not have literal subjects ...).
7479
   If we enumerate (sub)properties, we must enumerate the Cartesian
   product of the sub properties and the inverse/literal search.
7482 */
7483
/* Advance `state` to the next chain to scan.  Tried in this order:

     1. the next literal of an active literal enumeration, unless the
        match type tells us the enumeration is complete;
     2. the next alternate predicate hash (next_sub_property()),
        restarting the literal enumeration if one was recorded;
     3. the inverse of the pattern, if MATCH_INVERSE is set.

   Returns TRUE if the cursor was repositioned and FALSE if the search
   is exhausted.
*/
static int
next_pattern(search_state *state)
{ triple_walker *tw = &state->cursor;
  triple *p = &state->pattern;

  if ( state->has_literal_state )
  { literal **litp;

    if ( (litp = skiplist_find_next(&state->literal_state)) )
    { literal *lit = *litp;

      DEBUG(2, Sdprintf("next: ");
               print_literal(lit);
               Sdprintf("\n"));

      /* Match types without a case below continue with `lit`
         unconditionally. */
      switch(state->pattern.match)
      { case STR_MATCH_PREFIX:
        { if ( !match_atoms(STR_MATCH_PREFIX, state->prefix, lit->value.string) )
          { DEBUG(1,
                  Sdprintf("PREFIX: terminated literal iteration from ");
                  print_literal(lit);
                  Sdprintf("\n"));
            return FALSE;               /* no longer a prefix */
          }

          break;
        }
        case STR_MATCH_LT:
          if ( compare_literals(&state->lit_ex, lit) <= 0 )
            return FALSE;
          /*FALLTHROUGH*/               /* no break: shares the check below */
        case STR_MATCH_EQ:
        case STR_MATCH_LE:
        case STR_MATCH_BETWEEN:
        { if ( (state->flags&MATCH_NUMERIC) )
          { xsd_primary nt;

            if ( (nt=is_numerical_string(lit)) )
            { xsd_primary np = is_numerical_string(state->lit_ex.literal);

              if ( cmp_xsd_info(np, &state->lit_ex.atom, nt, lit->value.string) < 0 )
                return FALSE;           /* no longer smaller/equal */

              break;
            }
            return FALSE;               /* reached a non-numeric literal */
          } else
          { if ( compare_literals(&state->lit_ex, lit) < 0 )
            { DEBUG(1,
                    Sdprintf("LE/BETWEEN(");
                    print_literal(state->lit_ex.literal);
                    Sdprintf("): terminated literal iteration from ");
                    print_literal(lit);
                    Sdprintf("\n"));
              return FALSE;             /* no longer smaller/equal */
            }
          }

          break;
        }
      }

      init_cursor_from_literal(state, lit);
      return TRUE;
    }
  }

  if ( next_sub_property(state) )       /* redo search with alternative hash */
  { if ( state->restart_lit )           /* replay the literal enumeration */
    { state->literal_state = state->restart_lit_state;
      init_cursor_from_literal(state, state->restart_lit);
    }

    return TRUE;
  }

  if ( (state->flags&MATCH_INVERSE) &&
       inverse_partial_triple(p) )
  { DEBUG(1, Sdprintf("Retrying inverse: "); print_triple(p, PRT_NL));
    state->p_cloud = NULL;              /* next_sub_property() starts over */
    init_triple_walker(tw, state->db, p, p->indexed);

    return TRUE;
  }

  return FALSE;
}
7570
7571
/* Produce the next answer of the search described by `state`.

   The answer is either the triple prefetched by the previous call or
   the next candidate found by walking the cursor (and, when a chain is
   exhausted, the patterns produced by next_pattern()).  After a
   successful unification the function looks ahead for one more
   candidate and stores it in state->prefetched.

   Returns TRUE with state->prefetched set if there may be more answers,
   TRUE with state->prefetched == NULL for the last answer, and FALSE if
   there is no answer or unify_triple() reported an error.
*/
static int
next_search_state(search_state *state)
{ triple *t, *t2;
  triple_walker *tw = &state->cursor;
  triple *p = &state->pattern;
  term_t retpred;                       /* term to unify the predicate with, or 0 */

  if ( (state->flags & MATCH_SUBPROPERTY) )
  { retpred = state->realpred;
    if ( retpred )
    { if ( !p->predicate.r )            /* state->predicate is unbound */
      { if ( !PL_unify(state->predicate, retpred) )
          return FALSE;
      }
    } else
    { if ( !p->predicate.r )
        retpred = state->predicate;
    }
  } else
  { retpred = p->predicate.r ? 0 : state->predicate;
  }

  if ( (t2=state->prefetched) )
  { state->prefetched = NULL;           /* retrying; no need to check */
    goto retry;
  }

  do
  { while( (t = next_triple(tw)) )
    { DEBUG(3, Sdprintf("Search: ");
               print_triple(t, PRT_SRC|PRT_GEN|PRT_NL|PRT_ADR));

      if ( (t2=is_candidate(state, t)) )
      { int rc;

      retry:
        if ( (rc=unify_triple(state->subject, retpred, state->object,
                              state->src, t2, p->inversed)) == FALSE )
          continue;                     /* no unification: try next triple */
        if ( rc == ERROR )
          return FALSE;                 /* makes rdf/3 return FALSE */

        /* We have an answer.  Look ahead for another candidate to
           find out whether a choice point is needed. */
        do
        { while( (t = next_triple(tw)) )
          { DEBUG(3, Sdprintf("Search (prefetch): ");
                     print_triple(t, PRT_SRC|PRT_GEN|PRT_NL|PRT_ADR));

            if ( (t2=is_candidate(state, t)) )
            { state->prefetched = t2;

              return TRUE;              /* non-deterministic */
            }
          }
        } while(next_pattern(state));

        return TRUE;                    /* deterministic */
      }
    }
  } while(next_pattern(state));

  return FALSE;
}
7634
7635
7636
/* Shared non-deterministic implementation behind rdf3(), rdf4(),
   rdf_has3() and rdf_has4().

   `src` and `realpred` are 0 when the calling predicate has no such
   argument; `flags` holds the MATCH_* flags for the search.  The
   search state is embedded in the query opened on the first call and
   is passed to later calls through the foreign context.
*/
static foreign_t
rdf(term_t subject, term_t predicate, term_t object,
    term_t src, term_t realpred, control_t h, unsigned flags)
{ rdf_db *db = rdf_current_db();
  search_state *state;

  switch(PL_foreign_control(h))
  { case PL_FIRST_CALL:
    { query *q = open_query(db);

      if ( !q ) return FALSE;

      state = &q->state.search;         /* state lives inside the query */
      state->query     = q;
      state->db        = db;
      state->subject   = subject;
      state->object    = object;
      state->predicate = predicate;
      state->src       = src;
      state->realpred  = realpred;
      state->flags     = flags;
                                        /* clear the rest */
      memset(&state->cursor, 0,
             (char*)&state->lit_ex - (char*)&state->cursor);
      state->dup_answers.entries = NULL; /* see add_tripleset() */

      /* the regular failure paths of init_search_state() have already
         called free_search_state() */
      if ( !init_search_state(state, q) )
        return FALSE;

      goto search;
    }
    case PL_REDO:
    { int rc;

      state = PL_foreign_context_address(h);
      assert(state->subject == subject);

    search:                             /* also entered from PL_FIRST_CALL */
      if ( (rc=next_search_state(state)) )
      { if ( state->prefetched )        /* more to come: keep the state */
          return allow_retry_state(state);
      }

      free_search_state(state);         /* last answer or failure */
      return rc;
    }
    case PL_PRUNED:
    { state = PL_foreign_context_address(h);

      free_search_state(state);
      return TRUE;
    }
    default:
      assert(0);
      return FALSE;
  }
}
7694
7695
7696 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7697 rdf(Subject, Predicate, Object)
7698
7699 Search specifications:
7700
7701 Predicate:
7702
7703 subPropertyOf(X) = P
7704
7705 Object:
7706
7707 literal(substring(X), L)
7708 literal(word(X), L)
7709 literal(exact(X), L)
7710 literal(icase(X), L)
7711 literal(prefix(X), L)
7712 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7713
7714
7715 static foreign_t
rdf3(term_t subject,term_t predicate,term_t object,control_t h)7716 rdf3(term_t subject, term_t predicate, term_t object, control_t h)
7717 { return rdf(subject, predicate, object, 0, 0, h,
7718 MATCH_EXACT|MATCH_NUMERIC);
7719 }
7720
7721 static foreign_t
rdf4(term_t subject,term_t predicate,term_t object,term_t src,control_t h)7722 rdf4(term_t subject, term_t predicate, term_t object,
7723 term_t src, control_t h)
7724 { return rdf(subject, predicate, object, src, 0, h,
7725 MATCH_EXACT|MATCH_NUMERIC|MATCH_SRC);
7726 }
7727
7728
7729 static foreign_t
rdf_has3(term_t subject,term_t predicate,term_t object,control_t h)7730 rdf_has3(term_t subject, term_t predicate, term_t object, control_t h)
7731 { return rdf(subject, predicate, object, 0, 0, h,
7732 MATCH_EXACT|MATCH_NUMERIC|MATCH_SUBPROPERTY|MATCH_INVERSE);
7733 }
7734
7735
7736 static foreign_t
rdf_has4(term_t subject,term_t predicate,term_t object,term_t realpred,control_t h)7737 rdf_has4(term_t subject, term_t predicate, term_t object,
7738 term_t realpred, control_t h)
7739 { return rdf(subject, predicate, object, 0, realpred, h,
7740 MATCH_EXACT|MATCH_NUMERIC|MATCH_SUBPROPERTY|MATCH_INVERSE);
7741 }
7742
7743
7744 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7745 rdf_estimate_complexity(+S,+P,+O,-C)
7746
7747 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7748
7749 static foreign_t
rdf_estimate_complexity(term_t subject,term_t predicate,term_t object,term_t complexity)7750 rdf_estimate_complexity(term_t subject, term_t predicate, term_t object,
7751 term_t complexity)
7752 { triple t;
7753 size_t c;
7754 rdf_db *db = rdf_current_db();
7755 int rc;
7756
7757 memset(&t, 0, sizeof(t));
7758 if ( (rc=get_partial_triple(db, subject, predicate, object, 0, &t)) != TRUE )
7759 { if ( rc == -1 )
7760 { return FALSE; /* error */
7761 } else
7762 { return PL_unify_integer(complexity, 0); /* no predicate */
7763 }
7764 }
7765
7766 if ( t.indexed == BY_NONE )
7767 { c = db->created - db->erased; /* = totale triple count */
7768 #if 0
7769 } else if ( t.indexed == BY_P )
7770 { c = t.predicate.r->triple_count; /* must sum over children */
7771 #endif
7772 } else
7773 { size_t key = triple_hash_key(&t, t.indexed);
7774 int icol = ICOL(t.indexed);
7775 triple_hash *hash = &db->hash[icol];
7776 size_t count;
7777
7778 if ( !db->hash[icol].created )
7779 create_triple_hashes(db, 1, &icol);
7780
7781 c = 0;
7782 for(count=hash->bucket_count_epoch; count <= hash->bucket_count; count *= 2)
7783 { int entry = key%count;
7784 triple_bucket *bucket = &hash->blocks[MSB(entry)][entry];
7785
7786 c += bucket->count; /* TBD: compensate for resize */
7787 }
7788 }
7789
7790 rc = PL_unify_int64(complexity, c);
7791 free_triple(db, &t, FALSE);
7792
7793 return rc;
7794 }
7795
7796
7797 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7798 current_literal(?Literals)
7799 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7800
/* Enumeration state of rdf_current_literal(); allocated on the first
   call and released when the enumeration ends or is pruned.
*/
typedef struct cl_state
{ skiplist_enum sl_state;               /* position in the literal skiplist */
  int           indexed;                /* TRUE: enumeration started at lit_ex */
  literal       lit;                    /* literal parsed from the argument */
  literal_ex    lit_ex;                 /* wrapper of lit for lookup/compare */
} cl_state;
7807
7808 static int
indexedLiteral(const literal * lit)7809 indexedLiteral(const literal *lit)
7810 { if ( lit->objtype == OBJ_STRING )
7811 return lit->value.string != 0;
7812 return lit->objtype != OBJ_UNTYPED;
7813 }
7814
7815
/* rdf_current_literal(?Literal)

   Enumerate the literals in db->literals that unify with `t`.  With an
   unbound `t`, or a partial literal that indexedLiteral() rejects, the
   whole skiplist is walked.  Otherwise the walk starts at the position
   found for the given literal and stops once compare_literals()
   against the visited literal returns a value > 0.

   The state is heap-allocated on the first call and released at the
   `cleanup` label, which is reached when the enumeration is exhausted,
   an exception is pending or the choice point is pruned.
*/
static foreign_t
rdf_current_literal(term_t t, control_t h)
{ rdf_db *db = rdf_current_db();
  literal **data;
  cl_state *state;
  int rc;

  switch(PL_foreign_control(h))
  { case PL_FIRST_CALL:
      state = rdf_malloc(db, sizeof(*state));
      memset(state, 0, sizeof(*state));

      if ( PL_is_variable(t) )          /* enumerate everything */
      { data = skiplist_find_first(&db->literals, NULL, &state->sl_state);
        goto next;
      } else
      { if ( !get_literal(db, t, &state->lit, LIT_PARTIAL) )
        { rdf_free(db, state, sizeof(*state));
          return FALSE;
        }
        if ( indexedLiteral(&state->lit) )
        { state->lit_ex.literal = &state->lit;
          prepare_literal_ex(&state->lit_ex);
          data = skiplist_find_first(&db->literals,
                                     &state->lit_ex, &state->sl_state);
          state->indexed = TRUE;
        } else
        { data = skiplist_find_first(&db->literals, NULL, &state->sl_state);
        }
        goto next;
      }
    case PL_REDO:
      state = PL_foreign_context_address(h);
      data  = skiplist_find_next(&state->sl_state);
    next:                               /* also entered from PL_FIRST_CALL */
    { fid_t fid = PL_open_foreign_frame();

      for(; data; data=skiplist_find_next(&state->sl_state))
      { literal *lit = *data;

        if ( unify_literal(t, lit) )
        { PL_close_foreign_frame(fid);
          PL_retry_address(state);      /* returns, keeping the state */
        } else if ( PL_exception(0) )
        { break;
        } else if ( state->indexed &&
                    compare_literals(&state->lit_ex, lit) > 0 )
        { break;                        /* past the requested literal */
        } else
        { PL_rewind_foreign_frame(fid); /* undo bindings, try the next one */
        }
      }
      PL_close_foreign_frame(fid);
      rc = FALSE;
      goto cleanup;
    }
    case PL_PRUNED:
      state = PL_foreign_context_address(h);
      rc = TRUE;

    cleanup:                            /* also entered from the loop above */
      free_literal(db, &state->lit);
      rdf_free(db, state, sizeof(*state));

      return rc;
    default:
      assert(0);
      return FALSE;
  }
}
7886
7887
/* Compute the replacement for triple `t` as described by `action`,
   which must be one of subject(S), predicate(P), object(O) or graph(G).

   The change is applied to a stack copy of `t`.  If the action changes
   nothing, TRUE is returned without creating a triple (the graph branch
   additionally sets *updated to NULL).  Otherwise a new triple is
   allocated and filled from the modified copy, its atoms are locked
   and it is stored in *updated.

   Returns FALSE (or raises a type/domain error) if `action` is
   malformed or its argument cannot be converted.
*/
static int
update_triple(rdf_db *db, term_t action, triple *t, triple **updated, query *q)
{ term_t a = PL_new_term_ref();
  triple tmp, *new;
                                        /* Create copy in local memory */
  tmp = *t;

  if ( !PL_get_arg(1, action, a) )
    return PL_type_error("rdf_action", action);

  if ( PL_is_functor(action, FUNCTOR_subject1) )
  { atom_t s;

    if ( !PL_get_atom_ex(a, &s) )
      return FALSE;
    if ( tmp.subject_id == ATOM_ID(s) )
      return TRUE;                      /* no change */

    tmp.subject_id = ATOM_ID(s);
  } else if ( PL_is_functor(action, FUNCTOR_predicate1) )
  { predicate *p;

    if ( !get_predicate(db, a, &p, q) )
      return FALSE;
    if ( tmp.predicate.r == p )
      return TRUE;                      /* no change */

    tmp.predicate.r = p;
  } else if ( PL_is_functor(action, FUNCTOR_object1) )
  { triple t2;

    memset(&t2, 0, sizeof(t2));

    if ( !get_object(db, a, &t2) )
    { free_triple(db, &t2, FALSE);
      return FALSE;
    }
    if ( match_object(&t2, &tmp, MATCH_QUAL) )
    { free_triple(db, &t2, FALSE);
      return TRUE;                      /* no change */
    }

    /* take over the object of t2; t2 is not freed from here on */
    if ( (tmp.object_is_literal = t2.object_is_literal) )
    { tmp.object.literal = t2.object.literal;
    } else
    { tmp.object.resource = t2.object.resource;
    }
  } else if ( PL_is_functor(action, FUNCTOR_graph1) )
  { triple t2;                          /* only graph_id and line are read */

    if ( !get_graph(a, &t2) )
      return FALSE;
    if ( t2.graph_id == t->graph_id && t2.line == t->line )
    { *updated = NULL;
      return TRUE;                      /* no change */
    }

    tmp.graph_id = t2.graph_id;
    tmp.line = t2.line;
  } else
    return PL_domain_error("rdf_action", action);

  new = new_triple(db);
  new->subject_id = tmp.subject_id;
  new->predicate.r = tmp.predicate.r;
  if ( (new->object_is_literal = tmp.object_is_literal) )
  { if ( tmp.object.literal->shared )
    { /* shared literal: copy it under the literal lock */
      simpleMutexLock(&db->locks.literal);
      new->object.literal = copy_literal(db, tmp.object.literal);
      simpleMutexUnlock(&db->locks.literal);
    } else
    { /* NOTE(review): presumably the private literal created by
         get_object() above; it is adopted as is - confirm */
      new->object.literal = tmp.object.literal;
    }
  } else
  { new->object.resource = tmp.object.resource;
  }
  new->graph_id = tmp.graph_id;
  new->line = tmp.line;

  lock_atoms(db, new);

  *updated = new;

  return TRUE;
}
7973
7974
7975 /** rdf_update(+Subject, +Predicate, +Object, +Action) is det.
7976
7977 Update a triple. Please note this is actually erase+assert
7978 */
7979
7980 static foreign_t
rdf_update5(term_t subject,term_t predicate,term_t object,term_t src,term_t action)7981 rdf_update5(term_t subject, term_t predicate, term_t object, term_t src,
7982 term_t action)
7983 { triple t, *p;
7984 int indexed = BY_SPO;
7985 rdf_db *db = rdf_current_db();
7986 int rc = TRUE;
7987 size_t count;
7988 triple_walker tw;
7989 triple_buffer matches;
7990 query *q = open_query(db);
7991
7992 if ( !q ) return FALSE;
7993 memset(&t, 0, sizeof(t));
7994
7995 if ( !get_src(src, &t) ||
7996 !get_triple(db, subject, predicate, object, &t, q) )
7997 { close_query(q);
7998 return FALSE;
7999 }
8000
8001 init_triple_buffer(&matches);
8002 init_triple_walker(&tw, db, &t, indexed);
8003 while((p=next_triple(&tw)))
8004 { if ( !(p=alive_triple(q, p)) )
8005 continue;
8006
8007 if ( match_triples(db, p, &t, q, MATCH_EXACT) )
8008 buffer_triple(&matches, p);
8009 }
8010
8011 if ( !is_empty_buffer(&matches) )
8012 { triple_buffer replacements;
8013 triple *new, **tp;
8014
8015 count = matches.top-matches.base;
8016 init_triple_buffer(&replacements);
8017 for(tp=matches.base; tp<matches.top; tp++)
8018 { new = NULL;
8019 if ( !update_triple(db, action, *tp, &new, q) )
8020 { rc = FALSE;
8021 free_triple_buffer(&replacements);
8022 goto out;
8023 }
8024
8025 buffer_triple(&replacements, new);
8026 }
8027
8028 update_triples(q, matches.base, replacements.base, count);
8029 free_triple_buffer(&replacements);
8030 } else
8031 { count = 0;
8032 }
8033
8034 out:
8035 close_query(q);
8036 free_triple_buffer(&matches);
8037 free_triple(db, &t, FALSE);
8038
8039 return (rc && count > 0) ? TRUE : FALSE;
8040 }
8041
8042
8043 static foreign_t
rdf_update(term_t subject,term_t predicate,term_t object,term_t action)8044 rdf_update(term_t subject, term_t predicate, term_t object, term_t action)
8045 { return rdf_update5(subject, predicate, object, 0, action);
8046 }
8047
8048
8049 static foreign_t
rdf_retractall4(term_t subject,term_t predicate,term_t object,term_t src)8050 rdf_retractall4(term_t subject, term_t predicate, term_t object, term_t src)
8051 { triple t, *p;
8052 rdf_db *db = rdf_current_db();
8053 triple_walker tw;
8054 triple_buffer buf;
8055 query *q;
8056
8057 memset(&t, 0, sizeof(t));
8058 switch( get_partial_triple(db, subject, predicate, object, src, &t) )
8059 { case 0: /* no such predicate */
8060 return TRUE;
8061 case -1: /* error */
8062 return FALSE;
8063 }
8064
8065 if ( t.graph_id ) /* speedup for rdf_retractall(_,_,_,DB) */
8066 { graph *gr = existing_graph(db, ID_ATOM(t.graph_id));
8067
8068 if ( !gr || gr->triple_count == 0 )
8069 return TRUE;
8070 }
8071
8072 if ( !(q = open_query(db)) )
8073 return FALSE;
8074 init_triple_buffer(&buf);
8075 init_triple_walker(&tw, db, &t, t.indexed);
8076 while((p=next_triple(&tw)))
8077 { if ( !(p=alive_triple(q, p)) )
8078 continue;
8079
8080 if ( match_triples(db, p, &t, q, MATCH_EXACT|MATCH_SRC) )
8081 { if ( t.object_is_literal && t.object.literal->objtype == OBJ_TERM )
8082 { fid_t fid = PL_open_foreign_frame();
8083 int rc = unify_object(object, p);
8084 PL_discard_foreign_frame(fid);
8085 if ( !rc )
8086 continue;
8087 }
8088
8089 buffer_triple(&buf, p);
8090 }
8091 }
8092 free_triple(db, &t, FALSE);
8093 del_triples(q, buf.base, buf.top-buf.base);
8094 close_query(q);
8095 free_triple_buffer(&buf);
8096
8097
8098 return TRUE;
8099 }
8100
8101
8102 static foreign_t
rdf_retractall3(term_t subject,term_t predicate,term_t object)8103 rdf_retractall3(term_t subject, term_t predicate, term_t object)
8104 { return rdf_retractall4(subject, predicate, object, 0);
8105 }
8106
8107
8108 /*******************************
8109 * MONITOR *
8110 *******************************/
8111
/* One registered monitor callback; kept in a singly linked list. */
typedef struct broadcast_callback
{ struct broadcast_callback *next;	/* next callback in the list */
  predicate_t pred;			/* predicate called by do_broadcast() */
  long mask;				/* events this callback listens to */
} broadcast_callback;

static long joined_mask = 0L;		/* OR of the masks of all callbacks */
static broadcast_callback *callback_list;	/* head of the list */
static broadcast_callback *callback_tail;	/* last element, for appending */
8121
8122 static int
do_broadcast(term_t term,long mask)8123 do_broadcast(term_t term, long mask)
8124 { if ( callback_list )
8125 { broadcast_callback *cb;
8126
8127 for(cb = callback_list; cb; cb = cb->next)
8128 { qid_t qid;
8129 term_t ex;
8130
8131 if ( !(cb->mask & mask) )
8132 continue;
8133
8134 if ( !(qid = PL_open_query(NULL, PL_Q_CATCH_EXCEPTION, cb->pred, term)) )
8135 return FALSE;
8136 if ( !PL_next_solution(qid) && (ex = PL_exception(qid)) )
8137 { term_t av;
8138
8139 PL_cut_query(qid);
8140
8141 if ( (av = PL_new_term_refs(2)) &&
8142 PL_put_atom(av+0, ATOM_error) &&
8143 PL_put_term(av+1, ex) )
8144 PL_call_predicate(NULL, PL_Q_NORMAL,
8145 PL_predicate("print_message", 2, "user"),
8146 av);
8147 return FALSE;
8148 } else
8149 { PL_close_query(qid);
8150 }
8151 }
8152 }
8153
8154 return TRUE;
8155 }
8156
8157
8158 int
rdf_is_broadcasting(broadcast_id id)8159 rdf_is_broadcasting(broadcast_id id)
8160 { return (joined_mask & id) != 0;
8161 }
8162
8163
8164 int
rdf_broadcast(broadcast_id id,void * a1,void * a2)8165 rdf_broadcast(broadcast_id id, void *a1, void *a2)
8166 { int rc = TRUE;
8167
8168 if ( (joined_mask & id) )
8169 { fid_t fid;
8170 term_t term;
8171 functor_t funct;
8172
8173 if ( !(fid = PL_open_foreign_frame()) ||
8174 !(term = PL_new_term_ref()) )
8175 return FALSE;
8176
8177 switch(id)
8178 { case EV_ASSERT:
8179 case EV_ASSERT_LOAD:
8180 funct = FUNCTOR_assert4;
8181 goto assert_retract;
8182 case EV_RETRACT:
8183 funct = FUNCTOR_retract4;
8184 assert_retract:
8185 { triple *t = a1;
8186 term_t tmp;
8187
8188 if ( !(tmp = PL_new_term_refs(4)) ||
8189 !PL_put_atom(tmp+0, ID_ATOM(t->subject_id)) ||
8190 !PL_put_atom(tmp+1, t->predicate.r->name) ||
8191 !unify_object(tmp+2, t) ||
8192 !unify_graph(tmp+3, t) ||
8193 !PL_cons_functor_v(term, funct, tmp) )
8194 return FALSE;
8195 break;
8196 }
8197 case EV_UPDATE:
8198 { triple *t = a1;
8199 triple *new = a2;
8200 term_t tmp, a;
8201 functor_t action;
8202 int rc;
8203
8204 if ( !(tmp = PL_new_term_refs(5)) ||
8205 !(a = PL_new_term_ref()) ||
8206 !PL_put_atom(tmp+0, ID_ATOM(t->subject_id)) ||
8207 !PL_put_atom(tmp+1, t->predicate.r->name) ||
8208 !unify_object(tmp+2, t) ||
8209 !unify_graph(tmp+3, t) )
8210 return FALSE;
8211
8212 if ( t->subject_id != new->subject_id )
8213 { action = FUNCTOR_subject1;
8214 rc = PL_put_atom(a, ID_ATOM(new->subject_id));
8215 } else if ( t->predicate.r != new->predicate.r )
8216 { action = FUNCTOR_predicate1;
8217 rc = PL_put_atom(a, new->predicate.r->name);
8218 } else if ( !match_object(t, new, MATCH_QUAL) )
8219 { action = FUNCTOR_object1;
8220 rc = unify_object(a, new);
8221 } else if ( !same_graph(t, new) )
8222 { action = FUNCTOR_graph1;
8223 rc = unify_graph(a, new);
8224 } else
8225 { return TRUE; /* no change */
8226 }
8227
8228 if ( !rc ||
8229 !PL_cons_functor_v(tmp+4, action, a) ||
8230 !PL_cons_functor_v(term, FUNCTOR_update5, tmp) )
8231 return FALSE;
8232 break;
8233 }
8234 case EV_NEW_LITERAL:
8235 { literal *lit = a1;
8236 term_t tmp;
8237
8238 if ( !(tmp = PL_new_term_refs(1)) ||
8239 !unify_literal(tmp, lit) ||
8240 !PL_cons_functor_v(term, FUNCTOR_new_literal1, tmp) )
8241 return FALSE;
8242 break;
8243 }
8244 case EV_OLD_LITERAL:
8245 { literal *lit = a1;
8246 term_t tmp;
8247
8248 if ( !(tmp = PL_new_term_refs(1)) ||
8249 !unify_literal(tmp, lit) ||
8250 !PL_cons_functor_v(term, FUNCTOR_old_literal1, tmp) )
8251 return FALSE;
8252 break;
8253 }
8254 case EV_LOAD:
8255 { term_t ctx = (term_t)a1;
8256 term_t be = (term_t)a2;
8257 term_t tmp;
8258
8259 if ( !(tmp = PL_new_term_refs(2)) ||
8260 !PL_put_term(tmp+0, be) || /* begin/end(graphs) */
8261 !PL_put_term(tmp+1, ctx) ||
8262 !PL_cons_functor_v(term, FUNCTOR_load2, tmp) )
8263 return FALSE;
8264 break;
8265 }
8266 case EV_TRANSACTION:
8267 { term_t ctx = (term_t)a1;
8268 term_t be = (term_t)a2;
8269 term_t tmp;
8270
8271 if ( !(tmp = PL_new_term_refs(2)) ||
8272 !PL_put_term(tmp+0, be) || /* begin/end */
8273 !PL_put_term(tmp+1, ctx) ||
8274 !PL_cons_functor_v(term, FUNCTOR_transaction2, tmp) )
8275 return FALSE;
8276 break;
8277 }
8278 case EV_RESET:
8279 { PL_put_atom(term, ATOM_reset);
8280 break;
8281 }
8282 case EV_CREATE_GRAPH:
8283 { graph *g = a1;
8284 term_t tmp;
8285
8286 if ( !(tmp = PL_new_term_refs(1)) ||
8287 !(PL_put_atom(tmp, g->name)) ||
8288 !PL_cons_functor_v(term, FUNCTOR_create_graph1, tmp) )
8289 return FALSE;
8290 break;
8291 }
8292 default:
8293 assert(0);
8294 }
8295
8296 rc = do_broadcast(term, id);
8297
8298 PL_discard_foreign_frame(fid);
8299 }
8300
8301 return rc;
8302 }
8303
8304
8305 static foreign_t
rdf_monitor(term_t goal,term_t mask)8306 rdf_monitor(term_t goal, term_t mask)
8307 { atom_t name;
8308 broadcast_callback *cb;
8309 predicate_t p;
8310 long msk;
8311 module_t m = NULL;
8312
8313 if ( !PL_strip_module(goal, &m, goal) ||
8314 !PL_get_atom_ex(goal, &name) ||
8315 !PL_get_long_ex(mask, &msk) )
8316 return FALSE;
8317
8318 p = PL_pred(PL_new_functor(name, 1), m);
8319
8320 for(cb=callback_list; cb; cb = cb->next)
8321 { if ( cb->pred == p )
8322 { broadcast_callback *cb2;
8323 cb->mask = msk;
8324
8325 joined_mask = 0L;
8326 for(cb2=callback_list; cb2; cb2 = cb2->next)
8327 joined_mask |= cb2->mask;
8328 DEBUG(2, Sdprintf("Set mask to 0x%x\n", joined_mask));
8329
8330 return TRUE;
8331 }
8332 }
8333
8334 cb = PL_malloc(sizeof(*cb));
8335 cb->next = NULL;
8336 cb->mask = msk;
8337 cb->pred = p;
8338 if ( callback_list )
8339 { callback_tail->next = cb;
8340 callback_tail = cb;
8341 } else
8342 { callback_list = callback_tail = cb;
8343 }
8344 joined_mask |= msk;
8345
8346 return TRUE;
8347 }
8348
8349
8350
8351 static foreign_t
rdf_set_predicate(term_t pred,term_t option)8352 rdf_set_predicate(term_t pred, term_t option)
8353 { predicate *p;
8354 rdf_db *db = rdf_current_db();
8355 query *q = open_query(db);
8356 int rc;
8357
8358 if ( !q ) return FALSE;
8359 if ( !get_predicate(db, pred, &p, q) )
8360 { rc = FALSE;
8361 goto out;
8362 }
8363
8364 if ( PL_is_functor(option, FUNCTOR_symmetric1) )
8365 { int val;
8366
8367 if ( !get_bool_arg_ex(1, option, &val) )
8368 { rc = FALSE;
8369 goto out;
8370 }
8371
8372 if ( val )
8373 p->inverse_of = p;
8374 else
8375 p->inverse_of = NULL;
8376
8377 rc = TRUE;
8378 } else if ( PL_is_functor(option, FUNCTOR_inverse_of1) )
8379 { term_t a = PL_new_term_ref();
8380 predicate *i;
8381
8382 _PL_get_arg(1, option, a);
8383 if ( PL_get_nil(a) )
8384 { if ( p->inverse_of )
8385 { p->inverse_of->inverse_of = NULL;
8386 p->inverse_of = NULL;
8387 }
8388 } else
8389 { if ( !get_predicate(db, a, &i, q) )
8390 { rc = FALSE;
8391 goto out;
8392 }
8393
8394 p->inverse_of = i;
8395 i->inverse_of = p;
8396 }
8397 rc = TRUE;
8398 } else if ( PL_is_functor(option, FUNCTOR_transitive1) )
8399 { int val;
8400
8401 if ( !get_bool_arg_ex(1, option, &val) )
8402 return FALSE;
8403
8404 p->transitive = val;
8405
8406 rc = TRUE;
8407 } else
8408 rc = PL_type_error("predicate_option", option);
8409
8410 out:
8411 close_query(q);
8412 return rc;
8413 }
8414
8415
/* Property functors enumerated by rdf_predicate_property().  The table is
   filled on first use with 8 functors; the remaining slot stays 0 and
   terminates the enumeration.
*/
#define PRED_PROPERTY_COUNT 9
static functor_t predicate_key[PRED_PROPERTY_COUNT];
8418
8419 static int
unify_predicate_property(rdf_db * db,predicate * p,term_t option,functor_t f,query * q)8420 unify_predicate_property(rdf_db *db, predicate *p, term_t option,
8421 functor_t f, query *q)
8422 { if ( f == FUNCTOR_symmetric1 )
8423 return PL_unify_term(option, PL_FUNCTOR, f,
8424 PL_BOOL, p->inverse_of == p ? TRUE : FALSE);
8425 else if ( f == FUNCTOR_inverse_of1 )
8426 { if ( p->inverse_of )
8427 return PL_unify_term(option, PL_FUNCTOR, f,
8428 PL_ATOM, p->inverse_of->name);
8429 else
8430 return FALSE;
8431 } else if ( f == FUNCTOR_transitive1 )
8432 { return PL_unify_term(option, PL_FUNCTOR, f,
8433 PL_BOOL, p->transitive);
8434 } else if ( f == FUNCTOR_triples1 )
8435 { return PL_unify_term(option, PL_FUNCTOR, f,
8436 PL_LONG, p->triple_count);
8437 } else if ( f == FUNCTOR_rdf_subject_branch_factor1 )
8438 { return PL_unify_term(option, PL_FUNCTOR, f,
8439 PL_FLOAT, subject_branch_factor(db, p, q, DISTINCT_DIRECT));
8440 } else if ( f == FUNCTOR_rdf_object_branch_factor1 )
8441 { return PL_unify_term(option, PL_FUNCTOR, f,
8442 PL_FLOAT, object_branch_factor(db, p, q, DISTINCT_DIRECT));
8443 } else if ( f == FUNCTOR_rdfs_subject_branch_factor1 )
8444 { return PL_unify_term(option, PL_FUNCTOR, f,
8445 PL_FLOAT, subject_branch_factor(db, p, q, DISTINCT_SUB));
8446 } else if ( f == FUNCTOR_rdfs_object_branch_factor1 )
8447 { return PL_unify_term(option, PL_FUNCTOR, f,
8448 PL_FLOAT, object_branch_factor(db, p, q, DISTINCT_SUB));
8449 } else
8450 { assert(0);
8451 return FALSE;
8452 }
8453 }
8454
8455
/* Enumeration state of rdf_current_predicate() */
typedef struct enum_pred
{ predicate *p;				/* next in current chain, or NULL */
  int i;				/* index of the current bucket */
} enum_pred;
8460
8461
8462 static foreign_t
rdf_current_predicate(term_t name,control_t h)8463 rdf_current_predicate(term_t name, control_t h)
8464 { rdf_db *db = rdf_current_db();
8465 predicate *p;
8466 enum_pred *ep;
8467 atom_t a;
8468
8469 switch( PL_foreign_control(h) )
8470 { case PL_FIRST_CALL:
8471 if ( PL_is_variable(name) )
8472 { ep = rdf_malloc(db, sizeof(*ep));
8473 ep->i = 0;
8474 ep->p = NULL;
8475 goto next;
8476 } else if ( PL_get_atom(name, &a) )
8477 { return existing_predicate(db, a) != NULL;
8478 } else if ( PL_is_functor(name, FUNCTOR_literal1) )
8479 { return FALSE;
8480 }
8481
8482 return PL_type_error("atom", name);
8483 case PL_REDO:
8484 ep = PL_foreign_context_address(h);
8485 goto next;
8486 case PL_PRUNED:
8487 ep = PL_foreign_context_address(h);
8488 rdf_free(db, ep, sizeof(*ep));
8489 return TRUE;
8490 default:
8491 assert(0);
8492 return FALSE;
8493 }
8494
8495 next:
8496 if ( !(p=ep->p) )
8497 { while (!(p = db->predicates.blocks[MSB(ep->i)][ep->i]) )
8498 { if ( ++ep->i >= db->predicates.bucket_count )
8499 goto fail;
8500 }
8501 }
8502
8503 if ( !PL_unify_atom(name, p->name) )
8504 { fail:
8505 rdf_free(db, ep, sizeof(*ep));
8506 return FALSE;
8507 }
8508
8509 if ( !(ep->p = p->next) )
8510 { if ( ++ep->i >= db->predicates.bucket_count )
8511 { rdf_free(db, ep, sizeof(*ep));
8512 return TRUE;
8513 }
8514 }
8515 PL_retry_address(ep);
8516 }
8517
8518
8519 static foreign_t
rdf_predicate_property(term_t pred,term_t option,control_t h)8520 rdf_predicate_property(term_t pred, term_t option, control_t h)
8521 { predicate *p;
8522 rdf_db *db = rdf_current_db();
8523 query *q;
8524
8525 if ( !predicate_key[0] )
8526 { int i = 0;
8527
8528 predicate_key[i++] = FUNCTOR_symmetric1;
8529 predicate_key[i++] = FUNCTOR_inverse_of1;
8530 predicate_key[i++] = FUNCTOR_transitive1;
8531 predicate_key[i++] = FUNCTOR_triples1;
8532 predicate_key[i++] = FUNCTOR_rdf_subject_branch_factor1;
8533 predicate_key[i++] = FUNCTOR_rdf_object_branch_factor1;
8534 predicate_key[i++] = FUNCTOR_rdfs_subject_branch_factor1;
8535 predicate_key[i++] = FUNCTOR_rdfs_object_branch_factor1;
8536 assert(i < PRED_PROPERTY_COUNT);
8537 }
8538
8539 switch(PL_foreign_control(h))
8540 { case PL_FIRST_CALL:
8541 { functor_t f;
8542 int rc;
8543
8544 if ( !(q = open_query(db)) )
8545 return FALSE;
8546 if ( PL_is_variable(option) )
8547 { q->state.predprop.prop = 0;
8548 if ( !get_predicate(db, pred, &q->state.predprop.pred, q) )
8549 { close_query(q);
8550 return FALSE;
8551 }
8552 goto redo;
8553 } else if ( PL_get_functor(option, &f) )
8554 { int n;
8555
8556 for(n=0; predicate_key[n]; n++)
8557 { if ( predicate_key[n] == f )
8558 { if ( !get_predicate(db, pred, &p, q) )
8559 return FALSE;
8560 rc = unify_predicate_property(db, p, option, f, q);
8561 goto out;
8562 }
8563 }
8564 rc = PL_domain_error("rdf_predicate_property", option);
8565 } else
8566 rc = PL_type_error("rdf_predicate_property", option);
8567 out:
8568 close_query(q);
8569 return rc;
8570 }
8571 case PL_REDO:
8572 q = PL_foreign_context_address(h);
8573 redo:
8574 for( ; predicate_key[q->state.predprop.prop]; q->state.predprop.prop++ )
8575 { if ( unify_predicate_property(db,
8576 q->state.predprop.pred,
8577 option,
8578 predicate_key[q->state.predprop.prop],
8579 q) )
8580 { q->state.predprop.prop++;
8581 if ( predicate_key[q->state.predprop.prop] )
8582 PL_retry_address(q);
8583 return TRUE;
8584 }
8585 }
8586 return FALSE;
8587 case PL_PRUNED:
8588 q = PL_foreign_context_address(h);
8589 close_query(q);
8590 return TRUE;
8591 default:
8592 assert(0);
8593 return TRUE;
8594 }
8595 }
8596
8597
8598 /*******************************
8599 * TRANSITIVE RELATIONS *
8600 *******************************/
8601
8602 static visited *
alloc_node_agenda(rdf_db * db,agenda * a)8603 alloc_node_agenda(rdf_db *db, agenda *a)
8604 { chunk *c;
8605 int size;
8606
8607 if ( (c=a->chunk) )
8608 { if ( c->used < c->size )
8609 { visited *v = &c->nodes[c->used++];
8610
8611 return v;
8612 }
8613 }
8614
8615 size = (a->size == 0 ? 8 : 1024);
8616 c = rdf_malloc(db, CHUNK_SIZE(size));
8617 c->size = size;
8618 c->used = 1;
8619 c->next = a->chunk;
8620 a->chunk = c;
8621
8622 return &c->nodes[0];
8623 }
8624
8625
8626 static void
empty_agenda(rdf_db * db,agenda * a)8627 empty_agenda(rdf_db *db, agenda *a)
8628 { chunk *c, *n;
8629
8630 for(c=a->chunk; c; c = n)
8631 { n = c->next;
8632 rdf_free(db, c, CHUNK_SIZE(c->size));
8633 }
8634 if ( a->hash )
8635 rdf_free(db, a->hash, sizeof(visited*)*a->hash_size);
8636
8637 if ( a->query )
8638 close_query(a->query);
8639 }
8640
8641
8642 static void
hash_agenda(rdf_db * db,agenda * a,int size)8643 hash_agenda(rdf_db *db, agenda *a, int size)
8644 { if ( a->hash )
8645 rdf_free(db, a->hash, sizeof(*a->hash));
8646 if ( size > 0 )
8647 { visited *v;
8648
8649 a->hash = rdf_malloc(db, sizeof(visited*)*size);
8650 memset(a->hash, 0, sizeof(visited*)*size);
8651 a->hash_size = size;
8652
8653 for(v=a->head; v; v = v->next)
8654 { int key = atom_hash(v->resource, MURMUR_SEED)&(size-1);
8655
8656 v->hash_link = a->hash[key];
8657 a->hash[key] = v;
8658 }
8659 }
8660 }
8661
8662
8663 static int
in_agenda(agenda * a,atom_t resource)8664 in_agenda(agenda *a, atom_t resource)
8665 { visited *v;
8666
8667 if ( a->hash )
8668 { int key = atom_hash(resource, MURMUR_SEED)&(a->hash_size-1);
8669 v = a->hash[key];
8670
8671 for( ; v; v = v->hash_link )
8672 { if ( v->resource == resource )
8673 return TRUE;
8674 }
8675 } else
8676 { v = a->head;
8677
8678 for( ; v; v = v->next )
8679 { if ( v->resource == resource )
8680 return TRUE;
8681 }
8682 }
8683
8684 return FALSE;
8685 }
8686
8687
8688 static visited *
append_agenda(rdf_db * db,agenda * a,atom_t res,uintptr_t d)8689 append_agenda(rdf_db *db, agenda *a, atom_t res, uintptr_t d)
8690 { visited *v = a->head;
8691
8692 if ( in_agenda(a, res) )
8693 return NULL;
8694
8695 db->agenda_created++; /* statistics */
8696
8697 a->size++;
8698 if ( !a->hash_size && a->size > 32 )
8699 hash_agenda(db, a, 64);
8700 else if ( a->size > a->hash_size * 4 )
8701 hash_agenda(db, a, a->hash_size * 4);
8702
8703 v = alloc_node_agenda(db, a);
8704 v->resource = res;
8705 v->distance = d;
8706 v->next = NULL;
8707 if ( a->tail )
8708 { a->tail->next = v;
8709 a->tail = v;
8710 } else
8711 { a->head = a->tail = v;
8712 }
8713
8714 if ( a->hash_size )
8715 { int key = atom_hash(res, MURMUR_SEED)&(a->hash_size-1);
8716
8717 v->hash_link = a->hash[key];
8718 a->hash[key] = v;
8719 }
8720
8721 return v;
8722 }
8723
8724
8725 static int
can_reach_target(rdf_db * db,agenda * a,query * q)8726 can_reach_target(rdf_db *db, agenda *a, query *q)
8727 { triple_walker tw;
8728 int indexed = a->pattern.indexed;
8729 int rc = FALSE;
8730 triple *p;
8731
8732 if ( indexed & BY_S ) /* subj ---> */
8733 { a->pattern.object.resource = a->target;
8734 indexed |= BY_O;
8735 } else
8736 { a->pattern.subject_id = ATOM_ID(a->target);
8737 indexed |= BY_S;
8738 }
8739
8740 init_triple_walker(&tw, db, &a->pattern, indexed);
8741 while((p=next_triple(&tw)))
8742 { if ( match_triples(db, p, &a->pattern, q, MATCH_SUBPROPERTY) )
8743 { rc = TRUE;
8744 break;
8745 }
8746 }
8747
8748 if ( a->pattern.indexed & BY_S )
8749 { a->pattern.object.resource = 0;
8750 } else
8751 { a->pattern.subject_id = 0;
8752 }
8753
8754 return rc;
8755 }
8756
8757
8758
8759 static visited *
bf_expand(rdf_db * db,agenda * a,atom_t resource,uintptr_t d,query * q)8760 bf_expand(rdf_db *db, agenda *a, atom_t resource, uintptr_t d, query *q)
8761 { search_state state;
8762 visited *rc = NULL;
8763
8764 state.pattern = a->pattern; /* Structure copy */
8765 state.flags = MATCH_SUBPROPERTY|MATCH_INVERSE;
8766 state.p_cloud = NULL;
8767 state.query = q;
8768 state.db = db;
8769
8770 if ( state.pattern.indexed & BY_S ) /* subj ---> */
8771 { state.pattern.subject_id = ATOM_ID(resource);
8772 } else
8773 { state.pattern.object.resource = resource;
8774 }
8775
8776 if ( a->target && can_reach_target(db, a, q) )
8777 return append_agenda(db, a, a->target, d);
8778
8779 for(;;)
8780 { int indexed = state.pattern.indexed;
8781 triple *p;
8782
8783 init_triple_walker(&state.cursor, db, &state.pattern, indexed);
8784 while((p=next_triple(&state.cursor)))
8785 { if ( !alive_triple(a->query, p) )
8786 continue;
8787
8788 if ( match_triples(db, p, &state.pattern, a->query, MATCH_SUBPROPERTY) )
8789 { atom_t found;
8790 visited *v;
8791
8792 if ( indexed & BY_S )
8793 { if ( p->object_is_literal )
8794 continue;
8795 found = p->object.resource;
8796 } else
8797 { found = ID_ATOM(p->subject_id);
8798 }
8799
8800 v = append_agenda(db, a, found, d);
8801 if ( !rc )
8802 rc = v;
8803 if ( found == a->target )
8804 return rc;
8805 }
8806 }
8807 if ( next_sub_property(&state) )
8808 continue;
8809 if ( inverse_partial_triple(&state.pattern) )
8810 { state.p_cloud = NULL;
8811 continue;
8812 }
8813 break;
8814 }
8815 /* TBD: handle owl:sameAs */
8816 return rc;
8817 }
8818
8819
8820 static int
peek_agenda(rdf_db * db,agenda * a)8821 peek_agenda(rdf_db *db, agenda *a)
8822 { if ( a->to_return )
8823 return TRUE;
8824
8825 while( a->to_expand )
8826 { uintptr_t next_d = a->to_expand->distance+1;
8827
8828 if ( next_d > a->max_d )
8829 return FALSE;
8830
8831 a->to_return = bf_expand(db, a,
8832 a->to_expand->resource,
8833 next_d,
8834 a->query);
8835 a->to_expand = a->to_expand->next;
8836
8837 if ( a->to_return )
8838 return TRUE;
8839 }
8840
8841 return FALSE;
8842 }
8843
8844
8845 static visited *
next_agenda(rdf_db * db,agenda * a)8846 next_agenda(rdf_db *db, agenda *a)
8847 { if ( peek_agenda(db, a) )
8848 { visited *v = a->to_return;
8849
8850 a->to_return = a->to_return->next;
8851
8852 return v;
8853 }
8854
8855 return NULL;
8856 }
8857
8858
8859 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
8860 rdf_reachable(+Subject, +Predicate, -Object)
8861 rdf_reachable(-Subject, +Predicate, ?Object)
8862 Examine transitive relations, reporting all `Object' that can be
8863 reached from `Subject' using Predicate without going into a loop
8864 if the relation is cyclic.
8865
directly_attached() deals with the possibility that the predicate is not
8867 defined and Subject and Object are the same. Should use clean error
8868 handling, but that means a lot of changes. For now this will do.
8869
8870 TBD: Implement bi-directional search if both Subject and Object are
8871 given.
8872 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
8873
8874 static int
directly_attached(term_t pred,term_t from,term_t to)8875 directly_attached(term_t pred, term_t from, term_t to)
8876 { if ( PL_is_atom(pred) && PL_is_atom(from) )
8877 return PL_unify(to, from);
8878
8879 return FALSE;
8880 }
8881
8882
8883 static int
unify_distance(term_t d,uintptr_t dist)8884 unify_distance(term_t d, uintptr_t dist)
8885 { if ( d )
8886 return PL_unify_integer(d, dist);
8887
8888 return TRUE;
8889 }
8890
8891
8892 static foreign_t
rdf_reachable(term_t subj,term_t pred,term_t obj,term_t max_d,term_t d,control_t h)8893 rdf_reachable(term_t subj, term_t pred, term_t obj,
8894 term_t max_d, term_t d,
8895 control_t h)
8896 { rdf_db *db = rdf_current_db();
8897 query *q;
8898
8899 switch(PL_foreign_control(h))
8900 { case PL_FIRST_CALL:
8901 { visited *v;
8902 agenda *a;
8903 term_t target_term;
8904 int is_det = FALSE;
8905
8906 if ( PL_is_variable(pred) )
8907 return PL_instantiation_error(pred);
8908
8909 if ( !(q = open_query(db)) )
8910 return FALSE;
8911 a = &q->state.tr_search;
8912 memset(a, 0, sizeof(*a));
8913 a->query = q;
8914
8915 if ( max_d )
8916 { long md;
8917 atom_t inf;
8918
8919 if ( PL_get_atom(max_d, &inf) && inf == ATOM_infinite )
8920 { a->max_d = (uintptr_t)-1;
8921 } else
8922 { if ( !PL_get_long_ex(max_d, &md) || md < 0 )
8923 { close_query(q);
8924 return FALSE;
8925 }
8926 a->max_d = md;
8927 }
8928 } else
8929 { a->max_d = (uintptr_t)-1;
8930 }
8931
8932 if ( !PL_is_variable(subj) ) /* subj .... obj */
8933 { switch(get_partial_triple(db, subj, pred, 0, 0, &a->pattern))
8934 { case 0:
8935 { close_query(q);
8936 return directly_attached(pred, subj, obj) &&
8937 unify_distance(d, 0);
8938 }
8939 case -1:
8940 { close_query(q);
8941 return FALSE;
8942 }
8943 }
8944 is_det = PL_is_ground(obj);
8945 if ( a->pattern.object_is_literal )
8946 { close_query(q);
8947 return FALSE; /* rdf_reachable(literal(...),?,?) */
8948 }
8949 target_term = obj;
8950 } else if ( !PL_is_variable(obj) ) /* obj .... subj */
8951 { switch(get_partial_triple(db, 0, pred, obj, 0, &a->pattern))
8952 { case 0:
8953 { close_query(q);
8954 return directly_attached(pred, obj, subj);
8955 }
8956 case -1:
8957 { close_query(q);
8958 return FALSE;
8959 }
8960 }
8961 if ( a->pattern.object_is_literal )
8962 { close_query(q);
8963 return FALSE; /* rdf_reachable(-,+,literal(...)) */
8964 }
8965 target_term = subj;
8966 } else
8967 { close_query(q);
8968 return PL_instantiation_error(subj);
8969 }
8970
8971 if ( (a->pattern.indexed & BY_S) ) /* subj ... */
8972 append_agenda(db, a, ID_ATOM(a->pattern.subject_id), 0);
8973 else
8974 append_agenda(db, a, a->pattern.object.resource, 0);
8975 a->to_return = a->head;
8976 a->to_expand = a->head;
8977
8978 while( (v=next_agenda(db, a)) )
8979 { if ( PL_unify_atom(target_term, v->resource) )
8980 { if ( is_det ) /* mode(+, +, +) */
8981 { int rc = unify_distance(d, v->distance);
8982 empty_agenda(db, a);
8983 return rc;
8984 } else if ( unify_distance(d, v->distance) )
8985 { /* mode(+, +, -) or mode(-, +, +) */
8986 if ( peek_agenda(db, a) )
8987 PL_retry_address(a);
8988
8989 empty_agenda(db, a);
8990 return TRUE;
8991 }
8992 }
8993 }
8994 empty_agenda(db, a);
8995 return FALSE;
8996 }
8997 case PL_REDO:
8998 { agenda *a = PL_foreign_context_address(h);
8999 term_t target_term;
9000 visited *v;
9001
9002 if ( !PL_is_variable(subj) ) /* +, +, - */
9003 target_term = obj;
9004 else
9005 target_term = subj; /* -, +, + */
9006
9007 while( (v=next_agenda(db, a)) )
9008 { if ( PL_unify_atom(target_term, v->resource) &&
9009 unify_distance(d, v->distance) )
9010 { if ( peek_agenda(db, a) )
9011 { PL_retry_address(a);
9012 } else
9013 { empty_agenda(db, a);
9014 return TRUE;
9015 }
9016 }
9017 }
9018
9019 empty_agenda(db, a);
9020 return FALSE;
9021 }
9022 case PL_PRUNED:
9023 { agenda *a = PL_foreign_context_address(h);
9024
9025 DEBUG(9, Sdprintf("Cutted; agenda = %p\n", a));
9026
9027 empty_agenda(db, a);
9028 return TRUE;
9029 }
9030 default:
9031 assert(0);
9032 return FALSE;
9033 }
9034 }
9035
9036 static foreign_t
rdf_reachable3(term_t subj,term_t pred,term_t obj,control_t h)9037 rdf_reachable3(term_t subj, term_t pred, term_t obj, control_t h)
9038 { return rdf_reachable(subj, pred, obj, 0, 0, h);
9039 }
9040
9041 static foreign_t
rdf_reachable5(term_t subj,term_t pred,term_t obj,term_t max_d,term_t d,control_t h)9042 rdf_reachable5(term_t subj, term_t pred, term_t obj, term_t max_d, term_t d,
9043 control_t h)
9044 { return rdf_reachable(subj, pred, obj, max_d, d, h);
9045 }
9046
9047
9048 /*******************************
9049 * STATISTICS *
9050 *******************************/
9051
/* Functors of the keys known to rdf_statistics(); the enumeration there
   stops at the first 0 entry.
*/
static functor_t keys[16];		/* initialised in install_rdf_db() */
9053
9054 static int
unify_statistics(rdf_db * db,term_t key,functor_t f)9055 unify_statistics(rdf_db *db, term_t key, functor_t f)
9056 { int64_t v;
9057
9058 if ( f == FUNCTOR_triples1 )
9059 { v = db->created - db->erased;
9060 } else if ( f == FUNCTOR_resources1 )
9061 { v = db->resources.hash.count;
9062 } else if ( f == FUNCTOR_predicates1 )
9063 { v = db->predicates.count;
9064 } else if ( f == FUNCTOR_graphs1 )
9065 { v = db->graphs.count - db->graphs.erased;
9066 } else if ( f == FUNCTOR_indexed16 )
9067 { int i;
9068 term_t a = PL_new_term_ref();
9069
9070 if ( !PL_unify_functor(key, FUNCTOR_indexed16) )
9071 return FALSE;
9072 for(i=0; i<16; i++)
9073 { if ( !PL_get_arg(i+1, key, a) ||
9074 !PL_unify_integer(a, db->indexed[i]) )
9075 return FALSE;
9076 }
9077
9078 return TRUE;
9079 } else if ( f == FUNCTOR_hash_quality1 )
9080 { term_t tail, list = PL_new_term_ref();
9081 term_t head = PL_new_term_ref();
9082 term_t tmp = PL_new_term_ref();
9083 term_t av = PL_new_term_refs(4);
9084 int i;
9085
9086 if ( !PL_unify_functor(key, FUNCTOR_hash_quality1) )
9087 return FALSE;
9088 _PL_get_arg(1, key, list);
9089 tail = PL_copy_term_ref(list);
9090
9091 for(i=1; i<INDEX_TABLES; i++)
9092 { if ( db->hash[i].created )
9093 { if ( !PL_unify_list(tail, head, tail) ||
9094 !PL_put_integer(av+0, col_index[i]) ||
9095 !PL_put_integer(av+1, db->hash[i].bucket_count) ||
9096 !PL_put_float(av+2, triple_hash_quality(db, i, 1024)) ||
9097 !PL_put_integer(av+3, MSB(db->hash[i].bucket_count)-
9098 MSB(db->hash[i].bucket_count_epoch)) ||
9099 !PL_cons_functor_v(tmp, FUNCTOR_hash4, av) ||
9100 !PL_unify(head, tmp) )
9101 return FALSE;
9102 }
9103 }
9104
9105 return PL_unify_nil(tail);
9106 } else if ( f == FUNCTOR_searched_nodes1 )
9107 { v = db->agenda_created;
9108 } else if ( f == FUNCTOR_duplicates1 )
9109 { if ( db->duplicates_up_to_date == FALSE )
9110 return FALSE;
9111 v = db->duplicates;
9112 } else if ( f == FUNCTOR_lingering1 )
9113 { v = db->lingering;
9114 } else if ( f == FUNCTOR_literals1 )
9115 { v = db->literals.count;
9116 } else if ( f == FUNCTOR_triples2 && PL_is_functor(key, f) )
9117 { graph *src;
9118 term_t a = PL_new_term_ref();
9119 atom_t name;
9120
9121 _PL_get_arg(1, key, a);
9122 if ( !PL_get_atom_ex(a, &name) )
9123 return FALSE;
9124 if ( (src = existing_graph(db, name)) )
9125 v = src->triple_count;
9126 else
9127 v = 0;
9128
9129 _PL_get_arg(2, key, a);
9130 return PL_unify_int64(a, v);
9131 } else if ( f == FUNCTOR_gc4 )
9132 { return PL_unify_term(key,
9133 PL_FUNCTOR, f,
9134 PL_INT, (int)db->gc.count,
9135 PL_INT64, (int64_t)db->gc.reclaimed_triples,
9136 PL_INT64, (int64_t)db->reindexed,
9137 PL_FLOAT, (double)db->gc.time); /* time spent */
9138 } else
9139 { assert(0);
9140 return FALSE;
9141 }
9142
9143 return PL_unify_term(key, PL_FUNCTOR, f, PL_INT64, v);
9144 }
9145
9146 static foreign_t
rdf_statistics(term_t key,control_t h)9147 rdf_statistics(term_t key, control_t h)
9148 { int n;
9149 rdf_db *db = rdf_current_db();
9150
9151 switch(PL_foreign_control(h))
9152 { case PL_FIRST_CALL:
9153 { functor_t f;
9154
9155 if ( PL_is_variable(key) )
9156 { n = 0;
9157 goto redo;
9158 } else if ( PL_get_functor(key, &f) )
9159 { for(n=0; keys[n]; n++)
9160 { if ( keys[n] == f )
9161 return unify_statistics(db, key, f);
9162 }
9163 return PL_domain_error("rdf_statistics", key);
9164 } else
9165 return PL_type_error("rdf_statistics", key);
9166 }
9167 case PL_REDO:
9168 n = (int)PL_foreign_context(h);
9169 redo:
9170 unify_statistics(db, key, keys[n]);
9171 n++;
9172 if ( keys[n] )
9173 PL_retry(n);
9174 case PL_PRUNED:
9175 return TRUE;
9176 default:
9177 assert(0);
9178 return TRUE;
9179 }
9180 }
9181
9182
9183 /** rdf_generation(-Generation) is det.
9184
9185 True when Generation is the current reading generation. If we are
9186 inside a modified transaction, Generation has the format Base+TrGen,
9187 where TrGen expresses the generation inside the transaction.
9188 */
9189
9190 static foreign_t
rdf_generation(term_t t)9191 rdf_generation(term_t t)
9192 { rdf_db *db = rdf_current_db();
9193 query *q = open_query(db);
9194 int rc;
9195
9196 if ( !q ) return FALSE;
9197 if ( q->tr_gen > q->stack->tr_gen_base )
9198 { assert(q->tr_gen < q->stack->tr_gen_max);
9199
9200 rc = PL_unify_term(t, PL_FUNCTOR, FUNCTOR_plus2,
9201 PL_INT64, q->rd_gen,
9202 PL_INT64, q->tr_gen - q->stack->tr_gen_base);
9203 } else
9204 { rc = PL_unify_int64(t, q->rd_gen);
9205 }
9206
9207 close_query(q);
9208
9209 return rc;
9210 }
9211
9212
9213 /** rdf_snapshot(-Snapshot) is det.
9214
9215 True when Snapshot is a handle to the current state of the database.
9216 */
9217
9218 static foreign_t
rdf_snapshot(term_t t)9219 rdf_snapshot(term_t t)
9220 { rdf_db *db = rdf_current_db();
9221 snapshot *s = new_snapshot(db);
9222
9223 if ( !s )
9224 return FALSE;
9225 return unify_snapshot(t, s);
9226 }
9227
9228
9229 /*******************************
9230 * CONTROL INDEXING *
9231 *******************************/
9232
9233 /** rdf_set(+What)
9234
9235 Set aspect of the RDF database. What is one of:
9236
9237 * hash(Which, Parameter, Value)
9238
	Where Parameter is one of =size=, =optimize_threshold= or
	=average_chain_len= and Which is one of =s=, =p=, etc.
9241 */
9242
9243 static int
get_index_name(term_t t,int * index)9244 get_index_name(term_t t, int *index)
9245 { int i;
9246 char *s;
9247
9248 if ( !PL_get_chars(t, &s, CVT_ATOM|CVT_EXCEPTION) )
9249 return FALSE;
9250
9251 for(i=1; i<INDEX_TABLES; i++)
9252 { if ( strcmp(s, col_name[i]) == 0 )
9253 { *index = i;
9254 return TRUE;
9255 }
9256 }
9257
9258 PL_domain_error("index", t);
9259 return FALSE;
9260 }
9261
9262
9263 static foreign_t
rdf_set(term_t what)9264 rdf_set(term_t what)
9265 { rdf_db *db = rdf_current_db();
9266
9267 if ( PL_is_functor(what, FUNCTOR_hash3) )
9268 { term_t arg = PL_new_term_ref();
9269 int index;
9270 int value;
9271 atom_t param;
9272
9273 _PL_get_arg(1, what, arg);
9274 if ( !get_index_name(arg, &index) )
9275 return FALSE;
9276
9277 _PL_get_arg(3, what, arg);
9278 if ( !PL_get_integer_ex(arg, &value) )
9279 return FALSE;
9280
9281 _PL_get_arg(2, what, arg);
9282 if ( !PL_get_atom_ex(arg, ¶m) )
9283 return FALSE;
9284
9285 if ( param == ATOM_size )
9286 { if ( size_triple_hash(db, index, value) )
9287 { db->hash[index].user_size = MSB(value);
9288 return TRUE;
9289 }
9290 if ( value <= 0 || MSB(value) >= MAX_TBLOCKS )
9291 return PL_domain_error("hash_size", arg);
9292 /* cannot shrink */
9293 return PL_permission_error("size", "hash", arg);
9294 } else if ( param == ATOM_optimize_threshold )
9295 { if ( value >= 0 && value < 20 )
9296 db->hash[index].optimize_threshold = value;
9297 else
9298 return PL_domain_error("optimize_threshold", arg);
9299 } else if ( param == ATOM_average_chain_len )
9300 { if ( value >= 0 && value < 20 )
9301 db->hash[index].avg_chain_len = value;
9302 return PL_domain_error("average_chain_len", arg);
9303 } else
9304 return PL_domain_error("rdf_hash_parameter", arg);
9305
9306 return TRUE;
9307 }
9308
9309 return PL_type_error("rdf_setting", what);
9310 }
9311
9312
9313 static foreign_t
rdf_update_duplicates(void)9314 rdf_update_duplicates(void)
9315 { rdf_db *db = rdf_current_db();
9316
9317 return update_duplicates(db);
9318 }
9319
9320
9321 /** rdf_warm_indexes(+List) is det.
9322 */
9323
9324 static foreign_t
rdf_warm_indexes(term_t indexes)9325 rdf_warm_indexes(term_t indexes)
9326 { int il[16];
9327 int ic = 0;
9328 term_t tail = PL_copy_term_ref(indexes);
9329 term_t head = PL_new_term_ref();
9330 rdf_db *db = rdf_current_db();
9331
9332 while(PL_get_list_ex(tail, head, tail))
9333 { char *s;
9334
9335 if ( PL_get_chars(head, &s, CVT_ATOM|CVT_STRING|CVT_EXCEPTION) )
9336 { int by = 0;
9337 int i;
9338
9339 for(; *s; s++)
9340 { switch(*s)
9341 { case 's': by |= BY_S; break;
9342 case 'p': by |= BY_P; break;
9343 case 'o': by |= BY_O; break;
9344 case 'g': by |= BY_G; break;
9345 default: return PL_domain_error("rdf_index", head);
9346 }
9347 }
9348
9349 if ( index_col[by] == ~0 )
9350 return PL_existence_error("rdf_index", head);
9351
9352 for(i=0; i<ic; i++)
9353 { if ( il[i] == by )
9354 break;
9355 }
9356 if ( i == ic )
9357 il[ic++] = ICOL(by);
9358 } else
9359 return 0;
9360 }
9361 if ( !PL_get_nil_ex(tail) )
9362 return FALSE;
9363
9364 create_triple_hashes(db, ic, il);
9365
9366 return TRUE;
9367 }
9368
9369
9370 static foreign_t
pl_empty_prefix_table(void)9371 pl_empty_prefix_table(void)
9372 { rdf_db *db = rdf_current_db();
9373
9374 empty_prefix_table(db);
9375
9376 return TRUE;
9377 }
9378
9379
9380 /*******************************
9381 * RESET *
9382 *******************************/
9383
9384 static void
erase_triples(rdf_db * db)9385 erase_triples(rdf_db *db)
9386 { triple *t, *n;
9387 int i;
9388
9389 for(t=fetch_triple(db, db->by_none.head); t; t=n)
9390 { n = triple_follow_hash(db, t, ICOL(BY_NONE));
9391
9392 free_triple(db, t, FALSE); /* ? */
9393 }
9394 db->by_none.head = db->by_none.tail = 0;
9395
9396 for(i=BY_S; i<INDEX_TABLES; i++)
9397 { triple_hash *hash = &db->hash[i];
9398
9399 reset_triple_hash(db, hash);
9400 }
9401 reset_triple_array(db);
9402
9403 db->created = 0;
9404 db->erased = 0;
9405 memset(db->indexed, 0, sizeof(db->indexed));
9406 db->duplicates = 0;
9407 db->queries.generation = 0;
9408 }
9409
9410
9411 static void
erase_predicates(rdf_db * db)9412 erase_predicates(rdf_db *db)
9413 { int i;
9414
9415 for(i=0; i<db->predicates.bucket_count; i++)
9416 { predicate *n, *p = db->predicates.blocks[MSB(i)][i];
9417
9418 db->predicates.blocks[MSB(i)][i] = NULL;
9419
9420 for( ; p; p = n )
9421 { n = p->next;
9422
9423 free_list(db, &p->subPropertyOf);
9424 free_list(db, &p->siblings);
9425 if ( ++p->cloud->deleted == p->cloud->size )
9426 free_predicate_cloud(db, p->cloud);
9427 free_is_leaf(db, p);
9428
9429 rdf_free(db, p, sizeof(*p));
9430 }
9431 }
9432
9433 db->predicates.count = 0;
9434 }
9435
9436
9437 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9438 Reset the DB. It might be wiser to create a new one and have a separate
9439 thread deleting the old one (e.g. do this in GC).
9440 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
9441
9442 static int
reset_db(rdf_db * db)9443 reset_db(rdf_db *db)
9444 { int rc;
9445
9446 suspend_gc(db);
9447 simpleMutexLock(&db->locks.duplicates);
9448 erase_snapshots(db);
9449 erase_triples(db);
9450 erase_predicates(db);
9451 erase_resources(&db->resources);
9452 erase_graphs(db);
9453 empty_prefix_table(db);
9454 db->agenda_created = 0;
9455 skiplist_destroy(&db->literals);
9456
9457 rc = (init_resource_db(db, &db->resources) &&
9458 init_literal_table(db));
9459
9460 db->snapshots.keep = GEN_MAX;
9461 db->queries.generation = GEN_EPOCH;
9462
9463 simpleMutexUnlock(&db->locks.duplicates);
9464 resume_gc(db);
9465
9466 return rc;
9467 }
9468
9469
9470 /** rdf_reset_db
9471
9472 Reset the RDF database to its initial state. Only allowed if there
9473 are no active queries. This means that if the calling thread has
9474 open queries this must be considered a permission error. Otherwise
9475 we wait until all queries have died.
9476
9477 TBD: Check queries in other threads!
9478 */
9479
9480 static foreign_t
rdf_reset_db(void)9481 rdf_reset_db(void)
9482 { rdf_db *db = rdf_current_db();
9483 query *q;
9484 int rc;
9485
9486 db->resetting = TRUE;
9487 if ( !(q = open_query(db)) )
9488 return FALSE;
9489
9490 if ( q->depth > 0 || q->transaction )
9491 { close_query(q);
9492 return permission_error("reset", "rdf_db", "default",
9493 "Active queries");
9494 }
9495
9496 if ( !rdf_broadcast(EV_RESET, NULL, NULL) )
9497 return FALSE;
9498
9499 rc = reset_db(db);
9500 close_query(q);
9501 db->resetting = FALSE;
9502
9503 return rc;
9504 }
9505
9506
9507 static foreign_t
rdf_delete_snapshot(term_t t)9508 rdf_delete_snapshot(term_t t)
9509 { snapshot *ss;
9510 int rc;
9511
9512 if ( (rc=get_snapshot(t, &ss)) == TRUE )
9513 { if ( free_snapshot(ss) )
9514 return TRUE;
9515 rc = -1;
9516 }
9517
9518 if ( rc == -1 )
9519 return PL_existence_error("rdf_snapshot", t);
9520
9521 return PL_type_error("rdf_snapshot", t);
9522 }
9523
9524 #ifdef O_DEBUG
9525 static foreign_t
rdf_checks_literal_references(term_t l)9526 rdf_checks_literal_references(term_t l)
9527 { triple p, *t;
9528 triple_walker tw;
9529 long count = 0, refs = -1;
9530 term_t var = PL_new_term_ref();
9531 rdf_db *db = rdf_current_db();
9532
9533 memset(&p, 0, sizeof(p));
9534 if ( !get_partial_triple(db, var, var, l, 0, &p) )
9535 return FALSE;
9536 assert(p.object_is_literal);
9537
9538 init_triple_walker(&tw, db, &p, BY_O);
9539 while((t=next_triple(&tw)))
9540 { if ( match_object(t, &p, MATCH_QUAL) )
9541 { if ( count++ == 0 )
9542 { refs = (long)t->object.literal->references;
9543 }
9544 }
9545 }
9546 destroy_triple_walker(db, &tw);
9547
9548 if ( count != refs )
9549 { if ( refs == -1 )
9550 { Sdprintf("Not found in triples\n");
9551 } else
9552 { Sdprintf("Refs: %ld; counted: %ld; lit=", refs, count);
9553 print_literal(p.object.literal);
9554 Sdprintf("\n");
9555 }
9556
9557 return FALSE;
9558 }
9559
9560 return TRUE;
9561 }
9562 #endif
9563
9564 /*******************************
9565 * MATCH *
9566 *******************************/
9567
9568
9569 static int
get_text_ex(term_t term,text * txt)9570 get_text_ex(term_t term, text *txt)
9571 { memset(txt, 0, sizeof(*txt));
9572
9573 return ( PL_get_nchars(term, &txt->length, (char**)&txt->a,
9574 CVT_ATOM|CVT_STRING) ||
9575 PL_get_wchars(term, &txt->length, (pl_wchar_t**)&txt->w,
9576 CVT_ATOM|CVT_STRING|CVT_EXCEPTION)
9577 );
9578 }
9579
9580
9581
9582 static foreign_t
match_label(term_t how,term_t search,term_t label)9583 match_label(term_t how, term_t search, term_t label)
9584 { atom_t h;
9585 text f, l;
9586 int type;
9587
9588 if ( !PL_get_atom_ex(how, &h) ||
9589 !get_text_ex(search, &f) ||
9590 !get_text_ex(label, &l) )
9591 return FALSE;
9592
9593 if ( h == ATOM_exact )
9594 type = STR_MATCH_ICASE;
9595 if ( h == ATOM_icase )
9596 type = STR_MATCH_ICASE;
9597 else if ( h == ATOM_substring )
9598 type = STR_MATCH_SUBSTRING;
9599 else if ( h == ATOM_word )
9600 type = STR_MATCH_WORD;
9601 else if ( h == ATOM_prefix )
9602 type = STR_MATCH_PREFIX;
9603 else if ( h == ATOM_like )
9604 type = STR_MATCH_LIKE;
9605 else
9606 return PL_domain_error("search_method", how);
9607
9608 return match_text(type, &f, &l);
9609 }
9610
9611
9612 static foreign_t
lang_matches(term_t lang,term_t pattern)9613 lang_matches(term_t lang, term_t pattern)
9614 { atom_t l, p;
9615
9616 if ( !PL_get_atom_ex(lang, &l) ||
9617 !PL_get_atom_ex(pattern, &p) )
9618 return FALSE;
9619
9620 return atom_lang_matches(l, p);
9621 }
9622
9623
9624 static foreign_t
rdf_compare(term_t dif,term_t a,term_t b)9625 rdf_compare(term_t dif, term_t a, term_t b)
9626 { triple ta, tb;
9627 rdf_db *db = rdf_current_db();
9628 int rc;
9629
9630 memset(&ta, 0, sizeof(ta));
9631 memset(&tb, 0, sizeof(tb));
9632 if ( get_object(db, a, &ta) &&
9633 get_object(db, b, &tb) )
9634 { int d;
9635 atom_t ad;
9636
9637 if ( ta.object_is_literal &&
9638 tb.object_is_literal )
9639 { literal_ex lex;
9640 lex.literal = ta.object.literal;
9641 prepare_literal_ex(&lex);
9642 d = compare_literals(&lex, tb.object.literal);
9643 } else if ( !ta.object_is_literal && !tb.object_is_literal )
9644 { d = cmp_atoms(ta.object.resource, tb.object.resource);
9645 } else
9646 { d = ta.object_is_literal ? -1 : 1;
9647 }
9648
9649 ad = d < 0 ? ATOM_lt : d > 0 ? ATOM_gt : ATOM_eq;
9650
9651 rc = PL_unify_atom(dif, ad);
9652 } else
9653 { rc = FALSE;
9654 }
9655
9656 free_triple(db, &ta, FALSE);
9657 free_triple(db, &tb, FALSE);
9658
9659 return rc;
9660 }
9661
9662
9663 /*******************************
9664 * TEST *
9665 *******************************/
9666
9667 static foreign_t
rdf_is_bnode(term_t t)9668 rdf_is_bnode(term_t t)
9669 { size_t len;
9670 char *s;
9671
9672 if ( PL_get_nchars(t, &len, &s, CVT_ATOM) &&
9673 s[0] == '_' && (s[1] == ':' || s[1] == '_') )
9674 return TRUE;
9675
9676 return FALSE;
9677 }
9678
9679
9680 /*******************************
9681 * VERSION *
9682 *******************************/
9683
9684 static foreign_t
rdf_version(term_t v)9685 rdf_version(term_t v)
9686 { return PL_unify_integer(v, RDF_VERSION);
9687 }
9688
9689
9690 /*******************************
9691 * REGISTER *
9692 *******************************/
9693
9694 #define MKFUNCTOR(n, a) \
9695 FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
9696 #define NDET PL_FA_NONDETERMINISTIC
9697 #define META PL_FA_TRANSPARENT
9698
9699 install_t
install_rdf_db(void)9700 install_rdf_db(void)
9701 { int i=0;
9702 extern install_t install_atom_map(void);
9703
9704 simpleMutexInit(&rdf_lock);
9705 init_errors();
9706 register_resource_predicates();
9707
9708 MKFUNCTOR(literal, 1);
9709 MKFUNCTOR(triples, 1);
9710 MKFUNCTOR(triples, 2);
9711 MKFUNCTOR(resources, 1);
9712 MKFUNCTOR(predicates, 1);
9713 MKFUNCTOR(subject, 1);
9714 MKFUNCTOR(predicate, 1);
9715 MKFUNCTOR(object, 1);
9716 MKFUNCTOR(graph, 1);
9717 MKFUNCTOR(indexed, 16);
9718 MKFUNCTOR(exact, 1);
9719 MKFUNCTOR(icase, 1);
9720 MKFUNCTOR(plain, 1);
9721 MKFUNCTOR(substring, 1);
9722 MKFUNCTOR(word, 1);
9723 MKFUNCTOR(prefix, 1);
9724 MKFUNCTOR(like, 1);
9725 MKFUNCTOR(lt, 1);
9726 MKFUNCTOR(le, 1);
9727 MKFUNCTOR(between, 2);
9728 MKFUNCTOR(eq, 1);
9729 MKFUNCTOR(ge, 1);
9730 MKFUNCTOR(gt, 1);
9731 MKFUNCTOR(literal, 2);
9732 MKFUNCTOR(searched_nodes, 1);
9733 MKFUNCTOR(duplicates, 1);
9734 MKFUNCTOR(lingering, 1);
9735 MKFUNCTOR(literals, 1);
9736 MKFUNCTOR(symmetric, 1);
9737 MKFUNCTOR(transitive, 1);
9738 MKFUNCTOR(inverse_of, 1);
9739 MKFUNCTOR(lang, 2);
9740 MKFUNCTOR(type, 2);
9741 MKFUNCTOR(rdf_subject_branch_factor, 1);
9742 MKFUNCTOR(rdf_object_branch_factor, 1);
9743 MKFUNCTOR(rdfs_subject_branch_factor, 1);
9744 MKFUNCTOR(rdfs_object_branch_factor, 1);
9745 MKFUNCTOR(gc, 4);
9746 MKFUNCTOR(graphs, 1);
9747 MKFUNCTOR(assert, 4);
9748 MKFUNCTOR(retract, 4);
9749 MKFUNCTOR(update, 5);
9750 MKFUNCTOR(new_literal, 1);
9751 MKFUNCTOR(old_literal, 1);
9752 MKFUNCTOR(transaction, 2);
9753 MKFUNCTOR(load, 2);
9754 MKFUNCTOR(begin, 1);
9755 MKFUNCTOR(end, 1);
9756 MKFUNCTOR(create_graph, 1);
9757 MKFUNCTOR(hash_quality, 1);
9758 MKFUNCTOR(hash, 3);
9759 MKFUNCTOR(hash, 4);
9760
9761 FUNCTOR_colon2 = PL_new_functor(PL_new_atom(":"), 2);
9762 FUNCTOR_plus2 = PL_new_functor(PL_new_atom("+"), 2);
9763
9764 ATOM_user = PL_new_atom("user");
9765 ATOM_exact = PL_new_atom("exact");
9766 ATOM_icase = PL_new_atom("icase");
9767 ATOM_plain = PL_new_atom("plain");
9768 ATOM_prefix = PL_new_atom("prefix");
9769 ATOM_like = PL_new_atom("like");
9770 ATOM_substring = PL_new_atom("substring");
9771 ATOM_word = PL_new_atom("word");
9772 ATOM_subPropertyOf = PL_new_atom(URL_subPropertyOf);
9773 ATOM_xsdString = PL_new_atom(URL_xsdString);
9774 ATOM_xsdDouble = PL_new_atom(URL_xsdDouble);
9775 ATOM_error = PL_new_atom("error");
9776 ATOM_begin = PL_new_atom("begin");
9777 ATOM_end = PL_new_atom("end");
9778 ATOM_error = PL_new_atom("error");
9779 ATOM_infinite = PL_new_atom("infinite");
9780 ATOM_snapshot = PL_new_atom("snapshot");
9781 ATOM_true = PL_new_atom("true");
9782 ATOM_size = PL_new_atom("size");
9783 ATOM_optimize_threshold = PL_new_atom("optimize_threshold");
9784 ATOM_average_chain_len = PL_new_atom("average_chain_len");
9785 ATOM_reset = PL_new_atom("reset");
9786 ATOM_lt = PL_new_atom("<");
9787 ATOM_eq = PL_new_atom("=");
9788 ATOM_gt = PL_new_atom(">");
9789 ATOM_XSDString = PL_new_atom("http://www.w3.org/2001/XMLSchema#string");
9790
9791 PRED_call1 = PL_predicate("call", 1, "user");
9792
9793 /* statistics */
9794 keys[i++] = FUNCTOR_graphs1;
9795 keys[i++] = FUNCTOR_triples1;
9796 keys[i++] = FUNCTOR_resources1;
9797 keys[i++] = FUNCTOR_indexed16;
9798 keys[i++] = FUNCTOR_hash_quality1;
9799 keys[i++] = FUNCTOR_predicates1;
9800 keys[i++] = FUNCTOR_searched_nodes1;
9801 keys[i++] = FUNCTOR_duplicates1;
9802 keys[i++] = FUNCTOR_lingering1;
9803 keys[i++] = FUNCTOR_literals1;
9804 keys[i++] = FUNCTOR_triples2;
9805 keys[i++] = FUNCTOR_gc4;
9806 keys[i++] = 0;
9807 assert(i<=16);
9808
9809 check_index_tables();
9810 /* see struct triple */
9811 assert(sizeof(literal) <= sizeof(triple*)*INDEX_TABLES);
9812
9813 PL_register_foreign("rdf_version", 1, rdf_version, 0);
9814 PL_register_foreign("rdf_assert", 3, rdf_assert3, 0);
9815 PL_register_foreign("rdf_assert", 4, rdf_assert4, 0);
9816 PL_register_foreign("rdf_update", 4, rdf_update, 0);
9817 PL_register_foreign("rdf_update", 5, rdf_update5, 0);
9818 PL_register_foreign("rdf_retractall", 3, rdf_retractall3, 0);
9819 PL_register_foreign("rdf_retractall", 4, rdf_retractall4, 0);
9820 PL_register_foreign("rdf", 3, rdf3, NDET);
9821 PL_register_foreign("rdf", 4, rdf4, NDET);
9822 PL_register_foreign("rdf_has", 4, rdf_has4, NDET);
9823 PL_register_foreign("rdf_has", 3, rdf_has3, NDET);
9824 PL_register_foreign("rdf_gc_", 0, rdf_gc, 0);
9825 PL_register_foreign("rdf_add_gc_time",1, rdf_add_gc_time, 0);
9826 PL_register_foreign("rdf_gc_info_", 1, rdf_gc_info, 0);
9827 PL_register_foreign("rdf_statistics_",1, rdf_statistics, NDET);
9828 PL_register_foreign("rdf_set", 1, rdf_set, 0);
9829 PL_register_foreign("rdf_update_duplicates",
9830 0, rdf_update_duplicates, 0);
9831 PL_register_foreign("rdf_warm_indexes",
9832 1, rdf_warm_indexes,0);
9833 PL_register_foreign("rdf_generation", 1, rdf_generation, 0);
9834 PL_register_foreign("rdf_snapshot", 1, rdf_snapshot, 0);
9835 PL_register_foreign("rdf_delete_snapshot", 1, rdf_delete_snapshot, 0);
9836 PL_register_foreign("rdf_match_label",3, match_label, 0);
9837 PL_register_foreign("rdf_save_db_", 3, rdf_save_db, 0);
9838 PL_register_foreign("rdf_load_db_", 3, rdf_load_db, 0);
9839 PL_register_foreign("rdf_reachable", 3, rdf_reachable3, NDET);
9840 PL_register_foreign("rdf_reachable", 5, rdf_reachable5, NDET);
9841 PL_register_foreign("rdf_reset_db_", 0, rdf_reset_db, 0);
9842 PL_register_foreign("rdf_set_predicate",
9843 2, rdf_set_predicate, 0);
9844 PL_register_foreign("rdf_predicate_property_",
9845 2, rdf_predicate_property, NDET);
9846 PL_register_foreign("rdf_current_predicate",
9847 1, rdf_current_predicate, NDET);
9848 PL_register_foreign("rdf_current_literal",
9849 1, rdf_current_literal, NDET);
9850 PL_register_foreign("rdf_graph_", 2, rdf_graph, NDET);
9851 PL_register_foreign("rdf_create_graph", 1, rdf_create_graph, 0);
9852 PL_register_foreign("rdf_destroy_graph", 1, rdf_destroy_graph, 0);
9853 PL_register_foreign("rdf_set_graph_source", 3, rdf_set_graph_source, 0);
9854 PL_register_foreign("rdf_graph_source_", 3, rdf_graph_source, 0);
9855 PL_register_foreign("rdf_estimate_complexity",
9856 4, rdf_estimate_complexity, 0);
9857 PL_register_foreign("rdf_transaction", 3, rdf_transaction, META);
9858 PL_register_foreign("rdf_active_transactions_",
9859 1, rdf_active_transactions, 0);
9860 PL_register_foreign("rdf_monitor_", 2, rdf_monitor, META);
9861 PL_register_foreign("rdf_empty_prefix_cache",
9862 0, pl_empty_prefix_table, 0);
9863 PL_register_foreign("rdf_is_bnode", 1, rdf_is_bnode, 0);
9864 #ifdef WITH_MD5
9865 PL_register_foreign("rdf_md5", 2, rdf_md5, 0);
9866 PL_register_foreign("rdf_graph_modified_", 3, rdf_graph_modified_, 0);
9867 PL_register_foreign("rdf_graph_clear_modified_",
9868 1, rdf_graph_clear_modified_, 0);
9869 PL_register_foreign("rdf_atom_md5", 3, rdf_atom_md5, 0);
9870 #endif
9871
9872 #ifdef O_DEBUG
9873 PL_register_foreign("rdf_debug", 1, rdf_debug, 0);
9874 PL_register_foreign("rdf_print_predicate_cloud", 2,
9875 rdf_print_predicate_cloud, 0);
9876 PL_register_foreign("rdf_checks_literal_references", 1,
9877 rdf_checks_literal_references, 0);
9878 #endif
9879
9880 PL_register_foreign("lang_matches", 2, lang_matches, 0);
9881 PL_register_foreign("rdf_compare", 3, rdf_compare, 0);
9882
9883 install_atom_map();
9884 }
9885