1 /* $Id$
2
3 Part of SWI-Prolog
4
5 Author: Jan Wielemaker
6 E-mail: J.Wielemaker@uva.nl
7 WWW: http://www.swi-prolog.org
8 Copyright (C): 1985-2009, University of Amsterdam
9
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
14
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public
21 License along with this library; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25 #ifdef HAVE_CONFIG_H
26 #include <config.h>
27 #endif
28
29 #define WITH_MD5 1
30 #define WITH_PL_MUTEX 1
31 #define _GNU_SOURCE 1 /* get rwlocks from glibc */
32
33 #ifdef _REENTRANT
34 #ifdef __WINDOWS__
35 #include <malloc.h> /* alloca() */
36 #define inline __inline
37 #ifndef SIZEOF_LONG
38 #define SIZEOF_LONG 4
39 #endif
40 #else
41 #if (!defined(__GNUC__) || defined(__hpux)) && defined(HAVE_ALLOCA_H)
42 #include <alloca.h>
43 #endif
44 #include <errno.h>
45 #endif
46 #endif
47
48 #include <SWI-Stream.h>
49 #include <SWI-Prolog.h>
50 #include "rdf_db.h"
51 #include <assert.h>
52 #include <string.h>
53 #include <wchar.h>
54 #include <wctype.h>
55 #include <ctype.h>
56 #include "avl.h"
57 #ifdef WITH_MD5
58 #include "md5.h"
59 #include "atom.h"
60 #include "debug.h"
61 #include "hash.h"
62 #include "murmur.h"
63
64 #undef UNLOCK
65
66 static void md5_triple(triple *t, md5_byte_t *digest);
67 static void sum_digest(md5_byte_t *digest, md5_byte_t *add);
68 static void dec_digest(md5_byte_t *digest, md5_byte_t *add);
69 #endif
70
71
72 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
73 The ids form a mask. This must be kept consistent with monitor_mask/2 in
74 rdf_db.pl!
75 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
76
77 typedef enum
78 { EV_ASSERT = 0x0001, /* triple */
79 EV_ASSERT_LOAD = 0x0002, /* triple */
80 EV_RETRACT = 0x0004, /* triple */
81 EV_UPDATE = 0x0008, /* old, new */
82 EV_NEW_LITERAL = 0x0010, /* literal */
83 EV_OLD_LITERAL = 0x0020, /* literal */
84 EV_TRANSACTION = 0x0040, /* id, begin/end */
85 EV_LOAD = 0x0080, /* id, begin/end */
86 EV_REHASH = 0x0100 /* begin/end */
87 } broadcast_id;
88
89 static int broadcast(broadcast_id id, void *a1, void *a2);
90
91
92 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
93 We now use malloc/free/realloc calls with explicit sizes to allow
94 maintaining statistics as well as to prepare for dealing with special
95 memory pools associated with databases. Using -DDIRECT_MALLOC the
96 library uses plain malloc to facilitate malloc debuggers.
97 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
98
99 #ifdef DIRECT_MALLOC
100
101 #define rdf_malloc(db, size) malloc(size)
102 #define rdf_free(db, ptr, size) free(ptr)
103 #define rdf_realloc(db, ptr, old, new) realloc(ptr, new)
104
105 #else /*DIRECT_MALLOC*/
106
107 #if CHECK_MALLOC_SIZES
108 static void *
rdf_malloc(rdf_db * db,size_t size)109 rdf_malloc(rdf_db *db, size_t size)
110 { size_t bytes = size + sizeof(size_t);
111 size_t *ptr = PL_malloc(bytes);
112
113 *ptr++ = size;
114 if ( db )
115 db->core += size;
116
117 return ptr;
118 }
119
120 static void
rdf_free(rdf_db * db,void * ptr,size_t size)121 rdf_free(rdf_db *db, void *ptr, size_t size)
122 { size_t *p = ptr;
123
124 assert(p[-1] == size);
125
126 db->core -= size;
127 PL_free(&p[-1]);
128 }
129
130
131 static void *
rdf_realloc(rdf_db * db,void * ptr,size_t old,size_t new)132 rdf_realloc(rdf_db *db, void *ptr, size_t old, size_t new)
133 { size_t *p = ptr;
134 size_t bytes = new + sizeof(size_t);
135
136 assert(p[-1] == old);
137 p = PL_realloc(&p[-1], bytes);
138 *p++ = new;
139 db->core< += new-old;
140
141 return p;
142 }
143
144 #else /*CHECK_MALLOC_SIZES*/
145
146 static void *
rdf_malloc(rdf_db * db,size_t size)147 rdf_malloc(rdf_db *db, size_t size)
148 { if ( db )
149 db->core += size;
150
151 return PL_malloc(size);
152 }
153
154 static void
rdf_free(rdf_db * db,void * ptr,size_t size)155 rdf_free(rdf_db *db, void *ptr, size_t size)
156 { db->core -= size;
157
158 PL_free(ptr);
159 }
160
161
162 static void *
rdf_realloc(rdf_db * db,void * ptr,size_t old,size_t new)163 rdf_realloc(rdf_db *db, void *ptr, size_t old, size_t new)
164 { db->core += new-old;
165
166 return PL_realloc(ptr, new);
167 }
168
169 #endif /*CHECK_MALLOC_SIZES*/
170 #endif /*DIRECT_MALLOC*/
171
172 static functor_t FUNCTOR_literal1;
173 static functor_t FUNCTOR_literal2;
174 static functor_t FUNCTOR_error2;
175 static functor_t FUNCTOR_type_error2;
176 static functor_t FUNCTOR_domain_error2;
177 static functor_t FUNCTOR_colon2;
178
179 static functor_t FUNCTOR_triples1;
180 static functor_t FUNCTOR_triples2;
181 static functor_t FUNCTOR_subjects1;
182 static functor_t FUNCTOR_predicates1;
183 static functor_t FUNCTOR_duplicates1;
184 static functor_t FUNCTOR_literals1;
185 static functor_t FUNCTOR_subject1;
186 static functor_t FUNCTOR_predicate1;
187 static functor_t FUNCTOR_object1;
188 static functor_t FUNCTOR_graph1;
189 static functor_t FUNCTOR_indexed8;
190
191 static functor_t FUNCTOR_exact1;
192 static functor_t FUNCTOR_plain1;
193 static functor_t FUNCTOR_substring1;
194 static functor_t FUNCTOR_word1;
195 static functor_t FUNCTOR_prefix1;
196 static functor_t FUNCTOR_like1;
197
198 static functor_t FUNCTOR_symmetric1;
199 static functor_t FUNCTOR_inverse_of1;
200 static functor_t FUNCTOR_transitive1;
201 static functor_t FUNCTOR_rdf_subject_branch_factor1; /* S --> BF*O */
202 static functor_t FUNCTOR_rdf_object_branch_factor1; /* O --> BF*S */
203 static functor_t FUNCTOR_rdfs_subject_branch_factor1; /* S --> BF*O */
204 static functor_t FUNCTOR_rdfs_object_branch_factor1; /* O --> BF*S */
205
206 static functor_t FUNCTOR_searched_nodes1;
207 static functor_t FUNCTOR_lang2;
208 static functor_t FUNCTOR_type2;
209
210 static functor_t FUNCTOR_gc2;
211 static functor_t FUNCTOR_rehash2;
212 static functor_t FUNCTOR_core1;
213
214 static functor_t FUNCTOR_assert4;
215 static functor_t FUNCTOR_retract4;
216 static functor_t FUNCTOR_update5;
217 static functor_t FUNCTOR_new_literal1;
218 static functor_t FUNCTOR_old_literal1;
219 static functor_t FUNCTOR_transaction2;
220 static functor_t FUNCTOR_load2;
221 static functor_t FUNCTOR_rehash1;
222 static functor_t FUNCTOR_begin1;
223 static functor_t FUNCTOR_end1;
224
225 static atom_t ATOM_user;
226 static atom_t ATOM_exact;
227 static atom_t ATOM_plain;
228 static atom_t ATOM_prefix;
229 static atom_t ATOM_substring;
230 static atom_t ATOM_word;
231 static atom_t ATOM_like;
232 static atom_t ATOM_error;
233 static atom_t ATOM_begin;
234 static atom_t ATOM_end;
235 static atom_t ATOM_infinite;
236
237 static atom_t ATOM_subPropertyOf;
238
239 static predicate_t PRED_call1;
240
241 #define MATCH_EXACT 0x01 /* exact triple match */
242 #define MATCH_SUBPROPERTY 0x02 /* Use subPropertyOf relations */
243 #define MATCH_SRC 0x04 /* Match graph location */
244 #define MATCH_INVERSE 0x08 /* use symmetric match too */
245 #define MATCH_QUAL 0x10 /* Match qualifiers too */
246 #define MATCH_DUPLICATE (MATCH_EXACT|MATCH_QUAL)
247
248 static int WANT_GC(rdf_db *db);
249 static int match_triples(triple *t, triple *p, unsigned flags);
250 static int update_duplicates_add(rdf_db *db, triple *t);
251 static void update_duplicates_del(rdf_db *db, triple *t);
252 static void unlock_atoms(triple *t);
253 static void lock_atoms(triple *t);
254 static void unlock_atoms_literal(literal *lit);
255 static int update_hash(rdf_db *db);
256 static int triple_hash(rdf_db *db, triple *t, int which);
257 static unsigned long object_hash(triple *t);
258 static void reset_db(rdf_db *db);
259
260 static void record_transaction(rdf_db *db,
261 tr_type type, triple *t);
262 static void record_md5_transaction(rdf_db *db,
263 graph *src, md5_byte_t *digest);
264 static void create_reachability_matrix(rdf_db *db, predicate_cloud *cloud);
265 static int get_predicate(rdf_db *db, term_t t, predicate **p);
266 static predicate_cloud *new_predicate_cloud(rdf_db *db, predicate **p, size_t count);
267 static int unify_literal(term_t lit, literal *l);
268 static int check_predicate_cloud(predicate_cloud *c);
269
270
271 /*******************************
272 * LOCKING *
273 *******************************/
274
275 #define RDLOCK(db) rdlock(&db->lock)
276 #define WRLOCK(db, allowreaders) wrlock(&db->lock, allowreaders)
277 #define LOCKOUT_READERS(db) lockout_readers(&db->lock)
278 #define REALLOW_READERS(db) reallow_readers(&db->lock)
279 #define WRUNLOCK(db) unlock(&db->lock, FALSE)
280 #define RDUNLOCK(db) unlock(&db->lock, TRUE)
281 #define LOCK_MISC(db) lock_misc(&db->lock)
282 #define UNLOCK_MISC(db) unlock_misc(&db->lock)
283 #define INIT_LOCK(db) init_lock(&db->lock)
284
285
286 /*******************************
287 * ERRORS *
288 *******************************/
289
290 static int
instantiation_error(term_t actual)291 instantiation_error(term_t actual)
292 { term_t ex;
293
294 if ( (ex = PL_new_term_ref()) &&
295 PL_unify_term(ex,
296 PL_FUNCTOR, FUNCTOR_error2,
297 PL_CHARS, "instantiation_error",
298 PL_VARIABLE) )
299 return PL_raise_exception(ex);
300
301 return FALSE;
302 }
303
304
305 static int
type_error(term_t actual,const char * expected)306 type_error(term_t actual, const char *expected)
307 { term_t ex;
308
309 if ( (ex = PL_new_term_ref()) &&
310 PL_unify_term(ex,
311 PL_FUNCTOR, FUNCTOR_error2,
312 PL_FUNCTOR, FUNCTOR_type_error2,
313 PL_CHARS, expected,
314 PL_TERM, actual,
315 PL_VARIABLE) )
316 return PL_raise_exception(ex);
317
318 return FALSE;
319 }
320
321
322 static int
domain_error(term_t actual,const char * expected)323 domain_error(term_t actual, const char *expected)
324 { term_t ex;
325
326 if ( (ex = PL_new_term_ref()) &&
327 PL_unify_term(ex,
328 PL_FUNCTOR, FUNCTOR_error2,
329 PL_FUNCTOR, FUNCTOR_domain_error2,
330 PL_CHARS, expected,
331 PL_TERM, actual,
332 PL_VARIABLE) )
333 return PL_raise_exception(ex);
334
335 return FALSE;
336 }
337
338
339 static int
permission_error(const char * op,const char * type,const char * obj,const char * msg)340 permission_error(const char *op, const char *type, const char *obj,
341 const char *msg)
342 { term_t ex, ctx;
343
344 if ( !(ex = PL_new_term_ref()) ||
345 !(ctx = PL_new_term_ref()) )
346 return FALSE;
347
348 if ( msg )
349 { if ( !PL_unify_term(ctx, PL_FUNCTOR_CHARS, "context", 2,
350 PL_VARIABLE,
351 PL_CHARS, msg) )
352 return FALSE;
353 }
354
355 if ( !PL_unify_term(ex, PL_FUNCTOR_CHARS, "error", 2,
356 PL_FUNCTOR_CHARS, "permission_error", 3,
357 PL_CHARS, op,
358 PL_CHARS, type,
359 PL_CHARS, obj,
360 PL_TERM, ctx) )
361 return FALSE;
362
363 return PL_raise_exception(ex);
364 }
365
366
367 static int
get_atom_ex(term_t t,atom_t * a)368 get_atom_ex(term_t t, atom_t *a)
369 { if ( PL_get_atom(t, a) )
370 return TRUE;
371
372 return type_error(t, "atom");
373 }
374
375
376 static int
get_long_ex(term_t t,long * v)377 get_long_ex(term_t t, long *v)
378 { if ( PL_get_long(t, v) )
379 return TRUE;
380
381 return type_error(t, "integer");
382 }
383
384
385 static int
get_double_ex(term_t t,double * v)386 get_double_ex(term_t t, double *v)
387 { if ( PL_get_float(t, v) )
388 return TRUE;
389
390 return type_error(t, "float");
391 }
392
393
394 static int
get_atom_or_var_ex(term_t t,atom_t * a)395 get_atom_or_var_ex(term_t t, atom_t *a)
396 { if ( PL_get_atom(t, a) )
397 return TRUE;
398 if ( PL_is_variable(t) )
399 { *a = 0L;
400 return TRUE;
401 }
402
403 return type_error(t, "atom");
404 }
405
406
407 static int
get_resource_or_var_ex(term_t t,atom_t * a)408 get_resource_or_var_ex(term_t t, atom_t *a)
409 { if ( PL_get_atom(t, a) )
410 return TRUE;
411 if ( PL_is_variable(t) )
412 { *a = 0L;
413 return TRUE;
414 }
415 if ( PL_is_functor(t, FUNCTOR_literal1) )
416 return FALSE; /* fail on rdf(literal(_), ...) */
417
418 return type_error(t, "atom");
419 }
420
421
422 static int
get_bool_arg_ex(int a,term_t t,int * val)423 get_bool_arg_ex(int a, term_t t, int *val)
424 { term_t arg = PL_new_term_ref();
425
426 if ( !PL_get_arg(a, t, arg) )
427 return type_error(t, "compound");
428 if ( !PL_get_bool(arg, val) )
429 return type_error(arg, "bool");
430
431 return TRUE;
432 }
433
434
435
436 /*******************************
437 * DEBUG SUPPORT *
438 *******************************/
439
440 #ifdef O_DEBUG
441
442 #define PRT_SRC 0x1
443
444 static void
print_literal(literal * lit)445 print_literal(literal *lit)
446 { switch(lit->objtype)
447 { case OBJ_STRING:
448 switch(lit->qualifier)
449 { case Q_TYPE:
450 Sdprintf("%s^^\"%s\"",
451 PL_atom_chars(lit->value.string),
452 PL_atom_chars(lit->type_or_lang));
453 break;
454 case Q_LANG:
455 Sdprintf("%s@\"%s\"",
456 PL_atom_chars(lit->value.string),
457 PL_atom_chars(lit->type_or_lang));
458 break;
459 default:
460 { size_t len;
461 const char *s;
462 const wchar_t *w;
463
464 if ( (s = PL_atom_nchars(lit->value.string, &len)) )
465 { if ( strlen(s) == len )
466 Sdprintf("\"%s\"", s);
467 else
468 Sdprintf("\"%s\" (len=%d)", s, len);
469 } else if ( (w = PL_atom_wchars(lit->value.string, &len)) )
470 { unsigned int i;
471 Sputc('L', Serror);
472 Sputc('"', Serror);
473 for(i=0; i<len; i++)
474 { if ( w[i] < 0x7f )
475 Sputc(w[i], Serror);
476 else
477 Sfprintf(Serror, "\\\\u%04x", w[i]);
478 }
479 Sputc('"', Serror);
480 }
481 break;
482 }
483 }
484 break;
485 case OBJ_INTEGER:
486 Sdprintf("%ld", lit->value.integer);
487 break;
488 case OBJ_DOUBLE:
489 Sdprintf("%f", lit->value.real);
490 break;
491 case OBJ_TERM:
492 { fid_t fid = PL_open_foreign_frame();
493 term_t term = PL_new_term_ref();
494
495 PL_recorded_external(lit->value.term.record, term);
496 PL_write_term(Serror, term, 1200,
497 PL_WRT_QUOTED|PL_WRT_NUMBERVARS|PL_WRT_PORTRAY);
498 PL_discard_foreign_frame(fid);
499 break;
500 }
501 default:
502 assert(0);
503 }
504 }
505
506
507 static void
print_object(triple * t)508 print_object(triple *t)
509 { if ( t->object_is_literal )
510 { print_literal(t->object.literal);
511 } else
512 { Sdprintf("%s", PL_atom_chars(t->object.resource));
513 }
514 }
515
516
517 static void
print_src(triple * t)518 print_src(triple *t)
519 { if ( t->line == NO_LINE )
520 Sdprintf(" [%s]", PL_atom_chars(t->graph));
521 else
522 Sdprintf(" [%s:%ld]", PL_atom_chars(t->graph), t->line);
523 }
524
525
526 static void
print_triple(triple * t,int flags)527 print_triple(triple *t, int flags)
528 { Sdprintf("<%s %s ",
529 PL_atom_chars(t->subject),
530 PL_atom_chars(t->predicate.r->name));
531 print_object(t);
532 if ( (flags & PRT_SRC) )
533 print_src(t);
534 Sdprintf(">");
535 }
536
537 #endif
538
539 /*******************************
540 * STORAGE *
541 *******************************/
542
543 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
544 Our one and only database (for the time being).
545 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
546
547 static rdf_db *DB;
548
549
550 /*******************************
551 * LISTS *
552 *******************************/
553
554 static int
add_list(rdf_db * db,list * list,void * value)555 add_list(rdf_db *db, list *list, void *value)
556 { cell *c;
557
558 for(c=list->head; c; c=c->next)
559 { if ( c->value == value )
560 return FALSE; /* already a member */
561 }
562
563 c = rdf_malloc(db, sizeof(*c));
564 c->value = value;
565 c->next = NULL;
566
567 if ( list->tail )
568 list->tail->next = c;
569 else
570 list->head = c;
571
572 list->tail = c;
573
574 return TRUE;
575 }
576
577
578 static int
del_list(rdf_db * db,list * list,void * value)579 del_list(rdf_db *db, list *list, void *value)
580 { cell *c, *p = NULL;
581
582 for(c=list->head; c; p=c, c=c->next)
583 { if ( c->value == value )
584 { if ( p )
585 p->next = c->next;
586 else
587 list->head = c->next;
588
589 if ( !c->next )
590 list->tail = p;
591
592 rdf_free(db, c, sizeof(*c));
593
594 return TRUE;
595 }
596 }
597
598 return FALSE; /* not a member */
599 }
600
601
602 static void
free_list(rdf_db * db,list * list)603 free_list(rdf_db *db, list *list)
604 { cell *c, *n;
605
606 for(c=list->head; c; c=n)
607 { n = c->next;
608 rdf_free(db, c, sizeof(*c));
609 }
610
611 list->head = list->tail = NULL;
612 }
613
614
615 /*******************************
616 * ATOM SETS *
617 *******************************/
618
619
620 #define CHUNKSIZE 1024
621
622 typedef struct mchunk
623 { struct mchunk *next;
624 size_t used;
625 char buf[CHUNKSIZE];
626 } mchunk;
627
628 typedef struct
629 { avl_tree tree;
630 mchunk *node_store;
631 mchunk store0;
632 } atomset;
633
634
635 static void *
alloc_node_atomset(void * ptr,size_t size)636 alloc_node_atomset(void *ptr, size_t size)
637 { void *p;
638 atomset *as = ptr;
639
640 assert(size < CHUNKSIZE);
641
642 if ( as->node_store->used + size > CHUNKSIZE )
643 { mchunk *ch = malloc(sizeof(mchunk));
644
645 ch->used = 0;
646 ch->next = as->node_store;
647 as->node_store = ch;
648 }
649
650 p = &as->node_store->buf[as->node_store->used];
651 as->node_store->used += size;
652
653 return p;
654 }
655
656
657 static void
free_node_atomset(void * ptr,void * data,size_t size)658 free_node_atomset(void *ptr, void *data, size_t size)
659 { assert(0);
660 }
661
662
663 static int
cmp_long_ptr(void * p1,void * p2,NODE type)664 cmp_long_ptr(void *p1, void *p2, NODE type)
665 { long *l1 = p1;
666 long *l2 = p2;
667
668 return *l1 < *l2 ? -1 : *l1 > *l2 ? 1 : 0;
669 }
670
671
672 static void
init_atomset(atomset * as)673 init_atomset(atomset *as)
674 { avlinit(&as->tree, as, sizeof(atom_t),
675 cmp_long_ptr,
676 NULL,
677 alloc_node_atomset,
678 free_node_atomset);
679
680 as->node_store = &as->store0;
681 as->node_store->next = NULL;
682 as->node_store->used = 0;
683 }
684
685
686 static void
destroy_atomset(atomset * as)687 destroy_atomset(atomset *as)
688 { mchunk *ch, *next;
689
690 for(ch=as->node_store; ch != &as->store0; ch = next)
691 { next = ch->next;
692 free(ch);
693 }
694 }
695
696
697 static int
add_atomset(atomset * as,atom_t atom)698 add_atomset(atomset *as, atom_t atom)
699 { return avlins(&as->tree, &atom) ? FALSE : TRUE;
700 }
701
702
703 /*******************************
704 * PREDICATES *
705 *******************************/
706
707 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
708 Predicates are represented as first class citizens for three reasons:
709 quickly answer on the transitive rdfs:subPropertyOf relation for
710 rdf_hash/3, keep track of statistics that are useful for query
711 optimization (#triples, branching factor) and keep properties
712 (inverse/transitive).
713
714 To answer the rdfs:subPropertyOf quickly, predicates are organised in
715 `clouds', where a cloud defines a set of predicates connected through
716 rdfs:subPropertyOf triples. The cloud numbers its members and maintains
717 a bit-matrix that contains the closure of the reachability. Initially a
718 predicate has a simple cloud of size 1. merge_clouds() and split_cloud()
719 deals with adding and deleting rdfs:subPropertyOf relations. These
720 operations try to modify the clouds that have no triples, so it can be
721 done without a rehash. If this fails, the predicates keep their own hash
722 to make search without rdfs:subPropertyOf still possible (so we can
723 avoid frequent updates while loading triples), sets the cloud `dirty'
724 flag and the DB's need_update flag. Queries that need rdfs:subPropertyOf
725 find the need_update flag, which calls organise_predicates(), which
726 cause a rehash if some predicates have changed hash-code to the new
727 cloud they have become part of.
728
729 TBD: We can do a partial re-hash in that case!
730 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
731
732
733 static void
init_pred_table(rdf_db * db)734 init_pred_table(rdf_db *db)
735 { int bytes = sizeof(predicate*)*INITIAL_PREDICATE_TABLE_SIZE;
736
737 db->pred_table = rdf_malloc(db, bytes);
738 memset(db->pred_table, 0, bytes);
739 db->pred_table_size = INITIAL_PREDICATE_TABLE_SIZE;
740 }
741
742
743 static predicate *
existing_predicate(rdf_db * db,atom_t name)744 existing_predicate(rdf_db *db, atom_t name)
745 { int hash = atom_hash(name) % db->pred_table_size;
746 predicate *p;
747
748 LOCK_MISC(db);
749 for(p=db->pred_table[hash]; p; p = p->next)
750 { if ( p->name == name )
751 { UNLOCK_MISC(db);
752 return p;
753 }
754 }
755
756 UNLOCK_MISC(db);
757 return NULL;
758 }
759
760
761 static predicate *
lookup_predicate(rdf_db * db,atom_t name)762 lookup_predicate(rdf_db *db, atom_t name)
763 { int hash = atom_hash(name) % db->pred_table_size;
764 predicate *p;
765 predicate_cloud *cp;
766
767 LOCK_MISC(db);
768 for(p=db->pred_table[hash]; p; p = p->next)
769 { if ( p->name == name )
770 { UNLOCK_MISC(db);
771 return p;
772 }
773 }
774 p = rdf_malloc(db, sizeof(*p));
775 memset(p, 0, sizeof(*p));
776 p->name = name;
777 cp = new_predicate_cloud(db, &p, 1);
778 p->hash = cp->hash;
779 PL_register_atom(name);
780 p->next = db->pred_table[hash];
781 db->pred_table[hash] = p;
782 db->pred_count++;
783 DEBUG(5, Sdprintf("Pred %s (count = %d)\n",
784 PL_atom_chars(name), db->pred_count));
785 UNLOCK_MISC(db);
786
787 return p;
788 }
789
790
791 static const char *
pname(predicate * p)792 pname(predicate *p)
793 { if ( p->name )
794 return PL_atom_chars(p->name);
795 else
796 { static char *ring[10];
797 static int ri = 0;
798 char buf[25];
799 char *r;
800
801 Ssprintf(buf, "__D%p", p);
802 ring[ri++] = r = strdup(buf);
803 if ( ri == 10 )
804 { ri = 0;
805 free(ring[ri]);
806 }
807
808 return (const char*)r;
809 }
810 }
811
812
813 static int
organise_predicates(rdf_db * db)814 organise_predicates(rdf_db *db) /* TBD: rename&move */
815 { predicate **ht;
816 int i;
817 int changed = 0;
818
819 DEBUG(2, Sdprintf("rdf_db: fixing predicate clouds\n"));
820
821 for(i=0,ht = db->pred_table; i<db->pred_table_size; i++, ht++)
822 { predicate *p;
823
824 for( p = *ht; p; p = p->next )
825 { predicate_cloud *cloud = p->cloud;
826
827 if ( cloud->dirty )
828 { predicate **cp;
829 int i2;
830
831 for(i2=0, cp = cloud->members; i2 < cloud->size; i2++, cp++)
832 { if ( (*cp)->hash != cloud->hash )
833 { (*cp)->hash = cloud->hash;
834 if ( (*cp)->triple_count > 0 )
835 changed++;
836 }
837 }
838 cloud->dirty = FALSE;
839 }
840 }
841 }
842
843 return changed;
844 }
845
846
847 /*******************************
848 * PREDICATE CLOUDS *
849 *******************************/
850
851 static predicate_cloud *
new_predicate_cloud(rdf_db * db,predicate ** p,size_t count)852 new_predicate_cloud(rdf_db *db, predicate **p, size_t count)
853 { predicate_cloud *cloud = rdf_malloc(db, sizeof(*cloud));
854
855 memset(cloud, 0, sizeof(*cloud));
856 cloud->hash = db->next_hash++;
857 if ( count )
858 { int i;
859 predicate **p2;
860
861 cloud->size = count;
862 cloud->members = rdf_malloc(db, sizeof(predicate*)*count);
863 memcpy(cloud->members, p, sizeof(predicate*)*count);
864
865 for(i=0, p2=cloud->members; i<cloud->size; i++, p2++)
866 (*p2)->cloud = cloud;
867 }
868 create_reachability_matrix(db, cloud);
869
870 return cloud;
871 }
872
873
874 static void
free_predicate_cloud(rdf_db * db,predicate_cloud * cloud)875 free_predicate_cloud(rdf_db *db, predicate_cloud *cloud)
876 { if ( cloud->members )
877 { rdf_free(db, cloud->members, sizeof(predicate*)*cloud->size);
878 }
879
880 rdf_free(db, cloud, sizeof(*cloud));
881 }
882
883
884 static long
triples_in_predicate_cloud(predicate_cloud * cloud)885 triples_in_predicate_cloud(predicate_cloud *cloud)
886 { long triples = 0;
887 predicate **p;
888 int i;
889
890 for(i=0, p=cloud->members; i<cloud->size; i++, p++)
891 triples += (*p)->triple_count;
892
893 return triples;
894 }
895
896
897 /* Add the predicates of c2 to c1 and destroy c2. Returns c1 */
898
899 static predicate_cloud *
append_clouds(rdf_db * db,predicate_cloud * c1,predicate_cloud * c2,int update_hash)900 append_clouds(rdf_db *db, predicate_cloud *c1, predicate_cloud *c2, int update_hash)
901 { predicate **p;
902 int i;
903
904 for(i=0, p=c2->members; i<c2->size; i++, p++)
905 { (*p)->cloud = c1;
906 if ( update_hash )
907 (*p)->hash = c1->hash;
908 }
909
910 if ( c1->size > 0 && c2->size > 0 )
911 { c1->members = rdf_realloc(db, c1->members,
912 c1->size*sizeof(predicate*),
913 (c1->size+c2->size)*sizeof(predicate*));
914 memcpy(&c1->members[c1->size], c2->members, c2->size*sizeof(predicate*));
915 c1->size += c2->size;
916 free_predicate_cloud(db, c2);
917 } else if ( c2->size > 0 )
918 { c1->members = c2->members;
919 c1->size = c2->size;
920 c2->members = NULL;
921 free_predicate_cloud(db, c2);
922 } else
923 { free_predicate_cloud(db, c2);
924 }
925
926 return c1;
927 }
928
929
930 /* merge two predicate clouds. If either of them has no triples we
931 can do the merge without rehashing the database. Note that this
932 code is only called from addSubPropertyOf(). If c1==c2, we added
933 an rdfs:subPropertyOf between two predicates in the same cloud.
934 we must still update the matrix, though we could do it a bit more
935 efficient. I doubt this is worth the trouble though.
936 */
937
938 static predicate_cloud *
merge_clouds(rdf_db * db,predicate_cloud * c1,predicate_cloud * c2)939 merge_clouds(rdf_db *db, predicate_cloud *c1, predicate_cloud *c2)
940 { predicate_cloud *cloud;
941
942 if ( c1 != c2 )
943 { if ( triples_in_predicate_cloud(c1) == 0 )
944 { cloud = append_clouds(db, c2, c1, TRUE);
945 } else if ( triples_in_predicate_cloud(c2) == 0 )
946 { cloud = append_clouds(db, c1, c2, TRUE);
947 } else
948 { cloud = append_clouds(db, c1, c2, FALSE);
949 cloud->dirty = TRUE;
950 db->need_update++;
951 }
952 } else
953 { cloud = c1;
954 }
955
956 DEBUG(1, if ( !db->need_update )
957 { check_predicate_cloud(cloud);
958 });
959
960 create_reachability_matrix(db, cloud);
961
962 return cloud;
963 }
964
965
966 /* split a cloud into multiple disjoint clouds. The first cloud is
967 given the hash of the original, so we only need to update if new
968 clouds are created. Ideally we should se whether it is possible
969 to give the orginal hash to the one and only non-empty cloud to
970 avoid re-hashing alltogether.
971 */
972
973 static void
pred_reachable(predicate * start,char * visited,predicate ** nodes,int * size)974 pred_reachable(predicate *start, char *visited, predicate **nodes, int *size)
975 { if ( !visited[start->label] )
976 { cell *c;
977
978 visited[start->label] = TRUE;
979 nodes[(*size)++] = start;
980 for(c=start->subPropertyOf.head; c; c=c->next)
981 pred_reachable(c->value, visited, nodes, size);
982 for(c=start->siblings.head; c; c=c->next)
983 pred_reachable(c->value, visited, nodes, size);
984 }
985 }
986
987
988 static int
split_cloud(rdf_db * db,predicate_cloud * cloud,predicate_cloud ** parts,int size)989 split_cloud(rdf_db *db, predicate_cloud *cloud,
990 predicate_cloud **parts, int size)
991 { char *done = alloca(cloud->size*sizeof(char));
992 predicate **graph = alloca(cloud->size*sizeof(predicate*));
993 int found = 0;
994 int i;
995
996 memset(done, 0, cloud->size*sizeof(char));
997 for(i=0; i<cloud->size; i++)
998 { if ( !done[i] )
999 { predicate *start = cloud->members[i];
1000 predicate_cloud *new_cloud;
1001 int gsize = 0;
1002
1003 pred_reachable(start, done, graph, &gsize);
1004 new_cloud = new_predicate_cloud(db, graph, gsize);
1005 if ( found == 0 )
1006 { new_cloud->hash = cloud->hash;
1007 } else
1008 { new_cloud->dirty = TRUE; /* preds come from another cloud */
1009 db->need_update++;
1010 }
1011 parts[found++] = new_cloud;
1012 }
1013 }
1014
1015 free_predicate_cloud(db, cloud);
1016
1017 return found;
1018 }
1019
1020
1021 static unsigned long
predicate_hash(predicate * p)1022 predicate_hash(predicate *p)
1023 { return p->hash;
1024 }
1025
1026
1027 static void
addSubPropertyOf(rdf_db * db,predicate * sub,predicate * super)1028 addSubPropertyOf(rdf_db *db, predicate *sub, predicate *super)
1029 { /*DEBUG(2, Sdprintf("addSubPropertyOf(%s, %s)\n", pname(sub), pname(super)));*/
1030
1031 if ( add_list(db, &sub->subPropertyOf, super) )
1032 { add_list(db, &super->siblings, sub);
1033 merge_clouds(db, sub->cloud, super->cloud);
1034 }
1035 }
1036
1037
1038 /* deleting an rdfs:subPropertyOf. This is a bit naughty. If the
1039 cloud is still connected we only need to refresh the reachability
1040 matrix. Otherwise the cloud breaks in maximum two clusters. We
1041 can decide to leave it as is, which saves a re-hash of the triples
1042 but harms indexing. Alternative we can create a new cloud for one
1043 of the clusters and re-hash.
1044 */
1045
1046 static void
delSubPropertyOf(rdf_db * db,predicate * sub,predicate * super)1047 delSubPropertyOf(rdf_db *db, predicate *sub, predicate *super)
1048 { if ( del_list(db, &sub->subPropertyOf, super) )
1049 { del_list(db, &super->siblings, sub);
1050 /* if ( not worth the trouble )
1051 create_reachability_matrix(db, sub->cloud);
1052 else */
1053 { predicate_cloud *parts[2];
1054 split_cloud(db, sub->cloud, parts, 2);
1055 }
1056 }
1057 }
1058
1059
1060 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1061 Reachability matrix.
1062 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1063
1064 #define WBITSIZE (sizeof(int)*8)
1065
1066 static size_t
byte_size_bitmatrix(size_t w,size_t h)1067 byte_size_bitmatrix(size_t w, size_t h)
1068 { size_t wsize = ((w*h)+WBITSIZE-1)/WBITSIZE;
1069
1070 return (size_t)(intptr_t)&((bitmatrix*)NULL)->bits[wsize];
1071 }
1072
1073
1074 static bitmatrix *
alloc_bitmatrix(rdf_db * db,size_t w,size_t h)1075 alloc_bitmatrix(rdf_db *db, size_t w, size_t h)
1076 { size_t size = byte_size_bitmatrix(w, h);
1077 bitmatrix *m = rdf_malloc(db, size);
1078
1079 memset(m, 0, size);
1080 m->width = w;
1081 m->heigth = h;
1082
1083 return m;
1084 }
1085
1086
1087 static void
free_bitmatrix(rdf_db * db,bitmatrix * bm)1088 free_bitmatrix(rdf_db *db, bitmatrix *bm)
1089 { size_t size = byte_size_bitmatrix(bm->width, bm->heigth);
1090
1091 rdf_free(db, bm, size);
1092 }
1093
1094
1095 #undef setbit /* conflict in HPUX 11.23 */
1096
1097 static void
setbit(bitmatrix * m,int i,int j)1098 setbit(bitmatrix *m, int i, int j)
1099 { size_t ij = m->width*i+j;
1100 size_t word = ij/WBITSIZE;
1101 int bit = ij%WBITSIZE;
1102
1103 m->bits[word] |= 1<<bit;
1104 }
1105
1106
1107 static int
testbit(bitmatrix * m,int i,int j)1108 testbit(bitmatrix *m, int i, int j)
1109 { size_t ij = m->width*i+j;
1110 size_t word = ij/WBITSIZE;
1111 int bit = ij%WBITSIZE;
1112
1113 return ((m->bits[word] & (1<<bit)) != 0);
1114 }
1115
1116
1117 static int
label_predicate_cloud(predicate_cloud * cloud)1118 label_predicate_cloud(predicate_cloud *cloud)
1119 { predicate **p;
1120 int i;
1121
1122 for(i=0, p=cloud->members; i<cloud->size; i++, p++)
1123 (*p)->label = i;
1124
1125 return i;
1126 }
1127
1128
1129 static void
fill_reachable(bitmatrix * bm,predicate * p0,predicate * p)1130 fill_reachable(bitmatrix *bm, predicate *p0, predicate *p)
1131 { if ( !testbit(bm, p0->label, p->label) )
1132 { cell *c;
1133
1134 DEBUG(1, Sdprintf(" Reachable [%s (%d)]\n", pname(p), p->label));
1135 setbit(bm, p0->label, p->label);
1136 for(c = p->subPropertyOf.head; c; c=c->next)
1137 fill_reachable(bm, p0, c->value);
1138 }
1139 }
1140
1141
1142 static void
create_reachability_matrix(rdf_db * db,predicate_cloud * cloud)1143 create_reachability_matrix(rdf_db *db, predicate_cloud *cloud)
1144 { bitmatrix *m = alloc_bitmatrix(db, cloud->size, cloud->size);
1145 predicate **p;
1146 int i;
1147
1148 label_predicate_cloud(cloud);
1149 for(i=0, p=cloud->members; i<cloud->size; i++, p++)
1150 { DEBUG(1, Sdprintf("Reachability for %s (%d)\n", pname(*p), (*p)->label));
1151
1152 fill_reachable(m, *p, *p);
1153 }
1154
1155 if ( cloud->reachable )
1156 free_bitmatrix(db, cloud->reachable);
1157
1158 cloud->reachable = m;
1159 }
1160
1161
1162 static int
isSubPropertyOf(predicate * sub,predicate * p)1163 isSubPropertyOf(predicate *sub, predicate *p)
1164 { if ( sub->cloud == p->cloud )
1165 return testbit(sub->cloud->reachable, sub->label, p->label);
1166
1167 return FALSE;
1168 }
1169
1170 /*******************************
1171 * PRINT PREDICATE HIERARCHY *
1172 *******************************/
1173
1174 static int
check_predicate_cloud(predicate_cloud * c)1175 check_predicate_cloud(predicate_cloud *c)
1176 { predicate **p;
1177 int errors = 0;
1178 int i;
1179
1180 DEBUG(1, if ( c->dirty ) Sdprintf("Cloud is dirty\n"));
1181
1182 for(i=0, p=c->members; i<c->size; i++, p++)
1183 { if ( !c->dirty )
1184 { if ( (*p)->hash != c->hash )
1185 { Sdprintf("Hash of %s doesn't match cloud hash\n", pname(*p));
1186 errors++;
1187 }
1188 }
1189 if ( (*p)->cloud != c )
1190 { Sdprintf("Wrong cloud of %s\n", pname(*p));
1191 errors++;
1192 }
1193 }
1194
1195 return errors;
1196 }
1197
1198
1199 static void
print_reachability_cloud(predicate * p)1200 print_reachability_cloud(predicate *p)
1201 { int x, y;
1202 predicate_cloud *cloud = p->cloud;
1203
1204 check_predicate_cloud(cloud);
1205
1206 Sdprintf("Reachability matrix:\n");
1207 for(x=0; x<cloud->reachable->width; x++)
1208 Sdprintf("%d", x%10);
1209 Sdprintf("\n");
1210 for(y=0; y<cloud->reachable->heigth; y++)
1211 { for(x=0; x<cloud->reachable->width; x++)
1212 { if ( testbit(cloud->reachable, x, y) )
1213 Sdprintf("X");
1214 else
1215 Sdprintf(".");
1216 }
1217
1218 Sdprintf(" %2d %s\n", y, PL_atom_chars(cloud->members[y]->name));
1219 assert(cloud->members[y]->label == y);
1220 }
1221 }
1222
1223
1224 static foreign_t
rdf_print_predicate_cloud(term_t t)1225 rdf_print_predicate_cloud(term_t t)
1226 { predicate *p;
1227 rdf_db *db = DB;
1228
1229 if ( !get_predicate(db, t, &p) )
1230 return FALSE;
1231
1232 print_reachability_cloud(p);
1233
1234 return TRUE;
1235 }
1236
1237
1238
1239 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1240 Branching factors are crucial in ordering the statements of a
1241 conjunction. These functions compute the average branching factor in
1242 both directions ("subject --> P --> object" and "object --> P -->
1243 subject") by determining the number of unique values at either side of
1244 the predicate. This number is only recomputed if it is considered
1245 `dirty'.
1246 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1247
1248 static int
update_predicate_counts(rdf_db * db,predicate * p,int which)1249 update_predicate_counts(rdf_db *db, predicate *p, int which)
1250 { long total = 0;
1251
1252 if ( which == DISTINCT_DIRECT )
1253 { long changed = abs(p->triple_count - p->distinct_updated[DISTINCT_DIRECT]);
1254
1255 if ( changed < p->distinct_updated[DISTINCT_DIRECT] )
1256 return TRUE;
1257
1258 if ( p->triple_count == 0 )
1259 { p->distinct_count[which] = 0;
1260 p->distinct_subjects[which] = 0;
1261 p->distinct_objects[which] = 0;
1262
1263 return TRUE;
1264 }
1265 } else
1266 { long changed = db->generation - p->distinct_updated[DISTINCT_SUB];
1267
1268 if ( changed < p->distinct_count[DISTINCT_SUB] )
1269 return TRUE;
1270 }
1271
1272 if ( !update_hash(db) )
1273 return FALSE;
1274
1275 { atomset subject_set;
1276 atomset object_set;
1277 triple t;
1278 triple *byp;
1279
1280 memset(&t, 0, sizeof(t));
1281 t.predicate.r = p;
1282 t.indexed |= BY_P;
1283
1284 init_atomset(&subject_set);
1285 init_atomset(&object_set);
1286 for(byp = db->table[t.indexed][triple_hash(db, &t, t.indexed)];
1287 byp;
1288 byp = byp->next[t.indexed])
1289 { if ( !byp->erased && !byp->is_duplicate )
1290 { if ( (which == DISTINCT_DIRECT && byp->predicate.r == p) ||
1291 (which != DISTINCT_DIRECT && isSubPropertyOf(byp->predicate.r, p)) )
1292 { total++;
1293 add_atomset(&subject_set, byp->subject);
1294 add_atomset(&object_set, object_hash(byp)); /* NOTE: not exact! */
1295 }
1296 }
1297 }
1298
1299 p->distinct_count[which] = total;
1300 p->distinct_subjects[which] = subject_set.tree.count;
1301 p->distinct_objects[which] = object_set.tree.count;
1302
1303 destroy_atomset(&subject_set);
1304 destroy_atomset(&object_set);
1305
1306 if ( which == DISTINCT_DIRECT )
1307 p->distinct_updated[DISTINCT_DIRECT] = total;
1308 else
1309 p->distinct_updated[DISTINCT_SUB] = db->generation;
1310
1311 DEBUG(1, Sdprintf("%s: distinct subjects (%s): %ld, objects: %ld\n",
1312 PL_atom_chars(p->name),
1313 (which == DISTINCT_DIRECT ? "rdf" : "rdfs"),
1314 p->distinct_subjects[which],
1315 p->distinct_objects[which]));
1316 }
1317
1318 return TRUE;
1319 }
1320
1321
1322 static void
invalidate_distinct_counts(rdf_db * db)1323 invalidate_distinct_counts(rdf_db *db)
1324 { predicate **ht;
1325 int i;
1326
1327 for(i=0,ht = db->pred_table; i<db->pred_table_size; i++, ht++)
1328 { predicate *p;
1329
1330 for( p = *ht; p; p = p->next )
1331 { p->distinct_updated[DISTINCT_SUB] = 0;
1332 p->distinct_count[DISTINCT_SUB] = 0;
1333 p->distinct_subjects[DISTINCT_SUB] = 0;
1334 p->distinct_objects[DISTINCT_SUB] = 0;
1335 }
1336 }
1337 }
1338
1339
1340 static double
subject_branch_factor(rdf_db * db,predicate * p,int which)1341 subject_branch_factor(rdf_db *db, predicate *p, int which)
1342 { if ( !update_predicate_counts(db, p, which) )
1343 return FALSE;
1344
1345 if ( p->distinct_subjects[which] == 0 )
1346 return 0.0; /* 0 --> 0 */
1347
1348 return (double)p->distinct_count[which] /
1349 (double)p->distinct_subjects[which];
1350 }
1351
1352
1353 static double
object_branch_factor(rdf_db * db,predicate * p,int which)1354 object_branch_factor(rdf_db *db, predicate *p, int which)
1355 { if ( !update_predicate_counts(db, p, which) )
1356 return FALSE;
1357
1358 if ( p->distinct_objects[which] == 0 )
1359 return 0.0; /* 0 --> 0 */
1360
1361 return (double)p->distinct_count[which] /
1362 (double)p->distinct_objects[which];
1363 }
1364
1365
1366
1367
1368 /*******************************
1369 * NAMED GRAPHS *
1370 *******************************/
1371
1372 /* MT: all calls must be locked
1373 */
1374
1375 static void
init_graph_table(rdf_db * db)1376 init_graph_table(rdf_db *db)
1377 { int bytes = sizeof(predicate*)*INITIAL_GRAPH_TABLE_SIZE;
1378
1379 db->graph_table = rdf_malloc(db, bytes);
1380 memset(db->graph_table, 0, bytes);
1381 db->graph_table_size = INITIAL_GRAPH_TABLE_SIZE;
1382 }
1383
1384
1385 static graph *
lookup_graph(rdf_db * db,atom_t name,int create)1386 lookup_graph(rdf_db *db, atom_t name, int create)
1387 { int hash = atom_hash(name) % db->graph_table_size;
1388 graph *src;
1389
1390 LOCK_MISC(db);
1391 for(src=db->graph_table[hash]; src; src = src->next)
1392 { if ( src->name == name )
1393 { UNLOCK_MISC(db);
1394 return src;
1395 }
1396 }
1397
1398 if ( !create )
1399 { UNLOCK_MISC(db);
1400 return NULL;
1401 }
1402
1403 src = rdf_malloc(db, sizeof(*src));
1404 memset(src, 0, sizeof(*src));
1405 src->name = name;
1406 src->md5 = TRUE;
1407 PL_register_atom(name);
1408 src->next = db->graph_table[hash];
1409 db->graph_table[hash] = src;
1410 UNLOCK_MISC(db);
1411
1412 return src;
1413 }
1414
1415
1416 static void
erase_graphs(rdf_db * db)1417 erase_graphs(rdf_db *db)
1418 { graph **ht;
1419 int i;
1420
1421 for(i=0,ht = db->graph_table; i<db->graph_table_size; i++, ht++)
1422 { graph *src, *n;
1423
1424 for( src = *ht; src; src = n )
1425 { n = src->next;
1426
1427 PL_unregister_atom(src->name);
1428 if ( src->source )
1429 PL_unregister_atom(src->source);
1430 rdf_free(db, src, sizeof(*src));
1431 }
1432
1433 *ht = NULL;
1434 }
1435
1436 db->last_graph = NULL;
1437 }
1438
1439
1440 static void
register_graph(rdf_db * db,triple * t)1441 register_graph(rdf_db *db, triple *t)
1442 { graph *src;
1443
1444 if ( !t->graph )
1445 return;
1446
1447 if ( db->last_graph && db->last_graph->name == t->graph )
1448 { src = db->last_graph;
1449 } else
1450 { src = lookup_graph(db, t->graph, TRUE);
1451 db->last_graph = src;
1452 }
1453
1454 src->triple_count++;
1455 #ifdef WITH_MD5
1456 if ( src->md5 )
1457 { md5_byte_t digest[16];
1458 md5_triple(t, digest);
1459 sum_digest(src->digest, digest);
1460 }
1461 #endif
1462 }
1463
1464
1465 static void
unregister_graph(rdf_db * db,triple * t)1466 unregister_graph(rdf_db *db, triple *t)
1467 { graph *src;
1468
1469 if ( !t->graph )
1470 return;
1471
1472 if ( db->last_graph && db->last_graph->name == t->graph )
1473 { src = db->last_graph;
1474 } else
1475 { src = lookup_graph(db, t->graph, TRUE);
1476 db->last_graph = src;
1477 }
1478
1479 src->triple_count--;
1480 #ifdef WITH_MD5
1481 if ( src->md5 )
1482 { md5_byte_t digest[16];
1483 md5_triple(t, digest);
1484 dec_digest(src->digest, digest);
1485 }
1486 #endif
1487 }
1488
1489
1490 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1491 rdf_graphs_(-ListOfGraphs)
1492
1493 Return a list holding the names of all currently defined graphs. We
1494 return a list to avoid the need for complicated long locks.
1495 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1496
1497 static foreign_t
rdf_graphs(term_t list)1498 rdf_graphs(term_t list)
1499 { int i;
1500 term_t tail = PL_copy_term_ref(list);
1501 term_t head = PL_new_term_ref();
1502 rdf_db *db = DB;
1503
1504 if ( !RDLOCK(db) )
1505 return FALSE;
1506 for(i=0; i<db->graph_table_size; i++)
1507 { graph *src;
1508
1509 for(src=db->graph_table[i]; src; src = src->next)
1510 { if ( !PL_unify_list(tail, head, tail) ||
1511 !PL_unify_atom(head, src->name) )
1512 { RDUNLOCK(db);
1513 return FALSE;
1514 }
1515 }
1516 }
1517 RDUNLOCK(db);
1518
1519 return PL_unify_nil(tail);
1520 }
1521
1522
1523 static foreign_t
rdf_graph_source(term_t graph_name,term_t source,term_t modified)1524 rdf_graph_source(term_t graph_name, term_t source, term_t modified)
1525 { atom_t gn;
1526 int rc = FALSE;
1527 rdf_db *db = DB;
1528
1529 if ( !get_atom_or_var_ex(graph_name, &gn) )
1530 return FALSE;
1531
1532 if ( gn )
1533 { graph *s;
1534
1535 if ( !RDLOCK(db) )
1536 return FALSE;
1537 if ( (s = lookup_graph(db, gn, FALSE)) && s->source)
1538 { rc = ( PL_unify_atom(source, s->source) &&
1539 PL_unify_float(modified, s->modified) );
1540 }
1541 RDUNLOCK(db);
1542 } else
1543 { atom_t src;
1544
1545 if ( get_atom_ex(source, &src) )
1546 { int i;
1547 graph **ht;
1548
1549 if ( !RDLOCK(db) )
1550 return FALSE;
1551
1552 for(i=0,ht = db->graph_table; i<db->graph_table_size; i++, ht++)
1553 { graph *s;
1554
1555 for( s = *ht; s; s = s->next )
1556 { if ( s->source == src )
1557 { rc = ( PL_unify_atom(graph_name, s->name) &&
1558 PL_unify_float(modified, s->modified) );
1559 }
1560 }
1561 }
1562
1563 RDUNLOCK(db);
1564 }
1565 }
1566
1567 return rc;
1568 }
1569
1570
1571 static foreign_t
rdf_set_graph_source(term_t graph_name,term_t source,term_t modified)1572 rdf_set_graph_source(term_t graph_name, term_t source, term_t modified)
1573 { atom_t gn, src;
1574 int rc = FALSE;
1575 rdf_db *db = DB;
1576 graph *s;
1577 double mtime;
1578
1579 if ( !get_atom_ex(graph_name, &gn) ||
1580 !get_atom_ex(source, &src) ||
1581 !get_double_ex(modified, &mtime) )
1582 return FALSE;
1583
1584 if ( !RDLOCK(db) )
1585 return FALSE;
1586 if ( (s = lookup_graph(db, gn, TRUE)) )
1587 { if ( s->source != src )
1588 { if ( s->source )
1589 PL_unregister_atom(s->source);
1590 s->source = src;
1591 PL_register_atom(s->source);
1592 }
1593 s->modified = mtime;
1594 rc = TRUE;
1595 }
1596 RDUNLOCK(db);
1597
1598 return rc;
1599 }
1600
1601
1602 static foreign_t
rdf_unset_graph_source(term_t graph_name)1603 rdf_unset_graph_source(term_t graph_name)
1604 { atom_t gn;
1605 rdf_db *db = DB;
1606 graph *s;
1607
1608 if ( !get_atom_ex(graph_name, &gn) )
1609 return FALSE;
1610 if ( (s = lookup_graph(db, gn, TRUE)) )
1611 { if ( s->source )
1612 { PL_unregister_atom(s->source);
1613 s->source = 0;
1614 }
1615 s->modified = 0.0;
1616 }
1617 if ( !RDLOCK(db) )
1618 return FALSE;
1619
1620 RDUNLOCK(db);
1621
1622 return TRUE;
1623 }
1624
1625
1626
1627 /*******************************
1628 * LITERALS *
1629 *******************************/
1630
1631 #define LITERAL_EX_MAGIC 0x2b97e881
1632
1633 typedef struct literal_ex
1634 { literal *literal;
1635 atom_info atom;
1636 #ifdef O_SECURE
1637 long magic;
1638 #endif
1639 } literal_ex;
1640
1641
1642 static inline void
prepare_literal_ex(literal_ex * lex)1643 prepare_literal_ex(literal_ex *lex)
1644 { SECURE(lex->magic = 0x2b97e881);
1645
1646 if ( lex->literal->objtype == OBJ_STRING )
1647 { lex->atom.handle = lex->literal->value.string;
1648 lex->atom.resolved = FALSE;
1649 }
1650 }
1651
1652
1653 static literal *
new_literal(rdf_db * db)1654 new_literal(rdf_db *db)
1655 { literal *lit = rdf_malloc(db, sizeof(*lit));
1656 memset(lit, 0, sizeof(*lit));
1657 lit->references = 1;
1658
1659 return lit;
1660 }
1661
1662
1663 static void
free_literal(rdf_db * db,literal * lit)1664 free_literal(rdf_db *db, literal *lit)
1665 { if ( --lit->references == 0 )
1666 { unlock_atoms_literal(lit);
1667
1668 if ( lit->shared && !db->resetting )
1669 { literal_ex lex;
1670
1671 lit->shared = FALSE;
1672 broadcast(EV_OLD_LITERAL, lit, NULL);
1673 DEBUG(2,
1674 Sdprintf("Delete %p from literal table: ", lit);
1675 print_literal(lit);
1676 Sdprintf("\n"));
1677
1678 lex.literal = lit;
1679 prepare_literal_ex(&lex);
1680
1681 if ( !avldel(&db->literals, &lex) )
1682 { Sdprintf("Failed to delete %p (size=%ld): ", lit, db->literals.count);
1683 print_literal(lit);
1684 Sdprintf("\n");
1685 assert(0);
1686 }
1687 }
1688
1689 if ( lit->objtype == OBJ_TERM &&
1690 lit->value.term.record )
1691 { if ( lit->term_loaded )
1692 rdf_free(db, lit->value.term.record, lit->value.term.len);
1693 else
1694 PL_erase_external(lit->value.term.record);
1695 }
1696 rdf_free(db, lit, sizeof(*lit));
1697 }
1698 }
1699
1700
1701 static literal *
copy_literal(rdf_db * db,literal * lit)1702 copy_literal(rdf_db *db, literal *lit)
1703 { lit->references++;
1704 return lit;
1705 }
1706
1707
1708 static void
alloc_literal_triple(rdf_db * db,triple * t)1709 alloc_literal_triple(rdf_db *db, triple *t)
1710 { if ( !t->object_is_literal )
1711 { t->object.literal = new_literal(db);
1712 t->object_is_literal = TRUE;
1713 }
1714 }
1715
1716
1717 static void
lock_atoms_literal(literal * lit)1718 lock_atoms_literal(literal *lit)
1719 { if ( !lit->atoms_locked )
1720 { lit->atoms_locked = TRUE;
1721
1722 switch(lit->objtype)
1723 { case OBJ_STRING:
1724 PL_register_atom(lit->value.string);
1725 if ( lit->qualifier )
1726 PL_register_atom(lit->type_or_lang);
1727 break;
1728 }
1729 }
1730 }
1731
1732
1733 static void
unlock_atoms_literal(literal * lit)1734 unlock_atoms_literal(literal *lit)
1735 { if ( lit->atoms_locked )
1736 { lit->atoms_locked = FALSE;
1737
1738 switch(lit->objtype)
1739 { case OBJ_STRING:
1740 PL_unregister_atom(lit->value.string);
1741 if ( lit->qualifier )
1742 PL_unregister_atom(lit->type_or_lang);
1743 break;
1744 }
1745 }
1746 }
1747
1748
1749 /*******************************
1750 * LITERAL DB *
1751 *******************************/
1752
1753 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1754 compare_literals() sorts literals. Ordering is defined as:
1755
1756 * Numeric literals < string literals < term literals
1757 * Numeric literals (int and float) are sorted by value
1758 * String literals are sorted alhabetically
1759 - case independent, but uppercase before lowercase
1760 - locale (strcoll) sorting?
1761 - delete dyadrics
1762 - first on string, then on type, then on language
1763 * Terms are sorted on Prolog standard order of terms
1764 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1765
1766 static int
compare_literals(void * p1,void * p2,NODE type)1767 compare_literals(void *p1, void *p2, NODE type)
1768 { literal_ex *lex = p1;
1769 literal *l1 = lex->literal;
1770 literal *l2 = *(literal**)p2;
1771
1772 SECURE(assert(lex->magic == LITERAL_EX_MAGIC));
1773
1774 if ( l1->objtype == l2->objtype )
1775 { switch(l1->objtype)
1776 { case OBJ_INTEGER:
1777 { int64_t v1 = l1->value.integer;
1778 int64_t v2 = l2->value.integer;
1779 return v1 < v2 ? -1 : v1 > v2 ? 1 : 0;
1780 }
1781 case OBJ_DOUBLE:
1782 { double v1 = l1->value.real;
1783 double v2 = l2->value.real;
1784 return v1 < v2 ? -1 : v1 > v2 ? 1 : 0;
1785 }
1786 case OBJ_STRING:
1787 { int rc = cmp_atom_info(&lex->atom, l2->value.string);
1788
1789 if ( rc == 0 )
1790 { if ( l1->qualifier == l2->qualifier )
1791 return cmp_atoms(l1->type_or_lang, l2->type_or_lang);
1792 return l1->qualifier - l2->qualifier;
1793 }
1794 return rc;
1795 }
1796 case OBJ_TERM:
1797 { fid_t fid = PL_open_foreign_frame();
1798 term_t t1 = PL_new_term_ref();
1799 term_t t2 = PL_new_term_ref();
1800 int rc;
1801
1802 PL_recorded_external(l1->value.term.record, t1); /* can also be handled in literal_ex */
1803 PL_recorded_external(l2->value.term.record, t2);
1804 rc = PL_compare(t1, t2);
1805
1806 PL_discard_foreign_frame(fid);
1807 return rc;
1808 }
1809 default:
1810 assert(0);
1811 return 0;
1812 }
1813 } else if ( l1->objtype == OBJ_INTEGER && l2->objtype == OBJ_DOUBLE )
1814 { double v1 = (double)l1->value.integer;
1815 double v2 = l2->value.real;
1816 return v1 < v2 ? -1 : v1 > v2 ? 1 : -1;
1817 } else if ( l1->objtype == OBJ_DOUBLE && l2->objtype == OBJ_INTEGER )
1818 { double v1 = l1->value.real;
1819 double v2 = (double)l2->value.integer;
1820 return v1 < v2 ? -1 : v1 > v2 ? 1 : 1;
1821 } else
1822 { return l1->objtype - l2->objtype;
1823 }
1824 }
1825
1826
1827 static void*
avl_malloc(void * ptr,size_t size)1828 avl_malloc(void *ptr, size_t size)
1829 { return rdf_malloc(ptr, size);
1830 }
1831
1832
1833 static void
avl_free(void * ptr,void * data,size_t size)1834 avl_free(void *ptr, void *data, size_t size)
1835 { rdf_free(ptr, data, size);
1836 }
1837
1838
1839 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1840 Create the sorted literal tree. Note that we do not register a free
1841 handler for the tree as nodes are either already destroyed by
1842 free_literal() or by rdf_reset_db().
1843 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1844
1845 static void
init_literal_table(rdf_db * db)1846 init_literal_table(rdf_db *db)
1847 { avlinit(&db->literals,
1848 db, sizeof(literal*),
1849 compare_literals,
1850 NULL,
1851 avl_malloc,
1852 avl_free);
1853 }
1854
1855
1856 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1857 share_literal() takes a literal and replaces it with one from the
1858 literal database if there is a match. On a match, the argument literal
1859 is destroyed. Without a match it adds the literal to the database and
1860 returns it.
1861 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1862
1863 static literal *
share_literal(rdf_db * db,literal * from)1864 share_literal(rdf_db *db, literal *from)
1865 { literal **data;
1866 literal_ex lex;
1867
1868 lex.literal = from;
1869 prepare_literal_ex(&lex);
1870
1871 if ( (data = avlins(&db->literals, &lex)) )
1872 { literal *l2 = *data;
1873
1874 DEBUG(2,
1875 Sdprintf("Replace %p by %p:\n", from, l2);
1876 Sdprintf("\tfrom: "); print_literal(from);
1877 Sdprintf("\n\tto: "); print_literal(l2);
1878 Sdprintf("\n"));
1879
1880 l2->references++;
1881 free_literal(db, from);
1882
1883 return l2;
1884 } else
1885 { DEBUG(2,
1886 Sdprintf("Insert %p into literal table: ", from);
1887 print_literal(from);
1888 Sdprintf("\n"));
1889
1890 from->shared = TRUE;
1891 broadcast(EV_NEW_LITERAL, from, NULL);
1892 return from;
1893 }
1894 }
1895
1896
1897 #ifdef O_SECURE
1898 static literal **
add_literals(AVLtree node,literal ** p)1899 add_literals(AVLtree node, literal **p)
1900 { literal **litp;
1901
1902 if ( node->subtree[LEFT] )
1903 p = add_literals(node->subtree[LEFT], p);
1904 litp = (literal**)node->data;
1905 *p++ = *litp;
1906 if ( node->subtree[RIGHT] )
1907 p = add_literals(node->subtree[RIGHT], p);
1908
1909 return p;
1910 }
1911
1912
1913 static foreign_t
check_transitivity()1914 check_transitivity()
1915 { rdf_db *db = DB;
1916 literal **array = malloc(sizeof(literal*)*db->literals.count);
1917 literal **p = array;
1918 int i,j;
1919
1920 add_literals(db->literals.root, p);
1921 Sdprintf("Checking %ld literals ...\n", db->literals.count);
1922
1923 for(i=0; i<db->literals.count; i++)
1924 { int end;
1925
1926 Sdprintf("\r%6ld", i);
1927 end = i+100;
1928 if ( end > db->literals.count )
1929 end = db->literals.count;
1930
1931 for(j=i+1; j<end; j++)
1932 { literal_ex lex;
1933
1934 lex.literal = &array[i];
1935 prepare_literal_ex(&lex);
1936
1937 if ( compare_literals(&lex, &array[j], IS_NULL) >= 0 )
1938 { Sdprintf("\nERROR: i,j=%d,%d: ", i, j);
1939 print_literal(array[i]);
1940 Sdprintf(" >= ");
1941 print_literal(array[j]);
1942 Sdprintf("\n");
1943 }
1944 }
1945 }
1946
1947 free(array);
1948
1949 return TRUE;
1950 }
1951
1952
1953 static void
dump_lnode(AVLtree node)1954 dump_lnode(AVLtree node)
1955 { literal **litp;
1956
1957 if ( node->subtree[LEFT] )
1958 dump_lnode(node->subtree[LEFT]);
1959 litp = (literal**)node->data;
1960 print_literal(*litp);
1961 Sdprintf("\n");
1962 if ( node->subtree[RIGHT] )
1963 dump_lnode(node->subtree[RIGHT]);
1964 }
1965
1966 static foreign_t
dump_literals()1967 dump_literals()
1968 { rdf_db *db = DB;
1969
1970 dump_lnode(db->literals.root);
1971 return TRUE;
1972 }
1973 #endif
1974
1975
1976
1977 /*******************************
1978 * TRIPLES *
1979 *******************************/
1980
1981 static void
init_tables(rdf_db * db)1982 init_tables(rdf_db *db)
1983 { int i;
1984 int bytes = sizeof(triple*)*INITIAL_TABLE_SIZE;
1985 int cbytes = sizeof(int)*INITIAL_TABLE_SIZE;
1986
1987 db->table[0] = &db->by_none;
1988 db->tail[0] = &db->by_none_tail;
1989
1990 for(i=BY_S; i<=BY_OP; i++)
1991 { if ( i == BY_SO )
1992 continue;
1993
1994 db->table[i] = rdf_malloc(db, bytes);
1995 memset(db->table[i], 0, bytes);
1996 db->tail[i] = rdf_malloc(db, bytes);
1997 memset(db->tail[i], 0, bytes);
1998 db->counts[i] = rdf_malloc(db, cbytes);
1999 memset(db->counts[i], 0, cbytes);
2000 db->table_size[i] = INITIAL_TABLE_SIZE;
2001 }
2002
2003 init_pred_table(db);
2004 init_graph_table(db);
2005 init_literal_table(db);
2006 }
2007
2008
2009 static rdf_db *
new_db()2010 new_db()
2011 { rdf_db *db = rdf_malloc(NULL, sizeof(*db));
2012
2013 memset(db, 0, sizeof(*db));
2014 INIT_LOCK(db);
2015 init_tables(db);
2016
2017 return db;
2018 }
2019
2020
2021 static triple *
new_triple(rdf_db * db)2022 new_triple(rdf_db *db)
2023 { triple *t = rdf_malloc(db, sizeof(*t));
2024 memset(t, 0, sizeof(*t));
2025 t->allocated = TRUE;
2026
2027 return t;
2028 }
2029
2030
2031 static void
free_triple(rdf_db * db,triple * t)2032 free_triple(rdf_db *db, triple *t)
2033 { unlock_atoms(t);
2034
2035 if ( t->object_is_literal && t->object.literal )
2036 free_literal(db, t->object.literal);
2037
2038 if ( t->allocated )
2039 rdf_free(db, t, sizeof(*t));
2040 }
2041
2042
2043 #define HASHED 0x80000000
2044
2045 static unsigned int
literal_hash(literal * lit)2046 literal_hash(literal *lit)
2047 { if ( lit->hash & HASHED )
2048 { return lit->hash;
2049 } else
2050 { unsigned int hash;
2051
2052 switch(lit->objtype)
2053 { case OBJ_STRING:
2054 hash = atom_hash_case(lit->value.string);
2055 break;
2056 case OBJ_INTEGER:
2057 case OBJ_DOUBLE:
2058 hash = rdf_murmer_hash(&lit->value.integer,
2059 sizeof(lit->value.integer),
2060 MURMUR_SEED);
2061 break;
2062 case OBJ_TERM:
2063 hash = rdf_murmer_hash(lit->value.term.record,
2064 (int)lit->value.term.len,
2065 MURMUR_SEED);
2066 break;
2067 default:
2068 assert(0);
2069 return 0;
2070 }
2071
2072 lit->hash = (hash | HASHED);
2073 return lit->hash;
2074 }
2075 }
2076
2077
2078 static unsigned long
object_hash(triple * t)2079 object_hash(triple *t)
2080 { if ( t->object_is_literal )
2081 { return literal_hash(t->object.literal);
2082 } else
2083 { return atom_hash(t->object.resource);
2084 }
2085 }
2086
2087
2088 static int
triple_hash(rdf_db * db,triple * t,int which)2089 triple_hash(rdf_db *db, triple *t, int which)
2090 { unsigned long v;
2091
2092 switch(which)
2093 { case BY_NONE:
2094 return 0;
2095 case BY_S:
2096 v = atom_hash(t->subject);
2097 break;
2098 case BY_P:
2099 v = predicate_hash(t->predicate.r);
2100 break;
2101 case BY_O:
2102 v = object_hash(t);
2103 break;
2104 case BY_SP:
2105 v = atom_hash(t->subject) ^ predicate_hash(t->predicate.r);
2106 break;
2107 case BY_OP:
2108 v = predicate_hash(t->predicate.r) ^ object_hash(t);
2109 break;
2110 default:
2111 v = 0; /* make compiler silent */
2112 assert(0);
2113 }
2114
2115 return (int)(v % (long)db->table_size[which]);
2116 }
2117
2118
2119 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2120 by_inverse[] returns the index key to use for inverse search as needed
2121 to realise symmetric and inverse predicates.
2122 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2123
2124 static int by_inverse[8] =
2125 { BY_NONE, /* BY_NONE = 0 */
2126 BY_O, /* BY_S = 1 */
2127 BY_P, /* BY_P = 2 */
2128 BY_OP, /* BY_SP = 3 */
2129 BY_S, /* BY_O = 4 */
2130 BY_SO, /* BY_SO = 5 */
2131 BY_SP, /* BY_OP = 6 */
2132 BY_SPO, /* BY_SPO = 7 */
2133 };
2134
2135
2136 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2137 triple *first(atom_t subject)
2138 Find the first triple on subject. The first is marked to generate a
2139 unique subjects quickly;
2140 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2141
2142 static triple *
first(rdf_db * db,atom_t subject)2143 first(rdf_db *db, atom_t subject)
2144 { triple *t, tmp;
2145 int hash;
2146
2147 tmp.subject = subject;
2148 hash = triple_hash(db, &tmp, BY_S);
2149
2150 for(t=db->table[BY_S][hash]; t; t = t->next[BY_S])
2151 { if ( t->subject == subject && !t->erased )
2152 return t;
2153 }
2154
2155 return NULL;
2156 }
2157
2158
2159 static void
link_triple_hash(rdf_db * db,triple * t)2160 link_triple_hash(rdf_db *db, triple *t)
2161 { int i;
2162
2163 for(i=1; i<=BY_OP; i++)
2164 { if ( db->table[i] )
2165 { int hash = triple_hash(db, t, i);
2166
2167 if ( db->tail[i][hash] )
2168 { db->tail[i][hash]->next[i] = t;
2169 } else
2170 { db->table[i][hash] = t;
2171 }
2172 db->tail[i][hash] = t;
2173 db->counts[i][hash]++;
2174 }
2175 }
2176 }
2177
2178
2179 typedef enum
2180 { DUP_NONE,
2181 DUP_DUPLICATE,
2182 DUP_DISCARDED
2183 } dub_state;
2184
2185
2186 static dub_state
discard_duplicate(rdf_db * db,triple * t)2187 discard_duplicate(rdf_db *db, triple *t)
2188 { triple *d;
2189 const int indexed = BY_SP;
2190 dub_state rc = DUP_NONE;
2191
2192 assert(t->is_duplicate == FALSE);
2193 assert(t->duplicates == 0);
2194
2195 if ( WANT_GC(db) ) /* (*) See above */
2196 update_hash(db);
2197 d = db->table[indexed][triple_hash(db, t, indexed)];
2198 for( ; d && d != t; d = d->next[indexed] )
2199 { if ( match_triples(d, t, MATCH_DUPLICATE) )
2200 { if ( d->graph == t->graph &&
2201 (d->line == NO_LINE || d->line == t->line) )
2202 { free_triple(db, t);
2203
2204 return DUP_DISCARDED;
2205 }
2206
2207 rc = DUP_DUPLICATE;
2208 }
2209 }
2210
2211 return rc;
2212 }
2213
2214
2215 /* MT: must be locked by caller */
2216
2217 static int
link_triple_silent(rdf_db * db,triple * t)2218 link_triple_silent(rdf_db *db, triple *t)
2219 { triple *one;
2220 dub_state dup;
2221
2222 if ( t->resolve_pred )
2223 { t->predicate.r = lookup_predicate(db, t->predicate.u);
2224 t->resolve_pred = FALSE;
2225 }
2226
2227 if ( (dup=discard_duplicate(db, t)) == DUP_DISCARDED )
2228 return FALSE;
2229
2230 if ( db->by_none_tail )
2231 db->by_none_tail->next[BY_NONE] = t;
2232 else
2233 db->by_none = t;
2234 db->by_none_tail = t;
2235
2236 link_triple_hash(db, t);
2237 if ( t->object_is_literal )
2238 t->object.literal = share_literal(db, t->object.literal);
2239
2240 if ( dup == DUP_DUPLICATE && update_duplicates_add(db, t) )
2241 goto ok; /* is a duplicate */
2242
2243 /* keep track of subjects */
2244 one = first(db, t->subject);
2245 if ( !one->first )
2246 { one->first = TRUE;
2247 db->subjects++;
2248 }
2249
2250 /* keep track of subPropertyOf */
2251 if ( t->predicate.r->name == ATOM_subPropertyOf &&
2252 t->object_is_literal == FALSE )
2253 { predicate *me = lookup_predicate(db, t->subject);
2254 predicate *super = lookup_predicate(db, t->object.resource);
2255
2256 addSubPropertyOf(db, me, super);
2257 }
2258
2259 ok:
2260 db->created++;
2261 t->predicate.r->triple_count++;
2262 register_graph(db, t);
2263
2264 return TRUE;
2265 }
2266
2267
2268 static inline void
link_triple(rdf_db * db,triple * t)2269 link_triple(rdf_db *db, triple *t)
2270 { if ( link_triple_silent(db, t) )
2271 broadcast(EV_ASSERT, t, NULL);
2272 }
2273
2274
2275 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2276 rehash_triples()
2277
2278 Relink the triples in the hash-chains after the hash-keys for properties
2279 have changed or the tables have been resized. The caller must ensure
2280 there are no active queries and the tables are of the proper size.
2281
2282 At the same time, this predicate actually removes erased triples.
2283 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2284
2285 static long
tbl_size(long triples)2286 tbl_size(long triples)
2287 { long s0 = 1024;
2288
2289 triples /= MIN_HASH_FACTOR;
2290
2291 while(s0 < triples)
2292 s0 *= 2;
2293
2294 return s0;
2295 }
2296
2297
2298 static void
rehash_triples(rdf_db * db)2299 rehash_triples(rdf_db *db)
2300 { int i;
2301 triple *t, *t2;
2302 long count = db->created - db->freed;
2303 long tsize = tbl_size(count);
2304
2305 DEBUG(1, Sdprintf("(%ld triples; %ld entries) ...", count, tsize));
2306 broadcast(EV_REHASH, (void*)ATOM_begin, NULL);
2307
2308 for(i=1; i<INDEX_TABLES; i++)
2309 { if ( db->table[i] )
2310 { long bytes = sizeof(triple*) * tsize;
2311 long cbytes = sizeof(int) * tsize;
2312 long obytes = sizeof(triple*) * db->table_size[i];
2313 long ocbytes = sizeof(int) * db->table_size[i];
2314
2315 db->table[i] = rdf_realloc(db, db->table[i], obytes, bytes);
2316 db->tail[i] = rdf_realloc(db, db->tail[i], obytes, bytes);
2317 db->counts[i] = rdf_realloc(db, db->counts[i], ocbytes, cbytes);
2318 db->table_size[i] = tsize;
2319
2320 memset(db->table[i], 0, bytes);
2321 memset(db->tail[i], 0, bytes);
2322 memset(db->counts[i], 0, cbytes);
2323 }
2324 }
2325
2326 /* delete leading erased triples */
2327 for(t=db->by_none; t && t->erased; t=t2)
2328 { t2 = t->next[BY_NONE];
2329
2330 free_triple(db, t);
2331 db->freed++;
2332
2333 db->by_none = t2;
2334 }
2335
2336 for(t=db->by_none; t; t = t2)
2337 { triple *t3;
2338
2339 t2 = t->next[BY_NONE];
2340
2341 for(i=1; i<INDEX_TABLES; i++)
2342 t->next[i] = NULL;
2343
2344 assert(t->erased == FALSE);
2345 link_triple_hash(db, t);
2346
2347 for( ; t2 && t2->erased; t2=t3 )
2348 { t3 = t2->next[BY_NONE];
2349
2350 free_triple(db, t2);
2351 db->freed++;
2352 }
2353
2354 t->next[BY_NONE] = t2;
2355 if ( !t2 )
2356 db->by_none_tail = t;
2357 }
2358
2359 if ( db->by_none == NULL )
2360 db->by_none_tail = NULL;
2361
2362 broadcast(EV_REHASH, (void*)ATOM_end, NULL);
2363 }
2364
2365
2366 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2367 update_hash(). Note this may be called by readers and writers, but must
2368 be done only onces and certainly not concurrently by multiple readers.
2369 Hence we need a seperate lock.
2370 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2371
2372 static int
WANT_GC(rdf_db * db)2373 WANT_GC(rdf_db *db)
2374 { if ( db->gc_blocked )
2375 { return FALSE;
2376 } else
2377 { long dirty = db->erased - db->freed;
2378 long count = db->created - db->erased;
2379
2380 if ( dirty > 1000 && dirty > count )
2381 return TRUE;
2382 if ( count > db->table_size[1]*MAX_HASH_FACTOR )
2383 return TRUE;
2384
2385 return FALSE;
2386 }
2387 }
2388
2389
2390 static int
update_hash(rdf_db * db)2391 update_hash(rdf_db *db)
2392 { int want_gc = WANT_GC(db);
2393
2394 if ( want_gc )
2395 DEBUG(1, Sdprintf("rdf_db: want GC\n"));
2396
2397 if ( db->need_update || want_gc )
2398 { LOCK_MISC(db);
2399
2400 if ( db->need_update ) /* check again */
2401 { if ( organise_predicates(db) )
2402 { long t0 = (long)PL_query(PL_QUERY_USER_CPU);
2403
2404 DEBUG(1, Sdprintf("Re-hash ..."));
2405 invalidate_distinct_counts(db);
2406 rehash_triples(db);
2407 db->generation += (db->created-db->erased);
2408 db->rehash_count++;
2409 db->rehash_time += ((double)(PL_query(PL_QUERY_USER_CPU)-t0))/1000.0;
2410 DEBUG(1, Sdprintf("ok\n"));
2411 }
2412 db->need_update = 0;
2413 } else if ( WANT_GC(db) )
2414 { long t0 = (long)PL_query(PL_QUERY_USER_CPU);
2415
2416 DEBUG(1, Sdprintf("rdf_db: GC ..."));
2417 rehash_triples(db);
2418 db->gc_count++;
2419 db->gc_time += ((double)(PL_query(PL_QUERY_USER_CPU)-t0))/1000.0;
2420 DEBUG(1, Sdprintf("ok\n"));
2421 }
2422
2423 UNLOCK_MISC(db);
2424 }
2425
2426 return TRUE;
2427 }
2428
2429
2430 /* MT: Must be locked */
2431
2432 static void
erase_triple_silent(rdf_db * db,triple * t)2433 erase_triple_silent(rdf_db *db, triple *t)
2434 { if ( !t->erased )
2435 { t->erased = TRUE;
2436
2437 update_duplicates_del(db, t);
2438
2439 if ( t->predicate.r->name == ATOM_subPropertyOf &&
2440 t->object_is_literal == FALSE )
2441 { predicate *me = lookup_predicate(db, t->subject);
2442 predicate *super = lookup_predicate(db, t->object.resource);
2443
2444 delSubPropertyOf(db, me, super);
2445 }
2446
2447 if ( t->first )
2448 { triple *one = first(db, t->subject);
2449
2450 if ( one )
2451 one->first = TRUE;
2452 else
2453 db->subjects--;
2454 }
2455 db->erased++;
2456 t->predicate.r->triple_count--;
2457 unregister_graph(db, t);
2458
2459 if ( t->object_is_literal )
2460 { literal *lit = t->object.literal;
2461
2462 t->object.literal = NULL;
2463 free_literal(db, lit); /* TBD: thread-safe? */
2464 }
2465 }
2466 }
2467
2468
2469 static inline void
erase_triple(rdf_db * db,triple * t)2470 erase_triple(rdf_db *db, triple *t)
2471 { broadcast(EV_RETRACT, t, NULL);
2472 erase_triple_silent(db, t);
2473 }
2474
2475
2476 static int
match_object(triple * t,triple * p,unsigned flags)2477 match_object(triple *t, triple *p, unsigned flags)
2478 { if ( p->object_is_literal )
2479 { if ( t->object_is_literal )
2480 { literal *plit = p->object.literal;
2481 literal *tlit = t->object.literal;
2482
2483 if ( !plit->objtype && !plit->qualifier )
2484 return TRUE;
2485
2486 if ( plit->objtype && plit->objtype != tlit->objtype )
2487 return FALSE;
2488
2489 switch( plit->objtype )
2490 { case 0:
2491 if ( plit->qualifier &&
2492 tlit->qualifier != plit->qualifier )
2493 return FALSE;
2494 return TRUE;
2495 case OBJ_STRING:
2496 if ( (flags & MATCH_QUAL) ||
2497 p->match == STR_MATCH_PLAIN )
2498 { if ( tlit->qualifier != plit->qualifier )
2499 return FALSE;
2500 } else
2501 { if ( plit->qualifier && tlit->qualifier &&
2502 tlit->qualifier != plit->qualifier )
2503 return FALSE;
2504 }
2505 if ( plit->type_or_lang &&
2506 tlit->type_or_lang != plit->type_or_lang )
2507 return FALSE;
2508 if ( plit->value.string )
2509 { if ( tlit->value.string != plit->value.string )
2510 { if ( p->match >= STR_MATCH_EXACT )
2511 { return match_atoms(p->match,
2512 plit->value.string, tlit->value.string);
2513 } else
2514 { return FALSE;
2515 }
2516 }
2517 }
2518 return TRUE;
2519 case OBJ_INTEGER:
2520 return tlit->value.integer == plit->value.integer;
2521 case OBJ_DOUBLE:
2522 return tlit->value.real == plit->value.real;
2523 case OBJ_TERM:
2524 if ( plit->value.term.record &&
2525 plit->value.term.len != tlit->value.term.len )
2526 return FALSE;
2527 return memcmp(tlit->value.term.record, plit->value.term.record,
2528 plit->value.term.len) == 0;
2529 default:
2530 assert(0);
2531 }
2532 }
2533 return FALSE;
2534 } else
2535 { if ( p->object.resource )
2536 { if ( t->object_is_literal ||
2537 (p->object.resource != t->object.resource) )
2538 return FALSE;
2539 }
2540 }
2541
2542 return TRUE;
2543 }
2544
2545
2546
2547 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2548 Match triple t to pattern p. Erased triples are always skipped.
2549 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2550
2551 static int
match_triples(triple * t,triple * p,unsigned flags)2552 match_triples(triple *t, triple *p, unsigned flags)
2553 { /* DEBUG(3, Sdprintf("match_triple(");
2554 print_triple(t, 0);
2555 Sdprintf(")\n"));
2556 */
2557
2558 if ( t->erased )
2559 return FALSE;
2560 if ( p->subject && t->subject != p->subject )
2561 return FALSE;
2562 if ( !match_object(t, p, flags) )
2563 return FALSE;
2564 if ( flags & MATCH_SRC )
2565 { if ( p->graph && t->graph != p->graph )
2566 return FALSE;
2567 if ( p->line && t->line != p->line )
2568 return FALSE;
2569 }
2570 /* last; may be expensive */
2571 if ( p->predicate.r && t->predicate.r != p->predicate.r )
2572 { if ( (flags & MATCH_SUBPROPERTY) )
2573 return isSubPropertyOf(t->predicate.r, p->predicate.r);
2574 else
2575 return FALSE;
2576 }
2577 return TRUE;
2578 }
2579
2580
2581 /*******************************
2582 * SAVE/LOAD *
2583 *******************************/
2584
2585 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2586 The RDF triple format. This format is intended for quick save and load
2587 and not for readability or exchange. Parts are based on the SWI-Prolog
2588 Quick Load Format (implemented in pl-wic.c).
2589
2590 <file> ::= <magic>
2591 <version>
2592 ['S' <graph-name>]
2593 ['F' <graph-source>]
2594 ['t' <modified>]
2595 ['M' <md5>]
2596 {<triple>}
2597 'E'
2598
2599 <magic> ::= "RDF-dump\n"
2600 <version> ::= <integer>
2601
2602 <md5> ::= <byte>* (16 bytes digest)
2603
2604 <triple> ::= 'T'
2605 <subject>
2606 <predicate>
2607 <object>
2608 <graph>
2609
2610 <subject> ::= <resource>
2611 <predicate> ::= <resource>
2612
2613 <object> ::= "R" <resource>
2614 | "L" <atom>
2615
2616 <resource> ::= <atom>
2617
2618 <atom> ::= "X" <integer>
2619 "A" <string>
2620 "W" <utf-8 string>
2621
2622 <string> ::= <integer><bytes>
2623
2624 <graph-name> ::= <atom>
2625 <graph-source> ::= <atom>
2626
2627 <graph> ::= <graph-file>
2628 <line>
2629 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2630
2631 #define SAVE_MAGIC "RDF-dump\n"
2632 #define SAVE_VERSION 2
2633
2634 typedef struct saved
2635 { atom_t name;
2636 long as;
2637 struct saved *next;
2638 } saved;
2639
2640
2641 typedef struct save_context
2642 { saved ** saved_table;
2643 long saved_size;
2644 long saved_id;
2645 } save_context;
2646
2647
2648 long
next_table_size(long s0)2649 next_table_size(long s0)
2650 { long size = 2;
2651
2652 while(size < s0)
2653 size *= 2;
2654
2655 return size;
2656 }
2657
2658 static void
init_saved(rdf_db * db,save_context * ctx)2659 init_saved(rdf_db *db, save_context *ctx)
2660 { long size = next_table_size((db->created - db->erased)/8);
2661 long bytes = size * sizeof(*ctx->saved_table);
2662
2663 ctx->saved_table = rdf_malloc(db, bytes);
2664 memset(ctx->saved_table, 0, bytes);
2665 ctx->saved_size = size;
2666 ctx->saved_id = 0;
2667 }
2668
2669 static void
destroy_saved(rdf_db * db,save_context * ctx)2670 destroy_saved(rdf_db *db, save_context *ctx)
2671 { if ( ctx->saved_table )
2672 { saved **s = ctx->saved_table;
2673 int i;
2674
2675 for(i=0; i<ctx->saved_size; i++, s++)
2676 { saved *c, *n;
2677
2678 for(c=*s; c; c = n)
2679 { n = c->next;
2680 free(c);
2681 }
2682 }
2683
2684 rdf_free(db, ctx->saved_table, ctx->saved_size*sizeof(*ctx->saved_table));
2685 }
2686 }
2687
2688 #define INT64BITSIZE (sizeof(int64_t)*8)
2689 #define PLMINLONG ((int64_t)((uint64_t)1<<(INT64BITSIZE-1)))
2690
2691 static void
save_int(IOSTREAM * fd,int64_t n)2692 save_int(IOSTREAM *fd, int64_t n)
2693 { int m;
2694 int64_t absn = (n >= 0 ? n : -n);
2695
2696 if ( n != PLMINLONG )
2697 { if ( absn < ((intptr_t)1 << 5) )
2698 { Sputc((int)(n & 0x3f), fd);
2699 return;
2700 } else if ( absn < ((intptr_t)1 << 13) )
2701 { Sputc((int)(((n >> 8) & 0x3f) | (1 << 6)), fd);
2702 Sputc((int)(n & 0xff), fd);
2703 return;
2704 } else if ( absn < ((intptr_t)1 << 21) )
2705 { Sputc((int)(((n >> 16) & 0x3f) | (2 << 6)), fd);
2706 Sputc((int)((n >> 8) & 0xff), fd);
2707 Sputc((int)(n & 0xff), fd);
2708 return;
2709 }
2710 }
2711
2712 for(m = sizeof(n); ; m--)
2713 { int b = (int)((absn >> (((m-1)*8)-1)) & 0x1ff);
2714
2715 if ( b == 0 )
2716 continue;
2717 break;
2718 }
2719
2720 Sputc(m | (3 << 6), fd);
2721
2722 for( ; m > 0; m--)
2723 { int b = (int)((n >> ((m-1)*8)) & 0xff);
2724
2725 Sputc(b, fd);
2726 }
2727 }
2728
2729
2730 #define BYTES_PER_DOUBLE sizeof(double)
2731 #ifdef WORDS_BIGENDIAN
2732 static const int double_byte_order[] = { 7,6,5,4,3,2,1,0 };
2733 #else
2734 static const int double_byte_order[] = { 0,1,2,3,4,5,6,7 };
2735 #endif
2736
2737 static int
save_double(IOSTREAM * fd,double f)2738 save_double(IOSTREAM *fd, double f)
2739 { unsigned char *cl = (unsigned char *)&f;
2740 unsigned int i;
2741
2742 for(i=0; i<BYTES_PER_DOUBLE; i++)
2743 Sputc(cl[double_byte_order[i]], fd);
2744
2745 return TRUE;
2746 }
2747
2748
2749 static int
save_atom(rdf_db * db,IOSTREAM * out,atom_t a,save_context * ctx)2750 save_atom(rdf_db *db, IOSTREAM *out, atom_t a, save_context *ctx)
2751 { int hash = atom_hash(a) % ctx->saved_size;
2752 saved *s;
2753 size_t len;
2754 const char *chars;
2755 unsigned int i;
2756 const wchar_t *wchars;
2757
2758 for(s=ctx->saved_table[hash]; s; s= s->next)
2759 { if ( s->name == a )
2760 { Sputc('X', out);
2761 save_int(out, s->as);
2762
2763 return TRUE;
2764 }
2765 }
2766
2767 s = rdf_malloc(db, sizeof(*s));
2768 s->name = a;
2769 s->as = ctx->saved_id++;
2770 s->next = ctx->saved_table[hash];
2771 ctx->saved_table[hash] = s;
2772
2773 if ( (chars = PL_atom_nchars(a, &len)) )
2774 { Sputc('A', out);
2775 save_int(out, len);
2776 for(i=0; i<len; i++, chars++)
2777 Sputc(*chars&0xff, out);
2778 } else if ( (wchars = PL_atom_wchars(a, &len)) )
2779 { IOENC enc = out->encoding;
2780
2781 Sputc('W', out);
2782 save_int(out, len);
2783 out->encoding = ENC_UTF8;
2784 for(i=0; i<len; i++, wchars++)
2785 { wint_t c = *wchars;
2786
2787 SECURE(assert(c>=0 && c <= 0x10ffff));
2788 Sputcode(c, out);
2789 }
2790 out->encoding = enc;
2791 } else
2792 return FALSE;
2793
2794 return TRUE;
2795 }
2796
2797
2798 static void
write_triple(rdf_db * db,IOSTREAM * out,triple * t,save_context * ctx)2799 write_triple(rdf_db *db, IOSTREAM *out, triple *t, save_context *ctx)
2800 { Sputc('T', out);
2801
2802 save_atom(db, out, t->subject, ctx);
2803 save_atom(db, out, t->predicate.r->name, ctx);
2804
2805 if ( t->object_is_literal )
2806 { literal *lit = t->object.literal;
2807
2808 if ( lit->qualifier )
2809 { assert(lit->type_or_lang);
2810 Sputc(lit->qualifier == Q_LANG ? 'l' : 't', out);
2811 save_atom(db, out, lit->type_or_lang, ctx);
2812 }
2813
2814 switch(lit->objtype)
2815 { case OBJ_STRING:
2816 Sputc('L', out);
2817 save_atom(db, out, lit->value.string, ctx);
2818 break;
2819 case OBJ_INTEGER:
2820 Sputc('I', out);
2821 save_int(out, lit->value.integer);
2822 break;
2823 case OBJ_DOUBLE:
2824 { Sputc('F', out);
2825 save_double(out, lit->value.real);
2826 break;
2827 }
2828 case OBJ_TERM:
2829 { const char *s = lit->value.term.record;
2830 size_t len = lit->value.term.len;
2831
2832 Sputc('T', out);
2833 save_int(out, len);
2834 while(len-- > 0)
2835 Sputc(*s++, out);
2836
2837 break;
2838 }
2839 default:
2840 assert(0);
2841 }
2842 } else
2843 { Sputc('R', out);
2844 save_atom(db, out, t->object.resource, ctx);
2845 }
2846
2847 save_atom(db, out, t->graph, ctx);
2848 save_int(out, t->line);
2849 }
2850
2851
2852 static void
write_source(rdf_db * db,IOSTREAM * out,atom_t src,save_context * ctx)2853 write_source(rdf_db *db, IOSTREAM *out, atom_t src, save_context *ctx)
2854 { graph *s = lookup_graph(db, src, FALSE);
2855
2856 if ( s && s->source )
2857 { Sputc('F', out);
2858 save_atom(db, out, s->source, ctx);
2859 Sputc('t', out);
2860 save_double(out, s->modified);
2861 }
2862 }
2863
2864
2865 static void
write_md5(rdf_db * db,IOSTREAM * out,atom_t src)2866 write_md5(rdf_db *db, IOSTREAM *out, atom_t src)
2867 { graph *s = lookup_graph(db, src, FALSE);
2868
2869 if ( s )
2870 { md5_byte_t *p = s->digest;
2871 int i;
2872
2873 Sputc('M', out);
2874 for(i=0; i<16; i++)
2875 Sputc(*p++, out);
2876 }
2877 }
2878
2879
2880 static int
save_db(rdf_db * db,IOSTREAM * out,atom_t src)2881 save_db(rdf_db *db, IOSTREAM *out, atom_t src)
2882 { triple *t;
2883 save_context ctx;
2884
2885 if ( !RDLOCK(db) )
2886 return FALSE;
2887 init_saved(db, &ctx);
2888
2889 Sfprintf(out, "%s", SAVE_MAGIC);
2890 save_int(out, SAVE_VERSION);
2891 if ( src )
2892 { Sputc('S', out); /* start of graph header */
2893 save_atom(db, out, src, &ctx);
2894 write_source(db, out, src, &ctx);
2895 write_md5(db, out, src);
2896 }
2897 if ( Sferror(out) )
2898 { RDUNLOCK(db);
2899 return FALSE;
2900 }
2901
2902 for(t = db->by_none; t; t = t->next[BY_NONE])
2903 { if ( !t->erased &&
2904 (!src || t->graph == src) )
2905 { write_triple(db, out, t, &ctx);
2906 if ( Sferror(out) )
2907 return FALSE;
2908 }
2909 }
2910 Sputc('E', out);
2911 if ( Sferror(out) )
2912 { RDUNLOCK(db);
2913 return FALSE;
2914 }
2915
2916 destroy_saved(db, &ctx);
2917 RDUNLOCK(db);
2918
2919 return TRUE;
2920 }
2921
2922
2923 static foreign_t
rdf_save_db(term_t stream,term_t graph)2924 rdf_save_db(term_t stream, term_t graph)
2925 { IOSTREAM *out;
2926 atom_t src;
2927
2928 if ( !PL_get_stream_handle(stream, &out) )
2929 return type_error(stream, "stream");
2930 if ( !get_atom_or_var_ex(graph, &src) )
2931 return FALSE;
2932
2933 return save_db(DB, out, src);
2934 }
2935
2936
2937 static int64_t
load_int(IOSTREAM * fd)2938 load_int(IOSTREAM *fd)
2939 { int64_t first = Sgetc(fd);
2940 int bytes, shift, b;
2941
2942 if ( !(first & 0xc0) ) /* 99% of them: speed up a bit */
2943 { first <<= (INT64BITSIZE-6);
2944 first >>= (INT64BITSIZE-6);
2945
2946 return first;
2947 }
2948
2949 bytes = (int) ((first >> 6) & 0x3);
2950 first &= 0x3f;
2951
2952 if ( bytes <= 2 )
2953 { for( b = 0; b < bytes; b++ )
2954 { first <<= 8;
2955 first |= Sgetc(fd) & 0xff;
2956 }
2957
2958 shift = (sizeof(first)-1-bytes)*8 + 2;
2959 } else
2960 { int m;
2961
2962 bytes = (int)first;
2963 first = 0L;
2964
2965 for(m=0; m<bytes; m++)
2966 { first <<= 8;
2967 first |= Sgetc(fd) & 0xff;
2968 }
2969 shift = (sizeof(first)-bytes)*8;
2970 }
2971
2972 first <<= shift;
2973 first >>= shift;
2974
2975 return first;
2976 }
2977
2978
2979 static int
load_double(IOSTREAM * fd,double * fp)2980 load_double(IOSTREAM *fd, double *fp)
2981 { double f;
2982 unsigned char *cl = (unsigned char *)&f;
2983 unsigned int i;
2984
2985 for(i=0; i<BYTES_PER_DOUBLE; i++)
2986 { int c = Sgetc(fd);
2987
2988 if ( c == -1 )
2989 { *fp = 0.0;
2990 return FALSE;
2991 }
2992 cl[double_byte_order[i]] = c;
2993 }
2994
2995 *fp = f;
2996 return TRUE;
2997 }
2998
2999
3000 typedef struct ld_context
3001 { long loaded_id; /* keep track of atoms */
3002 atom_t *loaded_atoms;
3003 long atoms_size;
3004 atom_t graph; /* for single-graph files */
3005 atom_t graph_source;
3006 double modified;
3007 int has_digest;
3008 md5_byte_t digest[16];
3009 atom_hash *graph_table; /* multi-graph file */
3010 } ld_context;
3011
3012
3013 static void
add_atom(rdf_db * db,atom_t a,ld_context * ctx)3014 add_atom(rdf_db *db, atom_t a, ld_context *ctx)
3015 { if ( ctx->loaded_id >= ctx->atoms_size )
3016 { if ( ctx->atoms_size == 0 )
3017 { ctx->atoms_size = 1024;
3018 ctx->loaded_atoms = rdf_malloc(db, sizeof(atom_t)*ctx->atoms_size);
3019 } else
3020 { long obytes = sizeof(atom_t)*ctx->atoms_size;
3021 long bytes;
3022
3023 ctx->atoms_size *= 2;
3024 bytes = sizeof(atom_t)*ctx->atoms_size;
3025 ctx->loaded_atoms = rdf_realloc(db, ctx->loaded_atoms, obytes, bytes);
3026 }
3027 }
3028
3029 ctx->loaded_atoms[ctx->loaded_id++] = a;
3030 }
3031
3032
3033 static atom_t
load_atom(rdf_db * db,IOSTREAM * in,ld_context * ctx)3034 load_atom(rdf_db *db, IOSTREAM *in, ld_context *ctx)
3035 { switch(Sgetc(in))
3036 { case 'X':
3037 { intptr_t idx = (intptr_t)load_int(in);
3038 return ctx->loaded_atoms[idx];
3039 }
3040 case 'A':
3041 { size_t len = (size_t)load_int(in);
3042 atom_t a;
3043
3044 if ( len < 1024 )
3045 { char buf[1024];
3046 Sfread(buf, 1, len, in);
3047 a = PL_new_atom_nchars(len, buf);
3048 } else
3049 { char *buf = rdf_malloc(db, len);
3050 Sfread(buf, 1, len, in);
3051 a = PL_new_atom_nchars(len, buf);
3052 rdf_free(db, buf, len);
3053 }
3054
3055 add_atom(db, a, ctx);
3056 return a;
3057 }
3058 case 'W':
3059 { int len = (int)load_int(in);
3060 atom_t a;
3061 wchar_t buf[1024];
3062 wchar_t *w;
3063 IOENC enc = in->encoding;
3064 int i;
3065
3066 if ( len < 1024 )
3067 w = buf;
3068 else
3069 w = rdf_malloc(db, len*sizeof(wchar_t));
3070
3071 in->encoding = ENC_UTF8;
3072 for(i=0; i<len; i++)
3073 { w[i] = Sgetcode(in);
3074 SECURE(assert(w[i]>=0 && w[i] <= 0x10ffff));
3075 }
3076 in->encoding = enc;
3077
3078 a = PL_new_atom_wchars(len, w);
3079 if ( w != buf )
3080 rdf_free(db, w, len*sizeof(wchar_t));
3081
3082 add_atom(db, a, ctx);
3083 return a;
3084 }
3085 default:
3086 { assert(0);
3087 return 0;
3088 }
3089 }
3090 }
3091
3092
3093 static triple *
load_triple(rdf_db * db,IOSTREAM * in,ld_context * ctx)3094 load_triple(rdf_db *db, IOSTREAM *in, ld_context *ctx)
3095 { triple *t = new_triple(db);
3096 int c;
3097
3098 t->subject = load_atom(db, in, ctx);
3099 t->predicate.u = load_atom(db, in, ctx);
3100 t->resolve_pred = TRUE;
3101 if ( (c=Sgetc(in)) == 'R' )
3102 { t->object.resource = load_atom(db, in, ctx);
3103 } else
3104 { literal *lit = new_literal(db);
3105
3106 t->object_is_literal = TRUE;
3107 t->object.literal = lit;
3108
3109 value:
3110 switch(c)
3111 { case 'L':
3112 lit->objtype = OBJ_STRING;
3113 lit->value.string = load_atom(db, in, ctx);
3114 break;
3115 case 'I':
3116 lit->objtype = OBJ_INTEGER;
3117 lit->value.integer = load_int(in);
3118 break;
3119 case 'F':
3120 lit->objtype = OBJ_DOUBLE;
3121 load_double(in, &lit->value.real);
3122 break;
3123 case 'T':
3124 { unsigned int i;
3125 char *s;
3126
3127 lit->objtype = OBJ_TERM;
3128 lit->value.term.len = (size_t)load_int(in);
3129 lit->value.term.record = rdf_malloc(db, lit->value.term.len);
3130 lit->term_loaded = TRUE; /* see free_literal() */
3131 s = (char *)lit->value.term.record;
3132
3133 for(i=0; i<lit->value.term.len; i++)
3134 s[i] = Sgetc(in);
3135
3136 break;
3137 }
3138 case 'l':
3139 lit->qualifier = Q_LANG;
3140 lit->type_or_lang = load_atom(db, in, ctx);
3141 c = Sgetc(in);
3142 goto value;
3143 case 't':
3144 lit->qualifier = Q_TYPE;
3145 lit->type_or_lang = load_atom(db, in, ctx);
3146 c = Sgetc(in);
3147 goto value;
3148 default:
3149 assert(0);
3150 return NULL;
3151 }
3152 }
3153 t->graph = load_atom(db, in, ctx);
3154 t->line = (unsigned long)load_int(in);
3155 if ( !ctx->graph )
3156 { if ( !ctx->graph_table )
3157 ctx->graph_table = new_atom_hash(64);
3158 add_atom_hash(ctx->graph_table, t->graph);
3159 }
3160
3161 return t;
3162 }
3163
3164
3165 static int
load_magic(IOSTREAM * in)3166 load_magic(IOSTREAM *in)
3167 { char *s = SAVE_MAGIC;
3168
3169 for( ; *s; s++)
3170 { if ( Sgetc(in) != *s )
3171 return FALSE;
3172 }
3173
3174 return TRUE;
3175 }
3176
3177
3178 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3179 Note that we have two types of saved states. One holding many named
3180 graphs and one holding the content of exactly one named graph.
3181 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3182
3183 #define LOAD_ERROR ((triple*)(intptr_t)-1)
3184
3185 static triple *
load_db(rdf_db * db,IOSTREAM * in,ld_context * ctx)3186 load_db(rdf_db *db, IOSTREAM *in, ld_context *ctx)
3187 { int version;
3188 int c;
3189 triple *list = NULL, *tail = NULL;
3190
3191 if ( !load_magic(in) )
3192 return LOAD_ERROR;
3193 version = (int)load_int(in);
3194
3195 while((c=Sgetc(in)) != EOF)
3196 { switch(c)
3197 { case 'T':
3198 { triple *t;
3199
3200 if ( !(t=load_triple(db, in, ctx)) )
3201 return FALSE;
3202
3203 if ( tail )
3204 { tail->next[BY_NONE] = t;
3205 tail = t;
3206 } else
3207 { list = tail = t;
3208 }
3209
3210 break;
3211 }
3212 /* file holding exactly one graph */
3213 case 'S': /* name of the graph */
3214 { ctx->graph = load_atom(db, in, ctx);
3215 break;
3216 }
3217 case 'M': /* MD5 of the graph */
3218 { int i;
3219
3220 for(i=0; i<16; i++)
3221 ctx->digest[i] = Sgetc(in);
3222 ctx->has_digest = TRUE;
3223
3224 break;
3225 }
3226 case 'F': /* file of the graph */
3227 ctx->graph_source = load_atom(db, in, ctx);
3228 break; /* end of one-graph handling */
3229 case 't':
3230 load_double(in, &ctx->modified);
3231 break;
3232 case 'E': /* end of file */
3233 return list;
3234 default:
3235 break;
3236 }
3237 }
3238
3239 PL_warning("Illegal RDF triple file");
3240
3241 return LOAD_ERROR;
3242 }
3243
3244
3245 static int
link_loaded_triples(rdf_db * db,triple * t,ld_context * ctx)3246 link_loaded_triples(rdf_db *db, triple *t, ld_context *ctx)
3247 { long created0 = db->created;
3248 graph *graph;
3249
3250 if ( ctx->graph ) /* lookup named graph */
3251 { graph = lookup_graph(db, ctx->graph, TRUE);
3252 if ( ctx->graph_source && graph->source != ctx->graph_source )
3253 { if ( graph->source )
3254 PL_unregister_atom(graph->source);
3255 graph->source = ctx->graph_source;
3256 PL_register_atom(graph->source);
3257 graph->modified = ctx->modified;
3258 }
3259
3260 if ( ctx->has_digest )
3261 { if ( graph->md5 )
3262 { if ( db->tr_first )
3263 { record_md5_transaction(db, graph, NULL);
3264 } else
3265 { graph->md5 = FALSE; /* kill repetitive MD5 update */
3266 }
3267 } else
3268 { ctx->has_digest = FALSE;
3269 }
3270 }
3271 } else
3272 { graph = NULL;
3273 }
3274
3275
3276 if ( db->tr_first ) /* loading in a transaction */
3277 { triple *next;
3278
3279 for( ; t; t = next )
3280 { next = t->next[BY_NONE];
3281
3282 t->next[BY_NONE] = NULL;
3283 lock_atoms(t);
3284 record_transaction(db, TR_ASSERT, t);
3285 }
3286 } else
3287 { triple *next;
3288
3289 for( ; t; t = next )
3290 { next = t->next[BY_NONE];
3291
3292 t->next[BY_NONE] = NULL;
3293 lock_atoms(t);
3294 if ( link_triple_silent(db, t) )
3295 broadcast(EV_ASSERT_LOAD, t, NULL);
3296 }
3297 }
3298
3299 /* update the graph info */
3300 if ( ctx->has_digest )
3301 { if ( db->tr_first )
3302 { md5_byte_t *d = rdf_malloc(db, sizeof(ctx->digest));
3303 memcpy(d, ctx->digest, sizeof(ctx->digest));
3304 record_md5_transaction(db, graph, d);
3305 } else
3306 { sum_digest(graph->digest, ctx->digest);
3307 }
3308 graph->md5 = TRUE;
3309 }
3310
3311 db->generation += (db->created-created0);
3312
3313 return TRUE;
3314 }
3315
3316
3317 static int
append_graph_to_list(ptr_hash_node * node,void * closure)3318 append_graph_to_list(ptr_hash_node *node, void *closure)
3319 { atom_t graph = (atom_t)node->value;
3320 term_t tail = (term_t)closure;
3321 term_t head = PL_new_term_ref();
3322 int rc;
3323
3324 rc = (PL_unify_list(tail, head, tail) &&
3325 PL_unify_atom(head, graph));
3326 PL_reset_term_refs(head);
3327
3328 return rc;
3329 }
3330
3331
3332 static foreign_t
rdf_load_db(term_t stream,term_t id,term_t graphs)3333 rdf_load_db(term_t stream, term_t id, term_t graphs)
3334 { ld_context ctx;
3335 rdf_db *db = DB;
3336 IOSTREAM *in;
3337 triple *list;
3338 int rc;
3339
3340 if ( !PL_get_stream_handle(stream, &in) )
3341 return type_error(stream, "stream");
3342
3343 memset(&ctx, 0, sizeof(ctx));
3344 if ( (list=load_db(db, in, &ctx)) == LOAD_ERROR )
3345 return FALSE;
3346
3347 if ( !WRLOCK(db, FALSE) )
3348 return FALSE;
3349 broadcast(EV_LOAD, (void*)id, (void*)ATOM_begin);
3350
3351 if ( (rc=link_loaded_triples(db, list, &ctx)) )
3352 { if ( ctx.graph_table )
3353 { term_t tail = PL_copy_term_ref(graphs);
3354
3355 rc = ( for_atom_hash(ctx.graph_table, append_graph_to_list, (void*)tail) &&
3356 PL_unify_nil(tail) );
3357
3358 destroy_atom_hash(ctx.graph_table);
3359 } else
3360 { rc = PL_unify_atom(graphs, ctx.graph);
3361 }
3362 }
3363 broadcast(EV_LOAD, (void*)id, (void*)ATOM_end);
3364 WRUNLOCK(db);
3365
3366 PL_release_stream(in);
3367 if ( ctx.loaded_atoms )
3368 { atom_t *ap, *ep;
3369
3370 for(ap=ctx.loaded_atoms, ep=ap+ctx.loaded_id; ap<ep; ap++)
3371 PL_unregister_atom(*ap);
3372
3373 rdf_free(db, ctx.loaded_atoms, sizeof(atom_t)*ctx.atoms_size);
3374 }
3375
3376 return rc;
3377 }
3378
3379
3380 #ifdef WITH_MD5
3381 /*******************************
3382 * MD5 SUPPORT *
3383 *******************************/
3384
3385 /* md5_type is used to keep the MD5 independent from the internal
3386 numbers
3387 */
3388 static const char md5_type[] =
3389 { 0x0, /* OBJ_UNKNOWN */
3390 0x3, /* OBJ_INTEGER */
3391 0x4, /* OBJ_DOUBLE */
3392 0x2, /* OBJ_STRING */
3393 0x5 /* OBJ_TERM */
3394 };
3395
3396 static void
md5_triple(triple * t,md5_byte_t * digest)3397 md5_triple(triple *t, md5_byte_t *digest)
3398 { md5_state_t state;
3399 size_t len;
3400 md5_byte_t tmp[2];
3401 const char *s;
3402 literal *lit;
3403
3404 md5_init(&state);
3405 s = PL_blob_data(t->subject, &len, NULL);
3406 md5_append(&state, (const md5_byte_t *)s, (int)len);
3407 md5_append(&state, (const md5_byte_t *)"P", 1);
3408 s = PL_blob_data(t->predicate.r->name, &len, NULL);
3409 md5_append(&state, (const md5_byte_t *)s, (int)len);
3410 tmp[0] = 'O';
3411 if ( t->object_is_literal )
3412 { lit = t->object.literal;
3413 tmp[1] = md5_type[lit->objtype];
3414
3415 switch(lit->objtype)
3416 { case OBJ_STRING:
3417 s = PL_blob_data(lit->value.string, &len, NULL);
3418 break;
3419 case OBJ_INTEGER: /* TBD: byte order issues */
3420 s = (const char *)&lit->value.integer;
3421 len = sizeof(lit->value.integer);
3422 break;
3423 case OBJ_DOUBLE:
3424 s = (const char *)&lit->value.real;
3425 len = sizeof(lit->value.real);
3426 break;
3427 case OBJ_TERM:
3428 s = (const char *)lit->value.term.record;
3429 len = lit->value.term.len;
3430 break;
3431 default:
3432 assert(0);
3433 }
3434 } else
3435 { s = PL_blob_data(t->object.resource, &len, NULL);
3436 tmp[1] = 0x1; /* old OBJ_RESOURCE */
3437 lit = NULL;
3438 }
3439 md5_append(&state, tmp, 2);
3440 md5_append(&state, (const md5_byte_t *)s, (int)len);
3441 if ( lit && lit->qualifier )
3442 { assert(lit->type_or_lang);
3443 md5_append(&state,
3444 (const md5_byte_t *)(lit->qualifier == Q_LANG ? "l" : "t"),
3445 1);
3446 s = PL_blob_data(lit->type_or_lang, &len, NULL);
3447 md5_append(&state, (const md5_byte_t *)s, (int)len);
3448 }
3449 if ( t->graph )
3450 { md5_append(&state, (const md5_byte_t *)"S", 1);
3451 s = PL_blob_data(t->graph, &len, NULL);
3452 md5_append(&state, (const md5_byte_t *)s, (int)len);
3453 }
3454
3455 md5_finish(&state, digest);
3456 }
3457
3458
3459 static void
sum_digest(md5_byte_t * digest,md5_byte_t * add)3460 sum_digest(md5_byte_t *digest, md5_byte_t *add)
3461 { md5_byte_t *p, *q;
3462 int n;
3463
3464 for(p=digest, q=add, n=16; --n>=0; )
3465 *p++ += *q++;
3466 }
3467
3468
3469 static void
dec_digest(md5_byte_t * digest,md5_byte_t * add)3470 dec_digest(md5_byte_t *digest, md5_byte_t *add)
3471 { md5_byte_t *p, *q;
3472 int n;
3473
3474 for(p=digest, q=add, n=16; --n>=0; )
3475 *p++ -= *q++;
3476 }
3477
3478
3479 static int
md5_unify_digest(term_t t,md5_byte_t digest[16])3480 md5_unify_digest(term_t t, md5_byte_t digest[16])
3481 { char hex_output[16*2];
3482 int di;
3483 char *pi;
3484 static char hexd[] = "0123456789abcdef";
3485
3486 for(pi=hex_output, di = 0; di < 16; ++di)
3487 { *pi++ = hexd[(digest[di] >> 4) & 0x0f];
3488 *pi++ = hexd[digest[di] & 0x0f];
3489 }
3490
3491 return PL_unify_atom_nchars(t, 16*2, hex_output);
3492 }
3493
3494
3495 static foreign_t
rdf_md5(term_t graph_name,term_t md5)3496 rdf_md5(term_t graph_name, term_t md5)
3497 { atom_t src;
3498 int rc;
3499 rdf_db *db = DB;
3500
3501 if ( !get_atom_or_var_ex(graph_name, &src) )
3502 return FALSE;
3503
3504 if ( src )
3505 { graph *s;
3506
3507 if ( !RDLOCK(db) )
3508 return FALSE;
3509 if ( (s = lookup_graph(db, src, FALSE)) )
3510 { rc = md5_unify_digest(md5, s->digest);
3511 } else
3512 { md5_byte_t digest[16];
3513
3514 memset(digest, 0, sizeof(digest));
3515 rc = md5_unify_digest(md5, digest);
3516 }
3517 RDUNLOCK(db);
3518 } else
3519 { md5_byte_t digest[16];
3520 graph **ht;
3521 int i;
3522
3523 memset(&digest, 0, sizeof(digest));
3524
3525 if ( !RDLOCK(db) )
3526 return FALSE;
3527
3528 for(i=0,ht = db->graph_table; i<db->graph_table_size; i++, ht++)
3529 { graph *s;
3530
3531 for( s = *ht; s; s = s->next )
3532 sum_digest(digest, s->digest);
3533 }
3534
3535 rc = md5_unify_digest(md5, digest);
3536 RDUNLOCK(db);
3537 }
3538
3539 return rc;
3540 }
3541
3542
3543 static foreign_t
rdf_atom_md5(term_t text,term_t times,term_t md5)3544 rdf_atom_md5(term_t text, term_t times, term_t md5)
3545 { char *s;
3546 int n, i;
3547 size_t len;
3548 md5_byte_t digest[16];
3549
3550 if ( !PL_get_nchars(text, &len, &s, CVT_ALL) )
3551 return type_error(text, "text");
3552 if ( !PL_get_integer(times, &n) )
3553 return type_error(times, "integer");
3554 if ( n < 1 )
3555 return domain_error(times, "positive_integer");
3556
3557 for(i=0; i<n; i++)
3558 { md5_state_t state;
3559 md5_init(&state);
3560 md5_append(&state, (const md5_byte_t *)s, (int)len);
3561 md5_finish(&state, digest);
3562 s = (char *)digest;
3563 len = sizeof(digest);
3564 }
3565
3566 return md5_unify_digest(md5, digest);
3567 }
3568
3569
3570
3571 #endif /*WITH_MD5*/
3572
3573
3574 /*******************************
3575 * ATOMS *
3576 *******************************/
3577
3578 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3579 Lock atoms in triple against AGC. Note that the predicate name is locked
3580 in the predicate structure.
3581 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3582
3583 static void
lock_atoms(triple * t)3584 lock_atoms(triple *t)
3585 { if ( !t->atoms_locked )
3586 { t->atoms_locked = TRUE;
3587
3588 PL_register_atom(t->subject);
3589 if ( t->object_is_literal )
3590 { lock_atoms_literal(t->object.literal);
3591 } else
3592 { PL_register_atom(t->object.resource);
3593 }
3594 }
3595 }
3596
3597
3598 static void
unlock_atoms(triple * t)3599 unlock_atoms(triple *t)
3600 { if ( t->atoms_locked )
3601 { t->atoms_locked = FALSE;
3602
3603 PL_unregister_atom(t->subject);
3604 if ( !t->object_is_literal )
3605 { PL_unregister_atom(t->object.resource);
3606 }
3607 }
3608 }
3609
3610
3611 /*******************************
3612 * PROLOG CONVERSION *
3613 *******************************/
3614
3615 #define LIT_TYPED 0x1
3616 #define LIT_NOERROR 0x2
3617 #define LIT_PARTIAL 0x4
3618
3619 static int
get_lit_atom_ex(term_t t,atom_t * a,int flags)3620 get_lit_atom_ex(term_t t, atom_t *a, int flags)
3621 { if ( PL_get_atom(t, a) )
3622 return TRUE;
3623 if ( (flags & LIT_PARTIAL) && PL_is_variable(t) )
3624 { *a = 0L;
3625 return TRUE;
3626 }
3627
3628 return type_error(t, "atom");
3629 }
3630
3631
3632 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3633 get_literal() processes the argument of a literal/1 term passes as
3634 object.
3635 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3636
3637 static int
get_literal(rdf_db * db,term_t litt,triple * t,int flags)3638 get_literal(rdf_db *db, term_t litt, triple *t, int flags)
3639 { literal *lit;
3640
3641 alloc_literal_triple(db, t);
3642 lit = t->object.literal;
3643
3644 if ( PL_get_atom(litt, &lit->value.string) )
3645 { lit->objtype = OBJ_STRING;
3646 } else if ( PL_is_integer(litt) && PL_get_int64(litt, &lit->value.integer) )
3647 { lit->objtype = OBJ_INTEGER;
3648 } else if ( PL_get_float(litt, &lit->value.real) )
3649 { lit->objtype = OBJ_DOUBLE;
3650 } else if ( PL_is_functor(litt, FUNCTOR_lang2) )
3651 { term_t a = PL_new_term_ref();
3652
3653 _PL_get_arg(1, litt, a);
3654 if ( !get_lit_atom_ex(a, &lit->type_or_lang, flags) )
3655 return FALSE;
3656 _PL_get_arg(2, litt, a);
3657 if ( !get_lit_atom_ex(a, &lit->value.string, flags) )
3658 return FALSE;
3659
3660 lit->qualifier = Q_LANG;
3661 lit->objtype = OBJ_STRING;
3662 } else if ( PL_is_functor(litt, FUNCTOR_type2) &&
3663 !(flags & LIT_TYPED) ) /* avoid recursion */
3664 { term_t a = PL_new_term_ref();
3665
3666 _PL_get_arg(1, litt, a);
3667 if ( !get_lit_atom_ex(a, &lit->type_or_lang, flags) )
3668 return FALSE;
3669 lit->qualifier = Q_TYPE;
3670 _PL_get_arg(2, litt, a);
3671
3672 return get_literal(db, a, t, LIT_TYPED|flags);
3673 } else if ( !PL_is_ground(litt) )
3674 { if ( !(flags & LIT_PARTIAL) )
3675 return type_error(litt, "rdf_object");
3676 if ( !PL_is_variable(litt) )
3677 lit->objtype = OBJ_TERM;
3678 } else
3679 { lit->value.term.record = PL_record_external(litt, &lit->value.term.len);
3680 lit->objtype = OBJ_TERM;
3681 }
3682
3683 return TRUE;
3684 }
3685
3686
3687 static int
get_object(rdf_db * db,term_t object,triple * t)3688 get_object(rdf_db *db, term_t object, triple *t)
3689 { if ( PL_get_atom(object, &t->object.resource) )
3690 { assert(!t->object_is_literal);
3691 } else if ( PL_is_functor(object, FUNCTOR_literal1) )
3692 { term_t a = PL_new_term_ref();
3693
3694 _PL_get_arg(1, object, a);
3695 return get_literal(db, a, t, 0);
3696 } else
3697 return type_error(object, "rdf_object");
3698
3699 return TRUE;
3700 }
3701
3702
3703 static int
get_src(term_t src,triple * t)3704 get_src(term_t src, triple *t)
3705 { if ( src && !PL_is_variable(src) )
3706 { if ( PL_get_atom(src, &t->graph) )
3707 { t->line = NO_LINE;
3708 } else if ( PL_is_functor(src, FUNCTOR_colon2) )
3709 { term_t a = PL_new_term_ref();
3710 long line;
3711
3712 _PL_get_arg(1, src, a);
3713 if ( !get_atom_or_var_ex(a, &t->graph) )
3714 return FALSE;
3715 _PL_get_arg(2, src, a);
3716 if ( PL_get_long(a, &line) )
3717 t->line = line;
3718 else if ( !PL_is_variable(a) )
3719 return type_error(a, "integer");
3720 } else
3721 return type_error(src, "rdf_graph");
3722 }
3723
3724 return TRUE;
3725 }
3726
3727
3728 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3729 Return values:
3730 -1: exception
3731 0: no predicate
3732 1: the predicate
3733 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3734
3735 static int
get_existing_predicate(rdf_db * db,term_t t,predicate ** p)3736 get_existing_predicate(rdf_db *db, term_t t, predicate **p)
3737 { atom_t name;
3738
3739 if ( !PL_get_atom(t, &name ) )
3740 { if ( PL_is_functor(t, FUNCTOR_literal1) )
3741 return 0; /* rdf(_, literal(_), _) */
3742 return type_error(t, "atom");
3743 }
3744
3745 if ( (*p = existing_predicate(db, name)) )
3746 return 1;
3747
3748 DEBUG(5, Sdprintf("No predicate %s\n", PL_atom_chars(name)));
3749 return 0; /* no predicate */
3750 }
3751
3752
3753 static int
get_predicate(rdf_db * db,term_t t,predicate ** p)3754 get_predicate(rdf_db *db, term_t t, predicate **p)
3755 { atom_t name;
3756
3757 if ( !get_atom_ex(t, &name ) )
3758 return FALSE;
3759
3760 *p = lookup_predicate(db, name);
3761 return TRUE;
3762 }
3763
3764
3765 static int
get_triple(rdf_db * db,term_t subject,term_t predicate,term_t object,triple * t)3766 get_triple(rdf_db *db,
3767 term_t subject, term_t predicate, term_t object,
3768 triple *t)
3769 { if ( !get_atom_ex(subject, &t->subject) ||
3770 !get_predicate(db, predicate, &t->predicate.r) ||
3771 !get_object(db, object, t) )
3772 return FALSE;
3773
3774 return TRUE;
3775 }
3776
3777
3778 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3779 get_partial_triple() creates a triple for matching purposes. It can
3780 return FALSE for two reasons. Mostly (type) errors, but also if
3781 resources are accessed that do not exist and therefore the subsequent
3782 matching will always fail. This is notably the case for predicates,
3783 which are first class citizens to this library.
3784
3785 Return values:
3786 1: ok
3787 0: no predicate
3788 -1: error
3789 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3790
3791 static int
get_partial_triple(rdf_db * db,term_t subject,term_t predicate,term_t object,term_t src,triple * t)3792 get_partial_triple(rdf_db *db,
3793 term_t subject, term_t predicate, term_t object,
3794 term_t src, triple *t)
3795 { int rc;
3796
3797 if ( subject && !get_resource_or_var_ex(subject, &t->subject) )
3798 return FALSE;
3799 if ( !PL_is_variable(predicate) &&
3800 (rc=get_existing_predicate(db, predicate, &t->predicate.r)) != 1 )
3801 return rc;
3802 /* the object */
3803 if ( object && !PL_is_variable(object) )
3804 { if ( PL_get_atom(object, &t->object.resource) )
3805 { assert(!t->object_is_literal);
3806 } else if ( PL_is_functor(object, FUNCTOR_literal1) )
3807 { term_t a = PL_new_term_ref();
3808
3809 _PL_get_arg(1, object, a);
3810 if ( !get_literal(db, a, t, LIT_PARTIAL) )
3811 return FALSE;
3812 } else if ( PL_is_functor(object, FUNCTOR_literal2) )
3813 { term_t a = PL_new_term_ref();
3814 literal *lit;
3815
3816 alloc_literal_triple(db, t);
3817 lit = t->object.literal;
3818
3819 _PL_get_arg(1, object, a);
3820 if ( PL_is_functor(a, FUNCTOR_exact1) )
3821 t->match = STR_MATCH_EXACT;
3822 else if ( PL_is_functor(a, FUNCTOR_plain1) )
3823 t->match = STR_MATCH_PLAIN;
3824 else if ( PL_is_functor(a, FUNCTOR_substring1) )
3825 t->match = STR_MATCH_SUBSTRING;
3826 else if ( PL_is_functor(a, FUNCTOR_word1) )
3827 t->match = STR_MATCH_WORD;
3828 else if ( PL_is_functor(a, FUNCTOR_prefix1) )
3829 t->match = STR_MATCH_PREFIX;
3830 else if ( PL_is_functor(a, FUNCTOR_like1) )
3831 t->match = STR_MATCH_LIKE;
3832 else
3833 return domain_error(a, "match_type");
3834
3835 _PL_get_arg(1, a, a);
3836 if ( !get_atom_or_var_ex(a, &lit->value.string) )
3837 return FALSE;
3838 lit->objtype = OBJ_STRING;
3839 } else
3840 return type_error(object, "rdf_object");
3841 }
3842 /* the graph */
3843 if ( !get_src(src, t) )
3844 return FALSE;
3845
3846 if ( t->subject )
3847 t->indexed |= BY_S;
3848 if ( t->predicate.r )
3849 t->indexed |= BY_P;
3850 if ( t->object_is_literal )
3851 { literal *lit = t->object.literal;
3852
3853 if ( lit->objtype == OBJ_STRING &&
3854 lit->value.string &&
3855 t->match <= STR_MATCH_EXACT )
3856 t->indexed |= BY_O;
3857 } else if ( t->object.resource )
3858 t->indexed |= BY_O;
3859
3860 db->indexed[t->indexed]++; /* statistics */
3861
3862 switch(t->indexed)
3863 { case BY_SPO:
3864 t->indexed = BY_SP;
3865 break;
3866 case BY_SO:
3867 t->indexed = BY_S;
3868 break;
3869 }
3870
3871 return TRUE;
3872 }
3873
3874
3875 static int
inverse_partial_triple(triple * t)3876 inverse_partial_triple(triple *t)
3877 { predicate *i = 0;
3878
3879 if ( !t->inversed &&
3880 (!t->predicate.r || (i=t->predicate.r->inverse_of)) &&
3881 !t->object_is_literal )
3882 { atom_t o = t->object.resource;
3883
3884 t->object.resource = t->subject;
3885 t->subject = o;
3886
3887 if ( t->predicate.r )
3888 t->predicate.r = i;
3889
3890 t->indexed = by_inverse[t->indexed];
3891 t->inversed = TRUE;
3892
3893 return TRUE;
3894 }
3895
3896 return FALSE;
3897 }
3898
3899
3900 static int
get_graph(term_t src,triple * t)3901 get_graph(term_t src, triple *t)
3902 { if ( PL_get_atom(src, &t->graph) )
3903 { t->line = NO_LINE;
3904 return TRUE;
3905 }
3906
3907 if ( PL_is_functor(src, FUNCTOR_colon2) )
3908 { term_t a = PL_new_term_ref();
3909 long line;
3910
3911 _PL_get_arg(1, src, a);
3912 if ( !get_atom_ex(a, &t->graph) )
3913 return FALSE;
3914 _PL_get_arg(2, src, a);
3915 if ( !get_long_ex(a, &line) )
3916 return FALSE;
3917 t->line = line;
3918
3919 return TRUE;
3920 }
3921
3922 return type_error(src, "rdf_graph");
3923 }
3924
3925
3926 static int
unify_graph(term_t src,triple * t)3927 unify_graph(term_t src, triple *t)
3928 { switch( PL_term_type(src) )
3929 { case PL_VARIABLE:
3930 { if ( t->line == NO_LINE )
3931 return PL_unify_atom(src, t->graph);
3932 else
3933 goto full_term;
3934 }
3935 case PL_ATOM:
3936 { atom_t a;
3937 return (PL_get_atom(src, &a) &&
3938 a == t->graph);
3939 }
3940 case PL_TERM:
3941 { if ( t->line == NO_LINE )
3942 { return PL_unify_term(src,
3943 PL_FUNCTOR, FUNCTOR_colon2,
3944 PL_ATOM, t->graph,
3945 PL_VARIABLE);
3946 } else
3947 { full_term:
3948 return PL_unify_term(src,
3949 PL_FUNCTOR, FUNCTOR_colon2,
3950 PL_ATOM, t->graph,
3951 PL_LONG, t->line);
3952 }
3953 }
3954 default:
3955 return type_error(src, "rdf_graph");
3956 }
3957 }
3958
3959
3960 static int
same_graph(triple * t1,triple * t2)3961 same_graph(triple *t1, triple *t2)
3962 { return t1->line == t2->line &&
3963 t1->graph == t2->graph;
3964 }
3965
3966
3967
3968 static int
put_literal_value(term_t v,literal * lit)3969 put_literal_value(term_t v, literal *lit)
3970 { switch(lit->objtype)
3971 { case OBJ_STRING:
3972 PL_put_atom(v, lit->value.string);
3973 break;
3974 case OBJ_INTEGER:
3975 PL_put_variable(v);
3976 return PL_unify_int64(v, lit->value.integer);
3977 case OBJ_DOUBLE:
3978 return PL_put_float(v, lit->value.real);
3979 case OBJ_TERM:
3980 return PL_recorded_external(lit->value.term.record, v);
3981 default:
3982 assert(0);
3983 return FALSE;
3984 }
3985
3986 return TRUE;
3987 }
3988
3989
3990 static int
unify_literal(term_t lit,literal * l)3991 unify_literal(term_t lit, literal *l)
3992 { term_t v = PL_new_term_ref();
3993
3994 if ( !put_literal_value(v, l) )
3995 return FALSE;
3996
3997 if ( l->qualifier )
3998 { functor_t qf;
3999
4000 assert(l->type_or_lang);
4001
4002 if ( l->qualifier == Q_LANG )
4003 qf = FUNCTOR_lang2;
4004 else
4005 qf = FUNCTOR_type2;
4006
4007 if ( PL_unify_term(lit, PL_FUNCTOR, qf,
4008 PL_ATOM, l->type_or_lang,
4009 PL_TERM, v) )
4010 return TRUE;
4011
4012 return PL_unify(lit, v); /* allow rdf(X, Y, literal(foo)) */
4013 } else if ( PL_unify(lit, v) )
4014 { return TRUE;
4015 } else if ( PL_is_functor(lit, FUNCTOR_lang2) &&
4016 l->objtype == OBJ_STRING )
4017 { term_t a = PL_new_term_ref();
4018 _PL_get_arg(2, lit, a);
4019 return PL_unify(a, v);
4020 } else if ( PL_is_functor(lit, FUNCTOR_type2) )
4021 { term_t a = PL_new_term_ref();
4022 _PL_get_arg(2, lit, a);
4023 return PL_unify(a, v);
4024 } else
4025 return FALSE;
4026 }
4027
4028
4029
4030 static int
unify_object(term_t object,triple * t)4031 unify_object(term_t object, triple *t)
4032 { if ( t->object_is_literal )
4033 { term_t lit = PL_new_term_ref();
4034
4035 if ( PL_unify_functor(object, FUNCTOR_literal1) )
4036 _PL_get_arg(1, object, lit);
4037 else if ( PL_is_functor(object, FUNCTOR_literal2) )
4038 _PL_get_arg(2, object, lit);
4039 else
4040 return FALSE;
4041
4042 return unify_literal(lit, t->object.literal);
4043 } else
4044 { return PL_unify_atom(object, t->object.resource);
4045 }
4046 }
4047
4048
4049 static int
unify_triple(term_t subject,term_t pred,term_t object,term_t src,triple * t,int inversed)4050 unify_triple(term_t subject, term_t pred, term_t object,
4051 term_t src, triple *t, int inversed)
4052 { predicate *p = t->predicate.r;
4053 fid_t fid;
4054
4055 if ( inversed )
4056 { term_t tmp = object;
4057 object = subject;
4058 subject = tmp;
4059
4060 if ( !(p = p->inverse_of) )
4061 return FALSE;
4062 }
4063
4064 fid = PL_open_foreign_frame();
4065
4066 if ( !PL_unify_atom(subject, t->subject) ||
4067 !PL_unify_atom(pred, p->name) ||
4068 !unify_object(object, t) ||
4069 (src && !unify_graph(src, t)) )
4070 { PL_discard_foreign_frame(fid);
4071 return FALSE;
4072 } else
4073 { PL_close_foreign_frame(fid);
4074 return TRUE;
4075 }
4076 }
4077
4078
4079 /*******************************
4080 * DUBLICATE HANDLING *
4081 *******************************/
4082
4083 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4084 According to the RDF specs, duplicate triples have no meaning, but they
4085 slow down search and often produce duplicate results in search. Worse,
4086 some coding styles proposed in the OWL documents introduce huge amounts
4087 of duplicate triples. We cannot simply ignore a triple if it is a
4088 duplicate as a subsequent retract would delete the final triple. For
4089 example, after loading two files that contain the same triple and
4090 unloading one of these files the database would be left without triples.
4091
4092 In our solution, if a triple is added as a duplicate, it is flagged such
4093 using the flag is_duplicate. The `principal' triple has a count
4094 `duplicates', indicating the number of duplicate triples in the
4095 database.
4096
4097 It might make sense to introduce the BY_SPO table as fully indexed
4098 lookups are frequent with the introduction of duplicate detection.
4099
4100 (*) Iff too many triples are added, it may be time to enlarge the
4101 hashtable. Note that we do not call update_hash() blindly as this would
4102 cause each triple that modifies the predicate hierarchy to force a
4103 rehash. As we are not searching using subPropertyOf semantics during the
4104 duplicate update, there is no point updating. If it is incorrect it will
4105 be updated on the first real query.
4106 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4107
4108
4109 static int
update_duplicates_add(rdf_db * db,triple * t)4110 update_duplicates_add(rdf_db *db, triple *t)
4111 { triple *d;
4112 const int indexed = BY_SP;
4113
4114 assert(t->is_duplicate == FALSE);
4115 assert(t->duplicates == 0);
4116
4117 if ( WANT_GC(db) ) /* (*) See above */
4118 update_hash(db);
4119 d = db->table[indexed][triple_hash(db, t, indexed)];
4120 for( ; d && d != t; d = d->next[indexed] )
4121 { if ( match_triples(d, t, MATCH_DUPLICATE) )
4122 { t->is_duplicate = TRUE;
4123 assert( !d->is_duplicate );
4124
4125 d->duplicates++;
4126
4127 DEBUG(2,
4128 print_triple(t, PRT_SRC);
4129 Sdprintf(" %p: %d-th duplicate: ", t, d->duplicates);
4130 Sdprintf("Principal: %p at", d);
4131 print_src(d);
4132 Sdprintf("\n"));
4133
4134 assert(d->duplicates); /* check overflow */
4135 db->duplicates++;
4136 return TRUE;
4137 }
4138 }
4139
4140 return FALSE;
4141 }
4142
4143
4144 static void /* t is about to be deleted */
update_duplicates_del(rdf_db * db,triple * t)4145 update_duplicates_del(rdf_db *db, triple *t)
4146 { const int indexed = BY_SP;
4147
4148 if ( t->duplicates ) /* I am the principal one */
4149 { triple *d;
4150
4151 DEBUG(2,
4152 print_triple(t, PRT_SRC);
4153 Sdprintf(": DEL principal %p, %d duplicates: ", t, t->duplicates));
4154
4155 db->duplicates--;
4156 d = db->table[indexed][triple_hash(db, t, indexed)];
4157 for( ; d; d = d->next[indexed] )
4158 { if ( d != t && match_triples(d, t, MATCH_DUPLICATE) )
4159 { assert(d->is_duplicate);
4160 d->is_duplicate = FALSE;
4161 d->duplicates = t->duplicates-1;
4162 DEBUG(2,
4163 Sdprintf("New principal: %p at", d);
4164 print_src(d);
4165 Sdprintf("\n"));
4166
4167 return;
4168 }
4169 }
4170 assert(0);
4171 } else if ( t->is_duplicate ) /* I am a duplicate */
4172 { triple *d;
4173
4174 DEBUG(2,
4175 print_triple(t, PRT_SRC);
4176 Sdprintf(": DEL: is a duplicate: "));
4177
4178 db->duplicates--;
4179 d = db->table[indexed][triple_hash(db, t, indexed)];
4180 for( ; d; d = d->next[indexed] )
4181 { if ( d != t && match_triples(d, t, MATCH_DUPLICATE) )
4182 { if ( d->duplicates )
4183 { d->duplicates--;
4184 DEBUG(2,
4185 Sdprintf("Principal %p at ", d);
4186 print_src(d);
4187 Sdprintf(" has %d duplicates\n", d->duplicates));
4188 return;
4189 }
4190 }
4191 }
4192 Sdprintf("FATAL\n");
4193 PL_halt(1);
4194 assert(0);
4195 }
4196 }
4197
4198
4199 /*******************************
4200 * TRANSACTIONS *
4201 *******************************/
4202
4203 static void
append_transaction(rdf_db * db,transaction_record * tr)4204 append_transaction(rdf_db *db, transaction_record *tr)
4205 { if ( db->tr_last )
4206 { tr->next = NULL;
4207 tr->previous = db->tr_last;
4208 db->tr_last->next = tr;
4209 db->tr_last = tr;
4210 } else
4211 { tr->next = tr->previous = NULL;
4212 db->tr_first = db->tr_last = tr;
4213 }
4214 }
4215
4216
4217 static void
open_transaction(rdf_db * db)4218 open_transaction(rdf_db *db)
4219 { transaction_record *tr = rdf_malloc(db, sizeof(*tr));
4220
4221 memset(tr, 0, sizeof(*tr));
4222 tr->type = TR_MARK;
4223
4224 if ( db->tr_first )
4225 db->tr_nesting++;
4226 else
4227 db->tr_nesting = 0;
4228
4229 append_transaction(db, tr);
4230 }
4231
4232
4233 static void
record_transaction(rdf_db * db,tr_type type,triple * t)4234 record_transaction(rdf_db *db, tr_type type, triple *t)
4235 { transaction_record *tr = rdf_malloc(db, sizeof(*tr));
4236
4237 memset(tr, 0, sizeof(*tr));
4238 tr->type = type;
4239 tr->triple = t;
4240
4241 append_transaction(db, tr);
4242 }
4243
4244
4245 static void
record_md5_transaction(rdf_db * db,graph * src,md5_byte_t * digest)4246 record_md5_transaction(rdf_db *db, graph *src, md5_byte_t *digest)
4247 { transaction_record *tr = rdf_malloc(db, sizeof(*tr));
4248
4249 memset(tr, 0, sizeof(*tr));
4250 tr->type = TR_UPDATE_MD5,
4251 tr->update.md5.graph = src;
4252 tr->update.md5.digest = digest;
4253
4254 append_transaction(db, tr);
4255 }
4256
4257
4258 static void
record_update_transaction(rdf_db * db,triple * t,triple * new)4259 record_update_transaction(rdf_db *db, triple *t, triple *new)
4260 { transaction_record *tr = rdf_malloc(db, sizeof(*tr));
4261
4262 memset(tr, 0, sizeof(*tr));
4263 tr->type = TR_UPDATE,
4264 tr->triple = t;
4265 tr->update.triple = new;
4266
4267 append_transaction(db, tr);
4268 }
4269
4270
4271 static void
record_update_src_transaction(rdf_db * db,triple * t,atom_t src,unsigned long line)4272 record_update_src_transaction(rdf_db *db, triple *t,
4273 atom_t src, unsigned long line)
4274 { transaction_record *tr = rdf_malloc(db, sizeof(*tr));
4275
4276 memset(tr, 0, sizeof(*tr));
4277 tr->type = TR_UPDATE_SRC,
4278 tr->triple = t;
4279 tr->update.src.atom = src;
4280 tr->update.src.line = line;
4281
4282 append_transaction(db, tr);
4283 }
4284
4285
4286 static void
void_transaction(rdf_db * db,transaction_record * tr)4287 void_transaction(rdf_db *db, transaction_record *tr)
4288 { switch(tr->type)
4289 { case TR_ASSERT:
4290 free_triple(db, tr->triple);
4291 break;
4292 case TR_UPDATE:
4293 free_triple(db, tr->update.triple);
4294 break;
4295 case TR_UPDATE_MD5:
4296 if ( tr->update.md5.digest )
4297 rdf_free(db, tr->update.md5.digest, sizeof(*tr->update.md5.digest));
4298 break;
4299 default:
4300 break;
4301 }
4302
4303 tr->type = TR_VOID;
4304 }
4305
4306
4307 static void
free_transaction(rdf_db * db,transaction_record * tr)4308 free_transaction(rdf_db *db, transaction_record *tr)
4309 { void_transaction(db, tr);
4310
4311 rdf_free(db, tr, sizeof(*tr));
4312 }
4313
4314
4315 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4316 This must deal with multiple operations on the same triple. Most
4317 probably the most important thing is to merge update records. We must
4318 also make-up our mind with regard to updated records that are erased or
4319 records that are erased after updating, etc.
4320 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4321
4322 static void
clean_transaction(rdf_db * db,transaction_record * tr0)4323 clean_transaction(rdf_db *db, transaction_record *tr0)
4324 {
4325 #if 0
4326 transaction_record *tr;
4327
4328 for(tr=tr0; tr; tr=tr->next)
4329 { if ( TR_RETRACT )
4330 { transaction_record *tr2;
4331
4332 for(tr2=tr->next; tr2; tr2=tr2->next)
4333 { if ( tr2->triple == tr->triple )
4334 { switch(tr2->type)
4335 { case TR_RETRACT:
4336 case TR_UPDATE:
4337 case TR_UPDATE_SRC:
4338 void_transaction(db, tr2);
4339 default:
4340 ;
4341 }
4342 }
4343 }
4344 }
4345 }
4346 #endif
4347 }
4348
4349
4350 static void
truncate_transaction(rdf_db * db,transaction_record * last)4351 truncate_transaction(rdf_db *db, transaction_record *last)
4352 { db->tr_last = last;
4353 if ( last )
4354 { db->tr_last->next = NULL;
4355 } else
4356 { db->tr_first = NULL;
4357 }
4358 }
4359
4360 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4361 discard_transaction() simply destroys all actions in the last
4362 transaction.
4363 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4364
4365 static void
discard_transaction(rdf_db * db)4366 discard_transaction(rdf_db *db)
4367 { transaction_record *tr, *prev;
4368
4369 for(tr=db->tr_last; tr; tr = prev)
4370 { prev = tr->previous;
4371
4372 if ( tr->type == TR_SUB_END )
4373 { if ( tr->update.transaction_id )
4374 PL_erase(tr->update.transaction_id);
4375 }
4376
4377 if ( tr->type == TR_MARK )
4378 { rdf_free(db, tr, sizeof(*tr));
4379 truncate_transaction(db, prev);
4380 db->tr_nesting--;
4381 return;
4382 }
4383
4384 free_transaction(db, tr);
4385 }
4386 }
4387
4388
4389 int
put_begin_end(term_t t,functor_t be,int level)4390 put_begin_end(term_t t, functor_t be, int level)
4391 { term_t av;
4392
4393 return ( (av = PL_new_term_ref()) &&
4394 PL_put_integer(av, level) &&
4395 PL_cons_functor_v(t, be, av) );
4396 }
4397
4398
4399
4400
4401 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4402 Note (*) rdf-monitors can modify the database by opening new
4403 transactions. Therefore we first close the transaction to allow opening
4404 new ones. TBD: get this clear. Monitors have only restricted read
4405 access?
4406 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4407
4408 static int
commit_transaction_int(rdf_db * db,term_t id)4409 commit_transaction_int(rdf_db *db, term_t id)
4410 { transaction_record *tr, *next;
4411 int tr_level = 0; /* nesting level */
4412
4413 if ( db->tr_nesting > 0 ) /* commit nested transaction */
4414 { tr=db->tr_last;
4415
4416 if ( tr->type == TR_MARK ) /* empty nested transaction */
4417 { truncate_transaction(db, tr->previous);
4418 rdf_free(db, tr, sizeof(*tr));
4419 db->tr_nesting--;
4420
4421 return TRUE;
4422 }
4423
4424 for(; tr; tr = tr->previous) /* not the last (tested above) */
4425 { /* not the first (we are nested) */
4426 if ( tr->type == TR_MARK )
4427 { transaction_record *end = rdf_malloc(db, sizeof(*end));
4428
4429 memset(end, 0, sizeof(*end));
4430 end->type = TR_SUB_END;
4431 end->update.transaction_id = PL_record(id);
4432 append_transaction(db, end);
4433
4434 tr->type = TR_SUB_START;
4435 tr->update.transaction_id = end->update.transaction_id;
4436 db->tr_nesting--;
4437
4438 return TRUE;
4439 }
4440 }
4441
4442 assert(0);
4443 return FALSE;
4444 }
4445
4446 while( (tr=db->tr_first) ) /* See above (*) */
4447 { db->tr_first = db->tr_last = NULL;
4448
4449 clean_transaction(db, tr);
4450 /* real commit */
4451 for(; tr; tr = next)
4452 { next = tr->next;
4453
4454 switch(tr->type)
4455 { case TR_MARK:
4456 break;
4457 case TR_SUB_START:
4458 { term_t id = PL_new_term_ref();
4459 term_t be = PL_new_term_ref();
4460 if ( !PL_recorded(tr->update.transaction_id, id) ||
4461 !put_begin_end(be, FUNCTOR_begin1, ++tr_level) ||
4462 !broadcast(EV_TRANSACTION, (void*)id, (void*)be) )
4463 return FALSE;
4464 break;
4465 }
4466 case TR_SUB_END:
4467 { term_t id = PL_new_term_ref();
4468 term_t be = PL_new_term_ref();
4469 if ( !PL_recorded(tr->update.transaction_id, id) )
4470 return FALSE;
4471 PL_erase(tr->update.transaction_id);
4472 if ( !put_begin_end(be, FUNCTOR_end1, tr_level--) ||
4473 !broadcast(EV_TRANSACTION, (void*)id, (void*)be) )
4474 return FALSE;
4475 break;
4476 }
4477 case TR_ASSERT:
4478 link_triple(db, tr->triple);
4479 db->generation++;
4480 break;
4481 case TR_RETRACT:
4482 if ( !tr->triple->erased ) /* already erased */
4483 { erase_triple(db, tr->triple);
4484 db->generation++;
4485 }
4486 break;
4487 case TR_UPDATE:
4488 if ( !tr->triple->erased )
4489 { if ( !broadcast(EV_UPDATE, tr->triple, tr->update.triple) )
4490 return FALSE; /* TBD: how to handle? */
4491 if ( !tr->triple->erased )
4492 { erase_triple_silent(db, tr->triple);
4493 link_triple_silent(db, tr->update.triple);
4494 db->generation++;
4495 }
4496 }
4497 break;
4498 case TR_UPDATE_SRC:
4499 if ( !tr->triple->erased )
4500 { if ( tr->triple->graph != tr->update.src.atom )
4501 { if ( tr->triple->graph )
4502 unregister_graph(db, tr->triple);
4503 tr->triple->graph = tr->update.src.atom;
4504 if ( tr->triple->graph )
4505 register_graph(db, tr->triple);
4506 }
4507 tr->triple->line = tr->update.src.line;
4508 db->generation++;
4509 }
4510 break;
4511 case TR_UPDATE_MD5:
4512 { graph *src = tr->update.md5.graph;
4513 md5_byte_t *digest = tr->update.md5.digest;
4514 if ( digest )
4515 { sum_digest(digest, src->digest);
4516 src->md5 = TRUE;
4517 rdf_free(db, digest, sizeof(md5_byte_t)*16);
4518 } else
4519 { src->md5 = FALSE;
4520 }
4521 break;
4522 }
4523 case TR_RESET:
4524 db->tr_reset = FALSE;
4525 reset_db(db);
4526 break;
4527 case TR_VOID:
4528 break;
4529 default:
4530 assert(0);
4531 }
4532
4533 rdf_free(db, tr, sizeof(*tr));
4534 }
4535 }
4536
4537 return TRUE;
4538 }
4539
4540
4541 static int
commit_transaction(rdf_db * db,term_t id)4542 commit_transaction(rdf_db *db, term_t id)
4543 { int rc;
4544
4545 db->gc_blocked++;
4546 rc = commit_transaction_int(db, id);
4547 db->gc_blocked--;
4548
4549 return rc;
4550 }
4551
4552
4553 static foreign_t
rdf_transaction(term_t goal,term_t id)4554 rdf_transaction(term_t goal, term_t id)
4555 { int rc;
4556 rdf_db *db = DB;
4557 active_transaction me;
4558
4559 if ( !WRLOCK(db, TRUE) )
4560 return FALSE;
4561
4562 open_transaction(db);
4563 me.parent = db->tr_active;
4564 me.id = id;
4565 db->tr_active = &me;
4566
4567 rc = PL_call_predicate(NULL, PL_Q_PASS_EXCEPTION, PRED_call1, goal);
4568
4569 if ( rc )
4570 { int empty = (db->tr_last == NULL || db->tr_last->type == TR_MARK);
4571
4572 if ( empty || db->tr_nesting > 0 )
4573 { commit_transaction(db, id);
4574 } else
4575 { term_t be;
4576
4577 if ( !(be=PL_new_term_ref()) ||
4578 !put_begin_end(be, FUNCTOR_begin1, 0) ||
4579 !broadcast(EV_TRANSACTION, (void*)id, (void*)be) ||
4580 !put_begin_end(be, FUNCTOR_end1, 0) )
4581 return FALSE;
4582
4583 if ( !LOCKOUT_READERS(db) ) /* interrupt, timeout */
4584 { broadcast(EV_TRANSACTION, (void*)id, (void*)be);
4585 rc = FALSE;
4586 goto discard;
4587 }
4588 commit_transaction(db, id);
4589 REALLOW_READERS(db);
4590 if ( !broadcast(EV_TRANSACTION, (void*)id, (void*)be) )
4591 return FALSE;
4592 }
4593 } else
4594 { discard:
4595 discard_transaction(db);
4596 }
4597 db->tr_active = me.parent;
4598 WRUNLOCK(db);
4599
4600 return rc;
4601 }
4602
4603
4604 static foreign_t
rdf_active_transactions(term_t list)4605 rdf_active_transactions(term_t list)
4606 { rdf_db *db = DB;
4607 term_t tail = PL_copy_term_ref(list);
4608 term_t head = PL_new_term_ref();
4609 active_transaction *ot;
4610
4611 for(ot = db->tr_active; ot; ot=ot->parent)
4612 { if ( !PL_unify_list(tail, head, tail) ||
4613 !PL_unify(head, ot->id) )
4614 return FALSE;
4615 }
4616
4617 return PL_unify_nil(tail);
4618 }
4619
4620
4621 /*******************************
4622 * PREDICATES *
4623 *******************************/
4624
4625 static foreign_t
rdf_assert4(term_t subject,term_t predicate,term_t object,term_t src)4626 rdf_assert4(term_t subject, term_t predicate, term_t object, term_t src)
4627 { rdf_db *db = DB;
4628 triple *t = new_triple(db);
4629
4630 if ( !get_triple(db, subject, predicate, object, t) )
4631 { free_triple(db, t);
4632 return FALSE;
4633 }
4634 if ( src )
4635 { if ( !get_graph(src, t) )
4636 { free_triple(db, t);
4637 return FALSE;
4638 }
4639 } else
4640 { t->graph = ATOM_user;
4641 t->line = NO_LINE;
4642 }
4643
4644 lock_atoms(t);
4645 if ( !WRLOCK(db, FALSE) )
4646 { free_triple(db, t);
4647 return FALSE;
4648 }
4649
4650 if ( db->tr_first )
4651 { record_transaction(db, TR_ASSERT, t);
4652 } else
4653 { link_triple(db, t);
4654 db->generation++;
4655 }
4656 WRUNLOCK(db);
4657
4658 return TRUE;
4659 }
4660
4661
4662 static foreign_t
rdf_assert3(term_t subject,term_t predicate,term_t object)4663 rdf_assert3(term_t subject, term_t predicate, term_t object)
4664 { return rdf_assert4(subject, predicate, object, 0);
4665 }
4666
4667
4668 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4669 inc_active_queries(rdf_db *db);
4670 dec_active_queries(rdf_db *db);
4671
4672 TBD: Either delete this or use atomic inc/dec.
4673 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4674
4675 static void
inc_active_queries(rdf_db * db)4676 inc_active_queries(rdf_db *db)
4677 { LOCK_MISC(db);
4678 db->active_queries++;
4679 UNLOCK_MISC(db);
4680 }
4681
4682
4683 static void
dec_active_queries(rdf_db * db)4684 dec_active_queries(rdf_db *db)
4685 { LOCK_MISC(db);
4686 db->active_queries--;
4687 assert(db->active_queries>=0);
4688 UNLOCK_MISC(db);
4689 }
4690
4691
4692 typedef struct search_state
4693 { rdf_db *db; /* our database */
4694 term_t subject; /* Prolog term references */
4695 term_t object;
4696 term_t predicate;
4697 term_t src;
4698 term_t realpred;
4699 unsigned locked : 1; /* State has been locked */
4700 unsigned allocated : 1; /* State has been allocated */
4701 unsigned flags; /* Misc flags controlling search */
4702 atom_t prefix; /* prefix and like search */
4703 avl_enum *literal_state; /* Literal search state */
4704 literal *literal_cursor; /* pointer in current literal */
4705 literal_ex lit_ex; /* extended literal for fast compare */
4706 triple *cursor; /* Pointer in triple DB */
4707 triple pattern; /* Pattern triple */
4708 } search_state;
4709
4710
4711 static void free_search_state(search_state *state);
4712
4713 static void
init_cursor_from_literal(search_state * state,literal * cursor)4714 init_cursor_from_literal(search_state *state, literal *cursor)
4715 { triple *p = &state->pattern;
4716 unsigned long iv;
4717 int i;
4718
4719 DEBUG(3,
4720 Sdprintf("Trying literal search for ");
4721 print_literal(cursor);
4722 Sdprintf("\n"));
4723
4724 p->indexed |= BY_O;
4725 p->indexed &= ~BY_S; /* we do not have index BY_SO */
4726 switch(p->indexed)
4727 { case BY_O:
4728 iv = literal_hash(cursor);
4729 break;
4730 case BY_OP:
4731 iv = predicate_hash(p->predicate.r) ^ literal_hash(cursor);
4732 break;
4733 default:
4734 iv = 0; /* make compiler silent */
4735 assert(0);
4736 }
4737
4738 i = (int)(iv % (long)state->db->table_size[p->indexed]);
4739 state->cursor = state->db->table[p->indexed][i];
4740 state->literal_cursor = cursor;
4741 }
4742
4743
4744 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4745 (*) update_hash() is there to update the hash after a change to the
4746 predicate organization. If we do not have a predicate or we do not
4747 search using rdf_has/3, this is not needed.
4748 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4749
4750 static int
init_search_state(search_state * state)4751 init_search_state(search_state *state)
4752 { triple *p = &state->pattern;
4753
4754 if ( get_partial_triple(state->db,
4755 state->subject, state->predicate, state->object,
4756 state->src, p) != TRUE )
4757 { free_triple(state->db, p);
4758 return FALSE;
4759 }
4760
4761 if ( !RDLOCK(state->db) )
4762 { free_triple(state->db, p);
4763 return FALSE;
4764 }
4765 state->locked = TRUE;
4766 if ( p->predicate.r && (state->flags & MATCH_SUBPROPERTY) ) /* See (*) */
4767 { if ( !update_hash(state->db) )
4768 { free_search_state(state);
4769 return FALSE;
4770 }
4771 }
4772
4773 if ( (p->match == STR_MATCH_PREFIX || p->match == STR_MATCH_LIKE) &&
4774 p->indexed != BY_SP &&
4775 (state->prefix = first_atom(p->object.literal->value.string, p->match)))
4776 { literal lit;
4777 literal **rlitp;
4778
4779 lit = *p->object.literal;
4780 lit.value.string = state->prefix;
4781 state->literal_state = rdf_malloc(state->db,
4782 sizeof(*state->literal_state));
4783 state->lit_ex.literal = &lit;
4784 prepare_literal_ex(&state->lit_ex);
4785 rlitp = avlfindfirst(&state->db->literals, &state->lit_ex, state->literal_state);
4786 if ( rlitp )
4787 { init_cursor_from_literal(state, *rlitp);
4788 } else
4789 { free_search_state(state);
4790 return FALSE;
4791 }
4792 } else
4793 { state->cursor = state->db->table[p->indexed]
4794 [triple_hash(state->db, p, p->indexed)];
4795 }
4796
4797 return TRUE;
4798 }
4799
4800
4801 static void
free_search_state(search_state * state)4802 free_search_state(search_state *state)
4803 { if ( state->locked )
4804 { RDUNLOCK(state->db);
4805 }
4806
4807 free_triple(state->db, &state->pattern);
4808 if ( state->prefix )
4809 PL_unregister_atom(state->prefix);
4810 if ( state->literal_state )
4811 rdf_free(state->db, state->literal_state, sizeof(*state->literal_state));
4812 if ( state->allocated ) /* also means redo! */
4813 { dec_active_queries(state->db);
4814 rdf_free(state->db, state, sizeof(*state));
4815 }
4816 }
4817
4818
4819 static foreign_t
allow_retry_state(search_state * state)4820 allow_retry_state(search_state *state)
4821 { if ( !state->allocated )
4822 { search_state *copy = rdf_malloc(state->db, sizeof(*copy));
4823 *copy = *state;
4824 copy->allocated = TRUE;
4825 inc_active_queries(state->db);
4826
4827 state = copy;
4828 }
4829
4830 PL_retry_address(state);
4831 }
4832
4833
4834 /* TBD: simplify. Maybe split for resource and literal search, as
4835 both involve mutual exclusive complications to this routine,
4836 */
4837
4838 static int
next_search_state(search_state * state)4839 next_search_state(search_state *state)
4840 { triple *t = state->cursor;
4841 triple *p = &state->pattern;
4842
4843 retry:
4844 for( ; t; t = t->next[p->indexed])
4845 { if ( t->is_duplicate && !state->src )
4846 continue;
4847
4848 /* hash-collision, skip */
4849 if ( state->literal_state )
4850 { if ( !(t->object_is_literal &&
4851 t->object.literal == state->literal_cursor) )
4852 continue;
4853 }
4854
4855 if ( match_triples(t, p, state->flags) )
4856 { term_t retpred = state->realpred ? state->realpred : state->predicate;
4857 if ( !unify_triple(state->subject, retpred, state->object,
4858 state->src, t, p->inversed) )
4859 continue;
4860 if ( state->realpred && PL_is_variable(state->predicate) )
4861 { if ( !PL_unify(state->predicate, retpred) )
4862 return FALSE;
4863 }
4864
4865 t=t->next[p->indexed];
4866 inv_alt:
4867 for(; t; t = t->next[p->indexed])
4868 { if ( state->literal_state )
4869 { if ( !(t->object_is_literal &&
4870 t->object.literal == state->literal_cursor) )
4871 continue;
4872 }
4873
4874 if ( match_triples(t, p, state->flags) )
4875 { state->cursor = t;
4876
4877 return TRUE; /* non-deterministic */
4878 }
4879 }
4880
4881 if ( (state->flags & MATCH_INVERSE) && inverse_partial_triple(p) )
4882 { t = state->db->table[p->indexed][triple_hash(state->db, p, p->indexed)];
4883 goto inv_alt;
4884 }
4885
4886 state->cursor = NULL; /* deterministic */
4887 return TRUE;
4888 }
4889 }
4890
4891 if ( (state->flags & MATCH_INVERSE) && inverse_partial_triple(p) )
4892 { t = state->db->table[p->indexed][triple_hash(state->db, p, p->indexed)];
4893 goto retry;
4894 }
4895
4896 if ( state->literal_state )
4897 { literal **litp;
4898
4899 if ( (litp = avlfindnext(state->literal_state)) )
4900 { if ( state->prefix )
4901 { literal *lit = *litp;
4902
4903 if ( !match_atoms(STR_MATCH_PREFIX, state->prefix, lit->value.string) )
4904 { DEBUG(1,
4905 Sdprintf("Terminated literal iteration from ");
4906 print_literal(lit);
4907 Sdprintf("\n"));
4908 return FALSE; /* no longer a prefix */
4909 }
4910 }
4911
4912 init_cursor_from_literal(state, *litp);
4913 t = state->cursor;
4914
4915 goto retry;
4916 }
4917 }
4918
4919 return FALSE;
4920 }
4921
4922
4923
4924 static foreign_t
rdf(term_t subject,term_t predicate,term_t object,term_t src,term_t realpred,control_t h,unsigned flags)4925 rdf(term_t subject, term_t predicate, term_t object,
4926 term_t src, term_t realpred, control_t h, unsigned flags)
4927 { rdf_db *db = DB;
4928 search_state *state;
4929
4930 switch(PL_foreign_control(h))
4931 { case PL_FIRST_CALL:
4932 { search_state buf;
4933
4934 state = &buf;
4935 memset(state, 0, sizeof(*state));
4936 state->db = db;
4937 state->subject = subject;
4938 state->object = object;
4939 state->predicate = predicate;
4940 state->src = src;
4941 state->realpred = realpred;
4942 state->flags = flags;
4943
4944 if ( !init_search_state(state) )
4945 return FALSE;
4946
4947 goto search;
4948 }
4949 case PL_REDO:
4950 { int rc;
4951
4952 state = PL_foreign_context_address(h);
4953 assert(state->subject == subject);
4954
4955 search:
4956 if ( (rc=next_search_state(state)) )
4957 { if ( state->cursor || state->literal_state )
4958 return allow_retry_state(state);
4959 }
4960
4961 free_search_state(state);
4962 return rc;
4963 }
4964 case PL_CUTTED:
4965 { search_state *state = PL_foreign_context_address(h);
4966
4967 free_search_state(state);
4968 return TRUE;
4969 }
4970 default:
4971 assert(0);
4972 return FALSE;
4973 }
4974 }
4975
4976
4977 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4978 rdf(Subject, Predicate, Object)
4979
4980 Search specifications:
4981
4982 Predicate:
4983
4984 subPropertyOf(X) = P
4985
4986 Object:
4987
4988 literal(substring(X), L)
4989 literal(word(X), L)
4990 literal(exact(X), L)
4991 literal(prefix(X), L)
4992 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4993
4994
4995 static foreign_t
rdf3(term_t subject,term_t predicate,term_t object,control_t h)4996 rdf3(term_t subject, term_t predicate, term_t object, control_t h)
4997 { return rdf(subject, predicate, object, 0, 0, h,
4998 MATCH_EXACT);
4999 }
5000
5001
5002 static foreign_t
rdf4(term_t subject,term_t predicate,term_t object,term_t src,control_t h)5003 rdf4(term_t subject, term_t predicate, term_t object,
5004 term_t src, control_t h)
5005 { return rdf(subject, predicate, object, src, 0, h,
5006 MATCH_EXACT|MATCH_SRC);
5007 }
5008
5009
5010 static foreign_t
rdf_has(term_t subject,term_t predicate,term_t object,term_t realpred,control_t h)5011 rdf_has(term_t subject, term_t predicate, term_t object,
5012 term_t realpred, control_t h)
5013 { return rdf(subject, predicate, object, 0, realpred, h,
5014 MATCH_SUBPROPERTY|MATCH_INVERSE);
5015 }
5016
5017
5018 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5019 rdf_estimate_complexity(+S,+P,+O,-C)
5020
5021 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5022
5023 static foreign_t
rdf_estimate_complexity(term_t subject,term_t predicate,term_t object,term_t complexity)5024 rdf_estimate_complexity(term_t subject, term_t predicate, term_t object,
5025 term_t complexity)
5026 { triple t;
5027 long c;
5028 rdf_db *db = DB;
5029 int rc;
5030
5031 memset(&t, 0, sizeof(t));
5032 if ( (rc=get_partial_triple(db, subject, predicate, object, 0, &t)) != TRUE )
5033 { if ( rc == -1 )
5034 { return FALSE; /* error */
5035 } else
5036 { return PL_unify_integer(complexity, 0); /* no predicate */
5037 }
5038 }
5039
5040 if ( !RDLOCK(db) )
5041 return FALSE;
5042 if ( !update_hash(db) ) /* or ignore this problem? */
5043 { RDUNLOCK(db);
5044 free_triple(db, &t);
5045 return FALSE;
5046 }
5047
5048 if ( t.indexed == BY_NONE )
5049 { c = db->created - db->erased; /* = totale triple count */
5050 #if 0
5051 } else if ( t.indexed == BY_P )
5052 { c = t.predicate.r->triple_count; /* must sum over children */
5053 #endif
5054 } else
5055 { c = db->counts[t.indexed][triple_hash(db, &t, t.indexed)];
5056 }
5057
5058 rc = PL_unify_integer(complexity, c);
5059 RDUNLOCK(db);
5060 free_triple(db, &t);
5061
5062 return rc;
5063 }
5064
5065
5066 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5067 current_literal(?Literals)
5068 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5069
5070 static foreign_t
rdf_current_literal(term_t t,control_t h)5071 rdf_current_literal(term_t t, control_t h)
5072 { rdf_db *db = DB;
5073 literal **data;
5074 avl_enum *state;
5075 int rc;
5076
5077 switch(PL_foreign_control(h))
5078 { case PL_FIRST_CALL:
5079 if ( PL_is_variable(t) )
5080 { state = rdf_malloc(db, sizeof(*state));
5081
5082 RDLOCK(db);
5083 inc_active_queries(db);
5084 data = avlfindfirst(&db->literals, NULL, state);
5085 goto next;
5086 } else
5087 { return FALSE; /* TBD */
5088 }
5089 case PL_REDO:
5090 state = PL_foreign_context_address(h);
5091 data = avlfindnext(state);
5092 next:
5093 for(; data; data=avlfindnext(state))
5094 { literal *lit = *data;
5095
5096 if ( unify_literal(t, lit) )
5097 { PL_retry_address(state);
5098 }
5099 }
5100
5101 rc = FALSE;
5102 goto cleanup;
5103 case PL_CUTTED:
5104 rc = TRUE;
5105
5106 cleanup:
5107 state = PL_foreign_context_address(h);
5108 avlfinddestroy(state);
5109 rdf_free(db, state, sizeof(*state));
5110 RDUNLOCK(db);
5111 dec_active_queries(db);
5112
5113 return rc;
5114 default:
5115 assert(0);
5116 return FALSE;
5117 }
5118 }
5119
5120
5121 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5122 rdf_update(+Subject, +Predicate, +Object, +Action)
5123
5124 Update a triple. Please note this is actually erase+assert as the triple
5125 needs to be updated in the linked lists while erase simply flags a
5126 triple as `erases' without deleting it to support queries which active
5127 choicepoints.
5128 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5129
5130 static int
update_triple(rdf_db * db,term_t action,triple * t)5131 update_triple(rdf_db *db, term_t action, triple *t)
5132 { term_t a = PL_new_term_ref();
5133 triple tmp, *new;
5134 int i;
5135 /* Create copy in local memory */
5136 tmp = *t;
5137 tmp.allocated = FALSE;
5138 tmp.atoms_locked = FALSE;
5139 if ( t->object_is_literal )
5140 tmp.object.literal = copy_literal(db, t->object.literal);
5141
5142 if ( !PL_get_arg(1, action, a) )
5143 return type_error(action, "rdf_action");
5144
5145 if ( PL_is_functor(action, FUNCTOR_subject1) )
5146 { atom_t s;
5147
5148 if ( !get_atom_ex(a, &s) )
5149 return FALSE;
5150 if ( tmp.subject == s )
5151 return TRUE; /* no change */
5152
5153 tmp.subject = s;
5154 } else if ( PL_is_functor(action, FUNCTOR_predicate1) )
5155 { predicate *p;
5156
5157 if ( !get_predicate(db, a, &p) )
5158 return FALSE;
5159 if ( tmp.predicate.r == p )
5160 return TRUE; /* no change */
5161
5162 tmp.predicate.r = p;
5163 } else if ( PL_is_functor(action, FUNCTOR_object1) )
5164 { triple t2;
5165
5166 memset(&t2, 0, sizeof(t2));
5167
5168 if ( !get_object(db, a, &t2) )
5169 { free_triple(db, &t2);
5170 return FALSE;
5171 }
5172 if ( match_object(&t2, &tmp, MATCH_QUAL) )
5173 { free_triple(db, &t2);
5174 return TRUE;
5175 }
5176
5177 if ( tmp.object_is_literal )
5178 free_literal(db, tmp.object.literal);
5179 if ( (tmp.object_is_literal = t2.object_is_literal) )
5180 { tmp.object.literal = t2.object.literal;
5181 } else
5182 { tmp.object.resource = t2.object.resource;
5183 }
5184 } else if ( PL_is_functor(action, FUNCTOR_graph1) )
5185 { triple t2;
5186
5187 if ( !get_graph(a, &t2) )
5188 return FALSE;
5189 if ( t2.graph == t->graph && t2.line == t->line )
5190 return TRUE;
5191 if ( db->tr_first )
5192 { record_update_src_transaction(db, t, t2.graph, t2.line);
5193 } else
5194 { if ( t->graph )
5195 unregister_graph(db, t);
5196 t->graph = t2.graph;
5197 t->line = t2.line;
5198 if ( t->graph )
5199 register_graph(db, t);
5200 }
5201
5202 return TRUE; /* considered no change */
5203 } else
5204 return domain_error(action, "rdf_action");
5205
5206 for(i=0; i<INDEX_TABLES; i++)
5207 tmp.next[i] = NULL;
5208
5209 new = new_triple(db);
5210 new->subject = tmp.subject;
5211 new->predicate.r = tmp.predicate.r;
5212 if ( (new->object_is_literal = tmp.object_is_literal) )
5213 { new->object.literal = copy_literal(db, tmp.object.literal);
5214 } else
5215 { new->object.resource = tmp.object.resource;
5216 }
5217 new->graph = tmp.graph;
5218 new->line = tmp.line;
5219
5220 free_triple(db, &tmp);
5221 lock_atoms(new);
5222
5223 if ( db->tr_first )
5224 { record_update_transaction(db, t, new);
5225 } else
5226 { broadcast(EV_UPDATE, t, new);
5227 erase_triple_silent(db, t);
5228 link_triple_silent(db, new);
5229 db->generation++;
5230 }
5231
5232 return TRUE;
5233 }
5234
5235
5236
5237 static foreign_t
rdf_update5(term_t subject,term_t predicate,term_t object,term_t src,term_t action)5238 rdf_update5(term_t subject, term_t predicate, term_t object, term_t src,
5239 term_t action)
5240 { triple t, *p;
5241 int indexed = BY_SP;
5242 int done = 0;
5243 rdf_db *db = DB;
5244
5245 memset(&t, 0, sizeof(t));
5246
5247 if ( !get_src(src, &t) ||
5248 !get_triple(db, subject, predicate, object, &t) )
5249 return FALSE;
5250
5251 if ( !WRLOCK(db, FALSE) )
5252 { free_triple(db, &t);
5253 return FALSE;
5254 }
5255 if ( !update_hash(db) )
5256 { WRUNLOCK(db);
5257 free_triple(db, &t);
5258 return FALSE;
5259 }
5260 p = db->table[indexed][triple_hash(db, &t, indexed)];
5261 for( ; p; p = p->next[indexed])
5262 { if ( match_triples(p, &t, MATCH_EXACT) )
5263 { if ( !update_triple(db, action, p) )
5264 { WRUNLOCK(db);
5265 free_triple(db, &t);
5266 return FALSE; /* type errors */
5267 }
5268 done++;
5269 }
5270 }
5271 free_triple(db, &t);
5272 WRUNLOCK(db);
5273
5274 return done ? TRUE : FALSE;
5275 }
5276
5277
5278 static foreign_t
rdf_update(term_t subject,term_t predicate,term_t object,term_t action)5279 rdf_update(term_t subject, term_t predicate, term_t object, term_t action)
5280 { return rdf_update5(subject, predicate, object, 0, action);
5281 }
5282
5283
5284 static foreign_t
rdf_retractall4(term_t subject,term_t predicate,term_t object,term_t src)5285 rdf_retractall4(term_t subject, term_t predicate, term_t object, term_t src)
5286 { triple t, *p;
5287 rdf_db *db = DB;
5288
5289 memset(&t, 0, sizeof(t));
5290 switch( get_partial_triple(db, subject, predicate, object, src, &t) )
5291 { case 0: /* no such predicate */
5292 return TRUE;
5293 case -1: /* error */
5294 return FALSE;
5295 }
5296
5297 if ( t.graph ) /* speedup for rdf_retractall(_,_,_,DB) */
5298 { graph *gr = lookup_graph(db, t.graph, FALSE);
5299
5300 if ( !gr || gr->triple_count == 0 )
5301 return TRUE;
5302 }
5303
5304 if ( !WRLOCK(db, FALSE) )
5305 return FALSE;
5306 /* No need, as we do not search with subPropertyOf
5307 if ( !update_hash(db) )
5308 { WRUNLOCK(db);
5309 return FALSE;
5310 }
5311 */
5312 p = db->table[t.indexed][triple_hash(db, &t, t.indexed)];
5313 for( ; p; p = p->next[t.indexed])
5314 { if ( match_triples(p, &t, MATCH_EXACT|MATCH_SRC) )
5315 { if ( t.object_is_literal && t.object.literal->objtype == OBJ_TERM )
5316 { fid_t fid = PL_open_foreign_frame();
5317 int rc = unify_object(object, p);
5318 PL_discard_foreign_frame(fid);
5319 if ( !rc )
5320 continue;
5321 }
5322
5323 if ( db->tr_first )
5324 { if ( db->tr_reset )
5325 { WRUNLOCK(db);
5326 return permission_error("retract", "triple", "",
5327 "rdf_retractall cannot follow "
5328 "rdf_reset_db in one transaction");
5329 }
5330 record_transaction(db, TR_RETRACT, p);
5331 } else
5332 { erase_triple(db, p);
5333 db->generation++;
5334 }
5335 }
5336 }
5337
5338 WRUNLOCK(db);
5339 free_triple(db, &t);
5340
5341 return TRUE;
5342 }
5343
5344
5345 static foreign_t
rdf_retractall3(term_t subject,term_t predicate,term_t object)5346 rdf_retractall3(term_t subject, term_t predicate, term_t object)
5347 { return rdf_retractall4(subject, predicate, object, 0);
5348 }
5349
5350
5351 /*******************************
5352 * MONITOR *
5353 *******************************/
5354
5355 typedef struct broadcast_callback
5356 { struct broadcast_callback *next;
5357 predicate_t pred;
5358 long mask;
5359 } broadcast_callback;
5360
5361 static long joined_mask = 0L;
5362 static broadcast_callback *callback_list;
5363 static broadcast_callback *callback_tail;
5364
5365 static void
do_broadcast(term_t term,long mask)5366 do_broadcast(term_t term, long mask)
5367 { if ( callback_list )
5368 { broadcast_callback *cb;
5369
5370 for(cb = callback_list; cb; cb = cb->next)
5371 { qid_t qid;
5372 term_t ex;
5373
5374 if ( !(cb->mask & mask) )
5375 continue;
5376
5377 qid = PL_open_query(NULL, PL_Q_CATCH_EXCEPTION, cb->pred, term);
5378 if ( !PL_next_solution(qid) && (ex = PL_exception(qid)) )
5379 { term_t av = PL_new_term_refs(2);
5380
5381 PL_cut_query(qid);
5382
5383 PL_put_atom(av+0, ATOM_error);
5384 PL_put_term(av+1, ex);
5385
5386 PL_call_predicate(NULL, PL_Q_NORMAL,
5387 PL_predicate("print_message", 2, "user"),
5388 av);
5389 } else
5390 { PL_close_query(qid);
5391 }
5392 }
5393 }
5394 }
5395
5396
5397 /* No longer used, but we keep it for if we need it again
5398 static foreign_t
5399 rdf_broadcast(term_t term, term_t mask)
5400 { long msk;
5401
5402 if ( !get_long_ex(mask, &msk) )
5403 return FALSE;
5404
5405 do_broadcast(term, msk);
5406 return TRUE;
5407 }
5408 */
5409
5410 static int
broadcast(broadcast_id id,void * a1,void * a2)5411 broadcast(broadcast_id id, void *a1, void *a2)
5412 { if ( (joined_mask & id) )
5413 { fid_t fid;
5414 term_t term;
5415 functor_t funct;
5416
5417 if ( !(fid = PL_open_foreign_frame()) ||
5418 !(term = PL_new_term_ref()) )
5419 return FALSE;
5420
5421 switch(id)
5422 { case EV_ASSERT:
5423 case EV_ASSERT_LOAD:
5424 funct = FUNCTOR_assert4;
5425 goto assert_retract;
5426 case EV_RETRACT:
5427 funct = FUNCTOR_retract4;
5428 assert_retract:
5429 { triple *t = a1;
5430 term_t tmp;
5431
5432 if ( !(tmp = PL_new_term_refs(4)) ||
5433 !PL_put_atom(tmp+0, t->subject) ||
5434 !PL_put_atom(tmp+1, t->predicate.r->name) ||
5435 !unify_object(tmp+2, t) ||
5436 !unify_graph(tmp+3, t) ||
5437 !PL_cons_functor_v(term, funct, tmp) )
5438 return FALSE;
5439 break;
5440 }
5441 case EV_UPDATE:
5442 { triple *t = a1;
5443 triple *new = a2;
5444 term_t tmp, a;
5445 functor_t action;
5446 int rc;
5447
5448 if ( !(tmp = PL_new_term_refs(5)) ||
5449 !(a = PL_new_term_ref()) ||
5450 !PL_put_atom(tmp+0, t->subject) ||
5451 !PL_put_atom(tmp+1, t->predicate.r->name) ||
5452 !unify_object(tmp+2, t) ||
5453 !unify_graph(tmp+3, t) )
5454 return FALSE;
5455
5456 if ( t->subject != new->subject )
5457 { action = FUNCTOR_subject1;
5458 rc = PL_put_atom(a, new->subject);
5459 } else if ( t->predicate.r != new->predicate.r )
5460 { action = FUNCTOR_predicate1;
5461 rc = PL_put_atom(a, new->predicate.r->name);
5462 } else if ( !match_object(t, new, MATCH_QUAL) )
5463 { action = FUNCTOR_object1;
5464 rc = unify_object(a, new);
5465 } else if ( !same_graph(t, new) )
5466 { action = FUNCTOR_graph1;
5467 rc = unify_graph(a, new);
5468 } else
5469 { return TRUE; /* no change */
5470 }
5471
5472 if ( !rc ||
5473 !PL_cons_functor_v(tmp+4, action, a) ||
5474 !PL_cons_functor_v(term, FUNCTOR_update5, tmp) )
5475 return FALSE;
5476 break;
5477 }
5478 case EV_NEW_LITERAL:
5479 { literal *lit = a1;
5480 term_t tmp;
5481
5482 if ( !(tmp = PL_new_term_refs(1)) ||
5483 !unify_literal(tmp, lit) ||
5484 !PL_cons_functor_v(term, FUNCTOR_new_literal1, tmp) )
5485 return FALSE;
5486 break;
5487 }
5488 case EV_OLD_LITERAL:
5489 { literal *lit = a1;
5490 term_t tmp;
5491
5492 if ( !(tmp = PL_new_term_refs(1)) ||
5493 !unify_literal(tmp, lit) ||
5494 !PL_cons_functor_v(term, FUNCTOR_old_literal1, tmp) )
5495 return FALSE;
5496 break;
5497 }
5498 case EV_LOAD:
5499 { term_t ctx = (term_t)a1;
5500 atom_t be = (atom_t)a2;
5501 term_t tmp;
5502
5503 if ( !(tmp = PL_new_term_refs(2)) ||
5504 !PL_put_atom(tmp+0, be) || /* begin/end */
5505 !PL_put_term(tmp+1, ctx) ||
5506 !PL_cons_functor_v(term, FUNCTOR_load2, tmp) )
5507 return FALSE;
5508 break;
5509 }
5510 case EV_TRANSACTION:
5511 { term_t ctx = (term_t)a1;
5512 term_t be = (term_t)a2;
5513 term_t tmp;
5514
5515 if ( !(tmp = PL_new_term_refs(2)) ||
5516 !PL_put_term(tmp+0, be) || /* begin/end */
5517 !PL_put_term(tmp+1, ctx) ||
5518 !PL_cons_functor_v(term, FUNCTOR_transaction2, tmp) )
5519 return FALSE;
5520 break;
5521 }
5522 case EV_REHASH:
5523 { atom_t be = (atom_t)a1;
5524 term_t tmp = PL_new_term_refs(1);
5525
5526 if ( !(tmp = PL_new_term_refs(1)) ||
5527 !PL_put_atom(tmp+0, be) ||
5528 !PL_cons_functor_v(term, FUNCTOR_rehash1, tmp) )
5529 return FALSE;
5530 break;
5531 }
5532 default:
5533 assert(0);
5534 }
5535
5536 do_broadcast(term, id);
5537
5538 PL_discard_foreign_frame(fid);
5539 }
5540
5541 return TRUE;
5542 }
5543
5544
5545 static foreign_t
rdf_monitor(term_t goal,term_t mask)5546 rdf_monitor(term_t goal, term_t mask)
5547 { atom_t name;
5548 broadcast_callback *cb;
5549 predicate_t p;
5550 long msk;
5551 module_t m = NULL;
5552
5553 PL_strip_module(goal, &m, goal);
5554
5555 if ( !get_atom_ex(goal, &name) ||
5556 !get_long_ex(mask, &msk) )
5557 return FALSE;
5558
5559 p = PL_pred(PL_new_functor(name, 1), m);
5560
5561 for(cb=callback_list; cb; cb = cb->next)
5562 { if ( cb->pred == p )
5563 { broadcast_callback *cb2;
5564 cb->mask = msk;
5565
5566 joined_mask = 0L;
5567 for(cb2=callback_list; cb2; cb2 = cb2->next)
5568 joined_mask |= cb2->mask;
5569 DEBUG(2, Sdprintf("Set mask to 0x%x\n", joined_mask));
5570
5571 return TRUE;
5572 }
5573 }
5574
5575 cb = PL_malloc(sizeof(*cb));
5576 cb->next = NULL;
5577 cb->mask = msk;
5578 cb->pred = p;
5579 if ( callback_list )
5580 { callback_tail->next = cb;
5581 callback_tail = cb;
5582 } else
5583 { callback_list = callback_tail = cb;
5584 }
5585 joined_mask |= msk;
5586
5587 return TRUE;
5588 }
5589
5590
5591
5592 /*******************************
5593 * QUERY *
5594 *******************************/
5595
5596 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5597 Enumerate the known subjects. This uses the `first' flag on triples to
5598 avoid returning the same resource multiple times. As the `by_none' is
5599 never re-hashed, we don't mark this query in the `active_queries'.
5600 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5601
5602 static foreign_t
rdf_subject(term_t subject,control_t h)5603 rdf_subject(term_t subject, control_t h)
5604 { triple *t;
5605 rdf_db *db = DB;
5606
5607 switch(PL_foreign_control(h))
5608 { case PL_FIRST_CALL:
5609 { if ( PL_is_variable(subject) )
5610 { t = db->table[BY_NONE][0];
5611 goto next;
5612 } else
5613 { atom_t a;
5614
5615 if ( get_atom_ex(subject, &a) )
5616 { if ( first(db, a) )
5617 return TRUE;
5618 return FALSE;
5619 }
5620
5621 return FALSE;
5622 }
5623 }
5624 case PL_REDO:
5625 t = PL_foreign_context_address(h);
5626 next:
5627 for(; t; t = t->next[BY_NONE])
5628 { if ( t->first && !t->erased )
5629 { if ( !PL_unify_atom(subject, t->subject) )
5630 return FALSE;
5631
5632 t = t->next[BY_NONE];
5633 if ( t )
5634 PL_retry_address(t);
5635 return TRUE;
5636 }
5637 }
5638 return FALSE;
5639 case PL_CUTTED:
5640 return TRUE;
5641 default:
5642 assert(0);
5643 return FALSE;
5644 }
5645 }
5646
5647
5648 static foreign_t
rdf_set_predicate(term_t pred,term_t option)5649 rdf_set_predicate(term_t pred, term_t option)
5650 { predicate *p;
5651 rdf_db *db = DB;
5652
5653 if ( !get_predicate(db, pred, &p) )
5654 return FALSE;
5655
5656 if ( PL_is_functor(option, FUNCTOR_symmetric1) )
5657 { int val;
5658
5659 if ( !get_bool_arg_ex(1, option, &val) )
5660 return FALSE;
5661
5662 p->inverse_of = p;
5663 return TRUE;
5664 } else if ( PL_is_functor(option, FUNCTOR_inverse_of1) )
5665 { term_t a = PL_new_term_ref();
5666 predicate *i;
5667
5668 _PL_get_arg(1, option, a);
5669 if ( PL_get_nil(a) )
5670 { if ( p->inverse_of )
5671 { p->inverse_of->inverse_of = NULL;
5672 p->inverse_of = NULL;
5673 }
5674 } else
5675 { if ( !get_predicate(db, a, &i) )
5676 return FALSE;
5677
5678 p->inverse_of = i;
5679 i->inverse_of = p;
5680 }
5681 return TRUE;
5682 } else if ( PL_is_functor(option, FUNCTOR_transitive1) )
5683 { int val;
5684
5685 if ( !get_bool_arg_ex(1, option, &val) )
5686 return FALSE;
5687
5688 p->transitive = val;
5689
5690 return TRUE;
5691 } else
5692 return type_error(option, "predicate_option");
5693 }
5694
5695
5696 #define PRED_PROPERTY_COUNT 9
5697 static functor_t predicate_key[PRED_PROPERTY_COUNT];
5698
5699 static int
unify_predicate_property(rdf_db * db,predicate * p,term_t option,functor_t f)5700 unify_predicate_property(rdf_db *db, predicate *p, term_t option, functor_t f)
5701 { if ( f == FUNCTOR_symmetric1 )
5702 return PL_unify_term(option, PL_FUNCTOR, f,
5703 PL_BOOL, p->inverse_of == p ? TRUE : FALSE);
5704 else if ( f == FUNCTOR_inverse_of1 )
5705 { if ( p->inverse_of )
5706 return PL_unify_term(option, PL_FUNCTOR, f,
5707 PL_ATOM, p->inverse_of->name);
5708 else
5709 return FALSE;
5710 } else if ( f == FUNCTOR_transitive1 )
5711 { return PL_unify_term(option, PL_FUNCTOR, f,
5712 PL_BOOL, p->transitive);
5713 } else if ( f == FUNCTOR_triples1 )
5714 { return PL_unify_term(option, PL_FUNCTOR, f,
5715 PL_LONG, p->triple_count);
5716 } else if ( f == FUNCTOR_rdf_subject_branch_factor1 )
5717 { return PL_unify_term(option, PL_FUNCTOR, f,
5718 PL_FLOAT, subject_branch_factor(db, p, DISTINCT_DIRECT));
5719 } else if ( f == FUNCTOR_rdf_object_branch_factor1 )
5720 { return PL_unify_term(option, PL_FUNCTOR, f,
5721 PL_FLOAT, object_branch_factor(db, p, DISTINCT_DIRECT));
5722 } else if ( f == FUNCTOR_rdfs_subject_branch_factor1 )
5723 { return PL_unify_term(option, PL_FUNCTOR, f,
5724 PL_FLOAT, subject_branch_factor(db, p, DISTINCT_SUB));
5725 } else if ( f == FUNCTOR_rdfs_object_branch_factor1 )
5726 { return PL_unify_term(option, PL_FUNCTOR, f,
5727 PL_FLOAT, object_branch_factor(db, p, DISTINCT_SUB));
5728 } else
5729 { assert(0);
5730 return FALSE;
5731 }
5732 }
5733
5734
5735 static foreign_t
rdf_current_predicates(term_t preds)5736 rdf_current_predicates(term_t preds)
5737 { rdf_db *db = DB;
5738 int i;
5739 term_t head = PL_new_term_ref();
5740 term_t tail = PL_copy_term_ref(preds);
5741
5742 LOCK_MISC(db);
5743 for(i=0; i<db->pred_table_size; i++)
5744 { predicate *p;
5745
5746 for(p=db->pred_table[i]; p; p = p->next)
5747 { if ( !PL_unify_list(tail, head, tail) ||
5748 !PL_unify_atom(head, p->name) )
5749 { UNLOCK_MISC(db);
5750 return FALSE;
5751 }
5752 }
5753 }
5754 UNLOCK_MISC(db);
5755
5756 return PL_unify_nil(tail);
5757 }
5758
5759
5760 static foreign_t
rdf_predicate_property(term_t pred,term_t option,control_t h)5761 rdf_predicate_property(term_t pred, term_t option, control_t h)
5762 { int n;
5763 predicate *p;
5764 rdf_db *db = DB;
5765
5766 if ( !predicate_key[0] )
5767 { int i = 0;
5768
5769 predicate_key[i++] = FUNCTOR_symmetric1;
5770 predicate_key[i++] = FUNCTOR_inverse_of1;
5771 predicate_key[i++] = FUNCTOR_transitive1;
5772 predicate_key[i++] = FUNCTOR_triples1;
5773 predicate_key[i++] = FUNCTOR_rdf_subject_branch_factor1;
5774 predicate_key[i++] = FUNCTOR_rdf_object_branch_factor1;
5775 predicate_key[i++] = FUNCTOR_rdfs_subject_branch_factor1;
5776 predicate_key[i++] = FUNCTOR_rdfs_object_branch_factor1;
5777 assert(i < PRED_PROPERTY_COUNT);
5778 }
5779
5780 switch(PL_foreign_control(h))
5781 { case PL_FIRST_CALL:
5782 { functor_t f;
5783
5784 if ( PL_is_variable(option) )
5785 { n = 0;
5786 goto redo;
5787 } else if ( PL_get_functor(option, &f) )
5788 { for(n=0; predicate_key[n]; n++)
5789 { if ( predicate_key[n] == f )
5790 { if ( !get_predicate(db, pred, &p) )
5791 return FALSE;
5792 return unify_predicate_property(db, p, option, f);
5793 }
5794 }
5795 return domain_error(option, "rdf_predicate_property");
5796 } else
5797 return type_error(option, "rdf_predicate_property");
5798 }
5799 case PL_REDO:
5800 n = (int)PL_foreign_context(h);
5801 redo:
5802 if ( !get_predicate(db, pred, &p) )
5803 return FALSE;
5804 for( ; predicate_key[n]; n++ )
5805 { if ( unify_predicate_property(db, p, option, predicate_key[n]) )
5806 { n++;
5807 if ( predicate_key[n] )
5808 PL_retry(n);
5809 return TRUE;
5810 }
5811 }
5812 return FALSE;
5813 case PL_CUTTED:
5814 return TRUE;
5815 default:
5816 assert(0);
5817 return TRUE;
5818 }
5819 }
5820
5821
5822 /*******************************
5823 * TRANSITIVE RELATIONS *
5824 *******************************/
5825
5826 typedef struct visited
5827 { struct visited *next; /* next in list */
5828 struct visited *hash_link; /* next in hashed link */
5829 atom_t resource; /* visited resource */
5830 uintptr_t distance; /* Distance */
5831 } visited;
5832
5833
5834 #define AGENDA_LOCAL_MAGIC 742736360
5835 #define AGENDA_SAVED_MAGIC 742736362
5836
5837 typedef struct agenda
5838 { visited *head; /* visited list */
5839 visited *tail; /* tail of visited list */
5840 visited *to_expand; /* next to expand */
5841 visited *to_return; /* next to return */
5842 visited **hash; /* hash-table for cycle detection */
5843 int magic; /* AGENDA_*_MAGIC */
5844 int hash_size;
5845 int size; /* size of the agenda */
5846 uintptr_t max_d; /* max distance */
5847 triple pattern; /* partial triple used as pattern */
5848 atom_t target; /* resource we are seaching for */
5849 struct chunk *chunk; /* node-allocation chunks */
5850 } agenda;
5851
5852 #ifndef offsetof
5853 #define offsetof(structure, field) ((size_t) &(((structure *)NULL)->field))
5854 #endif
5855 #define CHUNK_SIZE(n) offsetof(chunk, nodes[n])
5856
5857 typedef struct chunk
5858 { struct chunk *next;
5859 int used; /* # used elements */
5860 int size; /* size of the chunk */
5861 struct visited nodes[1]; /* nodes in the chunk */
5862 } chunk;
5863
5864
5865 static visited *
alloc_node_agenda(rdf_db * db,agenda * a)5866 alloc_node_agenda(rdf_db *db, agenda *a)
5867 { chunk *c;
5868 int size;
5869
5870 if ( (c=a->chunk) )
5871 { if ( c->used < c->size )
5872 { visited *v = &c->nodes[c->used++];
5873
5874 return v;
5875 }
5876 }
5877
5878 size = (a->size == 0 ? 8 : 1024);
5879 c = rdf_malloc(db, CHUNK_SIZE(size));
5880 c->size = size;
5881 c->used = 1;
5882 c->next = a->chunk;
5883 a->chunk = c;
5884
5885 return &c->nodes[0];
5886 }
5887
5888
5889 static void
empty_agenda(rdf_db * db,agenda * a)5890 empty_agenda(rdf_db *db, agenda *a)
5891 { chunk *c, *n;
5892
5893 for(c=a->chunk; c; c = n)
5894 { n = c->next;
5895 rdf_free(db, c, CHUNK_SIZE(c->size));
5896 }
5897 if ( a->hash )
5898 rdf_free(db, a->hash, sizeof(visited*)*a->hash_size);
5899
5900 if ( a->magic == AGENDA_SAVED_MAGIC )
5901 { a->magic = 0;
5902 rdf_free(db, a, sizeof(*a));
5903 } else
5904 { a->magic = 0;
5905 }
5906 }
5907
5908
5909 static void
unlock_and_empty_agenda(rdf_db * db,agenda * a)5910 unlock_and_empty_agenda(rdf_db *db, agenda *a)
5911 { RDUNLOCK(db);
5912 empty_agenda(db, a);
5913 }
5914
5915
5916 static agenda *
save_agenda(rdf_db * db,agenda * a)5917 save_agenda(rdf_db *db, agenda *a)
5918 { agenda *r = rdf_malloc(db, sizeof(*r));
5919
5920 assert(a->magic == AGENDA_LOCAL_MAGIC);
5921 *r = *a;
5922 r->magic = AGENDA_SAVED_MAGIC;
5923
5924 return r;
5925 }
5926
5927
5928 static void
hash_agenda(rdf_db * db,agenda * a,int size)5929 hash_agenda(rdf_db *db, agenda *a, int size)
5930 { if ( a->hash )
5931 rdf_free(db, a->hash, sizeof(*a->hash));
5932 if ( size > 0 )
5933 { visited *v;
5934
5935 a->hash = rdf_malloc(db, sizeof(visited*)*size);
5936 memset(a->hash, 0, sizeof(visited*)*size);
5937 a->hash_size = size;
5938
5939 for(v=a->head; v; v = v->next)
5940 { int key = atom_hash(v->resource)&(size-1);
5941
5942 v->hash_link = a->hash[key];
5943 a->hash[key] = v;
5944 }
5945 }
5946 }
5947
5948
5949 static int
in_aganda(agenda * a,atom_t resource)5950 in_aganda(agenda *a, atom_t resource)
5951 { visited *v;
5952
5953 if ( a->hash )
5954 { int key = atom_hash(resource)&(a->hash_size-1);
5955 v = a->hash[key];
5956
5957 for( ; v; v = v->hash_link )
5958 { if ( v->resource == resource )
5959 return TRUE;
5960 }
5961 } else
5962 { v = a->head;
5963
5964 for( ; v; v = v->next )
5965 { if ( v->resource == resource )
5966 return TRUE;
5967 }
5968 }
5969
5970 return FALSE;
5971 }
5972
5973
5974 static visited *
append_agenda(rdf_db * db,agenda * a,atom_t res,uintptr_t d)5975 append_agenda(rdf_db *db, agenda *a, atom_t res, uintptr_t d)
5976 { visited *v = a->head;
5977
5978 if ( in_aganda(a, res) )
5979 return NULL;
5980
5981 db->agenda_created++; /* statistics */
5982
5983 a->size++;
5984 if ( !a->hash_size && a->size > 32 )
5985 hash_agenda(db, a, 64);
5986 else if ( a->size > a->hash_size * 4 )
5987 hash_agenda(db, a, a->hash_size * 4);
5988
5989 v = alloc_node_agenda(db, a);
5990 v->resource = res;
5991 v->distance = d;
5992 v->next = NULL;
5993 if ( a->tail )
5994 { a->tail->next = v;
5995 a->tail = v;
5996 } else
5997 { a->head = a->tail = v;
5998 }
5999
6000 if ( a->hash_size )
6001 { int key = atom_hash(res)&(a->hash_size-1);
6002
6003 v->hash_link = a->hash[key];
6004 a->hash[key] = v;
6005 }
6006
6007 return v;
6008 }
6009
6010
6011 static int
can_reach_target(rdf_db * db,agenda * a)6012 can_reach_target(rdf_db *db, agenda *a)
6013 { int indexed = a->pattern.indexed;
6014 int rc = FALSE;
6015 triple *p;
6016
6017 if ( indexed & BY_S ) /* subj ---> */
6018 { a->pattern.object.resource = a->target;
6019 indexed |= BY_O;
6020 } else
6021 { a->pattern.subject = a->target;
6022 indexed |= BY_S;
6023 }
6024
6025 p = db->table[indexed][triple_hash(db, &a->pattern, indexed)];
6026 for( ; p; p = p->next[indexed])
6027 { if ( match_triples(p, &a->pattern, MATCH_SUBPROPERTY) )
6028 { rc = TRUE;
6029 break;
6030 }
6031 }
6032
6033 if ( a->pattern.indexed & BY_S )
6034 { a->pattern.object.resource = 0;
6035 } else
6036 { a->pattern.subject = 0;
6037 }
6038
6039 return rc;
6040 }
6041
6042
6043
6044 static visited *
bf_expand(rdf_db * db,agenda * a,atom_t resource,uintptr_t d)6045 bf_expand(rdf_db *db, agenda *a, atom_t resource, uintptr_t d)
6046 { triple *p;
6047 int indexed = a->pattern.indexed;
6048 visited *rc = NULL;
6049
6050 if ( indexed & BY_S ) /* subj ---> */
6051 { a->pattern.subject = resource;
6052 } else
6053 { a->pattern.object.resource = resource;
6054 }
6055
6056 if ( a->target && can_reach_target(db, a) )
6057 { return append_agenda(db, a, a->target, d);
6058 }
6059
6060 p = db->table[indexed][triple_hash(db, &a->pattern, indexed)];
6061 for( ; p; p = p->next[indexed])
6062 { if ( match_triples(p, &a->pattern, MATCH_SUBPROPERTY) )
6063 { atom_t found;
6064 visited *v;
6065
6066 if ( indexed & BY_S )
6067 { if ( p->object_is_literal )
6068 continue;
6069 found = p->object.resource;
6070 } else
6071 { found = p->subject;
6072 }
6073
6074 v = append_agenda(db, a, found, d);
6075 if ( !rc )
6076 rc = v;
6077 if ( found == a->target )
6078 break;
6079 }
6080 }
6081 /* TBD: handle owl:inverseOf */
6082 /* TBD: handle owl:sameAs */
6083 return rc;
6084 }
6085
6086
6087 static visited *
next_agenda(rdf_db * db,agenda * a)6088 next_agenda(rdf_db *db, agenda *a)
6089 { visited *v;
6090
6091 if ( (v=a->to_return) )
6092 { ok:
6093
6094 a->to_return = a->to_return->next;
6095
6096 return v;
6097 }
6098
6099 while( a->to_expand )
6100 { uintptr_t next_d = a->to_expand->distance+1;
6101
6102 if ( next_d >= a->max_d )
6103 return NULL;
6104
6105 a->to_return = bf_expand(db, a,
6106 a->to_expand->resource,
6107 next_d);
6108 a->to_expand = a->to_expand->next;
6109
6110 if ( (v=a->to_return) )
6111 goto ok;
6112 }
6113
6114 return NULL;
6115 }
6116
6117
6118 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6119 rdf_reachable(+Subject, +Predicate, -Object)
6120 rdf_reachable(-Subject, +Predicate, ?Object)
6121 Examine transitive relations, reporting all `Object' that can be
6122 reached from `Subject' using Predicate without going into a loop
6123 if the relation is cyclic.
6124
6125 directly_attached() deals with the posibility that the predicate is not
6126 defined and Subject and Object are the same. Should use clean error
6127 handling, but that means a lot of changes. For now this will do.
6128 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6129
6130 static int
directly_attached(term_t pred,term_t from,term_t to)6131 directly_attached(term_t pred, term_t from, term_t to)
6132 { if ( PL_is_atom(pred) && PL_is_atom(from) )
6133 return PL_unify(to, from);
6134
6135 return FALSE;
6136 }
6137
6138
6139 static int
unify_distance(term_t d,uintptr_t dist)6140 unify_distance(term_t d, uintptr_t dist)
6141 { if ( d )
6142 return PL_unify_integer(d, dist);
6143
6144 return TRUE;
6145 }
6146
6147
6148 static foreign_t
rdf_reachable(term_t subj,term_t pred,term_t obj,term_t max_d,term_t d,control_t h)6149 rdf_reachable(term_t subj, term_t pred, term_t obj,
6150 term_t max_d, term_t d,
6151 control_t h)
6152 { rdf_db *db = DB;
6153
6154 switch(PL_foreign_control(h))
6155 { case PL_FIRST_CALL:
6156 { agenda a;
6157 visited *v;
6158 term_t target_term;
6159 int is_det = FALSE;
6160
6161 if ( PL_is_variable(pred) )
6162 return instantiation_error(pred);
6163
6164 memset(&a, 0, sizeof(a));
6165 a.magic = AGENDA_LOCAL_MAGIC;
6166 if ( max_d )
6167 { long md;
6168 atom_t inf;
6169
6170 if ( PL_get_atom(max_d, &inf) && inf == ATOM_infinite )
6171 a.max_d = (uintptr_t)-1;
6172 if ( !get_long_ex(max_d, &md) || md < 0 )
6173 return FALSE;
6174 a.max_d = md;
6175 } else
6176 { a.max_d = (uintptr_t)-1;
6177 }
6178
6179 if ( !PL_is_variable(subj) ) /* subj .... obj */
6180 { switch(get_partial_triple(db, subj, pred, 0, 0, &a.pattern))
6181 { case 0:
6182 return directly_attached(pred, subj, obj) &&
6183 unify_distance(d, 0);
6184 case -1:
6185 return FALSE;
6186 }
6187 is_det = PL_is_ground(obj);
6188 target_term = obj;
6189 } else if ( !PL_is_variable(obj) ) /* obj .... subj */
6190 { switch(get_partial_triple(db, 0, pred, obj, 0, &a.pattern))
6191 { case 0:
6192 return directly_attached(pred, obj, subj);
6193 case -1:
6194 return FALSE;
6195 }
6196 if ( a.pattern.object_is_literal )
6197 return FALSE; /* rdf_reachable(-,+,literal(...)) */
6198 target_term = subj;
6199 } else
6200 return instantiation_error(subj);
6201
6202 if ( !RDLOCK(db) )
6203 return FALSE;
6204 if ( !update_hash(db) )
6205 return FALSE;
6206 if ( (a.pattern.indexed & BY_S) ) /* subj ... */
6207 append_agenda(db, &a, a.pattern.subject, 0);
6208 else
6209 append_agenda(db, &a, a.pattern.object.resource, 0);
6210 a.to_return = a.head;
6211 a.to_expand = a.head;
6212
6213 while( (v=next_agenda(db, &a)) )
6214 { if ( PL_unify_atom(target_term, v->resource) )
6215 { if ( is_det ) /* mode(+, +, +) */
6216 { int rc = unify_distance(d, v->distance);
6217 unlock_and_empty_agenda(db, &a);
6218 return rc;
6219 } else if ( unify_distance(d, v->distance) )
6220 { /* mode(+, +, -) or mode(-, +, +) */
6221 agenda *ra = save_agenda(db, &a);
6222 inc_active_queries(db);
6223 DEBUG(9, Sdprintf("Saved agenta to %p\n", ra));
6224 PL_retry_address(ra);
6225 }
6226 }
6227 }
6228 unlock_and_empty_agenda(db, &a);
6229 return FALSE;
6230 }
6231 case PL_REDO:
6232 { agenda *a = PL_foreign_context_address(h);
6233 term_t target_term;
6234 visited *v;
6235
6236 assert(a->magic == AGENDA_SAVED_MAGIC);
6237
6238 if ( !PL_is_variable(subj) ) /* +, +, - */
6239 target_term = obj;
6240 else
6241 target_term = subj; /* -, +, + */
6242
6243 while( (v=next_agenda(db, a)) )
6244 { if ( PL_unify_atom(target_term, v->resource) &&
6245 unify_distance(d, v->distance) )
6246 { assert(a->magic == AGENDA_SAVED_MAGIC);
6247 PL_retry_address(a);
6248 }
6249 }
6250
6251 dec_active_queries(db);
6252 unlock_and_empty_agenda(db, a);
6253 return FALSE;
6254 }
6255 case PL_CUTTED:
6256 { agenda *a = PL_foreign_context_address(h);
6257
6258 DEBUG(9, Sdprintf("Cutted; agenda = %p\n", a));
6259
6260 assert(a->magic == AGENDA_SAVED_MAGIC);
6261
6262 dec_active_queries(db);
6263 unlock_and_empty_agenda(db, a);
6264 return TRUE;
6265 }
6266 default:
6267 assert(0);
6268 return FALSE;
6269 }
6270 }
6271
6272 static foreign_t
rdf_reachable3(term_t subj,term_t pred,term_t obj,control_t h)6273 rdf_reachable3(term_t subj, term_t pred, term_t obj, control_t h)
6274 { return rdf_reachable(subj, pred, obj, 0, 0, h);
6275 }
6276
6277 static foreign_t
rdf_reachable5(term_t subj,term_t pred,term_t obj,term_t max_d,term_t d,control_t h)6278 rdf_reachable5(term_t subj, term_t pred, term_t obj, term_t max_d, term_t d,
6279 control_t h)
6280 { return rdf_reachable(subj, pred, obj, max_d, d, h);
6281 }
6282
6283
6284 /*******************************
6285 * STATISTICS *
6286 *******************************/
6287
6288 static functor_t keys[16]; /* initialised in install_rdf_db() */
6289
6290 static int
unify_statistics(rdf_db * db,term_t key,functor_t f)6291 unify_statistics(rdf_db *db, term_t key, functor_t f)
6292 { int64_t v;
6293
6294 if ( f == FUNCTOR_triples1 )
6295 { v = db->created - db->erased;
6296 } else if ( f == FUNCTOR_subjects1 )
6297 { v = db->subjects;
6298 } else if ( f == FUNCTOR_predicates1 )
6299 { v = db->pred_count;
6300 } else if ( f == FUNCTOR_core1 )
6301 { v = db->core;
6302 } else if ( f == FUNCTOR_indexed8 )
6303 { int i;
6304 term_t a = PL_new_term_ref();
6305
6306 if ( !PL_unify_functor(key, FUNCTOR_indexed8) )
6307 return FALSE;
6308 for(i=0; i<8; i++)
6309 { if ( !PL_get_arg(i+1, key, a) ||
6310 !PL_unify_integer(a, db->indexed[i]) )
6311 return FALSE;
6312 }
6313
6314 return TRUE;
6315 } else if ( f == FUNCTOR_searched_nodes1 )
6316 { v = db->agenda_created;
6317 } else if ( f == FUNCTOR_duplicates1 )
6318 { v = db->duplicates;
6319 } else if ( f == FUNCTOR_literals1 )
6320 { v = db->literals.count;
6321 } else if ( f == FUNCTOR_triples2 && PL_is_functor(key, f) )
6322 { graph *src;
6323 term_t a = PL_new_term_ref();
6324 atom_t name;
6325
6326 _PL_get_arg(1, key, a);
6327 if ( !PL_get_atom(a, &name) )
6328 return type_error(a, "atom");
6329 if ( (src = lookup_graph(db, name, FALSE)) )
6330 v = src->triple_count;
6331 else
6332 v = 0;
6333
6334 _PL_get_arg(2, key, a);
6335 return PL_unify_int64(a, v);
6336 } else if ( f == FUNCTOR_gc2 )
6337 { return PL_unify_term(key,
6338 PL_FUNCTOR, f,
6339 PL_INT, db->gc_count,
6340 PL_FLOAT, db->gc_time); /* time spent */
6341 } else if ( f == FUNCTOR_rehash2 )
6342 { return PL_unify_term(key,
6343 PL_FUNCTOR, f,
6344 PL_INT, db->rehash_count,
6345 PL_FLOAT, db->rehash_time);
6346 } else
6347 assert(0);
6348
6349 return PL_unify_term(key, PL_FUNCTOR, f, PL_INT64, v);
6350 }
6351
6352 static foreign_t
rdf_statistics(term_t key,control_t h)6353 rdf_statistics(term_t key, control_t h)
6354 { int n;
6355 rdf_db *db = DB;
6356
6357 switch(PL_foreign_control(h))
6358 { case PL_FIRST_CALL:
6359 { functor_t f;
6360
6361 if ( PL_is_variable(key) )
6362 { n = 0;
6363 goto redo;
6364 } else if ( PL_get_functor(key, &f) )
6365 { for(n=0; keys[n]; n++)
6366 { if ( keys[n] == f )
6367 return unify_statistics(db, key, f);
6368 }
6369 return domain_error(key, "rdf_statistics");
6370 } else
6371 return type_error(key, "rdf_statistics");
6372 }
6373 case PL_REDO:
6374 n = (int)PL_foreign_context(h);
6375 redo:
6376 unify_statistics(db, key, keys[n]);
6377 n++;
6378 if ( keys[n] )
6379 PL_retry(n);
6380 case PL_CUTTED:
6381 return TRUE;
6382 default:
6383 assert(0);
6384 return TRUE;
6385 }
6386 }
6387
6388
6389 static foreign_t
rdf_generation(term_t t)6390 rdf_generation(term_t t)
6391 { rdf_db *db = DB;
6392
6393 return PL_unify_integer(t, db->generation);
6394 }
6395
6396
6397 /*******************************
6398 * RESET *
6399 *******************************/
6400
6401 static void
erase_triples(rdf_db * db)6402 erase_triples(rdf_db *db)
6403 { triple *t, *n;
6404 int i;
6405
6406 for(t=db->by_none; t; t=n)
6407 { n = t->next[BY_NONE];
6408
6409 free_triple(db, t);
6410 db->freed++;
6411 }
6412 db->by_none = db->by_none_tail = NULL;
6413
6414 for(i=BY_S; i<=BY_OP; i++)
6415 { if ( db->table[i] )
6416 { int bytes = sizeof(triple*) * db->table_size[i];
6417
6418 memset(db->table[i], 0, bytes);
6419 memset(db->tail[i], 0, bytes);
6420 }
6421 }
6422
6423 db->created = 0;
6424 db->erased = 0;
6425 db->freed = 0;
6426 db->erased = 0;
6427 db->subjects = 0;
6428 db->rehash_count = 0;
6429 memset(db->indexed, 0, sizeof(db->indexed));
6430 db->duplicates = 0;
6431 db->generation = 0;
6432 }
6433
6434
6435 static void
erase_predicates(rdf_db * db)6436 erase_predicates(rdf_db *db)
6437 { predicate **ht;
6438 int i;
6439
6440 for(i=0,ht = db->pred_table; i<db->pred_table_size; i++, ht++)
6441 { predicate *p, *n;
6442
6443 for( p = *ht; p; p = n )
6444 { n = p->next;
6445
6446 free_list(db, &p->subPropertyOf);
6447 free_list(db, &p->siblings);
6448 if ( ++p->cloud->deleted == p->cloud->size )
6449 free_predicate_cloud(db, p->cloud);
6450
6451 rdf_free(db, p, sizeof(*p));
6452 }
6453
6454 *ht = NULL;
6455 }
6456
6457 db->pred_count = 0;
6458 db->next_hash = 0;
6459 }
6460
6461
6462 static void
reset_db(rdf_db * db)6463 reset_db(rdf_db *db)
6464 { db->resetting = TRUE;
6465
6466 erase_triples(db);
6467 erase_predicates(db);
6468 erase_graphs(db);
6469 db->need_update = FALSE;
6470 db->agenda_created = 0;
6471 avlfree(&db->literals);
6472 init_literal_table(db);
6473
6474 db->resetting = FALSE;
6475 }
6476
6477
6478 static foreign_t
rdf_reset_db()6479 rdf_reset_db()
6480 { rdf_db *db = DB;
6481
6482 if ( !WRLOCK(db, FALSE) )
6483 return FALSE;
6484
6485 if ( db->tr_first )
6486 { record_transaction(db, TR_RESET, NULL);
6487 db->tr_reset = TRUE;
6488 } else
6489 reset_db(db);
6490
6491 WRUNLOCK(db);
6492
6493 return TRUE;
6494 }
6495
6496
6497 /*******************************
6498 * MATCH *
6499 *******************************/
6500
6501
6502 static foreign_t
match_label(term_t how,term_t search,term_t label)6503 match_label(term_t how, term_t search, term_t label)
6504 { atom_t h, f, l;
6505 int type;
6506
6507 if ( !get_atom_ex(how, &h) ||
6508 !get_atom_ex(search, &f) ||
6509 !get_atom_ex(label, &l) )
6510 return FALSE;
6511
6512 if ( h == ATOM_exact )
6513 type = STR_MATCH_EXACT;
6514 else if ( h == ATOM_substring )
6515 type = STR_MATCH_SUBSTRING;
6516 else if ( h == ATOM_word )
6517 type = STR_MATCH_WORD;
6518 else if ( h == ATOM_prefix )
6519 type = STR_MATCH_PREFIX;
6520 else if ( h == ATOM_like )
6521 type = STR_MATCH_LIKE;
6522 else
6523 return domain_error(how, "search_method");
6524
6525 return match_atoms(type, f, l);
6526 }
6527
6528
6529 static foreign_t
lang_matches(term_t lang,term_t pattern)6530 lang_matches(term_t lang, term_t pattern)
6531 { atom_t l, p;
6532
6533 if ( !get_atom_ex(lang, &l) ||
6534 !get_atom_ex(pattern, &p) )
6535 return FALSE;
6536
6537 return atom_lang_matches(l, p);
6538 }
6539
6540
6541
6542
6543 /*******************************
6544 * VERSION *
6545 *******************************/
6546
6547 static foreign_t
rdf_version(term_t v)6548 rdf_version(term_t v)
6549 { return PL_unify_integer(v, RDF_VERSION);
6550 }
6551
6552
6553 /*******************************
6554 * MORE STUFF *
6555 *******************************/
6556
6557 #include "quote.c"
6558
6559 /*******************************
6560 * REGISTER *
6561 *******************************/
6562
6563 #define MKFUNCTOR(n, a) \
6564 FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
6565 #define NDET PL_FA_NONDETERMINISTIC
6566 #define META PL_FA_TRANSPARENT
6567
6568 install_t
install_rdf_db()6569 install_rdf_db()
6570 { int i=0;
6571 extern install_t install_atom_map(void);
6572
6573 MKFUNCTOR(literal, 1);
6574 MKFUNCTOR(error, 2);
6575 MKFUNCTOR(type_error, 2);
6576 MKFUNCTOR(domain_error, 2);
6577 MKFUNCTOR(triples, 1);
6578 MKFUNCTOR(triples, 2);
6579 MKFUNCTOR(subjects, 1);
6580 MKFUNCTOR(predicates, 1);
6581 MKFUNCTOR(subject, 1);
6582 MKFUNCTOR(predicate, 1);
6583 MKFUNCTOR(object, 1);
6584 MKFUNCTOR(graph, 1);
6585 MKFUNCTOR(indexed, 8);
6586 MKFUNCTOR(exact, 1);
6587 MKFUNCTOR(plain, 1);
6588 MKFUNCTOR(substring, 1);
6589 MKFUNCTOR(word, 1);
6590 MKFUNCTOR(prefix, 1);
6591 MKFUNCTOR(like, 1);
6592 MKFUNCTOR(literal, 2);
6593 MKFUNCTOR(searched_nodes, 1);
6594 MKFUNCTOR(duplicates, 1);
6595 MKFUNCTOR(literals, 1);
6596 MKFUNCTOR(symmetric, 1);
6597 MKFUNCTOR(transitive, 1);
6598 MKFUNCTOR(inverse_of, 1);
6599 MKFUNCTOR(lang, 2);
6600 MKFUNCTOR(type, 2);
6601 MKFUNCTOR(rdf_subject_branch_factor, 1);
6602 MKFUNCTOR(rdf_object_branch_factor, 1);
6603 MKFUNCTOR(rdfs_subject_branch_factor, 1);
6604 MKFUNCTOR(rdfs_object_branch_factor, 1);
6605 MKFUNCTOR(gc, 2);
6606 MKFUNCTOR(rehash, 2);
6607 MKFUNCTOR(core, 1);
6608 MKFUNCTOR(assert, 4);
6609 MKFUNCTOR(retract, 4);
6610 MKFUNCTOR(update, 5);
6611 MKFUNCTOR(new_literal, 1);
6612 MKFUNCTOR(old_literal, 1);
6613 MKFUNCTOR(transaction, 2);
6614 MKFUNCTOR(load, 2);
6615 MKFUNCTOR(rehash, 1);
6616 MKFUNCTOR(begin, 1);
6617 MKFUNCTOR(end, 1);
6618
6619 FUNCTOR_colon2 = PL_new_functor(PL_new_atom(":"), 2);
6620
6621 ATOM_user = PL_new_atom("user");
6622 ATOM_exact = PL_new_atom("exact");
6623 ATOM_plain = PL_new_atom("plain");
6624 ATOM_prefix = PL_new_atom("prefix");
6625 ATOM_like = PL_new_atom("like");
6626 ATOM_substring = PL_new_atom("substring");
6627 ATOM_word = PL_new_atom("word");
6628 ATOM_subPropertyOf = PL_new_atom(URL_subPropertyOf);
6629 ATOM_error = PL_new_atom("error");
6630 ATOM_begin = PL_new_atom("begin");
6631 ATOM_end = PL_new_atom("end");
6632 ATOM_infinite = PL_new_atom("infinite");
6633
6634 PRED_call1 = PL_predicate("call", 1, "user");
6635
6636 /* statistics */
6637 keys[i++] = FUNCTOR_triples1;
6638 keys[i++] = FUNCTOR_subjects1;
6639 keys[i++] = FUNCTOR_indexed8;
6640 keys[i++] = FUNCTOR_predicates1;
6641 keys[i++] = FUNCTOR_searched_nodes1;
6642 keys[i++] = FUNCTOR_duplicates1;
6643 keys[i++] = FUNCTOR_literals1;
6644 keys[i++] = FUNCTOR_triples2;
6645 keys[i++] = FUNCTOR_gc2;
6646 keys[i++] = FUNCTOR_rehash2;
6647 keys[i++] = FUNCTOR_core1;
6648 keys[i++] = 0;
6649
6650 /* setup the database */
6651 DB = new_db();
6652
6653 PL_register_foreign("rdf_version", 1, rdf_version, 0);
6654 PL_register_foreign("rdf_assert", 3, rdf_assert3, 0);
6655 PL_register_foreign("rdf_assert", 4, rdf_assert4, 0);
6656 PL_register_foreign("rdf_update", 4, rdf_update, 0);
6657 PL_register_foreign("rdf_update", 5, rdf_update5, 0);
6658 PL_register_foreign("rdf_retractall", 3, rdf_retractall3, 0);
6659 PL_register_foreign("rdf_retractall", 4, rdf_retractall4, 0);
6660 PL_register_foreign("rdf_subject", 1, rdf_subject, NDET);
6661 PL_register_foreign("rdf", 3, rdf3, NDET);
6662 PL_register_foreign("rdf", 4, rdf4, NDET);
6663 PL_register_foreign("rdf_has", 4, rdf_has, NDET);
6664 PL_register_foreign("rdf_statistics_",1, rdf_statistics, NDET);
6665 PL_register_foreign("rdf_generation", 1, rdf_generation, 0);
6666 PL_register_foreign("rdf_match_label",3, match_label, 0);
6667 PL_register_foreign("rdf_save_db_", 2, rdf_save_db, 0);
6668 PL_register_foreign("rdf_load_db_", 3, rdf_load_db, 0);
6669 PL_register_foreign("rdf_reachable", 3, rdf_reachable3, NDET);
6670 PL_register_foreign("rdf_reachable", 5, rdf_reachable5, NDET);
6671 PL_register_foreign("rdf_reset_db_", 0, rdf_reset_db, 0);
6672 PL_register_foreign("rdf_set_predicate",
6673 2, rdf_set_predicate, 0);
6674 PL_register_foreign("rdf_predicate_property_",
6675 2, rdf_predicate_property, NDET);
6676 PL_register_foreign("rdf_current_predicates",
6677 1, rdf_current_predicates, 0);
6678 PL_register_foreign("rdf_current_literal",
6679 1, rdf_current_literal, NDET);
6680 PL_register_foreign("rdf_graphs_", 1, rdf_graphs, 0);
6681 PL_register_foreign("rdf_set_graph_source", 3, rdf_set_graph_source, 0);
6682 PL_register_foreign("rdf_unset_graph_source", 1, rdf_unset_graph_source, 0);
6683 PL_register_foreign("rdf_graph_source_", 3, rdf_graph_source, 0);
6684 PL_register_foreign("rdf_estimate_complexity",
6685 4, rdf_estimate_complexity, 0);
6686 PL_register_foreign("rdf_transaction_",2, rdf_transaction, META);
6687 PL_register_foreign("rdf_active_transactions_",
6688 1, rdf_active_transactions, 0);
6689 PL_register_foreign("rdf_monitor_", 2, rdf_monitor, META);
6690 /*PL_register_foreign("rdf_broadcast_", 2, rdf_broadcast, 0);*/
6691 #ifdef WITH_MD5
6692 PL_register_foreign("rdf_md5", 2, rdf_md5, 0);
6693 PL_register_foreign("rdf_atom_md5", 3, rdf_atom_md5, 0);
6694 #endif
6695 PL_register_foreign("rdf_quote_uri", 2, rdf_quote_uri, 0);
6696
6697 #ifdef O_DEBUG
6698 PL_register_foreign("rdf_debug", 1, rdf_debug, 0);
6699 PL_register_foreign("rdf_print_predicate_cloud", 1, rdf_print_predicate_cloud, 0);
6700 #endif
6701 #ifdef O_SECURE
6702 PL_register_foreign("rdf_dump_literals", 0, dump_literals, 0);
6703 PL_register_foreign("rdf_check_literals", 0, check_transitivity, 0);
6704 #endif
6705 PL_register_foreign("lang_matches", 2, lang_matches, 0);
6706
6707 install_atom_map();
6708 }
6709