1 /*  $Id$
2 
3     Part of SWI-Prolog
4 
5     Author:        Jan Wielemaker
6     E-mail:        J.Wielemaker@uva.nl
7     WWW:           http://www.swi-prolog.org
8     Copyright (C): 1985-2009, University of Amsterdam
9 
10     This library is free software; you can redistribute it and/or
11     modify it under the terms of the GNU Lesser General Public
12     License as published by the Free Software Foundation; either
13     version 2.1 of the License, or (at your option) any later version.
14 
15     This library is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18     Lesser General Public License for more details.
19 
20     You should have received a copy of the GNU Lesser General Public
21     License along with this library; if not, write to the Free Software
22     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23 */
24 
25 #ifdef HAVE_CONFIG_H
26 #include <config.h>
27 #endif
28 
29 #define WITH_MD5 1
30 #define WITH_PL_MUTEX 1
31 #define _GNU_SOURCE 1			/* get rwlocks from glibc */
32 
33 #ifdef _REENTRANT
34 #ifdef __WINDOWS__
35 #include <malloc.h>			/* alloca() */
36 #define inline __inline
37 #ifndef SIZEOF_LONG
38 #define SIZEOF_LONG 4
39 #endif
40 #else
41 #if (!defined(__GNUC__) || defined(__hpux)) && defined(HAVE_ALLOCA_H)
42 #include <alloca.h>
43 #endif
44 #include <errno.h>
45 #endif
46 #endif
47 
48 #include <SWI-Stream.h>
49 #include <SWI-Prolog.h>
50 #include "rdf_db.h"
51 #include <assert.h>
52 #include <string.h>
53 #include <wchar.h>
54 #include <wctype.h>
55 #include <ctype.h>
56 #include "avl.h"
57 #ifdef WITH_MD5
58 #include "md5.h"
59 #include "atom.h"
60 #include "debug.h"
61 #include "hash.h"
62 #include "murmur.h"
63 
64 #undef UNLOCK
65 
66 static void md5_triple(triple *t, md5_byte_t *digest);
67 static void sum_digest(md5_byte_t *digest, md5_byte_t *add);
68 static void dec_digest(md5_byte_t *digest, md5_byte_t *add);
69 #endif
70 
71 
72 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
73 The ids form a mask. This must be kept consistent with monitor_mask/2 in
74 rdf_db.pl!
75 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
76 
77 typedef enum
78 { EV_ASSERT      = 0x0001,		/* triple */
79   EV_ASSERT_LOAD = 0x0002,		/* triple */
80   EV_RETRACT     = 0x0004,		/* triple */
81   EV_UPDATE      = 0x0008,		/* old, new */
82   EV_NEW_LITERAL = 0x0010,		/* literal */
83   EV_OLD_LITERAL = 0x0020,		/* literal */
84   EV_TRANSACTION = 0x0040,		/* id, begin/end */
85   EV_LOAD	 = 0x0080,		/* id, begin/end */
86   EV_REHASH	 = 0x0100		/* begin/end */
87 } broadcast_id;
88 
89 static int broadcast(broadcast_id id, void *a1, void *a2);
90 
91 
92 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
93 We now use malloc/free/realloc  calls  with   explicit  sizes  to  allow
94 maintaining statistics as well as to   prepare  for dealing with special
95 memory  pools  associated  with  databases.  Using  -DDIRECT_MALLOC  the
96 library uses plain malloc to facilitate malloc debuggers.
97 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
98 
99 #ifdef DIRECT_MALLOC
100 
101 #define rdf_malloc(db, size)		malloc(size)
102 #define rdf_free(db, ptr, size)     	free(ptr)
103 #define rdf_realloc(db, ptr, old, new)  realloc(ptr, new)
104 
105 #else /*DIRECT_MALLOC*/
106 
107 #if CHECK_MALLOC_SIZES
108 static void *
rdf_malloc(rdf_db * db,size_t size)109 rdf_malloc(rdf_db *db, size_t size)
110 { size_t bytes = size + sizeof(size_t);
111   size_t *ptr = PL_malloc(bytes);
112 
113   *ptr++ = size;
114   if ( db )
115     db->core += size;
116 
117   return ptr;
118 }
119 
120 static void
rdf_free(rdf_db * db,void * ptr,size_t size)121 rdf_free(rdf_db *db, void *ptr, size_t size)
122 { size_t *p = ptr;
123 
124   assert(p[-1] == size);
125 
126   db->core -= size;
127   PL_free(&p[-1]);
128 }
129 
130 
131 static void *
rdf_realloc(rdf_db * db,void * ptr,size_t old,size_t new)132 rdf_realloc(rdf_db *db, void *ptr, size_t old, size_t new)
133 { size_t *p = ptr;
134   size_t bytes = new + sizeof(size_t);
135 
136   assert(p[-1] == old);
137   p = PL_realloc(&p[-1], bytes);
138   *p++ = new;
139   db->core< += new-old;
140 
141   return p;
142 }
143 
144 #else /*CHECK_MALLOC_SIZES*/
145 
146 static void *
rdf_malloc(rdf_db * db,size_t size)147 rdf_malloc(rdf_db *db, size_t size)
148 { if ( db )
149     db->core += size;
150 
151   return PL_malloc(size);
152 }
153 
154 static void
rdf_free(rdf_db * db,void * ptr,size_t size)155 rdf_free(rdf_db *db, void *ptr, size_t size)
156 { db->core -= size;
157 
158   PL_free(ptr);
159 }
160 
161 
162 static void *
rdf_realloc(rdf_db * db,void * ptr,size_t old,size_t new)163 rdf_realloc(rdf_db *db, void *ptr, size_t old, size_t new)
164 { db->core += new-old;
165 
166   return PL_realloc(ptr, new);
167 }
168 
169 #endif /*CHECK_MALLOC_SIZES*/
170 #endif /*DIRECT_MALLOC*/
171 
172 static functor_t FUNCTOR_literal1;
173 static functor_t FUNCTOR_literal2;
174 static functor_t FUNCTOR_error2;
175 static functor_t FUNCTOR_type_error2;
176 static functor_t FUNCTOR_domain_error2;
177 static functor_t FUNCTOR_colon2;
178 
179 static functor_t FUNCTOR_triples1;
180 static functor_t FUNCTOR_triples2;
181 static functor_t FUNCTOR_subjects1;
182 static functor_t FUNCTOR_predicates1;
183 static functor_t FUNCTOR_duplicates1;
184 static functor_t FUNCTOR_literals1;
185 static functor_t FUNCTOR_subject1;
186 static functor_t FUNCTOR_predicate1;
187 static functor_t FUNCTOR_object1;
188 static functor_t FUNCTOR_graph1;
189 static functor_t FUNCTOR_indexed8;
190 
191 static functor_t FUNCTOR_exact1;
192 static functor_t FUNCTOR_plain1;
193 static functor_t FUNCTOR_substring1;
194 static functor_t FUNCTOR_word1;
195 static functor_t FUNCTOR_prefix1;
196 static functor_t FUNCTOR_like1;
197 
198 static functor_t FUNCTOR_symmetric1;
199 static functor_t FUNCTOR_inverse_of1;
200 static functor_t FUNCTOR_transitive1;
201 static functor_t FUNCTOR_rdf_subject_branch_factor1;    /* S --> BF*O */
202 static functor_t FUNCTOR_rdf_object_branch_factor1;	/* O --> BF*S */
203 static functor_t FUNCTOR_rdfs_subject_branch_factor1;	/* S --> BF*O */
204 static functor_t FUNCTOR_rdfs_object_branch_factor1;	/* O --> BF*S */
205 
206 static functor_t FUNCTOR_searched_nodes1;
207 static functor_t FUNCTOR_lang2;
208 static functor_t FUNCTOR_type2;
209 
210 static functor_t FUNCTOR_gc2;
211 static functor_t FUNCTOR_rehash2;
212 static functor_t FUNCTOR_core1;
213 
214 static functor_t FUNCTOR_assert4;
215 static functor_t FUNCTOR_retract4;
216 static functor_t FUNCTOR_update5;
217 static functor_t FUNCTOR_new_literal1;
218 static functor_t FUNCTOR_old_literal1;
219 static functor_t FUNCTOR_transaction2;
220 static functor_t FUNCTOR_load2;
221 static functor_t FUNCTOR_rehash1;
222 static functor_t FUNCTOR_begin1;
223 static functor_t FUNCTOR_end1;
224 
225 static atom_t   ATOM_user;
226 static atom_t	ATOM_exact;
227 static atom_t	ATOM_plain;
228 static atom_t	ATOM_prefix;
229 static atom_t	ATOM_substring;
230 static atom_t	ATOM_word;
231 static atom_t	ATOM_like;
232 static atom_t	ATOM_error;
233 static atom_t	ATOM_begin;
234 static atom_t	ATOM_end;
235 static atom_t	ATOM_infinite;
236 
237 static atom_t	ATOM_subPropertyOf;
238 
239 static predicate_t PRED_call1;
240 
241 #define MATCH_EXACT 		0x01	/* exact triple match */
242 #define MATCH_SUBPROPERTY	0x02	/* Use subPropertyOf relations */
243 #define MATCH_SRC		0x04	/* Match graph location */
244 #define MATCH_INVERSE		0x08	/* use symmetric match too */
245 #define MATCH_QUAL		0x10	/* Match qualifiers too */
246 #define MATCH_DUPLICATE		(MATCH_EXACT|MATCH_QUAL)
247 
248 static int WANT_GC(rdf_db *db);
249 static int match_triples(triple *t, triple *p, unsigned flags);
250 static int update_duplicates_add(rdf_db *db, triple *t);
251 static void update_duplicates_del(rdf_db *db, triple *t);
252 static void unlock_atoms(triple *t);
253 static void lock_atoms(triple *t);
254 static void unlock_atoms_literal(literal *lit);
255 static int  update_hash(rdf_db *db);
256 static int  triple_hash(rdf_db *db, triple *t, int which);
257 static unsigned long object_hash(triple *t);
258 static void	reset_db(rdf_db *db);
259 
260 static void	record_transaction(rdf_db *db,
261 				   tr_type type, triple *t);
262 static void	record_md5_transaction(rdf_db *db,
263 				       graph *src, md5_byte_t *digest);
264 static void	create_reachability_matrix(rdf_db *db, predicate_cloud *cloud);
265 static int	get_predicate(rdf_db *db, term_t t, predicate **p);
266 static predicate_cloud *new_predicate_cloud(rdf_db *db, predicate **p, size_t count);
267 static int	unify_literal(term_t lit, literal *l);
268 static int	check_predicate_cloud(predicate_cloud *c);
269 
270 
271 		 /*******************************
272 		 *	       LOCKING		*
273 		 *******************************/
274 
275 #define RDLOCK(db)			rdlock(&db->lock)
276 #define WRLOCK(db, allowreaders)	wrlock(&db->lock, allowreaders)
277 #define LOCKOUT_READERS(db)		lockout_readers(&db->lock)
278 #define REALLOW_READERS(db)		reallow_readers(&db->lock)
279 #define WRUNLOCK(db)			unlock(&db->lock, FALSE)
280 #define RDUNLOCK(db)			unlock(&db->lock, TRUE)
281 #define LOCK_MISC(db)			lock_misc(&db->lock)
282 #define UNLOCK_MISC(db)			unlock_misc(&db->lock)
283 #define INIT_LOCK(db)			init_lock(&db->lock)
284 
285 
286 		 /*******************************
287 		 *	       ERRORS		*
288 		 *******************************/
289 
290 static int
instantiation_error(term_t actual)291 instantiation_error(term_t actual)
292 { term_t ex;
293 
294   if ( (ex = PL_new_term_ref()) &&
295        PL_unify_term(ex,
296 		     PL_FUNCTOR, FUNCTOR_error2,
297 		       PL_CHARS, "instantiation_error",
298 		       PL_VARIABLE) )
299     return PL_raise_exception(ex);
300 
301   return FALSE;
302 }
303 
304 
305 static int
type_error(term_t actual,const char * expected)306 type_error(term_t actual, const char *expected)
307 { term_t ex;
308 
309   if ( (ex = PL_new_term_ref()) &&
310        PL_unify_term(ex,
311 		     PL_FUNCTOR, FUNCTOR_error2,
312 		       PL_FUNCTOR, FUNCTOR_type_error2,
313 		         PL_CHARS, expected,
314 		         PL_TERM, actual,
315 		       PL_VARIABLE) )
316     return PL_raise_exception(ex);
317 
318   return FALSE;
319 }
320 
321 
322 static int
domain_error(term_t actual,const char * expected)323 domain_error(term_t actual, const char *expected)
324 { term_t ex;
325 
326   if ( (ex = PL_new_term_ref()) &&
327        PL_unify_term(ex,
328 		     PL_FUNCTOR, FUNCTOR_error2,
329 		       PL_FUNCTOR, FUNCTOR_domain_error2,
330 		         PL_CHARS, expected,
331 		         PL_TERM, actual,
332 		       PL_VARIABLE) )
333     return PL_raise_exception(ex);
334 
335   return FALSE;
336 }
337 
338 
339 static int
permission_error(const char * op,const char * type,const char * obj,const char * msg)340 permission_error(const char *op, const char *type, const char *obj,
341 		 const char *msg)
342 { term_t ex, ctx;
343 
344   if ( !(ex = PL_new_term_ref()) ||
345        !(ctx = PL_new_term_ref()) )
346     return FALSE;
347 
348   if ( msg )
349   { if ( !PL_unify_term(ctx, PL_FUNCTOR_CHARS, "context", 2,
350 			       PL_VARIABLE,
351 			       PL_CHARS, msg) )
352       return FALSE;
353   }
354 
355   if ( !PL_unify_term(ex, PL_FUNCTOR_CHARS, "error", 2,
356 		      PL_FUNCTOR_CHARS, "permission_error", 3,
357 		        PL_CHARS, op,
358 		        PL_CHARS, type,
359 		        PL_CHARS, obj,
360 		      PL_TERM, ctx) )
361     return FALSE;
362 
363   return PL_raise_exception(ex);
364 }
365 
366 
367 static int
get_atom_ex(term_t t,atom_t * a)368 get_atom_ex(term_t t, atom_t *a)
369 { if ( PL_get_atom(t, a) )
370     return TRUE;
371 
372   return type_error(t, "atom");
373 }
374 
375 
376 static int
get_long_ex(term_t t,long * v)377 get_long_ex(term_t t, long *v)
378 { if ( PL_get_long(t, v) )
379     return TRUE;
380 
381   return type_error(t, "integer");
382 }
383 
384 
385 static int
get_double_ex(term_t t,double * v)386 get_double_ex(term_t t, double *v)
387 { if ( PL_get_float(t, v) )
388     return TRUE;
389 
390   return type_error(t, "float");
391 }
392 
393 
394 static int
get_atom_or_var_ex(term_t t,atom_t * a)395 get_atom_or_var_ex(term_t t, atom_t *a)
396 { if ( PL_get_atom(t, a) )
397     return TRUE;
398   if ( PL_is_variable(t) )
399   { *a = 0L;
400     return TRUE;
401   }
402 
403   return type_error(t, "atom");
404 }
405 
406 
407 static int
get_resource_or_var_ex(term_t t,atom_t * a)408 get_resource_or_var_ex(term_t t, atom_t *a)
409 { if ( PL_get_atom(t, a) )
410     return TRUE;
411   if ( PL_is_variable(t) )
412   { *a = 0L;
413     return TRUE;
414   }
415   if ( PL_is_functor(t, FUNCTOR_literal1) )
416     return FALSE;			/* fail on rdf(literal(_), ...) */
417 
418   return type_error(t, "atom");
419 }
420 
421 
422 static int
get_bool_arg_ex(int a,term_t t,int * val)423 get_bool_arg_ex(int a, term_t t, int *val)
424 { term_t arg = PL_new_term_ref();
425 
426   if ( !PL_get_arg(a, t, arg) )
427     return type_error(t, "compound");
428   if ( !PL_get_bool(arg, val) )
429     return type_error(arg, "bool");
430 
431   return TRUE;
432 }
433 
434 
435 
436 		 /*******************************
437 		 *	   DEBUG SUPPORT	*
438 		 *******************************/
439 
440 #ifdef O_DEBUG
441 
442 #define PRT_SRC	0x1
443 
444 static void
print_literal(literal * lit)445 print_literal(literal *lit)
446 { switch(lit->objtype)
447   { case OBJ_STRING:
448       switch(lit->qualifier)
449       { case Q_TYPE:
450 	  Sdprintf("%s^^\"%s\"",
451 		   PL_atom_chars(lit->value.string),
452 		   PL_atom_chars(lit->type_or_lang));
453 	  break;
454 	case Q_LANG:
455 	  Sdprintf("%s@\"%s\"",
456 		   PL_atom_chars(lit->value.string),
457 		   PL_atom_chars(lit->type_or_lang));
458 	  break;
459 	default:
460 	{ size_t len;
461 	  const char *s;
462 	  const wchar_t *w;
463 
464 	  if ( (s = PL_atom_nchars(lit->value.string, &len)) )
465 	  { if ( strlen(s) == len )
466 	      Sdprintf("\"%s\"", s);
467 	    else
468 	      Sdprintf("\"%s\" (len=%d)", s, len);
469 	  } else if ( (w = PL_atom_wchars(lit->value.string, &len)) )
470 	  { unsigned int i;
471 	    Sputc('L', Serror);
472 	    Sputc('"', Serror);
473 	    for(i=0; i<len; i++)
474 	    { if ( w[i] < 0x7f )
475 		Sputc(w[i], Serror);
476 	      else
477 		Sfprintf(Serror, "\\\\u%04x", w[i]);
478 	    }
479 	    Sputc('"', Serror);
480 	  }
481 	  break;
482 	}
483       }
484       break;
485     case OBJ_INTEGER:
486       Sdprintf("%ld", lit->value.integer);
487       break;
488     case OBJ_DOUBLE:
489       Sdprintf("%f", lit->value.real);
490       break;
491     case OBJ_TERM:
492     { fid_t fid = PL_open_foreign_frame();
493       term_t term = PL_new_term_ref();
494 
495       PL_recorded_external(lit->value.term.record, term);
496       PL_write_term(Serror, term, 1200,
497 		    PL_WRT_QUOTED|PL_WRT_NUMBERVARS|PL_WRT_PORTRAY);
498       PL_discard_foreign_frame(fid);
499       break;
500     }
501     default:
502       assert(0);
503   }
504 }
505 
506 
507 static void
print_object(triple * t)508 print_object(triple *t)
509 { if ( t->object_is_literal )
510   { print_literal(t->object.literal);
511   } else
512   { Sdprintf("%s", PL_atom_chars(t->object.resource));
513   }
514 }
515 
516 
517 static void
print_src(triple * t)518 print_src(triple *t)
519 { if ( t->line == NO_LINE )
520     Sdprintf(" [%s]", PL_atom_chars(t->graph));
521   else
522     Sdprintf(" [%s:%ld]", PL_atom_chars(t->graph), t->line);
523 }
524 
525 
526 static void
print_triple(triple * t,int flags)527 print_triple(triple *t, int flags)
528 { Sdprintf("<%s %s ",
529 	   PL_atom_chars(t->subject),
530 	   PL_atom_chars(t->predicate.r->name));
531   print_object(t);
532   if ( (flags & PRT_SRC) )
533     print_src(t);
534   Sdprintf(">");
535 }
536 
537 #endif
538 
539 		 /*******************************
540 		 *	     STORAGE		*
541 		 *******************************/
542 
543 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
544 Our one and only database (for the time being).
545 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
546 
547 static rdf_db *DB;
548 
549 
550 		 /*******************************
551 		 *	      LISTS		*
552 		 *******************************/
553 
554 static int
add_list(rdf_db * db,list * list,void * value)555 add_list(rdf_db *db, list *list, void *value)
556 { cell *c;
557 
558   for(c=list->head; c; c=c->next)
559   { if ( c->value == value )
560       return FALSE;			/* already a member */
561   }
562 
563   c = rdf_malloc(db, sizeof(*c));
564   c->value = value;
565   c->next = NULL;
566 
567   if ( list->tail )
568     list->tail->next = c;
569   else
570     list->head = c;
571 
572   list->tail = c;
573 
574   return TRUE;
575 }
576 
577 
578 static int
del_list(rdf_db * db,list * list,void * value)579 del_list(rdf_db *db, list *list, void *value)
580 { cell *c, *p = NULL;
581 
582   for(c=list->head; c; p=c, c=c->next)
583   { if ( c->value == value )
584     { if ( p )
585 	p->next = c->next;
586       else
587 	list->head = c->next;
588 
589       if ( !c->next )
590 	list->tail = p;
591 
592       rdf_free(db, c, sizeof(*c));
593 
594       return TRUE;
595     }
596   }
597 
598   return FALSE;				/* not a member */
599 }
600 
601 
602 static void
free_list(rdf_db * db,list * list)603 free_list(rdf_db *db, list *list)
604 { cell *c, *n;
605 
606   for(c=list->head; c; c=n)
607   { n = c->next;
608     rdf_free(db, c, sizeof(*c));
609   }
610 
611   list->head = list->tail = NULL;
612 }
613 
614 
615 		 /*******************************
616 		 *	     ATOM SETS		*
617 		 *******************************/
618 
619 
620 #define CHUNKSIZE 1024
621 
622 typedef struct mchunk
623 { struct mchunk *next;
624   size_t used;
625   char buf[CHUNKSIZE];
626 } mchunk;
627 
628 typedef struct
629 { avl_tree tree;
630   mchunk *node_store;
631   mchunk store0;
632 } atomset;
633 
634 
635 static void *
alloc_node_atomset(void * ptr,size_t size)636 alloc_node_atomset(void *ptr, size_t size)
637 { void *p;
638   atomset *as = ptr;
639 
640   assert(size < CHUNKSIZE);
641 
642   if ( as->node_store->used + size > CHUNKSIZE )
643   { mchunk *ch = malloc(sizeof(mchunk));
644 
645     ch->used = 0;
646     ch->next = as->node_store;
647     as->node_store = ch;
648   }
649 
650   p = &as->node_store->buf[as->node_store->used];
651   as->node_store->used += size;
652 
653   return p;
654 }
655 
656 
657 static void
free_node_atomset(void * ptr,void * data,size_t size)658 free_node_atomset(void *ptr, void *data, size_t size)
659 { assert(0);
660 }
661 
662 
663 static int
cmp_long_ptr(void * p1,void * p2,NODE type)664 cmp_long_ptr(void *p1, void *p2, NODE type)
665 { long *l1 = p1;
666   long *l2 = p2;
667 
668   return *l1 < *l2 ? -1 : *l1 > *l2 ? 1 : 0;
669 }
670 
671 
672 static void
init_atomset(atomset * as)673 init_atomset(atomset *as)
674 { avlinit(&as->tree, as, sizeof(atom_t),
675 	  cmp_long_ptr,
676 	  NULL,
677 	  alloc_node_atomset,
678 	  free_node_atomset);
679 
680   as->node_store = &as->store0;
681   as->node_store->next = NULL;
682   as->node_store->used = 0;
683 }
684 
685 
686 static void
destroy_atomset(atomset * as)687 destroy_atomset(atomset *as)
688 { mchunk *ch, *next;
689 
690   for(ch=as->node_store; ch != &as->store0; ch = next)
691   { next = ch->next;
692     free(ch);
693   }
694 }
695 
696 
697 static int
add_atomset(atomset * as,atom_t atom)698 add_atomset(atomset *as, atom_t atom)
699 { return avlins(&as->tree, &atom) ? FALSE : TRUE;
700 }
701 
702 
703 		 /*******************************
704 		 *	    PREDICATES		*
705 		 *******************************/
706 
707 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
708 Predicates are represented as first class   citizens  for three reasons:
709 quickly  answer  on  the  transitive   rdfs:subPropertyOf  relation  for
710 rdf_hash/3,  keep  track  of  statistics  that   are  useful  for  query
711 optimization  (#triples,  branching   factor)    and   keep   properties
712 (inverse/transitive).
713 
714 To answer the rdfs:subPropertyOf quickly,   predicates  are organised in
715 `clouds', where a cloud defines a   set  of predicates connected through
716 rdfs:subPropertyOf triples. The cloud numbers  its members and maintains
717 a bit-matrix that contains the closure  of the reachability. Initially a
718 predicate has a simple cloud of size 1. merge_clouds() and split_cloud()
719 deals with adding  and  deleting   rdfs:subPropertyOf  relations.  These
720 operations try to modify the clouds that have   no triples, so it can be
721 done without a rehash. If this fails, the predicates keep their own hash
722 to make search without rdfs:subPropertyOf  still   possible  (so  we can
723 avoid frequent updates while loading triples),   sets  the cloud `dirty'
724 flag and the DB's need_update flag. Queries that need rdfs:subPropertyOf
725 find the need_update flag,  which   calls  organise_predicates(),  which
726 cause a rehash if some predicates  have   changed  hash-code  to the new
727 cloud they have become part of.
728 
729 TBD: We can do a partial re-hash in that case!
730 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
731 
732 
733 static void
init_pred_table(rdf_db * db)734 init_pred_table(rdf_db *db)
735 { int bytes = sizeof(predicate*)*INITIAL_PREDICATE_TABLE_SIZE;
736 
737   db->pred_table = rdf_malloc(db, bytes);
738   memset(db->pred_table, 0, bytes);
739   db->pred_table_size = INITIAL_PREDICATE_TABLE_SIZE;
740 }
741 
742 
743 static predicate *
existing_predicate(rdf_db * db,atom_t name)744 existing_predicate(rdf_db *db, atom_t name)
745 { int hash = atom_hash(name) % db->pred_table_size;
746   predicate *p;
747 
748   LOCK_MISC(db);
749   for(p=db->pred_table[hash]; p; p = p->next)
750   { if ( p->name == name )
751     { UNLOCK_MISC(db);
752       return p;
753     }
754   }
755 
756   UNLOCK_MISC(db);
757   return NULL;
758 }
759 
760 
761 static predicate *
lookup_predicate(rdf_db * db,atom_t name)762 lookup_predicate(rdf_db *db, atom_t name)
763 { int hash = atom_hash(name) % db->pred_table_size;
764   predicate *p;
765   predicate_cloud *cp;
766 
767   LOCK_MISC(db);
768   for(p=db->pred_table[hash]; p; p = p->next)
769   { if ( p->name == name )
770     { UNLOCK_MISC(db);
771       return p;
772     }
773   }
774   p = rdf_malloc(db, sizeof(*p));
775   memset(p, 0, sizeof(*p));
776   p->name = name;
777   cp = new_predicate_cloud(db, &p, 1);
778   p->hash = cp->hash;
779   PL_register_atom(name);
780   p->next = db->pred_table[hash];
781   db->pred_table[hash] = p;
782   db->pred_count++;
783   DEBUG(5, Sdprintf("Pred %s (count = %d)\n",
784 		    PL_atom_chars(name), db->pred_count));
785   UNLOCK_MISC(db);
786 
787   return p;
788 }
789 
790 
791 static const char *
pname(predicate * p)792 pname(predicate *p)
793 { if ( p->name )
794     return PL_atom_chars(p->name);
795   else
796   { static char *ring[10];
797     static int ri = 0;
798     char buf[25];
799     char *r;
800 
801     Ssprintf(buf, "__D%p", p);
802     ring[ri++] = r = strdup(buf);
803     if ( ri == 10 )
804     { ri = 0;
805       free(ring[ri]);
806     }
807 
808     return (const char*)r;
809   }
810 }
811 
812 
813 static int
organise_predicates(rdf_db * db)814 organise_predicates(rdf_db *db)		/* TBD: rename&move */
815 { predicate **ht;
816   int i;
817   int changed = 0;
818 
819   DEBUG(2, Sdprintf("rdf_db: fixing predicate clouds\n"));
820 
821   for(i=0,ht = db->pred_table; i<db->pred_table_size; i++, ht++)
822   { predicate *p;
823 
824     for( p = *ht; p; p = p->next )
825     { predicate_cloud *cloud = p->cloud;
826 
827       if ( cloud->dirty )
828       { predicate **cp;
829 	int i2;
830 
831 	for(i2=0, cp = cloud->members; i2 < cloud->size; i2++, cp++)
832 	{ if ( (*cp)->hash != cloud->hash )
833 	  { (*cp)->hash = cloud->hash;
834 	    if ( (*cp)->triple_count > 0 )
835 	      changed++;
836 	  }
837 	}
838 	cloud->dirty = FALSE;
839       }
840     }
841   }
842 
843   return changed;
844 }
845 
846 
847 		 /*******************************
848 		 *	 PREDICATE CLOUDS	*
849 		 *******************************/
850 
851 static predicate_cloud *
new_predicate_cloud(rdf_db * db,predicate ** p,size_t count)852 new_predicate_cloud(rdf_db *db, predicate **p, size_t count)
853 { predicate_cloud *cloud = rdf_malloc(db, sizeof(*cloud));
854 
855   memset(cloud, 0, sizeof(*cloud));
856   cloud->hash = db->next_hash++;
857   if ( count )
858   { int i;
859     predicate **p2;
860 
861     cloud->size = count;
862     cloud->members = rdf_malloc(db, sizeof(predicate*)*count);
863     memcpy(cloud->members, p, sizeof(predicate*)*count);
864 
865     for(i=0, p2=cloud->members; i<cloud->size; i++, p2++)
866       (*p2)->cloud = cloud;
867   }
868   create_reachability_matrix(db, cloud);
869 
870   return cloud;
871 }
872 
873 
874 static void
free_predicate_cloud(rdf_db * db,predicate_cloud * cloud)875 free_predicate_cloud(rdf_db *db, predicate_cloud *cloud)
876 { if ( cloud->members )
877   { rdf_free(db, cloud->members, sizeof(predicate*)*cloud->size);
878   }
879 
880   rdf_free(db, cloud, sizeof(*cloud));
881 }
882 
883 
884 static long
triples_in_predicate_cloud(predicate_cloud * cloud)885 triples_in_predicate_cloud(predicate_cloud *cloud)
886 { long triples = 0;
887   predicate **p;
888   int i;
889 
890   for(i=0, p=cloud->members; i<cloud->size; i++, p++)
891     triples += (*p)->triple_count;
892 
893   return triples;
894 }
895 
896 
897 /* Add the predicates of c2 to c1 and destroy c2.  Returns c1 */
898 
899 static predicate_cloud *
append_clouds(rdf_db * db,predicate_cloud * c1,predicate_cloud * c2,int update_hash)900 append_clouds(rdf_db *db, predicate_cloud *c1, predicate_cloud *c2, int update_hash)
901 { predicate **p;
902   int i;
903 
904   for(i=0, p=c2->members; i<c2->size; i++, p++)
905   { (*p)->cloud = c1;
906     if ( update_hash )
907       (*p)->hash = c1->hash;
908   }
909 
910   if ( c1->size > 0 && c2->size > 0 )
911   { c1->members = rdf_realloc(db, c1->members,
912 			      c1->size*sizeof(predicate*),
913 			      (c1->size+c2->size)*sizeof(predicate*));
914     memcpy(&c1->members[c1->size], c2->members, c2->size*sizeof(predicate*));
915     c1->size += c2->size;
916     free_predicate_cloud(db, c2);
917   } else if ( c2->size > 0 )
918   { c1->members = c2->members;
919     c1->size = c2->size;
920     c2->members = NULL;
921     free_predicate_cloud(db, c2);
922   } else
923   { free_predicate_cloud(db, c2);
924   }
925 
926   return c1;
927 }
928 
929 
930 /* merge two predicate clouds.  If either of them has no triples we
931    can do the merge without rehashing the database.  Note that this
932    code is only called from addSubPropertyOf().  If c1==c2, we added
933    an rdfs:subPropertyOf between two predicates in the same cloud.
934    we must still update the matrix, though we could do it a bit more
935    efficient.  I doubt this is worth the trouble though.
936 */
937 
938 static predicate_cloud *
merge_clouds(rdf_db * db,predicate_cloud * c1,predicate_cloud * c2)939 merge_clouds(rdf_db *db, predicate_cloud *c1, predicate_cloud *c2)
940 { predicate_cloud *cloud;
941 
942   if ( c1 != c2 )
943   { if ( triples_in_predicate_cloud(c1) == 0 )
944     { cloud = append_clouds(db, c2, c1, TRUE);
945     } else if ( triples_in_predicate_cloud(c2) == 0 )
946     { cloud = append_clouds(db, c1, c2, TRUE);
947     } else
948     { cloud = append_clouds(db, c1, c2, FALSE);
949       cloud->dirty = TRUE;
950       db->need_update++;
951     }
952   } else
953   { cloud = c1;
954   }
955 
956   DEBUG(1, if ( !db->need_update )
957 	   { check_predicate_cloud(cloud);
958 	   });
959 
960   create_reachability_matrix(db, cloud);
961 
962   return cloud;
963 }
964 
965 
966 /* split a cloud into multiple disjoint clouds.  The first cloud is
967    given the hash of the original, so we only need to update if new
968    clouds are created.  Ideally we should se whether it is possible
969    to give the orginal hash to the one and only non-empty cloud to
970    avoid re-hashing alltogether.
971 */
972 
973 static void
pred_reachable(predicate * start,char * visited,predicate ** nodes,int * size)974 pred_reachable(predicate *start, char *visited, predicate **nodes, int *size)
975 { if ( !visited[start->label] )
976   { cell *c;
977 
978     visited[start->label] = TRUE;
979     nodes[(*size)++] = start;
980     for(c=start->subPropertyOf.head; c; c=c->next)
981       pred_reachable(c->value, visited, nodes, size);
982     for(c=start->siblings.head; c; c=c->next)
983       pred_reachable(c->value, visited, nodes, size);
984   }
985 }
986 
987 
988 static int
split_cloud(rdf_db * db,predicate_cloud * cloud,predicate_cloud ** parts,int size)989 split_cloud(rdf_db *db, predicate_cloud *cloud,
990 	    predicate_cloud **parts, int size)
991 { char *done        = alloca(cloud->size*sizeof(char));
992   predicate **graph = alloca(cloud->size*sizeof(predicate*));
993   int found = 0;
994   int i;
995 
996   memset(done, 0, cloud->size*sizeof(char));
997   for(i=0; i<cloud->size; i++)
998   { if ( !done[i] )
999     { predicate *start = cloud->members[i];
1000       predicate_cloud *new_cloud;
1001       int gsize = 0;
1002 
1003       pred_reachable(start, done, graph, &gsize);
1004       new_cloud = new_predicate_cloud(db, graph, gsize);
1005       if ( found == 0 )
1006       { new_cloud->hash = cloud->hash;
1007       } else
1008       { new_cloud->dirty = TRUE;	/* preds come from another cloud */
1009 	db->need_update++;
1010       }
1011       parts[found++] = new_cloud;
1012     }
1013   }
1014 
1015   free_predicate_cloud(db, cloud);
1016 
1017   return found;
1018 }
1019 
1020 
1021 static unsigned long
predicate_hash(predicate * p)1022 predicate_hash(predicate *p)
1023 { return p->hash;
1024 }
1025 
1026 
1027 static void
addSubPropertyOf(rdf_db * db,predicate * sub,predicate * super)1028 addSubPropertyOf(rdf_db *db, predicate *sub, predicate *super)
1029 { /*DEBUG(2, Sdprintf("addSubPropertyOf(%s, %s)\n", pname(sub), pname(super)));*/
1030 
1031   if ( add_list(db, &sub->subPropertyOf, super) )
1032   { add_list(db, &super->siblings, sub);
1033     merge_clouds(db, sub->cloud, super->cloud);
1034   }
1035 }
1036 
1037 
1038 /* deleting an rdfs:subPropertyOf.  This is a bit naughty.  If the
1039    cloud is still connected we only need to refresh the reachability
1040    matrix.  Otherwise the cloud breaks in maximum two clusters.  We
1041    can decide to leave it as is, which saves a re-hash of the triples
1042    but harms indexing.  Alternative we can create a new cloud for one
1043    of the clusters and re-hash.
1044 */
1045 
1046 static void
delSubPropertyOf(rdf_db * db,predicate * sub,predicate * super)1047 delSubPropertyOf(rdf_db *db, predicate *sub, predicate *super)
1048 { if ( del_list(db, &sub->subPropertyOf, super) )
1049   { del_list(db, &super->siblings, sub);
1050  /* if ( not worth the trouble )
1051       create_reachability_matrix(db, sub->cloud);
1052     else */
1053     { predicate_cloud *parts[2];
1054       split_cloud(db, sub->cloud, parts, 2);
1055     }
1056   }
1057 }
1058 
1059 
1060 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1061 Reachability matrix.
1062 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1063 
1064 #define WBITSIZE (sizeof(int)*8)
1065 
1066 static size_t
byte_size_bitmatrix(size_t w,size_t h)1067 byte_size_bitmatrix(size_t w, size_t h)
1068 { size_t wsize = ((w*h)+WBITSIZE-1)/WBITSIZE;
1069 
1070   return (size_t)(intptr_t)&((bitmatrix*)NULL)->bits[wsize];
1071 }
1072 
1073 
1074 static bitmatrix *
alloc_bitmatrix(rdf_db * db,size_t w,size_t h)1075 alloc_bitmatrix(rdf_db *db, size_t w, size_t h)
1076 { size_t size = byte_size_bitmatrix(w, h);
1077   bitmatrix *m = rdf_malloc(db, size);
1078 
1079   memset(m, 0, size);
1080   m->width = w;
1081   m->heigth = h;
1082 
1083   return m;
1084 }
1085 
1086 
1087 static void
free_bitmatrix(rdf_db * db,bitmatrix * bm)1088 free_bitmatrix(rdf_db *db, bitmatrix *bm)
1089 { size_t size = byte_size_bitmatrix(bm->width, bm->heigth);
1090 
1091   rdf_free(db, bm, size);
1092 }
1093 
1094 
1095 #undef setbit				/* conflict in HPUX 11.23 */
1096 
1097 static void
setbit(bitmatrix * m,int i,int j)1098 setbit(bitmatrix *m, int i, int j)
1099 { size_t ij = m->width*i+j;
1100   size_t word = ij/WBITSIZE;
1101   int bit  = ij%WBITSIZE;
1102 
1103   m->bits[word] |= 1<<bit;
1104 }
1105 
1106 
1107 static int
testbit(bitmatrix * m,int i,int j)1108 testbit(bitmatrix *m, int i, int j)
1109 { size_t ij = m->width*i+j;
1110   size_t word = ij/WBITSIZE;
1111   int bit  = ij%WBITSIZE;
1112 
1113   return ((m->bits[word] & (1<<bit)) != 0);
1114 }
1115 
1116 
1117 static int
label_predicate_cloud(predicate_cloud * cloud)1118 label_predicate_cloud(predicate_cloud *cloud)
1119 { predicate **p;
1120   int i;
1121 
1122   for(i=0, p=cloud->members; i<cloud->size; i++, p++)
1123     (*p)->label = i;
1124 
1125   return i;
1126 }
1127 
1128 
1129 static void
fill_reachable(bitmatrix * bm,predicate * p0,predicate * p)1130 fill_reachable(bitmatrix *bm, predicate *p0, predicate *p)
1131 { if ( !testbit(bm, p0->label, p->label) )
1132   { cell *c;
1133 
1134     DEBUG(1, Sdprintf("    Reachable [%s (%d)]\n", pname(p), p->label));
1135     setbit(bm, p0->label, p->label);
1136     for(c = p->subPropertyOf.head; c; c=c->next)
1137       fill_reachable(bm, p0, c->value);
1138   }
1139 }
1140 
1141 
1142 static void
create_reachability_matrix(rdf_db * db,predicate_cloud * cloud)1143 create_reachability_matrix(rdf_db *db, predicate_cloud *cloud)
1144 { bitmatrix *m = alloc_bitmatrix(db, cloud->size, cloud->size);
1145   predicate **p;
1146   int i;
1147 
1148   label_predicate_cloud(cloud);
1149   for(i=0, p=cloud->members; i<cloud->size; i++, p++)
1150   { DEBUG(1, Sdprintf("Reachability for %s (%d)\n", pname(*p), (*p)->label));
1151 
1152     fill_reachable(m, *p, *p);
1153   }
1154 
1155   if ( cloud->reachable )
1156     free_bitmatrix(db, cloud->reachable);
1157 
1158   cloud->reachable = m;
1159 }
1160 
1161 
1162 static int
isSubPropertyOf(predicate * sub,predicate * p)1163 isSubPropertyOf(predicate *sub, predicate *p)
1164 { if ( sub->cloud == p->cloud )
1165     return testbit(sub->cloud->reachable, sub->label, p->label);
1166 
1167   return FALSE;
1168 }
1169 
1170 		 /*******************************
1171 		 *   PRINT PREDICATE HIERARCHY	*
1172 		 *******************************/
1173 
1174 static int
check_predicate_cloud(predicate_cloud * c)1175 check_predicate_cloud(predicate_cloud *c)
1176 { predicate **p;
1177   int errors = 0;
1178   int i;
1179 
1180   DEBUG(1, if ( c->dirty ) Sdprintf("Cloud is dirty\n"));
1181 
1182   for(i=0, p=c->members; i<c->size; i++, p++)
1183   { if ( !c->dirty )
1184     { if ( (*p)->hash != c->hash )
1185       { Sdprintf("Hash of %s doesn't match cloud hash\n", pname(*p));
1186 	errors++;
1187       }
1188     }
1189     if ( (*p)->cloud != c )
1190     { Sdprintf("Wrong cloud of %s\n", pname(*p));
1191       errors++;
1192     }
1193   }
1194 
1195   return errors;
1196 }
1197 
1198 
1199 static void
print_reachability_cloud(predicate * p)1200 print_reachability_cloud(predicate *p)
1201 { int x, y;
1202   predicate_cloud *cloud = p->cloud;
1203 
1204   check_predicate_cloud(cloud);
1205 
1206   Sdprintf("Reachability matrix:\n");
1207   for(x=0; x<cloud->reachable->width; x++)
1208     Sdprintf("%d", x%10);
1209   Sdprintf("\n");
1210   for(y=0; y<cloud->reachable->heigth; y++)
1211   { for(x=0; x<cloud->reachable->width; x++)
1212     { if ( testbit(cloud->reachable, x, y) )
1213 	Sdprintf("X");
1214       else
1215 	Sdprintf(".");
1216     }
1217 
1218     Sdprintf(" %2d %s\n", y, PL_atom_chars(cloud->members[y]->name));
1219     assert(cloud->members[y]->label == y);
1220   }
1221 }
1222 
1223 
1224 static foreign_t
rdf_print_predicate_cloud(term_t t)1225 rdf_print_predicate_cloud(term_t t)
1226 { predicate *p;
1227   rdf_db *db = DB;
1228 
1229   if ( !get_predicate(db, t, &p) )
1230     return FALSE;
1231 
1232   print_reachability_cloud(p);
1233 
1234   return TRUE;
1235 }
1236 
1237 
1238 
1239 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1240 Branching  factors  are  crucial  in  ordering    the  statements  of  a
1241 conjunction. These functions compute  the   average  branching factor in
1242 both directions ("subject --> P  -->  object"   and  "object  -->  P -->
1243 subject") by determining the number of unique   values at either side of
1244 the predicate. This number  is  only   recomputed  if  it  is considered
1245 `dirty'.
1246 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1247 
1248 static int
update_predicate_counts(rdf_db * db,predicate * p,int which)1249 update_predicate_counts(rdf_db *db, predicate *p, int which)
1250 { long total = 0;
1251 
1252   if ( which == DISTINCT_DIRECT )
1253   { long changed = abs(p->triple_count - p->distinct_updated[DISTINCT_DIRECT]);
1254 
1255     if ( changed < p->distinct_updated[DISTINCT_DIRECT] )
1256       return TRUE;
1257 
1258     if ( p->triple_count == 0 )
1259     { p->distinct_count[which]    = 0;
1260       p->distinct_subjects[which] = 0;
1261       p->distinct_objects[which]  = 0;
1262 
1263       return TRUE;
1264     }
1265   } else
1266   { long changed = db->generation - p->distinct_updated[DISTINCT_SUB];
1267 
1268     if ( changed < p->distinct_count[DISTINCT_SUB] )
1269       return TRUE;
1270   }
1271 
1272   if ( !update_hash(db) )
1273     return FALSE;
1274 
1275   { atomset subject_set;
1276     atomset object_set;
1277     triple t;
1278     triple *byp;
1279 
1280     memset(&t, 0, sizeof(t));
1281     t.predicate.r = p;
1282     t.indexed |= BY_P;
1283 
1284     init_atomset(&subject_set);
1285     init_atomset(&object_set);
1286     for(byp = db->table[t.indexed][triple_hash(db, &t, t.indexed)];
1287 	byp;
1288 	byp = byp->next[t.indexed])
1289     { if ( !byp->erased && !byp->is_duplicate )
1290       { if ( (which == DISTINCT_DIRECT && byp->predicate.r == p) ||
1291 	     (which != DISTINCT_DIRECT && isSubPropertyOf(byp->predicate.r, p)) )
1292 	{ total++;
1293 	  add_atomset(&subject_set, byp->subject);
1294 	  add_atomset(&object_set, object_hash(byp)); /* NOTE: not exact! */
1295 	}
1296       }
1297     }
1298 
1299     p->distinct_count[which]    = total;
1300     p->distinct_subjects[which] = subject_set.tree.count;
1301     p->distinct_objects[which]  = object_set.tree.count;
1302 
1303     destroy_atomset(&subject_set);
1304     destroy_atomset(&object_set);
1305 
1306     if ( which == DISTINCT_DIRECT )
1307       p->distinct_updated[DISTINCT_DIRECT] = total;
1308     else
1309       p->distinct_updated[DISTINCT_SUB] = db->generation;
1310 
1311     DEBUG(1, Sdprintf("%s: distinct subjects (%s): %ld, objects: %ld\n",
1312 		      PL_atom_chars(p->name),
1313 		      (which == DISTINCT_DIRECT ? "rdf" : "rdfs"),
1314 		      p->distinct_subjects[which],
1315 		      p->distinct_objects[which]));
1316   }
1317 
1318   return TRUE;
1319 }
1320 
1321 
1322 static void
invalidate_distinct_counts(rdf_db * db)1323 invalidate_distinct_counts(rdf_db *db)
1324 { predicate **ht;
1325   int i;
1326 
1327   for(i=0,ht = db->pred_table; i<db->pred_table_size; i++, ht++)
1328   { predicate *p;
1329 
1330     for( p = *ht; p; p = p->next )
1331     { p->distinct_updated[DISTINCT_SUB] = 0;
1332       p->distinct_count[DISTINCT_SUB] = 0;
1333       p->distinct_subjects[DISTINCT_SUB] = 0;
1334       p->distinct_objects[DISTINCT_SUB] = 0;
1335     }
1336   }
1337 }
1338 
1339 
1340 static double
subject_branch_factor(rdf_db * db,predicate * p,int which)1341 subject_branch_factor(rdf_db *db, predicate *p, int which)
1342 { if ( !update_predicate_counts(db, p, which) )
1343     return FALSE;
1344 
1345   if ( p->distinct_subjects[which] == 0 )
1346     return 0.0;				/* 0 --> 0 */
1347 
1348   return (double)p->distinct_count[which] /
1349          (double)p->distinct_subjects[which];
1350 }
1351 
1352 
1353 static double
object_branch_factor(rdf_db * db,predicate * p,int which)1354 object_branch_factor(rdf_db *db, predicate *p, int which)
1355 { if ( !update_predicate_counts(db, p, which) )
1356     return FALSE;
1357 
1358   if ( p->distinct_objects[which] == 0 )
1359     return 0.0;				/* 0 --> 0 */
1360 
1361   return (double)p->distinct_count[which] /
1362          (double)p->distinct_objects[which];
1363 }
1364 
1365 
1366 
1367 
1368 		 /*******************************
1369 		 *	   NAMED GRAPHS		*
1370 		 *******************************/
1371 
1372 /* MT: all calls must be locked
1373 */
1374 
1375 static void
init_graph_table(rdf_db * db)1376 init_graph_table(rdf_db *db)
1377 { int bytes = sizeof(predicate*)*INITIAL_GRAPH_TABLE_SIZE;
1378 
1379   db->graph_table = rdf_malloc(db, bytes);
1380   memset(db->graph_table, 0, bytes);
1381   db->graph_table_size = INITIAL_GRAPH_TABLE_SIZE;
1382 }
1383 
1384 
1385 static graph *
lookup_graph(rdf_db * db,atom_t name,int create)1386 lookup_graph(rdf_db *db, atom_t name, int create)
1387 { int hash = atom_hash(name) % db->graph_table_size;
1388   graph *src;
1389 
1390   LOCK_MISC(db);
1391   for(src=db->graph_table[hash]; src; src = src->next)
1392   { if ( src->name == name )
1393     { UNLOCK_MISC(db);
1394       return src;
1395     }
1396   }
1397 
1398   if ( !create )
1399   { UNLOCK_MISC(db);
1400     return NULL;
1401   }
1402 
1403   src = rdf_malloc(db, sizeof(*src));
1404   memset(src, 0, sizeof(*src));
1405   src->name = name;
1406   src->md5 = TRUE;
1407   PL_register_atom(name);
1408   src->next = db->graph_table[hash];
1409   db->graph_table[hash] = src;
1410   UNLOCK_MISC(db);
1411 
1412   return src;
1413 }
1414 
1415 
1416 static void
erase_graphs(rdf_db * db)1417 erase_graphs(rdf_db *db)
1418 { graph **ht;
1419   int i;
1420 
1421   for(i=0,ht = db->graph_table; i<db->graph_table_size; i++, ht++)
1422   { graph *src, *n;
1423 
1424     for( src = *ht; src; src = n )
1425     { n = src->next;
1426 
1427       PL_unregister_atom(src->name);
1428       if ( src->source )
1429 	PL_unregister_atom(src->source);
1430       rdf_free(db, src, sizeof(*src));
1431     }
1432 
1433     *ht = NULL;
1434   }
1435 
1436   db->last_graph = NULL;
1437 }
1438 
1439 
1440 static void
register_graph(rdf_db * db,triple * t)1441 register_graph(rdf_db *db, triple *t)
1442 { graph *src;
1443 
1444   if ( !t->graph )
1445     return;
1446 
1447   if ( db->last_graph && db->last_graph->name == t->graph )
1448   { src = db->last_graph;
1449   } else
1450   { src = lookup_graph(db, t->graph, TRUE);
1451     db->last_graph = src;
1452   }
1453 
1454   src->triple_count++;
1455 #ifdef WITH_MD5
1456   if ( src->md5 )
1457   { md5_byte_t digest[16];
1458     md5_triple(t, digest);
1459     sum_digest(src->digest, digest);
1460   }
1461 #endif
1462 }
1463 
1464 
1465 static void
unregister_graph(rdf_db * db,triple * t)1466 unregister_graph(rdf_db *db, triple *t)
1467 { graph *src;
1468 
1469   if ( !t->graph )
1470     return;
1471 
1472   if ( db->last_graph && db->last_graph->name == t->graph )
1473   { src = db->last_graph;
1474   } else
1475   { src = lookup_graph(db, t->graph, TRUE);
1476     db->last_graph = src;
1477   }
1478 
1479   src->triple_count--;
1480 #ifdef WITH_MD5
1481   if ( src->md5 )
1482   { md5_byte_t digest[16];
1483     md5_triple(t, digest);
1484     dec_digest(src->digest, digest);
1485   }
1486 #endif
1487 }
1488 
1489 
1490 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1491 rdf_graphs_(-ListOfGraphs)
1492 
1493 Return a list holding the names  of   all  currently defined graphs. We
1494 return a list to avoid the need for complicated long locks.
1495 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1496 
1497 static foreign_t
rdf_graphs(term_t list)1498 rdf_graphs(term_t list)
1499 { int i;
1500   term_t tail = PL_copy_term_ref(list);
1501   term_t head = PL_new_term_ref();
1502   rdf_db *db = DB;
1503 
1504   if ( !RDLOCK(db) )
1505     return FALSE;
1506   for(i=0; i<db->graph_table_size; i++)
1507   { graph *src;
1508 
1509     for(src=db->graph_table[i]; src; src = src->next)
1510     { if ( !PL_unify_list(tail, head, tail) ||
1511 	   !PL_unify_atom(head, src->name) )
1512       { RDUNLOCK(db);
1513 	return FALSE;
1514       }
1515     }
1516   }
1517   RDUNLOCK(db);
1518 
1519   return PL_unify_nil(tail);
1520 }
1521 
1522 
1523 static foreign_t
rdf_graph_source(term_t graph_name,term_t source,term_t modified)1524 rdf_graph_source(term_t graph_name, term_t source, term_t modified)
1525 { atom_t gn;
1526   int rc = FALSE;
1527   rdf_db *db = DB;
1528 
1529   if ( !get_atom_or_var_ex(graph_name, &gn) )
1530     return FALSE;
1531 
1532   if ( gn )
1533   { graph *s;
1534 
1535     if ( !RDLOCK(db) )
1536       return FALSE;
1537     if ( (s = lookup_graph(db, gn, FALSE)) && s->source)
1538     { rc = ( PL_unify_atom(source, s->source) &&
1539 	     PL_unify_float(modified, s->modified) );
1540     }
1541     RDUNLOCK(db);
1542   } else
1543   { atom_t src;
1544 
1545     if ( get_atom_ex(source, &src) )
1546     { int i;
1547       graph **ht;
1548 
1549       if ( !RDLOCK(db) )
1550 	return FALSE;
1551 
1552       for(i=0,ht = db->graph_table; i<db->graph_table_size; i++, ht++)
1553       { graph *s;
1554 
1555 	for( s = *ht; s; s = s->next )
1556 	{ if ( s->source == src )
1557 	  { rc = ( PL_unify_atom(graph_name, s->name) &&
1558 		   PL_unify_float(modified, s->modified) );
1559 	  }
1560 	}
1561       }
1562 
1563       RDUNLOCK(db);
1564     }
1565   }
1566 
1567   return rc;
1568 }
1569 
1570 
1571 static foreign_t
rdf_set_graph_source(term_t graph_name,term_t source,term_t modified)1572 rdf_set_graph_source(term_t graph_name, term_t source, term_t modified)
1573 { atom_t gn, src;
1574   int rc = FALSE;
1575   rdf_db *db = DB;
1576   graph *s;
1577   double mtime;
1578 
1579   if ( !get_atom_ex(graph_name, &gn) ||
1580        !get_atom_ex(source, &src) ||
1581        !get_double_ex(modified, &mtime) )
1582     return FALSE;
1583 
1584   if ( !RDLOCK(db) )
1585     return FALSE;
1586   if ( (s = lookup_graph(db, gn, TRUE)) )
1587   { if ( s->source != src )
1588     { if ( s->source )
1589 	PL_unregister_atom(s->source);
1590       s->source = src;
1591       PL_register_atom(s->source);
1592     }
1593     s->modified = mtime;
1594     rc = TRUE;
1595   }
1596   RDUNLOCK(db);
1597 
1598   return rc;
1599 }
1600 
1601 
1602 static foreign_t
rdf_unset_graph_source(term_t graph_name)1603 rdf_unset_graph_source(term_t graph_name)
1604 { atom_t gn;
1605   rdf_db *db = DB;
1606   graph *s;
1607 
1608   if ( !get_atom_ex(graph_name, &gn) )
1609     return FALSE;
1610   if ( (s = lookup_graph(db, gn, TRUE)) )
1611   { if ( s->source )
1612     { PL_unregister_atom(s->source);
1613       s->source = 0;
1614     }
1615     s->modified = 0.0;
1616   }
1617   if ( !RDLOCK(db) )
1618     return FALSE;
1619 
1620   RDUNLOCK(db);
1621 
1622   return TRUE;
1623 }
1624 
1625 
1626 
1627 		 /*******************************
1628 		 *	     LITERALS		*
1629 		 *******************************/
1630 
1631 #define LITERAL_EX_MAGIC 0x2b97e881
1632 
1633 typedef struct literal_ex
1634 { literal  *literal;
1635   atom_info atom;
1636 #ifdef O_SECURE
1637   long	    magic;
1638 #endif
1639 } literal_ex;
1640 
1641 
1642 static inline void
prepare_literal_ex(literal_ex * lex)1643 prepare_literal_ex(literal_ex *lex)
1644 { SECURE(lex->magic = 0x2b97e881);
1645 
1646   if ( lex->literal->objtype == OBJ_STRING )
1647   { lex->atom.handle = lex->literal->value.string;
1648     lex->atom.resolved = FALSE;
1649   }
1650 }
1651 
1652 
1653 static literal *
new_literal(rdf_db * db)1654 new_literal(rdf_db *db)
1655 { literal *lit = rdf_malloc(db, sizeof(*lit));
1656   memset(lit, 0, sizeof(*lit));
1657   lit->references = 1;
1658 
1659   return lit;
1660 }
1661 
1662 
1663 static void
free_literal(rdf_db * db,literal * lit)1664 free_literal(rdf_db *db, literal *lit)
1665 { if ( --lit->references == 0 )
1666   { unlock_atoms_literal(lit);
1667 
1668     if ( lit->shared && !db->resetting )
1669     { literal_ex lex;
1670 
1671       lit->shared = FALSE;
1672       broadcast(EV_OLD_LITERAL, lit, NULL);
1673       DEBUG(2,
1674 	    Sdprintf("Delete %p from literal table: ", lit);
1675 	    print_literal(lit);
1676 	    Sdprintf("\n"));
1677 
1678       lex.literal = lit;
1679       prepare_literal_ex(&lex);
1680 
1681       if ( !avldel(&db->literals, &lex) )
1682       { Sdprintf("Failed to delete %p (size=%ld): ", lit, db->literals.count);
1683 	print_literal(lit);
1684 	Sdprintf("\n");
1685 	assert(0);
1686       }
1687     }
1688 
1689     if ( lit->objtype == OBJ_TERM &&
1690 	 lit->value.term.record )
1691     { if ( lit->term_loaded )
1692 	rdf_free(db, lit->value.term.record, lit->value.term.len);
1693       else
1694 	PL_erase_external(lit->value.term.record);
1695     }
1696     rdf_free(db, lit, sizeof(*lit));
1697   }
1698 }
1699 
1700 
1701 static literal *
copy_literal(rdf_db * db,literal * lit)1702 copy_literal(rdf_db *db, literal *lit)
1703 { lit->references++;
1704   return lit;
1705 }
1706 
1707 
1708 static void
alloc_literal_triple(rdf_db * db,triple * t)1709 alloc_literal_triple(rdf_db *db, triple *t)
1710 { if ( !t->object_is_literal )
1711   { t->object.literal = new_literal(db);
1712     t->object_is_literal = TRUE;
1713   }
1714 }
1715 
1716 
1717 static void
lock_atoms_literal(literal * lit)1718 lock_atoms_literal(literal *lit)
1719 { if ( !lit->atoms_locked )
1720   { lit->atoms_locked = TRUE;
1721 
1722     switch(lit->objtype)
1723     { case OBJ_STRING:
1724 	PL_register_atom(lit->value.string);
1725 	if ( lit->qualifier )
1726 	  PL_register_atom(lit->type_or_lang);
1727 	break;
1728     }
1729   }
1730 }
1731 
1732 
1733 static void
unlock_atoms_literal(literal * lit)1734 unlock_atoms_literal(literal *lit)
1735 { if ( lit->atoms_locked )
1736   { lit->atoms_locked = FALSE;
1737 
1738     switch(lit->objtype)
1739     { case OBJ_STRING:
1740 	PL_unregister_atom(lit->value.string);
1741 	if ( lit->qualifier )
1742 	  PL_unregister_atom(lit->type_or_lang);
1743 	break;
1744     }
1745   }
1746 }
1747 
1748 
1749 		 /*******************************
1750 		 *	     LITERAL DB		*
1751 		 *******************************/
1752 
1753 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1754 compare_literals() sorts literals.  Ordering is defined as:
1755 
1756 	* Numeric literals < string literals < term literals
1757 	* Numeric literals (int and float) are sorted by value
1758 	* String literals are sorted alhabetically
1759 		- case independent, but uppercase before lowercase
1760 		- locale (strcoll) sorting?
1761 		- delete dyadrics
1762 		- first on string, then on type, then on language
1763 	* Terms are sorted on Prolog standard order of terms
1764 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1765 
1766 static int
compare_literals(void * p1,void * p2,NODE type)1767 compare_literals(void *p1, void *p2, NODE type)
1768 { literal_ex *lex = p1;
1769   literal *l1 = lex->literal;
1770   literal *l2 = *(literal**)p2;
1771 
1772   SECURE(assert(lex->magic == LITERAL_EX_MAGIC));
1773 
1774   if ( l1->objtype == l2->objtype )
1775   { switch(l1->objtype)
1776     { case OBJ_INTEGER:
1777       { int64_t v1 = l1->value.integer;
1778 	int64_t v2 = l2->value.integer;
1779 	return v1 < v2 ? -1 : v1 > v2 ? 1 : 0;
1780       }
1781       case OBJ_DOUBLE:
1782       { double v1 = l1->value.real;
1783 	double v2 = l2->value.real;
1784 	return v1 < v2 ? -1 : v1 > v2 ? 1 : 0;
1785       }
1786       case OBJ_STRING:
1787       { int rc = cmp_atom_info(&lex->atom, l2->value.string);
1788 
1789 	if ( rc == 0 )
1790 	{ if ( l1->qualifier == l2->qualifier )
1791 	    return cmp_atoms(l1->type_or_lang, l2->type_or_lang);
1792 	  return l1->qualifier - l2->qualifier;
1793 	}
1794 	return rc;
1795       }
1796       case OBJ_TERM:
1797       { fid_t fid = PL_open_foreign_frame();
1798 	term_t t1 = PL_new_term_ref();
1799 	term_t t2 = PL_new_term_ref();
1800 	int rc;
1801 
1802 	PL_recorded_external(l1->value.term.record, t1); /* can also be handled in literal_ex */
1803 	PL_recorded_external(l2->value.term.record, t2);
1804 	rc = PL_compare(t1, t2);
1805 
1806 	PL_discard_foreign_frame(fid);
1807 	return rc;
1808       }
1809       default:
1810 	assert(0);
1811         return 0;
1812     }
1813   } else if ( l1->objtype == OBJ_INTEGER && l2->objtype == OBJ_DOUBLE )
1814   { double v1 = (double)l1->value.integer;
1815     double v2 = l2->value.real;
1816     return v1 < v2 ? -1 : v1 > v2 ? 1 : -1;
1817   } else if ( l1->objtype == OBJ_DOUBLE && l2->objtype == OBJ_INTEGER )
1818   { double v1 = l1->value.real;
1819     double v2 = (double)l2->value.integer;
1820     return v1 < v2 ? -1 : v1 > v2 ? 1 : 1;
1821   } else
1822   { return l1->objtype - l2->objtype;
1823   }
1824 }
1825 
1826 
1827 static void*
avl_malloc(void * ptr,size_t size)1828 avl_malloc(void *ptr, size_t size)
1829 { return rdf_malloc(ptr, size);
1830 }
1831 
1832 
1833 static void
avl_free(void * ptr,void * data,size_t size)1834 avl_free(void *ptr, void *data, size_t size)
1835 { rdf_free(ptr, data, size);
1836 }
1837 
1838 
1839 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1840 Create the sorted literal tree. Note  that   we  do  not register a free
1841 handler  for  the  tree  as  nodes   are  either  already  destroyed  by
1842 free_literal() or by rdf_reset_db().
1843 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1844 
1845 static void
init_literal_table(rdf_db * db)1846 init_literal_table(rdf_db *db)
1847 { avlinit(&db->literals,
1848 	  db, sizeof(literal*),
1849 	  compare_literals,
1850 	  NULL,
1851 	  avl_malloc,
1852 	  avl_free);
1853 }
1854 
1855 
1856 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1857 share_literal() takes a literal  and  replaces   it  with  one  from the
1858 literal database if there is a match.   On a match, the argument literal
1859 is destroyed. Without a match it adds   the  literal to the database and
1860 returns it.
1861 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1862 
1863 static literal *
share_literal(rdf_db * db,literal * from)1864 share_literal(rdf_db *db, literal *from)
1865 { literal **data;
1866   literal_ex lex;
1867 
1868   lex.literal = from;
1869   prepare_literal_ex(&lex);
1870 
1871   if ( (data = avlins(&db->literals, &lex)) )
1872   { literal *l2 = *data;
1873 
1874     DEBUG(2,
1875 	  Sdprintf("Replace %p by %p:\n", from, l2);
1876 	  Sdprintf("\tfrom: "); print_literal(from);
1877 	  Sdprintf("\n\tto: "); print_literal(l2);
1878 	  Sdprintf("\n"));
1879 
1880     l2->references++;
1881     free_literal(db, from);
1882 
1883     return l2;
1884   } else
1885   { DEBUG(2,
1886 	  Sdprintf("Insert %p into literal table: ", from);
1887 	  print_literal(from);
1888 	  Sdprintf("\n"));
1889 
1890     from->shared = TRUE;
1891     broadcast(EV_NEW_LITERAL, from, NULL);
1892     return from;
1893   }
1894 }
1895 
1896 
1897 #ifdef O_SECURE
1898 static literal **
add_literals(AVLtree node,literal ** p)1899 add_literals(AVLtree node, literal **p)
1900 { literal **litp;
1901 
1902   if ( node->subtree[LEFT] )
1903     p = add_literals(node->subtree[LEFT], p);
1904   litp = (literal**)node->data;
1905   *p++ = *litp;
1906   if ( node->subtree[RIGHT] )
1907     p = add_literals(node->subtree[RIGHT], p);
1908 
1909   return p;
1910 }
1911 
1912 
1913 static foreign_t
check_transitivity()1914 check_transitivity()
1915 { rdf_db *db = DB;
1916   literal **array = malloc(sizeof(literal*)*db->literals.count);
1917   literal **p = array;
1918   int i,j;
1919 
1920   add_literals(db->literals.root, p);
1921   Sdprintf("Checking %ld literals ...\n", db->literals.count);
1922 
1923   for(i=0; i<db->literals.count; i++)
1924   { int end;
1925 
1926     Sdprintf("\r%6ld", i);
1927     end = i+100;
1928     if ( end > db->literals.count )
1929       end = db->literals.count;
1930 
1931     for(j=i+1; j<end; j++)
1932     { literal_ex lex;
1933 
1934       lex.literal = &array[i];
1935       prepare_literal_ex(&lex);
1936 
1937       if ( compare_literals(&lex, &array[j], IS_NULL) >= 0 )
1938       { Sdprintf("\nERROR: i,j=%d,%d: ", i, j);
1939 	print_literal(array[i]);
1940 	Sdprintf(" >= ");
1941 	print_literal(array[j]);
1942 	Sdprintf("\n");
1943       }
1944     }
1945   }
1946 
1947   free(array);
1948 
1949   return TRUE;
1950 }
1951 
1952 
1953 static void
dump_lnode(AVLtree node)1954 dump_lnode(AVLtree node)
1955 { literal **litp;
1956 
1957   if ( node->subtree[LEFT] )
1958     dump_lnode(node->subtree[LEFT]);
1959   litp = (literal**)node->data;
1960   print_literal(*litp);
1961   Sdprintf("\n");
1962   if ( node->subtree[RIGHT] )
1963     dump_lnode(node->subtree[RIGHT]);
1964 }
1965 
1966 static foreign_t
dump_literals()1967 dump_literals()
1968 { rdf_db *db = DB;
1969 
1970   dump_lnode(db->literals.root);
1971   return TRUE;
1972 }
1973 #endif
1974 
1975 
1976 
1977 		 /*******************************
1978 		 *	      TRIPLES		*
1979 		 *******************************/
1980 
1981 static void
init_tables(rdf_db * db)1982 init_tables(rdf_db *db)
1983 { int i;
1984   int bytes = sizeof(triple*)*INITIAL_TABLE_SIZE;
1985   int cbytes = sizeof(int)*INITIAL_TABLE_SIZE;
1986 
1987   db->table[0] = &db->by_none;
1988   db->tail[0]  = &db->by_none_tail;
1989 
1990   for(i=BY_S; i<=BY_OP; i++)
1991   { if ( i == BY_SO )
1992       continue;
1993 
1994     db->table[i] = rdf_malloc(db, bytes);
1995     memset(db->table[i], 0, bytes);
1996     db->tail[i] = rdf_malloc(db, bytes);
1997     memset(db->tail[i], 0, bytes);
1998     db->counts[i] = rdf_malloc(db, cbytes);
1999     memset(db->counts[i], 0, cbytes);
2000     db->table_size[i] = INITIAL_TABLE_SIZE;
2001   }
2002 
2003   init_pred_table(db);
2004   init_graph_table(db);
2005   init_literal_table(db);
2006 }
2007 
2008 
2009 static rdf_db *
new_db()2010 new_db()
2011 { rdf_db *db = rdf_malloc(NULL, sizeof(*db));
2012 
2013   memset(db, 0, sizeof(*db));
2014   INIT_LOCK(db);
2015   init_tables(db);
2016 
2017   return db;
2018 }
2019 
2020 
2021 static triple *
new_triple(rdf_db * db)2022 new_triple(rdf_db *db)
2023 { triple *t = rdf_malloc(db, sizeof(*t));
2024   memset(t, 0, sizeof(*t));
2025   t->allocated = TRUE;
2026 
2027   return t;
2028 }
2029 
2030 
2031 static void
free_triple(rdf_db * db,triple * t)2032 free_triple(rdf_db *db, triple *t)
2033 { unlock_atoms(t);
2034 
2035   if ( t->object_is_literal && t->object.literal )
2036     free_literal(db, t->object.literal);
2037 
2038   if ( t->allocated )
2039     rdf_free(db, t, sizeof(*t));
2040 }
2041 
2042 
2043 #define HASHED 0x80000000
2044 
2045 static unsigned int
literal_hash(literal * lit)2046 literal_hash(literal *lit)
2047 { if ( lit->hash & HASHED )
2048   { return lit->hash;
2049   } else
2050   { unsigned int hash;
2051 
2052     switch(lit->objtype)
2053     { case OBJ_STRING:
2054 	hash = atom_hash_case(lit->value.string);
2055         break;
2056       case OBJ_INTEGER:
2057       case OBJ_DOUBLE:
2058 	hash = rdf_murmer_hash(&lit->value.integer,
2059 			       sizeof(lit->value.integer),
2060 			       MURMUR_SEED);
2061         break;
2062       case OBJ_TERM:
2063 	hash = rdf_murmer_hash(lit->value.term.record,
2064 			       (int)lit->value.term.len,
2065 			       MURMUR_SEED);
2066 	break;
2067       default:
2068 	assert(0);
2069 	return 0;
2070     }
2071 
2072     lit->hash = (hash | HASHED);
2073     return lit->hash;
2074   }
2075 }
2076 
2077 
2078 static unsigned long
object_hash(triple * t)2079 object_hash(triple *t)
2080 { if ( t->object_is_literal )
2081   { return literal_hash(t->object.literal);
2082   } else
2083   { return atom_hash(t->object.resource);
2084   }
2085 }
2086 
2087 
2088 static int
triple_hash(rdf_db * db,triple * t,int which)2089 triple_hash(rdf_db *db, triple *t, int which)
2090 { unsigned long v;
2091 
2092   switch(which)
2093   { case BY_NONE:
2094       return 0;
2095     case BY_S:
2096       v = atom_hash(t->subject);
2097       break;
2098     case BY_P:
2099       v = predicate_hash(t->predicate.r);
2100       break;
2101     case BY_O:
2102       v = object_hash(t);
2103       break;
2104     case BY_SP:
2105       v = atom_hash(t->subject) ^ predicate_hash(t->predicate.r);
2106       break;
2107     case BY_OP:
2108       v = predicate_hash(t->predicate.r) ^ object_hash(t);
2109       break;
2110     default:
2111       v = 0;				/* make compiler silent */
2112       assert(0);
2113   }
2114 
2115   return (int)(v % (long)db->table_size[which]);
2116 }
2117 
2118 
2119 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2120 by_inverse[] returns the index key to use   for inverse search as needed
2121 to realise symmetric and inverse predicates.
2122 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2123 
2124 static int by_inverse[8] =
2125 { BY_NONE,				/* BY_NONE = 0 */
2126   BY_O,					/* BY_S    = 1 */
2127   BY_P,					/* BY_P    = 2 */
2128   BY_OP,				/* BY_SP   = 3 */
2129   BY_S,					/* BY_O    = 4 */
2130   BY_SO,				/* BY_SO   = 5 */
2131   BY_SP,				/* BY_OP   = 6 */
2132   BY_SPO,				/* BY_SPO  = 7 */
2133 };
2134 
2135 
2136 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2137 triple *first(atom_t subject)
2138     Find the first triple on subject.  The first is marked to generate a
2139     unique subjects quickly;
2140 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2141 
2142 static triple *
first(rdf_db * db,atom_t subject)2143 first(rdf_db *db, atom_t subject)
2144 { triple *t, tmp;
2145   int hash;
2146 
2147   tmp.subject = subject;
2148   hash = triple_hash(db, &tmp, BY_S);
2149 
2150   for(t=db->table[BY_S][hash]; t; t = t->next[BY_S])
2151   { if ( t->subject == subject && !t->erased )
2152       return t;
2153   }
2154 
2155   return NULL;
2156 }
2157 
2158 
2159 static void
link_triple_hash(rdf_db * db,triple * t)2160 link_triple_hash(rdf_db *db, triple *t)
2161 { int i;
2162 
2163   for(i=1; i<=BY_OP; i++)
2164   { if ( db->table[i] )
2165     { int hash = triple_hash(db, t, i);
2166 
2167       if ( db->tail[i][hash] )
2168       { db->tail[i][hash]->next[i] = t;
2169       } else
2170       { db->table[i][hash] = t;
2171       }
2172       db->tail[i][hash] = t;
2173       db->counts[i][hash]++;
2174     }
2175   }
2176 }
2177 
2178 
2179 typedef enum
2180 { DUP_NONE,
2181   DUP_DUPLICATE,
2182   DUP_DISCARDED
2183 } dub_state;
2184 
2185 
2186 static dub_state
discard_duplicate(rdf_db * db,triple * t)2187 discard_duplicate(rdf_db *db, triple *t)
2188 { triple *d;
2189   const int indexed = BY_SP;
2190   dub_state rc = DUP_NONE;
2191 
2192   assert(t->is_duplicate == FALSE);
2193   assert(t->duplicates == 0);
2194 
2195   if ( WANT_GC(db) )			/* (*) See above */
2196     update_hash(db);
2197   d = db->table[indexed][triple_hash(db, t, indexed)];
2198   for( ; d && d != t; d = d->next[indexed] )
2199   { if ( match_triples(d, t, MATCH_DUPLICATE) )
2200     { if ( d->graph == t->graph &&
2201 	   (d->line == NO_LINE || d->line == t->line) )
2202       { free_triple(db, t);
2203 
2204 	return DUP_DISCARDED;
2205       }
2206 
2207       rc = DUP_DUPLICATE;
2208     }
2209   }
2210 
2211   return rc;
2212 }
2213 
2214 
2215 /* MT: must be locked by caller */
2216 
2217 static int
link_triple_silent(rdf_db * db,triple * t)2218 link_triple_silent(rdf_db *db, triple *t)
2219 { triple *one;
2220   dub_state dup;
2221 
2222   if ( t->resolve_pred )
2223   { t->predicate.r = lookup_predicate(db, t->predicate.u);
2224     t->resolve_pred = FALSE;
2225   }
2226 
2227   if ( (dup=discard_duplicate(db, t)) == DUP_DISCARDED )
2228     return FALSE;
2229 
2230   if ( db->by_none_tail )
2231     db->by_none_tail->next[BY_NONE] = t;
2232   else
2233     db->by_none = t;
2234   db->by_none_tail = t;
2235 
2236   link_triple_hash(db, t);
2237   if ( t->object_is_literal )
2238     t->object.literal = share_literal(db, t->object.literal);
2239 
2240   if ( dup == DUP_DUPLICATE && update_duplicates_add(db, t) )
2241     goto ok;				/* is a duplicate */
2242 
2243 					/* keep track of subjects */
2244   one = first(db, t->subject);
2245   if ( !one->first )
2246   { one->first = TRUE;
2247     db->subjects++;
2248   }
2249 
2250 					/* keep track of subPropertyOf */
2251   if ( t->predicate.r->name == ATOM_subPropertyOf &&
2252        t->object_is_literal == FALSE )
2253   { predicate *me    = lookup_predicate(db, t->subject);
2254     predicate *super = lookup_predicate(db, t->object.resource);
2255 
2256     addSubPropertyOf(db, me, super);
2257   }
2258 
2259 ok:
2260   db->created++;
2261   t->predicate.r->triple_count++;
2262   register_graph(db, t);
2263 
2264   return TRUE;
2265 }
2266 
2267 
2268 static inline void
link_triple(rdf_db * db,triple * t)2269 link_triple(rdf_db *db, triple *t)
2270 { if ( link_triple_silent(db, t) )
2271     broadcast(EV_ASSERT, t, NULL);
2272 }
2273 
2274 
2275 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2276 rehash_triples()
2277 
2278 Relink the triples in the hash-chains after the hash-keys for properties
2279 have changed or the tables have  been   resized.  The caller must ensure
2280 there are no active queries and the tables are of the proper size.
2281 
2282 At the same time, this predicate actually removes erased triples.
2283 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2284 
2285 static long
tbl_size(long triples)2286 tbl_size(long triples)
2287 { long s0 = 1024;
2288 
2289   triples /= MIN_HASH_FACTOR;
2290 
2291   while(s0 < triples)
2292     s0 *= 2;
2293 
2294   return s0;
2295 }
2296 
2297 
2298 static void
rehash_triples(rdf_db * db)2299 rehash_triples(rdf_db *db)
2300 { int i;
2301   triple *t, *t2;
2302   long count = db->created - db->freed;
2303   long tsize = tbl_size(count);
2304 
2305   DEBUG(1, Sdprintf("(%ld triples; %ld entries) ...", count, tsize));
2306   broadcast(EV_REHASH, (void*)ATOM_begin, NULL);
2307 
2308   for(i=1; i<INDEX_TABLES; i++)
2309   { if ( db->table[i] )
2310     { long bytes   = sizeof(triple*) * tsize;
2311       long cbytes  = sizeof(int)     * tsize;
2312       long obytes  = sizeof(triple*) * db->table_size[i];
2313       long ocbytes = sizeof(int)     * db->table_size[i];
2314 
2315       db->table[i]  = rdf_realloc(db, db->table[i],  obytes,  bytes);
2316       db->tail[i]   = rdf_realloc(db, db->tail[i],   obytes,  bytes);
2317       db->counts[i] = rdf_realloc(db, db->counts[i], ocbytes, cbytes);
2318       db->table_size[i] = tsize;
2319 
2320       memset(db->table[i],  0, bytes);
2321       memset(db->tail[i],   0, bytes);
2322       memset(db->counts[i], 0, cbytes);
2323     }
2324   }
2325 
2326 					/* delete leading erased triples */
2327   for(t=db->by_none; t && t->erased; t=t2)
2328   { t2 = t->next[BY_NONE];
2329 
2330     free_triple(db, t);
2331     db->freed++;
2332 
2333     db->by_none = t2;
2334   }
2335 
2336   for(t=db->by_none; t; t = t2)
2337   { triple *t3;
2338 
2339     t2 = t->next[BY_NONE];
2340 
2341     for(i=1; i<INDEX_TABLES; i++)
2342       t->next[i] = NULL;
2343 
2344     assert(t->erased == FALSE);
2345     link_triple_hash(db, t);
2346 
2347     for( ; t2 && t2->erased; t2=t3 )
2348     { t3 = t2->next[BY_NONE];
2349 
2350       free_triple(db, t2);
2351       db->freed++;
2352     }
2353 
2354     t->next[BY_NONE] = t2;
2355     if ( !t2 )
2356       db->by_none_tail = t;
2357   }
2358 
2359   if ( db->by_none == NULL )
2360     db->by_none_tail = NULL;
2361 
2362   broadcast(EV_REHASH, (void*)ATOM_end, NULL);
2363 }
2364 
2365 
2366 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2367 update_hash(). Note this may be called by  readers and writers, but must
2368 be done only onces and certainly   not concurrently by multiple readers.
2369 Hence we need a seperate lock.
2370 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2371 
2372 static int
WANT_GC(rdf_db * db)2373 WANT_GC(rdf_db *db)
2374 { if ( db->gc_blocked )
2375   { return FALSE;
2376   } else
2377   { long dirty = db->erased - db->freed;
2378     long count = db->created - db->erased;
2379 
2380     if ( dirty > 1000 && dirty > count )
2381       return TRUE;
2382     if ( count > db->table_size[1]*MAX_HASH_FACTOR )
2383       return TRUE;
2384 
2385     return FALSE;
2386   }
2387 }
2388 
2389 
2390 static int
update_hash(rdf_db * db)2391 update_hash(rdf_db *db)
2392 { int want_gc = WANT_GC(db);
2393 
2394   if ( want_gc )
2395     DEBUG(1, Sdprintf("rdf_db: want GC\n"));
2396 
2397   if ( db->need_update || want_gc )
2398   { LOCK_MISC(db);
2399 
2400     if ( db->need_update )		/* check again */
2401     { if ( organise_predicates(db) )
2402       { long t0 = (long)PL_query(PL_QUERY_USER_CPU);
2403 
2404 	DEBUG(1, Sdprintf("Re-hash ..."));
2405 	invalidate_distinct_counts(db);
2406 	rehash_triples(db);
2407 	db->generation += (db->created-db->erased);
2408 	db->rehash_count++;
2409 	db->rehash_time += ((double)(PL_query(PL_QUERY_USER_CPU)-t0))/1000.0;
2410 	DEBUG(1, Sdprintf("ok\n"));
2411       }
2412       db->need_update = 0;
2413     } else if ( WANT_GC(db) )
2414     { long t0 = (long)PL_query(PL_QUERY_USER_CPU);
2415 
2416       DEBUG(1, Sdprintf("rdf_db: GC ..."));
2417       rehash_triples(db);
2418       db->gc_count++;
2419       db->gc_time += ((double)(PL_query(PL_QUERY_USER_CPU)-t0))/1000.0;
2420       DEBUG(1, Sdprintf("ok\n"));
2421     }
2422 
2423     UNLOCK_MISC(db);
2424   }
2425 
2426   return TRUE;
2427 }
2428 
2429 
2430 /* MT: Must be locked */
2431 
2432 static void
erase_triple_silent(rdf_db * db,triple * t)2433 erase_triple_silent(rdf_db *db, triple *t)
2434 { if ( !t->erased )
2435   { t->erased = TRUE;
2436 
2437     update_duplicates_del(db, t);
2438 
2439     if ( t->predicate.r->name == ATOM_subPropertyOf &&
2440 	 t->object_is_literal == FALSE )
2441     { predicate *me    = lookup_predicate(db, t->subject);
2442       predicate *super = lookup_predicate(db, t->object.resource);
2443 
2444       delSubPropertyOf(db, me, super);
2445     }
2446 
2447     if ( t->first )
2448     { triple *one = first(db, t->subject);
2449 
2450       if ( one )
2451 	one->first = TRUE;
2452       else
2453 	db->subjects--;
2454     }
2455     db->erased++;
2456     t->predicate.r->triple_count--;
2457     unregister_graph(db, t);
2458 
2459     if ( t->object_is_literal )
2460     { literal *lit = t->object.literal;
2461 
2462       t->object.literal = NULL;
2463       free_literal(db, lit);		/* TBD: thread-safe? */
2464     }
2465   }
2466 }
2467 
2468 
2469 static inline void
erase_triple(rdf_db * db,triple * t)2470 erase_triple(rdf_db *db, triple *t)
2471 { broadcast(EV_RETRACT, t, NULL);
2472   erase_triple_silent(db, t);
2473 }
2474 
2475 
2476 static int
match_object(triple * t,triple * p,unsigned flags)2477 match_object(triple *t, triple *p, unsigned flags)
2478 { if ( p->object_is_literal )
2479   { if ( t->object_is_literal )
2480     { literal *plit = p->object.literal;
2481       literal *tlit = t->object.literal;
2482 
2483       if ( !plit->objtype && !plit->qualifier )
2484 	return TRUE;
2485 
2486       if ( plit->objtype && plit->objtype != tlit->objtype )
2487 	return FALSE;
2488 
2489       switch( plit->objtype )
2490       { case 0:
2491 	  if ( plit->qualifier &&
2492 	       tlit->qualifier != plit->qualifier )
2493 	    return FALSE;
2494 	  return TRUE;
2495 	case OBJ_STRING:
2496 	  if ( (flags & MATCH_QUAL) ||
2497 	       p->match == STR_MATCH_PLAIN )
2498 	  { if ( tlit->qualifier != plit->qualifier )
2499 	      return FALSE;
2500 	  } else
2501 	  { if ( plit->qualifier && tlit->qualifier &&
2502 		 tlit->qualifier != plit->qualifier )
2503 	      return FALSE;
2504 	  }
2505 	  if ( plit->type_or_lang &&
2506 	       tlit->type_or_lang != plit->type_or_lang )
2507 	    return FALSE;
2508 	  if ( plit->value.string )
2509 	  { if ( tlit->value.string != plit->value.string )
2510 	    { if ( p->match >= STR_MATCH_EXACT )
2511 	      { return match_atoms(p->match,
2512 				   plit->value.string, tlit->value.string);
2513 	      } else
2514 	      { return FALSE;
2515 	      }
2516 	    }
2517 	  }
2518 	  return TRUE;
2519 	case OBJ_INTEGER:
2520 	  return tlit->value.integer == plit->value.integer;
2521 	case OBJ_DOUBLE:
2522 	  return tlit->value.real == plit->value.real;
2523 	case OBJ_TERM:
2524 	  if ( plit->value.term.record &&
2525 	       plit->value.term.len != tlit->value.term.len )
2526 	    return FALSE;
2527 	  return memcmp(tlit->value.term.record, plit->value.term.record,
2528 			plit->value.term.len) == 0;
2529 	default:
2530 	  assert(0);
2531       }
2532     }
2533     return FALSE;
2534   } else
2535   { if ( p->object.resource )
2536     { if ( t->object_is_literal ||
2537 	   (p->object.resource != t->object.resource) )
2538 	return FALSE;
2539     }
2540   }
2541 
2542   return TRUE;
2543 }
2544 
2545 
2546 
2547 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2548 Match triple t to pattern p.  Erased triples are always skipped.
2549 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2550 
2551 static int
match_triples(triple * t,triple * p,unsigned flags)2552 match_triples(triple *t, triple *p, unsigned flags)
2553 { /* DEBUG(3, Sdprintf("match_triple(");
2554 	   print_triple(t, 0);
2555 	   Sdprintf(")\n"));
2556   */
2557 
2558   if ( t->erased )
2559     return FALSE;
2560   if ( p->subject && t->subject != p->subject )
2561     return FALSE;
2562   if ( !match_object(t, p, flags) )
2563     return FALSE;
2564   if ( flags & MATCH_SRC )
2565   { if ( p->graph && t->graph != p->graph )
2566       return FALSE;
2567     if ( p->line && t->line != p->line )
2568       return FALSE;
2569   }
2570 					/* last; may be expensive */
2571   if ( p->predicate.r && t->predicate.r != p->predicate.r )
2572   { if ( (flags & MATCH_SUBPROPERTY) )
2573       return isSubPropertyOf(t->predicate.r, p->predicate.r);
2574     else
2575       return FALSE;
2576   }
2577   return TRUE;
2578 }
2579 
2580 
2581 		 /*******************************
2582 		 *	      SAVE/LOAD		*
2583 		 *******************************/
2584 
2585 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2586 The RDF triple format.  This format is intended for quick save and load
2587 and not for readability or exchange.  Parts are based on the SWI-Prolog
2588 Quick Load Format (implemented in pl-wic.c).
2589 
2590 	<file> 		::= <magic>
2591 			    <version>
2592 			    ['S' <graph-name>]
2593 			    ['F' <graph-source>]
2594 		            ['t' <modified>]
2595 			    ['M' <md5>]
2596 			    {<triple>}
2597 			    'E'
2598 
2599 	<magic> 	::= "RDF-dump\n"
2600 	<version> 	::= <integer>
2601 
2602 	<md5>		::= <byte>* 		(16 bytes digest)
2603 
2604 	<triple>	::= 'T'
2605 	                    <subject>
2606 			    <predicate>
2607 			    <object>
2608 			    <graph>
2609 
2610 	<subject>	::= <resource>
2611 	<predicate>	::= <resource>
2612 
2613 	<object>	::= "R" <resource>
2614 			  | "L" <atom>
2615 
2616 	<resource>	::= <atom>
2617 
2618 	<atom>		::= "X" <integer>
2619 			    "A" <string>
2620 			    "W" <utf-8 string>
2621 
2622 	<string>	::= <integer><bytes>
2623 
2624 	<graph-name>	::= <atom>
2625 	<graph-source>	::= <atom>
2626 
2627 	<graph>	::= <graph-file>
2628 			    <line>
2629 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2630 
2631 #define SAVE_MAGIC "RDF-dump\n"
2632 #define SAVE_VERSION 2
2633 
2634 typedef struct saved
2635 { atom_t name;
2636   long   as;
2637   struct saved *next;
2638 } saved;
2639 
2640 
2641 typedef struct save_context
2642 { saved ** saved_table;
2643   long     saved_size;
2644   long     saved_id;
2645 } save_context;
2646 
2647 
2648 long
next_table_size(long s0)2649 next_table_size(long s0)
2650 { long size = 2;
2651 
2652   while(size < s0)
2653     size *= 2;
2654 
2655   return size;
2656 }
2657 
2658 static void
init_saved(rdf_db * db,save_context * ctx)2659 init_saved(rdf_db *db, save_context *ctx)
2660 { long size = next_table_size((db->created - db->erased)/8);
2661   long bytes = size * sizeof(*ctx->saved_table);
2662 
2663   ctx->saved_table = rdf_malloc(db, bytes);
2664   memset(ctx->saved_table, 0, bytes);
2665   ctx->saved_size = size;
2666   ctx->saved_id = 0;
2667 }
2668 
2669 static void
destroy_saved(rdf_db * db,save_context * ctx)2670 destroy_saved(rdf_db *db, save_context *ctx)
2671 { if ( ctx->saved_table )
2672   { saved **s = ctx->saved_table;
2673     int i;
2674 
2675     for(i=0; i<ctx->saved_size; i++, s++)
2676     { saved *c, *n;
2677 
2678       for(c=*s; c; c = n)
2679       { n = c->next;
2680 	free(c);
2681       }
2682     }
2683 
2684     rdf_free(db, ctx->saved_table, ctx->saved_size*sizeof(*ctx->saved_table));
2685   }
2686 }
2687 
2688 #define INT64BITSIZE (sizeof(int64_t)*8)
2689 #define PLMINLONG   ((int64_t)((uint64_t)1<<(INT64BITSIZE-1)))
2690 
2691 static void
save_int(IOSTREAM * fd,int64_t n)2692 save_int(IOSTREAM *fd, int64_t n)
2693 { int m;
2694   int64_t absn = (n >= 0 ? n : -n);
2695 
2696   if ( n != PLMINLONG )
2697   { if ( absn < ((intptr_t)1 << 5) )
2698     { Sputc((int)(n & 0x3f), fd);
2699       return;
2700     } else if ( absn < ((intptr_t)1 << 13) )
2701     { Sputc((int)(((n >> 8) & 0x3f) | (1 << 6)), fd);
2702       Sputc((int)(n & 0xff), fd);
2703       return;
2704     } else if ( absn < ((intptr_t)1 << 21) )
2705     { Sputc((int)(((n >> 16) & 0x3f) | (2 << 6)), fd);
2706       Sputc((int)((n >> 8) & 0xff), fd);
2707       Sputc((int)(n & 0xff), fd);
2708       return;
2709     }
2710   }
2711 
2712   for(m = sizeof(n); ; m--)
2713   { int b = (int)((absn >> (((m-1)*8)-1)) & 0x1ff);
2714 
2715     if ( b == 0 )
2716       continue;
2717     break;
2718   }
2719 
2720   Sputc(m | (3 << 6), fd);
2721 
2722   for( ; m > 0; m--)
2723   { int b = (int)((n >> ((m-1)*8)) & 0xff);
2724 
2725     Sputc(b, fd);
2726   }
2727 }
2728 
2729 
2730 #define BYTES_PER_DOUBLE sizeof(double)
2731 #ifdef WORDS_BIGENDIAN
2732 static const int double_byte_order[] = { 7,6,5,4,3,2,1,0 };
2733 #else
2734 static const int double_byte_order[] = { 0,1,2,3,4,5,6,7 };
2735 #endif
2736 
2737 static int
save_double(IOSTREAM * fd,double f)2738 save_double(IOSTREAM *fd, double f)
2739 { unsigned char *cl = (unsigned char *)&f;
2740   unsigned int i;
2741 
2742   for(i=0; i<BYTES_PER_DOUBLE; i++)
2743     Sputc(cl[double_byte_order[i]], fd);
2744 
2745   return TRUE;
2746 }
2747 
2748 
2749 static int
save_atom(rdf_db * db,IOSTREAM * out,atom_t a,save_context * ctx)2750 save_atom(rdf_db *db, IOSTREAM *out, atom_t a, save_context *ctx)
2751 { int hash = atom_hash(a) % ctx->saved_size;
2752   saved *s;
2753   size_t len;
2754   const char *chars;
2755   unsigned int i;
2756   const wchar_t *wchars;
2757 
2758   for(s=ctx->saved_table[hash]; s; s= s->next)
2759   { if ( s->name == a )
2760     { Sputc('X', out);
2761       save_int(out, s->as);
2762 
2763       return TRUE;
2764     }
2765   }
2766 
2767   s = rdf_malloc(db, sizeof(*s));
2768   s->name = a;
2769   s->as = ctx->saved_id++;
2770   s->next = ctx->saved_table[hash];
2771   ctx->saved_table[hash] = s;
2772 
2773   if ( (chars = PL_atom_nchars(a, &len)) )
2774   { Sputc('A', out);
2775     save_int(out, len);
2776     for(i=0; i<len; i++, chars++)
2777       Sputc(*chars&0xff, out);
2778   } else if ( (wchars = PL_atom_wchars(a, &len)) )
2779   { IOENC enc = out->encoding;
2780 
2781     Sputc('W', out);
2782     save_int(out, len);
2783     out->encoding = ENC_UTF8;
2784     for(i=0; i<len; i++, wchars++)
2785     { wint_t c = *wchars;
2786 
2787       SECURE(assert(c>=0 && c <= 0x10ffff));
2788       Sputcode(c, out);
2789     }
2790     out->encoding = enc;
2791   } else
2792     return FALSE;
2793 
2794   return TRUE;
2795 }
2796 
2797 
2798 static void
write_triple(rdf_db * db,IOSTREAM * out,triple * t,save_context * ctx)2799 write_triple(rdf_db *db, IOSTREAM *out, triple *t, save_context *ctx)
2800 { Sputc('T', out);
2801 
2802   save_atom(db, out, t->subject, ctx);
2803   save_atom(db, out, t->predicate.r->name, ctx);
2804 
2805   if ( t->object_is_literal )
2806   { literal *lit = t->object.literal;
2807 
2808     if ( lit->qualifier )
2809     { assert(lit->type_or_lang);
2810       Sputc(lit->qualifier == Q_LANG ? 'l' : 't', out);
2811       save_atom(db, out, lit->type_or_lang, ctx);
2812     }
2813 
2814     switch(lit->objtype)
2815     { case OBJ_STRING:
2816 	Sputc('L', out);
2817 	save_atom(db, out, lit->value.string, ctx);
2818 	break;
2819       case OBJ_INTEGER:
2820 	Sputc('I', out);
2821 	save_int(out, lit->value.integer);
2822 	break;
2823       case OBJ_DOUBLE:
2824       {	Sputc('F', out);
2825 	save_double(out, lit->value.real);
2826 	break;
2827       }
2828       case OBJ_TERM:
2829       { const char *s = lit->value.term.record;
2830 	size_t len = lit->value.term.len;
2831 
2832 	Sputc('T', out);
2833 	save_int(out, len);
2834 	while(len-- > 0)
2835 	  Sputc(*s++, out);
2836 
2837 	break;
2838       }
2839       default:
2840 	assert(0);
2841     }
2842   } else
2843   { Sputc('R', out);
2844     save_atom(db, out, t->object.resource, ctx);
2845   }
2846 
2847   save_atom(db, out, t->graph, ctx);
2848   save_int(out, t->line);
2849 }
2850 
2851 
2852 static void
write_source(rdf_db * db,IOSTREAM * out,atom_t src,save_context * ctx)2853 write_source(rdf_db *db, IOSTREAM *out, atom_t src, save_context *ctx)
2854 { graph *s = lookup_graph(db, src, FALSE);
2855 
2856   if ( s && s->source )
2857   { Sputc('F', out);
2858     save_atom(db, out, s->source, ctx);
2859     Sputc('t', out);
2860     save_double(out, s->modified);
2861   }
2862 }
2863 
2864 
2865 static void
write_md5(rdf_db * db,IOSTREAM * out,atom_t src)2866 write_md5(rdf_db *db, IOSTREAM *out, atom_t src)
2867 { graph *s = lookup_graph(db, src, FALSE);
2868 
2869   if ( s )
2870   { md5_byte_t *p = s->digest;
2871     int i;
2872 
2873     Sputc('M', out);
2874     for(i=0; i<16; i++)
2875       Sputc(*p++, out);
2876   }
2877 }
2878 
2879 
2880 static int
save_db(rdf_db * db,IOSTREAM * out,atom_t src)2881 save_db(rdf_db *db, IOSTREAM *out, atom_t src)
2882 { triple *t;
2883   save_context ctx;
2884 
2885   if ( !RDLOCK(db) )
2886     return FALSE;
2887   init_saved(db, &ctx);
2888 
2889   Sfprintf(out, "%s", SAVE_MAGIC);
2890   save_int(out, SAVE_VERSION);
2891   if ( src )
2892   { Sputc('S', out);			/* start of graph header */
2893     save_atom(db, out, src, &ctx);
2894     write_source(db, out, src, &ctx);
2895     write_md5(db, out, src);
2896   }
2897   if ( Sferror(out) )
2898   { RDUNLOCK(db);
2899     return FALSE;
2900   }
2901 
2902   for(t = db->by_none; t; t = t->next[BY_NONE])
2903   { if ( !t->erased &&
2904 	 (!src || t->graph == src) )
2905     { write_triple(db, out, t, &ctx);
2906       if ( Sferror(out) )
2907 	return FALSE;
2908     }
2909   }
2910   Sputc('E', out);
2911   if ( Sferror(out) )
2912   { RDUNLOCK(db);
2913     return FALSE;
2914   }
2915 
2916   destroy_saved(db, &ctx);
2917   RDUNLOCK(db);
2918 
2919   return TRUE;
2920 }
2921 
2922 
2923 static foreign_t
rdf_save_db(term_t stream,term_t graph)2924 rdf_save_db(term_t stream, term_t graph)
2925 { IOSTREAM *out;
2926   atom_t src;
2927 
2928   if ( !PL_get_stream_handle(stream, &out) )
2929     return type_error(stream, "stream");
2930   if ( !get_atom_or_var_ex(graph, &src) )
2931     return FALSE;
2932 
2933   return save_db(DB, out, src);
2934 }
2935 
2936 
2937 static int64_t
load_int(IOSTREAM * fd)2938 load_int(IOSTREAM *fd)
2939 { int64_t first = Sgetc(fd);
2940   int bytes, shift, b;
2941 
2942   if ( !(first & 0xc0) )		/* 99% of them: speed up a bit */
2943   { first <<= (INT64BITSIZE-6);
2944     first >>= (INT64BITSIZE-6);
2945 
2946     return first;
2947   }
2948 
2949   bytes = (int) ((first >> 6) & 0x3);
2950   first &= 0x3f;
2951 
2952   if ( bytes <= 2 )
2953   { for( b = 0; b < bytes; b++ )
2954     { first <<= 8;
2955       first |= Sgetc(fd) & 0xff;
2956     }
2957 
2958     shift = (sizeof(first)-1-bytes)*8 + 2;
2959   } else
2960   { int m;
2961 
2962     bytes = (int)first;
2963     first = 0L;
2964 
2965     for(m=0; m<bytes; m++)
2966     { first <<= 8;
2967       first |= Sgetc(fd) & 0xff;
2968     }
2969     shift = (sizeof(first)-bytes)*8;
2970   }
2971 
2972   first <<= shift;
2973   first >>= shift;
2974 
2975   return first;
2976 }
2977 
2978 
2979 static int
load_double(IOSTREAM * fd,double * fp)2980 load_double(IOSTREAM *fd, double *fp)
2981 { double f;
2982   unsigned char *cl = (unsigned char *)&f;
2983   unsigned int i;
2984 
2985   for(i=0; i<BYTES_PER_DOUBLE; i++)
2986   { int c = Sgetc(fd);
2987 
2988     if ( c == -1 )
2989     { *fp = 0.0;
2990       return FALSE;
2991     }
2992     cl[double_byte_order[i]] = c;
2993   }
2994 
2995   *fp = f;
2996   return TRUE;
2997 }
2998 
2999 
3000 typedef struct ld_context
3001 { long		loaded_id;		/* keep track of atoms */
3002   atom_t       *loaded_atoms;
3003   long		atoms_size;
3004   atom_t	graph;			/* for single-graph files */
3005   atom_t	graph_source;
3006   double	modified;
3007   int		has_digest;
3008   md5_byte_t    digest[16];
3009   atom_hash    *graph_table;		/* multi-graph file */
3010 } ld_context;
3011 
3012 
3013 static void
add_atom(rdf_db * db,atom_t a,ld_context * ctx)3014 add_atom(rdf_db *db, atom_t a, ld_context *ctx)
3015 { if ( ctx->loaded_id >= ctx->atoms_size )
3016   { if ( ctx->atoms_size == 0 )
3017     { ctx->atoms_size = 1024;
3018       ctx->loaded_atoms = rdf_malloc(db, sizeof(atom_t)*ctx->atoms_size);
3019     } else
3020     { long obytes = sizeof(atom_t)*ctx->atoms_size;
3021       long  bytes;
3022 
3023       ctx->atoms_size *= 2;
3024       bytes = sizeof(atom_t)*ctx->atoms_size;
3025       ctx->loaded_atoms = rdf_realloc(db, ctx->loaded_atoms, obytes, bytes);
3026     }
3027   }
3028 
3029   ctx->loaded_atoms[ctx->loaded_id++] = a;
3030 }
3031 
3032 
3033 static atom_t
load_atom(rdf_db * db,IOSTREAM * in,ld_context * ctx)3034 load_atom(rdf_db *db, IOSTREAM *in, ld_context *ctx)
3035 { switch(Sgetc(in))
3036   { case 'X':
3037     { intptr_t idx = (intptr_t)load_int(in);
3038       return ctx->loaded_atoms[idx];
3039     }
3040     case 'A':
3041     { size_t len = (size_t)load_int(in);
3042       atom_t a;
3043 
3044       if ( len < 1024 )
3045       { char buf[1024];
3046 	Sfread(buf, 1, len, in);
3047 	a = PL_new_atom_nchars(len, buf);
3048       } else
3049       { char *buf = rdf_malloc(db, len);
3050 	Sfread(buf, 1, len, in);
3051 	a = PL_new_atom_nchars(len, buf);
3052 	rdf_free(db, buf, len);
3053       }
3054 
3055       add_atom(db, a, ctx);
3056       return a;
3057     }
3058     case 'W':
3059     { int len = (int)load_int(in);
3060       atom_t a;
3061       wchar_t buf[1024];
3062       wchar_t *w;
3063       IOENC enc = in->encoding;
3064       int i;
3065 
3066       if ( len < 1024 )
3067 	w = buf;
3068       else
3069 	w = rdf_malloc(db, len*sizeof(wchar_t));
3070 
3071       in->encoding = ENC_UTF8;
3072       for(i=0; i<len; i++)
3073       { w[i] = Sgetcode(in);
3074 	SECURE(assert(w[i]>=0 && w[i] <= 0x10ffff));
3075       }
3076       in->encoding = enc;
3077 
3078       a = PL_new_atom_wchars(len, w);
3079       if ( w != buf )
3080 	rdf_free(db, w, len*sizeof(wchar_t));
3081 
3082       add_atom(db, a, ctx);
3083       return a;
3084     }
3085     default:
3086     { assert(0);
3087       return 0;
3088     }
3089   }
3090 }
3091 
3092 
3093 static triple *
load_triple(rdf_db * db,IOSTREAM * in,ld_context * ctx)3094 load_triple(rdf_db *db, IOSTREAM *in, ld_context *ctx)
3095 { triple *t = new_triple(db);
3096   int c;
3097 
3098   t->subject   = load_atom(db, in, ctx);
3099   t->predicate.u = load_atom(db, in, ctx);
3100   t->resolve_pred = TRUE;
3101   if ( (c=Sgetc(in)) == 'R' )
3102   { t->object.resource = load_atom(db, in, ctx);
3103   } else
3104   { literal *lit = new_literal(db);
3105 
3106     t->object_is_literal = TRUE;
3107     t->object.literal = lit;
3108 
3109   value:
3110     switch(c)
3111     { case 'L':
3112 	lit->objtype = OBJ_STRING;
3113 	lit->value.string = load_atom(db, in, ctx);
3114 	break;
3115       case 'I':
3116 	lit->objtype = OBJ_INTEGER;
3117 	lit->value.integer = load_int(in);
3118 	break;
3119       case 'F':
3120 	lit->objtype = OBJ_DOUBLE;
3121         load_double(in, &lit->value.real);
3122 	break;
3123       case 'T':
3124       { unsigned int i;
3125 	char *s;
3126 
3127 	lit->objtype = OBJ_TERM;
3128 	lit->value.term.len = (size_t)load_int(in);
3129 	lit->value.term.record = rdf_malloc(db, lit->value.term.len);
3130 	lit->term_loaded = TRUE;	/* see free_literal() */
3131 	s = (char *)lit->value.term.record;
3132 
3133 	for(i=0; i<lit->value.term.len; i++)
3134 	  s[i] = Sgetc(in);
3135 
3136 	break;
3137       }
3138       case 'l':
3139 	lit->qualifier = Q_LANG;
3140 	lit->type_or_lang = load_atom(db, in, ctx);
3141 	c = Sgetc(in);
3142 	goto value;
3143       case 't':
3144 	lit->qualifier = Q_TYPE;
3145 	lit->type_or_lang = load_atom(db, in, ctx);
3146 	c = Sgetc(in);
3147 	goto value;
3148       default:
3149 	assert(0);
3150         return NULL;
3151     }
3152   }
3153   t->graph = load_atom(db, in, ctx);
3154   t->line  = (unsigned long)load_int(in);
3155   if ( !ctx->graph )
3156   { if ( !ctx->graph_table )
3157       ctx->graph_table = new_atom_hash(64);
3158     add_atom_hash(ctx->graph_table, t->graph);
3159   }
3160 
3161   return t;
3162 }
3163 
3164 
3165 static int
load_magic(IOSTREAM * in)3166 load_magic(IOSTREAM *in)
3167 { char *s = SAVE_MAGIC;
3168 
3169   for( ; *s; s++)
3170   { if ( Sgetc(in) != *s )
3171       return FALSE;
3172   }
3173 
3174   return TRUE;
3175 }
3176 
3177 
3178 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3179 Note that we have two types  of   saved  states.  One holding many named
3180 graphs and one holding the content of exactly one named graph.
3181 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3182 
3183 #define LOAD_ERROR ((triple*)(intptr_t)-1)
3184 
3185 static triple *
load_db(rdf_db * db,IOSTREAM * in,ld_context * ctx)3186 load_db(rdf_db *db, IOSTREAM *in, ld_context *ctx)
3187 { int version;
3188   int c;
3189   triple *list = NULL, *tail = NULL;
3190 
3191   if ( !load_magic(in) )
3192     return LOAD_ERROR;
3193   version = (int)load_int(in);
3194 
3195   while((c=Sgetc(in)) != EOF)
3196   { switch(c)
3197     { case 'T':
3198       { triple *t;
3199 
3200 	if ( !(t=load_triple(db, in, ctx)) )
3201 	  return FALSE;
3202 
3203 	if ( tail )
3204 	{ tail->next[BY_NONE] = t;
3205 	  tail = t;
3206 	} else
3207 	{ list = tail = t;
3208 	}
3209 
3210         break;
3211       }
3212 					/* file holding exactly one graph */
3213       case 'S':				/* name of the graph */
3214       { ctx->graph = load_atom(db, in, ctx);
3215         break;
3216       }
3217       case 'M':				/* MD5 of the graph */
3218       { int i;
3219 
3220 	for(i=0; i<16; i++)
3221 	  ctx->digest[i] = Sgetc(in);
3222 	ctx->has_digest = TRUE;
3223 
3224 	break;
3225       }
3226       case 'F':				/* file of the graph */
3227 	ctx->graph_source = load_atom(db, in, ctx);
3228 	break;				/* end of one-graph handling */
3229       case 't':
3230 	load_double(in, &ctx->modified);
3231         break;
3232       case 'E':				/* end of file */
3233 	return list;
3234       default:
3235 	break;
3236     }
3237   }
3238 
3239   PL_warning("Illegal RDF triple file");
3240 
3241   return LOAD_ERROR;
3242 }
3243 
3244 
3245 static int
link_loaded_triples(rdf_db * db,triple * t,ld_context * ctx)3246 link_loaded_triples(rdf_db *db, triple *t, ld_context *ctx)
3247 { long created0 = db->created;
3248   graph *graph;
3249 
3250   if ( ctx->graph )			/* lookup named graph */
3251   { graph = lookup_graph(db, ctx->graph, TRUE);
3252     if ( ctx->graph_source && graph->source != ctx->graph_source )
3253     { if ( graph->source )
3254 	PL_unregister_atom(graph->source);
3255       graph->source = ctx->graph_source;
3256       PL_register_atom(graph->source);
3257       graph->modified = ctx->modified;
3258     }
3259 
3260     if ( ctx->has_digest )
3261     { if ( graph->md5 )
3262       { if ( db->tr_first )
3263 	{ record_md5_transaction(db, graph, NULL);
3264 	} else
3265 	{ graph->md5 = FALSE;		/* kill repetitive MD5 update */
3266 	}
3267       } else
3268       { ctx->has_digest = FALSE;
3269       }
3270     }
3271   } else
3272   { graph = NULL;
3273   }
3274 
3275 
3276   if ( db->tr_first )			/* loading in a transaction */
3277   { triple *next;
3278 
3279     for( ; t; t = next )
3280     { next = t->next[BY_NONE];
3281 
3282       t->next[BY_NONE] = NULL;
3283       lock_atoms(t);
3284       record_transaction(db, TR_ASSERT, t);
3285     }
3286   } else
3287   { triple *next;
3288 
3289     for( ; t; t = next )
3290     { next = t->next[BY_NONE];
3291 
3292       t->next[BY_NONE] = NULL;
3293       lock_atoms(t);
3294       if ( link_triple_silent(db, t) )
3295 	broadcast(EV_ASSERT_LOAD, t, NULL);
3296     }
3297   }
3298 
3299 					/* update the graph info */
3300   if ( ctx->has_digest )
3301   { if ( db->tr_first )
3302     { md5_byte_t *d = rdf_malloc(db, sizeof(ctx->digest));
3303       memcpy(d, ctx->digest, sizeof(ctx->digest));
3304       record_md5_transaction(db, graph, d);
3305     } else
3306     { sum_digest(graph->digest, ctx->digest);
3307     }
3308     graph->md5 = TRUE;
3309   }
3310 
3311   db->generation += (db->created-created0);
3312 
3313   return TRUE;
3314 }
3315 
3316 
3317 static int
append_graph_to_list(ptr_hash_node * node,void * closure)3318 append_graph_to_list(ptr_hash_node *node, void *closure)
3319 { atom_t graph = (atom_t)node->value;
3320   term_t tail  = (term_t)closure;
3321   term_t head  = PL_new_term_ref();
3322   int rc;
3323 
3324   rc = (PL_unify_list(tail, head, tail) &&
3325 	PL_unify_atom(head, graph));
3326   PL_reset_term_refs(head);
3327 
3328   return rc;
3329 }
3330 
3331 
3332 static foreign_t
rdf_load_db(term_t stream,term_t id,term_t graphs)3333 rdf_load_db(term_t stream, term_t id, term_t graphs)
3334 { ld_context ctx;
3335   rdf_db *db = DB;
3336   IOSTREAM *in;
3337   triple *list;
3338   int rc;
3339 
3340   if ( !PL_get_stream_handle(stream, &in) )
3341     return type_error(stream, "stream");
3342 
3343   memset(&ctx, 0, sizeof(ctx));
3344   if ( (list=load_db(db, in, &ctx)) == LOAD_ERROR )
3345     return FALSE;
3346 
3347   if ( !WRLOCK(db, FALSE) )
3348     return FALSE;
3349   broadcast(EV_LOAD, (void*)id, (void*)ATOM_begin);
3350 
3351   if ( (rc=link_loaded_triples(db, list, &ctx)) )
3352   { if ( ctx.graph_table )
3353     { term_t tail = PL_copy_term_ref(graphs);
3354 
3355       rc = ( for_atom_hash(ctx.graph_table, append_graph_to_list, (void*)tail) &&
3356 	     PL_unify_nil(tail) );
3357 
3358       destroy_atom_hash(ctx.graph_table);
3359     } else
3360     { rc = PL_unify_atom(graphs, ctx.graph);
3361     }
3362   }
3363   broadcast(EV_LOAD, (void*)id, (void*)ATOM_end);
3364   WRUNLOCK(db);
3365 
3366   PL_release_stream(in);
3367   if ( ctx.loaded_atoms )
3368   { atom_t *ap, *ep;
3369 
3370     for(ap=ctx.loaded_atoms, ep=ap+ctx.loaded_id; ap<ep; ap++)
3371       PL_unregister_atom(*ap);
3372 
3373     rdf_free(db, ctx.loaded_atoms, sizeof(atom_t)*ctx.atoms_size);
3374   }
3375 
3376   return rc;
3377 }
3378 
3379 
3380 #ifdef WITH_MD5
3381 		 /*******************************
3382 		 *	     MD5 SUPPORT	*
3383 		 *******************************/
3384 
3385 /* md5_type is used to keep the MD5 independent from the internal
3386    numbers
3387 */
3388 static const char md5_type[] =
3389 { 0x0,					/* OBJ_UNKNOWN */
3390   0x3,					/* OBJ_INTEGER */
3391   0x4,					/* OBJ_DOUBLE */
3392   0x2,					/* OBJ_STRING */
3393   0x5					/* OBJ_TERM */
3394 };
3395 
3396 static void
md5_triple(triple * t,md5_byte_t * digest)3397 md5_triple(triple *t, md5_byte_t *digest)
3398 { md5_state_t state;
3399   size_t len;
3400   md5_byte_t tmp[2];
3401   const char *s;
3402   literal *lit;
3403 
3404   md5_init(&state);
3405   s = PL_blob_data(t->subject, &len, NULL);
3406   md5_append(&state, (const md5_byte_t *)s, (int)len);
3407   md5_append(&state, (const md5_byte_t *)"P", 1);
3408   s = PL_blob_data(t->predicate.r->name, &len, NULL);
3409   md5_append(&state, (const md5_byte_t *)s, (int)len);
3410   tmp[0] = 'O';
3411   if ( t->object_is_literal )
3412   { lit = t->object.literal;
3413     tmp[1] = md5_type[lit->objtype];
3414 
3415     switch(lit->objtype)
3416     { case OBJ_STRING:
3417 	s = PL_blob_data(lit->value.string, &len, NULL);
3418 	break;
3419       case OBJ_INTEGER:			/* TBD: byte order issues */
3420 	s = (const char *)&lit->value.integer;
3421 	len = sizeof(lit->value.integer);
3422 	break;
3423       case OBJ_DOUBLE:
3424 	s = (const char *)&lit->value.real;
3425 	len = sizeof(lit->value.real);
3426 	break;
3427       case OBJ_TERM:
3428 	s = (const char *)lit->value.term.record;
3429 	len = lit->value.term.len;
3430 	break;
3431       default:
3432 	assert(0);
3433     }
3434   } else
3435   { s = PL_blob_data(t->object.resource, &len, NULL);
3436     tmp[1] = 0x1;			/* old OBJ_RESOURCE */
3437     lit = NULL;
3438   }
3439   md5_append(&state, tmp, 2);
3440   md5_append(&state, (const md5_byte_t *)s, (int)len);
3441   if ( lit && lit->qualifier )
3442   { assert(lit->type_or_lang);
3443     md5_append(&state,
3444 	       (const md5_byte_t *)(lit->qualifier == Q_LANG ? "l" : "t"),
3445 	       1);
3446     s = PL_blob_data(lit->type_or_lang, &len, NULL);
3447     md5_append(&state, (const md5_byte_t *)s, (int)len);
3448   }
3449   if ( t->graph )
3450   { md5_append(&state, (const md5_byte_t *)"S", 1);
3451     s = PL_blob_data(t->graph, &len, NULL);
3452     md5_append(&state, (const md5_byte_t *)s, (int)len);
3453   }
3454 
3455   md5_finish(&state, digest);
3456 }
3457 
3458 
3459 static void
sum_digest(md5_byte_t * digest,md5_byte_t * add)3460 sum_digest(md5_byte_t *digest, md5_byte_t *add)
3461 { md5_byte_t *p, *q;
3462   int n;
3463 
3464   for(p=digest, q=add, n=16; --n>=0; )
3465     *p++ += *q++;
3466 }
3467 
3468 
3469 static void
dec_digest(md5_byte_t * digest,md5_byte_t * add)3470 dec_digest(md5_byte_t *digest, md5_byte_t *add)
3471 { md5_byte_t *p, *q;
3472   int n;
3473 
3474   for(p=digest, q=add, n=16; --n>=0; )
3475     *p++ -= *q++;
3476 }
3477 
3478 
3479 static int
md5_unify_digest(term_t t,md5_byte_t digest[16])3480 md5_unify_digest(term_t t, md5_byte_t digest[16])
3481 { char hex_output[16*2];
3482   int di;
3483   char *pi;
3484   static char hexd[] = "0123456789abcdef";
3485 
3486   for(pi=hex_output, di = 0; di < 16; ++di)
3487   { *pi++ = hexd[(digest[di] >> 4) & 0x0f];
3488     *pi++ = hexd[digest[di] & 0x0f];
3489   }
3490 
3491   return PL_unify_atom_nchars(t, 16*2, hex_output);
3492 }
3493 
3494 
3495 static foreign_t
rdf_md5(term_t graph_name,term_t md5)3496 rdf_md5(term_t graph_name, term_t md5)
3497 { atom_t src;
3498   int rc;
3499   rdf_db *db = DB;
3500 
3501   if ( !get_atom_or_var_ex(graph_name, &src) )
3502     return FALSE;
3503 
3504   if ( src )
3505   { graph *s;
3506 
3507     if ( !RDLOCK(db) )
3508       return FALSE;
3509     if ( (s = lookup_graph(db, src, FALSE)) )
3510     { rc = md5_unify_digest(md5, s->digest);
3511     } else
3512     { md5_byte_t digest[16];
3513 
3514       memset(digest, 0, sizeof(digest));
3515       rc = md5_unify_digest(md5, digest);
3516     }
3517     RDUNLOCK(db);
3518   } else
3519   { md5_byte_t digest[16];
3520     graph **ht;
3521     int i;
3522 
3523     memset(&digest, 0, sizeof(digest));
3524 
3525     if ( !RDLOCK(db) )
3526       return FALSE;
3527 
3528     for(i=0,ht = db->graph_table; i<db->graph_table_size; i++, ht++)
3529     { graph *s;
3530 
3531       for( s = *ht; s; s = s->next )
3532 	sum_digest(digest, s->digest);
3533     }
3534 
3535     rc = md5_unify_digest(md5, digest);
3536     RDUNLOCK(db);
3537   }
3538 
3539   return rc;
3540 }
3541 
3542 
3543 static foreign_t
rdf_atom_md5(term_t text,term_t times,term_t md5)3544 rdf_atom_md5(term_t text, term_t times, term_t md5)
3545 { char *s;
3546   int n, i;
3547   size_t len;
3548   md5_byte_t digest[16];
3549 
3550   if ( !PL_get_nchars(text, &len, &s, CVT_ALL) )
3551     return type_error(text, "text");
3552   if ( !PL_get_integer(times, &n) )
3553     return type_error(times, "integer");
3554   if ( n < 1 )
3555     return domain_error(times, "positive_integer");
3556 
3557   for(i=0; i<n; i++)
3558   { md5_state_t state;
3559     md5_init(&state);
3560     md5_append(&state, (const md5_byte_t *)s, (int)len);
3561     md5_finish(&state, digest);
3562     s = (char *)digest;
3563     len = sizeof(digest);
3564   }
3565 
3566   return md5_unify_digest(md5, digest);
3567 }
3568 
3569 
3570 
3571 #endif /*WITH_MD5*/
3572 
3573 
3574 		 /*******************************
3575 		 *	       ATOMS		*
3576 		 *******************************/
3577 
3578 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3579 Lock atoms in triple against AGC. Note that the predicate name is locked
3580 in the predicate structure.
3581 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3582 
3583 static void
lock_atoms(triple * t)3584 lock_atoms(triple *t)
3585 { if ( !t->atoms_locked )
3586   { t->atoms_locked = TRUE;
3587 
3588     PL_register_atom(t->subject);
3589     if ( t->object_is_literal )
3590     { lock_atoms_literal(t->object.literal);
3591     } else
3592     { PL_register_atom(t->object.resource);
3593     }
3594   }
3595 }
3596 
3597 
3598 static void
unlock_atoms(triple * t)3599 unlock_atoms(triple *t)
3600 { if ( t->atoms_locked )
3601   { t->atoms_locked = FALSE;
3602 
3603     PL_unregister_atom(t->subject);
3604     if ( !t->object_is_literal )
3605     { PL_unregister_atom(t->object.resource);
3606     }
3607   }
3608 }
3609 
3610 
3611 		 /*******************************
3612 		 *      PROLOG CONVERSION	*
3613 		 *******************************/
3614 
3615 #define LIT_TYPED	0x1
3616 #define LIT_NOERROR	0x2
3617 #define LIT_PARTIAL	0x4
3618 
3619 static int
get_lit_atom_ex(term_t t,atom_t * a,int flags)3620 get_lit_atom_ex(term_t t, atom_t *a, int flags)
3621 { if ( PL_get_atom(t, a) )
3622     return TRUE;
3623   if ( (flags & LIT_PARTIAL) && PL_is_variable(t) )
3624   { *a = 0L;
3625     return TRUE;
3626   }
3627 
3628   return type_error(t, "atom");
3629 }
3630 
3631 
3632 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3633 get_literal() processes the argument  of  a   literal/1  term  passes as
3634 object.
3635 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3636 
3637 static int
get_literal(rdf_db * db,term_t litt,triple * t,int flags)3638 get_literal(rdf_db *db, term_t litt, triple *t, int flags)
3639 { literal *lit;
3640 
3641   alloc_literal_triple(db, t);
3642   lit = t->object.literal;
3643 
3644   if ( PL_get_atom(litt, &lit->value.string) )
3645   { lit->objtype = OBJ_STRING;
3646   } else if ( PL_is_integer(litt) && PL_get_int64(litt, &lit->value.integer) )
3647   { lit->objtype = OBJ_INTEGER;
3648   } else if ( PL_get_float(litt, &lit->value.real) )
3649   { lit->objtype = OBJ_DOUBLE;
3650   } else if ( PL_is_functor(litt, FUNCTOR_lang2) )
3651   { term_t a = PL_new_term_ref();
3652 
3653     _PL_get_arg(1, litt, a);
3654     if ( !get_lit_atom_ex(a, &lit->type_or_lang, flags) )
3655       return FALSE;
3656     _PL_get_arg(2, litt, a);
3657     if ( !get_lit_atom_ex(a, &lit->value.string, flags) )
3658       return FALSE;
3659 
3660     lit->qualifier = Q_LANG;
3661     lit->objtype = OBJ_STRING;
3662   } else if ( PL_is_functor(litt, FUNCTOR_type2) &&
3663 	      !(flags & LIT_TYPED) )	/* avoid recursion */
3664   { term_t a = PL_new_term_ref();
3665 
3666     _PL_get_arg(1, litt, a);
3667     if ( !get_lit_atom_ex(a, &lit->type_or_lang, flags) )
3668       return FALSE;
3669     lit->qualifier = Q_TYPE;
3670     _PL_get_arg(2, litt, a);
3671 
3672     return get_literal(db, a, t, LIT_TYPED|flags);
3673   } else if ( !PL_is_ground(litt) )
3674   { if ( !(flags & LIT_PARTIAL) )
3675       return type_error(litt, "rdf_object");
3676     if ( !PL_is_variable(litt) )
3677       lit->objtype = OBJ_TERM;
3678   } else
3679   { lit->value.term.record = PL_record_external(litt, &lit->value.term.len);
3680     lit->objtype = OBJ_TERM;
3681   }
3682 
3683   return TRUE;
3684 }
3685 
3686 
3687 static int
get_object(rdf_db * db,term_t object,triple * t)3688 get_object(rdf_db *db, term_t object, triple *t)
3689 { if ( PL_get_atom(object, &t->object.resource) )
3690   { assert(!t->object_is_literal);
3691   } else if ( PL_is_functor(object, FUNCTOR_literal1) )
3692   { term_t a = PL_new_term_ref();
3693 
3694     _PL_get_arg(1, object, a);
3695     return get_literal(db, a, t, 0);
3696   } else
3697     return type_error(object, "rdf_object");
3698 
3699   return TRUE;
3700 }
3701 
3702 
3703 static int
get_src(term_t src,triple * t)3704 get_src(term_t src, triple *t)
3705 { if ( src && !PL_is_variable(src) )
3706   { if ( PL_get_atom(src, &t->graph) )
3707     { t->line = NO_LINE;
3708     } else if ( PL_is_functor(src, FUNCTOR_colon2) )
3709     { term_t a = PL_new_term_ref();
3710       long line;
3711 
3712       _PL_get_arg(1, src, a);
3713       if ( !get_atom_or_var_ex(a, &t->graph) )
3714 	return FALSE;
3715       _PL_get_arg(2, src, a);
3716       if ( PL_get_long(a, &line) )
3717 	t->line = line;
3718       else if ( !PL_is_variable(a) )
3719 	return type_error(a, "integer");
3720     } else
3721       return type_error(src, "rdf_graph");
3722   }
3723 
3724   return TRUE;
3725 }
3726 
3727 
3728 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3729 Return values:
3730 	-1: exception
3731 	 0: no predicate
3732 	 1: the predicate
3733 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3734 
3735 static int
get_existing_predicate(rdf_db * db,term_t t,predicate ** p)3736 get_existing_predicate(rdf_db *db, term_t t, predicate **p)
3737 { atom_t name;
3738 
3739   if ( !PL_get_atom(t, &name ) )
3740   { if ( PL_is_functor(t, FUNCTOR_literal1) )
3741       return 0;				/* rdf(_, literal(_), _) */
3742     return type_error(t, "atom");
3743   }
3744 
3745   if ( (*p = existing_predicate(db, name)) )
3746     return 1;
3747 
3748   DEBUG(5, Sdprintf("No predicate %s\n", PL_atom_chars(name)));
3749   return 0;				/* no predicate */
3750 }
3751 
3752 
3753 static int
get_predicate(rdf_db * db,term_t t,predicate ** p)3754 get_predicate(rdf_db *db, term_t t, predicate **p)
3755 { atom_t name;
3756 
3757   if ( !get_atom_ex(t, &name ) )
3758     return FALSE;
3759 
3760   *p = lookup_predicate(db, name);
3761   return TRUE;
3762 }
3763 
3764 
3765 static int
get_triple(rdf_db * db,term_t subject,term_t predicate,term_t object,triple * t)3766 get_triple(rdf_db *db,
3767 	   term_t subject, term_t predicate, term_t object,
3768 	   triple *t)
3769 { if ( !get_atom_ex(subject, &t->subject) ||
3770        !get_predicate(db, predicate, &t->predicate.r) ||
3771        !get_object(db, object, t) )
3772     return FALSE;
3773 
3774   return TRUE;
3775 }
3776 
3777 
3778 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3779 get_partial_triple() creates a triple  for   matching  purposes.  It can
3780 return FALSE for  two  reasons.  Mostly   (type)  errors,  but  also  if
3781 resources are accessed that do not   exist  and therefore the subsequent
3782 matching will always fail. This  is   notably  the  case for predicates,
3783 which are first class citizens to this library.
3784 
3785 Return values:
3786 	1: ok
3787 	0: no predicate
3788        -1: error
3789 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3790 
3791 static int
get_partial_triple(rdf_db * db,term_t subject,term_t predicate,term_t object,term_t src,triple * t)3792 get_partial_triple(rdf_db *db,
3793 		   term_t subject, term_t predicate, term_t object,
3794 		   term_t src, triple *t)
3795 { int rc;
3796 
3797   if ( subject && !get_resource_or_var_ex(subject, &t->subject) )
3798     return FALSE;
3799   if ( !PL_is_variable(predicate) &&
3800        (rc=get_existing_predicate(db, predicate, &t->predicate.r)) != 1 )
3801     return rc;
3802 					/* the object */
3803   if ( object && !PL_is_variable(object) )
3804   { if ( PL_get_atom(object, &t->object.resource) )
3805     { assert(!t->object_is_literal);
3806     } else if ( PL_is_functor(object, FUNCTOR_literal1) )
3807     { term_t a = PL_new_term_ref();
3808 
3809       _PL_get_arg(1, object, a);
3810       if ( !get_literal(db, a, t, LIT_PARTIAL) )
3811 	return FALSE;
3812     } else if ( PL_is_functor(object, FUNCTOR_literal2) )
3813     { term_t a = PL_new_term_ref();
3814       literal *lit;
3815 
3816       alloc_literal_triple(db, t);
3817       lit = t->object.literal;
3818 
3819       _PL_get_arg(1, object, a);
3820       if ( PL_is_functor(a, FUNCTOR_exact1) )
3821 	t->match = STR_MATCH_EXACT;
3822       else if ( PL_is_functor(a, FUNCTOR_plain1) )
3823 	t->match = STR_MATCH_PLAIN;
3824       else if ( PL_is_functor(a, FUNCTOR_substring1) )
3825 	t->match = STR_MATCH_SUBSTRING;
3826       else if ( PL_is_functor(a, FUNCTOR_word1) )
3827 	t->match = STR_MATCH_WORD;
3828       else if ( PL_is_functor(a, FUNCTOR_prefix1) )
3829 	t->match = STR_MATCH_PREFIX;
3830       else if ( PL_is_functor(a, FUNCTOR_like1) )
3831 	t->match = STR_MATCH_LIKE;
3832       else
3833 	return domain_error(a, "match_type");
3834 
3835       _PL_get_arg(1, a, a);
3836       if ( !get_atom_or_var_ex(a, &lit->value.string) )
3837 	return FALSE;
3838       lit->objtype = OBJ_STRING;
3839     } else
3840       return type_error(object, "rdf_object");
3841   }
3842 					/* the graph */
3843   if ( !get_src(src, t) )
3844     return FALSE;
3845 
3846   if ( t->subject )
3847     t->indexed |= BY_S;
3848   if ( t->predicate.r )
3849     t->indexed |= BY_P;
3850   if ( t->object_is_literal )
3851   { literal *lit = t->object.literal;
3852 
3853     if ( lit->objtype == OBJ_STRING &&
3854 	 lit->value.string &&
3855 	 t->match <= STR_MATCH_EXACT )
3856       t->indexed |= BY_O;
3857   } else if ( t->object.resource )
3858     t->indexed |= BY_O;
3859 
3860   db->indexed[t->indexed]++;		/* statistics */
3861 
3862   switch(t->indexed)
3863   { case BY_SPO:
3864       t->indexed = BY_SP;
3865       break;
3866     case BY_SO:
3867       t->indexed = BY_S;
3868       break;
3869   }
3870 
3871   return TRUE;
3872 }
3873 
3874 
3875 static int
inverse_partial_triple(triple * t)3876 inverse_partial_triple(triple *t)
3877 { predicate *i = 0;
3878 
3879   if ( !t->inversed &&
3880        (!t->predicate.r || (i=t->predicate.r->inverse_of)) &&
3881        !t->object_is_literal )
3882   { atom_t o = t->object.resource;
3883 
3884     t->object.resource = t->subject;
3885     t->subject = o;
3886 
3887     if ( t->predicate.r )
3888       t->predicate.r = i;
3889 
3890     t->indexed  = by_inverse[t->indexed];
3891     t->inversed = TRUE;
3892 
3893     return TRUE;
3894   }
3895 
3896   return FALSE;
3897 }
3898 
3899 
3900 static int
get_graph(term_t src,triple * t)3901 get_graph(term_t src, triple *t)
3902 { if ( PL_get_atom(src, &t->graph) )
3903   { t->line = NO_LINE;
3904     return TRUE;
3905   }
3906 
3907   if ( PL_is_functor(src, FUNCTOR_colon2) )
3908   { term_t a = PL_new_term_ref();
3909     long line;
3910 
3911     _PL_get_arg(1, src, a);
3912     if ( !get_atom_ex(a, &t->graph) )
3913       return FALSE;
3914     _PL_get_arg(2, src, a);
3915     if ( !get_long_ex(a, &line) )
3916       return FALSE;
3917     t->line = line;
3918 
3919     return TRUE;
3920   }
3921 
3922   return type_error(src, "rdf_graph");
3923 }
3924 
3925 
3926 static int
unify_graph(term_t src,triple * t)3927 unify_graph(term_t src, triple *t)
3928 { switch( PL_term_type(src) )
3929   { case PL_VARIABLE:
3930     { if ( t->line == NO_LINE )
3931 	return PL_unify_atom(src, t->graph);
3932       else
3933 	goto full_term;
3934     }
3935     case PL_ATOM:
3936     { atom_t a;
3937       return (PL_get_atom(src, &a) &&
3938 	      a == t->graph);
3939     }
3940     case PL_TERM:
3941     { if ( t->line == NO_LINE )
3942       { return PL_unify_term(src,
3943 			     PL_FUNCTOR, FUNCTOR_colon2,
3944 			       PL_ATOM, t->graph,
3945 			       PL_VARIABLE);
3946       } else
3947       { full_term:
3948 	return PL_unify_term(src,
3949 			     PL_FUNCTOR, FUNCTOR_colon2,
3950 			       PL_ATOM, t->graph,
3951 			       PL_LONG, t->line);
3952       }
3953     }
3954     default:
3955       return type_error(src, "rdf_graph");
3956   }
3957 }
3958 
3959 
3960 static int
same_graph(triple * t1,triple * t2)3961 same_graph(triple *t1, triple *t2)
3962 { return t1->line  == t2->line &&
3963          t1->graph == t2->graph;
3964 }
3965 
3966 
3967 
3968 static int
put_literal_value(term_t v,literal * lit)3969 put_literal_value(term_t v, literal *lit)
3970 { switch(lit->objtype)
3971   { case OBJ_STRING:
3972       PL_put_atom(v, lit->value.string);
3973       break;
3974     case OBJ_INTEGER:
3975       PL_put_variable(v);
3976       return PL_unify_int64(v, lit->value.integer);
3977     case OBJ_DOUBLE:
3978       return PL_put_float(v, lit->value.real);
3979     case OBJ_TERM:
3980       return PL_recorded_external(lit->value.term.record, v);
3981     default:
3982       assert(0);
3983       return FALSE;
3984   }
3985 
3986   return TRUE;
3987 }
3988 
3989 
3990 static int
unify_literal(term_t lit,literal * l)3991 unify_literal(term_t lit, literal *l)
3992 { term_t v = PL_new_term_ref();
3993 
3994   if ( !put_literal_value(v, l) )
3995     return FALSE;
3996 
3997   if ( l->qualifier )
3998   { functor_t qf;
3999 
4000     assert(l->type_or_lang);
4001 
4002     if ( l->qualifier == Q_LANG )
4003       qf = FUNCTOR_lang2;
4004     else
4005       qf = FUNCTOR_type2;
4006 
4007     if ( PL_unify_term(lit, PL_FUNCTOR, qf,
4008 			 PL_ATOM, l->type_or_lang,
4009 			 PL_TERM, v) )
4010       return TRUE;
4011 
4012     return PL_unify(lit, v);		/* allow rdf(X, Y, literal(foo)) */
4013   } else if ( PL_unify(lit, v) )
4014   { return TRUE;
4015   } else if ( PL_is_functor(lit, FUNCTOR_lang2) &&
4016 	      l->objtype == OBJ_STRING )
4017   { term_t a = PL_new_term_ref();
4018     _PL_get_arg(2, lit, a);
4019     return PL_unify(a, v);
4020   } else if ( PL_is_functor(lit, FUNCTOR_type2) )
4021   { term_t a = PL_new_term_ref();
4022     _PL_get_arg(2, lit, a);
4023     return PL_unify(a, v);
4024   } else
4025     return FALSE;
4026 }
4027 
4028 
4029 
4030 static int
unify_object(term_t object,triple * t)4031 unify_object(term_t object, triple *t)
4032 { if ( t->object_is_literal )
4033   { term_t lit = PL_new_term_ref();
4034 
4035     if ( PL_unify_functor(object, FUNCTOR_literal1) )
4036       _PL_get_arg(1, object, lit);
4037     else if ( PL_is_functor(object, FUNCTOR_literal2) )
4038       _PL_get_arg(2, object, lit);
4039     else
4040       return FALSE;
4041 
4042     return unify_literal(lit, t->object.literal);
4043   } else
4044   { return PL_unify_atom(object, t->object.resource);
4045   }
4046 }
4047 
4048 
4049 static int
unify_triple(term_t subject,term_t pred,term_t object,term_t src,triple * t,int inversed)4050 unify_triple(term_t subject, term_t pred, term_t object,
4051 	     term_t src, triple *t, int inversed)
4052 { predicate *p = t->predicate.r;
4053   fid_t fid;
4054 
4055   if ( inversed )
4056   { term_t tmp = object;
4057     object = subject;
4058     subject = tmp;
4059 
4060     if ( !(p = p->inverse_of) )
4061       return FALSE;
4062   }
4063 
4064   fid = PL_open_foreign_frame();
4065 
4066   if ( !PL_unify_atom(subject, t->subject) ||
4067        !PL_unify_atom(pred, p->name) ||
4068        !unify_object(object, t) ||
4069        (src && !unify_graph(src, t)) )
4070   { PL_discard_foreign_frame(fid);
4071     return FALSE;
4072   } else
4073   { PL_close_foreign_frame(fid);
4074     return TRUE;
4075   }
4076 }
4077 
4078 
4079 		 /*******************************
4080 		 *	DUBLICATE HANDLING	*
4081 		 *******************************/
4082 
4083 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4084 According to the RDF specs, duplicate triples  have no meaning, but they
4085 slow down search and often produce   duplicate results in search. Worse,
4086 some coding styles proposed in the  OWL documents introduce huge amounts
4087 of duplicate triples. We cannot  simply  ignore   a  triple  if  it is a
4088 duplicate as a subsequent retract  would   delete  the final triple. For
4089 example, after loading two  files  that   contain  the  same  triple and
4090 unloading one of these files the database would be left without triples.
4091 
4092 In our solution, if a triple is added as a duplicate, it is flagged such
4093 using  the  flag  is_duplicate.  The  `principal'  triple  has  a  count
4094 `duplicates',  indicating  the  number  of   duplicate  triples  in  the
4095 database.
4096 
4097 It might make sense to  introduce  the   BY_SPO  table  as fully indexed
4098 lookups are frequent with the introduction of duplicate detection.
4099 
4100 (*) Iff too many triples are  added,  it   may  be  time  to enlarge the
4101 hashtable. Note that we do not call  update_hash() blindly as this would
4102 cause each triple that  modifies  the   predicate  hierarchy  to force a
4103 rehash. As we are not searching using subPropertyOf semantics during the
4104 duplicate update, there is no point updating. If it is incorrect it will
4105 be updated on the first real query.
4106 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4107 
4108 
4109 static int
update_duplicates_add(rdf_db * db,triple * t)4110 update_duplicates_add(rdf_db *db, triple *t)
4111 { triple *d;
4112   const int indexed = BY_SP;
4113 
4114   assert(t->is_duplicate == FALSE);
4115   assert(t->duplicates == 0);
4116 
4117   if ( WANT_GC(db) )			/* (*) See above */
4118     update_hash(db);
4119   d = db->table[indexed][triple_hash(db, t, indexed)];
4120   for( ; d && d != t; d = d->next[indexed] )
4121   { if ( match_triples(d, t, MATCH_DUPLICATE) )
4122     { t->is_duplicate = TRUE;
4123       assert( !d->is_duplicate );
4124 
4125       d->duplicates++;
4126 
4127       DEBUG(2,
4128 	    print_triple(t, PRT_SRC);
4129 	    Sdprintf(" %p: %d-th duplicate: ", t, d->duplicates);
4130 	    Sdprintf("Principal: %p at", d);
4131 	    print_src(d);
4132 	    Sdprintf("\n"));
4133 
4134       assert(d->duplicates);		/* check overflow */
4135       db->duplicates++;
4136       return TRUE;
4137     }
4138   }
4139 
4140   return FALSE;
4141 }
4142 
4143 
4144 static void				/* t is about to be deleted */
update_duplicates_del(rdf_db * db,triple * t)4145 update_duplicates_del(rdf_db *db, triple *t)
4146 { const int indexed = BY_SP;
4147 
4148   if ( t->duplicates )			/* I am the principal one */
4149   { triple *d;
4150 
4151     DEBUG(2,
4152 	  print_triple(t, PRT_SRC);
4153 	  Sdprintf(": DEL principal %p, %d duplicates: ", t, t->duplicates));
4154 
4155     db->duplicates--;
4156     d = db->table[indexed][triple_hash(db, t, indexed)];
4157     for( ; d; d = d->next[indexed] )
4158     { if ( d != t && match_triples(d, t, MATCH_DUPLICATE) )
4159       { assert(d->is_duplicate);
4160 	d->is_duplicate = FALSE;
4161 	d->duplicates = t->duplicates-1;
4162 	DEBUG(2,
4163 	      Sdprintf("New principal: %p at", d);
4164 	      print_src(d);
4165 	      Sdprintf("\n"));
4166 
4167 	return;
4168       }
4169     }
4170     assert(0);
4171   } else if ( t->is_duplicate )		/* I am a duplicate */
4172   { triple *d;
4173 
4174     DEBUG(2,
4175 	  print_triple(t, PRT_SRC);
4176 	  Sdprintf(": DEL: is a duplicate: "));
4177 
4178     db->duplicates--;
4179     d = db->table[indexed][triple_hash(db, t, indexed)];
4180     for( ; d; d = d->next[indexed] )
4181     { if ( d != t && match_triples(d, t, MATCH_DUPLICATE) )
4182       { if ( d->duplicates )
4183 	{ d->duplicates--;
4184 	  DEBUG(2,
4185 		Sdprintf("Principal %p at ", d);
4186 		print_src(d);
4187 		Sdprintf(" has %d duplicates\n", d->duplicates));
4188 	  return;
4189 	}
4190       }
4191     }
4192     Sdprintf("FATAL\n");
4193     PL_halt(1);
4194     assert(0);
4195   }
4196 }
4197 
4198 
4199 		 /*******************************
4200 		 *	    TRANSACTIONS	*
4201 		 *******************************/
4202 
4203 static void
append_transaction(rdf_db * db,transaction_record * tr)4204 append_transaction(rdf_db *db, transaction_record *tr)
4205 { if ( db->tr_last )
4206   { tr->next = NULL;
4207     tr->previous = db->tr_last;
4208     db->tr_last->next = tr;
4209     db->tr_last = tr;
4210   } else
4211   { tr->next = tr->previous = NULL;
4212     db->tr_first = db->tr_last = tr;
4213   }
4214 }
4215 
4216 
4217 static void
open_transaction(rdf_db * db)4218 open_transaction(rdf_db *db)
4219 { transaction_record *tr = rdf_malloc(db, sizeof(*tr));
4220 
4221   memset(tr, 0, sizeof(*tr));
4222   tr->type = TR_MARK;
4223 
4224   if ( db->tr_first )
4225     db->tr_nesting++;
4226   else
4227     db->tr_nesting = 0;
4228 
4229   append_transaction(db, tr);
4230 }
4231 
4232 
4233 static void
record_transaction(rdf_db * db,tr_type type,triple * t)4234 record_transaction(rdf_db *db, tr_type type, triple *t)
4235 { transaction_record *tr = rdf_malloc(db, sizeof(*tr));
4236 
4237   memset(tr, 0, sizeof(*tr));
4238   tr->type = type;
4239   tr->triple = t;
4240 
4241   append_transaction(db, tr);
4242 }
4243 
4244 
4245 static void
record_md5_transaction(rdf_db * db,graph * src,md5_byte_t * digest)4246 record_md5_transaction(rdf_db *db, graph *src, md5_byte_t *digest)
4247 { transaction_record *tr = rdf_malloc(db, sizeof(*tr));
4248 
4249   memset(tr, 0, sizeof(*tr));
4250   tr->type = TR_UPDATE_MD5,
4251   tr->update.md5.graph = src;
4252   tr->update.md5.digest = digest;
4253 
4254   append_transaction(db, tr);
4255 }
4256 
4257 
4258 static void
record_update_transaction(rdf_db * db,triple * t,triple * new)4259 record_update_transaction(rdf_db *db, triple *t, triple *new)
4260 { transaction_record *tr = rdf_malloc(db, sizeof(*tr));
4261 
4262   memset(tr, 0, sizeof(*tr));
4263   tr->type = TR_UPDATE,
4264   tr->triple = t;
4265   tr->update.triple = new;
4266 
4267   append_transaction(db, tr);
4268 }
4269 
4270 
4271 static void
record_update_src_transaction(rdf_db * db,triple * t,atom_t src,unsigned long line)4272 record_update_src_transaction(rdf_db *db, triple *t,
4273 			      atom_t src, unsigned long line)
4274 { transaction_record *tr = rdf_malloc(db, sizeof(*tr));
4275 
4276   memset(tr, 0, sizeof(*tr));
4277   tr->type = TR_UPDATE_SRC,
4278   tr->triple = t;
4279   tr->update.src.atom = src;
4280   tr->update.src.line = line;
4281 
4282   append_transaction(db, tr);
4283 }
4284 
4285 
4286 static void
void_transaction(rdf_db * db,transaction_record * tr)4287 void_transaction(rdf_db *db, transaction_record *tr)
4288 { switch(tr->type)
4289   { case TR_ASSERT:
4290       free_triple(db, tr->triple);
4291       break;
4292     case TR_UPDATE:
4293       free_triple(db, tr->update.triple);
4294       break;
4295     case TR_UPDATE_MD5:
4296       if ( tr->update.md5.digest )
4297 	rdf_free(db, tr->update.md5.digest, sizeof(*tr->update.md5.digest));
4298       break;
4299     default:
4300       break;
4301   }
4302 
4303   tr->type = TR_VOID;
4304 }
4305 
4306 
4307 static void
free_transaction(rdf_db * db,transaction_record * tr)4308 free_transaction(rdf_db *db, transaction_record *tr)
4309 { void_transaction(db, tr);
4310 
4311   rdf_free(db, tr, sizeof(*tr));
4312 }
4313 
4314 
4315 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4316 This must deal  with  multiple  operations   on  the  same  triple. Most
4317 probably the most important thing is to   merge  update records. We must
4318 also make-up our mind with regard to  updated records that are erased or
4319 records that are erased after updating, etc.
4320 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4321 
4322 static void
clean_transaction(rdf_db * db,transaction_record * tr0)4323 clean_transaction(rdf_db *db, transaction_record *tr0)
4324 {
4325 #if 0
4326   transaction_record *tr;
4327 
4328   for(tr=tr0; tr; tr=tr->next)
4329   { if ( TR_RETRACT )
4330     { transaction_record *tr2;
4331 
4332       for(tr2=tr->next; tr2; tr2=tr2->next)
4333       { if ( tr2->triple == tr->triple )
4334 	{ switch(tr2->type)
4335 	  { case TR_RETRACT:
4336 	    case TR_UPDATE:
4337 	    case TR_UPDATE_SRC:
4338 	      void_transaction(db, tr2);
4339 	    default:
4340 	      ;
4341 	  }
4342 	}
4343       }
4344     }
4345   }
4346 #endif
4347 }
4348 
4349 
4350 static void
truncate_transaction(rdf_db * db,transaction_record * last)4351 truncate_transaction(rdf_db *db, transaction_record *last)
4352 { db->tr_last = last;
4353   if ( last )
4354   { db->tr_last->next = NULL;
4355   } else
4356   { db->tr_first = NULL;
4357   }
4358 }
4359 
4360 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4361 discard_transaction()  simply  destroys  all   actions    in   the  last
4362 transaction.
4363 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4364 
4365 static void
discard_transaction(rdf_db * db)4366 discard_transaction(rdf_db *db)
4367 { transaction_record *tr, *prev;
4368 
4369   for(tr=db->tr_last; tr; tr = prev)
4370   { prev = tr->previous;
4371 
4372     if ( tr->type == TR_SUB_END )
4373     { if ( tr->update.transaction_id )
4374 	PL_erase(tr->update.transaction_id);
4375     }
4376 
4377     if ( tr->type == TR_MARK )
4378     { rdf_free(db, tr, sizeof(*tr));
4379       truncate_transaction(db, prev);
4380       db->tr_nesting--;
4381       return;
4382     }
4383 
4384     free_transaction(db, tr);
4385   }
4386 }
4387 
4388 
4389 int
put_begin_end(term_t t,functor_t be,int level)4390 put_begin_end(term_t t, functor_t be, int level)
4391 { term_t av;
4392 
4393   return ( (av = PL_new_term_ref()) &&
4394 	   PL_put_integer(av, level) &&
4395 	   PL_cons_functor_v(t, be, av) );
4396 }
4397 
4398 
4399 
4400 
4401 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4402 Note  (*)  rdf-monitors  can  modify  the    database   by  opening  new
4403 transactions. Therefore we first close the  transaction to allow opening
4404 new ones. TBD: get  this  clear.   Monitors  have  only  restricted read
4405 access?
4406 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4407 
4408 static int
commit_transaction_int(rdf_db * db,term_t id)4409 commit_transaction_int(rdf_db *db, term_t id)
4410 { transaction_record *tr, *next;
4411   int tr_level = 0;			/* nesting level */
4412 
4413   if ( db->tr_nesting > 0 )		/* commit nested transaction */
4414   { tr=db->tr_last;
4415 
4416     if ( tr->type == TR_MARK )		/* empty nested transaction */
4417     { truncate_transaction(db, tr->previous);
4418       rdf_free(db, tr, sizeof(*tr));
4419       db->tr_nesting--;
4420 
4421       return TRUE;
4422     }
4423 
4424     for(; tr; tr = tr->previous)	/* not the last (tested above) */
4425     {					/* not the first (we are nested) */
4426       if ( tr->type == TR_MARK )
4427       { transaction_record *end = rdf_malloc(db, sizeof(*end));
4428 
4429 	memset(end, 0, sizeof(*end));
4430 	end->type = TR_SUB_END;
4431 	end->update.transaction_id = PL_record(id);
4432 	append_transaction(db, end);
4433 
4434 	tr->type = TR_SUB_START;
4435 	tr->update.transaction_id = end->update.transaction_id;
4436 	db->tr_nesting--;
4437 
4438 	return TRUE;
4439       }
4440     }
4441 
4442     assert(0);
4443     return FALSE;
4444   }
4445 
4446   while( (tr=db->tr_first) )		/* See above (*) */
4447   { db->tr_first = db->tr_last = NULL;
4448 
4449     clean_transaction(db, tr);
4450 					/* real commit */
4451     for(; tr; tr = next)
4452     { next = tr->next;
4453 
4454       switch(tr->type)
4455       { case TR_MARK:
4456 	  break;
4457 	case TR_SUB_START:
4458 	{ term_t id = PL_new_term_ref();
4459 	  term_t be = PL_new_term_ref();
4460 	  if ( !PL_recorded(tr->update.transaction_id, id) ||
4461 	       !put_begin_end(be, FUNCTOR_begin1, ++tr_level) ||
4462 	       !broadcast(EV_TRANSACTION, (void*)id, (void*)be) )
4463 	    return FALSE;
4464 	  break;
4465 	}
4466 	case TR_SUB_END:
4467 	{ term_t id = PL_new_term_ref();
4468 	  term_t be = PL_new_term_ref();
4469 	  if ( !PL_recorded(tr->update.transaction_id, id) )
4470 	    return FALSE;
4471 	  PL_erase(tr->update.transaction_id);
4472 	  if ( !put_begin_end(be, FUNCTOR_end1, tr_level--) ||
4473 	       !broadcast(EV_TRANSACTION, (void*)id, (void*)be)	)
4474 	    return FALSE;
4475 	  break;
4476 	}
4477 	case TR_ASSERT:
4478 	  link_triple(db, tr->triple);
4479 	  db->generation++;
4480 	  break;
4481 	case TR_RETRACT:
4482 	  if ( !tr->triple->erased )	/* already erased */
4483 	  { erase_triple(db, tr->triple);
4484 	    db->generation++;
4485 	  }
4486 	  break;
4487 	case TR_UPDATE:
4488 	  if ( !tr->triple->erased )
4489 	  { if ( !broadcast(EV_UPDATE, tr->triple, tr->update.triple) )
4490 	      return FALSE;		/* TBD: how to handle? */
4491 	    if ( !tr->triple->erased )
4492 	    { erase_triple_silent(db, tr->triple);
4493 	      link_triple_silent(db, tr->update.triple);
4494 	      db->generation++;
4495 	    }
4496 	  }
4497 	  break;
4498 	case TR_UPDATE_SRC:
4499 	  if ( !tr->triple->erased )
4500 	  { if ( tr->triple->graph != tr->update.src.atom )
4501 	    { if ( tr->triple->graph )
4502 		unregister_graph(db, tr->triple);
4503 	      tr->triple->graph = tr->update.src.atom;
4504 	      if ( tr->triple->graph )
4505 		register_graph(db, tr->triple);
4506 	    }
4507 	    tr->triple->line = tr->update.src.line;
4508 	    db->generation++;
4509 	  }
4510 	  break;
4511 	case TR_UPDATE_MD5:
4512 	{ graph *src = tr->update.md5.graph;
4513 	  md5_byte_t *digest = tr->update.md5.digest;
4514 	  if ( digest )
4515 	  { sum_digest(digest, src->digest);
4516 	    src->md5 = TRUE;
4517 	    rdf_free(db, digest, sizeof(md5_byte_t)*16);
4518 	  } else
4519 	  { src->md5 = FALSE;
4520 	  }
4521 	  break;
4522 	}
4523 	case TR_RESET:
4524 	  db->tr_reset = FALSE;
4525 	  reset_db(db);
4526 	  break;
4527 	case TR_VOID:
4528 	  break;
4529 	default:
4530 	  assert(0);
4531       }
4532 
4533       rdf_free(db, tr, sizeof(*tr));
4534     }
4535   }
4536 
4537   return TRUE;
4538 }
4539 
4540 
4541 static int
commit_transaction(rdf_db * db,term_t id)4542 commit_transaction(rdf_db *db, term_t id)
4543 { int rc;
4544 
4545   db->gc_blocked++;
4546   rc = commit_transaction_int(db, id);
4547   db->gc_blocked--;
4548 
4549   return rc;
4550 }
4551 
4552 
4553 static foreign_t
rdf_transaction(term_t goal,term_t id)4554 rdf_transaction(term_t goal, term_t id)
4555 { int rc;
4556   rdf_db *db = DB;
4557   active_transaction me;
4558 
4559   if ( !WRLOCK(db, TRUE) )
4560     return FALSE;
4561 
4562   open_transaction(db);
4563   me.parent = db->tr_active;
4564   me.id = id;
4565   db->tr_active = &me;
4566 
4567   rc = PL_call_predicate(NULL, PL_Q_PASS_EXCEPTION, PRED_call1, goal);
4568 
4569   if ( rc )
4570   { int empty = (db->tr_last == NULL || db->tr_last->type == TR_MARK);
4571 
4572     if ( empty || db->tr_nesting > 0 )
4573     { commit_transaction(db, id);
4574     } else
4575     { term_t be;
4576 
4577       if ( !(be=PL_new_term_ref()) ||
4578 	   !put_begin_end(be, FUNCTOR_begin1, 0) ||
4579 	   !broadcast(EV_TRANSACTION, (void*)id, (void*)be) ||
4580 	   !put_begin_end(be, FUNCTOR_end1, 0) )
4581 	return FALSE;
4582 
4583       if ( !LOCKOUT_READERS(db) )	/* interrupt, timeout */
4584       { broadcast(EV_TRANSACTION, (void*)id, (void*)be);
4585 	rc = FALSE;
4586 	goto discard;
4587       }
4588       commit_transaction(db, id);
4589       REALLOW_READERS(db);
4590       if ( !broadcast(EV_TRANSACTION, (void*)id, (void*)be) )
4591 	return FALSE;
4592     }
4593   } else
4594   { discard:
4595     discard_transaction(db);
4596   }
4597   db->tr_active = me.parent;
4598   WRUNLOCK(db);
4599 
4600   return rc;
4601 }
4602 
4603 
4604 static foreign_t
rdf_active_transactions(term_t list)4605 rdf_active_transactions(term_t list)
4606 { rdf_db *db = DB;
4607   term_t tail = PL_copy_term_ref(list);
4608   term_t head = PL_new_term_ref();
4609   active_transaction *ot;
4610 
4611   for(ot = db->tr_active; ot; ot=ot->parent)
4612   { if ( !PL_unify_list(tail, head, tail) ||
4613 	 !PL_unify(head, ot->id) )
4614       return FALSE;
4615   }
4616 
4617   return PL_unify_nil(tail);
4618 }
4619 
4620 
4621 		 /*******************************
4622 		 *	     PREDICATES		*
4623 		 *******************************/
4624 
4625 static foreign_t
rdf_assert4(term_t subject,term_t predicate,term_t object,term_t src)4626 rdf_assert4(term_t subject, term_t predicate, term_t object, term_t src)
4627 { rdf_db *db = DB;
4628   triple *t = new_triple(db);
4629 
4630   if ( !get_triple(db, subject, predicate, object, t) )
4631   { free_triple(db, t);
4632     return FALSE;
4633   }
4634   if ( src )
4635   { if ( !get_graph(src, t) )
4636     { free_triple(db, t);
4637       return FALSE;
4638     }
4639   } else
4640   { t->graph = ATOM_user;
4641     t->line = NO_LINE;
4642   }
4643 
4644   lock_atoms(t);
4645   if ( !WRLOCK(db, FALSE) )
4646   { free_triple(db, t);
4647     return FALSE;
4648   }
4649 
4650   if ( db->tr_first )
4651   { record_transaction(db, TR_ASSERT, t);
4652   } else
4653   { link_triple(db, t);
4654     db->generation++;
4655   }
4656   WRUNLOCK(db);
4657 
4658   return TRUE;
4659 }
4660 
4661 
4662 static foreign_t
rdf_assert3(term_t subject,term_t predicate,term_t object)4663 rdf_assert3(term_t subject, term_t predicate, term_t object)
4664 { return rdf_assert4(subject, predicate, object, 0);
4665 }
4666 
4667 
4668 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4669 inc_active_queries(rdf_db *db);
4670 dec_active_queries(rdf_db *db);
4671 
4672 TBD: Either delete this or use atomic inc/dec.
4673 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4674 
4675 static void
inc_active_queries(rdf_db * db)4676 inc_active_queries(rdf_db *db)
4677 { LOCK_MISC(db);
4678   db->active_queries++;
4679   UNLOCK_MISC(db);
4680 }
4681 
4682 
4683 static void
dec_active_queries(rdf_db * db)4684 dec_active_queries(rdf_db *db)
4685 { LOCK_MISC(db);
4686   db->active_queries--;
4687   assert(db->active_queries>=0);
4688   UNLOCK_MISC(db);
4689 }
4690 
4691 
4692 typedef struct search_state
4693 { rdf_db       *db;			/* our database */
4694   term_t	subject;		/* Prolog term references */
4695   term_t 	object;
4696   term_t 	predicate;
4697   term_t 	src;
4698   term_t 	realpred;
4699   unsigned 	locked : 1;		/* State has been locked */
4700   unsigned	allocated : 1;		/* State has been allocated */
4701   unsigned	flags;			/* Misc flags controlling search */
4702   atom_t	prefix;			/* prefix and like search */
4703   avl_enum     *literal_state;		/* Literal search state */
4704   literal      *literal_cursor;		/* pointer in current literal */
4705   literal_ex    lit_ex;			/* extended literal for fast compare */
4706   triple       *cursor;			/* Pointer in triple DB */
4707   triple	pattern;		/* Pattern triple */
4708 } search_state;
4709 
4710 
4711 static void	free_search_state(search_state *state);
4712 
4713 static void
init_cursor_from_literal(search_state * state,literal * cursor)4714 init_cursor_from_literal(search_state *state, literal *cursor)
4715 { triple *p = &state->pattern;
4716   unsigned long iv;
4717   int i;
4718 
4719   DEBUG(3,
4720 	Sdprintf("Trying literal search for ");
4721 	print_literal(cursor);
4722 	Sdprintf("\n"));
4723 
4724   p->indexed |= BY_O;
4725   p->indexed &= ~BY_S;			/* we do not have index BY_SO */
4726   switch(p->indexed)
4727   { case BY_O:
4728       iv = literal_hash(cursor);
4729       break;
4730     case BY_OP:
4731       iv = predicate_hash(p->predicate.r) ^ literal_hash(cursor);
4732       break;
4733     default:
4734       iv = 0;				/* make compiler silent */
4735       assert(0);
4736   }
4737 
4738   i = (int)(iv % (long)state->db->table_size[p->indexed]);
4739   state->cursor = state->db->table[p->indexed][i];
4740   state->literal_cursor = cursor;
4741 }
4742 
4743 
4744 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4745 (*) update_hash() is there to update  the   hash  after  a change to the
4746 predicate organization. If we do  not  have   a  predicate  or we do not
4747 search using rdf_has/3, this is not needed.
4748 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4749 
4750 static int
init_search_state(search_state * state)4751 init_search_state(search_state *state)
4752 { triple *p = &state->pattern;
4753 
4754   if ( get_partial_triple(state->db,
4755 			  state->subject, state->predicate, state->object,
4756 			  state->src, p) != TRUE )
4757   { free_triple(state->db, p);
4758     return FALSE;
4759   }
4760 
4761   if ( !RDLOCK(state->db) )
4762   { free_triple(state->db, p);
4763     return FALSE;
4764   }
4765   state->locked = TRUE;
4766   if ( p->predicate.r && (state->flags & MATCH_SUBPROPERTY) ) /* See (*) */
4767   { if ( !update_hash(state->db) )
4768     { free_search_state(state);
4769       return FALSE;
4770     }
4771   }
4772 
4773   if ( (p->match == STR_MATCH_PREFIX ||	p->match == STR_MATCH_LIKE) &&
4774        p->indexed != BY_SP &&
4775        (state->prefix = first_atom(p->object.literal->value.string, p->match)))
4776   { literal lit;
4777     literal **rlitp;
4778 
4779     lit = *p->object.literal;
4780     lit.value.string = state->prefix;
4781     state->literal_state = rdf_malloc(state->db,
4782 				      sizeof(*state->literal_state));
4783     state->lit_ex.literal = &lit;
4784     prepare_literal_ex(&state->lit_ex);
4785     rlitp = avlfindfirst(&state->db->literals, &state->lit_ex, state->literal_state);
4786     if ( rlitp )
4787     { init_cursor_from_literal(state, *rlitp);
4788     } else
4789     { free_search_state(state);
4790       return FALSE;
4791     }
4792   } else
4793   { state->cursor = state->db->table[p->indexed]
4794     				    [triple_hash(state->db, p, p->indexed)];
4795   }
4796 
4797   return TRUE;
4798 }
4799 
4800 
4801 static void
free_search_state(search_state * state)4802 free_search_state(search_state *state)
4803 { if ( state->locked )
4804   { RDUNLOCK(state->db);
4805   }
4806 
4807   free_triple(state->db, &state->pattern);
4808   if ( state->prefix )
4809     PL_unregister_atom(state->prefix);
4810   if ( state->literal_state )
4811     rdf_free(state->db, state->literal_state, sizeof(*state->literal_state));
4812   if ( state->allocated )		/* also means redo! */
4813   { dec_active_queries(state->db);
4814     rdf_free(state->db, state, sizeof(*state));
4815   }
4816 }
4817 
4818 
4819 static foreign_t
allow_retry_state(search_state * state)4820 allow_retry_state(search_state *state)
4821 { if ( !state->allocated )
4822   { search_state *copy = rdf_malloc(state->db, sizeof(*copy));
4823     *copy = *state;
4824     copy->allocated = TRUE;
4825     inc_active_queries(state->db);
4826 
4827     state = copy;
4828   }
4829 
4830   PL_retry_address(state);
4831 }
4832 
4833 
4834 /* TBD: simplify.   Maybe split for resource and literal search, as
4835    both involve mutual exclusive complications to this routine,
4836 */
4837 
4838 static int
next_search_state(search_state * state)4839 next_search_state(search_state *state)
4840 { triple *t = state->cursor;
4841   triple *p = &state->pattern;
4842 
4843 retry:
4844   for( ; t; t = t->next[p->indexed])
4845   { if ( t->is_duplicate && !state->src )
4846       continue;
4847 
4848 					/* hash-collision, skip */
4849     if ( state->literal_state )
4850     { if ( !(t->object_is_literal &&
4851 	     t->object.literal == state->literal_cursor) )
4852 	continue;
4853     }
4854 
4855     if ( match_triples(t, p, state->flags) )
4856     { term_t retpred = state->realpred ? state->realpred : state->predicate;
4857       if ( !unify_triple(state->subject, retpred, state->object,
4858 			 state->src, t, p->inversed) )
4859 	continue;
4860       if ( state->realpred && PL_is_variable(state->predicate) )
4861       { if ( !PL_unify(state->predicate, retpred) )
4862 	  return FALSE;
4863       }
4864 
4865       t=t->next[p->indexed];
4866     inv_alt:
4867       for(; t; t = t->next[p->indexed])
4868       { if ( state->literal_state )
4869 	{ if ( !(t->object_is_literal &&
4870 		 t->object.literal == state->literal_cursor) )
4871 	    continue;
4872 	}
4873 
4874 	if ( match_triples(t, p, state->flags) )
4875 	{ state->cursor = t;
4876 
4877 	  return TRUE;			/* non-deterministic */
4878 	}
4879       }
4880 
4881       if ( (state->flags & MATCH_INVERSE) && inverse_partial_triple(p) )
4882       { t = state->db->table[p->indexed][triple_hash(state->db, p, p->indexed)];
4883 	goto inv_alt;
4884       }
4885 
4886       state->cursor = NULL;		/* deterministic */
4887       return TRUE;
4888     }
4889   }
4890 
4891   if ( (state->flags & MATCH_INVERSE) && inverse_partial_triple(p) )
4892   { t = state->db->table[p->indexed][triple_hash(state->db, p, p->indexed)];
4893     goto retry;
4894   }
4895 
4896   if ( state->literal_state )
4897   { literal **litp;
4898 
4899     if ( (litp = avlfindnext(state->literal_state)) )
4900     { if ( state->prefix )
4901       { literal *lit = *litp;
4902 
4903 	if ( !match_atoms(STR_MATCH_PREFIX, state->prefix, lit->value.string) )
4904 	{ DEBUG(1,
4905 		Sdprintf("Terminated literal iteration from ");
4906 		print_literal(lit);
4907 		Sdprintf("\n"));
4908 	  return FALSE;			/* no longer a prefix */
4909 	}
4910       }
4911 
4912       init_cursor_from_literal(state, *litp);
4913       t = state->cursor;
4914 
4915       goto retry;
4916     }
4917   }
4918 
4919   return FALSE;
4920 }
4921 
4922 
4923 
4924 static foreign_t
rdf(term_t subject,term_t predicate,term_t object,term_t src,term_t realpred,control_t h,unsigned flags)4925 rdf(term_t subject, term_t predicate, term_t object,
4926     term_t src, term_t realpred, control_t h, unsigned flags)
4927 { rdf_db *db = DB;
4928   search_state *state;
4929 
4930   switch(PL_foreign_control(h))
4931   { case PL_FIRST_CALL:
4932     { search_state buf;
4933 
4934       state = &buf;
4935       memset(state, 0, sizeof(*state));
4936       state->db	       = db;
4937       state->subject   = subject;
4938       state->object    = object;
4939       state->predicate = predicate;
4940       state->src       = src;
4941       state->realpred  = realpred;
4942       state->flags     = flags;
4943 
4944       if ( !init_search_state(state) )
4945 	return FALSE;
4946 
4947       goto search;
4948     }
4949     case PL_REDO:
4950     { int rc;
4951 
4952       state = PL_foreign_context_address(h);
4953       assert(state->subject == subject);
4954 
4955     search:
4956       if ( (rc=next_search_state(state)) )
4957       { if ( state->cursor || state->literal_state )
4958 	  return allow_retry_state(state);
4959       }
4960 
4961       free_search_state(state);
4962       return rc;
4963     }
4964     case PL_CUTTED:
4965     { search_state *state = PL_foreign_context_address(h);
4966 
4967       free_search_state(state);
4968       return TRUE;
4969     }
4970     default:
4971       assert(0);
4972       return FALSE;
4973   }
4974 }
4975 
4976 
4977 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4978 rdf(Subject, Predicate, Object)
4979 
4980 Search specifications:
4981 
4982 	Predicate:
4983 
4984 		subPropertyOf(X) = P
4985 
4986 	Object:
4987 
4988 		literal(substring(X), L)
4989 		literal(word(X), L)
4990 		literal(exact(X), L)
4991 		literal(prefix(X), L)
4992 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4993 
4994 
4995 static foreign_t
rdf3(term_t subject,term_t predicate,term_t object,control_t h)4996 rdf3(term_t subject, term_t predicate, term_t object, control_t h)
4997 { return rdf(subject, predicate, object, 0, 0, h,
4998 	     MATCH_EXACT);
4999 }
5000 
5001 
5002 static foreign_t
rdf4(term_t subject,term_t predicate,term_t object,term_t src,control_t h)5003 rdf4(term_t subject, term_t predicate, term_t object,
5004      term_t src, control_t h)
5005 { return rdf(subject, predicate, object, src, 0, h,
5006 	     MATCH_EXACT|MATCH_SRC);
5007 }
5008 
5009 
5010 static foreign_t
rdf_has(term_t subject,term_t predicate,term_t object,term_t realpred,control_t h)5011 rdf_has(term_t subject, term_t predicate, term_t object,
5012 	term_t realpred, control_t h)
5013 { return rdf(subject, predicate, object, 0, realpred, h,
5014 	     MATCH_SUBPROPERTY|MATCH_INVERSE);
5015 }
5016 
5017 
5018 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5019 rdf_estimate_complexity(+S,+P,+O,-C)
5020 
5021 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5022 
5023 static foreign_t
rdf_estimate_complexity(term_t subject,term_t predicate,term_t object,term_t complexity)5024 rdf_estimate_complexity(term_t subject, term_t predicate, term_t object,
5025 		        term_t complexity)
5026 { triple t;
5027   long c;
5028   rdf_db *db = DB;
5029   int rc;
5030 
5031   memset(&t, 0, sizeof(t));
5032   if ( (rc=get_partial_triple(db, subject, predicate, object, 0, &t)) != TRUE )
5033   { if ( rc == -1 )
5034     { return FALSE;			/* error */
5035     } else
5036     { return PL_unify_integer(complexity, 0); 	/* no predicate */
5037     }
5038   }
5039 
5040   if ( !RDLOCK(db) )
5041     return FALSE;
5042   if ( !update_hash(db) )			/* or ignore this problem? */
5043   { RDUNLOCK(db);
5044     free_triple(db, &t);
5045     return FALSE;
5046   }
5047 
5048   if ( t.indexed == BY_NONE )
5049   { c = db->created - db->erased;		/* = totale triple count */
5050 #if 0
5051   } else if ( t.indexed == BY_P )
5052   { c = t.predicate.r->triple_count;		/* must sum over children */
5053 #endif
5054   } else
5055   { c = db->counts[t.indexed][triple_hash(db, &t, t.indexed)];
5056   }
5057 
5058   rc = PL_unify_integer(complexity, c);
5059   RDUNLOCK(db);
5060   free_triple(db, &t);
5061 
5062   return rc;
5063 }
5064 
5065 
5066 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5067 current_literal(?Literals)
5068 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5069 
5070 static foreign_t
rdf_current_literal(term_t t,control_t h)5071 rdf_current_literal(term_t t, control_t h)
5072 { rdf_db *db = DB;
5073   literal **data;
5074   avl_enum *state;
5075   int rc;
5076 
5077   switch(PL_foreign_control(h))
5078   { case PL_FIRST_CALL:
5079       if ( PL_is_variable(t) )
5080       { state = rdf_malloc(db, sizeof(*state));
5081 
5082 	RDLOCK(db);
5083 	inc_active_queries(db);
5084 	data = avlfindfirst(&db->literals, NULL, state);
5085 	goto next;
5086       } else
5087       { return FALSE;			/* TBD */
5088       }
5089     case PL_REDO:
5090       state = PL_foreign_context_address(h);
5091       data = avlfindnext(state);
5092     next:
5093       for(; data; data=avlfindnext(state))
5094       { literal *lit = *data;
5095 
5096 	if ( unify_literal(t, lit) )
5097 	{ PL_retry_address(state);
5098 	}
5099       }
5100 
5101       rc = FALSE;
5102       goto cleanup;
5103     case PL_CUTTED:
5104       rc = TRUE;
5105 
5106     cleanup:
5107       state = PL_foreign_context_address(h);
5108       avlfinddestroy(state);
5109       rdf_free(db, state, sizeof(*state));
5110       RDUNLOCK(db);
5111       dec_active_queries(db);
5112 
5113       return rc;
5114     default:
5115       assert(0);
5116       return FALSE;
5117   }
5118 }
5119 
5120 
5121 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5122 rdf_update(+Subject, +Predicate, +Object, +Action)
5123 
5124 Update a triple. Please note this is actually erase+assert as the triple
5125 needs to be updated in  the  linked   lists  while  erase simply flags a
5126 triple as `erases' without deleting it   to support queries which active
5127 choicepoints.
5128 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5129 
5130 static int
update_triple(rdf_db * db,term_t action,triple * t)5131 update_triple(rdf_db *db, term_t action, triple *t)
5132 { term_t a = PL_new_term_ref();
5133   triple tmp, *new;
5134   int i;
5135 					/* Create copy in local memory */
5136   tmp = *t;
5137   tmp.allocated = FALSE;
5138   tmp.atoms_locked = FALSE;
5139   if ( t->object_is_literal )
5140     tmp.object.literal = copy_literal(db, t->object.literal);
5141 
5142   if ( !PL_get_arg(1, action, a) )
5143     return type_error(action, "rdf_action");
5144 
5145   if ( PL_is_functor(action, FUNCTOR_subject1) )
5146   { atom_t s;
5147 
5148     if ( !get_atom_ex(a, &s) )
5149       return FALSE;
5150     if ( tmp.subject == s )
5151       return TRUE;			/* no change */
5152 
5153     tmp.subject = s;
5154   } else if ( PL_is_functor(action, FUNCTOR_predicate1) )
5155   { predicate *p;
5156 
5157     if ( !get_predicate(db, a, &p) )
5158       return FALSE;
5159     if ( tmp.predicate.r == p )
5160       return TRUE;			/* no change */
5161 
5162     tmp.predicate.r = p;
5163   } else if ( PL_is_functor(action, FUNCTOR_object1) )
5164   { triple t2;
5165 
5166     memset(&t2, 0, sizeof(t2));
5167 
5168     if ( !get_object(db, a, &t2) )
5169     { free_triple(db, &t2);
5170       return FALSE;
5171     }
5172     if ( match_object(&t2, &tmp, MATCH_QUAL) )
5173     { free_triple(db, &t2);
5174       return TRUE;
5175     }
5176 
5177     if ( tmp.object_is_literal )
5178       free_literal(db, tmp.object.literal);
5179     if ( (tmp.object_is_literal = t2.object_is_literal) )
5180     { tmp.object.literal = t2.object.literal;
5181     } else
5182     { tmp.object.resource = t2.object.resource;
5183     }
5184   } else if ( PL_is_functor(action, FUNCTOR_graph1) )
5185   { triple t2;
5186 
5187     if ( !get_graph(a, &t2) )
5188       return FALSE;
5189     if ( t2.graph == t->graph && t2.line == t->line )
5190       return TRUE;
5191     if ( db->tr_first )
5192     { record_update_src_transaction(db, t, t2.graph, t2.line);
5193     } else
5194     { if ( t->graph )
5195 	unregister_graph(db, t);
5196       t->graph = t2.graph;
5197       t->line = t2.line;
5198       if ( t->graph )
5199 	register_graph(db, t);
5200     }
5201 
5202     return TRUE;			/* considered no change */
5203   } else
5204     return domain_error(action, "rdf_action");
5205 
5206   for(i=0; i<INDEX_TABLES; i++)
5207     tmp.next[i] = NULL;
5208 
5209   new = new_triple(db);
5210   new->subject		 = tmp.subject;
5211   new->predicate.r	 = tmp.predicate.r;
5212   if ( (new->object_is_literal = tmp.object_is_literal) )
5213   { new->object.literal = copy_literal(db, tmp.object.literal);
5214   } else
5215   { new->object.resource = tmp.object.resource;
5216   }
5217   new->graph		 = tmp.graph;
5218   new->line		 = tmp.line;
5219 
5220   free_triple(db, &tmp);
5221   lock_atoms(new);
5222 
5223   if ( db->tr_first )
5224   { record_update_transaction(db, t, new);
5225   } else
5226   { broadcast(EV_UPDATE, t, new);
5227     erase_triple_silent(db, t);
5228     link_triple_silent(db, new);
5229     db->generation++;
5230   }
5231 
5232   return TRUE;
5233 }
5234 
5235 
5236 
5237 static foreign_t
rdf_update5(term_t subject,term_t predicate,term_t object,term_t src,term_t action)5238 rdf_update5(term_t subject, term_t predicate, term_t object, term_t src,
5239 	    term_t action)
5240 { triple t, *p;
5241   int indexed = BY_SP;
5242   int done = 0;
5243   rdf_db *db = DB;
5244 
5245   memset(&t, 0, sizeof(t));
5246 
5247   if ( !get_src(src, &t) ||
5248        !get_triple(db, subject, predicate, object, &t) )
5249     return FALSE;
5250 
5251   if ( !WRLOCK(db, FALSE) )
5252   { free_triple(db, &t);
5253     return FALSE;
5254   }
5255   if ( !update_hash(db) )
5256   { WRUNLOCK(db);
5257     free_triple(db, &t);
5258     return FALSE;
5259   }
5260   p = db->table[indexed][triple_hash(db, &t, indexed)];
5261   for( ; p; p = p->next[indexed])
5262   { if ( match_triples(p, &t, MATCH_EXACT) )
5263     { if ( !update_triple(db, action, p) )
5264       { WRUNLOCK(db);
5265 	free_triple(db, &t);
5266 	return FALSE;			/* type errors */
5267       }
5268       done++;
5269     }
5270   }
5271   free_triple(db, &t);
5272   WRUNLOCK(db);
5273 
5274   return done ? TRUE : FALSE;
5275 }
5276 
5277 
5278 static foreign_t
rdf_update(term_t subject,term_t predicate,term_t object,term_t action)5279 rdf_update(term_t subject, term_t predicate, term_t object, term_t action)
5280 { return rdf_update5(subject, predicate, object, 0, action);
5281 }
5282 
5283 
5284 static foreign_t
rdf_retractall4(term_t subject,term_t predicate,term_t object,term_t src)5285 rdf_retractall4(term_t subject, term_t predicate, term_t object, term_t src)
5286 { triple t, *p;
5287   rdf_db *db = DB;
5288 
5289   memset(&t, 0, sizeof(t));
5290   switch( get_partial_triple(db, subject, predicate, object, src, &t) )
5291   { case 0:				/* no such predicate */
5292       return TRUE;
5293     case -1:				/* error */
5294       return FALSE;
5295   }
5296 
5297   if ( t.graph	)		/* speedup for rdf_retractall(_,_,_,DB) */
5298   { graph *gr = lookup_graph(db, t.graph, FALSE);
5299 
5300     if ( !gr || gr->triple_count == 0 )
5301       return TRUE;
5302   }
5303 
5304   if ( !WRLOCK(db, FALSE) )
5305     return FALSE;
5306 /*			No need, as we do not search with subPropertyOf
5307   if ( !update_hash(db) )
5308   { WRUNLOCK(db);
5309     return FALSE;
5310   }
5311 */
5312   p = db->table[t.indexed][triple_hash(db, &t, t.indexed)];
5313   for( ; p; p = p->next[t.indexed])
5314   { if ( match_triples(p, &t, MATCH_EXACT|MATCH_SRC) )
5315     { if ( t.object_is_literal && t.object.literal->objtype == OBJ_TERM )
5316       { fid_t fid = PL_open_foreign_frame();
5317 	int rc = unify_object(object, p);
5318 	PL_discard_foreign_frame(fid);
5319 	if ( !rc )
5320 	  continue;
5321       }
5322 
5323       if ( db->tr_first )
5324       { if ( db->tr_reset )
5325 	{ WRUNLOCK(db);
5326 	  return permission_error("retract", "triple", "",
5327 				  "rdf_retractall cannot follow "
5328 				  "rdf_reset_db in one transaction");
5329 	}
5330 	record_transaction(db, TR_RETRACT, p);
5331       } else
5332       { erase_triple(db, p);
5333 	db->generation++;
5334       }
5335     }
5336   }
5337 
5338   WRUNLOCK(db);
5339   free_triple(db, &t);
5340 
5341   return TRUE;
5342 }
5343 
5344 
5345 static foreign_t
rdf_retractall3(term_t subject,term_t predicate,term_t object)5346 rdf_retractall3(term_t subject, term_t predicate, term_t object)
5347 { return rdf_retractall4(subject, predicate, object, 0);
5348 }
5349 
5350 
5351 		 /*******************************
5352 		 *	     MONITOR		*
5353 		 *******************************/
5354 
5355 typedef struct broadcast_callback
5356 { struct broadcast_callback *next;
5357   predicate_t		     pred;
5358   long			     mask;
5359 } broadcast_callback;
5360 
5361 static long joined_mask = 0L;
5362 static broadcast_callback *callback_list;
5363 static broadcast_callback *callback_tail;
5364 
5365 static void
do_broadcast(term_t term,long mask)5366 do_broadcast(term_t term, long mask)
5367 { if ( callback_list )
5368   { broadcast_callback *cb;
5369 
5370     for(cb = callback_list; cb; cb = cb->next)
5371     { qid_t qid;
5372       term_t ex;
5373 
5374       if ( !(cb->mask & mask) )
5375 	continue;
5376 
5377       qid = PL_open_query(NULL, PL_Q_CATCH_EXCEPTION, cb->pred, term);
5378       if ( !PL_next_solution(qid) && (ex = PL_exception(qid)) )
5379       { term_t av = PL_new_term_refs(2);
5380 
5381 	PL_cut_query(qid);
5382 
5383 	PL_put_atom(av+0, ATOM_error);
5384 	PL_put_term(av+1, ex);
5385 
5386 	PL_call_predicate(NULL, PL_Q_NORMAL,
5387 			  PL_predicate("print_message", 2, "user"),
5388 			  av);
5389       } else
5390       { PL_close_query(qid);
5391       }
5392     }
5393   }
5394 }
5395 
5396 
5397 /* No longer used, but we keep it for if we need it again
5398 static foreign_t
5399 rdf_broadcast(term_t term, term_t mask)
5400 { long msk;
5401 
5402   if ( !get_long_ex(mask, &msk) )
5403     return FALSE;
5404 
5405   do_broadcast(term, msk);
5406   return TRUE;
5407 }
5408 */
5409 
5410 static int
broadcast(broadcast_id id,void * a1,void * a2)5411 broadcast(broadcast_id id, void *a1, void *a2)
5412 { if ( (joined_mask & id) )
5413   { fid_t fid;
5414     term_t term;
5415     functor_t funct;
5416 
5417     if ( !(fid = PL_open_foreign_frame()) ||
5418 	 !(term = PL_new_term_ref()) )
5419       return FALSE;
5420 
5421     switch(id)
5422     { case EV_ASSERT:
5423       case EV_ASSERT_LOAD:
5424 	funct = FUNCTOR_assert4;
5425         goto assert_retract;
5426       case EV_RETRACT:
5427 	funct = FUNCTOR_retract4;
5428       assert_retract:
5429       { triple *t = a1;
5430 	term_t tmp;
5431 
5432 	if ( !(tmp = PL_new_term_refs(4)) ||
5433 	     !PL_put_atom(tmp+0, t->subject) ||
5434 	     !PL_put_atom(tmp+1, t->predicate.r->name) ||
5435 	     !unify_object(tmp+2, t) ||
5436 	     !unify_graph(tmp+3, t) ||
5437 	     !PL_cons_functor_v(term, funct, tmp) )
5438 	  return FALSE;
5439 	break;
5440       }
5441       case EV_UPDATE:
5442       { triple *t = a1;
5443 	triple *new = a2;
5444 	term_t tmp, a;
5445 	functor_t action;
5446 	int rc;
5447 
5448 	if ( !(tmp = PL_new_term_refs(5)) ||
5449 	     !(a = PL_new_term_ref()) ||
5450 	     !PL_put_atom(tmp+0, t->subject) ||
5451 	     !PL_put_atom(tmp+1, t->predicate.r->name) ||
5452 	     !unify_object(tmp+2, t) ||
5453 	     !unify_graph(tmp+3, t) )
5454 	  return FALSE;
5455 
5456 	if ( t->subject != new->subject )
5457 	{ action = FUNCTOR_subject1;
5458 	  rc = PL_put_atom(a, new->subject);
5459 	} else if ( t->predicate.r != new->predicate.r )
5460 	{ action = FUNCTOR_predicate1;
5461 	  rc = PL_put_atom(a, new->predicate.r->name);
5462 	} else if ( !match_object(t, new, MATCH_QUAL) )
5463 	{ action = FUNCTOR_object1;
5464 	  rc = unify_object(a, new);
5465 	} else if ( !same_graph(t, new) )
5466 	{ action = FUNCTOR_graph1;
5467 	  rc = unify_graph(a, new);
5468 	} else
5469 	{ return TRUE;			/* no change */
5470 	}
5471 
5472         if ( !rc ||
5473 	     !PL_cons_functor_v(tmp+4, action, a) ||
5474 	     !PL_cons_functor_v(term, FUNCTOR_update5, tmp) )
5475 	  return FALSE;
5476 	break;
5477       }
5478       case EV_NEW_LITERAL:
5479       { literal *lit = a1;
5480 	term_t tmp;
5481 
5482 	if ( !(tmp = PL_new_term_refs(1)) ||
5483 	     !unify_literal(tmp, lit) ||
5484 	     !PL_cons_functor_v(term, FUNCTOR_new_literal1, tmp) )
5485 	  return FALSE;
5486 	break;
5487       }
5488       case EV_OLD_LITERAL:
5489       { literal *lit = a1;
5490 	term_t tmp;
5491 
5492 	if ( !(tmp = PL_new_term_refs(1)) ||
5493 	     !unify_literal(tmp, lit) ||
5494 	     !PL_cons_functor_v(term, FUNCTOR_old_literal1, tmp) )
5495 	  return FALSE;
5496 	break;
5497       }
5498       case EV_LOAD:
5499       { term_t ctx = (term_t)a1;
5500 	atom_t be  = (atom_t)a2;
5501 	term_t tmp;
5502 
5503 	if ( !(tmp = PL_new_term_refs(2)) ||
5504 	     !PL_put_atom(tmp+0, be) ||		/* begin/end */
5505 	     !PL_put_term(tmp+1, ctx) ||
5506 	     !PL_cons_functor_v(term, FUNCTOR_load2, tmp) )
5507 	  return FALSE;
5508 	break;
5509       }
5510       case EV_TRANSACTION:
5511       { term_t ctx = (term_t)a1;
5512 	term_t be  = (term_t)a2;
5513 	term_t tmp;
5514 
5515 	if ( !(tmp = PL_new_term_refs(2)) ||
5516 	     !PL_put_term(tmp+0, be) ||		/* begin/end */
5517 	     !PL_put_term(tmp+1, ctx) ||
5518 	     !PL_cons_functor_v(term, FUNCTOR_transaction2, tmp) )
5519 	  return FALSE;
5520 	break;
5521       }
5522       case EV_REHASH:
5523       { atom_t be = (atom_t)a1;
5524 	term_t tmp = PL_new_term_refs(1);
5525 
5526 	if ( !(tmp = PL_new_term_refs(1)) ||
5527 	     !PL_put_atom(tmp+0, be) ||
5528 	     !PL_cons_functor_v(term, FUNCTOR_rehash1, tmp) )
5529 	  return FALSE;
5530 	break;
5531       }
5532       default:
5533 	assert(0);
5534     }
5535 
5536     do_broadcast(term, id);
5537 
5538     PL_discard_foreign_frame(fid);
5539   }
5540 
5541   return TRUE;
5542 }
5543 
5544 
5545 static foreign_t
rdf_monitor(term_t goal,term_t mask)5546 rdf_monitor(term_t goal, term_t mask)
5547 { atom_t name;
5548   broadcast_callback *cb;
5549   predicate_t p;
5550   long msk;
5551   module_t m = NULL;
5552 
5553   PL_strip_module(goal, &m, goal);
5554 
5555   if ( !get_atom_ex(goal, &name) ||
5556        !get_long_ex(mask, &msk) )
5557     return FALSE;
5558 
5559   p = PL_pred(PL_new_functor(name, 1), m);
5560 
5561   for(cb=callback_list; cb; cb = cb->next)
5562   { if ( cb->pred == p )
5563     { broadcast_callback *cb2;
5564       cb->mask = msk;
5565 
5566       joined_mask = 0L;
5567       for(cb2=callback_list; cb2; cb2 = cb2->next)
5568 	joined_mask |= cb2->mask;
5569       DEBUG(2, Sdprintf("Set mask to 0x%x\n", joined_mask));
5570 
5571       return TRUE;
5572     }
5573   }
5574 
5575   cb = PL_malloc(sizeof(*cb));
5576   cb->next = NULL;
5577   cb->mask = msk;
5578   cb->pred = p;
5579   if ( callback_list )
5580   { callback_tail->next = cb;
5581     callback_tail = cb;
5582   } else
5583   { callback_list = callback_tail = cb;
5584   }
5585   joined_mask |= msk;
5586 
5587   return TRUE;
5588 }
5589 
5590 
5591 
5592 		 /*******************************
5593 		 *	       QUERY		*
5594 		 *******************************/
5595 
5596 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5597 Enumerate the known subjects. This uses the   `first' flag on triples to
5598 avoid returning the same resource multiple   times.  As the `by_none' is
5599 never re-hashed, we don't mark this query in the `active_queries'.
5600 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5601 
5602 static foreign_t
rdf_subject(term_t subject,control_t h)5603 rdf_subject(term_t subject, control_t h)
5604 { triple *t;
5605   rdf_db *db = DB;
5606 
5607   switch(PL_foreign_control(h))
5608   { case PL_FIRST_CALL:
5609     { if ( PL_is_variable(subject) )
5610       { t = db->table[BY_NONE][0];
5611 	goto next;
5612       } else
5613       { atom_t a;
5614 
5615 	if ( get_atom_ex(subject, &a) )
5616 	{ if ( first(db, a) )
5617 	    return TRUE;
5618 	  return FALSE;
5619 	}
5620 
5621 	return FALSE;
5622       }
5623     }
5624     case PL_REDO:
5625       t = PL_foreign_context_address(h);
5626     next:
5627       for(; t; t = t->next[BY_NONE])
5628       { if ( t->first && !t->erased )
5629 	{ if ( !PL_unify_atom(subject, t->subject) )
5630 	    return FALSE;
5631 
5632 	  t = t->next[BY_NONE];
5633 	  if ( t )
5634 	    PL_retry_address(t);
5635 	  return TRUE;
5636 	}
5637       }
5638       return FALSE;
5639     case PL_CUTTED:
5640       return TRUE;
5641     default:
5642       assert(0);
5643       return FALSE;
5644   }
5645 }
5646 
5647 
5648 static foreign_t
rdf_set_predicate(term_t pred,term_t option)5649 rdf_set_predicate(term_t pred, term_t option)
5650 { predicate *p;
5651   rdf_db *db = DB;
5652 
5653   if ( !get_predicate(db, pred, &p) )
5654     return FALSE;
5655 
5656   if ( PL_is_functor(option, FUNCTOR_symmetric1) )
5657   { int val;
5658 
5659     if ( !get_bool_arg_ex(1, option, &val) )
5660       return FALSE;
5661 
5662     p->inverse_of = p;
5663     return TRUE;
5664   } else if ( PL_is_functor(option, FUNCTOR_inverse_of1) )
5665   { term_t a = PL_new_term_ref();
5666     predicate *i;
5667 
5668     _PL_get_arg(1, option, a);
5669     if ( PL_get_nil(a) )
5670     { if ( p->inverse_of )
5671       { p->inverse_of->inverse_of = NULL;
5672 	p->inverse_of = NULL;
5673       }
5674     } else
5675     { if ( !get_predicate(db, a, &i) )
5676 	return FALSE;
5677 
5678       p->inverse_of = i;
5679       i->inverse_of = p;
5680     }
5681     return TRUE;
5682   } else if ( PL_is_functor(option, FUNCTOR_transitive1) )
5683   { int val;
5684 
5685     if ( !get_bool_arg_ex(1, option, &val) )
5686       return FALSE;
5687 
5688     p->transitive = val;
5689 
5690     return TRUE;
5691   } else
5692     return type_error(option, "predicate_option");
5693 }
5694 
5695 
5696 #define PRED_PROPERTY_COUNT 9
5697 static functor_t predicate_key[PRED_PROPERTY_COUNT];
5698 
5699 static int
unify_predicate_property(rdf_db * db,predicate * p,term_t option,functor_t f)5700 unify_predicate_property(rdf_db *db, predicate *p, term_t option, functor_t f)
5701 { if ( f == FUNCTOR_symmetric1 )
5702     return PL_unify_term(option, PL_FUNCTOR, f,
5703 			 PL_BOOL, p->inverse_of == p ? TRUE : FALSE);
5704   else if ( f == FUNCTOR_inverse_of1 )
5705   { if ( p->inverse_of )
5706       return PL_unify_term(option, PL_FUNCTOR, f,
5707 			   PL_ATOM, p->inverse_of->name);
5708     else
5709       return FALSE;
5710   } else if ( f == FUNCTOR_transitive1 )
5711   { return PL_unify_term(option, PL_FUNCTOR, f,
5712 			 PL_BOOL, p->transitive);
5713   } else if ( f == FUNCTOR_triples1 )
5714   { return PL_unify_term(option, PL_FUNCTOR, f,
5715 			 PL_LONG, p->triple_count);
5716   } else if ( f == FUNCTOR_rdf_subject_branch_factor1 )
5717   { return PL_unify_term(option, PL_FUNCTOR, f,
5718 		 PL_FLOAT, subject_branch_factor(db, p, DISTINCT_DIRECT));
5719   } else if ( f == FUNCTOR_rdf_object_branch_factor1 )
5720   { return PL_unify_term(option, PL_FUNCTOR, f,
5721 		 PL_FLOAT, object_branch_factor(db, p, DISTINCT_DIRECT));
5722   } else if ( f == FUNCTOR_rdfs_subject_branch_factor1 )
5723   { return PL_unify_term(option, PL_FUNCTOR, f,
5724 		 PL_FLOAT, subject_branch_factor(db, p, DISTINCT_SUB));
5725   } else if ( f == FUNCTOR_rdfs_object_branch_factor1 )
5726   { return PL_unify_term(option, PL_FUNCTOR, f,
5727 		 PL_FLOAT, object_branch_factor(db, p, DISTINCT_SUB));
5728   } else
5729   { assert(0);
5730     return FALSE;
5731   }
5732 }
5733 
5734 
5735 static foreign_t
rdf_current_predicates(term_t preds)5736 rdf_current_predicates(term_t preds)
5737 { rdf_db *db = DB;
5738   int i;
5739   term_t head = PL_new_term_ref();
5740   term_t tail = PL_copy_term_ref(preds);
5741 
5742   LOCK_MISC(db);
5743   for(i=0; i<db->pred_table_size; i++)
5744   { predicate *p;
5745 
5746     for(p=db->pred_table[i]; p; p = p->next)
5747     { if ( !PL_unify_list(tail, head, tail) ||
5748 	   !PL_unify_atom(head, p->name) )
5749       { UNLOCK_MISC(db);
5750 	return FALSE;
5751       }
5752     }
5753   }
5754   UNLOCK_MISC(db);
5755 
5756   return PL_unify_nil(tail);
5757 }
5758 
5759 
5760 static foreign_t
rdf_predicate_property(term_t pred,term_t option,control_t h)5761 rdf_predicate_property(term_t pred, term_t option, control_t h)
5762 { int n;
5763   predicate *p;
5764   rdf_db *db = DB;
5765 
5766   if ( !predicate_key[0] )
5767   { int i = 0;
5768 
5769     predicate_key[i++] = FUNCTOR_symmetric1;
5770     predicate_key[i++] = FUNCTOR_inverse_of1;
5771     predicate_key[i++] = FUNCTOR_transitive1;
5772     predicate_key[i++] = FUNCTOR_triples1;
5773     predicate_key[i++] = FUNCTOR_rdf_subject_branch_factor1;
5774     predicate_key[i++] = FUNCTOR_rdf_object_branch_factor1;
5775     predicate_key[i++] = FUNCTOR_rdfs_subject_branch_factor1;
5776     predicate_key[i++] = FUNCTOR_rdfs_object_branch_factor1;
5777     assert(i < PRED_PROPERTY_COUNT);
5778   }
5779 
5780   switch(PL_foreign_control(h))
5781   { case PL_FIRST_CALL:
5782     { functor_t f;
5783 
5784       if ( PL_is_variable(option) )
5785       { n = 0;
5786 	goto redo;
5787       } else if ( PL_get_functor(option, &f) )
5788       { for(n=0; predicate_key[n]; n++)
5789 	{ if ( predicate_key[n] == f )
5790 	  { if ( !get_predicate(db, pred, &p) )
5791 	      return FALSE;
5792 	    return unify_predicate_property(db, p, option, f);
5793 	  }
5794 	}
5795 	return domain_error(option, "rdf_predicate_property");
5796       } else
5797 	return type_error(option, "rdf_predicate_property");
5798     }
5799     case PL_REDO:
5800       n = (int)PL_foreign_context(h);
5801     redo:
5802       if ( !get_predicate(db, pred, &p) )
5803 	return FALSE;
5804       for( ; predicate_key[n]; n++ )
5805       { if ( unify_predicate_property(db, p, option, predicate_key[n]) )
5806 	{ n++;
5807 	  if ( predicate_key[n] )
5808 	    PL_retry(n);
5809 	  return TRUE;
5810 	}
5811       }
5812       return FALSE;
5813     case PL_CUTTED:
5814       return TRUE;
5815     default:
5816       assert(0);
5817       return TRUE;
5818   }
5819 }
5820 
5821 
5822 		 /*******************************
5823 		 *     TRANSITIVE RELATIONS	*
5824 		 *******************************/
5825 
5826 typedef struct visited
5827 { struct visited *next;			/* next in list */
5828   struct visited *hash_link;		/* next in hashed link */
5829   atom_t resource;			/* visited resource */
5830   uintptr_t distance;			/* Distance */
5831 } visited;
5832 
5833 
5834 #define AGENDA_LOCAL_MAGIC 742736360
5835 #define AGENDA_SAVED_MAGIC 742736362
5836 
5837 typedef struct agenda
5838 { visited *head;			/* visited list */
5839   visited *tail;			/* tail of visited list */
5840   visited *to_expand;			/* next to expand */
5841   visited *to_return;			/* next to return */
5842   visited **hash;			/* hash-table for cycle detection */
5843   int	  magic;			/* AGENDA_*_MAGIC */
5844   int	  hash_size;
5845   int     size;				/* size of the agenda */
5846   uintptr_t max_d;			/* max distance */
5847   triple  pattern;			/* partial triple used as pattern */
5848   atom_t  target;			/* resource we are seaching for */
5849   struct chunk  *chunk;			/* node-allocation chunks */
5850 } agenda;
5851 
5852 #ifndef offsetof
5853 #define offsetof(structure, field) ((size_t) &(((structure *)NULL)->field))
5854 #endif
5855 #define CHUNK_SIZE(n) offsetof(chunk, nodes[n])
5856 
5857 typedef struct chunk
5858 { struct chunk *next;
5859   int	 used;				/* # used elements */
5860   int	 size;				/* size of the chunk */
5861   struct visited nodes[1];		/* nodes in the chunk */
5862 } chunk;
5863 
5864 
5865 static visited *
alloc_node_agenda(rdf_db * db,agenda * a)5866 alloc_node_agenda(rdf_db *db, agenda *a)
5867 { chunk *c;
5868   int size;
5869 
5870   if ( (c=a->chunk) )
5871   { if ( c->used < c->size )
5872     { visited *v = &c->nodes[c->used++];
5873 
5874       return v;
5875     }
5876   }
5877 
5878   size = (a->size == 0 ? 8 : 1024);
5879   c = rdf_malloc(db, CHUNK_SIZE(size));
5880   c->size = size;
5881   c->used = 1;
5882   c->next = a->chunk;
5883   a->chunk = c;
5884 
5885   return &c->nodes[0];
5886 }
5887 
5888 
5889 static void
empty_agenda(rdf_db * db,agenda * a)5890 empty_agenda(rdf_db *db, agenda *a)
5891 { chunk *c, *n;
5892 
5893   for(c=a->chunk; c; c = n)
5894   { n = c->next;
5895     rdf_free(db, c, CHUNK_SIZE(c->size));
5896   }
5897   if ( a->hash )
5898     rdf_free(db, a->hash, sizeof(visited*)*a->hash_size);
5899 
5900   if ( a->magic == AGENDA_SAVED_MAGIC )
5901   {  a->magic = 0;
5902      rdf_free(db, a, sizeof(*a));
5903   } else
5904   { a->magic = 0;
5905   }
5906 }
5907 
5908 
5909 static void
unlock_and_empty_agenda(rdf_db * db,agenda * a)5910 unlock_and_empty_agenda(rdf_db *db, agenda *a)
5911 { RDUNLOCK(db);
5912   empty_agenda(db, a);
5913 }
5914 
5915 
5916 static agenda *
save_agenda(rdf_db * db,agenda * a)5917 save_agenda(rdf_db *db, agenda *a)
5918 { agenda *r = rdf_malloc(db, sizeof(*r));
5919 
5920   assert(a->magic == AGENDA_LOCAL_MAGIC);
5921   *r = *a;
5922   r->magic = AGENDA_SAVED_MAGIC;
5923 
5924   return r;
5925 }
5926 
5927 
5928 static void
hash_agenda(rdf_db * db,agenda * a,int size)5929 hash_agenda(rdf_db *db, agenda *a, int size)
5930 { if ( a->hash )
5931     rdf_free(db, a->hash, sizeof(*a->hash));
5932   if ( size > 0 )
5933   { visited *v;
5934 
5935     a->hash = rdf_malloc(db, sizeof(visited*)*size);
5936     memset(a->hash, 0, sizeof(visited*)*size);
5937     a->hash_size = size;
5938 
5939     for(v=a->head; v; v = v->next)
5940     { int key = atom_hash(v->resource)&(size-1);
5941 
5942       v->hash_link = a->hash[key];
5943       a->hash[key] = v;
5944     }
5945   }
5946 }
5947 
5948 
5949 static int
in_aganda(agenda * a,atom_t resource)5950 in_aganda(agenda *a, atom_t resource)
5951 { visited *v;
5952 
5953   if ( a->hash )
5954   { int key = atom_hash(resource)&(a->hash_size-1);
5955     v = a->hash[key];
5956 
5957     for( ; v; v = v->hash_link )
5958     { if ( v->resource == resource )
5959 	return TRUE;
5960     }
5961   } else
5962   { v = a->head;
5963 
5964     for( ; v; v = v->next )
5965     { if ( v->resource == resource )
5966 	return TRUE;
5967     }
5968   }
5969 
5970   return FALSE;
5971 }
5972 
5973 
5974 static visited *
append_agenda(rdf_db * db,agenda * a,atom_t res,uintptr_t d)5975 append_agenda(rdf_db *db, agenda *a, atom_t res, uintptr_t d)
5976 { visited *v = a->head;
5977 
5978   if ( in_aganda(a, res) )
5979     return NULL;
5980 
5981   db->agenda_created++;			/* statistics */
5982 
5983   a->size++;
5984   if ( !a->hash_size && a->size > 32 )
5985     hash_agenda(db, a, 64);
5986   else if ( a->size > a->hash_size * 4 )
5987     hash_agenda(db, a, a->hash_size * 4);
5988 
5989   v = alloc_node_agenda(db, a);
5990   v->resource = res;
5991   v->distance = d;
5992   v->next = NULL;
5993   if ( a->tail )
5994   { a->tail->next = v;
5995     a->tail = v;
5996   } else
5997   { a->head = a->tail = v;
5998   }
5999 
6000   if ( a->hash_size )
6001   { int key = atom_hash(res)&(a->hash_size-1);
6002 
6003     v->hash_link = a->hash[key];
6004     a->hash[key] = v;
6005   }
6006 
6007   return v;
6008 }
6009 
6010 
6011 static int
can_reach_target(rdf_db * db,agenda * a)6012 can_reach_target(rdf_db *db, agenda *a)
6013 { int indexed = a->pattern.indexed;
6014   int rc = FALSE;
6015   triple *p;
6016 
6017   if ( indexed & BY_S )			/* subj ---> */
6018   { a->pattern.object.resource = a->target;
6019     indexed |= BY_O;
6020   } else
6021   { a->pattern.subject = a->target;
6022     indexed |= BY_S;
6023   }
6024 
6025   p = db->table[indexed][triple_hash(db, &a->pattern, indexed)];
6026   for( ; p; p = p->next[indexed])
6027   { if ( match_triples(p, &a->pattern, MATCH_SUBPROPERTY) )
6028     { rc = TRUE;
6029       break;
6030     }
6031   }
6032 
6033   if ( a->pattern.indexed & BY_S )
6034   { a->pattern.object.resource = 0;
6035   } else
6036   { a->pattern.subject = 0;
6037   }
6038 
6039   return rc;
6040 }
6041 
6042 
6043 
6044 static visited *
bf_expand(rdf_db * db,agenda * a,atom_t resource,uintptr_t d)6045 bf_expand(rdf_db *db, agenda *a, atom_t resource, uintptr_t d)
6046 { triple *p;
6047   int indexed = a->pattern.indexed;
6048   visited *rc = NULL;
6049 
6050   if ( indexed & BY_S )			/* subj ---> */
6051   { a->pattern.subject = resource;
6052   } else
6053   { a->pattern.object.resource = resource;
6054   }
6055 
6056   if ( a->target && can_reach_target(db, a) )
6057   { return append_agenda(db, a, a->target, d);
6058   }
6059 
6060   p = db->table[indexed][triple_hash(db, &a->pattern, indexed)];
6061   for( ; p; p = p->next[indexed])
6062   { if ( match_triples(p, &a->pattern, MATCH_SUBPROPERTY) )
6063     { atom_t found;
6064       visited *v;
6065 
6066       if ( indexed & BY_S )
6067       { if ( p->object_is_literal )
6068 	  continue;
6069 	found = p->object.resource;
6070       } else
6071       { found = p->subject;
6072       }
6073 
6074       v = append_agenda(db, a, found, d);
6075       if ( !rc )
6076 	rc = v;
6077       if ( found == a->target )
6078 	break;
6079     }
6080   }
6081 					/* TBD: handle owl:inverseOf */
6082 					/* TBD: handle owl:sameAs */
6083   return rc;
6084 }
6085 
6086 
6087 static visited *
next_agenda(rdf_db * db,agenda * a)6088 next_agenda(rdf_db *db, agenda *a)
6089 { visited *v;
6090 
6091   if ( (v=a->to_return) )
6092   { ok:
6093 
6094     a->to_return = a->to_return->next;
6095 
6096     return v;
6097   }
6098 
6099   while( a->to_expand )
6100   { uintptr_t next_d = a->to_expand->distance+1;
6101 
6102     if ( next_d >= a->max_d )
6103       return NULL;
6104 
6105     a->to_return = bf_expand(db, a,
6106 			     a->to_expand->resource,
6107 			     next_d);
6108     a->to_expand = a->to_expand->next;
6109 
6110     if ( (v=a->to_return) )
6111       goto ok;
6112   }
6113 
6114   return NULL;
6115 }
6116 
6117 
6118 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6119 rdf_reachable(+Subject, +Predicate, -Object)
6120 rdf_reachable(-Subject, +Predicate, ?Object)
6121     Examine transitive relations, reporting all `Object' that can be
6122     reached from `Subject' using Predicate without going into a loop
6123     if the relation is cyclic.
6124 
6125 directly_attached() deals with the posibility that  the predicate is not
6126 defined and Subject and Object are  the   same.  Should  use clean error
6127 handling, but that means a lot of changes. For now this will do.
6128 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6129 
6130 static int
directly_attached(term_t pred,term_t from,term_t to)6131 directly_attached(term_t pred, term_t from, term_t to)
6132 { if ( PL_is_atom(pred) && PL_is_atom(from) )
6133     return PL_unify(to, from);
6134 
6135   return FALSE;
6136 }
6137 
6138 
6139 static int
unify_distance(term_t d,uintptr_t dist)6140 unify_distance(term_t d, uintptr_t dist)
6141 { if ( d )
6142     return PL_unify_integer(d, dist);
6143 
6144   return TRUE;
6145 }
6146 
6147 
6148 static foreign_t
rdf_reachable(term_t subj,term_t pred,term_t obj,term_t max_d,term_t d,control_t h)6149 rdf_reachable(term_t subj, term_t pred, term_t obj,
6150 	      term_t max_d, term_t d,
6151 	      control_t h)
6152 { rdf_db *db = DB;
6153 
6154   switch(PL_foreign_control(h))
6155   { case PL_FIRST_CALL:
6156     { agenda a;
6157       visited *v;
6158       term_t target_term;
6159       int is_det = FALSE;
6160 
6161       if ( PL_is_variable(pred) )
6162 	return instantiation_error(pred);
6163 
6164       memset(&a, 0, sizeof(a));
6165       a.magic = AGENDA_LOCAL_MAGIC;
6166       if ( max_d )
6167       { long md;
6168 	atom_t inf;
6169 
6170 	if ( PL_get_atom(max_d, &inf) && inf == ATOM_infinite )
6171 	  a.max_d = (uintptr_t)-1;
6172 	if ( !get_long_ex(max_d, &md) || md < 0 )
6173 	  return FALSE;
6174 	a.max_d = md;
6175       } else
6176       { a.max_d = (uintptr_t)-1;
6177       }
6178 
6179       if ( !PL_is_variable(subj) )		/* subj .... obj */
6180       { switch(get_partial_triple(db, subj, pred, 0, 0, &a.pattern))
6181 	{ case 0:
6182 	    return directly_attached(pred, subj, obj) &&
6183 		   unify_distance(d, 0);
6184 	  case -1:
6185 	    return FALSE;
6186 	}
6187 	is_det = PL_is_ground(obj);
6188 	target_term = obj;
6189       } else if ( !PL_is_variable(obj) )	/* obj .... subj */
6190       {	switch(get_partial_triple(db, 0, pred, obj, 0, &a.pattern))
6191 	{ case 0:
6192 	    return directly_attached(pred, obj, subj);
6193 	  case -1:
6194 	    return FALSE;
6195 	}
6196 	if ( a.pattern.object_is_literal )
6197 	  return FALSE;			/* rdf_reachable(-,+,literal(...)) */
6198 	target_term = subj;
6199       } else
6200 	return instantiation_error(subj);
6201 
6202       if ( !RDLOCK(db) )
6203 	return FALSE;
6204       if ( !update_hash(db) )
6205 	return FALSE;
6206       if ( (a.pattern.indexed & BY_S) ) 	/* subj ... */
6207 	append_agenda(db, &a, a.pattern.subject, 0);
6208       else
6209 	append_agenda(db, &a, a.pattern.object.resource, 0);
6210       a.to_return = a.head;
6211       a.to_expand = a.head;
6212 
6213       while( (v=next_agenda(db, &a)) )
6214       { if ( PL_unify_atom(target_term, v->resource) )
6215 	{ if ( is_det )		/* mode(+, +, +) */
6216 	  { int rc = unify_distance(d, v->distance);
6217 	    unlock_and_empty_agenda(db, &a);
6218 	    return rc;
6219 	  } else if ( unify_distance(d, v->distance) )
6220 	  {				/* mode(+, +, -) or mode(-, +, +) */
6221 	    agenda *ra = save_agenda(db, &a);
6222 	    inc_active_queries(db);
6223 	    DEBUG(9, Sdprintf("Saved agenta to %p\n", ra));
6224 	    PL_retry_address(ra);
6225 	  }
6226 	}
6227       }
6228       unlock_and_empty_agenda(db, &a);
6229       return FALSE;
6230     }
6231     case PL_REDO:
6232     { agenda *a = PL_foreign_context_address(h);
6233       term_t target_term;
6234       visited *v;
6235 
6236       assert(a->magic == AGENDA_SAVED_MAGIC);
6237 
6238       if ( !PL_is_variable(subj) )	/* +, +, - */
6239 	target_term = obj;
6240       else
6241 	target_term = subj;		/* -, +, + */
6242 
6243       while( (v=next_agenda(db, a)) )
6244       { if ( PL_unify_atom(target_term, v->resource) &&
6245 	     unify_distance(d, v->distance) )
6246 	{ assert(a->magic == AGENDA_SAVED_MAGIC);
6247 	  PL_retry_address(a);
6248 	}
6249       }
6250 
6251       dec_active_queries(db);
6252       unlock_and_empty_agenda(db, a);
6253       return FALSE;
6254     }
6255     case PL_CUTTED:
6256     { agenda *a = PL_foreign_context_address(h);
6257 
6258       DEBUG(9, Sdprintf("Cutted; agenda = %p\n", a));
6259 
6260       assert(a->magic == AGENDA_SAVED_MAGIC);
6261 
6262       dec_active_queries(db);
6263       unlock_and_empty_agenda(db, a);
6264       return TRUE;
6265     }
6266     default:
6267       assert(0);
6268       return FALSE;
6269   }
6270 }
6271 
6272 static foreign_t
rdf_reachable3(term_t subj,term_t pred,term_t obj,control_t h)6273 rdf_reachable3(term_t subj, term_t pred, term_t obj, control_t h)
6274 { return rdf_reachable(subj, pred, obj, 0, 0, h);
6275 }
6276 
6277 static foreign_t
rdf_reachable5(term_t subj,term_t pred,term_t obj,term_t max_d,term_t d,control_t h)6278 rdf_reachable5(term_t subj, term_t pred, term_t obj, term_t max_d, term_t d,
6279 	       control_t h)
6280 { return rdf_reachable(subj, pred, obj, max_d, d, h);
6281 }
6282 
6283 
6284 		 /*******************************
6285 		 *	     STATISTICS		*
6286 		 *******************************/
6287 
6288 static functor_t keys[16];		/* initialised in install_rdf_db() */
6289 
6290 static int
unify_statistics(rdf_db * db,term_t key,functor_t f)6291 unify_statistics(rdf_db *db, term_t key, functor_t f)
6292 { int64_t v;
6293 
6294   if ( f == FUNCTOR_triples1 )
6295   { v = db->created - db->erased;
6296   } else if ( f == FUNCTOR_subjects1 )
6297   { v = db->subjects;
6298   } else if ( f == FUNCTOR_predicates1 )
6299   { v = db->pred_count;
6300   } else if ( f == FUNCTOR_core1 )
6301   { v = db->core;
6302   } else if ( f == FUNCTOR_indexed8 )
6303   { int i;
6304     term_t a = PL_new_term_ref();
6305 
6306     if ( !PL_unify_functor(key, FUNCTOR_indexed8) )
6307       return FALSE;
6308     for(i=0; i<8; i++)
6309     { if ( !PL_get_arg(i+1, key, a) ||
6310 	   !PL_unify_integer(a, db->indexed[i]) )
6311 	return FALSE;
6312     }
6313 
6314     return TRUE;
6315   } else if ( f == FUNCTOR_searched_nodes1 )
6316   { v = db->agenda_created;
6317   } else if ( f == FUNCTOR_duplicates1 )
6318   { v = db->duplicates;
6319   } else if ( f == FUNCTOR_literals1 )
6320   { v = db->literals.count;
6321   } else if ( f == FUNCTOR_triples2 && PL_is_functor(key, f) )
6322   { graph *src;
6323     term_t a = PL_new_term_ref();
6324     atom_t name;
6325 
6326     _PL_get_arg(1, key, a);
6327     if ( !PL_get_atom(a, &name) )
6328       return type_error(a, "atom");
6329     if ( (src = lookup_graph(db, name, FALSE)) )
6330       v = src->triple_count;
6331     else
6332       v = 0;
6333 
6334     _PL_get_arg(2, key, a);
6335     return PL_unify_int64(a, v);
6336   } else if ( f == FUNCTOR_gc2 )
6337   { return PL_unify_term(key,
6338 			 PL_FUNCTOR, f,
6339 			   PL_INT, db->gc_count,
6340 			   PL_FLOAT, db->gc_time); 	/* time spent */
6341   } else if ( f == FUNCTOR_rehash2 )
6342   { return PL_unify_term(key,
6343 			 PL_FUNCTOR, f,
6344 			   PL_INT, db->rehash_count,
6345 			   PL_FLOAT, db->rehash_time);
6346   } else
6347     assert(0);
6348 
6349   return PL_unify_term(key, PL_FUNCTOR, f, PL_INT64, v);
6350 }
6351 
6352 static foreign_t
rdf_statistics(term_t key,control_t h)6353 rdf_statistics(term_t key, control_t h)
6354 { int n;
6355   rdf_db *db = DB;
6356 
6357   switch(PL_foreign_control(h))
6358   { case PL_FIRST_CALL:
6359     { functor_t f;
6360 
6361       if ( PL_is_variable(key) )
6362       { n = 0;
6363 	goto redo;
6364       } else if ( PL_get_functor(key, &f) )
6365       { for(n=0; keys[n]; n++)
6366 	{ if ( keys[n] == f )
6367 	    return unify_statistics(db, key, f);
6368 	}
6369 	return domain_error(key, "rdf_statistics");
6370       } else
6371 	return type_error(key, "rdf_statistics");
6372     }
6373     case PL_REDO:
6374       n = (int)PL_foreign_context(h);
6375     redo:
6376       unify_statistics(db, key, keys[n]);
6377       n++;
6378       if ( keys[n] )
6379 	PL_retry(n);
6380     case PL_CUTTED:
6381       return TRUE;
6382     default:
6383       assert(0);
6384       return TRUE;
6385   }
6386 }
6387 
6388 
6389 static foreign_t
rdf_generation(term_t t)6390 rdf_generation(term_t t)
6391 { rdf_db *db = DB;
6392 
6393   return PL_unify_integer(t, db->generation);
6394 }
6395 
6396 
6397 		 /*******************************
6398 		 *	       RESET		*
6399 		 *******************************/
6400 
6401 static void
erase_triples(rdf_db * db)6402 erase_triples(rdf_db *db)
6403 { triple *t, *n;
6404   int i;
6405 
6406   for(t=db->by_none; t; t=n)
6407   { n = t->next[BY_NONE];
6408 
6409     free_triple(db, t);
6410     db->freed++;
6411   }
6412   db->by_none = db->by_none_tail = NULL;
6413 
6414   for(i=BY_S; i<=BY_OP; i++)
6415   { if ( db->table[i] )
6416     { int bytes = sizeof(triple*) * db->table_size[i];
6417 
6418       memset(db->table[i], 0, bytes);
6419       memset(db->tail[i], 0, bytes);
6420     }
6421   }
6422 
6423   db->created = 0;
6424   db->erased = 0;
6425   db->freed = 0;
6426   db->erased = 0;
6427   db->subjects = 0;
6428   db->rehash_count = 0;
6429   memset(db->indexed, 0, sizeof(db->indexed));
6430   db->duplicates = 0;
6431   db->generation = 0;
6432 }
6433 
6434 
6435 static void
erase_predicates(rdf_db * db)6436 erase_predicates(rdf_db *db)
6437 { predicate **ht;
6438   int i;
6439 
6440   for(i=0,ht = db->pred_table; i<db->pred_table_size; i++, ht++)
6441   { predicate *p, *n;
6442 
6443     for( p = *ht; p; p = n )
6444     { n = p->next;
6445 
6446       free_list(db, &p->subPropertyOf);
6447       free_list(db, &p->siblings);
6448       if ( ++p->cloud->deleted == p->cloud->size )
6449 	free_predicate_cloud(db, p->cloud);
6450 
6451       rdf_free(db, p, sizeof(*p));
6452     }
6453 
6454     *ht = NULL;
6455   }
6456 
6457   db->pred_count = 0;
6458   db->next_hash = 0;
6459 }
6460 
6461 
6462 static void
reset_db(rdf_db * db)6463 reset_db(rdf_db *db)
6464 { db->resetting = TRUE;
6465 
6466   erase_triples(db);
6467   erase_predicates(db);
6468   erase_graphs(db);
6469   db->need_update = FALSE;
6470   db->agenda_created = 0;
6471   avlfree(&db->literals);
6472   init_literal_table(db);
6473 
6474   db->resetting = FALSE;
6475 }
6476 
6477 
6478 static foreign_t
rdf_reset_db()6479 rdf_reset_db()
6480 { rdf_db *db = DB;
6481 
6482   if ( !WRLOCK(db, FALSE) )
6483     return FALSE;
6484 
6485   if ( db->tr_first )
6486   { record_transaction(db, TR_RESET, NULL);
6487     db->tr_reset = TRUE;
6488   } else
6489     reset_db(db);
6490 
6491   WRUNLOCK(db);
6492 
6493   return TRUE;
6494 }
6495 
6496 
6497 		 /*******************************
6498 		 *	       MATCH		*
6499 		 *******************************/
6500 
6501 
6502 static foreign_t
match_label(term_t how,term_t search,term_t label)6503 match_label(term_t how, term_t search, term_t label)
6504 { atom_t h, f, l;
6505   int type;
6506 
6507   if ( !get_atom_ex(how, &h) ||
6508        !get_atom_ex(search, &f) ||
6509        !get_atom_ex(label, &l) )
6510     return FALSE;
6511 
6512   if ( h == ATOM_exact )
6513     type = STR_MATCH_EXACT;
6514   else if ( h == ATOM_substring )
6515     type = STR_MATCH_SUBSTRING;
6516   else if ( h == ATOM_word )
6517     type = STR_MATCH_WORD;
6518   else if ( h == ATOM_prefix )
6519     type = STR_MATCH_PREFIX;
6520   else if ( h == ATOM_like )
6521     type = STR_MATCH_LIKE;
6522   else
6523     return domain_error(how, "search_method");
6524 
6525   return match_atoms(type, f, l);
6526 }
6527 
6528 
6529 static foreign_t
lang_matches(term_t lang,term_t pattern)6530 lang_matches(term_t lang, term_t pattern)
6531 { atom_t l, p;
6532 
6533   if ( !get_atom_ex(lang, &l) ||
6534        !get_atom_ex(pattern, &p) )
6535     return FALSE;
6536 
6537   return atom_lang_matches(l, p);
6538 }
6539 
6540 
6541 
6542 
6543 		 /*******************************
6544 		 *	       VERSION		*
6545 		 *******************************/
6546 
6547 static foreign_t
rdf_version(term_t v)6548 rdf_version(term_t v)
6549 { return PL_unify_integer(v, RDF_VERSION);
6550 }
6551 
6552 
6553 		 /*******************************
6554 		 *	     MORE STUFF		*
6555 		 *******************************/
6556 
6557 #include "quote.c"
6558 
6559 		 /*******************************
6560 		 *	     REGISTER		*
6561 		 *******************************/
6562 
6563 #define MKFUNCTOR(n, a) \
6564 	FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
6565 #define NDET PL_FA_NONDETERMINISTIC
6566 #define META PL_FA_TRANSPARENT
6567 
6568 install_t
install_rdf_db()6569 install_rdf_db()
6570 { int i=0;
6571   extern install_t install_atom_map(void);
6572 
6573   MKFUNCTOR(literal, 1);
6574   MKFUNCTOR(error, 2);
6575   MKFUNCTOR(type_error, 2);
6576   MKFUNCTOR(domain_error, 2);
6577   MKFUNCTOR(triples, 1);
6578   MKFUNCTOR(triples, 2);
6579   MKFUNCTOR(subjects, 1);
6580   MKFUNCTOR(predicates, 1);
6581   MKFUNCTOR(subject, 1);
6582   MKFUNCTOR(predicate, 1);
6583   MKFUNCTOR(object, 1);
6584   MKFUNCTOR(graph, 1);
6585   MKFUNCTOR(indexed, 8);
6586   MKFUNCTOR(exact, 1);
6587   MKFUNCTOR(plain, 1);
6588   MKFUNCTOR(substring, 1);
6589   MKFUNCTOR(word, 1);
6590   MKFUNCTOR(prefix, 1);
6591   MKFUNCTOR(like, 1);
6592   MKFUNCTOR(literal, 2);
6593   MKFUNCTOR(searched_nodes, 1);
6594   MKFUNCTOR(duplicates, 1);
6595   MKFUNCTOR(literals, 1);
6596   MKFUNCTOR(symmetric, 1);
6597   MKFUNCTOR(transitive, 1);
6598   MKFUNCTOR(inverse_of, 1);
6599   MKFUNCTOR(lang, 2);
6600   MKFUNCTOR(type, 2);
6601   MKFUNCTOR(rdf_subject_branch_factor, 1);
6602   MKFUNCTOR(rdf_object_branch_factor, 1);
6603   MKFUNCTOR(rdfs_subject_branch_factor, 1);
6604   MKFUNCTOR(rdfs_object_branch_factor, 1);
6605   MKFUNCTOR(gc, 2);
6606   MKFUNCTOR(rehash, 2);
6607   MKFUNCTOR(core, 1);
6608   MKFUNCTOR(assert, 4);
6609   MKFUNCTOR(retract, 4);
6610   MKFUNCTOR(update, 5);
6611   MKFUNCTOR(new_literal, 1);
6612   MKFUNCTOR(old_literal, 1);
6613   MKFUNCTOR(transaction, 2);
6614   MKFUNCTOR(load, 2);
6615   MKFUNCTOR(rehash, 1);
6616   MKFUNCTOR(begin, 1);
6617   MKFUNCTOR(end, 1);
6618 
6619   FUNCTOR_colon2 = PL_new_functor(PL_new_atom(":"), 2);
6620 
6621   ATOM_user	     = PL_new_atom("user");
6622   ATOM_exact	     = PL_new_atom("exact");
6623   ATOM_plain	     = PL_new_atom("plain");
6624   ATOM_prefix	     = PL_new_atom("prefix");
6625   ATOM_like	     = PL_new_atom("like");
6626   ATOM_substring     = PL_new_atom("substring");
6627   ATOM_word	     = PL_new_atom("word");
6628   ATOM_subPropertyOf = PL_new_atom(URL_subPropertyOf);
6629   ATOM_error	     = PL_new_atom("error");
6630   ATOM_begin	     = PL_new_atom("begin");
6631   ATOM_end	     = PL_new_atom("end");
6632   ATOM_infinite	     = PL_new_atom("infinite");
6633 
6634   PRED_call1         = PL_predicate("call", 1, "user");
6635 
6636 					/* statistics */
6637   keys[i++] = FUNCTOR_triples1;
6638   keys[i++] = FUNCTOR_subjects1;
6639   keys[i++] = FUNCTOR_indexed8;
6640   keys[i++] = FUNCTOR_predicates1;
6641   keys[i++] = FUNCTOR_searched_nodes1;
6642   keys[i++] = FUNCTOR_duplicates1;
6643   keys[i++] = FUNCTOR_literals1;
6644   keys[i++] = FUNCTOR_triples2;
6645   keys[i++] = FUNCTOR_gc2;
6646   keys[i++] = FUNCTOR_rehash2;
6647   keys[i++] = FUNCTOR_core1;
6648   keys[i++] = 0;
6649 
6650 					/* setup the database */
6651   DB = new_db();
6652 
6653   PL_register_foreign("rdf_version",    1, rdf_version,     0);
6654   PL_register_foreign("rdf_assert",	3, rdf_assert3,	    0);
6655   PL_register_foreign("rdf_assert",	4, rdf_assert4,	    0);
6656   PL_register_foreign("rdf_update",	4, rdf_update,      0);
6657   PL_register_foreign("rdf_update",	5, rdf_update5,     0);
6658   PL_register_foreign("rdf_retractall",	3, rdf_retractall3, 0);
6659   PL_register_foreign("rdf_retractall",	4, rdf_retractall4, 0);
6660   PL_register_foreign("rdf_subject",	1, rdf_subject,	    NDET);
6661   PL_register_foreign("rdf",		3, rdf3,	    NDET);
6662   PL_register_foreign("rdf",		4, rdf4,	    NDET);
6663   PL_register_foreign("rdf_has",	4, rdf_has,	    NDET);
6664   PL_register_foreign("rdf_statistics_",1, rdf_statistics,  NDET);
6665   PL_register_foreign("rdf_generation", 1, rdf_generation,  0);
6666   PL_register_foreign("rdf_match_label",3, match_label,     0);
6667   PL_register_foreign("rdf_save_db_",   2, rdf_save_db,     0);
6668   PL_register_foreign("rdf_load_db_",   3, rdf_load_db,     0);
6669   PL_register_foreign("rdf_reachable",  3, rdf_reachable3,  NDET);
6670   PL_register_foreign("rdf_reachable",  5, rdf_reachable5,  NDET);
6671   PL_register_foreign("rdf_reset_db_",  0, rdf_reset_db,    0);
6672   PL_register_foreign("rdf_set_predicate",
6673 					2, rdf_set_predicate, 0);
6674   PL_register_foreign("rdf_predicate_property_",
6675 					2, rdf_predicate_property, NDET);
6676   PL_register_foreign("rdf_current_predicates",
6677 					1, rdf_current_predicates, 0);
6678   PL_register_foreign("rdf_current_literal",
6679 					1, rdf_current_literal, NDET);
6680   PL_register_foreign("rdf_graphs_",    1, rdf_graphs,      0);
6681   PL_register_foreign("rdf_set_graph_source", 3, rdf_set_graph_source, 0);
6682   PL_register_foreign("rdf_unset_graph_source", 1, rdf_unset_graph_source, 0);
6683   PL_register_foreign("rdf_graph_source_", 3, rdf_graph_source, 0);
6684   PL_register_foreign("rdf_estimate_complexity",
6685 					4, rdf_estimate_complexity, 0);
6686   PL_register_foreign("rdf_transaction_",2, rdf_transaction, META);
6687   PL_register_foreign("rdf_active_transactions_",
6688 					1, rdf_active_transactions, 0);
6689   PL_register_foreign("rdf_monitor_",   2, rdf_monitor,     META);
6690 /*PL_register_foreign("rdf_broadcast_", 2, rdf_broadcast,   0);*/
6691 #ifdef WITH_MD5
6692   PL_register_foreign("rdf_md5",	2, rdf_md5,	    0);
6693   PL_register_foreign("rdf_atom_md5",	3, rdf_atom_md5,    0);
6694 #endif
6695   PL_register_foreign("rdf_quote_uri",	2, rdf_quote_uri,   0);
6696 
6697 #ifdef O_DEBUG
6698   PL_register_foreign("rdf_debug",      1, rdf_debug,       0);
6699   PL_register_foreign("rdf_print_predicate_cloud", 1, rdf_print_predicate_cloud, 0);
6700 #endif
6701 #ifdef O_SECURE
6702   PL_register_foreign("rdf_dump_literals", 0, dump_literals, 0);
6703   PL_register_foreign("rdf_check_literals", 0, check_transitivity, 0);
6704 #endif
6705   PL_register_foreign("lang_matches", 2, lang_matches, 0);
6706 
6707   install_atom_map();
6708 }
6709