1 /*  Part of SWI-Prolog
2 
3     Author:        Jan Wielemaker
4     E-mail:        J.Wielemaker@vu.nl
5     WWW:           http://www.swi-prolog.org
6     Copyright (c)  2003-2020, University of Amsterdam
7                               VU University Amsterdam
8     All rights reserved.
9 
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions
12     are met:
13 
14     1. Redistributions of source code must retain the above copyright
15        notice, this list of conditions and the following disclaimer.
16 
17     2. Redistributions in binary form must reproduce the above copyright
18        notice, this list of conditions and the following disclaimer in
19        the documentation and/or other materials provided with the
20        distribution.
21 
22     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26     COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33     POSSIBILITY OF SUCH DAMAGE.
34 */
35 
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39 
40 #define WITH_PL_MUTEX 1
41 
42 #ifdef __WINDOWS__
43 #include <malloc.h>
44 #define inline __inline
45 #ifndef SIZEOF_LONG
46 #define SIZEOF_LONG 4
47 #endif
48 #endif
49 
50 #include "rdf_db.h"
51 #include <wctype.h>
52 #include <ctype.h>
53 #include "murmur.h"
54 #include "memory.h"
55 #include "buffer.h"
56 #ifdef WITH_MD5
57 #include "md5.h"
58 
59 #undef ERROR				/* also in wingdi.h; we do not care */
60 #define ERROR -1
61 
62 static void md5_triple(triple *t, md5_byte_t *digest);
63 static void sum_digest(md5_byte_t *digest, md5_byte_t *add);
64 static void dec_digest(md5_byte_t *digest, md5_byte_t *add);
65 static int  md5_unify_digest(term_t t, md5_byte_t digest[16]);
66 #endif
67 
68 void *
rdf_malloc(rdf_db * db,size_t size)69 rdf_malloc(rdf_db *db, size_t size)
70 { return malloc(size);
71 }
72 
73 void
rdf_free(rdf_db * db,void * ptr,size_t size)74 rdf_free(rdf_db *db, void *ptr, size_t size)
75 { free(ptr);
76 }
77 
/* File-scope functor handles.  They are only declared here; the code
   that assigns them is not in this part of the file.
   NOTE(review): the trailing digits in each name look like the functor
   arity -- confirm against the initialisation code.
*/
static functor_t FUNCTOR_literal1;
static functor_t FUNCTOR_literal2;
static functor_t FUNCTOR_colon2;
static functor_t FUNCTOR_plus2;

static functor_t FUNCTOR_triples1;
static functor_t FUNCTOR_triples2;
static functor_t FUNCTOR_resources1;
static functor_t FUNCTOR_predicates1;
static functor_t FUNCTOR_duplicates1;
static functor_t FUNCTOR_lingering1;
static functor_t FUNCTOR_literals1;
static functor_t FUNCTOR_subject1;
static functor_t FUNCTOR_predicate1;
static functor_t FUNCTOR_object1;
static functor_t FUNCTOR_graph1;
static functor_t FUNCTOR_indexed16;
static functor_t FUNCTOR_hash_quality1;
static functor_t FUNCTOR_hash3;
static functor_t FUNCTOR_hash4;

static functor_t FUNCTOR_exact1;
static functor_t FUNCTOR_icase1;
static functor_t FUNCTOR_plain1;
static functor_t FUNCTOR_substring1;
static functor_t FUNCTOR_word1;
static functor_t FUNCTOR_prefix1;
static functor_t FUNCTOR_like1;
static functor_t FUNCTOR_lt1;
static functor_t FUNCTOR_le1;
static functor_t FUNCTOR_eq1;
static functor_t FUNCTOR_between2;
static functor_t FUNCTOR_ge1;
static functor_t FUNCTOR_gt1;

static functor_t FUNCTOR_symmetric1;
static functor_t FUNCTOR_inverse_of1;
static functor_t FUNCTOR_transitive1;
static functor_t FUNCTOR_rdf_subject_branch_factor1;    /* S --> BF*O */
static functor_t FUNCTOR_rdf_object_branch_factor1;	/* O --> BF*S */
static functor_t FUNCTOR_rdfs_subject_branch_factor1;	/* S --> BF*O */
static functor_t FUNCTOR_rdfs_object_branch_factor1;	/* O --> BF*S */

static functor_t FUNCTOR_searched_nodes1;
static functor_t FUNCTOR_lang2;
static functor_t FUNCTOR_type2;

static functor_t FUNCTOR_gc4;
static functor_t FUNCTOR_graphs1;

static functor_t FUNCTOR_assert4;
static functor_t FUNCTOR_retract4;
static functor_t FUNCTOR_update5;
static functor_t FUNCTOR_new_literal1;
static functor_t FUNCTOR_old_literal1;
static functor_t FUNCTOR_transaction2;
static functor_t FUNCTOR_load2;
static functor_t FUNCTOR_begin1;
static functor_t FUNCTOR_end1;
static functor_t FUNCTOR_create_graph1;
138 
139 static atom_t   ATOM_user;
140 static atom_t	ATOM_exact;
141 static atom_t	ATOM_icase;
142 static atom_t	ATOM_plain;
143 static atom_t	ATOM_prefix;
144 static atom_t	ATOM_substring;
145 static atom_t	ATOM_word;
146 static atom_t	ATOM_like;
147 static atom_t	ATOM_error;
148 static atom_t	ATOM_begin;
149 static atom_t	ATOM_end;
150 static atom_t	ATOM_error;
151 static atom_t	ATOM_infinite;
152 static atom_t	ATOM_snapshot;
153 static atom_t	ATOM_true;
154 static atom_t	ATOM_size;
155 static atom_t	ATOM_optimize_threshold;
156 static atom_t	ATOM_average_chain_len;
157 static atom_t	ATOM_reset;
158 static atom_t	ATOM_lt;		/* < */
159 static atom_t	ATOM_eq;		/* = */
160 static atom_t	ATOM_gt;		/* > */
161 static atom_t	ATOM_XSDString;
162 
163 static atom_t	ATOM_subPropertyOf;
164 static atom_t	ATOM_xsdString;
165 static atom_t	ATOM_xsdDouble;
166 
167 static predicate_t PRED_call1;
168 
/* Bit flags for the `flags` argument of match_triples().  They can be
   or-ed together; MATCH_DUPLICATE is the combination add_tripleset()
   uses to detect duplicates.
*/
#define MATCH_EXACT		0x01	/* exact triple match */
#define MATCH_SUBPROPERTY	0x02	/* Use subPropertyOf relations */
#define MATCH_SRC		0x04	/* Match graph location */
#define MATCH_INVERSE		0x08	/* use symmetric match too */
#define MATCH_QUAL		0x10	/* Match qualifiers too */
#define MATCH_NUMERIC		0x20	/* Match typed objects numerically */
#define MATCH_DUPLICATE		(MATCH_EXACT|MATCH_QUAL)

/* Forward declarations of static functions that are used before they
   are defined.
*/
static int match_triples(rdf_db *db, triple *t, triple *p,
			 query *q, unsigned flags);
static void unlock_atoms(rdf_db *db, triple *t);
static void lock_atoms(rdf_db *db, triple *t);
static void unlock_atoms_literal(literal *lit);

static size_t	triple_hash_key(triple *t, int which);
static size_t	object_hash(triple *t);
static void	mark_duplicate(rdf_db *db, triple *t, query *q);
static void	link_triple_hash(rdf_db *db, triple *t);
static void	free_triple(rdf_db *db, triple *t, int linger);

static sub_p_matrix *create_reachability_matrix(rdf_db *db,
						predicate_cloud *cloud,
						query *q);
static void	free_reachability_matrix(rdf_db *db, sub_p_matrix *rm);
static void	gc_is_leaf(rdf_db *db, predicate *p, gen_t gen);
static int	get_predicate(rdf_db *db, term_t t, predicate **p, query *q);
static int	get_existing_predicate(rdf_db *db, term_t t, predicate **p);
static void	free_bitmatrix(rdf_db *db, bitmatrix *bm);
static predicate_cloud *new_predicate_cloud(rdf_db *db,
					    predicate **p, size_t count);
static int	unify_literal(term_t lit, literal *l);
static int	free_literal(rdf_db *db, literal *lit);
static int	check_predicate_cloud(predicate_cloud *c);
static void	invalidate_is_leaf(predicate *p, query *q, int add);
static void	create_triple_hashes(rdf_db *db, int count, int *ic);
static void	free_literal_value(rdf_db *db, literal *lit);
static void	finalize_graph(void *g, void *db);
206 
207 
208 		 /*******************************
209 		 *	       LOCKING		*
210 		 *******************************/
211 
212 static void
INIT_LOCK(rdf_db * db)213 INIT_LOCK(rdf_db *db)
214 { simpleMutexInit(&db->locks.literal);
215   simpleMutexInit(&db->locks.misc);
216   simpleMutexInit(&db->locks.gc);
217   simpleMutexInit(&db->locks.duplicates);
218   simpleMutexInit(&db->locks.erase);
219   simpleMutexInit(&db->locks.prefixes);
220 }
221 
/* File-scope mutex; the code that initialises and takes it is not in
   this part of the file.
*/
static simpleMutex rdf_lock;
223 
224 
225 		 /*******************************
226 		 *	   DEBUG SUPPORT	*
227 		 *******************************/
228 
229 #ifdef O_DEBUG
230 
/* Bit flags for the `flags` argument of print_triple(); they can be
   or-ed together.
*/
#define PRT_SRC	0x1				/* print source */
#define PRT_NL	0x2				/* add newline */
#define PRT_GEN	0x4				/* print generation info */
#define PRT_ADR	0x8				/* print address */
235 
236 static void
print_literal(literal * lit)237 print_literal(literal *lit)
238 { switch(lit->objtype)
239   { case OBJ_STRING:
240       switch(lit->qualifier)
241       { case Q_TYPE:
242 	  Sdprintf("%s^^\"%s\"",
243 		   PL_atom_chars(lit->value.string),
244 		   PL_atom_chars(ID_ATOM(lit->type_or_lang)));
245 	  break;
246 	case Q_LANG:
247 	  Sdprintf("%s@\"%s\"",
248 		   PL_atom_chars(lit->value.string),
249 		   PL_atom_chars(ID_ATOM(lit->type_or_lang)));
250 	  break;
251 	default:
252 	{ size_t len;
253 	  const char *s;
254 	  const wchar_t *w;
255 
256 	  if ( (s = PL_atom_nchars(lit->value.string, &len)) )
257 	  { if ( strlen(s) == len )
258 	      Sdprintf("\"%s\"", s);
259 	    else
260 	      Sdprintf("\"%s\" (len=%d)", s, len);
261 	  } else if ( (w = PL_atom_wchars(lit->value.string, &len)) )
262 	  { unsigned int i;
263 	    Sputc('L', Serror);
264 	    Sputc('"', Serror);
265 	    for(i=0; i<len; i++)
266 	    { if ( w[i] < 0x7f )
267 		Sputc(w[i], Serror);
268 	      else
269 		Sfprintf(Serror, "\\\\u%04x", w[i]);
270 	    }
271 	    Sputc('"', Serror);
272 	  }
273 	  break;
274 	}
275       }
276       break;
277     case OBJ_INTEGER:
278       Sdprintf("%ld", lit->value.integer);
279       break;
280     case OBJ_DOUBLE:
281       Sdprintf("%f", lit->value.real);
282       break;
283     case OBJ_TERM:
284     { fid_t fid = PL_open_foreign_frame();
285       term_t term = PL_new_term_ref();
286 
287       PL_recorded_external(lit->value.term.record, term);
288       PL_write_term(Serror, term, 1200,
289 		    PL_WRT_QUOTED|PL_WRT_NUMBERVARS|PL_WRT_PORTRAY);
290       PL_discard_foreign_frame(fid);
291       break;
292     }
293     default:
294       assert(0);
295   }
296 }
297 
298 
299 static void
print_object(triple * t)300 print_object(triple *t)
301 { if ( t->object_is_literal )
302   { print_literal(t->object.literal);
303   } else
304   { Sdprintf("%s", t->object.resource ? PL_atom_chars(t->object.resource) : "?o");
305   }
306 }
307 
308 
309 static void
print_src(triple * t)310 print_src(triple *t)
311 { if ( t->graph_id )
312   { if ( t->line == NO_LINE )
313       Sdprintf(" [%s]", PL_atom_chars(ID_ATOM(t->graph_id)));
314     else
315       Sdprintf(" [%s:%ld]", PL_atom_chars(ID_ATOM(t->graph_id)), t->line);
316   } else
317   { Sdprintf(" ?g");
318   }
319 }
320 
321 
322 static char *
triple_status_flags(triple * t,char * buf)323 triple_status_flags(triple *t, char *buf)
324 { char *o = buf;
325 
326   *o++ = ' ';
327   if ( t->atoms_locked )
328     *o++ = 'L';
329   if ( t->is_duplicate )
330     *o++ = 'D';
331 
332   if ( o > buf+1 )
333     *o = '\0';
334   else
335     buf[0] = '\0';
336 
337   return buf;
338 }
339 
340 
341 static void
print_gen(triple * t)342 print_gen(triple *t)
343 { char buf[3][24];
344 
345   Sdprintf(" (%s..%s%s)",
346 	   gen_name(t->lifespan.born, buf[0]),
347 	   gen_name(t->lifespan.died, buf[1]),
348 	   triple_status_flags(t, buf[2]));
349 }
350 
351 
352 static void
print_triple(triple * t,int flags)353 print_triple(triple *t, int flags)
354 { Sdprintf("<%s %s ",
355 	   t->subject_id ? PL_atom_chars(ID_ATOM(t->subject_id)) : "?s",
356 	   t->predicate.r->name ? PL_atom_chars(t->predicate.r->name) : "?p");
357   print_object(t);
358   if ( (flags & PRT_SRC) )
359     print_src(t);
360   if ( (flags & PRT_GEN) )
361     print_gen(t);
362   if ( (flags & PRT_ADR) )
363     Sdprintf(" &%p", t);
364   Sdprintf((flags & PRT_NL) ? ">\n" : ">");
365 }
366 
367 #endif
368 
369 		 /*******************************
370 		 *	     STORAGE		*
371 		 *******************************/
372 
373 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
374 Tables that allow finding the hash-chains   for a particular index. They
are currently crafted by hand, such that the compiler knows the mapping
376 is  constant.  check_index_tables()  verifies  that    the   tables  are
377 consistent.  To add an index:
378 
379     * Increment INDEX_TABLES in rdf_db.h
380     * Add the index to col_index[]
381     * Assign it a (consistent) position in index_col[]
    * Decide which unindexed queries are best mapped
383       to the new index and add them to alt_index[]
384     * Add entries to col_name[], col_avg_len[], col_opt_threshold[]
385     * Deal with the new index in consider_triple_rehash() and
386       initial_size_triple_hash()
387 
388 Make sure you compile with support for   assert(). If you make a mistake
389 in the above, you are likely  to   get  an  assertion failure. Thanks to
390 Haitao Zhang for debugging these notes.
391 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
392 
/* ICOL(i) maps an index id (a BY_* value, 0..15) to the column that
   holds its hash table.
*/
#define ICOL(i) (index_col[i])

/* Column for each of the 16 index ids; ~0 marks an index id that has
   no hash table of its own.
*/
static const int index_col[16] =
{ 0,					/* BY_NONE */
  1,					/* BY_S */
  2,					/* BY_P */
  3,					/* BY_SP */
  4,					/* BY_O */
  ~0,					/* BY_SO */
  5,					/* BY_PO */
  6,					/* BY_SPO */

  7,					/* BY_G */
  8,					/* BY_SG */
  9,					/* BY_PG */
 ~0,					/* BY_SPG */
 ~0,					/* BY_OG */
 ~0,					/* BY_SOG */
 ~0,					/* BY_POG */
 ~0					/* BY_SPOG */
};

/* Inverse of index_col[]: the index id stored in each column */
static int col_index[INDEX_TABLES] =
{ BY_NONE,
  BY_S,
  BY_P,
  BY_SP,
  BY_O,
  BY_PO,
  BY_SPO,
  BY_G,
  BY_SG,
  BY_PG
};

/* Short name for each column */
static const char *col_name[INDEX_TABLES] =
{ "-",
  "s",
  "p",
  "sp",
  "o",
  "po",
  "spo",
  "g",
  "sg",
  "pg"
};

/* Per-column value; going by the name presumably the target average
   hash-chain length -- TODO confirm with the code that uses it.
*/
static const int col_avg_len[INDEX_TABLES] =
{ 0,	/*BY_NONE*/
  2,	/*BY_S*/
  2,	/*BY_P*/
  2,	/*BY_SP*/
  4,	/*BY_O*/
  2,	/*BY_PO*/
  2,	/*BY_SPO*/
  1,	/*BY_G*/
  2,	/*BY_SG*/
  2	/*BY_PG*/
};

/* Per-column optimize threshold; its use is outside this part of the
   file.
*/
static const int col_opt_threshold[INDEX_TABLES] =
{ 0,	/*BY_NONE*/
  2,	/*BY_S*/
  2,	/*BY_P*/
  2,	/*BY_SP*/
  2,	/*BY_O*/
  2,	/*BY_PO*/
  2,	/*BY_SPO*/
  2,	/*BY_G*/
  2,	/*BY_SG*/
  2	/*BY_PG*/
};

/* For each of the 16 index ids, the index id to use instead.  Ids that
   have their own table map to themselves; the others map to an id that
   does have a table.  check_index_tables() asserts both properties.
*/
static const int alt_index[16] =
{ BY_NONE,				/* BY_NONE */
  BY_S,					/* BY_S */
  BY_P,					/* BY_P */
  BY_SP,				/* BY_SP */
  BY_O,					/* BY_O */
  BY_S,					/* BY_SO */
  BY_PO,				/* BY_PO */
  BY_SPO,				/* BY_SPO */

  BY_G,					/* BY_G */
  BY_SG,				/* BY_SG */
  BY_PG,				/* BY_PG */
  BY_SP,				/* BY_SPG */
  BY_O,					/* BY_OG */
  BY_S,					/* BY_SOG */
  BY_PO,				/* BY_POG */
  BY_SPO				/* BY_SPOG */
};
486 
487 
488 static void
check_index_tables(void)489 check_index_tables(void)
490 {
491 #ifndef NDEBUG
492   int i, ic;
493 
494   for(i=0; i<16; i++)
495   { if ( (ic=index_col[i]) != ~0 )
496     { assert(col_index[ic] == i);
497     }
498   }
499 
500   for(i=0; i<16; i++)
501   { int ai = alt_index[i];
502 
503     assert(index_col[ai] != ~0);
504   }
505 
506   for(i=0; i<INDEX_TABLES; i++)
507   { ic = col_index[i];
508     assert(alt_index[ic] == ic);
509   }
510 #endif
511 }
512 
513 
514 		 /*******************************
515 		 *	      LISTS		*
516 		 *******************************/
517 
518 static int
add_list(rdf_db * db,list * list,void * value)519 add_list(rdf_db *db, list *list, void *value)
520 { cell *c;
521 
522   for(c=list->head; c; c=c->next)
523   { if ( c->value == value )
524       return FALSE;			/* already a member */
525   }
526 
527   c = rdf_malloc(db, sizeof(*c));
528   c->value = value;
529   c->next = NULL;
530 
531   if ( list->tail )
532     list->tail->next = c;
533   else
534     list->head = c;
535 
536   list->tail = c;
537 
538   return TRUE;
539 }
540 
541 
542 static int
del_list(rdf_db * db,list * list,void * value)543 del_list(rdf_db *db, list *list, void *value)
544 { cell *c, *p = NULL;
545 
546   for(c=list->head; c; p=c, c=c->next)
547   { if ( c->value == value )
548     { if ( p )
549 	p->next = c->next;
550       else
551 	list->head = c->next;
552 
553       if ( !c->next )
554 	list->tail = p;
555 
556       rdf_free(db, c, sizeof(*c));
557 
558       return TRUE;
559     }
560   }
561 
562   return FALSE;				/* not a member */
563 }
564 
565 
566 static void
free_list(rdf_db * db,list * list)567 free_list(rdf_db *db, list *list)
568 { cell *c, *n;
569 
570   for(c=list->head; c; c=n)
571   { n = c->next;
572     rdf_free(db, c, sizeof(*c));
573   }
574 
575   list->head = list->tail = NULL;
576 }
577 
578 
579 		 /*******************************
580 		 *	      TMP STORE		*
581 		 *******************************/
582 
583 static void
init_tmp_store(tmp_store * s)584 init_tmp_store(tmp_store *s)
585 { s->chunks = &s->store0;
586   s->chunks->next = NULL;
587   s->chunks->used = 0;
588 }
589 
590 
591 static void *
alloc_tmp_store(tmp_store * s,size_t size)592 alloc_tmp_store(tmp_store *s, size_t size)
593 { void *p;
594 
595   assert(size < CHUNKSIZE);
596 
597   if ( s->chunks->used + size > CHUNKSIZE )
598   { mchunk *ch = malloc(sizeof(mchunk));
599 
600     ch->used = 0;
601     ch->next = s->chunks;
602     s->chunks = ch;
603   }
604 
605   p = &s->chunks->buf[s->chunks->used];
606   s->chunks->used += size;
607 
608   return p;
609 }
610 
611 
612 static void
destroy_tmp_store(tmp_store * s)613 destroy_tmp_store(tmp_store *s)
614 { mchunk *ch, *next;
615 
616   for(ch=s->chunks; ch != &s->store0; ch = next)
617   { next = ch->next;
618     free(ch);
619   }
620 }
621 
622 
623 		 /*******************************
624 		 *	     ATOM SETS		*
625 		 *******************************/
626 
/* Number of buckets in the table embedded in an atomset */
#define ATOMSET_INITIAL_ENTRIES 16

/* One member of an atomset, linked into its bucket chain */
typedef struct atom_cell
{ struct atom_cell *next;
  atom_t     atom;
} atom_cell;

/* Hash set of atoms.  `entries` initially points to the embedded
   entries0[] and is replaced by a malloc()ed table when the set grows
   (see rehash_atom_set()).  The cells are allocated from `store`.
*/
typedef struct
{ atom_cell **entries;			/* Hash entries */
  size_t      size;			/* Hash-table size */
  size_t      count;			/* # atoms stored */
  tmp_store   store;			/* Temporary storage */
  atom_cell  *entries0[ATOMSET_INITIAL_ENTRIES];
} atomset;
641 
642 
643 static void *
alloc_atomset(atomset * as,size_t size)644 alloc_atomset(atomset *as, size_t size)
645 { return alloc_tmp_store(&as->store, size);
646 }
647 
648 
649 static void
init_atomset(atomset * as)650 init_atomset(atomset *as)
651 { init_tmp_store(&as->store);
652   memset(as->entries0, 0, sizeof(as->entries0));
653   as->entries = as->entries0;
654   as->size = ATOMSET_INITIAL_ENTRIES;
655   as->count = 0;
656 }
657 
658 
659 static void
destroy_atomset(atomset * as)660 destroy_atomset(atomset *as)
661 { destroy_tmp_store(&as->store);
662 
663   if ( as->entries != as->entries0 )
664     free(as->entries);
665 }
666 
667 
668 static void
rehash_atom_set(atomset * as)669 rehash_atom_set(atomset *as)
670 { size_t newsize = as->size*2;
671   atom_cell **new = malloc(newsize*sizeof(atom_cell*));
672   int i;
673 
674   memset(new, 0, newsize*sizeof(atom_cell*));
675 
676   for(i=0; i<as->size; i++)
677   { atom_cell *c, *n;
678 
679     for(c=as->entries[i]; c; c=n)
680     { size_t inew = atom_hash(c->atom, MURMUR_SEED)&(newsize-1);
681 
682       n = c->next;
683       c->next = new[inew];
684       new[inew] = c;
685     }
686   }
687 
688   if ( as->entries == as->entries0 )
689   { as->entries = new;
690   } else
691   { atom_cell **old = as->entries;
692     as->entries = new;
693     free(old);
694   }
695 
696   as->size = newsize;
697 }
698 
699 
700 static int
add_atomset(atomset * as,atom_t atom)701 add_atomset(atomset *as, atom_t atom)
702 { size_t i = atom_hash(atom, MURMUR_SEED)&(as->size-1);
703   atom_cell *c;
704 
705   for(c=as->entries[i]; c; c=c->next)
706   { if ( c->atom == atom )
707       return 0;
708   }
709 
710   if ( ++as->count > 2*as->size )
711   { rehash_atom_set(as);
712     i = atom_hash(atom, MURMUR_SEED)&(as->size-1);
713   }
714 
715   c = alloc_atomset(as, sizeof(*c));
716   c->atom = atom;
717   c->next = as->entries[i];
718   as->entries[i] = c;
719 
720   return 1;
721 }
722 
723 
724 static int
for_atomset(atomset * as,int (* func)(atom_t a,void * closure),void * closure)725 for_atomset(atomset *as,
726 	    int (*func)(atom_t a, void *closure),
727 	    void *closure)
728 { int key;
729 
730   for(key=0; key < as->size; key++)
731   { atom_cell *c;
732 
733     for(c=as->entries[key]; c; c=c->next)
734     { if ( !(*func)(c->atom, closure) )
735 	return FALSE;
736     }
737   }
738 
739   return TRUE;
740 }
741 
742 
743 		 /*******************************
744 		 *	   TRIPLE SETS		*
745 		 *******************************/
746 
/* Note that only ->entries needs to be NULL to consider the set empty.
748    The remainder of the initialization is done lazily.
749 */
750 
751 static void *
alloc_tripleset(void * ptr,size_t size)752 alloc_tripleset(void *ptr, size_t size)
753 { tripleset *ts = ptr;
754 
755   return alloc_tmp_store(&ts->store, size);
756 }
757 
758 
759 static void
init_tripleset(tripleset * ts)760 init_tripleset(tripleset *ts)
761 { init_tmp_store(&ts->store);
762   memset(ts->entries0, 0, sizeof(ts->entries0));
763   ts->entries = ts->entries0;
764   ts->size = TRIPLESET_INITIAL_ENTRIES;
765   ts->count = 0;
766 }
767 
768 
769 static void
destroy_tripleset(tripleset * ts)770 destroy_tripleset(tripleset *ts)
771 { if ( ts->entries )
772   { destroy_tmp_store(&ts->store);
773 
774     if ( ts->entries != ts->entries0 )
775       free(ts->entries);
776   }
777 }
778 
779 
780 static void
rehash_triple_set(tripleset * ts)781 rehash_triple_set(tripleset *ts)
782 { size_t newsize = ts->size*2;
783   triple_cell **new = malloc(newsize*sizeof(triple_cell*));
784   int i;
785 
786   memset(new, 0, newsize*sizeof(triple_cell*));
787 
788   for(i=0; i<ts->size; i++)
789   { triple_cell *c, *n;
790 
791     for(c=ts->entries[i]; c; c=n)
792     { size_t inew = triple_hash_key(c->triple, BY_SPO)&(newsize-1);
793 
794       n = c->next;
795       c->next = new[inew];
796       new[inew] = c;
797     }
798   }
799 
800   if ( ts->entries == ts->entries0 )
801   { ts->entries = new;
802   } else
803   { triple_cell **old = ts->entries;
804     ts->entries = new;
805     free(old);
806   }
807 
808   ts->size = newsize;
809 }
810 
811 
812 static int
add_tripleset(search_state * state,tripleset * ts,triple * triple)813 add_tripleset(search_state *state, tripleset *ts, triple *triple)
814 { size_t i;
815   triple_cell *c;
816 
817   if ( !ts->entries )
818     init_tripleset(ts);
819 
820   i = triple_hash_key(triple, BY_SPO)&(ts->size-1);
821   for(c=ts->entries[i]; c; c=c->next)
822   { if ( match_triples(state->db,
823 		       triple, c->triple,
824 		       state->query, MATCH_DUPLICATE) )
825       return 0;
826   }
827 
828   if ( ++ts->count > 2*ts->size )
829   { rehash_triple_set(ts);
830     i = triple_hash_key(triple, BY_SPO)&(ts->size-1);
831   }
832 
833   c = alloc_tripleset(ts, sizeof(*c));
834   c->triple = triple;
835   c->next = ts->entries[i];
836   ts->entries[i] = c;
837 
838   return 1;
839 }
840 
841 
842 		 /*******************************
843 		 *	      PREFIXES		*
844 		 *******************************/
845 
846 static prefix_table *
new_prefix_table(void)847 new_prefix_table(void)
848 { prefix_table *t = malloc(sizeof(*t));
849 
850   if ( t )
851   { memset(t, 0, sizeof(*t));
852     t->size    = PREFIX_INITIAL_ENTRIES;
853     t->entries = malloc(t->size*sizeof(*t->entries));
854     if ( t->entries )
855     { memset(t->entries, 0, t->size*sizeof(*t->entries));
856     } else
857     { free(t);
858       t = NULL;
859     }
860   }
861 
862   return t;
863 }
864 
865 
866 static void
empty_prefix_table(rdf_db * db)867 empty_prefix_table(rdf_db *db)
868 { int i;
869   prefix_table *t = db->prefixes;
870 
871   simpleMutexLock(&db->locks.prefixes);
872   for(i=0; i<t->size; i++)
873   { prefix *p, *next;
874 
875     p = t->entries[i];
876     t->entries[i] = NULL;
877     for(; p; p = next)
878     { next = p->next;
879 
880       PL_unregister_atom(p->alias);
881       PL_unregister_atom(p->uri.handle);
882       free(p);
883     }
884   }
885   simpleMutexUnlock(&db->locks.prefixes);
886   t->count = 0;
887 
888   flush_prefix_cache();
889 }
890 
891 
892 static void
resize_prefix_table(prefix_table * t)893 resize_prefix_table(prefix_table *t)
894 { size_t new_size = t->size*2;
895   prefix **new_entries = malloc(new_size*sizeof(*new_entries));
896 
897   if ( new_entries )
898   { int i;
899 
900     memset(new_entries, 0, new_size*sizeof(*new_entries));
901     for(i=0; i<t->size; i++)
902     { prefix *p, *next;
903 
904       for(p=t->entries[i]; p; p = next)
905       { unsigned key = atom_hash(p->alias, MURMUR_SEED) & (new_size-1);
906 
907 	next = p->next;
908 	p->next = new_entries[key];
909 	new_entries[key] = p;
910       }
911     }
912 
913     t->size = new_size;
914     free(t->entries);
915     t->entries = new_entries;
916   }
917 }
918 
919 
920 
921 static prefix *
add_prefix(rdf_db * db,atom_t alias,atom_t uri)922 add_prefix(rdf_db *db, atom_t alias, atom_t uri)
923 { prefix_table *t = db->prefixes;
924   unsigned key = atom_hash(alias, MURMUR_SEED) & (t->size-1);
925   prefix *p = malloc(sizeof(*p));
926 
927   if ( !p )
928   { PL_resource_error("memory");
929     return NULL;
930   }
931 
932   if ( t->count > t->size )
933     resize_prefix_table(t);
934 
935   memset(p, 0, sizeof(*p));
936   p->alias      = alias;
937   p->uri.handle = uri;
938   PL_register_atom(alias);
939   PL_register_atom(uri);
940   fill_atom_info(&p->uri);
941 
942   p->next = t->entries[key];
943   t->entries[key] = p;
944   t->count++;
945 
946   return p;
947 }
948 
949 
950 static prefix *
lookup_prefix(rdf_db * db,atom_t a)951 lookup_prefix(rdf_db *db, atom_t a)
952 { prefix_table *t;
953   prefix *pl;
954   fid_t fid;
955   static predicate_t pred = NULL;
956 
957   simpleMutexLock(&db->locks.prefixes);
958   t = db->prefixes;
959   for(pl = t->entries[atom_hash(a, MURMUR_SEED)&(t->size-1)]; pl; pl=pl->next)
960   { if ( pl->alias == a )
961     { simpleMutexUnlock(&db->locks.prefixes);
962       return pl;
963     }
964   }
965 
966   if ( !pred )
967     pred = PL_predicate("rdf_current_prefix", 2, "rdf_db");
968 
969   assert(pl == NULL);
970   if ( (fid = PL_open_foreign_frame()) )
971   { term_t av = PL_new_term_refs(2);
972     atom_t uri_atom;
973 
974     PL_put_atom(av+0, a);
975     if ( PL_call_predicate(NULL, PL_Q_PASS_EXCEPTION, pred, av) &&
976 	 PL_get_atom_ex(av+1, &uri_atom) )
977       pl = add_prefix(db, a, uri_atom);
978     else if ( !PL_exception(0) )
979       PL_existence_error("rdf_prefix", av+0);
980 
981     PL_close_foreign_frame(fid);
982   }
983 
984   simpleMutexUnlock(&db->locks.prefixes);
985 
986   return pl;
987 }
988 
989 
990 static wchar_t *
add_text(wchar_t * w,const text * t)991 add_text(wchar_t *w, const text *t)
992 { if ( t->a )
993   { const unsigned char *a = t->a;
994     const unsigned char *e = &a[t->length];
995 
996     for(; a<e; a++)
997       *w++ = *a;
998   } else
999   { const wchar_t *a = t->w;
1000     const wchar_t *e = &a[t->length];
1001 
1002     for(; a<e; a++)
1003       *w++ = *a;
1004   }
1005 
1006   return w;
1007 }
1008 
1009 
1010 atom_t
expand_prefix(rdf_db * db,atom_t alias,atom_t local)1011 expand_prefix(rdf_db *db, atom_t alias, atom_t local)
1012 { prefix *p = lookup_prefix(db, alias);
1013 
1014   if ( p )
1015   { atom_info ai = {0};
1016     ai.handle = local;
1017     fill_atom_info(&ai);
1018     atom_t uri;
1019 
1020     if ( ai.text.a && p->uri.text.a )
1021     { char buf[256];
1022       size_t len = ai.text.length + p->uri.text.length;
1023       char *a = len <= sizeof(buf) ? buf : malloc(len);
1024 
1025       if ( !len )
1026 	return (atom_t)0;
1027       memcpy(a, p->uri.text.a, p->uri.text.length);
1028       memcpy(&a[p->uri.text.length], ai.text.a, ai.text.length);
1029 
1030       uri = PL_new_atom_nchars(len, a);
1031       if ( a != buf )
1032 	free(a);
1033     } else
1034     { wchar_t buf[256];
1035       size_t len = ai.text.length + p->uri.text.length;
1036       wchar_t *w = len <= sizeof(buf)/sizeof(wchar_t)
1037 				   ? buf
1038 				   : malloc(len*sizeof(wchar_t));
1039 
1040       if ( !len )
1041 	return (atom_t)0;
1042       w = add_text(w, &p->uri.text);
1043       w = add_text(w, &ai.text);
1044 
1045       uri = PL_new_atom_wchars(len, w);
1046       if ( w != buf )
1047 	free(w);
1048     }
1049 
1050     return uri;
1051   }
1052 
1053   return (atom_t)0;
1054 }
1055 
1056 
1057 
1058 #ifdef COMPACT
1059 
1060 		 /*******************************
1061 		 *	   TRIPLE ARRAY		*
1062 		 *******************************/
1063 
1064 static triple_element *
alloc_array_slice(size_t count,triple_element ** last)1065 alloc_array_slice(size_t count, triple_element **last)
1066 { size_t bytes = count*sizeof(triple_element);
1067   triple_element *slice = malloc(bytes);
1068 
1069   if ( slice )
1070   { triple_element *end = slice+count-1;
1071     triple_element *e, *n;
1072 
1073     for(e=slice; e<end; e=n)
1074     { n = e+1;
1075       e->fnext = n;
1076     }
1077     e->fnext = NULL;
1078 
1079     if ( last )
1080       *last = e;
1081   }
1082 
1083   return slice;
1084 }
1085 
/* Push the chain of elements that starts at `list` and ends at `last`
   onto the free list of `a`.  `list` and `last` may be the same element
   (see unregister_triple()).  The update is done with a compare-and-swap
   that is retried until it succeeds.
*/

static void
free_array_slice(triple_array *a, triple_element *list, triple_element *last)
{ triple_element *o;

  do
  { o = a->freelist;			/* current head of the free list */
    last->fnext = o;			/* chain it after our elements */
  } while ( !COMPARE_AND_SWAP_PTR(&a->freelist, o, list) );
}
1095 
1096 static int
init_triple_array(rdf_db * db)1097 init_triple_array(rdf_db *db)
1098 { triple_array *a = &db->triple_array;
1099   triple_element *slice = alloc_array_slice(TRIPLE_ARRAY_PREINIT, NULL);
1100   int i;
1101 
1102   for(i=0; i<MSB(TRIPLE_ARRAY_PREINIT); i++)
1103     a->blocks[i] = slice;
1104 
1105   a->freelist = slice->fnext;		/* simply ignore the first for id>0 */
1106   a->preinit  = TRIPLE_ARRAY_PREINIT;
1107   a->size     = TRIPLE_ARRAY_PREINIT;
1108 
1109   return TRUE;
1110 }
1111 
1112 static void
destroy_triple_array(rdf_db * db)1113 destroy_triple_array(rdf_db *db)
1114 { triple_array *a = &db->triple_array;
1115   int i;
1116 
1117   free(a->blocks[0]);
1118   for(i=MSB(a->preinit); i<MSB(a->size); i++)
1119   { triple_element *e = a->blocks[i];
1120 
1121     e += 1<<(i-1);
1122     free(e);
1123   }
1124   memset(a, 0, sizeof(*a));
1125 }
1126 
1127 static void
reset_triple_array(rdf_db * db)1128 reset_triple_array(rdf_db *db)
1129 { destroy_triple_array(db);
1130   init_triple_array(db);
1131 }
1132 
1133 static void
resize_triple_array(rdf_db * db)1134 resize_triple_array(rdf_db *db)
1135 { triple_array *a = &db->triple_array;
1136   int i = MSB(a->size);
1137   triple_element *last;
1138   triple_element *slice = alloc_array_slice(a->size, &last);
1139 
1140   if ( slice )
1141   { a->blocks[i] = slice - a->size;
1142     a->size *= 2;
1143     free_array_slice(a, slice, last);
1144   }
1145 }
1146 
1147 static triple_element *
fetch_triple_element(rdf_db * db,triple_id id)1148 fetch_triple_element(rdf_db *db, triple_id id)
1149 { return &db->triple_array.blocks[MSB(id)][id];
1150 }
1151 
/* Assign a new triple a place in the triple array.

   An element is taken from the head of the free list with a
   compare-and-swap that is retried until it succeeds.  If the free list
   is empty, the array is grown while holding the misc lock.  The
   element is made to point to `t`.  The id of the triple is the offset
   of the element from the (offset) block pointer of the block that
   contains it; it is stored in t->id and returned.
*/

static triple_id
register_triple(rdf_db *db, triple *t)
{ triple_array *a = &db->triple_array;
  triple_element *e;
  size_t slice_size;
  int i;

  do
  { if ( !(e=a->freelist) )
    { simpleMutexLock(&db->locks.misc);
      while ( !(e=a->freelist) )	/* re-check now that we hold the lock */
	resize_triple_array(db);
      simpleMutexUnlock(&db->locks.misc);
    }
  } while ( !COMPARE_AND_SWAP_PTR(&a->freelist, e, e->fnext) );

  e->triple = t;

					/* find the block that holds e */
  for(i=1,slice_size=1; i<MAX_TBLOCKS; i++,slice_size*=2)
  { if ( e >= a->blocks[i]+slice_size &&
	 e <  a->blocks[i]+slice_size*2 )
    { t->id = e - a->blocks[i];

      assert(fetch_triple(db, t->id) == t);
      return t->id;
    }
  }

  assert(0);				/* e is not in any block */
  return 0;
}
1186 
1187 static void
unregister_triple(rdf_db * db,triple * t)1188 unregister_triple(rdf_db *db, triple *t)
1189 { if ( t->id != TRIPLE_NO_ID )
1190   { triple_element *e = fetch_triple_element(db, t->id);
1191 
1192     t->id = TRIPLE_NO_ID;
1193     free_array_slice(&db->triple_array, e, e);
1194   }
1195 }
1196 
1197 static triple *
triple_follow_hash(rdf_db * db,triple * t,int icol)1198 triple_follow_hash(rdf_db *db, triple *t, int icol)
1199 { triple_id nid = t->tp.next[icol];
1200 
1201   return fetch_triple(db, nid);
1202 }
1203 
/* T_ID(t): the id field of triple t, or 0 if t is NULL */
#define T_ID(t) ((t) ? (t)->id : 0)
1205 
1206 #else /*COMPACT*/
1207 
/* Variant for the #else branch of COMPACT: the triple array operations
   expand to nothing, triple_follow_hash() reads t->tp.next[icol]
   directly and T_ID() is its argument itself.
*/
#define init_triple_array(db) (void)0
#define reset_triple_array(db) (void)0
#define register_triple(db, t) (void)0
#define unregister_triple(db, t) (void)0
#define triple_follow_hash(db, t, icol) ((t)->tp.next[icol])
#define T_ID(t) (t)
1214 
1215 #endif /*COMPACT*/
1216 
1217 static void
finalize_triple(void * data,void * client)1218 finalize_triple(void *data, void *client)
1219 { triple *t = data;
1220   rdf_db *db = client;
1221 
1222   if ( !db->resetting )
1223   { unlock_atoms(db, t);
1224     if ( t->object_is_literal && t->object.literal )
1225       free_literal(db, t->object.literal);
1226 #ifdef COMPACT
1227       unregister_triple(db, t);
1228 #endif
1229   }
1230   SECURE(memset(t, 0, sizeof(*t)));
1231   TMAGIC(t, T_FREED);
1232   ATOMIC_SUB(&db->lingering, 1);
1233 }
1234 
1235 
1236 		 /*******************************
1237 		 *	  TRIPLE WALKER		*
1238 		 *******************************/
1239 
/* init_triple_walker() and next_triple() are the primitives to walk indexed
   triples.  The pattern is:

	triple_walker tw;

	init_triple_walker(&tw, db, pattern, index);
	while((t=next_triple(&tw)))
	  <do your job>

  TBD: Get the generation into this story.  Most likely it is better to
  deal with this in this low-level loop than outside. We will handle
  this in the next cycle.
*/
1253 
1254 static void
init_triple_walker(triple_walker * tw,rdf_db * db,triple * pattern,int which)1255 init_triple_walker(triple_walker *tw, rdf_db *db, triple *pattern, int which)
1256 { tw->unbounded_hash = triple_hash_key(pattern, which);
1257   tw->current	     = NULL;
1258   tw->icol	     = ICOL(which);
1259   tw->db	     = db;
1260   if ( !tw->db->hash[tw->icol].created )
1261     create_triple_hashes(db, 1, &tw->icol);
1262   tw->bcount	     = tw->db->hash[tw->icol].bucket_count_epoch;
1263 }
1264 
1265 
1266 static void
init_triple_literal_walker(triple_walker * tw,rdf_db * db,triple * pattern,int which,unsigned int hash)1267 init_triple_literal_walker(triple_walker *tw, rdf_db *db,
1268 			   triple *pattern, int which, unsigned int hash)
1269 { tw->unbounded_hash = hash;
1270   tw->current	     = NULL;
1271   tw->icol	     = ICOL(which);
1272   tw->db	     = db;
1273   if ( !tw->db->hash[tw->icol].created )
1274     create_triple_hashes(db, 1, &tw->icol);
1275   tw->bcount	     = tw->db->hash[tw->icol].bucket_count_epoch;
1276 }
1277 
1278 
1279 static void
rewind_triple_walker(triple_walker * tw)1280 rewind_triple_walker(triple_walker *tw)
1281 { tw->bcount  = tw->db->hash[tw->icol].bucket_count_epoch;
1282   tw->current = NULL;
1283 }
1284 
1285 
1286 static triple *
next_hash_triple(triple_walker * tw)1287 next_hash_triple(triple_walker *tw)
1288 { triple *rc;
1289   triple_hash *hash = &tw->db->hash[tw->icol];
1290 
1291   if ( tw->bcount <= hash->bucket_count )
1292   { do
1293     { int entry = tw->unbounded_hash % tw->bcount;
1294       triple_bucket *bucket = &hash->blocks[MSB(entry)][entry];
1295 
1296       rc = fetch_triple(tw->db, bucket->head);
1297       do
1298       { tw->bcount *= 2;
1299       } while ( tw->bcount <= hash->bucket_count &&
1300 		tw->unbounded_hash % tw->bcount == entry );
1301     } while(!rc && tw->bcount <= hash->bucket_count );
1302 
1303     if ( rc )
1304       tw->current = triple_follow_hash(tw->db, rc, tw->icol);
1305   } else
1306   { rc = NULL;
1307   }
1308 
1309   return rc;
1310 }
1311 
1312 
1313 static inline triple *
next_triple(triple_walker * tw)1314 next_triple(triple_walker *tw)
1315 { triple *rc;
1316 
1317   if ( (rc=tw->current) )
1318   { tw->current = triple_follow_hash(tw->db, rc, tw->icol);
1319 
1320     return rc;
1321   } else
1322   { return next_hash_triple(tw);
1323   }
1324 }
1325 
1326 
1327 static inline void
destroy_triple_walker(rdf_db * db,triple_walker * tw)1328 destroy_triple_walker(rdf_db *db, triple_walker *tw)
1329 {
1330 }
1331 
1332 
1333 		 /*******************************
1334 		 *	    PREDICATES		*
1335 		 *******************************/
1336 
1337 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Predicates are represented as first class citizens for three reasons: to
quickly answer the transitive rdfs:subPropertyOf relation for rdf_has/3,
to keep track of statistics that are useful for query optimization
(#triples, branching factor) and to keep properties
(inverse/transitive).
1343 
To answer the rdfs:subPropertyOf quickly, predicates are organised in
`clouds', where a cloud defines a set of predicates connected through
rdfs:subPropertyOf triples. The cloud numbers its members and maintains
a bit-matrix that contains the closure of the reachability. Initially a
predicate has a simple cloud of size 1. merge_clouds() and split_cloud()
deal with adding and deleting rdfs:subPropertyOf relations. These
operations try to modify the clouds that have no triples, so it can be
done without a rehash. If this fails, the predicates keep their own hash
to make search without rdfs:subPropertyOf still possible (so we can
avoid frequent updates while loading triples), and the cloud `dirty'
flag and the DB's need_update flag are set. Queries that need
rdfs:subPropertyOf find the need_update flag and call
organise_predicates(), which causes a rehash if some predicates have
changed hash-code to the new cloud they have become part of.
1358 
1359 TBD: We can do a partial re-hash in that case!
1360 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1361 
1362 static int
init_pred_table(rdf_db * db)1363 init_pred_table(rdf_db *db)
1364 { size_t bytes = sizeof(predicate**)*INITIAL_PREDICATE_TABLE_SIZE;
1365   predicate **p = PL_malloc_uncollectable(bytes);
1366   int i, count = INITIAL_PREDICATE_TABLE_SIZE;
1367 
1368   memset(p, 0, bytes);
1369   for(i=0; i<MSB(count); i++)
1370     db->predicates.blocks[i] = p;
1371 
1372   db->predicates.bucket_count       = count;
1373   db->predicates.bucket_count_epoch = count;
1374   db->predicates.count              = 0;
1375 
1376   return TRUE;
1377 }
1378 
1379 
1380 static int
resize_pred_table(rdf_db * db)1381 resize_pred_table(rdf_db *db)
1382 { int i = MSB(db->predicates.bucket_count);
1383   size_t bytes  = sizeof(predicate**)*db->predicates.bucket_count;
1384   predicate **p = PL_malloc_uncollectable(bytes);
1385 
1386   memset(p, 0, bytes);
1387   db->predicates.blocks[i] = p-db->predicates.bucket_count;
1388   db->predicates.bucket_count *= 2;
1389   DEBUG(1, Sdprintf("Resized predicate table to %ld\n",
1390 		    (long)db->predicates.bucket_count));
1391 
1392   return TRUE;
1393 }
1394 
1395 
/* State for enumerating the predicates found under the hash of a name.
   Filled by init_predicate_walker() and advanced by next_predicate().
*/
typedef struct pred_walker
{ rdf_db       *db;			/* RDF DB */
  atom_t	name;			/* Name of the predicate */
  size_t	unbounded_hash;		/* Atom's hash */
  size_t	bcount;			/* current bucket count */
  predicate    *current;		/* current location */
} pred_walker;
1403 
1404 
1405 static void
init_predicate_walker(pred_walker * pw,rdf_db * db,atom_t name)1406 init_predicate_walker(pred_walker *pw, rdf_db *db, atom_t name)
1407 { pw->db	     = db;
1408   pw->name	     = name;
1409   pw->unbounded_hash = atom_hash(name, MURMUR_SEED);
1410   pw->bcount	     = db->predicates.bucket_count_epoch;
1411   pw->current	     = NULL;
1412 }
1413 
1414 static predicate*
next_predicate(pred_walker * pw)1415 next_predicate(pred_walker *pw)
1416 { predicate *p;
1417 
1418   if ( pw->current )
1419   { p = pw->current;
1420     pw->current = p->next;
1421   } else if ( pw->bcount <= pw->db->predicates.bucket_count )
1422   { do
1423     { int entry = pw->unbounded_hash % pw->bcount;
1424       p = pw->db->predicates.blocks[MSB(entry)][entry];
1425       pw->bcount *= 2;
1426     } while(!p && pw->bcount <= pw->db->predicates.bucket_count );
1427 
1428     if ( p )
1429       pw->current = p->next;
1430   } else
1431     return NULL;
1432 
1433   return p;
1434 }
1435 
1436 
1437 static predicate *
existing_predicate(rdf_db * db,atom_t name)1438 existing_predicate(rdf_db *db, atom_t name)
1439 { pred_walker pw;
1440   predicate *p;
1441 
1442   init_predicate_walker(&pw, db, name);
1443   while((p=next_predicate(&pw)))
1444   { if ( p->name == name )
1445       return p;
1446   }
1447 
1448   return NULL;
1449 }
1450 
1451 
1452 predicate *
lookup_predicate(rdf_db * db,atom_t name)1453 lookup_predicate(rdf_db *db, atom_t name)
1454 { predicate *p, **pp;
1455   predicate_cloud *cp;
1456   int entry;
1457 
1458   if ( (p=existing_predicate(db, name)) )
1459     return p;
1460 
1461   LOCK_MISC(db);
1462   if ( (p=existing_predicate(db, name)) )
1463   { UNLOCK_MISC(db);
1464     return p;
1465   }
1466 
1467   p = rdf_malloc(db, sizeof(*p));
1468   memset(p, 0, sizeof(*p));
1469   p->name = name;
1470   cp = new_predicate_cloud(db, &p, 1);
1471   p->hash = cp->hash;
1472   PL_register_atom(name);
1473   if ( db->predicates.count > db->predicates.bucket_count )
1474     resize_pred_table(db);
1475   entry = atom_hash(name, MURMUR_SEED) % db->predicates.bucket_count;
1476   pp = &db->predicates.blocks[MSB(entry)][entry];
1477   p->next = *pp;
1478   *pp = p;
1479   db->predicates.count++;
1480   DEBUG(5, Sdprintf("Pred %s (count = %d)\n",
1481 		    PL_atom_chars(name), db->predicates.count));
1482   UNLOCK_MISC(db);
1483 
1484   return p;
1485 }
1486 
1487 
1488 static const char *
pname(predicate * p)1489 pname(predicate *p)
1490 { if ( p->name )
1491     return PL_atom_chars(p->name);
1492   else
1493   { static char *ring[10];
1494     static int ri = 0;
1495     char buf[25];
1496     char *r;
1497 
1498     Ssprintf(buf, "__D%p", p);
1499     ring[ri++] = r = strdup(buf);
1500     if ( ri == 10 )
1501     { ri = 0;
1502       free(ring[ri]);
1503     }
1504 
1505     return (const char*)r;
1506   }
1507 }
1508 
1509 
1510 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1511 Keep track of the triple count.
1512 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1513 
1514 static inline void
register_predicate(rdf_db * db,triple * t)1515 register_predicate(rdf_db *db, triple *t)
1516 { ATOMIC_ADD(&t->predicate.r->triple_count, 1);
1517 }
1518 
1519 
1520 static inline void
unregister_predicate(rdf_db * db,triple * t)1521 unregister_predicate(rdf_db *db, triple *t)
1522 { ATOMIC_SUB(&t->predicate.r->triple_count, 1);
1523 }
1524 
1525 
1526 		 /*******************************
1527 		 *	 PREDICATE CLOUDS	*
1528 		 *******************************/
1529 
1530 static predicate_cloud *
new_predicate_cloud(rdf_db * db,predicate ** p,size_t count)1531 new_predicate_cloud(rdf_db *db, predicate **p, size_t count)
1532 { predicate_cloud *cloud = rdf_malloc(db, sizeof(*cloud));
1533 
1534   memset(cloud, 0, sizeof(*cloud));
1535   cloud->hash = rdf_murmer_hash(&cloud, sizeof(cloud), PRED_MURMUR_SEED);
1536   if ( count )
1537   { int i;
1538     predicate **p2;
1539 
1540     cloud->size = count;
1541     cloud->members = rdf_malloc(db, sizeof(predicate*)*count);
1542     memcpy(cloud->members, p, sizeof(predicate*)*count);
1543 
1544     for(i=0, p2=cloud->members; i<cloud->size; i++, p2++)
1545     { (*p2)->cloud = cloud;
1546       (*p2)->label = i;
1547     }
1548   }
1549 
1550   return cloud;
1551 }
1552 
1553 
1554 static void
finalize_cloud(void * data,void * client)1555 finalize_cloud(void *data, void *client)
1556 { rdf_db *db = client;
1557   predicate_cloud *cloud = data;
1558   sub_p_matrix *rm, *rm2;
1559 
1560   if ( cloud->members )
1561     rdf_free(db, cloud->members, sizeof(predicate*)*cloud->size);
1562 
1563   for(rm=cloud->reachable; rm; rm=rm2)
1564   { rm2 = rm->older;
1565 
1566     free_reachability_matrix(db, rm);
1567   }
1568 }
1569 
1570 
1571 static void
free_predicate_cloud(rdf_db * db,predicate_cloud * cloud)1572 free_predicate_cloud(rdf_db *db, predicate_cloud *cloud)
1573 { finalize_cloud(cloud, db);
1574 
1575   rdf_free(db, cloud, sizeof(*cloud));
1576 }
1577 
1578 
1579 static size_t
triples_in_predicate_cloud(predicate_cloud * cloud)1580 triples_in_predicate_cloud(predicate_cloud *cloud)
1581 { size_t triples = 0;
1582   predicate **p;
1583   int i;
1584 
1585   for(i=0, p=cloud->members; i<cloud->size; i++, p++)
1586     triples += (*p)->triple_count;
1587 
1588   return triples;
1589 }
1590 
1591 
1592 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1593 gc_cloud() removes old reachability matrices.   As  the query generation
1594 has passed, we can immediately remove the  old bitmap. We must leave the
1595 sub_p_matrix struct to GC as someone might be walking the chain.
1596 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1597 
1598 static void
gc_cloud(rdf_db * db,predicate_cloud * cloud,gen_t gen)1599 gc_cloud(rdf_db *db, predicate_cloud *cloud, gen_t gen)
1600 { sub_p_matrix *rm, *older;
1601   sub_p_matrix *prev = NULL;
1602 
1603   for(rm=cloud->reachable; rm; rm=older)
1604   { older = rm->older;
1605 
1606     if ( rm->lifespan.died < gen )
1607     { if ( prev )
1608       { prev->older = older;
1609       } else
1610       { simpleMutexLock(&db->locks.misc);   /* sync with */
1611 	cloud->reachable = older;	    /* create_reachability_matrix() */
1612 	simpleMutexUnlock(&db->locks.misc);
1613       }
1614 
1615       free_bitmatrix(db, rm->matrix);
1616       rm->matrix = NULL;		    /* Clean to avoid false pointers */
1617       memset(&rm->lifespan, 0, sizeof(rm->lifespan));
1618       deferred_free(&db->defer_clouds, rm);
1619     } else
1620     { prev = rm;
1621     }
1622   }
1623 }
1624 
1625 
1626 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
GC all clouds. We walk the predicates and keep a flag on the cloud that
records the GC run in which it was collected, to avoid collecting a
cloud multiple times in the same GC run. Alternatively, we could keep a
list of possibly dirty clouds, but that is more complicated and most
likely not worth the trouble. After all, we might walk many predicates
for few clouds, but generally the number of predicates is still small
compared to the number of triples and thus the total cost in the GC
process will be small.
1634 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1635 
1636 static int
gc_clouds(rdf_db * db,gen_t gen)1637 gc_clouds(rdf_db *db, gen_t gen)
1638 { int i;
1639   int gc_id = db->gc.count+1;
1640 
1641   enter_scan(&db->defer_all);
1642   for(i=0; i<db->predicates.bucket_count; i++)
1643   { predicate *p = db->predicates.blocks[MSB(i)][i];
1644 
1645     for( ; p; p = p->next )
1646     { if ( p->cloud->last_gc != gc_id )
1647       { p->cloud->last_gc = gc_id;
1648 
1649 	gc_cloud(db, p->cloud, gen);
1650 	if ( PL_handle_signals() < 0 )
1651 	  return -1;
1652       }
1653       gc_is_leaf(db, p, gen);
1654     }
1655   }
1656   exit_scan(&db->defer_all);
1657 
1658   return 0;
1659 }
1660 
1661 
1662 static void
invalidateReachability(predicate_cloud * cloud,query * q)1663 invalidateReachability(predicate_cloud *cloud, query *q)
1664 { sub_p_matrix *rm;
1665   gen_t gen_max = query_max_gen(q);
1666 
1667   for(rm=cloud->reachable; rm; rm=rm->older)
1668   { if ( rm->lifespan.died == gen_max )
1669       rm->lifespan.died = queryWriteGen(q);
1670   }
1671 }
1672 
1673 
1674 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1675 Append the predicates from cloud C2 to those of cloud C1.  There are two
1676 scenarios:
1677 
1678   - C2 has no triples.  We are in a writer lock.  As there are no
1679     triples for C2, queries cannot go wrong.
1680   - C2 has triples.  It is possible that queries with the predicate
1681     hash of C2 are in progress.  See comment at merge_clouds() for
1682     how this is handled.
1683 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1684 
1685 static predicate_cloud *
append_clouds(rdf_db * db,predicate_cloud * c1,predicate_cloud * c2,int update_hash)1686 append_clouds(rdf_db *db,
1687 	      predicate_cloud *c1, predicate_cloud *c2,
1688 	      int update_hash)
1689 { int i;
1690   predicate **new_members;
1691   predicate **old_members = c1->members;
1692 
1693   new_members = rdf_malloc(db, (c1->size+c2->size)*sizeof(predicate*));
1694   memcpy(&new_members[0],        c1->members, c1->size*sizeof(predicate*));
1695   memcpy(&new_members[c1->size], c2->members, c2->size*sizeof(predicate*));
1696   c1->members = new_members;
1697   deferred_free(&db->defer_clouds, old_members);
1698 
1699 					/* re-label the new ones */
1700   for(i=c1->size; i<c1->size+c2->size; i++)
1701   { predicate *p = c1->members[i];
1702 
1703     p->cloud = c1;
1704     p->label = i;
1705     if ( update_hash )
1706       p->hash = c1->hash;
1707   }
1708   c1->size += c2->size;
1709 
1710   if ( !update_hash )
1711   { size_t newc = 0;
1712 
1713     if ( c1->alt_hash_count )
1714       newc += c1->alt_hash_count;
1715     else
1716       newc++;
1717 
1718     if ( c2->alt_hash_count )
1719       newc += c2->alt_hash_count;
1720     else
1721       newc++;
1722 
1723     DEBUG(1, Sdprintf("Cloud %p: %d alt-hashes\n", c1, newc));
1724 
1725     if ( c1->alt_hashes )
1726     { unsigned int *new_hashes;
1727       unsigned int *old_hashes = c1->alt_hashes;
1728 
1729       new_hashes = rdf_malloc(db, newc*sizeof(unsigned int));
1730       memcpy(&new_hashes[0], c1->alt_hashes,
1731 	     c1->alt_hash_count*sizeof(unsigned int));
1732       MEMORY_BARRIER();
1733       c1->alt_hashes = new_hashes;
1734       deferred_free(&db->defer_clouds, old_hashes);
1735     } else
1736     { c1->alt_hashes = rdf_malloc(db, newc*sizeof(unsigned int));
1737       c1->alt_hashes[0] = c1->hash;
1738       MEMORY_BARRIER();
1739       c1->alt_hash_count = 1;
1740     }
1741 
1742     if ( c2->alt_hash_count )
1743     { memcpy(&c1->alt_hashes[c1->alt_hash_count],
1744 	     c2->alt_hashes, c2->alt_hash_count*sizeof(unsigned int));
1745     } else
1746     { c1->alt_hashes[c1->alt_hash_count] = c2->hash;
1747     }
1748     MEMORY_BARRIER();
1749     c1->alt_hash_count = newc;
1750   }
1751 
1752   deferred_finalize(&db->defer_clouds, c2,
1753 		    finalize_cloud, db);
1754 
1755   return c1;
1756 }
1757 
1758 
1759 /* merge two predicate clouds. Note that this code is only called
1760    from addSubPropertyOf().  If c1==c2, we added an rdfs:subPropertyOf
1761    between two predicates in the same cloud. we must still invalidate
1762    the matrix.
1763 */
1764 
1765 static predicate_cloud *
merge_clouds(rdf_db * db,predicate_cloud * c1,predicate_cloud * c2,query * q)1766 merge_clouds(rdf_db *db, predicate_cloud *c1, predicate_cloud *c2, query *q)
1767 { predicate_cloud *cloud;
1768 
1769   if ( c1 != c2 )
1770   { size_t tc1, tc2;
1771 
1772     if ( (tc1=triples_in_predicate_cloud(c1)) == 0 )
1773     { cloud = append_clouds(db, c2, c1, TRUE);
1774     } else if ( (tc2=triples_in_predicate_cloud(c2)) == 0 )
1775     { cloud = append_clouds(db, c1, c2, TRUE);
1776     } else
1777     { predicate_cloud *reindex;
1778 
1779       if ( tc2 < tc1 )
1780       { cloud = c1;
1781 	reindex = c2;
1782       } else
1783       { cloud = c2;
1784 	reindex = c1;
1785       }
1786 
1787       cloud = append_clouds(db, cloud, reindex, FALSE);
1788     }
1789   } else
1790   { cloud = c1;
1791   }
1792 
1793   invalidateReachability(cloud, q);
1794 
1795   return cloud;
1796 }
1797 
1798 
1799 static size_t
predicate_hash(predicate * p)1800 predicate_hash(predicate *p)
1801 { return p->hash;
1802 }
1803 
1804 
1805 static void
addSubPropertyOf(rdf_db * db,triple * t,query * q)1806 addSubPropertyOf(rdf_db *db, triple *t, query *q)
1807 { predicate *sub   = lookup_predicate(db, ID_ATOM(t->subject_id));
1808   predicate *super = lookup_predicate(db, t->object.resource);
1809 
1810   DEBUG(3, Sdprintf("addSubPropertyOf(%s, %s)\n",
1811 		    pname(sub), pname(super)));
1812 
1813   invalidate_is_leaf(super, q, TRUE);
1814 
1815   if ( add_list(db, &sub->subPropertyOf, super) )
1816   { add_list(db, &super->siblings, sub);
1817     merge_clouds(db, sub->cloud, super->cloud, q);
1818   } else
1819   { predicate_cloud *cloud;
1820 
1821     cloud = super->cloud;
1822     assert(cloud == sub->cloud);
1823 
1824     invalidateReachability(cloud, q);
1825   }
1826 }
1827 
1828 
1829 /* deleting an rdfs:subPropertyOf.  This is a bit naughty.  If the
1830    cloud is still connected we only need to refresh the reachability
1831    matrix.  Otherwise the cloud breaks in maximum two clusters.  We
1832    can decide to leave it as is, which is simpler to implement
1833    but harms indexing.
1834 
1835    TBD: If the cloud becomes disconnected, it may be split.
1836 */
1837 
1838 static void
delSubPropertyOf(rdf_db * db,triple * t,query * q)1839 delSubPropertyOf(rdf_db *db, triple *t, query *q)
1840 { predicate *sub   = lookup_predicate(db, ID_ATOM(t->subject_id));
1841   predicate *super = lookup_predicate(db, t->object.resource);
1842   predicate_cloud *cloud;
1843 
1844   DEBUG(3, Sdprintf("delSubPropertyOf(%s, %s)\n",
1845 		    pname(sub), pname(super)));
1846 
1847   invalidate_is_leaf(super, q, FALSE);
1848 
1849   if ( del_list(db, &sub->subPropertyOf, super) )
1850   { del_list(db, &super->siblings, sub);
1851   }
1852 
1853   cloud = super->cloud;
1854   assert(cloud == sub->cloud);
1855 
1856   invalidateReachability(cloud, q);
1857 }
1858 
1859 
1860 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1861 Reachability matrix.
1862 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1863 
/* Number of bits in an int, assuming 8-bit bytes */
#define WBITSIZE (sizeof(int)*8)
1865 
1866 static size_t
byte_size_bitmatrix(size_t w,size_t h)1867 byte_size_bitmatrix(size_t w, size_t h)
1868 { size_t wsize = ((w*h)+WBITSIZE-1)/WBITSIZE;
1869 
1870   return (size_t)(intptr_t)&((bitmatrix*)NULL)->bits[wsize];
1871 }
1872 
1873 
1874 static bitmatrix *
alloc_bitmatrix(rdf_db * db,size_t w,size_t h)1875 alloc_bitmatrix(rdf_db *db, size_t w, size_t h)
1876 { size_t size = byte_size_bitmatrix(w, h);
1877   bitmatrix *m = rdf_malloc(db, size);
1878 
1879   memset(m, 0, size);
1880   m->width = w;
1881   m->heigth = h;
1882 
1883   return m;
1884 }
1885 
1886 
1887 static void
free_bitmatrix(rdf_db * db,bitmatrix * bm)1888 free_bitmatrix(rdf_db *db, bitmatrix *bm)
1889 { size_t size = byte_size_bitmatrix(bm->width, bm->heigth);
1890 
1891   rdf_free(db, bm, size);
1892 }
1893 
1894 
1895 #undef setbit				/* conflict in HPUX 11.23 */
1896 
1897 static void
setbit(bitmatrix * m,int i,int j)1898 setbit(bitmatrix *m, int i, int j)
1899 { size_t ij = m->width*i+j;
1900   size_t word = ij/WBITSIZE;
1901   int bit  = ij%WBITSIZE;
1902 
1903   m->bits[word] |= 1<<bit;
1904 }
1905 
1906 
1907 static int
testbit(bitmatrix * m,int i,int j)1908 testbit(bitmatrix *m, int i, int j)
1909 { size_t ij = m->width*i+j;
1910   size_t word = ij/WBITSIZE;
1911   int bit  = ij%WBITSIZE;
1912 
1913   return ((m->bits[word] & (1<<bit)) != 0);
1914 }
1915 
1916 
1917 static int
check_labels_predicate_cloud(predicate_cloud * cloud)1918 check_labels_predicate_cloud(predicate_cloud *cloud)
1919 { predicate **p;
1920   int i;
1921 
1922   for(i=0, p=cloud->members; i<cloud->size; i++, p++)
1923     assert((*p)->label == i);
1924 
1925   return i;
1926 }
1927 
1928 static void
update_valid(lifespan * valid,gen_t change)1929 update_valid(lifespan *valid, gen_t change)
1930 { if ( change < valid->died )
1931   { if ( valid->died <= GEN_MAX ||	/* both non-transaction */
1932 	 change > GEN_MAX )		/* both in transaction */
1933       valid->died = change;
1934   }
1935 }
1936 
1937 
1938 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1939 Match triple t against pattern p in query q. Update the died-property of
1940 valid if the triple matches now,  but   will  not  after some generation
1941 (i.e., it will die) or the triple must still be born.
1942 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1943 
1944 static triple *
matching_object_triple_until(rdf_db * db,triple * t,triple * p,query * q,unsigned flags,lifespan * valid)1945 matching_object_triple_until(rdf_db *db, triple *t, triple *p, query *q,
1946 			     unsigned flags, lifespan *valid)
1947 { triple *t2;
1948 
1949   if ( (t2=alive_triple(q, t)) )
1950   { if ( match_triples(db, t2, p, q, 0) &&
1951 	 !t2->object_is_literal )	/* object properties only */
1952     { if ( t2->lifespan.died != query_max_gen(q) )
1953       { DEBUG(1, Sdprintf("Limit lifespan due to dead: ");
1954 	      print_triple(t2, PRT_GEN|PRT_NL));
1955 	update_valid(valid, t2->lifespan.died);
1956       }
1957 
1958       return t2;
1959     }
1960   } else
1961   { t2 = deref_triple(db, t);		/* Dubious */
1962 
1963     if ( match_triples(db, t2, p, q, 0) &&
1964 	 !t2->object_is_literal )
1965     { if ( !t2->erased &&
1966 	   !born_lifespan(q, &t2->lifespan) )
1967       { DEBUG(1, Sdprintf("Limit lifespan due to new born: ");
1968 	      print_triple(t2, PRT_GEN|PRT_NL));
1969 	update_valid(valid, t2->lifespan.born);
1970       }
1971     }
1972   }
1973 
1974   return NULL;
1975 }
1976 
1977 
1978 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fill_reachable() computes the transitive closure of the
rdfs:subPropertyOf relation. In addition, it maintains the generation
valid_until, which expresses the maximum generation until when the
reachability matrix is valid. This is needed if we compute a
reachability matrix for an older generation.
1984 
1985 TBD: The code below probably doesn't  work properly inside a transaction
1986 due  to  the  complicated  generation  reasoning  there.  This  must  be
1987 clarified and cleaned.
1988 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1989 
1990 static void
fill_reachable(rdf_db * db,predicate_cloud * cloud,bitmatrix * bm,predicate * p0,predicate * p,query * q,lifespan * valid)1991 fill_reachable(rdf_db *db,
1992 	       predicate_cloud *cloud,
1993 	       bitmatrix *bm,
1994 	       predicate *p0, predicate *p,
1995 	       query *q,
1996 	       lifespan *valid)
1997 { if ( !testbit(bm, p0->label, p->label) )
1998   { triple pattern;
1999     triple *t;
2000     triple_walker tw;
2001 
2002     memset(&pattern, 0, sizeof(pattern));
2003 
2004     DEBUG(3, Sdprintf("    Reachable [%s (%d)]\n", pname(p), p->label));
2005     setbit(bm, p0->label, p->label);
2006     pattern.subject_id = ATOM_ID(p->name);
2007     pattern.predicate.r = existing_predicate(db, ATOM_subPropertyOf);
2008     init_triple_walker(&tw, db, &pattern, BY_SP);
2009     while((t=next_triple(&tw)))
2010     { triple *t2;
2011 
2012       if ( (t2=matching_object_triple_until(db, t, &pattern, q, 0, valid)) )
2013       { predicate *super;
2014 
2015 	super = lookup_predicate(db, t2->object.resource);
2016 	assert(super->cloud == cloud);
2017 	fill_reachable(db, cloud, bm, p0, super, q, valid);
2018       }
2019     }
2020   }
2021 }
2022 
2023 
2024 static int
is_transaction_start_gen(gen_t gen)2025 is_transaction_start_gen(gen_t gen)
2026 { return (gen-GEN_TBASE)%GEN_TNEST == 0;
2027 }
2028 
2029 
2030 static void
init_valid_lifespan(rdf_db * db,lifespan * span,query * q)2031 init_valid_lifespan(rdf_db *db, lifespan *span, query *q)
2032 { if ( q->transaction && !is_transaction_start_gen(q->tr_gen) )
2033   { span->born = q->tr_gen;
2034     span->died = query_max_gen(q);
2035     add_list(db, &q->transaction->transaction_data.lifespans, span);
2036   } else
2037   { span->born = q->rd_gen;
2038     span->died = GEN_MAX;
2039   }
2040 }
2041 
2042 
2043 
2044 static sub_p_matrix *
create_reachability_matrix(rdf_db * db,predicate_cloud * cloud,query * q)2045 create_reachability_matrix(rdf_db *db, predicate_cloud *cloud, query *q)
2046 { bitmatrix *m = alloc_bitmatrix(db, cloud->size, cloud->size);
2047   sub_p_matrix *rm = rdf_malloc(db, sizeof(*rm));
2048   predicate **p;
2049   int i;
2050 
2051   init_valid_lifespan(db, &rm->lifespan, q);
2052 
2053   DEBUG(1, { char buf[4][24];
2054 	     Sdprintf("Create matrix for q at %s/%s, valid %s..%s\n",
2055 		      gen_name(q->rd_gen, buf[0]),
2056 		      gen_name(q->tr_gen, buf[1]),
2057 		      gen_name(rm->lifespan.born, buf[2]),
2058 		      gen_name(rm->lifespan.died, buf[3]));
2059 	   });
2060 
2061   check_labels_predicate_cloud(cloud);
2062   for(i=0, p=cloud->members; i<cloud->size; i++, p++)
2063   { DEBUG(2, Sdprintf("Reachability for %s (%d)\n", pname(*p), (*p)->label));
2064 
2065     fill_reachable(db, cloud, m, *p, *p, q, &rm->lifespan);
2066   }
2067 
2068   DEBUG(1, { char buf[2][24];
2069 	     Sdprintf("Created matrix, valid %s..%s\n",
2070 		      gen_name(rm->lifespan.born, buf[0]),
2071 		      gen_name(rm->lifespan.died, buf[1]));
2072 	   });
2073 
2074   rm->matrix = m;
2075   simpleMutexLock(&db->locks.misc);		/* sync with gc_cloud() */
2076   rm->older = cloud->reachable;
2077   MEMORY_BARRIER();
2078   cloud->reachable = rm;
2079   simpleMutexUnlock(&db->locks.misc);
2080 
2081   return rm;
2082 }
2083 
2084 
2085 /* FIXME: we probably cannot guarantee these are not being
2086    accessed.  I.e., we must use GC lingering on them
2087 */
2088 
2089 static void
free_reachability_matrix(rdf_db * db,sub_p_matrix * rm)2090 free_reachability_matrix(rdf_db *db, sub_p_matrix *rm)
2091 { free_bitmatrix(db, rm->matrix);
2092 
2093   rdf_free(db, rm, sizeof(*rm));
2094 }
2095 
2096 
2097 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2098 isSubPropertyOf() is true if sub is an rdfs:subPropertyOf p (transitive)
2099 for  the  given  query  q.  If  two  predicates  are  connected  through
2100 rdfs:subPropertyOf, they belong to the same `cloud'. The cloud keeps one
2101 or more bitmatrices  with  the   entailment  of  all  rdfs:subPropertyOf
2102 triples. Each bitmatrix is  valid  during   a  certain  lifespan (set of
2103 generations).
2104 
2105 isSubPropertyOf() runs concurrently with updates and  must be careful in
2106 its  processing  to   deal   with    the   modifications   realised   by
2107 addSubPropertyOf() and delSubPropertyOf().  The  critical   path  is  if
2108 addSubPropertyOf() connects two clouds, both  having multiple predicates
2109 and both clouds have triples.
2110 
2111 It is solved as follows. Suppose cloud C2   is  merged into cloud C1, we
2112 take the following steps:
2113 
2114   - The predicates from C2 are added at the end of the ->members of C1.
2115     C1->size is updated.
2116     - This has no consequences for running queries that need the old
2117       entailment of the subPropertyOf anyway.
2118   - The cloud C2 gets ->merged_into set to C1
2119     - The cloud of a predicate is reached by following the ->merged_into
2120       chain. If such a link is followed, predicate->label (the index in
2121       the predicate cloud) is invalid and we must compute it.
2122   - For each member of C2
2123     - update <-label to the label in C1
2124       update <-cloud to C1
2125     - Leave C2 to Boehm-GC
  - Add the hash-key of C2 to the alt-hashes of C1.  Queries that
    involve sub-property on C1 must re-run the query with each
    alt-hash that has a predicate that is a sub-property of
    the target.  TBD: find a good compromise between computing and
    storing yet additional closures.
2131 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2132 
2133 static predicate_cloud *
cloud_of(predicate * p,int * labelp)2134 cloud_of(predicate *p, int *labelp)
2135 { predicate_cloud *pc = p->cloud;
2136   int i;
2137 
2138   if ( !pc->merged_into )
2139   { *labelp = p->label;
2140     return pc;
2141   }
2142 
2143   while(!pc->merged_into)
2144     pc = pc->merged_into;
2145 
2146   for(i=0; i<pc->size; i++)
2147   { if ( pc->members[i] == p )
2148     { *labelp = i;
2149       return pc;
2150     }
2151   }
2152 
2153   assert(0);
2154   return 0;
2155 }
2156 
2157 
2158 static int
isSubPropertyOf(rdf_db * db,predicate * sub,predicate * p,query * q)2159 isSubPropertyOf(rdf_db *db, predicate *sub, predicate *p, query *q)
2160 { predicate_cloud *pc;
2161   int sub_label, p_label;
2162 
2163   assert(sub != p);
2164 
2165   pc = cloud_of(sub, &sub_label);
2166   if ( pc == cloud_of(p, &p_label) )
2167   { sub_p_matrix *rm;
2168     int max_label = (sub_label > p_label ? sub_label : p_label);
2169 
2170     for(rm=pc->reachable; rm; rm=rm->older)
2171     { if ( alive_lifespan(q, &rm->lifespan) &&
2172 	   max_label < rm->matrix->width )
2173 	return testbit(rm->matrix, sub_label, p_label);
2174     }
2175 
2176     if ( (rm = create_reachability_matrix(db, pc, q)) )
2177     { assert(alive_lifespan(q, &rm->lifespan));
2178       return testbit(rm->matrix, sub_label, p_label);
2179     } else
2180       assert(0);
2181   }
2182 
2183   return FALSE;
2184 }
2185 
2186 
2187 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2188 is_leaf_predicate()  is  true   if   p   has    no   children   in   the
2189 rdfs:subPropertyOf tree at query q. We cache this information.
2190 
2191 FIXME: Note that this code is subject to  race conditions. If we want to
2192 avoid that without using locks, we must  put the validity information in
2193 a separate object that is not modified.
2194 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2195 
2196 static int
is_leaf_predicate(rdf_db * db,predicate * p,query * q)2197 is_leaf_predicate(rdf_db *db, predicate *p, query *q)
2198 { is_leaf *data;
2199   triple pattern;
2200   triple_walker tw;
2201   triple *t;
2202 
2203   memset(&pattern, 0, sizeof(pattern));
2204 
2205   for( data=p->is_leaf; data; data=data->older )
2206   { if ( alive_lifespan(q, &data->lifespan) )
2207       return data->is_leaf;
2208   }
2209 
2210   data = rdf_malloc(db, sizeof(*data));
2211   init_valid_lifespan(db, &data->lifespan, q);
2212 
2213   if ( (pattern.predicate.r = existing_predicate(db, ATOM_subPropertyOf)) )
2214   { pattern.object.resource = p->name;
2215 
2216     init_triple_walker(&tw, db, &pattern, BY_PO);
2217     while((t=next_triple(&tw)))
2218     { triple *t2;
2219 
2220       if ( (t2=matching_object_triple_until(db, t, &pattern, q, 0,
2221 					    &data->lifespan)) )
2222       { data->is_leaf = FALSE;
2223 	break;
2224       } else
2225 	data->is_leaf = TRUE;
2226     }
2227   } else				/* rdfs:subPropertyOf doesn't exist */
2228   { data->is_leaf = TRUE;		/* so all preds are leafs  */
2229   }
2230 
2231   simpleMutexLock(&db->locks.misc);
2232   data->older = p->is_leaf;
2233   MEMORY_BARRIER();
2234   p->is_leaf = data;
2235   simpleMutexUnlock(&db->locks.misc);
2236 
2237   return data->is_leaf;
2238 }
2239 
2240 
2241 /* invalidate the is_leaf status if a sub-property is added/deleted.
2242    no need to do so if we add a child to a non-leaf.
2243 */
2244 
2245 static void
invalidate_is_leaf(predicate * p,query * q,int add)2246 invalidate_is_leaf(predicate *p, query *q, int add)
2247 { gen_t gen_max = query_max_gen(q);
2248   is_leaf *il;
2249 
2250   for(il=p->is_leaf; il; il=il->older)
2251   { if ( il->lifespan.died == gen_max )
2252     { if ( !(add && !il->is_leaf) )
2253 	il->lifespan.died = queryWriteGen(q);
2254     }
2255   }
2256 }
2257 
2258 
2259 static void
gc_is_leaf(rdf_db * db,predicate * p,gen_t gen)2260 gc_is_leaf(rdf_db *db, predicate *p, gen_t gen)
2261 { is_leaf *il, *older;
2262   is_leaf *prev = NULL;
2263 
2264   for(il = p->is_leaf; il; il=older)
2265   { older = il->older;
2266 
2267     if ( il->lifespan.died < gen )
2268     { if ( prev )
2269       { prev->older = older;
2270       } else
2271       { simpleMutexLock(&db->locks.misc);   /* sync with */
2272 	p->is_leaf = older;		    /* is_leaf_predicate() */
2273 	simpleMutexUnlock(&db->locks.misc);
2274       }
2275 
2276       memset(&il->lifespan, 0, sizeof(il->lifespan));
2277       deferred_free(&db->defer_clouds, il);
2278     } else
2279     { prev = il;
2280     }
2281   }
2282 }
2283 
2284 
2285 static void
free_is_leaf(rdf_db * db,predicate * p)2286 free_is_leaf(rdf_db *db, predicate *p)
2287 { is_leaf *il, *older;
2288 
2289   for(il = p->is_leaf; il; il=older)
2290   { older = il->older;
2291 
2292     rdf_free(db, il, sizeof(*il));
2293   }
2294 
2295   p->is_leaf = NULL;
2296 }
2297 
2298 
2299 		 /*******************************
2300 		 *   PRINT PREDICATE HIERARCHY	*
2301 		 *******************************/
2302 
2303 static int
check_predicate_cloud(predicate_cloud * c)2304 check_predicate_cloud(predicate_cloud *c)
2305 { predicate **pp;
2306   int errors = 0;
2307   int i;
2308 
2309   for(i=0, pp=c->members; i<c->size; i++, pp++)
2310   { predicate *p = *pp;
2311 
2312     if ( p->label != i )
2313     { Sdprintf("Wrong label for %s (%d != %d\n", pname(p), i, p->label);
2314       errors++;
2315     }
2316     if ( p->hash != c->hash )
2317     { Sdprintf("Hash of %s doesn't match cloud hash\n", pname(p));
2318       errors++;				/* this is now normal! */
2319     }
2320     if ( p->cloud != c )
2321     { Sdprintf("Wrong cloud of %s\n", pname(p));
2322       errors++;
2323     }
2324   }
2325 
2326   return errors;
2327 }
2328 
2329 
2330 static void
print_reachability_cloud(rdf_db * db,predicate * p,int all)2331 print_reachability_cloud(rdf_db *db, predicate *p, int all)
2332 { int x, y;
2333   predicate_cloud *cloud = p->cloud;
2334   sub_p_matrix *rm;
2335   query *q;
2336 
2337   Sdprintf("Cloud has %d members, hash = 0x%x\n", cloud->size, cloud->hash);
2338   check_predicate_cloud(cloud);
2339 
2340   if ( !(q = open_query(db)) )
2341   { Sdprintf("No more open queries\n");
2342     return;
2343   }
2344 
2345   for(rm=cloud->reachable; rm; rm=rm->older)
2346   { char b[2][24];
2347 
2348     if ( !all && !alive_lifespan(q, &rm->lifespan) )
2349       continue;
2350 
2351     Sdprintf("\nReachability matrix: %s..%s (%s)\n  ",
2352 	     gen_name(rm->lifespan.born, b[0]),
2353 	     gen_name(rm->lifespan.died, b[1]),
2354 	     alive_lifespan(q, &rm->lifespan) ? "alive" : "dead");
2355 
2356     for(x=0; x<rm->matrix->width; x++)
2357       Sdprintf("%d", x%10);
2358     Sdprintf("\n  ");
2359     for(y=0; y<rm->matrix->heigth; y++)
2360     { predicate *yp = cloud->members[y];
2361 
2362       for(x=0; x<rm->matrix->width; x++)
2363       { if ( testbit(rm->matrix, x, y) )
2364 	  Sdprintf("X");
2365 	else
2366 	  Sdprintf(".");
2367       }
2368 
2369       if ( predicate_hash(yp) == cloud->hash )
2370 	Sdprintf(" %2d %s\n  ", y, pname(yp));
2371       else
2372 	Sdprintf(" %2d %s (hash=0x%x)\n  ", y, pname(yp), predicate_hash(yp));
2373       assert(cloud->members[y]->label == y);
2374     }
2375   }
2376   close_query(q);
2377 }
2378 
2379 
2380 static foreign_t
rdf_print_predicate_cloud(term_t t,term_t all)2381 rdf_print_predicate_cloud(term_t t, term_t all)
2382 { predicate *p;
2383   rdf_db *db = rdf_current_db();
2384   int print_all;
2385 
2386   if ( !get_existing_predicate(db, t, &p) ||
2387        !PL_get_bool_ex(all, &print_all) )
2388     return FALSE;			/* error or no predicate */
2389 
2390   print_reachability_cloud(db, p, print_all);
2391 
2392   return TRUE;
2393 }
2394 
2395 
2396 
2397 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2398 Branching  factors  are  crucial  in  ordering    the  statements  of  a
2399 conjunction. These functions compute  the   average  branching factor in
2400 both directions ("subject --> P  -->  object"   and  "object  -->  P -->
2401 subject") by determining the number of unique   values at either side of
2402 the predicate. This number  is  only   recomputed  if  it  is considered
2403 `dirty'.
2404 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2405 
/* Recompute, if considered necessary, the statistics of predicate p for
   `which' (DISTINCT_DIRECT or the sub-property variant): the number of
   matching live non-duplicate triples, the number of distinct subjects
   and the (approximate) number of distinct objects.  The function always
   returns TRUE.
*/

static int
update_predicate_counts(rdf_db *db, predicate *p, int which, query *q)
{ size_t total = 0;

  if ( which == DISTINCT_DIRECT )
  { size_t changed;

    /* changed = |triple_count - triple total at the last update| */
    if ( p->triple_count >= p->distinct_updated[DISTINCT_DIRECT] )
      changed = p->triple_count - p->distinct_updated[DISTINCT_DIRECT];
    else
      changed = p->distinct_updated[DISTINCT_DIRECT] - p->triple_count;

    /* keep the old numbers while the change is smaller than the total
       recorded at the last update */
    if ( changed < p->distinct_updated[DISTINCT_DIRECT] )
      return TRUE;

    if ( p->triple_count == 0 )		/* no triples: all counts are 0 */
    { p->distinct_count[which]    = 0;
      p->distinct_subjects[which] = 0;
      p->distinct_objects[which]  = 0;

      return TRUE;
    }
  } else
  { /* for the sub-property variant distinct_updated holds the database
       generation of the last update (see the end of this function) */
    size_t changed = db->queries.generation - p->distinct_updated[DISTINCT_SUB];

    if ( changed < p->distinct_count[DISTINCT_SUB] )
      return TRUE;
  }

  { atomset subject_set;
    atomset object_set;
    triple t;
    triple *byp;
    triple_walker tw;

    memset(&t, 0, sizeof(t));
    t.predicate.r = p;
    t.indexed |= BY_P;

    init_atomset(&subject_set);
    init_atomset(&object_set);
    init_triple_walker(&tw, db, &t, t.indexed);
    while((byp=next_triple(&tw)))
    { /* only triples that have not died and are not duplicates */
      if ( byp->lifespan.died == GEN_MAX && !byp->is_duplicate )
      { /* the == test comes first, so isSubPropertyOf() is never
	   called with two identical predicates */
	if ( byp->predicate.r == p ||
	     (which != DISTINCT_DIRECT &&
	      isSubPropertyOf(db, byp->predicate.r, p, q)) )
	{ total++;
	  add_atomset(&subject_set, ID_ATOM(byp->subject_id));
	  add_atomset(&object_set, object_hash(byp)); /* NOTE: not exact! */
	}
      }
    }

    p->distinct_count[which]    = total;
    p->distinct_subjects[which] = subject_set.count;
    p->distinct_objects[which]  = object_set.count;

    destroy_atomset(&subject_set);
    destroy_atomset(&object_set);

    /* remember the state at which the counts were computed: the triple
       total for the direct counts, the generation otherwise */
    if ( which == DISTINCT_DIRECT )
      p->distinct_updated[DISTINCT_DIRECT] = total;
    else
      p->distinct_updated[DISTINCT_SUB] = db->queries.generation;

    DEBUG(1, Sdprintf("%s: distinct subjects (%s): %ld, objects: %ld\n",
		      PL_atom_chars(p->name),
		      (which == DISTINCT_DIRECT ? "rdf" : "rdfs"),
		      p->distinct_subjects[which],
		      p->distinct_objects[which]));
  }

  return TRUE;
}
2481 
2482 
2483 static void
invalidate_distinct_counts(rdf_db * db)2484 invalidate_distinct_counts(rdf_db *db)
2485 { int i;
2486 
2487   for(i=0; i<db->predicates.bucket_count; i++)
2488   { predicate *p = db->predicates.blocks[MSB(i)][i];
2489 
2490     for( ; p; p = p->next )
2491     { p->distinct_updated[DISTINCT_SUB] = 0;
2492       p->distinct_count[DISTINCT_SUB] = 0;
2493       p->distinct_subjects[DISTINCT_SUB] = 0;
2494       p->distinct_objects[DISTINCT_SUB] = 0;
2495     }
2496   }
2497 }
2498 
2499 
2500 static double
subject_branch_factor(rdf_db * db,predicate * p,query * q,int which)2501 subject_branch_factor(rdf_db *db, predicate *p, query *q, int which)
2502 { if ( !update_predicate_counts(db, p, which, q) )
2503     return FALSE;
2504 
2505   if ( p->distinct_subjects[which] == 0 )
2506     return 0.0;				/* 0 --> 0 */
2507 
2508   return (double)p->distinct_count[which] /
2509          (double)p->distinct_subjects[which];
2510 }
2511 
2512 
2513 static double
object_branch_factor(rdf_db * db,predicate * p,query * q,int which)2514 object_branch_factor(rdf_db *db, predicate *p, query *q, int which)
2515 { if ( !update_predicate_counts(db, p, which, q) )
2516     return FALSE;
2517 
2518   if ( p->distinct_objects[which] == 0 )
2519     return 0.0;				/* 0 --> 0 */
2520 
2521   return (double)p->distinct_count[which] /
2522          (double)p->distinct_objects[which];
2523 }
2524 
2525 
2526 
2527 
2528 		 /*******************************
2529 		 *	   NAMED GRAPHS		*
2530 		 *******************************/
2531 
2532 /* MT: all calls must be locked
2533 */
2534 
2535 static int
init_graph_table(rdf_db * db)2536 init_graph_table(rdf_db *db)
2537 { size_t bytes = sizeof(graph**)*INITIAL_GRAPH_TABLE_SIZE;
2538   graph **p = PL_malloc_uncollectable(bytes);
2539   int i, count = INITIAL_GRAPH_TABLE_SIZE;
2540 
2541   memset(p, 0, bytes);
2542   for(i=0; i<MSB(count); i++)
2543     db->graphs.blocks[i] = p;
2544 
2545   db->graphs.bucket_count       = count;
2546   db->graphs.bucket_count_epoch = count;
2547   db->graphs.count              = 0;
2548   db->graphs.erased             = 0;
2549 
2550   return TRUE;
2551 }
2552 
2553 
2554 static int
resize_graph_table(rdf_db * db)2555 resize_graph_table(rdf_db *db)
2556 { int i = MSB(db->graphs.bucket_count);
2557   size_t bytes  = sizeof(graph**)*db->graphs.bucket_count;
2558   graph **p = PL_malloc_uncollectable(bytes);
2559 
2560   memset(p, 0, bytes);
2561   db->graphs.blocks[i] = p-db->graphs.bucket_count;
2562   db->graphs.bucket_count *= 2;
2563   DEBUG(1, Sdprintf("Resized graph table to %ld\n",
2564 		    (long)db->graphs.bucket_count));
2565 
2566   return TRUE;
2567 }
2568 
2569 
/* State for enumerating the graphs that may carry a given name; set up
   by init_graph_walker() and advanced by next_graph().
*/

typedef struct graph_walker
{ rdf_db       *db;			/* database holding the graph table */
  atom_t	name;			/* name of the graph searched for */
  size_t	unbounded_hash;		/* atom_hash() of name, not yet */
					/* reduced modulo a bucket count */
  size_t	bcount;			/* bucket count used for the next */
					/* probe; doubled after each probe */
  graph	       *current;		/* next graph in the current chain */
					/* or NULL */
} graph_walker;
2577 
2578 
2579 static void
init_graph_walker(graph_walker * gw,rdf_db * db,atom_t name)2580 init_graph_walker(graph_walker *gw, rdf_db *db, atom_t name)
2581 { gw->db	     = db;
2582   gw->name	     = name;
2583   gw->unbounded_hash = atom_hash(name, MURMUR_SEED);
2584   gw->bcount	     = db->graphs.bucket_count_epoch;
2585   gw->current	     = NULL;
2586 }
2587 
2588 static graph*
next_graph(graph_walker * gw)2589 next_graph(graph_walker *gw)
2590 { graph *g;
2591 
2592   if ( gw->current )
2593   { g = gw->current;
2594     gw->current = g->next;
2595   } else if ( gw->bcount <= gw->db->graphs.bucket_count )
2596   { do
2597     { int entry = gw->unbounded_hash % gw->bcount;
2598       g = gw->db->graphs.blocks[MSB(entry)][entry];
2599       gw->bcount *= 2;
2600     } while(!g && gw->bcount <= gw->db->graphs.bucket_count );
2601 
2602     if ( g )
2603       gw->current = g->next;
2604   } else
2605     return NULL;
2606 
2607   return g;
2608 }
2609 
2610 
2611 static graph *
existing_graph(rdf_db * db,atom_t name)2612 existing_graph(rdf_db *db, atom_t name)
2613 { graph_walker gw;
2614   graph *g;
2615 
2616   init_graph_walker(&gw, db, name);
2617   while((g=next_graph(&gw)))
2618   { if ( g->name == name )
2619       return g;
2620   }
2621 
2622   return g;
2623 }
2624 
2625 
2626 static graph *
lookup_graph(rdf_db * db,atom_t name)2627 lookup_graph(rdf_db *db, atom_t name)
2628 { graph *g, **gp;
2629   int entry;
2630 
2631   if ( (g=existing_graph(db, name)) && !g->erased )
2632     return g;
2633 
2634   LOCK_MISC(db);
2635   if ( (g=existing_graph(db, name)) )
2636   { if ( g->erased )
2637     { memset(g->digest,            0, sizeof(g->digest));
2638       memset(g->unmodified_digest, 0, sizeof(g->unmodified_digest));
2639       g->md5    = TRUE;
2640       g->erased = FALSE;
2641       db->graphs.erased--;
2642     }
2643 
2644     UNLOCK_MISC(db);
2645     return g;
2646   }
2647 
2648   g = rdf_malloc(db, sizeof(*g));
2649   memset(g, 0, sizeof(*g));
2650   g->name = name;
2651   g->md5 = TRUE;
2652   PL_register_atom(name);
2653   if ( db->graphs.count > db->graphs.bucket_count )
2654     resize_graph_table(db);
2655   entry = atom_hash(name, MURMUR_SEED) % db->graphs.bucket_count;
2656   gp = &db->graphs.blocks[MSB(entry)][entry];
2657   g->next = *gp;
2658   *gp = g;
2659   db->graphs.count++;
2660   UNLOCK_MISC(db);
2661 
2662   return g;
2663 }
2664 
2665 
2666 static void
erase_graphs(rdf_db * db)2667 erase_graphs(rdf_db *db)
2668 { int i;
2669 
2670   for(i=0; i<db->graphs.bucket_count; i++)
2671   { graph *n, *g = db->graphs.blocks[MSB(i)][i];
2672 
2673     db->graphs.blocks[MSB(i)][i] = NULL;
2674 
2675     for( ; g; g = n )
2676     { n = g->next;
2677 
2678       PL_unregister_atom(g->name);
2679       if ( g->source )
2680 	PL_unregister_atom(g->source);
2681       rdf_free(db, g, sizeof(*g));
2682     }
2683   }
2684 
2685   db->graphs.count  = 0;
2686   db->graphs.erased = 0;
2687   db->last_graph    = NULL;
2688 }
2689 
2690 
2691 static int
gc_graphs(rdf_db * db,gen_t gen)2692 gc_graphs(rdf_db *db, gen_t gen)
2693 { int reclaimed = 0;
2694 
2695   if ( db->graphs.erased > 10 + db->graphs.count/2 )
2696   { int i;
2697 
2698     LOCK_MISC(db);
2699     for(i=0; i<db->graphs.bucket_count; i++)
2700     { graph *p, *n, *g;
2701 
2702       p = NULL;
2703       g = db->graphs.blocks[MSB(i)][i];
2704 
2705       for( ; g; g = n )
2706       { n = g->next;
2707 
2708 	if ( g->erased && g->triple_count == 0 )
2709 	{ if ( p )
2710 	    p->next = g->next;
2711 	  else
2712 	    db->graphs.blocks[MSB(i)][i] = g->next;
2713 
2714 	  if ( db->last_graph == g )
2715 	    db->last_graph = NULL;
2716 	  db->graphs.count--;
2717 	  db->graphs.erased--;
2718 	  reclaimed++;
2719 	  deferred_finalize(&db->defer_all, g,
2720 			    finalize_graph, db);
2721 	} else
2722 	  p = g;
2723       }
2724     }
2725     UNLOCK_MISC(db);
2726   }
2727 
2728   return reclaimed;
2729 }
2730 
2731 
2732 static void
register_graph(rdf_db * db,triple * t)2733 register_graph(rdf_db *db, triple *t)
2734 { graph *src;
2735 
2736   if ( !t->graph_id )
2737     return;
2738 
2739   if ( !((src=db->last_graph) && src->name == ID_ATOM(t->graph_id)) )
2740   { src = lookup_graph(db, ID_ATOM(t->graph_id));
2741     db->last_graph = src;
2742   }
2743 
2744   ATOMIC_ADD(&src->triple_count, 1);
2745 #ifdef WITH_MD5
2746   if ( src->md5 )
2747   { md5_byte_t digest[16];
2748     md5_triple(t, digest);
2749     sum_digest(src->digest, digest);
2750   }
2751 #endif
2752 }
2753 
2754 
2755 static void
unregister_graph(rdf_db * db,triple * t)2756 unregister_graph(rdf_db *db, triple *t)
2757 { graph *src;
2758 
2759   if ( !t->graph_id )
2760     return;
2761 
2762   if ( db->last_graph && db->last_graph->name == ID_ATOM(t->graph_id) )
2763   { src = db->last_graph;
2764   } else
2765   { src = existing_graph(db, ID_ATOM(t->graph_id));
2766   }
2767 
2768   if ( src )
2769   { ATOMIC_SUB(&src->triple_count, 1);
2770 #ifdef WITH_MD5
2771     if ( src->md5 )
2772     { md5_byte_t digest[16];
2773       md5_triple(t, digest);
2774       dec_digest(src->digest, digest);
2775     }
2776 #endif
2777   }
2778 }
2779 
2780 
2781 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2782 rdf_graph_(?Graph, ?TripleCount) is nondet.
2783 
2784 True when Graph is a current graph with TripleCount triples.
2785 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2786 
/* Enumeration state for the non-deterministic rdf_graph(); advanced by
   advance_graph_enum().
*/

typedef struct enum_graph
{ graph *g;				/* current graph; NULL before the */
					/* first advance or when exhausted */
  int i;				/* current bucket index; rdf_graph() */
					/* starts it at -1 */
} enum_graph;
2791 
2792 
2793 static graph *
advance_graph_enum(rdf_db * db,enum_graph * eg)2794 advance_graph_enum(rdf_db *db, enum_graph *eg)
2795 { if ( eg->g )
2796     eg->g = eg->g->next;
2797 
2798   while ( !eg->g || (eg->g->erased && eg->g->triple_count == 0) )
2799   { if ( !eg->g )
2800     { while ( ++eg->i < db->graphs.bucket_count &&
2801 	      !(eg->g = db->graphs.blocks[MSB(eg->i)][eg->i]) )
2802 	;
2803       if ( !eg->g )
2804 	return NULL;
2805     } else
2806       eg->g = eg->g->next;
2807   }
2808 
2809   return eg->g;
2810 }
2811 
2812 
2813 static foreign_t
rdf_graph(term_t name,term_t triple_count,control_t h)2814 rdf_graph(term_t name, term_t triple_count, control_t h)
2815 { rdf_db *db = rdf_current_db();
2816   enum_graph *eg;
2817   atom_t a;
2818 
2819   switch( PL_foreign_control(h) )
2820   { case PL_FIRST_CALL:
2821       if ( PL_is_variable(name) )
2822       { eg = rdf_malloc(db, sizeof(*eg));
2823 	eg->i  = -1;
2824 	eg->g  = NULL;
2825 	advance_graph_enum(db, eg);
2826 	goto next;
2827       } else if ( PL_get_atom_ex(name, &a) )
2828       { graph *g;
2829 
2830 	if ( (g=existing_graph(db, a)) && !(g->erased && g->triple_count == 0) )
2831 	  return PL_unify_int64(triple_count, g->triple_count);
2832       }
2833       return FALSE;
2834     case PL_REDO:
2835       eg = PL_foreign_context_address(h);
2836       goto next;
2837     case PL_PRUNED:
2838       eg = PL_foreign_context_address(h);
2839       rdf_free(db, eg, sizeof(*eg));
2840       return TRUE;
2841     default:
2842       assert(0);
2843       return FALSE;
2844   }
2845 
2846 next:
2847   if ( !eg->g ||
2848        !PL_unify_atom(name, eg->g->name) ||
2849        !PL_unify_int64(triple_count, eg->g->triple_count) )
2850   { rdf_free(db, eg, sizeof(*eg));
2851     return FALSE;
2852   }
2853 
2854   if ( advance_graph_enum(db, eg) )
2855   { PL_retry_address(eg);
2856   } else
2857   { rdf_free(db, eg, sizeof(*eg));
2858     return TRUE;
2859   }
2860 }
2861 
2862 
2863 static foreign_t
rdf_graph_source(term_t graph_name,term_t source,term_t modified)2864 rdf_graph_source(term_t graph_name, term_t source, term_t modified)
2865 { atom_t gn;
2866   rdf_db *db = rdf_current_db();
2867 
2868   if ( !get_atom_or_var_ex(graph_name, &gn) )
2869     return FALSE;
2870 
2871   if ( gn )
2872   { graph *s;
2873 
2874     if ( (s = existing_graph(db, gn)) &&
2875 	 !(s->erased && s->triple_count == 0) &&
2876 	 s->source)
2877     { return ( PL_unify_atom(source, s->source) &&
2878 	       PL_unify_float(modified, s->modified) );
2879     }
2880   } else
2881   { atom_t src;
2882 
2883     if ( PL_get_atom_ex(source, &src) )
2884     { int i;
2885 
2886       for(i=0; i<db->graphs.bucket_count; i++)
2887       { graph *g = db->graphs.blocks[MSB(i)][i];
2888 
2889 	for(; g; g=g->next)
2890 	{ if ( g->source == src )
2891 	  { return ( PL_unify_atom(graph_name, g->name) &&
2892 		     PL_unify_float(modified, g->modified) );
2893 	  }
2894 	}
2895       }
2896     }
2897   }
2898 
2899   return FALSE;
2900 }
2901 
2902 
2903 static foreign_t
rdf_set_graph_source(term_t graph_name,term_t source,term_t modified)2904 rdf_set_graph_source(term_t graph_name, term_t source, term_t modified)
2905 { atom_t gn, src;
2906   int rc = FALSE;
2907   rdf_db *db = rdf_current_db();
2908   graph *s;
2909   double mtime;
2910 
2911   if ( !PL_get_atom_ex(graph_name, &gn) ||
2912        !PL_get_atom_ex(source, &src) ||
2913        !PL_get_float_ex(modified, &mtime) )
2914     return FALSE;
2915 
2916   if ( (s = lookup_graph(db, gn)) )
2917   { LOCK_MISC(db);
2918     if ( s->source != src )
2919     { if ( s->source )
2920 	PL_unregister_atom(s->source);
2921       s->source = src;
2922       PL_register_atom(s->source);
2923     }
2924     s->modified = mtime;
2925     UNLOCK_MISC(db);
2926     rc = TRUE;
2927   }
2928 
2929   return rc;
2930 }
2931 
2932 
2933 static foreign_t
rdf_create_graph(term_t graph_name)2934 rdf_create_graph(term_t graph_name)
2935 { atom_t gn;
2936   rdf_db *db = rdf_current_db();
2937   graph *g;
2938 
2939   if ( !PL_get_atom_ex(graph_name, &gn) )
2940     return FALSE;
2941 
2942   if ( (g = existing_graph(db, gn)) && !g->erased )
2943     return TRUE;				/* already exists */
2944   if ( (g = lookup_graph(db, gn)) )
2945   { rdf_broadcast(EV_CREATE_GRAPH, g, NULL);
2946 
2947     return TRUE;
2948   }
2949 
2950   return FALSE;
2951 }
2952 
2953 
2954 static void
clean_atom(atom_t * ap)2955 clean_atom(atom_t *ap)
2956 { atom_t old;
2957 
2958   if ( (old=*ap) )
2959   { *ap = 0;
2960     PL_unregister_atom(old);
2961   }
2962 }
2963 
2964 
2965 static void
finalize_graph(void * mem,void * clientdata)2966 finalize_graph(void *mem, void *clientdata)
2967 { graph *g = mem;
2968   (void)clientdata;
2969 
2970   clean_atom(&g->name);
2971 }
2972 
2973 
2974 static foreign_t
rdf_destroy_graph(term_t graph_name)2975 rdf_destroy_graph(term_t graph_name)
2976 { atom_t gn;
2977   rdf_db *db = rdf_current_db();
2978   graph *g;
2979 
2980   if ( !PL_get_atom_ex(graph_name, &gn) )
2981     return FALSE;
2982 
2983   if ( (g = existing_graph(db, gn)) )
2984   { LOCK_MISC(db);
2985     g->md5 = FALSE;
2986     memset(g->digest,            0, sizeof(g->digest));
2987     memset(g->unmodified_digest, 0, sizeof(g->unmodified_digest));
2988     clean_atom(&g->source);
2989     g->modified = 0.0;
2990     g->erased = TRUE;
2991     db->graphs.erased++;
2992     if ( db->last_graph == g )
2993       db->last_graph = NULL;
2994     UNLOCK_MISC(db);
2995   }
2996 
2997   return TRUE;
2998 }
2999 
3000 
3001 #ifdef WITH_MD5
3002 /** rdf_graph_modified_(+Graph, -IsModified, -UnmodifiedHash)
3003 
3004 True when IsModified reflects  the  modified   status  relative  to  the
3005 `unmodified' digest.
3006 */
3007 
3008 static foreign_t
rdf_graph_modified_(term_t graph_name,term_t ismodified,term_t hash)3009 rdf_graph_modified_(term_t graph_name, term_t ismodified, term_t hash)
3010 { atom_t gn;
3011   rdf_db *db = rdf_current_db();
3012   graph *g;
3013   int rc;
3014 
3015   if ( !PL_get_atom_ex(graph_name, &gn) )
3016     return FALSE;
3017 
3018   if ( (g = lookup_graph(db, gn)) )
3019   { int ismod = (memcmp(g->digest, g->unmodified_digest, 16) != 0);
3020 
3021     rc = ( PL_unify_bool(ismodified, ismod) &&
3022 	   md5_unify_digest(hash, g->unmodified_digest)
3023 	 );
3024   } else
3025     rc = FALSE;
3026 
3027   return rc;
3028 }
3029 
3030 
3031 static int
clear_modified(graph * g)3032 clear_modified(graph *g)
3033 { if ( g->md5 )
3034   { memcpy(g->unmodified_digest, g->digest, 16);
3035     return TRUE;
3036   }
3037 
3038   return FALSE;
3039 }
3040 
3041 
3042 static foreign_t
rdf_graph_clear_modified_(term_t graph_name)3043 rdf_graph_clear_modified_(term_t graph_name)
3044 { atom_t gn;
3045   rdf_db *db = rdf_current_db();
3046   graph *g;
3047 
3048   if ( !PL_get_atom_ex(graph_name, &gn) )
3049     return FALSE;
3050 
3051   if ( (g = lookup_graph(db, gn)) )
3052     return clear_modified(g);
3053 
3054   return FALSE;
3055 }
3056 
3057 
3058 #endif /*WITH_MD5*/
3059 
3060 
3061 		 /*******************************
3062 		 *	     LITERALS		*
3063 		 *******************************/
3064 
3065 static inline void
prepare_literal_ex(literal_ex * lex)3066 prepare_literal_ex(literal_ex *lex)
3067 {
3068 #ifdef LITERAL_EX_MAGIC
3069   lex->magic = LITERAL_EX_MAGIC;
3070 #endif
3071 
3072   if ( lex->literal->objtype == OBJ_STRING )
3073   { lex->atom.handle = lex->literal->value.string;
3074     lex->atom.resolved = FALSE;
3075   }
3076 }
3077 
3078 
3079 static literal *
new_literal(rdf_db * db)3080 new_literal(rdf_db *db)
3081 { literal *lit = rdf_malloc(db, sizeof(*lit));
3082   memset(lit, 0, sizeof(*lit));
3083   lit->references = 1;
3084 
3085   return lit;
3086 }
3087 
3088 
3089 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3090 free_literal_value() gets rid of atoms or term   that forms the value of
3091 the literal. We cannot dispose of  these   immediately  as they might be
3092 needed by an ongoing  scan  of   the  literal  skiplist  for comparison.
3093 Therefore, we use deferred_finalize() and dispose of the triple later.
3094 
Return TRUE if the triple value  could   be  destroyed  and FALSE if the
3096 destruction   has   been   deferred.   That     will   eventually   call
3097 finalize_literal_ptr(), which calls free_literal_value()  again, but now
3098 as not shared literal so it can do its work unconditionally.
3099 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3100 
3101 static void
finalize_literal_ptr(void * mem,void * clientdata)3102 finalize_literal_ptr(void *mem, void *clientdata)
3103 { literal **litp = mem;
3104   rdf_db *db = clientdata;
3105   literal *lit = *litp;
3106 
3107   free_literal_value(db, lit);
3108   rdf_free(db, lit, sizeof(*lit));
3109 }
3110 
3111 
3112 static literal **
unlink_literal(rdf_db * db,literal * lit)3113 unlink_literal(rdf_db *db, literal *lit)
3114 { if ( lit->shared && !db->resetting )
3115   { literal_ex lex;
3116     literal **data;
3117 
3118     lit->shared = FALSE;
3119     DEBUG(2,
3120 	  Sdprintf("Delete %p from literal table: ", lit);
3121 	  print_literal(lit);
3122 	  Sdprintf("\n"));
3123 
3124     lex.literal = lit;
3125     prepare_literal_ex(&lex);
3126 
3127     if ( (data=skiplist_delete(&db->literals, &lex)) )
3128     { return data;
3129     } else
3130     { Sdprintf("Failed to delete %p (size=%ld): ", lit, db->literals.count);
3131       print_literal(lit);
3132       Sdprintf("\n");
3133       assert(0);
3134     }
3135   }
3136 
3137   return NULL;
3138 }
3139 
3140 
3141 static void
free_literal_value(rdf_db * db,literal * lit)3142 free_literal_value(rdf_db *db, literal *lit)
3143 { unlock_atoms_literal(lit);
3144   if ( lit->objtype == OBJ_TERM &&
3145        lit->value.term.record )
3146   { if ( lit->term_loaded )
3147       rdf_free(db, lit->value.term.record, lit->value.term.len);
3148     else
3149       PL_erase_external(lit->value.term.record);
3150   }
3151   lit->objtype = OBJ_UNTYPED;		/* debugging: trap errors early */
3152 }
3153 
3154 
3155 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3156 free_literal() frees a literal, normally referenced   from a triple. The
3157 triple may be shared or not. Triples that   are part of the database are
3158 always shared. Unshared  triples  are   typically  search  patterns,  or
3159 created triples that are deleted  because   some  part  of the operation
3160 fails.
3161 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3162 
/* Drop one reference to `lit`.  The return value is TRUE on every path
   except the one that calls rdf_broadcast(EV_OLD_LITERAL, ...), where
   the result of that call is returned.
*/
static int
free_literal(rdf_db *db, literal *lit)
{ int rc = TRUE;

  if ( lit->shared )
  { simpleMutexLock(&db->locks.literal);	/* count and table change together */
    if ( --lit->references == 0 )
    { literal **data = unlink_literal(db, lit);
      simpleMutexUnlock(&db->locks.literal);	/* broadcast runs unlocked */

      if ( data )			/* unlinked */
      { rc = rdf_broadcast(EV_OLD_LITERAL, lit, NULL);
	deferred_finalize(&db->defer_literals, data,
			  finalize_literal_ptr, db);
      } else				/* nothing unlinked: free right away */
      { free_literal_value(db, lit);
	rdf_free(db, lit, sizeof(*lit));
      }
    } else				/* still referenced elsewhere */
    { simpleMutexUnlock(&db->locks.literal);
    }
  } else				/* not shared; no locking needed */
  { if ( --lit->references == 0 )
    { free_literal_value(db, lit);
      rdf_free(db, lit, sizeof(*lit));
    }
  }

  return rc;
}
3193 
3194 
3195 static literal *
copy_literal(rdf_db * db,literal * lit)3196 copy_literal(rdf_db *db, literal *lit)
3197 { lit->references++;
3198   assert(lit->references != 0);
3199   return lit;
3200 }
3201 
3202 
3203 static void
alloc_literal_triple(rdf_db * db,triple * t)3204 alloc_literal_triple(rdf_db *db, triple *t)
3205 { if ( !t->object_is_literal )
3206   { t->object.literal = new_literal(db);
3207     t->object_is_literal = TRUE;
3208   }
3209 }
3210 
3211 
3212 static void
lock_atoms_literal(literal * lit)3213 lock_atoms_literal(literal *lit)
3214 { if ( !lit->atoms_locked )
3215   { lit->atoms_locked = TRUE;
3216 
3217     switch(lit->objtype)
3218     { case OBJ_STRING:
3219 	PL_register_atom(lit->value.string);
3220 	if ( lit->qualifier )
3221 	  PL_register_atom(ID_ATOM(lit->type_or_lang));
3222 	break;
3223     }
3224   }
3225 }
3226 
3227 
3228 static void
unlock_atoms_literal(literal * lit)3229 unlock_atoms_literal(literal *lit)
3230 { if ( lit->atoms_locked )
3231   { lit->atoms_locked = FALSE;
3232 
3233     switch(lit->objtype)
3234     { case OBJ_STRING:
3235 	PL_unregister_atom(lit->value.string);
3236 	if ( lit->qualifier )
3237 	  PL_unregister_atom(ID_ATOM(lit->type_or_lang));
3238 	break;
3239     }
3240   }
3241 }
3242 
3243 
3244 		 /*******************************
3245 		 *	     LITERAL DB		*
3246 		 *******************************/
3247 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
compare_literals() sorts literals.  Ordering is defined as:

	* Numeric literals < string literals < term literals
	* Numeric literals (int and float) are sorted by value
	* String literals are sorted alphabetically
		- case independent, but uppercase before lowercase
		- locale (strcoll) sorting?
		- delete diacritics
		- first on string, then on type, then on language
	* Terms are sorted on Prolog standard order of terms
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3260 
3261 static int
cmp_qualifier(const literal * l1,const literal * l2)3262 cmp_qualifier(const literal *l1, const literal *l2)
3263 { int q1 = l1->qualifier;
3264   int q2 = l2->qualifier;
3265   atom_t tl1 = ID_ATOM(l1->type_or_lang);
3266   atom_t tl2 = ID_ATOM(l2->type_or_lang);
3267 
3268   if ( q1 == Q_NONE )
3269   { q1 = Q_TYPE;
3270     tl1 = ATOM_XSDString;
3271   }
3272   if ( q2 == Q_NONE )
3273   { q2 = Q_TYPE;
3274     tl2 = ATOM_XSDString;
3275   }
3276 
3277   if ( q1 == q2 )
3278   { if ( tl1 )
3279       return cmp_atoms(tl1, tl2);
3280     return -1;
3281   }
3282 
3283   return q1 - q2;
3284 }
3285 
3286 static xsd_primary
is_numerical_string(const literal * lit)3287 is_numerical_string(const literal *lit)
3288 { if ( lit->objtype == OBJ_STRING &&
3289        lit->qualifier == Q_TYPE )
3290     return is_numeric_type(ID_ATOM(lit->type_or_lang));
3291 
3292   return XSD_NONNUMERIC;
3293 }
3294 
3295 
3296 static int
same_type(atom_id id1,atom_id id2)3297 same_type(atom_id id1, atom_id id2)
3298 { if ( id1 == id2 )
3299     return TRUE;
3300   if ( id2 == 0 && ID_ATOM(id1) == ATOM_XSDString )
3301     return TRUE;
3302   if ( id1 == 0 && ID_ATOM(id2) == ATOM_XSDString )
3303     return TRUE;
3304 
3305   return FALSE;
3306 }
3307 
3308 
/* Compare the literal wrapped in `lex` (l1) with `l2`.  Returns <0, 0 or
   >0.  Literals of the same objtype are compared by value and, if the
   values compare equal, by cmp_qualifier().  An integer and a double
   are compared numerically; any other mix is ordered by objtype.
*/
static int
compare_literals(literal_ex *lex, literal *l2)
{ literal *l1 = lex->literal;

#ifdef LITERAL_EX_MAGIC
  assert(lex->magic == LITERAL_EX_MAGIC);
#endif

  if ( l1->objtype == l2->objtype )
  { int rc;

    switch(l1->objtype)
    { case OBJ_INTEGER:
      { int64_t v1 = l1->value.integer;
	int64_t v2 = l2->value.integer;
	rc = v1 < v2 ? -1 : v1 > v2 ? 1 : 0;
	break;
      }
      case OBJ_DOUBLE:
      { double v1 = l1->value.real;
	double v2 = l2->value.real;
	rc = v1 < v2 ? -1 : v1 > v2 ? 1 : 0;
	break;
      }
      case OBJ_STRING:
      { if ( lex->atom.handle == l2->value.string &&
	     same_type(l1->type_or_lang, l2->type_or_lang) )
	{ rc = 0;			/* same atom, compatible type */
	} else
	{ xsd_primary nt1 = is_numerical_string(l1);
	  xsd_primary nt2 = is_numerical_string(l2);

	  if ( nt1 || nt2 )
	  { if ( nt1 && nt2 )		/* both numeric XSD strings */
	    { rc = cmp_xsd_info(nt1, &lex->atom, nt2, l2->value.string);
	      if ( rc == 0 && nt1 != nt2 )	/* tie: order on primary type */
		rc = nt1 < nt2 ? 1 : -1;
	    } else			/* numeric strings before others */
	    { rc = nt1 ? -1 : 1;
	    }
	  } else
	  { rc = cmp_atom_info(&lex->atom, l2->value.string);
	  }
	}
	break;
      }
      case OBJ_TERM:
      { fid_t fid = PL_open_foreign_frame();
	term_t t1 = PL_new_term_ref();
	term_t t2 = PL_new_term_ref();
					/* can also be handled in literal_ex */
	PL_recorded_external(l1->value.term.record, t1);
	PL_recorded_external(l2->value.term.record, t2);
	rc = PL_compare(t1, t2);

	PL_discard_foreign_frame(fid);	/* drop the temporary terms */
	break;
      }
      default:
	assert(0);
        return 0;
    }

    if ( rc != 0 )
      return rc;
    return cmp_qualifier(l1, l2);	/* equal values: decide on qualifier */
  } else if ( l1->objtype == OBJ_INTEGER && l2->objtype == OBJ_DOUBLE )
  { double v1 = (double)l1->value.integer;
    double v2 = l2->value.real;
    return v1 < v2 ? -1 : v1 > v2 ? 1 : -1;	/* never 0: integer first */
  } else if ( l1->objtype == OBJ_DOUBLE && l2->objtype == OBJ_INTEGER )
  { double v1 = l1->value.real;
    double v2 = (double)l2->value.integer;
    return v1 < v2 ? -1 : v1 > v2 ? 1 : 1;	/* never 0: double last */
  } else
  { return l1->objtype - l2->objtype;
  }
}
3387 
3388 #ifdef SL_CHECK
3389 static int sl_checking = FALSE;
3390 #endif
3391 
3392 static int
sl_compare_literals(void * p1,void * p2,void * cd)3393 sl_compare_literals(void *p1, void *p2, void *cd)
3394 { literal *l2 = *(literal**)p2;
3395   (void)cd;
3396 
3397 #ifdef SL_CHECK
3398   if ( sl_checking )
3399   { literal *l1 = *(literal**)p1;
3400     literal_ex lex;
3401 
3402     lex.literal = l1;
3403     prepare_literal_ex(&lex);
3404     return compare_literals(&lex, l2);
3405   } else
3406 #endif
3407   { literal_ex *lex = p1;
3408 
3409     assert(l2->objtype != OBJ_UNTYPED);
3410     return compare_literals(lex, l2);
3411   }
3412 }
3413 
3414 
#ifdef SL_CHECK
/* Run skiplist_check() on the literal skiplist.  The check is wrapped
   in DEBUG(2, ...); rc stays TRUE if that body is not executed.
   sl_checking is raised for the duration of the check so that
   sl_compare_literals() treats both arguments as stored payloads.
*/
static int
sl_check(rdf_db *db, int print)
{ int rc = TRUE;

  DEBUG(2, { assert(sl_checking == FALSE);	/* not re-entrant */
	     sl_checking = TRUE;
	     rc = skiplist_check(&db->literals, print);
	     sl_checking = FALSE;
	   });

  return rc;
}
#else
/* Without SL_CHECK the check compiles to nothing */
#define sl_check(db, print) (void)0
#endif
3431 
3432 
/* sl_rdf_malloc() is the allocation callback passed to skiplist_init().
   The client data `cd` is handed to rdf_malloc() as the database.
*/

static void *
sl_rdf_malloc(size_t bytes, void *cd)
{ void *mem = rdf_malloc(cd, bytes);

  return mem;
}
3437 
3438 
3439 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3440 Create the sorted literal tree. Note  that   we  do  not register a free
3441 handler  for  the  tree  as  nodes   are  either  already  destroyed  by
3442 free_literal() or by rdf_reset_db().
3443 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3444 
3445 static int
init_literal_table(rdf_db * db)3446 init_literal_table(rdf_db *db)
3447 { skiplist_init(&db->literals,
3448 		sizeof(literal*),	/* Payload size */
3449 		db,			/* Client data */
3450 		sl_compare_literals,	/* Compare */
3451 		sl_rdf_malloc,		/* Allocate */
3452 		NULL);			/* Destroy */
3453 
3454   return TRUE;
3455 }
3456 
3457 
3458 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3459 share_literal() takes a literal  and  replaces   it  with  one  from the
3460 literal database if there is a match.   On a match, the argument literal
3461 is destroyed. Without a match it adds   the  literal to the database and
3462 returns it.
3463 
3464 Called from add_triples() and update_triples() outside the locked areas.
3465 We must hold db->locks.literal for updating the literal database.
3466 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3467 
/* Returns the shared version of `from`: either `from` itself (now
   flagged shared) or an equal literal already in db->literals, in which
   case `from` is released through free_literal().
*/
static literal *
share_literal(rdf_db *db, literal *from)
{ literal **data, *shared;
  literal_ex lex;
  int is_new;
  static float existing = 0.0;		/* decaying counters: how often the */
  static float new      = 0.0;		/* literal existed resp. was new */

  if ( from->shared )
    return from;				/* already shared */

  lex.literal = from;
  prepare_literal_ex(&lex);

					/* Fast path, tried only if hits on */
					/* existing literals are frequent: */
					/* look up before taking the lock */
  if ( existing*2 > new &&
      (data = skiplist_find(&db->literals, &lex)) )
  { simpleMutexLock(&db->locks.literal);
    existing = existing*0.99+1.0;
    if ( !skiplist_erased_payload(&db->literals, data) )
    { shared = *data;
      shared->references++;
      assert(shared->references != 0);

      simpleMutexUnlock(&db->locks.literal);
      free_literal(db, from);

      return shared;
    }
    simpleMutexUnlock(&db->locks.literal);	/* erased meanwhile: fall through */
  }

  simpleMutexLock(&db->locks.literal);
  sl_check(db, FALSE);
  data = skiplist_insert(&db->literals, &lex, &is_new);
  sl_check(db, FALSE);
  if ( is_new )				/* `from` is now in the table */
  { new = new*0.99+1.0;
    from->shared = TRUE;
    shared = from;
    assert(from->references==1);
    assert(from->atoms_locked==1);
  } else				/* equal literal was already there */
  { existing = existing*0.99+1.0;
    shared = *data;
    shared->references++;
    assert(shared->references != 0);
  }
  simpleMutexUnlock(&db->locks.literal);

					/* printing, freeing and broadcasting */
					/* happen outside the lock */
  if ( !is_new )
  { DEBUG(2,
	  Sdprintf("Replace %p by %p:\n", from, shared);
	  Sdprintf("\tfrom: "); print_literal(from);
	  Sdprintf("\n\tto: "); print_literal(shared);
	  Sdprintf("\n"));

    free_literal(db, from);
  } else
  { DEBUG(2,
	  Sdprintf("Insert %p into literal table: ", from);
	  print_literal(from);
	  Sdprintf("\n"));

    rdf_broadcast(EV_NEW_LITERAL, from, NULL);
  }

  return shared;
}
3536 
3537 
3538 		 /*******************************
3539 		 *	      TRIPLES		*
3540 		 *******************************/
3541 
3542 static triple *
alloc_triple(void)3543 alloc_triple(void)
3544 { triple *t = malloc(sizeof(*t));
3545 
3546   if ( t )
3547   { memset(t, 0, sizeof(*t));
3548 #ifdef COMPACT
3549     t->id = TRIPLE_NO_ID;
3550 #endif
3551   }
3552 
3553   return t;
3554 }
3555 
3556 
/* Dispose triple `t`; a NULL `t` is ignored.  With `linger` the triple
   is handed to deferred_finalize() with finalize_triple() and
   db->lingering is incremented; otherwise its atoms are unlocked, its
   literal (if any) is released and the memory is passed to free().
*/
static void
unalloc_triple(rdf_db *db, triple *t, int linger)
{ if ( t )
  { if ( linger )
    { TMAGIC(t, T_LINGERING);
#ifdef COMPACT
      if ( t->id != TRIPLE_NO_ID )	/* guards only the next statement */
#endif
	deferred_finalize(&db->defer_triples, t,
			  finalize_triple, db);
      ATOMIC_ADD(&db->lingering, 1);	/* counted regardless of the guard */
    } else
    { unlock_atoms(db, t);
      if ( t->object_is_literal && t->object.literal )
	free_literal(db, t->object.literal);
      SECURE(memset(t, 0, sizeof(*t)));
      TMAGIC(t, T_FREED);
      free(t);
    }
  }
}
3578 
3579 
3580 		 /*******************************
3581 		 *	    TRIPLE HASH		*
3582 		 *******************************/
3583 
3584 static int
init_triple_hash(rdf_db * db,int index,size_t count)3585 init_triple_hash(rdf_db *db, int index, size_t count)
3586 { triple_hash *h = &db->hash[index];
3587   size_t bytes = sizeof(triple_bucket)*count;
3588   triple_bucket *t = PL_malloc_uncollectable(bytes);
3589   int i;
3590 
3591   memset(t, 0, bytes);
3592   memset(h, 0, sizeof(*h));
3593 
3594   h->optimize_threshold = col_opt_threshold[index];
3595   h->avg_chain_len      = col_avg_len[index];
3596   h->icol		= index;
3597 
3598   for(i=0; i<MSB(count); i++)
3599     h->blocks[i] = t;
3600 
3601   h->bucket_preinit = h->bucket_count_epoch = h->bucket_count = count;
3602 
3603   return TRUE;
3604 }
3605 
3606 
3607 static int
size_triple_hash(rdf_db * db,int index,size_t size)3608 size_triple_hash(rdf_db *db, int index, size_t size)
3609 { triple_hash *hash = &db->hash[index];
3610   int extra;
3611 
3612   if ( hash->created )
3613     rdf_create_gc_thread(db);
3614 
3615   simpleMutexLock(&db->queries.write.lock);
3616   extra = MSB(size) - MSB(hash->bucket_count);
3617   while( extra-- > 0 )
3618   { int i = MSB(hash->bucket_count);
3619     size_t bytes  = sizeof(triple_bucket)*hash->bucket_count;
3620     triple_bucket *t = PL_malloc_uncollectable(bytes);
3621 
3622     memset(t, 0, bytes);
3623     hash->blocks[i] = t-hash->bucket_count;
3624     hash->bucket_count *= 2;
3625     if ( !hash->created )
3626       hash->bucket_count_epoch = hash->bucket_count;
3627     DEBUG(1, Sdprintf("Resized triple index %s=%d to %ld at %d\n",
3628 		      col_name[index], index, (long)hash->bucket_count, i));
3629   }
3630   simpleMutexUnlock(&db->queries.write.lock);
3631 
3632   return TRUE;
3633 }
3634 
3635 
3636 static void
reset_triple_hash(rdf_db * db,triple_hash * hash)3637 reset_triple_hash(rdf_db *db, triple_hash *hash)
3638 { size_t bytes = sizeof(triple_bucket)*hash->bucket_preinit;
3639   int i;
3640 
3641   memset(hash->blocks[0], 0, bytes);	/* clear first block */
3642   for(i=MSB(hash->bucket_preinit); i<MAX_TBLOCKS; i++)
3643   { if ( hash->blocks[i] )
3644     { triple_bucket *t = hash->blocks[i];
3645 
3646       hash->blocks[i] = NULL;
3647       t += 1<<(i-1);
3648       PL_free(t);
3649     } else
3650       break;
3651   }
3652   hash->bucket_count = hash->bucket_count_epoch = hash->bucket_preinit;
3653   hash->created = FALSE;
3654 }
3655 
3656 
/* count_different() returns the number of elements in a hash bucket
   that have a different unbounded hash.  That is, the bucket might
   split if we resize the table.

   *count is assigned the number of triples found in the bucket.  That
   is merely a consistency check because the bucket also keeps track of
   this value.
*/
3664 
/* Buckets with fewer entries than this are counted using a small local
   array instead of an atomset.
*/
#define COUNT_DIFF_NOHASH 5

/* Returns the number of distinct triple_hash_key(t, index) values in
   bucket `tb` and stores the number of triples visited in *count.
   `index` is passed to triple_hash_key() as is and to
   triple_follow_hash() as ICOL(index).
*/
static int
count_different(rdf_db *db, triple_bucket *tb, int index, int *count)
{ triple *t;
  int rc;

  if ( tb->count < COUNT_DIFF_NOHASH )
  { if ( tb->count <= 1 )		/* 0 or 1 entries: no need to walk */
    { *count = tb->count;

      return tb->count;
    } else
    { size_t hashes[COUNT_DIFF_NOHASH];	/* distinct keys seen so far */
      int different = 0;
      int found = 0;

      for(t = fetch_triple(db, tb->head);
	  t && different < COUNT_DIFF_NOHASH;	/* be careful with concurrently */
	  t = triple_follow_hash(db, t, ICOL(index))) /* added triples */
      { size_t hash = triple_hash_key(t, index);
	int i;

	found++;
	for(i=0; i<different; i++)
	{ if ( hashes[i] == hash )
	    goto next;			/* key already recorded */
	}
	hashes[different++] = hash;

      next:;
      }

      *count = found;

      return different;
    }
  } else				/* larger bucket: collect keys in a set */
  { atomset hash_set;
    int c = 0;

    init_atomset(&hash_set);
    for(t=fetch_triple(db, tb->head); t; t=triple_follow_hash(db, t, ICOL(index)))
    { c++;
      add_atomset(&hash_set, (atom_t)triple_hash_key(t, index));
    }
    rc = hash_set.count;
    destroy_atomset(&hash_set);

    *count = c;
  }

  return rc;
}
3719 
3720 
3721 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3722 triple_hash_quality() computes the quality of the triple hash index. The
3723 return is 1.0 if the unbounded hashkey for all objects in each bucket is
3724 the same, and < 1.0 if there  are buckets holding objects with different
3725 unbounded keys.
3726 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3727 
3728 static float
triple_hash_quality(rdf_db * db,int index,int sample)3729 triple_hash_quality(rdf_db *db, int index, int sample)
3730 { triple_hash *hash = &db->hash[index];
3731   int i, step;
3732   float q = 0;
3733   size_t total = 0;
3734 
3735   if ( index == 0 )
3736     return 1.0;
3737 
3738   if ( sample > 0 )
3739     step = (hash->bucket_count+sample)/sample;	/* step >= 1 */
3740   else
3741     step = 1;
3742 
3743   for(i=0; i<hash->bucket_count; i += step)
3744   { int entry = MSB(i);
3745     triple_bucket *tb = &hash->blocks[entry][i];
3746     int count;
3747     int different = count_different(db, tb, col_index[index], &count);
3748 
3749     DEBUG(1,			/* inconsistency is normal due to concurrency */
3750 	  if ( count != tb->count )
3751 	    Sdprintf("Inconsistent count in index=%d, bucket=%d, %d != %d\n",
3752 		     index, i, count, tb->count));
3753 
3754     if ( count )
3755     { q += (float)count/(float)different;
3756       total += count;
3757     }
3758   }
3759 
3760   return total == 0 ? 1.0 : q/(float)total;
3761 }
3762 
3763 
#ifdef O_DEBUG
/* print_triple_hash() dumps (a sample of) the buckets of index `index`
   using Sdprintf().  For each non-empty bucket it prints the bucket
   number, the number of triples and the number of distinct keys,
   followed by the triples themselves.  With sample > 0 roughly `sample`
   buckets are visited; otherwise all of them.
*/

void
print_triple_hash(rdf_db *db, int index, int sample)
{ triple_hash *hash = &db->hash[index];
  int step;
  int i;

  step = sample > 0 ? (hash->bucket_count+sample)/sample	/* step >= 1 */
		    : 1;

  for(i=0; i<hash->bucket_count; i += step)
  { triple_bucket *tb = &hash->blocks[MSB(i)][i];
    int count;
    int different = count_different(db, tb, col_index[index], &count);
    triple *t;

    if ( count == 0 )
      continue;				/* skip empty buckets */

    Sdprintf("%d: c=%d; d=%d", i, count, different);

    t = fetch_triple(db, tb->head);
    while ( t )
    { Sdprintf("\n\t");
      print_triple(t, 0);
      t = triple_follow_hash(db, t, index);
    }
  }
}
#endif /*O_DEBUG*/
3793 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Consider resizing the hash-tables. The argument 'extra' gives the number
of triples that  will  be  added.  This   is  used  to  guess  the  hash
requirements of the table  and  thus   avoid  duplicating  triples  in
optimize_triple_hashes().
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3800 
/* Grow the created, non user-sized index tables if the SPO table is too
   small for the current number of triples plus `extra`.  If at least
   one table was resized, invalidate_distinct_counts() is called.
*/
void
consider_triple_rehash(rdf_db *db, size_t extra)
{ size_t triples = db->created - db->erased;	/* triples currently alive */
  triple_hash *spo = &db->hash[ICOL(BY_SPO)];

  if ( (extra + triples)/spo->avg_chain_len > spo->bucket_count )
  { int i;
    int resized = 0;
					/* expected growth in 1/16 units; */
					/* the 100000 damps small databases */
    int factor = ((extra+triples+100000)*16)/(triples+100000);

					/* Scale a count by the growth factor */
					/* and the target chain length of */
					/* table i.  SCALEF is not used below. */
#define SCALE(n) (((n)*factor)/(16*db->hash[i].avg_chain_len))
#define SCALEF(n) (((n)*(float)factor)/(16.0*(float)db->hash[i].avg_chain_len))

    for(i=1; i<INDEX_TABLES; i++)
    { int resize = 0;			/* number of doublings needed */
      size_t sizenow = db->hash[i].bucket_count;

      if ( db->hash[i].user_size || db->hash[i].created == FALSE )
	continue;			/* user set size */

      switch(col_index[i])
      { case BY_S:
	case BY_SG:
	case BY_SP:
	  while ( SCALE(db->resources.hash.count) > sizenow<<resize )
	    resize++;
	  break;
	case BY_P:
	  while ( SCALE(db->predicates.count) > sizenow<<resize )
	    resize++;
	  break;
	case BY_O:
	case BY_PO:
	{ size_t setsize = SCALE(db->resources.hash.count + db->literals.count);

	  if ( setsize > triples )	/* cap at the number of triples */
	    setsize = triples;
	  while ( setsize > sizenow<<resize )
	    resize++;
	  break;
	}
	case BY_SPO:
	  while ( (extra+triples)/spo->avg_chain_len > sizenow<<resize )
	    resize++;
	  break;
	case BY_G:
	  while ( SCALE(db->graphs.count) > sizenow<<resize )
	    resize++;
	  break;
	case BY_PG:
	{ size_t s;

					/* the larger of the two counts */
	  s = (db->graphs.count < db->predicates.count ?
				  db->predicates.count : db->graphs.count);

	  while ( SCALE(s) > sizenow<<resize )
	    resize++;
	  break;
	}
	default:
	  assert(0);
      }

      if ( resize )
      { resized++;
	size_triple_hash(db, i, sizenow<<resize);
      }
    }

#undef SCALE
#undef SCALEF

    if ( resized )
      invalidate_distinct_counts(db);
  }
}
3877 
3878 
3879 static size_t
distinct_hash_values(rdf_db * db,int icol)3880 distinct_hash_values(rdf_db *db, int icol)
3881 { triple *t;
3882   size_t count;
3883   atomset hash_set;
3884   int byx = col_index[icol];
3885 
3886   init_atomset(&hash_set);
3887   for(t=fetch_triple(db, db->by_none.head);
3888       t;
3889       t=triple_follow_hash(db, t, ICOL(BY_NONE)))
3890   { add_atomset(&hash_set, (atom_t)triple_hash_key(t, byx));
3891   }
3892   count = hash_set.count;
3893   destroy_atomset(&hash_set);
3894 
3895   return count;
3896 }
3897 
3898 
3899 static void
initial_size_triple_hash(rdf_db * db,int icol)3900 initial_size_triple_hash(rdf_db *db, int icol)
3901 { triple_hash *hash = &db->hash[icol];
3902   size_t size;
3903 
3904   switch(col_index[icol])
3905   { case BY_S:
3906       size = db->resources.hash.count;
3907       break;
3908     case BY_P:
3909       size = db->predicates.count;
3910       break;
3911     case BY_O:
3912       size = db->resources.hash.count + db->literals.count;
3913       break;
3914     case BY_SPO:
3915       size = db->created - db->erased;
3916       break;
3917     case BY_G:
3918       size = db->graphs.count;
3919       break;
3920     case BY_PO:
3921     case BY_SG:
3922     case BY_SP:
3923     case BY_PG:
3924       size = distinct_hash_values(db, icol);
3925       break;
3926     default:
3927       assert(0);
3928       return;
3929   }
3930 
3931   size /= hash->avg_chain_len;
3932   size_triple_hash(db, icol, size);
3933 }
3934 
3935 
3936 static int
init_tables(rdf_db * db)3937 init_tables(rdf_db *db)
3938 { int ic;
3939   triple_hash *by_none = &db->hash[ICOL(BY_NONE)];
3940 
3941   by_none->blocks[0] = &db->by_none;
3942   by_none->bucket_count_epoch = 1;
3943   by_none->bucket_count = 1;
3944   by_none->created = TRUE;
3945 
3946   for(ic=BY_S; ic<INDEX_TABLES; ic++)
3947   { if ( !init_triple_hash(db, ic, INITIAL_TABLE_SIZE) )
3948       return FALSE;
3949   }
3950 
3951   return (init_resource_db(db, &db->resources) &&
3952 	  init_pred_table(db) &&
3953 	  init_graph_table(db) &&
3954 	  init_literal_table(db));
3955 }
3956 
3957 
3958 		 /*******************************
3959 		 *     INDEX OPTIMIZATION	*
3960 		 *******************************/
3961 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Index optimization copies triples  that  have   been  indexed  while the
hash-table was small to the  current  table.   This  adds  a copy of the
triple to the index (at the new place).   The  old triple gets a pointer
->reindexed pointing to the new version.   deref_triple() finds the real
triple.

The next thing we need to do is   reclaim this in gc_hash_chain(). To do
that, we replace old->lifespan.died with db->reindexed++. The logic that
finds old queries  also  finds  the   query  with  the  oldest reindexed
counter. Triples that have yet older   old->lifespan.died  can safely be
removed.

TBD: To preserve order, we must insert   the  new triples before the old
ones. This is significantly more complex,   notably because they must be
re-indexed in reverse order in  this  case.   Probably  the  best way to
implement this is to collect the  triples   that  must be reindexed in a
triple buffer and then use a version of link_triple_hash() that prepends
the triples, calling on the triples from the buffer in reverse order. We
will ignore this for now: triple ordering has no semantics.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
3983 
/* Create a copy t2 of triple `t`, link the copy into the hash tables and
   mark `t` as reindexed by the copy.
   NOTE(review): the result of alloc_triple() is dereferenced without a
   NULL check -- confirm how out-of-memory is meant to be handled here.
*/
static void
reindex_triple(rdf_db *db, triple *t)
{ triple *t2 = alloc_triple();

  *t2 = *t;				/* start as a bitwise copy */
  t2->has_reindex_prev = TRUE;
  memset(&t2->tp, 0, sizeof(t2->tp));	/* copy gets fresh hash links */
  register_triple(db, t2);
  simpleMutexLock(&db->queries.write.lock);
  link_triple_hash(db, t2);
  TMAGIC(t2, T_CHAINED2);
  t->reindexed = T_ID(t2);		/* old triple refers to the copy */
  TMAGIC(t, T_REINDEXED);
  t->lifespan.died = db->reindexed++;	/* died now holds a reindex counter */
  if ( t2->object_is_literal )			/* do not deallocate lit twice */
  { simpleMutexLock(&db->locks.literal);
    t2->object.literal->references++;
    assert(t2->object.literal->references != 0);
    simpleMutexUnlock(&db->locks.literal);
  }
  t->atoms_locked = FALSE;			/* same for unlock_atoms() */
  simpleMutexUnlock(&db->queries.write.lock);
}
4007 
4008 
4009 static int
optimizable_triple_hash(rdf_db * db,int icol)4010 optimizable_triple_hash(rdf_db *db, int icol)
4011 { triple_hash *hash = &db->hash[icol];
4012   int opt = 0;
4013   size_t epoch;
4014 
4015   if ( hash->created == FALSE )
4016     return FALSE;
4017 
4018   for ( epoch=hash->bucket_count_epoch; epoch < hash->bucket_count; epoch*=2 )
4019     opt++;
4020 
4021   opt -= hash->optimize_threshold;
4022   if ( opt < 0 )
4023     opt = 0;
4024 
4025   return opt;
4026 }
4027 
4028 
4029 static int
optimizable_hashes(rdf_db * db)4030 optimizable_hashes(rdf_db *db)
4031 { int icol;
4032   int optimizable = 0;
4033 
4034   for(icol=1; icol<INDEX_TABLES; icol++)
4035     optimizable += optimizable_triple_hash(db, icol);
4036 
4037   return optimizable;
4038 }
4039 
4040 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
optimize_triple_hash() only doubles hash->bucket_count_epoch!  It may be
necessary to call it multiple times, but   reindexing one step at a time
is not slower than doing it all at once (is this true?)

Note that there is another reason  to  do   only  a  little  of the work
because copying the triples temporarily costs memory.

(*) We have already done the reindexing  from another index. It may also
mean that this triple was reindexed in a  previous pass, but that GC has
not yet reclaimed the triple. I think that  should be fine because it is
old and buried anyway, but still accessible for old queries.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4054 
4055 static int
optimize_triple_hash(rdf_db * db,int icol,gen_t gen)4056 optimize_triple_hash(rdf_db *db, int icol, gen_t gen)
4057 { triple_hash *hash = &db->hash[icol];
4058 
4059   if ( optimizable_triple_hash(db, icol) )
4060   { size_t b_no = 0;
4061     size_t upto = hash->bucket_count_epoch;
4062     size_t copied = 0;
4063 
4064     for( ; b_no < upto; b_no++ )
4065     { triple_bucket *bucket = &hash->blocks[MSB(b_no)][b_no];
4066       triple *t;
4067 
4068       for(t=fetch_triple(db, bucket->head); t; t=triple_follow_hash(db, t, icol))
4069       { if ( t->lifespan.died >= gen &&
4070 	     !t->reindexed &&		/* see (*) */
4071 	     triple_hash_key(t, col_index[icol]) % hash->bucket_count != b_no )
4072 	{ reindex_triple(db, t);
4073 	  copied++;
4074 	}
4075       }
4076     }
4077 
4078     hash->bucket_count_epoch = upto*2;
4079     DEBUG(1, Sdprintf("Optimized hash %s (epoch=%ld; size=%ld; copied=%ld)\n",
4080 		      col_name[icol],
4081 		      (long)hash->bucket_count_epoch,
4082 		      (long)hash->bucket_count,
4083 		      (long)copied));
4084 
4085     return 1;
4086   }
4087 
4088   return 0;
4089 }
4090 
4091 
4092 static int
optimize_triple_hashes(rdf_db * db,gen_t gen)4093 optimize_triple_hashes(rdf_db *db, gen_t gen)
4094 { int icol;
4095   int optimized = 0;
4096 
4097   for(icol=1; icol<INDEX_TABLES; icol++)
4098   { enter_scan(&db->defer_all);
4099     optimized += optimize_triple_hash(db, icol, gen);
4100     exit_scan(&db->defer_all);
4101     if ( PL_handle_signals() < 0 )
4102       return -1;
4103   }
4104 
4105   return optimized;			/* # hashes optimized */
4106 }
4107 
4108 
4109 		 /*******************************
4110 		 *	GARBAGE COLLECTION	*
4111 		 *******************************/
4112 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Garbage collect triples, given that the   oldest  running query reads at
generation gen.	 There are two things we can do:

  - Remove any triple that died before gen.  These triples must be left
    to GC.  See also alloc.c.

We count `uncollectable' triples: erased triples that still have queries
that depend on them. If no  such  triples   exist  there  is no point in
running GC.

Should do something similar with reindexed   triples  that cannot yet be
collected? The problem is less likely,   because they become ready after
all active _queries_ started before the reindexing have died, whereas the
generation stuff depends on longer lived objects  such  as snapshots and
transactions.

t->linked is managed at three places:   link_triple_hash(), where we are
sure that the triple is not garbage  (are we, reindex_triple()?), when a
new index is created and when the triple has been removed from the index
links (below).
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4135 
4136 static inline int
is_garbage_triple(triple * t,gen_t old_query_gen,gen_t old_reindex_gen)4137 is_garbage_triple(triple *t, gen_t old_query_gen, gen_t old_reindex_gen)
4138 { if ( t->has_reindex_prev )
4139     return FALSE;
4140 
4141   if ( t->reindexed )				/* Safe: reindex_triple() */
4142     return t->lifespan.died < old_reindex_gen;	/* is also part of GC */
4143   else
4144     return t->lifespan.died < old_query_gen;
4145 }
4146 
4147 
4148 static size_t
gc_hash_chain(rdf_db * db,size_t bucket_no,int icol,gen_t gen,gen_t reindex_gen)4149 gc_hash_chain(rdf_db *db, size_t bucket_no, int icol,
4150 	      gen_t gen, gen_t reindex_gen)
4151 { triple_bucket *bucket = &db->hash[icol].blocks[MSB(bucket_no)][bucket_no];
4152   triple *prev = NULL;
4153   triple *t;
4154   size_t collected = 0;
4155   size_t uncollectable = 0;
4156 
4157   for(t = fetch_triple(db, bucket->head); t; t=triple_follow_hash(db, t, icol))
4158   { if ( is_garbage_triple(t, gen, reindex_gen) )
4159     { simpleMutexLock(&db->queries.write.lock);
4160 
4161       if ( prev )
4162 	prev->tp.next[icol] = t->tp.next[icol];
4163       else
4164 	bucket->head = t->tp.next[icol];
4165       if ( T_ID(t) == bucket->tail )
4166 	bucket->tail = T_ID(prev);
4167 
4168       collected++;
4169 
4170       if ( --t->linked == 0 )
4171       { DEBUG(2, { char buf[2][64];
4172 		   Sdprintf("GC at gen=%s..%s: ",
4173 			    gen_name(t->lifespan.born, buf[0]),
4174 			    gen_name(t->lifespan.died, buf[1]));
4175 		   print_triple(t, PRT_NL);
4176 		 });
4177 
4178 	if ( t->reindexed )
4179 	{ triple *t2 = fetch_triple(db, t->reindexed);
4180 
4181 	  db->gc.reclaimed_reindexed++;
4182 	  t2->has_reindex_prev = FALSE;
4183 	} else
4184 	  db->gc.reclaimed_triples++;
4185 
4186 	simpleMutexUnlock(&db->queries.write.lock);
4187 	free_triple(db, t, TRUE);
4188       } else
4189       { simpleMutexUnlock(&db->queries.write.lock);
4190       }
4191     } else
4192     { prev=t;
4193       if ( icol == 0 && t->erased && !t->reindexed &&
4194 	   t->lifespan.died >= gen )
4195 	uncollectable++;
4196     }
4197   }
4198 
4199   if ( collected && icol > 0 )		/* concurrent with hashing new ones */
4200     ATOMIC_SUB(&bucket->count, collected);
4201 
4202   if ( icol == 0 )
4203   { char buf[64];
4204 
4205     DEBUG(4, Sdprintf("At %s: %lld uncollectable\n",
4206 		      gen_name(gen, buf),
4207 		      uncollectable));
4208     db->gc.uncollectable = uncollectable;
4209   }
4210 
4211   return collected;
4212 }
4213 
4214 
4215 static size_t
gc_hash(rdf_db * db,int icol,gen_t gen,gen_t reindex_gen)4216 gc_hash(rdf_db *db, int icol, gen_t gen, gen_t reindex_gen)
4217 { size_t mb = db->hash[icol].bucket_count;
4218   size_t b;
4219   size_t collected = 0;
4220 
4221   for(b=0; b<mb; b++)
4222     collected += gc_hash_chain(db, b, icol, gen, reindex_gen);
4223 
4224   return collected;
4225 }
4226 
4227 
4228 static int
gc_hashes(rdf_db * db,gen_t gen,gen_t reindex_gen)4229 gc_hashes(rdf_db *db, gen_t gen, gen_t reindex_gen)
4230 { size_t garbage = db->erased    - db->gc.reclaimed_triples;
4231   size_t reindex = db->reindexed - db->gc.reclaimed_reindexed;
4232 
4233   if ( garbage + reindex > 0 )
4234   { int icol;
4235 
4236     for(icol=0; icol<INDEX_TABLES; icol++)
4237     { size_t collected;
4238 
4239       if ( db->hash[icol].created )
4240       { enter_scan(&db->defer_all);
4241 	collected = gc_hash(db, icol, gen, reindex_gen);
4242 	exit_scan(&db->defer_all);
4243 
4244 	if ( PL_handle_signals() < 0 )
4245 	  return -1;
4246       } else
4247 	collected = 0;
4248 
4249       if ( icol == 0 && collected == 0 )
4250 	break;
4251     }
4252   }
4253 
4254   return 0;
4255 }
4256 
4257 
4258 static int
gc_set_busy(rdf_db * db)4259 gc_set_busy(rdf_db *db)
4260 { int busy;
4261 
4262   simpleMutexLock(&db->locks.misc);
4263   if ( !(busy = db->gc.busy) )
4264     db->gc.busy = TRUE;
4265   simpleMutexUnlock(&db->locks.misc);
4266 
4267   return !busy;
4268 }
4269 
4270 
/* Clear the GC busy flag while holding db->locks.misc. */

static void
gc_clear_busy(rdf_db *db)
{ simpleMutexLock(&db->locks.misc);
  db->gc.busy = FALSE;
  simpleMutexUnlock(&db->locks.misc);
}
4277 
4278 
4279 static int
gc_db(rdf_db * db,gen_t gen,gen_t reindex_gen)4280 gc_db(rdf_db *db, gen_t gen, gen_t reindex_gen)
4281 { char buf[64];
4282   int rc;
4283 
4284   if ( !gc_set_busy(db) )
4285     return FALSE;
4286   simpleMutexLock(&db->locks.gc);
4287   DEBUG(10, Sdprintf("RDF GC; gen = %s\n", gen_name(gen, buf)));
4288   if ( optimize_triple_hashes(db, gen) >= 0 &&
4289        gc_hashes(db, gen, reindex_gen) >= 0 &&
4290        gc_clouds(db, gen) >= 0 &&
4291        gc_graphs(db, gen) >= 0 )
4292   { db->gc.count++;
4293     db->gc.last_gen = gen;
4294     db->gc.last_reindex_gen = reindex_gen;
4295     rc = TRUE;
4296   } else
4297     rc = FALSE;
4298   gc_clear_busy(db);
4299   simpleMutexUnlock(&db->locks.gc);
4300 
4301   return rc;
4302 }
4303 
4304 
/* Acquire db->locks.gc and reset the GC statistics of `db`.  The lock is
   NOT released on return; the caller must call resume_gc() to release
   it.  The busy flag is sampled without a lock before acquiring
   db->locks.gc and is only used for the debug messages.  Always returns
   TRUE.
*/

static int
suspend_gc(rdf_db *db)
{ int was_busy = db->gc.busy;

  DEBUG(2, if ( was_busy )
	     Sdprintf("Reset: GC in progress, waiting ...\n"));

  simpleMutexLock(&db->locks.gc);		/* released by resume_gc() */
  DEBUG(2, if ( was_busy )
	     Sdprintf("Reset: GC finished\n"));
  db->gc.busy		     = TRUE;	/* flag set while resetting */
  db->gc.count		     = 0;
  db->gc.time		     = 0.0;
  db->gc.reclaimed_triples   = 0;
  db->gc.reclaimed_reindexed = 0;
  db->reindexed		     = 0;
  db->gc.uncollectable	     = 0;
  db->gc.last_gen	     = 0;
  db->gc.busy		     = FALSE;

  return TRUE;
}
4327 
4328 
/* Release db->locks.gc, the lock that suspend_gc() leaves locked. */

static void
resume_gc(rdf_db *db)
{ simpleMutexUnlock(&db->locks.gc);
}
4333 
4334 
4335 
/** rdf_gc_(-Done) is semidet.

Run the RDF-DB garbage collector. The collector   is  typically run in a
separate thread. Its execution does not  interfere with readers and only
synchronizes with writers using short-held locks.

Fails without any action if there is already a GC in progress.

NOTE(review): the C implementation below takes no arguments, so the
-Done argument in the header above looks stale; confirm against the
predicate registration.
*/

static foreign_t
rdf_gc(void)
{ rdf_db *db = rdf_current_db();
  gen_t reindex_gen;			/* filled by the call below */
  gen_t gen = oldest_query_geneneration(db, &reindex_gen);

  return gc_db(db, gen, reindex_gen);	/* FALSE if GC is already busy */
}
4353 
4354 
4355 /** rdf_add_gc_time(+Time:double) is det.
4356 
4357 Add CPU time to GC statistics.  This is left to Prolog
4358 
4359 */
4360 
4361 static foreign_t
rdf_add_gc_time(term_t time)4362 rdf_add_gc_time(term_t time)
4363 { double t;
4364 
4365   if ( PL_get_float_ex(time, &t) )
4366   { rdf_db *db = rdf_current_db();
4367 
4368     db->gc.time += t;
4369     return TRUE;
4370   }
4371 
4372   return FALSE;
4373 }
4374 
4375 /** rdf_gc_info(-Info) is det.
4376 
4377 Return info to help deciding on whether or not to call rdf_gc. Info is a
4378 record with the following members:
4379 
4380   1. Total number of triples in hash (dead or alive)
4381   2. Total dead triples in hash (deleted or reindexed)
4382   3. Total reindexed but not reclaimed triples
4383   4. Total number of possible optimizations to hash-tables.
4384   5. Oldest generation we must keep
4385   6. Oldest generation at last GC
4386   7. Oldest reindexed triple we must keep
4387   8. Oldest reindexed at last GC
4388 */
4389 
4390 #define INT_ARG(val) PL_INT64, (int64_t)(val)
4391 
4392 static foreign_t
rdf_gc_info(term_t info)4393 rdf_gc_info(term_t info)
4394 { rdf_db *db     = rdf_current_db();
4395   size_t life    = db->created   - db->gc.reclaimed_triples;
4396   size_t garbage = db->erased    - db->gc.reclaimed_triples;
4397   size_t reindex = db->reindexed - db->gc.reclaimed_reindexed;
4398   gen_t keep_reindex;
4399   gen_t keep_gen = oldest_query_geneneration(db, &keep_reindex);
4400 
4401   if ( keep_gen == db->gc.last_gen )
4402   { garbage -= db->gc.uncollectable;
4403     assert((int64_t)garbage >= 0);
4404   }
4405 
4406   return PL_unify_term(info,
4407 		       PL_FUNCTOR_CHARS, "gc_info", 8,
4408 		         INT_ARG(life),
4409 		         INT_ARG(garbage),
4410 		         INT_ARG(reindex),
4411 		         INT_ARG(optimizable_hashes(db)),
4412 		         INT_ARG(keep_gen),
4413 		         INT_ARG(db->gc.last_gen),
4414 		         INT_ARG(keep_reindex),
4415 		         INT_ARG(db->gc.last_reindex_gen));
4416 }
4417 
4418 
4419 		 /*******************************
4420 		 *	      GC THREAD		*
4421 		 *******************************/
4422 
4423 int
rdf_create_gc_thread(rdf_db * db)4424 rdf_create_gc_thread(rdf_db *db)
4425 { if ( db->gc.thread_started )
4426     return TRUE;
4427 
4428   simpleMutexLock(&db->locks.misc);
4429   if ( !db->gc.thread_started )
4430   { db->gc.thread_started = TRUE;
4431 
4432     PL_call_predicate(NULL, PL_Q_NORMAL,
4433 		      PL_predicate("rdf_create_gc_thread", 0, "rdf_db"), 0);
4434   }
4435   simpleMutexUnlock(&db->locks.misc);
4436 
4437   return TRUE;
4438 }
4439 
4440 
4441 		 /*******************************
4442 		 *	  OVERALL DATABASE	*
4443 		 *******************************/
4444 
/* Allocate and initialise a new, empty database.  The structure is
   zeroed first, after which the locks, hash tables, triple array, query
   administration and prefix table are set up and the non-zero defaults
   are filled in.
*/

static rdf_db *
new_db(void)
{ rdf_db *db = PL_malloc_uncollectable(sizeof(*db));

  memset(db, 0, sizeof(*db));		/* all fields default to 0/NULL */
  INIT_LOCK(db);
  init_tables(db);
  init_triple_array(db);
  init_query_admin(db);
  db->prefixes = new_prefix_table();

  db->duplicate_admin_threshold = DUPLICATE_ADMIN_THRESHOLD;
  db->snapshots.keep = GEN_MAX;
  db->queries.generation = GEN_EPOCH;

  return db;
}
4462 
4463 
4464 static rdf_db *RDF_DB;
4465 
4466 rdf_db *
rdf_current_db(void)4467 rdf_current_db(void)
4468 { if ( RDF_DB )
4469     return RDF_DB;
4470 
4471   simpleMutexLock(&rdf_lock);
4472   if ( !RDF_DB )
4473     RDF_DB = new_db();
4474   simpleMutexUnlock(&rdf_lock);
4475 
4476   return RDF_DB;
4477 }
4478 
4479 
/* Obtain a triple from alloc_triple() and flag it as allocated, which
   makes free_triple() hand it to unalloc_triple().  The `db` argument is
   not used here.
*/

static triple *
new_triple(rdf_db *db)
{ triple *t = alloc_triple();
  t->allocated = TRUE;

  return t;
}
4487 
4488 
4489 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4490 free_triple() is called in  two  scenarios.   One  is  from  the garbage
4491 collector after a triple is deleted from   all hash chains. In this case
4492 the linger argument is TRUE and  the   next-pointers  of the triples are
4493 still in place because search may be   scanning  the triple. See alloc.c
4494 for details on the triple memory management. The second case is deletion
4495 of temporary triples, something that may   happen  from many threads. In
4496 either case, this is typically called unlocked.
4497 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4498 
4499 static void
free_triple(rdf_db * db,triple * t,int linger)4500 free_triple(rdf_db *db, triple *t, int linger)
4501 { if ( t->match == STR_MATCH_BETWEEN )
4502     free_literal_value(db, &t->tp.end);
4503 
4504   if ( !t->allocated )
4505   { unlock_atoms(db, t);
4506     if ( t->object_is_literal && t->object.literal )
4507     { free_literal(db, t->object.literal);
4508       t->object_is_literal = FALSE;
4509     }
4510   } else
4511   { unalloc_triple(db, t, linger);
4512   }
4513 }
4514 
4515 
4516 static size_t
literal_hash(literal * lit)4517 literal_hash(literal *lit)
4518 { if ( lit->hash )
4519   { return lit->hash;
4520   } else
4521   { unsigned int hash;
4522 
4523     switch(lit->objtype)
4524     { case OBJ_STRING:
4525 	hash = atom_hash_case(lit->value.string);
4526         break;
4527       case OBJ_INTEGER:
4528       case OBJ_DOUBLE:
4529 	hash = rdf_murmer_hash(&lit->value.integer,
4530 			       sizeof(lit->value.integer),
4531 			       MURMUR_SEED);
4532         break;
4533       case OBJ_TERM:
4534 	hash = rdf_murmer_hash(lit->value.term.record,
4535 			       (int)lit->value.term.len,
4536 			       MURMUR_SEED);
4537 	break;
4538       default:
4539 	assert(0);
4540 	return 0;
4541     }
4542 
4543     if ( !hash )
4544       hash = 0x1;			/* cannot be 0 */
4545 
4546     lit->hash = hash;
4547     return lit->hash;
4548   }
4549 }
4550 
4551 
4552 static size_t
object_hash(triple * t)4553 object_hash(triple *t)
4554 { if ( t->object_is_literal )
4555   { return literal_hash(t->object.literal);
4556   } else
4557   { return atom_hash(t->object.resource, OBJ_MURMUR_SEED);
4558   }
4559 }
4560 
4561 
/* Hash the subject id of `t` using the subject seed */

static size_t
subject_hash(triple *t)
{ return atom_hash(t->subject_id, SUBJ_MURMUR_SEED);
}
4566 
/* Hash the graph id of `t` using the graph seed */

static size_t
graph_hash(triple *t)
{ return atom_hash(t->graph_id, GRAPH_MURMUR_SEED);
}
4571 
4572 
4573 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4574 triple_hash_key() computes the hash for a triple   on  a given index. It
4575 can only be called for indices defined in the col_index-array. Note that
4576 the returned value is unconstrained and  needs   to  be taken modulo the
4577 table-size.
4578 
4579 If   you   change   anything   here,   you    might   need   to   update
4580 init_cursor_from_literal().
4581 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4582 
4583 static size_t
triple_hash_key(triple * t,int which)4584 triple_hash_key(triple *t, int which)
4585 { size_t v = 0;
4586 
4587   assert(t->resolve_pred == FALSE);
4588 
4589   if ( which&BY_S ) v ^= subject_hash(t);
4590   if ( which&BY_P ) v ^= predicate_hash(t->predicate.r);
4591   if ( which&BY_O ) v ^= object_hash(t);
4592   if ( which&BY_G ) v ^= graph_hash(t);
4593 
4594   return v;
4595 }
4596 
4597 
4598 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4599 by_inverse[] returns the index key to use   for inverse search as needed
4600 to realise symmetric and inverse predicates.
4601 
4602 Note that this only deals with the   non-G(graph)  indices because it is
4603 only used by rdf_has/3 and rdf_reachable/3.
4604 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4605 
/* Indexed by a BY_* value (0..7); each entry is the same key with the
   roles of subject and object exchanged.
*/
static int by_inverse[8] =
{ BY_NONE,				/* BY_NONE = 0 */
  BY_O,					/* BY_S    = 1 */
  BY_P,					/* BY_P    = 2 */
  BY_PO,				/* BY_SP   = 3 */
  BY_S,					/* BY_O    = 4 */
  BY_SO,				/* BY_SO   = 5 */
  BY_SP,				/* BY_PO   = 6 */
  BY_SPO,				/* BY_SPO  = 7 */
};
4616 
4617 
/* Append triple `t` to the end of the chain of `bucket` for index `icol`
   and atomically increment the bucket's count.  If the bucket has a tail,
   `t` is linked after it; otherwise `t` becomes the head.
*/

static inline void
append_triple_bucket(rdf_db *db, triple_bucket *bucket, int icol, triple *t)
{ if ( bucket->tail )
  { fetch_triple(db, bucket->tail)->tp.next[icol] = T_ID(t);
  } else
  { bucket->head = T_ID(t);
  }
  bucket->tail = T_ID(t);
  ATOMIC_INC(&bucket->count);
}
4628 
4629 
4630 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4631 (*) ->linked is decremented in gc_hash_chain() for garbage triples. This
4632 can conflict. We must use some sort   of  synchronization with GC if the
4633 died generation is not the maximum and the triple might thus be garbage.
4634 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4635 
4636 static void
create_triple_hashes(rdf_db * db,int count,int * ic)4637 create_triple_hashes(rdf_db *db, int count, int *ic)
4638 { triple_hash *hashes[16];
4639   int i, mx=0;
4640 
4641   for(i=0; i<count; i++)
4642   { hashes[mx] = &db->hash[ic[i]];
4643     if ( !hashes[mx]->created )
4644     { initial_size_triple_hash(db, hashes[mx]->icol);
4645       mx++;
4646     }
4647   }
4648   hashes[mx] = NULL;
4649 
4650   if ( mx > 0 )
4651   { simpleMutexLock(&db->queries.write.lock);
4652 
4653     for(i=0; i<mx; i++)
4654     { if ( hashes[i]->created )
4655       { mx--;
4656 	memmove(&hashes[i], &hashes[i+1], sizeof(hashes[0])*(mx-i));
4657       } else
4658       { DEBUG(1, Sdprintf("Creating hash %s\n", col_name[hashes[i]->icol]));
4659       }
4660     }
4661 
4662     if ( mx > 0 )
4663     { triple *t;
4664 
4665       for(t=fetch_triple(db, db->by_none.head);
4666 	  t;
4667 	  t=triple_follow_hash(db, t, ICOL(BY_NONE)))
4668       { for(i=0; i<mx; i++)
4669 	{ triple_hash *hash = hashes[i];
4670 	  int i = col_index[hash->icol];
4671 	  int key = triple_hash_key(t, i) % hash->bucket_count;
4672 	  triple_bucket *bucket = &hash->blocks[MSB(key)][key];
4673 
4674 	  append_triple_bucket(db, bucket, hash->icol, t);
4675 	  t->linked++;				/* (*) atomic? */
4676 	}
4677       }
4678 
4679       for(i=0; i<mx; i++)
4680       { triple_hash *hash = hashes[i];
4681 	hash->created = TRUE;
4682       }
4683     }
4684     simpleMutexUnlock(&db->queries.write.lock);
4685   }
4686 }
4687 
4688 
4689 /* called with queries.write.lock held */
4690 
4691 static void
link_triple_hash(rdf_db * db,triple * t)4692 link_triple_hash(rdf_db *db, triple *t)
4693 { int ic;
4694   int linked = 1;
4695 
4696   append_triple_bucket(db, &db->by_none, ICOL(BY_NONE), t);
4697 
4698   for(ic=1; ic<INDEX_TABLES; ic++)
4699   { triple_hash *hash = &db->hash[ic];
4700 
4701     if ( hash->created )
4702     { int i = col_index[ic];
4703       int key = triple_hash_key(t, i) % hash->bucket_count;
4704       triple_bucket *bucket = &hash->blocks[MSB(key)][key];
4705 
4706       append_triple_bucket(db, bucket, ic, t);
4707       linked++;
4708     }
4709   }
4710 
4711   t->linked = linked;				/* safe: never garbage */
4712 }
4713 
4714 
4715 /* prelink_triple() performs that part of the triple loading that does
4716    not require locking.
4717 */
4718 
/* Steps performed here: register the triple, resolve its predicate if
   that is still pending, replace a literal object by the result of
   share_literal() and, if the database maintains duplicates, mark
   duplicates.  Always returns TRUE.
*/

int
prelink_triple(rdf_db *db, triple *t, query *q)
{ register_triple(db, t);
  if ( t->resolve_pred )
  { t->predicate.r = lookup_predicate(db, t->predicate.u);
    t->resolve_pred = FALSE;
  }
  if ( t->object_is_literal )
    t->object.literal = share_literal(db, t->object.literal);
  if ( db->maintain_duplicates )
    mark_duplicate(db, t, q);

  return TRUE;
}
4733 
4734 
/* MT: Caller must hold db->queries.write.lock

   Note: the function below is declared void, so the return convention
   that used to be documented here (FALSE if nothing changed; TRUE if
   the database has changed) does not apply to it.
   TBD: Not all of this requires locking.  Most should be moved out of
   the lock:

	- Check for duplicates (?)
	- Consider re-hash
	- subProperty admin
*/
4745 
4746 void
add_triple_consequences(rdf_db * db,triple * t,query * q)4747 add_triple_consequences(rdf_db *db, triple *t, query *q)
4748 { if ( t->predicate.r->name == ATOM_subPropertyOf &&
4749        t->object_is_literal == FALSE )
4750   { addSubPropertyOf(db, t, q);
4751   }
4752 }
4753 
4754 
4755 /* Called with queries.write.lock held */
4756 
/* Add triple `t`, which must not be linked yet, to the hash chains,
   process its consequences and increment the count of created triples.
   Always returns TRUE.
*/

int
link_triple(rdf_db *db, triple *t, query *q)
{ assert(!t->linked);

  link_triple_hash(db, t);
  TMAGIC(t, T_CHAINED1);
  add_triple_consequences(db, t, q);
  db->created++;

  return TRUE;
}
4768 
4769 
/* Register the predicate and the graph of `t`.  The query `q` is not
   used.  Always returns TRUE.
*/

int
postlink_triple(rdf_db *db, triple *t, query *q)
{ register_predicate(db, t);
  register_graph(db, t);

  return TRUE;
}
4777 
4778 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Erase a triple from the DB.

MT: Caller must hold db->queries.write.lock
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4784 
4785 void
del_triple_consequences(rdf_db * db,triple * t,query * q)4786 del_triple_consequences(rdf_db *db, triple *t, query *q)
4787 { if ( t->predicate.r->name == ATOM_subPropertyOf &&
4788        t->object_is_literal == FALSE )
4789     delSubPropertyOf(db, t, q);
4790 }
4791 
4792 
4793 void
erase_triple(rdf_db * db,triple * t,query * q)4794 erase_triple(rdf_db *db, triple *t, query *q)
4795 { if ( t->erased )
4796     return;
4797 
4798   simpleMutexLock(&db->locks.erase);
4799   if ( !t->erased )
4800   { db->erased++;			/* incr. must be before setting erased */
4801     t->erased = TRUE;			/* to make sure #garbage >= 0 */
4802     simpleMutexUnlock(&db->locks.erase);
4803 
4804     unregister_graph(db, t);		/* Updates count and MD5 */
4805     unregister_predicate(db, t);	/* Updates count */
4806     if ( t->is_duplicate )
4807       ATOMIC_SUB(&db->duplicates, 1);
4808   } else
4809   { simpleMutexUnlock(&db->locks.erase);
4810   }
4811 }
4812 
4813 
4814 static int
match_literals(int how,literal * p,literal * e,literal * v)4815 match_literals(int how, literal *p, literal *e, literal *v)
4816 { literal_ex lex;
4817 
4818   lex.literal = p;
4819   prepare_literal_ex(&lex);
4820 
4821   DEBUG(2, { Sdprintf("match_literals(");
4822 	     print_literal(p);
4823 	     Sdprintf(", ");
4824 	     print_literal(v);
4825 	     Sdprintf(")\n"); });
4826 
4827   switch(how)
4828   { case STR_MATCH_LT:
4829       return compare_literals(&lex, v) > 0;
4830     case STR_MATCH_LE:
4831       return compare_literals(&lex, v) >= 0;
4832     case STR_MATCH_EQ:
4833       return compare_literals(&lex, v) == 0;
4834     case STR_MATCH_GE:
4835       return compare_literals(&lex, v) <= 0;
4836     case STR_MATCH_GT:
4837       return compare_literals(&lex, v) < 0;
4838     case STR_MATCH_BETWEEN:
4839       if ( compare_literals(&lex, v) <= 0 )
4840       { lex.literal = e;
4841 	prepare_literal_ex(&lex);
4842 
4843 	if ( compare_literals(&lex, v) >= 0 )
4844 	  return TRUE;
4845       }
4846       return FALSE;
4847     default:
4848       return match_atoms(how, p->value.string, v->value.string);
4849   }
4850 }
4851 
4852 
4853 static int
match_numerical(int how,literal * p,literal * e,literal * v)4854 match_numerical(int how, literal *p, literal *e, literal *v)
4855 { xsd_primary nv, np;
4856   literal_ex lex;
4857 
4858   if ( !(nv=is_numerical_string(v)) )
4859     return FALSE;
4860   if ( !p->value.string )		/* literal(eq(type(<numeric>,_)),_) */
4861     return TRUE;
4862 
4863   np = is_numerical_string(p);
4864   assert(np);
4865 
4866   lex.literal = p;
4867   prepare_literal_ex(&lex);
4868 
4869   switch(how)
4870   { case STR_MATCH_LT:
4871       return cmp_xsd_info(np, &lex.atom, nv, v->value.string)  > 0;
4872     case STR_MATCH_LE:
4873       return cmp_xsd_info(np, &lex.atom, nv, v->value.string) >= 0;
4874     case STR_MATCH_GE:
4875       return cmp_xsd_info(np, &lex.atom, nv, v->value.string) <= 0;
4876     case STR_MATCH_GT:
4877       return cmp_xsd_info(np, &lex.atom, nv, v->value.string) <  0;
4878     case STR_MATCH_BETWEEN:
4879       if ( cmp_xsd_info(np, &lex.atom, nv, v->value.string) <= 0 )
4880       { lex.literal = e;
4881 	prepare_literal_ex(&lex);
4882 
4883 	if ( cmp_xsd_info(np, &lex.atom, nv, v->value.string) >= 0 )
4884 	  return TRUE;
4885       }
4886       return FALSE;
4887     case STR_MATCH_EQ:
4888     default:
4889       return cmp_xsd_info(np, &lex.atom, nv, v->value.string) == 0;
4890   }
4891 }
4892 
4893 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
match_object() is TRUE if the object of triple t matches the object of
pattern p.

If the pattern object is a literal, t must have a literal object as
well. A pattern literal without object type and without qualifier
matches any literal. Otherwise the object types must be equal (if the
pattern has one) and matching depends on the object type of the pattern,
p->match and the flags MATCH_NUMERIC and MATCH_QUAL.

If the pattern object is not a literal, a pattern without a resource
matches anything; otherwise t must have the same resource as object.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int
match_object(triple *t, triple *p, unsigned flags)
{ if ( p->object_is_literal )
  { if ( t->object_is_literal )
    { literal *plit = p->object.literal;
      literal *tlit = t->object.literal;

      if ( !plit->objtype && !plit->qualifier )	/* matches any literal */
	return TRUE;

      if ( plit->objtype && plit->objtype != tlit->objtype )
	return FALSE;

      switch( plit->objtype )
      { case 0:					/* only qualifier is given */
	  if ( plit->type_or_lang == ATOM_ID(ATOM_xsdString) &&
	       tlit->qualifier == Q_NONE )
	    return TRUE;
	  if ( plit->qualifier &&
	       tlit->qualifier != plit->qualifier )
	    return FALSE;
	  if ( plit->type_or_lang &&
	       tlit->type_or_lang != plit->type_or_lang )
	    return FALSE;
	  return TRUE;
	case OBJ_STRING:
	  /* numeric match */
	  if ( (flags&MATCH_NUMERIC) )
	    return match_numerical(p->match, plit, &p->tp.end, tlit);
	  /* qualifier match */
	  if ( !( plit->type_or_lang == ATOM_ID(ATOM_xsdString) &&
		  tlit->qualifier == Q_NONE ) )
	  { if ( (flags & MATCH_QUAL) ||
		 p->match == STR_MATCH_PLAIN )
	    { if ( tlit->qualifier != plit->qualifier )
		return FALSE;
	    } else
	    { if ( plit->qualifier && tlit->qualifier &&
		   tlit->qualifier != plit->qualifier )
		return FALSE;
	    }
	    if ( plit->type_or_lang &&
		 tlit->type_or_lang != plit->type_or_lang )
	      return FALSE;
	  }
	  /* lexical match */
	  if ( plit->value.string )
	  { if ( tlit->value.string != plit->value.string ||
		 p->match == STR_MATCH_LT || p->match == STR_MATCH_GT )
	    { if ( p->match >= STR_MATCH_ICASE )
	      { return match_literals(p->match, plit, &p->tp.end, tlit);
	      } else
	      { return FALSE;
	      }
	    }
	  }
	  return TRUE;
	case OBJ_INTEGER:
	  if ( p->match >= STR_MATCH_LT )
	    return match_literals(p->match, plit, &p->tp.end, tlit);
	  return tlit->value.integer == plit->value.integer;
	case OBJ_DOUBLE:
	  if ( p->match >= STR_MATCH_LT )
	    return match_literals(p->match, plit, &p->tp.end, tlit);
	  return tlit->value.real == plit->value.real;
	case OBJ_TERM:
	  if ( p->match >= STR_MATCH_LT )
	    return match_literals(p->match, plit, &p->tp.end, tlit);
	  /* NOTE(review): if plit->value.term.record is NULL the length
	     check is skipped and memcmp() is still called with it;
	     presumably term.len is 0 in that case -- confirm. */
	  if ( plit->value.term.record &&
	       plit->value.term.len != tlit->value.term.len )
	    return FALSE;
	  return memcmp(tlit->value.term.record, plit->value.term.record,
			plit->value.term.len) == 0;
	default:
	  assert(0);
      }
    }
    return FALSE;				/* t has no literal object */
  } else
  { if ( p->object.resource )
    { if ( t->object_is_literal ||
	   (p->object.resource != t->object.resource) )
	return FALSE;
    }
  }

  return TRUE;
}
4982 
4983 
4984 
4985 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4986 match_triples() is TRUE if the triple  t   matches  the  pattern p. This
4987 function does not  consider  whether  or   not  the  triple  is visible.
4988 Matching is controlled by flags:
4989 
4990     - MATCH_SUBPROPERTY		Perform rdfs:subPropertyOf matching
4991     - MATCH_SRC			Also match the source
4992     - MATCH_QUAL		Match language/type qualifiers
4993     - STR_MATCH_*		Additional string matching
4994 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
4995 
4996 static int
match_triples(rdf_db * db,triple * t,triple * p,query * q,unsigned flags)4997 match_triples(rdf_db *db, triple *t, triple *p, query *q, unsigned flags)
4998 { /* DEBUG(3, Sdprintf("match_triple(");
4999 	   print_triple(t, 0);
5000 	   Sdprintf(")\n"));
5001   */
5002 
5003   if ( p->subject_id && t->subject_id != p->subject_id )
5004     return FALSE;
5005   if ( !match_object(t, p, flags) )
5006     return FALSE;
5007   if ( flags & MATCH_SRC )
5008   { if ( p->graph_id && t->graph_id != p->graph_id )
5009       return FALSE;
5010     if ( p->line && t->line != p->line )
5011       return FALSE;
5012   }
5013 					/* last; may be expensive */
5014   if ( p->predicate.r && t->predicate.r != p->predicate.r )
5015   { if ( (flags & MATCH_SUBPROPERTY) )
5016       return isSubPropertyOf(db, t->predicate.r, p->predicate.r, q);
5017     else
5018       return FALSE;
5019   }
5020   return TRUE;
5021 }
5022 
5023 
5024 		 /*******************************
5025 		 *	      SAVE/LOAD		*
5026 		 *******************************/
5027 
5028 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5029 The RDF triple format.  This format is intended for quick save and load
5030 and not for readability or exchange.  Parts are based on the SWI-Prolog
5031 Quick Load Format (implemented in pl-wic.c).
5032 
5033 	<file>		::= <magic>
5034 			    <version>
5035 			    ['S' <graph-name>]
5036 			    ['F' <graph-source>]
5037 		            ['t' <modified>]
5038 			    ['M' <md5>]
5039 			    {<triple>}
5040 			    'E'
5041 
5042 	<magic>		::= "RDF-dump\n"
5043 	<version>	::= <integer>
5044 
5045 	<md5>		::= <byte>*		(16 bytes digest)
5046 
5047 	<triple>	::= 'T'
5048 	                    <subject>
5049 			    <predicate>
5050 			    <object>
5051 			    <graph>
5052 
5053 	<subject>	::= <resource>
5054 	<predicate>	::= <resource>
5055 
5056 	<object>	::= "R" <resource>
5057 			  | "L" <atom>
5058 
5059 	<resource>	::= <atom>
5060 
	<atom>		::= "X" <integer>
			  | "A" <string>
			  | "W" <utf-8 string>
5064 
5065 	<string>	::= <integer><bytes>
5066 
5067 	<graph-name>	::= <atom>
5068 	<graph-source>	::= <atom>
5069 
5070 	<graph>	::= <graph-file>
5071 			    <line>
5072 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5073 
5074 #define SAVE_MAGIC "RDF-dump\n"
5075 #define SAVE_VERSION 2
5076 
/* Cell in a saved_table: maps a value (compared by address/handle) to
   the sequence number it was given by add_saved().
*/
typedef struct saved
{ union
  { atom_t     atom;
    predicate *pred;
    literal   *lit;
    void      *any;			/* generic view used for hashing */
  } value;
  size_t as;				/* id assigned from saved_id */
  struct saved *next;			/* next cell in hash chain */
} saved;
5087 
5088 
/* Chained hash table of `saved` cells */
typedef struct saved_table
{ saved ** saved_table;			/* array of chain heads */
  size_t   saved_size;			/* # entries in saved_table */
  size_t   saved_id;			/* next id to hand out */
  tmp_store *store;			/* cells are allocated from here */
} saved_table;
5095 
5096 
/* Hash the pointer value itself (not what it points to) with `seed` */

static inline int
saved_hash(void *value, unsigned int seed)
{ return rdf_murmer_hash(&value, sizeof(value), seed);
}
5101 
5102 
/* Initialise `tab` as an empty table with 64 chains that allocates its
   cells from `store`.
*/

static void
init_saved_table(rdf_db *db, saved_table *tab, tmp_store *store)
{ size_t size = 64;			/* initial # chains */
  size_t bytes = size * sizeof(*tab->saved_table);

  tab->saved_table = rdf_malloc(db, bytes);
  memset(tab->saved_table, 0, bytes);
  tab->saved_size = size;
  tab->saved_id = 0;
  tab->store = store;
}
5114 
5115 static void
resize_saved(rdf_db * db,saved_table * tab)5116 resize_saved(rdf_db *db, saved_table *tab)
5117 { size_t newsize = tab->saved_size * 2;
5118   size_t newbytes = sizeof(*tab->saved_table) * newsize;
5119   saved **newt = rdf_malloc(db, newbytes);
5120   saved **s = tab->saved_table;
5121   int i;
5122 
5123   memset(newt, 0, newbytes);
5124   for(i=0; i<tab->saved_size; i++, s++)
5125   { saved *c, *n;
5126 
5127     for(c=*s; c; c = n)
5128     { int hash = saved_hash(c->value.any, MURMUR_SEED) % newsize;
5129 
5130       n = c->next;
5131       c->next = newt[hash];
5132       newt[hash] = c;
5133     }
5134   }
5135 
5136   rdf_free(db, tab->saved_table, tab->saved_size*sizeof(*tab->saved_table));
5137   tab->saved_table = newt;
5138   tab->saved_size  = newsize;
5139 }
5140 
5141 
/* Free the chain array of `tab`, if any.  The cells themselves are not
   freed here.
*/

static void
destroy_saved_table(rdf_db *db, saved_table *tab)
{ if ( tab->saved_table )
    rdf_free(db, tab->saved_table, tab->saved_size*sizeof(*tab->saved_table));
}
5147 
5148 static saved *
lookup_saved(saved_table * tab,void * value)5149 lookup_saved(saved_table *tab, void *value)
5150 { int hash = saved_hash(value, MURMUR_SEED) % tab->saved_size;
5151   saved *s;
5152 
5153   for(s=tab->saved_table[hash]; s; s= s->next)
5154   { if ( s->value.any == value )
5155       return s;
5156   }
5157 
5158   return NULL;
5159 }
5160 
5161 static saved *
add_saved(rdf_db * db,saved_table * tab,void * value)5162 add_saved(rdf_db *db, saved_table *tab, void *value)
5163 { int hash;
5164   saved *s;
5165 
5166   if ( tab->saved_id/4 > tab->saved_size )
5167     resize_saved(db, tab);
5168 
5169   hash = saved_hash(value, MURMUR_SEED) % tab->saved_size;
5170   if ( (s = alloc_tmp_store(tab->store, sizeof(*s))) )
5171   { s->value.any = value;
5172     s->as = tab->saved_id++;
5173     s->next = tab->saved_table[hash];
5174     tab->saved_table[hash] = s;
5175   }
5176 
5177   return s;
5178 }
5179 
5180 
/* State kept while saving: one table per kind of saved object and the
   store from which all table cells are allocated.
*/
typedef struct save_context
{ saved_table	atoms;
  saved_table	literals;			/* only if version > 2 */
  saved_table	predicates;			/* only if version > 2 */
  tmp_store	store;
  int		version;			/* current save version */
} save_context;
5188 
/* Initialise `ctx` for saving in format `version`.  The literal and
   predicate tables are only set up if version > 2.
*/

static void
init_saved(rdf_db *db, save_context *ctx, int version)
{ init_tmp_store(&ctx->store);
  init_saved_table(db, &ctx->atoms, &ctx->store);
  if ( version > 2 )
  { init_saved_table(db, &ctx->literals, &ctx->store);
    init_saved_table(db, &ctx->predicates, &ctx->store);
  }
  ctx->version = version;
}
5199 
/* Release what init_saved() set up: the tables that were initialised for
   ctx->version and finally the store.
*/

static void
destroy_saved(rdf_db *db, save_context *ctx)
{ destroy_saved_table(db, &ctx->atoms);
  if ( ctx->version > 2 )
  { destroy_saved_table(db, &ctx->literals);
    destroy_saved_table(db, &ctx->predicates);
  }
  destroy_tmp_store(&ctx->store);
}
5209 
/* Find atom `a` in the atom table of `ctx`; NULL if not present */

static saved *
lookup_saved_atom(save_context *ctx, atom_t a)
{ return lookup_saved(&ctx->atoms, (void*)a);
}
5214 
/* Add atom `a` to the atom table of `ctx`; NULL if allocation fails */

static saved *
add_saved_atom(rdf_db *db, save_context *ctx, atom_t a)
{ return add_saved(db, &ctx->atoms, (void*)a);
}
5219 
/* Find literal `l` (by address) in the literal table of `ctx` */

static saved *
lookup_saved_literal(save_context *ctx, literal *l)
{ return lookup_saved(&ctx->literals, l);
}
5224 
/* Add literal `l` to the literal table of `ctx`; NULL if allocation fails */

static saved *
add_saved_literal(rdf_db *db, save_context *ctx, literal *l)
{ return add_saved(db, &ctx->literals, l);
}
5229 
/* Find predicate `p` (by address) in the predicate table of `ctx` */

static saved *
lookup_saved_predicate(save_context *ctx, predicate *p)
{ return lookup_saved(&ctx->predicates, p);
}
5234 
5235 static saved *
add_saved_predicate(rdf_db * db,save_context * ctx,predicate * p)5236 add_saved_predicate(rdf_db *db, save_context *ctx, predicate *p)
5237 { return add_saved(db, &ctx->predicates, p);
5238 }
5239 
5240 
5241 #define INT64BITSIZE (sizeof(int64_t)*8)
5242 #define PLMINLONG   ((int64_t)((uint64_t)1<<(INT64BITSIZE-1)))
5243 
5244 static void
save_int(IOSTREAM * fd,int64_t n)5245 save_int(IOSTREAM *fd, int64_t n)
5246 { int m;
5247   int64_t absn = (n >= 0 ? n : -n);
5248 
5249   if ( n != PLMINLONG )
5250   { if ( absn < ((intptr_t)1 << 5) )
5251     { Sputc((int)(n & 0x3f), fd);
5252       return;
5253     } else if ( absn < ((intptr_t)1 << 13) )
5254     { Sputc((int)(((n >> 8) & 0x3f) | (1 << 6)), fd);
5255       Sputc((int)(n & 0xff), fd);
5256       return;
5257     } else if ( absn < ((intptr_t)1 << 21) )
5258     { Sputc((int)(((n >> 16) & 0x3f) | (2 << 6)), fd);
5259       Sputc((int)((n >> 8) & 0xff), fd);
5260       Sputc((int)(n & 0xff), fd);
5261       return;
5262     }
5263   }
5264 
5265   for(m = sizeof(n); ; m--)
5266   { int b = (int)((absn >> (((m-1)*8)-1)) & 0x1ff);
5267 
5268     if ( b == 0 )
5269       continue;
5270     break;
5271   }
5272 
5273   Sputc(m | (3 << 6), fd);
5274 
5275   for( ; m > 0; m--)
5276   { int b = (int)((n >> ((m-1)*8)) & 0xff);
5277 
5278     Sputc(b, fd);
5279   }
5280 }
5281 
5282 
5283 #define BYTES_PER_DOUBLE sizeof(double)
5284 #ifdef WORDS_BIGENDIAN
5285 static const int double_byte_order[] = { 7,6,5,4,3,2,1,0 };
5286 #else
5287 static const int double_byte_order[] = { 0,1,2,3,4,5,6,7 };
5288 #endif
5289 
5290 static int
save_double(IOSTREAM * fd,double f)5291 save_double(IOSTREAM *fd, double f)
5292 { unsigned char *cl = (unsigned char *)&f;
5293   unsigned int i;
5294 
5295   for(i=0; i<BYTES_PER_DOUBLE; i++)
5296     Sputc(cl[double_byte_order[i]], fd);
5297 
5298   return TRUE;
5299 }
5300 
5301 
5302 static int
save_atom(rdf_db * db,IOSTREAM * out,atom_t a,save_context * ctx)5303 save_atom(rdf_db *db, IOSTREAM *out, atom_t a, save_context *ctx)
5304 { saved *s;
5305   size_t len;
5306   const char *chars;
5307   unsigned int i;
5308   const wchar_t *wchars;
5309 
5310   if ( (s=lookup_saved_atom(ctx, a)) )
5311   { Sputc('X', out);
5312     save_int(out, s->as);
5313 
5314     return TRUE;
5315   } else
5316   { s = add_saved_atom(db, ctx, a);
5317   }
5318 
5319   if ( (chars = PL_atom_nchars(a, &len)) )
5320   { Sputc('A', out);
5321     save_int(out, len);
5322     for(i=0; i<len; i++, chars++)
5323       Sputc(*chars&0xff, out);
5324   } else if ( (wchars = PL_atom_wchars(a, &len)) )
5325   { IOENC enc = out->encoding;
5326 
5327     Sputc('W', out);
5328     save_int(out, len);
5329     out->encoding = ENC_UTF8;
5330     for(i=0; i<len; i++, wchars++)
5331     { wint_t c = *wchars;
5332 
5333       SECURE(assert(c>=0 && c <= 0x10ffff));
5334       Sputcode(c, out);
5335     }
5336     out->encoding = enc;
5337   } else
5338     return FALSE;
5339 
5340   return TRUE;
5341 }
5342 
5343 
5344 static int
save_predicate(rdf_db * db,IOSTREAM * out,predicate * p,save_context * ctx)5345 save_predicate(rdf_db *db, IOSTREAM *out, predicate *p, save_context *ctx)
5346 { if ( ctx->version > 2 )
5347   { saved *s;
5348 
5349     if ( (s=lookup_saved_predicate(ctx, p)) )
5350     { Sputc('X', out);
5351       save_int(out, s->as);
5352 
5353       return TRUE;
5354     } else
5355     { s = add_saved_predicate(db, ctx, p);
5356       Sputc('P', out);
5357     }
5358   }
5359 
5360   return save_atom(db, out, p->name, ctx);
5361 }
5362 
5363 static int
save_literal(rdf_db * db,IOSTREAM * out,literal * lit,save_context * ctx)5364 save_literal(rdf_db *db, IOSTREAM *out, literal *lit, save_context *ctx)
5365 { if ( ctx->version > 2 )
5366   { saved *s;
5367 
5368     if ( (s=lookup_saved_literal(ctx, lit)) )
5369     { Sputc('X', out);
5370       save_int(out, s->as);
5371 
5372       return TRUE;
5373     } else
5374     { s = add_saved_literal(db, ctx, lit);
5375     }
5376   }
5377 
5378   if ( lit->qualifier )
5379   { assert(lit->type_or_lang);
5380     Sputc(lit->qualifier == Q_LANG ? 'l' : 't', out);
5381     save_atom(db, out, ID_ATOM(lit->type_or_lang), ctx);
5382   }
5383 
5384   switch(lit->objtype)
5385   { case OBJ_STRING:
5386       Sputc('L', out);
5387       save_atom(db, out, lit->value.string, ctx);
5388       break;
5389     case OBJ_INTEGER:
5390       Sputc('I', out);
5391       save_int(out, lit->value.integer);
5392       break;
5393     case OBJ_DOUBLE:
5394     {	Sputc('F', out);
5395       save_double(out, lit->value.real);
5396       break;
5397     }
5398     case OBJ_TERM:
5399     { const char *s = lit->value.term.record;
5400       size_t len = lit->value.term.len;
5401 
5402       Sputc('T', out);
5403       save_int(out, len);
5404       while(len-- > 0)
5405 	Sputc(*s++, out);
5406 
5407       break;
5408     }
5409     default:
5410       assert(0);
5411   }
5412 
5413   return TRUE;
5414 }
5415 
5416 
5417 
5418 static void
write_triple(rdf_db * db,IOSTREAM * out,triple * t,save_context * ctx)5419 write_triple(rdf_db *db, IOSTREAM *out, triple *t, save_context *ctx)
5420 { Sputc('T', out);
5421 
5422   save_atom(db, out, ID_ATOM(t->subject_id), ctx);
5423   save_predicate(db, out, t->predicate.r, ctx);
5424 
5425   if ( t->object_is_literal )
5426   { save_literal(db, out, t->object.literal, ctx);
5427   } else
5428   { Sputc('R', out);
5429     save_atom(db, out, t->object.resource, ctx);
5430   }
5431 
5432   save_atom(db, out, ID_ATOM(t->graph_id), ctx);
5433   save_int(out, t->line);
5434 }
5435 
5436 
5437 static void
write_source(rdf_db * db,IOSTREAM * out,atom_t src,save_context * ctx)5438 write_source(rdf_db *db, IOSTREAM *out, atom_t src, save_context *ctx)
5439 { graph *s = existing_graph(db, src);
5440 
5441   if ( s && s->source )
5442   { Sputc('F', out);
5443     save_atom(db, out, s->source, ctx);
5444     Sputc('t', out);
5445     save_double(out, s->modified);
5446   }
5447 }
5448 
5449 
5450 static void
write_md5(rdf_db * db,IOSTREAM * out,atom_t src)5451 write_md5(rdf_db *db, IOSTREAM *out, atom_t src)
5452 { graph *s = existing_graph(db, src);
5453 
5454   if ( s )
5455   { md5_byte_t *p = s->digest;
5456     int i;
5457 
5458     Sputc('M', out);
5459     for(i=0; i<16; i++)
5460       Sputc(*p++, out);
5461   }
5462 }
5463 
5464 
5465 static int
save_db(query * q,IOSTREAM * out,atom_t src,int version)5466 save_db(query *q, IOSTREAM *out, atom_t src, int version)
5467 { rdf_db *db = q->db;
5468   triple *t, p;
5469   save_context ctx;
5470   triple_walker tw;
5471 
5472   memset(&p, 0, sizeof(p));
5473   init_saved(db, &ctx, version);
5474 
5475   Sfprintf(out, "%s", SAVE_MAGIC);
5476   save_int(out, version);
5477   if ( src )
5478   { Sputc('S', out);			/* start of graph header */
5479     save_atom(db, out, src, &ctx);
5480     write_source(db, out, src, &ctx);
5481     write_md5(db, out, src);
5482     p.graph_id = ATOM_ID(src);
5483     p.indexed = BY_G;
5484   } else
5485   { p.indexed = BY_NONE;
5486   }
5487   if ( Sferror(out) )
5488     return FALSE;
5489 
5490   init_triple_walker(&tw, db, &p, p.indexed);
5491   while((t=next_triple(&tw)))
5492   { triple *t2;
5493 
5494     if ( (t2=alive_triple(q, t)) &&
5495 	 (!src || ID_ATOM(t2->graph_id) == src) )
5496     { write_triple(db, out, t2, &ctx);
5497       if ( Sferror(out) )
5498 	return FALSE;
5499     }
5500   }
5501   Sputc('E', out);
5502   if ( Sferror(out) )
5503     return FALSE;
5504 
5505   destroy_saved(db, &ctx);
5506 
5507   return TRUE;
5508 }
5509 
5510 
5511 static foreign_t
rdf_save_db(term_t stream,term_t graph,term_t version)5512 rdf_save_db(term_t stream, term_t graph, term_t version)
5513 { rdf_db *db = rdf_current_db();
5514   query *q;
5515   IOSTREAM *out;
5516   atom_t src;
5517   int rc;
5518   int v;
5519 
5520   if ( !PL_get_stream_handle(stream, &out) )
5521     return PL_type_error("stream", stream);
5522   if ( !get_atom_or_var_ex(graph, &src) )
5523     return FALSE;
5524   if ( !PL_get_integer(version, &v) )
5525     return FALSE;
5526   if ( v < 2 || v > 3 )
5527     return PL_domain_error("rdf_db_save_version", version);
5528 
5529   if ( (q = open_query(db)) )
5530   { rc = save_db(q, out, src, v);
5531     close_query(q);
5532     return rc;
5533   } else
5534     return FALSE;
5535 }
5536 
5537 
5538 static int64_t
load_int(IOSTREAM * fd)5539 load_int(IOSTREAM *fd)
5540 { int64_t first = Sgetc(fd);
5541   int bytes, shift, b;
5542 
5543   if ( !(first & 0xc0) )		/* 99% of them: speed up a bit */
5544   { first <<= (INT64BITSIZE-6);
5545     first >>= (INT64BITSIZE-6);
5546 
5547     return first;
5548   }
5549 
5550   bytes = (int) ((first >> 6) & 0x3);
5551   first &= 0x3f;
5552 
5553   if ( bytes <= 2 )
5554   { for( b = 0; b < bytes; b++ )
5555     { first <<= 8;
5556       first |= Sgetc(fd) & 0xff;
5557     }
5558 
5559     shift = (sizeof(first)-1-bytes)*8 + 2;
5560   } else
5561   { int m;
5562 
5563     bytes = (int)first;
5564     first = 0L;
5565 
5566     for(m=0; m<bytes; m++)
5567     { first <<= 8;
5568       first |= Sgetc(fd) & 0xff;
5569     }
5570     shift = (sizeof(first)-bytes)*8;
5571   }
5572 
5573   first <<= shift;
5574   first >>= shift;
5575 
5576   return first;
5577 }
5578 
5579 
5580 static int
load_double(IOSTREAM * fd,double * fp)5581 load_double(IOSTREAM *fd, double *fp)
5582 { double f;
5583   unsigned char *cl = (unsigned char *)&f;
5584   unsigned int i;
5585 
5586   for(i=0; i<BYTES_PER_DOUBLE; i++)
5587   { int c = Sgetc(fd);
5588 
5589     if ( c == -1 )
5590     { *fp = 0.0;
5591       return FALSE;
5592     }
5593     cl[double_byte_order[i]] = c;
5594   }
5595 
5596   *fp = f;
5597   return TRUE;
5598 }
5599 
5600 
/* Growable array of objects in the order they were loaded; a reference
   in the file is resolved by indexing this array (see add_object() and
   the fetch_*() functions).
*/
typedef struct ld_array
{ size_t	loaded_id;		/* number of objects stored; next free index */
  size_t	allocated_size;		/* capacity of loaded_objects in slots */
  void	      **loaded_objects;		/* the stored objects */
} ld_array;
5606 
/* State of one load operation, filled by load_db() and consumed by
   rdf_load_db().
*/
typedef struct ld_context
{ ld_array	atoms;			/* atoms loaded so far */
  ld_array	predicates;		/* predicates loaded so far */
  ld_array	literals;		/* literals loaded so far */
  atom_t	graph_name;		/* for single-graph files */
  graph	       *graph;			/* set by prepare_loaded_triples() */
  atom_t	graph_source;		/* from the 'F' record */
  double	modified;		/* from the 't' record */
  int		has_digest;		/* TRUE after an 'M' record was read */
  int		version;		/* save version read from the file */
  md5_byte_t    digest[16];		/* from the 'M' record */
  atomset       graph_table;		/* multi-graph file */
  triple_buffer	triples;		/* triples read, not yet added to the db */
} ld_context;
5621 
5622 
5623 static int
add_object(rdf_db * db,void * obj,ld_array * ar)5624 add_object(rdf_db *db, void *obj, ld_array *ar)
5625 { if ( ar->loaded_id >= ar->allocated_size )
5626   { if ( ar->allocated_size == 0 )
5627     { ar->allocated_size = 1024;
5628       ar->loaded_objects = malloc(sizeof(void*)*ar->allocated_size);
5629     } else
5630     { size_t  bytes;
5631       void *new;
5632 
5633       ar->allocated_size *= 2;
5634       bytes = sizeof(void*)*ar->allocated_size;
5635       if ( (new = realloc(ar->loaded_objects, bytes)) )
5636 	ar->loaded_objects = new;
5637       else
5638 	return FALSE;
5639     }
5640   }
5641 
5642   ar->loaded_objects[ar->loaded_id++] = obj;
5643   return TRUE;
5644 }
5645 
5646 static int
add_atom(rdf_db * db,atom_t a,ld_context * ctx)5647 add_atom(rdf_db *db, atom_t a, ld_context *ctx)
5648 { return add_object(db, (void*)a, &ctx->atoms);
5649 }
5650 
5651 static atom_t
fetch_atom(ld_context * ctx,size_t idx)5652 fetch_atom(ld_context *ctx, size_t idx)
5653 { if ( idx < ctx->atoms.loaded_id )
5654     return (atom_t)ctx->atoms.loaded_objects[idx];
5655 
5656   return (atom_t)0;
5657 }
5658 
5659 static atom_t
load_atom(rdf_db * db,IOSTREAM * in,ld_context * ctx)5660 load_atom(rdf_db *db, IOSTREAM *in, ld_context *ctx)
5661 { switch(Sgetc(in))
5662   { case 'X':
5663     { size_t idx = (size_t)load_int(in);
5664       return fetch_atom(ctx, idx);
5665     }
5666     case 'A':
5667     { size_t len = (size_t)load_int(in);
5668       atom_t a;
5669 
5670       if ( len < 1024 )
5671       { char buf[1024];
5672 	Sfread(buf, 1, len, in);
5673 	a = PL_new_atom_nchars(len, buf);
5674       } else
5675       { char *buf = rdf_malloc(db, len);
5676 	Sfread(buf, 1, len, in);
5677 	a = PL_new_atom_nchars(len, buf);
5678 	rdf_free(db, buf, len);
5679       }
5680 
5681       add_atom(db, a, ctx);
5682       return a;
5683     }
5684     case 'W':
5685     { int len = (int)load_int(in);
5686       atom_t a;
5687       wchar_t buf[1024];
5688       wchar_t *w;
5689       IOENC enc = in->encoding;
5690       int i;
5691 
5692       if ( len < 1024 )
5693 	w = buf;
5694       else
5695 	w = rdf_malloc(db, len*sizeof(wchar_t));
5696 
5697       in->encoding = ENC_UTF8;
5698       for(i=0; i<len; i++)
5699       { w[i] = Sgetcode(in);
5700 	SECURE(assert(w[i]>=0 && w[i] <= 0x10ffff));
5701       }
5702       in->encoding = enc;
5703 
5704       a = PL_new_atom_wchars(len, w);
5705       if ( w != buf )
5706 	rdf_free(db, w, len*sizeof(wchar_t));
5707 
5708       add_atom(db, a, ctx);
5709       return a;
5710     }
5711     default:
5712     { assert(0);
5713       return 0;
5714     }
5715   }
5716 }
5717 
5718 
5719 static int
add_predicate(rdf_db * db,predicate * p,ld_context * ctx)5720 add_predicate(rdf_db *db, predicate *p, ld_context *ctx)
5721 { return add_object(db, p, &ctx->predicates);
5722 }
5723 
5724 static predicate *
fetch_predicate(ld_context * ctx,size_t idx)5725 fetch_predicate(ld_context *ctx, size_t idx)
5726 { if ( idx < ctx->predicates.loaded_id )
5727     return ctx->predicates.loaded_objects[idx];
5728 
5729   return NULL;
5730 }
5731 
5732 static predicate *
load_predicate(rdf_db * db,IOSTREAM * in,ld_context * ctx)5733 load_predicate(rdf_db *db, IOSTREAM *in, ld_context *ctx)
5734 { switch(Sgetc(in))
5735   { case 'X':
5736     { size_t idx = (size_t)load_int(in);
5737       return fetch_predicate(ctx, idx);
5738     }
5739     case 'P':
5740     { atom_t a;
5741 
5742       if ( (a=load_atom(db, in, ctx)) )
5743       { predicate *p;
5744 
5745 	if ( (p=lookup_predicate(db, a)) &&
5746 	     add_predicate(db, p, ctx) )
5747 	  return p;
5748       }
5749       return NULL;			/* no memory */
5750     }
5751     default:
5752       assert(0);
5753       return NULL;
5754   }
5755 }
5756 
5757 
5758 static int
add_literal(rdf_db * db,literal * lit,ld_context * ctx)5759 add_literal(rdf_db *db, literal *lit, ld_context *ctx)
5760 { return add_object(db, lit, &ctx->literals);
5761 }
5762 
5763 static literal *
fetch_literal(ld_context * ctx,size_t idx)5764 fetch_literal(ld_context *ctx, size_t idx)
5765 { if ( idx < ctx->literals.loaded_id )
5766     return ctx->literals.loaded_objects[idx];
5767 
5768   return NULL;
5769 }
5770 
5771 static literal *
load_literal(rdf_db * db,IOSTREAM * in,ld_context * ctx,int c)5772 load_literal(rdf_db *db, IOSTREAM *in, ld_context *ctx, int c)
5773 { literal *lit;
5774 
5775   if ( c == 'X' && ctx->version >= 3 )
5776   { size_t idx = (size_t)load_int(in);
5777     lit = fetch_literal(ctx, idx);
5778     simpleMutexLock(&db->locks.literal);
5779     lit->references++;
5780     assert(lit->references != 0);
5781     simpleMutexUnlock(&db->locks.literal);
5782   } else if ( (lit=new_literal(db)) )
5783   {
5784   value:
5785     switch(c)
5786     { case 'L':
5787 	lit->objtype = OBJ_STRING;
5788 	lit->value.string = load_atom(db, in, ctx);
5789 	break;
5790       case 'I':
5791 	lit->objtype = OBJ_INTEGER;
5792 	lit->value.integer = load_int(in);
5793 	break;
5794       case 'F':
5795 	lit->objtype = OBJ_DOUBLE;
5796         load_double(in, &lit->value.real);
5797 	break;
5798       case 'T':
5799       { unsigned int i;
5800 	char *s;
5801 
5802 	lit->objtype = OBJ_TERM;
5803 	lit->value.term.len = (size_t)load_int(in);
5804 	lit->value.term.record = rdf_malloc(db, lit->value.term.len);
5805 	lit->term_loaded = TRUE;	/* see free_literal() */
5806 	s = (char *)lit->value.term.record;
5807 
5808 	for(i=0; i<lit->value.term.len; i++)
5809 	  s[i] = Sgetc(in);
5810 
5811 	break;
5812       }
5813       case 'l':
5814 	lit->qualifier = Q_LANG;
5815 	lit->type_or_lang = ATOM_ID(load_atom(db, in, ctx));
5816 	c = Sgetc(in);
5817 	goto value;
5818       case 't':
5819 	lit->qualifier = Q_TYPE;
5820 	lit->type_or_lang = ATOM_ID(load_atom(db, in, ctx));
5821 	c = Sgetc(in);
5822 	goto value;
5823       default:
5824 	assert(0);
5825         return NULL;
5826     }
5827 
5828     if ( ctx->version >= 3 )
5829     { lock_atoms_literal(lit);
5830       lit = share_literal(db, lit);
5831 
5832       add_literal(db, lit, ctx);
5833     }
5834   }
5835 
5836   return lit;
5837 }
5838 
5839 
5840 static triple *
load_triple(rdf_db * db,IOSTREAM * in,ld_context * ctx)5841 load_triple(rdf_db *db, IOSTREAM *in, ld_context *ctx)
5842 { triple *t = new_triple(db);
5843   int c;
5844 
5845   t->subject_id = ATOM_ID(load_atom(db, in, ctx));
5846   if ( ctx->version < 3 )
5847   { t->resolve_pred = TRUE;
5848     t->predicate.u = load_atom(db, in, ctx);
5849   } else
5850   { t->predicate.r = load_predicate(db, in, ctx);
5851   }
5852   if ( (c=Sgetc(in)) == 'R' )
5853   { t->object.resource = load_atom(db, in, ctx);
5854   } else
5855   { t->object_is_literal = TRUE;
5856     t->object.literal = load_literal(db, in, ctx, c);
5857   }
5858   t->graph_id = ATOM_ID(load_atom(db, in, ctx));
5859   t->line  = (unsigned long)load_int(in);
5860   if ( !ctx->graph )
5861     add_atomset(&ctx->graph_table, ID_ATOM(t->graph_id));
5862 
5863   return t;
5864 }
5865 
5866 
5867 static int
load_magic(IOSTREAM * in)5868 load_magic(IOSTREAM *in)
5869 { char *s = SAVE_MAGIC;
5870 
5871   for( ; *s; s++)
5872   { if ( Sgetc(in) != *s )
5873       return FALSE;
5874   }
5875 
5876   return TRUE;
5877 }
5878 
5879 
5880 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5881 Note that we have two types  of   saved  states.  One holding many named
5882 graphs and one holding the content of exactly one named graph.
5883 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
5884 
5885 static int
load_db(rdf_db * db,IOSTREAM * in,ld_context * ctx)5886 load_db(rdf_db *db, IOSTREAM *in, ld_context *ctx)
5887 { int c;
5888 
5889   if ( !load_magic(in) )
5890     return FALSE;
5891   ctx->version = (int)load_int(in);
5892   if ( ctx->version < 2 || ctx->version > 3 )
5893   { term_t v = PL_new_term_ref();
5894 
5895     if ( PL_put_integer(v, ctx->version) )
5896       return PL_domain_error("rdf_db_save_version", v);
5897     else
5898       return FALSE;
5899   }
5900 
5901   while((c=Sgetc(in)) != EOF)
5902   { switch(c)
5903     { case 'T':
5904       { triple *t;
5905 
5906 	if ( !(t=load_triple(db, in, ctx)) )
5907 	  return FALSE;
5908 	t->loaded = TRUE;
5909 	buffer_triple(&ctx->triples, t);
5910         break;
5911       }
5912 					/* file holding exactly one graph */
5913       case 'S':				/* name of the graph */
5914       { ctx->graph_name = load_atom(db, in, ctx);
5915         break;
5916       }
5917       case 'M':				/* MD5 of the graph */
5918       { int i;
5919 
5920 	for(i=0; i<16; i++)
5921 	  ctx->digest[i] = Sgetc(in);
5922 	ctx->has_digest = TRUE;
5923 
5924 	break;
5925       }
5926       case 'F':				/* file of the graph */
5927 	ctx->graph_source = load_atom(db, in, ctx);
5928 	break;				/* end of one-graph handling */
5929       case 't':
5930 	load_double(in, &ctx->modified);
5931         break;
5932       case 'E':				/* end of file */
5933 	return TRUE;
5934       default:
5935 	break;
5936     }
5937   }
5938 
5939   return PL_warning("Illegal RDF triple file");
5940 }
5941 
5942 
5943 static int
prepare_loaded_triples(rdf_db * db,ld_context * ctx)5944 prepare_loaded_triples(rdf_db *db, ld_context *ctx)
5945 { triple **t;
5946 
5947   if ( ctx->graph_name )			/* lookup named graph */
5948   { ctx->graph = lookup_graph(db, ctx->graph_name);
5949     if ( ctx->graph_source && ctx->graph->source != ctx->graph_source )
5950     { if ( ctx->graph->source )
5951 	PL_unregister_atom(ctx->graph->source);
5952       ctx->graph->source = ctx->graph_source;
5953       PL_register_atom(ctx->graph->source);
5954       ctx->graph->modified = ctx->modified;
5955     }
5956 
5957     if ( ctx->has_digest )
5958     { if ( ctx->graph->md5 )
5959       { ctx->graph->md5 = FALSE;		/* kill repetitive MD5 update */
5960       } else
5961       { ctx->has_digest = FALSE;
5962       }
5963     }
5964   } else
5965   { ctx->graph = NULL;
5966   }
5967 
5968   for(t=ctx->triples.base; t<ctx->triples.top; t++)
5969     lock_atoms(db, *t);
5970 
5971   return TRUE;
5972 }
5973 
5974 
5975 static void
destroy_load_context(rdf_db * db,ld_context * ctx,int delete_triples)5976 destroy_load_context(rdf_db *db, ld_context *ctx, int delete_triples)
5977 { if ( delete_triples )
5978   { triple **tp;
5979 
5980     for(tp=ctx->triples.base;
5981 	tp<ctx->triples.top;
5982 	tp++)
5983     { triple *t = *tp;
5984 
5985       free_triple(db, t, FALSE);
5986     }
5987   }
5988 
5989   free_triple_buffer(&ctx->triples);
5990 
5991   if ( ctx->atoms.loaded_objects )
5992   { atom_t *ap, *ep;
5993 
5994     for( ap=(atom_t*)ctx->atoms.loaded_objects, ep=ap+ctx->atoms.loaded_id;
5995 	 ap<ep;
5996 	 ap++)
5997     { PL_unregister_atom(*ap);
5998     }
5999 
6000     free(ctx->atoms.loaded_objects);
6001   }
6002   if ( ctx->predicates.loaded_objects )
6003     free(ctx->predicates.loaded_objects);
6004   if ( ctx->literals.loaded_objects )
6005     free(ctx->literals.loaded_objects);
6006 }
6007 
/* Closure for append_graph_to_list(): the list under construction. */
typedef struct
{ term_t tail;				/* current open tail of the list */
  term_t head;				/* scratch term for the new element */
} add_graph_context;
6012 
6013 static int
append_graph_to_list(atom_t graph,void * closure)6014 append_graph_to_list(atom_t graph, void *closure)
6015 { add_graph_context *ctx = closure;
6016 
6017   return ( PL_unify_list(ctx->tail, ctx->head, ctx->tail) &&
6018 	   PL_unify_atom(ctx->head, graph)
6019 	 );
6020 }
6021 
6022 
6023 static foreign_t
rdf_load_db(term_t stream,term_t id,term_t graphs)6024 rdf_load_db(term_t stream, term_t id, term_t graphs)
6025 { ld_context ctx;
6026   rdf_db *db = rdf_current_db();
6027   IOSTREAM *in;
6028   int rc;
6029   term_t ba_arg2;
6030 
6031   if ( !(ba_arg2 = PL_new_term_ref()) )
6032     return FALSE;
6033 
6034   if ( !PL_get_stream_handle(stream, &in) )
6035     return PL_type_error("stream", stream);
6036 
6037   memset(&ctx, 0, sizeof(ctx));
6038   init_atomset(&ctx.graph_table);
6039   init_triple_buffer(&ctx.triples);
6040   rc = load_db(db, in, &ctx);
6041   PL_release_stream(in);
6042 
6043   if ( !rc ||
6044        !PL_put_atom(ba_arg2, ATOM_begin) ||
6045        !rdf_broadcast(EV_LOAD, (void*)id, (void*)ba_arg2) )
6046   { destroy_load_context(db, &ctx, TRUE);
6047     return FALSE;
6048   }
6049 
6050   if ( (rc=prepare_loaded_triples(db, &ctx)) )
6051   { add_graph_context gctx;
6052 
6053     gctx.tail = PL_copy_term_ref(graphs);
6054     gctx.head = PL_new_term_ref();
6055 
6056     rc = ( for_atomset(&ctx.graph_table, append_graph_to_list, &gctx) &&
6057 	   PL_unify_nil(gctx.tail) );
6058 
6059     destroy_atomset(&ctx.graph_table);
6060   }
6061 
6062   if ( rc )
6063   { query *q;
6064 
6065     if ( (q=open_query(db)) )
6066     { add_triples(q, ctx.triples.base, ctx.triples.top - ctx.triples.base);
6067       close_query(q);
6068     } else
6069     { goto error;
6070     }
6071     if ( ctx.graph )
6072     { if ( ctx.has_digest )
6073       { sum_digest(ctx.graph->digest, ctx.digest);
6074 	ctx.graph->md5 = TRUE;
6075       }
6076       clear_modified(ctx.graph);
6077     }
6078     if ( (rc=PL_cons_functor(ba_arg2, FUNCTOR_end1, graphs)) )
6079       rc = rdf_broadcast(EV_LOAD, (void*)id, (void*)ba_arg2);
6080     destroy_load_context(db, &ctx, FALSE);
6081 
6082     return rc;
6083   }
6084 
6085 error:
6086   rdf_broadcast(EV_LOAD, (void*)id, (void*)ATOM_error);
6087   destroy_load_context(db, &ctx, TRUE);
6088   return FALSE;
6089 }
6090 
6091 
6092 #ifdef WITH_MD5
6093 		 /*******************************
6094 		 *	     MD5 SUPPORT	*
6095 		 *******************************/
6096 
/* md5_type is used to keep the MD5 independent from the internal
   numbers.  The table is indexed by the objtype of a literal (see
   md5_triple()); the value is the type byte that is fed to the digest.
*/
static const char md5_type[] =
{ 0x0,					/* OBJ_UNKNOWN */
  0x3,					/* OBJ_INTEGER */
  0x4,					/* OBJ_DOUBLE */
  0x2,					/* OBJ_STRING */
  0x5					/* OBJ_TERM */
};
6107 
6108 static void
md5_triple(triple * t,md5_byte_t * digest)6109 md5_triple(triple *t, md5_byte_t *digest)
6110 { md5_state_t state;
6111   size_t len;
6112   md5_byte_t tmp[2];
6113   const char *s;
6114   literal *lit;
6115 
6116   md5_init(&state);
6117   s = PL_blob_data(ID_ATOM(t->subject_id), &len, NULL);
6118   md5_append(&state, (const md5_byte_t *)s, (int)len);
6119   md5_append(&state, (const md5_byte_t *)"P", 1);
6120   s = PL_blob_data(t->predicate.r->name, &len, NULL);
6121   md5_append(&state, (const md5_byte_t *)s, (int)len);
6122   tmp[0] = 'O';
6123   if ( t->object_is_literal )
6124   { lit = t->object.literal;
6125     tmp[1] = md5_type[lit->objtype];
6126 
6127     switch(lit->objtype)
6128     { case OBJ_STRING:
6129 	s = PL_blob_data(lit->value.string, &len, NULL);
6130 	break;
6131       case OBJ_INTEGER:			/* TBD: byte order issues */
6132 	s = (const char *)&lit->value.integer;
6133 	len = sizeof(lit->value.integer);
6134 	break;
6135       case OBJ_DOUBLE:
6136 	s = (const char *)&lit->value.real;
6137 	len = sizeof(lit->value.real);
6138 	break;
6139       case OBJ_TERM:
6140 	s = (const char *)lit->value.term.record;
6141 	len = lit->value.term.len;
6142 	break;
6143       default:
6144 	assert(0);
6145     }
6146   } else
6147   { s = PL_blob_data(t->object.resource, &len, NULL);
6148     tmp[1] = 0x1;			/* old OBJ_RESOURCE */
6149     lit = NULL;
6150   }
6151   md5_append(&state, tmp, 2);
6152   md5_append(&state, (const md5_byte_t *)s, (int)len);
6153   if ( lit && lit->qualifier )
6154   { assert(lit->type_or_lang);
6155     md5_append(&state,
6156 	       (const md5_byte_t *)(lit->qualifier == Q_LANG ? "l" : "t"),
6157 	       1);
6158     s = PL_blob_data(ID_ATOM(lit->type_or_lang), &len, NULL);
6159     md5_append(&state, (const md5_byte_t *)s, (int)len);
6160   }
6161   if ( t->graph_id )
6162   { md5_append(&state, (const md5_byte_t *)"S", 1);
6163     s = PL_blob_data(ID_ATOM(t->graph_id), &len, NULL);
6164     md5_append(&state, (const md5_byte_t *)s, (int)len);
6165   }
6166 
6167   md5_finish(&state, digest);
6168 }
6169 
6170 
6171 static void
sum_digest(md5_byte_t * digest,md5_byte_t * add)6172 sum_digest(md5_byte_t *digest, md5_byte_t *add)
6173 { md5_byte_t *p, *q;
6174   int n;
6175 
6176   for(p=digest, q=add, n=16; --n>=0; )
6177     *p++ += *q++;
6178 }
6179 
6180 
6181 static void
dec_digest(md5_byte_t * digest,md5_byte_t * add)6182 dec_digest(md5_byte_t *digest, md5_byte_t *add)
6183 { md5_byte_t *p, *q;
6184   int n;
6185 
6186   for(p=digest, q=add, n=16; --n>=0; )
6187     *p++ -= *q++;
6188 }
6189 
6190 
6191 static int
md5_unify_digest(term_t t,md5_byte_t digest[16])6192 md5_unify_digest(term_t t, md5_byte_t digest[16])
6193 { char hex_output[16*2];
6194   int di;
6195   char *pi;
6196   static char hexd[] = "0123456789abcdef";
6197 
6198   for(pi=hex_output, di = 0; di < 16; ++di)
6199   { *pi++ = hexd[(digest[di] >> 4) & 0x0f];
6200     *pi++ = hexd[digest[di] & 0x0f];
6201   }
6202 
6203   return PL_unify_atom_nchars(t, 16*2, hex_output);
6204 }
6205 
6206 
6207 static foreign_t
rdf_md5(term_t graph_name,term_t md5)6208 rdf_md5(term_t graph_name, term_t md5)
6209 { atom_t src;
6210   int rc;
6211   rdf_db *db = rdf_current_db();
6212 
6213   if ( !get_atom_or_var_ex(graph_name, &src) )
6214     return FALSE;
6215 
6216   if ( src )
6217   { graph *s;
6218 
6219     if ( (s = existing_graph(db, src)) && !s->erased )
6220     { rc = md5_unify_digest(md5, s->digest);
6221     } else
6222     { md5_byte_t digest[16];
6223 
6224       memset(digest, 0, sizeof(digest));
6225       rc = md5_unify_digest(md5, digest);
6226     }
6227   } else
6228   { md5_byte_t digest[16];
6229     int i;
6230 
6231     memset(&digest, 0, sizeof(digest));
6232     for(i=0; i<db->graphs.bucket_count; i++)
6233     { graph *g = db->graphs.blocks[MSB(i)][i];
6234 
6235       for( ; g; g = g->next )
6236 	sum_digest(digest, g->digest);
6237     }
6238 
6239     return md5_unify_digest(md5, digest);
6240   }
6241 
6242   return rc;
6243 }
6244 
6245 
6246 static foreign_t
rdf_atom_md5(term_t text,term_t times,term_t md5)6247 rdf_atom_md5(term_t text, term_t times, term_t md5)
6248 { char *s;
6249   int n, i;
6250   size_t len;
6251   md5_byte_t digest[16];
6252 
6253   if ( !PL_get_nchars(text, &len, &s, CVT_ALL|REP_UTF8|CVT_EXCEPTION) )
6254     return FALSE;
6255   if ( !PL_get_integer_ex(times, &n) )
6256     return FALSE;
6257   if ( n < 1 )
6258     return PL_domain_error("positive_integer", times);
6259 
6260   for(i=0; i<n; i++)
6261   { md5_state_t state;
6262     md5_init(&state);
6263     md5_append(&state, (const md5_byte_t *)s, (int)len);
6264     md5_finish(&state, digest);
6265     s = (char *)digest;
6266     len = sizeof(digest);
6267   }
6268 
6269   return md5_unify_digest(md5, digest);
6270 }
6271 
6272 
6273 
6274 #endif /*WITH_MD5*/
6275 
6276 
6277 		 /*******************************
6278 		 *	       ATOMS		*
6279 		 *******************************/
6280 
6281 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6282 Lock atoms in triple against AGC. Note that the predicate name is locked
6283 in the predicate structure.
6284 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6285 
6286 static void
lock_atoms(rdf_db * db,triple * t)6287 lock_atoms(rdf_db *db, triple *t)
6288 { if ( !t->atoms_locked )
6289   { t->atoms_locked = TRUE;
6290 
6291     register_resource(&db->resources, ID_ATOM(t->subject_id));
6292     if ( t->object_is_literal )
6293     { lock_atoms_literal(t->object.literal);
6294     } else
6295     { register_resource(&db->resources, t->object.resource);
6296     }
6297   }
6298 }
6299 
6300 
6301 static void
unlock_atoms(rdf_db * db,triple * t)6302 unlock_atoms(rdf_db *db, triple *t)
6303 { if ( t->atoms_locked )
6304   { t->atoms_locked = FALSE;
6305 
6306     unregister_resource(&db->resources, ID_ATOM(t->subject_id));
6307     if ( t->object_is_literal )
6308     { if ( !t->object.literal->shared )
6309 	unlock_atoms_literal(t->object.literal);
6310     } else
6311     { unregister_resource(&db->resources, t->object.resource);
6312     }
6313   }
6314 }
6315 
6316 
6317 		 /*******************************
6318 		 *      PROLOG CONVERSION	*
6319 		 *******************************/
6320 
/* Flags for get_literal() and get_lit_atom_ex() */
#define LIT_TYPED	0x1		/* set when get_literal() recurses into type(T,V) */
#define LIT_NOERROR	0x2		/* NOTE(review): not used by the functions below; confirm meaning */
#define LIT_PARTIAL	0x4		/* accept unbound parts */
6324 
6325 static int
get_lit_atom_ex(term_t t,atom_t * a,int flags)6326 get_lit_atom_ex(term_t t, atom_t *a, int flags)
6327 { if ( PL_get_atom(t, a) )
6328     return TRUE;
6329   if ( (flags & LIT_PARTIAL) && PL_is_variable(t) )
6330   { *a = 0L;
6331     return TRUE;
6332   }
6333 
6334   return PL_type_error("atom", t);
6335 }
6336 
6337 
6338 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
get_literal() processes the argument  of  a   literal/1  term  passed as
object.
6341 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6342 
6343 static int
get_literal(rdf_db * db,term_t litt,literal * lit,int flags)6344 get_literal(rdf_db *db, term_t litt, literal *lit, int flags)
6345 { if ( PL_get_atom(litt, &lit->value.string) )
6346   { lit->objtype = OBJ_STRING;
6347   } else if ( PL_is_integer(litt) && PL_get_int64(litt, &lit->value.integer) )
6348   { lit->objtype = OBJ_INTEGER;
6349   } else if ( PL_get_float(litt, &lit->value.real) )
6350   { lit->objtype = OBJ_DOUBLE;
6351   } else if ( PL_is_functor(litt, FUNCTOR_lang2) )
6352   { term_t a = PL_new_term_ref();
6353     atom_t tol;
6354 
6355     _PL_get_arg(1, litt, a);
6356     if ( !get_lit_atom_ex(a, &tol, flags) )
6357       return FALSE;
6358     lit->type_or_lang = ATOM_ID(tol);
6359     _PL_get_arg(2, litt, a);
6360     if ( !get_lit_atom_ex(a, &lit->value.string, flags) )
6361       return FALSE;
6362 
6363     lit->qualifier = Q_LANG;
6364     lit->objtype = OBJ_STRING;
6365   } else if ( PL_is_functor(litt, FUNCTOR_type2) &&
6366 	      !(flags & LIT_TYPED) )	/* avoid recursion */
6367   { term_t a = PL_new_term_ref();
6368     atom_t tol;
6369 
6370     _PL_get_arg(1, litt, a);
6371     if ( !get_lit_atom_ex(a, &tol, flags) )
6372       return FALSE;
6373     lit->type_or_lang = ATOM_ID(tol);
6374     lit->qualifier = Q_TYPE;
6375     _PL_get_arg(2, litt, a);
6376 
6377     return get_literal(db, a, lit, LIT_TYPED|flags);
6378   } else if ( !PL_is_ground(litt) )
6379   { if ( !(flags & LIT_PARTIAL) )
6380       return PL_type_error("rdf_object", litt);
6381     if ( !PL_is_variable(litt) )
6382       lit->objtype = OBJ_TERM;
6383   } else
6384   { lit->value.term.record = PL_record_external(litt, &lit->value.term.len);
6385     lit->objtype = OBJ_TERM;
6386   }
6387 
6388   return TRUE;
6389 }
6390 
6391 
6392 static int
get_object(rdf_db * db,term_t object,triple * t)6393 get_object(rdf_db *db, term_t object, triple *t)
6394 { if ( PL_get_atom(object, &t->object.resource) )
6395   { assert(!t->object_is_literal);
6396   } else if ( PL_is_functor(object, FUNCTOR_literal1) )
6397   { term_t a = PL_new_term_ref();
6398 
6399     _PL_get_arg(1, object, a);
6400     alloc_literal_triple(db, t);
6401     return get_literal(db, a, t->object.literal, 0);
6402   } else if ( get_prefixed_iri(db, object, &t->object.resource) )
6403   { assert(!t->object_is_literal);
6404   } else
6405     return PL_type_error("rdf_object", object);
6406 
6407   return TRUE;
6408 }
6409 
6410 
6411 static int
get_src(term_t src,triple * t)6412 get_src(term_t src, triple *t)
6413 { if ( src && !PL_is_variable(src) )
6414   { atom_t at;
6415 
6416     if ( PL_get_atom(src, &at) )
6417     { t->graph_id = ATOM_ID(at);
6418       t->line = NO_LINE;
6419     } else if ( PL_is_functor(src, FUNCTOR_colon2) )
6420     { term_t a = PL_new_term_ref();
6421       long line;
6422 
6423       _PL_get_arg(1, src, a);
6424       if ( !get_atom_or_var_ex(a, &at) )
6425 	return FALSE;
6426       t->graph_id = ATOM_ID(at);
6427       _PL_get_arg(2, src, a);
6428       if ( PL_get_long(a, &line) )
6429 	t->line = line;
6430       else if ( !PL_is_variable(a) )
6431 	return PL_type_error("integer", a);
6432     } else
6433       return PL_type_error("rdf_graph", src);
6434   }
6435 
6436   return TRUE;
6437 }
6438 
6439 
6440 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6441 Return values:
6442 	-1: exception
6443 	 0: no predicate
6444 	 1: the predicate
6445 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6446 
6447 static int
get_existing_predicate(rdf_db * db,term_t t,predicate ** p)6448 get_existing_predicate(rdf_db *db, term_t t, predicate **p)
6449 { atom_t name;
6450 
6451   if ( !PL_get_atom(t, &name ) )
6452   { if ( PL_is_functor(t, FUNCTOR_literal1) )
6453       return 0;				/* rdf(_, literal(_), _) */
6454     if ( get_prefixed_iri(db, t, &name) )
6455       goto ok;
6456     PL_type_error("rdf_predicate", t);
6457     return -1;
6458   }
6459 
6460 ok:
6461   if ( (*p = existing_predicate(db, name)) )
6462     return 1;
6463 
6464   DEBUG(5, Sdprintf("No predicate %s\n", PL_atom_chars(name)));
6465   return 0;				/* no predicate */
6466 }
6467 
6468 
6469 static int
get_predicate(rdf_db * db,term_t t,predicate ** p,query * q)6470 get_predicate(rdf_db *db, term_t t, predicate **p, query *q)
6471 { atom_t name;
6472 
6473   if ( !get_iri_ex(db, t, &name ) )
6474     return FALSE;
6475 
6476   *p = lookup_predicate(db, name);
6477   return TRUE;
6478 }
6479 
6480 
6481 static int
get_triple(rdf_db * db,term_t subject,term_t predicate,term_t object,triple * t,query * q)6482 get_triple(rdf_db *db,
6483 	   term_t subject, term_t predicate, term_t object,
6484 	   triple *t, query *q)
6485 { atom_t at;
6486 
6487   if ( !get_iri_ex(db, subject, &at) ||
6488        !get_predicate(db, predicate, &t->predicate.r, q) ||
6489        !get_object(db, object, t) )
6490     return FALSE;
6491 
6492   t->subject_id = ATOM_ID(at);
6493 
6494   return TRUE;
6495 }
6496 
6497 
6498 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6499 get_partial_triple() creates a triple  for   matching  purposes.  It can
6500 return FALSE for  two  reasons.  Mostly   (type)  errors,  but  also  if
6501 resources are accessed that do not   exist  and therefore the subsequent
6502 matching will always fail. This  is   notably  the  case for predicates,
6503 which are first class citizens to this library.
6504 
6505 Return values:
6506 	1: ok
6507 	0: no predicate
6508        -1: error
6509 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6510 
6511 static int
get_partial_triple(rdf_db * db,term_t subject,term_t predicate,term_t object,term_t src,triple * t)6512 get_partial_triple(rdf_db *db,
6513 		   term_t subject, term_t predicate, term_t object,
6514 		   term_t src, triple *t)
6515 { int rc;
6516   int ipat = 0;
6517 
6518   if ( subject )
6519   { atom_t at;
6520 
6521     if ( !get_resource_or_var_ex(db, subject, &at) )
6522       return FALSE;
6523     t->subject_id = ATOM_ID(at);
6524   }
6525   if ( !PL_is_variable(predicate) &&
6526        (rc=get_existing_predicate(db, predicate, &t->predicate.r)) != 1 )
6527     return rc;
6528 					/* the object */
6529   if ( object && !PL_is_variable(object) )
6530   { if ( PL_get_atom(object, &t->object.resource) )
6531     { assert(!t->object_is_literal);
6532     } else if ( PL_is_functor(object, FUNCTOR_literal1) )
6533     { term_t a = PL_new_term_ref();
6534 
6535       _PL_get_arg(1, object, a);
6536       alloc_literal_triple(db, t);
6537       if ( !get_literal(db, a, t->object.literal, LIT_PARTIAL) )
6538 	return FALSE;
6539     } else if ( PL_is_functor(object, FUNCTOR_literal2) )
6540     { term_t a = PL_new_term_ref();
6541       literal *lit;
6542 
6543       alloc_literal_triple(db, t);
6544       lit = t->object.literal;
6545 
6546       _PL_get_arg(1, object, a);
6547       if ( PL_is_functor(a, FUNCTOR_exact1) )
6548 	t->match = STR_MATCH_ICASE;
6549       else if ( PL_is_functor(a, FUNCTOR_icase1) )
6550 	t->match = STR_MATCH_ICASE;
6551       else if ( PL_is_functor(a, FUNCTOR_plain1) )
6552 	t->match = STR_MATCH_PLAIN;
6553       else if ( PL_is_functor(a, FUNCTOR_substring1) )
6554 	t->match = STR_MATCH_SUBSTRING;
6555       else if ( PL_is_functor(a, FUNCTOR_word1) )
6556 	t->match = STR_MATCH_WORD;
6557       else if ( PL_is_functor(a, FUNCTOR_prefix1) )
6558 	t->match = STR_MATCH_PREFIX;
6559       else if ( PL_is_functor(a, FUNCTOR_like1) )
6560 	t->match = STR_MATCH_LIKE;
6561       else if ( PL_is_functor(a, FUNCTOR_lt1) )
6562 	t->match = STR_MATCH_LT;
6563       else if ( PL_is_functor(a, FUNCTOR_le1) )
6564 	t->match = STR_MATCH_LE;
6565       else if ( PL_is_functor(a, FUNCTOR_eq1) )
6566 	t->match = STR_MATCH_EQ;
6567       else if ( PL_is_functor(a, FUNCTOR_ge1) )
6568 	t->match = STR_MATCH_GE;
6569       else if ( PL_is_functor(a, FUNCTOR_gt1) )
6570 	t->match = STR_MATCH_GT;
6571       else if ( PL_is_functor(a, FUNCTOR_between2) )
6572       { term_t e = PL_new_term_ref();
6573 
6574 	_PL_get_arg(2, a, e);
6575 	memset(&t->tp.end, 0, sizeof(t->tp.end));
6576 	if ( !get_literal(db, e, &t->tp.end, 0) )
6577 	  return FALSE;
6578 	t->match = STR_MATCH_BETWEEN;
6579       } else
6580 	return PL_domain_error("match_type", a);
6581 
6582       _PL_get_arg(1, a, a);
6583       if ( t->match >= STR_MATCH_LT )
6584       { if ( !get_literal(db, a, lit, 0) )
6585 	  return FALSE;
6586       } else
6587       { if ( !PL_get_atom_ex(a, &lit->value.string) )
6588 	  return FALSE;
6589 	lit->objtype = OBJ_STRING;
6590       }
6591     } else
6592       return PL_type_error("rdf_object", object);
6593   }
6594 					/* the graph */
6595   if ( !get_src(src, t) )
6596     return FALSE;
6597 
6598   if ( t->subject_id )
6599     ipat |= BY_S;
6600   if ( t->predicate.r )
6601     ipat |= BY_P;
6602   if ( t->object_is_literal )
6603   { literal *lit = t->object.literal;
6604 
6605     switch( lit->objtype )
6606     { case OBJ_UNTYPED:
6607 	break;
6608       case OBJ_STRING:
6609 	if ( lit->objtype == OBJ_STRING )
6610 	{ if ( lit->value.string &&
6611 	       t->match <= STR_MATCH_ICASE )
6612 	    ipat |= BY_O;
6613 	}
6614         break;
6615       case OBJ_INTEGER:
6616       case OBJ_DOUBLE:
6617 	ipat |= BY_O;
6618         break;
6619       case OBJ_TERM:
6620 	if ( PL_is_ground(object) )
6621 	  ipat |= BY_O;
6622         break;
6623       default:
6624 	assert(0);
6625     }
6626   } else if ( t->object.resource )
6627   { ipat |= BY_O;
6628   }
6629   if ( t->graph_id )
6630     ipat |= BY_G;
6631 
6632   db->indexed[ipat]++;			/* statistics */
6633   t->indexed = alt_index[ipat];
6634 
6635   return TRUE;
6636 }
6637 
6638 
6639 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6640 inverse_partial_triple(triple *t) inverses a triple   by swapping object
6641 and subject and replacing the predicate with its inverse.
6642 
6643 TBD: In many cases we can  compute   the  hash  more efficiently than by
6644 simply recomputing it:
6645 
6646   - Change predicate: x-or with old and new predicate hash
6647   - swap S<->O if the other is known is a no-op wrt the hash.
6648 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6649 
6650 static int
inverse_partial_triple(triple * t)6651 inverse_partial_triple(triple *t)
6652 { predicate *i;
6653 
6654   if ( !t->inversed &&
6655        (!(i=t->predicate.r) || (i=t->predicate.r->inverse_of)) &&
6656        !t->object_is_literal )
6657   { atom_t o = t->object.resource;
6658 
6659     t->object.resource = t->subject_id ? ID_ATOM(t->subject_id) : 0;
6660     t->subject_id = o ? ATOM_ID(o) : 0;
6661 
6662     if ( t->predicate.r )
6663       t->predicate.r = i;
6664 
6665     t->indexed  = by_inverse[t->indexed];
6666     t->inversed = TRUE;
6667 
6668     return TRUE;
6669   }
6670 
6671   return FALSE;
6672 }
6673 
6674 
6675 static int
get_graph(term_t src,triple * t)6676 get_graph(term_t src, triple *t)
6677 { atom_t at;
6678 
6679   if ( PL_get_atom(src, &at) )
6680   { t->line = NO_LINE;
6681     t->graph_id = ATOM_ID(at);
6682     return TRUE;
6683   }
6684 
6685   if ( PL_is_functor(src, FUNCTOR_colon2) )
6686   { term_t a = PL_new_term_ref();
6687     long line;
6688 
6689     _PL_get_arg(1, src, a);
6690     if ( !PL_get_atom_ex(a, &at) )
6691       return FALSE;
6692     t->graph_id = ATOM_ID(at);
6693     _PL_get_arg(2, src, a);
6694     if ( !PL_get_long_ex(a, &line) )
6695       return FALSE;
6696     t->line = line;
6697 
6698     return TRUE;
6699   }
6700 
6701   return PL_type_error("rdf_graph", src);
6702 }
6703 
6704 
6705 static int
unify_graph(term_t src,triple * t)6706 unify_graph(term_t src, triple *t)
6707 { switch( PL_term_type(src) )
6708   { case PL_VARIABLE:
6709     { if ( t->line == NO_LINE )
6710 	return PL_unify_atom(src, ID_ATOM(t->graph_id));
6711       else
6712 	goto full_term;
6713     }
6714     case PL_ATOM:
6715     { atom_t a;
6716       return (PL_get_atom(src, &a) &&
6717 	      a == ID_ATOM(t->graph_id));
6718     }
6719     case PL_TERM:
6720     { if ( t->line == NO_LINE )
6721       { return PL_unify_term(src,
6722 			     PL_FUNCTOR, FUNCTOR_colon2,
6723 			       PL_ATOM, ID_ATOM(t->graph_id),
6724 			       PL_VARIABLE);
6725       } else
6726       { full_term:
6727 	return PL_unify_term(src,
6728 			     PL_FUNCTOR, FUNCTOR_colon2,
6729 			       PL_ATOM,  ID_ATOM(t->graph_id),
6730 			       PL_INT64, (int64_t)t->line); /* line is uint32_t */
6731       }
6732     }
6733     default:
6734       return PL_type_error("rdf_graph", src);
6735   }
6736 }
6737 
6738 
6739 static int
same_graph(triple * t1,triple * t2)6740 same_graph(triple *t1, triple *t2)
6741 { return t1->line     == t2->line &&
6742          t1->graph_id == t2->graph_id;
6743 }
6744 
6745 
6746 
6747 static int
put_literal_value(term_t v,literal * lit)6748 put_literal_value(term_t v, literal *lit)
6749 { switch(lit->objtype)
6750   { case OBJ_STRING:
6751       PL_put_atom(v, lit->value.string);
6752       break;
6753     case OBJ_INTEGER:
6754       PL_put_variable(v);
6755       return PL_unify_int64(v, lit->value.integer);
6756     case OBJ_DOUBLE:
6757       return PL_put_float(v, lit->value.real);
6758     case OBJ_TERM:
6759       return PL_recorded_external(lit->value.term.record, v);
6760     default:
6761       assert(0);
6762       return FALSE;
6763   }
6764 
6765   return TRUE;
6766 }
6767 
6768 
6769 static int
unify_literal(term_t lit,literal * l)6770 unify_literal(term_t lit, literal *l)
6771 { term_t v = PL_new_term_ref();
6772 
6773   if ( !put_literal_value(v, l) )
6774     return FALSE;
6775 
6776   if ( l->qualifier )
6777   { functor_t qf;
6778 
6779     assert(l->type_or_lang);
6780 
6781     if ( l->qualifier == Q_LANG )
6782       qf = FUNCTOR_lang2;
6783     else
6784       qf = FUNCTOR_type2;
6785 
6786     if ( PL_unify_term(lit, PL_FUNCTOR, qf,
6787 			 PL_ATOM, ID_ATOM(l->type_or_lang),
6788 			 PL_TERM, v) )
6789       return TRUE;
6790 
6791     if ( PL_exception(0) )
6792       return FALSE;
6793 
6794     return PL_unify(lit, v);		/* allow rdf(X, Y, literal(foo)) */
6795   } else if ( PL_unify(lit, v) )
6796   { return TRUE;
6797   } else if ( PL_is_functor(lit, FUNCTOR_lang2) &&
6798 	      l->objtype == OBJ_STRING )
6799   { term_t a = PL_new_term_ref();
6800     _PL_get_arg(2, lit, a);
6801     return PL_unify(a, v);
6802   } else if ( PL_is_functor(lit, FUNCTOR_type2) )
6803   { term_t a = PL_new_term_ref();
6804     _PL_get_arg(2, lit, a);
6805     return PL_unify(a, v);
6806   } else
6807     return FALSE;
6808 }
6809 
6810 
6811 
6812 static int
unify_object(term_t object,triple * t)6813 unify_object(term_t object, triple *t)
6814 { if ( t->object_is_literal )
6815   { term_t lit = PL_new_term_ref();
6816 
6817     if ( PL_unify_functor(object, FUNCTOR_literal1) )
6818       _PL_get_arg(1, object, lit);
6819     else if ( PL_is_functor(object, FUNCTOR_literal2) )
6820       _PL_get_arg(2, object, lit);
6821     else
6822       return FALSE;
6823 
6824     return unify_literal(lit, t->object.literal);
6825   } else
6826   { return PL_unify_atom(object, t->object.resource);
6827   }
6828 }
6829 
6830 
6831 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6832 TRUE:  ok
6833 FALSE: failure
6834 ERROR: error
6835 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6836 
6837 static int
unify_triple(term_t subject,term_t pred,term_t object,term_t src,triple * t,int inversed)6838 unify_triple(term_t subject, term_t pred, term_t object,
6839 	     term_t src, triple *t, int inversed)
6840 { predicate *p = t->predicate.r;
6841   fid_t fid = PL_open_foreign_frame();
6842   int rc;
6843 
6844   if ( inversed )
6845   { term_t tmp = object;
6846     object = subject;
6847     subject = tmp;
6848 
6849     rc = !pred || PL_unify_term(pred,
6850 				PL_FUNCTOR, FUNCTOR_inverse_of1,
6851 				  PL_ATOM, p->name);
6852   } else
6853   { rc = !pred || PL_unify_atom(pred, p->name);
6854   }
6855 
6856   if ( !rc ||
6857        !PL_unify_atom(subject, ID_ATOM(t->subject_id)) ||
6858        !unify_object(object, t) ||
6859        (src && !unify_graph(src, t)) )
6860   { if ( PL_exception(0) )
6861     { PL_close_foreign_frame(fid);
6862       return ERROR;
6863     }
6864 
6865     PL_discard_foreign_frame(fid);
6866     return FALSE;
6867   } else
6868   { PL_close_foreign_frame(fid);
6869     return TRUE;
6870   }
6871 }
6872 
6873 
6874 		 /*******************************
6875 		 *	DUPLICATE HANDLING	*
6876 		 *******************************/
6877 
6878 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6879 According to the RDF specs, duplicate triples  have no meaning, but they
6880 slow down search and often produce   duplicate results in search. Worse,
6881 some coding styles proposed in the  OWL documents introduce huge amounts
6882 of duplicate triples. We cannot  simply  ignore   a  triple  if  it is a
6883 duplicate as a subsequent retract  would   delete  the final triple. For
6884 example, after loading two  files  that   contain  the  same  triple and
6885 unloading one of these files the database would be left without triples.
6886 
6887 mark_duplicate() searches the DB for  a   duplicate  triple and sets the
6888 flag is_duplicate on both. This flag is   used by rdf/3, where duplicate
6889 triples are stored into a  temporary  table   to  be  filtered  from the
6890 results by new_answer().
6891 
6892 (*) We pick the write generation of the current query. This may still be
6893 set higher, but that  that  may  only   lead  to  triples  being  marked
6894 duplicates that are not. By use this   conservatie approach, we can move
6895 mark_duplicate() into prelink_triple().
6896 
6897 TBD: Duplicate marks may be removed by   GC:  walk over all triples that
6898 are marked as duplicates and try to find the duplicate.
6899 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
6900 
6901 static void
mark_duplicate(rdf_db * db,triple * t,query * q)6902 mark_duplicate(rdf_db *db, triple *t, query *q)
6903 { triple_walker tw;
6904   triple *d;
6905   const int indexed = BY_SPO;
6906   lifespan qls;
6907   lifespan *ls;
6908 
6909   if ( q )
6910   { qls.born = queryWriteGen(q) + 1;		/* (*) */
6911     qls.died = query_max_gen(q);
6912     ls = &qls;
6913   } else
6914   { ls = &t->lifespan;
6915   }
6916 
6917   init_triple_walker(&tw, db, t, indexed);
6918   while((d=next_triple(&tw)) && d != t)
6919   { d = deref_triple(db, d);
6920     DEBUG(3, Sdprintf("Possible duplicate: ");
6921 	     print_triple(d, PRT_NL|PRT_ADR));
6922 
6923     if ( !overlap_lifespan(&d->lifespan, ls) )
6924       continue;
6925 
6926     if ( match_triples(db, d, t, q, MATCH_DUPLICATE) )
6927     { if ( !t->is_duplicate )
6928       { t->is_duplicate = TRUE;
6929 	db->duplicates++;
6930       }
6931       if ( !d->is_duplicate )
6932       { d->is_duplicate = TRUE;
6933 	db->duplicates++;
6934       }
6935     }
6936   }
6937   destroy_triple_walker(db, &tw);
6938 }
6939 
6940 
6941 static int
update_duplicates(rdf_db * db)6942 update_duplicates(rdf_db *db)
6943 { triple *t;
6944   int count = 0;
6945 
6946   simpleMutexLock(&db->locks.duplicates);
6947   db->duplicates_up_to_date = FALSE;
6948   db->maintain_duplicates = FALSE;
6949 
6950   if ( db->duplicates )
6951   { enter_scan(&db->defer_all);
6952     for(t=fetch_triple(db, db->by_none.head);
6953 	t;
6954 	t=triple_follow_hash(db, t, ICOL(BY_NONE)))
6955     { if ( ++count % 10240 == 0 &&
6956 	   (PL_handle_signals() < 0 || db->resetting) )
6957 
6958       { exit_scan(&db->defer_all);
6959 	simpleMutexUnlock(&db->locks.duplicates);
6960 	return FALSE;			/* aborted */
6961       }
6962       t->is_duplicate = FALSE;
6963     }
6964     exit_scan(&db->defer_all);
6965 
6966     db->duplicates = 0;
6967   }
6968 
6969   db->maintain_duplicates = TRUE;
6970 
6971   enter_scan(&db->defer_all);
6972   for(t=fetch_triple(db, db->by_none.head);
6973       t;
6974       t=triple_follow_hash(db, t, ICOL(BY_NONE)))
6975   { if ( ++count % 1024 == 0 &&
6976 	 PL_handle_signals() < 0 )
6977     { exit_scan(&db->defer_all);
6978       db->maintain_duplicates = FALSE;		/* no point anymore */
6979       simpleMutexUnlock(&db->locks.duplicates);
6980       return FALSE;
6981     }
6982     mark_duplicate(db, t, NULL);
6983   }
6984   exit_scan(&db->defer_all);
6985 
6986   db->duplicates_up_to_date = TRUE;
6987   simpleMutexUnlock(&db->locks.duplicates);
6988 
6989   return TRUE;
6990 }
6991 
6992 
6993 static void
start_duplicate_admin(rdf_db * db)6994 start_duplicate_admin(rdf_db *db)
6995 { db->maintain_duplicates = TRUE;
6996 
6997   PL_call_predicate(NULL, PL_Q_NORMAL,
6998 		    PL_predicate("rdf_update_duplicates_thread", 0, "rdf_db"), 0);
6999 }
7000 
7001 
7002 
7003 		 /*******************************
7004 		 *	    TRANSACTIONS	*
7005 		 *******************************/
7006 
7007 static int
put_begin_end(term_t t,functor_t be,int level)7008 put_begin_end(term_t t, functor_t be, int level)
7009 { term_t av;
7010 
7011   return ( (av = PL_new_term_ref()) &&
7012 	   PL_put_integer(av, level) &&
7013 	   PL_cons_functor_v(t, be, av) );
7014 }
7015 
7016 
/** rdf_transaction(:Goal, +Id, +Options)

Options:

  * generation(+Generation)
  Determines query generation
  * snapshot(+Snapshot)
  Run the transaction in Snapshot.  If Snapshot is `true`, an
  anonymous snapshot is used.
*/
7024 
7025 static int
transaction_depth(const query * q)7026 transaction_depth(const query *q)
7027 { int depth = 0;
7028 
7029   for(q=q->transaction; q; q=q->transaction)
7030     depth++;
7031 
7032   return depth;
7033 }
7034 
7035 
7036 static foreign_t
rdf_transaction(term_t goal,term_t id,term_t options)7037 rdf_transaction(term_t goal, term_t id, term_t options)
7038 { int rc;
7039   rdf_db *db = rdf_current_db();
7040   query *q;
7041   triple_buffer added;
7042   triple_buffer deleted;
7043   triple_buffer updated;
7044   snapshot *ss = NULL;
7045 
7046   if ( !PL_get_nil(options) )
7047   { term_t tail = PL_copy_term_ref(options);
7048     term_t head = PL_new_term_ref();
7049     term_t arg = PL_new_term_ref();
7050 
7051     while( PL_get_list(tail, head, tail) )
7052     { size_t arity;
7053       atom_t name;
7054 
7055       if ( !PL_get_name_arity(head, &name, &arity) || arity != 1 )
7056 	return PL_type_error("option", head);
7057       _PL_get_arg(1, head, arg);
7058 
7059       if ( name == ATOM_snapshot )
7060       { if ( get_snapshot(arg, &ss) )
7061 	{ int ss_tid = snapshot_thread(ss);
7062 
7063 	  if ( ss_tid && ss_tid != PL_thread_self() )
7064 	    PL_permission_error("access", "rdf-snapshot", arg);
7065 	} else
7066 	{ atom_t a;
7067 
7068 	  if ( PL_get_atom(arg, &a) && a == ATOM_true )
7069 	    ss = SNAPSHOT_ANONYMOUS;
7070 	  else
7071 	    return PL_type_error("rdf_snapshot", arg);
7072 	}
7073       }
7074     }
7075     if ( !PL_get_nil_ex(tail) )
7076       return FALSE;
7077   }
7078 
7079   if ( !(q = open_transaction(db, &added, &deleted, &updated, ss)) )
7080     return FALSE;
7081   q->transaction_data.prolog_id = id;
7082   rc = PL_call_predicate(NULL, PL_Q_PASS_EXCEPTION, PRED_call1, goal);
7083 
7084   if ( rc )
7085   { if ( !empty_transaction(q) )
7086     { if ( ss )
7087       { discard_transaction(q);
7088       } else
7089       { term_t be;
7090 	int depth = transaction_depth(q);
7091 
7092 	if ( !(be=PL_new_term_ref()) ||
7093 	     !put_begin_end(be, FUNCTOR_begin1, depth) ||
7094 	     !rdf_broadcast(EV_TRANSACTION, (void*)id, (void*)be) ||
7095 	     !put_begin_end(be, FUNCTOR_end1, depth) )
7096 	  return FALSE;
7097 
7098 	commit_transaction(q);
7099 
7100 	if ( !rdf_broadcast(EV_TRANSACTION, (void*)id, (void*)be) )
7101 	  return FALSE;
7102       }
7103     } else
7104     { close_transaction(q);
7105     }
7106   } else
7107   { discard_transaction(q);
7108   }
7109 
7110   return rc;
7111 }
7112 
7113 		 /*******************************
7114 		 *	     PREDICATES		*
7115 		 *******************************/
7116 
7117 /** rdf_active_transactions_(-List)
7118 
7119 Provides list of parent transactions in the calling thread
7120 */
7121 
7122 static foreign_t
rdf_active_transactions(term_t list)7123 rdf_active_transactions(term_t list)
7124 { rdf_db *db = rdf_current_db();
7125   query *q = open_query(db);
7126   term_t tail = PL_copy_term_ref(list);
7127   term_t head = PL_new_term_ref();
7128   query *t;
7129 
7130   if ( !q ) return FALSE;
7131   for(t = q->transaction; t; t=t->transaction)
7132   { if ( !PL_unify_list(tail, head, tail) ||
7133          !PL_unify(head, t->transaction_data.prolog_id) )
7134     { close_query(q);
7135       return FALSE;
7136     }
7137   }
7138 
7139   close_query(q);
7140 
7141   return PL_unify_nil(tail);
7142 }
7143 
7144 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7145 (*) rdf_assert(S,P,O,G) adds a triple, but does not do so if exactly the
7146 same quintuple is visible and not  yet   erased.  Adding  would not make
7147 sense as this would be a complete duplicate that cannot be distinguished
7148 from the original and rdf_retractall/4 will erase both.
7149 
7150 Note that full duplicates are  quite  common   as  a  result  of forward
7151 reasoning.
7152 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7153 
7154 static foreign_t
rdf_assert4(term_t subject,term_t predicate,term_t object,term_t src)7155 rdf_assert4(term_t subject, term_t predicate, term_t object, term_t src)
7156 { rdf_db *db = rdf_current_db();
7157   query *q = open_query(db);
7158   triple *t, *d;
7159   triple_walker tw;
7160 
7161   if ( !q ) return FALSE;
7162   t = new_triple(db);
7163   if ( !get_triple(db, subject, predicate, object, t, q) )
7164   { error:
7165     free_triple(db, t, FALSE);
7166     close_query(q);
7167     return FALSE;
7168   }
7169   if ( src )
7170   { if ( !get_graph(src, t) )
7171       goto error;
7172   } else
7173   { t->graph_id = ATOM_ID(ATOM_user);
7174     t->line = NO_LINE;
7175   }
7176 
7177   init_triple_walker(&tw, db, t, BY_SPO);
7178   while((d=next_triple(&tw)))
7179   { if ( (d=alive_triple(q, d)) && !d->erased )		/* (*) */
7180     { if ( match_triples(db, d, t, q, MATCH_DUPLICATE|MATCH_SRC) &&
7181 	   d->line == t->line )
7182       { destroy_triple_walker(db, &tw);
7183 	free_triple(db, t, FALSE);
7184 	close_query(q);
7185 
7186 	return TRUE;
7187       }
7188     }
7189   }
7190   destroy_triple_walker(db, &tw);
7191 
7192   lock_atoms(db, t);
7193 
7194   add_triples(q, &t, 1);
7195   close_query(q);
7196 
7197   return TRUE;
7198 }
7199 
7200 
7201 static foreign_t
rdf_assert3(term_t subject,term_t predicate,term_t object)7202 rdf_assert3(term_t subject, term_t predicate, term_t object)
7203 { return rdf_assert4(subject, predicate, object, 0);
7204 }
7205 
7206 
7207 static void	free_search_state(search_state *state);
7208 
7209 static int
init_cursor_from_literal(search_state * state,literal * cursor)7210 init_cursor_from_literal(search_state *state, literal *cursor)
7211 { triple *p = &state->pattern;
7212   size_t iv;
7213 
7214   DEBUG(3,
7215 	Sdprintf("Trying literal search for ");
7216 	print_literal(cursor);
7217 	Sdprintf("\n"));
7218 
7219   p->indexed |= BY_O;
7220   p->indexed &= ~BY_G;			/* No graph indexing supported */
7221   if ( p->indexed == BY_SO )
7222   { p->indexed = BY_S;			/* we do not have index BY_SO */
7223     init_triple_walker(&state->cursor, state->db, p, p->indexed);
7224     return FALSE;
7225   }
7226 
7227   iv = literal_hash(cursor);		/* see also triple_hash_key() */
7228   if ( p->indexed&BY_S ) iv ^= subject_hash(p);
7229   if ( p->indexed&BY_P ) iv ^= predicate_hash(p->predicate.r);
7230 
7231   init_triple_literal_walker(&state->cursor, state->db, p, p->indexed, iv);
7232   state->has_literal_state = TRUE;
7233   state->literal_cursor = cursor;
7234 
7235   return TRUE;
7236 }
7237 
7238 
7239 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7240 init_search_state(search_state *state, query *q)
7241 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7242 
7243 static int
init_search_state(search_state * state,query * query)7244 init_search_state(search_state *state, query *query)
7245 { triple *p = &state->pattern;
7246 
7247   if ( get_partial_triple(state->db,
7248 			  state->subject, state->predicate, state->object,
7249 			  state->src, p) != TRUE )
7250   { free_search_state(state);
7251     return FALSE;
7252   }
7253 
7254   if ( p->object_is_literal && !is_numerical_string(p->object.literal) )
7255     state->flags &= ~MATCH_NUMERIC;
7256 
7257   if ( (p->match == STR_MATCH_PREFIX ||	p->match == STR_MATCH_LIKE) &&
7258        p->indexed != BY_SP &&
7259        (state->prefix = first_atom(p->object.literal->value.string, p->match)))
7260   { literal lit;
7261     literal **rlitp;
7262 
7263     lit = *p->object.literal;
7264     lit.value.string = state->prefix;
7265     state->lit_ex.literal = &lit;
7266     prepare_literal_ex(&state->lit_ex);
7267     rlitp = skiplist_find_first(&state->db->literals,
7268 				&state->lit_ex, &state->literal_state);
7269     if ( rlitp )
7270     { if ( init_cursor_from_literal(state, *rlitp) )
7271       { state->restart_lit = *rlitp;
7272 	state->restart_lit_state = state->literal_state;
7273       }
7274     } else
7275     { free_search_state(state);
7276       return FALSE;
7277     }
7278   } else if ( p->indexed != BY_SP && p->match >= STR_MATCH_LT )
7279   { literal **rlitp;
7280 
7281     state->lit_ex.literal = p->object.literal;
7282     prepare_literal_ex(&state->lit_ex);
7283 
7284     switch(p->match)
7285     { case STR_MATCH_LT:
7286       case STR_MATCH_LE:
7287 	rlitp = skiplist_find_first(&state->db->literals,
7288 				    NULL, &state->literal_state);
7289         break;
7290       case STR_MATCH_GT:
7291 	rlitp = skiplist_find_first(&state->db->literals,
7292 				    &state->lit_ex, &state->literal_state);
7293         break;
7294       case STR_MATCH_GE:
7295       case STR_MATCH_EQ:
7296 	if ( (state->flags&MATCH_NUMERIC) ) /* xsd:double is lowest type */
7297 	  p->object.literal->type_or_lang = ATOM_ID(ATOM_xsdDouble);
7298 	rlitp = skiplist_find_first(&state->db->literals,
7299 				    &state->lit_ex, &state->literal_state);
7300         break;
7301       case STR_MATCH_BETWEEN:
7302 	if ( (state->flags&MATCH_NUMERIC) )
7303 	  p->object.literal->type_or_lang = ATOM_ID(ATOM_xsdDouble);
7304 	rlitp = skiplist_find_first(&state->db->literals,
7305 				    &state->lit_ex, &state->literal_state);
7306         state->lit_ex.literal = &p->tp.end;
7307 	prepare_literal_ex(&state->lit_ex);
7308         break;
7309       default:
7310 	assert(0);
7311         return FALSE;
7312     }
7313 
7314     if ( rlitp )
7315     { if ( init_cursor_from_literal(state, *rlitp) )
7316       {	state->restart_lit = *rlitp;
7317 	state->restart_lit_state = state->literal_state;
7318       }
7319     } else
7320     { free_search_state(state);
7321       return FALSE;
7322     }
7323   } else
7324   { init_triple_walker(&state->cursor, state->db, p, p->indexed);
7325   }
7326 
7327   return TRUE;
7328 }
7329 
7330 
7331 static void
free_search_state(search_state * state)7332 free_search_state(search_state *state)
7333 { if ( state->query )
7334     close_query(state->query);
7335 
7336   free_triple(state->db, &state->pattern, FALSE);
7337   destroy_triple_walker(state->db, &state->cursor);
7338   if ( !state->db->maintain_duplicates &&
7339        state->dup_answers.count > state->db->duplicate_admin_threshold )
7340     start_duplicate_admin(state->db);
7341   destroy_tripleset(&state->dup_answers);
7342 
7343   if ( state->prefix )
7344     PL_unregister_atom(state->prefix);
7345 }
7346 
7347 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
allow_retry_state() hands `state` to PL_retry_address().  There is no
explicit return statement: PL_retry_address() is presumably the
SWI-Prolog macro that returns from the foreign predicate, asking to be
called again on backtracking with `state` as context (confirm against
SWI-Prolog.h).
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static foreign_t
allow_retry_state(search_state *state)
{ PL_retry_address(state);
}
7352 
7353 
7354 static int
new_answer(search_state * state,triple * t)7355 new_answer(search_state *state, triple *t)
7356 { if ( !t->is_duplicate && state->db->duplicates_up_to_date )
7357     return TRUE;
7358 
7359   return add_tripleset(state, &state->dup_answers, t);
7360 }
7361 
7362 
7363 static triple *
is_candidate(search_state * state,triple * t)7364 is_candidate(search_state *state, triple *t)
7365 { if ( !(t=alive_triple(state->query, t)) )
7366     return NULL;
7367 					/* hash-collision, skip */
7368   if ( state->has_literal_state )
7369   { if ( !(t->object_is_literal &&
7370 	   t->object.literal == state->literal_cursor) )
7371       return NULL;
7372   }
7373 
7374   if ( !match_triples(state->db, t, &state->pattern, state->query, state->flags) )
7375     return NULL;
7376 
7377   if ( !state->src )				/* with source, we report */
7378   { if ( !new_answer(state, t) )		/* duplicates */
7379       return NULL;
7380   }
7381 
7382   return t;
7383 }
7384 
7385 
7386 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7387 next_sub_property() advances the triple-walker to walk over an alternate
7388 hash of the cloud.
7389 
7390   - If the cloud doesn't have ->alt_hashes, all related predicates have
7391     the same hash, and we are done.
7392   - If the cloud does have ->alt_hashes, we must walk the
7393     alt-hashes.  We do not need to walk hashes that do not use
7394     sub-properties of the target.  This is implemented using
7395     hash_holds_candidates().
7396 
TBD: How expensive is hash_holds_candidates()? Maybe we should only try
that if there are many candidates in the hash-chains? Alternatively, we
can keep a list of predicates that use a particular alt-hash, so we do
not have to scan the whole cloud each time.
7401 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7402 
7403 static int
hash_holds_candidates(rdf_db * db,unsigned int hash,predicate * p,predicate_cloud * pc,query * q)7404 hash_holds_candidates(rdf_db *db, unsigned int hash,
7405 		      predicate *p, predicate_cloud *pc,
7406 		      query *q)
7407 { predicate **pp  = pc->members;
7408   predicate **end = &pp[pc->size];
7409 
7410   for(; pp<end; pp++)
7411   { predicate *p2 = *pp;
7412 
7413     if ( p2->hash == hash && isSubPropertyOf(db, p2, p, q) )
7414     { DEBUG(1, Sdprintf("\thash 0x%x: <%s rdfs:subPropertyOf %s>\n",
7415 			hash, pname(p2), pname(p)));
7416       return TRUE;
7417     }
7418   }
7419 
7420   return FALSE;
7421 }
7422 
7423 
7424 static int
next_sub_property(search_state * state)7425 next_sub_property(search_state *state)
7426 { if ( (state->flags & MATCH_SUBPROPERTY) )
7427   { triple *p = &state->pattern;
7428     triple_walker *tw = &state->cursor;
7429     predicate_cloud *pc;
7430 
7431     if ( !(pc=state->p_cloud) )
7432     { if ( !p->predicate.r )		/* no pred on rdf_has(?,-,?) */
7433 	return FALSE;
7434 
7435       if ( is_leaf_predicate(state->db, p->predicate.r, state->query) )
7436 	return FALSE;
7437 
7438       if ( p->predicate.r->cloud->alt_hash_count )
7439       { pc = state->p_cloud = p->predicate.r->cloud;
7440 
7441 	DEBUG(1, Sdprintf("%d alt hashes; first was 0x%x\n",
7442 			  p->predicate.r->cloud->alt_hash_count,
7443 			  predicate_hash(p->predicate.r)));
7444 	tw->unbounded_hash ^= predicate_hash(p->predicate.r);
7445 	state->alt_hash_cursor = 0;
7446       } else
7447 	return FALSE;			/* Cloud has only one hash */
7448     } else
7449     { tw->unbounded_hash ^= pc->alt_hashes[state->alt_hash_cursor];
7450       state->alt_hash_cursor++;
7451     }
7452 
7453     for( ; state->alt_hash_cursor < pc->alt_hash_count; state->alt_hash_cursor++)
7454     { unsigned new_hash = pc->alt_hashes[state->alt_hash_cursor];
7455 
7456       if ( new_hash != predicate_hash(p->predicate.r) &&
7457 	   hash_holds_candidates(state->db, new_hash,
7458 				 p->predicate.r, pc, state->query) )
7459       { DEBUG(1, Sdprintf("Retrying with alt-hash %d (0x%x)\n",
7460 			  state->alt_hash_cursor, new_hash));
7461 	tw->unbounded_hash ^= new_hash;
7462 	rewind_triple_walker(tw);
7463 
7464 	return TRUE;
7465       }
7466     }
7467   }
7468 
7469   return FALSE;
7470 }
7471 
7472 
7473 /* next_pattern() advances the pattern for the next query.  This is done
7474    for matches that deal with matching inverse properties and matches
7475    that deal with literal ranges (prefix, between, etc.)
7476 
7477    Note that inverse and literal enumeration are mutually exclusive (as
7478    long as we do not have literal subjects ...).
7479 
   If we enumerate (sub)properties, we must enumerate the Cartesian
   product of the sub properties and the inverse/literal search.
7482 */
7483 
/* Prepare `state` for another scan of the triple table.  Returns TRUE
   if the cursor was set up for a new scan and FALSE if there is nothing
   left to try.  The alternatives are tried in this order:

     1. If we are enumerating literals, advance to the next literal in
	the skiplist.  The scan ends (FALSE) as soon as that literal no
	longer satisfies the prefix/less/less-equal/between condition.
     2. Try the next alternate predicate hash (next_sub_property()).  If
	a literal enumeration was recorded in state->restart_lit, it is
	restarted from there.
     3. If MATCH_INVERSE is set and inverse_partial_triple() succeeds on
	the pattern, re-initialise the walker for the changed pattern.
*/

static int
next_pattern(search_state *state)
{ triple_walker *tw = &state->cursor;
  triple *p = &state->pattern;

  if ( state->has_literal_state )
  { literal **litp;

    if ( (litp = skiplist_find_next(&state->literal_state)) )
    { literal *lit = *litp;

      DEBUG(2, Sdprintf("next: ");
	       print_literal(lit);
	       Sdprintf("\n"));

      switch(state->pattern.match)
      { case STR_MATCH_PREFIX:
	{ if ( !match_atoms(STR_MATCH_PREFIX, state->prefix, lit->value.string) )
	  { DEBUG(1,
		  Sdprintf("PREFIX: terminated literal iteration from ");
		  print_literal(lit);
		  Sdprintf("\n"));
	    return FALSE;			/* no longer a prefix */
	  }

	  break;
	}
	case STR_MATCH_LT:
	  if ( compare_literals(&state->lit_ex, lit) <= 0 )
	    return FALSE;
	  /* No break: continues into the checks below.  NOTE(review):
	     this looks like a deliberate fall-through; confirm. */
	case STR_MATCH_EQ:
	case STR_MATCH_LE:
	case STR_MATCH_BETWEEN:
	{ if ( (state->flags&MATCH_NUMERIC) )
	  { xsd_primary nt;

	    /* Numeric matching: a non-numeric literal ends the scan */
	    if ( (nt=is_numerical_string(lit)) )
	    { xsd_primary np = is_numerical_string(state->lit_ex.literal);

	      if ( cmp_xsd_info(np, &state->lit_ex.atom, nt, lit->value.string) < 0 )
		return FALSE;			/* no longer smaller/equal */

	      break;
	    }
	    return FALSE;
	  } else
	  { if ( compare_literals(&state->lit_ex, lit) < 0 )
	    { DEBUG(1,
		    Sdprintf("LE/BETWEEN(");
		    print_literal(state->lit_ex.literal);
		    Sdprintf("): terminated literal iteration from ");
		    print_literal(lit);
		    Sdprintf("\n"));
	      return FALSE;			/* no longer smaller/equal */
	    }
	  }

	  break;
	}
      }

      /* The literal is still in range: scan the triples using it */
      init_cursor_from_literal(state, lit);
      return TRUE;
    }
  }

  if ( next_sub_property(state) )	/* redo search with alternative hash */
  { if ( state->restart_lit )
    { state->literal_state = state->restart_lit_state;
      init_cursor_from_literal(state, state->restart_lit);
    }

    return TRUE;
  }

  if ( (state->flags&MATCH_INVERSE) &&
       inverse_partial_triple(p) )
  { DEBUG(1, Sdprintf("Retrying inverse: "); print_triple(p, PRT_NL));
    /* Forget the cloud, so next_sub_property() starts from scratch */
    state->p_cloud = NULL;
    init_triple_walker(tw, state->db, p, p->indexed);

    return TRUE;
  }

  return FALSE;
}
7570 
7571 
/* Find the next answer for the search and unify it with the Prolog
   arguments stored in `state`.  Returns TRUE if an answer was unified
   and FALSE if there are no more answers or unify_triple() returned
   ERROR.

   After a successful unification we immediately look for the next
   candidate.  If there is one, it is stored in state->prefetched, which
   the caller uses to decide whether or not to leave a choice point.  On
   the next call we resume at the `retry` label using this prefetched
   triple.
*/

static int
next_search_state(search_state *state)
{ triple *t, *t2;
  triple_walker *tw = &state->cursor;
  triple *p = &state->pattern;
  term_t retpred;

  /* Determine the term that receives the predicate of the answer
     (0 means the predicate is not unified by unify_triple()) */
  if ( (state->flags & MATCH_SUBPROPERTY) )
  { retpred = state->realpred;
    if ( retpred )
    { if ( !p->predicate.r )		/* state->predicate is unbound */
      { if ( !PL_unify(state->predicate, retpred) )
	  return FALSE;
      }
    } else
    { if ( !p->predicate.r )
	retpred = state->predicate;
    }
  } else
  { retpred = p->predicate.r ? 0 : state->predicate;
  }

  if ( (t2=state->prefetched) )
  { state->prefetched = NULL;		/* retrying; no need to check */
    goto retry;
  }

  do
  { while( (t = next_triple(tw)) )
    { DEBUG(3, Sdprintf("Search: ");
	       print_triple(t, PRT_SRC|PRT_GEN|PRT_NL|PRT_ADR));

      if ( (t2=is_candidate(state, t)) )
      { int rc;

      retry:
	/* Unification failed without error: try the next triple */
	if ( (rc=unify_triple(state->subject, retpred, state->object,
			      state->src, t2, p->inversed)) == FALSE )
	  continue;
	if ( rc == ERROR )
	  return FALSE;			/* makes rdf/3 return FALSE */

	/* We have an answer.  Look ahead for the next candidate */
	do
	{ while( (t = next_triple(tw)) )
	  { DEBUG(3, Sdprintf("Search (prefetch): ");
		  print_triple(t, PRT_SRC|PRT_GEN|PRT_NL|PRT_ADR));

	    if ( (t2=is_candidate(state, t)) )
	    { state->prefetched = t2;

	      return TRUE;		/* non-deterministic */
	    }
	  }
	} while(next_pattern(state));

	return TRUE;			/* deterministic */
      }
    }
  } while(next_pattern(state));

  return FALSE;
}
7634 
7635 
7636 
/* Shared implementation of the non-deterministic triple search
   predicates.  `src` and `realpred` may be 0 if the calling predicate
   has no such argument.  `flags` is a bitwise or of MATCH_* flags.

     - PL_FIRST_CALL: open a query, initialise the search state that is
       embedded in the query (q->state.search) and search for the first
       answer.
     - PL_REDO: get the state from the foreign context and search for
       the next answer.
     - PL_PRUNED: release the state.

   If an answer was found and a next candidate was prefetched we return
   through allow_retry_state().  Otherwise the state is released and the
   result of next_search_state() is returned.
*/

static foreign_t
rdf(term_t subject, term_t predicate, term_t object,
    term_t src, term_t realpred, control_t h, unsigned flags)
{ rdf_db *db = rdf_current_db();
  search_state *state;

  switch(PL_foreign_control(h))
  { case PL_FIRST_CALL:
    { query *q = open_query(db);

      if ( !q ) return FALSE;

      state = &q->state.search;
      state->query     = q;
      state->db	       = db;
      state->subject   = subject;
      state->object    = object;
      state->predicate = predicate;
      state->src       = src;
      state->realpred  = realpred;
      state->flags     = flags;
						/* clear the rest */
      /* Zeroes the bytes from state->cursor up to (but not including)
	 state->lit_ex; this depends on the field order of search_state */
      memset(&state->cursor, 0,
	     (char*)&state->lit_ex - (char*)&state->cursor);
      state->dup_answers.entries = NULL;	/* see add_tripleset() */

      /* NOTE(review): the state is not released here; presumably
	 init_search_state() does so on all its failure paths -- verify */
      if ( !init_search_state(state, q) )
	return FALSE;

      goto search;
    }
    case PL_REDO:
    { int rc;

      state = PL_foreign_context_address(h);
      assert(state->subject == subject);

    search:
      if ( (rc=next_search_state(state)) )
      { if ( state->prefetched )
	  return allow_retry_state(state);
      }

      /* Failure or last answer: we are done with the state */
      free_search_state(state);
      return rc;
    }
    case PL_PRUNED:
    { state = PL_foreign_context_address(h);

      free_search_state(state);
      return TRUE;
    }
    default:
      assert(0);
      return FALSE;
  }
}
7694 
7695 
7696 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7697 rdf(Subject, Predicate, Object)
7698 
7699 Search specifications:
7700 
7701 	Predicate:
7702 
7703 		subPropertyOf(X) = P
7704 
7705 	Object:
7706 
7707 		literal(substring(X), L)
7708 		literal(word(X), L)
7709 		literal(exact(X), L)
7710 		literal(icase(X), L)
7711 		literal(prefix(X), L)
7712 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7713 
7714 
7715 static foreign_t
rdf3(term_t subject,term_t predicate,term_t object,control_t h)7716 rdf3(term_t subject, term_t predicate, term_t object, control_t h)
7717 { return rdf(subject, predicate, object, 0, 0, h,
7718 	     MATCH_EXACT|MATCH_NUMERIC);
7719 }
7720 
7721 static foreign_t
rdf4(term_t subject,term_t predicate,term_t object,term_t src,control_t h)7722 rdf4(term_t subject, term_t predicate, term_t object,
7723      term_t src, control_t h)
7724 { return rdf(subject, predicate, object, src, 0, h,
7725 	     MATCH_EXACT|MATCH_NUMERIC|MATCH_SRC);
7726 }
7727 
7728 
7729 static foreign_t
rdf_has3(term_t subject,term_t predicate,term_t object,control_t h)7730 rdf_has3(term_t subject, term_t predicate, term_t object, control_t h)
7731 { return rdf(subject, predicate, object, 0, 0, h,
7732 	     MATCH_EXACT|MATCH_NUMERIC|MATCH_SUBPROPERTY|MATCH_INVERSE);
7733 }
7734 
7735 
7736 static foreign_t
rdf_has4(term_t subject,term_t predicate,term_t object,term_t realpred,control_t h)7737 rdf_has4(term_t subject, term_t predicate, term_t object,
7738 	term_t realpred, control_t h)
7739 { return rdf(subject, predicate, object, 0, realpred, h,
7740 	     MATCH_EXACT|MATCH_NUMERIC|MATCH_SUBPROPERTY|MATCH_INVERSE);
7741 }
7742 
7743 
7744 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7745 rdf_estimate_complexity(+S,+P,+O,-C)
7746 
7747 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7748 
7749 static foreign_t
rdf_estimate_complexity(term_t subject,term_t predicate,term_t object,term_t complexity)7750 rdf_estimate_complexity(term_t subject, term_t predicate, term_t object,
7751 		        term_t complexity)
7752 { triple t;
7753   size_t c;
7754   rdf_db *db = rdf_current_db();
7755   int rc;
7756 
7757   memset(&t, 0, sizeof(t));
7758   if ( (rc=get_partial_triple(db, subject, predicate, object, 0, &t)) != TRUE )
7759   { if ( rc == -1 )
7760     { return FALSE;			/* error */
7761     } else
7762     { return PL_unify_integer(complexity, 0);	/* no predicate */
7763     }
7764   }
7765 
7766   if ( t.indexed == BY_NONE )
7767   { c = db->created - db->erased;		/* = totale triple count */
7768 #if 0
7769   } else if ( t.indexed == BY_P )
7770   { c = t.predicate.r->triple_count;		/* must sum over children */
7771 #endif
7772   } else
7773   { size_t key = triple_hash_key(&t, t.indexed);
7774     int icol = ICOL(t.indexed);
7775     triple_hash *hash = &db->hash[icol];
7776     size_t count;
7777 
7778     if ( !db->hash[icol].created )
7779       create_triple_hashes(db, 1, &icol);
7780 
7781     c = 0;
7782     for(count=hash->bucket_count_epoch; count <= hash->bucket_count; count *= 2)
7783     { int entry = key%count;
7784       triple_bucket *bucket = &hash->blocks[MSB(entry)][entry];
7785 
7786       c += bucket->count;		/* TBD: compensate for resize */
7787     }
7788   }
7789 
7790   rc = PL_unify_int64(complexity, c);
7791   free_triple(db, &t, FALSE);
7792 
7793   return rc;
7794 }
7795 
7796 
7797 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
7798 current_literal(?Literals)
7799 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
7800 
/* State for the non-deterministic rdf_current_literal().  Allocated on
   the first call and released when the enumeration ends or is pruned.
*/

typedef struct cl_state
{ skiplist_enum sl_state;		/* enumeration of db->literals */
  int		indexed;		/* TRUE: enumeration started at lit_ex */
  literal	lit;			/* literal extracted from the argument */
  literal_ex	lit_ex;			/* search key; refers to lit */
} cl_state;
7807 
7808 static int
indexedLiteral(const literal * lit)7809 indexedLiteral(const literal *lit)
7810 { if ( lit->objtype == OBJ_STRING )
7811     return lit->value.string != 0;
7812   return lit->objtype != OBJ_UNTYPED;
7813 }
7814 
7815 
/* Non-deterministically unify `t` with the literals from db->literals.

     - PL_FIRST_CALL: allocate the state.  If `t` is unbound, or the
       literal extracted from it cannot be used as a key (see
       indexedLiteral()), enumerate from the start of the table.
       Otherwise start at the position found for the literal.
     - PL_REDO: continue the enumeration from the saved state.
     - PL_PRUNED: release the state.

   The enumeration ends on an exception, when the table is exhausted
   or -if we started from a key- when compare_literals() of the key and
   the current literal is > 0.
*/

static foreign_t
rdf_current_literal(term_t t, control_t h)
{ rdf_db *db = rdf_current_db();
  literal **data;
  cl_state *state;
  int rc;

  switch(PL_foreign_control(h))
  { case PL_FIRST_CALL:
      state = rdf_malloc(db, sizeof(*state));
      memset(state, 0, sizeof(*state));

      if ( PL_is_variable(t) )
      { data = skiplist_find_first(&db->literals, NULL, &state->sl_state);
	goto next;
      } else
      { if ( !get_literal(db, t, &state->lit, LIT_PARTIAL) )
	{ rdf_free(db, state, sizeof(*state));
	  return FALSE;
	}
	if ( indexedLiteral(&state->lit) )
	{ state->lit_ex.literal = &state->lit;
	  prepare_literal_ex(&state->lit_ex);
	  data = skiplist_find_first(&db->literals,
				     &state->lit_ex, &state->sl_state);
	  state->indexed = TRUE;
	} else
	{ data = skiplist_find_first(&db->literals, NULL, &state->sl_state);
	}
	goto next;
      }
    case PL_REDO:
      state = PL_foreign_context_address(h);
      data  = skiplist_find_next(&state->sl_state);
    next:
    { fid_t fid = PL_open_foreign_frame();

      for(; data; data=skiplist_find_next(&state->sl_state))
      { literal *lit = *data;

	/* NOTE(review): PL_retry_address() is assumed to return from
	   this function, keeping `state` for the next PL_REDO */
	if ( unify_literal(t, lit) )
	{ PL_close_foreign_frame(fid);
	  PL_retry_address(state);
	} else if ( PL_exception(0) )
	{ break;
	} else if ( state->indexed &&
		    compare_literals(&state->lit_ex, lit) > 0 )
	{ break;
	} else
	{ PL_rewind_foreign_frame(fid);
	}
      }
      PL_close_foreign_frame(fid);
      rc = FALSE;
      goto cleanup;
    }
    case PL_PRUNED:
      state = PL_foreign_context_address(h);
      rc = TRUE;

    cleanup:
      /* Also reached (with rc == FALSE) when the enumeration ends */
      free_literal(db, &state->lit);
      rdf_free(db, state, sizeof(*state));

      return rc;
    default:
      assert(0);
      return FALSE;
  }
}
7886 
7887 
7888 static int
update_triple(rdf_db * db,term_t action,triple * t,triple ** updated,query * q)7889 update_triple(rdf_db *db, term_t action, triple *t, triple **updated, query *q)
7890 { term_t a = PL_new_term_ref();
7891   triple tmp, *new;
7892 					/* Create copy in local memory */
7893   tmp = *t;
7894 
7895   if ( !PL_get_arg(1, action, a) )
7896     return PL_type_error("rdf_action", action);
7897 
7898   if ( PL_is_functor(action, FUNCTOR_subject1) )
7899   { atom_t s;
7900 
7901     if ( !PL_get_atom_ex(a, &s) )
7902       return FALSE;
7903     if ( tmp.subject_id == ATOM_ID(s) )
7904       return TRUE;			/* no change */
7905 
7906     tmp.subject_id = ATOM_ID(s);
7907   } else if ( PL_is_functor(action, FUNCTOR_predicate1) )
7908   { predicate *p;
7909 
7910     if ( !get_predicate(db, a, &p, q) )
7911       return FALSE;
7912     if ( tmp.predicate.r == p )
7913       return TRUE;			/* no change */
7914 
7915     tmp.predicate.r = p;
7916   } else if ( PL_is_functor(action, FUNCTOR_object1) )
7917   { triple t2;
7918 
7919     memset(&t2, 0, sizeof(t2));
7920 
7921     if ( !get_object(db, a, &t2) )
7922     { free_triple(db, &t2, FALSE);
7923       return FALSE;
7924     }
7925     if ( match_object(&t2, &tmp, MATCH_QUAL) )
7926     { free_triple(db, &t2, FALSE);
7927       return TRUE;
7928     }
7929 
7930     if ( (tmp.object_is_literal = t2.object_is_literal) )
7931     { tmp.object.literal = t2.object.literal;
7932     } else
7933     { tmp.object.resource = t2.object.resource;
7934     }
7935   } else if ( PL_is_functor(action, FUNCTOR_graph1) )
7936   { triple t2;
7937 
7938     if ( !get_graph(a, &t2) )
7939       return FALSE;
7940     if ( t2.graph_id == t->graph_id && t2.line == t->line )
7941     { *updated = NULL;
7942       return TRUE;
7943     }
7944 
7945     tmp.graph_id = t2.graph_id;
7946     tmp.line = t2.line;
7947   } else
7948     return PL_domain_error("rdf_action", action);
7949 
7950   new = new_triple(db);
7951   new->subject_id	 = tmp.subject_id;
7952   new->predicate.r	 = tmp.predicate.r;
7953   if ( (new->object_is_literal = tmp.object_is_literal) )
7954   { if ( tmp.object.literal->shared )
7955     { simpleMutexLock(&db->locks.literal);
7956       new->object.literal = copy_literal(db, tmp.object.literal);
7957       simpleMutexUnlock(&db->locks.literal);
7958     } else
7959     { new->object.literal = tmp.object.literal;
7960     }
7961   } else
7962   { new->object.resource = tmp.object.resource;
7963   }
7964   new->graph_id		 = tmp.graph_id;
7965   new->line		 = tmp.line;
7966 
7967   lock_atoms(db, new);
7968 
7969   *updated = new;
7970 
7971   return TRUE;
7972 }
7973 
7974 
7975 /** rdf_update(+Subject, +Predicate, +Object, +Action) is det.
7976 
7977 Update a triple. Please note this is actually erase+assert
7978 */
7979 
7980 static foreign_t
rdf_update5(term_t subject,term_t predicate,term_t object,term_t src,term_t action)7981 rdf_update5(term_t subject, term_t predicate, term_t object, term_t src,
7982 	    term_t action)
7983 { triple t, *p;
7984   int indexed = BY_SPO;
7985   rdf_db *db = rdf_current_db();
7986   int rc = TRUE;
7987   size_t count;
7988   triple_walker tw;
7989   triple_buffer matches;
7990   query *q = open_query(db);
7991 
7992   if ( !q ) return FALSE;
7993   memset(&t, 0, sizeof(t));
7994 
7995   if ( !get_src(src, &t) ||
7996        !get_triple(db, subject, predicate, object, &t, q) )
7997   { close_query(q);
7998     return FALSE;
7999   }
8000 
8001   init_triple_buffer(&matches);
8002   init_triple_walker(&tw, db, &t, indexed);
8003   while((p=next_triple(&tw)))
8004   { if ( !(p=alive_triple(q, p)) )
8005       continue;
8006 
8007     if ( match_triples(db, p, &t, q, MATCH_EXACT) )
8008       buffer_triple(&matches, p);
8009   }
8010 
8011   if ( !is_empty_buffer(&matches) )
8012   { triple_buffer replacements;
8013     triple *new, **tp;
8014 
8015     count = matches.top-matches.base;
8016     init_triple_buffer(&replacements);
8017     for(tp=matches.base; tp<matches.top; tp++)
8018     { new = NULL;
8019       if ( !update_triple(db, action, *tp, &new, q) )
8020       { rc = FALSE;
8021 	free_triple_buffer(&replacements);
8022 	goto out;
8023       }
8024 
8025       buffer_triple(&replacements, new);
8026     }
8027 
8028     update_triples(q, matches.base, replacements.base, count);
8029     free_triple_buffer(&replacements);
8030   } else
8031   { count = 0;
8032   }
8033 
8034 out:
8035   close_query(q);
8036   free_triple_buffer(&matches);
8037   free_triple(db, &t, FALSE);
8038 
8039   return (rc && count > 0) ? TRUE : FALSE;
8040 }
8041 
8042 
/* Same as rdf_update5(), passing 0 for the src argument. */

static foreign_t
rdf_update(term_t subject, term_t predicate, term_t object, term_t action)
{ return rdf_update5(subject, predicate, object, 0, action);
}
8047 
8048 
8049 static foreign_t
rdf_retractall4(term_t subject,term_t predicate,term_t object,term_t src)8050 rdf_retractall4(term_t subject, term_t predicate, term_t object, term_t src)
8051 { triple t, *p;
8052   rdf_db *db = rdf_current_db();
8053   triple_walker tw;
8054   triple_buffer buf;
8055   query *q;
8056 
8057   memset(&t, 0, sizeof(t));
8058   switch( get_partial_triple(db, subject, predicate, object, src, &t) )
8059   { case 0:				/* no such predicate */
8060       return TRUE;
8061     case -1:				/* error */
8062       return FALSE;
8063   }
8064 
8065   if ( t.graph_id )		/* speedup for rdf_retractall(_,_,_,DB) */
8066   { graph *gr = existing_graph(db, ID_ATOM(t.graph_id));
8067 
8068     if ( !gr || gr->triple_count == 0 )
8069       return TRUE;
8070   }
8071 
8072   if ( !(q = open_query(db)) )
8073     return FALSE;
8074   init_triple_buffer(&buf);
8075   init_triple_walker(&tw, db, &t, t.indexed);
8076   while((p=next_triple(&tw)))
8077   { if ( !(p=alive_triple(q, p)) )
8078       continue;
8079 
8080     if ( match_triples(db, p, &t, q, MATCH_EXACT|MATCH_SRC) )
8081     { if ( t.object_is_literal && t.object.literal->objtype == OBJ_TERM )
8082       { fid_t fid = PL_open_foreign_frame();
8083 	int rc = unify_object(object, p);
8084 	PL_discard_foreign_frame(fid);
8085 	if ( !rc )
8086 	  continue;
8087       }
8088 
8089       buffer_triple(&buf, p);
8090     }
8091   }
8092   free_triple(db, &t, FALSE);
8093   del_triples(q, buf.base, buf.top-buf.base);
8094   close_query(q);
8095   free_triple_buffer(&buf);
8096 
8097 
8098   return TRUE;
8099 }
8100 
8101 
/* Same as rdf_retractall4(), passing 0 for the src argument. */

static foreign_t
rdf_retractall3(term_t subject, term_t predicate, term_t object)
{ return rdf_retractall4(subject, predicate, object, 0);
}
8106 
8107 
8108 		 /*******************************
8109 		 *	     MONITOR		*
8110 		 *******************************/
8111 
/* Registered monitor callbacks are kept in a singly linked list.  New
   callbacks are appended at the tail by rdf_monitor().
*/

typedef struct broadcast_callback
{ struct broadcast_callback *next;	/* next in list (NULL: last) */
  predicate_t		     pred;	/* predicate to call */
  long			     mask;	/* events we are interested in */
} broadcast_callback;

static long joined_mask = 0L;		/* bitwise or of all cb->mask */
static broadcast_callback *callback_list;	/* first callback */
static broadcast_callback *callback_tail;	/* last callback */
8121 
8122 static int
do_broadcast(term_t term,long mask)8123 do_broadcast(term_t term, long mask)
8124 { if ( callback_list )
8125   { broadcast_callback *cb;
8126 
8127     for(cb = callback_list; cb; cb = cb->next)
8128     { qid_t qid;
8129       term_t ex;
8130 
8131       if ( !(cb->mask & mask) )
8132 	continue;
8133 
8134       if ( !(qid = PL_open_query(NULL, PL_Q_CATCH_EXCEPTION, cb->pred, term)) )
8135 	return FALSE;
8136       if ( !PL_next_solution(qid) && (ex = PL_exception(qid)) )
8137       { term_t av;
8138 
8139 	PL_cut_query(qid);
8140 
8141 	if ( (av = PL_new_term_refs(2)) &&
8142 	     PL_put_atom(av+0, ATOM_error) &&
8143 	     PL_put_term(av+1, ex) )
8144 	  PL_call_predicate(NULL, PL_Q_NORMAL,
8145 			    PL_predicate("print_message", 2, "user"),
8146 			    av);
8147 	return FALSE;
8148       } else
8149       { PL_close_query(qid);
8150       }
8151     }
8152   }
8153 
8154   return TRUE;
8155 }
8156 
8157 
8158 int
rdf_is_broadcasting(broadcast_id id)8159 rdf_is_broadcasting(broadcast_id id)
8160 { return (joined_mask & id) != 0;
8161 }
8162 
8163 
8164 int
rdf_broadcast(broadcast_id id,void * a1,void * a2)8165 rdf_broadcast(broadcast_id id, void *a1, void *a2)
8166 { int rc = TRUE;
8167 
8168   if ( (joined_mask & id) )
8169   { fid_t fid;
8170     term_t term;
8171     functor_t funct;
8172 
8173     if ( !(fid = PL_open_foreign_frame()) ||
8174 	 !(term = PL_new_term_ref()) )
8175       return FALSE;
8176 
8177     switch(id)
8178     { case EV_ASSERT:
8179       case EV_ASSERT_LOAD:
8180 	funct = FUNCTOR_assert4;
8181         goto assert_retract;
8182       case EV_RETRACT:
8183 	funct = FUNCTOR_retract4;
8184       assert_retract:
8185       { triple *t = a1;
8186 	term_t tmp;
8187 
8188 	if ( !(tmp = PL_new_term_refs(4)) ||
8189 	     !PL_put_atom(tmp+0, ID_ATOM(t->subject_id)) ||
8190 	     !PL_put_atom(tmp+1, t->predicate.r->name) ||
8191 	     !unify_object(tmp+2, t) ||
8192 	     !unify_graph(tmp+3, t) ||
8193 	     !PL_cons_functor_v(term, funct, tmp) )
8194 	  return FALSE;
8195 	break;
8196       }
8197       case EV_UPDATE:
8198       { triple *t = a1;
8199 	triple *new = a2;
8200 	term_t tmp, a;
8201 	functor_t action;
8202 	int rc;
8203 
8204 	if ( !(tmp = PL_new_term_refs(5)) ||
8205 	     !(a = PL_new_term_ref()) ||
8206 	     !PL_put_atom(tmp+0, ID_ATOM(t->subject_id)) ||
8207 	     !PL_put_atom(tmp+1, t->predicate.r->name) ||
8208 	     !unify_object(tmp+2, t) ||
8209 	     !unify_graph(tmp+3, t) )
8210 	  return FALSE;
8211 
8212 	if ( t->subject_id != new->subject_id )
8213 	{ action = FUNCTOR_subject1;
8214 	  rc = PL_put_atom(a, ID_ATOM(new->subject_id));
8215 	} else if ( t->predicate.r != new->predicate.r )
8216 	{ action = FUNCTOR_predicate1;
8217 	  rc = PL_put_atom(a, new->predicate.r->name);
8218 	} else if ( !match_object(t, new, MATCH_QUAL) )
8219 	{ action = FUNCTOR_object1;
8220 	  rc = unify_object(a, new);
8221 	} else if ( !same_graph(t, new) )
8222 	{ action = FUNCTOR_graph1;
8223 	  rc = unify_graph(a, new);
8224 	} else
8225 	{ return TRUE;			/* no change */
8226 	}
8227 
8228         if ( !rc ||
8229 	     !PL_cons_functor_v(tmp+4, action, a) ||
8230 	     !PL_cons_functor_v(term, FUNCTOR_update5, tmp) )
8231 	  return FALSE;
8232 	break;
8233       }
8234       case EV_NEW_LITERAL:
8235       { literal *lit = a1;
8236 	term_t tmp;
8237 
8238 	if ( !(tmp = PL_new_term_refs(1)) ||
8239 	     !unify_literal(tmp, lit) ||
8240 	     !PL_cons_functor_v(term, FUNCTOR_new_literal1, tmp) )
8241 	  return FALSE;
8242 	break;
8243       }
8244       case EV_OLD_LITERAL:
8245       { literal *lit = a1;
8246 	term_t tmp;
8247 
8248 	if ( !(tmp = PL_new_term_refs(1)) ||
8249 	     !unify_literal(tmp, lit) ||
8250 	     !PL_cons_functor_v(term, FUNCTOR_old_literal1, tmp) )
8251 	  return FALSE;
8252 	break;
8253       }
8254       case EV_LOAD:
8255       { term_t ctx = (term_t)a1;
8256 	term_t be  = (term_t)a2;
8257 	term_t tmp;
8258 
8259 	if ( !(tmp = PL_new_term_refs(2)) ||
8260 	     !PL_put_term(tmp+0, be) ||		/* begin/end(graphs) */
8261 	     !PL_put_term(tmp+1, ctx) ||
8262 	     !PL_cons_functor_v(term, FUNCTOR_load2, tmp) )
8263 	  return FALSE;
8264 	break;
8265       }
8266       case EV_TRANSACTION:
8267       { term_t ctx = (term_t)a1;
8268 	term_t be  = (term_t)a2;
8269 	term_t tmp;
8270 
8271 	if ( !(tmp = PL_new_term_refs(2)) ||
8272 	     !PL_put_term(tmp+0, be) ||		/* begin/end */
8273 	     !PL_put_term(tmp+1, ctx) ||
8274 	     !PL_cons_functor_v(term, FUNCTOR_transaction2, tmp) )
8275 	  return FALSE;
8276 	break;
8277       }
8278       case EV_RESET:
8279       { PL_put_atom(term, ATOM_reset);
8280 	break;
8281       }
8282       case EV_CREATE_GRAPH:
8283       { graph *g = a1;
8284 	term_t tmp;
8285 
8286 	if ( !(tmp = PL_new_term_refs(1)) ||
8287 	     !(PL_put_atom(tmp, g->name)) ||
8288 	     !PL_cons_functor_v(term, FUNCTOR_create_graph1, tmp) )
8289 	  return FALSE;
8290 	break;
8291       }
8292       default:
8293 	assert(0);
8294     }
8295 
8296     rc = do_broadcast(term, id);
8297 
8298     PL_discard_foreign_frame(fid);
8299   }
8300 
8301   return rc;
8302 }
8303 
8304 
8305 static foreign_t
rdf_monitor(term_t goal,term_t mask)8306 rdf_monitor(term_t goal, term_t mask)
8307 { atom_t name;
8308   broadcast_callback *cb;
8309   predicate_t p;
8310   long msk;
8311   module_t m = NULL;
8312 
8313   if ( !PL_strip_module(goal, &m, goal) ||
8314        !PL_get_atom_ex(goal, &name) ||
8315        !PL_get_long_ex(mask, &msk) )
8316     return FALSE;
8317 
8318   p = PL_pred(PL_new_functor(name, 1), m);
8319 
8320   for(cb=callback_list; cb; cb = cb->next)
8321   { if ( cb->pred == p )
8322     { broadcast_callback *cb2;
8323       cb->mask = msk;
8324 
8325       joined_mask = 0L;
8326       for(cb2=callback_list; cb2; cb2 = cb2->next)
8327 	joined_mask |= cb2->mask;
8328       DEBUG(2, Sdprintf("Set mask to 0x%x\n", joined_mask));
8329 
8330       return TRUE;
8331     }
8332   }
8333 
8334   cb = PL_malloc(sizeof(*cb));
8335   cb->next = NULL;
8336   cb->mask = msk;
8337   cb->pred = p;
8338   if ( callback_list )
8339   { callback_tail->next = cb;
8340     callback_tail = cb;
8341   } else
8342   { callback_list = callback_tail = cb;
8343   }
8344   joined_mask |= msk;
8345 
8346   return TRUE;
8347 }
8348 
8349 
8350 
8351 static foreign_t
rdf_set_predicate(term_t pred,term_t option)8352 rdf_set_predicate(term_t pred, term_t option)
8353 { predicate *p;
8354   rdf_db *db = rdf_current_db();
8355   query *q = open_query(db);
8356   int rc;
8357 
8358   if ( !q ) return FALSE;
8359   if ( !get_predicate(db, pred, &p, q) )
8360   { rc = FALSE;
8361     goto out;
8362   }
8363 
8364   if ( PL_is_functor(option, FUNCTOR_symmetric1) )
8365   { int val;
8366 
8367     if ( !get_bool_arg_ex(1, option, &val) )
8368     { rc = FALSE;
8369       goto out;
8370     }
8371 
8372     if ( val )
8373       p->inverse_of = p;
8374     else
8375       p->inverse_of = NULL;
8376 
8377     rc = TRUE;
8378   } else if ( PL_is_functor(option, FUNCTOR_inverse_of1) )
8379   { term_t a = PL_new_term_ref();
8380     predicate *i;
8381 
8382     _PL_get_arg(1, option, a);
8383     if ( PL_get_nil(a) )
8384     { if ( p->inverse_of )
8385       { p->inverse_of->inverse_of = NULL;
8386 	p->inverse_of = NULL;
8387       }
8388     } else
8389     { if ( !get_predicate(db, a, &i, q) )
8390       { rc = FALSE;
8391 	goto out;
8392       }
8393 
8394       p->inverse_of = i;
8395       i->inverse_of = p;
8396     }
8397     rc = TRUE;
8398   } else if ( PL_is_functor(option, FUNCTOR_transitive1) )
8399   { int val;
8400 
8401     if ( !get_bool_arg_ex(1, option, &val) )
8402       return FALSE;
8403 
8404     p->transitive = val;
8405 
8406     rc = TRUE;
8407   } else
8408     rc = PL_type_error("predicate_option", option);
8409 
8410 out:
8411   close_query(q);
8412   return rc;
8413 }
8414 
8415 
/* Table of predicate property functors.  NOTE(review): presumably
   filled elsewhere in this file and used to enumerate the properties
   handled by unify_predicate_property() -- verify that
   PRED_PROPERTY_COUNT matches the number of entries.
*/

#define PRED_PROPERTY_COUNT 9
static functor_t predicate_key[PRED_PROPERTY_COUNT];
8418 
8419 static int
unify_predicate_property(rdf_db * db,predicate * p,term_t option,functor_t f,query * q)8420 unify_predicate_property(rdf_db *db, predicate *p, term_t option,
8421 			 functor_t f, query *q)
8422 { if ( f == FUNCTOR_symmetric1 )
8423     return PL_unify_term(option, PL_FUNCTOR, f,
8424 			 PL_BOOL, p->inverse_of == p ? TRUE : FALSE);
8425   else if ( f == FUNCTOR_inverse_of1 )
8426   { if ( p->inverse_of )
8427       return PL_unify_term(option, PL_FUNCTOR, f,
8428 			   PL_ATOM, p->inverse_of->name);
8429     else
8430       return FALSE;
8431   } else if ( f == FUNCTOR_transitive1 )
8432   { return PL_unify_term(option, PL_FUNCTOR, f,
8433 			 PL_BOOL, p->transitive);
8434   } else if ( f == FUNCTOR_triples1 )
8435   { return PL_unify_term(option, PL_FUNCTOR, f,
8436 			 PL_LONG, p->triple_count);
8437   } else if ( f == FUNCTOR_rdf_subject_branch_factor1 )
8438   { return PL_unify_term(option, PL_FUNCTOR, f,
8439 		 PL_FLOAT, subject_branch_factor(db, p, q, DISTINCT_DIRECT));
8440   } else if ( f == FUNCTOR_rdf_object_branch_factor1 )
8441   { return PL_unify_term(option, PL_FUNCTOR, f,
8442 		 PL_FLOAT, object_branch_factor(db, p, q, DISTINCT_DIRECT));
8443   } else if ( f == FUNCTOR_rdfs_subject_branch_factor1 )
8444   { return PL_unify_term(option, PL_FUNCTOR, f,
8445 		 PL_FLOAT, subject_branch_factor(db, p, q, DISTINCT_SUB));
8446   } else if ( f == FUNCTOR_rdfs_object_branch_factor1 )
8447   { return PL_unify_term(option, PL_FUNCTOR, f,
8448 		 PL_FLOAT, object_branch_factor(db, p, q, DISTINCT_SUB));
8449   } else
8450   { assert(0);
8451     return FALSE;
8452   }
8453 }
8454 
8455 
/* Enumeration state that rdf_current_predicate() keeps between calls */
typedef struct enum_pred
{ predicate *p;		/* next predicate to report, or NULL to scan from i */
  int i;		/* index into db->predicates.blocks being scanned */
} enum_pred;
8460 
8461 
8462 static foreign_t
rdf_current_predicate(term_t name,control_t h)8463 rdf_current_predicate(term_t name, control_t h)
8464 { rdf_db *db = rdf_current_db();
8465   predicate *p;
8466   enum_pred *ep;
8467   atom_t a;
8468 
8469   switch( PL_foreign_control(h) )
8470   { case PL_FIRST_CALL:
8471       if ( PL_is_variable(name) )
8472       { ep = rdf_malloc(db, sizeof(*ep));
8473 	ep->i  = 0;
8474 	ep->p  = NULL;
8475 	goto next;
8476       } else if ( PL_get_atom(name, &a) )
8477       { return existing_predicate(db, a) != NULL;
8478       } else if ( PL_is_functor(name, FUNCTOR_literal1) )
8479       { return FALSE;
8480       }
8481 
8482       return PL_type_error("atom", name);
8483     case PL_REDO:
8484       ep = PL_foreign_context_address(h);
8485       goto next;
8486     case PL_PRUNED:
8487       ep = PL_foreign_context_address(h);
8488       rdf_free(db, ep, sizeof(*ep));
8489       return TRUE;
8490     default:
8491       assert(0);
8492       return FALSE;
8493   }
8494 
8495 next:
8496   if ( !(p=ep->p) )
8497   { while (!(p = db->predicates.blocks[MSB(ep->i)][ep->i]) )
8498     { if ( ++ep->i >= db->predicates.bucket_count )
8499 	goto fail;
8500     }
8501   }
8502 
8503   if ( !PL_unify_atom(name, p->name) )
8504   { fail:
8505     rdf_free(db, ep, sizeof(*ep));
8506     return FALSE;
8507   }
8508 
8509   if ( !(ep->p = p->next) )
8510   { if ( ++ep->i >= db->predicates.bucket_count )
8511     { rdf_free(db, ep, sizeof(*ep));
8512       return TRUE;
8513     }
8514   }
8515   PL_retry_address(ep);
8516 }
8517 
8518 
8519 static foreign_t
rdf_predicate_property(term_t pred,term_t option,control_t h)8520 rdf_predicate_property(term_t pred, term_t option, control_t h)
8521 { predicate *p;
8522   rdf_db *db = rdf_current_db();
8523   query *q;
8524 
8525   if ( !predicate_key[0] )
8526   { int i = 0;
8527 
8528     predicate_key[i++] = FUNCTOR_symmetric1;
8529     predicate_key[i++] = FUNCTOR_inverse_of1;
8530     predicate_key[i++] = FUNCTOR_transitive1;
8531     predicate_key[i++] = FUNCTOR_triples1;
8532     predicate_key[i++] = FUNCTOR_rdf_subject_branch_factor1;
8533     predicate_key[i++] = FUNCTOR_rdf_object_branch_factor1;
8534     predicate_key[i++] = FUNCTOR_rdfs_subject_branch_factor1;
8535     predicate_key[i++] = FUNCTOR_rdfs_object_branch_factor1;
8536     assert(i < PRED_PROPERTY_COUNT);
8537   }
8538 
8539   switch(PL_foreign_control(h))
8540   { case PL_FIRST_CALL:
8541     { functor_t f;
8542       int rc;
8543 
8544       if ( !(q = open_query(db)) )
8545 	return FALSE;
8546       if ( PL_is_variable(option) )
8547       { q->state.predprop.prop = 0;
8548 	if ( !get_predicate(db, pred, &q->state.predprop.pred, q) )
8549 	{ close_query(q);
8550 	  return FALSE;
8551 	}
8552 	goto redo;
8553       } else if ( PL_get_functor(option, &f) )
8554       { int n;
8555 
8556 	for(n=0; predicate_key[n]; n++)
8557 	{ if ( predicate_key[n] == f )
8558 	  { if ( !get_predicate(db, pred, &p, q) )
8559 	      return FALSE;
8560 	    rc = unify_predicate_property(db, p, option, f, q);
8561 	    goto out;
8562 	  }
8563 	}
8564 	rc = PL_domain_error("rdf_predicate_property", option);
8565       } else
8566 	rc = PL_type_error("rdf_predicate_property", option);
8567     out:
8568       close_query(q);
8569       return rc;
8570     }
8571     case PL_REDO:
8572       q = PL_foreign_context_address(h);
8573     redo:
8574       for( ; predicate_key[q->state.predprop.prop]; q->state.predprop.prop++ )
8575       { if ( unify_predicate_property(db,
8576 				      q->state.predprop.pred,
8577 				      option,
8578 				      predicate_key[q->state.predprop.prop],
8579 				      q) )
8580 	{ q->state.predprop.prop++;
8581 	  if ( predicate_key[q->state.predprop.prop] )
8582 	    PL_retry_address(q);
8583 	  return TRUE;
8584 	}
8585       }
8586       return FALSE;
8587     case PL_PRUNED:
8588       q = PL_foreign_context_address(h);
8589       close_query(q);
8590       return TRUE;
8591     default:
8592       assert(0);
8593       return TRUE;
8594   }
8595 }
8596 
8597 
8598 		 /*******************************
8599 		 *     TRANSITIVE RELATIONS	*
8600 		 *******************************/
8601 
8602 static visited *
alloc_node_agenda(rdf_db * db,agenda * a)8603 alloc_node_agenda(rdf_db *db, agenda *a)
8604 { chunk *c;
8605   int size;
8606 
8607   if ( (c=a->chunk) )
8608   { if ( c->used < c->size )
8609     { visited *v = &c->nodes[c->used++];
8610 
8611       return v;
8612     }
8613   }
8614 
8615   size = (a->size == 0 ? 8 : 1024);
8616   c = rdf_malloc(db, CHUNK_SIZE(size));
8617   c->size = size;
8618   c->used = 1;
8619   c->next = a->chunk;
8620   a->chunk = c;
8621 
8622   return &c->nodes[0];
8623 }
8624 
8625 
8626 static void
empty_agenda(rdf_db * db,agenda * a)8627 empty_agenda(rdf_db *db, agenda *a)
8628 { chunk *c, *n;
8629 
8630   for(c=a->chunk; c; c = n)
8631   { n = c->next;
8632     rdf_free(db, c, CHUNK_SIZE(c->size));
8633   }
8634   if ( a->hash )
8635     rdf_free(db, a->hash, sizeof(visited*)*a->hash_size);
8636 
8637   if ( a->query )
8638     close_query(a->query);
8639 }
8640 
8641 
8642 static void
hash_agenda(rdf_db * db,agenda * a,int size)8643 hash_agenda(rdf_db *db, agenda *a, int size)
8644 { if ( a->hash )
8645     rdf_free(db, a->hash, sizeof(*a->hash));
8646   if ( size > 0 )
8647   { visited *v;
8648 
8649     a->hash = rdf_malloc(db, sizeof(visited*)*size);
8650     memset(a->hash, 0, sizeof(visited*)*size);
8651     a->hash_size = size;
8652 
8653     for(v=a->head; v; v = v->next)
8654     { int key = atom_hash(v->resource, MURMUR_SEED)&(size-1);
8655 
8656       v->hash_link = a->hash[key];
8657       a->hash[key] = v;
8658     }
8659   }
8660 }
8661 
8662 
8663 static int
in_agenda(agenda * a,atom_t resource)8664 in_agenda(agenda *a, atom_t resource)
8665 { visited *v;
8666 
8667   if ( a->hash )
8668   { int key = atom_hash(resource, MURMUR_SEED)&(a->hash_size-1);
8669     v = a->hash[key];
8670 
8671     for( ; v; v = v->hash_link )
8672     { if ( v->resource == resource )
8673 	return TRUE;
8674     }
8675   } else
8676   { v = a->head;
8677 
8678     for( ; v; v = v->next )
8679     { if ( v->resource == resource )
8680 	return TRUE;
8681     }
8682   }
8683 
8684   return FALSE;
8685 }
8686 
8687 
8688 static visited *
append_agenda(rdf_db * db,agenda * a,atom_t res,uintptr_t d)8689 append_agenda(rdf_db *db, agenda *a, atom_t res, uintptr_t d)
8690 { visited *v = a->head;
8691 
8692   if ( in_agenda(a, res) )
8693     return NULL;
8694 
8695   db->agenda_created++;			/* statistics */
8696 
8697   a->size++;
8698   if ( !a->hash_size && a->size > 32 )
8699     hash_agenda(db, a, 64);
8700   else if ( a->size > a->hash_size * 4 )
8701     hash_agenda(db, a, a->hash_size * 4);
8702 
8703   v = alloc_node_agenda(db, a);
8704   v->resource = res;
8705   v->distance = d;
8706   v->next = NULL;
8707   if ( a->tail )
8708   { a->tail->next = v;
8709     a->tail = v;
8710   } else
8711   { a->head = a->tail = v;
8712   }
8713 
8714   if ( a->hash_size )
8715   { int key = atom_hash(res, MURMUR_SEED)&(a->hash_size-1);
8716 
8717     v->hash_link = a->hash[key];
8718     a->hash[key] = v;
8719   }
8720 
8721   return v;
8722 }
8723 
8724 
8725 static int
can_reach_target(rdf_db * db,agenda * a,query * q)8726 can_reach_target(rdf_db *db, agenda *a, query *q)
8727 { triple_walker tw;
8728   int indexed = a->pattern.indexed;
8729   int rc = FALSE;
8730   triple *p;
8731 
8732   if ( indexed & BY_S )			/* subj ---> */
8733   { a->pattern.object.resource = a->target;
8734     indexed |= BY_O;
8735   } else
8736   { a->pattern.subject_id = ATOM_ID(a->target);
8737     indexed |= BY_S;
8738   }
8739 
8740   init_triple_walker(&tw, db, &a->pattern, indexed);
8741   while((p=next_triple(&tw)))
8742   { if ( match_triples(db, p, &a->pattern, q, MATCH_SUBPROPERTY) )
8743     { rc = TRUE;
8744       break;
8745     }
8746   }
8747 
8748   if ( a->pattern.indexed & BY_S )
8749   { a->pattern.object.resource = 0;
8750   } else
8751   { a->pattern.subject_id = 0;
8752   }
8753 
8754   return rc;
8755 }
8756 
8757 
8758 
8759 static visited *
bf_expand(rdf_db * db,agenda * a,atom_t resource,uintptr_t d,query * q)8760 bf_expand(rdf_db *db, agenda *a, atom_t resource, uintptr_t d, query *q)
8761 { search_state state;
8762   visited *rc = NULL;
8763 
8764   state.pattern = a->pattern;		/* Structure copy */
8765   state.flags   = MATCH_SUBPROPERTY|MATCH_INVERSE;
8766   state.p_cloud = NULL;
8767   state.query   = q;
8768   state.db      = db;
8769 
8770   if ( state.pattern.indexed & BY_S )		/* subj ---> */
8771   { state.pattern.subject_id = ATOM_ID(resource);
8772   } else
8773   { state.pattern.object.resource = resource;
8774   }
8775 
8776   if ( a->target && can_reach_target(db, a, q) )
8777     return append_agenda(db, a, a->target, d);
8778 
8779   for(;;)
8780   { int indexed = state.pattern.indexed;
8781     triple *p;
8782 
8783     init_triple_walker(&state.cursor, db, &state.pattern, indexed);
8784     while((p=next_triple(&state.cursor)))
8785     { if ( !alive_triple(a->query, p) )
8786 	continue;
8787 
8788       if ( match_triples(db, p, &state.pattern, a->query, MATCH_SUBPROPERTY) )
8789       { atom_t found;
8790 	visited *v;
8791 
8792 	if ( indexed & BY_S )
8793 	{ if ( p->object_is_literal )
8794 	    continue;
8795 	  found = p->object.resource;
8796 	} else
8797 	{ found = ID_ATOM(p->subject_id);
8798 	}
8799 
8800 	v = append_agenda(db, a, found, d);
8801 	if ( !rc )
8802 	  rc = v;
8803 	if ( found == a->target )
8804 	  return rc;
8805       }
8806     }
8807     if ( next_sub_property(&state) )
8808       continue;
8809     if ( inverse_partial_triple(&state.pattern) )
8810     { state.p_cloud = NULL;
8811       continue;
8812     }
8813     break;
8814   }
8815 					/* TBD: handle owl:sameAs */
8816   return rc;
8817 }
8818 
8819 
8820 static int
peek_agenda(rdf_db * db,agenda * a)8821 peek_agenda(rdf_db *db, agenda *a)
8822 { if ( a->to_return )
8823     return TRUE;
8824 
8825   while( a->to_expand )
8826   { uintptr_t next_d = a->to_expand->distance+1;
8827 
8828     if ( next_d > a->max_d )
8829       return FALSE;
8830 
8831     a->to_return = bf_expand(db, a,
8832 			     a->to_expand->resource,
8833 			     next_d,
8834 			     a->query);
8835     a->to_expand = a->to_expand->next;
8836 
8837     if ( a->to_return )
8838       return TRUE;
8839   }
8840 
8841   return FALSE;
8842 }
8843 
8844 
8845 static visited *
next_agenda(rdf_db * db,agenda * a)8846 next_agenda(rdf_db *db, agenda *a)
8847 { if ( peek_agenda(db, a) )
8848   { visited *v = a->to_return;
8849 
8850     a->to_return = a->to_return->next;
8851 
8852     return v;
8853   }
8854 
8855   return NULL;
8856 }
8857 
8858 
8859 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
8860 rdf_reachable(+Subject, +Predicate, -Object)
8861 rdf_reachable(-Subject, +Predicate, ?Object)
8862     Examine transitive relations, reporting all `Object' that can be
8863     reached from `Subject' using Predicate without going into a loop
8864     if the relation is cyclic.
8865 
directly_attached() deals with the possibility that the predicate is not
defined and Subject and Object are the same. This should use clean error
handling, but that means a lot of changes. For now this will do.
8869 
8870 TBD:	Implement bi-directional search if both Subject and Object are
8871 	given.
8872 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
8873 
8874 static int
directly_attached(term_t pred,term_t from,term_t to)8875 directly_attached(term_t pred, term_t from, term_t to)
8876 { if ( PL_is_atom(pred) && PL_is_atom(from) )
8877     return PL_unify(to, from);
8878 
8879   return FALSE;
8880 }
8881 
8882 
8883 static int
unify_distance(term_t d,uintptr_t dist)8884 unify_distance(term_t d, uintptr_t dist)
8885 { if ( d )
8886     return PL_unify_integer(d, dist);
8887 
8888   return TRUE;
8889 }
8890 
8891 
8892 static foreign_t
rdf_reachable(term_t subj,term_t pred,term_t obj,term_t max_d,term_t d,control_t h)8893 rdf_reachable(term_t subj, term_t pred, term_t obj,
8894 	      term_t max_d, term_t d,
8895 	      control_t h)
8896 { rdf_db *db = rdf_current_db();
8897   query *q;
8898 
8899   switch(PL_foreign_control(h))
8900   { case PL_FIRST_CALL:
8901     { visited *v;
8902       agenda *a;
8903       term_t target_term;
8904       int is_det = FALSE;
8905 
8906       if ( PL_is_variable(pred) )
8907 	return PL_instantiation_error(pred);
8908 
8909       if ( !(q = open_query(db)) )
8910 	return FALSE;
8911       a = &q->state.tr_search;
8912       memset(a, 0, sizeof(*a));
8913       a->query = q;
8914 
8915       if ( max_d )
8916       { long md;
8917 	atom_t inf;
8918 
8919 	if ( PL_get_atom(max_d, &inf) && inf == ATOM_infinite )
8920 	{ a->max_d = (uintptr_t)-1;
8921 	} else
8922 	{ if ( !PL_get_long_ex(max_d, &md) || md < 0 )
8923 	  { close_query(q);
8924 	    return FALSE;
8925 	  }
8926 	  a->max_d = md;
8927 	}
8928       } else
8929       { a->max_d = (uintptr_t)-1;
8930       }
8931 
8932       if ( !PL_is_variable(subj) )		/* subj .... obj */
8933       { switch(get_partial_triple(db, subj, pred, 0, 0, &a->pattern))
8934 	{ case 0:
8935 	  { close_query(q);
8936 	    return directly_attached(pred, subj, obj) &&
8937 		   unify_distance(d, 0);
8938 	  }
8939 	  case -1:
8940 	  { close_query(q);
8941 	    return FALSE;
8942 	  }
8943 	}
8944 	is_det = PL_is_ground(obj);
8945 	if ( a->pattern.object_is_literal )
8946 	{ close_query(q);
8947 	  return FALSE;			/* rdf_reachable(literal(...),?,?) */
8948 	}
8949 	target_term = obj;
8950       } else if ( !PL_is_variable(obj) )	/* obj .... subj */
8951       {	switch(get_partial_triple(db, 0, pred, obj, 0, &a->pattern))
8952 	{ case 0:
8953 	  { close_query(q);
8954 	    return directly_attached(pred, obj, subj);
8955 	  }
8956 	  case -1:
8957 	  { close_query(q);
8958 	    return FALSE;
8959 	  }
8960 	}
8961 	if ( a->pattern.object_is_literal )
8962 	{ close_query(q);
8963 	  return FALSE;			/* rdf_reachable(-,+,literal(...)) */
8964 	}
8965 	target_term = subj;
8966       } else
8967       { close_query(q);
8968 	return PL_instantiation_error(subj);
8969       }
8970 
8971       if ( (a->pattern.indexed & BY_S) )		/* subj ... */
8972 	append_agenda(db, a, ID_ATOM(a->pattern.subject_id), 0);
8973       else
8974 	append_agenda(db, a, a->pattern.object.resource, 0);
8975       a->to_return = a->head;
8976       a->to_expand = a->head;
8977 
8978       while( (v=next_agenda(db, a)) )
8979       { if ( PL_unify_atom(target_term, v->resource) )
8980 	{ if ( is_det )		/* mode(+, +, +) */
8981 	  { int rc = unify_distance(d, v->distance);
8982 	    empty_agenda(db, a);
8983 	    return rc;
8984 	  } else if ( unify_distance(d, v->distance) )
8985 	  {				/* mode(+, +, -) or mode(-, +, +) */
8986 	    if ( peek_agenda(db, a) )
8987 	      PL_retry_address(a);
8988 
8989 	    empty_agenda(db, a);
8990 	    return TRUE;
8991 	  }
8992 	}
8993       }
8994       empty_agenda(db, a);
8995       return FALSE;
8996     }
8997     case PL_REDO:
8998     { agenda *a = PL_foreign_context_address(h);
8999       term_t target_term;
9000       visited *v;
9001 
9002       if ( !PL_is_variable(subj) )	/* +, +, - */
9003 	target_term = obj;
9004       else
9005 	target_term = subj;		/* -, +, + */
9006 
9007       while( (v=next_agenda(db, a)) )
9008       { if ( PL_unify_atom(target_term, v->resource) &&
9009 	     unify_distance(d, v->distance) )
9010 	{ if ( peek_agenda(db, a) )
9011 	  { PL_retry_address(a);
9012 	  } else
9013 	  { empty_agenda(db, a);
9014 	    return TRUE;
9015 	  }
9016 	}
9017       }
9018 
9019       empty_agenda(db, a);
9020       return FALSE;
9021     }
9022     case PL_PRUNED:
9023     { agenda *a = PL_foreign_context_address(h);
9024 
9025       DEBUG(9, Sdprintf("Cutted; agenda = %p\n", a));
9026 
9027       empty_agenda(db, a);
9028       return TRUE;
9029     }
9030     default:
9031       assert(0);
9032       return FALSE;
9033   }
9034 }
9035 
/* Three-argument entry point: forwards to rdf_reachable(), passing 0 for
   both `max_d` and `d`. */
static foreign_t
rdf_reachable3(term_t subj, term_t pred, term_t obj, control_t h)
{ return rdf_reachable(subj, pred, obj, 0, 0, h);
}
9040 
/* Five-argument entry point: forwards all of its arguments to
   rdf_reachable() unchanged. */
static foreign_t
rdf_reachable5(term_t subj, term_t pred, term_t obj, term_t max_d, term_t d,
	       control_t h)
{ return rdf_reachable(subj, pred, obj, max_d, d, h);
}
9046 
9047 
9048 		 /*******************************
9049 		 *	     STATISTICS		*
9050 		 *******************************/
9051 
/* Table of the statistics key functors that rdf_statistics() accepts and
   enumerates; scans over it stop at the first 0 entry. */
static functor_t keys[16];		/* initialised in install_rdf_db() */
9053 
9054 static int
unify_statistics(rdf_db * db,term_t key,functor_t f)9055 unify_statistics(rdf_db *db, term_t key, functor_t f)
9056 { int64_t v;
9057 
9058   if ( f == FUNCTOR_triples1 )
9059   { v = db->created - db->erased;
9060   } else if ( f == FUNCTOR_resources1 )
9061   { v = db->resources.hash.count;
9062   } else if ( f == FUNCTOR_predicates1 )
9063   { v = db->predicates.count;
9064   } else if ( f == FUNCTOR_graphs1 )
9065   { v = db->graphs.count - db->graphs.erased;
9066   } else if ( f == FUNCTOR_indexed16 )
9067   { int i;
9068     term_t a = PL_new_term_ref();
9069 
9070     if ( !PL_unify_functor(key, FUNCTOR_indexed16) )
9071       return FALSE;
9072     for(i=0; i<16; i++)
9073     { if ( !PL_get_arg(i+1, key, a) ||
9074 	   !PL_unify_integer(a, db->indexed[i]) )
9075 	return FALSE;
9076     }
9077 
9078     return TRUE;
9079   } else if ( f == FUNCTOR_hash_quality1 )
9080   { term_t tail, list = PL_new_term_ref();
9081     term_t head = PL_new_term_ref();
9082     term_t tmp = PL_new_term_ref();
9083     term_t av = PL_new_term_refs(4);
9084     int i;
9085 
9086     if ( !PL_unify_functor(key, FUNCTOR_hash_quality1) )
9087       return FALSE;
9088     _PL_get_arg(1, key, list);
9089     tail = PL_copy_term_ref(list);
9090 
9091     for(i=1; i<INDEX_TABLES; i++)
9092     { if ( db->hash[i].created )
9093       { if ( !PL_unify_list(tail, head, tail) ||
9094 	     !PL_put_integer(av+0, col_index[i]) ||
9095 	     !PL_put_integer(av+1, db->hash[i].bucket_count) ||
9096 	     !PL_put_float(av+2, triple_hash_quality(db, i, 1024)) ||
9097 	     !PL_put_integer(av+3, MSB(db->hash[i].bucket_count)-
9098 				   MSB(db->hash[i].bucket_count_epoch)) ||
9099 	     !PL_cons_functor_v(tmp, FUNCTOR_hash4, av) ||
9100 	     !PL_unify(head, tmp) )
9101 	  return FALSE;
9102       }
9103     }
9104 
9105     return PL_unify_nil(tail);
9106   } else if ( f == FUNCTOR_searched_nodes1 )
9107   { v = db->agenda_created;
9108   } else if ( f == FUNCTOR_duplicates1 )
9109   { if ( db->duplicates_up_to_date == FALSE )
9110       return FALSE;
9111     v = db->duplicates;
9112   } else if ( f == FUNCTOR_lingering1 )
9113   { v = db->lingering;
9114   } else if ( f == FUNCTOR_literals1 )
9115   { v = db->literals.count;
9116   } else if ( f == FUNCTOR_triples2 && PL_is_functor(key, f) )
9117   { graph *src;
9118     term_t a = PL_new_term_ref();
9119     atom_t name;
9120 
9121     _PL_get_arg(1, key, a);
9122     if ( !PL_get_atom_ex(a, &name) )
9123       return FALSE;
9124     if ( (src = existing_graph(db, name)) )
9125       v = src->triple_count;
9126     else
9127       v = 0;
9128 
9129     _PL_get_arg(2, key, a);
9130     return PL_unify_int64(a, v);
9131   } else if ( f == FUNCTOR_gc4 )
9132   { return PL_unify_term(key,
9133 			 PL_FUNCTOR, f,
9134 			   PL_INT,   (int)db->gc.count,
9135 			   PL_INT64, (int64_t)db->gc.reclaimed_triples,
9136 			   PL_INT64, (int64_t)db->reindexed,
9137 			   PL_FLOAT, (double)db->gc.time);	/* time spent */
9138   } else
9139   { assert(0);
9140     return FALSE;
9141   }
9142 
9143   return PL_unify_term(key, PL_FUNCTOR, f, PL_INT64, v);
9144 }
9145 
9146 static foreign_t
rdf_statistics(term_t key,control_t h)9147 rdf_statistics(term_t key, control_t h)
9148 { int n;
9149   rdf_db *db = rdf_current_db();
9150 
9151   switch(PL_foreign_control(h))
9152   { case PL_FIRST_CALL:
9153     { functor_t f;
9154 
9155       if ( PL_is_variable(key) )
9156       { n = 0;
9157 	goto redo;
9158       } else if ( PL_get_functor(key, &f) )
9159       { for(n=0; keys[n]; n++)
9160 	{ if ( keys[n] == f )
9161 	    return unify_statistics(db, key, f);
9162 	}
9163 	return PL_domain_error("rdf_statistics", key);
9164       } else
9165 	return PL_type_error("rdf_statistics", key);
9166     }
9167     case PL_REDO:
9168       n = (int)PL_foreign_context(h);
9169     redo:
9170       unify_statistics(db, key, keys[n]);
9171       n++;
9172       if ( keys[n] )
9173 	PL_retry(n);
9174     case PL_PRUNED:
9175       return TRUE;
9176     default:
9177       assert(0);
9178       return TRUE;
9179   }
9180 }
9181 
9182 
9183 /** rdf_generation(-Generation) is det.
9184 
9185     True when Generation is the current reading generation.  If we are
9186     inside a modified transaction, Generation has the format Base+TrGen,
9187     where TrGen expresses the generation inside the transaction.
9188 */
9189 
9190 static foreign_t
rdf_generation(term_t t)9191 rdf_generation(term_t t)
9192 { rdf_db *db = rdf_current_db();
9193   query *q = open_query(db);
9194   int rc;
9195 
9196   if ( !q ) return FALSE;
9197   if ( q->tr_gen > q->stack->tr_gen_base )
9198   { assert(q->tr_gen < q->stack->tr_gen_max);
9199 
9200     rc = PL_unify_term(t, PL_FUNCTOR, FUNCTOR_plus2,
9201 		            PL_INT64, q->rd_gen,
9202 		            PL_INT64, q->tr_gen - q->stack->tr_gen_base);
9203   } else
9204   { rc = PL_unify_int64(t, q->rd_gen);
9205   }
9206 
9207   close_query(q);
9208 
9209   return rc;
9210 }
9211 
9212 
9213 /** rdf_snapshot(-Snapshot) is det.
9214 
9215     True when Snapshot is a handle to the current state of the database.
9216 */
9217 
9218 static foreign_t
rdf_snapshot(term_t t)9219 rdf_snapshot(term_t t)
9220 { rdf_db *db = rdf_current_db();
9221   snapshot *s = new_snapshot(db);
9222 
9223   if ( !s )
9224     return FALSE;
9225   return unify_snapshot(t, s);
9226 }
9227 
9228 
9229 		 /*******************************
9230 		 *	  CONTROL INDEXING	*
9231 		 *******************************/
9232 
9233 /** rdf_set(+What)
9234 
9235     Set aspect of the RDF database.  What is one of:
9236 
9237       * hash(Which, Parameter, Value)
9238 
9239     Where Parameter is one of =size=, =optimize_threshold= or
9240     =avg_chain_len= and Which is one of =s=, =p=, etc.
9241 */
9242 
9243 static int
get_index_name(term_t t,int * index)9244 get_index_name(term_t t, int *index)
9245 { int i;
9246   char *s;
9247 
9248   if ( !PL_get_chars(t, &s, CVT_ATOM|CVT_EXCEPTION) )
9249     return FALSE;
9250 
9251   for(i=1; i<INDEX_TABLES; i++)
9252   { if ( strcmp(s, col_name[i]) == 0 )
9253     { *index = i;
9254       return TRUE;
9255     }
9256   }
9257 
9258   PL_domain_error("index", t);
9259   return FALSE;
9260 }
9261 
9262 
9263 static foreign_t
rdf_set(term_t what)9264 rdf_set(term_t what)
9265 { rdf_db *db = rdf_current_db();
9266 
9267   if ( PL_is_functor(what, FUNCTOR_hash3) )
9268   { term_t arg = PL_new_term_ref();
9269     int index;
9270     int value;
9271     atom_t param;
9272 
9273     _PL_get_arg(1, what, arg);
9274     if ( !get_index_name(arg, &index) )
9275       return FALSE;
9276 
9277     _PL_get_arg(3, what, arg);
9278     if ( !PL_get_integer_ex(arg, &value) )
9279       return FALSE;
9280 
9281     _PL_get_arg(2, what, arg);
9282     if ( !PL_get_atom_ex(arg, &param) )
9283       return FALSE;
9284 
9285     if ( param == ATOM_size )
9286     { if ( size_triple_hash(db, index, value) )
9287       { db->hash[index].user_size = MSB(value);
9288 	return TRUE;
9289       }
9290       if ( value <= 0 || MSB(value) >= MAX_TBLOCKS )
9291 	return PL_domain_error("hash_size", arg);
9292 						/* cannot shrink */
9293       return PL_permission_error("size", "hash", arg);
9294     } else if ( param == ATOM_optimize_threshold )
9295     { if ( value >= 0 && value < 20 )
9296 	db->hash[index].optimize_threshold = value;
9297       else
9298 	return PL_domain_error("optimize_threshold", arg);
9299     } else if ( param == ATOM_average_chain_len )
9300     { if ( value >= 0 && value < 20 )
9301 	db->hash[index].avg_chain_len = value;
9302       return PL_domain_error("average_chain_len", arg);
9303     } else
9304       return PL_domain_error("rdf_hash_parameter", arg);
9305 
9306     return TRUE;
9307   }
9308 
9309   return PL_type_error("rdf_setting", what);
9310 }
9311 
9312 
9313 static foreign_t
rdf_update_duplicates(void)9314 rdf_update_duplicates(void)
9315 { rdf_db *db = rdf_current_db();
9316 
9317   return update_duplicates(db);
9318 }
9319 
9320 
9321 /** rdf_warm_indexes(+List) is det.
9322 */
9323 
9324 static foreign_t
rdf_warm_indexes(term_t indexes)9325 rdf_warm_indexes(term_t indexes)
9326 { int il[16];
9327   int ic = 0;
9328   term_t tail = PL_copy_term_ref(indexes);
9329   term_t head = PL_new_term_ref();
9330   rdf_db *db = rdf_current_db();
9331 
9332   while(PL_get_list_ex(tail, head, tail))
9333   { char *s;
9334 
9335     if ( PL_get_chars(head, &s, CVT_ATOM|CVT_STRING|CVT_EXCEPTION) )
9336     { int by = 0;
9337       int i;
9338 
9339       for(; *s; s++)
9340       { switch(*s)
9341 	{ case 's': by |= BY_S; break;
9342 	  case 'p': by |= BY_P; break;
9343 	  case 'o': by |= BY_O; break;
9344 	  case 'g': by |= BY_G; break;
9345 	  default: return PL_domain_error("rdf_index", head);
9346 	}
9347       }
9348 
9349       if ( index_col[by] == ~0 )
9350 	return PL_existence_error("rdf_index", head);
9351 
9352       for(i=0; i<ic; i++)
9353       { if ( il[i] == by )
9354 	  break;
9355       }
9356       if ( i == ic )
9357 	il[ic++] = ICOL(by);
9358     } else
9359       return 0;
9360   }
9361   if ( !PL_get_nil_ex(tail) )
9362     return FALSE;
9363 
9364   create_triple_hashes(db, ic, il);
9365 
9366   return TRUE;
9367 }
9368 
9369 
9370 static foreign_t
pl_empty_prefix_table(void)9371 pl_empty_prefix_table(void)
9372 { rdf_db *db = rdf_current_db();
9373 
9374   empty_prefix_table(db);
9375 
9376   return TRUE;
9377 }
9378 
9379 
9380 		 /*******************************
9381 		 *	       RESET		*
9382 		 *******************************/
9383 
9384 static void
erase_triples(rdf_db * db)9385 erase_triples(rdf_db *db)
9386 { triple *t, *n;
9387   int i;
9388 
9389   for(t=fetch_triple(db, db->by_none.head); t; t=n)
9390   { n = triple_follow_hash(db, t, ICOL(BY_NONE));
9391 
9392     free_triple(db, t, FALSE);		/* ? */
9393   }
9394   db->by_none.head = db->by_none.tail = 0;
9395 
9396   for(i=BY_S; i<INDEX_TABLES; i++)
9397   { triple_hash *hash = &db->hash[i];
9398 
9399     reset_triple_hash(db, hash);
9400   }
9401   reset_triple_array(db);
9402 
9403   db->created = 0;
9404   db->erased = 0;
9405   memset(db->indexed, 0, sizeof(db->indexed));
9406   db->duplicates = 0;
9407   db->queries.generation = 0;
9408 }
9409 
9410 
9411 static void
erase_predicates(rdf_db * db)9412 erase_predicates(rdf_db *db)
9413 { int i;
9414 
9415   for(i=0; i<db->predicates.bucket_count; i++)
9416   { predicate *n, *p = db->predicates.blocks[MSB(i)][i];
9417 
9418     db->predicates.blocks[MSB(i)][i] = NULL;
9419 
9420     for( ; p; p = n )
9421     { n = p->next;
9422 
9423       free_list(db, &p->subPropertyOf);
9424       free_list(db, &p->siblings);
9425       if ( ++p->cloud->deleted == p->cloud->size )
9426 	free_predicate_cloud(db, p->cloud);
9427       free_is_leaf(db, p);
9428 
9429       rdf_free(db, p, sizeof(*p));
9430     }
9431   }
9432 
9433   db->predicates.count = 0;
9434 }
9435 
9436 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Reset the DB. It might be wiser to create  a new one and have a separate
thread deleting the old one (e.g. do this in GC).

All erase steps run between suspend_gc() and resume_gc() and with
db->locks.duplicates held. The return value is TRUE only if both
init_resource_db() and init_literal_table() succeed; the remaining
state is restored and the lock released regardless of that result.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int
reset_db(rdf_db *db)
{ int rc;

  suspend_gc(db);
  simpleMutexLock(&db->locks.duplicates);
  /* tear down the current content */
  erase_snapshots(db);
  erase_triples(db);
  erase_predicates(db);
  erase_resources(&db->resources);
  erase_graphs(db);
  empty_prefix_table(db);
  db->agenda_created = 0;
  skiplist_destroy(&db->literals);

  /* re-create the tables that were destroyed above */
  rc = (init_resource_db(db, &db->resources) &&
	init_literal_table(db));

  /* erase_triples() set the generation to 0; start again at GEN_EPOCH */
  db->snapshots.keep = GEN_MAX;
  db->queries.generation = GEN_EPOCH;

  simpleMutexUnlock(&db->locks.duplicates);
  resume_gc(db);

  return rc;
}
9468 
9469 
9470 /** rdf_reset_db
9471 
9472     Reset the RDF database to its initial state.  Only allowed if there
9473     are no active queries. This means that if the calling thread has
9474     open queries this must be considered a permission error.  Otherwise
9475     we wait until all queries have died.
9476 
9477     TBD: Check queries in other threads!
9478 */
9479 
9480 static foreign_t
rdf_reset_db(void)9481 rdf_reset_db(void)
9482 { rdf_db *db = rdf_current_db();
9483   query *q;
9484   int rc;
9485 
9486   db->resetting = TRUE;
9487   if ( !(q = open_query(db)) )
9488     return FALSE;
9489 
9490   if ( q->depth > 0 || q->transaction )
9491   { close_query(q);
9492     return permission_error("reset", "rdf_db", "default",
9493 			    "Active queries");
9494   }
9495 
9496   if ( !rdf_broadcast(EV_RESET, NULL, NULL) )
9497     return FALSE;
9498 
9499   rc = reset_db(db);
9500   close_query(q);
9501   db->resetting = FALSE;
9502 
9503   return rc;
9504 }
9505 
9506 
9507 static foreign_t
rdf_delete_snapshot(term_t t)9508 rdf_delete_snapshot(term_t t)
9509 { snapshot *ss;
9510   int rc;
9511 
9512   if ( (rc=get_snapshot(t, &ss)) == TRUE )
9513   { if ( free_snapshot(ss) )
9514       return TRUE;
9515     rc = -1;
9516   }
9517 
9518   if ( rc == -1 )
9519     return PL_existence_error("rdf_snapshot", t);
9520 
9521   return PL_type_error("rdf_snapshot", t);
9522 }
9523 
#ifdef O_DEBUG
/** rdf_checks_literal_references(+Literal)

    Debugging aid.  Walks the triples indexed by object that match the
    given literal (using MATCH_QUAL) and counts them.  The reference
    count stored in the literal of the first matching triple is
    compared with the number of matches.  Succeeds if both are equal;
    otherwise prints a diagnostic using Sdprintf() and fails.
*/

static foreign_t
rdf_checks_literal_references(term_t l)
{ triple pattern;
  triple *hit;
  triple_walker walker;
  long seen = 0;			/* matching triples found */
  long refs = -1;			/* -1: no matching triple yet */
  term_t any = PL_new_term_ref();	/* unbound subject and predicate */
  rdf_db *db = rdf_current_db();

  memset(&pattern, 0, sizeof(pattern));
  if ( !get_partial_triple(db, any, any, l, 0, &pattern) )
    return FALSE;
  assert(pattern.object_is_literal);

  init_triple_walker(&walker, db, &pattern, BY_O);
  for( hit = next_triple(&walker); hit; hit = next_triple(&walker) )
  { if ( !match_object(hit, &pattern, MATCH_QUAL) )
      continue;

    if ( seen++ == 0 )			/* pick up refs from first match */
      refs = (long)hit->object.literal->references;
  }
  destroy_triple_walker(db, &walker);

  if ( seen == refs )
    return TRUE;

  if ( refs == -1 )
  { Sdprintf("Not found in triples\n");
  } else
  { Sdprintf("Refs: %ld; counted: %ld; lit=", refs, seen);
    print_literal(pattern.object.literal);
    Sdprintf("\n");
  }

  return FALSE;
}
#endif
9563 
9564 		 /*******************************
9565 		 *	       MATCH		*
9566 		 *******************************/
9567 
9568 
9569 static int
get_text_ex(term_t term,text * txt)9570 get_text_ex(term_t term, text *txt)
9571 { memset(txt, 0, sizeof(*txt));
9572 
9573   return ( PL_get_nchars(term, &txt->length, (char**)&txt->a,
9574 			 CVT_ATOM|CVT_STRING) ||
9575 	   PL_get_wchars(term, &txt->length, (pl_wchar_t**)&txt->w,
9576 			 CVT_ATOM|CVT_STRING|CVT_EXCEPTION)
9577 	 );
9578 }
9579 
9580 
9581 
9582 static foreign_t
match_label(term_t how,term_t search,term_t label)9583 match_label(term_t how, term_t search, term_t label)
9584 { atom_t h;
9585   text f, l;
9586   int type;
9587 
9588   if ( !PL_get_atom_ex(how, &h) ||
9589        !get_text_ex(search, &f) ||
9590        !get_text_ex(label, &l) )
9591     return FALSE;
9592 
9593   if ( h == ATOM_exact )
9594     type = STR_MATCH_ICASE;
9595   if ( h == ATOM_icase )
9596     type = STR_MATCH_ICASE;
9597   else if ( h == ATOM_substring )
9598     type = STR_MATCH_SUBSTRING;
9599   else if ( h == ATOM_word )
9600     type = STR_MATCH_WORD;
9601   else if ( h == ATOM_prefix )
9602     type = STR_MATCH_PREFIX;
9603   else if ( h == ATOM_like )
9604     type = STR_MATCH_LIKE;
9605   else
9606     return PL_domain_error("search_method", how);
9607 
9608   return match_text(type, &f, &l);
9609 }
9610 
9611 
9612 static foreign_t
lang_matches(term_t lang,term_t pattern)9613 lang_matches(term_t lang, term_t pattern)
9614 { atom_t l, p;
9615 
9616   if ( !PL_get_atom_ex(lang, &l) ||
9617        !PL_get_atom_ex(pattern, &p) )
9618     return FALSE;
9619 
9620   return atom_lang_matches(l, p);
9621 }
9622 
9623 
9624 static foreign_t
rdf_compare(term_t dif,term_t a,term_t b)9625 rdf_compare(term_t dif, term_t a, term_t b)
9626 { triple ta, tb;
9627   rdf_db *db = rdf_current_db();
9628   int rc;
9629 
9630   memset(&ta, 0, sizeof(ta));
9631   memset(&tb, 0, sizeof(tb));
9632   if ( get_object(db, a, &ta) &&
9633        get_object(db, b, &tb) )
9634   { int d;
9635     atom_t ad;
9636 
9637     if ( ta.object_is_literal &&
9638 	 tb.object_is_literal )
9639     { literal_ex lex;
9640       lex.literal = ta.object.literal;
9641       prepare_literal_ex(&lex);
9642       d = compare_literals(&lex, tb.object.literal);
9643     } else if ( !ta.object_is_literal && !tb.object_is_literal )
9644     { d = cmp_atoms(ta.object.resource, tb.object.resource);
9645     } else
9646     { d = ta.object_is_literal ? -1 : 1;
9647     }
9648 
9649     ad = d < 0 ? ATOM_lt : d > 0 ? ATOM_gt : ATOM_eq;
9650 
9651     rc = PL_unify_atom(dif, ad);
9652   } else
9653   { rc = FALSE;
9654   }
9655 
9656   free_triple(db, &ta, FALSE);
9657   free_triple(db, &tb, FALSE);
9658 
9659   return rc;
9660 }
9661 
9662 
9663 		 /*******************************
9664 		 *	       TEST		*
9665 		 *******************************/
9666 
9667 static foreign_t
rdf_is_bnode(term_t t)9668 rdf_is_bnode(term_t t)
9669 { size_t len;
9670   char *s;
9671 
9672   if ( PL_get_nchars(t, &len, &s, CVT_ATOM) &&
9673        s[0] == '_' && (s[1] == ':' || s[1] == '_') )
9674     return TRUE;
9675 
9676   return FALSE;
9677 }
9678 
9679 
9680 		 /*******************************
9681 		 *	       VERSION		*
9682 		 *******************************/
9683 
9684 static foreign_t
rdf_version(term_t v)9685 rdf_version(term_t v)
9686 { return PL_unify_integer(v, RDF_VERSION);
9687 }
9688 
9689 
9690 		 /*******************************
9691 		 *	     REGISTER		*
9692 		 *******************************/
9693 
9694 #define MKFUNCTOR(n, a) \
9695 	FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
9696 #define NDET PL_FA_NONDETERMINISTIC
9697 #define META PL_FA_TRANSPARENT
9698 
9699 install_t
install_rdf_db(void)9700 install_rdf_db(void)
9701 { int i=0;
9702   extern install_t install_atom_map(void);
9703 
9704   simpleMutexInit(&rdf_lock);
9705   init_errors();
9706   register_resource_predicates();
9707 
9708   MKFUNCTOR(literal, 1);
9709   MKFUNCTOR(triples, 1);
9710   MKFUNCTOR(triples, 2);
9711   MKFUNCTOR(resources, 1);
9712   MKFUNCTOR(predicates, 1);
9713   MKFUNCTOR(subject, 1);
9714   MKFUNCTOR(predicate, 1);
9715   MKFUNCTOR(object, 1);
9716   MKFUNCTOR(graph, 1);
9717   MKFUNCTOR(indexed, 16);
9718   MKFUNCTOR(exact, 1);
9719   MKFUNCTOR(icase, 1);
9720   MKFUNCTOR(plain, 1);
9721   MKFUNCTOR(substring, 1);
9722   MKFUNCTOR(word, 1);
9723   MKFUNCTOR(prefix, 1);
9724   MKFUNCTOR(like, 1);
9725   MKFUNCTOR(lt, 1);
9726   MKFUNCTOR(le, 1);
9727   MKFUNCTOR(between, 2);
9728   MKFUNCTOR(eq, 1);
9729   MKFUNCTOR(ge, 1);
9730   MKFUNCTOR(gt, 1);
9731   MKFUNCTOR(literal, 2);
9732   MKFUNCTOR(searched_nodes, 1);
9733   MKFUNCTOR(duplicates, 1);
9734   MKFUNCTOR(lingering, 1);
9735   MKFUNCTOR(literals, 1);
9736   MKFUNCTOR(symmetric, 1);
9737   MKFUNCTOR(transitive, 1);
9738   MKFUNCTOR(inverse_of, 1);
9739   MKFUNCTOR(lang, 2);
9740   MKFUNCTOR(type, 2);
9741   MKFUNCTOR(rdf_subject_branch_factor, 1);
9742   MKFUNCTOR(rdf_object_branch_factor, 1);
9743   MKFUNCTOR(rdfs_subject_branch_factor, 1);
9744   MKFUNCTOR(rdfs_object_branch_factor, 1);
9745   MKFUNCTOR(gc, 4);
9746   MKFUNCTOR(graphs, 1);
9747   MKFUNCTOR(assert, 4);
9748   MKFUNCTOR(retract, 4);
9749   MKFUNCTOR(update, 5);
9750   MKFUNCTOR(new_literal, 1);
9751   MKFUNCTOR(old_literal, 1);
9752   MKFUNCTOR(transaction, 2);
9753   MKFUNCTOR(load, 2);
9754   MKFUNCTOR(begin, 1);
9755   MKFUNCTOR(end, 1);
9756   MKFUNCTOR(create_graph, 1);
9757   MKFUNCTOR(hash_quality, 1);
9758   MKFUNCTOR(hash, 3);
9759   MKFUNCTOR(hash, 4);
9760 
9761   FUNCTOR_colon2 = PL_new_functor(PL_new_atom(":"), 2);
9762   FUNCTOR_plus2  = PL_new_functor(PL_new_atom("+"), 2);
9763 
9764   ATOM_user		  = PL_new_atom("user");
9765   ATOM_exact		  = PL_new_atom("exact");
9766   ATOM_icase		  = PL_new_atom("icase");
9767   ATOM_plain		  = PL_new_atom("plain");
9768   ATOM_prefix		  = PL_new_atom("prefix");
9769   ATOM_like		  = PL_new_atom("like");
9770   ATOM_substring	  = PL_new_atom("substring");
9771   ATOM_word		  = PL_new_atom("word");
9772   ATOM_subPropertyOf	  = PL_new_atom(URL_subPropertyOf);
9773   ATOM_xsdString	  = PL_new_atom(URL_xsdString);
9774   ATOM_xsdDouble	  = PL_new_atom(URL_xsdDouble);
9775   ATOM_error		  = PL_new_atom("error");
9776   ATOM_begin		  = PL_new_atom("begin");
9777   ATOM_end		  = PL_new_atom("end");
9778   ATOM_error		  = PL_new_atom("error");
9779   ATOM_infinite		  = PL_new_atom("infinite");
9780   ATOM_snapshot		  = PL_new_atom("snapshot");
9781   ATOM_true		  = PL_new_atom("true");
9782   ATOM_size		  = PL_new_atom("size");
9783   ATOM_optimize_threshold = PL_new_atom("optimize_threshold");
9784   ATOM_average_chain_len  = PL_new_atom("average_chain_len");
9785   ATOM_reset		  = PL_new_atom("reset");
9786   ATOM_lt		  = PL_new_atom("<");
9787   ATOM_eq		  = PL_new_atom("=");
9788   ATOM_gt		  = PL_new_atom(">");
9789   ATOM_XSDString	  = PL_new_atom("http://www.w3.org/2001/XMLSchema#string");
9790 
9791   PRED_call1         = PL_predicate("call", 1, "user");
9792 
9793 					/* statistics */
9794   keys[i++] = FUNCTOR_graphs1;
9795   keys[i++] = FUNCTOR_triples1;
9796   keys[i++] = FUNCTOR_resources1;
9797   keys[i++] = FUNCTOR_indexed16;
9798   keys[i++] = FUNCTOR_hash_quality1;
9799   keys[i++] = FUNCTOR_predicates1;
9800   keys[i++] = FUNCTOR_searched_nodes1;
9801   keys[i++] = FUNCTOR_duplicates1;
9802   keys[i++] = FUNCTOR_lingering1;
9803   keys[i++] = FUNCTOR_literals1;
9804   keys[i++] = FUNCTOR_triples2;
9805   keys[i++] = FUNCTOR_gc4;
9806   keys[i++] = 0;
9807   assert(i<=16);
9808 
9809   check_index_tables();
9810 					/* see struct triple */
9811   assert(sizeof(literal) <= sizeof(triple*)*INDEX_TABLES);
9812 
9813   PL_register_foreign("rdf_version",    1, rdf_version,     0);
9814   PL_register_foreign("rdf_assert",	3, rdf_assert3,	    0);
9815   PL_register_foreign("rdf_assert",	4, rdf_assert4,	    0);
9816   PL_register_foreign("rdf_update",	4, rdf_update,      0);
9817   PL_register_foreign("rdf_update",	5, rdf_update5,     0);
9818   PL_register_foreign("rdf_retractall",	3, rdf_retractall3, 0);
9819   PL_register_foreign("rdf_retractall",	4, rdf_retractall4, 0);
9820   PL_register_foreign("rdf",		3, rdf3,	    NDET);
9821   PL_register_foreign("rdf",		4, rdf4,	    NDET);
9822   PL_register_foreign("rdf_has",	4, rdf_has4,	    NDET);
9823   PL_register_foreign("rdf_has",	3, rdf_has3,	    NDET);
9824   PL_register_foreign("rdf_gc_",	0, rdf_gc,	    0);
9825   PL_register_foreign("rdf_add_gc_time",1, rdf_add_gc_time, 0);
9826   PL_register_foreign("rdf_gc_info_",   1, rdf_gc_info,	    0);
9827   PL_register_foreign("rdf_statistics_",1, rdf_statistics,  NDET);
9828   PL_register_foreign("rdf_set",        1, rdf_set,         0);
9829   PL_register_foreign("rdf_update_duplicates",
9830 					0, rdf_update_duplicates, 0);
9831   PL_register_foreign("rdf_warm_indexes",
9832 					1, rdf_warm_indexes,0);
9833   PL_register_foreign("rdf_generation", 1, rdf_generation,  0);
9834   PL_register_foreign("rdf_snapshot",   1, rdf_snapshot,    0);
9835   PL_register_foreign("rdf_delete_snapshot", 1, rdf_delete_snapshot, 0);
9836   PL_register_foreign("rdf_match_label",3, match_label,     0);
9837   PL_register_foreign("rdf_save_db_",   3, rdf_save_db,     0);
9838   PL_register_foreign("rdf_load_db_",   3, rdf_load_db,     0);
9839   PL_register_foreign("rdf_reachable",  3, rdf_reachable3,  NDET);
9840   PL_register_foreign("rdf_reachable",  5, rdf_reachable5,  NDET);
9841   PL_register_foreign("rdf_reset_db_",  0, rdf_reset_db,    0);
9842   PL_register_foreign("rdf_set_predicate",
9843 					2, rdf_set_predicate, 0);
9844   PL_register_foreign("rdf_predicate_property_",
9845 					2, rdf_predicate_property, NDET);
9846   PL_register_foreign("rdf_current_predicate",
9847 					1, rdf_current_predicate, NDET);
9848   PL_register_foreign("rdf_current_literal",
9849 					1, rdf_current_literal, NDET);
9850   PL_register_foreign("rdf_graph_",     2, rdf_graph,       NDET);
9851   PL_register_foreign("rdf_create_graph",  1, rdf_create_graph, 0);
9852   PL_register_foreign("rdf_destroy_graph", 1, rdf_destroy_graph, 0);
9853   PL_register_foreign("rdf_set_graph_source", 3, rdf_set_graph_source, 0);
9854   PL_register_foreign("rdf_graph_source_", 3, rdf_graph_source, 0);
9855   PL_register_foreign("rdf_estimate_complexity",
9856 					4, rdf_estimate_complexity, 0);
9857   PL_register_foreign("rdf_transaction", 3, rdf_transaction, META);
9858   PL_register_foreign("rdf_active_transactions_",
9859 					1, rdf_active_transactions, 0);
9860   PL_register_foreign("rdf_monitor_",   2, rdf_monitor,     META);
9861   PL_register_foreign("rdf_empty_prefix_cache",
9862 					0, pl_empty_prefix_table, 0);
9863   PL_register_foreign("rdf_is_bnode",   1, rdf_is_bnode,    0);
9864 #ifdef WITH_MD5
9865   PL_register_foreign("rdf_md5",	2, rdf_md5,	    0);
9866   PL_register_foreign("rdf_graph_modified_", 3, rdf_graph_modified_, 0);
9867   PL_register_foreign("rdf_graph_clear_modified_",
9868 				        1, rdf_graph_clear_modified_, 0);
9869   PL_register_foreign("rdf_atom_md5",	3, rdf_atom_md5,    0);
9870 #endif
9871 
9872 #ifdef O_DEBUG
9873   PL_register_foreign("rdf_debug",      1, rdf_debug,       0);
9874   PL_register_foreign("rdf_print_predicate_cloud", 2,
9875 		      rdf_print_predicate_cloud, 0);
9876   PL_register_foreign("rdf_checks_literal_references", 1,
9877 		      rdf_checks_literal_references, 0);
9878 #endif
9879 
9880   PL_register_foreign("lang_matches", 2, lang_matches, 0);
9881   PL_register_foreign("rdf_compare",  3, rdf_compare,  0);
9882 
9883   install_atom_map();
9884 }
9885