1/*  Part of SWI-Prolog
2
3    Author:        Jan Wielemaker
4    E-mail:        J.Wielemaker@vu.nl
5    WWW:           http://www.swi-prolog.org
6    Copyright (c)  2003-2020, University of Amsterdam
7                              VU University Amsterdam
8                              CWI, Amsterdam
9    All rights reserved.
10
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions
13    are met:
14
15    1. Redistributions of source code must retain the above copyright
16       notice, this list of conditions and the following disclaimer.
17
18    2. Redistributions in binary form must reproduce the above copyright
19       notice, this list of conditions and the following disclaimer in
20       the documentation and/or other materials provided with the
21       distribution.
22
23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35*/
36
37:- module(rdf_db,
38          [ rdf_version/1,              % -Version
39
40            rdf/3,                      % ?Subject, ?Predicate, ?Object
41            rdf/4,                      % ?Subject, ?Predicate, ?Object, ?DB
42            rdf_has/3,                  % ?Subject, +Pred, ?Obj
43            rdf_has/4,                  % ?Subject, +Pred, ?Obj, -RealPred
44            rdf_reachable/3,            % ?Subject, +Pred, ?Object
45            rdf_reachable/5,            % ?Subject, +Pred, ?Object, +MaxD, ?D
46            rdf_resource/1,             % ?Resource
47            rdf_subject/1,              % ?Subject
48
49            rdf_member_property/2,      % ?Property, ?Index
50
51            rdf_assert/3,               % +Subject, +Predicate, +Object
52            rdf_assert/4,               % +Subject, +Predicate, +Object, +DB
53            rdf_retractall/3,           % ?Subject, ?Predicate, ?Object
54            rdf_retractall/4,           % ?Subject, ?Predicate, ?Object, +DB
55            rdf_update/4,               % +Subject, +Predicate, +Object, +Act
56            rdf_update/5,               % +Subject, +Predicate, +Object, +Src, +Act
57            rdf_set_predicate/2,        % +Predicate, +Property
58            rdf_predicate_property/2,   % +Predicate, ?Property
59            rdf_current_predicate/1,    % -Predicate
60            rdf_current_literal/1,      % -Literal
61            rdf_transaction/1,          % :Goal
62            rdf_transaction/2,          % :Goal, +Id
63            rdf_transaction/3,          % :Goal, +Id, +Options
64            rdf_active_transaction/1,   % ?Id
65
66            rdf_monitor/2,              % :Goal, +Options
67
68            rdf_save_db/1,              % +File
69            rdf_save_db/2,              % +File, +DB
70            rdf_load_db/1,              % +File
71            rdf_reset_db/0,
72
73            rdf_node/1,                 % -Id
74            rdf_bnode/1,                % -Id
75            rdf_is_bnode/1,             % +Id
76
77            rdf_is_resource/1,          % +Term
78            rdf_is_literal/1,           % +Term
79            rdf_literal_value/2,        % +Term, -Value
80
81            rdf_load/1,                 % +File
82            rdf_load/2,                 % +File, +Options
83            rdf_save/1,                 % +File
84            rdf_save/2,                 % +File, +Options
85            rdf_unload/1,               % +File
86            rdf_unload_graph/1,         % +Graph
87
88            rdf_md5/2,                  % +DB, -MD5
89            rdf_atom_md5/3,             % +Text, +Times, -MD5
90
91            rdf_create_graph/1,         % ?Graph
92            rdf_graph_property/2,       % ?Graph, ?Property
93            rdf_set_graph/2,            % +Graph, +Property
94            rdf_graph/1,                % ?Graph
95            rdf_source/1,               % ?File
96            rdf_source/2,               % ?DB, ?SourceURL
97            rdf_make/0,                 % Reload modified databases
98            rdf_gc/0,                   % Garbage collection
99
100            rdf_source_location/2,      % +Subject, -Source
101            rdf_statistics/1,           % -Key
102            rdf_set/1,                  % +Term
103            rdf_generation/1,           % -Generation
104            rdf_snapshot/1,             % -Snapshot
105            rdf_delete_snapshot/1,      % +Snapshot
106            rdf_current_snapshot/1,     % +Snapshot
107            rdf_estimate_complexity/4,  % +S,+P,+O,-Count
108
109            rdf_save_subject/3,         % +Stream, +Subject, +DB
110            rdf_save_header/2,          % +Out, +Options
111            rdf_save_footer/1,          % +Out
112
113            rdf_equal/2,                % ?Resource, ?Resource
114            lang_equal/2,               % +Lang1, +Lang2
115            lang_matches/2,             % +Lang, +Pattern
116
117            rdf_prefix/2,               % :Alias, +URI
118            rdf_current_prefix/2,       % :Alias, ?URI
119            rdf_register_prefix/2,      % +Alias, +URI
120            rdf_register_prefix/3,      % +Alias, +URI, +Options
121            rdf_unregister_prefix/1,    % +Alias
122            rdf_current_ns/2,           % :Alias, ?URI
123            rdf_register_ns/2,          % +Alias, +URI
124            rdf_register_ns/3,          % +Alias, +URI, +Options
125            rdf_global_id/2,            % ?NS:Name, :Global
126            rdf_global_object/2,        % +Object, :NSExpandedObject
127            rdf_global_term/2,          % +Term, :WithExpandedNS
128
129            rdf_compare/3,              % -Dif, +Object1, +Object2
130            rdf_match_label/3,          % +How, +String, +Label
131            rdf_split_url/3,            % ?Base, ?Local, ?URL
132            rdf_url_namespace/2,        % +URL, ?Base
133
134            rdf_warm_indexes/0,
135            rdf_warm_indexes/1,         % +Indexed
136            rdf_update_duplicates/0,
137
138            rdf_debug/1,                % Set verbosity
139
140            rdf_new_literal_map/1,      % -Handle
141            rdf_destroy_literal_map/1,  % +Handle
142            rdf_reset_literal_map/1,    % +Handle
143            rdf_insert_literal_map/3,   % +Handle, +Key, +Literal
144            rdf_insert_literal_map/4,   % +Handle, +Key, +Literal, -NewKeys
145            rdf_delete_literal_map/3,   % +Handle, +Key, +Literal
146            rdf_delete_literal_map/2,   % +Handle, +Key
147            rdf_find_literal_map/3,     % +Handle, +KeyList, -Literals
148            rdf_keys_in_literal_map/3,  % +Handle, +Spec, -Keys
149            rdf_statistics_literal_map/2, % +Handle, +Name(-Arg...)
150
151            rdf_graph_prefixes/2,       % ?Graph, -Prefixes
152            rdf_graph_prefixes/3,       % ?Graph, -Prefixes, :Filter
153
154            (rdf_meta)/1,               % +Heads
155            op(1150, fx, (rdf_meta))
156          ]).
157:- use_module(library(semweb/rdf_prefixes),
158              [ (rdf_meta)/1,
159                register_file_prefixes/1,
160                rdf_global_id/2,
161                rdf_register_ns/2,
162                                        % re-exported predicates
163                rdf_global_object/2,
164                rdf_current_ns/2,
165                rdf_prefix/2,
166                rdf_global_term/2,
167                rdf_register_ns/3,
168                rdf_register_prefix/3,
169                rdf_register_prefix/2,
170                rdf_current_prefix/2,
171                rdf_unregister_prefix/1
172              ]).
173
174:- autoload(library(apply),[maplist/2,maplist/3]).
175:- autoload(library(debug),[debug/3,assertion/1]).
176:- autoload(library(error),[must_be/2,existence_error/2]).
177:- autoload(library(gensym),[gensym/2,reset_gensym/1]).
178:- autoload(library(lists),
179	    [member/2,flatten/2,list_to_set/2,append/3,select/3]).
180:- autoload(library(memfile),
181	    [atom_to_memory_file/2,open_memory_file/4]).
182:- autoload(library(option),
183	    [option/2,option/3,merge_options/3,meta_options/3]).
184:- autoload(library(rdf),[process_rdf/3]).
185:- autoload(library(sgml),
186	    [ load_structure/3,
187	      xml_quote_attribute/3,
188	      xml_name/1,
189	      xml_quote_cdata/3,
190	      xml_is_dom/1,
191	      iri_xml_namespace/3,
192	      iri_xml_namespace/2
193	    ]).
194:- autoload(library(sgml_write),[xml_write/3]).
195:- autoload(library(uri),
196	    [ uri_file_name/2,
197	      uri_is_global/1,
198	      uri_normalized/2,
199	      uri_components/2,
200	      uri_data/3,
201	      uri_data/4
202	    ]).
203:- autoload(library(xsdp_types),[xsdp_numeric_uri/2]).
204:- autoload(library(semweb/rdf_cache),[rdf_cache_file/3]).
205
206:- if(exists_source(library(thread))).
207:- autoload(library(thread), [concurrent/3]).
208:- endif.
209
210:- use_foreign_library(foreign(rdf_db)).
211:- public rdf_print_predicate_cloud/2.  % print matrix of reachable predicates
212
213:- meta_predicate
214    rdf_transaction(0),
215    rdf_transaction(0, +),
216    rdf_transaction(0, +, +),
217    rdf_monitor(1, +),
218    rdf_save(+, :),
219    rdf_load(+, :).
220
221:- predicate_options(rdf_graph_prefixes/3, 3,
222                     [expand(callable), filter(callable), min_count(nonneg)]).
223:- predicate_options(rdf_load/2, 2,
224                     [ base_uri(atom),
225                       blank_nodes(oneof([share,noshare])),
226                       cache(boolean),
227                       concurrent(positive_integer),
228                       db(atom),
229                       format(oneof([xml,triples,turtle,trig,nquads,ntriples])),
230                       graph(atom),
231                       multifile(boolean),
232                       if(oneof([true,changed,not_loaded])),
233                       modified(-float),
234                       prefixes(-list),
235                       silent(boolean),
236                       register_namespaces(boolean)
237                     ]).
238:- predicate_options(rdf_save/2, 2,
239                     [ graph(atom),
240                       db(atom),
241                       anon(boolean),
242                       base_uri(atom),
243                       write_xml_base(boolean),
244                       convert_typed_literal(callable),
245                       encoding(encoding),
246                       document_language(atom),
247                       namespaces(list(atom)),
248                       xml_attributes(boolean),
249                       inline(boolean)
250                     ]).
251:- predicate_options(rdf_save_header/2, 2,
252                     [ graph(atom),
253                       db(atom),
254                       namespaces(list(atom))
255                     ]).
256:- predicate_options(rdf_save_subject/3, 3,
257                     [ graph(atom),
258                       base_uri(atom),
259                       convert_typed_literal(callable),
260                       document_language(atom)
261                     ]).
262:- predicate_options(rdf_transaction/3, 3,
263                     [ snapshot(any)
264                     ]).
265
266:- discontiguous
267    term_expansion/2.
268
269/** <module> Core RDF database
270
271The file library(semweb/rdf_db) provides the core  of the SWI-Prolog RDF
272store.
273
274@deprecated     New applications should use library(semweb/rdf11), which
275                provides a much more intuitive API to the RDF store, notably
276                for handling literals.  The library(semweb/rdf11) runs
277                currently on top of this library and both can run side-by-side
278                in the same application.  Terms retrieved from the database
279                however have a different shape and can not be exchanged without
280                precautions.
281*/
282
283		 /*******************************
284		 *            PREFIXES		*
285		 *******************************/
286
287% the ns/2 predicate is historically defined  in this module. We'll keep
288% that for compatibility reasons.
289
290:- multifile ns/2.
291:- dynamic   ns/2.                      % ID, URL
292
293:- multifile
294    rdf_prefixes:rdf_empty_prefix_cache/2.
295
% Hook clause added to rdf_prefixes:rdf_empty_prefix_cache/2.  Both
% arguments are ignored; the call is simply forwarded to
% rdf_empty_prefix_cache/0 of this module.
rdf_prefixes:rdf_empty_prefix_cache(_Alias, _URI) :-
    rdf_empty_prefix_cache.
298
299:- rdf_meta
300    rdf(r,r,o),
301    rdf_has(r,r,o,r),
302    rdf_has(r,r,o),
303    rdf_assert(r,r,o),
304    rdf_retractall(r,r,o),
305    rdf(r,r,o,?),
306    rdf_assert(r,r,o,+),
307    rdf_retractall(r,r,o,?),
308    rdf_reachable(r,r,o),
309    rdf_reachable(r,r,o,+,?),
310    rdf_update(r,r,o,t),
311    rdf_update(r,r,o,+,t),
312    rdf_equal(o,o),
313    rdf_source_location(r,-),
314    rdf_resource(r),
315    rdf_subject(r),
316    rdf_create_graph(r),
317    rdf_graph(r),
318    rdf_graph_property(r,?),
319    rdf_set_graph(r,+),
320    rdf_unload_graph(r),
321    rdf_set_predicate(r, t),
322    rdf_predicate_property(r, -),
323    rdf_estimate_complexity(r,r,r,-),
324    rdf_print_predicate_cloud(r,+).
325
%!  rdf_equal(?Resource1, ?Resource2)
%
%   True when Resource1 and Resource2 unify.  The predicate is declared
%   with rdf_meta, so prefix notation (NS:Local) in either argument is
%   resolved by goal expansion before the unification takes place.

rdf_equal(Resource1, Resource2) :-
    Resource1 = Resource2.
331
%!  lang_equal(+Lang1, +Lang2) is semidet.
%
%   True when Lang1 and Lang2 denote the same language.  The tags are
%   first unified as-is; if that fails they are compared after
%   converting both to lower case.
%
%   @see lang_matches/2.

lang_equal(Lang1, Lang2) :-
    (   Lang1 = Lang2
    ->  true
    ;   downcase_atom(Lang1, Canonical),
        downcase_atom(Lang2, Canonical)
    ).
342
343%!  lang_matches(+Lang, +Pattern) is semidet.
344%
345%   True if Lang  matches  Pattern.   This  implements  XML language
%   matching according to RFC 4647.  Both Lang and Pattern are
347%   dash-separated strings of  identifiers  or   (for  Pattern)  the
348%   wildcard *. Identifiers are  matched   case-insensitive  and a *
349%   matches any number of identifiers. A   short pattern is the same
350%   as *.
351
352
353                 /*******************************
354                 *     BASIC TRIPLE QUERIES     *
355                 *******************************/
356
357%!  rdf(?Subject, ?Predicate, ?Object) is nondet.
358%
359%   Elementary query for triples. Subject   and  Predicate are atoms
360%   representing the fully qualified URL of  the resource. Object is
361%   either an atom representing a resource  or literal(Value) if the
362%   object  is  a  literal  value.   If    a   value   of  the  form
363%   NameSpaceID:LocalName is provided it  is   expanded  to a ground
364%   atom  using  expand_goal/2.  This  implies   you  can  use  this
365%   construct in compiled code without paying a performance penalty.
366%   Literal values take one of the following forms:
367%
368%     * Atom
369%     If the value is a simple atom it is the textual representation
370%     of a string literal without explicit type or language
371%     qualifier.
372%
373%     * lang(LangID, Atom)
374%     Atom represents the text of a string literal qualified with
375%     the given language.
376%
377%     * type(TypeID, Value)
378%     Used for attributes qualified using the =|rdf:datatype|=
379%     TypeID. The Value is either the textual representation or a
380%     natural Prolog representation. See the option
381%     convert_typed_literal(:Convertor) of the parser. The storage
382%     layer provides efficient handling of atoms, integers (64-bit)
383%     and floats (native C-doubles). All other data is represented
384%     as a Prolog record.
385%
386%   For literal querying purposes, Object can be of the form
387%   literal(+Query, -Value), where Query is one of the terms below.
388%   If the Query takes a literal argument and the value has a
389%   numeric type numerical comparison is performed.
390%
391%     * plain(+Text)
392%     Perform exact match and demand the language or type qualifiers
393%     to match. This query is fully indexed.
394%
395%     * icase(+Text)
396%     Perform a full but case-insensitive match. This query is
397%     fully indexed.
398%
399%     * exact(+Text)
400%     Same as icase(Text).  Backward compatibility.
401%
402%     * substring(+Text)
403%     Match any literal that contains Text as a case-insensitive
404%     substring. The query is not indexed on Object.
405%
406%     * word(+Text)
407%     Match any literal that contains Text delimited by a non
408%     alpha-numeric character, the start or end of the string. The
409%     query is not indexed on Object.
410%
411%     * prefix(+Text)
412%     Match any literal that starts with Text. This call is intended
413%     for completion. The query is indexed using the skip list of
414%     literals.
415%
416%     * ge(+Literal)
417%     Match any literal that is equal or larger than Literal in the
418%     ordered set of literals.
419%
420%     * gt(+Literal)
421%     Match any literal that is larger than Literal in the ordered set
422%     of literals.
423%
424%     * eq(+Literal)
425%     Match any literal that is equal to Literal in the ordered set
426%     of literals.
427%
428%     * le(+Literal)
429%     Match any literal that is equal or smaller than Literal in the
430%     ordered set of literals.
431%
432%     * lt(+Literal)
433%     Match any literal that is smaller than Literal in the ordered set
434%     of literals.
435%
436%     * between(+Literal1, +Literal2)
437%     Match any literal that is between Literal1 and Literal2 in the
438%     ordered set of literals. This may include both Literal1 and
439%     Literal2.
440%
441%     * like(+Pattern)
442%     Match any literal that matches Pattern case insensitively,
443%     where the `*' character in Pattern matches zero or more
444%     characters.
445%
446%   Backtracking never returns duplicate triples.  Duplicates can be
447%   retrieved using rdf/4. The predicate   rdf/3 raises a type-error
448%   if called with improper arguments.  If   rdf/3  is called with a
%   term literal(_) as Subject or Predicate, it fails
450%   silently.  This  allows   for   graph    matching   goals   like
451%   rdf(S,P,O),rdf(O,P2,O2) to proceed without errors.
452
453%!  rdf(?Subject, ?Predicate, ?Object, ?Source) is nondet.
454%
455%   As rdf/3 but in addition query  the   graph  to which the triple
456%   belongs. Unlike rdf/3, this predicate does not remove duplicates
457%   from the result set.
458%
%   @param Source is a term Graph:Line.  If Source is instantiated,
460%   passing an atom is the same as passing Atom:_.
461
462
463%!  rdf_has(?Subject, +Predicate, ?Object) is nondet.
464%
465%   Succeeds if the triple rdf(Subject,   Predicate, Object) is true
466%   exploiting the rdfs:subPropertyOf predicate as   well as inverse
467%   predicates   declared   using   rdf_set_predicate/2   with   the
468%   =inverse_of= property.
469
470%!  rdf_has(?Subject, +Predicate, ?Object, -RealPredicate) is nondet.
471%
472%   Same as rdf_has/3, but RealPredicate is   unified  to the actual
473%   predicate that makes this relation   true. RealPredicate must be
474%   Predicate or an rdfs:subPropertyOf  Predicate.   If  an  inverse
475%   match is found, RealPredicate is the term inverse_of(Pred).
476
477%!  rdf_reachable(?Subject, +Predicate, ?Object) is nondet.
478%
479%   Is true if Object can  be   reached  from  Subject following the
480%   transitive predicate Predicate or a  sub-property thereof, while
%   respecting the symmetric(true) or inverse_of(P2) properties.
482%
483%   If used with either Subject or  Object unbound, it first returns
484%   the origin, followed by  the  reachable  nodes  in breadth-first
485%   search-order. The implementation internally   looks one solution
486%   ahead and succeeds deterministically on  the last solution. This
487%   predicate never generates the same  node   twice  and  is robust
488%   against cycles in the transitive relation.
489%
490%   With all arguments instantiated,   it succeeds deterministically
491%   if a path can be found from  Subject to Object. Searching starts
492%   at Subject, assuming the branching factor   is normally lower. A
493%   call  with  both  Subject   and    Object   unbound   raises  an
494%   instantiation  error.  The  following    example  generates  all
495%   subclasses of rdfs:Resource:
496%
497%     ==
498%     ?- rdf_reachable(X, rdfs:subClassOf, rdfs:'Resource').
499%     X = 'http://www.w3.org/2000/01/rdf-schema#Resource' ;
500%     X = 'http://www.w3.org/2000/01/rdf-schema#Class' ;
501%     X = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property' ;
502%     ...
503%     ==
504
505
506%!  rdf_reachable(?Subject, +Predicate, ?Object, +MaxD, -D) is nondet.
507%
508%   Same as rdf_reachable/3, but in addition, MaxD limits the number
509%   of edges expanded and D is   unified with the `distance' between
510%   Subject and Object. Distance 0 means  Subject and Object are the
511%   same resource. MaxD can be the  constant =infinite= to impose no
512%   distance-limit.
513
%!  rdf_subject(?Resource) is nondet.
%
%   True if Resource is the subject of at least one triple that is
%   visible through rdf/3.  Candidates are taken from rdf_resource/1
%   and each candidate is reported once.
%
%   @see rdf_resource/1.

rdf_subject(Resource) :-
    rdf_resource(Resource),
    once(rdf(Resource, _, _)).
524
525%!  rdf_resource(?Resource) is nondet.
526%
527%   True when Resource is a resource used as a subject or object in
528%   a triple.
529%
530%   This predicate is primarily intended  as   a  way to process all
531%   resources without processing resources twice.   The user must be
532%   aware that some of the returned resources  may not appear in any
533%   _visible_ triple.
534
535
536                 /*******************************
537                 *     TRIPLE MODIFICATIONS     *
538                 *******************************/
539
540%!  rdf_assert(+Subject, +Predicate, +Object) is det.
541%
542%   Assert a new triple into  the   database.  This is equivalent to
543%   rdf_assert/4 using Graph  =user=.  Subject   and  Predicate  are
544%   resources. Object is either a resource or a term literal(Value).
545%   See rdf/3 for an explanation  of   Value  for typed and language
546%   qualified literals. All arguments  are   subject  to  name-space
547%   expansion. Complete duplicates (including  the   same  graph and
548%   `line' and with a compatible `lifespan')   are  not added to the
549%   database.
550
551%!  rdf_assert(+Subject, +Predicate, +Object, +Graph) is det.
552%
553%   As rdf_assert/3, adding the  predicate   to  the indicated named
554%   graph.
555%
556%   @param Graph is either the name of a   graph (an atom) or a term
557%   Graph:Line, where Line is an integer that denotes a line number.
558
559%!  rdf_retractall(?Subject, ?Predicate, ?Object) is det.
560%
561%   Remove   all   matching   triples   from    the   database.   As
562%   rdf_retractall/4 using an unbound graph.
563
564%!  rdf_retractall(?Subject, ?Predicate, ?Object, ?Graph) is det.
565%
%   As rdf_retractall/3, also matching Graph.  This is particularly
567%   useful to remove all triples coming from a loaded file. See also
568%   rdf_unload/1.
569
570%!  rdf_update(+Subject, +Predicate, +Object, ++Action) is det.
%!  rdf_update(+Subject, +Predicate, +Object, +Graph, ++Action) is det.
572%
573%   Replaces one of the three  (four)   fields  on  the matching triples
574%   depending on Action:
575%
576%     * subject(Resource)
577%     Changes the first field of the triple.
578%     * predicate(Resource)
579%     Changes the second field of the triple.
580%     * object(Object)
581%     Changes the last field of the triple to the given resource or
582%     literal(Value).
583%     * graph(Graph)
584%     Moves the triple from its current named graph to Graph.
585%     This only works with rdf_update/5 and throws an error when
586%     used with rdf_update/4.
587
588
589                 /*******************************
590                 *          COLLECTIONS         *
591                 *******************************/
592
593%!  rdf_member_property(?Prop, ?Index)
594%
595%   Deal with the rdf:_1, ... properties.
596
% member_prefix(-Prefix) ends up as a single fact holding the text that
% precedes the number in rdf:_1, rdf:_2, ...  The placeholder fact
% member_prefix(x) below is rewritten while this file is loaded: the
% expansion rule looks up the IRI registered for `rdf` in ns/2 and
% appends '_' to it.  The rule must precede the placeholder.

term_expansion(member_prefix(x),
               member_prefix(MemberPrefix)) :-
    rdf_db:ns(rdf, RdfIRI),
    atom_concat(RdfIRI, '_', MemberPrefix).
member_prefix(x).
602
% rdf_member_property(?Prop, ?Index)
%
% Relate the container membership property rdf:_N (Prop) to the
% integer N (Index).
%
%   - If Index is an integer, Prop is the member prefix followed by the
%     decimal text of Index.
%   - Otherwise Prop must be an atom.  It is accepted only if the text
%     after the member prefix is the canonical decimal notation of an
%     integer.  atom_number/2 alone would also accept floats
%     (`_1.5`), other radixes (`_0x10`), character codes and leading
%     zeros (`_01`), none of which denote a membership property.  The
%     final atom_concat/3 call checks that Prop is exactly what the
%     first clause would have produced for Index, so both modes
%     describe the same relation.

rdf_member_property(P, N) :-
    integer(N),
    !,
    member_prefix(Prefix),
    atom_concat(Prefix, N, P).
rdf_member_property(P, N) :-
    member_prefix(Prefix),
    atom_concat(Prefix, Sub, P),
    atom_number(Sub, N),
    integer(N),                         % reject rdf:_1.5 and friends
    atom_concat(Prefix, N, P).          % reject non-canonical spellings
612
613
614                 /*******************************
615                 *      ANONYMOUS SUBJECTS      *
616                 *******************************/
617
%!  rdf_node(-Id)
%
%   Old name for rdf_bnode/1: Id is unified with a fresh blank node
%   identifier.
%
%   @deprecated     New code should use rdf_bnode/1.

rdf_node(BNode) :-
    rdf_bnode(BNode).
626
%!  rdf_bnode(-Id)
%
%   Unify Id with a new blank node identifier.  Candidates are produced
%   by gensym/2 using the base '_:genid'; a candidate is rejected, and
%   the next one tried, as long as it occurs as subject, object or
%   predicate of some triple.

rdf_bnode(BNode) :-
    repeat,
    gensym('_:genid', BNode),
    \+ (   rdf(BNode, _, _)
       ;   rdf(_, _, BNode)
       ;   rdf(_, BNode, _)
       ),
    !.
638
639
640
641                 /*******************************
642                 *             TYPES            *
643                 *******************************/
644
645%!  rdf_is_bnode(+Id)
646%
647%   Tests if a resource is  a  blank   node  (i.e.  is  an anonymous
648%   resource). A blank node is represented   as  an atom that starts
%   with =|_:|=. For backward compatibility reasons, =|__|= is also
650%   considered to be a blank node.
651%
652%   @see rdf_bnode/1.
653
%!  rdf_is_resource(@Term) is semidet.
%
%   True if Term has the shape of an RDF resource, i.e., it is an atom.
%   This is a pure type test: the atom need not occur in any triple.
%   Blank nodes are atoms as well and thus pass the test.
%
%   @see rdf_is_bnode/1

rdf_is_resource(Resource) :-
    atom(Resource).
664
%!  rdf_is_literal(@Term) is semidet.
%
%   True if Term is a term literal(Value) whose Value is ground.  No
%   further validation of Value is performed.

rdf_is_literal(Term) :-
    Term = literal(Value),
    ground(Value).
672
673                 /*******************************
674                 *             LITERALS         *
675                 *******************************/
676
677%!  rdf_current_literal(-Literal) is nondet.
678%
679%   True when Literal is a currently  known literal. Enumerates each
680%   unique literal exactly once. Note that   it is possible that the
681%   literal only appears in already deleted triples. Deleted triples
682%   may be locked due to active   queries, transactions or snapshots
683%   or may not yet be reclaimed by the garbage collector.
684
685
686%!  rdf_literal_value(+Literal, -Value) is semidet.
687%
%   True when Value is the appropriate Prolog representation of
689%   Literal in the RDF _|value space|_.  Current mapping:
690%
691%     | Plain literals              | Atom                    |
692%     | Language tagged literal     | Atom holding plain text |
693%     | xsd:string                  | Atom                    |
694%     | rdf:XMLLiteral              | XML DOM Tree            |
695%     | Numeric XSD type            | Number                  |
696%
697%   @tbd    Well, this is the long-term idea.
698%   @tbd    Add mode (-,+)
699
700:- rdf_meta
701    rdf_literal_value(o, -),
702    typed_value(r, +, -),
703    numeric_value(r, +, -).
704
% Dispatch on the content of the literal:
%
%   - an atom is a plain literal and is its own value (committed
%     choice: no other interpretation is tried);
%   - lang(Lang, Text) yields Text;
%   - type(Type, Lexical) is converted by typed_value/3.

rdf_literal_value(literal(Content), Value) :-
    (   atom(Content)
    ->  Value = Content
    ;   Content = lang(_Lang, Value)
    ;   Content = type(Type, Lexical),
        typed_value(Type, Lexical, Value)
    ).
712
% typed_value(+Type, +Lexical, -Value)
%
% Convert the lexical form of a typed literal into its value.
%
%   - If xsdp_numeric_uri/2 recognises Type as numeric, commit and let
%     numeric_value/3 convert the text for the type it reports.
%   - xsd:string maps the text onto itself.
%   - rdf:XMLLiteral content that is not an atom is returned unchanged;
%     an atom is parsed as XML through a memory file.  The stream is
%     closed by the cleanup handler and free_on_close(true) is passed
%     when opening the memory file.

typed_value(Type, Lexical, Value) :-
    xsdp_numeric_uri(Type, NumericType),
    !,
    numeric_value(NumericType, Lexical, Value).
typed_value(xsd:string, Text, Text).
typed_value(rdf:'XMLLiteral', Content, DOM) :-
    (   \+ atom(Content)
    ->  DOM = Content
    ;   setup_call_cleanup(
            ( atom_to_memory_file(Content, MemFile),
              open_memory_file(MemFile, read, Stream, [free_on_close(true)])
            ),
            load_structure(stream(Stream), DOM, [dialect(xml)]),
            close(Stream))
    ).
728
% numeric_value(+NumericType, +Lexical, -Number)
%
% Convert the text of a numeric literal using atom_number/2.
%
%   - xsd:integer  the text must read as an integer;
%   - xsd:float    the number read is converted to a float;
%   - xsd:double   idem;
%   - xsd:decimal  the number is returned as read.
%
% Fails if the text cannot be read as a number.

numeric_value(xsd:integer, Lexical, Integer) :-
    atom_number(Lexical, Integer),
    integer(Integer).
numeric_value(xsd:float, Lexical, Float) :-
    atom_number(Lexical, Parsed),
    Float is float(Parsed).
numeric_value(xsd:double, Lexical, Double) :-
    atom_number(Lexical, Parsed),
    Double is float(Parsed).
numeric_value(xsd:decimal, Lexical, Decimal) :-
    atom_number(Lexical, Decimal).
740
741
742                 /*******************************
743                 *            SOURCE            *
744                 *******************************/
745
%!  rdf_source_location(+Subject, -Location) is nondet.
%
%   True when Location is the source of a triple that has Subject as
%   its subject.  All sources reported by rdf/4 are collected first;
%   they are then enumerated in standard order without duplicates.
%
%   @param Location is a term File:Line.

rdf_source_location(Subject, Source) :-
    findall(Source, rdf(Subject, _, _, Source), Locations),
    sort(Locations, Distinct),
    member(Source, Distinct).
756
757
758                 /*******************************
759                 *       GARBAGE COLLECT        *
760                 *******************************/
761
762%!  rdf_create_gc_thread
763%
764%   Create the garbage collection thread.
765
766:- public
767    rdf_create_gc_thread/0.
768
% Start a thread that runs rdf_gc_loop/0.  The thread is registered
% under the alias '__rdf_GC'.

rdf_create_gc_thread :-
    GCThreadOptions = [alias('__rdf_GC')],
    thread_create(rdf_gc_loop, _ThreadId, GCThreadOptions).
773
%!  rdf_gc_loop
%
%   Entry point of the garbage collection thread.  Runs rdf_gc_loop/1
%   starting with a CPU time of 0 and hands any exception raised by it
%   to recover_gc/1.

rdf_gc_loop :-
    catch(rdf_gc_loop(0), Error, recover_gc(Error)).
781
% recover_gc(+Error)
%
% Deal with an exception that escaped from the GC loop.  On '$aborted'
% the calling thread detaches itself and the loop is not resumed.  Any
% other exception is printed as an error, after which the loop is
% restarted through rdf_gc_loop/0.

recover_gc(Error) :-
    (   Error = '$aborted'
    ->  thread_self(Me),
        thread_detach(Me)
    ;   print_message(error, Error),
        rdf_gc_loop
    ).
789
% rdf_gc_loop(+CPU)
%
% Failure-driven loop that never succeeds.  Each iteration asks
% consider_gc/1 whether collecting is worthwhile.  If so, rdf_gc/1 is
% run and the thread sleeps for the CPU time that run reported;
% otherwise it sleeps for 0.1 seconds before asking again.

rdf_gc_loop(CPU) :-
    repeat,
    (   consider_gc(CPU)
    ->  rdf_gc(Spent),
        sleep(Spent)
    ;   sleep(0.1)
    ),
    fail.
798
%!  rdf_gc(-CPU) is det.
%
%   Perform a single garbage collection run by calling rdf_gc_/0.  If
%   the run succeeds, CPU is the CPU time it consumed, measured using
%   statistics/2, and this time is passed to rdf_add_gc_time/1.  If
%   rdf_gc_/0 fails, CPU is 0.0 and nothing is recorded.

rdf_gc(CPU) :-
    statistics(cputime, Start),
    (   rdf_gc_
    ->  statistics(cputime, End),
        CPU is End-Start,
        rdf_add_gc_time(CPU)
    ;   CPU = 0.0
    ).
813
%!  rdf_gc is det.
%
%   Keep running the RDF-DB garbage collector for as long as
%   has_garbage/0 reports that there is something to gain.  In normal
%   operation this work is done by a separate thread with identifier
%   =|__rdf_GC|=, which collects as long as that is considered `useful'.
%
%   Calling rdf_gc/0 should only be necessary to obtain a fully clean
%   database for analysis purposes such as leak detection.

rdf_gc :-
    (   has_garbage
    ->  rdf_gc(_),
        rdf_gc
    ;   true
    ).
830
%!  has_garbage is semidet.
%
%   True if there is something to gain using GC, i.e., at least one
%   of the garbage, reindexed or optimizable counts reported by
%   rdf_gc_info_/1 is positive.

has_garbage :-
    once(( rdf_gc_info_(Info),
           has_garbage(Info)
         )).

%   has_garbage(+Info)
%
%   True (once for each positive count) if argument 2 (garbage),
%   3 (reindexed) or 4 (optimizable) of Info is greater than zero.

has_garbage(Info) :-
    member(Slot, [2,3,4]),
    arg(Slot, Info, Count),
    Count > 0.
843
%!  consider_gc(+CPU) is semidet.
%
%   True if running the garbage collector is considered useful: the
%   garbage plus reindexed triples make up more than one fifth of all
%   triples, or more than 4 tables can be optimized.  If
%   rdf_gc_info_/1 does not deliver the expected gc_info/8 term, an
%   error is printed and the predicate succeeds after sleeping for 10
%   seconds.
%
%   @param CPU is the amount of CPU time spent in the most recent
%   GC.  It is currently ignored.

consider_gc(_CPU) :-
    rdf_gc_info_(gc_info(Triples,       % Total #triples in DB
                         Garbage,       % Garbage triples in DB
                         Reindexed,     % Reindexed & not reclaimed
                         Optimizable,   % Non-optimized tables
                         _KeepGen,      % Oldest active generation
                         _LastGCGen,    % Oldest active gen at last GC
                         _ReindexGen,
                         _LastGCReindexGen)),
    !,
    (   (Garbage+Reindexed) * 5 > Triples
    ->  true
    ;   Optimizable > 4
    ).
consider_gc(_CPU) :-
    print_message(error, rdf(invalid_gc_info)),
    sleep(10).
865
866
867                 /*******************************
868                 *           STATISTICS         *
869                 *******************************/
870
871%!  rdf_statistics(?KeyValue) is nondet.
872%
873%   Obtain statistics on the RDF database.  Defined statistics are:
874%
875%     * graphs(-Count)
876%     Number of named graphs.
877%
878%     * triples(-Count)
879%     Total number of triples in the database.  This is the number
880%     of asserted triples minus the number of retracted ones.  The
881%     number of _visible_ triples in a particular context may be
882%     different due to visibility rules defined by the logical
883%     update view and transaction isolation.
884%
885%     * resources(-Count)
886%     Number of resources that appear as subject or object in a
887%     triple.  See rdf_resource/1.
888%
889%     * properties(-Count)
890%     Number of current predicates.  See rdf_current_predicate/1.
891%
892%     * literals(-Count)
893%     Number of current literals.  See rdf_current_literal/1.
894%
895%     * gc(GCCount, ReclaimedTriples, ReindexedTriples, Time)
896%     Information about the garbage collector.
897%
898%     * searched_nodes(-Count)
899%     Number of nodes expanded by rdf_reachable/3 and
900%     rdf_reachable/5.
901%
902%     * lookup(rdf(S,P,O,G), Count)
903%     Number of queries that have been performed for this particular
904%     instantiation pattern.  Each of S,P,O,G is either + or -.
905%     Fails in case the number of performed queries is zero.
906%
907%     * hash_quality(rdf(S,P,O,G), Buckets, Quality, PendingResize)
908%     Statistics on the index for this pattern.  Indices are created
909%     lazily on the first relevant query.
910%
911%     * triples_by_graph(Graph, Count)
%     This statistic is produced for each named graph. See
913%     =triples= for the interpretation of this value.
914
% Keys that map one-to-one onto a key of rdf_statistics_/1 are listed
% in rdf_statistics_passthrough/2.  The remaining keys (lookup,
% hash_quality and triples_by_graph) need post-processing and have
% their own clause.

rdf_statistics(Stat) :-
    rdf_statistics_passthrough(Stat, Native),
    rdf_statistics_(Native).
rdf_statistics(lookup(Pattern, Count)) :-
    % rdf_statistics_/1 fills a term indexed/16; the count for the
    % pattern numbered I by index/2 lives in argument I+1.
    functor(Counters, indexed, 16),
    rdf_statistics_(Counters),
    index(Pattern, I),
    ArgNo is I + 1,
    arg(ArgNo, Counters, Count),
    Count \== 0.
rdf_statistics(hash_quality(Pattern, Size, Quality, Optimize)) :-
    rdf_statistics_(hash_quality(Hashes)),
    member(hash(Place,Size,Quality,Optimize), Hashes),
    index(Pattern, Place).
rdf_statistics(triples_by_graph(Graph, Count)) :-
    rdf_graph_(Graph, Count).

%   rdf_statistics_passthrough(?Public, ?Native)
%
%   Native is the term handed to rdf_statistics_/1 for the public key
%   Public.  Only =properties= is renamed (to =predicates=).

rdf_statistics_passthrough(graphs(Count),         graphs(Count)).
rdf_statistics_passthrough(triples(Count),        triples(Count)).
rdf_statistics_passthrough(duplicates(Count),     duplicates(Count)).
rdf_statistics_passthrough(lingering(Count),      lingering(Count)).
rdf_statistics_passthrough(resources(Count),      resources(Count)).
rdf_statistics_passthrough(properties(Count),     predicates(Count)).
rdf_statistics_passthrough(literals(Count),       literals(Count)).
rdf_statistics_passthrough(gc(Count, Reclaimed, Reindexed, Time),
                           gc(Count, Reclaimed, Reindexed, Time)).
rdf_statistics_passthrough(searched_nodes(Count), searched_nodes(Count)).
946
%   index(?Pattern, ?Index)
%
%   Relate an instantiation pattern rdf(S,P,O,G), where each argument
%   is either + or -, to its number 0..15.  A + contributes 1 for S,
%   2 for P, 4 for O and 8 for G.  Patterns are enumerated in order
%   of increasing Index.

index(rdf(S,P,O,G), Index) :-
    index_bit(G, GBit),
    index_bit(O, OBit),
    index_bit(P, PBit),
    index_bit(S, SBit),
    Index is SBit + 2*PBit + 4*OBit + 8*GBit.

index_bit(-, 0).
index_bit(+, 1).
964
965
966                 /*******************************
967                 *           PREDICATES         *
968                 *******************************/
969
970%!  rdf_current_predicate(?Predicate) is nondet.
971%
972%   True when Predicate is a   currently known predicate. Predicates
%   are created if a triple is created  that uses this predicate or
974%   a property of the predicate   is  set using rdf_set_predicate/2.
975%   The predicate may (no longer) have triples associated with it.
976%
977%   Note that resources that have  =|rdf:type|= =|rdf:Property|= are
978%   not automatically included in the  result-set of this predicate,
979%   while _all_ resources that appear as   the  second argument of a
980%   triple _are_ included.
981%
982%   @see rdf_predicate_property/2.
983
%   rdf_current_predicate(?Predicate, ?DB)
%
%   True when Predicate is a currently known predicate for which
%   rdf/4 finds at least one triple with source DB.  Only the first
%   matching triple is considered, so an unbound DB is unified with
%   the source of that triple only.

rdf_current_predicate(P, DB) :-
    rdf_current_predicate(P),
    once(rdf(_, P, _, DB)).
989
990%!  rdf_predicate_property(?Predicate, ?Property)
991%
992%   Query properties of  a  defined   predicate.  Currently  defined
993%   properties are given below.
994%
995%     * symmetric(Bool)
%     True if the predicate is defined to be symmetric. I.e., {A} P
997%     {B} implies {B} P {A}. Setting symmetric is equivalent to
998%     inverse_of(Self).
999%
1000%     * inverse_of(Inverse)
1001%     True if this predicate is the inverse of Inverse. This
1002%     property is used by rdf_has/3, rdf_has/4, rdf_reachable/3 and
1003%     rdf_reachable/5.
1004%
1005%     * transitive(Bool)
1006%     True if this predicate is transitive. This predicate is
1007%     currently not used. It might be used to make rdf_has/3 imply
1008%     rdf_reachable/3 for transitive predicates.
1009%
1010%     * triples(Triples)
1011%     Unify Triples with the number of existing triples using this
1012%     predicate as second argument. Reporting the number of triples
1013%     is intended to support query optimization.
1014%
1015%     * rdf_subject_branch_factor(-Float)
1016%     Unify Float with the average number of triples associated with
1017%     each unique value for the subject-side of this relation. If
1018%     there are no triples the value 0.0 is returned. This value is
1019%     cached with the predicate and recomputed only after
1020%     substantial changes to the triple set associated to this
%     relation. This property is intended for path optimisation
1022%     when solving conjunctions of rdf/3 goals.
1023%
1024%     * rdf_object_branch_factor(-Float)
1025%     Unify Float with the average number of triples associated with
1026%     each unique value for the object-side of this relation. In
1027%     addition to the comments with the =rdf_subject_branch_factor=
1028%     property, uniqueness of the object value is computed from the
1029%     hash key rather than the actual values.
1030%
1031%     * rdfs_subject_branch_factor(-Float)
1032%     Same as =rdf_subject_branch_factor=, but also considering
1033%     triples of `subPropertyOf' this relation. See also rdf_has/3.
1034%
1035%     * rdfs_object_branch_factor(-Float)
1036%     Same as =rdf_object_branch_factor=, but also considering
1037%     triples of `subPropertyOf' this relation. See also rdf_has/3.
1038%
1039%   @see rdf_set_predicate/2.
1040
% If the predicate is unbound, enumerate the known predicates with
% rdf_current_predicate/1 first; otherwise query the given one
% directly.

rdf_predicate_property(P, Prop) :-
    (   var(P)
    ->  rdf_current_predicate(P)
    ;   true
    ),
    rdf_predicate_property_(P, Prop).
1048
1049%!  rdf_set_predicate(+Predicate, +Property) is det.
1050%
1051%   Define a property of  the   predicate.  This predicate currently
1052%   supports the following properties:
1053%
1054%       - symmetric(+Boolean)
1055%       Set/unset the predicate as being symmetric.  Using
1056%       symmetric(true) is the same as inverse_of(Predicate),
1057%       i.e., creating a predicate that is the inverse of
1058%       itself.
1059%       - transitive(+Boolean)
1060%       Sets the transitive property.
1061%       - inverse_of(+Predicate2)
1062%       Define Predicate as the inverse of Predicate2. An inverse
1063%       relation is deleted using inverse_of([]).
1064%
1065%   The `transitive` property is currently not used. The `symmetric`
1066%   and `inverse_of` properties are considered   by  rdf_has/3,4 and
1067%   rdf_reachable/3.
1068%
1069%   @tbd    Maintain these properties based on OWL triples.
1070
1071
1072                 /*******************************
1073                 *            SNAPSHOTS         *
1074                 *******************************/
1075
1076%!  rdf_snapshot(-Snapshot) is det.
1077%
1078%   Take a snapshot of the current state   of  the RDF store. Later,
1079%   goals may be executed in the  context   of  the database at this
1080%   moment using rdf_transaction/3 with  the   =snapshot=  option. A
1081%   snapshot created outside  a  transaction   exists  until  it  is
1082%   deleted. Snapshots taken inside a transaction   can only be used
1083%   inside this transaction.
1084
1085%!  rdf_delete_snapshot(+Snapshot) is det.
1086%
1087%   Delete a snapshot as obtained   from  rdf_snapshot/1. After this
1088%   call, resources used for maintaining the snapshot become subject
1089%   to garbage collection.
1090
%!  rdf_current_snapshot(?Term) is nondet.
%
%   True when Term is a currently known snapshot.  Snapshots are
%   found by asking current_blob/2 for blobs of type `rdf_snapshot`.
%
%   @bug    Enumeration of snapshots is slow.

rdf_current_snapshot(Snapshot) :-
    current_blob(Snapshot, rdf_snapshot).
1099
1100
1101                 /*******************************
1102                 *          TRANSACTION         *
1103                 *******************************/
1104
1105%!  rdf_transaction(:Goal) is semidet.
1106%
1107%   Same as rdf_transaction(Goal, user, []).  See rdf_transaction/3.
1108
1109%!  rdf_transaction(:Goal, +Id) is semidet.
1110%
1111%   Same as rdf_transaction(Goal, Id, []).  See rdf_transaction/3.
1112
1113%!  rdf_transaction(:Goal, +Id, +Options) is semidet.
1114%
1115%   Run Goal in an RDF  transaction.   Compared to the ACID model,
1116%   RDF transactions have the following properties:
1117%
1118%     1. Modifications inside the transactions become all atomically
1119%        visible to the outside world if Goal succeeds or remain
1120%        invisible if Goal fails or throws an exception.  I.e.,
%        the _atomicity_ property is fully supported.
1122%     2. _Consistency_ is not guaranteed. Later versions may
1123%        implement consistency constraints that will be checked
1124%        serialized just before the actual commit of a transaction.
%     3. Concurrently executing transactions do not influence each
1126%        other.  I.e., the _isolation_ property is fully supported.
1127%     4. _Durability_ can be activated by loading
1128%        library(semweb/rdf_persistency).
1129%
1130%   Processed options are:
1131%
1132%     * snapshot(+Snapshot)
1133%     Execute Goal using the state of the RDF store as stored in
1134%     Snapshot.  See rdf_snapshot/1.  Snapshot can also be the
1135%     atom =true=, which implies that an anonymous snapshot is
1136%     created at the current state of the store.  Modifications
1137%     due to executing Goal are only visible to Goal.
1138
% Convenience arities.  Both delegate to rdf_transaction/3 with an
% empty option list; rdf_transaction/1 uses `user` as the identifier.

rdf_transaction(Action) :-
    rdf_transaction(Action, user, []).

rdf_transaction(Action, TransactionId) :-
    rdf_transaction(Action, TransactionId, []).
1143
%!  rdf_active_transaction(?Id) is nondet.
%
%   True if Id is the identifier of a transaction in the context of
%   which this call is executed.  Id is unified with the members of
%   the list produced by rdf_active_transactions_/1.  If Id is not
%   instantiated, backtracking yields transaction identifiers starting
%   with the innermost nested transaction.  Transaction identifier
%   terms are not copied, need not be ground and can be instantiated
%   during the transaction.

rdf_active_transaction(Id) :-
    rdf_active_transactions_(Active),
    member(Id, Active).
1156
%!  rdf_monitor(:Goal, +Options)
%
%   Call Goal if specified actions occur on the database.  Options is
%   a list of event names, optionally prefixed with + or -.  The mask
%   starts with all bits set (0xffff); `-Event` clears the bits of
%   Event, while `Event` and `+Event` set them.

rdf_monitor(Goal, Options) :-
    monitor_mask(Options, 0xffff, Mask),
    rdf_monitor_(Goal, Mask).

%   monitor_mask(+Specs, +Mask0, -Mask)
%
%   Apply all event specifications in Specs, left to right, to Mask0.

monitor_mask([], Mask, Mask).
monitor_mask([Spec|Specs], Mask0, Mask) :-
    update_mask(Spec, Mask0, Mask1),
    monitor_mask(Specs, Mask1, Mask).

%   update_mask(+Spec, +Mask0, -Mask)
%
%   Clear (-Event) or set (+Event or Event) the bits that
%   monitor_mask/2 defines for the event.

update_mask(Spec, Mask0, Mask) :-
    (   Spec = -Event
    ->  monitor_mask(Event, Bits),
        Mask is Mask0 /\ \Bits
    ;   Spec = +Event
    ->  monitor_mask(Event, Bits),
        Mask is Mask0 \/ Bits
    ;   monitor_mask(Spec, Bits),
        Mask is Mask0 \/ Bits
    ).
1181
%!  monitor_mask(?Name, ?Mask)
%
%   Mask bit for the monitor events.  Note that this must be kept
%   consistent with the enum broadcast_id defined in rdf_db.c
%
%   Name is an event name as accepted in the option list of
%   rdf_monitor/2 and Mask is the corresponding bit pattern.  The
%   name `all` stands for all 16 bits.

                                        % C-defined broadcasts
monitor_mask(assert,       0x0001).
monitor_mask(assert(load), 0x0002).
monitor_mask(retract,      0x0004).
monitor_mask(update,       0x0008).
monitor_mask(new_literal,  0x0010).
monitor_mask(old_literal,  0x0020).
monitor_mask(transaction,  0x0040).
monitor_mask(load,         0x0080).
monitor_mask(create_graph, 0x0100).
monitor_mask(reset,        0x0200).
                                        % prolog defined broadcasts
monitor_mask(parse,        0x1000).
monitor_mask(unload,       0x1000).     % FIXME: Duplicate (same bit as
                                        % parse, so the two cannot be
                                        % selected independently)
                                        % mask for all
monitor_mask(all,          0xffff).
1203
1204%rdf_broadcast(Term, MaskName) :-
1205%%      monitor_mask(MaskName, Mask),
1206%%      rdf_broadcast_(Term, Mask).
1207
1208
1209                 /*******************************
1210                 *            WARM              *
1211                 *******************************/
1212
%!  rdf_warm_indexes
%
%   Warm all indexes.  See rdf_warm_indexes/1.  The indexes to warm
%   are those enumerated by rdf_index/1.

rdf_warm_indexes :-
    findall(Index, rdf_index(Index), Indexes),
    rdf_warm_indexes(Indexes).

%   rdf_index(?Index)
%
%   Enumerate the names of the indexes passed to rdf_warm_indexes/1.
%   Each name appears exactly once; the original table listed `o`
%   twice, which put a redundant entry in the list.

rdf_index(s).
rdf_index(p).
rdf_index(o).
rdf_index(sp).
rdf_index(po).
rdf_index(spo).
rdf_index(g).
rdf_index(sg).
rdf_index(pg).
1231
1232%!  rdf_warm_indexes(+Indexes) is det.
1233%
1234%   Create the named indexes.  Normally,   the  RDF database creates
%   indexes lazily the first time they are needed.  This predicate
1236%   serves two purposes: it provides an   explicit  way to make sure
1237%   that the required indexes  are   present  and  creating multiple
1238%   indexes at the same time is more efficient.
1239
1240
1241                 /*******************************
1242                 *          DUPLICATES          *
1243                 *******************************/
1244
1245%!  rdf_update_duplicates is det.
1246%
1247%   Update the duplicate administration of the RDF store. This marks
%   every triple that is potentially  a   duplicate  of another as
1249%   duplicate. Being potentially a  duplicate   means  that subject,
1250%   predicate and object are equivalent and   the  life-times of the
1251%   two triples overlap.
1252%
1253%   The duplicates marks are used to  reduce the administrative load
1254%   of avoiding duplicate answers.  Normally,   the  duplicates  are
1255%   marked using a background thread that   is  started on the first
1256%   query that produces a substantial amount of duplicates.
1257
:- public rdf_update_duplicates_thread/0.

%!  rdf_update_duplicates_thread
%
%   Start a thread to initialize the duplicate administration.  The
%   thread runs rdf_update_duplicates/0, is detached and has the alias
%   =|__rdf_duplicate_detecter|=.

rdf_update_duplicates_thread :-
    ThreadOptions = [ detached(true),
                      alias('__rdf_duplicate_detecter')
                    ],
    thread_create(rdf_update_duplicates, _ThreadId, ThreadOptions).
1270
1271%!  rdf_update_duplicates is det.
1272%
%   Update the duplicate administration. If  this  administration is
%   up-to-date, each triple that _may_ have a duplicate is flagged.
1275%   The predicate rdf/3 uses this administration to speedup checking
1276%   for duplicate answers.
1277%
1278%   This predicate is normally  executed   from  a background thread
1279%   named =__rdf_duplicate_detecter= which is created   when a query
1280%   discovers that checking for duplicates becomes too expensive.
1281
1282
1283                 /*******************************
1284                 *    QUICK BINARY LOAD/SAVE    *
1285                 *******************************/
1286
%!  rdf_save_db(+File) is det.
%!  rdf_save_db(+File, +Graph) is det.
%
%   Save triples into File in a quick-to-load binary format.  If Graph
%   is supplied, only triples flagged to originate from that graph
%   are saved.  Files created this way can be loaded using
%   rdf_load_db/1.  The format version written is taken from the
%   Prolog flag `rdf_triple_format`, which is created here with value
%   3.

:- create_prolog_flag(rdf_triple_format, 3, [type(integer)]).

rdf_save_db(File) :-
    rdf_save_db_to_file(File, _AnyGraph).

rdf_save_db(File, Graph) :-
    rdf_save_db_to_file(File, Graph).

%   rdf_save_db_to_file(+File, ?Graph)
%
%   Open File as a binary output stream without position recording,
%   hand it to rdf_save_db_/3 and close it afterwards, also if saving
%   fails or raises an exception.

rdf_save_db_to_file(File, Graph) :-
    current_prolog_flag(rdf_triple_format, Version),
    setup_call_cleanup(
        open(File, write, Out, [type(binary)]),
        ( set_stream(Out, record_position(false)),
          rdf_save_db_(Out, Graph, Version)
        ),
        close(Out)).
1315
1316
%!  rdf_load_db_no_admin(+File, +Id, -Graphs) is det.
%
%   Load triples from a  .trp  file   without  updating  the  source
%   administration. Id is passed on to rdf_load_db_/3.  Graphs is
%   a list of graph-names encountered in File.
%
%   The stream is opened as the setup goal of setup_call_cleanup/3,
%   so it is closed even if set_stream/2 raises or the thread is
%   interrupted before loading starts.

rdf_load_db_no_admin(File, Id, Graphs) :-
    setup_call_cleanup(
        open(File, read, In, [type(binary)]),
        ( set_stream(In, record_position(false)),
          rdf_load_db_(In, Id, Graphs)
        ),
        close(In)).
1327
1328
%!  check_loaded_cache(+Graph, +Graphs, +Modified) is det.
%
%   Verify the loaded cache file: the list of graphs found in the
%   cache must be exactly [Graph].  Otherwise a warning is printed.
%   Modified is currently not used.
%
%   @tbd    What to do if there is a cache mismatch? Delete the loaded
%           graphs and fail?

check_loaded_cache(DB, Graphs, _Modified) :-
    (   Graphs = [DB]
    ->  true
    ;   print_message(warning, rdf(inconsistent_cache(DB, Graphs)))
    ).
1340
1341
%!  rdf_load_db(+File) is det.
%
%   Load triples from a file created using rdf_save_db/2.  The
%   =|file://|= URL of File is used as the identifier passed to
%   rdf_load_db_no_admin/3; the list of graphs found is ignored.

rdf_load_db(File) :-
    uri_file_name(FileURL, File),
    rdf_load_db_no_admin(File, FileURL, _LoadedGraphs).
1349
1350
1351                 /*******************************
1352                 *          LOADING RDF         *
1353                 *******************************/
1354
% Hook predicates to which other files (plugins) may add clauses.
% NOTE(review): the arguments of rdf_open_hook/8 are not documented
% here; presumably they mirror open_input_if_modified/8 -- confirm
% against its definition.

:- multifile
    rdf_open_hook/8,
    rdf_open_decode/4,              % +Encoding, +File, -Stream, -Cleanup
    rdf_load_stream/3,              % +Format, +Stream, +Options
    rdf_file_type/2,                % ?Extension, ?Format
    rdf_storage_encoding/2,         % ?Extension, ?Encoding
    url_protocol/1.                 % ?Protocol
1362
1363%!  rdf_load(+FileOrList) is det.
1364%
1365%   Same as rdf_load(FileOrList, []).  See rdf_load/2.
1366
1367%!  rdf_load(+FileOrList, :Options) is det.
1368%
1369%   Load RDF data. Options provides   additional processing options.
1370%   Defined options are:
1371%
1372%       * blank_nodes(+ShareMode)
1373%       How to handle equivalent blank nodes.  If =share= (default),
1374%       equivalent blank nodes are shared in the same resource.
1375%
1376%       * base_uri(+URI)
1377%       URI that is used for rdf:about="" and other RDF constructs
1378%       that are relative to the base uri.  Default is the source
1379%       URL.
1380%
1381%       * concurrent(+Jobs)
1382%       If FileOrList is a list of files, process the input files
%       using Jobs threads concurrently.  Default is the minimum
1384%       of the number of cores and the number of inputs.  Higher
1385%       values can be useful when loading inputs from (slow)
1386%       network connections.  Using 1 (one) does not use
1387%       separate worker threads.
1388%
1389%       * format(+Format)
1390%       Specify the source format explicitly. Normally this is
1391%       deduced from the filename extension or the mime-type. The
1392%       core library understands the formats xml (RDF/XML) and
1393%       triples (internal quick load and cache format).  Plugins,
1394%       such as library(semweb/turtle) extend the set of recognised
1395%       extensions.
1396%
1397%       * graph(?Graph)
1398%       Named graph in which to load the data.  It is *not* allowed
1399%       to load two sources into the same named graph.  If Graph is
1400%       unbound, it is unified to the graph into which the data is
1401%       loaded.  The default graph is a =|file://|= URL when loading
1402%       a file or, if the specification is a URL, its normalized
1403%       version without the optional _|#fragment|_.
1404%
1405%       * if(Condition)
1406%       When to load the file. One of =true=, =changed= (default) or
1407%       =not_loaded=.
1408%
1409%       * modified(-Modified)
1410%       Unify Modified with one of =not_modified=, cached(File),
1411%       last_modified(Stamp) or =unknown=.
1412%
1413%       * cache(Bool)
1414%       If =false=, do not use or create a cache file.
1415%
1416%       * register_namespaces(Bool)
1417%       If =true= (default =false=), register =xmlns= namespace
1418%       declarations or Turtle =|@prefix|= prefixes using
1419%       rdf_register_prefix/3 if there is no conflict.
1420%
1421%       * silent(+Bool)
1422%       If =true=, the message reporting completion is printed using
1423%       level =silent=. Otherwise the level is =informational=. See
1424%       also print_message/2.
1425%
1426%       * prefixes(-Prefixes)
1427%       Returns the prefixes defined in the source   data file as a list
1428%       of pairs.
1429%
1430%       * multifile(+Boolean)
1431%       Indicate that the addressed graph may be populated with
1432%       triples from multiple sources. This disables caching and
1433%       avoids that an rdf_load/2 call affecting the specified
1434%       graph cleans the graph.
1435%
1436%   Other  options  are  forwarded  to  process_rdf/3.  By  default,
1437%   rdf_load/2 only loads RDF/XML from files.  It can be extended to
1438%   load data from other formats and   locations  using plugins. The
1439%   full set of plugins relevant to   support  different formats and
1440%   locations is below:
1441%
1442%     ==
1443%     :- use_module(library(semweb/turtle)).        % Turtle and TriG
1444%     :- use_module(library(semweb/rdf_ntriples)).
1445%     :- use_module(library(semweb/rdf_zlib_plugin)).
1446%     :- use_module(library(semweb/rdf_http_plugin)).
1447%     :- use_module(library(http/http_ssl_plugin)).
1448%     ==
1449%
%   @see    rdf_db:rdf_open_hook/8, library(semweb/rdf_persistency) and
1451%           library(semweb/rdf_cache)
1452
% rdf_loading/3 records the loads that are in progress.  Facts are
% added by rdf_start_load/2 and removed by rdf_end_load/1.

:- dynamic rdf_loading/3.               % SourceURL, Queue, Thread

% rdf_load/1 is rdf_load/2 with an empty option list.

rdf_load(FileOrList) :-
    rdf_load(FileOrList, []).
1458
% Fallback used when concurrent/3 is not available: run the goals one
% after the other.  Succeeds if no goal fails; the job count and the
% options are ignored and bindings made by the goals are not kept.

:- if(\+current_predicate(concurrent/3)).
concurrent(_Jobs, Goals, _Options) :-
    \+ ( member(Goal, Goals),
         \+ call(Goal)
       ).
:- endif.
1463
% Atom garbage collection is switched off while loading by setting
% the flag `agc_margin` to 0; the previous value is restored when
% loading completes, fails or raises an exception.  This improves
% performance with about 15% loading the LUBM Univ_50 benchmark.

rdf_load(Spec, M:Options) :-
    must_be(list, Options),
    current_prolog_flag(agc_margin, SavedMargin),
    setup_call_cleanup(
        set_prolog_flag(agc_margin, 0),
        rdf_load_noagc(Spec, M, Options),
        set_prolog_flag(agc_margin, SavedMargin)).
1474
%   rdf_load_noagc(+Spec, +Module, +Options)
%
%   Load a single input or a (possibly nested) list of inputs.  All
%   inputs must be ground.  For a list, load_jobs/3 decides on the
%   number of jobs: with one job the inputs are loaded one after the
%   other in the calling thread; otherwise one rdf_load_one/3 goal per
%   input is handed to concurrent/3.

rdf_load_noagc(Spec, M, Options) :-
    (   is_list(Spec)
    ->  flatten(Spec, Inputs),      % Compatibility: allow nested lists
        maplist(must_be(ground), Inputs),
        length(Inputs, Count),
        load_jobs(Count, Jobs, Options),
        (   Jobs =:= 1
        ->  forall(member(Input, Inputs),
                   rdf_load_one(Input, M, Options))
        ;   maplist(load_goal(Options, M), Inputs, Goals),
            concurrent(Jobs, Goals, [])
        )
    ;   must_be(ground, Spec),
        rdf_load_one(Spec, M, Options)
    ).

%   load_goal(+Options, +Module, +Input, -Goal)
%
%   Goal is the rdf_load_one/3 call that loads Input.

load_goal(Options, M, Input, rdf_load_one(Input, M, Options)).
1493
%   load_jobs(+Count, -Jobs, +Options)
%
%   Jobs is the number of concurrent jobs to use for loading Count
%   inputs.  An explicit concurrent(Jobs) option wins and must be a
%   positive integer.  Otherwise, if the flag `cpu_count` is known and
%   positive, Jobs is the smaller of the CPU count and Count, but at
%   least 1.  Without that flag Jobs is 1.

load_jobs(Count, Jobs, Options) :-
    (   option(concurrent(Jobs), Options)
    ->  must_be(positive_integer, Jobs)
    ;   current_prolog_flag(cpu_count, CPUs),
        CPUs > 0
    ->  Jobs is max(1, min(CPUs, Count))
    ;   Jobs = 1
    ).
1504
1505
%   rdf_load_one(+Spec, +Module, +Options)
%
%   Load a single input.  The source URL and the target graph are
%   determined first.  rdf_start_load/2 runs as setup goal while
%   holding the mutex `rdf_load_file`; its result tells
%   rdf_load_file/7 what to do and is handed to rdf_end_load/1 when
%   rdf_load_file/7 is finished.

rdf_load_one(Spec, M, Options) :-
    source_url(Spec, Protocol, SourceURL),
    load_graph(SourceURL, Graph, Options),
    setup_call_cleanup(
        with_mutex(rdf_load_file,
                   rdf_start_load(SourceURL, WhatToDo)),
        rdf_load_file(WhatToDo, Spec, SourceURL, Protocol,
                      Graph, M, Options),
        rdf_end_load(WhatToDo)).
1515
1516%!  rdf_start_load(+SourceURL, -WhatToDo) is det.
1517%!  rdf_end_load(+WhatToDo) is det.
1518%!  rdf_load_file(+WhatToDo, +Spec, +SourceURL, +Protocol, +Graph,
1519%!                +Module, +Options) is det.
1520%
1521%   Of these three predicates, rdf_load_file/7   does the real work.
1522%   The others deal with the  possibility   that  the graph is being
1523%   loaded by another thread. In that case,   we  wait for the other
1524%   thread to complete the work.
1525%
1526%   @tbd    What if both threads disagree on what is loaded into the
1527%           graph?
1528%   @see    Code is modelled closely after how concurrent loading
1529%           is handled in SWI-Prolog's boot/init.pl
1530
% If a different thread is registered in rdf_loading/3 as loading
% SourceURL, return queue(Queue) holding the queue of that load.
% Otherwise create a new message queue, register this thread as the
% loader and return the reference of the asserted rdf_loading/3
% clause.

rdf_start_load(SourceURL, WhatToDo) :-
    (   WhatToDo = queue(Queue),
        rdf_loading(SourceURL, Queue, Loader),
        \+ thread_self(Loader)
    ->  debug(rdf(load), '~p is being loaded by thread ~w; waiting ...',
              [ SourceURL, Loader])
    ;   thread_self(Me),
        message_queue_create(NewQueue),
        assertz(rdf_loading(SourceURL, NewQueue, Me), WhatToDo)
    ).
1541
% Nothing needs to be done if we merely waited on a queue.  Otherwise
% the argument is the reference of our rdf_loading/3 clause: erase
% it, send `done` to the queue and destroy the queue.

rdf_end_load(WhatToDo) :-
    (   WhatToDo = queue(_)
    ->  true
    ;   clause(rdf_loading(_, Queue, _), _, WhatToDo),
        erase(WhatToDo),
        thread_send_message(Queue, done),
        message_queue_destroy(Queue)
    ).
1548
% Clause 1: rdf_start_load/2 returned queue(Queue) because another
% thread is loading the source.  Wait for a message on Queue.  Any
% exception raised while waiting is ignored.
rdf_load_file(queue(Queue), _Spec, _SourceURL, _Protocol, _Graph, _M, _Options) :-
    !,
    catch(thread_get_message(Queue, _), _, true).
% Clause 2: this thread does the loading.  After opening the input
% there are three cases, selected on Modified:
%
%   - =not_modified=: nothing is loaded (Action = none).
%   - cached(CacheFile): unload the graph and load the cache file.
%   - otherwise: unload the graph (unless multifile(true) is given),
%     register the graph source, parse the stream and try to save a
%     cache file.
rdf_load_file(_Ref, _Spec, SourceURL, Protocol, Graph, M, Options) :-
    debug(rdf(load), 'RDF: Loading ~q into ~q', [SourceURL, Graph]),
    statistics(cputime, T0),
    rdf_open_input(SourceURL, Protocol, Graph,
                   In, Cleanup, Modified, Format, Options),
    % supported_format/2 runs Cleanup and raises an existence error if
    % no plugin declares Format.
    supported_format(Format, Cleanup),
    return_modified(Modified, Options),
    (   Modified == not_modified
    ->  Action = none
    ;   Modified = cached(CacheFile)
    ->  do_unload(Graph),
        % An exception while loading the cache file is turned into
        % failure of this predicate.
        catch(rdf_load_db_no_admin(CacheFile, cache(Graph), Graphs), _, fail),
        check_loaded_cache(Graph, Graphs, Modified),
        Action = load
    ;   % The base URI defaults to Graph, or to SourceURL if that
        % leaves it unbound.
        option(base_uri(BaseURI), Options, Graph),
        (   var(BaseURI)
        ->  BaseURI = SourceURL
        ;   true
        ),
        % Extra holds a namespaces(NSList) option for each request to
        % register namespaces that is found in Options.
        once(phrase(derived_options(Options, NSList), Extra)),
        merge_options([ base_uri(BaseURI),
                        graph(Graph),
                        format(Format)
                      | Extra
                      ], Options, RDFOptions),
        (   option(multifile(true), Options)
        ->  true
        ;   do_unload(Graph)
        ),
        graph_modified(Modified, ModifiedStamp),
        rdf_set_graph_source(Graph, SourceURL, ModifiedStamp),
        % Cleanup runs when the parser is done, also on failure or
        % exception.
        call_cleanup(rdf_load_stream(Format, In, M:RDFOptions),
                     Cleanup),
        save_cache(Graph, SourceURL, Options),
        register_file_prefixes(NSList),
        format_action(Format, Action)
    ),
    rdf_statistics_(triples(Graph, Triples)),
    report_loaded(Action, SourceURL, Graph, Triples, T0, Options).
1591
%   supported_format(+Format, :Cleanup)
%
%   Succeeds if rdf_file_type/2 knows Format.  Otherwise Cleanup is
%   called and an existence error for `rdf_format_plugin` is raised.

supported_format(Format, Cleanup) :-
    (   rdf_file_type(_, Format)
    ->  true
    ;   call(Cleanup),
        existence_error(rdf_format_plugin, Format)
    ).
1598
%   format_action(+Format, -Action)
%
%   Action is `load` for the format `triples` and `parsed` for all
%   other formats.

format_action(Format, Action) :-
    (   Format = triples,
        Action = load
    ->  true
    ;   Action = parsed
    ).
1601
%   save_cache(+Graph, +SourceURL, +Options)
%
%   Save Graph to its cache file using save_cache/2 if caching is
%   enabled (option cache(true), which is the default) and
%   rdf_cache_file/3 provides a file to write.  An exception raised
%   while saving is printed as a warning.  Does nothing if caching
%   does not apply.

save_cache(Graph, SourceURL, Options) :-
    (   option(cache(true), Options, true),
        rdf_cache_file(SourceURL, write, CacheFile)
    ->  catch(save_cache(Graph, CacheFile), Error,
              print_message(warning, Error))
    ;   true
    ).
1609
%   derived_options(+Options, ?NSList)//
%
%   Emit one namespaces(NSList) element for every member of Options
%   that is identical to register_namespaces(true) or
%   register_namespaces=true.  All other options emit nothing.

derived_options([], _) -->
    [].
derived_options([Option|Options], NSList) -->
    derived_option(Option, NSList),
    derived_options(Options, NSList).

derived_option(Option, NSList) -->
    {   Option == register_namespaces(true)
    ;   Option == (register_namespaces = true)
    },
    !,
    [ namespaces(NSList) ].
derived_option(_, _) -->
    [].
1620
%   graph_modified(+Modified, -Stamp)
%
%   Stamp is the time stamp to register for the graph: the stamp
%   carried by last_modified(Stamp), or the current time if Modified
%   is `unknown`.

graph_modified(last_modified(Time), Time).
graph_modified(unknown, Now) :-
    get_time(Now).
1624
%   return_modified(+Modified, +Options)
%
%   If Options holds modified(M), unify M with Modified; this fails
%   if the two do not unify.  Succeeds without effect if there is no
%   such option.

return_modified(Modified, Options) :-
    (   option(modified(Requested), Options)
    ->  Requested = Modified
    ;   true
    ).
1630
1631
1632                 /*******************************
1633                 *        INPUT HANDLING        *
1634                 *******************************/
1635
1636/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1637This section deals with pluggable input sources.  The task of the input
1638layer is
1639
1640    * Decide on the graph-name
1641    * Decide on the source-location
1642    * Decide whether loading is needed (if-modified)
1643    * Decide on the serialization in the input
1644
1645The protocol must ensure minimal  overhead,   in  particular for network
1646protocols. E.g. for HTTP we want to make a single call on the server and
use If-modified-since to verify that we need not reload this file.
1648- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1649
1650%!  rdf_open_input(+SourceURL, +Protocol, +Graph,
1651%!                 -Stream, -Cleanup, -Modified, -Format, +Options)
1652%
1653%   Open an input source.
1654%
1655%   Options processed:
1656%
1657%       * graph(Graph)
1658%       * db(Graph)
1659%       * if(Condition)
1660%       * cache(Cache)
1661%       * format(Format)
1662%
1663%   @param  Modified is one of =not_modified=, last_modified(Time),
1664%           cached(CacheFile) or =unknown=
1665
% The first conditional tries to establish HaveModified, the time
% stamp of what we already have.  HaveModified and CacheFile are
% deliberately left unbound when they do not apply:
%
%   - multifile(true) or if(true): no check; both stay unbound.
%   - the graph has a registered source: HaveModified comes from
%     rdf_graph_source_/3; CacheFile stays unbound.
%   - caching is enabled and there is a cache file to read:
%     HaveModified is the time of CacheFile.
%   - otherwise: both stay unbound.
rdf_open_input(SourceURL, Protocol, Graph,
               Stream, Cleanup, Modified, Format, Options) :-
    (   option(multifile(true), Options)
    ->  true
    ;   option(if(If), Options, changed),
        (   If == true
        ->  true
        ;   rdf_graph_source_(Graph, SourceURL, HaveModified)
        ->  true
        ;   option(cache(true), Options, true),
            rdf_cache_file(SourceURL, read, CacheFile)
        ->  time_file(CacheFile, HaveModified)
        ;   true
        )
    ),
    % Format is taken from the options if given and is unbound
    % otherwise.
    option(format(Format), Options, _),
    open_input_if_modified(Protocol, SourceURL, HaveModified,
                           Stream, Cleanup, Modified0, Format, Options),
    % If the source is not modified and HaveModified came from a
    % cache file, report cached(CacheFile) so the caller loads the
    % cache.
    (   Modified0 == not_modified
    ->  (   nonvar(CacheFile)
        ->  Modified = cached(CacheFile)
        ;   Modified = not_modified
        )
    ;   Modified = Modified0
    ).
1691
1692
%!  source_url(+Spec, -Class, -SourceURL) is det.
%
%   Classify the input specification Spec and compute its URL.
%   Class is one of
%
%       * stream(Stream)
%       Spec is stream(Stream) or a plain stream handle.
%       * file(StorageExt)
%       Spec was resolved to a local file by source_file/3.
%       * a url-protocol (e.g., =http=)
%       Spec is a URL accepted by is_url/3.

source_url(stream(Stream), stream(Stream), URL) :-
    !,
    (   stream_property(Stream, file_name(Name))
    ->  to_url(Name, URL)
    ;   gensym('stream://', URL)        % anonymous stream: invent a URL
    ).
source_url(Handle, Class, URL) :-
    is_stream(Handle),
    !,
    source_url(stream(Handle), Class, URL).
source_url(Term, Class, URL) :-         % e.g. Alias(Path) file specs
    compound(Term),
    !,
    source_file(Term, Class, URL).
source_url(FileURL, Class, URL) :-      % or return FileURL?
    uri_file_name(FileURL, Path),
    !,
    source_file(Path, Class, URL).
source_url(Atom, Scheme, URL) :-
    is_url(Atom, Scheme, URL),
    !.
source_url(Name, Class, URL) :-
    source_file(Name, Class, URL).
1724
%   source_file(+Spec, -Class, -SourceURL)
%
%   Resolve Spec to a readable file, trying the plain name and every
%   extension enumerated by valid_extension/1.  Class is
%   file(StorageExt), where StorageExt comes from storage_extension/3,
%   and SourceURL is the file URL of the file found.

source_file(Spec, file(StorageExt), URL) :-
    findall(E, valid_extension(E), Known),
    Candidates = [''|Known],
    absolute_file_name(Spec, Path,
                       [ access(read),
                         extensions(Candidates)
                       ]),
    storage_extension(_, StorageExt, Path),
    uri_file_name(URL, Path).
1730
%   to_url(+Spec, -URL)
%
%   URL is Spec itself if Spec is a global URI.  Otherwise Spec is
%   treated as a file name and URL is the file URL of its absolute
%   path.

to_url(Spec, URL) :-
    (   URL = Spec,
        uri_is_global(Spec)
    ->  true
    ;   absolute_file_name(Spec, Path),
        uri_file_name(URL, Path)
    ).
1737
%   storage_extension(-Plain, -SExt, +File)
%
%   Split off a storage extension.  If File ends in a non-empty
%   extension known to rdf_storage_encoding/2, SExt is that extension
%   and Plain is File without it.  Otherwise SExt is '' and Plain is
%   File.

storage_extension(Plain, SExt, File) :-
    (   file_name_extension(Plain, SExt, File),
        SExt \== '',
        rdf_storage_encoding(SExt, _)
    ->  true
    ;   Plain = File,
        SExt = ''
    ).
1744
%!  load_graph(+SourceURL, -Graph, +Options) is det.
%
%   Determine the Graph into which the data is loaded.  Candidates,
%   in order of preference:
%
%     1. The graph(Graph) option
%     2. The db(Graph) option (backward compatibility)
%     3. The base_uri(BaseURI) option
%     4. The source URL
%
%   With multifile(true) only (1) and (2) are acceptable and the
%   result must be ground.
%
%   @error existence_error(option, graph) if multifile(true) is given
%          without a ground graph(Graph) or db(Graph) option.

load_graph(_Source, Graph, Options) :-
    option(multifile(true), Options),
    !,
    (   (   option(graph(Graph), Options)
        ->  true
        ;   option(db(Graph), Options)
        ),
        ground(Graph)
    ->  true
    ;   throw(error(existence_error(option, graph),
                    context(_, "rdf_load/2: using multifile requires graph")))
    ).
load_graph(Source, Graph, Options) :-
    (   (   option(graph(Graph), Options)
        ;   option(db(Graph), Options)
        )
    ->  true
    ;   true                            % no explicit graph: keep unbound
    ),
    load_graph2(Source, Graph, Options).
1775
%   load_graph2(+Source, ?Graph, +Options)
%
%   Complete the graph selection.  A ground Graph is accepted as is.
%   Otherwise a ground, non-[] base_uri(Graph) option is used, and as
%   a last resort the graph is derived from Source by load_graph/2.

load_graph2(Source, Graph, Options) :-
    (   ground(Graph)
    ->  true
    ;   option(base_uri(Graph), Options),
        Graph \== [],
        ground(Graph)
    ->  true
    ;   load_graph(Source, Graph)
    ).
1786
%   load_graph(+SourceURL, -BaseURI)
%
%   BaseURI is SourceURL with its extension removed if that extension
%   is known to rdf_storage_encoding/2, and SourceURL itself
%   otherwise.

load_graph(SourceURL, BaseURI) :-
    (   file_name_extension(BaseURI, Ext, SourceURL),
        rdf_storage_encoding(Ext, _)
    ->  true
    ;   BaseURI = SourceURL
    ).
1792
1793
%   open_input_if_modified(+Protocol, +SourceURL, +HaveModified,
%                          -Stream, -Cleanup, -Modified, ?Format,
%                          +Options)
%
%   Open SourceURL for reading unless it is known to be unmodified.
%   HaveModified is the time stamp we already have, or unbound if
%   there is none.  Modified is unified with =unknown= (streams),
%   =not_modified= or last_modified(Time) for files; for other
%   protocols it is whatever rdf_open_hook/8 returns.  An unbound
%   Format is filled in using guess_format/2 in the stream and file
%   clauses.

% Already open stream: nothing to open or clean up.
open_input_if_modified(stream(In), SourceURL, _, In, true,
                       unknown, Format, _) :-
    !,
    (   var(Format)
    ->  guess_format(SourceURL, Format)
    ;   true
    ).
% Local file, possibly with a storage extension SExt.
open_input_if_modified(file(SExt), SourceURL, HaveModified, Stream, Cleanup,
                       Modified, Format, _) :-
    !,
    uri_file_name(SourceURL, File),
    % Plain is File without the storage extension; it is used to guess
    % the format.
    (   SExt == '' -> Plain = File; file_name_extension(Plain, SExt, File)),
    time_file(File, LastModified),
    (   nonvar(HaveModified),
        HaveModified >= LastModified
    ->  Modified = not_modified,        % Stream stays unbound here
        Cleanup = true
    ;   storage_open(SExt, File, Stream, Cleanup),
        Modified = last_modified(LastModified),
        (   var(Format)
        ->  guess_format(Plain, Format)
        ;   true
        )
    ).
% Plain `file` is handled as a file without storage extension.
open_input_if_modified(file, SourceURL, HaveModified, Stream, Cleanup,
                       Modified, Format, Options) :-
    !,
    open_input_if_modified(file(''), SourceURL, HaveModified,
                           Stream, Cleanup,
                           Modified, Format, Options).
% Any other protocol is delegated to rdf_open_hook/8.
open_input_if_modified(Protocol, SourceURL, HaveModified, Stream, Cleanup,
                       Modified, Format, Options) :-
    rdf_open_hook(Protocol, SourceURL, HaveModified, Stream, Cleanup,
                  Modified, Format, Options).
1828
%   guess_format(+File, -Format)
%
%   Derive Format from the extension of File using rdf_file_type/2.
%   For an unknown extension, Format is =xml= and a warning is
%   printed.

guess_format(Name, Format) :-
    file_name_extension(_Base, Extension, Name),
    (   rdf_file_type(Extension, Format)
    ->  true
    ;   Format = xml,
        print_message(warning, rdf(guess_format(Extension)))
    ).
1836
%!  storage_open(+Extension, +File, -Stream, -Cleanup)
%
%   Open the low-level storage.  Without a storage extension the
%   file is opened directly, in binary mode, and Cleanup closes the
%   stream.  This is the same as for HTTP resources; the correct
%   encoding will be set by the XML parser or the Turtle parser.
%   With a storage extension, opening is delegated to
%   rdf_open_decode/4 using the encoding from rdf_storage_encoding/2.

storage_open('', Path, In, close(In)) :-
    !,
    open(Path, read, In, [type(binary)]).
storage_open(StorageExt, Path, In, Cleanup) :-
    rdf_storage_encoding(StorageExt, Decoder),
    rdf_open_decode(Decoder, Path, In, Cleanup).
1849
%   valid_extension(?Ext) is nondet.
%
%   Ext is an extension known to rdf_file_type/2 or to
%   rdf_storage_encoding/2, enumerated in that order.

valid_extension(Ext) :-
    (   rdf_file_type(Ext, _)
    ;   rdf_storage_encoding(Ext, _)
    ).
1854
%!  is_url(@Term, -Scheme, -URL) is semidet.
%
%   True if Term is an atom denoting a global URL whose scheme is
%   registered through the multifile hook url_protocol/1.  URL is the
%   normalized form of Term (see uri_normalized/2) with a possible
%   fragment identifier (#fragment) removed.

is_url(Term, Scheme, FetchURL) :-
    atom(Term),
    uri_is_global(Term),
    uri_normalized(Term, Normalized),           % case normalization
    uri_components(Normalized, Parts),
    uri_data(scheme, Parts, Found),
    url_protocol(Found),
    !,
    Scheme = Found,
    uri_data(fragment, Parts, _, NoFragment),   % drop #fragment
    uri_components(FetchURL, NoFragment).
1874
%   url_protocol(?Scheme)
%
%   URL schemes accepted by is_url/3.  Only =file= is defined here.

url_protocol(file).                     % built-in
1876
%!  rdf_file_type(+Extension, -Format) is semidet.
%
%   True if Format  is  the  format   belonging  to  the  given file
%   extension.  This predicate is multifile and can thus be extended
%   by plugins.  The formats defined here are =xml= (RDF/XML),
%   =xhtml= and =triples=.

rdf_file_type(xml,   xml).
rdf_file_type(rdf,   xml).
rdf_file_type(rdfs,  xml).
rdf_file_type(owl,   xml).
rdf_file_type(htm,   xhtml).
rdf_file_type(html,  xhtml).
rdf_file_type(xhtml, xhtml).
rdf_file_type(trp,   triples).
1891
1892
%!  rdf_storage_encoding(+Extension, -Format) is semidet.
%
%   True if Format describes the storage encoding of a file with the
%   given Extension.  Only the empty extension (=plain=) is defined
%   here.

rdf_storage_encoding('', plain).
1898
1899
%!  rdf_load_stream(+Format, +Stream, :Options)
%
%   Load RDF data in Format from Stream.  The formats =xml= and
%   =xhtml= are parsed inside an RDF transaction; =xhtml= adds the
%   option embedded(true).  The format =triples= is loaded using
%   rdf_load_db_/3.
%
%   @tbd    Handle mime-types?

rdf_load_stream(xml, In, QOptions) :-
    !,
    graph(QOptions, Target),
    rdf_transaction(load_stream(In, QOptions),
                    parse(Target)).
rdf_load_stream(xhtml, In, Module:Options) :-
    !,
    graph(Options, Target),
    Embedded = [embedded(true)|Options],
    rdf_transaction(load_stream(In, Module:Embedded),
                    parse(Target)).
rdf_load_stream(triples, In, QOptions) :-
    !,
    graph(QOptions, Target),
    rdf_load_db_(In, Target, _Graphs).
1920
%   load_stream(+Stream, :Options)
%
%   Parse Stream using process_rdf/3, asserting the triples through
%   assert_triples/2, and then clear the modified flag of the graph
%   named by the graph(Graph) option.

load_stream(In, Module:Options) :-
    process_rdf(In, assert_triples, Module:Options),
    option(graph(Target), Options),
    rdf_graph_clear_modified_(Target).
1925
1926
%!  report_loaded(+Action, +Source, +DB, +Triples, +StartCPU, +Options)
%
%   Print the rdf(loaded(...)) message, including the CPU time used
%   since StartCPU.  Nothing is printed if Action is =none=.  With
%   the option silent(true) the message level is =silent= instead of
%   =informational=.

report_loaded(none, _, _, _, _, _) :- !.
report_loaded(Action, Source, DB, Triples, StartCPU, Options) :-
    statistics(cputime, NowCPU),
    Used is NowCPU - StartCPU,
    (   option(silent(true), Options)
    ->  Kind = silent
    ;   Kind = informational
    ),
    Message = rdf(loaded(Action, Source, DB, Triples, Used)),
    print_message(Kind, Message).
1939
1940
%!  rdf_unload(+Source) is det.
%
%   Identify the graph loaded from Source and erase it using
%   rdf_unload_graph/1.  Succeeds silently if nothing matches.
%
%   @deprecated     For compatibility, this predicate also accepts a
%                   graph name instead of a source specification;
%                   a warning is printed in that case.  Please
%                   update your code to use rdf_unload_graph/1.

rdf_unload(Source) :-
    source_url(Source, _Class, URL),
    rdf_graph_source_(Loaded, URL, _),
    !,
    rdf_unload_graph(Loaded).
rdf_unload(Name) :-                     % deprecated: Name is a graph
    atom(Name),
    rdf_graph(Name),
    !,
    warn_deprecated_unload(Name),
    rdf_unload_graph(Name).
rdf_unload(_).
1963
1964:- dynamic
1965    warned/0.
1966
%   warn_deprecated_unload(+Graph)
%
%   Print the deprecation warning for calling rdf_unload/1 with a
%   graph name.  The dynamic flag warned/0 ensures the warning is
%   printed only the first time.

warn_deprecated_unload(Graph) :-
    (   warned
    ->  true
    ;   assertz(warned),
        print_message(warning, rdf(deprecated(rdf_unload(Graph))))
    ).
1973
1974
%!  rdf_unload_graph(+Graph) is det.
%
%   Remove Graph from the RDF store.  The removal runs inside an RDF
%   transaction.  Succeeds silently if the named graph does not
%   exist.
%
%   @error type_error(atom, Graph) if Graph is not an atom.

rdf_unload_graph(Graph) :-
    must_be(atom, Graph),
    (   \+ rdf_graph(Graph)
    ->  true
    ;   rdf_transaction(do_unload(Graph), unload(Graph))
    ).
1986
%   do_unload(+Graph)
%
%   Retract all triples of Graph, if it has any, and then destroy
%   the graph itself.

do_unload(Graph) :-
    (   rdf_graph_(Graph, Count),
        Count > 0
    ->  rdf_retractall(_, _, _, Graph)
    ;   true
    ),
    rdf_destroy_graph(Graph).
1994
1995                 /*******************************
1996                 *         GRAPH QUERIES        *
1997                 *******************************/
1998
1999%!  rdf_create_graph(+Graph) is det.
2000%
2001%   Create an RDF graph without triples.   Succeeds  silently if the
2002%   graph already exists.
2003
2004
%!  rdf_graph(?Graph) is nondet.
%
%   True when Graph is an existing graph.  This is rdf_graph_/2
%   with the triple count ignored.

rdf_graph(Graph) :-
    rdf_graph_(Graph, _Triples).
2011
%!  rdf_source(?Graph, ?SourceURL) is nondet.
%
%   True if the existing named Graph is loaded from SourceURL.
%
%   @deprecated Use rdf_graph_property(Graph, source(SourceURL)).

rdf_source(Graph, URL) :-
    rdf_graph(Graph),
    rdf_graph_source_(Graph, URL, _).
2021
%!  rdf_source(?Source) is nondet.
%
%   True if Source is the source URL of some loaded graph.
%
%   @deprecated     Use rdf_graph/1 or rdf_source/2.

rdf_source(SourceURL) :-
    rdf_source(_Graph, SourceURL).
2030
%!  rdf_make
%
%   Reload all loaded files that have been modified since the last
%   time they were loaded.  The candidates are collected first.
%   Each is then reloaded with if(changed) into its original graph.
%   Errors raised while loading are printed and do not stop the
%   remaining reloads.

rdf_make :-
    findall(URL-Name, modified_graph(URL, Name), Pairs),
    forall(member(Source-Graph, Pairs),
           catch(rdf_load(Source, [graph(Graph), if(changed)]),
                 Error,
                 print_message(error, Error))).
2041
%   modified_graph(-SourceURL, -Graph) is nondet.
%
%   Graph is a candidate for rdf_make/0: it has a recorded source
%   that is not a generated 'stream://' URL and a recorded
%   modification time greater than 0.

modified_graph(URL, Graph) :-
    rdf_graph(Graph),
    rdf_graph_source_(Graph, URL, Stamp),
    \+ sub_atom(URL, 0, _, _, 'stream://'),
    Stamp > 0.
2047
%!  rdf_graph_property(?Graph, ?Property) is nondet.
%
%   True when Property is a property of Graph.  Defined properties
%   are:
%
%       * hash(Hash)
%       Hash is the (MD5-)hash for the content of Graph.
%       * modified(Boolean)
%       True if the graph is modified since it was loaded or
%       rdf_set_graph/2 was called with modified(false).
%       * source(Source)
%       The graph is loaded from the Source (a URL)
%       * source_last_modified(?Time)
%       Time is the last-modified timestamp of Source at the moment
%       the graph was loaded from Source.
%       * triples(Count)
%       True when Count is the number of triples in Graph.
%
%    Additional graph properties can be added  by defining rules for
%    the multifile predicate  property_of_graph/2.   Currently,  the
%    following extensions are defined:
%
%       - library(semweb/rdf_persistency)
%         - persistent(Boolean)
%           Boolean is =true= if the graph is persistent.

rdf_graph_property(Graph, Property) :-
    rdf_graph(Graph),                   % enumerate/verify existing graphs
    property_of_graph(Property, Graph).
2077
2078:- multifile
2079    property_of_graph/2.
2080
%   property_of_graph(?Property, +Graph) is nondet.
%
%   Implementation of rdf_graph_property/2 for a given Graph.  This
%   predicate is multifile, so other files may add clauses.

property_of_graph(hash(Hash), Graph) :-
    rdf_md5(Graph, Hash).
property_of_graph(modified(Boolean), Graph) :-
    rdf_graph_modified_(Graph, Boolean, _).
property_of_graph(source(URL), Graph) :-
    rdf_graph_source_(Graph, URL, _).
property_of_graph(source_last_modified(Time), Graph) :-
    rdf_graph_source_(Graph, _, Time),
    Time > 0.0.                         % only report a positive time stamp
property_of_graph(triples(Count), Graph) :-
    rdf_graph_(Graph, Count).
2092
%!  rdf_set_graph(+Graph, +Property) is det.
%
%   Set properties of Graph.  Defined properties are:
%
%       * modified(false)
%       Set the modified state of Graph to false.  =false= is the
%       only value accepted.

rdf_set_graph(Graph, modified(State)) :-
    must_be(oneof([false]), State),
    rdf_graph_clear_modified_(Graph).
2103
2104
%!  save_cache(+DB, +Cache) is semidet.
%
%   Save the triples belonging to DB in the binary file Cache, using
%   the format version from the Prolog flag =rdf_triple_format=.
%   Fails silently if Cache cannot be opened for writing.

save_cache(DB, Cache) :-
    current_prolog_flag(rdf_triple_format, FormatVersion),
    setup_call_cleanup(
        catch(open(Cache, write, Out, [type(binary)]), _, fail),
        rdf_save_db_(Out, DB, FormatVersion),
        close(Out)).
2115
%!  assert_triples(+Triples, +Source)
%
%   Assert a list of triples into the database.  For security
%   reasons we check that we are not inserting anything but
%   rdf(S,P,O) terms.
%
%   @error type_error(rdf_triple, Term) if an element of Triples is
%          not of the form rdf(S,P,O).

assert_triples([], _).
assert_triples([Triple|Rest], DB) :-
    (   Triple = rdf(S,P,O)
    ->  rdf_assert(S, P, O, DB),
        assert_triples(Rest, DB)
    ;   throw(error(type_error(rdf_triple, Triple), _))
    ).
2129
2130
2131                 /*******************************
2132                 *             RESET            *
2133                 *******************************/
2134
%!  rdf_reset_db
%
%   Remove all triples from the RDF database and reset all its
%   statistics.  Also resets the gensym counter for the key
%   '_:genid'.
%
%   @bug    This predicate checks for active queries, but this check is
%           not properly synchronized and therefore the use of this
%           predicate is unsafe in multi-threaded contexts. It is
%           mainly used to run functionality tests that need to
%           start with an empty database.

rdf_reset_db :-
    reset_gensym('_:genid'),
    rdf_reset_db_.
2149
2150
2151                 /*******************************
2152                 *           SAVE RDF           *
2153                 *******************************/
2154
2155%!  rdf_save(+Out) is det.
2156%
2157%   Same as rdf_save(Out, []).  See rdf_save/2 for details.
2158
2159%!  rdf_save(+Out, :Options) is det.
2160%
2161%   Write RDF data as RDF/XML. Options is a list of one or more of
2162%   the following options:
2163%
2164%           * graph(+Graph)
2165%           Save only triples associated to the given named Graph.
2166%
2167%           * anon(Bool)
2168%           If =false= (default =true=) do not save blank nodes that do
2169%           not appear (indirectly) as object of a named resource.
2170%
2171%           * base_uri(URI)
2172%           BaseURI used. If present, all URIs that can be
2173%           represented relative to this base are written using
2174%           their shorthand.  See also =write_xml_base= option.
2175%
2176%           * convert_typed_literal(:Convertor)
2177%           Call Convertor(-Type, -Content, +RDFObject), providing
2178%           the opposite for the convert_typed_literal option of
2179%           the RDF parser.
2180%
2181%           * document_language(+Lang)
2182%           Initial =|xml:lang|= saved with rdf:RDF element.
2183%
2184%           * encoding(Encoding)
2185%           Encoding for the output.  Either utf8 or iso_latin_1.
2186%
2187%           * inline(+Bool)
2188%           If =true= (default =false=), inline resources when
2189%           encountered for the first time. Normally, only bnodes
2190%           are handled this way.
2191%
2192%           * namespaces(+List)
2193%           Explicitly specify saved namespace declarations. See
2194%           rdf_save_header/2 option namespaces for details.
2195%
2196%           * sorted(+Boolean)
2197%           If =true= (default =false=), emit subjects sorted on
2198%           the full URI.  Useful to make file comparison easier.
2199%
2200%           * write_xml_base(Bool)
2201%           If =false=, do _not_ include the =|xml:base|=
2202%           declaration that is written normally when using the
2203%           =base_uri= option.
2204%
2205%           * xml_attributes(+Bool)
2206%           If =false= (default =true=), never use xml attributes to
%           save plain literal attributes, i.e., always use an XML
2208%           element as in =|<name>Joe</name>|=.
2209%
2210%   @param Out      Location to save the data.  This can also be a
2211%                   file-url (=|file://path|=) or a stream wrapped
2212%                   in a term stream(Out).
2213%   @see rdf_save_db/1
2214
2215:- thread_local
2216    named_anon/2,                   % +Resource, -Id
2217    inlined/1.                      % +Resource
2218
%   rdf_save(+Out): same as rdf_save(Out, []).  Out is passed through
%   to_file/2, as rdf_save/2 does, so that a file URL is translated
%   into a file name.  Other specifications (plain file names,
%   stream(Out)) are passed unchanged.

rdf_save(Out) :-
    to_file(Out, File),
    rdf_save2(File, []).
2221
%   rdf_save(+Out, :Options): see the documentation above.  The
%   second clause accepts an atom instead of an option list and
%   treats it as the graph to save (backward compatibility).

rdf_save(Out, Module:OptionList) :-
    is_list(OptionList),
    !,
    to_file(Out, File),
    meta_options(save_meta_option, Module:OptionList, Options),
    rdf_save2(File, Options).
rdf_save(Out, _:Graph) :-
    atom(Graph),                    % backward compatibility
    !,
    to_file(Out, File),
    rdf_save2(File, [graph(Graph)]).
2233
%   save_meta_option(?Name)
%
%   Options of rdf_save/2 that are module sensitive; passed to
%   meta_options/3.

save_meta_option(convert_typed_literal).
2235
%   to_file(+Spec, -File)
%
%   File is the file name denoted by Spec if Spec is an atom holding
%   a file URL.  Any other Spec is returned unchanged.

to_file(Spec, File) :-
    (   atom(Spec),
        uri_file_name(Spec, File)
    ->  true
    ;   File = Spec
    ).
2241
%   rdf_save2(+File, +Options)
%
%   Do the work for rdf_save/1,2.  File is a file name or
%   stream(Out).  The output is opened and the counters
%   =rdf_db_saved_subjects= and =rdf_db_saved_triples= are reset as
%   the setup step of setup_call_catcher_cleanup/4, so cleanup_save/5
%   is guaranteed to run once the output has been opened.  This also
%   replaces the deprecated call_cleanup/3.
%
%   @error domain_error(encoding, Enc) if the encoding(Enc) option
%          is not supported.

rdf_save2(File, Options) :-
    option(encoding(Encoding), Options, utf8),
    valid_encoding(Encoding),
    setup_call_catcher_cleanup(
        (   open_output(File, Encoding, Out, Close),
            flag(rdf_db_saved_subjects, OSavedSubjects, 0),
            flag(rdf_db_saved_triples, OSavedTriples, 0)
        ),
        rdf_do_save(Out, Options),
        Reason,
        cleanup_save(Reason,
                     File,
                     OSavedSubjects,
                     OSavedTriples,
                     Close)).
2255
%   open_output(+Spec, +Encoding, -Out, -Cleanup)
%
%   Out is the stream to write to and Cleanup the goal to call when
%   done.  For stream(Out) the encoding of the stream is switched to
%   Encoding, unless it already is Encoding or =wchar_t=, and Cleanup
%   restores the old encoding.  Otherwise Spec is opened as a file
%   and Cleanup closes it.

open_output(stream(Out), Encoding, Out, Cleanup) :-
    !,
    stream_property(Out, encoding(Current)),
    (   Current \== Encoding,
        Current \== wchar_t             % Internal encoding
    ->  set_stream(Out, encoding(Encoding)),
        Cleanup = set_stream(Out, encoding(Current))
    ;   Cleanup = true
    ).
open_output(File, Encoding, Out, close(Out)) :-
    open(File, write, Out, [encoding(Encoding)]).
2269
%   valid_encoding(+Enc) is det.
%
%   Succeeds if Enc is known to xml_encoding_name/2.
%
%   @error domain_error(encoding, Enc) otherwise.

valid_encoding(Enc) :-
    xml_encoding_name(Enc, _),
    !.
valid_encoding(Enc) :-
    throw(error(domain_error(encoding, Enc), _)).
2275
2276
%   cleanup_save(+Reason, +File, +OSavedSubjects, +OSavedTriples,
%                :Close)
%
%   Cleanup handler for saving.  Calls Close, restores the two save
%   counters to their old values, and wipes the thread-local
%   named_anon/2 and inlined/1 facts.  If Reason is =exit= the
%   rdf(saved(...)) message is printed; any other Reason is written
%   to =user_error=.

cleanup_save(Reason, File, OSavedSubjects, OSavedTriples, Close) :-
    call(Close),
    flag(rdf_db_saved_subjects, Subjects, OSavedSubjects),
    flag(rdf_db_saved_triples, Triples, OSavedTriples),
    retractall(named_anon(_, _)),
    retractall(inlined(_)),
    (   Reason \== exit
    ->  format(user_error, 'Reason = ~w~n', [Reason])
    ;   print_message(informational,
                      rdf(saved(File, Subjects, Triples)))
    ).
2292
%   rdf_do_save(+Out, +Options)
%
%   Write the header, all non-anonymous subjects and the footer to
%   Out.  With the option sorted(true) the subjects are collected
%   and written in standard order of terms; otherwise they are
%   written as enumerated by rdf_subject_in_graph/2.
%
%   The sorted case uses findall/3 + sort/2 for both the whole
%   database and a single graph.  The former setof/3 failed if there
%   were no subjects, so a sorted save of an empty database failed
%   where an unsorted one produced an empty document.

rdf_do_save(Out, Options0) :-
    rdf_save_header(Out, Options0, Options),
    graph(Options, DB),                 % DB stays unbound: save all
    (   option(sorted(true), Options, false)
    ->  (   var(DB)
        ->  findall(Subject, rdf_subject(Subject), SubjectList)
        ;   findall(Subject, rdf(Subject, _, _, DB:_), SubjectList)
        ),
        sort(SubjectList, Subjects),
        forall(member(Subject, Subjects),
               rdf_save_non_anon_subject(Out, Subject, Options))
    ;   forall(rdf_subject_in_graph(Subject, DB),
               rdf_save_non_anon_subject(Out, Subject, Options))
    ),
    rdf_save_footer(Out),
    !.                                  % dubious cut; without it the
                                        % cleanup handler isn't called!?
2310
%!  rdf_subject_in_graph(-Subject, ?DB) is nondet.
%
%   True when Subject is a subject in the graph DB.  If DB is
%   unbound, all subjects are enumerated.  Otherwise one of two
%   strategies is used:
%
%     - If DB holds more than a tenth of all triples, enumerate all
%       resources and keep those that are a subject in DB.  This
%       needs no additional memory.
%     - Otherwise collect the subjects of all triples in DB and
%       enumerate the unique ones.

rdf_subject_in_graph(Subject, DB) :-
    var(DB),
    !,
    rdf_subject(Subject).
rdf_subject_in_graph(Subject, DB) :-
    rdf_statistics(triples(Total)),
    rdf_graph_property(DB, triples(InGraph)),
    InGraph > Total // 10,
    !,
    rdf_resource(Subject),
    once(rdf(Subject, _, _, DB:_)).
rdf_subject_in_graph(Subject, DB) :-
    findall(S, rdf(S, _, _, DB:_), All),
    list_to_set(All, Unique),
    member(Subject, Unique).
2337
2338
%   graph(+Options, ?DB)
%
%   DB is the graph named in the (possibly module-qualified) option
%   list: the graph(DB) option if present, else the db(DB) option.
%   DB is left unbound if neither is given.

graph(QOptions, DB) :-
    strip_module(QOptions, _, Options),
    (   (   memberchk(graph(Found), Options)
        ;   memberchk(db(Found), Options)
        )
    ->  DB = Found
    ;   true                            % leave unbound
    ).
2347
2348
%!  rdf_save_header(+Fd, +Options)
%
%   Save XML document header, doctype and open the RDF environment.
%   This predicate also sets up the namespace notation.
%
%   Save an RDF header, with the XML header, DOCTYPE, ENTITY and
%   opening the rdf:RDF element with appropriate namespace
%   declarations. It uses the primitives from section 3.5 to
%   generate the required namespaces and desired short-name. Options
%   is one of:
%
%     * graph(+URI)
%     Only search for namespaces used in triples that belong to the
%     given named graph.
%
%     * namespaces(+List)
%     Where List is a list of namespace abbreviations. With this
%     option, the expensive search for all namespaces that may be
%     used by your data is omitted. The namespaces =rdf= and =rdfs=
%     are added to the provided List. If a namespace is not
%     declared, the resource is emitted in non-abbreviated form.

rdf_save_header(Out, Options) :-
    rdf_save_header(Out, Options, _).   % discard the extended options
2373
%   rdf_save_header(+Out, +Options, -OptionsOut)
%
%   Write the XML declaration, the DOCTYPE with one ENTITY per
%   namespace, and the opening rdf:RDF tag with its xmlns:
%   attributes.  An xml:base attribute is written if the
%   base_uri(Base) option is given, unless write_xml_base is given
%   with a value other than =true=.  An xml:lang attribute is written
%   for the document_language(Lang) option.  OptionsOut is Options
%   with nsmap(Map) appended.  For compatibility, Options may also
%   be an atom naming the graph.

rdf_save_header(Out, Options, OptionsOut) :-
    is_list(Options),
    !,
    option(encoding(Enc), Options, utf8),
    xml_encoding(Enc, XMLEncoding),
    format(Out, '<?xml version=\'1.0\' encoding=\'~w\'?>~n', [XMLEncoding]),
    format(Out, '<!DOCTYPE rdf:RDF [', []),
    header_namespaces(Options, Prefixes),
    nsmap(Prefixes, Map),
    append(Options, [nsmap(Map)], OptionsOut),
    forall(member(Alias=URI, Map),
           (   xml_quote_attribute(URI, Quoted, Enc),
               xml_escape_parameter_entity(Quoted, Escaped),
               format(Out, '~N    <!ENTITY ~w \'~w\'>', [Alias, Escaped])
           )),
    format(Out, '~N]>~n~n', []),
    format(Out, '<rdf:RDF', []),
    forall(member(Prefix, Prefixes),
           format(Out, '~N    xmlns:~w="&~w;"~n', [Prefix, Prefix])),
    (   option(base_uri(Base), Options),
        option(write_xml_base(true), Options, true)
    ->  xml_quote_attribute(Base, QuotedBase, Enc),
        format(Out, '~N    xml:base="~w"~n', [QuotedBase])
    ;   true
    ),
    (   memberchk(document_language(Lang), Options)
    ->  format(Out, '~N    xml:lang="~w"', [Lang])
    ;   true
    ),
    format(Out, '>~n', []).
rdf_save_header(Out, Graph, OptionsOut) :-      % compatibility
    atom(Graph),
    rdf_save_header(Out, [graph(Graph)], OptionsOut).
2410
%   xml_encoding(+Enc, -Encoding) is det.
%
%   Encoding is the XML name of the Prolog encoding Enc according to
%   xml_encoding_name/2.
%
%   @error domain_error(rdf_encoding, Enc) if Enc is not supported.

xml_encoding(Enc, Encoding) :-
    xml_encoding_name(Enc, Encoding),
    !.
xml_encoding(Enc, _) :-
    throw(error(domain_error(rdf_encoding, Enc), _)).
2416
%   xml_encoding_name(?PrologEncoding, ?XMLName)
%
%   Table of the supported output encodings and their name as
%   written in the XML declaration.

xml_encoding_name(ascii,       'US-ASCII').
xml_encoding_name(iso_latin_1, 'ISO-8859-1').
xml_encoding_name(utf8,        'UTF-8').
2420
%!  nsmap(+NSIds, -Map:list(id=uri)) is det.
%
%   Map is a list of Id=URI pairs, one for each namespace alias in
%   NSIds, where URI is looked up using ns/2.  The map is compatible
%   with xml_write/2 for dealing with XML-Literals.

nsmap([], []).
nsmap([Alias|Aliases], [Alias=URI|Pairs]) :-
    ns(Alias, URI),
    nsmap(Aliases, Pairs).
2430
%!  xml_escape_parameter_entity(+In, -Out) is det.
%
%   Out is In with every % replaced by &#37;, for use in entity
%   declarations.  Out is In itself if In holds no %.

xml_escape_parameter_entity(In, Out) :-
    (   sub_atom(In, _, _, _, '%')
    ->  atom_codes(In, Codes),
        phrase(escape_parent(Codes), Escaped),
        atom_codes(Out, Escaped)
    ;   Out = In
    ).
2442
%   escape_parent(+Codes)//
%
%   Emit Codes, replacing each percent sign (code 37) by the
%   character reference "&#37;".

escape_parent([]) -->
    [].
escape_parent([Code|Codes]) -->
    { Code == 37 },                     % 37 is the code of '%'
    !,
    "&#37;",
    escape_parent(Codes).
escape_parent([Code|Codes]) -->
    [Code],
    escape_parent(Codes).
2450
2451
%!  header_namespaces(+Options, -List)
%
%   List holds the namespace aliases that are declared as entities
%   in the header.  With the option namespaces(NSL) this is the
%   sorted list of NSL extended with =rdf= and =rdfs=.  Otherwise
%   the aliases are computed by used_namespace_entities/2 for the
%   graph selected by Options.

header_namespaces(Options, List) :-
    (   memberchk(namespaces(Given), Options)
    ->  sort([rdf,rdfs|Given], List)
    ;   graph(Options, DB),
        used_namespace_entities(List, DB)
    ).
2463
2464%!  rdf_graph_prefixes(?Graph, -List:ord_set) is det.
2465%!  rdf_graph_prefixes(?Graph, -List:ord_set, :Options) is det.
2466%
%   List is a sorted list of prefixes (namespaces) in Graph. Options
2468%   defined are:
2469%
2470%       * filter(:Filter)
2471%       optional Filter argument is used to filter the results. It
2472%       is called with 3 additional arguments:
2473%
2474%           ==
2475%           call(Filter, Where, Prefix, URI)
2476%           ==
2477%
%       The Where argument gives the location of the prefix and is
%       one of =subject=, =predicate=, =object= or =type=. The
%       Prefix argument is the potentially new prefix and URI is
%       the full URI that is being processed.
2482%
2483%       * expand(:Goal)
2484%       Hook to generate the graph.  Called using
2485%
2486%           ==
2487%           call(Goal,S,P,O,Graph)
2488%           ==
2489%
2490%       * min_count(+Count)
2491%       Only include prefixes that appear at least N times.  Default
2492%       is 1. Declared prefixes are always returned if found at
2493%       least one time.
2494%
2495%       * get_prefix(:GetPrefix)
2496%       Predicate to extract the candidate prefix from an IRI.  Default
2497%       is iri_xml_namespace/2.
2498
2499
2500:- thread_local
2501    graph_prefix/3.
2502:- meta_predicate
2503    rdf_graph_prefixes(?, -, :).
2504
%   rdf_graph_prefixes(?Graph, -List): rdf_graph_prefixes/3 with an
%   empty option list.

rdf_graph_prefixes(Graph, List) :-
    rdf_graph_prefixes(Graph, List, []).
2507
%   rdf_graph_prefixes(?Graph, -List, :Options)
%
%   Collect the prefixes using prefixes/6 and return them sorted.
%   The thread-local graph_prefix/3 facts used for counting are
%   removed afterwards, however prefixes/6 terminates.  If the third
%   argument is not a list, it is taken to be the filter
%   (compatibility).

rdf_graph_prefixes(Graph, List, Module:OptionList) :-
    is_list(OptionList),
    !,
    meta_options(is_meta, Module:OptionList, Options),
    option(get_prefix(GetPrefix), Options, iri_xml_namespace),
    option(min_count(MinCount), Options, 1),
    option(expand(Expand), Options, rdf_db),
    option(filter(Filter), Options, true),
    call_cleanup(
        prefixes(Expand, Graph, Unsorted, Filter, MinCount, GetPrefix),
        retractall(graph_prefix(_,_,_))),
    sort(Unsorted, List).
rdf_graph_prefixes(Graph, List, Module:Filter) :-
    rdf_graph_prefixes(Graph, List, Module:[filter(Filter)]).
2521
%   is_meta(?Name)
%
%   Options of rdf_graph_prefixes/3 that are module sensitive; passed
%   to meta_options/3.

is_meta(filter).
is_meta(expand).
is_meta(get_prefix).
2525
2526
%   prefixes(:Expand, ?Graph, -Prefixes, :Filter, +MinCount, :GetPrefix)
%
%   Enumerate all triples of Graph using call(Expand, S, P, O, Graph)
%   and record the prefixes of subject, predicate and object in the
%   thread-local graph_prefix/3.  Prefixes is the list of prefixes
%   whose stored count unifies with MinCount.  As add_ns/6 stops
%   counting a prefix once its count unifies with MinCount, these
%   are the prefixes seen at least MinCount times, plus those stored
%   with an unbound count (see add_ns/2).

prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix) :-
    (   call(Expand, S, P, O, Graph),
        add_ns(subject, GetPrefix, Filter, S, MinCount, s(S)),
        add_ns(predicate, GetPrefix, Filter, P, MinCount, sp(S,P)),
        add_ns_obj(GetPrefix, Filter, O, MinCount, spo(S,P,O)),
        fail                            % failure-driven loop over triples
    ;   true
    ),
    findall(Prefix, graph_prefix(Prefix, MinCount, _), Prefixes).
2536
%   add_ns(+Where, :GetPrefix, :Filter, +Resource, +MinCount, +Context)
%
%   Record the prefix of Resource, obtained by calling
%   call(GetPrefix, Resource, Prefix).  Blank nodes and resources
%   with an empty prefix are ignored.  Nothing is done if the prefix
%   already has a count that unifies with MinCount.  Otherwise the
%   prefix is passed to add_ns/2 if Filter is =true= or
%   call(Filter, Where, Prefix, Resource) succeeds.  Always succeeds.

add_ns(Where, GetPrefix, Filter, S, MinCount, Context) :-
    \+ rdf_is_bnode(S),
    call(GetPrefix, S, Full),
    Full \== '',
    !,
    (   graph_prefix(Full, MinCount, _) % reached MinCount: stop counting
    ->  true
    ;   Filter == true                  % no filter given
    ->  add_ns(Full, Context)
    ;   call(Filter, Where, Full, S)
    ->  add_ns(Full, Context)
    ;   true
    ).
add_ns(_, _, _, _, _, _).
2551
%   add_ns(+Prefix, +Context)
%
%   Count an occurrence of Prefix in Context using the thread-local
%   graph_prefix(Prefix, Count, Contexts).  The clause order matters:
%
%     1. Context was already counted for this prefix: do nothing.
%        This also matches a prefix stored with unbound Contexts
%        (clause 3), as memberchk/2 succeeds on an unbound list.
%     2. The prefix is known: increment its count and add Context.
%     3. The prefix is new and registered in ns/2: store it with
%        unbound count and contexts, so the count unifies with any
%        MinCount.
%     4. Otherwise store the prefix with count 1.

add_ns(Full, Context) :-
    graph_prefix(Full, _, Contexts),
    memberchk(Context, Contexts),
    !.
add_ns(Full, Context) :-
    retract(graph_prefix(Full, C0, Contexts)),
    !,
    C1 is C0+1,
    asserta(graph_prefix(Full, C1, [Context|Contexts])).
add_ns(Full, _) :-
    ns(_, Full),
    !,
    asserta(graph_prefix(Full, _, _)).
add_ns(Full, Context) :-
    asserta(graph_prefix(Full, 1, [Context])).
2567
2568
%   add_ns_obj(:GetPrefix, :Filter, +Object, +MinCount, +Context)
%
%   Record the prefix of the object of a triple.  An atom is handled
%   as a resource at location =object=.  For a typed literal with an
%   atomic type, the prefix of the type is recorded at location
%   =type= with context t(Type).  Other objects are ignored.

add_ns_obj(GetPrefix, Filter, Resource, MinCount, Context) :-
    atom(Resource),
    !,
    add_ns(object, GetPrefix, Filter, Resource, MinCount, Context).
add_ns_obj(GetPrefix, Filter, literal(type(Datatype, _)), MinCount, _) :-
    atom(Datatype),
    !,
    add_ns(type, GetPrefix, Filter, Datatype, MinCount, t(Datatype)).
add_ns_obj(_, _, _, _, _).
2578
2579
2580%!  used_namespace_entities(-List, ?Graph) is det.
2581%
2582%   Return the namespace aliases that are actually used in Graph. In
2583%   addition, this predicate creates ns<N>   aliases  for namespaces
2584%   used in predicates because RDF/XML cannot write predicates other
2585%   than as an XML name.
2586
%   First make sure every predicate in Graph has a namespace alias
%   (decl_used_predicate_ns/1 may register new ns<N> aliases), then
%   collect the aliases in use.

used_namespace_entities(List, Graph) :-
    decl_used_predicate_ns(Graph),
    used_namespaces(List, Graph).
2590
%   used_namespaces(-List, ?DB)
%
%   List is the sorted list of namespace aliases for the prefixes
%   found in DB by rdf_graph_prefixes/2.  The alias =rdf= is always
%   included.

used_namespaces(List, DB) :-
    rdf_graph_prefixes(DB, Prefixes),
    ns_abbreviations(Prefixes, Aliases),
    sort([rdf|Aliases], List).
2595
%   ns_abbreviations(+Prefixes, -Aliases)
%
%   Map each prefix to its first alias in ns/2. Prefixes without an
%   alias are dropped.

ns_abbreviations([], []).
ns_abbreviations([Prefix|Prefixes], Aliases) :-
    (   Aliases = [Alias|Rest],
        ns(Alias, Prefix)
    ->  true
    ;   Aliases = Rest
    ),
    ns_abbreviations(Prefixes, Rest).
2603
2604
2605/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2606For every URL used as a predicate  we   *MUST*  define a namespace as we
2607cannot use names holding /, :, etc. as XML identifiers.
2608- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2609
2610:- thread_local
2611    predicate_ns/2.
2612
%   decl_used_predicate_ns(?DB)
%
%   Reset predicate_ns/2 and rebuild it by running
%   decl_predicate_ns/1 on every predicate enumerated by
%   rdf_current_predicate/2 for DB. Always succeeds.

decl_used_predicate_ns(DB) :-
    retractall(predicate_ns(_,_)),
    forall(rdf_current_predicate(Pred, DB),
           ignore(decl_predicate_ns(Pred))).
2620
%   decl_predicate_ns(+Pred)
%
%   Make sure predicate_ns/2 has exactly one entry for Pred:
%
%     1. If an entry exists, do nothing.
%     2. If Pred can be written as NS:Local using a known alias and
%        Local is an XML name, record NS.
%     3. Otherwise split Pred into the shortest namespace part that
%        leaves a local part consisting of XML name codes (see
%        xml_codes/1). If the namespace part is empty, record `-`.
%        If it has an alias in ns/2, record that alias. Else register
%        the first free alias ns1, ns2, ... for it, print a message
%        and record the new alias.

decl_predicate_ns(Pred) :-
    predicate_ns(Pred, _),
    !.
decl_predicate_ns(Pred) :-
    rdf_global_id(NS:Local, Pred),
    xml_name(Local),
    !,
    assert(predicate_ns(Pred, NS)).
decl_predicate_ns(Pred) :-
    atom_codes(Pred, Codes),
    append(NSCodes, LocalCodes, Codes),
    xml_codes(LocalCodes),
    !,
    (   NSCodes \== []
    ->  atom_codes(NS, NSCodes),
        (   ns(Id, NS)
        ->  true                        % alias exists; asserted below
        ;   between(1, infinite, N),
            atom_concat(ns, N, Id),
            \+ ns(Id, _)
        ->  rdf_register_ns(Id, NS),
            print_message(informational,
                          rdf(using_namespace(Id, NS)))
        ),
        % Single assert shared by both branches above.  Asserting in
        % the first branch as well stored the same fact twice.
        assert(predicate_ns(Pred, Id))
    ;   assert(predicate_ns(Pred, -)) % no namespace used
    ).
2648
%   xml_codes(+Codes)
%
%   True if every code in Codes satisfies xml_code/1.

xml_codes([]).
xml_codes([Code|Codes]) :-
    xml_code(Code),
    xml_codes(Codes).

%   xml_code(+Code)
%
%   True if Code is of type `csym` according to code_type/2 or is
%   the minus sign.

xml_code(Code) :-
    (   code_type(Code, csym)
    ->  true
    ;   Code = 0'-                      % Match 0'-
    ).
2658
2659
%!  rdf_save_footer(Out:stream) is det.
%
%   Finish XML generation: discard the named_anon/2 and inlined/1
%   bookkeeping and write the closing `</rdf:RDF>` tag to Out.
%
%   @see rdf_save_header/2, rdf_save_subject/3.

rdf_save_footer(Out) :-
    retractall(named_anon(_, _)),
    retractall(inlined(_)),
    format(Out, '</rdf:RDF>~n').
2670
%!  rdf_save_non_anon_subject(+Out, +Subject, +Options)
%
%   Save Subject and increment the flag `rdf_db_saved_subjects`.
%   A blank node is skipped, without touching the flag, if Options
%   contains anon(false) or if the blank node appears as object of
%   some triple in the graph selected by Options.

rdf_save_non_anon_subject(Out, Subject, Options) :-
    (   rdf_is_bnode(Subject),
        (   memberchk(anon(false), Options)
        ;   graph(Options, DB),
            rdf_db(_, _, Subject, DB)
        )
    ->  true
    ;   rdf_save_subject(Out, Subject, Options),
        flag(rdf_db_saved_subjects, Count, Count+1)
    ).
2686
2687
%!  rdf_save_subject(+Out, +Subject:resource, +Options) is det.
%
%   Save the triples associated to Subject to Out. Options:
%
%     * graph(+Graph)
%     Only save properties from Graph.
%     * base_uri(+URI)
%     * convert_typed_literal(:Goal)
%     * document_language(+XMLLang)
%
%   If the third argument is not a list it is taken to be a graph:
%   unbound means no options, anything else is used as graph(DB).
%
%   @see rdf_save/2 for a description of these options.
%   @throws error(rdf_save_failed(Subject), _) if saving fails.

rdf_save_subject(Out, Subject, Options) :-
    (   is_list(Options)
    ->  option(base_uri(BaseURI), Options, '-'),
        (   rdf_save_subject(Out, Subject, BaseURI, 0, Options)
        ->  format(Out, '~n', [])
        ;   throw(error(rdf_save_failed(Subject), 'Internal error'))
        )
    ;   var(Options)
    ->  rdf_save_subject(Out, Subject, [])
    ;   rdf_save_subject(Out, Subject, [graph(Options)])
    ).
2713
2714
%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
%!                   +Indent:int, +Options) is det.
%
%   Save the properties of Subject, unless Subject is registered in
%   inlined/1, in which case nothing is written.
%
%   @param Indent   Current indentation

rdf_save_subject(Out, Subject, BaseURI, Indent, Options) :-
    (   inlined(Subject)
    ->  true
    ;   do_save_subject(Out, Subject, BaseURI, Indent, Options)
    ).
2727
%   do_save_subject(+Out, +Subject, +BaseURI, +Indent, +Options)
%
%   Collect the Pred=Object pairs of Subject from the graph selected
%   by Options, remove duplicates (reporting how many were dropped),
%   write them and add the number of written pairs to the flag
%   `rdf_db_saved_triples`.

do_save_subject(Out, Subject, BaseURI, Indent, Options) :-
    graph(Options, DB),
    findall(Pred=Object, rdf_db(Subject, Pred, Object, DB), Atts0),
    sort(Atts0, Atts),              % ordered, duplicate-free
    length(Atts0, Total),
    length(Atts, Unique),
    (   Total > Unique
    ->  Del is Total-Unique,
        print_message(informational,
                      rdf(save_removed_duplicates(Del, Subject)))
    ;   true
    ),
    rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options),
    flag(rdf_db_saved_triples, Saved, Saved+Unique).
2742
%   rdf_db(?Subject, ?Pred, ?Object, ?DB)
%
%   As rdf/3 if DB is unbound. Otherwise as rdf/4, matching triples
%   whose source is DB:_.

rdf_db(Subject, Pred, Object, DB) :-
    (   var(DB)
    ->  rdf(Subject, Pred, Object)
    ;   rdf(Subject, Pred, Object, DB:_)
    ).
2749
%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
%!                   +Atts:list(Pred=Obj), +Indent:int, +Options) is det.
%
%   Save triples defined by Atts on Subject. If Atts holds an
%   rdf:type whose value is not a blank node and can be written as
%   an XML name, that type is used as element name and removed from
%   the attributes. Otherwise an rdf:Description element is written.

rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
    rdf_equal(rdf:type, RdfType),
    (   select(RdfType=Type, Atts, Atts1),
        \+ rdf_is_bnode(Type),
        rdf_id(Type, BaseURI, TypeId),
        xml_is_name(TypeId)
    ->  format(Out, '~*|<', [Indent]),
        rdf_write_id(Out, TypeId),
        save_about(Out, BaseURI, Subject, Options),
        save_attributes(Atts1, BaseURI, Out, TypeId, Indent, Options)
    ;   format(Out, '~*|<rdf:Description', [Indent]),
        save_about(Out, BaseURI, Subject, Options),
        save_attributes(Atts, BaseURI, Out, rdf:'Description',
                        Indent, Options)
    ).
2770
%   xml_is_name(+Id)
%
%   True if Id, or the local part of Id if it is of the form
%   NS:Local, satisfies xml_name/1.

xml_is_name(Id) :-
    (   Id = _NS:Local
    ->  xml_name(Local)
    ;   xml_name(Id)
    ).
2776
%!  save_about(+Out, +BaseURI, +Subject, +Options) is det.
%
%   Write the attribute identifying Subject. For a blank node this
%   is rdf:nodeID if the node is registered in named_anon/2 and
%   nothing otherwise. For any other resource it is rdf:about, with
%   the value produced by rdf_value/4 using the encoding(Encoding)
%   option (default `utf8`).

save_about(Out, BaseURI, Subject, Options) :-
    (   rdf_is_bnode(Subject)
    ->  (   named_anon(Subject, NodeID)
        ->  format(Out, ' rdf:nodeID="~w"', [NodeID])
        ;   true
        )
    ;   option(encoding(Encoding), Options, utf8),
        rdf_value(Subject, BaseURI, QSubject, Encoding),
        format(Out, ' rdf:about="~w"', [QSubject])
    ).
2793
%!  save_attributes(+List, +BaseURI, +Stream, +Element, +Indent, +Options)
%
%   Write the attributes in List. The begin tag has been started by
%   the caller. Attributes selected by split_attributes/4 for the
%   tag are written inside it. If nothing remains, the element is
%   closed as an empty element. Otherwise the remaining attributes
%   are written as content, followed by the end tag for Element.

save_attributes(Atts, BaseURI, Out, Element, Indent, Options) :-
    split_attributes(Atts, InTag, InBody, Options),
    SubIndent is Indent + 2,
    save_attributes2(InTag, BaseURI, tag, Out, SubIndent, Options),
    (   InBody \== []
    ->  format(Out, '>~n', []),
        save_attributes2(InBody, BaseURI, body, Out, SubIndent, Options),
        format(Out, '~N~*|</', [Indent]),
        rdf_write_id(Out, Element),
        format(Out, '>~n', [])
    ;   format(Out, '/>~n', [])
    ).
2812
%!  split_attributes(+Attributes, -HeadAttrs, -BodyAttr, Options)
%
%   Split the Name=Value list into attributes for the begin tag and
%   for the element body. With option xml_attributes(false), all
%   attributes go into the body. Otherwise an attribute goes into
%   the tag only if its name appears once and it satisfies
%   in_tag_attribute/1. The body holds the duplicated attributes
%   followed by the remaining single ones.

split_attributes(Atts, HeadAttr, BodyAttr, Options) :-
    (   HeadAttr = [],
        BodyAttr = Atts,
        option(xml_attributes(false), Options)
    ->  true
    ;   duplicate_attributes(Atts, Dupls, Singles),
        simple_literal_attributes(Singles, HeadAttr, Rest),
        append(Dupls, Rest, BodyAttr)
    ).
2826
%!  duplicate_attributes(+Attrs, -Duplicates, -Singles)
%
%   Extract the attributes whose name appears more than once, as
%   XML does not allow an attribute to be duplicated in a tag.
%   Duplicates holds these attributes grouped by name. Singles holds
%   the remaining attributes in their original order.

duplicate_attributes([], [], []).
duplicate_attributes([Att|Atts], Dupls, Singles) :-
    (   Att = (Name=_),
        named_attributes(Name, Atts, Same, Others),
        Same \== [],
        append([Att|Same], DuplsTail, Dupls)
    ->  duplicate_attributes(Others, DuplsTail, Singles)
    ;   Singles = [Att|SinglesTail],
        duplicate_attributes(Atts, Dupls, SinglesTail)
    ).
2842
%   named_attributes(+Name, +Attrs, -Same, -Others)
%
%   Partition Attrs into the attributes that unify with Name=_ and
%   the others, preserving order in both lists.

named_attributes(_, [], [], []) :- !.
named_attributes(Name, [Att|Atts], Same, Others) :-
    (   Att = (Name=_)
    ->  Same = [Att|Same1],
        Others = Others1
    ;   Same = Same1,
        Others = [Att|Others1]
    ),
    named_attributes(Name, Atts, Same1, Others1).
2851
%!  simple_literal_attributes(+Attributes, -Inline, -Body)
%
%   Partition Attributes into those that satisfy in_tag_attribute/1
%   (Inline, to be written in the begin tag) and those that must go
%   into the body of the description (Body). Order is preserved.

simple_literal_attributes([], [], []).
simple_literal_attributes([Att|Atts], Inline, Body) :-
    (   Inline = [Att|Inline1],
        in_tag_attribute(Att)
    ->  Body1 = Body
    ;   Inline1 = Inline,
        Body = [Att|Body1]
    ),
    simple_literal_attributes(Atts, Inline1, Body1).
2864
%   in_tag_attribute(+NameValue)
%
%   True if the value is a literal holding a plain atom (no
%   language or type qualifier) shorter than 60 characters.

in_tag_attribute(_Name=literal(Text)) :-
    atom(Text),                     % lang(_,_) and type(_,_) are compound
    atom_length(Text, Length),
    60 > Length.
2869
%!  save_attributes2(+List, +BaseURI, +TagOrBody, +Stream, +Indent, +Options)
%
%   Write each Name=Value in List, in order, using save_attribute/6.
%   TagOrBody is `tag` or `body` and is passed on unchanged.

save_attributes2([], _, _, _, _, _).
save_attributes2([Att|Atts], BaseURI, Where, Out, Indent, Options) :-
    save_attribute(Where, Att, BaseURI, Out, Indent, Options),
    save_attributes2(Atts, BaseURI, Where, Out, Indent, Options).
2878
%   save_attribute(+Where, +NameValue, +BaseURI, +Out, +Indent, +Options)
%
%   Write a single Name=Value pair. Where is `tag` for a literal that
%   is written as XML attribute in the begin tag, or `body` for a
%   property written as child element. The `body` clauses are tried in
%   order: literal, blank node, inlined resource and finally a plain
%   rdf:resource reference.

% tag: write Name="Value" on a fresh line, indented two more columns.
save_attribute(tag, Name=literal(Value), BaseURI, Out, Indent, Options) :-
    AttIndent is Indent + 2,
    rdf_id(Name, BaseURI, NameText),
    option(encoding(Encoding), Options, utf8),
    xml_quote_attribute(Value, QVal, Encoding),
    format(Out, '~N~*|', [AttIndent]),
    rdf_write_id(Out, NameText),
    format(Out, '="~w"', [QVal]).
% body, literal: if the option convert_typed_literal(Converter) is given
% and call(Converter, Type, Content, Literal0) succeeds, save the
% literal as type(Type, Content); otherwise save it unchanged.
save_attribute(body, Name=literal(Literal0), BaseURI, Out, Indent, Options) :-
    !,
    rdf_id(Name, BaseURI, NameText),
    (   memberchk(convert_typed_literal(Converter), Options),
        call(Converter, Type, Content, Literal0)
    ->  Literal = type(Type, Content)
    ;   Literal = Literal0
    ),
    save_body_literal(Literal, NameText, BaseURI, Out, Indent, Options).
% body, blank node: a node that already has a node id is written as an
% rdf:nodeID reference.  Otherwise the node is nested inside the
% property element, either as a collection or as a description.
save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
    rdf_is_bnode(Value),
    !,
    rdf_id(Name, BaseURI, NameText),
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    (   named_anon(Value, NodeID)
    ->  format(Out, ' rdf:nodeID="~w"/>', [NodeID])
        % The node is the object of at least two triples that differ in
        % subject or predicate: give it a node id bn<N>, where N is the
        % current number of named_anon/2 clauses, so that later
        % references can point to it.
    ;   (   rdf(S1, Name, Value),
            rdf(S2, P2, Value),
            (S1 \== S2 ; Name \== P2)
        ->  predicate_property(named_anon(_,_), number_of_clauses(N)),
            atom_concat('bn', N, NodeID),
            assertz(named_anon(Value, NodeID))
        ;   true
        ),
        SubIndent is Indent + 2,
        (   rdf_collection(Value)
        ->  save_about(Out, BaseURI, Value, Options),
            format(Out, ' rdf:parseType="Collection">~n', []),
            rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
        ;   format(Out, '>~n', []),
            rdf_save_subject(Out, Value, BaseURI, SubIndent, Options)
        ),
        format(Out, '~N~*|</', [Indent]),
        rdf_write_id(Out, NameText),
        format(Out, '>~n', [])
    ).
% body, inline: with option inline(true), a resource that has
% properties in the selected graph and was not inlined before is
% registered in inlined/1 and nested inside the property element.
save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
    option(inline(true), Options),
    has_attributes(Value, Options),
    \+ inlined(Value),
    !,
    assertz(inlined(Value)),
    rdf_id(Name, BaseURI, NameText),
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    SubIndent is Indent + 2,
    (   rdf_collection(Value)
    ->  save_about(Out, BaseURI, Value, Options),
        format(Out, ' rdf:parseType="Collection">~n', []),
        rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
    ;   format(Out, '>~n', []),
        do_save_subject(Out, Value, BaseURI, SubIndent, Options)
    ),
    format(Out, '~N~*|</', [Indent]),
    rdf_write_id(Out, NameText),
    format(Out, '>~n', []).
% body, default: write an empty element that refers to Value using
% rdf:resource.
save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
    option(encoding(Encoding), Options, utf8),
    rdf_value(Value, BaseURI, QVal, Encoding),
    rdf_id(Name, BaseURI, NameText),
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    format(Out, ' rdf:resource="~w"/>', [QVal]).
2951
%   has_attributes(+URI, +Options)
%
%   True if URI is the subject of at least one triple in the graph
%   selected by Options. Succeeds at most once.

has_attributes(URI, Options) :-
    once(( graph(Options, DB),
           rdf_db(URI, _, _, DB)
         )).
2956
%!  save_body_literal(+Literal, +NameText, +BaseURI,
%!                    +Out, +Indent, +Options).
%
%   Write the property element NameText with Literal as content.
%   Literal is one of lang(Lang, Value), type(Type, Value), an atomic
%   value or, as last resort, a term that is saved as rdf:XMLLiteral.

% Language-tagged literal.  The xml:lang attribute is omitted if Lang
% equals the document_language(Lang) option.
save_body_literal(lang(Lang, Value),
                  NameText, BaseURI, Out, Indent, Options) :-
    !,
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    (   memberchk(document_language(Lang), Options)
    ->  write(Out, '>')
    ;   rdf_id(Lang, BaseURI, LangText),
        format(Out, ' xml:lang="~w">', [LangText])
    ),
    save_attribute_value(Value, Out, Options),
    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
% rdf:XMLLiteral.  An atom is written as-is (~w, no quoting) inside an
% element with rdf:parseType="Literal"; anything else is handed to
% save_xml_literal/5.
save_body_literal(type(Type, DOM),
                  NameText, _BaseURI, Out, Indent, Options) :-
    rdf_equal(Type, rdf:'XMLLiteral'),
    !,
    (   atom(DOM)
    ->  format(Out, '~N~*|<', [Indent]),
        rdf_write_id(Out, NameText),
        format(Out, ' rdf:parseType="Literal">~w</', [DOM]),
        rdf_write_id(Out, NameText), write(Out, '>')
    ;   save_xml_literal(DOM, NameText, Out, Indent, Options)
    ).
% Any other typed literal: write the type as rdf:datatype attribute.
save_body_literal(type(Type, Value),
                  NameText, BaseURI, Out, Indent, Options) :-
    !,
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    option(encoding(Encoding), Options, utf8),
    rdf_value(Type, BaseURI, QVal, Encoding),
    format(Out, ' rdf:datatype="~w">', [QVal]),
    save_attribute_value(Value, Out, Options),
    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
% Plain atomic literal without language or type.
save_body_literal(Literal,
                  NameText, _, Out, Indent, Options) :-
    atomic(Literal),
    !,
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    write(Out, '>'),
    save_attribute_value(Literal, Out, Options),
    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
% Anything else is retried as type(rdf:'XMLLiteral', DOM).
save_body_literal(DOM,
                  NameText, BaseURI, Out, Indent, Options) :-
    rdf_equal(Type, rdf:'XMLLiteral'),
    save_body_literal(type(Type, DOM),
                      NameText, BaseURI, Out, Indent, Options).
3007
%   save_attribute_value(+Value, +Out, +Options)
%
%   Write the content of a literal. Atoms and strings are quoted
%   with xml_quote_cdata/3 for the encoding(Encoding) option (default
%   `utf8`). Numbers are written using writeq/2.
%
%   @throws error(save_attribute_value(Value), _) for any other value.

save_attribute_value(Value, Out, Options) :-
    (   (   atom(Value)
        ;   string(Value)
        )
    ->  option(encoding(Encoding), Options, utf8),
        xml_quote_cdata(Value, QVal, Encoding),
        write(Out, QVal)
    ;   number(Value)
    ->  writeq(Out, Value)              % quoted: preserve floats
    ;   throw(error(save_attribute_value(Value), _))
    ).
3022
%!  save_xml_literal(+DOM, +Attr, +Out, +Indent, +Options) is det.
%
%   Save an XMLLiteral value. The complete property element is
%   written using xml_write/3, i.e.
%
%           ==
%           <prop rdf:parseType="Literal">DOM</prop>
%           ==
%
%   To establish the namespaces used in DOM, the nsmap(NsMap) option
%   from Options is passed on to xml_write/3. This option must be
%   present.
%
%   @error type_error(xml_dom, DOM) if DOM is not an XML DOM.

save_xml_literal(DOM, Attr, Out, Indent, Options) :-
    (   xml_is_dom(DOM)
    ->  memberchk(nsmap(NsMap), Options),
        id_to_atom(Attr, Atom),
        Element = element(Atom, ['rdf:parseType'='Literal'], DOM),
        xml_write(Out, Element,
                  [ header(false),
                    indent(Indent),
                    nsmap(NsMap)
                  ])
    ;   must_be(xml_dom, DOM)
    ).
3048
%   id_to_atom(+Id, -Atom)
%
%   Atom is the text `NS:Local` if Id is a term NS:Local. Any other
%   Id is returned unchanged.

id_to_atom(Id, Atom) :-
    (   Id = NS:Local
    ->  atomic_list_concat([NS,Local], :, Atom)
    ;   Atom = Id
    ).
3053
3054
%!  rdf_collection(+URI) is semidet.
%
%   True  if  URI  represents  an  RDF    list  that  fits  the  RDF
%   parseType=Collection syntax. This means it is   a linked list of
%   bnode-cells, each with exactly one rdf:first  that is a resource
%   (an atom), exactly one rdf:rest,  optionally   an  rdf:type that
%   is rdf:List and no other properties. The list ends in rdf:nil.

% Declare the arguments as resources (r), so the prefix:local terms
% used in the clauses below denote the full resource.
:- rdf_meta
    rdf_collection(r),
    collection_p(r,r).

rdf_collection(rdf:nil) :- !.
rdf_collection(Cell) :-
    rdf_is_bnode(Cell),
    % exactly one rdf:first and exactly one rdf:rest
    findall(F, rdf(Cell, rdf:first, F), [_]),
    findall(F, rdf(Cell, rdf:rest, F), [Rest]),
    % every property of the cell must be accepted by collection_p/2
    forall(rdf(Cell, P, V),
           collection_p(P, V)),
    rdf_collection(Rest).

%   collection_p(+Predicate, +Value)
%
%   True if the property is allowed on a cell of a collection.

collection_p(rdf:first, V) :- atom(V).
collection_p(rdf:rest, _).
collection_p(rdf:type, rdf:'List').
3078
3079
%!  rdf_save_list(+Out, +List, +BaseURI, +Indent, +Options)
%
%   Write the members of the RDF collection List. A blank node
%   member is saved as a nested subject. Any other member is written
%   as an empty rdf:Description with an rdf:about attribute. The flag
%   `rdf_db_saved_triples` is incremented by 3 for each cell. Nothing
%   is written for rdf:nil.

rdf_save_list(Out, List, BaseURI, Indent, Options) :-
    (   rdf_equal(List, rdf:nil)
    ->  true
    ;   rdf_has(List, rdf:first, First),
        (   rdf_is_bnode(First)
        ->  nl(Out),
            rdf_save_subject(Out, First, BaseURI, Indent, Options)
        ;   option(encoding(Encoding), Options, utf8),
            rdf_value(First, BaseURI, QVal, Encoding),
            format(Out, '~N~*|<rdf:Description rdf:about="~w"/>',
                   [Indent, QVal])
        ),
        flag(rdf_db_saved_triples, Saved, Saved+3),
        (   rdf_has(List, rdf:rest, Tail),
            \+ rdf_equal(Tail, rdf:nil)
        ->  rdf_save_list(Out, Tail, BaseURI, Indent, Options)
        ;   true
        )
    ).
3101
3102
%!  rdf_id(+Resource, +BaseURI, -NSLocal)
%
%   Generate a name for Resource to be used for elements. The first
%   rule that applies determines the result:
%
%     1. Resource is BaseURI followed by a local part starting with
%        `#`: the local part.
%     2. The namespace found by iri_xml_namespace/3 has an alias in
%        ns/2: NS:Local.
%     3. Some non-empty namespace in ns/2 is a prefix of Resource:
%        NS:Local.
%     4. Otherwise Resource itself.

rdf_id(Id, BaseURI, NSLocal) :-
    (   assertion(atom(BaseURI)),
        atom_concat(BaseURI, NSLocal, Id),
        sub_atom(NSLocal, 0, 1, _, #)
    ->  true
    ;   NSLocal = NS:Local,
        iri_xml_namespace(Id, Full, Local),
        ns(NS, Full)
    ->  true
    ;   NSLocal = NS:Local,
        ns(NS, Full),
        Full \== '',
        atom_concat(Full, Local, Id)
    ->  true
    ;   NSLocal = Id
    ).
3123
3124
%!  rdf_write_id(+Out, +NSLocal) is det.
%
%   Write an identifier, which is either NS:Local or a plain atom.
%   The parts of NS:Local are written separately because both can be
%   operators, so native write on the whole term cannot be used.

rdf_write_id(Out, Id) :-
    (   Id = NS:Local
    ->  format(Out, '~w:~w', [NS, Local])
    ;   write(Out, Id)
    ).
3135
%!  rdf_value(+Resource, +BaseURI, -Text, +Encoding)
%
%   Text is the attribute value used to refer to Resource. According
%   to "6.4 RDF URI References" of the RDF Syntax specification, a
%   URI reference is a UNICODE string not containing control
%   sequences, represented as UTF-8 and then as escaped US-ASCII.
%   The first rule that applies determines Text:
%
%     1. Resource equals BaseURI: the empty atom.
%     2. Resource is BaseURI followed by a local part starting with
%        `#`: the quoted local part.
%     3. Resource is a namespace from ns/2 followed by an XML name:
%        `&NS;` followed by the quoted local name.
%     4. Otherwise the quoted Resource.

rdf_value(V, Base, Text, Encoding) :-
    (   V = Base,
        Text = ''
    ->  true
    ;   atom_concat(Base, Local, V),
        sub_atom(Local, 0, _, _, #)
    ->  xml_quote_attribute(Local, Text, Encoding)
    ;   ns(NS, Full),
        atom_concat(Full, Local, V),
        xml_is_name(Local)
    ->  xml_quote_attribute(Local, QLocal, Encoding),
        atomic_list_concat(['&', NS, (';'), QLocal], Text)
    ;   xml_quote_attribute(V, Text, Encoding)
    ).
3158
3159
3160                 /*******************************
3161                 *       MATCH AND COMPARE      *
3162                 *******************************/
3163
3164%!  rdf_compare(-Dif, +Object1, +Object2) is det.
3165%
3166%   Compare  two  object  terms.  Where  SPARQL  defines  a  partial
3167%   ordering, we define a complete ordering   of terms. The ordering
%   is defined as:
%
%     - Blank nodes < IRIs < Literals
%     - Numeric literals < other literals
%     - Numeric literals are compared by value and then by type,
%       where Integer < Decimal < Double
%     - Other literals are compared lexically, case insensitive.
%       If equal, uppercase precedes lowercase.  If still equal,
%       the types are compared lexically.
3177
3178%!  rdf_match_label(+How, +Pattern, +Label) is semidet.
3179%
3180%   True if Label matches Pattern according to   How.  How is one of
3181%   `icase`, `substring`, `word`, `prefix` or   `like`. For backward
3182%   compatibility, `exact` is a synonym for `icase`.
3183
3184
3185                 /*******************************
3186                 *      DEPRECATED MATERIAL     *
3187                 *******************************/
3188
%!  rdf_split_url(+Prefix, +Local, -URL) is det.
%!  rdf_split_url(-Prefix, -Local, +URL) is det.
%
%   Split/join a URL. If URL is atomic it is split using
%   iri_xml_namespace/3 from library(sgml). Otherwise URL is the
%   concatenation of Prefix and Local.
%
%   @deprecated Use iri_xml_namespace/3. Note that the argument
%   order is iri_xml_namespace(+IRI, -Namespace, -Localname).

rdf_split_url(Prefix, Local, URL) :-
    (   atomic(URL)
    ->  iri_xml_namespace(URL, Prefix, Local)
    ;   atom_concat(Prefix, Local, URL)
    ).
3203
%!  rdf_url_namespace(+URL, -Namespace)
%
%   Namespace is the namespace of URL, as computed by
%   iri_xml_namespace/2.
%
%   @deprecated Use iri_xml_namespace/2

rdf_url_namespace(URL, Namespace) :-
    iri_xml_namespace(URL, Namespace).
3212
3213
3214                 /*******************************
3215                 *            LITERALS          *
3216                 *******************************/
3217
3218%!  rdf_new_literal_map(-Map) is det.
3219%
3220%   Create a new literal map, returning an opaque handle.
3221
3222%!  rdf_destroy_literal_map(+Map) is det.
3223%
3224%   Destroy a literal map. After this call,   further use of the Map
3225%   handle is illegal. Additional synchronisation  is needed if maps
3226%   that are shared between threads are   destroyed to guarantee the
3227%   handle    is    no    longer    used.    In    some    scenarios
3228%   rdf_reset_literal_map/1 provides a safe alternative.
3229
3230%!  rdf_reset_literal_map(+Map) is det.
3231%
3232%   Delete all content from the literal map.
3233
3234%!  rdf_insert_literal_map(+Map, +Key, +Value) is det.
3235%
3236%   Add a relation between  Key  and  Value   to  the  map.  If this
3237%   relation already exists no action is performed.
3238
3239%!  rdf_insert_literal_map(+Map, +Key, +Value, -KeyCount) is det.
3240%
3241%   As rdf_insert_literal_map/3. In addition, if Key is a new key in
3242%   Map, unify KeyCount with the number of  keys in Map. This serves
3243%   two purposes. Derived maps, such as  the stem and metaphone maps
3244%   need to know about new  keys   and  it avoids additional foreign
3245%   calls for doing the progress in rdf_litindex.pl.
3246
3247%!  rdf_delete_literal_map(+Map, +Key) is det.
3248%
3249%   Delete Key and all associated values from the map.
3250
3251%!  rdf_delete_literal_map(+Map, +Key, +Value) is det.
3252%
3253%   Delete the association between Key and Value from the map.
3254
3255%!  rdf_find_literal_map(+Map, +KeyList, -ValueList) is det.
3256%
3257%   Unify ValueList with an ordered set  of values associated to all
3258%   keys from KeyList. Each key in  KeyList   is  either an atom, an
%   integer or a term not(Key).  If   not-terms  are provided, there
%   must be at least one positive keyword.  The negations are tested
%   after establishing the positive matches.
3262
3263%!  rdf_keys_in_literal_map(+Map, +Spec, -Answer) is det.
3264%
3265%   Realises various queries on the key-set:
3266%
3267%     * all
3268%
3269%     Unify Answer with an ordered list of all keys.
3270%     * key(+Key)
3271%
3272%     Succeeds if Key is a key in the map and unify Answer with the
3273%     number of values associated with the key. This provides a fast
3274%     test of existence without fetching the possibly large
3275%     associated value set as with rdf_find_literal_map/3.
3276%
3277%     * prefix(+Prefix)
3278%     Unify Answer with an ordered set of all keys that have the
3279%     given prefix. See section 3.1 for details on prefix matching.
3280%     Prefix must be an atom. This call is intended for
3281%     auto-completion in user interfaces.
3282%
3283%     * ge(+Min)
3284%     Unify Answer with all keys that are larger or equal to the
3285%     integer Min.
3286%
3287%     * le(+Max)
3288%     Unify Answer with all keys that are smaller or equal to the integer
3289%     Max.
3290%
%     * between(+Min, +Max)
%     Unify Answer with all keys between Min and Max (inclusive).
3293
3294%!  rdf_statistics_literal_map(+Map, -KeyValue)
3295%
%   Query some statistics of the map. Provided KeyValue terms are:
%
%     * size(-Keys, -Relations)
%     Unify Keys with the total key-count of the index and Relations
%     with the total Key-Value count.
3301
3302
3303
3304                 /*******************************
3305                 *             MISC             *
3306                 *******************************/
3307
3308%!  rdf_version(-Version) is det.
3309%
3310%   True when Version is the numerical version-id of this library.
3311%   The version is computed as
3312%
3313%           Major*10000 + Minor*100 + Patch.
3314
3315%!  rdf_set(+Term) is det.
3316%
3317%   Set properties of the RDF store.  Currently defines:
3318%
3319%     * hash(+Hash, +Parameter, +Value)
3320%     Set properties for a triple index.  Hash is one of =s=,
3321%     =p=, =sp=, =o=, =po=, =spo=, =g=, =sg= or =pg=.  Parameter
3322%     is one of:
3323%
3324%       - size
3325%       Value defines the number of entries in the hash-table.
3326%       Value is rounded _down_ to a power of 2.  After setting
3327%       the size explicitly, auto-sizing for this table is
3328%       disabled.  Setting the size smaller than the current
3329%       size results in a =permission_error= exception.
3330%
3331%       - average_chain_len
3332%       Set maximum average collision number for the hash.
3333%
3334%       - optimize_threshold
3335%       Related to resizing hash-tables.  If 0, all triples are
%       moved to the new size by the garbage collector.  If more
%       than zero, those of the last Value resize steps remain at
3338%       their current location.  Leaving cells at their current
3339%       location reduces memory fragmentation and slows down
3340%       access.
3341
3342%!  rdf_md5(+Graph, -MD5) is det.
3343%
3344%   True when MD5 is the MD5 hash for  all triples in graph. The MD5
3345%   digest itself is represented as an   atom holding a 32-character
3346%   hexadecimal   string.   The   library   maintains   the   digest
3347%   incrementally on rdf_load/[1,2], rdf_load_db/1, rdf_assert/[3,4]
3348%   and  rdf_retractall/[3,4].  Checking  whether   the  digest  has
3349%   changed since the last rdf_load/[1,2]  call provides a practical
3350%   means for checking whether the file needs to be saved.
3351%
3352%   @deprecated New code should use rdf_graph_property(Graph,
3353%   hash(Hash)).
3354
3355%!  rdf_generation(-Generation) is det.
3356%
3357%   True when Generation is the current  generation of the database.
3358%   Each modification to the database  increments the generation. It
3359%   can be used to check the validity of cached results deduced from
3360%   the database. Committing a non-empty  transaction increments the
3361%   generation by one.
3362%
3363%   When inside a transaction,  Generation  is   unified  to  a term
3364%   _TransactionStartGen_ + _InsideTransactionGen_. E.g.,  4+3 means
3365%   that the transaction was started at   generation 4 of the global
3366%   database and we have  created  3   new  generations  inside  the
3367%   transaction. Note that this choice  of representation allows for
3368%   comparing  generations  using  Prolog  arithmetic.  Comparing  a
3369%   generation in one  transaction  with   a  generation  in another
3370%   transaction is meaningless.
3371
3372%!  rdf_estimate_complexity(?Subject, ?Predicate, ?Object, -Complexity)
3373%
3374%   Return the number of alternatives as   indicated by the database
3375%   internal hashed indexing. This is a rough measure for the number
3376%   of alternatives we can expect for   an  rdf_has/3 call using the
3377%   given three arguments. When  called   with  three variables, the
3378%   total number of triples is returned.   This  estimate is used in
3379%   query  optimisation.  See  also    rdf_predicate_property/2  and
3380%   rdf_statistics/1 for additional information to help optimizers.
3381
3382%!  rdf_debug(+Level) is det.
3383%
3384%   Set debugging to Level.  Level is an integer 0..9.  Default is
3385%   0 no debugging.
3386
3387%!  rdf_atom_md5(+Text, +Times, -MD5) is det.
3388%
3389%   Computes the MD5 hash from Text, which is an atom, string or list of
3390%   character codes. Times is  an  integer  >=   1.  When  >  0, the MD5
3391%   algorithm is repeated Times times on the generated hash. This can be
3392%   used for password encryption algorithms   to  make generate-and-test
3393%   loops slow.
3394%
3395%   @deprecated Obviously, password hash  primitives   do  not belong in
3396%   this library. The  library(crypto)  from   the  \const{ssl}  package
3397%   provides extensive support for  hashes.   The  \const{clib}  package
3398%   provides library(crypt) to  access  the   OS  (Unix)  password  hash
3399%   implementation as well as  lightweight   implementations  of several
3400%   popular hashes.
3401
3402
3403                 /*******************************
3404                 *             MESSAGES         *
3405                 *******************************/
3406
% Hook into the message system: a message term rdf(Term) is translated
% by message//1 below.
:- multifile
    prolog:message//1.

prolog:message(rdf(Term)) -->
    message(Term).
3412
%   message(+Term)//
%
%   Translate Term from a message rdf(Term) into a list of message
%   lines (Format-Args pairs and `nl`).

% Composed from how//1, source//1, into//2 and in_time//2.
message(loaded(How, What, BaseURI, Triples, Time)) -->
    how(How),
    source(What),
    into(What, BaseURI),
    in_time(Triples, Time).
message(save_removed_duplicates(N, Subject)) -->
    [ 'Removed ~d duplicate triples about "~p"'-[N,Subject] ].
message(saved(File, SavedSubjects, SavedTriples)) -->
    [ 'Saved ~D triples about ~D subjects into ~p'-
      [SavedTriples, SavedSubjects, File]
    ].
message(using_namespace(Id, NS)) -->
    [ 'Using namespace id ~w for ~w'-[Id, NS] ].
message(inconsistent_cache(DB, Graphs)) -->
    [ 'RDF cache file for ~w contains the following graphs'-[DB], nl,
      '~t~8|~p'-[Graphs]
    ].
message(guess_format(Ext)) -->
    [ 'Unknown file-extension: ~w.  Assuming RDF/XML'-[Ext] ].
message(meta(not_expanded(G))) -->
    [ 'rdf_meta/1: ~p is not expanded'-[G] ].
message(deprecated(rdf_unload(Graph))) -->
    [ 'rdf_unload/1: Use ~q'-[rdf_unload_graph(Graph)] ].
3436
3437
%   how(+How)//
%
%   Leading verb of the `loaded` message: How is `load` or `parsed`.

how(load)   --> [ 'Loaded' ].
how(parsed) --> [ 'Parsed' ].
3440
%   source(+SourceURL)//
%
%   Emit the quoted source of a load operation. If SourceURL
%   denotes a file according to uri_file_name/2, only the base name
%   of that file is shown. Otherwise SourceURL itself is shown.

source(SourceURL) -->
    { (   uri_file_name(SourceURL, File)
      ->  file_base_name(File, Shown) % TBD: relative file?
      ;   Shown = SourceURL
      )
    },
    [ ' "~w"'-[Shown] ].
3449
%   into(+What, +BaseURI)//
%
%   Placeholder in the `loaded` message: currently emits nothing.

into(_What, _BaseURI) --> [].           % TBD
3451
%   in_time(+Triples, +ParseTime)//
%
%   Emit the elapsed time in seconds (two decimals) followed by the
%   number of triples.

in_time(Triples, ParseTime) -->
    [ ' in ~2f sec; ~D triples'-[ParseTime, Triples] ].
3455