1/*  $Id$
2
3    Part of SWI-Prolog
4
5    Author:        Jan Wielemaker
6    E-mail:        wielemak@science.uva.nl
7    WWW:           http://www.swi-prolog.org
8    Copyright (C): 2002-2007, University of Amsterdam
9
10    This program is free software; you can redistribute it and/or
11    modify it under the terms of the GNU General Public License
12    as published by the Free Software Foundation; either version 2
13    of the License, or (at your option) any later version.
14
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19
20    You should have received a copy of the GNU General Public
21    License along with this library; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23
24    As a special exception, if you link this library with other files,
25    compiled with a Free Software compiler, to produce an executable, this
26    library does not by itself cause the resulting executable to be covered
27    by the GNU General Public License. This exception does not however
28    invalidate any other reasons why the executable file might be covered by
29    the GNU General Public License.
30*/
31
32
33:- module(rdf,
34	  [ load_rdf/2,			% +File, -Triples
35	    load_rdf/3,			% +File, -Triples, :Options
36	    xml_to_rdf/3,		% +XML, -Triples, +Options
37	    process_rdf/3		% +File, :OnTriples, :Options
38	  ]).
39
40:- expects_dialect(swi).
41:- assert(system:swi_io).
42
43:- meta_predicate
44	load_rdf(+, -, :),
45	process_rdf(+, :, :).
46
47:- use_module(library(sgml)).		% Basic XML loading
48:- use_module(library(option)).		% option/3
49:- use_module(library(lists)).
50:- use_module(rdf_parser).		% Basic parser
51:- use_module(rdf_triple).		% Generate triples
52
53%%	load_rdf(+File, -Triples) is det.
54%%	load_rdf(+File, -Triples, :Options) is det.
55%
56%	Parse an XML file holding an RDF term into a list of RDF triples.
57%	see rdf_triple.pl for a definition of the output format. Options:
58%
59%		* base_uri(+URI)
60%		URI to use as base
61%
62%		* expand_foreach(+Bool)
63%		Apply each(Container, Pred, Object) on the members of
64%		Container
65%
66%		* namespaces(-Namespaces:list(NS=URL))
67%		Return list of namespaces declared using xmlns:NS=URL in
68%		the document.  This can be used to update the namespace
69%		list with rdf_register_ns/2.
70%
71%	@see	Use process_rdf/3 for processing large documents in
72%		_|call-back|_ style.
73
74load_rdf(File, Triples) :-
75	load_rdf(File, Triples, []).
76
77load_rdf(File, Triples, M:Options0) :-
78	entity_options(Options0, EntOptions, Options1),
79	meta_options(load_meta_option, M:Options1, Options),
80	init_ns_collect(Options, NSList),
81	load_structure(File,
82		       [ RDFElement
83		       ],
84		       [ dialect(xmlns),
85			 space(sgml),
86			 call(xmlns, rdf:on_xmlns)
87		       | EntOptions
88		       ]),
89	rdf_start_file(Options, Cleanup),
90	call_cleanup(xml_to_rdf(RDFElement, Triples0, Options),
91		     rdf_end_file(Cleanup)),
92	exit_ns_collect(NSList),
93	post_process(Options, Triples0, Triples).
94
95entity_options([], [], []).
96entity_options([H|T0], Entities, Rest) :-
97	(   H = entity(_,_)
98	->  Entities = [H|ET],
99	    entity_options(T0, ET, Rest)
100	;   Rest = [H|RT],
101	    entity_options(T0, Entities, RT)
102	).
103
104load_meta_option(convert_typed_literal).
105
106%%	xml_to_rdf(+XML, -Triples, +Options)
107
108xml_to_rdf(XML, Triples, Options) :-
109	is_list(Options), !,
110	make_rdf_state(Options, State, _),
111	xml_to_plrdf(XML, RDF, State),
112	rdf_triples(RDF, Triples).
113xml_to_rdf(XML, BaseURI, Triples) :-
114	atom(BaseURI), !,
115	xml_to_rdf(XML, Triples, [base_uri(BaseURI)]).
116
117
118		 /*******************************
119		 *	 POST-PROCESSING	*
120		 *******************************/
121
122post_process([], Triples, Triples).
123post_process([expand_foreach(true)|T], Triples0, Triples) :- !,
124	expand_each(Triples0, Triples1),
125	post_process(T, Triples1, Triples).
126post_process([_|T], Triples0, Triples) :- !,
127	post_process(T, Triples0, Triples).
128
129
130		 /*******************************
131		 *	      EXPAND		*
132		 *******************************/
133
134expand_each(Triples0, Triples) :-
135	select(rdf(each(Container), Pred, Object),
136	       Triples0, Triples1), !,
137	each_triples(Triples1, Container, Pred, Object, Triples2),
138	expand_each(Triples2, Triples).
139expand_each(Triples, Triples).
140
141each_triples([], _, _, _, []).
142each_triples([H0|T0], Container, P, O,
143	     [H0, rdf(S,P,O)|T]) :-
144	H0 = rdf(Container, rdf:A, S),
145	member_attribute(A), !,
146	each_triples(T0, Container, P, O, T).
147each_triples([H|T0], Container, P, O, [H|T]) :-
148	each_triples(T0, Container, P, O, T).
149
150member_attribute(A) :-
151	sub_atom(A, 0, _, _, '_').	% must check number?
152
153
154		 /*******************************
155		 *	     BIG FILES		*
156		 *******************************/
157
158%%	process_rdf(+Input, :OnObject, :Options)
159%
160%	Process RDF from Input. Input is either an atom or a term of the
161%	format stream(Handle). For each   encountered  description, call
162%	OnObject(+Triples) to handle the  triples   resulting  from  the
163%	description. Defined Options are:
164%
165%		* base_uri(+URI)
166%		Determines the reference URI.
167%
168%		* db(DB)
169%		When loading from a stream, the source is taken from
170%		this option or -if non-existent- from base_uri.
171%
172%		* lang(LanguageID)
173%		Set initial language (as xml:lang)
174%
175%		* convert_typed_literal(:Convertor)
176%		Call Convertor(+Type, +Content, -RDFObject) to create
177%		a triple rdf(S, P, RDFObject) instead of rdf(S, P,
178%		literal(type(Type, Content)).
179%
180%		*  namespaces(-Namespaces:list(NS=URL))
181%		Return list of namespaces declared using xmlns:NS=URL in
182%		the document.  This can be used to update the namespace
183%		list with rdf_register_ns/2.
184%
185%		* entity(Name, Value)
186%		Overrule entity values found in the file
187%
188%		* embedded(Boolean)
189%		If =true=, do not give warnings if rdf:RDF is embedded
190%		in other XML data.
191
192process_rdf(File, OnObject, M:Options0) :-
193	is_list(Options0), !,
194	entity_options(Options0, EntOptions, Options1),
195	meta_options(load_meta_option, M:Options1, Options2),
196	process_options(Options2, ProcessOptions, Options),
197	option(base_uri(BaseURI), Options, ''),
198	rdf_start_file(Options, Cleanup),
199	strip_module(OnObject, Module, Pred),
200	nb_setval(rdf_object_handler, Module:Pred),
201	nb_setval(rdf_options, Options),
202	nb_setval(rdf_state, -),
203	init_ns_collect(Options, NSList),
204	(   File = stream(In)
205	->  Source = BaseURI
206	;   is_stream(File)
207	->  In = File,
208	    option(db(Source), Options, BaseURI)
209	;   open(File, read, In, [type(binary)]),
210	    Close = In,
211	    Source = File
212	),
213	new_sgml_parser(Parser, [dtd(DTD)]),
214	def_entities(EntOptions, DTD),
215	set_sgml_parser(Parser, file(Source)),
216	set_sgml_parser(Parser, dialect(xmlns)),
217	set_sgml_parser(Parser, space(sgml)),
218	do_process_rdf(Parser, In, NSList, Close, Cleanup, ProcessOptions).
219process_rdf(File, BaseURI, OnObject) :-
220	process_rdf(File, OnObject, [base_uri(BaseURI)]).
221
222def_entities([], _).
223def_entities([entity(Name, Value)|T], DTD) :- !,
224	def_entity(DTD, Name, Value),
225	def_entities(T, DTD).
226def_entities([_|T0], DTD) :-
227	def_entities(T0, DTD).
228
229def_entity(DTD, Name, Value) :-
230	open_dtd(DTD, [], Stream),
231	xml_quote_attribute(Value, QValue),
232	format(Stream, '<!ENTITY ~w "~w">~n', [Name, QValue]),
233	close(Stream).
234
235
236do_process_rdf(Parser, In, NSList, Close, Cleanup, Options) :-
237	call_cleanup((   sgml_parse(Parser,
238				    [ source(In),
239				      call(begin, rdf:on_begin),
240				      call(xmlns, rdf:on_xmlns)
241				    | Options
242				    ]),
243			 exit_ns_collect(NSList)
244		     ),
245		     cleanup_process(Close, Cleanup, Parser)).
246
247cleanup_process(In, Cleanup, Parser) :-
248	(   var(In)
249	->  true
250	;   close(In)
251	),
252	free_sgml_parser(Parser),
253	nb_delete(rdf_options),
254	nb_delete(rdf_object_handler),
255	nb_delete(rdf_state),
256	nb_delete(rdf_nslist),
257	rdf_end_file(Cleanup).
258
259on_begin(NS:'RDF', Attr, _) :-
260	rdf_name_space(NS), !,
261	nb_getval(rdf_options, Options),
262	make_rdf_state(Options, State0, _),
263	rdf_modify_state(Attr, State0, State),
264	nb_setval(rdf_state, State).
265on_begin(Tag, Attr, Parser) :-
266	nb_getval(rdf_state, State),
267	(   State == (-)
268	->  nb_getval(rdf_options, RdfOptions),
269	    (	memberchk(embedded(true), RdfOptions)
270	    ->	true
271	    ;	print_message(warning, rdf(unexpected(Tag, Parser)))
272	    )
273	;   get_sgml_parser(Parser, line(Start)),
274	    get_sgml_parser(Parser, file(File)),
275	    sgml_parse(Parser,
276		       [ document(Content),
277			 parse(content)
278		       ]),
279	    nb_getval(rdf_object_handler, OnTriples),
280	    element_to_plrdf(element(Tag, Attr, Content), Objects, State),
281	    rdf_triples(Objects, Triples),
282	    call(OnTriples, Triples, File:Start)
283	).
284
285%%	on_xmlns(+NS, +URL, +Parser)
286%
287%	Build up the list of   encountered xmlns:NS=URL declarations. We
288%	use  destructive  assignment  here   as    an   alternative   to
289%	assert/retract, ensuring thread-safety and better performance.
290
291on_xmlns(NS, URL, _Parser) :-
292	(   nb_getval(rdf_nslist, List),
293	    List = list(L0)
294	->  nb_linkarg(1, List, [NS=URL|L0])
295	;   true
296	).
297
298init_ns_collect(Options, NSList) :-
299	(   option(namespaces(NSList), Options, -),
300	    NSList \== (-)
301	->  nb_setval(rdf_nslist, list([]))
302	;   nb_setval(rdf_nslist, -),
303	    NSList = (-)
304	).
305
306exit_ns_collect(NSList) :-
307	(   NSList == (-)
308	->  true
309	;   nb_getval(rdf_nslist, list(NSList))
310	).
311
312
313process_options(Options, Process, RestOptions) :-
314	select_option(content_length(Len), Options, RestOptions), !,
315	Process = [content_length(Len)].
316process_options(Options, [], Options).
317
318
319		 /*******************************
320		 *	      MESSAGES		*
321		 *******************************/
322
323:- multifile
324	prolog:message/3.
325
326%	Catch messages.  sgml/4 is generated by the SGML2PL binding.
327
328prolog:message(rdf(unparsed(Data))) -->
329	{ phrase(unparse_xml(Data), XML)
330	},
331	[ 'RDF: Failed to interpret "~s"'-[XML] ].
332prolog:message(rdf(shared_blank_nodes(N))) -->
333	[ 'RDF: Shared ~D blank nodes'-[N] ].
334prolog:message(rdf(not_a_name(Name))) -->
335	[ 'RDF: argument to rdf:ID is not an XML name: ~p'-[Name] ].
336prolog:message(rdf(redefined_id(Id))) -->
337	[ 'RDF: rdf:ID ~p: multiple definitions'-[Id] ].
338prolog:message(rdf(unexpected(Tag, Parser))) -->
339	{ get_sgml_parser(Parser, file(File)),
340	  get_sgml_parser(Parser, line(Line))
341	},
342	[ 'RDF: ~w:~d: Unexpected element ~w'-[File, Line, Tag] ].
343
344
345		 /*******************************
346		 *	    XML-TO-TEXT		*
347		 *******************************/
348
349unparse_xml([]) --> !,
350	[].
351unparse_xml([H|T]) --> !,
352	unparse_xml(H),
353	unparse_xml(T).
354unparse_xml(Atom) -->
355	{ atom(Atom)
356	}, !,
357	atom(Atom).
358unparse_xml(element(Name, Attr, Content)) -->
359	"<",
360	identifier(Name),
361	attributes(Attr),
362	(   { Content == []
363	    }
364	->  "/>"
365	;   ">",
366	    unparse_xml(Content)
367	).
368
369attributes([]) -->
370	[].
371attributes([H|T]) -->
372	attribute(H),
373	attributes(T).
374
375attribute(Name=Value) -->
376	" ",
377	identifier(Name),
378	"=",
379	value(Value).
380
381identifier(NS:Local) --> !,
382	"{", atom(NS), "}",
383	atom(Local).
384identifier(Local) -->
385	atom(Local).
386
387atom(Atom, Text, Rest) :-
388	atom_codes(Atom, Chars),
389	append(Chars, Rest, Text).
390
391value(Value) -->
392	{ atom_codes(Value, Chars)
393	},
394	"\"",
395	quoted(Chars),
396	"\"".
397
398quoted([]) -->
399	[].
400quoted([H|T]) -->
401	quote(H), !,
402	quoted(T).
403
404quote(0'<) --> "&lt;".
405quote(0'>) --> "&gt;".
406quote(0'") --> "&quot;".
407quote(0'&) --> "&amp;".
408quote(X)   --> [X].
409
410
411		 /*******************************
412		 *	       XREF		*
413		 *******************************/
414
415:- multifile prolog:meta_goal/2.
416prolog:meta_goal(process_rdf(_,G,_), [G+2]).
417
418:- retract(system:swi_io).	%
419