1/* $Id$ 2 3 Part of SWI-Prolog 4 5 Author: Jan Wielemaker 6 E-mail: wielemak@science.uva.nl 7 WWW: http://www.swi-prolog.org 8 Copyright (C): 2002-2007, University of Amsterdam 9 10 This program is free software; you can redistribute it and/or 11 modify it under the terms of the GNU General Public License 12 as published by the Free Software Foundation; either version 2 13 of the License, or (at your option) any later version. 14 15 This program is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License for more details. 19 20 You should have received a copy of the GNU General Public 21 License along with this library; if not, write to the Free Software 22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 24 As a special exception, if you link this library with other files, 25 compiled with a Free Software compiler, to produce an executable, this 26 library does not by itself cause the resulting executable to be covered 27 by the GNU General Public License. This exception does not however 28 invalidate any other reasons why the executable file might be covered by 29 the GNU General Public License. 30*/ 31 32 33:- module(rdf, 34 [ load_rdf/2, % +File, -Triples 35 load_rdf/3, % +File, -Triples, :Options 36 xml_to_rdf/3, % +XML, -Triples, +Options 37 process_rdf/3 % +File, :OnTriples, :Options 38 ]). 39 40:- expects_dialect(swi). 41:- assert(system:swi_io). 42 43:- meta_predicate 44 load_rdf(+, -, :), 45 process_rdf(+, :, :). 46 47:- use_module(library(sgml)). % Basic XML loading 48:- use_module(library(option)). % option/3 49:- use_module(library(lists)). 50:- use_module(rdf_parser). % Basic parser 51:- use_module(rdf_triple). % Generate triples 52 53%% load_rdf(+File, -Triples) is det. 54%% load_rdf(+File, -Triples, :Options) is det. 55% 56% Parse an XML file holding an RDF term into a list of RDF triples. 57% see rdf_triple.pl for a definition of the output format. Options: 58% 59% * base_uri(+URI) 60% URI to use as base 61% 62% * expand_foreach(+Bool) 63% Apply each(Container, Pred, Object) on the members of 64% Container 65% 66% * namespaces(-Namespaces:list(NS=URL)) 67% Return list of namespaces declared using xmlns:NS=URL in 68% the document. This can be used to update the namespace 69% list with rdf_register_ns/2. 70% 71% @see Use process_rdf/3 for processing large documents in 72% _|call-back|_ style. 73 74load_rdf(File, Triples) :- 75 load_rdf(File, Triples, []). 76 77load_rdf(File, Triples, M:Options0) :- 78 entity_options(Options0, EntOptions, Options1), 79 meta_options(load_meta_option, M:Options1, Options), 80 init_ns_collect(Options, NSList), 81 load_structure(File, 82 [ RDFElement 83 ], 84 [ dialect(xmlns), 85 space(sgml), 86 call(xmlns, rdf:on_xmlns) 87 | EntOptions 88 ]), 89 rdf_start_file(Options, Cleanup), 90 call_cleanup(xml_to_rdf(RDFElement, Triples0, Options), 91 rdf_end_file(Cleanup)), 92 exit_ns_collect(NSList), 93 post_process(Options, Triples0, Triples). 94 95entity_options([], [], []). 96entity_options([H|T0], Entities, Rest) :- 97 ( H = entity(_,_) 98 -> Entities = [H|ET], 99 entity_options(T0, ET, Rest) 100 ; Rest = [H|RT], 101 entity_options(T0, Entities, RT) 102 ). 103 104load_meta_option(convert_typed_literal). 105 106%% xml_to_rdf(+XML, -Triples, +Options) 107 108xml_to_rdf(XML, Triples, Options) :- 109 is_list(Options), !, 110 make_rdf_state(Options, State, _), 111 xml_to_plrdf(XML, RDF, State), 112 rdf_triples(RDF, Triples). 113xml_to_rdf(XML, BaseURI, Triples) :- 114 atom(BaseURI), !, 115 xml_to_rdf(XML, Triples, [base_uri(BaseURI)]). 116 117 118 /******************************* 119 * POST-PROCESSING * 120 *******************************/ 121 122post_process([], Triples, Triples). 123post_process([expand_foreach(true)|T], Triples0, Triples) :- !, 124 expand_each(Triples0, Triples1), 125 post_process(T, Triples1, Triples). 126post_process([_|T], Triples0, Triples) :- !, 127 post_process(T, Triples0, Triples). 128 129 130 /******************************* 131 * EXPAND * 132 *******************************/ 133 134expand_each(Triples0, Triples) :- 135 select(rdf(each(Container), Pred, Object), 136 Triples0, Triples1), !, 137 each_triples(Triples1, Container, Pred, Object, Triples2), 138 expand_each(Triples2, Triples). 139expand_each(Triples, Triples). 140 141each_triples([], _, _, _, []). 142each_triples([H0|T0], Container, P, O, 143 [H0, rdf(S,P,O)|T]) :- 144 H0 = rdf(Container, rdf:A, S), 145 member_attribute(A), !, 146 each_triples(T0, Container, P, O, T). 147each_triples([H|T0], Container, P, O, [H|T]) :- 148 each_triples(T0, Container, P, O, T). 149 150member_attribute(A) :- 151 sub_atom(A, 0, _, _, '_'). % must check number? 152 153 154 /******************************* 155 * BIG FILES * 156 *******************************/ 157 158%% process_rdf(+Input, :OnObject, :Options) 159% 160% Process RDF from Input. Input is either an atom or a term of the 161% format stream(Handle). For each encountered description, call 162% OnObject(+Triples) to handle the triples resulting from the 163% description. Defined Options are: 164% 165% * base_uri(+URI) 166% Determines the reference URI. 167% 168% * db(DB) 169% When loading from a stream, the source is taken from 170% this option or -if non-existent- from base_uri. 171% 172% * lang(LanguageID) 173% Set initial language (as xml:lang) 174% 175% * convert_typed_literal(:Convertor) 176% Call Convertor(+Type, +Content, -RDFObject) to create 177% a triple rdf(S, P, RDFObject) instead of rdf(S, P, 178% literal(type(Type, Content)). 179% 180% * namespaces(-Namespaces:list(NS=URL)) 181% Return list of namespaces declared using xmlns:NS=URL in 182% the document. This can be used to update the namespace 183% list with rdf_register_ns/2. 184% 185% * entity(Name, Value) 186% Overrule entity values found in the file 187% 188% * embedded(Boolean) 189% If =true=, do not give warnings if rdf:RDF is embedded 190% in other XML data. 191 192process_rdf(File, OnObject, M:Options0) :- 193 is_list(Options0), !, 194 entity_options(Options0, EntOptions, Options1), 195 meta_options(load_meta_option, M:Options1, Options2), 196 process_options(Options2, ProcessOptions, Options), 197 option(base_uri(BaseURI), Options, ''), 198 rdf_start_file(Options, Cleanup), 199 strip_module(OnObject, Module, Pred), 200 nb_setval(rdf_object_handler, Module:Pred), 201 nb_setval(rdf_options, Options), 202 nb_setval(rdf_state, -), 203 init_ns_collect(Options, NSList), 204 ( File = stream(In) 205 -> Source = BaseURI 206 ; is_stream(File) 207 -> In = File, 208 option(db(Source), Options, BaseURI) 209 ; open(File, read, In, [type(binary)]), 210 Close = In, 211 Source = File 212 ), 213 new_sgml_parser(Parser, [dtd(DTD)]), 214 def_entities(EntOptions, DTD), 215 set_sgml_parser(Parser, file(Source)), 216 set_sgml_parser(Parser, dialect(xmlns)), 217 set_sgml_parser(Parser, space(sgml)), 218 do_process_rdf(Parser, In, NSList, Close, Cleanup, ProcessOptions). 219process_rdf(File, BaseURI, OnObject) :- 220 process_rdf(File, OnObject, [base_uri(BaseURI)]). 221 222def_entities([], _). 223def_entities([entity(Name, Value)|T], DTD) :- !, 224 def_entity(DTD, Name, Value), 225 def_entities(T, DTD). 226def_entities([_|T0], DTD) :- 227 def_entities(T0, DTD). 228 229def_entity(DTD, Name, Value) :- 230 open_dtd(DTD, [], Stream), 231 xml_quote_attribute(Value, QValue), 232 format(Stream, '<!ENTITY ~w "~w">~n', [Name, QValue]), 233 close(Stream). 234 235 236do_process_rdf(Parser, In, NSList, Close, Cleanup, Options) :- 237 call_cleanup(( sgml_parse(Parser, 238 [ source(In), 239 call(begin, rdf:on_begin), 240 call(xmlns, rdf:on_xmlns) 241 | Options 242 ]), 243 exit_ns_collect(NSList) 244 ), 245 cleanup_process(Close, Cleanup, Parser)). 246 247cleanup_process(In, Cleanup, Parser) :- 248 ( var(In) 249 -> true 250 ; close(In) 251 ), 252 free_sgml_parser(Parser), 253 nb_delete(rdf_options), 254 nb_delete(rdf_object_handler), 255 nb_delete(rdf_state), 256 nb_delete(rdf_nslist), 257 rdf_end_file(Cleanup). 258 259on_begin(NS:'RDF', Attr, _) :- 260 rdf_name_space(NS), !, 261 nb_getval(rdf_options, Options), 262 make_rdf_state(Options, State0, _), 263 rdf_modify_state(Attr, State0, State), 264 nb_setval(rdf_state, State). 265on_begin(Tag, Attr, Parser) :- 266 nb_getval(rdf_state, State), 267 ( State == (-) 268 -> nb_getval(rdf_options, RdfOptions), 269 ( memberchk(embedded(true), RdfOptions) 270 -> true 271 ; print_message(warning, rdf(unexpected(Tag, Parser))) 272 ) 273 ; get_sgml_parser(Parser, line(Start)), 274 get_sgml_parser(Parser, file(File)), 275 sgml_parse(Parser, 276 [ document(Content), 277 parse(content) 278 ]), 279 nb_getval(rdf_object_handler, OnTriples), 280 element_to_plrdf(element(Tag, Attr, Content), Objects, State), 281 rdf_triples(Objects, Triples), 282 call(OnTriples, Triples, File:Start) 283 ). 284 285%% on_xmlns(+NS, +URL, +Parser) 286% 287% Build up the list of encountered xmlns:NS=URL declarations. We 288% use destructive assignment here as an alternative to 289% assert/retract, ensuring thread-safety and better performance. 290 291on_xmlns(NS, URL, _Parser) :- 292 ( nb_getval(rdf_nslist, List), 293 List = list(L0) 294 -> nb_linkarg(1, List, [NS=URL|L0]) 295 ; true 296 ). 297 298init_ns_collect(Options, NSList) :- 299 ( option(namespaces(NSList), Options, -), 300 NSList \== (-) 301 -> nb_setval(rdf_nslist, list([])) 302 ; nb_setval(rdf_nslist, -), 303 NSList = (-) 304 ). 305 306exit_ns_collect(NSList) :- 307 ( NSList == (-) 308 -> true 309 ; nb_getval(rdf_nslist, list(NSList)) 310 ). 311 312 313process_options(Options, Process, RestOptions) :- 314 select_option(content_length(Len), Options, RestOptions), !, 315 Process = [content_length(Len)]. 316process_options(Options, [], Options). 317 318 319 /******************************* 320 * MESSAGES * 321 *******************************/ 322 323:- multifile 324 prolog:message/3. 325 326% Catch messages. sgml/4 is generated by the SGML2PL binding. 327 328prolog:message(rdf(unparsed(Data))) --> 329 { phrase(unparse_xml(Data), XML) 330 }, 331 [ 'RDF: Failed to interpret "~s"'-[XML] ]. 332prolog:message(rdf(shared_blank_nodes(N))) --> 333 [ 'RDF: Shared ~D blank nodes'-[N] ]. 334prolog:message(rdf(not_a_name(Name))) --> 335 [ 'RDF: argument to rdf:ID is not an XML name: ~p'-[Name] ]. 336prolog:message(rdf(redefined_id(Id))) --> 337 [ 'RDF: rdf:ID ~p: multiple definitions'-[Id] ]. 338prolog:message(rdf(unexpected(Tag, Parser))) --> 339 { get_sgml_parser(Parser, file(File)), 340 get_sgml_parser(Parser, line(Line)) 341 }, 342 [ 'RDF: ~w:~d: Unexpected element ~w'-[File, Line, Tag] ]. 343 344 345 /******************************* 346 * XML-TO-TEXT * 347 *******************************/ 348 349unparse_xml([]) --> !, 350 []. 351unparse_xml([H|T]) --> !, 352 unparse_xml(H), 353 unparse_xml(T). 354unparse_xml(Atom) --> 355 { atom(Atom) 356 }, !, 357 atom(Atom). 358unparse_xml(element(Name, Attr, Content)) --> 359 "<", 360 identifier(Name), 361 attributes(Attr), 362 ( { Content == [] 363 } 364 -> "/>" 365 ; ">", 366 unparse_xml(Content) 367 ). 368 369attributes([]) --> 370 []. 371attributes([H|T]) --> 372 attribute(H), 373 attributes(T). 374 375attribute(Name=Value) --> 376 " ", 377 identifier(Name), 378 "=", 379 value(Value). 380 381identifier(NS:Local) --> !, 382 "{", atom(NS), "}", 383 atom(Local). 384identifier(Local) --> 385 atom(Local). 386 387atom(Atom, Text, Rest) :- 388 atom_codes(Atom, Chars), 389 append(Chars, Rest, Text). 390 391value(Value) --> 392 { atom_codes(Value, Chars) 393 }, 394 "\"", 395 quoted(Chars), 396 "\"". 397 398quoted([]) --> 399 []. 400quoted([H|T]) --> 401 quote(H), !, 402 quoted(T). 403 404quote(0'<) --> "<". 405quote(0'>) --> ">". 406quote(0'") --> """. 407quote(0'&) --> "&". 408quote(X) --> [X]. 409 410 411 /******************************* 412 * XREF * 413 *******************************/ 414 415:- multifile prolog:meta_goal/2. 416prolog:meta_goal(process_rdf(_,G,_), [G+2]). 417 418:- retract(system:swi_io). % 419