1/*  Part of SWI-Prolog
2
3    Author:        Jan Wielemaker
4    E-mail:        J.Wielemaker@vu.nl
5    WWW:           http://www.swi-prolog.org
6    Copyright (c)  2010-2013, University of Amsterdam
7    All rights reserved.
8
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions
11    are met:
12
13    1. Redistributions of source code must retain the above copyright
14       notice, this list of conditions and the following disclaimer.
15
16    2. Redistributions in binary form must reproduce the above copyright
17       notice, this list of conditions and the following disclaimer in
18       the documentation and/or other materials provided with the
19       distribution.
20
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32    POSSIBILITY OF SUCH DAMAGE.
33*/
34
35:- module(rdf_parser,
36          [ xml_to_plrdf/3,             % +XMLTerm, -RDFTerm, +State
37            element_to_plrdf/3,         % +ContentList, -RDFTerm, +State
38            make_rdf_state/3,           % +Options, -State, -RestOptions
39            rdf_modify_state/3,         % +XMLAttrs, +State0, -State
40            rdf_name_space/1
41          ]).
42:- use_module(library(record),[(record)/1, op(_,_,record)]).
43:- use_module(library(rewrite_term),
44              [ rew_term_expansion/2,rew_goal_expansion/2,
45                op(_,_,::=)
46              ]).
47:- autoload(library(lists),[select/3,append/3,member/2]).
48:- autoload(library(occurs),[sub_term/2]).
49:- autoload(library(sgml),[xml_name/1]).
50:- autoload(library(uri),[iri_normalized/3]).
51
52
53% xml_name/1
54:- op(500, fx, \?).                     % Optional (attrs)
55
56term_expansion(F, T) :- rew_term_expansion(F, T).
57goal_expansion(F, T) :- rew_goal_expansion(F, T).
58
59goal_expansion(attrs(Attrs, List), Goal) :-
60    translate_attrs(List, Attrs, Goal).
61
62translate_attrs(Var, Attrs, rewrite_term(Var, Attrs)) :-
63    var(Var),
64    !.
65translate_attrs([], _, true) :- !.
66translate_attrs([H], Attrs, Goal) :-
67    !,
68    (   var(H)
69    ->  Goal = rewrite_term(H, Attrs)
70    ;   H = \?Optional
71    ->  Goal = (   member(A, Attrs),
72                   OptRewrite
73               ->  true
74               ;   true
75               ),
76        expand_goal(rewrite_term(\Optional, A), OptRewrite)
77    ;   Goal = (   member(A, Attrs),
78                   Rewrite
79               ->  true
80               ),
81        expand_goal(rewrite_term(H, A), Rewrite)
82    ).
83translate_attrs([H|T], Attrs0, (G0, G1)) :-
84    !,
85    (   var(H)
86    ->  G0 = rewrite_term(H, Attrs0),
87        Attrs1 = Attrs0
88    ;   H = \?Optional
89    ->  G0 = (   select(A, Attrs0, Attrs1),
90                 OptRewrite
91             ->  true
92             ;   Attrs1 = Attrs0
93             ),
94        expand_goal(rewrite_term(\Optional, A), OptRewrite)
95    ;   G0 = (   select(A, Attrs0, Attrs1),
96                 Rewrite
97             ),
98        expand_goal(rewrite_term(H, A), Rewrite)
99    ),
100    translate_attrs(T, Attrs1, G1).
101translate_attrs(Rule, Attrs, Goal) :-
102    expand_goal(rewrite_term(Rule, Attrs), Goal).
103
104
105:- multifile rdf_name_space/1.
106:- dynamic   rdf_name_space/1.
107
108%!  rdf_name_space(?URL) is nondet.
109%
110%   True if URL must be handled  as rdf: Determines special handling
111%   of rdf:about, rdf:resource, etc.
112
113
114rdf_name_space('http://www.w3.org/1999/02/22-rdf-syntax-ns#').
115rdf_name_space('http://www.w3.org/TR/REC-rdf-syntax').
116
117
118:- record
119    rdf_state(base_uri='',
120              lang='',
121              ignore_lang=false,
122              convert_typed_literal).
123
124
125%!  xml_to_plrdf(+RDFElementOrObject, -RDFTerm, +State)
126%
127%   Translate an XML (using namespaces)  term   into  an Prolog term
128%   representing the RDF data.  This  term   can  then  be  fed into
129%   rdf_triples/[2,3] to create a list of   RDF triples. State is an
130%   instance of an rdf_state record.
131
132xml_to_plrdf(Element, RDF, State) :-
133    (   is_list(Element)
134    ->  rewrite_term(\xml_content_objects(RDF, State), Element)
135    ;   rewrite_term(\xml_objects(RDF, State), Element)
136    ).
137
138%!  element_to_plrdf(+DOM, -RDFTerm, +State)
139%
140%   Rewrite a single XML element.
141
142element_to_plrdf(Element, RDF, State) :-
143    rewrite_term(\nodeElementList(RDF, State), [Element]).
144
145xml_objects(Objects, Options0) ::=
146        E0,
147        { modify_state(E0, Options0, E, Options), !,
148          rewrite_term(\xml_objects(Objects, Options), E)
149        }.
150xml_objects(Objects, Options) ::=
151        element((\rdf('RDF'), !),
152                _,
153                \nodeElementList(Objects, Options)),
154        !.
155xml_objects(Objects, Options) ::=
156        element(_, _, \xml_content_objects(Objects, Options)).
157
158xml_content_objects([], _) ::=
159        [].
160xml_content_objects([H|T], Options) ::=
161        [ \xml_objects(H, Options)
162        | \xml_content_objects(T, Options)
163        ].
164
165
166nodeElementList([], _Options) ::=
167        [], !.
168nodeElementList(L, Options) ::=
169        [ (\ws, !)
170        | \nodeElementList(L, Options)
171        ].
172nodeElementList([H|T], Options) ::=
173        [ \nodeElementOrError(H, Options)
174        | \nodeElementList(T, Options)
175        ].
176
177nodeElementOrError(H, Options) ::=
178        \nodeElement(H, Options), !.
179nodeElementOrError(unparsed(Data), _Options) ::=
180        Data.
181
182nodeElement(description(Type, About, Properties), Options) ::=
183        \description(Type, About, Properties, Options).
184
185
186                 /*******************************
187                 *          DESCRIPTION         *
188                 *******************************/
189
190description(Type, About, Properties, Options0) ::=
191        E0,
192        { modify_state(E0, Options0, E, Options), !,
193          rewrite_term(\description(Type, About, Properties, Options), E)
194        }.
195description(description, About, Properties, Options) ::=
196        element(\rdf('Description'),
197                \attrs([ \?idAboutAttr(About, Options)
198                       | \propAttrs(PropAttrs, Options)
199                       ]),
200                \propertyElts(PropElts, Options)),
201        { !, append(PropAttrs, PropElts, Properties)
202        }.
203description(Type, About, Properties, Options) ::=
204        element(\name_uri(Type, Options),
205                \attrs([ \?idAboutAttr(About, Options)
206                       | \propAttrs(PropAttrs, Options)
207                       ]),
208                \propertyElts(PropElts, Options)),
209        { append(PropAttrs, PropElts, Properties)
210        }.
211
212propAttrs([], _) ::=
213        [], !.
214propAttrs([H|T], Options) ::=
215        [ \propAttr(H, Options)
216        | \propAttrs(T, Options)
217        ].
218
219propAttr(rdf:type = URI, Options) ::=
220        \rdf_or_unqualified(type) = \value_uri(URI, Options), !.
221propAttr(Name = Literal, Options) ::=
222        Name = Value,
223        { mkliteral(Value, Literal, Options)
224        }.
225
226propertyElts([], _) ::=
227        [], !.
228propertyElts(Elts, Options) ::=
229        [ (\ws, !)
230        | \propertyElts(Elts, Options)
231        ].
232propertyElts([H|T], Options) ::=
233        [ \propertyElt(H, Options)
234        | \propertyElts(T, Options)
235        ].
236
237propertyElt(E, Options) ::=
238        \propertyElt(Id, Name, Value, Options),
239        { mkprop(Name, Value, Prop),
240          (   var(Id)
241          ->  E = Prop
242          ;   E = id(Id, Prop)
243          )
244        }.
245
246mkprop(NS:Local, Value, rdf:Local = Value) :-
247    rdf_name_space(NS),
248    !.
249mkprop(Name, Value, Name = Value).
250
251
252propertyElt(Id, Name, Value, Options0) ::=
253        E0,
254        { modify_state(E0, Options0, E, Options), !,
255          rewrite_term(\propertyElt(Id, Name, Value, Options), E)
256        }.
257propertyElt(Id, Name, Value, Options) ::=
258        \literalPropertyElt(Id, Name, Value, Options), !.
259propertyElt(_, Name, Literal, Options) ::=
260        element(Name,
261                \attrs([ \parseLiteral
262                       ]),
263                Content),
264        { !,
265          literal_value(Content, Literal, Options)
266        }.
267propertyElt(Id, Name, collection(Elements), Options) ::=
268        element(Name,
269                \attrs([ \parseCollection,
270                         \?idAttr(Id, Options)
271                       ]),
272                \nodeElementList(Elements, Options)).
273                                        % 5.14 emptyPropertyElt
274propertyElt(Id, Name, Value, Options) ::=
275        element(Name, A, \all_ws),
276        { !,
277          rewrite_term(\emptyPropertyElt(Id, Value, Options), A)
278        }.
279
280propertyElt(_, Name, description(description, Id, Properties), Options) ::=
281        element(Name,
282                \attrs([ \parseResource,
283                         \?idAboutAttr(Id, Options)
284                       ]),
285                \propertyElts(Properties, Options)),
286        !.
287propertyElt(Id, Name, Literal, Options) ::=
288        element(Name,
289                \attrs([ \?idAttr(Id, Options)
290                       ]),
291                [ Value ]),
292        { atom(Value), !,
293          mkliteral(Value, Literal, Options)
294        }.
295propertyElt(Id, Name, Value, Options) ::=
296        element(Name,
297                \attrs([ \?idAttr(Id, Options)
298                       ]),
299                \an_rdf_object(Value, Options)), !.
300propertyElt(Id, Name, unparsed(Value), Options) ::=
301        element(Name,
302                \attrs([ \?idAttr(Id, Options)
303                       ]),
304                Value).
305
306literalPropertyElt(Id, Name, Literal, Options) ::=
307        element(Name,
308                \attrs([ \typeAttr(Type, Options),
309                         \?idAttr(Id, Options)
310                       ]),
311                Content),
312        { typed_literal(Type, Content, Literal, Options)
313        }.
314
315emptyPropertyElt(Id, Literal, Options) ::=
316        \attrs([ \?idAttr(Id, Options),
317                 \?parseLiteral
318               | \noMoreAttrs
319               ]),
320        { !,
321          mkliteral('', Literal, Options)
322        }.
323emptyPropertyElt(Id,
324                 description(description, About, Properties),
325                 Options) ::=
326        \attrs([ \?idAttr(Id, Options),
327                 \?aboutResourceEmptyElt(About, Options),
328                 \?parseResource
329               | \propAttrs(Properties, Options)
330               ]), !.
331
332aboutResourceEmptyElt(about(URI), Options) ::=
333        \resourceAttr(URI, Options), !.
334aboutResourceEmptyElt(node(URI), _Options) ::=
335        \nodeIDAttr(URI).
336
337%!  literal_value(+In, -Value, +Options)
338%
339%   Create the literal value for rdf:parseType="Literal" attributes.
340%   The content is the Prolog XML DOM tree for the literal.
341%
342%   @tbd    Note that the specs demand a canonical textual representation
343%           of the XML data as a Unicode string.  For now the user can
344%           achieve this using the convert_typed_literal hook.
345
346literal_value(Value, literal(type(rdf:'XMLLiteral', Value)), _).
347
348%!  mkliteral(+Atom, -Object, +Options)
349%
350%   Translate attribute value Atom into an RDF object using the
351%   lang(Lang) option from Options.
352
353mkliteral(Text, literal(Val), Options) :-
354    atom(Text),
355    (   rdf_state_lang(Options, Lang),
356        Lang \== ''
357    ->  Val = lang(Lang, Text)
358    ;   Val = Text
359    ).
360
361%!  typed_literal(+Type, +Content, -Literal, +Options)
362%
363%   Handle a literal attribute with rdf:datatype=Type qualifier. NB:
364%   possibly  it  is  faster  to  use  a  global  variable  for  the
365%   conversion hook.
366
367typed_literal(Type, Content, literal(Object), Options) :-
368    rdf_state_convert_typed_literal(Options, Convert),
369    nonvar(Convert),
370    !,
371    (   catch(call(Convert, Type, Content, Object), E, true)
372    ->  (   var(E)
373        ->  true
374        ;   Object = E
375        )
376    ;   Object = error(cannot_convert(Type, Content), _)
377    ).
378typed_literal(Type, [], literal(type(Type, '')), _Options) :- !.
379typed_literal(Type, [Text], literal(type(Type, Text)), _Options) :- !.
380typed_literal(Type, Content, literal(type(Type, Content)), _Options).
381
382
383idAboutAttr(id(Id), Options) ::=
384        \idAttr(Id, Options), !.
385idAboutAttr(about(About), Options) ::=
386        \aboutAttr(About, Options), !.
387idAboutAttr(node(About), _Options) ::=
388        \nodeIDAttr(About), !.
389
390%!  an_rdf_object(-Object, +OptionsURI)
391%
392%   Deals with an object, but there may be spaces around.  I'm still
393%   not sure where to deal with these.  Best is to ask the XML parser
394%   to get rid of them, So most likely this code will change if this
395%   happens.
396
397an_rdf_object(Object, Options) ::=
398        [ \nodeElement(Object, Options)
399        ], !.
400an_rdf_object(Object, Options) ::=
401        [ (\ws, !)
402        | \an_rdf_object(Object, Options)
403        ].
404an_rdf_object(Object, Options) ::=
405        [ \nodeElement(Object, Options),
406          \ws
407        ], !.
408
409ws ::=
410        A,
411        { atom(A),
412          atom_chars(A, Chars),
413          all_blank(Chars), !
414        }.
415ws ::=
416        pi(_).
417
418all_ws ::=
419        [], !.
420all_ws ::=
421        [\ws | \all_ws].
422
423all_blank([]).
424all_blank([H|T]) :-
425    char_type(H, space),            % SWI-Prolog specific
426    all_blank(T).
427
428
429                 /*******************************
430                 *         RDF ATTRIBUTES       *
431                 *******************************/
432
433idAttr(Id, Options) ::=
434        \rdf_or_unqualified('ID') = \uniqueid(Id, Options).
435
436aboutAttr(About, Options) ::=
437        \rdf_or_unqualified(about) = \value_uri(About, Options).
438
439nodeIDAttr(About) ::=
440        \rdf_or_unqualified(nodeID) = About.
441
442resourceAttr(URI, Options) ::=
443        \rdf_or_unqualified(resource) = \value_uri(URI, Options).
444
445typeAttr(Type, Options) ::=
446        \rdf_or_unqualified(datatype) = \value_uri(Type, Options).
447
448name_uri(URI, Options) ::=
449        NS:Local,
450        {   !, atom_concat(NS, Local, A),
451            rewrite_term(\value_uri(URI, Options), A)
452        }.
453name_uri(URI, Options) ::=
454        \value_uri(URI, Options).
455
456value_uri(URI, Options) ::=
457        A,
458        {   rdf_state_base_uri(Options, Base),
459            (   Base \== []
460            ->  iri_normalized(A, Base, URI)
461            ;   URI = A
462            )
463        }.
464
465uniqueid(Id, Options) ::=
466        A,
467        {   unique_xml_name(A, HashID),
468            make_globalid(HashID, Options, Id)
469        }.
470
471unique_xml_name(Name, HashID) :-
472    atom_concat(#, Name, HashID),
473    (   xml_name(Name)
474    ->  true
475    ;   print_message(warning, rdf(not_a_name(Name)))
476    ).
477
478make_globalid(In, Options, Id) :-
479    rdf_state_base_uri(Options, Base),
480    iri_normalized(In, Base, Id).
481
482parseLiteral    ::= \rdf_or_unqualified(parseType) = 'Literal'.
483parseResource   ::= \rdf_or_unqualified(parseType) = 'Resource'.
484parseCollection ::= \rdf_or_unqualified(parseType) = 'Collection'.
485
486
487                 /*******************************
488                 *           PRIMITIVES         *
489                 *******************************/
490
491rdf(Tag) ::=
492        NS:Tag,
493        { rdf_name_space(NS), !
494        }.
495
496rdf_or_unqualified(Tag) ::=
497        Tag.
498rdf_or_unqualified(Tag) ::=
499        NS:Tag,
500        { rdf_name_space(NS), !
501        }.
502
503
504                 /*******************************
505                 *             BASICS           *
506                 *******************************/
507
508/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
509This code is translated by the  goal_expansion/2   rule  at the start of
510this file. We leave the original code for reference.
511
512attrs(Bag) ::=
513        L0,
514        { do_attrs(Bag, L0)
515        }.
516
517do_attrs([], _) :- !.
518do_attrs([\?H|T], L0) :- !,             % optional
519        (   select(X, L0, L),
520            rewrite_term(\H, X)
521        ->  true
522        ;   L = L0
523        ),
524        do_attrs(T, L).
525do_attrs([H|T], L0) :-
526        select(X, L0, L),
527        rewrite_term(H, X), !,
528        do_attrs(T, L).
529do_attrs(C, L) :-
530        rewrite_term(C, L).
531- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
532
533%       \noMoreAttrs
534%
535%       Check attribute-list is empty.  Reserved xml: attributes are
536%       excluded from this test.
537
538noMoreAttrs ::=
539        [], !.
540noMoreAttrs ::=
541        [ xml:_=_
542        | \noMoreAttrs
543        ].
544
545%!  modify_state(+Element0, +Options0, -Element, -Options) is semidet.
546%
547%   If Element0 contains xml:base = Base, strip it from the
548%   attributes list and update base_uri(_) in the Options
549%
550%   It Element0 contains xml:lang = Lang, strip it from the
551%   attributes list and update lang(_) in the Options
552%
553%   Remove all xmlns=_, xmlns:_=_ and xml:_=_.  Only succeed
554%   if something changed.
555
556modify_state(element(Name, Attrs0, Content), Options0,
557             element(Name, Attrs,  Content), Options) :-
558    modify_a_state(Attrs0, Options0, Attrs, Options),
559    Attrs0 \== Attrs.
560
561rdf_modify_state(Attributes, State0, State) :-
562    modify_a_state(Attributes, State0, _, State).
563
564
565modify_a_state([], Options, [], Options).
566modify_a_state([Name=Value|T0], Options0, T, Options) :-
567    modify_a(Name, Value, Options0, Options1),
568    !,
569    modify_a_state(T0, Options1, T, Options).
570modify_a_state([H|T0], Options0, [H|T], Options) :-
571    modify_a_state(T0, Options0, T, Options).
572
573
574modify_a(xml:base, Base1, Options0, Options) :-
575    !,
576    rdf_state_base_uri(Options0, Base0),
577    remove_fragment(Base1, Base2),
578    iri_normalized(Base2, Base0, Base),
579    set_base_uri_of_rdf_state(Base, Options0, Options).
580modify_a(xml:lang, Lang, Options0, Options) :-
581    !,
582    rdf_state_ignore_lang(Options0, false),
583    !,
584    set_lang_of_rdf_state(Lang, Options0, Options).
585modify_a(xmlns, _, Options, Options).
586modify_a(xmlns:_, _, Options, Options).
587modify_a(xml:_, _, Options, Options).
588
589
590%!  remove_fragment(+URI, -WithoutFragment)
591%
592%   When handling xml:base, we must delete the possible fragment.
593
594remove_fragment(URI, Plain) :-
595    sub_atom(URI, B, _, _, #),
596    !,
597    sub_atom(URI, 0, B, _, Plain).
598remove_fragment(URI, URI).
599
600
601                 /*******************************
602                 *     HELP PCE-EMACS A BIT     *
603                 *******************************/
604
605:- multifile
606    emacs_prolog_colours:term_colours/2,
607    emacs_prolog_colours:goal_classification/2.
608
609expand(c(X), _, X) :- !.
610expand(In,   Pattern, Colours) :-
611    compound(In),
612    !,
613    In =.. [F|Args],
614    expand_list(Args, PatternArgs, ColourArgs),
615    Pattern =.. [F|PatternArgs],
616    Colours = functor(F) - ColourArgs.
617expand(X, X, classify).
618
619expand_list([], [], []).
620expand_list([H|T], [PH|PT], [CH|CT]) :-
621    expand(H, PH, CH),
622    expand_list(T, PT, CT).
623
624:- discontiguous
625    term_expansion/2.
626
627term_expansion(term_colours(C),
628               emacs_prolog_colours:term_colours(Pattern, Colours)) :-
629    expand(C, Pattern, Colours).
630
631term_colours((c(head(+(1))) ::= c(match), {c(body)})).
632term_colours((c(head(+(1))) ::= c(match))).
633
634emacs_prolog_colours:goal_classification(\_, expanded).
635
636:- dynamic
637    prolog:meta_goal/2.
638:- multifile
639    prolog:meta_goal/2,
640    prolog:called_by/2.
641
642prolog:meta_goal(rewrite_term(A, _), [A]).
643prolog:meta_goal(\A,            [A+1]).
644
645prolog:called_by(attrs(Attrs, _Term), Called) :-
646    findall(G+1, sub_term(\?G, Attrs), Called, Tail),
647    findall(G+1, sub_term(\G, Attrs), Tail).
648
649
650