1/*  Part of SWI-Prolog
2
3    Author:        Jan Wielemaker
4    E-mail:        J.Wielemaker@vu.nl
5    WWW:           http://www.swi-prolog.org
6    Copyright (c)  2010-2018, University of Amsterdam
7                              CWI, Amsterdam
8    All rights reserved.
9
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions
12    are met:
13
14    1. Redistributions of source code must retain the above copyright
15       notice, this list of conditions and the following disclaimer.
16
17    2. Redistributions in binary form must reproduce the above copyright
18       notice, this list of conditions and the following disclaimer in
19       the documentation and/or other materials provided with the
20       distribution.
21
22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33    POSSIBILITY OF SUCH DAMAGE.
34*/
35
36:- module(w3c_ntdata,
37          [ load_rdf_ntriples/2,        % +File, -Triples
38            rdf_ntriple_part/4          % +Field, -Value, <DCG>
39          ]).
40
41
42/** <module> RDF N-triples parser (obsolete)
43
44This module parses n-triple files as defined   by the W3C RDF working in
45http://www.w3.org/TR/rdf-testcases/#ntriples.   This   format     is   a
46simplified version of the RDF N3 notation   used  in the *.nt files that
47are used to describe the normative outcome of the RDF test-cases.
48
49The returned list terms are of the form
50
51        rdf(Subject, Predicate, Object)
52
53where
54
55        * Subject
56        is an atom or node(Id) for anonymous nodes
57        * Predicate
58        is an atom
59        * Object
60        is an atom, node(Id), literal(Atom) or xml(Atom)
61
62@deprecated     This library will shortly be replaced with a stub that
63                calls library(semweb/rdf_ntriples).
64*/
65
66%       load_rdf_ntriples(+Source, -Triples)
67%
68%       Load a file or stream to a list of rdf(S,P,O) triples.
69
70load_rdf_ntriples(File, Triples) :-
71    open_nt_file(File, In, Close),
72    call_cleanup(stream_to_triples(In, Triples), Close).
73
74%       open_nt_file(+Input, -Stream, -Close)
75%
76%       Open Input, returning Stream and a goal to cleanup Stream if it
77%       was opened.
78
79open_nt_file(stream(Stream), Stream, true) :- !.
80open_nt_file(Stream, Stream, true) :-
81    is_stream(Stream),
82    !.
83open_nt_file(Spec, Stream, close(Stream)) :-
84    absolute_file_name(Spec,
85                       [ access(read),
86                         extensions([nt,''])
87                       ], Path),
88    open(Path, read, Stream).
89
90
91%       rdf_ntriple_part(+Type, -Value, <DCG>)
92%
93%       Parse one of the fields of  an   ntriple.  This  is used for the
94%       SWI-Prolog Sesame (www.openrdf.org) implementation   to  realise
95%       /servlets/removeStatements. I do not think   public  use of this
96%       predicate should be stimulated.
97
98rdf_ntriple_part(subject, Subject) -->
99    subject(Subject).
100rdf_ntriple_part(predicate, Predicate) -->
101    predicate(Predicate).
102rdf_ntriple_part(object, Object) -->
103    predicate(Object).
104
105
106%       stream_to_triples(+Stream, -ListOfTriples)
107%
108%       Read Stream, returning all its triples
109
110stream_to_triples(In, Triples) :-
111    read_line_to_codes(In, Line),
112    (   Line == end_of_file
113    ->  Triples = []
114    ;   phrase(line(Triples, Tail), Line),
115        stream_to_triples(In, Tail)
116    ).
117
118line(Triples, Tail) -->
119    wss,
120    (   comment
121    ->  {Triples = Tail}
122    ;   triple(Triple)
123    ->  {Triples = [Triple|Tail]}
124    ).
125
126comment -->
127    "#",
128    !,
129    skip_rest.
130comment -->
131    end_of_input.
132
133triple(rdf(Subject, Predicate, Object)) -->
134    subject(Subject), ws, wss,
135    predicate(Predicate), ws, wss,
136    object(Object), wss, ".", wss.
137
138subject(Subject) -->
139    uniref(Subject),
140    !.
141subject(Subject) -->
142    node_id(Subject).
143
144predicate(Predicate) -->
145    uniref(Predicate).
146
147object(Object) -->
148    uniref(Object),
149    !.
150object(Object) -->
151    node_id(Object).
152object(Object) -->
153    literal(Object).
154
155
156uniref(URI) -->
157    "<",
158    escaped_uri_codes(Codes),
159    ">",
160    !,
161    { atom_codes(URI, Codes)
162    }.
163
164node_id(node(Id)) -->                   % anonymous nodes
165    "_:",
166    name_start(C0),
167    name_codes(Codes),
168    { atom_codes(Id, [C0|Codes])
169    }.
170
171literal(Literal) -->
172    lang_string(Literal),
173    !.
174literal(Literal) -->
175    xml_string(Literal).
176
177
178%       name_start(-Code)
179%       name_codes(-ListfCodes)
180%
181%       Parse identifier names
182
183name_start(C) -->
184    [C],
185    { code_type(C, alpha)
186    }.
187
188name_codes([C|T]) -->
189    [C],
190    { code_type(C, alnum)
191    },
192    !,
193    name_codes(T).
194name_codes([]) -->
195    [].
196
197
198%       escaped_uri_codes(-CodeList)
199%
200%       Decode string holding %xx escaped characters.
201
202escaped_uri_codes([]) -->
203    [].
204escaped_uri_codes([C|T]) -->
205    "%", [D0,D1],
206    { code_type(D0, xdigit(V0)),
207      code_type(D1, xdigit(V1)),
208      !,
209      C is V0<<4 + V1
210    },
211    escaped_uri_codes(T).
212escaped_uri_codes([C|T]) -->
213    "\\u", [D0,D1,D2,D3],
214    !,
215    { code_type(D0, xdigit(V0)),
216      code_type(D1, xdigit(V1)),
217      code_type(D2, xdigit(V2)),
218      code_type(D3, xdigit(V3)),
219      C is V0<<12 + V1<<8 + V2<<4 + V3
220    },
221    escaped_uri_codes(T).
222escaped_uri_codes([C|T]) -->
223    "\\U", [D0,D1,D2,D3,D4,D5,D6,D7],
224    !,
225    { code_type(D0, xdigit(V0)),
226      code_type(D1, xdigit(V1)),
227      code_type(D2, xdigit(V2)),
228      code_type(D3, xdigit(V3)),
229      code_type(D4, xdigit(V4)),
230      code_type(D5, xdigit(V5)),
231      code_type(D6, xdigit(V6)),
232      code_type(D7, xdigit(V7)),
233      C is V0<<28 + V1<<24 + V2<<20 + V3<<16 +
234           V4<<12 + V5<<8 + V6<<4 + V7
235    },
236    escaped_uri_codes(T).
237escaped_uri_codes([C|T]) -->
238    [C],
239    escaped_uri_codes(T).
240
241
242%       lang_string()
243%
244%       Process a language string
245
246lang_string(String) -->
247    "\"",
248    string(Codes),
249    "\"",
250    !,
251    { atom_codes(Atom, Codes)
252    },
253    (   langsep
254    ->  language(Lang),
255        { String = literal(lang(Lang, Atom))
256        }
257    ;   "^^"
258    ->  uniref(Type),
259        { String = literal(type(Type, Atom))
260        }
261    ;   { String = literal(Atom)
262        }
263    ).
264
265langsep -->
266    "-".
267langsep -->
268    "@".
269
270%       xml_string(String)
271%
272%       Handle xml"..."
273
274xml_string(xml(String)) -->
275    "xml\"",                        % really no whitespace?
276    string(Codes),
277    "\"",
278    { atom_codes(String, Codes)
279    }.
280
281string([]) -->
282    [].
283string([C0|T]) -->
284    string_char(C0),
285    string(T).
286
287string_char(0'\\) -->
288    "\\\\".
289string_char(0'") -->
290    "\\\"".
291string_char(10) -->
292    "\\n".
293string_char(13) -->
294    "\\r".
295string_char(9) -->
296    "\\t".
297string_char(C) -->
298    "\\u",
299    '4xdigits'(C).
300string_char(C) -->
301    "\\U",
302    '4xdigits'(C0),
303    '4xdigits'(C1),
304    { C is C0<<16 + C1
305    }.
306string_char(C) -->
307    [C].
308
309'4xdigits'(C) -->
310    [C0,C1,C2,C3],
311    { code_type(C0, xdigit(V0)),
312      code_type(C1, xdigit(V1)),
313      code_type(C2, xdigit(V2)),
314      code_type(C3, xdigit(V3)),
315
316      C is V0<<12 + V1<<8 + V2<<4 + V3
317    }.
318
319%       language(-Lang)
320%
321%       Return xml:lang language identifier.
322
323language(Lang) -->
324    lang_code(C0),
325    lang_codes(Codes),
326    { atom_codes(Lang, [C0|Codes])
327    }.
328
329lang_code(C) -->
330    [C],
331    { C \== 0'.,
332      \+ code_type(C, white)
333    }.
334
335lang_codes([C|T]) -->
336    lang_code(C),
337    !,
338    lang_codes(T).
339lang_codes([]) -->
340    [].
341
342
343                 /*******************************
344                 *             BASICS           *
345                 *******************************/
346
347skip_rest(_,[]).
348
349ws -->
350    [C],
351    { code_type(C, white)
352    }.
353
354end_of_input([], []).
355
356
357wss -->
358    ws,
359    !,
360    wss.
361wss -->
362    [].
363