1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2010-2018, University of Amsterdam 7 CWI, Amsterdam 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 34*/ 35 36:- module(w3c_ntdata, 37 [ load_rdf_ntriples/2, % +File, -Triples 38 rdf_ntriple_part/4 % +Field, -Value, <DCG> 39 ]). 40 41 42/** <module> RDF N-triples parser (obsolete) 43 44This module parses n-triple files as defined by the W3C RDF working in 45http://www.w3.org/TR/rdf-testcases/#ntriples. This format is a 46simplified version of the RDF N3 notation used in the *.nt files that 47are used to describe the normative outcome of the RDF test-cases. 48 49The returned list terms are of the form 50 51 rdf(Subject, Predicate, Object) 52 53where 54 55 * Subject 56 is an atom or node(Id) for anonymous nodes 57 * Predicate 58 is an atom 59 * Object 60 is an atom, node(Id), literal(Atom) or xml(Atom) 61 62@deprecated This library will shortly be replaced with a stub that 63 calls library(semweb/rdf_ntriples). 64*/ 65 66% load_rdf_ntriples(+Source, -Triples) 67% 68% Load a file or stream to a list of rdf(S,P,O) triples. 69 70load_rdf_ntriples(File, Triples) :- 71 open_nt_file(File, In, Close), 72 call_cleanup(stream_to_triples(In, Triples), Close). 73 74% open_nt_file(+Input, -Stream, -Close) 75% 76% Open Input, returning Stream and a goal to cleanup Stream if it 77% was opened. 78 79open_nt_file(stream(Stream), Stream, true) :- !. 80open_nt_file(Stream, Stream, true) :- 81 is_stream(Stream), 82 !. 83open_nt_file(Spec, Stream, close(Stream)) :- 84 absolute_file_name(Spec, 85 [ access(read), 86 extensions([nt,'']) 87 ], Path), 88 open(Path, read, Stream). 89 90 91% rdf_ntriple_part(+Type, -Value, <DCG>) 92% 93% Parse one of the fields of an ntriple. This is used for the 94% SWI-Prolog Sesame (www.openrdf.org) implementation to realise 95% /servlets/removeStatements. I do not think public use of this 96% predicate should be stimulated. 97 98rdf_ntriple_part(subject, Subject) --> 99 subject(Subject). 100rdf_ntriple_part(predicate, Predicate) --> 101 predicate(Predicate). 102rdf_ntriple_part(object, Object) --> 103 predicate(Object). 104 105 106% stream_to_triples(+Stream, -ListOfTriples) 107% 108% Read Stream, returning all its triples 109 110stream_to_triples(In, Triples) :- 111 read_line_to_codes(In, Line), 112 ( Line == end_of_file 113 -> Triples = [] 114 ; phrase(line(Triples, Tail), Line), 115 stream_to_triples(In, Tail) 116 ). 117 118line(Triples, Tail) --> 119 wss, 120 ( comment 121 -> {Triples = Tail} 122 ; triple(Triple) 123 -> {Triples = [Triple|Tail]} 124 ). 125 126comment --> 127 "#", 128 !, 129 skip_rest. 130comment --> 131 end_of_input. 132 133triple(rdf(Subject, Predicate, Object)) --> 134 subject(Subject), ws, wss, 135 predicate(Predicate), ws, wss, 136 object(Object), wss, ".", wss. 137 138subject(Subject) --> 139 uniref(Subject), 140 !. 141subject(Subject) --> 142 node_id(Subject). 143 144predicate(Predicate) --> 145 uniref(Predicate). 146 147object(Object) --> 148 uniref(Object), 149 !. 150object(Object) --> 151 node_id(Object). 152object(Object) --> 153 literal(Object). 154 155 156uniref(URI) --> 157 "<", 158 escaped_uri_codes(Codes), 159 ">", 160 !, 161 { atom_codes(URI, Codes) 162 }. 163 164node_id(node(Id)) --> % anonymous nodes 165 "_:", 166 name_start(C0), 167 name_codes(Codes), 168 { atom_codes(Id, [C0|Codes]) 169 }. 170 171literal(Literal) --> 172 lang_string(Literal), 173 !. 174literal(Literal) --> 175 xml_string(Literal). 176 177 178% name_start(-Code) 179% name_codes(-ListfCodes) 180% 181% Parse identifier names 182 183name_start(C) --> 184 [C], 185 { code_type(C, alpha) 186 }. 187 188name_codes([C|T]) --> 189 [C], 190 { code_type(C, alnum) 191 }, 192 !, 193 name_codes(T). 194name_codes([]) --> 195 []. 196 197 198% escaped_uri_codes(-CodeList) 199% 200% Decode string holding %xx escaped characters. 201 202escaped_uri_codes([]) --> 203 []. 204escaped_uri_codes([C|T]) --> 205 "%", [D0,D1], 206 { code_type(D0, xdigit(V0)), 207 code_type(D1, xdigit(V1)), 208 !, 209 C is V0<<4 + V1 210 }, 211 escaped_uri_codes(T). 212escaped_uri_codes([C|T]) --> 213 "\\u", [D0,D1,D2,D3], 214 !, 215 { code_type(D0, xdigit(V0)), 216 code_type(D1, xdigit(V1)), 217 code_type(D2, xdigit(V2)), 218 code_type(D3, xdigit(V3)), 219 C is V0<<12 + V1<<8 + V2<<4 + V3 220 }, 221 escaped_uri_codes(T). 222escaped_uri_codes([C|T]) --> 223 "\\U", [D0,D1,D2,D3,D4,D5,D6,D7], 224 !, 225 { code_type(D0, xdigit(V0)), 226 code_type(D1, xdigit(V1)), 227 code_type(D2, xdigit(V2)), 228 code_type(D3, xdigit(V3)), 229 code_type(D4, xdigit(V4)), 230 code_type(D5, xdigit(V5)), 231 code_type(D6, xdigit(V6)), 232 code_type(D7, xdigit(V7)), 233 C is V0<<28 + V1<<24 + V2<<20 + V3<<16 + 234 V4<<12 + V5<<8 + V6<<4 + V7 235 }, 236 escaped_uri_codes(T). 237escaped_uri_codes([C|T]) --> 238 [C], 239 escaped_uri_codes(T). 240 241 242% lang_string() 243% 244% Process a language string 245 246lang_string(String) --> 247 "\"", 248 string(Codes), 249 "\"", 250 !, 251 { atom_codes(Atom, Codes) 252 }, 253 ( langsep 254 -> language(Lang), 255 { String = literal(lang(Lang, Atom)) 256 } 257 ; "^^" 258 -> uniref(Type), 259 { String = literal(type(Type, Atom)) 260 } 261 ; { String = literal(Atom) 262 } 263 ). 264 265langsep --> 266 "-". 267langsep --> 268 "@". 269 270% xml_string(String) 271% 272% Handle xml"..." 273 274xml_string(xml(String)) --> 275 "xml\"", % really no whitespace? 276 string(Codes), 277 "\"", 278 { atom_codes(String, Codes) 279 }. 280 281string([]) --> 282 []. 283string([C0|T]) --> 284 string_char(C0), 285 string(T). 286 287string_char(0'\\) --> 288 "\\\\". 289string_char(0'") --> 290 "\\\"". 291string_char(10) --> 292 "\\n". 293string_char(13) --> 294 "\\r". 295string_char(9) --> 296 "\\t". 297string_char(C) --> 298 "\\u", 299 '4xdigits'(C). 300string_char(C) --> 301 "\\U", 302 '4xdigits'(C0), 303 '4xdigits'(C1), 304 { C is C0<<16 + C1 305 }. 306string_char(C) --> 307 [C]. 308 309'4xdigits'(C) --> 310 [C0,C1,C2,C3], 311 { code_type(C0, xdigit(V0)), 312 code_type(C1, xdigit(V1)), 313 code_type(C2, xdigit(V2)), 314 code_type(C3, xdigit(V3)), 315 316 C is V0<<12 + V1<<8 + V2<<4 + V3 317 }. 318 319% language(-Lang) 320% 321% Return xml:lang language identifier. 322 323language(Lang) --> 324 lang_code(C0), 325 lang_codes(Codes), 326 { atom_codes(Lang, [C0|Codes]) 327 }. 328 329lang_code(C) --> 330 [C], 331 { C \== 0'., 332 \+ code_type(C, white) 333 }. 334 335lang_codes([C|T]) --> 336 lang_code(C), 337 !, 338 lang_codes(T). 339lang_codes([]) --> 340 []. 341 342 343 /******************************* 344 * BASICS * 345 *******************************/ 346 347skip_rest(_,[]). 348 349ws --> 350 [C], 351 { code_type(C, white) 352 }. 353 354end_of_input([], []). 355 356 357wss --> 358 ws, 359 !, 360 wss. 361wss --> 362 []. 363