1 /* Part of SWI-Prolog
2
3 Author: Jan Wielemaker
4 E-mail: J.Wielemaker@vu.nl
5 WWW: http://www.swi-prolog.org
6 Copyright (c) 2013-2015, VU University Amsterdam
7 All rights reserved.
8
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions
11 are met:
12
13 1. Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15
16 2. Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in
18 the documentation and/or other materials provided with the
19 distribution.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 #include <config.h>
36 #include <SWI-Stream.h>
37 #include <SWI-Prolog.h>
38 #include <string.h>
39 #include <assert.h>
40 #include "turtle_chars.c"
41
42 static atom_t ATOM_end_of_file;
43
44 static functor_t FUNCTOR_node1;
45 static functor_t FUNCTOR_literal1;
46 static functor_t FUNCTOR_type2;
47 static functor_t FUNCTOR_lang2;
48 static functor_t FUNCTOR_triple3;
49 static functor_t FUNCTOR_quad4;
50 static functor_t FUNCTOR_error2;
51 static functor_t FUNCTOR_syntax_error1;
52 static functor_t FUNCTOR_stream4;
53
54
55 /*******************************
56 * CHARACTER CLASSES *
57 *******************************/
58
59 #define WS 0x01 /* white space */
60 #define EL 0x02 /* end-of-line */
61 #define DI 0x04 /* digit */
62 #define LC 0x08 /* digit */
63 #define UC 0x10 /* digit */
64 #define IV 0x20 /* invalid */
65 #define EF 0x80 /* END-OF-FILE */
66 #define NI 0x100 /* Not IRI */
67 #define EC 0x200 /* Local escape */
68
69 static const short char_type0[] =
70 { /*0 1 2 3 4 5 6 7
71 8 9 A B C D E F */
72 NI|EF,
73 NI, NI, NI, NI, NI, NI, NI, NI, /* 00-07 */
74 NI, WS|NI, EL|NI, NI, NI, EL|NI, NI, NI, /* 08-0f */
75 NI, NI, NI, NI, NI, NI, NI, NI, /* 10-17 */
76 NI, NI, NI, NI, NI, NI, NI, NI, /* 18-1F */
77 NI|WS, EC, NI, EC, EC, EC, EC, EC, /* 20-27 */
78 EC, EC, EC, EC, EC, EC, EC, EC, /* 28-2F */
79 DI, DI, DI, DI, DI, DI, DI, DI, /* 30-37 */
80 DI, DI, 0, EC, NI, EC, NI, EC, /* 38-3F */
81 EC, UC, UC, UC, UC, UC, UC, UC, /* 40-47 */
82 UC, UC, UC, UC, UC, UC, UC, UC, /* 48-4F */
83 UC, UC, UC, UC, UC, UC, UC, UC, /* 50-57 */
84 UC, UC, UC, 0, NI, 0, NI, EC, /* 58-5F */
85 NI, LC, LC, LC, LC, LC, LC, LC, /* 60-67 */
86 LC, LC, LC, LC, LC, LC, LC, LC, /* 68-6F */
87 LC, LC, LC, LC, LC, LC, LC, LC, /* 70-77 */
88 LC, LC, LC, NI, NI, NI, EC, 0 /* 78-7F */
89 };
90
91 static const short* char_type = &char_type0[1];
92
93 static inline int
is_ws(int c)94 is_ws(int c) /* Turtle: (WS|EL) */
95 { return (c < 128 ? (char_type[c] & (WS)) != 0 : FALSE);
96 }
97
98 static inline int
is_eol(int c)99 is_eol(int c)
100 { return (c < 128 ? (char_type[c] & EL) != 0 : FALSE);
101 }
102
103 static inline int
is_lang_char1(int c)104 is_lang_char1(int c)
105 { return (c < 128 ? (char_type[c] & (LC|UC)) != 0 : FALSE);
106 }
107
108 static inline int
is_lang_char(int c)109 is_lang_char(int c)
110 { return (c < 128 ? (char_type[c] & (LC|UC|DI)) != 0 : FALSE) || c == '-';
111 }
112
113 static const signed char hexval0[] =
114 {/*0 1 2 3 4 5 6 7 8 9 A B C D E F */
115 -1,
116 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00-0f */
117 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10-1F */
118 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20-2F */
119 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 30-3F */
120 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 40-4F */
121 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 50-5F */
122 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1 /* 60-6F */
123 };
124
125 static const signed char* hexval = &hexval0[1];
126
127 static inline int
hexd(int c)128 hexd(int c)
129 { return (c <= 'f' ? hexval[c] : -1);
130 }
131
132
133 static inline int
wcis_pn_chars_u(int c)134 wcis_pn_chars_u(int c) /* 158s */
135 { return ( wcis_pn_chars_base(c) ||
136 c == '_' || c == ':'
137 );
138 }
139
140
141 static inline int /* 141s RDF Blank Nodes */
wcis_pn_chars_du(int c)142 wcis_pn_chars_du(int c)
143 { return ( wcis_pn_chars_u(c) ||
144 (c >= '0' && c <= '9')
145 );
146 }
147
148 static inline int
wcis_pn_chars(int c)149 wcis_pn_chars(int c) /* 160s */
150 { return ( wcis_pn_chars_u(c) ||
151 c == '-' ||
152 (c >= '0' && c <= '9') ||
153 wcis_pn_chars_extra(c)
154 );
155 }
156
157 /*******************************
158 * ERROR *
159 *******************************/
160
161 static int
syntax_error(IOSTREAM * in,const char * msg)162 syntax_error(IOSTREAM *in, const char *msg)
163 { term_t ex = PL_new_term_refs(2);
164 IOPOS *pos;
165
166 if ( !PL_unify_term(ex+0, PL_FUNCTOR, FUNCTOR_syntax_error1,
167 PL_CHARS, msg) )
168 return FALSE;
169
170 if ( (pos=in->position) )
171 { term_t stream;
172
173 if ( !(stream = PL_new_term_ref()) ||
174 !PL_unify_stream(stream, in) ||
175 !PL_unify_term(ex+1,
176 PL_FUNCTOR, FUNCTOR_stream4,
177 PL_TERM, stream,
178 PL_INT, (int)pos->lineno,
179 PL_INT, (int)(pos->linepos-1), /* one too late */
180 PL_INT64, (int64_t)(pos->charno-1)) )
181 return FALSE;
182 }
183
184 if ( PL_cons_functor_v(ex, FUNCTOR_error2, ex) )
185 { int c;
186
187 do
188 { c = Sgetcode(in);
189 } while(c != '\n' && c != -1);
190
191 return PL_raise_exception(ex),FALSE;
192 }
193
194 return FALSE;
195 }
196
197
198 /*******************************
199 * BUFFER *
200 *******************************/
201
202 #ifdef STRING_BUF_DEBUG
203 #define FAST_BUF_SIZE 1
204 #else
205 #define FAST_BUF_SIZE 512
206 #endif
207
208 typedef struct string_buffer
209 { wchar_t fast[FAST_BUF_SIZE];
210 wchar_t *buf;
211 wchar_t *in;
212 wchar_t *end;
213 #ifdef STRING_BUF_DEBUG
214 int discarded;
215 #endif
216 } string_buffer;
217
218
219 static int
growBuffer(string_buffer * b,int c)220 growBuffer(string_buffer *b, int c)
221 { assert(c != -1);
222
223 if ( b->buf == b->fast )
224 { wchar_t *new = malloc((FAST_BUF_SIZE*2)*sizeof(wchar_t));
225
226 if ( new )
227 { memcpy(new, b->fast, sizeof(b->fast));
228 b->buf = new;
229 b->in = b->buf+FAST_BUF_SIZE;
230 b->end = b->in+FAST_BUF_SIZE;
231 *b->in++ = c;
232
233 return TRUE;
234 }
235 } else
236 { size_t sz = b->end - b->buf;
237 wchar_t *new = realloc(b->buf, sz*sizeof(wchar_t)*2);
238
239 if ( new )
240 { b->buf = new;
241 b->in = new+sz;
242 b->end = b->in+sz;
243 *b->in++ = c;
244
245 return TRUE;
246 }
247 }
248
249 PL_resource_error("memory");
250 return FALSE;
251 }
252
253
254 static inline void
initBuf(string_buffer * b)255 initBuf(string_buffer *b)
256 { b->buf = b->fast;
257 b->in = b->buf;
258 b->end = &b->fast[FAST_BUF_SIZE];
259 #ifdef STRING_BUF_DEBUG
260 b->discarded = FALSE;
261 #endif
262 }
263
264
265 static inline void
discardBuf(string_buffer * b)266 discardBuf(string_buffer *b)
267 {
268 #ifdef STRING_BUF_DEBUG
269 assert(b->discarded == FALSE);
270 b->discarded = TRUE;
271 #endif
272 if ( b->buf != b->fast )
273 free(b->buf);
274 }
275
276
277 static inline int
addBuf(string_buffer * b,int c)278 addBuf(string_buffer *b, int c)
279 { if ( b->in < b->end )
280 { *b->in++ = c;
281 return TRUE;
282 }
283
284 return growBuffer(b, c);
285 }
286
287
288 static inline int
bufSize(string_buffer * b)289 bufSize(string_buffer *b)
290 { return b->in - b->buf;
291 }
292
293 #define baseBuf(b) ( (b)->buf )
294
295
296 /*******************************
297 * SKIPPING *
298 *******************************/
299
300 static int
skip_ws(IOSTREAM * in,int * cp)301 skip_ws(IOSTREAM *in, int *cp)
302 { int c = *cp;
303
304 while(is_ws(c))
305 c = Sgetcode(in);
306
307 *cp = c;
308
309 return !Sferror(in);
310 }
311
312
313 static int
skip_comment_line(IOSTREAM * in,int * cp)314 skip_comment_line(IOSTREAM *in, int *cp)
315 { int c;
316
317 do
318 { c = Sgetcode(in);
319 } while ( c != -1 && !is_eol(c) );
320
321 while(is_eol(c))
322 c = Sgetcode(in);
323
324 *cp = c;
325
326 return !Sferror(in);
327 }
328
329
330 static int
skip_eol(IOSTREAM * in,int * cp)331 skip_eol(IOSTREAM *in, int *cp)
332 { if ( skip_ws(in, cp) )
333 { int c = *cp;
334
335 if ( c == '\n' )
336 return TRUE;
337 if ( c == '\r' )
338 { if ( Speekcode(in) == '\n' )
339 (void)Sgetcode(in);
340 return TRUE;
341 }
342 if ( c == EOF )
343 return TRUE;
344 if ( c == '#' )
345 return skip_comment_line(in, cp);
346
347 return syntax_error(in, "end-of-line expected");
348 } else
349 { return FALSE;
350 }
351 }
352
353
354 /*******************************
355 * READING *
356 *******************************/
357
358 #define ESCAPED_CODE (-1)
359
360 static int
read_hex(IOSTREAM * in,int * cp,int len)361 read_hex(IOSTREAM *in, int *cp, int len)
362 { int c = 0;
363
364 while(len-- > 0)
365 { int v0;
366 int c2 = Sgetcode(in);
367
368 if ( (v0 = hexd(c2)) >= 0 )
369 { c <<= 4;
370 c += v0;
371 } else
372 { return syntax_error(in, "illegal unicode escape");
373 }
374 }
375
376 *cp = c;
377 return ESCAPED_CODE;
378 }
379
380
381 static int
get_iri_code(IOSTREAM * in,int * cp)382 get_iri_code(IOSTREAM *in, int *cp)
383 { int c = Sgetcode(in);
384
385 switch(c)
386 { case '\r':
387 case '\n':
388 return syntax_error(in, "newline in uriref");
389 case EOF:
390 return syntax_error(in, "EOF in uriref");
391 case '<':
392 case '"':
393 case '{':
394 case '}':
395 case '|':
396 case '^':
397 case '`':
398 return syntax_error(in, "Illegal character in uriref");
399 case '\\':
400 { int c2 = Sgetcode(in);
401
402 switch(c2)
403 { case 'u': return read_hex(in, cp, 4);
404 case 'U': return read_hex(in, cp, 8);
405 default: return syntax_error(in, "illegal escape");
406 }
407 }
408 default:
409 { if ( c > ' ' )
410 { *cp = c;
411 return TRUE;
412 }
413 return syntax_error(in, "Illegal control character in uriref");
414 }
415 }
416 }
417
418 static int
read_uniref(IOSTREAM * in,term_t subject,int * cp)419 read_uniref(IOSTREAM *in, term_t subject, int *cp)
420 { int c = -1;
421 string_buffer buf;
422
423 initBuf(&buf);
424 for(;;)
425 { int rc;
426
427 if ( (rc=get_iri_code(in, &c)) == TRUE )
428 { switch(c)
429 { case '>':
430 { int rc = PL_unify_wchars(subject, PL_ATOM,
431 bufSize(&buf), baseBuf(&buf));
432 discardBuf(&buf);
433 *cp = Sgetcode(in);
434 return rc;
435 }
436 default:
437 if ( !addBuf(&buf, c) )
438 { discardBuf(&buf);
439 return FALSE;
440 }
441 }
442 } else if ( rc == ESCAPED_CODE )
443 { if ( !addBuf(&buf, c) )
444 { discardBuf(&buf);
445 return FALSE;
446 }
447 } else
448 { discardBuf(&buf);
449 return FALSE;
450 }
451 }
452 }
453
454
455 static int
read_node_id(IOSTREAM * in,term_t subject,int * cp)456 read_node_id(IOSTREAM *in, term_t subject, int *cp)
457 { int c;
458
459 c = Sgetcode(in);
460 if ( c != ':' )
461 return syntax_error(in, "invalid nodeID");
462
463 c = Sgetcode(in);
464 if ( wcis_pn_chars_du(c) )
465 { string_buffer buf;
466
467 initBuf(&buf);
468 addBuf(&buf, c);
469 for(;;)
470 { int c2;
471
472 c = Sgetcode(in);
473
474 if ( wcis_pn_chars(c) )
475 { addBuf(&buf, c);
476 } else if ( c == '.' &&
477 (wcis_pn_chars((c2=Speekcode(in))) || c2 == '.') )
478 { addBuf(&buf, c);
479 } else
480 { term_t av = PL_new_term_refs(1);
481 int rc;
482
483 rc = ( PL_unify_wchars(av+0, PL_ATOM, bufSize(&buf), baseBuf(&buf)) &&
484 PL_cons_functor_v(subject, FUNCTOR_node1, av)
485 );
486 discardBuf(&buf);
487 *cp = c;
488
489 return rc;
490 }
491 }
492 } else
493 return syntax_error(in, "invalid nodeID");
494 }
495
496
497 static int
read_lan(IOSTREAM * in,term_t lan,int * cp)498 read_lan(IOSTREAM *in, term_t lan, int *cp)
499 { int c;
500 string_buffer buf;
501 int rc;
502
503 c = Sgetcode(in);
504 if ( !skip_ws(in, &c) )
505 return FALSE;
506 if ( !is_lang_char1(c) )
507 return syntax_error(in, "language tag must start with a-zA-Z");
508
509 initBuf(&buf);
510 addBuf(&buf, c);
511 for(;;)
512 { c = Sgetcode(in);
513 if ( is_lang_char(c) )
514 { addBuf(&buf, c);
515 } else
516 { break;
517 }
518 }
519 while(c=='-')
520 { addBuf(&buf, c);
521 c = Sgetcode(in);
522 if ( !is_lang_char(c) )
523 { discardBuf(&buf);
524 return syntax_error(in, "Illegal language tag");
525 }
526 addBuf(&buf, c);
527 for(;;)
528 { c = Sgetcode(in);
529 if ( is_lang_char(c) )
530 { addBuf(&buf, c);
531 } else
532 { break;
533 }
534 }
535 }
536
537 *cp = c;
538 rc = PL_unify_wchars(lan, PL_ATOM, bufSize(&buf), baseBuf(&buf));
539 discardBuf(&buf);
540
541 return rc;
542 }
543
544
545 static int
read_subject(IOSTREAM * in,term_t subject,int * cp)546 read_subject(IOSTREAM *in, term_t subject, int *cp)
547 { int c = *cp;
548 int rc;
549
550 switch ( c )
551 { case '<':
552 rc = read_uniref(in, subject, cp);
553 break;
554 case '_':
555 rc = read_node_id(in, subject, cp);
556 break;
557 default:
558 return syntax_error(in, "subject expected");
559 }
560
561 if ( rc && !is_ws(*cp) )
562 return syntax_error(in, "subject not followed by whitespace");
563
564 return rc;
565 }
566
567
568 static int
read_predicate(IOSTREAM * in,term_t predicate,int * cp)569 read_predicate(IOSTREAM *in, term_t predicate, int *cp)
570 { int c = *cp;
571 int rc;
572
573 switch ( c )
574 { case '<':
575 rc = read_uniref(in, predicate, cp);
576 break;
577 default:
578 return syntax_error(in, "predicate expected");
579 }
580
581 if ( rc && !is_ws(*cp) )
582 return syntax_error(in, "predicate not followed by whitespace");
583
584 return rc;
585 }
586
587
588 static int
read_graph(IOSTREAM * in,term_t graph,int * cp)589 read_graph(IOSTREAM *in, term_t graph, int *cp)
590 { int c = *cp;
591 int rc;
592
593 switch ( c )
594 { case '<':
595 rc = read_uniref(in, graph, cp);
596 break;
597 default:
598 return syntax_error(in, "graph expected");
599 }
600
601 return rc;
602 }
603
604
605 static int
wrap_literal(term_t lit)606 wrap_literal(term_t lit)
607 { return PL_cons_functor_v(lit, FUNCTOR_literal1, lit);
608 }
609
610
611 static int
get_string_code(IOSTREAM * in,int * cp)612 get_string_code(IOSTREAM *in, int *cp)
613 { int c = Sgetcode(in);
614
615 switch(c)
616 { case '\r':
617 case '\n':
618 return syntax_error(in, "newline in string");
619 case '\\':
620 { int c2 = Sgetcode(in);
621
622 switch(c2)
623 { case 'b': *cp = '\b'; return ESCAPED_CODE;
624 case 't': *cp = '\t'; return ESCAPED_CODE;
625 case 'f': *cp = '\f'; return ESCAPED_CODE;
626 case 'n': *cp = '\n'; return ESCAPED_CODE;
627 case 'r': *cp = '\r'; return ESCAPED_CODE;
628 case '"': *cp = '"'; return ESCAPED_CODE;
629 case '\'': *cp = '\''; return ESCAPED_CODE;
630 case '\\': *cp = '\\'; return ESCAPED_CODE;
631 case 'u': return read_hex(in, cp, 4);
632 case 'U': return read_hex(in, cp, 8);
633 default: return syntax_error(in, "illegal escape");
634 }
635 }
636 default:
637 *cp = c;
638 return TRUE;
639 }
640 }
641
642
643 static int
read_literal(IOSTREAM * in,term_t literal,int * cp)644 read_literal(IOSTREAM *in, term_t literal, int *cp)
645 { int c = -1;
646 string_buffer buf;
647
648 initBuf(&buf);
649 for(;;)
650 { int rc;
651
652 if ( (rc=get_string_code(in, &c)) == TRUE )
653 { switch(c)
654 { case '"':
655 { c = Sgetcode(in);
656
657 if ( !skip_ws(in, &c) )
658 { discardBuf(&buf);
659 return FALSE;
660 }
661
662 switch(c)
663 { case '@':
664 { term_t av = PL_new_term_refs(2);
665
666 if ( read_lan(in, av+0, cp) )
667 { int rc = ( PL_unify_wchars(av+1, PL_ATOM,
668 bufSize(&buf), baseBuf(&buf)) &&
669 PL_cons_functor_v(literal, FUNCTOR_lang2, av) &&
670 wrap_literal(literal)
671 );
672 discardBuf(&buf);
673 return rc;
674 } else
675 { discardBuf(&buf);
676 return FALSE;
677 }
678 }
679 case '^':
680 { c = Sgetcode(in);
681
682 if ( c == '^' )
683 { term_t av = PL_new_term_refs(2);
684
685 c = Sgetcode(in);
686 if ( !skip_ws(in, &c) )
687 { discardBuf(&buf);
688 return FALSE;
689 }
690 if ( c == '<' )
691 { if ( read_uniref(in, av+0, cp) )
692 { int rc = ( PL_unify_wchars(av+1, PL_ATOM,
693 bufSize(&buf), baseBuf(&buf)) &&
694 PL_cons_functor_v(literal, FUNCTOR_type2, av) &&
695 wrap_literal(literal)
696 );
697 discardBuf(&buf);
698 return rc;
699 } else
700 { discardBuf(&buf);
701 return FALSE;
702 }
703 } else
704 { discardBuf(&buf);
705 return syntax_error(in, "datatype uriref expected");
706 }
707 } else
708 { discardBuf(&buf);
709 return syntax_error(in, "^ expected");
710 }
711 }
712 default:
713 { int rc;
714
715 *cp = c;
716 rc = ( PL_unify_wchars(literal, PL_ATOM,
717 bufSize(&buf), baseBuf(&buf)) &&
718 wrap_literal(literal)
719 );
720 discardBuf(&buf);
721 return rc;
722 }
723 }
724 }
725 case EOF:
726 discardBuf(&buf);
727 return syntax_error(in, "EOF in string");
728 case '\n':
729 case '\r':
730 discardBuf(&buf);
731 return syntax_error(in, "newline in string");
732 default:
733 if ( !addBuf(&buf, c) )
734 { discardBuf(&buf);
735 return FALSE;
736 }
737 }
738 } else if ( rc == ESCAPED_CODE )
739 { if ( !addBuf(&buf, c) )
740 { discardBuf(&buf);
741 return FALSE;
742 }
743 } else
744 { discardBuf(&buf);
745 return FALSE;
746 }
747 }
748 }
749
750
751 static int
read_object(IOSTREAM * in,term_t object,int * cp)752 read_object(IOSTREAM *in, term_t object, int *cp)
753 { int c = *cp;
754
755 switch ( c )
756 { case '<':
757 return read_uniref(in, object, cp);
758 case '_':
759 return read_node_id(in, object, cp);
760 case '"':
761 return read_literal(in, object, cp);
762 default:
763 return syntax_error(in, "object expected");
764 }
765 }
766
767
768 static int
check_full_stop(IOSTREAM * in,int * cp)769 check_full_stop(IOSTREAM *in, int *cp)
770 { int c = *cp;
771
772 if ( c == '.' )
773 { *cp = Sgetcode(in);
774
775 return TRUE;
776 }
777
778 return syntax_error(in, "fullstop (.) expected");
779 }
780
781
782 static int
read_ntuple(term_t from,term_t triple,int arity)783 read_ntuple(term_t from, term_t triple, int arity)
784 { IOSTREAM *in;
785 int rc;
786 int c;
787
788 if ( !PL_get_stream(from, &in, SIO_INPUT) )
789 return FALSE;
790
791 c=Sgetcode(in);
792 next:
793 rc = skip_ws(in, &c);
794
795 if ( rc )
796 { if ( c == '#' )
797 { if ( skip_comment_line(in, &c) )
798 goto next;
799 } else if ( c == EOF )
800 { rc = PL_unify_atom(triple, ATOM_end_of_file);
801 } else if ( c < 128 && (char_type[c]&EL) )
802 { if ( skip_eol(in, &c) )
803 { c=Sgetcode(in);
804 goto next;
805 } else
806 return FALSE;
807 } else
808 { term_t av = PL_new_term_refs(5); /* room for quad */
809
810 rc = ( read_subject(in, av+1, &c) &&
811 skip_ws(in, &c) &&
812 read_predicate(in, av+2, &c) &&
813 skip_ws(in, &c) &&
814 read_object(in, av+3, &c) &&
815 skip_ws(in, &c)
816 );
817
818 if ( rc )
819 {
820 again:
821 if ( arity == 3 )
822 { rc = ( check_full_stop(in, &c) &&
823 skip_eol(in, &c)
824 );
825 } else if ( arity == 4 )
826 { rc = ( read_graph(in, av+4, &c) &&
827 skip_ws(in, &c) &&
828 check_full_stop(in, &c) &&
829 skip_eol(in, &c)
830 );
831 } else
832 { arity = (c == '<' ? 4 : 3);
833 goto again;
834 }
835 }
836
837 if ( rc )
838 { functor_t f = arity == 3 ? FUNCTOR_triple3 : FUNCTOR_quad4;
839
840 rc = ( PL_cons_functor_v(av+0, f, av+1) &&
841 PL_unify(triple, av+0)
842 );
843 }
844 }
845 }
846
847 return (PL_release_stream(in) && rc);
848 }
849
850
851 static foreign_t
read_ntriple(term_t from,term_t triple)852 read_ntriple(term_t from, term_t triple)
853 { return read_ntuple(from, triple, 3);
854 }
855
856 static foreign_t
read_nquad(term_t from,term_t quad)857 read_nquad(term_t from, term_t quad)
858 { return read_ntuple(from, quad, 4);
859 }
860
861 static foreign_t
read_ntuple2(term_t from,term_t quad)862 read_ntuple2(term_t from, term_t quad)
863 { return read_ntuple(from, quad, 0);
864 }
865
866
867 /*******************************
868 * INSTALL *
869 *******************************/
870
871 #define MKFUNCTOR(n, a) \
872 FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
873
874 install_t
install_ntriples(void)875 install_ntriples(void)
876 { ATOM_end_of_file = PL_new_atom("end_of_file");
877
878 MKFUNCTOR(node, 1);
879 MKFUNCTOR(literal, 1);
880 MKFUNCTOR(type, 2);
881 MKFUNCTOR(lang, 2);
882 MKFUNCTOR(triple, 3);
883 MKFUNCTOR(quad, 4);
884 MKFUNCTOR(error, 2);
885 MKFUNCTOR(syntax_error, 1);
886 MKFUNCTOR(stream, 4);
887
888 PL_register_foreign("read_ntriple", 2, read_ntriple, 0);
889 PL_register_foreign("read_nquad", 2, read_nquad, 0);
890 PL_register_foreign("read_ntuple", 2, read_ntuple2, 0);
891 }
892