1 /*  Part of SWI-Prolog
2 
3     Author:        Jan Wielemaker
4     E-mail:        J.Wielemaker@vu.nl
5     WWW:           http://www.swi-prolog.org
6     Copyright (c)  2013-2015, VU University Amsterdam
7     All rights reserved.
8 
9     Redistribution and use in source and binary forms, with or without
10     modification, are permitted provided that the following conditions
11     are met:
12 
13     1. Redistributions of source code must retain the above copyright
14        notice, this list of conditions and the following disclaimer.
15 
16     2. Redistributions in binary form must reproduce the above copyright
17        notice, this list of conditions and the following disclaimer in
18        the documentation and/or other materials provided with the
19        distribution.
20 
21     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25     COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32     POSSIBILITY OF SUCH DAMAGE.
33 */
34 
35 #include <config.h>
36 #include <SWI-Stream.h>
37 #include <SWI-Prolog.h>
38 #include <string.h>
39 #include <assert.h>
40 #include "turtle_chars.c"
41 
42 static atom_t ATOM_end_of_file;
43 
44 static functor_t FUNCTOR_node1;
45 static functor_t FUNCTOR_literal1;
46 static functor_t FUNCTOR_type2;
47 static functor_t FUNCTOR_lang2;
48 static functor_t FUNCTOR_triple3;
49 static functor_t FUNCTOR_quad4;
50 static functor_t FUNCTOR_error2;
51 static functor_t FUNCTOR_syntax_error1;
52 static functor_t FUNCTOR_stream4;
53 
54 
55 		 /*******************************
56 		 *	CHARACTER CLASSES	*
57 		 *******************************/
58 
59 #define WS	0x01			/* white space */
60 #define EL	0x02			/* end-of-line */
61 #define DI	0x04			/* digit */
62 #define LC	0x08			/* digit */
63 #define UC	0x10			/* digit */
64 #define IV	0x20			/* invalid */
65 #define EF	0x80			/* END-OF-FILE */
66 #define NI	0x100			/* Not IRI */
67 #define EC	0x200			/* Local escape */
68 
69 static const short char_type0[] =
70 {   /*0      1      2      3      4      5      6      7
71       8      9      A      B      C      D      E      F  */
72   NI|EF,
73      NI,    NI,    NI,    NI,    NI,    NI,    NI,    NI,   /* 00-07 */
74      NI, WS|NI, EL|NI,	  NI,	 NI, EL|NI,    NI,    NI,   /* 08-0f */
75      NI,    NI,    NI,    NI,    NI,    NI,    NI,    NI,   /* 10-17 */
76      NI,    NI,    NI,    NI,    NI,    NI,    NI,    NI,   /* 18-1F */
77   NI|WS,    EC,    NI,	  EC,	 EC,	EC,    EC,    EC,   /* 20-27 */
78      EC,    EC,	   EC,	  EC,	 EC,	EC,    EC,    EC,   /* 28-2F */
79      DI,    DI,    DI,    DI,    DI,    DI,    DI,    DI,   /* 30-37 */
80      DI,    DI,     0,	  EC,    NI,	EC,    NI,    EC,   /* 38-3F */
81      EC,    UC,    UC,    UC,    UC,    UC,    UC,    UC,   /* 40-47 */
82      UC,    UC,    UC,    UC,    UC,    UC,    UC,    UC,   /* 48-4F */
83      UC,    UC,    UC,    UC,    UC,    UC,    UC,    UC,   /* 50-57 */
84      UC,    UC,    UC,     0,    NI,     0,    NI,    EC,   /* 58-5F */
85      NI,    LC,    LC,    LC,    LC,    LC,    LC,    LC,   /* 60-67 */
86      LC,    LC,    LC,    LC,    LC,    LC,    LC,    LC,   /* 68-6F */
87      LC,    LC,    LC,    LC,    LC,    LC,    LC,    LC,   /* 70-77 */
88      LC,    LC,    LC,    NI,    NI,    NI,    EC,     0    /* 78-7F */
89 };
90 
91 static const short* char_type = &char_type0[1];
92 
93 static inline int
is_ws(int c)94 is_ws(int c)				/* Turtle: (WS|EL) */
95 { return (c < 128 ? (char_type[c] & (WS)) != 0 : FALSE);
96 }
97 
98 static inline int
is_eol(int c)99 is_eol(int c)
100 { return (c < 128 ? (char_type[c] & EL) != 0 : FALSE);
101 }
102 
103 static inline int
is_lang_char1(int c)104 is_lang_char1(int c)
105 { return (c < 128 ? (char_type[c] & (LC|UC)) != 0 : FALSE);
106 }
107 
108 static inline int
is_lang_char(int c)109 is_lang_char(int c)
110 { return (c < 128 ? (char_type[c] & (LC|UC|DI)) != 0 : FALSE) || c == '-';
111 }
112 
113 static const signed char hexval0[] =
114 {/*0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F  */
115   -1,
116   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00-0f */
117   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10-1F */
118   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20-2F */
119    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, /* 30-3F */
120   -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 40-4F */
121   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 50-5F */
122   -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 60-6F */
123 };
124 
125 static const signed char* hexval = &hexval0[1];
126 
127 static inline int
hexd(int c)128 hexd(int c)
129 { return (c <= 'f' ? hexval[c] : -1);
130 }
131 
132 
133 static inline int
wcis_pn_chars_u(int c)134 wcis_pn_chars_u(int c)			/* 158s */
135 { return ( wcis_pn_chars_base(c) ||
136 	   c == '_' || c == ':'
137 	 );
138 }
139 
140 
141 static inline int			/* 141s RDF Blank Nodes */
wcis_pn_chars_du(int c)142 wcis_pn_chars_du(int c)
143 { return ( wcis_pn_chars_u(c) ||
144 	   (c >= '0' && c <= '9')
145 	 );
146 }
147 
148 static inline int
wcis_pn_chars(int c)149 wcis_pn_chars(int c)			/* 160s */
150 { return ( wcis_pn_chars_u(c) ||
151 	   c == '-' ||
152 	   (c >= '0' && c <= '9') ||
153 	   wcis_pn_chars_extra(c)
154 	 );
155 }
156 
157 		 /*******************************
158 		 *	      ERROR		*
159 		 *******************************/
160 
161 static int
syntax_error(IOSTREAM * in,const char * msg)162 syntax_error(IOSTREAM *in, const char *msg)
163 { term_t ex = PL_new_term_refs(2);
164   IOPOS *pos;
165 
166   if ( !PL_unify_term(ex+0, PL_FUNCTOR, FUNCTOR_syntax_error1,
167 		              PL_CHARS, msg) )
168     return FALSE;
169 
170   if ( (pos=in->position) )
171   { term_t stream;
172 
173     if ( !(stream = PL_new_term_ref()) ||
174 	 !PL_unify_stream(stream, in) ||
175 	 !PL_unify_term(ex+1,
176 			PL_FUNCTOR, FUNCTOR_stream4,
177 			  PL_TERM, stream,
178 			  PL_INT, (int)pos->lineno,
179 			  PL_INT, (int)(pos->linepos-1), /* one too late */
180 			  PL_INT64, (int64_t)(pos->charno-1)) )
181       return FALSE;
182   }
183 
184   if ( PL_cons_functor_v(ex, FUNCTOR_error2, ex) )
185   { int c;
186 
187     do
188     { c = Sgetcode(in);
189     } while(c != '\n' && c != -1);
190 
191     return PL_raise_exception(ex),FALSE;
192   }
193 
194   return FALSE;
195 }
196 
197 
198 		 /*******************************
199 		 *	       BUFFER		*
200 		 *******************************/
201 
202 #ifdef STRING_BUF_DEBUG
203 #define FAST_BUF_SIZE 1
204 #else
205 #define FAST_BUF_SIZE 512
206 #endif
207 
208 typedef struct string_buffer
209 { wchar_t fast[FAST_BUF_SIZE];
210   wchar_t *buf;
211   wchar_t *in;
212   wchar_t *end;
213 #ifdef STRING_BUF_DEBUG
214   int	   discarded;
215 #endif
216 } string_buffer;
217 
218 
219 static int
growBuffer(string_buffer * b,int c)220 growBuffer(string_buffer *b, int c)
221 { assert(c != -1);
222 
223   if ( b->buf == b->fast )
224   { wchar_t *new = malloc((FAST_BUF_SIZE*2)*sizeof(wchar_t));
225 
226     if ( new )
227     { memcpy(new, b->fast, sizeof(b->fast));
228       b->buf = new;
229       b->in  = b->buf+FAST_BUF_SIZE;
230       b->end = b->in+FAST_BUF_SIZE;
231       *b->in++ = c;
232 
233       return TRUE;
234     }
235   } else
236   { size_t sz = b->end - b->buf;
237     wchar_t *new = realloc(b->buf, sz*sizeof(wchar_t)*2);
238 
239     if ( new )
240     { b->buf = new;
241       b->in  = new+sz;
242       b->end = b->in+sz;
243       *b->in++ = c;
244 
245       return TRUE;
246     }
247   }
248 
249   PL_resource_error("memory");
250   return FALSE;
251 }
252 
253 
254 static inline void
initBuf(string_buffer * b)255 initBuf(string_buffer *b)
256 { b->buf = b->fast;
257   b->in  = b->buf;
258   b->end = &b->fast[FAST_BUF_SIZE];
259 #ifdef STRING_BUF_DEBUG
260   b->discarded = FALSE;
261 #endif
262 }
263 
264 
265 static inline void
discardBuf(string_buffer * b)266 discardBuf(string_buffer *b)
267 {
268 #ifdef STRING_BUF_DEBUG
269   assert(b->discarded == FALSE);
270   b->discarded = TRUE;
271 #endif
272   if ( b->buf != b->fast )
273     free(b->buf);
274 }
275 
276 
277 static inline int
addBuf(string_buffer * b,int c)278 addBuf(string_buffer *b, int c)
279 { if ( b->in < b->end )
280   { *b->in++ = c;
281     return TRUE;
282   }
283 
284   return growBuffer(b, c);
285 }
286 
287 
288 static inline int
bufSize(string_buffer * b)289 bufSize(string_buffer *b)
290 { return b->in - b->buf;
291 }
292 
293 #define baseBuf(b)   ( (b)->buf )
294 
295 
296 		 /*******************************
297 		 *	     SKIPPING		*
298 		 *******************************/
299 
300 static int
skip_ws(IOSTREAM * in,int * cp)301 skip_ws(IOSTREAM *in, int *cp)
302 { int c = *cp;
303 
304   while(is_ws(c))
305     c = Sgetcode(in);
306 
307   *cp = c;
308 
309   return !Sferror(in);
310 }
311 
312 
313 static int
skip_comment_line(IOSTREAM * in,int * cp)314 skip_comment_line(IOSTREAM *in, int *cp)
315 { int c;
316 
317   do
318   { c = Sgetcode(in);
319   } while ( c != -1 && !is_eol(c) );
320 
321   while(is_eol(c))
322     c = Sgetcode(in);
323 
324   *cp = c;
325 
326   return !Sferror(in);
327 }
328 
329 
330 static int
skip_eol(IOSTREAM * in,int * cp)331 skip_eol(IOSTREAM *in, int *cp)
332 { if ( skip_ws(in, cp) )
333   { int c = *cp;
334 
335     if ( c == '\n' )
336       return TRUE;
337     if ( c == '\r' )
338     { if ( Speekcode(in) == '\n' )
339 	(void)Sgetcode(in);
340       return TRUE;
341     }
342     if ( c == EOF )
343       return TRUE;
344     if ( c == '#' )
345       return skip_comment_line(in, cp);
346 
347     return syntax_error(in, "end-of-line expected");
348   } else
349   { return FALSE;
350   }
351 }
352 
353 
354 		 /*******************************
355 		 *	      READING		*
356 		 *******************************/
357 
358 #define ESCAPED_CODE (-1)
359 
360 static int
read_hex(IOSTREAM * in,int * cp,int len)361 read_hex(IOSTREAM *in, int *cp, int len)
362 { int c = 0;
363 
364   while(len-- > 0)
365   { int v0;
366     int c2 = Sgetcode(in);
367 
368     if ( (v0 = hexd(c2)) >= 0 )
369     { c <<= 4;
370       c += v0;
371     } else
372     { return syntax_error(in, "illegal unicode escape");
373     }
374   }
375 
376   *cp = c;
377   return ESCAPED_CODE;
378 }
379 
380 
381 static int
get_iri_code(IOSTREAM * in,int * cp)382 get_iri_code(IOSTREAM *in, int *cp)
383 { int c = Sgetcode(in);
384 
385   switch(c)
386   { case '\r':
387     case '\n':
388       return syntax_error(in, "newline in uriref");
389     case EOF:
390       return syntax_error(in, "EOF in uriref");
391     case '<':
392     case '"':
393     case '{':
394     case '}':
395     case '|':
396     case '^':
397     case '`':
398       return syntax_error(in, "Illegal character in uriref");
399     case '\\':
400     { int c2 = Sgetcode(in);
401 
402       switch(c2)
403       { case 'u':	return read_hex(in, cp, 4);
404 	case 'U':	return read_hex(in, cp, 8);
405 	default:	return syntax_error(in, "illegal escape");
406       }
407     }
408     default:
409     { if ( c > ' ' )
410       { *cp = c;
411 	return TRUE;
412       }
413       return syntax_error(in, "Illegal control character in uriref");
414     }
415   }
416 }
417 
418 static int
read_uniref(IOSTREAM * in,term_t subject,int * cp)419 read_uniref(IOSTREAM *in, term_t subject, int *cp)
420 { int c = -1;
421   string_buffer buf;
422 
423   initBuf(&buf);
424   for(;;)
425   { int rc;
426 
427     if ( (rc=get_iri_code(in, &c)) == TRUE )
428     { switch(c)
429       { case '>':
430 	{ int rc = PL_unify_wchars(subject, PL_ATOM,
431 				   bufSize(&buf), baseBuf(&buf));
432 	  discardBuf(&buf);
433 	  *cp = Sgetcode(in);
434 	  return rc;
435 	}
436 	default:
437 	  if ( !addBuf(&buf, c) )
438 	  { discardBuf(&buf);
439 	    return FALSE;
440 	  }
441       }
442     } else if ( rc == ESCAPED_CODE )
443     { if ( !addBuf(&buf, c) )
444       { discardBuf(&buf);
445 	return FALSE;
446       }
447     } else
448     { discardBuf(&buf);
449       return FALSE;
450     }
451   }
452 }
453 
454 
455 static int
read_node_id(IOSTREAM * in,term_t subject,int * cp)456 read_node_id(IOSTREAM *in, term_t subject, int *cp)
457 { int c;
458 
459   c = Sgetcode(in);
460   if ( c != ':' )
461     return syntax_error(in, "invalid nodeID");
462 
463   c = Sgetcode(in);
464   if ( wcis_pn_chars_du(c) )
465   { string_buffer buf;
466 
467     initBuf(&buf);
468     addBuf(&buf, c);
469     for(;;)
470     { int c2;
471 
472       c = Sgetcode(in);
473 
474       if ( wcis_pn_chars(c) )
475       { addBuf(&buf, c);
476       } else if ( c == '.' &&
477 		  (wcis_pn_chars((c2=Speekcode(in))) || c2 == '.') )
478       { addBuf(&buf, c);
479       } else
480       { term_t av = PL_new_term_refs(1);
481 	int rc;
482 
483 	rc = ( PL_unify_wchars(av+0, PL_ATOM, bufSize(&buf), baseBuf(&buf)) &&
484 	       PL_cons_functor_v(subject, FUNCTOR_node1, av)
485 	     );
486 	discardBuf(&buf);
487 	*cp = c;
488 
489 	return rc;
490       }
491     }
492   } else
493     return syntax_error(in, "invalid nodeID");
494 }
495 
496 
497 static int
read_lan(IOSTREAM * in,term_t lan,int * cp)498 read_lan(IOSTREAM *in, term_t lan, int *cp)
499 { int c;
500   string_buffer buf;
501   int rc;
502 
503   c = Sgetcode(in);
504   if ( !skip_ws(in, &c) )
505     return FALSE;
506   if ( !is_lang_char1(c) )
507     return syntax_error(in, "language tag must start with a-zA-Z");
508 
509   initBuf(&buf);
510   addBuf(&buf, c);
511   for(;;)
512   { c = Sgetcode(in);
513     if ( is_lang_char(c) )
514     { addBuf(&buf, c);
515     } else
516     { break;
517     }
518   }
519   while(c=='-')
520   { addBuf(&buf, c);
521     c = Sgetcode(in);
522     if ( !is_lang_char(c) )
523     { discardBuf(&buf);
524       return syntax_error(in, "Illegal language tag");
525     }
526     addBuf(&buf, c);
527     for(;;)
528     { c = Sgetcode(in);
529       if ( is_lang_char(c) )
530       { addBuf(&buf, c);
531       } else
532       { break;
533       }
534     }
535   }
536 
537   *cp = c;
538   rc = PL_unify_wchars(lan, PL_ATOM, bufSize(&buf), baseBuf(&buf));
539   discardBuf(&buf);
540 
541   return rc;
542 }
543 
544 
545 static int
read_subject(IOSTREAM * in,term_t subject,int * cp)546 read_subject(IOSTREAM *in, term_t subject, int *cp)
547 { int c = *cp;
548   int rc;
549 
550   switch ( c )
551   { case '<':
552       rc = read_uniref(in, subject, cp);
553       break;
554     case '_':
555       rc = read_node_id(in, subject, cp);
556       break;
557     default:
558       return syntax_error(in, "subject expected");
559   }
560 
561   if ( rc && !is_ws(*cp) )
562     return syntax_error(in, "subject not followed by whitespace");
563 
564   return rc;
565 }
566 
567 
568 static int
read_predicate(IOSTREAM * in,term_t predicate,int * cp)569 read_predicate(IOSTREAM *in, term_t predicate, int *cp)
570 { int c = *cp;
571   int rc;
572 
573   switch ( c )
574   { case '<':
575       rc = read_uniref(in, predicate, cp);
576       break;
577     default:
578       return syntax_error(in, "predicate expected");
579   }
580 
581   if ( rc && !is_ws(*cp) )
582     return syntax_error(in, "predicate not followed by whitespace");
583 
584   return rc;
585 }
586 
587 
588 static int
read_graph(IOSTREAM * in,term_t graph,int * cp)589 read_graph(IOSTREAM *in, term_t graph, int *cp)
590 { int c = *cp;
591   int rc;
592 
593   switch ( c )
594   { case '<':
595       rc = read_uniref(in, graph, cp);
596       break;
597     default:
598       return syntax_error(in, "graph expected");
599   }
600 
601   return rc;
602 }
603 
604 
605 static int
wrap_literal(term_t lit)606 wrap_literal(term_t lit)
607 { return PL_cons_functor_v(lit, FUNCTOR_literal1, lit);
608 }
609 
610 
611 static int
get_string_code(IOSTREAM * in,int * cp)612 get_string_code(IOSTREAM *in, int *cp)
613 { int c = Sgetcode(in);
614 
615   switch(c)
616   { case '\r':
617     case '\n':
618       return syntax_error(in, "newline in string");
619     case '\\':
620     { int c2 = Sgetcode(in);
621 
622       switch(c2)
623       { case 'b':	*cp = '\b'; return ESCAPED_CODE;
624 	case 't':	*cp = '\t'; return ESCAPED_CODE;
625 	case 'f':	*cp = '\f'; return ESCAPED_CODE;
626 	case 'n':	*cp = '\n'; return ESCAPED_CODE;
627 	case 'r':	*cp = '\r'; return ESCAPED_CODE;
628 	case '"':	*cp =  '"'; return ESCAPED_CODE;
629 	case '\'':	*cp = '\''; return ESCAPED_CODE;
630 	case '\\':	*cp = '\\'; return ESCAPED_CODE;
631 	case 'u':	return read_hex(in, cp, 4);
632 	case 'U':	return read_hex(in, cp, 8);
633 	default:	return syntax_error(in, "illegal escape");
634       }
635     }
636     default:
637       *cp = c;
638       return TRUE;
639   }
640 }
641 
642 
643 static int
read_literal(IOSTREAM * in,term_t literal,int * cp)644 read_literal(IOSTREAM *in, term_t literal, int *cp)
645 { int c = -1;
646   string_buffer buf;
647 
648   initBuf(&buf);
649   for(;;)
650   { int rc;
651 
652     if ( (rc=get_string_code(in, &c)) == TRUE )
653     { switch(c)
654       { case '"':
655 	{ c = Sgetcode(in);
656 
657 	  if ( !skip_ws(in, &c) )
658 	  { discardBuf(&buf);
659 	    return FALSE;
660 	  }
661 
662 	  switch(c)
663 	  { case '@':
664 	    { term_t av = PL_new_term_refs(2);
665 
666 	      if ( read_lan(in, av+0, cp) )
667 	      { int rc = ( PL_unify_wchars(av+1, PL_ATOM,
668 					   bufSize(&buf), baseBuf(&buf)) &&
669 			   PL_cons_functor_v(literal, FUNCTOR_lang2, av) &&
670 			   wrap_literal(literal)
671 			 );
672 		discardBuf(&buf);
673 		return rc;
674 	      } else
675 	      { discardBuf(&buf);
676 		return FALSE;
677 	      }
678 	    }
679 	    case '^':
680 	    { c = Sgetcode(in);
681 
682 	      if ( c == '^' )
683 	      { term_t av = PL_new_term_refs(2);
684 
685 		c = Sgetcode(in);
686 		if ( !skip_ws(in, &c) )
687 		{ discardBuf(&buf);
688 		  return FALSE;
689 		}
690 		if ( c == '<' )
691 		{ if ( read_uniref(in, av+0, cp) )
692 		  { int rc = ( PL_unify_wchars(av+1, PL_ATOM,
693 					       bufSize(&buf), baseBuf(&buf)) &&
694 			       PL_cons_functor_v(literal, FUNCTOR_type2, av) &&
695 			       wrap_literal(literal)
696 			     );
697 		    discardBuf(&buf);
698 		    return rc;
699 		  } else
700 		  { discardBuf(&buf);
701 		    return FALSE;
702 		  }
703 		} else
704 		{ discardBuf(&buf);
705 		  return syntax_error(in, "datatype uriref expected");
706 		}
707 	      } else
708 	      { discardBuf(&buf);
709 		return syntax_error(in, "^ expected");
710 	      }
711 	    }
712 	    default:
713 	    { int rc;
714 
715 	      *cp = c;
716 	      rc = ( PL_unify_wchars(literal, PL_ATOM,
717 				     bufSize(&buf), baseBuf(&buf)) &&
718 		     wrap_literal(literal)
719 		   );
720 	      discardBuf(&buf);
721 	      return rc;
722 	    }
723 	  }
724 	}
725 	case EOF:
726 	  discardBuf(&buf);
727 	  return syntax_error(in, "EOF in string");
728 	case '\n':
729 	case '\r':
730 	  discardBuf(&buf);
731 	  return syntax_error(in, "newline in string");
732 	default:
733 	  if ( !addBuf(&buf, c) )
734 	  { discardBuf(&buf);
735 	    return FALSE;
736 	  }
737       }
738     } else if ( rc == ESCAPED_CODE )
739     { if ( !addBuf(&buf, c) )
740       { discardBuf(&buf);
741 	return FALSE;
742       }
743     } else
744     { discardBuf(&buf);
745       return FALSE;
746     }
747   }
748 }
749 
750 
751 static int
read_object(IOSTREAM * in,term_t object,int * cp)752 read_object(IOSTREAM *in, term_t object, int *cp)
753 { int c = *cp;
754 
755   switch ( c )
756   { case '<':
757       return read_uniref(in, object, cp);
758     case '_':
759       return read_node_id(in, object, cp);
760     case '"':
761       return read_literal(in, object, cp);
762     default:
763       return syntax_error(in, "object expected");
764   }
765 }
766 
767 
768 static int
check_full_stop(IOSTREAM * in,int * cp)769 check_full_stop(IOSTREAM *in, int *cp)
770 { int c = *cp;
771 
772   if ( c == '.' )
773   { *cp = Sgetcode(in);
774 
775     return TRUE;
776   }
777 
778   return syntax_error(in, "fullstop (.) expected");
779 }
780 
781 
782 static int
read_ntuple(term_t from,term_t triple,int arity)783 read_ntuple(term_t from, term_t triple, int arity)
784 { IOSTREAM *in;
785   int rc;
786   int c;
787 
788   if ( !PL_get_stream(from, &in, SIO_INPUT) )
789     return FALSE;
790 
791   c=Sgetcode(in);
792 next:
793   rc = skip_ws(in, &c);
794 
795   if ( rc )
796   { if ( c == '#' )
797     { if ( skip_comment_line(in, &c) )
798 	goto next;
799     } else if ( c == EOF )
800     { rc = PL_unify_atom(triple, ATOM_end_of_file);
801     } else if ( c < 128 && (char_type[c]&EL) )
802     { if ( skip_eol(in, &c) )
803       { c=Sgetcode(in);
804 	goto next;
805       } else
806 	return FALSE;
807     } else
808     { term_t av = PL_new_term_refs(5);	/* room for quad */
809 
810       rc = (  read_subject(in, av+1, &c) &&
811 	      skip_ws(in, &c) &&
812 	      read_predicate(in, av+2, &c) &&
813 	      skip_ws(in, &c) &&
814 	      read_object(in, av+3, &c) &&
815 	      skip_ws(in, &c)
816 	   );
817 
818       if ( rc )
819       {
820       again:
821 	if ( arity == 3 )
822 	{ rc = ( check_full_stop(in, &c) &&
823 		 skip_eol(in, &c)
824 	       );
825 	} else if ( arity == 4 )
826 	{ rc = ( read_graph(in, av+4, &c) &&
827 		 skip_ws(in, &c) &&
828 		 check_full_stop(in, &c) &&
829 		 skip_eol(in, &c)
830 	       );
831 	} else
832 	{ arity = (c == '<' ? 4 : 3);
833 	  goto again;
834 	}
835       }
836 
837       if ( rc )
838       { functor_t f = arity == 3 ? FUNCTOR_triple3 : FUNCTOR_quad4;
839 
840 	rc = ( PL_cons_functor_v(av+0, f, av+1) &&
841 	       PL_unify(triple, av+0)
842 	     );
843       }
844     }
845   }
846 
847   return (PL_release_stream(in) && rc);
848 }
849 
850 
851 static foreign_t
read_ntriple(term_t from,term_t triple)852 read_ntriple(term_t from, term_t triple)
853 { return read_ntuple(from, triple, 3);
854 }
855 
856 static foreign_t
read_nquad(term_t from,term_t quad)857 read_nquad(term_t from, term_t quad)
858 { return read_ntuple(from, quad, 4);
859 }
860 
861 static foreign_t
read_ntuple2(term_t from,term_t quad)862 read_ntuple2(term_t from, term_t quad)
863 { return read_ntuple(from, quad, 0);
864 }
865 
866 
867 		 /*******************************
868 		 *	       INSTALL		*
869 		 *******************************/
870 
871 #define MKFUNCTOR(n, a) \
872 	FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
873 
874 install_t
install_ntriples(void)875 install_ntriples(void)
876 { ATOM_end_of_file = PL_new_atom("end_of_file");
877 
878   MKFUNCTOR(node,         1);
879   MKFUNCTOR(literal,      1);
880   MKFUNCTOR(type,         2);
881   MKFUNCTOR(lang,         2);
882   MKFUNCTOR(triple,       3);
883   MKFUNCTOR(quad,         4);
884   MKFUNCTOR(error,        2);
885   MKFUNCTOR(syntax_error, 1);
886   MKFUNCTOR(stream,       4);
887 
888   PL_register_foreign("read_ntriple", 2, read_ntriple, 0);
889   PL_register_foreign("read_nquad",   2, read_nquad,   0);
890   PL_register_foreign("read_ntuple",  2, read_ntuple2, 0);
891 }
892