1package RDF::Notation3;
2
3
4use strict;
5#use warnings;
6use vars qw($VERSION);
7use File::Spec::Functions ();
8use Carp;
9use RDF::Notation3::ReaderFile;
10use RDF::Notation3::ReaderString;
11
12$VERSION = '0.91';
13
14############################################################
15
16sub new {
17    my ($class) = @_;
18
19    my $self = {
20	ansuri  => '#',
21	quantif => 1,
22	nIDpref	=> '_:a', # this fits to RDF:Core prefix for nodeID
23    };
24
25    bless $self, $class;
26    return $self;
27}
28
29
30sub parse_file {
31    my ($self, $path) = @_;
32
33    $self->_define;
34
35    my $fh;
36    if (ref $path eq 'IO::File') {
37	$fh = $path;
38
39    } else {
40	open(FILE, "$path") or $self->_do_error(2, $path);
41	$fh = *FILE;
42    }
43
44    my $t = new RDF::Notation3::ReaderFile($fh);
45    $self->{reader} = $t;
46
47    $self->_document;
48
49    close (FILE);
50}
51
52
53sub parse_string {
54    my ($self, $str) = @_;
55
56    $self->_define;
57
58    my $t = new RDF::Notation3::ReaderString($str);
59    $self->{reader} = $t;
60
61    $self->_document;
62}
63
64
65sub anonymous_ns_uri {
66    my ($self, $uri) = @_;
67    if (@_ > 1) {
68	$self->{ansuri} = $uri;
69    } else {
70	return $self->{ansuri};
71    }
72}
73
74sub quantification {
75    my ($self, $val) = @_;
76    if (@_ > 1) {
77	$self->_do_error(4, $val)
78	  unless $val == 1 || $val == 0;
79	$self->{quantif} = $val;
80    } else {
81	return $self->{quantif};
82    }
83}
84
85
86sub _define {
87    my ($self) = @_;
88
89    $self->{ns} = {};
90    $self->{context} = '<>';
91    $self->{gid} = 1;
92    $self->{cid} = 1;
93    $self->{hardns} = {
94	rdf  => ['rdf','http://www.w3.org/1999/02/22-rdf-syntax-ns#'],
95	daml => ['daml','http://www.daml.org/2001/03/daml+oil#'],
96	log  => ['log','http://www.w3.org/2000/10/swap/log.n3#'],
97	};
98    $self->{keywords} = [];
99}
100
101
102sub _document {
103    my ($self) = @_;
104    my $next = $self->{reader}->try;
105    #print ">doc starts: $next\n";
106    if ($next ne ' EOF ') {
107	$self->_statement_list;
108    }
109    #print ">end\n";
110}
111
112
113sub _statement_list {
114    my ($self) = @_;
115    my $next = $self->_eat_EOLs;
116    #print ">statement list: $next\n";
117
118    while ($next ne ' EOF ') {
119	if ($next =~ /^(?:|#.*)$/) {
120	    $self->_space;
121
122	} elsif ($next =~ /^}/) {
123	    #print ">end of nested statement list: $next\n";
124	    last;
125
126	} else {
127	    $self->_statement;
128	}
129	$next = $self->_eat_EOLs;
130    }
131    #print ">end of statement list: $next\n";
132}
133
134
135sub _space {
136    my ($self) = @_;
137    #print ">space: ";
138
139    my $tk = $self->{reader}->get;
140    # comment or empty string
141    while ($tk ne ' EOL ') {
142	#print ">$tk ";
143	$tk = $self->{reader}->get;
144    }
145    #print ">\n";
146}
147
148
149sub _statement {
150    my ($self, $subject) = @_;
151    my $next = $self->{reader}->try;
152    #print ">statement starts: $next\n";
153
154    if ($next =~ /^\@prefix|\@keywords|bind$/) {
155	$self->_directive;
156
157    } else {
158	$subject = $self->_node unless $subject;
159	#print ">subject: $subject\n";
160
161	my $properties = [];
162	$self->_property_list($properties);
163
164	#print ">CONTEXT: $self->{context}\n";
165	#print ">SUBJECT: $subject\n";
166	#print ">PROPERTY: void\n" unless @$properties;
167	#foreach (@$properties) { # comment/uncomment by hand
168	    #print ">PROPERTY: ", join('-', @$_), "\n";
169	#}
170
171	$self->_process_statement($subject, $properties) if @$properties;
172    }
173    # next step
174    $next = $self->_eat_EOLs;
175    if ($next eq '.') {
176	$self->{reader}->get;
177    } elsif ($next =~ /^\.(.*)$/) {
178	$self->{reader}->get;
179	unshift @{$self->{reader}->{tokens}}, $1;
180    } elsif ($next =~ /^(?:\]|\)|\})/) {
181    } else {
182	$self->_do_error(115,$next);
183    }
184}
185
186
187sub _node {
188    my ($self) = @_;
189    my $next = $self->_eat_EOLs;
190    #print ">node: $next\n";
191
192    if ($next =~ /^[\[\{\(]/) {
193	#print ">node is anonnode\n";
194	return $self->_anonymous_node;
195
196    } elsif ($next eq 'this') {
197	#print ">this\n";
198	$self->{reader}->get;
199	return "$self->{context}";
200
201    } elsif ($next =~ /^(<[^>]*>|^(?:[_a-zA-Z]\w*)?:[_a-zA-Z][_\w]*)(.*)$/) {
202	#print ">node is uri_ref2: $next\n";
203
204	if ($2) {
205	    $self->{reader}->get;
206	    unshift @{$self->{reader}->{tokens}}, $2;
207	    unshift @{$self->{reader}->{tokens}}, $1;
208	    #print ">cleaned uri_ref2: $1\n";
209	}
210	return $self->_uri_ref2;
211
212    } elsif ($self->{keywords}[0] && ($next =~ /^(^[_a-zA-Z][_\w]*)(.*)$/)) {
213	#print ">node is uri_ref_kw: $next\n";
214
215	$self->{reader}->get;
216	unshift @{$self->{reader}->{tokens}}, $2 if $2;
217	unshift @{$self->{reader}->{tokens}}, ':' . $1;
218	#print ">cleaned uri_ref2: $1\n";
219	return $self->_uri_ref2;
220
221    } else {
222	#print ">unknown node: $next\n";
223	$self->_do_error(116,$next);
224    }
225}
226
227
228sub _directive {
229    my ($self) = @_;
230    my $tk = $self->{reader}->get;
231    #print ">directive: $tk\n";
232
233    if ($tk eq '@prefix') {
234	my $tk = $self->{reader}->get;
235	if ($tk =~ /^([_a-zA-Z]\w*)?:$/) {
236	    my $pref = $1;
237	    #print ">nprefix: $pref\n" if $pref;
238
239	    my $ns_uri = $self->_uri_ref2;
240	    $ns_uri =~ s/^<(.*)>$/$1/;
241
242	    if ($pref) {
243		$self->{ns}->{$self->{context}}->{$pref} = $ns_uri;
244	    } else {
245		$self->{ns}->{$self->{context}}->{''} = $ns_uri;
246	    }
247	} else {
248	    $self->_do_error(102,$tk);
249	}
250
251    } elsif ($tk eq '@keywords') {
252	my $kw = $self->{reader}->get;
253	while ($kw =~ /,$/) {
254	    $kw =~ s/,$//;
255	    push @{$self->{keywords}}, $kw;
256	    $kw = $self->{reader}->get;
257	}
258
259	if ($kw =~ /^(.+)\.$/) {
260	    push @{$self->{keywords}}, $1;
261	    unshift @{$self->{reader}{tokens}}, '.';
262	} else {
263	    $self->_do_error(117,$tk);
264	}
265	#print ">keywords: ", join('|', @{$self->{keywords}}), "\n";
266
267    } else {
268	$self->_do_error(101,$tk);
269    }
270}
271
272
273sub _uri_ref2 {
274    my ($self) = @_;
275
276    # possible end of statement, a simple . check is done
277    my $next = $self->{reader}->try;
278    if ($next =~ /^(.+)\.$/) {
279	$self->{reader}->{tokens}->[0] = '.';
280	unshift @{$self->{reader}->{tokens}}, $1;
281    }
282
283    my $tk = $self->{reader}->get;
284    #print ">uri_ref2: $tk\n";
285
286    if ($tk =~ /^<[^>]*>$/) {
287	#print ">URI\n";
288	return $tk;
289
290    } elsif ($tk =~ /^([_a-zA-Z]\w*)?:[a-zA-Z]\w*$/) {
291	#print ">qname ($1:)\n" if $1;
292
293	my $pref = '';
294	$pref = $1 if $1;
295	if ($pref eq '_') { # workaround to parse N-Triples
296	    $self->{ns}->{$self->{context}}->{_} = $self->{ansuri}
297		unless $self->{ns}->{$self->{context}}->{_};
298	}
299
300	# Identifier demunging
301	$tk = _unesc_qname($tk) if $tk =~ /_/;
302	return $tk;
303
304    } else {
305	$self->_do_error(103,$tk);
306    }
307}
308
309
310sub _property_list {
311    my ($self, $properties) = @_;
312    my $next = $self->_eat_EOLs;
313    #print ">property list: $next\n";
314
315    $next = $self->_check_inline_comment($next);
316
317    if ($next =~ /^:-/) {
318	#print ">anonnode\n";
319	# TBD
320	$self->_do_error(199, $next);
321
322    } elsif ($next =~ /^\./) {
323	#print ">void prop_list\n";
324	# TBD
325
326    } else {
327	#print ">prop_list with verb\n";
328	my $property = $self->_verb;
329	#print ">property is back: $property\n";
330
331	my $objects = [];
332	$self->_object_list($objects);
333	unshift @$objects, $property;
334	unshift @$objects, 'i' if ($next eq 'is' or $next eq '<-');
335	#print ">inverse mode\n" if ($next eq 'is' or $next eq '<-');
336	push @$properties, $objects;
337    }
338    # next step
339    $next = $self->_eat_EOLs;
340    if ($next eq ';') {
341	$self->{reader}->get;
342	$self->_property_list($properties);
343    }
344}
345
346
347sub _verb {
348    my ($self) = @_;
349    my $next = $self->{reader}->try;
350    #print ">verb: $next\n";
351
352    if ($next eq 'has') {
353	$self->{reader}->get;
354	return $self->_node;
355
356    } elsif ($next eq '>-') {
357	$self->{reader}->get;
358	my $node = $self->_node;
359	my $tk = $self->{reader}->get;
360	$self->_do_error(104,$tk) unless $tk eq '->';
361	return $node;
362
363    } elsif ($next eq 'is') {
364	$self->{reader}->get;
365	my $node = $self->_node;
366	my $tk = $self->{reader}->get;
367	$self->_do_error(109,$tk) unless $tk eq 'of';
368	return $node;
369
370    } elsif ($next eq '<-') {
371 	$self->{reader}->get;
372 	my $node = $self->_node;
373 	my $tk = $self->{reader}->get;
374 	$self->_do_error(110,$tk) unless $tk eq '-<';
375 	return $node;
376
377    } elsif ($next eq 'a') {
378	$self->{reader}->get;
379	return $self->_built_in_verb('rdf','type');
380#	return '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>'
381
382    } elsif ($next =~ /^=(.*)/) {
383	$self->{reader}->get;
384	unshift @{$self->{reader}->{tokens}}, $1 if $1;
385	return $self->_built_in_verb('daml','equivalentTo');
386#	return '<http://www.daml.org/2001/03/daml+oil#equivalentTo>';
387
388    } else {
389	#print ">property: $next\n";
390	return $self->_node;
391    }
392}
393
394
395sub _object_list {
396    my ($self, $objects) = @_;
397    my $next = $self->_eat_EOLs;
398    #print ">object list: $next\n";
399
400    $next = $self->_check_inline_comment($next);
401
402    # possible end of entity, check for sticked next char is done
403    while ($next =~ /^([^"]+)([,;\.\}\]\)])$/) {
404	$self->{reader}->{tokens}->[0] = $2;
405	unshift @{$self->{reader}->{tokens}}, $1;
406	$next = $1;
407    }
408
409    my $obj = $self->_object;
410    #print ">object is back: $obj\n";
411    push @$objects, $obj;
412
413    # next step
414    $next = $self->_eat_EOLs;
415    if ($next eq ',') {
416	$self->{reader}->get;
417	$self->_object_list($objects);
418    }
419}
420
421
422sub _object {
423    my ($self) = @_;
424    my $next = $self->_eat_EOLs;
425    #print ">object: $next:\n";
426
427    if ($next =~ /^("(?:\\"|[^\"])*")([\.;,\]\}\)])*$/) {
428	#print ">complete string1: $next\n";
429	my $tk = $self->{reader}->get;
430	unshift @{$self->{reader}->{tokens}}, $2 if $2;
431	return $self->_unesc_string($1);
432
433    } else {
434	#print ">object is node: $next\n";
435	$self->_node;
436    }
437}
438
439
440sub _anonymous_node {
441    my ($self) = @_;
442    my $next = $self->{reader}->try;
443    $next =~ /^([\[\{\(])(.*)$/;
444    #print ">anonnode1: $1\n";
445    #print ">anonnode2: $2\n";
446
447    $self->{reader}->get;
448    unshift @{$self->{reader}->{tokens}}, $2 if $2;
449
450    if ($1 eq '[') {
451	#print ">anonnode: []\n";
452	my $genid = "<$self->{ansuri}g_$self->{gid}>";
453	$self->{gid}++;
454
455	$next = $self->_eat_EOLs;
456	if ($next =~ /^\](.)*$/) {
457	    $self->_exist_quantif($genid);
458	} else {
459	    $self->_exist_quantif($genid);
460	    $self->_statement($genid);
461	}
462
463	# next step
464	$next = $self->_eat_EOLs;
465	my $tk = $self->{reader}->get;
466	if ($tk =~ /^\](.+)$/) {
467	    unshift @{$self->{reader}->{tokens}}, $1;
468	} elsif ($tk ne ']') {
469	    $self->_do_error(107, $tk);
470	}
471	return $genid;
472
473    } elsif ($1 eq '{') {
474	#print ">anonnode: {}\n";
475	my $genid = "<$self->{ansuri}c_$self->{cid}>";
476	$self->{cid}++;
477
478	# ns mapping is passed to inner context
479	$self->{ns}->{$genid} = {};
480	foreach (keys %{$self->{ns}->{$self->{context}}}) {
481	    $self->{ns}->{$genid}->{$_} =
482	      $self->{ns}->{$self->{context}}->{$_};
483	    #print ">prefix '$_' passed to inner context\n";
484	}
485
486	my $parent_context = $self->{context};
487	$self->{context} = $genid;
488	$self->_exist_quantif($genid); # quantifying the new context
489	$self->_statement_list;        # parsing nested statements
490	$self->{context} = $parent_context;
491
492	# next step
493	$self->_eat_EOLs;
494 	my $tk = $self->{reader}->get;
495	#print ">next token: $tk\n";
496	if ($tk =~ /^\}([,;\.\]\}\)])?$/) {
497	    unshift @{$self->{reader}->{tokens}}, $1 if $1;
498	} else {
499	    $self->_do_error(108, $tk);
500	}
501	return $genid;
502
503    } else {
504	#print ">anonnode: ()\n";
505	my $next = $self->_eat_EOLs;
506
507#	if ($next =~ /^\)([,;\.\]\}\)])*$/) {
508	if ($next =~ /^\)(.*)$/) {
509	    #print ">void ()\n";
510	    $self->{reader}->get;
511	    unshift @{$self->{reader}->{tokens}}, $1 if $1;
512	    return $self->_built_in_verb('daml','nil');
513
514	} else {
515
516	    #print ">anonnode () starts: $next\n";
517	    my @nodes = ();
518 	    until ($next =~ /^.*\)[,;\.\]\}\)]*$/) {
519		push @nodes, $self->_object;
520 		$next = $self->_eat_EOLs;
521 	    }
522	    if ($next =~ /^([^)]*)\)([,;\.\]\}\)]*)$/) {
523		$self->{reader}->get;
524		unshift @{$self->{reader}->{tokens}}, $2 if $2;
525		unshift @{$self->{reader}->{tokens}}, ')';
526		if ($1) {
527		    unshift @{$self->{reader}->{tokens}}, $1;
528		    push @nodes, $self->_object;
529		}
530		$self->{reader}->get;
531	    }
532	    my $pref = $self->_built_in_verb('daml','');
533
534	    my $i = 0;
535	    my @expnl = (); # expanded node list
536	    foreach (@nodes) {
537		$i++;
538		push @expnl, '[';
539		push @expnl, $pref . 'first';
540		push @expnl, $_;
541		push @expnl, ';';
542		push @expnl, $pref . 'rest';
543		push @expnl, $pref . 'nil'
544		  if $i == scalar @nodes;
545	    }
546	    for (my $j = 0; $j < $i; $j++) {push @expnl, ']'}
547	    unshift @{$self->{reader}->{tokens}}, @expnl;
548	    my $exp = join(' ', @expnl);
549	    #print ">expanded: $exp\n";
550	    my $genid = $self->_anonymous_node;
551	    return $genid;
552	}
553    }
554}
555
556########################################
557# utils
558
559sub _exist_quantif {
560    my ($self, $anode) = @_;
561
562    if ($self->{quantif}) {
563	my $qname = $self->_built_in_verb('log','forSome');
564	#print ">existential quantification: $anode\n";
565	#print ">CONTEXT: $self->{context}\n";
566	#print ">SUBJECT: $self->{context}\n";
567	#print ">PROPERTY: $qname";
568	#print ">-$anode\n";
569	$self->_process_statement($self->{context},
570		[[$qname, $anode]]);
571    }
572}
573
574
575sub _eat_EOLs {
576    my ($self) = @_;
577
578    my $next = $self->{reader}->try;
579    while ($next eq ' EOL ') {
580	$self->{reader}->get;
581	$next = $self->{reader}->try;
582    }
583    return $next;
584}
585
586
587# comment inside a list
588sub _check_inline_comment {
589    my ($self, $next) = @_;
590
591    if ($next =~ /^#/) {
592	$self->_space;
593	$next = $self->_eat_EOLs;
594    }
595    return $next;
596}
597
598
599sub _built_in_verb {
600    my ($self, $key, $verb) = @_;
601
602    # resolves possible NS conflicts
603    my $i = 1;
604    while ($self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} and
605	   $self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} ne
606	   $self->{hardns}->{$key}->[1]) {
607
608	$self->{hardns}->{$key}->[0] = "$key$i";
609	$i++;
610    }
611    # adds prefix-NS binding
612    $self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} =
613      $self->{hardns}->{$key}->[1];
614
615    return "$self->{hardns}->{$key}->[0]:$verb";
616}
617
618
619sub _unesc_qname {
620    my $qname = shift;
621
622    #print ">escaped qname: $qname\n";
623    my $i = 0;
624    my @unesc = ();
625    while ($qname =~ /(__+)/) {
626	my $res = substr(sprintf("%b", length($1) + 1), 1);
627	$res =~ s/1/-/g;
628	$res =~ s/0/_/g;
629	$qname =~ s/__+/<$i>/;
630	push @unesc, $res;
631	$i++;
632    }
633    for ($i=0; $i<@unesc; $i++) { $qname =~ s/<$i>/$unesc[$i]/; }
634    #print ">unescaped qname: $qname\n";
635    return $qname;
636}
637
638
639sub _unesc_string {
640    my ($self, $str) = @_;
641
642    $str =~ s/\\\n//go;
643    $str =~ s/\\\\/\\/go;
644    $str =~ s/\\'/'/go;
645    $str =~ s/\\"/"/go;
646    $str =~ s/\\n/\n/go;
647    $str =~ s/\\r/\r/go;
648    $str =~ s/\\t/\t/go;
649    $str =~ s/\\u([\da-fA-F]{4})/pack('U',hex($1))/ge;
650    $str =~ s/\\U00([\da-fA-F]{6})/pack('U',hex($1))/ge;
651    $str =~ s/\\([\da-fA-F]{3})/pack('C',oct($1))/ge; #deprecated
652    $str =~ s/\\x([\da-fA-F]{2})/pack('C',hex($1))/ge; #deprecated
653
654    return $str;
655}
656
657########################################
658
659sub _do_error {
660    my ($self, $n, $tk) = @_;
661
662    my %msg = (
663	1   => 'file not specified',
664	2   => 'file not found',
665	3   => 'string not specified',
666	4   => 'invalid parameter of quantification method (0|1)',
667
668	101 => 'bind directive is obsolete, use @prefix instead',
669	102 => 'invalid namespace prefix',
670	103 => 'invalid URI reference (uri_ref2)',
671	104 => 'end of verb (->) expected',
672	105 => 'invalid characters in string1',
673	106 => 'namespace prefix not bound',
674	107 => 'invalid end of anonnode, ] expected',
675	108 => 'invalid end of anonnode, } expected',
676	109 => 'end of verb (of) expected',
677	110 => 'end of verb (-<) expected',
678	111 => 'string1 ("...") is not terminated',
679	112 => 'invalid characters in string2',
680	113 => 'string2 ("""...""")is not terminated',
681	114 => 'string1 ("...") can\'t include newlines',
682	115 => 'end of statement expected',
683	116 => 'invalid node',
684	117 => 'last keyword expected',
685	199 => ':- token not supported yet',
686
687	201 => '[Triples] attempt to add invalid node',
688	202 => '[Triples] literal not allowed as subject or predicate',
689
690	#301 => '[SAX] systemID source not implemented',
691	302 => '[SAX] characterStream source not implemented',
692
693	401 => '[XML] unable to convert URI predicate to QName',
694	402 => '[XML] subject not recognized - internal error',
695
696	501 => '[RDFCore] literal not allowed as subject',
697	502 => '[RDFCore] valid storage not specified',
698	503 => '[RDFStore] literal not allowed as subject',
699	);
700
701    my $msg = "[Error $n]";
702    $msg .= " line $self->{reader}->{ln}, token" if $n > 100;
703    $msg .= " \"$tk\"\n";
704    $msg .= "$msg{$n}!\n";
705    croak $msg;
706}
707
708
7091;
710
711
712
713
714
715
716
717
718