1package RDF::Notation3; 2 3 4use strict; 5#use warnings; 6use vars qw($VERSION); 7use File::Spec::Functions (); 8use Carp; 9use RDF::Notation3::ReaderFile; 10use RDF::Notation3::ReaderString; 11 12$VERSION = '0.91'; 13 14############################################################ 15 16sub new { 17 my ($class) = @_; 18 19 my $self = { 20 ansuri => '#', 21 quantif => 1, 22 nIDpref => '_:a', # this fits to RDF:Core prefix for nodeID 23 }; 24 25 bless $self, $class; 26 return $self; 27} 28 29 30sub parse_file { 31 my ($self, $path) = @_; 32 33 $self->_define; 34 35 my $fh; 36 if (ref $path eq 'IO::File') { 37 $fh = $path; 38 39 } else { 40 open(FILE, "$path") or $self->_do_error(2, $path); 41 $fh = *FILE; 42 } 43 44 my $t = new RDF::Notation3::ReaderFile($fh); 45 $self->{reader} = $t; 46 47 $self->_document; 48 49 close (FILE); 50} 51 52 53sub parse_string { 54 my ($self, $str) = @_; 55 56 $self->_define; 57 58 my $t = new RDF::Notation3::ReaderString($str); 59 $self->{reader} = $t; 60 61 $self->_document; 62} 63 64 65sub anonymous_ns_uri { 66 my ($self, $uri) = @_; 67 if (@_ > 1) { 68 $self->{ansuri} = $uri; 69 } else { 70 return $self->{ansuri}; 71 } 72} 73 74sub quantification { 75 my ($self, $val) = @_; 76 if (@_ > 1) { 77 $self->_do_error(4, $val) 78 unless $val == 1 || $val == 0; 79 $self->{quantif} = $val; 80 } else { 81 return $self->{quantif}; 82 } 83} 84 85 86sub _define { 87 my ($self) = @_; 88 89 $self->{ns} = {}; 90 $self->{context} = '<>'; 91 $self->{gid} = 1; 92 $self->{cid} = 1; 93 $self->{hardns} = { 94 rdf => ['rdf','http://www.w3.org/1999/02/22-rdf-syntax-ns#'], 95 daml => ['daml','http://www.daml.org/2001/03/daml+oil#'], 96 log => ['log','http://www.w3.org/2000/10/swap/log.n3#'], 97 }; 98 $self->{keywords} = []; 99} 100 101 102sub _document { 103 my ($self) = @_; 104 my $next = $self->{reader}->try; 105 #print ">doc starts: $next\n"; 106 if ($next ne ' EOF ') { 107 $self->_statement_list; 108 } 109 #print ">end\n"; 110} 111 112 113sub _statement_list { 114 my ($self) = @_; 115 my $next = $self->_eat_EOLs; 116 #print ">statement list: $next\n"; 117 118 while ($next ne ' EOF ') { 119 if ($next =~ /^(?:|#.*)$/) { 120 $self->_space; 121 122 } elsif ($next =~ /^}/) { 123 #print ">end of nested statement list: $next\n"; 124 last; 125 126 } else { 127 $self->_statement; 128 } 129 $next = $self->_eat_EOLs; 130 } 131 #print ">end of statement list: $next\n"; 132} 133 134 135sub _space { 136 my ($self) = @_; 137 #print ">space: "; 138 139 my $tk = $self->{reader}->get; 140 # comment or empty string 141 while ($tk ne ' EOL ') { 142 #print ">$tk "; 143 $tk = $self->{reader}->get; 144 } 145 #print ">\n"; 146} 147 148 149sub _statement { 150 my ($self, $subject) = @_; 151 my $next = $self->{reader}->try; 152 #print ">statement starts: $next\n"; 153 154 if ($next =~ /^\@prefix|\@keywords|bind$/) { 155 $self->_directive; 156 157 } else { 158 $subject = $self->_node unless $subject; 159 #print ">subject: $subject\n"; 160 161 my $properties = []; 162 $self->_property_list($properties); 163 164 #print ">CONTEXT: $self->{context}\n"; 165 #print ">SUBJECT: $subject\n"; 166 #print ">PROPERTY: void\n" unless @$properties; 167 #foreach (@$properties) { # comment/uncomment by hand 168 #print ">PROPERTY: ", join('-', @$_), "\n"; 169 #} 170 171 $self->_process_statement($subject, $properties) if @$properties; 172 } 173 # next step 174 $next = $self->_eat_EOLs; 175 if ($next eq '.') { 176 $self->{reader}->get; 177 } elsif ($next =~ /^\.(.*)$/) { 178 $self->{reader}->get; 179 unshift @{$self->{reader}->{tokens}}, $1; 180 } elsif ($next =~ /^(?:\]|\)|\})/) { 181 } else { 182 $self->_do_error(115,$next); 183 } 184} 185 186 187sub _node { 188 my ($self) = @_; 189 my $next = $self->_eat_EOLs; 190 #print ">node: $next\n"; 191 192 if ($next =~ /^[\[\{\(]/) { 193 #print ">node is anonnode\n"; 194 return $self->_anonymous_node; 195 196 } elsif ($next eq 'this') { 197 #print ">this\n"; 198 $self->{reader}->get; 199 return "$self->{context}"; 200 201 } elsif ($next =~ /^(<[^>]*>|^(?:[_a-zA-Z]\w*)?:[_a-zA-Z][_\w]*)(.*)$/) { 202 #print ">node is uri_ref2: $next\n"; 203 204 if ($2) { 205 $self->{reader}->get; 206 unshift @{$self->{reader}->{tokens}}, $2; 207 unshift @{$self->{reader}->{tokens}}, $1; 208 #print ">cleaned uri_ref2: $1\n"; 209 } 210 return $self->_uri_ref2; 211 212 } elsif ($self->{keywords}[0] && ($next =~ /^(^[_a-zA-Z][_\w]*)(.*)$/)) { 213 #print ">node is uri_ref_kw: $next\n"; 214 215 $self->{reader}->get; 216 unshift @{$self->{reader}->{tokens}}, $2 if $2; 217 unshift @{$self->{reader}->{tokens}}, ':' . $1; 218 #print ">cleaned uri_ref2: $1\n"; 219 return $self->_uri_ref2; 220 221 } else { 222 #print ">unknown node: $next\n"; 223 $self->_do_error(116,$next); 224 } 225} 226 227 228sub _directive { 229 my ($self) = @_; 230 my $tk = $self->{reader}->get; 231 #print ">directive: $tk\n"; 232 233 if ($tk eq '@prefix') { 234 my $tk = $self->{reader}->get; 235 if ($tk =~ /^([_a-zA-Z]\w*)?:$/) { 236 my $pref = $1; 237 #print ">nprefix: $pref\n" if $pref; 238 239 my $ns_uri = $self->_uri_ref2; 240 $ns_uri =~ s/^<(.*)>$/$1/; 241 242 if ($pref) { 243 $self->{ns}->{$self->{context}}->{$pref} = $ns_uri; 244 } else { 245 $self->{ns}->{$self->{context}}->{''} = $ns_uri; 246 } 247 } else { 248 $self->_do_error(102,$tk); 249 } 250 251 } elsif ($tk eq '@keywords') { 252 my $kw = $self->{reader}->get; 253 while ($kw =~ /,$/) { 254 $kw =~ s/,$//; 255 push @{$self->{keywords}}, $kw; 256 $kw = $self->{reader}->get; 257 } 258 259 if ($kw =~ /^(.+)\.$/) { 260 push @{$self->{keywords}}, $1; 261 unshift @{$self->{reader}{tokens}}, '.'; 262 } else { 263 $self->_do_error(117,$tk); 264 } 265 #print ">keywords: ", join('|', @{$self->{keywords}}), "\n"; 266 267 } else { 268 $self->_do_error(101,$tk); 269 } 270} 271 272 273sub _uri_ref2 { 274 my ($self) = @_; 275 276 # possible end of statement, a simple . check is done 277 my $next = $self->{reader}->try; 278 if ($next =~ /^(.+)\.$/) { 279 $self->{reader}->{tokens}->[0] = '.'; 280 unshift @{$self->{reader}->{tokens}}, $1; 281 } 282 283 my $tk = $self->{reader}->get; 284 #print ">uri_ref2: $tk\n"; 285 286 if ($tk =~ /^<[^>]*>$/) { 287 #print ">URI\n"; 288 return $tk; 289 290 } elsif ($tk =~ /^([_a-zA-Z]\w*)?:[a-zA-Z]\w*$/) { 291 #print ">qname ($1:)\n" if $1; 292 293 my $pref = ''; 294 $pref = $1 if $1; 295 if ($pref eq '_') { # workaround to parse N-Triples 296 $self->{ns}->{$self->{context}}->{_} = $self->{ansuri} 297 unless $self->{ns}->{$self->{context}}->{_}; 298 } 299 300 # Identifier demunging 301 $tk = _unesc_qname($tk) if $tk =~ /_/; 302 return $tk; 303 304 } else { 305 $self->_do_error(103,$tk); 306 } 307} 308 309 310sub _property_list { 311 my ($self, $properties) = @_; 312 my $next = $self->_eat_EOLs; 313 #print ">property list: $next\n"; 314 315 $next = $self->_check_inline_comment($next); 316 317 if ($next =~ /^:-/) { 318 #print ">anonnode\n"; 319 # TBD 320 $self->_do_error(199, $next); 321 322 } elsif ($next =~ /^\./) { 323 #print ">void prop_list\n"; 324 # TBD 325 326 } else { 327 #print ">prop_list with verb\n"; 328 my $property = $self->_verb; 329 #print ">property is back: $property\n"; 330 331 my $objects = []; 332 $self->_object_list($objects); 333 unshift @$objects, $property; 334 unshift @$objects, 'i' if ($next eq 'is' or $next eq '<-'); 335 #print ">inverse mode\n" if ($next eq 'is' or $next eq '<-'); 336 push @$properties, $objects; 337 } 338 # next step 339 $next = $self->_eat_EOLs; 340 if ($next eq ';') { 341 $self->{reader}->get; 342 $self->_property_list($properties); 343 } 344} 345 346 347sub _verb { 348 my ($self) = @_; 349 my $next = $self->{reader}->try; 350 #print ">verb: $next\n"; 351 352 if ($next eq 'has') { 353 $self->{reader}->get; 354 return $self->_node; 355 356 } elsif ($next eq '>-') { 357 $self->{reader}->get; 358 my $node = $self->_node; 359 my $tk = $self->{reader}->get; 360 $self->_do_error(104,$tk) unless $tk eq '->'; 361 return $node; 362 363 } elsif ($next eq 'is') { 364 $self->{reader}->get; 365 my $node = $self->_node; 366 my $tk = $self->{reader}->get; 367 $self->_do_error(109,$tk) unless $tk eq 'of'; 368 return $node; 369 370 } elsif ($next eq '<-') { 371 $self->{reader}->get; 372 my $node = $self->_node; 373 my $tk = $self->{reader}->get; 374 $self->_do_error(110,$tk) unless $tk eq '-<'; 375 return $node; 376 377 } elsif ($next eq 'a') { 378 $self->{reader}->get; 379 return $self->_built_in_verb('rdf','type'); 380# return '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>' 381 382 } elsif ($next =~ /^=(.*)/) { 383 $self->{reader}->get; 384 unshift @{$self->{reader}->{tokens}}, $1 if $1; 385 return $self->_built_in_verb('daml','equivalentTo'); 386# return '<http://www.daml.org/2001/03/daml+oil#equivalentTo>'; 387 388 } else { 389 #print ">property: $next\n"; 390 return $self->_node; 391 } 392} 393 394 395sub _object_list { 396 my ($self, $objects) = @_; 397 my $next = $self->_eat_EOLs; 398 #print ">object list: $next\n"; 399 400 $next = $self->_check_inline_comment($next); 401 402 # possible end of entity, check for sticked next char is done 403 while ($next =~ /^([^"]+)([,;\.\}\]\)])$/) { 404 $self->{reader}->{tokens}->[0] = $2; 405 unshift @{$self->{reader}->{tokens}}, $1; 406 $next = $1; 407 } 408 409 my $obj = $self->_object; 410 #print ">object is back: $obj\n"; 411 push @$objects, $obj; 412 413 # next step 414 $next = $self->_eat_EOLs; 415 if ($next eq ',') { 416 $self->{reader}->get; 417 $self->_object_list($objects); 418 } 419} 420 421 422sub _object { 423 my ($self) = @_; 424 my $next = $self->_eat_EOLs; 425 #print ">object: $next:\n"; 426 427 if ($next =~ /^("(?:\\"|[^\"])*")([\.;,\]\}\)])*$/) { 428 #print ">complete string1: $next\n"; 429 my $tk = $self->{reader}->get; 430 unshift @{$self->{reader}->{tokens}}, $2 if $2; 431 return $self->_unesc_string($1); 432 433 } else { 434 #print ">object is node: $next\n"; 435 $self->_node; 436 } 437} 438 439 440sub _anonymous_node { 441 my ($self) = @_; 442 my $next = $self->{reader}->try; 443 $next =~ /^([\[\{\(])(.*)$/; 444 #print ">anonnode1: $1\n"; 445 #print ">anonnode2: $2\n"; 446 447 $self->{reader}->get; 448 unshift @{$self->{reader}->{tokens}}, $2 if $2; 449 450 if ($1 eq '[') { 451 #print ">anonnode: []\n"; 452 my $genid = "<$self->{ansuri}g_$self->{gid}>"; 453 $self->{gid}++; 454 455 $next = $self->_eat_EOLs; 456 if ($next =~ /^\](.)*$/) { 457 $self->_exist_quantif($genid); 458 } else { 459 $self->_exist_quantif($genid); 460 $self->_statement($genid); 461 } 462 463 # next step 464 $next = $self->_eat_EOLs; 465 my $tk = $self->{reader}->get; 466 if ($tk =~ /^\](.+)$/) { 467 unshift @{$self->{reader}->{tokens}}, $1; 468 } elsif ($tk ne ']') { 469 $self->_do_error(107, $tk); 470 } 471 return $genid; 472 473 } elsif ($1 eq '{') { 474 #print ">anonnode: {}\n"; 475 my $genid = "<$self->{ansuri}c_$self->{cid}>"; 476 $self->{cid}++; 477 478 # ns mapping is passed to inner context 479 $self->{ns}->{$genid} = {}; 480 foreach (keys %{$self->{ns}->{$self->{context}}}) { 481 $self->{ns}->{$genid}->{$_} = 482 $self->{ns}->{$self->{context}}->{$_}; 483 #print ">prefix '$_' passed to inner context\n"; 484 } 485 486 my $parent_context = $self->{context}; 487 $self->{context} = $genid; 488 $self->_exist_quantif($genid); # quantifying the new context 489 $self->_statement_list; # parsing nested statements 490 $self->{context} = $parent_context; 491 492 # next step 493 $self->_eat_EOLs; 494 my $tk = $self->{reader}->get; 495 #print ">next token: $tk\n"; 496 if ($tk =~ /^\}([,;\.\]\}\)])?$/) { 497 unshift @{$self->{reader}->{tokens}}, $1 if $1; 498 } else { 499 $self->_do_error(108, $tk); 500 } 501 return $genid; 502 503 } else { 504 #print ">anonnode: ()\n"; 505 my $next = $self->_eat_EOLs; 506 507# if ($next =~ /^\)([,;\.\]\}\)])*$/) { 508 if ($next =~ /^\)(.*)$/) { 509 #print ">void ()\n"; 510 $self->{reader}->get; 511 unshift @{$self->{reader}->{tokens}}, $1 if $1; 512 return $self->_built_in_verb('daml','nil'); 513 514 } else { 515 516 #print ">anonnode () starts: $next\n"; 517 my @nodes = (); 518 until ($next =~ /^.*\)[,;\.\]\}\)]*$/) { 519 push @nodes, $self->_object; 520 $next = $self->_eat_EOLs; 521 } 522 if ($next =~ /^([^)]*)\)([,;\.\]\}\)]*)$/) { 523 $self->{reader}->get; 524 unshift @{$self->{reader}->{tokens}}, $2 if $2; 525 unshift @{$self->{reader}->{tokens}}, ')'; 526 if ($1) { 527 unshift @{$self->{reader}->{tokens}}, $1; 528 push @nodes, $self->_object; 529 } 530 $self->{reader}->get; 531 } 532 my $pref = $self->_built_in_verb('daml',''); 533 534 my $i = 0; 535 my @expnl = (); # expanded node list 536 foreach (@nodes) { 537 $i++; 538 push @expnl, '['; 539 push @expnl, $pref . 'first'; 540 push @expnl, $_; 541 push @expnl, ';'; 542 push @expnl, $pref . 'rest'; 543 push @expnl, $pref . 'nil' 544 if $i == scalar @nodes; 545 } 546 for (my $j = 0; $j < $i; $j++) {push @expnl, ']'} 547 unshift @{$self->{reader}->{tokens}}, @expnl; 548 my $exp = join(' ', @expnl); 549 #print ">expanded: $exp\n"; 550 my $genid = $self->_anonymous_node; 551 return $genid; 552 } 553 } 554} 555 556######################################## 557# utils 558 559sub _exist_quantif { 560 my ($self, $anode) = @_; 561 562 if ($self->{quantif}) { 563 my $qname = $self->_built_in_verb('log','forSome'); 564 #print ">existential quantification: $anode\n"; 565 #print ">CONTEXT: $self->{context}\n"; 566 #print ">SUBJECT: $self->{context}\n"; 567 #print ">PROPERTY: $qname"; 568 #print ">-$anode\n"; 569 $self->_process_statement($self->{context}, 570 [[$qname, $anode]]); 571 } 572} 573 574 575sub _eat_EOLs { 576 my ($self) = @_; 577 578 my $next = $self->{reader}->try; 579 while ($next eq ' EOL ') { 580 $self->{reader}->get; 581 $next = $self->{reader}->try; 582 } 583 return $next; 584} 585 586 587# comment inside a list 588sub _check_inline_comment { 589 my ($self, $next) = @_; 590 591 if ($next =~ /^#/) { 592 $self->_space; 593 $next = $self->_eat_EOLs; 594 } 595 return $next; 596} 597 598 599sub _built_in_verb { 600 my ($self, $key, $verb) = @_; 601 602 # resolves possible NS conflicts 603 my $i = 1; 604 while ($self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} and 605 $self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} ne 606 $self->{hardns}->{$key}->[1]) { 607 608 $self->{hardns}->{$key}->[0] = "$key$i"; 609 $i++; 610 } 611 # adds prefix-NS binding 612 $self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} = 613 $self->{hardns}->{$key}->[1]; 614 615 return "$self->{hardns}->{$key}->[0]:$verb"; 616} 617 618 619sub _unesc_qname { 620 my $qname = shift; 621 622 #print ">escaped qname: $qname\n"; 623 my $i = 0; 624 my @unesc = (); 625 while ($qname =~ /(__+)/) { 626 my $res = substr(sprintf("%b", length($1) + 1), 1); 627 $res =~ s/1/-/g; 628 $res =~ s/0/_/g; 629 $qname =~ s/__+/<$i>/; 630 push @unesc, $res; 631 $i++; 632 } 633 for ($i=0; $i<@unesc; $i++) { $qname =~ s/<$i>/$unesc[$i]/; } 634 #print ">unescaped qname: $qname\n"; 635 return $qname; 636} 637 638 639sub _unesc_string { 640 my ($self, $str) = @_; 641 642 $str =~ s/\\\n//go; 643 $str =~ s/\\\\/\\/go; 644 $str =~ s/\\'/'/go; 645 $str =~ s/\\"/"/go; 646 $str =~ s/\\n/\n/go; 647 $str =~ s/\\r/\r/go; 648 $str =~ s/\\t/\t/go; 649 $str =~ s/\\u([\da-fA-F]{4})/pack('U',hex($1))/ge; 650 $str =~ s/\\U00([\da-fA-F]{6})/pack('U',hex($1))/ge; 651 $str =~ s/\\([\da-fA-F]{3})/pack('C',oct($1))/ge; #deprecated 652 $str =~ s/\\x([\da-fA-F]{2})/pack('C',hex($1))/ge; #deprecated 653 654 return $str; 655} 656 657######################################## 658 659sub _do_error { 660 my ($self, $n, $tk) = @_; 661 662 my %msg = ( 663 1 => 'file not specified', 664 2 => 'file not found', 665 3 => 'string not specified', 666 4 => 'invalid parameter of quantification method (0|1)', 667 668 101 => 'bind directive is obsolete, use @prefix instead', 669 102 => 'invalid namespace prefix', 670 103 => 'invalid URI reference (uri_ref2)', 671 104 => 'end of verb (->) expected', 672 105 => 'invalid characters in string1', 673 106 => 'namespace prefix not bound', 674 107 => 'invalid end of anonnode, ] expected', 675 108 => 'invalid end of anonnode, } expected', 676 109 => 'end of verb (of) expected', 677 110 => 'end of verb (-<) expected', 678 111 => 'string1 ("...") is not terminated', 679 112 => 'invalid characters in string2', 680 113 => 'string2 ("""...""")is not terminated', 681 114 => 'string1 ("...") can\'t include newlines', 682 115 => 'end of statement expected', 683 116 => 'invalid node', 684 117 => 'last keyword expected', 685 199 => ':- token not supported yet', 686 687 201 => '[Triples] attempt to add invalid node', 688 202 => '[Triples] literal not allowed as subject or predicate', 689 690 #301 => '[SAX] systemID source not implemented', 691 302 => '[SAX] characterStream source not implemented', 692 693 401 => '[XML] unable to convert URI predicate to QName', 694 402 => '[XML] subject not recognized - internal error', 695 696 501 => '[RDFCore] literal not allowed as subject', 697 502 => '[RDFCore] valid storage not specified', 698 503 => '[RDFStore] literal not allowed as subject', 699 ); 700 701 my $msg = "[Error $n]"; 702 $msg .= " line $self->{reader}->{ln}, token" if $n > 100; 703 $msg .= " \"$tk\"\n"; 704 $msg .= "$msg{$n}!\n"; 705 croak $msg; 706} 707 708 7091; 710 711 712 713 714 715 716 717 718