1# RDF::Trine::Serializer::Turtle 2# ----------------------------------------------------------------------------- 3 4=head1 NAME 5 6RDF::Trine::Serializer::Turtle - Turtle Serializer 7 8=head1 VERSION 9 10This document describes RDF::Trine::Serializer::Turtle version 1.019 11 12=head1 SYNOPSIS 13 14 use RDF::Trine::Serializer::Turtle; 15 my $serializer = RDF::Trine::Serializer::Turtle->new( namespaces => { ex => 'http://example/' } ); 16 print $serializer->serialize_model_to_string($model); 17 18=head1 DESCRIPTION 19 20The RDF::Trine::Serializer::Turtle class provides an API for serializing RDF 21graphs to the Turtle syntax. XSD numeric types are serialized as bare literals, 22and where possible the more concise syntax is used for rdf:Lists. 23 24=head1 METHODS 25 26Beyond the methods documented below, this class inherits methods from the 27L<RDF::Trine::Serializer> class. 28 29=over 4 30 31=cut 32 33package RDF::Trine::Serializer::Turtle; 34 35use strict; 36use warnings; 37use base qw(RDF::Trine::Serializer); 38 39use URI; 40use Carp; 41use Encode; 42use Data::Dumper; 43use Scalar::Util qw(blessed refaddr reftype); 44 45use RDF::Trine qw(variable iri); 46use RDF::Trine::Node; 47use RDF::Trine::Statement; 48use RDF::Trine::Error qw(:try); 49use RDF::Trine::Namespace qw(rdf); 50 51###################################################################### 52 53our ($VERSION, $debug); 54BEGIN { 55 $debug = 0; 56 $VERSION = '1.019'; 57 $RDF::Trine::Serializer::serializer_names{ 'turtle' } = __PACKAGE__; 58 $RDF::Trine::Serializer::format_uris{ 'http://www.w3.org/ns/formats/Turtle' } = __PACKAGE__; 59 foreach my $type (qw(application/x-turtle application/turtle text/turtle text/rdf+n3)) { 60 $RDF::Trine::Serializer::media_types{ $type } = __PACKAGE__; 61 } 62} 63 64###################################################################### 65 66=item C<< new ( namespaces => \%namespaces, base_uri => $base_uri ) >> 67 68Returns a new Turtle serializer object. 69 70=cut 71 72sub new { 73 my $class = shift; 74 my $ns = {}; 75 my $base_uri; 76 77 if (@_) { 78 if (scalar(@_) == 1 and reftype($_[0]) eq 'HASH') { 79 $ns = shift; 80 } else { 81 my %args = @_; 82 if (exists $args{ base }) { 83 $base_uri = $args{ base }; 84 } 85 if (exists $args{ base_uri }) { 86 $base_uri = $args{ base_uri }; 87 } 88 if (exists $args{ namespaces }) { 89 $ns = $args{ namespaces }; 90 } 91 } 92 } 93 94 my %rev; 95 if (blessed($ns) and $ns->isa('RDF::Trine::NamespaceMap')) { 96 for my $prefix ($ns->list_prefixes) { 97 # way convoluted 98 my $nsuri = $ns->namespace_uri($prefix)->uri->value; 99 $rev{$nsuri} = $prefix; 100 } 101 } 102 else { 103 while (my ($ns, $uri) = each(%{ $ns })) { 104 if (blessed($uri)) { 105 $uri = $uri->uri_value; 106 if (blessed($uri)) { 107 $uri = $uri->uri_value; 108 } 109 } 110 $rev{ $uri } = $ns; 111 } 112 } 113 114 my $self = bless( { 115 ns => \%rev, 116 base_uri => $base_uri, 117 }, $class ); 118 return $self; 119} 120 121=item C<< serialize_model_to_file ( $fh, $model ) >> 122 123Serializes the C<$model> to Turtle, printing the results to the supplied 124filehandle C<<$fh>>. 125 126=cut 127 128sub serialize_model_to_file { 129 my $self = shift; 130 my $fh = shift; 131 my $model = shift; 132 my $sink = RDF::Trine::Serializer::FileSink->new($fh); 133 134 my $st = RDF::Trine::Statement->new( map { variable($_) } qw(s p o) ); 135 my $pat = RDF::Trine::Pattern->new( $st ); 136 my $stream = $model->get_pattern( $pat, undef, orderby => [ qw(s ASC p ASC o ASC) ] ); 137 my $iter = $stream->as_statements( qw(s p o) ); 138 139 $self->serialize_iterator( $sink, $iter, seen => {}, level => 0, tab => "\t", @_, model => $model ); 140 return 1; 141} 142 143=item C<< serialize_model_to_string ( $model ) >> 144 145Serializes the C<$model> to Turtle, returning the result as a string. 146 147=cut 148 149sub serialize_model_to_string { 150 my $self = shift; 151 my $model = shift; 152 my $sink = RDF::Trine::Serializer::StringSink->new(); 153 154 my $st = RDF::Trine::Statement->new( map { variable($_) } qw(s p o) ); 155 my $pat = RDF::Trine::Pattern->new( $st ); 156 my $stream = $model->get_pattern( $pat, undef, orderby => [ qw(s ASC p ASC o ASC) ] ); 157 my $iter = $stream->as_statements( qw(s p o) ); 158 159 $self->serialize_iterator( $sink, $iter, seen => {}, level => 0, tab => "\t", @_, model => $model, string => 1 ); 160 return $sink->string; 161} 162 163=item C<< serialize_iterator_to_file ( $file, $iter ) >> 164 165Serializes the iterator to Turtle, printing the results to the supplied 166filehandle C<<$fh>>. 167 168=cut 169 170sub serialize_iterator_to_file { 171 my $self = shift; 172 my $fh = shift; 173 my $iter = shift; 174 my %args = @_; 175 176 my $sink = RDF::Trine::Serializer::FileSink->new($fh); 177 $self->serialize_iterator( $sink, $iter, %args ); 178 return 1; 179} 180 181=item C<< serialize_iterator ( $sink, $iter ) >> 182 183Serializes the iterator to Turtle, printing the results to the supplied 184sink object. 185 186=cut 187 188sub serialize_iterator { 189 my $self = shift; 190 my $sink = shift; 191 my $iter = shift; 192 my %args = @_; 193 194 my $seen = $args{ seen } || {}; 195 my $level = $args{ level } || 0; 196 my $tab = $args{ tab } || "\t"; 197 my $indent = $tab x $level; 198 199 my %ns = reverse(%{ $self->{ns} }); 200 my @nskeys = sort keys %ns; 201 202 unless ($sink->can('prepend')) { 203 if (@nskeys) { 204 foreach my $ns (sort @nskeys) { 205 my $uri = $ns{ $ns }; 206 $sink->emit("\@prefix $ns: <$uri> .\n"); 207 } 208 $sink->emit("\n"); 209 } 210 } 211 if ($self->{base_uri}) { 212 $sink->emit("\@base <$self->{base_uri}> .\n\n"); 213 } 214 215 my $last_subj; 216 my $last_pred; 217 218 my $open_triple = 0; 219 while (my $st = $iter->next) { 220# warn "------------------\n"; 221# warn $st->as_string . "\n"; 222 my $subj = $st->subject; 223 my $pred = $st->predicate; 224 my $obj = $st->object; 225 226 # we're abusing the seen hash here as the key isn't really a node value, 227 # but since it isn't a valid node string being used it shouldn't collide 228 # with real data. we set this here so that later on when we check for 229 # single-owner bnodes (when attempting to use the [...] concise syntax), 230 # bnodes that have already been serialized as the 'head' of a statement 231 # aren't considered as single-owner. This is because the output string 232 # is acting as a second ownder of the node -- it's already been emitted 233 # as something like '_:foobar', so it can't also be output as '[...]'. 234 $seen->{ ' heads' }{ $subj->as_string }++; 235 236 if (my $model = $args{model}) { 237 if (my $head = $self->_statement_describes_list($model, $st)) { 238 warn "found a rdf:List head " . $head->as_string . " for the subject in statement " . $st->as_string if ($debug); 239 if ($model->count_statements(undef, undef, $head)) { 240 # the rdf:List appears as the object of a statement, and so 241 # will be serialized whenever we get to serializing that 242 # statement 243 warn "next" if ($debug); 244 next; 245 } 246 } 247 } 248 249 if ($seen->{ $subj->as_string }) { 250 warn "next on seen subject " . $st->as_string if ($debug); 251 next; 252 } 253 254 if ($subj->equal( $last_subj )) { 255 # continue an existing subject 256 if ($pred->equal( $last_pred )) { 257 # continue an existing predicate 258 $sink->emit(qq[, ]); 259 $self->_serialize_object_to_file( $sink, $obj, $seen, $level, $tab, %args ); 260 } else { 261 # start a new predicate 262 $sink->emit(qq[ ;\n${indent}$tab]); 263 $self->_turtle( $sink, $pred, 1, $seen, $level, $tab, %args ); 264 $sink->emit(' '); 265 $self->_serialize_object_to_file( $sink, $obj, $seen, $level, $tab, %args ); 266 } 267 } else { 268 # start a new subject 269 if ($open_triple) { 270 $sink->emit(qq[ .\n${indent}]); 271 } 272 $open_triple = 1; 273 $self->_turtle( $sink, $subj, 0, $seen, $level, $tab, %args ); 274 275 warn '-> ' . $pred->as_string if ($debug); 276 $sink->emit(' '); 277 $self->_turtle( $sink, $pred, 1, $seen, $level, $tab, %args ); 278 $sink->emit(' '); 279 $self->_serialize_object_to_file( $sink, $obj, $seen, $level, $tab, %args ); 280 } 281 } continue { 282 if (blessed($last_subj) and not($last_subj->equal($st->subject))) { 283# warn "marking " . $st->subject->as_string . " as seen"; 284 $seen->{ $last_subj->as_string }++; 285 } 286# warn "setting last subject to " . $st->subject->as_string; 287 $last_subj = $st->subject; 288 $last_pred = $st->predicate; 289 } 290 291 if ($open_triple) { 292 $sink->emit(qq[ .\n]); 293 } 294 295 if ($sink->can('prepend')) { 296 my @used_nskeys = keys %{ $self->{used_ns} }; 297 if (@used_nskeys) { 298 my $string = ''; 299 foreach my $ns (sort @used_nskeys) { 300 my $uri = $ns{ $ns }; 301 $string .= "\@prefix $ns: <$uri> .\n"; 302 } 303 $string .= "\n"; 304 $sink->prepend($string); 305 } 306 } 307} 308 309=item C<< serialize_iterator_to_string ( $iter ) >> 310 311Serializes the iterator to Turtle, returning the result as a string. 312 313=cut 314 315sub serialize_iterator_to_string { 316 my $self = shift; 317 my $iter = shift; 318 my $sink = RDF::Trine::Serializer::StringSink->new(); 319 $self->serialize_iterator( $sink, $iter, seen => {}, level => 0, tab => "\t", @_, string => 1 ); 320 return $sink->string; 321} 322 323=item C<< serialize_node ( $node ) >> 324 325Returns a string containing the Turtle serialization of C<< $node >>. 326 327=cut 328 329sub serialize_node { 330 my $self = shift; 331 my $node = shift; 332 return $self->node_as_concise_string( $node ); 333} 334 335sub _serialize_object_to_file { 336 my $self = shift; 337 my $sink = shift; 338 my $subj = shift; 339 my $seen = shift; 340 my $level = shift; 341 my $tab = shift; 342 my %args = @_; 343 my $indent = $tab x $level; 344 345 if (my $model = $args{model}) { 346 if ($subj->isa('RDF::Trine::Node::Blank')) { 347 if ($self->_check_valid_rdf_list( $subj, $model )) { 348# warn "node is a valid rdf:List: " . $subj->as_string . "\n"; 349 return $self->_turtle_rdf_list( $sink, $subj, $model, $seen, $level, $tab, %args ); 350 } else { 351 my $count = $model->count_statements( undef, undef, $subj ); 352 my $rec = $model->count_statements( $subj, undef, $subj ); 353 warn "count=$count, rec=$rec for node " . $subj->as_string if ($debug); 354 if ($count == 1 and $rec == 0) { 355 unless ($seen->{ $subj->as_string }++ or $seen->{ ' heads' }{ $subj->as_string }) { 356 my $pat = RDF::Trine::Pattern->new( RDF::Trine::Statement->new($subj, variable('p'), variable('o')) ); 357 my $stream = $model->get_pattern( $pat, undef, orderby => [ qw(p ASC o ASC) ] ); 358 my $iter = $stream->as_statements( qw(s p o) ); 359 my $last_pred; 360 my $triple_count = 0; 361 $sink->emit("["); 362 while (my $st = $iter->next) { 363 my $pred = $st->predicate; 364 my $obj = $st->object; 365 366 # continue an existing subject 367 if ($pred->equal( $last_pred )) { 368 # continue an existing predicate 369 $sink->emit(qq[, ]); 370 $self->_serialize_object_to_file( $sink, $obj, $seen, $level, $tab, %args ); 371# $self->_turtle( $fh, $obj, 2, $seen, $level, $tab, %args ); 372 } else { 373 # start a new predicate 374 if ($triple_count == 0) { 375 $sink->emit(qq[\n${indent}${tab}${tab}]); 376 } else { 377 $sink->emit(qq[ ;\n${indent}$tab${tab}]); 378 } 379 $self->_turtle( $sink, $pred, 1, $seen, $level, $tab, %args ); 380 $sink->emit(' '); 381 $self->_serialize_object_to_file( $sink, $obj, $seen, $level+1, $tab, %args ); 382 } 383 384 $last_pred = $pred; 385 $triple_count++; 386 } 387 if ($triple_count) { 388 $sink->emit("\n${indent}${tab}"); 389 } 390 $sink->emit("]"); 391 return; 392 } 393 } 394 } 395 } 396 } 397 398 $self->_turtle( $sink, $subj, 2, $seen, $level, $tab, %args ); 399} 400 401sub _statement_describes_list { 402 my $self = shift; 403 my $model = shift; 404 my $st = shift; 405 my $subj = $st->subject; 406 my $pred = $st->predicate; 407 my $obj = $st->object; 408 if ($model->count_statements($subj, $rdf->first) and $model->count_statements($subj, $rdf->rest)) { 409# warn $subj->as_string . " looks like a rdf:List element"; 410 if (my $head = $self->_node_belongs_to_valid_list( $model, $subj )) { 411 return $head; 412 } 413 } 414 415 return; 416} 417 418sub _node_belongs_to_valid_list { 419 my $self = shift; 420 my $model = shift; 421 my $node = shift; 422 while ($model->count_statements( undef, $rdf->rest, $node )) { 423 my $iter = $model->get_statements( undef, $rdf->rest, $node ); 424 my $s = $iter->next; 425 my $ancestor = $s->subject; 426 unless (blessed($ancestor)) { 427# warn "failed to get an expected rdf:List element ancestor"; 428 return 0; 429 } 430 ($node) = $ancestor; 431# warn "stepping back to rdf:List element ancestor " . $node->as_string; 432 } 433 if ($self->_check_valid_rdf_list( $node, $model )) { 434 return $node; 435 } else { 436 return; 437 } 438} 439 440sub _check_valid_rdf_list { 441 my $self = shift; 442 my $head = shift; 443 my $model = shift; 444# warn '--------------------------'; 445# warn "checking if node " . $head->as_string . " is a valid rdf:List\n"; 446 447 my $headrest = $model->count_statements( undef, $rdf->rest, $head ); 448 if ($headrest) { 449# warn "\tnode " . $head->as_string . " seems to be the middle of an rdf:List\n"; 450 return 0; 451 } 452 453 my %list_elements; 454 my $node = $head; 455 until ($node->equal( $rdf->nil )) { 456 $list_elements{ $node->as_string }++; 457 458 unless ($node->isa('RDF::Trine::Node::Blank')) { 459# warn "\tnode " . $node->as_string . " isn't a blank node\n"; 460 return 0; 461 } 462 463 my $first = $model->count_statements( $node, $rdf->first ); 464 unless ($first == 1) { 465# warn "\tnode " . $node->as_string . " has $first rdf:first links when 1 was expected\n"; 466 return 0; 467 } 468 469 my $rest = $model->count_statements( $node, $rdf->rest ); 470 unless ($rest == 1) { 471# warn "\tnode " . $node->as_string . " has $rest rdf:rest links when 1 was expected\n"; 472 return 0; 473 } 474 475 my $in = $model->count_statements( undef, undef, $node ); 476 unless ($in < 2) { 477# warn "\tnode " . $node->as_string . " has $in incoming links when 2 were expected\n"; 478 return 0; 479 } 480 481 if (not($head->equal( $node ))) { 482 # It's OK for the head of a list to have any outgoing links (e.g. (1 2) ex:p "o" 483 # but internal list elements should have only the expected links of rdf:first, 484 # rdf:rest, and optionally an rdf:type rdf:List 485 my $out = $model->count_statements( $node ); 486 unless ($out == 2 or $out == 3) { 487# warn "\tnode " . $node->as_string . " has $out outgoing links when 2 or 3 were expected\n"; 488 return 0; 489 } 490 491 if ($out == 3) { 492 my $type = $model->count_statements( $node, $rdf->type, $rdf->List ); 493 unless ($type == 1) { 494# warn "\tnode " . $node->as_string . " has more outgoing links than expected\n"; 495 return 0; 496 } 497 } 498 } 499 500 501 502 my @links = $model->objects_for_predicate_list( $node, $rdf->first, $rdf->rest ); 503 foreach my $l (@links) { 504 if ($list_elements{ $l->as_string }) { 505 warn $node->as_string . " is repeated in the list" if ($debug); 506 return 0; 507 } 508 } 509 510 ($node) = $model->objects_for_predicate_list( $node, $rdf->rest ); 511 unless (blessed($node)) { 512# warn "\tno valid rdf:rest object found"; 513 return 0; 514 } 515# warn "\tmoving on to rdf:rest object " . $node->as_string . "\n"; 516 } 517 518# warn "\tlooks like a valid rdf:List\n"; 519 return 1; 520} 521 522sub _turtle_rdf_list { 523 my $self = shift; 524 my $sink = shift; 525 my $head = shift; 526 my $model = shift; 527 my $seen = shift; 528 my $level = shift; 529 my $tab = shift; 530 my %args = @_; 531 my $node = $head; 532 my $count = 0; 533 $sink->emit('('); 534 until ($node->equal( $rdf->nil )) { 535 if ($count) { 536 $sink->emit(' '); 537 } 538 my ($value) = $model->objects_for_predicate_list( $node, $rdf->first ); 539 $self->_serialize_object_to_file( $sink, $value, $seen, $level, $tab, %args ); 540 $seen->{ $node->as_string }++; 541 ($node) = $model->objects_for_predicate_list( $node, $rdf->rest ); 542 $count++; 543 } 544 $sink->emit(')'); 545} 546 547sub _node_concise_string { 548 my $self = shift; 549 my $obj = shift; 550 if ($obj->is_literal and $obj->has_datatype) { 551 my $dt = $obj->literal_datatype; 552 if ($dt =~ m<^http://www.w3.org/2001/XMLSchema#(integer|double|decimal)$> and $obj->is_canonical_lexical_form) { 553 my $value = $obj->literal_value; 554 return $value; 555 } else { 556 my $dtr = iri($dt); 557 my $literal = $obj->literal_value; 558 my $qname; 559 try { 560 my ($ns,$local) = $dtr->qname; 561 if (blessed($self) and exists $self->{ns}{$ns}) { 562 $qname = join(':', $self->{ns}{$ns}, $local); 563 $self->{used_ns}{ $self->{ns}{$ns} }++; 564 } 565 } catch RDF::Trine::Error with {}; 566 if ($qname) { 567 my $escaped = $obj->_unicode_escape( $literal ); 568 return qq["$escaped"^^$qname]; 569 } 570 } 571 } elsif ($obj->isa('RDF::Trine::Node::Resource')) { 572 my $value; 573 try { 574 my ($ns,$local) = $obj->qname; 575 if (blessed($self) and exists $self->{ns}{$ns}) { 576 $value = join(':', $self->{ns}{$ns}, $local); 577 $self->{used_ns}{ $self->{ns}{$ns} }++; 578 } 579 } catch RDF::Trine::Error with {} otherwise {}; 580 if ($value) { 581 return $value; 582 } 583 } 584 return; 585} 586 587=item C<< node_as_concise_string >> 588 589Returns a string representation using common Turtle syntax shortcuts (e.g. for numeric literals). 590 591=cut 592 593sub node_as_concise_string { 594 my $self = shift; 595 my $obj = shift; 596 my $str = $self->_node_concise_string( $obj ); 597 if (defined($str)) { 598 return $str; 599 } else { 600 return $obj->as_ntriples; 601 } 602} 603 604sub _turtle { 605 my $self = shift; 606 my $sink = shift; 607 my $obj = shift; 608 my $pos = shift; 609 my $seen = shift; 610 my $level = shift; 611 my $tab = shift; 612 my %args = @_; 613 614 if ($obj->isa('RDF::Trine::Node::Resource') and $pos == 1 and $obj->uri_value eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type') { 615 $sink->emit('a'); 616 return; 617 } elsif ($obj->isa('RDF::Trine::Node::Blank') and $pos == 0) { 618 if (my $model = $args{ model }) { 619 my $count = $model->count_statements( undef, undef, $obj ); 620 my $rec = $model->count_statements( $obj, undef, $obj ); 621 # XXX if $count == 1, then it would be better to ignore this triple for now, since it's a 'single-owner' bnode, and better serialized as a '[ ... ]' bnode in the object position as part of the 'owning' triple 622 if ($count < 1 and $rec == 0) { 623 $sink->emit('[]'); 624 return; 625 } 626 } 627 } elsif (defined(my $str = $self->_node_concise_string( $obj ))) { 628 $sink->emit($str); 629 return; 630 } 631 632 $sink->emit($obj->as_ntriples); 633 return; 634} 635 6361; 637 638__END__ 639 640=back 641 642=head1 BUGS 643 644Please report any bugs or feature requests to through the GitHub web interface 645at L<https://github.com/kasei/perlrdf/issues>. 646 647=head1 SEE ALSO 648 649L<http://www.w3.org/TeamSubmission/turtle/> 650 651=head1 AUTHOR 652 653Gregory Todd Williams C<< <gwilliams@cpan.org> >> 654 655=head1 COPYRIGHT 656 657Copyright (c) 2006-2012 Gregory Todd Williams. This 658program is free software; you can redistribute it and/or modify it under 659the same terms as Perl itself. 660 661=cut 662