1# RDF::Trine::Serializer::Turtle
2# -----------------------------------------------------------------------------
3
4=head1 NAME
5
6RDF::Trine::Serializer::Turtle - Turtle Serializer
7
8=head1 VERSION
9
10This document describes RDF::Trine::Serializer::Turtle version 1.019
11
12=head1 SYNOPSIS
13
14 use RDF::Trine::Serializer::Turtle;
15 my $serializer	= RDF::Trine::Serializer::Turtle->new( namespaces => { ex => 'http://example/' } );
16 print $serializer->serialize_model_to_string($model);
17
18=head1 DESCRIPTION
19
20The RDF::Trine::Serializer::Turtle class provides an API for serializing RDF
21graphs to the Turtle syntax. XSD numeric types are serialized as bare literals,
22and where possible the more concise syntax is used for rdf:Lists.
23
24=head1 METHODS
25
26Beyond the methods documented below, this class inherits methods from the
27L<RDF::Trine::Serializer> class.
28
29=over 4
30
31=cut
32
33package RDF::Trine::Serializer::Turtle;
34
35use strict;
36use warnings;
37use base qw(RDF::Trine::Serializer);
38
39use URI;
40use Carp;
41use Encode;
42use Data::Dumper;
43use Scalar::Util qw(blessed refaddr reftype);
44
45use RDF::Trine qw(variable iri);
46use RDF::Trine::Node;
47use RDF::Trine::Statement;
48use RDF::Trine::Error qw(:try);
49use RDF::Trine::Namespace qw(rdf);
50
51######################################################################
52
53our ($VERSION, $debug);
54BEGIN {
55	$debug		= 0;
56	$VERSION	= '1.019';
57	$RDF::Trine::Serializer::serializer_names{ 'turtle' }	= __PACKAGE__;
58	$RDF::Trine::Serializer::format_uris{ 'http://www.w3.org/ns/formats/Turtle' }	= __PACKAGE__;
59	foreach my $type (qw(application/x-turtle application/turtle text/turtle text/rdf+n3)) {
60		$RDF::Trine::Serializer::media_types{ $type }	= __PACKAGE__;
61	}
62}
63
64######################################################################
65
66=item C<< new ( namespaces => \%namespaces, base_uri => $base_uri ) >>
67
68Returns a new Turtle serializer object.
69
70=cut
71
72sub new {
73	my $class	= shift;
74	my $ns	= {};
75	my $base_uri;
76
77	if (@_) {
78		if (scalar(@_) == 1 and reftype($_[0]) eq 'HASH') {
79			$ns	= shift;
80		} else {
81			my %args	= @_;
82			if (exists $args{ base }) {
83				$base_uri   = $args{ base };
84			}
85			if (exists $args{ base_uri }) {
86				$base_uri   = $args{ base_uri };
87			}
88			if (exists $args{ namespaces }) {
89				$ns	= $args{ namespaces };
90			}
91		}
92	}
93
94	my %rev;
95    if (blessed($ns) and $ns->isa('RDF::Trine::NamespaceMap')) {
96        for my $prefix ($ns->list_prefixes) {
97            # way convoluted
98            my $nsuri = $ns->namespace_uri($prefix)->uri->value;
99            $rev{$nsuri} = $prefix;
100        }
101    }
102    else {
103        while (my ($ns, $uri) = each(%{ $ns })) {
104            if (blessed($uri)) {
105                $uri	= $uri->uri_value;
106                if (blessed($uri)) {
107                    $uri	= $uri->uri_value;
108                }
109            }
110            $rev{ $uri }	= $ns;
111        }
112	}
113
114	my $self = bless( {
115		ns		=> \%rev,
116		base_uri	=> $base_uri,
117	}, $class );
118	return $self;
119}
120
121=item C<< serialize_model_to_file ( $fh, $model ) >>
122
123Serializes the C<$model> to Turtle, printing the results to the supplied
124filehandle C<<$fh>>.
125
126=cut
127
128sub serialize_model_to_file {
129	my $self	= shift;
130	my $fh		= shift;
131	my $model	= shift;
132	my $sink	= RDF::Trine::Serializer::FileSink->new($fh);
133
134	my $st		= RDF::Trine::Statement->new( map { variable($_) } qw(s p o) );
135	my $pat		= RDF::Trine::Pattern->new( $st );
136	my $stream	= $model->get_pattern( $pat, undef, orderby => [ qw(s ASC p ASC o ASC) ] );
137	my $iter	= $stream->as_statements( qw(s p o) );
138
139	$self->serialize_iterator( $sink, $iter, seen => {}, level => 0, tab => "\t", @_, model => $model );
140	return 1;
141}
142
143=item C<< serialize_model_to_string ( $model ) >>
144
145Serializes the C<$model> to Turtle, returning the result as a string.
146
147=cut
148
149sub serialize_model_to_string {
150	my $self	= shift;
151	my $model	= shift;
152	my $sink	= RDF::Trine::Serializer::StringSink->new();
153
154	my $st		= RDF::Trine::Statement->new( map { variable($_) } qw(s p o) );
155	my $pat		= RDF::Trine::Pattern->new( $st );
156	my $stream	= $model->get_pattern( $pat, undef, orderby => [ qw(s ASC p ASC o ASC) ] );
157	my $iter	= $stream->as_statements( qw(s p o) );
158
159	$self->serialize_iterator( $sink, $iter, seen => {}, level => 0, tab => "\t", @_, model => $model, string => 1 );
160	return $sink->string;
161}
162
163=item C<< serialize_iterator_to_file ( $file, $iter ) >>
164
165Serializes the iterator to Turtle, printing the results to the supplied
166filehandle C<<$fh>>.
167
168=cut
169
170sub serialize_iterator_to_file {
171	my $self	= shift;
172	my $fh		= shift;
173	my $iter	= shift;
174	my %args	= @_;
175
176	my $sink	= RDF::Trine::Serializer::FileSink->new($fh);
177	$self->serialize_iterator( $sink, $iter, %args );
178	return 1;
179}
180
181=item C<< serialize_iterator ( $sink, $iter ) >>
182
183Serializes the iterator to Turtle, printing the results to the supplied
184sink object.
185
186=cut
187
188sub serialize_iterator {
189	my $self	= shift;
190	my $sink	= shift;
191	my $iter	= shift;
192	my %args	= @_;
193
194	my $seen	= $args{ seen } || {};
195	my $level	= $args{ level } || 0;
196	my $tab		= $args{ tab } || "\t";
197	my $indent	= $tab x $level;
198
199	my %ns		= reverse(%{ $self->{ns} });
200	my @nskeys	= sort keys %ns;
201
202	unless ($sink->can('prepend')) {
203		if (@nskeys) {
204			foreach my $ns (sort @nskeys) {
205				my $uri	= $ns{ $ns };
206				$sink->emit("\@prefix $ns: <$uri> .\n");
207			}
208			$sink->emit("\n");
209		}
210	}
211	if ($self->{base_uri}) {
212		$sink->emit("\@base <$self->{base_uri}> .\n\n");
213	}
214
215	my $last_subj;
216	my $last_pred;
217
218	my $open_triple	= 0;
219	while (my $st = $iter->next) {
220# 		warn "------------------\n";
221# 		warn $st->as_string . "\n";
222		my $subj	= $st->subject;
223		my $pred	= $st->predicate;
224		my $obj		= $st->object;
225
226		# we're abusing the seen hash here as the key isn't really a node value,
227		# but since it isn't a valid node string being used it shouldn't collide
228		# with real data. we set this here so that later on when we check for
229		# single-owner bnodes (when attempting to use the [...] concise syntax),
230		# bnodes that have already been serialized as the 'head' of a statement
231		# aren't considered as single-owner. This is because the output string
232		# is acting as a second ownder of the node -- it's already been emitted
233		# as something like '_:foobar', so it can't also be output as '[...]'.
234		$seen->{ '  heads' }{ $subj->as_string }++;
235
236		if (my $model = $args{model}) {
237			if (my $head = $self->_statement_describes_list($model, $st)) {
238				warn "found a rdf:List head " . $head->as_string . " for the subject in statement " . $st->as_string if ($debug);
239				if ($model->count_statements(undef, undef, $head)) {
240					# the rdf:List appears as the object of a statement, and so
241					# will be serialized whenever we get to serializing that
242					# statement
243					warn "next" if ($debug);
244					next;
245				}
246			}
247		}
248
249		if ($seen->{ $subj->as_string }) {
250			warn "next on seen subject " . $st->as_string if ($debug);
251			next;
252		}
253
254		if ($subj->equal( $last_subj )) {
255			# continue an existing subject
256			if ($pred->equal( $last_pred )) {
257				# continue an existing predicate
258				$sink->emit(qq[, ]);
259				$self->_serialize_object_to_file( $sink, $obj, $seen, $level, $tab, %args );
260			} else {
261				# start a new predicate
262				$sink->emit(qq[ ;\n${indent}$tab]);
263				$self->_turtle( $sink, $pred, 1, $seen, $level, $tab, %args );
264				$sink->emit(' ');
265				$self->_serialize_object_to_file( $sink, $obj, $seen, $level, $tab, %args );
266			}
267		} else {
268			# start a new subject
269			if ($open_triple) {
270				$sink->emit(qq[ .\n${indent}]);
271			}
272			$open_triple	= 1;
273			$self->_turtle( $sink, $subj, 0, $seen, $level, $tab, %args );
274
275			warn '-> ' . $pred->as_string if ($debug);
276			$sink->emit(' ');
277			$self->_turtle( $sink, $pred, 1, $seen, $level, $tab, %args );
278			$sink->emit(' ');
279			$self->_serialize_object_to_file( $sink, $obj, $seen, $level, $tab, %args );
280		}
281	} continue {
282		if (blessed($last_subj) and not($last_subj->equal($st->subject))) {
283# 			warn "marking " . $st->subject->as_string . " as seen";
284			$seen->{ $last_subj->as_string }++;
285		}
286# 		warn "setting last subject to " . $st->subject->as_string;
287		$last_subj	= $st->subject;
288		$last_pred	= $st->predicate;
289	}
290
291	if ($open_triple) {
292		$sink->emit(qq[ .\n]);
293	}
294
295	if ($sink->can('prepend')) {
296		my @used_nskeys	= keys %{ $self->{used_ns} };
297		if (@used_nskeys) {
298			my $string	= '';
299			foreach my $ns (sort @used_nskeys) {
300				my $uri	= $ns{ $ns };
301				$string	.= "\@prefix $ns: <$uri> .\n";
302			}
303			$string	.= "\n";
304			$sink->prepend($string);
305		}
306	}
307}
308
309=item C<< serialize_iterator_to_string ( $iter ) >>
310
311Serializes the iterator to Turtle, returning the result as a string.
312
313=cut
314
315sub serialize_iterator_to_string {
316	my $self	= shift;
317	my $iter	= shift;
318	my $sink	= RDF::Trine::Serializer::StringSink->new();
319	$self->serialize_iterator( $sink, $iter, seen => {}, level => 0, tab => "\t", @_, string => 1 );
320	return $sink->string;
321}
322
323=item C<< serialize_node ( $node ) >>
324
325Returns a string containing the Turtle serialization of C<< $node >>.
326
327=cut
328
329sub serialize_node {
330	my $self	= shift;
331	my $node	= shift;
332	return $self->node_as_concise_string( $node );
333}
334
335sub _serialize_object_to_file {
336	my $self	= shift;
337	my $sink	= shift;
338	my $subj	= shift;
339	my $seen	= shift;
340	my $level	= shift;
341	my $tab		= shift;
342	my %args	= @_;
343	my $indent	= $tab x $level;
344
345	if (my $model = $args{model}) {
346		if ($subj->isa('RDF::Trine::Node::Blank')) {
347			if ($self->_check_valid_rdf_list( $subj, $model )) {
348# 				warn "node is a valid rdf:List: " . $subj->as_string . "\n";
349				return $self->_turtle_rdf_list( $sink, $subj, $model, $seen, $level, $tab, %args );
350			} else {
351				my $count	= $model->count_statements( undef, undef, $subj );
352				my $rec		= $model->count_statements( $subj, undef, $subj );
353				warn "count=$count, rec=$rec for node " . $subj->as_string if ($debug);
354				if ($count == 1 and $rec == 0) {
355					unless ($seen->{ $subj->as_string }++ or $seen->{ '  heads' }{ $subj->as_string }) {
356						my $pat		= RDF::Trine::Pattern->new( RDF::Trine::Statement->new($subj, variable('p'), variable('o')) );
357						my $stream	= $model->get_pattern( $pat, undef, orderby => [ qw(p ASC o ASC) ] );
358						my $iter	= $stream->as_statements( qw(s p o) );
359						my $last_pred;
360						my $triple_count	= 0;
361						$sink->emit("[");
362						while (my $st = $iter->next) {
363							my $pred	= $st->predicate;
364							my $obj		= $st->object;
365
366							# continue an existing subject
367							if ($pred->equal( $last_pred )) {
368								# continue an existing predicate
369								$sink->emit(qq[, ]);
370								$self->_serialize_object_to_file( $sink, $obj, $seen, $level, $tab, %args );
371#								$self->_turtle( $fh, $obj, 2, $seen, $level, $tab, %args );
372							} else {
373								# start a new predicate
374								if ($triple_count == 0) {
375									$sink->emit(qq[\n${indent}${tab}${tab}]);
376								} else {
377									$sink->emit(qq[ ;\n${indent}$tab${tab}]);
378								}
379								$self->_turtle( $sink, $pred, 1, $seen, $level, $tab, %args );
380								$sink->emit(' ');
381								$self->_serialize_object_to_file( $sink, $obj, $seen, $level+1, $tab, %args );
382							}
383
384							$last_pred	= $pred;
385							$triple_count++;
386						}
387						if ($triple_count) {
388							$sink->emit("\n${indent}${tab}");
389						}
390						$sink->emit("]");
391						return;
392					}
393				}
394			}
395		}
396	}
397
398	$self->_turtle( $sink, $subj, 2, $seen, $level, $tab, %args );
399}
400
401sub _statement_describes_list {
402	my $self	= shift;
403	my $model	= shift;
404	my $st		= shift;
405	my $subj	= $st->subject;
406	my $pred	= $st->predicate;
407	my $obj		= $st->object;
408	if ($model->count_statements($subj, $rdf->first) and $model->count_statements($subj, $rdf->rest)) {
409# 		warn $subj->as_string . " looks like a rdf:List element";
410		if (my $head = $self->_node_belongs_to_valid_list( $model, $subj )) {
411			return $head;
412		}
413	}
414
415	return;
416}
417
418sub _node_belongs_to_valid_list {
419	my $self	= shift;
420	my $model	= shift;
421	my $node	= shift;
422	while ($model->count_statements( undef, $rdf->rest, $node )) {
423		my $iter		= $model->get_statements( undef, $rdf->rest, $node );
424		my $s			= $iter->next;
425		my $ancestor	= $s->subject;
426		unless (blessed($ancestor)) {
427# 			warn "failed to get an expected rdf:List element ancestor";
428			return 0;
429		}
430		($node)	= $ancestor;
431# 		warn "stepping back to rdf:List element ancestor " . $node->as_string;
432	}
433	if ($self->_check_valid_rdf_list( $node, $model )) {
434		return $node;
435	} else {
436		return;
437	}
438}
439
440sub _check_valid_rdf_list {
441	my $self	= shift;
442	my $head	= shift;
443	my $model	= shift;
444# 	warn '--------------------------';
445# 	warn "checking if node " . $head->as_string . " is a valid rdf:List\n";
446
447	my $headrest	= $model->count_statements( undef, $rdf->rest, $head );
448	if ($headrest) {
449# 		warn "\tnode " . $head->as_string . " seems to be the middle of an rdf:List\n";
450		return 0;
451	}
452
453	my %list_elements;
454	my $node	= $head;
455	until ($node->equal( $rdf->nil )) {
456		$list_elements{ $node->as_string }++;
457
458		unless ($node->isa('RDF::Trine::Node::Blank')) {
459# 			warn "\tnode " . $node->as_string . " isn't a blank node\n";
460			return 0;
461		}
462
463		my $first	= $model->count_statements( $node, $rdf->first );
464		unless ($first == 1) {
465# 			warn "\tnode " . $node->as_string . " has $first rdf:first links when 1 was expected\n";
466			return 0;
467		}
468
469		my $rest	= $model->count_statements( $node, $rdf->rest );
470		unless ($rest == 1) {
471# 			warn "\tnode " . $node->as_string . " has $rest rdf:rest links when 1 was expected\n";
472			return 0;
473		}
474
475		my $in		= $model->count_statements( undef, undef, $node );
476		unless ($in < 2) {
477# 			warn "\tnode " . $node->as_string . " has $in incoming links when 2 were expected\n";
478			return 0;
479		}
480
481		if (not($head->equal( $node ))) {
482			# It's OK for the head of a list to have any outgoing links (e.g. (1 2) ex:p "o"
483			# but internal list elements should have only the expected links of rdf:first,
484			# rdf:rest, and optionally an rdf:type rdf:List
485			my $out		= $model->count_statements( $node );
486			unless ($out == 2 or $out == 3) {
487# 				warn "\tnode " . $node->as_string . " has $out outgoing links when 2 or 3 were expected\n";
488				return 0;
489			}
490
491			if ($out == 3) {
492				my $type	= $model->count_statements( $node, $rdf->type, $rdf->List );
493				unless ($type == 1) {
494# 					warn "\tnode " . $node->as_string . " has more outgoing links than expected\n";
495					return 0;
496				}
497			}
498		}
499
500
501
502		my @links	= $model->objects_for_predicate_list( $node, $rdf->first, $rdf->rest );
503		foreach my $l (@links) {
504			if ($list_elements{ $l->as_string }) {
505				warn $node->as_string . " is repeated in the list" if ($debug);
506				return 0;
507			}
508		}
509
510		($node)	= $model->objects_for_predicate_list( $node, $rdf->rest );
511		unless (blessed($node)) {
512# 			warn "\tno valid rdf:rest object found";
513			return 0;
514		}
515# 		warn "\tmoving on to rdf:rest object " . $node->as_string . "\n";
516	}
517
518# 	warn "\tlooks like a valid rdf:List\n";
519	return 1;
520}
521
522sub _turtle_rdf_list {
523	my $self	= shift;
524	my $sink	= shift;
525	my $head	= shift;
526	my $model	= shift;
527	my $seen	= shift;
528	my $level	= shift;
529	my $tab		= shift;
530	my %args	= @_;
531	my $node	= $head;
532	my $count	= 0;
533	$sink->emit('(');
534	until ($node->equal( $rdf->nil )) {
535		if ($count) {
536			$sink->emit(' ');
537		}
538		my ($value)	= $model->objects_for_predicate_list( $node, $rdf->first );
539		$self->_serialize_object_to_file( $sink, $value, $seen, $level, $tab, %args );
540		$seen->{ $node->as_string }++;
541		($node)		= $model->objects_for_predicate_list( $node, $rdf->rest );
542		$count++;
543	}
544	$sink->emit(')');
545}
546
547sub _node_concise_string {
548	my $self	= shift;
549	my $obj		= shift;
550	if ($obj->is_literal and $obj->has_datatype) {
551		my $dt	= $obj->literal_datatype;
552		if ($dt =~ m<^http://www.w3.org/2001/XMLSchema#(integer|double|decimal)$> and $obj->is_canonical_lexical_form) {
553			my $value	= $obj->literal_value;
554			return $value;
555		} else {
556			my $dtr	= iri($dt);
557			my $literal	= $obj->literal_value;
558			my $qname;
559			try {
560				my ($ns,$local)	= $dtr->qname;
561				if (blessed($self) and exists $self->{ns}{$ns}) {
562					$qname	= join(':', $self->{ns}{$ns}, $local);
563					$self->{used_ns}{ $self->{ns}{$ns} }++;
564				}
565			} catch RDF::Trine::Error with {};
566			if ($qname) {
567				my $escaped	= $obj->_unicode_escape( $literal );
568				return qq["$escaped"^^$qname];
569			}
570		}
571	} elsif ($obj->isa('RDF::Trine::Node::Resource')) {
572		my $value;
573		try {
574			my ($ns,$local)	= $obj->qname;
575			if (blessed($self) and exists $self->{ns}{$ns}) {
576				$value	= join(':', $self->{ns}{$ns}, $local);
577				$self->{used_ns}{ $self->{ns}{$ns} }++;
578			}
579		} catch RDF::Trine::Error with {} otherwise {};
580		if ($value) {
581			return $value;
582		}
583	}
584	return;
585}
586
587=item C<< node_as_concise_string >>
588
589Returns a string representation using common Turtle syntax shortcuts (e.g. for numeric literals).
590
591=cut
592
593sub node_as_concise_string {
594	my $self	= shift;
595	my $obj		= shift;
596	my $str		= $self->_node_concise_string( $obj );
597	if (defined($str)) {
598		return $str;
599	} else {
600		return $obj->as_ntriples;
601	}
602}
603
604sub _turtle {
605	my $self	= shift;
606	my $sink	= shift;
607	my $obj		= shift;
608	my $pos		= shift;
609	my $seen	= shift;
610	my $level	= shift;
611	my $tab		= shift;
612	my %args	= @_;
613
614	if ($obj->isa('RDF::Trine::Node::Resource') and $pos == 1 and $obj->uri_value eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type') {
615		$sink->emit('a');
616		return;
617	} elsif ($obj->isa('RDF::Trine::Node::Blank') and $pos == 0) {
618		if (my $model = $args{ model }) {
619			my $count	= $model->count_statements( undef, undef, $obj );
620			my $rec		= $model->count_statements( $obj, undef, $obj );
621			# XXX if $count == 1, then it would be better to ignore this triple for now, since it's a 'single-owner' bnode, and better serialized as a '[ ... ]' bnode in the object position as part of the 'owning' triple
622			if ($count < 1 and $rec == 0) {
623				$sink->emit('[]');
624				return;
625			}
626		}
627	} elsif (defined(my $str = $self->_node_concise_string( $obj ))) {
628		$sink->emit($str);
629		return;
630	}
631
632	$sink->emit($obj->as_ntriples);
633	return;
634}
635
6361;
637
638__END__
639
640=back
641
642=head1 BUGS
643
644Please report any bugs or feature requests to through the GitHub web interface
645at L<https://github.com/kasei/perlrdf/issues>.
646
647=head1 SEE ALSO
648
649L<http://www.w3.org/TeamSubmission/turtle/>
650
651=head1 AUTHOR
652
653Gregory Todd Williams  C<< <gwilliams@cpan.org> >>
654
655=head1 COPYRIGHT
656
657Copyright (c) 2006-2012 Gregory Todd Williams. This
658program is free software; you can redistribute it and/or modify it under
659the same terms as Perl itself.
660
661=cut
662