1#!/usr/bin/perl -Tw
2#
3# ntriples.pl - Redland N-Triples validator demo
4#
5# Copyright (C) 2002-2004, David Beckett http://www.dajobe.org/
6# Copyright (C) 2000-2004, University of Bristol, UK http://www.bristol.ac.uk/
7#
8# This package is Free Software and part of Redland http://librdf.org/
9#
10# It is licensed under the following three licenses as alternatives:
11#   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12#   2. GNU General Public License (GPL) V2 or any newer version
13#   3. Apache License, V2.0 or any newer version
14#
15# You may not use this file except in compliance with at least one of
16# the above three licenses.
17#
18# See LICENSE.html or LICENSE.txt at the top of this package for the
19# complete terms and further detail along with the license texts for
20# the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21#
22#
23#
24
25# CHANGE THIS FOR YOUR CONFIGURATION
26$::ROOT_DIR='/somewhere';
27
28use strict;
29
30# Helps with broken web requests (missing headers)
31$ENV{'Content-Length'}||=0;
32
33# Tainting, dontcha know
34$ENV{'PATH'}="/bin:/usr/bin:/usr/local/bin:$::ROOT_DIR/bin/";
35
36delete $ENV{'BASH_ENV'};
37
38# Standard perl modules
39use CGI;
40use LWP::Simple;
41use URI::URL;
42
43
44# Configuration
45
46my $tmp_dir="$::ROOT_DIR/tmp";
47my $log_file="$::ROOT_DIR/logs/ntriples.log";
48
49my $max_stream_size=200;
50my $max_error_size=100;
51
52my(@parameters)=qw(uri);
53
54# Redland perl modules
55
56use RDF::Redland;
57use RDF::Redland::RSS;
58
59
60
61######################################################################
62# Subroutines
63
64sub log_action ($$;$) {
65  my($host, $message, $now)=@_;
66  $now ||= time;
67  return unless open (LOG, ">>$log_file");
68  my($sec,$min,$hour,$mday,$mon,$year)=gmtime $now;
69  my $date=sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",1900+$year,$mon+1,$mday,$hour,$min,$sec);
70  print LOG "$host $date $message\n";
71  close(LOG);
72}
73
74sub end_page($) {
75  my $q=shift;
76
77print <<"EOT";
78
79<h2>About the validator</h2>
80
81<p>This was written using
82<a href="http://librdf.org/">Redland</a>
83and the
84<a href="http://librdf.org/docs/pod/RDF/Redland/Parser.html">RDF::Redland::Parser</a>
85<a href="http://librdf.org/docs/perl.html">Perl</a> interface
86to the <a href="http://librdf.org/raptor/">Raptor</a>
87N-Triples parser.</p>
88
89<p>The source code of this demonstration is available in the Redland
90distribution as <tt>demos/ntriples.pl</tt> or from the
91<a href="http://librdf.org/">Redland</a> website</p>
92
93EOT
94
95
96  print qq{<hr />\n\n<p class="copyright"><a href="http://www.dajobe.org/">Dave Beckett</a></p>\n\n</body></html>};
97}
98
99
100sub format_body($) {
101  my $string=shift;
102  # No need for HTML::Entities here for three things
103  $string =~ s/\&/\&amp;/g;
104  $string =~ s/</\&lt;/g;
105  $string =~ s/>/\&gt;/g;
106  $string;
107}
108
109sub format_attr($) {
110  my $string=format_body(shift);
111  $string =~ s/"/\&quot;/g; #"
112  $string;
113}
114
115sub format_literal ($) {
116  my($string)=@_;
117  return 'UNDEFINED' if !defined $string;
118
119  my $new_string='';
120  for my $c (split(//, $string)) {
121    if(ord($c) <0x20 || ord($c) == 0x7e || $c eq '\\' || $c eq '"') {
122      $new_string.=sprintf("\\x%02X",ord($c));
123    } else {
124      $new_string.=$c;
125    }
126  }
127  return format_body($new_string);
128}
129
130sub format_url($) {
131  my $url=shift;
132  my $a_url= format_attr($url);
133  my $q_url= format_body($url);
134  qq{<a href="$a_url">$q_url</a>};
135}
136
137sub format_node ($) {
138  my $node=shift;
139  my $type=$node->type;
140  if($type == $RDF::Redland::Node::Type_Resource) {
141    my $uri=$node->uri->as_string;
142    return qq{URI <a href="$uri">$uri</a>};
143  } elsif ($type == $RDF::Redland::Node::Type_Literal) {
144    my $str=format_literal($node->literal_value).'"';
145    my $is_xml=$node->literal_value_is_wf_xml;
146    $str=$is_xml ? 'XML Literal: "'.$str : 'UTF-8 Literal: "'.$str;
147    my $lang=$node->literal_value_language;
148    $str.=" (Language: $lang)" if $lang;
149    return $str;
150  } elsif ($type == $RDF::Redland::Node::Type_Blank) {
151    my $id=$node->blank_identifier;
152    return qq{BNodeID $id};
153  } else {
154    return $node->as_string;
155  }
156}
157
158######################################################################
159my $q = new CGI;
160
161# CGI parameter paranoia
162my $val;
163
164my $uri_string;
165$val=$q->param('uri');
166if(defined $val && $val =~ /^([ -~]+)$/) {
167  $uri_string=$1;
168} else {
169  $uri_string='';
170}
171
172my $empty=(!$uri_string);
173
174# Zap remaining parameters
175$q->delete_all;
176
177# End of parameter decoding
178
179
180# Used in logging
181my $host=$q->remote_host;
182
183
184######################################################################
185# Emit content
186
187print $q->header(-type => 'text/html', -charset=>'utf-8');
188
189# Always print header
190print <<"EOT";
191<?xml version="1.0" encoding="UTF-8"?>
192<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
193<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
194<head>
195  <title>Redland N-Triples Validator</title>
196  <!-- HTML STYLE -->
197</head>
198<body>
199
200<!-- LOGO START -->
201         <h1>Redland N-Triples Validator</h1>
202<!-- LOGO END -->
203
204<p>Enter the address of
205<a href="http://www.w3.org/TR/rdf-testcases/#ntriples">N-Triples</a>
206content into the following form and it will be validated
207and formatted for display.
208
209EOT
210
211
212# Restore values
213$q->delete_all;
214
215$q->param('uri', $uri_string) if $uri_string;
216
217# use q->url() to get URL of this script without any query parameters
218# since we are using a POST here and don't want them added to the
219# submission URL.
220my $action_url="/".$q->url(-relative=>1);
221
222print $q->start_form(-name => 'myform', -method=>'GET', -action => $action_url),"\n";
223
224print "<p>N-Triples URI: ";
225print $q->textfield(-name=>'uri',
226		    -default=>'',
227		    -size=>60,
228		    -maxlength=>1024);
229print '&nbsp;', $q->submit('Go');
230
231print "</p>\n\n";
232
233print $q->endform,"\n\n";
234
235print <<"EOT";
236<p><b>Data Privacy:</b> IP addresses and content URIs are logged
237and may be used for testing.</p>
238
239EOT
240
241# Any parameters?
242if($empty) {
243
244  print <<"EOT";
245<p>You might want to try this N-Triples test case from the
246RDF Core test suite:
247http://www.w3.org/2000/10/rdf-tests/rdfcore/ntriples/test.nt
248</p>
249EOT
250
251  end_page($q);
252  exit 0;
253}
254
255
256######################################################################
257
258print "<h2>Results of N-Triples Validation</h2>\n";
259
260
261# Validate me
262
263my $uri;
264eval "\$uri=new URI::URL(q{$uri_string});" if $uri_string;
265if($@ || !$uri) {
266  print qq{\n\n<p>URI <a href="$uri_string">$uri_string</a> is not a legal URI (according to perl)</p>\n};
267  end_page($q);
268  exit 0;
269}
270
271if(!$uri->scheme || $uri->scheme ne 'http') {
272  print "\n\n<p>Cannot use URI $uri_string - must be a web http URI.</p>\n";
273  end_page($q);
274  exit 0;
275}
276
277my $source_uri=new URI::URL $uri;
278
279# Must fetch and copy to temp file
280my $temp_file;
281$temp_file="$tmp_dir/rss-demo-$$.rss";
282my $rc=getstore($uri_string, $temp_file);
283if(!is_success($rc)) {
284  print "\n\n<p>Failed to read URI $uri_string - HTTP error $rc</p>\n";
285  end_page($q);
286  exit 0;
287}
288if(open(IN, $temp_file)) {
289  my $content=join('', <IN>);
290  close(IN);
291}
292$source_uri=new URI::URL("file:$temp_file");
293
294
295my(@errors)=();
296RDF::Redland::set_error_handler(sub {
297  my $msg=shift;
298  push(@errors, $msg);
299});
300
301
302
303my $parser=new RDF::Redland::Parser("ntriples");
304if(!$parser) {
305  print "\n\n<p>Failed to create N-Triples parser.</p>\n";
306  end_page($q);
307  unlink $temp_file if $temp_file;
308  exit 0;
309}
310
311my $redland_base_uri=new RDF::Redland::URI $uri;
312my $redland_source_uri=new RDF::Redland::URI $source_uri;
313
314log_action($host,"Parsing N-Triples URI $uri", time);
315my $stream=$parser->parse_as_stream($redland_source_uri, $redland_base_uri);
316if(!$stream || $stream->end) {
317  print "\n\n<p>URI \"$uri\" failed to parse URI $uri as N-Triples.</p>\n";
318}
319
320my $count=0;
321if($stream && !$stream->end) {
322
323  print "<h2>Triples</h2>\n";
324
325  print <<"EOT";
326<center>
327<table style="text-align:center" border="1">
328<tr align="left">
329<th>Count</th>
330<th>Subject</th>
331<th>Predicate</th>
332<th>Object</th>
333</tr>
334EOT
335
336
337  for(; $stream && !$stream->end; $stream->next) {
338    my $statement=$stream->current;
339
340    my $subject=format_node($statement->subject);
341    my $predicate=format_node($statement->predicate);
342    my $object=format_node($statement->object);
343
344    my $id=$count+1;
345    print << "EOT";
346<tr align="left">
347<td>$id</td>
348<td>$subject</td>
349<td>$predicate</td>
350<td>$object</td>
351</tr>
352EOT
353
354    $count++;
355
356    if ($count == $max_stream_size) {
357      my $cur=$count+1;
358      while(1) {
359	$stream->next;
360	last if $stream->end;
361	$count++;
362      }
363      print << "EOT";
364<tr align="left">
365<td>$cur...$count</td><td colspan="3">Truncated at $max_stream_size to limit table / page size</td>
366</tr>
367EOT
368      last;
369    }
370  }
371
372  print <<"EOT";
373</table>
374</center>
375
376<p>Note: \\x<em>HH</em> where <em>HH</em> are hexadecimal digits
377is used to indicate escaped characters such as \\, &quot; or
378non-printable characters such as tabs and newlines.</p>
379EOT
380
381  $stream=undef;
382}
383
384if(@errors) {
385  print "<h2>Errors</h2>\n\n<p>";
386
387  my $error_count=1;
388  for my $error (@errors) {
389    $error =~ s/URI $uri_string:/line /;
390    $error =~ s/- Raptor error//;
391    print $error,"<br />\n";
392    $error_count++;
393    if ($error_count > $max_error_size) {
394      print "</p>\n\n<p>Remaining errors $error_count..",scalar(@errors)," truncated to limit page size";
395      last;
396    }
397  }
398  print "</p>";
399}
400
401
402if(!$count) {
403  end_page($q);
404  #unlink $temp_file if $temp_file;
405  exit 0;
406}
407
408my $error_count=scalar(@errors);
409my $pl=($count != 1) ? 's' : '';
410my $errorpl=($error_count != 1) ? 's' : '';
411print "\n\n<p>URI \"$uri\" parsed as N-Triples giving $count triple$pl and $error_count error$errorpl</p>\n";
412
413#unlink $temp_file if $temp_file;
414
415end_page($q);
416exit 0;
417