1#!/usr/bin/perl -Tw 2# 3# ntriples.pl - Redland N-Triples validator demo 4# 5# Copyright (C) 2002-2004, David Beckett http://www.dajobe.org/ 6# Copyright (C) 2000-2004, University of Bristol, UK http://www.bristol.ac.uk/ 7# 8# This package is Free Software and part of Redland http://librdf.org/ 9# 10# It is licensed under the following three licenses as alternatives: 11# 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version 12# 2. GNU General Public License (GPL) V2 or any newer version 13# 3. Apache License, V2.0 or any newer version 14# 15# You may not use this file except in compliance with at least one of 16# the above three licenses. 17# 18# See LICENSE.html or LICENSE.txt at the top of this package for the 19# complete terms and further detail along with the license texts for 20# the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 21# 22# 23# 24 25# CHANGE THIS FOR YOUR CONFIGURATION 26$::ROOT_DIR='/somewhere'; 27 28use strict; 29 30# Helps with broken web requests (missing headers) 31$ENV{'Content-Length'}||=0; 32 33# Tainting, dontcha know 34$ENV{'PATH'}="/bin:/usr/bin:/usr/local/bin:$::ROOT_DIR/bin/"; 35 36delete $ENV{'BASH_ENV'}; 37 38# Standard perl modules 39use CGI; 40use LWP::Simple; 41use URI::URL; 42 43 44# Configuration 45 46my $tmp_dir="$::ROOT_DIR/tmp"; 47my $log_file="$::ROOT_DIR/logs/ntriples.log"; 48 49my $max_stream_size=200; 50my $max_error_size=100; 51 52my(@parameters)=qw(uri); 53 54# Redland perl modules 55 56use RDF::Redland; 57use RDF::Redland::RSS; 58 59 60 61###################################################################### 62# Subroutines 63 64sub log_action ($$;$) { 65 my($host, $message, $now)=@_; 66 $now ||= time; 67 return unless open (LOG, ">>$log_file"); 68 my($sec,$min,$hour,$mday,$mon,$year)=gmtime $now; 69 my $date=sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",1900+$year,$mon+1,$mday,$hour,$min,$sec); 70 print LOG "$host $date $message\n"; 71 close(LOG); 72} 73 74sub end_page($) { 75 my $q=shift; 76 77print <<"EOT"; 78 79<h2>About the validator</h2> 80 81<p>This was written using 82<a href="http://librdf.org/">Redland</a> 83and the 84<a href="http://librdf.org/docs/pod/RDF/Redland/Parser.html">RDF::Redland::Parser</a> 85<a href="http://librdf.org/docs/perl.html">Perl</a> interface 86to the <a href="http://librdf.org/raptor/">Raptor</a> 87N-Triples parser.</p> 88 89<p>The source code of this demonstration is available in the Redland 90distribution as <tt>demos/ntriples.pl</tt> or from the 91<a href="http://librdf.org/">Redland</a> website</p> 92 93EOT 94 95 96 print qq{<hr />\n\n<p class="copyright"><a href="http://www.dajobe.org/">Dave Beckett</a></p>\n\n</body></html>}; 97} 98 99 100sub format_body($) { 101 my $string=shift; 102 # No need for HTML::Entities here for three things 103 $string =~ s/\&/\&/g; 104 $string =~ s/</\</g; 105 $string =~ s/>/\>/g; 106 $string; 107} 108 109sub format_attr($) { 110 my $string=format_body(shift); 111 $string =~ s/"/\"/g; #" 112 $string; 113} 114 115sub format_literal ($) { 116 my($string)=@_; 117 return 'UNDEFINED' if !defined $string; 118 119 my $new_string=''; 120 for my $c (split(//, $string)) { 121 if(ord($c) <0x20 || ord($c) == 0x7e || $c eq '\\' || $c eq '"') { 122 $new_string.=sprintf("\\x%02X",ord($c)); 123 } else { 124 $new_string.=$c; 125 } 126 } 127 return format_body($new_string); 128} 129 130sub format_url($) { 131 my $url=shift; 132 my $a_url= format_attr($url); 133 my $q_url= format_body($url); 134 qq{<a href="$a_url">$q_url</a>}; 135} 136 137sub format_node ($) { 138 my $node=shift; 139 my $type=$node->type; 140 if($type == $RDF::Redland::Node::Type_Resource) { 141 my $uri=$node->uri->as_string; 142 return qq{URI <a href="$uri">$uri</a>}; 143 } elsif ($type == $RDF::Redland::Node::Type_Literal) { 144 my $str=format_literal($node->literal_value).'"'; 145 my $is_xml=$node->literal_value_is_wf_xml; 146 $str=$is_xml ? 'XML Literal: "'.$str : 'UTF-8 Literal: "'.$str; 147 my $lang=$node->literal_value_language; 148 $str.=" (Language: $lang)" if $lang; 149 return $str; 150 } elsif ($type == $RDF::Redland::Node::Type_Blank) { 151 my $id=$node->blank_identifier; 152 return qq{BNodeID $id}; 153 } else { 154 return $node->as_string; 155 } 156} 157 158###################################################################### 159my $q = new CGI; 160 161# CGI parameter paranoia 162my $val; 163 164my $uri_string; 165$val=$q->param('uri'); 166if(defined $val && $val =~ /^([ -~]+)$/) { 167 $uri_string=$1; 168} else { 169 $uri_string=''; 170} 171 172my $empty=(!$uri_string); 173 174# Zap remaining parameters 175$q->delete_all; 176 177# End of parameter decoding 178 179 180# Used in logging 181my $host=$q->remote_host; 182 183 184###################################################################### 185# Emit content 186 187print $q->header(-type => 'text/html', -charset=>'utf-8'); 188 189# Always print header 190print <<"EOT"; 191<?xml version="1.0" encoding="UTF-8"?> 192<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 193<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> 194<head> 195 <title>Redland N-Triples Validator</title> 196 <!-- HTML STYLE --> 197</head> 198<body> 199 200<!-- LOGO START --> 201 <h1>Redland N-Triples Validator</h1> 202<!-- LOGO END --> 203 204<p>Enter the address of 205<a href="http://www.w3.org/TR/rdf-testcases/#ntriples">N-Triples</a> 206content into the following form and it will be validated 207and formatted for display. 208 209EOT 210 211 212# Restore values 213$q->delete_all; 214 215$q->param('uri', $uri_string) if $uri_string; 216 217# use q->url() to get URL of this script without any query parameters 218# since we are using a POST here and don't want them added to the 219# submission URL. 220my $action_url="/".$q->url(-relative=>1); 221 222print $q->start_form(-name => 'myform', -method=>'GET', -action => $action_url),"\n"; 223 224print "<p>N-Triples URI: "; 225print $q->textfield(-name=>'uri', 226 -default=>'', 227 -size=>60, 228 -maxlength=>1024); 229print ' ', $q->submit('Go'); 230 231print "</p>\n\n"; 232 233print $q->endform,"\n\n"; 234 235print <<"EOT"; 236<p><b>Data Privacy:</b> IP addresses and content URIs are logged 237and may be used for testing.</p> 238 239EOT 240 241# Any parameters? 242if($empty) { 243 244 print <<"EOT"; 245<p>You might want to try this N-Triples test case from the 246RDF Core test suite: 247http://www.w3.org/2000/10/rdf-tests/rdfcore/ntriples/test.nt 248</p> 249EOT 250 251 end_page($q); 252 exit 0; 253} 254 255 256###################################################################### 257 258print "<h2>Results of N-Triples Validation</h2>\n"; 259 260 261# Validate me 262 263my $uri; 264eval "\$uri=new URI::URL(q{$uri_string});" if $uri_string; 265if($@ || !$uri) { 266 print qq{\n\n<p>URI <a href="$uri_string">$uri_string</a> is not a legal URI (according to perl)</p>\n}; 267 end_page($q); 268 exit 0; 269} 270 271if(!$uri->scheme || $uri->scheme ne 'http') { 272 print "\n\n<p>Cannot use URI $uri_string - must be a web http URI.</p>\n"; 273 end_page($q); 274 exit 0; 275} 276 277my $source_uri=new URI::URL $uri; 278 279# Must fetch and copy to temp file 280my $temp_file; 281$temp_file="$tmp_dir/rss-demo-$$.rss"; 282my $rc=getstore($uri_string, $temp_file); 283if(!is_success($rc)) { 284 print "\n\n<p>Failed to read URI $uri_string - HTTP error $rc</p>\n"; 285 end_page($q); 286 exit 0; 287} 288if(open(IN, $temp_file)) { 289 my $content=join('', <IN>); 290 close(IN); 291} 292$source_uri=new URI::URL("file:$temp_file"); 293 294 295my(@errors)=(); 296RDF::Redland::set_error_handler(sub { 297 my $msg=shift; 298 push(@errors, $msg); 299}); 300 301 302 303my $parser=new RDF::Redland::Parser("ntriples"); 304if(!$parser) { 305 print "\n\n<p>Failed to create N-Triples parser.</p>\n"; 306 end_page($q); 307 unlink $temp_file if $temp_file; 308 exit 0; 309} 310 311my $redland_base_uri=new RDF::Redland::URI $uri; 312my $redland_source_uri=new RDF::Redland::URI $source_uri; 313 314log_action($host,"Parsing N-Triples URI $uri", time); 315my $stream=$parser->parse_as_stream($redland_source_uri, $redland_base_uri); 316if(!$stream || $stream->end) { 317 print "\n\n<p>URI \"$uri\" failed to parse URI $uri as N-Triples.</p>\n"; 318} 319 320my $count=0; 321if($stream && !$stream->end) { 322 323 print "<h2>Triples</h2>\n"; 324 325 print <<"EOT"; 326<center> 327<table style="text-align:center" border="1"> 328<tr align="left"> 329<th>Count</th> 330<th>Subject</th> 331<th>Predicate</th> 332<th>Object</th> 333</tr> 334EOT 335 336 337 for(; $stream && !$stream->end; $stream->next) { 338 my $statement=$stream->current; 339 340 my $subject=format_node($statement->subject); 341 my $predicate=format_node($statement->predicate); 342 my $object=format_node($statement->object); 343 344 my $id=$count+1; 345 print << "EOT"; 346<tr align="left"> 347<td>$id</td> 348<td>$subject</td> 349<td>$predicate</td> 350<td>$object</td> 351</tr> 352EOT 353 354 $count++; 355 356 if ($count == $max_stream_size) { 357 my $cur=$count+1; 358 while(1) { 359 $stream->next; 360 last if $stream->end; 361 $count++; 362 } 363 print << "EOT"; 364<tr align="left"> 365<td>$cur...$count</td><td colspan="3">Truncated at $max_stream_size to limit table / page size</td> 366</tr> 367EOT 368 last; 369 } 370 } 371 372 print <<"EOT"; 373</table> 374</center> 375 376<p>Note: \\x<em>HH</em> where <em>HH</em> are hexadecimal digits 377is used to indicate escaped characters such as \\, " or 378non-printable characters such as tabs and newlines.</p> 379EOT 380 381 $stream=undef; 382} 383 384if(@errors) { 385 print "<h2>Errors</h2>\n\n<p>"; 386 387 my $error_count=1; 388 for my $error (@errors) { 389 $error =~ s/URI $uri_string:/line /; 390 $error =~ s/- Raptor error//; 391 print $error,"<br />\n"; 392 $error_count++; 393 if ($error_count > $max_error_size) { 394 print "</p>\n\n<p>Remaining errors $error_count..",scalar(@errors)," truncated to limit page size"; 395 last; 396 } 397 } 398 print "</p>"; 399} 400 401 402if(!$count) { 403 end_page($q); 404 #unlink $temp_file if $temp_file; 405 exit 0; 406} 407 408my $error_count=scalar(@errors); 409my $pl=($count != 1) ? 's' : ''; 410my $errorpl=($error_count != 1) ? 's' : ''; 411print "\n\n<p>URI \"$uri\" parsed as N-Triples giving $count triple$pl and $error_count error$errorpl</p>\n"; 412 413#unlink $temp_file if $temp_file; 414 415end_page($q); 416exit 0; 417