1# * 2# * Copyright (c) 2000-2006 Alberto Reggiori <areggiori@webweaving.org> 3# * Dirk-Willem van Gulik <dirkx@webweaving.org> 4# * 5# * NOTICE 6# * 7# * This product is distributed under a BSD/ASF like license as described in the 'LICENSE' 8# * file you should have received together with this source code. If you did not get a 9# * a copy of such a license agreement you can pick up one at: 10# * 11# * http://rdfstore.sourceforge.net/LICENSE 12# * 13# * Changes: 14# * version 0.1 - Tue Dec 16 00:51:44 CET 2003 15# * version 0.2 16# * - updated wget() adding Accept: HTTP header and use LWP::UserAgent if available 17# * 18 19package RDFStore::Parser; 20{ 21use vars qw ( $VERSION %Built_In_Styles ); 22use strict; 23 24$VERSION = '0.2'; 25 26use Carp; 27 28eval { require LWP::UserAgent; }; 29$RDFStore::Parser::hasLWPUserAgent = ($@) ? 0 : 1; 30 31sub new { 32 my ($pkg, %args) = @_; 33 34 my $style = $args{Style}; 35 36 my $nonexopt = $args{Non_Expat_Options} ||= {}; 37 38 $nonexopt->{Style} = 1; 39 $nonexopt->{Non_Expat_Options} = 1; 40 $nonexopt->{Handlers} = 1; 41 $nonexopt->{_HNDL_TYPES} = 1; 42 43 $args{_HNDL_TYPES} = {}; 44 $args{_HNDL_TYPES}->{Init} = 1; 45 $args{_HNDL_TYPES}->{Assert} = 1; 46 $args{_HNDL_TYPES}->{Start_XML_Literal} = 1; 47 $args{_HNDL_TYPES}->{Stop_XML_Literal} = 1; 48 $args{_HNDL_TYPES}->{Char_Literal} = 1; 49 $args{_HNDL_TYPES}->{manage_bNodes} = 1; #used only on RDF/XML SiRPAC parser 50 $args{_HNDL_TYPES}->{Final} = 1; 51 52 $args{'warnings'} = []; 53 54 $args{'Handlers'} ||= {}; 55 my $handlers = $args{'Handlers'}; 56 if (defined($style)) { 57 my $stylepkg = $style; 58 if ($stylepkg !~ /::/) { 59 $stylepkg = "\u$style"; 60 croak "Undefined style: $style" 61 unless defined($Built_In_Styles{$stylepkg}); 62 $stylepkg = 'RDFStore::Parser::NTriples::' . $stylepkg; 63 }; 64 65 # load the requested style 66 eval "use $stylepkg;"; 67 if($@) { 68 warn "Cannot load parser style '$stylepkg'" if($pkg->{Warnings}); 69 exit(1); 70 }; 71 72 my $htype; 73 foreach $htype (keys %{$args{_HNDL_TYPES}}) { 74 # Handlers explicity given override 75 # handlers from the Style package 76 unless (defined($handlers->{$htype})) { 77 # A handler in the style package must either have 78 # exactly the right case as the type name or a 79 # completely lower case version of it. 80 my $hname = "${stylepkg}::$htype"; 81 if (defined(&$hname)) { 82 $handlers->{$htype} = \&$hname; 83 next; 84 }; 85 $hname = "${stylepkg}::\L$htype"; 86 if (defined(&$hname)) { 87 $handlers->{$htype} = \&$hname; 88 next; 89 }; 90 }; 91 }; 92 }; 93 $args{Pkg} ||= caller; 94 95 $args{'options'} = {}; 96 97 $args{'_Source'} = 'STDIN:'; 98 99 bless \%args, $pkg; 100 }; 101 102sub setProperty { 103 my ($class, $name, $value) = @_; 104 105 $class->{'options'}->{ $name } = $value; 106 }; 107 108sub getProperty { 109 my ($class, $name) = @_; 110 111 return $class->{'options'}->{ $name }; 112 }; 113 114sub setHandlers { 115 my ($class, @handler_pairs) = @_; 116 117 croak("Uneven number of arguments to setHandlers method") 118 if (int(@handler_pairs) & 1); 119 120 my @ret; 121 while (@handler_pairs) { 122 my $type = shift @handler_pairs; 123 my $handler = shift @handler_pairs; 124 unless (defined($class->{_HNDL_TYPES}->{$type})) { 125 my @types = sort keys %{$class->{_HNDL_TYPES}}; 126 croak("Unknown Parser handler type: $type\n Valid types are : @types"); 127 }; 128 push(@ret, $type, $class->{Handlers}->{$type}); 129 $class->{Handlers}->{$type} = $handler; 130 }; 131 132 return @ret; 133 }; 134 135sub setSource { 136 my ($class,$file_or_uri)=@_; 137 138 $class->{'_Source'} = $file_or_uri 139 if(defined $file_or_uri); 140 141 return $file_or_uri; 142 }; 143 144sub getSource { 145 return $_[0]->{'_Source'}; 146 }; 147 148sub parse { }; 149 150sub parsestring { }; 151 152sub parsestream { }; 153 154sub parsefile { 155 my ($class) = shift; 156 157 $class->setSource( $_[0] ); 158 }; 159 160sub read { 161 my ($class) = shift; 162 163 $class->parse( @_ ); 164 }; 165 166sub readstring { 167 my ($class) = shift; 168 169 $class->parsestring( @_ ); 170 }; 171 172sub readstream { 173 my ($class) = shift; 174 175 $class->parsestream( @_ ); 176 }; 177 178sub readfile { 179 my ($class) = shift; 180 181 $class->parsefile( @_ ); 182 }; 183 184sub wget { 185 my ($class,$uri) = @_; 186 187 croak "RDFStore::Parser::wget: input url is not an instance of URI" 188 unless( (defined $uri) && ($uri->isa("URI")) ); 189 190 no strict; 191 192 if($RDFStore::Parser::hasLWPUserAgent) { 193 # HTTP GET it 194 my $ua = LWP::UserAgent->new( timeout => 60 ); 195 196 my %headers = ( "User-Agent" => "rdfstore\@asemantics.com/$VERSION" ); 197 $headers{'Accept'} = 'application/rdf+xml,application/xml;q=0.9,*/*;q=0.5' 198 if($class->isa("RDFStore::Parser::SiRPAC")); 199 200 my $response = $ua->get( $uri->as_string, %headers ); 201 202 unless($response) { 203 my $msg = "RDFStore::Parser::wget: Cannot HTTP GET $uri->as_string\n"; 204 push @{ $class->{warnings} },$msg; 205 return; 206 }; 207 208 return $response->content; 209 } else { 210 require IO::Socket; 211 212 local($^W) = 0; 213 my $sock = IO::Socket::INET->new( PeerAddr => $uri->host, 214 PeerPort => $uri->port, 215 Proto => 'tcp', 216 Timeout => 60) || return undef; 217 $sock->autoflush; 218 my $netloc = $uri->host; 219 $netloc .= ":".$uri->port if $uri->port != 80; 220 221 my $path = $uri->as_string; 222 223 #HTTP/1.0 GET request 224 print $sock join("\015\012" => 225 "GET $path HTTP/1.0", 226 "Host: $netloc", 227 "User-Agent: rdfstore\@asemantics.com/$VERSION", 228 ($class->isa("RDFStore::Parser::SiRPAC")) ? "Accept: application/rdf+xml,application/xml;q=0.9,*/*;q=0.5" : "", 229 "", ""); 230 231 my $line = <$sock>; 232 233 if ($line !~ m,^HTTP/\d+\.\d+\s+(\d\d\d)\s+(.+)$,m) { 234 my $msg = "RDFStore::Parser::wget: (10 Did not get HTTP/x.x header back...$line"; 235 push @{ $class->{warnings} },$msg; 236 warn $msg; 237 return; 238 }; 239 my $status = $1; 240 my $reason = $2; 241 if ( ($status != 200) && ($status != 302) ) { 242 my $msg = "Error MSG returned from server: $status $reason\n"; 243 push @{ $class->{warnings} },$msg; 244 245 #try HTTP/1.1 GET request 246 print $sock join("\015\012" => 247 "GET $path HTTP/1.1", 248 "Host: $netloc", 249 "User-Agent: rdfstore\@asemantics.com/$VERSION", 250 ($class->isa("RDFStore::Parser::SiRPAC")) ? "Accept: application/rdf+xml,application/xml;q=0.9,*/*;q=0.5" : "", 251 "Connection: close", 252 "", ""); 253 254 $line = <$sock>; 255 256 if ($line !~ m,^HTTP/\d+\.\d+\s+(\d\d\d)\s+(.+)$,m) { 257 my $msg = "RDFStore::Parser::wget: Did not get HTTP/x.x header back...$line"; 258 push @{ $class->{warnings} },$msg; 259 warn $msg; 260 return; 261 }; 262 $status = $3; 263 $reason = $4; 264 265 if ( ($status != 200) && ($status != 302) ) { 266 my $msg = "RDFStore::Parser::wget: Error MSG returned from server: $status $reason\n"; 267 push @{ $class->{warnings} },$msg; 268 return; 269 }; 270 }; 271 272 while(<$sock>) { 273 chomp; 274 if( m,^Location:\s(.*)$,) { 275 if( ( (exists $class->{HTTP_Location}) && 276 (defined $class->{HTTP_Location}) && ($class->{HTTP_Location} ne $1) ) || 277 (!(defined $class->{HTTP_Location})) ) { 278 $class->{HTTP_Location} = $1; 279 my $s = $class->wget(new URI($class->{HTTP_Location})); 280 $sock = $s 281 if(defined $s); 282 last; 283 }; 284 }; 285 last if m/^\s+$/; 286 }; 287 288 my $content=''; 289 while(<$sock>) { 290 $content.=$_; 291 }; 292 293 return $content; 294 }; 295 }; 296 2971; 298}; 299 300__END__ 301 302=head1 NAME 303 304RDFStore::Parser - Interface to an RDF parser 305 306=head1 SYNOPSIS 307 308 use RDFStore::Parser; 309 310 my $parser = new RDFStore::Parser( 311 ErrorContext => 3, 312 Style => 'RDFStore::Parser::Styles::RDFStore::Model' 313 ); 314 315 # or... 316 use RDFStore::Model; 317 318 my $model= new RDFStore::Model(); 319 $parser = $model->getReader; 320 321 my $rdfstring = qq| 322 323<rdf:RDF 324 xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' 325 xmlns:a='http://description.org/schema/'> 326<rdf:Description rdf:about='http://www.w3.org'> 327 <a:Date>1998-10-03T02:27</a:Date> 328</rdf:Description> 329 330</rdf:RDF>|; 331 332 $model = $parser->parsestring($rdfstring); 333 $model = $parser->parsefile('http://www.w3.org/1999/02/22-rdf-syntax-ns#'); 334 $model = $parser->parsestream(*RDFSTREAM); 335 336=head1 DESCRIPTION 337 338An RDFStore::Model parser. 339 340=head1 SEE ALSO 341 342RDFStore::Model(3) RDFStore::Parser::SiRPAC(3) RDFStore::Parser::NTriples(3) 343 344=head1 AUTHOR 345 346 Alberto Reggiori <areggiori@webweaving.org> 347