1# *
2# *     Copyright (c) 2000-2006 Alberto Reggiori <areggiori@webweaving.org>
3# *                        Dirk-Willem van Gulik <dirkx@webweaving.org>
4# *
5# * NOTICE
6# *
7# * This product is distributed under a BSD/ASF like license as described in the 'LICENSE'
8# * file you should have received together with this source code. If you did not get a
9# * a copy of such a license agreement you can pick up one at:
10# *
11# *     http://rdfstore.sourceforge.net/LICENSE
12# *
13# * Changes:
14# *     version 0.1 - Tue Dec 16 00:51:44 CET 2003
15# *     version 0.2
16# *		- updated wget() adding Accept: HTTP header and use LWP::UserAgent if available
17# *
18
19package RDFStore::Parser;
20{
21use vars qw ( $VERSION %Built_In_Styles );
22use strict;
23
24$VERSION = '0.2';
25
26use Carp;
27
28eval { require LWP::UserAgent; };
29$RDFStore::Parser::hasLWPUserAgent = ($@) ? 0 : 1;
30
31sub new {
32	my ($pkg, %args) = @_;
33
34        my $style = $args{Style};
35
36	my $nonexopt = $args{Non_Expat_Options} ||= {};
37
38        $nonexopt->{Style}             = 1;
39        $nonexopt->{Non_Expat_Options} = 1;
40        $nonexopt->{Handlers}          = 1;
41        $nonexopt->{_HNDL_TYPES}       = 1;
42
43        $args{_HNDL_TYPES} = {};
44        $args{_HNDL_TYPES}->{Init} = 1;
45        $args{_HNDL_TYPES}->{Assert} = 1;
46        $args{_HNDL_TYPES}->{Start_XML_Literal} = 1;
47        $args{_HNDL_TYPES}->{Stop_XML_Literal} = 1;
48        $args{_HNDL_TYPES}->{Char_Literal} = 1;
49	$args{_HNDL_TYPES}->{manage_bNodes} = 1; #used only on RDF/XML SiRPAC parser
50        $args{_HNDL_TYPES}->{Final} = 1;
51
52	$args{'warnings'} = [];
53
54        $args{'Handlers'} ||= {};
55        my $handlers = $args{'Handlers'};
56        if (defined($style)) {
57                my $stylepkg = $style;
58                if ($stylepkg !~ /::/) {
59                        $stylepkg = "\u$style";
60                        croak "Undefined style: $style"
61                                unless defined($Built_In_Styles{$stylepkg});
62                        $stylepkg = 'RDFStore::Parser::NTriples::' . $stylepkg;
63                	};
64
65                # load the requested style
66                eval "use $stylepkg;";
67                if($@) {
68                        warn "Cannot load parser style '$stylepkg'" if($pkg->{Warnings});
69                        exit(1);
70                        };
71
72                my $htype;
73                foreach $htype (keys %{$args{_HNDL_TYPES}}) {
74                        # Handlers explicity given override
75                        # handlers from the Style package
76                        unless (defined($handlers->{$htype})) {
77                                # A handler in the style package must either have
78                                # exactly the right case as the type name or a
79                                # completely lower case version of it.
80                                my $hname = "${stylepkg}::$htype";
81                                if (defined(&$hname)) {
82                                        $handlers->{$htype} = \&$hname;
83                                        next;
84                                	};
85                                $hname = "${stylepkg}::\L$htype";
86                                if (defined(&$hname)) {
87                                        $handlers->{$htype} = \&$hname;
88                                        next;
89                                	};
90                        	};
91                	};
92        	};
93        $args{Pkg} ||= caller;
94
95	$args{'options'} = {};
96
97	$args{'_Source'} = 'STDIN:';
98
99        bless \%args, $pkg;
100	};
101
102sub setProperty {
103	my ($class, $name, $value) = @_;
104
105	$class->{'options'}->{ $name } = $value;
106	};
107
108sub getProperty {
109	my ($class, $name) = @_;
110
111	return $class->{'options'}->{ $name };
112	};
113
114sub setHandlers {
115        my ($class, @handler_pairs) = @_;
116
117        croak("Uneven number of arguments to setHandlers method")
118                if (int(@handler_pairs) & 1);
119
120        my @ret;
121        while (@handler_pairs) {
122                my $type = shift @handler_pairs;
123                my $handler = shift @handler_pairs;
124                unless (defined($class->{_HNDL_TYPES}->{$type})) {
125                        my @types = sort keys %{$class->{_HNDL_TYPES}};
126                        croak("Unknown Parser handler type: $type\n Valid types are : @types");
127                	};
128                push(@ret, $type, $class->{Handlers}->{$type});
129                $class->{Handlers}->{$type} = $handler;
130        	};
131
132        return @ret;
133	};
134
135sub setSource {
136        my ($class,$file_or_uri)=@_;
137
138	$class->{'_Source'} = $file_or_uri
139		if(defined $file_or_uri);
140
141        return $file_or_uri;
142	};
143
144sub getSource {
145	return $_[0]->{'_Source'};
146	};
147
148sub parse { };
149
150sub parsestring { };
151
152sub parsestream { };
153
154sub parsefile {
155	my ($class) = shift;
156
157	$class->setSource( $_[0] );
158	};
159
160sub read {
161	my ($class) = shift;
162
163	$class->parse( @_ );
164	};
165
166sub readstring {
167	my ($class) = shift;
168
169	$class->parsestring( @_ );
170	};
171
172sub readstream {
173	my ($class) = shift;
174
175	$class->parsestream( @_ );
176	};
177
178sub readfile {
179	my ($class) = shift;
180
181	$class->parsefile( @_ );
182	};
183
184sub wget {
185        my ($class,$uri) = @_;
186
187        croak "RDFStore::Parser::wget: input url is not an instance of URI"
188                unless( (defined $uri) && ($uri->isa("URI")) );
189
190        no strict;
191
192	if($RDFStore::Parser::hasLWPUserAgent) {
193		# HTTP GET it
194		my $ua = LWP::UserAgent->new( timeout => 60 );
195
196		my %headers = ( "User-Agent" => "rdfstore\@asemantics.com/$VERSION" );
197		$headers{'Accept'} = 'application/rdf+xml,application/xml;q=0.9,*/*;q=0.5'
198			if($class->isa("RDFStore::Parser::SiRPAC"));
199
200                my $response = $ua->get( $uri->as_string, %headers );
201
202                unless($response) {
203			my $msg = "RDFStore::Parser::wget: Cannot HTTP GET $uri->as_string\n";
204			push @{ $class->{warnings} },$msg;
205			return;
206			};
207
208                return $response->content;
209	} else {
210        	require IO::Socket;
211
212        	local($^W) = 0;
213        	my $sock = IO::Socket::INET->new(       PeerAddr => $uri->host,
214                                                	PeerPort => $uri->port,
215                                                	Proto    => 'tcp',
216                                                	Timeout  => 60) || return undef;
217        	$sock->autoflush;
218        	my $netloc = $uri->host;
219        	$netloc .= ":".$uri->port if $uri->port != 80;
220
221        	my $path = $uri->as_string;
222
223        	#HTTP/1.0 GET request
224        	print $sock join("\015\012" =>
225                    "GET $path HTTP/1.0",
226                    "Host: $netloc",
227                    "User-Agent: rdfstore\@asemantics.com/$VERSION",
228		    ($class->isa("RDFStore::Parser::SiRPAC")) ? "Accept: application/rdf+xml,application/xml;q=0.9,*/*;q=0.5" : "",
229                    "", "");
230
231        	my $line = <$sock>;
232
233		if ($line !~ m,^HTTP/\d+\.\d+\s+(\d\d\d)\s+(.+)$,m) {
234                	my $msg = "RDFStore::Parser::wget: (10 Did not get HTTP/x.x header back...$line";
235                	push @{ $class->{warnings} },$msg;
236                	warn $msg;
237                	return;
238                	};
239        	my $status = $1;
240        	my $reason = $2;
241        	if ( ($status != 200) && ($status != 302) ) {
242                	my $msg = "Error MSG returned from server: $status $reason\n";
243                	push @{ $class->{warnings} },$msg;
244
245                	#try HTTP/1.1 GET request
246                	print $sock join("\015\012" =>
247                                 "GET $path HTTP/1.1",
248                                 "Host: $netloc",
249                                 "User-Agent: rdfstore\@asemantics.com/$VERSION",
250		    		($class->isa("RDFStore::Parser::SiRPAC")) ? "Accept: application/rdf+xml,application/xml;q=0.9,*/*;q=0.5" : "",
251                                 "Connection: close",
252                                 "", "");
253
254                	$line = <$sock>;
255
256                	if ($line !~ m,^HTTP/\d+\.\d+\s+(\d\d\d)\s+(.+)$,m) {
257                        	my $msg = "RDFStore::Parser::wget: Did not get HTTP/x.x header back...$line";
258                        	push @{ $class->{warnings} },$msg;
259                        	warn $msg;
260                        	return;
261                        	};
262                	$status = $3;
263                	$reason = $4;
264
265			if ( ($status != 200) && ($status != 302) ) {
266                        	my $msg = "RDFStore::Parser::wget: Error MSG returned from server: $status $reason\n";
267                        	push @{ $class->{warnings} },$msg;
268                        	return;
269                        	};
270                	};
271
272        	while(<$sock>) {
273                	chomp;
274                	if( m,^Location:\s(.*)$,) {
275                        	if( (   (exists $class->{HTTP_Location}) &&
276                                	(defined $class->{HTTP_Location}) && ($class->{HTTP_Location} ne $1)    ) ||
277                                        (!(defined $class->{HTTP_Location})) ) {
278                                	$class->{HTTP_Location} = $1;
279                                	my $s = $class->wget(new URI($class->{HTTP_Location}));
280                                	$sock = $s
281                                        	if(defined $s);
282                                	last;
283                                	};
284                        	};
285                	last if m/^\s+$/;
286                	};
287
288		my $content='';
289		while(<$sock>) {
290			$content.=$_;
291			};
292
293        	return $content;
294		};
295        };
296
2971;
298};
299
300__END__
301
302=head1 NAME
303
304RDFStore::Parser - Interface to an RDF parser
305
306=head1 SYNOPSIS
307
308	use RDFStore::Parser;
309
310	my $parser = new RDFStore::Parser(
311			ErrorContext => 3,
312                        Style => 'RDFStore::Parser::Styles::RDFStore::Model'
313			);
314
315	# or...
316	use RDFStore::Model;
317
318	my $model= new RDFStore::Model();
319	$parser = $model->getReader;
320
321	my $rdfstring = qq|
322
323<rdf:RDF
324        xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
325        xmlns:a='http://description.org/schema/'>
326<rdf:Description rdf:about='http://www.w3.org'>
327        <a:Date>1998-10-03T02:27</a:Date>
328</rdf:Description>
329
330</rdf:RDF>|;
331
332	$model = $parser->parsestring($rdfstring);
333	$model = $parser->parsefile('http://www.w3.org/1999/02/22-rdf-syntax-ns#');
334	$model = $parser->parsestream(*RDFSTREAM);
335
336=head1 DESCRIPTION
337
338An RDFStore::Model parser.
339
340=head1 SEE ALSO
341
342RDFStore::Model(3) RDFStore::Parser::SiRPAC(3) RDFStore::Parser::NTriples(3)
343
344=head1 AUTHOR
345
346	Alberto Reggiori <areggiori@webweaving.org>
347