1#  -*- perl -*-
2# $Id: Protocol.pm,v 1.10 2004/02/10 15:19:19 langhein Exp $
3# derived from: Protocol.pm,v 1.39 2001/10/26 19:00:21 gisle Exp
4
5package LWP::Parallel::Protocol;
6
7=head1 NAME
8
9LWP::Parallel::Protocol - Base class for parallel LWP protocols
10
11=head1 SYNOPSIS
12
13 package LWP::Parallel::Protocol::foo;
14 require LWP::Parallel::Protocol;
15 @ISA=qw(LWP::Parallel::Protocol);
16
17=head1 DESCRIPTION
18
19This class is used a the base class for all protocol implementations
20supported by the LWP::Parallel library. It mirrors the behavior of the
21original LWP::Parallel library by subclassing from it and adding a few
22subroutines of its own.
23
24Please see the LWP::Protocol for more information about the usage of
25this module.
26
27In addition to the inherited methods from LWP::Protocol, The following
28methods and functions are provided:
29
30=head1 ADDITIONAL METHODS AND FUNCTIONS
31
32=over 4
33
34=cut
35
36#######################################################
37
38require LWP::Protocol;
39@ISA = qw(LWP::Protocol);
40$VERSION = sprintf("%d.%02d", q$Revision: 1.10 $ =~ /(\d+)\.(\d+)/);
41
42
43use HTTP::Status ();
44use HTML::HeadParser; # thanks to Kirill
45use strict;
46use Carp ();
47
48my %ImplementedBy = (); # scheme => classname
49
50
51=item $prot = LWP::Parallel::Protocol->new();
52
53The LWP::Parallel::Protocol constructor is inherited by subclasses. As this is
54a virtual base class this method should B<not> be called directly.
55
56Note: This is inherited from LWP::Protocol
57
58=cut
59
60
61
62=item $prot = LWP::Parallel::Protocol::create($schema)
63
64Create an object of the class implementing the protocol to handle the
65given scheme. This is a function, not a method. It is more an object
66factory than a constructor. This is the function user agents should
67use to access protocols.
68
69=cut
70
71sub create
72{
73    my ($scheme, $ua) = @_;
74    my $impclass = LWP::Parallel::Protocol::implementor($scheme) or
75	Carp::croak("Protocol scheme '$scheme' is not supported");
76
77    # hand-off to scheme specific implementation sub-class
78    my $protocol = $impclass->new($scheme, $ua);
79
80    return $protocol;
81}
82
83
84=item $class = LWP::Parallel::Protocol::implementor($scheme, [$class])
85
86Get and/or set implementor class for a scheme.  Returns '' if the
87specified scheme is not supported.
88
89=cut
90
91sub implementor
92{
93    my($scheme, $impclass) = @_;
94
95    if ($impclass) {
96	$ImplementedBy{$scheme} = $impclass;
97    }
98    my $ic = $ImplementedBy{$scheme};
99    return $ic if $ic;
100
101    return '' unless $scheme =~ /^([.+\-\w]+)$/;  # check valid URL schemes
102    $scheme = $1; # untaint
103    $scheme =~ s/[.+\-]/_/g;  # make it a legal module name
104
105    # scheme not yet known, look for a 'use'd implementation
106    $ic = "LWP::Parallel::Protocol::$scheme";  # default location
107    no strict 'refs';
108    # check we actually have one for the scheme:
109    unless (@{"${ic}::ISA"}) { # fixed in LWP 5.48
110	# try to autoload it
111        #LWP::Debug::debug("Try autoloading $ic");
112	eval "require $ic";
113	if ($@) {
114	    if ($@ =~ /Can't locate/) { #' #emacs get confused by '
115		$ic = '';
116	    } else { # this msg never gets to the surface - 1002, JB
117		die "$@\n";
118	    }
119	}
120    }
121    $ImplementedBy{$scheme} = $ic if $ic;
122    $ic;
123}
124
125=item $prot->receive ($arg, $response, $content)
126
127Called to store a piece of content of a request, and process it
128appropriately into a scalar, file, or by calling a callback.  If $arg
129is undefined, then the content is stored within the $response.  If
130$arg is a simple scalar, then $arg is interpreted as a file name and
131the content is written to this file.  If $arg is a reference to a
132routine, then content is passed to this routine.
133
134$content must be a reference to a scalar holding the content that
135should be processed.
136
137The return value from receive() is undef for errors, positive for
138non-zero content processed, 0 for forced EOFs, and potentially a
139negative command from a user-defined callback function.
140
141B<Note:> We will only use the file or callback argument if
142$response->is_success().  This avoids sendig content data for
143redirects and authentization responses to the file or the callback
144function.
145
146=cut
147
148sub receive {
149    my ($self, $arg, $response, $content, $entry) = @_;
150
151  LWP::Debug::trace("( [self]" .
152                    ", ". (defined $arg ? $arg : '[undef]') .
153                    ", ". (defined $response ?
154		            (defined $response->code ?
155			      $response->code : '???') . " " .
156                            (defined $response->message ?
157			      $response->message : 'undef')
158                                                : '[undef]') .
159                    ", ". (defined $content ?
160		           (ref($content) eq 'SCALAR'?
161			       length($$content) . " bytes"
162			       : '[ref('. ref($content) .')' )
163                            : '[undef]') .
164                    ", ". (defined $entry ? $entry : '[undef]') .
165                    ")");
166
167
168    my($parse_head, $max_size, $parallel) =
169      @{$self}{qw(parse_head max_size parallel)};
170
171    my $parser;
172    if ($parse_head && $response->content_type eq 'text/html') {
173        require HTML::HeadParser; # LWP 5.60
174	$parser = HTML::HeadParser->new($response->{'_headers'});
175    }
176
177    my $content_size = $entry->content_size;
178
179    # Note: We don't need alarms here since we are not making any tcp
180    # connections.  All the data we need is alread in \$content, so we
181    # just read out a string value -- nothing should slow us down here
182    # (other than processor speed or memory constraints :) ) PS: You
183    # can't just add 'alarm' somewhere here unless you fix the calls
184    # to ->receive in the subclasses such as 'ftp' or 'http' and wrap
185    # them in an 'eval' statement that will catch our alarm-exceptions
186    # we would throw here! But since we don't need alarms here, just
187    # forget what I just said - it's irrelevant.
188
189    if (!defined($arg) || !$response->is_success ) {
190	# scalar
191	if ($parser) {
192	    $parser->parse($$content) or undef($parser);
193	}
194        LWP::Debug::debug("read " . length($$content) . " bytes");
195	$response->add_content($$content);
196	$content_size += length($$content);
197	$entry->content_size($content_size); # update persistant size counter
198	if (defined($max_size) && $content_size > $max_size) {
199  	    LWP::Debug::debug("Aborting because size limit of " .
200	                      "$max_size bytes exceeded");
201	    $response->push_header("Client-Aborted", "max_size");
202	    #my $tot = $response->header("Content-Length") || 0;
203	    #$response->header("X-Content-Range", "bytes 0-$content_size/$tot");
204	    return 0; # EOF (kind of)
205	}
206    }
207    elsif (!ref($arg)) {
208	# Mmmh. Could this take so long that we want to use alarm here?
209	my $file_open;
210	if (defined ($entry->content_size) and ($entry->content_size > 0)) {
211	  $file_open = open(OUT, ">>$arg"); # we already have data: append
212	} else {
213	  $file_open = open(OUT, ">$arg");  # no content received: open new
214	}
215	unless ( $file_open ) {
216	    $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR);
217	    $response->message("Cannot write to '$arg': $!");
218	    return; # undef means error
219	}
220        binmode(OUT);
221        local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR
222	if ($parser) {
223	    $parser->parse($$content) or undef($parser);
224	}
225        LWP::Debug::debug("[FILE] read " . length($$content) . " bytes");
226	print OUT $$content;
227	$content_size += length($$content);
228	$entry->content_size($content_size); # update persistant size counter
229	close(OUT);
230	if (defined($max_size) && $content_size > $max_size) {
231	    LWP::Debug::debug("Aborting because size limit exceeded");
232	    $response->push_header("Client-Aborted", "max_size");
233	    #my $tot = $response->header("Content-Length") || 0;
234	    #$response->header("X-Content-Range", "bytes 0-$content_size/$tot");
235	    return 0;
236	}
237    }
238    elsif (ref($arg) eq 'CODE') {
239	# read into callback
240	if ($parser) {
241	    $parser->parse($$content) or undef($parser);
242	}
243        LWP::Debug::debug("[CODE] read " . length($$content) . " bytes");
244	my $retval;
245	eval {
246	    $retval = &$arg($$content, $response, $self, $entry);
247	};
248	if ($@) {
249	    chomp($@);
250	    $response->push_header('X-Died' => $@);
251	    $response->push_header("Client-Aborted", "die");
252	} else {
253	    # pass return value from callback through to implementor class
254	  LWP::Debug::debug("return-code from Callback was '".
255 	                    (defined $retval ? "$retval'" : "[undef]'"));
256	    return $retval;
257	}
258    }
259    else {
260	$response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR);
261	$response->message("Unexpected collect argument  '$arg'");
262    }
263    return length($$content); # otherwise return size of content processed
264}
265
266=item $prot->receive_once($arg, $response, $content, $entry)
267
268Can be called when the whole response content is available as
269$content.  This will invoke receive() with a collector callback that
270returns a reference to $content the first time and an empty string the
271next.
272
273=cut
274
275sub receive_once {
276    my ($self, $arg, $response, $content, $entry) = @_;
277
278    # read once
279    my $retval = $self->receive($arg, $response, \$content, $entry);
280
281    # and immediately simulate EOF
282    my $no_content = '';
283    $retval = $self->receive($arg, $response, \$no_content, $entry)
284	unless $retval;
285
286    return (defined $retval? $retval : 0);
287}
288
2891;
290
291=head1 SEE ALSO
292
293Inspect the F<LWP/Parallel/Protocol/http.pm> file for examples of usage.
294
295=head1 COPYRIGHT
296
297Copyright 1997-2004 Marc Langheinrich E<lt>marclang@cpan.org>
298Parts copyright 1995-2004 Gisle Aas
299
300This library is free software; you can redistribute it and/or modify
301it under the same terms as Perl itself.
302
303=cut
304
305
306