1# -*- perl -*- 2# $Id: Protocol.pm,v 1.10 2004/02/10 15:19:19 langhein Exp $ 3# derived from: Protocol.pm,v 1.39 2001/10/26 19:00:21 gisle Exp 4 5package LWP::Parallel::Protocol; 6 7=head1 NAME 8 9LWP::Parallel::Protocol - Base class for parallel LWP protocols 10 11=head1 SYNOPSIS 12 13 package LWP::Parallel::Protocol::foo; 14 require LWP::Parallel::Protocol; 15 @ISA=qw(LWP::Parallel::Protocol); 16 17=head1 DESCRIPTION 18 19This class is used a the base class for all protocol implementations 20supported by the LWP::Parallel library. It mirrors the behavior of the 21original LWP::Parallel library by subclassing from it and adding a few 22subroutines of its own. 23 24Please see the LWP::Protocol for more information about the usage of 25this module. 26 27In addition to the inherited methods from LWP::Protocol, The following 28methods and functions are provided: 29 30=head1 ADDITIONAL METHODS AND FUNCTIONS 31 32=over 4 33 34=cut 35 36####################################################### 37 38require LWP::Protocol; 39@ISA = qw(LWP::Protocol); 40$VERSION = sprintf("%d.%02d", q$Revision: 1.10 $ =~ /(\d+)\.(\d+)/); 41 42 43use HTTP::Status (); 44use HTML::HeadParser; # thanks to Kirill 45use strict; 46use Carp (); 47 48my %ImplementedBy = (); # scheme => classname 49 50 51=item $prot = LWP::Parallel::Protocol->new(); 52 53The LWP::Parallel::Protocol constructor is inherited by subclasses. As this is 54a virtual base class this method should B<not> be called directly. 55 56Note: This is inherited from LWP::Protocol 57 58=cut 59 60 61 62=item $prot = LWP::Parallel::Protocol::create($schema) 63 64Create an object of the class implementing the protocol to handle the 65given scheme. This is a function, not a method. It is more an object 66factory than a constructor. This is the function user agents should 67use to access protocols. 68 69=cut 70 71sub create 72{ 73 my ($scheme, $ua) = @_; 74 my $impclass = LWP::Parallel::Protocol::implementor($scheme) or 75 Carp::croak("Protocol scheme '$scheme' is not supported"); 76 77 # hand-off to scheme specific implementation sub-class 78 my $protocol = $impclass->new($scheme, $ua); 79 80 return $protocol; 81} 82 83 84=item $class = LWP::Parallel::Protocol::implementor($scheme, [$class]) 85 86Get and/or set implementor class for a scheme. Returns '' if the 87specified scheme is not supported. 88 89=cut 90 91sub implementor 92{ 93 my($scheme, $impclass) = @_; 94 95 if ($impclass) { 96 $ImplementedBy{$scheme} = $impclass; 97 } 98 my $ic = $ImplementedBy{$scheme}; 99 return $ic if $ic; 100 101 return '' unless $scheme =~ /^([.+\-\w]+)$/; # check valid URL schemes 102 $scheme = $1; # untaint 103 $scheme =~ s/[.+\-]/_/g; # make it a legal module name 104 105 # scheme not yet known, look for a 'use'd implementation 106 $ic = "LWP::Parallel::Protocol::$scheme"; # default location 107 no strict 'refs'; 108 # check we actually have one for the scheme: 109 unless (@{"${ic}::ISA"}) { # fixed in LWP 5.48 110 # try to autoload it 111 #LWP::Debug::debug("Try autoloading $ic"); 112 eval "require $ic"; 113 if ($@) { 114 if ($@ =~ /Can't locate/) { #' #emacs get confused by ' 115 $ic = ''; 116 } else { # this msg never gets to the surface - 1002, JB 117 die "$@\n"; 118 } 119 } 120 } 121 $ImplementedBy{$scheme} = $ic if $ic; 122 $ic; 123} 124 125=item $prot->receive ($arg, $response, $content) 126 127Called to store a piece of content of a request, and process it 128appropriately into a scalar, file, or by calling a callback. If $arg 129is undefined, then the content is stored within the $response. If 130$arg is a simple scalar, then $arg is interpreted as a file name and 131the content is written to this file. If $arg is a reference to a 132routine, then content is passed to this routine. 133 134$content must be a reference to a scalar holding the content that 135should be processed. 136 137The return value from receive() is undef for errors, positive for 138non-zero content processed, 0 for forced EOFs, and potentially a 139negative command from a user-defined callback function. 140 141B<Note:> We will only use the file or callback argument if 142$response->is_success(). This avoids sendig content data for 143redirects and authentization responses to the file or the callback 144function. 145 146=cut 147 148sub receive { 149 my ($self, $arg, $response, $content, $entry) = @_; 150 151 LWP::Debug::trace("( [self]" . 152 ", ". (defined $arg ? $arg : '[undef]') . 153 ", ". (defined $response ? 154 (defined $response->code ? 155 $response->code : '???') . " " . 156 (defined $response->message ? 157 $response->message : 'undef') 158 : '[undef]') . 159 ", ". (defined $content ? 160 (ref($content) eq 'SCALAR'? 161 length($$content) . " bytes" 162 : '[ref('. ref($content) .')' ) 163 : '[undef]') . 164 ", ". (defined $entry ? $entry : '[undef]') . 165 ")"); 166 167 168 my($parse_head, $max_size, $parallel) = 169 @{$self}{qw(parse_head max_size parallel)}; 170 171 my $parser; 172 if ($parse_head && $response->content_type eq 'text/html') { 173 require HTML::HeadParser; # LWP 5.60 174 $parser = HTML::HeadParser->new($response->{'_headers'}); 175 } 176 177 my $content_size = $entry->content_size; 178 179 # Note: We don't need alarms here since we are not making any tcp 180 # connections. All the data we need is alread in \$content, so we 181 # just read out a string value -- nothing should slow us down here 182 # (other than processor speed or memory constraints :) ) PS: You 183 # can't just add 'alarm' somewhere here unless you fix the calls 184 # to ->receive in the subclasses such as 'ftp' or 'http' and wrap 185 # them in an 'eval' statement that will catch our alarm-exceptions 186 # we would throw here! But since we don't need alarms here, just 187 # forget what I just said - it's irrelevant. 188 189 if (!defined($arg) || !$response->is_success ) { 190 # scalar 191 if ($parser) { 192 $parser->parse($$content) or undef($parser); 193 } 194 LWP::Debug::debug("read " . length($$content) . " bytes"); 195 $response->add_content($$content); 196 $content_size += length($$content); 197 $entry->content_size($content_size); # update persistant size counter 198 if (defined($max_size) && $content_size > $max_size) { 199 LWP::Debug::debug("Aborting because size limit of " . 200 "$max_size bytes exceeded"); 201 $response->push_header("Client-Aborted", "max_size"); 202 #my $tot = $response->header("Content-Length") || 0; 203 #$response->header("X-Content-Range", "bytes 0-$content_size/$tot"); 204 return 0; # EOF (kind of) 205 } 206 } 207 elsif (!ref($arg)) { 208 # Mmmh. Could this take so long that we want to use alarm here? 209 my $file_open; 210 if (defined ($entry->content_size) and ($entry->content_size > 0)) { 211 $file_open = open(OUT, ">>$arg"); # we already have data: append 212 } else { 213 $file_open = open(OUT, ">$arg"); # no content received: open new 214 } 215 unless ( $file_open ) { 216 $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR); 217 $response->message("Cannot write to '$arg': $!"); 218 return; # undef means error 219 } 220 binmode(OUT); 221 local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR 222 if ($parser) { 223 $parser->parse($$content) or undef($parser); 224 } 225 LWP::Debug::debug("[FILE] read " . length($$content) . " bytes"); 226 print OUT $$content; 227 $content_size += length($$content); 228 $entry->content_size($content_size); # update persistant size counter 229 close(OUT); 230 if (defined($max_size) && $content_size > $max_size) { 231 LWP::Debug::debug("Aborting because size limit exceeded"); 232 $response->push_header("Client-Aborted", "max_size"); 233 #my $tot = $response->header("Content-Length") || 0; 234 #$response->header("X-Content-Range", "bytes 0-$content_size/$tot"); 235 return 0; 236 } 237 } 238 elsif (ref($arg) eq 'CODE') { 239 # read into callback 240 if ($parser) { 241 $parser->parse($$content) or undef($parser); 242 } 243 LWP::Debug::debug("[CODE] read " . length($$content) . " bytes"); 244 my $retval; 245 eval { 246 $retval = &$arg($$content, $response, $self, $entry); 247 }; 248 if ($@) { 249 chomp($@); 250 $response->push_header('X-Died' => $@); 251 $response->push_header("Client-Aborted", "die"); 252 } else { 253 # pass return value from callback through to implementor class 254 LWP::Debug::debug("return-code from Callback was '". 255 (defined $retval ? "$retval'" : "[undef]'")); 256 return $retval; 257 } 258 } 259 else { 260 $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR); 261 $response->message("Unexpected collect argument '$arg'"); 262 } 263 return length($$content); # otherwise return size of content processed 264} 265 266=item $prot->receive_once($arg, $response, $content, $entry) 267 268Can be called when the whole response content is available as 269$content. This will invoke receive() with a collector callback that 270returns a reference to $content the first time and an empty string the 271next. 272 273=cut 274 275sub receive_once { 276 my ($self, $arg, $response, $content, $entry) = @_; 277 278 # read once 279 my $retval = $self->receive($arg, $response, \$content, $entry); 280 281 # and immediately simulate EOF 282 my $no_content = ''; 283 $retval = $self->receive($arg, $response, \$no_content, $entry) 284 unless $retval; 285 286 return (defined $retval? $retval : 0); 287} 288 2891; 290 291=head1 SEE ALSO 292 293Inspect the F<LWP/Parallel/Protocol/http.pm> file for examples of usage. 294 295=head1 COPYRIGHT 296 297Copyright 1997-2004 Marc Langheinrich E<lt>marclang@cpan.org> 298Parts copyright 1995-2004 Gisle Aas 299 300This library is free software; you can redistribute it and/or modify 301it under the same terms as Perl itself. 302 303=cut 304 305 306