1# -*- perl -*- 2# $Id: http.pm,v 1.13 2003/03/11 16:49:35 langhein Exp $ 3# derived from: http10.pm,v 1.1 2001/10/26 17:27:19 gisle Exp $ 4 5package LWP::Parallel::Protocol::http; 6 7use strict; 8 9require LWP::Debug; 10require HTTP::Response; 11require HTTP::Status; 12require Net::HTTP; 13require IO::Socket; 14require IO::Select; 15use Carp (); 16 17use vars qw(@ISA @EXTRA_SOCK_OPTS); 18 19require LWP::Parallel::Protocol; 20require LWP::Protocol::http; # until i figure out gisle's http1.1 stuff! 21@ISA = qw(LWP::Parallel::Protocol LWP::Protocol::http); 22 23my $CRLF = "\015\012"; # how lines should be terminated; 24 # "\r\n" is not correct on all systems, for 25 # instance MacPerl defines it to "\012\015" 26 27# The following 4 methods are more or less a simple breakdown of the 28# original $http->request method: 29=item ($socket, $fullpath) = $prot->handle_connect ($req, $proxy, $timeout); 30 31This method connects with the server on the machine and port specified 32in the $req object. If a $proxy is given, it will translate the 33request into an appropriate proxy-request and return the new URL in 34the $fullpath argument. 35 36$socket is either an IO::Socket object (in parallel mode), or a 37LWP::Socket object (when used via Std. non-parallel modules, such as 38LWP::UserAgent) 39 40=cut 41 42sub handle_connect { 43 my ($self, $request, $proxy, $timeout, $nonblock) = @_; 44 45 # check method 46 my $method = $request->method; 47 unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token 48 return (undef, new HTTP::Response &HTTP::Status::RC_BAD_REQUEST, 49 'Library does not allow method ' . 50 "$method for 'http:' URLs"); 51 } 52 53 my $url = $request->url; 54 my($host, $port, $fullpath) = $self->get_address ($proxy, $url, $method); 55 56 # connect to remote site 57 my $socket = $self->_connect ($host, $port, $timeout, $nonblock); 58 59# LWP::Debug::debug("Socket is $socket"); 60 61# get LINGER get it! 62# my $data = $socket->sockopt(13); #define SO_LINGER = 13 63# my @a_data = unpack ("ii",$data); 64# $a_data[0] = 1; $a_data[1] = 0; 65# $data = pack ("ii",@a_data); 66# 67# $socket->sockopt(13, $data); #define SO_LINGER = 13 68# my $newdata = $socket->sockopt(13); #define SO_LINGER = 13 69# @a_data = unpack ("ii",$newdata); 70# 71# print "Socket $socket: SO_LINGER (", $a_data[0],", ",$a_data[1],")\n"; 72# got Linger got it! 73 74 75 ($socket, $fullpath); 76} 77 78sub get_address { 79 my ($self, $proxy, $url,$method) = @_; 80 my($host, $port, $fullpath); 81 82 # Check if we're proxy'ing 83 if (defined $proxy) { 84 # $proxy is an URL to an HTTP server which will proxy this request 85 $host = $proxy->host; 86 $port = $proxy->port; 87 $fullpath = $method && ($method eq "CONNECT") ? 88 ($url->host . ":" . $url->port) : 89 $url->as_string; 90 } 91 else { 92 $host = $url->host; 93 $port = $url->port; 94 $fullpath = $url->path_query; 95 $fullpath = "/" unless length $fullpath; 96 } 97 ($host, $port, $fullpath); 98} 99 100sub _connect { # renamed to make clear that this is private sub 101 my ($self, $host, $port, $timeout, $nonblock) = @_; 102 my ($socket); 103 unless ($nonblock) { 104 # perform good ol' blocking behavior 105 # 106 # this method inherited from LWP::Protocol::http 107 $socket = $self->_new_socket($host, $port, $timeout); 108 # currently empty function in LWP::Protocol::http 109 # $self->_check_sock($request, $socket); 110 } else { 111 # new non-blocking behavior 112 # 113 # thanks to http://www.en-directo.net/mail/kirill.html 114 use Socket(); 115 use POSIX(); 116 $socket = 117 IO::Socket::INET->new(Proto => 'tcp', # Timeout => $timeout, 118 $self->_extra_sock_opts ($host, $port)); 119 120 die "Can't create socket for $host:$port ($@)" unless $socket; 121 unless ( defined $socket->blocking (0) ) 122 { 123 # IO::Handle::blocking doesn't (yet?) work on Win32 (ActiveState port) 124 # The following happens to work though. 125 # See also: perlport manpage, POE::Kernel, POE::Wheel::SocketFactory, 126 # Winsock2.h 127 if ( $^O eq 'MSWin32' ) 128 { 129 my $set_it = "1"; 130 my $ioctl_val = 0x80000000 | (4 << 16) | (ord('f') << 8) | 126; 131 $ioctl_val = ioctl ($socket, $ioctl_val, $set_it); 132# warn 'Win32 ioctl returned ' . (defined $ioctl_val ? $ioctl_val : '[undef]') . "\n"; 133# warn "Win32 ioctlsocket failed\n" unless $ioctl_val; 134 } 135 } 136 my $rhost = Socket::inet_aton ($host); 137 die "Bad hostname $host" unless defined $rhost; 138 unless ( $socket->connect ($port, $rhost) ) 139 { 140 my $err = $! + 0; 141 # More trouble with ActiveState: EINPROGRESS and EWOULDBLOCK 142 # are missing from POSIX.pm. See Microsoft's Winsock2.h 143 my ($einprogress, $ewouldblock) = $^O eq 'MSWin32' ? 144 (10036, 10035) : (POSIX::EINPROGRESS(), POSIX::EWOULDBLOCK()); 145 die "Can't connect to $host:$port ($@)" 146 if $err and $err != $einprogress and $err != $ewouldblock; 147 } 148 } 149 LWP::Debug::debug("Socket is $socket"); 150 $socket; 151} 152 153sub write_request { 154 my ($self, $request, $socket, $fullpath, $arg, $timeout, $proxy) = @_; 155 156 my $method = $request->method; 157 my $url = $request->url; 158 159 LWP::Debug::trace ("write_request (". 160 (defined $request ? $request : '[undef]'). 161 ", ". (defined $socket ? $socket : '[undef]'). 162 ", ". (defined $fullpath ? $fullpath : '[undef]'). 163 ", ". (defined $arg ? $arg : '[undef]'). 164 ", ". (defined $timeout ? $timeout : '[undef]'). 165 ", ". (defined $proxy ? $proxy : '[undef]'). ")"); 166 167 my $sel = IO::Select->new($socket) if $timeout; 168 169 my $request_line = "$method $fullpath HTTP/1.0$CRLF"; 170 171 my $h = $request->headers->clone; 172 my $cont_ref = $request->content_ref; 173 $cont_ref = $$cont_ref if ref($$cont_ref); 174 my $ctype = ref($cont_ref); 175 176 # If we're sending content we *have* to specify a content length 177 # otherwise the server won't know a messagebody is coming. 178 if ($ctype eq 'CODE') { 179 die 'No Content-Length header for request with dynamic content' 180 unless defined($h->header('Content-Length')) || 181 $h->content_type =~ /^multipart\//; 182 # For HTTP/1.1 we could have used chunked transfer encoding... 183 } 184 else { 185 $h->header('Content-Length' => length $$cont_ref) 186 if defined($$cont_ref) && length($$cont_ref); 187 } 188 189 $self->_fixup_header($h, $url, $proxy); 190 191 my $buf = $request_line . $h->as_string($CRLF) . $CRLF; 192 my $n; # used for return value from syswrite/sysread 193 my $length; 194 my $offset; 195 196 # die's will be caught if user specified "use_eval". 197 198 # syswrite $buf 199 $length = length($buf); 200 $offset = 0; 201 while ( $offset < $length ) { 202 die "write timeout" if $timeout && !$sel->can_write($timeout); 203 $n = $socket->syswrite($buf, $length-$offset, $offset ); 204 die $! unless defined($n); 205 $offset += $n; 206 } 207 208 LWP::Debug::conns($buf); 209 210 if ($ctype eq 'CODE') { 211 while ( ($buf = &$cont_ref()), defined($buf) && length($buf)) { 212 # syswrite $buf 213 $length = length($buf); 214 $offset = 0; 215 while ( $offset < $length ) { 216 die "write timeout" if $timeout && !$sel->can_write($timeout); 217 $n = $socket->syswrite($buf, $length-$offset, $offset ); 218 die $! unless defined($n); 219 $offset += $n; 220 } 221 LWP::Debug::conns($buf); 222 } 223 } 224 elsif (defined($$cont_ref) && length($$cont_ref)) { 225 # syswrite $$cont_ref 226 $length = length($$cont_ref); 227 $offset = 0; 228 while ( $offset < $length ) { 229 die "write timeout" if $timeout && !$sel->can_write($timeout); 230 $n = $socket->syswrite($$cont_ref, $length-$offset, $offset ); 231 die $! unless defined($n); 232 $offset += $n; 233 } 234 LWP::Debug::conns($buf); 235 } 236 237 # For an HTTP request, the 'command' socket is the same as the 238 # 'listen' socket, so we just return the socket here. 239 # (In the ftp module, we usually have one socket being the command 240 # socket, and another one being the read socket, so that's why we 241 # have this overhead here) 242 return $socket; 243} 244 245# whereas 'handle_connect' (with its submethods 'get_address' and 246# 'connect') and 'write_request' mainly just encapsulate different 247# parts of the old http->request method, 'read_chunk' has an added 248# level of complexity. This is because we have to be content with 249# whatever data is available, and somehow 'save' our current state 250# between multiple calls. 251 252# To faciliate things later, when we need redirects and 253# authentication, we insist that we _always_ have a response object 254# available, which is generated outside and initialized with bogus 255# data (code = 0). Also, we can then save ourselves the trouble of 256# using a call-by-variable for $response in order to return a freshly 257# generated $response-object. 258 259# We have to provide IO::Socket-objects with a pushback mechanism, 260# which comes pretty handy in case we can't use all the information read 261# so far. Instead of changing the IO::Socket code, we just have our own 262# little pushback buffer, $pushback, indexed by $socket object here. 263 264my %pushback; 265 266sub read_chunk { 267 my ($self, $response, $socket, $request, $arg, $size, 268 $timeout, $entry) = @_; 269 270 LWP::Debug::trace ("read_chunk (". 271 (defined $response ? $response : '[undef]'). 272 ", ". (defined $socket ? $socket : '[undef]'). 273 ", ". (defined $request ? $request : '[undef]'). 274 ", ". (defined $arg ? $arg : '[undef]'). 275 ", ". (defined $size ? $size : '[undef]'). 276 ", ". (defined $timeout ? $timeout : '[undef]'). 277 ", ". (defined $entry ? $entry : '[undef]'). ")"); 278 279 # hack! Can we just generate a new Select object here? Or do we 280 # have to take the one we created in &write_request?!? 281 my $sel = IO::Select->new($socket) if $timeout; 282 283 LWP::Debug::debug('reading response ('. 284 (defined($pushback{$socket})?length($pushback{$socket}):0) .' buffered)'); 285 286 my $buf = ""; 287 # read one chunk at a time from $socket 288 289 if ( $timeout && !$sel->can_read($timeout) ) { 290 $response->message("Read Timeout"); 291 $response->code(&HTTP::Status::RC_REQUEST_TIMEOUT); 292 $response->request($request); 293 return 0; # EOF 294 }; 295 my $n = $socket->sysread($buf, $size, length($buf)); 296 unless (defined ($n)) { 297 $response->message("Sysread Error: $!"); 298 $response->code(&HTTP::Status::RC_SERVICE_UNAVAILABLE); 299 $response->request($request); 300 return 0; # EOF 301 }; 302 # need our own EOF detection here 303 unless ( $n ) { 304 unless ($response and $response->code) { 305 $response->message("Unexpected EOF while reading response"); 306 $response->code(&HTTP::Status::RC_BAD_GATEWAY); 307 $response->request($request); 308 return 0; # EOF 309 } 310 } 311 312 # prepend contents of unprocessed buffer content from last read 313 $buf = $pushback{$socket} . $buf if $pushback{$socket}; 314 LWP::Debug::conns("Buffer contents between dashes -->\n==========\n$buf=========="); 315 316 # determine Protocol type and create response object 317 unless ($response and $response->code) { 318 if ($buf =~ s/^(HTTP\/\d+\.\d+)[ \t]+(\d+)[ \t]*([^\012]*)\012//) { #1.39 319 # HTTP/1.0 response or better 320 my($ver,$code,$msg) = ($1, $2, $3); 321 $msg =~ s/\015$//; 322 LWP::Debug::debug("Identified HTTP Protocol: $ver $code $msg"); 323 $response->code($code); 324 $response->message($msg); 325 $response->protocol($ver); 326 # store $request info in $response object 327 $response->request($request); 328 } 329 elsif ((length($buf) >= 5 and $buf !~ /^HTTP\//) or 330 $buf =~ /\012/ ) { 331 # HTTP/0.9 or worse 332 LWP::Debug::debug("HTTP/0.9 assume OK"); 333 $response->code(&HTTP::Status::RC_OK); 334 $response->message("OK"); 335 $response->protocol('HTTP/0.9'); 336 # store $request info in $response object 337 $response->request($request); 338 } 339 else { 340 # need more data 341 LWP::Debug::debug("need more data to know which protocol"); 342 } 343 } 344 345 # if we have a protocol, read headers if neccessary 346 if ( $response && !&headers($response) ) { 347 # ensure that we have read all headers. The headers will be 348 # terminated by two blank lines 349 unless ($buf =~ /^\015?\012/ || $buf =~ /\015?\012\015?\012/) { 350 # must read more if we can... 351 LWP::Debug::debug("need more data for headers"); 352 } else { 353 # now we start parsing the headers. The strategy is to 354 # remove one line at a time from the beginning of the header 355 # buffer ($buf). 356 my($key, $val); 357 358 while ($buf =~ s/([^\012]*)\012//) { 359 my $line = $1; 360 361 # if we need to restore as content when illegal headers 362 # are found. 363 my $save = "$line\012"; 364 365 $line =~ s/\015$//; 366 last unless length $line; 367 368 if ($line =~ /^([a-zA-Z0-9_\-.]+)\s*:\s*(.*)/) { 369 $response->push_header($key, $val) if $key; 370 ($key, $val) = ($1, $2); 371 } elsif ($line =~ /^\s+(.*)/ && $key) { 372 $val .= " $1"; 373 } else { 374 $response->push_header("Client-Bad-Header-Line" => 375 $line); 376 } 377 } 378 $response->push_header($key, $val) if $key; 379 380 # check to see if we have any header at all 381 unless (&headers($response)) { 382 # we need at least one header to go on 383 LWP::Debug::debug("no headers found, inserting Client-Date"); 384 $response->header ("Client-Date" => 385 HTTP::Date::time2str(time)); 386 } 387 } # of if then else 388 } # of if $response 389 390 # if we have both a response AND the headers, start parsing the rest 391 if ( $response && &headers($response) && length($buf)) { 392 $self->_get_sock_info($response, $socket); 393 # the CONNECT method does not need to read content 394 if ($request->method eq "CONNECT") { # from LWP 5.48's Protocol/http.pm 395 $response->{client_socket} = $socket; # so it can be picked up 396 } 397 else { 398 # all other methods want to read content, I guess... 399 # Note that we can't use $self->collect, since we don't want to give 400 # up control (by letting Protocol::collect use a $collector callback) 401 if (my @te = $response->remove_header('Transfer-Encoding')) { 402 $response->push_header('Client-Transfer-Encoding', \@te); 403 } 404 my $retval = $self->receive($arg, $response, \$buf, $entry); 405 # update pushback buffer (receive handles _all_ of current buffer) 406 $pushback{$socket} = ''; 407 # return length of response read (or value of $retval, if any, which 408 # could be one of C_LASTCON, C_ENDCON, or C_ENDALL) 409 return (defined $retval? $retval : length($buf)); 410 } 411 } 412 413 $pushback{$socket} = $buf; 414 return $n; 415} 416 417# This function indicates if we have already parsed the headers. In 418# case of HTTP/0.9 we (obviously?!) don't have any (which means that 419# we already 'parsed' them, so return 'true' no matter what) 420 421sub headers { 422 my ($response) = @_; 423 424 return 1 if $response->protocol eq 'HTTP/0.9'; 425 426 ($response->headers_as_string ? 1 : 0); 427} 428 429sub close_connection { 430 my ($self, $response, $listen_socket, $request, $cmd_socket) = @_; 431# print "Closing socket $listen_socket\n"; 432# $listen_socket->close; 433# $cmd_socket->close; 434} 435 436# the old (single request) frontend, defunct. 437sub request { 438 die "LWP::Parallel::Protocol::http does not support single requests\n"; 439} 440 441 442#----------------------------------------------------------- 443# copied from LWP::Protocol::http (v1.63 in LWP5.64) 444#----------------------------------------------------------- 445package LWP::Parallel::Protocol::http::SocketMethods; 446 447sub sysread { 448 my $self = shift; 449 if (my $timeout = ${*$self}{io_socket_timeout}) { 450 die "read timeout" unless $self->can_read($timeout); 451 } 452 else { 453 # since we have made the socket non-blocking we 454 # use select to wait for some data to arrive 455 $self->can_read(undef) || die "Assert"; 456 } 457 sysread($self, $_[0], $_[1], $_[2] || 0); 458} 459 460sub can_read { 461 my($self, $timeout) = @_; 462 my $fbits = ''; 463 vec($fbits, fileno($self), 1) = 1; 464 my $nfound = select($fbits, undef, undef, $timeout); 465 die "select failed: $!" unless defined $nfound; 466 return $nfound > 0; 467} 468 469sub ping { 470 my $self = shift; 471 !$self->can_read(0); 472} 473 474sub increment_response_count { 475 my $self = shift; 476 return ++${*$self}{'myhttp_response_count'}; 477} 478 479#----------------------------------------------------------- 480package LWP::Parallel::Protocol::http::Socket; 481use vars qw(@ISA); 482@ISA = qw(LWP::Parallel::Protocol::http::SocketMethods Net::HTTP); 483 484#----------------------------------------------------------- 485# ^^^ copied from LWP::Protocol::http (v1.63 in LWP5.64) 486#----------------------------------------------------------- 487 488 4891; 490