1# -*- perl -*-
2# $Id: http.pm,v 1.13 2003/03/11 16:49:35 langhein Exp $
3# derived from: http10.pm,v 1.1 2001/10/26 17:27:19 gisle Exp $
4
5package LWP::Parallel::Protocol::http;
6
7use strict;
8
9require LWP::Debug;
10require HTTP::Response;
11require HTTP::Status;
12require Net::HTTP;
13require IO::Socket;
14require IO::Select;
15use Carp ();
16
17use vars qw(@ISA @EXTRA_SOCK_OPTS);
18
19require LWP::Parallel::Protocol;
20require LWP::Protocol::http; # until i figure out gisle's http1.1 stuff!
21@ISA = qw(LWP::Parallel::Protocol LWP::Protocol::http);
22
23my $CRLF         = "\015\012";     # how lines should be terminated;
24				   # "\r\n" is not correct on all systems, for
25				   # instance MacPerl defines it to "\012\015"
26
27# The following 4 methods are more or less a simple breakdown of the
28# original $http->request method:
29=item ($socket, $fullpath) = $prot->handle_connect ($req, $proxy, $timeout);
30
31This method connects with the server on the machine and port specified
32in the $req object. If a $proxy is given, it will translate the
33request into an appropriate proxy-request and return the new URL in
34the $fullpath argument.
35
36$socket is either an IO::Socket object (in parallel mode), or a
37LWP::Socket object (when used via Std. non-parallel modules, such as
38LWP::UserAgent)
39
40=cut
41
42sub handle_connect {
43    my ($self, $request, $proxy, $timeout, $nonblock) = @_;
44
45    # check method
46    my $method = $request->method;
47    unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) {  # HTTP token
48	return (undef, new HTTP::Response &HTTP::Status::RC_BAD_REQUEST,
49				  'Library does not allow method ' .
50				  "$method for 'http:' URLs");
51    }
52
53    my $url = $request->url;
54    my($host, $port, $fullpath) = $self->get_address ($proxy, $url, $method);
55
56   # connect to remote site
57    my $socket = $self->_connect ($host, $port, $timeout, $nonblock);
58
59#  LWP::Debug::debug("Socket is $socket");
60
61# get LINGER get it!
62#    my $data = $socket->sockopt(13);  #define SO_LINGER = 13
63#    my @a_data = unpack ("ii",$data);
64#    $a_data[0] = 1; $a_data[1] = 0;
65#    $data = pack ("ii",@a_data);
66#
67#    $socket->sockopt(13, $data);  #define SO_LINGER = 13
68#    my $newdata = $socket->sockopt(13);  #define SO_LINGER = 13
69#    @a_data = unpack ("ii",$newdata);
70#
71#    print "Socket $socket: SO_LINGER (", $a_data[0],", ",$a_data[1],")\n";
72# got Linger got it!
73
74
75    ($socket, $fullpath);
76}
77
78sub get_address {
79    my ($self, $proxy, $url,$method) = @_;
80    my($host, $port, $fullpath);
81
82    # Check if we're proxy'ing
83    if (defined $proxy) {
84	# $proxy is an URL to an HTTP server which will proxy this request
85	$host = $proxy->host;
86	$port = $proxy->port;
87	$fullpath = $method && ($method eq "CONNECT") ?
88                    ($url->host . ":" . $url->port) :
89                     $url->as_string;
90    }
91    else {
92	$host = $url->host;
93	$port = $url->port;
94	$fullpath = $url->path_query;
95	$fullpath = "/" unless length $fullpath;
96    }
97    ($host, $port, $fullpath);
98}
99
100sub _connect { # renamed to make clear that this is private sub
101    my ($self, $host, $port, $timeout, $nonblock) = @_;
102    my ($socket);
103    unless ($nonblock) {
104      # perform good ol' blocking behavior
105      #
106      # this method inherited from LWP::Protocol::http
107      $socket = $self->_new_socket($host, $port, $timeout);
108      # currently empty function in LWP::Protocol::http
109      # $self->_check_sock($request, $socket);
110    } else {
111      # new non-blocking behavior
112      #
113      # thanks to http://www.en-directo.net/mail/kirill.html
114      use Socket();
115      use POSIX();
116      $socket =
117        IO::Socket::INET->new(Proto => 'tcp', # Timeout => $timeout,
118	                      $self->_extra_sock_opts ($host, $port));
119
120      die "Can't create socket for $host:$port ($@)" unless $socket;
121      unless ( defined $socket->blocking (0) )
122      {
123	# IO::Handle::blocking doesn't (yet?) work on Win32 (ActiveState port)
124	# The following happens to work though.
125	# See also: perlport manpage, POE::Kernel, POE::Wheel::SocketFactory,
126	#   Winsock2.h
127	if ( $^O eq 'MSWin32' )
128	{
129	  my $set_it = "1";
130	  my $ioctl_val = 0x80000000 | (4 << 16) | (ord('f') << 8) | 126;
131  	  $ioctl_val = ioctl ($socket, $ioctl_val, $set_it);
132#	warn 'Win32 ioctl returned ' . (defined $ioctl_val ? $ioctl_val : '[undef]') . "\n";
133#	warn "Win32 ioctlsocket failed\n" unless $ioctl_val;
134	}
135      }
136      my $rhost = Socket::inet_aton ($host);
137      die "Bad hostname $host" unless defined $rhost;
138      unless ( $socket->connect ($port, $rhost) )
139      {
140	my $err = $! + 0;
141	# More trouble with ActiveState: EINPROGRESS and EWOULDBLOCK
142	# are missing from POSIX.pm. See Microsoft's Winsock2.h
143	my ($einprogress, $ewouldblock) = $^O eq 'MSWin32' ?
144		(10036, 10035) : (POSIX::EINPROGRESS(), POSIX::EWOULDBLOCK());
145	die "Can't connect to $host:$port ($@)"
146		if $err and $err != $einprogress and $err != $ewouldblock;
147      }
148    }
149    LWP::Debug::debug("Socket is $socket");
150    $socket;
151}
152
153sub write_request {
154  my ($self, $request, $socket, $fullpath, $arg, $timeout, $proxy) = @_;
155
156  my $method = $request->method;
157  my $url    = $request->url;
158
159 LWP::Debug::trace ("write_request (".
160		    (defined $request ? $request : '[undef]').
161		    ", ". (defined $socket ? $socket : '[undef]').
162		    ", ". (defined $fullpath ? $fullpath : '[undef]').
163		    ", ". (defined $arg ? $arg : '[undef]').
164		    ", ". (defined $timeout ? $timeout : '[undef]').
165		    ", ". (defined $proxy ? $proxy : '[undef]'). ")");
166
167  my $sel = IO::Select->new($socket) if $timeout;
168
169  my $request_line = "$method $fullpath HTTP/1.0$CRLF";
170
171  my $h = $request->headers->clone;
172  my $cont_ref = $request->content_ref;
173  $cont_ref = $$cont_ref if ref($$cont_ref);
174  my $ctype = ref($cont_ref);
175
176  # If we're sending content we *have* to specify a content length
177  # otherwise the server won't know a messagebody is coming.
178  if ($ctype eq 'CODE') {
179    die 'No Content-Length header for request with dynamic content'
180      unless defined($h->header('Content-Length')) ||
181	$h->content_type =~ /^multipart\//;
182    # For HTTP/1.1 we could have used chunked transfer encoding...
183  }
184  else {
185    $h->header('Content-Length' => length $$cont_ref)
186      if defined($$cont_ref) && length($$cont_ref);
187  }
188
189  $self->_fixup_header($h, $url, $proxy);
190
191  my $buf = $request_line . $h->as_string($CRLF) . $CRLF;
192  my $n;  # used for return value from syswrite/sysread
193  my $length;
194  my $offset;
195
196  # die's will be caught if user specified "use_eval".
197
198  # syswrite $buf
199  $length = length($buf);
200  $offset = 0;
201  while ( $offset < $length ) {
202	die "write timeout" if $timeout && !$sel->can_write($timeout);
203	$n = $socket->syswrite($buf, $length-$offset, $offset );
204	die $! unless defined($n);
205	$offset += $n;
206  }
207
208  LWP::Debug::conns($buf);
209
210  if ($ctype eq 'CODE') {
211    while ( ($buf = &$cont_ref()), defined($buf) && length($buf)) {
212      # syswrite $buf
213      $length = length($buf);
214      $offset = 0;
215      while ( $offset < $length ) {
216	die "write timeout" if $timeout && !$sel->can_write($timeout);
217	$n = $socket->syswrite($buf, $length-$offset, $offset );
218	die $! unless defined($n);
219	$offset += $n;
220      }
221      LWP::Debug::conns($buf);
222    }
223  }
224  elsif (defined($$cont_ref) && length($$cont_ref)) {
225    # syswrite $$cont_ref
226    $length = length($$cont_ref);
227    $offset = 0;
228    while ( $offset < $length ) {
229      die "write timeout" if $timeout && !$sel->can_write($timeout);
230      $n = $socket->syswrite($$cont_ref, $length-$offset, $offset );
231      die $! unless defined($n);
232      $offset += $n;
233    }
234    LWP::Debug::conns($buf);
235  }
236
237  # For an HTTP request, the 'command' socket is the same as the
238  # 'listen' socket, so we just return the socket here.
239  # (In the ftp module, we usually have one socket being the command
240  # socket, and another one being the read socket, so that's why we
241  # have this overhead here)
242  return $socket;
243}
244
245# whereas 'handle_connect' (with its submethods 'get_address' and
246# 'connect') and 'write_request' mainly just encapsulate different
247# parts of the old http->request method, 'read_chunk' has an added
248# level of complexity. This is because we have to be content with
249# whatever data is available, and somehow 'save' our current state
250# between multiple calls.
251
252# To faciliate things later, when we need redirects and
253# authentication, we insist that we _always_ have a response object
254# available, which is generated outside and initialized with bogus
255# data (code = 0). Also, we can then save ourselves the trouble of
256# using a call-by-variable for $response in order to return a freshly
257# generated $response-object.
258
259# We have to provide IO::Socket-objects with a pushback mechanism,
260# which comes pretty handy in case we can't use all the information read
261# so far. Instead of changing the IO::Socket code, we just have our own
262# little pushback buffer, $pushback, indexed by $socket object here.
263
264my %pushback;
265
266sub read_chunk {
267  my ($self, $response, $socket, $request, $arg, $size,
268      $timeout, $entry) = @_;
269
270 LWP::Debug::trace ("read_chunk (".
271		    (defined $response ? $response : '[undef]').
272		    ", ". (defined $socket ? $socket : '[undef]').
273		    ", ". (defined $request ? $request : '[undef]').
274		    ", ". (defined $arg ? $arg : '[undef]').
275		    ", ". (defined $size ? $size : '[undef]').
276		    ", ". (defined $timeout ? $timeout : '[undef]').
277		    ", ". (defined $entry ? $entry : '[undef]'). ")");
278
279  # hack! Can we just generate a new Select object here? Or do we
280  # have to take the one we created in &write_request?!?
281  my $sel = IO::Select->new($socket) if $timeout;
282
283  LWP::Debug::debug('reading response ('.
284    (defined($pushback{$socket})?length($pushback{$socket}):0) .' buffered)');
285
286  my $buf = "";
287  # read one chunk at a time from $socket
288
289  if ( $timeout && !$sel->can_read($timeout) ) {
290      $response->message("Read Timeout");
291      $response->code(&HTTP::Status::RC_REQUEST_TIMEOUT);
292      $response->request($request);
293      return 0; # EOF
294  };
295  my $n = $socket->sysread($buf, $size, length($buf));
296  unless (defined ($n)) {
297      $response->message("Sysread Error: $!");
298      $response->code(&HTTP::Status::RC_SERVICE_UNAVAILABLE);
299      $response->request($request);
300      return 0; # EOF
301  };
302  # need our own EOF detection here
303  unless ( $n ) {
304      unless ($response  and  $response->code) {
305	  $response->message("Unexpected EOF while reading response");
306	  $response->code(&HTTP::Status::RC_BAD_GATEWAY);
307	  $response->request($request);
308	  return 0; # EOF
309      }
310  }
311
312  # prepend contents of unprocessed buffer content from last read
313  $buf = $pushback{$socket} . $buf if $pushback{$socket};
314  LWP::Debug::conns("Buffer contents between dashes -->\n==========\n$buf==========");
315
316  # determine Protocol type and create response object
317  unless ($response  and  $response->code) {
318    if ($buf =~ s/^(HTTP\/\d+\.\d+)[ \t]+(\d+)[ \t]*([^\012]*)\012//) { #1.39
319      # HTTP/1.0 response or better
320      my($ver,$code,$msg) = ($1, $2, $3);
321      $msg =~ s/\015$//;
322      LWP::Debug::debug("Identified HTTP Protocol: $ver $code $msg");
323      $response->code($code);
324      $response->message($msg);
325      $response->protocol($ver);
326      # store $request info in $response object
327      $response->request($request);
328    }
329    elsif ((length($buf) >= 5 and $buf !~ /^HTTP\//) or
330	     $buf =~ /\012/ ) {
331      # HTTP/0.9 or worse
332      LWP::Debug::debug("HTTP/0.9 assume OK");
333      $response->code(&HTTP::Status::RC_OK);
334      $response->message("OK");
335      $response->protocol('HTTP/0.9');
336      # store $request info in $response object
337      $response->request($request);
338    }
339    else {
340      # need more data
341      LWP::Debug::debug("need more data to know which protocol");
342    }
343  }
344
345  # if we have a protocol, read headers if neccessary
346  if ( $response && !&headers($response) ) {
347    # ensure that we have read all headers.  The headers will be
348    # terminated by two blank lines
349    unless ($buf =~ /^\015?\012/ || $buf =~ /\015?\012\015?\012/) {
350      # must read more if we can...
351      LWP::Debug::debug("need more data for headers");
352    } else {
353      # now we start parsing the headers.  The strategy is to
354      # remove one line at a time from the beginning of the header
355      # buffer ($buf).
356      my($key, $val);
357
358      while ($buf =~ s/([^\012]*)\012//) {
359	my $line = $1;
360
361	# if we need to restore as content when illegal headers
362	# are found.
363	my $save = "$line\012";
364
365	$line =~ s/\015$//;
366	last unless length $line;
367
368	if ($line =~ /^([a-zA-Z0-9_\-.]+)\s*:\s*(.*)/) {
369	  $response->push_header($key, $val) if $key;
370	  ($key, $val) = ($1, $2);
371	} elsif ($line =~ /^\s+(.*)/ && $key) {
372	  $val .= " $1";
373	} else {
374	    $response->push_header("Client-Bad-Header-Line" =>
375			           $line);
376	}
377      }
378      $response->push_header($key, $val) if $key;
379
380      # check to see if we have any header at all
381      unless (&headers($response)) {
382	# we need at least one header to go on
383        LWP::Debug::debug("no headers found, inserting Client-Date");
384	$response->header ("Client-Date" =>
385			   HTTP::Date::time2str(time));
386      }
387    } # of if then else
388  } # of if $response
389
390  # if we have both a response AND the headers, start parsing the rest
391  if ( $response && &headers($response) && length($buf)) {
392    $self->_get_sock_info($response, $socket);
393    # the CONNECT method does not need to read content
394    if ($request->method eq "CONNECT") { # from LWP 5.48's Protocol/http.pm
395	$response->{client_socket} = $socket;  # so it can be picked up
396    }
397    else {
398      # all other methods want to read content, I guess...
399      # Note that we can't use $self->collect, since we don't want to give
400      # up control (by letting Protocol::collect use a $collector callback)
401      if (my @te = $response->remove_header('Transfer-Encoding')) {
402        $response->push_header('Client-Transfer-Encoding', \@te);
403      }
404      my $retval = $self->receive($arg, $response, \$buf, $entry);
405      # update pushback buffer (receive handles _all_ of current buffer)
406      $pushback{$socket} = '';
407      # return length of response read (or value of $retval, if any, which
408      # could be one of C_LASTCON, C_ENDCON, or C_ENDALL)
409      return (defined $retval? $retval : length($buf));
410    }
411  }
412
413  $pushback{$socket} = $buf;
414  return $n;
415}
416
417# This function indicates if we have already parsed the headers.  In
418# case of HTTP/0.9 we (obviously?!) don't have any (which means that
419# we already 'parsed' them, so return 'true' no matter what)
420
421sub headers {
422    my ($response) = @_;
423
424    return 1  if $response->protocol eq 'HTTP/0.9';
425
426    ($response->headers_as_string ? 1 : 0);
427}
428
429sub close_connection {
430  my ($self, $response, $listen_socket, $request, $cmd_socket) = @_;
431#  print "Closing socket $listen_socket\n";
432#  $listen_socket->close;
433#  $cmd_socket->close;
434}
435
436# the old (single request) frontend, defunct.
437sub request {
438    die "LWP::Parallel::Protocol::http does not support single requests\n";
439}
440
441
442#-----------------------------------------------------------
443# copied from LWP::Protocol::http (v1.63 in LWP5.64)
444#-----------------------------------------------------------
445package LWP::Parallel::Protocol::http::SocketMethods;
446
447sub sysread {
448    my $self = shift;
449    if (my $timeout = ${*$self}{io_socket_timeout}) {
450	die "read timeout" unless $self->can_read($timeout);
451    }
452    else {
453	# since we have made the socket non-blocking we
454	# use select to wait for some data to arrive
455	$self->can_read(undef) || die "Assert";
456    }
457    sysread($self, $_[0], $_[1], $_[2] || 0);
458}
459
460sub can_read {
461    my($self, $timeout) = @_;
462    my $fbits = '';
463    vec($fbits, fileno($self), 1) = 1;
464    my $nfound = select($fbits, undef, undef, $timeout);
465    die "select failed: $!" unless defined $nfound;
466    return $nfound > 0;
467}
468
469sub ping {
470    my $self = shift;
471    !$self->can_read(0);
472}
473
474sub increment_response_count {
475    my $self = shift;
476    return ++${*$self}{'myhttp_response_count'};
477}
478
479#-----------------------------------------------------------
480package LWP::Parallel::Protocol::http::Socket;
481use vars qw(@ISA);
482@ISA = qw(LWP::Parallel::Protocol::http::SocketMethods Net::HTTP);
483
484#-----------------------------------------------------------
485# ^^^ copied from LWP::Protocol::http (v1.63 in LWP5.64)
486#-----------------------------------------------------------
487
488
4891;
490