1# -*- perl -*- 2# $Id: UserAgent.pm,v 1.31 2004/02/10 15:19:19 langhein Exp $ 3# derived from: UserAgent.pm,v 2.1 2001/12/11 21:11:29 gisle Exp $ 4# and: ParallelUA.pm,v 1.16 1997/07/23 16:45:09 ahoy Exp $ 5 6package LWP::Parallel::UserAgent::Entry; 7 8require 5.004; 9use Carp(); 10 11# allowed fields in Parallel::UserAgent entry 12my %fields = ( 13 arg => undef, 14 fullpath => undef, 15 protocol => undef, 16 proxy => undef, 17 redirect_ok => undef, 18 response => undef, 19 request => undef, 20 size => undef, 21 cmd_socket => undef, 22 listen_socket => undef, 23 content_size => undef, 24 ); 25 26sub new { 27 my($class, $init) = @_; 28 29 my $self = { 30 _permitted => \%fields, 31 %fields, 32 }; 33 $self = bless $self, $class; 34 35 if ($init) { 36 foreach (keys %$init) { 37 # call functions and initialize with given values 38 $self->$_($init->{$_}); 39 } 40 } 41 $self; 42} 43 44sub get { 45 my $self = shift; 46 my @answer; 47 my $field; 48 foreach $field (@_) { 49 push (@answer, $self->$field() ); 50 } 51 @answer; 52} 53 54use vars qw($AUTOLOAD); 55 56sub AUTOLOAD { 57 my $self = shift; 58 my $type = ref($self) || die "$self is not an object"; 59 my $name = $AUTOLOAD; 60 $name =~ s/.*://; # strip fully qualified portion 61 unless ( exists $self->{_permitted}->{$name} ) { 62 Carp::croak "Can't access '$name' field in $type object"; 63 } 64 if (@_) { 65 return $self->{$name} = $_[0]; 66 } else { 67 return $self->{$name}; 68 } 69} 70 71sub DESTROY { }; 72 73package LWP::Parallel::UserAgent; 74 75use Exporter(); 76 77$ENV{PERL_LWP_USE_HTTP_1.0} = "Yes"; # until i figure out gisle's http1.1 stuff 78require LWP::Parallel::Protocol; 79require LWP::UserAgent; 80@ISA = qw(LWP::UserAgent Exporter); 81 82@EXPORT = qw(); 83# callback commands 84@EXPORT_OK = qw(C_ENDCON C_ENDALL C_LASTCON); 85%EXPORT_TAGS = (CALLBACK => [qw(C_ENDCON C_ENDALL C_LASTCON)]); 86 87sub C_ENDCON { -1; }; # end current connection (but keep waiting/connecting) 88sub C_LASTCON{ -2; }; # don't start any new connections 89sub C_ENDALL { -3; }; # end all connections and return from 'wait'-method 90 91require HTTP::Request; 92require HTTP::Response; 93 94use Carp (); 95use LWP::Debug (); 96use HTTP::Status (); 97use HTTP::Date qw(time2str); 98use IO::Select; 99use strict; 100 101=head1 NAME 102 103LWP::Parallel::UserAgent - A class for parallel User Agents 104 105=head1 SYNOPSIS 106 107 require LWP::Parallel::UserAgent; 108 $ua = LWP::Parallel::UserAgent->new(); 109 ... 110 111 $ua->redirect (0); # prevents automatic following of redirects 112 $ua->max_hosts(5); # sets maximum number of locations accessed in parallel 113 $ua->max_req (5); # sets maximum number of parallel requests per host 114 ... 115 $ua->register ($request); # or 116 $ua->register ($request, '/tmp/sss'); # or 117 $ua->register ($request, \&callback, 4096); 118 ... 119 $ua->wait ( $timeout ); 120 ... 121 sub callback { my($data, $response, $protocol) = @_; .... } 122 123=head1 DESCRIPTION 124 125This class implements a user agent that access web sources in parallel. 126 127Using a I<LWP::Parallel::UserAgent> as your user agent, you typically start by 128registering your requests, along with how you want the Agent to process 129the incoming results (see $ua->register). 130 131Then you wait for the results by calling $ua->wait. This method only 132returns, if all requests have returned an answer, or the Agent timed 133out. Also, individual callback functions might indicate that the 134Agent should stop waiting for requests and return. (see $ua->register) 135 136See the file L<LWP::Parallel> for a set of simple examples. 137 138=head1 METHODS 139 140The LWP::Parallel::UserAgent is a sub-class of LWP::UserAgent, but not all 141of its methods are available here. However, you can use its main 142methods, $ua->simple_request and $ua->request, in order to simulate 143singular access with this package. Of course, if a single request is all 144you need, then you should probably use LWP::UserAgent in the first place, 145since it will be faster than our emulation here. 146 147For parallel access, you will need to use the new methods that come with 148LWP::Parallel::UserAgent, called $pua->register and $pua->wait. See below 149for more information on each method. 150 151=over 4 152 153=cut 154 155 156# 157# Additional attributes in addition to those found in LWP::UserAgent: 158# 159# $self->{'entries_by_sockets'} = {} Associative Array of registered 160# requests, indexed via sockets 161# 162# $self->{'entries_by_requests'} = {} Associative Array of registered 163# requests, indexed via requests 164# 165 166=item $ua = LWP::Parallel::UserAgent->new(); 167 168Constructor for the parallel UserAgent. Returns a reference to a 169LWP::Parallel::UserAgent object. 170 171Optionally, you can give it an existing LWP::Parallel::UserAgent (or 172even an LWP::UserAgent) as a first argument, and it will "clone" a 173new one from this (This just copies the behavior of LWP::UserAgent. 174I have never actually tried this, so let me know if this does not do 175what you want). 176 177=cut 178 179sub new { 180 my($class,$init) = @_; 181 182 # my $self = new LWP::UserAgent $init; 183 my $self = new LWP::UserAgent; # thanks to Kirill 184 $self = bless $self, $class; 185 186 # handle responses per default 187 $self->{'handle_response'} = 1; 188 # do not perform nonblocking connects per default 189 $self->{'nonblock'} = 0; 190 # don't handle duplicates per default 191 $self->{'handle_duplicates'} = 0; 192 # do not use ordered lists per default 193 $self->{'handle_in_order'} = 0; 194 # do not cache failed connection attempts 195 $self->{'remember_failures'} = 0; 196 197 # supply defaults 198 $self->{'max_hosts'} = 7; 199 $self->{'max_req'} = 5; 200 201 $self->initialize; 202} 203 204=item $ua->initialize; 205 206Takes no arguments and initializes the UserAgent. It is automatically 207called in LWP::Parallel::UserAgent::new, so usually there is no need to 208call this explicitly. 209 210However, if you want to re-use the same UserAgent object for a number 211of "runs", you should call $ua->initialize after you have processed the 212results of the previous call to $ua->wait, but before registering any 213new requests. 214 215=cut 216 217 218sub initialize { 219 my $self = shift; 220 221 # list of entries 222 $self->{'entries_by_sockets'} = {}; 223 $self->{'entries_by_requests'} = {}; 224 225 $self->{'previous_requests'} = {}; 226 227 # connection handling 228 $self->{'current_connections'} = {}; # hash 229 $self->{'pending_connections'} = {}; # hash (of [] arrays) 230 $self->{'ordpend_connections'} = []; # array 231 $self->{'failed_connections'} = {}; # hash 232 233 # duplicates 234 $self->{'seen_request'} = {}; 235 236 # select objects for reading & writing 237 $self->{'select_in'} = IO::Select->new(); 238 $self->{'select_out'} = IO::Select->new(); 239 240 $self; 241} 242 243=item $ua->redirect ( $ok ) 244 245Changes the default value for permitting Parallel::UserAgent to follow 246redirects and authentication-requests. The standard value is 'true'. 247 248See C<$ua->register> for how to change the behaviour for particular 249requests only. 250 251=cut 252 253sub redirect { 254 my $self = shift; 255 LWP::Debug::trace("($_[0])"); 256 $self->{'handle_response'} = $_[0] if defined $_[0]; 257} 258 259=item $ua->nonblock ( $ok ) 260 261Per default, LWP::Parallel will connect to a site using a blocking call. If 262you want to speed this step up, you can try the new non-blocking version of 263the connect call by setting $ua->nonblock to 'true'. 264The standard value is 'false' (although this might change in the future if 265nonblocking connects turn out to be stable enough.) 266 267=cut 268 269sub nonblock { 270 my $self = shift; 271 LWP::Debug::trace("($_[0])"); 272 $self->{'nonblock'} = $_[0] if defined $_[0]; 273} 274 275 276=item $ua->duplicates ( $ok ) 277 278Changes the default value for permitting Parallel::UserAgent to ignore 279duplicate requests. The standard value is 'false'. 280 281=cut 282 283sub duplicates { 284 my $self = shift; 285 LWP::Debug::trace("($_[0])"); 286 $self->{'handle_duplicates'} = $_[0] if defined $_[0]; 287} 288 289=item $ua->in_order ( $ok ) 290 291Changes the default value to restricting Parallel::UserAgent to 292connect to the registered sites in the order they were registered. The 293default value FALSE allows Parallel::UserAgent to make the connections 294in an apparently random order. 295 296=cut 297 298sub in_order { 299 my $self = shift; 300 LWP::Debug::trace("($_[0])"); 301 $self->{'handle_in_order'} = $_[0] if defined $_[0]; 302} 303 304=item $ua->remember_failures ( $yes ) 305 306If set to one, enables ParalleUA to ignore requests or connections to 307sites that it failed to connect to before during this "run". If set to 308zero (the dafault) Parallel::UserAgent will try to connect to every 309single URL you registered, even if it constantly fails to connect to a 310particular site. 311 312=cut 313 314sub remember_failures { 315 my $self = shift; 316 LWP::Debug::trace("($_[0])"); 317 $self->{'remember_failures'} = $_[0] if defined $_[0]; 318} 319 320=item $ua->max_hosts ( $max ) 321 322Changes the maximum number of locations accessed in parallel. The 323default value is 7. 324 325Note: Although it says 'host', it really means 'netloc/server'! That 326is, multiple server on the same host (i.e. one server running on port 32780, the other one on port 6060) will count as two 'hosts'. 328 329=cut 330 331sub max_hosts { 332 my $self = shift; 333 LWP::Debug::trace("($_[0])"); 334 $self->{'max_hosts'} = $_[0] if defined $_[0]; 335} 336 337=item $ua->max_req ( $max ) 338 339Changes the maximum number of requests issued per host in 340parallel. The default value is 5. 341 342=cut 343 344sub max_req { 345 my $self = shift; 346 LWP::Debug::trace("($_[0])"); 347 $self->{'max_req'} = $_[0] if defined $_[0]; 348} 349 350=item $ua->register ( $request [, $arg [, $size [, $redirect_ok]]] ) 351 352Registers the given request with the User Agent. In case of an error, 353a C<HTTP::Request> object containing the HTML-Error message is 354returned. Otherwise (that is, in case of a success) it will return 355undef. 356 357The C<$request> should be a reference to a C<HTTP::Request> object 358with values defined for at least the method() and url() attributes. 359 360C<$size> specifies the number of bytes Parallel::UserAgent should try 361to read each time some new data arrives. Setting it to '0' or 'undef' 362will make Parallel::UserAgent use the default. (8k) 363 364Specifying C<$redirect_ok> will alter the redirection behaviour for 365this particular request only. '1' or any other true value will force 366Parallel::UserAgent to follow redirects, even if the default is set to 367'no_redirect'. (see C<$ua->redirect>) '0' or any other false value 368should do the reverse. See LWP::UserAgent for using an object's 369C<requests_redirectable> list for fine-tuning this behavior. 370 371If C<$arg> is a scalar it is taken as a filename where the content of 372the response is stored. 373 374If C<$arg> is a reference to a subroutine, then this routine is called 375as chunks of the content is received. An optional C<$size> argument 376is taken as a hint for an appropriate chunk size. The callback 377function is called with 3 arguments: the data received this time, a 378reference to the response object and a reference to the protocol 379object. The callback can use the predefined constants C_ENDCON, 380C_LASTCON and C_ENDALL as a return value in order to influence pending 381and active connections. C_ENDCON will end this connection immediately, 382whereas C_LASTCON will inidicate that no further connections should be 383made. C_ENDALL will immediately end all requests and let the 384Parallel::UserAgent return from $pua->wait(). 385 386If C<$arg> is omitted, then the content is stored in the response 387object itself. 388 389If C<$arg> is a C<LWP::Parallel::UserAgent::Entry> object, then this 390request will be registered as a follow-up request to this particular 391entry. This will not create a new entry, but instead link the current 392response (i.e. the reason for re-registering) as $response->previous 393to the new response of this request. All other fields are either 394re-initialized ($request, $fullpath, $proxy) or left untouched ($arg, 395$size). (This should only be use internally) 396 397LWP::Parallel::UserAgent->request also allows the registration of 398follow-up requests to existing requests, that required redirection or 399authentication. In order to do this, an Parallel::UserAgent::Entry 400object will be passed as the second argument to the call. Usually, 401this should not be used directly, but left to the internal 402$ua->handle_response method! 403 404=cut 405 406sub register { 407 my ($self, $request, $arg, $size, $redirect) = @_; 408 my $entry; 409 410 unless (ref($request) and $request->can('url')) { 411 Carp::carp "Can't use '$request' as an HTTP::Request object. Ignoring"; 412 return LWP::UserAgent::_new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, 413 "Unknown request type: '$request'"); 414 } 415 LWP::Debug::debug("(".$request->url->as_string . 416 ", ". (defined $arg ? $arg : '[undef]') . 417 ", ". (defined $size ? $size : '[undef]') . 418 ", ". (defined $redirect ? $redirect : '[undef]') . ")"); 419 420 my($failed_connections,$remember_failures,$handle_duplicates, 421 $previous_requests)= @{$self}{qw(failed_connections 422 remember_failures handle_duplicates previous_requests)}; 423 424 my $response = HTTP::Response->new(0, '<empty response>'); 425 # make sure our request gets stored within the response 426 # (usually this is done automatically by LWP in case of 427 # a successful connection, but we want to have this info 428 # available even when something goes wrong) 429 $response->request($request); 430 431 # so far Parallel::UserAgent can handle http, ftp, and file requests 432 # (anybody volunteering to porting the rest of the protocols?!) 433 unless ( $request->url->scheme eq 'http' or $request->url->scheme eq 'ftp' 434 # https suggestion by <mszabo@coralwave.com> 435 or $request->url->scheme eq 'https' 436 # file scheme implementation by 437 or $request->url->scheme eq 'file' 438 ){ 439 $response->code (&HTTP::Status::RC_NOT_IMPLEMENTED); 440 $response->message ("Unknown Scheme: ". $request->url->scheme); 441 Carp::carp "Parallel::UserAgent can not handle '". $request->url->scheme . 442 "'-requests. Request ignored!"; 443 # simulate immediate response from server 444 $self->on_failure ($request, $response); 445 return $response; 446 } 447 448 my $netloc = $self->_netloc($request->url); 449 450 # check if we already tried to connect to this location, and failed 451 if ( $remember_failures and $failed_connections->{$netloc} ) { 452 $response->code (&HTTP::Status::RC_INTERNAL_SERVER_ERROR); 453 $response->message ("Server unavailable"); 454 # simulate immediate response from server 455 $self->on_failure ($request, $response); 456 return $response; 457 } 458 459 # duplicates handling: check if we connected to same URL before 460 if ($handle_duplicates and $previous_requests->{$request->url->as_string}){ 461 $response->code (&HTTP::Status::RC_INTERNAL_SERVER_ERROR); 462 $response->message ("Duplicate Request: ". $request->url); 463 ## just ignore the request for now. if you want to simulate 464 ## immediate response from server, uncomment this line: 465 # $self->on_failure ($request, $response); 466 return $response; 467 } 468 469 # support two calling techniques: new request or follow-up 470 # 1) follow-up request: 471 if ( ref($arg) and ( ref($arg) eq "LWP::Parallel::UserAgent::Entry") ) { 472 # called with $entry object as first parameter. 473 # re-register new request with same entry: 474 $entry = $arg; 475 # link the previous response to our new response object 476 $response->previous($entry->response); 477 # and update the fields in our entry 478 $entry->request($request); 479 $entry->response($response); 480 # re-registered requests are put first in line (->unshift) 481 # and stored underneath the host they're accessing: 482 # (first make sure we have an array to push things onto) 483 $self->{'pending_connections'}->{$netloc} = [] 484 unless $self->{'pending_connections'}->{$netloc}; 485 unshift (@{$self->{'pending_connections'}->{$netloc}}, $entry); 486 unshift (@{$self->{'ordpend_connections'}}, $entry); 487 488 # 2) new request: 489 } else { 490 # called first time, create new entry object 491 $size ||= 8192; 492 $entry = LWP::Parallel::UserAgent::Entry->new( { 493 request => $request, 494 response => $response, 495 arg => $arg, 496 size => $size, 497 content_size => 0, 498 redirect_ok => $self->{'handle_response'}, 499 } ); 500 # if the user specified 501 $entry->redirect_ok($redirect) if defined $redirect; 502 503 # store new entry by request (only new entries) 504 $self->{'entries_by_requests'}->{$request} = $entry; 505 506 # new requests are put at the end 507 # (first make sure we have an array to push things onto) 508 $self->{'pending_connections'}->{$netloc} = [] 509 unless $self->{'pending_connections'}->{$netloc}; 510 push (@{$self->{'pending_connections'}->{$netloc}}, $entry); 511 push (@{$self->{'ordpend_connections'}}, $entry); 512 } 513 # duplicates handling: remember this entry 514 if ($handle_duplicates) { 515 $previous_requests->{$request->url->as_string} = $entry; 516 } 517 518 return; 519} 520 521# Create a netloc from the url or return an alias netloc for file: proto 522# Fix netloc for file: reqs to generic localhost.file - this can be changed 523# if necessary. Test to ensure url->scheme doesn't return undef (JB) 524sub _netloc { 525 my $self = shift; 526 my $url = shift; 527 528 my $netloc; 529 if ($url->scheme eq 'file') { 530 $netloc = 'localhost.file'; 531 } else { 532 $netloc = $url->host_port; # eg www.cs.washington.edu:8001 533 } 534 $netloc; 535} 536 537 538# this method will take the pending entries one at a time and 539# decide wether we have enough bandwith (as specified by the 540# values in 'max_req' and 'max_hosts') to connect this request. 541# If not, the entry will stay on the stack (w/o changing the 542# order) 543sub _make_connections { 544 my $self = shift; 545 if ($self->{'handle_in_order'}) { 546 $self->_make_connections_in_order; 547 } else { 548 $self->_make_connections_unordered; 549 } 550} 551 552sub _make_connections_in_order { 553 my $self = shift; 554 LWP::Debug::trace('()'); 555 556 my ($entry, @queue, %busy); 557 # get first entry from pending connections 558 while ( $entry = shift @{ $self->{'ordpend_connections'} } ) { 559 my $netloc = $self->_netloc($entry->request->url); 560 push (@queue, $entry), next if $busy{$netloc}; 561 unless ($self->_check_bandwith($entry)) { 562 push (@queue, $entry); 563 $busy{$netloc}++; 564 }; 565 }; 566 # the un-connected entries form the new stack 567 $self->{'ordpend_connections'} = \@queue; 568} 569 570# unordered connections have the advantage that we do not have to 571# care about screwing up our list of pending connections. This will 572# speed up our iteration through the list 573sub _make_connections_unordered { 574 my $self = shift; 575 LWP::Debug::trace('()'); 576 577 my ($entry, $queue, $netloc); 578 # check every host in sequence (use 'each' for better performance) 579 my %delete; 580 SERVER: 581 while (($netloc, $queue) = each %{$self->{'pending_connections'}}) { 582 # get first entry from pending connections at this host 583 ENTRY: 584 while ( $entry = shift @$queue ) { 585 unless ( $self->_check_bandwith($entry) ) { 586 # we don't have enough bandwith -- put entry back on queue 587 LWP::Debug::debug("Not enough bandwidth for request to $netloc"); 588 unshift @$queue, $entry; 589 # we can stop here for this server 590 next SERVER; 591 } 592 } # of while ENTRY 593 # mark for deletion if we emptied the queue at this location 594 LWP::Debug::debug("Queue for $netloc contains ". scalar @$queue . " pending connections"); 595 $delete{$netloc}++ unless scalar @$queue; 596 } # of while SERVER 597 # delete all netlocs that we completely handled 598 foreach (keys %delete) { 599 LWP::Debug::debug("Deleting queue for $_"); 600 delete $self->{'pending_connections'}->{$_} 601 } 602} 603 604 605# this method checks the available bandwith and either connects 606# the request and returns 1, or, in case we didn't have enough 607# bandwith, returns undef 608sub _check_bandwith { 609 my ( $self, $entry ) = @_; 610 LWP::Debug::trace("($entry [".$entry->request->url."] )"); 611 612 my($failed_connections, $remember_failures ) = 613 @{$self}{qw(failed_connections remember_failures)}; 614 615 my ($request, $response) = ($entry->request, $entry->response); 616 my $url = $request->url; 617 my $netloc = $self->_netloc($url); 618 619 if ( $remember_failures and $failed_connections->{$netloc} ) { 620 $response->code (&HTTP::Status::RC_INTERNAL_SERVER_ERROR); 621 $response->message ("Server unavailable"); 622 # simulate immediate response from server 623 $self->on_failure ($request, $response, $entry); 624 return 1; 625 } 626 627 if ( $self->_active ($netloc) ) { 628 if ( $self->_req_available ( $url ) ) { 629 $self->on_connect ( $request, $response, $entry ); 630 unless ( $self->_connect ( $entry ) ) { 631 # only increase connection count if _connect doesn't 632 # return error 633 $self->{'current_connections'}->{$netloc}++; 634 } else { 635 # calling ->on_failure is done within ->_connect 636 $self->{'failed_connections'}->{$netloc}++; 637 } 638 } else { 639 LWP::Debug::debug ("No open request-slots available"); 640 return; }; 641 } elsif ( $self->_hosts_available ) { 642 $self->on_connect ( $request, $response, $entry ); 643 unless ( $self->_connect ( $entry ) ) { 644 # only increase connection count if _connect doesn't return error 645 $self->{'current_connections'}->{$netloc}++; 646 } else { 647 # calling ->on_failure is done within ->_connect 648 LWP::Debug::debug ("Failed connection for '" . $netloc ."'"); 649 $self->{'failed_connections'}->{$netloc}++; 650 } 651 } else { 652 LWP::Debug::debug ("No open host-slots available"); 653 return; 654 } 655 # indicate success here 656 return 1; 657} 658 659# 660# helper methods for _make_connections: 661# 662# number of active connections per netloc 663sub _active { shift->{'current_connections'}->{$_[0]}; }; 664# request-slots available at netloc 665sub _req_available { 666 my ( $self, $url ) = @_; 667 $self->{'max_req'} > $self->_active($self->_netloc($url)); 668}; 669# host-slots available 670sub _hosts_available { 671 my $self = shift; 672 $self->{'max_hosts'} > scalar keys %{$self->{'current_connections'}}; 673}; 674 675 676# _connect will take the request of the given entry and try to connect 677# to the host specified in its url. It returns the response object in 678# case of error, undef otherwise. 679sub _connect { 680 my ($self, $entry) = @_; 681 LWP::Debug::trace("($entry [".$entry->request->url."] )"); 682 local($SIG{"__DIE__"}); # protect against user defined die handlers 683 684 my ( $request, $response ) = $entry->get( qw(request response) ); 685 686 my ($error_response, $proxy, $protocol, $timeout, $use_eval, $nonblock) = 687 $self->init_request ($request); 688 if ($error_response) { 689 # we need to manually set code and message of $response as well, so 690 # that we have the correct information in our $entry as well 691 $response->code ($error_response->code); 692 $response->message ($error_response->message); 693 $self->on_failure ($request, $error_response, $entry); 694 return $error_response; 695 } 696 697 my ($socket, $fullpath); 698 699 # figure out host and connect to site 700 if ($use_eval) { 701 eval { 702 ($socket, $fullpath) = 703 $protocol->handle_connect ($request, $proxy, $timeout, $nonblock ); 704 }; 705 if ($@) { 706 if ($@ =~ /^timeout/i) { 707 $response->code (&HTTP::Status::RC_REQUEST_TIMEOUT); 708 $response->message ('User-agent timeout'); 709 } else { 710 # remove file/line number 711 # $@ =~ s/\s+at\s+\S+\s+line\s+\d+.*//s; 712 $response->code (&HTTP::Status::RC_INTERNAL_SERVER_ERROR); 713 $response->message ($@); 714 } 715 } 716 } else { 717 # user has to handle any dies, usually timeouts 718 ($socket, $fullpath) = 719 $protocol->handle_connect ($request, $proxy, $timeout, $nonblock ); 720 } 721 722 unless ($socket) { 723 # something went wrong. Explanation might be in second argument 724 unless ($response->code) { 725 # set response code and message accordingly (note: simply saying 726 # $response = $fullpath or $response = HTTP::Response->new would 727 # only affect the local copy of our response object. When using 728 # its ->code and ->message methods directly, we can affect the 729 # original instead!) 730 if (ref($fullpath) =~ /response/i) { 731 $response->code ($fullpath->code); 732 $response->message ($fullpath->message); 733 } else { 734 $response->code (&HTTP::Status::RC_INTERNAL_SERVER_ERROR); 735 $response->message ("Failed on connect for unknown reasons"); 736 } 737 } 738 } 739 # response should be empty, unless something went wrong 740 if ($response->code) { 741 $self->on_failure ($request, $response, $entry); 742 # should we remove $entry from 'entries_by_request' list here? no! 743 return $response; 744 } else { 745 # update $socket, $protocol, $fullpath and $proxy info 746 $entry->protocol($protocol); 747 $entry->fullpath($fullpath); 748 $entry->proxy($proxy); 749 $entry->cmd_socket($socket); 750 $self->{'entries_by_sockets'}->{$socket} = $entry; 751# LWP::Debug::debug ("Socket is $socket"); 752 # last not least: register socket with (write-) Select object 753 $self->_add_out_socket($socket); 754 } 755 756 return; 757} 758 759# once we're done with a connection, we have to make sure that all 760# references to it's socket are removed, and that the counter for its 761# netloc is properly decremented. 762sub _remove_current_connection { 763 my ($self, $entry ) = @_; 764 LWP::Debug::trace("($entry [".$entry->request->url."] )"); 765 766 $entry->cmd_socket(undef); 767 $entry->listen_socket(undef); 768 769 my $netloc = $self->_netloc($entry->request->url); 770 if ( $self->_active ($netloc) ) { 771 delete $self->{'current_connections'}->{$netloc} 772 unless --$self->{'current_connections'}->{$netloc}; 773 } else { 774 # this is serious! better stop here 775 Carp::confess "No connections for '$netloc'"; 776 } 777} 778 779=item $ua->on_connect ( $request, $response, $entry ) 780 781This method should be overridden in an (otherwise empty) subclass in 782order to present customized messages for each connection attempted by 783the User Agent. 784 785=cut 786 787sub on_connect { 788 my ($self, $request, $response, $entry) = @_; 789 LWP::Debug::trace("(".$request->url->as_string.")"); 790} 791 792=item $ua->on_failure ( $request, $response, $entry ) 793 794This method should be overridden in an (otherwise empty) subclass in 795order to present customized messages for each connection or 796registration that failed. 797 798=cut 799 800sub on_failure { 801 my ($self, $request, $response, $entry) = @_; 802 LWP::Debug::trace("(".$request->url->as_string.")"); 803} 804 805=item $ua->on_return ( $request, $response, $entry ) 806 807This method should be overridden in an (otherwise empty) subclass in 808order to present customized messages for each request returned. If a 809callback function was registered with this request, this callback 810function is called before $pua->on_return. 811 812Please note that while $pua->on_return is a method (which should be 813overridden in a subclass), a callback function is NOT a method, and 814does not have $self as its first parameter. (See more on callbacks 815below) 816 817The purpose of $pua->on_return is mainly to provide messages when a 818request returns. However, you can also re-register follow-up requests 819in case you need them. 820 821If you need specialized follow-up requests depending on the request 822that just returend, use a callback function instead (which can be 823different for each request registered). Otherwise you might end up 824writing a HUGE if..elsif..else.. branch in this global method. 825 826=cut 827 828sub on_return { 829 my ($self, $request, $response, $entry) = @_; 830 LWP::Debug::trace("(".join (", ",$request->url->as_string, 831 (defined $response->code ? 832 $response->code : '[undef]'), 833 (defined $response->message ? 834 $response->message : '[undef]')) .")"); 835} 836 837=item $us->discard_entry ( $entry ) 838 839Completely removes an entry from memory, in case its output is not 840needed. Use this in callbacks such as C<on_return> or <on_failure> if 841you want to make sure an entry that you do not need does not occupy 842valuable main memory. 843 844=cut 845 846# proposed by Glenn Wood <glenn@savesmart.com> 847# additional fixes by Kirill http://www.en-directo.net/mail/kirill.html 848sub discard_entry { 849 my ($self, $entry) = @_; 850 LWP::Debug::trace("($entry)") if $entry; 851 852 # Entries are added to ordpend_connections in $self->register: 853 # push (@{$self->{'ordpend_connections'}}, $entry); 854 # 855 # the reason we even maintain this ordered list is that 856 # currently the user can change the "in_order" flag any 857 # time, even if we already started 'wait'ing. 858 my $entries = $self->{ordpend_connections}; 859 @$entries = grep $_ != $entry, @$entries; 860 861 $entries = $self->{entries_by_requests}; 862 delete @$entries{grep $entries->{$_} == $entry, keys %$entries}; 863 864 $entries = $self->{entries_by_sockets}; 865 delete @$entries{grep $entries->{$_} == $entry, keys %$entries}; 866 867 return; 868} 869 870 871=item $ua->wait ( $timeout ) 872 873Waits for available sockets to write to or read from. Will timeout 874after $timeout seconds. Will block if $timeout = 0 specified. If 875$timeout is omitted, it will use the Agent default timeout value. 876 877=cut 878 879sub wait { 880 my ($self, $timeout) = @_; 881 LWP::Debug::trace("($timeout)") if $timeout; 882 883 my $foobar; 884 885 $timeout = $self->{'timeout'} unless defined $timeout; 886 887 # shortcuts to in- and out-filehandles 888 my $fh_out = $self->{'select_out'}; 889 my $fh_in = $self->{'select_in'}; 890 my $fh_err; # ignore errors for now 891 my @ready; 892 893 my ($active, $pending); 894 ATTEMPT: 895 while ( $active = scalar keys %{ $self->{'current_connections'} } or 896 $pending = scalar ($self->{'handle_in_order'}? 897 @{ $self->{'ordpend_connections'} } : 898 keys %{ $self->{'pending_connections'} } ) ) { 899 # check select 900 if ( (scalar $fh_in->handles) or (scalar $fh_out->handles) ) { 901 LWP::Debug::debug("Selecting Sockets, timeout is $timeout seconds"); 902 unless ( @ready = IO::Select->select ($fh_in, $fh_out, 903 undef, $timeout) ) { 904 # 905 # empty array, means that select timed out 906 LWP::Debug::trace('select timeout'); 907 my ($socket); 908 # set all active requests to "timed out" 909 foreach $socket ($fh_in->handles ,$fh_out->handles) { 910 my $entry = $self->{'entries_by_sockets'}->{$socket}; 911 delete $self->{'entries_by_sockets'}->{$socket}; 912 unless ($entry->response->code) { 913 # moved the creation of the timeout response into the loop so that 914 # each entry gets its own response object (otherwise they'll all 915 # share the same request entry in there). thanks to John Salmon 916 # <john@thesalmons.org> for pointing this out. 917 my $response = HTTP::Response->new(&HTTP::Status::RC_REQUEST_TIMEOUT, 918 'User-agent timeout (select)'); 919 # don't overwrite an already existing response 920 $entry->response ($response); 921 $response->request ($entry->request); 922 # only count as failure if we have no response yet 923 $self->on_failure ($entry->request, $response, $entry); 924 } else { 925 my $res = $entry->response; 926 $res->message ($res->message . " (timeout)"); 927 $entry->response ($res); 928 # thanks to Jonathan Feinberg <jdf@pobox.com> who finally 929 # reminded me that partial replies should trigger some sort 930 # of on_xxx callback as well. Let's try on_failure for now, 931 # unless people think that on_return is the right thing to 932 # call here: 933 $self->on_failure ($entry->request, $res, $entry); 934 } 935 $self->_remove_current_connection ( $entry ); 936 } 937 # and delete from read- and write-queues 938 foreach $socket ($fh_out->handles) { $fh_out->remove($socket); } 939 foreach $socket ($fh_in->handles) { $fh_in->remove($socket); } 940 # continue processing -- pending requests might still work! 941 } else { 942 # something is ready for reading or writing 943 my ($ready_read, $ready_write, $error) = @ready; 944 my ($socket); 945 946 # 947 # WRITE QUEUE 948 # 949 foreach $socket (@$ready_write) { 950 my $so_err; 951 if ($socket->can("getsockopt")) { # we also might have IO::File! 952 ## check if there is any error (suggested by Mike Heller) 953 $so_err = $socket->getsockopt( Socket::SOL_SOCKET(), 954 Socket::SO_ERROR() ); 955 LWP::Debug::debug( "SO_ERROR: $so_err" ) if $so_err; 956 } 957 # modularized this chunk so that it can be reused by 958 # POE::Component::Client::UserAgent 959 $self->_perform_write ($socket, $timeout) unless $so_err; 960 961 } 962 963 # 964 # READ QUEUE 965 # 966 foreach $socket (@$ready_read) { 967 968 # modularized this chunk so that it can be reused by 969 # POE::Component::Client::UserAgent 970 $self->_perform_read ($socket, $timeout); 971 972 } 973 } # of unless (@ready...) {} else {} 974 975 } else { 976 # when we are here, can we have active connections?!! 977 #(you might want to comment out this huge Debug statement if 978 #you're in a hurry. Then again, you wouldn't be using perl then, 979 #would you!?) 980 LWP::Debug::trace("\n\tCurrent Server: ". 981 scalar (keys %{$self->{'current_connections'}}) . 982 " [ ". join (", ", 983 map { $_, $self->{'current_connections'}->{$_} } 984 keys %{$self->{'current_connections'}}) . 985 " ]\n\tPending Server: ". 986 ($self->{'handle_in_order'}? 987 scalar @{$self->{'ordpend_connections'}} : 988 scalar (keys %{$self->{'pending_connections'}}) . 989 " [ ". join (", ", 990 map { $_, 991 scalar @{$self->{'pending_connections'}->{$_}} } 992 keys %{$self->{'pending_connections'}}) . 993 " ]") ); 994 } # end of if $sel->handles 995 # try to make new connections 996 $self->_make_connections; 997 } # end of while 'current_connections' or 'pending_connections' 998 999 # should we delete fh-queues here?! 1000 # or maybe re-initialize in case we register more requests later? 1001 # in that case we'll have to make sure we don't try to reconnect 1002 # to old sockets later - so we should create new Select-objects! 1003 $self->_remove_all_sockets(); 1004 1005 # allows the caller quick access to all issued requests, 1006 # although some original requests may have been replaced by 1007 # redirects or authentication requests... 1008 return $self->{'entries_by_requests'}; 1009} 1010 1011# socket handling modularized in order to work better with POE 1012# as suggested by Kirill http://www.en-directo.net/mail/kirill.html 1013# 1014sub _remove_out_socket { 1015 my ($self,$socket) = @_; 1016 $self->{select_out}->remove($socket); 1017} 1018 1019sub _remove_in_socket { 1020 my ($self,$socket) = @_; 1021 $self->{select_in}->remove($socket); 1022} 1023 1024sub _add_out_socket { 1025 my ($self,$socket) = @_; 1026 $self->{select_out}->add($socket); 1027} 1028 1029sub _add_in_socket { 1030 my ($self,$socket) = @_; 1031 $self->{select_in}->add($socket); 1032} 1033 1034sub _remove_all_sockets { 1035 my ($self) = @_; 1036 $self->{select_in} = IO::Select->new(); 1037 $self->{select_out} = IO::Select->new(); 1038} 1039 1040sub _perform_write 1041{ 1042 my ($self, $socket, $timeout) = @_; 1043 LWP::Debug::debug('Writing to Sockets'); 1044 my $entry = $self->{'entries_by_sockets'}->{$socket}; 1045 1046 my ( $request, $protocol, $fullpath, $arg, $proxy) = 1047 $entry->get( qw(request protocol fullpath arg proxy) ); 1048 1049 my ($listen_socket, $response); 1050 if ($self->{'use_eval'}) { 1051 eval { 1052 ($listen_socket, $response) = 1053 $protocol->write_request ($request, 1054 $socket, 1055 $fullpath, 1056 $arg, 1057 $timeout, 1058 $proxy); 1059 }; 1060 if ($@) { 1061 # if our call fails, we might not have a $response object, so we 1062 # have to create a new one here 1063 if ($@ =~ /^timeout/i) { 1064 $response = LWP::UserAgent::_new_response($request, &HTTP::Status::RC_REQUEST_TIMEOUT, 1065 'User-agent timeout (syswrite)'); 1066 } else { 1067 # remove file/line number 1068 # $@ =~ s/\s+at\s+\S+\s+line\s+\d+.*//s; 1069 $response = LWP::UserAgent::_new_response($request, &HTTP::Status::RC_INTERNAL_SERVER_ERROR, 1070 $@); 1071 } 1072 $entry->response ($response); 1073 $self->on_failure ($request, $response, $entry); 1074 } 1075 } else { 1076 # user has to handle any dies, usually timeouts 1077 ($listen_socket, $response) = 1078 $protocol->write_request ($request, 1079 $socket, 1080 $fullpath, 1081 $arg, 1082 $timeout, 1083 $proxy); 1084 } 1085 1086 if ($response and !$response->is_success) { 1087 $entry->response($response); 1088 $entry->response->request($request); 1089 LWP::Debug::trace('Error while issuing request '. 1090 $request->url->as_string); 1091 } elsif ($response) { 1092 # successful response already? 1093 LWP::Debug::trace('Fast response for request '. 1094 $request->url->as_string . 1095 ' ['. length($response->content) . 1096 ' bytes]'); 1097 $entry->response($response); 1098 $entry->response->request($request); 1099 my $content = $response->content; 1100 $response->content(''); # clear content here, so that it 1101 # can be properly processed by ->receive 1102 unless ($request->method eq 'DELETE') { # JB 1103 $protocol->receive_once($arg, $response, $content, $entry); 1104 } 1105 } 1106 # one write is (should be?) enough 1107 delete $self->{'entries_by_sockets'}->{$socket}; 1108 $self->_remove_out_socket($socket); 1109 1110 if (ref($listen_socket)) { 1111 # now make sure we start reading from the $listen_socket: 1112 # file existing entry under new (listen_)socket 1113 $self->_add_in_socket ($listen_socket); 1114 $entry->listen_socket($listen_socket); 1115 $self->{'entries_by_sockets'}->{$listen_socket} = $entry; 1116 } else { 1117 # remove from current_connections 1118 $self->_remove_current_connection ( $entry ); 1119 } 1120 1121 return; 1122} 1123 1124sub _perform_read 1125{ 1126 my ($self, $socket, $timeout) = @_; 1127 1128 LWP::Debug::debug('Reading from Sockets'); 1129 my $entry = $self->{'entries_by_sockets'}->{$socket}; 1130 1131 my ( $request, $response, $protocol, $fullpath, $arg, $size) = 1132 $entry->get( qw(request response protocol 1133 fullpath arg size) ); 1134 1135 my $retval; 1136 if ($self->{'use_eval'}) { 1137 eval { 1138 $retval = $protocol->read_chunk ($response, $socket, $request, 1139 $arg, $size, $timeout, 1140 $entry); 1141 }; 1142 if ($@) { 1143 if ($@ =~ /^timeout/i) { 1144 $response->code (&HTTP::Status::RC_REQUEST_TIMEOUT); 1145 $response->message ('User-agent timeout (sysread)'); 1146 } else { 1147 # remove file/line number 1148 # $@ =~ s/\s+at\s+\S+\s+line\s+\d+.*//s; 1149 $response->code (&HTTP::Status::RC_INTERNAL_SERVER_ERROR); 1150 $response->message ($@); 1151 } 1152 $self->on_failure ($request, $response, $entry); 1153 } 1154 } else { 1155 # user has to handle any dies, usually timeouts 1156 $retval = $protocol->read_chunk ($response, $socket, $request, 1157 $arg, $size, $timeout, 1158 $entry); 1159 } 1160 1161 # examine return value. $retval is either a positive 1162 # number, indicating the number of bytes read, or 1163 # '0' (for EOF), or a callback-function code (<0) 1164 1165 LWP::Debug::debug ("'$retval' = read_chunk from $entry (". 1166 $request->url.")"); 1167 1168 # call on_return method if it's the end of this request 1169 unless ($retval > 0) { 1170 my $command = $self->on_return ($request, $response, $entry); 1171 $retval = $command if defined $command and $command < 0; 1172 1173 LWP::Debug::debug ("received '". (defined $command ? $command : '[undef]'). 1174 "' from on_return"); 1175 1176 } 1177 1178 if ($retval > 0) { 1179 # In this case, just update response entry 1180 # $entry->response($response); 1181 } else { # zero or negative, that means: EOF, C_LASTCON, C_ENDCON, C_ENDALL 1182 # read_chunk returns 0 if we reached EOF 1183 $self->_remove_in_socket($socket); 1184 # use protocol dependent method to close connection 1185 $entry->protocol->close_connection($entry->response, $socket, 1186 $entry->request, $entry->cmd_socket); 1187 # $socket->shutdown(2); # see "man perlfunc" & "man 2 shutdown" 1188 close ($socket); 1189 $socket = undef; # close socket 1190 1191 # remove from current_connections 1192 $self->_remove_current_connection ( $entry ); 1193 # handle redirects and security if neccessary 1194 1195 if ($retval eq C_ENDALL) { 1196 # should we clean up a bit? Remove Select-queues: 1197 $self->_remove_all_sockets(); 1198 return $self->{'entries_by_requests'}; 1199 } elsif ($retval eq C_LASTCON) { 1200 # just delete all pending connections 1201 $self->{'pending_connections'} = {}; 1202 $self->{'ordpend_connections'} = []; 1203 } else { 1204 if ($entry->redirect_ok) { 1205 $self->handle_response ($entry); 1206 } 1207 # pop off next pending_connection (if bandwith available) 1208 $self->_make_connections; 1209 } 1210 } 1211 return; 1212} 1213 1214=item $ua->handle_response($request, $arg [, $size]) 1215 1216Analyses results, handling redirects and security. This method may 1217actually register several different, additional requests. 1218 1219This method should not be called directly. Instead, indicate for each 1220individual request registered with C<$ua->register()> whether or not 1221you want Parallel::UserAgent to handle redirects and security, or 1222specify a default value for all requests in Parallel::UserAgent by 1223using C<$ua->redirect()>. 1224 1225=cut 1226 1227# this should be mainly the old LWP::UserAgent->request, although the 1228# beginning and end are different (gets all of its data via $entry 1229# parameter!) Also, instead of recursive calls this uses 1230# $ua->register now. 1231 1232sub handle_response 1233{ 1234 my($self, $entry) = @_; 1235 LWP::Debug::trace("-> ($entry [".$entry->request->url->as_string.'] )'); 1236 1237 # check if we should process this response 1238 # (maybe later - for now always check) 1239 1240 my ( $response, $request ) = $entry->get( qw( response request ) ); 1241 1242 my $code = $response->code; 1243 1244 LWP::Debug::debug('Handling result: '. 1245 (HTTP::Status::status_message($code) || 1246 "Unknown code $code")); 1247 1248 if ($code == &HTTP::Status::RC_MOVED_PERMANENTLY or 1249 $code == &HTTP::Status::RC_MOVED_TEMPORARILY) { 1250 1251 # Make a copy of the request and initialize it with the new URI 1252 my $referral = $request->clone; 1253 1254 # And then we update the URL based on the Location:-header. 1255 my($referral_uri) = $response->header('Location'); 1256 { 1257 # Some servers erroneously return a relative URL for redirects, 1258 # so make it absolute if it not already is. 1259 local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1; 1260 my $base = $response->base; 1261 $referral_uri = $HTTP::URI_CLASS->new($referral_uri, $base) 1262 ->abs($base); 1263 } 1264 1265 $referral->url($referral_uri); 1266 $referral->remove_header('Host'); 1267 1268 # don't do anything unless we're allowed to redirect 1269 return $response unless $self->redirect_ok($referral, $response); # fix by th. boutell 1270 1271 # Check for loop in the redirects 1272 my $count = 0; 1273 my $r = $response; 1274 while ($r) { 1275 if (++$count > 13 || 1276 $r->request->url->as_string eq $referral_uri->as_string) { 1277 $response->header("Client-Warning" => 1278 "Redirect loop detected"); 1279 return $response; 1280 } 1281 $r = $r->previous; 1282 } 1283 # From: "Andrey A. Chernov" <ache@nagual.pp.ru> 1284 $self->cookie_jar->extract_cookies($response) 1285 if $self->cookie_jar; 1286 # register follow up request 1287 LWP::Debug::trace("<- (registering follow up request: $referral, $entry)"); 1288 return $self->register ($referral, $entry); 1289 1290 } elsif ($code == &HTTP::Status::RC_UNAUTHORIZED || 1291 $code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED 1292 ) 1293 { 1294 my $proxy = ($code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED); 1295 my $ch_header = $proxy ? "Proxy-Authenticate" : "WWW-Authenticate"; 1296 my @challenge = $response->header($ch_header); 1297 unless (@challenge) { 1298 $response->header("Client-Warning" => 1299 "Missing Authenticate header"); 1300 # added the argument to header here (a guess at which header) 1301 # because it dies if you pass no header https://rt.cpan.org/Ticket/Display.html?id=46821 1302 LWP::Debug::trace("<- ($response [".$response->header('Client-Warning').'] )'); 1303 return $response; 1304 } 1305 1306 require HTTP::Headers::Util; 1307 CHALLENGE: for my $challenge (@challenge) { 1308 $challenge =~ tr/,/;/; # "," is used to separate auth-params!! 1309 ($challenge) = HTTP::Headers::Util::split_header_words($challenge); 1310 my $scheme = lc(shift(@$challenge)); 1311 shift(@$challenge); # no value 1312 $challenge = { @$challenge }; # make rest into a hash 1313 for (keys %$challenge) { # make sure all keys are lower case 1314 $challenge->{lc $_} = delete $challenge->{$_}; 1315 } 1316 1317 unless ($scheme =~ /^([a-z]+(?:-[a-z]+)*)$/) { 1318 $response->header("Client-Warning" => 1319 "Bad authentication scheme '$scheme'"); 1320 # added the argument to header here (a guess at which header) 1321 # because it dies if you pass no header https://rt.cpan.org/Ticket/Display.html?id=46821 1322 LWP::Debug::trace("<- ($response [".$response->header('Client-Warning').'] )'); 1323 return $response; 1324 } 1325 $scheme = $1; # untainted now 1326 my $class = "LWP::Authen::\u$scheme"; 1327 $class =~ s/-/_/g; 1328 1329 no strict 'refs'; 1330 unless (%{"$class\::"}) { 1331 # try to load it 1332 eval "require $class"; 1333 if ($@) { 1334 if ($@ =~ /^Can\'t locate/) { 1335 $response->header("Client-Warning" => 1336 "Unsupport authentication scheme '$scheme'"); 1337 } else { 1338 $response->header("Client-Warning" => $@); 1339 } 1340 next CHALLENGE; 1341 } 1342 } 1343 LWP::Debug::trace("<- authenticates"); 1344 return $class->authenticate($self, $proxy, $challenge, $response, 1345 $request, $entry->arg, $entry->size); 1346 } 1347 # added the argument to header here (a guess at which header) 1348 # because it dies if you pass no header https://rt.cpan.org/Ticket/Display.html?id=46821 1349 LWP::Debug::trace("<- ($response [".$response->header('Client-Warning').'] )'); 1350 return $response; 1351 } 1352 LWP::Debug::trace("<- standard exit ($response)"); 1353 return $response; 1354} 1355 1356# helper function for (simple_)request method. 1357sub _single_request { 1358 my $self = shift; 1359 my $res; 1360 if ( $res = $self->register (@_) ) { 1361 return $res->error_as_HTML; 1362 } 1363 my $entries = $self->wait(5); 1364 foreach (keys %$entries) { 1365 my $response = $entries->{$_}->response; 1366# $cookie_jar->extract_cookies($response) if $cookie_jar; 1367 $response->header("Client-Date" => HTTP::Date::time2str(time)); 1368 return $response; 1369 } 1370} 1371 1372=item DEPRECATED $ua->deprecated_simple_request($request, [$arg [, $size]]) 1373 1374This method simulated the behavior of LWP::UserAgent->simple_request. 1375It was actually kinda overkill to use this method in 1376Parallel::UserAgent, and it was mainly here for testing backward 1377compatibility with the original LWP::UserAgent. 1378 1379The name has been changed to deprecated_simple_request in case you 1380need it, but because it it no longer compatible with the most recent 1381version of libwww, it will no longer run by default. 1382 1383The following 1384description is taken directly from the corresponding libwww pod: 1385 1386$ua->simple_request dispatches a single WWW request on behalf of a 1387user, and returns the response received. The C<$request> should be a 1388reference to a C<HTTP::Request> object with values defined for at 1389least the method() and url() attributes. 1390 1391If C<$arg> is a scalar it is taken as a filename where the content of 1392the response is stored. 1393 1394If C<$arg> is a reference to a subroutine, then this routine is called 1395as chunks of the content is received. An optional C<$size> argument 1396is taken as a hint for an appropriate chunk size. 1397 1398If C<$arg> is omitted, then the content is stored in the response 1399object itself. 1400 1401=cut 1402 1403# sub simple_request 1404# (see LWP::UserAgent) 1405 1406# Took this out because with the new libwww it goes into deep 1407# recursion. I believe calls that might have hit this will now 1408# just go to LWP::UserAgent's implementation. If I comment 1409# these out, tests pass; with them in, you get this deep 1410# recursion. I'm assuming it's ok for them to just 1411# go away, since they were deprecated many years ago after 1412# all. 1413sub deprecated_send_request { 1414 my $self = shift; 1415 1416 $self->initialize; 1417 my $redirect = $self->redirect(0); 1418 my $response = $self->_single_request(@_); 1419 $self->redirect($redirect); 1420 return $response; 1421} 1422 1423=item DEPRECATED $ua->deprecated_request($request, $arg [, $size]) 1424 1425Previously called 'request' and included for compatibility testing with 1426LWP::UserAgent. Every day usage was deprecated, and now you have to call it 1427with the deprecated_request name if you want to use it (because an incompatibility 1428was introduced with the newer versions of libwww). 1429 1430Here is what LWP::UserAgent has to say about it: 1431 1432Process a request, including redirects and security. This method may 1433actually send several different simple reqeusts. 1434 1435The arguments are the same as for C<simple_request()>. 1436 1437=cut 1438 1439sub deprecated_request { 1440 my $self = shift; 1441 1442 $self->initialize; 1443 my $redirect = $self->redirect(1); 1444 my $response = $self->_single_request(@_); 1445 $self->redirect($redirect); 1446 return $response; 1447} 1448 1449=item $ua->as_string 1450 1451Returns a text that describe the state of the UA. Should be useful 1452for debugging, if it would print out anything important. But it does 1453not (at least not yet). Try using LWP::Debug... 1454 1455=cut 1456 1457sub as_string { 1458 my $self = shift; 1459 my @s; 1460 push(@s, "Parallel UA: [$self]"); 1461 push(@s, " <Nothing in here yet, sorry>"); 1462 join("\n", @s, ''); 1463} 1464 14651; 1466 1467# 1468# Parallel::UserAgent specific methods 1469# 1470sub init_request { 1471 my ($self, $request) = @_; 1472 my($method, $url) = ($request->method, $request->url); 1473 LWP::Debug::trace("-> ($request) [$method $url]"); 1474 1475 # Check that we have a METHOD and a URL first 1476 return LWP::UserAgent::_new_response($request, &HTTP::Status::RC_BAD_REQUEST, "Method missing") 1477 unless $method; 1478 return LWP::UserAgent::_new_response($request, &HTTP::Status::RC_BAD_REQUEST, "URL missing") 1479 unless $url; 1480 return LWP::UserAgent::_new_response($request, &HTTP::Status::RC_BAD_REQUEST, "URL must be absolute") 1481 unless $url->scheme; 1482 1483 1484 LWP::Debug::trace("$method $url"); 1485 1486 # Locate protocol to use 1487 my $scheme = ''; 1488 1489 my $proxy = $self->_need_proxy($url); 1490 if (defined $proxy) { 1491 $scheme = $proxy->scheme; 1492 } else { 1493 $scheme = $url->scheme; 1494 } 1495 my $protocol; 1496 eval { 1497 # add Parallel extension here 1498 $protocol = LWP::Parallel::Protocol::create($scheme); 1499 }; 1500 if ($@) { 1501 # remove file/line number 1502 # $@ =~ s/\s+at\s+\S+\s+line\s+\d+.*//s; 1503 return LWP::UserAgent::_new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@) 1504 } 1505 1506 # Extract fields that will be used below 1507 my ($agent, $from, $timeout, $cookie_jar, 1508 $use_eval, $parse_head, $max_size, $nonblock) = 1509 @{$self}{qw(agent from timeout cookie_jar 1510 use_eval parse_head max_size nonblock)}; 1511 1512 # Set User-Agent and From headers if they are defined 1513 $request->init_header('User-Agent' => $agent) if $agent; 1514 $request->init_header('From' => $from) if $from; 1515 $request->init_header('Range' => "bytes=0-$max_size") if $max_size; 1516 $cookie_jar->add_cookie_header($request) if $cookie_jar; 1517 1518 # Transfer some attributes to the protocol object 1519 $protocol->can('parse_head') ? 1520 $protocol->_elem('parse_head', $parse_head); 1521 $protocol->max_size($max_size); 1522 1523 LWP::Debug::trace ("<- (undef". 1524 ", ". (defined $proxy ? $proxy : '[undef]'). 1525 ", ". (defined $protocol ? $protocol : '[undef]'). 1526 ", ". (defined $timeout ? $timeout : '[undef]'). 1527 ", ". (defined $use_eval ? $use_eval : '[undef]').")"); 1528 1529 (undef, $proxy, $protocol, $timeout, $use_eval, $nonblock); 1530} 1531 1532=head1 ADDITIONAL METHODS 1533 1534=item $ua->use_alarm([$boolean]) 1535 1536This function is not in use anymore and will display a warning when 1537called and warnings are enabled. 1538 1539=cut 1540 1541sub use_alarm { 1542 warn "The Parallel::UserAgent->use_alarm method is not available anymore.\n" if $^W; 1543} 1544 1545=head1 Callback functions 1546 1547You can register a callback function. See LWP::UserAgent for details. 1548 1549=head1 BUGS 1550 1551Probably lots! This was meant only as an interim release until this 1552functionality is incorporated into LWPng, the next generation libwww 1553module (though it has been this way for over 2 years now!) 1554 1555Needs a lot more documentation on how callbacks work! 1556 1557=head1 SEE ALSO 1558 1559L<LWP::UserAgent> 1560 1561=head1 COPYRIGHT 1562 1563Copyright 1997-2004 Marc Langheinrich E<lt>marclang@cpan.org> 1564 1565This library is free software; you can redistribute it and/or 1566modify it under the same terms as Perl itself. 1567 1568=cut 1569 1570__END__ 1571