1package File::Fetch; 2 3use strict; 4use FileHandle; 5use File::Temp; 6use File::Copy; 7use File::Spec; 8use File::Spec::Unix; 9use File::Basename qw[dirname]; 10 11use Cwd qw[cwd]; 12use Carp qw[carp]; 13use IPC::Cmd qw[can_run run QUOTE]; 14use File::Path qw[mkpath]; 15use File::Temp qw[tempdir]; 16use Params::Check qw[check]; 17use Module::Load::Conditional qw[can_load]; 18use Locale::Maketext::Simple Style => 'gettext'; 19 20use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT 21 $BLACKLIST $METHOD_FAIL $VERSION $METHODS 22 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN $FORCEIPV4 23 ]; 24 25$VERSION = '0.56'; 26$VERSION = eval $VERSION; # avoid warnings with development releases 27$PREFER_BIN = 0; # XXX TODO implement 28$FROM_EMAIL = 'File-Fetch@example.com'; 29$USER_AGENT = "File::Fetch/$VERSION"; 30$BLACKLIST = [qw|ftp|]; 31push @$BLACKLIST, qw|lftp| if $^O eq 'dragonfly' || $^O eq 'hpux'; 32$METHOD_FAIL = { }; 33$FTP_PASSIVE = 1; 34$TIMEOUT = 0; 35$DEBUG = 0; 36$WARN = 1; 37$FORCEIPV4 = 0; 38 39### methods available to fetch the file depending on the scheme 40$METHODS = { 41 http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ], 42 https => [ qw|lwp wget curl| ], 43 ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ], 44 file => [ qw|lwp lftp file| ], 45 rsync => [ qw|rsync| ], 46 git => [ qw|git| ], 47}; 48 49### silly warnings ### 50local $Params::Check::VERBOSE = 1; 51local $Params::Check::VERBOSE = 1; 52local $Module::Load::Conditional::VERBOSE = 0; 53local $Module::Load::Conditional::VERBOSE = 0; 54 55### Fix CVE-2016-1238 ### 56local $Module::Load::Conditional::FORCE_SAFE_INC = 1; 57 58### see what OS we are on, important for file:// uris ### 59use constant ON_WIN => ($^O eq 'MSWin32'); 60use constant ON_VMS => ($^O eq 'VMS'); 61use constant ON_UNIX => (!ON_WIN); 62use constant HAS_VOL => (ON_WIN); 63use constant HAS_SHARE => (ON_WIN); 64use constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! ); 65 66=pod 67 68=head1 NAME 69 70File::Fetch - A generic file fetching mechanism 71 72=head1 SYNOPSIS 73 74 use File::Fetch; 75 76 ### build a File::Fetch object ### 77 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt'); 78 79 ### fetch the uri to cwd() ### 80 my $where = $ff->fetch() or die $ff->error; 81 82 ### fetch the uri to /tmp ### 83 my $where = $ff->fetch( to => '/tmp' ); 84 85 ### parsed bits from the uri ### 86 $ff->uri; 87 $ff->scheme; 88 $ff->host; 89 $ff->path; 90 $ff->file; 91 92=head1 DESCRIPTION 93 94File::Fetch is a generic file fetching mechanism. 95 96It allows you to fetch any file pointed to by a C<ftp>, C<http>, 97C<file>, C<git> or C<rsync> uri by a number of different means. 98 99See the C<HOW IT WORKS> section further down for details. 100 101=head1 ACCESSORS 102 103A C<File::Fetch> object has the following accessors 104 105=over 4 106 107=item $ff->uri 108 109The uri you passed to the constructor 110 111=item $ff->scheme 112 113The scheme from the uri (like 'file', 'http', etc) 114 115=item $ff->host 116 117The hostname in the uri. Will be empty if host was originally 118'localhost' for a 'file://' url. 119 120=item $ff->vol 121 122On operating systems with the concept of a volume the second element 123of a file:// is considered to the be volume specification for the file. 124Thus on Win32 this routine returns the volume, on other operating 125systems this returns nothing. 126 127On Windows this value may be empty if the uri is to a network share, in 128which case the 'share' property will be defined. Additionally, volume 129specifications that use '|' as ':' will be converted on read to use ':'. 130 131On VMS, which has a volume concept, this field will be empty because VMS 132file specifications are converted to absolute UNIX format and the volume 133information is transparently included. 134 135=item $ff->share 136 137On systems with the concept of a network share (currently only Windows) returns 138the sharename from a file://// url. On other operating systems returns empty. 139 140=item $ff->path 141 142The path from the uri, will be at least a single '/'. 143 144=item $ff->file 145 146The name of the remote file. For the local file name, the 147result of $ff->output_file will be used. 148 149=item $ff->file_default 150 151The name of the default local file, that $ff->output_file falls back to if 152it would otherwise return no filename. For example when fetching a URI like 153http://www.abc.net.au/ the contents retrieved may be from a remote file called 154'index.html'. The default value of this attribute is literally 'file_default'. 155 156=cut 157 158 159########################## 160### Object & Accessors ### 161########################## 162 163{ 164 ### template for autogenerated accessors ### 165 my $Tmpl = { 166 scheme => { default => 'http' }, 167 host => { default => 'localhost' }, 168 path => { default => '/' }, 169 file => { required => 1 }, 170 uri => { required => 1 }, 171 userinfo => { default => '' }, 172 vol => { default => '' }, # windows for file:// uris 173 share => { default => '' }, # windows for file:// uris 174 file_default => { default => 'file_default' }, 175 tempdir_root => { required => 1 }, # Should be lazy-set at ->new() 176 _error_msg => { no_override => 1 }, 177 _error_msg_long => { no_override => 1 }, 178 }; 179 180 for my $method ( keys %$Tmpl ) { 181 no strict 'refs'; 182 *$method = sub { 183 my $self = shift; 184 $self->{$method} = $_[0] if @_; 185 return $self->{$method}; 186 } 187 } 188 189 sub _create { 190 my $class = shift; 191 my %hash = @_; 192 193 my $args = check( $Tmpl, \%hash ) or return; 194 195 bless $args, $class; 196 197 if( lc($args->scheme) ne 'file' and not $args->host ) { 198 return $class->_error(loc( 199 "Hostname required when fetching from '%1'",$args->scheme)); 200 } 201 202 for (qw[path]) { 203 unless( $args->$_() ) { # 5.5.x needs the () 204 return $class->_error(loc("No '%1' specified",$_)); 205 } 206 } 207 208 return $args; 209 } 210} 211 212=item $ff->output_file 213 214The name of the output file. This is the same as $ff->file, 215but any query parameters are stripped off. For example: 216 217 http://example.com/index.html?x=y 218 219would make the output file be C<index.html> rather than 220C<index.html?x=y>. 221 222=back 223 224=cut 225 226sub output_file { 227 my $self = shift; 228 my $file = $self->file; 229 230 $file =~ s/\?.*$//g; 231 232 $file ||= $self->file_default; 233 234 return $file; 235} 236 237### XXX do this or just point to URI::Escape? 238# =head2 $esc_uri = $ff->escaped_uri 239# 240# =cut 241# 242# ### most of this is stolen straight from URI::escape 243# { ### Build a char->hex map 244# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255; 245# 246# sub escaped_uri { 247# my $self = shift; 248# my $uri = $self->uri; 249# 250# ### Default unsafe characters. RFC 2732 ^(uric - reserved) 251# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/ 252# $escapes{$1} || $self->_fail_hi($1)/ge; 253# 254# return $uri; 255# } 256# 257# sub _fail_hi { 258# my $self = shift; 259# my $char = shift; 260# 261# $self->_error(loc( 262# "Can't escape '%1', try using the '%2' module instead", 263# sprintf("\\x{%04X}", ord($char)), 'URI::Escape' 264# )); 265# } 266# 267# sub output_file { 268# 269# } 270# 271# 272# } 273 274=head1 METHODS 275 276=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' ); 277 278Parses the uri and creates a corresponding File::Fetch::Item object, 279that is ready to be C<fetch>ed and returns it. 280 281Returns false on failure. 282 283=cut 284 285sub new { 286 my $class = shift; 287 my %hash = @_; 288 289 my ($uri, $file_default, $tempdir_root); 290 my $tmpl = { 291 uri => { required => 1, store => \$uri }, 292 file_default => { required => 0, store => \$file_default }, 293 tempdir_root => { required => 0, store => \$tempdir_root }, 294 }; 295 296 check( $tmpl, \%hash ) or return; 297 298 ### parse the uri to usable parts ### 299 my $href = $class->_parse_uri( $uri ) or return; 300 301 $href->{file_default} = $file_default if $file_default; 302 $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root; 303 $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd ) if not $href->{tempdir_root}; 304 305 ### make it into a FFI object ### 306 my $ff = $class->_create( %$href ) or return; 307 308 309 ### return the object ### 310 return $ff; 311} 312 313### parses an uri to a hash structure: 314### 315### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' ) 316### 317### becomes: 318### 319### $href = { 320### scheme => 'ftp', 321### host => 'ftp.cpan.org', 322### path => '/pub/mirror', 323### file => 'index.html' 324### }; 325### 326### In the case of file:// urls there maybe be additional fields 327### 328### For systems with volume specifications such as Win32 there will be 329### a volume specifier provided in the 'vol' field. 330### 331### 'vol' => 'volumename' 332### 333### For windows file shares there may be a 'share' key specified 334### 335### 'share' => 'sharename' 336### 337### Note that the rules of what a file:// url means vary by the operating system 338### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious 339### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and 340### not '/foo/bar.txt' 341### 342### Similarly if the host interpreting the url is VMS then 343### file:///disk$user/my/notes/note12345.txt' means 344### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as 345### if it is unix where it means /disk$user/my/notes/note12345.txt'. 346### Except for some cases in the File::Spec methods, Perl on VMS will generally 347### handle UNIX format file specifications. 348### 349### This means it is impossible to serve certain file:// urls on certain systems. 350### 351### Thus are the problems with a protocol-less specification. :-( 352### 353 354sub _parse_uri { 355 my $self = shift; 356 my $uri = shift or return; 357 358 my $href = { uri => $uri }; 359 360 ### find the scheme ### 361 $uri =~ s|^(\w+)://||; 362 $href->{scheme} = $1; 363 364 ### See rfc 1738 section 3.10 365 ### http://www.faqs.org/rfcs/rfc1738.html 366 ### And wikipedia for more on windows file:// urls 367 ### http://en.wikipedia.org/wiki/File:// 368 if( $href->{scheme} eq 'file' ) { 369 370 my @parts = split '/',$uri; 371 372 ### file://hostname/... 373 ### file://hostname/... 374 ### normalize file://localhost with file:/// 375 $href->{host} = $parts[0] || ''; 376 377 ### index in @parts where the path components begin; 378 my $index = 1; 379 380 ### file:////hostname/sharename/blah.txt 381 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) { 382 383 $href->{host} = $parts[2] || ''; # avoid warnings 384 $href->{share} = $parts[3] || ''; # avoid warnings 385 386 $index = 4 # index after the share 387 388 ### file:///D|/blah.txt 389 ### file:///D:/blah.txt 390 } elsif (HAS_VOL) { 391 392 ### this code comes from dmq's patch, but: 393 ### XXX if volume is empty, wouldn't that be an error? --kane 394 ### if so, our file://localhost test needs to be fixed as wel 395 $href->{vol} = $parts[1] || ''; 396 397 ### correct D| style colume descriptors 398 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN; 399 400 $index = 2; # index after the volume 401 } 402 403 ### rebuild the path from the leftover parts; 404 $href->{path} = join '/', '', splice( @parts, $index, $#parts ); 405 406 } else { 407 ### using anything but qw() in hash slices may produce warnings 408 ### in older perls :-( 409 @{$href}{ qw(userinfo host path) } = $uri =~ m|(?:([^\@:]*:[^\:\@]*)@)?([^/]*)(/.*)$|s; 410 } 411 412 ### split the path into file + dir ### 413 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} ); 414 $href->{path} = $parts[1]; 415 $href->{file} = $parts[2]; 416 } 417 418 ### host will be empty if the target was 'localhost' and the 419 ### scheme was 'file' 420 $href->{host} = '' if ($href->{host} eq 'localhost') and 421 ($href->{scheme} eq 'file'); 422 423 return $href; 424} 425 426=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] ) 427 428Fetches the file you requested and returns the full path to the file. 429 430By default it writes to C<cwd()>, but you can override that by specifying 431the C<to> argument: 432 433 ### file fetch to /tmp, full path to the file in $where 434 $where = $ff->fetch( to => '/tmp' ); 435 436 ### file slurped into $scalar, full path to the file in $where 437 ### file is downloaded to a temp directory and cleaned up at exit time 438 $where = $ff->fetch( to => \$scalar ); 439 440Returns the full path to the downloaded file on success, and false 441on failure. 442 443=cut 444 445sub fetch { 446 my $self = shift or return; 447 my %hash = @_; 448 449 my $target; 450 my $tmpl = { 451 to => { default => cwd(), store => \$target }, 452 }; 453 454 check( $tmpl, \%hash ) or return; 455 456 my ($to, $fh); 457 ### you want us to slurp the contents 458 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { 459 $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 ); 460 461 ### plain old fetch 462 } else { 463 $to = $target; 464 465 ### On VMS force to VMS format so File::Spec will work. 466 $to = VMS::Filespec::vmspath($to) if ON_VMS; 467 468 ### create the path if it doesn't exist yet ### 469 unless( -d $to ) { 470 eval { mkpath( $to ) }; 471 472 return $self->_error(loc("Could not create path '%1'",$to)) if $@; 473 } 474 } 475 476 ### set passive ftp if required ### 477 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE; 478 479 ### we dont use catfile on win32 because if we are using a cygwin tool 480 ### under cmd.exe they wont understand windows style separators. 481 my $out_to = ON_WIN ? $to.'/'.$self->output_file 482 : File::Spec->catfile( $to, $self->output_file ); 483 484 for my $method ( @{ $METHODS->{$self->scheme} } ) { 485 my $sub = '_'.$method.'_fetch'; 486 487 unless( __PACKAGE__->can($sub) ) { 488 $self->_error(loc("Cannot call method for '%1' -- WEIRD!", 489 $method)); 490 next; 491 } 492 493 ### method is blacklisted ### 494 next if grep { lc $_ eq $method } @$BLACKLIST; 495 496 ### method is known to fail ### 497 next if $METHOD_FAIL->{$method}; 498 499 ### there's serious issues with IPC::Run and quoting of command 500 ### line arguments. using quotes in the wrong place breaks things, 501 ### and in the case of say, 502 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document 503 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2" 504 ### it doesn't matter how you quote, it always fails. 505 local $IPC::Cmd::USE_IPC_RUN = 0; 506 507 if( my $file = $self->$sub( 508 to => $out_to 509 )){ 510 511 unless( -e $file && -s _ ) { 512 $self->_error(loc("'%1' said it fetched '%2', ". 513 "but it was not created",$method,$file)); 514 515 ### mark the failure ### 516 $METHOD_FAIL->{$method} = 1; 517 518 next; 519 520 } else { 521 522 ### slurp mode? 523 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { 524 525 ### open the file 526 open my $fh, "<$file" or do { 527 $self->_error( 528 loc("Could not open '%1': %2", $file, $!)); 529 return; 530 }; 531 532 ### slurp 533 $$target = do { local $/; <$fh> }; 534 535 } 536 537 my $abs = File::Spec->rel2abs( $file ); 538 return $abs; 539 540 } 541 } 542 } 543 544 545 ### if we got here, we looped over all methods, but we weren't able 546 ### to fetch it. 547 return; 548} 549 550######################## 551### _*_fetch methods ### 552######################## 553 554### LWP fetching ### 555sub _lwp_fetch { 556 my $self = shift; 557 my %hash = @_; 558 559 my ($to); 560 my $tmpl = { 561 to => { required => 1, store => \$to } 562 }; 563 check( $tmpl, \%hash ) or return; 564 565 ### modules required to download with lwp ### 566 my $use_list = { 567 LWP => '0.0', 568 'LWP::UserAgent' => '0.0', 569 'HTTP::Request' => '0.0', 570 'HTTP::Status' => '0.0', 571 URI => '0.0', 572 573 }; 574 575 if ($self->scheme eq 'https') { 576 $use_list->{'LWP::Protocol::https'} = '0'; 577 } 578 579 unless( can_load( modules => $use_list ) ) { 580 $METHOD_FAIL->{'lwp'} = 1; 581 return; 582 } 583 584 ### setup the uri object 585 my $uri = URI->new( File::Spec::Unix->catfile( 586 $self->path, $self->file 587 ) ); 588 589 ### special rules apply for file:// uris ### 590 $uri->scheme( $self->scheme ); 591 $uri->host( $self->scheme eq 'file' ? '' : $self->host ); 592 593 if ($self->userinfo) { 594 $uri->userinfo($self->userinfo); 595 } elsif ($self->scheme ne 'file') { 596 $uri->userinfo("anonymous:$FROM_EMAIL"); 597 } 598 599 ### set up the useragent object 600 my $ua = LWP::UserAgent->new(); 601 $ua->timeout( $TIMEOUT ) if $TIMEOUT; 602 $ua->agent( $USER_AGENT ); 603 $ua->from( $FROM_EMAIL ); 604 $ua->env_proxy; 605 606 my $res = $ua->mirror($uri, $to) or return; 607 608 ### uptodate or fetched ok ### 609 if ( $res->code == 304 or $res->code == 200 ) { 610 return $to; 611 612 } else { 613 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]", 614 $res->code, HTTP::Status::status_message($res->code), 615 $res->status_line)); 616 } 617 618} 619 620### HTTP::Tiny fetching ### 621sub _httptiny_fetch { 622 my $self = shift; 623 my %hash = @_; 624 625 my ($to); 626 my $tmpl = { 627 to => { required => 1, store => \$to } 628 }; 629 check( $tmpl, \%hash ) or return; 630 631 my $use_list = { 632 'HTTP::Tiny' => '0.008', 633 634 }; 635 636 unless( can_load(modules => $use_list) ) { 637 $METHOD_FAIL->{'httptiny'} = 1; 638 return; 639 } 640 641 my $uri = $self->uri; 642 643 my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) ); 644 645 my $rc = $http->mirror( $uri, $to ); 646 647 unless ( $rc->{success} ) { 648 649 return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]", 650 $rc->{status}, $rc->{reason} ) ); 651 652 } 653 654 return $to; 655 656} 657 658### HTTP::Lite fetching ### 659sub _httplite_fetch { 660 my $self = shift; 661 my %hash = @_; 662 663 my ($to); 664 my $tmpl = { 665 to => { required => 1, store => \$to } 666 }; 667 check( $tmpl, \%hash ) or return; 668 669 ### modules required to download with lwp ### 670 my $use_list = { 671 'HTTP::Lite' => '2.2', 672 'MIME::Base64' => '0', 673 }; 674 675 unless( can_load(modules => $use_list) ) { 676 $METHOD_FAIL->{'httplite'} = 1; 677 return; 678 } 679 680 my $uri = $self->uri; 681 my $retries = 0; 682 683 RETRIES: while ( $retries++ < 5 ) { 684 685 my $http = HTTP::Lite->new(); 686 # Naughty naughty but there isn't any accessor/setter 687 $http->{timeout} = $TIMEOUT if $TIMEOUT; 688 $http->http11_mode(1); 689 690 if ($self->userinfo) { 691 my $encoded = MIME::Base64::encode($self->userinfo, ''); 692 $http->add_req_header("Authorization", "Basic $encoded"); 693 } 694 695 my $fh = FileHandle->new; 696 697 unless ( $fh->open($to,'>') ) { 698 return $self->_error(loc( 699 "Could not open '%1' for writing: %2",$to,$!)); 700 } 701 702 $fh->autoflush(1); 703 704 binmode $fh; 705 706 my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh ); 707 708 close $fh; 709 710 if ( $rc == 301 || $rc == 302 ) { 711 my $loc; 712 HEADERS: for ($http->headers_array) { 713 /Location: (\S+)/ and $loc = $1, last HEADERS; 714 } 715 #$loc or last; # Think we should squeal here. 716 if ($loc =~ m!^/!) { 717 $uri =~ s{^(\w+?://[^/]+)/.*$}{$1}; 718 $uri .= $loc; 719 } 720 else { 721 $uri = $loc; 722 } 723 next RETRIES; 724 } 725 elsif ( $rc == 200 ) { 726 return $to; 727 } 728 else { 729 return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]", 730 $rc, $http->status_message)); 731 } 732 733 } # Loop for 5 retries. 734 735 return $self->_error("Fetch failed! Gave up after 5 tries"); 736 737} 738 739### Simple IO::Socket::INET fetching ### 740sub _iosock_fetch { 741 my $self = shift; 742 my %hash = @_; 743 744 my ($to); 745 my $tmpl = { 746 to => { required => 1, store => \$to } 747 }; 748 check( $tmpl, \%hash ) or return; 749 750 my $use_list = { 751 'IO::Socket::INET' => '0.0', 752 'IO::Select' => '0.0', 753 }; 754 755 unless( can_load(modules => $use_list) ) { 756 $METHOD_FAIL->{'iosock'} = 1; 757 return; 758 } 759 760 my $sock = IO::Socket::INET->new( 761 PeerHost => $self->host, 762 ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ), 763 ); 764 765 unless ( $sock ) { 766 return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!)); 767 } 768 769 my $fh = FileHandle->new; 770 771 # Check open() 772 773 unless ( $fh->open($to,'>') ) { 774 return $self->_error(loc( 775 "Could not open '%1' for writing: %2",$to,$!)); 776 } 777 778 $fh->autoflush(1); 779 binmode $fh; 780 781 my $path = File::Spec::Unix->catfile( $self->path, $self->file ); 782 my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a"; 783 $sock->send( $req ); 784 785 my $select = IO::Select->new( $sock ); 786 787 my $resp = ''; 788 my $normal = 0; 789 while ( $select->can_read( $TIMEOUT || 60 ) ) { 790 my $ret = $sock->sysread( $resp, 4096, length($resp) ); 791 if ( !defined $ret or $ret == 0 ) { 792 $select->remove( $sock ); 793 $normal++; 794 } 795 } 796 close $sock; 797 798 unless ( $normal ) { 799 return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 ))); 800 } 801 802 # Check the "response" 803 # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1) 804 $resp =~ s/^(\x0d?\x0a)+//; 805 # Check it is an HTTP response 806 unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) { 807 return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host)); 808 } 809 810 # Check for OK 811 my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i; 812 unless ( $code eq '200' ) { 813 return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host)); 814 } 815 816 { 817 local $\; 818 print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0]; 819 } 820 close $fh; 821 return $to; 822} 823 824### Net::FTP fetching 825sub _netftp_fetch { 826 my $self = shift; 827 my %hash = @_; 828 829 my ($to); 830 my $tmpl = { 831 to => { required => 1, store => \$to } 832 }; 833 check( $tmpl, \%hash ) or return; 834 835 ### required modules ### 836 my $use_list = { 'Net::FTP' => 0 }; 837 838 unless( can_load( modules => $use_list ) ) { 839 $METHOD_FAIL->{'netftp'} = 1; 840 return; 841 } 842 843 ### make connection ### 844 my $ftp; 845 my @options = ($self->host); 846 push(@options, Timeout => $TIMEOUT) if $TIMEOUT; 847 unless( $ftp = Net::FTP->new( @options ) ) { 848 return $self->_error(loc("Ftp creation failed: %1",$@)); 849 } 850 851 ### login ### 852 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) { 853 return $self->_error(loc("Could not login to '%1'",$self->host)); 854 } 855 856 ### set binary mode, just in case ### 857 $ftp->binary; 858 859 ### create the remote path 860 ### remember remote paths are unix paths! [#11483] 861 my $remote = File::Spec::Unix->catfile( $self->path, $self->file ); 862 863 ### fetch the file ### 864 my $target; 865 unless( $target = $ftp->get( $remote, $to ) ) { 866 return $self->_error(loc("Could not fetch '%1' from '%2'", 867 $remote, $self->host)); 868 } 869 870 ### log out ### 871 $ftp->quit; 872 873 return $target; 874 875} 876 877### /bin/wget fetch ### 878sub _wget_fetch { 879 my $self = shift; 880 my %hash = @_; 881 882 my ($to); 883 my $tmpl = { 884 to => { required => 1, store => \$to } 885 }; 886 check( $tmpl, \%hash ) or return; 887 888 my $wget; 889 ### see if we have a wget binary ### 890 unless( $wget = can_run('wget') ) { 891 $METHOD_FAIL->{'wget'} = 1; 892 return; 893 } 894 895 ### no verboseness, thanks ### 896 my $cmd = [ $wget, '--quiet' ]; 897 898 ### if a timeout is set, add it ### 899 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 900 901 ### run passive if specified ### 902 push @$cmd, '--passive-ftp' if $FTP_PASSIVE; 903 904 ### set the output document, add the uri ### 905 push @$cmd, '--output-document', $to, $self->uri; 906 907 ### with IPC::Cmd > 0.41, this is fixed in teh library, 908 ### and there's no need for special casing any more. 909 ### DO NOT quote things for IPC::Run, it breaks stuff. 910 # $IPC::Cmd::USE_IPC_RUN 911 # ? ($to, $self->uri) 912 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 913 914 ### shell out ### 915 my $captured; 916 unless(run( command => $cmd, 917 buffer => \$captured, 918 verbose => $DEBUG 919 )) { 920 ### wget creates the output document always, even if the fetch 921 ### fails.. so unlink it in that case 922 1 while unlink $to; 923 924 return $self->_error(loc( "Command failed: %1", $captured || '' )); 925 } 926 927 return $to; 928} 929 930### /bin/lftp fetch ### 931sub _lftp_fetch { 932 my $self = shift; 933 my %hash = @_; 934 935 my ($to); 936 my $tmpl = { 937 to => { required => 1, store => \$to } 938 }; 939 check( $tmpl, \%hash ) or return; 940 941 ### see if we have a lftp binary ### 942 my $lftp; 943 unless( $lftp = can_run('lftp') ) { 944 $METHOD_FAIL->{'lftp'} = 1; 945 return; 946 } 947 948 ### no verboseness, thanks ### 949 my $cmd = [ $lftp, '-f' ]; 950 951 my $fh = File::Temp->new; 952 953 my $str; 954 955 ### if a timeout is set, add it ### 956 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT; 957 958 ### run passive if specified ### 959 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE; 960 961 ### set the output document, add the uri ### 962 ### quote the URI, because lftp supports certain shell 963 ### expansions, most notably & for backgrounding. 964 ### ' quote does nto work, must be " 965 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/; 966 967 if( $DEBUG ) { 968 my $pp_str = join ' ', split $/, $str; 969 print "# lftp command: $pp_str\n"; 970 } 971 972 ### write straight to the file. 973 $fh->autoflush(1); 974 print $fh $str; 975 976 ### the command needs to be 1 string to be executed 977 push @$cmd, $fh->filename; 978 979 ### with IPC::Cmd > 0.41, this is fixed in teh library, 980 ### and there's no need for special casing any more. 981 ### DO NOT quote things for IPC::Run, it breaks stuff. 982 # $IPC::Cmd::USE_IPC_RUN 983 # ? ($to, $self->uri) 984 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 985 986 987 ### shell out ### 988 my $captured; 989 unless(run( command => $cmd, 990 buffer => \$captured, 991 verbose => $DEBUG 992 )) { 993 ### wget creates the output document always, even if the fetch 994 ### fails.. so unlink it in that case 995 1 while unlink $to; 996 997 return $self->_error(loc( "Command failed: %1", $captured || '' )); 998 } 999 1000 return $to; 1001} 1002 1003 1004 1005### /bin/ftp fetch ### 1006sub _ftp_fetch { 1007 my $self = shift; 1008 my %hash = @_; 1009 1010 my ($to); 1011 my $tmpl = { 1012 to => { required => 1, store => \$to } 1013 }; 1014 check( $tmpl, \%hash ) or return; 1015 1016 ### see if we have a ftp binary ### 1017 my $ftp; 1018 unless( $ftp = can_run('ftp') ) { 1019 $METHOD_FAIL->{'ftp'} = 1; 1020 return; 1021 } 1022 1023 my $fh = FileHandle->new; 1024 1025 local $SIG{CHLD} = 'IGNORE'; 1026 1027 unless ($fh->open("$ftp -n", '|-')) { 1028 return $self->_error(loc("%1 creation failed: %2", $ftp, $!)); 1029 } 1030 1031 my @dialog = ( 1032 "lcd " . dirname($to), 1033 "open " . $self->host, 1034 "user anonymous $FROM_EMAIL", 1035 "cd /", 1036 "cd " . $self->path, 1037 "binary", 1038 "get " . $self->file . " " . $self->output_file, 1039 "quit", 1040 ); 1041 1042 foreach (@dialog) { $fh->print($_, "\n") } 1043 $fh->close or return; 1044 1045 return $to; 1046} 1047 1048### lynx is stupid - it decompresses any .gz file it finds to be text 1049### use /bin/lynx to fetch files 1050sub _lynx_fetch { 1051 my $self = shift; 1052 my %hash = @_; 1053 1054 my ($to); 1055 my $tmpl = { 1056 to => { required => 1, store => \$to } 1057 }; 1058 check( $tmpl, \%hash ) or return; 1059 1060 ### see if we have a lynx binary ### 1061 my $lynx; 1062 unless ( $lynx = can_run('lynx') ){ 1063 $METHOD_FAIL->{'lynx'} = 1; 1064 return; 1065 } 1066 1067 unless( IPC::Cmd->can_capture_buffer ) { 1068 $METHOD_FAIL->{'lynx'} = 1; 1069 1070 return $self->_error(loc( 1071 "Can not capture buffers. Can not use '%1' to fetch files", 1072 'lynx' )); 1073 } 1074 1075 ### check if the HTTP resource exists ### 1076 if ($self->uri =~ /^https?:\/\//i) { 1077 my $cmd = [ 1078 $lynx, 1079 '-head', 1080 '-source', 1081 "-auth=anonymous:$FROM_EMAIL", 1082 ]; 1083 1084 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; 1085 1086 push @$cmd, $self->uri; 1087 1088 ### shell out ### 1089 my $head; 1090 unless(run( command => $cmd, 1091 buffer => \$head, 1092 verbose => $DEBUG ) 1093 ) { 1094 return $self->_error(loc("Command failed: %1", $head || '')); 1095 } 1096 1097 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) { 1098 return $self->_error(loc("Command failed: %1", $head || '')); 1099 } 1100 } 1101 1102 ### write to the output file ourselves, since lynx ass_u_mes to much 1103 my $local = FileHandle->new( $to, 'w' ) 1104 or return $self->_error(loc( 1105 "Could not open '%1' for writing: %2",$to,$!)); 1106 1107 ### dump to stdout ### 1108 my $cmd = [ 1109 $lynx, 1110 '-source', 1111 "-auth=anonymous:$FROM_EMAIL", 1112 ]; 1113 1114 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; 1115 1116 ### DO NOT quote things for IPC::Run, it breaks stuff. 1117 push @$cmd, $self->uri; 1118 1119 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1120 ### and there's no need for special casing any more. 1121 ### DO NOT quote things for IPC::Run, it breaks stuff. 1122 # $IPC::Cmd::USE_IPC_RUN 1123 # ? $self->uri 1124 # : QUOTE. $self->uri .QUOTE; 1125 1126 1127 ### shell out ### 1128 my $captured; 1129 unless(run( command => $cmd, 1130 buffer => \$captured, 1131 verbose => $DEBUG ) 1132 ) { 1133 return $self->_error(loc("Command failed: %1", $captured || '')); 1134 } 1135 1136 ### print to local file ### 1137 ### XXX on a 404 with a special error page, $captured will actually 1138 ### hold the contents of that page, and make it *appear* like the 1139 ### request was a success, when really it wasn't :( 1140 ### there doesn't seem to be an option for lynx to change the exit 1141 ### code based on a 4XX status or so. 1142 ### the closest we can come is using --error_file and parsing that, 1143 ### which is very unreliable ;( 1144 $local->print( $captured ); 1145 $local->close or return; 1146 1147 return $to; 1148} 1149 1150### use /bin/ncftp to fetch files 1151sub _ncftp_fetch { 1152 my $self = shift; 1153 my %hash = @_; 1154 1155 my ($to); 1156 my $tmpl = { 1157 to => { required => 1, store => \$to } 1158 }; 1159 check( $tmpl, \%hash ) or return; 1160 1161 ### we can only set passive mode in interactive sessions, so bail out 1162 ### if $FTP_PASSIVE is set 1163 return if $FTP_PASSIVE; 1164 1165 ### see if we have a ncftp binary ### 1166 my $ncftp; 1167 unless( $ncftp = can_run('ncftp') ) { 1168 $METHOD_FAIL->{'ncftp'} = 1; 1169 return; 1170 } 1171 1172 my $cmd = [ 1173 $ncftp, 1174 '-V', # do not be verbose 1175 '-p', $FROM_EMAIL, # email as password 1176 $self->host, # hostname 1177 dirname($to), # local dir for the file 1178 # remote path to the file 1179 ### DO NOT quote things for IPC::Run, it breaks stuff. 1180 $IPC::Cmd::USE_IPC_RUN 1181 ? File::Spec::Unix->catdir( $self->path, $self->file ) 1182 : QUOTE. File::Spec::Unix->catdir( 1183 $self->path, $self->file ) .QUOTE 1184 1185 ]; 1186 1187 ### shell out ### 1188 my $captured; 1189 unless(run( command => $cmd, 1190 buffer => \$captured, 1191 verbose => $DEBUG ) 1192 ) { 1193 return $self->_error(loc("Command failed: %1", $captured || '')); 1194 } 1195 1196 return $to; 1197 1198} 1199 1200### use /bin/curl to fetch files 1201sub _curl_fetch { 1202 my $self = shift; 1203 my %hash = @_; 1204 1205 my ($to); 1206 my $tmpl = { 1207 to => { required => 1, store => \$to } 1208 }; 1209 check( $tmpl, \%hash ) or return; 1210 my $curl; 1211 unless ( $curl = can_run('curl') ) { 1212 $METHOD_FAIL->{'curl'} = 1; 1213 return; 1214 } 1215 1216 ### these long opts are self explanatory - I like that -jmb 1217 my $cmd = [ $curl, '-q' ]; 1218 1219 push(@$cmd, '-4') if $^O eq 'netbsd' && $FORCEIPV4; # only seen this on NetBSD so far 1220 1221 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT; 1222 1223 push(@$cmd, '--silent') unless $DEBUG; 1224 1225 ### curl does the right thing with passive, regardless ### 1226 if ($self->scheme eq 'ftp') { 1227 push(@$cmd, '--user', "anonymous:$FROM_EMAIL"); 1228 } 1229 1230 ### curl doesn't follow 302 (temporarily moved) etc automatically 1231 ### so we add --location to enable that. 1232 push @$cmd, '--fail', '--location', '--output', $to, $self->uri; 1233 1234 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1235 ### and there's no need for special casing any more. 1236 ### DO NOT quote things for IPC::Run, it breaks stuff. 1237 # $IPC::Cmd::USE_IPC_RUN 1238 # ? ($to, $self->uri) 1239 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1240 1241 1242 my $captured; 1243 unless(run( command => $cmd, 1244 buffer => \$captured, 1245 verbose => $DEBUG ) 1246 ) { 1247 1248 return $self->_error(loc("Command failed: %1", $captured || '')); 1249 } 1250 1251 return $to; 1252 1253} 1254 1255### /usr/bin/fetch fetch! ### 1256sub _fetch_fetch { 1257 my $self = shift; 1258 my %hash = @_; 1259 1260 my ($to); 1261 my $tmpl = { 1262 to => { required => 1, store => \$to } 1263 }; 1264 check( $tmpl, \%hash ) or return; 1265 1266 ### see if we have a fetch binary ### 1267 my $fetch; 1268 unless( HAS_FETCH and $fetch = can_run('fetch') ) { 1269 $METHOD_FAIL->{'fetch'} = 1; 1270 return; 1271 } 1272 1273 ### no verboseness, thanks ### 1274 my $cmd = [ $fetch, '-q' ]; 1275 1276 ### if a timeout is set, add it ### 1277 push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT; 1278 1279 ### run passive if specified ### 1280 #push @$cmd, '-p' if $FTP_PASSIVE; 1281 local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE; 1282 1283 ### set the output document, add the uri ### 1284 push @$cmd, '-o', $to, $self->uri; 1285 1286 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1287 ### and there's no need for special casing any more. 1288 ### DO NOT quote things for IPC::Run, it breaks stuff. 1289 # $IPC::Cmd::USE_IPC_RUN 1290 # ? ($to, $self->uri) 1291 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1292 1293 ### shell out ### 1294 my $captured; 1295 unless(run( command => $cmd, 1296 buffer => \$captured, 1297 verbose => $DEBUG 1298 )) { 1299 ### wget creates the output document always, even if the fetch 1300 ### fails.. so unlink it in that case 1301 1 while unlink $to; 1302 1303 return $self->_error(loc( "Command failed: %1", $captured || '' )); 1304 } 1305 1306 return $to; 1307} 1308 1309### use File::Copy for fetching file:// urls ### 1310### 1311### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html) 1312### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://) 1313### 1314 1315sub _file_fetch { 1316 my $self = shift; 1317 my %hash = @_; 1318 1319 my ($to); 1320 my $tmpl = { 1321 to => { required => 1, store => \$to } 1322 }; 1323 check( $tmpl, \%hash ) or return; 1324 1325 1326 1327 ### prefix a / on unix systems with a file uri, since it would 1328 ### look somewhat like this: 1329 ### file:///home/kane/file 1330 ### whereas windows file uris for 'c:\some\dir\file' might look like: 1331 ### file:///C:/some/dir/file 1332 ### file:///C|/some/dir/file 1333 ### or for a network share '\\host\share\some\dir\file': 1334 ### file:////host/share/some/dir/file 1335 ### 1336 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like: 1337 ### file://vms.host.edu/disk$user/my/notes/note12345.txt 1338 ### 1339 1340 my $path = $self->path; 1341 my $vol = $self->vol; 1342 my $share = $self->share; 1343 1344 my $remote; 1345 if (!$share and $self->host) { 1346 return $self->_error(loc( 1347 "Currently %1 cannot handle hosts in %2 urls", 1348 'File::Fetch', 'file://' 1349 )); 1350 } 1351 1352 if( $vol ) { 1353 $path = File::Spec->catdir( split /\//, $path ); 1354 $remote = File::Spec->catpath( $vol, $path, $self->file); 1355 1356 } elsif( $share ) { 1357 ### win32 specific, and a share name, so we wont bother with File::Spec 1358 $path =~ s|/+|\\|g; 1359 $remote = "\\\\".$self->host."\\$share\\$path"; 1360 1361 } else { 1362 ### File::Spec on VMS can not currently handle UNIX syntax. 1363 my $file_class = ON_VMS 1364 ? 'File::Spec::Unix' 1365 : 'File::Spec'; 1366 1367 $remote = $file_class->catfile( $path, $self->file ); 1368 } 1369 1370 ### File::Copy is littered with 'die' statements :( ### 1371 my $rv = eval { File::Copy::copy( $remote, $to ) }; 1372 1373 ### something went wrong ### 1374 if( !$rv or $@ ) { 1375 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4", 1376 $remote, $to, $!, $@)); 1377 } 1378 1379 return $to; 1380} 1381 1382### use /usr/bin/rsync to fetch files 1383sub _rsync_fetch { 1384 my $self = shift; 1385 my %hash = @_; 1386 1387 my ($to); 1388 my $tmpl = { 1389 to => { required => 1, store => \$to } 1390 }; 1391 check( $tmpl, \%hash ) or return; 1392 my $rsync; 1393 unless ( $rsync = can_run('rsync') ) { 1394 $METHOD_FAIL->{'rsync'} = 1; 1395 return; 1396 } 1397 1398 my $cmd = [ $rsync ]; 1399 1400 ### XXX: rsync has no I/O timeouts at all, by default 1401 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 1402 1403 push(@$cmd, '--quiet') unless $DEBUG; 1404 1405 ### DO NOT quote things for IPC::Run, it breaks stuff. 1406 push @$cmd, $self->uri, $to; 1407 1408 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1409 ### and there's no need for special casing any more. 1410 ### DO NOT quote things for IPC::Run, it breaks stuff. 1411 # $IPC::Cmd::USE_IPC_RUN 1412 # ? ($to, $self->uri) 1413 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1414 1415 my $captured; 1416 unless(run( command => $cmd, 1417 buffer => \$captured, 1418 verbose => $DEBUG ) 1419 ) { 1420 1421 return $self->_error(loc("Command %1 failed: %2", 1422 "@$cmd" || '', $captured || '')); 1423 } 1424 1425 return $to; 1426 1427} 1428 1429### use git to fetch files 1430sub _git_fetch { 1431 my $self = shift; 1432 my %hash = @_; 1433 1434 my ($to); 1435 my $tmpl = { 1436 to => { required => 1, store => \$to } 1437 }; 1438 check( $tmpl, \%hash ) or return; 1439 my $git; 1440 unless ( $git = can_run('git') ) { 1441 $METHOD_FAIL->{'git'} = 1; 1442 return; 1443 } 1444 1445 my $cmd = [ $git, 'clone' ]; 1446 1447 #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 1448 1449 push(@$cmd, '--quiet') unless $DEBUG; 1450 1451 ### DO NOT quote things for IPC::Run, it breaks stuff. 1452 push @$cmd, $self->uri, $to; 1453 1454 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1455 ### and there's no need for special casing any more. 1456 ### DO NOT quote things for IPC::Run, it breaks stuff. 1457 # $IPC::Cmd::USE_IPC_RUN 1458 # ? ($to, $self->uri) 1459 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1460 1461 my $captured; 1462 unless(run( command => $cmd, 1463 buffer => \$captured, 1464 verbose => $DEBUG ) 1465 ) { 1466 1467 return $self->_error(loc("Command %1 failed: %2", 1468 "@$cmd" || '', $captured || '')); 1469 } 1470 1471 return $to; 1472 1473} 1474 1475################################# 1476# 1477# Error code 1478# 1479################################# 1480 1481=pod 1482 1483=head2 $ff->error([BOOL]) 1484 1485Returns the last encountered error as string. 1486Pass it a true value to get the C<Carp::longmess()> output instead. 1487 1488=cut 1489 1490### error handling the way Archive::Extract does it 1491sub _error { 1492 my $self = shift; 1493 my $error = shift; 1494 1495 $self->_error_msg( $error ); 1496 $self->_error_msg_long( Carp::longmess($error) ); 1497 1498 if( $WARN ) { 1499 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; 1500 } 1501 1502 return; 1503} 1504 1505sub error { 1506 my $self = shift; 1507 return shift() ? $self->_error_msg_long : $self->_error_msg; 1508} 1509 1510 15111; 1512 1513=pod 1514 1515=head1 HOW IT WORKS 1516 1517File::Fetch is able to fetch a variety of uris, by using several 1518external programs and modules. 1519 1520Below is a mapping of what utilities will be used in what order 1521for what schemes, if available: 1522 1523 file => LWP, lftp, file 1524 http => LWP, HTTP::Tiny, wget, curl, lftp, fetch, HTTP::Lite, lynx, iosock 1525 ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp 1526 rsync => rsync 1527 git => git 1528 1529If you'd like to disable the use of one or more of these utilities 1530and/or modules, see the C<$BLACKLIST> variable further down. 1531 1532If a utility or module isn't available, it will be marked in a cache 1533(see the C<$METHOD_FAIL> variable further down), so it will not be 1534tried again. The C<fetch> method will only fail when all options are 1535exhausted, and it was not able to retrieve the file. 1536 1537The C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD 1538may also have it from C<pkgsrc>. We only check for C<fetch> on those 1539three platforms. 1540 1541C<iosock> is a very limited L<IO::Socket::INET> based mechanism for 1542retrieving C<http> schemed urls. It doesn't follow redirects for instance. 1543 1544C<git> only supports C<git://> style urls. 1545 1546A special note about fetching files from an ftp uri: 1547 1548By default, all ftp connections are done in passive mode. To change 1549that, see the C<$FTP_PASSIVE> variable further down. 1550 1551Furthermore, ftp uris only support anonymous connections, so no 1552named user/password pair can be passed along. 1553 1554C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable 1555further down. 1556 1557=head1 GLOBAL VARIABLES 1558 1559The behaviour of File::Fetch can be altered by changing the following 1560global variables: 1561 1562=head2 $File::Fetch::FROM_EMAIL 1563 1564This is the email address that will be sent as your anonymous ftp 1565password. 1566 1567Default is C<File-Fetch@example.com>. 1568 1569=head2 $File::Fetch::USER_AGENT 1570 1571This is the useragent as C<LWP> will report it. 1572 1573Default is C<File::Fetch/$VERSION>. 1574 1575=head2 $File::Fetch::FTP_PASSIVE 1576 1577This variable controls whether the environment variable C<FTP_PASSIVE> 1578and any passive switches to commandline tools will be set to true. 1579 1580Default value is 1. 1581 1582Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch 1583files, since passive mode can only be set interactively for this binary 1584 1585=head2 $File::Fetch::TIMEOUT 1586 1587When set, controls the network timeout (counted in seconds). 1588 1589Default value is 0. 1590 1591=head2 $File::Fetch::WARN 1592 1593This variable controls whether errors encountered internally by 1594C<File::Fetch> should be C<carp>'d or not. 1595 1596Set to false to silence warnings. Inspect the output of the C<error()> 1597method manually to see what went wrong. 1598 1599Defaults to C<true>. 1600 1601=head2 $File::Fetch::DEBUG 1602 1603This enables debugging output when calling commandline utilities to 1604fetch files. 1605This also enables C<Carp::longmess> errors, instead of the regular 1606C<carp> errors. 1607 1608Good for tracking down why things don't work with your particular 1609setup. 1610 1611Default is 0. 1612 1613=head2 $File::Fetch::BLACKLIST 1614 1615This is an array ref holding blacklisted modules/utilities for fetching 1616files with. 1617 1618To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could 1619set $File::Fetch::BLACKLIST to: 1620 1621 $File::Fetch::BLACKLIST = [qw|lwp netftp|] 1622 1623The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable. 1624 1625See the note on C<MAPPING> below. 1626 1627=head2 $File::Fetch::METHOD_FAIL 1628 1629This is a hashref registering what modules/utilities were known to fail 1630for fetching files (mostly because they weren't installed). 1631 1632You can reset this cache by assigning an empty hashref to it, or 1633individually remove keys. 1634 1635See the note on C<MAPPING> below. 1636 1637=head1 MAPPING 1638 1639 1640Here's a quick mapping for the utilities/modules, and their names for 1641the $BLACKLIST, $METHOD_FAIL and other internal functions. 1642 1643 LWP => lwp 1644 HTTP::Lite => httplite 1645 HTTP::Tiny => httptiny 1646 Net::FTP => netftp 1647 wget => wget 1648 lynx => lynx 1649 ncftp => ncftp 1650 ftp => ftp 1651 curl => curl 1652 rsync => rsync 1653 lftp => lftp 1654 fetch => fetch 1655 IO::Socket => iosock 1656 1657=head1 FREQUENTLY ASKED QUESTIONS 1658 1659=head2 So how do I use a proxy with File::Fetch? 1660 1661C<File::Fetch> currently only supports proxies with LWP::UserAgent. 1662You will need to set your environment variables accordingly. For 1663example, to use an ftp proxy: 1664 1665 $ENV{ftp_proxy} = 'foo.com'; 1666 1667Refer to the LWP::UserAgent manpage for more details. 1668 1669=head2 I used 'lynx' to fetch a file, but its contents is all wrong! 1670 1671C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>, 1672which we in turn capture. If that content is a 'custom' error file 1673(like, say, a C<404 handler>), you will get that contents instead. 1674 1675Sadly, C<lynx> doesn't support any options to return a different exit 1676code on non-C<200 OK> status, giving us no way to tell the difference 1677between a 'successful' fetch and a custom error page. 1678 1679Therefor, we recommend to only use C<lynx> as a last resort. This is 1680why it is at the back of our list of methods to try as well. 1681 1682=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do? 1683 1684C<File::Fetch> is relatively smart about things. When trying to write 1685a file to disk, it removes the C<query parameters> (see the 1686C<output_file> method for details) from the file name before creating 1687it. In most cases this suffices. 1688 1689If you have any other characters you need to escape, please install 1690the C<URI::Escape> module from CPAN, and pre-encode your URI before 1691passing it to C<File::Fetch>. You can read about the details of URIs 1692and URI encoding here: 1693 1694 http://www.faqs.org/rfcs/rfc2396.html 1695 1696=head1 TODO 1697 1698=over 4 1699 1700=item Implement $PREFER_BIN 1701 1702To indicate to rather use commandline tools than modules 1703 1704=back 1705 1706=head1 BUG REPORTS 1707 1708Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>. 1709 1710=head1 AUTHOR 1711 1712This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 1713 1714=head1 COPYRIGHT 1715 1716This library is free software; you may redistribute and/or modify it 1717under the same terms as Perl itself. 1718 1719 1720=cut 1721 1722# Local variables: 1723# c-indentation-style: bsd 1724# c-basic-offset: 4 1725# indent-tabs-mode: nil 1726# End: 1727# vim: expandtab shiftwidth=4: 1728 1729 1730 1731 1732