1package File::Fetch; 2 3use strict; 4use FileHandle; 5use File::Temp; 6use File::Copy; 7use File::Spec; 8use File::Spec::Unix; 9use File::Basename qw[dirname]; 10 11use Cwd qw[cwd]; 12use Carp qw[carp]; 13use IPC::Cmd qw[can_run run QUOTE]; 14use File::Path qw[mkpath]; 15use File::Temp qw[tempdir]; 16use Params::Check qw[check]; 17use Module::Load::Conditional qw[can_load]; 18use Locale::Maketext::Simple Style => 'gettext'; 19 20use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT 21 $BLACKLIST $METHOD_FAIL $VERSION $METHODS 22 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN $FORCEIPV4 23 ]; 24 25$VERSION = '1.04'; 26$VERSION = eval $VERSION; # avoid warnings with development releases 27$PREFER_BIN = 0; # XXX TODO implement 28$FROM_EMAIL = 'File-Fetch@example.com'; 29$USER_AGENT = "File::Fetch/$VERSION"; 30$BLACKLIST = [qw|ftp|]; 31push @$BLACKLIST, qw|lftp| if $^O eq 'dragonfly' || $^O eq 'hpux'; 32$METHOD_FAIL = { }; 33$FTP_PASSIVE = 1; 34$TIMEOUT = 0; 35$DEBUG = 0; 36$WARN = 1; 37$FORCEIPV4 = 0; 38 39### methods available to fetch the file depending on the scheme 40$METHODS = { 41 http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ], 42 https => [ qw|lwp wget curl| ], 43 ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ], 44 file => [ qw|lwp lftp file| ], 45 rsync => [ qw|rsync| ], 46 git => [ qw|git| ], 47}; 48 49### silly warnings ### 50local $Params::Check::VERBOSE = 1; 51local $Params::Check::VERBOSE = 1; 52local $Module::Load::Conditional::VERBOSE = 0; 53local $Module::Load::Conditional::VERBOSE = 0; 54 55### see what OS we are on, important for file:// uris ### 56use constant ON_WIN => ($^O eq 'MSWin32'); 57use constant ON_VMS => ($^O eq 'VMS'); 58use constant ON_UNIX => (!ON_WIN); 59use constant HAS_VOL => (ON_WIN); 60use constant HAS_SHARE => (ON_WIN); 61use constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! ); 62 63=pod 64 65=head1 NAME 66 67File::Fetch - A generic file fetching mechanism 68 69=head1 SYNOPSIS 70 71 use File::Fetch; 72 73 ### build a File::Fetch object ### 74 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt'); 75 76 ### fetch the uri to cwd() ### 77 my $where = $ff->fetch() or die $ff->error; 78 79 ### fetch the uri to /tmp ### 80 my $where = $ff->fetch( to => '/tmp' ); 81 82 ### parsed bits from the uri ### 83 $ff->uri; 84 $ff->scheme; 85 $ff->host; 86 $ff->path; 87 $ff->file; 88 89=head1 DESCRIPTION 90 91File::Fetch is a generic file fetching mechanism. 92 93It allows you to fetch any file pointed to by a C<ftp>, C<http>, 94C<file>, C<git> or C<rsync> uri by a number of different means. 95 96See the C<HOW IT WORKS> section further down for details. 97 98=head1 ACCESSORS 99 100A C<File::Fetch> object has the following accessors 101 102=over 4 103 104=item $ff->uri 105 106The uri you passed to the constructor 107 108=item $ff->scheme 109 110The scheme from the uri (like 'file', 'http', etc) 111 112=item $ff->host 113 114The hostname in the uri. Will be empty if host was originally 115'localhost' for a 'file://' url. 116 117=item $ff->vol 118 119On operating systems with the concept of a volume the second element 120of a file:// is considered to the be volume specification for the file. 121Thus on Win32 this routine returns the volume, on other operating 122systems this returns nothing. 123 124On Windows this value may be empty if the uri is to a network share, in 125which case the 'share' property will be defined. Additionally, volume 126specifications that use '|' as ':' will be converted on read to use ':'. 127 128On VMS, which has a volume concept, this field will be empty because VMS 129file specifications are converted to absolute UNIX format and the volume 130information is transparently included. 131 132=item $ff->share 133 134On systems with the concept of a network share (currently only Windows) returns 135the sharename from a file://// url. On other operating systems returns empty. 136 137=item $ff->path 138 139The path from the uri, will be at least a single '/'. 140 141=item $ff->file 142 143The name of the remote file. For the local file name, the 144result of $ff->output_file will be used. 145 146=item $ff->file_default 147 148The name of the default local file, that $ff->output_file falls back to if 149it would otherwise return no filename. For example when fetching a URI like 150http://www.abc.net.au/ the contents retrieved may be from a remote file called 151'index.html'. The default value of this attribute is literally 'file_default'. 152 153=cut 154 155 156########################## 157### Object & Accessors ### 158########################## 159 160{ 161 ### template for autogenerated accessors ### 162 my $Tmpl = { 163 scheme => { default => 'http' }, 164 host => { default => 'localhost' }, 165 path => { default => '/' }, 166 file => { required => 1 }, 167 uri => { required => 1 }, 168 userinfo => { default => '' }, 169 vol => { default => '' }, # windows for file:// uris 170 share => { default => '' }, # windows for file:// uris 171 file_default => { default => 'file_default' }, 172 tempdir_root => { required => 1 }, # Should be lazy-set at ->new() 173 _error_msg => { no_override => 1 }, 174 _error_msg_long => { no_override => 1 }, 175 }; 176 177 for my $method ( keys %$Tmpl ) { 178 no strict 'refs'; 179 *$method = sub { 180 my $self = shift; 181 $self->{$method} = $_[0] if @_; 182 return $self->{$method}; 183 } 184 } 185 186 sub _create { 187 my $class = shift; 188 my %hash = @_; 189 190 my $args = check( $Tmpl, \%hash ) or return; 191 192 bless $args, $class; 193 194 if( lc($args->scheme) ne 'file' and not $args->host ) { 195 return $class->_error(loc( 196 "Hostname required when fetching from '%1'",$args->scheme)); 197 } 198 199 for (qw[path]) { 200 unless( $args->$_() ) { # 5.5.x needs the () 201 return $class->_error(loc("No '%1' specified",$_)); 202 } 203 } 204 205 return $args; 206 } 207} 208 209=item $ff->output_file 210 211The name of the output file. This is the same as $ff->file, 212but any query parameters are stripped off. For example: 213 214 http://example.com/index.html?x=y 215 216would make the output file be C<index.html> rather than 217C<index.html?x=y>. 218 219=back 220 221=cut 222 223sub output_file { 224 my $self = shift; 225 my $file = $self->file; 226 227 $file =~ s/\?.*$//g; 228 229 $file ||= $self->file_default; 230 231 return $file; 232} 233 234### XXX do this or just point to URI::Escape? 235# =head2 $esc_uri = $ff->escaped_uri 236# 237# =cut 238# 239# ### most of this is stolen straight from URI::escape 240# { ### Build a char->hex map 241# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255; 242# 243# sub escaped_uri { 244# my $self = shift; 245# my $uri = $self->uri; 246# 247# ### Default unsafe characters. RFC 2732 ^(uric - reserved) 248# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/ 249# $escapes{$1} || $self->_fail_hi($1)/ge; 250# 251# return $uri; 252# } 253# 254# sub _fail_hi { 255# my $self = shift; 256# my $char = shift; 257# 258# $self->_error(loc( 259# "Can't escape '%1', try using the '%2' module instead", 260# sprintf("\\x{%04X}", ord($char)), 'URI::Escape' 261# )); 262# } 263# 264# sub output_file { 265# 266# } 267# 268# 269# } 270 271=head1 METHODS 272 273=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' ); 274 275Parses the uri and creates a corresponding File::Fetch::Item object, 276that is ready to be C<fetch>ed and returns it. 277 278Returns false on failure. 279 280=cut 281 282sub new { 283 my $class = shift; 284 my %hash = @_; 285 286 my ($uri, $file_default, $tempdir_root); 287 my $tmpl = { 288 uri => { required => 1, store => \$uri }, 289 file_default => { required => 0, store => \$file_default }, 290 tempdir_root => { required => 0, store => \$tempdir_root }, 291 }; 292 293 check( $tmpl, \%hash ) or return; 294 295 ### parse the uri to usable parts ### 296 my $href = $class->_parse_uri( $uri ) or return; 297 298 $href->{file_default} = $file_default if $file_default; 299 $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root; 300 $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd ) if not $href->{tempdir_root}; 301 302 ### make it into a FFI object ### 303 my $ff = $class->_create( %$href ) or return; 304 305 306 ### return the object ### 307 return $ff; 308} 309 310### parses an uri to a hash structure: 311### 312### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' ) 313### 314### becomes: 315### 316### $href = { 317### scheme => 'ftp', 318### host => 'ftp.cpan.org', 319### path => '/pub/mirror', 320### file => 'index.html' 321### }; 322### 323### In the case of file:// urls there maybe be additional fields 324### 325### For systems with volume specifications such as Win32 there will be 326### a volume specifier provided in the 'vol' field. 327### 328### 'vol' => 'volumename' 329### 330### For windows file shares there may be a 'share' key specified 331### 332### 'share' => 'sharename' 333### 334### Note that the rules of what a file:// url means vary by the operating system 335### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious 336### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and 337### not '/foo/bar.txt' 338### 339### Similarly if the host interpreting the url is VMS then 340### file:///disk$user/my/notes/note12345.txt' means 341### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as 342### if it is unix where it means /disk$user/my/notes/note12345.txt'. 343### Except for some cases in the File::Spec methods, Perl on VMS will generally 344### handle UNIX format file specifications. 345### 346### This means it is impossible to serve certain file:// urls on certain systems. 347### 348### Thus are the problems with a protocol-less specification. :-( 349### 350 351sub _parse_uri { 352 my $self = shift; 353 my $uri = shift or return; 354 355 my $href = { uri => $uri }; 356 357 ### find the scheme ### 358 $uri =~ s|^(\w+)://||; 359 $href->{scheme} = $1; 360 361 ### See rfc 1738 section 3.10 362 ### https://datatracker.ietf.org/doc/html/rfc1738#section-3.10 363 ### And wikipedia for more on windows file:// urls 364 ### http://en.wikipedia.org/wiki/File:// 365 if( $href->{scheme} eq 'file' ) { 366 367 my @parts = split '/',$uri; 368 369 ### file://hostname/... 370 ### file://hostname/... 371 ### normalize file://localhost with file:/// 372 $href->{host} = $parts[0] || ''; 373 374 ### index in @parts where the path components begin; 375 my $index = 1; 376 377 ### file:////hostname/sharename/blah.txt 378 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) { 379 380 $href->{host} = $parts[2] || ''; # avoid warnings 381 $href->{share} = $parts[3] || ''; # avoid warnings 382 383 $index = 4 # index after the share 384 385 ### file:///D|/blah.txt 386 ### file:///D:/blah.txt 387 } elsif (HAS_VOL) { 388 389 ### this code comes from dmq's patch, but: 390 ### XXX if volume is empty, wouldn't that be an error? --kane 391 ### if so, our file://localhost test needs to be fixed as wel 392 $href->{vol} = $parts[1] || ''; 393 394 ### correct D| style colume descriptors 395 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN; 396 397 $index = 2; # index after the volume 398 } 399 400 ### rebuild the path from the leftover parts; 401 $href->{path} = join '/', '', splice( @parts, $index, $#parts ); 402 403 } else { 404 ### using anything but qw() in hash slices may produce warnings 405 ### in older perls :-( 406 @{$href}{ qw(userinfo host path) } = $uri =~ m|(?:([^\@:]*:[^\:\@]*)@)?([^/]*)(/.*)$|s; 407 } 408 409 ### split the path into file + dir ### 410 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} ); 411 $href->{path} = $parts[1]; 412 $href->{file} = $parts[2]; 413 } 414 415 ### host will be empty if the target was 'localhost' and the 416 ### scheme was 'file' 417 $href->{host} = '' if ($href->{host} eq 'localhost') and 418 ($href->{scheme} eq 'file'); 419 420 return $href; 421} 422 423=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] ) 424 425Fetches the file you requested and returns the full path to the file. 426 427By default it writes to C<cwd()>, but you can override that by specifying 428the C<to> argument: 429 430 ### file fetch to /tmp, full path to the file in $where 431 $where = $ff->fetch( to => '/tmp' ); 432 433 ### file slurped into $scalar, full path to the file in $where 434 ### file is downloaded to a temp directory and cleaned up at exit time 435 $where = $ff->fetch( to => \$scalar ); 436 437Returns the full path to the downloaded file on success, and false 438on failure. 439 440=cut 441 442sub fetch { 443 my $self = shift or return; 444 my %hash = @_; 445 446 my $target; 447 my $tmpl = { 448 to => { default => cwd(), store => \$target }, 449 }; 450 451 check( $tmpl, \%hash ) or return; 452 453 my ($to, $fh); 454 ### you want us to slurp the contents 455 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { 456 $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 ); 457 458 ### plain old fetch 459 } else { 460 $to = $target; 461 462 ### On VMS force to VMS format so File::Spec will work. 463 $to = VMS::Filespec::vmspath($to) if ON_VMS; 464 465 ### create the path if it doesn't exist yet ### 466 unless( -d $to ) { 467 eval { mkpath( $to ) }; 468 469 return $self->_error(loc("Could not create path '%1'",$to)) if $@; 470 } 471 } 472 473 ### set passive ftp if required ### 474 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE; 475 476 ### we dont use catfile on win32 because if we are using a cygwin tool 477 ### under cmd.exe they wont understand windows style separators. 478 my $out_to = ON_WIN ? $to.'/'.$self->output_file 479 : File::Spec->catfile( $to, $self->output_file ); 480 481 for my $method ( @{ $METHODS->{$self->scheme} } ) { 482 my $sub = '_'.$method.'_fetch'; 483 484 unless( __PACKAGE__->can($sub) ) { 485 $self->_error(loc("Cannot call method for '%1' -- WEIRD!", 486 $method)); 487 next; 488 } 489 490 ### method is blacklisted ### 491 next if grep { lc $_ eq $method } @$BLACKLIST; 492 493 ### method is known to fail ### 494 next if $METHOD_FAIL->{$method}; 495 496 ### there's serious issues with IPC::Run and quoting of command 497 ### line arguments. using quotes in the wrong place breaks things, 498 ### and in the case of say, 499 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document 500 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2" 501 ### it doesn't matter how you quote, it always fails. 502 local $IPC::Cmd::USE_IPC_RUN = 0; 503 504 if( my $file = $self->$sub( 505 to => $out_to 506 )){ 507 508 unless( -e $file && -s _ ) { 509 $self->_error(loc("'%1' said it fetched '%2', ". 510 "but it was not created",$method,$file)); 511 512 ### mark the failure ### 513 $METHOD_FAIL->{$method} = 1; 514 515 next; 516 517 } else { 518 519 ### slurp mode? 520 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { 521 522 ### open the file 523 open my $fh, "<$file" or do { 524 $self->_error( 525 loc("Could not open '%1': %2", $file, $!)); 526 return; 527 }; 528 529 ### slurp 530 $$target = do { local $/; <$fh> }; 531 532 } 533 534 my $abs = File::Spec->rel2abs( $file ); 535 return $abs; 536 537 } 538 } 539 } 540 541 542 ### if we got here, we looped over all methods, but we weren't able 543 ### to fetch it. 544 return; 545} 546 547######################## 548### _*_fetch methods ### 549######################## 550 551### LWP fetching ### 552sub _lwp_fetch { 553 my $self = shift; 554 my %hash = @_; 555 556 my ($to); 557 my $tmpl = { 558 to => { required => 1, store => \$to } 559 }; 560 check( $tmpl, \%hash ) or return; 561 562 ### modules required to download with lwp ### 563 my $use_list = { 564 LWP => '0.0', 565 'LWP::UserAgent' => '0.0', 566 'HTTP::Request' => '0.0', 567 'HTTP::Status' => '0.0', 568 URI => '0.0', 569 570 }; 571 572 if ($self->scheme eq 'https') { 573 $use_list->{'LWP::Protocol::https'} = '0'; 574 } 575 576 ### Fix CVE-2016-1238 ### 577 local $Module::Load::Conditional::FORCE_SAFE_INC = 1; 578 unless( can_load( modules => $use_list ) ) { 579 $METHOD_FAIL->{'lwp'} = 1; 580 return; 581 } 582 583 ### setup the uri object 584 my $uri = URI->new( File::Spec::Unix->catfile( 585 $self->path, $self->file 586 ) ); 587 588 ### special rules apply for file:// uris ### 589 $uri->scheme( $self->scheme ); 590 $uri->host( $self->scheme eq 'file' ? '' : $self->host ); 591 592 if ($self->userinfo) { 593 $uri->userinfo($self->userinfo); 594 } elsif ($self->scheme ne 'file') { 595 $uri->userinfo("anonymous:$FROM_EMAIL"); 596 } 597 598 ### set up the useragent object 599 my $ua = LWP::UserAgent->new(); 600 $ua->timeout( $TIMEOUT ) if $TIMEOUT; 601 $ua->agent( $USER_AGENT ); 602 $ua->from( $FROM_EMAIL ); 603 $ua->env_proxy; 604 605 my $res = $ua->mirror($uri, $to) or return; 606 607 ### uptodate or fetched ok ### 608 if ( $res->code == 304 or $res->code == 200 ) { 609 return $to; 610 611 } else { 612 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]", 613 $res->code, HTTP::Status::status_message($res->code), 614 $res->status_line)); 615 } 616 617} 618 619### HTTP::Tiny fetching ### 620sub _httptiny_fetch { 621 my $self = shift; 622 my %hash = @_; 623 624 my ($to); 625 my $tmpl = { 626 to => { required => 1, store => \$to } 627 }; 628 check( $tmpl, \%hash ) or return; 629 630 my $use_list = { 631 'HTTP::Tiny' => '0.008', 632 633 }; 634 635 ### Fix CVE-2016-1238 ### 636 local $Module::Load::Conditional::FORCE_SAFE_INC = 1; 637 unless( can_load(modules => $use_list) ) { 638 $METHOD_FAIL->{'httptiny'} = 1; 639 return; 640 } 641 642 my $uri = $self->uri; 643 644 my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) ); 645 646 my $rc = $http->mirror( $uri, $to ); 647 648 unless ( $rc->{success} ) { 649 650 return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]", 651 $rc->{status}, $rc->{reason} ) ); 652 653 } 654 655 return $to; 656 657} 658 659### HTTP::Lite fetching ### 660sub _httplite_fetch { 661 my $self = shift; 662 my %hash = @_; 663 664 my ($to); 665 my $tmpl = { 666 to => { required => 1, store => \$to } 667 }; 668 check( $tmpl, \%hash ) or return; 669 670 ### modules required to download with lwp ### 671 my $use_list = { 672 'HTTP::Lite' => '2.2', 673 'MIME::Base64' => '0', 674 }; 675 676 ### Fix CVE-2016-1238 ### 677 local $Module::Load::Conditional::FORCE_SAFE_INC = 1; 678 unless( can_load(modules => $use_list) ) { 679 $METHOD_FAIL->{'httplite'} = 1; 680 return; 681 } 682 683 my $uri = $self->uri; 684 my $retries = 0; 685 686 RETRIES: while ( $retries++ < 5 ) { 687 688 my $http = HTTP::Lite->new(); 689 # Naughty naughty but there isn't any accessor/setter 690 $http->{timeout} = $TIMEOUT if $TIMEOUT; 691 $http->http11_mode(1); 692 693 if ($self->userinfo) { 694 my $encoded = MIME::Base64::encode($self->userinfo, ''); 695 $http->add_req_header("Authorization", "Basic $encoded"); 696 } 697 698 my $fh = FileHandle->new; 699 700 unless ( $fh->open($to,'>') ) { 701 return $self->_error(loc( 702 "Could not open '%1' for writing: %2",$to,$!)); 703 } 704 705 $fh->autoflush(1); 706 707 binmode $fh; 708 709 my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh ); 710 711 close $fh; 712 713 if ( $rc == 301 || $rc == 302 ) { 714 my $loc; 715 HEADERS: for ($http->headers_array) { 716 /Location: (\S+)/ and $loc = $1, last HEADERS; 717 } 718 #$loc or last; # Think we should squeal here. 719 if ($loc =~ m!^/!) { 720 $uri =~ s{^(\w+?://[^/]+)/.*$}{$1}; 721 $uri .= $loc; 722 } 723 else { 724 $uri = $loc; 725 } 726 next RETRIES; 727 } 728 elsif ( $rc == 200 ) { 729 return $to; 730 } 731 else { 732 return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]", 733 $rc, $http->status_message)); 734 } 735 736 } # Loop for 5 retries. 737 738 return $self->_error("Fetch failed! Gave up after 5 tries"); 739 740} 741 742### Simple IO::Socket::INET fetching ### 743sub _iosock_fetch { 744 my $self = shift; 745 my %hash = @_; 746 747 my ($to); 748 my $tmpl = { 749 to => { required => 1, store => \$to } 750 }; 751 check( $tmpl, \%hash ) or return; 752 753 my $use_list = { 754 'IO::Socket::INET' => '0.0', 755 'IO::Select' => '0.0', 756 }; 757 758 ### Fix CVE-2016-1238 ### 759 local $Module::Load::Conditional::FORCE_SAFE_INC = 1; 760 unless( can_load(modules => $use_list) ) { 761 $METHOD_FAIL->{'iosock'} = 1; 762 return; 763 } 764 765 my $sock = IO::Socket::INET->new( 766 PeerHost => $self->host, 767 ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ), 768 ); 769 770 unless ( $sock ) { 771 return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!)); 772 } 773 774 my $fh = FileHandle->new; 775 776 # Check open() 777 778 unless ( $fh->open($to,'>') ) { 779 return $self->_error(loc( 780 "Could not open '%1' for writing: %2",$to,$!)); 781 } 782 783 $fh->autoflush(1); 784 binmode $fh; 785 786 my $path = File::Spec::Unix->catfile( $self->path, $self->file ); 787 my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a"; 788 $sock->send( $req ); 789 790 my $select = IO::Select->new( $sock ); 791 792 my $resp = ''; 793 my $normal = 0; 794 while ( $select->can_read( $TIMEOUT || 60 ) ) { 795 my $ret = $sock->sysread( $resp, 4096, length($resp) ); 796 if ( !defined $ret or $ret == 0 ) { 797 $select->remove( $sock ); 798 $normal++; 799 } 800 } 801 close $sock; 802 803 unless ( $normal ) { 804 return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 ))); 805 } 806 807 # Check the "response" 808 # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1) 809 $resp =~ s/^(\x0d?\x0a)+//; 810 # Check it is an HTTP response 811 unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) { 812 return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host)); 813 } 814 815 # Check for OK 816 my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i; 817 unless ( $code eq '200' ) { 818 return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host)); 819 } 820 821 { 822 local $\; 823 print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0]; 824 } 825 close $fh; 826 return $to; 827} 828 829### Net::FTP fetching 830sub _netftp_fetch { 831 my $self = shift; 832 my %hash = @_; 833 834 my ($to); 835 my $tmpl = { 836 to => { required => 1, store => \$to } 837 }; 838 check( $tmpl, \%hash ) or return; 839 840 ### required modules ### 841 my $use_list = { 'Net::FTP' => 0 }; 842 843 ### Fix CVE-2016-1238 ### 844 local $Module::Load::Conditional::FORCE_SAFE_INC = 1; 845 unless( can_load( modules => $use_list ) ) { 846 $METHOD_FAIL->{'netftp'} = 1; 847 return; 848 } 849 850 ### make connection ### 851 my $ftp; 852 my @options = ($self->host); 853 push(@options, Timeout => $TIMEOUT) if $TIMEOUT; 854 unless( $ftp = Net::FTP->new( @options ) ) { 855 return $self->_error(loc("Ftp creation failed: %1",$@)); 856 } 857 858 ### login ### 859 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) { 860 return $self->_error(loc("Could not login to '%1'",$self->host)); 861 } 862 863 ### set binary mode, just in case ### 864 $ftp->binary; 865 866 ### create the remote path 867 ### remember remote paths are unix paths! [#11483] 868 my $remote = File::Spec::Unix->catfile( $self->path, $self->file ); 869 870 ### fetch the file ### 871 my $target; 872 unless( $target = $ftp->get( $remote, $to ) ) { 873 return $self->_error(loc("Could not fetch '%1' from '%2'", 874 $remote, $self->host)); 875 } 876 877 ### log out ### 878 $ftp->quit; 879 880 return $target; 881 882} 883 884### /bin/wget fetch ### 885sub _wget_fetch { 886 my $self = shift; 887 my %hash = @_; 888 889 my ($to); 890 my $tmpl = { 891 to => { required => 1, store => \$to } 892 }; 893 check( $tmpl, \%hash ) or return; 894 895 my $wget; 896 ### see if we have a wget binary ### 897 unless( $wget = can_run('wget') ) { 898 $METHOD_FAIL->{'wget'} = 1; 899 return; 900 } 901 902 ### no verboseness, thanks ### 903 my $cmd = [ $wget, '--quiet' ]; 904 905 ### if a timeout is set, add it ### 906 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 907 908 ### run passive if specified ### 909 push @$cmd, '--passive-ftp' if $self->scheme eq 'ftp' && $FTP_PASSIVE; 910 911 ### set the output document, add the uri ### 912 push @$cmd, '--output-document', $to, $self->uri; 913 914 ### with IPC::Cmd > 0.41, this is fixed in teh library, 915 ### and there's no need for special casing any more. 916 ### DO NOT quote things for IPC::Run, it breaks stuff. 917 # $IPC::Cmd::USE_IPC_RUN 918 # ? ($to, $self->uri) 919 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 920 921 ### shell out ### 922 my $captured; 923 unless(run( command => $cmd, 924 buffer => \$captured, 925 verbose => $DEBUG 926 )) { 927 ### wget creates the output document always, even if the fetch 928 ### fails.. so unlink it in that case 929 1 while unlink $to; 930 931 return $self->_error(loc( "Command failed: %1", $captured || '' )); 932 } 933 934 return $to; 935} 936 937### /bin/lftp fetch ### 938sub _lftp_fetch { 939 my $self = shift; 940 my %hash = @_; 941 942 my ($to); 943 my $tmpl = { 944 to => { required => 1, store => \$to } 945 }; 946 check( $tmpl, \%hash ) or return; 947 948 ### see if we have a lftp binary ### 949 my $lftp; 950 unless( $lftp = can_run('lftp') ) { 951 $METHOD_FAIL->{'lftp'} = 1; 952 return; 953 } 954 955 ### no verboseness, thanks ### 956 my $cmd = [ $lftp, '-f' ]; 957 958 my $fh = File::Temp->new; 959 960 my $str; 961 962 ### if a timeout is set, add it ### 963 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT; 964 965 ### run passive if specified ### 966 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE; 967 968 ### set the output document, add the uri ### 969 ### quote the URI, because lftp supports certain shell 970 ### expansions, most notably & for backgrounding. 971 ### ' quote does nto work, must be " 972 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/; 973 974 if( $DEBUG ) { 975 my $pp_str = join ' ', split $/, $str; 976 print "# lftp command: $pp_str\n"; 977 } 978 979 ### write straight to the file. 980 $fh->autoflush(1); 981 print $fh $str; 982 983 ### the command needs to be 1 string to be executed 984 push @$cmd, $fh->filename; 985 986 ### with IPC::Cmd > 0.41, this is fixed in teh library, 987 ### and there's no need for special casing any more. 988 ### DO NOT quote things for IPC::Run, it breaks stuff. 989 # $IPC::Cmd::USE_IPC_RUN 990 # ? ($to, $self->uri) 991 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 992 993 994 ### shell out ### 995 my $captured; 996 unless(run( command => $cmd, 997 buffer => \$captured, 998 verbose => $DEBUG 999 )) { 1000 ### wget creates the output document always, even if the fetch 1001 ### fails.. so unlink it in that case 1002 1 while unlink $to; 1003 1004 return $self->_error(loc( "Command failed: %1", $captured || '' )); 1005 } 1006 1007 return $to; 1008} 1009 1010 1011 1012### /bin/ftp fetch ### 1013sub _ftp_fetch { 1014 my $self = shift; 1015 my %hash = @_; 1016 1017 my ($to); 1018 my $tmpl = { 1019 to => { required => 1, store => \$to } 1020 }; 1021 check( $tmpl, \%hash ) or return; 1022 1023 ### see if we have a ftp binary ### 1024 my $ftp; 1025 unless( $ftp = can_run('ftp') ) { 1026 $METHOD_FAIL->{'ftp'} = 1; 1027 return; 1028 } 1029 1030 my $fh = FileHandle->new; 1031 1032 local $SIG{CHLD} = 'IGNORE'; 1033 1034 unless ($fh->open("$ftp -n", '|-')) { 1035 return $self->_error(loc("%1 creation failed: %2", $ftp, $!)); 1036 } 1037 1038 my @dialog = ( 1039 "lcd " . dirname($to), 1040 "open " . $self->host, 1041 "user anonymous $FROM_EMAIL", 1042 "cd /", 1043 "cd " . $self->path, 1044 "binary", 1045 "get " . $self->file . " " . $self->output_file, 1046 "quit", 1047 ); 1048 1049 foreach (@dialog) { $fh->print($_, "\n") } 1050 $fh->close or return; 1051 1052 return $to; 1053} 1054 1055### lynx is stupid - it decompresses any .gz file it finds to be text 1056### use /bin/lynx to fetch files 1057sub _lynx_fetch { 1058 my $self = shift; 1059 my %hash = @_; 1060 1061 my ($to); 1062 my $tmpl = { 1063 to => { required => 1, store => \$to } 1064 }; 1065 check( $tmpl, \%hash ) or return; 1066 1067 ### see if we have a lynx binary ### 1068 my $lynx; 1069 unless ( $lynx = can_run('lynx') ){ 1070 $METHOD_FAIL->{'lynx'} = 1; 1071 return; 1072 } 1073 1074 unless( IPC::Cmd->can_capture_buffer ) { 1075 $METHOD_FAIL->{'lynx'} = 1; 1076 1077 return $self->_error(loc( 1078 "Can not capture buffers. Can not use '%1' to fetch files", 1079 'lynx' )); 1080 } 1081 1082 ### check if the HTTP resource exists ### 1083 if ($self->uri =~ /^https?:\/\//i) { 1084 my $cmd = [ 1085 $lynx, 1086 '-head', 1087 '-source', 1088 "-auth=anonymous:$FROM_EMAIL", 1089 ]; 1090 1091 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; 1092 1093 push @$cmd, $self->uri; 1094 1095 ### shell out ### 1096 my $head; 1097 unless(run( command => $cmd, 1098 buffer => \$head, 1099 verbose => $DEBUG ) 1100 ) { 1101 return $self->_error(loc("Command failed: %1", $head || '')); 1102 } 1103 1104 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) { 1105 return $self->_error(loc("Command failed: %1", $head || '')); 1106 } 1107 } 1108 1109 ### write to the output file ourselves, since lynx ass_u_mes to much 1110 my $local = FileHandle->new( $to, 'w' ) 1111 or return $self->_error(loc( 1112 "Could not open '%1' for writing: %2",$to,$!)); 1113 1114 ### dump to stdout ### 1115 my $cmd = [ 1116 $lynx, 1117 '-source', 1118 "-auth=anonymous:$FROM_EMAIL", 1119 ]; 1120 1121 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; 1122 1123 ### DO NOT quote things for IPC::Run, it breaks stuff. 1124 push @$cmd, $self->uri; 1125 1126 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1127 ### and there's no need for special casing any more. 1128 ### DO NOT quote things for IPC::Run, it breaks stuff. 1129 # $IPC::Cmd::USE_IPC_RUN 1130 # ? $self->uri 1131 # : QUOTE. $self->uri .QUOTE; 1132 1133 1134 ### shell out ### 1135 my $captured; 1136 unless(run( command => $cmd, 1137 buffer => \$captured, 1138 verbose => $DEBUG ) 1139 ) { 1140 return $self->_error(loc("Command failed: %1", $captured || '')); 1141 } 1142 1143 ### print to local file ### 1144 ### XXX on a 404 with a special error page, $captured will actually 1145 ### hold the contents of that page, and make it *appear* like the 1146 ### request was a success, when really it wasn't :( 1147 ### there doesn't seem to be an option for lynx to change the exit 1148 ### code based on a 4XX status or so. 1149 ### the closest we can come is using --error_file and parsing that, 1150 ### which is very unreliable ;( 1151 $local->print( $captured ); 1152 $local->close or return; 1153 1154 return $to; 1155} 1156 1157### use /bin/ncftp to fetch files 1158sub _ncftp_fetch { 1159 my $self = shift; 1160 my %hash = @_; 1161 1162 my ($to); 1163 my $tmpl = { 1164 to => { required => 1, store => \$to } 1165 }; 1166 check( $tmpl, \%hash ) or return; 1167 1168 ### we can only set passive mode in interactive sessions, so bail out 1169 ### if $FTP_PASSIVE is set 1170 return if $FTP_PASSIVE; 1171 1172 ### see if we have a ncftp binary ### 1173 my $ncftp; 1174 unless( $ncftp = can_run('ncftp') ) { 1175 $METHOD_FAIL->{'ncftp'} = 1; 1176 return; 1177 } 1178 1179 my $cmd = [ 1180 $ncftp, 1181 '-V', # do not be verbose 1182 '-p', $FROM_EMAIL, # email as password 1183 $self->host, # hostname 1184 dirname($to), # local dir for the file 1185 # remote path to the file 1186 ### DO NOT quote things for IPC::Run, it breaks stuff. 1187 $IPC::Cmd::USE_IPC_RUN 1188 ? File::Spec::Unix->catdir( $self->path, $self->file ) 1189 : QUOTE. File::Spec::Unix->catdir( 1190 $self->path, $self->file ) .QUOTE 1191 1192 ]; 1193 1194 ### shell out ### 1195 my $captured; 1196 unless(run( command => $cmd, 1197 buffer => \$captured, 1198 verbose => $DEBUG ) 1199 ) { 1200 return $self->_error(loc("Command failed: %1", $captured || '')); 1201 } 1202 1203 return $to; 1204 1205} 1206 1207### use /bin/curl to fetch files 1208sub _curl_fetch { 1209 my $self = shift; 1210 my %hash = @_; 1211 1212 my ($to); 1213 my $tmpl = { 1214 to => { required => 1, store => \$to } 1215 }; 1216 check( $tmpl, \%hash ) or return; 1217 my $curl; 1218 unless ( $curl = can_run('curl') ) { 1219 $METHOD_FAIL->{'curl'} = 1; 1220 return; 1221 } 1222 1223 ### these long opts are self explanatory - I like that -jmb 1224 my $cmd = [ $curl, '-q' ]; 1225 1226 push(@$cmd, '-4') if $^O eq 'netbsd' && $FORCEIPV4; # only seen this on NetBSD so far 1227 1228 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT; 1229 1230 push(@$cmd, '--silent') unless $DEBUG; 1231 1232 ### curl does the right thing with passive, regardless ### 1233 if ($self->scheme eq 'ftp') { 1234 push(@$cmd, '--user', "anonymous:$FROM_EMAIL"); 1235 } 1236 1237 ### curl doesn't follow 302 (temporarily moved) etc automatically 1238 ### so we add --location to enable that. 1239 push @$cmd, '--fail', '--location', '--output', $to, $self->uri; 1240 1241 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1242 ### and there's no need for special casing any more. 1243 ### DO NOT quote things for IPC::Run, it breaks stuff. 1244 # $IPC::Cmd::USE_IPC_RUN 1245 # ? ($to, $self->uri) 1246 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1247 1248 1249 my $captured; 1250 unless(run( command => $cmd, 1251 buffer => \$captured, 1252 verbose => $DEBUG ) 1253 ) { 1254 1255 return $self->_error(loc("Command failed: %1", $captured || '')); 1256 } 1257 1258 return $to; 1259 1260} 1261 1262### /usr/bin/fetch fetch! ### 1263sub _fetch_fetch { 1264 my $self = shift; 1265 my %hash = @_; 1266 1267 my ($to); 1268 my $tmpl = { 1269 to => { required => 1, store => \$to } 1270 }; 1271 check( $tmpl, \%hash ) or return; 1272 1273 ### see if we have a fetch binary ### 1274 my $fetch; 1275 unless( HAS_FETCH and $fetch = can_run('fetch') ) { 1276 $METHOD_FAIL->{'fetch'} = 1; 1277 return; 1278 } 1279 1280 ### no verboseness, thanks ### 1281 my $cmd = [ $fetch, '-q' ]; 1282 1283 ### if a timeout is set, add it ### 1284 push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT; 1285 1286 ### run passive if specified ### 1287 #push @$cmd, '-p' if $FTP_PASSIVE; 1288 local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE; 1289 1290 ### set the output document, add the uri ### 1291 push @$cmd, '-o', $to, $self->uri; 1292 1293 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1294 ### and there's no need for special casing any more. 1295 ### DO NOT quote things for IPC::Run, it breaks stuff. 1296 # $IPC::Cmd::USE_IPC_RUN 1297 # ? ($to, $self->uri) 1298 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1299 1300 ### shell out ### 1301 my $captured; 1302 unless(run( command => $cmd, 1303 buffer => \$captured, 1304 verbose => $DEBUG 1305 )) { 1306 ### wget creates the output document always, even if the fetch 1307 ### fails.. so unlink it in that case 1308 1 while unlink $to; 1309 1310 return $self->_error(loc( "Command failed: %1", $captured || '' )); 1311 } 1312 1313 return $to; 1314} 1315 1316### use File::Copy for fetching file:// urls ### 1317### 1318### See section 3.10 of RFC 1738 (https://datatracker.ietf.org/doc/html/rfc1738#section-3.10) 1319### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://) 1320### 1321 1322sub _file_fetch { 1323 my $self = shift; 1324 my %hash = @_; 1325 1326 my ($to); 1327 my $tmpl = { 1328 to => { required => 1, store => \$to } 1329 }; 1330 check( $tmpl, \%hash ) or return; 1331 1332 1333 1334 ### prefix a / on unix systems with a file uri, since it would 1335 ### look somewhat like this: 1336 ### file:///home/kane/file 1337 ### whereas windows file uris for 'c:\some\dir\file' might look like: 1338 ### file:///C:/some/dir/file 1339 ### file:///C|/some/dir/file 1340 ### or for a network share '\\host\share\some\dir\file': 1341 ### file:////host/share/some/dir/file 1342 ### 1343 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like: 1344 ### file://vms.host.edu/disk$user/my/notes/note12345.txt 1345 ### 1346 1347 my $path = $self->path; 1348 my $vol = $self->vol; 1349 my $share = $self->share; 1350 1351 my $remote; 1352 if (!$share and $self->host) { 1353 return $self->_error(loc( 1354 "Currently %1 cannot handle hosts in %2 urls", 1355 'File::Fetch', 'file://' 1356 )); 1357 } 1358 1359 if( $vol ) { 1360 $path = File::Spec->catdir( split /\//, $path ); 1361 $remote = File::Spec->catpath( $vol, $path, $self->file); 1362 1363 } elsif( $share ) { 1364 ### win32 specific, and a share name, so we wont bother with File::Spec 1365 $path =~ s|/+|\\|g; 1366 $remote = "\\\\".$self->host."\\$share\\$path"; 1367 1368 } else { 1369 ### File::Spec on VMS can not currently handle UNIX syntax. 1370 my $file_class = ON_VMS 1371 ? 'File::Spec::Unix' 1372 : 'File::Spec'; 1373 1374 $remote = $file_class->catfile( $path, $self->file ); 1375 } 1376 1377 ### File::Copy is littered with 'die' statements :( ### 1378 my $rv = eval { File::Copy::copy( $remote, $to ) }; 1379 1380 ### something went wrong ### 1381 if( !$rv or $@ ) { 1382 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4", 1383 $remote, $to, $!, $@)); 1384 } 1385 1386 return $to; 1387} 1388 1389### use /usr/bin/rsync to fetch files 1390sub _rsync_fetch { 1391 my $self = shift; 1392 my %hash = @_; 1393 1394 my ($to); 1395 my $tmpl = { 1396 to => { required => 1, store => \$to } 1397 }; 1398 check( $tmpl, \%hash ) or return; 1399 my $rsync; 1400 unless ( $rsync = can_run('rsync') ) { 1401 $METHOD_FAIL->{'rsync'} = 1; 1402 return; 1403 } 1404 1405 my $cmd = [ $rsync ]; 1406 1407 ### XXX: rsync has no I/O timeouts at all, by default 1408 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 1409 1410 push(@$cmd, '--quiet') unless $DEBUG; 1411 1412 ### DO NOT quote things for IPC::Run, it breaks stuff. 1413 push @$cmd, $self->uri, $to; 1414 1415 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1416 ### and there's no need for special casing any more. 1417 ### DO NOT quote things for IPC::Run, it breaks stuff. 1418 # $IPC::Cmd::USE_IPC_RUN 1419 # ? ($to, $self->uri) 1420 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1421 1422 my $captured; 1423 unless(run( command => $cmd, 1424 buffer => \$captured, 1425 verbose => $DEBUG ) 1426 ) { 1427 1428 return $self->_error(loc("Command %1 failed: %2", 1429 "@$cmd" || '', $captured || '')); 1430 } 1431 1432 return $to; 1433 1434} 1435 1436### use git to fetch files 1437sub _git_fetch { 1438 my $self = shift; 1439 my %hash = @_; 1440 1441 my ($to); 1442 my $tmpl = { 1443 to => { required => 1, store => \$to } 1444 }; 1445 check( $tmpl, \%hash ) or return; 1446 my $git; 1447 unless ( $git = can_run('git') ) { 1448 $METHOD_FAIL->{'git'} = 1; 1449 return; 1450 } 1451 1452 my $cmd = [ $git, 'clone' ]; 1453 1454 #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 1455 1456 push(@$cmd, '--quiet') unless $DEBUG; 1457 1458 ### DO NOT quote things for IPC::Run, it breaks stuff. 1459 push @$cmd, $self->uri, $to; 1460 1461 ### with IPC::Cmd > 0.41, this is fixed in teh library, 1462 ### and there's no need for special casing any more. 1463 ### DO NOT quote things for IPC::Run, it breaks stuff. 1464 # $IPC::Cmd::USE_IPC_RUN 1465 # ? ($to, $self->uri) 1466 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 1467 1468 my $captured; 1469 unless(run( command => $cmd, 1470 buffer => \$captured, 1471 verbose => $DEBUG ) 1472 ) { 1473 1474 return $self->_error(loc("Command %1 failed: %2", 1475 "@$cmd" || '', $captured || '')); 1476 } 1477 1478 return $to; 1479 1480} 1481 1482################################# 1483# 1484# Error code 1485# 1486################################# 1487 1488=pod 1489 1490=head2 $ff->error([BOOL]) 1491 1492Returns the last encountered error as string. 1493Pass it a true value to get the C<Carp::longmess()> output instead. 1494 1495=cut 1496 1497### error handling the way Archive::Extract does it 1498sub _error { 1499 my $self = shift; 1500 my $error = shift; 1501 1502 $self->_error_msg( $error ); 1503 $self->_error_msg_long( Carp::longmess($error) ); 1504 1505 if( $WARN ) { 1506 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; 1507 } 1508 1509 return; 1510} 1511 1512sub error { 1513 my $self = shift; 1514 return shift() ? $self->_error_msg_long : $self->_error_msg; 1515} 1516 1517 15181; 1519 1520=pod 1521 1522=head1 HOW IT WORKS 1523 1524File::Fetch is able to fetch a variety of uris, by using several 1525external programs and modules. 1526 1527Below is a mapping of what utilities will be used in what order 1528for what schemes, if available: 1529 1530 file => LWP, lftp, file 1531 http => LWP, HTTP::Tiny, wget, curl, lftp, fetch, HTTP::Lite, lynx, iosock 1532 ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp 1533 rsync => rsync 1534 git => git 1535 1536If you'd like to disable the use of one or more of these utilities 1537and/or modules, see the C<$BLACKLIST> variable further down. 1538 1539If a utility or module isn't available, it will be marked in a cache 1540(see the C<$METHOD_FAIL> variable further down), so it will not be 1541tried again. The C<fetch> method will only fail when all options are 1542exhausted, and it was not able to retrieve the file. 1543 1544The C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD 1545may also have it from C<pkgsrc>. We only check for C<fetch> on those 1546three platforms. 1547 1548C<iosock> is a very limited L<IO::Socket::INET> based mechanism for 1549retrieving C<http> schemed urls. It doesn't follow redirects for instance. 1550 1551C<git> only supports C<git://> style urls. 1552 1553A special note about fetching files from an ftp uri: 1554 1555By default, all ftp connections are done in passive mode. To change 1556that, see the C<$FTP_PASSIVE> variable further down. 1557 1558Furthermore, ftp uris only support anonymous connections, so no 1559named user/password pair can be passed along. 1560 1561C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable 1562further down. 1563 1564=head1 GLOBAL VARIABLES 1565 1566The behaviour of File::Fetch can be altered by changing the following 1567global variables: 1568 1569=head2 $File::Fetch::FROM_EMAIL 1570 1571This is the email address that will be sent as your anonymous ftp 1572password. 1573 1574Default is C<File-Fetch@example.com>. 1575 1576=head2 $File::Fetch::USER_AGENT 1577 1578This is the useragent as C<LWP> will report it. 1579 1580Default is C<File::Fetch/$VERSION>. 1581 1582=head2 $File::Fetch::FTP_PASSIVE 1583 1584This variable controls whether the environment variable C<FTP_PASSIVE> 1585and any passive switches to commandline tools will be set to true. 1586 1587Default value is 1. 1588 1589Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch 1590files, since passive mode can only be set interactively for this binary 1591 1592=head2 $File::Fetch::TIMEOUT 1593 1594When set, controls the network timeout (counted in seconds). 1595 1596Default value is 0. 1597 1598=head2 $File::Fetch::WARN 1599 1600This variable controls whether errors encountered internally by 1601C<File::Fetch> should be C<carp>'d or not. 1602 1603Set to false to silence warnings. Inspect the output of the C<error()> 1604method manually to see what went wrong. 1605 1606Defaults to C<true>. 1607 1608=head2 $File::Fetch::DEBUG 1609 1610This enables debugging output when calling commandline utilities to 1611fetch files. 1612This also enables C<Carp::longmess> errors, instead of the regular 1613C<carp> errors. 1614 1615Good for tracking down why things don't work with your particular 1616setup. 1617 1618Default is 0. 1619 1620=head2 $File::Fetch::BLACKLIST 1621 1622This is an array ref holding blacklisted modules/utilities for fetching 1623files with. 1624 1625To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could 1626set $File::Fetch::BLACKLIST to: 1627 1628 $File::Fetch::BLACKLIST = [qw|lwp netftp|] 1629 1630The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable. 1631 1632See the note on C<MAPPING> below. 1633 1634=head2 $File::Fetch::METHOD_FAIL 1635 1636This is a hashref registering what modules/utilities were known to fail 1637for fetching files (mostly because they weren't installed). 1638 1639You can reset this cache by assigning an empty hashref to it, or 1640individually remove keys. 1641 1642See the note on C<MAPPING> below. 1643 1644=head1 MAPPING 1645 1646 1647Here's a quick mapping for the utilities/modules, and their names for 1648the $BLACKLIST, $METHOD_FAIL and other internal functions. 1649 1650 LWP => lwp 1651 HTTP::Lite => httplite 1652 HTTP::Tiny => httptiny 1653 Net::FTP => netftp 1654 wget => wget 1655 lynx => lynx 1656 ncftp => ncftp 1657 ftp => ftp 1658 curl => curl 1659 rsync => rsync 1660 lftp => lftp 1661 fetch => fetch 1662 IO::Socket => iosock 1663 1664=head1 FREQUENTLY ASKED QUESTIONS 1665 1666=head2 So how do I use a proxy with File::Fetch? 1667 1668C<File::Fetch> currently only supports proxies with LWP::UserAgent. 1669You will need to set your environment variables accordingly. For 1670example, to use an ftp proxy: 1671 1672 $ENV{ftp_proxy} = 'foo.com'; 1673 1674Refer to the LWP::UserAgent manpage for more details. 1675 1676=head2 I used 'lynx' to fetch a file, but its contents is all wrong! 1677 1678C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>, 1679which we in turn capture. If that content is a 'custom' error file 1680(like, say, a C<404 handler>), you will get that contents instead. 1681 1682Sadly, C<lynx> doesn't support any options to return a different exit 1683code on non-C<200 OK> status, giving us no way to tell the difference 1684between a 'successful' fetch and a custom error page. 1685 1686Therefor, we recommend to only use C<lynx> as a last resort. This is 1687why it is at the back of our list of methods to try as well. 1688 1689=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do? 1690 1691C<File::Fetch> is relatively smart about things. When trying to write 1692a file to disk, it removes the C<query parameters> (see the 1693C<output_file> method for details) from the file name before creating 1694it. In most cases this suffices. 1695 1696If you have any other characters you need to escape, please install 1697the C<URI::Escape> module from CPAN, and pre-encode your URI before 1698passing it to C<File::Fetch>. You can read about the details of URIs 1699and URI encoding here: 1700 1701L<https://datatracker.ietf.org/doc/html/rfc2396> 1702 1703=head1 TODO 1704 1705=over 4 1706 1707=item Implement $PREFER_BIN 1708 1709To indicate to rather use commandline tools than modules 1710 1711=back 1712 1713=head1 BUG REPORTS 1714 1715Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>. 1716 1717=head1 AUTHOR 1718 1719This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 1720 1721=head1 COPYRIGHT 1722 1723This library is free software; you may redistribute and/or modify it 1724under the same terms as Perl itself. 1725 1726 1727=cut 1728 1729# Local variables: 1730# c-indentation-style: bsd 1731# c-basic-offset: 4 1732# indent-tabs-mode: nil 1733# End: 1734# vim: expandtab shiftwidth=4: 1735 1736 1737 1738 1739