1package File::Fetch;
2
3use strict;
4use FileHandle;
5use File::Temp;
6use File::Copy;
7use File::Spec;
8use File::Spec::Unix;
9use File::Basename              qw[dirname];
10
11use Cwd                         qw[cwd];
12use Carp                        qw[carp];
13use IPC::Cmd                    qw[can_run run QUOTE];
14use File::Path                  qw[mkpath];
15use File::Temp                  qw[tempdir];
16use Params::Check               qw[check];
17use Module::Load::Conditional   qw[can_load];
18use Locale::Maketext::Simple    Style => 'gettext';
19
20use vars    qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21                $BLACKLIST $METHOD_FAIL $VERSION $METHODS
22                $FTP_PASSIVE $TIMEOUT $DEBUG $WARN $FORCEIPV4
23            ];
24
25$VERSION        = '1.04';
26$VERSION        = eval $VERSION;    # avoid warnings with development releases
27$PREFER_BIN     = 0;                # XXX TODO implement
28$FROM_EMAIL     = 'File-Fetch@example.com';
29$USER_AGENT     = "File::Fetch/$VERSION";
30$BLACKLIST      = [qw|ftp|];
31push @$BLACKLIST, qw|lftp| if $^O eq 'dragonfly' || $^O eq 'hpux';
32$METHOD_FAIL    = { };
33$FTP_PASSIVE    = 1;
34$TIMEOUT        = 0;
35$DEBUG          = 0;
36$WARN           = 1;
37$FORCEIPV4      = 0;
38
39### methods available to fetch the file depending on the scheme
40$METHODS = {
41    http    => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ],
42    https   => [ qw|lwp wget curl| ],
43    ftp     => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ],
44    file    => [ qw|lwp lftp file| ],
45    rsync   => [ qw|rsync| ],
46    git     => [ qw|git| ],
47};
48
49### silly warnings ###
50local $Params::Check::VERBOSE               = 1;
51local $Params::Check::VERBOSE               = 1;
52local $Module::Load::Conditional::VERBOSE   = 0;
53local $Module::Load::Conditional::VERBOSE   = 0;
54
55### see what OS we are on, important for file:// uris ###
56use constant ON_WIN     => ($^O eq 'MSWin32');
57use constant ON_VMS     => ($^O eq 'VMS');
58use constant ON_UNIX    => (!ON_WIN);
59use constant HAS_VOL    => (ON_WIN);
60use constant HAS_SHARE  => (ON_WIN);
61use constant HAS_FETCH  => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! );
62
63=pod
64
65=head1 NAME
66
67File::Fetch - A generic file fetching mechanism
68
69=head1 SYNOPSIS
70
71    use File::Fetch;
72
73    ### build a File::Fetch object ###
74    my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
75
76    ### fetch the uri to cwd() ###
77    my $where = $ff->fetch() or die $ff->error;
78
79    ### fetch the uri to /tmp ###
80    my $where = $ff->fetch( to => '/tmp' );
81
82    ### parsed bits from the uri ###
83    $ff->uri;
84    $ff->scheme;
85    $ff->host;
86    $ff->path;
87    $ff->file;
88
89=head1 DESCRIPTION
90
91File::Fetch is a generic file fetching mechanism.
92
93It allows you to fetch any file pointed to by a C<ftp>, C<http>,
94C<file>, C<git> or C<rsync> uri by a number of different means.
95
96See the C<HOW IT WORKS> section further down for details.
97
98=head1 ACCESSORS
99
100A C<File::Fetch> object has the following accessors
101
102=over 4
103
104=item $ff->uri
105
106The uri you passed to the constructor
107
108=item $ff->scheme
109
110The scheme from the uri (like 'file', 'http', etc)
111
112=item $ff->host
113
114The hostname in the uri.  Will be empty if host was originally
115'localhost' for a 'file://' url.
116
117=item $ff->vol
118
119On operating systems with the concept of a volume the second element
120of a file:// is considered to the be volume specification for the file.
121Thus on Win32 this routine returns the volume, on other operating
122systems this returns nothing.
123
124On Windows this value may be empty if the uri is to a network share, in
125which case the 'share' property will be defined. Additionally, volume
126specifications that use '|' as ':' will be converted on read to use ':'.
127
128On VMS, which has a volume concept, this field will be empty because VMS
129file specifications are converted to absolute UNIX format and the volume
130information is transparently included.
131
132=item $ff->share
133
134On systems with the concept of a network share (currently only Windows) returns
135the sharename from a file://// url.  On other operating systems returns empty.
136
137=item $ff->path
138
139The path from the uri, will be at least a single '/'.
140
141=item $ff->file
142
143The name of the remote file. For the local file name, the
144result of $ff->output_file will be used.
145
146=item $ff->file_default
147
148The name of the default local file, that $ff->output_file falls back to if
149it would otherwise return no filename. For example when fetching a URI like
150http://www.abc.net.au/ the contents retrieved may be from a remote file called
151'index.html'. The default value of this attribute is literally 'file_default'.
152
153=cut
154
155
156##########################
157### Object & Accessors ###
158##########################
159
160{
161    ### template for autogenerated accessors ###
162    my $Tmpl = {
163        scheme          => { default => 'http' },
164        host            => { default => 'localhost' },
165        path            => { default => '/' },
166        file            => { required => 1 },
167        uri             => { required => 1 },
168        userinfo        => { default => '' },
169        vol             => { default => '' }, # windows for file:// uris
170        share           => { default => '' }, # windows for file:// uris
171        file_default    => { default => 'file_default' },
172        tempdir_root    => { required => 1 }, # Should be lazy-set at ->new()
173        _error_msg      => { no_override => 1 },
174        _error_msg_long => { no_override => 1 },
175    };
176
177    for my $method ( keys %$Tmpl ) {
178        no strict 'refs';
179        *$method = sub {
180                        my $self = shift;
181                        $self->{$method} = $_[0] if @_;
182                        return $self->{$method};
183                    }
184    }
185
186    sub _create {
187        my $class = shift;
188        my %hash  = @_;
189
190        my $args = check( $Tmpl, \%hash ) or return;
191
192        bless $args, $class;
193
194        if( lc($args->scheme) ne 'file' and not $args->host ) {
195            return $class->_error(loc(
196                "Hostname required when fetching from '%1'",$args->scheme));
197        }
198
199        for (qw[path]) {
200            unless( $args->$_() ) { # 5.5.x needs the ()
201                return $class->_error(loc("No '%1' specified",$_));
202            }
203        }
204
205        return $args;
206    }
207}
208
209=item $ff->output_file
210
211The name of the output file. This is the same as $ff->file,
212but any query parameters are stripped off. For example:
213
214    http://example.com/index.html?x=y
215
216would make the output file be C<index.html> rather than
217C<index.html?x=y>.
218
219=back
220
221=cut
222
223sub output_file {
224    my $self = shift;
225    my $file = $self->file;
226
227    $file =~ s/\?.*$//g;
228
229    $file ||= $self->file_default;
230
231    return $file;
232}
233
234### XXX do this or just point to URI::Escape?
235# =head2 $esc_uri = $ff->escaped_uri
236#
237# =cut
238#
239# ### most of this is stolen straight from URI::escape
240# {   ### Build a char->hex map
241#     my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
242#
243#     sub escaped_uri {
244#         my $self = shift;
245#         my $uri  = $self->uri;
246#
247#         ### Default unsafe characters.  RFC 2732 ^(uric - reserved)
248#         $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
249#                     $escapes{$1} || $self->_fail_hi($1)/ge;
250#
251#         return $uri;
252#     }
253#
254#     sub _fail_hi {
255#         my $self = shift;
256#         my $char = shift;
257#
258#         $self->_error(loc(
259#             "Can't escape '%1', try using the '%2' module instead",
260#             sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
261#         ));
262#     }
263#
264#     sub output_file {
265#
266#     }
267#
268#
269# }
270
271=head1 METHODS
272
273=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
274
275Parses the uri and creates a corresponding File::Fetch::Item object,
276that is ready to be C<fetch>ed and returns it.
277
278Returns false on failure.
279
280=cut
281
282sub new {
283    my $class = shift;
284    my %hash  = @_;
285
286    my ($uri, $file_default, $tempdir_root);
287    my $tmpl = {
288        uri          => { required => 1, store => \$uri },
289        file_default => { required => 0, store => \$file_default },
290        tempdir_root => { required => 0, store => \$tempdir_root },
291    };
292
293    check( $tmpl, \%hash ) or return;
294
295    ### parse the uri to usable parts ###
296    my $href    = $class->_parse_uri( $uri ) or return;
297
298    $href->{file_default} = $file_default if $file_default;
299    $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root;
300    $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd      ) if not $href->{tempdir_root};
301
302    ### make it into a FFI object ###
303    my $ff      = $class->_create( %$href ) or return;
304
305
306    ### return the object ###
307    return $ff;
308}
309
310### parses an uri to a hash structure:
311###
312### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
313###
314### becomes:
315###
316### $href = {
317###     scheme  => 'ftp',
318###     host    => 'ftp.cpan.org',
319###     path    => '/pub/mirror',
320###     file    => 'index.html'
321### };
322###
323### In the case of file:// urls there maybe be additional fields
324###
325### For systems with volume specifications such as Win32 there will be
326### a volume specifier provided in the 'vol' field.
327###
328###   'vol' => 'volumename'
329###
330### For windows file shares there may be a 'share' key specified
331###
332###   'share' => 'sharename'
333###
334### Note that the rules of what a file:// url means vary by the operating system
335### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
336### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
337### not '/foo/bar.txt'
338###
339### Similarly if the host interpreting the url is VMS then
340### file:///disk$user/my/notes/note12345.txt' means
341### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
342### if it is unix where it means /disk$user/my/notes/note12345.txt'.
343### Except for some cases in the File::Spec methods, Perl on VMS will generally
344### handle UNIX format file specifications.
345###
346### This means it is impossible to serve certain file:// urls on certain systems.
347###
348### Thus are the problems with a protocol-less specification. :-(
349###
350
351sub _parse_uri {
352    my $self = shift;
353    my $uri  = shift or return;
354
355    my $href = { uri => $uri };
356
357    ### find the scheme ###
358    $uri            =~ s|^(\w+)://||;
359    $href->{scheme} = $1;
360
361    ### See rfc 1738 section 3.10
362    ### https://datatracker.ietf.org/doc/html/rfc1738#section-3.10
363    ### And wikipedia for more on windows file:// urls
364    ### http://en.wikipedia.org/wiki/File://
365    if( $href->{scheme} eq 'file' ) {
366
367        my @parts = split '/',$uri;
368
369        ### file://hostname/...
370        ### file://hostname/...
371        ### normalize file://localhost with file:///
372        $href->{host} = $parts[0] || '';
373
374        ### index in @parts where the path components begin;
375        my $index = 1;
376
377        ### file:////hostname/sharename/blah.txt
378        if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
379
380            $href->{host}   = $parts[2] || '';  # avoid warnings
381            $href->{share}  = $parts[3] || '';  # avoid warnings
382
383            $index          = 4         # index after the share
384
385        ### file:///D|/blah.txt
386        ### file:///D:/blah.txt
387        } elsif (HAS_VOL) {
388
389            ### this code comes from dmq's patch, but:
390            ### XXX if volume is empty, wouldn't that be an error? --kane
391            ### if so, our file://localhost test needs to be fixed as wel
392            $href->{vol}    = $parts[1] || '';
393
394            ### correct D| style colume descriptors
395            $href->{vol}    =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
396
397            $index          = 2;        # index after the volume
398        }
399
400        ### rebuild the path from the leftover parts;
401        $href->{path} = join '/', '', splice( @parts, $index, $#parts );
402
403    } else {
404        ### using anything but qw() in hash slices may produce warnings
405        ### in older perls :-(
406        @{$href}{ qw(userinfo host path) } = $uri =~ m|(?:([^\@:]*:[^\:\@]*)@)?([^/]*)(/.*)$|s;
407    }
408
409    ### split the path into file + dir ###
410    {   my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
411        $href->{path} = $parts[1];
412        $href->{file} = $parts[2];
413    }
414
415    ### host will be empty if the target was 'localhost' and the
416    ### scheme was 'file'
417    $href->{host} = '' if   ($href->{host}      eq 'localhost') and
418                            ($href->{scheme}    eq 'file');
419
420    return $href;
421}
422
423=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
424
425Fetches the file you requested and returns the full path to the file.
426
427By default it writes to C<cwd()>, but you can override that by specifying
428the C<to> argument:
429
430    ### file fetch to /tmp, full path to the file in $where
431    $where = $ff->fetch( to => '/tmp' );
432
433    ### file slurped into $scalar, full path to the file in $where
434    ### file is downloaded to a temp directory and cleaned up at exit time
435    $where = $ff->fetch( to => \$scalar );
436
437Returns the full path to the downloaded file on success, and false
438on failure.
439
440=cut
441
442sub fetch {
443    my $self = shift or return;
444    my %hash = @_;
445
446    my $target;
447    my $tmpl = {
448        to  => { default => cwd(), store => \$target },
449    };
450
451    check( $tmpl, \%hash ) or return;
452
453    my ($to, $fh);
454    ### you want us to slurp the contents
455    if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
456        $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 );
457
458    ### plain old fetch
459    } else {
460        $to = $target;
461
462        ### On VMS force to VMS format so File::Spec will work.
463        $to = VMS::Filespec::vmspath($to) if ON_VMS;
464
465        ### create the path if it doesn't exist yet ###
466        unless( -d $to ) {
467            eval { mkpath( $to ) };
468
469            return $self->_error(loc("Could not create path '%1'",$to)) if $@;
470        }
471    }
472
473    ### set passive ftp if required ###
474    local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
475
476    ### we dont use catfile on win32 because if we are using a cygwin tool
477    ### under cmd.exe they wont understand windows style separators.
478    my $out_to = ON_WIN ? $to.'/'.$self->output_file
479                        : File::Spec->catfile( $to, $self->output_file );
480
481    for my $method ( @{ $METHODS->{$self->scheme} } ) {
482        my $sub =  '_'.$method.'_fetch';
483
484        unless( __PACKAGE__->can($sub) ) {
485            $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
486                        $method));
487            next;
488        }
489
490        ### method is blacklisted ###
491        next if grep { lc $_ eq $method } @$BLACKLIST;
492
493        ### method is known to fail ###
494        next if $METHOD_FAIL->{$method};
495
496        ### there's serious issues with IPC::Run and quoting of command
497        ### line arguments. using quotes in the wrong place breaks things,
498        ### and in the case of say,
499        ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
500        ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
501        ### it doesn't matter how you quote, it always fails.
502        local $IPC::Cmd::USE_IPC_RUN = 0;
503
504        if( my $file = $self->$sub(
505                        to => $out_to
506        )){
507
508            unless( -e $file && -s _ ) {
509                $self->_error(loc("'%1' said it fetched '%2', ".
510                     "but it was not created",$method,$file));
511
512                ### mark the failure ###
513                $METHOD_FAIL->{$method} = 1;
514
515                next;
516
517            } else {
518
519                ### slurp mode?
520                if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
521
522                    ### open the file
523                    open my $fh, "<$file" or do {
524                        $self->_error(
525                            loc("Could not open '%1': %2", $file, $!));
526                        return;
527                    };
528
529                    ### slurp
530                    $$target = do { local $/; <$fh> };
531
532                }
533
534                my $abs = File::Spec->rel2abs( $file );
535                return $abs;
536
537            }
538        }
539    }
540
541
542    ### if we got here, we looped over all methods, but we weren't able
543    ### to fetch it.
544    return;
545}
546
547########################
548### _*_fetch methods ###
549########################
550
551### LWP fetching ###
552sub _lwp_fetch {
553    my $self = shift;
554    my %hash = @_;
555
556    my ($to);
557    my $tmpl = {
558        to  => { required => 1, store => \$to }
559    };
560    check( $tmpl, \%hash ) or return;
561
562    ### modules required to download with lwp ###
563    my $use_list = {
564        LWP                 => '0.0',
565        'LWP::UserAgent'    => '0.0',
566        'HTTP::Request'     => '0.0',
567        'HTTP::Status'      => '0.0',
568        URI                 => '0.0',
569
570    };
571
572    if ($self->scheme eq 'https') {
573        $use_list->{'LWP::Protocol::https'} = '0';
574    }
575
576    ### Fix CVE-2016-1238 ###
577    local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
578    unless( can_load( modules => $use_list ) ) {
579        $METHOD_FAIL->{'lwp'} = 1;
580        return;
581    }
582
583    ### setup the uri object
584    my $uri = URI->new( File::Spec::Unix->catfile(
585                                $self->path, $self->file
586                    ) );
587
588    ### special rules apply for file:// uris ###
589    $uri->scheme( $self->scheme );
590    $uri->host( $self->scheme eq 'file' ? '' : $self->host );
591
592    if ($self->userinfo) {
593        $uri->userinfo($self->userinfo);
594    } elsif ($self->scheme ne 'file') {
595        $uri->userinfo("anonymous:$FROM_EMAIL");
596    }
597
598    ### set up the useragent object
599    my $ua = LWP::UserAgent->new();
600    $ua->timeout( $TIMEOUT ) if $TIMEOUT;
601    $ua->agent( $USER_AGENT );
602    $ua->from( $FROM_EMAIL );
603    $ua->env_proxy;
604
605    my $res = $ua->mirror($uri, $to) or return;
606
607    ### uptodate or fetched ok ###
608    if ( $res->code == 304 or $res->code == 200 ) {
609        return $to;
610
611    } else {
612        return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
613                    $res->code, HTTP::Status::status_message($res->code),
614                    $res->status_line));
615    }
616
617}
618
619### HTTP::Tiny fetching ###
620sub _httptiny_fetch {
621    my $self = shift;
622    my %hash = @_;
623
624    my ($to);
625    my $tmpl = {
626        to  => { required => 1, store => \$to }
627    };
628    check( $tmpl, \%hash ) or return;
629
630    my $use_list = {
631        'HTTP::Tiny'    => '0.008',
632
633    };
634
635    ### Fix CVE-2016-1238 ###
636    local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
637    unless( can_load(modules => $use_list) ) {
638        $METHOD_FAIL->{'httptiny'} = 1;
639        return;
640    }
641
642    my $uri = $self->uri;
643
644    my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) );
645
646    my $rc = $http->mirror( $uri, $to );
647
648    unless ( $rc->{success} ) {
649
650        return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]",
651                    $rc->{status}, $rc->{reason} ) );
652
653    }
654
655    return $to;
656
657}
658
659### HTTP::Lite fetching ###
660sub _httplite_fetch {
661    my $self = shift;
662    my %hash = @_;
663
664    my ($to);
665    my $tmpl = {
666        to  => { required => 1, store => \$to }
667    };
668    check( $tmpl, \%hash ) or return;
669
670    ### modules required to download with lwp ###
671    my $use_list = {
672        'HTTP::Lite'    => '2.2',
673        'MIME::Base64'  => '0',
674    };
675
676    ### Fix CVE-2016-1238 ###
677    local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
678    unless( can_load(modules => $use_list) ) {
679        $METHOD_FAIL->{'httplite'} = 1;
680        return;
681    }
682
683    my $uri = $self->uri;
684    my $retries = 0;
685
686    RETRIES: while ( $retries++ < 5 ) {
687
688      my $http = HTTP::Lite->new();
689      # Naughty naughty but there isn't any accessor/setter
690      $http->{timeout} = $TIMEOUT if $TIMEOUT;
691      $http->http11_mode(1);
692
693      if ($self->userinfo) {
694          my $encoded = MIME::Base64::encode($self->userinfo, '');
695          $http->add_req_header("Authorization", "Basic $encoded");
696      }
697
698      my $fh = FileHandle->new;
699
700      unless ( $fh->open($to,'>') ) {
701        return $self->_error(loc(
702             "Could not open '%1' for writing: %2",$to,$!));
703      }
704
705      $fh->autoflush(1);
706
707      binmode $fh;
708
709      my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh );
710
711      close $fh;
712
713      if ( $rc == 301 || $rc == 302 ) {
714          my $loc;
715          HEADERS: for ($http->headers_array) {
716            /Location: (\S+)/ and $loc = $1, last HEADERS;
717          }
718          #$loc or last; # Think we should squeal here.
719          if ($loc =~ m!^/!) {
720            $uri =~ s{^(\w+?://[^/]+)/.*$}{$1};
721            $uri .= $loc;
722          }
723          else {
724            $uri = $loc;
725          }
726          next RETRIES;
727      }
728      elsif ( $rc == 200 ) {
729          return $to;
730      }
731      else {
732        return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]",
733                    $rc, $http->status_message));
734      }
735
736    } # Loop for 5 retries.
737
738    return $self->_error("Fetch failed! Gave up after 5 tries");
739
740}
741
742### Simple IO::Socket::INET fetching ###
743sub _iosock_fetch {
744    my $self = shift;
745    my %hash = @_;
746
747    my ($to);
748    my $tmpl = {
749        to  => { required => 1, store => \$to }
750    };
751    check( $tmpl, \%hash ) or return;
752
753    my $use_list = {
754        'IO::Socket::INET' => '0.0',
755        'IO::Select'       => '0.0',
756    };
757
758    ### Fix CVE-2016-1238 ###
759    local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
760    unless( can_load(modules => $use_list) ) {
761        $METHOD_FAIL->{'iosock'} = 1;
762        return;
763    }
764
765    my $sock = IO::Socket::INET->new(
766        PeerHost => $self->host,
767        ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ),
768    );
769
770    unless ( $sock ) {
771        return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!));
772    }
773
774    my $fh = FileHandle->new;
775
776    # Check open()
777
778    unless ( $fh->open($to,'>') ) {
779        return $self->_error(loc(
780             "Could not open '%1' for writing: %2",$to,$!));
781    }
782
783    $fh->autoflush(1);
784    binmode $fh;
785
786    my $path = File::Spec::Unix->catfile( $self->path, $self->file );
787    my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a";
788    $sock->send( $req );
789
790    my $select = IO::Select->new( $sock );
791
792    my $resp = '';
793    my $normal = 0;
794    while ( $select->can_read( $TIMEOUT || 60 ) ) {
795      my $ret = $sock->sysread( $resp, 4096, length($resp) );
796      if ( !defined $ret or $ret == 0 ) {
797        $select->remove( $sock );
798        $normal++;
799      }
800    }
801    close $sock;
802
803    unless ( $normal ) {
804        return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 )));
805    }
806
807    # Check the "response"
808    # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1)
809    $resp =~ s/^(\x0d?\x0a)+//;
810    # Check it is an HTTP response
811    unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) {
812        return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host));
813    }
814
815    # Check for OK
816    my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i;
817    unless ( $code eq '200' ) {
818        return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host));
819    }
820
821    {
822      local $\;
823      print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0];
824    }
825    close $fh;
826    return $to;
827}
828
829### Net::FTP fetching
830sub _netftp_fetch {
831    my $self = shift;
832    my %hash = @_;
833
834    my ($to);
835    my $tmpl = {
836        to  => { required => 1, store => \$to }
837    };
838    check( $tmpl, \%hash ) or return;
839
840    ### required modules ###
841    my $use_list = { 'Net::FTP' => 0 };
842
843    ### Fix CVE-2016-1238 ###
844    local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
845    unless( can_load( modules => $use_list ) ) {
846        $METHOD_FAIL->{'netftp'} = 1;
847        return;
848    }
849
850    ### make connection ###
851    my $ftp;
852    my @options = ($self->host);
853    push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
854    unless( $ftp = Net::FTP->new( @options ) ) {
855        return $self->_error(loc("Ftp creation failed: %1",$@));
856    }
857
858    ### login ###
859    unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
860        return $self->_error(loc("Could not login to '%1'",$self->host));
861    }
862
863    ### set binary mode, just in case ###
864    $ftp->binary;
865
866    ### create the remote path
867    ### remember remote paths are unix paths! [#11483]
868    my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
869
870    ### fetch the file ###
871    my $target;
872    unless( $target = $ftp->get( $remote, $to ) ) {
873        return $self->_error(loc("Could not fetch '%1' from '%2'",
874                    $remote, $self->host));
875    }
876
877    ### log out ###
878    $ftp->quit;
879
880    return $target;
881
882}
883
884### /bin/wget fetch ###
885sub _wget_fetch {
886    my $self = shift;
887    my %hash = @_;
888
889    my ($to);
890    my $tmpl = {
891        to  => { required => 1, store => \$to }
892    };
893    check( $tmpl, \%hash ) or return;
894
895    my $wget;
896    ### see if we have a wget binary ###
897    unless( $wget = can_run('wget') ) {
898        $METHOD_FAIL->{'wget'} = 1;
899        return;
900    }
901
902    ### no verboseness, thanks ###
903    my $cmd = [ $wget, '--quiet' ];
904
905    ### if a timeout is set, add it ###
906    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
907
908    ### run passive if specified ###
909    push @$cmd, '--passive-ftp' if $self->scheme eq 'ftp' && $FTP_PASSIVE;
910
911    ### set the output document, add the uri ###
912    push @$cmd, '--output-document', $to, $self->uri;
913
914    ### with IPC::Cmd > 0.41, this is fixed in teh library,
915    ### and there's no need for special casing any more.
916    ### DO NOT quote things for IPC::Run, it breaks stuff.
917    # $IPC::Cmd::USE_IPC_RUN
918    #    ? ($to, $self->uri)
919    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
920
921    ### shell out ###
922    my $captured;
923    unless(run( command => $cmd,
924                buffer  => \$captured,
925                verbose => $DEBUG
926    )) {
927        ### wget creates the output document always, even if the fetch
928        ### fails.. so unlink it in that case
929        1 while unlink $to;
930
931        return $self->_error(loc( "Command failed: %1", $captured || '' ));
932    }
933
934    return $to;
935}
936
937### /bin/lftp fetch ###
938sub _lftp_fetch {
939    my $self = shift;
940    my %hash = @_;
941
942    my ($to);
943    my $tmpl = {
944        to  => { required => 1, store => \$to }
945    };
946    check( $tmpl, \%hash ) or return;
947
948    ### see if we have a lftp binary ###
949    my $lftp;
950    unless( $lftp = can_run('lftp') ) {
951        $METHOD_FAIL->{'lftp'} = 1;
952        return;
953    }
954
955    ### no verboseness, thanks ###
956    my $cmd = [ $lftp, '-f' ];
957
958    my $fh = File::Temp->new;
959
960    my $str;
961
962    ### if a timeout is set, add it ###
963    $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
964
965    ### run passive if specified ###
966    $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
967
968    ### set the output document, add the uri ###
969    ### quote the URI, because lftp supports certain shell
970    ### expansions, most notably & for backgrounding.
971    ### ' quote does nto work, must be "
972    $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
973
974    if( $DEBUG ) {
975        my $pp_str = join ' ', split $/, $str;
976        print "# lftp command: $pp_str\n";
977    }
978
979    ### write straight to the file.
980    $fh->autoflush(1);
981    print $fh $str;
982
983    ### the command needs to be 1 string to be executed
984    push @$cmd, $fh->filename;
985
986    ### with IPC::Cmd > 0.41, this is fixed in teh library,
987    ### and there's no need for special casing any more.
988    ### DO NOT quote things for IPC::Run, it breaks stuff.
989    # $IPC::Cmd::USE_IPC_RUN
990    #    ? ($to, $self->uri)
991    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
992
993
994    ### shell out ###
995    my $captured;
996    unless(run( command => $cmd,
997                buffer  => \$captured,
998                verbose => $DEBUG
999    )) {
1000        ### wget creates the output document always, even if the fetch
1001        ### fails.. so unlink it in that case
1002        1 while unlink $to;
1003
1004        return $self->_error(loc( "Command failed: %1", $captured || '' ));
1005    }
1006
1007    return $to;
1008}
1009
1010
1011
1012### /bin/ftp fetch ###
1013sub _ftp_fetch {
1014    my $self = shift;
1015    my %hash = @_;
1016
1017    my ($to);
1018    my $tmpl = {
1019        to  => { required => 1, store => \$to }
1020    };
1021    check( $tmpl, \%hash ) or return;
1022
1023    ### see if we have a ftp binary ###
1024    my $ftp;
1025    unless( $ftp = can_run('ftp') ) {
1026        $METHOD_FAIL->{'ftp'} = 1;
1027        return;
1028    }
1029
1030    my $fh = FileHandle->new;
1031
1032    local $SIG{CHLD} = 'IGNORE';
1033
1034    unless ($fh->open("$ftp -n", '|-')) {
1035        return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
1036    }
1037
1038    my @dialog = (
1039        "lcd " . dirname($to),
1040        "open " . $self->host,
1041        "user anonymous $FROM_EMAIL",
1042        "cd /",
1043        "cd " . $self->path,
1044        "binary",
1045        "get " . $self->file . " " . $self->output_file,
1046        "quit",
1047    );
1048
1049    foreach (@dialog) { $fh->print($_, "\n") }
1050    $fh->close or return;
1051
1052    return $to;
1053}
1054
1055### lynx is stupid - it decompresses any .gz file it finds to be text
1056### use /bin/lynx to fetch files
1057sub _lynx_fetch {
1058    my $self = shift;
1059    my %hash = @_;
1060
1061    my ($to);
1062    my $tmpl = {
1063        to  => { required => 1, store => \$to }
1064    };
1065    check( $tmpl, \%hash ) or return;
1066
1067    ### see if we have a lynx binary ###
1068    my $lynx;
1069    unless ( $lynx = can_run('lynx') ){
1070        $METHOD_FAIL->{'lynx'} = 1;
1071        return;
1072    }
1073
1074    unless( IPC::Cmd->can_capture_buffer ) {
1075        $METHOD_FAIL->{'lynx'} = 1;
1076
1077        return $self->_error(loc(
1078            "Can not capture buffers. Can not use '%1' to fetch files",
1079            'lynx' ));
1080    }
1081
1082    ### check if the HTTP resource exists ###
1083    if ($self->uri =~ /^https?:\/\//i) {
1084        my $cmd = [
1085            $lynx,
1086            '-head',
1087            '-source',
1088            "-auth=anonymous:$FROM_EMAIL",
1089        ];
1090
1091        push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1092
1093        push @$cmd, $self->uri;
1094
1095        ### shell out ###
1096        my $head;
1097        unless(run( command => $cmd,
1098                    buffer  => \$head,
1099                    verbose => $DEBUG )
1100        ) {
1101            return $self->_error(loc("Command failed: %1", $head || ''));
1102        }
1103
1104        unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
1105            return $self->_error(loc("Command failed: %1", $head || ''));
1106        }
1107    }
1108
1109    ### write to the output file ourselves, since lynx ass_u_mes to much
1110    my $local = FileHandle->new( $to, 'w' )
1111                    or return $self->_error(loc(
1112                        "Could not open '%1' for writing: %2",$to,$!));
1113
1114    ### dump to stdout ###
1115    my $cmd = [
1116        $lynx,
1117        '-source',
1118        "-auth=anonymous:$FROM_EMAIL",
1119    ];
1120
1121    push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1122
1123    ### DO NOT quote things for IPC::Run, it breaks stuff.
1124    push @$cmd, $self->uri;
1125
1126    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1127    ### and there's no need for special casing any more.
1128    ### DO NOT quote things for IPC::Run, it breaks stuff.
1129    # $IPC::Cmd::USE_IPC_RUN
1130    #    ? $self->uri
1131    #    : QUOTE. $self->uri .QUOTE;
1132
1133
1134    ### shell out ###
1135    my $captured;
1136    unless(run( command => $cmd,
1137                buffer  => \$captured,
1138                verbose => $DEBUG )
1139    ) {
1140        return $self->_error(loc("Command failed: %1", $captured || ''));
1141    }
1142
1143    ### print to local file ###
1144    ### XXX on a 404 with a special error page, $captured will actually
1145    ### hold the contents of that page, and make it *appear* like the
1146    ### request was a success, when really it wasn't :(
1147    ### there doesn't seem to be an option for lynx to change the exit
1148    ### code based on a 4XX status or so.
1149    ### the closest we can come is using --error_file and parsing that,
1150    ### which is very unreliable ;(
1151    $local->print( $captured );
1152    $local->close or return;
1153
1154    return $to;
1155}
1156
1157### use /bin/ncftp to fetch files
1158sub _ncftp_fetch {
1159    my $self = shift;
1160    my %hash = @_;
1161
1162    my ($to);
1163    my $tmpl = {
1164        to  => { required => 1, store => \$to }
1165    };
1166    check( $tmpl, \%hash ) or return;
1167
1168    ### we can only set passive mode in interactive sessions, so bail out
1169    ### if $FTP_PASSIVE is set
1170    return if $FTP_PASSIVE;
1171
1172    ### see if we have a ncftp binary ###
1173    my $ncftp;
1174    unless( $ncftp = can_run('ncftp') ) {
1175        $METHOD_FAIL->{'ncftp'} = 1;
1176        return;
1177    }
1178
1179    my $cmd = [
1180        $ncftp,
1181        '-V',                   # do not be verbose
1182        '-p', $FROM_EMAIL,      # email as password
1183        $self->host,            # hostname
1184        dirname($to),           # local dir for the file
1185                                # remote path to the file
1186        ### DO NOT quote things for IPC::Run, it breaks stuff.
1187        $IPC::Cmd::USE_IPC_RUN
1188                    ? File::Spec::Unix->catdir( $self->path, $self->file )
1189                    : QUOTE. File::Spec::Unix->catdir(
1190                                    $self->path, $self->file ) .QUOTE
1191
1192    ];
1193
1194    ### shell out ###
1195    my $captured;
1196    unless(run( command => $cmd,
1197                buffer  => \$captured,
1198                verbose => $DEBUG )
1199    ) {
1200        return $self->_error(loc("Command failed: %1", $captured || ''));
1201    }
1202
1203    return $to;
1204
1205}
1206
1207### use /bin/curl to fetch files
1208sub _curl_fetch {
1209    my $self = shift;
1210    my %hash = @_;
1211
1212    my ($to);
1213    my $tmpl = {
1214        to  => { required => 1, store => \$to }
1215    };
1216    check( $tmpl, \%hash ) or return;
1217    my $curl;
1218    unless ( $curl = can_run('curl') ) {
1219        $METHOD_FAIL->{'curl'} = 1;
1220        return;
1221    }
1222
1223    ### these long opts are self explanatory - I like that -jmb
1224    my $cmd = [ $curl, '-q' ];
1225
1226    push(@$cmd, '-4') if $^O eq 'netbsd' && $FORCEIPV4; # only seen this on NetBSD so far
1227
1228    push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
1229
1230    push(@$cmd, '--silent') unless $DEBUG;
1231
1232    ### curl does the right thing with passive, regardless ###
1233    if ($self->scheme eq 'ftp') {
1234        push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
1235    }
1236
1237    ### curl doesn't follow 302 (temporarily moved) etc automatically
1238    ### so we add --location to enable that.
1239    push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
1240
1241    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1242    ### and there's no need for special casing any more.
1243    ### DO NOT quote things for IPC::Run, it breaks stuff.
1244    # $IPC::Cmd::USE_IPC_RUN
1245    #    ? ($to, $self->uri)
1246    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1247
1248
1249    my $captured;
1250    unless(run( command => $cmd,
1251                buffer  => \$captured,
1252                verbose => $DEBUG )
1253    ) {
1254
1255        return $self->_error(loc("Command failed: %1", $captured || ''));
1256    }
1257
1258    return $to;
1259
1260}
1261
1262### /usr/bin/fetch fetch! ###
1263sub _fetch_fetch {
1264    my $self = shift;
1265    my %hash = @_;
1266
1267    my ($to);
1268    my $tmpl = {
1269        to  => { required => 1, store => \$to }
1270    };
1271    check( $tmpl, \%hash ) or return;
1272
1273    ### see if we have a fetch binary ###
1274    my $fetch;
1275    unless( HAS_FETCH and $fetch = can_run('fetch') ) {
1276        $METHOD_FAIL->{'fetch'} = 1;
1277        return;
1278    }
1279
1280    ### no verboseness, thanks ###
1281    my $cmd = [ $fetch, '-q' ];
1282
1283    ### if a timeout is set, add it ###
1284    push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT;
1285
1286    ### run passive if specified ###
1287    #push @$cmd, '-p' if $FTP_PASSIVE;
1288    local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE;
1289
1290    ### set the output document, add the uri ###
1291    push @$cmd, '-o', $to, $self->uri;
1292
1293    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1294    ### and there's no need for special casing any more.
1295    ### DO NOT quote things for IPC::Run, it breaks stuff.
1296    # $IPC::Cmd::USE_IPC_RUN
1297    #    ? ($to, $self->uri)
1298    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1299
1300    ### shell out ###
1301    my $captured;
1302    unless(run( command => $cmd,
1303                buffer  => \$captured,
1304                verbose => $DEBUG
1305    )) {
1306        ### wget creates the output document always, even if the fetch
1307        ### fails.. so unlink it in that case
1308        1 while unlink $to;
1309
1310        return $self->_error(loc( "Command failed: %1", $captured || '' ));
1311    }
1312
1313    return $to;
1314}
1315
1316### use File::Copy for fetching file:// urls ###
1317###
1318### See section 3.10 of RFC 1738 (https://datatracker.ietf.org/doc/html/rfc1738#section-3.10)
1319### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
1320###
1321
1322sub _file_fetch {
1323    my $self = shift;
1324    my %hash = @_;
1325
1326    my ($to);
1327    my $tmpl = {
1328        to  => { required => 1, store => \$to }
1329    };
1330    check( $tmpl, \%hash ) or return;
1331
1332
1333
1334    ### prefix a / on unix systems with a file uri, since it would
1335    ### look somewhat like this:
1336    ###     file:///home/kane/file
1337    ### whereas windows file uris for 'c:\some\dir\file' might look like:
1338    ###     file:///C:/some/dir/file
1339    ###     file:///C|/some/dir/file
1340    ### or for a network share '\\host\share\some\dir\file':
1341    ###     file:////host/share/some/dir/file
1342    ###
1343    ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1344    ###     file://vms.host.edu/disk$user/my/notes/note12345.txt
1345    ###
1346
1347    my $path    = $self->path;
1348    my $vol     = $self->vol;
1349    my $share   = $self->share;
1350
1351    my $remote;
1352    if (!$share and $self->host) {
1353        return $self->_error(loc(
1354            "Currently %1 cannot handle hosts in %2 urls",
1355            'File::Fetch', 'file://'
1356        ));
1357    }
1358
1359    if( $vol ) {
1360        $path   = File::Spec->catdir( split /\//, $path );
1361        $remote = File::Spec->catpath( $vol, $path, $self->file);
1362
1363    } elsif( $share ) {
1364        ### win32 specific, and a share name, so we wont bother with File::Spec
1365        $path   =~ s|/+|\\|g;
1366        $remote = "\\\\".$self->host."\\$share\\$path";
1367
1368    } else {
1369        ### File::Spec on VMS can not currently handle UNIX syntax.
1370        my $file_class = ON_VMS
1371            ? 'File::Spec::Unix'
1372            : 'File::Spec';
1373
1374        $remote  = $file_class->catfile( $path, $self->file );
1375    }
1376
1377    ### File::Copy is littered with 'die' statements :( ###
1378    my $rv = eval { File::Copy::copy( $remote, $to ) };
1379
1380    ### something went wrong ###
1381    if( !$rv or $@ ) {
1382        return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1383                             $remote, $to, $!, $@));
1384    }
1385
1386    return $to;
1387}
1388
1389### use /usr/bin/rsync to fetch files
1390sub _rsync_fetch {
1391    my $self = shift;
1392    my %hash = @_;
1393
1394    my ($to);
1395    my $tmpl = {
1396        to  => { required => 1, store => \$to }
1397    };
1398    check( $tmpl, \%hash ) or return;
1399    my $rsync;
1400    unless ( $rsync = can_run('rsync') ) {
1401        $METHOD_FAIL->{'rsync'} = 1;
1402        return;
1403    }
1404
1405    my $cmd = [ $rsync ];
1406
1407    ### XXX: rsync has no I/O timeouts at all, by default
1408    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1409
1410    push(@$cmd, '--quiet') unless $DEBUG;
1411
1412    ### DO NOT quote things for IPC::Run, it breaks stuff.
1413    push @$cmd, $self->uri, $to;
1414
1415    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1416    ### and there's no need for special casing any more.
1417    ### DO NOT quote things for IPC::Run, it breaks stuff.
1418    # $IPC::Cmd::USE_IPC_RUN
1419    #    ? ($to, $self->uri)
1420    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1421
1422    my $captured;
1423    unless(run( command => $cmd,
1424                buffer  => \$captured,
1425                verbose => $DEBUG )
1426    ) {
1427
1428        return $self->_error(loc("Command %1 failed: %2",
1429            "@$cmd" || '', $captured || ''));
1430    }
1431
1432    return $to;
1433
1434}
1435
1436### use git to fetch files
1437sub _git_fetch {
1438    my $self = shift;
1439    my %hash = @_;
1440
1441    my ($to);
1442    my $tmpl = {
1443        to  => { required => 1, store => \$to }
1444    };
1445    check( $tmpl, \%hash ) or return;
1446    my $git;
1447    unless ( $git = can_run('git') ) {
1448        $METHOD_FAIL->{'git'} = 1;
1449        return;
1450    }
1451
1452    my $cmd = [ $git, 'clone' ];
1453
1454    #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1455
1456    push(@$cmd, '--quiet') unless $DEBUG;
1457
1458    ### DO NOT quote things for IPC::Run, it breaks stuff.
1459    push @$cmd, $self->uri, $to;
1460
1461    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1462    ### and there's no need for special casing any more.
1463    ### DO NOT quote things for IPC::Run, it breaks stuff.
1464    # $IPC::Cmd::USE_IPC_RUN
1465    #    ? ($to, $self->uri)
1466    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1467
1468    my $captured;
1469    unless(run( command => $cmd,
1470                buffer  => \$captured,
1471                verbose => $DEBUG )
1472    ) {
1473
1474        return $self->_error(loc("Command %1 failed: %2",
1475            "@$cmd" || '', $captured || ''));
1476    }
1477
1478    return $to;
1479
1480}
1481
1482#################################
1483#
1484# Error code
1485#
1486#################################
1487
1488=pod
1489
1490=head2 $ff->error([BOOL])
1491
1492Returns the last encountered error as string.
1493Pass it a true value to get the C<Carp::longmess()> output instead.
1494
1495=cut
1496
1497### error handling the way Archive::Extract does it
1498sub _error {
1499    my $self    = shift;
1500    my $error   = shift;
1501
1502    $self->_error_msg( $error );
1503    $self->_error_msg_long( Carp::longmess($error) );
1504
1505    if( $WARN ) {
1506        carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1507    }
1508
1509    return;
1510}
1511
1512sub error {
1513    my $self = shift;
1514    return shift() ? $self->_error_msg_long : $self->_error_msg;
1515}
1516
1517
15181;
1519
1520=pod
1521
1522=head1 HOW IT WORKS
1523
1524File::Fetch is able to fetch a variety of uris, by using several
1525external programs and modules.
1526
1527Below is a mapping of what utilities will be used in what order
1528for what schemes, if available:
1529
1530    file    => LWP, lftp, file
1531    http    => LWP, HTTP::Tiny, wget, curl, lftp, fetch, HTTP::Lite, lynx, iosock
1532    ftp     => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp
1533    rsync   => rsync
1534    git     => git
1535
1536If you'd like to disable the use of one or more of these utilities
1537and/or modules, see the C<$BLACKLIST> variable further down.
1538
1539If a utility or module isn't available, it will be marked in a cache
1540(see the C<$METHOD_FAIL> variable further down), so it will not be
1541tried again. The C<fetch> method will only fail when all options are
1542exhausted, and it was not able to retrieve the file.
1543
1544The C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD
1545may also have it from C<pkgsrc>. We only check for C<fetch> on those
1546three platforms.
1547
1548C<iosock> is a very limited L<IO::Socket::INET> based mechanism for
1549retrieving C<http> schemed urls. It doesn't follow redirects for instance.
1550
1551C<git> only supports C<git://> style urls.
1552
1553A special note about fetching files from an ftp uri:
1554
1555By default, all ftp connections are done in passive mode. To change
1556that, see the C<$FTP_PASSIVE> variable further down.
1557
1558Furthermore, ftp uris only support anonymous connections, so no
1559named user/password pair can be passed along.
1560
1561C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1562further down.
1563
1564=head1 GLOBAL VARIABLES
1565
1566The behaviour of File::Fetch can be altered by changing the following
1567global variables:
1568
1569=head2 $File::Fetch::FROM_EMAIL
1570
1571This is the email address that will be sent as your anonymous ftp
1572password.
1573
1574Default is C<File-Fetch@example.com>.
1575
1576=head2 $File::Fetch::USER_AGENT
1577
1578This is the useragent as C<LWP> will report it.
1579
1580Default is C<File::Fetch/$VERSION>.
1581
1582=head2 $File::Fetch::FTP_PASSIVE
1583
1584This variable controls whether the environment variable C<FTP_PASSIVE>
1585and any passive switches to commandline tools will be set to true.
1586
1587Default value is 1.
1588
1589Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1590files, since passive mode can only be set interactively for this binary
1591
1592=head2 $File::Fetch::TIMEOUT
1593
1594When set, controls the network timeout (counted in seconds).
1595
1596Default value is 0.
1597
1598=head2 $File::Fetch::WARN
1599
1600This variable controls whether errors encountered internally by
1601C<File::Fetch> should be C<carp>'d or not.
1602
1603Set to false to silence warnings. Inspect the output of the C<error()>
1604method manually to see what went wrong.
1605
1606Defaults to C<true>.
1607
1608=head2 $File::Fetch::DEBUG
1609
1610This enables debugging output when calling commandline utilities to
1611fetch files.
1612This also enables C<Carp::longmess> errors, instead of the regular
1613C<carp> errors.
1614
1615Good for tracking down why things don't work with your particular
1616setup.
1617
1618Default is 0.
1619
1620=head2 $File::Fetch::BLACKLIST
1621
1622This is an array ref holding blacklisted modules/utilities for fetching
1623files with.
1624
1625To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1626set $File::Fetch::BLACKLIST to:
1627
1628    $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1629
1630The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1631
1632See the note on C<MAPPING> below.
1633
1634=head2 $File::Fetch::METHOD_FAIL
1635
1636This is a hashref registering what modules/utilities were known to fail
1637for fetching files (mostly because they weren't installed).
1638
1639You can reset this cache by assigning an empty hashref to it, or
1640individually remove keys.
1641
1642See the note on C<MAPPING> below.
1643
1644=head1 MAPPING
1645
1646
1647Here's a quick mapping for the utilities/modules, and their names for
1648the $BLACKLIST, $METHOD_FAIL and other internal functions.
1649
1650    LWP         => lwp
1651    HTTP::Lite  => httplite
1652    HTTP::Tiny  => httptiny
1653    Net::FTP    => netftp
1654    wget        => wget
1655    lynx        => lynx
1656    ncftp       => ncftp
1657    ftp         => ftp
1658    curl        => curl
1659    rsync       => rsync
1660    lftp        => lftp
1661    fetch       => fetch
1662    IO::Socket  => iosock
1663
1664=head1 FREQUENTLY ASKED QUESTIONS
1665
1666=head2 So how do I use a proxy with File::Fetch?
1667
1668C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1669You will need to set your environment variables accordingly. For
1670example, to use an ftp proxy:
1671
1672    $ENV{ftp_proxy} = 'foo.com';
1673
1674Refer to the LWP::UserAgent manpage for more details.
1675
1676=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1677
1678C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1679which we in turn capture. If that content is a 'custom' error file
1680(like, say, a C<404 handler>), you will get that contents instead.
1681
1682Sadly, C<lynx> doesn't support any options to return a different exit
1683code on non-C<200 OK> status, giving us no way to tell the difference
1684between a 'successful' fetch and a custom error page.
1685
1686Therefor, we recommend to only use C<lynx> as a last resort. This is
1687why it is at the back of our list of methods to try as well.
1688
1689=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1690
1691C<File::Fetch> is relatively smart about things. When trying to write
1692a file to disk, it removes the C<query parameters> (see the
1693C<output_file> method for details) from the file name before creating
1694it. In most cases this suffices.
1695
1696If you have any other characters you need to escape, please install
1697the C<URI::Escape> module from CPAN, and pre-encode your URI before
1698passing it to C<File::Fetch>. You can read about the details of URIs
1699and URI encoding here:
1700
1701L<https://datatracker.ietf.org/doc/html/rfc2396>
1702
1703=head1 TODO
1704
1705=over 4
1706
1707=item Implement $PREFER_BIN
1708
1709To indicate to rather use commandline tools than modules
1710
1711=back
1712
1713=head1 BUG REPORTS
1714
1715Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1716
1717=head1 AUTHOR
1718
1719This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1720
1721=head1 COPYRIGHT
1722
1723This library is free software; you may redistribute and/or modify it
1724under the same terms as Perl itself.
1725
1726
1727=cut
1728
1729# Local variables:
1730# c-indentation-style: bsd
1731# c-basic-offset: 4
1732# indent-tabs-mode: nil
1733# End:
1734# vim: expandtab shiftwidth=4:
1735
1736
1737
1738
1739