1package File::Fetch;
2
3use strict;
4use FileHandle;
5use File::Temp;
6use File::Copy;
7use File::Spec;
8use File::Spec::Unix;
9use File::Basename              qw[dirname];
10
11use Cwd                         qw[cwd];
12use Carp                        qw[carp];
13use IPC::Cmd                    qw[can_run run QUOTE];
14use File::Path                  qw[mkpath];
15use File::Temp                  qw[tempdir];
16use Params::Check               qw[check];
17use Module::Load::Conditional   qw[can_load];
18use Locale::Maketext::Simple    Style => 'gettext';
19
20use vars    qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21                $BLACKLIST $METHOD_FAIL $VERSION $METHODS
22                $FTP_PASSIVE $TIMEOUT $DEBUG $WARN $FORCEIPV4
23            ];
24
25$VERSION        = '0.56';
26$VERSION        = eval $VERSION;    # avoid warnings with development releases
27$PREFER_BIN     = 0;                # XXX TODO implement
28$FROM_EMAIL     = 'File-Fetch@example.com';
29$USER_AGENT     = "File::Fetch/$VERSION";
30$BLACKLIST      = [qw|ftp|];
31push @$BLACKLIST, qw|lftp| if $^O eq 'dragonfly' || $^O eq 'hpux';
32$METHOD_FAIL    = { };
33$FTP_PASSIVE    = 1;
34$TIMEOUT        = 0;
35$DEBUG          = 0;
36$WARN           = 1;
37$FORCEIPV4      = 0;
38
39### methods available to fetch the file depending on the scheme
40$METHODS = {
41    http    => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ],
42    https   => [ qw|lwp wget curl| ],
43    ftp     => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ],
44    file    => [ qw|lwp lftp file| ],
45    rsync   => [ qw|rsync| ],
46    git     => [ qw|git| ],
47};
48
49### silly warnings ###
50local $Params::Check::VERBOSE               = 1;
51local $Params::Check::VERBOSE               = 1;
52local $Module::Load::Conditional::VERBOSE   = 0;
53local $Module::Load::Conditional::VERBOSE   = 0;
54
55### Fix CVE-2016-1238 ###
56local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
57
58### see what OS we are on, important for file:// uris ###
59use constant ON_WIN     => ($^O eq 'MSWin32');
60use constant ON_VMS     => ($^O eq 'VMS');
61use constant ON_UNIX    => (!ON_WIN);
62use constant HAS_VOL    => (ON_WIN);
63use constant HAS_SHARE  => (ON_WIN);
64use constant HAS_FETCH  => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! );
65
66=pod
67
68=head1 NAME
69
70File::Fetch - A generic file fetching mechanism
71
72=head1 SYNOPSIS
73
74    use File::Fetch;
75
76    ### build a File::Fetch object ###
77    my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
78
79    ### fetch the uri to cwd() ###
80    my $where = $ff->fetch() or die $ff->error;
81
82    ### fetch the uri to /tmp ###
83    my $where = $ff->fetch( to => '/tmp' );
84
85    ### parsed bits from the uri ###
86    $ff->uri;
87    $ff->scheme;
88    $ff->host;
89    $ff->path;
90    $ff->file;
91
92=head1 DESCRIPTION
93
94File::Fetch is a generic file fetching mechanism.
95
96It allows you to fetch any file pointed to by a C<ftp>, C<http>,
97C<file>, C<git> or C<rsync> uri by a number of different means.
98
99See the C<HOW IT WORKS> section further down for details.
100
101=head1 ACCESSORS
102
103A C<File::Fetch> object has the following accessors
104
105=over 4
106
107=item $ff->uri
108
109The uri you passed to the constructor
110
111=item $ff->scheme
112
113The scheme from the uri (like 'file', 'http', etc)
114
115=item $ff->host
116
117The hostname in the uri.  Will be empty if host was originally
118'localhost' for a 'file://' url.
119
120=item $ff->vol
121
122On operating systems with the concept of a volume the second element
123of a file:// is considered to the be volume specification for the file.
124Thus on Win32 this routine returns the volume, on other operating
125systems this returns nothing.
126
127On Windows this value may be empty if the uri is to a network share, in
128which case the 'share' property will be defined. Additionally, volume
129specifications that use '|' as ':' will be converted on read to use ':'.
130
131On VMS, which has a volume concept, this field will be empty because VMS
132file specifications are converted to absolute UNIX format and the volume
133information is transparently included.
134
135=item $ff->share
136
137On systems with the concept of a network share (currently only Windows) returns
138the sharename from a file://// url.  On other operating systems returns empty.
139
140=item $ff->path
141
142The path from the uri, will be at least a single '/'.
143
144=item $ff->file
145
146The name of the remote file. For the local file name, the
147result of $ff->output_file will be used.
148
149=item $ff->file_default
150
151The name of the default local file, that $ff->output_file falls back to if
152it would otherwise return no filename. For example when fetching a URI like
153http://www.abc.net.au/ the contents retrieved may be from a remote file called
154'index.html'. The default value of this attribute is literally 'file_default'.
155
156=cut
157
158
159##########################
160### Object & Accessors ###
161##########################
162
163{
164    ### template for autogenerated accessors ###
165    my $Tmpl = {
166        scheme          => { default => 'http' },
167        host            => { default => 'localhost' },
168        path            => { default => '/' },
169        file            => { required => 1 },
170        uri             => { required => 1 },
171        userinfo        => { default => '' },
172        vol             => { default => '' }, # windows for file:// uris
173        share           => { default => '' }, # windows for file:// uris
174        file_default    => { default => 'file_default' },
175        tempdir_root    => { required => 1 }, # Should be lazy-set at ->new()
176        _error_msg      => { no_override => 1 },
177        _error_msg_long => { no_override => 1 },
178    };
179
180    for my $method ( keys %$Tmpl ) {
181        no strict 'refs';
182        *$method = sub {
183                        my $self = shift;
184                        $self->{$method} = $_[0] if @_;
185                        return $self->{$method};
186                    }
187    }
188
189    sub _create {
190        my $class = shift;
191        my %hash  = @_;
192
193        my $args = check( $Tmpl, \%hash ) or return;
194
195        bless $args, $class;
196
197        if( lc($args->scheme) ne 'file' and not $args->host ) {
198            return $class->_error(loc(
199                "Hostname required when fetching from '%1'",$args->scheme));
200        }
201
202        for (qw[path]) {
203            unless( $args->$_() ) { # 5.5.x needs the ()
204                return $class->_error(loc("No '%1' specified",$_));
205            }
206        }
207
208        return $args;
209    }
210}
211
212=item $ff->output_file
213
214The name of the output file. This is the same as $ff->file,
215but any query parameters are stripped off. For example:
216
217    http://example.com/index.html?x=y
218
219would make the output file be C<index.html> rather than
220C<index.html?x=y>.
221
222=back
223
224=cut
225
226sub output_file {
227    my $self = shift;
228    my $file = $self->file;
229
230    $file =~ s/\?.*$//g;
231
232    $file ||= $self->file_default;
233
234    return $file;
235}
236
237### XXX do this or just point to URI::Escape?
238# =head2 $esc_uri = $ff->escaped_uri
239#
240# =cut
241#
242# ### most of this is stolen straight from URI::escape
243# {   ### Build a char->hex map
244#     my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
245#
246#     sub escaped_uri {
247#         my $self = shift;
248#         my $uri  = $self->uri;
249#
250#         ### Default unsafe characters.  RFC 2732 ^(uric - reserved)
251#         $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
252#                     $escapes{$1} || $self->_fail_hi($1)/ge;
253#
254#         return $uri;
255#     }
256#
257#     sub _fail_hi {
258#         my $self = shift;
259#         my $char = shift;
260#
261#         $self->_error(loc(
262#             "Can't escape '%1', try using the '%2' module instead",
263#             sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
264#         ));
265#     }
266#
267#     sub output_file {
268#
269#     }
270#
271#
272# }
273
274=head1 METHODS
275
276=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
277
278Parses the uri and creates a corresponding File::Fetch::Item object,
279that is ready to be C<fetch>ed and returns it.
280
281Returns false on failure.
282
283=cut
284
285sub new {
286    my $class = shift;
287    my %hash  = @_;
288
289    my ($uri, $file_default, $tempdir_root);
290    my $tmpl = {
291        uri          => { required => 1, store => \$uri },
292        file_default => { required => 0, store => \$file_default },
293        tempdir_root => { required => 0, store => \$tempdir_root },
294    };
295
296    check( $tmpl, \%hash ) or return;
297
298    ### parse the uri to usable parts ###
299    my $href    = $class->_parse_uri( $uri ) or return;
300
301    $href->{file_default} = $file_default if $file_default;
302    $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root;
303    $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd      ) if not $href->{tempdir_root};
304
305    ### make it into a FFI object ###
306    my $ff      = $class->_create( %$href ) or return;
307
308
309    ### return the object ###
310    return $ff;
311}
312
313### parses an uri to a hash structure:
314###
315### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
316###
317### becomes:
318###
319### $href = {
320###     scheme  => 'ftp',
321###     host    => 'ftp.cpan.org',
322###     path    => '/pub/mirror',
323###     file    => 'index.html'
324### };
325###
326### In the case of file:// urls there maybe be additional fields
327###
328### For systems with volume specifications such as Win32 there will be
329### a volume specifier provided in the 'vol' field.
330###
331###   'vol' => 'volumename'
332###
333### For windows file shares there may be a 'share' key specified
334###
335###   'share' => 'sharename'
336###
337### Note that the rules of what a file:// url means vary by the operating system
338### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
339### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
340### not '/foo/bar.txt'
341###
342### Similarly if the host interpreting the url is VMS then
343### file:///disk$user/my/notes/note12345.txt' means
344### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
345### if it is unix where it means /disk$user/my/notes/note12345.txt'.
346### Except for some cases in the File::Spec methods, Perl on VMS will generally
347### handle UNIX format file specifications.
348###
349### This means it is impossible to serve certain file:// urls on certain systems.
350###
351### Thus are the problems with a protocol-less specification. :-(
352###
353
354sub _parse_uri {
355    my $self = shift;
356    my $uri  = shift or return;
357
358    my $href = { uri => $uri };
359
360    ### find the scheme ###
361    $uri            =~ s|^(\w+)://||;
362    $href->{scheme} = $1;
363
364    ### See rfc 1738 section 3.10
365    ### http://www.faqs.org/rfcs/rfc1738.html
366    ### And wikipedia for more on windows file:// urls
367    ### http://en.wikipedia.org/wiki/File://
368    if( $href->{scheme} eq 'file' ) {
369
370        my @parts = split '/',$uri;
371
372        ### file://hostname/...
373        ### file://hostname/...
374        ### normalize file://localhost with file:///
375        $href->{host} = $parts[0] || '';
376
377        ### index in @parts where the path components begin;
378        my $index = 1;
379
380        ### file:////hostname/sharename/blah.txt
381        if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
382
383            $href->{host}   = $parts[2] || '';  # avoid warnings
384            $href->{share}  = $parts[3] || '';  # avoid warnings
385
386            $index          = 4         # index after the share
387
388        ### file:///D|/blah.txt
389        ### file:///D:/blah.txt
390        } elsif (HAS_VOL) {
391
392            ### this code comes from dmq's patch, but:
393            ### XXX if volume is empty, wouldn't that be an error? --kane
394            ### if so, our file://localhost test needs to be fixed as wel
395            $href->{vol}    = $parts[1] || '';
396
397            ### correct D| style colume descriptors
398            $href->{vol}    =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
399
400            $index          = 2;        # index after the volume
401        }
402
403        ### rebuild the path from the leftover parts;
404        $href->{path} = join '/', '', splice( @parts, $index, $#parts );
405
406    } else {
407        ### using anything but qw() in hash slices may produce warnings
408        ### in older perls :-(
409        @{$href}{ qw(userinfo host path) } = $uri =~ m|(?:([^\@:]*:[^\:\@]*)@)?([^/]*)(/.*)$|s;
410    }
411
412    ### split the path into file + dir ###
413    {   my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
414        $href->{path} = $parts[1];
415        $href->{file} = $parts[2];
416    }
417
418    ### host will be empty if the target was 'localhost' and the
419    ### scheme was 'file'
420    $href->{host} = '' if   ($href->{host}      eq 'localhost') and
421                            ($href->{scheme}    eq 'file');
422
423    return $href;
424}
425
426=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
427
428Fetches the file you requested and returns the full path to the file.
429
430By default it writes to C<cwd()>, but you can override that by specifying
431the C<to> argument:
432
433    ### file fetch to /tmp, full path to the file in $where
434    $where = $ff->fetch( to => '/tmp' );
435
436    ### file slurped into $scalar, full path to the file in $where
437    ### file is downloaded to a temp directory and cleaned up at exit time
438    $where = $ff->fetch( to => \$scalar );
439
440Returns the full path to the downloaded file on success, and false
441on failure.
442
443=cut
444
445sub fetch {
446    my $self = shift or return;
447    my %hash = @_;
448
449    my $target;
450    my $tmpl = {
451        to  => { default => cwd(), store => \$target },
452    };
453
454    check( $tmpl, \%hash ) or return;
455
456    my ($to, $fh);
457    ### you want us to slurp the contents
458    if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
459        $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 );
460
461    ### plain old fetch
462    } else {
463        $to = $target;
464
465        ### On VMS force to VMS format so File::Spec will work.
466        $to = VMS::Filespec::vmspath($to) if ON_VMS;
467
468        ### create the path if it doesn't exist yet ###
469        unless( -d $to ) {
470            eval { mkpath( $to ) };
471
472            return $self->_error(loc("Could not create path '%1'",$to)) if $@;
473        }
474    }
475
476    ### set passive ftp if required ###
477    local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
478
479    ### we dont use catfile on win32 because if we are using a cygwin tool
480    ### under cmd.exe they wont understand windows style separators.
481    my $out_to = ON_WIN ? $to.'/'.$self->output_file
482                        : File::Spec->catfile( $to, $self->output_file );
483
484    for my $method ( @{ $METHODS->{$self->scheme} } ) {
485        my $sub =  '_'.$method.'_fetch';
486
487        unless( __PACKAGE__->can($sub) ) {
488            $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
489                        $method));
490            next;
491        }
492
493        ### method is blacklisted ###
494        next if grep { lc $_ eq $method } @$BLACKLIST;
495
496        ### method is known to fail ###
497        next if $METHOD_FAIL->{$method};
498
499        ### there's serious issues with IPC::Run and quoting of command
500        ### line arguments. using quotes in the wrong place breaks things,
501        ### and in the case of say,
502        ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
503        ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
504        ### it doesn't matter how you quote, it always fails.
505        local $IPC::Cmd::USE_IPC_RUN = 0;
506
507        if( my $file = $self->$sub(
508                        to => $out_to
509        )){
510
511            unless( -e $file && -s _ ) {
512                $self->_error(loc("'%1' said it fetched '%2', ".
513                     "but it was not created",$method,$file));
514
515                ### mark the failure ###
516                $METHOD_FAIL->{$method} = 1;
517
518                next;
519
520            } else {
521
522                ### slurp mode?
523                if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
524
525                    ### open the file
526                    open my $fh, "<$file" or do {
527                        $self->_error(
528                            loc("Could not open '%1': %2", $file, $!));
529                        return;
530                    };
531
532                    ### slurp
533                    $$target = do { local $/; <$fh> };
534
535                }
536
537                my $abs = File::Spec->rel2abs( $file );
538                return $abs;
539
540            }
541        }
542    }
543
544
545    ### if we got here, we looped over all methods, but we weren't able
546    ### to fetch it.
547    return;
548}
549
550########################
551### _*_fetch methods ###
552########################
553
554### LWP fetching ###
555sub _lwp_fetch {
556    my $self = shift;
557    my %hash = @_;
558
559    my ($to);
560    my $tmpl = {
561        to  => { required => 1, store => \$to }
562    };
563    check( $tmpl, \%hash ) or return;
564
565    ### modules required to download with lwp ###
566    my $use_list = {
567        LWP                 => '0.0',
568        'LWP::UserAgent'    => '0.0',
569        'HTTP::Request'     => '0.0',
570        'HTTP::Status'      => '0.0',
571        URI                 => '0.0',
572
573    };
574
575    if ($self->scheme eq 'https') {
576        $use_list->{'LWP::Protocol::https'} = '0';
577    }
578
579    unless( can_load( modules => $use_list ) ) {
580        $METHOD_FAIL->{'lwp'} = 1;
581        return;
582    }
583
584    ### setup the uri object
585    my $uri = URI->new( File::Spec::Unix->catfile(
586                                $self->path, $self->file
587                    ) );
588
589    ### special rules apply for file:// uris ###
590    $uri->scheme( $self->scheme );
591    $uri->host( $self->scheme eq 'file' ? '' : $self->host );
592
593    if ($self->userinfo) {
594        $uri->userinfo($self->userinfo);
595    } elsif ($self->scheme ne 'file') {
596        $uri->userinfo("anonymous:$FROM_EMAIL");
597    }
598
599    ### set up the useragent object
600    my $ua = LWP::UserAgent->new();
601    $ua->timeout( $TIMEOUT ) if $TIMEOUT;
602    $ua->agent( $USER_AGENT );
603    $ua->from( $FROM_EMAIL );
604    $ua->env_proxy;
605
606    my $res = $ua->mirror($uri, $to) or return;
607
608    ### uptodate or fetched ok ###
609    if ( $res->code == 304 or $res->code == 200 ) {
610        return $to;
611
612    } else {
613        return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
614                    $res->code, HTTP::Status::status_message($res->code),
615                    $res->status_line));
616    }
617
618}
619
620### HTTP::Tiny fetching ###
621sub _httptiny_fetch {
622    my $self = shift;
623    my %hash = @_;
624
625    my ($to);
626    my $tmpl = {
627        to  => { required => 1, store => \$to }
628    };
629    check( $tmpl, \%hash ) or return;
630
631    my $use_list = {
632        'HTTP::Tiny'    => '0.008',
633
634    };
635
636    unless( can_load(modules => $use_list) ) {
637        $METHOD_FAIL->{'httptiny'} = 1;
638        return;
639    }
640
641    my $uri = $self->uri;
642
643    my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) );
644
645    my $rc = $http->mirror( $uri, $to );
646
647    unless ( $rc->{success} ) {
648
649        return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]",
650                    $rc->{status}, $rc->{reason} ) );
651
652    }
653
654    return $to;
655
656}
657
658### HTTP::Lite fetching ###
659sub _httplite_fetch {
660    my $self = shift;
661    my %hash = @_;
662
663    my ($to);
664    my $tmpl = {
665        to  => { required => 1, store => \$to }
666    };
667    check( $tmpl, \%hash ) or return;
668
669    ### modules required to download with lwp ###
670    my $use_list = {
671        'HTTP::Lite'    => '2.2',
672        'MIME::Base64'  => '0',
673    };
674
675    unless( can_load(modules => $use_list) ) {
676        $METHOD_FAIL->{'httplite'} = 1;
677        return;
678    }
679
680    my $uri = $self->uri;
681    my $retries = 0;
682
683    RETRIES: while ( $retries++ < 5 ) {
684
685      my $http = HTTP::Lite->new();
686      # Naughty naughty but there isn't any accessor/setter
687      $http->{timeout} = $TIMEOUT if $TIMEOUT;
688      $http->http11_mode(1);
689
690      if ($self->userinfo) {
691          my $encoded = MIME::Base64::encode($self->userinfo, '');
692          $http->add_req_header("Authorization", "Basic $encoded");
693      }
694
695      my $fh = FileHandle->new;
696
697      unless ( $fh->open($to,'>') ) {
698        return $self->_error(loc(
699             "Could not open '%1' for writing: %2",$to,$!));
700      }
701
702      $fh->autoflush(1);
703
704      binmode $fh;
705
706      my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh );
707
708      close $fh;
709
710      if ( $rc == 301 || $rc == 302 ) {
711          my $loc;
712          HEADERS: for ($http->headers_array) {
713            /Location: (\S+)/ and $loc = $1, last HEADERS;
714          }
715          #$loc or last; # Think we should squeal here.
716          if ($loc =~ m!^/!) {
717            $uri =~ s{^(\w+?://[^/]+)/.*$}{$1};
718            $uri .= $loc;
719          }
720          else {
721            $uri = $loc;
722          }
723          next RETRIES;
724      }
725      elsif ( $rc == 200 ) {
726          return $to;
727      }
728      else {
729        return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]",
730                    $rc, $http->status_message));
731      }
732
733    } # Loop for 5 retries.
734
735    return $self->_error("Fetch failed! Gave up after 5 tries");
736
737}
738
739### Simple IO::Socket::INET fetching ###
740sub _iosock_fetch {
741    my $self = shift;
742    my %hash = @_;
743
744    my ($to);
745    my $tmpl = {
746        to  => { required => 1, store => \$to }
747    };
748    check( $tmpl, \%hash ) or return;
749
750    my $use_list = {
751        'IO::Socket::INET' => '0.0',
752        'IO::Select'       => '0.0',
753    };
754
755    unless( can_load(modules => $use_list) ) {
756        $METHOD_FAIL->{'iosock'} = 1;
757        return;
758    }
759
760    my $sock = IO::Socket::INET->new(
761        PeerHost => $self->host,
762        ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ),
763    );
764
765    unless ( $sock ) {
766        return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!));
767    }
768
769    my $fh = FileHandle->new;
770
771    # Check open()
772
773    unless ( $fh->open($to,'>') ) {
774        return $self->_error(loc(
775             "Could not open '%1' for writing: %2",$to,$!));
776    }
777
778    $fh->autoflush(1);
779    binmode $fh;
780
781    my $path = File::Spec::Unix->catfile( $self->path, $self->file );
782    my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a";
783    $sock->send( $req );
784
785    my $select = IO::Select->new( $sock );
786
787    my $resp = '';
788    my $normal = 0;
789    while ( $select->can_read( $TIMEOUT || 60 ) ) {
790      my $ret = $sock->sysread( $resp, 4096, length($resp) );
791      if ( !defined $ret or $ret == 0 ) {
792        $select->remove( $sock );
793        $normal++;
794      }
795    }
796    close $sock;
797
798    unless ( $normal ) {
799        return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 )));
800    }
801
802    # Check the "response"
803    # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1)
804    $resp =~ s/^(\x0d?\x0a)+//;
805    # Check it is an HTTP response
806    unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) {
807        return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host));
808    }
809
810    # Check for OK
811    my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i;
812    unless ( $code eq '200' ) {
813        return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host));
814    }
815
816    {
817      local $\;
818      print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0];
819    }
820    close $fh;
821    return $to;
822}
823
824### Net::FTP fetching
825sub _netftp_fetch {
826    my $self = shift;
827    my %hash = @_;
828
829    my ($to);
830    my $tmpl = {
831        to  => { required => 1, store => \$to }
832    };
833    check( $tmpl, \%hash ) or return;
834
835    ### required modules ###
836    my $use_list = { 'Net::FTP' => 0 };
837
838    unless( can_load( modules => $use_list ) ) {
839        $METHOD_FAIL->{'netftp'} = 1;
840        return;
841    }
842
843    ### make connection ###
844    my $ftp;
845    my @options = ($self->host);
846    push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
847    unless( $ftp = Net::FTP->new( @options ) ) {
848        return $self->_error(loc("Ftp creation failed: %1",$@));
849    }
850
851    ### login ###
852    unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
853        return $self->_error(loc("Could not login to '%1'",$self->host));
854    }
855
856    ### set binary mode, just in case ###
857    $ftp->binary;
858
859    ### create the remote path
860    ### remember remote paths are unix paths! [#11483]
861    my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
862
863    ### fetch the file ###
864    my $target;
865    unless( $target = $ftp->get( $remote, $to ) ) {
866        return $self->_error(loc("Could not fetch '%1' from '%2'",
867                    $remote, $self->host));
868    }
869
870    ### log out ###
871    $ftp->quit;
872
873    return $target;
874
875}
876
877### /bin/wget fetch ###
878sub _wget_fetch {
879    my $self = shift;
880    my %hash = @_;
881
882    my ($to);
883    my $tmpl = {
884        to  => { required => 1, store => \$to }
885    };
886    check( $tmpl, \%hash ) or return;
887
888    my $wget;
889    ### see if we have a wget binary ###
890    unless( $wget = can_run('wget') ) {
891        $METHOD_FAIL->{'wget'} = 1;
892        return;
893    }
894
895    ### no verboseness, thanks ###
896    my $cmd = [ $wget, '--quiet' ];
897
898    ### if a timeout is set, add it ###
899    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
900
901    ### run passive if specified ###
902    push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
903
904    ### set the output document, add the uri ###
905    push @$cmd, '--output-document', $to, $self->uri;
906
907    ### with IPC::Cmd > 0.41, this is fixed in teh library,
908    ### and there's no need for special casing any more.
909    ### DO NOT quote things for IPC::Run, it breaks stuff.
910    # $IPC::Cmd::USE_IPC_RUN
911    #    ? ($to, $self->uri)
912    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
913
914    ### shell out ###
915    my $captured;
916    unless(run( command => $cmd,
917                buffer  => \$captured,
918                verbose => $DEBUG
919    )) {
920        ### wget creates the output document always, even if the fetch
921        ### fails.. so unlink it in that case
922        1 while unlink $to;
923
924        return $self->_error(loc( "Command failed: %1", $captured || '' ));
925    }
926
927    return $to;
928}
929
930### /bin/lftp fetch ###
931sub _lftp_fetch {
932    my $self = shift;
933    my %hash = @_;
934
935    my ($to);
936    my $tmpl = {
937        to  => { required => 1, store => \$to }
938    };
939    check( $tmpl, \%hash ) or return;
940
941    ### see if we have a lftp binary ###
942    my $lftp;
943    unless( $lftp = can_run('lftp') ) {
944        $METHOD_FAIL->{'lftp'} = 1;
945        return;
946    }
947
948    ### no verboseness, thanks ###
949    my $cmd = [ $lftp, '-f' ];
950
951    my $fh = File::Temp->new;
952
953    my $str;
954
955    ### if a timeout is set, add it ###
956    $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
957
958    ### run passive if specified ###
959    $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
960
961    ### set the output document, add the uri ###
962    ### quote the URI, because lftp supports certain shell
963    ### expansions, most notably & for backgrounding.
964    ### ' quote does nto work, must be "
965    $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
966
967    if( $DEBUG ) {
968        my $pp_str = join ' ', split $/, $str;
969        print "# lftp command: $pp_str\n";
970    }
971
972    ### write straight to the file.
973    $fh->autoflush(1);
974    print $fh $str;
975
976    ### the command needs to be 1 string to be executed
977    push @$cmd, $fh->filename;
978
979    ### with IPC::Cmd > 0.41, this is fixed in teh library,
980    ### and there's no need for special casing any more.
981    ### DO NOT quote things for IPC::Run, it breaks stuff.
982    # $IPC::Cmd::USE_IPC_RUN
983    #    ? ($to, $self->uri)
984    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
985
986
987    ### shell out ###
988    my $captured;
989    unless(run( command => $cmd,
990                buffer  => \$captured,
991                verbose => $DEBUG
992    )) {
993        ### wget creates the output document always, even if the fetch
994        ### fails.. so unlink it in that case
995        1 while unlink $to;
996
997        return $self->_error(loc( "Command failed: %1", $captured || '' ));
998    }
999
1000    return $to;
1001}
1002
1003
1004
1005### /bin/ftp fetch ###
1006sub _ftp_fetch {
1007    my $self = shift;
1008    my %hash = @_;
1009
1010    my ($to);
1011    my $tmpl = {
1012        to  => { required => 1, store => \$to }
1013    };
1014    check( $tmpl, \%hash ) or return;
1015
1016    ### see if we have a ftp binary ###
1017    my $ftp;
1018    unless( $ftp = can_run('ftp') ) {
1019        $METHOD_FAIL->{'ftp'} = 1;
1020        return;
1021    }
1022
1023    my $fh = FileHandle->new;
1024
1025    local $SIG{CHLD} = 'IGNORE';
1026
1027    unless ($fh->open("$ftp -n", '|-')) {
1028        return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
1029    }
1030
1031    my @dialog = (
1032        "lcd " . dirname($to),
1033        "open " . $self->host,
1034        "user anonymous $FROM_EMAIL",
1035        "cd /",
1036        "cd " . $self->path,
1037        "binary",
1038        "get " . $self->file . " " . $self->output_file,
1039        "quit",
1040    );
1041
1042    foreach (@dialog) { $fh->print($_, "\n") }
1043    $fh->close or return;
1044
1045    return $to;
1046}
1047
1048### lynx is stupid - it decompresses any .gz file it finds to be text
1049### use /bin/lynx to fetch files
1050sub _lynx_fetch {
1051    my $self = shift;
1052    my %hash = @_;
1053
1054    my ($to);
1055    my $tmpl = {
1056        to  => { required => 1, store => \$to }
1057    };
1058    check( $tmpl, \%hash ) or return;
1059
1060    ### see if we have a lynx binary ###
1061    my $lynx;
1062    unless ( $lynx = can_run('lynx') ){
1063        $METHOD_FAIL->{'lynx'} = 1;
1064        return;
1065    }
1066
1067    unless( IPC::Cmd->can_capture_buffer ) {
1068        $METHOD_FAIL->{'lynx'} = 1;
1069
1070        return $self->_error(loc(
1071            "Can not capture buffers. Can not use '%1' to fetch files",
1072            'lynx' ));
1073    }
1074
1075    ### check if the HTTP resource exists ###
1076    if ($self->uri =~ /^https?:\/\//i) {
1077        my $cmd = [
1078            $lynx,
1079            '-head',
1080            '-source',
1081            "-auth=anonymous:$FROM_EMAIL",
1082        ];
1083
1084        push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1085
1086        push @$cmd, $self->uri;
1087
1088        ### shell out ###
1089        my $head;
1090        unless(run( command => $cmd,
1091                    buffer  => \$head,
1092                    verbose => $DEBUG )
1093        ) {
1094            return $self->_error(loc("Command failed: %1", $head || ''));
1095        }
1096
1097        unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
1098            return $self->_error(loc("Command failed: %1", $head || ''));
1099        }
1100    }
1101
1102    ### write to the output file ourselves, since lynx ass_u_mes to much
1103    my $local = FileHandle->new( $to, 'w' )
1104                    or return $self->_error(loc(
1105                        "Could not open '%1' for writing: %2",$to,$!));
1106
1107    ### dump to stdout ###
1108    my $cmd = [
1109        $lynx,
1110        '-source',
1111        "-auth=anonymous:$FROM_EMAIL",
1112    ];
1113
1114    push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1115
1116    ### DO NOT quote things for IPC::Run, it breaks stuff.
1117    push @$cmd, $self->uri;
1118
1119    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1120    ### and there's no need for special casing any more.
1121    ### DO NOT quote things for IPC::Run, it breaks stuff.
1122    # $IPC::Cmd::USE_IPC_RUN
1123    #    ? $self->uri
1124    #    : QUOTE. $self->uri .QUOTE;
1125
1126
1127    ### shell out ###
1128    my $captured;
1129    unless(run( command => $cmd,
1130                buffer  => \$captured,
1131                verbose => $DEBUG )
1132    ) {
1133        return $self->_error(loc("Command failed: %1", $captured || ''));
1134    }
1135
1136    ### print to local file ###
1137    ### XXX on a 404 with a special error page, $captured will actually
1138    ### hold the contents of that page, and make it *appear* like the
1139    ### request was a success, when really it wasn't :(
1140    ### there doesn't seem to be an option for lynx to change the exit
1141    ### code based on a 4XX status or so.
1142    ### the closest we can come is using --error_file and parsing that,
1143    ### which is very unreliable ;(
1144    $local->print( $captured );
1145    $local->close or return;
1146
1147    return $to;
1148}
1149
1150### use /bin/ncftp to fetch files
1151sub _ncftp_fetch {
1152    my $self = shift;
1153    my %hash = @_;
1154
1155    my ($to);
1156    my $tmpl = {
1157        to  => { required => 1, store => \$to }
1158    };
1159    check( $tmpl, \%hash ) or return;
1160
1161    ### we can only set passive mode in interactive sessions, so bail out
1162    ### if $FTP_PASSIVE is set
1163    return if $FTP_PASSIVE;
1164
1165    ### see if we have a ncftp binary ###
1166    my $ncftp;
1167    unless( $ncftp = can_run('ncftp') ) {
1168        $METHOD_FAIL->{'ncftp'} = 1;
1169        return;
1170    }
1171
1172    my $cmd = [
1173        $ncftp,
1174        '-V',                   # do not be verbose
1175        '-p', $FROM_EMAIL,      # email as password
1176        $self->host,            # hostname
1177        dirname($to),           # local dir for the file
1178                                # remote path to the file
1179        ### DO NOT quote things for IPC::Run, it breaks stuff.
1180        $IPC::Cmd::USE_IPC_RUN
1181                    ? File::Spec::Unix->catdir( $self->path, $self->file )
1182                    : QUOTE. File::Spec::Unix->catdir(
1183                                    $self->path, $self->file ) .QUOTE
1184
1185    ];
1186
1187    ### shell out ###
1188    my $captured;
1189    unless(run( command => $cmd,
1190                buffer  => \$captured,
1191                verbose => $DEBUG )
1192    ) {
1193        return $self->_error(loc("Command failed: %1", $captured || ''));
1194    }
1195
1196    return $to;
1197
1198}
1199
1200### use /bin/curl to fetch files
1201sub _curl_fetch {
1202    my $self = shift;
1203    my %hash = @_;
1204
1205    my ($to);
1206    my $tmpl = {
1207        to  => { required => 1, store => \$to }
1208    };
1209    check( $tmpl, \%hash ) or return;
1210    my $curl;
1211    unless ( $curl = can_run('curl') ) {
1212        $METHOD_FAIL->{'curl'} = 1;
1213        return;
1214    }
1215
1216    ### these long opts are self explanatory - I like that -jmb
1217    my $cmd = [ $curl, '-q' ];
1218
1219    push(@$cmd, '-4') if $^O eq 'netbsd' && $FORCEIPV4; # only seen this on NetBSD so far
1220
1221    push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
1222
1223    push(@$cmd, '--silent') unless $DEBUG;
1224
1225    ### curl does the right thing with passive, regardless ###
1226    if ($self->scheme eq 'ftp') {
1227        push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
1228    }
1229
1230    ### curl doesn't follow 302 (temporarily moved) etc automatically
1231    ### so we add --location to enable that.
1232    push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
1233
1234    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1235    ### and there's no need for special casing any more.
1236    ### DO NOT quote things for IPC::Run, it breaks stuff.
1237    # $IPC::Cmd::USE_IPC_RUN
1238    #    ? ($to, $self->uri)
1239    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1240
1241
1242    my $captured;
1243    unless(run( command => $cmd,
1244                buffer  => \$captured,
1245                verbose => $DEBUG )
1246    ) {
1247
1248        return $self->_error(loc("Command failed: %1", $captured || ''));
1249    }
1250
1251    return $to;
1252
1253}
1254
1255### /usr/bin/fetch fetch! ###
1256sub _fetch_fetch {
1257    my $self = shift;
1258    my %hash = @_;
1259
1260    my ($to);
1261    my $tmpl = {
1262        to  => { required => 1, store => \$to }
1263    };
1264    check( $tmpl, \%hash ) or return;
1265
1266    ### see if we have a fetch binary ###
1267    my $fetch;
1268    unless( HAS_FETCH and $fetch = can_run('fetch') ) {
1269        $METHOD_FAIL->{'fetch'} = 1;
1270        return;
1271    }
1272
1273    ### no verboseness, thanks ###
1274    my $cmd = [ $fetch, '-q' ];
1275
1276    ### if a timeout is set, add it ###
1277    push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT;
1278
1279    ### run passive if specified ###
1280    #push @$cmd, '-p' if $FTP_PASSIVE;
1281    local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE;
1282
1283    ### set the output document, add the uri ###
1284    push @$cmd, '-o', $to, $self->uri;
1285
1286    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1287    ### and there's no need for special casing any more.
1288    ### DO NOT quote things for IPC::Run, it breaks stuff.
1289    # $IPC::Cmd::USE_IPC_RUN
1290    #    ? ($to, $self->uri)
1291    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1292
1293    ### shell out ###
1294    my $captured;
1295    unless(run( command => $cmd,
1296                buffer  => \$captured,
1297                verbose => $DEBUG
1298    )) {
1299        ### wget creates the output document always, even if the fetch
1300        ### fails.. so unlink it in that case
1301        1 while unlink $to;
1302
1303        return $self->_error(loc( "Command failed: %1", $captured || '' ));
1304    }
1305
1306    return $to;
1307}
1308
1309### use File::Copy for fetching file:// urls ###
1310###
1311### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
1312### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
1313###
1314
1315sub _file_fetch {
1316    my $self = shift;
1317    my %hash = @_;
1318
1319    my ($to);
1320    my $tmpl = {
1321        to  => { required => 1, store => \$to }
1322    };
1323    check( $tmpl, \%hash ) or return;
1324
1325
1326
1327    ### prefix a / on unix systems with a file uri, since it would
1328    ### look somewhat like this:
1329    ###     file:///home/kane/file
1330    ### whereas windows file uris for 'c:\some\dir\file' might look like:
1331    ###     file:///C:/some/dir/file
1332    ###     file:///C|/some/dir/file
1333    ### or for a network share '\\host\share\some\dir\file':
1334    ###     file:////host/share/some/dir/file
1335    ###
1336    ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1337    ###     file://vms.host.edu/disk$user/my/notes/note12345.txt
1338    ###
1339
1340    my $path    = $self->path;
1341    my $vol     = $self->vol;
1342    my $share   = $self->share;
1343
1344    my $remote;
1345    if (!$share and $self->host) {
1346        return $self->_error(loc(
1347            "Currently %1 cannot handle hosts in %2 urls",
1348            'File::Fetch', 'file://'
1349        ));
1350    }
1351
1352    if( $vol ) {
1353        $path   = File::Spec->catdir( split /\//, $path );
1354        $remote = File::Spec->catpath( $vol, $path, $self->file);
1355
1356    } elsif( $share ) {
1357        ### win32 specific, and a share name, so we wont bother with File::Spec
1358        $path   =~ s|/+|\\|g;
1359        $remote = "\\\\".$self->host."\\$share\\$path";
1360
1361    } else {
1362        ### File::Spec on VMS can not currently handle UNIX syntax.
1363        my $file_class = ON_VMS
1364            ? 'File::Spec::Unix'
1365            : 'File::Spec';
1366
1367        $remote  = $file_class->catfile( $path, $self->file );
1368    }
1369
1370    ### File::Copy is littered with 'die' statements :( ###
1371    my $rv = eval { File::Copy::copy( $remote, $to ) };
1372
1373    ### something went wrong ###
1374    if( !$rv or $@ ) {
1375        return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1376                             $remote, $to, $!, $@));
1377    }
1378
1379    return $to;
1380}
1381
1382### use /usr/bin/rsync to fetch files
1383sub _rsync_fetch {
1384    my $self = shift;
1385    my %hash = @_;
1386
1387    my ($to);
1388    my $tmpl = {
1389        to  => { required => 1, store => \$to }
1390    };
1391    check( $tmpl, \%hash ) or return;
1392    my $rsync;
1393    unless ( $rsync = can_run('rsync') ) {
1394        $METHOD_FAIL->{'rsync'} = 1;
1395        return;
1396    }
1397
1398    my $cmd = [ $rsync ];
1399
1400    ### XXX: rsync has no I/O timeouts at all, by default
1401    push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1402
1403    push(@$cmd, '--quiet') unless $DEBUG;
1404
1405    ### DO NOT quote things for IPC::Run, it breaks stuff.
1406    push @$cmd, $self->uri, $to;
1407
1408    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1409    ### and there's no need for special casing any more.
1410    ### DO NOT quote things for IPC::Run, it breaks stuff.
1411    # $IPC::Cmd::USE_IPC_RUN
1412    #    ? ($to, $self->uri)
1413    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1414
1415    my $captured;
1416    unless(run( command => $cmd,
1417                buffer  => \$captured,
1418                verbose => $DEBUG )
1419    ) {
1420
1421        return $self->_error(loc("Command %1 failed: %2",
1422            "@$cmd" || '', $captured || ''));
1423    }
1424
1425    return $to;
1426
1427}
1428
1429### use git to fetch files
1430sub _git_fetch {
1431    my $self = shift;
1432    my %hash = @_;
1433
1434    my ($to);
1435    my $tmpl = {
1436        to  => { required => 1, store => \$to }
1437    };
1438    check( $tmpl, \%hash ) or return;
1439    my $git;
1440    unless ( $git = can_run('git') ) {
1441        $METHOD_FAIL->{'git'} = 1;
1442        return;
1443    }
1444
1445    my $cmd = [ $git, 'clone' ];
1446
1447    #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1448
1449    push(@$cmd, '--quiet') unless $DEBUG;
1450
1451    ### DO NOT quote things for IPC::Run, it breaks stuff.
1452    push @$cmd, $self->uri, $to;
1453
1454    ### with IPC::Cmd > 0.41, this is fixed in teh library,
1455    ### and there's no need for special casing any more.
1456    ### DO NOT quote things for IPC::Run, it breaks stuff.
1457    # $IPC::Cmd::USE_IPC_RUN
1458    #    ? ($to, $self->uri)
1459    #    : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1460
1461    my $captured;
1462    unless(run( command => $cmd,
1463                buffer  => \$captured,
1464                verbose => $DEBUG )
1465    ) {
1466
1467        return $self->_error(loc("Command %1 failed: %2",
1468            "@$cmd" || '', $captured || ''));
1469    }
1470
1471    return $to;
1472
1473}
1474
1475#################################
1476#
1477# Error code
1478#
1479#################################
1480
1481=pod
1482
1483=head2 $ff->error([BOOL])
1484
1485Returns the last encountered error as string.
1486Pass it a true value to get the C<Carp::longmess()> output instead.
1487
1488=cut
1489
1490### error handling the way Archive::Extract does it
1491sub _error {
1492    my $self    = shift;
1493    my $error   = shift;
1494
1495    $self->_error_msg( $error );
1496    $self->_error_msg_long( Carp::longmess($error) );
1497
1498    if( $WARN ) {
1499        carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1500    }
1501
1502    return;
1503}
1504
1505sub error {
1506    my $self = shift;
1507    return shift() ? $self->_error_msg_long : $self->_error_msg;
1508}
1509
1510
15111;
1512
1513=pod
1514
1515=head1 HOW IT WORKS
1516
1517File::Fetch is able to fetch a variety of uris, by using several
1518external programs and modules.
1519
1520Below is a mapping of what utilities will be used in what order
1521for what schemes, if available:
1522
1523    file    => LWP, lftp, file
1524    http    => LWP, HTTP::Tiny, wget, curl, lftp, fetch, HTTP::Lite, lynx, iosock
1525    ftp     => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp
1526    rsync   => rsync
1527    git     => git
1528
1529If you'd like to disable the use of one or more of these utilities
1530and/or modules, see the C<$BLACKLIST> variable further down.
1531
1532If a utility or module isn't available, it will be marked in a cache
1533(see the C<$METHOD_FAIL> variable further down), so it will not be
1534tried again. The C<fetch> method will only fail when all options are
1535exhausted, and it was not able to retrieve the file.
1536
1537The C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD
1538may also have it from C<pkgsrc>. We only check for C<fetch> on those
1539three platforms.
1540
1541C<iosock> is a very limited L<IO::Socket::INET> based mechanism for
1542retrieving C<http> schemed urls. It doesn't follow redirects for instance.
1543
1544C<git> only supports C<git://> style urls.
1545
1546A special note about fetching files from an ftp uri:
1547
1548By default, all ftp connections are done in passive mode. To change
1549that, see the C<$FTP_PASSIVE> variable further down.
1550
1551Furthermore, ftp uris only support anonymous connections, so no
1552named user/password pair can be passed along.
1553
1554C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1555further down.
1556
1557=head1 GLOBAL VARIABLES
1558
1559The behaviour of File::Fetch can be altered by changing the following
1560global variables:
1561
1562=head2 $File::Fetch::FROM_EMAIL
1563
1564This is the email address that will be sent as your anonymous ftp
1565password.
1566
1567Default is C<File-Fetch@example.com>.
1568
1569=head2 $File::Fetch::USER_AGENT
1570
1571This is the useragent as C<LWP> will report it.
1572
1573Default is C<File::Fetch/$VERSION>.
1574
1575=head2 $File::Fetch::FTP_PASSIVE
1576
1577This variable controls whether the environment variable C<FTP_PASSIVE>
1578and any passive switches to commandline tools will be set to true.
1579
1580Default value is 1.
1581
1582Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1583files, since passive mode can only be set interactively for this binary
1584
1585=head2 $File::Fetch::TIMEOUT
1586
1587When set, controls the network timeout (counted in seconds).
1588
1589Default value is 0.
1590
1591=head2 $File::Fetch::WARN
1592
1593This variable controls whether errors encountered internally by
1594C<File::Fetch> should be C<carp>'d or not.
1595
1596Set to false to silence warnings. Inspect the output of the C<error()>
1597method manually to see what went wrong.
1598
1599Defaults to C<true>.
1600
1601=head2 $File::Fetch::DEBUG
1602
1603This enables debugging output when calling commandline utilities to
1604fetch files.
1605This also enables C<Carp::longmess> errors, instead of the regular
1606C<carp> errors.
1607
1608Good for tracking down why things don't work with your particular
1609setup.
1610
1611Default is 0.
1612
1613=head2 $File::Fetch::BLACKLIST
1614
1615This is an array ref holding blacklisted modules/utilities for fetching
1616files with.
1617
1618To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1619set $File::Fetch::BLACKLIST to:
1620
1621    $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1622
1623The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1624
1625See the note on C<MAPPING> below.
1626
1627=head2 $File::Fetch::METHOD_FAIL
1628
1629This is a hashref registering what modules/utilities were known to fail
1630for fetching files (mostly because they weren't installed).
1631
1632You can reset this cache by assigning an empty hashref to it, or
1633individually remove keys.
1634
1635See the note on C<MAPPING> below.
1636
1637=head1 MAPPING
1638
1639
1640Here's a quick mapping for the utilities/modules, and their names for
1641the $BLACKLIST, $METHOD_FAIL and other internal functions.
1642
1643    LWP         => lwp
1644    HTTP::Lite  => httplite
1645    HTTP::Tiny  => httptiny
1646    Net::FTP    => netftp
1647    wget        => wget
1648    lynx        => lynx
1649    ncftp       => ncftp
1650    ftp         => ftp
1651    curl        => curl
1652    rsync       => rsync
1653    lftp        => lftp
1654    fetch       => fetch
1655    IO::Socket  => iosock
1656
1657=head1 FREQUENTLY ASKED QUESTIONS
1658
1659=head2 So how do I use a proxy with File::Fetch?
1660
1661C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1662You will need to set your environment variables accordingly. For
1663example, to use an ftp proxy:
1664
1665    $ENV{ftp_proxy} = 'foo.com';
1666
1667Refer to the LWP::UserAgent manpage for more details.
1668
1669=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1670
1671C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1672which we in turn capture. If that content is a 'custom' error file
1673(like, say, a C<404 handler>), you will get that contents instead.
1674
1675Sadly, C<lynx> doesn't support any options to return a different exit
1676code on non-C<200 OK> status, giving us no way to tell the difference
1677between a 'successful' fetch and a custom error page.
1678
1679Therefor, we recommend to only use C<lynx> as a last resort. This is
1680why it is at the back of our list of methods to try as well.
1681
1682=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1683
1684C<File::Fetch> is relatively smart about things. When trying to write
1685a file to disk, it removes the C<query parameters> (see the
1686C<output_file> method for details) from the file name before creating
1687it. In most cases this suffices.
1688
1689If you have any other characters you need to escape, please install
1690the C<URI::Escape> module from CPAN, and pre-encode your URI before
1691passing it to C<File::Fetch>. You can read about the details of URIs
1692and URI encoding here:
1693
1694  http://www.faqs.org/rfcs/rfc2396.html
1695
1696=head1 TODO
1697
1698=over 4
1699
1700=item Implement $PREFER_BIN
1701
1702To indicate to rather use commandline tools than modules
1703
1704=back
1705
1706=head1 BUG REPORTS
1707
1708Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1709
1710=head1 AUTHOR
1711
1712This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1713
1714=head1 COPYRIGHT
1715
1716This library is free software; you may redistribute and/or modify it
1717under the same terms as Perl itself.
1718
1719
1720=cut
1721
1722# Local variables:
1723# c-indentation-style: bsd
1724# c-basic-offset: 4
1725# indent-tabs-mode: nil
1726# End:
1727# vim: expandtab shiftwidth=4:
1728
1729
1730
1731
1732