1package Net::FTP::RetrHandle;
2our $VERSION = '0.2';
3
4use warnings;
5use strict;
6
7use constant DEFAULT_MAX_SKIPSIZE => 1024 * 1024 * 2;
8use constant DEFAULT_BLOCKSIZE => 10240; # Net::FTP's default
9
10use base 'IO::Seekable';
11# We don't use base 'IO::Handle'; it currently confuses Archive::Zip.
12
13use Carp;
14use Scalar::Util;
15
16
17=head1 NAME
18
19Net::FTP::RetrHandle - Tied or IO::Handle-compatible interface to a file retrieved by FTP
20
21=head1 SYNOPSIS
22
23Provides a file reading interface for reading all or parts of files
24located on a remote FTP server, including emulation of C<seek> and
25support for downloading only the parts of the file requested.
26
27=head1 DESCRIPTION
28
29Support for skipping the beginning of the file is implemented with the
30FTP C<REST> command, which starts a retrieval at any point in the
31file.  Support for skipping the end of the file is implemented with
32the FTP C<ABOR> command, which stops the transfer.  With these two
33commands and some careful tracking of the current file position, we're
34able to reliably emulate a C<seek/read> pair, and get only the parts
35of the file that are actually read.
36
37This was originally designed for use with
38L<Archive::Zip|Archive::Zip>; it's reliable enough that the table of
39contents and individual files can be extracted from a remote ZIP
40archive without downloading the whole thing.  See L<EXAMPLES> below.
41
42An interface compatible with L<IO::Handle|IO::Handle> is provided,
43along with a C<tie>-based interface.
44
45Remember that an FTP server can only do one thing at a time, so make
46sure to C<close> your connection before asking the FTP server to do
47nything else.
48
49=head1 CONSTRUCTOR
50
51=head2 new ( $ftp, $filename, options... )
52
53Creates a new L<IO::Handle|IO::Handle>-compatible object to fetch all
54or parts of C<$filename> using the FTP connection C<$ftp>.
55
56Available options:
57
58=over 4
59
60=item MaxSkipSize => $size
61
62If we need to move forward in a file or close the connection,
63sometimes it's faster to just read the bytes we don't need than to
64abort the connection and restart. This setting tells how many
65unnecessary bytes we're willing to read rather than abort.  An
66appropriate setting depends on the speed of transferring files and the
67speed of reconnecting to the server.
68
69=item BlockSize => $size
70
71When doing buffered reads, how many bytes to read at once.  The
72default is the same as the default for L<Net::FTP|Net::FTP>, so it's
73generally best to leave it alone.
74
75=item AlreadyBinary => $bool
76
77If set to a true value, we assume the server is already in binary
78mode, and don't try to set it.
79
80=back
81
82=cut
83use constant USAGE => "Usage: Net::FTP::RetrHandle\->new(ftp => \$ftp_obj, filename => \$filename)\n";
84sub new
85{
86  my $class = shift;
87  my $ftp = shift
88    or croak USAGE;
89  my $filename = shift
90    or croak USAGE;
91  my $self = { MaxSkipSize => DEFAULT_MAX_SKIPSIZE,
92	       BlockSize => DEFAULT_BLOCKSIZE,
93	       @_,
94	       ftp => $ftp, filename => $filename,
95	       pos => 0, nextpos => 0};
96  $self->{size} = $self->{ftp}->size($self->{filename})
97    or return undef;
98  $self->{ftp}->binary()
99    unless ($self->{AlreadyBinary});
100
101  bless $self,$class;
102}
103
104=head1 METHODS
105
106Most of the methods implemented behave exactly like those from
107L<IO::Handle|IO::Handle>.
108
109These methods are implemented: C<binmode>, C<clearerr>, C<close>, C<eof>,
110C<error>, C<getc>, C<getline>, C<getlines>, C<getpos>, C<read>,
111C<seek>, C<setpos>, C<sysseek>, C<tell>, C<ungetc>, C<opened>.
112
113=cut ;
114
115sub opened { 1; }
116
117sub seek
118{
119  my $self = shift;
120  my $pos = shift || 0;
121  my $whence = shift || 0;
122  warn "   SEEK: self=$self, pos=$pos, whence=$whence\n"
123    if ($ENV{DEBUG});
124  my $curpos = $self->tell();
125  my $newpos = _newpos($self->tell(),$self->{size},$pos,$whence);
126  my $ret;
127  if ($newpos == $curpos)
128  {
129    return $curpos;
130  }
131  elsif (defined($self->{_buf}) and ($newpos > $curpos) and ($newpos < ($curpos + length($self->{_buf}))))
132  {
133    # Just seeking within the buffer (or not at all)
134    substr($self->{_buf},0,$newpos - $curpos,'');
135    $ret = $newpos;
136  }
137  else
138  {
139    $ret = $self->sysseek($newpos,0);
140    $self->{_buf} = '';
141  }
142  return $ret;
143}
144
145sub _newpos
146{
147
148  my($curpos,$size,$pos,$whence)=@_;
149  if ($whence == 0) # seek_set
150  {
151    return $pos;
152  }
153  elsif ($whence == 1) # seek_cur
154  {
155    return $curpos + $pos;
156  }
157  elsif ($whence == 2) # seek_end
158  {
159    return $size + $pos;
160  }
161  else
162  {
163    die "Invalid value $whence for whence!";
164  }
165}
166
167sub sysseek
168{
169  my $self = shift;
170  my $pos = shift || 0;
171  my $whence = shift || 0;
172  warn "SYSSEEK: self=$self, pos=$pos, whence=$whence\n"
173    if ($ENV{DEBUG});
174  my $newpos = _newpos($self->{nextpos},$self->{size},$pos,$whence);
175
176  $self->{eof}=undef;
177  return $self->{nextpos}=$newpos;
178}
179
180sub tell
181{
182  my $self = shift;
183  return $self->{nextpos} - (defined($self->{_buf}) ? length($self->{_buf}) : 0);
184}
185
186# WARNING: ASCII mode probably breaks seek.
187sub binmode
188{
189  my $self = shift;
190  my $mode = shift || ':raw';
191  return if (defined($self->{curmode}) && ($self->{curmode} eq $mode));
192  if (defined($mode) and $mode eq ':crlf')
193  {
194    $self->_finish_connection();
195    $self->{ftp}->ascii()
196      or return $self->seterr();
197  }
198  else
199  {
200    $self->_finish_connection();
201    $self->{ftp}->binary()
202      or return $self->seterr();
203  }
204  $self->{curmode} = $mode;
205}
206
207sub _min
208{
209  return $_[0] < $_[1] ? $_[0] : $_[1];
210}
211
212sub _max
213{
214  return $_[0] > $_[1] ? $_[0] : $_[1];
215}
216
217sub read
218{
219  my $self = shift;
220#  return $self->sysread(@_);
221
222  my(undef,$len,$offset)=@_;
223  $offset ||= 0;
224  warn "READ(buf,$len,$offset)\n"
225    if ($ENV{DEBUG});
226
227  if (!defined($self->{_buf}) || length($self->{_buf}) <= 0)
228  {
229    $self->sysread($self->{_buf},_max($len,$self->{BlockSize}))
230      or return 0;
231  }
232  elsif (length($self->{_buf}) < $len)
233  {
234    $self->sysread($self->{_buf},_max($len-length($self->{_buf}),$self->{BlockSize}),length($self->{_buf}));
235  }
236  my $ret = _min($len,length($self->{_buf}));
237  if (!defined($_[0])) { $_[0] = '' }
238  substr($_[0],$offset) = substr($self->{_buf},0,$len,'');
239  $self->{read_count}++;
240
241  return $ret;
242}
243
244sub sysread
245{
246  my $self = shift;
247  if ($self->{eof})
248  {
249    return 0;
250  }
251
252  my(undef,$len,$offset) = @_;
253  $offset ||= 0;
254
255  warn "SYSREAD(buf,$len,$offset)\n"
256    if ($ENV{DEBUG});
257  if ($self->{nextpos} >= $self->{size})
258  {
259    $self->{eof} = 1;
260    $self->{pos} = $self->{nextpos};
261    return 0;
262  }
263
264  if ($self->{pos} != $self->{nextpos})
265  {
266    # They seeked.
267    if ($self->{ftp_running})
268    {
269      warn "Seek detected, nextpos=$self->{nextpos}, pos=$self->{pos}, MaxSkipSize=$self->{MaxSkipSize}\n"
270	if ($ENV{DEBUG});
271      if ($self->{nextpos} > $self->{pos} and ($self->{nextpos} - $self->{pos}) < $self->{MaxSkipSize})
272      {
273	my $br = $self->{nextpos}-$self->{pos};
274	warn "Reading $br bytes to skip ahead\n"
275	  if ($ENV{DEBUG});
276	my $junkbuff;
277	while ($br > 0)
278	{
279	  warn "Trying to read $br more bytes\n"
280	    if ($ENV{DEBUG});
281	  my $b = $self->{ftp_data}->read($junkbuff,$br);
282	  if ($b == 0)
283	  {
284	    $self->_at_eof();
285	    return 0;
286	  }
287	  elsif (!defined($b) || $b < 0)
288	  {
289	    return $self->seterr();
290	  }
291	  else
292	  {
293	    $br -= $b;
294	  }
295	}
296	$self->{pos}=$self->{nextpos};
297      }
298      else
299      {
300	warn "Aborting connection to move to new position\n"
301	  if ($ENV{DEBUG});
302	$self->_finish_connection();
303      }
304    }
305  }
306
307  if (!$self->{ftp_running})
308  {
309    $self->{ftp}->restart($self->{nextpos});
310    $self->{ftp_data} = $self->{ftp}->retr($self->{filename})
311      or return $self->seterr();
312    $self->{ftp_running} = 1;
313    $self->{pos}=$self->{nextpos};
314  }
315
316  my $tmpbuf;
317  my $rb = $self->{ftp_data}->read($tmpbuf,$len);
318  if ($rb == 0)
319  {
320    $self->_at_eof();
321    return 0;
322  }
323  elsif (!defined($rb) || $rb < 0)
324  {
325    return $self->seterr();
326  }
327
328  if (!defined($_[0])) { $_[0] = '' }
329  substr($_[0],$offset) = $tmpbuf;
330  $self->{pos} += $rb;
331  $self->{nextpos} += $rb;
332
333  $self->{sysread_count}++;
334  $rb;
335}
336
337sub _at_eof
338{
339  my $self = shift;
340  $self->{eof}=1;
341  $self->_finish_connection();
342#  $self->{ftp_data}->_close();
343  $self->{ftp_running} = $self->{ftp_data} = undef;
344}
345
346sub _finish_connection
347{
348  my $self = shift;
349  warn "_finish_connection\n"
350    if ($ENV{DEBUG});
351  return unless ($self->{ftp_running});
352
353  if ($self->{size} - $self->{pos} < $self->{MaxSkipSize})
354  {
355    warn "Skipping " . ($self->{size}-$self->{pos}) . " bytes\n"
356      if ($ENV{DEBUG});
357    my $junkbuff;
358    my $br;
359    while(($br = $self->{ftp_data}->read($junkbuff,8192)))
360    {
361      # Read until EOF or error
362    }
363    defined($br)
364      or $self->seterr();
365  }
366  warn "Shutting down existing FTP DATA session...\n"
367    if ($ENV{DEBUG});
368
369  my $closeret;
370  {
371    eval {
372      $closeret = $self->{ftp_data}->close();
373    };
374    # Work around a timeout bug in Net::FTP
375    if ($@ && $@ =~ /^Timeout /)
376    {
377      warn "Timeout closing connection, retrying...\n"
378	if ($ENV{DEBUG});
379      select(undef,undef,undef,1);
380      redo;
381    }
382  }
383
384  $self->{ftp_running} = $self->{ftp_data} = undef;
385  return $closeret ? 1 : $self->seterr();
386}
387
388sub write
389{
390  die "Only reading currently supported";
391}
392
393sub close
394{
395  my $self = shift;
396  return $self->{ftp_data} ? $self->_finish_connection()
397                           : 1;
398}
399
400sub eof
401{
402  my $self = shift;
403  if ($self->{eof})
404  {
405    return 1;
406  }
407
408  my $c = $self->getc;
409  if (!defined($c))
410  {
411    return 1;
412  }
413  $self->ungetc(ord($c));
414  return undef;
415}
416
417sub getc
418{
419  my $self = shift;
420  my $c;
421  my $rb = $self->read($c,1);
422  if ($rb < 1)
423  {
424    return undef;
425  }
426  return $c;
427}
428
429sub ungetc
430{
431  my $self = shift;
432  # Note that $c is the ordinal value of a character, not the
433  # character itself (for some reason)
434  my($c)=@_;
435  $self->{_buf} = chr($c) . $self->{_buf};
436}
437
438sub getline
439{
440  my $self = shift;
441  if (!defined($/))
442  {
443    my $buf;
444    while($self->read($buf,$self->{BlockSize},length($buf)) > 0)
445    {
446      # Keep going
447    }
448    return $buf;
449  }
450  elsif (ref($/) && looks_like_number ${$/} )
451  {
452    my $buf;
453    $self->read($buf,${$/})
454      or return undef;
455    return $buf;
456  }
457
458  my $rs;
459  if ($/ eq '')
460  {
461    $rs = "\n\n";
462  }
463  else
464  {
465    $rs = $/;
466  }
467  my $eol;
468  if (!defined($self->{_buf})) { $self->{_buf} = '' }
469  while (($eol=index($self->{_buf},$rs)) < $[)
470  {
471    if ($self->{eof})
472    {
473      # return what's left
474      if (length($self->{_buf}) == 0)
475      {
476	return undef;
477      }
478      else
479      {
480	return substr($self->{_buf},0,length($self->{_buf}),'');
481      }
482    }
483    else
484    {
485      $self->sysread($self->{_buf},$self->{BlockSize},length($self->{_buf}));
486    }
487  }
488  # OK, we should have a match.
489  my $tmpbuf = substr($self->{_buf},0,$eol+length($rs),'');
490  while ($/ eq '' and substr($self->{_buf},0,1) eq "\n")
491  {
492    substr($self->{_buf},0,1)='';
493  }
494  return $tmpbuf;
495}
496
497sub getlines
498{
499  my $self = shift;
500  my @lines;
501  my $line;
502  while (defined($line = $self->getline()))
503  {
504    push(@lines,$line);
505  }
506  @lines;
507}
508
509sub error
510{
511  return undef;
512}
513
514sub seterr
515{
516  my $self = shift;
517  $self->{_error} = 1;
518  return undef;
519}
520
521sub clearerr
522{
523  my $self = shift;
524  $self->{_error} = undef;
525  return 0;
526}
527
528sub getpos
529{
530  my $self = shift;
531  return $self->tell();
532}
533
534sub setpos
535{
536  my $self = shift;
537  return $self->seek(@_);
538}
539
540sub DESTROY
541{
542  my $self = shift;
543  if (UNIVERSAL::isa($self,'GLOB'))
544  {
545    $self = tied *$self
546	or die "$self not tied?...";
547  }
548  if ($self->{ftp_data})
549  {
550    $self->_finish_connection();
551  }
552  warn "sysread called ".$self->{sysread_count}." times.\n"
553    if ($ENV{DEBUG});
554}
555
556=head1 TIED INTERFACE
557
558Instead of a L<IO::Handle|IO::Handle>-compatible interface, you can
559use a C<tie>-based interface to use the standard Perl I/O operators.
560You can use it like this:
561
562  use Net::FTP::RetrHandle;
563  # Create FTP object in $ftp
564  # Store filename in $filename
565  tie *FH, 'Net::FTP::RetrHandle', $ftp, $filename
566    or die "Error in tie!\n";
567
568=cut
569  ;
570sub TIEHANDLE
571{
572  my $class = shift;
573  my $obj = $class->new(@_);
574  $obj;
575}
576
577sub READ
578{
579  my $self = shift;
580  $self->read(@_);
581}
582
583sub READLINE
584{
585  my $self = shift;
586  return wantarray ? $self->getlines(@_)
587                   : $self->getline(@_);
588}
589
590sub GETC
591{
592  my $self = shift;
593  return $self->getc(@_);
594}
595
596sub SEEK
597{
598  my $self = shift;
599  return $self->seek(@_);
600}
601
602sub SYSSEEK
603{
604  my $self = shift;
605  return $self->sysseek(@_);
606}
607
608sub TELL
609{
610  my $self = shift;
611  return $self->tell();
612}
613
614sub CLOSE
615{
616  my $self = shift;
617  return $self->close(@_);
618}
619
620sub EOF
621{
622  my $self = shift;
623  return $self->eof(@_);
624
625}
626sub UNTIE
627{
628  tied($_[0])->close(@_);
629}
630
631=head1 EXAMPLE
632
633Here's an example of listing a Zip file without downloading the whole
634thing:
635
636    #!/usr/bin/perl
637
638    use warnings;
639    use strict;
640
641    use Net::FTP;
642    use Net::FTP::AutoReconnect;
643    use Net::FTP::RetrHandle;
644    use Archive::Zip;
645
646    my $ftp = Net::FTP::AutoReconnect->new("ftp.info-zip.com", Debug => $ENV{DEBUG})
647        or die "connect error\n";
648    $ftp->login('anonymous','example@example.com')
649        or die "login error\n";
650    $ftp->cwd('/pub/infozip/UNIX/LINUX')
651        or die "cwd error\n";
652    my $fh = Net::FTP::RetrHandle->new($ftp,'unz551x-glibc.zip')
653        or die "Couldn't get handle to remote file\n";
654    my $zip = Archive::Zip->new($fh)
655        or die "Couldn't create Zip object\n";
656    foreach my $fn ($zip->memberNames())
657    {
658      print "unz551-glibc.zip: $fn\n";
659    }
660
661
662=head1 AUTHOR
663
664Scott Gifford <sgifford@suspectclass.com>
665
666=head1 BUGS
667
668The distinction between tied filehandles and C<IO::Handle>-compatible
669filehandles should be blurrier.  It seems like other file handle
670objects you can freely mix method calls and traditional Perl
671operations, but I can't figure out how to do it.
672
673Many FTP servers don't like frequent connection aborts.  If that's the
674case, try L<Net::FTP::AutoReconnect>, which will hide much of that
675from you.
676
677If the filehandle is tied and created with C<gensym>, C<readline>
678doesn't work with older versions of Perl.  No idea why.
679
680=head1 SEE ALSO
681
682L<Net::FTP>, L<Net::FTP::AutoReconnect>, L<IO::Handle>.
683
684=head1 COPYRIGHT
685
686Copyright (c) 2006 Scott Gifford. All rights reserved.  This program
687is free software; you can redistribute it and/or modify it under the
688same terms as Perl itself.
689
690=cut
691
6921;
693