1package Net::FTP::RetrHandle; 2our $VERSION = '0.2'; 3 4use warnings; 5use strict; 6 7use constant DEFAULT_MAX_SKIPSIZE => 1024 * 1024 * 2; 8use constant DEFAULT_BLOCKSIZE => 10240; # Net::FTP's default 9 10use base 'IO::Seekable'; 11# We don't use base 'IO::Handle'; it currently confuses Archive::Zip. 12 13use Carp; 14use Scalar::Util; 15 16 17=head1 NAME 18 19Net::FTP::RetrHandle - Tied or IO::Handle-compatible interface to a file retrieved by FTP 20 21=head1 SYNOPSIS 22 23Provides a file reading interface for reading all or parts of files 24located on a remote FTP server, including emulation of C<seek> and 25support for downloading only the parts of the file requested. 26 27=head1 DESCRIPTION 28 29Support for skipping the beginning of the file is implemented with the 30FTP C<REST> command, which starts a retrieval at any point in the 31file. Support for skipping the end of the file is implemented with 32the FTP C<ABOR> command, which stops the transfer. With these two 33commands and some careful tracking of the current file position, we're 34able to reliably emulate a C<seek/read> pair, and get only the parts 35of the file that are actually read. 36 37This was originally designed for use with 38L<Archive::Zip|Archive::Zip>; it's reliable enough that the table of 39contents and individual files can be extracted from a remote ZIP 40archive without downloading the whole thing. See L<EXAMPLES> below. 41 42An interface compatible with L<IO::Handle|IO::Handle> is provided, 43along with a C<tie>-based interface. 44 45Remember that an FTP server can only do one thing at a time, so make 46sure to C<close> your connection before asking the FTP server to do 47nything else. 48 49=head1 CONSTRUCTOR 50 51=head2 new ( $ftp, $filename, options... ) 52 53Creates a new L<IO::Handle|IO::Handle>-compatible object to fetch all 54or parts of C<$filename> using the FTP connection C<$ftp>. 55 56Available options: 57 58=over 4 59 60=item MaxSkipSize => $size 61 62If we need to move forward in a file or close the connection, 63sometimes it's faster to just read the bytes we don't need than to 64abort the connection and restart. This setting tells how many 65unnecessary bytes we're willing to read rather than abort. An 66appropriate setting depends on the speed of transferring files and the 67speed of reconnecting to the server. 68 69=item BlockSize => $size 70 71When doing buffered reads, how many bytes to read at once. The 72default is the same as the default for L<Net::FTP|Net::FTP>, so it's 73generally best to leave it alone. 74 75=item AlreadyBinary => $bool 76 77If set to a true value, we assume the server is already in binary 78mode, and don't try to set it. 79 80=back 81 82=cut 83use constant USAGE => "Usage: Net::FTP::RetrHandle\->new(ftp => \$ftp_obj, filename => \$filename)\n"; 84sub new 85{ 86 my $class = shift; 87 my $ftp = shift 88 or croak USAGE; 89 my $filename = shift 90 or croak USAGE; 91 my $self = { MaxSkipSize => DEFAULT_MAX_SKIPSIZE, 92 BlockSize => DEFAULT_BLOCKSIZE, 93 @_, 94 ftp => $ftp, filename => $filename, 95 pos => 0, nextpos => 0}; 96 $self->{size} = $self->{ftp}->size($self->{filename}) 97 or return undef; 98 $self->{ftp}->binary() 99 unless ($self->{AlreadyBinary}); 100 101 bless $self,$class; 102} 103 104=head1 METHODS 105 106Most of the methods implemented behave exactly like those from 107L<IO::Handle|IO::Handle>. 108 109These methods are implemented: C<binmode>, C<clearerr>, C<close>, C<eof>, 110C<error>, C<getc>, C<getline>, C<getlines>, C<getpos>, C<read>, 111C<seek>, C<setpos>, C<sysseek>, C<tell>, C<ungetc>, C<opened>. 112 113=cut ; 114 115sub opened { 1; } 116 117sub seek 118{ 119 my $self = shift; 120 my $pos = shift || 0; 121 my $whence = shift || 0; 122 warn " SEEK: self=$self, pos=$pos, whence=$whence\n" 123 if ($ENV{DEBUG}); 124 my $curpos = $self->tell(); 125 my $newpos = _newpos($self->tell(),$self->{size},$pos,$whence); 126 my $ret; 127 if ($newpos == $curpos) 128 { 129 return $curpos; 130 } 131 elsif (defined($self->{_buf}) and ($newpos > $curpos) and ($newpos < ($curpos + length($self->{_buf})))) 132 { 133 # Just seeking within the buffer (or not at all) 134 substr($self->{_buf},0,$newpos - $curpos,''); 135 $ret = $newpos; 136 } 137 else 138 { 139 $ret = $self->sysseek($newpos,0); 140 $self->{_buf} = ''; 141 } 142 return $ret; 143} 144 145sub _newpos 146{ 147 148 my($curpos,$size,$pos,$whence)=@_; 149 if ($whence == 0) # seek_set 150 { 151 return $pos; 152 } 153 elsif ($whence == 1) # seek_cur 154 { 155 return $curpos + $pos; 156 } 157 elsif ($whence == 2) # seek_end 158 { 159 return $size + $pos; 160 } 161 else 162 { 163 die "Invalid value $whence for whence!"; 164 } 165} 166 167sub sysseek 168{ 169 my $self = shift; 170 my $pos = shift || 0; 171 my $whence = shift || 0; 172 warn "SYSSEEK: self=$self, pos=$pos, whence=$whence\n" 173 if ($ENV{DEBUG}); 174 my $newpos = _newpos($self->{nextpos},$self->{size},$pos,$whence); 175 176 $self->{eof}=undef; 177 return $self->{nextpos}=$newpos; 178} 179 180sub tell 181{ 182 my $self = shift; 183 return $self->{nextpos} - (defined($self->{_buf}) ? length($self->{_buf}) : 0); 184} 185 186# WARNING: ASCII mode probably breaks seek. 187sub binmode 188{ 189 my $self = shift; 190 my $mode = shift || ':raw'; 191 return if (defined($self->{curmode}) && ($self->{curmode} eq $mode)); 192 if (defined($mode) and $mode eq ':crlf') 193 { 194 $self->_finish_connection(); 195 $self->{ftp}->ascii() 196 or return $self->seterr(); 197 } 198 else 199 { 200 $self->_finish_connection(); 201 $self->{ftp}->binary() 202 or return $self->seterr(); 203 } 204 $self->{curmode} = $mode; 205} 206 207sub _min 208{ 209 return $_[0] < $_[1] ? $_[0] : $_[1]; 210} 211 212sub _max 213{ 214 return $_[0] > $_[1] ? $_[0] : $_[1]; 215} 216 217sub read 218{ 219 my $self = shift; 220# return $self->sysread(@_); 221 222 my(undef,$len,$offset)=@_; 223 $offset ||= 0; 224 warn "READ(buf,$len,$offset)\n" 225 if ($ENV{DEBUG}); 226 227 if (!defined($self->{_buf}) || length($self->{_buf}) <= 0) 228 { 229 $self->sysread($self->{_buf},_max($len,$self->{BlockSize})) 230 or return 0; 231 } 232 elsif (length($self->{_buf}) < $len) 233 { 234 $self->sysread($self->{_buf},_max($len-length($self->{_buf}),$self->{BlockSize}),length($self->{_buf})); 235 } 236 my $ret = _min($len,length($self->{_buf})); 237 if (!defined($_[0])) { $_[0] = '' } 238 substr($_[0],$offset) = substr($self->{_buf},0,$len,''); 239 $self->{read_count}++; 240 241 return $ret; 242} 243 244sub sysread 245{ 246 my $self = shift; 247 if ($self->{eof}) 248 { 249 return 0; 250 } 251 252 my(undef,$len,$offset) = @_; 253 $offset ||= 0; 254 255 warn "SYSREAD(buf,$len,$offset)\n" 256 if ($ENV{DEBUG}); 257 if ($self->{nextpos} >= $self->{size}) 258 { 259 $self->{eof} = 1; 260 $self->{pos} = $self->{nextpos}; 261 return 0; 262 } 263 264 if ($self->{pos} != $self->{nextpos}) 265 { 266 # They seeked. 267 if ($self->{ftp_running}) 268 { 269 warn "Seek detected, nextpos=$self->{nextpos}, pos=$self->{pos}, MaxSkipSize=$self->{MaxSkipSize}\n" 270 if ($ENV{DEBUG}); 271 if ($self->{nextpos} > $self->{pos} and ($self->{nextpos} - $self->{pos}) < $self->{MaxSkipSize}) 272 { 273 my $br = $self->{nextpos}-$self->{pos}; 274 warn "Reading $br bytes to skip ahead\n" 275 if ($ENV{DEBUG}); 276 my $junkbuff; 277 while ($br > 0) 278 { 279 warn "Trying to read $br more bytes\n" 280 if ($ENV{DEBUG}); 281 my $b = $self->{ftp_data}->read($junkbuff,$br); 282 if ($b == 0) 283 { 284 $self->_at_eof(); 285 return 0; 286 } 287 elsif (!defined($b) || $b < 0) 288 { 289 return $self->seterr(); 290 } 291 else 292 { 293 $br -= $b; 294 } 295 } 296 $self->{pos}=$self->{nextpos}; 297 } 298 else 299 { 300 warn "Aborting connection to move to new position\n" 301 if ($ENV{DEBUG}); 302 $self->_finish_connection(); 303 } 304 } 305 } 306 307 if (!$self->{ftp_running}) 308 { 309 $self->{ftp}->restart($self->{nextpos}); 310 $self->{ftp_data} = $self->{ftp}->retr($self->{filename}) 311 or return $self->seterr(); 312 $self->{ftp_running} = 1; 313 $self->{pos}=$self->{nextpos}; 314 } 315 316 my $tmpbuf; 317 my $rb = $self->{ftp_data}->read($tmpbuf,$len); 318 if ($rb == 0) 319 { 320 $self->_at_eof(); 321 return 0; 322 } 323 elsif (!defined($rb) || $rb < 0) 324 { 325 return $self->seterr(); 326 } 327 328 if (!defined($_[0])) { $_[0] = '' } 329 substr($_[0],$offset) = $tmpbuf; 330 $self->{pos} += $rb; 331 $self->{nextpos} += $rb; 332 333 $self->{sysread_count}++; 334 $rb; 335} 336 337sub _at_eof 338{ 339 my $self = shift; 340 $self->{eof}=1; 341 $self->_finish_connection(); 342# $self->{ftp_data}->_close(); 343 $self->{ftp_running} = $self->{ftp_data} = undef; 344} 345 346sub _finish_connection 347{ 348 my $self = shift; 349 warn "_finish_connection\n" 350 if ($ENV{DEBUG}); 351 return unless ($self->{ftp_running}); 352 353 if ($self->{size} - $self->{pos} < $self->{MaxSkipSize}) 354 { 355 warn "Skipping " . ($self->{size}-$self->{pos}) . " bytes\n" 356 if ($ENV{DEBUG}); 357 my $junkbuff; 358 my $br; 359 while(($br = $self->{ftp_data}->read($junkbuff,8192))) 360 { 361 # Read until EOF or error 362 } 363 defined($br) 364 or $self->seterr(); 365 } 366 warn "Shutting down existing FTP DATA session...\n" 367 if ($ENV{DEBUG}); 368 369 my $closeret; 370 { 371 eval { 372 $closeret = $self->{ftp_data}->close(); 373 }; 374 # Work around a timeout bug in Net::FTP 375 if ($@ && $@ =~ /^Timeout /) 376 { 377 warn "Timeout closing connection, retrying...\n" 378 if ($ENV{DEBUG}); 379 select(undef,undef,undef,1); 380 redo; 381 } 382 } 383 384 $self->{ftp_running} = $self->{ftp_data} = undef; 385 return $closeret ? 1 : $self->seterr(); 386} 387 388sub write 389{ 390 die "Only reading currently supported"; 391} 392 393sub close 394{ 395 my $self = shift; 396 return $self->{ftp_data} ? $self->_finish_connection() 397 : 1; 398} 399 400sub eof 401{ 402 my $self = shift; 403 if ($self->{eof}) 404 { 405 return 1; 406 } 407 408 my $c = $self->getc; 409 if (!defined($c)) 410 { 411 return 1; 412 } 413 $self->ungetc(ord($c)); 414 return undef; 415} 416 417sub getc 418{ 419 my $self = shift; 420 my $c; 421 my $rb = $self->read($c,1); 422 if ($rb < 1) 423 { 424 return undef; 425 } 426 return $c; 427} 428 429sub ungetc 430{ 431 my $self = shift; 432 # Note that $c is the ordinal value of a character, not the 433 # character itself (for some reason) 434 my($c)=@_; 435 $self->{_buf} = chr($c) . $self->{_buf}; 436} 437 438sub getline 439{ 440 my $self = shift; 441 if (!defined($/)) 442 { 443 my $buf; 444 while($self->read($buf,$self->{BlockSize},length($buf)) > 0) 445 { 446 # Keep going 447 } 448 return $buf; 449 } 450 elsif (ref($/) && looks_like_number ${$/} ) 451 { 452 my $buf; 453 $self->read($buf,${$/}) 454 or return undef; 455 return $buf; 456 } 457 458 my $rs; 459 if ($/ eq '') 460 { 461 $rs = "\n\n"; 462 } 463 else 464 { 465 $rs = $/; 466 } 467 my $eol; 468 if (!defined($self->{_buf})) { $self->{_buf} = '' } 469 while (($eol=index($self->{_buf},$rs)) < $[) 470 { 471 if ($self->{eof}) 472 { 473 # return what's left 474 if (length($self->{_buf}) == 0) 475 { 476 return undef; 477 } 478 else 479 { 480 return substr($self->{_buf},0,length($self->{_buf}),''); 481 } 482 } 483 else 484 { 485 $self->sysread($self->{_buf},$self->{BlockSize},length($self->{_buf})); 486 } 487 } 488 # OK, we should have a match. 489 my $tmpbuf = substr($self->{_buf},0,$eol+length($rs),''); 490 while ($/ eq '' and substr($self->{_buf},0,1) eq "\n") 491 { 492 substr($self->{_buf},0,1)=''; 493 } 494 return $tmpbuf; 495} 496 497sub getlines 498{ 499 my $self = shift; 500 my @lines; 501 my $line; 502 while (defined($line = $self->getline())) 503 { 504 push(@lines,$line); 505 } 506 @lines; 507} 508 509sub error 510{ 511 return undef; 512} 513 514sub seterr 515{ 516 my $self = shift; 517 $self->{_error} = 1; 518 return undef; 519} 520 521sub clearerr 522{ 523 my $self = shift; 524 $self->{_error} = undef; 525 return 0; 526} 527 528sub getpos 529{ 530 my $self = shift; 531 return $self->tell(); 532} 533 534sub setpos 535{ 536 my $self = shift; 537 return $self->seek(@_); 538} 539 540sub DESTROY 541{ 542 my $self = shift; 543 if (UNIVERSAL::isa($self,'GLOB')) 544 { 545 $self = tied *$self 546 or die "$self not tied?..."; 547 } 548 if ($self->{ftp_data}) 549 { 550 $self->_finish_connection(); 551 } 552 warn "sysread called ".$self->{sysread_count}." times.\n" 553 if ($ENV{DEBUG}); 554} 555 556=head1 TIED INTERFACE 557 558Instead of a L<IO::Handle|IO::Handle>-compatible interface, you can 559use a C<tie>-based interface to use the standard Perl I/O operators. 560You can use it like this: 561 562 use Net::FTP::RetrHandle; 563 # Create FTP object in $ftp 564 # Store filename in $filename 565 tie *FH, 'Net::FTP::RetrHandle', $ftp, $filename 566 or die "Error in tie!\n"; 567 568=cut 569 ; 570sub TIEHANDLE 571{ 572 my $class = shift; 573 my $obj = $class->new(@_); 574 $obj; 575} 576 577sub READ 578{ 579 my $self = shift; 580 $self->read(@_); 581} 582 583sub READLINE 584{ 585 my $self = shift; 586 return wantarray ? $self->getlines(@_) 587 : $self->getline(@_); 588} 589 590sub GETC 591{ 592 my $self = shift; 593 return $self->getc(@_); 594} 595 596sub SEEK 597{ 598 my $self = shift; 599 return $self->seek(@_); 600} 601 602sub SYSSEEK 603{ 604 my $self = shift; 605 return $self->sysseek(@_); 606} 607 608sub TELL 609{ 610 my $self = shift; 611 return $self->tell(); 612} 613 614sub CLOSE 615{ 616 my $self = shift; 617 return $self->close(@_); 618} 619 620sub EOF 621{ 622 my $self = shift; 623 return $self->eof(@_); 624 625} 626sub UNTIE 627{ 628 tied($_[0])->close(@_); 629} 630 631=head1 EXAMPLE 632 633Here's an example of listing a Zip file without downloading the whole 634thing: 635 636 #!/usr/bin/perl 637 638 use warnings; 639 use strict; 640 641 use Net::FTP; 642 use Net::FTP::AutoReconnect; 643 use Net::FTP::RetrHandle; 644 use Archive::Zip; 645 646 my $ftp = Net::FTP::AutoReconnect->new("ftp.info-zip.com", Debug => $ENV{DEBUG}) 647 or die "connect error\n"; 648 $ftp->login('anonymous','example@example.com') 649 or die "login error\n"; 650 $ftp->cwd('/pub/infozip/UNIX/LINUX') 651 or die "cwd error\n"; 652 my $fh = Net::FTP::RetrHandle->new($ftp,'unz551x-glibc.zip') 653 or die "Couldn't get handle to remote file\n"; 654 my $zip = Archive::Zip->new($fh) 655 or die "Couldn't create Zip object\n"; 656 foreach my $fn ($zip->memberNames()) 657 { 658 print "unz551-glibc.zip: $fn\n"; 659 } 660 661 662=head1 AUTHOR 663 664Scott Gifford <sgifford@suspectclass.com> 665 666=head1 BUGS 667 668The distinction between tied filehandles and C<IO::Handle>-compatible 669filehandles should be blurrier. It seems like other file handle 670objects you can freely mix method calls and traditional Perl 671operations, but I can't figure out how to do it. 672 673Many FTP servers don't like frequent connection aborts. If that's the 674case, try L<Net::FTP::AutoReconnect>, which will hide much of that 675from you. 676 677If the filehandle is tied and created with C<gensym>, C<readline> 678doesn't work with older versions of Perl. No idea why. 679 680=head1 SEE ALSO 681 682L<Net::FTP>, L<Net::FTP::AutoReconnect>, L<IO::Handle>. 683 684=head1 COPYRIGHT 685 686Copyright (c) 2006 Scott Gifford. All rights reserved. This program 687is free software; you can redistribute it and/or modify it under the 688same terms as Perl itself. 689 690=cut 691 6921; 693