1# IO::Zlib.pm 2# 3# Copyright (c) 1998-2004 Tom Hughes <tom@compton.nu>. 4# All rights reserved. This program is free software; you can redistribute 5# it and/or modify it under the same terms as Perl itself. 6 7package IO::Zlib; 8 9=head1 NAME 10 11IO::Zlib - IO:: style interface to L<Compress::Zlib> 12 13=head1 SYNOPSIS 14 15With any version of Perl 5 you can use the basic OO interface: 16 17 use IO::Zlib; 18 19 $fh = new IO::Zlib; 20 if ($fh->open("file.gz", "rb")) { 21 print <$fh>; 22 $fh->close; 23 } 24 25 $fh = IO::Zlib->new("file.gz", "wb9"); 26 if (defined $fh) { 27 print $fh "bar\n"; 28 $fh->close; 29 } 30 31 $fh = IO::Zlib->new("file.gz", "rb"); 32 if (defined $fh) { 33 print <$fh>; 34 undef $fh; # automatically closes the file 35 } 36 37With Perl 5.004 you can also use the TIEHANDLE interface to access 38compressed files just like ordinary files: 39 40 use IO::Zlib; 41 42 tie *FILE, 'IO::Zlib', "file.gz", "wb"; 43 print FILE "line 1\nline2\n"; 44 45 tie *FILE, 'IO::Zlib', "file.gz", "rb"; 46 while (<FILE>) { print "LINE: ", $_ }; 47 48=head1 DESCRIPTION 49 50C<IO::Zlib> provides an IO:: style interface to L<Compress::Zlib> and 51hence to gzip/zlib compressed files. It provides many of the same methods 52as the L<IO::Handle> interface. 53 54Starting from IO::Zlib version 1.02, IO::Zlib can also use an 55external F<gzip> command. The default behaviour is to try to use 56an external F<gzip> if no C<Compress::Zlib> can be loaded, unless 57explicitly disabled by 58 59 use IO::Zlib qw(:gzip_external 0); 60 61If explicitly enabled by 62 63 use IO::Zlib qw(:gzip_external 1); 64 65then the external F<gzip> is used B<instead> of C<Compress::Zlib>. 66 67=head1 CONSTRUCTOR 68 69=over 4 70 71=item new ( [ARGS] ) 72 73Creates an C<IO::Zlib> object. If it receives any parameters, they are 74passed to the method C<open>; if the open fails, the object is destroyed. 75Otherwise, it is returned to the caller. 76 77=back 78 79=head1 OBJECT METHODS 80 81=over 4 82 83=item open ( FILENAME, MODE ) 84 85C<open> takes two arguments. The first is the name of the file to open 86and the second is the open mode. The mode can be anything acceptable to 87L<Compress::Zlib> and by extension anything acceptable to I<zlib> (that 88basically means POSIX fopen() style mode strings plus an optional number 89to indicate the compression level). 90 91=item opened 92 93Returns true if the object currently refers to a opened file. 94 95=item close 96 97Close the file associated with the object and disassociate 98the file from the handle. 99Done automatically on destroy. 100 101=item getc 102 103Return the next character from the file, or undef if none remain. 104 105=item getline 106 107Return the next line from the file, or undef on end of string. 108Can safely be called in an array context. 109Currently ignores $/ ($INPUT_RECORD_SEPARATOR or $RS when L<English> 110is in use) and treats lines as delimited by "\n". 111 112=item getlines 113 114Get all remaining lines from the file. 115It will croak() if accidentally called in a scalar context. 116 117=item print ( ARGS... ) 118 119Print ARGS to the file. 120 121=item read ( BUF, NBYTES, [OFFSET] ) 122 123Read some bytes from the file. 124Returns the number of bytes actually read, 0 on end-of-file, undef on error. 125 126=item eof 127 128Returns true if the handle is currently positioned at end of file? 129 130=item seek ( OFFSET, WHENCE ) 131 132Seek to a given position in the stream. 133Not yet supported. 134 135=item tell 136 137Return the current position in the stream, as a numeric offset. 138Not yet supported. 139 140=item setpos ( POS ) 141 142Set the current position, using the opaque value returned by C<getpos()>. 143Not yet supported. 144 145=item getpos ( POS ) 146 147Return the current position in the string, as an opaque object. 148Not yet supported. 149 150=back 151 152=head1 USING THE EXTERNAL GZIP 153 154If the external F<gzip> is used, the following C<open>s are used: 155 156 open(FH, "gzip -dc $filename |") # for read opens 157 open(FH, " | gzip > $filename") # for write opens 158 159You can modify the 'commands' for example to hardwire 160an absolute path by e.g. 161 162 use IO::Zlib ':gzip_read_open' => '/some/where/gunzip -c %s |'; 163 use IO::Zlib ':gzip_write_open' => '| /some/where/gzip.exe > %s'; 164 165The C<%s> is expanded to be the filename (C<sprintf> is used, so be 166careful to escape any other C<%> signs). The 'commands' are checked 167for sanity - they must contain the C<%s>, and the read open must end 168with the pipe sign, and the write open must begin with the pipe sign. 169 170=head1 CLASS METHODS 171 172=over 4 173 174=item has_Compress_Zlib 175 176Returns true if C<Compress::Zlib> is available. Note that this does 177not mean that C<Compress::Zlib> is being used: see L</gzip_external> 178and L<gzip_used>. 179 180=item gzip_external 181 182Undef if an external F<gzip> B<can> be used if C<Compress::Zlib> is 183not available (see L</has_Compress_Zlib>), true if an external F<gzip> 184is explicitly used, false if an external F<gzip> must not be used. 185See L</gzip_used>. 186 187=item gzip_used 188 189True if an external F<gzip> is being used, false if not. 190 191=item gzip_read_open 192 193Return the 'command' being used for opening a file for reading using an 194external F<gzip>. 195 196=item gzip_write_open 197 198Return the 'command' being used for opening a file for writing using an 199external F<gzip>. 200 201=back 202 203=head1 DIAGNOSTICS 204 205=over 4 206 207=item IO::Zlib::getlines: must be called in list context 208 209If you want read lines, you must read in list context. 210 211=item IO::Zlib::gzopen_external: mode '...' is illegal 212 213Use only modes 'rb' or 'wb' or /wb[1-9]/. 214 215=item IO::Zlib::import: '...' is illegal 216 217The known import symbols are the C<:gzip_external>, C<:gzip_read_open>, 218and C<:gzip_write_open>. Anything else is not recognized. 219 220=item IO::Zlib::import: ':gzip_external' requires an argument 221 222The C<:gzip_external> requires one boolean argument. 223 224=item IO::Zlib::import: 'gzip_read_open' requires an argument 225 226The C<:gzip_external> requires one string argument. 227 228=item IO::Zlib::import: 'gzip_read' '...' is illegal 229 230The C<:gzip_read_open> argument must end with the pipe sign (|) 231and have the C<%s> for the filename. See L</"USING THE EXTERNAL GZIP">. 232 233=item IO::Zlib::import: 'gzip_write_open' requires an argument 234 235The C<:gzip_external> requires one string argument. 236 237=item IO::Zlib::import: 'gzip_write_open' '...' is illegal 238 239The C<:gzip_write_open> argument must begin with the pipe sign (|) 240and have the C<%s> for the filename. An output redirect (>) is also 241often a good idea, depending on your operating system shell syntax. 242See L</"USING THE EXTERNAL GZIP">. 243 244=item IO::Zlib::import: no Compress::Zlib and no external gzip 245 246Given that we failed to load C<Compress::Zlib> and that the use of 247 an external F<gzip> was disabled, IO::Zlib has not much chance of working. 248 249=item IO::Zlib::open: needs a filename 250 251No filename, no open. 252 253=item IO::Zlib::READ: NBYTES must be specified 254 255We must know how much to read. 256 257=item IO::Zlib::WRITE: too long LENGTH 258 259The LENGTH must be less than or equal to the buffer size. 260 261=back 262 263=head1 SEE ALSO 264 265L<perlfunc>, 266L<perlop/"I/O Operators">, 267L<IO::Handle>, 268L<Compress::Zlib> 269 270=head1 HISTORY 271 272Created by Tom Hughes E<lt>F<tom@compton.nu>E<gt>. 273 274Support for external gzip added by Jarkko Hietaniemi E<lt>F<jhi@iki.fi>E<gt>. 275 276=head1 COPYRIGHT 277 278Copyright (c) 1998-2004 Tom Hughes E<lt>F<tom@compton.nu>E<gt>. 279All rights reserved. This program is free software; you can redistribute 280it and/or modify it under the same terms as Perl itself. 281 282=cut 283 284require 5.006; 285 286use strict; 287use warnings; 288 289use Carp; 290use Fcntl qw(SEEK_SET); 291use Symbol; 292use Tie::Handle; 293 294our $VERSION = "1.15"; 295our $AUTOLOAD; 296our @ISA = qw(Tie::Handle); 297 298my $has_Compress_Zlib; 299my $gzip_external; 300my $gzip_used; 301my $gzip_read_open = "gzip -dc %s |"; 302my $gzip_write_open = "| gzip > %s"; 303my $aliased; 304 305BEGIN { 306 eval { require Compress::Zlib }; 307 $has_Compress_Zlib = $@ || $Compress::Zlib::VERSION < 2.000 ? 0 : 1; 308} 309 310sub has_Compress_Zlib 311{ 312 $has_Compress_Zlib; 313} 314 315sub gzip_external 316{ 317 $gzip_external; 318} 319 320sub gzip_used 321{ 322 $gzip_used; 323} 324 325sub gzip_read_open 326{ 327 $gzip_read_open; 328} 329 330sub gzip_write_open 331{ 332 $gzip_write_open; 333} 334 335sub can_gunzip 336{ 337 $has_Compress_Zlib || $gzip_external; 338} 339 340sub _import 341{ 342 my $import = shift; 343 344 while (@_) 345 { 346 if ($_[0] eq ':gzip_external') 347 { 348 shift; 349 350 if (@_) 351 { 352 $gzip_external = shift; 353 } 354 else 355 { 356 croak "$import: ':gzip_external' requires an argument"; 357 } 358 } 359 elsif ($_[0] eq ':gzip_read_open') 360 { 361 shift; 362 363 if (@_) 364 { 365 $gzip_read_open = shift; 366 367 croak "$import: ':gzip_read_open' '$gzip_read_open' is illegal" 368 unless $gzip_read_open =~ /^.+%s.+\|\s*$/; 369 } 370 else 371 { 372 croak "$import: ':gzip_read_open' requires an argument"; 373 } 374 } 375 elsif ($_[0] eq ':gzip_write_open') 376 { 377 shift; 378 379 if (@_) 380 { 381 $gzip_write_open = shift; 382 383 croak "$import: ':gzip_write_open' '$gzip_read_open' is illegal" 384 unless $gzip_write_open =~ /^\s*\|.+%s.*$/; 385 } 386 else 387 { 388 croak "$import: ':gzip_write_open' requires an argument"; 389 } 390 } 391 else 392 { 393 last; 394 } 395 } 396 397 return @_; 398} 399 400sub _alias 401{ 402 my $import = shift; 403 404 if ($gzip_external || (!$has_Compress_Zlib && !defined($gzip_external))) 405 { 406 require IO::Handle; 407 408 undef *gzopen; 409 *gzopen = \&gzopen_external; 410 411 *IO::Handle::gzread = \&gzread_external; 412 *IO::Handle::gzwrite = \&gzwrite_external; 413 *IO::Handle::gzreadline = \&gzreadline_external; 414 *IO::Handle::gzeof = \&gzeof_external; 415 *IO::Handle::gzclose = \&gzclose_external; 416 417 $gzip_used = 1; 418 } 419 elsif ($has_Compress_Zlib) 420 { 421 *gzopen = \&Compress::Zlib::gzopen; 422 *gzread = \&Compress::Zlib::gzread; 423 *gzwrite = \&Compress::Zlib::gzwrite; 424 *gzreadline = \&Compress::Zlib::gzreadline; 425 *gzeof = \&Compress::Zlib::gzeof; 426 } 427 else 428 { 429 croak "$import: no Compress::Zlib and no external gzip"; 430 } 431 432 $aliased = 1; 433} 434 435sub import 436{ 437 my $class = shift; 438 my $import = "IO::Zlib::import"; 439 440 if (@_) 441 { 442 if (_import($import, @_)) 443 { 444 croak "$import: '@_' is illegal"; 445 } 446 } 447 448 _alias($import); 449} 450 451sub TIEHANDLE 452{ 453 my $class = shift; 454 my @args = @_; 455 456 my $self = bless {}, $class; 457 458 return @args ? $self->OPEN(@args) : $self; 459} 460 461sub DESTROY 462{ 463} 464 465sub OPEN 466{ 467 my $self = shift; 468 my $filename = shift; 469 my $mode = shift; 470 471 croak "IO::Zlib::open: needs a filename" unless defined($filename); 472 473 $self->{'file'} = gzopen($filename,$mode); 474 475 return defined($self->{'file'}) ? $self : undef; 476} 477 478sub CLOSE 479{ 480 my $self = shift; 481 482 return undef unless defined($self->{'file'}); 483 484 my $status = $self->{'file'}->gzclose(); 485 486 delete $self->{'file'}; 487 488 return ($status == 0) ? 1 : undef; 489} 490 491sub READ 492{ 493 my $self = shift; 494 my $bufref = \$_[0]; 495 my $nbytes = $_[1]; 496 my $offset = $_[2] || 0; 497 498 croak "IO::Zlib::READ: NBYTES must be specified" unless defined($nbytes); 499 500 $$bufref = "" unless defined($$bufref); 501 502 my $bytesread = $self->{'file'}->gzread(substr($$bufref,$offset),$nbytes); 503 504 return undef if $bytesread < 0; 505 506 return $bytesread; 507} 508 509sub READLINE 510{ 511 my $self = shift; 512 513 my $line; 514 515 return () if $self->{'file'}->gzreadline($line) <= 0; 516 517 return $line unless wantarray; 518 519 my @lines = $line; 520 521 while ($self->{'file'}->gzreadline($line) > 0) 522 { 523 push @lines, $line; 524 } 525 526 return @lines; 527} 528 529sub WRITE 530{ 531 my $self = shift; 532 my $buf = shift; 533 my $length = shift; 534 my $offset = shift; 535 536 croak "IO::Zlib::WRITE: too long LENGTH" unless $offset + $length <= length($buf); 537 538 return $self->{'file'}->gzwrite(substr($buf,$offset,$length)); 539} 540 541sub EOF 542{ 543 my $self = shift; 544 545 return $self->{'file'}->gzeof(); 546} 547 548sub FILENO 549{ 550 return undef; 551} 552 553sub new 554{ 555 my $class = shift; 556 my @args = @_; 557 558 _alias("new", @_) unless $aliased; # Some call new IO::Zlib directly... 559 560 my $self = gensym(); 561 562 tie *{$self}, $class, @args; 563 564 return tied(${$self}) ? bless $self, $class : undef; 565} 566 567sub getline 568{ 569 my $self = shift; 570 571 return scalar tied(*{$self})->READLINE(); 572} 573 574sub getlines 575{ 576 my $self = shift; 577 578 croak "IO::Zlib::getlines: must be called in list context" 579 unless wantarray; 580 581 return tied(*{$self})->READLINE(); 582} 583 584sub opened 585{ 586 my $self = shift; 587 588 return defined tied(*{$self})->{'file'}; 589} 590 591sub AUTOLOAD 592{ 593 my $self = shift; 594 595 $AUTOLOAD =~ s/.*:://; 596 $AUTOLOAD =~ tr/a-z/A-Z/; 597 598 return tied(*{$self})->$AUTOLOAD(@_); 599} 600 601sub gzopen_external 602{ 603 my $filename = shift; 604 my $mode = shift; 605 my $fh = IO::Handle->new(); 606 607 if ($mode =~ /r/) 608 { 609 # Because someone will try to read ungzipped files 610 # with this we peek and verify the signature. Yes, 611 # this means that we open the file twice (if it is 612 # gzipped). 613 # Plenty of race conditions exist in this code, but 614 # the alternative would be to capture the stderr of 615 # gzip and parse it, which would be a portability nightmare. 616 if (-e $filename && open($fh, $filename)) 617 { 618 binmode $fh; 619 620 my $sig; 621 my $rdb = read($fh, $sig, 2); 622 623 if ($rdb == 2 && $sig eq "\x1F\x8B") 624 { 625 my $ropen = sprintf($gzip_read_open, $filename); 626 627 if (open($fh, $ropen)) 628 { 629 binmode $fh; 630 631 return $fh; 632 } 633 else 634 { 635 return undef; 636 } 637 } 638 639 seek($fh, 0, SEEK_SET) or 640 die "IO::Zlib: open('$filename', 'r'): seek: $!"; 641 642 return $fh; 643 } 644 else 645 { 646 return undef; 647 } 648 } 649 elsif ($mode =~ /w/) 650 { 651 my $level = $mode =~ /([1-9])/ ? "-$1" : ""; 652 653 # To maximize portability we would need to open 654 # two filehandles here, one for "| gzip $level" 655 # and another for "> $filename", and then when 656 # writing copy bytes from the first to the second. 657 # We are using IO::Handle objects for now, however, 658 # and they can only contain one stream at a time. 659 my $wopen = sprintf($gzip_write_open, $filename); 660 661 if (open($fh, $wopen)) 662 { 663 $fh->autoflush(1); 664 binmode $fh; 665 666 return $fh; 667 } 668 else 669 { 670 return undef; 671 } 672 } 673 else 674 { 675 croak "IO::Zlib::gzopen_external: mode '$mode' is illegal"; 676 } 677 678 return undef; 679} 680 681sub gzread_external 682{ 683 my $file = shift; 684 my $bufref = \$_[0]; 685 my $nbytes = $_[1] || 4096; 686 687 # Use read() instead of sysread() because people may 688 # mix reads and readlines, and we don't want to mess 689 # the stdio buffering. See also gzreadline_external() 690 # and gzwrite_external(). 691 my $nread = read($file, $$bufref, $nbytes); 692 693 return defined $nread ? $nread : -1; 694} 695 696sub gzwrite_external 697{ 698 my $file = shift; 699 my $buf = shift; 700 701 # Using syswrite() is okay (cf. gzread_external()) 702 # since the bytes leave this process and buffering 703 # is therefore not an issue. 704 my $nwrote = syswrite($file, $buf); 705 706 return defined $nwrote ? $nwrote : -1; 707} 708 709sub gzreadline_external 710{ 711 my $file = shift; 712 my $bufref = \$_[0]; 713 714 # See the comment in gzread_external(). 715 $$bufref = readline($file); 716 717 return defined $$bufref ? length($$bufref) : -1; 718} 719 720sub gzeof_external 721{ 722 my $file = shift; 723 724 return eof($file); 725} 726 727sub gzclose_external 728{ 729 my $file = shift; 730 731 close($file); 732 733 # I am not entirely certain why this is needed but it seems 734 # the above close() always fails (as if the stream would have 735 # been already closed - something to do with using external 736 # processes via pipes?) 737 return 0; 738} 739 7401; 741