package IO::Uncompress::Bunzip2 ;

# bzip2 reader class. This file supplies only the bzip2-specific pieces
# (magic-number check, header record, creation of the uncompressor object);
# the remaining methods called below (_create, _inf, smartReadExact,
# pushBack, HeaderError, saveErrorString) are not defined here and are
# presumably inherited from IO::Uncompress::Base via @ISA.

use strict ;
use warnings;
use bytes;

use IO::Compress::Base::Common  2.024 qw(:Status createSelfTiedObject);

use IO::Uncompress::Base  2.024 ;
use IO::Uncompress::Adapter::Bunzip2  2.024 ;

require Exporter ;
our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $Bunzip2Error);

$VERSION = '2.024';
$Bunzip2Error = '';

@ISA    = qw( Exporter IO::Uncompress::Base );
@EXPORT_OK = qw( $Bunzip2Error bunzip2 ) ;
#%EXPORT_TAGS = %IO::Uncompress::Base::EXPORT_TAGS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
#Exporter::export_ok_tags('all');


# Constructor: IO::Uncompress::Bunzip2->new($input [, OPTS]).
# Builds the object with createSelfTiedObject, wiring its error slot to
# $Bunzip2Error, then returns whatever _create returns for the caller's
# arguments.
sub new
{
    my $class = shift ;
    my $obj = createSelfTiedObject($class, \$Bunzip2Error);

    return $obj->_create(undef, 0, @_);
}

# Functional interface: bunzip2 $input => $output [, OPTS].
# Uses a throw-away object (no class name is passed) and returns the
# result of _inf.
sub bunzip2
{
    my $obj = createSelfTiedObject(undef, \$Bunzip2Error);
    return $obj->_inf(@_);
}

# Returns the option table for the two extra options this class accepts,
# 'Verbosity' and 'Small'. Both are declared with Parse_boolean and a
# final element of 0.
# NOTE(review): the meaning of the leading "1, 1" elements is defined by
# the option parser in IO::Compress::Base::Common -- confirm there.
sub getExtraParams
{
    my $self = shift ;

    # Imports the Parse_* constants; "use" runs at compile time, so the
    # import is package-wide even though it is written inside the sub.
    use IO::Compress::Base::Common  2.024 qw(:Parse);

    return (
            'Verbosity'     => [1, 1, Parse_boolean,   0],
            'Small'         => [1, 1, Parse_boolean,   0],
        );
}


# Parameter validation hook. Nothing to validate here: always returns 1.
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    return 1;
}

# Prepares the object for reading a stream.
#   $got - parsed options object; 'Small' and 'Verbosity' are read from it.
# Returns 1 on success, 0 if ckMagic returns a false value, undef if
# readHeader returns a false value, and the result of saveErrorString
# if the adapter does not hand back an uncompressor object.
sub mkUncomp
{
    my $self = shift ;
    my $got = shift ;

    my $magic = $self->ckMagic()
        or return 0;

    *$self->{Info} = $self->readHeader($magic)
        or return undef ;

    my $Small     = $got->value('Small');
    my $Verbosity = $got->value('Verbosity');

    my ($obj, $errstr, $errno) =  IO::Uncompress::Adapter::Bunzip2::mkUncompObject(
                                                    $Small, $Verbosity);

    return $self->saveErrorString(undef, $errstr, $errno)
        if ! defined $obj;

    *$self->{Uncomp} = $obj;

    return 1;

}


# Reads the 4-byte bzip2 signature and validates it.
# The bytes read are kept in the object's HeaderPending slot. Returns the
# magic string on success; on a short read or a non-bzip2 signature
# returns whatever HeaderError returns.
sub ckMagic
{
    my $self = shift;

    my $magic ;
    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Header size is " .
                                        4 . " bytes")
        if length $magic != 4;

    return $self->HeaderError("Bad Magic.")
        if ! isBzip2Magic($magic) ;


    *$self->{Type} = 'bzip2';
    return $magic;
}

# Builds the header-information record for the stream.
#   $magic - the 4-byte signature returned by ckMagic.
# The signature is handed back to the input via pushBack (presumably so
# the uncompressor sees the full stream -- confirm in the base class) and
# HeaderPending is cleared. Returns a hash reference describing the header.
sub readHeader
{
    my $self = shift;
    my $magic = shift ;

    $self->pushBack($magic);
    *$self->{HeaderPending} = '';


    return {
        'Type'              => 'bzip2',
        'FingerprintLength' => 4,
        'HeaderLength'      => 4,
        'TrailerLength'     => 0,
        # Store the bytes themselves. This used to be the single-quoted
        # string '$magic', which never interpolates.
        'Header'            => $magic
        };

}

# Trailer check hook. This class performs no check and always reports
# STATUS_OK.
sub chkTrailer
{
    return STATUS_OK;
}



# Returns true if $buffer is exactly "BZh" followed by one ASCII digit.
# \A and \z are used instead of ^ and $ so that a trailing newline is not
# silently accepted.
sub isBzip2Magic
{
    my $buffer = shift ;
    return $buffer =~ /\ABZh[0-9]\z/;
}

1 ;

__END__


=head1 NAME

IO::Uncompress::Bunzip2 - Read bzip2 files/buffers

=head1 SYNOPSIS

    use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ;

    my $status = bunzip2 $input => $output [,OPTS]
        or die "bunzip2 failed: $Bunzip2Error\n";

    my $z = new IO::Uncompress::Bunzip2 $input [OPTS]
        or die "bunzip2 failed: $Bunzip2Error\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $Bunzip2Error ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows the reading of
bzip2 files/buffers.

For writing bzip2 files/buffers, see the companion module IO::Compress::Bzip2.

=head1 Functional Interface

A top-level function, C<bunzip2>, is provided to carry out
"one-shot" uncompression between buffers and/or files. For finer
control over the uncompression process, see the L</"OO Interface">
section.

    use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ;

    bunzip2 $input => $output [,OPTS]
        or die "bunzip2 failed: $Bunzip2Error\n";

The functional interface needs Perl 5.005 or better.

=head2 bunzip2 $input => $output [, OPTS]

C<bunzip2> expects at least two parameters, C<$input> and C<$output>.

=head3 The C<$input> parameter

The parameter, C<$input>, is used to define the source of
the compressed data.

It can take one of the following forms:

=over 5

=item A filename

If the C<$input> parameter is a simple scalar, it is assumed to be a
filename. This file will be opened for reading and the input data
will be read from it.

=item A filehandle

If the C<$input> parameter is a filehandle, the input data will be
read from it.
The string '-' can be used as an alias for standard input.

=item A scalar reference

If C<$input> is a scalar reference, the input data will be read
from C<$$input>.

=item An array reference

If C<$input> is an array reference, each element in the array must be a
filename.

The input data will be read from each file in turn.

The complete array will be walked to ensure that it only
contains valid filenames before any data is uncompressed.

=item An Input FileGlob string

If C<$input> is a string that is delimited by the characters "<" and ">"
C<bunzip2> will assume that it is an I<input fileglob string>. The
input is the list of files that match the fileglob.

If the fileglob does not match any files ...

See L<File::GlobMapper|File::GlobMapper> for more details.

=back

If the C<$input> parameter is any other type, C<undef> will be returned.

=head3 The C<$output> parameter

The parameter C<$output> is used to control the destination of the
uncompressed data. This parameter can take one of these forms.

=over 5

=item A filename

If the C<$output> parameter is a simple scalar, it is assumed to be a
filename. This file will be opened for writing and the uncompressed
data will be written to it.

=item A filehandle

If the C<$output> parameter is a filehandle, the uncompressed data
will be written to it.
The string '-' can be used as an alias for standard output.

=item A scalar reference

If C<$output> is a scalar reference, the uncompressed data will be
stored in C<$$output>.

=item An Array Reference

If C<$output> is an array reference, the uncompressed data will be
pushed onto the array.

=item An Output FileGlob

If C<$output> is a string that is delimited by the characters "<" and ">"
C<bunzip2> will assume that it is an I<output fileglob string>. The
output is the list of files that match the fileglob.

When C<$output> is a fileglob string, C<$input> must also be a fileglob
string. Anything else is an error.

=back

If the C<$output> parameter is any other type, C<undef> will be returned.

=head2 Notes

When C<$input> maps to multiple compressed files/buffers and C<$output> is
a single file/buffer, after uncompression C<$output> will contain a
concatenation of all the uncompressed data from each of the input
files/buffers.

=head2 Optional Parameters

Unless specified below, the optional parameters for C<bunzip2>,
C<OPTS>, are the same as those used with the OO interface defined in the
L</"Constructor Options"> section below.

=over 5

=item C<< AutoClose => 0|1 >>

This option applies to any input or output data streams to
C<bunzip2> that are filehandles.

If C<AutoClose> is specified, and the value is true, it will result in all
input and/or output filehandles being closed once C<bunzip2> has
completed.

This parameter defaults to 0.

=item C<< BinModeOut => 0|1 >>

When writing to a file or filehandle, set C<binmode> before writing to the
file.

Defaults to 0.

=item C<< Append => 0|1 >>

TODO

=item C<< MultiStream => 0|1 >>

If the input file/buffer contains multiple compressed data streams, this
option will uncompress the whole lot as a single data stream.

Defaults to 0.

=item C<< TrailingData => $scalar >>

Returns the data, if any, that is present immediately after the compressed
data stream once uncompression is complete.

This option can be used when there is useful information immediately
following the compressed data stream, and you don't know the length of the
compressed data stream.

If the input is a buffer, C<trailingData> will return everything from the
end of the compressed data stream to the end of the buffer.

If the input is a filehandle, C<trailingData> will return the data that is
left in the filehandle input buffer once the end of the compressed data
stream has been reached. You can then use the filehandle to read the rest
of the input file.

Don't bother using C<trailingData> if the input is a filename.

If you know the length of the compressed data stream before you start
uncompressing, you can avoid having to use C<trailingData> by setting the
C<InputLength> option.

=back

=head2 Examples

To read the contents of the file C<file1.txt.bz2> and write the
uncompressed data to the file C<file1.txt>.

    use strict ;
    use warnings ;
    use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ;

    my $input = "file1.txt.bz2";
    my $output = "file1.txt";
    bunzip2 $input => $output
        or die "bunzip2 failed: $Bunzip2Error\n";

To read from an existing Perl filehandle, C<$input>, and write the
uncompressed data to a buffer, C<$buffer>.

    use strict ;
    use warnings ;
    use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ;
    use IO::File ;

    my $input = new IO::File "<file1.txt.bz2"
        or die "Cannot open 'file1.txt.bz2': $!\n" ;
    my $buffer ;
    bunzip2 $input => \$buffer
        or die "bunzip2 failed: $Bunzip2Error\n";

To uncompress all files in the directory "/my/home" that match "*.txt.bz2"
and store the uncompressed data in the same directory

    use strict ;
    use warnings ;
    use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ;

    bunzip2 '</my/home/*.txt.bz2>' => '</my/home/#1.txt>'
        or die "bunzip2 failed: $Bunzip2Error\n";

and if you want to uncompress each file one at a time, this will do the trick

    use strict ;
    use warnings ;
    use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ;

    for my $input ( glob "/my/home/*.txt.bz2" )
    {
        my $output = $input;
        $output =~ s/\.bz2$// ;
        bunzip2 $input => $output
            or die "Error uncompressing '$input': $Bunzip2Error\n";
    }

=head1 OO Interface

=head2 Constructor

The format of the constructor for IO::Uncompress::Bunzip2 is shown below

    my $z = new IO::Uncompress::Bunzip2 $input [OPTS]
        or die "IO::Uncompress::Bunzip2 failed: $Bunzip2Error\n";

Returns an C<IO::Uncompress::Bunzip2> object on success and undef on failure.
The variable C<$Bunzip2Error> will contain an error message on failure.

If you are running Perl 5.005 or better the object, C<$z>, returned from
IO::Uncompress::Bunzip2 can be used exactly like an L<IO::File|IO::File> filehandle.
This means that all normal input file operations can be carried out with
C<$z>. For example, to read a line from a compressed file/buffer you can
use either of these forms

    $line = $z->getline();
    $line = <$z>;

The mandatory parameter C<$input> is used to determine the source of the
compressed data. This parameter can take one of three forms.

=over 5

=item A filename

If the C<$input> parameter is a scalar, it is assumed to be a filename. This
file will be opened for reading and the compressed data will be read from it.

=item A filehandle

If the C<$input> parameter is a filehandle, the compressed data will be
read from it.
The string '-' can be used as an alias for standard input.

=item A scalar reference

If C<$input> is a scalar reference, the compressed data will be read from
C<$$input>.

=back

=head2 Constructor Options

The option names defined below are case insensitive and can be optionally
prefixed by a '-'. So all of the following are valid

    -AutoClose
    -autoclose
    AUTOCLOSE
    autoclose

OPTS is a combination of the following options:

=over 5

=item C<< AutoClose => 0|1 >>

This option is only valid when the C<$input> parameter is a filehandle. If
specified, and the value is true, it will result in the file being closed once
either the C<close> method is called or the IO::Uncompress::Bunzip2 object is
destroyed.

This parameter defaults to 0.

=item C<< MultiStream => 0|1 >>

Allows multiple concatenated compressed streams to be treated as a single
compressed stream. Decompression will stop once either the end of the
file/buffer is reached, an error is encountered (premature eof, corrupt
compressed data) or the end of a stream is not immediately followed by the
start of another stream.

This parameter defaults to 0.

=item C<< Prime => $string >>

This option will uncompress the contents of C<$string> before processing the
input file/buffer.

This option can be useful when the compressed data is embedded in another
file/data structure and it is not possible to work out where the compressed
data begins without having to read the first few bytes. If this is the
case, the uncompression can be I<primed> with these bytes using this
option.

=item C<< Transparent => 0|1 >>

If this option is set and the input file/buffer is not compressed data,
the module will allow reading of it anyway.

In addition, if the input file/buffer does contain compressed data and
there is non-compressed data immediately following it, setting this option
will make this module treat the whole file/buffer as a single data stream.

This option defaults to 1.

=item C<< BlockSize => $num >>

When reading the compressed input data, IO::Uncompress::Bunzip2 will read it in
blocks of C<$num> bytes.

This option defaults to 4096.

=item C<< InputLength => $size >>

When present this option will limit the number of compressed bytes read
from the input file/buffer to C<$size>. This option can be used in the
situation where there is useful data directly after the compressed data
stream and you know beforehand the exact length of the compressed data
stream.

This option is mostly used when reading from a filehandle, in which case
the file pointer will be left pointing to the first byte directly after the
compressed data stream.

This option defaults to off.

=item C<< Append => 0|1 >>

This option controls what the C<read> method does with uncompressed data.

If set to 1, all uncompressed data will be appended to the output parameter
of the C<read> method.

If set to 0, the contents of the output parameter of the C<read> method
will be overwritten by the uncompressed data.

Defaults to 0.

=item C<< Strict => 0|1 >>

This option is a no-op.

=item C<< Small => 0|1 >>

When non-zero this option will make bzip2 use a decompression algorithm
that uses less memory at the expense of increasing the amount of time
taken for decompression.

Default is 0.

=back

=head2 Examples

TODO

=head1 Methods

=head2 read

Usage is

    $status = $z->read($buffer)

Reads a block of compressed data (the size of the compressed block is
determined by the C<BlockSize> option in the constructor), uncompresses it and
writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
set in the constructor, the uncompressed data will be appended to the
C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.

Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
or a negative number on error.

=head2 read

Usage is

    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)

    $status = read($z, $buffer, $length)
    $status = read($z, $buffer, $length, $offset)

Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.

The main difference between this form of the C<read> method and the
previous one, is that this one will attempt to return I<exactly> C<$length>
bytes. The only circumstances in which it will not are if end-of-file
or an IO error is encountered.

Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
or a negative number on error.

=head2 getline

Usage is

    $line = $z->getline()
    $line = <$z>

Reads a single line.

This method fully supports the use of the variable C<$/> (or
C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
determine what constitutes an end of line. Paragraph mode, record mode and
file slurp mode are all supported.

=head2 getc

Usage is

    $char = $z->getc()

Read a single character.

=head2 ungetc

Usage is

    $char = $z->ungetc($string)

=head2 getHeaderInfo

Usage is

    $hdr  = $z->getHeaderInfo();
    @hdrs = $z->getHeaderInfo();

This method returns either a hash reference (in scalar context) or a list
of hash references (in array context) that contains information about each
of the header fields in the compressed data stream(s).

=head2 tell

Usage is

    $z->tell()
    tell $z

Returns the uncompressed file offset.

=head2 eof

Usage is

    $z->eof();
    eof($z);

Returns true if the end of the compressed input stream has been reached.

=head2 seek

    $z->seek($position, $whence);
    seek($z, $position, $whence);

Provides a sub-set of the C<seek> functionality, with the restriction
that it is only legal to seek forward in the input file/buffer.
It is a fatal error to attempt to seek backward.

The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
SEEK_CUR or SEEK_END.

Returns 1 on success, 0 on failure.

=head2 binmode

Usage is

    $z->binmode
    binmode $z ;

This is a noop provided for completeness.

=head2 opened

    $z->opened()

Returns true if the object currently refers to an opened file/buffer.

=head2 autoflush

    my $prev = $z->autoflush()
    my $prev = $z->autoflush(EXPR)

If the C<$z> object is associated with a file or a filehandle, this method
returns the current autoflush setting for the underlying filehandle.
If
C<EXPR> is present, and is non-zero, it will enable flushing after every
write/print operation.

If C<$z> is associated with a buffer, this method has no effect and always
returns C<undef>.

B<Note> that the special variable C<$|> B<cannot> be used to set or
retrieve the autoflush setting.

=head2 input_line_number

    $z->input_line_number()
    $z->input_line_number(EXPR)

Returns the current uncompressed line number. If C<EXPR> is present it has
the effect of setting the line number. Note that setting the line number
does not change the current position within the file/buffer being read.

The contents of C<$/> are used to determine what constitutes a line
terminator.

=head2 fileno

    $z->fileno()
    fileno($z)

If the C<$z> object is associated with a file or a filehandle, C<fileno>
will return the underlying file descriptor. Once the C<close> method is
called C<fileno> will return C<undef>.

If the C<$z> object is associated with a buffer, this method will return
C<undef>.

=head2 close

    $z->close() ;
    close $z ;

Closes the input file/buffer.

For most versions of Perl this method will be automatically invoked if
the IO::Uncompress::Bunzip2 object is destroyed (either explicitly or by the
variable with the reference to the object going out of scope). The
exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
these cases, the C<close> method will be called automatically, but
not until global destruction of all live objects when the program is
terminating.

Therefore, if you want your scripts to be able to run on all versions
of Perl, you should call C<close> explicitly and not rely on automatic
closing.

Returns true on success, otherwise 0.

If the C<AutoClose> option has been enabled when the IO::Uncompress::Bunzip2
object was created, and the object is associated with a file, the
underlying file will also be closed.

=head2 nextStream

Usage is

    my $status = $z->nextStream();

Skips to the next compressed data stream in the input file/buffer. If a new
compressed data stream is found, the eof marker will be cleared and C<$.>
will be reset to 0.

Returns 1 if a new stream was found, 0 if none was found, and -1 if an
error was encountered.

=head2 trailingData

Usage is

    my $data = $z->trailingData();

Returns the data, if any, that is present immediately after the compressed
data stream once uncompression is complete. It only makes sense to call
this method once the end of the compressed data stream has been
encountered.

This method can be used when there is useful information immediately
following the compressed data stream, and you don't know the length of the
compressed data stream.

If the input is a buffer, C<trailingData> will return everything from the
end of the compressed data stream to the end of the buffer.

If the input is a filehandle, C<trailingData> will return the data that is
left in the filehandle input buffer once the end of the compressed data
stream has been reached. You can then use the filehandle to read the rest
of the input file.

Don't bother using C<trailingData> if the input is a filename.

If you know the length of the compressed data stream before you start
uncompressing, you can avoid having to use C<trailingData> by setting the
C<InputLength> option in the constructor.

=head1 Importing

No symbolic constants are required by IO::Uncompress::Bunzip2 at present.

=over 5

=item :all

Imports C<bunzip2> and C<$Bunzip2Error>.
Same as doing this

    use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ;

=back

=head1 EXAMPLES

=head2 Working with Net::FTP

See L<IO::Uncompress::Bunzip2::FAQ|IO::Uncompress::Bunzip2::FAQ/"Compressed files and Net::FTP">

=head1 SEE ALSO

L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>

L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>

L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
L<Archive::Tar|Archive::Tar>,
L<IO::Zlib|IO::Zlib>

The primary site for the bzip2 program is F<http://www.bzip.org>.

See the module L<Compress::Bzip2|Compress::Bzip2>

=head1 AUTHOR

This module was written by Paul Marquess, F<pmqs@cpan.org>.

=head1 MODIFICATION HISTORY

See the Changes file.

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2005-2008 Paul Marquess. All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
