1# IO::Zlib.pm
2#
3# Copyright (c) 1998-2004 Tom Hughes <tom@compton.nu>.
4# All rights reserved. This program is free software; you can redistribute
5# it and/or modify it under the same terms as Perl itself.
6
7package IO::Zlib;
8
9=head1 NAME
10
11IO::Zlib - IO:: style interface to L<Compress::Zlib>
12
13=head1 SYNOPSIS
14
15With any version of Perl 5 you can use the basic OO interface:
16
17    use IO::Zlib;
18
19    $fh = new IO::Zlib;
20    if ($fh->open("file.gz", "rb")) {
21        print <$fh>;
22        $fh->close;
23    }
24
25    $fh = IO::Zlib->new("file.gz", "wb9");
26    if (defined $fh) {
27        print $fh "bar\n";
28        $fh->close;
29    }
30
31    $fh = IO::Zlib->new("file.gz", "rb");
32    if (defined $fh) {
33        print <$fh>;
34        undef $fh;       # automatically closes the file
35    }
36
37With Perl 5.004 you can also use the TIEHANDLE interface to access
38compressed files just like ordinary files:
39
40    use IO::Zlib;
41
42    tie *FILE, 'IO::Zlib', "file.gz", "wb";
43    print FILE "line 1\nline2\n";
44
45    tie *FILE, 'IO::Zlib', "file.gz", "rb";
46    while (<FILE>) { print "LINE: ", $_ };
47
48=head1 DESCRIPTION
49
50C<IO::Zlib> provides an IO:: style interface to L<Compress::Zlib> and
51hence to gzip/zlib compressed files. It provides many of the same methods
52as the L<IO::Handle> interface.
53
54Starting from IO::Zlib version 1.02, IO::Zlib can also use an
55external F<gzip> command.  The default behaviour is to try to use
56an external F<gzip> if no C<Compress::Zlib> can be loaded, unless
57explicitly disabled by
58
59    use IO::Zlib qw(:gzip_external 0);
60
61If explicitly enabled by
62
63    use IO::Zlib qw(:gzip_external 1);
64
65then the external F<gzip> is used B<instead> of C<Compress::Zlib>.
66
67=head1 CONSTRUCTOR
68
69=over 4
70
71=item new ( [ARGS] )
72
73Creates an C<IO::Zlib> object. If it receives any parameters, they are
74passed to the method C<open>; if the open fails, the object is destroyed.
75Otherwise, it is returned to the caller.
76
77=back
78
79=head1 OBJECT METHODS
80
81=over 4
82
83=item open ( FILENAME, MODE )
84
85C<open> takes two arguments. The first is the name of the file to open
86and the second is the open mode. The mode can be anything acceptable to
87L<Compress::Zlib> and by extension anything acceptable to I<zlib> (that
88basically means POSIX fopen() style mode strings plus an optional number
89to indicate the compression level).
90
91=item opened
92
93Returns true if the object currently refers to a opened file.
94
95=item close
96
97Close the file associated with the object and disassociate
98the file from the handle.
99Done automatically on destroy.
100
101=item getc
102
103Return the next character from the file, or undef if none remain.
104
105=item getline
106
107Return the next line from the file, or undef on end of string.
108Can safely be called in an array context.
109Currently ignores $/ ($INPUT_RECORD_SEPARATOR or $RS when L<English>
110is in use) and treats lines as delimited by "\n".
111
112=item getlines
113
114Get all remaining lines from the file.
115It will croak() if accidentally called in a scalar context.
116
117=item print ( ARGS... )
118
119Print ARGS to the  file.
120
121=item read ( BUF, NBYTES, [OFFSET] )
122
123Read some bytes from the file.
124Returns the number of bytes actually read, 0 on end-of-file, undef on error.
125
126=item eof
127
128Returns true if the handle is currently positioned at end of file?
129
130=item seek ( OFFSET, WHENCE )
131
132Seek to a given position in the stream.
133Not yet supported.
134
135=item tell
136
137Return the current position in the stream, as a numeric offset.
138Not yet supported.
139
140=item setpos ( POS )
141
142Set the current position, using the opaque value returned by C<getpos()>.
143Not yet supported.
144
145=item getpos ( POS )
146
147Return the current position in the string, as an opaque object.
148Not yet supported.
149
150=back
151
152=head1 USING THE EXTERNAL GZIP
153
154If the external F<gzip> is used, the following C<open>s are used:
155
156    open(FH, "gzip -dc $filename |")  # for read opens
157    open(FH, " | gzip > $filename")   # for write opens
158
159You can modify the 'commands' for example to hardwire
160an absolute path by e.g.
161
162    use IO::Zlib ':gzip_read_open'  => '/some/where/gunzip -c %s |';
163    use IO::Zlib ':gzip_write_open' => '| /some/where/gzip.exe > %s';
164
165The C<%s> is expanded to be the filename (C<sprintf> is used, so be
166careful to escape any other C<%> signs).  The 'commands' are checked
167for sanity - they must contain the C<%s>, and the read open must end
168with the pipe sign, and the write open must begin with the pipe sign.
169
170=head1 CLASS METHODS
171
172=over 4
173
174=item has_Compress_Zlib
175
176Returns true if C<Compress::Zlib> is available.  Note that this does
177not mean that C<Compress::Zlib> is being used: see L</gzip_external>
178and L<gzip_used>.
179
180=item gzip_external
181
182Undef if an external F<gzip> B<can> be used if C<Compress::Zlib> is
183not available (see L</has_Compress_Zlib>), true if an external F<gzip>
184is explicitly used, false if an external F<gzip> must not be used.
185See L</gzip_used>.
186
187=item gzip_used
188
189True if an external F<gzip> is being used, false if not.
190
191=item gzip_read_open
192
193Return the 'command' being used for opening a file for reading using an
194external F<gzip>.
195
196=item gzip_write_open
197
198Return the 'command' being used for opening a file for writing using an
199external F<gzip>.
200
201=back
202
203=head1 DIAGNOSTICS
204
205=over 4
206
207=item IO::Zlib::getlines: must be called in list context
208
209If you want read lines, you must read in list context.
210
211=item IO::Zlib::gzopen_external: mode '...' is illegal
212
213Use only modes 'rb' or 'wb' or /wb[1-9]/.
214
215=item IO::Zlib::import: '...' is illegal
216
217The known import symbols are the C<:gzip_external>, C<:gzip_read_open>,
218and C<:gzip_write_open>.  Anything else is not recognized.
219
220=item IO::Zlib::import: ':gzip_external' requires an argument
221
222The C<:gzip_external> requires one boolean argument.
223
224=item IO::Zlib::import: 'gzip_read_open' requires an argument
225
226The C<:gzip_external> requires one string argument.
227
228=item IO::Zlib::import: 'gzip_read' '...' is illegal
229
230The C<:gzip_read_open> argument must end with the pipe sign (|)
231and have the C<%s> for the filename.  See L</"USING THE EXTERNAL GZIP">.
232
233=item IO::Zlib::import: 'gzip_write_open' requires an argument
234
235The C<:gzip_external> requires one string argument.
236
237=item IO::Zlib::import: 'gzip_write_open' '...' is illegal
238
239The C<:gzip_write_open> argument must begin with the pipe sign (|)
240and have the C<%s> for the filename.  An output redirect (>) is also
241often a good idea, depending on your operating system shell syntax.
242See L</"USING THE EXTERNAL GZIP">.
243
244=item IO::Zlib::import: no Compress::Zlib and no external gzip
245
246Given that we failed to load C<Compress::Zlib> and that the use of
247 an external F<gzip> was disabled, IO::Zlib has not much chance of working.
248
249=item IO::Zlib::open: needs a filename
250
251No filename, no open.
252
253=item IO::Zlib::READ: NBYTES must be specified
254
255We must know how much to read.
256
257=item IO::Zlib::WRITE: too long LENGTH
258
259The LENGTH must be less than or equal to the buffer size.
260
261=back
262
263=head1 SEE ALSO
264
265L<perlfunc>,
266L<perlop/"I/O Operators">,
267L<IO::Handle>,
268L<Compress::Zlib>
269
270=head1 HISTORY
271
272Created by Tom Hughes E<lt>F<tom@compton.nu>E<gt>.
273
274Support for external gzip added by Jarkko Hietaniemi E<lt>F<jhi@iki.fi>E<gt>.
275
276=head1 COPYRIGHT
277
278Copyright (c) 1998-2004 Tom Hughes E<lt>F<tom@compton.nu>E<gt>.
279All rights reserved. This program is free software; you can redistribute
280it and/or modify it under the same terms as Perl itself.
281
282=cut
283
284require 5.006;
285
286use strict;
287use warnings;
288
289use Carp;
290use Fcntl qw(SEEK_SET);
291use Symbol;
292use Tie::Handle;
293
294our $VERSION = "1.11";
295our $AUTOLOAD;
296our @ISA = qw(Tie::Handle);
297
298my $has_Compress_Zlib;
299my $gzip_external;
300my $gzip_used;
301my $gzip_read_open = "gzip -dc %s |";
302my $gzip_write_open = "| gzip > %s";
303my $aliased;
304
305BEGIN {
306    eval { require Compress::Zlib };
307    $has_Compress_Zlib = $@ || $Compress::Zlib::VERSION < 2.000 ? 0 : 1;
308}
309
310sub has_Compress_Zlib
311{
312    $has_Compress_Zlib;
313}
314
315sub gzip_external
316{
317    $gzip_external;
318}
319
320sub gzip_used
321{
322    $gzip_used;
323}
324
325sub gzip_read_open
326{
327    $gzip_read_open;
328}
329
330sub gzip_write_open
331{
332    $gzip_write_open;
333}
334
335sub can_gunzip
336{
337    $has_Compress_Zlib || $gzip_external;
338}
339
340sub _import
341{
342    my $import = shift;
343
344    while (@_)
345    {
346        if ($_[0] eq ':gzip_external')
347        {
348            shift;
349
350            if (@_)
351            {
352                $gzip_external = shift;
353            }
354            else
355            {
356                croak "$import: ':gzip_external' requires an argument";
357            }
358        }
359        elsif ($_[0] eq ':gzip_read_open')
360        {
361            shift;
362
363            if (@_)
364            {
365                $gzip_read_open = shift;
366
367                croak "$import: ':gzip_read_open' '$gzip_read_open' is illegal"
368                    unless $gzip_read_open =~ /^.+%s.+\|\s*$/;
369            }
370            else
371            {
372                croak "$import: ':gzip_read_open' requires an argument";
373            }
374        }
375        elsif ($_[0] eq ':gzip_write_open')
376        {
377            shift;
378
379            if (@_)
380            {
381                $gzip_write_open = shift;
382
383                croak "$import: ':gzip_write_open' '$gzip_read_open' is illegal"
384                    unless $gzip_write_open =~ /^\s*\|.+%s.*$/;
385            }
386            else
387            {
388                croak "$import: ':gzip_write_open' requires an argument";
389            }
390        }
391        else
392        {
393            last;
394        }
395    }
396
397    return @_;
398}
399
400sub _alias
401{
402    my $import = shift;
403
404    if ($gzip_external || (!$has_Compress_Zlib && !defined($gzip_external)))
405    {
406        require IO::Handle;
407
408        undef *gzopen;
409        *gzopen = \&gzopen_external;
410
411        *IO::Handle::gzread = \&gzread_external;
412        *IO::Handle::gzwrite = \&gzwrite_external;
413        *IO::Handle::gzreadline = \&gzreadline_external;
414        *IO::Handle::gzeof = \&gzeof_external;
415        *IO::Handle::gzclose = \&gzclose_external;
416
417        $gzip_used = 1;
418    }
419    elsif ($has_Compress_Zlib)
420    {
421        *gzopen = \&Compress::Zlib::gzopen;
422        *gzread = \&Compress::Zlib::gzread;
423        *gzwrite = \&Compress::Zlib::gzwrite;
424        *gzreadline = \&Compress::Zlib::gzreadline;
425        *gzeof = \&Compress::Zlib::gzeof;
426    }
427    else
428    {
429        croak "$import: no Compress::Zlib and no external gzip";
430    }
431
432    $aliased = 1;
433}
434
435sub import
436{
437    my $class = shift;
438    my $import = "IO::Zlib::import";
439
440    if (@_)
441    {
442        if (_import($import, @_))
443        {
444            croak "$import: '@_' is illegal";
445        }
446    }
447
448    _alias($import);
449}
450
451sub TIEHANDLE
452{
453    my $class = shift;
454    my @args = @_;
455
456    my $self = bless {}, $class;
457
458    return @args ? $self->OPEN(@args) : $self;
459}
460
461sub DESTROY
462{
463}
464
465sub OPEN
466{
467    my $self = shift;
468    my $filename = shift;
469    my $mode = shift;
470
471    croak "IO::Zlib::open: needs a filename" unless defined($filename);
472
473    $self->{'file'} = gzopen($filename,$mode);
474
475    return defined($self->{'file'}) ? $self : undef;
476}
477
478sub CLOSE
479{
480    my $self = shift;
481
482    return undef unless defined($self->{'file'});
483
484    my $status = $self->{'file'}->gzclose();
485
486    delete $self->{'file'};
487
488    return ($status == 0) ? 1 : undef;
489}
490
491sub READ
492{
493    my $self = shift;
494    my $bufref = \$_[0];
495    my $nbytes = $_[1];
496    my $offset = $_[2] || 0;
497
498    croak "IO::Zlib::READ: NBYTES must be specified" unless defined($nbytes);
499
500    $$bufref = "" unless defined($$bufref);
501
502    my $bytesread = $self->{'file'}->gzread(substr($$bufref,$offset),$nbytes);
503
504    return undef if $bytesread < 0;
505
506    return $bytesread;
507}
508
509sub READLINE
510{
511    my $self = shift;
512
513    my $line;
514
515    return () if $self->{'file'}->gzreadline($line) <= 0;
516
517    return $line unless wantarray;
518
519    my @lines = $line;
520
521    while ($self->{'file'}->gzreadline($line) > 0)
522    {
523        push @lines, $line;
524    }
525
526    return @lines;
527}
528
529sub WRITE
530{
531    my $self = shift;
532    my $buf = shift;
533    my $length = shift;
534    my $offset = shift;
535
536    croak "IO::Zlib::WRITE: too long LENGTH" unless $offset + $length <= length($buf);
537
538    return $self->{'file'}->gzwrite(substr($buf,$offset,$length));
539}
540
541sub EOF
542{
543    my $self = shift;
544
545    return $self->{'file'}->gzeof();
546}
547
548sub FILENO
549{
550    return undef;
551}
552
553sub new
554{
555    my $class = shift;
556    my @args = @_;
557
558    _alias("new", @_) unless $aliased; # Some call new IO::Zlib directly...
559
560    my $self = gensym();
561
562    tie *{$self}, $class, @args;
563
564    return tied(${$self}) ? bless $self, $class : undef;
565}
566
567sub getline
568{
569    my $self = shift;
570
571    return scalar tied(*{$self})->READLINE();
572}
573
574sub getlines
575{
576    my $self = shift;
577
578    croak "IO::Zlib::getlines: must be called in list context"
579        unless wantarray;
580
581    return tied(*{$self})->READLINE();
582}
583
584sub opened
585{
586    my $self = shift;
587
588    return defined tied(*{$self})->{'file'};
589}
590
591sub AUTOLOAD
592{
593    my $self = shift;
594
595    $AUTOLOAD =~ s/.*:://;
596    $AUTOLOAD =~ tr/a-z/A-Z/;
597
598    return tied(*{$self})->$AUTOLOAD(@_);
599}
600
601sub gzopen_external
602{
603    my $filename = shift;
604    my $mode = shift;
605    my $fh = IO::Handle->new();
606
607    if ($mode =~ /r/)
608    {
609        # Because someone will try to read ungzipped files
610        # with this we peek and verify the signature.  Yes,
611        # this means that we open the file twice (if it is
612        # gzipped).
613        # Plenty of race conditions exist in this code, but
614        # the alternative would be to capture the stderr of
615        # gzip and parse it, which would be a portability nightmare.
616        if (-e $filename && open($fh, $filename))
617        {
618            binmode $fh;
619
620            my $sig;
621            my $rdb = read($fh, $sig, 2);
622
623            if ($rdb == 2 && $sig eq "\x1F\x8B")
624            {
625                my $ropen = sprintf($gzip_read_open, $filename);
626
627                if (open($fh, $ropen))
628                {
629                    binmode $fh;
630
631                    return $fh;
632                }
633                else
634                {
635                    return undef;
636                }
637            }
638
639            seek($fh, 0, SEEK_SET) or
640                die "IO::Zlib: open('$filename', 'r'): seek: $!";
641
642            return $fh;
643        }
644        else
645        {
646            return undef;
647        }
648    }
649    elsif ($mode =~ /w/)
650    {
651        my $level = $mode =~ /([1-9])/ ? "-$1" : "";
652
653        # To maximize portability we would need to open
654        # two filehandles here, one for "| gzip $level"
655        # and another for "> $filename", and then when
656        # writing copy bytes from the first to the second.
657        # We are using IO::Handle objects for now, however,
658        # and they can only contain one stream at a time.
659        my $wopen = sprintf($gzip_write_open, $filename);
660
661        if (open($fh, $wopen))
662        {
663            $fh->autoflush(1);
664            binmode $fh;
665
666            return $fh;
667        }
668        else
669        {
670            return undef;
671        }
672    }
673    else
674    {
675        croak "IO::Zlib::gzopen_external: mode '$mode' is illegal";
676    }
677
678    return undef;
679}
680
681sub gzread_external
682{
683    my $file = shift;
684    my $bufref = \$_[0];
685    my $nbytes = $_[1] || 4096;
686
687    # Use read() instead of sysread() because people may
688    # mix reads and readlines, and we don't want to mess
689    # the stdio buffering.  See also gzreadline_external()
690    # and gzwrite_external().
691    my $nread = read($file, $$bufref, $nbytes);
692
693    return defined $nread ? $nread : -1;
694}
695
696sub gzwrite_external
697{
698    my $file = shift;
699    my $buf = shift;
700
701    # Using syswrite() is okay (cf. gzread_external())
702    # since the bytes leave this process and buffering
703    # is therefore not an issue.
704    my $nwrote = syswrite($file, $buf);
705
706    return defined $nwrote ? $nwrote : -1;
707}
708
709sub gzreadline_external
710{
711    my $file = shift;
712    my $bufref = \$_[0];
713
714    # See the comment in gzread_external().
715    $$bufref = readline($file);
716
717    return defined $$bufref ? length($$bufref) : -1;
718}
719
720sub gzeof_external
721{
722    my $file = shift;
723
724    return eof($file);
725}
726
727sub gzclose_external
728{
729    my $file = shift;
730
731    close($file);
732
733    # I am not entirely certain why this is needed but it seems
734    # the above close() always fails (as if the stream would have
735    # been already closed - something to do with using external
736    # processes via pipes?)
737    return 0;
738}
739
7401;
741