1package Compress::LZW::Compressor;
2# ABSTRACT: Scaling LZW compressor class
3$Compress::LZW::Compressor::VERSION = '0.04';
4
5
6use Compress::LZW qw(:const);
7
8use Types::Standard qw( Bool Int );
9
10use bytes;
11
12use Moo;
13use namespace::clean;
14
15my $CHECKPOINT_BITS = 10_000;
16
17
18has block_mode => (
19  is      => 'ro',
20  default => 1,
21  isa     => Bool,
22);
23
24
25has max_code_size => ( # max bits
26  is      => 'ro',
27  default => 16,
28  isa     => Type::Tiny->new(
29    parent     => Int,
30    constraint => sub { $_ >= $INIT_CODE_SIZE and $_ < $MASK_BITS },
31    message    => sub { "$_ isn't between $INIT_CODE_SIZE and $MASK_BITS" },
32  ),
33);
34
35
36
37sub compress {
38  my $self = shift;
39  my ( $str ) = @_;
40
41  $self->reset;
42
43  my $bytes_in;
44  my ( $checkpoint, $last_ratio ) = ( 0, 0 );
45
46  my $seen = '';
47
48  for ( 0 .. length($str) ){
49    my $char = substr($str, $_, 1);
50
51    $bytes_in += 1;
52
53    if ( exists $self->{code_table}{ $seen . $char } ){
54      $seen .= $char;
55    }
56    else {
57      $self->_buf_write( $self->{code_table}{ $seen } );
58
59      $self->_new_code( $seen . $char );
60
61      $seen = $char;
62
63      if ( $self->{at_max_code} and $self->block_mode ){
64        if ( ! defined $checkpoint ){
65          $checkpoint = $self->{buf_pos} + $CHECKPOINT_BITS;
66        }
67        elsif ( $bytes_in > $checkpoint ){
68          my $ratio   = $bytes_in / ( $self->{buf_pos} / 8 );
69          $last_ratio = 0 if !defined $last_ratio;
70
71
72          if ( $ratio >= $last_ratio ){
73            $last_ratio = $ratio;
74            $checkpoint = $self->{buf_pos} + $CHECKPOINT_BITS;
75          }
76          elsif ( $ratio < $last_ratio ){
77            # warn "Resetting code table ( $ratio < $last_ratio :: $self->{buf_pos} )";
78            $self->_buf_write( $RESET_CODE );
79            $self->_code_reset;
80
81            undef $checkpoint;
82            undef $last_ratio;
83          }
84        }
85      }
86
87    }
88  }
89
90  $self->_buf_write( $self->{code_table}{ $seen } );  #last bit of input
91  # warn "final ratio: " . ($bytes_in / ($self->{buf_pos} / 8));
92
93  return $self->{buf};
94}
95
96
97
98sub reset {
99  my $self = shift;
100
101  # replace buf with empty buffer after magic bytes
102  $self->{buf}     = $MAGIC
103    . chr( $self->max_code_size | ( $self->block_mode ? $MASK_BLOCK : 0 ) );
104
105  $self->{buf_pos} = length($self->{buf}) * 8;
106
107  $self->_code_reset;
108}
109
110
111sub _code_reset {
112  my $self = shift;
113
114  $self->{code_table} = {
115    map { chr($_) => $_ } 0 .. 255
116  };
117
118  $self->{at_max_code}   = 0;
119  $self->{code_size}     = $INIT_CODE_SIZE;
120  $self->{next_code}     = $self->block_mode ? $BL_INIT_CODE : $NR_INIT_CODE;
121  $self->{next_increase} = 2 ** $self->{code_size};
122
123}
124
125sub _new_code {
126  my $self = shift;
127  my ( $word ) = @_;
128
129  if ( $self->{next_code} >= $self->{next_increase} ){
130
131    if ( $self->{code_size} < $self->{max_code_size} ){
132      $self->{code_size}     += 1;
133      $self->{next_increase} *= 2;
134    }
135    else {
136      $self->{at_max_code} = 1;
137    }
138  }
139
140  if ( $self->{at_max_code} == 0 ){
141    $self->{code_table}{ $word } = $self->{next_code};
142    $self->{next_code} += 1;
143  }
144
145}
146
147sub _buf_write {
148  my $self = shift;
149  my ( $code ) = @_;
150
151  return unless defined $code;
152
153  if ( $code > ( 2 ** $self->{code_size} ) ){
154    die "Code value $code too high for current code size $self->{code_size}";
155  }
156
157  my $wpos = $self->{buf_pos};
158  # if ( $code == $RESET_CODE ){
159  #   warn "wrote a reset code ($RESET_CODE) at $wpos";
160  # }
161  #~ warn "write $code \tat $code_size bits\toffset $wpos (byte ".int($wpos/8) . ')';
162
163  if ( $code == 1 ){
164    vec( $self->{buf}, $wpos, 1 ) = 1;
165  }
166  else {
167    for my $bit ( 0 .. ($self->{code_size} - 1) ){
168
169      if ( ($code >> $bit) & 1 ){
170        vec( $self->{buf}, $wpos + $bit, 1 ) = 1;
171      }
172    }
173  }
174
175  $self->{buf_pos} += $self->{code_size};
176}
177
1781;
179
180__END__
181
182=pod
183
184=encoding UTF-8
185
186=head1 NAME
187
188Compress::LZW::Compressor - Scaling LZW compressor class
189
190=head1 VERSION
191
192version 0.04
193
194=head1 SYNOPSIS
195
196 use Compress::LZW::Compressor;
197
198 my $c   = Compress::LZW::Compressor->new();
199 my $lzw = $c->compress( $some_data );
200
201=head1 ATTRIBUTES
202
203=head2 block_mode
204
205Default: 1
206
207Block mode is a feature added to LZW by compress(1). Once the maximum code size
208has been reached, if the compression ratio falls (NYI) the code table and code
209size can be reset, and a code indicating this reset is embedded in the output
210stream.
211
212May be 0 or 1.
213
214=head2 max_code_size
215
216Default: 16
217
218Maximum size in bits that code output may scale up to.  This value is stored in
219byte 3 of the compressed output so the decompressor can also stop at the same
220size automatically.  Maximum code size implies a maximum code table size of C<2
221** max_code_size>, which can be emptied and restarted mid-stream in
222L</block_mode>.
223
224May be between 9 and 31, inclusive.  The default of 16 is the largest supported
225by compress(1), but Compress::LZW can handle up to 31 bits.
226
227=head1 METHODS
228
229=head2 compress ( $input )
230
231Compresses $input with the current settings and returns the result.
232
233=head2 reset ()
234
235Resets the compressor state for another round of compression. Automatically
236called at the beginning of compress().
237
238Resets the following internal state: Code table, next code number, code size,
239output buffer, buffer position
240
241=head1 AUTHOR
242
243Meredith Howard <mhoward@cpan.org>
244
245=head1 COPYRIGHT AND LICENSE
246
247This software is copyright (c) 2015 by Meredith Howard.
248
249This is free software; you can redistribute it and/or modify it under
250the same terms as the Perl 5 programming language system itself.
251
252=cut
253