1package Compress::LZW::Compressor; 2# ABSTRACT: Scaling LZW compressor class 3$Compress::LZW::Compressor::VERSION = '0.04'; 4 5 6use Compress::LZW qw(:const); 7 8use Types::Standard qw( Bool Int ); 9 10use bytes; 11 12use Moo; 13use namespace::clean; 14 15my $CHECKPOINT_BITS = 10_000; 16 17 18has block_mode => ( 19 is => 'ro', 20 default => 1, 21 isa => Bool, 22); 23 24 25has max_code_size => ( # max bits 26 is => 'ro', 27 default => 16, 28 isa => Type::Tiny->new( 29 parent => Int, 30 constraint => sub { $_ >= $INIT_CODE_SIZE and $_ < $MASK_BITS }, 31 message => sub { "$_ isn't between $INIT_CODE_SIZE and $MASK_BITS" }, 32 ), 33); 34 35 36 37sub compress { 38 my $self = shift; 39 my ( $str ) = @_; 40 41 $self->reset; 42 43 my $bytes_in; 44 my ( $checkpoint, $last_ratio ) = ( 0, 0 ); 45 46 my $seen = ''; 47 48 for ( 0 .. length($str) ){ 49 my $char = substr($str, $_, 1); 50 51 $bytes_in += 1; 52 53 if ( exists $self->{code_table}{ $seen . $char } ){ 54 $seen .= $char; 55 } 56 else { 57 $self->_buf_write( $self->{code_table}{ $seen } ); 58 59 $self->_new_code( $seen . $char ); 60 61 $seen = $char; 62 63 if ( $self->{at_max_code} and $self->block_mode ){ 64 if ( ! defined $checkpoint ){ 65 $checkpoint = $self->{buf_pos} + $CHECKPOINT_BITS; 66 } 67 elsif ( $bytes_in > $checkpoint ){ 68 my $ratio = $bytes_in / ( $self->{buf_pos} / 8 ); 69 $last_ratio = 0 if !defined $last_ratio; 70 71 72 if ( $ratio >= $last_ratio ){ 73 $last_ratio = $ratio; 74 $checkpoint = $self->{buf_pos} + $CHECKPOINT_BITS; 75 } 76 elsif ( $ratio < $last_ratio ){ 77 # warn "Resetting code table ( $ratio < $last_ratio :: $self->{buf_pos} )"; 78 $self->_buf_write( $RESET_CODE ); 79 $self->_code_reset; 80 81 undef $checkpoint; 82 undef $last_ratio; 83 } 84 } 85 } 86 87 } 88 } 89 90 $self->_buf_write( $self->{code_table}{ $seen } ); #last bit of input 91 # warn "final ratio: " . ($bytes_in / ($self->{buf_pos} / 8)); 92 93 return $self->{buf}; 94} 95 96 97 98sub reset { 99 my $self = shift; 100 101 # replace buf with empty buffer after magic bytes 102 $self->{buf} = $MAGIC 103 . chr( $self->max_code_size | ( $self->block_mode ? $MASK_BLOCK : 0 ) ); 104 105 $self->{buf_pos} = length($self->{buf}) * 8; 106 107 $self->_code_reset; 108} 109 110 111sub _code_reset { 112 my $self = shift; 113 114 $self->{code_table} = { 115 map { chr($_) => $_ } 0 .. 255 116 }; 117 118 $self->{at_max_code} = 0; 119 $self->{code_size} = $INIT_CODE_SIZE; 120 $self->{next_code} = $self->block_mode ? $BL_INIT_CODE : $NR_INIT_CODE; 121 $self->{next_increase} = 2 ** $self->{code_size}; 122 123} 124 125sub _new_code { 126 my $self = shift; 127 my ( $word ) = @_; 128 129 if ( $self->{next_code} >= $self->{next_increase} ){ 130 131 if ( $self->{code_size} < $self->{max_code_size} ){ 132 $self->{code_size} += 1; 133 $self->{next_increase} *= 2; 134 } 135 else { 136 $self->{at_max_code} = 1; 137 } 138 } 139 140 if ( $self->{at_max_code} == 0 ){ 141 $self->{code_table}{ $word } = $self->{next_code}; 142 $self->{next_code} += 1; 143 } 144 145} 146 147sub _buf_write { 148 my $self = shift; 149 my ( $code ) = @_; 150 151 return unless defined $code; 152 153 if ( $code > ( 2 ** $self->{code_size} ) ){ 154 die "Code value $code too high for current code size $self->{code_size}"; 155 } 156 157 my $wpos = $self->{buf_pos}; 158 # if ( $code == $RESET_CODE ){ 159 # warn "wrote a reset code ($RESET_CODE) at $wpos"; 160 # } 161 #~ warn "write $code \tat $code_size bits\toffset $wpos (byte ".int($wpos/8) . ')'; 162 163 if ( $code == 1 ){ 164 vec( $self->{buf}, $wpos, 1 ) = 1; 165 } 166 else { 167 for my $bit ( 0 .. ($self->{code_size} - 1) ){ 168 169 if ( ($code >> $bit) & 1 ){ 170 vec( $self->{buf}, $wpos + $bit, 1 ) = 1; 171 } 172 } 173 } 174 175 $self->{buf_pos} += $self->{code_size}; 176} 177 1781; 179 180__END__ 181 182=pod 183 184=encoding UTF-8 185 186=head1 NAME 187 188Compress::LZW::Compressor - Scaling LZW compressor class 189 190=head1 VERSION 191 192version 0.04 193 194=head1 SYNOPSIS 195 196 use Compress::LZW::Compressor; 197 198 my $c = Compress::LZW::Compressor->new(); 199 my $lzw = $c->compress( $some_data ); 200 201=head1 ATTRIBUTES 202 203=head2 block_mode 204 205Default: 1 206 207Block mode is a feature added to LZW by compress(1). Once the maximum code size 208has been reached, if the compression ratio falls (NYI) the code table and code 209size can be reset, and a code indicating this reset is embedded in the output 210stream. 211 212May be 0 or 1. 213 214=head2 max_code_size 215 216Default: 16 217 218Maximum size in bits that code output may scale up to. This value is stored in 219byte 3 of the compressed output so the decompressor can also stop at the same 220size automatically. Maximum code size implies a maximum code table size of C<2 221** max_code_size>, which can be emptied and restarted mid-stream in 222L</block_mode>. 223 224May be between 9 and 31, inclusive. The default of 16 is the largest supported 225by compress(1), but Compress::LZW can handle up to 31 bits. 226 227=head1 METHODS 228 229=head2 compress ( $input ) 230 231Compresses $input with the current settings and returns the result. 232 233=head2 reset () 234 235Resets the compressor state for another round of compression. Automatically 236called at the beginning of compress(). 237 238Resets the following internal state: Code table, next code number, code size, 239output buffer, buffer position 240 241=head1 AUTHOR 242 243Meredith Howard <mhoward@cpan.org> 244 245=head1 COPYRIGHT AND LICENSE 246 247This software is copyright (c) 2015 by Meredith Howard. 248 249This is free software; you can redistribute it and/or modify it under 250the same terms as the Perl 5 programming language system itself. 251 252=cut 253