1 /* Lzlib - Compression library for the lzip format
2    Copyright (C) 2009-2021 Antonio Diaz Diaz.
3 
4    This library is free software. Redistribution and use in source and
5    binary forms, with or without modification, are permitted provided
6    that the following conditions are met:
7 
8    1. Redistributions of source code must retain the above copyright
9    notice, this list of conditions, and the following disclaimer.
10 
11    2. Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions, and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 */
19 
20 struct Len_prices
21   {
22   const struct Len_model * lm;
23   int len_symbols;
24   int count;
25   int prices[pos_states][max_len_symbols];
26   int counters[pos_states];			/* may decrement below 0 */
27   };
28 
Lp_update_low_mid_prices(struct Len_prices * const lp,const int pos_state)29 static inline void Lp_update_low_mid_prices( struct Len_prices * const lp,
30                                              const int pos_state )
31   {
32   int * const pps = lp->prices[pos_state];
33   int tmp = price0( lp->lm->choice1 );
34   int len = 0;
35   for( ; len < len_low_symbols && len < lp->len_symbols; ++len )
36     pps[len] = tmp + price_symbol3( lp->lm->bm_low[pos_state], len );
37   if( len >= lp->len_symbols ) return;
38   tmp = price1( lp->lm->choice1 ) + price0( lp->lm->choice2 );
39   for( ; len < len_low_symbols + len_mid_symbols && len < lp->len_symbols; ++len )
40     pps[len] = tmp +
41                price_symbol3( lp->lm->bm_mid[pos_state], len - len_low_symbols );
42     }
43 
Lp_update_high_prices(struct Len_prices * const lp)44 static inline void Lp_update_high_prices( struct Len_prices * const lp )
45   {
46   const int tmp = price1( lp->lm->choice1 ) + price1( lp->lm->choice2 );
47   int len;
48   for( len = len_low_symbols + len_mid_symbols; len < lp->len_symbols; ++len )
49     /* using 4 slots per value makes "Lp_price" faster */
50     lp->prices[3][len] = lp->prices[2][len] =
51     lp->prices[1][len] = lp->prices[0][len] = tmp +
52       price_symbol8( lp->lm->bm_high, len - len_low_symbols - len_mid_symbols );
53   }
54 
Lp_reset(struct Len_prices * const lp)55 static inline void Lp_reset( struct Len_prices * const lp )
56   { int i; for( i = 0; i < pos_states; ++i ) lp->counters[i] = 0; }
57 
Lp_init(struct Len_prices * const lp,const struct Len_model * const lm,const int match_len_limit)58 static inline void Lp_init( struct Len_prices * const lp,
59                             const struct Len_model * const lm,
60                             const int match_len_limit )
61   {
62   lp->lm = lm;
63   lp->len_symbols = match_len_limit + 1 - min_match_len;
64   lp->count = ( match_len_limit > 12 ) ? 1 : lp->len_symbols;
65   Lp_reset( lp );
66   }
67 
Lp_decrement_counter(struct Len_prices * const lp,const int pos_state)68 static inline void Lp_decrement_counter( struct Len_prices * const lp,
69                                          const int pos_state )
70   { --lp->counters[pos_state]; }
71 
Lp_update_prices(struct Len_prices * const lp)72 static inline void Lp_update_prices( struct Len_prices * const lp )
73   {
74   int pos_state;
75   bool high_pending = false;
76   for( pos_state = 0; pos_state < pos_states; ++pos_state )
77     if( lp->counters[pos_state] <= 0 )
78       { lp->counters[pos_state] = lp->count;
79         Lp_update_low_mid_prices( lp, pos_state ); high_pending = true; }
80   if( high_pending && lp->len_symbols > len_low_symbols + len_mid_symbols )
81     Lp_update_high_prices( lp );
82   }
83 
Lp_price(const struct Len_prices * const lp,const int len,const int pos_state)84 static inline int Lp_price( const struct Len_prices * const lp,
85                             const int len, const int pos_state )
86   { return lp->prices[pos_state][len - min_match_len]; }
87 
88 
89 struct Pair			/* distance-length pair */
90   {
91   int dis;
92   int len;
93   };
94 
95 enum { infinite_price = 0x0FFFFFFF,
96        max_num_trials = 1 << 13,
97        single_step_trial = -2,
98        dual_step_trial = -1 };
99 
100 struct Trial
101   {
102   State state;
103   int price;		/* dual use var; cumulative price, match length */
104   int dis4;		/* -1 for literal, or rep, or match distance + 4 */
105   int prev_index;	/* index of prev trial in trials[] */
106   int prev_index2;	/*   -2  trial is single step */
107 			/*   -1  literal + rep0 */
108 			/* >= 0  ( rep or match ) + literal + rep0 */
109   int reps[num_rep_distances];
110   };
111 
Tr_update(struct Trial * const trial,const int pr,const int distance4,const int p_i)112 static inline void Tr_update( struct Trial * const trial, const int pr,
113                               const int distance4, const int p_i )
114   {
115   if( pr < trial->price )
116     { trial->price = pr; trial->dis4 = distance4; trial->prev_index = p_i;
117       trial->prev_index2 = single_step_trial; }
118   }
119 
Tr_update2(struct Trial * const trial,const int pr,const int p_i)120 static inline void Tr_update2( struct Trial * const trial, const int pr,
121                                const int p_i )
122   {
123   if( pr < trial->price )
124     { trial->price = pr; trial->dis4 = 0; trial->prev_index = p_i;
125       trial->prev_index2 = dual_step_trial; }
126   }
127 
Tr_update3(struct Trial * const trial,const int pr,const int distance4,const int p_i,const int p_i2)128 static inline void Tr_update3( struct Trial * const trial, const int pr,
129                                const int distance4, const int p_i,
130                                const int p_i2 )
131   {
132   if( pr < trial->price )
133     { trial->price = pr; trial->dis4 = distance4; trial->prev_index = p_i;
134       trial->prev_index2 = p_i2; }
135   }
136 
137 
138 struct LZ_encoder
139   {
140   struct LZ_encoder_base eb;
141   int cycles;
142   int match_len_limit;
143   struct Len_prices match_len_prices;
144   struct Len_prices rep_len_prices;
145   int pending_num_pairs;
146   struct Pair pairs[max_match_len+1];
147   struct Trial trials[max_num_trials];
148 
149   int dis_slot_prices[len_states][2*max_dictionary_bits];
150   int dis_prices[len_states][modeled_distances];
151   int align_prices[dis_align_size];
152   int num_dis_slots;
153   int price_counter;		/* counters may decrement below 0 */
154   int dis_price_counter;
155   int align_price_counter;
156   bool been_flushed;
157   };
158 
Mb_dec_pos(struct Matchfinder_base * const mb,const int ahead)159 static inline bool Mb_dec_pos( struct Matchfinder_base * const mb,
160                                const int ahead )
161   {
162   if( ahead < 0 || mb->pos < ahead ) return false;
163   mb->pos -= ahead;
164   if( mb->cyclic_pos < ahead ) mb->cyclic_pos += mb->dictionary_size + 1;
165   mb->cyclic_pos -= ahead;
166   return true;
167   }
168 
169 static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs );
170 
171        /* move-to-front dis in/into reps; do nothing if( dis4 <= 0 ) */
mtf_reps(const int dis4,int reps[num_rep_distances])172 static inline void mtf_reps( const int dis4, int reps[num_rep_distances] )
173   {
174   if( dis4 >= num_rep_distances )			/* match */
175     {
176     reps[3] = reps[2]; reps[2] = reps[1]; reps[1] = reps[0];
177     reps[0] = dis4 - num_rep_distances;
178     }
179   else if( dis4 > 0 )				/* repeated match */
180     {
181     const int distance = reps[dis4];
182     int i; for( i = dis4; i > 0; --i ) reps[i] = reps[i-1];
183     reps[0] = distance;
184     }
185   }
186 
LZeb_price_shortrep(const struct LZ_encoder_base * const eb,const State state,const int pos_state)187 static inline int LZeb_price_shortrep( const struct LZ_encoder_base * const eb,
188                                        const State state, const int pos_state )
189   {
190   return price0( eb->bm_rep0[state] ) + price0( eb->bm_len[state][pos_state] );
191   }
192 
LZeb_price_rep(const struct LZ_encoder_base * const eb,const int rep,const State state,const int pos_state)193 static inline int LZeb_price_rep( const struct LZ_encoder_base * const eb,
194                                   const int rep, const State state,
195                                   const int pos_state )
196   {
197   int price;
198   if( rep == 0 ) return price0( eb->bm_rep0[state] ) +
199                         price1( eb->bm_len[state][pos_state] );
200   price = price1( eb->bm_rep0[state] );
201   if( rep == 1 )
202     price += price0( eb->bm_rep1[state] );
203   else
204     {
205     price += price1( eb->bm_rep1[state] );
206     price += price_bit( eb->bm_rep2[state], rep - 2 );
207     }
208   return price;
209   }
210 
LZe_price_rep0_len(const struct LZ_encoder * const e,const int len,const State state,const int pos_state)211 static inline int LZe_price_rep0_len( const struct LZ_encoder * const e,
212                                       const int len, const State state,
213                                       const int pos_state )
214   {
215   return LZeb_price_rep( &e->eb, 0, state, pos_state ) +
216          Lp_price( &e->rep_len_prices, len, pos_state );
217   }
218 
LZe_price_pair(const struct LZ_encoder * const e,const int dis,const int len,const int pos_state)219 static inline int LZe_price_pair( const struct LZ_encoder * const e,
220                                   const int dis, const int len,
221                                   const int pos_state )
222   {
223   const int price = Lp_price( &e->match_len_prices, len, pos_state );
224   const int len_state = get_len_state( len );
225   if( dis < modeled_distances )
226     return price + e->dis_prices[len_state][dis];
227   else
228     return price + e->dis_slot_prices[len_state][get_slot( dis )] +
229            e->align_prices[dis & (dis_align_size - 1)];
230   }
231 
LZe_read_match_distances(struct LZ_encoder * const e)232 static inline int LZe_read_match_distances( struct LZ_encoder * const e )
233   {
234   const int num_pairs = LZe_get_match_pairs( e, e->pairs );
235   if( num_pairs > 0 )
236     {
237     const int len = e->pairs[num_pairs-1].len;
238     if( len == e->match_len_limit && len < max_match_len )
239       e->pairs[num_pairs-1].len =
240         Mb_true_match_len( &e->eb.mb, len, e->pairs[num_pairs-1].dis + 1 );
241     }
242   return num_pairs;
243   }
244 
LZe_move_and_update(struct LZ_encoder * const e,int n)245 static inline bool LZe_move_and_update( struct LZ_encoder * const e, int n )
246   {
247   while( true )
248     {
249     if( !Mb_move_pos( &e->eb.mb ) ) return false;
250     if( --n <= 0 ) break;
251     LZe_get_match_pairs( e, 0 );
252     }
253   return true;
254   }
255 
LZe_backward(struct LZ_encoder * const e,int cur)256 static inline void LZe_backward( struct LZ_encoder * const e, int cur )
257   {
258   int dis4 = e->trials[cur].dis4;
259   while( cur > 0 )
260     {
261     const int prev_index = e->trials[cur].prev_index;
262     struct Trial * const prev_trial = &e->trials[prev_index];
263 
264     if( e->trials[cur].prev_index2 != single_step_trial )
265       {
266       prev_trial->dis4 = -1;					/* literal */
267       prev_trial->prev_index = prev_index - 1;
268       prev_trial->prev_index2 = single_step_trial;
269       if( e->trials[cur].prev_index2 >= 0 )
270         {
271         struct Trial * const prev_trial2 = &e->trials[prev_index-1];
272         prev_trial2->dis4 = dis4; dis4 = 0;			/* rep0 */
273         prev_trial2->prev_index = e->trials[cur].prev_index2;
274         prev_trial2->prev_index2 = single_step_trial;
275         }
276       }
277     prev_trial->price = cur - prev_index;			/* len */
278     cur = dis4; dis4 = prev_trial->dis4; prev_trial->dis4 = cur;
279     cur = prev_index;
280     }
281   }
282 
283 enum { num_prev_positions3 = 1 << 16,
284        num_prev_positions2 = 1 << 10 };
285 
LZe_init(struct LZ_encoder * const e,const int dict_size,const int len_limit,const unsigned long long member_size)286 static inline bool LZe_init( struct LZ_encoder * const e,
287                              const int dict_size, const int len_limit,
288                              const unsigned long long member_size )
289   {
290   enum { before_size = max_num_trials,
291          /* bytes to keep in buffer after pos */
292          after_size = max_num_trials + ( 2 * max_match_len ) + 1,
293          dict_factor = 2,
294          num_prev_positions23 = num_prev_positions2 + num_prev_positions3,
295          pos_array_factor = 2,
296          min_free_bytes = 2 * max_num_trials };
297 
298   if( !LZeb_init( &e->eb, before_size, dict_size, after_size, dict_factor,
299                   num_prev_positions23, pos_array_factor, min_free_bytes,
300                   member_size ) ) return false;
301   e->cycles = ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256;
302   e->match_len_limit = len_limit;
303   Lp_init( &e->match_len_prices, &e->eb.match_len_model, e->match_len_limit );
304   Lp_init( &e->rep_len_prices, &e->eb.rep_len_model, e->match_len_limit );
305   e->pending_num_pairs = 0;
306   e->num_dis_slots = 2 * real_bits( e->eb.mb.dictionary_size - 1 );
307   e->trials[1].prev_index = 0;
308   e->trials[1].prev_index2 = single_step_trial;
309   e->price_counter = 0;
310   e->dis_price_counter = 0;
311   e->align_price_counter = 0;
312   e->been_flushed = false;
313   return true;
314   }
315 
LZe_reset(struct LZ_encoder * const e,const unsigned long long member_size)316 static inline void LZe_reset( struct LZ_encoder * const e,
317                               const unsigned long long member_size )
318   {
319   LZeb_reset( &e->eb, member_size );
320   Lp_reset( &e->match_len_prices );
321   Lp_reset( &e->rep_len_prices );
322   e->pending_num_pairs = 0;
323   e->price_counter = 0;
324   e->dis_price_counter = 0;
325   e->align_price_counter = 0;
326   e->been_flushed = false;
327   }
328