1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       range_encoder.h
4 /// \brief      Range Encoder
5 ///
6 //  Authors:    Igor Pavlov
7 //              Lasse Collin
8 //
9 //  This file has been put into the public domain.
10 //  You can do whatever you want with this file.
11 //
12 ///////////////////////////////////////////////////////////////////////////////
13 
14 #ifndef LZMA_RANGE_ENCODER_H
15 #define LZMA_RANGE_ENCODER_H
16 
17 #include "range_common.h"
18 #include "price.h"
19 
20 
21 /// Maximum number of symbols that can be put pending into lzma_range_encoder
22 /// structure between calls to lzma_rc_encode(). For LZMA, 48+5 is enough
23 /// (match with big distance and length followed by range encoder flush).
24 #define RC_SYMBOLS_MAX 53
25 
26 
27 typedef struct {
28 	uint64_t low;
29 	uint64_t cache_size;
30 	uint32_t range;
31 	uint8_t cache;
32 
33 	/// Number of bytes written out by rc_encode() -> rc_shift_low()
34 	uint64_t out_total;
35 
36 	/// Number of symbols in the tables
37 	size_t count;
38 
39 	/// rc_encode()'s position in the tables
40 	size_t pos;
41 
42 	/// Symbols to encode
43 	enum {
44 		RC_BIT_0,
45 		RC_BIT_1,
46 		RC_DIRECT_0,
47 		RC_DIRECT_1,
48 		RC_FLUSH,
49 	} symbols[RC_SYMBOLS_MAX];
50 
51 	/// Probabilities associated with RC_BIT_0 or RC_BIT_1
52 	probability *probs[RC_SYMBOLS_MAX];
53 
54 } lzma_range_encoder;
55 
56 
57 static inline void
58 rc_reset(lzma_range_encoder *rc)
59 {
60 	rc->low = 0;
61 	rc->cache_size = 1;
62 	rc->range = UINT32_MAX;
63 	rc->cache = 0;
64 	rc->out_total = 0;
65 	rc->count = 0;
66 	rc->pos = 0;
67 }
68 
69 
70 static inline void
71 rc_forget(lzma_range_encoder *rc)
72 {
73 	// This must not be called when rc_encode() is partially done.
74 	assert(rc->pos == 0);
75 	rc->count = 0;
76 }
77 
78 
79 static inline void
80 rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
81 {
82 	rc->symbols[rc->count] = bit;
83 	rc->probs[rc->count] = prob;
84 	++rc->count;
85 }
86 
87 
88 static inline void
89 rc_bittree(lzma_range_encoder *rc, probability *probs,
90 		uint32_t bit_count, uint32_t symbol)
91 {
92 	uint32_t model_index = 1;
93 
94 	do {
95 		const uint32_t bit = (symbol >> --bit_count) & 1;
96 		rc_bit(rc, &probs[model_index], bit);
97 		model_index = (model_index << 1) + bit;
98 	} while (bit_count != 0);
99 }
100 
101 
102 static inline void
103 rc_bittree_reverse(lzma_range_encoder *rc, probability *probs,
104 		uint32_t bit_count, uint32_t symbol)
105 {
106 	uint32_t model_index = 1;
107 
108 	do {
109 		const uint32_t bit = symbol & 1;
110 		symbol >>= 1;
111 		rc_bit(rc, &probs[model_index], bit);
112 		model_index = (model_index << 1) + bit;
113 	} while (--bit_count != 0);
114 }
115 
116 
117 static inline void
118 rc_direct(lzma_range_encoder *rc,
119 		uint32_t value, uint32_t bit_count)
120 {
121 	do {
122 		rc->symbols[rc->count++]
123 				= RC_DIRECT_0 + ((value >> --bit_count) & 1);
124 	} while (bit_count != 0);
125 }
126 
127 
128 static inline void
129 rc_flush(lzma_range_encoder *rc)
130 {
131 	for (size_t i = 0; i < 5; ++i)
132 		rc->symbols[rc->count++] = RC_FLUSH;
133 }
134 
135 
136 static inline bool
137 rc_shift_low(lzma_range_encoder *rc,
138 		uint8_t *out, size_t *out_pos, size_t out_size)
139 {
140 	if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000)
141 			|| (uint32_t)(rc->low >> 32) != 0) {
142 		do {
143 			if (*out_pos == out_size)
144 				return true;
145 
146 			out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32);
147 			++*out_pos;
148 			++rc->out_total;
149 			rc->cache = 0xFF;
150 
151 		} while (--rc->cache_size != 0);
152 
153 		rc->cache = (rc->low >> 24) & 0xFF;
154 	}
155 
156 	++rc->cache_size;
157 	rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS;
158 
159 	return false;
160 }
161 
162 
163 // NOTE: The last two arguments are uint64_t instead of size_t because in
164 // the dummy version these refer to the size of the whole range-encoded
165 // output stream, not just to the currently available output buffer space.
166 static inline bool
167 rc_shift_low_dummy(uint64_t *low, uint64_t *cache_size, uint8_t *cache,
168 		uint64_t *out_pos, uint64_t out_size)
169 {
170 	if ((uint32_t)(*low) < (uint32_t)(0xFF000000)
171 			|| (uint32_t)(*low >> 32) != 0) {
172 		do {
173 			if (*out_pos == out_size)
174 				return true;
175 
176 			++*out_pos;
177 			*cache = 0xFF;
178 
179 		} while (--*cache_size != 0);
180 
181 		*cache = (*low >> 24) & 0xFF;
182 	}
183 
184 	++*cache_size;
185 	*low = (*low & 0x00FFFFFF) << RC_SHIFT_BITS;
186 
187 	return false;
188 }
189 
190 
191 static inline bool
192 rc_encode(lzma_range_encoder *rc,
193 		uint8_t *out, size_t *out_pos, size_t out_size)
194 {
195 	assert(rc->count <= RC_SYMBOLS_MAX);
196 
197 	while (rc->pos < rc->count) {
198 		// Normalize
199 		if (rc->range < RC_TOP_VALUE) {
200 			if (rc_shift_low(rc, out, out_pos, out_size))
201 				return true;
202 
203 			rc->range <<= RC_SHIFT_BITS;
204 		}
205 
206 		// Encode a bit
207 		switch (rc->symbols[rc->pos]) {
208 		case RC_BIT_0: {
209 			probability prob = *rc->probs[rc->pos];
210 			rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS)
211 					* prob;
212 			prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS;
213 			*rc->probs[rc->pos] = prob;
214 			break;
215 		}
216 
217 		case RC_BIT_1: {
218 			probability prob = *rc->probs[rc->pos];
219 			const uint32_t bound = prob * (rc->range
220 					>> RC_BIT_MODEL_TOTAL_BITS);
221 			rc->low += bound;
222 			rc->range -= bound;
223 			prob -= prob >> RC_MOVE_BITS;
224 			*rc->probs[rc->pos] = prob;
225 			break;
226 		}
227 
228 		case RC_DIRECT_0:
229 			rc->range >>= 1;
230 			break;
231 
232 		case RC_DIRECT_1:
233 			rc->range >>= 1;
234 			rc->low += rc->range;
235 			break;
236 
237 		case RC_FLUSH:
238 			// Prevent further normalizations.
239 			rc->range = UINT32_MAX;
240 
241 			// Flush the last five bytes (see rc_flush()).
242 			do {
243 				if (rc_shift_low(rc, out, out_pos, out_size))
244 					return true;
245 			} while (++rc->pos < rc->count);
246 
247 			// Reset the range encoder so we are ready to continue
248 			// encoding if we weren't finishing the stream.
249 			rc_reset(rc);
250 			return false;
251 
252 		default:
253 			assert(0);
254 			break;
255 		}
256 
257 		++rc->pos;
258 	}
259 
260 	rc->count = 0;
261 	rc->pos = 0;
262 
263 	return false;
264 }
265 
266 
267 static inline bool
268 rc_encode_dummy(const lzma_range_encoder *rc, uint64_t out_limit)
269 {
270 	assert(rc->count <= RC_SYMBOLS_MAX);
271 
272 	uint64_t low = rc->low;
273 	uint64_t cache_size = rc->cache_size;
274 	uint32_t range = rc->range;
275 	uint8_t cache = rc->cache;
276 	uint64_t out_pos = rc->out_total;
277 
278 	size_t pos = rc->pos;
279 
280 	while (true) {
281 		// Normalize
282 		if (range < RC_TOP_VALUE) {
283 			if (rc_shift_low_dummy(&low, &cache_size, &cache,
284 					&out_pos, out_limit))
285 				return true;
286 
287 			range <<= RC_SHIFT_BITS;
288 		}
289 
290 		// This check is here because the normalization above must
291 		// be done before flushing the last bytes.
292 		if (pos == rc->count)
293 			break;
294 
295 		// Encode a bit
296 		switch (rc->symbols[pos]) {
297 		case RC_BIT_0: {
298 			probability prob = *rc->probs[pos];
299 			range = (range >> RC_BIT_MODEL_TOTAL_BITS)
300 					* prob;
301 			break;
302 		}
303 
304 		case RC_BIT_1: {
305 			probability prob = *rc->probs[pos];
306 			const uint32_t bound = prob * (range
307 					>> RC_BIT_MODEL_TOTAL_BITS);
308 			low += bound;
309 			range -= bound;
310 			break;
311 		}
312 
313 		case RC_DIRECT_0:
314 			range >>= 1;
315 			break;
316 
317 		case RC_DIRECT_1:
318 			range >>= 1;
319 			low += range;
320 			break;
321 
322 		case RC_FLUSH:
323 		default:
324 			assert(0);
325 			break;
326 		}
327 
328 		++pos;
329 	}
330 
331 	// Flush the last bytes. This isn't in rc->symbols[] so we do
332 	// it after the above loop to take into account the size of
333 	// the flushing that will be done at the end of the stream.
334 	for (pos = 0; pos < 5; ++pos) {
335 		if (rc_shift_low_dummy(&low, &cache_size,
336 				&cache, &out_pos, out_limit))
337 			return true;
338 	}
339 
340 	return false;
341 }
342 
343 
344 static inline uint64_t
345 rc_pending(const lzma_range_encoder *rc)
346 {
347 	return rc->cache_size + 5 - 1;
348 }
349 
350 #endif
351