///////////////////////////////////////////////////////////////////////////////
//
/// \file       lzma_encoder.c
/// \brief      LZMA encoder
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
13 
14 #include "lzma2_encoder.h"
15 #include "lzma_encoder_private.h"
16 #include "fastpos.h"
17 
18 
/////////////
// Literal //
/////////////
22 
23 static inline void
literal_matched(lzma_range_encoder * rc,probability * subcoder,uint32_t match_byte,uint32_t symbol)24 literal_matched(lzma_range_encoder *rc, probability *subcoder,
25 		uint32_t match_byte, uint32_t symbol)
26 {
27 	uint32_t offset = 0x100;
28 	symbol += UINT32_C(1) << 8;
29 
30 	do {
31 		match_byte <<= 1;
32 		const uint32_t match_bit = match_byte & offset;
33 		const uint32_t subcoder_index
34 				= offset + match_bit + (symbol >> 8);
35 		const uint32_t bit = (symbol >> 7) & 1;
36 		rc_bit(rc, &subcoder[subcoder_index], bit);
37 
38 		symbol <<= 1;
39 		offset &= ~(match_byte ^ symbol);
40 
41 	} while (symbol < (UINT32_C(1) << 16));
42 }
43 
44 
45 static inline void
literal(lzma_lzma1_encoder * coder,lzma_mf * mf,uint32_t position)46 literal(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t position)
47 {
48 	// Locate the literal byte to be encoded and the subcoder.
49 	const uint8_t cur_byte = mf->buffer[
50 			mf->read_pos - mf->read_ahead];
51 	probability *subcoder = literal_subcoder(coder->literal,
52 			coder->literal_context_bits, coder->literal_pos_mask,
53 			position, mf->buffer[mf->read_pos - mf->read_ahead - 1]);
54 
55 	if (is_literal_state(coder->state)) {
56 		// Previous LZMA-symbol was a literal. Encode a normal
57 		// literal without a match byte.
58 		rc_bittree(&coder->rc, subcoder, 8, cur_byte);
59 	} else {
60 		// Previous LZMA-symbol was a match. Use the last byte of
61 		// the match as a "match byte". That is, compare the bits
62 		// of the current literal and the match byte.
63 		const uint8_t match_byte = mf->buffer[
64 				mf->read_pos - coder->reps[0] - 1
65 				- mf->read_ahead];
66 		literal_matched(&coder->rc, subcoder, match_byte, cur_byte);
67 	}
68 
69 	update_literal(coder->state);
70 }
71 
72 
//////////////////
// Match length //
//////////////////
76 
77 static void
length_update_prices(lzma_length_encoder * lc,const uint32_t pos_state)78 length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state)
79 {
80 	const uint32_t table_size = lc->table_size;
81 	lc->counters[pos_state] = table_size;
82 
83 	const uint32_t a0 = rc_bit_0_price(lc->choice);
84 	const uint32_t a1 = rc_bit_1_price(lc->choice);
85 	const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2);
86 	const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2);
87 	uint32_t *const prices = lc->prices[pos_state];
88 
89 	uint32_t i;
90 	for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i)
91 		prices[i] = a0 + rc_bittree_price(lc->low[pos_state],
92 				LEN_LOW_BITS, i);
93 
94 	for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i)
95 		prices[i] = b0 + rc_bittree_price(lc->mid[pos_state],
96 				LEN_MID_BITS, i - LEN_LOW_SYMBOLS);
97 
98 	for (; i < table_size; ++i)
99 		prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS,
100 				i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS);
101 
102 	return;
103 }
104 
105 
106 static inline void
length(lzma_range_encoder * rc,lzma_length_encoder * lc,const uint32_t pos_state,uint32_t len,const bool fast_mode)107 length(lzma_range_encoder *rc, lzma_length_encoder *lc,
108 		const uint32_t pos_state, uint32_t len, const bool fast_mode)
109 {
110 	assert(len <= MATCH_LEN_MAX);
111 	len -= MATCH_LEN_MIN;
112 
113 	if (len < LEN_LOW_SYMBOLS) {
114 		rc_bit(rc, &lc->choice, 0);
115 		rc_bittree(rc, lc->low[pos_state], LEN_LOW_BITS, len);
116 	} else {
117 		rc_bit(rc, &lc->choice, 1);
118 		len -= LEN_LOW_SYMBOLS;
119 
120 		if (len < LEN_MID_SYMBOLS) {
121 			rc_bit(rc, &lc->choice2, 0);
122 			rc_bittree(rc, lc->mid[pos_state], LEN_MID_BITS, len);
123 		} else {
124 			rc_bit(rc, &lc->choice2, 1);
125 			len -= LEN_MID_SYMBOLS;
126 			rc_bittree(rc, lc->high, LEN_HIGH_BITS, len);
127 		}
128 	}
129 
130 	// Only getoptimum uses the prices so don't update the table when
131 	// in fast mode.
132 	if (!fast_mode)
133 		if (--lc->counters[pos_state] == 0)
134 			length_update_prices(lc, pos_state);
135 }
136 
137 
///////////
// Match //
///////////
141 
142 static inline void
match(lzma_lzma1_encoder * coder,const uint32_t pos_state,const uint32_t distance,const uint32_t len)143 match(lzma_lzma1_encoder *coder, const uint32_t pos_state,
144 		const uint32_t distance, const uint32_t len)
145 {
146 	update_match(coder->state);
147 
148 	length(&coder->rc, &coder->match_len_encoder, pos_state, len,
149 			coder->fast_mode);
150 
151 	const uint32_t dist_slot = get_dist_slot(distance);
152 	const uint32_t dist_state = get_dist_state(len);
153 	rc_bittree(&coder->rc, coder->dist_slot[dist_state],
154 			DIST_SLOT_BITS, dist_slot);
155 
156 	if (dist_slot >= DIST_MODEL_START) {
157 		const uint32_t footer_bits = (dist_slot >> 1) - 1;
158 		const uint32_t base = (2 | (dist_slot & 1)) << footer_bits;
159 		const uint32_t dist_reduced = distance - base;
160 
161 		if (dist_slot < DIST_MODEL_END) {
162 			// Careful here: base - dist_slot - 1 can be -1, but
163 			// rc_bittree_reverse starts at probs[1], not probs[0].
164 			rc_bittree_reverse(&coder->rc,
165 				coder->dist_special + base - dist_slot - 1,
166 				footer_bits, dist_reduced);
167 		} else {
168 			rc_direct(&coder->rc, dist_reduced >> ALIGN_BITS,
169 					footer_bits - ALIGN_BITS);
170 			rc_bittree_reverse(
171 					&coder->rc, coder->dist_align,
172 					ALIGN_BITS, dist_reduced & ALIGN_MASK);
173 			++coder->align_price_count;
174 		}
175 	}
176 
177 	coder->reps[3] = coder->reps[2];
178 	coder->reps[2] = coder->reps[1];
179 	coder->reps[1] = coder->reps[0];
180 	coder->reps[0] = distance;
181 	++coder->match_price_count;
182 }
183 
184 
////////////////////
// Repeated match //
////////////////////
188 
189 static inline void
rep_match(lzma_lzma1_encoder * coder,const uint32_t pos_state,const uint32_t rep,const uint32_t len)190 rep_match(lzma_lzma1_encoder *coder, const uint32_t pos_state,
191 		const uint32_t rep, const uint32_t len)
192 {
193 	if (rep == 0) {
194 		rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0);
195 		rc_bit(&coder->rc,
196 				&coder->is_rep0_long[coder->state][pos_state],
197 				len != 1);
198 	} else {
199 		const uint32_t distance = coder->reps[rep];
200 		rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1);
201 
202 		if (rep == 1) {
203 			rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0);
204 		} else {
205 			rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1);
206 			rc_bit(&coder->rc, &coder->is_rep2[coder->state],
207 					rep - 2);
208 
209 			if (rep == 3)
210 				coder->reps[3] = coder->reps[2];
211 
212 			coder->reps[2] = coder->reps[1];
213 		}
214 
215 		coder->reps[1] = coder->reps[0];
216 		coder->reps[0] = distance;
217 	}
218 
219 	if (len == 1) {
220 		update_short_rep(coder->state);
221 	} else {
222 		length(&coder->rc, &coder->rep_len_encoder, pos_state, len,
223 				coder->fast_mode);
224 		update_long_rep(coder->state);
225 	}
226 }
227 
228 
//////////
// Main //
//////////
232 
233 static void
encode_symbol(lzma_lzma1_encoder * coder,lzma_mf * mf,uint32_t back,uint32_t len,uint32_t position)234 encode_symbol(lzma_lzma1_encoder *coder, lzma_mf *mf,
235 		uint32_t back, uint32_t len, uint32_t position)
236 {
237 	const uint32_t pos_state = position & coder->pos_mask;
238 
239 	if (back == UINT32_MAX) {
240 		// Literal i.e. eight-bit byte
241 		assert(len == 1);
242 		rc_bit(&coder->rc,
243 				&coder->is_match[coder->state][pos_state], 0);
244 		literal(coder, mf, position);
245 	} else {
246 		// Some type of match
247 		rc_bit(&coder->rc,
248 			&coder->is_match[coder->state][pos_state], 1);
249 
250 		if (back < REPS) {
251 			// It's a repeated match i.e. the same distance
252 			// has been used earlier.
253 			rc_bit(&coder->rc, &coder->is_rep[coder->state], 1);
254 			rep_match(coder, pos_state, back, len);
255 		} else {
256 			// Normal match
257 			rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
258 			match(coder, pos_state, back - REPS, len);
259 		}
260 	}
261 
262 	assert(mf->read_ahead >= len);
263 	mf->read_ahead -= len;
264 }
265 
266 
267 static bool
encode_init(lzma_lzma1_encoder * coder,lzma_mf * mf)268 encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf)
269 {
270 	assert(mf_position(mf) == 0);
271 	assert(coder->uncomp_size == 0);
272 
273 	if (mf->read_pos == mf->read_limit) {
274 		if (mf->action == LZMA_RUN)
275 			return false; // We cannot do anything.
276 
277 		// We are finishing (we cannot get here when flushing).
278 		assert(mf->write_pos == mf->read_pos);
279 		assert(mf->action == LZMA_FINISH);
280 	} else {
281 		// Do the actual initialization. The first LZMA symbol must
282 		// always be a literal.
283 		mf_skip(mf, 1);
284 		mf->read_ahead = 0;
285 		rc_bit(&coder->rc, &coder->is_match[0][0], 0);
286 		rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]);
287 		++coder->uncomp_size;
288 	}
289 
290 	// Initialization is done (except if empty file).
291 	coder->is_initialized = true;
292 
293 	return true;
294 }
295 
296 
297 static void
encode_eopm(lzma_lzma1_encoder * coder,uint32_t position)298 encode_eopm(lzma_lzma1_encoder *coder, uint32_t position)
299 {
300 	const uint32_t pos_state = position & coder->pos_mask;
301 	rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1);
302 	rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
303 	match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN);
304 }
305 
306 
/// Upper bound for the number of bytes that one iteration of the encoding
/// loop in lzma_lzma_encode() can consume from the dictionary. The limit
/// comes from lzma_lzma_optimum() and may need to be updated if that
/// function is significantly modified.
#define LOOP_INPUT_MAX (OPTS + 1)
311 
312 
313 extern lzma_ret
lzma_lzma_encode(lzma_lzma1_encoder * restrict coder,lzma_mf * restrict mf,uint8_t * restrict out,size_t * restrict out_pos,size_t out_size,uint32_t limit)314 lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
315 		uint8_t *restrict out, size_t *restrict out_pos,
316 		size_t out_size, uint32_t limit)
317 {
318 	// Initialize the stream if no data has been encoded yet.
319 	if (!coder->is_initialized && !encode_init(coder, mf))
320 		return LZMA_OK;
321 
322 	// Encode pending output bytes from the range encoder.
323 	// At the start of the stream, encode_init() encodes one literal.
324 	// Later there can be pending output only with LZMA1 because LZMA2
325 	// ensures that there is always enough output space. Thus when using
326 	// LZMA2, rc_encode() calls in this function will always return false.
327 	if (rc_encode(&coder->rc, out, out_pos, out_size)) {
328 		// We don't get here with LZMA2.
329 		assert(limit == UINT32_MAX);
330 		return LZMA_OK;
331 	}
332 
333 	// If the range encoder was flushed in an earlier call to this
334 	// function but there wasn't enough output buffer space, those
335 	// bytes would have now been encoded by the above rc_encode() call
336 	// and the stream has now been finished. This can only happen with
337 	// LZMA1 as LZMA2 always provides enough output buffer space.
338 	if (coder->is_flushed) {
339 		assert(limit == UINT32_MAX);
340 		return LZMA_STREAM_END;
341 	}
342 
343 	while (true) {
344 		// With LZMA2 we need to take care that compressed size of
345 		// a chunk doesn't get too big.
346 		// FIXME? Check if this could be improved.
347 		if (limit != UINT32_MAX
348 				&& (mf->read_pos - mf->read_ahead >= limit
349 					|| *out_pos + rc_pending(&coder->rc)
350 						>= LZMA2_CHUNK_MAX
351 							- LOOP_INPUT_MAX))
352 			break;
353 
354 		// Check that there is some input to process.
355 		if (mf->read_pos >= mf->read_limit) {
356 			if (mf->action == LZMA_RUN)
357 				return LZMA_OK;
358 
359 			if (mf->read_ahead == 0)
360 				break;
361 		}
362 
363 		// Get optimal match (repeat position and length).
364 		// Value ranges for pos:
365 		//   - [0, REPS): repeated match
366 		//   - [REPS, UINT32_MAX):
367 		//     match at (pos - REPS)
368 		//   - UINT32_MAX: not a match but a literal
369 		// Value ranges for len:
370 		//   - [MATCH_LEN_MIN, MATCH_LEN_MAX]
371 		uint32_t len;
372 		uint32_t back;
373 
374 		if (coder->fast_mode)
375 			lzma_lzma_optimum_fast(coder, mf, &back, &len);
376 		else
377 			lzma_lzma_optimum_normal(coder, mf, &back, &len,
378 					(uint32_t)(coder->uncomp_size));
379 
380 		encode_symbol(coder, mf, back, len,
381 				(uint32_t)(coder->uncomp_size));
382 
383 		// If output size limiting is active (out_limit != 0), check
384 		// if encoding this LZMA symbol would make the output size
385 		// exceed the specified limit.
386 		if (coder->out_limit != 0 && rc_encode_dummy(
387 				&coder->rc, coder->out_limit)) {
388 			// The most recent LZMA symbol would make the output
389 			// too big. Throw it away.
390 			rc_forget(&coder->rc);
391 
392 			// FIXME: Tell the LZ layer to not read more input as
393 			// it would be waste of time. This doesn't matter if
394 			// output-size-limited encoding is done with a single
395 			// call though.
396 
397 			break;
398 		}
399 
400 		// This symbol will be encoded so update the uncompressed size.
401 		coder->uncomp_size += len;
402 
403 		// Encode the LZMA symbol.
404 		if (rc_encode(&coder->rc, out, out_pos, out_size)) {
405 			// Once again, this can only happen with LZMA1.
406 			assert(limit == UINT32_MAX);
407 			return LZMA_OK;
408 		}
409 	}
410 
411 	// Make the uncompressed size available to the application.
412 	if (coder->uncomp_size_ptr != NULL)
413 		*coder->uncomp_size_ptr = coder->uncomp_size;
414 
415 	// LZMA2 doesn't use EOPM at LZMA level.
416 	//
417 	// Plain LZMA streams without EOPM aren't supported except when
418 	// output size limiting is enabled.
419 	if (coder->use_eopm)
420 		encode_eopm(coder, (uint32_t)(coder->uncomp_size));
421 
422 	// Flush the remaining bytes from the range encoder.
423 	rc_flush(&coder->rc);
424 
425 	// Copy the remaining bytes to the output buffer. If there
426 	// isn't enough output space, we will copy out the remaining
427 	// bytes on the next call to this function.
428 	if (rc_encode(&coder->rc, out, out_pos, out_size)) {
429 		// This cannot happen with LZMA2.
430 		assert(limit == UINT32_MAX);
431 
432 		coder->is_flushed = true;
433 		return LZMA_OK;
434 	}
435 
436 	return LZMA_STREAM_END;
437 }
438 
439 
440 static lzma_ret
lzma_encode(void * coder,lzma_mf * restrict mf,uint8_t * restrict out,size_t * restrict out_pos,size_t out_size)441 lzma_encode(void *coder, lzma_mf *restrict mf,
442 		uint8_t *restrict out, size_t *restrict out_pos,
443 		size_t out_size)
444 {
445 	// Plain LZMA has no support for sync-flushing.
446 	if (unlikely(mf->action == LZMA_SYNC_FLUSH))
447 		return LZMA_OPTIONS_ERROR;
448 
449 	return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX);
450 }
451 
452 
453 static lzma_ret
lzma_lzma_set_out_limit(void * coder_ptr,uint64_t * uncomp_size,uint64_t out_limit)454 lzma_lzma_set_out_limit(
455 		void *coder_ptr, uint64_t *uncomp_size, uint64_t out_limit)
456 {
457 	// Minimum output size is 5 bytes but that cannot hold any output
458 	// so we use 6 bytes.
459 	if (out_limit < 6)
460 		return LZMA_BUF_ERROR;
461 
462 	lzma_lzma1_encoder *coder = coder_ptr;
463 	coder->out_limit = out_limit;
464 	coder->uncomp_size_ptr = uncomp_size;
465 	coder->use_eopm = false;
466 	return LZMA_OK;
467 }
468 
469 
////////////////////
// Initialization //
////////////////////
473 
474 static bool
is_options_valid(const lzma_options_lzma * options)475 is_options_valid(const lzma_options_lzma *options)
476 {
477 	// Validate some of the options. LZ encoder validates nice_len too
478 	// but we need a valid value here earlier.
479 	return is_lclppb_valid(options)
480 			&& options->nice_len >= MATCH_LEN_MIN
481 			&& options->nice_len <= MATCH_LEN_MAX
482 			&& (options->mode == LZMA_MODE_FAST
483 				|| options->mode == LZMA_MODE_NORMAL);
484 }
485 
486 
487 static void
set_lz_options(lzma_lz_options * lz_options,const lzma_options_lzma * options)488 set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options)
489 {
490 	// LZ encoder initialization does the validation for these so we
491 	// don't need to validate here.
492 	lz_options->before_size = OPTS;
493 	lz_options->dict_size = options->dict_size;
494 	lz_options->after_size = LOOP_INPUT_MAX;
495 	lz_options->match_len_max = MATCH_LEN_MAX;
496 	lz_options->nice_len = my_max(mf_get_hash_bytes(options->mf),
497 				options->nice_len);
498 	lz_options->match_finder = options->mf;
499 	lz_options->depth = options->depth;
500 	lz_options->preset_dict = options->preset_dict;
501 	lz_options->preset_dict_size = options->preset_dict_size;
502 	return;
503 }
504 
505 
506 static void
length_encoder_reset(lzma_length_encoder * lencoder,const uint32_t num_pos_states,const bool fast_mode)507 length_encoder_reset(lzma_length_encoder *lencoder,
508 		const uint32_t num_pos_states, const bool fast_mode)
509 {
510 	bit_reset(lencoder->choice);
511 	bit_reset(lencoder->choice2);
512 
513 	for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
514 		bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS);
515 		bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS);
516 	}
517 
518 	bittree_reset(lencoder->high, LEN_HIGH_BITS);
519 
520 	if (!fast_mode)
521 		for (uint32_t pos_state = 0; pos_state < num_pos_states;
522 				++pos_state)
523 			length_update_prices(lencoder, pos_state);
524 
525 	return;
526 }
527 
528 
529 extern lzma_ret
lzma_lzma_encoder_reset(lzma_lzma1_encoder * coder,const lzma_options_lzma * options)530 lzma_lzma_encoder_reset(lzma_lzma1_encoder *coder,
531 		const lzma_options_lzma *options)
532 {
533 	if (!is_options_valid(options))
534 		return LZMA_OPTIONS_ERROR;
535 
536 	coder->pos_mask = (1U << options->pb) - 1;
537 	coder->literal_context_bits = options->lc;
538 	coder->literal_pos_mask = (1U << options->lp) - 1;
539 
540 	// Range coder
541 	rc_reset(&coder->rc);
542 
543 	// State
544 	coder->state = STATE_LIT_LIT;
545 	for (size_t i = 0; i < REPS; ++i)
546 		coder->reps[i] = 0;
547 
548 	literal_init(coder->literal, options->lc, options->lp);
549 
550 	// Bit encoders
551 	for (size_t i = 0; i < STATES; ++i) {
552 		for (size_t j = 0; j <= coder->pos_mask; ++j) {
553 			bit_reset(coder->is_match[i][j]);
554 			bit_reset(coder->is_rep0_long[i][j]);
555 		}
556 
557 		bit_reset(coder->is_rep[i]);
558 		bit_reset(coder->is_rep0[i]);
559 		bit_reset(coder->is_rep1[i]);
560 		bit_reset(coder->is_rep2[i]);
561 	}
562 
563 	for (size_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
564 		bit_reset(coder->dist_special[i]);
565 
566 	// Bit tree encoders
567 	for (size_t i = 0; i < DIST_STATES; ++i)
568 		bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
569 
570 	bittree_reset(coder->dist_align, ALIGN_BITS);
571 
572 	// Length encoders
573 	length_encoder_reset(&coder->match_len_encoder,
574 			1U << options->pb, coder->fast_mode);
575 
576 	length_encoder_reset(&coder->rep_len_encoder,
577 			1U << options->pb, coder->fast_mode);
578 
579 	// Price counts are incremented every time appropriate probabilities
580 	// are changed. price counts are set to zero when the price tables
581 	// are updated, which is done when the appropriate price counts have
582 	// big enough value, and lzma_mf.read_ahead == 0 which happens at
583 	// least every OPTS (a few thousand) possible price count increments.
584 	//
585 	// By resetting price counts to UINT32_MAX / 2, we make sure that the
586 	// price tables will be initialized before they will be used (since
587 	// the value is definitely big enough), and that it is OK to increment
588 	// price counts without risk of integer overflow (since UINT32_MAX / 2
589 	// is small enough). The current code doesn't increment price counts
590 	// before initializing price tables, but it maybe done in future if
591 	// we add support for saving the state between LZMA2 chunks.
592 	coder->match_price_count = UINT32_MAX / 2;
593 	coder->align_price_count = UINT32_MAX / 2;
594 
595 	coder->opts_end_index = 0;
596 	coder->opts_current_index = 0;
597 
598 	return LZMA_OK;
599 }
600 
601 
602 extern lzma_ret
lzma_lzma_encoder_create(void ** coder_ptr,const lzma_allocator * allocator,lzma_vli id,const lzma_options_lzma * options,lzma_lz_options * lz_options)603 lzma_lzma_encoder_create(void **coder_ptr, const lzma_allocator *allocator,
604 		lzma_vli id, const lzma_options_lzma *options,
605 		lzma_lz_options *lz_options)
606 {
607 	assert(id == LZMA_FILTER_LZMA1 || id == LZMA_FILTER_LZMA1EXT
608 			|| id == LZMA_FILTER_LZMA2);
609 
610 	// Allocate lzma_lzma1_encoder if it wasn't already allocated.
611 	if (*coder_ptr == NULL) {
612 		*coder_ptr = lzma_alloc(sizeof(lzma_lzma1_encoder), allocator);
613 		if (*coder_ptr == NULL)
614 			return LZMA_MEM_ERROR;
615 	}
616 
617 	lzma_lzma1_encoder *coder = *coder_ptr;
618 
619 	// Set compression mode. Note that we haven't validated the options
620 	// yet. Invalid options will get rejected by lzma_lzma_encoder_reset()
621 	// call at the end of this function.
622 	switch (options->mode) {
623 		case LZMA_MODE_FAST:
624 			coder->fast_mode = true;
625 			break;
626 
627 		case LZMA_MODE_NORMAL: {
628 			coder->fast_mode = false;
629 
630 			// Set dist_table_size.
631 			// Round the dictionary size up to next 2^n.
632 			//
633 			// Currently the maximum encoder dictionary size
634 			// is 1.5 GiB due to lz_encoder.c and here we need
635 			// to be below 2 GiB to make the rounded up value
636 			// fit in an uint32_t and avoid an infinite while-loop
637 			// (and undefined behavior due to a too large shift).
638 			// So do the same check as in LZ encoder,
639 			// limiting to 1.5 GiB.
640 			if (options->dict_size > (UINT32_C(1) << 30)
641 					+ (UINT32_C(1) << 29))
642 				return LZMA_OPTIONS_ERROR;
643 
644 			uint32_t log_size = 0;
645 			while ((UINT32_C(1) << log_size) < options->dict_size)
646 				++log_size;
647 
648 			coder->dist_table_size = log_size * 2;
649 
650 			// Length encoders' price table size
651 			const uint32_t nice_len = my_max(
652 					mf_get_hash_bytes(options->mf),
653 					options->nice_len);
654 
655 			coder->match_len_encoder.table_size
656 					= nice_len + 1 - MATCH_LEN_MIN;
657 			coder->rep_len_encoder.table_size
658 					= nice_len + 1 - MATCH_LEN_MIN;
659 			break;
660 		}
661 
662 		default:
663 			return LZMA_OPTIONS_ERROR;
664 	}
665 
666 	// We don't need to write the first byte as literal if there is
667 	// a non-empty preset dictionary. encode_init() wouldn't even work
668 	// if there is a non-empty preset dictionary, because encode_init()
669 	// assumes that position is zero and previous byte is also zero.
670 	coder->is_initialized = options->preset_dict != NULL
671 			&& options->preset_dict_size > 0;
672 	coder->is_flushed = false;
673 	coder->uncomp_size = 0;
674 	coder->uncomp_size_ptr = NULL;
675 
676 	// Output size limiting is disabled by default.
677 	coder->out_limit = 0;
678 
679 	// Determine if end marker is wanted:
680 	//   - It is never used with LZMA2.
681 	//   - It is always used with LZMA_FILTER_LZMA1 (unless
682 	//     lzma_lzma_set_out_limit() is called later).
683 	//   - LZMA_FILTER_LZMA1EXT has a flag for it in the options.
684 	coder->use_eopm = (id == LZMA_FILTER_LZMA1);
685 	if (id == LZMA_FILTER_LZMA1EXT) {
686 		// Check if unsupported flags are present.
687 		if (options->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
688 			return LZMA_OPTIONS_ERROR;
689 
690 		coder->use_eopm = (options->ext_flags
691 				& LZMA_LZMA1EXT_ALLOW_EOPM) != 0;
692 
693 		// TODO? As long as there are no filters that change the size
694 		// of the data, it is enough to look at lzma_stream.total_in
695 		// after encoding has been finished to know the uncompressed
696 		// size of the LZMA1 stream. But in the future there could be
697 		// filters that change the size of the data and then total_in
698 		// doesn't work as the LZMA1 stream size might be different
699 		// due to another filter in the chain. The problem is simple
700 		// to solve: Add another flag to ext_flags and then set
701 		// coder->uncomp_size_ptr to the address stored in
702 		// lzma_options_lzma.reserved_ptr2 (or _ptr1).
703 	}
704 
705 	set_lz_options(lz_options, options);
706 
707 	return lzma_lzma_encoder_reset(coder, options);
708 }
709 
710 
711 static lzma_ret
lzma_encoder_init(lzma_lz_encoder * lz,const lzma_allocator * allocator,lzma_vli id,const void * options,lzma_lz_options * lz_options)712 lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
713 		lzma_vli id, const void *options, lzma_lz_options *lz_options)
714 {
715 	lz->code = &lzma_encode;
716 	lz->set_out_limit = &lzma_lzma_set_out_limit;
717 	return lzma_lzma_encoder_create(
718 			&lz->coder, allocator, id, options, lz_options);
719 }
720 
721 
722 extern lzma_ret
lzma_lzma_encoder_init(lzma_next_coder * next,const lzma_allocator * allocator,const lzma_filter_info * filters)723 lzma_lzma_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
724 		const lzma_filter_info *filters)
725 {
726 	return lzma_lz_encoder_init(
727 			next, allocator, filters, &lzma_encoder_init);
728 }
729 
730 
731 extern uint64_t
lzma_lzma_encoder_memusage(const void * options)732 lzma_lzma_encoder_memusage(const void *options)
733 {
734 	if (!is_options_valid(options))
735 		return UINT64_MAX;
736 
737 	lzma_lz_options lz_options;
738 	set_lz_options(&lz_options, options);
739 
740 	const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options);
741 	if (lz_memusage == UINT64_MAX)
742 		return UINT64_MAX;
743 
744 	return (uint64_t)(sizeof(lzma_lzma1_encoder)) + lz_memusage;
745 }
746 
747 
748 extern bool
lzma_lzma_lclppb_encode(const lzma_options_lzma * options,uint8_t * byte)749 lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte)
750 {
751 	if (!is_lclppb_valid(options))
752 		return true;
753 
754 	*byte = (options->pb * 5 + options->lp) * 9 + options->lc;
755 	assert(*byte <= (4 * 5 + 4) * 9 + 8);
756 
757 	return false;
758 }
759 
760 
#ifdef HAVE_ENCODER_LZMA1
/// Encode the LZMA1 filter properties: the lc/lp/pb byte followed by the
/// dictionary size written with write32le().
///
/// \return     LZMA_PROG_ERROR if options is NULL or lc/lp/pb are
///             invalid, LZMA_OK otherwise.
extern lzma_ret
lzma_lzma_props_encode(const void *options, uint8_t *out)
{
	const lzma_options_lzma *const opt = options;

	// The first byte holds lc/lp/pb.
	if (opt == NULL || lzma_lzma_lclppb_encode(opt, out))
		return LZMA_PROG_ERROR;

	// The dictionary size goes right after the first byte.
	write32le(out + 1, opt->dict_size);

	return LZMA_OK;
}
#endif
778 
779 
780 extern LZMA_API(lzma_bool)
lzma_mode_is_supported(lzma_mode mode)781 lzma_mode_is_supported(lzma_mode mode)
782 {
783 	return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL;
784 }
785