1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       block_decoder.c
4 /// \brief      Decodes .xz Blocks
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "block_decoder.h"
14 #include "filter_decoder.h"
15 #include "check.h"
16 
17 
18 typedef struct {
19 	enum {
20 		SEQ_CODE,
21 		SEQ_PADDING,
22 		SEQ_CHECK,
23 	} sequence;
24 
25 	/// The filters in the chain; initialized with lzma_raw_decoder_init().
26 	lzma_next_coder next;
27 
28 	/// Decoding options; we also write Compressed Size and Uncompressed
29 	/// Size back to this structure when the decoding has been finished.
30 	lzma_block *block;
31 
32 	/// Compressed Size calculated while decoding
33 	lzma_vli compressed_size;
34 
35 	/// Uncompressed Size calculated while decoding
36 	lzma_vli uncompressed_size;
37 
38 	/// Maximum allowed Compressed Size; this takes into account the
39 	/// size of the Block Header and Check fields when Compressed Size
40 	/// is unknown.
41 	lzma_vli compressed_limit;
42 
43 	/// Maximum allowed Uncompressed Size.
44 	lzma_vli uncompressed_limit;
45 
46 	/// Position when reading the Check field
47 	size_t check_pos;
48 
49 	/// Check of the uncompressed data
50 	lzma_check_state check;
51 
52 	/// True if the integrity check won't be calculated and verified.
53 	bool ignore_check;
54 } lzma_block_coder;
55 
56 
57 static inline bool
58 is_size_valid(lzma_vli size, lzma_vli reference)
59 {
60 	return reference == LZMA_VLI_UNKNOWN || reference == size;
61 }
62 
63 
64 static lzma_ret
65 block_decode(void *coder_ptr, const lzma_allocator *allocator,
66 		const uint8_t *restrict in, size_t *restrict in_pos,
67 		size_t in_size, uint8_t *restrict out,
68 		size_t *restrict out_pos, size_t out_size, lzma_action action)
69 {
70 	lzma_block_coder *coder = coder_ptr;
71 
72 	switch (coder->sequence) {
73 	case SEQ_CODE: {
74 		const size_t in_start = *in_pos;
75 		const size_t out_start = *out_pos;
76 
77 		// Limit the amount of input and output space that we give
78 		// to the raw decoder based on the information we have
79 		// (or don't have) from Block Header.
80 		const size_t in_stop = *in_pos + (size_t)my_min(
81 			in_size - *in_pos,
82 			coder->compressed_limit - coder->compressed_size);
83 		const size_t out_stop = *out_pos + (size_t)my_min(
84 			out_size - *out_pos,
85 			coder->uncompressed_limit - coder->uncompressed_size);
86 
87 		const lzma_ret ret = coder->next.code(coder->next.coder,
88 				allocator, in, in_pos, in_stop,
89 				out, out_pos, out_stop, action);
90 
91 		const size_t in_used = *in_pos - in_start;
92 		const size_t out_used = *out_pos - out_start;
93 
94 		// Because we have limited the input and output sizes,
95 		// we know that these cannot grow too big or overflow.
96 		coder->compressed_size += in_used;
97 		coder->uncompressed_size += out_used;
98 
99 		if (ret == LZMA_OK) {
100 			const bool comp_done = coder->compressed_size
101 					== coder->block->compressed_size;
102 			const bool uncomp_done = coder->uncompressed_size
103 					== coder->block->uncompressed_size;
104 
105 			// If both input and output amounts match the sizes
106 			// in Block Header but we still got LZMA_OK instead
107 			// of LZMA_STREAM_END, the file is broken.
108 			if (comp_done && uncomp_done)
109 				return LZMA_DATA_ERROR;
110 
111 			// If the decoder has consumed all the input that it
112 			// needs but it still couldn't fill the output buffer
113 			// or return LZMA_STREAM_END, the file is broken.
114 			if (comp_done && *out_pos < out_size)
115 				return LZMA_DATA_ERROR;
116 
117 			// If the decoder has produced all the output but
118 			// it still didn't return LZMA_STREAM_END or consume
119 			// more input (for example, detecting an end of
120 			// payload marker may need more input but produce
121 			// no output) the file is broken.
122 			if (uncomp_done && *in_pos < in_size)
123 				return LZMA_DATA_ERROR;
124 		}
125 
126 		// Don't waste time updating the integrity check if it will be
127 		// ignored. Also skip it if no new output was produced. This
128 		// avoids null pointer + 0 (undefined behavior) when out == 0.
129 		if (!coder->ignore_check && out_used > 0)
130 			lzma_check_update(&coder->check, coder->block->check,
131 					out + out_start, out_used);
132 
133 		if (ret != LZMA_STREAM_END)
134 			return ret;
135 
136 		// Compressed and Uncompressed Sizes are now at their final
137 		// values. Verify that they match the values given to us.
138 		if (!is_size_valid(coder->compressed_size,
139 					coder->block->compressed_size)
140 				|| !is_size_valid(coder->uncompressed_size,
141 					coder->block->uncompressed_size))
142 			return LZMA_DATA_ERROR;
143 
144 		// Copy the values into coder->block. The caller
145 		// may use this information to construct Index.
146 		coder->block->compressed_size = coder->compressed_size;
147 		coder->block->uncompressed_size = coder->uncompressed_size;
148 
149 		coder->sequence = SEQ_PADDING;
150 	}
151 
152 	// Fall through
153 
154 	case SEQ_PADDING:
155 		// Compressed Data is padded to a multiple of four bytes.
156 		while (coder->compressed_size & 3) {
157 			if (*in_pos >= in_size)
158 				return LZMA_OK;
159 
160 			// We use compressed_size here just get the Padding
161 			// right. The actual Compressed Size was stored to
162 			// coder->block already, and won't be modified by
163 			// us anymore.
164 			++coder->compressed_size;
165 
166 			if (in[(*in_pos)++] != 0x00)
167 				return LZMA_DATA_ERROR;
168 		}
169 
170 		if (coder->block->check == LZMA_CHECK_NONE)
171 			return LZMA_STREAM_END;
172 
173 		if (!coder->ignore_check)
174 			lzma_check_finish(&coder->check, coder->block->check);
175 
176 		coder->sequence = SEQ_CHECK;
177 
178 	// Fall through
179 
180 	case SEQ_CHECK: {
181 		const size_t check_size = lzma_check_size(coder->block->check);
182 		lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check,
183 				&coder->check_pos, check_size);
184 		if (coder->check_pos < check_size)
185 			return LZMA_OK;
186 
187 		// Validate the Check only if we support it.
188 		// coder->check.buffer may be uninitialized
189 		// when the Check ID is not supported.
190 		if (!coder->ignore_check
191 				&& lzma_check_is_supported(coder->block->check)
192 				&& memcmp(coder->block->raw_check,
193 					coder->check.buffer.u8,
194 					check_size) != 0)
195 			return LZMA_DATA_ERROR;
196 
197 		return LZMA_STREAM_END;
198 	}
199 	}
200 
201 	return LZMA_PROG_ERROR;
202 }
203 
204 
205 static void
206 block_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
207 {
208 	lzma_block_coder *coder = coder_ptr;
209 	lzma_next_end(&coder->next, allocator);
210 	lzma_free(coder, allocator);
211 	return;
212 }
213 
214 
215 extern lzma_ret
216 lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
217 		lzma_block *block)
218 {
219 	lzma_next_coder_init(&lzma_block_decoder_init, next, allocator);
220 
221 	// Validate the options. lzma_block_unpadded_size() does that for us
222 	// except for Uncompressed Size and filters. Filters are validated
223 	// by the raw decoder.
224 	if (lzma_block_unpadded_size(block) == 0
225 			|| !lzma_vli_is_valid(block->uncompressed_size))
226 		return LZMA_PROG_ERROR;
227 
228 	// Allocate *next->coder if needed.
229 	lzma_block_coder *coder = next->coder;
230 	if (coder == NULL) {
231 		coder = lzma_alloc(sizeof(lzma_block_coder), allocator);
232 		if (coder == NULL)
233 			return LZMA_MEM_ERROR;
234 
235 		next->coder = coder;
236 		next->code = &block_decode;
237 		next->end = &block_decoder_end;
238 		coder->next = LZMA_NEXT_CODER_INIT;
239 	}
240 
241 	// Basic initializations
242 	coder->sequence = SEQ_CODE;
243 	coder->block = block;
244 	coder->compressed_size = 0;
245 	coder->uncompressed_size = 0;
246 
247 	// If Compressed Size is not known, we calculate the maximum allowed
248 	// value so that encoded size of the Block (including Block Padding)
249 	// is still a valid VLI and a multiple of four.
250 	coder->compressed_limit
251 			= block->compressed_size == LZMA_VLI_UNKNOWN
252 				? (LZMA_VLI_MAX & ~LZMA_VLI_C(3))
253 					- block->header_size
254 					- lzma_check_size(block->check)
255 				: block->compressed_size;
256 
257 	// With Uncompressed Size this is simpler. If Block Header lacks
258 	// the size info, then LZMA_VLI_MAX is the maximum possible
259 	// Uncompressed Size.
260 	coder->uncompressed_limit
261 			= block->uncompressed_size == LZMA_VLI_UNKNOWN
262 				? LZMA_VLI_MAX
263 				: block->uncompressed_size;
264 
265 	// Initialize the check. It's caller's problem if the Check ID is not
266 	// supported, and the Block decoder cannot verify the Check field.
267 	// Caller can test lzma_check_is_supported(block->check).
268 	coder->check_pos = 0;
269 	lzma_check_init(&coder->check, block->check);
270 
271 	coder->ignore_check = block->version >= 1
272 			? block->ignore_check : false;
273 
274 	// Initialize the filter chain.
275 	return lzma_raw_decoder_init(&coder->next, allocator,
276 			block->filters);
277 }
278 
279 
280 extern LZMA_API(lzma_ret)
281 lzma_block_decoder(lzma_stream *strm, lzma_block *block)
282 {
283 	lzma_next_strm_init(lzma_block_decoder_init, strm, block);
284 
285 	strm->internal->supported_actions[LZMA_RUN] = true;
286 	strm->internal->supported_actions[LZMA_FINISH] = true;
287 
288 	return LZMA_OK;
289 }
290