1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       index_decoder.c
4 /// \brief      Decodes the Index field
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "index_decoder.h"
14 #include "check.h"
15 
16 
17 typedef struct {
18 	enum {
19 		SEQ_INDICATOR,
20 		SEQ_COUNT,
21 		SEQ_MEMUSAGE,
22 		SEQ_UNPADDED,
23 		SEQ_UNCOMPRESSED,
24 		SEQ_PADDING_INIT,
25 		SEQ_PADDING,
26 		SEQ_CRC32,
27 	} sequence;
28 
29 	/// Memory usage limit
30 	uint64_t memlimit;
31 
32 	/// Target Index
33 	lzma_index *index;
34 
35 	/// Pointer give by the application, which is set after
36 	/// successful decoding.
37 	lzma_index **index_ptr;
38 
39 	/// Number of Records left to decode.
40 	lzma_vli count;
41 
42 	/// The most recent Unpadded Size field
43 	lzma_vli unpadded_size;
44 
45 	/// The most recent Uncompressed Size field
46 	lzma_vli uncompressed_size;
47 
48 	/// Position in integers
49 	size_t pos;
50 
51 	/// CRC32 of the List of Records field
52 	uint32_t crc32;
53 } lzma_index_coder;
54 
55 
56 static lzma_ret
57 index_decode(void *coder_ptr, const lzma_allocator *allocator,
58 		const uint8_t *restrict in, size_t *restrict in_pos,
59 		size_t in_size,
60 		uint8_t *restrict out lzma_attribute((__unused__)),
61 		size_t *restrict out_pos lzma_attribute((__unused__)),
62 		size_t out_size lzma_attribute((__unused__)),
63 		lzma_action action lzma_attribute((__unused__)))
64 {
65 	lzma_index_coder *coder = coder_ptr;
66 
67 	// Similar optimization as in index_encoder.c
68 	const size_t in_start = *in_pos;
69 	lzma_ret ret = LZMA_OK;
70 
71 	while (*in_pos < in_size)
72 	switch (coder->sequence) {
73 	case SEQ_INDICATOR:
74 		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
75 		// LZMA_FORMAT_ERROR, because a typical usage case for Index
76 		// decoder is when parsing the Stream backwards. If seeking
77 		// backward from the Stream Footer gives us something that
78 		// doesn't begin with Index Indicator, the file is considered
79 		// corrupt, not "programming error" or "unrecognized file
80 		// format". One could argue that the application should
81 		// verify the Index Indicator before trying to decode the
82 		// Index, but well, I suppose it is simpler this way.
83 		if (in[(*in_pos)++] != INDEX_INDICATOR)
84 			return LZMA_DATA_ERROR;
85 
86 		coder->sequence = SEQ_COUNT;
87 		break;
88 
89 	case SEQ_COUNT:
90 		ret = lzma_vli_decode(&coder->count, &coder->pos,
91 				in, in_pos, in_size);
92 		if (ret != LZMA_STREAM_END)
93 			goto out;
94 
95 		coder->pos = 0;
96 		coder->sequence = SEQ_MEMUSAGE;
97 
98 	// Fall through
99 
100 	case SEQ_MEMUSAGE:
101 		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
102 			ret = LZMA_MEMLIMIT_ERROR;
103 			goto out;
104 		}
105 
106 		// Tell the Index handling code how many Records this
107 		// Index has to allow it to allocate memory more efficiently.
108 		lzma_index_prealloc(coder->index, coder->count);
109 
110 		ret = LZMA_OK;
111 		coder->sequence = coder->count == 0
112 				? SEQ_PADDING_INIT : SEQ_UNPADDED;
113 		break;
114 
115 	case SEQ_UNPADDED:
116 	case SEQ_UNCOMPRESSED: {
117 		lzma_vli *size = coder->sequence == SEQ_UNPADDED
118 				? &coder->unpadded_size
119 				: &coder->uncompressed_size;
120 
121 		ret = lzma_vli_decode(size, &coder->pos,
122 				in, in_pos, in_size);
123 		if (ret != LZMA_STREAM_END)
124 			goto out;
125 
126 		ret = LZMA_OK;
127 		coder->pos = 0;
128 
129 		if (coder->sequence == SEQ_UNPADDED) {
130 			// Validate that encoded Unpadded Size isn't too small
131 			// or too big.
132 			if (coder->unpadded_size < UNPADDED_SIZE_MIN
133 					|| coder->unpadded_size
134 						> UNPADDED_SIZE_MAX)
135 				return LZMA_DATA_ERROR;
136 
137 			coder->sequence = SEQ_UNCOMPRESSED;
138 		} else {
139 			// Add the decoded Record to the Index.
140 			return_if_error(lzma_index_append(
141 					coder->index, allocator,
142 					coder->unpadded_size,
143 					coder->uncompressed_size));
144 
145 			// Check if this was the last Record.
146 			coder->sequence = --coder->count == 0
147 					? SEQ_PADDING_INIT
148 					: SEQ_UNPADDED;
149 		}
150 
151 		break;
152 	}
153 
154 	case SEQ_PADDING_INIT:
155 		coder->pos = lzma_index_padding_size(coder->index);
156 		coder->sequence = SEQ_PADDING;
157 
158 	// Fall through
159 
160 	case SEQ_PADDING:
161 		if (coder->pos > 0) {
162 			--coder->pos;
163 			if (in[(*in_pos)++] != 0x00)
164 				return LZMA_DATA_ERROR;
165 
166 			break;
167 		}
168 
169 		// Finish the CRC32 calculation.
170 		coder->crc32 = lzma_crc32(in + in_start,
171 				*in_pos - in_start, coder->crc32);
172 
173 		coder->sequence = SEQ_CRC32;
174 
175 	// Fall through
176 
177 	case SEQ_CRC32:
178 		do {
179 			if (*in_pos == in_size)
180 				return LZMA_OK;
181 
182 			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
183 					!= in[(*in_pos)++]) {
184 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
185 				return LZMA_DATA_ERROR;
186 #endif
187 			}
188 
189 		} while (++coder->pos < 4);
190 
191 		// Decoding was successful, now we can let the application
192 		// see the decoded Index.
193 		*coder->index_ptr = coder->index;
194 
195 		// Make index NULL so we don't free it unintentionally.
196 		coder->index = NULL;
197 
198 		return LZMA_STREAM_END;
199 
200 	default:
201 		assert(0);
202 		return LZMA_PROG_ERROR;
203 	}
204 
205 out:
206 	// Update the CRC32.
207 	//
208 	// Avoid null pointer + 0 (undefined behavior) in "in + in_start".
209 	// In such a case we had no input and thus in_used == 0.
210 	{
211 		const size_t in_used = *in_pos - in_start;
212 		if (in_used > 0)
213 			coder->crc32 = lzma_crc32(in + in_start,
214 					in_used, coder->crc32);
215 	}
216 
217 	return ret;
218 }
219 
220 
221 static void
222 index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
223 {
224 	lzma_index_coder *coder = coder_ptr;
225 	lzma_index_end(coder->index, allocator);
226 	lzma_free(coder, allocator);
227 	return;
228 }
229 
230 
231 static lzma_ret
232 index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
233 		uint64_t *old_memlimit, uint64_t new_memlimit)
234 {
235 	lzma_index_coder *coder = coder_ptr;
236 
237 	*memusage = lzma_index_memusage(1, coder->count);
238 	*old_memlimit = coder->memlimit;
239 
240 	if (new_memlimit != 0) {
241 		if (new_memlimit < *memusage)
242 			return LZMA_MEMLIMIT_ERROR;
243 
244 		coder->memlimit = new_memlimit;
245 	}
246 
247 	return LZMA_OK;
248 }
249 
250 
251 static lzma_ret
252 index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
253 		lzma_index **i, uint64_t memlimit)
254 {
255 	// Remember the pointer given by the application. We will set it
256 	// to point to the decoded Index only if decoding is successful.
257 	// Before that, keep it NULL so that applications can always safely
258 	// pass it to lzma_index_end() no matter did decoding succeed or not.
259 	coder->index_ptr = i;
260 	*i = NULL;
261 
262 	// We always allocate a new lzma_index.
263 	coder->index = lzma_index_init(allocator);
264 	if (coder->index == NULL)
265 		return LZMA_MEM_ERROR;
266 
267 	// Initialize the rest.
268 	coder->sequence = SEQ_INDICATOR;
269 	coder->memlimit = my_max(1, memlimit);
270 	coder->count = 0; // Needs to be initialized due to _memconfig().
271 	coder->pos = 0;
272 	coder->crc32 = 0;
273 
274 	return LZMA_OK;
275 }
276 
277 
278 extern lzma_ret
279 lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
280 		lzma_index **i, uint64_t memlimit)
281 {
282 	lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
283 
284 	if (i == NULL)
285 		return LZMA_PROG_ERROR;
286 
287 	lzma_index_coder *coder = next->coder;
288 	if (coder == NULL) {
289 		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
290 		if (coder == NULL)
291 			return LZMA_MEM_ERROR;
292 
293 		next->coder = coder;
294 		next->code = &index_decode;
295 		next->end = &index_decoder_end;
296 		next->memconfig = &index_decoder_memconfig;
297 		coder->index = NULL;
298 	} else {
299 		lzma_index_end(coder->index, allocator);
300 	}
301 
302 	return index_decoder_reset(coder, allocator, i, memlimit);
303 }
304 
305 
306 extern LZMA_API(lzma_ret)
307 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
308 {
309 	lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
310 
311 	strm->internal->supported_actions[LZMA_RUN] = true;
312 	strm->internal->supported_actions[LZMA_FINISH] = true;
313 
314 	return LZMA_OK;
315 }
316 
317 
318 extern LZMA_API(lzma_ret)
319 lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
320 		const lzma_allocator *allocator,
321 		const uint8_t *in, size_t *in_pos, size_t in_size)
322 {
323 	// Sanity checks
324 	if (i == NULL || memlimit == NULL
325 			|| in == NULL || in_pos == NULL || *in_pos > in_size)
326 		return LZMA_PROG_ERROR;
327 
328 	// Initialize the decoder.
329 	lzma_index_coder coder;
330 	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
331 
332 	// Store the input start position so that we can restore it in case
333 	// of an error.
334 	const size_t in_start = *in_pos;
335 
336 	// Do the actual decoding.
337 	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
338 			NULL, NULL, 0, LZMA_RUN);
339 
340 	if (ret == LZMA_STREAM_END) {
341 		ret = LZMA_OK;
342 	} else {
343 		// Something went wrong, free the Index structure and restore
344 		// the input position.
345 		lzma_index_end(coder.index, allocator);
346 		*in_pos = in_start;
347 
348 		if (ret == LZMA_OK) {
349 			// The input is truncated or otherwise corrupt.
350 			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
351 			// like lzma_vli_decode() does in single-call mode.
352 			ret = LZMA_DATA_ERROR;
353 
354 		} else if (ret == LZMA_MEMLIMIT_ERROR) {
355 			// Tell the caller how much memory would have
356 			// been needed.
357 			*memlimit = lzma_index_memusage(1, coder.count);
358 		}
359 	}
360 
361 	return ret;
362 }
363