1 /*
2  * Decompression code for LZ77+Huffman. This encoding is used by
3  * Microsoft in various file formats and protocols including SMB3.
4  *
5  * See MS-XCA.
6  *
7  * Initial code from Samba re-licensed with Samuel's permission.
8  * Copyright (C) Samuel Cabrero 2017
9  *
10  * Glib-ification, extra error-checking and WS integration
11  * Copyright (C) Aurélien Aptel 2019
12  *
13  * Wireshark - Network traffic analyzer
14  * By Gerald Combs <gerald@wireshark.org>
15  * Copyright 1998 Gerald Combs
16  *
17  * SPDX-License-Identifier: GPL-2.0-or-later
18  */
19 
20 #include <glib.h>
21 #include <stdlib.h> /* qsort */
22 #include <epan/exceptions.h>
23 #include <epan/tvbuff.h>
24 #include <epan/wmem_scopes.h>
25 
26 #define MAX_INPUT_SIZE (16*1024*1024) /* 16MB */
27 
28 #define TREE_SIZE 1024
29 #define ENCODED_TREE_SIZE 256
30 #define SYMBOL_INFO_SIZE (2*ENCODED_TREE_SIZE)
31 
32 struct input {
33 	tvbuff_t *tvb;
34 	int offset;
35 	gsize size;
36 };
37 
38 /**
39  * Represents a node in a Huffman prefix code tree
40  */
41 struct prefix_code_node {
42 	/* Stores the symbol encoded by this node in the prefix code tree */
43 	guint16 symbol;
44 
45 	/* Indicates whether this node is a leaf in the tree */
46 	guint8 leaf;
47 
48 	/*
49 	 * Points to the node's two children. Values are indexes in
50 	 * the tree node array. The value -1 is used to indicate that
51 	 * a particular child does not exist
52 	 */
53 	gint16 child[2];
54 };
55 
56 /**
57  * Represent information about a Huffman-encoded symbol
58  */
59 struct prefix_code_symbol {
60 	/* Stores the symbol */
61 	guint16 symbol;
62 
63 	/* Stores the symbol’s Huffman prefix code length */
64 	guint16 length;
65 };
66 
67 /**
68  * Represent a byte array as a bit string from which individual bits can
69  * be read
70  */
71 struct bitstring {
72 	/* The byte array */
73 	const struct input *input;
74 
75 	/* The index in source from which the next set of bits will be pulled
76          * when the bits in mask have been consumed */
77 	guint32 bitstring_index;
78 
79 	/* Stores the next bits to be consumed in the bit string */
80 	guint32 mask;
81 
82 	/* Stores the number of bits in mask that remain to be consumed */
83 	gint32 bits;
84 };
85 
86 struct hf_tree {
87 	struct prefix_code_node *root;
88 	struct prefix_code_node nodes[TREE_SIZE];
89 };
90 
is_node_valid(struct hf_tree * tree,struct prefix_code_node * node)91 static gboolean is_node_valid(struct hf_tree *tree, struct prefix_code_node *node)
92 {
93         return (node && node >= tree->nodes && node < tree->nodes + TREE_SIZE);
94 }
95 
96 /**
97  * Links a symbol's prefix_code_node into its correct position in a Huffman
98  * prefix code tree
99  */
prefix_code_tree_add_leaf(struct hf_tree * tree,guint32 leaf_index,guint32 mask,guint32 bits,guint32 * out_index)100 static int prefix_code_tree_add_leaf(struct hf_tree *tree,
101 				     guint32 leaf_index,
102 				     guint32 mask,
103 				     guint32 bits,
104 				     guint32 *out_index)
105 {
106 	struct prefix_code_node *node = &tree->nodes[0];
107 	guint32 i = leaf_index + 1;
108 	guint32 child_index;
109 
110 	if (leaf_index >= TREE_SIZE)
111 		return -1;
112 
113 	while (bits > 1) {
114 		bits = bits - 1;
115 		child_index = (mask >> bits) & 1;
116 		if (node->child[child_index] < 0) {
117 			if (i >= TREE_SIZE)
118 				return -1;
119 			node->child[child_index] = i;
120 			tree->nodes[i].leaf = FALSE;
121 			i = i + 1;
122 		}
123 		node = tree->nodes + node->child[child_index];
124 		if (!is_node_valid(tree, node))
125 			return -1;
126 	}
127 
128 	node->child[mask & 1] = leaf_index;
129 
130 	*out_index = i;
131 	return 0;
132 }
133 
134 /**
135  * Determines the sort order of one prefix_code_symbol relative to another
136  */
compare_symbols(const void * ve1,const void * ve2)137 static int compare_symbols(const void *ve1, const void *ve2)
138 {
139 	const struct prefix_code_symbol *e1 = (const struct prefix_code_symbol *)ve1;
140 	const struct prefix_code_symbol *e2 = (const struct prefix_code_symbol *)ve2;
141 
142 	if (e1->length < e2->length)
143 		return -1;
144 	else if (e1->length > e2->length)
145 		return 1;
146 	else if (e1->symbol < e2->symbol)
147 		return -1;
148 	else if (e1->symbol > e2->symbol)
149 		return 1;
150 	else
151 		return 0;
152 }
153 
154 /**
155  * Rebuilds the Huffman prefix code tree that will be used to decode symbols
156  * during decompression
157  */
PrefixCodeTreeRebuild(struct hf_tree * tree,const struct input * input)158 static int PrefixCodeTreeRebuild( struct hf_tree *tree,
159 				 const struct input *input)
160 {
161 	struct prefix_code_symbol symbolInfo[SYMBOL_INFO_SIZE];
162 	guint32 i, j, mask, bits;
163 	int rc;
164 
165 	for (i = 0; i < TREE_SIZE; i++) {
166 		tree->nodes[i].symbol = 0;
167 		tree->nodes[i].leaf = FALSE;
168 		tree->nodes[i].child[0] = -1;
169 		tree->nodes[i].child[1] = -1;
170 	}
171 
172 	if (input->size < ENCODED_TREE_SIZE)
173 		return -1;
174 
175 	for (i = 0; i < ENCODED_TREE_SIZE; i++) {
176 		symbolInfo[2*i].symbol = 2*i;
177 		symbolInfo[2*i].length = tvb_get_guint8(input->tvb, input->offset+i) & 15;
178 		symbolInfo[2*i+1].symbol = 2*i+1;
179 		symbolInfo[2*i+1].length = tvb_get_guint8(input->tvb, input->offset+i) >> 4;
180 	}
181 
182 	qsort(symbolInfo, SYMBOL_INFO_SIZE, sizeof(symbolInfo[0]), compare_symbols);
183 
184 	i = 0;
185 	while (i < SYMBOL_INFO_SIZE && symbolInfo[i].length == 0) {
186 		i = i + 1;
187 	}
188 
189 	mask = 0;
190 	bits = 1;
191 
192 	tree->root = &tree->nodes[0];
193 	tree->root->leaf = FALSE;
194 
195 	j = 1;
196 	for (; i < 512; i++) {
197 		tree->nodes[j].symbol = symbolInfo[i].symbol;
198 		tree->nodes[j].leaf = TRUE;
199 		mask <<= symbolInfo[i].length - bits;
200 		bits = symbolInfo[i].length;
201 		rc = prefix_code_tree_add_leaf(tree, j, mask, bits, &j);
202 		if (rc)
203 			return rc;
204 		mask += 1;
205 	}
206 
207 	return 0;
208 }
209 
210 /**
211  * Initializes a bitstream data structure
212  */
bitstring_init(struct bitstring * bstr,const struct input * input,guint32 bitstring_index)213 static void bitstring_init(struct bitstring *bstr,
214 			   const struct input *input,
215 			   guint32 bitstring_index)
216 {
217 	bstr->mask = tvb_get_letohs(input->tvb, input->offset+bitstring_index);
218 	bstr->mask <<= sizeof(bstr->mask) * 8 - 16;
219 	bitstring_index += 2;
220 
221 	bstr->mask += tvb_get_letohs(input->tvb, input->offset+bitstring_index);
222 	bitstring_index += 2;
223 
224 	bstr->bits = 32;
225 	bstr->input = input;
226 	bstr->bitstring_index = bitstring_index;
227 }
228 
229 /**
230  * Returns the next n bits from the front of a bit string.
231  */
bitstring_lookup(struct bitstring * bstr,guint32 n)232 static guint32 bitstring_lookup(struct bitstring *bstr, guint32 n)
233 {
234 	if (n == 0 || bstr->bits < 0 || n > (guint32)bstr->bits) {
235 		return 0;
236 	}
237 	return bstr->mask >> (sizeof(bstr->mask) * 8 - n);
238 }
239 
240 /**
241  * Advances the bit string's cursor by n bits.
242  */
bitstring_skip(struct bitstring * bstr,guint32 n)243 static void bitstring_skip(struct bitstring *bstr, guint32 n)
244 {
245 	bstr->mask = bstr->mask << n;
246 	bstr->bits = bstr->bits - n;
247 
248 	if (bstr->bits < 16) {
249 		bstr->mask += tvb_get_letohs(bstr->input->tvb,
250 					     bstr->input->offset + bstr->bitstring_index)
251 			<< (16 - bstr->bits);
252 		bstr->bitstring_index = bstr->bitstring_index + 2;
253 		bstr->bits = bstr->bits + 16;
254 	}
255 }
256 
257 /**
258  * Returns the symbol encoded by the next prefix code in a bit string.
259  */
prefix_code_tree_decode_symbol(struct hf_tree * tree,struct bitstring * bstr,guint32 * out_symbol)260 static int prefix_code_tree_decode_symbol(struct hf_tree *tree,
261 					  struct bitstring *bstr,
262 					  guint32 *out_symbol)
263 {
264 	guint32 bit;
265 	struct prefix_code_node *node = tree->root;
266 
267 	do {
268 		bit = bitstring_lookup(bstr, 1);
269 		bitstring_skip(bstr, 1);
270 		node = tree->nodes + node->child[bit];
271 		if (!is_node_valid(tree, node))
272 			return -1;
273 	} while (node->leaf == FALSE);
274 
275 	*out_symbol = node->symbol;
276 	return 0;
277 }
278 
do_uncompress(struct input * input,wmem_array_t * obuf)279 static gboolean do_uncompress(struct input *input,
280 			      wmem_array_t *obuf)
281 {
282 	guint32 symbol;
283 	guint32 length;
284 	gint32 match_offset;
285 	int rc;
286 	struct hf_tree tree = {0};
287 	struct bitstring bstr = {0};
288 
289 	if (!input->tvb)
290 		return FALSE;
291 
292 	if (input->size > MAX_INPUT_SIZE)
293 		return FALSE;
294 
295 	rc = PrefixCodeTreeRebuild(&tree, input);
296 	if (rc)
297 		return FALSE;
298 
299 	bitstring_init(&bstr, input, ENCODED_TREE_SIZE);
300 
301 	while (1) {
302 		rc = prefix_code_tree_decode_symbol(&tree, &bstr, &symbol);
303 		if (rc < 0)
304 			return FALSE;
305 
306 		if (symbol < 256) {
307 			guint8 v = symbol & 0xFF;
308 			wmem_array_append_one(obuf, v);
309 		} else {
310 			if (symbol == 256) {
311 				/* EOF symbol */
312 				return bstr.bitstring_index == bstr.input->size;
313 			}
314 			symbol = symbol - 256;
315 			length = symbol & 0xF;
316 			symbol = symbol >> 4;
317 
318 			match_offset = (1U << symbol) + bitstring_lookup(&bstr, symbol);
319 			match_offset *= -1;
320 
321 			if (length == 15) {
322 				if (bstr.bitstring_index >= bstr.input->size)
323 					return FALSE;
324 				length = tvb_get_guint8(bstr.input->tvb,
325 							bstr.input->offset+bstr.bitstring_index) + 15;
326 				bstr.bitstring_index += 1;
327 
328 				if (length == 270) {
329 					if (bstr.bitstring_index+1 >= bstr.input->size)
330 						return FALSE;
331 					length = tvb_get_letohs(bstr.input->tvb, bstr.input->offset+bstr.bitstring_index);
332 					bstr.bitstring_index += 2;
333 				}
334 			}
335 
336 			bitstring_skip(&bstr, symbol);
337 
338 			length += 3;
339 			do {
340 				guint8 byte;
341 				guint elem_count = wmem_array_get_count(obuf)+match_offset;
342 
343 				if (wmem_array_try_index(obuf, elem_count, &byte))
344 					return FALSE;
345 				wmem_array_append_one(obuf, byte);
346 				length--;
347 			} while (length != 0);
348 		}
349 	}
350 	return TRUE;
351 }
352 
353 tvbuff_t *
tvb_uncompress_lz77huff(tvbuff_t * tvb,const int offset,int input_size)354 tvb_uncompress_lz77huff(tvbuff_t *tvb,
355 			const int offset,
356 			int input_size)
357 {
358 	volatile gboolean ok;
359 	wmem_allocator_t *pool;
360 	wmem_array_t *obuf;
361 	tvbuff_t *out;
362 	struct input input = {
363 			      .tvb = tvb,
364 			      .offset = offset,
365 			      .size = input_size
366 	};
367 
368 	pool = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE);
369 	obuf = wmem_array_sized_new(pool, 1, input_size*2);
370 
371 	TRY {
372 		ok = do_uncompress(&input, obuf);
373 	} CATCH_ALL {
374 		ok = FALSE;
375 	}
376 	ENDTRY;
377 
378 	if (ok) {
379 		/*
380 		 * Cannot pass a tvb free callback that frees the wmem
381 		 * pool, so we make an extra copy that uses bare
382 		 * pointers. This could be optimized if tvb API had a
383 		 * free pool callback of some sort.
384 		 */
385 		guint size = wmem_array_get_count(obuf);
386 		guint8 *p = (guint8 *)g_malloc(size);
387 		memcpy(p, wmem_array_get_raw(obuf), size);
388 		out = tvb_new_real_data(p, size, size);
389 		tvb_set_free_cb(out, g_free);
390 	} else {
391 		out = NULL;
392 	}
393 
394 	wmem_destroy_allocator(pool);
395 
396 	return out;
397 }
398 
399 tvbuff_t *
tvb_child_uncompress_lz77huff(tvbuff_t * parent,tvbuff_t * tvb,const int offset,int in_size)400 tvb_child_uncompress_lz77huff(tvbuff_t *parent, tvbuff_t *tvb, const int offset, int in_size)
401 {
402 	tvbuff_t *new_tvb = tvb_uncompress_lz77huff(tvb, offset, in_size);
403 	if (new_tvb)
404 		tvb_set_child_real_data_tvbuff(parent, new_tvb);
405 	return new_tvb;
406 }
407 
408 /*
409  * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
410  *
411  * Local variables:
412  * c-basic-offset: 8
413  * tab-width: 8
414  * indent-tabs-mode: t
415  * End:
416  *
417  * vi: set shiftwidth=8 tabstop=8 noexpandtab:
418  * :indentSize=8:tabSize=8:noTabs=false:
419  */
420