1 /*
2 * Decompression code for LZ77+Huffman. This encoding is used by
3 * Microsoft in various file formats and protocols including SMB3.
4 *
5 * See MS-XCA.
6 *
7 * Initial code from Samba re-licensed with Samuel's permission.
8 * Copyright (C) Samuel Cabrero 2017
9 *
10 * Glib-ification, extra error-checking and WS integration
11 * Copyright (C) Aurélien Aptel 2019
12 *
13 * Wireshark - Network traffic analyzer
14 * By Gerald Combs <gerald@wireshark.org>
15 * Copyright 1998 Gerald Combs
16 *
17 * SPDX-License-Identifier: GPL-2.0-or-later
18 */
19
20 #include <glib.h>
21 #include <stdlib.h> /* qsort */
22 #include <epan/exceptions.h>
23 #include <epan/tvbuff.h>
24 #include <epan/wmem_scopes.h>
25
26 #define MAX_INPUT_SIZE (16*1024*1024) /* 16MB */
27
28 #define TREE_SIZE 1024
29 #define ENCODED_TREE_SIZE 256
30 #define SYMBOL_INFO_SIZE (2*ENCODED_TREE_SIZE)
31
32 struct input {
33 tvbuff_t *tvb;
34 int offset;
35 gsize size;
36 };
37
38 /**
39 * Represents a node in a Huffman prefix code tree
40 */
41 struct prefix_code_node {
42 /* Stores the symbol encoded by this node in the prefix code tree */
43 guint16 symbol;
44
45 /* Indicates whether this node is a leaf in the tree */
46 guint8 leaf;
47
48 /*
49 * Points to the node's two children. Values are indexes in
50 * the tree node array. The value -1 is used to indicate that
51 * a particular child does not exist
52 */
53 gint16 child[2];
54 };
55
56 /**
57 * Represent information about a Huffman-encoded symbol
58 */
59 struct prefix_code_symbol {
60 /* Stores the symbol */
61 guint16 symbol;
62
63 /* Stores the symbol’s Huffman prefix code length */
64 guint16 length;
65 };
66
67 /**
68 * Represent a byte array as a bit string from which individual bits can
69 * be read
70 */
71 struct bitstring {
72 /* The byte array */
73 const struct input *input;
74
75 /* The index in source from which the next set of bits will be pulled
76 * when the bits in mask have been consumed */
77 guint32 bitstring_index;
78
79 /* Stores the next bits to be consumed in the bit string */
80 guint32 mask;
81
82 /* Stores the number of bits in mask that remain to be consumed */
83 gint32 bits;
84 };
85
86 struct hf_tree {
87 struct prefix_code_node *root;
88 struct prefix_code_node nodes[TREE_SIZE];
89 };
90
is_node_valid(struct hf_tree * tree,struct prefix_code_node * node)91 static gboolean is_node_valid(struct hf_tree *tree, struct prefix_code_node *node)
92 {
93 return (node && node >= tree->nodes && node < tree->nodes + TREE_SIZE);
94 }
95
96 /**
97 * Links a symbol's prefix_code_node into its correct position in a Huffman
98 * prefix code tree
99 */
prefix_code_tree_add_leaf(struct hf_tree * tree,guint32 leaf_index,guint32 mask,guint32 bits,guint32 * out_index)100 static int prefix_code_tree_add_leaf(struct hf_tree *tree,
101 guint32 leaf_index,
102 guint32 mask,
103 guint32 bits,
104 guint32 *out_index)
105 {
106 struct prefix_code_node *node = &tree->nodes[0];
107 guint32 i = leaf_index + 1;
108 guint32 child_index;
109
110 if (leaf_index >= TREE_SIZE)
111 return -1;
112
113 while (bits > 1) {
114 bits = bits - 1;
115 child_index = (mask >> bits) & 1;
116 if (node->child[child_index] < 0) {
117 if (i >= TREE_SIZE)
118 return -1;
119 node->child[child_index] = i;
120 tree->nodes[i].leaf = FALSE;
121 i = i + 1;
122 }
123 node = tree->nodes + node->child[child_index];
124 if (!is_node_valid(tree, node))
125 return -1;
126 }
127
128 node->child[mask & 1] = leaf_index;
129
130 *out_index = i;
131 return 0;
132 }
133
134 /**
135 * Determines the sort order of one prefix_code_symbol relative to another
136 */
compare_symbols(const void * ve1,const void * ve2)137 static int compare_symbols(const void *ve1, const void *ve2)
138 {
139 const struct prefix_code_symbol *e1 = (const struct prefix_code_symbol *)ve1;
140 const struct prefix_code_symbol *e2 = (const struct prefix_code_symbol *)ve2;
141
142 if (e1->length < e2->length)
143 return -1;
144 else if (e1->length > e2->length)
145 return 1;
146 else if (e1->symbol < e2->symbol)
147 return -1;
148 else if (e1->symbol > e2->symbol)
149 return 1;
150 else
151 return 0;
152 }
153
154 /**
155 * Rebuilds the Huffman prefix code tree that will be used to decode symbols
156 * during decompression
157 */
PrefixCodeTreeRebuild(struct hf_tree * tree,const struct input * input)158 static int PrefixCodeTreeRebuild( struct hf_tree *tree,
159 const struct input *input)
160 {
161 struct prefix_code_symbol symbolInfo[SYMBOL_INFO_SIZE];
162 guint32 i, j, mask, bits;
163 int rc;
164
165 for (i = 0; i < TREE_SIZE; i++) {
166 tree->nodes[i].symbol = 0;
167 tree->nodes[i].leaf = FALSE;
168 tree->nodes[i].child[0] = -1;
169 tree->nodes[i].child[1] = -1;
170 }
171
172 if (input->size < ENCODED_TREE_SIZE)
173 return -1;
174
175 for (i = 0; i < ENCODED_TREE_SIZE; i++) {
176 symbolInfo[2*i].symbol = 2*i;
177 symbolInfo[2*i].length = tvb_get_guint8(input->tvb, input->offset+i) & 15;
178 symbolInfo[2*i+1].symbol = 2*i+1;
179 symbolInfo[2*i+1].length = tvb_get_guint8(input->tvb, input->offset+i) >> 4;
180 }
181
182 qsort(symbolInfo, SYMBOL_INFO_SIZE, sizeof(symbolInfo[0]), compare_symbols);
183
184 i = 0;
185 while (i < SYMBOL_INFO_SIZE && symbolInfo[i].length == 0) {
186 i = i + 1;
187 }
188
189 mask = 0;
190 bits = 1;
191
192 tree->root = &tree->nodes[0];
193 tree->root->leaf = FALSE;
194
195 j = 1;
196 for (; i < 512; i++) {
197 tree->nodes[j].symbol = symbolInfo[i].symbol;
198 tree->nodes[j].leaf = TRUE;
199 mask <<= symbolInfo[i].length - bits;
200 bits = symbolInfo[i].length;
201 rc = prefix_code_tree_add_leaf(tree, j, mask, bits, &j);
202 if (rc)
203 return rc;
204 mask += 1;
205 }
206
207 return 0;
208 }
209
210 /**
211 * Initializes a bitstream data structure
212 */
bitstring_init(struct bitstring * bstr,const struct input * input,guint32 bitstring_index)213 static void bitstring_init(struct bitstring *bstr,
214 const struct input *input,
215 guint32 bitstring_index)
216 {
217 bstr->mask = tvb_get_letohs(input->tvb, input->offset+bitstring_index);
218 bstr->mask <<= sizeof(bstr->mask) * 8 - 16;
219 bitstring_index += 2;
220
221 bstr->mask += tvb_get_letohs(input->tvb, input->offset+bitstring_index);
222 bitstring_index += 2;
223
224 bstr->bits = 32;
225 bstr->input = input;
226 bstr->bitstring_index = bitstring_index;
227 }
228
229 /**
230 * Returns the next n bits from the front of a bit string.
231 */
bitstring_lookup(struct bitstring * bstr,guint32 n)232 static guint32 bitstring_lookup(struct bitstring *bstr, guint32 n)
233 {
234 if (n == 0 || bstr->bits < 0 || n > (guint32)bstr->bits) {
235 return 0;
236 }
237 return bstr->mask >> (sizeof(bstr->mask) * 8 - n);
238 }
239
240 /**
241 * Advances the bit string's cursor by n bits.
242 */
bitstring_skip(struct bitstring * bstr,guint32 n)243 static void bitstring_skip(struct bitstring *bstr, guint32 n)
244 {
245 bstr->mask = bstr->mask << n;
246 bstr->bits = bstr->bits - n;
247
248 if (bstr->bits < 16) {
249 bstr->mask += tvb_get_letohs(bstr->input->tvb,
250 bstr->input->offset + bstr->bitstring_index)
251 << (16 - bstr->bits);
252 bstr->bitstring_index = bstr->bitstring_index + 2;
253 bstr->bits = bstr->bits + 16;
254 }
255 }
256
257 /**
258 * Returns the symbol encoded by the next prefix code in a bit string.
259 */
prefix_code_tree_decode_symbol(struct hf_tree * tree,struct bitstring * bstr,guint32 * out_symbol)260 static int prefix_code_tree_decode_symbol(struct hf_tree *tree,
261 struct bitstring *bstr,
262 guint32 *out_symbol)
263 {
264 guint32 bit;
265 struct prefix_code_node *node = tree->root;
266
267 do {
268 bit = bitstring_lookup(bstr, 1);
269 bitstring_skip(bstr, 1);
270 node = tree->nodes + node->child[bit];
271 if (!is_node_valid(tree, node))
272 return -1;
273 } while (node->leaf == FALSE);
274
275 *out_symbol = node->symbol;
276 return 0;
277 }
278
do_uncompress(struct input * input,wmem_array_t * obuf)279 static gboolean do_uncompress(struct input *input,
280 wmem_array_t *obuf)
281 {
282 guint32 symbol;
283 guint32 length;
284 gint32 match_offset;
285 int rc;
286 struct hf_tree tree = {0};
287 struct bitstring bstr = {0};
288
289 if (!input->tvb)
290 return FALSE;
291
292 if (input->size > MAX_INPUT_SIZE)
293 return FALSE;
294
295 rc = PrefixCodeTreeRebuild(&tree, input);
296 if (rc)
297 return FALSE;
298
299 bitstring_init(&bstr, input, ENCODED_TREE_SIZE);
300
301 while (1) {
302 rc = prefix_code_tree_decode_symbol(&tree, &bstr, &symbol);
303 if (rc < 0)
304 return FALSE;
305
306 if (symbol < 256) {
307 guint8 v = symbol & 0xFF;
308 wmem_array_append_one(obuf, v);
309 } else {
310 if (symbol == 256) {
311 /* EOF symbol */
312 return bstr.bitstring_index == bstr.input->size;
313 }
314 symbol = symbol - 256;
315 length = symbol & 0xF;
316 symbol = symbol >> 4;
317
318 match_offset = (1U << symbol) + bitstring_lookup(&bstr, symbol);
319 match_offset *= -1;
320
321 if (length == 15) {
322 if (bstr.bitstring_index >= bstr.input->size)
323 return FALSE;
324 length = tvb_get_guint8(bstr.input->tvb,
325 bstr.input->offset+bstr.bitstring_index) + 15;
326 bstr.bitstring_index += 1;
327
328 if (length == 270) {
329 if (bstr.bitstring_index+1 >= bstr.input->size)
330 return FALSE;
331 length = tvb_get_letohs(bstr.input->tvb, bstr.input->offset+bstr.bitstring_index);
332 bstr.bitstring_index += 2;
333 }
334 }
335
336 bitstring_skip(&bstr, symbol);
337
338 length += 3;
339 do {
340 guint8 byte;
341 guint elem_count = wmem_array_get_count(obuf)+match_offset;
342
343 if (wmem_array_try_index(obuf, elem_count, &byte))
344 return FALSE;
345 wmem_array_append_one(obuf, byte);
346 length--;
347 } while (length != 0);
348 }
349 }
350 return TRUE;
351 }
352
353 tvbuff_t *
tvb_uncompress_lz77huff(tvbuff_t * tvb,const int offset,int input_size)354 tvb_uncompress_lz77huff(tvbuff_t *tvb,
355 const int offset,
356 int input_size)
357 {
358 volatile gboolean ok;
359 wmem_allocator_t *pool;
360 wmem_array_t *obuf;
361 tvbuff_t *out;
362 struct input input = {
363 .tvb = tvb,
364 .offset = offset,
365 .size = input_size
366 };
367
368 pool = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE);
369 obuf = wmem_array_sized_new(pool, 1, input_size*2);
370
371 TRY {
372 ok = do_uncompress(&input, obuf);
373 } CATCH_ALL {
374 ok = FALSE;
375 }
376 ENDTRY;
377
378 if (ok) {
379 /*
380 * Cannot pass a tvb free callback that frees the wmem
381 * pool, so we make an extra copy that uses bare
382 * pointers. This could be optimized if tvb API had a
383 * free pool callback of some sort.
384 */
385 guint size = wmem_array_get_count(obuf);
386 guint8 *p = (guint8 *)g_malloc(size);
387 memcpy(p, wmem_array_get_raw(obuf), size);
388 out = tvb_new_real_data(p, size, size);
389 tvb_set_free_cb(out, g_free);
390 } else {
391 out = NULL;
392 }
393
394 wmem_destroy_allocator(pool);
395
396 return out;
397 }
398
399 tvbuff_t *
tvb_child_uncompress_lz77huff(tvbuff_t * parent,tvbuff_t * tvb,const int offset,int in_size)400 tvb_child_uncompress_lz77huff(tvbuff_t *parent, tvbuff_t *tvb, const int offset, int in_size)
401 {
402 tvbuff_t *new_tvb = tvb_uncompress_lz77huff(tvb, offset, in_size);
403 if (new_tvb)
404 tvb_set_child_real_data_tvbuff(parent, new_tvb);
405 return new_tvb;
406 }
407
408 /*
409 * Editor modelines - https://www.wireshark.org/tools/modelines.html
410 *
411 * Local variables:
412 * c-basic-offset: 8
413 * tab-width: 8
414 * indent-tabs-mode: t
415 * End:
416 *
417 * vi: set shiftwidth=8 tabstop=8 noexpandtab:
418 * :indentSize=8:tabSize=8:noTabs=false:
419 */
420