/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * decompress_common.h - Code shared by the XPRESS and LZX decompressors
 *
 * Copyright (C) 2015 Eric Biggers
 */

#ifndef _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H
#define _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H

#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/unaligned.h>


/* "Force inline" macro (not required, but helpful for performance) */
#define forceinline __always_inline

/* Enable whole-word match copying on selected architectures */
#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED)
#  define FAST_UNALIGNED_ACCESS
#endif

/* Size of a machine word */
#define WORDBYTES (sizeof(size_t))

static forceinline void
copy_unaligned_word(const void *src, void *dst)
{
	put_unaligned(get_unaligned((const size_t *)src), (size_t *)dst);
}


/* Generate a "word" with platform-dependent size whose bytes all contain the
 * value 'b'.
 */
static forceinline size_t repeat_byte(u8 b)
{
	size_t v;

	v = b;
	v |= v << 8;
	v |= v << 16;
	v |= v << ((WORDBYTES == 8) ? 32 : 0);
	return v;
}
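
/* For example, on a 64-bit machine repeat_byte(0xAB) evaluates to
 * 0xABABABABABABABAB, and on a 32-bit machine to 0xABABABAB.  lz_copy()
 * below uses this to turn an offset-1 match into a word-at-a-time
 * run-length fill.
 */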

/* Structure that encapsulates a block of in-memory data being interpreted as a
 * stream of bits, optionally with interwoven literal bytes. Bits are assumed
 * to be stored in little endian 16-bit coding units, with the bits ordered high
 * to low.
 */
struct input_bitstream {

	/* Bits that have been read from the input buffer. The bits are
	 * left-justified; the next bit is always bit 31.
	 */
	u32 bitbuf;

	/* Number of bits currently held in @bitbuf. */
	u32 bitsleft;

	/* Pointer to the next byte to be retrieved from the input buffer. */
	const u8 *next;

	/* Pointer to just past the end of the input buffer. */
	const u8 *end;
};
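
/* Layout example for the coding units described above: if the next two input
 * bytes are 0x34 then 0x12, they form the little endian 16-bit unit 0x1234,
 * whose bits are consumed starting from the most significant bit (bit 15)
 * down to the least significant bit (bit 0).
 */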

/* Initialize a bitstream to read from the specified input buffer. */
static forceinline void init_input_bitstream(struct input_bitstream *is,
					     const void *buffer, u32 size)
{
	is->bitbuf = 0;
	is->bitsleft = 0;
	is->next = buffer;
	is->end = is->next + size;
}

/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
 * bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
 * may be called on the bitstream to peek or remove up to @num_bits bits. Note
 * that @num_bits must be <= 16.
 */
static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
					      u32 num_bits)
{
	if (is->bitsleft < num_bits) {
		if (is->end - is->next >= 2) {
			is->bitbuf |= (u32)get_unaligned_le16(is->next)
					<< (16 - is->bitsleft);
			is->next += 2;
		}
		is->bitsleft += 16;
	}
}

/* Return the next @num_bits bits from the bitstream, without removing them.
 * There must be at least @num_bits remaining in the buffer variable, from a
 * previous call to bitstream_ensure_bits().
 */
static forceinline u32
bitstream_peek_bits(const struct input_bitstream *is, const u32 num_bits)
{
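	/* The double shift, instead of a single shift by
	 * (sizeof(is->bitbuf) * 8 - num_bits), keeps each shift count below
	 * 32 even when num_bits == 0; a shift by the full width of bitbuf
	 * would be undefined behaviour.
	 */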
	return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
}

/* Remove @num_bits from the bitstream. There must be at least @num_bits
 * remaining in the buffer variable, from a previous call to
 * bitstream_ensure_bits().
 */
static forceinline void
bitstream_remove_bits(struct input_bitstream *is, u32 num_bits)
{
	is->bitbuf <<= num_bits;
	is->bitsleft -= num_bits;
}

/* Remove and return @num_bits bits from the bitstream. There must be at least
 * @num_bits remaining in the buffer variable, from a previous call to
 * bitstream_ensure_bits().
 */
static forceinline u32
bitstream_pop_bits(struct input_bitstream *is, u32 num_bits)
{
	u32 bits = bitstream_peek_bits(is, num_bits);

	bitstream_remove_bits(is, num_bits);
	return bits;
}

/* Read and return the next @num_bits bits from the bitstream. */
static forceinline u32
bitstream_read_bits(struct input_bitstream *is, u32 num_bits)
{
	bitstream_ensure_bits(is, num_bits);
	return bitstream_pop_bits(is, num_bits);
}
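
/*
 * Example usage (a minimal sketch; the variable names are hypothetical, not
 * part of this API): reading a 3-bit field followed by a 5-bit field. One
 * bitstream_ensure_bits() covers both reads because 3 + 5 <= 16:
 *
 *	struct input_bitstream is;
 *	u32 a, b;
 *
 *	init_input_bitstream(&is, compressed_buf, compressed_size);
 *	bitstream_ensure_bits(&is, 8);
 *	a = bitstream_pop_bits(&is, 3);
 *	b = bitstream_pop_bits(&is, 5);
 *
 * bitstream_read_bits() is the convenience form that does the ensure and the
 * pop in a single call.
 */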

/* Read and return the next literal byte embedded in the bitstream. */
static forceinline u8
bitstream_read_byte(struct input_bitstream *is)
{
	if (unlikely(is->end == is->next))
		return 0;
	return *is->next++;
}

/* Read and return the next 16-bit integer embedded in the bitstream. */
static forceinline u16
bitstream_read_u16(struct input_bitstream *is)
{
	u16 v;

	if (unlikely(is->end - is->next < 2))
		return 0;
	v = get_unaligned_le16(is->next);
	is->next += 2;
	return v;
}

/* Read and return the next 32-bit integer embedded in the bitstream. */
static forceinline u32
bitstream_read_u32(struct input_bitstream *is)
{
	u32 v;

	if (unlikely(is->end - is->next < 4))
		return 0;
	v = get_unaligned_le32(is->next);
	is->next += 4;
	return v;
}

/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
 * Return a pointer to the byte past the last one written, or NULL if the read
 * would overrun the input buffer.
 */
static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
					      void *dst_buffer, size_t count)
{
	if ((size_t)(is->end - is->next) < count)
		return NULL;
	memcpy(dst_buffer, is->next, count);
	is->next += count;
	return (u8 *)dst_buffer + count;
}

/* Align the input bitstream on a coding-unit boundary. */
static forceinline void bitstream_align(struct input_bitstream *is)
{
	is->bitsleft = 0;
	is->bitbuf = 0;
}

extern int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
				     const u32 num_bits, const u8 lens[],
				     const u32 max_codeword_len,
				     u16 working_space[]);


/* Read and return the next Huffman-encoded symbol from a bitstream. If the
 * input data is exhausted, the Huffman symbol is decoded as if the missing
 * bits are all zeroes.
 */
static forceinline u32 read_huffsym(struct input_bitstream *istream,
				    const u16 decode_table[],
				    u32 table_bits,
				    u32 max_codeword_len)
{
	u32 entry;
	u32 key_bits;

	bitstream_ensure_bits(istream, max_codeword_len);

	/* Index the decode table by the next table_bits bits of the input. */
	key_bits = bitstream_peek_bits(istream, table_bits);
	entry = decode_table[key_bits];
	if (entry < 0xC000) {
		/* Fast case: The decode table directly provided the
		 * symbol and codeword length. The low 11 bits are the
		 * symbol, and the high 5 bits are the codeword length.
		 */
		bitstream_remove_bits(istream, entry >> 11);
		return entry & 0x7FF;
	}
	/* Slow case: The codeword for the symbol is longer than
	 * table_bits, so the symbol does not have an entry
	 * directly in the first (1 << table_bits) entries of the
	 * decode table. Traverse the appropriate binary tree
	 * bit-by-bit to decode the symbol.
	 */
	bitstream_remove_bits(istream, table_bits);
	do {
		key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
	} while ((entry = decode_table[key_bits]) >= 0xC000);
	return entry;
}
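
/*
 * Illustrative sketch of the two kinds of decode_table[] entries (the values
 * below are hypothetical, not taken from a real table):
 *
 *	entry = 0x1005:	direct entry; codeword length = 0x1005 >> 11 = 2 and
 *			symbol = 0x1005 & 0x7FF = 5, so read_huffsym() removes
 *			2 bits and returns symbol 5.
 *
 *	entry = 0xC040:	>= 0xC000, so a binary-tree node; the next input bit
 *			selects child (0xC040 & 0x3FFF) + 0 or + 1, i.e. index
 *			0x40 or 0x41, and the walk repeats until an entry below
 *			0xC000 (a symbol) is found.
 */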

/*
 * Copy an LZ77 match at (dst - offset) to dst.
 *
 * The length and offset must already be validated --- that is, (dst - offset)
 * can't underrun the output buffer, and (dst + length) can't overrun the
 * output buffer. Also, the length cannot be 0.
 *
 * @bufend points to the byte past the end of the output buffer. This function
 * won't write any data beyond this position.
 *
 * Returns dst + length.
 */
static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
			       u32 min_length)
{
	const u8 *src = dst - offset;

	/*
	 * Try to copy one machine word at a time. On i386 and x86_64 this is
	 * faster than copying one byte at a time, unless the data is
	 * near-random and all the matches have very short lengths. Note that
	 * since this requires unaligned memory accesses, it won't necessarily
	 * be faster on every architecture.
	 *
	 * Also note that we might copy more than the length of the match. For
	 * example, if a word is 8 bytes and the match is of length 5, then
	 * we'll simply copy 8 bytes. This is okay as long as we don't write
	 * beyond the end of the output buffer, hence the check for (bufend -
	 * end >= WORDBYTES - 1).
	 */
#ifdef FAST_UNALIGNED_ACCESS
	u8 * const end = dst + length;

	if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {

		if (offset >= WORDBYTES) {
			/* The source and destination words don't overlap. */

			/* To improve branch prediction, one iteration of this
			 * loop is unrolled. Most matches are short and will
			 * fail the first check. But if that check passes, then
			 * it becomes increasingly likely that the match is
			 * long and we'll need to continue copying.
			 */

			copy_unaligned_word(src, dst);
			src += WORDBYTES;
			dst += WORDBYTES;

			if (dst < end) {
				do {
					copy_unaligned_word(src, dst);
					src += WORDBYTES;
					dst += WORDBYTES;
				} while (dst < end);
			}
			return end;
		} else if (offset == 1) {

			/* Offset 1 matches are equivalent to run-length
			 * encoding of the previous byte. This case is common
			 * if the data contains many repeated bytes.
			 */
			size_t v = repeat_byte(*(dst - 1));

			do {
				put_unaligned(v, (size_t *)dst);
				src += WORDBYTES;
				dst += WORDBYTES;
			} while (dst < end);
			return end;
		}
		/*
		 * We don't bother with special cases for other 'offset <
		 * WORDBYTES', which are usually rarer than 'offset == 1'.
		 * Extra checks will just slow things down. It's possible to
		 * handle all the 'offset < WORDBYTES' cases with the same
		 * code, but that makes things more complicated and doesn't
		 * seem any faster overall; it definitely slows down the more
		 * common 'offset == 1' case.
		 */
	}
#endif /* FAST_UNALIGNED_ACCESS */

	/* Fall back to a bytewise copy. */

	if (min_length >= 2) {
		*dst++ = *src++;
		length--;
	}
	if (min_length >= 3) {
		*dst++ = *src++;
		length--;
	}
	do {
		*dst++ = *src++;
	} while (--length);

	return dst;
}
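
/*
 * Example caller (a minimal sketch; the variable names are hypothetical): once
 * a decompressor has decoded a match of 'length' bytes at distance 'offset',
 * it advances its output pointer with
 *
 *	out_next = lz_copy(out_next, length, offset, out_end, min_match_len);
 *
 * where 'out_end' points just past the end of the output buffer and
 * 'min_match_len' is the format's minimum match length, which the bytewise
 * fallback uses to copy the first bytes of the match without testing the
 * loop counter.
 */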

#endif /* _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H */