1 /* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: expandtab:ts=8:sw=4:softtabstop=4:
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       lz_decoder.h
6 /// \brief      LZ out window
7 ///
8 //  Authors:    Igor Pavlov
9 //              Lasse Collin
10 //
11 //  This file has been put into the public domain.
12 //  You can do whatever you want with this file.
13 //
14 ///////////////////////////////////////////////////////////////////////////////
15 
16 #ifndef LZMA_LZ_DECODER_H
17 #define LZMA_LZ_DECODER_H
18 
19 #include "common.h"
20 
21 
22 typedef struct {
23 	/// Pointer to the dictionary buffer. It can be an allocated buffer
24 	/// internal to liblzma, or it can a be a buffer given by the
25 	/// application when in single-call mode (not implemented yet).
26 	uint8_t *buf;
27 
28 	/// Write position in dictionary. The next byte will be written to
29 	/// buf[pos].
30 	size_t pos;
31 
32 	/// Indicates how full the dictionary is. This is used by
33 	/// dict_is_distance_valid() to detect corrupt files that would
34 	/// read beyond the beginning of the dictionary.
35 	size_t full;
36 
37 	/// Write limit
38 	size_t limit;
39 
40 	/// Size of the dictionary
41 	size_t size;
42 
43 	/// True when dictionary should be reset before decoding more data.
44 	bool need_reset;
45 
46 } lzma_dict;
47 
48 
49 typedef struct {
50 	size_t dict_size;
51 	const uint8_t *preset_dict;
52 	size_t preset_dict_size;
53 } lzma_lz_options;
54 
55 
56 typedef struct {
57 	/// Data specific to the LZ-based decoder
58 	lzma_coder *coder;
59 
60 	/// Function to decode from in[] to *dict
61 	lzma_ret (*code)(lzma_coder *restrict coder,
62 			lzma_dict *restrict dict, const uint8_t *restrict in,
63 			size_t *restrict in_pos, size_t in_size);
64 
65 	void (*reset)(lzma_coder *coder, const void *options);
66 
67 	/// Set the uncompressed size
68 	void (*set_uncompressed)(lzma_coder *coder,
69 			lzma_vli uncompressed_size);
70 
71 	/// Free allocated resources
72 	void (*end)(lzma_coder *coder, lzma_allocator *allocator);
73 
74 } lzma_lz_decoder;
75 
76 
77 #define LZMA_LZ_DECODER_INIT \
78 	(lzma_lz_decoder){ \
79 		.coder = NULL, \
80 		.code = NULL, \
81 		.reset = NULL, \
82 		.set_uncompressed = NULL, \
83 		.end = NULL, \
84 	}
85 
86 
87 extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
88 		lzma_allocator *allocator, const lzma_filter_info *filters,
89 		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
90 			lzma_allocator *allocator, const void *options,
91 			lzma_lz_options *lz_options));
92 
93 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
94 
95 extern void lzma_lz_decoder_uncompressed(
96 		lzma_coder *coder, lzma_vli uncompressed_size);
97 
98 
99 //////////////////////
100 // Inline functions //
101 //////////////////////
102 
103 /// Get a byte from the history buffer.
104 static inline uint8_t
dict_get(const lzma_dict * const dict,const uint32_t distance)105 dict_get(const lzma_dict *const dict, const uint32_t distance)
106 {
107 	return dict->buf[dict->pos - distance - 1
108 			+ (distance < dict->pos ? 0 : dict->size)];
109 }
110 
111 
112 /// Test if dictionary is empty.
113 static inline bool
dict_is_empty(const lzma_dict * const dict)114 dict_is_empty(const lzma_dict *const dict)
115 {
116 	return dict->full == 0;
117 }
118 
119 
120 /// Validate the match distance
121 static inline bool
dict_is_distance_valid(const lzma_dict * const dict,const size_t distance)122 dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
123 {
124 	return dict->full > distance;
125 }
126 
127 
128 /// Repeat *len bytes at distance.
129 static inline bool
dict_repeat(lzma_dict * dict,uint32_t distance,uint32_t * len)130 dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
131 {
132 	// Don't write past the end of the dictionary.
133 	const size_t dict_avail = dict->limit - dict->pos;
134 	uint32_t left = MIN(dict_avail, *len);
135 	*len -= left;
136 
137 	// Repeat a block of data from the history. Because memcpy() is faster
138 	// than copying byte by byte in a loop, the copying process gets split
139 	// into three cases.
140 	if (distance < left) {
141 		// Source and target areas overlap, thus we can't use
142 		// memcpy() nor even memmove() safely.
143 		do {
144 			dict->buf[dict->pos] = dict_get(dict, distance);
145 			++dict->pos;
146 		} while (--left > 0);
147 
148 	} else if (distance < dict->pos) {
149 		// The easiest and fastest case
150 		memcpy(dict->buf + dict->pos,
151 				dict->buf + dict->pos - distance - 1,
152 				left);
153 		dict->pos += left;
154 
155 	} else {
156 		// The bigger the dictionary, the more rare this
157 		// case occurs. We need to "wrap" the dict, thus
158 		// we might need two memcpy() to copy all the data.
159 		assert(dict->full == dict->size);
160 		const uint32_t copy_pos
161 				= dict->pos - distance - 1 + dict->size;
162 		uint32_t copy_size = dict->size - copy_pos;
163 
164 		if (copy_size < left) {
165 			memmove(dict->buf + dict->pos, dict->buf + copy_pos,
166 					copy_size);
167 			dict->pos += copy_size;
168 			copy_size = left - copy_size;
169 			memcpy(dict->buf + dict->pos, dict->buf, copy_size);
170 			dict->pos += copy_size;
171 		} else {
172 			memmove(dict->buf + dict->pos, dict->buf + copy_pos,
173 					left);
174 			dict->pos += left;
175 		}
176 	}
177 
178 	// Update how full the dictionary is.
179 	if (dict->full < dict->pos)
180 		dict->full = dict->pos;
181 
182 	return unlikely(*len != 0);
183 }
184 
185 
186 /// Puts one byte into the dictionary. Returns true if the dictionary was
187 /// already full and the byte couldn't be added.
188 static inline bool
dict_put(lzma_dict * dict,uint8_t byte)189 dict_put(lzma_dict *dict, uint8_t byte)
190 {
191 	if (unlikely(dict->pos == dict->limit))
192 		return true;
193 
194 	dict->buf[dict->pos++] = byte;
195 
196 	if (dict->pos > dict->full)
197 		dict->full = dict->pos;
198 
199 	return false;
200 }
201 
202 
203 /// Copies arbitrary amount of data into the dictionary.
204 static inline void
dict_write(lzma_dict * restrict dict,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size,size_t * restrict left)205 dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
206 		size_t *restrict in_pos, size_t in_size,
207 		size_t *restrict left)
208 {
209 	// NOTE: If we are being given more data than the size of the
210 	// dictionary, it could be possible to optimize the LZ decoder
211 	// so that not everything needs to go through the dictionary.
212 	// This shouldn't be very common thing in practice though, and
213 	// the slowdown of one extra memcpy() isn't bad compared to how
214 	// much time it would have taken if the data were compressed.
215 
216 	if (in_size - *in_pos > *left)
217 		in_size = *in_pos + *left;
218 
219 	*left -= lzma_bufcpy(in, in_pos, in_size,
220 			dict->buf, &dict->pos, dict->limit);
221 
222 	if (dict->pos > dict->full)
223 		dict->full = dict->pos;
224 
225 	return;
226 }
227 
228 
229 static inline void
dict_reset(lzma_dict * dict)230 dict_reset(lzma_dict *dict)
231 {
232 	dict->need_reset = true;
233 	return;
234 }
235 
236 #endif
237