1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file lz_decoder.h
4 /// \brief LZ out window
5 ///
6 // Authors: Igor Pavlov
7 // Lasse Collin
8 //
9 // This file has been put into the public domain.
10 // You can do whatever you want with this file.
11 //
12 ///////////////////////////////////////////////////////////////////////////////
13
14 #ifndef LZMA_LZ_DECODER_H
15 #define LZMA_LZ_DECODER_H
16
17 #include "common.h"
18
19
20 typedef struct {
21 /// Pointer to the dictionary buffer. It can be an allocated buffer
22 /// internal to liblzma, or it can a be a buffer given by the
23 /// application when in single-call mode (not implemented yet).
24 uint8_t *buf;
25
26 /// Write position in dictionary. The next byte will be written to
27 /// buf[pos].
28 size_t pos;
29
30 /// Indicates how full the dictionary is. This is used by
31 /// dict_is_distance_valid() to detect corrupt files that would
32 /// read beyond the beginning of the dictionary.
33 size_t full;
34
35 /// Write limit
36 size_t limit;
37
38 /// Size of the dictionary
39 size_t size;
40
41 /// True when dictionary should be reset before decoding more data.
42 bool need_reset;
43
44 } lzma_dict;
45
46
47 typedef struct {
48 size_t dict_size;
49 const uint8_t *preset_dict;
50 size_t preset_dict_size;
51 } lzma_lz_options;
52
53
54 typedef struct {
55 /// Data specific to the LZ-based decoder
56 void *coder;
57
58 /// Function to decode from in[] to *dict
59 lzma_ret (*code)(void *coder,
60 lzma_dict *restrict dict, const uint8_t *restrict in,
61 size_t *restrict in_pos, size_t in_size);
62
63 void (*reset)(void *coder, const void *options);
64
65 /// Set the uncompressed size
66 void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size);
67
68 /// Free allocated resources
69 void (*end)(void *coder, const lzma_allocator *allocator);
70
71 } lzma_lz_decoder;
72
73
74 #define LZMA_LZ_DECODER_INIT \
75 (lzma_lz_decoder){ \
76 .coder = NULL, \
77 .code = NULL, \
78 .reset = NULL, \
79 .set_uncompressed = NULL, \
80 .end = NULL, \
81 }
82
83
84 extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
85 const lzma_allocator *allocator,
86 const lzma_filter_info *filters,
87 lzma_ret (*lz_init)(lzma_lz_decoder *lz,
88 const lzma_allocator *allocator, const void *options,
89 lzma_lz_options *lz_options));
90
91 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
92
93 extern void lzma_lz_decoder_uncompressed(
94 void *coder, lzma_vli uncompressed_size);
95
96
97 //////////////////////
98 // Inline functions //
99 //////////////////////
100
101 /// Get a byte from the history buffer.
102 static inline uint8_t
dict_get(const lzma_dict * const dict,const uint32_t distance)103 dict_get(const lzma_dict *const dict, const uint32_t distance)
104 {
105 return dict->buf[dict->pos - distance - 1
106 + (distance < dict->pos ? 0 : dict->size)];
107 }
108
109
110 /// Test if dictionary is empty.
111 static inline bool
dict_is_empty(const lzma_dict * const dict)112 dict_is_empty(const lzma_dict *const dict)
113 {
114 return dict->full == 0;
115 }
116
117
118 /// Validate the match distance
119 static inline bool
dict_is_distance_valid(const lzma_dict * const dict,const size_t distance)120 dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
121 {
122 return dict->full > distance;
123 }
124
125
126 /// Repeat *len bytes at distance.
127 static inline bool
dict_repeat(lzma_dict * dict,uint32_t distance,uint32_t * len)128 dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
129 {
130 // Don't write past the end of the dictionary.
131 const size_t dict_avail = dict->limit - dict->pos;
132 uint32_t left = my_min(dict_avail, *len);
133 *len -= left;
134
135 // Repeat a block of data from the history. Because memcpy() is faster
136 // than copying byte by byte in a loop, the copying process gets split
137 // into three cases.
138 if (distance < left) {
139 // Source and target areas overlap, thus we can't use
140 // memcpy() nor even memmove() safely.
141 do {
142 dict->buf[dict->pos] = dict_get(dict, distance);
143 ++dict->pos;
144 } while (--left > 0);
145
146 } else if (distance < dict->pos) {
147 // The easiest and fastest case
148 memcpy(dict->buf + dict->pos,
149 dict->buf + dict->pos - distance - 1,
150 left);
151 dict->pos += left;
152
153 } else {
154 // The bigger the dictionary, the more rare this
155 // case occurs. We need to "wrap" the dict, thus
156 // we might need two memcpy() to copy all the data.
157 assert(dict->full == dict->size);
158 const uint32_t copy_pos
159 = dict->pos - distance - 1 + dict->size;
160 uint32_t copy_size = dict->size - copy_pos;
161
162 if (copy_size < left) {
163 memmove(dict->buf + dict->pos, dict->buf + copy_pos,
164 copy_size);
165 dict->pos += copy_size;
166 copy_size = left - copy_size;
167 memcpy(dict->buf + dict->pos, dict->buf, copy_size);
168 dict->pos += copy_size;
169 } else {
170 memmove(dict->buf + dict->pos, dict->buf + copy_pos,
171 left);
172 dict->pos += left;
173 }
174 }
175
176 // Update how full the dictionary is.
177 if (dict->full < dict->pos)
178 dict->full = dict->pos;
179
180 return unlikely(*len != 0);
181 }
182
183
184 /// Puts one byte into the dictionary. Returns true if the dictionary was
185 /// already full and the byte couldn't be added.
186 static inline bool
dict_put(lzma_dict * dict,uint8_t byte)187 dict_put(lzma_dict *dict, uint8_t byte)
188 {
189 if (unlikely(dict->pos == dict->limit))
190 return true;
191
192 dict->buf[dict->pos++] = byte;
193
194 if (dict->pos > dict->full)
195 dict->full = dict->pos;
196
197 return false;
198 }
199
200
201 /// Copies arbitrary amount of data into the dictionary.
202 static inline void
dict_write(lzma_dict * restrict dict,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size,size_t * restrict left)203 dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
204 size_t *restrict in_pos, size_t in_size,
205 size_t *restrict left)
206 {
207 // NOTE: If we are being given more data than the size of the
208 // dictionary, it could be possible to optimize the LZ decoder
209 // so that not everything needs to go through the dictionary.
210 // This shouldn't be very common thing in practice though, and
211 // the slowdown of one extra memcpy() isn't bad compared to how
212 // much time it would have taken if the data were compressed.
213
214 if (in_size - *in_pos > *left)
215 in_size = *in_pos + *left;
216
217 *left -= lzma_bufcpy(in, in_pos, in_size,
218 dict->buf, &dict->pos, dict->limit);
219
220 if (dict->pos > dict->full)
221 dict->full = dict->pos;
222
223 return;
224 }
225
226
227 static inline void
dict_reset(lzma_dict * dict)228 dict_reset(lzma_dict *dict)
229 {
230 dict->need_reset = true;
231 return;
232 }
233
234 #endif
235