1 /* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: expandtab:ts=8:sw=4:softtabstop=4:
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file lz_decoder.h
6 /// \brief LZ out window
7 ///
8 // Authors: Igor Pavlov
9 // Lasse Collin
10 //
11 // This file has been put into the public domain.
12 // You can do whatever you want with this file.
13 //
14 ///////////////////////////////////////////////////////////////////////////////
15
16 #ifndef LZMA_LZ_DECODER_H
17 #define LZMA_LZ_DECODER_H
18
19 #include "common.h"
20
21
22 typedef struct {
23 /// Pointer to the dictionary buffer. It can be an allocated buffer
24 /// internal to liblzma, or it can a be a buffer given by the
25 /// application when in single-call mode (not implemented yet).
26 uint8_t *buf;
27
28 /// Write position in dictionary. The next byte will be written to
29 /// buf[pos].
30 size_t pos;
31
32 /// Indicates how full the dictionary is. This is used by
33 /// dict_is_distance_valid() to detect corrupt files that would
34 /// read beyond the beginning of the dictionary.
35 size_t full;
36
37 /// Write limit
38 size_t limit;
39
40 /// Size of the dictionary
41 size_t size;
42
43 /// True when dictionary should be reset before decoding more data.
44 bool need_reset;
45
46 } lzma_dict;
47
48
49 typedef struct {
50 size_t dict_size;
51 const uint8_t *preset_dict;
52 size_t preset_dict_size;
53 } lzma_lz_options;
54
55
56 typedef struct {
57 /// Data specific to the LZ-based decoder
58 lzma_coder *coder;
59
60 /// Function to decode from in[] to *dict
61 lzma_ret (*code)(lzma_coder *restrict coder,
62 lzma_dict *restrict dict, const uint8_t *restrict in,
63 size_t *restrict in_pos, size_t in_size);
64
65 void (*reset)(lzma_coder *coder, const void *options);
66
67 /// Set the uncompressed size
68 void (*set_uncompressed)(lzma_coder *coder,
69 lzma_vli uncompressed_size);
70
71 /// Free allocated resources
72 void (*end)(lzma_coder *coder, lzma_allocator *allocator);
73
74 } lzma_lz_decoder;
75
76
77 #define LZMA_LZ_DECODER_INIT \
78 (lzma_lz_decoder){ \
79 .coder = NULL, \
80 .code = NULL, \
81 .reset = NULL, \
82 .set_uncompressed = NULL, \
83 .end = NULL, \
84 }
85
86
87 extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
88 lzma_allocator *allocator, const lzma_filter_info *filters,
89 lzma_ret (*lz_init)(lzma_lz_decoder *lz,
90 lzma_allocator *allocator, const void *options,
91 lzma_lz_options *lz_options));
92
93 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
94
95 extern void lzma_lz_decoder_uncompressed(
96 lzma_coder *coder, lzma_vli uncompressed_size);
97
98
99 //////////////////////
100 // Inline functions //
101 //////////////////////
102
103 /// Get a byte from the history buffer.
104 static inline uint8_t
dict_get(const lzma_dict * const dict,const uint32_t distance)105 dict_get(const lzma_dict *const dict, const uint32_t distance)
106 {
107 return dict->buf[dict->pos - distance - 1
108 + (distance < dict->pos ? 0 : dict->size)];
109 }
110
111
112 /// Test if dictionary is empty.
113 static inline bool
dict_is_empty(const lzma_dict * const dict)114 dict_is_empty(const lzma_dict *const dict)
115 {
116 return dict->full == 0;
117 }
118
119
120 /// Validate the match distance
121 static inline bool
dict_is_distance_valid(const lzma_dict * const dict,const size_t distance)122 dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
123 {
124 return dict->full > distance;
125 }
126
127
128 /// Repeat *len bytes at distance.
129 static inline bool
dict_repeat(lzma_dict * dict,uint32_t distance,uint32_t * len)130 dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
131 {
132 // Don't write past the end of the dictionary.
133 const size_t dict_avail = dict->limit - dict->pos;
134 uint32_t left = MIN(dict_avail, *len);
135 *len -= left;
136
137 // Repeat a block of data from the history. Because memcpy() is faster
138 // than copying byte by byte in a loop, the copying process gets split
139 // into three cases.
140 if (distance < left) {
141 // Source and target areas overlap, thus we can't use
142 // memcpy() nor even memmove() safely.
143 do {
144 dict->buf[dict->pos] = dict_get(dict, distance);
145 ++dict->pos;
146 } while (--left > 0);
147
148 } else if (distance < dict->pos) {
149 // The easiest and fastest case
150 memcpy(dict->buf + dict->pos,
151 dict->buf + dict->pos - distance - 1,
152 left);
153 dict->pos += left;
154
155 } else {
156 // The bigger the dictionary, the more rare this
157 // case occurs. We need to "wrap" the dict, thus
158 // we might need two memcpy() to copy all the data.
159 assert(dict->full == dict->size);
160 const uint32_t copy_pos
161 = dict->pos - distance - 1 + dict->size;
162 uint32_t copy_size = dict->size - copy_pos;
163
164 if (copy_size < left) {
165 memmove(dict->buf + dict->pos, dict->buf + copy_pos,
166 copy_size);
167 dict->pos += copy_size;
168 copy_size = left - copy_size;
169 memcpy(dict->buf + dict->pos, dict->buf, copy_size);
170 dict->pos += copy_size;
171 } else {
172 memmove(dict->buf + dict->pos, dict->buf + copy_pos,
173 left);
174 dict->pos += left;
175 }
176 }
177
178 // Update how full the dictionary is.
179 if (dict->full < dict->pos)
180 dict->full = dict->pos;
181
182 return unlikely(*len != 0);
183 }
184
185
186 /// Puts one byte into the dictionary. Returns true if the dictionary was
187 /// already full and the byte couldn't be added.
188 static inline bool
dict_put(lzma_dict * dict,uint8_t byte)189 dict_put(lzma_dict *dict, uint8_t byte)
190 {
191 if (unlikely(dict->pos == dict->limit))
192 return true;
193
194 dict->buf[dict->pos++] = byte;
195
196 if (dict->pos > dict->full)
197 dict->full = dict->pos;
198
199 return false;
200 }
201
202
203 /// Copies arbitrary amount of data into the dictionary.
204 static inline void
dict_write(lzma_dict * restrict dict,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size,size_t * restrict left)205 dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
206 size_t *restrict in_pos, size_t in_size,
207 size_t *restrict left)
208 {
209 // NOTE: If we are being given more data than the size of the
210 // dictionary, it could be possible to optimize the LZ decoder
211 // so that not everything needs to go through the dictionary.
212 // This shouldn't be very common thing in practice though, and
213 // the slowdown of one extra memcpy() isn't bad compared to how
214 // much time it would have taken if the data were compressed.
215
216 if (in_size - *in_pos > *left)
217 in_size = *in_pos + *left;
218
219 *left -= lzma_bufcpy(in, in_pos, in_size,
220 dict->buf, &dict->pos, dict->limit);
221
222 if (dict->pos > dict->full)
223 dict->full = dict->pos;
224
225 return;
226 }
227
228
229 static inline void
dict_reset(lzma_dict * dict)230 dict_reset(lzma_dict *dict)
231 {
232 dict->need_reset = true;
233 return;
234 }
235
236 #endif
237