1 use std::ptr;
2
3 use byteorder::{ByteOrder, LittleEndian as LE};
4
5 use error::{Error, Result};
6 use tag;
7 use varint::read_varu64;
8 use MAX_INPUT_SIZE;
9
/// A lookup table for quickly computing the various attributes derived from a
/// tag byte.
const TAG_LOOKUP_TABLE: TagLookupTable = TagLookupTable(tag::TAG_LOOKUP_TABLE);

/// `WORD_MASK` is a map from the size of an integer in bytes to its
/// corresponding mask on a 32 bit integer. This is used when we need to read
/// an integer and we know there are at least 4 bytes to read from a buffer.
/// In this case, we can read a 32 bit little endian integer and mask out only
/// the bits we need. This in particular saves a branch.
const WORD_MASK: [usize; 5] = [0, 0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF];
20
21 /// Returns the decompressed size (in bytes) of the compressed bytes given.
22 ///
23 /// `input` must be a sequence of bytes returned by a conforming Snappy
24 /// compressor.
25 ///
26 /// # Errors
27 ///
28 /// This function returns an error in the following circumstances:
29 ///
30 /// * An invalid Snappy header was seen.
31 /// * The total space required for decompression exceeds `2^32 - 1`.
decompress_len(input: &[u8]) -> Result<usize>32 pub fn decompress_len(input: &[u8]) -> Result<usize> {
33 if input.is_empty() {
34 return Ok(0);
35 }
36 Ok(try!(Header::read(input)).decompress_len)
37 }
38
/// Decoder is a raw decoder for decompressing bytes in the Snappy format.
///
/// This decoder does not use the Snappy frame format and simply decompresses
/// the given bytes as if it were returned from `Encoder`.
///
/// Unless you explicitly need the low-level control, you should use
/// `Reader` instead, which decompresses the Snappy frame format.
#[derive(Clone, Debug, Default)]
pub struct Decoder {
    // Place holder for potential future fields. Having a private field also
    // forces construction to go through `Decoder::new`/`Decoder::default`.
    _dummy: (),
}
51
52 impl Decoder {
53 /// Return a new decoder that can be used for decompressing bytes.
new() -> Decoder54 pub fn new() -> Decoder {
55 Decoder { _dummy: () }
56 }
57
58 /// Decompresses all bytes in `input` into `output`.
59 ///
60 /// `input` must be a sequence of bytes returned by a conforming Snappy
61 /// compressor.
62 ///
63 /// The size of `output` must be large enough to hold all decompressed
64 /// bytes from the `input`. The size required can be queried with the
65 /// `decompress_len` function.
66 ///
67 /// On success, this returns the number of bytes written to `output`.
68 ///
69 /// # Errors
70 ///
71 /// This method returns an error in the following circumstances:
72 ///
73 /// * Invalid compressed Snappy data was seen.
74 /// * The total space required for decompression exceeds `2^32 - 1`.
75 /// * `output` has length less than `decompress_len(input)`.
decompress( &mut self, input: &[u8], output: &mut [u8], ) -> Result<usize>76 pub fn decompress(
77 &mut self,
78 input: &[u8],
79 output: &mut [u8],
80 ) -> Result<usize> {
81 if input.is_empty() {
82 return Err(Error::Empty);
83 }
84 let hdr = try!(Header::read(input));
85 if hdr.decompress_len > output.len() {
86 return Err(Error::BufferTooSmall {
87 given: output.len() as u64,
88 min: hdr.decompress_len as u64,
89 });
90 }
91 let dst = &mut output[..hdr.decompress_len];
92 let mut dec = Decompress {
93 src: &input[hdr.len..],
94 s: 0,
95 dst: dst,
96 d: 0,
97 };
98 try!(dec.decompress());
99 Ok(dec.dst.len())
100 }
101
102 /// Decompresses all bytes in `input` into a freshly allocated `Vec`.
103 ///
104 /// This is just like the `decompress` method, except it allocates a `Vec`
105 /// with the right size for you. (This is intended to be a convenience
106 /// method.)
107 ///
108 /// This method returns an error under the same circumstances that
109 /// `decompress` does.
decompress_vec(&mut self, input: &[u8]) -> Result<Vec<u8>>110 pub fn decompress_vec(&mut self, input: &[u8]) -> Result<Vec<u8>> {
111 let mut buf = vec![0; try!(decompress_len(input))];
112 let n = try!(self.decompress(input, &mut buf));
113 buf.truncate(n);
114 Ok(buf)
115 }
116 }
117
/// Decompress is the state of the Snappy decompressor.
struct Decompress<'s, 'd> {
    /// The original compressed bytes not including the header.
    src: &'s [u8],
    /// The current position in the compressed bytes.
    s: usize,
    /// The output buffer to write the decompressed bytes.
    dst: &'d mut [u8],
    /// The current position in the decompressed buffer.
    d: usize,
}
129
impl<'s, 'd> Decompress<'s, 'd> {
    /// Decompresses snappy compressed bytes in `src` to `dst`.
    ///
    /// This assumes that the header has already been read and that `dst` is
    /// big enough to store all decompressed bytes.
    fn decompress(&mut self) -> Result<()> {
        while self.s < self.src.len() {
            let byte = self.src[self.s];
            self.s += 1;
            // The low two bits of a tag byte determine its kind: `0b00` is a
            // literal; anything else is handled as a copy.
            if byte & 0b000000_11 == 0 {
                // For literals, the upper six bits encode `length - 1`. (For
                // values >= 61, `read_literal` treats it as a marker saying
                // how many trailing bytes encode the real length.)
                let len = (byte >> 2) as usize + 1;
                try!(self.read_literal(len));
            } else {
                try!(self.read_copy(byte));
            }
        }
        // The header promised exactly `dst.len()` decompressed bytes;
        // producing any other amount means the input was corrupt.
        if self.d != self.dst.len() {
            return Err(Error::HeaderMismatch {
                expected_len: self.dst.len() as u64,
                got_len: self.d as u64,
            });
        }
        Ok(())
    }

    /// Decompresses a literal from `src` starting at `s` to `dst` starting at
    /// `d`, advancing `s` and `d` past the bytes consumed and written. `s`
    /// should point to the byte immediately following the literal tag byte.
    ///
    /// `len` is the length of the literal if it's <=60. Otherwise, it's the
    /// length tag, indicating the number of bytes needed to read a little
    /// endian integer at `src[s..]`. i.e., `61 => 1 byte`, `62 => 2 bytes`,
    /// `63 => 3 bytes` and `64 => 4 bytes`.
    ///
    /// `len` must be <=64.
    #[inline(always)]
    fn read_literal(
        &mut self,
        len: usize,
    ) -> Result<()> {
        debug_assert!(len <= 64);
        let mut len = len as u64;
        // As an optimization for the common case, if the literal length is
        // <=16 and we have enough room in both `src` and `dst`, copy the
        // literal using unaligned loads and stores.
        //
        // We pick 16 bytes with the hope that it optimizes down to a 128 bit
        // load/store.
        if len <= 16
            && self.s + 16 <= self.src.len()
            && self.d + 16 <= self.dst.len() {
            unsafe {
                // SAFETY: We know both src and dst have at least 16 bytes of
                // wiggle room after s/d, even if `len` is <16, so the copy is
                // safe.
                let srcp = self.src.as_ptr().offset(self.s as isize);
                let dstp = self.dst.as_mut_ptr().offset(self.d as isize);
                // Hopefully uses SIMD registers for 128 bit load/store. We
                // always copy 16 bytes even when `len` is smaller; since `d`
                // only advances by `len`, any excess bytes written are
                // overwritten by subsequent operations.
                ptr::copy_nonoverlapping(srcp, dstp, 16);
            }
            self.d += len as usize;
            self.s += len as usize;
            return Ok(());
        }
        // When the length is bigger than 60, it indicates that we need to read
        // an additional 1-4 bytes to get the real length of the literal.
        if len >= 61 {
            // If there aren't at least 4 bytes left to read then we know this
            // is corrupt because the literal must have length >=61.
            if self.s as u64 + 4 > self.src.len() as u64 {
                return Err(Error::Literal {
                    len: 4,
                    src_len: (self.src.len() - self.s) as u64,
                    dst_len: (self.dst.len() - self.d) as u64,
                });
            }
            // Since we know there are 4 bytes left to read, read a 32 bit LE
            // integer and mask away the bits we don't need.
            let byte_count = len as usize - 60;
            len = LE::read_u32(&self.src[self.s..]) as u64;
            len = (len & (WORD_MASK[byte_count] as u64)) + 1;
            self.s += byte_count;
        }
        // If there's not enough buffer left to load or store this literal,
        // then the input is corrupt. (The subtractions cannot underflow: `s`
        // and `d` never exceed their buffer lengths.)
        if ((self.src.len() - self.s) as u64) < len
            || ((self.dst.len() - self.d) as u64) < len {
            return Err(Error::Literal {
                len: len,
                src_len: (self.src.len() - self.s) as u64,
                dst_len: (self.dst.len() - self.d) as u64,
            });
        }
        unsafe {
            // SAFETY: We've already checked the bounds, so we know this copy
            // is correct.
            let srcp = self.src.as_ptr().offset(self.s as isize);
            let dstp = self.dst.as_mut_ptr().offset(self.d as isize);
            ptr::copy_nonoverlapping(srcp, dstp, len as usize);
        }
        self.s += len as usize;
        self.d += len as usize;
        Ok(())
    }

    /// Reads a copy from `src` and writes the decompressed bytes to `dst`. `s`
    /// should point to the byte immediately following the copy tag byte.
    #[inline(always)]
    fn read_copy(
        &mut self,
        tag_byte: u8,
    ) -> Result<()> {
        // Find the copy offset and len, then advance the input past the copy.
        // The rest of this function deals with reading/writing to output only.
        let entry = TAG_LOOKUP_TABLE.entry(tag_byte);
        let offset = try!(entry.offset(self.src, self.s));
        let len = entry.len();
        self.s += entry.num_tag_bytes();

        // What we really care about here is whether `offset == 0` or
        // `offset > d` — both make the copy invalid, since a copy reads the
        // `offset` bytes immediately before the current output position. To
        // save an extra branch, use `d <= offset - 1` instead: when `offset`
        // is `0`, `offset.wrapping_sub(1)` is `usize::MAX`, which is `>=` any
        // possible `d`.
        if self.d <= offset.wrapping_sub(1) {
            return Err(Error::Offset {
                offset: offset as u64,
                dst_pos: self.d as u64,
            });
        }
        // When all is said and done, dst is advanced to end.
        let end = self.d + len;
        // When the copy is small and the offset is at least 8 bytes away from
        // `d`, then we can decompress the copy with two 64 bit unaligned
        // loads/stores.
        if offset >= 8 && len <= 16 && self.d + 16 <= self.dst.len() {
            unsafe {
                // SAFETY: We know dstp points to at least 16 bytes of memory
                // from the condition above, and we also know that dstp is
                // preceded by at least `offset` bytes from the offset check
                // above.
                //
                // We also know that each 8 byte source chunk does not overlap
                // its destination because `offset >= 8`, justifying the use
                // of copy_nonoverlapping.
                let dstp = self.dst.as_mut_ptr().offset(self.d as isize);
                let srcp = dstp.offset(-(offset as isize));
                // We can't do a single 16 byte load/store because src/dst may
                // overlap with each other. Namely, the second copy here may
                // copy bytes written in the first copy!
                ptr::copy_nonoverlapping(srcp, dstp, 8);
                ptr::copy_nonoverlapping(srcp.offset(8), dstp.offset(8), 8);
            }
        // If we have some wiggle room, try to decompress the copy 16 bytes
        // at a time with 128 bit unaligned loads/stores. Remember, we can't
        // just do a memcpy because decompressing copies may require copying
        // overlapping memory.
        //
        // We need the extra wiggle room to make effective use of 128 bit
        // loads/stores. Even if the store ends up copying more data than we
        // need, we're careful to advance `d` by the correct amount at the end.
        } else if end + 24 <= self.dst.len() {
            unsafe {
                // SAFETY: We know that dstp is preceded by at least `offset`
                // bytes from the offset check above.
                //
                // We don't know whether dstp overlaps with srcp, so we start
                // by copying from srcp to dstp until they no longer overlap.
                // The worst case is when dstp-srcp = 3 and copy length = 1.
                // The first loop will issue these copy operations before
                // stopping:
                //
                //   [-1, 14] -> [0, 15]
                //   [-1, 14] -> [3, 18]
                //   [-1, 14] -> [9, 24]
                //
                // But the copy had length 1, so it was only supposed to write
                // to [0, 0]. But the last copy wrote to [9, 24], which is 24
                // extra bytes in dst *beyond* the end of the copy, which is
                // guaranteed in bounds by the `end + 24` conditional above.
                let mut dstp = self.dst.as_mut_ptr().offset(self.d as isize);
                let mut srcp = dstp.offset(-(offset as isize));
                loop {
                    let diff = (dstp as isize) - (srcp as isize);
                    if diff >= 16 {
                        break;
                    }
                    // srcp and dstp can overlap, so use ptr::copy.
                    debug_assert!(self.d + 16 <= self.dst.len());
                    ptr::copy(srcp, dstp, 16);
                    // Only dstp advances, so the gap between the pointers
                    // doubles every iteration; the loop terminates quickly.
                    self.d += diff as usize;
                    dstp = dstp.offset(diff);
                }
                // srcp and dstp are now at least 16 bytes apart, so copy in
                // non-overlapping 16 byte chunks. This may write past `end`,
                // which the `end + 24` check above makes safe.
                while self.d < end {
                    ptr::copy_nonoverlapping(srcp, dstp, 16);
                    srcp = srcp.offset(16);
                    dstp = dstp.offset(16);
                    self.d += 16;
                }
                // At this point, `d` is likely wrong. We correct it before
                // returning. Its correct value is `end`.
            }
        } else {
            if end > self.dst.len() {
                return Err(Error::CopyWrite {
                    len: len as u64,
                    dst_len: (self.dst.len() - self.d) as u64,
                });
            }
            // Finally, the slow byte-by-byte case, which should only be used
            // for the last few bytes of decompression. Copying one byte at a
            // time handles overlapping ranges correctly: a byte may read a
            // byte written earlier in this same loop.
            while self.d != end {
                self.dst[self.d] = self.dst[self.d - offset];
                self.d += 1;
            }
        }
        self.d = end;
        Ok(())
    }
}
348
/// Header represents the single varint that starts every Snappy compressed
/// block.
#[derive(Debug)]
struct Header {
    /// The length of the header in bytes (i.e., the encoded varint).
    len: usize,
    /// The length of the original decompressed input in bytes. Guaranteed by
    /// `Header::read` to be at most `MAX_INPUT_SIZE`.
    decompress_len: usize,
}
358
359 impl Header {
360 /// Reads the varint header from the given input.
361 ///
362 /// If there was a problem reading the header then an error is returned.
363 /// If a header is returned then it is guaranteed to be valid.
364 #[inline(always)]
read(input: &[u8]) -> Result<Header>365 fn read(input: &[u8]) -> Result<Header> {
366 let (decompress_len, header_len) = read_varu64(input);
367 if header_len == 0 {
368 return Err(Error::Header);
369 }
370 if decompress_len > MAX_INPUT_SIZE {
371 return Err(Error::TooBig {
372 given: decompress_len as u64,
373 max: MAX_INPUT_SIZE,
374 });
375 }
376 Ok(Header { len: header_len, decompress_len: decompress_len as usize })
377 }
378 }
379
/// A lookup table for quickly computing the various attributes derived from
/// a tag byte. The attributes are most useful for the three "copy" tags
/// and include the length of the copy, part of the offset (for copy 1-byte
/// only) and the total number of bytes following the tag byte that encode
/// the other part of the offset (1 for copy 1, 2 for copy 2 and 4 for copy 4).
///
/// More specifically, the keys of the table are u8s and the values are u16s.
/// The bits of the values are laid out as follows:
///
/// xxaa abbb xxcc cccc
///
/// Where `a` is the number of bytes, `b` are the three bits of the offset
/// for copy 1 (the other 8 bits are in the byte following the tag byte; for
/// copy 2 and copy 4, `b = 0`), and `c` is the length of the copy (max of 64).
///
/// We could pack this in fewer bits, but the position of the three `b` bits
/// lines up with the most significant three bits in the total offset for copy
/// 1, which avoids an extra shift instruction.
///
/// In sum, this table is useful because it reduces branches and various
/// arithmetic operations.
struct TagLookupTable([u16; 256]);
402
403 impl TagLookupTable {
404 /// Look up the tag entry given the tag `byte`.
405 #[inline(always)]
entry(&self, byte: u8) -> TagEntry406 fn entry(&self, byte: u8) -> TagEntry {
407 TagEntry(self.0[byte as usize] as usize)
408 }
409 }
410
/// Represents a single entry in the tag lookup table.
///
/// See the documentation in `TagLookupTable` for the bit layout.
///
/// The type is a `usize` for convenience.
struct TagEntry(usize);
417
impl TagEntry {
    /// Return the total number of bytes following this tag byte that are
    /// needed to encode the copy offset.
    fn num_tag_bytes(&self) -> usize {
        // The byte count is stored in the `a` bits of the entry (bits 11+).
        self.0 >> 11
    }

    /// Return the total copy length, capped at 64.
    fn len(&self) -> usize {
        // The length is stored in the low `c` bits of the entry.
        self.0 & 0xFF
    }

    /// Return the copy offset corresponding to this copy operation. `s` should
    /// point to the position just after the tag byte that this entry was read
    /// from.
    ///
    /// This requires reading from the compressed input since the offset is
    /// encoded in bytes following the tag byte.
    fn offset(&self, src: &[u8], s: usize) -> Result<usize> {
        let num_tag_bytes = self.num_tag_bytes();
        let trailer =
            // It is critical for this case to come first, since it is the
            // fast path. We really hope that this case gets branch
            // predicted.
            if s + 4 <= src.len() {
                unsafe {
                    // SAFETY: The conditional above guarantees that
                    // src[s..s+4] is valid to read from.
                    let p = src.as_ptr().offset(s as isize);
                    // We use WORD_MASK here to mask out the bits we don't
                    // need. While we're guaranteed to read 4 valid bytes,
                    // not all of those bytes are necessarily part of the
                    // offset. This is the key optimization: we don't need to
                    // branch on num_tag_bytes.
                    loadu32_le(p) as usize & WORD_MASK[num_tag_bytes]
                }
            } else if num_tag_bytes == 1 {
                // Copy 1: a single trailing byte holds the low 8 offset bits.
                if s >= src.len() {
                    return Err(Error::CopyRead {
                        len: 1,
                        src_len: (src.len() - s) as u64,
                    });
                }
                src[s] as usize
            } else if num_tag_bytes == 2 {
                // Copy 2: two trailing bytes hold the full 16 bit offset.
                if s + 1 >= src.len() {
                    return Err(Error::CopyRead {
                        len: 2,
                        src_len: (src.len() - s) as u64,
                    });
                }
                LE::read_u16(&src[s..]) as usize
            } else {
                // Copy 4 (the only other byte count the table produces), but
                // fewer than 4 bytes remain, so the input is corrupt.
                return Err(Error::CopyRead {
                    len: num_tag_bytes as u64,
                    src_len: (src.len() - s) as u64,
                });
            };
        // OR in the three `b` offset bits stored directly in the entry
        // (non-zero only for copy 1; they already line up with bits 8-10 of
        // the total offset).
        Ok((self.0 & 0b0000_0111_0000_0000) | trailer)
    }
}
479
/// Loads a little endian encoded u32 from data.
///
/// This is unsafe because `data` must point to some memory of size at least 4.
#[inline(always)]
unsafe fn loadu32_le(data: *const u8) -> u32 {
    // Copy the four raw bytes into a local word, then reinterpret that word
    // as a little endian value (a no-op on little endian targets).
    let mut word: u32 = 0;
    ptr::copy_nonoverlapping(data, &mut word as *mut u32 as *mut u8, 4);
    u32::from_le(word)
}
492