use std::ptr;

use byteorder::{ByteOrder, LittleEndian as LE};

use error::{Error, Result};
use tag;
use varint::read_varu64;
use MAX_INPUT_SIZE;

/// A lookup table for quickly computing the various attributes derived from a
/// tag byte.
const TAG_LOOKUP_TABLE: TagLookupTable = TagLookupTable(tag::TAG_LOOKUP_TABLE);

/// `WORD_MASK` is a map from the size of an integer in bytes to a mask that
/// selects the low bytes of a 32 bit integer. This is used when we need to
/// read an integer and we know there are at least 4 bytes to read from a
/// buffer. In this case, we can read a 32 bit little endian integer and mask
/// out only the bits we need. This in particular saves a branch.
const WORD_MASK: [usize; 5] = [0, 0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF];
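// As a worked example (with illustrative bytes, not from any real stream):
// suppose we need a 2 byte little endian integer from `src[s..]` and the next
// four bytes are [0x2A, 0x01, 0xFF, 0xFF], where the 0xFF bytes belong to
// whatever data follows. Loading all four bytes gives 0xFFFF012A, and masking
// with WORD_MASK[2] = 0xFFFF yields 0x012A = 298, with no branch on the size.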

/// Returns the decompressed size (in bytes) of the compressed bytes given.
///
/// `input` must be a sequence of bytes returned by a conforming Snappy
/// compressor.
///
/// # Errors
///
/// This function returns an error in the following circumstances:
///
/// * An invalid Snappy header was seen.
/// * The total space required for decompression exceeds `2^32 - 1`.
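///
/// # Example
///
/// A minimal sketch of typical usage, assuming input produced by this
/// crate's `Encoder` (illustrative only, not a doctest):
///
/// ```ignore
/// let compressed = Encoder::new().compress_vec(b"abcdefgh").unwrap();
/// assert_eq!(decompress_len(&compressed).unwrap(), 8);
/// ```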
pub fn decompress_len(input: &[u8]) -> Result<usize> {
    if input.is_empty() {
        return Ok(0);
    }
    Ok(try!(Header::read(input)).decompress_len)
}

/// Decoder is a raw decoder for decompressing bytes in the Snappy format.
///
/// This decoder does not use the Snappy frame format and simply decompresses
/// the given bytes as if it were returned from `Encoder`.
///
/// Unless you explicitly need the low-level control, you should use
/// `Reader` instead, which decompresses the Snappy frame format.
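///
/// # Example
///
/// A hedged sketch of a full round trip through this crate's `Encoder` and
/// `Decoder` (illustrative only, not a doctest):
///
/// ```ignore
/// let compressed = Encoder::new().compress_vec(b"some bytes").unwrap();
/// let decompressed = Decoder::new().decompress_vec(&compressed).unwrap();
/// assert_eq!(decompressed, b"some bytes");
/// ```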
#[derive(Clone, Debug, Default)]
pub struct Decoder {
    // Placeholder for potential future fields.
    _dummy: (),
}

impl Decoder {
    /// Return a new decoder that can be used for decompressing bytes.
    pub fn new() -> Decoder {
        Decoder { _dummy: () }
    }

    /// Decompresses all bytes in `input` into `output`.
    ///
    /// `input` must be a sequence of bytes returned by a conforming Snappy
    /// compressor.
    ///
    /// The size of `output` must be large enough to hold all decompressed
    /// bytes from the `input`. The size required can be queried with the
    /// `decompress_len` function.
    ///
    /// On success, this returns the number of bytes written to `output`.
    ///
    /// # Errors
    ///
    /// This method returns an error in the following circumstances:
    ///
    /// * Invalid compressed Snappy data was seen.
    /// * The total space required for decompression exceeds `2^32 - 1`.
    /// * `output` has length less than `decompress_len(input)`.
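    ///
    /// # Example
    ///
    /// A hedged sketch showing how `decompress_len` sizes the output buffer
    /// (illustrative only, not a doctest; `compressed` is assumed to hold a
    /// valid Snappy block):
    ///
    /// ```ignore
    /// let mut dec = Decoder::new();
    /// let mut buf = vec![0; decompress_len(compressed).unwrap()];
    /// let n = dec.decompress(compressed, &mut buf).unwrap();
    /// buf.truncate(n);
    /// ```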
    pub fn decompress(
        &mut self,
        input: &[u8],
        output: &mut [u8],
    ) -> Result<usize> {
        if input.is_empty() {
            return Err(Error::Empty);
        }
        let hdr = try!(Header::read(input));
        if hdr.decompress_len > output.len() {
            return Err(Error::BufferTooSmall {
                given: output.len() as u64,
                min: hdr.decompress_len as u64,
            });
        }
        let dst = &mut output[..hdr.decompress_len];
        let mut dec = Decompress {
            src: &input[hdr.len..],
            s: 0,
            dst: dst,
            d: 0,
        };
        try!(dec.decompress());
        Ok(dec.dst.len())
    }

    /// Decompresses all bytes in `input` into a freshly allocated `Vec`.
    ///
    /// This is just like the `decompress` method, except it allocates a `Vec`
    /// with the right size for you. (This is intended to be a convenience
    /// method.)
    ///
    /// This method returns an error under the same circumstances that
    /// `decompress` does.
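    ///
    /// # Example
    ///
    /// An illustrative sketch (not a doctest); `compressed` is assumed to
    /// hold a valid Snappy block:
    ///
    /// ```ignore
    /// let bytes = Decoder::new().decompress_vec(compressed).unwrap();
    /// ```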
    pub fn decompress_vec(&mut self, input: &[u8]) -> Result<Vec<u8>> {
        let mut buf = vec![0; try!(decompress_len(input))];
        let n = try!(self.decompress(input, &mut buf));
        buf.truncate(n);
        Ok(buf)
    }
}

/// Decompress is the state of the Snappy decompressor.
struct Decompress<'s, 'd> {
    /// The original compressed bytes not including the header.
    src: &'s [u8],
    /// The current position in the compressed bytes.
    s: usize,
    /// The output buffer to write the decompressed bytes.
    dst: &'d mut [u8],
    /// The current position in the decompressed buffer.
    d: usize,
}

impl<'s, 'd> Decompress<'s, 'd> {
    /// Decompresses snappy compressed bytes in `src` to `dst`.
    ///
    /// This assumes that the header has already been read and that `dst` is
    /// big enough to store all decompressed bytes.
    fn decompress(&mut self) -> Result<()> {
        while self.s < self.src.len() {
            let byte = self.src[self.s];
            self.s += 1;
            if byte & 0b0000_0011 == 0 {
                let len = (byte >> 2) as usize + 1;
                try!(self.read_literal(len));
            } else {
                try!(self.read_copy(byte));
            }
        }
        if self.d != self.dst.len() {
            return Err(Error::HeaderMismatch {
                expected_len: self.dst.len() as u64,
                got_len: self.d as u64,
            });
        }
        Ok(())
    }

    /// Decompresses a literal from `src` starting at `s` to `dst` starting at
    /// `d` and returns the updated values of `s` and `d`. `s` should point to
    /// the byte immediately following the literal tag byte.
    ///
    /// `len` is the length of the literal if it's <=60. Otherwise, it's the
    /// length tag, indicating the number of bytes needed to read a little
    /// endian integer at `src[s..]`. i.e., `61 => 1 byte`, `62 => 2 bytes`,
    /// `63 => 3 bytes` and `64 => 4 bytes`.
    ///
    /// `len` must be <=64.
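    ///
    /// As a worked example (values are illustrative): a literal of length 300
    /// stores `299` (length minus one) in two bytes after the tag, so the
    /// tag's length field is `61`, `len` arrives here as `62`, and the code
    /// below reads a 2 byte little endian integer and adds 1 to recover 300.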
    #[inline(always)]
    fn read_literal(
        &mut self,
        len: usize,
    ) -> Result<()> {
        debug_assert!(len <= 64);
        let mut len = len as u64;
        // As an optimization for the common case, if the literal length is
        // <=16 and we have enough room in both `src` and `dst`, copy the
        // literal using unaligned loads and stores.
        //
        // We pick 16 bytes with the hope that it optimizes down to a 128 bit
        // load/store.
        if len <= 16
            && self.s + 16 <= self.src.len()
            && self.d + 16 <= self.dst.len() {
            unsafe {
                // SAFETY: We know both src and dst have at least 16 bytes of
                // wiggle room after s/d, even if `len` is <16, so the copy is
                // safe.
                let srcp = self.src.as_ptr().offset(self.s as isize);
                let dstp = self.dst.as_mut_ptr().offset(self.d as isize);
                // Hopefully uses SIMD registers for 128 bit load/store.
                ptr::copy_nonoverlapping(srcp, dstp, 16);
            }
            self.d += len as usize;
            self.s += len as usize;
            return Ok(());
        }
        // When the length is bigger than 60, it indicates that we need to read
        // an additional 1-4 bytes to get the real length of the literal.
        if len >= 61 {
            // If there aren't at least 4 bytes left to read then we know this
            // is corrupt because the literal must have length >=61.
            if self.s as u64 + 4 > self.src.len() as u64 {
                return Err(Error::Literal {
                    len: 4,
                    src_len: (self.src.len() - self.s) as u64,
                    dst_len: (self.dst.len() - self.d) as u64,
                });
            }
            // Since we know there are 4 bytes left to read, read a 32 bit LE
            // integer and mask away the bits we don't need.
            let byte_count = len as usize - 60;
            len = LE::read_u32(&self.src[self.s..]) as u64;
            len = (len & (WORD_MASK[byte_count] as u64)) + 1;
            self.s += byte_count;
        }
        // If there's not enough buffer left to load or store this literal,
        // then the input is corrupt. The checks are phrased via subtraction
        // to avoid overflowing `self.s + len` or `self.d + len`.
        if ((self.src.len() - self.s) as u64) < len
            || ((self.dst.len() - self.d) as u64) < len {
            return Err(Error::Literal {
                len: len,
                src_len: (self.src.len() - self.s) as u64,
                dst_len: (self.dst.len() - self.d) as u64,
            });
        }
        unsafe {
            // SAFETY: We've already checked the bounds, so we know this copy
            // is correct.
            let srcp = self.src.as_ptr().offset(self.s as isize);
            let dstp = self.dst.as_mut_ptr().offset(self.d as isize);
            ptr::copy_nonoverlapping(srcp, dstp, len as usize);
        }
        self.s += len as usize;
        self.d += len as usize;
        Ok(())
    }

    /// Reads a copy from `src` and writes the decompressed bytes to `dst`.
    /// `s` should point to the byte immediately following the copy tag byte.
    #[inline(always)]
    fn read_copy(
        &mut self,
        tag_byte: u8,
    ) -> Result<()> {
        // Find the copy offset and len, then advance the input past the copy.
        // The rest of this function deals with reading/writing to output only.
        let entry = TAG_LOOKUP_TABLE.entry(tag_byte);
        let offset = try!(entry.offset(self.src, self.s));
        let len = entry.len();
        self.s += entry.num_tag_bytes();

        // What we really care about here is whether `offset == 0` or
        // `d < offset`. To save an extra branch, check `d <= offset - 1`
        // using a wrapping subtraction instead. If `offset` is `0`, then
        // `offset.wrapping_sub(1)` is `usize::MAX`, which `d` can never
        // exceed, so the error condition still triggers.
        if self.d <= offset.wrapping_sub(1) {
            return Err(Error::Offset {
                offset: offset as u64,
                dst_pos: self.d as u64,
            });
        }
        // When all is said and done, dst is advanced to end.
        let end = self.d + len;
        // When the copy is small and the offset is at least 8 bytes away from
        // `d`, then we can decompress the copy with two 64 bit unaligned
        // loads/stores.
        if offset >= 8 && len <= 16 && self.d + 16 <= self.dst.len() {
            unsafe {
                // SAFETY: We know dstp points to at least 16 bytes of memory
                // from the condition above, and we also know that dstp is
                // preceded by at least `offset` bytes from the offset check
                // above.
                //
                // Since `offset >= 8`, each 8 byte source range below cannot
                // overlap its 8 byte destination range, justifying the use of
                // copy_nonoverlapping.
                let dstp = self.dst.as_mut_ptr().offset(self.d as isize);
                let srcp = dstp.offset(-(offset as isize));
                // We can't do a single 16 byte load/store because src/dst may
                // overlap with each other. Namely, the second copy here may
                // copy bytes written in the first copy!
                ptr::copy_nonoverlapping(srcp, dstp, 8);
                ptr::copy_nonoverlapping(srcp.offset(8), dstp.offset(8), 8);
            }
        // If we have some wiggle room, try to decompress the copy 16 bytes
        // at a time with 128 bit unaligned loads/stores. Remember, we can't
        // just do a memcpy because decompressing copies may require copying
        // overlapping memory.
        //
        // We need the extra wiggle room to make effective use of 128 bit
        // loads/stores. Even if the store ends up copying more data than we
        // need, we're careful to advance `d` by the correct amount at the end.
        } else if end + 24 <= self.dst.len() {
            unsafe {
                // SAFETY: We know that dstp is preceded by at least `offset`
                // bytes from the offset check above.
                //
                // We don't know whether dstp overlaps with srcp, so we start
                // by copying from srcp to dstp until they no longer overlap.
                // The worst case is when dstp - srcp = 3 and the copy length
                // is 1. The first loop will issue these copy operations
                // before stopping:
                //
                //   [-3, 12] -> [0, 15]
                //   [-3, 12] -> [3, 18]
                //   [-3, 12] -> [9, 24]
                //
                // But the copy had length 1, so it was only supposed to write
                // to [0, 0]. The last copy wrote to [9, 24], which is 24
                // extra bytes in dst *beyond* the end of the copy. That's OK,
                // because the `end + 24 <= self.dst.len()` check above
                // guarantees those bytes are still in bounds.
                let mut dstp = self.dst.as_mut_ptr().offset(self.d as isize);
                let mut srcp = dstp.offset(-(offset as isize));
                loop {
                    let diff = (dstp as isize) - (srcp as isize);
                    if diff >= 16 {
                        break;
                    }
                    // srcp and dstp can overlap, so use ptr::copy.
                    debug_assert!(self.d + 16 <= self.dst.len());
                    ptr::copy(srcp, dstp, 16);
                    self.d += diff as usize;
                    dstp = dstp.offset(diff);
                }
                while self.d < end {
                    ptr::copy_nonoverlapping(srcp, dstp, 16);
                    srcp = srcp.offset(16);
                    dstp = dstp.offset(16);
                    self.d += 16;
                }
                // At this point, `d` is likely wrong. We correct it before
                // returning. Its correct value is `end`.
            }
        } else {
            if end > self.dst.len() {
                return Err(Error::CopyWrite {
                    len: len as u64,
                    dst_len: (self.dst.len() - self.d) as u64,
                });
            }
            // Finally, the slow byte-by-byte case, which should only be used
            // for the last few bytes of decompression.
            while self.d != end {
                self.dst[self.d] = self.dst[self.d - offset];
                self.d += 1;
            }
        }
        self.d = end;
        Ok(())
    }
}

/// Header represents the single varint that starts every Snappy compressed
/// block.
#[derive(Debug)]
struct Header {
    /// The length of the header in bytes (i.e., the varint).
    len: usize,
    /// The length of the original decompressed input in bytes.
    decompress_len: usize,
}

impl Header {
    /// Reads the varint header from the given input.
    ///
    /// If there was a problem reading the header then an error is returned.
    /// If a header is returned then it is guaranteed to be valid.
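    ///
    /// For example (illustrative): a block whose decompressed length is `64`
    /// begins with the single varint byte `0x40`, yielding a `Header` with
    /// `len: 1` and `decompress_len: 64`.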
    #[inline(always)]
    fn read(input: &[u8]) -> Result<Header> {
        let (decompress_len, header_len) = read_varu64(input);
        if header_len == 0 {
            return Err(Error::Header);
        }
        if decompress_len > MAX_INPUT_SIZE {
            return Err(Error::TooBig {
                given: decompress_len as u64,
                max: MAX_INPUT_SIZE,
            });
        }
        Ok(Header { len: header_len, decompress_len: decompress_len as usize })
    }
}


/// A lookup table for quickly computing the various attributes derived from
/// a tag byte. The attributes are most useful for the three "copy" tags
/// and include the length of the copy, part of the offset (for copy 1-byte
/// only) and the total number of bytes following the tag byte that encode
/// the other part of the offset (1 for copy 1, 2 for copy 2 and 4 for copy 4).
///
/// More specifically, the keys of the table are u8s and the values are u16s.
/// The bits of the values are laid out as follows:
///
/// xxaa abbb xxcc cccc
///
/// Where `a` is the number of bytes, `b` are the three bits of the offset
/// for copy 1 (the other 8 bits are in the byte following the tag byte; for
/// copy 2 and copy 4, `b = 0`), and `c` is the length of the copy (max of 64).
///
/// We could pack this in fewer bits, but the position of the three `b` bits
/// lines up with the most significant three bits in the total offset for copy
/// 1, which avoids an extra shift instruction.
///
/// In sum, this table is useful because it reduces branches and various
/// arithmetic operations.
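///
/// For example (derived from the layout above; shown for illustration): a
/// copy 1 tag byte of `0b001_010_01` encodes offset high bits `001` and
/// length `2 + 4 = 6`, so its table entry should be `0b0000_1001_0000_0110`:
/// one byte after the tag (`a = 1`), `b = 001` and `c = 6`.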
struct TagLookupTable([u16; 256]);

impl TagLookupTable {
    /// Look up the tag entry given the tag `byte`.
    #[inline(always)]
    fn entry(&self, byte: u8) -> TagEntry {
        TagEntry(self.0[byte as usize] as usize)
    }
}

/// Represents a single entry in the tag lookup table.
///
/// See the documentation in `TagLookupTable` for the bit layout.
///
/// The type is a `usize` for convenience.
struct TagEntry(usize);

impl TagEntry {
    /// Return the total number of bytes following this tag byte required to
    /// encode the offset.
    fn num_tag_bytes(&self) -> usize {
        self.0 >> 11
    }

    /// Return the total copy length, capped at 64.
    fn len(&self) -> usize {
        self.0 & 0xFF
    }

    /// Return the copy offset corresponding to this copy operation. `s` should
    /// point to the position just after the tag byte that this entry was read
    /// from.
    ///
    /// This requires reading from the compressed input since the offset is
    /// encoded in bytes following the tag byte.
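    ///
    /// For example (illustrative): for a copy 1 entry with offset bits
    /// `b = 001` and a trailing byte of `0x2A`, the full offset is
    /// `0b001 << 8 | 0x2A = 298`.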
    fn offset(&self, src: &[u8], s: usize) -> Result<usize> {
        let num_tag_bytes = self.num_tag_bytes();
        let trailer =
            // It is critical for this case to come first, since it is the
            // fast path. We really hope that this case gets branch
            // predicted.
            if s + 4 <= src.len() {
                unsafe {
                    // SAFETY: The conditional above guarantees that
                    // src[s..s+4] is valid to read from.
                    let p = src.as_ptr().offset(s as isize);
                    // We use WORD_MASK here to mask out the bits we don't
                    // need. While we're guaranteed to read 4 valid bytes,
                    // not all of those bytes are necessarily part of the
                    // offset. This is the key optimization: we don't need to
                    // branch on num_tag_bytes.
                    loadu32_le(p) as usize & WORD_MASK[num_tag_bytes]
                }
            } else if num_tag_bytes == 1 {
                if s >= src.len() {
                    return Err(Error::CopyRead {
                        len: 1,
                        src_len: (src.len() - s) as u64,
                    });
                }
                src[s] as usize
            } else if num_tag_bytes == 2 {
                if s + 1 >= src.len() {
                    return Err(Error::CopyRead {
                        len: 2,
                        src_len: (src.len() - s) as u64,
                    });
                }
                LE::read_u16(&src[s..]) as usize
            } else {
                return Err(Error::CopyRead {
                    len: num_tag_bytes as u64,
                    src_len: (src.len() - s) as u64,
                });
            };
        Ok((self.0 & 0b0000_0111_0000_0000) | trailer)
    }
}

/// Loads a little endian encoded u32 from data.
///
/// This is unsafe because `data` must point to some memory of size at least 4.
#[inline(always)]
unsafe fn loadu32_le(data: *const u8) -> u32 {
    let mut n: u32 = 0;
    ptr::copy_nonoverlapping(
        data,
        &mut n as *mut u32 as *mut u8,
        4);
    n.to_le()
}