1 // Copyright 2005 Google Inc. All Rights Reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 #include "snappy.h"
30 #include "snappy-internal.h"
31 #include "snappy-sinksource.h"
32 
33 #include <stdio.h>
34 
35 #include <algorithm>
36 #include <string>
37 #include <vector>
38 
39 
40 namespace snappy {
41 
42 // Any hash function will produce a valid compressed bitstream, but a good
43 // hash function reduces the number of collisions and thus yields better
44 // compression for compressible input, and more speed for incompressible
45 // input. Of course, it doesn't hurt if the hash function is reasonably fast
46 // either, as it gets called a lot.
47 static inline uint32 HashBytes(uint32 bytes, int shift) {
48   uint32 kMul = 0x1e35a7bd;
49   return (bytes * kMul) >> shift;
50 }
51 static inline uint32 Hash(const char* p, int shift) {
52   return HashBytes(UNALIGNED_LOAD32(p), shift);
53 }
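
// For illustration only (not part of the library): with a hash table of
// 2^14 = 16384 entries, callers pass shift = 32 - 14 = 18, so HashBytes()
// keeps only the top 14 bits of the 32-bit product and the result is
// always a valid table index:
//
//   uint32 h = Hash("abcd", 18);   // 0 <= h < 16384
//
// Any multiplier gives a correct bitstream; kMul is simply a constant that
// spreads typical inputs well.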
54 
55 size_t MaxCompressedLength(size_t source_len) {
56   // Compressed data can be defined as:
57   //    compressed := item* literal*
58   //    item       := literal* copy
59   //
60   // The trailing literal sequence has a space blowup of at most 62/60
61   // since a literal of length 60 needs one tag byte + one extra byte
62   // for length information.
63   //
64   // Item blowup is trickier to measure.  Suppose the "copy" op copies
65   // 4 bytes of data.  Because of a special check in the encoding code,
66   // we produce a 4-byte copy only if the offset is < 65536.  Therefore
67   // the copy op takes 3 bytes to encode, and this type of item leads
68   // to at most the 62/60 blowup for representing literals.
69   //
70   // Suppose the "copy" op copies 5 bytes of data.  If the offset is big
71   // enough, it will take 5 bytes to encode the copy op.  Therefore the
72   // worst case here is a one-byte literal followed by a five-byte copy.
73   // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
74   //
75   // This last factor dominates the blowup, so the final estimate is:
76   return 32 + source_len + source_len/6;
77 }
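
// Worked example (illustrative only): for a 60000-byte input the bound is
//   32 + 60000 + 60000/6 = 70032 bytes,
// i.e. roughly a 16.7% worst-case expansion plus the 32-byte slop term that
// the fast paths below rely on.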
78 
79 enum {
80   LITERAL = 0,
81   COPY_1_BYTE_OFFSET = 1,  // 3 bit length + 3 bits of offset in opcode
82   COPY_2_BYTE_OFFSET = 2,
83   COPY_4_BYTE_OFFSET = 3
84 };
85 static const int kMaximumTagLength = 5;  // COPY_4_BYTE_OFFSET plus the actual offset.
86 
87 // Copy "len" bytes from "src" to "op", one byte at a time.  Used for
88 // handling COPY operations where the input and output regions may
89 // overlap.  For example, suppose:
90 //    src    == "ab"
91 //    op     == src + 2
92 //    len    == 20
93 // After IncrementalCopy(src, op, len), the result will have
94 // eleven copies of "ab"
95 //    ababababababababababab
96 // Note that this does not match the semantics of either memcpy()
97 // or memmove().
98 static inline void IncrementalCopy(const char* src, char* op, ssize_t len) {
99   assert(len > 0);
100   do {
101     *op++ = *src++;
102   } while (--len > 0);
103 }
104 
105 // Equivalent to IncrementalCopy except that it can write up to ten extra
106 // bytes after the end of the copy, and that it is faster.
107 //
108 // The main part of this loop is a simple copy of eight bytes at a time until
109 // we've copied (at least) the requested amount of bytes.  However, if op and
110 // src are less than eight bytes apart (indicating a repeating pattern of
111 // length < 8), we first need to expand the pattern in order to get the correct
112 // results. For instance, if the buffer looks like this, with the eight-byte
113 // <src> and <op> patterns marked as intervals:
114 //
115 //    abxxxxxxxxxxxx
116 //    [------]           src
117 //      [------]         op
118 //
119 // a single eight-byte copy from <src> to <op> will repeat the pattern once,
120 // after which we can move <op> two bytes without moving <src>:
121 //
122 //    ababxxxxxxxxxx
123 //    [------]           src
124 //        [------]       op
125 //
126 // and repeat the exercise until the two no longer overlap.
127 //
128 // This allows us to do very well in the special case of one single byte
129 // repeated many times, without taking a big hit for more general cases.
130 //
131 // The worst case of extra writing past the end of the match occurs when
132 // op - src == 1 and len == 1; the last copy will read from byte positions
133 // [0..7] and write to [4..11], whereas it was only supposed to write to
134 // position 1. Thus, ten excess bytes.
135 
136 namespace {
137 
138 const int kMaxIncrementCopyOverflow = 10;
139 
140 inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) {
141   while (op - src < 8) {
142     UnalignedCopy64(src, op);
143     len -= op - src;
144     op += op - src;
145   }
146   while (len > 0) {
147     UnalignedCopy64(src, op);
148     src += 8;
149     op += 8;
150     len -= 8;
151   }
152 }
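
// Illustrative trace (assumed inputs, not executed anywhere): with
// op - src == 2 and len == 20, i.e. a two-byte pattern to replicate, the
// pattern-expansion loop above runs twice:
//
//   UnalignedCopy64(src, op); op += 2;   // op - src becomes 4, len = 18
//   UnalignedCopy64(src, op); op += 4;   // op - src becomes 8, len = 14
//
// after which the plain eight-bytes-at-a-time loop finishes the job,
// possibly writing up to kMaxIncrementCopyOverflow bytes past the requested
// end (which is why callers must reserve that slack).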
153 
154 }  // namespace
155 
156 static inline char* EmitLiteral(char* op,
157                                 const char* literal,
158                                 int len,
159                                 bool allow_fast_path) {
160   int n = len - 1;      // Zero-length literals are disallowed
161   if (n < 60) {
162     // Fits in tag byte
163     *op++ = LITERAL | (n << 2);
164 
165     // The vast majority of copies are below 16 bytes, for which a
166     // call to memcpy is overkill. This fast path can sometimes
167     // copy up to 15 bytes too much, but that is okay in the
168     // main loop, since we have a bit to go on for both sides:
169     //
170     //   - The input will always have kInputMarginBytes = 15 extra
171     //     available bytes, as long as we're in the main loop, and
172     //     if not, allow_fast_path = false.
173     //   - The output will always have 32 spare bytes (see
174     //     MaxCompressedLength).
175     if (allow_fast_path && len <= 16) {
176       UnalignedCopy64(literal, op);
177       UnalignedCopy64(literal + 8, op + 8);
178       return op + len;
179     }
180   } else {
181     // Encode in upcoming bytes
182     char* base = op;
183     int count = 0;
184     op++;
185     while (n > 0) {
186       *op++ = n & 0xff;
187       n >>= 8;
188       count++;
189     }
190     assert(count >= 1);
191     assert(count <= 4);
192     *base = LITERAL | ((59+count) << 2);
193   }
194   memcpy(op, literal, len);
195   return op + len;
196 }
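
// Tag-byte examples (illustrative only): a 5-byte literal is emitted as the
// single tag byte LITERAL | (4 << 2) == 0x10 followed by the 5 payload
// bytes.  A 200-byte literal does not fit in the tag, so it becomes
// LITERAL | ((59 + 1) << 2) == 0xf0, one extra length byte 0xc7 (= 200 - 1),
// and then the 200 payload bytes.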
197 
198 static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
199   assert(len <= 64);
200   assert(len >= 4);
201   assert(offset < 65536);
202 
203   if ((len < 12) && (offset < 2048)) {
204     size_t len_minus_4 = len - 4;
205     assert(len_minus_4 < 8);            // Must fit in 3 bits
206     *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
207     *op++ = offset & 0xff;
208   } else {
209     *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
210     LittleEndian::Store16(op, offset);
211     op += 2;
212   }
213   return op;
214 }
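
// Encoding examples (illustrative only): a 7-byte copy at offset 500 uses
// the one-byte-offset form, giving tag
//   COPY_1_BYTE_OFFSET + ((7 - 4) << 2) + ((500 >> 8) << 5) == 0x2d
// followed by the low offset byte 0xf4.  The same length at offset 5000
// needs the two-byte-offset form: tag COPY_2_BYTE_OFFSET + ((7 - 1) << 2)
// == 0x1a, then the offset 5000 as the little-endian bytes 0x88 0x13.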
215 
216 static inline char* EmitCopy(char* op, size_t offset, int len) {
217   // Emit 64 byte copies but make sure to keep at least four bytes reserved
218   while (len >= 68) {
219     op = EmitCopyLessThan64(op, offset, 64);
220     len -= 64;
221   }
222 
223   // Emit an extra 60-byte copy if we have too much data to fit in one copy
224   if (len > 64) {
225     op = EmitCopyLessThan64(op, offset, 60);
226     len -= 60;
227   }
228 
229   // Emit remainder
230   op = EmitCopyLessThan64(op, offset, len);
231   return op;
232 }
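
// For example (illustrative only), a 150-byte match is emitted as copies of
// 64 + 64 + 22 bytes.  A 130-byte match becomes 64 + 60 + 6 rather than
// 64 + 64 + 2, since the leftover piece must never be shorter than the
// 4-byte minimum that EmitCopyLessThan64 can encode.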
233 
234 
235 bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
236   uint32 v = 0;
237   const char* limit = start + n;
238   if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
239     *result = v;
240     return true;
241   } else {
242     return false;
243   }
244 }
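
// The length preamble is a standard little-endian base-128 varint.  For
// example (illustrative only), an uncompressed length of 64 is stored as
// the single byte 0x40, while 2097150 (0x1ffffe) is stored as the three
// bytes 0xfe 0xff 0x7f.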
245 
246 namespace internal {
247 uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
248   // Use smaller hash table when input.size() is smaller, since we
249   // fill the table, incurring O(hash table size) overhead for
250   // compression, and if the input is short, we won't need that
251   // many hash table entries anyway.
252   assert(kMaxHashTableSize >= 256);
253   size_t htsize = 256;
254   while (htsize < kMaxHashTableSize && htsize < input_size) {
255     htsize <<= 1;
256   }
257 
258   uint16* table;
259   if (htsize <= ARRAYSIZE(small_table_)) {
260     table = small_table_;
261   } else {
262     if (large_table_ == NULL) {
263       large_table_ = new uint16[kMaxHashTableSize];
264     }
265     table = large_table_;
266   }
267 
268   *table_size = htsize;
269   memset(table, 0, htsize * sizeof(*table));
270   return table;
271 }
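
// For example (illustrative only), a 3000-byte input gets the smallest
// power-of-two table that can hold it, i.e. 4096 entries, while inputs of
// kMaxHashTableSize bytes or more are capped at kMaxHashTableSize entries.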
272 }  // end namespace internal
273 
274 // For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
275 // equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
276 // empirically found that overlapping loads such as
277 //  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
278 // are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
279 //
280 // We have different versions for 64- and 32-bit; ideally we would avoid the
281 // two functions and just inline the UNALIGNED_LOAD64 call into
282 // GetUint32AtOffset, but GCC (at least as of 4.6) is seemingly not clever
283 // enough to avoid loading the value multiple times in that case. For 64-bit, the load
284 // is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
285 // done at GetUint32AtOffset() time.
286 
287 #ifdef ARCH_K8
288 
289 typedef uint64 EightBytesReference;
290 
291 static inline EightBytesReference GetEightBytesAt(const char* ptr) {
292   return UNALIGNED_LOAD64(ptr);
293 }
294 
295 static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
296   assert(offset >= 0);
297   assert(offset <= 4);
298   return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
299 }
300 
301 #else
302 
303 typedef const char* EightBytesReference;
304 
305 static inline EightBytesReference GetEightBytesAt(const char* ptr) {
306   return ptr;
307 }
308 
309 static inline uint32 GetUint32AtOffset(const char* v, int offset) {
310   assert(offset >= 0);
311   assert(offset <= 4);
312   return UNALIGNED_LOAD32(v + offset);
313 }
314 
315 #endif
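
// Usage sketch (illustrative only): if p points at the bytes
// 01 02 03 04 05 06 07 08, then
//
//   EightBytesReference v = GetEightBytesAt(p);
//   uint32 x = GetUint32AtOffset(v, 2);   // same as UNALIGNED_LOAD32(p + 2)
//
// i.e. x is the 32-bit word formed from the bytes 03 04 05 06, and the
// 64-bit build performs only the single eight-byte load.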
316 
317 // Flat array compression that does not emit the "uncompressed length"
318 // prefix. Compresses "input" string to the "*op" buffer.
319 //
320 // REQUIRES: "input" is at most "kBlockSize" bytes long.
321 // REQUIRES: "op" points to an array of memory that is at least
322 // "MaxCompressedLength(input.size())" in size.
323 // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
324 // REQUIRES: "table_size" is a power of two
325 //
326 // Returns an "end" pointer into "op" buffer.
327 // "end - op" is the compressed size of "input".
328 namespace internal {
329 char* CompressFragment(const char* input,
330                        size_t input_size,
331                        char* op,
332                        uint16* table,
333                        const int table_size) {
334   // "ip" is the input pointer, and "op" is the output pointer.
335   const char* ip = input;
336   assert(input_size <= kBlockSize);
337   assert((table_size & (table_size - 1)) == 0); // table must be power of two
338   const int shift = 32 - Bits::Log2Floor(table_size);
339   assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
340   const char* ip_end = input + input_size;
341   const char* base_ip = ip;
342   // Bytes in [next_emit, ip) will be emitted as literal bytes.  Or
343   // [next_emit, ip_end) after the main loop.
344   const char* next_emit = ip;
345 
346   const size_t kInputMarginBytes = 15;
347   if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
348     const char* ip_limit = input + input_size - kInputMarginBytes;
349 
350     for (uint32 next_hash = Hash(++ip, shift); ; ) {
351       assert(next_emit < ip);
352       // The body of this loop calls EmitLiteral once and then EmitCopy one or
353       // more times.  (The exception is that when we're close to exhausting
354       // the input we goto emit_remainder.)
355       //
356       // In the first iteration of this loop we're just starting, so
357       // there's nothing to copy, so calling EmitLiteral once is
358       // necessary.  And we only start a new iteration when the
359       // current iteration has determined that a call to EmitLiteral will
360       // precede the next call to EmitCopy (if any).
361       //
362       // Step 1: Scan forward in the input looking for a 4-byte-long match.
363       // If we get close to exhausting the input then goto emit_remainder.
364       //
365       // Heuristic match skipping: If 32 bytes are scanned with no matches
366       // found, start looking only at every other byte. If 32 more bytes are
367       // scanned, look at every third byte, etc.. When a match is found,
368       // immediately go back to looking at every byte. This is a small loss
369       // (~5% performance, ~0.1% density) for compressible data due to more
370       // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
371       // win since the compressor quickly "realizes" the data is incompressible
372       // and doesn't bother looking for matches everywhere.
373       //
374       // The "skip" variable keeps track of how many bytes there are since the
375       // last match; dividing it by 32 (ie. right-shifting by five) gives the
376       // number of bytes to move ahead for each iteration.
377       uint32 skip = 32;
378 
379       const char* next_ip = ip;
380       const char* candidate;
381       do {
382         ip = next_ip;
383         uint32 hash = next_hash;
384         assert(hash == Hash(ip, shift));
385         uint32 bytes_between_hash_lookups = skip++ >> 5;
386         next_ip = ip + bytes_between_hash_lookups;
387         if (PREDICT_FALSE(next_ip > ip_limit)) {
388           goto emit_remainder;
389         }
390         next_hash = Hash(next_ip, shift);
391         candidate = base_ip + table[hash];
392         assert(candidate >= base_ip);
393         assert(candidate < ip);
394 
395         table[hash] = ip - base_ip;
396       } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
397                             UNALIGNED_LOAD32(candidate)));
398 
399       // Step 2: A 4-byte match has been found.  We'll later see if more
400       // than 4 bytes match.  But, prior to the match, input
401       // bytes [next_emit, ip) are unmatched.  Emit them as "literal bytes."
402       assert(next_emit + 16 <= ip_end);
403       op = EmitLiteral(op, next_emit, ip - next_emit, true);
404 
405       // Step 3: Call EmitCopy, and then see if another EmitCopy could
406       // be our next move.  Repeat until we find no match for the
407       // input immediately after what was consumed by the last EmitCopy call.
408       //
409       // If we exit this loop normally then we need to call EmitLiteral next,
410       // though we don't yet know how big the literal will be.  We handle that
411       // by proceeding to the next iteration of the main loop.  We also can exit
412       // this loop via goto if we get close to exhausting the input.
413       EightBytesReference input_bytes;
414       uint32 candidate_bytes = 0;
415 
416       do {
417         // We have a 4-byte match at ip, and no need to emit any
418         // "literal bytes" prior to ip.
419         const char* base = ip;
420         int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
421         ip += matched;
422         size_t offset = base - candidate;
423         assert(0 == memcmp(base, candidate, matched));
424         op = EmitCopy(op, offset, matched);
425         // We could immediately start working at ip now, but to improve
426         // compression we first update table[Hash(ip - 1, ...)].
427         const char* insert_tail = ip - 1;
428         next_emit = ip;
429         if (PREDICT_FALSE(ip >= ip_limit)) {
430           goto emit_remainder;
431         }
432         input_bytes = GetEightBytesAt(insert_tail);
433         uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
434         table[prev_hash] = ip - base_ip - 1;
435         uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
436         candidate = base_ip + table[cur_hash];
437         candidate_bytes = UNALIGNED_LOAD32(candidate);
438         table[cur_hash] = ip - base_ip;
439       } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
440 
441       next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
442       ++ip;
443     }
444   }
445 
446  emit_remainder:
447   // Emit the remaining bytes as a literal
448   if (next_emit < ip_end) {
449     op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
450   }
451 
452   return op;
453 }
454 }  // end namespace internal
455 
456 // Signature of output types needed by decompression code.
457 // The decompression code is templatized on a type that obeys this
458 // signature so that we do not pay virtual function call overhead in
459 // the middle of a tight decompression loop.
460 //
461 // class DecompressionWriter {
462 //  public:
463 //   // Called before decompression
464 //   void SetExpectedLength(size_t length);
465 //
466 //   // Called after decompression
467 //   bool CheckLength() const;
468 //
469 //   // Called repeatedly during decompression
470 //   bool Append(const char* ip, size_t length);
471 //   bool AppendFromSelf(uint32 offset, size_t length);
472 //
473 //   // The rules for how TryFastAppend differs from Append are somewhat
474 //   // convoluted:
475 //   //
476 //   //  - TryFastAppend is allowed to decline (return false) at any
477 //   //    time, for any reason -- just "return false" would be
478 //   //    a perfectly legal implementation of TryFastAppend.
479 //   //    The intention is for TryFastAppend to allow a fast path
480 //   //    in the common case of a small append.
481 //   //  - TryFastAppend is allowed to read up to <available> bytes
482 //   //    from the input buffer, whereas Append is allowed to read
483 //   //    <length>. However, if it returns true, it must leave
484 //   //    at least five (kMaximumTagLength) bytes in the input buffer
485 //   //    afterwards, so that there is always enough space to read the
486 //   //    next tag without checking for a refill.
487 //   //  - TryFastAppend must always return decline (return false)
488 //   //    if <length> is 61 or more, as in this case the literal length is not
489 //   //    decoded fully. In practice, this should not be a big problem,
490 //   //    as it is unlikely that one would implement a fast path accepting
491 //   //    this much data.
492 //   //
493 //   bool TryFastAppend(const char* ip, size_t available, size_t length);
494 // };
495 
496 // -----------------------------------------------------------------------
497 // Lookup table for decompression code.  Generated by ComputeTable() below.
498 // -----------------------------------------------------------------------
499 
500 // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
501 static const uint32 wordmask[] = {
502   0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
503 };
504 
505 // Data stored per entry in lookup table:
506 //      Range   Bits-used       Description
507 //      ------------------------------------
508 //      1..64   0..7            Literal/copy length encoded in opcode byte
509 //      0..7    8..10           Copy offset encoded in opcode byte / 256
510 //      0..4    11..13          Extra bytes after opcode
511 //
512 // We use eight bits for the length even though 7 would have sufficed,
513 // for efficiency reasons:
514 //      (1) Extracting a byte is faster than a bit-field
515 //      (2) It properly aligns copy offset so we do not need a <<8
516 static const uint16 char_table[256] = {
517   0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
518   0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
519   0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
520   0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
521   0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
522   0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
523   0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
524   0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
525   0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
526   0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
527   0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
528   0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
529   0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
530   0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
531   0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
532   0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
533   0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
534   0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
535   0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
536   0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
537   0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
538   0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
539   0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
540   0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
541   0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
542   0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
543   0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
544   0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
545   0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
546   0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
547   0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
548   0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
549 };
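
// Decoding examples (illustrative only): char_table[0x10] == 0x0005.  Tag
// 0x10 is a 5-byte literal (LITERAL | (4 << 2)), and the entry decodes as
// length 5 in bits 0..7, copy offset 0 in bits 8..10 and 0 extra bytes in
// bits 11..13.  Likewise char_table[0x1a] == 0x1007: a two-byte-offset copy
// of length 7 whose 16-bit offset follows in the next (entry >> 11) == 2
// bytes of input.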
550 
551 // In debug mode, allow optional computation of the table at startup.
552 // Also, check that the decompression table is correct.
553 #ifndef NDEBUG
554 DEFINE_bool(snappy_dump_decompression_table, false,
555             "If true, we print the decompression table at startup.");
556 
557 static uint16 MakeEntry(unsigned int extra,
558                         unsigned int len,
559                         unsigned int copy_offset) {
560   // Check that all of the fields fit within the allocated space
561   assert(extra       == (extra & 0x7));          // At most 3 bits
562   assert(copy_offset == (copy_offset & 0x7));    // At most 3 bits
563   assert(len         == (len & 0x7f));           // At most 7 bits
564   return len | (copy_offset << 8) | (extra << 11);
565 }
566 
567 static void ComputeTable() {
568   uint16 dst[256];
569 
570   // Place invalid entries in all places to detect missing initialization
571   int assigned = 0;
572   for (int i = 0; i < 256; i++) {
573     dst[i] = 0xffff;
574   }
575 
576   // Small LITERAL entries.  We store (len-1) in the top 6 bits.
577   for (unsigned int len = 1; len <= 60; len++) {
578     dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
579     assigned++;
580   }
581 
582   // Large LITERAL entries.  We use 60..63 in the high 6 bits to
583   // encode the number of bytes of length info that follow the opcode.
584   for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
585     // We set the length field in the lookup table to 1 because extra
586     // bytes encode len-1.
587     dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
588     assigned++;
589   }
590 
591   // COPY_1_BYTE_OFFSET.
592   //
593   // The tag byte in the compressed data stores len-4 in 3 bits, and
594   // offset/256 in 5 bits.  offset%256 is stored in the next byte.
595   //
596   // This format is used for length in range [4..11] and offset in
597   // range [0..2047]
598   for (unsigned int len = 4; len < 12; len++) {
599     for (unsigned int offset = 0; offset < 2048; offset += 256) {
600       dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
601         MakeEntry(1, len, offset>>8);
602       assigned++;
603     }
604   }
605 
606   // COPY_2_BYTE_OFFSET.
607   // Tag contains len-1 in top 6 bits, and offset in next two bytes.
608   for (unsigned int len = 1; len <= 64; len++) {
609     dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
610     assigned++;
611   }
612 
613   // COPY_4_BYTE_OFFSET.
614   // Tag contains len-1 in top 6 bits, and offset in next four bytes.
615   for (unsigned int len = 1; len <= 64; len++) {
616     dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
617     assigned++;
618   }
619 
620   // Check that each entry was initialized exactly once.
621   if (assigned != 256) {
622     fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
623     abort();
624   }
625   for (int i = 0; i < 256; i++) {
626     if (dst[i] == 0xffff) {
627       fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
628       abort();
629     }
630   }
631 
632   if (FLAGS_snappy_dump_decompression_table) {
633     printf("static const uint16 char_table[256] = {\n  ");
634     for (int i = 0; i < 256; i++) {
635       printf("0x%04x%s",
636              dst[i],
637              ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
638     }
639     printf("};\n");
640   }
641 
642   // Check that computed table matched recorded table
643   for (int i = 0; i < 256; i++) {
644     if (dst[i] != char_table[i]) {
645       fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
646               i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
647       abort();
648     }
649   }
650 }
651 #endif /* !NDEBUG */
652 
653 // Helper class for decompression
654 class SnappyDecompressor {
655  private:
656   Source*       reader_;         // Underlying source of bytes to decompress
657   const char*   ip_;             // Points to next buffered byte
658   const char*   ip_limit_;       // Points just past buffered bytes
659   uint32        peeked_;         // Bytes peeked from reader (need to skip)
660   bool          eof_;            // Hit end of input without an error?
661   char          scratch_[kMaximumTagLength];  // See RefillTag().
662 
663   // Ensure that all of the tag metadata for the next tag is available
664   // in [ip_..ip_limit_-1].  Also ensures that [ip,ip+4] is readable even
665   // if (ip_limit_ - ip_ < 5).
666   //
667   // Returns true on success, false on error or end of input.
668   bool RefillTag();
669 
670  public:
671   explicit SnappyDecompressor(Source* reader)
672       : reader_(reader),
673         ip_(NULL),
674         ip_limit_(NULL),
675         peeked_(0),
676         eof_(false) {
677   }
678 
679   ~SnappyDecompressor() {
680     // Advance past any bytes we peeked at from the reader
681     reader_->Skip(peeked_);
682   }
683 
684   // Returns true iff we have hit the end of the input without an error.
685   bool eof() const {
686     return eof_;
687   }
688 
689   // Read the uncompressed length stored at the start of the compressed data.
690   // On success, stores the length in *result and returns true.
691   // On failure, returns false.
692   bool ReadUncompressedLength(uint32* result) {
693     assert(ip_ == NULL);       // Must not have read anything yet
694     // Length is encoded in 1..5 bytes
695     *result = 0;
696     uint32 shift = 0;
697     while (true) {
698       if (shift >= 32) return false;
699       size_t n;
700       const char* ip = reader_->Peek(&n);
701       if (n == 0) return false;
702       const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
703       reader_->Skip(1);
704       *result |= static_cast<uint32>(c & 0x7f) << shift;
705       if (c < 128) {
706         break;
707       }
708       shift += 7;
709     }
710     return true;
711   }
712 
713   // Process the next item found in the input.
714   // Returns true if successful, false on error or end of input.
715   template <class Writer>
716   void DecompressAllTags(Writer* writer) {
717     const char* ip = ip_;
718 
719     // We could have put this refill fragment only at the beginning of the loop.
720     // However, duplicating it at the end of each branch gives the compiler more
721     // scope to optimize the <ip_limit_ - ip> expression based on the local
722     // context, which overall increases speed.
723     #define MAYBE_REFILL() \
724         if (ip_limit_ - ip < kMaximumTagLength) { \
725           ip_ = ip; \
726           if (!RefillTag()) return; \
727           ip = ip_; \
728         }
729 
730     MAYBE_REFILL();
731     for ( ;; ) {
732       const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
733 
734       if ((c & 0x3) == LITERAL) {
735         size_t literal_length = (c >> 2) + 1u;
736         if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
737           assert(literal_length < 61);
738           ip += literal_length;
739           // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend()
740           // will not return true unless there's already at least five spare
741           // bytes in addition to the literal.
742           continue;
743         }
744         if (PREDICT_FALSE(literal_length >= 61)) {
745           // Long literal.
746           const size_t literal_length_length = literal_length - 60;
747           literal_length =
748               (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
749           ip += literal_length_length;
750         }
751 
752         size_t avail = ip_limit_ - ip;
753         while (avail < literal_length) {
754           if (!writer->Append(ip, avail)) return;
755           literal_length -= avail;
756           reader_->Skip(peeked_);
757           size_t n;
758           ip = reader_->Peek(&n);
759           avail = n;
760           peeked_ = avail;
761           if (avail == 0) return;  // Premature end of input
762           ip_limit_ = ip + avail;
763         }
764         if (!writer->Append(ip, literal_length)) {
765           return;
766         }
767         ip += literal_length;
768         MAYBE_REFILL();
769       } else {
770         const uint32 entry = char_table[c];
771         const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
772         const uint32 length = entry & 0xff;
773         ip += entry >> 11;
774 
775         // copy_offset/256 is encoded in bits 8..10.  By just fetching
776         // those bits, we get copy_offset (since the bit-field starts at
777         // bit 8).
778         const uint32 copy_offset = entry & 0x700;
779         if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
780           return;
781         }
782         MAYBE_REFILL();
783       }
784     }
785 
786 #undef MAYBE_REFILL
787   }
788 };
789 
790 bool SnappyDecompressor::RefillTag() {
791   const char* ip = ip_;
792   if (ip == ip_limit_) {
793     // Fetch a new fragment from the reader
794     reader_->Skip(peeked_);   // All peeked bytes are used up
795     size_t n;
796     ip = reader_->Peek(&n);
797     peeked_ = n;
798     if (n == 0) {
799       eof_ = true;
800       return false;
801     }
802     ip_limit_ = ip + n;
803   }
804 
805   // Read the tag character
806   assert(ip < ip_limit_);
807   const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
808   const uint32 entry = char_table[c];
809   const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
810   assert(needed <= sizeof(scratch_));
811 
812   // Read more bytes from reader if needed
813   uint32 nbuf = ip_limit_ - ip;
814   if (nbuf < needed) {
815     // Stitch together bytes from ip and reader to form the word
816     // contents.  We store the needed bytes in "scratch_".  They
817     // will be consumed immediately by the caller since we do not
818     // read more than we need.
819     memmove(scratch_, ip, nbuf);
820     reader_->Skip(peeked_);  // All peeked bytes are used up
821     peeked_ = 0;
822     while (nbuf < needed) {
823       size_t length;
824       const char* src = reader_->Peek(&length);
825       if (length == 0) return false;
826       uint32 to_add = min<uint32>(needed - nbuf, length);
827       memcpy(scratch_ + nbuf, src, to_add);
828       nbuf += to_add;
829       reader_->Skip(to_add);
830     }
831     assert(nbuf == needed);
832     ip_ = scratch_;
833     ip_limit_ = scratch_ + needed;
834   } else if (nbuf < kMaximumTagLength) {
835     // Have enough bytes, but move into scratch_ so that we do not
836     // read past end of input
837     memmove(scratch_, ip, nbuf);
838     reader_->Skip(peeked_);  // All peeked bytes are used up
839     peeked_ = 0;
840     ip_ = scratch_;
841     ip_limit_ = scratch_ + nbuf;
842   } else {
843     // Pass pointer to buffer returned by reader_.
844     ip_ = ip;
845   }
846   return true;
847 }
848 
849 template <typename Writer>
850 static bool InternalUncompress(Source* r, Writer* writer) {
851   // Read the uncompressed length from the front of the compressed input
852   SnappyDecompressor decompressor(r);
853   uint32 uncompressed_len = 0;
854   if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
855   return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
856 }
857 
858 template <typename Writer>
859 static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
860                                       Writer* writer,
861                                       uint32 uncompressed_len) {
862   writer->SetExpectedLength(uncompressed_len);
863 
864   // Process the entire input
865   decompressor->DecompressAllTags(writer);
866   return (decompressor->eof() && writer->CheckLength());
867 }
868 
869 bool GetUncompressedLength(Source* source, uint32* result) {
870   SnappyDecompressor decompressor(source);
871   return decompressor.ReadUncompressedLength(result);
872 }
873 
874 size_t Compress(Source* reader, Sink* writer) {
875   size_t written = 0;
876   size_t N = reader->Available();
877   char ulength[Varint::kMax32];
878   char* p = Varint::Encode32(ulength, N);
879   writer->Append(ulength, p-ulength);
880   written += (p - ulength);
881 
882   internal::WorkingMemory wmem;
883   char* scratch = NULL;
884   char* scratch_output = NULL;
885 
886   while (N > 0) {
887     // Get next block to compress (without copying if possible)
888     size_t fragment_size;
889     const char* fragment = reader->Peek(&fragment_size);
890     assert(fragment_size != 0);  // premature end of input
891     const size_t num_to_read = min(N, kBlockSize);
892     size_t bytes_read = fragment_size;
893 
894     size_t pending_advance = 0;
895     if (bytes_read >= num_to_read) {
896       // Buffer returned by reader is large enough
897       pending_advance = num_to_read;
898       fragment_size = num_to_read;
899     } else {
900       // Read into scratch buffer
901       if (scratch == NULL) {
902         // If this is the last iteration, we want to allocate N bytes
903         // of space, otherwise the max possible kBlockSize space.
904         // num_to_read contains exactly the correct value
905         scratch = new char[num_to_read];
906       }
907       memcpy(scratch, fragment, bytes_read);
908       reader->Skip(bytes_read);
909 
910       while (bytes_read < num_to_read) {
911         fragment = reader->Peek(&fragment_size);
912         size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
913         memcpy(scratch + bytes_read, fragment, n);
914         bytes_read += n;
915         reader->Skip(n);
916       }
917       assert(bytes_read == num_to_read);
918       fragment = scratch;
919       fragment_size = num_to_read;
920     }
921     assert(fragment_size == num_to_read);
922 
923     // Get encoding table for compression
924     int table_size;
925     uint16* table = wmem.GetHashTable(num_to_read, &table_size);
926 
927     // Compress input_fragment and append to dest
928     const int max_output = MaxCompressedLength(num_to_read);
929 
930     // Need a scratch buffer for the output, in case the byte sink doesn't
931     // have room for us directly.
932     if (scratch_output == NULL) {
933       scratch_output = new char[max_output];
934     } else {
935       // Since we encode kBlockSize regions followed by a region
936       // which is <= kBlockSize in length, a previously allocated
937       // scratch_output[] region is big enough for this iteration.
938     }
939     char* dest = writer->GetAppendBuffer(max_output, scratch_output);
940     char* end = internal::CompressFragment(fragment, fragment_size,
941                                            dest, table, table_size);
942     writer->Append(dest, end - dest);
943     written += (end - dest);
944 
945     N -= num_to_read;
946     reader->Skip(pending_advance);
947   }
948 
949   delete[] scratch;
950   delete[] scratch_output;
951 
952   return written;
953 }
954 
955 // -----------------------------------------------------------------------
956 // IOVec interfaces
957 // -----------------------------------------------------------------------
958 
959 // A type that writes to an iovec.
960 // Note that this is not a "ByteSink", but a type that matches the
961 // Writer template argument to SnappyDecompressor::DecompressAllTags().
962 class SnappyIOVecWriter {
963  private:
964   const struct iovec* output_iov_;
965   const size_t output_iov_count_;
966 
967   // We are currently writing into output_iov_[curr_iov_index_].
968   int curr_iov_index_;
969 
970   // Bytes written to output_iov_[curr_iov_index_] so far.
971   size_t curr_iov_written_;
972 
973   // Total bytes decompressed into output_iov_ so far.
974   size_t total_written_;
975 
976   // Maximum number of bytes that will be decompressed into output_iov_.
977   size_t output_limit_;
978 
979   inline char* GetIOVecPointer(int index, size_t offset) {
980     return reinterpret_cast<char*>(output_iov_[index].iov_base) +
981         offset;
982   }
983 
984  public:
985   // Does not take ownership of iov. iov must be valid during the
986   // entire lifetime of the SnappyIOVecWriter.
987   inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
988       : output_iov_(iov),
989         output_iov_count_(iov_count),
990         curr_iov_index_(0),
991         curr_iov_written_(0),
992         total_written_(0),
993         output_limit_(-1) {
994   }
995 
996   inline void SetExpectedLength(size_t len) {
997     output_limit_ = len;
998   }
999 
1000   inline bool CheckLength() const {
1001     return total_written_ == output_limit_;
1002   }
1003 
1004   inline bool Append(const char* ip, size_t len) {
1005     if (total_written_ + len > output_limit_) {
1006       return false;
1007     }
1008 
1009     while (len > 0) {
1010       assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
1011       if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) {
1012         // This iovec is full. Go to the next one.
1013         if (curr_iov_index_ + 1 >= output_iov_count_) {
1014           return false;
1015         }
1016         curr_iov_written_ = 0;
1017         ++curr_iov_index_;
1018       }
1019 
1020       const size_t to_write = std::min(
1021           len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_);
1022       memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1023              ip,
1024              to_write);
1025       curr_iov_written_ += to_write;
1026       total_written_ += to_write;
1027       ip += to_write;
1028       len -= to_write;
1029     }
1030 
1031     return true;
1032   }
1033 
1034   inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1035     const size_t space_left = output_limit_ - total_written_;
1036     if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
1037         output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
1038       // Fast path, used for the majority (about 95%) of invocations.
1039       char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
1040       UnalignedCopy64(ip, ptr);
1041       UnalignedCopy64(ip + 8, ptr + 8);
1042       curr_iov_written_ += len;
1043       total_written_ += len;
1044       return true;
1045     }
1046 
1047     return false;
1048   }
1049 
1050   inline bool AppendFromSelf(size_t offset, size_t len) {
1051     if (offset > total_written_ || offset == 0) {
1052       return false;
1053     }
1054     const size_t space_left = output_limit_ - total_written_;
1055     if (len > space_left) {
1056       return false;
1057     }
1058 
1059     // Locate the iovec from which we need to start the copy.
1060     int from_iov_index = curr_iov_index_;
1061     size_t from_iov_offset = curr_iov_written_;
1062     while (offset > 0) {
1063       if (from_iov_offset >= offset) {
1064         from_iov_offset -= offset;
1065         break;
1066       }
1067 
1068       offset -= from_iov_offset;
1069       --from_iov_index;
1070       assert(from_iov_index >= 0);
1071       from_iov_offset = output_iov_[from_iov_index].iov_len;
1072     }
1073 
1074     // Copy <len> bytes starting from the iovec pointed to by from_iov_index to
1075     // the current iovec.
1076     while (len > 0) {
1077       assert(from_iov_index <= curr_iov_index_);
1078       if (from_iov_index != curr_iov_index_) {
1079         const size_t to_copy = std::min(
1080             output_iov_[from_iov_index].iov_len - from_iov_offset,
1081             len);
1082         Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy);
1083         len -= to_copy;
1084         if (len > 0) {
1085           ++from_iov_index;
1086           from_iov_offset = 0;
1087         }
1088       } else {
1089         assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
1090         size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len -
1091                                       curr_iov_written_,
1092                                   len);
1093         if (to_copy == 0) {
1094           // This iovec is full. Go to the next one.
1095           if (curr_iov_index_ + 1 >= output_iov_count_) {
1096             return false;
1097           }
1098           ++curr_iov_index_;
1099           curr_iov_written_ = 0;
1100           continue;
1101         }
1102         if (to_copy > len) {
1103           to_copy = len;
1104         }
1105         IncrementalCopy(GetIOVecPointer(from_iov_index, from_iov_offset),
1106                         GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1107                         to_copy);
1108         curr_iov_written_ += to_copy;
1109         from_iov_offset += to_copy;
1110         total_written_ += to_copy;
1111         len -= to_copy;
1112       }
1113     }
1114 
1115     return true;
1116   }
1117 
1118 };
1119 
1120 bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
1121                           const struct iovec* iov, size_t iov_cnt) {
1122   ByteArraySource reader(compressed, compressed_length);
1123   return RawUncompressToIOVec(&reader, iov, iov_cnt);
1124 }
1125 
1126 bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
1127                           size_t iov_cnt) {
1128   SnappyIOVecWriter output(iov, iov_cnt);
1129   return InternalUncompress(compressed, &output);
1130 }
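
// Usage sketch (illustrative only; buffer names are made up and error
// handling is omitted):
//
//   char header[10], body[4086];
//   struct iovec iov[2] = { { header, sizeof(header) },
//                           { body,   sizeof(body)   } };
//   bool ok = snappy::RawUncompressToIOVec(compressed, compressed_length,
//                                          iov, 2);
//
// The uncompressed output fills the iovecs in order; decompression fails if
// their combined capacity is smaller than the uncompressed length.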
1131 
1132 // -----------------------------------------------------------------------
1133 // Flat array interfaces
1134 // -----------------------------------------------------------------------
1135 
1136 // A type that writes to a flat array.
1137 // Note that this is not a "ByteSink", but a type that matches the
1138 // Writer template argument to SnappyDecompressor::DecompressAllTags().
1139 class SnappyArrayWriter {
1140  private:
1141   char* base_;
1142   char* op_;
1143   char* op_limit_;
1144 
1145  public:
1146   inline explicit SnappyArrayWriter(char* dst)
1147       : base_(dst),
1148         op_(dst) {
1149   }
1150 
1151   inline void SetExpectedLength(size_t len) {
1152     op_limit_ = op_ + len;
1153   }
1154 
1155   inline bool CheckLength() const {
1156     return op_ == op_limit_;
1157   }
1158 
1159   inline bool Append(const char* ip, size_t len) {
1160     char* op = op_;
1161     const size_t space_left = op_limit_ - op;
1162     if (space_left < len) {
1163       return false;
1164     }
1165     memcpy(op, ip, len);
1166     op_ = op + len;
1167     return true;
1168   }
1169 
1170   inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1171     char* op = op_;
1172     const size_t space_left = op_limit_ - op;
1173     if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
1174       // Fast path, used for the majority (about 95%) of invocations.
1175       UnalignedCopy64(ip, op);
1176       UnalignedCopy64(ip + 8, op + 8);
1177       op_ = op + len;
1178       return true;
1179     } else {
1180       return false;
1181     }
1182   }
1183 
1184   inline bool AppendFromSelf(size_t offset, size_t len) {
1185     char* op = op_;
1186     const size_t space_left = op_limit_ - op;
1187 
1188     // Check if we try to append from before the start of the buffer.
1189     // Normally this would just be a check for "produced < offset",
1190     // but "produced <= offset - 1u" is equivalent for every case
1191     // except the one where offset==0, where the right side will wrap around
1192     // to a very big number. This is convenient, as offset==0 is another
1193     // invalid case that we also want to catch, so that we do not go
1194     // into an infinite loop.
1195     assert(op >= base_);
1196     size_t produced = op - base_;
1197     if (produced <= offset - 1u) {
1198       return false;
1199     }
1200     if (len <= 16 && offset >= 8 && space_left >= 16) {
1201       // Fast path, used for the majority (70-80%) of dynamic invocations.
1202       UnalignedCopy64(op - offset, op);
1203       UnalignedCopy64(op - offset + 8, op + 8);
1204     } else {
1205       if (space_left >= len + kMaxIncrementCopyOverflow) {
1206         IncrementalCopyFastPath(op - offset, op, len);
1207       } else {
1208         if (space_left < len) {
1209           return false;
1210         }
1211         IncrementalCopy(op - offset, op, len);
1212       }
1213     }
1214 
1215     op_ = op + len;
1216     return true;
1217   }
1218 };
1219 
1220 bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
1221   ByteArraySource reader(compressed, n);
1222   return RawUncompress(&reader, uncompressed);
1223 }
1224 
1225 bool RawUncompress(Source* compressed, char* uncompressed) {
1226   SnappyArrayWriter output(uncompressed);
1227   return InternalUncompress(compressed, &output);
1228 }
1229 
1230 bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
1231   size_t ulength;
1232   if (!GetUncompressedLength(compressed, n, &ulength)) {
1233     return false;
1234   }
1235   // On 32-bit builds: max_size() < kuint32max.  Check for that instead
1236   // of crashing (e.g., consider externally specified compressed data).
1237   if (ulength > uncompressed->max_size()) {
1238     return false;
1239   }
1240   STLStringResizeUninitialized(uncompressed, ulength);
1241   return RawUncompress(compressed, n, string_as_array(uncompressed));
1242 }
1243 
1244 
1245 // A Writer that drops everything on the floor and just does validation
1246 class SnappyDecompressionValidator {
1247  private:
1248   size_t expected_;
1249   size_t produced_;
1250 
1251  public:
1252   inline SnappyDecompressionValidator() : produced_(0) { }
1253   inline void SetExpectedLength(size_t len) {
1254     expected_ = len;
1255   }
1256   inline bool CheckLength() const {
1257     return expected_ == produced_;
1258   }
1259   inline bool Append(const char* ip, size_t len) {
1260     produced_ += len;
1261     return produced_ <= expected_;
1262   }
1263   inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
1264     return false;
1265   }
1266   inline bool AppendFromSelf(size_t offset, size_t len) {
1267     // See SnappyArrayWriter::AppendFromSelf for an explanation of
1268     // the "offset - 1u" trick.
1269     if (produced_ <= offset - 1u) return false;
1270     produced_ += len;
1271     return produced_ <= expected_;
1272   }
1273 };
1274 
1275 bool IsValidCompressedBuffer(const char* compressed, size_t n) {
1276   ByteArraySource reader(compressed, n);
1277   SnappyDecompressionValidator writer;
1278   return InternalUncompress(&reader, &writer);
1279 }
1280 
1281 void RawCompress(const char* input,
1282                  size_t input_length,
1283                  char* compressed,
1284                  size_t* compressed_length) {
1285   ByteArraySource reader(input, input_length);
1286   UncheckedByteArraySink writer(compressed);
1287   Compress(&reader, &writer);
1288 
1289   // Compute how many bytes were added
1290   *compressed_length = (writer.CurrentDestination() - compressed);
1291 }
1292 
1293 size_t Compress(const char* input, size_t input_length, string* compressed) {
1294   // Pre-grow the buffer to the max length of the compressed output
1295   compressed->resize(MaxCompressedLength(input_length));
1296 
1297   size_t compressed_length;
1298   RawCompress(input, input_length, string_as_array(compressed),
1299               &compressed_length);
1300   compressed->resize(compressed_length);
1301   return compressed_length;
1302 }
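
// End-to-end usage sketch (illustrative only; error handling omitted):
//
//   string original = ...;                 // data to compress
//   string compressed, restored;
//   snappy::Compress(original.data(), original.size(), &compressed);
//   snappy::Uncompress(compressed.data(), compressed.size(), &restored);
//   assert(restored == original);
//
// IsValidCompressedBuffer() can be used first to validate untrusted input
// without allocating space for the output.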
1303 
1304 
1305 } // end namespace snappy
1306 
1307