1 #if defined(__clang__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)
2 #pragma GCC diagnostic ignored "-Wunused-parameter"
3 #pragma GCC diagnostic ignored "-Wunused-function"
4 #pragma GCC diagnostic ignored "-Wsign-compare"
5 #endif
6 
7 // Copyright 2005 Google Inc. All Rights Reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
12 //
13 //     * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
15 //     * Redistributions in binary form must reproduce the above
16 // copyright notice, this list of conditions and the following disclaimer
17 // in the documentation and/or other materials provided with the
18 // distribution.
19 //     * Neither the name of Google Inc. nor the names of its
20 // contributors may be used to endorse or promote products derived from
21 // this software without specific prior written permission.
22 //
23 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 #include "snappy-lcb-msvc.h" // Added by libcouchbase
36 #include "snappy.h"
37 #include "snappy-internal.h"
38 #include "snappy-sinksource.h"
39 
40 #include <stdio.h>
41 
42 #include <algorithm>
43 #include <string>
44 #include <vector>
45 
46 
47 namespace snappy {
48 
49 // Any hash function will produce a valid compressed bitstream, but a good
50 // hash function reduces the number of collisions and thus yields better
51 // compression for compressible input, and more speed for incompressible
52 // input. Of course, it doesn't hurt if the hash function is reasonably fast
53 // either, as it gets called a lot.
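// For example, CompressFragment passes
// shift == 32 - Bits::Log2Floor(table_size), so a 16384-entry hash table
// uses shift == 18 and only the top 14 bits of the 32-bit product
// bytes * kMul select a bucket.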
54 static inline uint32 HashBytes(uint32 bytes, int shift) {
55   static const uint32 kMul = 0x1e35a7bd;
56   return (bytes * kMul) >> shift;
57 }
58 static inline uint32 Hash(const char* p, int shift) {
59   return HashBytes(UNALIGNED_LOAD32(p), shift);
60 }
61 
62 size_t MaxCompressedLength(size_t source_len) {
63   // Compressed data can be defined as:
64   //    compressed := item* literal*
65   //    item       := literal* copy
66   //
67   // The trailing literal sequence has a space blowup of at most 62/60
68   // since a literal of length 60 needs one tag byte + one extra byte
69   // for length information.
70   //
71   // Item blowup is trickier to measure.  Suppose the "copy" op copies
72   // 4 bytes of data.  Because of a special check in the encoding code,
73   // we produce a 4-byte copy only if the offset is < 65536.  Therefore
74   // the copy op takes 3 bytes to encode, and this type of item leads
75   // to at most the 62/60 blowup for representing literals.
76   //
77   // Suppose the "copy" op copies 5 bytes of data.  If the offset is big
78   // enough, it will take 5 bytes to encode the copy op.  Therefore the
79   // worst case here is a one-byte literal followed by a five-byte copy.
80   // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
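  // For example, 600 bytes of input consisting entirely of such items can
  // grow to 700 bytes of output in the worst case.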
81   //
82   // This last factor dominates the blowup, so the final estimate is:
83   return 32 + source_len + source_len/6;
84 }
85 
86 enum {
87   LITERAL = 0,
88   COPY_1_BYTE_OFFSET = 1,  // 3 bit length + 3 bits of offset in opcode
89   COPY_2_BYTE_OFFSET = 2,
90   COPY_4_BYTE_OFFSET = 3
91 };
92 static const int kMaximumTagLength = 5;  // COPY_4_BYTE_OFFSET plus the actual offset.
93 
94 // Copy "len" bytes from "src" to "op", one byte at a time.  Used for
95 // handling COPY operations where the input and output regions may
96 // overlap.  For example, suppose:
97 //    src    == "ab"
98 //    op     == src + 2
99 //    len    == 20
100 // After IncrementalCopy(src, op, len), the result will have
101 // eleven copies of "ab"
102 //    ababababababababababab
103 // Note that this does not match the semantics of either memcpy()
104 // or memmove().
105 static inline void IncrementalCopy(const char* src, char* op, ssize_t len) {
106   assert(len > 0);
107   do {
108     *op++ = *src++;
109   } while (--len > 0);
110 }
111 
112 // Equivalent to IncrementalCopy except that it can write up to ten extra
113 // bytes after the end of the copy, and that it is faster.
114 //
115 // The main part of this loop is a simple copy of eight bytes at a time until
116 // we've copied (at least) the requested amount of bytes.  However, if op and
117 // src are less than eight bytes apart (indicating a repeating pattern of
118 // length < 8), we first need to expand the pattern in order to get the correct
119 // results. For instance, if the buffer looks like this, with the eight-byte
120 // <src> and <op> patterns marked as intervals:
121 //
122 //    abxxxxxxxxxxxx
123 //    [------]           src
124 //      [------]         op
125 //
126 // a single eight-byte copy from <src> to <op> will repeat the pattern once,
127 // after which we can move <op> two bytes without moving <src>:
128 //
129 //    ababxxxxxxxxxx
130 //    [------]           src
131 //        [------]       op
132 //
133 // and repeat the exercise until the two no longer overlap.
134 //
135 // This allows us to do very well in the special case of one single byte
136 // repeated many times, without taking a big hit for more general cases.
137 //
138 // The worst case of extra writing past the end of the match occurs when
139 // op - src == 1 and len == 1; the last copy will read from byte positions
140 // [0..7] and write to [4..11], whereas it was only supposed to write to
141 // position 1. Thus, ten excess bytes.
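//
// For example, if op - src == 2 to begin with, the first iteration copies
// eight bytes and then advances op by 2, widening the gap to 4; the next
// iteration widens it to 8, after which the plain eight-bytes-at-a-time
// loop takes over.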
142 
143 namespace {
144 
145 const int kMaxIncrementCopyOverflow = 10;
146 
147 inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) {
148   while (op - src < 8) {
149     UnalignedCopy64(src, op);
150     len -= op - src;
151     op += op - src;
152   }
153   while (len > 0) {
154     UnalignedCopy64(src, op);
155     src += 8;
156     op += 8;
157     len -= 8;
158   }
159 }
160 
161 }  // namespace
162 
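// Emits a literal of "len" bytes starting at "literal": first a LITERAL tag
// (plus, for len >= 61, 1-4 extra length bytes), then the literal bytes
// themselves.  For example, a 10-byte literal gets the single tag byte
// LITERAL | (9 << 2) == 0x24, while a 100-byte literal gets the tag byte
// LITERAL | (60 << 2) == 0xf0 followed by one extra byte holding
// len - 1 == 99.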
163 static inline char* EmitLiteral(char* op,
164                                 const char* literal,
165                                 int len,
166                                 bool allow_fast_path) {
167   int n = len - 1;      // Zero-length literals are disallowed
168   if (n < 60) {
169     // Fits in tag byte
170     *op++ = LITERAL | (n << 2);
171 
172     // The vast majority of copies are below 16 bytes, for which a
173     // call to memcpy is overkill. This fast path can sometimes
174     // copy up to 15 bytes too much, but that is okay in the
175     // main loop, since we have a bit to go on for both sides:
176     //
177     //   - The input will always have kInputMarginBytes = 15 extra
178     //     available bytes, as long as we're in the main loop, and
179     //     if not, allow_fast_path = false.
180     //   - The output will always have 32 spare bytes (see
181     //     MaxCompressedLength).
182     if (allow_fast_path && len <= 16) {
183       UnalignedCopy64(literal, op);
184       UnalignedCopy64(literal + 8, op + 8);
185       return op + len;
186     }
187   } else {
188     // Encode in upcoming bytes
189     char* base = op;
190     int count = 0;
191     op++;
192     while (n > 0) {
193       *op++ = n & 0xff;
194       n >>= 8;
195       count++;
196     }
197     assert(count >= 1);
198     assert(count <= 4);
199     *base = LITERAL | ((59+count) << 2);
200   }
201   memcpy(op, literal, len);
202   return op + len;
203 }
204 
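// Emits a single copy tag for a match of 4..64 bytes at an offset below
// 65536.  Short, nearby matches (len < 12, offset < 2048) use the two-byte
// COPY_1_BYTE_OFFSET form; for example, a 7-byte copy at offset 300 becomes
// the tag byte 1 + ((7-4) << 2) + ((300 >> 8) << 5) == 0x2d followed by
// 300 & 0xff == 0x2c.  All other cases use the three-byte
// COPY_2_BYTE_OFFSET form.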
205 static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
206   assert(len <= 64);
207   assert(len >= 4);
208   assert(offset < 65536);
209 
210   if ((len < 12) && (offset < 2048)) {
211     size_t len_minus_4 = len - 4;
212     assert(len_minus_4 < 8);            // Must fit in 3 bits
213     *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
214     *op++ = offset & 0xff;
215   } else {
216     *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
217     LittleEndian::Store16(op, offset);
218     op += 2;
219   }
220   return op;
221 }
222 
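// Emits a copy of arbitrary length as a sequence of copy tags, each covering
// at most 64 bytes, while making sure every emitted piece is at least 4
// bytes long (the minimum EmitCopyLessThan64 can encode).  For example, a
// 130-byte copy is emitted as pieces of 64, 60 and 6 bytes.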
223 static inline char* EmitCopy(char* op, size_t offset, int len) {
224   // Emit 64 byte copies but make sure to keep at least four bytes reserved
225   while (len >= 68) {
226     op = EmitCopyLessThan64(op, offset, 64);
227     len -= 64;
228   }
229 
230   // Emit an extra 60-byte copy if we have too much data to fit in one copy
231   if (len > 64) {
232     op = EmitCopyLessThan64(op, offset, 60);
233     len -= 60;
234   }
235 
236   // Emit remainder
237   op = EmitCopyLessThan64(op, offset, len);
238   return op;
239 }
240 
241 
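// Reads the uncompressed length stored at the front of the compressed data
// as a little-endian base-128 varint.  For example, a length of 64 is the
// single byte 0x40, and a length of 2097150 (0x1FFFFE) is the three bytes
// 0xFE 0xFF 0x7F.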
242 bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
243   uint32 v = 0;
244   const char* limit = start + n;
245   if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
246     *result = v;
247     return true;
248   } else {
249     return false;
250   }
251 }
252 
253 namespace internal {
254 uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
255   // Use a smaller hash table when the input is smaller, since we
256   // fill the table, incurring O(hash table size) overhead for
257   // compression, and if the input is short, we won't need that
258   // many hash table entries anyway.
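  // For example, a 1000-byte input gets a 1024-entry table (assuming
  // kMaxHashTableSize is at least that large): htsize doubles from 256
  // until it is >= input_size or reaches kMaxHashTableSize.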
259   assert(kMaxHashTableSize >= 256);
260   size_t htsize = 256;
261   while (htsize < kMaxHashTableSize && htsize < input_size) {
262     htsize <<= 1;
263   }
264 
265   uint16* table;
266   if (htsize <= ARRAYSIZE(small_table_)) {
267     table = small_table_;
268   } else {
269     if (large_table_ == NULL) {
270       large_table_ = new uint16[kMaxHashTableSize];
271     }
272     table = large_table_;
273   }
274 
275   *table_size = htsize;
276   memset(table, 0, htsize * sizeof(*table));
277   return table;
278 }
279 }  // end namespace internal
280 
281 // For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
282 // equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
283 // empirically found that overlapping loads such as
284 //  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
285 // are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
286 //
287 // We have different versions for 64- and 32-bit; ideally we would avoid the
288 // two functions and just inline the UNALIGNED_LOAD64 call into
289 // GetUint32AtOffset, but GCC (at least as of 4.6) is seemingly not clever
290 // enough to avoid loading the value multiple times in that case.  For
291 // 64-bit, the load is done when GetEightBytesAt() is called, whereas for
292 // 32-bit, the load is done at GetUint32AtOffset() time.
293 
294 #ifdef ARCH_K8
295 
296 typedef uint64 EightBytesReference;
297 
298 static inline EightBytesReference GetEightBytesAt(const char* ptr) {
299   return UNALIGNED_LOAD64(ptr);
300 }
301 
302 static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
303   assert(offset >= 0);
304   assert(offset <= 4);
305   return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
306 }
307 
308 #else
309 
310 typedef const char* EightBytesReference;
311 
312 static inline EightBytesReference GetEightBytesAt(const char* ptr) {
313   return ptr;
314 }
315 
316 static inline uint32 GetUint32AtOffset(const char* v, int offset) {
317   assert(offset >= 0);
318   assert(offset <= 4);
319   return UNALIGNED_LOAD32(v + offset);
320 }
321 
322 #endif
323 
324 // Flat array compression that does not emit the "uncompressed length"
325 // prefix. Compresses "input" string to the "*op" buffer.
326 //
327 // REQUIRES: "input" is at most "kBlockSize" bytes long.
328 // REQUIRES: "op" points to an array of memory that is at least
329 // "MaxCompressedLength(input.size())" in size.
330 // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
331 // REQUIRES: "table_size" is a power of two
332 //
333 // Returns an "end" pointer into "op" buffer.
334 // "end - op" is the compressed size of "input".
335 namespace internal {
336 char* CompressFragment(const char* input,
337                        size_t input_size,
338                        char* op,
339                        uint16* table,
340                        const int table_size) {
341   // "ip" is the input pointer, and "op" is the output pointer.
342   const char* ip = input;
343   assert(input_size <= kBlockSize);
344   assert((table_size & (table_size - 1)) == 0); // table must be power of two
345   const int shift = 32 - Bits::Log2Floor(table_size);
346   assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
347   const char* ip_end = input + input_size;
348   const char* base_ip = ip;
349   // Bytes in [next_emit, ip) will be emitted as literal bytes.  Or
350   // [next_emit, ip_end) after the main loop.
351   const char* next_emit = ip;
352 
353   const size_t kInputMarginBytes = 15;
354   if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
355     const char* ip_limit = input + input_size - kInputMarginBytes;
356 
357     for (uint32 next_hash = Hash(++ip, shift); ; ) {
358       assert(next_emit < ip);
359       // The body of this loop calls EmitLiteral once and then EmitCopy one or
360       // more times.  (The exception is that when we're close to exhausting
361       // the input we goto emit_remainder.)
362       //
363       // In the first iteration of this loop we're just starting, so
364       // there's nothing to copy, so calling EmitLiteral once is
365       // necessary.  And we only start a new iteration when the
366       // current iteration has determined that a call to EmitLiteral will
367       // precede the next call to EmitCopy (if any).
368       //
369       // Step 1: Scan forward in the input looking for a 4-byte-long match.
370       // If we get close to exhausting the input then goto emit_remainder.
371       //
372       // Heuristic match skipping: If 32 bytes are scanned with no matches
373       // found, start looking only at every other byte. If 32 more bytes are
374       // scanned, look at every third byte, etc.  When a match is found,
375       // immediately go back to looking at every byte. This is a small loss
376       // (~5% performance, ~0.1% density) for compressible data due to more
377       // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
378       // win since the compressor quickly "realizes" the data is incompressible
379       // and doesn't bother looking for matches everywhere.
380       //
381       // The "skip" variable keeps track of how many bytes there are since the
382       // last match; dividing it by 32 (i.e. right-shifting by five) gives the
383       // number of bytes to move ahead for each iteration.
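      // For example, "skip" starts at 32, so the first 32 probes advance one
      // byte at a time, the next 32 advance two bytes at a time, and so on.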
384       uint32 skip = 32;
385 
386       const char* next_ip = ip;
387       const char* candidate;
388       do {
389         ip = next_ip;
390         uint32 hash = next_hash;
391         assert(hash == Hash(ip, shift));
392         uint32 bytes_between_hash_lookups = skip++ >> 5;
393         next_ip = ip + bytes_between_hash_lookups;
394         if (PREDICT_FALSE(next_ip > ip_limit)) {
395           goto emit_remainder;
396         }
397         next_hash = Hash(next_ip, shift);
398         candidate = base_ip + table[hash];
399         assert(candidate >= base_ip);
400         assert(candidate < ip);
401 
402         table[hash] = ip - base_ip;
403       } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
404                             UNALIGNED_LOAD32(candidate)));
405 
406       // Step 2: A 4-byte match has been found.  We'll later see if more
407       // than 4 bytes match.  But, prior to the match, input
408       // bytes [next_emit, ip) are unmatched.  Emit them as "literal bytes."
409       assert(next_emit + 16 <= ip_end);
410       op = EmitLiteral(op, next_emit, ip - next_emit, true);
411 
412       // Step 3: Call EmitCopy, and then see if another EmitCopy could
413       // be our next move.  Repeat until we find no match for the
414       // input immediately after what was consumed by the last EmitCopy call.
415       //
416       // If we exit this loop normally then we need to call EmitLiteral next,
417       // though we don't yet know how big the literal will be.  We handle that
418       // by proceeding to the next iteration of the main loop.  We also can exit
419       // this loop via goto if we get close to exhausting the input.
420       EightBytesReference input_bytes;
421       uint32 candidate_bytes = 0;
422 
423       do {
424         // We have a 4-byte match at ip, and no need to emit any
425         // "literal bytes" prior to ip.
426         const char* base = ip;
427         int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
428         ip += matched;
429         size_t offset = base - candidate;
430         assert(0 == memcmp(base, candidate, matched));
431         op = EmitCopy(op, offset, matched);
432         // We could immediately start working at ip now, but to improve
433         // compression we first update table[Hash(ip - 1, ...)].
434         const char* insert_tail = ip - 1;
435         next_emit = ip;
436         if (PREDICT_FALSE(ip >= ip_limit)) {
437           goto emit_remainder;
438         }
439         input_bytes = GetEightBytesAt(insert_tail);
440         uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
441         table[prev_hash] = ip - base_ip - 1;
442         uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
443         candidate = base_ip + table[cur_hash];
444         candidate_bytes = UNALIGNED_LOAD32(candidate);
445         table[cur_hash] = ip - base_ip;
446       } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
447 
448       next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
449       ++ip;
450     }
451   }
452 
453  emit_remainder:
454   // Emit the remaining bytes as a literal
455   if (next_emit < ip_end) {
456     op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
457   }
458 
459   return op;
460 }
461 }  // end namespace internal
462 
463 // Signature of output types needed by decompression code.
464 // The decompression code is templatized on a type that obeys this
465 // signature so that we do not pay virtual function call overhead in
466 // the middle of a tight decompression loop.
467 //
468 // class DecompressionWriter {
469 //  public:
470 //   // Called before decompression
471 //   void SetExpectedLength(size_t length);
472 //
473 //   // Called after decompression
474 //   bool CheckLength() const;
475 //
476 //   // Called repeatedly during decompression
477 //   bool Append(const char* ip, size_t length);
478 //   bool AppendFromSelf(uint32 offset, size_t length);
479 //
480 //   // The rules for how TryFastAppend differs from Append are somewhat
481 //   // convoluted:
482 //   //
483 //   //  - TryFastAppend is allowed to decline (return false) at any
484 //   //    time, for any reason -- just "return false" would be
485 //   //    a perfectly legal implementation of TryFastAppend.
486 //   //    The intention is for TryFastAppend to allow a fast path
487 //   //    in the common case of a small append.
488 //   //  - TryFastAppend is allowed to read up to <available> bytes
489 //   //    from the input buffer, whereas Append is allowed to read
490 //   //    <length>. However, if it returns true, it must leave
491 //   //    at least five (kMaximumTagLength) bytes in the input buffer
492 //   //    afterwards, so that there is always enough space to read the
493 //   //    next tag without checking for a refill.
494 //   //  - TryFastAppend must always decline (return false)
495 //   //    if <length> is 61 or more, as in this case the literal length is not
496 //   //    decoded fully. In practice, this should not be a big problem,
497 //   //    as it is unlikely that one would implement a fast path accepting
498 //   //    this much data.
499 //   //
500 //   bool TryFastAppend(const char* ip, size_t available, size_t length);
501 // };
502 
503 // -----------------------------------------------------------------------
504 // Lookup table for decompression code.  Generated by ComputeTable() below.
505 // -----------------------------------------------------------------------
506 
507 // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
508 static const uint32 wordmask[] = {
509   0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
510 };
511 
512 // Data stored per entry in lookup table:
513 //      Range   Bits-used       Description
514 //      ------------------------------------
515 //      1..64   0..7            Literal/copy length encoded in opcode byte
516 //      0..7    8..10           Copy offset encoded in opcode byte / 256
517 //      0..4    11..13          Extra bytes after opcode
518 //
519 // We use eight bits for the length even though 7 would have sufficed,
520 // for efficiency reasons:
521 //      (1) Extracting a byte is faster than a bit-field
522 //      (2) It properly aligns copy offset so we do not need a <<8
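// For example, char_table[0x01] == 0x0804: tag byte 0x01 is a
// COPY_1_BYTE_OFFSET with length 4 and copy-offset high bits 0, followed by
// one extra byte holding the low 8 bits of the offset.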
523 static const uint16 char_table[256] = {
524   0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
525   0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
526   0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
527   0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
528   0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
529   0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
530   0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
531   0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
532   0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
533   0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
534   0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
535   0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
536   0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
537   0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
538   0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
539   0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
540   0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
541   0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
542   0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
543   0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
544   0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
545   0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
546   0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
547   0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
548   0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
549   0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
550   0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
551   0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
552   0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
553   0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
554   0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
555   0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
556 };
557 
558 // In debug mode, allow optional computation of the table at startup.
559 // Also, check that the decompression table is correct.
560 #ifndef NDEBUG
561 DEFINE_bool(snappy_dump_decompression_table, false,
562             "If true, we print the decompression table at startup.");
563 
564 static uint16 MakeEntry(unsigned int extra,
565                         unsigned int len,
566                         unsigned int copy_offset) {
567   // Check that all of the fields fit within the allocated space
568   assert(extra       == (extra & 0x7));          // At most 3 bits
569   assert(copy_offset == (copy_offset & 0x7));    // At most 3 bits
570   assert(len         == (len & 0x7f));           // At most 7 bits
571   return len | (copy_offset << 8) | (extra << 11);
572 }
573 
574 static void ComputeTable() {
575   uint16 dst[256];
576 
577   // Place invalid entries in all places to detect missing initialization
578   int assigned = 0;
579   for (int i = 0; i < 256; i++) {
580     dst[i] = 0xffff;
581   }
582 
583   // Small LITERAL entries.  We store (len-1) in the top 6 bits.
584   for (unsigned int len = 1; len <= 60; len++) {
585     dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
586     assigned++;
587   }
588 
589   // Large LITERAL entries.  We use 60..63 in the high 6 bits to
590   // encode the number of bytes of length info that follow the opcode.
591   for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
592     // We set the length field in the lookup table to 1 because extra
593     // bytes encode len-1.
594     dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
595     assigned++;
596   }
597 
598   // COPY_1_BYTE_OFFSET.
599   //
600   // The tag byte in the compressed data stores len-4 in 3 bits, and
601   // offset/256 in 5 bits.  offset%256 is stored in the next byte.
602   //
603   // This format is used for length in range [4..11] and offset in
604   // range [0..2047]
605   for (unsigned int len = 4; len < 12; len++) {
606     for (unsigned int offset = 0; offset < 2048; offset += 256) {
607       dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
608         MakeEntry(1, len, offset>>8);
609       assigned++;
610     }
611   }
612 
613   // COPY_2_BYTE_OFFSET.
614   // Tag contains len-1 in top 6 bits, and offset in next two bytes.
615   for (unsigned int len = 1; len <= 64; len++) {
616     dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
617     assigned++;
618   }
619 
620   // COPY_4_BYTE_OFFSET.
621   // Tag contains len-1 in top 6 bits, and offset in next four bytes.
622   for (unsigned int len = 1; len <= 64; len++) {
623     dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
624     assigned++;
625   }
626 
627   // Check that each entry was initialized exactly once.
628   if (assigned != 256) {
629     fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
630     abort();
631   }
632   for (int i = 0; i < 256; i++) {
633     if (dst[i] == 0xffff) {
634       fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
635       abort();
636     }
637   }
638 
639   if (FLAGS_snappy_dump_decompression_table) {
640     printf("static const uint16 char_table[256] = {\n  ");
641     for (int i = 0; i < 256; i++) {
642       printf("0x%04x%s",
643              dst[i],
644              ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
645     }
646     printf("};\n");
647   }
648 
649   // Check that computed table matched recorded table
650   for (int i = 0; i < 256; i++) {
651     if (dst[i] != char_table[i]) {
652       fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
653               i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
654       abort();
655     }
656   }
657 }
658 #endif /* !NDEBUG */
659 
660 // Helper class for decompression
661 class SnappyDecompressor {
662  private:
663   Source*       reader_;         // Underlying source of bytes to decompress
664   const char*   ip_;             // Points to next buffered byte
665   const char*   ip_limit_;       // Points just past buffered bytes
666   uint32        peeked_;         // Bytes peeked from reader (need to skip)
667   bool          eof_;            // Hit end of input without an error?
668   char          scratch_[kMaximumTagLength];  // See RefillTag().
669 
670   // Ensure that all of the tag metadata for the next tag is available
671   // in [ip_..ip_limit_-1].  Also ensures that [ip,ip+4] is readable even
672   // if (ip_limit_ - ip_ < 5).
673   //
674   // Returns true on success, false on error or end of input.
675   bool RefillTag();
676 
677  public:
678   explicit SnappyDecompressor(Source* reader)
679       : reader_(reader),
680         ip_(NULL),
681         ip_limit_(NULL),
682         peeked_(0),
683         eof_(false) {
684   }
685 
686   ~SnappyDecompressor() {
687     // Advance past any bytes we peeked at from the reader
688     reader_->Skip(peeked_);
689   }
690 
691   // Returns true iff we have hit the end of the input without an error.
692   bool eof() const {
693     return eof_;
694   }
695 
696   // Read the uncompressed length stored at the start of the compressed data.
697   // On success, stores the length in *result and returns true.
698   // On failure, returns false.
699   bool ReadUncompressedLength(uint32* result) {
700     assert(ip_ == NULL);       // Must not have read anything yet
701     // Length is encoded in 1..5 bytes
702     *result = 0;
703     uint32 shift = 0;
704     while (true) {
705       if (shift >= 32) return false;
706       size_t n;
707       const char* ip = reader_->Peek(&n);
708       if (n == 0) return false;
709       const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
710       reader_->Skip(1);
711       *result |= static_cast<uint32>(c & 0x7f) << shift;
712       if (c < 128) {
713         break;
714       }
715       shift += 7;
716     }
717     return true;
718   }
719 
720   // Process all of the items found in the input, writing the uncompressed
721   // data to *writer.  Stops on error or at end of input; eof() tells which.
722   template <class Writer>
723   void DecompressAllTags(Writer* writer) {
724     const char* ip = ip_;
725 
726     // We could have put this refill fragment only at the beginning of the loop.
727     // However, duplicating it at the end of each branch gives the compiler more
728     // scope to optimize the <ip_limit_ - ip> expression based on the local
729     // context, which overall increases speed.
730     #define MAYBE_REFILL() \
731         if (ip_limit_ - ip < kMaximumTagLength) { \
732           ip_ = ip; \
733           if (!RefillTag()) return; \
734           ip = ip_; \
735         }
736 
737     MAYBE_REFILL();
738     for ( ;; ) {
739       const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
740 
741       if ((c & 0x3) == LITERAL) {
742         size_t literal_length = (c >> 2) + 1u;
743         if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
744           assert(literal_length < 61);
745           ip += literal_length;
746           // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend()
747           // will not return true unless there's already at least five spare
748           // bytes in addition to the literal.
749           continue;
750         }
751         if (PREDICT_FALSE(literal_length >= 61)) {
752           // Long literal.
753           const size_t literal_length_length = literal_length - 60;
754           literal_length =
755               (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
756           ip += literal_length_length;
757         }
758 
759         size_t avail = ip_limit_ - ip;
760         while (avail < literal_length) {
761           if (!writer->Append(ip, avail)) return;
762           literal_length -= avail;
763           reader_->Skip(peeked_);
764           size_t n;
765           ip = reader_->Peek(&n);
766           avail = n;
767           peeked_ = avail;
768           if (avail == 0) return;  // Premature end of input
769           ip_limit_ = ip + avail;
770         }
771         if (!writer->Append(ip, literal_length)) {
772           return;
773         }
774         ip += literal_length;
775         MAYBE_REFILL();
776       } else {
777         const uint32 entry = char_table[c];
778         const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
779         const uint32 length = entry & 0xff;
780         ip += entry >> 11;
781 
782         // copy_offset/256 is encoded in bits 8..10.  By just fetching
783         // those bits, we get copy_offset (since the bit-field starts at
784         // bit 8).
785         const uint32 copy_offset = entry & 0x700;
786         if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
787           return;
788         }
789         MAYBE_REFILL();
790       }
791     }
792 
793 #undef MAYBE_REFILL
794   }
795 };
796 
797 bool SnappyDecompressor::RefillTag() {
798   const char* ip = ip_;
799   if (ip == ip_limit_) {
800     // Fetch a new fragment from the reader
801     reader_->Skip(peeked_);   // All peeked bytes are used up
802     size_t n;
803     ip = reader_->Peek(&n);
804     peeked_ = n;
805     if (n == 0) {
806       eof_ = true;
807       return false;
808     }
809     ip_limit_ = ip + n;
810   }
811 
812   // Read the tag character
813   assert(ip < ip_limit_);
814   const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
815   const uint32 entry = char_table[c];
816   const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
817   assert(needed <= sizeof(scratch_));
818 
819   // Read more bytes from reader if needed
820   uint32 nbuf = ip_limit_ - ip;
821   if (nbuf < needed) {
822     // Stitch together bytes from ip and reader to form the word
823     // contents.  We store the needed bytes in "scratch_".  They
824     // will be consumed immediately by the caller since we do not
825     // read more than we need.
826     memmove(scratch_, ip, nbuf);
827     reader_->Skip(peeked_);  // All peeked bytes are used up
828     peeked_ = 0;
829     while (nbuf < needed) {
830       size_t length;
831       const char* src = reader_->Peek(&length);
832       if (length == 0) return false;
833       uint32 to_add = min<uint32>(needed - nbuf, length);
834       memcpy(scratch_ + nbuf, src, to_add);
835       nbuf += to_add;
836       reader_->Skip(to_add);
837     }
838     assert(nbuf == needed);
839     ip_ = scratch_;
840     ip_limit_ = scratch_ + needed;
841   } else if (nbuf < kMaximumTagLength) {
842     // Have enough bytes, but move into scratch_ so that we do not
843     // read past end of input
844     memmove(scratch_, ip, nbuf);
845     reader_->Skip(peeked_);  // All peeked bytes are used up
846     peeked_ = 0;
847     ip_ = scratch_;
848     ip_limit_ = scratch_ + nbuf;
849   } else {
850     // Pass pointer to buffer returned by reader_.
851     ip_ = ip;
852   }
853   return true;
854 }
855 
856 template <typename Writer>
857 static bool InternalUncompress(Source* r, Writer* writer) {
858   // Read the uncompressed length from the front of the compressed input
859   SnappyDecompressor decompressor(r);
860   uint32 uncompressed_len = 0;
861   if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
862   return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
863 }
864 
865 template <typename Writer>
866 static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
867                                       Writer* writer,
868                                       uint32 uncompressed_len) {
869   writer->SetExpectedLength(uncompressed_len);
870 
871   // Process the entire input
872   decompressor->DecompressAllTags(writer);
873   return (decompressor->eof() && writer->CheckLength());
874 }
875 
876 bool GetUncompressedLength(Source* source, uint32* result) {
877   SnappyDecompressor decompressor(source);
878   return decompressor.ReadUncompressedLength(result);
879 }
880 
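// Compresses all of the data available from "reader" and appends the result
// to "writer": first the uncompressed length as a varint, then each block of
// at most kBlockSize input bytes compressed independently.  Returns the
// number of bytes written to "writer".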
881 size_t Compress(Source* reader, Sink* writer) {
882   size_t written = 0;
883   size_t N = reader->Available();
884   char ulength[Varint::kMax32];
885   char* p = Varint::Encode32(ulength, N);
886   writer->Append(ulength, p-ulength);
887   written += (p - ulength);
888 
889   internal::WorkingMemory wmem;
890   char* scratch = NULL;
891   char* scratch_output = NULL;
892 
893   while (N > 0) {
894     // Get next block to compress (without copying if possible)
895     size_t fragment_size;
896     const char* fragment = reader->Peek(&fragment_size);
897     assert(fragment_size != 0);  // premature end of input
898     const size_t num_to_read = min(N, kBlockSize);
899     size_t bytes_read = fragment_size;
900 
901     size_t pending_advance = 0;
902     if (bytes_read >= num_to_read) {
903       // Buffer returned by reader is large enough
904       pending_advance = num_to_read;
905       fragment_size = num_to_read;
906     } else {
907       // Read into scratch buffer
908       if (scratch == NULL) {
909         // If this is the last iteration, we want to allocate N bytes
910         // of space, otherwise the max possible kBlockSize space.
911         // num_to_read contains exactly the correct value
912         scratch = new char[num_to_read];
913       }
914       memcpy(scratch, fragment, bytes_read);
915       reader->Skip(bytes_read);
916 
917       while (bytes_read < num_to_read) {
918         fragment = reader->Peek(&fragment_size);
919         size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
920         memcpy(scratch + bytes_read, fragment, n);
921         bytes_read += n;
922         reader->Skip(n);
923       }
924       assert(bytes_read == num_to_read);
925       fragment = scratch;
926       fragment_size = num_to_read;
927     }
928     assert(fragment_size == num_to_read);
929 
930     // Get encoding table for compression
931     int table_size;
932     uint16* table = wmem.GetHashTable(num_to_read, &table_size);
933 
934     // Compress input_fragment and append to dest
935     const int max_output = MaxCompressedLength(num_to_read);
936 
937     // Need a scratch buffer for the output, in case the byte sink doesn't
938     // have room for us directly.
939     if (scratch_output == NULL) {
940       scratch_output = new char[max_output];
941     } else {
942       // Since we encode kBlockSize regions followed by a region
943       // which is <= kBlockSize in length, a previously allocated
944       // scratch_output[] region is big enough for this iteration.
945     }
946     char* dest = writer->GetAppendBuffer(max_output, scratch_output);
947     char* end = internal::CompressFragment(fragment, fragment_size,
948                                            dest, table, table_size);
949     writer->Append(dest, end - dest);
950     written += (end - dest);
951 
952     N -= num_to_read;
953     reader->Skip(pending_advance);
954   }
955 
956   delete[] scratch;
957   delete[] scratch_output;
958 
959   return written;
960 }
961 
962 // -----------------------------------------------------------------------
963 // IOVec interfaces
964 // -----------------------------------------------------------------------
965 
966 // A type that writes to an iovec.
967 // Note that this is not a "ByteSink", but a type that matches the
968 // Writer template argument to SnappyDecompressor::DecompressAllTags().
969 class SnappyIOVecWriter {
970  private:
971   const struct iovec* output_iov_;
972   const size_t output_iov_count_;
973 
974   // We are currently writing into output_iov_[curr_iov_index_].
975   int curr_iov_index_;
976 
977   // Bytes written to output_iov_[curr_iov_index_] so far.
978   size_t curr_iov_written_;
979 
980   // Total bytes decompressed into output_iov_ so far.
981   size_t total_written_;
982 
983   // Maximum number of bytes that will be decompressed into output_iov_.
984   size_t output_limit_;
985 
986   inline char* GetIOVecPointer(int index, size_t offset) {
987     return reinterpret_cast<char*>(output_iov_[index].iov_base) +
988         offset;
989   }
990 
991  public:
992   // Does not take ownership of iov. iov must be valid during the
993   // entire lifetime of the SnappyIOVecWriter.
994   inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
995       : output_iov_(iov),
996         output_iov_count_(iov_count),
997         curr_iov_index_(0),
998         curr_iov_written_(0),
999         total_written_(0),
1000         output_limit_(-1) {
1001   }
1002 
1003   inline void SetExpectedLength(size_t len) {
1004     output_limit_ = len;
1005   }
1006 
1007   inline bool CheckLength() const {
1008     return total_written_ == output_limit_;
1009   }
1010 
1011   inline bool Append(const char* ip, size_t len) {
1012     if (total_written_ + len > output_limit_) {
1013       return false;
1014     }
1015 
1016     while (len > 0) {
1017       assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
1018       if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) {
1019         // This iovec is full. Go to the next one.
1020         if (curr_iov_index_ + 1 >= output_iov_count_) {
1021           return false;
1022         }
1023         curr_iov_written_ = 0;
1024         ++curr_iov_index_;
1025       }
1026 
1027       const size_t to_write = std::min(
1028           len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_);
1029       memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1030              ip,
1031              to_write);
1032       curr_iov_written_ += to_write;
1033       total_written_ += to_write;
1034       ip += to_write;
1035       len -= to_write;
1036     }
1037 
1038     return true;
1039   }
1040 
1041   inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1042     const size_t space_left = output_limit_ - total_written_;
1043     if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
1044         output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
1045       // Fast path, used for the majority (about 95%) of invocations.
1046       char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
1047       UnalignedCopy64(ip, ptr);
1048       UnalignedCopy64(ip + 8, ptr + 8);
1049       curr_iov_written_ += len;
1050       total_written_ += len;
1051       return true;
1052     }
1053 
1054     return false;
1055   }
1056 
1057   inline bool AppendFromSelf(size_t offset, size_t len) {
1058     if (offset > total_written_ || offset == 0) {
1059       return false;
1060     }
1061     const size_t space_left = output_limit_ - total_written_;
1062     if (len > space_left) {
1063       return false;
1064     }
1065 
1066     // Locate the iovec from which we need to start the copy.
1067     int from_iov_index = curr_iov_index_;
1068     size_t from_iov_offset = curr_iov_written_;
1069     while (offset > 0) {
1070       if (from_iov_offset >= offset) {
1071         from_iov_offset -= offset;
1072         break;
1073       }
1074 
1075       offset -= from_iov_offset;
1076       --from_iov_index;
1077       assert(from_iov_index >= 0);
1078       from_iov_offset = output_iov_[from_iov_index].iov_len;
1079     }
1080 
1081     // Copy <len> bytes starting from the iovec pointed to by from_iov_index to
1082     // the current iovec.
1083     while (len > 0) {
1084       assert(from_iov_index <= curr_iov_index_);
1085       if (from_iov_index != curr_iov_index_) {
1086         const size_t to_copy = std::min(
1087             output_iov_[from_iov_index].iov_len - from_iov_offset,
1088             len);
1089         Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy);
1090         len -= to_copy;
1091         if (len > 0) {
1092           ++from_iov_index;
1093           from_iov_offset = 0;
1094         }
1095       } else {
1096         assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
1097         size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len -
1098                                       curr_iov_written_,
1099                                   len);
1100         if (to_copy == 0) {
1101           // This iovec is full. Go to the next one.
1102           if (curr_iov_index_ + 1 >= output_iov_count_) {
1103             return false;
1104           }
1105           ++curr_iov_index_;
1106           curr_iov_written_ = 0;
1107           continue;
1108         }
1109         if (to_copy > len) {
1110           to_copy = len;
1111         }
1112         IncrementalCopy(GetIOVecPointer(from_iov_index, from_iov_offset),
1113                         GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1114                         to_copy);
1115         curr_iov_written_ += to_copy;
1116         from_iov_offset += to_copy;
1117         total_written_ += to_copy;
1118         len -= to_copy;
1119       }
1120     }
1121 
1122     return true;
1123   }
1124 
1125 };
1126 
1127 bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
1128                           const struct iovec* iov, size_t iov_cnt) {
1129   ByteArraySource reader(compressed, compressed_length);
1130   return RawUncompressToIOVec(&reader, iov, iov_cnt);
1131 }
1132 
1133 bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
1134                           size_t iov_cnt) {
1135   SnappyIOVecWriter output(iov, iov_cnt);
1136   return InternalUncompress(compressed, &output);
1137 }
1138 
1139 // -----------------------------------------------------------------------
1140 // Flat array interfaces
1141 // -----------------------------------------------------------------------
1142 
1143 // A type that writes to a flat array.
1144 // Note that this is not a "ByteSink", but a type that matches the
1145 // Writer template argument to SnappyDecompressor::DecompressAllTags().
1146 class SnappyArrayWriter {
1147  private:
1148   char* base_;
1149   char* op_;
1150   char* op_limit_;
1151 
1152  public:
1153   inline explicit SnappyArrayWriter(char* dst)
1154       : base_(dst),
1155         op_(dst) {
1156   }
1157 
1158   inline void SetExpectedLength(size_t len) {
1159     op_limit_ = op_ + len;
1160   }
1161 
1162   inline bool CheckLength() const {
1163     return op_ == op_limit_;
1164   }
1165 
1166   inline bool Append(const char* ip, size_t len) {
1167     char* op = op_;
1168     const size_t space_left = op_limit_ - op;
1169     if (space_left < len) {
1170       return false;
1171     }
1172     memcpy(op, ip, len);
1173     op_ = op + len;
1174     return true;
1175   }
1176 
1177   inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1178     char* op = op_;
1179     const size_t space_left = op_limit_ - op;
1180     if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
1181       // Fast path, used for the majority (about 95%) of invocations.
1182       UnalignedCopy64(ip, op);
1183       UnalignedCopy64(ip + 8, op + 8);
1184       op_ = op + len;
1185       return true;
1186     } else {
1187       return false;
1188     }
1189   }
1190 
1191   inline bool AppendFromSelf(size_t offset, size_t len) {
1192     char* op = op_;
1193     const size_t space_left = op_limit_ - op;
1194 
1195     // Check if we try to append from before the start of the buffer.
1196     // Normally this would just be a check for "produced < offset",
1197     // but "produced <= offset - 1u" is equivalent for every case
1198     // except the one where offset==0, where the right side will wrap around
1199     // to a very big number. This is convenient, as offset==0 is another
1200     // invalid case that we also want to catch, so that we do not go
1201     // into an infinite loop.
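    // For example, with produced == 0 and offset == 0, "offset - 1u" wraps
    // around to SIZE_MAX, so "produced <= offset - 1u" holds and the copy is
    // correctly rejected.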
1202     assert(op >= base_);
1203     size_t produced = op - base_;
1204     if (produced <= offset - 1u) {
1205       return false;
1206     }
1207     if (len <= 16 && offset >= 8 && space_left >= 16) {
1208       // Fast path, used for the majority (70-80%) of dynamic invocations.
1209       UnalignedCopy64(op - offset, op);
1210       UnalignedCopy64(op - offset + 8, op + 8);
1211     } else {
1212       if (space_left >= len + kMaxIncrementCopyOverflow) {
1213         IncrementalCopyFastPath(op - offset, op, len);
1214       } else {
1215         if (space_left < len) {
1216           return false;
1217         }
1218         IncrementalCopy(op - offset, op, len);
1219       }
1220     }
1221 
1222     op_ = op + len;
1223     return true;
1224   }
1225 };
1226 
1227 bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
1228   ByteArraySource reader(compressed, n);
1229   return RawUncompress(&reader, uncompressed);
1230 }
1231 
1232 bool RawUncompress(Source* compressed, char* uncompressed) {
1233   SnappyArrayWriter output(uncompressed);
1234   return InternalUncompress(compressed, &output);
1235 }
1236 
1237 bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
1238   size_t ulength;
1239   if (!GetUncompressedLength(compressed, n, &ulength)) {
1240     return false;
1241   }
1242   // On 32-bit builds: max_size() < kuint32max.  Check for that instead
1243   // of crashing (e.g., consider externally specified compressed data).
1244   if (ulength > uncompressed->max_size()) {
1245     return false;
1246   }
1247   STLStringResizeUninitialized(uncompressed, ulength);
1248   return RawUncompress(compressed, n, string_as_array(uncompressed));
1249 }
1250 
1251 
1252 // A Writer that drops everything on the floor and just does validation
1253 class SnappyDecompressionValidator {
1254  private:
1255   size_t expected_;
1256   size_t produced_;
1257 
1258  public:
1259   inline SnappyDecompressionValidator() : produced_(0) { }
1260   inline void SetExpectedLength(size_t len) {
1261     expected_ = len;
1262   }
1263   inline bool CheckLength() const {
1264     return expected_ == produced_;
1265   }
1266   inline bool Append(const char* ip, size_t len) {
1267     produced_ += len;
1268     return produced_ <= expected_;
1269   }
1270   inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
1271     return false;
1272   }
1273   inline bool AppendFromSelf(size_t offset, size_t len) {
1274     // See SnappyArrayWriter::AppendFromSelf for an explanation of
1275     // the "offset - 1u" trick.
1276     if (produced_ <= offset - 1u) return false;
1277     produced_ += len;
1278     return produced_ <= expected_;
1279   }
1280 };
1281 
1282 bool IsValidCompressedBuffer(const char* compressed, size_t n) {
1283   ByteArraySource reader(compressed, n);
1284   SnappyDecompressionValidator writer;
1285   return InternalUncompress(&reader, &writer);
1286 }
1287 
1288 void RawCompress(const char* input,
1289                  size_t input_length,
1290                  char* compressed,
1291                  size_t* compressed_length) {
1292   ByteArraySource reader(input, input_length);
1293   UncheckedByteArraySink writer(compressed);
1294   Compress(&reader, &writer);
1295 
1296   // Compute how many bytes were added
1297   *compressed_length = (writer.CurrentDestination() - compressed);
1298 }
1299 
1300 size_t Compress(const char* input, size_t input_length, string* compressed) {
1301   // Pre-grow the buffer to the max length of the compressed output
1302   compressed->resize(MaxCompressedLength(input_length));
1303 
1304   size_t compressed_length;
1305   RawCompress(input, input_length, string_as_array(compressed),
1306               &compressed_length);
1307   compressed->resize(compressed_length);
1308   return compressed_length;
1309 }
1310 
1311 
1312 } // end namespace snappy
1313