1 // Copyright 2005 Google Inc. All Rights Reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 #include "snappy.h"
30 #include "snappy-internal.h"
31 #include "snappy-sinksource.h"
32
33 #include <stdio.h>
34
35 #include <algorithm>
36 #include <string>
37 #include <vector>
38
39
40 namespace snappy {
41
42 // Any hash function will produce a valid compressed bitstream, but a good
43 // hash function reduces the number of collisions and thus yields better
44 // compression for compressible input, and more speed for incompressible
45 // input. Of course, it doesn't hurt if the hash function is reasonably fast
46 // either, as it gets called a lot.
47 static inline uint32 HashBytes(uint32 bytes, int shift) {
48 uint32 kMul = 0x1e35a7bd;
49 return (bytes * kMul) >> shift;
50 }
51 static inline uint32 Hash(const char* p, int shift) {
52 return HashBytes(UNALIGNED_LOAD32(p), shift);
53 }
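// Illustrative note (not part of the library): "shift" is chosen so that the
// multiplicative hash maps directly onto a table index. For a table with
// 1 << 14 entries, shift == 32 - 14 == 18, so
//   HashBytes(UNALIGNED_LOAD32(p), 18) == (UNALIGNED_LOAD32(p) * 0x1e35a7bd) >> 18
// always lies in [0, 16383] and can be used as a slot number without masking.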
54
55 size_t MaxCompressedLength(size_t source_len) {
56 // Compressed data can be defined as:
57 // compressed := item* literal*
58 // item := literal* copy
59 //
60 // The trailing literal sequence has a space blowup of at most 62/60
61 // since a literal of length 60 needs one tag byte + one extra byte
62 // for length information.
63 //
64 // Item blowup is trickier to measure. Suppose the "copy" op copies
65 // 4 bytes of data. Because of a special check in the encoding code,
66 // we produce a 4-byte copy only if the offset is < 65536. Therefore
67 // the copy op takes 3 bytes to encode, and this type of item leads
68 // to at most the 62/60 blowup for representing literals.
69 //
70 // Suppose the "copy" op copies 5 bytes of data. If the offset is big
71 // enough, it will take 5 bytes to encode the copy op. Therefore the
72 // worst case here is a one-byte literal followed by a five-byte copy.
73 // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
74 //
75 // This last factor dominates the blowup, so the final estimate is:
76 return 32 + source_len + source_len/6;
77 }
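// Worked example (illustrative): for source_len == 60000 the bound is
//   32 + 60000 + 60000/6 == 70032
// bytes, i.e. callers must allow for roughly 17% expansion plus a small
// constant, even though typical compressible inputs shrink.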
78
79 enum {
80 LITERAL = 0,
81 COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
82 COPY_2_BYTE_OFFSET = 2,
83 COPY_4_BYTE_OFFSET = 3
84 };
85 static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
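// Tag-byte layout, for illustration: the low two bits of each tag byte select
// one of the four element types above, and the high six bits carry length
// (and, for one-byte-offset copies, part of the offset). For example, a
// 5-byte literal is the single tag byte
//   LITERAL | ((5 - 1) << 2) == 0x10
// and a 7-byte copy at offset 300 is emitted as
//   COPY_1_BYTE_OFFSET | ((7 - 4) << 2) | ((300 >> 8) << 5) == 0x2d
// followed by the low offset byte 300 & 0xff == 0x2c (see EmitCopyLessThan64).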
86
87 // Copy "len" bytes from "src" to "op", one byte at a time. Used for
88 // handling COPY operations where the input and output regions may
89 // overlap. For example, suppose:
90 // src == "ab"
91 // op == src + 2
92 // len == 20
93 // After IncrementalCopy(src, op, len), the result will have
94 // eleven copies of "ab"
95 // ababababababababababab
96 // Note that this does not match the semantics of either memcpy()
97 // or memmove().
98 static inline void IncrementalCopy(const char* src, char* op, ssize_t len) {
99 assert(len > 0);
100 do {
101 *op++ = *src++;
102 } while (--len > 0);
103 }
104
105 // Equivalent to IncrementalCopy except that it can write up to ten extra
106 // bytes after the end of the copy, and that it is faster.
107 //
108 // The main part of this loop is a simple copy of eight bytes at a time until
109 // we've copied (at least) the requested amount of bytes. However, if op and
110 // src are less than eight bytes apart (indicating a repeating pattern of
111 // length < 8), we first need to expand the pattern in order to get the correct
112 // results. For instance, if the buffer looks like this, with the eight-byte
113 // <src> and <op> patterns marked as intervals:
114 //
115 // abxxxxxxxxxxxx
116 // [------] src
117 // [------] op
118 //
119 // a single eight-byte copy from <src> to <op> will repeat the pattern once,
120 // after which we can move <op> two bytes without moving <src>:
121 //
122 // ababxxxxxxxxxx
123 // [------] src
124 // [------] op
125 //
126 // and repeat the exercise until the two no longer overlap.
127 //
128 // This allows us to do very well in the special case of one single byte
129 // repeated many times, without taking a big hit for more general cases.
130 //
131 // The worst case of extra writing past the end of the match occurs when
132 // op - src == 1 and len == 1; the last copy will read from byte positions
133 // [0..7] and write to [4..11], whereas it was only supposed to write to
134 // position 1. Thus, ten excess bytes.
135
136 namespace {
137
138 const int kMaxIncrementCopyOverflow = 10;
139
140 inline void IncrementalCopyFastPath(const char* src, char* op, ssize_t len) {
141 while (op - src < 8) {
142 UnalignedCopy64(src, op);
143 len -= op - src;
144 op += op - src;
145 }
146 while (len > 0) {
147 UnalignedCopy64(src, op);
148 src += 8;
149 op += 8;
150 len -= 8;
151 }
152 }
153
154 } // namespace
155
156 static inline char* EmitLiteral(char* op,
157 const char* literal,
158 int len,
159 bool allow_fast_path) {
160 int n = len - 1; // Zero-length literals are disallowed
161 if (n < 60) {
162 // Fits in tag byte
163 *op++ = LITERAL | (n << 2);
164
165 // The vast majority of copies are below 16 bytes, for which a
166 // call to memcpy is overkill. This fast path can sometimes
167 // copy up to 15 bytes too much, but that is okay in the
168 // main loop, since we have a bit to go on for both sides:
169 //
170 // - The input will always have kInputMarginBytes = 15 extra
171 // available bytes, as long as we're in the main loop, and
172 // if not, allow_fast_path = false.
173 // - The output will always have 32 spare bytes (see
174 // MaxCompressedLength).
175 if (allow_fast_path && len <= 16) {
176 UnalignedCopy64(literal, op);
177 UnalignedCopy64(literal + 8, op + 8);
178 return op + len;
179 }
180 } else {
181 // Encode in upcoming bytes
182 char* base = op;
183 int count = 0;
184 op++;
185 while (n > 0) {
186 *op++ = n & 0xff;
187 n >>= 8;
188 count++;
189 }
190 assert(count >= 1);
191 assert(count <= 4);
192 *base = LITERAL | ((59+count) << 2);
193 }
194 memcpy(op, literal, len);
195 return op + len;
196 }
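// Example (illustrative): a 1000-byte literal does not fit in the tag byte,
// so the slow path above stores n == 999 in two little-endian bytes after it:
//   tag   = LITERAL | ((59 + 2) << 2) == 0xf4
//   bytes = 0xe7, 0x03                      // 999 == 0x03e7
// followed by the 1000 literal bytes copied by the trailing memcpy.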
197
198 static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
199 assert(len <= 64);
200 assert(len >= 4);
201 assert(offset < 65536);
202
203 if ((len < 12) && (offset < 2048)) {
204 size_t len_minus_4 = len - 4;
205 assert(len_minus_4 < 8); // Must fit in 3 bits
206 *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
207 *op++ = offset & 0xff;
208 } else {
209 *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
210 LittleEndian::Store16(op, offset);
211 op += 2;
212 }
213 return op;
214 }
215
216 static inline char* EmitCopy(char* op, size_t offset, int len) {
217 // Emit 64-byte copies, but make sure to keep at least four bytes reserved
218 while (len >= 68) {
219 op = EmitCopyLessThan64(op, offset, 64);
220 len -= 64;
221 }
222
223 // Emit an extra 60-byte copy if we have too much data to fit in one copy
224 if (len > 64) {
225 op = EmitCopyLessThan64(op, offset, 60);
226 len -= 60;
227 }
228
229 // Emit remainder
230 op = EmitCopyLessThan64(op, offset, len);
231 return op;
232 }
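// Example (illustrative): a 100-byte match at offset 500 becomes two copy
// elements, EmitCopyLessThan64(op, 500, 64) then EmitCopyLessThan64(op, 500, 36).
// Lengths in (64, 68) are instead split as 60 plus a remainder so that the
// final piece never falls below the four-byte minimum EmitCopyLessThan64 needs.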
233
234
235 bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
236 uint32 v = 0;
237 const char* limit = start + n;
238 if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
239 *result = v;
240 return true;
241 } else {
242 return false;
243 }
244 }
245
246 namespace internal {
247 uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
248 // Use smaller hash table when input.size() is smaller, since we
249 // fill the table, incurring O(hash table size) overhead for
250 // compression, and if the input is short, we won't need that
251 // many hash table entries anyway.
252 assert(kMaxHashTableSize >= 256);
253 size_t htsize = 256;
254 while (htsize < kMaxHashTableSize && htsize < input_size) {
255 htsize <<= 1;
256 }
257
258 uint16* table;
259 if (htsize <= ARRAYSIZE(small_table_)) {
260 table = small_table_;
261 } else {
262 if (large_table_ == NULL) {
263 large_table_ = new uint16[kMaxHashTableSize];
264 }
265 table = large_table_;
266 }
267
268 *table_size = htsize;
269 memset(table, 0, htsize * sizeof(*table));
270 return table;
271 }
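// Example (illustrative): for a 5000-byte input the loop above grows htsize
// 256 -> 512 -> ... -> 8192, the first power of two >= input_size (assuming
// kMaxHashTableSize, defined in the internal header, is at least that big),
// so small inputs pay for a small table and only large blocks use the full one.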
272 } // end namespace internal
273
274 // For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
275 // equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
276 // empirically found that overlapping loads such as
277 // UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
278 // are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
279 //
280 // We have different versions for 64- and 32-bit; ideally we would avoid the
281 // two functions and just inline the UNALIGNED_LOAD64 call into
282 // GetUint32AtOffset, but GCC (at least as of 4.6) is seemingly not clever
283 // enough to avoid loading the value multiple times in that case. For 64-bit, the load
284 // is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
285 // done at GetUint32AtOffset() time.
286
287 #ifdef ARCH_K8
288
289 typedef uint64 EightBytesReference;
290
291 static inline EightBytesReference GetEightBytesAt(const char* ptr) {
292 return UNALIGNED_LOAD64(ptr);
293 }
294
295 static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
296 assert(offset >= 0);
297 assert(offset <= 4);
298 return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
299 }
300
301 #else
302
303 typedef const char* EightBytesReference;
304
305 static inline EightBytesReference GetEightBytesAt(const char* ptr) {
306 return ptr;
307 }
308
309 static inline uint32 GetUint32AtOffset(const char* v, int offset) {
310 assert(offset >= 0);
311 assert(offset <= 4);
312 return UNALIGNED_LOAD32(v + offset);
313 }
314
315 #endif
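// Illustrative check of the invariant above, for a little-endian 64-bit build:
//   GetUint32AtOffset(GetEightBytesAt(p), 2)
//     == static_cast<uint32>(UNALIGNED_LOAD64(p) >> 16)
//     == UNALIGNED_LOAD32(p + 2)
// so one eight-byte load can stand in for several overlapping four-byte loads.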
316
317 // Flat array compression that does not emit the "uncompressed length"
318 // prefix. Compresses "input" string to the "*op" buffer.
319 //
320 // REQUIRES: "input" is at most "kBlockSize" bytes long.
321 // REQUIRES: "op" points to an array of memory that is at least
322 // "MaxCompressedLength(input.size())" in size.
323 // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
324 // REQUIRES: "table_size" is a power of two
325 //
326 // Returns an "end" pointer into "op" buffer.
327 // "end - op" is the compressed size of "input".
328 namespace internal {
329 char* CompressFragment(const char* input,
330 size_t input_size,
331 char* op,
332 uint16* table,
333 const int table_size) {
334 // "ip" is the input pointer, and "op" is the output pointer.
335 const char* ip = input;
336 assert(input_size <= kBlockSize);
337 assert((table_size & (table_size - 1)) == 0); // table must be power of two
338 const int shift = 32 - Bits::Log2Floor(table_size);
339 assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
340 const char* ip_end = input + input_size;
341 const char* base_ip = ip;
342 // Bytes in [next_emit, ip) will be emitted as literal bytes. Or
343 // [next_emit, ip_end) after the main loop.
344 const char* next_emit = ip;
345
346 const size_t kInputMarginBytes = 15;
347 if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
348 const char* ip_limit = input + input_size - kInputMarginBytes;
349
350 for (uint32 next_hash = Hash(++ip, shift); ; ) {
351 assert(next_emit < ip);
352 // The body of this loop calls EmitLiteral once and then EmitCopy one or
353 // more times. (The exception is that when we're close to exhausting
354 // the input we goto emit_remainder.)
355 //
356 // In the first iteration of this loop we're just starting, so
357 // there's nothing to copy, so calling EmitLiteral once is
358 // necessary. And we only start a new iteration when the
359 // current iteration has determined that a call to EmitLiteral will
360 // precede the next call to EmitCopy (if any).
361 //
362 // Step 1: Scan forward in the input looking for a 4-byte-long match.
363 // If we get close to exhausting the input then goto emit_remainder.
364 //
365 // Heuristic match skipping: If 32 bytes are scanned with no matches
366 // found, start looking only at every other byte. If 32 more bytes are
367 // scanned, look at every third byte, etc.. When a match is found,
368 // immediately go back to looking at every byte. This is a small loss
369 // (~5% performance, ~0.1% density) for compressible data due to more
370 // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
371 // win since the compressor quickly "realizes" the data is incompressible
372 // and doesn't bother looking for matches everywhere.
373 //
374 // The "skip" variable keeps track of how many bytes there are since the
375 // last match; dividing it by 32 (i.e. right-shifting by five) gives the
376 // number of bytes to move ahead for each iteration.
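// For example (illustrative): with skip starting at 32, the first 32 probes
// advance ip by 32 >> 5 == 1 byte each, the next 32 probes by 2 bytes each,
// and so on, so a long incompressible run is traversed with O(sqrt(n))
// hash probes instead of O(n).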
377 uint32 skip = 32;
378
379 const char* next_ip = ip;
380 const char* candidate;
381 do {
382 ip = next_ip;
383 uint32 hash = next_hash;
384 assert(hash == Hash(ip, shift));
385 uint32 bytes_between_hash_lookups = skip++ >> 5;
386 next_ip = ip + bytes_between_hash_lookups;
387 if (PREDICT_FALSE(next_ip > ip_limit)) {
388 goto emit_remainder;
389 }
390 next_hash = Hash(next_ip, shift);
391 candidate = base_ip + table[hash];
392 assert(candidate >= base_ip);
393 assert(candidate < ip);
394
395 table[hash] = ip - base_ip;
396 } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
397 UNALIGNED_LOAD32(candidate)));
398
399 // Step 2: A 4-byte match has been found. We'll later see if more
400 // than 4 bytes match. But, prior to the match, input
401 // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
402 assert(next_emit + 16 <= ip_end);
403 op = EmitLiteral(op, next_emit, ip - next_emit, true);
404
405 // Step 3: Call EmitCopy, and then see if another EmitCopy could
406 // be our next move. Repeat until we find no match for the
407 // input immediately after what was consumed by the last EmitCopy call.
408 //
409 // If we exit this loop normally then we need to call EmitLiteral next,
410 // though we don't yet know how big the literal will be. We handle that
411 // by proceeding to the next iteration of the main loop. We also can exit
412 // this loop via goto if we get close to exhausting the input.
413 EightBytesReference input_bytes;
414 uint32 candidate_bytes = 0;
415
416 do {
417 // We have a 4-byte match at ip, and no need to emit any
418 // "literal bytes" prior to ip.
419 const char* base = ip;
420 int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
421 ip += matched;
422 size_t offset = base - candidate;
423 assert(0 == memcmp(base, candidate, matched));
424 op = EmitCopy(op, offset, matched);
425 // We could immediately start working at ip now, but to improve
426 // compression we first update table[Hash(ip - 1, ...)].
427 const char* insert_tail = ip - 1;
428 next_emit = ip;
429 if (PREDICT_FALSE(ip >= ip_limit)) {
430 goto emit_remainder;
431 }
432 input_bytes = GetEightBytesAt(insert_tail);
433 uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
434 table[prev_hash] = ip - base_ip - 1;
435 uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
436 candidate = base_ip + table[cur_hash];
437 candidate_bytes = UNALIGNED_LOAD32(candidate);
438 table[cur_hash] = ip - base_ip;
439 } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
440
441 next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
442 ++ip;
443 }
444 }
445
446 emit_remainder:
447 // Emit the remaining bytes as a literal
448 if (next_emit < ip_end) {
449 op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
450 }
451
452 return op;
453 }
454 } // end namespace internal
455
456 // Signature of output types needed by decompression code.
457 // The decompression code is templatized on a type that obeys this
458 // signature so that we do not pay virtual function call overhead in
459 // the middle of a tight decompression loop.
460 //
461 // class DecompressionWriter {
462 // public:
463 // // Called before decompression
464 // void SetExpectedLength(size_t length);
465 //
466 // // Called after decompression
467 // bool CheckLength() const;
468 //
469 // // Called repeatedly during decompression
470 // bool Append(const char* ip, size_t length);
471 // bool AppendFromSelf(uint32 offset, size_t length);
472 //
473 // // The rules for how TryFastAppend differs from Append are somewhat
474 // // convoluted:
475 // //
476 // // - TryFastAppend is allowed to decline (return false) at any
477 // // time, for any reason -- just "return false" would be
478 // // a perfectly legal implementation of TryFastAppend.
479 // // The intention is for TryFastAppend to allow a fast path
480 // // in the common case of a small append.
481 // // - TryFastAppend is allowed to read up to <available> bytes
482 // // from the input buffer, whereas Append is allowed to read
483 // // <length>. However, if it returns true, it must leave
484 // // at least five (kMaximumTagLength) bytes in the input buffer
485 // // afterwards, so that there is always enough space to read the
486 // // next tag without checking for a refill.
487 // // - TryFastAppend must always decline (return false)
488 // // if <length> is 61 or more, as in this case the literal length is not
489 // // decoded fully. In practice, this should not be a big problem,
490 // // as it is unlikely that one would implement a fast path accepting
491 // // this much data.
492 // //
493 // bool TryFastAppend(const char* ip, size_t available, size_t length);
494 // };
495
496 // -----------------------------------------------------------------------
497 // Lookup table for decompression code. Generated by ComputeTable() below.
498 // -----------------------------------------------------------------------
499
500 // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
501 static const uint32 wordmask[] = {
502 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
503 };
504
505 // Data stored per entry in lookup table:
506 // Range Bits-used Description
507 // ------------------------------------
508 // 1..64 0..7 Literal/copy length encoded in opcode byte
509 // 0..7 8..10 Copy offset encoded in opcode byte / 256
510 // 0..4 11..13 Extra bytes after opcode
511 //
512 // We use eight bits for the length even though 7 would have sufficed
513 // for efficiency reasons:
514 // (1) Extracting a byte is faster than a bit-field
515 // (2) It properly aligns copy offset so we do not need a <<8
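// For example (illustrative), entry 0x2d -- a COPY_1_BYTE_OFFSET tag with
// len == 7 and offset/256 == 1 -- decodes as
//   char_table[0x2d] == 0x0907 == 7 | (1 << 8) | (1 << 11)
// i.e. length 7, copy_offset/256 == 1, and one extra byte after the opcode.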
516 static const uint16 char_table[256] = {
517 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
518 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
519 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
520 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
521 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
522 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
523 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
524 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
525 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
526 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
527 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
528 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
529 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
530 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
531 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
532 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
533 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
534 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
535 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
536 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
537 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
538 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
539 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
540 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
541 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
542 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
543 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
544 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
545 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
546 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
547 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
548 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
549 };
550
551 // In debug mode, allow optional computation of the table at startup.
552 // Also, check that the decompression table is correct.
553 #ifndef NDEBUG
554 DEFINE_bool(snappy_dump_decompression_table, false,
555 "If true, we print the decompression table at startup.");
556
557 static uint16 MakeEntry(unsigned int extra,
558 unsigned int len,
559 unsigned int copy_offset) {
560 // Check that all of the fields fit within the allocated space
561 assert(extra == (extra & 0x7)); // At most 3 bits
562 assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
563 assert(len == (len & 0x7f)); // At most 7 bits
564 return len | (copy_offset << 8) | (extra << 11);
565 }
566
567 static void ComputeTable() {
568 uint16 dst[256];
569
570 // Place invalid entries in all places to detect missing initialization
571 int assigned = 0;
572 for (int i = 0; i < 256; i++) {
573 dst[i] = 0xffff;
574 }
575
576 // Small LITERAL entries. We store (len-1) in the top 6 bits.
577 for (unsigned int len = 1; len <= 60; len++) {
578 dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
579 assigned++;
580 }
581
582 // Large LITERAL entries. We use 60..63 in the high 6 bits to
583 // encode the number of bytes of length info that follow the opcode.
584 for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
585 // We set the length field in the lookup table to 1 because extra
586 // bytes encode len-1.
587 dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
588 assigned++;
589 }
590
591 // COPY_1_BYTE_OFFSET.
592 //
593 // The tag byte in the compressed data stores len-4 in 3 bits, and
594 // offset/256 in 5 bits. offset%256 is stored in the next byte.
595 //
596 // This format is used for length in range [4..11] and offset in
597 // range [0..2047]
598 for (unsigned int len = 4; len < 12; len++) {
599 for (unsigned int offset = 0; offset < 2048; offset += 256) {
600 dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
601 MakeEntry(1, len, offset>>8);
602 assigned++;
603 }
604 }
605
606 // COPY_2_BYTE_OFFSET.
607 // Tag contains len-1 in top 6 bits, and offset in next two bytes.
608 for (unsigned int len = 1; len <= 64; len++) {
609 dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
610 assigned++;
611 }
612
613 // COPY_4_BYTE_OFFSET.
614 // Tag contains len-1 in top 6 bits, and offset in next four bytes.
615 for (unsigned int len = 1; len <= 64; len++) {
616 dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
617 assigned++;
618 }
619
620 // Check that each entry was initialized exactly once.
621 if (assigned != 256) {
622 fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
623 abort();
624 }
625 for (int i = 0; i < 256; i++) {
626 if (dst[i] == 0xffff) {
627 fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
628 abort();
629 }
630 }
631
632 if (FLAGS_snappy_dump_decompression_table) {
633 printf("static const uint16 char_table[256] = {\n ");
634 for (int i = 0; i < 256; i++) {
635 printf("0x%04x%s",
636 dst[i],
637 ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
638 }
639 printf("};\n");
640 }
641
642 // Check that computed table matched recorded table
643 for (int i = 0; i < 256; i++) {
644 if (dst[i] != char_table[i]) {
645 fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
646 i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
647 abort();
648 }
649 }
650 }
651 #endif /* !NDEBUG */
652
653 // Helper class for decompression
654 class SnappyDecompressor {
655 private:
656 Source* reader_; // Underlying source of bytes to decompress
657 const char* ip_; // Points to next buffered byte
658 const char* ip_limit_; // Points just past buffered bytes
659 uint32 peeked_; // Bytes peeked from reader (need to skip)
660 bool eof_; // Hit end of input without an error?
661 char scratch_[kMaximumTagLength]; // See RefillTag().
662
663 // Ensure that all of the tag metadata for the next tag is available
664 // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even
665 // if (ip_limit_ - ip_ < 5).
666 //
667 // Returns true on success, false on error or end of input.
668 bool RefillTag();
669
670 public:
671 explicit SnappyDecompressor(Source* reader)
672 : reader_(reader),
673 ip_(NULL),
674 ip_limit_(NULL),
675 peeked_(0),
676 eof_(false) {
677 }
678
679 ~SnappyDecompressor() {
680 // Advance past any bytes we peeked at from the reader
681 reader_->Skip(peeked_);
682 }
683
684 // Returns true iff we have hit the end of the input without an error.
685 bool eof() const {
686 return eof_;
687 }
688
689 // Read the uncompressed length stored at the start of the compressed data.
690 // On success, stores the length in *result and returns true.
691 // On failure, returns false.
692 bool ReadUncompressedLength(uint32* result) {
693 assert(ip_ == NULL); // Must not have read anything yet
694 // Length is encoded in 1..5 bytes
695 *result = 0;
696 uint32 shift = 0;
697 while (true) {
698 if (shift >= 32) return false;
699 size_t n;
700 const char* ip = reader_->Peek(&n);
701 if (n == 0) return false;
702 const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
703 reader_->Skip(1);
704 *result |= static_cast<uint32>(c & 0x7f) << shift;
705 if (c < 128) {
706 break;
707 }
708 shift += 7;
709 }
710 return true;
711 }
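// Illustrative examples of the varint framing read above: an uncompressed
// length of 64 is stored as the single byte 0x40, while 0x1fffff (2097151)
// is stored as 0xff 0xff 0x7f -- seven payload bits per byte, least
// significant group first, with the top bit as a continuation flag.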
712
713 // Process the next item found in the input.
714 // Returns true if successful, false on error or end of input.
715 template <class Writer>
716 void DecompressAllTags(Writer* writer) {
717 const char* ip = ip_;
718
719 // We could have put this refill fragment only at the beginning of the loop.
720 // However, duplicating it at the end of each branch gives the compiler more
721 // scope to optimize the <ip_limit_ - ip> expression based on the local
722 // context, which overall increases speed.
723 #define MAYBE_REFILL() \
724 if (ip_limit_ - ip < kMaximumTagLength) { \
725 ip_ = ip; \
726 if (!RefillTag()) return; \
727 ip = ip_; \
728 }
729
730 MAYBE_REFILL();
731 for ( ;; ) {
732 const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
733
734 if ((c & 0x3) == LITERAL) {
735 size_t literal_length = (c >> 2) + 1u;
736 if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
737 assert(literal_length < 61);
738 ip += literal_length;
739 // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend()
740 // will not return true unless there's already at least five spare
741 // bytes in addition to the literal.
742 continue;
743 }
744 if (PREDICT_FALSE(literal_length >= 61)) {
745 // Long literal.
746 const size_t literal_length_length = literal_length - 60;
747 literal_length =
748 (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
749 ip += literal_length_length;
750 }
751
752 size_t avail = ip_limit_ - ip;
753 while (avail < literal_length) {
754 if (!writer->Append(ip, avail)) return;
755 literal_length -= avail;
756 reader_->Skip(peeked_);
757 size_t n;
758 ip = reader_->Peek(&n);
759 avail = n;
760 peeked_ = avail;
761 if (avail == 0) return; // Premature end of input
762 ip_limit_ = ip + avail;
763 }
764 if (!writer->Append(ip, literal_length)) {
765 return;
766 }
767 ip += literal_length;
768 MAYBE_REFILL();
769 } else {
770 const uint32 entry = char_table[c];
771 const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
772 const uint32 length = entry & 0xff;
773 ip += entry >> 11;
774
775 // copy_offset/256 is encoded in bits 8..10. By just fetching
776 // those bits, we get copy_offset (since the bit-field starts at
777 // bit 8).
778 const uint32 copy_offset = entry & 0x700;
779 if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
780 return;
781 }
782 MAYBE_REFILL();
783 }
784 }
785
786 #undef MAYBE_REFILL
787 }
788 };
789
790 bool SnappyDecompressor::RefillTag() {
791 const char* ip = ip_;
792 if (ip == ip_limit_) {
793 // Fetch a new fragment from the reader
794 reader_->Skip(peeked_); // All peeked bytes are used up
795 size_t n;
796 ip = reader_->Peek(&n);
797 peeked_ = n;
798 if (n == 0) {
799 eof_ = true;
800 return false;
801 }
802 ip_limit_ = ip + n;
803 }
804
805 // Read the tag character
806 assert(ip < ip_limit_);
807 const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
808 const uint32 entry = char_table[c];
809 const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c'
810 assert(needed <= sizeof(scratch_));
811
812 // Read more bytes from reader if needed
813 uint32 nbuf = ip_limit_ - ip;
814 if (nbuf < needed) {
815 // Stitch together bytes from ip and reader to form the word
816 // contents. We store the needed bytes in "scratch_". They
817 // will be consumed immediately by the caller since we do not
818 // read more than we need.
819 memmove(scratch_, ip, nbuf);
820 reader_->Skip(peeked_); // All peeked bytes are used up
821 peeked_ = 0;
822 while (nbuf < needed) {
823 size_t length;
824 const char* src = reader_->Peek(&length);
825 if (length == 0) return false;
826 uint32 to_add = min<uint32>(needed - nbuf, length);
827 memcpy(scratch_ + nbuf, src, to_add);
828 nbuf += to_add;
829 reader_->Skip(to_add);
830 }
831 assert(nbuf == needed);
832 ip_ = scratch_;
833 ip_limit_ = scratch_ + needed;
834 } else if (nbuf < kMaximumTagLength) {
835 // Have enough bytes, but move into scratch_ so that we do not
836 // read past end of input
837 memmove(scratch_, ip, nbuf);
838 reader_->Skip(peeked_); // All peeked bytes are used up
839 peeked_ = 0;
840 ip_ = scratch_;
841 ip_limit_ = scratch_ + nbuf;
842 } else {
843 // Pass pointer to buffer returned by reader_.
844 ip_ = ip;
845 }
846 return true;
847 }
848
849 template <typename Writer>
850 static bool InternalUncompress(Source* r, Writer* writer) {
851 // Read the uncompressed length from the front of the compressed input
852 SnappyDecompressor decompressor(r);
853 uint32 uncompressed_len = 0;
854 if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
855 return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
856 }
857
858 template <typename Writer>
859 static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
860 Writer* writer,
861 uint32 uncompressed_len) {
862 writer->SetExpectedLength(uncompressed_len);
863
864 // Process the entire input
865 decompressor->DecompressAllTags(writer);
866 return (decompressor->eof() && writer->CheckLength());
867 }
868
869 bool GetUncompressedLength(Source* source, uint32* result) {
870 SnappyDecompressor decompressor(source);
871 return decompressor.ReadUncompressedLength(result);
872 }
873
874 size_t Compress(Source* reader, Sink* writer) {
875 size_t written = 0;
876 size_t N = reader->Available();
877 char ulength[Varint::kMax32];
878 char* p = Varint::Encode32(ulength, N);
879 writer->Append(ulength, p-ulength);
880 written += (p - ulength);
881
882 internal::WorkingMemory wmem;
883 char* scratch = NULL;
884 char* scratch_output = NULL;
885
886 while (N > 0) {
887 // Get next block to compress (without copying if possible)
888 size_t fragment_size;
889 const char* fragment = reader->Peek(&fragment_size);
890 assert(fragment_size != 0); // premature end of input
891 const size_t num_to_read = min(N, kBlockSize);
892 size_t bytes_read = fragment_size;
893
894 size_t pending_advance = 0;
895 if (bytes_read >= num_to_read) {
896 // Buffer returned by reader is large enough
897 pending_advance = num_to_read;
898 fragment_size = num_to_read;
899 } else {
900 // Read into scratch buffer
901 if (scratch == NULL) {
902 // If this is the last iteration, we want to allocate N bytes
903 // of space, otherwise the max possible kBlockSize space.
904 // num_to_read contains exactly the correct value
905 scratch = new char[num_to_read];
906 }
907 memcpy(scratch, fragment, bytes_read);
908 reader->Skip(bytes_read);
909
910 while (bytes_read < num_to_read) {
911 fragment = reader->Peek(&fragment_size);
912 size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
913 memcpy(scratch + bytes_read, fragment, n);
914 bytes_read += n;
915 reader->Skip(n);
916 }
917 assert(bytes_read == num_to_read);
918 fragment = scratch;
919 fragment_size = num_to_read;
920 }
921 assert(fragment_size == num_to_read);
922
923 // Get encoding table for compression
924 int table_size;
925 uint16* table = wmem.GetHashTable(num_to_read, &table_size);
926
927 // Compress input_fragment and append to dest
928 const int max_output = MaxCompressedLength(num_to_read);
929
930 // Need a scratch buffer for the output, in case the byte sink doesn't
931 // have room for us directly.
932 if (scratch_output == NULL) {
933 scratch_output = new char[max_output];
934 } else {
935 // Since we encode kBlockSize regions followed by a region
936 // which is <= kBlockSize in length, a previously allocated
937 // scratch_output[] region is big enough for this iteration.
938 }
939 char* dest = writer->GetAppendBuffer(max_output, scratch_output);
940 char* end = internal::CompressFragment(fragment, fragment_size,
941 dest, table, table_size);
942 writer->Append(dest, end - dest);
943 written += (end - dest);
944
945 N -= num_to_read;
946 reader->Skip(pending_advance);
947 }
948
949 delete[] scratch;
950 delete[] scratch_output;
951
952 return written;
953 }
954
955 // -----------------------------------------------------------------------
956 // IOVec interfaces
957 // -----------------------------------------------------------------------
958
959 // A type that writes to an iovec.
960 // Note that this is not a "ByteSink", but a type that matches the
961 // Writer template argument to SnappyDecompressor::DecompressAllTags().
962 class SnappyIOVecWriter {
963 private:
964 const struct iovec* output_iov_;
965 const size_t output_iov_count_;
966
967 // We are currently writing into output_iov_[curr_iov_index_].
968 int curr_iov_index_;
969
970 // Bytes written to output_iov_[curr_iov_index_] so far.
971 size_t curr_iov_written_;
972
973 // Total bytes decompressed into output_iov_ so far.
974 size_t total_written_;
975
976 // Maximum number of bytes that will be decompressed into output_iov_.
977 size_t output_limit_;
978
979 inline char* GetIOVecPointer(int index, size_t offset) {
980 return reinterpret_cast<char*>(output_iov_[index].iov_base) +
981 offset;
982 }
983
984 public:
985 // Does not take ownership of iov. iov must be valid during the
986 // entire lifetime of the SnappyIOVecWriter.
987 inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
988 : output_iov_(iov),
989 output_iov_count_(iov_count),
990 curr_iov_index_(0),
991 curr_iov_written_(0),
992 total_written_(0),
993 output_limit_(-1) {
994 }
995
996 inline void SetExpectedLength(size_t len) {
997 output_limit_ = len;
998 }
999
1000 inline bool CheckLength() const {
1001 return total_written_ == output_limit_;
1002 }
1003
1004 inline bool Append(const char* ip, size_t len) {
1005 if (total_written_ + len > output_limit_) {
1006 return false;
1007 }
1008
1009 while (len > 0) {
1010 assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
1011 if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) {
1012 // This iovec is full. Go to the next one.
1013 if (curr_iov_index_ + 1 >= output_iov_count_) {
1014 return false;
1015 }
1016 curr_iov_written_ = 0;
1017 ++curr_iov_index_;
1018 }
1019
1020 const size_t to_write = std::min(
1021 len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_);
1022 memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1023 ip,
1024 to_write);
1025 curr_iov_written_ += to_write;
1026 total_written_ += to_write;
1027 ip += to_write;
1028 len -= to_write;
1029 }
1030
1031 return true;
1032 }
1033
1034 inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1035 const size_t space_left = output_limit_ - total_written_;
1036 if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
1037 output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
1038 // Fast path, used for the majority (about 95%) of invocations.
1039 char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
1040 UnalignedCopy64(ip, ptr);
1041 UnalignedCopy64(ip + 8, ptr + 8);
1042 curr_iov_written_ += len;
1043 total_written_ += len;
1044 return true;
1045 }
1046
1047 return false;
1048 }
1049
1050 inline bool AppendFromSelf(size_t offset, size_t len) {
1051 if (offset > total_written_ || offset == 0) {
1052 return false;
1053 }
1054 const size_t space_left = output_limit_ - total_written_;
1055 if (len > space_left) {
1056 return false;
1057 }
1058
1059 // Locate the iovec from which we need to start the copy.
1060 int from_iov_index = curr_iov_index_;
1061 size_t from_iov_offset = curr_iov_written_;
1062 while (offset > 0) {
1063 if (from_iov_offset >= offset) {
1064 from_iov_offset -= offset;
1065 break;
1066 }
1067
1068 offset -= from_iov_offset;
1069 --from_iov_index;
1070 assert(from_iov_index >= 0);
1071 from_iov_offset = output_iov_[from_iov_index].iov_len;
1072 }
1073
1074 // Copy <len> bytes starting from the iovec pointed to by from_iov_index to
1075 // the current iovec.
1076 while (len > 0) {
1077 assert(from_iov_index <= curr_iov_index_);
1078 if (from_iov_index != curr_iov_index_) {
1079 const size_t to_copy = std::min(
1080 output_iov_[from_iov_index].iov_len - from_iov_offset,
1081 len);
1082 Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy);
1083 len -= to_copy;
1084 if (len > 0) {
1085 ++from_iov_index;
1086 from_iov_offset = 0;
1087 }
1088 } else {
1089 assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
1090 size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len -
1091 curr_iov_written_,
1092 len);
1093 if (to_copy == 0) {
1094 // This iovec is full. Go to the next one.
1095 if (curr_iov_index_ + 1 >= output_iov_count_) {
1096 return false;
1097 }
1098 ++curr_iov_index_;
1099 curr_iov_written_ = 0;
1100 continue;
1101 }
1102 if (to_copy > len) {
1103 to_copy = len;
1104 }
1105 IncrementalCopy(GetIOVecPointer(from_iov_index, from_iov_offset),
1106 GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1107 to_copy);
1108 curr_iov_written_ += to_copy;
1109 from_iov_offset += to_copy;
1110 total_written_ += to_copy;
1111 len -= to_copy;
1112 }
1113 }
1114
1115 return true;
1116 }
1117
1118 };
1119
1120 bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
1121 const struct iovec* iov, size_t iov_cnt) {
1122 ByteArraySource reader(compressed, compressed_length);
1123 return RawUncompressToIOVec(&reader, iov, iov_cnt);
1124 }
1125
1126 bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
1127 size_t iov_cnt) {
1128 SnappyIOVecWriter output(iov, iov_cnt);
1129 return InternalUncompress(compressed, &output);
1130 }
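// Usage sketch (illustrative, not part of the library): decompressing into
// two caller-provided buffers through the iovec interface; buf0/buf1 and
// len0/len1 stand for hypothetical caller-owned storage:
//
//   struct iovec iov[2];
//   iov[0].iov_base = buf0; iov[0].iov_len = len0;
//   iov[1].iov_base = buf1; iov[1].iov_len = len1;
//   bool ok = snappy::RawUncompressToIOVec(compressed, compressed_length,
//                                          iov, 2);
//
// The call can succeed only if len0 + len1 is at least the stored
// uncompressed length.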
1131
1132 // -----------------------------------------------------------------------
1133 // Flat array interfaces
1134 // -----------------------------------------------------------------------
1135
1136 // A type that writes to a flat array.
1137 // Note that this is not a "ByteSink", but a type that matches the
1138 // Writer template argument to SnappyDecompressor::DecompressAllTags().
1139 class SnappyArrayWriter {
1140 private:
1141 char* base_;
1142 char* op_;
1143 char* op_limit_;
1144
1145 public:
1146 inline explicit SnappyArrayWriter(char* dst)
1147 : base_(dst),
1148 op_(dst) {
1149 }
1150
1151 inline void SetExpectedLength(size_t len) {
1152 op_limit_ = op_ + len;
1153 }
1154
1155 inline bool CheckLength() const {
1156 return op_ == op_limit_;
1157 }
1158
1159 inline bool Append(const char* ip, size_t len) {
1160 char* op = op_;
1161 const size_t space_left = op_limit_ - op;
1162 if (space_left < len) {
1163 return false;
1164 }
1165 memcpy(op, ip, len);
1166 op_ = op + len;
1167 return true;
1168 }
1169
1170 inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1171 char* op = op_;
1172 const size_t space_left = op_limit_ - op;
1173 if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
1174 // Fast path, used for the majority (about 95%) of invocations.
1175 UnalignedCopy64(ip, op);
1176 UnalignedCopy64(ip + 8, op + 8);
1177 op_ = op + len;
1178 return true;
1179 } else {
1180 return false;
1181 }
1182 }
1183
1184 inline bool AppendFromSelf(size_t offset, size_t len) {
1185 char* op = op_;
1186 const size_t space_left = op_limit_ - op;
1187
1188 // Check if we try to append from before the start of the buffer.
1189 // Normally this would just be a check for "produced < offset",
1190 // but "produced <= offset - 1u" is equivalent for every case
1191 // except the one where offset==0, where the right side will wrap around
1192 // to a very big number. This is convenient, as offset==0 is another
1193 // invalid case that we also want to catch, so that we do not go
1194 // into an infinite loop.
1195 assert(op >= base_);
1196 size_t produced = op - base_;
1197 if (produced <= offset - 1u) {
1198 return false;
1199 }
1200 if (len <= 16 && offset >= 8 && space_left >= 16) {
1201 // Fast path, used for the majority (70-80%) of dynamic invocations.
1202 UnalignedCopy64(op - offset, op);
1203 UnalignedCopy64(op - offset + 8, op + 8);
1204 } else {
1205 if (space_left >= len + kMaxIncrementCopyOverflow) {
1206 IncrementalCopyFastPath(op - offset, op, len);
1207 } else {
1208 if (space_left < len) {
1209 return false;
1210 }
1211 IncrementalCopy(op - offset, op, len);
1212 }
1213 }
1214
1215 op_ = op + len;
1216 return true;
1217 }
1218 };
1219
1220 bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
1221 ByteArraySource reader(compressed, n);
1222 return RawUncompress(&reader, uncompressed);
1223 }
1224
1225 bool RawUncompress(Source* compressed, char* uncompressed) {
1226 SnappyArrayWriter output(uncompressed);
1227 return InternalUncompress(compressed, &output);
1228 }
1229
1230 bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
1231 size_t ulength;
1232 if (!GetUncompressedLength(compressed, n, &ulength)) {
1233 return false;
1234 }
1235 // On 32-bit builds: max_size() < kuint32max. Check for that instead
1236 // of crashing (e.g., consider externally specified compressed data).
1237 if (ulength > uncompressed->max_size()) {
1238 return false;
1239 }
1240 STLStringResizeUninitialized(uncompressed, ulength);
1241 return RawUncompress(compressed, n, string_as_array(uncompressed));
1242 }
1243
1244
1245 // A Writer that drops everything on the floor and just does validation
1246 class SnappyDecompressionValidator {
1247 private:
1248 size_t expected_;
1249 size_t produced_;
1250
1251 public:
1252 inline SnappyDecompressionValidator() : produced_(0) { }
1253 inline void SetExpectedLength(size_t len) {
1254 expected_ = len;
1255 }
1256 inline bool CheckLength() const {
1257 return expected_ == produced_;
1258 }
1259 inline bool Append(const char* ip, size_t len) {
1260 produced_ += len;
1261 return produced_ <= expected_;
1262 }
1263 inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
1264 return false;
1265 }
1266 inline bool AppendFromSelf(size_t offset, size_t len) {
1267 // See SnappyArrayWriter::AppendFromSelf for an explanation of
1268 // the "offset - 1u" trick.
1269 if (produced_ <= offset - 1u) return false;
1270 produced_ += len;
1271 return produced_ <= expected_;
1272 }
1273 };
1274
1275 bool IsValidCompressedBuffer(const char* compressed, size_t n) {
1276 ByteArraySource reader(compressed, n);
1277 SnappyDecompressionValidator writer;
1278 return InternalUncompress(&reader, &writer);
1279 }
1280
1281 void RawCompress(const char* input,
1282 size_t input_length,
1283 char* compressed,
1284 size_t* compressed_length) {
1285 ByteArraySource reader(input, input_length);
1286 UncheckedByteArraySink writer(compressed);
1287 Compress(&reader, &writer);
1288
1289 // Compute how many bytes were added
1290 *compressed_length = (writer.CurrentDestination() - compressed);
1291 }
1292
1293 size_t Compress(const char* input, size_t input_length, string* compressed) {
1294 // Pre-grow the buffer to the max length of the compressed output
1295 compressed->resize(MaxCompressedLength(input_length));
1296
1297 size_t compressed_length;
1298 RawCompress(input, input_length, string_as_array(compressed),
1299 &compressed_length);
1300 compressed->resize(compressed_length);
1301 return compressed_length;
1302 }
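// Usage sketch (illustrative, not part of the library): a round trip through
// the string-based convenience functions defined above, where "input" stands
// for any caller-supplied string:
//
//   string compressed, restored;
//   snappy::Compress(input.data(), input.size(), &compressed);
//   bool ok = snappy::Uncompress(compressed.data(), compressed.size(),
//                                &restored);
//   // On success, "restored" holds the original contents of "input".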
1303
1304
1305 } // end namespace snappy
1306
1307