1 // Copyright 2005 and onwards Google Inc.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 #include <math.h>
30 #include <stdlib.h>
31 
32 
33 #include <algorithm>
34 #include <string>
35 #include <utility>
36 #include <vector>
37 
38 #include "snappy.h"
39 #include "snappy-internal.h"
40 #include "snappy-test.h"
41 #include "snappy-sinksource.h"
42 
43 DEFINE_int32(start_len, -1,
44              "Starting prefix size for testing (-1: just full file contents)");
45 DEFINE_int32(end_len, -1,
46              "Starting prefix size for testing (-1: just full file contents)");
47 DEFINE_int32(bytes, 10485760,
48              "How many bytes to compress/uncompress per file for timing");
49 
50 DEFINE_bool(zlib, false,
51             "Run zlib compression (http://www.zlib.net)");
52 DEFINE_bool(lzo, false,
53             "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
54 DEFINE_bool(snappy, true, "Run snappy compression");
55 
56 DEFINE_bool(write_compressed, false,
57             "Write compressed versions of each file to <file>.comp");
58 DEFINE_bool(write_uncompressed, false,
59             "Write uncompressed versions of each file to <file>.uncomp");
60 
61 DEFINE_bool(snappy_dump_decompression_table, false,
62             "If true, we print the decompression table during tests.");
63 
64 namespace snappy {
65 
66 #if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
67 
68 // To test against code that reads beyond its input, this class copies a
69 // string to a newly allocated group of pages, the last of which
70 // is made unreadable via mprotect. Note that we need to allocate the
71 // memory with mmap(), as POSIX allows mprotect() only on memory allocated
72 // with mmap(), and some malloc/posix_memalign implementations expect to
73 // be able to read previously allocated memory while doing heap allocations.
74 class DataEndingAtUnreadablePage {
75  public:
DataEndingAtUnreadablePage(const string & s)76   explicit DataEndingAtUnreadablePage(const string& s) {
77     const size_t page_size = sysconf(_SC_PAGESIZE);
78     const size_t size = s.size();
79     // Round up space for string to a multiple of page_size.
80     size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
81     alloc_size_ = space_for_string + page_size;
82     mem_ = mmap(NULL, alloc_size_,
83                 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
84     CHECK_NE(MAP_FAILED, mem_);
85     protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
86     char* dst = protected_page_ - size;
87     memcpy(dst, s.data(), size);
88     data_ = dst;
89     size_ = size;
90     // Make guard page unreadable.
91     CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
92   }
93 
~DataEndingAtUnreadablePage()94   ~DataEndingAtUnreadablePage() {
95     const size_t page_size = sysconf(_SC_PAGESIZE);
96     // Undo the mprotect.
97     CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
98     CHECK_EQ(0, munmap(mem_, alloc_size_));
99   }
100 
data() const101   const char* data() const { return data_; }
size() const102   size_t size() const { return size_; }
103 
104  private:
105   size_t alloc_size_;
106   void* mem_;
107   char* protected_page_;
108   const char* data_;
109   size_t size_;
110 };
111 
112 #else  // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
113 
114 // Fallback for systems without mmap.
115 typedef string DataEndingAtUnreadablePage;
116 
117 #endif
118 
119 enum CompressorType {
120   ZLIB, LZO, SNAPPY
121 };
122 
123 const char* names[] = {
124   "ZLIB", "LZO", "SNAPPY"
125 };
126 
MinimumRequiredOutputSpace(size_t input_size,CompressorType comp)127 static size_t MinimumRequiredOutputSpace(size_t input_size,
128                                          CompressorType comp) {
129   switch (comp) {
130 #ifdef ZLIB_VERSION
131     case ZLIB:
132       return ZLib::MinCompressbufSize(input_size);
133 #endif  // ZLIB_VERSION
134 
135 #ifdef LZO_VERSION
136     case LZO:
137       return input_size + input_size/64 + 16 + 3;
138 #endif  // LZO_VERSION
139 
140     case SNAPPY:
141       return snappy::MaxCompressedLength(input_size);
142 
143     default:
144       LOG(FATAL) << "Unknown compression type number " << comp;
145       return 0;
146   }
147 }
148 
149 // Returns true if we successfully compressed, false otherwise.
150 //
151 // If compressed_is_preallocated is set, do not resize the compressed buffer.
152 // This is typically what you want for a benchmark, in order to not spend
153 // time in the memory allocator. If you do set this flag, however,
154 // "compressed" must be preinitialized to at least MinCompressbufSize(comp)
155 // number of bytes, and may contain junk bytes at the end after return.
Compress(const char * input,size_t input_size,CompressorType comp,string * compressed,bool compressed_is_preallocated)156 static bool Compress(const char* input, size_t input_size, CompressorType comp,
157                      string* compressed, bool compressed_is_preallocated) {
158   if (!compressed_is_preallocated) {
159     compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
160   }
161 
162   switch (comp) {
163 #ifdef ZLIB_VERSION
164     case ZLIB: {
165       ZLib zlib;
166       uLongf destlen = compressed->size();
167       int ret = zlib.Compress(
168           reinterpret_cast<Bytef*>(string_as_array(compressed)),
169           &destlen,
170           reinterpret_cast<const Bytef*>(input),
171           input_size);
172       CHECK_EQ(Z_OK, ret);
173       if (!compressed_is_preallocated) {
174         compressed->resize(destlen);
175       }
176       return true;
177     }
178 #endif  // ZLIB_VERSION
179 
180 #ifdef LZO_VERSION
181     case LZO: {
182       unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
183       lzo_uint destlen;
184       int ret = lzo1x_1_15_compress(
185           reinterpret_cast<const uint8*>(input),
186           input_size,
187           reinterpret_cast<uint8*>(string_as_array(compressed)),
188           &destlen,
189           mem);
190       CHECK_EQ(LZO_E_OK, ret);
191       delete[] mem;
192       if (!compressed_is_preallocated) {
193         compressed->resize(destlen);
194       }
195       break;
196     }
197 #endif  // LZO_VERSION
198 
199     case SNAPPY: {
200       size_t destlen;
201       snappy::RawCompress(input, input_size,
202                           string_as_array(compressed),
203                           &destlen);
204       CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
205       if (!compressed_is_preallocated) {
206         compressed->resize(destlen);
207       }
208       break;
209     }
210 
211     default: {
212       return false;     // the asked-for library wasn't compiled in
213     }
214   }
215   return true;
216 }
217 
Uncompress(const string & compressed,CompressorType comp,int size,string * output)218 static bool Uncompress(const string& compressed, CompressorType comp,
219                        int size, string* output) {
220   switch (comp) {
221 #ifdef ZLIB_VERSION
222     case ZLIB: {
223       output->resize(size);
224       ZLib zlib;
225       uLongf destlen = output->size();
226       int ret = zlib.Uncompress(
227           reinterpret_cast<Bytef*>(string_as_array(output)),
228           &destlen,
229           reinterpret_cast<const Bytef*>(compressed.data()),
230           compressed.size());
231       CHECK_EQ(Z_OK, ret);
232       CHECK_EQ(static_cast<uLongf>(size), destlen);
233       break;
234     }
235 #endif  // ZLIB_VERSION
236 
237 #ifdef LZO_VERSION
238     case LZO: {
239       output->resize(size);
240       lzo_uint destlen;
241       int ret = lzo1x_decompress(
242           reinterpret_cast<const uint8*>(compressed.data()),
243           compressed.size(),
244           reinterpret_cast<uint8*>(string_as_array(output)),
245           &destlen,
246           NULL);
247       CHECK_EQ(LZO_E_OK, ret);
248       CHECK_EQ(static_cast<lzo_uint>(size), destlen);
249       break;
250     }
251 #endif  // LZO_VERSION
252 
253     case SNAPPY: {
254       snappy::RawUncompress(compressed.data(), compressed.size(),
255                             string_as_array(output));
256       break;
257     }
258 
259     default: {
260       return false;     // the asked-for library wasn't compiled in
261     }
262   }
263   return true;
264 }
265 
Measure(const char * data,size_t length,CompressorType comp,int repeats,int block_size)266 static void Measure(const char* data,
267                     size_t length,
268                     CompressorType comp,
269                     int repeats,
270                     int block_size) {
271   // Run tests a few time and pick median running times
272   static const int kRuns = 5;
273   double ctime[kRuns];
274   double utime[kRuns];
275   int compressed_size = 0;
276 
277   {
278     // Chop the input into blocks
279     int num_blocks = (length + block_size - 1) / block_size;
280     std::vector<const char*> input(num_blocks);
281     std::vector<size_t> input_length(num_blocks);
282     std::vector<string> compressed(num_blocks);
283     std::vector<string> output(num_blocks);
284     for (int b = 0; b < num_blocks; b++) {
285       int input_start = b * block_size;
286       int input_limit = std::min<int>((b+1)*block_size, length);
287       input[b] = data+input_start;
288       input_length[b] = input_limit-input_start;
289 
290       // Pre-grow the output buffer so we don't measure string append time.
291       compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
292     }
293 
294     // First, try one trial compression to make sure the code is compiled in
295     if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
296       LOG(WARNING) << "Skipping " << names[comp] << ": "
297                    << "library not compiled in";
298       return;
299     }
300 
301     for (int run = 0; run < kRuns; run++) {
302       CycleTimer ctimer, utimer;
303 
304       for (int b = 0; b < num_blocks; b++) {
305         // Pre-grow the output buffer so we don't measure string append time.
306         compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
307       }
308 
309       ctimer.Start();
310       for (int b = 0; b < num_blocks; b++)
311         for (int i = 0; i < repeats; i++)
312           Compress(input[b], input_length[b], comp, &compressed[b], true);
313       ctimer.Stop();
314 
315       // Compress once more, with resizing, so we don't leave junk
316       // at the end that will confuse the decompressor.
317       for (int b = 0; b < num_blocks; b++) {
318         Compress(input[b], input_length[b], comp, &compressed[b], false);
319       }
320 
321       for (int b = 0; b < num_blocks; b++) {
322         output[b].resize(input_length[b]);
323       }
324 
325       utimer.Start();
326       for (int i = 0; i < repeats; i++)
327         for (int b = 0; b < num_blocks; b++)
328           Uncompress(compressed[b], comp, input_length[b], &output[b]);
329       utimer.Stop();
330 
331       ctime[run] = ctimer.Get();
332       utime[run] = utimer.Get();
333     }
334 
335     compressed_size = 0;
336     for (size_t i = 0; i < compressed.size(); i++) {
337       compressed_size += compressed[i].size();
338     }
339   }
340 
341   std::sort(ctime, ctime + kRuns);
342   std::sort(utime, utime + kRuns);
343   const int med = kRuns/2;
344 
345   float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
346   float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
347   string x = names[comp];
348   x += ":";
349   string urate = (uncomp_rate >= 0)
350                  ? StringPrintf("%.1f", uncomp_rate)
351                  : string("?");
352   printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%%  "
353          "comp %5.1f MB/s  uncomp %5s MB/s\n",
354          x.c_str(),
355          block_size/(1<<20),
356          static_cast<int>(length), static_cast<uint32>(compressed_size),
357          (compressed_size * 100.0) / std::max<int>(1, length),
358          comp_rate,
359          urate.c_str());
360 }
361 
VerifyString(const string & input)362 static int VerifyString(const string& input) {
363   string compressed;
364   DataEndingAtUnreadablePage i(input);
365   const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
366   CHECK_EQ(written, compressed.size());
367   CHECK_LE(compressed.size(),
368            snappy::MaxCompressedLength(input.size()));
369   CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
370 
371   string uncompressed;
372   DataEndingAtUnreadablePage c(compressed);
373   CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
374   CHECK_EQ(uncompressed, input);
375   return uncompressed.size();
376 }
377 
VerifyStringSink(const string & input)378 static void VerifyStringSink(const string& input) {
379   string compressed;
380   DataEndingAtUnreadablePage i(input);
381   const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
382   CHECK_EQ(written, compressed.size());
383   CHECK_LE(compressed.size(),
384            snappy::MaxCompressedLength(input.size()));
385   CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
386 
387   string uncompressed;
388   uncompressed.resize(input.size());
389   snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
390   DataEndingAtUnreadablePage c(compressed);
391   snappy::ByteArraySource source(c.data(), c.size());
392   CHECK(snappy::Uncompress(&source, &sink));
393   CHECK_EQ(uncompressed, input);
394 }
395 
VerifyIOVec(const string & input)396 static void VerifyIOVec(const string& input) {
397   string compressed;
398   DataEndingAtUnreadablePage i(input);
399   const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
400   CHECK_EQ(written, compressed.size());
401   CHECK_LE(compressed.size(),
402            snappy::MaxCompressedLength(input.size()));
403   CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
404 
405   // Try uncompressing into an iovec containing a random number of entries
406   // ranging from 1 to 10.
407   char* buf = new char[input.size()];
408   ACMRandom rnd(input.size());
409   size_t num = rnd.Next() % 10 + 1;
410   if (input.size() < num) {
411     num = input.size();
412   }
413   struct iovec* iov = new iovec[num];
414   int used_so_far = 0;
415   for (size_t i = 0; i < num; ++i) {
416     iov[i].iov_base = buf + used_so_far;
417     if (i == num - 1) {
418       iov[i].iov_len = input.size() - used_so_far;
419     } else {
420       // Randomly choose to insert a 0 byte entry.
421       if (rnd.OneIn(5)) {
422         iov[i].iov_len = 0;
423       } else {
424         iov[i].iov_len = rnd.Uniform(input.size());
425       }
426     }
427     used_so_far += iov[i].iov_len;
428   }
429   CHECK(snappy::RawUncompressToIOVec(
430       compressed.data(), compressed.size(), iov, num));
431   CHECK(!memcmp(buf, input.data(), input.size()));
432   delete[] iov;
433   delete[] buf;
434 }
435 
436 // Test that data compressed by a compressor that does not
437 // obey block sizes is uncompressed properly.
VerifyNonBlockedCompression(const string & input)438 static void VerifyNonBlockedCompression(const string& input) {
439   if (input.length() > snappy::kBlockSize) {
440     // We cannot test larger blocks than the maximum block size, obviously.
441     return;
442   }
443 
444   string prefix;
445   Varint::Append32(&prefix, input.size());
446 
447   // Setup compression table
448   snappy::internal::WorkingMemory wmem;
449   int table_size;
450   uint16* table = wmem.GetHashTable(input.size(), &table_size);
451 
452   // Compress entire input in one shot
453   string compressed;
454   compressed += prefix;
455   compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
456   char* dest = string_as_array(&compressed) + prefix.size();
457   char* end = snappy::internal::CompressFragment(input.data(), input.size(),
458                                                 dest, table, table_size);
459   compressed.resize(end - compressed.data());
460 
461   // Uncompress into string
462   string uncomp_str;
463   CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
464   CHECK_EQ(uncomp_str, input);
465 
466   // Uncompress using source/sink
467   string uncomp_str2;
468   uncomp_str2.resize(input.size());
469   snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
470   snappy::ByteArraySource source(compressed.data(), compressed.size());
471   CHECK(snappy::Uncompress(&source, &sink));
472   CHECK_EQ(uncomp_str2, input);
473 
474   // Uncompress into iovec
475   {
476     static const int kNumBlocks = 10;
477     struct iovec vec[kNumBlocks];
478     const int block_size = 1 + input.size() / kNumBlocks;
479     string iovec_data(block_size * kNumBlocks, 'x');
480     for (int i = 0; i < kNumBlocks; i++) {
481       vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
482       vec[i].iov_len = block_size;
483     }
484     CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
485                                        vec, kNumBlocks));
486     CHECK_EQ(string(iovec_data.data(), input.size()), input);
487   }
488 }
489 
490 // Expand the input so that it is at least K times as big as block size
Expand(const string & input)491 static string Expand(const string& input) {
492   static const int K = 3;
493   string data = input;
494   while (data.size() < K * snappy::kBlockSize) {
495     data += input;
496   }
497   return data;
498 }
499 
Verify(const string & input)500 static int Verify(const string& input) {
501   VLOG(1) << "Verifying input of size " << input.size();
502 
503   // Compress using string based routines
504   const int result = VerifyString(input);
505 
506   // Verify using sink based routines
507   VerifyStringSink(input);
508 
509   VerifyNonBlockedCompression(input);
510   VerifyIOVec(input);
511   if (!input.empty()) {
512     const string expanded = Expand(input);
513     VerifyNonBlockedCompression(expanded);
514     VerifyIOVec(input);
515   }
516 
517   return result;
518 }
519 
520 
IsValidCompressedBuffer(const string & c)521 static bool IsValidCompressedBuffer(const string& c) {
522   return snappy::IsValidCompressedBuffer(c.data(), c.size());
523 }
Uncompress(const string & c,string * u)524 static bool Uncompress(const string& c, string* u) {
525   return snappy::Uncompress(c.data(), c.size(), u);
526 }
527 
528 // This test checks to ensure that snappy doesn't coredump if it gets
529 // corrupted data.
TEST(CorruptedTest,VerifyCorrupted)530 TEST(CorruptedTest, VerifyCorrupted) {
531   string source = "making sure we don't crash with corrupted input";
532   VLOG(1) << source;
533   string dest;
534   string uncmp;
535   snappy::Compress(source.data(), source.size(), &dest);
536 
537   // Mess around with the data. It's hard to simulate all possible
538   // corruptions; this is just one example ...
539   CHECK_GT(dest.size(), 3);
540   dest[1]--;
541   dest[3]++;
542   // this really ought to fail.
543   CHECK(!IsValidCompressedBuffer(dest));
544   CHECK(!Uncompress(dest, &uncmp));
545 
546   // This is testing for a security bug - a buffer that decompresses to 100k
547   // but we lie in the snappy header and only reserve 0 bytes of memory :)
548   source.resize(100000);
549   for (size_t i = 0; i < source.length(); ++i) {
550     source[i] = 'A';
551   }
552   snappy::Compress(source.data(), source.size(), &dest);
553   dest[0] = dest[1] = dest[2] = dest[3] = 0;
554   CHECK(!IsValidCompressedBuffer(dest));
555   CHECK(!Uncompress(dest, &uncmp));
556 
557   if (sizeof(void *) == 4) {
558     // Another security check; check a crazy big length can't DoS us with an
559     // over-allocation.
560     // Currently this is done only for 32-bit builds.  On 64-bit builds,
561     // where 3 GB might be an acceptable allocation size, Uncompress()
562     // attempts to decompress, and sometimes causes the test to run out of
563     // memory.
564     dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
565     // This decodes to a really large size, i.e., about 3 GB.
566     dest[4] = 'k';
567     CHECK(!IsValidCompressedBuffer(dest));
568     CHECK(!Uncompress(dest, &uncmp));
569   } else {
570     LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
571   }
572 
573   // This decodes to about 2 MB; much smaller, but should still fail.
574   dest[0] = dest[1] = dest[2] = '\xff';
575   dest[3] = 0x00;
576   CHECK(!IsValidCompressedBuffer(dest));
577   CHECK(!Uncompress(dest, &uncmp));
578 
579   // try reading stuff in from a bad file.
580   for (int i = 1; i <= 3; ++i) {
581     string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(),
582                                    0);
583     string uncmp;
584     // check that we don't return a crazy length
585     size_t ulen;
586     CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
587           || (ulen < (1<<20)));
588     uint32 ulen2;
589     snappy::ByteArraySource source(data.data(), data.size());
590     CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
591           (ulen2 < (1<<20)));
592     CHECK(!IsValidCompressedBuffer(data));
593     CHECK(!Uncompress(data, &uncmp));
594   }
595 }
596 
597 // Helper routines to construct arbitrary compressed strings.
598 // These mirror the compression code in snappy.cc, but are copied
599 // here so that we can bypass some limitations in the how snappy.cc
600 // invokes these routines.
AppendLiteral(string * dst,const string & literal)601 static void AppendLiteral(string* dst, const string& literal) {
602   if (literal.empty()) return;
603   int n = literal.size() - 1;
604   if (n < 60) {
605     // Fit length in tag byte
606     dst->push_back(0 | (n << 2));
607   } else {
608     // Encode in upcoming bytes
609     char number[4];
610     int count = 0;
611     while (n > 0) {
612       number[count++] = n & 0xff;
613       n >>= 8;
614     }
615     dst->push_back(0 | ((59+count) << 2));
616     *dst += string(number, count);
617   }
618   *dst += literal;
619 }
620 
AppendCopy(string * dst,int offset,int length)621 static void AppendCopy(string* dst, int offset, int length) {
622   while (length > 0) {
623     // Figure out how much to copy in one shot
624     int to_copy;
625     if (length >= 68) {
626       to_copy = 64;
627     } else if (length > 64) {
628       to_copy = 60;
629     } else {
630       to_copy = length;
631     }
632     length -= to_copy;
633 
634     if ((to_copy >= 4) && (to_copy < 12) && (offset < 2048)) {
635       assert(to_copy-4 < 8);            // Must fit in 3 bits
636       dst->push_back(1 | ((to_copy-4) << 2) | ((offset >> 8) << 5));
637       dst->push_back(offset & 0xff);
638     } else if (offset < 65536) {
639       dst->push_back(2 | ((to_copy-1) << 2));
640       dst->push_back(offset & 0xff);
641       dst->push_back(offset >> 8);
642     } else {
643       dst->push_back(3 | ((to_copy-1) << 2));
644       dst->push_back(offset & 0xff);
645       dst->push_back((offset >> 8) & 0xff);
646       dst->push_back((offset >> 16) & 0xff);
647       dst->push_back((offset >> 24) & 0xff);
648     }
649   }
650 }
651 
TEST(Snappy,SimpleTests)652 TEST(Snappy, SimpleTests) {
653   Verify("");
654   Verify("a");
655   Verify("ab");
656   Verify("abc");
657 
658   Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
659   Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
660   Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
661   Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
662   Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
663 }
664 
665 // Verify max blowup (lots of four-byte copies)
TEST(Snappy,MaxBlowup)666 TEST(Snappy, MaxBlowup) {
667   string input;
668   for (int i = 0; i < 20000; i++) {
669     ACMRandom rnd(i);
670     uint32 bytes = static_cast<uint32>(rnd.Next());
671     input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
672   }
673   for (int i = 19999; i >= 0; i--) {
674     ACMRandom rnd(i);
675     uint32 bytes = static_cast<uint32>(rnd.Next());
676     input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
677   }
678   Verify(input);
679 }
680 
TEST(Snappy,RandomData)681 TEST(Snappy, RandomData) {
682   ACMRandom rnd(FLAGS_test_random_seed);
683 
684   const int num_ops = 20000;
685   for (int i = 0; i < num_ops; i++) {
686     if ((i % 1000) == 0) {
687       VLOG(0) << "Random op " << i << " of " << num_ops;
688     }
689 
690     string x;
691     size_t len = rnd.Uniform(4096);
692     if (i < 100) {
693       len = 65536 + rnd.Uniform(65536);
694     }
695     while (x.size() < len) {
696       int run_len = 1;
697       if (rnd.OneIn(10)) {
698         run_len = rnd.Skewed(8);
699       }
700       char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
701       while (run_len-- > 0 && x.size() < len) {
702         x += c;
703       }
704     }
705 
706     Verify(x);
707   }
708 }
709 
TEST(Snappy,FourByteOffset)710 TEST(Snappy, FourByteOffset) {
711   // The new compressor cannot generate four-byte offsets since
712   // it chops up the input into 32KB pieces.  So we hand-emit the
713   // copy manually.
714 
715   // The two fragments that make up the input string.
716   string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
717   string fragment2 = "some other string";
718 
719   // How many times each fragment is emitted.
720   const int n1 = 2;
721   const int n2 = 100000 / fragment2.size();
722   const int length = n1 * fragment1.size() + n2 * fragment2.size();
723 
724   string compressed;
725   Varint::Append32(&compressed, length);
726 
727   AppendLiteral(&compressed, fragment1);
728   string src = fragment1;
729   for (int i = 0; i < n2; i++) {
730     AppendLiteral(&compressed, fragment2);
731     src += fragment2;
732   }
733   AppendCopy(&compressed, src.size(), fragment1.size());
734   src += fragment1;
735   CHECK_EQ(length, src.size());
736 
737   string uncompressed;
738   CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
739   CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
740                            &uncompressed));
741   CHECK_EQ(uncompressed, src);
742 }
743 
TEST(Snappy,IOVecEdgeCases)744 TEST(Snappy, IOVecEdgeCases) {
745   // Test some tricky edge cases in the iovec output that are not necessarily
746   // exercised by random tests.
747 
748   // Our output blocks look like this initially (the last iovec is bigger
749   // than depicted):
750   // [  ] [ ] [    ] [        ] [        ]
751   static const int kLengths[] = { 2, 1, 4, 8, 128 };
752 
753   struct iovec iov[ARRAYSIZE(kLengths)];
754   for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
755     iov[i].iov_base = new char[kLengths[i]];
756     iov[i].iov_len = kLengths[i];
757   }
758 
759   string compressed;
760   Varint::Append32(&compressed, 22);
761 
762   // A literal whose output crosses three blocks.
763   // [ab] [c] [123 ] [        ] [        ]
764   AppendLiteral(&compressed, "abc123");
765 
766   // A copy whose output crosses two blocks (source and destination
767   // segments marked).
768   // [ab] [c] [1231] [23      ] [        ]
769   //           ^--^   --
770   AppendCopy(&compressed, 3, 3);
771 
772   // A copy where the input is, at first, in the block before the output:
773   //
774   // [ab] [c] [1231] [231231  ] [        ]
775   //           ^---     ^---
776   // Then during the copy, the pointers move such that the input and
777   // output pointers are in the same block:
778   //
779   // [ab] [c] [1231] [23123123] [        ]
780   //                  ^-    ^-
781   // And then they move again, so that the output pointer is no longer
782   // in the same block as the input pointer:
783   // [ab] [c] [1231] [23123123] [123     ]
784   //                    ^--      ^--
785   AppendCopy(&compressed, 6, 9);
786 
787   // Finally, a copy where the input is from several blocks back,
788   // and it also crosses three blocks:
789   //
790   // [ab] [c] [1231] [23123123] [123b    ]
791   //   ^                            ^
792   // [ab] [c] [1231] [23123123] [123bc   ]
793   //       ^                         ^
794   // [ab] [c] [1231] [23123123] [123bc12 ]
795   //           ^-                     ^-
796   AppendCopy(&compressed, 17, 4);
797 
798   CHECK(snappy::RawUncompressToIOVec(
799       compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
800   CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
801   CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
802   CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
803   CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
804   CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));
805 
806   for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
807     delete[] reinterpret_cast<char *>(iov[i].iov_base);
808   }
809 }
810 
TEST(Snappy,IOVecLiteralOverflow)811 TEST(Snappy, IOVecLiteralOverflow) {
812   static const int kLengths[] = { 3, 4 };
813 
814   struct iovec iov[ARRAYSIZE(kLengths)];
815   for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
816     iov[i].iov_base = new char[kLengths[i]];
817     iov[i].iov_len = kLengths[i];
818   }
819 
820   string compressed;
821   Varint::Append32(&compressed, 8);
822 
823   AppendLiteral(&compressed, "12345678");
824 
825   CHECK(!snappy::RawUncompressToIOVec(
826       compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
827 
828   for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
829     delete[] reinterpret_cast<char *>(iov[i].iov_base);
830   }
831 }
832 
TEST(Snappy,IOVecCopyOverflow)833 TEST(Snappy, IOVecCopyOverflow) {
834   static const int kLengths[] = { 3, 4 };
835 
836   struct iovec iov[ARRAYSIZE(kLengths)];
837   for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
838     iov[i].iov_base = new char[kLengths[i]];
839     iov[i].iov_len = kLengths[i];
840   }
841 
842   string compressed;
843   Varint::Append32(&compressed, 8);
844 
845   AppendLiteral(&compressed, "123");
846   AppendCopy(&compressed, 3, 5);
847 
848   CHECK(!snappy::RawUncompressToIOVec(
849       compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
850 
851   for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
852     delete[] reinterpret_cast<char *>(iov[i].iov_base);
853   }
854 }
855 
CheckUncompressedLength(const string & compressed,size_t * ulength)856 static bool CheckUncompressedLength(const string& compressed,
857                                     size_t* ulength) {
858   const bool result1 = snappy::GetUncompressedLength(compressed.data(),
859                                                      compressed.size(),
860                                                      ulength);
861 
862   snappy::ByteArraySource source(compressed.data(), compressed.size());
863   uint32 length;
864   const bool result2 = snappy::GetUncompressedLength(&source, &length);
865   CHECK_EQ(result1, result2);
866   return result1;
867 }
868 
TEST(SnappyCorruption,TruncatedVarint)869 TEST(SnappyCorruption, TruncatedVarint) {
870   string compressed, uncompressed;
871   size_t ulength;
872   compressed.push_back('\xf0');
873   CHECK(!CheckUncompressedLength(compressed, &ulength));
874   CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
875   CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
876                             &uncompressed));
877 }
878 
TEST(SnappyCorruption,UnterminatedVarint)879 TEST(SnappyCorruption, UnterminatedVarint) {
880   string compressed, uncompressed;
881   size_t ulength;
882   compressed.push_back('\x80');
883   compressed.push_back('\x80');
884   compressed.push_back('\x80');
885   compressed.push_back('\x80');
886   compressed.push_back('\x80');
887   compressed.push_back(10);
888   CHECK(!CheckUncompressedLength(compressed, &ulength));
889   CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
890   CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
891                             &uncompressed));
892 }
893 
TEST(SnappyCorruption,OverflowingVarint)894 TEST(SnappyCorruption, OverflowingVarint) {
895   string compressed, uncompressed;
896   size_t ulength;
897   compressed.push_back('\xfb');
898   compressed.push_back('\xff');
899   compressed.push_back('\xff');
900   compressed.push_back('\xff');
901   compressed.push_back('\x7f');
902   CHECK(!CheckUncompressedLength(compressed, &ulength));
903   CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
904   CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
905                             &uncompressed));
906 }
907 
TEST(Snappy,ReadPastEndOfBuffer)908 TEST(Snappy, ReadPastEndOfBuffer) {
909   // Check that we do not read past end of input
910 
911   // Make a compressed string that ends with a single-byte literal
912   string compressed;
913   Varint::Append32(&compressed, 1);
914   AppendLiteral(&compressed, "x");
915 
916   string uncompressed;
917   DataEndingAtUnreadablePage c(compressed);
918   CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
919   CHECK_EQ(uncompressed, string("x"));
920 }
921 
922 // Check for an infinite loop caused by a copy with offset==0
TEST(Snappy,ZeroOffsetCopy)923 TEST(Snappy, ZeroOffsetCopy) {
924   const char* compressed = "\x40\x12\x00\x00";
925   //  \x40              Length (must be > kMaxIncrementCopyOverflow)
926   //  \x12\x00\x00      Copy with offset==0, length==5
927   char uncompressed[100];
928   EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
929 }
930 
TEST(Snappy,ZeroOffsetCopyValidation)931 TEST(Snappy, ZeroOffsetCopyValidation) {
932   const char* compressed = "\x05\x12\x00\x00";
933   //  \x05              Length
934   //  \x12\x00\x00      Copy with offset==0, length==5
935   EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
936 }
937 
938 namespace {
939 
TestFindMatchLength(const char * s1,const char * s2,unsigned length)940 int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
941   std::pair<size_t, bool> p =
942       snappy::internal::FindMatchLength(s1, s2, s2 + length);
943   CHECK_EQ(p.first < 8, p.second);
944   return p.first;
945 }
946 
947 }  // namespace
948 
TEST(Snappy,FindMatchLength)949 TEST(Snappy, FindMatchLength) {
950   // Exercise all different code paths through the function.
951   // 64-bit version:
952 
953   // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
954   EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
955   EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));
956 
957   // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
958   EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));
959 
960   // Same, but edge cases.
961   EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
962   EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));
963 
964   // Find non-match at once in first loop.
965   EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
966   EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
967   EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
968   EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));
969 
970   // Find non-match in first loop after one block.
971   EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
972                                    "abcdefgh?1234567xxxxxxxx", 24));
973   EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
974                                    "abcdefgh0?234567xxxxxxxx", 24));
975   EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
976                                     "abcdefgh01237654xxxxxxxx", 24));
977   EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
978                                     "abcdefgh0123456?xxxxxxxx", 24));
979 
980   // 32-bit version:
981 
982   // Short matches.
983   EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
984   EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
985   EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
986   EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
987   EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
988   EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
989   EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
990   EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
991   EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
992   EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));
993 
994   // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
995   EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
996   EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
997   EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));
998 
999   // Same, but edge cases.
1000   EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
1001   EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));
1002 
1003   // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
1004   EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));
1005 
1006   // Find non-match at once in first loop.
1007   EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1008                                    "xxxxxx?123xxxxxxxx", 18));
1009   EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1010                                    "xxxxxx0?23xxxxxxxx", 18));
1011   EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1012                                    "xxxxxx0132xxxxxxxx", 18));
1013   EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
1014                                    "xxxxxx012?xxxxxxxx", 18));
1015 
1016   // Same, but edge cases.
1017   EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
1018   EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
1019   EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
1020   EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));
1021 
1022   // Find non-match in first loop after one block.
1023   EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
1024                                     "xxxxxxabcd?123xx", 16));
1025   EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
1026                                     "xxxxxxabcd0?23xx", 16));
1027   EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
1028                                     "xxxxxxabcd0132xx", 16));
1029   EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
1030                                     "xxxxxxabcd012?xx", 16));
1031 
1032   // Same, but edge cases.
1033   EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
1034   EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
1035   EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
1036   EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
1037 }
1038 
TEST(Snappy,FindMatchLengthRandom)1039 TEST(Snappy, FindMatchLengthRandom) {
1040   const int kNumTrials = 10000;
1041   const int kTypicalLength = 10;
1042   ACMRandom rnd(FLAGS_test_random_seed);
1043 
1044   for (int i = 0; i < kNumTrials; i++) {
1045     string s, t;
1046     char a = rnd.Rand8();
1047     char b = rnd.Rand8();
1048     while (!rnd.OneIn(kTypicalLength)) {
1049       s.push_back(rnd.OneIn(2) ? a : b);
1050       t.push_back(rnd.OneIn(2) ? a : b);
1051     }
1052     DataEndingAtUnreadablePage u(s);
1053     DataEndingAtUnreadablePage v(t);
1054     int matched = TestFindMatchLength(u.data(), v.data(), t.size());
1055     if (matched == t.size()) {
1056       EXPECT_EQ(s, t);
1057     } else {
1058       EXPECT_NE(s[matched], t[matched]);
1059       for (int j = 0; j < matched; j++) {
1060         EXPECT_EQ(s[j], t[j]);
1061       }
1062     }
1063   }
1064 }
1065 
MakeEntry(unsigned int extra,unsigned int len,unsigned int copy_offset)1066 static uint16 MakeEntry(unsigned int extra,
1067                         unsigned int len,
1068                         unsigned int copy_offset) {
1069   // Check that all of the fields fit within the allocated space
1070   assert(extra       == (extra & 0x7));          // At most 3 bits
1071   assert(copy_offset == (copy_offset & 0x7));    // At most 3 bits
1072   assert(len         == (len & 0x7f));           // At most 7 bits
1073   return len | (copy_offset << 8) | (extra << 11);
1074 }
1075 
1076 // Check that the decompression table is correct, and optionally print out
1077 // the computed one.
TEST(Snappy,VerifyCharTable)1078 TEST(Snappy, VerifyCharTable) {
1079   using snappy::internal::LITERAL;
1080   using snappy::internal::COPY_1_BYTE_OFFSET;
1081   using snappy::internal::COPY_2_BYTE_OFFSET;
1082   using snappy::internal::COPY_4_BYTE_OFFSET;
1083   using snappy::internal::char_table;
1084 
1085   uint16 dst[256];
1086 
1087   // Place invalid entries in all places to detect missing initialization
1088   int assigned = 0;
1089   for (int i = 0; i < 256; i++) {
1090     dst[i] = 0xffff;
1091   }
1092 
1093   // Small LITERAL entries.  We store (len-1) in the top 6 bits.
1094   for (unsigned int len = 1; len <= 60; len++) {
1095     dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
1096     assigned++;
1097   }
1098 
1099   // Large LITERAL entries.  We use 60..63 in the high 6 bits to
1100   // encode the number of bytes of length info that follow the opcode.
1101   for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
1102     // We set the length field in the lookup table to 1 because extra
1103     // bytes encode len-1.
1104     dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
1105     assigned++;
1106   }
1107 
1108   // COPY_1_BYTE_OFFSET.
1109   //
1110   // The tag byte in the compressed data stores len-4 in 3 bits, and
1111   // offset/256 in 5 bits.  offset%256 is stored in the next byte.
1112   //
1113   // This format is used for length in range [4..11] and offset in
1114   // range [0..2047]
1115   for (unsigned int len = 4; len < 12; len++) {
1116     for (unsigned int offset = 0; offset < 2048; offset += 256) {
1117       dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
1118         MakeEntry(1, len, offset>>8);
1119       assigned++;
1120     }
1121   }
1122 
1123   // COPY_2_BYTE_OFFSET.
1124   // Tag contains len-1 in top 6 bits, and offset in next two bytes.
1125   for (unsigned int len = 1; len <= 64; len++) {
1126     dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
1127     assigned++;
1128   }
1129 
1130   // COPY_4_BYTE_OFFSET.
1131   // Tag contents len-1 in top 6 bits, and offset in next four bytes.
1132   for (unsigned int len = 1; len <= 64; len++) {
1133     dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
1134     assigned++;
1135   }
1136 
1137   // Check that each entry was initialized exactly once.
1138   EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
1139   for (int i = 0; i < 256; i++) {
1140     EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
1141   }
1142 
1143   if (FLAGS_snappy_dump_decompression_table) {
1144     printf("static const uint16 char_table[256] = {\n  ");
1145     for (int i = 0; i < 256; i++) {
1146       printf("0x%04x%s",
1147              dst[i],
1148              ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
1149     }
1150     printf("};\n");
1151   }
1152 
1153   // Check that computed table matched recorded table.
1154   for (int i = 0; i < 256; i++) {
1155     EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
1156   }
1157 }
1158 
CompressFile(const char * fname)1159 static void CompressFile(const char* fname) {
1160   string fullinput;
1161   CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1162 
1163   string compressed;
1164   Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
1165 
1166   CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
1167                              file::Defaults()));
1168 }
1169 
UncompressFile(const char * fname)1170 static void UncompressFile(const char* fname) {
1171   string fullinput;
1172   CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1173 
1174   size_t uncompLength;
1175   CHECK(CheckUncompressedLength(fullinput, &uncompLength));
1176 
1177   string uncompressed;
1178   uncompressed.resize(uncompLength);
1179   CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
1180 
1181   CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
1182                              file::Defaults()));
1183 }
1184 
MeasureFile(const char * fname)1185 static void MeasureFile(const char* fname) {
1186   string fullinput;
1187   CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1188   printf("%-40s :\n", fname);
1189 
1190   int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
1191   int end_len = fullinput.size();
1192   if (FLAGS_end_len >= 0) {
1193     end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
1194   }
1195   for (int len = start_len; len <= end_len; len++) {
1196     const char* const input = fullinput.data();
1197     int repeats = (FLAGS_bytes + len) / (len + 1);
1198     if (FLAGS_zlib)     Measure(input, len, ZLIB, repeats, 1024<<10);
1199     if (FLAGS_lzo)      Measure(input, len, LZO, repeats, 1024<<10);
1200     if (FLAGS_snappy)    Measure(input, len, SNAPPY, repeats, 4096<<10);
1201 
1202     // For block-size based measurements
1203     if (0 && FLAGS_snappy) {
1204       Measure(input, len, SNAPPY, repeats, 8<<10);
1205       Measure(input, len, SNAPPY, repeats, 16<<10);
1206       Measure(input, len, SNAPPY, repeats, 32<<10);
1207       Measure(input, len, SNAPPY, repeats, 64<<10);
1208       Measure(input, len, SNAPPY, repeats, 256<<10);
1209       Measure(input, len, SNAPPY, repeats, 1024<<10);
1210     }
1211   }
1212 }
1213 
1214 static struct {
1215   const char* label;
1216   const char* filename;
1217   size_t size_limit;
1218 } files[] = {
1219   { "html", "html", 0 },
1220   { "urls", "urls.10K", 0 },
1221   { "jpg", "fireworks.jpeg", 0 },
1222   { "jpg_200", "fireworks.jpeg", 200 },
1223   { "pdf", "paper-100k.pdf", 0 },
1224   { "html4", "html_x_4", 0 },
1225   { "txt1", "alice29.txt", 0 },
1226   { "txt2", "asyoulik.txt", 0 },
1227   { "txt3", "lcet10.txt", 0 },
1228   { "txt4", "plrabn12.txt", 0 },
1229   { "pb", "geo.protodata", 0 },
1230   { "gaviota", "kppkn.gtb", 0 },
1231 };
1232 
BM_UFlat(int iters,int arg)1233 static void BM_UFlat(int iters, int arg) {
1234   StopBenchmarkTiming();
1235 
1236   // Pick file to process based on "arg"
1237   CHECK_GE(arg, 0);
1238   CHECK_LT(arg, ARRAYSIZE(files));
1239   string contents = ReadTestDataFile(files[arg].filename,
1240                                      files[arg].size_limit);
1241 
1242   string zcontents;
1243   snappy::Compress(contents.data(), contents.size(), &zcontents);
1244   char* dst = new char[contents.size()];
1245 
1246   SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1247                              static_cast<int64>(contents.size()));
1248   SetBenchmarkLabel(files[arg].label);
1249   StartBenchmarkTiming();
1250   while (iters-- > 0) {
1251     CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
1252   }
1253   StopBenchmarkTiming();
1254 
1255   delete[] dst;
1256 }
1257 BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1258 
BM_UValidate(int iters,int arg)1259 static void BM_UValidate(int iters, int arg) {
1260   StopBenchmarkTiming();
1261 
1262   // Pick file to process based on "arg"
1263   CHECK_GE(arg, 0);
1264   CHECK_LT(arg, ARRAYSIZE(files));
1265   string contents = ReadTestDataFile(files[arg].filename,
1266                                      files[arg].size_limit);
1267 
1268   string zcontents;
1269   snappy::Compress(contents.data(), contents.size(), &zcontents);
1270 
1271   SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1272                              static_cast<int64>(contents.size()));
1273   SetBenchmarkLabel(files[arg].label);
1274   StartBenchmarkTiming();
1275   while (iters-- > 0) {
1276     CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
1277   }
1278   StopBenchmarkTiming();
1279 }
1280 BENCHMARK(BM_UValidate)->DenseRange(0, 4);
1281 
BM_UIOVec(int iters,int arg)1282 static void BM_UIOVec(int iters, int arg) {
1283   StopBenchmarkTiming();
1284 
1285   // Pick file to process based on "arg"
1286   CHECK_GE(arg, 0);
1287   CHECK_LT(arg, ARRAYSIZE(files));
1288   string contents = ReadTestDataFile(files[arg].filename,
1289                                      files[arg].size_limit);
1290 
1291   string zcontents;
1292   snappy::Compress(contents.data(), contents.size(), &zcontents);
1293 
1294   // Uncompress into an iovec containing ten entries.
1295   const int kNumEntries = 10;
1296   struct iovec iov[kNumEntries];
1297   char *dst = new char[contents.size()];
1298   int used_so_far = 0;
1299   for (int i = 0; i < kNumEntries; ++i) {
1300     iov[i].iov_base = dst + used_so_far;
1301     if (used_so_far == contents.size()) {
1302       iov[i].iov_len = 0;
1303       continue;
1304     }
1305 
1306     if (i == kNumEntries - 1) {
1307       iov[i].iov_len = contents.size() - used_so_far;
1308     } else {
1309       iov[i].iov_len = contents.size() / kNumEntries;
1310     }
1311     used_so_far += iov[i].iov_len;
1312   }
1313 
1314   SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1315                              static_cast<int64>(contents.size()));
1316   SetBenchmarkLabel(files[arg].label);
1317   StartBenchmarkTiming();
1318   while (iters-- > 0) {
1319     CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
1320                                        kNumEntries));
1321   }
1322   StopBenchmarkTiming();
1323 
1324   delete[] dst;
1325 }
1326 BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
1327 
BM_UFlatSink(int iters,int arg)1328 static void BM_UFlatSink(int iters, int arg) {
1329   StopBenchmarkTiming();
1330 
1331   // Pick file to process based on "arg"
1332   CHECK_GE(arg, 0);
1333   CHECK_LT(arg, ARRAYSIZE(files));
1334   string contents = ReadTestDataFile(files[arg].filename,
1335                                      files[arg].size_limit);
1336 
1337   string zcontents;
1338   snappy::Compress(contents.data(), contents.size(), &zcontents);
1339   char* dst = new char[contents.size()];
1340 
1341   SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1342                              static_cast<int64>(contents.size()));
1343   SetBenchmarkLabel(files[arg].label);
1344   StartBenchmarkTiming();
1345   while (iters-- > 0) {
1346     snappy::ByteArraySource source(zcontents.data(), zcontents.size());
1347     snappy::UncheckedByteArraySink sink(dst);
1348     CHECK(snappy::Uncompress(&source, &sink));
1349   }
1350   StopBenchmarkTiming();
1351 
1352   string s(dst, contents.size());
1353   CHECK_EQ(contents, s);
1354 
1355   delete[] dst;
1356 }
1357 
1358 BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
1359 
BM_ZFlat(int iters,int arg)1360 static void BM_ZFlat(int iters, int arg) {
1361   StopBenchmarkTiming();
1362 
1363   // Pick file to process based on "arg"
1364   CHECK_GE(arg, 0);
1365   CHECK_LT(arg, ARRAYSIZE(files));
1366   string contents = ReadTestDataFile(files[arg].filename,
1367                                      files[arg].size_limit);
1368 
1369   char* dst = new char[snappy::MaxCompressedLength(contents.size())];
1370 
1371   SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1372                              static_cast<int64>(contents.size()));
1373   StartBenchmarkTiming();
1374 
1375   size_t zsize = 0;
1376   while (iters-- > 0) {
1377     snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
1378   }
1379   StopBenchmarkTiming();
1380   const double compression_ratio =
1381       static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
1382   SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
1383                                  files[arg].label, 100.0 * compression_ratio));
1384   VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
1385                           files[arg].label, contents.size(), zsize);
1386   delete[] dst;
1387 }
1388 BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1389 
1390 }  // namespace snappy
1391 
main(int argc,char ** argv)1392 int main(int argc, char** argv) {
1393   InitGoogle(argv[0], &argc, &argv, true);
1394   RunSpecifiedBenchmarks();
1395 
1396   if (argc >= 2) {
1397     for (int arg = 1; arg < argc; arg++) {
1398       if (FLAGS_write_compressed) {
1399         snappy::CompressFile(argv[arg]);
1400       } else if (FLAGS_write_uncompressed) {
1401         snappy::UncompressFile(argv[arg]);
1402       } else {
1403         snappy::MeasureFile(argv[arg]);
1404       }
1405     }
1406     return 0;
1407   }
1408 
1409   return RUN_ALL_TESTS();
1410 }
1411