1 // Copyright 2005 and onwards Google Inc.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 #include <math.h>
30 #include <stdlib.h>
31
32
33 #include <algorithm>
34 #include <string>
35 #include <vector>
36
37 #include "snappy.h"
38 #include "snappy-internal.h"
39 #include "snappy-test.h"
40 #include "snappy-sinksource.h"
41
42 DEFINE_int32(start_len, -1,
43 "Starting prefix size for testing (-1: just full file contents)");
44 DEFINE_int32(end_len, -1,
45 "Starting prefix size for testing (-1: just full file contents)");
46 DEFINE_int32(bytes, 10485760,
47 "How many bytes to compress/uncompress per file for timing");
48
49 DEFINE_bool(zlib, false,
50 "Run zlib compression (http://www.zlib.net)");
51 DEFINE_bool(lzo, false,
52 "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
53 DEFINE_bool(quicklz, false,
54 "Run quickLZ compression (http://www.quicklz.com/)");
55 DEFINE_bool(liblzf, false,
56 "Run libLZF compression "
57 "(http://www.goof.com/pcg/marc/liblzf.html)");
58 DEFINE_bool(fastlz, false,
59 "Run FastLZ compression (http://www.fastlz.org/");
60 DEFINE_bool(snappy, true, "Run snappy compression");
61
62 DEFINE_bool(write_compressed, false,
63 "Write compressed versions of each file to <file>.comp");
64 DEFINE_bool(write_uncompressed, false,
65 "Write uncompressed versions of each file to <file>.uncomp");
66
67 DEFINE_bool(snappy_dump_decompression_table, false,
68 "If true, we print the decompression table during tests.");
69
70 namespace snappy {
71
72
#ifdef HAVE_FUNC_MMAP

// To test against code that reads beyond its input, this class copies a
// string to a newly allocated group of pages, the last of which
// is made unreadable via mprotect. Note that we need to allocate the
// memory with mmap(), as POSIX allows mprotect() only on memory allocated
// with mmap(), and some malloc/posix_memalign implementations expect to
// be able to read previously allocated memory while doing heap allocations.
class DataEndingAtUnreadablePage {
 public:
  explicit DataEndingAtUnreadablePage(const string& s) {
    const size_t page_size = getpagesize();
    const size_t size = s.size();
    // Round up space for string to a multiple of page_size.
    size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
    // One extra page at the end serves as the unreadable guard page.
    alloc_size_ = space_for_string + page_size;
    mem_ = mmap(NULL, alloc_size_,
                PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    CHECK_NE(MAP_FAILED, mem_);
    protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
    // Place the copy so its last byte is flush against the guard page;
    // any read one past the end will fault.
    char* dst = protected_page_ - size;
    memcpy(dst, s.data(), size);
    data_ = dst;
    size_ = size;
    // Make guard page unreadable.
    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
  }

  ~DataEndingAtUnreadablePage() {
    // Undo the mprotect.
    CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE));
    CHECK_EQ(0, munmap(mem_, alloc_size_));
  }

  const char* data() const { return data_; }
  size_t size() const { return size_; }

 private:
  size_t alloc_size_;       // Total bytes mapped (string pages + guard page).
  void* mem_;               // Base of the mmap()ed region.
  char* protected_page_;    // Start of the PROT_NONE guard page.
  const char* data_;        // Start of the copied string inside the mapping.
  size_t size_;             // Length of the copied string.
};

#else  // HAVE_FUNC_MMAP

// Fallback for systems without mmap: a plain string offers no guard page,
// so out-of-bounds reads are simply not detected there.
typedef string DataEndingAtUnreadablePage;

#endif
124
// Identifiers for the compression libraries exercised by the benchmarks.
enum CompressorType {
  ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY
};

// Display names, indexed by CompressorType; must stay in sync with the enum.
const char* names[] = {
  "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY"
};
132
// Returns how large an output buffer must be to compress input_size bytes
// with the given library.  Only libraries compiled in get a case here;
// an unknown id is a fatal error.
static size_t MinimumRequiredOutputSpace(size_t input_size,
                                         CompressorType comp) {
  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB:
      return ZLib::MinCompressbufSize(input_size);
#endif  // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO:
      // Worst-case bound suggested for lzo1x.
      return input_size + input_size/64 + 16 + 3;
#endif  // LZO_VERSION

#ifdef LZF_VERSION
    case LIBLZF:
      // LZF output is capped at the input size; see the LIBLZF case in
      // Compress() for the incompressible-data fallback.
      return input_size;
#endif  // LZF_VERSION

#ifdef QLZ_VERSION_MAJOR
    case QUICKLZ:
      return input_size + 36000;  // 36000 is used for scratch.
#endif  // QLZ_VERSION_MAJOR

#ifdef FASTLZ_VERSION
    case FASTLZ:
      return max(static_cast<int>(ceil(input_size * 1.05)), 66);
#endif  // FASTLZ_VERSION

    case SNAPPY:
      return snappy::MaxCompressedLength(input_size);

    default:
      LOG(FATAL) << "Unknown compression type number " << comp;
      return 0;
  }
}
169
// Returns true if we successfully compressed, false otherwise.
//
// If compressed_is_preallocated is set, do not resize the compressed buffer.
// This is typically what you want for a benchmark, in order to not spend
// time in the memory allocator. If you do set this flag, however,
// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
// number of bytes, and may contain junk bytes at the end after return.
static bool Compress(const char* input, size_t input_size, CompressorType comp,
                     string* compressed, bool compressed_is_preallocated) {
  if (!compressed_is_preallocated) {
    compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
  }

  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB: {
      ZLib zlib;
      uLongf destlen = compressed->size();
      int ret = zlib.Compress(
          reinterpret_cast<Bytef*>(string_as_array(compressed)),
          &destlen,
          reinterpret_cast<const Bytef*>(input),
          input_size);
      CHECK_EQ(Z_OK, ret);
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      return true;
    }
#endif  // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO: {
      // Scratch memory required by the LZO fast compressor.
      unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
      lzo_uint destlen;
      int ret = lzo1x_1_15_compress(
          reinterpret_cast<const uint8*>(input),
          input_size,
          reinterpret_cast<uint8*>(string_as_array(compressed)),
          &destlen,
          mem);
      CHECK_EQ(LZO_E_OK, ret);
      delete[] mem;
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }
#endif  // LZO_VERSION

#ifdef LZF_VERSION
    case LIBLZF: {
      int destlen = lzf_compress(input,
                                 input_size,
                                 string_as_array(compressed),
                                 input_size);
      if (destlen == 0) {
        // lzf *can* cause lots of blowup when compressing, so they
        // recommend to limit outsize to insize, and just not compress
        // if it's bigger. Ideally, we'd just swap input and output.
        compressed->assign(input, input_size);
        destlen = input_size;
      }
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }
#endif  // LZF_VERSION

#ifdef QLZ_VERSION_MAJOR
    case QUICKLZ: {
      // QuickLZ needs a heap-allocated state object; the default one is
      // large, so don't put it on the stack.
      qlz_state_compress *state_compress = new qlz_state_compress;
      int destlen = qlz_compress(input,
                                 string_as_array(compressed),
                                 input_size,
                                 state_compress);
      delete state_compress;
      CHECK_NE(0, destlen);
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }
#endif  // QLZ_VERSION_MAJOR

#ifdef FASTLZ_VERSION
    case FASTLZ: {
      // Use level 1 compression since we mostly care about speed.
      int destlen = fastlz_compress_level(
          1,
          input,
          input_size,
          string_as_array(compressed));
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      CHECK_NE(destlen, 0);
      break;
    }
#endif  // FASTLZ_VERSION

    case SNAPPY: {
      size_t destlen;
      snappy::RawCompress(input, input_size,
                          string_as_array(compressed),
                          &destlen);
      CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }

    default: {
      return false;  // the asked-for library wasn't compiled in
    }
  }
  return true;
}
290
// Uncompresses "compressed" (whose uncompressed length must be "size")
// with the given library into *output.  Returns false if the library
// wasn't compiled in; dies on any decompression error or size mismatch.
static bool Uncompress(const string& compressed, CompressorType comp,
                       int size, string* output) {
  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB: {
      output->resize(size);
      ZLib zlib;
      uLongf destlen = output->size();
      int ret = zlib.Uncompress(
          reinterpret_cast<Bytef*>(string_as_array(output)),
          &destlen,
          reinterpret_cast<const Bytef*>(compressed.data()),
          compressed.size());
      CHECK_EQ(Z_OK, ret);
      CHECK_EQ(static_cast<uLongf>(size), destlen);
      break;
    }
#endif  // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO: {
      output->resize(size);
      lzo_uint destlen;
      int ret = lzo1x_decompress(
          reinterpret_cast<const uint8*>(compressed.data()),
          compressed.size(),
          reinterpret_cast<uint8*>(string_as_array(output)),
          &destlen,
          NULL);
      CHECK_EQ(LZO_E_OK, ret);
      CHECK_EQ(static_cast<lzo_uint>(size), destlen);
      break;
    }
#endif  // LZO_VERSION

#ifdef LZF_VERSION
    case LIBLZF: {
      output->resize(size);
      int destlen = lzf_decompress(compressed.data(),
                                   compressed.size(),
                                   string_as_array(output),
                                   output->size());
      if (destlen == 0) {
        // This error probably means we had decided not to compress,
        // and thus have stored input in output directly.
        output->assign(compressed.data(), compressed.size());
        destlen = compressed.size();
      }
      CHECK_EQ(destlen, size);
      break;
    }
#endif  // LZF_VERSION

#ifdef QLZ_VERSION_MAJOR
    case QUICKLZ: {
      output->resize(size);
      // Heap-allocate the decompression state; see the QUICKLZ case in
      // Compress() for why.
      qlz_state_decompress *state_decompress = new qlz_state_decompress;
      int destlen = qlz_decompress(compressed.data(),
                                   string_as_array(output),
                                   state_decompress);
      delete state_decompress;
      CHECK_EQ(destlen, size);
      break;
    }
#endif  // QLZ_VERSION_MAJOR

#ifdef FASTLZ_VERSION
    case FASTLZ: {
      output->resize(size);
      int destlen = fastlz_decompress(compressed.data(),
                                      compressed.length(),
                                      string_as_array(output),
                                      size);
      CHECK_EQ(destlen, size);
      break;
    }
#endif  // FASTLZ_VERSION

    case SNAPPY: {
      // NOTE: output is expected to have been resized to "size" by the
      // caller (see Measure()); RawUncompress writes into it directly.
      snappy::RawUncompress(compressed.data(), compressed.size(),
                            string_as_array(output));
      break;
    }

    default: {
      return false;  // the asked-for library wasn't compiled in
    }
  }
  return true;
}
381
// Benchmarks one compressor over "data": chops the input into blocks of
// block_size bytes, times "repeats" compressions and decompressions of
// every block over kRuns runs, and prints the median throughput figures.
static void Measure(const char* data,
                    size_t length,
                    CompressorType comp,
                    int repeats,
                    int block_size) {
  // Run tests a few time and pick median running times
  static const int kRuns = 5;
  double ctime[kRuns];
  double utime[kRuns];
  int compressed_size = 0;

  {
    // Chop the input into blocks
    int num_blocks = (length + block_size - 1) / block_size;
    vector<const char*> input(num_blocks);
    vector<size_t> input_length(num_blocks);
    vector<string> compressed(num_blocks);
    vector<string> output(num_blocks);
    for (int b = 0; b < num_blocks; b++) {
      int input_start = b * block_size;
      int input_limit = min<int>((b+1)*block_size, length);
      input[b] = data+input_start;
      input_length[b] = input_limit-input_start;

      // Pre-grow the output buffer so we don't measure string append time.
      compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
    }

    // First, try one trial compression to make sure the code is compiled in
    if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
      LOG(WARNING) << "Skipping " << names[comp] << ": "
                   << "library not compiled in";
      return;
    }

    for (int run = 0; run < kRuns; run++) {
      CycleTimer ctimer, utimer;

      for (int b = 0; b < num_blocks; b++) {
        // Pre-grow the output buffer so we don't measure string append time.
        compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
      }

      // Timed compression loop: preallocated buffers, no resizing.
      ctimer.Start();
      for (int b = 0; b < num_blocks; b++)
        for (int i = 0; i < repeats; i++)
          Compress(input[b], input_length[b], comp, &compressed[b], true);
      ctimer.Stop();

      // Compress once more, with resizing, so we don't leave junk
      // at the end that will confuse the decompressor.
      for (int b = 0; b < num_blocks; b++) {
        Compress(input[b], input_length[b], comp, &compressed[b], false);
      }

      for (int b = 0; b < num_blocks; b++) {
        output[b].resize(input_length[b]);
      }

      // Timed decompression loop.
      utimer.Start();
      for (int i = 0; i < repeats; i++)
        for (int b = 0; b < num_blocks; b++)
          Uncompress(compressed[b], comp, input_length[b], &output[b]);
      utimer.Stop();

      ctime[run] = ctimer.Get();
      utime[run] = utimer.Get();
    }

    compressed_size = 0;
    for (size_t i = 0; i < compressed.size(); i++) {
      compressed_size += compressed[i].size();
    }
  }

  // Sort the per-run times so the middle element is the median.
  sort(ctime, ctime + kRuns);
  sort(utime, utime + kRuns);
  const int med = kRuns/2;

  float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
  float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
  string x = names[comp];
  x += ":";
  string urate = (uncomp_rate >= 0)
                 ? StringPrintf("%.1f", uncomp_rate)
                 : string("?");
  printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
         "comp %5.1f MB/s uncomp %5s MB/s\n",
         x.c_str(),
         block_size/(1<<20),
         static_cast<int>(length), static_cast<uint32>(compressed_size),
         (compressed_size * 100.0) / max<int>(1, length),
         comp_rate,
         urate.c_str());
}
477
VerifyString(const string & input)478 static int VerifyString(const string& input) {
479 string compressed;
480 DataEndingAtUnreadablePage i(input);
481 const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
482 CHECK_EQ(written, compressed.size());
483 CHECK_LE(compressed.size(),
484 snappy::MaxCompressedLength(input.size()));
485 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
486
487 string uncompressed;
488 DataEndingAtUnreadablePage c(compressed);
489 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
490 CHECK_EQ(uncompressed, input);
491 return uncompressed.size();
492 }
493
VerifyStringSink(const string & input)494 static void VerifyStringSink(const string& input) {
495 string compressed;
496 DataEndingAtUnreadablePage i(input);
497 const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
498 CHECK_EQ(written, compressed.size());
499 CHECK_LE(compressed.size(),
500 snappy::MaxCompressedLength(input.size()));
501 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
502
503 string uncompressed;
504 uncompressed.resize(input.size());
505 snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
506 DataEndingAtUnreadablePage c(compressed);
507 snappy::ByteArraySource source(c.data(), c.size());
508 CHECK(snappy::Uncompress(&source, &sink));
509 CHECK_EQ(uncompressed, input);
510 }
511
// Compresses "input", then decompresses it into an iovec array whose
// entry count and entry lengths are chosen pseudo-randomly (seeded by the
// input size), and checks the scattered output against the input.
static void VerifyIOVec(const string& input) {
  string compressed;
  DataEndingAtUnreadablePage i(input);
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
  CHECK_EQ(written, compressed.size());
  CHECK_LE(compressed.size(),
           snappy::MaxCompressedLength(input.size()));
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));

  // Try uncompressing into an iovec containing a random number of entries
  // ranging from 1 to 10.
  char* buf = new char[input.size()];
  ACMRandom rnd(input.size());
  size_t num = rnd.Next() % 10 + 1;
  if (input.size() < num) {
    num = input.size();
  }
  struct iovec* iov = new iovec[num];
  int used_so_far = 0;
  // The entries tile "buf" contiguously; only the last entry's length is
  // forced, the rest are random (possibly zero).
  for (size_t i = 0; i < num; ++i) {  // NOTE: shadows the guarded input "i".
    iov[i].iov_base = buf + used_so_far;
    if (i == num - 1) {
      iov[i].iov_len = input.size() - used_so_far;
    } else {
      // Randomly choose to insert a 0 byte entry.
      if (rnd.OneIn(5)) {
        iov[i].iov_len = 0;
      } else {
        iov[i].iov_len = rnd.Uniform(input.size());
      }
    }
    used_so_far += iov[i].iov_len;
  }
  CHECK(snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, num));
  CHECK(!memcmp(buf, input.data(), input.size()));
  delete[] iov;
  delete[] buf;
}
551
// Test that data compressed by a compressor that does not
// obey block sizes is uncompressed properly.
static void VerifyNonBlockedCompression(const string& input) {
  if (input.length() > snappy::kBlockSize) {
    // We cannot test larger blocks than the maximum block size, obviously.
    return;
  }

  // The varint-encoded uncompressed length that normally precedes the body.
  string prefix;
  Varint::Append32(&prefix, input.size());

  // Setup compression table
  snappy::internal::WorkingMemory wmem;
  int table_size;
  uint16* table = wmem.GetHashTable(input.size(), &table_size);

  // Compress entire input in one shot
  string compressed;
  compressed += prefix;
  compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
  char* dest = string_as_array(&compressed) + prefix.size();
  char* end = snappy::internal::CompressFragment(input.data(), input.size(),
                                                dest, table, table_size);
  compressed.resize(end - compressed.data());

  // Uncompress into string
  string uncomp_str;
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
  CHECK_EQ(uncomp_str, input);

  // Uncompress using source/sink
  string uncomp_str2;
  uncomp_str2.resize(input.size());
  snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
  snappy::ByteArraySource source(compressed.data(), compressed.size());
  CHECK(snappy::Uncompress(&source, &sink));
  CHECK_EQ(uncomp_str2, input);

  // Uncompress into iovec
  {
    static const int kNumBlocks = 10;
    struct iovec vec[kNumBlocks];
    // +1 guarantees the ten blocks together cover input.size() bytes.
    const int block_size = 1 + input.size() / kNumBlocks;
    string iovec_data(block_size * kNumBlocks, 'x');
    for (int i = 0; i < kNumBlocks; i++) {
      vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
      vec[i].iov_len = block_size;
    }
    CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
                                       vec, kNumBlocks));
    CHECK_EQ(string(iovec_data.data(), input.size()), input);
  }
}
605
606 // Expand the input so that it is at least K times as big as block size
Expand(const string & input)607 static string Expand(const string& input) {
608 static const int K = 3;
609 string data = input;
610 while (data.size() < K * snappy::kBlockSize) {
611 data += input;
612 }
613 return data;
614 }
615
Verify(const string & input)616 static int Verify(const string& input) {
617 VLOG(1) << "Verifying input of size " << input.size();
618
619 // Compress using string based routines
620 const int result = VerifyString(input);
621
622 // Verify using sink based routines
623 VerifyStringSink(input);
624
625 VerifyNonBlockedCompression(input);
626 VerifyIOVec(input);
627 if (!input.empty()) {
628 const string expanded = Expand(input);
629 VerifyNonBlockedCompression(expanded);
630 VerifyIOVec(input);
631 }
632
633 return result;
634 }
635
636
// Convenience wrapper: validate an entire compressed string at once.
static bool IsValidCompressedBuffer(const string& c) {
  return snappy::IsValidCompressedBuffer(c.data(), c.size());
}
// Convenience wrapper: uncompress an entire compressed string into *u.
static bool Uncompress(const string& c, string* u) {
  return snappy::Uncompress(c.data(), c.size(), u);
}
643
// This test checks to ensure that snappy doesn't coredump if it gets
// corrupted data.
TEST(CorruptedTest, VerifyCorrupted) {
  string source = "making sure we don't crash with corrupted input";
  VLOG(1) << source;
  string dest;
  string uncmp;
  snappy::Compress(source.data(), source.size(), &dest);

  // Mess around with the data. It's hard to simulate all possible
  // corruptions; this is just one example ...
  CHECK_GT(dest.size(), 3);
  dest[1]--;
  dest[3]++;
  // this really ought to fail.
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  // This is testing for a security bug - a buffer that decompresses to 100k
  // but we lie in the snappy header and only reserve 0 bytes of memory :)
  source.resize(100000);
  for (size_t i = 0; i < source.length(); ++i) {
    source[i] = 'A';
  }
  snappy::Compress(source.data(), source.size(), &dest);
  // Zero out the leading varint bytes that carry the uncompressed length.
  dest[0] = dest[1] = dest[2] = dest[3] = 0;
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  if (sizeof(void *) == 4) {
    // Another security check; check a crazy big length can't DoS us with an
    // over-allocation.
    // Currently this is done only for 32-bit builds. On 64-bit builds,
    // where 3 GB might be an acceptable allocation size, Uncompress()
    // attempts to decompress, and sometimes causes the test to run out of
    // memory.
    dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
    // This decodes to a really large size, i.e., about 3 GB.
    dest[4] = 'k';
    CHECK(!IsValidCompressedBuffer(dest));
    CHECK(!Uncompress(dest, &uncmp));
  } else {
    LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
  }

  // This decodes to about 2 MB; much smaller, but should still fail.
  dest[0] = dest[1] = dest[2] = '\xff';
  dest[3] = 0x00;
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  // try reading stuff in from a bad file.
  for (int i = 1; i <= 3; ++i) {
    string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(),
                                   0);
    string uncmp;  // fresh per file; shadows the outer uncmp
    // check that we don't return a crazy length
    size_t ulen;
    CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
          || (ulen < (1<<20)));
    uint32 ulen2;
    snappy::ByteArraySource source(data.data(), data.size());
    CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
          (ulen2 < (1<<20)));
    CHECK(!IsValidCompressedBuffer(data));
    CHECK(!Uncompress(data, &uncmp));
  }
}
712
// Helper routines to construct arbitrary compressed strings.
// These mirror the compression code in snappy.cc, but are copied
// here so that we can bypass some limitations in the how snappy.cc
// invokes these routines.

// Emits "literal" into *dst as a snappy literal element: a short form
// with the length embedded in the tag byte (< 60 bytes), or a long form
// with little-endian length bytes following the tag.
static void AppendLiteral(string* dst, const string& literal) {
  if (literal.empty()) return;
  int len_minus_one = literal.size() - 1;
  if (len_minus_one < 60) {
    // Short form: length-1 occupies the upper six bits of the tag byte.
    dst->push_back(len_minus_one << 2);
  } else {
    // Long form: emit length-1 as 1-4 little-endian bytes after the tag.
    char encoded[4];
    int num_bytes = 0;
    for (int rest = len_minus_one; rest > 0; rest >>= 8) {
      encoded[num_bytes++] = rest & 0xff;
    }
    dst->push_back((59 + num_bytes) << 2);
    dst->append(encoded, num_bytes);
  }
  dst->append(literal);
}
736
// Emits a snappy copy element (offset, length) into *dst, splitting long
// copies into multiple ops and choosing the 2-, 3- or 5-byte encoding
// based on the per-op length and the offset magnitude.
static void AppendCopy(string* dst, int offset, int length) {
  while (length > 0) {
    // Decide how much the next emitted op should cover.
    int chunk;
    if (length >= 68) {
      chunk = 64;            // Max per op; leaves at least 4 for later ops.
    } else if (length > 64) {
      chunk = 60;            // Avoid leaving a tail shorter than 4 bytes.
    } else {
      chunk = length;
    }
    length -= chunk;

    if (chunk >= 4 && chunk < 12 && offset < 2048) {
      // 2-byte op: 3-bit length, 11-bit offset.
      assert(chunk - 4 < 8);  // Must fit in 3 bits
      dst->push_back(1 | ((chunk - 4) << 2) | ((offset >> 8) << 5));
      dst->push_back(offset & 0xff);
    } else if (offset < 65536) {
      // 3-byte op: 16-bit little-endian offset.
      dst->push_back(2 | ((chunk - 1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back(offset >> 8);
    } else {
      // 5-byte op: 32-bit little-endian offset.
      dst->push_back(3 | ((chunk - 1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back((offset >> 8) & 0xff);
      dst->push_back((offset >> 16) & 0xff);
      dst->push_back((offset >> 24) & 0xff);
    }
  }
}
767
TEST(Snappy, SimpleTests) {
  // Tiny inputs, empty through three bytes.
  Verify("");
  Verify("a");
  Verify("ab");
  Verify("abc");

  // Runs of 'b' sized around the 2048 and 65536 copy-offset boundaries
  // used by the copy encodings (see AppendCopy below).
  Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
  Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
  Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
  Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
  Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
}
780
// Verify max blowup (lots of four-byte copies)
TEST(Snappy, MaxBlowup) {
  // 20000 pseudo-random 4-byte words, followed by the same words in
  // reverse order, so repeated words appear at large distances.
  string input;
  for (int i = 0; i < 20000; i++) {
    ACMRandom rnd(i);
    uint32 bytes = static_cast<uint32>(rnd.Next());
    input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
  }
  for (int i = 19999; i >= 0; i--) {
    ACMRandom rnd(i);
    uint32 bytes = static_cast<uint32>(rnd.Next());
    input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
  }
  Verify(input);
}
796
TEST(Snappy, RandomData) {
  ACMRandom rnd(FLAGS_test_random_seed);

  const int num_ops = 20000;
  for (int i = 0; i < num_ops; i++) {
    if ((i % 1000) == 0) {
      VLOG(0) << "Random op " << i << " of " << num_ops;
    }

    string x;
    size_t len = rnd.Uniform(4096);
    if (i < 100) {
      // First 100 iterations use large inputs (64-128 KB).
      len = 65536 + rnd.Uniform(65536);
    }
    while (x.size() < len) {
      // Occasionally emit a run of a repeated byte instead of a single one.
      int run_len = 1;
      if (rnd.OneIn(10)) {
        run_len = rnd.Skewed(8);
      }
      // Large inputs use uniformly random bytes; the rest draw from a
      // narrower skewed distribution.
      char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
      while (run_len-- > 0 && x.size() < len) {
        x += c;
      }
    }

    Verify(x);
  }
}
825
TEST(Snappy, FourByteOffset) {
  // The new compressor cannot generate four-byte offsets since
  // it chops up the input into 32KB pieces. So we hand-emit the
  // copy manually.

  // The two fragments that make up the input string.
  string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
  string fragment2 = "some other string";

  // How many times each fragment is emitted.
  const int n1 = 2;
  const int n2 = 100000 / fragment2.size();
  const int length = n1 * fragment1.size() + n2 * fragment2.size();

  string compressed;
  Varint::Append32(&compressed, length);

  AppendLiteral(&compressed, fragment1);
  string src = fragment1;
  for (int i = 0; i < n2; i++) {
    AppendLiteral(&compressed, fragment2);
    src += fragment2;
  }
  // At this point src.size() > 65536, so this copy back to fragment1
  // requires the 5-byte (32-bit offset) encoding.
  AppendCopy(&compressed, src.size(), fragment1.size());
  src += fragment1;
  CHECK_EQ(length, src.size());

  string uncompressed;
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
                           &uncompressed));
  CHECK_EQ(uncompressed, src);
}
859
TEST(Snappy, IOVecEdgeCases) {
  // Test some tricky edge cases in the iovec output that are not necessarily
  // exercised by random tests.

  // Our output blocks look like this initially (the last iovec is bigger
  // than depicted):
  // [  ] [ ] [    ] [        ] [        ]
  static const int kLengths[] = { 2, 1, 4, 8, 128 };

  struct iovec iov[ARRAYSIZE(kLengths)];
  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    iov[i].iov_base = new char[kLengths[i]];
    iov[i].iov_len = kLengths[i];
  }

  // Hand-build a stream declaring 22 uncompressed bytes.
  string compressed;
  Varint::Append32(&compressed, 22);

  // A literal whose output crosses three blocks.
  // [ab] [c] [123 ] [        ] [        ]
  AppendLiteral(&compressed, "abc123");

  // A copy whose output crosses two blocks (source and destination
  // segments marked).
  // [ab] [c] [1231] [23      ] [        ]
  //           ^--^   --
  AppendCopy(&compressed, 3, 3);

  // A copy where the input is, at first, in the block before the output:
  //
  // [ab] [c] [1231] [231231  ] [        ]
  //           ^---     ^---
  // Then during the copy, the pointers move such that the input and
  // output pointers are in the same block:
  //
  // [ab] [c] [1231] [23123123] [        ]
  //                  ^-    ^-
  // And then they move again, so that the output pointer is no longer
  // in the same block as the input pointer:
  // [ab] [c] [1231] [23123123] [123     ]
  //                       ^--   ^--
  AppendCopy(&compressed, 6, 9);

  // Finally, a copy where the input is from several blocks back,
  // and it also crosses three blocks:
  //
  // [ab] [c] [1231] [23123123] [123b    ]
  //   ^                            ^
  // [ab] [c] [1231] [23123123] [123bc   ]
  //       ^                         ^
  // [ab] [c] [1231] [23123123] [123bc12 ]
  //           ^-                     ^-
  AppendCopy(&compressed, 17, 4);

  CHECK(snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
  CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
  CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
  CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
  CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
  CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));

  // Release the per-entry buffers allocated above.
  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    delete[] reinterpret_cast<char *>(iov[i].iov_base);
  }
}
926
TEST(Snappy, IOVecLiteralOverflow) {
  // Total iovec capacity is 3+4 = 7 bytes, but the stream below declares
  // 8 bytes: the literal must be rejected, not overflow the buffers.
  static const int kLengths[] = { 3, 4 };

  struct iovec iov[ARRAYSIZE(kLengths)];
  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    iov[i].iov_base = new char[kLengths[i]];
    iov[i].iov_len = kLengths[i];
  }

  string compressed;
  Varint::Append32(&compressed, 8);

  AppendLiteral(&compressed, "12345678");

  CHECK(!snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));

  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    delete[] reinterpret_cast<char *>(iov[i].iov_base);
  }
}
948
TEST(Snappy, IOVecCopyOverflow) {
  // Total iovec capacity is 3+4 = 7 bytes, but the stream below produces
  // 3 literal + 5 copied = 8 bytes: the copy must be rejected, not
  // overflow the buffers.
  static const int kLengths[] = { 3, 4 };

  struct iovec iov[ARRAYSIZE(kLengths)];
  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    iov[i].iov_base = new char[kLengths[i]];
    iov[i].iov_len = kLengths[i];
  }

  string compressed;
  Varint::Append32(&compressed, 8);

  AppendLiteral(&compressed, "123");
  AppendCopy(&compressed, 3, 5);

  CHECK(!snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));

  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    delete[] reinterpret_cast<char *>(iov[i].iov_base);
  }
}
971
CheckUncompressedLength(const string & compressed,size_t * ulength)972 static bool CheckUncompressedLength(const string& compressed,
973 size_t* ulength) {
974 const bool result1 = snappy::GetUncompressedLength(compressed.data(),
975 compressed.size(),
976 ulength);
977
978 snappy::ByteArraySource source(compressed.data(), compressed.size());
979 uint32 length;
980 const bool result2 = snappy::GetUncompressedLength(&source, &length);
981 CHECK_EQ(result1, result2);
982 return result1;
983 }
984
// A length varint whose continuation bit is set but that has no
// following byte must be rejected by every entry point.
TEST(SnappyCorruption, TruncatedVarint) {
  string compressed("\xf0", 1);
  size_t ulength;
  string uncompressed;
  CHECK(!CheckUncompressedLength(compressed, &ulength));
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
                            &uncompressed));
}
994
// Five continuation bytes followed by a sixth byte is longer than any
// legal 32-bit varint, so the length must fail to parse everywhere.
TEST(SnappyCorruption, UnterminatedVarint) {
  string compressed(5, '\x80');  // five bytes with the continuation bit set
  compressed.push_back(10);
  size_t ulength;
  string uncompressed;
  CHECK(!CheckUncompressedLength(compressed, &ulength));
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
                            &uncompressed));
}
1009
// A five-byte varint whose value does not fit in 32 bits must be
// rejected by every entry point.
TEST(SnappyCorruption, OverflowingVarint) {
  string compressed("\xfb\xff\xff\xff\x7f", 5);
  size_t ulength;
  string uncompressed;
  CHECK(!CheckUncompressedLength(compressed, &ulength));
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
                            &uncompressed));
}
1023
// Verify that decompression never reads past the end of its input.
TEST(Snappy, ReadPastEndOfBuffer) {
  // Build a compressed stream whose final element is a single-byte
  // literal, so an off-by-one read would step past the buffer.
  string compressed;
  Varint::Append32(&compressed, 1);
  AppendLiteral(&compressed, "x");

  // DataEndingAtUnreadablePage puts the bytes flush against an
  // unreadable page; any overread faults immediately.
  DataEndingAtUnreadablePage guarded(compressed);
  string uncompressed;
  CHECK(snappy::Uncompress(guarded.data(), guarded.size(), &uncompressed));
  CHECK_EQ(uncompressed, string("x"));
}
1037
1038 // Check for an infinite loop caused by a copy with offset==0
TEST(Snappy,ZeroOffsetCopy)1039 TEST(Snappy, ZeroOffsetCopy) {
1040 const char* compressed = "\x40\x12\x00\x00";
1041 // \x40 Length (must be > kMaxIncrementCopyOverflow)
1042 // \x12\x00\x00 Copy with offset==0, length==5
1043 char uncompressed[100];
1044 EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
1045 }
1046
// A copy with offset==0 must also fail pure validation, not only
// decompression.
TEST(Snappy, ZeroOffsetCopyValidation) {
  // Stream layout:
  //   \x05          length
  //   \x12\x00\x00  copy with offset==0, length==5 (invalid)
  const char* compressed = "\x05\x12\x00\x00";
  EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
}
1053
namespace {

// Thin test wrapper: lets the tests below call FindMatchLength with an
// explicit length instead of an end pointer for s2.
int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
  return snappy::internal::FindMatchLength(s1, s2, s2 + length);
}

}  // namespace
1061
// Exhaustive expectations for FindMatchLength: each EXPECT pins the
// number of leading bytes on which the two strings agree, chosen to
// drive the function through every code path (word-at-a-time loops,
// tail handling, and the limits at the s1 boundary).
TEST(Snappy, FindMatchLength) {
  // Exercise all different code paths through the function.
  // 64-bit version:

  // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));

  // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));

  // Same, but edge cases.
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));

  // Find non-match at once in first loop.
  EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
  EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
  EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
  EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));

  // Find non-match in first loop after one block.
  EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                   "abcdefgh?1234567xxxxxxxx", 24));
  EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                   "abcdefgh0?234567xxxxxxxx", 24));
  EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                    "abcdefgh01237654xxxxxxxx", 24));
  EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                    "abcdefgh0123456?xxxxxxxx", 24));

  // 32-bit version:

  // Short matches.
  EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
  EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
  EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
  EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
  EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
  EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
  EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
  EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));

  // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));

  // Same, but edge cases.
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));

  // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));

  // Find non-match at once in first loop.
  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx?123xxxxxxxx", 18));
  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx0?23xxxxxxxx", 18));
  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx0132xxxxxxxx", 18));
  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx012?xxxxxxxx", 18));

  // Same, but edge cases.
  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));

  // Find non-match in first loop after one block.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd?123xx", 16));
  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd0?23xx", 16));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd0132xx", 16));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd012?xx", 16));

  // Same, but edge cases.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
}
1151
// Stress FindMatchLength with random strings drawn from a two-letter
// alphabet (so nontrivial matches are common) and verify the result
// against a character-by-character comparison.
TEST(Snappy, FindMatchLengthRandom) {
  const int kNumTrials = 10000;
  const int kTypicalLength = 10;  // expected string length (geometric stop)
  ACMRandom rnd(FLAGS_test_random_seed);

  for (int i = 0; i < kNumTrials; i++) {
    string s, t;
    char a = rnd.Rand8();
    char b = rnd.Rand8();
    while (!rnd.OneIn(kTypicalLength)) {
      s.push_back(rnd.OneIn(2) ? a : b);
      t.push_back(rnd.OneIn(2) ? a : b);
    }
    // Back both strings against an unreadable page so any overread
    // inside FindMatchLength faults immediately.
    DataEndingAtUnreadablePage u(s);
    DataEndingAtUnreadablePage v(t);
    int matched = snappy::internal::FindMatchLength(
        u.data(), v.data(), v.data() + t.size());
    // Explicit cast avoids a signed/unsigned comparison; the strings are
    // tiny, so the narrowing is safe.
    if (matched == static_cast<int>(t.size())) {
      // Full match implies the strings are identical.
      EXPECT_EQ(s, t);
    } else {
      // Otherwise the first mismatch is exactly at "matched" and every
      // earlier byte agrees.
      EXPECT_NE(s[matched], t[matched]);
      for (int j = 0; j < matched; j++) {
        EXPECT_EQ(s[j], t[j]);
      }
    }
  }
}
1179
MakeEntry(unsigned int extra,unsigned int len,unsigned int copy_offset)1180 static uint16 MakeEntry(unsigned int extra,
1181 unsigned int len,
1182 unsigned int copy_offset) {
1183 // Check that all of the fields fit within the allocated space
1184 assert(extra == (extra & 0x7)); // At most 3 bits
1185 assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
1186 assert(len == (len & 0x7f)); // At most 7 bits
1187 return len | (copy_offset << 8) | (extra << 11);
1188 }
1189
// Check that the decompression table is correct, and optionally print out
// the computed one.
//
// The table is rebuilt here, entry by entry, from the tag-byte encoding
// rules of the snappy format, and then compared against the precomputed
// char_table shipped in snappy-internal.
TEST(Snappy, VerifyCharTable) {
  using snappy::internal::LITERAL;
  using snappy::internal::COPY_1_BYTE_OFFSET;
  using snappy::internal::COPY_2_BYTE_OFFSET;
  using snappy::internal::COPY_4_BYTE_OFFSET;
  using snappy::internal::char_table;
  using snappy::internal::wordmask;

  uint16 dst[256];

  // Place invalid entries in all places to detect missing initialization
  int assigned = 0;
  for (int i = 0; i < 256; i++) {
    dst[i] = 0xffff;
  }

  // Small LITERAL entries. We store (len-1) in the top 6 bits.
  for (unsigned int len = 1; len <= 60; len++) {
    dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
    assigned++;
  }

  // Large LITERAL entries. We use 60..63 in the high 6 bits to
  // encode the number of bytes of length info that follow the opcode.
  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
    // We set the length field in the lookup table to 1 because extra
    // bytes encode len-1.
    dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
    assigned++;
  }

  // COPY_1_BYTE_OFFSET.
  //
  // The tag byte in the compressed data stores len-4 in 3 bits, and
  // offset/256 in 5 bits. offset%256 is stored in the next byte.
  //
  // This format is used for length in range [4..11] and offset in
  // range [0..2047]
  for (unsigned int len = 4; len < 12; len++) {
    for (unsigned int offset = 0; offset < 2048; offset += 256) {
      dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
        MakeEntry(1, len, offset>>8);
      assigned++;
    }
  }

  // COPY_2_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
    assigned++;
  }

  // COPY_4_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next four bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
    assigned++;
  }

  // Check that each entry was initialized exactly once.
  EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
  for (int i = 0; i < 256; i++) {
    EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
  }

  // Optionally dump the computed table in source form, for regenerating
  // the precomputed copy.
  if (FLAGS_snappy_dump_decompression_table) {
    printf("static const uint16 char_table[256] = {\n  ");
    for (int i = 0; i < 256; i++) {
      printf("0x%04x%s",
             dst[i],
             ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
    }
    printf("};\n");
  }

  // Check that computed table matched recorded table.
  for (int i = 0; i < 256; i++) {
    EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
  }
}
1273
CompressFile(const char * fname)1274 static void CompressFile(const char* fname) {
1275 string fullinput;
1276 CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1277
1278 string compressed;
1279 Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
1280
1281 CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
1282 file::Defaults()));
1283 }
1284
UncompressFile(const char * fname)1285 static void UncompressFile(const char* fname) {
1286 string fullinput;
1287 CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
1288
1289 size_t uncompLength;
1290 CHECK(CheckUncompressedLength(fullinput, &uncompLength));
1291
1292 string uncompressed;
1293 uncompressed.resize(uncompLength);
1294 CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
1295
1296 CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
1297 file::Defaults()));
1298 }
1299
// Benchmarks every compressor enabled by flags (--zlib, --lzo, ...,
// --snappy) over prefixes of the file "fname" and prints timings.  The
// prefix range is controlled by --start_len/--end_len; the default (-1)
// measures only the full file contents.
static void MeasureFile(const char* fname) {
  string fullinput;
  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
  printf("%-40s :\n", fname);

  // start_len == -1 means "full file only": starting at the full size
  // makes the loop below run exactly once.
  int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
  int end_len = fullinput.size();
  if (FLAGS_end_len >= 0) {
    end_len = min<int>(fullinput.size(), FLAGS_end_len);
  }
  for (int len = start_len; len <= end_len; len++) {
    const char* const input = fullinput.data();
    // Scale repeats inversely with prefix length so the total work per
    // measurement stays near FLAGS_bytes.
    int repeats = (FLAGS_bytes + len) / (len + 1);
    if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10);
    if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10);
    if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10);
    if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10);
    if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10);
    if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10);

    // For block-size based measurements.  Deliberately disabled via
    // "0 &&"; flip it to 1 by hand to sweep block sizes.
    if (0 && FLAGS_snappy) {
      Measure(input, len, SNAPPY, repeats, 8<<10);
      Measure(input, len, SNAPPY, repeats, 16<<10);
      Measure(input, len, SNAPPY, repeats, 32<<10);
      Measure(input, len, SNAPPY, repeats, 64<<10);
      Measure(input, len, SNAPPY, repeats, 256<<10);
      Measure(input, len, SNAPPY, repeats, 1024<<10);
    }
  }
}
1331
// Benchmark corpus.  For each entry: the label shown in benchmark
// output, the file name under the test-data directory, and an optional
// limit on how many bytes of the file to use (0 = whole file).
static struct {
  const char* label;       // name printed in benchmark results
  const char* filename;    // test-data file to read
  size_t size_limit;       // max bytes to use; 0 means no limit
} files[] = {
  { "html", "html", 0 },
  { "urls", "urls.10K", 0 },
  { "jpg", "fireworks.jpeg", 0 },
  { "jpg_200", "fireworks.jpeg", 200 },
  { "pdf", "paper-100k.pdf", 0 },
  { "html4", "html_x_4", 0 },
  { "txt1", "alice29.txt", 0 },
  { "txt2", "asyoulik.txt", 0 },
  { "txt3", "lcet10.txt", 0 },
  { "txt4", "plrabn12.txt", 0 },
  { "pb", "geo.protodata", 0 },
  { "gaviota", "kppkn.gtb", 0 },
};
1350
BM_UFlat(int iters,int arg)1351 static void BM_UFlat(int iters, int arg) {
1352 StopBenchmarkTiming();
1353
1354 // Pick file to process based on "arg"
1355 CHECK_GE(arg, 0);
1356 CHECK_LT(arg, ARRAYSIZE(files));
1357 string contents = ReadTestDataFile(files[arg].filename,
1358 files[arg].size_limit);
1359
1360 string zcontents;
1361 snappy::Compress(contents.data(), contents.size(), &zcontents);
1362 char* dst = new char[contents.size()];
1363
1364 SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1365 static_cast<int64>(contents.size()));
1366 SetBenchmarkLabel(files[arg].label);
1367 StartBenchmarkTiming();
1368 while (iters-- > 0) {
1369 CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
1370 }
1371 StopBenchmarkTiming();
1372
1373 delete[] dst;
1374 }
1375 BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1376
BM_UValidate(int iters,int arg)1377 static void BM_UValidate(int iters, int arg) {
1378 StopBenchmarkTiming();
1379
1380 // Pick file to process based on "arg"
1381 CHECK_GE(arg, 0);
1382 CHECK_LT(arg, ARRAYSIZE(files));
1383 string contents = ReadTestDataFile(files[arg].filename,
1384 files[arg].size_limit);
1385
1386 string zcontents;
1387 snappy::Compress(contents.data(), contents.size(), &zcontents);
1388
1389 SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1390 static_cast<int64>(contents.size()));
1391 SetBenchmarkLabel(files[arg].label);
1392 StartBenchmarkTiming();
1393 while (iters-- > 0) {
1394 CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
1395 }
1396 StopBenchmarkTiming();
1397 }
1398 BENCHMARK(BM_UValidate)->DenseRange(0, 4);
1399
BM_UIOVec(int iters,int arg)1400 static void BM_UIOVec(int iters, int arg) {
1401 StopBenchmarkTiming();
1402
1403 // Pick file to process based on "arg"
1404 CHECK_GE(arg, 0);
1405 CHECK_LT(arg, ARRAYSIZE(files));
1406 string contents = ReadTestDataFile(files[arg].filename,
1407 files[arg].size_limit);
1408
1409 string zcontents;
1410 snappy::Compress(contents.data(), contents.size(), &zcontents);
1411
1412 // Uncompress into an iovec containing ten entries.
1413 const int kNumEntries = 10;
1414 struct iovec iov[kNumEntries];
1415 char *dst = new char[contents.size()];
1416 int used_so_far = 0;
1417 for (int i = 0; i < kNumEntries; ++i) {
1418 iov[i].iov_base = dst + used_so_far;
1419 if (used_so_far == contents.size()) {
1420 iov[i].iov_len = 0;
1421 continue;
1422 }
1423
1424 if (i == kNumEntries - 1) {
1425 iov[i].iov_len = contents.size() - used_so_far;
1426 } else {
1427 iov[i].iov_len = contents.size() / kNumEntries;
1428 }
1429 used_so_far += iov[i].iov_len;
1430 }
1431
1432 SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1433 static_cast<int64>(contents.size()));
1434 SetBenchmarkLabel(files[arg].label);
1435 StartBenchmarkTiming();
1436 while (iters-- > 0) {
1437 CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
1438 kNumEntries));
1439 }
1440 StopBenchmarkTiming();
1441
1442 delete[] dst;
1443 }
1444 BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
1445
BM_UFlatSink(int iters,int arg)1446 static void BM_UFlatSink(int iters, int arg) {
1447 StopBenchmarkTiming();
1448
1449 // Pick file to process based on "arg"
1450 CHECK_GE(arg, 0);
1451 CHECK_LT(arg, ARRAYSIZE(files));
1452 string contents = ReadTestDataFile(files[arg].filename,
1453 files[arg].size_limit);
1454
1455 string zcontents;
1456 snappy::Compress(contents.data(), contents.size(), &zcontents);
1457 char* dst = new char[contents.size()];
1458
1459 SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1460 static_cast<int64>(contents.size()));
1461 SetBenchmarkLabel(files[arg].label);
1462 StartBenchmarkTiming();
1463 while (iters-- > 0) {
1464 snappy::ByteArraySource source(zcontents.data(), zcontents.size());
1465 snappy::UncheckedByteArraySink sink(dst);
1466 CHECK(snappy::Uncompress(&source, &sink));
1467 }
1468 StopBenchmarkTiming();
1469
1470 string s(dst, contents.size());
1471 CHECK_EQ(contents, s);
1472
1473 delete[] dst;
1474 }
1475
1476 BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
1477
BM_ZFlat(int iters,int arg)1478 static void BM_ZFlat(int iters, int arg) {
1479 StopBenchmarkTiming();
1480
1481 // Pick file to process based on "arg"
1482 CHECK_GE(arg, 0);
1483 CHECK_LT(arg, ARRAYSIZE(files));
1484 string contents = ReadTestDataFile(files[arg].filename,
1485 files[arg].size_limit);
1486
1487 char* dst = new char[snappy::MaxCompressedLength(contents.size())];
1488
1489 SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
1490 static_cast<int64>(contents.size()));
1491 StartBenchmarkTiming();
1492
1493 size_t zsize = 0;
1494 while (iters-- > 0) {
1495 snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
1496 }
1497 StopBenchmarkTiming();
1498 const double compression_ratio =
1499 static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
1500 SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
1501 files[arg].label, 100.0 * compression_ratio));
1502 VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
1503 files[arg].label, contents.size(), zsize);
1504 delete[] dst;
1505 }
1506 BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
1507
1508 } // namespace snappy
1509
1510
main(int argc,char ** argv)1511 int main(int argc, char** argv) {
1512 InitGoogle(argv[0], &argc, &argv, true);
1513 RunSpecifiedBenchmarks();
1514
1515 if (argc >= 2) {
1516 for (int arg = 1; arg < argc; arg++) {
1517 if (FLAGS_write_compressed) {
1518 CompressFile(argv[arg]);
1519 } else if (FLAGS_write_uncompressed) {
1520 UncompressFile(argv[arg]);
1521 } else {
1522 MeasureFile(argv[arg]);
1523 }
1524 }
1525 return 0;
1526 }
1527
1528 return RUN_ALL_TESTS();
1529 }
1530