1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 //
29 // Various stubs for the unit tests for the open-source version of Snappy.
30 
31 #ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
32 #define UTIL_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
33 
34 #include <iostream>
35 #include <string>
36 
37 #include "snappy-stubs-internal.h"
38 
39 #include <stdio.h>
40 #include <stdarg.h>
41 
42 #ifdef HAVE_SYS_MMAN_H
43 #include <sys/mman.h>
44 #endif
45 
46 #ifdef HAVE_SYS_RESOURCE_H
47 #include <sys/resource.h>
48 #endif
49 
50 #ifdef HAVE_SYS_TIME_H
51 #include <sys/time.h>
52 #endif
53 
54 #ifdef HAVE_WINDOWS_H
55 #define WIN32_LEAN_AND_MEAN
56 #include <windows.h>
57 #endif
58 
59 #include <string>
60 
61 #ifdef HAVE_GTEST
62 
63 #include <gtest/gtest.h>
64 #undef TYPED_TEST
65 #define TYPED_TEST TEST
66 #define INIT_GTEST(argc, argv) ::testing::InitGoogleTest(argc, *argv)
67 
68 #else
69 
70 // Stubs for if the user doesn't have Google Test installed.
71 
72 #define TEST(test_case, test_subcase) \
73   void Test_ ## test_case ## _ ## test_subcase()
74 #define INIT_GTEST(argc, argv)
75 
76 #define TYPED_TEST TEST
77 #define EXPECT_EQ CHECK_EQ
78 #define EXPECT_NE CHECK_NE
79 #define EXPECT_FALSE(cond) CHECK(!(cond))
80 
81 #endif
82 
83 #ifdef HAVE_GFLAGS
84 
85 #include <gflags/gflags.h>
86 
87 // This is tricky; both gflags and Google Test want to look at the command line
88 // arguments. Google Test seems to be the most happy with unknown arguments,
89 // though, so we call it first and hope for the best.
90 #define InitGoogle(argv0, argc, argv, remove_flags) \
91   INIT_GTEST(argc, argv); \
92   google::ParseCommandLineFlags(argc, argv, remove_flags);
93 
94 #else
95 
96 // If we don't have the gflags package installed, these can only be
97 // changed at compile time.
98 #define DEFINE_int32(flag_name, default_value, description) \
99   static int FLAGS_ ## flag_name = default_value;
100 
101 #define InitGoogle(argv0, argc, argv, remove_flags) \
102   INIT_GTEST(argc, argv)
103 
104 #endif
105 
106 #ifdef HAVE_LIBZ
107 #include "zlib.h"
108 #endif
109 
110 #ifdef HAVE_LIBLZO2
111 #include "lzo/lzo1x.h"
112 #endif
113 
114 #ifdef HAVE_LIBLZF
115 extern "C" {
116 #include "lzf.h"
117 }
118 #endif
119 
120 #ifdef HAVE_LIBFASTLZ
121 #include "fastlz.h"
122 #endif
123 
124 #ifdef HAVE_LIBQUICKLZ
125 #include "quicklz.h"
126 #endif
127 
128 namespace {
129 
130 namespace File {
Init()131   void Init() { }
132 }  // namespace File
133 
134 namespace file {
Defaults()135   int Defaults() { }
136 
137   class DummyStatus {
138    public:
CheckSuccess()139     void CheckSuccess() { }
140   };
141 
GetContents(const string & filename,string * data,int unused)142   DummyStatus GetContents(const string& filename, string* data, int unused) {
143     FILE* fp = fopen(filename.c_str(), "rb");
144     if (fp == NULL) {
145       perror(filename.c_str());
146       exit(1);
147     }
148 
149     data->clear();
150     while (!feof(fp)) {
151       char buf[4096];
152       size_t ret = fread(buf, 1, 4096, fp);
153       if (ret == 0 && ferror(fp)) {
154         perror("fread");
155         exit(1);
156       }
157       data->append(string(buf, ret));
158     }
159 
160     fclose(fp);
161   }
162 
SetContents(const string & filename,const string & str,int unused)163   DummyStatus SetContents(const string& filename,
164                           const string& str,
165                           int unused) {
166     FILE* fp = fopen(filename.c_str(), "wb");
167     if (fp == NULL) {
168       perror(filename.c_str());
169       exit(1);
170     }
171 
172     int ret = fwrite(str.data(), str.size(), 1, fp);
173     if (ret != 1) {
174       perror("fwrite");
175       exit(1);
176     }
177 
178     fclose(fp);
179   }
180 }  // namespace file
181 
182 }  // namespace
183 
184 namespace snappy {
185 
186 #define FLAGS_test_random_seed 301
187 typedef string TypeParam;
188 
189 void Test_CorruptedTest_VerifyCorrupted();
190 void Test_Snappy_SimpleTests();
191 void Test_Snappy_MaxBlowup();
192 void Test_Snappy_RandomData();
193 void Test_Snappy_FourByteOffset();
194 void Test_SnappyCorruption_TruncatedVarint();
195 void Test_SnappyCorruption_UnterminatedVarint();
196 void Test_Snappy_ReadPastEndOfBuffer();
197 void Test_Snappy_FindMatchLength();
198 void Test_Snappy_FindMatchLengthRandom();
199 
200 string ReadTestDataFile(const string& base, size_t size_limit);
201 
202 string ReadTestDataFile(const string& base);
203 
204 // A sprintf() variant that returns a std::string.
205 // Not safe for general use due to truncation issues.
206 string StringPrintf(const char* format, ...);
207 
208 // A simple, non-cryptographically-secure random generator.
209 class ACMRandom {
210  public:
ACMRandom(uint32 seed)211   explicit ACMRandom(uint32 seed) : seed_(seed) {}
212 
213   int32 Next();
214 
Uniform(int32 n)215   int32 Uniform(int32 n) {
216     return Next() % n;
217   }
Rand8()218   uint8 Rand8() {
219     return static_cast<uint8>((Next() >> 1) & 0x000000ff);
220   }
OneIn(int X)221   bool OneIn(int X) { return Uniform(X) == 0; }
222 
223   // Skewed: pick "base" uniformly from range [0,max_log] and then
224   // return "base" random bits.  The effect is to pick a number in the
225   // range [0,2^max_log-1] with bias towards smaller numbers.
226   int32 Skewed(int max_log);
227 
228  private:
229   static const uint32 M = 2147483647L;   // 2^31-1
230   uint32 seed_;
231 };
232 
Next()233 inline int32 ACMRandom::Next() {
234   static const uint64 A = 16807;  // bits 14, 8, 7, 5, 2, 1, 0
235   // We are computing
236   //       seed_ = (seed_ * A) % M,    where M = 2^31-1
237   //
238   // seed_ must not be zero or M, or else all subsequent computed values
239   // will be zero or M respectively.  For all other values, seed_ will end
240   // up cycling through every number in [1,M-1]
241   uint64 product = seed_ * A;
242 
243   // Compute (product % M) using the fact that ((x << 31) % M) == x.
244   seed_ = (product >> 31) + (product & M);
245   // The first reduction may overflow by 1 bit, so we may need to repeat.
246   // mod == M is not possible; using > allows the faster sign-bit-based test.
247   if (seed_ > M) {
248     seed_ -= M;
249   }
250   return seed_;
251 }
252 
Skewed(int max_log)253 inline int32 ACMRandom::Skewed(int max_log) {
254   const int32 base = (Next() - 1) % (max_log+1);
255   return (Next() - 1) & ((1u << base)-1);
256 }
257 
258 // A wall-time clock. This stub is not super-accurate, nor resistant to the
259 // system time changing.
260 class CycleTimer {
261  public:
CycleTimer()262   CycleTimer() : real_time_us_(0) {}
263 
Start()264   void Start() {
265 #ifdef WIN32
266     QueryPerformanceCounter(&start_);
267 #else
268     gettimeofday(&start_, NULL);
269 #endif
270   }
271 
Stop()272   void Stop() {
273 #ifdef WIN32
274     LARGE_INTEGER stop;
275     LARGE_INTEGER frequency;
276     QueryPerformanceCounter(&stop);
277     QueryPerformanceFrequency(&frequency);
278 
279     double elapsed = static_cast<double>(stop.QuadPart - start_.QuadPart) /
280         frequency.QuadPart;
281     real_time_us_ += elapsed * 1e6 + 0.5;
282 #else
283     struct timeval stop;
284     gettimeofday(&stop, NULL);
285 
286     real_time_us_ += 1000000 * (stop.tv_sec - start_.tv_sec);
287     real_time_us_ += (stop.tv_usec - start_.tv_usec);
288 #endif
289   }
290 
Get()291   double Get() {
292     return real_time_us_ * 1e-6;
293   }
294 
295  private:
296   int64 real_time_us_;
297 #ifdef WIN32
298   LARGE_INTEGER start_;
299 #else
300   struct timeval start_;
301 #endif
302 };
303 
304 // Minimalistic microbenchmark framework.
305 
306 typedef void (*BenchmarkFunction)(int, int);
307 
308 class Benchmark {
309  public:
Benchmark(const string & name,BenchmarkFunction function)310   Benchmark(const string& name, BenchmarkFunction function) :
311       name_(name), function_(function) {}
312 
DenseRange(int start,int stop)313   Benchmark* DenseRange(int start, int stop) {
314     start_ = start;
315     stop_ = stop;
316     return this;
317   }
318 
319   void Run();
320 
321  private:
322   const string name_;
323   const BenchmarkFunction function_;
324   int start_, stop_;
325 };
326 #define BENCHMARK(benchmark_name) \
327   Benchmark* Benchmark_ ## benchmark_name = \
328           (new Benchmark(#benchmark_name, benchmark_name))
329 
330 extern Benchmark* Benchmark_BM_UFlat;
331 extern Benchmark* Benchmark_BM_UIOVec;
332 extern Benchmark* Benchmark_BM_UValidate;
333 extern Benchmark* Benchmark_BM_ZFlat;
334 
335 void ResetBenchmarkTiming();
336 void StartBenchmarkTiming();
337 void StopBenchmarkTiming();
338 void SetBenchmarkLabel(const string& str);
339 void SetBenchmarkBytesProcessed(int64 bytes);
340 
341 #ifdef HAVE_LIBZ
342 
343 // Object-oriented wrapper around zlib.
344 class ZLib {
345  public:
346   ZLib();
347   ~ZLib();
348 
349   // Wipe a ZLib object to a virgin state.  This differs from Reset()
350   // in that it also breaks any state.
351   void Reinit();
352 
353   // Call this to make a zlib buffer as good as new.  Here's the only
354   // case where they differ:
355   //    CompressChunk(a); CompressChunk(b); CompressChunkDone();   vs
356   //    CompressChunk(a); Reset(); CompressChunk(b); CompressChunkDone();
357   // You'll want to use Reset(), then, when you interrupt a compress
358   // (or uncompress) in the middle of a chunk and want to start over.
359   void Reset();
360 
361   // According to the zlib manual, when you Compress, the destination
362   // buffer must have size at least src + .1%*src + 12.  This function
363   // helps you calculate that.  Augment this to account for a potential
364   // gzip header and footer, plus a few bytes of slack.
MinCompressbufSize(int uncompress_size)365   static int MinCompressbufSize(int uncompress_size) {
366     return uncompress_size + uncompress_size/1000 + 40;
367   }
368 
369   // Compresses the source buffer into the destination buffer.
370   // sourceLen is the byte length of the source buffer.
371   // Upon entry, destLen is the total size of the destination buffer,
372   // which must be of size at least MinCompressbufSize(sourceLen).
373   // Upon exit, destLen is the actual size of the compressed buffer.
374   //
375   // This function can be used to compress a whole file at once if the
376   // input file is mmap'ed.
377   //
378   // Returns Z_OK if success, Z_MEM_ERROR if there was not
379   // enough memory, Z_BUF_ERROR if there was not enough room in the
380   // output buffer. Note that if the output buffer is exactly the same
381   // size as the compressed result, we still return Z_BUF_ERROR.
382   // (check CL#1936076)
383   int Compress(Bytef *dest, uLongf *destLen,
384                const Bytef *source, uLong sourceLen);
385 
386   // Uncompresses the source buffer into the destination buffer.
387   // The destination buffer must be long enough to hold the entire
388   // decompressed contents.
389   //
390   // Returns Z_OK on success, otherwise, it returns a zlib error code.
391   int Uncompress(Bytef *dest, uLongf *destLen,
392                  const Bytef *source, uLong sourceLen);
393 
394   // Uncompress data one chunk at a time -- ie you can call this
395   // more than once.  To get this to work you need to call per-chunk
396   // and "done" routines.
397   //
398   // Returns Z_OK if success, Z_MEM_ERROR if there was not
399   // enough memory, Z_BUF_ERROR if there was not enough room in the
400   // output buffer.
401 
402   int UncompressAtMost(Bytef *dest, uLongf *destLen,
403                        const Bytef *source, uLong *sourceLen);
404 
405   // Checks gzip footer information, as needed.  Mostly this just
406   // makes sure the checksums match.  Whenever you call this, it
407   // will assume the last 8 bytes from the previous UncompressChunk
408   // call are the footer.  Returns true iff everything looks ok.
409   bool UncompressChunkDone();
410 
411  private:
412   int InflateInit();       // sets up the zlib inflate structure
413   int DeflateInit();       // sets up the zlib deflate structure
414 
415   // These init the zlib data structures for compressing/uncompressing
416   int CompressInit(Bytef *dest, uLongf *destLen,
417                    const Bytef *source, uLong *sourceLen);
418   int UncompressInit(Bytef *dest, uLongf *destLen,
419                      const Bytef *source, uLong *sourceLen);
420   // Initialization method to be called if we hit an error while
421   // uncompressing. On hitting an error, call this method before
422   // returning the error.
423   void UncompressErrorInit();
424 
425   // Helper function for Compress
426   int CompressChunkOrAll(Bytef *dest, uLongf *destLen,
427                          const Bytef *source, uLong sourceLen,
428                          int flush_mode);
429   int CompressAtMostOrAll(Bytef *dest, uLongf *destLen,
430                           const Bytef *source, uLong *sourceLen,
431                           int flush_mode);
432 
433   // Likewise for UncompressAndUncompressChunk
434   int UncompressChunkOrAll(Bytef *dest, uLongf *destLen,
435                            const Bytef *source, uLong sourceLen,
436                            int flush_mode);
437 
438   int UncompressAtMostOrAll(Bytef *dest, uLongf *destLen,
439                             const Bytef *source, uLong *sourceLen,
440                             int flush_mode);
441 
442   // Initialization method to be called if we hit an error while
443   // compressing. On hitting an error, call this method before
444   // returning the error.
445   void CompressErrorInit();
446 
447   int compression_level_;   // compression level
448   int window_bits_;         // log base 2 of the window size used in compression
449   int mem_level_;           // specifies the amount of memory to be used by
450                             // compressor (1-9)
451   z_stream comp_stream_;    // Zlib stream data structure
452   bool comp_init_;          // True if we have initialized comp_stream_
453   z_stream uncomp_stream_;  // Zlib stream data structure
454   bool uncomp_init_;        // True if we have initialized uncomp_stream_
455 
456   // These are used only with chunked compression.
457   bool first_chunk_;       // true if we need to emit headers with this chunk
458 };
459 
460 #endif  // HAVE_LIBZ
461 
462 }  // namespace snappy
463 
464 DECLARE_bool(run_microbenchmarks);
465 
RunSpecifiedBenchmarks()466 static void RunSpecifiedBenchmarks() {
467   if (!FLAGS_run_microbenchmarks) {
468     return;
469   }
470 
471   fprintf(stderr, "Running microbenchmarks.\n");
472 #ifndef NDEBUG
473   fprintf(stderr, "WARNING: Compiled with assertions enabled, will be slow.\n");
474 #endif
475 #ifndef __OPTIMIZE__
476   fprintf(stderr, "WARNING: Compiled without optimization, will be slow.\n");
477 #endif
478   fprintf(stderr, "Benchmark            Time(ns)    CPU(ns) Iterations\n");
479   fprintf(stderr, "---------------------------------------------------\n");
480 
481   snappy::Benchmark_BM_UFlat->Run();
482   snappy::Benchmark_BM_UIOVec->Run();
483   snappy::Benchmark_BM_UValidate->Run();
484   snappy::Benchmark_BM_ZFlat->Run();
485 
486   fprintf(stderr, "\n");
487 }
488 
489 #ifndef HAVE_GTEST
490 
RUN_ALL_TESTS()491 static inline int RUN_ALL_TESTS() {
492   fprintf(stderr, "Running correctness tests.\n");
493   snappy::Test_CorruptedTest_VerifyCorrupted();
494   snappy::Test_Snappy_SimpleTests();
495   snappy::Test_Snappy_MaxBlowup();
496   snappy::Test_Snappy_RandomData();
497   snappy::Test_Snappy_FourByteOffset();
498   snappy::Test_SnappyCorruption_TruncatedVarint();
499   snappy::Test_SnappyCorruption_UnterminatedVarint();
500   snappy::Test_Snappy_ReadPastEndOfBuffer();
501   snappy::Test_Snappy_FindMatchLength();
502   snappy::Test_Snappy_FindMatchLengthRandom();
503   fprintf(stderr, "All tests passed.\n");
504 
505   return 0;
506 }
507 
508 #endif  // HAVE_GTEST
509 
510 // For main().
511 namespace snappy {
512 
513 static void CompressFile(const char* fname);
514 static void UncompressFile(const char* fname);
515 static void MeasureFile(const char* fname);
516 
517 // Logging.
518 
519 #define LOG(level) LogMessage()
520 #define VLOG(level) true ? (void)0 : \
521     snappy::LogMessageVoidify() & snappy::LogMessage()
522 
523 class LogMessage {
524  public:
LogMessage()525   LogMessage() { }
~LogMessage()526   ~LogMessage() {
527     cerr << endl;
528   }
529 
530   LogMessage& operator<<(const std::string& msg) {
531     cerr << msg;
532     return *this;
533   }
534   LogMessage& operator<<(int x) {
535     cerr << x;
536     return *this;
537   }
538 };
539 
540 // Asserts, both versions activated in debug mode only,
541 // and ones that are always active.
542 
543 #define CRASH_UNLESS(condition) \
544     PREDICT_TRUE(condition) ? (void)0 : \
545     snappy::LogMessageVoidify() & snappy::LogMessageCrash()
546 
547 class LogMessageCrash : public LogMessage {
548  public:
LogMessageCrash()549   LogMessageCrash() { }
~LogMessageCrash()550   ~LogMessageCrash() {
551     cerr << endl;
552     abort();
553   }
554 };
555 
556 // This class is used to explicitly ignore values in the conditional
557 // logging macros.  This avoids compiler warnings like "value computed
558 // is not used" and "statement has no effect".
559 
560 class LogMessageVoidify {
561  public:
LogMessageVoidify()562   LogMessageVoidify() { }
563   // This has to be an operator with a precedence lower than << but
564   // higher than ?:
565   void operator&(const LogMessage&) { }
566 };
567 
568 #define CHECK(cond) CRASH_UNLESS(cond)
569 #define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b))
570 #define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b))
571 #define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b))
572 #define CHECK_NE(a, b) CRASH_UNLESS((a) != (b))
573 #define CHECK_LT(a, b) CRASH_UNLESS((a) < (b))
574 #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
575 
576 }  // namespace
577 
578 using snappy::CompressFile;
579 using snappy::UncompressFile;
580 using snappy::MeasureFile;
581 
582 #endif  // UTIL_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
583