1 // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 //
10 #pragma once
11 
12 #include <algorithm>
13 #include <limits>
14 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
15 #ifdef OS_FREEBSD
16 #include <malloc_np.h>
17 #else  // OS_FREEBSD
18 #include <malloc.h>
19 #endif  // OS_FREEBSD
20 #endif  // ROCKSDB_MALLOC_USABLE_SIZE
21 #include <string>
22 
23 #include "memory/memory_allocator.h"
24 #include "rocksdb/options.h"
25 #include "rocksdb/table.h"
26 #include "util/coding.h"
27 #include "util/compression_context_cache.h"
28 #include "util/string_util.h"
29 
30 #ifdef SNAPPY
31 #include <snappy.h>
32 #endif
33 
34 #ifdef ZLIB
35 #include <zlib.h>
36 #endif
37 
38 #ifdef BZIP2
39 #include <bzlib.h>
40 #endif
41 
42 #if defined(LZ4)
43 #include <lz4.h>
44 #include <lz4hc.h>
45 #endif
46 
47 #if defined(ZSTD)
48 #include <zstd.h>
49 #if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
50 #include <zdict.h>
51 #endif  // ZSTD_VERSION_NUMBER >= 10103
52 namespace ROCKSDB_NAMESPACE {
53 // Need this for the context allocation override
54 // On windows we need to do this explicitly
55 #if (ZSTD_VERSION_NUMBER >= 500)
56 #if defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) && \
57     defined(ZSTD_STATIC_LINKING_ONLY)
58 #define ROCKSDB_ZSTD_CUSTOM_MEM
59 namespace port {
60 ZSTD_customMem GetJeZstdAllocationOverrides();
61 }  // namespace port
62 #endif  // defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) &&
63         // defined(ZSTD_STATIC_LINKING_ONLY)
64 
65 // We require `ZSTD_sizeof_DDict` and `ZSTD_createDDict_byReference` to use
66 // `ZSTD_DDict`. The former was introduced in v1.0.0 and the latter was
67 // introduced in v1.1.3. But an important bug fix for `ZSTD_sizeof_DDict` came
68 // in v1.1.4, so that is the version we require. As of today's latest version
69 // (v1.3.8), they are both still in the experimental API, which means they are
70 // only exported when the compiler flag `ZSTD_STATIC_LINKING_ONLY` is set.
71 #if defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104
72 #define ROCKSDB_ZSTD_DDICT
73 #endif  // defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104
74 
// Cached data represents a portion of a decompression context that can be
// re-used across calls. If, in the future, we have more than one native
// context to cache, we can arrange this as a tuple.
// Wraps a raw ZSTD decompression context (ZSTD_DCtx). An instance either
// owns its context (cache_idx_ == -1; freed in the destructor) or borrows
// one from the CompressionContextCache (cache_idx_ >= 0; the cache frees it).
class ZSTDUncompressCachedData {
 public:
  using ZSTDNativeContext = ZSTD_DCtx*;
  ZSTDUncompressCachedData() {}
  // Init from cache
  ZSTDUncompressCachedData(const ZSTDUncompressCachedData& o) = delete;
  ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete;
  // Move transfers the context (and cache slot, if any) to *this and leaves
  // `o` empty.
  ZSTDUncompressCachedData(ZSTDUncompressCachedData&& o) ROCKSDB_NOEXCEPT
      : ZSTDUncompressCachedData() {
    *this = std::move(o);
  }
  ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&& o)
      ROCKSDB_NOEXCEPT {
    // Only an empty instance may be move-assigned to; otherwise the context
    // currently held would leak (swap would hand it to a temporary-less `o`).
    assert(zstd_ctx_ == nullptr);
    std::swap(zstd_ctx_, o.zstd_ctx_);
    std::swap(cache_idx_, o.cache_idx_);
    return *this;
  }
  // Raw context; nullptr until CreateIfNeeded()/InitFromCache() is called.
  ZSTDNativeContext Get() const { return zstd_ctx_; }
  // Cache slot index, or -1 when this instance owns the context.
  int64_t GetCacheIndex() const { return cache_idx_; }
  // Lazily allocates an owned (non-cached) context.
  void CreateIfNeeded() {
    if (zstd_ctx_ == nullptr) {
#ifdef ROCKSDB_ZSTD_CUSTOM_MEM
      // Route ZSTD's internal allocations through the jemalloc overrides
      // (Windows + jemalloc static-link builds only).
      zstd_ctx_ =
          ZSTD_createDCtx_advanced(port::GetJeZstdAllocationOverrides());
#else   // ROCKSDB_ZSTD_CUSTOM_MEM
      zstd_ctx_ = ZSTD_createDCtx();
#endif  // ROCKSDB_ZSTD_CUSTOM_MEM
      cache_idx_ = -1;
    }
  }
  // Borrow `o`'s context; `idx` records the owning cache slot so the
  // destructor will not free it.
  void InitFromCache(const ZSTDUncompressCachedData& o, int64_t idx) {
    zstd_ctx_ = o.zstd_ctx_;
    cache_idx_ = idx;
  }
  ~ZSTDUncompressCachedData() {
    // Free only contexts we own; cached contexts belong to the cache.
    if (zstd_ctx_ != nullptr && cache_idx_ == -1) {
      ZSTD_freeDCtx(zstd_ctx_);
    }
  }

 private:
  ZSTDNativeContext zstd_ctx_ = nullptr;
  int64_t cache_idx_ = -1;  // -1 means this instance owns the context
};
123 #endif  // (ZSTD_VERSION_NUMBER >= 500)
124 }  // namespace ROCKSDB_NAMESPACE
125 #endif  // ZSTD
126 
127 #if !(defined ZSTD) || !(ZSTD_VERSION_NUMBER >= 500)
128 namespace ROCKSDB_NAMESPACE {
129 class ZSTDUncompressCachedData {
130   void* padding;  // unused
131  public:
132   using ZSTDNativeContext = void*;
ZSTDUncompressCachedData()133   ZSTDUncompressCachedData() {}
ZSTDUncompressCachedData(const ZSTDUncompressCachedData &)134   ZSTDUncompressCachedData(const ZSTDUncompressCachedData&) {}
135   ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete;
136   ZSTDUncompressCachedData(ZSTDUncompressCachedData&&)
137       ROCKSDB_NOEXCEPT = default;
138   ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&&)
139       ROCKSDB_NOEXCEPT = default;
Get()140   ZSTDNativeContext Get() const { return nullptr; }
GetCacheIndex()141   int64_t GetCacheIndex() const { return -1; }
CreateIfNeeded()142   void CreateIfNeeded() {}
InitFromCache(const ZSTDUncompressCachedData &,int64_t)143   void InitFromCache(const ZSTDUncompressCachedData&, int64_t) {}
144  private:
ignore_padding__()145   void ignore_padding__() { padding = nullptr; }
146 };
147 }  // namespace ROCKSDB_NAMESPACE
148 #endif
149 
150 #if defined(XPRESS)
151 #include "port/xpress.h"
152 #endif
153 
154 namespace ROCKSDB_NAMESPACE {
155 
156 // Holds dictionary and related data, like ZSTD's digested compression
157 // dictionary.
struct CompressionDict {
#if ZSTD_VERSION_NUMBER >= 700
  // Digested form of dict_, built once so ZSTD need not re-process the raw
  // dictionary for every block. nullptr when unused or digestion failed.
  ZSTD_CDict* zstd_cdict_ = nullptr;
#endif  // ZSTD_VERSION_NUMBER >= 700
  // Raw dictionary bytes, owned by this object.
  std::string dict_;

 public:
  // Takes ownership of `dict`. With ZSTD v0.7+, also builds the digested
  // dictionary at the given compression level when `type` selects ZSTD;
  // on older/absent ZSTD the extra parameters are ignored.
#if ZSTD_VERSION_NUMBER >= 700
  CompressionDict(std::string dict, CompressionType type, int level) {
#else   // ZSTD_VERSION_NUMBER >= 700
  CompressionDict(std::string dict, CompressionType /*type*/, int /*level*/) {
#endif  // ZSTD_VERSION_NUMBER >= 700
    dict_ = std::move(dict);
#if ZSTD_VERSION_NUMBER >= 700
    zstd_cdict_ = nullptr;
    if (!dict_.empty() && (type == kZSTD || type == kZSTDNotFinalCompression)) {
      if (level == CompressionOptions::kDefaultCompressionLevel) {
        // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
        // https://github.com/facebook/zstd/issues/1148
        level = 3;
      }
      // Should be safe (but slower) if below call fails as we'll use the
      // raw dictionary to compress.
      zstd_cdict_ = ZSTD_createCDict(dict_.data(), dict_.size(), level);
      assert(zstd_cdict_ != nullptr);
    }
#endif  // ZSTD_VERSION_NUMBER >= 700
  }

  ~CompressionDict() {
#if ZSTD_VERSION_NUMBER >= 700
    size_t res = 0;
    if (zstd_cdict_ != nullptr) {
      res = ZSTD_freeCDict(zstd_cdict_);
    }
    assert(res == 0);  // Last I checked they can't fail
    (void)res;         // prevent unused var warning
#endif                 // ZSTD_VERSION_NUMBER >= 700
  }

#if ZSTD_VERSION_NUMBER >= 700
  // May be nullptr even for ZSTD (digestion failed); callers must then fall
  // back to GetRawDict().
  const ZSTD_CDict* GetDigestedZstdCDict() const { return zstd_cdict_; }
#endif  // ZSTD_VERSION_NUMBER >= 700

  Slice GetRawDict() const { return dict_; }

  // Shared singleton meaning "no dictionary".
  static const CompressionDict& GetEmptyDict() {
    static CompressionDict empty_dict{};
    return empty_dict;
  }

  CompressionDict() = default;
  // Disable copy/move
  CompressionDict(const CompressionDict&) = delete;
  CompressionDict& operator=(const CompressionDict&) = delete;
  CompressionDict(CompressionDict&&) = delete;
  CompressionDict& operator=(CompressionDict&&) = delete;
};
216 
217 // Holds dictionary and related data, like ZSTD's digested uncompression
218 // dictionary.
219 struct UncompressionDict {
220   // Block containing the data for the compression dictionary in case the
221   // constructor that takes a string parameter is used.
222   std::string dict_;
223 
224   // Block containing the data for the compression dictionary in case the
225   // constructor that takes a Slice parameter is used and the passed in
226   // CacheAllocationPtr is not nullptr.
227   CacheAllocationPtr allocation_;
228 
229   // Slice pointing to the compression dictionary data. Can point to
230   // dict_, allocation_, or some other memory location, depending on how
231   // the object was constructed.
232   Slice slice_;
233 
234 #ifdef ROCKSDB_ZSTD_DDICT
235   // Processed version of the contents of slice_ for ZSTD compression.
236   ZSTD_DDict* zstd_ddict_ = nullptr;
237 #endif  // ROCKSDB_ZSTD_DDICT
238 
239 #ifdef ROCKSDB_ZSTD_DDICT
UncompressionDictUncompressionDict240   UncompressionDict(std::string dict, bool using_zstd)
241 #else   // ROCKSDB_ZSTD_DDICT
242   UncompressionDict(std::string dict, bool /* using_zstd */)
243 #endif  // ROCKSDB_ZSTD_DDICT
244       : dict_(std::move(dict)), slice_(dict_) {
245 #ifdef ROCKSDB_ZSTD_DDICT
246     if (!slice_.empty() && using_zstd) {
247       zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size());
248       assert(zstd_ddict_ != nullptr);
249     }
250 #endif  // ROCKSDB_ZSTD_DDICT
251   }
252 
253 #ifdef ROCKSDB_ZSTD_DDICT
UncompressionDictUncompressionDict254   UncompressionDict(Slice slice, CacheAllocationPtr&& allocation,
255                     bool using_zstd)
256 #else   // ROCKSDB_ZSTD_DDICT
257   UncompressionDict(Slice slice, CacheAllocationPtr&& allocation,
258                     bool /* using_zstd */)
259 #endif  // ROCKSDB_ZSTD_DDICT
260       : allocation_(std::move(allocation)), slice_(std::move(slice)) {
261 #ifdef ROCKSDB_ZSTD_DDICT
262     if (!slice_.empty() && using_zstd) {
263       zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size());
264       assert(zstd_ddict_ != nullptr);
265     }
266 #endif  // ROCKSDB_ZSTD_DDICT
267   }
268 
UncompressionDictUncompressionDict269   UncompressionDict(UncompressionDict&& rhs)
270       : dict_(std::move(rhs.dict_)),
271         allocation_(std::move(rhs.allocation_)),
272         slice_(std::move(rhs.slice_))
273 #ifdef ROCKSDB_ZSTD_DDICT
274         ,
275         zstd_ddict_(rhs.zstd_ddict_)
276 #endif
277   {
278 #ifdef ROCKSDB_ZSTD_DDICT
279     rhs.zstd_ddict_ = nullptr;
280 #endif
281   }
282 
~UncompressionDictUncompressionDict283   ~UncompressionDict() {
284 #ifdef ROCKSDB_ZSTD_DDICT
285     size_t res = 0;
286     if (zstd_ddict_ != nullptr) {
287       res = ZSTD_freeDDict(zstd_ddict_);
288     }
289     assert(res == 0);  // Last I checked they can't fail
290     (void)res;         // prevent unused var warning
291 #endif                 // ROCKSDB_ZSTD_DDICT
292   }
293 
294   UncompressionDict& operator=(UncompressionDict&& rhs) {
295     if (this == &rhs) {
296       return *this;
297     }
298 
299     dict_ = std::move(rhs.dict_);
300     allocation_ = std::move(rhs.allocation_);
301     slice_ = std::move(rhs.slice_);
302 
303 #ifdef ROCKSDB_ZSTD_DDICT
304     zstd_ddict_ = rhs.zstd_ddict_;
305     rhs.zstd_ddict_ = nullptr;
306 #endif
307 
308     return *this;
309   }
310 
311   // The object is self-contained if the string constructor is used, or the
312   // Slice constructor is invoked with a non-null allocation. Otherwise, it
313   // is the caller's responsibility to ensure that the underlying storage
314   // outlives this object.
own_bytesUncompressionDict315   bool own_bytes() const { return !dict_.empty() || allocation_; }
316 
GetRawDictUncompressionDict317   const Slice& GetRawDict() const { return slice_; }
318 
319 #ifdef ROCKSDB_ZSTD_DDICT
GetDigestedZstdDDictUncompressionDict320   const ZSTD_DDict* GetDigestedZstdDDict() const { return zstd_ddict_; }
321 #endif  // ROCKSDB_ZSTD_DDICT
322 
GetEmptyDictUncompressionDict323   static const UncompressionDict& GetEmptyDict() {
324     static UncompressionDict empty_dict{};
325     return empty_dict;
326   }
327 
ApproximateMemoryUsageUncompressionDict328   size_t ApproximateMemoryUsage() const {
329     size_t usage = sizeof(struct UncompressionDict);
330     usage += dict_.size();
331     if (allocation_) {
332       auto allocator = allocation_.get_deleter().allocator;
333       if (allocator) {
334         usage += allocator->UsableSize(allocation_.get(), slice_.size());
335       } else {
336         usage += slice_.size();
337       }
338     }
339 #ifdef ROCKSDB_ZSTD_DDICT
340     usage += ZSTD_sizeof_DDict(zstd_ddict_);
341 #endif  // ROCKSDB_ZSTD_DDICT
342     return usage;
343   }
344 
345   UncompressionDict() = default;
346   // Disable copy
347   UncompressionDict(const CompressionDict&) = delete;
348   UncompressionDict& operator=(const CompressionDict&) = delete;
349 };
350 
// Per-operation compression working state. Currently only ZSTD requires a
// native context (ZSTD_CCtx); for every other algorithm this class is empty.
class CompressionContext {
 private:
#if defined(ZSTD) && (ZSTD_VERSION_NUMBER >= 500)
  ZSTD_CCtx* zstd_ctx_ = nullptr;
  // Allocates the ZSTD compression context when `type` selects ZSTD;
  // a no-op for other compression types.
  void CreateNativeContext(CompressionType type) {
    if (type == kZSTD || type == kZSTDNotFinalCompression) {
#ifdef ROCKSDB_ZSTD_CUSTOM_MEM
      // Route ZSTD's internal allocations through the jemalloc overrides
      // (Windows + jemalloc static-link builds only).
      zstd_ctx_ =
          ZSTD_createCCtx_advanced(port::GetJeZstdAllocationOverrides());
#else   // ROCKSDB_ZSTD_CUSTOM_MEM
      zstd_ctx_ = ZSTD_createCCtx();
#endif  // ROCKSDB_ZSTD_CUSTOM_MEM
    }
  }
  void DestroyNativeContext() {
    if (zstd_ctx_ != nullptr) {
      ZSTD_freeCCtx(zstd_ctx_);
    }
  }

 public:
  // callable inside ZSTD_Compress
  ZSTD_CCtx* ZSTDPreallocCtx() const {
    assert(zstd_ctx_ != nullptr);
    return zstd_ctx_;
  }

#else   // ZSTD && (ZSTD_VERSION_NUMBER >= 500)
 private:
  void CreateNativeContext(CompressionType /* type */) {}
  void DestroyNativeContext() {}
#endif  // ZSTD && (ZSTD_VERSION_NUMBER >= 500)
 public:
  explicit CompressionContext(CompressionType type) {
    CreateNativeContext(type);
  }
  ~CompressionContext() { DestroyNativeContext(); }
  CompressionContext(const CompressionContext&) = delete;
  CompressionContext& operator=(const CompressionContext&) = delete;
};
391 
392 class CompressionInfo {
393   const CompressionOptions& opts_;
394   const CompressionContext& context_;
395   const CompressionDict& dict_;
396   const CompressionType type_;
397   const uint64_t sample_for_compression_;
398 
399  public:
CompressionInfo(const CompressionOptions & _opts,const CompressionContext & _context,const CompressionDict & _dict,CompressionType _type,uint64_t _sample_for_compression)400   CompressionInfo(const CompressionOptions& _opts,
401                   const CompressionContext& _context,
402                   const CompressionDict& _dict, CompressionType _type,
403                   uint64_t _sample_for_compression)
404       : opts_(_opts),
405         context_(_context),
406         dict_(_dict),
407         type_(_type),
408         sample_for_compression_(_sample_for_compression) {}
409 
options()410   const CompressionOptions& options() const { return opts_; }
context()411   const CompressionContext& context() const { return context_; }
dict()412   const CompressionDict& dict() const { return dict_; }
type()413   CompressionType type() const { return type_; }
SampleForCompression()414   uint64_t SampleForCompression() const { return sample_for_compression_; }
415 };
416 
417 class UncompressionContext {
418  private:
419   CompressionContextCache* ctx_cache_ = nullptr;
420   ZSTDUncompressCachedData uncomp_cached_data_;
421 
422  public:
423   struct NoCache {};
424   // Do not use context cache, used by TableBuilder
UncompressionContext(NoCache,CompressionType)425   UncompressionContext(NoCache, CompressionType /* type */) {}
426 
UncompressionContext(CompressionType type)427   explicit UncompressionContext(CompressionType type) {
428     if (type == kZSTD || type == kZSTDNotFinalCompression) {
429       ctx_cache_ = CompressionContextCache::Instance();
430       uncomp_cached_data_ = ctx_cache_->GetCachedZSTDUncompressData();
431     }
432   }
~UncompressionContext()433   ~UncompressionContext() {
434     if (uncomp_cached_data_.GetCacheIndex() != -1) {
435       assert(ctx_cache_ != nullptr);
436       ctx_cache_->ReturnCachedZSTDUncompressData(
437           uncomp_cached_data_.GetCacheIndex());
438     }
439   }
440   UncompressionContext(const UncompressionContext&) = delete;
441   UncompressionContext& operator=(const UncompressionContext&) = delete;
442 
GetZSTDContext()443   ZSTDUncompressCachedData::ZSTDNativeContext GetZSTDContext() const {
444     return uncomp_cached_data_.Get();
445   }
446 };
447 
448 class UncompressionInfo {
449   const UncompressionContext& context_;
450   const UncompressionDict& dict_;
451   const CompressionType type_;
452 
453  public:
UncompressionInfo(const UncompressionContext & _context,const UncompressionDict & _dict,CompressionType _type)454   UncompressionInfo(const UncompressionContext& _context,
455                     const UncompressionDict& _dict, CompressionType _type)
456       : context_(_context), dict_(_dict), type_(_type) {}
457 
context()458   const UncompressionContext& context() const { return context_; }
dict()459   const UncompressionDict& dict() const { return dict_; }
type()460   CompressionType type() const { return type_; }
461 };
462 
// Returns true iff this binary was compiled with the SNAPPY macro defined.
inline bool Snappy_Supported() {
#ifdef SNAPPY
  return true;
#else
  return false;
#endif
}
470 
// Returns true iff this binary was compiled with the ZLIB macro defined.
inline bool Zlib_Supported() {
#ifdef ZLIB
  return true;
#else
  return false;
#endif
}
478 
// Returns true iff this binary was compiled with the BZIP2 macro defined.
inline bool BZip2_Supported() {
#ifdef BZIP2
  return true;
#else
  return false;
#endif
}
486 
// Returns true iff this binary was compiled with the LZ4 macro defined
// (covers both LZ4 and LZ4HC).
inline bool LZ4_Supported() {
#ifdef LZ4
  return true;
#else
  return false;
#endif
}
494 
// Returns true iff this binary was compiled with the XPRESS macro defined.
inline bool XPRESS_Supported() {
#ifdef XPRESS
  return true;
#else
  return false;
#endif
}
502 
// Returns true iff ZSTD is compiled in AND the library linked at runtime is
// v0.8.0 or newer, the first version with a finalized on-disk format.
inline bool ZSTD_Supported() {
#ifdef ZSTD
  // ZSTD format is finalized since version 0.8.0.
  return (ZSTD_versionNumber() >= 800);
#else
  return false;
#endif
}
511 
// Returns true iff any ZSTD is compiled in, including pre-0.8 versions whose
// format was not yet finalized (used for kZSTDNotFinalCompression).
inline bool ZSTDNotFinal_Supported() {
#ifdef ZSTD
  return true;
#else
  return false;
#endif
}
519 
CompressionTypeSupported(CompressionType compression_type)520 inline bool CompressionTypeSupported(CompressionType compression_type) {
521   switch (compression_type) {
522     case kNoCompression:
523       return true;
524     case kSnappyCompression:
525       return Snappy_Supported();
526     case kZlibCompression:
527       return Zlib_Supported();
528     case kBZip2Compression:
529       return BZip2_Supported();
530     case kLZ4Compression:
531       return LZ4_Supported();
532     case kLZ4HCCompression:
533       return LZ4_Supported();
534     case kXpressCompression:
535       return XPRESS_Supported();
536     case kZSTDNotFinalCompression:
537       return ZSTDNotFinal_Supported();
538     case kZSTD:
539       return ZSTD_Supported();
540     default:
541       assert(false);
542       return false;
543   }
544 }
545 
// Returns a human-readable name for `compression_type`, e.g. for logging
// and options serialization. Asserts (and returns "") on unknown values.
inline std::string CompressionTypeToString(CompressionType compression_type) {
  switch (compression_type) {
    case kNoCompression:
      return "NoCompression";
    case kSnappyCompression:
      return "Snappy";
    case kZlibCompression:
      return "Zlib";
    case kBZip2Compression:
      return "BZip2";
    case kLZ4Compression:
      return "LZ4";
    case kLZ4HCCompression:
      return "LZ4HC";
    case kXpressCompression:
      return "Xpress";
    case kZSTD:
      return "ZSTD";
    case kZSTDNotFinalCompression:
      return "ZSTDNotFinal";
    case kDisableCompressionOption:
      return "DisableOption";
    default:
      assert(false);
      return "";
  }
}
573 
CompressionOptionsToString(CompressionOptions & compression_options)574 inline std::string CompressionOptionsToString(
575     CompressionOptions& compression_options) {
576   std::string result;
577   result.reserve(512);
578   result.append("window_bits=")
579       .append(ToString(compression_options.window_bits))
580       .append("; ");
581   result.append("level=")
582       .append(ToString(compression_options.level))
583       .append("; ");
584   result.append("strategy=")
585       .append(ToString(compression_options.strategy))
586       .append("; ");
587   result.append("max_dict_bytes=")
588       .append(ToString(compression_options.max_dict_bytes))
589       .append("; ");
590   result.append("zstd_max_train_bytes=")
591       .append(ToString(compression_options.zstd_max_train_bytes))
592       .append("; ");
593   result.append("enabled=")
594       .append(ToString(compression_options.enabled))
595       .append("; ");
596   return result;
597 }
598 
599 // compress_format_version can have two values:
600 // 1 -- decompressed sizes for BZip2 and Zlib are not included in the compressed
601 // block. Also, decompressed sizes for LZ4 are encoded in platform-dependent
602 // way.
603 // 2 -- Zlib, BZip2 and LZ4 encode decompressed size as Varint32 just before the
604 // start of compressed block. Snappy format is the same as version 1.
605 
// Compresses [input, input+length) with Snappy into *output (replacing its
// contents; no decompressed-size header is written -- Snappy blocks use the
// same layout in both format versions). Returns false only when Snappy is
// not compiled in.
inline bool Snappy_Compress(const CompressionInfo& /*info*/, const char* input,
                            size_t length, ::std::string* output) {
#ifdef SNAPPY
  // RawCompress requires a buffer of MaxCompressedLength(length); shrink to
  // the actual compressed size afterwards.
  output->resize(snappy::MaxCompressedLength(length));
  size_t outlen;
  snappy::RawCompress(input, length, &(*output)[0], &outlen);
  output->resize(outlen);
  return true;
#else
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
621 
// Retrieves into *result the uncompressed length recorded inside a
// Snappy-compressed buffer. Returns false if the buffer cannot be parsed
// or Snappy is not compiled in.
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
                                         size_t* result) {
#ifdef SNAPPY
  return snappy::GetUncompressedLength(input, length, result);
#else
  (void)input;
  (void)length;
  (void)result;
  return false;
#endif
}
633 
// Decompresses into `output`, which the caller must have sized to the full
// uncompressed length (see Snappy_GetUncompressedLength). Returns false on
// corrupt input or when Snappy is not compiled in.
inline bool Snappy_Uncompress(const char* input, size_t length, char* output) {
#ifdef SNAPPY
  return snappy::RawUncompress(input, length, output);
#else
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
644 
645 namespace compression {
646 // returns size
// Appends `length` to *output as a varint32 (the decompressed-size header
// used by compress_format_version == 2 blocks).
// Returns output->size() after the append, i.e. the header length when
// called on an initially empty string.
inline size_t PutDecompressedSizeInfo(std::string* output, uint32_t length) {
  PutVarint32(output, length);
  return output->size();
}
651 
// Parses the varint32 decompressed-size header written by
// PutDecompressedSizeInfo into *output_len, then advances *input_data past
// it and shrinks *input_length by the header size. Returns false if the
// varint is malformed or truncated; *input_data and *input_length are left
// unchanged in that case.
inline bool GetDecompressedSizeInfo(const char** input_data,
                                    size_t* input_length,
                                    uint32_t* output_len) {
  auto new_input_data =
      GetVarint32Ptr(*input_data, *input_data + *input_length, output_len);
  if (new_input_data == nullptr) {
    return false;
  }
  *input_length -= (new_input_data - *input_data);
  *input_data = new_input_data;
  return true;
}
664 }  // namespace compression
665 
666 // compress_format_version == 1 -- decompressed size is not included in the
667 // block header
668 // compress_format_version == 2 -- decompressed size is included in the block
669 // header in varint32 format
670 // @param compression_dict Data for presetting the compression library's
671 //    dictionary.
// Compresses [input, input+length) with zlib into *output, appending after
// the varint32 decompressed-size header when compress_format_version == 2.
// Returns true only if the deflated data fit within `length` output bytes;
// returns false when zlib is unavailable, initialization fails, or the
// compressed result would be larger than the input.
inline bool Zlib_Compress(const CompressionInfo& info,
                          uint32_t compress_format_version, const char* input,
                          size_t length, ::std::string* output) {
#ifdef ZLIB
  if (length > std::numeric_limits<uint32_t>::max()) {
    // Can't compress more than 4GB
    return false;
  }

  size_t output_header_len = 0;
  if (compress_format_version == 2) {
    output_header_len = compression::PutDecompressedSizeInfo(
        output, static_cast<uint32_t>(length));
  }
  // Resize output to be the plain data length.
  // This may not be big enough if the compression actually expands data.
  output->resize(output_header_len + length);

  // The memLevel parameter specifies how much memory should be allocated for
  // the internal compression state.
  // memLevel=1 uses minimum memory but is slow and reduces compression ratio.
  // memLevel=9 uses maximum memory for optimal speed.
  // The default value is 8. See zconf.h for more details.
  static const int memLevel = 8;
  int level;
  if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
    level = Z_DEFAULT_COMPRESSION;
  } else {
    level = info.options().level;
  }
  z_stream _stream;
  memset(&_stream, 0, sizeof(z_stream));
  int st = deflateInit2(&_stream, level, Z_DEFLATED, info.options().window_bits,
                        memLevel, info.options().strategy);
  if (st != Z_OK) {
    return false;
  }

  Slice compression_dict = info.dict().GetRawDict();
  if (compression_dict.size()) {
    // Initialize the compression library's dictionary
    st = deflateSetDictionary(
        &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()),
        static_cast<unsigned int>(compression_dict.size()));
    if (st != Z_OK) {
      deflateEnd(&_stream);
      return false;
    }
  }

  // Compress the input, and put compressed data in output.
  _stream.next_in = (Bytef*)input;
  _stream.avail_in = static_cast<unsigned int>(length);

  // Initialize the output size.
  _stream.avail_out = static_cast<unsigned int>(length);
  _stream.next_out = reinterpret_cast<Bytef*>(&(*output)[output_header_len]);

  bool compressed = false;
  st = deflate(&_stream, Z_FINISH);
  if (st == Z_STREAM_END) {
    compressed = true;
    output->resize(output->size() - _stream.avail_out);
  }
  // The only return value we really care about is Z_STREAM_END.
  // Z_OK means insufficient output space. This means the compression is
  // bigger than decompressed size. Just fail the compression in that case.

  deflateEnd(&_stream);
  return compressed;
#else
  (void)info;
  (void)compress_format_version;
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
751 
752 // compress_format_version == 1 -- decompressed size is not included in the
753 // block header
754 // compress_format_version == 2 -- decompressed size is included in the block
755 // header in varint32 format
756 // @param compression_dict Data for presetting the compression library's
757 //    dictionary.
// Decompresses zlib/raw-deflate data. With compress_format_version == 2 the
// exact decompressed size is read from the varint32 header; otherwise the
// output buffer starts at ~5x the input size (page-rounded) and grows by
// ~20% until inflate finishes. On success returns the buffer and stores the
// decompressed byte count in *decompress_size; returns nullptr on any error
// or when zlib is not compiled in.
// windowBits < 0 (default -14) selects raw deflate; windowBits > 0 gets +32
// added so zlib auto-detects a zlib or gzip header.
inline CacheAllocationPtr Zlib_Uncompress(
    const UncompressionInfo& info, const char* input_data, size_t input_length,
    int* decompress_size, uint32_t compress_format_version,
    MemoryAllocator* allocator = nullptr, int windowBits = -14) {
#ifdef ZLIB
  uint32_t output_len = 0;
  if (compress_format_version == 2) {
    if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
                                              &output_len)) {
      return nullptr;
    }
  } else {
    // Assume the decompressed data size will 5x of compressed size, but round
    // to the page size
    size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
    output_len = static_cast<uint32_t>(
        std::min(proposed_output_len,
                 static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
  }

  z_stream _stream;
  memset(&_stream, 0, sizeof(z_stream));

  // For raw inflate, the windowBits should be -8..-15.
  // If windowBits is bigger than zero, it will use either zlib
  // header or gzip header. Adding 32 to it will do automatic detection.
  int st =
      inflateInit2(&_stream, windowBits > 0 ? windowBits + 32 : windowBits);
  if (st != Z_OK) {
    return nullptr;
  }

  const Slice& compression_dict = info.dict().GetRawDict();
  if (compression_dict.size()) {
    // Initialize the compression library's dictionary
    st = inflateSetDictionary(
        &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()),
        static_cast<unsigned int>(compression_dict.size()));
    if (st != Z_OK) {
      return nullptr;
    }
  }

  _stream.next_in = (Bytef*)input_data;
  _stream.avail_in = static_cast<unsigned int>(input_length);

  auto output = AllocateBlock(output_len, allocator);

  _stream.next_out = (Bytef*)output.get();
  _stream.avail_out = static_cast<unsigned int>(output_len);

  bool done = false;
  while (!done) {
    st = inflate(&_stream, Z_SYNC_FLUSH);
    switch (st) {
      case Z_STREAM_END:
        done = true;
        break;
      case Z_OK: {
        // No output space. Increase the output space by 20%.
        // We should never run out of output space if
        // compress_format_version == 2
        assert(compress_format_version != 2);
        size_t old_sz = output_len;
        uint32_t output_len_delta = output_len / 5;
        output_len += output_len_delta < 10 ? 10 : output_len_delta;
        auto tmp = AllocateBlock(output_len, allocator);
        memcpy(tmp.get(), output.get(), old_sz);
        output = std::move(tmp);

        // Set more output.
        _stream.next_out = (Bytef*)(output.get() + old_sz);
        _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
        break;
      }
      case Z_BUF_ERROR:
      default:
        inflateEnd(&_stream);
        return nullptr;
    }
  }

  // If we encoded decompressed block size, we should have no bytes left
  assert(compress_format_version != 2 || _stream.avail_out == 0);
  *decompress_size = static_cast<int>(output_len - _stream.avail_out);
  inflateEnd(&_stream);
  return output;
#else
  (void)info;
  (void)input_data;
  (void)input_length;
  (void)decompress_size;
  (void)compress_format_version;
  (void)allocator;
  (void)windowBits;
  return nullptr;
#endif
}
856 
857 // compress_format_version == 1 -- decompressed size is not included in the
858 // block header
859 // compress_format_version == 2 -- decompressed size is included in the block
860 // header in varint32 format
inline bool BZip2_Compress(const CompressionInfo& /*info*/,
                           uint32_t compress_format_version, const char* input,
                           size_t length, ::std::string* output) {
#ifdef BZIP2
  // The 4-byte size header cannot represent more than 4GB.
  if (length > std::numeric_limits<uint32_t>::max()) {
    return false;
  }
  size_t header_len = 0;
  if (compress_format_version == 2) {
    // Prepend the decompressed size as a varint32.
    header_len = compression::PutDecompressedSizeInfo(
        output, static_cast<uint32_t>(length));
  }
  // Reserve space equal to the plain-data length. If compression expands
  // the data we simply report failure below instead of growing the buffer.
  output->resize(header_len + length);

  bz_stream stream;
  memset(&stream, 0, sizeof(bz_stream));

  // blockSize100k=1 (100KB blocks), verbosity=0 (silent),
  // workFactor=30 (library default).
  if (BZ2_bzCompressInit(&stream, 1, 0, 30) != BZ_OK) {
    return false;
  }

  // Feed the whole input and compress it into the output buffer.
  stream.next_in = (char*)input;
  stream.avail_in = static_cast<unsigned int>(length);
  stream.next_out = reinterpret_cast<char*>(&(*output)[header_len]);
  stream.avail_out = static_cast<unsigned int>(length);

  // BZ_STREAM_END is the only outcome treated as success. BZ_FINISH_OK
  // means the output buffer was too small, i.e. the compressed form is
  // larger than the input -- fail the compression in that case.
  const int rv = BZ2_bzCompress(&stream, BZ_FINISH);
  const bool ok = (rv == BZ_STREAM_END);
  if (ok) {
    // Trim the buffer down to the bytes actually produced.
    output->resize(output->size() - stream.avail_out);
  }

  BZ2_bzCompressEnd(&stream);
  return ok;
#else
  (void)compress_format_version;
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
917 
918 // compress_format_version == 1 -- decompressed size is not included in the
919 // block header
920 // compress_format_version == 2 -- decompressed size is included in the block
921 // header in varint32 format
922 inline CacheAllocationPtr BZip2_Uncompress(
923     const char* input_data, size_t input_length, int* decompress_size,
924     uint32_t compress_format_version, MemoryAllocator* allocator = nullptr) {
925 #ifdef BZIP2
926   uint32_t output_len = 0;
927   if (compress_format_version == 2) {
928     if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
929                                               &output_len)) {
930       return nullptr;
931     }
932   } else {
933     // Assume the decompressed data size will 5x of compressed size, but round
934     // to the next page size
935     size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
936     output_len = static_cast<uint32_t>(
937         std::min(proposed_output_len,
938                  static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
939   }
940 
941   bz_stream _stream;
942   memset(&_stream, 0, sizeof(bz_stream));
943 
944   int st = BZ2_bzDecompressInit(&_stream, 0, 0);
945   if (st != BZ_OK) {
946     return nullptr;
947   }
948 
949   _stream.next_in = (char*)input_data;
950   _stream.avail_in = static_cast<unsigned int>(input_length);
951 
952   auto output = AllocateBlock(output_len, allocator);
953 
954   _stream.next_out = (char*)output.get();
955   _stream.avail_out = static_cast<unsigned int>(output_len);
956 
957   bool done = false;
958   while (!done) {
959     st = BZ2_bzDecompress(&_stream);
960     switch (st) {
961       case BZ_STREAM_END:
962         done = true;
963         break;
964       case BZ_OK: {
965         // No output space. Increase the output space by 20%.
966         // We should never run out of output space if
967         // compress_format_version == 2
968         assert(compress_format_version != 2);
969         uint32_t old_sz = output_len;
970         output_len = output_len * 1.2;
971         auto tmp = AllocateBlock(output_len, allocator);
972         memcpy(tmp.get(), output.get(), old_sz);
973         output = std::move(tmp);
974 
975         // Set more output.
976         _stream.next_out = (char*)(output.get() + old_sz);
977         _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
978         break;
979       }
980       default:
981         BZ2_bzDecompressEnd(&_stream);
982         return nullptr;
983     }
984   }
985 
986   // If we encoded decompressed block size, we should have no bytes left
987   assert(compress_format_version != 2 || _stream.avail_out == 0);
988   *decompress_size = static_cast<int>(output_len - _stream.avail_out);
989   BZ2_bzDecompressEnd(&_stream);
990   return output;
991 #else
992   (void)input_data;
993   (void)input_length;
994   (void)decompress_size;
995   (void)compress_format_version;
996   (void)allocator;
997   return nullptr;
998 #endif
999 }
1000 
// compress_format_version == 1 -- decompressed size is included in the
// block header using memcpy, which makes the database non-portable
1003 // compress_format_version == 2 -- decompressed size is included in the block
1004 // header in varint32 format
1005 // @param compression_dict Data for presetting the compression library's
1006 //    dictionary.
LZ4_Compress(const CompressionInfo & info,uint32_t compress_format_version,const char * input,size_t length,::std::string * output)1007 inline bool LZ4_Compress(const CompressionInfo& info,
1008                          uint32_t compress_format_version, const char* input,
1009                          size_t length, ::std::string* output) {
1010 #ifdef LZ4
1011   if (length > std::numeric_limits<uint32_t>::max()) {
1012     // Can't compress more than 4GB
1013     return false;
1014   }
1015 
1016   size_t output_header_len = 0;
1017   if (compress_format_version == 2) {
1018     // new encoding, using varint32 to store size information
1019     output_header_len = compression::PutDecompressedSizeInfo(
1020         output, static_cast<uint32_t>(length));
1021   } else {
1022     // legacy encoding, which is not really portable (depends on big/little
1023     // endianness)
1024     output_header_len = 8;
1025     output->resize(output_header_len);
1026     char* p = const_cast<char*>(output->c_str());
1027     memcpy(p, &length, sizeof(length));
1028   }
1029   int compress_bound = LZ4_compressBound(static_cast<int>(length));
1030   output->resize(static_cast<size_t>(output_header_len + compress_bound));
1031 
1032   int outlen;
1033 #if LZ4_VERSION_NUMBER >= 10400  // r124+
1034   LZ4_stream_t* stream = LZ4_createStream();
1035   Slice compression_dict = info.dict().GetRawDict();
1036   if (compression_dict.size()) {
1037     LZ4_loadDict(stream, compression_dict.data(),
1038                  static_cast<int>(compression_dict.size()));
1039   }
1040 #if LZ4_VERSION_NUMBER >= 10700  // r129+
1041   outlen =
1042       LZ4_compress_fast_continue(stream, input, &(*output)[output_header_len],
1043                                  static_cast<int>(length), compress_bound, 1);
1044 #else  // up to r128
1045   outlen = LZ4_compress_limitedOutput_continue(
1046       stream, input, &(*output)[output_header_len], static_cast<int>(length),
1047       compress_bound);
1048 #endif
1049   LZ4_freeStream(stream);
1050 #else   // up to r123
1051   outlen = LZ4_compress_limitedOutput(input, &(*output)[output_header_len],
1052                                       static_cast<int>(length), compress_bound);
1053 #endif  // LZ4_VERSION_NUMBER >= 10400
1054 
1055   if (outlen == 0) {
1056     return false;
1057   }
1058   output->resize(static_cast<size_t>(output_header_len + outlen));
1059   return true;
1060 #else  // LZ4
1061   (void)info;
1062   (void)compress_format_version;
1063   (void)input;
1064   (void)length;
1065   (void)output;
1066   return false;
1067 #endif
1068 }
1069 
// compress_format_version == 1 -- decompressed size is included in the
// block header using memcpy, which makes the database non-portable
1072 // compress_format_version == 2 -- decompressed size is included in the block
1073 // header in varint32 format
1074 // @param compression_dict Data for presetting the compression library's
1075 //    dictionary.
1076 inline CacheAllocationPtr LZ4_Uncompress(const UncompressionInfo& info,
1077                                          const char* input_data,
1078                                          size_t input_length,
1079                                          int* decompress_size,
1080                                          uint32_t compress_format_version,
1081                                          MemoryAllocator* allocator = nullptr) {
1082 #ifdef LZ4
1083   uint32_t output_len = 0;
1084   if (compress_format_version == 2) {
1085     // new encoding, using varint32 to store size information
1086     if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
1087                                               &output_len)) {
1088       return nullptr;
1089     }
1090   } else {
1091     // legacy encoding, which is not really portable (depends on big/little
1092     // endianness)
1093     if (input_length < 8) {
1094       return nullptr;
1095     }
1096     memcpy(&output_len, input_data, sizeof(output_len));
1097     input_length -= 8;
1098     input_data += 8;
1099   }
1100 
1101   auto output = AllocateBlock(output_len, allocator);
1102 #if LZ4_VERSION_NUMBER >= 10400  // r124+
1103   LZ4_streamDecode_t* stream = LZ4_createStreamDecode();
1104   const Slice& compression_dict = info.dict().GetRawDict();
1105   if (compression_dict.size()) {
1106     LZ4_setStreamDecode(stream, compression_dict.data(),
1107                         static_cast<int>(compression_dict.size()));
1108   }
1109   *decompress_size = LZ4_decompress_safe_continue(
1110       stream, input_data, output.get(), static_cast<int>(input_length),
1111       static_cast<int>(output_len));
1112   LZ4_freeStreamDecode(stream);
1113 #else   // up to r123
1114   *decompress_size = LZ4_decompress_safe(input_data, output.get(),
1115                                          static_cast<int>(input_length),
1116                                          static_cast<int>(output_len));
1117 #endif  // LZ4_VERSION_NUMBER >= 10400
1118 
1119   if (*decompress_size < 0) {
1120     return nullptr;
1121   }
1122   assert(*decompress_size == static_cast<int>(output_len));
1123   return output;
1124 #else  // LZ4
1125   (void)info;
1126   (void)input_data;
1127   (void)input_length;
1128   (void)decompress_size;
1129   (void)compress_format_version;
1130   (void)allocator;
1131   return nullptr;
1132 #endif
1133 }
1134 
// compress_format_version == 1 -- decompressed size is included in the
// block header using memcpy, which makes the database non-portable
1137 // compress_format_version == 2 -- decompressed size is included in the block
1138 // header in varint32 format
1139 // @param compression_dict Data for presetting the compression library's
1140 //    dictionary.
LZ4HC_Compress(const CompressionInfo & info,uint32_t compress_format_version,const char * input,size_t length,::std::string * output)1141 inline bool LZ4HC_Compress(const CompressionInfo& info,
1142                            uint32_t compress_format_version, const char* input,
1143                            size_t length, ::std::string* output) {
1144 #ifdef LZ4
1145   if (length > std::numeric_limits<uint32_t>::max()) {
1146     // Can't compress more than 4GB
1147     return false;
1148   }
1149 
1150   size_t output_header_len = 0;
1151   if (compress_format_version == 2) {
1152     // new encoding, using varint32 to store size information
1153     output_header_len = compression::PutDecompressedSizeInfo(
1154         output, static_cast<uint32_t>(length));
1155   } else {
1156     // legacy encoding, which is not really portable (depends on big/little
1157     // endianness)
1158     output_header_len = 8;
1159     output->resize(output_header_len);
1160     char* p = const_cast<char*>(output->c_str());
1161     memcpy(p, &length, sizeof(length));
1162   }
1163   int compress_bound = LZ4_compressBound(static_cast<int>(length));
1164   output->resize(static_cast<size_t>(output_header_len + compress_bound));
1165 
1166   int outlen;
1167   int level;
1168   if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
1169     level = 0;  // lz4hc.h says any value < 1 will be sanitized to default
1170   } else {
1171     level = info.options().level;
1172   }
1173 #if LZ4_VERSION_NUMBER >= 10400  // r124+
1174   LZ4_streamHC_t* stream = LZ4_createStreamHC();
1175   LZ4_resetStreamHC(stream, level);
1176   Slice compression_dict = info.dict().GetRawDict();
1177   const char* compression_dict_data =
1178       compression_dict.size() > 0 ? compression_dict.data() : nullptr;
1179   size_t compression_dict_size = compression_dict.size();
1180   LZ4_loadDictHC(stream, compression_dict_data,
1181                  static_cast<int>(compression_dict_size));
1182 
1183 #if LZ4_VERSION_NUMBER >= 10700  // r129+
1184   outlen =
1185       LZ4_compress_HC_continue(stream, input, &(*output)[output_header_len],
1186                                static_cast<int>(length), compress_bound);
1187 #else   // r124-r128
1188   outlen = LZ4_compressHC_limitedOutput_continue(
1189       stream, input, &(*output)[output_header_len], static_cast<int>(length),
1190       compress_bound);
1191 #endif  // LZ4_VERSION_NUMBER >= 10700
1192   LZ4_freeStreamHC(stream);
1193 
1194 #elif LZ4_VERSION_MAJOR  // r113-r123
1195   outlen = LZ4_compressHC2_limitedOutput(input, &(*output)[output_header_len],
1196                                          static_cast<int>(length),
1197                                          compress_bound, level);
1198 #else                    // up to r112
1199   outlen =
1200       LZ4_compressHC_limitedOutput(input, &(*output)[output_header_len],
1201                                    static_cast<int>(length), compress_bound);
1202 #endif                   // LZ4_VERSION_NUMBER >= 10400
1203 
1204   if (outlen == 0) {
1205     return false;
1206   }
1207   output->resize(static_cast<size_t>(output_header_len + outlen));
1208   return true;
1209 #else  // LZ4
1210   (void)info;
1211   (void)compress_format_version;
1212   (void)input;
1213   (void)length;
1214   (void)output;
1215   return false;
1216 #endif
1217 }
1218 
#ifdef XPRESS
// Thin wrapper over the Windows XPRESS codec in the port layer.
inline bool XPRESS_Compress(const char* input, size_t length,
                            std::string* output) {
  return port::xpress::Compress(input, length, output);
}
#else
// XPRESS is unavailable in this build: always report failure.
inline bool XPRESS_Compress(const char* /*input*/, size_t /*length*/,
                            std::string* /*output*/) {
  return false;
}
#endif
1230 
#ifdef XPRESS
// Thin wrapper over the Windows XPRESS decoder in the port layer.
inline char* XPRESS_Uncompress(const char* input_data, size_t input_length,
                               int* decompress_size) {
  return port::xpress::Decompress(input_data, input_length, decompress_size);
}
#else
// XPRESS is unavailable in this build: always report failure.
inline char* XPRESS_Uncompress(const char* /*input_data*/,
                               size_t /*input_length*/,
                               int* /*decompress_size*/) {
  return nullptr;
}
#endif
1243 
ZSTD_Compress(const CompressionInfo & info,const char * input,size_t length,::std::string * output)1244 inline bool ZSTD_Compress(const CompressionInfo& info, const char* input,
1245                           size_t length, ::std::string* output) {
1246 #ifdef ZSTD
1247   if (length > std::numeric_limits<uint32_t>::max()) {
1248     // Can't compress more than 4GB
1249     return false;
1250   }
1251 
1252   size_t output_header_len = compression::PutDecompressedSizeInfo(
1253       output, static_cast<uint32_t>(length));
1254 
1255   size_t compressBound = ZSTD_compressBound(length);
1256   output->resize(static_cast<size_t>(output_header_len + compressBound));
1257   size_t outlen = 0;
1258   int level;
1259   if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
1260     // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
1261     // https://github.com/facebook/zstd/issues/1148
1262     level = 3;
1263   } else {
1264     level = info.options().level;
1265   }
1266 #if ZSTD_VERSION_NUMBER >= 500  // v0.5.0+
1267   ZSTD_CCtx* context = info.context().ZSTDPreallocCtx();
1268   assert(context != nullptr);
1269 #if ZSTD_VERSION_NUMBER >= 700  // v0.7.0+
1270   if (info.dict().GetDigestedZstdCDict() != nullptr) {
1271     outlen = ZSTD_compress_usingCDict(context, &(*output)[output_header_len],
1272                                       compressBound, input, length,
1273                                       info.dict().GetDigestedZstdCDict());
1274   }
1275 #endif  // ZSTD_VERSION_NUMBER >= 700
1276   if (outlen == 0) {
1277     outlen = ZSTD_compress_usingDict(context, &(*output)[output_header_len],
1278                                      compressBound, input, length,
1279                                      info.dict().GetRawDict().data(),
1280                                      info.dict().GetRawDict().size(), level);
1281   }
1282 #else   // up to v0.4.x
1283   outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, input,
1284                          length, level);
1285 #endif  // ZSTD_VERSION_NUMBER >= 500
1286   if (outlen == 0) {
1287     return false;
1288   }
1289   output->resize(output_header_len + outlen);
1290   return true;
1291 #else  // ZSTD
1292   (void)info;
1293   (void)input;
1294   (void)length;
1295   (void)output;
1296   return false;
1297 #endif
1298 }
1299 
1300 // @param compression_dict Data for presetting the compression library's
1301 //    dictionary.
inline CacheAllocationPtr ZSTD_Uncompress(
    const UncompressionInfo& info, const char* input_data, size_t input_length,
    int* decompress_size, MemoryAllocator* allocator = nullptr) {
#ifdef ZSTD
  // Read the decompressed size from the varint32 header; a malformed
  // header means the block is corrupt.
  uint32_t output_len = 0;
  if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
                                            &output_len)) {
    return nullptr;
  }

  // The header gives the exact decompressed size, so a single allocation
  // suffices (no grow-and-retry loop as in the zlib/bzip2 paths).
  auto output = AllocateBlock(output_len, allocator);
  size_t actual_output_length = 0;
#if ZSTD_VERSION_NUMBER >= 500  // v0.5.0+
  ZSTD_DCtx* context = info.context().GetZSTDContext();
  assert(context != nullptr);
#ifdef ROCKSDB_ZSTD_DDICT
  // Prefer the pre-digested dictionary when one is available.
  if (info.dict().GetDigestedZstdDDict() != nullptr) {
    actual_output_length = ZSTD_decompress_usingDDict(
        context, output.get(), output_len, input_data, input_length,
        info.dict().GetDigestedZstdDDict());
  }
#endif  // ROCKSDB_ZSTD_DDICT
  // Fall back to the raw dictionary (possibly empty) if no digested
  // dictionary was used or it produced nothing.
  if (actual_output_length == 0) {
    actual_output_length = ZSTD_decompress_usingDict(
        context, output.get(), output_len, input_data, input_length,
        info.dict().GetRawDict().data(), info.dict().GetRawDict().size());
  }
#else   // up to v0.4.x
  (void)info;
  actual_output_length =
      ZSTD_decompress(output.get(), output_len, input_data, input_length);
#endif  // ZSTD_VERSION_NUMBER >= 500
  // NOTE(review): ZSTD encodes failures in the returned size_t
  // (ZSTD_isError); here that is only checked via the assert below, so a
  // corrupt block appears to go undetected in release builds -- confirm
  // whether callers validate elsewhere.
  assert(actual_output_length == output_len);
  *decompress_size = static_cast<int>(actual_output_length);
  return output;
#else  // ZSTD
  (void)info;
  (void)input_data;
  (void)input_length;
  (void)decompress_size;
  (void)allocator;
  return nullptr;
#endif
}
1346 
// Whether dictionary training (ZDICT) is usable with the linked ZSTD.
inline bool ZSTD_TrainDictionarySupported() {
#ifdef ZSTD
  // The trainer exists since v0.6.1 for static linking, but only entered
  // the dynamic-linking API in v1.1.3, so require v1.1.3+ at runtime.
  return ZSTD_versionNumber() >= 10103;
#else
  return false;
#endif
}
1357 
// Trains a ZSTD dictionary from "samples", a concatenation of chunks whose
// individual lengths are given by "sample_lens". Returns the dictionary
// bytes (at most max_dict_bytes), or "" on failure.
inline std::string ZSTD_TrainDictionary(const std::string& samples,
                                        const std::vector<size_t>& sample_lens,
                                        size_t max_dict_bytes) {
  // The trainer ships with static-link ZSTD since v0.6.1 but only entered
  // the shared-library API in v1.1.3; gate on v1.1.3+ for now.
#if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
  assert(samples.empty() == sample_lens.empty());
  if (samples.empty()) {
    return "";
  }
  // ZDICT writes at most max_dict_bytes and returns either the actual
  // dictionary size or an error code.
  std::string dict(max_dict_bytes, '\0');
  const size_t rv = ZDICT_trainFromBuffer(
      &dict[0], max_dict_bytes, samples.data(), sample_lens.data(),
      static_cast<unsigned>(sample_lens.size()));
  if (ZDICT_isError(rv)) {
    return "";
  }
  assert(rv <= max_dict_bytes);
  dict.resize(rv);
  return dict;
#else   // up to v1.1.2
  assert(false);
  (void)samples;
  (void)sample_lens;
  (void)max_dict_bytes;
  return "";
#endif  // ZSTD_VERSION_NUMBER >= 10103
}
1387 
// Convenience overload: slices "samples" into equal chunks of
// 2^sample_len_shift bytes and trains on those; a trailing partial chunk
// is dropped.
inline std::string ZSTD_TrainDictionary(const std::string& samples,
                                        size_t sample_len_shift,
                                        size_t max_dict_bytes) {
  // Trainer availability mirrors the explicit-lengths overload; ZSTD was
  // only marked stable from v0.8.0, so gate on a stable version.
#if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
  const size_t chunk_len = size_t(1) << sample_len_shift;
  const size_t num_chunks = samples.size() >> sample_len_shift;
  std::vector<size_t> lens(num_chunks, chunk_len);
  return ZSTD_TrainDictionary(samples, lens, max_dict_bytes);
#else   // up to v1.1.2
  assert(false);
  (void)samples;
  (void)sample_len_shift;
  (void)max_dict_bytes;
  return "";
#endif  // ZSTD_VERSION_NUMBER >= 10103
}
1406 
1407 }  // namespace ROCKSDB_NAMESPACE
1408