// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
10 #pragma once
11
12 #include <algorithm>
13 #include <limits>
14 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
15 #ifdef OS_FREEBSD
16 #include <malloc_np.h>
17 #else // OS_FREEBSD
18 #include <malloc.h>
19 #endif // OS_FREEBSD
20 #endif // ROCKSDB_MALLOC_USABLE_SIZE
21 #include <string>
22
23 #include "memory/memory_allocator.h"
24 #include "rocksdb/options.h"
25 #include "rocksdb/table.h"
26 #include "util/coding.h"
27 #include "util/compression_context_cache.h"
28 #include "util/string_util.h"
29
30 #ifdef SNAPPY
31 #include <snappy.h>
32 #endif
33
34 #ifdef ZLIB
35 #include <zlib.h>
36 #endif
37
38 #ifdef BZIP2
39 #include <bzlib.h>
40 #endif
41
42 #if defined(LZ4)
43 #include <lz4.h>
44 #include <lz4hc.h>
45 #endif
46
47 #if defined(ZSTD)
48 #include <zstd.h>
49 #if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+
50 #include <zdict.h>
51 #endif // ZSTD_VERSION_NUMBER >= 10103
52 namespace ROCKSDB_NAMESPACE {
53 // Need this for the context allocation override
54 // On windows we need to do this explicitly
55 #if (ZSTD_VERSION_NUMBER >= 500)
56 #if defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) && \
57 defined(ZSTD_STATIC_LINKING_ONLY)
58 #define ROCKSDB_ZSTD_CUSTOM_MEM
59 namespace port {
60 ZSTD_customMem GetJeZstdAllocationOverrides();
61 } // namespace port
62 #endif // defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) &&
63 // defined(ZSTD_STATIC_LINKING_ONLY)
64
65 // We require `ZSTD_sizeof_DDict` and `ZSTD_createDDict_byReference` to use
66 // `ZSTD_DDict`. The former was introduced in v1.0.0 and the latter was
67 // introduced in v1.1.3. But an important bug fix for `ZSTD_sizeof_DDict` came
68 // in v1.1.4, so that is the version we require. As of today's latest version
69 // (v1.3.8), they are both still in the experimental API, which means they are
70 // only exported when the compiler flag `ZSTD_STATIC_LINKING_ONLY` is set.
71 #if defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104
72 #define ROCKSDB_ZSTD_DDICT
73 #endif // defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104
74
// Cached data represents a portion that can be re-used.
// If, in the future, we have more than one native context to
// cache, we can arrange this as a tuple.
class ZSTDUncompressCachedData {
 public:
  using ZSTDNativeContext = ZSTD_DCtx*;
  ZSTDUncompressCachedData() {}
  // Copying is disallowed: at most one instance may refer to a given
  // ZSTD_DCtx as its owner.
  ZSTDUncompressCachedData(const ZSTDUncompressCachedData& o) = delete;
  ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete;
  // Moves transfer the context pointer and its cache slot index.
  ZSTDUncompressCachedData(ZSTDUncompressCachedData&& o) ROCKSDB_NOEXCEPT
      : ZSTDUncompressCachedData() {
    *this = std::move(o);
  }
  ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&& o)
      ROCKSDB_NOEXCEPT {
    // Move-assignment is only valid into an empty instance; swapping with
    // the (null) members leaves `o` empty as well.
    assert(zstd_ctx_ == nullptr);
    std::swap(zstd_ctx_, o.zstd_ctx_);
    std::swap(cache_idx_, o.cache_idx_);
    return *this;
  }
  // Native decompression context; nullptr until CreateIfNeeded() or
  // InitFromCache() has been called.
  ZSTDNativeContext Get() const { return zstd_ctx_; }
  // Cache slot this context was borrowed from, or -1 if this instance owns
  // the context itself.
  int64_t GetCacheIndex() const { return cache_idx_; }
  // Lazily allocates an owned ZSTD_DCtx; no-op if a context is already set.
  void CreateIfNeeded() {
    if (zstd_ctx_ == nullptr) {
#ifdef ROCKSDB_ZSTD_CUSTOM_MEM
      // Route ZSTD's internal allocations through the jemalloc overrides
      // (Windows + jemalloc + static ZSTD builds only).
      zstd_ctx_ =
          ZSTD_createDCtx_advanced(port::GetJeZstdAllocationOverrides());
#else   // ROCKSDB_ZSTD_CUSTOM_MEM
      zstd_ctx_ = ZSTD_createDCtx();
#endif  // ROCKSDB_ZSTD_CUSTOM_MEM
      cache_idx_ = -1;
    }
  }
  // Init from cache: adopt a context held by the shared cache. The cache
  // retains ownership (cache_idx_ != -1 keeps the destructor from freeing
  // the context).
  void InitFromCache(const ZSTDUncompressCachedData& o, int64_t idx) {
    zstd_ctx_ = o.zstd_ctx_;
    cache_idx_ = idx;
  }
  ~ZSTDUncompressCachedData() {
    // Free only contexts we own; borrowed ones belong to the cache.
    if (zstd_ctx_ != nullptr && cache_idx_ == -1) {
      ZSTD_freeDCtx(zstd_ctx_);
    }
  }

 private:
  ZSTDNativeContext zstd_ctx_ = nullptr;
  int64_t cache_idx_ = -1;  // -1 means this instance owns the context
};
123 #endif // (ZSTD_VERSION_NUMBER >= 500)
124 } // namespace ROCKSDB_NAMESPACE
125 #endif // ZSTD
126
127 #if !(defined ZSTD) || !(ZSTD_VERSION_NUMBER >= 500)
128 namespace ROCKSDB_NAMESPACE {
// Stub used when ZSTD (>= v0.5.0) is not compiled in; provides the same
// interface as the real ZSTDUncompressCachedData but never holds a context.
class ZSTDUncompressCachedData {
  void* padding;  // unused
 public:
  using ZSTDNativeContext = void*;
  ZSTDUncompressCachedData() {}
  ZSTDUncompressCachedData(const ZSTDUncompressCachedData&) {}
  ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete;
  ZSTDUncompressCachedData(ZSTDUncompressCachedData&&)
      ROCKSDB_NOEXCEPT = default;
  ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&&)
      ROCKSDB_NOEXCEPT = default;
  // Always "no context" / "not cached"; all operations are no-ops.
  ZSTDNativeContext Get() const { return nullptr; }
  int64_t GetCacheIndex() const { return -1; }
  void CreateIfNeeded() {}
  void InitFromCache(const ZSTDUncompressCachedData&, int64_t) {}

 private:
  // References `padding` so compilers don't warn about an unused private
  // field.
  void ignore_padding__() { padding = nullptr; }
};
147 } // namespace ROCKSDB_NAMESPACE
148 #endif
149
150 #if defined(XPRESS)
151 #include "port/xpress.h"
152 #endif
153
154 namespace ROCKSDB_NAMESPACE {
155
156 // Holds dictionary and related data, like ZSTD's digested compression
157 // dictionary.
struct CompressionDict {
#if ZSTD_VERSION_NUMBER >= 700
  // Digested form of dict_; built only for ZSTD compression types.
  ZSTD_CDict* zstd_cdict_ = nullptr;
#endif  // ZSTD_VERSION_NUMBER >= 700
  // Raw dictionary bytes; may be empty.
  std::string dict_;

 public:
#if ZSTD_VERSION_NUMBER >= 700
  // @param dict  raw dictionary contents (moved in; may be empty)
  // @param type  compression algorithm the dictionary is intended for
  // @param level compression level used when digesting for ZSTD
  CompressionDict(std::string dict, CompressionType type, int level) {
#else   // ZSTD_VERSION_NUMBER >= 700
  CompressionDict(std::string dict, CompressionType /*type*/, int /*level*/) {
#endif  // ZSTD_VERSION_NUMBER >= 700
    dict_ = std::move(dict);
#if ZSTD_VERSION_NUMBER >= 700
    zstd_cdict_ = nullptr;
    if (!dict_.empty() && (type == kZSTD || type == kZSTDNotFinalCompression)) {
      if (level == CompressionOptions::kDefaultCompressionLevel) {
        // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
        // https://github.com/facebook/zstd/issues/1148
        level = 3;
      }
      // Should be safe (but slower) if below call fails as we'll use the
      // raw dictionary to compress.
      zstd_cdict_ = ZSTD_createCDict(dict_.data(), dict_.size(), level);
      assert(zstd_cdict_ != nullptr);
    }
#endif  // ZSTD_VERSION_NUMBER >= 700
  }

  ~CompressionDict() {
#if ZSTD_VERSION_NUMBER >= 700
    size_t res = 0;
    if (zstd_cdict_ != nullptr) {
      res = ZSTD_freeCDict(zstd_cdict_);
    }
    assert(res == 0);  // Last I checked they can't fail
    (void)res;         // prevent unused var warning
#endif  // ZSTD_VERSION_NUMBER >= 700
  }

#if ZSTD_VERSION_NUMBER >= 700
  // Digested dictionary, or nullptr if none was built.
  const ZSTD_CDict* GetDigestedZstdCDict() const { return zstd_cdict_; }
#endif  // ZSTD_VERSION_NUMBER >= 700

  // Raw (undigested) dictionary bytes.
  Slice GetRawDict() const { return dict_; }

  // Process-wide empty-dictionary singleton.
  static const CompressionDict& GetEmptyDict() {
    static CompressionDict empty_dict{};
    return empty_dict;
  }

  CompressionDict() = default;
  // Disable copy/move
  CompressionDict(const CompressionDict&) = delete;
  CompressionDict& operator=(const CompressionDict&) = delete;
  CompressionDict(CompressionDict&&) = delete;
  CompressionDict& operator=(CompressionDict&&) = delete;
};
216
217 // Holds dictionary and related data, like ZSTD's digested uncompression
218 // dictionary.
219 struct UncompressionDict {
220 // Block containing the data for the compression dictionary in case the
221 // constructor that takes a string parameter is used.
222 std::string dict_;
223
224 // Block containing the data for the compression dictionary in case the
225 // constructor that takes a Slice parameter is used and the passed in
226 // CacheAllocationPtr is not nullptr.
227 CacheAllocationPtr allocation_;
228
229 // Slice pointing to the compression dictionary data. Can point to
230 // dict_, allocation_, or some other memory location, depending on how
231 // the object was constructed.
232 Slice slice_;
233
234 #ifdef ROCKSDB_ZSTD_DDICT
235 // Processed version of the contents of slice_ for ZSTD compression.
236 ZSTD_DDict* zstd_ddict_ = nullptr;
237 #endif // ROCKSDB_ZSTD_DDICT
238
239 #ifdef ROCKSDB_ZSTD_DDICT
UncompressionDictUncompressionDict240 UncompressionDict(std::string dict, bool using_zstd)
241 #else // ROCKSDB_ZSTD_DDICT
242 UncompressionDict(std::string dict, bool /* using_zstd */)
243 #endif // ROCKSDB_ZSTD_DDICT
244 : dict_(std::move(dict)), slice_(dict_) {
245 #ifdef ROCKSDB_ZSTD_DDICT
246 if (!slice_.empty() && using_zstd) {
247 zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size());
248 assert(zstd_ddict_ != nullptr);
249 }
250 #endif // ROCKSDB_ZSTD_DDICT
251 }
252
253 #ifdef ROCKSDB_ZSTD_DDICT
UncompressionDictUncompressionDict254 UncompressionDict(Slice slice, CacheAllocationPtr&& allocation,
255 bool using_zstd)
256 #else // ROCKSDB_ZSTD_DDICT
257 UncompressionDict(Slice slice, CacheAllocationPtr&& allocation,
258 bool /* using_zstd */)
259 #endif // ROCKSDB_ZSTD_DDICT
260 : allocation_(std::move(allocation)), slice_(std::move(slice)) {
261 #ifdef ROCKSDB_ZSTD_DDICT
262 if (!slice_.empty() && using_zstd) {
263 zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size());
264 assert(zstd_ddict_ != nullptr);
265 }
266 #endif // ROCKSDB_ZSTD_DDICT
267 }
268
UncompressionDictUncompressionDict269 UncompressionDict(UncompressionDict&& rhs)
270 : dict_(std::move(rhs.dict_)),
271 allocation_(std::move(rhs.allocation_)),
272 slice_(std::move(rhs.slice_))
273 #ifdef ROCKSDB_ZSTD_DDICT
274 ,
275 zstd_ddict_(rhs.zstd_ddict_)
276 #endif
277 {
278 #ifdef ROCKSDB_ZSTD_DDICT
279 rhs.zstd_ddict_ = nullptr;
280 #endif
281 }
282
~UncompressionDictUncompressionDict283 ~UncompressionDict() {
284 #ifdef ROCKSDB_ZSTD_DDICT
285 size_t res = 0;
286 if (zstd_ddict_ != nullptr) {
287 res = ZSTD_freeDDict(zstd_ddict_);
288 }
289 assert(res == 0); // Last I checked they can't fail
290 (void)res; // prevent unused var warning
291 #endif // ROCKSDB_ZSTD_DDICT
292 }
293
294 UncompressionDict& operator=(UncompressionDict&& rhs) {
295 if (this == &rhs) {
296 return *this;
297 }
298
299 dict_ = std::move(rhs.dict_);
300 allocation_ = std::move(rhs.allocation_);
301 slice_ = std::move(rhs.slice_);
302
303 #ifdef ROCKSDB_ZSTD_DDICT
304 zstd_ddict_ = rhs.zstd_ddict_;
305 rhs.zstd_ddict_ = nullptr;
306 #endif
307
308 return *this;
309 }
310
311 // The object is self-contained if the string constructor is used, or the
312 // Slice constructor is invoked with a non-null allocation. Otherwise, it
313 // is the caller's responsibility to ensure that the underlying storage
314 // outlives this object.
own_bytesUncompressionDict315 bool own_bytes() const { return !dict_.empty() || allocation_; }
316
GetRawDictUncompressionDict317 const Slice& GetRawDict() const { return slice_; }
318
319 #ifdef ROCKSDB_ZSTD_DDICT
GetDigestedZstdDDictUncompressionDict320 const ZSTD_DDict* GetDigestedZstdDDict() const { return zstd_ddict_; }
321 #endif // ROCKSDB_ZSTD_DDICT
322
GetEmptyDictUncompressionDict323 static const UncompressionDict& GetEmptyDict() {
324 static UncompressionDict empty_dict{};
325 return empty_dict;
326 }
327
ApproximateMemoryUsageUncompressionDict328 size_t ApproximateMemoryUsage() const {
329 size_t usage = sizeof(struct UncompressionDict);
330 usage += dict_.size();
331 if (allocation_) {
332 auto allocator = allocation_.get_deleter().allocator;
333 if (allocator) {
334 usage += allocator->UsableSize(allocation_.get(), slice_.size());
335 } else {
336 usage += slice_.size();
337 }
338 }
339 #ifdef ROCKSDB_ZSTD_DDICT
340 usage += ZSTD_sizeof_DDict(zstd_ddict_);
341 #endif // ROCKSDB_ZSTD_DDICT
342 return usage;
343 }
344
345 UncompressionDict() = default;
346 // Disable copy
347 UncompressionDict(const CompressionDict&) = delete;
348 UncompressionDict& operator=(const CompressionDict&) = delete;
349 };
350
// Per-operation compression state. For ZSTD (>= v0.5.0) this owns a native
// ZSTD_CCtx that can be reused across compress calls; for all other
// libraries it is an empty shell.
class CompressionContext {
 private:
#if defined(ZSTD) && (ZSTD_VERSION_NUMBER >= 500)
  ZSTD_CCtx* zstd_ctx_ = nullptr;
  // Allocates the native context, but only for ZSTD compression types.
  void CreateNativeContext(CompressionType type) {
    if (type == kZSTD || type == kZSTDNotFinalCompression) {
#ifdef ROCKSDB_ZSTD_CUSTOM_MEM
      // Route ZSTD's internal allocations through the jemalloc overrides.
      zstd_ctx_ =
          ZSTD_createCCtx_advanced(port::GetJeZstdAllocationOverrides());
#else   // ROCKSDB_ZSTD_CUSTOM_MEM
      zstd_ctx_ = ZSTD_createCCtx();
#endif  // ROCKSDB_ZSTD_CUSTOM_MEM
    }
  }
  void DestroyNativeContext() {
    if (zstd_ctx_ != nullptr) {
      ZSTD_freeCCtx(zstd_ctx_);
    }
  }

 public:
  // callable inside ZSTD_Compress
  ZSTD_CCtx* ZSTDPreallocCtx() const {
    assert(zstd_ctx_ != nullptr);
    return zstd_ctx_;
  }

#else   // ZSTD && (ZSTD_VERSION_NUMBER >= 500)
 private:
  // No-op fallbacks when ZSTD is unavailable.
  void CreateNativeContext(CompressionType /* type */) {}
  void DestroyNativeContext() {}
#endif  // ZSTD && (ZSTD_VERSION_NUMBER >= 500)
 public:
  explicit CompressionContext(CompressionType type) {
    CreateNativeContext(type);
  }
  ~CompressionContext() { DestroyNativeContext(); }
  CompressionContext(const CompressionContext&) = delete;
  CompressionContext& operator=(const CompressionContext&) = delete;
};
391
// Read-only bundle of everything a compression routine needs: options,
// reusable native context, preset dictionary, algorithm type, and a
// pass-through sample_for_compression value. Holds references only — all
// referenced objects must outlive this info object.
class CompressionInfo {
  const CompressionOptions& opts_;
  const CompressionContext& context_;
  const CompressionDict& dict_;
  const CompressionType type_;
  const uint64_t sample_for_compression_;

 public:
  CompressionInfo(const CompressionOptions& _opts,
                  const CompressionContext& _context,
                  const CompressionDict& _dict, CompressionType _type,
                  uint64_t _sample_for_compression)
      : opts_(_opts),
        context_(_context),
        dict_(_dict),
        type_(_type),
        sample_for_compression_(_sample_for_compression) {}

  const CompressionOptions& options() const { return opts_; }
  const CompressionContext& context() const { return context_; }
  const CompressionDict& dict() const { return dict_; }
  CompressionType type() const { return type_; }
  uint64_t SampleForCompression() const { return sample_for_compression_; }
};
416
// Per-operation uncompression state. For ZSTD types it borrows a cached
// ZSTD_DCtx from the process-wide CompressionContextCache and returns it
// to the cache on destruction.
class UncompressionContext {
 private:
  CompressionContextCache* ctx_cache_ = nullptr;
  ZSTDUncompressCachedData uncomp_cached_data_;

 public:
  struct NoCache {};
  // Do not use context cache, used by TableBuilder
  UncompressionContext(NoCache, CompressionType /* type */) {}

  explicit UncompressionContext(CompressionType type) {
    if (type == kZSTD || type == kZSTDNotFinalCompression) {
      ctx_cache_ = CompressionContextCache::Instance();
      uncomp_cached_data_ = ctx_cache_->GetCachedZSTDUncompressData();
    }
  }
  ~UncompressionContext() {
    // A cache index != -1 means the context belongs to the cache: hand the
    // slot back rather than letting uncomp_cached_data_ free the context.
    if (uncomp_cached_data_.GetCacheIndex() != -1) {
      assert(ctx_cache_ != nullptr);
      ctx_cache_->ReturnCachedZSTDUncompressData(
          uncomp_cached_data_.GetCacheIndex());
    }
  }
  UncompressionContext(const UncompressionContext&) = delete;
  UncompressionContext& operator=(const UncompressionContext&) = delete;

  // Native ZSTD context, or nullptr when none was obtained.
  ZSTDUncompressCachedData::ZSTDNativeContext GetZSTDContext() const {
    return uncomp_cached_data_.Get();
  }
};
447
// Read-only bundle for an uncompression routine: context, preset
// dictionary, and algorithm type. Holds references only — the referenced
// objects must outlive this info object.
class UncompressionInfo {
  const UncompressionContext& context_;
  const UncompressionDict& dict_;
  const CompressionType type_;

 public:
  UncompressionInfo(const UncompressionContext& _context,
                    const UncompressionDict& _dict, CompressionType _type)
      : context_(_context), dict_(_dict), type_(_type) {}

  const UncompressionContext& context() const { return context_; }
  const UncompressionDict& dict() const { return dict_; }
  CompressionType type() const { return type_; }
};
462
// Returns true iff this binary was built with Snappy support.
inline bool Snappy_Supported() {
#ifdef SNAPPY
  return true;
#else
  return false;
#endif
}
470
// Returns true iff this binary was built with zlib support.
inline bool Zlib_Supported() {
#ifdef ZLIB
  return true;
#else
  return false;
#endif
}
478
// Returns true iff this binary was built with bzip2 support.
inline bool BZip2_Supported() {
#ifdef BZIP2
  return true;
#else
  return false;
#endif
}
486
// Returns true iff this binary was built with LZ4 (and LZ4HC) support.
inline bool LZ4_Supported() {
#ifdef LZ4
  return true;
#else
  return false;
#endif
}
494
// Returns true iff this binary was built with Windows XPRESS support.
inline bool XPRESS_Supported() {
#ifdef XPRESS
  return true;
#else
  return false;
#endif
}
502
// Returns true iff the linked ZSTD library can produce the finalized ZSTD
// format. This is a runtime check of the library version, not just a
// compile-time flag.
inline bool ZSTD_Supported() {
#ifdef ZSTD
  // ZSTD format is finalized since version 0.8.0.
  return (ZSTD_versionNumber() >= 800);
#else
  return false;
#endif
}
511
// Returns true iff any ZSTD library is compiled in, regardless of version
// (used for kZSTDNotFinalCompression, which tolerates pre-final formats).
inline bool ZSTDNotFinal_Supported() {
#ifdef ZSTD
  return true;
#else
  return false;
#endif
}
519
CompressionTypeSupported(CompressionType compression_type)520 inline bool CompressionTypeSupported(CompressionType compression_type) {
521 switch (compression_type) {
522 case kNoCompression:
523 return true;
524 case kSnappyCompression:
525 return Snappy_Supported();
526 case kZlibCompression:
527 return Zlib_Supported();
528 case kBZip2Compression:
529 return BZip2_Supported();
530 case kLZ4Compression:
531 return LZ4_Supported();
532 case kLZ4HCCompression:
533 return LZ4_Supported();
534 case kXpressCompression:
535 return XPRESS_Supported();
536 case kZSTDNotFinalCompression:
537 return ZSTDNotFinal_Supported();
538 case kZSTD:
539 return ZSTD_Supported();
540 default:
541 assert(false);
542 return false;
543 }
544 }
545
CompressionTypeToString(CompressionType compression_type)546 inline std::string CompressionTypeToString(CompressionType compression_type) {
547 switch (compression_type) {
548 case kNoCompression:
549 return "NoCompression";
550 case kSnappyCompression:
551 return "Snappy";
552 case kZlibCompression:
553 return "Zlib";
554 case kBZip2Compression:
555 return "BZip2";
556 case kLZ4Compression:
557 return "LZ4";
558 case kLZ4HCCompression:
559 return "LZ4HC";
560 case kXpressCompression:
561 return "Xpress";
562 case kZSTD:
563 return "ZSTD";
564 case kZSTDNotFinalCompression:
565 return "ZSTDNotFinal";
566 case kDisableCompressionOption:
567 return "DisableOption";
568 default:
569 assert(false);
570 return "";
571 }
572 }
573
CompressionOptionsToString(CompressionOptions & compression_options)574 inline std::string CompressionOptionsToString(
575 CompressionOptions& compression_options) {
576 std::string result;
577 result.reserve(512);
578 result.append("window_bits=")
579 .append(ToString(compression_options.window_bits))
580 .append("; ");
581 result.append("level=")
582 .append(ToString(compression_options.level))
583 .append("; ");
584 result.append("strategy=")
585 .append(ToString(compression_options.strategy))
586 .append("; ");
587 result.append("max_dict_bytes=")
588 .append(ToString(compression_options.max_dict_bytes))
589 .append("; ");
590 result.append("zstd_max_train_bytes=")
591 .append(ToString(compression_options.zstd_max_train_bytes))
592 .append("; ");
593 result.append("enabled=")
594 .append(ToString(compression_options.enabled))
595 .append("; ");
596 return result;
597 }
598
// compress_format_version can have two values:
// 1 -- decompressed sizes for BZip2 and Zlib are not included in the
// compressed block. Also, decompressed sizes for LZ4 are encoded in a
// platform-dependent way.
// 2 -- Zlib, BZip2 and LZ4 encode the decompressed size as a Varint32 just
// before the start of the compressed block. The Snappy format is the same
// as in version 1.
605
// Compresses `input` (length bytes) with Snappy into `*output`. Returns
// true on success, false when Snappy support is not compiled in. Note that
// unlike Zlib/BZip2/LZ4, no decompressed-size header is prepended — the
// Snappy format is identical in compress_format_version 1 and 2.
inline bool Snappy_Compress(const CompressionInfo& /*info*/, const char* input,
                            size_t length, ::std::string* output) {
#ifdef SNAPPY
  // Size for the worst case, compress, then shrink to the actual length.
  output->resize(snappy::MaxCompressedLength(length));
  size_t outlen;
  snappy::RawCompress(input, length, &(*output)[0], &outlen);
  output->resize(outlen);
  return true;
#else
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
621
// Reads the decompressed length stored in a Snappy-compressed buffer into
// *result. Returns false on a malformed header or when Snappy support is
// not compiled in.
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
                                         size_t* result) {
#ifdef SNAPPY
  return snappy::GetUncompressedLength(input, length, result);
#else
  (void)input;
  (void)length;
  (void)result;
  return false;
#endif
}
633
// Decompresses `input` into `output`, which the caller must have sized via
// Snappy_GetUncompressedLength beforehand. Returns false on corrupt input
// or when Snappy support is not compiled in.
inline bool Snappy_Uncompress(const char* input, size_t length, char* output) {
#ifdef SNAPPY
  return snappy::RawUncompress(input, length, output);
#else
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
644
645 namespace compression {
646 // returns size
// Appends `length` (the decompressed size) to `*output` as a Varint32 and
// returns the resulting output size — i.e. the header length when called
// on an empty string, as the format-v2 compressors below do.
inline size_t PutDecompressedSizeInfo(std::string* output, uint32_t length) {
  PutVarint32(output, length);
  return output->size();
}
651
// Parses the Varint32 decompressed-size header from the front of
// *input_data into *output_len, advancing *input_data past the header and
// shrinking *input_length accordingly. Returns false if the varint is
// malformed or truncated (in which case the in/out params are unchanged).
inline bool GetDecompressedSizeInfo(const char** input_data,
                                    size_t* input_length,
                                    uint32_t* output_len) {
  auto new_input_data =
      GetVarint32Ptr(*input_data, *input_data + *input_length, output_len);
  if (new_input_data == nullptr) {
    return false;
  }
  *input_length -= (new_input_data - *input_data);
  *input_data = new_input_data;
  return true;
}
664 } // namespace compression
665
666 // compress_format_version == 1 -- decompressed size is not included in the
667 // block header
668 // compress_format_version == 2 -- decompressed size is included in the block
669 // header in varint32 format
670 // @param compression_dict Data for presetting the compression library's
671 // dictionary.
// Compresses `input` with zlib into `*output`. Returns true on success.
// Returns false when zlib is not compiled in, `length` exceeds 4GB, stream
// setup fails, or the compressed result would not fit in `length` bytes
// (i.e. compression did not shrink the data).
inline bool Zlib_Compress(const CompressionInfo& info,
                          uint32_t compress_format_version, const char* input,
                          size_t length, ::std::string* output) {
#ifdef ZLIB
  if (length > std::numeric_limits<uint32_t>::max()) {
    // Can't compress more than 4GB
    return false;
  }

  size_t output_header_len = 0;
  if (compress_format_version == 2) {
    // Format v2 prepends the decompressed size as a Varint32.
    output_header_len = compression::PutDecompressedSizeInfo(
        output, static_cast<uint32_t>(length));
  }
  // Resize output to be the plain data length.
  // This may not be big enough if the compression actually expands data.
  output->resize(output_header_len + length);

  // The memLevel parameter specifies how much memory should be allocated for
  // the internal compression state.
  // memLevel=1 uses minimum memory but is slow and reduces compression ratio.
  // memLevel=9 uses maximum memory for optimal speed.
  // The default value is 8. See zconf.h for more details.
  static const int memLevel = 8;
  int level;
  if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
    level = Z_DEFAULT_COMPRESSION;
  } else {
    level = info.options().level;
  }
  z_stream _stream;
  memset(&_stream, 0, sizeof(z_stream));
  int st = deflateInit2(&_stream, level, Z_DEFLATED, info.options().window_bits,
                        memLevel, info.options().strategy);
  if (st != Z_OK) {
    return false;
  }

  Slice compression_dict = info.dict().GetRawDict();
  if (compression_dict.size()) {
    // Initialize the compression library's dictionary
    st = deflateSetDictionary(
        &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()),
        static_cast<unsigned int>(compression_dict.size()));
    if (st != Z_OK) {
      deflateEnd(&_stream);
      return false;
    }
  }

  // Compress the input, and put compressed data in output.
  _stream.next_in = (Bytef*)input;
  _stream.avail_in = static_cast<unsigned int>(length);

  // Initialize the output size.
  _stream.avail_out = static_cast<unsigned int>(length);
  _stream.next_out = reinterpret_cast<Bytef*>(&(*output)[output_header_len]);

  bool compressed = false;
  st = deflate(&_stream, Z_FINISH);
  if (st == Z_STREAM_END) {
    compressed = true;
    // Trim the unused tail of the output buffer.
    output->resize(output->size() - _stream.avail_out);
  }
  // The only return value we really care about is Z_STREAM_END.
  // Z_OK means insufficient output space. This means the compression is
  // bigger than decompressed size. Just fail the compression in that case.

  deflateEnd(&_stream);
  return compressed;
#else
  (void)info;
  (void)compress_format_version;
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
751
752 // compress_format_version == 1 -- decompressed size is not included in the
753 // block header
754 // compress_format_version == 2 -- decompressed size is included in the block
755 // header in varint32 format
756 // @param compression_dict Data for presetting the compression library's
757 // dictionary.
// Decompresses `input_data` with zlib. On success, returns the allocated
// output block (from `allocator` if given) and stores the decompressed
// size in *decompress_size; on any error, returns nullptr.
// windowBits follows inflateInit2's convention: negative values mean raw
// deflate data; positive values have 32 added below so zlib/gzip headers
// are auto-detected.
inline CacheAllocationPtr Zlib_Uncompress(
    const UncompressionInfo& info, const char* input_data, size_t input_length,
    int* decompress_size, uint32_t compress_format_version,
    MemoryAllocator* allocator = nullptr, int windowBits = -14) {
#ifdef ZLIB
  uint32_t output_len = 0;
  if (compress_format_version == 2) {
    // Format v2 stores the exact decompressed size up front.
    if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
                                              &output_len)) {
      return nullptr;
    }
  } else {
    // Assume the decompressed data size will be about 5x the compressed
    // size, rounded up to the next page size; grow later if that guess is
    // too small.
    size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
    output_len = static_cast<uint32_t>(
        std::min(proposed_output_len,
                 static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
  }

  z_stream _stream;
  memset(&_stream, 0, sizeof(z_stream));

  // For raw inflate, the windowBits should be -8..-15.
  // If windowBits is bigger than zero, it will use either zlib
  // header or gzip header. Adding 32 to it will do automatic detection.
  int st =
      inflateInit2(&_stream, windowBits > 0 ? windowBits + 32 : windowBits);
  if (st != Z_OK) {
    return nullptr;
  }

  const Slice& compression_dict = info.dict().GetRawDict();
  if (compression_dict.size()) {
    // Initialize the compression library's dictionary
    st = inflateSetDictionary(
        &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()),
        static_cast<unsigned int>(compression_dict.size()));
    if (st != Z_OK) {
      return nullptr;
    }
  }

  _stream.next_in = (Bytef*)input_data;
  _stream.avail_in = static_cast<unsigned int>(input_length);

  auto output = AllocateBlock(output_len, allocator);

  _stream.next_out = (Bytef*)output.get();
  _stream.avail_out = static_cast<unsigned int>(output_len);

  bool done = false;
  while (!done) {
    st = inflate(&_stream, Z_SYNC_FLUSH);
    switch (st) {
      case Z_STREAM_END:
        done = true;
        break;
      case Z_OK: {
        // No output space. Increase the output space by 20%.
        // We should never run out of output space if
        // compress_format_version == 2
        assert(compress_format_version != 2);
        size_t old_sz = output_len;
        uint32_t output_len_delta = output_len / 5;
        // Grow by at least 10 bytes so tiny buffers still make progress.
        output_len += output_len_delta < 10 ? 10 : output_len_delta;
        auto tmp = AllocateBlock(output_len, allocator);
        memcpy(tmp.get(), output.get(), old_sz);
        output = std::move(tmp);

        // Set more output.
        _stream.next_out = (Bytef*)(output.get() + old_sz);
        _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
        break;
      }
      case Z_BUF_ERROR:
      default:
        inflateEnd(&_stream);
        return nullptr;
    }
  }

  // If we encoded decompressed block size, we should have no bytes left
  assert(compress_format_version != 2 || _stream.avail_out == 0);
  *decompress_size = static_cast<int>(output_len - _stream.avail_out);
  inflateEnd(&_stream);
  return output;
#else
  (void)info;
  (void)input_data;
  (void)input_length;
  (void)decompress_size;
  (void)compress_format_version;
  (void)allocator;
  (void)windowBits;
  return nullptr;
#endif
}
856
857 // compress_format_version == 1 -- decompressed size is not included in the
858 // block header
859 // compress_format_version == 2 -- decompressed size is included in the block
860 // header in varint32 format
// Compresses `input` with bzip2 into `*output`. Returns true on success.
// Returns false when bzip2 is not compiled in, `length` exceeds 4GB,
// stream setup fails, or the compressed result would not fit in `length`
// bytes (i.e. compression did not shrink the data).
inline bool BZip2_Compress(const CompressionInfo& /*info*/,
                           uint32_t compress_format_version, const char* input,
                           size_t length, ::std::string* output) {
#ifdef BZIP2
  if (length > std::numeric_limits<uint32_t>::max()) {
    // Can't compress more than 4GB
    return false;
  }
  size_t output_header_len = 0;
  if (compress_format_version == 2) {
    // Format v2 prepends the decompressed size as a Varint32.
    output_header_len = compression::PutDecompressedSizeInfo(
        output, static_cast<uint32_t>(length));
  }
  // Resize output to be the plain data length.
  // This may not be big enough if the compression actually expands data.
  output->resize(output_header_len + length);

  bz_stream _stream;
  memset(&_stream, 0, sizeof(bz_stream));

  // Block size 1 is 100K.
  // 0 is for silent.
  // 30 is the default workFactor
  int st = BZ2_bzCompressInit(&_stream, 1, 0, 30);
  if (st != BZ_OK) {
    return false;
  }

  // Compress the input, and put compressed data in output.
  _stream.next_in = (char*)input;
  _stream.avail_in = static_cast<unsigned int>(length);

  // Initialize the output size.
  _stream.avail_out = static_cast<unsigned int>(length);
  _stream.next_out = reinterpret_cast<char*>(&(*output)[output_header_len]);

  bool compressed = false;
  st = BZ2_bzCompress(&_stream, BZ_FINISH);
  if (st == BZ_STREAM_END) {
    compressed = true;
    // Trim the unused tail of the output buffer.
    output->resize(output->size() - _stream.avail_out);
  }
  // The only return value we really care about is BZ_STREAM_END.
  // BZ_FINISH_OK means insufficient output space. This means the compression
  // is bigger than decompressed size. Just fail the compression in that case.

  BZ2_bzCompressEnd(&_stream);
  return compressed;
#else
  (void)compress_format_version;
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
917
918 // compress_format_version == 1 -- decompressed size is not included in the
919 // block header
920 // compress_format_version == 2 -- decompressed size is included in the block
921 // header in varint32 format
inline CacheAllocationPtr BZip2_Uncompress(
    const char* input_data, size_t input_length, int* decompress_size,
    uint32_t compress_format_version, MemoryAllocator* allocator = nullptr) {
#ifdef BZIP2
  uint32_t output_len = 0;
  if (compress_format_version == 2) {
    // Format v2: the decompressed size is stored as a varint32 header;
    // reading it also advances input_data/input_length past the header.
    if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
                                              &output_len)) {
      return nullptr;
    }
  } else {
    // Format v1 does not record the decompressed size. Assume the
    // decompressed data will be 5x the compressed size, rounded up to the
    // next page size; the loop below grows the buffer if that guess is low.
    size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
    output_len = static_cast<uint32_t>(
        std::min(proposed_output_len,
                 static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
  }

  bz_stream _stream;
  memset(&_stream, 0, sizeof(bz_stream));

  // Args: verbosity (0 = silent), small-memory mode (0 = off).
  int st = BZ2_bzDecompressInit(&_stream, 0, 0);
  if (st != BZ_OK) {
    return nullptr;
  }

  // bzlib's API is not const-correct, hence the cast.
  _stream.next_in = (char*)input_data;
  _stream.avail_in = static_cast<unsigned int>(input_length);

  auto output = AllocateBlock(output_len, allocator);

  _stream.next_out = (char*)output.get();
  _stream.avail_out = static_cast<unsigned int>(output_len);

  bool done = false;
  while (!done) {
    st = BZ2_bzDecompress(&_stream);
    switch (st) {
      case BZ_STREAM_END:
        done = true;
        break;
      case BZ_OK: {
        // No output space. Increase the output space by 20%.
        // We should never run out of output space if
        // compress_format_version == 2
        assert(compress_format_version != 2);
        uint32_t old_sz = output_len;
        // NOTE: implicit double multiply, truncated back to uint32_t.
        output_len = output_len * 1.2;
        auto tmp = AllocateBlock(output_len, allocator);
        memcpy(tmp.get(), output.get(), old_sz);
        output = std::move(tmp);

        // Set more output.
        _stream.next_out = (char*)(output.get() + old_sz);
        _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
        break;
      }
      default:
        // Any other status code is a decompression error.
        BZ2_bzDecompressEnd(&_stream);
        return nullptr;
    }
  }

  // If we encoded decompressed block size, we should have no bytes left
  assert(compress_format_version != 2 || _stream.avail_out == 0);
  *decompress_size = static_cast<int>(output_len - _stream.avail_out);
  BZ2_bzDecompressEnd(&_stream);
  return output;
#else
  (void)input_data;
  (void)input_length;
  (void)decompress_size;
  (void)compress_format_version;
  (void)allocator;
  return nullptr;
#endif
}
1000
// compress_format_version == 1 -- decompressed size is included in the
// block header using memcpy, which makes the database non-portable
1003 // compress_format_version == 2 -- decompressed size is included in the block
1004 // header in varint32 format
1005 // @param compression_dict Data for presetting the compression library's
1006 // dictionary.
inline bool LZ4_Compress(const CompressionInfo& info,
                         uint32_t compress_format_version, const char* input,
                         size_t length, ::std::string* output) {
#ifdef LZ4
  if (length > std::numeric_limits<uint32_t>::max()) {
    // Can't compress more than 4GB
    return false;
  }

  size_t output_header_len = 0;
  if (compress_format_version == 2) {
    // new encoding, using varint32 to store size information
    output_header_len = compression::PutDecompressedSizeInfo(
        output, static_cast<uint32_t>(length));
  } else {
    // legacy encoding, which is not really portable (depends on big/little
    // endianness)
    output_header_len = 8;
    output->resize(output_header_len);
    char* p = const_cast<char*>(output->c_str());
    memcpy(p, &length, sizeof(length));
  }
  // Reserve worst-case compressed size; shrunk to the actual size below.
  int compress_bound = LZ4_compressBound(static_cast<int>(length));
  output->resize(static_cast<size_t>(output_header_len + compress_bound));

  int outlen;
#if LZ4_VERSION_NUMBER >= 10400  // r124+
  // Use the streaming API so a preset dictionary (if any) can be loaded
  // before compressing the block.
  LZ4_stream_t* stream = LZ4_createStream();
  Slice compression_dict = info.dict().GetRawDict();
  if (compression_dict.size()) {
    LZ4_loadDict(stream, compression_dict.data(),
                 static_cast<int>(compression_dict.size()));
  }
#if LZ4_VERSION_NUMBER >= 10700  // r129+
  // Final argument is acceleration = 1 (default speed/ratio trade-off).
  outlen =
      LZ4_compress_fast_continue(stream, input, &(*output)[output_header_len],
                                 static_cast<int>(length), compress_bound, 1);
#else  // up to r128
  outlen = LZ4_compress_limitedOutput_continue(
      stream, input, &(*output)[output_header_len], static_cast<int>(length),
      compress_bound);
#endif
  LZ4_freeStream(stream);
#else  // up to r123
  outlen = LZ4_compress_limitedOutput(input, &(*output)[output_header_len],
                                      static_cast<int>(length), compress_bound);
#endif  // LZ4_VERSION_NUMBER >= 10400

  // LZ4 reports failure (e.g. insufficient output space) by returning 0.
  if (outlen == 0) {
    return false;
  }
  output->resize(static_cast<size_t>(output_header_len + outlen));
  return true;
#else  // LZ4
  (void)info;
  (void)compress_format_version;
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
1069
// compress_format_version == 1 -- decompressed size is included in the
// block header using memcpy, which makes the database non-portable
1072 // compress_format_version == 2 -- decompressed size is included in the block
1073 // header in varint32 format
1074 // @param compression_dict Data for presetting the compression library's
1075 // dictionary.
inline CacheAllocationPtr LZ4_Uncompress(const UncompressionInfo& info,
                                         const char* input_data,
                                         size_t input_length,
                                         int* decompress_size,
                                         uint32_t compress_format_version,
                                         MemoryAllocator* allocator = nullptr) {
#ifdef LZ4
  uint32_t output_len = 0;
  if (compress_format_version == 2) {
    // new encoding, using varint32 to store size information
    if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
                                              &output_len)) {
      return nullptr;
    }
  } else {
    // legacy encoding, which is not really portable (depends on big/little
    // endianness)
    if (input_length < 8) {
      return nullptr;
    }
    // The legacy header occupies 8 bytes even though only
    // sizeof(output_len) == 4 bytes are read here; skip the full 8 to stay
    // in sync with what LZ4_Compress wrote.
    memcpy(&output_len, input_data, sizeof(output_len));
    input_length -= 8;
    input_data += 8;
  }

  auto output = AllocateBlock(output_len, allocator);
#if LZ4_VERSION_NUMBER >= 10400  // r124+
  // Streaming API so a preset dictionary (if any) can be installed first.
  LZ4_streamDecode_t* stream = LZ4_createStreamDecode();
  const Slice& compression_dict = info.dict().GetRawDict();
  if (compression_dict.size()) {
    LZ4_setStreamDecode(stream, compression_dict.data(),
                        static_cast<int>(compression_dict.size()));
  }
  *decompress_size = LZ4_decompress_safe_continue(
      stream, input_data, output.get(), static_cast<int>(input_length),
      static_cast<int>(output_len));
  LZ4_freeStreamDecode(stream);
#else  // up to r123
  *decompress_size = LZ4_decompress_safe(input_data, output.get(),
                                         static_cast<int>(input_length),
                                         static_cast<int>(output_len));
#endif  // LZ4_VERSION_NUMBER >= 10400

  // A negative result means malformed input / decode failure.
  if (*decompress_size < 0) {
    return nullptr;
  }
  assert(*decompress_size == static_cast<int>(output_len));
  return output;
#else  // LZ4
  (void)info;
  (void)input_data;
  (void)input_length;
  (void)decompress_size;
  (void)compress_format_version;
  (void)allocator;
  return nullptr;
#endif
}
1134
// compress_format_version == 1 -- decompressed size is included in the
// block header using memcpy, which makes the database non-portable
1137 // compress_format_version == 2 -- decompressed size is included in the block
1138 // header in varint32 format
1139 // @param compression_dict Data for presetting the compression library's
1140 // dictionary.
inline bool LZ4HC_Compress(const CompressionInfo& info,
                           uint32_t compress_format_version, const char* input,
                           size_t length, ::std::string* output) {
#ifdef LZ4
  if (length > std::numeric_limits<uint32_t>::max()) {
    // Can't compress more than 4GB
    return false;
  }

  size_t output_header_len = 0;
  if (compress_format_version == 2) {
    // new encoding, using varint32 to store size information
    output_header_len = compression::PutDecompressedSizeInfo(
        output, static_cast<uint32_t>(length));
  } else {
    // legacy encoding, which is not really portable (depends on big/little
    // endianness)
    output_header_len = 8;
    output->resize(output_header_len);
    char* p = const_cast<char*>(output->c_str());
    memcpy(p, &length, sizeof(length));
  }
  // Reserve worst-case compressed size; shrunk to the actual size below.
  int compress_bound = LZ4_compressBound(static_cast<int>(length));
  output->resize(static_cast<size_t>(output_header_len + compress_bound));

  int outlen;
  int level;
  if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
    level = 0;  // lz4hc.h says any value < 1 will be sanitized to default
  } else {
    level = info.options().level;
  }
#if LZ4_VERSION_NUMBER >= 10400  // r124+
  LZ4_streamHC_t* stream = LZ4_createStreamHC();
  LZ4_resetStreamHC(stream, level);
  Slice compression_dict = info.dict().GetRawDict();
  // When there is no dictionary, nullptr with size 0 is passed.
  const char* compression_dict_data =
      compression_dict.size() > 0 ? compression_dict.data() : nullptr;
  size_t compression_dict_size = compression_dict.size();
  LZ4_loadDictHC(stream, compression_dict_data,
                 static_cast<int>(compression_dict_size));

#if LZ4_VERSION_NUMBER >= 10700  // r129+
  outlen =
      LZ4_compress_HC_continue(stream, input, &(*output)[output_header_len],
                               static_cast<int>(length), compress_bound);
#else  // r124-r128
  outlen = LZ4_compressHC_limitedOutput_continue(
      stream, input, &(*output)[output_header_len], static_cast<int>(length),
      compress_bound);
#endif  // LZ4_VERSION_NUMBER >= 10700
  LZ4_freeStreamHC(stream);

#elif LZ4_VERSION_MAJOR  // r113-r123
  // No streaming HC API available; `level` can still be passed directly.
  outlen = LZ4_compressHC2_limitedOutput(input, &(*output)[output_header_len],
                                         static_cast<int>(length),
                                         compress_bound, level);
#else  // up to r112
  // Oldest API: no level parameter at all.
  outlen =
      LZ4_compressHC_limitedOutput(input, &(*output)[output_header_len],
                                   static_cast<int>(length), compress_bound);
#endif  // LZ4_VERSION_NUMBER >= 10400

  // 0 signals failure (e.g. output would not fit in compress_bound).
  if (outlen == 0) {
    return false;
  }
  output->resize(static_cast<size_t>(output_header_len + outlen));
  return true;
#else  // LZ4
  (void)info;
  (void)compress_format_version;
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
1218
#ifdef XPRESS
// Compress `input` (length bytes) with Windows XPRESS into `output`.
// Thin forwarder to the platform implementation; returns its success flag.
inline bool XPRESS_Compress(const char* input, size_t length,
                            std::string* output) {
  return port::xpress::Compress(input, length, output);
}
#else
// Stub used when XPRESS support is compiled out: always reports failure.
inline bool XPRESS_Compress(const char* /*input*/, size_t /*length*/,
                            std::string* /*output*/) {
  return false;
}
#endif
1230
#ifdef XPRESS
// Decompress an XPRESS-compressed block, forwarding to the platform
// implementation. NOTE(review): returns a raw char*; the ownership /
// deallocation contract is defined by port::xpress::Decompress -- confirm
// there before freeing.
inline char* XPRESS_Uncompress(const char* input_data, size_t input_length,
                               int* decompress_size) {
  return port::xpress::Decompress(input_data, input_length, decompress_size);
}
#else
// Stub used when XPRESS support is compiled out: always fails with nullptr.
inline char* XPRESS_Uncompress(const char* /*input_data*/,
                               size_t /*input_length*/,
                               int* /*decompress_size*/) {
  return nullptr;
}
#endif
1243
// Compress `input` (length bytes) with ZSTD into `output`, prefixed with the
// varint32-encoded decompressed size (format_version 2 layout). Compression
// level, preallocated context, and optional dictionary come from `info`.
// Returns true on success; on failure `output` may contain partial data and
// must not be used.
//
// Fix: ZSTD reports errors as large nonzero size_t codes, not 0, so the
// previous `outlen == 0` checks let error codes flow into resize() and
// silently skipped the raw-dictionary fallback. Use ZSTD_isError().
inline bool ZSTD_Compress(const CompressionInfo& info, const char* input,
                          size_t length, ::std::string* output) {
#ifdef ZSTD
  if (length > std::numeric_limits<uint32_t>::max()) {
    // Can't compress more than 4GB
    return false;
  }

  size_t output_header_len = compression::PutDecompressedSizeInfo(
      output, static_cast<uint32_t>(length));

  // Reserve worst-case compressed size; shrunk to the actual size below.
  size_t compressBound = ZSTD_compressBound(length);
  output->resize(static_cast<size_t>(output_header_len + compressBound));
  size_t outlen = 0;
  int level;
  if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
    // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
    // https://github.com/facebook/zstd/issues/1148
    level = 3;
  } else {
    level = info.options().level;
  }
#if ZSTD_VERSION_NUMBER >= 500  // v0.5.0+
  ZSTD_CCtx* context = info.context().ZSTDPreallocCtx();
  assert(context != nullptr);
#if ZSTD_VERSION_NUMBER >= 700  // v0.7.0+
  if (info.dict().GetDigestedZstdCDict() != nullptr) {
    // Fast path: compress with the pre-digested dictionary.
    outlen = ZSTD_compress_usingCDict(context, &(*output)[output_header_len],
                                      compressBound, input, length,
                                      info.dict().GetDigestedZstdCDict());
    if (ZSTD_isError(outlen)) {
      // Error codes are large nonzero values; reset so we fall through to
      // the raw-dictionary path instead of treating one as a valid length.
      outlen = 0;
    }
  }
#endif  // ZSTD_VERSION_NUMBER >= 700
  if (outlen == 0) {
    outlen = ZSTD_compress_usingDict(context, &(*output)[output_header_len],
                                     compressBound, input, length,
                                     info.dict().GetRawDict().data(),
                                     info.dict().GetRawDict().size(), level);
  }
#else  // up to v0.4.x
  outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, input,
                         length, level);
#endif  // ZSTD_VERSION_NUMBER >= 500
  if (outlen == 0 || ZSTD_isError(outlen)) {
    // 0 should not happen for nonempty input, and any error code means the
    // block could not be compressed; report failure either way.
    return false;
  }
  output->resize(output_header_len + outlen);
  return true;
#else  // ZSTD
  (void)info;
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
1299
1300 // @param compression_dict Data for presetting the compression library's
1301 // dictionary.
1302 inline CacheAllocationPtr ZSTD_Uncompress(
1303 const UncompressionInfo& info, const char* input_data, size_t input_length,
1304 int* decompress_size, MemoryAllocator* allocator = nullptr) {
1305 #ifdef ZSTD
1306 uint32_t output_len = 0;
1307 if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
1308 &output_len)) {
1309 return nullptr;
1310 }
1311
1312 auto output = AllocateBlock(output_len, allocator);
1313 size_t actual_output_length = 0;
1314 #if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+
1315 ZSTD_DCtx* context = info.context().GetZSTDContext();
1316 assert(context != nullptr);
1317 #ifdef ROCKSDB_ZSTD_DDICT
1318 if (info.dict().GetDigestedZstdDDict() != nullptr) {
1319 actual_output_length = ZSTD_decompress_usingDDict(
1320 context, output.get(), output_len, input_data, input_length,
1321 info.dict().GetDigestedZstdDDict());
1322 }
1323 #endif // ROCKSDB_ZSTD_DDICT
1324 if (actual_output_length == 0) {
1325 actual_output_length = ZSTD_decompress_usingDict(
1326 context, output.get(), output_len, input_data, input_length,
1327 info.dict().GetRawDict().data(), info.dict().GetRawDict().size());
1328 }
1329 #else // up to v0.4.x
1330 (void)info;
1331 actual_output_length =
1332 ZSTD_decompress(output.get(), output_len, input_data, input_length);
1333 #endif // ZSTD_VERSION_NUMBER >= 500
1334 assert(actual_output_length == output_len);
1335 *decompress_size = static_cast<int>(actual_output_length);
1336 return output;
1337 #else // ZSTD
1338 (void)info;
1339 (void)input_data;
1340 (void)input_length;
1341 (void)decompress_size;
1342 (void)allocator;
1343 return nullptr;
1344 #endif
1345 }
1346
// Reports whether ZSTD dictionary training can be used with the library this
// binary is actually running against.
inline bool ZSTD_TrainDictionarySupported() {
#ifdef ZSTD
  // The trainer has shipped since v0.6.1 for static linking, but dynamic
  // linking only exposes it from v1.1.3 onward, so gate on the runtime
  // library version and require v1.1.3+ across the board.
  constexpr unsigned kMinTrainerVersion = 10103;  // v1.1.3
  return ZSTD_versionNumber() >= kMinTrainerVersion;
#else
  return false;
#endif
}
1357
// Train a ZSTD dictionary of at most `max_dict_bytes` from the concatenated
// sample buffer `samples`, whose per-sample byte counts are given in
// `sample_lens`. Returns the trained dictionary, or "" when there are no
// samples, training fails, or this build lacks trainer support.
inline std::string ZSTD_TrainDictionary(const std::string& samples,
                                        const std::vector<size_t>& sample_lens,
                                        size_t max_dict_bytes) {
  // Dictionary trainer is available since v0.6.1 for static linking, but not
  // available for dynamic linking until v1.1.3. For now we enable the feature
  // in v1.1.3+ only.
#if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
  // The sample buffer and the length list must agree on emptiness.
  assert(samples.empty() == sample_lens.empty());
  if (samples.empty()) {
    return "";
  }
  std::string trained(max_dict_bytes, '\0');
  const size_t rv = ZDICT_trainFromBuffer(
      &trained[0], max_dict_bytes, samples.data(), sample_lens.data(),
      static_cast<unsigned>(sample_lens.size()));
  if (ZDICT_isError(rv)) {
    return "";
  }
  assert(rv <= max_dict_bytes);
  trained.resize(rv);
  return trained;
#else  // up to v1.1.2
  assert(false);
  (void)samples;
  (void)sample_lens;
  (void)max_dict_bytes;
  return "";
#endif  // ZSTD_VERSION_NUMBER >= 10103
}
1387
// Convenience overload: treats `samples` as back-to-back fixed-size samples
// of (1 << sample_len_shift) bytes each and trains a dictionary from them.
inline std::string ZSTD_TrainDictionary(const std::string& samples,
                                        size_t sample_len_shift,
                                        size_t max_dict_bytes) {
  // Dictionary trainer is available since v0.6.1 for static linking, but not
  // available for dynamic linking until v1.1.3. For now we enable the feature
  // in v1.1.3+ only.
#if ZSTD_VERSION_NUMBER >= 10103  // v1.1.3+
  // Any partial sample at the tail of "samples" is dropped by the shift.
  const size_t sample_len = size_t(1) << sample_len_shift;
  const size_t num_samples = samples.size() >> sample_len_shift;
  std::vector<size_t> lens(num_samples, sample_len);
  return ZSTD_TrainDictionary(samples, lens, max_dict_bytes);
#else  // up to v1.1.2
  assert(false);
  (void)samples;
  (void)sample_len_shift;
  (void)max_dict_bytes;
  return "";
#endif  // ZSTD_VERSION_NUMBER >= 10103
}
1406
1407 } // namespace ROCKSDB_NAMESPACE
1408