1 /*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <folly/compression/Zlib.h>
18
19 #if FOLLY_HAVE_LIBZ
20
#include <algorithm>
#include <limits>

#include <folly/Conv.h>
#include <folly/Optional.h>
#include <folly/Range.h>
#include <folly/ScopeGuard.h>
#include <folly/compression/Compression.h>
#include <folly/compression/Utils.h>
#include <folly/io/Cursor.h>
28
29 using folly::io::compression::detail::dataStartsWithLE;
30 using folly::io::compression::detail::prefixToStringLE;
31
32 namespace folly {
33 namespace io {
34 namespace zlib {
35
36 namespace {
37
isValidStrategy(int strategy)38 bool isValidStrategy(int strategy) {
39 std::array<int, 5> strategies{{
40 Z_DEFAULT_STRATEGY,
41 Z_FILTERED,
42 Z_HUFFMAN_ONLY,
43 Z_RLE,
44 Z_FIXED,
45 }};
46 return std::any_of(strategies.begin(), strategies.end(), [&](int i) {
47 return i == strategy;
48 });
49 }
50
getWindowBits(Options::Format format,int windowSize)51 int getWindowBits(Options::Format format, int windowSize) {
52 switch (format) {
53 case Options::Format::ZLIB:
54 return windowSize;
55 case Options::Format::GZIP:
56 return windowSize + 16;
57 case Options::Format::RAW:
58 return -windowSize;
59 case Options::Format::AUTO:
60 return windowSize + 32;
61 default:
62 return windowSize;
63 }
64 }
65
getCodecType(Options options)66 CodecType getCodecType(Options options) {
67 if (options.windowSize == 15 && options.format == Options::Format::ZLIB) {
68 return CodecType::ZLIB;
69 } else if (
70 options.windowSize == 15 && options.format == Options::Format::GZIP) {
71 return CodecType::GZIP;
72 } else {
73 return CodecType::USER_DEFINED;
74 }
75 }
76
77 class ZlibStreamCodec final : public StreamCodec {
78 public:
79 static std::unique_ptr<Codec> createCodec(Options options, int level);
80 static std::unique_ptr<StreamCodec> createStream(Options options, int level);
81
82 explicit ZlibStreamCodec(Options options, int level);
83 ~ZlibStreamCodec() override;
84
85 std::vector<std::string> validPrefixes() const override;
86 bool canUncompress(
87 const IOBuf* data, Optional<uint64_t> uncompressedLength) const override;
88
89 private:
90 uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
91
92 void doResetStream() override;
93 bool doCompressStream(
94 ByteRange& input,
95 MutableByteRange& output,
96 StreamCodec::FlushOp flush) override;
97 bool doUncompressStream(
98 ByteRange& input,
99 MutableByteRange& output,
100 StreamCodec::FlushOp flush) override;
101
102 void resetDeflateStream();
103 void resetInflateStream();
104
105 Options options_;
106
107 Optional<z_stream> deflateStream_{};
108 Optional<z_stream> inflateStream_{};
109 int level_;
110 bool needReset_{true};
111 };
// The two gzip magic bytes (0x1F followed by 0x8B) as a little-endian 16-bit
// value.
constexpr uint16_t kGZIPMagicLE{0x8B1F};
113
validPrefixes() const114 std::vector<std::string> ZlibStreamCodec::validPrefixes() const {
115 if (type() == CodecType::ZLIB) {
116 // Zlib streams start with a 2 byte header.
117 //
118 // 0 1
119 // +---+---+
120 // |CMF|FLG|
121 // +---+---+
122 //
123 // We won't restrict the values of any sub-fields except as described below.
124 //
125 // The lowest 4 bits of CMF is the compression method (CM).
126 // CM == 0x8 is the deflate compression method, which is currently the only
127 // supported compression method, so any valid prefix must have CM == 0x8.
128 //
129 // The lowest 5 bits of FLG is FCHECK.
130 // FCHECK must be such that the two header bytes are a multiple of 31 when
131 // interpreted as a big endian 16-bit number.
132 std::vector<std::string> result;
133 // 16 values for the first byte, 8 values for the second byte.
134 // There are also 4 combinations where both 0x00 and 0x1F work as FCHECK.
135 result.reserve(132);
136 // Select all values for the CMF byte that use the deflate algorithm 0x8.
137 for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) {
138 // Select all values for the FLG, but leave FCHECK as 0 since it's fixed.
139 for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) {
140 uint16_t prefix = first | second;
141 // Compute FCHECK.
142 prefix += 31 - (prefix % 31);
143 result.push_back(prefixToStringLE(Endian::big(prefix)));
144 // zlib won't produce this, but it is a valid prefix.
145 if ((prefix & 0x1F) == 31) {
146 prefix -= 31;
147 result.push_back(prefixToStringLE(Endian::big(prefix)));
148 }
149 }
150 }
151 return result;
152 } else if (type() == CodecType::GZIP) {
153 // The gzip frame starts with 2 magic bytes.
154 return {prefixToStringLE(kGZIPMagicLE)};
155 } else {
156 return {};
157 }
158 }
159
canUncompress(const IOBuf * data,Optional<uint64_t>) const160 bool ZlibStreamCodec::canUncompress(
161 const IOBuf* data, Optional<uint64_t>) const {
162 if (type() == CodecType::ZLIB) {
163 uint16_t value;
164 Cursor cursor{data};
165 if (!cursor.tryReadBE(value)) {
166 return false;
167 }
168 // zlib compressed if using deflate and is a multiple of 31.
169 return (value & 0x0F00) == 0x0800 && value % 31 == 0;
170 } else if (type() == CodecType::GZIP) {
171 return dataStartsWithLE(data, kGZIPMagicLE);
172 } else {
173 return false;
174 }
175 }
176
doMaxCompressedLength(uint64_t uncompressedLength) const177 uint64_t ZlibStreamCodec::doMaxCompressedLength(
178 uint64_t uncompressedLength) const {
179 // When passed a nullptr, deflateBound() adds 6 bytes for a zlib wrapper. A
180 // gzip wrapper is 18 bytes, so we add the 12 byte difference.
181 return deflateBound(nullptr, uncompressedLength) +
182 (options_.format == Options::Format::GZIP ? 12 : 0);
183 }
184
createCodec(Options options,int level)185 std::unique_ptr<Codec> ZlibStreamCodec::createCodec(
186 Options options, int level) {
187 return std::make_unique<ZlibStreamCodec>(options, level);
188 }
189
createStream(Options options,int level)190 std::unique_ptr<StreamCodec> ZlibStreamCodec::createStream(
191 Options options, int level) {
192 return std::make_unique<ZlibStreamCodec>(options, level);
193 }
194
// Returns true iff `value` lies in the closed interval [low, high].
bool inBounds(int value, int low, int high) {
  const bool outside = value < low || value > high;
  return !outside;
}
198
zlibConvertLevel(int level)199 int zlibConvertLevel(int level) {
200 switch (level) {
201 case COMPRESSION_LEVEL_FASTEST:
202 return 1;
203 case COMPRESSION_LEVEL_DEFAULT:
204 return 6;
205 case COMPRESSION_LEVEL_BEST:
206 return 9;
207 }
208 if (!inBounds(level, 0, 9)) {
209 throw std::invalid_argument(
210 to<std::string>("ZlibStreamCodec: invalid level: ", level));
211 }
212 return level;
213 }
214
ZlibStreamCodec(Options options,int level)215 ZlibStreamCodec::ZlibStreamCodec(Options options, int level)
216 : StreamCodec(
217 getCodecType(options),
218 zlibConvertLevel(level),
219 getCodecType(options) == CodecType::GZIP ? "gzip" : "zlib"),
220 level_(zlibConvertLevel(level)) {
221 options_ = options;
222
223 // Although zlib allows a windowSize of 8..15, a value of 8 is not
224 // properly supported and is treated as a value of 9. This means data deflated
225 // with windowSize==8 can not be re-inflated with windowSize==8. windowSize==8
226 // is also not supported for gzip and raw deflation.
227 // Hence, the codec supports only 9..15.
228 if (!inBounds(options_.windowSize, 9, 15)) {
229 throw std::invalid_argument(to<std::string>(
230 "ZlibStreamCodec: invalid windowSize option: ", options.windowSize));
231 }
232 if (!inBounds(options_.memLevel, 1, 9)) {
233 throw std::invalid_argument(to<std::string>(
234 "ZlibStreamCodec: invalid memLevel option: ", options.memLevel));
235 }
236 if (!isValidStrategy(options_.strategy)) {
237 throw std::invalid_argument(to<std::string>(
238 "ZlibStreamCodec: invalid strategy: ", options.strategy));
239 }
240 }
241
~ZlibStreamCodec()242 ZlibStreamCodec::~ZlibStreamCodec() {
243 if (deflateStream_) {
244 deflateEnd(deflateStream_.get_pointer());
245 deflateStream_.reset();
246 }
247 if (inflateStream_) {
248 inflateEnd(inflateStream_.get_pointer());
249 inflateStream_.reset();
250 }
251 }
252
doResetStream()253 void ZlibStreamCodec::doResetStream() {
254 needReset_ = true;
255 }
256
resetDeflateStream()257 void ZlibStreamCodec::resetDeflateStream() {
258 if (deflateStream_) {
259 int const rc = deflateReset(deflateStream_.get_pointer());
260 if (rc != Z_OK) {
261 deflateStream_.reset();
262 throw std::runtime_error(
263 to<std::string>("ZlibStreamCodec: deflateReset error: ", rc));
264 }
265 return;
266 }
267 deflateStream_ = z_stream{};
268
269 // The automatic header detection format is only for inflation.
270 // Use zlib for deflation if the format is auto.
271 int const windowBits = getWindowBits(
272 options_.format == Options::Format::AUTO ? Options::Format::ZLIB
273 : options_.format,
274 options_.windowSize);
275
276 int const rc = deflateInit2(
277 deflateStream_.get_pointer(),
278 level_,
279 Z_DEFLATED,
280 windowBits,
281 options_.memLevel,
282 options_.strategy);
283 if (rc != Z_OK) {
284 deflateStream_.reset();
285 throw std::runtime_error(
286 to<std::string>("ZlibStreamCodec: deflateInit error: ", rc));
287 }
288 }
289
resetInflateStream()290 void ZlibStreamCodec::resetInflateStream() {
291 if (inflateStream_) {
292 int const rc = inflateReset(inflateStream_.get_pointer());
293 if (rc != Z_OK) {
294 inflateStream_.reset();
295 throw std::runtime_error(
296 to<std::string>("ZlibStreamCodec: inflateReset error: ", rc));
297 }
298 return;
299 }
300 inflateStream_ = z_stream{};
301 int const rc = inflateInit2(
302 inflateStream_.get_pointer(),
303 getWindowBits(options_.format, options_.windowSize));
304 if (rc != Z_OK) {
305 inflateStream_.reset();
306 throw std::runtime_error(
307 to<std::string>("ZlibStreamCodec: inflateInit error: ", rc));
308 }
309 }
310
zlibTranslateFlush(StreamCodec::FlushOp flush)311 int zlibTranslateFlush(StreamCodec::FlushOp flush) {
312 switch (flush) {
313 case StreamCodec::FlushOp::NONE:
314 return Z_NO_FLUSH;
315 case StreamCodec::FlushOp::FLUSH:
316 return Z_SYNC_FLUSH;
317 case StreamCodec::FlushOp::END:
318 return Z_FINISH;
319 default:
320 throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
321 }
322 }
323
zlibThrowOnError(int rc)324 int zlibThrowOnError(int rc) {
325 switch (rc) {
326 case Z_OK:
327 case Z_BUF_ERROR:
328 case Z_STREAM_END:
329 return rc;
330 default:
331 throw std::runtime_error(to<std::string>("ZlibStreamCodec: error: ", rc));
332 }
333 }
334
doCompressStream(ByteRange & input,MutableByteRange & output,StreamCodec::FlushOp flush)335 bool ZlibStreamCodec::doCompressStream(
336 ByteRange& input, MutableByteRange& output, StreamCodec::FlushOp flush) {
337 if (needReset_) {
338 resetDeflateStream();
339 needReset_ = false;
340 }
341 DCHECK(deflateStream_.has_value());
342 // zlib will return Z_STREAM_ERROR if output.data() is null.
343 if (output.data() == nullptr) {
344 return false;
345 }
346 deflateStream_->next_in = const_cast<uint8_t*>(input.data());
347 deflateStream_->avail_in = input.size();
348 deflateStream_->next_out = output.data();
349 deflateStream_->avail_out = output.size();
350 SCOPE_EXIT {
351 input.uncheckedAdvance(input.size() - deflateStream_->avail_in);
352 output.uncheckedAdvance(output.size() - deflateStream_->avail_out);
353 };
354 int const rc = zlibThrowOnError(
355 deflate(deflateStream_.get_pointer(), zlibTranslateFlush(flush)));
356 switch (flush) {
357 case StreamCodec::FlushOp::NONE:
358 return false;
359 case StreamCodec::FlushOp::FLUSH:
360 return deflateStream_->avail_in == 0 && deflateStream_->avail_out != 0;
361 case StreamCodec::FlushOp::END:
362 return rc == Z_STREAM_END;
363 default:
364 throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
365 }
366 }
367
doUncompressStream(ByteRange & input,MutableByteRange & output,StreamCodec::FlushOp flush)368 bool ZlibStreamCodec::doUncompressStream(
369 ByteRange& input, MutableByteRange& output, StreamCodec::FlushOp flush) {
370 if (needReset_) {
371 resetInflateStream();
372 needReset_ = false;
373 }
374 DCHECK(inflateStream_.has_value());
375 // zlib will return Z_STREAM_ERROR if output.data() is null.
376 if (output.data() == nullptr) {
377 return false;
378 }
379 inflateStream_->next_in = const_cast<uint8_t*>(input.data());
380 inflateStream_->avail_in = input.size();
381 inflateStream_->next_out = output.data();
382 inflateStream_->avail_out = output.size();
383 SCOPE_EXIT {
384 input.advance(input.size() - inflateStream_->avail_in);
385 output.advance(output.size() - inflateStream_->avail_out);
386 };
387 int const rc = zlibThrowOnError(
388 inflate(inflateStream_.get_pointer(), zlibTranslateFlush(flush)));
389 return rc == Z_STREAM_END;
390 }
391
392 } // namespace
393
defaultGzipOptions()394 Options defaultGzipOptions() {
395 return Options(Options::Format::GZIP);
396 }
397
defaultZlibOptions()398 Options defaultZlibOptions() {
399 return Options(Options::Format::ZLIB);
400 }
401
getCodec(Options options,int level)402 std::unique_ptr<Codec> getCodec(Options options, int level) {
403 return ZlibStreamCodec::createCodec(options, level);
404 }
405
getStreamCodec(Options options,int level)406 std::unique_ptr<StreamCodec> getStreamCodec(Options options, int level) {
407 return ZlibStreamCodec::createStream(options, level);
408 }
409
410 } // namespace zlib
411 } // namespace io
412 } // namespace folly
413
414 #endif // FOLLY_HAVE_LIBZ
415