1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <folly/compression/Zlib.h>
18 
19 #if FOLLY_HAVE_LIBZ
20 
#include <algorithm>
#include <limits>

#include <folly/Conv.h>
#include <folly/Optional.h>
#include <folly/Range.h>
#include <folly/ScopeGuard.h>
#include <folly/compression/Compression.h>
#include <folly/compression/Utils.h>
#include <folly/io/Cursor.h>
28 
29 using folly::io::compression::detail::dataStartsWithLE;
30 using folly::io::compression::detail::prefixToStringLE;
31 
32 namespace folly {
33 namespace io {
34 namespace zlib {
35 
36 namespace {
37 
isValidStrategy(int strategy)38 bool isValidStrategy(int strategy) {
39   std::array<int, 5> strategies{{
40       Z_DEFAULT_STRATEGY,
41       Z_FILTERED,
42       Z_HUFFMAN_ONLY,
43       Z_RLE,
44       Z_FIXED,
45   }};
46   return std::any_of(strategies.begin(), strategies.end(), [&](int i) {
47     return i == strategy;
48   });
49 }
50 
getWindowBits(Options::Format format,int windowSize)51 int getWindowBits(Options::Format format, int windowSize) {
52   switch (format) {
53     case Options::Format::ZLIB:
54       return windowSize;
55     case Options::Format::GZIP:
56       return windowSize + 16;
57     case Options::Format::RAW:
58       return -windowSize;
59     case Options::Format::AUTO:
60       return windowSize + 32;
61     default:
62       return windowSize;
63   }
64 }
65 
getCodecType(Options options)66 CodecType getCodecType(Options options) {
67   if (options.windowSize == 15 && options.format == Options::Format::ZLIB) {
68     return CodecType::ZLIB;
69   } else if (
70       options.windowSize == 15 && options.format == Options::Format::GZIP) {
71     return CodecType::GZIP;
72   } else {
73     return CodecType::USER_DEFINED;
74   }
75 }
76 
77 class ZlibStreamCodec final : public StreamCodec {
78  public:
79   static std::unique_ptr<Codec> createCodec(Options options, int level);
80   static std::unique_ptr<StreamCodec> createStream(Options options, int level);
81 
82   explicit ZlibStreamCodec(Options options, int level);
83   ~ZlibStreamCodec() override;
84 
85   std::vector<std::string> validPrefixes() const override;
86   bool canUncompress(
87       const IOBuf* data, Optional<uint64_t> uncompressedLength) const override;
88 
89  private:
90   uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
91 
92   void doResetStream() override;
93   bool doCompressStream(
94       ByteRange& input,
95       MutableByteRange& output,
96       StreamCodec::FlushOp flush) override;
97   bool doUncompressStream(
98       ByteRange& input,
99       MutableByteRange& output,
100       StreamCodec::FlushOp flush) override;
101 
102   void resetDeflateStream();
103   void resetInflateStream();
104 
105   Options options_;
106 
107   Optional<z_stream> deflateStream_{};
108   Optional<z_stream> inflateStream_{};
109   int level_;
110   bool needReset_{true};
111 };
// The two magic bytes that open a gzip frame, expressed as a little-endian
// 16-bit value.
constexpr uint16_t kGZIPMagicLE = 0x8B1F;
113 
validPrefixes() const114 std::vector<std::string> ZlibStreamCodec::validPrefixes() const {
115   if (type() == CodecType::ZLIB) {
116     // Zlib streams start with a 2 byte header.
117     //
118     //   0   1
119     // +---+---+
120     // |CMF|FLG|
121     // +---+---+
122     //
123     // We won't restrict the values of any sub-fields except as described below.
124     //
125     // The lowest 4 bits of CMF is the compression method (CM).
126     // CM == 0x8 is the deflate compression method, which is currently the only
127     // supported compression method, so any valid prefix must have CM == 0x8.
128     //
129     // The lowest 5 bits of FLG is FCHECK.
130     // FCHECK must be such that the two header bytes are a multiple of 31 when
131     // interpreted as a big endian 16-bit number.
132     std::vector<std::string> result;
133     // 16 values for the first byte, 8 values for the second byte.
134     // There are also 4 combinations where both 0x00 and 0x1F work as FCHECK.
135     result.reserve(132);
136     // Select all values for the CMF byte that use the deflate algorithm 0x8.
137     for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) {
138       // Select all values for the FLG, but leave FCHECK as 0 since it's fixed.
139       for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) {
140         uint16_t prefix = first | second;
141         // Compute FCHECK.
142         prefix += 31 - (prefix % 31);
143         result.push_back(prefixToStringLE(Endian::big(prefix)));
144         // zlib won't produce this, but it is a valid prefix.
145         if ((prefix & 0x1F) == 31) {
146           prefix -= 31;
147           result.push_back(prefixToStringLE(Endian::big(prefix)));
148         }
149       }
150     }
151     return result;
152   } else if (type() == CodecType::GZIP) {
153     // The gzip frame starts with 2 magic bytes.
154     return {prefixToStringLE(kGZIPMagicLE)};
155   } else {
156     return {};
157   }
158 }
159 
canUncompress(const IOBuf * data,Optional<uint64_t>) const160 bool ZlibStreamCodec::canUncompress(
161     const IOBuf* data, Optional<uint64_t>) const {
162   if (type() == CodecType::ZLIB) {
163     uint16_t value;
164     Cursor cursor{data};
165     if (!cursor.tryReadBE(value)) {
166       return false;
167     }
168     // zlib compressed if using deflate and is a multiple of 31.
169     return (value & 0x0F00) == 0x0800 && value % 31 == 0;
170   } else if (type() == CodecType::GZIP) {
171     return dataStartsWithLE(data, kGZIPMagicLE);
172   } else {
173     return false;
174   }
175 }
176 
doMaxCompressedLength(uint64_t uncompressedLength) const177 uint64_t ZlibStreamCodec::doMaxCompressedLength(
178     uint64_t uncompressedLength) const {
179   // When passed a nullptr, deflateBound() adds 6 bytes for a zlib wrapper. A
180   // gzip wrapper is 18 bytes, so we add the 12 byte difference.
181   return deflateBound(nullptr, uncompressedLength) +
182       (options_.format == Options::Format::GZIP ? 12 : 0);
183 }
184 
createCodec(Options options,int level)185 std::unique_ptr<Codec> ZlibStreamCodec::createCodec(
186     Options options, int level) {
187   return std::make_unique<ZlibStreamCodec>(options, level);
188 }
189 
createStream(Options options,int level)190 std::unique_ptr<StreamCodec> ZlibStreamCodec::createStream(
191     Options options, int level) {
192   return std::make_unique<ZlibStreamCodec>(options, level);
193 }
194 
// True iff `value` lies in the closed interval [low, high].
bool inBounds(int value, int low, int high) {
  if (value < low) {
    return false;
  }
  return value <= high;
}
198 
zlibConvertLevel(int level)199 int zlibConvertLevel(int level) {
200   switch (level) {
201     case COMPRESSION_LEVEL_FASTEST:
202       return 1;
203     case COMPRESSION_LEVEL_DEFAULT:
204       return 6;
205     case COMPRESSION_LEVEL_BEST:
206       return 9;
207   }
208   if (!inBounds(level, 0, 9)) {
209     throw std::invalid_argument(
210         to<std::string>("ZlibStreamCodec: invalid level: ", level));
211   }
212   return level;
213 }
214 
ZlibStreamCodec(Options options,int level)215 ZlibStreamCodec::ZlibStreamCodec(Options options, int level)
216     : StreamCodec(
217           getCodecType(options),
218           zlibConvertLevel(level),
219           getCodecType(options) == CodecType::GZIP ? "gzip" : "zlib"),
220       level_(zlibConvertLevel(level)) {
221   options_ = options;
222 
223   // Although zlib allows a windowSize of 8..15, a value of 8 is not
224   // properly supported and is treated as a value of 9. This means data deflated
225   // with windowSize==8 can not be re-inflated with windowSize==8. windowSize==8
226   // is also not supported for gzip and raw deflation.
227   // Hence, the codec supports only 9..15.
228   if (!inBounds(options_.windowSize, 9, 15)) {
229     throw std::invalid_argument(to<std::string>(
230         "ZlibStreamCodec: invalid windowSize option: ", options.windowSize));
231   }
232   if (!inBounds(options_.memLevel, 1, 9)) {
233     throw std::invalid_argument(to<std::string>(
234         "ZlibStreamCodec: invalid memLevel option: ", options.memLevel));
235   }
236   if (!isValidStrategy(options_.strategy)) {
237     throw std::invalid_argument(to<std::string>(
238         "ZlibStreamCodec: invalid strategy: ", options.strategy));
239   }
240 }
241 
~ZlibStreamCodec()242 ZlibStreamCodec::~ZlibStreamCodec() {
243   if (deflateStream_) {
244     deflateEnd(deflateStream_.get_pointer());
245     deflateStream_.reset();
246   }
247   if (inflateStream_) {
248     inflateEnd(inflateStream_.get_pointer());
249     inflateStream_.reset();
250   }
251 }
252 
doResetStream()253 void ZlibStreamCodec::doResetStream() {
254   needReset_ = true;
255 }
256 
resetDeflateStream()257 void ZlibStreamCodec::resetDeflateStream() {
258   if (deflateStream_) {
259     int const rc = deflateReset(deflateStream_.get_pointer());
260     if (rc != Z_OK) {
261       deflateStream_.reset();
262       throw std::runtime_error(
263           to<std::string>("ZlibStreamCodec: deflateReset error: ", rc));
264     }
265     return;
266   }
267   deflateStream_ = z_stream{};
268 
269   // The automatic header detection format is only for inflation.
270   // Use zlib for deflation if the format is auto.
271   int const windowBits = getWindowBits(
272       options_.format == Options::Format::AUTO ? Options::Format::ZLIB
273                                                : options_.format,
274       options_.windowSize);
275 
276   int const rc = deflateInit2(
277       deflateStream_.get_pointer(),
278       level_,
279       Z_DEFLATED,
280       windowBits,
281       options_.memLevel,
282       options_.strategy);
283   if (rc != Z_OK) {
284     deflateStream_.reset();
285     throw std::runtime_error(
286         to<std::string>("ZlibStreamCodec: deflateInit error: ", rc));
287   }
288 }
289 
resetInflateStream()290 void ZlibStreamCodec::resetInflateStream() {
291   if (inflateStream_) {
292     int const rc = inflateReset(inflateStream_.get_pointer());
293     if (rc != Z_OK) {
294       inflateStream_.reset();
295       throw std::runtime_error(
296           to<std::string>("ZlibStreamCodec: inflateReset error: ", rc));
297     }
298     return;
299   }
300   inflateStream_ = z_stream{};
301   int const rc = inflateInit2(
302       inflateStream_.get_pointer(),
303       getWindowBits(options_.format, options_.windowSize));
304   if (rc != Z_OK) {
305     inflateStream_.reset();
306     throw std::runtime_error(
307         to<std::string>("ZlibStreamCodec: inflateInit error: ", rc));
308   }
309 }
310 
zlibTranslateFlush(StreamCodec::FlushOp flush)311 int zlibTranslateFlush(StreamCodec::FlushOp flush) {
312   switch (flush) {
313     case StreamCodec::FlushOp::NONE:
314       return Z_NO_FLUSH;
315     case StreamCodec::FlushOp::FLUSH:
316       return Z_SYNC_FLUSH;
317     case StreamCodec::FlushOp::END:
318       return Z_FINISH;
319     default:
320       throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
321   }
322 }
323 
zlibThrowOnError(int rc)324 int zlibThrowOnError(int rc) {
325   switch (rc) {
326     case Z_OK:
327     case Z_BUF_ERROR:
328     case Z_STREAM_END:
329       return rc;
330     default:
331       throw std::runtime_error(to<std::string>("ZlibStreamCodec: error: ", rc));
332   }
333 }
334 
doCompressStream(ByteRange & input,MutableByteRange & output,StreamCodec::FlushOp flush)335 bool ZlibStreamCodec::doCompressStream(
336     ByteRange& input, MutableByteRange& output, StreamCodec::FlushOp flush) {
337   if (needReset_) {
338     resetDeflateStream();
339     needReset_ = false;
340   }
341   DCHECK(deflateStream_.has_value());
342   // zlib will return Z_STREAM_ERROR if output.data() is null.
343   if (output.data() == nullptr) {
344     return false;
345   }
346   deflateStream_->next_in = const_cast<uint8_t*>(input.data());
347   deflateStream_->avail_in = input.size();
348   deflateStream_->next_out = output.data();
349   deflateStream_->avail_out = output.size();
350   SCOPE_EXIT {
351     input.uncheckedAdvance(input.size() - deflateStream_->avail_in);
352     output.uncheckedAdvance(output.size() - deflateStream_->avail_out);
353   };
354   int const rc = zlibThrowOnError(
355       deflate(deflateStream_.get_pointer(), zlibTranslateFlush(flush)));
356   switch (flush) {
357     case StreamCodec::FlushOp::NONE:
358       return false;
359     case StreamCodec::FlushOp::FLUSH:
360       return deflateStream_->avail_in == 0 && deflateStream_->avail_out != 0;
361     case StreamCodec::FlushOp::END:
362       return rc == Z_STREAM_END;
363     default:
364       throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
365   }
366 }
367 
doUncompressStream(ByteRange & input,MutableByteRange & output,StreamCodec::FlushOp flush)368 bool ZlibStreamCodec::doUncompressStream(
369     ByteRange& input, MutableByteRange& output, StreamCodec::FlushOp flush) {
370   if (needReset_) {
371     resetInflateStream();
372     needReset_ = false;
373   }
374   DCHECK(inflateStream_.has_value());
375   // zlib will return Z_STREAM_ERROR if output.data() is null.
376   if (output.data() == nullptr) {
377     return false;
378   }
379   inflateStream_->next_in = const_cast<uint8_t*>(input.data());
380   inflateStream_->avail_in = input.size();
381   inflateStream_->next_out = output.data();
382   inflateStream_->avail_out = output.size();
383   SCOPE_EXIT {
384     input.advance(input.size() - inflateStream_->avail_in);
385     output.advance(output.size() - inflateStream_->avail_out);
386   };
387   int const rc = zlibThrowOnError(
388       inflate(inflateStream_.get_pointer(), zlibTranslateFlush(flush)));
389   return rc == Z_STREAM_END;
390 }
391 
392 } // namespace
393 
defaultGzipOptions()394 Options defaultGzipOptions() {
395   return Options(Options::Format::GZIP);
396 }
397 
defaultZlibOptions()398 Options defaultZlibOptions() {
399   return Options(Options::Format::ZLIB);
400 }
401 
getCodec(Options options,int level)402 std::unique_ptr<Codec> getCodec(Options options, int level) {
403   return ZlibStreamCodec::createCodec(options, level);
404 }
405 
getStreamCodec(Options options,int level)406 std::unique_ptr<StreamCodec> getStreamCodec(Options options, int level) {
407   return ZlibStreamCodec::createStream(options, level);
408 }
409 
410 } // namespace zlib
411 } // namespace io
412 } // namespace folly
413 
414 #endif // FOLLY_HAVE_LIBZ
415