1 // -*- coding: utf-8 -*- 2 // 3 // zlibstream.hxx --- IOStreams classes for working with RFC 1950 and RFC 1952 4 // compression formats (respectively known as the zlib and 5 // gzip formats) 6 // 7 // Copyright (C) 2017 Florent Rougon 8 // 9 // This library is free software; you can redistribute it and/or 10 // modify it under the terms of the GNU Library General Public 11 // License as published by the Free Software Foundation; either 12 // version 2 of the License, or (at your option) any later version. 13 // 14 // This library is distributed in the hope that it will be useful, 15 // but WITHOUT ANY WARRANTY; without even the implied warranty of 16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 // Library General Public License for more details. 18 // 19 // You should have received a copy of the GNU Library General Public 20 // License along with this library; if not, write to the Free Software 21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 22 // MA 02110-1301 USA. 23 24 #ifndef SG_ZLIBSTREAM_HXX 25 #define SG_ZLIBSTREAM_HXX 26 27 #include <ios> // std::streamsize 28 #include <istream> 29 #include <streambuf> 30 #include <memory> // std::unique_ptr 31 #include <zlib.h> // struct z_stream 32 33 #include <simgear/misc/sg_path.hxx> 34 35 // This file contains: 36 // 37 // - two stream buffer classes (ZlibCompressorIStreambuf and 38 // ZlibDecompressorIStreambuf), both based on the same abstract class: 39 // ZlibAbstractIStreambuf; 40 // 41 // - two std::istream subclasses (ZlibCompressorIStream and 42 // ZlibDecompressorIStream), each creating and using the corresponding 43 // stream buffer class from the previous item. 44 // 45 // All these allow one to work with RFC 1950 and RFC 1952 compression 46 // formats, respectively known as the zlib and gzip formats. 
//
// These classes are *input* streaming classes, which means they can
// efficiently handle arbitrary amounts of data without using any disk
// space or ever-increasing amounts of memory, and allow “client code” to
// pull exactly as much data as it wants at any given time, resuming later
// when it is ready to handle the next chunk.
//
// So, for example, assuming you've created an instance of
// ZlibCompressorIStream (bound to some input stream of your choice, let's
// call it iStream), you could read 512 bytes of data from it, and you
// would get the first 512 bytes of *compressed* data corresponding to what
// iStream provided. Then you could resume at any time and ask for the next
// 512 bytes of compressed data (or any other amount), etc.
//
// Therefore, these classes are well suited, among other things, to
// compressing or decompressing data streams while at the same time packing
// the result into discrete chunks or packets with size constraints (you can
// think of the process as making sausages :).
//
// The input being in each case an std::istream (for compressing as well as
// for decompressing), it can be tied to an arbitrary source: a file with
// sg_ifstream or std::ifstream, a memory buffer with std::istringstream or
// std::stringstream, a TCP socket with a custom std::streambuf subclass[1]
// to interface with the sockets API, etc.
//
// [1] Possibly wrapped in an std::istream.
//
// The stream buffer classes upon which ZlibCompressorIStream and
// ZlibDecompressorIStream are built have an xsgetn() implementation that
// avoids useless copies of data by asking zlib to write directly to the
// destination buffer. This xsgetn() method is used when calling read() on
// the std::istream subclasses, or sgetn() if you are using the stream
// buffer classes directly (i.e., ZlibCompressorIStreambuf and
// ZlibDecompressorIStreambuf).
Other std::istream methods may instead rely 81 // only on the internal buffer and the underflow() method, and therefore be 82 // less efficient for large amounts of data. You may want to take a look at 83 // zlibstream_test.cxx to see various ways of using these classes. 84 // 85 // In case you use std::istream& operator>>(std::istream&, std::string&) or 86 // its overload friends, beware that it splits fields at spaces, and by 87 // default ignores spaces at the beginning of a field (cf. std::skipws, 88 // std::noskipws and friends). As far as I understand it, most of these 89 // operators are mainly intended in the IOStreams library to be used to 90 // read an int here, a double there, a space-delimited string afterwards, 91 // etc. (the exception could be the overload writing to a stream buffer, 92 // however it doesn't seem to be very efficient on my system with GNU 93 // libstdc++ [it is not using xsgetn()], so beware also of this one if you 94 // are handling large amounts of data). For moderately complex or large 95 // input handling, I'd suggest to use std::istream methods such as read(), 96 // gcount() and getline() (std::getline() can be useful too). Or directly 97 // use the stream buffer classes, in particular with sgetn(). 98 99 100 namespace simgear 101 { 102 103 enum class ZLibCompressionFormat { 104 ZLIB = 0, 105 GZIP, 106 AUTODETECT 107 }; 108 109 enum class ZLibMemoryStrategy { 110 FAVOR_MEMORY_OVER_SPEED = 0, 111 FAVOR_SPEED_OVER_MEMORY 112 }; 113 114 // Abstract base class for both the compressor and decompressor stream buffers. 115 class ZlibAbstractIStreambuf: public std::streambuf 116 { 117 public: 118 /** 119 * @brief Constructor for ZlibAbstractIStreambuf. 120 * @param iStream Input stream to read from. 121 * @param path Optional path to the file corresponding to iStream, 122 * if any. Only used for error messages. 
123 * @param inBuf Pointer to the input buffer (data read from iStream is 124 * written there before being compressed or decompressed). 125 * If nullptr, the buffer is allocated on the heap in the 126 * constructor and deallocated in the destructor. 127 * @param inBufSize Size of the input buffer, in chars. 128 * @param outBuf Pointer to the output buffer. Data is read by zlib 129 * from the input buffer, compressed or decompressed, and 130 * the result is directly written to the output buffer. 131 * If nullptr, the buffer is allocated on the heap in the 132 * constructor and deallocated in the destructor. 133 * @param outBufSize Size of the output buffer, in chars. 134 * @param putbackSize Size of the putback area inside the output buffer, in 135 * chars. 136 * 137 * It is required that putbackSize < outBufSize. It is guaranteed that, 138 * if at least putbackSize chars have been read without any putback (or 139 * unget) operation intermixed, then at least putbackSize chars can be 140 * put back in sequence. If you don't need this feature, use zero for the 141 * putbackSize value (the default) for best performance. 142 */ 143 explicit ZlibAbstractIStreambuf(std::istream& iStream, 144 const SGPath& path = SGPath(), 145 char* inBuf = nullptr, 146 std::size_t inBufSize = 262144, 147 char* outBuf = nullptr, 148 std::size_t outBufSize = 262144, 149 std::size_t putbackSize = 0); 150 151 // Alternate constructor with sink semantics for the “source” std::istream. 152 // When used, the class takes ownership of the std::istream instance pointed 153 // to by the first constructor argument, and keeps it alive as long as the 154 // object this constructor is for is itself alive. 
155 explicit ZlibAbstractIStreambuf(std::unique_ptr<std::istream> iStream_p, 156 const SGPath& path = SGPath(), 157 char* inBuf = nullptr, 158 std::size_t inBufSize = 262144, 159 char* outBuf = nullptr, 160 std::size_t outBufSize = 262144, 161 std::size_t putbackSize = 0); 162 163 ZlibAbstractIStreambuf(const ZlibAbstractIStreambuf&) = delete; 164 ZlibAbstractIStreambuf& operator=(const ZlibAbstractIStreambuf&) = delete; 165 virtual ~ZlibAbstractIStreambuf(); 166 167 protected: 168 enum class OperationType { 169 COMPRESSION = 0, 170 DECOMPRESSION 171 }; 172 173 virtual OperationType operationType() const = 0; 174 175 // Either compress or decompress a chunk of data (depending on the 176 // particular subclass implementation). The semantics are the same as for 177 // zlib's inflate() and deflate() functions applied to member _zstream, 178 // concerning 1) the return value and 2) where, and how much to read and 179 // write (which thus depends on _zstream.avail_in, _zstream.next_in, 180 // _zstream.avail_out and _zstream.next_out). 181 virtual int zlibProcessData() = 0; 182 183 // The input stream, from which data is read before being processed by zlib 184 std::istream& _iStream; 185 // Pointer to the same, used when calling the constructor that takes an 186 // std::unique_ptr<std::istream> as its first argument; empty 187 // std::unique_ptr object otherwise. 188 std::unique_ptr<std::istream> _iStream_p; 189 190 // Corresponding path, if any (default-constructed SGPath instance otherwise) 191 const SGPath _path; 192 // Structure used to communicate with zlib 193 z_stream _zstream; 194 195 private: 196 // Callback whose role is to refill the output buffer when it's empty and 197 // the “client” tries to read more. 198 virtual int underflow() override; 199 // Optional override when subclassing std::streambuf. 
This is the most 200 // efficient way of reading several characters (as soon as we've emptied the 201 // output buffer, data is written by zlib directly to the destination 202 // buffer). 203 virtual std::streamsize xsgetn(char* dest, std::streamsize n) override; 204 // Utility method for xsgetn() 205 std::size_t xsgetn_preparePutbackArea(char* origGptr, char* dest, 206 char* writePtr); 207 // Make sure there is data to read in the input buffer, or signal EOF. 208 bool getInputData(); 209 // Utility method for fillOutputBuffer() 210 std::size_t fOB_remainingSpace(unsigned char* nextOutPtr) const; 211 // Fill the output buffer (using zlib functions) as much as possible. 212 char* fillOutputBuffer(); 213 // Utility method 214 [[ noreturn ]] void handleZ_BUF_ERROR() const; 215 216 bool _allFinished = false; 217 218 // The buffers 219 // ~~~~~~~~~~~ 220 // 221 // The input buffer receives data obtained from _iStream, before it is 222 // processed by zlib. In underflow(), zlib reads from this buffer it and 223 // writes the resulting data(*) to the output buffer. Then we point the 224 // standard std::streambuf pointers (gptr() and friends) directly towards 225 // the data inside that output buffer. xsgetn() is even more optimized: it 226 // first empties the output buffer, then makes zlib write the remaining data 227 // directly to the destination area. 228 // 229 // (*) Compressed or decompressed, depending on the particular 230 // implementation of zlibProcessData() in each subclass. 231 char* _inBuf; 232 const std::size_t _inBufSize; 233 // _inBufEndPtr points right after the last data retrieved from _iStream and 234 // stored into _inBuf. When zlib has read all such data, _zstream.next_in is 235 // equal to _inBufEndPtr (after proper casting). Except in this particular 236 // situation, only _zstream.next_in <= _inBufEndPtr is guaranteed. 
237 char* _inBufEndPtr; 238 // Layout of the _outBuf buffer: 239 // 240 // |_outBuf <putback area> |_outBuf + _putbackSize |_outBuf + _outBufSize 241 // 242 // The first _putbackSize chars in _outBuf are reserved for the putback area 243 // (right-aligned at _outBuf + _putbackSize). The actual output buffer thus 244 // starts at _outBuf + _putbackSize. At any given time for callers of this 245 // class, the number of characters that can be put back is gptr() - eback(). 246 // It may be lower than _putbackSize if we haven't read that many characters 247 // yet. It may also be larger if gptr() > _outBuf + _putbackSize, i.e., 248 // when the buffer for pending data is non-empty. 249 // 250 // At any given time, callers should see: 251 // 252 // _outBuf <= eback() <= _outBuf + _putbackSize <= gptr() <= egptr() 253 // <= _outBuf + _outBufSize 254 // 255 // (hoping this won't get out of sync with the code!) 256 char *_outBuf; 257 const std::size_t _outBufSize; 258 // Space reserved for characters to be put back into the stream. Must be 259 // strictly smaller than _outBufSize (this is checked in the constructor). 260 // It is guaranteed that this number of chars can be put back, except of 261 // course if we haven't read that many characters from the input stream yet. 262 // If characters are buffered in _outBuf[2], then it may be that more 263 // characters than _putbackSize can be put back (it is essentially a matter 264 // for std::streambuf of decreasing the “next pointer for the input 265 // sequence”, i.e., the one returned by gptr()). 266 // 267 // [2] In the [_outBuf + _putbackSize, _outBuf + _outBufSize) area. 268 const std::size_t _putbackSize; 269 270 // Since the constructor optionally allocates memory for the input and 271 // output buffers, these members allow the destructor to know which buffers 272 // have to be deallocated, if any. 
273 bool _inBufMustBeFreed = false; 274 bool _outBufMustBeFreed = false; 275 }; 276 277 278 // Stream buffer class for compressing data. Input data is obtained from an 279 // std::istream instance; the corresponding compressed data can be read using 280 // the standard std::streambuf read interface (mainly: sbumpc(), sgetc(), 281 // snextc(), sgetn(), sputbackc(), sungetc()). Input, uncompressed data is 282 // “pulled” as needed for the amount of compressed data requested by the 283 // “client” using the methods I just listed. 284 class ZlibCompressorIStreambuf: public ZlibAbstractIStreambuf 285 { 286 public: 287 // Same parameters as for ZlibAbstractIStreambuf, except: 288 // 289 // compressionLevel: in the [0,9] range. 0 means no compression at all. 290 // Levels 1 to 9 yield compressed data, with 1 giving 291 // the highest compression speed but worst compression 292 // ratio, and 9 the highest compression ratio but lowest 293 // compression speed. 294 // format either ZLibCompressionFormat::ZLIB or 295 // ZLibCompressionFormat::GZIP 296 // memStrategy either ZLibMemoryStrategy::FAVOR_MEMORY_OVER_SPEED or 297 // ZLibMemoryStrategy::FAVOR_SPEED_OVER_MEMORY 298 explicit ZlibCompressorIStreambuf( 299 std::istream& iStream, 300 const SGPath& path = SGPath(), 301 int compressionLevel = Z_DEFAULT_COMPRESSION, 302 ZLibCompressionFormat format = ZLibCompressionFormat::ZLIB, 303 ZLibMemoryStrategy memStrategy = ZLibMemoryStrategy::FAVOR_SPEED_OVER_MEMORY, 304 char* inBuf = nullptr, 305 std::size_t inBufSize = 262144, 306 char* outBuf = nullptr, 307 std::size_t outBufSize = 262144, 308 std::size_t putbackSize = 0); 309 310 // Alternate constructor with sink semantics for the “source” std::istream. 
311 explicit ZlibCompressorIStreambuf( 312 std::unique_ptr<std::istream> _iStream_p, 313 const SGPath& path = SGPath(), 314 int compressionLevel = Z_DEFAULT_COMPRESSION, 315 ZLibCompressionFormat format = ZLibCompressionFormat::ZLIB, 316 ZLibMemoryStrategy memStrategy = ZLibMemoryStrategy::FAVOR_SPEED_OVER_MEMORY, 317 char* inBuf = nullptr, 318 std::size_t inBufSize = 262144, 319 char* outBuf = nullptr, 320 std::size_t outBufSize = 262144, 321 std::size_t putbackSize = 0); 322 323 ZlibCompressorIStreambuf(const ZlibCompressorIStreambuf&) = delete; 324 ZlibCompressorIStreambuf& operator=(const ZlibCompressorIStreambuf&) = delete; 325 virtual ~ZlibCompressorIStreambuf(); 326 327 protected: 328 virtual OperationType operationType() const override; 329 // Initialize the z_stream struct used by zlib 330 void zStreamInit(int compressionLevel, ZLibCompressionFormat format, 331 ZLibMemoryStrategy memStrategy); 332 // Call zlib's deflate() function to compress data. 333 virtual int zlibProcessData() override; 334 }; 335 336 337 // Stream buffer class for decompressing data. Input data is obtained from an 338 // std::istream instance; the corresponding decompressed data can be read 339 // using the standard std::streambuf read interface (mainly: sbumpc(), 340 // sgetc(), snextc(), sgetn(), sputbackc(), sungetc()). Input, compressed data 341 // is “pulled” as needed for the amount of uncompressed data requested by the 342 // “client” using the methods I just listed. 
343 class ZlibDecompressorIStreambuf: public ZlibAbstractIStreambuf 344 { 345 public: 346 // Same parameters as for ZlibAbstractIStreambuf, except: 347 // 348 // format ZLibCompressionFormat::ZLIB, 349 // ZLibCompressionFormat::GZIP or 350 // ZLibCompressionFormat::AUTODETECT 351 explicit ZlibDecompressorIStreambuf( 352 std::istream& iStream, 353 const SGPath& path = SGPath(), 354 ZLibCompressionFormat format = ZLibCompressionFormat::ZLIB, 355 char* inBuf = nullptr, 356 std::size_t inBufSize = 262144, 357 char* outBuf = nullptr, 358 std::size_t outBufSize = 262144, 359 std::size_t putbackSize = 0); // default optimized for speed 360 361 // Alternate constructor with sink semantics for the “source” std::istream. 362 explicit ZlibDecompressorIStreambuf( 363 std::unique_ptr<std::istream> _iStream_p, 364 const SGPath& path = SGPath(), 365 ZLibCompressionFormat format = ZLibCompressionFormat::ZLIB, 366 char* inBuf = nullptr, 367 std::size_t inBufSize = 262144, 368 char* outBuf = nullptr, 369 std::size_t outBufSize = 262144, 370 std::size_t putbackSize = 0); // default optimized for speed 371 372 ZlibDecompressorIStreambuf(const ZlibDecompressorIStreambuf&) = delete; 373 ZlibDecompressorIStreambuf& operator=(const ZlibDecompressorIStreambuf&) 374 = delete; 375 virtual ~ZlibDecompressorIStreambuf(); 376 377 protected: 378 virtual OperationType operationType() const override; 379 void zStreamInit(ZLibCompressionFormat format); 380 virtual int zlibProcessData() override; 381 }; 382 383 // std::istream subclass for compressing data. Input data is obtained from an 384 // std::istream instance; the corresponding compressed data can be read using 385 // the standard std::istream interface (read(), readsome(), gcount(), get(), 386 // getline(), operator>>(), peek(), putback(), ignore(), unget()... plus 387 // operator overloads such as istream& operator>>(istream&, string&) as 388 // defined in <string>, and std::getline()). 
Input, uncompressed data is 389 // “pulled” as needed for the amount of compressed data requested by the 390 // “client”. 391 // 392 // To get data efficiently from an instance of this class, use its read() 393 // method (typically in conjunction with gcount(), inside a loop). 394 class ZlibCompressorIStream: public std::istream 395 { 396 public: 397 // Same parameters as for ZlibCompressorIStreambuf 398 explicit ZlibCompressorIStream( 399 std::istream& iStream, 400 const SGPath& path = SGPath(), 401 int compressionLevel = Z_DEFAULT_COMPRESSION, 402 ZLibCompressionFormat format = ZLibCompressionFormat::ZLIB, 403 ZLibMemoryStrategy memStrategy = ZLibMemoryStrategy::FAVOR_SPEED_OVER_MEMORY, 404 char* inBuf = nullptr, 405 std::size_t inBufSize = 262144, 406 char* outBuf = nullptr, 407 std::size_t outBufSize = 262144, 408 std::size_t putbackSize = 0); // default optimized for speed 409 410 // Alternate constructor with sink semantics for the “source” std::istream. 411 explicit ZlibCompressorIStream( 412 std::unique_ptr<std::istream> _iStream_p, 413 const SGPath& path = SGPath(), 414 int compressionLevel = Z_DEFAULT_COMPRESSION, 415 ZLibCompressionFormat format = ZLibCompressionFormat::ZLIB, 416 ZLibMemoryStrategy memStrategy = ZLibMemoryStrategy::FAVOR_SPEED_OVER_MEMORY, 417 char* inBuf = nullptr, 418 std::size_t inBufSize = 262144, 419 char* outBuf = nullptr, 420 std::size_t outBufSize = 262144, 421 std::size_t putbackSize = 0); // default optimized for speed 422 423 ZlibCompressorIStream(const ZlibCompressorIStream&) = delete; 424 ZlibCompressorIStream& operator=(const ZlibCompressorIStream&) = delete; 425 virtual ~ZlibCompressorIStream(); 426 427 private: 428 ZlibCompressorIStreambuf _streamBuf; 429 }; 430 431 // std::istream subclass for decompressing data. 
Input data is obtained from 432 // an std::istream instance; the corresponding decompressed data can be read 433 // using the standard std::istream interface (read(), readsome(), gcount(), 434 // get(), getline(), operator>>(), peek(), putback(), ignore(), unget()... 435 // plus operator overloads such as istream& operator>>(istream&, string&) as 436 // defined in <string>, and std::getline()). Input, compressed data is 437 // “pulled” as needed for the amount of uncompressed data requested by the 438 // “client”. 439 // 440 // To get data efficiently from an instance of this class, use its read() 441 // method (typically in conjunction with gcount(), inside a loop). 442 class ZlibDecompressorIStream: public std::istream 443 { 444 public: 445 // Same parameters as for ZlibDecompressorIStreambuf 446 explicit ZlibDecompressorIStream( 447 std::istream& iStream, 448 const SGPath& path = SGPath(), 449 ZLibCompressionFormat format = ZLibCompressionFormat::ZLIB, 450 char* inBuf = nullptr, 451 std::size_t inBufSize = 262144, 452 char* outBuf = nullptr, 453 std::size_t outBufSize = 262144, 454 std::size_t putbackSize = 0); // default optimized for speed 455 456 // Alternate constructor with sink semantics for the “source” std::istream. 457 explicit ZlibDecompressorIStream( 458 std::unique_ptr<std::istream> _iStream_p, 459 const SGPath& path = SGPath(), 460 ZLibCompressionFormat format = ZLibCompressionFormat::ZLIB, 461 char* inBuf = nullptr, 462 std::size_t inBufSize = 262144, 463 char* outBuf = nullptr, 464 std::size_t outBufSize = 262144, 465 std::size_t putbackSize = 0); // default optimized for speed 466 467 ZlibDecompressorIStream(const ZlibDecompressorIStream&) = delete; 468 ZlibDecompressorIStream& operator=(const ZlibDecompressorIStream&) = delete; 469 virtual ~ZlibDecompressorIStream(); 470 471 private: 472 ZlibDecompressorIStreambuf _streamBuf; 473 }; 474 475 } // of namespace simgear 476 477 #endif // of SG_ZLIBSTREAM_HXX 478