1 #ifndef UTIL_COMPRESS__COMPRESS__HPP
2 #define UTIL_COMPRESS__COMPRESS__HPP
3 
4 /*  $Id: compress.hpp 612744 2020-07-27 14:26:09Z ivanov $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author:  Vladimir Ivanov
30  *
31  * File Description:  The Compression API
32  *
33  */
34 
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/version_api.hpp>
37 
38 
39 /** @addtogroup Compression
40  *
41  * @{
42  */
43 
44 
45 BEGIN_NCBI_SCOPE
46 
47 
48 /// Default compression I/O stream buffer size.
49 const streamsize kCompressionDefaultBufSize = 16*1024;
50 
/// Report a compression API problem through ERR_POST_X.
/// Note that the diagnostic is posted with Warning severity.
/// @param subcode  Error subcode forwarded to ERR_POST_X.
/// @param message  Text (or `<<` expression) appended to the diagnostic.
#define ERR_COMPRESS(subcode, message) \
    ERR_POST_X(subcode, Warning << message)
53 
/// Macro to catch and handle exceptions (from streams in the destructor).
///
/// Expands to a chain of three `catch` clauses (CException, exception and
/// catch-all), so it must be placed immediately after a `try { ... }` block.
/// Each clause posts a diagnostic under @a subcode. The reporting itself is
/// wrapped in its own try/catch(...) so that a failure while reporting is
/// swallowed and nothing escapes the handler.
///
/// @param subcode  Error subcode passed to the reporting macros.
/// @param message  Text identifying the operation that failed.
///
/// @note The final line intentionally has no line continuation, so the macro
///       cannot absorb whatever line follows its definition.
#define COMPRESS_HANDLE_EXCEPTIONS(subcode, message)                  \
    catch (const CException& e) {                                     \
        try {                                                         \
            NCBI_REPORT_EXCEPTION_X(subcode, message, e);             \
        } catch (...) {                                               \
        }                                                             \
    }                                                                 \
    catch (const exception& e) {                                      \
        try {                                                         \
            ERR_POST_X(subcode, Error                                 \
                       << "[" << message                              \
                       << "] Exception: " << e.what());               \
        } catch (...) {                                               \
        }                                                             \
    }                                                                 \
    catch (...) {                                                     \
        try {                                                         \
            ERR_POST_X(subcode, Error                                 \
                       << "[" << message << "] Unknown exception");   \
        } catch (...) {                                               \
        }                                                             \
    }
77 
78 
// Forward declarations: CCompressionFile is defined further down in this
// header; CCompressionStreambuf is only named here (as a friend class).
class CCompressionStreambuf;
class CCompressionFile;
82 
83 
84 //////////////////////////////////////////////////////////////////////////////
85 //
86 // ICompression -- abstract interface class
87 //
88 
89 class NCBI_XUTIL_EXPORT ICompression
90 {
91 public:
92     /// Compression level.
93     ///
94     /// It is in range [0..9]. Increase of level might mean better compression
95     /// and usualy greater time of compression. Usualy 1 gives best speed,
96     /// 9 gives best compression, 0 gives no compression at all.
97     /// eDefault value requests a compromise between speed and compression
98     /// (according to developers of the corresponding compression algorithm).
99     enum ELevel {
100         eLevel_Default       = -1,  // default
101         eLevel_NoCompression =  0,  // just store data
102         eLevel_Lowest        =  1,
103         eLevel_VeryLow       =  2,
104         eLevel_Low           =  3,
105         eLevel_MediumLow     =  4,
106         eLevel_Medium        =  5,
107         eLevel_MediumHigh    =  6,
108         eLevel_High          =  7,
109         eLevel_VeryHigh      =  8,
110         eLevel_Best          =  9
111     };
112 
113     /// Compression flags. The flag selection depends from compression
114     /// algorithm implementation. For examples see the flags defined
115     /// in the derived classes:  CBZip2Compression::EFlags,
116     /// CLZOCompression::EFlags, CZipCompression::EFlags, etc.
117     typedef unsigned int TFlags;    ///< Bitwise OR of CXxxCompression::EFlags
118 
119 public:
120     /// Destructor
~ICompression(void)121     virtual ~ICompression(void) {}
122 
123     /// Return name and version of the compression library.
124     virtual CVersionInfo GetVersion(void) const = 0;
125 
126     // Get/set compression level.
127     virtual void   SetLevel(ELevel level) = 0;
128     virtual ELevel GetLevel(void) const = 0;
129 
130     /// Return the default compression level for current compression algorithm.
131     virtual ELevel GetDefaultLevel(void) const = 0;
132 
133     // Get compressor's internal status/error code and description
134     // for the last operation.
135     virtual int    GetErrorCode(void) const = 0;
136     virtual string GetErrorDescription(void) const = 0;
137 
138     // Get/set flags
139     virtual TFlags GetFlags(void) const = 0;
140     virtual void   SetFlags(TFlags flags) = 0;
141 
142     //
143     // Utility functions
144     //
145 
146     // (De)compress the source buffer into the destination buffer.
147     // Return TRUE on success, FALSE on error.
148     // The compressor error code can be acquired via GetErrorCode() call.
149     // Notice that altogether the total size of the destination buffer must
150     // be little more then size of the source buffer.
151     virtual bool CompressBuffer(
152         const void* src_buf, size_t  src_len,
153         void*       dst_buf, size_t  dst_size,
154         /* out */            size_t* dst_len
155     ) = 0;
156 
157     virtual bool DecompressBuffer(
158         const void* src_buf, size_t  src_len,
159         void*       dst_buf, size_t  dst_size,
160         /* out */            size_t* dst_len
161     ) = 0;
162 
163     // (De)compress file "src_file" and put result to file "dst_file".
164     // Return TRUE on success, FALSE on error.
165     virtual bool CompressFile(
166         const string&     src_file,
167         const string&     dst_file,
168         size_t            buf_size = kCompressionDefaultBufSize
169     ) = 0;
170     virtual bool DecompressFile(
171         const string&     src_file,
172         const string&     dst_file,
173         size_t            buf_size = kCompressionDefaultBufSize
174     ) = 0;
175 };
176 
177 
178 //////////////////////////////////////////////////////////////////////////////
179 //
180 // CCompression -- abstract base class
181 //
182 
183 class NCBI_XUTIL_EXPORT CCompression : public ICompression
184 {
185 public:
186     // 'ctors
187     CCompression(ELevel level = eLevel_Default);
188     virtual ~CCompression(void);
189 
190     /// Return name and version of the compression library.
191     virtual CVersionInfo GetVersion(void) const = 0;
192 
193     // Get/set compression level.
194     // NOTE 1:  Changing compression level after compression has begun will
195     //          be ignored.
196     // NOTE 2:  If the level is not supported by the underlying algorithm,
197     //          then it will be translated to the nearest supported value.
198     virtual void   SetLevel(ELevel level);
199     virtual ELevel GetLevel(void) const;
200 
201     // Get compressor's internal status/error code and description
202     // for the last operation.
203     virtual int    GetErrorCode(void) const;
204     virtual string GetErrorDescription(void) const;
205 
206     /// Get flags.
207     virtual TFlags GetFlags(void) const;
208     /// Set flags.
209     virtual void   SetFlags(TFlags flags);
210 
211 protected:
212     // Universal file compression/decompression functions.
213     // Return TRUE on success, FALSE on error.
214     virtual bool x_CompressFile(
215         const string&     src_file,
216         CCompressionFile& dst_file,
217         size_t            buf_size = kCompressionDefaultBufSize
218     );
219     virtual bool x_DecompressFile(
220         CCompressionFile& src_file,
221         const string&     dst_file,
222         size_t            buf_size = kCompressionDefaultBufSize
223     );
224 
225     // Set last action error/status code and description
226     void SetError(int status, const char* description = 0);
227     void SetError(int status, const string& description);
228 
229 protected:
230     /// Decompression mode (see fAllowTransparentRead flag).
231     enum EDecompressMode {
232         eMode_Unknown,         ///< Not known yet (decompress/transparent read)
233         eMode_Decompress,      ///< Generic decompression
234         eMode_TransparentRead  ///< Transparent read, the data is uncompressed
235     };
236     ///< Decompress mode (Decompress/TransparentRead/Unknown).
237     EDecompressMode m_DecompressMode;
238 
239 private:
240     ELevel  m_Level;      // Compression level
241     int     m_ErrorCode;  // Last compressor action error/status
242     string  m_ErrorMsg;   // Last compressor action error message
243     TFlags  m_Flags;      // Bitwise OR of flags
244 
245     // Friend classes
246     friend class CCompressionStreambuf;
247 };
248 
249 
250 
251 //////////////////////////////////////////////////////////////////////////////
252 //
253 // CCompressionFile -- abstract base class
254 //
255 
// Class to support working with compressed files.
// It is assumed that the file on disk is always compressed and that the
// data in memory is uncompressed.
259 //
260 
261 class NCBI_XUTIL_EXPORT CCompressionFile
262 {
263 public:
264     /// Compression file handler
265     typedef void* TFile;
266 
267     /// File open mode
268     enum EMode {
269         eMode_Read,         ///< Reading from compressed file
270         eMode_Write         ///< Writing compressed data to file
271     };
272 
273     // 'ctors
274     CCompressionFile(void);
275     CCompressionFile(const string& path, EMode mode);
276     virtual ~CCompressionFile(void);
277 
278     /// Opens a compressed file for reading or writing.
279     /// Return NULL if error has been occurred.
280     virtual bool Open(const string& path, EMode mode) = 0;
281 
282     /// Read up to "len" uncompressed bytes from the compressed file "file"
283     /// into the buffer "buf". Return the number of bytes actually read
284     /// (0 for end of file, -1 for error)
285     virtual long Read(void* buf, size_t len) = 0;
286 
287     /// Writes the given number of uncompressed bytes into the compressed file.
288     /// Return the number of bytes actually written or -1 for error.
289     /// Returned value can be less than "len", especially if it exceed
290     /// numeric_limits<long>::max(), you should repeat writing for remaining portion.
291     virtual long Write(const void* buf, size_t len) = 0;
292 
293     /// Flushes all pending output if necessary, closes the compressed file.
294     /// Return TRUE on success, FALSE on error.
295     virtual bool Close(void) = 0;
296 
297 protected:
298     TFile  m_File;   ///< File handler.
299     EMode  m_Mode;   ///< File open mode.
300 };
301 
302 
303 
304 //////////////////////////////////////////////////////////////////////////////
305 //
306 // CCompressionProcessor -- abstract base class
307 //
// Contains functions for servicing a compression/decompression session.
309 //
310 
311 class NCBI_XUTIL_EXPORT CCompressionProcessor
312 {
313 public:
314     /// Type of the result of all basic functions
315     enum EStatus {
316         /// Everything is fine, no errors occurred
317         eStatus_Success,
318         /// Special case of eStatus_Success.
319         /// Logical end of (compressed) stream is detected, no errors occurred.
320         /// All subsequent inquiries about data processing should be ignored.
321         eStatus_EndOfData,
322         /// Error has occurred. The error code can be acquired by GetErrorCode().
323         eStatus_Error,
324         /// Output buffer overflow - not enough output space.
325         /// Buffer must be emptied and the last action repeated.
326         eStatus_Overflow,
327         /// Special value. Just need to repeat last action.
328         eStatus_Repeat,
329         /// Special value. Status is undefined.
330         eStatus_Unknown
331     };
332 
333     // 'ctors
334     CCompressionProcessor(void);
335     virtual ~CCompressionProcessor(void);
336 
337     /// Return compressor's busy flag. If returns value is true that
338     /// the current compression object already have being use in other
339     /// compression session.
340     bool IsBusy(void) const;
341 
342     /// Return TRUE if fAllowEmptyData flag is set for this compression.
343     /// @note
344     ///   Used by stream buffer, that don't have access to specific
345     ///   compression implementation flags. So this method should be
346     ///   implemented in each processor.
347     virtual bool AllowEmptyData() const = 0;
348 
349     // Return number of processed/output bytes.
350     size_t GetProcessedSize(void);
351     size_t GetOutputSize(void);
352 
353 protected:
354     /// Initialize the internal stream state for compression/decompression.
355     /// It does not perform any compression, this will be done by Process().
356     virtual EStatus Init(void) = 0;
357 
358     /// Compress/decompress as much data as possible, and stops when the input
359     /// buffer becomes empty or the output buffer becomes full. It may
360     /// introduce some output latency (reading input without producing any
361     /// output).
362     virtual EStatus Process
363     (const char* in_buf,      // [in]  input buffer
364      size_t      in_len,      // [in]  input data length
365      char*       out_buf,     // [in]  output buffer
366      size_t      out_size,    // [in]  output buffer size
367      size_t*     in_avail,    // [out] count unproc.bytes in input buffer
368      size_t*     out_avail    // [out] count bytes putted into out buffer
369      ) = 0;
370 
371     /// Flush compressed/decompressed data from the output buffer.
372     /// Flushing may degrade compression for some compression algorithms
373     /// and so it should be used only when necessary.
374     virtual EStatus Flush
375     (char*       out_buf,     // [in]  output buffer
376      size_t      out_size,    // [in]  output buffer size
377      size_t*     out_avail    // [out] count bytes putted into out buffer
378      ) = 0;
379 
380     /// Finish the compression/decompression process.
381     /// Process pending input, flush pending output.
382     /// This function slightly like to Flush(), but it must be called only
383     /// at the end of compression process just before End().
384     virtual EStatus Finish
385     (char*       out_buf,     // [in]  output buffer
386      size_t      out_size,    // [in]  output buffer size
387      size_t*     out_avail    // [out] count bytes putted into out buffer
388      ) = 0;
389 
390     /// Free all dynamically allocated data structures.
391     /// This function discards any unprocessed input and does not flush
392     /// any pending output.
393     /// @param abandon
394     ///   If this parameter is not zero that skip all error checks,
395     ///   always return eStatus_Success. Use it if Process/Flush/Finish where
396     ///   not called to perform any compression/decompression after Init().
397     virtual EStatus End(int abandon = 0) = 0;
398 
399 protected:
400     /// Reset internal state
401     void Reset(void);
402 
403     /// Set/unset compressor busy flag
404     void SetBusy(bool busy = true);
405 
406     // Increase number of processed/output bytes.
407     void IncreaseProcessedSize(size_t n_bytes);
408     void IncreaseOutputSize(size_t n_bytes);
409 
410 private:
411     size_t  m_ProcessedSize;  //< The number of processed bytes
412     size_t  m_OutputSize;     //< The number of output bytes
413     bool    m_Busy;           //< Is true if compressor is ready to begin next session
414     // Friend classes
415     friend class CCompressionStream;
416     friend class CCompressionStreambuf;
417     friend class CCompressionStreamProcessor;
418 };
419 
420 
421 /////////////////////////////////////////////////////////////////////////////
422 //
423 // CCompressionException
424 //
// Exceptions generated by CCompression and derived classes
426 //
427 
428 class NCBI_XUTIL_EXPORT CCompressionException : public CCoreException
429 {
430 public:
431     enum EErrCode {
432         eCompression,      ///< Compression/decompression error
433         eCompressionFile   ///< Compression/decompression file error
434     };
GetErrCodeString(void) const435     virtual const char* GetErrCodeString(void) const override
436     {
437         switch (GetErrCode()) {
438         case eCompression     : return "eCompression";
439         case eCompressionFile : return "eCompressionFile";
440         default               : return CException::GetErrCodeString();
441         }
442     }
443     NCBI_EXCEPTION_DEFAULT(CCompressionException,CCoreException);
444 };
445 
446 
447 /////////////////////////////////////////////////////////////////////////////
448 //
449 // CCompressionUtil
450 //
451 // Utility functions
452 //
453 
454 class NCBI_XUTIL_EXPORT CCompressionUtil
455 {
456 public:
457     /// Store 4 bytes of value in the buffer.
458     static void StoreUI4(void* buf, unsigned long value);
459 
460     /// Read 4 bytes from buffer.
461     static Uint4 GetUI4(const void* buf);
462 
463     /// Store 2 bytes of value in the buffer.
464     static void StoreUI2(void* buf, unsigned long value);
465 
466     /// Read 2 bytes from buffer.
467     static Uint2 GetUI2(const void* buf);
468 };
469 
470 
471 //////////////////////////////////////////////////////////////////////////////
472 //
473 // IChunkHandler -- abstract interface class
474 //
475 
476 /// Interface class to scan data source for seekable data chunks.
477 ///
478 class NCBI_XUTIL_EXPORT IChunkHandler
479 {
480 public:
481     typedef Uint8 TPosition; ///< Type to store stream positions
482 
483     /// Action types
484     enum EAction {
485         eAction_Continue, ///< Continue scanning to the next data chunk, if any.
486         eAction_Stop      ///< Stop scanning.
487     };
488 
489     /// Destructor.
~IChunkHandler(void)490     virtual ~IChunkHandler(void) {}
491 
492     /// Callback method, to be implemented by the end user.
493     /// @param raw_pos
494     ///   Position of the chunk in the "raw" (undecoded) stream.
495     /// @param data_pos
496     ///   Position of the chunk in the decoded stream data.
497     /// @return
498     ///   Return a command for the scanning algorithm to continue or stop scanning.
499     virtual EAction OnChunk(TPosition raw_pos, TPosition data_pos) = 0;
500 };
501 
502 
503 /* @} */
504 
505 
506 //===========================================================================
507 //
508 //  Inline
509 //
510 //===========================================================================
511 
512 inline
Reset(void)513 void CCompressionProcessor::Reset(void)
514 {
515     m_ProcessedSize  = 0;
516     m_OutputSize     = 0;
517     m_Busy           = false;
518 }
519 
520 inline
IsBusy(void) const521 bool CCompressionProcessor::IsBusy(void) const
522 {
523     return m_Busy;
524 }
525 
526 inline
SetBusy(bool busy)527 void CCompressionProcessor::SetBusy(bool busy)
528 {
529     if ( busy  &&  m_Busy ) {
530         NCBI_THROW(CCompressionException, eCompression,
531                    "CCompression::SetBusy(): The compressor is busy now");
532     }
533     m_Busy = busy;
534 }
535 
536 inline
IncreaseProcessedSize(size_t n_bytes)537 void CCompressionProcessor::IncreaseProcessedSize(size_t n_bytes)
538 {
539     m_ProcessedSize += n_bytes;
540 }
541 
542 inline
IncreaseOutputSize(size_t n_bytes)543 void CCompressionProcessor::IncreaseOutputSize(size_t n_bytes)
544 {
545     m_OutputSize += n_bytes;
546 }
547 
548 inline
GetProcessedSize(void)549 size_t CCompressionProcessor::GetProcessedSize(void)
550 {
551     return m_ProcessedSize;
552 }
553 
554 inline
GetOutputSize(void)555 size_t CCompressionProcessor::GetOutputSize(void)
556 {
557     return m_OutputSize;
558 }
559 
560 
561 END_NCBI_SCOPE
562 
563 
564 #endif  /* UTIL_COMPRESS__COMPRESS__HPP */
565