1 #ifndef UTIL_COMPRESS__COMPRESS__HPP
2 #define UTIL_COMPRESS__COMPRESS__HPP
3
4 /* $Id: compress.hpp 612744 2020-07-27 14:26:09Z ivanov $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Vladimir Ivanov
30 *
31 * File Description: The Compression API
32 *
33 */
34
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/version_api.hpp>
37
38
39 /** @addtogroup Compression
40 *
41 * @{
42 */
43
44
45 BEGIN_NCBI_SCOPE
46
47
/// Default buffer size (16 KiB) for compression I/O streams; it is also
/// the default "buf_size" of the file (de)compression methods below.
const streamsize kCompressionDefaultBufSize = 16 * 1024;
50
/// Report a problem in the compression API: posts "message" with Warning
/// severity through ERR_POST_X, using the given error subcode.
#define ERR_COMPRESS(subcode, message) \
    ERR_POST_X(subcode, Warning << message)
53
/// Handler chain that catches and reports every exception (written for
/// streams' destructors). Append it directly after a try-block:
///
///     try { ... } COMPRESS_HANDLE_EXCEPTIONS(subcode, "context")
///
///  - CException  -- reported via NCBI_REPORT_EXCEPTION_X;
///  - exception   -- posted via ERR_POST_X as Error, including e.what();
///  - other       -- posted via ERR_POST_X as Error ("Unknown exception").
///
/// Anything thrown while reporting is swallowed on purpose, so no exception
/// can escape the handler chain. Exceptions are caught by const reference
/// because the handlers only read them. The last line deliberately has no
/// trailing backslash, so the macro cannot absorb the line that follows it.
#define COMPRESS_HANDLE_EXCEPTIONS(subcode, message)                  \
    catch (const CException& e) {                                     \
        try {                                                         \
            NCBI_REPORT_EXCEPTION_X(subcode, message, e);             \
        } catch (...) {                                               \
        }                                                             \
    }                                                                 \
    catch (const exception& e) {                                      \
        try {                                                         \
            ERR_POST_X(subcode, Error                                 \
                       << "[" << message                              \
                       << "] Exception: " << e.what());               \
        } catch (...) {                                               \
        }                                                             \
    }                                                                 \
    catch (...) {                                                     \
        try {                                                         \
            ERR_POST_X(subcode, Error                                 \
                       << "[" << message << "] Unknown exception");   \
        } catch (...) {                                               \
        }                                                             \
    }
77
78
// Forward declarations.
// CCompressionFile is used by CCompression before its definition later in
// this header; CCompressionStreambuf is only named here (as a friend class).
class CCompressionFile;
class CCompressionStreambuf;
82
83
84 //////////////////////////////////////////////////////////////////////////////
85 //
86 // ICompression -- abstract interface class
87 //
88
class NCBI_XUTIL_EXPORT ICompression
{
public:
    /// Compression level.
    ///
    /// It is in range [0..9]. A higher level might mean better compression
    /// and usually a longer compression time. Usually 1 gives the best speed,
    /// 9 gives the best compression, 0 gives no compression at all.
    /// eLevel_Default requests a compromise between speed and compression
    /// (according to developers of the corresponding compression algorithm).
    enum ELevel {
        eLevel_Default = -1, // default
        eLevel_NoCompression = 0, // just store data
        eLevel_Lowest = 1,
        eLevel_VeryLow = 2,
        eLevel_Low = 3,
        eLevel_MediumLow = 4,
        eLevel_Medium = 5,
        eLevel_MediumHigh = 6,
        eLevel_High = 7,
        eLevel_VeryHigh = 8,
        eLevel_Best = 9
    };

    /// Compression flags. The set of available flags depends on the
    /// compression algorithm implementation. For examples see the flags
    /// defined in the derived classes: CBZip2Compression::EFlags,
    /// CLZOCompression::EFlags, CZipCompression::EFlags, etc.
    typedef unsigned int TFlags; ///< Bitwise OR of CXxxCompression::EFlags

public:
    /// Destructor
    virtual ~ICompression(void) {}

    /// Return name and version of the compression library.
    virtual CVersionInfo GetVersion(void) const = 0;

    /// Set the compression level.
    virtual void SetLevel(ELevel level) = 0;
    /// Get the compression level.
    virtual ELevel GetLevel(void) const = 0;

    /// Return the default compression level for current compression algorithm.
    virtual ELevel GetDefaultLevel(void) const = 0;

    /// Get compressor's internal status/error code for the last operation.
    virtual int GetErrorCode(void) const = 0;
    /// Get the description of the status/error of the last operation.
    virtual string GetErrorDescription(void) const = 0;

    /// Get flags.
    virtual TFlags GetFlags(void) const = 0;
    /// Set flags.
    virtual void SetFlags(TFlags flags) = 0;

    //
    // Utility functions
    //

    /// Compress the source buffer into the destination buffer.
    /// Return TRUE on success, FALSE on error.
    /// The compressor error code can be acquired via GetErrorCode() call.
    /// Notice that the total size of the destination buffer must be
    /// a little more than the size of the source buffer.
    virtual bool CompressBuffer(
        const void* src_buf, size_t src_len,
        void* dst_buf, size_t dst_size,
        /* out */ size_t* dst_len
    ) = 0;

    /// Decompress the source buffer into the destination buffer.
    /// Return TRUE on success, FALSE on error.
    /// The compressor error code can be acquired via GetErrorCode() call.
    virtual bool DecompressBuffer(
        const void* src_buf, size_t src_len,
        void* dst_buf, size_t dst_size,
        /* out */ size_t* dst_len
    ) = 0;

    /// Compress file "src_file" and put the result to file "dst_file".
    /// "buf_size" defaults to kCompressionDefaultBufSize.
    /// Return TRUE on success, FALSE on error.
    virtual bool CompressFile(
        const string& src_file,
        const string& dst_file,
        size_t buf_size = kCompressionDefaultBufSize
    ) = 0;

    /// Decompress file "src_file" and put the result to file "dst_file".
    /// "buf_size" defaults to kCompressionDefaultBufSize.
    /// Return TRUE on success, FALSE on error.
    virtual bool DecompressFile(
        const string& src_file,
        const string& dst_file,
        size_t buf_size = kCompressionDefaultBufSize
    ) = 0;
};
176
177
178 //////////////////////////////////////////////////////////////////////////////
179 //
180 // CCompression -- abstract base class
181 //
182
183 class NCBI_XUTIL_EXPORT CCompression : public ICompression
184 {
185 public:
186 // 'ctors
187 CCompression(ELevel level = eLevel_Default);
188 virtual ~CCompression(void);
189
190 /// Return name and version of the compression library.
191 virtual CVersionInfo GetVersion(void) const = 0;
192
193 // Get/set compression level.
194 // NOTE 1: Changing compression level after compression has begun will
195 // be ignored.
196 // NOTE 2: If the level is not supported by the underlying algorithm,
197 // then it will be translated to the nearest supported value.
198 virtual void SetLevel(ELevel level);
199 virtual ELevel GetLevel(void) const;
200
201 // Get compressor's internal status/error code and description
202 // for the last operation.
203 virtual int GetErrorCode(void) const;
204 virtual string GetErrorDescription(void) const;
205
206 /// Get flags.
207 virtual TFlags GetFlags(void) const;
208 /// Set flags.
209 virtual void SetFlags(TFlags flags);
210
211 protected:
212 // Universal file compression/decompression functions.
213 // Return TRUE on success, FALSE on error.
214 virtual bool x_CompressFile(
215 const string& src_file,
216 CCompressionFile& dst_file,
217 size_t buf_size = kCompressionDefaultBufSize
218 );
219 virtual bool x_DecompressFile(
220 CCompressionFile& src_file,
221 const string& dst_file,
222 size_t buf_size = kCompressionDefaultBufSize
223 );
224
225 // Set last action error/status code and description
226 void SetError(int status, const char* description = 0);
227 void SetError(int status, const string& description);
228
229 protected:
230 /// Decompression mode (see fAllowTransparentRead flag).
231 enum EDecompressMode {
232 eMode_Unknown, ///< Not known yet (decompress/transparent read)
233 eMode_Decompress, ///< Generic decompression
234 eMode_TransparentRead ///< Transparent read, the data is uncompressed
235 };
236 ///< Decompress mode (Decompress/TransparentRead/Unknown).
237 EDecompressMode m_DecompressMode;
238
239 private:
240 ELevel m_Level; // Compression level
241 int m_ErrorCode; // Last compressor action error/status
242 string m_ErrorMsg; // Last compressor action error message
243 TFlags m_Flags; // Bitwise OR of flags
244
245 // Friend classes
246 friend class CCompressionStreambuf;
247 };
248
249
250
251 //////////////////////////////////////////////////////////////////////////////
252 //
253 // CCompressionFile -- abstract base class
254 //
255
// Base class for working with compressed files.
// It is assumed that the file on disk is always compressed and that
// the data in memory is uncompressed.
//

class NCBI_XUTIL_EXPORT CCompressionFile
{
public:
    /// Compression file handler
    typedef void* TFile;

    /// File open mode
    enum EMode {
        eMode_Read,  ///< Reading from compressed file
        eMode_Write  ///< Writing compressed data to file
    };

    // 'ctors
    CCompressionFile(void);
    CCompressionFile(const string& path, EMode mode);
    virtual ~CCompressionFile(void);

    /// Opens a compressed file for reading or writing.
    /// Return FALSE if an error has occurred (the result is a bool,
    /// not a pointer).
    virtual bool Open(const string& path, EMode mode) = 0;

    /// Read up to "len" uncompressed bytes from the compressed file
    /// into the buffer "buf". Return the number of bytes actually read
    /// (0 for end of file, -1 for error)
    virtual long Read(void* buf, size_t len) = 0;

    /// Writes the given number of uncompressed bytes into the compressed file.
    /// Return the number of bytes actually written or -1 for error.
    /// The returned value can be less than "len", especially if "len" exceeds
    /// numeric_limits<long>::max(); in that case repeat the call for
    /// the remaining portion.
    virtual long Write(const void* buf, size_t len) = 0;

    /// Flushes all pending output if necessary, closes the compressed file.
    /// Return TRUE on success, FALSE on error.
    virtual bool Close(void) = 0;

protected:
    TFile m_File;  ///< File handler.
    EMode m_Mode;  ///< File open mode.
};
301
302
303
304 //////////////////////////////////////////////////////////////////////////////
305 //
306 // CCompressionProcessor -- abstract base class
307 //
// Contains functions for servicing a compression/decompression session.
309 //
310
class NCBI_XUTIL_EXPORT CCompressionProcessor
{
public:
    /// Type of the result of all basic functions
    enum EStatus {
        /// Everything is fine, no errors occurred
        eStatus_Success,
        /// Special case of eStatus_Success.
        /// Logical end of (compressed) stream is detected, no errors occurred.
        /// All subsequent inquiries about data processing should be ignored.
        eStatus_EndOfData,
        /// Error has occurred. The error code can be acquired by GetErrorCode().
        eStatus_Error,
        /// Output buffer overflow - not enough output space.
        /// Buffer must be emptied and the last action repeated.
        eStatus_Overflow,
        /// Special value. Just need to repeat last action.
        eStatus_Repeat,
        /// Special value. Status is undefined.
        eStatus_Unknown
    };

    // 'ctors
    CCompressionProcessor(void);
    virtual ~CCompressionProcessor(void);

    /// Return the compressor's busy flag. If the returned value is true,
    /// the current compression object is already in use by another
    /// compression session.
    bool IsBusy(void) const;

    /// Return TRUE if fAllowEmptyData flag is set for this compression.
    /// @note
    ///   Used by the stream buffer, which doesn't have access to specific
    ///   compression implementation flags. So this method should be
    ///   implemented in each processor.
    virtual bool AllowEmptyData() const = 0;

    // Return number of processed/output bytes.
    // NOTE(review): both getters are declared non-const although their
    // inline definitions below only read a data member.
    size_t GetProcessedSize(void);
    size_t GetOutputSize(void);

protected:
    /// Initialize the internal stream state for compression/decompression.
    /// It does not perform any compression, this will be done by Process().
    virtual EStatus Init(void) = 0;

    /// Compress/decompress as much data as possible, and stop when the input
    /// buffer becomes empty or the output buffer becomes full. It may
    /// introduce some output latency (reading input without producing any
    /// output).
    virtual EStatus Process
    (const char* in_buf,    // [in]  input buffer
     size_t      in_len,    // [in]  input data length
     char*       out_buf,   // [in]  output buffer
     size_t      out_size,  // [in]  output buffer size
     size_t*     in_avail,  // [out] number of unprocessed bytes in input buffer
     size_t*     out_avail  // [out] number of bytes put into output buffer
    ) = 0;

    /// Flush compressed/decompressed data from the output buffer.
    /// Flushing may degrade compression for some compression algorithms
    /// and so it should be used only when necessary.
    virtual EStatus Flush
    (char*   out_buf,   // [in]  output buffer
     size_t  out_size,  // [in]  output buffer size
     size_t* out_avail  // [out] number of bytes put into output buffer
    ) = 0;

    /// Finish the compression/decompression process.
    /// Process pending input, flush pending output.
    /// This function is slightly similar to Flush(), but it must be called
    /// only at the end of the compression process, just before End().
    virtual EStatus Finish
    (char*   out_buf,   // [in]  output buffer
     size_t  out_size,  // [in]  output buffer size
     size_t* out_avail  // [out] number of bytes put into output buffer
    ) = 0;

    /// Free all dynamically allocated data structures.
    /// This function discards any unprocessed input and does not flush
    /// any pending output.
    /// @param abandon
    ///   If this parameter is not zero, then skip all error checks and
    ///   always return eStatus_Success. Use it if Process/Flush/Finish were
    ///   not called to perform any compression/decompression after Init().
    virtual EStatus End(int abandon = 0) = 0;

protected:
    /// Reset internal state
    void Reset(void);

    /// Set/unset compressor busy flag
    void SetBusy(bool busy = true);

    // Increase number of processed/output bytes.
    void IncreaseProcessedSize(size_t n_bytes);
    void IncreaseOutputSize(size_t n_bytes);

private:
    size_t m_ProcessedSize;  ///< The number of processed bytes
    size_t m_OutputSize;     ///< The number of output bytes
    bool   m_Busy;           ///< TRUE while the compressor is marked busy,
                             ///< i.e. in use by a session (see SetBusy())
    // Friend classes
    friend class CCompressionStream;
    friend class CCompressionStreambuf;
    friend class CCompressionStreamProcessor;
};
419
420
421 /////////////////////////////////////////////////////////////////////////////
422 //
423 // CCompressionException
424 //
// Exceptions generated by CCompression and derived classes
426 //
427
428 class NCBI_XUTIL_EXPORT CCompressionException : public CCoreException
429 {
430 public:
431 enum EErrCode {
432 eCompression, ///< Compression/decompression error
433 eCompressionFile ///< Compression/decompression file error
434 };
GetErrCodeString(void) const435 virtual const char* GetErrCodeString(void) const override
436 {
437 switch (GetErrCode()) {
438 case eCompression : return "eCompression";
439 case eCompressionFile : return "eCompressionFile";
440 default : return CException::GetErrCodeString();
441 }
442 }
443 NCBI_EXCEPTION_DEFAULT(CCompressionException,CCoreException);
444 };
445
446
447 /////////////////////////////////////////////////////////////////////////////
448 //
449 // CCompressionUtil
450 //
451 // Utility functions
452 //
453
/// Static helpers that store/read 2- and 4-byte integer values
/// to/from a raw memory buffer.
class NCBI_XUTIL_EXPORT CCompressionUtil
{
public:
    /// Store 4 bytes of value in the buffer.
    /// NOTE(review): "value" is an unsigned long while GetUI4() returns
    /// Uint4; presumably only the low 4 bytes are stored and "buf" must
    /// provide at least 4 bytes -- confirm against the implementation.
    static void StoreUI4(void* buf, unsigned long value);

    /// Read 4 bytes from buffer.
    static Uint4 GetUI4(const void* buf);

    /// Store 2 bytes of value in the buffer.
    /// NOTE(review): "value" is an unsigned long while GetUI2() returns
    /// Uint2; presumably only the low 2 bytes are stored and "buf" must
    /// provide at least 2 bytes -- confirm against the implementation.
    static void StoreUI2(void* buf, unsigned long value);

    /// Read 2 bytes from buffer.
    static Uint2 GetUI2(const void* buf);
};
469
470
471 //////////////////////////////////////////////////////////////////////////////
472 //
473 // IChunkHandler -- abstract interface class
474 //
475
476 /// Interface class to scan data source for seekable data chunks.
477 ///
class NCBI_XUTIL_EXPORT IChunkHandler
{
public:
    typedef Uint8 TPosition; ///< Type to store stream positions

    /// Action types (the value returned by OnChunk()).
    enum EAction {
        eAction_Continue, ///< Continue scanning to the next data chunk, if any.
        eAction_Stop      ///< Stop scanning.
    };

    /// Destructor.
    virtual ~IChunkHandler(void) {}

    /// Callback method, to be implemented by the end user.
    /// @param raw_pos
    ///   Position of the chunk in the "raw" (undecoded) stream.
    /// @param data_pos
    ///   Position of the chunk in the decoded stream data.
    /// @return
    ///   A command for the scanning algorithm: eAction_Continue to continue
    ///   scanning, or eAction_Stop to stop it.
    virtual EAction OnChunk(TPosition raw_pos, TPosition data_pos) = 0;
};
501
502
503 /* @} */
504
505
506 //===========================================================================
507 //
508 // Inline
509 //
510 //===========================================================================
511
512 inline
Reset(void)513 void CCompressionProcessor::Reset(void)
514 {
515 m_ProcessedSize = 0;
516 m_OutputSize = 0;
517 m_Busy = false;
518 }
519
520 inline
IsBusy(void) const521 bool CCompressionProcessor::IsBusy(void) const
522 {
523 return m_Busy;
524 }
525
526 inline
SetBusy(bool busy)527 void CCompressionProcessor::SetBusy(bool busy)
528 {
529 if ( busy && m_Busy ) {
530 NCBI_THROW(CCompressionException, eCompression,
531 "CCompression::SetBusy(): The compressor is busy now");
532 }
533 m_Busy = busy;
534 }
535
536 inline
IncreaseProcessedSize(size_t n_bytes)537 void CCompressionProcessor::IncreaseProcessedSize(size_t n_bytes)
538 {
539 m_ProcessedSize += n_bytes;
540 }
541
542 inline
IncreaseOutputSize(size_t n_bytes)543 void CCompressionProcessor::IncreaseOutputSize(size_t n_bytes)
544 {
545 m_OutputSize += n_bytes;
546 }
547
548 inline
GetProcessedSize(void)549 size_t CCompressionProcessor::GetProcessedSize(void)
550 {
551 return m_ProcessedSize;
552 }
553
554 inline
GetOutputSize(void)555 size_t CCompressionProcessor::GetOutputSize(void)
556 {
557 return m_OutputSize;
558 }
559
560
561 END_NCBI_SCOPE
562
563
564 #endif /* UTIL_COMPRESS__COMPRESS__HPP */
565