1 /*  $Id: test_compress.cpp 622833 2021-01-04 18:21:21Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Vladimir Ivanov
27  *
28  * File Description:  Test program for the Compression API
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbiapp.hpp>
34 #include <corelib/ncbiargs.hpp>
35 #include <corelib/ncbi_limits.hpp>
36 #include <corelib/ncbifile.hpp>
37 #include <util/compress/stream_util.hpp>
38 
39 #include <common/test_assert.h>  // This header must go last
40 
41 
42 USING_NCBI_SCOPE;
43 
44 
45 #define KB * NCBI_CONST_UINT8(1024)
46 #define MB * NCBI_CONST_UINT8(1024) * 1024
47 #define GB * NCBI_CONST_UINT8(1024) * 1024 * 1024
48 
49 
50 // -- regular tests
51 
52 /// Length of data buffers for tests (>5 for overflow test)
53 const size_t  kRegTests[] = { 20, 16 KB, 41 KB, 101 KB };
54 
55 // Maximum source size (maximum value from kReqTests[])
56 const size_t kRegDataLen = 101 KB;
57 /// Output buffer length. ~20% more than kRegDataLen.
58 const size_t kRegBufLen = size_t(kRegDataLen * 1.2);
59 
60 
61 
62 //////////////////////////////////////////////////////////////////////////////
63 //
64 // Test application
65 //
66 
67 class CTest : public CNcbiApplication
68 {
69 public:
70     void Init(void);
71     int  Run(void);
72 
73 public:
74     // Test specified compression method
75     template<class TCompression,
76              class TCompressionFile,
77              class TStreamCompressor,
78              class TStreamDecompressor>
79     void TestMethod(const char* src_buf, size_t src_len, size_t buf_len);
80 
81     // Print out compress/decompress status
82     enum EPrintType {
83         eCompress,
84         eDecompress
85     };
86     void PrintResult(EPrintType type, int last_errcode,
87                      size_t src_len, size_t dst_len, size_t out_len);
88 
89     // Additional tests
90     void TestEmptyInputData(CCompressStream::EMethod);
91     void TestTransparentCopy(const char* src_buf, size_t src_len, size_t buf_len);
92 
93 private:
94     // Auxiliary methods
95     CNcbiIos* x_CreateIStream(const string& filename, const string& src, size_t buf_len);
96     void x_CreateFile(const string& filename, const char* buf, size_t len);
97 
98 private:
99     // Path to store working files,see -path command line argument;
100     // current directory by default.
101     string m_Dir;
102 
103     // Auxiliary members for "big data" tests support
104     bool   m_AllowIstrstream;   // allow using CNcbiIstrstream
105     bool   m_AllowOstrstream;   // allow using CNcbiOstrstream
106     bool   m_AllowStrstream;    // allow using CNcbiStrstream
107     string m_SrcFile;           // file with source data
108 };
109 
110 
111 #include "test_compress_util.inl"
112 
113 
114 
Init(void)115 void CTest::Init(void)
116 {
117     SetDiagPostLevel(eDiag_Error);
118     // To see all output, uncomment next line:
119     //SetDiagPostLevel(eDiag_Trace);
120 
121     // Create command-line argument descriptions
122     unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
123     arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
124                               "Test compression library");
125     arg_desc->AddDefaultPositional
126         ("lib", "Compression library to test", CArgDescriptions::eString, "all");
127     arg_desc->SetConstraint
128         ("lib", &(*new CArgAllow_Strings, "all", "z", "bz2", "lzo"));
129     arg_desc->AddDefaultKey
130         ("size", "SIZE",
131          "Test data size. If not specified, default set of tests will be used. "
132          "Size greater than 4GB can be applied to 'z' compression library tests only,",
133          CArgDescriptions::eString, kEmptyStr);
134     arg_desc->AddDefaultKey
135         ("dir", "PATH",
136          "Path to directory to store working files. Current directory by default.",
137          CArgDescriptions::eString, kEmptyStr);
138 
139     SetupArgDescriptions(arg_desc.release());
140 
141     m_AllowIstrstream = true;
142     m_AllowOstrstream = true;
143     m_AllowStrstream  = true;
144 }
145 
146 
Run(void)147 int CTest::Run(void)
148 {
149     // Get arguments
150     const CArgs& args = GetArgs();
151     string test = args["lib"].AsString();
152 
153     if (!args["dir"].AsString().empty()) {
154         m_Dir = args["dir"].AsString();
155         assert(CDir(m_Dir).Exists());
156     }
157     size_t custom_size = 0;
158     if (!args["size"].AsString().empty()) {
159         custom_size = (size_t)NStr::StringToUInt8_DataSize(args["size"].AsString());
160         //m_AllowIstrstream = (custom_size < (size_t)numeric_limits<std::streamsize>::max());
161         m_AllowIstrstream = (custom_size < (size_t)numeric_limits<int>::max());
162         m_AllowOstrstream = (custom_size < (size_t)numeric_limits<int>::max());
163         m_AllowStrstream  = m_AllowIstrstream && m_AllowOstrstream;
164     }
165     const size_t kCustomTests[] = { custom_size };
166 
167     // Define available tests
168 
169     bool bz2 = (test == "all" || test == "bz2");
170     bool z   = (test == "all" || test == "z");
171     bool lzo = (test == "all" || test == "lzo");
172 #if !defined(HAVE_LIBLZO)
173     if (lzo) {
174         ERR_POST(Warning << "LZO is not available on this platform, ignored.");
175         lzo = false;
176     }
177 #endif
178 
179     // Set a random starting point
180     unsigned int seed = (unsigned int)time(0);
181     ERR_POST(Info << "Random seed = " << seed);
182     srand(seed);
183 
184     // For custom size we add extra ~20% to the buffer size,
185     // some tests like LZO need it, for others it is not necessary,
186     // usually custom size is large enough to fit all data even due
187     // a bad compression ratio.
188     const size_t kDataLen   = custom_size ? custom_size : kRegDataLen;
189     const size_t kBufLen    = custom_size ? size_t((double)custom_size * 1.2) : kRegBufLen;
190     const size_t kTestCount = custom_size ? 1 : sizeof(kRegTests)/sizeof(kRegTests[0]);
191     const auto&  kTests     = custom_size ? kCustomTests : kRegTests;
192 
193     // Preparing data for compression
194     ERR_POST(Trace << "Creating test data...");
195     AutoArray<char> src_buf_arr(kBufLen + 1 /* for possible '\0' */);
196     char* src_buf = src_buf_arr.get();
197     assert(src_buf);
198 #if 1
199     for (size_t i = 0; i < kDataLen; i += 2) {
200         // Use a set of 25 chars [A-Z]
201         // NOTE: manipulator tests don't allow '\0'.
202         src_buf[i]   = (char)(65+(double)rand()/RAND_MAX*(90-65));
203         // Make data more predictable for better compression,
204         // especially for LZO, that is bad on a random data.
205         src_buf[i+1] = (char)(src_buf[i] + 1);
206     }
207 #else
208     for (size_t i = 0; i < kDataLen; i++) {
209         // Use a set of 25 chars [A-Z]
210         // NOTE: manipulator tests don't allow '\0'.
211         src_buf[i] = (char)(65+(double)rand()/RAND_MAX*(90-65));
212     }
213 #endif
214     // Modify first bytes to fixed value, this can prevent decoders
215     // to treat random text data as compressed data.
216     assert(kBufLen > 5);
217     memcpy(src_buf, "12345", 5);
218 
219     // If strstream(s) cannot work with big data than create a copy of the source data on disk,
220     if (custom_size  &&  !(m_AllowIstrstream && m_AllowOstrstream)) {
221         ERR_POST(Trace << "Creating source data file...");
222         m_SrcFile = CFile::ConcatPath(m_Dir, "test_compress.src.file");
223         CFileDeleteAtExit::Add(m_SrcFile);
224         x_CreateFile(m_SrcFile, src_buf, kDataLen);
225     }
226 
227     // Test compressors with different size of data
228     for (size_t i = 0; i < kTestCount; i++) {
229 
230         // Some test require zero-terminated data (manipulators).
231         size_t len   = kTests[i];
232         char   saved = src_buf[len];
233         src_buf[len] = '\0';
234 
235         ERR_POST(Trace << "====================================");
236         ERR_POST(Trace << "Data size = " << len);
237 
238         if ( bz2 ) {
239             ERR_POST(Trace << "-------------- BZip2 ---------------");
240             TestMethod<CBZip2Compression,
241                        CBZip2CompressionFile,
242                        CBZip2StreamCompressor,
243                        CBZip2StreamDecompressor> (src_buf, len, kBufLen);
244         }
245 #if defined(HAVE_LIBLZO)
246         if ( lzo ) {
247             ERR_POST(Trace << "-------------- LZO -----------------");
248             TestMethod<CLZOCompression,
249                        CLZOCompressionFile,
250                        CLZOStreamCompressor,
251                        CLZOStreamDecompressor> (src_buf, len, kBufLen);
252         }
253 #endif
254         if ( z ) {
255             ERR_POST(Trace << "-------------- Zlib ----------------");
256             TestMethod<CZipCompression,
257                        CZipCompressionFile,
258                        CZipStreamCompressor,
259                        CZipStreamDecompressor> (src_buf, len, kBufLen);
260         }
261 
262         // Test for (de)compressor's transparent copy
263         TestTransparentCopy(src_buf, len, kBufLen);
264 
265         // Restore saved character
266         src_buf[len] = saved;
267     }
268 
269     // Run separate test for empty input data
270     if ( !custom_size ) {
271         ERR_POST(Trace << "====================================");
272         ERR_POST(Trace << "Data size = 0");
273         if (bz2) {
274             TestEmptyInputData(CCompressStream::eBZip2);
275         }
276         if (lzo) {
277             TestEmptyInputData(CCompressStream::eLZO);
278         }
279         if (z) {
280             TestEmptyInputData(CCompressStream::eZip);
281         }
282     }
283 
284     ERR_POST(Info << "TEST execution completed successfully!");
285     return 0;
286 }
287 
288 
289 
290 //////////////////////////////////////////////////////////////////////////////
291 //
292 // Test specified compression method
293 //
294 
// Print OK message.
#define OK          ERR_POST(Trace << "OK")
// Print OK message with a prefix. 'msg' is inserted into the output
// expression as is, it is not parenthesized on purpose.
#define OK_MSG(msg) ERR_POST(Trace << msg << " - OK")

// Initialize destination buffers: fill 'dst_buf' and 'cmp_buf' from the
// calling scope with zeros, 'buf_len' bytes each.
// Both calls are wrapped into a single statement, so the macro works
// as expected after an unbraced if/else/for/while as well.
#define INIT_BUFFERS \
    do { \
        memset(dst_buf, 0, buf_len); \
        memset(cmp_buf, 0, buf_len); \
    } while (0)
301 
302 
303 template<class TCompression,
304          class TCompressionFile,
305          class TStreamCompressor,
306          class TStreamDecompressor>
TestMethod(const char * src_buf,size_t src_len,size_t buf_len)307 void CTest::TestMethod(const char* src_buf, size_t src_len, size_t buf_len)
308 {
309     const string kFileName_str = CFile::ConcatPath(m_Dir, "test_compress.compressed.file");
310     const char*  kFileName = kFileName_str.c_str();
311 
312 #if defined(HAVE_LIBLZO)
313     // Initialize LZO compression
314     assert(CLZOCompression::Initialize());
315 #endif
316 #   include "test_compress_run.inl"
317 }
318 
319 
PrintResult(EPrintType type,int last_errcode,size_t src_len,size_t dst_len,size_t out_len)320 void CTest::PrintResult(EPrintType type, int last_errcode,
321                        size_t src_len, size_t dst_len, size_t out_len)
322 {
323     ERR_POST(Trace
324         << string((type == eCompress) ? "Compress   " : "Decompress ")
325         << "errcode = "
326         << ((last_errcode == kUnknownErr) ? "?" : NStr::IntToString(last_errcode)) << ", "
327         << ((src_len == kUnknown) ?         "?" : NStr::SizetToString(src_len)) << " -> "
328         << ((out_len == kUnknown) ?         "?" : NStr::SizetToString(out_len)) << ", limit "
329         << ((dst_len == kUnknown) ?         "?" : NStr::SizetToString(dst_len))
330     );
331 }
332 
333 
334 
335 //////////////////////////////////////////////////////////////////////////////
336 //
337 // Tests for empty input data
338 //   - stream tests (CXX-1828, CXX-3365)
339 //   - fAllowEmptyData flag test (CXX-3365)
340 //
341 
342 struct SEmptyInputDataTest
343 {
344     CCompressStream::EMethod method;
345     unsigned int flags;
346     // Result of CompressBuffer()/DecompressBuffer() methods for specified
347     // set of flags. Stream's Finalize() also should set badbit if FALSE.
348     bool result;
349     // An expected output size for compression with specified set of 'flags'.
350     // Usually this is a sum of sizes for header and footer, if selected
351     // format have it. Decompression output size should be always 0.
352     unsigned int buffer_output_size;
353     unsigned int stream_output_size;
354 };
355 
356 static const SEmptyInputDataTest s_EmptyInputDataTests[] =
357 {
358     { CCompressStream::eZip, CZipCompression::fAllowEmptyData | CZipCompression::fGZip, true, 20, 20 },
359 
360     { CCompressStream::eBZip2, 0 /* default flags */,              false,  0,  0 },
361     { CCompressStream::eBZip2, CBZip2Compression::fAllowEmptyData, true,  14, 14 },
362 #if defined(HAVE_LIBLZO)
363     // LZO's CompressBuffer() method do not use fStreamFormat that add header
364     //  and footer to the output, streams always use it.
365     { CCompressStream::eLZO,   0 /* default flags */,              false,  0,  0 },
366     { CCompressStream::eLZO,   CLZOCompression::fAllowEmptyData,   true,   0, 15 },
367     { CCompressStream::eLZO,   CLZOCompression::fAllowEmptyData |
368                                CLZOCompression::fStreamFormat,     true,  15, 15 },
369 #endif
370     { CCompressStream::eZip,   0 /* default flags */,              false,  0,  0 },
371     { CCompressStream::eZip,   CZipCompression::fGZip,             false,  0,  0 },
372     { CCompressStream::eZip,   CZipCompression::fAllowEmptyData,   true,   8,  8 },
373     { CCompressStream::eZip,   CZipCompression::fAllowEmptyData |
374                                CZipCompression::fGZip,             true,  20, 20 }
375 };
376 
TestEmptyInputData(CCompressStream::EMethod method)377 void CTest::TestEmptyInputData(CCompressStream::EMethod method)
378 {
379     const size_t kLen = 1024;
380     char   src_buf[kLen];
381     char   dst_buf[kLen];
382     char   cmp_buf[kLen];
383     size_t n;
384 
385     assert(
386         static_cast<int>(CZipCompression::fAllowEmptyData) ==
387         static_cast<int>(CBZip2Compression::fAllowEmptyData)
388     );
389 #ifdef HAVE_LIBLZO
390     assert(
391         static_cast<int>(CZipCompression::fAllowEmptyData) ==
392         static_cast<int>(CLZOCompression::fAllowEmptyData)
393     );
394 #endif
395 
396     const size_t count = ArraySize(s_EmptyInputDataTests);
397 
398     for (size_t i = 0;  i < count;  ++i)
399     {
400         SEmptyInputDataTest test = s_EmptyInputDataTests[i];
401         if (test.method != method) {
402             continue;
403         }
404         ERR_POST(Trace << "Test # " << i+1);
405 
406         bool allow_empty = (test.flags & CZipCompression::fAllowEmptyData) > 0;
407 
408         CNcbiIstrstream is_str("");
409         unique_ptr<CCompression>                compression;
410         unique_ptr<CCompressionStreamProcessor> stream_compressor;
411         unique_ptr<CCompressionStreamProcessor> stream_decompressor;
412 
413         if (method == CCompressStream::eBZip2) {
414             compression.reset(new CBZip2Compression());
415             compression->SetFlags(test.flags);
416             stream_compressor.reset(new CBZip2StreamCompressor(test.flags));
417             stream_decompressor.reset(new CBZip2StreamDecompressor(test.flags));
418         } else
419 #if defined(HAVE_LIBLZO)
420         if (method == CCompressStream::eLZO) {
421             compression.reset(new CLZOCompression());
422             compression->SetFlags(test.flags);
423             stream_compressor.reset(new CLZOStreamCompressor(test.flags));
424             stream_decompressor.reset(new CLZOStreamDecompressor(test.flags));
425         } else
426 #endif
427         if (method == CCompressStream::eZip) {
428             compression.reset(new CZipCompression());
429             compression->SetFlags(test.flags);
430             stream_compressor.reset(new CZipStreamCompressor(test.flags));
431             stream_decompressor.reset(new CZipStreamDecompressor(test.flags));
432         } else
433         {
434             _TROUBLE;
435         }
436 
437         // ---- Run tests ----
438 
439         // Buffer compression/decompression test
440         {{
441             bool res = compression->CompressBuffer(src_buf, 0, dst_buf, kLen, &n);
442             assert(res == test.result);
443             assert(n == test.buffer_output_size);
444             res = compression->DecompressBuffer(dst_buf, n, cmp_buf, kLen, &n);
445             assert(res == test.result);
446             assert(n == 0);
447         }}
448 
449         // Input stream
450         {{
451             // Compression
452             {{
453                 CCompressionIStream ics(is_str, stream_compressor.get());
454                 assert(ics.good());
455                 ics.read(dst_buf, kLen);
456                 if (allow_empty) {
457                     assert(ics.eof() && ics.fail()); // short read
458                 } else {
459                     assert(ics.bad()); //error
460                 }
461                 n = (size_t)ics.gcount();
462                 assert(n == test.stream_output_size);
463                 assert(ics.GetProcessedSize() == 0);
464                 assert(ics.GetOutputSize() == n);
465             }}
466             // Decompression
467             {{
468                 CCompressionIStream ids(is_str, stream_decompressor.get());
469                 assert(ids.good());
470                 ids.read(dst_buf, kLen);
471                 if (allow_empty) {
472                     assert(ids.eof() && ids.fail()); // short read
473                 } else {
474                     assert(ids.bad()); // error
475                 }
476                 n = (size_t)ids.gcount();
477                 assert(n == 0);
478                 assert(ids.GetProcessedSize() == 0);
479                 assert(ids.GetOutputSize() == n);
480             }}
481         }}
482 
483         // Output stream
484         {{
485             // Compression
486             {{
487                 CNcbiOstrstream os_str;
488                 CCompressionOStream ocs(os_str, stream_compressor.get());
489                 assert(ocs.good());
490                 ocs.Finalize();
491                 assert(test.result ? ocs.good() : ocs.bad());
492                 n = (size_t)GetOssSize(os_str);
493                 assert(n == test.stream_output_size);
494                 assert(ocs.GetProcessedSize() == 0);
495                 assert(ocs.GetOutputSize() == n);
496             }}
497             // Decompression
498             {{
499                 CNcbiOstrstream os_str;
500                 CCompressionOStream ods(os_str, stream_decompressor.get());
501                 assert(ods.good());
502                 ods.Finalize();
503                 assert(test.result ? ods.good() : ods.bad());
504                 n = (size_t)GetOssSize(os_str);
505                 assert(n == 0);
506                 assert(ods.GetProcessedSize() == 0);
507                 assert(ods.GetOutputSize() == n);
508             }}
509         }}
510 
511         // Output stream tests -- just with flush()
512         {{
513             // Compression
514             {{
515                 CNcbiOstrstream os_str;
516                 CCompressionOStream ocs(os_str, stream_compressor.get());
517                 assert(ocs.good());
518                 ocs.flush();
519                 assert(ocs.good());
520                 ocs.Finalize();
521                 assert(test.result ? ocs.good() : ocs.bad());
522                 n = (size_t)GetOssSize(os_str);
523                 assert(n == test.stream_output_size);
524                 assert(ocs.GetProcessedSize() == 0);
525                 assert(ocs.GetOutputSize() == n);
526             }}
527             // Decompression
528             {{
529                 CNcbiOstrstream os_str;
530                 CCompressionOStream ods(os_str, stream_decompressor.get());
531                 assert(ods.good());
532                 ods.flush();
533                 assert(ods.good());
534                 ods.Finalize();
535                 assert(test.result ? ods.good() : !ods.good());
536                 n = (size_t)GetOssSize(os_str);
537                 assert(n == 0);
538                 assert(ods.GetProcessedSize() == 0);
539                 assert(ods.GetOutputSize() == n);
540             }}
541         }}
542     }
543 }
544 
545 
546 //////////////////////////////////////////////////////////////////////////////
547 //
548 // Tests for transparent stream encoder (CXX-4148)
549 //
550 
TestTransparentCopy(const char * src_buf,size_t src_len,size_t buf_len)551 void CTest::TestTransparentCopy(const char* src_buf, size_t src_len, size_t buf_len)
552 {
553     AutoArray<char> dst_buf_arr(buf_len);
554     char* dst_buf = dst_buf_arr.get();
555     assert(dst_buf);
556     size_t n;
557     unique_ptr<CNcbiIos> stm;
558 
559     const string kFileName_str = CFile::ConcatPath(m_Dir, "test_compress.dst.file");
560     const char*  kFileName = kFileName_str.c_str();
561 
562     // Input stream test
563     {{
564         memset(dst_buf, 0, buf_len);
565         string src(src_buf, src_len);
566         // Create input stream
567         if ( m_AllowIstrstream ) {
568             stm.reset(new CNcbiIstrstream(src));
569         } else {
570             stm.reset(new CNcbiIfstream(_T_XCSTRING(m_SrcFile), ios::in | ios::binary));
571         }
572         assert(stm->good());
573 
574         // Transparent copy using input compression stream
575         CCompressionIStream is(*stm, new CTransparentStreamProcessor(),
576                                CCompressionStream::fOwnProcessor);
577         assert(is.good());
578         n = is.Read(dst_buf, src_len + 1 /* more than exists to get EOF */);
579         assert(is.eof());
580         assert(src_len == n);
581         assert(is.GetProcessedSize() == n);
582         assert(is.GetOutputSize() == n);
583 
584         // Compare data
585         assert(memcmp(src_buf, dst_buf, n) == 0);
586 
587         OK_MSG("input");
588     }}
589 
590     // Output stream test
591     {{
592         CNcbiOstrstream* os_str = nullptr; // need for CNcbiOstrstreamToString()
593 
594         // Create output stream
595         if ( m_AllowOstrstream ) {
596             os_str = new CNcbiOstrstream();
597             stm.reset(os_str);
598         } else {
599             stm.reset(new CNcbiOfstream(kFileName, ios::out | ios::binary));
600         }
601         assert(stm->good());
602 
603         // Transparent copy using output compression stream
604         CCompressionOStream os(*stm, new CTransparentStreamProcessor(),
605                                CCompressionStream::fOwnProcessor);
606         assert(os.good());
607         n = os.Write(src_buf, src_len);
608         assert(os.good());
609         assert(src_len == n);
610         os.Finalize();
611         assert(os.good());
612         assert(os.GetProcessedSize() == n);
613         assert(os.GetOutputSize() == n);
614 
615         // Compare data
616         if ( m_AllowOstrstream ) {
617             string str = CNcbiOstrstreamToString(*os_str);
618             n = str.size();
619             assert(n == src_len);
620             assert(memcmp(src_buf, str.data(), n) == 0);
621         } else {
622             CFile f(kFileName);
623             assert((size_t)f.GetLength() == src_len);
624             assert(f.Compare(m_SrcFile));
625         }
626         if ( !m_AllowOstrstream ) {
627             CFile(kFileName).Remove();
628         }
629         OK_MSG("output");
630     }}
631 }
632 
633 
634 
635 //////////////////////////////////////////////////////////////////////////////
636 //
637 // MAIN
638 //
639 
main(int argc,const char * argv[])640 int main(int argc, const char* argv[])
641 {
642     // Execute main application function
643     return CTest().AppMain(argc, argv);
644 }
645