1 /* $Id: test_compress.cpp 622833 2021-01-04 18:21:21Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Vladimir Ivanov
27 *
28 * File Description: Test program for the Compression API
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbiapp.hpp>
34 #include <corelib/ncbiargs.hpp>
35 #include <corelib/ncbi_limits.hpp>
36 #include <corelib/ncbifile.hpp>
37 #include <util/compress/stream_util.hpp>
38
39 #include <common/test_assert.h> // This header must go last
40
41
42 USING_NCBI_SCOPE;
43
44
45 #define KB * NCBI_CONST_UINT8(1024)
46 #define MB * NCBI_CONST_UINT8(1024) * 1024
47 #define GB * NCBI_CONST_UINT8(1024) * 1024 * 1024
48
49
50 // -- regular tests
51
52 /// Length of data buffers for tests (>5 for overflow test)
53 const size_t kRegTests[] = { 20, 16 KB, 41 KB, 101 KB };
54
55 // Maximum source size (maximum value from kReqTests[])
56 const size_t kRegDataLen = 101 KB;
57 /// Output buffer length. ~20% more than kRegDataLen.
58 const size_t kRegBufLen = size_t(kRegDataLen * 1.2);
59
60
61
62 //////////////////////////////////////////////////////////////////////////////
63 //
64 // Test application
65 //
66
67 class CTest : public CNcbiApplication
68 {
69 public:
70 void Init(void);
71 int Run(void);
72
73 public:
74 // Test specified compression method
75 template<class TCompression,
76 class TCompressionFile,
77 class TStreamCompressor,
78 class TStreamDecompressor>
79 void TestMethod(const char* src_buf, size_t src_len, size_t buf_len);
80
81 // Print out compress/decompress status
82 enum EPrintType {
83 eCompress,
84 eDecompress
85 };
86 void PrintResult(EPrintType type, int last_errcode,
87 size_t src_len, size_t dst_len, size_t out_len);
88
89 // Additional tests
90 void TestEmptyInputData(CCompressStream::EMethod);
91 void TestTransparentCopy(const char* src_buf, size_t src_len, size_t buf_len);
92
93 private:
94 // Auxiliary methods
95 CNcbiIos* x_CreateIStream(const string& filename, const string& src, size_t buf_len);
96 void x_CreateFile(const string& filename, const char* buf, size_t len);
97
98 private:
99 // Path to store working files,see -path command line argument;
100 // current directory by default.
101 string m_Dir;
102
103 // Auxiliary members for "big data" tests support
104 bool m_AllowIstrstream; // allow using CNcbiIstrstream
105 bool m_AllowOstrstream; // allow using CNcbiOstrstream
106 bool m_AllowStrstream; // allow using CNcbiStrstream
107 string m_SrcFile; // file with source data
108 };
109
110
111 #include "test_compress_util.inl"
112
113
114
Init(void)115 void CTest::Init(void)
116 {
117 SetDiagPostLevel(eDiag_Error);
118 // To see all output, uncomment next line:
119 //SetDiagPostLevel(eDiag_Trace);
120
121 // Create command-line argument descriptions
122 unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
123 arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
124 "Test compression library");
125 arg_desc->AddDefaultPositional
126 ("lib", "Compression library to test", CArgDescriptions::eString, "all");
127 arg_desc->SetConstraint
128 ("lib", &(*new CArgAllow_Strings, "all", "z", "bz2", "lzo"));
129 arg_desc->AddDefaultKey
130 ("size", "SIZE",
131 "Test data size. If not specified, default set of tests will be used. "
132 "Size greater than 4GB can be applied to 'z' compression library tests only,",
133 CArgDescriptions::eString, kEmptyStr);
134 arg_desc->AddDefaultKey
135 ("dir", "PATH",
136 "Path to directory to store working files. Current directory by default.",
137 CArgDescriptions::eString, kEmptyStr);
138
139 SetupArgDescriptions(arg_desc.release());
140
141 m_AllowIstrstream = true;
142 m_AllowOstrstream = true;
143 m_AllowStrstream = true;
144 }
145
146
Run(void)147 int CTest::Run(void)
148 {
149 // Get arguments
150 const CArgs& args = GetArgs();
151 string test = args["lib"].AsString();
152
153 if (!args["dir"].AsString().empty()) {
154 m_Dir = args["dir"].AsString();
155 assert(CDir(m_Dir).Exists());
156 }
157 size_t custom_size = 0;
158 if (!args["size"].AsString().empty()) {
159 custom_size = (size_t)NStr::StringToUInt8_DataSize(args["size"].AsString());
160 //m_AllowIstrstream = (custom_size < (size_t)numeric_limits<std::streamsize>::max());
161 m_AllowIstrstream = (custom_size < (size_t)numeric_limits<int>::max());
162 m_AllowOstrstream = (custom_size < (size_t)numeric_limits<int>::max());
163 m_AllowStrstream = m_AllowIstrstream && m_AllowOstrstream;
164 }
165 const size_t kCustomTests[] = { custom_size };
166
167 // Define available tests
168
169 bool bz2 = (test == "all" || test == "bz2");
170 bool z = (test == "all" || test == "z");
171 bool lzo = (test == "all" || test == "lzo");
172 #if !defined(HAVE_LIBLZO)
173 if (lzo) {
174 ERR_POST(Warning << "LZO is not available on this platform, ignored.");
175 lzo = false;
176 }
177 #endif
178
179 // Set a random starting point
180 unsigned int seed = (unsigned int)time(0);
181 ERR_POST(Info << "Random seed = " << seed);
182 srand(seed);
183
184 // For custom size we add extra ~20% to the buffer size,
185 // some tests like LZO need it, for others it is not necessary,
186 // usually custom size is large enough to fit all data even due
187 // a bad compression ratio.
188 const size_t kDataLen = custom_size ? custom_size : kRegDataLen;
189 const size_t kBufLen = custom_size ? size_t((double)custom_size * 1.2) : kRegBufLen;
190 const size_t kTestCount = custom_size ? 1 : sizeof(kRegTests)/sizeof(kRegTests[0]);
191 const auto& kTests = custom_size ? kCustomTests : kRegTests;
192
193 // Preparing data for compression
194 ERR_POST(Trace << "Creating test data...");
195 AutoArray<char> src_buf_arr(kBufLen + 1 /* for possible '\0' */);
196 char* src_buf = src_buf_arr.get();
197 assert(src_buf);
198 #if 1
199 for (size_t i = 0; i < kDataLen; i += 2) {
200 // Use a set of 25 chars [A-Z]
201 // NOTE: manipulator tests don't allow '\0'.
202 src_buf[i] = (char)(65+(double)rand()/RAND_MAX*(90-65));
203 // Make data more predictable for better compression,
204 // especially for LZO, that is bad on a random data.
205 src_buf[i+1] = (char)(src_buf[i] + 1);
206 }
207 #else
208 for (size_t i = 0; i < kDataLen; i++) {
209 // Use a set of 25 chars [A-Z]
210 // NOTE: manipulator tests don't allow '\0'.
211 src_buf[i] = (char)(65+(double)rand()/RAND_MAX*(90-65));
212 }
213 #endif
214 // Modify first bytes to fixed value, this can prevent decoders
215 // to treat random text data as compressed data.
216 assert(kBufLen > 5);
217 memcpy(src_buf, "12345", 5);
218
219 // If strstream(s) cannot work with big data than create a copy of the source data on disk,
220 if (custom_size && !(m_AllowIstrstream && m_AllowOstrstream)) {
221 ERR_POST(Trace << "Creating source data file...");
222 m_SrcFile = CFile::ConcatPath(m_Dir, "test_compress.src.file");
223 CFileDeleteAtExit::Add(m_SrcFile);
224 x_CreateFile(m_SrcFile, src_buf, kDataLen);
225 }
226
227 // Test compressors with different size of data
228 for (size_t i = 0; i < kTestCount; i++) {
229
230 // Some test require zero-terminated data (manipulators).
231 size_t len = kTests[i];
232 char saved = src_buf[len];
233 src_buf[len] = '\0';
234
235 ERR_POST(Trace << "====================================");
236 ERR_POST(Trace << "Data size = " << len);
237
238 if ( bz2 ) {
239 ERR_POST(Trace << "-------------- BZip2 ---------------");
240 TestMethod<CBZip2Compression,
241 CBZip2CompressionFile,
242 CBZip2StreamCompressor,
243 CBZip2StreamDecompressor> (src_buf, len, kBufLen);
244 }
245 #if defined(HAVE_LIBLZO)
246 if ( lzo ) {
247 ERR_POST(Trace << "-------------- LZO -----------------");
248 TestMethod<CLZOCompression,
249 CLZOCompressionFile,
250 CLZOStreamCompressor,
251 CLZOStreamDecompressor> (src_buf, len, kBufLen);
252 }
253 #endif
254 if ( z ) {
255 ERR_POST(Trace << "-------------- Zlib ----------------");
256 TestMethod<CZipCompression,
257 CZipCompressionFile,
258 CZipStreamCompressor,
259 CZipStreamDecompressor> (src_buf, len, kBufLen);
260 }
261
262 // Test for (de)compressor's transparent copy
263 TestTransparentCopy(src_buf, len, kBufLen);
264
265 // Restore saved character
266 src_buf[len] = saved;
267 }
268
269 // Run separate test for empty input data
270 if ( !custom_size ) {
271 ERR_POST(Trace << "====================================");
272 ERR_POST(Trace << "Data size = 0");
273 if (bz2) {
274 TestEmptyInputData(CCompressStream::eBZip2);
275 }
276 if (lzo) {
277 TestEmptyInputData(CCompressStream::eLZO);
278 }
279 if (z) {
280 TestEmptyInputData(CCompressStream::eZip);
281 }
282 }
283
284 ERR_POST(Info << "TEST execution completed successfully!");
285 return 0;
286 }
287
288
289
//////////////////////////////////////////////////////////////////////////////
//
// Test specified compression method
//

// Post a trace-level "OK" message, optionally prefixed with 'msg'.
#define OK          ERR_POST(Trace << "OK")
#define OK_MSG(msg) ERR_POST(Trace << msg << " - OK")

// Zero-fill both destination buffers.
// Expects 'dst_buf', 'cmp_buf' and 'buf_len' to be visible at the point of use.
// NOTE: expands to two statements, so it must not be used as the unbraced
//       body of an if/else/loop.
#define INIT_BUFFERS  memset(dst_buf, 0, buf_len); memset(cmp_buf, 0, buf_len)
301
302
303 template<class TCompression,
304 class TCompressionFile,
305 class TStreamCompressor,
306 class TStreamDecompressor>
TestMethod(const char * src_buf,size_t src_len,size_t buf_len)307 void CTest::TestMethod(const char* src_buf, size_t src_len, size_t buf_len)
308 {
309 const string kFileName_str = CFile::ConcatPath(m_Dir, "test_compress.compressed.file");
310 const char* kFileName = kFileName_str.c_str();
311
312 #if defined(HAVE_LIBLZO)
313 // Initialize LZO compression
314 assert(CLZOCompression::Initialize());
315 #endif
316 # include "test_compress_run.inl"
317 }
318
319
PrintResult(EPrintType type,int last_errcode,size_t src_len,size_t dst_len,size_t out_len)320 void CTest::PrintResult(EPrintType type, int last_errcode,
321 size_t src_len, size_t dst_len, size_t out_len)
322 {
323 ERR_POST(Trace
324 << string((type == eCompress) ? "Compress " : "Decompress ")
325 << "errcode = "
326 << ((last_errcode == kUnknownErr) ? "?" : NStr::IntToString(last_errcode)) << ", "
327 << ((src_len == kUnknown) ? "?" : NStr::SizetToString(src_len)) << " -> "
328 << ((out_len == kUnknown) ? "?" : NStr::SizetToString(out_len)) << ", limit "
329 << ((dst_len == kUnknown) ? "?" : NStr::SizetToString(dst_len))
330 );
331 }
332
333
334
335 //////////////////////////////////////////////////////////////////////////////
336 //
337 // Tests for empty input data
338 // - stream tests (CXX-1828, CXX-3365)
339 // - fAllowEmptyData flag test (CXX-3365)
340 //
341
342 struct SEmptyInputDataTest
343 {
344 CCompressStream::EMethod method;
345 unsigned int flags;
346 // Result of CompressBuffer()/DecompressBuffer() methods for specified
347 // set of flags. Stream's Finalize() also should set badbit if FALSE.
348 bool result;
349 // An expected output size for compression with specified set of 'flags'.
350 // Usually this is a sum of sizes for header and footer, if selected
351 // format have it. Decompression output size should be always 0.
352 unsigned int buffer_output_size;
353 unsigned int stream_output_size;
354 };
355
356 static const SEmptyInputDataTest s_EmptyInputDataTests[] =
357 {
358 { CCompressStream::eZip, CZipCompression::fAllowEmptyData | CZipCompression::fGZip, true, 20, 20 },
359
360 { CCompressStream::eBZip2, 0 /* default flags */, false, 0, 0 },
361 { CCompressStream::eBZip2, CBZip2Compression::fAllowEmptyData, true, 14, 14 },
362 #if defined(HAVE_LIBLZO)
363 // LZO's CompressBuffer() method do not use fStreamFormat that add header
364 // and footer to the output, streams always use it.
365 { CCompressStream::eLZO, 0 /* default flags */, false, 0, 0 },
366 { CCompressStream::eLZO, CLZOCompression::fAllowEmptyData, true, 0, 15 },
367 { CCompressStream::eLZO, CLZOCompression::fAllowEmptyData |
368 CLZOCompression::fStreamFormat, true, 15, 15 },
369 #endif
370 { CCompressStream::eZip, 0 /* default flags */, false, 0, 0 },
371 { CCompressStream::eZip, CZipCompression::fGZip, false, 0, 0 },
372 { CCompressStream::eZip, CZipCompression::fAllowEmptyData, true, 8, 8 },
373 { CCompressStream::eZip, CZipCompression::fAllowEmptyData |
374 CZipCompression::fGZip, true, 20, 20 }
375 };
376
TestEmptyInputData(CCompressStream::EMethod method)377 void CTest::TestEmptyInputData(CCompressStream::EMethod method)
378 {
379 const size_t kLen = 1024;
380 char src_buf[kLen];
381 char dst_buf[kLen];
382 char cmp_buf[kLen];
383 size_t n;
384
385 assert(
386 static_cast<int>(CZipCompression::fAllowEmptyData) ==
387 static_cast<int>(CBZip2Compression::fAllowEmptyData)
388 );
389 #ifdef HAVE_LIBLZO
390 assert(
391 static_cast<int>(CZipCompression::fAllowEmptyData) ==
392 static_cast<int>(CLZOCompression::fAllowEmptyData)
393 );
394 #endif
395
396 const size_t count = ArraySize(s_EmptyInputDataTests);
397
398 for (size_t i = 0; i < count; ++i)
399 {
400 SEmptyInputDataTest test = s_EmptyInputDataTests[i];
401 if (test.method != method) {
402 continue;
403 }
404 ERR_POST(Trace << "Test # " << i+1);
405
406 bool allow_empty = (test.flags & CZipCompression::fAllowEmptyData) > 0;
407
408 CNcbiIstrstream is_str("");
409 unique_ptr<CCompression> compression;
410 unique_ptr<CCompressionStreamProcessor> stream_compressor;
411 unique_ptr<CCompressionStreamProcessor> stream_decompressor;
412
413 if (method == CCompressStream::eBZip2) {
414 compression.reset(new CBZip2Compression());
415 compression->SetFlags(test.flags);
416 stream_compressor.reset(new CBZip2StreamCompressor(test.flags));
417 stream_decompressor.reset(new CBZip2StreamDecompressor(test.flags));
418 } else
419 #if defined(HAVE_LIBLZO)
420 if (method == CCompressStream::eLZO) {
421 compression.reset(new CLZOCompression());
422 compression->SetFlags(test.flags);
423 stream_compressor.reset(new CLZOStreamCompressor(test.flags));
424 stream_decompressor.reset(new CLZOStreamDecompressor(test.flags));
425 } else
426 #endif
427 if (method == CCompressStream::eZip) {
428 compression.reset(new CZipCompression());
429 compression->SetFlags(test.flags);
430 stream_compressor.reset(new CZipStreamCompressor(test.flags));
431 stream_decompressor.reset(new CZipStreamDecompressor(test.flags));
432 } else
433 {
434 _TROUBLE;
435 }
436
437 // ---- Run tests ----
438
439 // Buffer compression/decompression test
440 {{
441 bool res = compression->CompressBuffer(src_buf, 0, dst_buf, kLen, &n);
442 assert(res == test.result);
443 assert(n == test.buffer_output_size);
444 res = compression->DecompressBuffer(dst_buf, n, cmp_buf, kLen, &n);
445 assert(res == test.result);
446 assert(n == 0);
447 }}
448
449 // Input stream
450 {{
451 // Compression
452 {{
453 CCompressionIStream ics(is_str, stream_compressor.get());
454 assert(ics.good());
455 ics.read(dst_buf, kLen);
456 if (allow_empty) {
457 assert(ics.eof() && ics.fail()); // short read
458 } else {
459 assert(ics.bad()); //error
460 }
461 n = (size_t)ics.gcount();
462 assert(n == test.stream_output_size);
463 assert(ics.GetProcessedSize() == 0);
464 assert(ics.GetOutputSize() == n);
465 }}
466 // Decompression
467 {{
468 CCompressionIStream ids(is_str, stream_decompressor.get());
469 assert(ids.good());
470 ids.read(dst_buf, kLen);
471 if (allow_empty) {
472 assert(ids.eof() && ids.fail()); // short read
473 } else {
474 assert(ids.bad()); // error
475 }
476 n = (size_t)ids.gcount();
477 assert(n == 0);
478 assert(ids.GetProcessedSize() == 0);
479 assert(ids.GetOutputSize() == n);
480 }}
481 }}
482
483 // Output stream
484 {{
485 // Compression
486 {{
487 CNcbiOstrstream os_str;
488 CCompressionOStream ocs(os_str, stream_compressor.get());
489 assert(ocs.good());
490 ocs.Finalize();
491 assert(test.result ? ocs.good() : ocs.bad());
492 n = (size_t)GetOssSize(os_str);
493 assert(n == test.stream_output_size);
494 assert(ocs.GetProcessedSize() == 0);
495 assert(ocs.GetOutputSize() == n);
496 }}
497 // Decompression
498 {{
499 CNcbiOstrstream os_str;
500 CCompressionOStream ods(os_str, stream_decompressor.get());
501 assert(ods.good());
502 ods.Finalize();
503 assert(test.result ? ods.good() : ods.bad());
504 n = (size_t)GetOssSize(os_str);
505 assert(n == 0);
506 assert(ods.GetProcessedSize() == 0);
507 assert(ods.GetOutputSize() == n);
508 }}
509 }}
510
511 // Output stream tests -- just with flush()
512 {{
513 // Compression
514 {{
515 CNcbiOstrstream os_str;
516 CCompressionOStream ocs(os_str, stream_compressor.get());
517 assert(ocs.good());
518 ocs.flush();
519 assert(ocs.good());
520 ocs.Finalize();
521 assert(test.result ? ocs.good() : ocs.bad());
522 n = (size_t)GetOssSize(os_str);
523 assert(n == test.stream_output_size);
524 assert(ocs.GetProcessedSize() == 0);
525 assert(ocs.GetOutputSize() == n);
526 }}
527 // Decompression
528 {{
529 CNcbiOstrstream os_str;
530 CCompressionOStream ods(os_str, stream_decompressor.get());
531 assert(ods.good());
532 ods.flush();
533 assert(ods.good());
534 ods.Finalize();
535 assert(test.result ? ods.good() : !ods.good());
536 n = (size_t)GetOssSize(os_str);
537 assert(n == 0);
538 assert(ods.GetProcessedSize() == 0);
539 assert(ods.GetOutputSize() == n);
540 }}
541 }}
542 }
543 }
544
545
546 //////////////////////////////////////////////////////////////////////////////
547 //
548 // Tests for transparent stream encoder (CXX-4148)
549 //
550
TestTransparentCopy(const char * src_buf,size_t src_len,size_t buf_len)551 void CTest::TestTransparentCopy(const char* src_buf, size_t src_len, size_t buf_len)
552 {
553 AutoArray<char> dst_buf_arr(buf_len);
554 char* dst_buf = dst_buf_arr.get();
555 assert(dst_buf);
556 size_t n;
557 unique_ptr<CNcbiIos> stm;
558
559 const string kFileName_str = CFile::ConcatPath(m_Dir, "test_compress.dst.file");
560 const char* kFileName = kFileName_str.c_str();
561
562 // Input stream test
563 {{
564 memset(dst_buf, 0, buf_len);
565 string src(src_buf, src_len);
566 // Create input stream
567 if ( m_AllowIstrstream ) {
568 stm.reset(new CNcbiIstrstream(src));
569 } else {
570 stm.reset(new CNcbiIfstream(_T_XCSTRING(m_SrcFile), ios::in | ios::binary));
571 }
572 assert(stm->good());
573
574 // Transparent copy using input compression stream
575 CCompressionIStream is(*stm, new CTransparentStreamProcessor(),
576 CCompressionStream::fOwnProcessor);
577 assert(is.good());
578 n = is.Read(dst_buf, src_len + 1 /* more than exists to get EOF */);
579 assert(is.eof());
580 assert(src_len == n);
581 assert(is.GetProcessedSize() == n);
582 assert(is.GetOutputSize() == n);
583
584 // Compare data
585 assert(memcmp(src_buf, dst_buf, n) == 0);
586
587 OK_MSG("input");
588 }}
589
590 // Output stream test
591 {{
592 CNcbiOstrstream* os_str = nullptr; // need for CNcbiOstrstreamToString()
593
594 // Create output stream
595 if ( m_AllowOstrstream ) {
596 os_str = new CNcbiOstrstream();
597 stm.reset(os_str);
598 } else {
599 stm.reset(new CNcbiOfstream(kFileName, ios::out | ios::binary));
600 }
601 assert(stm->good());
602
603 // Transparent copy using output compression stream
604 CCompressionOStream os(*stm, new CTransparentStreamProcessor(),
605 CCompressionStream::fOwnProcessor);
606 assert(os.good());
607 n = os.Write(src_buf, src_len);
608 assert(os.good());
609 assert(src_len == n);
610 os.Finalize();
611 assert(os.good());
612 assert(os.GetProcessedSize() == n);
613 assert(os.GetOutputSize() == n);
614
615 // Compare data
616 if ( m_AllowOstrstream ) {
617 string str = CNcbiOstrstreamToString(*os_str);
618 n = str.size();
619 assert(n == src_len);
620 assert(memcmp(src_buf, str.data(), n) == 0);
621 } else {
622 CFile f(kFileName);
623 assert((size_t)f.GetLength() == src_len);
624 assert(f.Compare(m_SrcFile));
625 }
626 if ( !m_AllowOstrstream ) {
627 CFile(kFileName).Remove();
628 }
629 OK_MSG("output");
630 }}
631 }
632
633
634
635 //////////////////////////////////////////////////////////////////////////////
636 //
637 // MAIN
638 //
639
main(int argc,const char * argv[])640 int main(int argc, const char* argv[])
641 {
642 // Execute main application function
643 return CTest().AppMain(argc, argv);
644 }
645