1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11
12 /* *************************************
13 * Compiler Options
14 ***************************************/
15 #ifdef _MSC_VER /* Visual */
16 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
17 # pragma warning(disable : 4204) /* non-constant aggregate initializer */
18 #endif
19 #if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
20 # define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */
21 #endif
22
23 /*-*************************************
24 * Includes
25 ***************************************/
26 #include "platform.h" /* Large Files support, SET_BINARY_MODE */
27 #include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */
28 #include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */
29 #include <stdlib.h> /* malloc, free */
30 #include <string.h> /* strcmp, strlen */
31 #include <assert.h>
32 #include <errno.h> /* errno */
33 #include <limits.h> /* INT_MAX */
34 #include <signal.h>
35 #include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */
36
37 #if defined (_MSC_VER)
38 # include <sys/stat.h>
39 # include <io.h>
40 #endif
41
42 #include "mem.h" /* U32, U64 */
43 #include "fileio.h"
44
45 #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
46 #include "zstd.h"
47 #include "zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */
48
49 #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
50 # include <zlib.h>
51 # if !defined(z_const)
52 # define z_const
53 # endif
54 #endif
55
56 #if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
57 # include <lzma.h>
58 #endif
59
60 #define LZ4_MAGICNUMBER 0x184D2204
61 #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
62 # define LZ4F_ENABLE_OBSOLETE_ENUMS
63 # include <lz4frame.h>
64 # include <lz4.h>
65 #endif
66
67
68 /*-*************************************
69 * Constants
70 ***************************************/
/* Postfix size multipliers : written after a number, e.g. `32 MB`
 * expands to `32 *(1<<20)`. Not usable as standalone values. */
#define KB *(1<<10)
#define MB *(1<<20)
#define GB *(1U<<30)   /* unsigned constant, unlike KB and MB */

/* windowLog applied by FIO_createCResources() in adaptive mode
 * when neither long-distance matching nor an explicit windowLog is set */
#define ADAPT_WINDOWLOG_DEFAULT 23   /* 8 MB */
/* largest dictionary file accepted by FIO_createDictBuffer() */
#define DICTSIZE_MAX (32 MB)   /* protection against large input (attack scenario) */

#define FNSPACE 30
79
80
81 /*-*************************************
82 * Macros
83 ***************************************/
84
/* Display settings consulted by the DISPLAY* macros below.
 * The FIO_display_prefs_t typedef is not declared in this file
 * (presumably provided by fileio.h). */
struct FIO_display_prefs_s {
    int displayLevel;   /* 0 : no display;  1: errors;  2: + result + interaction + warnings;  3: + progression;  4: + information */
    U32 noProgress;     /* non-zero : DISPLAYUPDATE() prints nothing */
};

/* positional initializer : displayLevel = 2, noProgress = 0 */
static FIO_display_prefs_t g_display_prefs = {2, 0};
91
/* DISPLAY()      : unconditional printf-style message on stderr.
 * DISPLAYOUT()   : unconditional printf-style message on stdout.
 * DISPLAYLEVEL() : message on stderr, only when the current displayLevel is >= l.
 * note : DISPLAYLEVEL and DISPLAYUPDATE expand to a plain `{ ... }` block
 *        (not `do { } while (0)`) ; at least one call site in this file is
 *        followed directly by `else` without a `;`, so the shape must not change. */
#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
#define DISPLAYOUT(...)      fprintf(stdout, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) { if (g_display_prefs.displayLevel>=l) { DISPLAY(__VA_ARGS__); } }

/* minimum span between two progress refreshes, compared against UTIL_clockSpanMicro() */
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
/* time of the last progress refresh, updated by DELAY_NEXT_UPDATE() */
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;

/* READY_FOR_UPDATE()  : true when progress is enabled and more than g_refreshRate elapsed since the last refresh.
 * DELAY_NEXT_UPDATE() : restarts the refresh clock.
 * DISPLAYUPDATE()     : rate-limited DISPLAYLEVEL() ; at displayLevel >= 4 the rate limit
 *                       is bypassed and stderr is flushed after each message. */
#define READY_FOR_UPDATE() (!g_display_prefs.noProgress && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate)
#define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); }
#define DISPLAYUPDATE(l, ...) {                              \
        if (g_display_prefs.displayLevel>=l && !g_display_prefs.noProgress) {            \
            if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \
                DELAY_NEXT_UPDATE();                         \
                DISPLAY(__VA_ARGS__);                        \
                if (g_display_prefs.displayLevel>=4) fflush(stderr);       \
    }   }   }
108
/* MIN() : smaller of two values ; each argument may be evaluated twice */
#undef MIN  /* in case it would be already defined */
#define MIN(a,b)    ((a) < (b) ? (a) : (b))


/* EXM_THROW() :
 * prints "zstd: error <error> : <formatted message>" on stderr (displayLevel >= 1),
 * plus the source location at displayLevel >= 5,
 * then terminates the process with exit code `error`. Never returns. */
#define EXM_THROW(error, ...)                                             \
{                                                                         \
    DISPLAYLEVEL(1, "zstd: ");                                            \
    DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "error %i : ", error);                                \
    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
    DISPLAYLEVEL(1, " \n");                                               \
    exit(error);                                                          \
}

/* CHECK_V() :
 * stores the result of `f` into `v` ; if it is a zstd error code,
 * reports its name and exits with code 11.
 * note : expands to two statements, so it must not be the sole body of an unbraced `if`. */
#define CHECK_V(v, f)                                \
    v = f;                                           \
    if (ZSTD_isError(v)) {                           \
        DISPLAYLEVEL(5, "%s \n", #f);                \
        EXM_THROW(11, "%s", ZSTD_getErrorName(v));   \
    }
/* CHECK() : same as CHECK_V(), with a private result variable */
#define CHECK(f) { size_t err; CHECK_V(err, f); }
130
131
132 /*-************************************
133 * Signal (Ctrl-C trapping)
134 **************************************/
/* Output file to delete if the user interrupts the program (Ctrl-C),
 * or NULL when no cleanup is armed.
 * Written by addHandler() and clearHandler(), read by INThandler(). */
static const char* g_artefact = NULL;

/* INThandler() :
 * SIGINT handler : deletes the registered artefact (if any),
 * prints a newline on stderr, then ends the process with exit code 2. */
static void INThandler(int sig)
{
    assert(sig==SIGINT); (void)sig;
#if !defined(_MSC_VER)
    signal(sig, SIG_IGN);  /* this invocation generates a buggy warning in Visual Studio */
#endif
    if (g_artefact != NULL) {
        assert(UTIL_isRegularFile(g_artefact));
        remove(g_artefact);
    }
    DISPLAY("\n");
    exit(2);
}
addHandler(char const * dstFileName)149 static void addHandler(char const* dstFileName)
150 {
151 if (UTIL_isRegularFile(dstFileName)) {
152 g_artefact = dstFileName;
153 signal(SIGINT, INThandler);
154 } else {
155 g_artefact = NULL;
156 }
157 }
158 /* Idempotent */
clearHandler(void)159 static void clearHandler(void)
160 {
161 if (g_artefact) signal(SIGINT, SIG_DFL);
162 g_artefact = NULL;
163 }
164
165
166 /*-*********************************************************
167 * Termination signal trapping (Print debug stack trace)
168 ***********************************************************/
/* Unless BACKTRACE_ENABLE was already defined by the build,
 * force it to 0 when compiling with address sanitizer
 * (detected through __has_feature or __SANITIZE_ADDRESS__). */
#if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */
#  if (__has_feature(address_sanitizer))
#    define BACKTRACE_ENABLE 0
#  endif /* __has_feature(address_sanitizer) */
#elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */
#  define BACKTRACE_ENABLE 0
#endif

#if !defined(BACKTRACE_ENABLE)
/* automatic detector : backtrace enabled by default on linux+glibc and osx */
#  if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \
     || (defined(__APPLE__) && defined(__MACH__))
#    define BACKTRACE_ENABLE 1
#  else
#    define BACKTRACE_ENABLE 0
#  endif
#endif
186
187 /* note : after this point, BACKTRACE_ENABLE is necessarily defined */
188
189
190 #if BACKTRACE_ENABLE
191
192 #include <execinfo.h> /* backtrace, backtrace_symbols */
193
194 #define MAX_STACK_FRAMES 50
195
ABRThandler(int sig)196 static void ABRThandler(int sig) {
197 const char* name;
198 void* addrlist[MAX_STACK_FRAMES];
199 char** symbollist;
200 int addrlen, i;
201
202 switch (sig) {
203 case SIGABRT: name = "SIGABRT"; break;
204 case SIGFPE: name = "SIGFPE"; break;
205 case SIGILL: name = "SIGILL"; break;
206 case SIGINT: name = "SIGINT"; break;
207 case SIGSEGV: name = "SIGSEGV"; break;
208 default: name = "UNKNOWN";
209 }
210
211 DISPLAY("Caught %s signal, printing stack:\n", name);
212 /* Retrieve current stack addresses. */
213 addrlen = backtrace(addrlist, MAX_STACK_FRAMES);
214 if (addrlen == 0) {
215 DISPLAY("\n");
216 return;
217 }
218 /* Create readable strings to each frame. */
219 symbollist = backtrace_symbols(addrlist, addrlen);
220 /* Print the stack trace, excluding calls handling the signal. */
221 for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) {
222 DISPLAY("%s\n", symbollist[i]);
223 }
224 free(symbollist);
225 /* Reset and raise the signal so default handler runs. */
226 signal(sig, SIG_DFL);
227 raise(sig);
228 }
229 #endif
230
/* FIO_addAbortHandler() :
 * when backtrace support is compiled in, routes SIGABRT, SIGFPE, SIGILL,
 * SIGSEGV and SIGBUS to ABRThandler(). Does nothing otherwise. */
void FIO_addAbortHandler()
{
#if BACKTRACE_ENABLE
    static const int trappedSignals[] = { SIGABRT, SIGFPE, SIGILL, SIGSEGV, SIGBUS };
    size_t n;
    for (n = 0; n < sizeof(trappedSignals) / sizeof(trappedSignals[0]); n++) {
        signal(trappedSignals[n], ABRThandler);
    }
#endif
}
241
242
243 /*-************************************************************
244 * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
245 ***************************************************************/
246 #if defined(_MSC_VER) && _MSC_VER >= 1400
247 # define LONG_SEEK _fseeki64
248 # define LONG_TELL _ftelli64
249 #elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
250 # define LONG_SEEK fseeko
251 # define LONG_TELL ftello
252 #elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
253 # define LONG_SEEK fseeko64
254 # define LONG_TELL ftello64
255 #elif defined(_WIN32) && !defined(__DJGPP__)
256 # include <windows.h>
/* LONG_SEEK() : Win32 fallback
 * 64-bit seek on the OS handle underlying `file`.
 * `origin` uses the fseek() constants ; any value other than
 * SEEK_END / SEEK_CUR is treated as a seek from the beginning.
 * @return : 0 on success, -1 on failure */
    static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
        LARGE_INTEGER distance;
        DWORD moveMethod = FILE_BEGIN;

        distance.QuadPart = offset;
        switch (origin) {
            case SEEK_END: moveMethod = FILE_END; break;
            case SEEK_CUR: moveMethod = FILE_CURRENT; break;
            default: break;
        }

        return SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), distance, NULL, moveMethod) ? 0 : -1;
    }
LONG_TELL(FILE * file)273 static __int64 LONG_TELL(FILE* file) {
274 LARGE_INTEGER off, newOff;
275 off.QuadPart = 0;
276 newOff.QuadPart = 0;
277 SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT);
278 return newOff.QuadPart;
279 }
280 #else
281 # define LONG_SEEK fseek
282 # define LONG_TELL ftell
283 #endif
284
285
286 /*-*************************************
287 * Parameters: FIO_prefs_t
288 ***************************************/
289
/* typedef'd to FIO_prefs_t within fileio.h */
/* Settings of one compression / decompression run.
 * Allocated with defaults by FIO_createPreferences(),
 * modified through the FIO_set*() functions below. */
struct FIO_prefs_s {

    /* Algorithm preferences */
    FIO_compressionType_t compressionType;
    U32 sparseFileSupport;   /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */
    int dictIDFlag;          /* forwarded to ZSTD_c_dictIDFlag */
    int checksumFlag;        /* forwarded to ZSTD_c_checksumFlag */
    int blockSize;           /* forwarded to ZSTD_c_jobSize (multi-thread builds only) */
    int overlapLog;          /* FIO_OVERLAP_LOG_NOTSET : not forwarded to the compressor */
    U32 adaptiveMode;        /* rejected by its setter when nbWorkers == 0 */
    int rsyncable;           /* rejected by its setter when nbWorkers == 0 */
    int minAdaptLevel;
    int maxAdaptLevel;
    int ldmFlag;             /* 0 or 1 : long distance matching */
    int ldmHashLog;
    int ldmMinMatch;
    int ldmBucketSizeLog;    /* FIO_LDM_PARAM_NOTSET : not forwarded to the compressor */
    int ldmHashRateLog;      /* FIO_LDM_PARAM_NOTSET : not forwarded to the compressor */
    size_t streamSrcSize;
    size_t targetCBlockSize; /* forwarded to ZSTD_c_targetCBlockSize */
    int srcSizeHint;         /* forwarded to ZSTD_c_srcSizeHint ; setter clamps to INT_MAX */
    int testMode;            /* 0 or 1 ; when set, FIO_openDstFile() opens nothing */
    ZSTD_literalCompressionMode_e literalCompressionMode;

    /* IO preferences */
    U32 removeSrcFile;       /* 0 or 1 */
    U32 overwrite;           /* non-zero : existing destination is replaced without prompting */

    /* Computation resources preferences */
    unsigned memLimit;
    int nbWorkers;           /* 0 : single-thread mode */

    int excludeCompressedFiles;
};
325
326
327 /*-*************************************
328 * Parameters: Initialization
329 ***************************************/
330
/* Sentinels meaning "parameter was not set by the user" :
 * FIO_createCResources() skips the corresponding ZSTD_CCtx_setParameter() call
 * when overlapLog, ldmBucketSizeLog or ldmHashRateLog still hold them. */
#define FIO_OVERLAP_LOG_NOTSET 9999
#define FIO_LDM_PARAM_NOTSET 9999
333
334
FIO_createPreferences(void)335 FIO_prefs_t* FIO_createPreferences(void)
336 {
337 FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t));
338 if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
339
340 ret->compressionType = FIO_zstdCompression;
341 ret->overwrite = 0;
342 ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
343 ret->dictIDFlag = 1;
344 ret->checksumFlag = 1;
345 ret->removeSrcFile = 0;
346 ret->memLimit = 0;
347 ret->nbWorkers = 1;
348 ret->blockSize = 0;
349 ret->overlapLog = FIO_OVERLAP_LOG_NOTSET;
350 ret->adaptiveMode = 0;
351 ret->rsyncable = 0;
352 ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */
353 ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
354 ret->ldmFlag = 0;
355 ret->ldmHashLog = 0;
356 ret->ldmMinMatch = 0;
357 ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
358 ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
359 ret->streamSrcSize = 0;
360 ret->targetCBlockSize = 0;
361 ret->srcSizeHint = 0;
362 ret->testMode = 0;
363 ret->literalCompressionMode = ZSTD_lcm_auto;
364 ret->excludeCompressedFiles = 0;
365 return ret;
366 }
367
/* FIO_freePreferences() :
 * releases an object created by FIO_createPreferences().
 * Accepts NULL (free() semantics). */
void FIO_freePreferences(FIO_prefs_t* const prefs)
{
    free(prefs);
}
372
373
374 /*-*************************************
375 * Parameters: Display Options
376 ***************************************/
377
FIO_setNotificationLevel(int level)378 void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
379
FIO_setNoProgress(unsigned noProgress)380 void FIO_setNoProgress(unsigned noProgress) { g_display_prefs.noProgress = noProgress; }
381
382
383 /*-*************************************
384 * Parameters: Setters
385 ***************************************/
386
/* Plain setters : each one stores its argument into the matching field of `prefs`. */

void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType)
{
    prefs->compressionType = compressionType;
}

/* enables overwriting of existing destination files (no way to disable again) */
void FIO_overwriteMode(FIO_prefs_t* const prefs)
{
    prefs->overwrite = 1;
}

void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse)
{
    prefs->sparseFileSupport = sparse;
}

void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag)
{
    prefs->dictIDFlag = dictIDFlag;
}

void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag)
{
    prefs->checksumFlag = checksumFlag;
}

/* any non-zero `flag` is stored as 1 */
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag)
{
    prefs->removeSrcFile = (flag != 0);
}

void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit)
{
    prefs->memLimit = memLimit;
}

/* in builds without ZSTD_MULTITHREAD, a positive value triggers a notice
 * (displayLevel >= 2) but is stored all the same */
void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers)
{
#ifndef ZSTD_MULTITHREAD
    if (nbWorkers > 0) {
        DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
    }
#endif
    prefs->nbWorkers = nbWorkers;
}

void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles)
{
    prefs->excludeCompressedFiles = excludeCompressedFiles;
}
409
/* Setters below depend on prefs->nbWorkers,
 * so they are meaningful only once the number of workers has been set. */

/* warns (displayLevel >= 2) when a non-zero size is requested in single-thread mode ;
 * the value is stored in all cases */
void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize)
{
    if ((blockSize != 0) && (prefs->nbWorkers == 0)) {
        DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
    }
    prefs->blockSize = blockSize;
}

/* warns (displayLevel >= 2) when a non-zero value is requested in single-thread mode ;
 * the value is stored in all cases */
void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog)
{
    if ((overlapLog != 0) && (prefs->nbWorkers == 0)) {
        DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
    }
    prefs->overlapLog = overlapLog;
}

/* enabling adaptive mode in single-thread mode is fatal : exits with code 1 */
void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt)
{
    if ((adapt != 0) && (prefs->nbWorkers == 0)) {
        EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n");
    }
    prefs->adaptiveMode = adapt;
}

/* enabling rsyncable mode in single-thread mode is fatal : exits with code 1 */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable)
{
    if ((rsyncable > 0) && (prefs->nbWorkers == 0)) {
        EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
    }
    prefs->rsyncable = rsyncable;
}
433
FIO_setStreamSrcSize(FIO_prefs_t * const prefs,size_t streamSrcSize)434 void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
435 prefs->streamSrcSize = streamSrcSize;
436 }
437
FIO_setTargetCBlockSize(FIO_prefs_t * const prefs,size_t targetCBlockSize)438 void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
439 prefs->targetCBlockSize = targetCBlockSize;
440 }
441
FIO_setSrcSizeHint(FIO_prefs_t * const prefs,size_t srcSizeHint)442 void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
443 prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint);
444 }
445
FIO_setTestMode(FIO_prefs_t * const prefs,int testMode)446 void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) {
447 prefs->testMode = (testMode!=0);
448 }
449
FIO_setLiteralCompressionMode(FIO_prefs_t * const prefs,ZSTD_literalCompressionMode_e mode)450 void FIO_setLiteralCompressionMode(
451 FIO_prefs_t* const prefs,
452 ZSTD_literalCompressionMode_e mode) {
453 prefs->literalCompressionMode = mode;
454 }
455
/* lower bound of the adaptive compression level ;
 * checked against ZSTD_minCLevel() by an assert, in builds that include compression */
void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel)
{
#ifndef ZSTD_NOCOMPRESS
    assert(minCLevel >= ZSTD_minCLevel());
#endif
    prefs->minAdaptLevel = minCLevel;
}

/* upper bound of the adaptive compression level (stored unchecked) */
void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel)
{
    prefs->maxAdaptLevel = maxCLevel;
}

/* any non-zero `ldmFlag` is stored as 1 */
void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag)
{
    prefs->ldmFlag = (ldmFlag != 0);
}

void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog)
{
    prefs->ldmHashLog = ldmHashLog;
}

void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch)
{
    prefs->ldmMinMatch = ldmMinMatch;
}

void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog)
{
    prefs->ldmBucketSizeLog = ldmBucketSizeLog;
}


void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog)
{
    prefs->ldmHashRateLog = ldmHashRateLog;
}
489
490
491 /*-*************************************
492 * Functions
493 ***************************************/
/** FIO_remove() :
 *  Deletes `path`, provided it is a regular file.
 *  On Windows, the file is first made writable, since read-only files cannot be removed there.
 * @result : result of remove(), or 0 when `path` is not a regular file
 *           (a notice is displayed and nothing is deleted) */
static int FIO_remove(const char* path)
{
    if (UTIL_isRegularFile(path)) {
#if defined(_WIN32) || defined(WIN32)
        /* windows doesn't allow remove read-only files,
         * so try to make it writable first */
        chmod(path, _S_IWRITE);
#endif
        return remove(path);
    }

    DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s \n", path);
    return 0;
}
509
510 /** FIO_openSrcFile() :
511 * condition : `srcFileName` must be non-NULL.
512 * @result : FILE* to `srcFileName`, or NULL if it fails */
FIO_openSrcFile(const char * srcFileName)513 static FILE* FIO_openSrcFile(const char* srcFileName)
514 {
515 assert(srcFileName != NULL);
516 if (!strcmp (srcFileName, stdinmark)) {
517 DISPLAYLEVEL(4,"Using stdin for input \n");
518 SET_BINARY_MODE(stdin);
519 return stdin;
520 }
521
522 if (!UTIL_fileExist(srcFileName)) {
523 DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n",
524 srcFileName, strerror(errno));
525 return NULL;
526 }
527
528 if (!UTIL_isRegularFile(srcFileName)
529 #ifndef _MSC_VER
530 && !UTIL_isFIFO(srcFileName)
531 #endif /* _MSC_VER */
532 ) {
533 DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
534 srcFileName);
535 return NULL;
536 }
537
538 { FILE* const f = fopen(srcFileName, "rb");
539 if (f == NULL)
540 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
541 return f;
542 }
543 }
544
545 /** FIO_openDstFile() :
546 * condition : `dstFileName` must be non-NULL.
547 * @result : FILE* to `dstFileName`, or NULL if it fails */
548 static FILE*
FIO_openDstFile(FIO_prefs_t * const prefs,const char * srcFileName,const char * dstFileName)549 FIO_openDstFile(FIO_prefs_t* const prefs,
550 const char* srcFileName, const char* dstFileName)
551 {
552 if (prefs->testMode) return NULL; /* do not open file in test mode */
553
554 assert(dstFileName != NULL);
555 if (!strcmp (dstFileName, stdoutmark)) {
556 DISPLAYLEVEL(4,"Using stdout for output \n");
557 SET_BINARY_MODE(stdout);
558 if (prefs->sparseFileSupport == 1) {
559 prefs->sparseFileSupport = 0;
560 DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
561 }
562 return stdout;
563 }
564
565 /* ensure dst is not the same as src */
566 if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) {
567 DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n");
568 return NULL;
569 }
570
571 if (prefs->sparseFileSupport == 1) {
572 prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
573 }
574
575 if (UTIL_isRegularFile(dstFileName)) {
576 /* Check if destination file already exists */
577 FILE* const fCheck = fopen( dstFileName, "rb" );
578 #if !defined(_WIN32)
579 /* this test does not work on Windows :
580 * `NUL` and `nul` are detected as regular files */
581 if (!strcmp(dstFileName, nulmark)) {
582 EXM_THROW(40, "%s is unexpectedly categorized as a regular file",
583 dstFileName);
584 }
585 #endif
586 if (fCheck != NULL) { /* dst file exists, authorization prompt */
587 fclose(fCheck);
588 if (!prefs->overwrite) {
589 if (g_display_prefs.displayLevel <= 1) {
590 /* No interaction possible */
591 DISPLAY("zstd: %s already exists; not overwritten \n",
592 dstFileName);
593 return NULL;
594 }
595 DISPLAY("zstd: %s already exists; overwrite (y/N) ? ",
596 dstFileName);
597 { int ch = getchar();
598 if ((ch!='Y') && (ch!='y')) {
599 DISPLAY(" not overwritten \n");
600 return NULL;
601 }
602 /* flush rest of input line */
603 while ((ch!=EOF) && (ch!='\n')) ch = getchar();
604 } }
605 /* need to unlink */
606 FIO_remove(dstFileName);
607 } }
608
609 { FILE* const f = fopen( dstFileName, "wb" );
610 if (f == NULL) {
611 DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
612 } else if(srcFileName != NULL && strcmp (srcFileName, stdinmark)) {
613 chmod(dstFileName, 00600);
614 }
615 return f;
616 }
617 }
618
619
620 /*! FIO_createDictBuffer() :
621 * creates a buffer, pointed by `*bufferPtr`,
622 * loads `filename` content into it, up to DICTSIZE_MAX bytes.
623 * @return : loaded size
624 * if fileName==NULL, returns 0 and a NULL pointer
625 */
FIO_createDictBuffer(void ** bufferPtr,const char * fileName)626 static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
627 {
628 FILE* fileHandle;
629 U64 fileSize;
630
631 assert(bufferPtr != NULL);
632 *bufferPtr = NULL;
633 if (fileName == NULL) return 0;
634
635 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
636 fileHandle = fopen(fileName, "rb");
637 if (fileHandle==NULL) EXM_THROW(31, "%s: %s", fileName, strerror(errno));
638
639 fileSize = UTIL_getFileSize(fileName);
640 if (fileSize > DICTSIZE_MAX) {
641 EXM_THROW(32, "Dictionary file %s is too large (> %u MB)",
642 fileName, DICTSIZE_MAX >> 20); /* avoid extreme cases */
643 }
644 *bufferPtr = malloc((size_t)fileSize);
645 if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
646 { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
647 if (readSize != fileSize)
648 EXM_THROW(35, "Error reading dictionary file %s : %s",
649 fileName, strerror(errno));
650 }
651 fclose(fileHandle);
652 return (size_t)fileSize;
653 }
654
655
656
657 /* FIO_checkFilenameCollisions() :
658 * Checks for and warns if there are any files that would have the same output path
659 */
FIO_checkFilenameCollisions(const char ** filenameTable,unsigned nbFiles)660 int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
661 const char **filenameTableSorted, *c, *prevElem, *filename;
662 unsigned u;
663
664 #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
665 c = "\\";
666 #else
667 c = "/";
668 #endif
669
670 filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
671 if (!filenameTableSorted) {
672 DISPLAY("Unable to malloc new str array, not checking for name collisions\n");
673 return 1;
674 }
675
676 for (u = 0; u < nbFiles; ++u) {
677 filename = strrchr(filenameTable[u], c[0]);
678 if (filename == NULL) {
679 filenameTableSorted[u] = filenameTable[u];
680 } else {
681 filenameTableSorted[u] = filename+1;
682 }
683 }
684
685 qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
686 prevElem = filenameTableSorted[0];
687 for (u = 1; u < nbFiles; ++u) {
688 if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
689 DISPLAY("WARNING: Two files have same filename: %s\n", prevElem);
690 }
691 prevElem = filenameTableSorted[u];
692 }
693
694 free((void*)filenameTableSorted);
695 return 0;
696 }
697
/* extractFilename() :
 * @return : pointer, inside `path`, to the character following the last `separator` ;
 *           `path` itself when it contains no `separator`. Nothing is allocated. */
static const char*
extractFilename(const char* path, char separator)
{
    const char* const lastSeparator = strrchr(path, separator);
    return (lastSeparator != NULL) ? lastSeparator + 1 : path;
}
705
/* FIO_createFilename_fromOutDir() :
 * Builds "<outDirName><separator><file name of path>" in a newly allocated,
 * zero-filled buffer, with `suffixLen` spare bytes (plus the terminator)
 * left after the name so the caller can append a suffix.
 * No separator is inserted when `outDirName` already ends with one,
 * nor when `outDirName` is empty (the result is then the bare file name).
 * On Windows, both '\\' and '/' are accepted as separators within `path`.
 * This function never returns an error : allocation failure exits the process (code 30).
 * @return : the new path, to be released by the caller with free()
 */
static char*
FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen)
{
    const char* filenameStart;
    char separator;
    char* result;
    size_t dirLen;
    size_t nameLen;

#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    separator = '\\';
#else
    separator = '/';
#endif

    filenameStart = extractFilename(path, separator);
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    filenameStart = extractFilename(filenameStart, '/');  /* sometimes, '/' separator is also used on Windows (mingw+msys2) */
#endif

    dirLen = strlen(outDirName);
    nameLen = strlen(filenameStart);

    /* +1 : optional separator ; +1 : terminating zero (buffer is zero-filled) */
    result = (char*) calloc(1, dirLen + 1 + nameLen + suffixLen + 1);
    if (!result) {
        EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno));
    }

    memcpy(result, outDirName, dirLen);
    {   size_t pos = dirLen;
        /* dirLen > 0 guards the [dirLen-1] access against an empty directory name */
        if ((dirLen > 0) && (outDirName[dirLen-1] != separator)) {
            result[pos++] = separator;
        }
        memcpy(result + pos, filenameStart, nameLen);
    }

    return result;
}
744
745 #ifndef ZSTD_NOCOMPRESS
746
747 /* **********************************************************************
748 * Compression
749 ************************************************************************/
/* cRess_t :
 * resources shared by the compression functions.
 * Buffers and context are allocated by FIO_createCResources()
 * and released by FIO_freeCResources() ; the FILE handles are not touched by either. */
typedef struct {
    FILE* srcFile;           /* input currently being read */
    FILE* dstFile;           /* output currently being written */
    void*  srcBuffer;        /* read buffer, srcBufferSize bytes */
    size_t srcBufferSize;    /* = ZSTD_CStreamInSize() */
    void*  dstBuffer;        /* write buffer, dstBufferSize bytes */
    size_t dstBufferSize;    /* = ZSTD_CStreamOutSize() */
    const char* dictFileName;   /* name only : not owned, may be NULL */
    ZSTD_CStream* cctx;      /* compression context, already parameterized */
} cRess_t;
760
FIO_createCResources(FIO_prefs_t * const prefs,const char * dictFileName,int cLevel,ZSTD_compressionParameters comprParams)761 static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
762 const char* dictFileName, int cLevel,
763 ZSTD_compressionParameters comprParams) {
764 cRess_t ress;
765 memset(&ress, 0, sizeof(ress));
766
767 DISPLAYLEVEL(6, "FIO_createCResources \n");
768 ress.cctx = ZSTD_createCCtx();
769 if (ress.cctx == NULL)
770 EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
771 strerror(errno));
772 ress.srcBufferSize = ZSTD_CStreamInSize();
773 ress.srcBuffer = malloc(ress.srcBufferSize);
774 ress.dstBufferSize = ZSTD_CStreamOutSize();
775 ress.dstBuffer = malloc(ress.dstBufferSize);
776 if (!ress.srcBuffer || !ress.dstBuffer)
777 EXM_THROW(31, "allocation error : not enough memory");
778
779 /* Advanced parameters, including dictionary */
780 { void* dictBuffer;
781 size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName); /* works with dictFileName==NULL */
782 if (dictFileName && (dictBuffer==NULL))
783 EXM_THROW(32, "allocation error : can't create dictBuffer");
784 ress.dictFileName = dictFileName;
785
786 if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
787 comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
788
789 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 1) ); /* always enable content size when available (note: supposed to be default) */
790 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
791 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
792 /* compression level */
793 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
794 /* max compressed block size */
795 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
796 /* source size hint */
797 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
798 /* long distance matching */
799 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
800 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
801 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
802 if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
803 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
804 }
805 if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
806 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
807 }
808 /* compression parameters */
809 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
810 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
811 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
812 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
813 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
814 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
815 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) );
816 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
817 /* multi-threading */
818 #ifdef ZSTD_MULTITHREAD
819 DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
820 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
821 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
822 if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
823 DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
824 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
825 }
826 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
827 #endif
828 /* dictionary */
829 CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
830 free(dictBuffer);
831 }
832
833 return ress;
834 }
835
/* FIO_freeCResources() :
 * releases what FIO_createCResources() allocated : the compression context
 * and both I/O buffers. srcFile and dstFile are not closed here. */
static void FIO_freeCResources(cRess_t ress)
{
    void* const inBuff = ress.srcBuffer;
    void* const outBuff = ress.dstBuffer;

    free(inBuff);
    free(outBuff);
    ZSTD_freeCStream(ress.cctx);   /* never fails */
}
842
843
844 #ifdef ZSTD_GZCOMPRESS
845 static unsigned long long
FIO_compressGzFrame(const cRess_t * ress,const char * srcFileName,U64 const srcFileSize,int compressionLevel,U64 * readsize)846 FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but not changed */
847 const char* srcFileName, U64 const srcFileSize,
848 int compressionLevel, U64* readsize)
849 {
850 unsigned long long inFileSize = 0, outFileSize = 0;
851 z_stream strm;
852
853 if (compressionLevel > Z_BEST_COMPRESSION)
854 compressionLevel = Z_BEST_COMPRESSION;
855
856 strm.zalloc = Z_NULL;
857 strm.zfree = Z_NULL;
858 strm.opaque = Z_NULL;
859
860 { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
861 15 /* maxWindowLogSize */ + 16 /* gzip only */,
862 8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */
863 if (ret != Z_OK) {
864 EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
865 } }
866
867 strm.next_in = 0;
868 strm.avail_in = 0;
869 strm.next_out = (Bytef*)ress->dstBuffer;
870 strm.avail_out = (uInt)ress->dstBufferSize;
871
872 while (1) {
873 int ret;
874 if (strm.avail_in == 0) {
875 size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
876 if (inSize == 0) break;
877 inFileSize += inSize;
878 strm.next_in = (z_const unsigned char*)ress->srcBuffer;
879 strm.avail_in = (uInt)inSize;
880 }
881 ret = deflate(&strm, Z_NO_FLUSH);
882 if (ret != Z_OK)
883 EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
884 { size_t const cSize = ress->dstBufferSize - strm.avail_out;
885 if (cSize) {
886 if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
887 EXM_THROW(73, "Write error : cannot write to output file : %s ", strerror(errno));
888 outFileSize += cSize;
889 strm.next_out = (Bytef*)ress->dstBuffer;
890 strm.avail_out = (uInt)ress->dstBufferSize;
891 } }
892 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
893 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ",
894 (unsigned)(inFileSize>>20),
895 (double)outFileSize/inFileSize*100)
896 } else {
897 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ",
898 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
899 (double)outFileSize/inFileSize*100);
900 } }
901
902 while (1) {
903 int const ret = deflate(&strm, Z_FINISH);
904 { size_t const cSize = ress->dstBufferSize - strm.avail_out;
905 if (cSize) {
906 if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
907 EXM_THROW(75, "Write error : %s ", strerror(errno));
908 outFileSize += cSize;
909 strm.next_out = (Bytef*)ress->dstBuffer;
910 strm.avail_out = (uInt)ress->dstBufferSize;
911 } }
912 if (ret == Z_STREAM_END) break;
913 if (ret != Z_BUF_ERROR)
914 EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
915 }
916
917 { int const ret = deflateEnd(&strm);
918 if (ret != Z_OK) {
919 EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
920 } }
921 *readsize = inFileSize;
922 return outFileSize;
923 }
924 #endif
925
926
927 #ifdef ZSTD_LZMACOMPRESS
928 static unsigned long long
FIO_compressLzmaFrame(cRess_t * ress,const char * srcFileName,U64 const srcFileSize,int compressionLevel,U64 * readsize,int plain_lzma)929 FIO_compressLzmaFrame(cRess_t* ress,
930 const char* srcFileName, U64 const srcFileSize,
931 int compressionLevel, U64* readsize, int plain_lzma)
932 {
933 unsigned long long inFileSize = 0, outFileSize = 0;
934 lzma_stream strm = LZMA_STREAM_INIT;
935 lzma_action action = LZMA_RUN;
936 lzma_ret ret;
937
938 if (compressionLevel < 0) compressionLevel = 0;
939 if (compressionLevel > 9) compressionLevel = 9;
940
941 if (plain_lzma) {
942 lzma_options_lzma opt_lzma;
943 if (lzma_lzma_preset(&opt_lzma, compressionLevel))
944 EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName);
945 ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
946 if (ret != LZMA_OK)
947 EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
948 } else {
949 ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
950 if (ret != LZMA_OK)
951 EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
952 }
953
954 strm.next_in = 0;
955 strm.avail_in = 0;
956 strm.next_out = (BYTE*)ress->dstBuffer;
957 strm.avail_out = ress->dstBufferSize;
958
959 while (1) {
960 if (strm.avail_in == 0) {
961 size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
962 if (inSize == 0) action = LZMA_FINISH;
963 inFileSize += inSize;
964 strm.next_in = (BYTE const*)ress->srcBuffer;
965 strm.avail_in = inSize;
966 }
967
968 ret = lzma_code(&strm, action);
969
970 if (ret != LZMA_OK && ret != LZMA_STREAM_END)
971 EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
972 { size_t const compBytes = ress->dstBufferSize - strm.avail_out;
973 if (compBytes) {
974 if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes)
975 EXM_THROW(85, "Write error : %s", strerror(errno));
976 outFileSize += compBytes;
977 strm.next_out = (BYTE*)ress->dstBuffer;
978 strm.avail_out = ress->dstBufferSize;
979 } }
980 if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
981 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
982 (unsigned)(inFileSize>>20),
983 (double)outFileSize/inFileSize*100)
984 else
985 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
986 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
987 (double)outFileSize/inFileSize*100);
988 if (ret == LZMA_STREAM_END) break;
989 }
990
991 lzma_end(&strm);
992 *readsize = inFileSize;
993
994 return outFileSize;
995 }
996 #endif
997
998 #ifdef ZSTD_LZ4COMPRESS
999
1000 #if LZ4_VERSION_NUMBER <= 10600
1001 #define LZ4F_blockLinked blockLinked
1002 #define LZ4F_max64KB max64KB
1003 #endif
1004
/* FIO_LZ4_GetBlockSize_FromBlockId() :
 * @return : 2^(8 + 2*id), i.e. 64 KB for id==4, 256 KB for id==5, and so on */
static int FIO_LZ4_GetBlockSize_FromBlockId (int id)
{
    int const sizeLog = 8 + (id * 2);
    return 1 << sizeLog;
}
1006
1007 static unsigned long long
FIO_compressLz4Frame(cRess_t * ress,const char * srcFileName,U64 const srcFileSize,int compressionLevel,int checksumFlag,U64 * readsize)1008 FIO_compressLz4Frame(cRess_t* ress,
1009 const char* srcFileName, U64 const srcFileSize,
1010 int compressionLevel, int checksumFlag,
1011 U64* readsize)
1012 {
1013 const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
1014 unsigned long long inFileSize = 0, outFileSize = 0;
1015
1016 LZ4F_preferences_t prefs;
1017 LZ4F_compressionContext_t ctx;
1018
1019 LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
1020 if (LZ4F_isError(errorCode))
1021 EXM_THROW(31, "zstd: failed to create lz4 compression context");
1022
1023 memset(&prefs, 0, sizeof(prefs));
1024
1025 assert(blockSize <= ress->srcBufferSize);
1026
1027 prefs.autoFlush = 1;
1028 prefs.compressionLevel = compressionLevel;
1029 prefs.frameInfo.blockMode = LZ4F_blockLinked;
1030 prefs.frameInfo.blockSizeID = LZ4F_max64KB;
1031 prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag;
1032 #if LZ4_VERSION_NUMBER >= 10600
1033 prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
1034 #endif
1035 assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize);
1036
1037 {
1038 size_t readSize;
1039 size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
1040 if (LZ4F_isError(headerSize))
1041 EXM_THROW(33, "File header generation failed : %s",
1042 LZ4F_getErrorName(headerSize));
1043 if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize)
1044 EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno));
1045 outFileSize += headerSize;
1046
1047 /* Read first block */
1048 readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
1049 inFileSize += readSize;
1050
1051 /* Main Loop */
1052 while (readSize>0) {
1053 size_t const outSize = LZ4F_compressUpdate(ctx,
1054 ress->dstBuffer, ress->dstBufferSize,
1055 ress->srcBuffer, readSize, NULL);
1056 if (LZ4F_isError(outSize))
1057 EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
1058 srcFileName, LZ4F_getErrorName(outSize));
1059 outFileSize += outSize;
1060 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
1061 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
1062 (unsigned)(inFileSize>>20),
1063 (double)outFileSize/inFileSize*100)
1064 } else {
1065 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
1066 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
1067 (double)outFileSize/inFileSize*100);
1068 }
1069
1070 /* Write Block */
1071 { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile);
1072 if (sizeCheck != outSize)
1073 EXM_THROW(36, "Write error : %s", strerror(errno));
1074 }
1075
1076 /* Read next block */
1077 readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
1078 inFileSize += readSize;
1079 }
1080 if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);
1081
1082 /* End of Stream mark */
1083 headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL);
1084 if (LZ4F_isError(headerSize))
1085 EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
1086 srcFileName, LZ4F_getErrorName(headerSize));
1087
1088 { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
1089 if (sizeCheck != headerSize)
1090 EXM_THROW(39, "Write error : %s (cannot write end of stream)",
1091 strerror(errno));
1092 }
1093 outFileSize += headerSize;
1094 }
1095
1096 *readsize = inFileSize;
1097 LZ4F_freeCompressionContext(ctx);
1098
1099 return outFileSize;
1100 }
1101 #endif
1102
1103
1104 static unsigned long long
FIO_compressZstdFrame(FIO_prefs_t * const prefs,const cRess_t * ressPtr,const char * srcFileName,U64 fileSize,int compressionLevel,U64 * readsize)1105 FIO_compressZstdFrame(FIO_prefs_t* const prefs,
1106 const cRess_t* ressPtr,
1107 const char* srcFileName, U64 fileSize,
1108 int compressionLevel, U64* readsize)
1109 {
1110 cRess_t const ress = *ressPtr;
1111 FILE* const srcFile = ress.srcFile;
1112 FILE* const dstFile = ress.dstFile;
1113 U64 compressedfilesize = 0;
1114 ZSTD_EndDirective directive = ZSTD_e_continue;
1115
1116 /* stats */
1117 ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };
1118 ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };
1119 typedef enum { noChange, slower, faster } speedChange_e;
1120 speedChange_e speedChange = noChange;
1121 unsigned flushWaiting = 0;
1122 unsigned inputPresented = 0;
1123 unsigned inputBlocked = 0;
1124 unsigned lastJobID = 0;
1125
1126 DISPLAYLEVEL(6, "compression using zstd format \n");
1127
1128 /* init */
1129 if (fileSize != UTIL_FILESIZE_UNKNOWN) {
1130 CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
1131 } else if (prefs->streamSrcSize > 0) {
1132 /* unknown source size; use the declared stream size */
1133 CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
1134 }
1135 (void)srcFileName;
1136
1137 /* Main compression loop */
1138 do {
1139 size_t stillToFlush;
1140 /* Fill input Buffer */
1141 size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
1142 ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
1143 DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
1144 *readsize += inSize;
1145
1146 if ((inSize == 0) || (*readsize == fileSize))
1147 directive = ZSTD_e_end;
1148
1149 stillToFlush = 1;
1150 while ((inBuff.pos != inBuff.size) /* input buffer must be entirely ingested */
1151 || (directive == ZSTD_e_end && stillToFlush != 0) ) {
1152
1153 size_t const oldIPos = inBuff.pos;
1154 ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
1155 size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
1156 CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
1157
1158 /* count stats */
1159 inputPresented++;
1160 if (oldIPos == inBuff.pos) inputBlocked++; /* input buffer is full and can't take any more : input speed is faster than consumption rate */
1161 if (!toFlushNow) flushWaiting = 1;
1162
1163 /* Write compressed stream */
1164 DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
1165 (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
1166 if (outBuff.pos) {
1167 size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
1168 if (sizeCheck != outBuff.pos)
1169 EXM_THROW(25, "Write error : %s (cannot write compressed block)",
1170 strerror(errno));
1171 compressedfilesize += outBuff.pos;
1172 }
1173
1174 /* display notification; and adapt compression level */
1175 if (READY_FOR_UPDATE()) {
1176 ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
1177 double const cShare = (double)zfp.produced / (zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
1178
1179 /* display progress notifications */
1180 if (g_display_prefs.displayLevel >= 3) {
1181 DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%% ",
1182 compressionLevel,
1183 (unsigned)((zfp.ingested - zfp.consumed) >> 20),
1184 (unsigned)(zfp.consumed >> 20),
1185 (unsigned)(zfp.produced >> 20),
1186 cShare );
1187 } else { /* summarized notifications if == 2; */
1188 DISPLAYLEVEL(2, "\rRead : %u ", (unsigned)(zfp.consumed >> 20));
1189 if (fileSize != UTIL_FILESIZE_UNKNOWN)
1190 DISPLAYLEVEL(2, "/ %u ", (unsigned)(fileSize >> 20));
1191 DISPLAYLEVEL(2, "MB ==> %2.f%% ", cShare);
1192 DELAY_NEXT_UPDATE();
1193 }
1194
1195 /* adaptive mode : statistics measurement and speed correction */
1196 if (prefs->adaptiveMode) {
1197
1198 /* check output speed */
1199 if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */
1200
1201 unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
1202 unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
1203 assert(zfp.produced >= previous_zfp_update.produced);
1204 assert(prefs->nbWorkers >= 1);
1205
1206 /* test if compression is blocked
1207 * either because output is slow and all buffers are full
1208 * or because input is slow and no job can start while waiting for at least one buffer to be filled.
1209 * note : exclude starting part, since currentJobID > 1 */
1210 if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
1211 && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */
1212 ) {
1213 DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
1214 speedChange = slower;
1215 }
1216
1217 previous_zfp_update = zfp;
1218
1219 if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
1220 && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */
1221 ) {
1222 DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
1223 speedChange = slower;
1224 }
1225 flushWaiting = 0;
1226 }
1227
1228 /* course correct only if there is at least one new job completed */
1229 if (zfp.currentJobID > lastJobID) {
1230 DISPLAYLEVEL(6, "compression level adaptation check \n")
1231
1232 /* check input speed */
1233 if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */
1234 if (inputBlocked <= 0) {
1235 DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
1236 speedChange = slower;
1237 } else if (speedChange == noChange) {
1238 unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
1239 unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
1240 unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
1241 unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed;
1242 previous_zfp_correction = zfp;
1243 assert(inputPresented > 0);
1244 DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
1245 inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
1246 (unsigned)newlyIngested, (unsigned)newlyConsumed,
1247 (unsigned)newlyFlushed, (unsigned)newlyProduced);
1248 if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */
1249 && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */
1250 && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
1251 ) {
1252 DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
1253 newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
1254 speedChange = faster;
1255 }
1256 }
1257 inputBlocked = 0;
1258 inputPresented = 0;
1259 }
1260
1261 if (speedChange == slower) {
1262 DISPLAYLEVEL(6, "slower speed , higher compression \n")
1263 compressionLevel ++;
1264 if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
1265 if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
1266 compressionLevel += (compressionLevel == 0); /* skip 0 */
1267 ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
1268 }
1269 if (speedChange == faster) {
1270 DISPLAYLEVEL(6, "faster speed , lighter compression \n")
1271 compressionLevel --;
1272 if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
1273 compressionLevel -= (compressionLevel == 0); /* skip 0 */
1274 ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
1275 }
1276 speedChange = noChange;
1277
1278 lastJobID = zfp.currentJobID;
1279 } /* if (zfp.currentJobID > lastJobID) */
1280 } /* if (g_adaptiveMode) */
1281 } /* if (READY_FOR_UPDATE()) */
1282 } /* while ((inBuff.pos != inBuff.size) */
1283 } while (directive != ZSTD_e_end);
1284
1285 if (ferror(srcFile)) {
1286 EXM_THROW(26, "Read error : I/O error");
1287 }
1288 if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
1289 EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
1290 (unsigned long long)*readsize, (unsigned long long)fileSize);
1291 }
1292
1293 return compressedfilesize;
1294 }
1295
1296 /*! FIO_compressFilename_internal() :
1297 * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
1298 * @return : 0 : compression completed correctly,
1299 * 1 : missing or pb opening srcFileName
1300 */
1301 static int
FIO_compressFilename_internal(FIO_prefs_t * const prefs,cRess_t ress,const char * dstFileName,const char * srcFileName,int compressionLevel)1302 FIO_compressFilename_internal(FIO_prefs_t* const prefs,
1303 cRess_t ress,
1304 const char* dstFileName, const char* srcFileName,
1305 int compressionLevel)
1306 {
1307 UTIL_time_t const timeStart = UTIL_getTime();
1308 clock_t const cpuStart = clock();
1309 U64 readsize = 0;
1310 U64 compressedfilesize = 0;
1311 U64 const fileSize = UTIL_getFileSize(srcFileName);
1312 DISPLAYLEVEL(5, "%s: %u bytes \n", srcFileName, (unsigned)fileSize);
1313
1314 /* compression format selection */
1315 switch (prefs->compressionType) {
1316 default:
1317 case FIO_zstdCompression:
1318 compressedfilesize = FIO_compressZstdFrame(prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize);
1319 break;
1320
1321 case FIO_gzipCompression:
1322 #ifdef ZSTD_GZCOMPRESS
1323 compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
1324 #else
1325 (void)compressionLevel;
1326 EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n",
1327 srcFileName);
1328 #endif
1329 break;
1330
1331 case FIO_xzCompression:
1332 case FIO_lzmaCompression:
1333 #ifdef ZSTD_LZMACOMPRESS
1334 compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression);
1335 #else
1336 (void)compressionLevel;
1337 EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n",
1338 srcFileName);
1339 #endif
1340 break;
1341
1342 case FIO_lz4Compression:
1343 #ifdef ZSTD_LZ4COMPRESS
1344 compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize);
1345 #else
1346 (void)compressionLevel;
1347 EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n",
1348 srcFileName);
1349 #endif
1350 break;
1351 }
1352
1353 /* Status */
1354 DISPLAYLEVEL(2, "\r%79s\r", "");
1355 DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n",
1356 srcFileName,
1357 (double)compressedfilesize / (readsize+(!readsize)/*avoid div by zero*/) * 100,
1358 (unsigned long long)readsize, (unsigned long long) compressedfilesize,
1359 dstFileName);
1360
1361 /* Elapsed Time and CPU Load */
1362 { clock_t const cpuEnd = clock();
1363 double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
1364 U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
1365 double const timeLength_s = (double)timeLength_ns / 1000000000;
1366 double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
1367 DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n",
1368 srcFileName, timeLength_s, cpuLoad_pct);
1369 }
1370 return 0;
1371 }
1372
1373
1374 /*! FIO_compressFilename_dstFile() :
1375 * open dstFileName, or pass-through if ress.dstFile != NULL,
1376 * then start compression with FIO_compressFilename_internal().
1377 * Manages source removal (--rm) and file permissions transfer.
1378 * note : ress.srcFile must be != NULL,
1379 * so reach this function through FIO_compressFilename_srcFile().
1380 * @return : 0 : compression completed correctly,
1381 * 1 : pb
1382 */
FIO_compressFilename_dstFile(FIO_prefs_t * const prefs,cRess_t ress,const char * dstFileName,const char * srcFileName,int compressionLevel)1383 static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
1384 cRess_t ress,
1385 const char* dstFileName,
1386 const char* srcFileName,
1387 int compressionLevel)
1388 {
1389 int closeDstFile = 0;
1390 int result;
1391 stat_t statbuf;
1392 int transfer_permissions = 0;
1393 assert(ress.srcFile != NULL);
1394 if (ress.dstFile == NULL) {
1395 closeDstFile = 1;
1396 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName);
1397 ress.dstFile = FIO_openDstFile(prefs, srcFileName, dstFileName);
1398 if (ress.dstFile==NULL) return 1; /* could not open dstFileName */
1399 /* Must only be added after FIO_openDstFile() succeeds.
1400 * Otherwise we may delete the destination file if it already exists,
1401 * and the user presses Ctrl-C when asked if they wish to overwrite.
1402 */
1403 addHandler(dstFileName);
1404
1405 if ( strcmp (srcFileName, stdinmark)
1406 && UTIL_getFileStat(srcFileName, &statbuf))
1407 transfer_permissions = 1;
1408 }
1409
1410 result = FIO_compressFilename_internal(prefs, ress, dstFileName, srcFileName, compressionLevel);
1411
1412 if (closeDstFile) {
1413 FILE* const dstFile = ress.dstFile;
1414 ress.dstFile = NULL;
1415
1416 clearHandler();
1417
1418 if (fclose(dstFile)) { /* error closing dstFile */
1419 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
1420 result=1;
1421 }
1422 if ( (result != 0) /* operation failure */
1423 && strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null */
1424 && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
1425 ) {
1426 FIO_remove(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
1427 } else if ( strcmp(dstFileName, stdoutmark)
1428 && strcmp(dstFileName, nulmark)
1429 && transfer_permissions) {
1430 UTIL_setFileStat(dstFileName, &statbuf);
1431 }
1432 }
1433
1434 return result;
1435 }
1436
1437 /* List used to compare file extensions (used with --exclude-compressed flag)
1438 * Different from the suffixList and should only apply to ZSTD compress operationResult
1439 */
1440 static const char *compressedFileExtensions[] = {
1441 ZSTD_EXTENSION,
1442 TZSTD_EXTENSION,
1443 GZ_EXTENSION,
1444 TGZ_EXTENSION,
1445 LZMA_EXTENSION,
1446 XZ_EXTENSION,
1447 TXZ_EXTENSION,
1448 LZ4_EXTENSION,
1449 TLZ4_EXTENSION,
1450 NULL
1451 };
1452
1453 /*! FIO_compressFilename_srcFile() :
1454 * @return : 0 : compression completed correctly,
1455 * 1 : missing or pb opening srcFileName
1456 */
1457 static int
FIO_compressFilename_srcFile(FIO_prefs_t * const prefs,cRess_t ress,const char * dstFileName,const char * srcFileName,int compressionLevel)1458 FIO_compressFilename_srcFile(FIO_prefs_t* const prefs,
1459 cRess_t ress,
1460 const char* dstFileName,
1461 const char* srcFileName,
1462 int compressionLevel)
1463 {
1464 int result;
1465
1466 /* ensure src is not a directory */
1467 if (UTIL_isDirectory(srcFileName)) {
1468 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
1469 return 1;
1470 }
1471
1472 /* ensure src is not the same as dict (if present) */
1473 if (ress.dictFileName != NULL && UTIL_isSameFile(srcFileName, ress.dictFileName)) {
1474 DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName);
1475 return 1;
1476 }
1477
1478 /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
1479 * YES => ZSTD will skip compression of the file and will return 0.
1480 * NO => ZSTD will resume with compress operation.
1481 */
1482 if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
1483 DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
1484 return 0;
1485 }
1486
1487 ress.srcFile = FIO_openSrcFile(srcFileName);
1488 if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */
1489
1490 result = FIO_compressFilename_dstFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
1491
1492 fclose(ress.srcFile);
1493 ress.srcFile = NULL;
1494 if ( prefs->removeSrcFile /* --rm */
1495 && result == 0 /* success */
1496 && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */
1497 ) {
1498 /* We must clear the handler, since after this point calling it would
1499 * delete both the source and destination files.
1500 */
1501 clearHandler();
1502 if (FIO_remove(srcFileName))
1503 EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno));
1504 }
1505 return result;
1506 }
1507
/* FIO_compressFilename() :
 * compress a single file : create the compression resources,
 * run FIO_compressFilename_srcFile(), then release the resources.
 * @return : the result of FIO_compressFilename_srcFile()
 */
int FIO_compressFilename(FIO_prefs_t* const prefs, const char* dstFileName,
                         const char* srcFileName, const char* dictFileName,
                         int compressionLevel, ZSTD_compressionParameters comprParams)
{
    int result;
    cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);

    result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
    FIO_freeCResources(ress);

    return result;
}
1519
/* FIO_determineCompressedName() :
 * create a destination filename for compressed srcFileName :
 * `srcFileName` (or the name built by FIO_createFilename_fromOutDir() when outDirName != NULL)
 * followed by `suffix`.
 * @return a pointer to it.
 * The name is stored in a static buffer, reused (and possibly reallocated) by each call :
 * the returned pointer is only valid until the next call.
 * This function never returns an error (it may abort() in case of pb)
 */
static const char*
FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
{
    static size_t dfnbCapacity = 0;
    static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
    char* outDirFilename = NULL;
    size_t sfnSize = strlen(srcFileName);
    size_t const srcSuffixLen = strlen(suffix);
    if (outDirName) {
        outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
        /* validate the pointer before reading through it */
        assert(outDirFilename != NULL);
        sfnSize = strlen(outDirFilename);
    }

    if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
        /* resize buffer for dstName */
        free(dstFileNameBuffer);
        dfnbCapacity = sfnSize + srcSuffixLen + 30;
        dstFileNameBuffer = (char*)malloc(dfnbCapacity);
        if (!dstFileNameBuffer) {
            EXM_THROW(30, "zstd: %s", strerror(errno));
        }
    }
    assert(dstFileNameBuffer != NULL);

    /* base name, then suffix */
    if (outDirFilename) {
        memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
        free(outDirFilename);
    } else {
        memcpy(dstFileNameBuffer, srcFileName, sfnSize);
    }
    memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
    return dstFileNameBuffer;
}
1559
1560
1561 /* FIO_compressMultipleFilenames() :
1562 * compress nbFiles files
1563 * into either one destination (outFileName),
1564 * or into one file each (outFileName == NULL, but suffix != NULL),
1565 * or into a destination folder (specified with -O)
1566 */
FIO_compressMultipleFilenames(FIO_prefs_t * const prefs,const char ** inFileNamesTable,unsigned nbFiles,const char * outDirName,const char * outFileName,const char * suffix,const char * dictFileName,int compressionLevel,ZSTD_compressionParameters comprParams)1567 int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
1568 const char** inFileNamesTable, unsigned nbFiles,
1569 const char* outDirName,
1570 const char* outFileName, const char* suffix,
1571 const char* dictFileName, int compressionLevel,
1572 ZSTD_compressionParameters comprParams)
1573 {
1574 int error = 0;
1575 cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
1576
1577 /* init */
1578 assert(outFileName != NULL || suffix != NULL);
1579 if (outFileName != NULL) { /* output into a single destination (stdout typically) */
1580 ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName);
1581 if (ress.dstFile == NULL) { /* could not open outFileName */
1582 error = 1;
1583 } else {
1584 unsigned u;
1585 for (u=0; u<nbFiles; u++)
1586 error |= FIO_compressFilename_srcFile(prefs, ress, outFileName, inFileNamesTable[u], compressionLevel);
1587 if (fclose(ress.dstFile))
1588 EXM_THROW(29, "Write error (%s) : cannot properly close %s",
1589 strerror(errno), outFileName);
1590 ress.dstFile = NULL;
1591 }
1592 } else {
1593 unsigned u;
1594 for (u=0; u<nbFiles; u++) {
1595 const char* const srcFileName = inFileNamesTable[u];
1596 const char* const dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */
1597 error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
1598 }
1599 if (outDirName)
1600 FIO_checkFilenameCollisions(inFileNamesTable ,nbFiles);
1601 }
1602
1603 FIO_freeCResources(ress);
1604 return error;
1605 }
1606
1607 #endif /* #ifndef ZSTD_NOCOMPRESS */
1608
1609
1610
1611 #ifndef ZSTD_NODECOMPRESS
1612
1613 /* **************************************************************************
1614 * Decompression
1615 ***************************************************************************/
/* dRess_t :
 * resources used by decompression operations.
 * Created (zero-initialized, then filled) by FIO_createDResources(),
 * released by FIO_freeDResources(). */
typedef struct {
    void*  srcBuffer;         /* input buffer, allocated by FIO_createDResources() */
    size_t srcBufferSize;     /* capacity of srcBuffer : ZSTD_DStreamInSize() */
    size_t srcBufferLoaded;   /* starts at 0 ; presumably nb of bytes currently held in srcBuffer - TODO confirm against users */
    void*  dstBuffer;         /* output buffer, allocated by FIO_createDResources() */
    size_t dstBufferSize;     /* capacity of dstBuffer : ZSTD_DStreamOutSize() */
    ZSTD_DStream* dctx;       /* decompression stream, created with ZSTD_createDStream() */
    FILE*  dstFile;           /* starts as NULL ; not opened by FIO_createDResources() */
} dRess_t;
1625
FIO_createDResources(FIO_prefs_t * const prefs,const char * dictFileName)1626 static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
1627 {
1628 dRess_t ress;
1629 memset(&ress, 0, sizeof(ress));
1630
1631 /* Allocation */
1632 ress.dctx = ZSTD_createDStream();
1633 if (ress.dctx==NULL)
1634 EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
1635 CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
1636 ress.srcBufferSize = ZSTD_DStreamInSize();
1637 ress.srcBuffer = malloc(ress.srcBufferSize);
1638 ress.dstBufferSize = ZSTD_DStreamOutSize();
1639 ress.dstBuffer = malloc(ress.dstBufferSize);
1640 if (!ress.srcBuffer || !ress.dstBuffer)
1641 EXM_THROW(61, "Allocation error : not enough memory");
1642
1643 /* dictionary */
1644 { void* dictBuffer;
1645 size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName);
1646 CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) );
1647 free(dictBuffer);
1648 }
1649
1650 return ress;
1651 }
1652
/* FIO_freeDResources() :
 * release the decompression stream first (its result goes through CHECK()),
 * then both I/O buffers.
 */
static void FIO_freeDResources(dRess_t ress)
{
    CHECK( ZSTD_freeDStream(ress.dctx) );
    free(ress.dstBuffer);
    free(ress.srcBuffer);
}
1659
1660
/** FIO_fwriteSparse() :
 *  writes bufferSize bytes of buffer into file.
 *  - test mode (prefs->testMode) : nothing is written.
 *  - sparse support disabled : plain fwrite().
 *  - otherwise : runs of zeroes are not written ; they are counted into storedSkips,
 *    and turned into a forward seek just before the next non-zero data is written.
 *  Write and seek failures are handled through EXM_THROW().
 * @return : storedSkips, to be provided to next call to FIO_fwriteSparse() or FIO_fwriteSparseEnd() */
static unsigned
FIO_fwriteSparse(const FIO_prefs_t* const prefs,
                 FILE* file,
                 const void* buffer, size_t bufferSize,
                 unsigned storedSkips)
{
    /* the buffer is scanned by units of size_t ; names ending with T count size_t units, not bytes */
    const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */
    size_t bufferSizeT = bufferSize / sizeof(size_t);
    const size_t* const bufferTEnd = bufferT + bufferSizeT;
    const size_t* ptrT = bufferT;
    static const size_t segmentSizeT = (32 KB) / sizeof(size_t); /* 0-test re-attempted every 32 KB */

    if (prefs->testMode) return 0; /* do not output anything in test mode */

    if (!prefs->sparseFileSupport) { /* normal write */
        size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
        if (sizeCheck != bufferSize)
            EXM_THROW(70, "Write error : cannot write decoded block : %s",
                            strerror(errno));
        return 0;
    }

    /* avoid int overflow */
    /* more than 1 GB of pending zeroes : skip 1 GB right away, so that the counter stays small */
    if (storedSkips > 1 GB) {
        int const seekResult = LONG_SEEK(file, 1 GB, SEEK_CUR);
        if (seekResult != 0)
            EXM_THROW(91, "1 GB skip error (sparse file support)");
        storedSkips -= 1 GB;
    }

    /* process the size_t-aligned part of the buffer, one segment (<= 32 KB) at a time */
    while (ptrT < bufferTEnd) {
        size_t seg0SizeT = segmentSizeT;
        size_t nb0T;

        /* count leading zeros */
        if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT;   /* last segment can be shorter */
        bufferSizeT -= seg0SizeT;
        for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ;
        storedSkips += (unsigned)(nb0T * sizeof(size_t));

        if (nb0T != seg0SizeT) { /* not all 0s */
            /* jump over all pending zeroes, then write the rest of the segment as is
             * (zeroes located after the first non-zero word are written normally) */
            int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR);
            if (seekResult) EXM_THROW(92, "Sparse skip error ; try --no-sparse");
            storedSkips = 0;
            seg0SizeT -= nb0T;
            ptrT += nb0T;
            {   size_t const sizeCheck = fwrite(ptrT, sizeof(size_t), seg0SizeT, file);
                if (sizeCheck != seg0SizeT)
                    EXM_THROW(93, "Write error : cannot write decoded block : %s",
                            strerror(errno));
        }   }
        /* seg0SizeT is now the nb of words written, or the full (all-zero) segment size */
        ptrT += seg0SizeT;
    }

    /* trailing bytes, when bufferSize is not a multiple of sizeof(size_t) : same logic, byte by byte */
    {   static size_t const maskT = sizeof(size_t)-1;
        if (bufferSize & maskT) {
            /* size not multiple of sizeof(size_t) : implies end of block */
            const char* const restStart = (const char*)bufferTEnd;
            const char* restPtr = restStart;
            size_t restSize = bufferSize & maskT;
            const char* const restEnd = restStart + restSize;
            for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;
            storedSkips += (unsigned) (restPtr - restStart);
            if (restPtr != restEnd) {
                int seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR);
                if (seekResult)
                    EXM_THROW(94, "Sparse skip error ; try --no-sparse");
                storedSkips = 0;
                {   size_t const sizeCheck = fwrite(restPtr, 1, (size_t)(restEnd - restPtr), file);
                    if (sizeCheck != (size_t)(restEnd - restPtr))
                        EXM_THROW(95, "Write error : cannot write decoded end of block : %s",
                            strerror(errno));
    }   }   }   }

    return storedSkips;
}
1739
1740 static void
FIO_fwriteSparseEnd(const FIO_prefs_t * const prefs,FILE * file,unsigned storedSkips)1741 FIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
1742 {
1743 if (prefs->testMode) assert(storedSkips == 0);
1744 if (storedSkips>0) {
1745 assert(prefs->sparseFileSupport > 0); /* storedSkips>0 implies sparse support is enabled */
1746 (void)prefs; /* assert can be disabled, in which case prefs becomes unused */
1747 if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0)
1748 EXM_THROW(69, "Final skip error (sparse file support)");
1749 /* last zero must be explicitly written,
1750 * so that skipped ones get implicitly translated as zero by FS */
1751 { const char lastZeroByte[1] = { 0 };
1752 if (fwrite(lastZeroByte, 1, 1, file) != 1)
1753 EXM_THROW(69, "Write error : cannot write last zero : %s", strerror(errno));
1754 } }
1755 }
1756
1757
1758 /** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
1759 @return : 0 (no error) */
FIO_passThrough(const FIO_prefs_t * const prefs,FILE * foutput,FILE * finput,void * buffer,size_t bufferSize,size_t alreadyLoaded)1760 static int FIO_passThrough(const FIO_prefs_t* const prefs,
1761 FILE* foutput, FILE* finput,
1762 void* buffer, size_t bufferSize,
1763 size_t alreadyLoaded)
1764 {
1765 size_t const blockSize = MIN(64 KB, bufferSize);
1766 size_t readFromInput = 1;
1767 unsigned storedSkips = 0;
1768
1769 /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */
1770 { size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput);
1771 if (sizeCheck != alreadyLoaded) {
1772 DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno));
1773 return 1;
1774 } }
1775
1776 while (readFromInput) {
1777 readFromInput = fread(buffer, 1, blockSize, finput);
1778 storedSkips = FIO_fwriteSparse(prefs, foutput, buffer, readFromInput, storedSkips);
1779 }
1780
1781 FIO_fwriteSparseEnd(prefs, foutput, storedSkips);
1782 return 0;
1783 }
1784
/* FIO_highbit64() :
 * @return : index of the highest bit set in v (0 for v==1, 63 for v==1<<63),
 *           i.e. the nb of right shifts needed to bring v down to 1.
 * note : v must be > 0 (checked by assert) */
static unsigned FIO_highbit64(unsigned long long v)
{
    unsigned position;
    assert(v != 0);
    for (position = 0; v > 1; v >>= 1) {
        position++;
    }
    return position;
}
1797
1798 /* FIO_zstdErrorHelp() :
1799 * detailed error message when requested window size is too large */
1800 static void
FIO_zstdErrorHelp(const FIO_prefs_t * const prefs,const dRess_t * ress,size_t err,const char * srcFileName)1801 FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
1802 const dRess_t* ress,
1803 size_t err, const char* srcFileName)
1804 {
1805 ZSTD_frameHeader header;
1806
1807 /* Help message only for one specific error */
1808 if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
1809 return;
1810
1811 /* Try to decode the frame header */
1812 err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded);
1813 if (err == 0) {
1814 unsigned long long const windowSize = header.windowSize;
1815 unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
1816 assert(prefs->memLimit > 0);
1817 DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u\n",
1818 srcFileName, windowSize, prefs->memLimit);
1819 if (windowLog <= ZSTD_WINDOWLOG_MAX) {
1820 unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
1821 assert(windowSize < (U64)(1ULL << 52)); /* ensure now overflow for windowMB */
1822 DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB\n",
1823 srcFileName, windowLog, windowMB);
1824 return;
1825 }
1826 }
1827 DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported\n",
1828 srcFileName, ZSTD_WINDOWLOG_MAX);
1829 }
1830
/** FIO_decompressZstdFrame() :
 *  decodes one zstd frame with ress->dctx.
 *  Input starts with the ress->srcBufferLoaded bytes already present in ress->srcBuffer,
 *  and continues with data read from finput.
 *  Decoded data is written into ress->dstFile through FIO_fwriteSparse().
 *  On return, input not consumed by this frame stays at the beginning of ress->srcBuffer.
 *  alreadyDecoded is only used for the progress display.
 *  @return : size of decoded zstd frame, or an error code (FIO_ERROR_FRAME_DECODING)
 */
#define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2))
static unsigned long long
FIO_decompressZstdFrame(const FIO_prefs_t* const prefs,
                        dRess_t* ress, FILE* finput,
                        const char* srcFileName, U64 alreadyDecoded)
{
    U64 frameSize = 0;
    U32 storedSkips = 0;

    size_t const srcFileLength = strlen(srcFileName);
    if (srcFileLength>20) srcFileName += srcFileLength-20; /* display last 20 characters only */

    ZSTD_resetDStream(ress->dctx);

    /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
    {   size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX;
        if (ress->srcBufferLoaded < toDecode) {
            size_t const toRead = toDecode - ress->srcBufferLoaded;
            void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
            /* a short read is not an error here : the file may be smaller than the largest header */
            ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput);
    }   }

    /* Main decompression Loop */
    while (1) {
        ZSTD_inBuffer inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 };
        ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 };
        size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
        if (ZSTD_isError(readSizeHint)) {
            DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
                            srcFileName, ZSTD_getErrorName(readSizeHint));
            FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
            return FIO_ERROR_FRAME_DECODING;
        }

        /* Write block */
        storedSkips = FIO_fwriteSparse(prefs, ress->dstFile, ress->dstBuffer, outBuff.pos, storedSkips);
        frameSize += outBuff.pos;
        DISPLAYUPDATE(2, "\r%-20.20s : %u MB... ",
                        srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );

        /* move input not consumed yet to the beginning of the buffer */
        if (inBuff.pos > 0) {
            memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos);
            ress->srcBufferLoaded -= inBuff.pos;
        }

        if (readSizeHint == 0) break; /* end of frame */

        /* Fill input buffer */
        {   size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */
            if (ress->srcBufferLoaded < toDecode) {
                size_t const toRead = toDecode - ress->srcBufferLoaded; /* > 0 */
                void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
                size_t const readSize = fread(startPosition, 1, toRead, finput);
                /* the decoder requested more input, but none could be read */
                if (readSize==0) {
                    DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
                                    srcFileName);
                    return FIO_ERROR_FRAME_DECODING;
                }
                ress->srcBufferLoaded += readSize;
    }   }   }

    /* materialize zeroes still pending at the end of the frame */
    FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);

    return frameSize;
}
1899
1900
1901 #ifdef ZSTD_GZDECOMPRESS
1902 static unsigned long long
FIO_decompressGzFrame(const FIO_prefs_t * const prefs,dRess_t * ress,FILE * srcFile,const char * srcFileName)1903 FIO_decompressGzFrame(const FIO_prefs_t* const prefs,
1904 dRess_t* ress, FILE* srcFile,
1905 const char* srcFileName)
1906 {
1907 unsigned long long outFileSize = 0;
1908 z_stream strm;
1909 int flush = Z_NO_FLUSH;
1910 int decodingError = 0;
1911 unsigned storedSkips = 0;
1912
1913 strm.zalloc = Z_NULL;
1914 strm.zfree = Z_NULL;
1915 strm.opaque = Z_NULL;
1916 strm.next_in = 0;
1917 strm.avail_in = 0;
1918 /* see http://www.zlib.net/manual.html */
1919 if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK)
1920 return FIO_ERROR_FRAME_DECODING;
1921
1922 strm.next_out = (Bytef*)ress->dstBuffer;
1923 strm.avail_out = (uInt)ress->dstBufferSize;
1924 strm.avail_in = (uInt)ress->srcBufferLoaded;
1925 strm.next_in = (z_const unsigned char*)ress->srcBuffer;
1926
1927 for ( ; ; ) {
1928 int ret;
1929 if (strm.avail_in == 0) {
1930 ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
1931 if (ress->srcBufferLoaded == 0) flush = Z_FINISH;
1932 strm.next_in = (z_const unsigned char*)ress->srcBuffer;
1933 strm.avail_in = (uInt)ress->srcBufferLoaded;
1934 }
1935 ret = inflate(&strm, flush);
1936 if (ret == Z_BUF_ERROR) {
1937 DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName);
1938 decodingError = 1; break;
1939 }
1940 if (ret != Z_OK && ret != Z_STREAM_END) {
1941 DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret);
1942 decodingError = 1; break;
1943 }
1944 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
1945 if (decompBytes) {
1946 storedSkips = FIO_fwriteSparse(prefs, ress->dstFile, ress->dstBuffer, decompBytes, storedSkips);
1947 outFileSize += decompBytes;
1948 strm.next_out = (Bytef*)ress->dstBuffer;
1949 strm.avail_out = (uInt)ress->dstBufferSize;
1950 }
1951 }
1952 if (ret == Z_STREAM_END) break;
1953 }
1954
1955 if (strm.avail_in > 0)
1956 memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
1957 ress->srcBufferLoaded = strm.avail_in;
1958 if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */
1959 && (decodingError==0) ) {
1960 DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
1961 decodingError = 1;
1962 }
1963 FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
1964 return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
1965 }
1966 #endif
1967
1968
1969 #ifdef ZSTD_LZMADECOMPRESS
1970 static unsigned long long
FIO_decompressLzmaFrame(const FIO_prefs_t * const prefs,dRess_t * ress,FILE * srcFile,const char * srcFileName,int plain_lzma)1971 FIO_decompressLzmaFrame(const FIO_prefs_t* const prefs,
1972 dRess_t* ress, FILE* srcFile,
1973 const char* srcFileName, int plain_lzma)
1974 {
1975 unsigned long long outFileSize = 0;
1976 lzma_stream strm = LZMA_STREAM_INIT;
1977 lzma_action action = LZMA_RUN;
1978 lzma_ret initRet;
1979 int decodingError = 0;
1980 unsigned storedSkips = 0;
1981
1982 strm.next_in = 0;
1983 strm.avail_in = 0;
1984 if (plain_lzma) {
1985 initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
1986 } else {
1987 initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
1988 }
1989
1990 if (initRet != LZMA_OK) {
1991 DISPLAYLEVEL(1, "zstd: %s: %s error %d \n",
1992 plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder",
1993 srcFileName, initRet);
1994 return FIO_ERROR_FRAME_DECODING;
1995 }
1996
1997 strm.next_out = (BYTE*)ress->dstBuffer;
1998 strm.avail_out = ress->dstBufferSize;
1999 strm.next_in = (BYTE const*)ress->srcBuffer;
2000 strm.avail_in = ress->srcBufferLoaded;
2001
2002 for ( ; ; ) {
2003 lzma_ret ret;
2004 if (strm.avail_in == 0) {
2005 ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
2006 if (ress->srcBufferLoaded == 0) action = LZMA_FINISH;
2007 strm.next_in = (BYTE const*)ress->srcBuffer;
2008 strm.avail_in = ress->srcBufferLoaded;
2009 }
2010 ret = lzma_code(&strm, action);
2011
2012 if (ret == LZMA_BUF_ERROR) {
2013 DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName);
2014 decodingError = 1; break;
2015 }
2016 if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
2017 DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n",
2018 srcFileName, ret);
2019 decodingError = 1; break;
2020 }
2021 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
2022 if (decompBytes) {
2023 storedSkips = FIO_fwriteSparse(prefs, ress->dstFile, ress->dstBuffer, decompBytes, storedSkips);
2024 outFileSize += decompBytes;
2025 strm.next_out = (BYTE*)ress->dstBuffer;
2026 strm.avail_out = ress->dstBufferSize;
2027 } }
2028 if (ret == LZMA_STREAM_END) break;
2029 }
2030
2031 if (strm.avail_in > 0)
2032 memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
2033 ress->srcBufferLoaded = strm.avail_in;
2034 lzma_end(&strm);
2035 FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
2036 return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
2037 }
2038 #endif
2039
2040 #ifdef ZSTD_LZ4DECOMPRESS
/* FIO_decompressLz4Frame() :
 * decodes one lz4 frame with the LZ4F streaming decoder.
 * The 4-byte magic number is written again at the start of ress->srcBuffer to start the decoder ;
 * every following byte is read from srcFile.
 * Decoded data is written into ress->dstFile through FIO_fwriteSparse().
 * On return, ress->srcBufferLoaded is reset to 0.
 * @return : nb of decoded bytes, or FIO_ERROR_FRAME_DECODING */
static unsigned long long
FIO_decompressLz4Frame(const FIO_prefs_t* const prefs,
                       dRess_t* ress, FILE* srcFile,
                       const char* srcFileName)
{
    unsigned long long filesize = 0;
    LZ4F_errorCode_t nextToLoad;   /* decoder's hint : nb of input bytes expected next ; 0 ends the main loop */
    LZ4F_decompressionContext_t dCtx;
    LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
    int decodingError = 0;
    unsigned storedSkips = 0;

    if (LZ4F_isError(errorCode)) {
        DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n");
        return FIO_ERROR_FRAME_DECODING;
    }

    /* Init feed with magic number (already consumed from FILE* sFile) */
    {   size_t inSize = 4;
        size_t outSize= 0;
        MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER);
        nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL);
        if (LZ4F_isError(nextToLoad)) {
            DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n",
                            srcFileName, LZ4F_getErrorName(nextToLoad));
            LZ4F_freeDecompressionContext(dCtx);
            return FIO_ERROR_FRAME_DECODING;
    }   }

    /* Main Loop */
    for (;nextToLoad;) {
        size_t readSize;
        size_t pos = 0;   /* nb of bytes of the current read already given to the decoder */
        size_t decodedBytes = ress->dstBufferSize;

        /* Read input */
        if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize;   /* never read more than the buffer can hold */
        readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile);
        if (!readSize) break; /* reached end of file or stream */

        /* a completely filled output buffer means more decoded data may remain to be flushed */
        while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */
            /* Decode Input (at least partially) */
            size_t remaining = readSize - pos;   /* in : input available ; out : input consumed */
            decodedBytes = ress->dstBufferSize;  /* in : output capacity ; out : output produced */
            nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL);
            if (LZ4F_isError(nextToLoad)) {
                DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
                                srcFileName, LZ4F_getErrorName(nextToLoad));
                /* nextToLoad = 0 also stops the outer loop, and silences the "unfinished" message below */
                decodingError = 1; nextToLoad = 0; break;
            }
            pos += remaining;

            /* Write Block */
            if (decodedBytes) {
                storedSkips = FIO_fwriteSparse(prefs, ress->dstFile, ress->dstBuffer, decodedBytes, storedSkips);
                filesize += decodedBytes;
                DISPLAYUPDATE(2, "\rDecompressed : %u MB ", (unsigned)(filesize>>20));
            }

            if (!nextToLoad) break;
        }
    }
    /* can be out because readSize == 0, which could be an fread() error */
    if (ferror(srcFile)) {
        DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName);
        decodingError=1;
    }

    /* input ended while the decoder still expected data */
    if (nextToLoad!=0) {
        DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName);
        decodingError=1;
    }

    LZ4F_freeDecompressionContext(dCtx);
    ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */
    FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);

    return decodingError ? FIO_ERROR_FRAME_DECODING : filesize;
}
2120 #endif
2121
2122
2123
2124 /** FIO_decompressFrames() :
2125 * Find and decode frames inside srcFile
2126 * srcFile presumed opened and valid
2127 * @return : 0 : OK
2128 * 1 : error
2129 */
FIO_decompressFrames(const FIO_prefs_t * const prefs,dRess_t ress,FILE * srcFile,const char * dstFileName,const char * srcFileName)2130 static int FIO_decompressFrames(const FIO_prefs_t* const prefs,
2131 dRess_t ress, FILE* srcFile,
2132 const char* dstFileName, const char* srcFileName)
2133 {
2134 unsigned readSomething = 0;
2135 unsigned long long filesize = 0;
2136 assert(srcFile != NULL);
2137
2138 /* for each frame */
2139 for ( ; ; ) {
2140 /* check magic number -> version */
2141 size_t const toRead = 4;
2142 const BYTE* const buf = (const BYTE*)ress.srcBuffer;
2143 if (ress.srcBufferLoaded < toRead) /* load up to 4 bytes for header */
2144 ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded,
2145 (size_t)1, toRead - ress.srcBufferLoaded, srcFile);
2146 if (ress.srcBufferLoaded==0) {
2147 if (readSomething==0) { /* srcFile is empty (which is invalid) */
2148 DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName);
2149 return 1;
2150 } /* else, just reached frame boundary */
2151 break; /* no more input */
2152 }
2153 readSomething = 1; /* there is at least 1 byte in srcFile */
2154 if (ress.srcBufferLoaded < toRead) {
2155 DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName);
2156 return 1;
2157 }
2158 if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) {
2159 unsigned long long const frameSize = FIO_decompressZstdFrame(prefs, &ress, srcFile, srcFileName, filesize);
2160 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2161 filesize += frameSize;
2162 } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
2163 #ifdef ZSTD_GZDECOMPRESS
2164 unsigned long long const frameSize = FIO_decompressGzFrame(prefs, &ress, srcFile, srcFileName);
2165 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2166 filesize += frameSize;
2167 #else
2168 DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName);
2169 return 1;
2170 #endif
2171 } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
2172 || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
2173 #ifdef ZSTD_LZMADECOMPRESS
2174 unsigned long long const frameSize = FIO_decompressLzmaFrame(prefs, &ress, srcFile, srcFileName, buf[0] != 0xFD);
2175 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2176 filesize += frameSize;
2177 #else
2178 DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName);
2179 return 1;
2180 #endif
2181 } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
2182 #ifdef ZSTD_LZ4DECOMPRESS
2183 unsigned long long const frameSize = FIO_decompressLz4Frame(prefs, &ress, srcFile, srcFileName);
2184 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2185 filesize += frameSize;
2186 #else
2187 DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName);
2188 return 1;
2189 #endif
2190 } else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) { /* pass-through mode */
2191 return FIO_passThrough(prefs,
2192 ress.dstFile, srcFile,
2193 ress.srcBuffer, ress.srcBufferSize,
2194 ress.srcBufferLoaded);
2195 } else {
2196 DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName);
2197 return 1;
2198 } } /* for each frame */
2199
2200 /* Final Status */
2201 DISPLAYLEVEL(2, "\r%79s\r", "");
2202 DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize);
2203
2204 return 0;
2205 }
2206
2207 /** FIO_decompressDstFile() :
2208 open `dstFileName`,
2209 or path-through if ress.dstFile is already != 0,
2210 then start decompression process (FIO_decompressFrames()).
2211 @return : 0 : OK
2212 1 : operation aborted
2213 */
FIO_decompressDstFile(FIO_prefs_t * const prefs,dRess_t ress,FILE * srcFile,const char * dstFileName,const char * srcFileName)2214 static int FIO_decompressDstFile(FIO_prefs_t* const prefs,
2215 dRess_t ress, FILE* srcFile,
2216 const char* dstFileName, const char* srcFileName)
2217 {
2218 int result;
2219 stat_t statbuf;
2220 int transfer_permissions = 0;
2221 int releaseDstFile = 0;
2222
2223 if ((ress.dstFile == NULL) && (prefs->testMode==0)) {
2224 releaseDstFile = 1;
2225
2226 ress.dstFile = FIO_openDstFile(prefs, srcFileName, dstFileName);
2227 if (ress.dstFile==NULL) return 1;
2228
2229 /* Must only be added after FIO_openDstFile() succeeds.
2230 * Otherwise we may delete the destination file if it already exists,
2231 * and the user presses Ctrl-C when asked if they wish to overwrite.
2232 */
2233 addHandler(dstFileName);
2234
2235 if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */
2236 && UTIL_getFileStat(srcFileName, &statbuf) )
2237 transfer_permissions = 1;
2238 }
2239
2240 result = FIO_decompressFrames(prefs, ress, srcFile, dstFileName, srcFileName);
2241
2242 if (releaseDstFile) {
2243 FILE* const dstFile = ress.dstFile;
2244 clearHandler();
2245 ress.dstFile = NULL;
2246 if (fclose(dstFile)) {
2247 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
2248 result = 1;
2249 }
2250
2251 if ( (result != 0) /* operation failure */
2252 && strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null (#316) */
2253 && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
2254 ) {
2255 FIO_remove(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */
2256 } else { /* operation success */
2257 if ( strcmp(dstFileName, stdoutmark) /* special case : don't chmod stdout */
2258 && strcmp(dstFileName, nulmark) /* special case : don't chmod /dev/null */
2259 && transfer_permissions ) /* file permissions correctly extracted from src */
2260 UTIL_setFileStat(dstFileName, &statbuf); /* transfer file permissions from src into dst */
2261 }
2262 }
2263
2264 return result;
2265 }
2266
2267
2268 /** FIO_decompressSrcFile() :
2269 Open `srcFileName`, transfer control to decompressDstFile()
2270 @return : 0 : OK
2271 1 : error
2272 */
FIO_decompressSrcFile(FIO_prefs_t * const prefs,dRess_t ress,const char * dstFileName,const char * srcFileName)2273 static int FIO_decompressSrcFile(FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName)
2274 {
2275 FILE* srcFile;
2276 int result;
2277
2278 if (UTIL_isDirectory(srcFileName)) {
2279 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
2280 return 1;
2281 }
2282
2283 srcFile = FIO_openSrcFile(srcFileName);
2284 if (srcFile==NULL) return 1;
2285 ress.srcBufferLoaded = 0;
2286
2287 result = FIO_decompressDstFile(prefs, ress, srcFile, dstFileName, srcFileName);
2288
2289 /* Close file */
2290 if (fclose(srcFile)) {
2291 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */
2292 return 1;
2293 }
2294 if ( prefs->removeSrcFile /* --rm */
2295 && (result==0) /* decompression successful */
2296 && strcmp(srcFileName, stdinmark) ) /* not stdin */ {
2297 /* We must clear the handler, since after this point calling it would
2298 * delete both the source and destination files.
2299 */
2300 clearHandler();
2301 if (FIO_remove(srcFileName)) {
2302 /* failed to remove src file */
2303 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
2304 return 1;
2305 } }
2306 return result;
2307 }
2308
2309
2310
/* FIO_decompressFilename() :
 * decompresses a single source into dstFileName,
 * with resources created for this call and released before returning.
 * @return : result of FIO_decompressSrcFile() (0 : OK, 1 : error) */
int FIO_decompressFilename(FIO_prefs_t* const prefs,
                           const char* dstFileName, const char* srcFileName,
                           const char* dictFileName)
{
    int status;
    dRess_t const ress = FIO_createDResources(prefs, dictFileName);

    status = FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName);
    FIO_freeDResources(ress);

    return status;
}
2322
/* suffixList :
 * NULL-terminated list of the source suffixes accepted by FIO_determineDstName().
 * gz, lzma/xz and lz4 suffixes are only present when the matching decoder is compiled in.
 * note : FIO_determineDstName() recognizes the short tar suffixes
 *        by the 't' they have at position 1 (right after the dot). */
static const char *suffixList[] = {
    ZSTD_EXTENSION,
    TZSTD_EXTENSION,
#ifdef ZSTD_GZDECOMPRESS
    GZ_EXTENSION,
    TGZ_EXTENSION,
#endif
#ifdef ZSTD_LZMADECOMPRESS
    LZMA_EXTENSION,
    XZ_EXTENSION,
    TXZ_EXTENSION,
#endif
#ifdef ZSTD_LZ4DECOMPRESS
    LZ4_EXTENSION,
    TLZ4_EXTENSION,
#endif
    NULL
};
2341
/* suffixListStr :
 * the same suffixes as suffixList, joined with '/' into a single string,
 * for display in the "unknown suffix" error message. */
static const char *suffixListStr =
    ZSTD_EXTENSION "/" TZSTD_EXTENSION
#ifdef ZSTD_GZDECOMPRESS
    "/" GZ_EXTENSION "/" TGZ_EXTENSION
#endif
#ifdef ZSTD_LZMADECOMPRESS
    "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
#endif
#ifdef ZSTD_LZ4DECOMPRESS
    "/" LZ4_EXTENSION "/" TLZ4_EXTENSION
#endif
;
2354
2355 /* FIO_determineDstName() :
2356 * create a destination filename from a srcFileName.
2357 * @return a pointer to it.
2358 * @return == NULL if there is an error */
2359 static const char*
FIO_determineDstName(const char * srcFileName,const char * outDirName)2360 FIO_determineDstName(const char* srcFileName, const char* outDirName)
2361 {
2362 static size_t dfnbCapacity = 0;
2363 static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
2364 size_t dstFileNameEndPos;
2365 char* outDirFilename = NULL;
2366 const char* dstSuffix = "";
2367 size_t dstSuffixLen = 0;
2368
2369 size_t sfnSize = strlen(srcFileName);
2370
2371 size_t srcSuffixLen;
2372 const char* const srcSuffix = strrchr(srcFileName, '.');
2373 if (srcSuffix == NULL) {
2374 DISPLAYLEVEL(1,
2375 "zstd: %s: unknown suffix (%s expected). "
2376 "Can't derive the output file name. "
2377 "Specify it with -o dstFileName. Ignoring.\n",
2378 srcFileName, suffixListStr);
2379 return NULL;
2380 }
2381 srcSuffixLen = strlen(srcSuffix);
2382
2383 {
2384 const char** matchedSuffixPtr;
2385 for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
2386 if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
2387 break;
2388 }
2389 }
2390
2391 /* check suffix is authorized */
2392 if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
2393 DISPLAYLEVEL(1,
2394 "zstd: %s: unknown suffix (%s expected). "
2395 "Can't derive the output file name. "
2396 "Specify it with -o dstFileName. Ignoring.\n",
2397 srcFileName, suffixListStr);
2398 return NULL;
2399 }
2400
2401 if ((*matchedSuffixPtr)[1] == 't') {
2402 dstSuffix = ".tar";
2403 dstSuffixLen = strlen(dstSuffix);
2404 }
2405 }
2406
2407 if (outDirName) {
2408 outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
2409 sfnSize = strlen(outDirFilename);
2410 assert(outDirFilename != NULL);
2411 }
2412
2413 if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
2414 /* allocate enough space to write dstFilename into it */
2415 free(dstFileNameBuffer);
2416 dfnbCapacity = sfnSize + 20;
2417 dstFileNameBuffer = (char*)malloc(dfnbCapacity);
2418 if (dstFileNameBuffer==NULL)
2419 EXM_THROW(74, "%s : not enough memory for dstFileName",
2420 strerror(errno));
2421 }
2422
2423 /* return dst name == src name truncated from suffix */
2424 assert(dstFileNameBuffer != NULL);
2425 dstFileNameEndPos = sfnSize - srcSuffixLen;
2426 if (outDirFilename) {
2427 memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
2428 free(outDirFilename);
2429 } else {
2430 memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
2431 }
2432
2433 /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
2434 * extension on decompression. Also writes terminating null. */
2435 strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
2436 return dstFileNameBuffer;
2437
2438 /* note : dstFileNameBuffer memory is not going to be free */
2439 }
2440
2441
2442 int
FIO_decompressMultipleFilenames(FIO_prefs_t * const prefs,const char ** srcNamesTable,unsigned nbFiles,const char * outDirName,const char * outFileName,const char * dictFileName)2443 FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs,
2444 const char** srcNamesTable, unsigned nbFiles,
2445 const char* outDirName, const char* outFileName,
2446 const char* dictFileName)
2447 {
2448 int error = 0;
2449 dRess_t ress = FIO_createDResources(prefs, dictFileName);
2450
2451 if (outFileName) {
2452 unsigned u;
2453 if (!prefs->testMode) {
2454 ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName);
2455 if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
2456 }
2457 for (u=0; u<nbFiles; u++)
2458 error |= FIO_decompressSrcFile(prefs, ress, outFileName, srcNamesTable[u]);
2459 if ((!prefs->testMode) && (fclose(ress.dstFile)))
2460 EXM_THROW(72, "Write error : %s : cannot properly close output file",
2461 strerror(errno));
2462 } else {
2463 unsigned u;
2464 for (u=0; u<nbFiles; u++) { /* create dstFileName */
2465 const char* const srcFileName = srcNamesTable[u];
2466 const char* const dstFileName = FIO_determineDstName(srcFileName, outDirName);
2467 if (dstFileName == NULL) { error=1; continue; }
2468
2469 error |= FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName);
2470 }
2471 if (outDirName)
2472 FIO_checkFilenameCollisions(srcNamesTable ,nbFiles);
2473 }
2474
2475 FIO_freeDResources(ress);
2476 return error;
2477 }
2478
2479 /* **************************************************************************
2480 * .zst file info (--list command)
2481 ***************************************************************************/
2482
/* fileInfo_t :
 * information collected on .zst file(s) for the --list command */
typedef struct {
    U64 decompressedSize;     /* sum of the content sizes declared by the zstd frame headers */
    U64 compressedSize;       /* compared with the final file position by FIO_analyzeFrames() ; can be UTIL_FILESIZE_UNKNOWN */
    U64 windowSize;           /* presumably the largest window size among frames -- TODO confirm */
    int numActualFrames;      /* presumably nb of zstd frames -- TODO confirm */
    int numSkippableFrames;   /* presumably nb of skippable frames -- TODO confirm */
    int decompUnavailable;    /* set to 1 when a frame does not provide a valid content size */
    int usesCheck;            /* presumably set when a frame carries a checksum -- TODO confirm */
    U32 nbFiles;              /* presumably nb of files aggregated into this structure -- TODO confirm */
} fileInfo_t;
2493
/* InfoError :
 * result codes of the file analysis functions used by the --list command */
typedef enum {
    info_success=0,
    info_frame_error=1,        /* a frame could not be parsed (ex : undecodable frame header) */
    info_not_zstd=2,           /* end of file reached with an incomplete frame */
    info_file_error=3,         /* presumably a file access problem -- TODO confirm against its users */
    info_truncated_input=4,    /* final file position does not match the expected file size */
} InfoError;
2501
/* ERROR_IF() :
 * when condition `c` is true : displays the printf-style message at display
 * level 1, followed by a line break, then makes the *enclosing function*
 * return `n`.
 * The body is wrapped in do { } while (0), so that an invocation followed
 * by `;` is exactly one statement, and remains valid inside an unbraced
 * if / else. */
#define ERROR_IF(c,n,...) do {             \
    if (c) {                               \
        DISPLAYLEVEL(1, __VA_ARGS__);      \
        DISPLAYLEVEL(1, " \n");            \
        return n;                          \
    }                                      \
} while (0)
2509
2510 static InfoError
FIO_analyzeFrames(fileInfo_t * info,FILE * const srcFile)2511 FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
2512 {
2513 /* begin analyzing frame */
2514 for ( ; ; ) {
2515 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
2516 size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
2517 if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) {
2518 if ( feof(srcFile)
2519 && (numBytesRead == 0)
2520 && (info->compressedSize > 0)
2521 && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
2522 unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile);
2523 unsigned long long file_size = (unsigned long long) info->compressedSize;
2524 ERROR_IF(file_position != file_size, info_truncated_input,
2525 "Error: seeked to position %llu, which is beyond file size of %llu\n",
2526 file_position,
2527 file_size);
2528 break; /* correct end of file => success */
2529 }
2530 ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame");
2531 ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames");
2532 }
2533 { U32 const magicNumber = MEM_readLE32(headerBuffer);
2534 /* Zstandard frame */
2535 if (magicNumber == ZSTD_MAGICNUMBER) {
2536 ZSTD_frameHeader header;
2537 U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
2538 if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR
2539 || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) {
2540 info->decompUnavailable = 1;
2541 } else {
2542 info->decompressedSize += frameContentSize;
2543 }
2544 ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0,
2545 info_frame_error, "Error: could not decode frame header");
2546 info->windowSize = header.windowSize;
2547 /* move to the end of the frame header */
2548 { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
2549 ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size");
2550 ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0,
2551 info_frame_error, "Error: could not move to end of frame header");
2552 }
2553
2554 /* skip all blocks in the frame */
2555 { int lastBlock = 0;
2556 do {
2557 BYTE blockHeaderBuffer[3];
2558 ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3,
2559 info_frame_error, "Error while reading block header");
2560 { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);
2561 U32 const blockTypeID = (blockHeader >> 1) & 3;
2562 U32 const isRLE = (blockTypeID == 1);
2563 U32 const isWrongBlock = (blockTypeID == 3);
2564 long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);
2565 ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type");
2566 lastBlock = blockHeader & 1;
2567 ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0,
2568 info_frame_error, "Error: could not skip to end of block");
2569 }
2570 } while (lastBlock != 1);
2571 }
2572
2573 /* check if checksum is used */
2574 { BYTE const frameHeaderDescriptor = headerBuffer[4];
2575 int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
2576 if (contentChecksumFlag) {
2577 info->usesCheck = 1;
2578 ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0,
2579 info_frame_error, "Error: could not skip past checksum");
2580 } }
2581 info->numActualFrames++;
2582 }
2583 /* Skippable frame */
2584 else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
2585 U32 const frameSize = MEM_readLE32(headerBuffer + 4);
2586 long const seek = (long)(8 + frameSize - numBytesRead);
2587 ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0,
2588 info_frame_error, "Error: could not find end of skippable frame");
2589 info->numSkippableFrames++;
2590 }
2591 /* unknown content */
2592 else {
2593 return info_not_zstd;
2594 }
2595 } /* magic number analysis */
2596 } /* end analyzing frames */
2597 return info_success;
2598 }
2599
2600
2601 static InfoError
getFileInfo_fileConfirmed(fileInfo_t * info,const char * inFileName)2602 getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
2603 {
2604 InfoError status;
2605 FILE* const srcFile = FIO_openSrcFile(inFileName);
2606 ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
2607
2608 info->compressedSize = UTIL_getFileSize(inFileName);
2609 status = FIO_analyzeFrames(info, srcFile);
2610
2611 fclose(srcFile);
2612 info->nbFiles = 1;
2613 return status;
2614 }
2615
2616
2617 /** getFileInfo() :
2618 * Reads information from file, stores in *info
2619 * @return : InfoError status
2620 */
2621 static InfoError
getFileInfo(fileInfo_t * info,const char * srcFileName)2622 getFileInfo(fileInfo_t* info, const char* srcFileName)
2623 {
2624 ERROR_IF(!UTIL_isRegularFile(srcFileName),
2625 info_file_error, "Error : %s is not a file", srcFileName);
2626 return getFileInfo_fileConfirmed(info, srcFileName);
2627 }
2628
2629
2630 static void
displayInfo(const char * inFileName,const fileInfo_t * info,int displayLevel)2631 displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel)
2632 {
2633 unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB);
2634 const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB";
2635 double const windowSizeUnit = (double)info->windowSize / unit;
2636 double const compressedSizeUnit = (double)info->compressedSize / unit;
2637 double const decompressedSizeUnit = (double)info->decompressedSize / unit;
2638 double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/info->compressedSize;
2639 const char* const checkString = (info->usesCheck ? "XXH64" : "None");
2640 if (displayLevel <= 2) {
2641 if (!info->decompUnavailable) {
2642 DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %s\n",
2643 info->numSkippableFrames + info->numActualFrames,
2644 info->numSkippableFrames,
2645 compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr,
2646 ratio, checkString, inFileName);
2647 } else {
2648 DISPLAYOUT("%6d %5d %7.2f %2s %5s %s\n",
2649 info->numSkippableFrames + info->numActualFrames,
2650 info->numSkippableFrames,
2651 compressedSizeUnit, unitStr,
2652 checkString, inFileName);
2653 }
2654 } else {
2655 DISPLAYOUT("%s \n", inFileName);
2656 DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
2657 if (info->numSkippableFrames)
2658 DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
2659 DISPLAYOUT("Window Size: %.2f %2s (%llu B)\n",
2660 windowSizeUnit, unitStr,
2661 (unsigned long long)info->windowSize);
2662 DISPLAYOUT("Compressed Size: %.2f %2s (%llu B)\n",
2663 compressedSizeUnit, unitStr,
2664 (unsigned long long)info->compressedSize);
2665 if (!info->decompUnavailable) {
2666 DISPLAYOUT("Decompressed Size: %.2f %2s (%llu B)\n",
2667 decompressedSizeUnit, unitStr,
2668 (unsigned long long)info->decompressedSize);
2669 DISPLAYOUT("Ratio: %.4f\n", ratio);
2670 }
2671 DISPLAYOUT("Check: %s\n", checkString);
2672 DISPLAYOUT("\n");
2673 }
2674 }
2675
/* FIO_addFInfo() :
 * Combines two records into a new one :
 * counters and sizes are added, decompUnavailable is OR-ed (set by either),
 * usesCheck is AND-ed (set only when both have it).
 * Fields which are not listed below, such as windowSize, are left at 0.
 * @return : the combined record
 */
static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
{
    fileInfo_t sum;
    memset(&sum, 0, sizeof(sum));

    /* sizes */
    sum.compressedSize = fi1.compressedSize + fi2.compressedSize;
    sum.decompressedSize = fi1.decompressedSize + fi2.decompressedSize;

    /* counters */
    sum.nbFiles = fi1.nbFiles + fi2.nbFiles;
    sum.numActualFrames = fi1.numActualFrames + fi2.numActualFrames;
    sum.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames;

    /* flags */
    sum.usesCheck = fi1.usesCheck & fi2.usesCheck;
    sum.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable;

    return sum;
}
2689
2690 static int
FIO_listFile(fileInfo_t * total,const char * inFileName,int displayLevel)2691 FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel)
2692 {
2693 fileInfo_t info;
2694 memset(&info, 0, sizeof(info));
2695 { InfoError const error = getFileInfo(&info, inFileName);
2696 switch (error) {
2697 case info_frame_error:
2698 /* display error, but provide output */
2699 DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName);
2700 break;
2701 case info_not_zstd:
2702 DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName);
2703 if (displayLevel > 2) DISPLAYOUT("\n");
2704 return 1;
2705 case info_file_error:
2706 /* error occurred while opening the file */
2707 if (displayLevel > 2) DISPLAYOUT("\n");
2708 return 1;
2709 case info_truncated_input:
2710 DISPLAYOUT("File \"%s\" is truncated \n", inFileName);
2711 if (displayLevel > 2) DISPLAYOUT("\n");
2712 return 1;
2713 case info_success:
2714 default:
2715 break;
2716 }
2717
2718 displayInfo(inFileName, &info, displayLevel);
2719 *total = FIO_addFInfo(*total, info);
2720 assert(error == info_success || error == info_frame_error);
2721 return error;
2722 }
2723 }
2724
FIO_listMultipleFiles(unsigned numFiles,const char ** filenameTable,int displayLevel)2725 int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel)
2726 {
2727 /* ensure no specified input is stdin (needs fseek() capability) */
2728 { unsigned u;
2729 for (u=0; u<numFiles;u++) {
2730 ERROR_IF(!strcmp (filenameTable[u], stdinmark),
2731 1, "zstd: --list does not support reading from standard input");
2732 } }
2733
2734 if (numFiles == 0) {
2735 if (!IS_CONSOLE(stdin)) {
2736 DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n");
2737 }
2738 DISPLAYLEVEL(1, "No files given \n");
2739 return 1;
2740 }
2741
2742 if (displayLevel <= 2) {
2743 DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n");
2744 }
2745 { int error = 0;
2746 fileInfo_t total;
2747 memset(&total, 0, sizeof(total));
2748 total.usesCheck = 1;
2749 /* --list each file, and check for any error */
2750 { unsigned u;
2751 for (u=0; u<numFiles;u++) {
2752 error |= FIO_listFile(&total, filenameTable[u], displayLevel);
2753 } }
2754 if (numFiles > 1 && displayLevel <= 2) { /* display total */
2755 unsigned const unit = total.compressedSize < (1 MB) ? (1 KB) : (1 MB);
2756 const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB";
2757 double const compressedSizeUnit = (double)total.compressedSize / unit;
2758 double const decompressedSizeUnit = (double)total.decompressedSize / unit;
2759 double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/total.compressedSize;
2760 const char* const checkString = (total.usesCheck ? "XXH64" : "");
2761 DISPLAYOUT("----------------------------------------------------------------- \n");
2762 if (total.decompUnavailable) {
2763 DISPLAYOUT("%6d %5d %7.2f %2s %5s %u files\n",
2764 total.numSkippableFrames + total.numActualFrames,
2765 total.numSkippableFrames,
2766 compressedSizeUnit, unitStr,
2767 checkString, (unsigned)total.nbFiles);
2768 } else {
2769 DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %u files\n",
2770 total.numSkippableFrames + total.numActualFrames,
2771 total.numSkippableFrames,
2772 compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr,
2773 ratio, checkString, (unsigned)total.nbFiles);
2774 } }
2775 return error;
2776 }
2777 }
2778
2779
2780 #endif /* #ifndef ZSTD_NODECOMPRESS */
2781