1 /*
2  * Copyright (c) Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 
12 /* *************************************
13 *  Compiler Options
14 ***************************************/
15 #ifdef _MSC_VER   /* Visual */
16 #  pragma warning(disable : 4127)  /* disable: C4127: conditional expression is constant */
17 #  pragma warning(disable : 4204)  /* non-constant aggregate initializer */
18 #endif
19 #if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
20 #  define _POSIX_SOURCE 1          /* disable %llu warnings with MinGW on Windows */
21 #endif
22 
23 /*-*************************************
24 *  Includes
25 ***************************************/
#include "platform.h"   /* Large Files support, SET_BINARY_MODE */
#include "util.h"       /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */
#include <stdio.h>      /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */
#include <stdlib.h>     /* malloc, free */
#include <string.h>     /* strcmp, strlen */
#include <fcntl.h>      /* O_WRONLY */
#include <assert.h>
#include <errno.h>      /* errno */
#include <limits.h>     /* INT_MAX */
#include <signal.h>
#if !defined(_WIN32)
#  include <unistd.h>   /* close */
#endif
#include "timefn.h"     /* UTIL_getTime, UTIL_clockSpanMicro */
37 
38 #if defined (_MSC_VER)
39 #  include <sys/stat.h>
40 #  include <io.h>
41 #endif
42 
43 #include "../lib/common/mem.h"     /* U32, U64 */
44 #include "fileio.h"
45 
46 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
47 #include "../lib/zstd.h"
48 #include "../lib/zstd_errors.h"  /* ZSTD_error_frameParameter_windowTooLarge */
49 
50 #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
51 #  include <zlib.h>
52 #  if !defined(z_const)
53 #    define z_const
54 #  endif
55 #endif
56 
57 #if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
58 #  include <lzma.h>
59 #endif
60 
61 #define LZ4_MAGICNUMBER 0x184D2204
62 #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
63 #  define LZ4F_ENABLE_OBSOLETE_ENUMS
64 #  include <lz4frame.h>
65 #  include <lz4.h>
66 #endif
67 
68 
69 /*-*************************************
70 *  Constants
71 ***************************************/
#define ADAPT_WINDOWLOG_DEFAULT 23   /* window log, i.e. a (1<<23) = 8 MB window */
#define DICTSIZE_MAX (32 MB)   /* default cap on dictionary file size; protection against large input (attack scenario) */

/* NOTE(review): not referenced in this part of the file;
 * presumably a column width reserved for file names in status lines - confirm */
#define FNSPACE 30
76 
77 /* Default file permissions 0666 (modulated by umask) */
78 #if !defined(_WIN32)
79 /* These macros aren't defined on windows. */
80 #define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
81 #else
82 #define DEFAULT_FILE_PERMISSIONS (0666)
83 #endif
84 
85 /*-*************************************
86 *  Macros
87 ***************************************/
/* Postfix size multipliers : written after a number,
 * e.g. `32 MB` expands to `32 *(1 <<20)`.
 * Wrap the whole expression in parentheses when it is part of a larger one. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
/* MAX() : each argument may be evaluated twice; do not pass expressions with side effects */
#undef MAX
#define MAX(a,b) ((a)>(b) ? (a) : (b))
93 
/* Display settings consulted by the DISPLAY*() macros.
 * A single file-scope instance exists : g_display_prefs.
 * It is updated through FIO_setNotificationLevel() and FIO_setProgressSetting(). */
struct FIO_display_prefs_s {
    int displayLevel;   /* 0 : no display;  1: errors;  2: + result + interaction + warnings;  3: + progression;  4: + information */
    FIO_progressSetting_e progressSetting;   /* FIO_ps_never disables DISPLAYUPDATE() progress lines */
};

/* starts at displayLevel 2 with automatic progress setting */
static FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto};
100 
/* DISPLAY() : unconditional printf-style message on stderr */
#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
/* DISPLAYOUT() : unconditional printf-style message on stdout */
#define DISPLAYOUT(...)      fprintf(stdout, __VA_ARGS__)
/* DISPLAYLEVEL() : prints on stderr only when g_display_prefs.displayLevel is at least `l`.
 * Expands to a braced block, not an expression. */
#define DISPLAYLEVEL(l, ...) { if (g_display_prefs.displayLevel>=l) { DISPLAY(__VA_ARGS__); } }

/* Minimum time span between two progress updates;
 * compared against UTIL_clockSpanMicro(), so 6 updates per second at most. */
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
/* Time stamp of the last progress update; refreshed by DELAY_NEXT_UPDATE() */
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;

/* READY_FOR_UPDATE() : true when progress display is allowed
 * and more than g_refreshRate has elapsed since the last update */
#define READY_FOR_UPDATE() ((g_display_prefs.progressSetting != FIO_ps_never) && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate)
/* DELAY_NEXT_UPDATE() : restarts the refresh timer from now */
#define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); }
/* DISPLAYUPDATE() : rate-limited variant of DISPLAYLEVEL(), for progress lines.
 * Prints when displayLevel >= l, progress is not disabled, and either the
 * refresh delay has elapsed or displayLevel >= 4 (in which case stderr is also flushed). */
#define DISPLAYUPDATE(l, ...) {                              \
        if (g_display_prefs.displayLevel>=l && (g_display_prefs.progressSetting != FIO_ps_never)) { \
            if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \
                DELAY_NEXT_UPDATE();                         \
                DISPLAY(__VA_ARGS__);                        \
                if (g_display_prefs.displayLevel>=4) fflush(stderr);       \
    }   }   }
117 
/* MIN() : each argument may be evaluated twice; do not pass expressions with side effects */
#undef MIN  /* in case it would be already defined */
#define MIN(a,b)    ((a) < (b) ? (a) : (b))
120 
121 
/* EXM_THROW() : fatal error exit.
 * Prints "zstd: error <error> : <formatted message>" on stderr (displayLevel >= 1),
 * preceded by the source file and line when displayLevel >= 5,
 * then terminates the process with exit(error). Never returns.
 * `error` is expanded twice (message and exit code).
 * Expands to a braced block, not a single expression. */
#define EXM_THROW(error, ...)                                             \
{                                                                         \
    DISPLAYLEVEL(1, "zstd: ");                                            \
    DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "error %i : ", error);                                \
    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
    DISPLAYLEVEL(1, " \n");                                               \
    exit(error);                                                          \
}
131 
/* CHECK_V() : stores the result of `f` into `v`;
 * when ZSTD_isError(v) is true, exits through EXM_THROW(11, ...) with the zstd error name
 * (the text of the failing expression is also shown at displayLevel >= 5).
 * Expands to two statements (assignment + if) : it is not safe as the
 * unbraced body of an if/else/loop. */
#define CHECK_V(v, f)                                \
    v = f;                                           \
    if (ZSTD_isError(v)) {                           \
        DISPLAYLEVEL(5, "%s \n", #f);                \
        EXM_THROW(11, "%s", ZSTD_getErrorName(v));   \
    }
/* CHECK() : same as CHECK_V(), using a local temporary to hold the result */
#define CHECK(f) { size_t err; CHECK_V(err, f); }
139 
140 
141 /*-************************************
142 *  Signal (Ctrl-C trapping)
143 **************************************/
/* Name of the output file that INThandler() removes on Ctrl-C.
 * Set by addHandler(), reset to NULL by clearHandler(). The string is not copied. */
static const char* g_artefact = NULL;
INThandler(int sig)145 static void INThandler(int sig)
146 {
147     assert(sig==SIGINT); (void)sig;
148 #if !defined(_MSC_VER)
149     signal(sig, SIG_IGN);  /* this invocation generates a buggy warning in Visual Studio */
150 #endif
151     if (g_artefact) {
152         assert(UTIL_isRegularFile(g_artefact));
153         remove(g_artefact);
154     }
155     DISPLAY("\n");
156     exit(2);
157 }
addHandler(char const * dstFileName)158 static void addHandler(char const* dstFileName)
159 {
160     if (UTIL_isRegularFile(dstFileName)) {
161         g_artefact = dstFileName;
162         signal(SIGINT, INThandler);
163     } else {
164         g_artefact = NULL;
165     }
166 }
167 /* Idempotent */
clearHandler(void)168 static void clearHandler(void)
169 {
170     if (g_artefact) signal(SIGINT, SIG_DFL);
171     g_artefact = NULL;
172 }
173 
174 
175 /*-*********************************************************
176 *  Termination signal trapping (Print debug stack trace)
177 ***********************************************************/
178 #if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */
179 #  if (__has_feature(address_sanitizer))
180 #    define BACKTRACE_ENABLE 0
181 #  endif /* __has_feature(address_sanitizer) */
182 #elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */
183 #  define BACKTRACE_ENABLE 0
184 #endif
185 
186 #if !defined(BACKTRACE_ENABLE)
187 /* automatic detector : backtrace enabled by default on linux+glibc and osx */
188 #  if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \
189      || (defined(__APPLE__) && defined(__MACH__))
190 #    define BACKTRACE_ENABLE 1
191 #  else
192 #    define BACKTRACE_ENABLE 0
193 #  endif
194 #endif
195 
196 /* note : after this point, BACKTRACE_ENABLE is necessarily defined */
197 
198 
199 #if BACKTRACE_ENABLE
200 
201 #include <execinfo.h>   /* backtrace, backtrace_symbols */
202 
203 #define MAX_STACK_FRAMES    50
204 
ABRThandler(int sig)205 static void ABRThandler(int sig) {
206     const char* name;
207     void* addrlist[MAX_STACK_FRAMES];
208     char** symbollist;
209     int addrlen, i;
210 
211     switch (sig) {
212         case SIGABRT: name = "SIGABRT"; break;
213         case SIGFPE: name = "SIGFPE"; break;
214         case SIGILL: name = "SIGILL"; break;
215         case SIGINT: name = "SIGINT"; break;
216         case SIGSEGV: name = "SIGSEGV"; break;
217         default: name = "UNKNOWN";
218     }
219 
220     DISPLAY("Caught %s signal, printing stack:\n", name);
221     /* Retrieve current stack addresses. */
222     addrlen = backtrace(addrlist, MAX_STACK_FRAMES);
223     if (addrlen == 0) {
224         DISPLAY("\n");
225         return;
226     }
227     /* Create readable strings to each frame. */
228     symbollist = backtrace_symbols(addrlist, addrlen);
229     /* Print the stack trace, excluding calls handling the signal. */
230     for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) {
231         DISPLAY("%s\n", symbollist[i]);
232     }
233     free(symbollist);
234     /* Reset and raise the signal so default handler runs. */
235     signal(sig, SIG_DFL);
236     raise(sig);
237 }
238 #endif
239 
/* FIO_addAbortHandler() :
 * When BACKTRACE_ENABLE is set, installs ABRThandler() for
 * SIGABRT, SIGFPE, SIGILL, SIGSEGV and SIGBUS, so that a stack trace
 * is printed before the process dies. Does nothing otherwise. */
void FIO_addAbortHandler(void)
{
#if BACKTRACE_ENABLE
    signal(SIGABRT, ABRThandler);
    signal(SIGFPE, ABRThandler);
    signal(SIGILL, ABRThandler);
    signal(SIGSEGV, ABRThandler);
    signal(SIGBUS, ABRThandler);
#endif
}
250 
251 
252 /*-************************************************************
253 * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
254 ***************************************************************/
255 #if defined(_MSC_VER) && _MSC_VER >= 1400
256 #   define LONG_SEEK _fseeki64
257 #   define LONG_TELL _ftelli64
258 #elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
259 #  define LONG_SEEK fseeko
260 #  define LONG_TELL ftello
261 #elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
262 #   define LONG_SEEK fseeko64
263 #   define LONG_TELL ftello64
264 #elif defined(_WIN32) && !defined(__DJGPP__)
265 #   include <windows.h>
LONG_SEEK(FILE * file,__int64 offset,int origin)266     static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
267         LARGE_INTEGER off;
268         DWORD method;
269         off.QuadPart = offset;
270         if (origin == SEEK_END)
271             method = FILE_END;
272         else if (origin == SEEK_CUR)
273             method = FILE_CURRENT;
274         else
275             method = FILE_BEGIN;
276 
277         if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
278             return 0;
279         else
280             return -1;
281     }
LONG_TELL(FILE * file)282     static __int64 LONG_TELL(FILE* file) {
283         LARGE_INTEGER off, newOff;
284         off.QuadPart = 0;
285         newOff.QuadPart = 0;
286         SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT);
287         return newOff.QuadPart;
288     }
289 #else
290 #   define LONG_SEEK fseek
291 #   define LONG_TELL ftell
292 #endif
293 
294 
295 /*-*************************************
296 *  Parameters: FIO_prefs_t
297 ***************************************/
298 
/* typedef'd to FIO_prefs_t within fileio.h */
/* Options selected by the user. Allocated by FIO_createPreferences(),
 * modified through the FIO_set*() functions, released by FIO_freePreferences(). */
struct FIO_prefs_s {

    /* Algorithm preferences */
    FIO_compressionType_t compressionType;
    U32 sparseFileSupport;   /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */
    int dictIDFlag;
    int checksumFlag;
    int blockSize;
    int overlapLog;          /* FIO_OVERLAP_LOG_NOTSET until FIO_setOverlapLog() is called */
    U32 adaptiveMode;        /* refused by FIO_setAdaptiveMode() when nbWorkers==0 */
    U32 useRowMatchFinder;
    int rsyncable;           /* refused by FIO_setRsyncable() when nbWorkers==0 */
    int minAdaptLevel;
    int maxAdaptLevel;
    int ldmFlag;             /* stored as 0/1 by FIO_setLdmFlag() */
    int ldmHashLog;
    int ldmMinMatch;
    int ldmBucketSizeLog;    /* FIO_LDM_PARAM_NOTSET until explicitly set */
    int ldmHashRateLog;      /* FIO_LDM_PARAM_NOTSET until explicitly set */
    size_t streamSrcSize;
    size_t targetCBlockSize;
    int srcSizeHint;         /* capped at INT_MAX by FIO_setSrcSizeHint() */
    int testMode;            /* when set, FIO_openDstFile() opens no destination file */
    ZSTD_literalCompressionMode_e literalCompressionMode;

    /* IO preferences */
    U32 removeSrcFile;       /* stored as 0/1 by FIO_setRemoveSrcFile() */
    U32 overwrite;           /* when set, an existing destination is replaced without asking */

    /* Computation resources preferences */
    unsigned memLimit;       /* also the maximum dictionary size in patch-from mode (see FIO_createDictBuffer()) */
    int nbWorkers;           /* 0 is reported as "single-thread mode" by the setters */

    int excludeCompressedFiles;
    int patchFromMode;       /* stored as 0/1 by FIO_setPatchFromMode() */
    int contentSize;         /* stored as 0/1 by FIO_setContentSize() */
    int allowBlockDevices;   /* when set, FIO_openSrcFile() also accepts block devices */
};
338 
339 /*-*************************************
340 *  Parameters: FIO_ctx_t
341 ***************************************/
342 
/* typedef'd to FIO_ctx_t within fileio.h */
/* State of the current run over a list of files.
 * Allocated by FIO_createContext(), released by FIO_freeContext(). */
struct FIO_ctx_s {

    /* file i/o info */
    int nbFilesTotal;      /* 1 after FIO_createContext(); changed by FIO_setNbFilesTotal() */
    int hasStdinInput;     /* set to 1 by FIO_determineHasStdinInput() when an input name equals stdinmark */
    int hasStdoutOutput;   /* set by FIO_setHasStdoutOutput() */

    /* file i/o state */
    int currFileIdx;
    int nbFilesProcessed;
    size_t totalBytesInput;
    size_t totalBytesOutput;
};
357 
358 
359 /*-*************************************
360 *  Parameters: Initialization
361 ***************************************/
362 
/* Sentinel values stored by FIO_createPreferences() to mean
 * "parameter not specified by the user" */
#define FIO_OVERLAP_LOG_NOTSET 9999
#define FIO_LDM_PARAM_NOTSET 9999
365 
366 
FIO_createPreferences(void)367 FIO_prefs_t* FIO_createPreferences(void)
368 {
369     FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t));
370     if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
371 
372     ret->compressionType = FIO_zstdCompression;
373     ret->overwrite = 0;
374     ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
375     ret->dictIDFlag = 1;
376     ret->checksumFlag = 1;
377     ret->removeSrcFile = 0;
378     ret->memLimit = 0;
379     ret->nbWorkers = 1;
380     ret->blockSize = 0;
381     ret->overlapLog = FIO_OVERLAP_LOG_NOTSET;
382     ret->adaptiveMode = 0;
383     ret->rsyncable = 0;
384     ret->minAdaptLevel = -50;   /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */
385     ret->maxAdaptLevel = 22;   /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
386     ret->ldmFlag = 0;
387     ret->ldmHashLog = 0;
388     ret->ldmMinMatch = 0;
389     ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
390     ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
391     ret->streamSrcSize = 0;
392     ret->targetCBlockSize = 0;
393     ret->srcSizeHint = 0;
394     ret->testMode = 0;
395     ret->literalCompressionMode = ZSTD_lcm_auto;
396     ret->excludeCompressedFiles = 0;
397     ret->allowBlockDevices = 0;
398     return ret;
399 }
400 
FIO_createContext(void)401 FIO_ctx_t* FIO_createContext(void)
402 {
403     FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t));
404     if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
405 
406     ret->currFileIdx = 0;
407     ret->hasStdinInput = 0;
408     ret->hasStdoutOutput = 0;
409     ret->nbFilesTotal = 1;
410     ret->nbFilesProcessed = 0;
411     ret->totalBytesInput = 0;
412     ret->totalBytesOutput = 0;
413     return ret;
414 }
415 
/* FIO_freePreferences() :
 * Releases an object created by FIO_createPreferences(). Accepts NULL. */
void FIO_freePreferences(FIO_prefs_t* const prefs)
{
    free((void*)prefs);
}
420 
/* FIO_freeContext() :
 * Releases an object created by FIO_createContext(). Accepts NULL. */
void FIO_freeContext(FIO_ctx_t* const fCtx)
{
    free((void*)fCtx);
}
425 
426 
427 /*-*************************************
428 *  Parameters: Display Options
429 ***************************************/
430 
FIO_setNotificationLevel(int level)431 void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
432 
/* FIO_setProgressSetting() :
 * Selects whether progress lines may be displayed (FIO_ps_never disables them). */
void FIO_setProgressSetting(FIO_progressSetting_e setting)
{
    g_display_prefs.progressSetting = setting;
}
434 
435 
436 /*-*************************************
437 *  Parameters: Setters
438 ***************************************/
439 
440 /* FIO_prefs_t functions */
441 
/* FIO_setCompressionType() : records the selected compression format */
void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType)
{
    prefs->compressionType = compressionType;
}
443 
/* FIO_overwriteMode() :
 * Allows FIO_openDstFile() to replace an existing destination file without asking. */
void FIO_overwriteMode(FIO_prefs_t* const prefs)
{
    prefs->overwrite = 1;
}
445 
/* FIO_setSparseWrite() :
 * `sparse` : 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse)
{
    prefs->sparseFileSupport = sparse;
}
447 
/* FIO_setDictIDFlag() : stores `dictIDFlag` unchanged (1 after FIO_createPreferences()) */
void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag)
{
    prefs->dictIDFlag = dictIDFlag;
}
449 
/* FIO_setChecksumFlag() : stores `checksumFlag` unchanged (1 after FIO_createPreferences()) */
void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag)
{
    prefs->checksumFlag = checksumFlag;
}
451 
/* FIO_setRemoveSrcFile() : any non-zero `flag` is stored as 1 */
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag)
{
    prefs->removeSrcFile = (flag != 0);
}
453 
/* FIO_setMemLimit() : stores `memLimit` unchanged (0 after FIO_createPreferences()) */
void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit)
{
    prefs->memLimit = memLimit;
}
455 
/* FIO_setNbWorkers() :
 * Stores the requested number of workers.
 * When built without ZSTD_MULTITHREAD, a positive request triggers a
 * level-2 note; the value is stored regardless. */
void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers)
{
#ifndef ZSTD_MULTITHREAD
    if (nbWorkers > 0) {
        DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
    }
#endif
    prefs->nbWorkers = nbWorkers;
}
462 
/* FIO_setExcludeCompressedFile() : stores `excludeCompressedFiles` unchanged */
void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles)
{
    prefs->excludeCompressedFiles = excludeCompressedFiles;
}
464 
/* FIO_setAllowBlockDevices() :
 * When non-zero, FIO_openSrcFile() accepts block devices as input. */
void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices)
{
    prefs->allowBlockDevices = allowBlockDevices;
}
466 
/* FIO_setBlockSize() :
 * Stores `blockSize`. A non-zero value combined with nbWorkers==0
 * only produces a level-2 remark; the value is stored in all cases.
 * note : depends on the nbWorkers value already present in `prefs`. */
void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize)
{
    if ((blockSize != 0) && (prefs->nbWorkers == 0)) {
        DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
    }
    prefs->blockSize = blockSize;
}
472 
/* FIO_setOverlapLog() :
 * Stores `overlapLog`. A non-zero value combined with nbWorkers==0
 * only produces a level-2 remark; the value is stored in all cases.
 * note : depends on the nbWorkers value already present in `prefs`. */
void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog)
{
    if ((overlapLog != 0) && (prefs->nbWorkers == 0)) {
        DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
    }
    prefs->overlapLog = overlapLog;
}
478 
/* FIO_setAdaptiveMode() :
 * Stores `adapt`. Requesting adaptive mode (adapt != 0) while nbWorkers==0
 * is a fatal error : the process exits through EXM_THROW(1).
 * note : depends on the nbWorkers value already present in `prefs`. */
void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt)
{
    int const singleThread = (prefs->nbWorkers == 0);
    if ((adapt != 0) && singleThread) {
        EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n");
    }
    prefs->adaptiveMode = adapt;
}
484 
/* FIO_setUseRowMatchFinder() : stores `useRowMatchFinder` (converted to the U32 field) */
void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder)
{
    prefs->useRowMatchFinder = useRowMatchFinder;
}
488 
/* FIO_setRsyncable() :
 * Stores `rsyncable`. Requesting it (rsyncable > 0) while nbWorkers==0
 * is a fatal error : the process exits through EXM_THROW(1).
 * note : depends on the nbWorkers value already present in `prefs`. */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable)
{
    int const singleThread = (prefs->nbWorkers == 0);
    if ((rsyncable > 0) && singleThread) {
        EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
    }
    prefs->rsyncable = rsyncable;
}
494 
/* FIO_setStreamSrcSize() : stores `streamSrcSize` unchanged (0 after FIO_createPreferences()) */
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize)
{
    prefs->streamSrcSize = streamSrcSize;
}
498 
/* FIO_setTargetCBlockSize() : stores `targetCBlockSize` unchanged (0 after FIO_createPreferences()) */
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize)
{
    prefs->targetCBlockSize = targetCBlockSize;
}
502 
/* FIO_setSrcSizeHint() :
 * The hint is kept in an `int` field : values above INT_MAX are clamped to INT_MAX. */
void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint)
{
    size_t const cap = (size_t)INT_MAX;
    prefs->srcSizeHint = (int)((cap < srcSizeHint) ? cap : srcSizeHint);
}
506 
/* FIO_setTestMode() :
 * Any non-zero `testMode` is stored as 1.
 * In test mode, FIO_openDstFile() opens no destination file. */
void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode)
{
    prefs->testMode = testMode ? 1 : 0;
}
510 
/* FIO_setLiteralCompressionMode() : stores `mode` unchanged (ZSTD_lcm_auto after FIO_createPreferences()) */
void FIO_setLiteralCompressionMode(FIO_prefs_t* const prefs, ZSTD_literalCompressionMode_e mode)
{
    prefs->literalCompressionMode = mode;
}
516 
/* FIO_setAdaptMin() :
 * Stores the lower compression level bound for adaptive mode.
 * Unless built with ZSTD_NOCOMPRESS, asserts that `minCLevel`
 * is not below ZSTD_minCLevel(). */
void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel)
{
#ifndef ZSTD_NOCOMPRESS
    assert(ZSTD_minCLevel() <= minCLevel);
#endif
    prefs->minAdaptLevel = minCLevel;
}
524 
/* FIO_setAdaptMax() :
 * Stores the upper compression level bound for adaptive mode. No range check is done here. */
void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel) {
    prefs->maxAdaptLevel = maxCLevel;
}
529 
/* FIO_setLdmFlag() : any non-zero `ldmFlag` is stored as 1 */
void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag)
{
    prefs->ldmFlag = (ldmFlag != 0);
}
533 
/* FIO_setLdmHashLog() : stores `ldmHashLog` unchanged (0 after FIO_createPreferences()) */
void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog)
{
    prefs->ldmHashLog = ldmHashLog;
}
537 
/* FIO_setLdmMinMatch() : stores `ldmMinMatch` unchanged (0 after FIO_createPreferences()) */
void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch)
{
    prefs->ldmMinMatch = ldmMinMatch;
}
541 
/* FIO_setLdmBucketSizeLog() :
 * Stores `ldmBucketSizeLog`, replacing the FIO_LDM_PARAM_NOTSET default. */
void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog)
{
    prefs->ldmBucketSizeLog = ldmBucketSizeLog;
}
545 
546 
/* FIO_setLdmHashRateLog() :
 * Stores `ldmHashRateLog`, replacing the FIO_LDM_PARAM_NOTSET default. */
void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog)
{
    prefs->ldmHashRateLog = ldmHashRateLog;
}
550 
/* FIO_setPatchFromMode() :
 * Any non-zero `value` is stored as 1.
 * In patch-from mode, FIO_createDictBuffer() limits the dictionary size
 * with memLimit instead of DICTSIZE_MAX. */
void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value) {
    prefs->patchFromMode = value ? 1 : 0;
}
555 
/* FIO_setContentSize() : any non-zero `value` is stored as 1 */
void FIO_setContentSize(FIO_prefs_t* const prefs, int value) {
    prefs->contentSize = value ? 1 : 0;
}
560 
561 /* FIO_ctx_t functions */
562 
/* FIO_setHasStdoutOutput() : stores `value` unchanged into the context */
void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value)
{
    fCtx->hasStdoutOutput = value;
}
566 
/* FIO_setNbFilesTotal() : stores the total number of files of this run (1 after FIO_createContext()) */
void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value) {
    fCtx->nbFilesTotal = value;
}
571 
/* FIO_determineHasStdinInput() :
 * Sets fCtx->hasStdinInput to 1 as soon as one entry of `filenames`
 * equals stdinmark. The field is left untouched when no entry matches. */
void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames)
{
    size_t const nbNames = filenames->tableSize;
    size_t n;
    for (n = 0; n < nbNames; n++) {
        if (strcmp(stdinmark, filenames->fileNames[n]) == 0) {
            fCtx->hasStdinInput = 1;
            break;
        }
    }
}
581 
582 /*-*************************************
583 *  Functions
584 ***************************************/
585 /** FIO_removeFile() :
586  * @result : Unlink `fileName`, even if it's read-only */
FIO_removeFile(const char * path)587 static int FIO_removeFile(const char* path)
588 {
589     stat_t statbuf;
590     if (!UTIL_stat(path, &statbuf)) {
591         DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path);
592         return 0;
593     }
594     if (!UTIL_isRegularFileStat(&statbuf)) {
595         DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path);
596         return 0;
597     }
598 #if defined(_WIN32) || defined(WIN32)
599     /* windows doesn't allow remove read-only files,
600      * so try to make it writable first */
601     if (!(statbuf.st_mode & _S_IWRITE)) {
602         UTIL_chmod(path, &statbuf, _S_IWRITE);
603     }
604 #endif
605     return remove(path);
606 }
607 
608 /** FIO_openSrcFile() :
609  *  condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
610  * @result : FILE* to `srcFileName`, or NULL if it fails */
FIO_openSrcFile(const FIO_prefs_t * const prefs,const char * srcFileName)611 static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName)
612 {
613     stat_t statbuf;
614     int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
615     assert(srcFileName != NULL);
616     if (!strcmp (srcFileName, stdinmark)) {
617         DISPLAYLEVEL(4,"Using stdin for input \n");
618         SET_BINARY_MODE(stdin);
619         return stdin;
620     }
621 
622     if (!UTIL_stat(srcFileName, &statbuf)) {
623         DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n",
624                         srcFileName, strerror(errno));
625         return NULL;
626     }
627 
628     if (!UTIL_isRegularFileStat(&statbuf)
629      && !UTIL_isFIFOStat(&statbuf)
630      && !(allowBlockDevices && UTIL_isBlockDevStat(&statbuf))
631     ) {
632         DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
633                         srcFileName);
634         return NULL;
635     }
636 
637     {   FILE* const f = fopen(srcFileName, "rb");
638         if (f == NULL)
639             DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
640         return f;
641     }
642 }
643 
644 /** FIO_openDstFile() :
645  *  condition : `dstFileName` must be non-NULL.
646  * @result : FILE* to `dstFileName`, or NULL if it fails */
647 static FILE*
FIO_openDstFile(FIO_ctx_t * fCtx,FIO_prefs_t * const prefs,const char * srcFileName,const char * dstFileName,const int mode)648 FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
649                 const char* srcFileName, const char* dstFileName,
650                 const int mode)
651 {
652     if (prefs->testMode) return NULL;  /* do not open file in test mode */
653 
654     assert(dstFileName != NULL);
655     if (!strcmp (dstFileName, stdoutmark)) {
656         DISPLAYLEVEL(4,"Using stdout for output \n");
657         SET_BINARY_MODE(stdout);
658         if (prefs->sparseFileSupport == 1) {
659             prefs->sparseFileSupport = 0;
660             DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
661         }
662         return stdout;
663     }
664 
665     /* ensure dst is not the same as src */
666     if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) {
667         DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n");
668         return NULL;
669     }
670 
671     if (prefs->sparseFileSupport == 1) {
672         prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
673     }
674 
675     if (UTIL_isRegularFile(dstFileName)) {
676         /* Check if destination file already exists */
677 #if !defined(_WIN32)
678         /* this test does not work on Windows :
679          * `NUL` and `nul` are detected as regular files */
680         if (!strcmp(dstFileName, nulmark)) {
681             EXM_THROW(40, "%s is unexpectedly categorized as a regular file",
682                         dstFileName);
683         }
684 #endif
685         if (!prefs->overwrite) {
686             if (g_display_prefs.displayLevel <= 1) {
687                 /* No interaction possible */
688                 DISPLAY("zstd: %s already exists; not overwritten  \n",
689                         dstFileName);
690                 return NULL;
691             }
692             DISPLAY("zstd: %s already exists; ", dstFileName);
693             if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten  \n", "yY", fCtx->hasStdinInput))
694                 return NULL;
695         }
696         /* need to unlink */
697         FIO_removeFile(dstFileName);
698     }
699 
700     {
701 #if defined(_WIN32)
702         /* Windows requires opening the file as a "binary" file to avoid
703          * mangling. This macro doesn't exist on unix. */
704         const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY;
705         const int fd = _open(dstFileName, openflags, mode);
706         FILE* f = NULL;
707         if (fd != -1) {
708             f = _fdopen(fd, "wb");
709         }
710 #else
711         const int openflags = O_WRONLY|O_CREAT|O_TRUNC;
712         const int fd = open(dstFileName, openflags, mode);
713         FILE* f = NULL;
714         if (fd != -1) {
715             f = fdopen(fd, "wb");
716         }
717 #endif
718         if (f == NULL) {
719             DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
720         }
721         return f;
722     }
723 }
724 
725 /*! FIO_createDictBuffer() :
726  *  creates a buffer, pointed by `*bufferPtr`,
727  *  loads `filename` content into it, up to DICTSIZE_MAX bytes.
728  * @return : loaded size
729  *  if fileName==NULL, returns 0 and a NULL pointer
730  */
FIO_createDictBuffer(void ** bufferPtr,const char * fileName,FIO_prefs_t * const prefs)731 static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs)
732 {
733     FILE* fileHandle;
734     U64 fileSize;
735 
736     assert(bufferPtr != NULL);
737     *bufferPtr = NULL;
738     if (fileName == NULL) return 0;
739 
740     DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
741     fileHandle = fopen(fileName, "rb");
742     if (fileHandle==NULL) EXM_THROW(31, "%s: %s", fileName, strerror(errno));
743 
744     fileSize = UTIL_getFileSize(fileName);
745     {
746         size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
747         if (fileSize >  dictSizeMax) {
748             EXM_THROW(32, "Dictionary file %s is too large (> %u bytes)",
749                             fileName,  (unsigned)dictSizeMax);   /* avoid extreme cases */
750         }
751     }
752     *bufferPtr = malloc((size_t)fileSize);
753     if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
754     {   size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
755         if (readSize != fileSize)
756             EXM_THROW(35, "Error reading dictionary file %s : %s",
757                     fileName, strerror(errno));
758     }
759     fclose(fileHandle);
760     return (size_t)fileSize;
761 }
762 
763 
764 
765 /* FIO_checkFilenameCollisions() :
766  * Checks for and warns if there are any files that would have the same output path
767  */
FIO_checkFilenameCollisions(const char ** filenameTable,unsigned nbFiles)768 int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
769     const char **filenameTableSorted, *prevElem, *filename;
770     unsigned u;
771 
772     filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
773     if (!filenameTableSorted) {
774         DISPLAY("Unable to malloc new str array, not checking for name collisions\n");
775         return 1;
776     }
777 
778     for (u = 0; u < nbFiles; ++u) {
779         filename = strrchr(filenameTable[u], PATH_SEP);
780         if (filename == NULL) {
781             filenameTableSorted[u] = filenameTable[u];
782         } else {
783             filenameTableSorted[u] = filename+1;
784         }
785     }
786 
787     qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
788     prevElem = filenameTableSorted[0];
789     for (u = 1; u < nbFiles; ++u) {
790         if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
791             DISPLAY("WARNING: Two files have same filename: %s\n", prevElem);
792         }
793         prevElem = filenameTableSorted[u];
794     }
795 
796     free((void*)filenameTableSorted);
797     return 0;
798 }
799 
/* extractFilename() :
 * @return : pointer to the part of `path` following the last `separator`,
 *           or `path` itself when it contains no `separator`.
 *           The result points inside `path` (nothing is allocated);
 *           it is an empty string when `path` ends with `separator`. */
static const char*
extractFilename(const char* path, char separator)
{
    const char* const lastSep = strrchr(path, separator);
    return (lastSep != NULL) ? lastSep + 1 : path;
}
807 
/* FIO_createFilename_fromOutDir() :
 * Takes a source file name and specified output directory, and
 * allocates memory for and returns a pointer to final path :
 * `outDirName` + separator (unless already present) + file name part of `path`.
 * The buffer reserves `suffixLen` extra bytes (zeroed) after the name,
 * so the caller can append a suffix; the caller must free() the result.
 * An empty `outDirName` yields the bare file name (no separator is added).
 * This function never returns an error : on allocation failure,
 * the process exits through EXM_THROW(30).
 */
static char*
FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen)
{
    const char* filenameStart;
    char separator;
    char* result;
    size_t outDirLen;
    size_t filenameLen;
    size_t pos;

#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    separator = '\\';
#else
    separator = '/';
#endif

    filenameStart = extractFilename(path, separator);
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    filenameStart = extractFilename(filenameStart, '/');  /* sometimes, '/' separator is also used on Windows (mingw+msys2) */
#endif

    outDirLen = strlen(outDirName);
    filenameLen = strlen(filenameStart);

    /* calloc : the bytes after the file name (suffix room + terminator) start as zero */
    result = (char*) calloc(1, outDirLen + 1 + filenameLen + suffixLen + 1);
    if (!result) {
        EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno));
    }

    memcpy(result, outDirName, outDirLen);
    pos = outDirLen;
    /* only look at the last character when there is one :
     * indexing [outDirLen-1] on an empty string would read out of bounds */
    if ((outDirLen > 0) && (outDirName[outDirLen-1] != separator)) {
        result[pos] = separator;
        pos++;
    }
    memcpy(result + pos, filenameStart, filenameLen);

    return result;
}
846 
/* FIO_highbit64() :
 * @return : 0-based index of the most significant set bit of `v`
 *           (1 => 0, 2 => 1, 3 => 1, 256 => 8, ...).
 * note : `v` must be non-zero (checked by assert() only).
 */
static unsigned FIO_highbit64(unsigned long long v)
{
    unsigned highest = 0;
    assert(v != 0);
    /* each remaining non-zero value after a right shift means one more bit position */
    for (v >>= 1; v != 0; v >>= 1) {
        highest++;
    }
    return highest;
}
859 
/* FIO_adjustMemLimitForPatchFromMode() :
 * Raises the memory limit stored in `prefs` to the largest of : its current value,
 * `dictSize` and `maxSrcFileSize`.
 * Throws (error 42) when that size is unknown, or larger than (1 << ZSTD_WINDOWLOG_MAX).
 */
static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs,
                                    unsigned long long const dictSize,
                                    unsigned long long const maxSrcFileSize)
{
    unsigned const windowSizeMax = (1U << ZSTD_WINDOWLOG_MAX);
    unsigned long long const largestInput = MAX(dictSize, maxSrcFileSize);
    unsigned long long const requiredSize = MAX(prefs->memLimit, largestInput);

    if (requiredSize == UTIL_FILESIZE_UNKNOWN) {
        EXM_THROW(42, "Using --patch-from with stdin requires --stream-size");
    }
    assert(requiredSize != UTIL_FILESIZE_UNKNOWN);

    if (requiredSize > windowSizeMax) {
        EXM_THROW(42, "Can't handle files larger than %u GB\n", windowSizeMax/(1 GB));
    }

    /* fits in `unsigned` : requiredSize <= windowSizeMax was just verified */
    FIO_setMemLimit(prefs, (unsigned)requiredSize);
}
873 
874 /* FIO_removeMultiFilesWarning() :
875  * Returns 1 if the console should abort, 0 if console should proceed.
876  * This function handles logic when processing multiple files with -o, displaying the appropriate warnings/prompts.
877  *
878  * If -f is specified, or there is just 1 file, zstd will always proceed as usual.
879  * If --rm is specified, there will be a prompt asking for user confirmation.
880  *         If -f is specified with --rm, zstd will proceed as usual
881  *         If -q is specified with --rm, zstd will abort pre-emptively
882  *         If neither flag is specified, zstd will prompt the user for confirmation to proceed.
883  * If --rm is not specified, then zstd will print a warning to the user (which can be silenced with -q).
884  * However, if the output is stdout, we will always abort rather than displaying the warning prompt.
885  */
FIO_removeMultiFilesWarning(FIO_ctx_t * const fCtx,const FIO_prefs_t * const prefs,const char * outFileName,int displayLevelCutoff)886 static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t* const prefs, const char* outFileName, int displayLevelCutoff)
887 {
888     int error = 0;
889     if (fCtx->nbFilesTotal > 1 && !prefs->overwrite) {
890         if (g_display_prefs.displayLevel <= displayLevelCutoff) {
891             if (prefs->removeSrcFile) {
892                 DISPLAYLEVEL(1, "zstd: Aborting... not deleting files and processing into dst: %s", outFileName);
893                 error =  1;
894             }
895         } else {
896             if (!strcmp(outFileName, stdoutmark)) {
897                 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. ");
898             } else {
899                 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s ", outFileName);
900             }
901             DISPLAYLEVEL(2, "\nThe concatenated output CANNOT regenerate the original directory tree. ")
902             if (prefs->removeSrcFile) {
903                 if (fCtx->hasStdoutOutput) {
904                     DISPLAYLEVEL(1, "\nAborting. Use -f if you really want to delete the files and output to stdout");
905                     error = 1;
906                 } else {
907                     error = g_display_prefs.displayLevel > displayLevelCutoff && UTIL_requireUserConfirmation("This is a destructive operation. Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput);
908                 }
909             }
910         }
911         DISPLAY("\n");
912     }
913     return error;
914 }
915 
916 #ifndef ZSTD_NOCOMPRESS
917 
918 /* **********************************************************************
919  *  Compression
920  ************************************************************************/
/* cRess_t :
 * Resources shared by the compression routines of this file.
 * Buffers, dictionary and context are set up by FIO_createCResources()
 * and released by FIO_freeCResources().
 */
typedef struct {
    FILE* srcFile;              /* input stream, read with fread() ; left NULL by FIO_createCResources() */
    FILE* dstFile;              /* output stream, written with fwrite() ; left NULL by FIO_createCResources() */
    void*  srcBuffer;           /* input staging buffer (malloc'ed) */
    size_t srcBufferSize;       /* size of srcBuffer : ZSTD_CStreamInSize() */
    void*  dstBuffer;           /* output staging buffer (malloc'ed) */
    size_t dstBufferSize;       /* size of dstBuffer : ZSTD_CStreamOutSize() */
    void* dictBuffer;           /* dictionary content, filled by FIO_createDictBuffer() */
    size_t dictBufferSize;      /* value returned by FIO_createDictBuffer() */
    const char* dictFileName;   /* dictionary file name as received by FIO_createCResources() ; not freed by FIO_freeCResources() */
    ZSTD_CStream* cctx;         /* compression context, from ZSTD_createCCtx() */
} cRess_t;
933 
934 /** ZSTD_cycleLog() :
935  *  condition for correct operation : hashLog > 1 */
ZSTD_cycleLog(U32 hashLog,ZSTD_strategy strat)936 static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
937 {
938     U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
939     assert(hashLog > 1);
940     return hashLog - btScale;
941 }
942 
/* FIO_adjustParamsForPatchFromMode() :
 * Adapts preferences and compression parameters for --patch-from :
 * - raises the memory limit in `prefs` (see FIO_adjustMemLimitForPatchFromMode(),
 *   which throws when sizes are unknown or too large) ;
 * - sets comprParams->windowLog from the source size, clamped to
 *   [ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX] ;
 * - enables long distance matching in `prefs` when the source window exceeds
 *   the cycle log of the parameters selected for `cLevel` ;
 * - prints tuning hints when the selected strategy is >= ZSTD_btopt.
 */
static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
                                    ZSTD_compressionParameters* comprParams,
                                    unsigned long long const dictSize,
                                    unsigned long long const maxSrcFileSize,
                                    int cLevel)
{
    /* FIO_highbit64() requires a non-zero argument : `| 1` never moves the highest set bit
     * of a non-zero size, and maps an empty source (size 0) to fileWindowLog == 1
     * instead of tripping the assert() */
    unsigned const fileWindowLog = FIO_highbit64(maxSrcFileSize | 1) + 1;
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize);
    FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize);
    if (fileWindowLog > ZSTD_WINDOWLOG_MAX) {
        DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
    }
    comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
    if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
        if (!prefs->ldmFlag) {
            DISPLAYLEVEL(1, "long mode automatically triggered\n");
        }
        FIO_setLdmFlag(prefs, 1);
    }
    if (cParams.strategy >= ZSTD_btopt) {
        DISPLAYLEVEL(1, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
        DISPLAYLEVEL(1, "- Use --single-thread mode in the zstd cli\n");
        DISPLAYLEVEL(1, "- Set a larger targetLength (eg. --zstd=targetLength=4096)\n");
        DISPLAYLEVEL(1, "- Set a larger chainLog (eg. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
        DISPLAYLEVEL(1, "Also consider playing around with searchLog and hashLog\n");
    }
}
968 
/* FIO_createCResources() :
 * Builds the set of resources used to compress files :
 * a compression context, input and output buffers, and the dictionary content.
 * All settings from `prefs` and `comprParams`, plus `cLevel`, are applied to the context,
 * and the dictionary is attached to it.
 * `comprParams` is received by value : adjustments made here stay local.
 * In patch-from mode, `prefs` is updated too (see FIO_adjustParamsForPatchFromMode()).
 * Failures are reported through EXM_THROW() / CHECK().
 * @return : the resources, by value ; srcFile and dstFile are left NULL.
 *           Release with FIO_freeCResources().
 */
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
                                    const char* dictFileName, unsigned long long const maxSrcFileSize,
                                    int cLevel, ZSTD_compressionParameters comprParams) {
    cRess_t ress;
    memset(&ress, 0, sizeof(ress));

    DISPLAYLEVEL(6, "FIO_createCResources \n");
    ress.cctx = ZSTD_createCCtx();
    if (ress.cctx == NULL)
        EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
                    strerror(errno));
    ress.srcBufferSize = ZSTD_CStreamInSize();
    ress.srcBuffer = malloc(ress.srcBufferSize);
    ress.dstBufferSize = ZSTD_CStreamOutSize();

    /* need to update memLimit before calling createDictBuffer
     * because of memLimit check inside it */
    if (prefs->patchFromMode) {
        /* a declared stream size (> 0) takes precedence over maxSrcFileSize */
        unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
        FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
    }
    ress.dstBuffer = malloc(ress.dstBufferSize);
    ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs);   /* works with dictFileName==NULL */
    /* both malloc() results are validated here, in a single check */
    if (!ress.srcBuffer || !ress.dstBuffer)
        EXM_THROW(31, "allocation error : not enough memory");

    /* Advanced parameters, including dictionary */
    if (dictFileName && (ress.dictBuffer==NULL))
        EXM_THROW(32, "allocation error : can't create dictBuffer");
    ress.dictFileName = dictFileName;

    /* adaptive mode : use a default window size when none was requested and long mode is off */
    if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
        comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;

    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) );  /* always enable content size when available (note: supposed to be default) */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
    /* compression level */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
    /* max compressed block size */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
    /* source size hint */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
    /* long distance matching */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
    /* these two are only forwarded when explicitly set */
    if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
    }
    if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
    }
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder));
    /* compression parameters */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) );
    /* multi-threading */
#ifdef ZSTD_MULTITHREAD
    DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
    if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
        DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
    }
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
#endif
    /* dictionary */
    /* patch-from : dictBuffer is referenced as a prefix ; otherwise it is loaded as a dictionary */
    if (prefs->patchFromMode) {
        CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
    } else {
        CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
    }

    return ress;
}
1053 
FIO_freeCResources(const cRess_t * const ress)1054 static void FIO_freeCResources(const cRess_t* const ress)
1055 {
1056     free(ress->srcBuffer);
1057     free(ress->dstBuffer);
1058     free(ress->dictBuffer);
1059     ZSTD_freeCStream(ress->cctx);   /* never fails */
1060 }
1061 
1062 
1063 #ifdef ZSTD_GZCOMPRESS
1064 static unsigned long long
FIO_compressGzFrame(const cRess_t * ress,const char * srcFileName,U64 const srcFileSize,int compressionLevel,U64 * readsize)1065 FIO_compressGzFrame(const cRess_t* ress,  /* buffers & handlers are used, but not changed */
1066                     const char* srcFileName, U64 const srcFileSize,
1067                     int compressionLevel, U64* readsize)
1068 {
1069     unsigned long long inFileSize = 0, outFileSize = 0;
1070     z_stream strm;
1071 
1072     if (compressionLevel > Z_BEST_COMPRESSION)
1073         compressionLevel = Z_BEST_COMPRESSION;
1074 
1075     strm.zalloc = Z_NULL;
1076     strm.zfree = Z_NULL;
1077     strm.opaque = Z_NULL;
1078 
1079     {   int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
1080                         15 /* maxWindowLogSize */ + 16 /* gzip only */,
1081                         8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */
1082         if (ret != Z_OK) {
1083             EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
1084     }   }
1085 
1086     strm.next_in = 0;
1087     strm.avail_in = 0;
1088     strm.next_out = (Bytef*)ress->dstBuffer;
1089     strm.avail_out = (uInt)ress->dstBufferSize;
1090 
1091     while (1) {
1092         int ret;
1093         if (strm.avail_in == 0) {
1094             size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
1095             if (inSize == 0) break;
1096             inFileSize += inSize;
1097             strm.next_in = (z_const unsigned char*)ress->srcBuffer;
1098             strm.avail_in = (uInt)inSize;
1099         }
1100         ret = deflate(&strm, Z_NO_FLUSH);
1101         if (ret != Z_OK)
1102             EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
1103         {   size_t const cSize = ress->dstBufferSize - strm.avail_out;
1104             if (cSize) {
1105                 if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
1106                     EXM_THROW(73, "Write error : cannot write to output file : %s ", strerror(errno));
1107                 outFileSize += cSize;
1108                 strm.next_out = (Bytef*)ress->dstBuffer;
1109                 strm.avail_out = (uInt)ress->dstBufferSize;
1110         }   }
1111         if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
1112             DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ",
1113                             (unsigned)(inFileSize>>20),
1114                             (double)outFileSize/inFileSize*100)
1115         } else {
1116             DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ",
1117                             (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
1118                             (double)outFileSize/inFileSize*100);
1119     }   }
1120 
1121     while (1) {
1122         int const ret = deflate(&strm, Z_FINISH);
1123         {   size_t const cSize = ress->dstBufferSize - strm.avail_out;
1124             if (cSize) {
1125                 if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
1126                     EXM_THROW(75, "Write error : %s ", strerror(errno));
1127                 outFileSize += cSize;
1128                 strm.next_out = (Bytef*)ress->dstBuffer;
1129                 strm.avail_out = (uInt)ress->dstBufferSize;
1130         }   }
1131         if (ret == Z_STREAM_END) break;
1132         if (ret != Z_BUF_ERROR)
1133             EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
1134     }
1135 
1136     {   int const ret = deflateEnd(&strm);
1137         if (ret != Z_OK) {
1138             EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
1139     }   }
1140     *readsize = inFileSize;
1141     return outFileSize;
1142 }
1143 #endif
1144 
1145 
1146 #ifdef ZSTD_LZMACOMPRESS
1147 static unsigned long long
FIO_compressLzmaFrame(cRess_t * ress,const char * srcFileName,U64 const srcFileSize,int compressionLevel,U64 * readsize,int plain_lzma)1148 FIO_compressLzmaFrame(cRess_t* ress,
1149                       const char* srcFileName, U64 const srcFileSize,
1150                       int compressionLevel, U64* readsize, int plain_lzma)
1151 {
1152     unsigned long long inFileSize = 0, outFileSize = 0;
1153     lzma_stream strm = LZMA_STREAM_INIT;
1154     lzma_action action = LZMA_RUN;
1155     lzma_ret ret;
1156 
1157     if (compressionLevel < 0) compressionLevel = 0;
1158     if (compressionLevel > 9) compressionLevel = 9;
1159 
1160     if (plain_lzma) {
1161         lzma_options_lzma opt_lzma;
1162         if (lzma_lzma_preset(&opt_lzma, compressionLevel))
1163             EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName);
1164         ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
1165         if (ret != LZMA_OK)
1166             EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
1167     } else {
1168         ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
1169         if (ret != LZMA_OK)
1170             EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
1171     }
1172 
1173     strm.next_in = 0;
1174     strm.avail_in = 0;
1175     strm.next_out = (BYTE*)ress->dstBuffer;
1176     strm.avail_out = ress->dstBufferSize;
1177 
1178     while (1) {
1179         if (strm.avail_in == 0) {
1180             size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
1181             if (inSize == 0) action = LZMA_FINISH;
1182             inFileSize += inSize;
1183             strm.next_in = (BYTE const*)ress->srcBuffer;
1184             strm.avail_in = inSize;
1185         }
1186 
1187         ret = lzma_code(&strm, action);
1188 
1189         if (ret != LZMA_OK && ret != LZMA_STREAM_END)
1190             EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
1191         {   size_t const compBytes = ress->dstBufferSize - strm.avail_out;
1192             if (compBytes) {
1193                 if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes)
1194                     EXM_THROW(85, "Write error : %s", strerror(errno));
1195                 outFileSize += compBytes;
1196                 strm.next_out = (BYTE*)ress->dstBuffer;
1197                 strm.avail_out = ress->dstBufferSize;
1198         }   }
1199         if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
1200             DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
1201                             (unsigned)(inFileSize>>20),
1202                             (double)outFileSize/inFileSize*100)
1203         else
1204             DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
1205                             (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
1206                             (double)outFileSize/inFileSize*100);
1207         if (ret == LZMA_STREAM_END) break;
1208     }
1209 
1210     lzma_end(&strm);
1211     *readsize = inFileSize;
1212 
1213     return outFileSize;
1214 }
1215 #endif
1216 
1217 #ifdef ZSTD_LZ4COMPRESS
1218 
1219 #if LZ4_VERSION_NUMBER <= 10600
1220 #define LZ4F_blockLinked blockLinked
1221 #define LZ4F_max64KB max64KB
1222 #endif
1223 
/* FIO_LZ4_GetBlockSize_FromBlockId() :
 * @return : 2^(8 + 2*id), e.g. id 4 => 65536
 */
static int FIO_LZ4_GetBlockSize_FromBlockId (int id)
{
    int const shift = 8 + (2 * id);
    return (1 << shift);
}
1225 
1226 static unsigned long long
FIO_compressLz4Frame(cRess_t * ress,const char * srcFileName,U64 const srcFileSize,int compressionLevel,int checksumFlag,U64 * readsize)1227 FIO_compressLz4Frame(cRess_t* ress,
1228                      const char* srcFileName, U64 const srcFileSize,
1229                      int compressionLevel, int checksumFlag,
1230                      U64* readsize)
1231 {
1232     const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
1233     unsigned long long inFileSize = 0, outFileSize = 0;
1234 
1235     LZ4F_preferences_t prefs;
1236     LZ4F_compressionContext_t ctx;
1237 
1238     LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
1239     if (LZ4F_isError(errorCode))
1240         EXM_THROW(31, "zstd: failed to create lz4 compression context");
1241 
1242     memset(&prefs, 0, sizeof(prefs));
1243 
1244     assert(blockSize <= ress->srcBufferSize);
1245 
1246     prefs.autoFlush = 1;
1247     prefs.compressionLevel = compressionLevel;
1248     prefs.frameInfo.blockMode = LZ4F_blockLinked;
1249     prefs.frameInfo.blockSizeID = LZ4F_max64KB;
1250     prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag;
1251 #if LZ4_VERSION_NUMBER >= 10600
1252     prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
1253 #endif
1254     assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize);
1255 
1256     {
1257         size_t readSize;
1258         size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
1259         if (LZ4F_isError(headerSize))
1260             EXM_THROW(33, "File header generation failed : %s",
1261                             LZ4F_getErrorName(headerSize));
1262         if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize)
1263             EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno));
1264         outFileSize += headerSize;
1265 
1266         /* Read first block */
1267         readSize  = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
1268         inFileSize += readSize;
1269 
1270         /* Main Loop */
1271         while (readSize>0) {
1272             size_t const outSize = LZ4F_compressUpdate(ctx,
1273                                         ress->dstBuffer, ress->dstBufferSize,
1274                                         ress->srcBuffer, readSize, NULL);
1275             if (LZ4F_isError(outSize))
1276                 EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
1277                             srcFileName, LZ4F_getErrorName(outSize));
1278             outFileSize += outSize;
1279             if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
1280                 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
1281                                 (unsigned)(inFileSize>>20),
1282                                 (double)outFileSize/inFileSize*100)
1283             } else {
1284                 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
1285                                 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
1286                                 (double)outFileSize/inFileSize*100);
1287             }
1288 
1289             /* Write Block */
1290             {   size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile);
1291                 if (sizeCheck != outSize)
1292                     EXM_THROW(36, "Write error : %s", strerror(errno));
1293             }
1294 
1295             /* Read next block */
1296             readSize  = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
1297             inFileSize += readSize;
1298         }
1299         if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);
1300 
1301         /* End of Stream mark */
1302         headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL);
1303         if (LZ4F_isError(headerSize))
1304             EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
1305                         srcFileName, LZ4F_getErrorName(headerSize));
1306 
1307         {   size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
1308             if (sizeCheck != headerSize)
1309                 EXM_THROW(39, "Write error : %s (cannot write end of stream)",
1310                             strerror(errno));
1311         }
1312         outFileSize += headerSize;
1313     }
1314 
1315     *readsize = inFileSize;
1316     LZ4F_freeCompressionContext(ctx);
1317 
1318     return outFileSize;
1319 }
1320 #endif
1321 
1322 
1323 static unsigned long long
FIO_compressZstdFrame(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,const cRess_t * ressPtr,const char * srcFileName,U64 fileSize,int compressionLevel,U64 * readsize)1324 FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
1325                       FIO_prefs_t* const prefs,
1326                       const cRess_t* ressPtr,
1327                       const char* srcFileName, U64 fileSize,
1328                       int compressionLevel, U64* readsize)
1329 {
1330     cRess_t const ress = *ressPtr;
1331     FILE* const srcFile = ress.srcFile;
1332     FILE* const dstFile = ress.dstFile;
1333     U64 compressedfilesize = 0;
1334     ZSTD_EndDirective directive = ZSTD_e_continue;
1335 
1336     /* stats */
1337     ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };
1338     ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };
1339     typedef enum { noChange, slower, faster } speedChange_e;
1340     speedChange_e speedChange = noChange;
1341     unsigned flushWaiting = 0;
1342     unsigned inputPresented = 0;
1343     unsigned inputBlocked = 0;
1344     unsigned lastJobID = 0;
1345 
1346     DISPLAYLEVEL(6, "compression using zstd format \n");
1347 
1348     /* init */
1349     if (fileSize != UTIL_FILESIZE_UNKNOWN) {
1350         CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
1351     } else if (prefs->streamSrcSize > 0) {
1352       /* unknown source size; use the declared stream size */
1353       CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
1354     }
1355     (void)srcFileName;
1356 
1357     /* Main compression loop */
1358     do {
1359         size_t stillToFlush;
1360         /* Fill input Buffer */
1361         size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
1362         ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
1363         DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
1364         *readsize += inSize;
1365 
1366         if ((inSize == 0) || (*readsize == fileSize))
1367             directive = ZSTD_e_end;
1368 
1369         stillToFlush = 1;
1370         while ((inBuff.pos != inBuff.size)   /* input buffer must be entirely ingested */
1371             || (directive == ZSTD_e_end && stillToFlush != 0) ) {
1372 
1373             size_t const oldIPos = inBuff.pos;
1374             ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
1375             size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
1376             CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
1377 
1378             /* count stats */
1379             inputPresented++;
1380             if (oldIPos == inBuff.pos) inputBlocked++;  /* input buffer is full and can't take any more : input speed is faster than consumption rate */
1381             if (!toFlushNow) flushWaiting = 1;
1382 
1383             /* Write compressed stream */
1384             DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
1385                             (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
1386             if (outBuff.pos) {
1387                 size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
1388                 if (sizeCheck != outBuff.pos)
1389                     EXM_THROW(25, "Write error : %s (cannot write compressed block)",
1390                                     strerror(errno));
1391                 compressedfilesize += outBuff.pos;
1392             }
1393 
1394             /* display notification; and adapt compression level */
1395             if (READY_FOR_UPDATE()) {
1396                 ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
1397                 double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
1398 
1399                 /* display progress notifications */
1400                 if (g_display_prefs.displayLevel >= 3) {
1401                     DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%% ",
1402                                 compressionLevel,
1403                                 (unsigned)((zfp.ingested - zfp.consumed) >> 20),
1404                                 (unsigned)(zfp.consumed >> 20),
1405                                 (unsigned)(zfp.produced >> 20),
1406                                 cShare );
1407                 } else if (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) {
1408                     /* Require level 2 or forcibly displayed progress counter for summarized updates */
1409                     DISPLAYLEVEL(1, "\r%79s\r", "");    /* Clear out the current displayed line */
1410                     if (fCtx->nbFilesTotal > 1) {
1411                         size_t srcFileNameSize = strlen(srcFileName);
1412                         /* Ensure that the string we print is roughly the same size each time */
1413                         if (srcFileNameSize > 18) {
1414                             const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
1415                             DISPLAYLEVEL(1, "Compress: %u/%u files. Current: ...%s ",
1416                                         fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
1417                         } else {
1418                             DISPLAYLEVEL(1, "Compress: %u/%u files. Current: %*s ",
1419                                         fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
1420                         }
1421                     }
1422                     DISPLAYLEVEL(1, "Read : %2u ", (unsigned)(zfp.consumed >> 20));
1423                     if (fileSize != UTIL_FILESIZE_UNKNOWN)
1424                         DISPLAYLEVEL(2, "/ %2u ", (unsigned)(fileSize >> 20));
1425                     DISPLAYLEVEL(1, "MB ==> %2.f%%", cShare);
1426                     DELAY_NEXT_UPDATE();
1427                 }
1428 
1429                 /* adaptive mode : statistics measurement and speed correction */
1430                 if (prefs->adaptiveMode) {
1431 
1432                     /* check output speed */
1433                     if (zfp.currentJobID > 1) {  /* only possible if nbWorkers >= 1 */
1434 
1435                         unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
1436                         unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
1437                         assert(zfp.produced >= previous_zfp_update.produced);
1438                         assert(prefs->nbWorkers >= 1);
1439 
1440                         /* test if compression is blocked
1441                          * either because output is slow and all buffers are full
1442                          * or because input is slow and no job can start while waiting for at least one buffer to be filled.
1443                          * note : exclude starting part, since currentJobID > 1 */
1444                         if ( (zfp.consumed == previous_zfp_update.consumed)   /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
1445                           && (zfp.nbActiveWorkers == 0)                       /* confirmed : no compression ongoing */
1446                           ) {
1447                             DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
1448                             speedChange = slower;
1449                         }
1450 
1451                         previous_zfp_update = zfp;
1452 
1453                         if ( (newlyProduced > (newlyFlushed * 9 / 8))   /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
1454                           && (flushWaiting == 0)                        /* flush speed was never slowed by lack of production, so it's operating at max capacity */
1455                           ) {
1456                             DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
1457                             speedChange = slower;
1458                         }
1459                         flushWaiting = 0;
1460                     }
1461 
1462                     /* course correct only if there is at least one new job completed */
1463                     if (zfp.currentJobID > lastJobID) {
1464                         DISPLAYLEVEL(6, "compression level adaptation check \n")
1465 
1466                         /* check input speed */
1467                         if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) {   /* warm up period, to fill all workers */
1468                             if (inputBlocked <= 0) {
1469                                 DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
1470                                 speedChange = slower;
1471                             } else if (speedChange == noChange) {
1472                                 unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
1473                                 unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
1474                                 unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
1475                                 unsigned long long newlyFlushed  = zfp.flushed  - previous_zfp_correction.flushed;
1476                                 previous_zfp_correction = zfp;
1477                                 assert(inputPresented > 0);
1478                                 DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
1479                                                 inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
1480                                                 (unsigned)newlyIngested, (unsigned)newlyConsumed,
1481                                                 (unsigned)newlyFlushed, (unsigned)newlyProduced);
1482                                 if ( (inputBlocked > inputPresented / 8)     /* input is waiting often, because input buffers is full : compression or output too slow */
1483                                   && (newlyFlushed * 33 / 32 > newlyProduced)  /* flush everything that is produced */
1484                                   && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
1485                                 ) {
1486                                     DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
1487                                                     newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
1488                                     speedChange = faster;
1489                                 }
1490                             }
1491                             inputBlocked = 0;
1492                             inputPresented = 0;
1493                         }
1494 
1495                         if (speedChange == slower) {
1496                             DISPLAYLEVEL(6, "slower speed , higher compression \n")
1497                             compressionLevel ++;
1498                             if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
1499                             if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
1500                             compressionLevel += (compressionLevel == 0);   /* skip 0 */
1501                             ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
1502                         }
1503                         if (speedChange == faster) {
1504                             DISPLAYLEVEL(6, "faster speed , lighter compression \n")
1505                             compressionLevel --;
1506                             if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
1507                             compressionLevel -= (compressionLevel == 0);   /* skip 0 */
1508                             ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
1509                         }
1510                         speedChange = noChange;
1511 
1512                         lastJobID = zfp.currentJobID;
1513                     }  /* if (zfp.currentJobID > lastJobID) */
1514                 }  /* if (g_adaptiveMode) */
1515             }  /* if (READY_FOR_UPDATE()) */
1516         }  /* while ((inBuff.pos != inBuff.size) */
1517     } while (directive != ZSTD_e_end);
1518 
1519     if (ferror(srcFile)) {
1520         EXM_THROW(26, "Read error : I/O error");
1521     }
1522     if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
1523         EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
1524                 (unsigned long long)*readsize, (unsigned long long)fileSize);
1525     }
1526 
1527     return compressedfilesize;
1528 }
1529 
1530 /*! FIO_compressFilename_internal() :
1531  *  same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
1532  *  @return : 0 : compression completed correctly,
1533  *            1 : missing or pb opening srcFileName
1534  */
1535 static int
FIO_compressFilename_internal(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,cRess_t ress,const char * dstFileName,const char * srcFileName,int compressionLevel)1536 FIO_compressFilename_internal(FIO_ctx_t* const fCtx,
1537                               FIO_prefs_t* const prefs,
1538                               cRess_t ress,
1539                               const char* dstFileName, const char* srcFileName,
1540                               int compressionLevel)
1541 {
1542     UTIL_time_t const timeStart = UTIL_getTime();
1543     clock_t const cpuStart = clock();
1544     U64 readsize = 0;
1545     U64 compressedfilesize = 0;
1546     U64 const fileSize = UTIL_getFileSize(srcFileName);
1547     DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize);
1548 
1549     /* compression format selection */
1550     switch (prefs->compressionType) {
1551         default:
1552         case FIO_zstdCompression:
1553             compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize);
1554             break;
1555 
1556         case FIO_gzipCompression:
1557 #ifdef ZSTD_GZCOMPRESS
1558             compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
1559 #else
1560             (void)compressionLevel;
1561             EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n",
1562                             srcFileName);
1563 #endif
1564             break;
1565 
1566         case FIO_xzCompression:
1567         case FIO_lzmaCompression:
1568 #ifdef ZSTD_LZMACOMPRESS
1569             compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression);
1570 #else
1571             (void)compressionLevel;
1572             EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n",
1573                             srcFileName);
1574 #endif
1575             break;
1576 
1577         case FIO_lz4Compression:
1578 #ifdef ZSTD_LZ4COMPRESS
1579             compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize);
1580 #else
1581             (void)compressionLevel;
1582             EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n",
1583                             srcFileName);
1584 #endif
1585             break;
1586     }
1587 
1588     /* Status */
1589     fCtx->totalBytesInput += (size_t)readsize;
1590     fCtx->totalBytesOutput += (size_t)compressedfilesize;
1591     DISPLAYLEVEL(2, "\r%79s\r", "");
1592     if (g_display_prefs.displayLevel >= 2 &&
1593         !fCtx->hasStdoutOutput &&
1594         (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) {
1595         if (readsize == 0) {
1596             DISPLAYLEVEL(2,"%-20s :  (%6llu => %6llu bytes, %s) \n",
1597                 srcFileName,
1598                 (unsigned long long)readsize, (unsigned long long) compressedfilesize,
1599                 dstFileName);
1600         } else {
1601             DISPLAYLEVEL(2,"%-20s :%6.2f%%   (%6llu => %6llu bytes, %s) \n",
1602                 srcFileName,
1603                 (double)compressedfilesize / (double)readsize * 100,
1604                 (unsigned long long)readsize, (unsigned long long) compressedfilesize,
1605                 dstFileName);
1606         }
1607     }
1608 
1609     /* Elapsed Time and CPU Load */
1610     {   clock_t const cpuEnd = clock();
1611         double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
1612         U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
1613         double const timeLength_s = (double)timeLength_ns / 1000000000;
1614         double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
1615         DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec  (cpu load : %.0f%%)\n",
1616                         srcFileName, timeLength_s, cpuLoad_pct);
1617     }
1618     return 0;
1619 }
1620 
1621 
1622 /*! FIO_compressFilename_dstFile() :
1623  *  open dstFileName, or pass-through if ress.dstFile != NULL,
1624  *  then start compression with FIO_compressFilename_internal().
1625  *  Manages source removal (--rm) and file permissions transfer.
1626  *  note : ress.srcFile must be != NULL,
1627  *  so reach this function through FIO_compressFilename_srcFile().
1628  *  @return : 0 : compression completed correctly,
1629  *            1 : pb
1630  */
FIO_compressFilename_dstFile(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,cRess_t ress,const char * dstFileName,const char * srcFileName,int compressionLevel)1631 static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
1632                                         FIO_prefs_t* const prefs,
1633                                         cRess_t ress,
1634                                         const char* dstFileName,
1635                                         const char* srcFileName,
1636                                         int compressionLevel)
1637 {
1638     int closeDstFile = 0;
1639     int result;
1640     stat_t statbuf;
1641     assert(ress.srcFile != NULL);
1642     if (ress.dstFile == NULL) {
1643         int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
1644         if ( strcmp (srcFileName, stdinmark)
1645           && UTIL_stat(srcFileName, &statbuf)
1646           && UTIL_isRegularFileStat(&statbuf) ) {
1647             dstFilePermissions = statbuf.st_mode;
1648         }
1649 
1650         closeDstFile = 1;
1651         DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
1652         ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
1653         if (ress.dstFile==NULL) return 1;  /* could not open dstFileName */
1654         /* Must only be added after FIO_openDstFile() succeeds.
1655          * Otherwise we may delete the destination file if it already exists,
1656          * and the user presses Ctrl-C when asked if they wish to overwrite.
1657          */
1658         addHandler(dstFileName);
1659     }
1660 
1661     result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
1662 
1663     if (closeDstFile) {
1664         FILE* const dstFile = ress.dstFile;
1665         ress.dstFile = NULL;
1666 
1667         clearHandler();
1668 
1669         DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
1670         if (fclose(dstFile)) { /* error closing dstFile */
1671             DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
1672             result=1;
1673         }
1674         if ( (result != 0)  /* operation failure */
1675           && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
1676           ) {
1677             FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
1678         }
1679     }
1680 
1681     return result;
1682 }
1683 
/* List of file extensions used to recognize already-compressed files
 * (consulted when the --exclude-compressed flag is used).
 * Different from the suffixList : it should only apply to zstd compression operations.
 * The list is terminated by a NULL entry.
 */
static const char *compressedFileExtensions[] = {
    ZSTD_EXTENSION,
    TZSTD_EXTENSION,
    GZ_EXTENSION,
    TGZ_EXTENSION,
    LZMA_EXTENSION,
    XZ_EXTENSION,
    TXZ_EXTENSION,
    LZ4_EXTENSION,
    TLZ4_EXTENSION,
    NULL
};
1699 
1700 /*! FIO_compressFilename_srcFile() :
1701  *  @return : 0 : compression completed correctly,
1702  *            1 : missing or pb opening srcFileName
1703  */
1704 static int
FIO_compressFilename_srcFile(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,cRess_t ress,const char * dstFileName,const char * srcFileName,int compressionLevel)1705 FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx,
1706                              FIO_prefs_t* const prefs,
1707                              cRess_t ress,
1708                              const char* dstFileName,
1709                              const char* srcFileName,
1710                              int compressionLevel)
1711 {
1712     int result;
1713     DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName);
1714 
1715     /* ensure src is not a directory */
1716     if (UTIL_isDirectory(srcFileName)) {
1717         DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
1718         return 1;
1719     }
1720 
1721     /* ensure src is not the same as dict (if present) */
1722     if (ress.dictFileName != NULL && UTIL_isSameFile(srcFileName, ress.dictFileName)) {
1723         DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName);
1724         return 1;
1725     }
1726 
1727     /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
1728     * YES => ZSTD will skip compression of the file and will return 0.
1729     * NO => ZSTD will resume with compress operation.
1730     */
1731     if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
1732         DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
1733         return 0;
1734     }
1735 
1736     ress.srcFile = FIO_openSrcFile(prefs, srcFileName);
1737     if (ress.srcFile == NULL) return 1;   /* srcFile could not be opened */
1738 
1739     result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
1740 
1741     fclose(ress.srcFile);
1742     ress.srcFile = NULL;
1743     if ( prefs->removeSrcFile   /* --rm */
1744       && result == 0       /* success */
1745       && strcmp(srcFileName, stdinmark)   /* exception : don't erase stdin */
1746       ) {
1747         /* We must clear the handler, since after this point calling it would
1748          * delete both the source and destination files.
1749          */
1750         clearHandler();
1751         if (FIO_removeFile(srcFileName))
1752             EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno));
1753     }
1754     return result;
1755 }
1756 
/*! FIO_compressFilename() :
 *  Compress a single source file into dstFileName.
 *  Compression resources are created for this call only
 *  (sized from the source file size) and released before returning.
 *  @return : the value returned by FIO_compressFilename_srcFile()
 */
int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName,
                         const char* srcFileName, const char* dictFileName,
                         int compressionLevel, ZSTD_compressionParameters comprParams)
{
    cRess_t const cResources = FIO_createCResources(prefs, dictFileName,
                                                    UTIL_getFileSize(srcFileName),
                                                    compressionLevel, comprParams);
    int const status = FIO_compressFilename_srcFile(fCtx, prefs, cResources,
                                                    dstFileName, srcFileName,
                                                    compressionLevel);

/* NOTE(review): this macro is not used within this function;
 * presumably consumed by code further down the file -- kept at its original position */
#define DISPLAY_LEVEL_DEFAULT 2

    FIO_freeCResources(&cResources);
    return status;
}
1769 
/* FIO_determineCompressedName() :
 * create a destination filename for compressed srcFileName :
 * the base name is srcFileName itself, or, when outDirName is provided,
 * the name produced by FIO_createFilename_fromOutDir();
 * `suffix` is then appended to it.
 * @return a pointer to it.
 * The result lives in a static buffer, which is reused (and possibly reallocated)
 * by the next call : the returned pointer must not be freed,
 * and its content is only valid until the next call.
 * This function never returns an error (it may abort() in case of pb)
 */
static const char*
FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
{
    static size_t dfnbCapacity = 0;
    static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
    char* outDirFilename = NULL;
    size_t sfnSize = strlen(srcFileName);
    size_t const srcSuffixLen = strlen(suffix);
    if (outDirName) {
        outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
        /* validate the pointer before strlen() dereferences it */
        assert(outDirFilename != NULL);
        sfnSize = strlen(outDirFilename);
    }

    if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
        /* resize buffer for dstName :
         * previous content is not needed, so free + malloc rather than realloc */
        free(dstFileNameBuffer);
        dfnbCapacity = sfnSize + srcSuffixLen + 30;
        dstFileNameBuffer = (char*)malloc(dfnbCapacity);
        if (!dstFileNameBuffer) {
            EXM_THROW(30, "zstd: %s", strerror(errno));
        }
    }
    assert(dstFileNameBuffer != NULL);

    /* base name, without its terminating null */
    if (outDirFilename) {
        memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
        free(outDirFilename);   /* temporary name is owned by this function */
    } else {
        memcpy(dstFileNameBuffer, srcFileName, sfnSize);
    }
    memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
    return dstFileNameBuffer;
}
1809 
/* FIO_getLargestFileSize() :
 * query UTIL_getFileSize() for each of the nbFiles names in inFileNames.
 * @return : the largest value reported, or 0 when nbFiles == 0
 */
static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
{
    unsigned long long largest = 0;
    size_t fileNb = 0;
    while (fileNb < nbFiles) {
        unsigned long long const candidate = UTIL_getFileSize(inFileNames[fileNb]);
        if (candidate > largest) {
            largest = candidate;
        }
        fileNb++;
    }
    return largest;
}
1820 
1821 /* FIO_compressMultipleFilenames() :
1822  * compress nbFiles files
1823  * into either one destination (outFileName),
1824  * or into one file each (outFileName == NULL, but suffix != NULL),
1825  * or into a destination folder (specified with -O)
1826  */
FIO_compressMultipleFilenames(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,const char ** inFileNamesTable,const char * outMirroredRootDirName,const char * outDirName,const char * outFileName,const char * suffix,const char * dictFileName,int compressionLevel,ZSTD_compressionParameters comprParams)1827 int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
1828                                   FIO_prefs_t* const prefs,
1829                                   const char** inFileNamesTable,
1830                                   const char* outMirroredRootDirName,
1831                                   const char* outDirName,
1832                                   const char* outFileName, const char* suffix,
1833                                   const char* dictFileName, int compressionLevel,
1834                                   ZSTD_compressionParameters comprParams)
1835 {
1836     int status;
1837     int error = 0;
1838     cRess_t ress = FIO_createCResources(prefs, dictFileName,
1839         FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal),
1840         compressionLevel, comprParams);
1841 
1842     /* init */
1843     assert(outFileName != NULL || suffix != NULL);
1844     if (outFileName != NULL) {   /* output into a single destination (stdout typically) */
1845         if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
1846             FIO_freeCResources(&ress);
1847             return 1;
1848         }
1849         ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
1850         if (ress.dstFile == NULL) {  /* could not open outFileName */
1851             error = 1;
1852         } else {
1853             for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
1854                 status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel);
1855                 if (!status) fCtx->nbFilesProcessed++;
1856                 error |= status;
1857             }
1858             if (fclose(ress.dstFile))
1859                 EXM_THROW(29, "Write error (%s) : cannot properly close %s",
1860                             strerror(errno), outFileName);
1861             ress.dstFile = NULL;
1862         }
1863     } else {
1864         if (outMirroredRootDirName)
1865             UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
1866 
1867         for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
1868             const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx];
1869             const char* dstFileName = NULL;
1870             if (outMirroredRootDirName) {
1871                 char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
1872                 if (validMirroredDirName) {
1873                     dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix);
1874                     free(validMirroredDirName);
1875                 } else {
1876                     DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName);
1877                     error=1;
1878                     continue;
1879                 }
1880             } else {
1881                 dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix);  /* cannot fail */
1882             }
1883             status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
1884             if (!status) fCtx->nbFilesProcessed++;
1885             error |= status;
1886         }
1887 
1888         if (outDirName)
1889             FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal);
1890     }
1891 
1892     if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) {
1893         DISPLAYLEVEL(2, "\r%79s\r", "");
1894         DISPLAYLEVEL(2, "%d files compressed : %.2f%%  (%6zu => %6zu bytes)\n", fCtx->nbFilesProcessed,
1895                         (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100,
1896                         fCtx->totalBytesInput, fCtx->totalBytesOutput);
1897     }
1898 
1899     FIO_freeCResources(&ress);
1900     return error;
1901 }
1902 
1903 #endif /* #ifndef ZSTD_NOCOMPRESS */
1904 
1905 
1906 
1907 #ifndef ZSTD_NODECOMPRESS
1908 
1909 /* **************************************************************************
1910  *  Decompression
1911  ***************************************************************************/
/* dRess_t :
 * resources used by decompression operations;
 * filled by FIO_createDResources() and released by FIO_freeDResources(). */
typedef struct {
    void*  srcBuffer;         /* input buffer, malloc'ed with srcBufferSize bytes */
    size_t srcBufferSize;     /* capacity of srcBuffer, set to ZSTD_DStreamInSize() */
    size_t srcBufferLoaded;   /* presumably nb of bytes currently loaded into srcBuffer -- TODO confirm against users */
    void*  dstBuffer;         /* output buffer, malloc'ed with dstBufferSize bytes */
    size_t dstBufferSize;     /* capacity of dstBuffer, set to ZSTD_DStreamOutSize() */
    ZSTD_DStream* dctx;       /* decompression stream, created with ZSTD_createDStream() */
    FILE*  dstFile;           /* zeroed by FIO_createDResources(); presumably set by callers before writing -- verify */
} dRess_t;
1921 
FIO_createDResources(FIO_prefs_t * const prefs,const char * dictFileName)1922 static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
1923 {
1924     dRess_t ress;
1925     memset(&ress, 0, sizeof(ress));
1926 
1927     if (prefs->patchFromMode)
1928         FIO_adjustMemLimitForPatchFromMode(prefs, UTIL_getFileSize(dictFileName), 0 /* just use the dict size */);
1929 
1930     /* Allocation */
1931     ress.dctx = ZSTD_createDStream();
1932     if (ress.dctx==NULL)
1933         EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
1934     CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
1935     CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
1936 
1937     ress.srcBufferSize = ZSTD_DStreamInSize();
1938     ress.srcBuffer = malloc(ress.srcBufferSize);
1939     ress.dstBufferSize = ZSTD_DStreamOutSize();
1940     ress.dstBuffer = malloc(ress.dstBufferSize);
1941     if (!ress.srcBuffer || !ress.dstBuffer)
1942         EXM_THROW(61, "Allocation error : not enough memory");
1943 
1944     /* dictionary */
1945     {   void* dictBuffer;
1946         size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs);
1947         CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) );
1948         free(dictBuffer);
1949     }
1950 
1951     return ress;
1952 }
1953 
/* FIO_freeDResources() :
 * release the decompression stream and both buffers held by `ress`.
 * A failure to free the stream is reported through CHECK().
 */
static void FIO_freeDResources(dRess_t ress)
{
    /* stream first : CHECK() reports an error before anything else is touched */
    CHECK( ZSTD_freeDStream(ress.dctx) );

    /* then the I/O buffers */
    free(ress.dstBuffer);
    free(ress.srcBuffer);
}
1960 
1961 
/** FIO_fwriteSparse() :
*  Write `bufferSize` bytes of `buffer` into `file`.
*  - test mode : nothing is written.
*  - sparse support disabled : plain fwrite() of the whole buffer.
*  - sparse support enabled : runs of zeroes are not written;
*    they are counted, then skipped with a seek just before the next non-zero data.
*    Zeroes not yet followed by data stay pending in the returned counter.
*  `storedSkips` is the nb of pending zero bytes left by the previous call (0 for the first one).
*  Seek and write failures are reported through EXM_THROW().
*  @return : storedSkips,
*            argument for next call to FIO_fwriteSparse() or FIO_fwriteSparseEnd() */
static unsigned
FIO_fwriteSparse(FILE* file,
                 const void* buffer, size_t bufferSize,
                 const FIO_prefs_t* const prefs,
                 unsigned storedSkips)
{
    const size_t* const bufferT = (const size_t*)buffer;   /* Buffer is supposed malloc'ed, hence aligned on size_t */
    size_t bufferSizeT = bufferSize / sizeof(size_t);   /* nb of complete size_t words; trailing bytes are handled at the end */
    const size_t* const bufferTEnd = bufferT + bufferSizeT;
    const size_t* ptrT = bufferT;
    static const size_t segmentSizeT = (32 KB) / sizeof(size_t);   /* check every 32 KB */

    if (prefs->testMode) return 0;  /* do not output anything in test mode */

    if (!prefs->sparseFileSupport) {  /* normal write */
        size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
        if (sizeCheck != bufferSize)
            EXM_THROW(70, "Write error : cannot write decoded block : %s",
                            strerror(errno));
        return 0;
    }

    /* avoid int overflow :
     * when more than 1 GB of zeroes is pending, seek over 1 GB right away
     * so that the counter stays small */
    if (storedSkips > 1 GB) {
        if (LONG_SEEK(file, 1 GB, SEEK_CUR) != 0)
            EXM_THROW(91, "1 GB skip error (sparse file support)");
        storedSkips -= 1 GB;
    }

    /* process the word-aligned part of the buffer, one segment at a time */
    while (ptrT < bufferTEnd) {
        size_t nb0T;

        /* adjust last segment if < 32 KB */
        size_t seg0SizeT = segmentSizeT;
        if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT;
        bufferSizeT -= seg0SizeT;

        /* count leading zeroes */
        for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ;
        storedSkips += (unsigned)(nb0T * sizeof(size_t));

        if (nb0T != seg0SizeT) {   /* not all 0s */
            size_t const nbNon0ST = seg0SizeT - nb0T;
            /* skip leading zeros (including those pending from previous segments / calls) */
            if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0)
                EXM_THROW(92, "Sparse skip error ; try --no-sparse");
            storedSkips = 0;
            /* write the rest : everything from the first non-zero word to the end of the segment */
            if (fwrite(ptrT + nb0T, sizeof(size_t), nbNon0ST, file) != nbNon0ST)
                EXM_THROW(93, "Write error : cannot write decoded block : %s",
                            strerror(errno));
        }
        ptrT += seg0SizeT;
    }

    /* process the trailing bytes (< sizeof(size_t)), with the same logic at byte granularity */
    {   static size_t const maskT = sizeof(size_t)-1;
        if (bufferSize & maskT) {
            /* size not multiple of sizeof(size_t) : implies end of block */
            const char* const restStart = (const char*)bufferTEnd;
            const char* restPtr = restStart;
            const char* const restEnd = (const char*)buffer + bufferSize;
            assert(restEnd > restStart && restEnd < restStart + sizeof(size_t));
            for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;
            storedSkips += (unsigned) (restPtr - restStart);
            if (restPtr != restEnd) {
                /* not all remaining bytes are 0 */
                size_t const restSize = (size_t)(restEnd - restPtr);
                if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0)
                    EXM_THROW(92, "Sparse skip error ; try --no-sparse");
                if (fwrite(restPtr, 1, restSize, file) != restSize)
                    EXM_THROW(95, "Write error : cannot write end of decoded block : %s",
                        strerror(errno));
                storedSkips = 0;
    }   }   }

    return storedSkips;
}
2042 
2043 static void
FIO_fwriteSparseEnd(const FIO_prefs_t * const prefs,FILE * file,unsigned storedSkips)2044 FIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
2045 {
2046     if (prefs->testMode) assert(storedSkips == 0);
2047     if (storedSkips>0) {
2048         assert(prefs->sparseFileSupport > 0);  /* storedSkips>0 implies sparse support is enabled */
2049         (void)prefs;   /* assert can be disabled, in which case prefs becomes unused */
2050         if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0)
2051             EXM_THROW(69, "Final skip error (sparse file support)");
2052         /* last zero must be explicitly written,
2053          * so that skipped ones get implicitly translated as zero by FS */
2054         {   const char lastZeroByte[1] = { 0 };
2055             if (fwrite(lastZeroByte, 1, 1, file) != 1)
2056                 EXM_THROW(69, "Write error : cannot write last zero : %s", strerror(errno));
2057     }   }
2058 }
2059 
2060 
2061 /** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
2062     @return : 0 (no error) */
FIO_passThrough(const FIO_prefs_t * const prefs,FILE * foutput,FILE * finput,void * buffer,size_t bufferSize,size_t alreadyLoaded)2063 static int FIO_passThrough(const FIO_prefs_t* const prefs,
2064                            FILE* foutput, FILE* finput,
2065                            void* buffer, size_t bufferSize,
2066                            size_t alreadyLoaded)
2067 {
2068     size_t const blockSize = MIN(64 KB, bufferSize);
2069     size_t readFromInput;
2070     unsigned storedSkips = 0;
2071 
2072     /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */
2073     {   size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput);
2074         if (sizeCheck != alreadyLoaded) {
2075             DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno));
2076             return 1;
2077     }   }
2078 
2079     do {
2080         readFromInput = fread(buffer, 1, blockSize, finput);
2081         storedSkips = FIO_fwriteSparse(foutput, buffer, readFromInput, prefs, storedSkips);
2082     } while (readFromInput == blockSize);
2083     if (ferror(finput)) {
2084         DISPLAYLEVEL(1, "Pass-through read error : %s\n", strerror(errno));
2085         return 1;
2086     }
2087     assert(feof(finput));
2088 
2089     FIO_fwriteSparseEnd(prefs, foutput, storedSkips);
2090     return 0;
2091 }
2092 
2093 /* FIO_zstdErrorHelp() :
2094  * detailed error message when requested window size is too large */
2095 static void
FIO_zstdErrorHelp(const FIO_prefs_t * const prefs,const dRess_t * ress,size_t err,const char * srcFileName)2096 FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
2097                   const dRess_t* ress,
2098                   size_t err, const char* srcFileName)
2099 {
2100     ZSTD_frameHeader header;
2101 
2102     /* Help message only for one specific error */
2103     if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
2104         return;
2105 
2106     /* Try to decode the frame header */
2107     err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded);
2108     if (err == 0) {
2109         unsigned long long const windowSize = header.windowSize;
2110         unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
2111         assert(prefs->memLimit > 0);
2112         DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n",
2113                         srcFileName, windowSize, prefs->memLimit);
2114         if (windowLog <= ZSTD_WINDOWLOG_MAX) {
2115             unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
2116             assert(windowSize < (U64)(1ULL << 52));   /* ensure now overflow for windowMB */
2117             DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n",
2118                             srcFileName, windowLog, windowMB);
2119             return;
2120     }   }
2121     DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n",
2122                     srcFileName, ZSTD_WINDOWLOG_MAX);
2123 }
2124 
2125 /** FIO_decompressFrame() :
2126  *  @return : size of decoded zstd frame, or an error code
2127  */
2128 #define FIO_ERROR_FRAME_DECODING   ((unsigned long long)(-2))
2129 static unsigned long long
FIO_decompressZstdFrame(FIO_ctx_t * const fCtx,dRess_t * ress,FILE * finput,const FIO_prefs_t * const prefs,const char * srcFileName,U64 alreadyDecoded)2130 FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
2131                         const FIO_prefs_t* const prefs,
2132                         const char* srcFileName,
2133                         U64 alreadyDecoded)  /* for multi-frames streams */
2134 {
2135     U64 frameSize = 0;
2136     U32 storedSkips = 0;
2137 
2138     /* display last 20 characters only */
2139     {   size_t const srcFileLength = strlen(srcFileName);
2140         if (srcFileLength>20) srcFileName += srcFileLength-20;
2141     }
2142 
2143     ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);
2144 
2145     /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
2146     {   size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX;
2147         if (ress->srcBufferLoaded < toDecode) {
2148             size_t const toRead = toDecode - ress->srcBufferLoaded;
2149             void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
2150             ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput);
2151     }   }
2152 
2153     /* Main decompression Loop */
2154     while (1) {
2155         ZSTD_inBuffer  inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 };
2156         ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 };
2157         size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
2158         if (ZSTD_isError(readSizeHint)) {
2159             DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
2160                             srcFileName, ZSTD_getErrorName(readSizeHint));
2161             FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
2162             return FIO_ERROR_FRAME_DECODING;
2163         }
2164 
2165         /* Write block */
2166         storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips);
2167         frameSize += outBuff.pos;
2168         if (!fCtx->hasStdoutOutput || g_display_prefs.progressSetting == FIO_ps_always) {
2169             if (fCtx->nbFilesTotal > 1) {
2170                 size_t srcFileNameSize = strlen(srcFileName);
2171                 if (srcFileNameSize > 18) {
2172                     const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
2173                     DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: ...%s : %u MB...    ",
2174                                     fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
2175                 } else {
2176                     DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: %s : %u MB...    ",
2177                                 fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
2178                 }
2179             } else {
2180                 DISPLAYUPDATE(2, "\r%-20.20s : %u MB...     ",
2181                                 srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
2182             }
2183         }
2184 
2185         if (inBuff.pos > 0) {
2186             memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos);
2187             ress->srcBufferLoaded -= inBuff.pos;
2188         }
2189 
2190         if (readSizeHint == 0) break;   /* end of frame */
2191 
2192         /* Fill input buffer */
2193         {   size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize);  /* support large skippable frames */
2194             if (ress->srcBufferLoaded < toDecode) {
2195                 size_t const toRead = toDecode - ress->srcBufferLoaded;   /* > 0 */
2196                 void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
2197                 size_t const readSize = fread(startPosition, 1, toRead, finput);
2198                 if (readSize==0) {
2199                     DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
2200                                     srcFileName);
2201                     return FIO_ERROR_FRAME_DECODING;
2202                 }
2203                 ress->srcBufferLoaded += readSize;
2204     }   }   }
2205 
2206     FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
2207 
2208     return frameSize;
2209 }
2210 
2211 
#ifdef ZSTD_GZDECOMPRESS
/* FIO_decompressGzFrame() :
 * Decode one gzip stream with zlib's inflate().
 * Input already present in ress->srcBuffer is consumed first, then `srcFile` is read.
 * Output is written into ress->dstFile through FIO_fwriteSparse().
 * On exit, input left unconsumed by zlib is moved to the start of ress->srcBuffer
 * and ress->srcBufferLoaded is set to its size.
 * @return : amount of decoded bytes, or FIO_ERROR_FRAME_DECODING on error */
static unsigned long long
FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile,
                      const FIO_prefs_t* const prefs,
                      const char* srcFileName)
{
    unsigned long long totalOut = 0;
    z_stream strm;
    int flushMode = Z_NO_FLUSH;
    int failed = 0;
    unsigned pendingSkips = 0;
    int status;

    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.next_in = 0;
    strm.avail_in = 0;
    /* see http://www.zlib.net/manual.html */
    if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK)
        return FIO_ERROR_FRAME_DECODING;

    strm.next_out = (Bytef*)ress->dstBuffer;
    strm.avail_out = (uInt)ress->dstBufferSize;
    strm.avail_in = (uInt)ress->srcBufferLoaded;
    strm.next_in = (z_const unsigned char*)ress->srcBuffer;

    do {
        /* refill input when exhausted; an empty read switches to Z_FINISH */
        if (strm.avail_in == 0) {
            ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
            if (ress->srcBufferLoaded == 0) flushMode = Z_FINISH;
            strm.next_in = (z_const unsigned char*)ress->srcBuffer;
            strm.avail_in = (uInt)ress->srcBufferLoaded;
        }

        status = inflate(&strm, flushMode);
        if (status != Z_OK && status != Z_STREAM_END) {
            if (status == Z_BUF_ERROR) {
                DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName);
            } else {
                DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, status);
            }
            failed = 1;
            break;
        }

        /* flush whatever was produced, then hand the whole dst buffer back to zlib */
        {   size_t const produced = ress->dstBufferSize - strm.avail_out;
            if (produced) {
                pendingSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, produced, prefs, pendingSkips);
                totalOut += produced;
                strm.next_out = (Bytef*)ress->dstBuffer;
                strm.avail_out = (uInt)ress->dstBufferSize;
            }
        }
    } while (status != Z_STREAM_END);

    /* keep unconsumed input available for the caller */
    if (strm.avail_in > 0)
        memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
    ress->srcBufferLoaded = strm.avail_in;

    /* release resources ; a failure here is only reported when no error was detected before */
    {   int const endStatus = inflateEnd(&strm);
        if ((endStatus != Z_OK) && !failed) {
            DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
            failed = 1;
        }
    }
    FIO_fwriteSparseEnd(prefs, ress->dstFile, pendingSkips);
    return failed ? FIO_ERROR_FRAME_DECODING : totalOut;
}
#endif
2278 
2279 
#ifdef ZSTD_LZMADECOMPRESS
/* FIO_decompressLzmaFrame() :
 * Decode one xz stream, or one legacy lzma stream when `plain_lzma` is non-zero.
 * Input already present in ress->srcBuffer is consumed first, then `srcFile` is read.
 * Output is written into ress->dstFile through FIO_fwriteSparse().
 * On exit, input left unconsumed by liblzma is moved to the start of ress->srcBuffer
 * and ress->srcBufferLoaded is set to its size.
 * @return : amount of decoded bytes, or FIO_ERROR_FRAME_DECODING on error */
static unsigned long long
FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
                        const FIO_prefs_t* const prefs,
                        const char* srcFileName, int plain_lzma)
{
    unsigned long long outFileSize = 0;
    lzma_stream strm = LZMA_STREAM_INIT;
    lzma_action action = LZMA_RUN;
    lzma_ret initRet;
    int decodingError = 0;
    unsigned storedSkips = 0;

    strm.next_in = 0;
    strm.avail_in = 0;
    if (plain_lzma) {
        initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
    } else {
        initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
    }

    if (initRet != LZMA_OK) {
        /* file name first, then the name of the failing initializer,
         * same layout as the other "zstd: <file>: ..." messages */
        DISPLAYLEVEL(1, "zstd: %s: %s error %d \n",
                        srcFileName,
                        plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder",
                        initRet);
        return FIO_ERROR_FRAME_DECODING;
    }

    strm.next_out = (BYTE*)ress->dstBuffer;
    strm.avail_out = ress->dstBufferSize;
    strm.next_in = (BYTE const*)ress->srcBuffer;
    strm.avail_in = ress->srcBufferLoaded;

    for ( ; ; ) {
        lzma_ret ret;
        /* refill input when exhausted; an empty read switches to LZMA_FINISH */
        if (strm.avail_in == 0) {
            ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
            if (ress->srcBufferLoaded == 0) action = LZMA_FINISH;
            strm.next_in = (BYTE const*)ress->srcBuffer;
            strm.avail_in = ress->srcBufferLoaded;
        }
        ret = lzma_code(&strm, action);

        if (ret == LZMA_BUF_ERROR) {
            DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName);
            decodingError = 1; break;
        }
        if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
            DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n",
                            srcFileName, ret);
            decodingError = 1; break;
        }
        /* flush whatever was produced, then hand the whole dst buffer back to liblzma */
        {   size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
            if (decompBytes) {
                storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips);
                outFileSize += decompBytes;
                strm.next_out = (BYTE*)ress->dstBuffer;
                strm.avail_out = ress->dstBufferSize;
        }   }
        if (ret == LZMA_STREAM_END) break;
    }

    /* keep unconsumed input available for the caller */
    if (strm.avail_in > 0)
        memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
    ress->srcBufferLoaded = strm.avail_in;
    lzma_end(&strm);
    FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
    return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
2350 
#ifdef ZSTD_LZ4DECOMPRESS
/* FIO_decompressLz4Frame() :
 * Decode one lz4 frame with the LZ4F streaming API.
 * The 4-byte magic number is re-created at the start of ress->srcBuffer
 * and fed to the decoder first; all other input is read from `srcFile`.
 * Output is written into ress->dstFile through FIO_fwriteSparse().
 * ress->srcBufferLoaded is reset to 0 on exit.
 * @return : amount of decoded bytes, or FIO_ERROR_FRAME_DECODING on error */
static unsigned long long
FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile,
                       const FIO_prefs_t* const prefs,
                       const char* srcFileName)
{
    unsigned long long totalOut = 0;
    LZ4F_errorCode_t nextToLoad;
    LZ4F_decompressionContext_t dCtx;
    LZ4F_errorCode_t const createStatus = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
    int failed = 0;
    unsigned pendingSkips = 0;

    if (LZ4F_isError(createStatus)) {
        DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n");
        return FIO_ERROR_FRAME_DECODING;
    }

    /* Init feed with magic number (already consumed from FILE* sFile) */
    {   size_t srcSize = 4;
        size_t dstSize = 0;
        MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER);
        nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &dstSize, ress->srcBuffer, &srcSize, NULL);
        if (LZ4F_isError(nextToLoad)) {
            DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n",
                            srcFileName, LZ4F_getErrorName(nextToLoad));
            LZ4F_freeDecompressionContext(dCtx);
            return FIO_ERROR_FRAME_DECODING;
        }
    }

    /* Main Loop : runs as long as the decoder asks for more input */
    while (nextToLoad != 0) {
        size_t loaded;
        size_t consumed = 0;
        size_t produced = ress->dstBufferSize;

        /* Read input, never more than the source buffer can hold */
        if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize;
        loaded = fread(ress->srcBuffer, 1, nextToLoad, srcFile);
        if (loaded == 0) break;   /* reached end of file or stream */

        /* still some input to decode, or dst buffer was filled entirely (more may remain to flush) */
        while ((consumed < loaded) || (produced == ress->dstBufferSize)) {
            /* Decode Input (at least partially) */
            size_t srcSize = loaded - consumed;
            produced = ress->dstBufferSize;
            nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &produced, (char*)(ress->srcBuffer)+consumed, &srcSize, NULL);
            if (LZ4F_isError(nextToLoad)) {
                DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
                                srcFileName, LZ4F_getErrorName(nextToLoad));
                failed = 1;
                nextToLoad = 0;   /* also terminates the outer loop */
                break;
            }
            consumed += srcSize;

            /* Write Block */
            if (produced) {
                pendingSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, produced, prefs, pendingSkips);
                totalOut += produced;
                DISPLAYUPDATE(2, "\rDecompressed : %u MB  ", (unsigned)(totalOut>>20));
            }

            if (!nextToLoad) break;
        }
    }

    /* the loop may have ended on an empty read, which could be an fread() error */
    if (ferror(srcFile)) {
        DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName);
        failed = 1;
    }

    /* the decoder was still expecting input */
    if (nextToLoad != 0) {
        DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName);
        failed = 1;
    }

    LZ4F_freeDecompressionContext(dCtx);
    ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */
    FIO_fwriteSparseEnd(prefs, ress->dstFile, pendingSkips);

    return failed ? FIO_ERROR_FRAME_DECODING : totalOut;
}
#endif
2432 
2433 
2434 
2435 /** FIO_decompressFrames() :
2436  *  Find and decode frames inside srcFile
2437  *  srcFile presumed opened and valid
2438  * @return : 0 : OK
2439  *           1 : error
2440  */
FIO_decompressFrames(FIO_ctx_t * const fCtx,dRess_t ress,FILE * srcFile,const FIO_prefs_t * const prefs,const char * dstFileName,const char * srcFileName)2441 static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
2442                           dRess_t ress, FILE* srcFile,
2443                           const FIO_prefs_t* const prefs,
2444                           const char* dstFileName, const char* srcFileName)
2445 {
2446     unsigned readSomething = 0;
2447     unsigned long long filesize = 0;
2448     assert(srcFile != NULL);
2449 
2450     /* for each frame */
2451     for ( ; ; ) {
2452         /* check magic number -> version */
2453         size_t const toRead = 4;
2454         const BYTE* const buf = (const BYTE*)ress.srcBuffer;
2455         if (ress.srcBufferLoaded < toRead)  /* load up to 4 bytes for header */
2456             ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded,
2457                                           (size_t)1, toRead - ress.srcBufferLoaded, srcFile);
2458         if (ress.srcBufferLoaded==0) {
2459             if (readSomething==0) {  /* srcFile is empty (which is invalid) */
2460                 DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName);
2461                 return 1;
2462             }  /* else, just reached frame boundary */
2463             break;   /* no more input */
2464         }
2465         readSomething = 1;   /* there is at least 1 byte in srcFile */
2466         if (ress.srcBufferLoaded < toRead) {
2467             DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName);
2468             return 1;
2469         }
2470         if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) {
2471             unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, srcFile, prefs, srcFileName, filesize);
2472             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2473             filesize += frameSize;
2474         } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
2475 #ifdef ZSTD_GZDECOMPRESS
2476             unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, prefs, srcFileName);
2477             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2478             filesize += frameSize;
2479 #else
2480             DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName);
2481             return 1;
2482 #endif
2483         } else if ((buf[0] == 0xFD && buf[1] == 0x37)  /* xz magic number */
2484                 || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
2485 #ifdef ZSTD_LZMADECOMPRESS
2486             unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, prefs, srcFileName, buf[0] != 0xFD);
2487             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2488             filesize += frameSize;
2489 #else
2490             DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName);
2491             return 1;
2492 #endif
2493         } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
2494 #ifdef ZSTD_LZ4DECOMPRESS
2495             unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, prefs, srcFileName);
2496             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2497             filesize += frameSize;
2498 #else
2499             DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName);
2500             return 1;
2501 #endif
2502         } else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) {  /* pass-through mode */
2503             return FIO_passThrough(prefs,
2504                                    ress.dstFile, srcFile,
2505                                    ress.srcBuffer, ress.srcBufferSize,
2506                                    ress.srcBufferLoaded);
2507         } else {
2508             DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName);
2509             return 1;
2510     }   }  /* for each frame */
2511 
2512     /* Final Status */
2513     fCtx->totalBytesOutput += (size_t)filesize;
2514     DISPLAYLEVEL(2, "\r%79s\r", "");
2515     /* No status message in pipe mode (stdin - stdout) or multi-files mode */
2516     if (g_display_prefs.displayLevel >= 2) {
2517         if (fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3) {
2518             DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize);
2519         }
2520     }
2521 
2522     return 0;
2523 }
2524 
2525 /** FIO_decompressDstFile() :
2526     open `dstFileName`,
2527     or path-through if ress.dstFile is already != 0,
2528     then start decompression process (FIO_decompressFrames()).
2529     @return : 0 : OK
2530               1 : operation aborted
2531 */
FIO_decompressDstFile(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,dRess_t ress,FILE * srcFile,const char * dstFileName,const char * srcFileName)2532 static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
2533                                  FIO_prefs_t* const prefs,
2534                                  dRess_t ress, FILE* srcFile,
2535                                  const char* dstFileName, const char* srcFileName)
2536 {
2537     int result;
2538     stat_t statbuf;
2539     int releaseDstFile = 0;
2540 
2541     if ((ress.dstFile == NULL) && (prefs->testMode==0)) {
2542         int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
2543         if ( strcmp(srcFileName, stdinmark)   /* special case : don't transfer permissions from stdin */
2544           && UTIL_stat(srcFileName, &statbuf)
2545           && UTIL_isRegularFileStat(&statbuf) ) {
2546             dstFilePermissions = statbuf.st_mode;
2547         }
2548 
2549         releaseDstFile = 1;
2550 
2551         ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
2552         if (ress.dstFile==NULL) return 1;
2553 
2554         /* Must only be added after FIO_openDstFile() succeeds.
2555          * Otherwise we may delete the destination file if it already exists,
2556          * and the user presses Ctrl-C when asked if they wish to overwrite.
2557          */
2558         addHandler(dstFileName);
2559     }
2560 
2561     result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName);
2562 
2563     if (releaseDstFile) {
2564         FILE* const dstFile = ress.dstFile;
2565         clearHandler();
2566         ress.dstFile = NULL;
2567         if (fclose(dstFile)) {
2568             DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
2569             result = 1;
2570         }
2571 
2572         if ( (result != 0)  /* operation failure */
2573           && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
2574           ) {
2575             FIO_removeFile(dstFileName);  /* remove decompression artefact; note: don't do anything special if remove() fails */
2576         }
2577     }
2578 
2579     return result;
2580 }
2581 
2582 
2583 /** FIO_decompressSrcFile() :
2584     Open `srcFileName`, transfer control to decompressDstFile()
2585     @return : 0 : OK
2586               1 : error
2587 */
FIO_decompressSrcFile(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,dRess_t ress,const char * dstFileName,const char * srcFileName)2588 static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName)
2589 {
2590     FILE* srcFile;
2591     int result;
2592 
2593     if (UTIL_isDirectory(srcFileName)) {
2594         DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
2595         return 1;
2596     }
2597 
2598     srcFile = FIO_openSrcFile(prefs, srcFileName);
2599     if (srcFile==NULL) return 1;
2600     ress.srcBufferLoaded = 0;
2601 
2602     result = FIO_decompressDstFile(fCtx, prefs, ress, srcFile, dstFileName, srcFileName);
2603 
2604     /* Close file */
2605     if (fclose(srcFile)) {
2606         DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));  /* error should not happen */
2607         return 1;
2608     }
2609     if ( prefs->removeSrcFile  /* --rm */
2610       && (result==0)      /* decompression successful */
2611       && strcmp(srcFileName, stdinmark) ) /* not stdin */ {
2612         /* We must clear the handler, since after this point calling it would
2613          * delete both the source and destination files.
2614          */
2615         clearHandler();
2616         if (FIO_removeFile(srcFileName)) {
2617             /* failed to remove src file */
2618             DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
2619             return 1;
2620     }   }
2621     return result;
2622 }
2623 
2624 
2625 
/* FIO_decompressFilename() :
 * Decompress a single source file into `dstFileName`.
 * Decompression resources are created for this call (using `dictFileName`)
 * and released before returning.
 * @return : result of FIO_decompressSrcFile() (0 : OK, 1 : error) */
int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
                           const char* dstFileName, const char* srcFileName,
                           const char* dictFileName)
{
    dRess_t const resources = FIO_createDResources(prefs, dictFileName);
    int const status = FIO_decompressSrcFile(fCtx, prefs, resources, dstFileName, srcFileName);

    FIO_freeDResources(resources);
    return status;
}
2637 
2638 static const char *suffixList[] = {
2639     ZSTD_EXTENSION,
2640     TZSTD_EXTENSION,
2641 #ifndef ZSTD_NODECOMPRESS
2642     ZSTD_ALT_EXTENSION,
2643 #endif
2644 #ifdef ZSTD_GZDECOMPRESS
2645     GZ_EXTENSION,
2646     TGZ_EXTENSION,
2647 #endif
2648 #ifdef ZSTD_LZMADECOMPRESS
2649     LZMA_EXTENSION,
2650     XZ_EXTENSION,
2651     TXZ_EXTENSION,
2652 #endif
2653 #ifdef ZSTD_LZ4DECOMPRESS
2654     LZ4_EXTENSION,
2655     TLZ4_EXTENSION,
2656 #endif
2657     NULL
2658 };
2659 
2660 static const char *suffixListStr =
2661     ZSTD_EXTENSION "/" TZSTD_EXTENSION
2662 #ifdef ZSTD_GZDECOMPRESS
2663     "/" GZ_EXTENSION "/" TGZ_EXTENSION
2664 #endif
2665 #ifdef ZSTD_LZMADECOMPRESS
2666     "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
2667 #endif
2668 #ifdef ZSTD_LZ4DECOMPRESS
2669     "/" LZ4_EXTENSION "/" TLZ4_EXTENSION
2670 #endif
2671 ;
2672 
2673 /* FIO_determineDstName() :
2674  * create a destination filename from a srcFileName.
2675  * @return a pointer to it.
2676  * @return == NULL if there is an error */
2677 static const char*
FIO_determineDstName(const char * srcFileName,const char * outDirName)2678 FIO_determineDstName(const char* srcFileName, const char* outDirName)
2679 {
2680     static size_t dfnbCapacity = 0;
2681     static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
2682     size_t dstFileNameEndPos;
2683     char* outDirFilename = NULL;
2684     const char* dstSuffix = "";
2685     size_t dstSuffixLen = 0;
2686 
2687     size_t sfnSize = strlen(srcFileName);
2688 
2689     size_t srcSuffixLen;
2690     const char* const srcSuffix = strrchr(srcFileName, '.');
2691     if (srcSuffix == NULL) {
2692         DISPLAYLEVEL(1,
2693             "zstd: %s: unknown suffix (%s expected). "
2694             "Can't derive the output file name. "
2695             "Specify it with -o dstFileName. Ignoring.\n",
2696             srcFileName, suffixListStr);
2697         return NULL;
2698     }
2699     srcSuffixLen = strlen(srcSuffix);
2700 
2701     {
2702         const char** matchedSuffixPtr;
2703         for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
2704             if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
2705                 break;
2706             }
2707         }
2708 
2709         /* check suffix is authorized */
2710         if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
2711             DISPLAYLEVEL(1,
2712                 "zstd: %s: unknown suffix (%s expected). "
2713                 "Can't derive the output file name. "
2714                 "Specify it with -o dstFileName. Ignoring.\n",
2715                 srcFileName, suffixListStr);
2716             return NULL;
2717         }
2718 
2719         if ((*matchedSuffixPtr)[1] == 't') {
2720             dstSuffix = ".tar";
2721             dstSuffixLen = strlen(dstSuffix);
2722         }
2723     }
2724 
2725     if (outDirName) {
2726         outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
2727         sfnSize = strlen(outDirFilename);
2728         assert(outDirFilename != NULL);
2729     }
2730 
2731     if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
2732         /* allocate enough space to write dstFilename into it */
2733         free(dstFileNameBuffer);
2734         dfnbCapacity = sfnSize + 20;
2735         dstFileNameBuffer = (char*)malloc(dfnbCapacity);
2736         if (dstFileNameBuffer==NULL)
2737             EXM_THROW(74, "%s : not enough memory for dstFileName",
2738                       strerror(errno));
2739     }
2740 
2741     /* return dst name == src name truncated from suffix */
2742     assert(dstFileNameBuffer != NULL);
2743     dstFileNameEndPos = sfnSize - srcSuffixLen;
2744     if (outDirFilename) {
2745         memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
2746         free(outDirFilename);
2747     } else {
2748         memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
2749     }
2750 
2751     /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
2752      * extension on decompression. Also writes terminating null. */
2753     strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
2754     return dstFileNameBuffer;
2755 
    /* note : dstFileNameBuffer memory is not going to be freed */
2757 }
2758 
2759 int
FIO_decompressMultipleFilenames(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,const char ** srcNamesTable,const char * outMirroredRootDirName,const char * outDirName,const char * outFileName,const char * dictFileName)2760 FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx,
2761                                 FIO_prefs_t* const prefs,
2762                                 const char** srcNamesTable,
2763                                 const char* outMirroredRootDirName,
2764                                 const char* outDirName, const char* outFileName,
2765                                 const char* dictFileName)
2766 {
2767     int status;
2768     int error = 0;
2769     dRess_t ress = FIO_createDResources(prefs, dictFileName);
2770 
2771     if (outFileName) {
2772         if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
2773             FIO_freeDResources(ress);
2774             return 1;
2775         }
2776         if (!prefs->testMode) {
2777             ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
2778             if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
2779         }
2780         for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {
2781             status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]);
2782             if (!status) fCtx->nbFilesProcessed++;
2783             error |= status;
2784         }
2785         if ((!prefs->testMode) && (fclose(ress.dstFile)))
2786             EXM_THROW(72, "Write error : %s : cannot properly close output file",
2787                         strerror(errno));
2788     } else {
2789         if (outMirroredRootDirName)
2790             UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
2791 
2792         for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {   /* create dstFileName */
2793             const char* const srcFileName = srcNamesTable[fCtx->currFileIdx];
2794             const char* dstFileName = NULL;
2795             if (outMirroredRootDirName) {
2796                 char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
2797                 if (validMirroredDirName) {
2798                     dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName);
2799                     free(validMirroredDirName);
2800                 } else {
2801                     DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName);
2802                 }
2803             } else {
2804                 dstFileName = FIO_determineDstName(srcFileName, outDirName);
2805             }
2806             if (dstFileName == NULL) { error=1; continue; }
2807             status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
2808             if (!status) fCtx->nbFilesProcessed++;
2809             error |= status;
2810         }
2811         if (outDirName)
2812             FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal);
2813     }
2814 
2815     if (fCtx->nbFilesProcessed >= 1  && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0)
2816         DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput);
2817 
2818     FIO_freeDResources(ress);
2819     return error;
2820 }
2821 
2822 /* **************************************************************************
2823  *  .zst file info (--list command)
2824  ***************************************************************************/
2825 
2826 typedef struct {
2827     U64 decompressedSize;
2828     U64 compressedSize;
2829     U64 windowSize;
2830     int numActualFrames;
2831     int numSkippableFrames;
2832     int decompUnavailable;
2833     int usesCheck;
2834     U32 nbFiles;
2835 } fileInfo_t;
2836 
/* InfoError :
 * Outcome of a --list analysis on one file.
 * Note : no comma after the last enumerator,
 * since a trailing comma is rejected by strict C90 compilers. */
typedef enum {
  info_success=0,           /* file fully parsed */
  info_frame_error=1,       /* a frame could not be parsed */
  info_not_zstd=2,          /* content is not recognized as zstd data */
  info_file_error=3,        /* file is not a regular file, or could not be opened */
  info_truncated_input=4    /* end position does not match the file size */
} InfoError;
2844 
/* ERROR_IF() :
 * When condition `c` holds, reports the printf-style message through
 * DISPLAYLEVEL(1, ...), followed by " \n", then makes the enclosing
 * function return `n`.
 * The body is wrapped in do { } while (0) so that `ERROR_IF(...);` counts as
 * exactly one statement, and remains safe inside an unbraced if / else.
 * (MSVC warning C4127 "conditional expression is constant" is already
 *  disabled at the top of this file.) */
#define ERROR_IF(c,n,...) do {         \
    if (c) {                           \
        DISPLAYLEVEL(1, __VA_ARGS__);  \
        DISPLAYLEVEL(1, " \n");        \
        return n;                      \
    }                                  \
} while (0)
2852 
2853 static InfoError
FIO_analyzeFrames(fileInfo_t * info,FILE * const srcFile)2854 FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
2855 {
2856     /* begin analyzing frame */
2857     for ( ; ; ) {
2858         BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
2859         size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
2860         if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) {
2861             if ( feof(srcFile)
2862               && (numBytesRead == 0)
2863               && (info->compressedSize > 0)
2864               && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
2865                 unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile);
2866                 unsigned long long file_size = (unsigned long long) info->compressedSize;
2867                 ERROR_IF(file_position != file_size, info_truncated_input,
2868                   "Error: seeked to position %llu, which is beyond file size of %llu\n",
2869                   file_position,
2870                   file_size);
2871                 break;  /* correct end of file => success */
2872             }
2873             ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame");
2874             ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames");
2875         }
2876         {   U32 const magicNumber = MEM_readLE32(headerBuffer);
2877             /* Zstandard frame */
2878             if (magicNumber == ZSTD_MAGICNUMBER) {
2879                 ZSTD_frameHeader header;
2880                 U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
2881                 if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR
2882                   || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) {
2883                     info->decompUnavailable = 1;
2884                 } else {
2885                     info->decompressedSize += frameContentSize;
2886                 }
2887                 ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0,
2888                         info_frame_error, "Error: could not decode frame header");
2889                 info->windowSize = header.windowSize;
2890                 /* move to the end of the frame header */
2891                 {   size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
2892                     ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size");
2893                     ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0,
2894                             info_frame_error, "Error: could not move to end of frame header");
2895                 }
2896 
2897                 /* skip all blocks in the frame */
2898                 {   int lastBlock = 0;
2899                     do {
2900                         BYTE blockHeaderBuffer[3];
2901                         ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3,
2902                                 info_frame_error, "Error while reading block header");
2903                         {   U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);
2904                             U32 const blockTypeID = (blockHeader >> 1) & 3;
2905                             U32 const isRLE = (blockTypeID == 1);
2906                             U32 const isWrongBlock = (blockTypeID == 3);
2907                             long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);
2908                             ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type");
2909                             lastBlock = blockHeader & 1;
2910                             ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0,
2911                                     info_frame_error, "Error: could not skip to end of block");
2912                         }
2913                     } while (lastBlock != 1);
2914                 }
2915 
2916                 /* check if checksum is used */
2917                 {   BYTE const frameHeaderDescriptor = headerBuffer[4];
2918                     int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
2919                     if (contentChecksumFlag) {
2920                         info->usesCheck = 1;
2921                         ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0,
2922                                 info_frame_error, "Error: could not skip past checksum");
2923                 }   }
2924                 info->numActualFrames++;
2925             }
2926             /* Skippable frame */
2927             else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
2928                 U32 const frameSize = MEM_readLE32(headerBuffer + 4);
2929                 long const seek = (long)(8 + frameSize - numBytesRead);
2930                 ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0,
2931                         info_frame_error, "Error: could not find end of skippable frame");
2932                 info->numSkippableFrames++;
2933             }
2934             /* unknown content */
2935             else {
2936                 return info_not_zstd;
2937             }
2938         }  /* magic number analysis */
2939     }  /* end analyzing frames */
2940     return info_success;
2941 }
2942 
2943 
2944 static InfoError
getFileInfo_fileConfirmed(fileInfo_t * info,const char * inFileName)2945 getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
2946 {
2947     InfoError status;
2948     FILE* const srcFile = FIO_openSrcFile(NULL, inFileName);
2949     ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
2950 
2951     info->compressedSize = UTIL_getFileSize(inFileName);
2952     status = FIO_analyzeFrames(info, srcFile);
2953 
2954     fclose(srcFile);
2955     info->nbFiles = 1;
2956     return status;
2957 }
2958 
2959 
2960 /** getFileInfo() :
2961  *  Reads information from file, stores in *info
2962  * @return : InfoError status
2963  */
2964 static InfoError
getFileInfo(fileInfo_t * info,const char * srcFileName)2965 getFileInfo(fileInfo_t* info, const char* srcFileName)
2966 {
2967     ERROR_IF(!UTIL_isRegularFile(srcFileName),
2968             info_file_error, "Error : %s is not a file", srcFileName);
2969     return getFileInfo_fileConfirmed(info, srcFileName);
2970 }
2971 
2972 
2973 static void
displayInfo(const char * inFileName,const fileInfo_t * info,int displayLevel)2974 displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel)
2975 {
2976     unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB);
2977     const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB";
2978     double const windowSizeUnit = (double)info->windowSize / unit;
2979     double const compressedSizeUnit = (double)info->compressedSize / unit;
2980     double const decompressedSizeUnit = (double)info->decompressedSize / unit;
2981     double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize;
2982     const char* const checkString = (info->usesCheck ? "XXH64" : "None");
2983     if (displayLevel <= 2) {
2984         if (!info->decompUnavailable) {
2985             DISPLAYOUT("%6d  %5d  %7.2f %2s  %9.2f %2s  %5.3f  %5s  %s\n",
2986                     info->numSkippableFrames + info->numActualFrames,
2987                     info->numSkippableFrames,
2988                     compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr,
2989                     ratio, checkString, inFileName);
2990         } else {
2991             DISPLAYOUT("%6d  %5d  %7.2f %2s                       %5s  %s\n",
2992                     info->numSkippableFrames + info->numActualFrames,
2993                     info->numSkippableFrames,
2994                     compressedSizeUnit, unitStr,
2995                     checkString, inFileName);
2996         }
2997     } else {
2998         DISPLAYOUT("%s \n", inFileName);
2999         DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
3000         if (info->numSkippableFrames)
3001             DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
3002         DISPLAYOUT("Window Size: %.2f %2s (%llu B)\n",
3003                    windowSizeUnit, unitStr,
3004                    (unsigned long long)info->windowSize);
3005         DISPLAYOUT("Compressed Size: %.2f %2s (%llu B)\n",
3006                     compressedSizeUnit, unitStr,
3007                     (unsigned long long)info->compressedSize);
3008         if (!info->decompUnavailable) {
3009             DISPLAYOUT("Decompressed Size: %.2f %2s (%llu B)\n",
3010                     decompressedSizeUnit, unitStr,
3011                     (unsigned long long)info->decompressedSize);
3012             DISPLAYOUT("Ratio: %.4f\n", ratio);
3013         }
3014         DISPLAYOUT("Check: %s\n", checkString);
3015         DISPLAYOUT("\n");
3016     }
3017 }
3018 
/* FIO_addFInfo() :
 * Merges two sets of statistics into a new one :
 * counters and sizes are added, decompUnavailable is OR-ed, usesCheck is AND-ed.
 * windowSize is not carried over : it stays at 0 in the result.
 */
static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
{
    fileInfo_t sum;
    memset(&sum, 0, sizeof(sum));

    /* additive fields */
    sum.nbFiles            = fi1.nbFiles + fi2.nbFiles;
    sum.compressedSize     = fi1.compressedSize + fi2.compressedSize;
    sum.decompressedSize   = fi1.decompressedSize + fi2.decompressedSize;
    sum.numActualFrames    = fi1.numActualFrames + fi2.numActualFrames;
    sum.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames;

    /* flags */
    sum.usesCheck          = fi1.usesCheck & fi2.usesCheck;
    sum.decompUnavailable  = fi1.decompUnavailable | fi2.decompUnavailable;

    return sum;
}
3032 
3033 static int
FIO_listFile(fileInfo_t * total,const char * inFileName,int displayLevel)3034 FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel)
3035 {
3036     fileInfo_t info;
3037     memset(&info, 0, sizeof(info));
3038     {   InfoError const error = getFileInfo(&info, inFileName);
3039         switch (error) {
3040             case info_frame_error:
3041                 /* display error, but provide output */
3042                 DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName);
3043                 break;
3044             case info_not_zstd:
3045                 DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName);
3046                 if (displayLevel > 2) DISPLAYOUT("\n");
3047                 return 1;
3048             case info_file_error:
3049                 /* error occurred while opening the file */
3050                 if (displayLevel > 2) DISPLAYOUT("\n");
3051                 return 1;
3052             case info_truncated_input:
3053                 DISPLAYOUT("File \"%s\" is truncated \n", inFileName);
3054                 if (displayLevel > 2) DISPLAYOUT("\n");
3055                 return 1;
3056             case info_success:
3057             default:
3058                 break;
3059         }
3060 
3061         displayInfo(inFileName, &info, displayLevel);
3062         *total = FIO_addFInfo(*total, info);
3063         assert(error == info_success || error == info_frame_error);
3064         return (int)error;
3065     }
3066 }
3067 
FIO_listMultipleFiles(unsigned numFiles,const char ** filenameTable,int displayLevel)3068 int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel)
3069 {
3070     /* ensure no specified input is stdin (needs fseek() capability) */
3071     {   unsigned u;
3072         for (u=0; u<numFiles;u++) {
3073             ERROR_IF(!strcmp (filenameTable[u], stdinmark),
3074                     1, "zstd: --list does not support reading from standard input");
3075     }   }
3076 
3077     if (numFiles == 0) {
3078         if (!IS_CONSOLE(stdin)) {
3079             DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n");
3080         }
3081         DISPLAYLEVEL(1, "No files given \n");
3082         return 1;
3083     }
3084 
3085     if (displayLevel <= 2) {
3086         DISPLAYOUT("Frames  Skips  Compressed  Uncompressed  Ratio  Check  Filename\n");
3087     }
3088     {   int error = 0;
3089         fileInfo_t total;
3090         memset(&total, 0, sizeof(total));
3091         total.usesCheck = 1;
3092         /* --list each file, and check for any error */
3093         {   unsigned u;
3094             for (u=0; u<numFiles;u++) {
3095                 error |= FIO_listFile(&total, filenameTable[u], displayLevel);
3096         }   }
3097         if (numFiles > 1 && displayLevel <= 2) {   /* display total */
3098             unsigned const unit = total.compressedSize < (1 MB) ? (1 KB) : (1 MB);
3099             const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB";
3100             double const compressedSizeUnit = (double)total.compressedSize / unit;
3101             double const decompressedSizeUnit = (double)total.decompressedSize / unit;
3102             double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize;
3103             const char* const checkString = (total.usesCheck ? "XXH64" : "");
3104             DISPLAYOUT("----------------------------------------------------------------- \n");
3105             if (total.decompUnavailable) {
3106                 DISPLAYOUT("%6d  %5d  %7.2f %2s                       %5s  %u files\n",
3107                         total.numSkippableFrames + total.numActualFrames,
3108                         total.numSkippableFrames,
3109                         compressedSizeUnit, unitStr,
3110                         checkString, (unsigned)total.nbFiles);
3111             } else {
3112                 DISPLAYOUT("%6d  %5d  %7.2f %2s  %9.2f %2s  %5.3f  %5s  %u files\n",
3113                         total.numSkippableFrames + total.numActualFrames,
3114                         total.numSkippableFrames,
3115                         compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr,
3116                         ratio, checkString, (unsigned)total.nbFiles);
3117         }   }
3118         return error;
3119     }
3120 }
3121 
3122 
3123 #endif /* #ifndef ZSTD_NODECOMPRESS */
3124