1 /*
2  * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 
12 /* *************************************
13 *  Compiler Options
14 ***************************************/
15 #ifdef _MSC_VER   /* Visual */
16 #  pragma warning(disable : 4127)  /* disable: C4127: conditional expression is constant */
17 #  pragma warning(disable : 4204)  /* non-constant aggregate initializer */
18 #endif
19 #if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
20 #  define _POSIX_SOURCE 1          /* disable %llu warnings with MinGW on Windows */
21 #endif
22 
23 /*-*************************************
24 *  Includes
25 ***************************************/
26 #include "platform.h"   /* Large Files support, SET_BINARY_MODE */
27 #include "util.h"       /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */
28 #include <stdio.h>      /* fprintf, fopen, fread, _fileno, stdin, stdout */
29 #include <stdlib.h>     /* malloc, free */
30 #include <string.h>     /* strcmp, strlen */
31 #include <assert.h>
32 #include <errno.h>      /* errno */
33 #include <limits.h>     /* INT_MAX */
34 #include <signal.h>
35 #include "timefn.h"     /* UTIL_getTime, UTIL_clockSpanMicro */
36 
37 #if defined (_MSC_VER)
38 #  include <sys/stat.h>
39 #  include <io.h>
40 #endif
41 
42 #include "../lib/common/mem.h"     /* U32, U64 */
43 #include "fileio.h"
44 
45 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
46 #include "../lib/zstd.h"
47 #include "../lib/common/zstd_errors.h"  /* ZSTD_error_frameParameter_windowTooLarge */
48 
49 #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
50 #  include <zlib.h>
51 #  if !defined(z_const)
52 #    define z_const
53 #  endif
54 #endif
55 
56 #if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
57 #  include <lzma.h>
58 #endif
59 
60 #define LZ4_MAGICNUMBER 0x184D2204
61 #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
62 #  define LZ4F_ENABLE_OBSOLETE_ENUMS
63 #  include <lz4frame.h>
64 #  include <lz4.h>
65 #endif
66 
67 
68 /*-*************************************
69 *  Constants
70 ***************************************/
/* Window log of 23, i.e. a 2^23-byte window.
 * NOTE(review): not referenced in this part of the file; presumably the default for adaptive mode - confirm. */
#define ADAPT_WINDOWLOG_DEFAULT 23   /* 8 MB */
/* Largest dictionary file FIO_createDictBuffer() accepts when patch-from mode is off.
 * Uses the MB suffix macro, which is defined further down; that works because
 * macros are expanded where DICTSIZE_MAX is used, not where it is defined. */
#define DICTSIZE_MAX (32 MB)   /* protection against large input (attack scenario) */

/* NOTE(review): not used in this part of the file; purpose to be confirmed. */
#define FNSPACE 30
75 
76 /*-*************************************
77 *  Macros
78 ***************************************/
/* Size suffixes : written after a number, e.g. `32 MB` expands to `32 *(1 <<20)`.
 * KB and MB produce an int factor, GB an unsigned one.
 * Wrap the use in parentheses when it is part of a larger expression. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
/* MAX() : larger of two values. Each argument may be evaluated twice,
 * so do not pass expressions with side effects. */
#undef MAX
#define MAX(a,b) ((a)>(b) ? (a) : (b))
84 
/* Display settings read by the DISPLAY*() macros of this file.
 * Used below through the FIO_display_prefs_t typedef (declared outside this file). */
struct FIO_display_prefs_s {
    int displayLevel;   /* 0 : no display;  1: errors;  2: + result + interaction + warnings;  3: + progression;  4: + information */
    U32 noProgress;     /* non-zero : DISPLAYUPDATE() never prints */
};

/* Single instance for the whole file; starts at displayLevel 2 with progress display enabled.
 * Updated by FIO_setNotificationLevel() and FIO_setNoProgress(). */
static FIO_display_prefs_t g_display_prefs = {2, 0};
91 
/* DISPLAY() : unconditional printf-style message on stderr. */
#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
/* DISPLAYOUT() : unconditional printf-style message on stdout. */
#define DISPLAYOUT(...)      fprintf(stdout, __VA_ARGS__)
/* DISPLAYLEVEL() : prints on stderr only when the current display level is at least `l`.
 * Expands to a brace-enclosed block, not to an expression. */
#define DISPLAYLEVEL(l, ...) { if (g_display_prefs.displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
95 
/* Minimum delay between two progress updates : SEC_TO_MICRO / 6
 * (presumably one sixth of a second expressed in microseconds - SEC_TO_MICRO is defined elsewhere). */
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
/* Time of the last progress update, refreshed by DELAY_NEXT_UPDATE(). */
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;

/* READY_FOR_UPDATE() : true when progress display is enabled
 * and more than g_refreshRate has elapsed since the last update. */
#define READY_FOR_UPDATE() (!g_display_prefs.noProgress && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate)
/* DELAY_NEXT_UPDATE() : restarts the refresh timer from now. */
#define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); }
/* DISPLAYUPDATE() : rate-limited variant of DISPLAYLEVEL().
 * Prints only when the display level is at least `l` and progress display is enabled,
 * and then only if the refresh delay has elapsed or the display level is >= 4.
 * At display level >= 4, stderr is also flushed after each message. */
#define DISPLAYUPDATE(l, ...) {                              \
        if (g_display_prefs.displayLevel>=l && !g_display_prefs.noProgress) { \
            if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \
                DELAY_NEXT_UPDATE();                         \
                DISPLAY(__VA_ARGS__);                        \
                if (g_display_prefs.displayLevel>=4) fflush(stderr);       \
    }   }   }
108 
/* MIN() : smaller of two values. Each argument may be evaluated twice,
 * so do not pass expressions with side effects. */
#undef MIN  /* in case it would be already defined */
#define MIN(a,b)    ((a) < (b) ? (a) : (b))
111 
112 
/* EXM_THROW() : fatal error.
 * Prints "zstd: error <error> : <formatted message>" on stderr (display level >= 1),
 * preceded by the source location at display level >= 5,
 * then terminates the process with exit(error). Control never returns to the caller.
 * `error` is the process exit code; the remaining arguments are a printf-style format and its values.
 * Expands to a brace-enclosed block, not to an expression. */
#define EXM_THROW(error, ...)                                             \
{                                                                         \
    DISPLAYLEVEL(1, "zstd: ");                                            \
    DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "error %i : ", error);                                \
    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
    DISPLAYLEVEL(1, " \n");                                               \
    exit(error);                                                          \
}
122 
/* CHECK_V() : evaluates `f`, stores its result into `v`,
 * and exits with code 11 through EXM_THROW() when ZSTD_isError(v) is true.
 * The expansion is two statements that are NOT wrapped in braces :
 * do not use it as the body of an unbraced if/else/loop. */
#define CHECK_V(v, f)                                \
    v = f;                                           \
    if (ZSTD_isError(v)) {                           \
        DISPLAYLEVEL(5, "%s \n", #f);                \
        EXM_THROW(11, "%s", ZSTD_getErrorName(v));   \
    }
/* CHECK() : same check for callers that do not need the result of `f`;
 * the result is held in a local size_t scoped to the macro's own block. */
#define CHECK(f) { size_t err; CHECK_V(err, f); }
130 
131 
132 /*-************************************
133 *  Signal (Ctrl-C trapping)
134 **************************************/
/* Name of the partially-written output file to delete on Ctrl-C; NULL when there is none. */
static const char* g_artefact = NULL;

/* INThandler() :
 * SIGINT handler : deletes the file registered in g_artefact, if any,
 * prints a newline on stderr and terminates the process with exit code 2. */
static void INThandler(int sig)
{
    assert(sig==SIGINT);
    (void)sig;
#if !defined(_MSC_VER)
    /* ignore further interrupts while cleaning up
     * (this call generates a buggy warning in Visual Studio, hence the guard) */
    signal(sig, SIG_IGN);
#endif
    if (g_artefact != NULL) {
        assert(UTIL_isRegularFile(g_artefact));
        remove(g_artefact);
    }
    DISPLAY("\n");
    exit(2);
}
addHandler(char const * dstFileName)149 static void addHandler(char const* dstFileName)
150 {
151     if (UTIL_isRegularFile(dstFileName)) {
152         g_artefact = dstFileName;
153         signal(SIGINT, INThandler);
154     } else {
155         g_artefact = NULL;
156     }
157 }
158 /* Idempotent */
clearHandler(void)159 static void clearHandler(void)
160 {
161     if (g_artefact) signal(SIGINT, SIG_DFL);
162     g_artefact = NULL;
163 }
164 
165 
166 /*-*********************************************************
167 *  Termination signal trapping (Print debug stack trace)
168 ***********************************************************/
/* BACKTRACE_ENABLE selection.
 * A value already defined (e.g. on the compiler command line) is never overridden.
 * Step 1 : force it to 0 when the build is detected as using AddressSanitizer. */
#if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */
#  if (__has_feature(address_sanitizer))
#    define BACKTRACE_ENABLE 0
#  endif /* __has_feature(address_sanitizer) */
#elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */
#  define BACKTRACE_ENABLE 0
#endif

/* Step 2 : if still undefined, pick a default from the target platform. */
#if !defined(BACKTRACE_ENABLE)
/* automatic detector : backtrace enabled by default on linux+glibc and osx */
#  if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \
     || (defined(__APPLE__) && defined(__MACH__))
#    define BACKTRACE_ENABLE 1
#  else
#    define BACKTRACE_ENABLE 0
#  endif
#endif
186 
187 /* note : after this point, BACKTRACE_ENABLE is necessarily defined */
188 
189 
190 #if BACKTRACE_ENABLE
191 
192 #include <execinfo.h>   /* backtrace, backtrace_symbols */
193 
194 #define MAX_STACK_FRAMES    50
195 
ABRThandler(int sig)196 static void ABRThandler(int sig) {
197     const char* name;
198     void* addrlist[MAX_STACK_FRAMES];
199     char** symbollist;
200     int addrlen, i;
201 
202     switch (sig) {
203         case SIGABRT: name = "SIGABRT"; break;
204         case SIGFPE: name = "SIGFPE"; break;
205         case SIGILL: name = "SIGILL"; break;
206         case SIGINT: name = "SIGINT"; break;
207         case SIGSEGV: name = "SIGSEGV"; break;
208         default: name = "UNKNOWN";
209     }
210 
211     DISPLAY("Caught %s signal, printing stack:\n", name);
212     /* Retrieve current stack addresses. */
213     addrlen = backtrace(addrlist, MAX_STACK_FRAMES);
214     if (addrlen == 0) {
215         DISPLAY("\n");
216         return;
217     }
218     /* Create readable strings to each frame. */
219     symbollist = backtrace_symbols(addrlist, addrlen);
220     /* Print the stack trace, excluding calls handling the signal. */
221     for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) {
222         DISPLAY("%s\n", symbollist[i]);
223     }
224     free(symbollist);
225     /* Reset and raise the signal so default handler runs. */
226     signal(sig, SIG_DFL);
227     raise(sig);
228 }
229 #endif
230 
/* FIO_addAbortHandler() :
 * Installs ABRThandler() for SIGABRT, SIGFPE, SIGILL, SIGSEGV and SIGBUS,
 * so that a stack trace gets printed when one of them is received.
 * Does nothing when BACKTRACE_ENABLE is 0. */
void FIO_addAbortHandler(void)
{
#if BACKTRACE_ENABLE
    signal(SIGABRT, ABRThandler);
    signal(SIGFPE, ABRThandler);
    signal(SIGILL, ABRThandler);
    signal(SIGSEGV, ABRThandler);
    signal(SIGBUS, ABRThandler);
#endif
}
241 
242 
243 /*-************************************************************
244 * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
245 ***************************************************************/
246 #if defined(_MSC_VER) && _MSC_VER >= 1400
247 #   define LONG_SEEK _fseeki64
248 #   define LONG_TELL _ftelli64
249 #elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
250 #  define LONG_SEEK fseeko
251 #  define LONG_TELL ftello
252 #elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
253 #   define LONG_SEEK fseeko64
254 #   define LONG_TELL ftello64
255 #elif defined(_WIN32) && !defined(__DJGPP__)
256 #   include <windows.h>
LONG_SEEK(FILE * file,__int64 offset,int origin)257     static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
258         LARGE_INTEGER off;
259         DWORD method;
260         off.QuadPart = offset;
261         if (origin == SEEK_END)
262             method = FILE_END;
263         else if (origin == SEEK_CUR)
264             method = FILE_CURRENT;
265         else
266             method = FILE_BEGIN;
267 
268         if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
269             return 0;
270         else
271             return -1;
272     }
LONG_TELL(FILE * file)273     static __int64 LONG_TELL(FILE* file) {
274         LARGE_INTEGER off, newOff;
275         off.QuadPart = 0;
276         newOff.QuadPart = 0;
277         SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT);
278         return newOff.QuadPart;
279     }
280 #else
281 #   define LONG_SEEK fseek
282 #   define LONG_TELL ftell
283 #endif
284 
285 
286 /*-*************************************
287 *  Parameters: FIO_prefs_t
288 ***************************************/
289 
290 /* typedef'd to FIO_prefs_t within fileio.h */
/* User preferences for file operations.
 * Instances are allocated and given defaults by FIO_createPreferences(),
 * modified through the FIO_set*() functions below, and released by FIO_freePreferences(). */
struct FIO_prefs_s {

    /* Algorithm preferences */
    FIO_compressionType_t compressionType;   /* FIO_setCompressionType() */
    U32 sparseFileSupport;   /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */
    int dictIDFlag;          /* FIO_setDictIDFlag() */
    int checksumFlag;        /* FIO_setChecksumFlag() */
    int blockSize;           /* FIO_setBlockSize() */
    int overlapLog;          /* FIO_setOverlapLog(); FIO_OVERLAP_LOG_NOTSET until set */
    U32 adaptiveMode;        /* FIO_setAdaptiveMode(); refused when nbWorkers==0 */
    int rsyncable;           /* FIO_setRsyncable(); refused when nbWorkers==0 */
    int minAdaptLevel;       /* FIO_setAdaptMin() */
    int maxAdaptLevel;       /* FIO_setAdaptMax() */
    int ldmFlag;             /* FIO_setLdmFlag(); stored as 0 or 1 */
    int ldmHashLog;          /* FIO_setLdmHashLog() */
    int ldmMinMatch;         /* FIO_setLdmMinMatch() */
    int ldmBucketSizeLog;    /* FIO_setLdmBucketSizeLog(); FIO_LDM_PARAM_NOTSET until set */
    int ldmHashRateLog;      /* FIO_setLdmHashRateLog(); FIO_LDM_PARAM_NOTSET until set */
    size_t streamSrcSize;    /* FIO_setStreamSrcSize() */
    size_t targetCBlockSize; /* FIO_setTargetCBlockSize() */
    int srcSizeHint;         /* FIO_setSrcSizeHint(); clamped to INT_MAX */
    int testMode;            /* FIO_setTestMode(); stored as 0 or 1. When set, FIO_openDstFile() opens nothing */
    ZSTD_literalCompressionMode_e literalCompressionMode;   /* FIO_setLiteralCompressionMode() */

    /* IO preferences */
    U32 removeSrcFile;       /* FIO_setRemoveSrcFile(); stored as 0 or 1 */
    U32 overwrite;           /* FIO_overwriteMode(); when set, FIO_openDstFile() replaces an existing file without asking */

    /* Computation resources preferences */
    unsigned memLimit;       /* FIO_setMemLimit(); also the dictionary size limit in patch-from mode */
    int nbWorkers;           /* FIO_setNbWorkers(); 0 is treated as single-thread mode by the setters */

    int excludeCompressedFiles;   /* FIO_setExcludeCompressedFile() */
    int patchFromMode;       /* FIO_setPatchFromMode(); stored as 0 or 1 */
    int contentSize;         /* FIO_setContentSize(); stored as 0 or 1 */
};
327 
328 /*-*************************************
329 *  Parameters: FIO_ctx_t
330 ***************************************/
331 
332 /* typedef'd to FIO_ctx_t within fileio.h */
/* State of a multi-file operation.
 * Instances are allocated and given defaults by FIO_createContext()
 * and released by FIO_freeContext(). */
struct FIO_ctx_s {

    /* file i/o info */
    int nbFilesTotal;      /* FIO_setNbFilesTotal(); starts at 1 */
    int hasStdinInput;     /* set to 1 by FIO_determineHasStdinInput() when stdinmark is one of the inputs */
    int hasStdoutOutput;   /* FIO_setHasStdoutOutput() */

    /* file i/o state */
    int currFileIdx;           /* starts at 0 */
    int nbFilesProcessed;      /* starts at 0 */
    size_t totalBytesInput;    /* starts at 0 */
    size_t totalBytesOutput;   /* starts at 0 */
};
346 
347 
348 /*-*************************************
349 *  Parameters: Initialization
350 ***************************************/
351 
/* Sentinel values stored by FIO_createPreferences() in overlapLog, ldmBucketSizeLog
 * and ldmHashRateLog, presumably meaning "not specified by the user". */
#define FIO_OVERLAP_LOG_NOTSET 9999
#define FIO_LDM_PARAM_NOTSET 9999
354 
355 
FIO_createPreferences(void)356 FIO_prefs_t* FIO_createPreferences(void)
357 {
358     FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t));
359     if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
360 
361     ret->compressionType = FIO_zstdCompression;
362     ret->overwrite = 0;
363     ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
364     ret->dictIDFlag = 1;
365     ret->checksumFlag = 1;
366     ret->removeSrcFile = 0;
367     ret->memLimit = 0;
368     ret->nbWorkers = 1;
369     ret->blockSize = 0;
370     ret->overlapLog = FIO_OVERLAP_LOG_NOTSET;
371     ret->adaptiveMode = 0;
372     ret->rsyncable = 0;
373     ret->minAdaptLevel = -50;   /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */
374     ret->maxAdaptLevel = 22;   /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
375     ret->ldmFlag = 0;
376     ret->ldmHashLog = 0;
377     ret->ldmMinMatch = 0;
378     ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
379     ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
380     ret->streamSrcSize = 0;
381     ret->targetCBlockSize = 0;
382     ret->srcSizeHint = 0;
383     ret->testMode = 0;
384     ret->literalCompressionMode = ZSTD_lcm_auto;
385     ret->excludeCompressedFiles = 0;
386     return ret;
387 }
388 
FIO_createContext(void)389 FIO_ctx_t* FIO_createContext(void)
390 {
391     FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t));
392     if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
393 
394     ret->currFileIdx = 0;
395     ret->hasStdinInput = 0;
396     ret->hasStdoutOutput = 0;
397     ret->nbFilesTotal = 1;
398     ret->nbFilesProcessed = 0;
399     ret->totalBytesInput = 0;
400     ret->totalBytesOutput = 0;
401     return ret;
402 }
403 
/* FIO_freePreferences() :
 * Releases an object created by FIO_createPreferences(). Accepts NULL. */
void FIO_freePreferences(FIO_prefs_t* const prefs)
{
    free(prefs);
}
408 
/* FIO_freeContext() :
 * Releases an object created by FIO_createContext(). Accepts NULL. */
void FIO_freeContext(FIO_ctx_t* const fCtx)
{
    free(fCtx);
}
413 
414 
415 /*-*************************************
416 *  Parameters: Display Options
417 ***************************************/
418 
FIO_setNotificationLevel(int level)419 void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
420 
FIO_setNoProgress(unsigned noProgress)421 void FIO_setNoProgress(unsigned noProgress) { g_display_prefs.noProgress = noProgress; }
422 
423 
424 /*-*************************************
425 *  Parameters: Setters
426 ***************************************/
427 
428 /* FIO_prefs_t functions */
429 
/* FIO_setCompressionType() : stores the requested compression format. */
void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType)
{
    prefs->compressionType = compressionType;
}
431 
/* FIO_overwriteMode() : enables overwriting of existing destination files. There is no way to turn it back off. */
void FIO_overwriteMode(FIO_prefs_t* const prefs)
{
    prefs->overwrite = 1;
}
433 
/* FIO_setSparseWrite() : stores the sparse-file policy (0: none; 1: auto; 2: forced). */
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse)
{
    prefs->sparseFileSupport = sparse;
}
435 
/* FIO_setDictIDFlag() : stores `dictIDFlag` as is. */
void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag)
{
    prefs->dictIDFlag = dictIDFlag;
}
437 
/* FIO_setChecksumFlag() : stores `checksumFlag` as is. */
void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag)
{
    prefs->checksumFlag = checksumFlag;
}
439 
/* FIO_setRemoveSrcFile() : any non-zero `flag` is stored as 1, zero as 0. */
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag)
{
    prefs->removeSrcFile = (flag != 0) ? 1 : 0;
}
441 
/* FIO_setMemLimit() : stores `memLimit` as is. */
void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit)
{
    prefs->memLimit = memLimit;
}
443 
/* FIO_setNbWorkers() :
 * Stores the requested number of workers.
 * When built without ZSTD_MULTITHREAD, a positive request additionally
 * triggers a notice (display level >= 2); the value is stored regardless. */
void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers)
{
#ifndef ZSTD_MULTITHREAD
    if (nbWorkers > 0) {
        DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
    }
#endif
    prefs->nbWorkers = nbWorkers;
}
450 
/* FIO_setExcludeCompressedFile() : stores `excludeCompressedFiles` as is. */
void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles)
{
    prefs->excludeCompressedFiles = excludeCompressedFiles;
}
452 
/* FIO_setBlockSize() :
 * Stores `blockSize`. A non-zero value combined with nbWorkers==0
 * triggers a notice (display level >= 2), since it has no effect in that mode. */
void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize)
{
    if ((blockSize != 0) && (prefs->nbWorkers == 0)) {
        DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
    }
    prefs->blockSize = blockSize;
}
458 
/* FIO_setOverlapLog() :
 * Stores `overlapLog`. A non-zero value combined with nbWorkers==0
 * triggers a notice (display level >= 2), since it has no effect in that mode. */
void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog)
{
    if ((overlapLog != 0) && (prefs->nbWorkers == 0)) {
        DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
    }
    prefs->overlapLog = overlapLog;
}
464 
/* FIO_setAdaptiveMode() :
 * Stores `adapt`. Requesting adaptive mode (adapt > 0) while nbWorkers==0
 * is a fatal error : the process exits with code 1. */
void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt)
{
    int const singleThread = (prefs->nbWorkers == 0);
    if (singleThread && (adapt > 0)) {
        EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n");
    }
    prefs->adaptiveMode = adapt;
}
470 
/* FIO_setRsyncable() :
 * Stores `rsyncable`. Requesting it (rsyncable > 0) while nbWorkers==0
 * is a fatal error : the process exits with code 1. */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable)
{
    int const singleThread = (prefs->nbWorkers == 0);
    if (singleThread && (rsyncable > 0)) {
        EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
    }
    prefs->rsyncable = rsyncable;
}
476 
/* FIO_setStreamSrcSize() : stores `streamSrcSize` as is. */
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize)
{
    prefs->streamSrcSize = streamSrcSize;
}
480 
/* FIO_setTargetCBlockSize() : stores `targetCBlockSize` as is. */
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize)
{
    prefs->targetCBlockSize = targetCBlockSize;
}
484 
/* FIO_setSrcSizeHint() :
 * Stores the hint into an int field; values above INT_MAX are clamped to INT_MAX. */
void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint)
{
    if (srcSizeHint > (size_t)INT_MAX) {
        prefs->srcSizeHint = INT_MAX;
    } else {
        prefs->srcSizeHint = (int)srcSizeHint;
    }
}
488 
/* FIO_setTestMode() : any non-zero `testMode` is stored as 1, zero as 0. */
void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode)
{
    prefs->testMode = (testMode != 0) ? 1 : 0;
}
492 
/* FIO_setLiteralCompressionMode() : stores `mode` as is. */
void FIO_setLiteralCompressionMode(FIO_prefs_t* const prefs,
                                   ZSTD_literalCompressionMode_e mode)
{
    prefs->literalCompressionMode = mode;
}
498 
/* FIO_setAdaptMin() :
 * Stores the lower bound of the adaptive compression level.
 * Unless built with ZSTD_NOCOMPRESS, debug builds assert that it is not below ZSTD_minCLevel(). */
void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel)
{
#ifndef ZSTD_NOCOMPRESS
    assert(ZSTD_minCLevel() <= minCLevel);
#endif
    prefs->minAdaptLevel = minCLevel;
}
506 
/* FIO_setAdaptMax() : stores the upper bound of the adaptive compression level, unchecked. */
void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel)
{
    prefs->maxAdaptLevel = maxCLevel;
}
511 
/* FIO_setLdmFlag() : any non-zero `ldmFlag` is stored as 1, zero as 0. */
void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag)
{
    prefs->ldmFlag = (ldmFlag != 0) ? 1 : 0;
}
515 
/* FIO_setLdmHashLog() : stores `ldmHashLog` as is. */
void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog)
{
    prefs->ldmHashLog = ldmHashLog;
}
519 
/* FIO_setLdmMinMatch() : stores `ldmMinMatch` as is. */
void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch)
{
    prefs->ldmMinMatch = ldmMinMatch;
}
523 
/* FIO_setLdmBucketSizeLog() : stores `ldmBucketSizeLog` as is. */
void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog)
{
    prefs->ldmBucketSizeLog = ldmBucketSizeLog;
}
527 
528 
/* FIO_setLdmHashRateLog() : stores `ldmHashRateLog` as is. */
void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog)
{
    prefs->ldmHashRateLog = ldmHashRateLog;
}
532 
/* FIO_setPatchFromMode() : any non-zero `value` is stored as 1, zero as 0. */
void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value)
{
    prefs->patchFromMode = (value != 0) ? 1 : 0;
}
537 
/* FIO_setContentSize() : any non-zero `value` is stored as 1, zero as 0. */
void FIO_setContentSize(FIO_prefs_t* const prefs, int value)
{
    prefs->contentSize = (value != 0) ? 1 : 0;
}
542 
543 /* FIO_ctx_t functions */
544 
/* FIO_setHasStdoutOutput() : stores `value` as is. */
void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value)
{
    fCtx->hasStdoutOutput = value;
}
548 
/* FIO_setNbFilesTotal() : stores `value` as is. */
void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value)
{
    fCtx->nbFilesTotal = value;
}
553 
/* FIO_determineHasStdinInput() :
 * Sets fCtx->hasStdinInput to 1 when one of the names in `filenames` is stdinmark.
 * The flag is left untouched (not reset to 0) when no name matches. */
void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames)
{
    size_t const nbNames = filenames->tableSize;
    size_t idx;
    for (idx = 0; idx < nbNames; idx++) {
        if (strcmp(stdinmark, filenames->fileNames[idx]) == 0) {
            fCtx->hasStdinInput = 1;
            break;
        }
    }
}
563 
564 /*-*************************************
565 *  Functions
566 ***************************************/
567 /** FIO_removeFile() :
568  * @result : Unlink `fileName`, even if it's read-only */
FIO_removeFile(const char * path)569 static int FIO_removeFile(const char* path)
570 {
571     stat_t statbuf;
572     if (!UTIL_stat(path, &statbuf)) {
573         DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path);
574         return 0;
575     }
576     if (!UTIL_isRegularFileStat(&statbuf)) {
577         DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path);
578         return 0;
579     }
580 #if defined(_WIN32) || defined(WIN32)
581     /* windows doesn't allow remove read-only files,
582      * so try to make it writable first */
583     if (!(statbuf.st_mode & _S_IWRITE)) {
584         UTIL_chmod(path, &statbuf, _S_IWRITE);
585     }
586 #endif
587     return remove(path);
588 }
589 
590 /** FIO_openSrcFile() :
591  *  condition : `srcFileName` must be non-NULL.
592  * @result : FILE* to `srcFileName`, or NULL if it fails */
FIO_openSrcFile(const char * srcFileName)593 static FILE* FIO_openSrcFile(const char* srcFileName)
594 {
595     stat_t statbuf;
596     assert(srcFileName != NULL);
597     if (!strcmp (srcFileName, stdinmark)) {
598         DISPLAYLEVEL(4,"Using stdin for input \n");
599         SET_BINARY_MODE(stdin);
600         return stdin;
601     }
602 
603     if (!UTIL_stat(srcFileName, &statbuf)) {
604         DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n",
605                         srcFileName, strerror(errno));
606         return NULL;
607     }
608 
609     if (!UTIL_isRegularFileStat(&statbuf)
610      && !UTIL_isFIFOStat(&statbuf)
611     ) {
612         DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
613                         srcFileName);
614         return NULL;
615     }
616 
617     {   FILE* const f = fopen(srcFileName, "rb");
618         if (f == NULL)
619             DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
620         return f;
621     }
622 }
623 
624 /** FIO_openDstFile() :
625  *  condition : `dstFileName` must be non-NULL.
626  * @result : FILE* to `dstFileName`, or NULL if it fails */
627 static FILE*
FIO_openDstFile(FIO_ctx_t * fCtx,FIO_prefs_t * const prefs,const char * srcFileName,const char * dstFileName)628 FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
629                 const char* srcFileName, const char* dstFileName)
630 {
631     if (prefs->testMode) return NULL;  /* do not open file in test mode */
632 
633     assert(dstFileName != NULL);
634     if (!strcmp (dstFileName, stdoutmark)) {
635         DISPLAYLEVEL(4,"Using stdout for output \n");
636         SET_BINARY_MODE(stdout);
637         if (prefs->sparseFileSupport == 1) {
638             prefs->sparseFileSupport = 0;
639             DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
640         }
641         return stdout;
642     }
643 
644     /* ensure dst is not the same as src */
645     if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) {
646         DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n");
647         return NULL;
648     }
649 
650     if (prefs->sparseFileSupport == 1) {
651         prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
652     }
653 
654     if (UTIL_isRegularFile(dstFileName)) {
655         /* Check if destination file already exists */
656         FILE* const fCheck = fopen( dstFileName, "rb" );
657 #if !defined(_WIN32)
658         /* this test does not work on Windows :
659          * `NUL` and `nul` are detected as regular files */
660         if (!strcmp(dstFileName, nulmark)) {
661             EXM_THROW(40, "%s is unexpectedly categorized as a regular file",
662                         dstFileName);
663         }
664 #endif
665         if (fCheck != NULL) {  /* dst file exists, authorization prompt */
666             fclose(fCheck);
667             if (!prefs->overwrite) {
668                 if (g_display_prefs.displayLevel <= 1) {
669                     /* No interaction possible */
670                     DISPLAY("zstd: %s already exists; not overwritten  \n",
671                             dstFileName);
672                     return NULL;
673                 }
674                 DISPLAY("zstd: %s already exists; ", dstFileName);
675                 if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten  \n", "yY", fCtx->hasStdinInput))
676                     return NULL;
677             }
678             /* need to unlink */
679             FIO_removeFile(dstFileName);
680     }   }
681 
682     {   const int old_umask = UTIL_umask(0177); /* u-x,go-rwx */
683         FILE* const f = fopen( dstFileName, "wb" );
684         UTIL_umask(old_umask);
685         if (f == NULL) {
686             DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
687         }
688         return f;
689     }
690 }
691 
692 /*! FIO_createDictBuffer() :
693  *  creates a buffer, pointed by `*bufferPtr`,
694  *  loads `filename` content into it, up to DICTSIZE_MAX bytes.
695  * @return : loaded size
696  *  if fileName==NULL, returns 0 and a NULL pointer
697  */
FIO_createDictBuffer(void ** bufferPtr,const char * fileName,FIO_prefs_t * const prefs)698 static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs)
699 {
700     FILE* fileHandle;
701     U64 fileSize;
702 
703     assert(bufferPtr != NULL);
704     *bufferPtr = NULL;
705     if (fileName == NULL) return 0;
706 
707     DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
708     fileHandle = fopen(fileName, "rb");
709     if (fileHandle==NULL) EXM_THROW(31, "%s: %s", fileName, strerror(errno));
710 
711     fileSize = UTIL_getFileSize(fileName);
712     {
713         size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
714         if (fileSize >  dictSizeMax) {
715             EXM_THROW(32, "Dictionary file %s is too large (> %u bytes)",
716                             fileName,  (unsigned)dictSizeMax);   /* avoid extreme cases */
717         }
718     }
719     *bufferPtr = malloc((size_t)fileSize);
720     if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
721     {   size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
722         if (readSize != fileSize)
723             EXM_THROW(35, "Error reading dictionary file %s : %s",
724                     fileName, strerror(errno));
725     }
726     fclose(fileHandle);
727     return (size_t)fileSize;
728 }
729 
730 
731 
732 /* FIO_checkFilenameCollisions() :
733  * Checks for and warns if there are any files that would have the same output path
734  */
FIO_checkFilenameCollisions(const char ** filenameTable,unsigned nbFiles)735 int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
736     const char **filenameTableSorted, *prevElem, *filename;
737     unsigned u;
738 
739     filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
740     if (!filenameTableSorted) {
741         DISPLAY("Unable to malloc new str array, not checking for name collisions\n");
742         return 1;
743     }
744 
745     for (u = 0; u < nbFiles; ++u) {
746         filename = strrchr(filenameTable[u], PATH_SEP);
747         if (filename == NULL) {
748             filenameTableSorted[u] = filenameTable[u];
749         } else {
750             filenameTableSorted[u] = filename+1;
751         }
752     }
753 
754     qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
755     prevElem = filenameTableSorted[0];
756     for (u = 1; u < nbFiles; ++u) {
757         if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
758             DISPLAY("WARNING: Two files have same filename: %s\n", prevElem);
759         }
760         prevElem = filenameTableSorted[u];
761     }
762 
763     free((void*)filenameTableSorted);
764     return 0;
765 }
766 
/* extractFilename() :
 * @return : pointer, inside `path`, to the character following the last `separator`;
 *           `path` itself when it contains no `separator`. */
static const char*
extractFilename(const char* path, char separator)
{
    const char* const lastSep = strrchr(path, separator);
    return (lastSep != NULL) ? lastSep + 1 : path;
}
774 
/* FIO_createFilename_fromOutDir() :
 * Takes a source file name and specified output directory, and
 * allocates memory for and returns a pointer to final path :
 * `outDirName`, a separator if `outDirName` doesn't already end with one,
 * then the last component of `path`.
 * An empty `outDirName` yields the bare file name (no leading separator).
 * The buffer is zero-filled and has room for `suffixLen` additional characters
 * plus the terminating NUL, so that the caller can append a suffix in place.
 * The caller owns the result and must free() it.
 * This function never returns an error : it terminates the process
 * through EXM_THROW() (exit code 30) when allocation fails.
 */
static char*
FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen)
{
    const char* filenameStart;
    char separator;
    char* result;
    size_t dirLen;
    size_t nameLen;
    size_t pos;

#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    separator = '\\';
#else
    separator = '/';
#endif

    filenameStart = extractFilename(path, separator);
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    filenameStart = extractFilename(filenameStart, '/');  /* sometimes, '/' separator is also used on Windows (mingw+msys2) */
#endif

    dirLen = strlen(outDirName);
    nameLen = strlen(filenameStart);

    /* sized for : directory + separator + file name + suffix + NUL */
    result = (char*) calloc(1, dirLen + 1 + nameLen + suffixLen + 1);
    if (!result) {
        EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno));
    }

    memcpy(result, outDirName, dirLen);
    pos = dirLen;
    /* dirLen is tested first : outDirName[dirLen-1] must not be read for an empty string */
    if ((dirLen > 0) && (outDirName[dirLen-1] != separator)) {
        result[pos++] = separator;
    }
    memcpy(result + pos, filenameStart, nameLen);

    return result;
}
813 
/* FIO_highbit64() :
 * @return : zero-based index of the most significant bit set in `v`
 *           (0 for 1, 1 for 2 and 3, ..., 63 when the top bit is set).
 * note : `v` must be > 0 (checked by assert() only)
 */
static unsigned FIO_highbit64(unsigned long long v)
{
    unsigned pos;
    assert(v != 0);
    /* count how many times v can be halved before only its top bit remains */
    for (pos = 0; (v >> 1) != 0; v >>= 1) {
        pos++;
    }
    return pos;
}
826 
/* FIO_adjustMemLimitForPatchFromMode() :
 * Sets the memory limit of `prefs` to the largest of : its current memLimit,
 * `dictSize` and `maxSrcFileSize`.
 * Reports error 42 through EXM_THROW() when that size is UTIL_FILESIZE_UNKNOWN,
 * or when it exceeds (1 << ZSTD_WINDOWLOG_MAX).
 */
static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs,
                                    unsigned long long const dictSize,
                                    unsigned long long const maxSrcFileSize)
{
    unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX);
    unsigned long long const largestInput = MAX(dictSize, maxSrcFileSize);
    unsigned long long const maxSize = MAX(prefs->memLimit, largestInput);

    if (maxSize == UTIL_FILESIZE_UNKNOWN) {
        EXM_THROW(42, "Using --patch-from with stdin requires --stream-size");
    }
    assert(maxSize != UTIL_FILESIZE_UNKNOWN);

    if (maxSize > maxWindowSize) {
        EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB));
    }

    /* maxSize <= maxWindowSize at this point, so it fits in an unsigned */
    FIO_setMemLimit(prefs, (unsigned)maxSize);
}
840 
841 /* FIO_removeMultiFilesWarning() :
842  * Returns 1 if the console should abort, 0 if console should proceed.
843  * This function handles logic when processing multiple files with -o, displaying the appropriate warnings/prompts.
844  *
845  * If -f is specified, or there is just 1 file, zstd will always proceed as usual.
846  * If --rm is specified, there will be a prompt asking for user confirmation.
847  *         If -f is specified with --rm, zstd will proceed as usual
848  *         If -q is specified with --rm, zstd will abort pre-emptively
849  *         If neither flag is specified, zstd will prompt the user for confirmation to proceed.
850  * If --rm is not specified, then zstd will print a warning to the user (which can be silenced with -q).
851  * However, if the output is stdout, we will always abort rather than displaying the warning prompt.
852  */
FIO_removeMultiFilesWarning(FIO_ctx_t * const fCtx,const FIO_prefs_t * const prefs,const char * outFileName,int displayLevelCutoff)853 static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t* const prefs, const char* outFileName, int displayLevelCutoff)
854 {
855     int error = 0;
856     if (fCtx->nbFilesTotal > 1 && !prefs->overwrite) {
857         if (g_display_prefs.displayLevel <= displayLevelCutoff) {
858             if (prefs->removeSrcFile) {
859                 DISPLAYLEVEL(1, "zstd: Aborting... not deleting files and processing into dst: %s", outFileName);
860                 error =  1;
861             }
862         } else {
863             if (!strcmp(outFileName, stdoutmark)) {
864                 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. ");
865             } else {
866                 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s ", outFileName);
867             }
868             DISPLAYLEVEL(2, "\nThe concatenated output CANNOT regenerate the original directory tree. ")
869             if (prefs->removeSrcFile) {
870                 if (fCtx->hasStdoutOutput) {
871                     DISPLAYLEVEL(1, "\nAborting. Use -f if you really want to delete the files and output to stdout");
872                     error = 1;
873                 } else {
874                     error = g_display_prefs.displayLevel > displayLevelCutoff && UTIL_requireUserConfirmation("This is a destructive operation. Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput);
875                 }
876             }
877         }
878         DISPLAY("\n");
879     }
880     return error;
881 }
882 
883 #ifndef ZSTD_NOCOMPRESS
884 
885 /* **********************************************************************
886  *  Compression
887  ************************************************************************/
888 typedef struct {
889     FILE* srcFile;
890     FILE* dstFile;
891     void*  srcBuffer;
892     size_t srcBufferSize;
893     void*  dstBuffer;
894     size_t dstBufferSize;
895     void* dictBuffer;
896     size_t dictBufferSize;
897     const char* dictFileName;
898     ZSTD_CStream* cctx;
899 } cRess_t;
900 
901 /** ZSTD_cycleLog() :
902  *  condition for correct operation : hashLog > 1 */
ZSTD_cycleLog(U32 hashLog,ZSTD_strategy strat)903 static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
904 {
905     U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
906     assert(hashLog > 1);
907     return hashLog - btScale;
908 }
909 
/* FIO_adjustParamsForPatchFromMode() :
 * Tunes `prefs` and `comprParams` for --patch-from :
 * - raises the memory limit (see FIO_adjustMemLimitForPatchFromMode()),
 * - sets comprParams->windowLog so that the window covers `maxSrcFileSize`,
 *   clamped to [ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX],
 * - enables long distance matching when the required window log exceeds
 *   the cycle log of the parameters selected for `cLevel`,
 * - prints tuning advice when the selected strategy is ZSTD_btopt or higher.
 */
static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
                                    ZSTD_compressionParameters* comprParams,
                                    unsigned long long const dictSize,
                                    unsigned long long const maxSrcFileSize,
                                    int cLevel)
{
    /* FIO_highbit64() is only defined for v > 0 : an empty source requires no window */
    unsigned const fileWindowLog = (maxSrcFileSize > 0) ? FIO_highbit64(maxSrcFileSize) + 1 : 0;
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize);
    FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize);
    if (fileWindowLog > ZSTD_WINDOWLOG_MAX)
        DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
    /* a non-zero windowLog below ZSTD_WINDOWLOG_MIN is rejected by ZSTD_CCtx_setParameter() :
     * small sources must be rounded up to the minimum */
    comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
    if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
        if (!prefs->ldmFlag)
            DISPLAYLEVEL(1, "long mode automatically triggered\n");
        FIO_setLdmFlag(prefs, 1);
    }
    if (cParams.strategy >= ZSTD_btopt) {
        DISPLAYLEVEL(1, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
        DISPLAYLEVEL(1, "- Use --single-thread mode in the zstd cli\n");
        DISPLAYLEVEL(1, "- Set a larger targetLength (eg. --zstd=targetLength=4096)\n");
        DISPLAYLEVEL(1, "- Set a larger chainLog (eg. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
        DISPLAYLEVEL(1, "Also consider playing around with searchLog and hashLog\n");
    }
}
935 
FIO_createCResources(FIO_prefs_t * const prefs,const char * dictFileName,unsigned long long const maxSrcFileSize,int cLevel,ZSTD_compressionParameters comprParams)936 static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
937                                     const char* dictFileName, unsigned long long const maxSrcFileSize,
938                                     int cLevel, ZSTD_compressionParameters comprParams) {
939     cRess_t ress;
940     memset(&ress, 0, sizeof(ress));
941 
942     DISPLAYLEVEL(6, "FIO_createCResources \n");
943     ress.cctx = ZSTD_createCCtx();
944     if (ress.cctx == NULL)
945         EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
946                     strerror(errno));
947     ress.srcBufferSize = ZSTD_CStreamInSize();
948     ress.srcBuffer = malloc(ress.srcBufferSize);
949     ress.dstBufferSize = ZSTD_CStreamOutSize();
950 
951     /* need to update memLimit before calling createDictBuffer
952      * because of memLimit check inside it */
953     if (prefs->patchFromMode) {
954         unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
955         FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
956     }
957     ress.dstBuffer = malloc(ress.dstBufferSize);
958     ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs);   /* works with dictFileName==NULL */
959     if (!ress.srcBuffer || !ress.dstBuffer)
960         EXM_THROW(31, "allocation error : not enough memory");
961 
962     /* Advanced parameters, including dictionary */
963     if (dictFileName && (ress.dictBuffer==NULL))
964         EXM_THROW(32, "allocation error : can't create dictBuffer");
965     ress.dictFileName = dictFileName;
966 
967     if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
968         comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
969 
970     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) );  /* always enable content size when available (note: supposed to be default) */
971     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
972     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
973     /* compression level */
974     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
975     /* max compressed block size */
976     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
977     /* source size hint */
978     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
979     /* long distance matching */
980     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
981     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
982     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
983     if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
984         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
985     }
986     if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
987         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
988     }
989     /* compression parameters */
990     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
991     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
992     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
993     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
994     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
995     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
996     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) );
997     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
998     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) );
999     /* multi-threading */
1000 #ifdef ZSTD_MULTITHREAD
1001     DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
1002     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
1003     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
1004     if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
1005         DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
1006         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
1007     }
1008     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
1009 #endif
1010     /* dictionary */
1011     if (prefs->patchFromMode) {
1012         CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
1013     } else {
1014         CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
1015     }
1016 
1017     return ress;
1018 }
1019 
FIO_freeCResources(const cRess_t * const ress)1020 static void FIO_freeCResources(const cRess_t* const ress)
1021 {
1022     free(ress->srcBuffer);
1023     free(ress->dstBuffer);
1024     free(ress->dictBuffer);
1025     ZSTD_freeCStream(ress->cctx);   /* never fails */
1026 }
1027 
1028 
1029 #ifdef ZSTD_GZCOMPRESS
1030 static unsigned long long
FIO_compressGzFrame(const cRess_t * ress,const char * srcFileName,U64 const srcFileSize,int compressionLevel,U64 * readsize)1031 FIO_compressGzFrame(const cRess_t* ress,  /* buffers & handlers are used, but not changed */
1032                     const char* srcFileName, U64 const srcFileSize,
1033                     int compressionLevel, U64* readsize)
1034 {
1035     unsigned long long inFileSize = 0, outFileSize = 0;
1036     z_stream strm;
1037 
1038     if (compressionLevel > Z_BEST_COMPRESSION)
1039         compressionLevel = Z_BEST_COMPRESSION;
1040 
1041     strm.zalloc = Z_NULL;
1042     strm.zfree = Z_NULL;
1043     strm.opaque = Z_NULL;
1044 
1045     {   int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
1046                         15 /* maxWindowLogSize */ + 16 /* gzip only */,
1047                         8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */
1048         if (ret != Z_OK) {
1049             EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
1050     }   }
1051 
1052     strm.next_in = 0;
1053     strm.avail_in = 0;
1054     strm.next_out = (Bytef*)ress->dstBuffer;
1055     strm.avail_out = (uInt)ress->dstBufferSize;
1056 
1057     while (1) {
1058         int ret;
1059         if (strm.avail_in == 0) {
1060             size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
1061             if (inSize == 0) break;
1062             inFileSize += inSize;
1063             strm.next_in = (z_const unsigned char*)ress->srcBuffer;
1064             strm.avail_in = (uInt)inSize;
1065         }
1066         ret = deflate(&strm, Z_NO_FLUSH);
1067         if (ret != Z_OK)
1068             EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
1069         {   size_t const cSize = ress->dstBufferSize - strm.avail_out;
1070             if (cSize) {
1071                 if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
1072                     EXM_THROW(73, "Write error : cannot write to output file : %s ", strerror(errno));
1073                 outFileSize += cSize;
1074                 strm.next_out = (Bytef*)ress->dstBuffer;
1075                 strm.avail_out = (uInt)ress->dstBufferSize;
1076         }   }
1077         if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
1078             DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ",
1079                             (unsigned)(inFileSize>>20),
1080                             (double)outFileSize/inFileSize*100)
1081         } else {
1082             DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ",
1083                             (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
1084                             (double)outFileSize/inFileSize*100);
1085     }   }
1086 
1087     while (1) {
1088         int const ret = deflate(&strm, Z_FINISH);
1089         {   size_t const cSize = ress->dstBufferSize - strm.avail_out;
1090             if (cSize) {
1091                 if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
1092                     EXM_THROW(75, "Write error : %s ", strerror(errno));
1093                 outFileSize += cSize;
1094                 strm.next_out = (Bytef*)ress->dstBuffer;
1095                 strm.avail_out = (uInt)ress->dstBufferSize;
1096         }   }
1097         if (ret == Z_STREAM_END) break;
1098         if (ret != Z_BUF_ERROR)
1099             EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
1100     }
1101 
1102     {   int const ret = deflateEnd(&strm);
1103         if (ret != Z_OK) {
1104             EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
1105     }   }
1106     *readsize = inFileSize;
1107     return outFileSize;
1108 }
1109 #endif
1110 
1111 
1112 #ifdef ZSTD_LZMACOMPRESS
1113 static unsigned long long
FIO_compressLzmaFrame(cRess_t * ress,const char * srcFileName,U64 const srcFileSize,int compressionLevel,U64 * readsize,int plain_lzma)1114 FIO_compressLzmaFrame(cRess_t* ress,
1115                       const char* srcFileName, U64 const srcFileSize,
1116                       int compressionLevel, U64* readsize, int plain_lzma)
1117 {
1118     unsigned long long inFileSize = 0, outFileSize = 0;
1119     lzma_stream strm = LZMA_STREAM_INIT;
1120     lzma_action action = LZMA_RUN;
1121     lzma_ret ret;
1122 
1123     if (compressionLevel < 0) compressionLevel = 0;
1124     if (compressionLevel > 9) compressionLevel = 9;
1125 
1126     if (plain_lzma) {
1127         lzma_options_lzma opt_lzma;
1128         if (lzma_lzma_preset(&opt_lzma, compressionLevel))
1129             EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName);
1130         ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
1131         if (ret != LZMA_OK)
1132             EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
1133     } else {
1134         ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
1135         if (ret != LZMA_OK)
1136             EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
1137     }
1138 
1139     strm.next_in = 0;
1140     strm.avail_in = 0;
1141     strm.next_out = (BYTE*)ress->dstBuffer;
1142     strm.avail_out = ress->dstBufferSize;
1143 
1144     while (1) {
1145         if (strm.avail_in == 0) {
1146             size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
1147             if (inSize == 0) action = LZMA_FINISH;
1148             inFileSize += inSize;
1149             strm.next_in = (BYTE const*)ress->srcBuffer;
1150             strm.avail_in = inSize;
1151         }
1152 
1153         ret = lzma_code(&strm, action);
1154 
1155         if (ret != LZMA_OK && ret != LZMA_STREAM_END)
1156             EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
1157         {   size_t const compBytes = ress->dstBufferSize - strm.avail_out;
1158             if (compBytes) {
1159                 if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes)
1160                     EXM_THROW(85, "Write error : %s", strerror(errno));
1161                 outFileSize += compBytes;
1162                 strm.next_out = (BYTE*)ress->dstBuffer;
1163                 strm.avail_out = ress->dstBufferSize;
1164         }   }
1165         if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
1166             DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
1167                             (unsigned)(inFileSize>>20),
1168                             (double)outFileSize/inFileSize*100)
1169         else
1170             DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
1171                             (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
1172                             (double)outFileSize/inFileSize*100);
1173         if (ret == LZMA_STREAM_END) break;
1174     }
1175 
1176     lzma_end(&strm);
1177     *readsize = inFileSize;
1178 
1179     return outFileSize;
1180 }
1181 #endif
1182 
1183 #ifdef ZSTD_LZ4COMPRESS
1184 
1185 #if LZ4_VERSION_NUMBER <= 10600
1186 #define LZ4F_blockLinked blockLinked
1187 #define LZ4F_max64KB max64KB
1188 #endif
1189 
/* FIO_LZ4_GetBlockSize_FromBlockId() :
 * @return : 2^(8 + 2*id), i.e. 256 for id 0, then 4x more for each increment of `id`. */
static int FIO_LZ4_GetBlockSize_FromBlockId (int id)
{
    int const sizeLog = 8 + (id * 2);
    return 1 << sizeLog;
}
1191 
1192 static unsigned long long
FIO_compressLz4Frame(cRess_t * ress,const char * srcFileName,U64 const srcFileSize,int compressionLevel,int checksumFlag,U64 * readsize)1193 FIO_compressLz4Frame(cRess_t* ress,
1194                      const char* srcFileName, U64 const srcFileSize,
1195                      int compressionLevel, int checksumFlag,
1196                      U64* readsize)
1197 {
1198     const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
1199     unsigned long long inFileSize = 0, outFileSize = 0;
1200 
1201     LZ4F_preferences_t prefs;
1202     LZ4F_compressionContext_t ctx;
1203 
1204     LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
1205     if (LZ4F_isError(errorCode))
1206         EXM_THROW(31, "zstd: failed to create lz4 compression context");
1207 
1208     memset(&prefs, 0, sizeof(prefs));
1209 
1210     assert(blockSize <= ress->srcBufferSize);
1211 
1212     prefs.autoFlush = 1;
1213     prefs.compressionLevel = compressionLevel;
1214     prefs.frameInfo.blockMode = LZ4F_blockLinked;
1215     prefs.frameInfo.blockSizeID = LZ4F_max64KB;
1216     prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag;
1217 #if LZ4_VERSION_NUMBER >= 10600
1218     prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
1219 #endif
1220     assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize);
1221 
1222     {
1223         size_t readSize;
1224         size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
1225         if (LZ4F_isError(headerSize))
1226             EXM_THROW(33, "File header generation failed : %s",
1227                             LZ4F_getErrorName(headerSize));
1228         if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize)
1229             EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno));
1230         outFileSize += headerSize;
1231 
1232         /* Read first block */
1233         readSize  = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
1234         inFileSize += readSize;
1235 
1236         /* Main Loop */
1237         while (readSize>0) {
1238             size_t const outSize = LZ4F_compressUpdate(ctx,
1239                                         ress->dstBuffer, ress->dstBufferSize,
1240                                         ress->srcBuffer, readSize, NULL);
1241             if (LZ4F_isError(outSize))
1242                 EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
1243                             srcFileName, LZ4F_getErrorName(outSize));
1244             outFileSize += outSize;
1245             if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
1246                 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
1247                                 (unsigned)(inFileSize>>20),
1248                                 (double)outFileSize/inFileSize*100)
1249             } else {
1250                 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
1251                                 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
1252                                 (double)outFileSize/inFileSize*100);
1253             }
1254 
1255             /* Write Block */
1256             {   size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile);
1257                 if (sizeCheck != outSize)
1258                     EXM_THROW(36, "Write error : %s", strerror(errno));
1259             }
1260 
1261             /* Read next block */
1262             readSize  = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
1263             inFileSize += readSize;
1264         }
1265         if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);
1266 
1267         /* End of Stream mark */
1268         headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL);
1269         if (LZ4F_isError(headerSize))
1270             EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
1271                         srcFileName, LZ4F_getErrorName(headerSize));
1272 
1273         {   size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
1274             if (sizeCheck != headerSize)
1275                 EXM_THROW(39, "Write error : %s (cannot write end of stream)",
1276                             strerror(errno));
1277         }
1278         outFileSize += headerSize;
1279     }
1280 
1281     *readsize = inFileSize;
1282     LZ4F_freeCompressionContext(ctx);
1283 
1284     return outFileSize;
1285 }
1286 #endif
1287 
1288 
1289 static unsigned long long
FIO_compressZstdFrame(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,const cRess_t * ressPtr,const char * srcFileName,U64 fileSize,int compressionLevel,U64 * readsize)1290 FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
1291                       FIO_prefs_t* const prefs,
1292                       const cRess_t* ressPtr,
1293                       const char* srcFileName, U64 fileSize,
1294                       int compressionLevel, U64* readsize)
1295 {
1296     cRess_t const ress = *ressPtr;
1297     FILE* const srcFile = ress.srcFile;
1298     FILE* const dstFile = ress.dstFile;
1299     U64 compressedfilesize = 0;
1300     ZSTD_EndDirective directive = ZSTD_e_continue;
1301 
1302     /* stats */
1303     ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };
1304     ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };
1305     typedef enum { noChange, slower, faster } speedChange_e;
1306     speedChange_e speedChange = noChange;
1307     unsigned flushWaiting = 0;
1308     unsigned inputPresented = 0;
1309     unsigned inputBlocked = 0;
1310     unsigned lastJobID = 0;
1311 
1312     DISPLAYLEVEL(6, "compression using zstd format \n");
1313 
1314     /* init */
1315     if (fileSize != UTIL_FILESIZE_UNKNOWN) {
1316         CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
1317     } else if (prefs->streamSrcSize > 0) {
1318       /* unknown source size; use the declared stream size */
1319       CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
1320     }
1321     (void)srcFileName;
1322 
1323     /* Main compression loop */
1324     do {
1325         size_t stillToFlush;
1326         /* Fill input Buffer */
1327         size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
1328         ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
1329         DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
1330         *readsize += inSize;
1331 
1332         if ((inSize == 0) || (*readsize == fileSize))
1333             directive = ZSTD_e_end;
1334 
1335         stillToFlush = 1;
1336         while ((inBuff.pos != inBuff.size)   /* input buffer must be entirely ingested */
1337             || (directive == ZSTD_e_end && stillToFlush != 0) ) {
1338 
1339             size_t const oldIPos = inBuff.pos;
1340             ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
1341             size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
1342             CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
1343 
1344             /* count stats */
1345             inputPresented++;
1346             if (oldIPos == inBuff.pos) inputBlocked++;  /* input buffer is full and can't take any more : input speed is faster than consumption rate */
1347             if (!toFlushNow) flushWaiting = 1;
1348 
1349             /* Write compressed stream */
1350             DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
1351                             (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
1352             if (outBuff.pos) {
1353                 size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
1354                 if (sizeCheck != outBuff.pos)
1355                     EXM_THROW(25, "Write error : %s (cannot write compressed block)",
1356                                     strerror(errno));
1357                 compressedfilesize += outBuff.pos;
1358             }
1359 
1360             /* display notification; and adapt compression level */
1361             if (READY_FOR_UPDATE()) {
1362                 ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
1363                 double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
1364 
1365                 /* display progress notifications */
1366                 if (g_display_prefs.displayLevel >= 3) {
1367                     DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%% ",
1368                                 compressionLevel,
1369                                 (unsigned)((zfp.ingested - zfp.consumed) >> 20),
1370                                 (unsigned)(zfp.consumed >> 20),
1371                                 (unsigned)(zfp.produced >> 20),
1372                                 cShare );
1373                 } else {   /* summarized notifications if == 2 */
1374                     DISPLAYLEVEL(2, "\r%79s\r", "");    /* Clear out the current displayed line */
1375                     if (fCtx->nbFilesTotal > 1) {
1376                         size_t srcFileNameSize = strlen(srcFileName);
1377                         /* Ensure that the string we print is roughly the same size each time */
1378                         if (srcFileNameSize > 18) {
1379                             const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
1380                             DISPLAYLEVEL(2, "Compress: %u/%u files. Current: ...%s ",
1381                                          fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
1382                         } else {
1383                             DISPLAYLEVEL(2, "Compress: %u/%u files. Current: %*s ",
1384                                          fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
1385                         }
1386                     }
1387                     DISPLAYLEVEL(2, "Read : %2u ", (unsigned)(zfp.consumed >> 20));
1388                     if (fileSize != UTIL_FILESIZE_UNKNOWN)
1389                         DISPLAYLEVEL(2, "/ %2u ", (unsigned)(fileSize >> 20));
1390                     DISPLAYLEVEL(2, "MB ==> %2.f%%", cShare);
1391                     DELAY_NEXT_UPDATE();
1392                 }
1393 
1394                 /* adaptive mode : statistics measurement and speed correction */
1395                 if (prefs->adaptiveMode) {
1396 
1397                     /* check output speed */
1398                     if (zfp.currentJobID > 1) {  /* only possible if nbWorkers >= 1 */
1399 
1400                         unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
1401                         unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
1402                         assert(zfp.produced >= previous_zfp_update.produced);
1403                         assert(prefs->nbWorkers >= 1);
1404 
1405                         /* test if compression is blocked
1406                          * either because output is slow and all buffers are full
1407                          * or because input is slow and no job can start while waiting for at least one buffer to be filled.
1408                          * note : exclude starting part, since currentJobID > 1 */
1409                         if ( (zfp.consumed == previous_zfp_update.consumed)   /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
1410                           && (zfp.nbActiveWorkers == 0)                       /* confirmed : no compression ongoing */
1411                           ) {
1412                             DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
1413                             speedChange = slower;
1414                         }
1415 
1416                         previous_zfp_update = zfp;
1417 
1418                         if ( (newlyProduced > (newlyFlushed * 9 / 8))   /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
1419                           && (flushWaiting == 0)                        /* flush speed was never slowed by lack of production, so it's operating at max capacity */
1420                           ) {
1421                             DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
1422                             speedChange = slower;
1423                         }
1424                         flushWaiting = 0;
1425                     }
1426 
1427                     /* course correct only if there is at least one new job completed */
1428                     if (zfp.currentJobID > lastJobID) {
1429                         DISPLAYLEVEL(6, "compression level adaptation check \n")
1430 
1431                         /* check input speed */
1432                         if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) {   /* warm up period, to fill all workers */
1433                             if (inputBlocked <= 0) {
1434                                 DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
1435                                 speedChange = slower;
1436                             } else if (speedChange == noChange) {
1437                                 unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
1438                                 unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
1439                                 unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
1440                                 unsigned long long newlyFlushed  = zfp.flushed  - previous_zfp_correction.flushed;
1441                                 previous_zfp_correction = zfp;
1442                                 assert(inputPresented > 0);
1443                                 DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
1444                                                 inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
1445                                                 (unsigned)newlyIngested, (unsigned)newlyConsumed,
1446                                                 (unsigned)newlyFlushed, (unsigned)newlyProduced);
1447                                 if ( (inputBlocked > inputPresented / 8)     /* input is waiting often, because input buffers is full : compression or output too slow */
1448                                   && (newlyFlushed * 33 / 32 > newlyProduced)  /* flush everything that is produced */
1449                                   && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
1450                                 ) {
1451                                     DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
1452                                                     newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
1453                                     speedChange = faster;
1454                                 }
1455                             }
1456                             inputBlocked = 0;
1457                             inputPresented = 0;
1458                         }
1459 
1460                         if (speedChange == slower) {
1461                             DISPLAYLEVEL(6, "slower speed , higher compression \n")
1462                             compressionLevel ++;
1463                             if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
1464                             if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
1465                             compressionLevel += (compressionLevel == 0);   /* skip 0 */
1466                             ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
1467                         }
1468                         if (speedChange == faster) {
1469                             DISPLAYLEVEL(6, "faster speed , lighter compression \n")
1470                             compressionLevel --;
1471                             if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
1472                             compressionLevel -= (compressionLevel == 0);   /* skip 0 */
1473                             ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
1474                         }
1475                         speedChange = noChange;
1476 
1477                         lastJobID = zfp.currentJobID;
1478                     }  /* if (zfp.currentJobID > lastJobID) */
1479                 }  /* if (g_adaptiveMode) */
1480             }  /* if (READY_FOR_UPDATE()) */
1481         }  /* while ((inBuff.pos != inBuff.size) */
1482     } while (directive != ZSTD_e_end);
1483 
1484     if (ferror(srcFile)) {
1485         EXM_THROW(26, "Read error : I/O error");
1486     }
1487     if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
1488         EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
1489                 (unsigned long long)*readsize, (unsigned long long)fileSize);
1490     }
1491 
1492     return compressedfilesize;
1493 }
1494 
1495 /*! FIO_compressFilename_internal() :
1496  *  same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
1497  *  @return : 0 : compression completed correctly,
1498  *            1 : missing or pb opening srcFileName
1499  */
1500 static int
FIO_compressFilename_internal(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,cRess_t ress,const char * dstFileName,const char * srcFileName,int compressionLevel)1501 FIO_compressFilename_internal(FIO_ctx_t* const fCtx,
1502                               FIO_prefs_t* const prefs,
1503                               cRess_t ress,
1504                               const char* dstFileName, const char* srcFileName,
1505                               int compressionLevel)
1506 {
1507     UTIL_time_t const timeStart = UTIL_getTime();
1508     clock_t const cpuStart = clock();
1509     U64 readsize = 0;
1510     U64 compressedfilesize = 0;
1511     U64 const fileSize = UTIL_getFileSize(srcFileName);
1512     DISPLAYLEVEL(5, "%s: %u bytes \n", srcFileName, (unsigned)fileSize);
1513 
1514     /* compression format selection */
1515     switch (prefs->compressionType) {
1516         default:
1517         case FIO_zstdCompression:
1518             compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize);
1519             break;
1520 
1521         case FIO_gzipCompression:
1522 #ifdef ZSTD_GZCOMPRESS
1523             compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
1524 #else
1525             (void)compressionLevel;
1526             EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n",
1527                             srcFileName);
1528 #endif
1529             break;
1530 
1531         case FIO_xzCompression:
1532         case FIO_lzmaCompression:
1533 #ifdef ZSTD_LZMACOMPRESS
1534             compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression);
1535 #else
1536             (void)compressionLevel;
1537             EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n",
1538                             srcFileName);
1539 #endif
1540             break;
1541 
1542         case FIO_lz4Compression:
1543 #ifdef ZSTD_LZ4COMPRESS
1544             compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize);
1545 #else
1546             (void)compressionLevel;
1547             EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n",
1548                             srcFileName);
1549 #endif
1550             break;
1551     }
1552 
1553     /* Status */
1554     fCtx->totalBytesInput += (size_t)readsize;
1555     fCtx->totalBytesOutput += (size_t)compressedfilesize;
1556     DISPLAYLEVEL(2, "\r%79s\r", "");
1557     if (g_display_prefs.displayLevel >= 2 &&
1558         !fCtx->hasStdoutOutput &&
1559         (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) {
1560         if (readsize == 0) {
1561             DISPLAYLEVEL(2,"%-20s :  (%6llu => %6llu bytes, %s) \n",
1562                 srcFileName,
1563                 (unsigned long long)readsize, (unsigned long long) compressedfilesize,
1564                 dstFileName);
1565         } else {
1566             DISPLAYLEVEL(2,"%-20s :%6.2f%%   (%6llu => %6llu bytes, %s) \n",
1567                 srcFileName,
1568                 (double)compressedfilesize / (double)readsize * 100,
1569                 (unsigned long long)readsize, (unsigned long long) compressedfilesize,
1570                 dstFileName);
1571         }
1572     }
1573 
1574     /* Elapsed Time and CPU Load */
1575     {   clock_t const cpuEnd = clock();
1576         double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
1577         U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
1578         double const timeLength_s = (double)timeLength_ns / 1000000000;
1579         double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
1580         DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec  (cpu load : %.0f%%)\n",
1581                         srcFileName, timeLength_s, cpuLoad_pct);
1582     }
1583     return 0;
1584 }
1585 
1586 
1587 /*! FIO_compressFilename_dstFile() :
1588  *  open dstFileName, or pass-through if ress.dstFile != NULL,
1589  *  then start compression with FIO_compressFilename_internal().
1590  *  Manages source removal (--rm) and file permissions transfer.
1591  *  note : ress.srcFile must be != NULL,
1592  *  so reach this function through FIO_compressFilename_srcFile().
1593  *  @return : 0 : compression completed correctly,
1594  *            1 : pb
1595  */
FIO_compressFilename_dstFile(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,cRess_t ress,const char * dstFileName,const char * srcFileName,int compressionLevel)1596 static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
1597                                         FIO_prefs_t* const prefs,
1598                                         cRess_t ress,
1599                                         const char* dstFileName,
1600                                         const char* srcFileName,
1601                                         int compressionLevel)
1602 {
1603     int closeDstFile = 0;
1604     int result;
1605     stat_t statbuf;
1606     int transfer_permissions = 0;
1607     assert(ress.srcFile != NULL);
1608     if (ress.dstFile == NULL) {
1609         closeDstFile = 1;
1610         DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
1611         ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName);
1612         if (ress.dstFile==NULL) return 1;  /* could not open dstFileName */
1613         /* Must only be added after FIO_openDstFile() succeeds.
1614          * Otherwise we may delete the destination file if it already exists,
1615          * and the user presses Ctrl-C when asked if they wish to overwrite.
1616          */
1617         addHandler(dstFileName);
1618 
1619         if ( strcmp (srcFileName, stdinmark)
1620           && UTIL_stat(srcFileName, &statbuf)
1621           && UTIL_isRegularFileStat(&statbuf) )
1622             transfer_permissions = 1;
1623     }
1624 
1625     result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
1626 
1627     if (closeDstFile) {
1628         FILE* const dstFile = ress.dstFile;
1629         ress.dstFile = NULL;
1630 
1631         clearHandler();
1632 
1633         DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
1634         if (fclose(dstFile)) { /* error closing dstFile */
1635             DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
1636             result=1;
1637         }
1638         if ( (result != 0)  /* operation failure */
1639           && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
1640           ) {
1641             FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
1642         } else if (transfer_permissions) {
1643             DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: transferring permissions into dst: %s \n", dstFileName);
1644             UTIL_setFileStat(dstFileName, &statbuf);
1645         } else {
1646             DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: do not transfer permissions into dst: %s \n", dstFileName);
1647         }
1648     }
1649 
1650     return result;
1651 }
1652 
/* List of file extensions considered as "already compressed"
 * (used with the --exclude-compressed flag, through UTIL_isCompressedFile()).
 * Different from the suffixList, and should only apply to ZSTD compress operations.
 * The list is terminated by a NULL entry.
 */
static const char *compressedFileExtensions[] = {
    ZSTD_EXTENSION,
    TZSTD_EXTENSION,
    GZ_EXTENSION,
    TGZ_EXTENSION,
    LZMA_EXTENSION,
    XZ_EXTENSION,
    TXZ_EXTENSION,
    LZ4_EXTENSION,
    TLZ4_EXTENSION,
    NULL   /* end-of-list marker */
};
1668 
1669 /*! FIO_compressFilename_srcFile() :
1670  *  @return : 0 : compression completed correctly,
1671  *            1 : missing or pb opening srcFileName
1672  */
1673 static int
FIO_compressFilename_srcFile(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,cRess_t ress,const char * dstFileName,const char * srcFileName,int compressionLevel)1674 FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx,
1675                              FIO_prefs_t* const prefs,
1676                              cRess_t ress,
1677                              const char* dstFileName,
1678                              const char* srcFileName,
1679                              int compressionLevel)
1680 {
1681     int result;
1682     DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName);
1683 
1684     /* ensure src is not a directory */
1685     if (UTIL_isDirectory(srcFileName)) {
1686         DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
1687         return 1;
1688     }
1689 
1690     /* ensure src is not the same as dict (if present) */
1691     if (ress.dictFileName != NULL && UTIL_isSameFile(srcFileName, ress.dictFileName)) {
1692         DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName);
1693         return 1;
1694     }
1695 
1696     /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
1697     * YES => ZSTD will skip compression of the file and will return 0.
1698     * NO => ZSTD will resume with compress operation.
1699     */
1700     if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
1701         DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
1702         return 0;
1703     }
1704 
1705     ress.srcFile = FIO_openSrcFile(srcFileName);
1706     if (ress.srcFile == NULL) return 1;   /* srcFile could not be opened */
1707 
1708     result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
1709 
1710     fclose(ress.srcFile);
1711     ress.srcFile = NULL;
1712     if ( prefs->removeSrcFile   /* --rm */
1713       && result == 0       /* success */
1714       && strcmp(srcFileName, stdinmark)   /* exception : don't erase stdin */
1715       ) {
1716         /* We must clear the handler, since after this point calling it would
1717          * delete both the source and destination files.
1718          */
1719         clearHandler();
1720         if (FIO_removeFile(srcFileName))
1721             EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno));
1722     }
1723     return result;
1724 }
1725 
/* FIO_compressFilename() :
 * Compress the single file srcFileName into dstFileName :
 * create compression resources sized for that file, run FIO_compressFilename_srcFile(),
 * then release the resources.
 * @return : the result of FIO_compressFilename_srcFile()
 */
int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName,
                         const char* srcFileName, const char* dictFileName,
                         int compressionLevel, ZSTD_compressionParameters comprParams)
{
    unsigned long long const srcSize = UTIL_getFileSize(srcFileName);
    cRess_t const ress = FIO_createCResources(prefs, dictFileName, srcSize, compressionLevel, comprParams);
    int const status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);

/* note : a preprocessor definition ignores function scope :
 * it stays visible to the code that follows in this file (unless #undef'ed) */
#define DISPLAY_LEVEL_DEFAULT 2

    FIO_freeCResources(&ress);
    return status;
}
1738 
/* FIO_determineCompressedName() :
 * create a destination filename for compressed srcFileName :
 * srcFileName (or the name produced by FIO_createFilename_fromOutDir() when outDirName != NULL)
 * followed by suffix.
 * @return a pointer to it.
 * note : the name is stored in a static buffer owned by this function :
 *        it is overwritten (and possibly reallocated) by the next call,
 *        and must not be free()'d by the caller.
 * This function never returns an error (it may abort() in case of pb)
 */
static const char*
FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
{
    static size_t dfnbCapacity = 0;
    static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
    char* outDirFilename = NULL;
    size_t sfnSize = strlen(srcFileName);
    size_t const srcSuffixLen = strlen(suffix);
    if (outDirName) {
        outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
        /* validate the pointer before strlen() dereferences it */
        assert(outDirFilename != NULL);
        sfnSize = strlen(outDirFilename);
    }

    if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
        /* resize buffer for dstName (with some margin, to limit future reallocations) */
        free(dstFileNameBuffer);
        dfnbCapacity = sfnSize + srcSuffixLen + 30;
        dstFileNameBuffer = (char*)malloc(dfnbCapacity);
        if (!dstFileNameBuffer) {
            EXM_THROW(30, "zstd: %s", strerror(errno));
        }
    }
    assert(dstFileNameBuffer != NULL);

    /* base name first (without its terminating null) ... */
    if (outDirFilename) {
        memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
        free(outDirFilename);   /* temporary name no longer needed once copied */
    } else {
        memcpy(dstFileNameBuffer, srcFileName, sfnSize);
    }
    /* ... then the suffix */
    memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
    return dstFileNameBuffer;
}
1778 
/* FIO_getLargestFileSize() :
 * @return : the largest value reported by UTIL_getFileSize()
 *           among the nbFiles first entries of inFileNames,
 *           or 0 when nbFiles == 0.
 */
static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
{
    unsigned long long largest = 0;
    unsigned n;
    for (n = 0; n < nbFiles; n++) {
        unsigned long long const candidate = UTIL_getFileSize(inFileNames[n]);
        if (candidate > largest) {
            largest = candidate;
        }
    }
    return largest;
}
1789 
1790 /* FIO_compressMultipleFilenames() :
1791  * compress nbFiles files
1792  * into either one destination (outFileName),
1793  * or into one file each (outFileName == NULL, but suffix != NULL),
1794  * or into a destination folder (specified with -O)
1795  */
FIO_compressMultipleFilenames(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,const char ** inFileNamesTable,const char * outMirroredRootDirName,const char * outDirName,const char * outFileName,const char * suffix,const char * dictFileName,int compressionLevel,ZSTD_compressionParameters comprParams)1796 int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
1797                                   FIO_prefs_t* const prefs,
1798                                   const char** inFileNamesTable,
1799                                   const char* outMirroredRootDirName,
1800                                   const char* outDirName,
1801                                   const char* outFileName, const char* suffix,
1802                                   const char* dictFileName, int compressionLevel,
1803                                   ZSTD_compressionParameters comprParams)
1804 {
1805     int status;
1806     int error = 0;
1807     cRess_t ress = FIO_createCResources(prefs, dictFileName,
1808         FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal),
1809         compressionLevel, comprParams);
1810 
1811     /* init */
1812     assert(outFileName != NULL || suffix != NULL);
1813     if (outFileName != NULL) {   /* output into a single destination (stdout typically) */
1814         if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
1815             FIO_freeCResources(&ress);
1816             return 1;
1817         }
1818         ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName);
1819         if (ress.dstFile == NULL) {  /* could not open outFileName */
1820             error = 1;
1821         } else {
1822             for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
1823                 status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel);
1824                 if (!status) fCtx->nbFilesProcessed++;
1825                 error |= status;
1826             }
1827             if (fclose(ress.dstFile))
1828                 EXM_THROW(29, "Write error (%s) : cannot properly close %s",
1829                             strerror(errno), outFileName);
1830             ress.dstFile = NULL;
1831         }
1832     } else {
1833         if (outMirroredRootDirName)
1834             UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
1835 
1836         for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
1837             const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx];
1838             const char* dstFileName = NULL;
1839             if (outMirroredRootDirName) {
1840                 char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
1841                 if (validMirroredDirName) {
1842                     dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix);
1843                     free(validMirroredDirName);
1844                 } else {
1845                     DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName);
1846                     error=1;
1847                     continue;
1848                 }
1849             } else {
1850                 dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix);  /* cannot fail */
1851             }
1852             status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
1853             if (!status) fCtx->nbFilesProcessed++;
1854             error |= status;
1855         }
1856 
1857         if (outDirName)
1858             FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal);
1859     }
1860 
1861     if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) {
1862         DISPLAYLEVEL(2, "\r%79s\r", "");
1863         DISPLAYLEVEL(2, "%d files compressed : %.2f%%  (%6zu => %6zu bytes)\n", fCtx->nbFilesProcessed,
1864                         (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100,
1865                         fCtx->totalBytesInput, fCtx->totalBytesOutput);
1866     }
1867 
1868     FIO_freeCResources(&ress);
1869     return error;
1870 }
1871 
1872 #endif /* #ifndef ZSTD_NOCOMPRESS */
1873 
1874 
1875 
1876 #ifndef ZSTD_NODECOMPRESS
1877 
1878 /* **************************************************************************
1879  *  Decompression
1880  ***************************************************************************/
/* dRess_t :
 * resources used by decompression operations,
 * created by FIO_createDResources() and released by FIO_freeDResources(). */
typedef struct {
    void*  srcBuffer;         /* input buffer, malloc'ed by FIO_createDResources() */
    size_t srcBufferSize;     /* capacity of srcBuffer, set to ZSTD_DStreamInSize() */
    size_t srcBufferLoaded;   /* nb of bytes currently loaded into srcBuffer */
    void*  dstBuffer;         /* output buffer, malloc'ed by FIO_createDResources() */
    size_t dstBufferSize;     /* capacity of dstBuffer, set to ZSTD_DStreamOutSize() */
    ZSTD_DStream* dctx;       /* decompression stream, created by ZSTD_createDStream() */
    FILE*  dstFile;           /* NULL after FIO_createDResources() ; presumably the destination file, set by users of this struct - TODO confirm */
} dRess_t;
1890 
FIO_createDResources(FIO_prefs_t * const prefs,const char * dictFileName)1891 static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
1892 {
1893     dRess_t ress;
1894     memset(&ress, 0, sizeof(ress));
1895 
1896     if (prefs->patchFromMode)
1897         FIO_adjustMemLimitForPatchFromMode(prefs, UTIL_getFileSize(dictFileName), 0 /* just use the dict size */);
1898 
1899     /* Allocation */
1900     ress.dctx = ZSTD_createDStream();
1901     if (ress.dctx==NULL)
1902         EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
1903     CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
1904     CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
1905 
1906     ress.srcBufferSize = ZSTD_DStreamInSize();
1907     ress.srcBuffer = malloc(ress.srcBufferSize);
1908     ress.dstBufferSize = ZSTD_DStreamOutSize();
1909     ress.dstBuffer = malloc(ress.dstBufferSize);
1910     if (!ress.srcBuffer || !ress.dstBuffer)
1911         EXM_THROW(61, "Allocation error : not enough memory");
1912 
1913     /* dictionary */
1914     {   void* dictBuffer;
1915         size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs);
1916         CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) );
1917         free(dictBuffer);
1918     }
1919 
1920     return ress;
1921 }
1922 
/* FIO_freeDResources() :
 * Release the decompression stream and both buffers held by ress.
 * ress.dstFile is not touched.
 */
static void FIO_freeDResources(dRess_t ress)
{
    CHECK( ZSTD_freeDStream(ress.dctx) );
    /* buffers are independent : release order does not matter */
    free(ress.dstBuffer);
    free(ress.srcBuffer);
}
1929 
1930 
/** FIO_fwriteSparse() :
*  Write bufferSize bytes from buffer into file.
*  When prefs->sparseFileSupport is set, runs of zeroes are not written :
*  they are accumulated into storedSkips, and turned into a forward seek
*  just before the next non-zero data gets written.
*  Nothing is written in test mode (prefs->testMode).
*  Seek and write failures go through EXM_THROW().
*  @return : storedSkips, the nb of zero bytes still pending (always 0 without sparse support),
*            argument for next call to FIO_fwriteSparse() or FIO_fwriteSparseEnd() */
static unsigned
FIO_fwriteSparse(FILE* file,
                 const void* buffer, size_t bufferSize,
                 const FIO_prefs_t* const prefs,
                 unsigned storedSkips)
{
    /* buffer is scanned as an array of size_t words ; the "T" suffix denotes quantities counted in words */
    const size_t* const bufferT = (const size_t*)buffer;   /* Buffer is supposed malloc'ed, hence aligned on size_t */
    size_t bufferSizeT = bufferSize / sizeof(size_t);      /* nb of complete words remaining to scan */
    const size_t* const bufferTEnd = bufferT + bufferSizeT;
    const size_t* ptrT = bufferT;
    static const size_t segmentSizeT = (32 KB) / sizeof(size_t);   /* check every 32 KB */

    if (prefs->testMode) return 0;  /* do not output anything in test mode */

    if (!prefs->sparseFileSupport) {  /* normal write */
        size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
        if (sizeCheck != bufferSize)
            EXM_THROW(70, "Write error : cannot write decoded block : %s",
                            strerror(errno));
        return 0;
    }

    /* avoid int overflow :
     * materialize 1 GB worth of pending skips right away, to keep the counter bounded */
    if (storedSkips > 1 GB) {
        if (LONG_SEEK(file, 1 GB, SEEK_CUR) != 0)
            EXM_THROW(91, "1 GB skip error (sparse file support)");
        storedSkips -= 1 GB;
    }

    /* process complete words, one segment (up to 32 KB) at a time */
    while (ptrT < bufferTEnd) {
        size_t nb0T;

        /* adjust last segment if < 32 KB */
        size_t seg0SizeT = segmentSizeT;
        if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT;
        bufferSizeT -= seg0SizeT;

        /* count leading zeroes */
        for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ;
        storedSkips += (unsigned)(nb0T * sizeof(size_t));

        if (nb0T != seg0SizeT) {   /* not all 0s */
            size_t const nbNon0ST = seg0SizeT - nb0T;
            /* skip leading zeros */
            if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0)
                EXM_THROW(92, "Sparse skip error ; try --no-sparse");
            storedSkips = 0;
            /* write the rest :
             * everything from the first non-zero word up to the end of the segment,
             * including any zero found in between */
            if (fwrite(ptrT + nb0T, sizeof(size_t), nbNon0ST, file) != nbNon0ST)
                EXM_THROW(93, "Write error : cannot write decoded block : %s",
                            strerror(errno));
        }
        ptrT += seg0SizeT;
    }

    /* process trailing bytes, when bufferSize is not a multiple of the word size :
     * same logic as above, one byte at a time */
    {   static size_t const maskT = sizeof(size_t)-1;
        if (bufferSize & maskT) {
            /* size not multiple of sizeof(size_t) : implies end of block */
            const char* const restStart = (const char*)bufferTEnd;
            const char* restPtr = restStart;
            const char* const restEnd = (const char*)buffer + bufferSize;
            assert(restEnd > restStart && restEnd < restStart + sizeof(size_t));
            /* count leading zero bytes */
            for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;
            storedSkips += (unsigned) (restPtr - restStart);
            if (restPtr != restEnd) {
                /* not all remaining bytes are 0 */
                size_t const restSize = (size_t)(restEnd - restPtr);
                if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0)
                    EXM_THROW(92, "Sparse skip error ; try --no-sparse");
                if (fwrite(restPtr, 1, restSize, file) != restSize)
                    EXM_THROW(95, "Write error : cannot write end of decoded block : %s",
                        strerror(errno));
                storedSkips = 0;
    }   }   }

    return storedSkips;
}
2011 
2012 static void
FIO_fwriteSparseEnd(const FIO_prefs_t * const prefs,FILE * file,unsigned storedSkips)2013 FIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
2014 {
2015     if (prefs->testMode) assert(storedSkips == 0);
2016     if (storedSkips>0) {
2017         assert(prefs->sparseFileSupport > 0);  /* storedSkips>0 implies sparse support is enabled */
2018         (void)prefs;   /* assert can be disabled, in which case prefs becomes unused */
2019         if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0)
2020             EXM_THROW(69, "Final skip error (sparse file support)");
2021         /* last zero must be explicitly written,
2022          * so that skipped ones get implicitly translated as zero by FS */
2023         {   const char lastZeroByte[1] = { 0 };
2024             if (fwrite(lastZeroByte, 1, 1, file) != 1)
2025                 EXM_THROW(69, "Write error : cannot write last zero : %s", strerror(errno));
2026     }   }
2027 }
2028 
2029 
2030 /** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
2031     @return : 0 (no error) */
FIO_passThrough(const FIO_prefs_t * const prefs,FILE * foutput,FILE * finput,void * buffer,size_t bufferSize,size_t alreadyLoaded)2032 static int FIO_passThrough(const FIO_prefs_t* const prefs,
2033                            FILE* foutput, FILE* finput,
2034                            void* buffer, size_t bufferSize,
2035                            size_t alreadyLoaded)
2036 {
2037     size_t const blockSize = MIN(64 KB, bufferSize);
2038     size_t readFromInput;
2039     unsigned storedSkips = 0;
2040 
2041     /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */
2042     {   size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput);
2043         if (sizeCheck != alreadyLoaded) {
2044             DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno));
2045             return 1;
2046     }   }
2047 
2048     do {
2049         readFromInput = fread(buffer, 1, blockSize, finput);
2050         storedSkips = FIO_fwriteSparse(foutput, buffer, readFromInput, prefs, storedSkips);
2051     } while (readFromInput == blockSize);
2052     if (ferror(finput)) {
2053         DISPLAYLEVEL(1, "Pass-through read error : %s\n", strerror(errno));
2054         return 1;
2055     }
2056     assert(feof(finput));
2057 
2058     FIO_fwriteSparseEnd(prefs, foutput, storedSkips);
2059     return 0;
2060 }
2061 
2062 /* FIO_zstdErrorHelp() :
2063  * detailed error message when requested window size is too large */
2064 static void
FIO_zstdErrorHelp(const FIO_prefs_t * const prefs,const dRess_t * ress,size_t err,const char * srcFileName)2065 FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
2066                   const dRess_t* ress,
2067                   size_t err, const char* srcFileName)
2068 {
2069     ZSTD_frameHeader header;
2070 
2071     /* Help message only for one specific error */
2072     if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
2073         return;
2074 
2075     /* Try to decode the frame header */
2076     err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded);
2077     if (err == 0) {
2078         unsigned long long const windowSize = header.windowSize;
2079         unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
2080         assert(prefs->memLimit > 0);
2081         DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n",
2082                         srcFileName, windowSize, prefs->memLimit);
2083         if (windowLog <= ZSTD_WINDOWLOG_MAX) {
2084             unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
2085             assert(windowSize < (U64)(1ULL << 52));   /* ensure now overflow for windowMB */
2086             DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n",
2087                             srcFileName, windowLog, windowMB);
2088             return;
2089     }   }
2090     DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n",
2091                     srcFileName, ZSTD_WINDOWLOG_MAX);
2092 }
2093 
2094 /** FIO_decompressFrame() :
2095  *  @return : size of decoded zstd frame, or an error code
2096  */
2097 #define FIO_ERROR_FRAME_DECODING   ((unsigned long long)(-2))
2098 static unsigned long long
FIO_decompressZstdFrame(FIO_ctx_t * const fCtx,dRess_t * ress,FILE * finput,const FIO_prefs_t * const prefs,const char * srcFileName,U64 alreadyDecoded)2099 FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
2100                         const FIO_prefs_t* const prefs,
2101                         const char* srcFileName,
2102                         U64 alreadyDecoded)  /* for multi-frames streams */
2103 {
2104     U64 frameSize = 0;
2105     U32 storedSkips = 0;
2106 
2107     /* display last 20 characters only */
2108     {   size_t const srcFileLength = strlen(srcFileName);
2109         if (srcFileLength>20) srcFileName += srcFileLength-20;
2110     }
2111 
2112     ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);
2113 
2114     /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
2115     {   size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX;
2116         if (ress->srcBufferLoaded < toDecode) {
2117             size_t const toRead = toDecode - ress->srcBufferLoaded;
2118             void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
2119             ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput);
2120     }   }
2121 
2122     /* Main decompression Loop */
2123     while (1) {
2124         ZSTD_inBuffer  inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 };
2125         ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 };
2126         size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
2127         if (ZSTD_isError(readSizeHint)) {
2128             DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
2129                             srcFileName, ZSTD_getErrorName(readSizeHint));
2130             FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
2131             return FIO_ERROR_FRAME_DECODING;
2132         }
2133 
2134         /* Write block */
2135         storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips);
2136         frameSize += outBuff.pos;
2137         if (!fCtx->hasStdoutOutput) {
2138             if (fCtx->nbFilesTotal > 1) {
2139                 size_t srcFileNameSize = strlen(srcFileName);
2140                 if (srcFileNameSize > 18) {
2141                     const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
2142                     DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: ...%s : %u MB...    ",
2143                                     fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
2144                 } else {
2145                     DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: %s : %u MB...    ",
2146                                 fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
2147                 }
2148             } else {
2149                 DISPLAYUPDATE(2, "\r%-20.20s : %u MB...     ",
2150                                 srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
2151             }
2152         }
2153 
2154         if (inBuff.pos > 0) {
2155             memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos);
2156             ress->srcBufferLoaded -= inBuff.pos;
2157         }
2158 
2159         if (readSizeHint == 0) break;   /* end of frame */
2160 
2161         /* Fill input buffer */
2162         {   size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize);  /* support large skippable frames */
2163             if (ress->srcBufferLoaded < toDecode) {
2164                 size_t const toRead = toDecode - ress->srcBufferLoaded;   /* > 0 */
2165                 void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
2166                 size_t const readSize = fread(startPosition, 1, toRead, finput);
2167                 if (readSize==0) {
2168                     DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
2169                                     srcFileName);
2170                     return FIO_ERROR_FRAME_DECODING;
2171                 }
2172                 ress->srcBufferLoaded += readSize;
2173     }   }   }
2174 
2175     FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
2176 
2177     return frameSize;
2178 }
2179 
2180 
#ifdef ZSTD_GZDECOMPRESS
/* FIO_decompressGzFrame() :
 * Decodes one gzip stream with zlib, starting from the bytes already present
 * in ress->srcBuffer and refilling from `srcFile` as needed.
 * Decoded data goes to ress->dstFile through the sparse writer.
 * Input left unconsumed after the stream end is moved to the start of
 * ress->srcBuffer, and ress->srcBufferLoaded is set to its size.
 * @return : decoded size, or FIO_ERROR_FRAME_DECODING on error */
static unsigned long long
FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile,
                      const FIO_prefs_t* const prefs,
                      const char* srcFileName)
{
    unsigned long long decodedSize = 0;
    unsigned pendingSkips = 0;
    int failed = 0;
    int flushMode = Z_NO_FLUSH;
    int zret;
    z_stream zs;

    zs.zalloc = Z_NULL;
    zs.zfree = Z_NULL;
    zs.opaque = Z_NULL;
    zs.next_in = 0;
    zs.avail_in = 0;
    /* see http://www.zlib.net/manual.html */
    if (inflateInit2(&zs, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK)
        return FIO_ERROR_FRAME_DECODING;

    zs.next_out = (Bytef*)ress->dstBuffer;
    zs.avail_out = (uInt)ress->dstBufferSize;
    zs.avail_in = (uInt)ress->srcBufferLoaded;
    zs.next_in = (z_const unsigned char*)ress->srcBuffer;

    do {
        /* refill input once fully consumed ; an empty read requests Z_FINISH */
        if (zs.avail_in == 0) {
            ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
            if (ress->srcBufferLoaded == 0) flushMode = Z_FINISH;
            zs.next_in = (z_const unsigned char*)ress->srcBuffer;
            zs.avail_in = (uInt)ress->srcBufferLoaded;
        }

        zret = inflate(&zs, flushMode);
        if (zret == Z_BUF_ERROR) {
            DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName);
            failed = 1;
            break;
        }
        if ((zret != Z_OK) && (zret != Z_STREAM_END)) {
            DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, zret);
            failed = 1;
            break;
        }

        /* flush whatever was produced, then hand the whole dst buffer back to zlib */
        {   size_t const produced = ress->dstBufferSize - zs.avail_out;
            if (produced > 0) {
                pendingSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, produced, prefs, pendingSkips);
                decodedSize += produced;
                zs.next_out = (Bytef*)ress->dstBuffer;
                zs.avail_out = (uInt)ress->dstBufferSize;
            }
        }
    } while (zret != Z_STREAM_END);

    /* preserve unconsumed input for the next frame */
    if (zs.avail_in > 0)
        memmove(ress->srcBuffer, zs.next_in, zs.avail_in);
    ress->srcBufferLoaded = zs.avail_in;

    /* release resources ; a failure here is only reported if nothing failed before */
    {   int const endStatus = inflateEnd(&zs);
        if ((endStatus != Z_OK) && !failed) {
            DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
            failed = 1;
        }
    }

    FIO_fwriteSparseEnd(prefs, ress->dstFile, pendingSkips);
    return failed ? FIO_ERROR_FRAME_DECODING : decodedSize;
}
#endif
2247 
2248 
#ifdef ZSTD_LZMADECOMPRESS
/* FIO_decompressLzmaFrame() :
 * Decodes one xz stream, or one legacy .lzma stream when `plain_lzma` is non-zero,
 * starting from the bytes already present in ress->srcBuffer and refilling
 * from `srcFile` as needed.
 * Decoded data goes to ress->dstFile through the sparse writer.
 * Input left unconsumed after the stream end is moved to the start of
 * ress->srcBuffer, and ress->srcBufferLoaded is set to its size.
 * @return : decoded size, or FIO_ERROR_FRAME_DECODING on error */
static unsigned long long
FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
                        const FIO_prefs_t* const prefs,
                        const char* srcFileName, int plain_lzma)
{
    unsigned long long outFileSize = 0;
    lzma_stream strm = LZMA_STREAM_INIT;
    lzma_action action = LZMA_RUN;
    lzma_ret initRet;
    int decodingError = 0;
    unsigned storedSkips = 0;

    strm.next_in = 0;
    strm.avail_in = 0;
    if (plain_lzma) {
        initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
    } else {
        initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
    }

    if (initRet != LZMA_OK) {
        /* file name first, then the failing initializer :
         * same "zstd: <file>: <what>" layout as the other messages of this function */
        DISPLAYLEVEL(1, "zstd: %s: %s error %d \n",
                        srcFileName,
                        plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder",
                        (int)initRet);
        return FIO_ERROR_FRAME_DECODING;
    }

    strm.next_out = (BYTE*)ress->dstBuffer;
    strm.avail_out = ress->dstBufferSize;
    strm.next_in = (BYTE const*)ress->srcBuffer;
    strm.avail_in = ress->srcBufferLoaded;

    for ( ; ; ) {
        lzma_ret ret;
        /* refill input once fully consumed ; an empty read requests LZMA_FINISH */
        if (strm.avail_in == 0) {
            ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
            if (ress->srcBufferLoaded == 0) action = LZMA_FINISH;
            strm.next_in = (BYTE const*)ress->srcBuffer;
            strm.avail_in = ress->srcBufferLoaded;
        }
        ret = lzma_code(&strm, action);

        if (ret == LZMA_BUF_ERROR) {
            DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName);
            decodingError = 1; break;
        }
        if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
            DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n",
                            srcFileName, (int)ret);
            decodingError = 1; break;
        }
        /* flush whatever was produced, then hand the whole dst buffer back to the decoder */
        {   size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
            if (decompBytes) {
                storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips);
                outFileSize += decompBytes;
                strm.next_out = (BYTE*)ress->dstBuffer;
                strm.avail_out = ress->dstBufferSize;
        }   }
        if (ret == LZMA_STREAM_END) break;
    }

    /* preserve unconsumed input for the next frame */
    if (strm.avail_in > 0)
        memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
    ress->srcBufferLoaded = strm.avail_in;
    lzma_end(&strm);
    FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
    return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
2319 
#ifdef ZSTD_LZ4DECOMPRESS
/* FIO_decompressLz4Frame() :
 * Decodes one LZ4 frame read from `srcFile`, writing the result to
 * ress->dstFile through the sparse writer.
 * The 4-byte magic number is not read from `srcFile` : it is re-created
 * at the start of ress->srcBuffer and fed to the decoder first.
 * ress->srcBufferLoaded is reset to 0 on exit.
 * @return : decoded size, or FIO_ERROR_FRAME_DECODING on error */
static unsigned long long
FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile,
                       const FIO_prefs_t* const prefs,
                       const char* srcFileName)
{
    unsigned long long decodedSize = 0;
    LZ4F_errorCode_t nextToLoad;
    LZ4F_decompressionContext_t lz4Ctx;
    LZ4F_errorCode_t const creationStatus = LZ4F_createDecompressionContext(&lz4Ctx, LZ4F_VERSION);
    int failed = 0;
    unsigned pendingSkips = 0;

    if (LZ4F_isError(creationStatus)) {
        DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n");
        return FIO_ERROR_FRAME_DECODING;
    }

    /* Init feed with magic number (already consumed from FILE* sFile) */
    {   size_t magicSize = 4;
        size_t produced = 0;
        MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER);
        nextToLoad = LZ4F_decompress(lz4Ctx, ress->dstBuffer, &produced, ress->srcBuffer, &magicSize, NULL);
        if (LZ4F_isError(nextToLoad)) {
            DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n",
                            srcFileName, LZ4F_getErrorName(nextToLoad));
            LZ4F_freeDecompressionContext(lz4Ctx);
            return FIO_ERROR_FRAME_DECODING;
        }
    }

    /* Main Loop : runs for as long as the decoder asks for more input */
    while (nextToLoad) {
        size_t loaded;
        size_t consumed = 0;
        size_t produced = ress->dstBufferSize;

        /* Read input */
        if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize;
        loaded = fread(ress->srcBuffer, 1, nextToLoad, srcFile);
        if (loaded == 0) break;   /* reached end of file or stream */

        /* still to read, or still to flush (last call filled the whole dst buffer) */
        while ((consumed < loaded) || (produced == ress->dstBufferSize)) {
            /* Decode Input (at least partially) */
            size_t chunk = loaded - consumed;
            produced = ress->dstBufferSize;
            nextToLoad = LZ4F_decompress(lz4Ctx, ress->dstBuffer, &produced, (char*)(ress->srcBuffer)+consumed, &chunk, NULL);
            if (LZ4F_isError(nextToLoad)) {
                DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
                                srcFileName, LZ4F_getErrorName(nextToLoad));
                failed = 1;
                nextToLoad = 0;
                break;
            }
            consumed += chunk;

            /* Write Block */
            if (produced > 0) {
                pendingSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, produced, prefs, pendingSkips);
                decodedSize += produced;
                DISPLAYUPDATE(2, "\rDecompressed : %u MB  ", (unsigned)(decodedSize>>20));
            }

            if (nextToLoad == 0) break;
        }
    }

    /* can be out because readSize == 0, which could be an fread() error */
    if (ferror(srcFile)) {
        DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName);
        failed = 1;
    }

    /* the decoder was still expecting input : the frame is truncated */
    if (nextToLoad != 0) {
        DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName);
        failed = 1;
    }

    LZ4F_freeDecompressionContext(lz4Ctx);
    ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */
    FIO_fwriteSparseEnd(prefs, ress->dstFile, pendingSkips);

    return failed ? FIO_ERROR_FRAME_DECODING : decodedSize;
}
#endif
2401 
2402 
2403 
2404 /** FIO_decompressFrames() :
2405  *  Find and decode frames inside srcFile
2406  *  srcFile presumed opened and valid
2407  * @return : 0 : OK
2408  *           1 : error
2409  */
FIO_decompressFrames(FIO_ctx_t * const fCtx,dRess_t ress,FILE * srcFile,const FIO_prefs_t * const prefs,const char * dstFileName,const char * srcFileName)2410 static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
2411                           dRess_t ress, FILE* srcFile,
2412                           const FIO_prefs_t* const prefs,
2413                           const char* dstFileName, const char* srcFileName)
2414 {
2415     unsigned readSomething = 0;
2416     unsigned long long filesize = 0;
2417     assert(srcFile != NULL);
2418 
2419     /* for each frame */
2420     for ( ; ; ) {
2421         /* check magic number -> version */
2422         size_t const toRead = 4;
2423         const BYTE* const buf = (const BYTE*)ress.srcBuffer;
2424         if (ress.srcBufferLoaded < toRead)  /* load up to 4 bytes for header */
2425             ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded,
2426                                           (size_t)1, toRead - ress.srcBufferLoaded, srcFile);
2427         if (ress.srcBufferLoaded==0) {
2428             if (readSomething==0) {  /* srcFile is empty (which is invalid) */
2429                 DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName);
2430                 return 1;
2431             }  /* else, just reached frame boundary */
2432             break;   /* no more input */
2433         }
2434         readSomething = 1;   /* there is at least 1 byte in srcFile */
2435         if (ress.srcBufferLoaded < toRead) {
2436             DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName);
2437             return 1;
2438         }
2439         if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) {
2440             unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, srcFile, prefs, srcFileName, filesize);
2441             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2442             filesize += frameSize;
2443         } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
2444 #ifdef ZSTD_GZDECOMPRESS
2445             unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, prefs, srcFileName);
2446             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2447             filesize += frameSize;
2448 #else
2449             DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName);
2450             return 1;
2451 #endif
2452         } else if ((buf[0] == 0xFD && buf[1] == 0x37)  /* xz magic number */
2453                 || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
2454 #ifdef ZSTD_LZMADECOMPRESS
2455             unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, prefs, srcFileName, buf[0] != 0xFD);
2456             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2457             filesize += frameSize;
2458 #else
2459             DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName);
2460             return 1;
2461 #endif
2462         } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
2463 #ifdef ZSTD_LZ4DECOMPRESS
2464             unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, prefs, srcFileName);
2465             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2466             filesize += frameSize;
2467 #else
2468             DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName);
2469             return 1;
2470 #endif
2471         } else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) {  /* pass-through mode */
2472             return FIO_passThrough(prefs,
2473                                    ress.dstFile, srcFile,
2474                                    ress.srcBuffer, ress.srcBufferSize,
2475                                    ress.srcBufferLoaded);
2476         } else {
2477             DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName);
2478             return 1;
2479     }   }  /* for each frame */
2480 
2481     /* Final Status */
2482     fCtx->totalBytesOutput += (size_t)filesize;
2483     DISPLAYLEVEL(2, "\r%79s\r", "");
2484     /* No status message in pipe mode (stdin - stdout) or multi-files mode */
2485     if (g_display_prefs.displayLevel >= 2) {
2486         if (fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3) {
2487             DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize);
2488         }
2489     }
2490 
2491     return 0;
2492 }
2493 
2494 /** FIO_decompressDstFile() :
2495     open `dstFileName`,
2496     or path-through if ress.dstFile is already != 0,
2497     then start decompression process (FIO_decompressFrames()).
2498     @return : 0 : OK
2499               1 : operation aborted
2500 */
FIO_decompressDstFile(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,dRess_t ress,FILE * srcFile,const char * dstFileName,const char * srcFileName)2501 static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
2502                                  FIO_prefs_t* const prefs,
2503                                  dRess_t ress, FILE* srcFile,
2504                                  const char* dstFileName, const char* srcFileName)
2505 {
2506     int result;
2507     stat_t statbuf;
2508     int transfer_permissions = 0;
2509     int releaseDstFile = 0;
2510 
2511     if ((ress.dstFile == NULL) && (prefs->testMode==0)) {
2512         releaseDstFile = 1;
2513 
2514         ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName);
2515         if (ress.dstFile==NULL) return 1;
2516 
2517         /* Must only be added after FIO_openDstFile() succeeds.
2518          * Otherwise we may delete the destination file if it already exists,
2519          * and the user presses Ctrl-C when asked if they wish to overwrite.
2520          */
2521         addHandler(dstFileName);
2522 
2523         if ( strcmp(srcFileName, stdinmark)   /* special case : don't transfer permissions from stdin */
2524           && UTIL_stat(srcFileName, &statbuf)
2525           && UTIL_isRegularFileStat(&statbuf) )
2526             transfer_permissions = 1;
2527     }
2528 
2529     result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName);
2530 
2531     if (releaseDstFile) {
2532         FILE* const dstFile = ress.dstFile;
2533         clearHandler();
2534         ress.dstFile = NULL;
2535         if (fclose(dstFile)) {
2536             DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
2537             result = 1;
2538         }
2539 
2540         if ( (result != 0)  /* operation failure */
2541           && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
2542           ) {
2543             FIO_removeFile(dstFileName);  /* remove decompression artefact; note: don't do anything special if remove() fails */
2544         } else if ( transfer_permissions /* file permissions correctly extracted from src */ ) {
2545             UTIL_setFileStat(dstFileName, &statbuf);  /* transfer file permissions from src into dst */
2546         }
2547     }
2548 
2549     return result;
2550 }
2551 
2552 
2553 /** FIO_decompressSrcFile() :
2554     Open `srcFileName`, transfer control to decompressDstFile()
2555     @return : 0 : OK
2556               1 : error
2557 */
FIO_decompressSrcFile(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,dRess_t ress,const char * dstFileName,const char * srcFileName)2558 static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName)
2559 {
2560     FILE* srcFile;
2561     int result;
2562 
2563     if (UTIL_isDirectory(srcFileName)) {
2564         DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
2565         return 1;
2566     }
2567 
2568     srcFile = FIO_openSrcFile(srcFileName);
2569     if (srcFile==NULL) return 1;
2570     ress.srcBufferLoaded = 0;
2571 
2572     result = FIO_decompressDstFile(fCtx, prefs, ress, srcFile, dstFileName, srcFileName);
2573 
2574     /* Close file */
2575     if (fclose(srcFile)) {
2576         DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));  /* error should not happen */
2577         return 1;
2578     }
2579     if ( prefs->removeSrcFile  /* --rm */
2580       && (result==0)      /* decompression successful */
2581       && strcmp(srcFileName, stdinmark) ) /* not stdin */ {
2582         /* We must clear the handler, since after this point calling it would
2583          * delete both the source and destination files.
2584          */
2585         clearHandler();
2586         if (FIO_removeFile(srcFileName)) {
2587             /* failed to remove src file */
2588             DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
2589             return 1;
2590     }   }
2591     return result;
2592 }
2593 
2594 
2595 
/* FIO_decompressFilename() :
 * Decompresses the single file `srcFileName` into `dstFileName`.
 * Decompression resources are created from `prefs` and `dictFileName`
 * for the duration of the call, and released before returning.
 * @return : the result of FIO_decompressSrcFile() (0 : OK, 1 : error) */
int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
                           const char* dstFileName, const char* srcFileName,
                           const char* dictFileName)
{
    int status;
    dRess_t const resources = FIO_createDResources(prefs, dictFileName);

    status = FIO_decompressSrcFile(fCtx, prefs, resources, dstFileName, srcFileName);
    FIO_freeDResources(resources);

    return status;
}
2607 
/* suffixList :
 * NULL-terminated table of the source file suffixes accepted by
 * FIO_determineDstName() when deriving a destination file name.
 * Which entries are present depends on the decoders compiled in.
 * FIO_determineDstName() treats an entry whose second character is 't'
 * as a short tar extension, and replaces it with ".tar". */
static const char *suffixList[] = {
    ZSTD_EXTENSION,
    TZSTD_EXTENSION,
#ifndef ZSTD_NODECOMPRESS
    ZSTD_ALT_EXTENSION,
#endif
#ifdef ZSTD_GZDECOMPRESS
    GZ_EXTENSION,
    TGZ_EXTENSION,
#endif
#ifdef ZSTD_LZMADECOMPRESS
    LZMA_EXTENSION,
    XZ_EXTENSION,
    TXZ_EXTENSION,
#endif
#ifdef ZSTD_LZ4DECOMPRESS
    LZ4_EXTENSION,
    TLZ4_EXTENSION,
#endif
    NULL   /* end-of-list marker */
};
2629 
/* suffixListStr :
 * '/'-separated list of suffixes, built by string literal concatenation.
 * Only used for display, in the "unknown suffix" error message of FIO_determineDstName().
 * Note : ZSTD_ALT_EXTENSION is not part of this string. */
static const char *suffixListStr =
    ZSTD_EXTENSION "/" TZSTD_EXTENSION
#ifdef ZSTD_GZDECOMPRESS
    "/" GZ_EXTENSION "/" TGZ_EXTENSION
#endif
#ifdef ZSTD_LZMADECOMPRESS
    "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
#endif
#ifdef ZSTD_LZ4DECOMPRESS
    "/" LZ4_EXTENSION "/" TLZ4_EXTENSION
#endif
;
2642 
2643 /* FIO_determineDstName() :
2644  * create a destination filename from a srcFileName.
2645  * @return a pointer to it.
2646  * @return == NULL if there is an error */
2647 static const char*
FIO_determineDstName(const char * srcFileName,const char * outDirName)2648 FIO_determineDstName(const char* srcFileName, const char* outDirName)
2649 {
2650     static size_t dfnbCapacity = 0;
2651     static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
2652     size_t dstFileNameEndPos;
2653     char* outDirFilename = NULL;
2654     const char* dstSuffix = "";
2655     size_t dstSuffixLen = 0;
2656 
2657     size_t sfnSize = strlen(srcFileName);
2658 
2659     size_t srcSuffixLen;
2660     const char* const srcSuffix = strrchr(srcFileName, '.');
2661     if (srcSuffix == NULL) {
2662         DISPLAYLEVEL(1,
2663             "zstd: %s: unknown suffix (%s expected). "
2664             "Can't derive the output file name. "
2665             "Specify it with -o dstFileName. Ignoring.\n",
2666             srcFileName, suffixListStr);
2667         return NULL;
2668     }
2669     srcSuffixLen = strlen(srcSuffix);
2670 
2671     {
2672         const char** matchedSuffixPtr;
2673         for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
2674             if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
2675                 break;
2676             }
2677         }
2678 
2679         /* check suffix is authorized */
2680         if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
2681             DISPLAYLEVEL(1,
2682                 "zstd: %s: unknown suffix (%s expected). "
2683                 "Can't derive the output file name. "
2684                 "Specify it with -o dstFileName. Ignoring.\n",
2685                 srcFileName, suffixListStr);
2686             return NULL;
2687         }
2688 
2689         if ((*matchedSuffixPtr)[1] == 't') {
2690             dstSuffix = ".tar";
2691             dstSuffixLen = strlen(dstSuffix);
2692         }
2693     }
2694 
2695     if (outDirName) {
2696         outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
2697         sfnSize = strlen(outDirFilename);
2698         assert(outDirFilename != NULL);
2699     }
2700 
2701     if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
2702         /* allocate enough space to write dstFilename into it */
2703         free(dstFileNameBuffer);
2704         dfnbCapacity = sfnSize + 20;
2705         dstFileNameBuffer = (char*)malloc(dfnbCapacity);
2706         if (dstFileNameBuffer==NULL)
2707             EXM_THROW(74, "%s : not enough memory for dstFileName",
2708                       strerror(errno));
2709     }
2710 
2711     /* return dst name == src name truncated from suffix */
2712     assert(dstFileNameBuffer != NULL);
2713     dstFileNameEndPos = sfnSize - srcSuffixLen;
2714     if (outDirFilename) {
2715         memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
2716         free(outDirFilename);
2717     } else {
2718         memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
2719     }
2720 
2721     /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
2722      * extension on decompression. Also writes terminating null. */
2723     strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
2724     return dstFileNameBuffer;
2725 
2726     /* note : dstFileNameBuffer memory is not going to be free */
2727 }
2728 
2729 int
FIO_decompressMultipleFilenames(FIO_ctx_t * const fCtx,FIO_prefs_t * const prefs,const char ** srcNamesTable,const char * outMirroredRootDirName,const char * outDirName,const char * outFileName,const char * dictFileName)2730 FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx,
2731                                 FIO_prefs_t* const prefs,
2732                                 const char** srcNamesTable,
2733                                 const char* outMirroredRootDirName,
2734                                 const char* outDirName, const char* outFileName,
2735                                 const char* dictFileName)
2736 {
2737     int status;
2738     int error = 0;
2739     dRess_t ress = FIO_createDResources(prefs, dictFileName);
2740 
2741     if (outFileName) {
2742         if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
2743             FIO_freeDResources(ress);
2744             return 1;
2745         }
2746         if (!prefs->testMode) {
2747             ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName);
2748             if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
2749         }
2750         for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {
2751             status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]);
2752             if (!status) fCtx->nbFilesProcessed++;
2753             error |= status;
2754         }
2755         if ((!prefs->testMode) && (fclose(ress.dstFile)))
2756             EXM_THROW(72, "Write error : %s : cannot properly close output file",
2757                         strerror(errno));
2758     } else {
2759         if (outMirroredRootDirName)
2760             UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
2761 
2762         for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {   /* create dstFileName */
2763             const char* const srcFileName = srcNamesTable[fCtx->currFileIdx];
2764             const char* dstFileName = NULL;
2765             if (outMirroredRootDirName) {
2766                 char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
2767                 if (validMirroredDirName) {
2768                     dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName);
2769                     free(validMirroredDirName);
2770                 } else {
2771                     DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName);
2772                 }
2773             } else {
2774                 dstFileName = FIO_determineDstName(srcFileName, outDirName);
2775             }
2776             if (dstFileName == NULL) { error=1; continue; }
2777             status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
2778             if (!status) fCtx->nbFilesProcessed++;
2779             error |= status;
2780         }
2781         if (outDirName)
2782             FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal);
2783     }
2784 
2785     if (fCtx->nbFilesProcessed >= 1  && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0)
2786         DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput);
2787 
2788     FIO_freeDResources(ress);
2789     return error;
2790 }
2791 
2792 /* **************************************************************************
2793  *  .zst file info (--list command)
2794  ***************************************************************************/
2795 
2796 typedef struct {
2797     U64 decompressedSize;
2798     U64 compressedSize;
2799     U64 windowSize;
2800     int numActualFrames;
2801     int numSkippableFrames;
2802     int decompUnavailable;
2803     int usesCheck;
2804     U32 nbFiles;
2805 } fileInfo_t;
2806 
/* InfoError :
 * outcome of the analysis of one file by the --list command */
typedef enum {
    info_success         = 0,   /* file fully parsed */
    info_frame_error     = 1,   /* a frame could not be parsed */
    info_not_zstd        = 2,   /* content is not recognized as zstd data */
    info_file_error      = 3,   /* file could not be accessed */
    info_truncated_input = 4    /* file ends at an unexpected position */
} InfoError;
2814 
/* ERROR_IF() :
 * When condition `c` is true, displays the printf-style message at level 1,
 * followed by a newline, then makes the enclosing function return `n`.
 * The do { } while (0) wrapper makes `ERROR_IF(...);` one single statement,
 * so it can safely be used as the body of an unbraced if / else. */
#define ERROR_IF(c,n,...) do {         \
    if (c) {                           \
        DISPLAYLEVEL(1, __VA_ARGS__);  \
        DISPLAYLEVEL(1, " \n");        \
        return n;                      \
    }                                  \
} while (0)
2822 
2823 static InfoError
FIO_analyzeFrames(fileInfo_t * info,FILE * const srcFile)2824 FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
2825 {
2826     /* begin analyzing frame */
2827     for ( ; ; ) {
2828         BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
2829         size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
2830         if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) {
2831             if ( feof(srcFile)
2832               && (numBytesRead == 0)
2833               && (info->compressedSize > 0)
2834               && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
2835                 unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile);
2836                 unsigned long long file_size = (unsigned long long) info->compressedSize;
2837                 ERROR_IF(file_position != file_size, info_truncated_input,
2838                   "Error: seeked to position %llu, which is beyond file size of %llu\n",
2839                   file_position,
2840                   file_size);
2841                 break;  /* correct end of file => success */
2842             }
2843             ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame");
2844             ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames");
2845         }
2846         {   U32 const magicNumber = MEM_readLE32(headerBuffer);
2847             /* Zstandard frame */
2848             if (magicNumber == ZSTD_MAGICNUMBER) {
2849                 ZSTD_frameHeader header;
2850                 U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
2851                 if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR
2852                   || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) {
2853                     info->decompUnavailable = 1;
2854                 } else {
2855                     info->decompressedSize += frameContentSize;
2856                 }
2857                 ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0,
2858                         info_frame_error, "Error: could not decode frame header");
2859                 info->windowSize = header.windowSize;
2860                 /* move to the end of the frame header */
2861                 {   size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
2862                     ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size");
2863                     ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0,
2864                             info_frame_error, "Error: could not move to end of frame header");
2865                 }
2866 
2867                 /* skip all blocks in the frame */
2868                 {   int lastBlock = 0;
2869                     do {
2870                         BYTE blockHeaderBuffer[3];
2871                         ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3,
2872                                 info_frame_error, "Error while reading block header");
2873                         {   U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);
2874                             U32 const blockTypeID = (blockHeader >> 1) & 3;
2875                             U32 const isRLE = (blockTypeID == 1);
2876                             U32 const isWrongBlock = (blockTypeID == 3);
2877                             long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);
2878                             ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type");
2879                             lastBlock = blockHeader & 1;
2880                             ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0,
2881                                     info_frame_error, "Error: could not skip to end of block");
2882                         }
2883                     } while (lastBlock != 1);
2884                 }
2885 
2886                 /* check if checksum is used */
2887                 {   BYTE const frameHeaderDescriptor = headerBuffer[4];
2888                     int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
2889                     if (contentChecksumFlag) {
2890                         info->usesCheck = 1;
2891                         ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0,
2892                                 info_frame_error, "Error: could not skip past checksum");
2893                 }   }
2894                 info->numActualFrames++;
2895             }
2896             /* Skippable frame */
2897             else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
2898                 U32 const frameSize = MEM_readLE32(headerBuffer + 4);
2899                 long const seek = (long)(8 + frameSize - numBytesRead);
2900                 ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0,
2901                         info_frame_error, "Error: could not find end of skippable frame");
2902                 info->numSkippableFrames++;
2903             }
2904             /* unknown content */
2905             else {
2906                 return info_not_zstd;
2907             }
2908         }  /* magic number analysis */
2909     }  /* end analyzing frames */
2910     return info_success;
2911 }
2912 
2913 
2914 static InfoError
getFileInfo_fileConfirmed(fileInfo_t * info,const char * inFileName)2915 getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
2916 {
2917     InfoError status;
2918     FILE* const srcFile = FIO_openSrcFile(inFileName);
2919     ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
2920 
2921     info->compressedSize = UTIL_getFileSize(inFileName);
2922     status = FIO_analyzeFrames(info, srcFile);
2923 
2924     fclose(srcFile);
2925     info->nbFiles = 1;
2926     return status;
2927 }
2928 
2929 
2930 /** getFileInfo() :
2931  *  Reads information from file, stores in *info
2932  * @return : InfoError status
2933  */
2934 static InfoError
getFileInfo(fileInfo_t * info,const char * srcFileName)2935 getFileInfo(fileInfo_t* info, const char* srcFileName)
2936 {
2937     ERROR_IF(!UTIL_isRegularFile(srcFileName),
2938             info_file_error, "Error : %s is not a file", srcFileName);
2939     return getFileInfo_fileConfirmed(info, srcFileName);
2940 }
2941 
2942 
2943 static void
displayInfo(const char * inFileName,const fileInfo_t * info,int displayLevel)2944 displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel)
2945 {
2946     unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB);
2947     const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB";
2948     double const windowSizeUnit = (double)info->windowSize / unit;
2949     double const compressedSizeUnit = (double)info->compressedSize / unit;
2950     double const decompressedSizeUnit = (double)info->decompressedSize / unit;
2951     double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize;
2952     const char* const checkString = (info->usesCheck ? "XXH64" : "None");
2953     if (displayLevel <= 2) {
2954         if (!info->decompUnavailable) {
2955             DISPLAYOUT("%6d  %5d  %7.2f %2s  %9.2f %2s  %5.3f  %5s  %s\n",
2956                     info->numSkippableFrames + info->numActualFrames,
2957                     info->numSkippableFrames,
2958                     compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr,
2959                     ratio, checkString, inFileName);
2960         } else {
2961             DISPLAYOUT("%6d  %5d  %7.2f %2s                       %5s  %s\n",
2962                     info->numSkippableFrames + info->numActualFrames,
2963                     info->numSkippableFrames,
2964                     compressedSizeUnit, unitStr,
2965                     checkString, inFileName);
2966         }
2967     } else {
2968         DISPLAYOUT("%s \n", inFileName);
2969         DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
2970         if (info->numSkippableFrames)
2971             DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
2972         DISPLAYOUT("Window Size: %.2f %2s (%llu B)\n",
2973                    windowSizeUnit, unitStr,
2974                    (unsigned long long)info->windowSize);
2975         DISPLAYOUT("Compressed Size: %.2f %2s (%llu B)\n",
2976                     compressedSizeUnit, unitStr,
2977                     (unsigned long long)info->compressedSize);
2978         if (!info->decompUnavailable) {
2979             DISPLAYOUT("Decompressed Size: %.2f %2s (%llu B)\n",
2980                     decompressedSizeUnit, unitStr,
2981                     (unsigned long long)info->decompressedSize);
2982             DISPLAYOUT("Ratio: %.4f\n", ratio);
2983         }
2984         DISPLAYOUT("Check: %s\n", checkString);
2985         DISPLAYOUT("\n");
2986     }
2987 }
2988 
/* FIO_addFInfo() :
 * @return : the combination of `fi1` and `fi2` :
 *           counters and sizes are added,
 *           decompUnavailable is raised if either side raises it,
 *           usesCheck is kept only when both sides have it.
 *           Fields not listed below are left at zero.
 */
static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
{
    fileInfo_t sum;
    memset(&sum, 0, sizeof(sum));

    /* additive fields */
    sum.nbFiles            = fi1.nbFiles            + fi2.nbFiles;
    sum.compressedSize     = fi1.compressedSize     + fi2.compressedSize;
    sum.decompressedSize   = fi1.decompressedSize   + fi2.decompressedSize;
    sum.numActualFrames    = fi1.numActualFrames    + fi2.numActualFrames;
    sum.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames;

    /* flags */
    sum.usesCheck          = fi1.usesCheck          & fi2.usesCheck;
    sum.decompUnavailable  = fi1.decompUnavailable  | fi2.decompUnavailable;

    return sum;
}
3002 
3003 static int
FIO_listFile(fileInfo_t * total,const char * inFileName,int displayLevel)3004 FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel)
3005 {
3006     fileInfo_t info;
3007     memset(&info, 0, sizeof(info));
3008     {   InfoError const error = getFileInfo(&info, inFileName);
3009         switch (error) {
3010             case info_frame_error:
3011                 /* display error, but provide output */
3012                 DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName);
3013                 break;
3014             case info_not_zstd:
3015                 DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName);
3016                 if (displayLevel > 2) DISPLAYOUT("\n");
3017                 return 1;
3018             case info_file_error:
3019                 /* error occurred while opening the file */
3020                 if (displayLevel > 2) DISPLAYOUT("\n");
3021                 return 1;
3022             case info_truncated_input:
3023                 DISPLAYOUT("File \"%s\" is truncated \n", inFileName);
3024                 if (displayLevel > 2) DISPLAYOUT("\n");
3025                 return 1;
3026             case info_success:
3027             default:
3028                 break;
3029         }
3030 
3031         displayInfo(inFileName, &info, displayLevel);
3032         *total = FIO_addFInfo(*total, info);
3033         assert(error == info_success || error == info_frame_error);
3034         return (int)error;
3035     }
3036 }
3037 
FIO_listMultipleFiles(unsigned numFiles,const char ** filenameTable,int displayLevel)3038 int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel)
3039 {
3040     /* ensure no specified input is stdin (needs fseek() capability) */
3041     {   unsigned u;
3042         for (u=0; u<numFiles;u++) {
3043             ERROR_IF(!strcmp (filenameTable[u], stdinmark),
3044                     1, "zstd: --list does not support reading from standard input");
3045     }   }
3046 
3047     if (numFiles == 0) {
3048         if (!IS_CONSOLE(stdin)) {
3049             DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n");
3050         }
3051         DISPLAYLEVEL(1, "No files given \n");
3052         return 1;
3053     }
3054 
3055     if (displayLevel <= 2) {
3056         DISPLAYOUT("Frames  Skips  Compressed  Uncompressed  Ratio  Check  Filename\n");
3057     }
3058     {   int error = 0;
3059         fileInfo_t total;
3060         memset(&total, 0, sizeof(total));
3061         total.usesCheck = 1;
3062         /* --list each file, and check for any error */
3063         {   unsigned u;
3064             for (u=0; u<numFiles;u++) {
3065                 error |= FIO_listFile(&total, filenameTable[u], displayLevel);
3066         }   }
3067         if (numFiles > 1 && displayLevel <= 2) {   /* display total */
3068             unsigned const unit = total.compressedSize < (1 MB) ? (1 KB) : (1 MB);
3069             const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB";
3070             double const compressedSizeUnit = (double)total.compressedSize / unit;
3071             double const decompressedSizeUnit = (double)total.decompressedSize / unit;
3072             double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize;
3073             const char* const checkString = (total.usesCheck ? "XXH64" : "");
3074             DISPLAYOUT("----------------------------------------------------------------- \n");
3075             if (total.decompUnavailable) {
3076                 DISPLAYOUT("%6d  %5d  %7.2f %2s                       %5s  %u files\n",
3077                         total.numSkippableFrames + total.numActualFrames,
3078                         total.numSkippableFrames,
3079                         compressedSizeUnit, unitStr,
3080                         checkString, (unsigned)total.nbFiles);
3081             } else {
3082                 DISPLAYOUT("%6d  %5d  %7.2f %2s  %9.2f %2s  %5.3f  %5s  %u files\n",
3083                         total.numSkippableFrames + total.numActualFrames,
3084                         total.numSkippableFrames,
3085                         compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr,
3086                         ratio, checkString, (unsigned)total.nbFiles);
3087         }   }
3088         return error;
3089     }
3090 }
3091 
3092 
3093 #endif /* #ifndef ZSTD_NODECOMPRESS */
3094