1 /*
2  * Copyright (c) 2016-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  */
9 #include "Options.h"
10 #include "util.h"
11 #include "utils/ScopeGuard.h"
12 
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <iterator>
#include <limits>
#include <thread>
#include <vector>
20 
21 
22 namespace pzstd {
23 
24 namespace {
/**
 * Returns the number of worker threads to use when `-p` is not given.
 *
 * A build-time PZSTD_NUM_THREADS definition takes precedence; otherwise the
 * value reported by std::thread::hardware_concurrency() is returned as-is
 * (the caller rejects a resulting thread count of 0).
 */
unsigned defaultNumThreads() {
#ifndef PZSTD_NUM_THREADS
  const unsigned detected = std::thread::hardware_concurrency();
  return detected;
#else
  return PZSTD_NUM_THREADS;
#endif
}
32 
/**
 * Parses the run of decimal digits starting at `*arg`.
 *
 * @param arg  In/out cursor into a NUL-terminated string. On return it points
 *             at the first character that is not a decimal digit; it is left
 *             untouched when `**arg` is not a digit.
 * @return The parsed value, or 0 when no digit was present. If the digits
 *         describe a number too large for `unsigned`, the result saturates at
 *         std::numeric_limits<unsigned>::max() instead of wrapping around, so
 *         an absurdly large request cannot masquerade as a small valid one.
 */
unsigned parseUnsigned(const char **arg) {
  constexpr unsigned kMax = std::numeric_limits<unsigned>::max();
  unsigned result = 0;
  bool overflowed = false;
  // Every digit is consumed even after overflow so that the caller's
  // "trailing garbage" checks keep seeing the same cursor position.
  for (; **arg >= '0' && **arg <= '9'; ++(*arg)) {
    const unsigned digit = static_cast<unsigned>(**arg - '0');
    // result * 10 + digit <= kMax  <=>  result <= (kMax - digit) / 10
    if (overflowed || result > (kMax - digit) / 10) {
      overflowed = true;
      continue;
    }
    result = result * 10 + digit;
  }
  return overflowed ? kMax : result;
}
42 
/**
 * Fetches the argument belonging to the short option at `*options`.
 *
 * @param options  Points at the option letter inside the current argv entry.
 * @param argv     The full argument vector.
 * @param i        Index of the current argv entry; advanced by one when the
 *                 argument has to be taken from the following entry.
 * @param argc     Number of entries in `argv`.
 * @return The text glued to the option letter (e.g. the "4" in "-p4") when
 *         present, otherwise the next argv entry, or nullptr (after printing
 *         an error to stderr) when there is no next entry.
 */
const char *getArgument(const char *options, const char **argv, int &i,
                        int argc) {
  // Attached form: the argument follows the option letter directly.
  const bool attached = options[1] != 0;
  if (attached) {
    return options + 1;
  }

  // Detached form: the argument is the next command-line entry.
  i += 1;
  if (i != argc) {
    return argv[i];
  }

  std::fprintf(stderr, "Option -%c requires an argument, but none provided\n",
               options[0]);
  return nullptr;
}
56 
// Suffix appended to the input name when compressing and stripped from it
// when decompressing (see Options::getOutputFile).
const std::string kZstdExtension = ".zst";
// File name that stands for standard input.
constexpr char kStdIn[] = "-";
// File name that stands for standard output.
constexpr char kStdOut[] = "-";
// Compression level used when none is given on the command line.
constexpr unsigned kDefaultCompressionLevel = 3;
// Highest compression level accepted unless --ultra is given.
constexpr unsigned kMaxNonUltraCompressionLevel = 19;

// Output path used in test mode (-t), chosen per platform.
#ifdef _WIN32
const char nullOutput[] = "nul";
#else
const char nullOutput[] = "/dev/null";
#endif
68 
/**
 * Prints an "operation not supported" diagnostic for `option` to stderr.
 * The caller is responsible for returning a failure status afterwards.
 */
void notSupported(const char *option) {
  std::FILE *const errorStream = stderr;
  std::fprintf(errorStream, "Operation not supported: %s\n", option);
}
72 
usage()73 void usage() {
74   std::fprintf(stderr, "Usage:\n");
75   std::fprintf(stderr, "  pzstd [args] [FILE(s)]\n");
76   std::fprintf(stderr, "Parallel ZSTD options:\n");
77   std::fprintf(stderr, "  -p, --processes   #    : number of threads to use for (de)compression (default:<numcpus>)\n");
78 
79   std::fprintf(stderr, "ZSTD options:\n");
80   std::fprintf(stderr, "  -#                     : # compression level (1-%d, default:%d)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel);
81   std::fprintf(stderr, "  -d, --decompress       : decompression\n");
82   std::fprintf(stderr, "  -o                file : result stored into `file` (only if 1 input file)\n");
83   std::fprintf(stderr, "  -f, --force            : overwrite output without prompting, (de)compress links\n");
84   std::fprintf(stderr, "      --rm               : remove source file(s) after successful (de)compression\n");
85   std::fprintf(stderr, "  -k, --keep             : preserve source file(s) (default)\n");
86   std::fprintf(stderr, "  -h, --help             : display help and exit\n");
87   std::fprintf(stderr, "  -V, --version          : display version number and exit\n");
88   std::fprintf(stderr, "  -v, --verbose          : verbose mode; specify multiple times to increase log level (default:2)\n");
89   std::fprintf(stderr, "  -q, --quiet            : suppress warnings; specify twice to suppress errors too\n");
90   std::fprintf(stderr, "  -c, --stdout           : force write to standard output, even if it is the console\n");
91 #ifdef UTIL_HAS_CREATEFILELIST
92   std::fprintf(stderr, "  -r                     : operate recursively on directories\n");
93 #endif
94   std::fprintf(stderr, "      --ultra            : enable levels beyond %i, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel());
95   std::fprintf(stderr, "  -C, --check            : integrity check (default)\n");
96   std::fprintf(stderr, "      --no-check         : no integrity check\n");
97   std::fprintf(stderr, "  -t, --test             : test compressed file integrity\n");
98   std::fprintf(stderr, "  --                     : all arguments after \"--\" are treated as files\n");
99 }
100 } // anonymous namespace
101 
Options()102 Options::Options()
103     : numThreads(defaultNumThreads()), maxWindowLog(23),
104       compressionLevel(kDefaultCompressionLevel), decompress(false),
105       overwrite(false), keepSource(true), writeMode(WriteMode::Auto),
106       checksum(true), verbosity(2) {}
107 
parse(int argc,const char ** argv)108 Options::Status Options::parse(int argc, const char **argv) {
109   bool test = false;
110   bool recursive = false;
111   bool ultra = false;
112   bool forceStdout = false;
113   bool followLinks = false;
114   // Local copy of input files, which are pointers into argv.
115   std::vector<const char *> localInputFiles;
116   for (int i = 1; i < argc; ++i) {
117     const char *arg = argv[i];
118     // Protect against empty arguments
119     if (arg[0] == 0) {
120       continue;
121     }
122     // Everything after "--" is an input file
123     if (!std::strcmp(arg, "--")) {
124       ++i;
125       std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles));
126       break;
127     }
128     // Long arguments that don't have a short option
129     {
130       bool isLongOption = true;
131       if (!std::strcmp(arg, "--rm")) {
132         keepSource = false;
133       } else if (!std::strcmp(arg, "--ultra")) {
134         ultra = true;
135         maxWindowLog = 0;
136       } else if (!std::strcmp(arg, "--no-check")) {
137         checksum = false;
138       } else if (!std::strcmp(arg, "--sparse")) {
139         writeMode = WriteMode::Sparse;
140         notSupported("Sparse mode");
141         return Status::Failure;
142       } else if (!std::strcmp(arg, "--no-sparse")) {
143         writeMode = WriteMode::Regular;
144         notSupported("Sparse mode");
145         return Status::Failure;
146       } else if (!std::strcmp(arg, "--dictID")) {
147         notSupported(arg);
148         return Status::Failure;
149       } else if (!std::strcmp(arg, "--no-dictID")) {
150         notSupported(arg);
151         return Status::Failure;
152       } else {
153         isLongOption = false;
154       }
155       if (isLongOption) {
156         continue;
157       }
158     }
159     // Arguments with a short option simply set their short option.
160     const char *options = nullptr;
161     if (!std::strcmp(arg, "--processes")) {
162       options = "p";
163     } else if (!std::strcmp(arg, "--version")) {
164       options = "V";
165     } else if (!std::strcmp(arg, "--help")) {
166       options = "h";
167     } else if (!std::strcmp(arg, "--decompress")) {
168       options = "d";
169     } else if (!std::strcmp(arg, "--force")) {
170       options = "f";
171     } else if (!std::strcmp(arg, "--stdout")) {
172       options = "c";
173     } else if (!std::strcmp(arg, "--keep")) {
174       options = "k";
175     } else if (!std::strcmp(arg, "--verbose")) {
176       options = "v";
177     } else if (!std::strcmp(arg, "--quiet")) {
178       options = "q";
179     } else if (!std::strcmp(arg, "--check")) {
180       options = "C";
181     } else if (!std::strcmp(arg, "--test")) {
182       options = "t";
183     } else if (arg[0] == '-' && arg[1] != 0) {
184       options = arg + 1;
185     } else {
186       localInputFiles.emplace_back(arg);
187       continue;
188     }
189     assert(options != nullptr);
190 
191     bool finished = false;
192     while (!finished && *options != 0) {
193       // Parse the compression level
194       if (*options >= '0' && *options <= '9') {
195         compressionLevel = parseUnsigned(&options);
196         continue;
197       }
198 
199       switch (*options) {
200       case 'h':
201       case 'H':
202         usage();
203         return Status::Message;
204       case 'V':
205         std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING);
206         return Status::Message;
207       case 'p': {
208         finished = true;
209         const char *optionArgument = getArgument(options, argv, i, argc);
210         if (optionArgument == nullptr) {
211           return Status::Failure;
212         }
213         if (*optionArgument < '0' || *optionArgument > '9') {
214           std::fprintf(stderr, "Option -p expects a number, but %s provided\n",
215                        optionArgument);
216           return Status::Failure;
217         }
218         numThreads = parseUnsigned(&optionArgument);
219         if (*optionArgument != 0) {
220           std::fprintf(stderr,
221                        "Option -p expects a number, but %u%s provided\n",
222                        numThreads, optionArgument);
223           return Status::Failure;
224         }
225         break;
226       }
227       case 'o': {
228         finished = true;
229         const char *optionArgument = getArgument(options, argv, i, argc);
230         if (optionArgument == nullptr) {
231           return Status::Failure;
232         }
233         outputFile = optionArgument;
234         break;
235       }
236       case 'C':
237         checksum = true;
238         break;
239       case 'k':
240         keepSource = true;
241         break;
242       case 'd':
243         decompress = true;
244         break;
245       case 'f':
246         overwrite = true;
247         forceStdout = true;
248         followLinks = true;
249         break;
250       case 't':
251         test = true;
252         decompress = true;
253         break;
254 #ifdef UTIL_HAS_CREATEFILELIST
255       case 'r':
256         recursive = true;
257         break;
258 #endif
259       case 'c':
260         outputFile = kStdOut;
261         forceStdout = true;
262         break;
263       case 'v':
264         ++verbosity;
265         break;
266       case 'q':
267         --verbosity;
268         // Ignore them for now
269         break;
270       // Unsupported options from Zstd
271       case 'D':
272       case 's':
273         notSupported("Zstd dictionaries.");
274         return Status::Failure;
275       case 'b':
276       case 'e':
277       case 'i':
278       case 'B':
279         notSupported("Zstd benchmarking options.");
280         return Status::Failure;
281       default:
282         std::fprintf(stderr, "Invalid argument: %s\n", arg);
283         return Status::Failure;
284       }
285       if (!finished) {
286         ++options;
287       }
288     } // while (*options != 0);
289   }   // for (int i = 1; i < argc; ++i);
290 
291   // Set options for test mode
292   if (test) {
293     outputFile = nullOutput;
294     keepSource = true;
295   }
296 
297   // Input file defaults to standard input if not provided.
298   if (localInputFiles.empty()) {
299     localInputFiles.emplace_back(kStdIn);
300   }
301 
302   // Check validity of input files
303   if (localInputFiles.size() > 1) {
304     const auto it = std::find(localInputFiles.begin(), localInputFiles.end(),
305                               std::string{kStdIn});
306     if (it != localInputFiles.end()) {
307       std::fprintf(
308           stderr,
309           "Cannot specify standard input when handling multiple files\n");
310       return Status::Failure;
311     }
312   }
313   if (localInputFiles.size() > 1 || recursive) {
314     if (!outputFile.empty() && outputFile != nullOutput) {
315       std::fprintf(
316           stderr,
317           "Cannot specify an output file when handling multiple inputs\n");
318       return Status::Failure;
319     }
320   }
321 
322   g_utilDisplayLevel = verbosity;
323   // Remove local input files that are symbolic links
324   if (!followLinks) {
325       std::remove_if(localInputFiles.begin(), localInputFiles.end(),
326                      [&](const char *path) {
327                         bool isLink = UTIL_isLink(path);
328                         if (isLink && verbosity >= 2) {
329                             std::fprintf(
330                                     stderr,
331                                     "Warning : %s is symbolic link, ignoring\n",
332                                     path);
333                         }
334                         return isLink;
335                     });
336   }
337 
338   // Translate input files/directories into files to (de)compress
339   if (recursive) {
340     FileNamesTable* const files = UTIL_createExpandedFNT(localInputFiles.data(), localInputFiles.size(), followLinks);
341     if (files == nullptr) {
342       std::fprintf(stderr, "Error traversing directories\n");
343       return Status::Failure;
344     }
345     auto guard =
346         makeScopeGuard([&] { UTIL_freeFileNamesTable(files); });
347     if (files->tableSize == 0) {
348       std::fprintf(stderr, "No files found\n");
349       return Status::Failure;
350     }
351     inputFiles.resize(files->tableSize);
352     std::copy(files->fileNames, files->fileNames + files->tableSize, inputFiles.begin());
353   } else {
354     inputFiles.resize(localInputFiles.size());
355     std::copy(localInputFiles.begin(), localInputFiles.end(),
356               inputFiles.begin());
357   }
358   localInputFiles.clear();
359   assert(!inputFiles.empty());
360 
361   // If reading from standard input, default to standard output
362   if (inputFiles[0] == kStdIn && outputFile.empty()) {
363     assert(inputFiles.size() == 1);
364     outputFile = "-";
365   }
366 
367   if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) {
368     assert(inputFiles.size() == 1);
369     std::fprintf(stderr, "Cannot read input from interactive console\n");
370     return Status::Failure;
371   }
372   if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) {
373     std::fprintf(stderr, "Will not write to console stdout unless -c or -f is "
374                          "specified and decompressing\n");
375     return Status::Failure;
376   }
377 
378   // Check compression level
379   {
380     unsigned maxCLevel =
381         ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel;
382     if (compressionLevel > maxCLevel || compressionLevel == 0) {
383       std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel);
384       return Status::Failure;
385     }
386   }
387 
388   // Check that numThreads is set
389   if (numThreads == 0) {
390     std::fprintf(stderr, "Invalid arguments: # of threads not specified "
391                          "and unable to determine hardware concurrency.\n");
392     return Status::Failure;
393   }
394 
395   // Modify verbosity
396   // If we are piping input and output, turn off interaction
397   if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) {
398     verbosity = 1;
399   }
400   // If we are in multi-file mode, turn off interaction
401   if (inputFiles.size() > 1 && verbosity == 2) {
402     verbosity = 1;
403   }
404 
405   return Status::Success;
406 }
407 
getOutputFile(const std::string & inputFile) const408 std::string Options::getOutputFile(const std::string &inputFile) const {
409   if (!outputFile.empty()) {
410     return outputFile;
411   }
412   // Attempt to add/remove zstd extension from the input file
413   if (decompress) {
414     int stemSize = inputFile.size() - kZstdExtension.size();
415     if (stemSize > 0 && inputFile.substr(stemSize) == kZstdExtension) {
416       return inputFile.substr(0, stemSize);
417     } else {
418       return "";
419     }
420   } else {
421     return inputFile + kZstdExtension;
422   }
423 }
424 }
425