1 /* jdupes (C) 2015-2020 Jody Bruchon <jody@jodybruchon.com>
2    Forked from fdupes 1.51 (C) 1999-2014 Adrian Lopez
3 
4    Permission is hereby granted, free of charge, to any person
5    obtaining a copy of this software and associated documentation files
6    (the "Software"), to deal in the Software without restriction,
7    including without limitation the rights to use, copy, modify, merge,
8    publish, distribute, sublicense, and/or sell copies of the Software,
9    and to permit persons to whom the Software is furnished to do so,
10    subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be
13    included in all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
22 
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <strings.h>
28 #include <sys/types.h>
29 #include <fcntl.h>
30 #include <dirent.h>
31 #include <signal.h>
32 #include <unistd.h>
33 #include <stdlib.h>
34 #include <stdint.h>
35 #include <inttypes.h>
36 #ifndef OMIT_GETOPT_LONG
37  #include <getopt.h>
38 #endif
39 #include <string.h>
40 #include <errno.h>
41 #include <libgen.h>
42 #include <time.h>
43 #include <sys/time.h>
44 #include "jdupes.h"
45 #include "xxhash.h"
46 #include "oom.h"
47 #ifdef ENABLE_DEDUPE
48 #include <sys/utsname.h>
49 #endif
50 
51 /* Jody Bruchon's helpful functions */
52 #include "string_malloc.h"
53 #include "jody_sort.h"
54 #include "jody_win_unicode.h"
55 #include "jody_cacheinfo.h"
56 #include "jody_strtoepoch.h"
57 #include "version.h"
58 
59 /* Headers for post-scanning actions */
60 #include "act_deletefiles.h"
61 #include "act_dedupefiles.h"
62 #include "act_linkfiles.h"
63 #include "act_printmatches.h"
64 #include "act_printjson.h"
65 #include "act_summarize.h"
66 
/* Per-platform path separator and read-only file open mode string */
#if defined(_WIN32) || defined(__CYGWIN__)
 /* Windows (native or Cygwin) */
 #ifdef UNICODE
  const wchar_t *FILE_MODE_RO = L"rbS";
 #else
  const char *FILE_MODE_RO = "rbS";
 #endif /* UNICODE */
 const char dir_sep = '\\';

#else /* Not Windows */
 #ifdef UNICODE
  #error Do not define UNICODE on non-Windows platforms.
  #undef UNICODE
 #endif
 const char dir_sep = '/';
 const char *FILE_MODE_RO = "rb";
#endif /* _WIN32 || __CYGWIN__ */
84 
/* Globals that exist only in Windows + Unicode builds */
#if defined(UNICODE)
static wpath_t wname;
static wpath_t wstr;
/* Both output mode variables start out as _O_TEXT */
int out_mode = _O_TEXT;
int err_mode = _O_TEXT;
#endif /* UNICODE */
91 
92 #ifndef NO_SYMLINKS
93 #include "jody_paths.h"
94 #endif
95 
/* Behavior modification flag words: general (flags), action (a_flags)
 * and -P (p_flags) */
uint_fast32_t flags = 0;
uint_fast32_t a_flags = 0;
uint_fast32_t p_flags = 0;

/* NOTE(review): presumably filled in from argv[0] at startup; the
 * assignment is not visible in this part of the file */
static const char *program_name;
100 
/* Shared stat buffer and SIGUSR1 notification state */
#ifdef ON_WINDOWS
 struct winstat s;
#else
 struct stat s;
 /* Written by the sigusr1() signal handler and read (then cleared) by
  * update_progress(): 0 = nothing pending, 1 = soft abort turned on,
  * 2 = soft abort turned off. An object shared between a signal handler
  * and normal code must be a volatile sig_atomic_t to be accessed safely. */
 static volatile sig_atomic_t usr1_toggle = 0;
#endif
108 
/* Larger chunk size makes large files process faster but uses more RAM.
 * CHUNK_SIZE and PARTIAL_HASH_SIZE may be overridden at build time. */
#define MIN_CHUNK_SIZE 4096
#define MAX_CHUNK_SIZE 16777216
#if !defined(CHUNK_SIZE)
 #define CHUNK_SIZE 65536
#endif
#if !defined(PARTIAL_HASH_SIZE)
 #define PARTIAL_HASH_SIZE 4096
#endif

/* Starts at the compile-time CHUNK_SIZE value */
static size_t auto_chunk_size = CHUNK_SIZE;
120 
121 /* Maximum path buffer size to use; must be large enough for a path plus
122  * any work that might be done to the array it's stored in. PATH_MAX is
123  * not always true. Read this article on the false promises of PATH_MAX:
124  * http://insanecoding.blogspot.com/2007/11/pathmax-simply-isnt.html
125  * Windows + Unicode needs a lot more space than UTF-8 in Linux/Mac OS X
126  */
127 #ifndef PATHBUF_SIZE
128 #define PATHBUF_SIZE 4096
129 #endif
130 /* Refuse to build if PATHBUF_SIZE is too small */
131 #if PATHBUF_SIZE < PATH_MAX
132 #error "PATHBUF_SIZE can't be less than PATH_MAX"
133 #endif
134 
135 /* Size suffixes - this gets exported */
136 const struct size_suffix size_suffix[] = {
137   /* Byte (someone may actually try to use this) */
138   { "b", 1 },
139   { "k", 1024 },
140   { "kib", 1024 },
141   { "m", 1048576 },
142   { "mib", 1048576 },
143   { "g", (uint64_t)1048576 * 1024 },
144   { "gib", (uint64_t)1048576 * 1024 },
145   { "t", (uint64_t)1048576 * 1048576 },
146   { "tib", (uint64_t)1048576 * 1048576 },
147   { "p", (uint64_t)1048576 * 1048576 * 1024},
148   { "pib", (uint64_t)1048576 * 1048576 * 1024},
149   { "e", (uint64_t)1048576 * 1048576 * 1048576},
150   { "eib", (uint64_t)1048576 * 1048576 * 1048576},
151   /* Decimal suffixes */
152   { "kb", 1000 },
153   { "mb", 1000000 },
154   { "gb", 1000000000 },
155   { "tb", 1000000000000 },
156   { "pb", 1000000000000000 },
157   { "eb", 1000000000000000000 },
158   { NULL, 0 },
159 };
160 
/* NULL-terminated list of feature names, one per compile-time option
 * that is enabled in this build */
const char *extensions[] = {
#if defined(ON_WINDOWS)
  "windows",
#endif
#if defined(UNICODE)
  "unicode",
#endif
#if defined(OMIT_GETOPT_LONG)
  "nolong",
#endif
#if defined(__FAST_MATH__)
  "fastmath",
#endif
#if defined(DEBUG)
  "debug",
#endif
#if defined(LOUD_DEBUG)
  "loud",
#endif
#if defined(ENABLE_DEDUPE)
  "fsdedup",
#endif
#if defined(LOW_MEMORY)
  "lowmem",
#endif
#if defined(SMA_PAGE_SIZE)
  "smapage",
#endif
#if defined(NO_PERMS)
  "noperm",
#endif
#if defined(NO_HARDLINKS)
  "nohardlink",
#endif
#if defined(NO_SYMLINKS)
  "nosymlink",
#endif
#if defined(NO_USER_ORDER)
  "nouserorder",
#endif
  NULL
};
204 
205 /* Tree to track each directory traversed */
206 struct travdone {
207   struct travdone *left;
208   struct travdone *right;
209   jdupes_ino_t inode;
210   dev_t device;
211 };
212 static struct travdone *travdone_head = NULL;
213 
214 /* Extended filter tree head and static tag list */
215 struct extfilter *extfilter_head = NULL;
216 const struct extfilter_tags extfilter_tags[] = {
217   { "noext",	XF_EXCL_EXT },
218   { "onlyext",	XF_ONLY_EXT },
219   { "size+",	XF_SIZE_GT },
220   { "size-",	XF_SIZE_LT },
221   { "size+=",	XF_SIZE_GTEQ },
222   { "size-=",	XF_SIZE_LTEQ },
223   { "size=",	XF_SIZE_EQ },
224   { "nostr",	XF_EXCL_STR },
225   { "onlystr",	XF_ONLY_STR },
226   { "newer",	XF_DATE_NEWER },
227   { "older",	XF_DATE_OLDER },
228   { NULL, 0 },
229 };
230 
/* Counters used by the progress indicator code */
static uintmax_t filecount = 0;
static uintmax_t progress = 0;
static uintmax_t item_progress = 0;
static uintmax_t dupecount = 0;
/* Number of read loops before checking progress indicator */
#define CHECK_MINIMUM 256
236 
/* Hash/compare performance statistics; compiled only in debug builds */
#ifdef DEBUG
static unsigned int small_file = 0;
static unsigned int partial_hash = 0;
static unsigned int partial_elim = 0;
static unsigned int full_hash = 0;
static unsigned int partial_to_full = 0;
static unsigned int hash_fail = 0;
static uintmax_t comparisons = 0;
static unsigned int left_branch = 0;
static unsigned int right_branch = 0;
 #if defined(ON_WINDOWS) && !defined(NO_HARDLINKS)
/* Files skipped because of the Windows hard link limit */
static unsigned int hll_exclude = 0;
 #endif
#endif /* DEBUG */

/* Tree depth statistics; compiled only when TREE_DEPTH_STATS is defined */
#ifdef TREE_DEPTH_STATS
static unsigned int tree_depth = 0;
static unsigned int max_depth = 0;
#endif
254 
255 /* File tree head */
256 static filetree_t *checktree = NULL;
257 
258 /* Directory/file parameter position counter */
259 static unsigned int user_item_count = 1;
260 
261 /* registerfile() direction options */
262 enum tree_direction { NONE, LEFT, RIGHT };
263 
264 /* Sort order reversal */
265 static int sort_direction = 1;
266 
/* Set to 1 by sighandler() when CTRL-C arrives while soft abort is active.
 * It is written from a signal handler, so it must be a volatile
 * sig_atomic_t for the access to be well-defined and for the main code
 * to reliably observe the change. */
static volatile sig_atomic_t interrupt = 0;
269 
270 /* Progress indicator time */
271 struct timeval time1, time2;
272 
273 /* For path name mangling */
274 char tempname[PATHBUF_SIZE * 2];
275 
/* Compare two hashes like memcmp(): yields 1 if a > b, 0 if equal, -1 if
 * a < b. Every argument use is parenthesized so that expression arguments
 * (e.g. "x & mask") are compared as a whole. Arguments are evaluated more
 * than once, so they must not have side effects. */
#define HASH_COMPARE(a,b) (((a) > (b)) ? 1 : (((a) == (b)) ? 0 : -1))
278 
279 static void help_text_extfilter(void);
280 
281 /***** End definitions, begin code *****/
282 
283 /* Catch CTRL-C and either notify or terminate */
sighandler(const int signum)284 void sighandler(const int signum)
285 {
286   (void)signum;
287   if (interrupt || !ISFLAG(flags, F_SOFTABORT)) {
288     fprintf(stderr, "\n");
289     string_malloc_destroy();
290     exit(EXIT_FAILURE);
291   }
292   interrupt = 1;
293   return;
294 }
295 
296 
297 #ifndef ON_WINDOWS
sigusr1(const int signum)298 void sigusr1(const int signum)
299 {
300   (void)signum;
301   if (!ISFLAG(flags, F_SOFTABORT)) {
302     SETFLAG(flags, F_SOFTABORT);
303     usr1_toggle = 1;
304   } else {
305     CLEARFLAG(flags, F_SOFTABORT);
306     usr1_toggle = 2;
307   }
308   return;
309 }
310 #endif
311 
312 
/* De-allocate on exit: calls string_malloc_destroy() unless the build
 * defines SMA_PASSTHROUGH, in which case this does nothing */
void clean_exit(void)
{
#ifdef SMA_PASSTHROUGH
  /* Nothing to tear down in this configuration */
#else
  string_malloc_destroy();
#endif
  return;
}
321 
322 
/* Null pointer failure: report an internal error and terminate.
 * func names the function that received the NULL pointer; if func is
 * itself NULL, "(NULL)" is printed in its place. This function tears
 * down string_malloc and exits with EXIT_FAILURE; it does not return. */
extern void nullptr(const char * restrict func)
{
  static const char unnamed[] = "(NULL)";
  const char * const who = (func != NULL) ? func : unnamed;

  fprintf(stderr, "\ninternal error: NULL pointer passed to %s\n", who);
  string_malloc_destroy();
  exit(EXIT_FAILURE);
}
332 
333 
/* Make a deep copy of an argument vector using string_malloc().
 * Returns an array of argc pointers, each to a private copy of the
 * matching argv[] string. Allocation failure is handed to oom(). */
static inline char **cloneargs(const int argc, char **argv)
{
  char **copy;
  int i;

  copy = (char **)string_malloc(sizeof(char *) * (unsigned int)argc);
  if (copy == NULL) oom("cloneargs() start");

  for (i = 0; i < argc; i++) {
    /* Length including the terminating NUL */
    const size_t bytes = strlen(argv[i]) + 1;

    copy[i] = (char *)string_malloc(bytes);
    if (copy[i] == NULL) oom("cloneargs() loop");
    memcpy(copy[i], argv[i], bytes);
  }

  return copy;
}
350 
351 
/* Search argv[start] .. argv[argc - 1] for a string equal to arg.
 * Returns the index of the first match. If nothing matches, returns the
 * index at which the search stopped: argc, or start when start >= argc. */
static int findarg(const char * const arg, const int start,
                const int argc, char **argv)
{
  int idx = start;

  while (idx < argc && strcmp(argv[idx], arg) != 0) idx++;

  return idx;
}
363 
364 /* Find the first non-option argument after specified option. */
nonoptafter(const char * option,const int argc,char ** oldargv,char ** newargv)365 static int nonoptafter(const char *option, const int argc,
366                 char **oldargv, char **newargv)
367 {
368   int x;
369   int targetind;
370   int testind;
371   int startat = 1;
372 
373   targetind = findarg(option, 1, argc, oldargv);
374 
375   for (x = optind; x < argc; x++) {
376     testind = findarg(newargv[x], startat, argc, oldargv);
377     if (testind > targetind) return x;
378     else startat = testind;
379   }
380 
381   return x;
382 }
383 
384 
385 /* Update progress indicator if requested */
update_progress(const char * const restrict msg,const int file_percent)386 static void update_progress(const char * const restrict msg, const int file_percent)
387 {
388   static int did_fpct = 0;
389 
390   /* The caller should be doing this anyway...but don't trust that they did */
391   if (ISFLAG(flags, F_HIDEPROGRESS)) return;
392 
393   gettimeofday(&time2, NULL);
394 
395   if (progress == 0 || time2.tv_sec > time1.tv_sec) {
396     fprintf(stderr, "\rProgress [%" PRIuMAX "/%" PRIuMAX ", %" PRIuMAX " pairs matched] %" PRIuMAX "%%",
397       progress, filecount, dupecount, (progress * 100) / filecount);
398     if (file_percent > -1 && msg != NULL) {
399       fprintf(stderr, "  (%s: %d%%)         ", msg, file_percent);
400       did_fpct = 1;
401     } else if (did_fpct != 0) {
402       fprintf(stderr, "                     ");
403       did_fpct = 0;
404     }
405     fflush(stderr);
406   }
407   time1.tv_sec = time2.tv_sec;
408 #ifndef ON_WINDOWS
409   /* Notify of change to soft abort status if SIGUSR1 received */
410   if (usr1_toggle != 0) {
411     fprintf(stderr, "\njdupes received a USR1 signal; soft abort (-Z) is now %s\n", usr1_toggle == 1 ? "ON" : "OFF" );
412     usr1_toggle = 0;
413   }
414 #endif
415   return;
416 }
417 
418 
419 /***** Add new functions here *****/
420 
421 
422 /* Does a file have one of these comma-separated extensions?
423  * Returns 1 after any match, 0 if no matches */
match_extensions(char * path,const char * extlist)424 int match_extensions(char *path, const char *extlist)
425 {
426   char *dot;
427   const char *ext;
428   size_t len, extlen;
429 
430   LOUD(fprintf(stderr, "match_extensions('%s', '%s')\n", path, extlist);)
431   if (path == NULL || extlist == NULL) nullptr("match_extensions");
432 
433   dot = NULL;
434   /* Scan to end of path, save the last dot, reset on path separators */
435   while (*path != '\0') {
436     if (*path == '.') dot = path;
437     if (*path == '/' || *path == '\\') dot = NULL;
438     path++;
439   }
440   /* No dots in the file name = no extension, so give up now */
441   if (dot == NULL) return 0;
442   dot++;
443   /* Handle a dot at the end of a file name */
444   if (*dot == '\0') return 0;
445 
446   /* Get the length of the file's extension for later checking */
447   extlen = strlen(dot);
448   LOUD(fprintf(stderr, "match_extensions: file has extension '%s' with length %ld\n", dot, extlen);)
449 
450   /* dot is now at the location of the last file extension; check the list */
451   /* Skip any commas at the start of the list */
452   while (*extlist == ',') extlist++;
453   ext = extlist;
454   len = 0;
455   while (1) {
456     /* Reject upon hitting the end with no more extensions to process */
457     if (*extlist == '\0' && len == 0) return 0;
458     /* Process extension once a comma or EOL is hit */
459     if (*extlist == ',' || *extlist == '\0') {
460       /* Skip serial commas */
461       while (*extlist == ',') extlist++;
462       if (extlist == ext)  goto skip_empty;
463       if (strncasecmp(dot, ext, len) == 0 && extlen == len) {
464         LOUD(fprintf(stderr, "match_extensions: matched on extension '%s' (len %ld)\n", dot, len);)
465         return 1;
466       }
467       LOUD(fprintf(stderr, "match_extensions: no match: '%s' (%ld), '%s' (%ld)\n", dot, len, ext, extlen);)
468 skip_empty:
469       ext = extlist;
470       len = 0;
471       continue;
472     }
473     extlist++; len++;
474     /* LOUD(fprintf(stderr, "match_extensions: DEBUG: '%s' : '%s' (%ld), '%s' (%ld)\n", extlist, dot, len, ext, extlen);) */
475   }
476   return 0;
477 }
478 
479 
480 /* Check file's stat() info to make sure nothing has changed
481  * Returns 1 if changed, 0 if not changed, negative if error */
file_has_changed(file_t * const restrict file)482 extern int file_has_changed(file_t * const restrict file)
483 {
484   /* If -t/--nochangecheck specified then completely bypass this code */
485   if (ISFLAG(flags, F_NOCHANGECHECK)) return 0;
486 
487   if (file == NULL || file->d_name == NULL) nullptr("file_has_changed()");
488   LOUD(fprintf(stderr, "file_has_changed('%s')\n", file->d_name);)
489 
490   if (!ISFLAG(file->flags, FF_VALID_STAT)) return -66;
491 
492   if (STAT(file->d_name, &s) != 0) return -2;
493   if (file->inode != s.st_ino) return 1;
494   if (file->size != s.st_size) return 1;
495   if (file->device != s.st_dev) return 1;
496   if (file->mtime != s.st_mtime) return 1;
497   if (file->mode != s.st_mode) return 1;
498 #ifndef NO_PERMS
499   if (file->uid != s.st_uid) return 1;
500   if (file->gid != s.st_gid) return 1;
501 #endif
502 #ifndef NO_SYMLINKS
503   if (lstat(file->d_name, &s) != 0) return -3;
504   if ((S_ISLNK(s.st_mode) > 0) ^ ISFLAG(file->flags, FF_IS_SYMLINK)) return 1;
505 #endif
506 
507   return 0;
508 }
509 
510 
/* Record a file's stat() information in its file_t.
 * Fills in inode, size, device, mtime and mode, plus nlink (unless
 * NO_HARDLINKS), uid/gid (unless NO_PERMS) and the FF_IS_SYMLINK flag
 * (unless NO_SYMLINKS). Overwrites the shared stat buffer 's'.
 * Returns 0 on success or if the file already has valid stat info,
 * -1 if STAT() or lstat() fails. */
extern inline int getfilestats(file_t * const restrict file)
{
  if (file == NULL || file->d_name == NULL) nullptr("getfilestats()");
  LOUD(fprintf(stderr, "getfilestats('%s')\n", file->d_name);)

  /* Don't stat the same file more than once */
  if (ISFLAG(file->flags, FF_VALID_STAT)) return 0;

  if (STAT(file->d_name, &s) != 0) return -1;
  file->inode = s.st_ino;
  file->size = s.st_size;
  file->device = s.st_dev;
  file->mtime = s.st_mtime;
  file->mode = s.st_mode;
#ifndef NO_HARDLINKS
  file->nlink = s.st_nlink;
#endif
#ifndef NO_PERMS
  file->uid = s.st_uid;
  file->gid = s.st_gid;
#endif
#ifndef NO_SYMLINKS
  if (lstat(file->d_name, &s) != 0) return -1;
  if (S_ISLNK(s.st_mode) > 0) SETFLAG(file->flags, FF_IS_SYMLINK);
#endif
  /* Mark the information valid only after every call above succeeded.
   * Setting the flag before the stat call would make a later call return
   * success for a file whose fields were never filled in. */
  SETFLAG(file->flags, FF_VALID_STAT);
  return 0;
}
539 
540 
add_extfilter(const char * option)541 static void add_extfilter(const char *option)
542 {
543   char *opt, *p;
544   time_t tt;
545   struct extfilter *extf = extfilter_head;
546   const struct extfilter_tags *tags = extfilter_tags;
547   const struct size_suffix *ss = size_suffix;
548 
549   if (option == NULL) nullptr("add_extfilter()");
550 
551   LOUD(fprintf(stderr, "add_extfilter '%s'\n", option);)
552 
553   /* Invoke help text if requested */
554   if (strcasecmp(option, "help") == 0) { help_text_extfilter(); exit(EXIT_SUCCESS); }
555 
556   opt = string_malloc(strlen(option) + 1);
557   if (opt == NULL) oom("add_extfilter option");
558   strcpy(opt, option);
559   p = opt;
560 
561   while (*p != ':' && *p != '\0') p++;
562 
563   /* Split tag string into *opt (tag) and *p (value) */
564   if (*p == ':') {
565     *p = '\0';
566     p++;
567   }
568 
569   while (tags->tag != NULL && strcmp(tags->tag, opt) != 0) tags++;
570   if (tags->tag == NULL) goto error_bad_filter;
571 
572   /* Check for a tag that requires a value */
573   if (tags->flags & XF_REQ_VALUE && *p == '\0') goto error_value_missing;
574 
575   /* *p is now at the value, NOT the tag string! */
576 
577   if (extfilter_head != NULL) {
578     /* Add to end of exclusion stack if head is present */
579     while (extf->next != NULL) extf = extf->next;
580     extf->next = string_malloc(sizeof(struct extfilter) + strlen(p) + 1);
581     if (extf->next == NULL) oom("add_extfilter alloc");
582     extf = extf->next;
583   } else {
584     /* Allocate extfilter_head if no exclusions exist yet */
585     extfilter_head = string_malloc(sizeof(struct extfilter) + strlen(p) + 1);
586     if (extfilter_head == NULL) oom("add_extfilter alloc");
587     extf = extfilter_head;
588   }
589 
590   /* Set tag value from predefined tag array */
591   extf->flags = tags->flags;
592 
593   /* Initialize the new extfilter element */
594   extf->next = NULL;
595   if (extf->flags & XF_REQ_NUMBER) {
596     /* Exclude uses a number; handle it with possible suffixes */
597     *(extf->param) = '\0';
598     /* Get base size */
599     if (*p < '0' || *p > '9') goto error_bad_size_suffix;
600     extf->size = strtoll(p, &p, 10);
601     /* Handle suffix, if any */
602     if (*p != '\0') {
603       while (ss->suffix != NULL && strcasecmp(ss->suffix, p) != 0) ss++;
604       if (ss->suffix == NULL) goto error_bad_size_suffix;
605       extf->size *= ss->multiplier;
606     }
607   } else if (extf->flags & XF_REQ_DATE) {
608     *(extf->param) = '\0';
609     tt = strtoepoch(p);
610     LOUD(fprintf(stderr, "extfilter: jody_strtoepoch: '%s' -> %ld\n", p, tt);)
611     if (tt == -1) goto error_bad_time;
612     extf->size = tt;
613   } else {
614     /* Exclude uses string data; just copy it */
615     extf->size = 0;
616     if (*p != '\0') strcpy(extf->param, p);
617     else *(extf->param) = '\0';
618   }
619 
620   LOUD(fprintf(stderr, "Added extfilter: tag '%s', data '%s', size %lld, flags %d\n", opt, extf->param, (long long)extf->size, extf->flags);)
621   string_free(opt);
622   return;
623 
624 error_bad_time:
625   fprintf(stderr, "Invalid extfilter date[time] was specified: -X filter:datetime\n");
626   help_text_extfilter();
627   exit(EXIT_FAILURE);
628 error_value_missing:
629   fprintf(stderr, "extfilter value missing or invalid: -X filter:value\n");
630   help_text_extfilter();
631   exit(EXIT_FAILURE);
632 error_bad_filter:
633   fprintf(stderr, "Invalid extfilter filter name was specified\n");
634   help_text_extfilter();
635   exit(EXIT_FAILURE);
636 error_bad_size_suffix:
637   fprintf(stderr, "Invalid extfilter size suffix specified; use B or KMGTPE[i][B]\n");
638   help_text_extfilter();
639   exit(EXIT_FAILURE);
640 }
641 
642 
643 /* Returns -1 if stat() fails, 0 if it's a directory, 1 if it's not */
getdirstats(const char * const restrict name,jdupes_ino_t * const restrict inode,dev_t * const restrict dev,jdupes_mode_t * const restrict mode)644 extern int getdirstats(const char * const restrict name,
645         jdupes_ino_t * const restrict inode, dev_t * const restrict dev,
646         jdupes_mode_t * const restrict mode)
647 {
648   if (name == NULL || inode == NULL || dev == NULL) nullptr("getdirstats");
649   LOUD(fprintf(stderr, "getdirstats('%s', %p, %p)\n", name, (void *)inode, (void *)dev);)
650 
651   if (STAT(name, &s) != 0) return -1;
652   *inode = s.st_ino;
653   *dev = s.st_dev;
654   *mode = s.st_mode;
655   if (!S_ISDIR(s.st_mode)) return 1;
656   return 0;
657 }
658 
659 
660 /* Check a pair of files for match exclusion conditions
661  * Returns:
662  *  0 if all condition checks pass
663  * -1 or 1 on compare result less/more
664  * -2 on an absolute exclusion condition met
665  *  2 on an absolute match condition met
666  * -3 on exclusion due to isolation
667  * -4 on exclusion due to same filesystem
668  * -5 on exclusion due to permissions */
check_conditions(const file_t * const restrict file1,const file_t * const restrict file2)669 extern int check_conditions(const file_t * const restrict file1, const file_t * const restrict file2)
670 {
671   if (file1 == NULL || file2 == NULL || file1->d_name == NULL || file2->d_name == NULL) nullptr("check_conditions()");
672 
673   LOUD(fprintf(stderr, "check_conditions('%s', '%s')\n", file1->d_name, file2->d_name);)
674 
675   /* Exclude files that are not the same size */
676   if (file1->size > file2->size) {
677     LOUD(fprintf(stderr, "check_conditions: no match: size of file1 > file2 (%" PRIdMAX " > %" PRIdMAX ")\n",
678       (intmax_t)file1->size, (intmax_t)file2->size));
679     return -1;
680   }
681   if (file1->size < file2->size) {
682     LOUD(fprintf(stderr, "check_conditions: no match: size of file1 < file2 (%" PRIdMAX " < %"PRIdMAX ")\n",
683       (intmax_t)file1->size, (intmax_t)file2->size));
684     return 1;
685   }
686 
687 #ifndef NO_USER_ORDER
688   /* Exclude based on -I/--isolate */
689   if (ISFLAG(flags, F_ISOLATE) && (file1->user_order == file2->user_order)) {
690     LOUD(fprintf(stderr, "check_conditions: files ignored: parameter isolation\n"));
691     return -3;
692   }
693 #endif /* NO_USER_ORDER */
694 
695   /* Exclude based on -1/--one-file-system */
696   if (ISFLAG(flags, F_ONEFS) && (file1->device != file2->device)) {
697     LOUD(fprintf(stderr, "check_conditions: files ignored: not on same filesystem\n"));
698     return -4;
699   }
700 
701    /* Exclude files by permissions if requested */
702   if (ISFLAG(flags, F_PERMISSIONS) &&
703           (file1->mode != file2->mode
704 #ifndef NO_PERMS
705           || file1->uid != file2->uid
706           || file1->gid != file2->gid
707 #endif
708           )) {
709     return -5;
710     LOUD(fprintf(stderr, "check_conditions: no match: permissions/ownership differ (-p on)\n"));
711   }
712 
713   /* Hard link and symlink + '-s' check */
714 #ifndef NO_HARDLINKS
715   if ((file1->inode == file2->inode) && (file1->device == file2->device)) {
716     if (ISFLAG(flags, F_CONSIDERHARDLINKS)) {
717       LOUD(fprintf(stderr, "check_conditions: files match: hard/soft linked (-H on)\n"));
718       return 2;
719     } else {
720       LOUD(fprintf(stderr, "check_conditions: files ignored: hard/soft linked (-H off)\n"));
721       return -2;
722     }
723   }
724 #endif
725 
726   /* Fall through: all checks passed */
727   LOUD(fprintf(stderr, "check_conditions: all condition checks passed\n"));
728   return 0;
729 }
730 
731 
732 /* Check for exclusion conditions for a single file (1 = fail) */
check_singlefile(file_t * const restrict newfile)733 static int check_singlefile(file_t * const restrict newfile)
734 {
735   char * restrict tp = tempname;
736   int excluded;
737 
738   if (newfile == NULL) nullptr("check_singlefile()");
739 
740   LOUD(fprintf(stderr, "check_singlefile: checking '%s'\n", newfile->d_name));
741 
742   /* Exclude hidden files if requested */
743   if (ISFLAG(flags, F_EXCLUDEHIDDEN)) {
744     if (newfile->d_name == NULL) nullptr("check_singlefile newfile->d_name");
745     strcpy(tp, newfile->d_name);
746     tp = basename(tp);
747     if (tp[0] == '.' && strcmp(tp, ".") && strcmp(tp, "..")) {
748       LOUD(fprintf(stderr, "check_singlefile: excluding hidden file (-A on)\n"));
749       return 1;
750     }
751   }
752 
753   /* Get file information and check for validity */
754   const int i = getfilestats(newfile);
755   if (i || newfile->size == -1) {
756     LOUD(fprintf(stderr, "check_singlefile: excluding due to bad stat()\n"));
757     return 1;
758   }
759 
760   if (!S_ISDIR(newfile->mode)) {
761     /* Exclude zero-length files if requested */
762     if (newfile->size == 0 && !ISFLAG(flags, F_INCLUDEEMPTY)) {
763     LOUD(fprintf(stderr, "check_singlefile: excluding zero-length empty file (-z not set)\n"));
764     return 1;
765   }
766 
767     /* Exclude files based on exclusion stack size specs */
768     excluded = 0;
769     for (struct extfilter *extf = extfilter_head; extf != NULL; extf = extf->next) {
770       uint32_t sflag = extf->flags;
771       LOUD(fprintf(stderr, "check_singlefile: extfilter check: %08x %ld %ld %s\n", sflag, newfile->size, extf->size, newfile->d_name);)
772       if (
773            /* Any line that passes will result in file exclusion */
774            ((sflag == XF_SIZE_EQ) && (newfile->size != extf->size)) ||
775            ((sflag == XF_SIZE_LTEQ) && (newfile->size > extf->size)) ||
776            ((sflag == XF_SIZE_GTEQ) && (newfile->size < extf->size)) ||
777            ((sflag == XF_SIZE_GT) && (newfile->size <= extf->size)) ||
778            ((sflag == XF_SIZE_LT) && (newfile->size >= extf->size)) ||
779            ((sflag == XF_EXCL_EXT) && match_extensions(newfile->d_name, extf->param)) ||
780            ((sflag == XF_ONLY_EXT) && !match_extensions(newfile->d_name, extf->param)) ||
781            ((sflag == XF_EXCL_STR) && strstr(newfile->d_name, extf->param)) ||
782            ((sflag == XF_ONLY_STR) && !strstr(newfile->d_name, extf->param)) ||
783            ((sflag == XF_DATE_NEWER) && (newfile->mtime < extf->size)) ||
784            ((sflag == XF_DATE_OLDER) && (newfile->mtime >= extf->size))
785       ) excluded = 1;
786     }
787     if (excluded) {
788       LOUD(fprintf(stderr, "check_singlefile: excluding based on an extfilter option\n"));
789       return 1;
790     }
791   }
792 
793 #ifdef ON_WINDOWS
794   /* Windows has a 1023 (+1) hard link limit. If we're hard linking,
795    * ignore all files that have hit this limit */
796  #ifndef NO_HARDLINKS
797   if (ISFLAG(a_flags, FA_HARDLINKFILES) && newfile->nlink >= 1024) {
798   #ifdef DEBUG
799     hll_exclude++;
800   #endif
801     LOUD(fprintf(stderr, "check_singlefile: excluding due to Windows 1024 hard link limit\n"));
802     return 1;
803   }
804  #endif /* NO_HARDLINKS */
805 #endif /* ON_WINDOWS */
806   LOUD(fprintf(stderr, "check_singlefile: all checks passed\n"));
807   return 0;
808 }
809 
810 
init_newfile(const size_t len,file_t * restrict * const restrict filelistp)811 static file_t *init_newfile(const size_t len, file_t * restrict * const restrict filelistp)
812 {
813   file_t * const restrict newfile = (file_t *)string_malloc(sizeof(file_t));
814 
815   if (!newfile) oom("init_newfile() file structure");
816   if (!filelistp) nullptr("init_newfile() filelistp");
817 
818   LOUD(fprintf(stderr, "init_newfile(len %lu, filelistp %p)\n", len, filelistp));
819 
820   memset(newfile, 0, sizeof(file_t));
821   newfile->d_name = (char *)string_malloc(len);
822   if (!newfile->d_name) oom("init_newfile() filename");
823 
824   newfile->next = *filelistp;
825 #ifndef NO_USER_ORDER
826   newfile->user_order = user_item_count;
827 #endif
828   newfile->size = -1;
829   newfile->duplicates = NULL;
830   return newfile;
831 }
832 
833 
834 /* Create a new traversal check object and initialize its values */
travdone_alloc(const dev_t device,const jdupes_ino_t inode)835 static struct travdone *travdone_alloc(const dev_t device, const jdupes_ino_t inode)
836 {
837   struct travdone *trav;
838 
839   LOUD(fprintf(stderr, "travdone_alloc(%" PRIdMAX ", %" PRIdMAX ")\n", (intmax_t)inode, (intmax_t)device);)
840 
841   trav = (struct travdone *)string_malloc(sizeof(struct travdone));
842   if (trav == NULL) {
843     LOUD(fprintf(stderr, "travdone_alloc: malloc failed\n");)
844     return NULL;
845   }
846   trav->left = NULL;
847   trav->right = NULL;
848   trav->inode = inode;
849   trav->device = device;
850   LOUD(fprintf(stderr, "travdone_alloc returned %p\n", (void *)trav);)
851   return trav;
852 }
853 
854 
855 /* De-allocate the travdone tree */
travdone_free(struct travdone * const restrict cur)856 static void travdone_free(struct travdone * const restrict cur)
857 {
858   LOUD(fprintf(stderr, "travdone_free(%p)\n", cur);)
859   if (cur == NULL) return;
860   if (cur->left != NULL) travdone_free(cur->left);
861   if (cur->right != NULL) travdone_free(cur->right);
862   string_free(cur);
863   return;
864 }
865 
866 
867 /* Check to see if device:inode pair has already been traversed */
traverse_check(const dev_t device,const jdupes_ino_t inode)868 static int traverse_check(const dev_t device, const jdupes_ino_t inode)
869 {
870   struct travdone *traverse = travdone_head;
871 
872   if (travdone_head == NULL) {
873     travdone_head = travdone_alloc(device, inode);
874     if (travdone_head == NULL) return 2;
875   } else {
876     traverse = travdone_head;
877     while (1) {
878       if (traverse == NULL) nullptr("traverse_check()");
879       /* Don't re-traverse directories we've already seen */
880       if (inode == traverse->inode && device == traverse->device) {
881         LOUD(fprintf(stderr, "traverse_check: already seen: %ld:%ld\n", device,inode);)
882         return 1;
883       } else if (inode > traverse->inode || (inode == traverse->inode && device > traverse->device)) {
884         /* Traverse right */
885         if (traverse->right == NULL) {
886           LOUD(fprintf(stderr, "traverse item right: %ld:%ld\n", device, inode);)
887           traverse->right = travdone_alloc(device, inode);
888           if (traverse->right == NULL) return 2;
889           break;
890         }
891         traverse = traverse->right;
892         continue;
893       } else {
894         /* Traverse left */
895         if (traverse->left == NULL) {
896           LOUD(fprintf(stderr, "traverse item left %ld,%ld\n", device, inode);)
897           traverse->left = travdone_alloc(device, inode);
898           if (traverse->left == NULL) return 2;
899           break;
900         }
901         traverse = traverse->left;
902         continue;
903       }
904     }
905   }
906   return 0;
907 }
908 
909 
/* This is disabled until a check is in place to make it safe */
#if 0
/* Add a single file to the file tree.
 * Returns the new file_t, or NULL if check_singlefile() rejects the file
 * (in which case the new entry is freed again). */
static inline file_t *grokfile(const char * const restrict name, file_t * restrict * const restrict filelistp)
{
  file_t * restrict entry;

  if (name == NULL || filelistp == NULL) nullptr("grokfile()");
  LOUD(fprintf(stderr, "grokfile: '%s' %p\n", name, filelistp));

  /* Allocate the file_t and the d_name entries, then store the name */
  entry = init_newfile(strlen(name) + 2, filelistp);
  strcpy(entry->d_name, name);

  /* Single-file [l]stat() and exclusion condition check */
  if (check_singlefile(entry) == 0) return entry;

  LOUD(fprintf(stderr, "grokfile: check_singlefile rejected file\n"));
  string_free(entry->d_name);
  string_free(entry);
  return NULL;
}
#endif
935 
936 
937 /* Load a directory's contents into the file tree, recursing as needed */
grokdir(const char * const restrict dir,file_t * restrict * const restrict filelistp,int recurse)938 static void grokdir(const char * const restrict dir,
939                 file_t * restrict * const restrict filelistp,
940                 int recurse)
941 {
942   file_t * restrict newfile;
943   struct dirent *dirinfo;
944   static int grokdir_level = 0;
945   size_t dirlen;
946   int i, single = 0;
947   jdupes_ino_t inode;
948   dev_t device, n_device;
949   jdupes_mode_t mode;
950 #ifdef UNICODE
951   WIN32_FIND_DATA ffd;
952   HANDLE hFind = INVALID_HANDLE_VALUE;
953   char *p;
954 #else
955   DIR *cd;
956 #endif
957 
958   if (dir == NULL || filelistp == NULL) nullptr("grokdir()");
959   LOUD(fprintf(stderr, "grokdir: scanning '%s' (order %d, recurse %d)\n", dir, user_item_count, recurse));
960 
961   /* Get directory stats (or file stats if it's a file) */
962   i = getdirstats(dir, &inode, &device, &mode);
963   if (i < 0) goto error_travdone;
964   /* if dir is actually a file, just add it to the file tree */
965   if (i == 1) {
966 /* Single file addition is disabled for now because there is no safeguard
967  * against the file being compared against itself if it's added in both a
968  * recursion and explicitly on the command line. */
969 #if 0
970     LOUD(fprintf(stderr, "grokdir -> grokfile '%s'\n", dir));
971     newfile = grokfile(dir, filelistp);
972     if (newfile == NULL) {
973       LOUD(fprintf(stderr, "grokfile rejected '%s'\n", dir));
974       return;
975     }
976     single = 1;
977     goto add_single_file;
978 #endif
979     fprintf(stderr, "\nFile specs on command line disabled in this version for safety\n");
980     fprintf(stderr, "This should be restored (and safe) in a future release\n");
981     fprintf(stderr, "See https://github.com/jbruchon/jdupes or email jody@jodybruchon.com\n");
982     return; /* Remove when single file is restored */
983   }
984 
985   /* Double traversal prevention tree */
986   if (!ISFLAG(flags, F_NOTRAVCHECK)) {
987     i = traverse_check(device, inode);
988     if (i == 1) return;
989     if (i == 2) goto error_travdone;
990   }
991 
992   item_progress++;
993   grokdir_level++;
994 
995 #ifdef UNICODE
996   /* Windows requires \* at the end of directory names */
997   strncpy(tempname, dir, PATHBUF_SIZE * 2 - 1);
998   dirlen = strlen(tempname) - 1;
999   p = tempname + dirlen;
1000   if (*p == '/' || *p == '\\') *p = '\0';
1001   strncat(tempname, "\\*", PATHBUF_SIZE * 2 - 1);
1002 
1003   if (!M2W(tempname, wname)) goto error_cd;
1004 
1005   LOUD(fprintf(stderr, "FindFirstFile: %s\n", dir));
1006   hFind = FindFirstFileW(wname, &ffd);
1007   if (hFind == INVALID_HANDLE_VALUE) { LOUD(fprintf(stderr, "\nfile handle bad\n")); goto error_cd; }
1008   LOUD(fprintf(stderr, "Loop start\n"));
1009   do {
1010     char * restrict tp = tempname;
1011     size_t d_name_len;
1012 
1013     /* Get necessary length and allocate d_name */
1014     dirinfo = (struct dirent *)string_malloc(sizeof(struct dirent));
1015     if (!W2M(ffd.cFileName, dirinfo->d_name)) continue;
1016 #else
1017   cd = opendir(dir);
1018   if (!cd) goto error_cd;
1019 
1020   while ((dirinfo = readdir(cd)) != NULL) {
1021     char * restrict tp = tempname;
1022     size_t d_name_len;
1023 #endif /* UNICODE */
1024 
1025     LOUD(fprintf(stderr, "grokdir: readdir: '%s'\n", dirinfo->d_name));
1026     if (!strcmp(dirinfo->d_name, ".") || !strcmp(dirinfo->d_name, "..")) continue;
1027     if (!ISFLAG(flags, F_HIDEPROGRESS)) {
1028       gettimeofday(&time2, NULL);
1029       if (progress == 0 || time2.tv_sec > time1.tv_sec) {
1030         fprintf(stderr, "\rScanning: %" PRIuMAX " files, %" PRIuMAX " dirs (in %u specified)",
1031             progress, item_progress, user_item_count);
1032       }
1033       time1.tv_sec = time2.tv_sec;
1034     }
1035 
1036     /* Assemble the file's full path name, optimized to avoid strcat() */
1037     dirlen = strlen(dir);
1038     d_name_len = strlen(dirinfo->d_name);
1039     memcpy(tp, dir, dirlen+1);
1040     if (dirlen != 0 && tp[dirlen-1] != dir_sep) {
1041       tp[dirlen] = dir_sep;
1042       dirlen++;
1043     }
1044     if (dirlen + d_name_len + 1 >= (PATHBUF_SIZE * 2)) goto error_overflow;
1045     tp += dirlen;
1046     memcpy(tp, dirinfo->d_name, d_name_len);
1047     tp += d_name_len;
1048     *tp = '\0';
1049     d_name_len++;
1050 
1051     /* Allocate the file_t and the d_name entries */
1052     newfile = init_newfile(dirlen + d_name_len + 2, filelistp);
1053 
1054     tp = tempname;
1055     memcpy(newfile->d_name, tp, dirlen + d_name_len);
1056 
1057     /*** WARNING: tempname global gets reused by check_singlefile here! ***/
1058 
1059     /* Single-file [l]stat() and exclusion condition check */
1060     if (check_singlefile(newfile) != 0) {
1061       LOUD(fprintf(stderr, "grokdir: check_singlefile rejected file\n"));
1062       string_free(newfile->d_name);
1063       string_free(newfile);
1064       continue;
1065     }
1066 
1067     /* Optionally recurse directories, including symlinked ones if requested */
1068     if (S_ISDIR(newfile->mode)) {
1069       if (recurse) {
1070         /* --one-file-system - WARNING: this clobbers inode/mode */
1071         if (ISFLAG(flags, F_ONEFS)
1072             && (getdirstats(newfile->d_name, &inode, &n_device, &mode) == 0)
1073             && (device != n_device)) {
1074           LOUD(fprintf(stderr, "grokdir: directory: not recursing (--one-file-system)\n"));
1075           string_free(newfile->d_name);
1076           string_free(newfile);
1077           continue;
1078         }
1079 #ifndef NO_SYMLINKS
1080         else if (ISFLAG(flags, F_FOLLOWLINKS) || !ISFLAG(newfile->flags, FF_IS_SYMLINK)) {
1081           LOUD(fprintf(stderr, "grokdir: directory(symlink): recursing (-r/-R)\n"));
1082           grokdir(newfile->d_name, filelistp, recurse);
1083         }
1084 #else
1085         else {
1086           LOUD(fprintf(stderr, "grokdir: directory: recursing (-r/-R)\n"));
1087           grokdir(newfile->d_name, filelistp, recurse);
1088         }
1089 #endif
1090       } else { LOUD(fprintf(stderr, "grokdir: directory: not recursing\n")); }
1091       string_free(newfile->d_name);
1092       string_free(newfile);
1093       continue;
1094     } else {
1095 //add_single_file:
1096       /* Add regular files to list, including symlink targets if requested */
1097 #ifndef NO_SYMLINKS
1098       if (!ISFLAG(newfile->flags, FF_IS_SYMLINK) || (ISFLAG(newfile->flags, FF_IS_SYMLINK) && ISFLAG(flags, F_FOLLOWLINKS))) {
1099 #else
1100       if (S_ISREG(newfile->mode)) {
1101 #endif
1102         *filelistp = newfile;
1103         filecount++;
1104         progress++;
1105 
1106       } else {
1107         LOUD(fprintf(stderr, "grokdir: not a regular file: %s\n", newfile->d_name);)
1108         string_free(newfile->d_name);
1109         string_free(newfile);
1110         if (single == 1) {
1111           single = 0;
1112           goto skip_single;
1113         }
1114         continue;
1115       }
1116     }
1117     /* Skip directory stuff if adding only a single file */
1118     if (single == 1) {
1119       single = 0;
1120       goto skip_single;
1121     }
1122   }
1123 
1124 #ifdef UNICODE
1125   while (FindNextFileW(hFind, &ffd) != 0);
1126   FindClose(hFind);
1127 #else
1128   closedir(cd);
1129 #endif
1130 
1131 skip_single:
1132   grokdir_level--;
1133   if (grokdir_level == 0 && !ISFLAG(flags, F_HIDEPROGRESS)) {
1134     fprintf(stderr, "\rScanning: %" PRIuMAX " files, %" PRIuMAX " items (in %u specified)",
1135             progress, item_progress, user_item_count);
1136   }
1137   return;
1138 
1139 error_travdone:
1140   fprintf(stderr, "\ncould not stat dir "); fwprint(stderr, dir, 1);
1141   return;
1142 error_cd:
1143   fprintf(stderr, "\ncould not chdir to "); fwprint(stderr, dir, 1);
1144   return;
1145 error_overflow:
1146   fprintf(stderr, "\nerror: a path buffer overflowed\n");
1147   exit(EXIT_FAILURE);
1148 }
1149 
1150 
1151 /* Hash part or all of a file */
1152 static jdupes_hash_t *get_filehash(const file_t * const restrict checkfile,
1153                 const size_t max_read)
1154 {
1155   off_t fsize;
1156   /* This is an array because we return a pointer to it */
1157   static jdupes_hash_t hash[1];
1158   static jdupes_hash_t *chunk = NULL;
1159   FILE *file;
1160   int check = 0;
1161   XXH64_state_t *xxhstate;
1162 
1163   if (checkfile == NULL || checkfile->d_name == NULL) nullptr("get_filehash()");
1164   LOUD(fprintf(stderr, "get_filehash('%s', %" PRIdMAX ")\n", checkfile->d_name, (intmax_t)max_read);)
1165 
1166   /* Allocate on first use */
1167   if (chunk == NULL) {
1168     chunk = (jdupes_hash_t *)string_malloc(auto_chunk_size);
1169     if (!chunk) oom("get_filehash() chunk");
1170   }
1171 
1172   /* Get the file size. If we can't read it, bail out early */
1173   if (checkfile->size == -1) {
1174     LOUD(fprintf(stderr, "get_filehash: not hashing because stat() info is bad\n"));
1175     return NULL;
1176   }
1177   fsize = checkfile->size;
1178 
1179   /* Do not read more than the requested number of bytes */
1180   if (max_read > 0 && fsize > (off_t)max_read)
1181     fsize = (off_t)max_read;
1182 
1183   /* Initialize the hash and file read parameters (with filehash_partial skipped)
1184    *
1185    * If we already hashed the first chunk of this file, we don't want to
1186    * wastefully read and hash it again, so skip the first chunk and use
1187    * the computed hash for that chunk as our starting point.
1188    */
1189 
1190   *hash = 0;
1191   if (ISFLAG(checkfile->flags, FF_HASH_PARTIAL)) {
1192     *hash = checkfile->filehash_partial;
1193     /* Don't bother going further if max_read is already fulfilled */
1194     if (max_read != 0 && max_read <= PARTIAL_HASH_SIZE) {
1195       LOUD(fprintf(stderr, "Partial hash size (%d) >= max_read (%" PRIuMAX "), not hashing anymore\n", PARTIAL_HASH_SIZE, (uintmax_t)max_read);)
1196       return hash;
1197     }
1198   }
1199   errno = 0;
1200 #ifdef UNICODE
1201   if (!M2W(checkfile->d_name, wstr)) file = NULL;
1202   else file = _wfopen(wstr, FILE_MODE_RO);
1203 #else
1204   file = fopen(checkfile->d_name, FILE_MODE_RO);
1205 #endif
1206   if (file == NULL) {
1207     fprintf(stderr, "\n%s error opening file ", strerror(errno)); fwprint(stderr, checkfile->d_name, 1);
1208     return NULL;
1209   }
1210   /* Actually seek past the first chunk if applicable
1211    * This is part of the filehash_partial skip optimization */
1212   if (ISFLAG(checkfile->flags, FF_HASH_PARTIAL)) {
1213     if (fseeko(file, PARTIAL_HASH_SIZE, SEEK_SET) == -1) {
1214       fclose(file);
1215       fprintf(stderr, "\nerror seeking in file "); fwprint(stderr, checkfile->d_name, 1);
1216       return NULL;
1217     }
1218     fsize -= PARTIAL_HASH_SIZE;
1219   }
1220 
1221   xxhstate = XXH64_createState();
1222   if (xxhstate == NULL) nullptr("xxhstate");
1223   XXH64_reset(xxhstate, 0);
1224 
1225   /* Read the file in CHUNK_SIZE chunks until we've read it all. */
1226   while (fsize > 0) {
1227     size_t bytes_to_read;
1228 
1229     if (interrupt) return 0;
1230     bytes_to_read = (fsize >= (off_t)auto_chunk_size) ? auto_chunk_size : (size_t)fsize;
1231     if (fread((void *)chunk, bytes_to_read, 1, file) != 1) {
1232       fprintf(stderr, "\nerror reading from file "); fwprint(stderr, checkfile->d_name, 1);
1233       fclose(file);
1234       return NULL;
1235     }
1236 
1237     XXH64_update(xxhstate, chunk, bytes_to_read);
1238 
1239     if ((off_t)bytes_to_read > fsize) break;
1240     else fsize -= (off_t)bytes_to_read;
1241 
1242     if (!ISFLAG(flags, F_HIDEPROGRESS)) {
1243       check++;
1244       if (check > CHECK_MINIMUM) {
1245         update_progress("hashing", (int)(((checkfile->size - fsize) * 100) / checkfile->size));
1246         check = 0;
1247       }
1248     }
1249   }
1250 
1251   fclose(file);
1252 
1253   *hash = XXH64_digest(xxhstate);
1254   XXH64_freeState(xxhstate);
1255 
1256   LOUD(fprintf(stderr, "get_filehash: returning hash: 0x%016jx\n", (uintmax_t)*hash));
1257   return hash;
1258 }
1259 
1260 
1261 static inline void registerfile(filetree_t * restrict * const restrict nodeptr,
1262                 const enum tree_direction d, file_t * const restrict file)
1263 {
1264   filetree_t * restrict branch;
1265 
1266   if (nodeptr == NULL || file == NULL || (d != NONE && *nodeptr == NULL)) nullptr("registerfile()");
1267   LOUD(fprintf(stderr, "registerfile(direction %d)\n", d));
1268 
1269   /* Allocate and initialize a new node for the file */
1270   branch = (filetree_t *)string_malloc(sizeof(filetree_t));
1271   if (branch == NULL) oom("registerfile() branch");
1272   branch->file = file;
1273   branch->left = NULL;
1274   branch->right = NULL;
1275 
1276   /* Attach the new node to the requested branch */
1277   switch (d) {
1278     case LEFT:
1279       (*nodeptr)->left = branch;
1280       break;
1281     case RIGHT:
1282       (*nodeptr)->right = branch;
1283       break;
1284     case NONE:
1285       /* For the root of the tree only */
1286       *nodeptr = branch;
1287       break;
1288     default:
1289       /* This should never ever happen */
1290       fprintf(stderr, "\ninternal error: invalid direction for registerfile(), report this\n");
1291       string_malloc_destroy();
1292       exit(EXIT_FAILURE);
1293       break;
1294   }
1295 
1296   return;
1297 }
1298 
1299 
/* TREE_DEPTH_UPDATE_MAX(): with TREE_DEPTH_STATS defined, raise max_depth to
 * tree_depth when the latter is larger, then reset tree_depth to 0.
 * Without TREE_DEPTH_STATS the macro expands to nothing. */
#ifdef TREE_DEPTH_STATS
#define TREE_DEPTH_UPDATE_MAX() { if (max_depth < tree_depth) max_depth = tree_depth; tree_depth = 0; }
#else
#define TREE_DEPTH_UPDATE_MAX()
#endif
1305 
1306 
/* Check two files for a match
 *
 * Compares `file` against the file stored in the node `tree` and, when they
 * do not match, continues into the left or right subtree (by recursion) or
 * registers `file` as a new leaf there.
 *
 * Returns:
 *   &tree->file of the matching node when the files match by hash, or when
 *               check_conditions() returns 2
 *   NULL        when check_conditions() returns -2, when get_filehash()
 *               fails (the file is NOT registered in that case), or after
 *               `file` has been registered as a new leaf
 *
 * Hashes computed along the way are cached in the file_t entries and marked
 * with FF_HASH_PARTIAL / FF_HASH_FULL so they are never computed twice. */
static file_t **checkmatch(filetree_t * restrict tree, file_t * const restrict file)
{
  int cmpresult = 0;
  int cantmatch = 0;
  const jdupes_hash_t * restrict filehash;

  if (tree == NULL || file == NULL || tree->file == NULL || tree->file->d_name == NULL || file->d_name == NULL) nullptr("checkmatch()");
  LOUD(fprintf(stderr, "checkmatch ('%s', '%s')\n", tree->file->d_name, file->d_name));

  /* If device and inode fields are equal one of the files is a
   * hard link to the other or the files have been listed twice
   * unintentionally. We don't want to flag these files as
   * duplicates unless the user specifies otherwise. */

  /* Count the total number of comparisons requested */
  DBG(comparisons++;)

/* If considering hard linked files as duplicates, they are
 * automatically duplicates without being read further since
 * they point to the exact same inode. If we aren't considering
 * hard links as duplicates, we just return NULL. */

  /* Results -3..-5 still go through hashing (cmpresult is forced to 0) so
   * the file can be placed in the tree, but cantmatch vetoes a match later.
   * Any other non-zero result skips hashing and is used directly as the
   * tree direction further down. */
  cmpresult = check_conditions(tree->file, file);
  switch (cmpresult) {
    case 2: return &tree->file;  /* linked files + -H switch */
    case -2: return NULL;  /* linked files, no -H switch */
    case -3:    /* user order */
    case -4:    /* one filesystem */
    case -5:    /* permissions */
        cantmatch = 1;
        cmpresult = 0;
        break;
    default: break;
  }

  /* Print pre-check (early) match candidates if requested */
  /* NOTE(review): this prints for every pair that reaches this point,
   * whatever the value of cmpresult - confirm that is intended */
  if (ISFLAG(p_flags, PF_EARLYMATCH)) printf("Early match check passed:\n   %s\n   %s\n\n", file->d_name, tree->file->d_name);

  /* If preliminary matching succeeded, do main file data checks */
  if (cmpresult == 0) {
    LOUD(fprintf(stderr, "checkmatch: starting file data comparisons\n"));
    /* Attempt to exclude files quickly with partial file hashing */
    if (!ISFLAG(tree->file->flags, FF_HASH_PARTIAL)) {
      filehash = get_filehash(tree->file, PARTIAL_HASH_SIZE);
      if (filehash == NULL) return NULL;

      /* get_filehash() returns static storage: copy the value right away */
      tree->file->filehash_partial = *filehash;
      SETFLAG(tree->file->flags, FF_HASH_PARTIAL);
    }

    if (!ISFLAG(file->flags, FF_HASH_PARTIAL)) {
      filehash = get_filehash(file, PARTIAL_HASH_SIZE);
      if (filehash == NULL) return NULL;

      file->filehash_partial = *filehash;
      SETFLAG(file->flags, FF_HASH_PARTIAL);
    }

    cmpresult = HASH_COMPARE(file->filehash_partial, tree->file->filehash_partial);
    LOUD(if (!cmpresult) fprintf(stderr, "checkmatch: partial hashes match\n"));
    LOUD(if (cmpresult) fprintf(stderr, "checkmatch: partial hashes do not match\n"));
    DBG(partial_hash++;)

    /* Print partial hash matching pairs if requested */
    if (cmpresult == 0 && ISFLAG(p_flags, PF_PARTIAL))
      printf("Partial hashes match:\n   %s\n   %s\n\n", file->d_name, tree->file->d_name);

    /* Small files and partial-only mode reuse the partial hash as the full
     * hash; cmpresult from the partial comparison above is kept as-is */
    if (file->size <= PARTIAL_HASH_SIZE || ISFLAG(flags, F_PARTIALONLY)) {
      if (ISFLAG(flags, F_PARTIALONLY)) { LOUD(fprintf(stderr, "checkmatch: partial only mode: treating partial hash as full hash\n")); }
      else { LOUD(fprintf(stderr, "checkmatch: small file: copying partial hash to full hash\n")); }
      /* filehash_partial = filehash if file is small enough */
      if (!ISFLAG(file->flags, FF_HASH_FULL)) {
        file->filehash = file->filehash_partial;
        SETFLAG(file->flags, FF_HASH_FULL);
        DBG(small_file++;)
      }
      if (!ISFLAG(tree->file->flags, FF_HASH_FULL)) {
        tree->file->filehash = tree->file->filehash_partial;
        SETFLAG(tree->file->flags, FF_HASH_FULL);
        DBG(small_file++;)
      }
    } else if (cmpresult == 0) {
//      if (ISFLAG(flags, F_SKIPHASH)) {
//        LOUD(fprintf(stderr, "checkmatch: skipping full file hashes (F_SKIPMATCH)\n"));
//      } else {
        /* If partial match was correct, perform a full file hash match */
        if (!ISFLAG(tree->file->flags, FF_HASH_FULL)) {
          filehash = get_filehash(tree->file, 0);
          if (filehash == NULL) return NULL;

          tree->file->filehash = *filehash;
          SETFLAG(tree->file->flags, FF_HASH_FULL);
        }

        if (!ISFLAG(file->flags, FF_HASH_FULL)) {
          filehash = get_filehash(file, 0);
          if (filehash == NULL) return NULL;

          file->filehash = *filehash;
          SETFLAG(file->flags, FF_HASH_FULL);
        }

        /* Full file hash comparison */
        cmpresult = HASH_COMPARE(file->filehash, tree->file->filehash);
        LOUD(if (!cmpresult) fprintf(stderr, "checkmatch: full hashes match\n"));
        LOUD(if (cmpresult) fprintf(stderr, "checkmatch: full hashes do not match\n"));
        DBG(full_hash++);
//      }
    } else {
      /* Partial hashes differ: the pair was eliminated without a full hash */
      DBG(partial_elim++);
    }
  }  /* if (cmpresult == 0) */

  /* Hashes agree but an earlier condition forbids the match: force the
   * file down the left branch instead of reporting it */
  if ((cantmatch != 0) && (cmpresult == 0)) {
    LOUD(fprintf(stderr, "checkmatch: rejecting because match not allowed (cantmatch = 1)\n"));
    cmpresult = -1;
  }

  /* How the file tree works
   *
   * The tree is sorted by size as files arrive. If the files are the same
   * size, they are possible duplicates and are checked for duplication.
   * If they are not a match, the hashes are used to decide whether to
   * continue with the file to the left or the right in the file tree.
   * If the direction decision points to a leaf node, the duplicate scan
   * continues down that path; if it points to an empty node, the current
   * file is attached to the file tree at that point.
   *
   * This allows for quickly finding files of the same size by avoiding
   * tree branches with differing size groups.
   */
  if (cmpresult < 0) {
    if (tree->left != NULL) {
      LOUD(fprintf(stderr, "checkmatch: recursing tree: left\n"));
      DBG(left_branch++; tree_depth++;)
      return checkmatch(tree->left, file);
    } else {
      LOUD(fprintf(stderr, "checkmatch: registering file: left\n"));
      registerfile(&tree, LEFT, file);
      TREE_DEPTH_UPDATE_MAX();
      return NULL;
    }
  } else if (cmpresult > 0) {
    if (tree->right != NULL) {
      LOUD(fprintf(stderr, "checkmatch: recursing tree: right\n"));
      DBG(right_branch++; tree_depth++;)
      return checkmatch(tree->right, file);
    } else {
      LOUD(fprintf(stderr, "checkmatch: registering file: right\n"));
      registerfile(&tree, RIGHT, file);
      TREE_DEPTH_UPDATE_MAX();
      return NULL;
    }
  } else {
    /* All compares matched */
    DBG(partial_to_full++;)
    TREE_DEPTH_UPDATE_MAX();
    LOUD(fprintf(stderr, "checkmatch: files appear to match based on hashes\n"));
    if (ISFLAG(p_flags, PF_FULLHASH)) printf("Full hashes match:\n   %s\n   %s\n\n", file->d_name, tree->file->d_name);
    return &tree->file;
  }
  /* Fall through - should never be reached */
  return NULL;
}
1472 
1473 
1474 /* Do a byte-by-byte comparison in case two different files produce the
1475    same signature. Unlikely, but better safe than sorry. */
1476 static inline int confirmmatch(FILE * const restrict file1, FILE * const restrict file2, const off_t size)
1477 {
1478   static char *c1 = NULL, *c2 = NULL;
1479   size_t r1, r2;
1480   off_t bytes = 0;
1481   int check = 0;
1482 
1483   if (file1 == NULL || file2 == NULL) nullptr("confirmmatch()");
1484   LOUD(fprintf(stderr, "confirmmatch running\n"));
1485 
1486   /* Allocate on first use; OOM if either is ever NULLed */
1487   if (!c1) {
1488     c1 = (char *)string_malloc(auto_chunk_size);
1489     c2 = (char *)string_malloc(auto_chunk_size);
1490   }
1491   if (!c1 || !c2) oom("confirmmatch() c1/c2");
1492 
1493   fseek(file1, 0, SEEK_SET);
1494   fseek(file2, 0, SEEK_SET);
1495 
1496   do {
1497     if (interrupt) return 0;
1498     r1 = fread(c1, sizeof(char), auto_chunk_size, file1);
1499     r2 = fread(c2, sizeof(char), auto_chunk_size, file2);
1500 
1501     if (r1 != r2) return 0; /* file lengths are different */
1502     if (memcmp (c1, c2, r1)) return 0; /* file contents are different */
1503 
1504     if (!ISFLAG(flags, F_HIDEPROGRESS)) {
1505       check++;
1506       bytes += (off_t)r1;
1507       if (check > CHECK_MINIMUM) {
1508         update_progress("confirm", (int)((bytes * 100) / size));
1509         check = 0;
1510       }
1511     }
1512   } while (r2);
1513 
1514   return 1;
1515 }
1516 
1517 
1518 /* Count the following statistics:
1519    - Maximum number of files in a duplicate set (length of longest dupe chain)
1520    - Number of non-zero-length files that have duplicates (if n_files != NULL)
1521    - Total number of duplicate file sets (groups) */
1522 extern unsigned int get_max_dupes(const file_t *files, unsigned int * const restrict max,
1523                 unsigned int * const restrict n_files) {
1524   unsigned int groups = 0;
1525 
1526   if (files == NULL || max == NULL) nullptr("get_max_dupes()");
1527   LOUD(fprintf(stderr, "get_max_dupes(%p, %p, %p)\n", (const void *)files, (void *)max, (void *)n_files));
1528 
1529   *max = 0;
1530   if (n_files) *n_files = 0;
1531 
1532   while (files) {
1533     unsigned int n_dupes;
1534     if (ISFLAG(files->flags, FF_HAS_DUPES)) {
1535       groups++;
1536       if (n_files && files->size) (*n_files)++;
1537       n_dupes = 1;
1538       for (file_t *curdupe = files->duplicates; curdupe; curdupe = curdupe->duplicates) n_dupes++;
1539       if (n_dupes > *max) *max = n_dupes;
1540     }
1541     files = files->next;
1542   }
1543   return groups;
1544 }
1545 
1546 
1547 #ifndef NO_USER_ORDER
1548 static int sort_pairs_by_param_order(file_t *f1, file_t *f2)
1549 {
1550   if (!ISFLAG(flags, F_USEPARAMORDER)) return 0;
1551   if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_param_order()");
1552   if (f1->user_order < f2->user_order) return -sort_direction;
1553   if (f1->user_order > f2->user_order) return sort_direction;
1554   return 0;
1555 }
1556 #endif
1557 
1558 
1559 static int sort_pairs_by_mtime(file_t *f1, file_t *f2)
1560 {
1561   if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_mtime()");
1562 
1563 #ifndef NO_USER_ORDER
1564   int po = sort_pairs_by_param_order(f1, f2);
1565   if (po != 0) return po;
1566 #endif /* NO_USER_ORDER */
1567 
1568   if (f1->mtime < f2->mtime) return -sort_direction;
1569   else if (f1->mtime > f2->mtime) return sort_direction;
1570 
1571   /* If the mtimes match, use the names to break the tie */
1572   return numeric_sort(f1->d_name, f2->d_name, sort_direction);
1573 }
1574 
1575 
1576 static int sort_pairs_by_filename(file_t *f1, file_t *f2)
1577 {
1578   if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_filename()");
1579 
1580 #ifndef NO_USER_ORDER
1581   int po = sort_pairs_by_param_order(f1, f2);
1582   if (po != 0) return po;
1583 #endif /* NO_USER_ORDER */
1584 
1585   return numeric_sort(f1->d_name, f2->d_name, sort_direction);
1586 }
1587 
1588 
1589 static void registerpair(file_t **matchlist, file_t *newmatch,
1590                 int (*comparef)(file_t *f1, file_t *f2))
1591 {
1592   file_t *traverse;
1593   file_t *back;
1594 
1595   /* NULL pointer sanity checks */
1596   if (matchlist == NULL || newmatch == NULL || comparef == NULL) nullptr("registerpair()");
1597   LOUD(fprintf(stderr, "registerpair: '%s', '%s'\n", (*matchlist)->d_name, newmatch->d_name);)
1598 
1599   SETFLAG((*matchlist)->flags, FF_HAS_DUPES);
1600   back = NULL;
1601   traverse = *matchlist;
1602 
1603   /* FIXME: This needs to be changed! As it currently stands, the compare
1604    * function only runs on a pair as it is registered and future pairs can
1605    * mess up the sort order. A separate sorting function should happen before
1606    * the dupe chain is acted upon rather than while pairs are registered. */
1607   while (traverse) {
1608     if (comparef(newmatch, traverse) <= 0) {
1609       newmatch->duplicates = traverse;
1610 
1611       if (!back) {
1612         *matchlist = newmatch; /* update pointer to head of list */
1613         SETFLAG(newmatch->flags, FF_HAS_DUPES);
1614         CLEARFLAG(traverse->flags, FF_HAS_DUPES); /* flag is only for first file in dupe chain */
1615       } else back->duplicates = newmatch;
1616 
1617       break;
1618     } else {
1619       if (traverse->duplicates == 0) {
1620         traverse->duplicates = newmatch;
1621         if (!back) SETFLAG(traverse->flags, FF_HAS_DUPES);
1622 
1623         break;
1624       }
1625     }
1626 
1627     back = traverse;
1628     traverse = traverse->duplicates;
1629   }
1630   return;
1631 }
1632 
1633 
1634 static inline void help_text(void)
1635 {
1636   printf("Usage: jdupes [options] FILES and/or DIRECTORIES...\n\n");
1637 
1638   printf("Duplicate file sets will be printed by default unless a different action\n");
1639   printf("option is specified (delete, summarize, link, dedupe, etc.)\n");
1640 #ifdef LOUD
1641   printf(" -@ --loud        \toutput annoying low-level debug info while running\n");
1642 #endif
1643   printf(" -0 --printnull   \toutput nulls instead of CR/LF (like 'find -print0')\n");
1644   printf(" -1 --one-file-system \tdo not match files on different filesystems/devices\n");
1645   printf(" -A --nohidden    \texclude hidden files from consideration\n");
1646 #ifdef ENABLE_DEDUPE
1647   printf(" -B --dedupe      \tdo a copy-on-write (reflink/clone) deduplication\n");
1648 #endif
1649   printf(" -C --chunksize=# \toverride I/O chunk size (min %d, max %d)\n", MIN_CHUNK_SIZE, MAX_CHUNK_SIZE);
1650   printf(" -d --delete      \tprompt user for files to preserve and delete all\n");
1651   printf("                  \tothers; important: under particular circumstances,\n");
1652   printf("                  \tdata may be lost when using this option together\n");
1653   printf("                  \twith -s or --symlinks, or when specifying a\n");
1654   printf("                  \tparticular directory more than once; refer to the\n");
1655   printf("                  \tdocumentation for additional information\n");
1656 #ifdef DEBUG
1657   printf(" -D --debug       \toutput debug statistics after completion\n");
1658 #endif
1659   printf(" -f --omitfirst   \tomit the first file in each set of matches\n");
1660   printf(" -h --help        \tdisplay this help message\n");
1661 #ifndef NO_HARDLINKS
1662   printf(" -H --hardlinks   \ttreat any linked files as duplicate files. Normally\n");
1663   printf("                  \tlinked files are treated as non-duplicates for safety\n");
1664 #endif
1665   printf(" -i --reverse     \treverse (invert) the match sort order\n");
1666 #ifndef NO_USER_ORDER
1667   printf(" -I --isolate     \tfiles in the same specified directory won't match\n");
1668 #endif
1669   printf(" -j --json        \tproduce JSON (machine-readable) output\n");
1670 /*  printf(" -K --skiphash    \tskip full file hashing (may be faster; 100%% safe)\n"); */
1671   printf("                  \tWARNING: in development, not fully working yet!\n");
1672 #ifndef NO_SYMLINKS
1673   printf(" -l --linksoft    \tmake relative symlinks for duplicates w/o prompting\n");
1674 #endif
1675 #ifndef NO_HARDLINKS
1676   printf(" -L --linkhard    \thard link all duplicate files without prompting\n");
1677  #ifdef ON_WINDOWS
1678   printf("                  \tWindows allows a maximum of 1023 hard links per file;\n");
1679   printf("                  \tlinking large match sets will result in multiple sets\n");
1680   printf("                  \tof hard linked files due to this limit.\n");
1681  #endif /* ON_WINDOWS */
1682 #endif /* NO_HARDLINKS */
1683   printf(" -m --summarize   \tsummarize dupe information\n");
1684   printf(" -M --printwithsummary\twill print matches and --summarize at the end\n");
1685   printf(" -N --noprompt    \ttogether with --delete, preserve the first file in\n");
1686   printf("                  \teach set of duplicates and delete the rest without\n");
1687   printf("                  \tprompting the user\n");
1688   printf(" -o --order=BY    \tselect sort order for output, linking and deleting; by\n");
1689   printf("                  \tmtime (BY=time) or filename (BY=name, the default)\n");
1690 #ifndef NO_USER_ORDER
1691   printf(" -O --paramorder  \tParameter order is more important than selected -o sort\n");
1692 #endif
1693 #ifndef NO_PERMS
1694   printf(" -p --permissions \tdon't consider files with different owner/group or\n");
1695   printf("                  \tpermission bits as duplicates\n");
1696 #endif
1697   printf(" -P --print=type  \tprint extra info (partial, early, fullhash)\n");
1698   printf(" -q --quiet       \thide progress indicator\n");
1699   printf(" -Q --quick       \tskip byte-for-byte confirmation for quick matching\n");
1700   printf("                  \tWARNING: -Q can result in data loss! Be very careful!\n");
1701   printf(" -r --recurse     \tfor every directory, process its subdirectories too\n");
1702   printf(" -R --recurse:    \tfor each directory given after this option follow\n");
1703   printf("                  \tsubdirectories encountered within (note the ':' at\n");
1704   printf("                  \tthe end of the option, manpage for more details)\n");
1705 #ifndef NO_SYMLINKS
1706   printf(" -s --symlinks    \tfollow symlinks\n");
1707 #endif
1708   printf(" -S --size        \tshow size of duplicate files\n");
1709   printf(" -t --nochangecheck\tdisable security check for file changes (aka TOCTTOU)\n");
1710   printf(" -T --partial-only \tmatch based on partial hashes only. WARNING:\n");
1711   printf("                  \tEXTREMELY DANGEROUS paired with destructive actions!\n");
1712   printf("                  \t-T must be specified twice to work. Read the manual!\n");
1713   printf(" -u --printunique \tprint only a list of unique (non-matched) files\n");
1714   printf(" -U --notravcheck \tdisable double-traversal safety check (BE VERY CAREFUL)\n");
1715   printf("                  \tThis fixes a Google Drive File Stream recursion issue\n");
1716   printf(" -v --version     \tdisplay jdupes version and license information\n");
1717   printf(" -X --extfilter=x:y\tfilter files based on specified criteria\n");
1718   printf("                  \tUse '-X help' for detailed extfilter help\n");
1719   printf(" -z --zeromatch   \tconsider zero-length files to be duplicates\n");
1720   printf(" -Z --softabort   \tIf the user aborts (i.e. CTRL-C) act on matches so far\n");
1721 #ifndef ON_WINDOWS
1722   printf("                  \tYou can send SIGUSR1 to the program to toggle this\n");
1723 #endif
1724 #ifdef OMIT_GETOPT_LONG
1725   printf("Note: Long options are not supported in this build.\n\n");
1726 #endif
1727 }
1728 
1729 
/* Print the detailed help for the -X/--extfilter option to stdout.
 * The text is kept as a table of lines and written out verbatim, in order. */
static void help_text_extfilter(void)
{
  static const char * const help_lines[] = {
    "Detailed help for jdupes -X/--extfilter options\n",
    "General format: jdupes -X filter[:value][size_suffix]\n\n",

    /* FIXME: Remove after v1.19.0 */
    "****** WARNING: THE MEANINGS HAVE CHANGED IN v1.19.0 - READ CAREFULLY ******\n\n",

    /* One entry per filter, with continuation lines indented under it */
    "noext:ext1[,ext2,...]   \tExclude files with certain extension(s)\n\n",
    "onlyext:ext1[,ext2,...] \tOnly include files with certain extension(s)\n\n",
    "size[+-=]:size[suffix]  \tOnly Include files matching size criteria\n",
    "                        \tSize specs: + larger, - smaller, = equal to\n",
    "                        \tSpecs can be mixed, i.e. size+=:100k will\n",
    "                        \tonly include files 100KiB or more in size.\n\n",
    "nostr:text_string       \tExclude all paths containing the string\n",
    "onlystr:text_string     \tOnly allow paths containing the string\n",
    "                        \tHINT: you can use these for directories:\n",
    "                        \t-X nostr:/dir_x/  or  -X onlystr:/dir_x/\n",
    "newer:datetime          \tOnly include files newer than specified date\n",
    "older:datetime          \tOnly include files older than specified date\n",
    "                        \tDate/time format: \"YYYY-MM-DD HH:MM:SS\"\n",
    "                        \tTime is optional (remember to escape spaces!)\n",

    /* General notes on values and size multipliers */
    "\nSome filters take no value or multiple values. Filters that can take\n",
    "a numeric option generally support the size multipliers K/M/G/T/P/E\n",
    "with or without an added iB or B. Multipliers are binary-style unless\n",
    "the -B suffix is used, which will use decimal multipliers. For example,\n",
    "16k or 16kib = 16384; 16kb = 16000. Multipliers are case-insensitive.\n\n",

    "Filters have cumulative effects: jdupes -X size+:99 -X size-:101 will\n",
    "cause only files of exactly 100 bytes in size to be included.\n\n",

    "Extension matching is case-insensitive.\n",
    "Path substring matching is case-sensitive.\n"
  };
  const size_t line_count = sizeof(help_lines) / sizeof(help_lines[0]);

  for (size_t idx = 0; idx < line_count; idx++)
    fputs(help_lines[idx], stdout);
}
1766 
1767 
1768 #ifdef UNICODE
1769 int wmain(int argc, wchar_t **wargv)
1770 #else
1771 int main(int argc, char **argv)
1772 #endif
1773 {
1774   static file_t *files = NULL;
1775   static file_t *curfile;
1776   static char **oldargv;
1777   static int firstrecurse;
1778   static int opt;
1779   static int pm = 1;
1780   static int partialonly_spec = 0;
1781   static ordertype_t ordertype = ORDER_NAME;
1782   static long manual_chunk_size = 0;
1783 #ifdef __linux__
1784   static struct proc_cacheinfo pci;
1785 #endif
1786 #ifdef ENABLE_DEDUPE
1787   static struct utsname utsname;
1788 #endif
1789 
1790 #ifndef OMIT_GETOPT_LONG
1791   static const struct option long_options[] =
1792   {
1793     { "loud", 0, 0, '@' },
1794     { "printnull", 0, 0, '0' },
1795     { "one-file-system", 0, 0, '1' },
1796     { "nohidden", 0, 0, 'A' },
1797     { "dedupe", 0, 0, 'B' },
1798     { "chunksize", 1, 0, 'C' },
1799     { "debug", 0, 0, 'D' },
1800     { "delete", 0, 0, 'd' },
1801     { "omitfirst", 0, 0, 'f' },
1802     { "hardlinks", 0, 0, 'H' },
1803     { "help", 0, 0, 'h' },
1804     { "isolate", 0, 0, 'I' },
1805     { "reverse", 0, 0, 'i' },
1806     { "json", 0, 0, 'j' },
1807     { "skiphash", 0, 0, 'K' },
1808     { "linkhard", 0, 0, 'L' },
1809     { "linksoft", 0, 0, 'l' },
1810     { "printwithsummary", 0, 0, 'M'},
1811     { "summarize", 0, 0, 'm'},
1812     { "noprompt", 0, 0, 'N' },
1813     { "noempty", 0, 0, 'n' },
1814     { "paramorder", 0, 0, 'O' },
1815     { "order", 1, 0, 'o' },
1816     { "print", 1, 0, 'P' },
1817     { "permissions", 0, 0, 'p' },
1818     { "quick", 0, 0, 'Q' },
1819     { "quiet", 0, 0, 'q' },
1820     { "recurse:", 0, 0, 'R' },
1821     { "recurse", 0, 0, 'r' },
1822     { "size", 0, 0, 'S' },
1823     { "symlinks", 0, 0, 's' },
1824     { "partial-only", 0, 0, 'T' },
1825     { "nochangecheck", 0, 0, 't' },
1826     { "notravcheck", 0, 0, 'U' },
1827     { "printunique", 0, 0, 'u' },
1828     { "version", 0, 0, 'v' },
1829     { "extfilter", 1, 0, 'X' },
1830     { "softabort", 0, 0, 'Z' },
1831     { "zeromatch", 0, 0, 'z' },
1832     { NULL, 0, 0, 0 }
1833   };
1834 #define GETOPT getopt_long
1835 #else
1836 #define GETOPT getopt
1837 #endif
1838 
1839 #define GETOPT_STRING "@01ABC:DdfHhIijKLlMmNnOo:P:pQqRrSsTtUuVvX:Zz"
1840 
1841 /* Windows buffers our stderr output; don't let it do that */
1842 #ifdef ON_WINDOWS
1843   if (setvbuf(stderr, NULL, _IONBF, 0) != 0)
1844     fprintf(stderr, "warning: setvbuf() failed\n");
1845 #endif
1846 
1847 #ifdef UNICODE
1848   /* Create a UTF-8 **argv from the wide version */
1849   static char **argv;
1850   argv = (char **)string_malloc(sizeof(char *) * (size_t)argc);
1851   if (!argv) oom("main() unicode argv");
1852   widearg_to_argv(argc, wargv, argv);
1853   /* fix up __argv so getopt etc. don't crash */
1854   __argv = argv;
1855   /* Only use UTF-16 for terminal output, else use UTF-8 */
1856   if (!_isatty(_fileno(stdout))) out_mode = _O_BINARY;
1857   else out_mode = _O_U16TEXT;
1858   if (!_isatty(_fileno(stderr))) err_mode = _O_BINARY;
1859   else err_mode = _O_U16TEXT;
1860 #endif /* UNICODE */
1861 
1862 #ifdef __linux__
1863   /* Auto-tune chunk size to be half of L1 data cache if possible */
1864   get_proc_cacheinfo(&pci);
1865   if (pci.l1 != 0) auto_chunk_size = (pci.l1 / 2);
1866   else if (pci.l1d != 0) auto_chunk_size = (pci.l1d / 2);
1867   /* Must be at least 4096 (4 KiB) and cannot exceed CHUNK_SIZE */
1868   if (auto_chunk_size < MIN_CHUNK_SIZE || auto_chunk_size > MAX_CHUNK_SIZE) auto_chunk_size = CHUNK_SIZE;
1869   /* Force to a multiple of 4096 if it isn't already */
1870   if ((auto_chunk_size & 0x00000fffUL) != 0)
1871     auto_chunk_size = (auto_chunk_size + 0x00000fffUL) & 0x000ff000;
1872 #endif /* __linux__ */
1873 
1874   /* Is stderr a terminal? If not, we won't write progress to it */
1875 #ifdef ON_WINDOWS
1876   if (!_isatty(_fileno(stderr))) SETFLAG(flags, F_HIDEPROGRESS);
1877 #else
1878   if (!isatty(fileno(stderr))) SETFLAG(flags, F_HIDEPROGRESS);
1879 #endif
1880 
1881   program_name = argv[0];
1882   oldargv = cloneargs(argc, argv);
1883   /* Clean up string_malloc on any exit */
1884   atexit(clean_exit);
1885 
1886   while ((opt = GETOPT(argc, argv, GETOPT_STRING
1887 #ifndef OMIT_GETOPT_LONG
1888           , long_options, NULL
1889 #endif
1890          )) != EOF) {
1891     if ((uintptr_t)optarg == 0x20) goto error_optarg;
1892     switch (opt) {
1893     case '0':
1894       SETFLAG(a_flags, FA_PRINTNULL);
1895       LOUD(fprintf(stderr, "opt: print null instead of newline (--printnull)\n");)
1896       break;
1897     case '1':
1898       SETFLAG(flags, F_ONEFS);
1899       LOUD(fprintf(stderr, "opt: recursion across filesystems disabled (--onefs)\n");)
1900       break;
1901     case 'A':
1902       SETFLAG(flags, F_EXCLUDEHIDDEN);
1903       break;
1904     case 'C':
1905       manual_chunk_size = strtol(optarg, NULL, 10) & 0x0ffff000L;  /* Align to 4K sizes */
1906       if (manual_chunk_size < MIN_CHUNK_SIZE || manual_chunk_size > MAX_CHUNK_SIZE) {
1907         fprintf(stderr, "warning: invalid manual chunk size (must be %d-%d); using defaults\n", MIN_CHUNK_SIZE, MAX_CHUNK_SIZE);
1908         LOUD(fprintf(stderr, "Manual chunk size (failed) was apparently '%s' => %ld\n", optarg, manual_chunk_size));
1909         manual_chunk_size = 0;
1910       } else auto_chunk_size = (size_t)manual_chunk_size;
1911       LOUD(fprintf(stderr, "Manual chunk size is %ld\n", manual_chunk_size));
1912       break;
1913     case 'd':
1914       SETFLAG(a_flags, FA_DELETEFILES);
1915       LOUD(fprintf(stderr, "opt: delete files after matching (--deletefiles)\n");)
1916       break;
1917     case 'D':
1918 #ifdef DEBUG
1919       SETFLAG(flags, F_DEBUG);
1920 #endif
1921       break;
1922     case 'f':
1923       SETFLAG(a_flags, FA_OMITFIRST);
1924       LOUD(fprintf(stderr, "opt: omit first match from each match set (--omitfirst)\n");)
1925       break;
1926     case 'h':
1927       help_text();
1928       string_malloc_destroy();
1929       exit(EXIT_FAILURE);
1930 #ifndef NO_HARDLINKS
1931     case 'H':
1932       SETFLAG(flags, F_CONSIDERHARDLINKS);
1933       LOUD(fprintf(stderr, "opt: hard links count as matches (--hardlinks)\n");)
1934       break;
1935     case 'L':
1936       SETFLAG(a_flags, FA_HARDLINKFILES);
1937       LOUD(fprintf(stderr, "opt: convert duplicates to hard links (--linkhard)\n");)
1938       break;
1939 #endif
1940     case 'i':
1941       SETFLAG(flags, F_REVERSESORT);
1942       LOUD(fprintf(stderr, "opt: sort order reversal enabled (--reverse)\n");)
1943       break;
1944 #ifndef NO_USER_ORDER
1945     case 'I':
1946       SETFLAG(flags, F_ISOLATE);
1947       LOUD(fprintf(stderr, "opt: intra-parameter match isolation enabled (--isolate)\n");)
1948       break;
1949     case 'O':
1950       SETFLAG(flags, F_USEPARAMORDER);
1951       LOUD(fprintf(stderr, "opt: parameter order takes precedence (--paramorder)\n");)
1952       break;
1953 #else
1954     case 'I':
1955     case 'O':
1956       fprintf(stderr, "warning: -I and -O are disabled and ignored in this build\n");
1957       break;
1958 #endif
1959     case 'j':
1960       SETFLAG(a_flags, FA_PRINTJSON);
1961       LOUD(fprintf(stderr, "opt: print output in JSON format (--printjson)\n");)
1962       break;
1963     case 'K':
1964       SETFLAG(flags, F_SKIPHASH);
1965       break;
1966     case 'm':
1967       SETFLAG(a_flags, FA_SUMMARIZEMATCHES);
1968       LOUD(fprintf(stderr, "opt: print a summary of match stats (--summarize)\n");)
1969       break;
1970     case 'M':
1971       SETFLAG(a_flags, FA_SUMMARIZEMATCHES);
1972       SETFLAG(a_flags, FA_PRINTMATCHES);
1973       LOUD(fprintf(stderr, "opt: print matches with a summary (--printwithsummary)\n");)
1974       break;
1975     case 'n':
1976       //fprintf(stderr, "note: -n/--noempty is the default behavior now and is deprecated.\n");
1977       break;
1978     case 'N':
1979       SETFLAG(flags, F_NOPROMPT);
1980       LOUD(fprintf(stderr, "opt: delete files without prompting (--noprompt)\n");)
1981       break;
1982     case 'p':
1983       SETFLAG(flags, F_PERMISSIONS);
1984       LOUD(fprintf(stderr, "opt: permissions must also match (--permissions)\n");)
1985       break;
1986     case 'P':
1987       LOUD(fprintf(stderr, "opt: print early: '%s' (--print)\n", optarg);)
1988       if (strcmp(optarg, "partial") == 0) SETFLAG(p_flags, PF_PARTIAL);
1989       else if (strcmp(optarg, "early") == 0) SETFLAG(p_flags, PF_EARLYMATCH);
1990       else if (strcmp(optarg, "fullhash") == 0) SETFLAG(p_flags, PF_FULLHASH);
1991       else {
1992         fprintf(stderr, "Option '%s' is not valid for -P\n", optarg);
1993         exit(EXIT_FAILURE);
1994       }
1995       break;
1996     case 'q':
1997       SETFLAG(flags, F_HIDEPROGRESS);
1998       break;
1999     case 'Q':
2000       SETFLAG(flags, F_QUICKCOMPARE);
2001       fprintf(stderr, "\nBIG FAT WARNING: -Q/--quick MAY BE DANGEROUS! Read the manual!\n\n");
2002       LOUD(fprintf(stderr, "opt: byte-for-byte safety check disabled (--quick)\n");)
2003       break;
2004     case 'r':
2005       SETFLAG(flags, F_RECURSE);
2006       LOUD(fprintf(stderr, "opt: global recursion enabled (--recurse)\n");)
2007       break;
2008     case 'R':
2009       SETFLAG(flags, F_RECURSEAFTER);
2010       LOUD(fprintf(stderr, "opt: partial recursion enabled (--recurseafter)\n");)
2011       break;
2012     case 't':
2013       SETFLAG(flags, F_NOCHANGECHECK);
2014       LOUD(fprintf(stderr, "opt: TOCTTOU safety check disabled (--nochangecheck)\n");)
2015       break;
2016     case 'T':
2017       if (partialonly_spec == 0)
2018         partialonly_spec = 1;
2019       else {
2020         partialonly_spec = 2;
2021         fprintf(stderr, "\nBIG FAT WARNING: -T/--partialonly is EXTREMELY DANGEROUS! Read the manual!\n\n");
2022         SETFLAG(flags, F_PARTIALONLY);
2023       }
2024       break;
2025     case 'u':
2026       SETFLAG(a_flags, FA_PRINTUNIQUE);
2027       LOUD(fprintf(stderr, "opt: print only non-matched (unique) files (--printunique)\n");)
2028       break;
2029     case 'U':
2030       SETFLAG(flags, F_NOTRAVCHECK);
2031       LOUD(fprintf(stderr, "opt: double-traversal safety check disabled (--notravcheck)\n");)
2032       break;
2033 #ifndef NO_SYMLINKS
2034     case 'l':
2035       SETFLAG(a_flags, FA_MAKESYMLINKS);
2036       LOUD(fprintf(stderr, "opt: convert duplicates to symbolic links (--linksoft)\n");)
2037       break;
2038     case 's':
2039       SETFLAG(flags, F_FOLLOWLINKS);
2040       LOUD(fprintf(stderr, "opt: follow symbolic links enabled (--symlinks)\n");)
2041       break;
2042 #endif
2043     case 'S':
2044       SETFLAG(a_flags, FA_SHOWSIZE);
2045       LOUD(fprintf(stderr, "opt: show size of files enabled (--size)\n");)
2046       break;
2047     case 'X':
2048       add_extfilter(optarg);
2049       break;
2050     case 'z':
2051       SETFLAG(flags, F_INCLUDEEMPTY);
2052       LOUD(fprintf(stderr, "opt: zero-length files count as matches (--zeromatch)\n");)
2053       break;
2054     case 'Z':
2055       SETFLAG(flags, F_SOFTABORT);
2056       LOUD(fprintf(stderr, "opt: soft-abort mode enabled (--softabort)\n");)
2057       break;
2058     case '@':
2059 #ifdef LOUD_DEBUG
2060       SETFLAG(flags, F_DEBUG | F_LOUD | F_HIDEPROGRESS);
2061 #endif
2062       LOUD(fprintf(stderr, "opt: loud debugging enabled, hope you can handle it (--loud)\n");)
2063       break;
2064     case 'v':
2065     case 'V':
2066       printf("jdupes %s (%s) ", VER, VERDATE);
2067 
2068       /* Indicate bitness information */
2069       if (sizeof(uintptr_t) == 8) {
2070         if (sizeof(long) == 4) printf("64-bit i32\n");
2071         else if (sizeof(long) == 8) printf("64-bit\n");
2072       } else if (sizeof(uintptr_t) == 4) {
2073         if (sizeof(long) == 4) printf("32-bit\n");
2074         else if (sizeof(long) == 8) printf("32-bit i64\n");
2075       } else printf("%u-bit i%u\n", (unsigned int)(sizeof(uintptr_t) * 8),
2076           (unsigned int)(sizeof(long) * 8));
2077 
2078       printf("Compile-time extensions:");
2079       if (*extensions != NULL) {
2080         int c = 0;
2081         while (extensions[c] != NULL) {
2082           printf(" %s", extensions[c]);
2083           c++;
2084         }
2085       } else printf(" none");
2086       printf("\nCopyright (C) 2015-2020 by Jody Bruchon and contributors\n");
2087       printf("Forked from fdupes 1.51, (C) 1999-2014 Adrian Lopez and contributors\n\n");
2088       printf("Permission is hereby granted, free of charge, to any person obtaining a copy of\n");
2089       printf("this software and associated documentation files (the \"Software\"), to deal in\n");
2090       printf("the Software without restriction, including without limitation the rights to\n");
2091       printf("use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies\n");
2092       printf("of the Software, and to permit persons to whom the Software is furnished to do\n");
2093       printf("so, subject to the following conditions:\n\n");
2094 
2095       printf("The above copyright notice and this permission notice shall be included in all\n");
2096       printf("copies or substantial portions of the Software.\n\n");
2097       printf("THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n");
2098       printf("IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n");
2099       printf("FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n");
2100       printf("AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n");
2101       printf("LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n");
2102       printf("OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n");
2103       printf("SOFTWARE.\n");
2104       printf("\nIf you find this software useful, please consider financially supporting\n");
2105       printf("its continued development by donating to the author's SubscribeStar:\n");
2106       printf("          https://SubscribeStar.com/JodyBruchon\n");
2107       printf("\nNew releases, bug fixes, and more at the jdupes GitHub project page:\n");
2108       printf("             https://github.com/jbruchon/jdupes\n");
2109       exit(EXIT_SUCCESS);
2110     case 'o':
2111       if (!strncasecmp("name", optarg, 5)) {
2112         ordertype = ORDER_NAME;
2113       } else if (!strncasecmp("time", optarg, 5)) {
2114         ordertype = ORDER_TIME;
2115       } else {
2116         fprintf(stderr, "invalid value for --order: '%s'\n", optarg);
2117         exit(EXIT_FAILURE);
2118       }
2119       break;
2120     case 'B':
2121 #ifdef ENABLE_DEDUPE
2122       /* Refuse to dedupe on 2.x kernels; they could damage user data */
2123       if (uname(&utsname)) {
2124         fprintf(stderr, "Failed to get kernel version! Aborting.\n");
2125         exit(EXIT_FAILURE);
2126       }
2127       LOUD(fprintf(stderr, "dedupefiles: uname got release '%s'\n", utsname.release));
2128       if (*(utsname.release) == '2' && *(utsname.release + 1) == '.') {
2129         fprintf(stderr, "Refusing to dedupe on a 2.x kernel; data loss could occur. Aborting.\n");
2130         exit(EXIT_FAILURE);
2131       }
2132       SETFLAG(a_flags, FA_DEDUPEFILES);
2133       /* btrfs will do the byte-for-byte check itself */
2134       SETFLAG(flags, F_QUICKCOMPARE);
2135       /* It is completely useless to dedupe zero-length extents */
2136       CLEARFLAG(flags, F_INCLUDEEMPTY);
2137 #else
2138       fprintf(stderr, "This program was built without dedupe support\n");
2139       exit(EXIT_FAILURE);
2140 #endif
2141       LOUD(fprintf(stderr, "opt: CoW/block-level deduplication enabled (--dedupe)\n");)
2142       break;
2143 
2144     default:
2145       if (opt != '?') fprintf(stderr, "Sorry, using '-%c' is not supported in this build.\n", opt);
2146       fprintf(stderr, "Try `jdupes --help' for more information.\n");
2147       string_malloc_destroy();
2148       exit(EXIT_FAILURE);
2149     }
2150   }
2151 
2152   if (optind >= argc) {
2153     fprintf(stderr, "no files or directories specified (use -h option for help)\n");
2154     string_malloc_destroy();
2155     exit(EXIT_FAILURE);
2156   }
2157 
2158   if (partialonly_spec == 1) {
2159     fprintf(stderr, "--partial-only specified only once (it's VERY DANGEROUS, read the manual!)\n");
2160     string_malloc_destroy();
2161     exit(EXIT_FAILURE);
2162   }
2163 
2164   if (ISFLAG(flags, F_PARTIALONLY) && ISFLAG(flags, F_QUICKCOMPARE)) {
2165     fprintf(stderr, "--partial-only overrides --quick and is even more dangerous (read the manual!)\n");
2166     string_malloc_destroy();
2167     exit(EXIT_FAILURE);
2168   }
2169 
2170   if (ISFLAG(flags, F_RECURSE) && ISFLAG(flags, F_RECURSEAFTER)) {
2171     fprintf(stderr, "options --recurse and --recurse: are not compatible\n");
2172     string_malloc_destroy();
2173     exit(EXIT_FAILURE);
2174   }
2175 
2176   if (ISFLAG(a_flags, FA_SUMMARIZEMATCHES) && ISFLAG(a_flags, FA_DELETEFILES)) {
2177     fprintf(stderr, "options --summarize and --delete are not compatible\n");
2178     string_malloc_destroy();
2179     exit(EXIT_FAILURE);
2180   }
2181 
2182 #ifdef ENABLE_DEDUPE
2183   if (ISFLAG(flags, F_CONSIDERHARDLINKS) && ISFLAG(a_flags, FA_DEDUPEFILES))
2184     fprintf(stderr, "warning: option --dedupe overrides the behavior of --hardlinks\n");
2185 #endif
2186 
2187   /* If pm == 0, call printmatches() */
2188   pm = !!ISFLAG(a_flags, FA_SUMMARIZEMATCHES) +
2189       !!ISFLAG(a_flags, FA_DELETEFILES) +
2190       !!ISFLAG(a_flags, FA_HARDLINKFILES) +
2191       !!ISFLAG(a_flags, FA_MAKESYMLINKS) +
2192       !!ISFLAG(a_flags, FA_PRINTJSON) +
2193       !!ISFLAG(a_flags, FA_PRINTUNIQUE) +
2194       !!ISFLAG(a_flags, FA_DEDUPEFILES);
2195 
2196   if (pm > 1) {
2197       fprintf(stderr, "Only one of --summarize, --printwithsummary, --delete, --linkhard,\n--linksoft, --json, or --dedupe may be used\n");
2198       string_malloc_destroy();
2199       exit(EXIT_FAILURE);
2200   }
2201   if (pm == 0) SETFLAG(a_flags, FA_PRINTMATCHES);
2202 
2203 #ifndef ON_WINDOWS
2204   /* Catch SIGUSR1 and use it to enable -Z */
2205   signal(SIGUSR1, sigusr1);
2206 #endif
2207 
2208   if (ISFLAG(flags, F_RECURSEAFTER)) {
2209     firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv);
2210 
2211     if (firstrecurse == argc)
2212       firstrecurse = nonoptafter("-R", argc, oldargv, argv);
2213 
2214     if (firstrecurse == argc) {
2215       fprintf(stderr, "-R option must be isolated from other options\n");
2216       string_malloc_destroy();
2217       exit(EXIT_FAILURE);
2218     }
2219 
2220     /* F_RECURSE is not set for directories before --recurse: */
2221     for (int x = optind; x < firstrecurse; x++) {
2222       slash_convert(argv[x]);
2223       grokdir(argv[x], &files, 0);
2224       user_item_count++;
2225     }
2226 
2227     /* Set F_RECURSE for directories after --recurse: */
2228     SETFLAG(flags, F_RECURSE);
2229 
2230     for (int x = firstrecurse; x < argc; x++) {
2231       slash_convert(argv[x]);
2232       grokdir(argv[x], &files, 1);
2233       user_item_count++;
2234     }
2235   } else {
2236     for (int x = optind; x < argc; x++) {
2237       slash_convert(argv[x]);
2238       grokdir(argv[x], &files, ISFLAG(flags, F_RECURSE));
2239       user_item_count++;
2240     }
2241   }
2242 
2243   /* We don't need the double traversal check tree anymore */
2244   travdone_free(travdone_head);
2245 
2246   if (ISFLAG(flags, F_REVERSESORT)) sort_direction = -1;
2247   if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\n");
2248   if (!files) {
2249     fwprint(stderr, "No duplicates found.", 1);
2250     string_malloc_destroy();
2251     exit(EXIT_SUCCESS);
2252   }
2253 
2254   curfile = files;
2255   progress = 0;
2256 
2257   /* Catch CTRL-C */
2258   signal(SIGINT, sighandler);
2259 
2260   while (curfile) {
2261     static file_t **match = NULL;
2262     static FILE *file1;
2263     static FILE *file2;
2264 
2265     if (interrupt) {
2266       fprintf(stderr, "\nStopping file scan due to user abort\n");
2267       if (!ISFLAG(flags, F_SOFTABORT)) exit(EXIT_FAILURE);
2268       interrupt = 0;  /* reset interrupt for re-use */
2269       goto skip_file_scan;
2270     }
2271 
2272     LOUD(fprintf(stderr, "\nMAIN: current file: %s\n", curfile->d_name));
2273 
2274     if (!checktree) registerfile(&checktree, NONE, curfile);
2275     else match = checkmatch(checktree, curfile);
2276 
2277     /* Byte-for-byte check that a matched pair are actually matched */
2278     if (match != NULL) {
2279       /* Quick or partial-only compare will never run confirmmatch()
2280        * Also skip match confirmation for hard-linked files
2281        * (This set of comparisons is ugly, but quite efficient) */
2282       if (ISFLAG(flags, F_QUICKCOMPARE) || ISFLAG(flags, F_PARTIALONLY) ||
2283            (ISFLAG(flags, F_CONSIDERHARDLINKS) &&
2284            (curfile->inode == (*match)->inode) &&
2285            (curfile->device == (*match)->device))
2286          ) {
2287         LOUD(fprintf(stderr, "MAIN: notice: hard linked, quick, or partial-only match (-H/-Q/-T)\n"));
2288         registerpair(match, curfile,
2289             (ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename);
2290         dupecount++;
2291         goto skip_full_check;
2292       }
2293 
2294 #ifdef UNICODE
2295       if (!M2W(curfile->d_name, wstr)) file1 = NULL;
2296       else file1 = _wfopen(wstr, FILE_MODE_RO);
2297 #else
2298       file1 = fopen(curfile->d_name, FILE_MODE_RO);
2299 #endif
2300       if (!file1) {
2301         LOUD(fprintf(stderr, "MAIN: warning: file1 fopen() failed ('%s')\n", curfile->d_name));
2302         curfile = curfile->next;
2303         continue;
2304       }
2305 
2306 #ifdef UNICODE
2307       if (!M2W((*match)->d_name, wstr)) file2 = NULL;
2308       else file2 = _wfopen(wstr, FILE_MODE_RO);
2309 #else
2310       file2 = fopen((*match)->d_name, FILE_MODE_RO);
2311 #endif
2312       if (!file2) {
2313         fclose(file1);
2314         LOUD(fprintf(stderr, "MAIN: warning: file2 fopen() failed ('%s')\n", (*match)->d_name));
2315         curfile = curfile->next;
2316         continue;
2317       }
2318 
2319       if (confirmmatch(file1, file2, curfile->size)) {
2320         LOUD(fprintf(stderr, "MAIN: registering matched file pair\n"));
2321         registerpair(match, curfile,
2322             (ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename);
2323         dupecount++;
2324       } DBG(else hash_fail++;)
2325 
2326       fclose(file1);
2327       fclose(file2);
2328     }
2329 
2330 skip_full_check:
2331     curfile = curfile->next;
2332 
2333     if (!ISFLAG(flags, F_HIDEPROGRESS)) update_progress(NULL, -1);
2334     progress++;
2335   }
2336 
2337   if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%60s\r", " ");
2338 
2339 skip_file_scan:
2340   /* Stop catching CTRL+C */
2341   signal(SIGINT, SIG_DFL);
2342   if (ISFLAG(a_flags, FA_DELETEFILES)) {
2343     if (ISFLAG(flags, F_NOPROMPT)) deletefiles(files, 0, 0);
2344     else deletefiles(files, 1, stdin);
2345   }
2346 #ifndef NO_SYMLINKS
2347   if (ISFLAG(a_flags, FA_MAKESYMLINKS)) linkfiles(files, 0);
2348 #endif
2349 #ifndef NO_HARDLINKS
2350   if (ISFLAG(a_flags, FA_HARDLINKFILES)) linkfiles(files, 1);
2351 #endif /* NO_HARDLINKS */
2352 #ifdef ENABLE_DEDUPE
2353   if (ISFLAG(a_flags, FA_DEDUPEFILES)) dedupefiles(files);
2354 #endif /* ENABLE_DEDUPE */
2355   if (ISFLAG(a_flags, FA_PRINTMATCHES)) printmatches(files);
2356   if (ISFLAG(a_flags, FA_PRINTUNIQUE)) printunique(files);
2357   if (ISFLAG(a_flags, FA_PRINTJSON)) printjson(files, argc, argv);
2358   if (ISFLAG(a_flags, FA_SUMMARIZEMATCHES)) {
2359     if (ISFLAG(a_flags, FA_PRINTMATCHES)) printf("\n\n");
2360     summarizematches(files);
2361   }
2362 
2363   string_malloc_destroy();
2364 
2365 #ifdef DEBUG
2366   if (ISFLAG(flags, F_DEBUG)) {
2367     fprintf(stderr, "\n%d partial (+%d small) -> %d full hash -> %d full (%d partial elim) (%d hash%u fail)\n",
2368         partial_hash, small_file, full_hash, partial_to_full,
2369         partial_elim, hash_fail, (unsigned int)sizeof(jdupes_hash_t)*8);
2370     fprintf(stderr, "%" PRIuMAX " total files, %" PRIuMAX " comparisons, branch L %u, R %u, both %u, max tree depth %u\n",
2371         filecount, comparisons, left_branch, right_branch,
2372         left_branch + right_branch, max_depth);
2373     fprintf(stderr, "SMA: allocs %" PRIuMAX ", free %" PRIuMAX " (merge %" PRIuMAX ", repl %" PRIuMAX "), fail %" PRIuMAX ", reuse %" PRIuMAX ", scan %" PRIuMAX ", tails %" PRIuMAX "\n",
2374         sma_allocs, sma_free_good, sma_free_merged, sma_free_replaced,
2375         sma_free_ignored, sma_free_reclaimed,
2376         sma_free_scanned, sma_free_tails);
2377     if (manual_chunk_size > 0) fprintf(stderr, "I/O chunk size: %ld KiB (manually set)\n", manual_chunk_size >> 10);
2378     else {
2379 #ifdef __linux__
2380       fprintf(stderr, "I/O chunk size: %" PRIuMAX " KiB (%s)\n", (uintmax_t)(auto_chunk_size >> 10), (pci.l1 + pci.l1d) != 0 ? "dynamically sized" : "default size");
2381 #else
2382       fprintf(stderr, "I/O chunk size: %" PRIuMAX " KiB (default size)\n", (uintmax_t)(auto_chunk_size >> 10));
2383 #endif /* __linux__ */
2384     }
2385 #ifdef ON_WINDOWS
2386  #ifndef NO_HARDLINKS
2387     if (ISFLAG(a_flags, FA_HARDLINKFILES))
2388       fprintf(stderr, "Exclusions based on Windows hard link limit: %u\n", hll_exclude);
2389  #endif
2390 #endif
2391   }
2392 #endif /* DEBUG */
2393 
2394   exit(EXIT_SUCCESS);
2395 
2396 error_optarg:
2397   fprintf(stderr, "error: option '%c' requires an argument\n", opt);
2398   exit(EXIT_FAILURE);
2399 }
2400