1 /* jdupes (C) 2015-2020 Jody Bruchon <jody@jodybruchon.com>
2 Forked from fdupes 1.51 (C) 1999-2014 Adrian Lopez
3
4 Permission is hereby granted, free of charge, to any person
5 obtaining a copy of this software and associated documentation files
6 (the "Software"), to deal in the Software without restriction,
7 including without limitation the rights to use, copy, modify, merge,
8 publish, distribute, sublicense, and/or sell copies of the Software,
9 and to permit persons to whom the Software is furnished to do so,
10 subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be
13 included in all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <strings.h>
28 #include <sys/types.h>
29 #include <fcntl.h>
30 #include <dirent.h>
31 #include <signal.h>
32 #include <unistd.h>
33 #include <stdlib.h>
34 #include <stdint.h>
35 #include <inttypes.h>
36 #ifndef OMIT_GETOPT_LONG
37 #include <getopt.h>
38 #endif
39 #include <string.h>
40 #include <errno.h>
41 #include <libgen.h>
42 #include <time.h>
43 #include <sys/time.h>
44 #include "jdupes.h"
45 #include "xxhash.h"
46 #include "oom.h"
47 #ifdef ENABLE_DEDUPE
48 #include <sys/utsname.h>
49 #endif
50
51 /* Jody Bruchon's helpful functions */
52 #include "string_malloc.h"
53 #include "jody_sort.h"
54 #include "jody_win_unicode.h"
55 #include "jody_cacheinfo.h"
56 #include "jody_strtoepoch.h"
57 #include "version.h"
58
59 /* Headers for post-scanning actions */
60 #include "act_deletefiles.h"
61 #include "act_dedupefiles.h"
62 #include "act_linkfiles.h"
63 #include "act_printmatches.h"
64 #include "act_printjson.h"
65 #include "act_summarize.h"
66
/* Per-platform constants: the directory separator character and the
 * fopen() mode string used for read-only access. Windows and Cygwin get
 * a backslash separator and "rbS"; everything else gets '/' and "rb". */
#if defined(_WIN32) || defined(__CYGWIN__)

const char dir_sep = '\\';
#if defined(UNICODE)
const wchar_t *FILE_MODE_RO = L"rbS";
#else
const char *FILE_MODE_RO = "rbS";
#endif /* UNICODE */

#else /* Not Windows */

const char *FILE_MODE_RO = "rb";
const char dir_sep = '/';
/* UNICODE is a Windows-only build option; refuse to build otherwise */
#if defined(UNICODE)
#error Do not define UNICODE on non-Windows platforms.
#undef UNICODE
#endif

#endif /* _WIN32 || __CYGWIN__ */
84
/* Globals that exist only in Windows + Unicode builds: two wide-path
 * scratch buffers and the text modes for the output and error streams */
#if defined(UNICODE)
static wpath_t wname;
static wpath_t wstr;
int out_mode = _O_TEXT;
int err_mode = _O_TEXT;
#endif /* UNICODE */
91
92 #ifndef NO_SYMLINKS
93 #include "jody_paths.h"
94 #endif
95
/* Behavior modification flag words; all start cleared.
 * flags = general options, a_flags = action flags, p_flags = -P flags */
uint_fast32_t flags = 0;
uint_fast32_t a_flags = 0;
uint_fast32_t p_flags = 0;

static const char *program_name;
100
/* Shared stat() result buffer, plus the SIGUSR1 notification state on
 * non-Windows builds */
#ifdef ON_WINDOWS
struct winstat s;
#else
struct stat s;
/* Written by the sigusr1() signal handler and polled by update_progress():
 * 0 = nothing pending, 1 = soft abort turned on, 2 = soft abort turned off.
 * An object shared with a signal handler has to be volatile sig_atomic_t
 * for the access to be well defined. */
static volatile sig_atomic_t usr1_toggle = 0;
#endif
108
/* I/O chunk sizing. A larger chunk size makes large files process faster
 * but uses more RAM. CHUNK_SIZE and PARTIAL_HASH_SIZE may be overridden
 * at compile time; the values below are the defaults. */
#define MIN_CHUNK_SIZE 4096
#define MAX_CHUNK_SIZE 16777216
#if !defined(CHUNK_SIZE)
#define CHUNK_SIZE 65536
#endif
#if !defined(PARTIAL_HASH_SIZE)
#define PARTIAL_HASH_SIZE 4096
#endif

/* Chunk size in use; starts at CHUNK_SIZE */
static size_t auto_chunk_size = CHUNK_SIZE;
120
121 /* Maximum path buffer size to use; must be large enough for a path plus
122 * any work that might be done to the array it's stored in. PATH_MAX is
123 * not always true. Read this article on the false promises of PATH_MAX:
124 * http://insanecoding.blogspot.com/2007/11/pathmax-simply-isnt.html
125 * Windows + Unicode needs a lot more space than UTF-8 in Linux/Mac OS X
126 */
127 #ifndef PATHBUF_SIZE
128 #define PATHBUF_SIZE 4096
129 #endif
130 /* Refuse to build if PATHBUF_SIZE is too small */
131 #if PATHBUF_SIZE < PATH_MAX
132 #error "PATHBUF_SIZE can't be less than PATH_MAX"
133 #endif
134
135 /* Size suffixes - this gets exported */
136 const struct size_suffix size_suffix[] = {
137 /* Byte (someone may actually try to use this) */
138 { "b", 1 },
139 { "k", 1024 },
140 { "kib", 1024 },
141 { "m", 1048576 },
142 { "mib", 1048576 },
143 { "g", (uint64_t)1048576 * 1024 },
144 { "gib", (uint64_t)1048576 * 1024 },
145 { "t", (uint64_t)1048576 * 1048576 },
146 { "tib", (uint64_t)1048576 * 1048576 },
147 { "p", (uint64_t)1048576 * 1048576 * 1024},
148 { "pib", (uint64_t)1048576 * 1048576 * 1024},
149 { "e", (uint64_t)1048576 * 1048576 * 1048576},
150 { "eib", (uint64_t)1048576 * 1048576 * 1048576},
151 /* Decimal suffixes */
152 { "kb", 1000 },
153 { "mb", 1000000 },
154 { "gb", 1000000000 },
155 { "tb", 1000000000000 },
156 { "pb", 1000000000000000 },
157 { "eb", 1000000000000000000 },
158 { NULL, 0 },
159 };
160
/* NULL-terminated list of feature names describing the compile-time
 * options this binary was built with */
const char *extensions[] = {
#if defined(ON_WINDOWS)
  "windows",
#endif
#if defined(UNICODE)
  "unicode",
#endif
#if defined(OMIT_GETOPT_LONG)
  "nolong",
#endif
#if defined(__FAST_MATH__)
  "fastmath",
#endif
#if defined(DEBUG)
  "debug",
#endif
#if defined(LOUD_DEBUG)
  "loud",
#endif
#if defined(ENABLE_DEDUPE)
  "fsdedup",
#endif
#if defined(LOW_MEMORY)
  "lowmem",
#endif
#if defined(SMA_PAGE_SIZE)
  "smapage",
#endif
#if defined(NO_PERMS)
  "noperm",
#endif
#if defined(NO_HARDLINKS)
  "nohardlink",
#endif
#if defined(NO_SYMLINKS)
  "nosymlink",
#endif
#if defined(NO_USER_ORDER)
  "nouserorder",
#endif
  /* List terminator */
  NULL
};
204
205 /* Tree to track each directory traversed */
206 struct travdone {
207 struct travdone *left;
208 struct travdone *right;
209 jdupes_ino_t inode;
210 dev_t device;
211 };
212 static struct travdone *travdone_head = NULL;
213
214 /* Extended filter tree head and static tag list */
215 struct extfilter *extfilter_head = NULL;
216 const struct extfilter_tags extfilter_tags[] = {
217 { "noext", XF_EXCL_EXT },
218 { "onlyext", XF_ONLY_EXT },
219 { "size+", XF_SIZE_GT },
220 { "size-", XF_SIZE_LT },
221 { "size+=", XF_SIZE_GTEQ },
222 { "size-=", XF_SIZE_LTEQ },
223 { "size=", XF_SIZE_EQ },
224 { "nostr", XF_EXCL_STR },
225 { "onlystr", XF_ONLY_STR },
226 { "newer", XF_DATE_NEWER },
227 { "older", XF_DATE_OLDER },
228 { NULL, 0 },
229 };
230
/* Counters read by the progress indicator code */
static uintmax_t filecount = 0;
static uintmax_t progress = 0;
static uintmax_t item_progress = 0;
static uintmax_t dupecount = 0;

/* Number of read loops before checking progress indicator */
#define CHECK_MINIMUM 256
236
/* Hash/compare performance counters, compiled in for DEBUG builds only */
#if defined(DEBUG)
static unsigned int small_file = 0;
static unsigned int partial_hash = 0;
static unsigned int partial_elim = 0;
static unsigned int full_hash = 0;
static unsigned int partial_to_full = 0;
static unsigned int hash_fail = 0;
static uintmax_t comparisons = 0;
static unsigned int left_branch = 0;
static unsigned int right_branch = 0;
/* Count of files skipped for hitting the Windows hard link limit */
#if defined(ON_WINDOWS) && !defined(NO_HARDLINKS)
static unsigned int hll_exclude = 0;
#endif
#endif /* DEBUG */

/* Tree depth tracking, compiled in for TREE_DEPTH_STATS builds only */
#if defined(TREE_DEPTH_STATS)
static unsigned int tree_depth = 0;
static unsigned int max_depth = 0;
#endif
254
255 /* File tree head */
256 static filetree_t *checktree = NULL;
257
258 /* Directory/file parameter position counter */
259 static unsigned int user_item_count = 1;
260
261 /* registerfile() direction options */
262 enum tree_direction { NONE, LEFT, RIGHT };
263
264 /* Sort order reversal */
265 static int sort_direction = 1;
266
/* Soft-abort request flag, set to 1 by sighandler() on the first CTRL-C.
 * It is written from a signal handler, so it must be volatile
 * sig_atomic_t for the access to be well defined. */
static volatile sig_atomic_t interrupt = 0;
269
270 /* Progress indicator time */
271 struct timeval time1, time2;
272
273 /* For path name mangling */
274 char tempname[PATHBUF_SIZE * 2];
275
/* Compare two hashes like memcmp(): yields 1 if a > b, 0 if equal, -1 if
 * a < b. Arguments are fully parenthesized so that expressions such as
 * `x | y` are compared as a whole. Each argument may still be evaluated
 * more than once, so do not pass expressions with side effects. */
#define HASH_COMPARE(a,b) (((a) > (b)) ? 1 : (((a) == (b)) ? 0 : -1))
278
279 static void help_text_extfilter(void);
280
281 /***** End definitions, begin code *****/
282
283 /* Catch CTRL-C and either notify or terminate */
sighandler(const int signum)284 void sighandler(const int signum)
285 {
286 (void)signum;
287 if (interrupt || !ISFLAG(flags, F_SOFTABORT)) {
288 fprintf(stderr, "\n");
289 string_malloc_destroy();
290 exit(EXIT_FAILURE);
291 }
292 interrupt = 1;
293 return;
294 }
295
296
297 #ifndef ON_WINDOWS
sigusr1(const int signum)298 void sigusr1(const int signum)
299 {
300 (void)signum;
301 if (!ISFLAG(flags, F_SOFTABORT)) {
302 SETFLAG(flags, F_SOFTABORT);
303 usr1_toggle = 1;
304 } else {
305 CLEARFLAG(flags, F_SOFTABORT);
306 usr1_toggle = 2;
307 }
308 return;
309 }
310 #endif
311
312
/* Exit-time cleanup: release the string allocator's memory unless the
 * build uses SMA_PASSTHROUGH */
void clean_exit(void)
{
#if !defined(SMA_PASSTHROUGH)
  string_malloc_destroy();
#endif
}
321
322
/* Report a NULL pointer argument as an internal error and terminate.
 * func names the function that received the NULL pointer; if func is
 * itself NULL, "(NULL)" is printed in its place. Does not return.
 * NOTE(review): `nullptr` is a keyword in C23; this name will not build
 * under that standard. */
extern void nullptr(const char * restrict func)
{
  const char * const where = (func != NULL) ? func : "(NULL)";

  fprintf(stderr, "\ninternal error: NULL pointer passed to %s\n", where);
  string_malloc_destroy();
  exit(EXIT_FAILURE);
}
332
333
/* Make a deep copy of an argument vector using string_malloc().
 * Returns a new array of argc pointers, each to a private copy of the
 * matching argv string. Calls oom() if any allocation fails. */
static inline char **cloneargs(const int argc, char **argv)
{
  char **copy;
  int i;

  copy = (char **)string_malloc(sizeof(char *) * (unsigned int)argc);
  if (copy == NULL) oom("cloneargs() start");

  for (i = 0; i < argc; i++) {
    /* Size includes the terminating NUL so memcpy() copies it too */
    const size_t bytes = strlen(argv[i]) + 1;

    copy[i] = (char *)string_malloc(bytes);
    if (copy[i] == NULL) oom("cloneargs() loop");
    memcpy(copy[i], argv[i], bytes);
  }

  return copy;
}
350
351
/* Locate the first element of argv[start..argc-1] that exactly equals
 * arg. Returns its index, or the index where the scan stopped when
 * nothing matched (argc, or start itself if start >= argc). */
static int findarg(const char * const arg, const int start,
                   const int argc, char **argv)
{
  int idx = start;

  while (idx < argc && strcmp(argv[idx], arg) != 0) idx++;

  return idx;
}
363
364 /* Find the first non-option argument after specified option. */
nonoptafter(const char * option,const int argc,char ** oldargv,char ** newargv)365 static int nonoptafter(const char *option, const int argc,
366 char **oldargv, char **newargv)
367 {
368 int x;
369 int targetind;
370 int testind;
371 int startat = 1;
372
373 targetind = findarg(option, 1, argc, oldargv);
374
375 for (x = optind; x < argc; x++) {
376 testind = findarg(newargv[x], startat, argc, oldargv);
377 if (testind > targetind) return x;
378 else startat = testind;
379 }
380
381 return x;
382 }
383
384
385 /* Update progress indicator if requested */
update_progress(const char * const restrict msg,const int file_percent)386 static void update_progress(const char * const restrict msg, const int file_percent)
387 {
388 static int did_fpct = 0;
389
390 /* The caller should be doing this anyway...but don't trust that they did */
391 if (ISFLAG(flags, F_HIDEPROGRESS)) return;
392
393 gettimeofday(&time2, NULL);
394
395 if (progress == 0 || time2.tv_sec > time1.tv_sec) {
396 fprintf(stderr, "\rProgress [%" PRIuMAX "/%" PRIuMAX ", %" PRIuMAX " pairs matched] %" PRIuMAX "%%",
397 progress, filecount, dupecount, (progress * 100) / filecount);
398 if (file_percent > -1 && msg != NULL) {
399 fprintf(stderr, " (%s: %d%%) ", msg, file_percent);
400 did_fpct = 1;
401 } else if (did_fpct != 0) {
402 fprintf(stderr, " ");
403 did_fpct = 0;
404 }
405 fflush(stderr);
406 }
407 time1.tv_sec = time2.tv_sec;
408 #ifndef ON_WINDOWS
409 /* Notify of change to soft abort status if SIGUSR1 received */
410 if (usr1_toggle != 0) {
411 fprintf(stderr, "\njdupes received a USR1 signal; soft abort (-Z) is now %s\n", usr1_toggle == 1 ? "ON" : "OFF" );
412 usr1_toggle = 0;
413 }
414 #endif
415 return;
416 }
417
418
419 /***** Add new functions here *****/
420
421
422 /* Does a file have one of these comma-separated extensions?
423 * Returns 1 after any match, 0 if no matches */
match_extensions(char * path,const char * extlist)424 int match_extensions(char *path, const char *extlist)
425 {
426 char *dot;
427 const char *ext;
428 size_t len, extlen;
429
430 LOUD(fprintf(stderr, "match_extensions('%s', '%s')\n", path, extlist);)
431 if (path == NULL || extlist == NULL) nullptr("match_extensions");
432
433 dot = NULL;
434 /* Scan to end of path, save the last dot, reset on path separators */
435 while (*path != '\0') {
436 if (*path == '.') dot = path;
437 if (*path == '/' || *path == '\\') dot = NULL;
438 path++;
439 }
440 /* No dots in the file name = no extension, so give up now */
441 if (dot == NULL) return 0;
442 dot++;
443 /* Handle a dot at the end of a file name */
444 if (*dot == '\0') return 0;
445
446 /* Get the length of the file's extension for later checking */
447 extlen = strlen(dot);
448 LOUD(fprintf(stderr, "match_extensions: file has extension '%s' with length %ld\n", dot, extlen);)
449
450 /* dot is now at the location of the last file extension; check the list */
451 /* Skip any commas at the start of the list */
452 while (*extlist == ',') extlist++;
453 ext = extlist;
454 len = 0;
455 while (1) {
456 /* Reject upon hitting the end with no more extensions to process */
457 if (*extlist == '\0' && len == 0) return 0;
458 /* Process extension once a comma or EOL is hit */
459 if (*extlist == ',' || *extlist == '\0') {
460 /* Skip serial commas */
461 while (*extlist == ',') extlist++;
462 if (extlist == ext) goto skip_empty;
463 if (strncasecmp(dot, ext, len) == 0 && extlen == len) {
464 LOUD(fprintf(stderr, "match_extensions: matched on extension '%s' (len %ld)\n", dot, len);)
465 return 1;
466 }
467 LOUD(fprintf(stderr, "match_extensions: no match: '%s' (%ld), '%s' (%ld)\n", dot, len, ext, extlen);)
468 skip_empty:
469 ext = extlist;
470 len = 0;
471 continue;
472 }
473 extlist++; len++;
474 /* LOUD(fprintf(stderr, "match_extensions: DEBUG: '%s' : '%s' (%ld), '%s' (%ld)\n", extlist, dot, len, ext, extlen);) */
475 }
476 return 0;
477 }
478
479
480 /* Check file's stat() info to make sure nothing has changed
481 * Returns 1 if changed, 0 if not changed, negative if error */
file_has_changed(file_t * const restrict file)482 extern int file_has_changed(file_t * const restrict file)
483 {
484 /* If -t/--nochangecheck specified then completely bypass this code */
485 if (ISFLAG(flags, F_NOCHANGECHECK)) return 0;
486
487 if (file == NULL || file->d_name == NULL) nullptr("file_has_changed()");
488 LOUD(fprintf(stderr, "file_has_changed('%s')\n", file->d_name);)
489
490 if (!ISFLAG(file->flags, FF_VALID_STAT)) return -66;
491
492 if (STAT(file->d_name, &s) != 0) return -2;
493 if (file->inode != s.st_ino) return 1;
494 if (file->size != s.st_size) return 1;
495 if (file->device != s.st_dev) return 1;
496 if (file->mtime != s.st_mtime) return 1;
497 if (file->mode != s.st_mode) return 1;
498 #ifndef NO_PERMS
499 if (file->uid != s.st_uid) return 1;
500 if (file->gid != s.st_gid) return 1;
501 #endif
502 #ifndef NO_SYMLINKS
503 if (lstat(file->d_name, &s) != 0) return -3;
504 if ((S_ISLNK(s.st_mode) > 0) ^ ISFLAG(file->flags, FF_IS_SYMLINK)) return 1;
505 #endif
506
507 return 0;
508 }
509
510
/* Populate a file_t's cached stat fields from the filesystem.
 * Returns 0 on success or if the file was already stat'ed, -1 if STAT()
 * or lstat() fails.
 * NOTE(review): FF_VALID_STAT is set before STAT() is attempted, so a
 * failed stat is not retried on later calls; confirm this is intended. */
extern inline int getfilestats(file_t * const restrict file)
{
  if (file == NULL || file->d_name == NULL) nullptr("getfilestats()");
  LOUD(fprintf(stderr, "getfilestats('%s')\n", file->d_name);)

  /* Only the first call for a given file does any work */
  if (!ISFLAG(file->flags, FF_VALID_STAT)) {
    SETFLAG(file->flags, FF_VALID_STAT);

    if (STAT(file->d_name, &s) != 0) return -1;

    file->inode = s.st_ino;
    file->size = s.st_size;
    file->device = s.st_dev;
    file->mtime = s.st_mtime;
    file->mode = s.st_mode;
#ifndef NO_HARDLINKS
    file->nlink = s.st_nlink;
#endif
#ifndef NO_PERMS
    file->uid = s.st_uid;
    file->gid = s.st_gid;
#endif
#ifndef NO_SYMLINKS
    /* lstat() reports on the link itself, so it reveals symlinks */
    if (lstat(file->d_name, &s) != 0) return -1;
    if (S_ISLNK(s.st_mode) > 0) SETFLAG(file->flags, FF_IS_SYMLINK);
#endif
  }

  return 0;
}
539
540
add_extfilter(const char * option)541 static void add_extfilter(const char *option)
542 {
543 char *opt, *p;
544 time_t tt;
545 struct extfilter *extf = extfilter_head;
546 const struct extfilter_tags *tags = extfilter_tags;
547 const struct size_suffix *ss = size_suffix;
548
549 if (option == NULL) nullptr("add_extfilter()");
550
551 LOUD(fprintf(stderr, "add_extfilter '%s'\n", option);)
552
553 /* Invoke help text if requested */
554 if (strcasecmp(option, "help") == 0) { help_text_extfilter(); exit(EXIT_SUCCESS); }
555
556 opt = string_malloc(strlen(option) + 1);
557 if (opt == NULL) oom("add_extfilter option");
558 strcpy(opt, option);
559 p = opt;
560
561 while (*p != ':' && *p != '\0') p++;
562
563 /* Split tag string into *opt (tag) and *p (value) */
564 if (*p == ':') {
565 *p = '\0';
566 p++;
567 }
568
569 while (tags->tag != NULL && strcmp(tags->tag, opt) != 0) tags++;
570 if (tags->tag == NULL) goto error_bad_filter;
571
572 /* Check for a tag that requires a value */
573 if (tags->flags & XF_REQ_VALUE && *p == '\0') goto error_value_missing;
574
575 /* *p is now at the value, NOT the tag string! */
576
577 if (extfilter_head != NULL) {
578 /* Add to end of exclusion stack if head is present */
579 while (extf->next != NULL) extf = extf->next;
580 extf->next = string_malloc(sizeof(struct extfilter) + strlen(p) + 1);
581 if (extf->next == NULL) oom("add_extfilter alloc");
582 extf = extf->next;
583 } else {
584 /* Allocate extfilter_head if no exclusions exist yet */
585 extfilter_head = string_malloc(sizeof(struct extfilter) + strlen(p) + 1);
586 if (extfilter_head == NULL) oom("add_extfilter alloc");
587 extf = extfilter_head;
588 }
589
590 /* Set tag value from predefined tag array */
591 extf->flags = tags->flags;
592
593 /* Initialize the new extfilter element */
594 extf->next = NULL;
595 if (extf->flags & XF_REQ_NUMBER) {
596 /* Exclude uses a number; handle it with possible suffixes */
597 *(extf->param) = '\0';
598 /* Get base size */
599 if (*p < '0' || *p > '9') goto error_bad_size_suffix;
600 extf->size = strtoll(p, &p, 10);
601 /* Handle suffix, if any */
602 if (*p != '\0') {
603 while (ss->suffix != NULL && strcasecmp(ss->suffix, p) != 0) ss++;
604 if (ss->suffix == NULL) goto error_bad_size_suffix;
605 extf->size *= ss->multiplier;
606 }
607 } else if (extf->flags & XF_REQ_DATE) {
608 *(extf->param) = '\0';
609 tt = strtoepoch(p);
610 LOUD(fprintf(stderr, "extfilter: jody_strtoepoch: '%s' -> %ld\n", p, tt);)
611 if (tt == -1) goto error_bad_time;
612 extf->size = tt;
613 } else {
614 /* Exclude uses string data; just copy it */
615 extf->size = 0;
616 if (*p != '\0') strcpy(extf->param, p);
617 else *(extf->param) = '\0';
618 }
619
620 LOUD(fprintf(stderr, "Added extfilter: tag '%s', data '%s', size %lld, flags %d\n", opt, extf->param, (long long)extf->size, extf->flags);)
621 string_free(opt);
622 return;
623
624 error_bad_time:
625 fprintf(stderr, "Invalid extfilter date[time] was specified: -X filter:datetime\n");
626 help_text_extfilter();
627 exit(EXIT_FAILURE);
628 error_value_missing:
629 fprintf(stderr, "extfilter value missing or invalid: -X filter:value\n");
630 help_text_extfilter();
631 exit(EXIT_FAILURE);
632 error_bad_filter:
633 fprintf(stderr, "Invalid extfilter filter name was specified\n");
634 help_text_extfilter();
635 exit(EXIT_FAILURE);
636 error_bad_size_suffix:
637 fprintf(stderr, "Invalid extfilter size suffix specified; use B or KMGTPE[i][B]\n");
638 help_text_extfilter();
639 exit(EXIT_FAILURE);
640 }
641
642
643 /* Returns -1 if stat() fails, 0 if it's a directory, 1 if it's not */
getdirstats(const char * const restrict name,jdupes_ino_t * const restrict inode,dev_t * const restrict dev,jdupes_mode_t * const restrict mode)644 extern int getdirstats(const char * const restrict name,
645 jdupes_ino_t * const restrict inode, dev_t * const restrict dev,
646 jdupes_mode_t * const restrict mode)
647 {
648 if (name == NULL || inode == NULL || dev == NULL) nullptr("getdirstats");
649 LOUD(fprintf(stderr, "getdirstats('%s', %p, %p)\n", name, (void *)inode, (void *)dev);)
650
651 if (STAT(name, &s) != 0) return -1;
652 *inode = s.st_ino;
653 *dev = s.st_dev;
654 *mode = s.st_mode;
655 if (!S_ISDIR(s.st_mode)) return 1;
656 return 0;
657 }
658
659
660 /* Check a pair of files for match exclusion conditions
661 * Returns:
662 * 0 if all condition checks pass
663 * -1 or 1 on compare result less/more
664 * -2 on an absolute exclusion condition met
665 * 2 on an absolute match condition met
666 * -3 on exclusion due to isolation
667 * -4 on exclusion due to same filesystem
668 * -5 on exclusion due to permissions */
check_conditions(const file_t * const restrict file1,const file_t * const restrict file2)669 extern int check_conditions(const file_t * const restrict file1, const file_t * const restrict file2)
670 {
671 if (file1 == NULL || file2 == NULL || file1->d_name == NULL || file2->d_name == NULL) nullptr("check_conditions()");
672
673 LOUD(fprintf(stderr, "check_conditions('%s', '%s')\n", file1->d_name, file2->d_name);)
674
675 /* Exclude files that are not the same size */
676 if (file1->size > file2->size) {
677 LOUD(fprintf(stderr, "check_conditions: no match: size of file1 > file2 (%" PRIdMAX " > %" PRIdMAX ")\n",
678 (intmax_t)file1->size, (intmax_t)file2->size));
679 return -1;
680 }
681 if (file1->size < file2->size) {
682 LOUD(fprintf(stderr, "check_conditions: no match: size of file1 < file2 (%" PRIdMAX " < %"PRIdMAX ")\n",
683 (intmax_t)file1->size, (intmax_t)file2->size));
684 return 1;
685 }
686
687 #ifndef NO_USER_ORDER
688 /* Exclude based on -I/--isolate */
689 if (ISFLAG(flags, F_ISOLATE) && (file1->user_order == file2->user_order)) {
690 LOUD(fprintf(stderr, "check_conditions: files ignored: parameter isolation\n"));
691 return -3;
692 }
693 #endif /* NO_USER_ORDER */
694
695 /* Exclude based on -1/--one-file-system */
696 if (ISFLAG(flags, F_ONEFS) && (file1->device != file2->device)) {
697 LOUD(fprintf(stderr, "check_conditions: files ignored: not on same filesystem\n"));
698 return -4;
699 }
700
701 /* Exclude files by permissions if requested */
702 if (ISFLAG(flags, F_PERMISSIONS) &&
703 (file1->mode != file2->mode
704 #ifndef NO_PERMS
705 || file1->uid != file2->uid
706 || file1->gid != file2->gid
707 #endif
708 )) {
709 return -5;
710 LOUD(fprintf(stderr, "check_conditions: no match: permissions/ownership differ (-p on)\n"));
711 }
712
713 /* Hard link and symlink + '-s' check */
714 #ifndef NO_HARDLINKS
715 if ((file1->inode == file2->inode) && (file1->device == file2->device)) {
716 if (ISFLAG(flags, F_CONSIDERHARDLINKS)) {
717 LOUD(fprintf(stderr, "check_conditions: files match: hard/soft linked (-H on)\n"));
718 return 2;
719 } else {
720 LOUD(fprintf(stderr, "check_conditions: files ignored: hard/soft linked (-H off)\n"));
721 return -2;
722 }
723 }
724 #endif
725
726 /* Fall through: all checks passed */
727 LOUD(fprintf(stderr, "check_conditions: all condition checks passed\n"));
728 return 0;
729 }
730
731
732 /* Check for exclusion conditions for a single file (1 = fail) */
check_singlefile(file_t * const restrict newfile)733 static int check_singlefile(file_t * const restrict newfile)
734 {
735 char * restrict tp = tempname;
736 int excluded;
737
738 if (newfile == NULL) nullptr("check_singlefile()");
739
740 LOUD(fprintf(stderr, "check_singlefile: checking '%s'\n", newfile->d_name));
741
742 /* Exclude hidden files if requested */
743 if (ISFLAG(flags, F_EXCLUDEHIDDEN)) {
744 if (newfile->d_name == NULL) nullptr("check_singlefile newfile->d_name");
745 strcpy(tp, newfile->d_name);
746 tp = basename(tp);
747 if (tp[0] == '.' && strcmp(tp, ".") && strcmp(tp, "..")) {
748 LOUD(fprintf(stderr, "check_singlefile: excluding hidden file (-A on)\n"));
749 return 1;
750 }
751 }
752
753 /* Get file information and check for validity */
754 const int i = getfilestats(newfile);
755 if (i || newfile->size == -1) {
756 LOUD(fprintf(stderr, "check_singlefile: excluding due to bad stat()\n"));
757 return 1;
758 }
759
760 if (!S_ISDIR(newfile->mode)) {
761 /* Exclude zero-length files if requested */
762 if (newfile->size == 0 && !ISFLAG(flags, F_INCLUDEEMPTY)) {
763 LOUD(fprintf(stderr, "check_singlefile: excluding zero-length empty file (-z not set)\n"));
764 return 1;
765 }
766
767 /* Exclude files based on exclusion stack size specs */
768 excluded = 0;
769 for (struct extfilter *extf = extfilter_head; extf != NULL; extf = extf->next) {
770 uint32_t sflag = extf->flags;
771 LOUD(fprintf(stderr, "check_singlefile: extfilter check: %08x %ld %ld %s\n", sflag, newfile->size, extf->size, newfile->d_name);)
772 if (
773 /* Any line that passes will result in file exclusion */
774 ((sflag == XF_SIZE_EQ) && (newfile->size != extf->size)) ||
775 ((sflag == XF_SIZE_LTEQ) && (newfile->size > extf->size)) ||
776 ((sflag == XF_SIZE_GTEQ) && (newfile->size < extf->size)) ||
777 ((sflag == XF_SIZE_GT) && (newfile->size <= extf->size)) ||
778 ((sflag == XF_SIZE_LT) && (newfile->size >= extf->size)) ||
779 ((sflag == XF_EXCL_EXT) && match_extensions(newfile->d_name, extf->param)) ||
780 ((sflag == XF_ONLY_EXT) && !match_extensions(newfile->d_name, extf->param)) ||
781 ((sflag == XF_EXCL_STR) && strstr(newfile->d_name, extf->param)) ||
782 ((sflag == XF_ONLY_STR) && !strstr(newfile->d_name, extf->param)) ||
783 ((sflag == XF_DATE_NEWER) && (newfile->mtime < extf->size)) ||
784 ((sflag == XF_DATE_OLDER) && (newfile->mtime >= extf->size))
785 ) excluded = 1;
786 }
787 if (excluded) {
788 LOUD(fprintf(stderr, "check_singlefile: excluding based on an extfilter option\n"));
789 return 1;
790 }
791 }
792
793 #ifdef ON_WINDOWS
794 /* Windows has a 1023 (+1) hard link limit. If we're hard linking,
795 * ignore all files that have hit this limit */
796 #ifndef NO_HARDLINKS
797 if (ISFLAG(a_flags, FA_HARDLINKFILES) && newfile->nlink >= 1024) {
798 #ifdef DEBUG
799 hll_exclude++;
800 #endif
801 LOUD(fprintf(stderr, "check_singlefile: excluding due to Windows 1024 hard link limit\n"));
802 return 1;
803 }
804 #endif /* NO_HARDLINKS */
805 #endif /* ON_WINDOWS */
806 LOUD(fprintf(stderr, "check_singlefile: all checks passed\n"));
807 return 0;
808 }
809
810
init_newfile(const size_t len,file_t * restrict * const restrict filelistp)811 static file_t *init_newfile(const size_t len, file_t * restrict * const restrict filelistp)
812 {
813 file_t * const restrict newfile = (file_t *)string_malloc(sizeof(file_t));
814
815 if (!newfile) oom("init_newfile() file structure");
816 if (!filelistp) nullptr("init_newfile() filelistp");
817
818 LOUD(fprintf(stderr, "init_newfile(len %lu, filelistp %p)\n", len, filelistp));
819
820 memset(newfile, 0, sizeof(file_t));
821 newfile->d_name = (char *)string_malloc(len);
822 if (!newfile->d_name) oom("init_newfile() filename");
823
824 newfile->next = *filelistp;
825 #ifndef NO_USER_ORDER
826 newfile->user_order = user_item_count;
827 #endif
828 newfile->size = -1;
829 newfile->duplicates = NULL;
830 return newfile;
831 }
832
833
834 /* Create a new traversal check object and initialize its values */
travdone_alloc(const dev_t device,const jdupes_ino_t inode)835 static struct travdone *travdone_alloc(const dev_t device, const jdupes_ino_t inode)
836 {
837 struct travdone *trav;
838
839 LOUD(fprintf(stderr, "travdone_alloc(%" PRIdMAX ", %" PRIdMAX ")\n", (intmax_t)inode, (intmax_t)device);)
840
841 trav = (struct travdone *)string_malloc(sizeof(struct travdone));
842 if (trav == NULL) {
843 LOUD(fprintf(stderr, "travdone_alloc: malloc failed\n");)
844 return NULL;
845 }
846 trav->left = NULL;
847 trav->right = NULL;
848 trav->inode = inode;
849 trav->device = device;
850 LOUD(fprintf(stderr, "travdone_alloc returned %p\n", (void *)trav);)
851 return trav;
852 }
853
854
855 /* De-allocate the travdone tree */
travdone_free(struct travdone * const restrict cur)856 static void travdone_free(struct travdone * const restrict cur)
857 {
858 LOUD(fprintf(stderr, "travdone_free(%p)\n", cur);)
859 if (cur == NULL) return;
860 if (cur->left != NULL) travdone_free(cur->left);
861 if (cur->right != NULL) travdone_free(cur->right);
862 string_free(cur);
863 return;
864 }
865
866
867 /* Check to see if device:inode pair has already been traversed */
traverse_check(const dev_t device,const jdupes_ino_t inode)868 static int traverse_check(const dev_t device, const jdupes_ino_t inode)
869 {
870 struct travdone *traverse = travdone_head;
871
872 if (travdone_head == NULL) {
873 travdone_head = travdone_alloc(device, inode);
874 if (travdone_head == NULL) return 2;
875 } else {
876 traverse = travdone_head;
877 while (1) {
878 if (traverse == NULL) nullptr("traverse_check()");
879 /* Don't re-traverse directories we've already seen */
880 if (inode == traverse->inode && device == traverse->device) {
881 LOUD(fprintf(stderr, "traverse_check: already seen: %ld:%ld\n", device,inode);)
882 return 1;
883 } else if (inode > traverse->inode || (inode == traverse->inode && device > traverse->device)) {
884 /* Traverse right */
885 if (traverse->right == NULL) {
886 LOUD(fprintf(stderr, "traverse item right: %ld:%ld\n", device, inode);)
887 traverse->right = travdone_alloc(device, inode);
888 if (traverse->right == NULL) return 2;
889 break;
890 }
891 traverse = traverse->right;
892 continue;
893 } else {
894 /* Traverse left */
895 if (traverse->left == NULL) {
896 LOUD(fprintf(stderr, "traverse item left %ld,%ld\n", device, inode);)
897 traverse->left = travdone_alloc(device, inode);
898 if (traverse->left == NULL) return 2;
899 break;
900 }
901 traverse = traverse->left;
902 continue;
903 }
904 }
905 }
906 return 0;
907 }
908
909
/* This is disabled until a check is in place to make it safe */
#if 0
/* Build a file_t for a single named file and run the single-file
 * exclusion checks on it. Returns the new entry, or NULL (after freeing
 * it) when check_singlefile() rejects the file. */
static inline file_t *grokfile(const char * const restrict name, file_t * restrict * const restrict filelistp)
{
  file_t * restrict entry;

  if (name == NULL || filelistp == NULL) nullptr("grokfile()");
  LOUD(fprintf(stderr, "grokfile: '%s' %p\n", name, filelistp));

  /* Allocate the file_t and the d_name entries */
  entry = init_newfile(strlen(name) + 2, filelistp);
  strcpy(entry->d_name, name);

  /* Single-file [l]stat() and exclusion condition check */
  if (check_singlefile(entry) == 0) return entry;

  LOUD(fprintf(stderr, "grokfile: check_singlefile rejected file\n"));
  string_free(entry->d_name);
  string_free(entry);
  return NULL;
}
#endif
935
936
937 /* Load a directory's contents into the file tree, recursing as needed */
grokdir(const char * const restrict dir,file_t * restrict * const restrict filelistp,int recurse)938 static void grokdir(const char * const restrict dir,
939 file_t * restrict * const restrict filelistp,
940 int recurse)
941 {
942 file_t * restrict newfile;
943 struct dirent *dirinfo;
944 static int grokdir_level = 0;
945 size_t dirlen;
946 int i, single = 0;
947 jdupes_ino_t inode;
948 dev_t device, n_device;
949 jdupes_mode_t mode;
950 #ifdef UNICODE
951 WIN32_FIND_DATA ffd;
952 HANDLE hFind = INVALID_HANDLE_VALUE;
953 char *p;
954 #else
955 DIR *cd;
956 #endif
957
958 if (dir == NULL || filelistp == NULL) nullptr("grokdir()");
959 LOUD(fprintf(stderr, "grokdir: scanning '%s' (order %d, recurse %d)\n", dir, user_item_count, recurse));
960
961 /* Get directory stats (or file stats if it's a file) */
962 i = getdirstats(dir, &inode, &device, &mode);
963 if (i < 0) goto error_travdone;
964 /* if dir is actually a file, just add it to the file tree */
965 if (i == 1) {
966 /* Single file addition is disabled for now because there is no safeguard
967 * against the file being compared against itself if it's added in both a
968 * recursion and explicitly on the command line. */
969 #if 0
970 LOUD(fprintf(stderr, "grokdir -> grokfile '%s'\n", dir));
971 newfile = grokfile(dir, filelistp);
972 if (newfile == NULL) {
973 LOUD(fprintf(stderr, "grokfile rejected '%s'\n", dir));
974 return;
975 }
976 single = 1;
977 goto add_single_file;
978 #endif
979 fprintf(stderr, "\nFile specs on command line disabled in this version for safety\n");
980 fprintf(stderr, "This should be restored (and safe) in a future release\n");
981 fprintf(stderr, "See https://github.com/jbruchon/jdupes or email jody@jodybruchon.com\n");
982 return; /* Remove when single file is restored */
983 }
984
985 /* Double traversal prevention tree */
986 if (!ISFLAG(flags, F_NOTRAVCHECK)) {
987 i = traverse_check(device, inode);
988 if (i == 1) return;
989 if (i == 2) goto error_travdone;
990 }
991
992 item_progress++;
993 grokdir_level++;
994
995 #ifdef UNICODE
996 /* Windows requires \* at the end of directory names */
997 strncpy(tempname, dir, PATHBUF_SIZE * 2 - 1);
998 dirlen = strlen(tempname) - 1;
999 p = tempname + dirlen;
1000 if (*p == '/' || *p == '\\') *p = '\0';
1001 strncat(tempname, "\\*", PATHBUF_SIZE * 2 - 1);
1002
1003 if (!M2W(tempname, wname)) goto error_cd;
1004
1005 LOUD(fprintf(stderr, "FindFirstFile: %s\n", dir));
1006 hFind = FindFirstFileW(wname, &ffd);
1007 if (hFind == INVALID_HANDLE_VALUE) { LOUD(fprintf(stderr, "\nfile handle bad\n")); goto error_cd; }
1008 LOUD(fprintf(stderr, "Loop start\n"));
1009 do {
1010 char * restrict tp = tempname;
1011 size_t d_name_len;
1012
1013 /* Get necessary length and allocate d_name */
1014 dirinfo = (struct dirent *)string_malloc(sizeof(struct dirent));
1015 if (!W2M(ffd.cFileName, dirinfo->d_name)) continue;
1016 #else
1017 cd = opendir(dir);
1018 if (!cd) goto error_cd;
1019
1020 while ((dirinfo = readdir(cd)) != NULL) {
1021 char * restrict tp = tempname;
1022 size_t d_name_len;
1023 #endif /* UNICODE */
1024
1025 LOUD(fprintf(stderr, "grokdir: readdir: '%s'\n", dirinfo->d_name));
1026 if (!strcmp(dirinfo->d_name, ".") || !strcmp(dirinfo->d_name, "..")) continue;
1027 if (!ISFLAG(flags, F_HIDEPROGRESS)) {
1028 gettimeofday(&time2, NULL);
1029 if (progress == 0 || time2.tv_sec > time1.tv_sec) {
1030 fprintf(stderr, "\rScanning: %" PRIuMAX " files, %" PRIuMAX " dirs (in %u specified)",
1031 progress, item_progress, user_item_count);
1032 }
1033 time1.tv_sec = time2.tv_sec;
1034 }
1035
1036 /* Assemble the file's full path name, optimized to avoid strcat() */
1037 dirlen = strlen(dir);
1038 d_name_len = strlen(dirinfo->d_name);
1039 memcpy(tp, dir, dirlen+1);
1040 if (dirlen != 0 && tp[dirlen-1] != dir_sep) {
1041 tp[dirlen] = dir_sep;
1042 dirlen++;
1043 }
1044 if (dirlen + d_name_len + 1 >= (PATHBUF_SIZE * 2)) goto error_overflow;
1045 tp += dirlen;
1046 memcpy(tp, dirinfo->d_name, d_name_len);
1047 tp += d_name_len;
1048 *tp = '\0';
1049 d_name_len++;
1050
1051 /* Allocate the file_t and the d_name entries */
1052 newfile = init_newfile(dirlen + d_name_len + 2, filelistp);
1053
1054 tp = tempname;
1055 memcpy(newfile->d_name, tp, dirlen + d_name_len);
1056
1057 /*** WARNING: tempname global gets reused by check_singlefile here! ***/
1058
1059 /* Single-file [l]stat() and exclusion condition check */
1060 if (check_singlefile(newfile) != 0) {
1061 LOUD(fprintf(stderr, "grokdir: check_singlefile rejected file\n"));
1062 string_free(newfile->d_name);
1063 string_free(newfile);
1064 continue;
1065 }
1066
1067 /* Optionally recurse directories, including symlinked ones if requested */
1068 if (S_ISDIR(newfile->mode)) {
1069 if (recurse) {
1070 /* --one-file-system - WARNING: this clobbers inode/mode */
1071 if (ISFLAG(flags, F_ONEFS)
1072 && (getdirstats(newfile->d_name, &inode, &n_device, &mode) == 0)
1073 && (device != n_device)) {
1074 LOUD(fprintf(stderr, "grokdir: directory: not recursing (--one-file-system)\n"));
1075 string_free(newfile->d_name);
1076 string_free(newfile);
1077 continue;
1078 }
1079 #ifndef NO_SYMLINKS
1080 else if (ISFLAG(flags, F_FOLLOWLINKS) || !ISFLAG(newfile->flags, FF_IS_SYMLINK)) {
1081 LOUD(fprintf(stderr, "grokdir: directory(symlink): recursing (-r/-R)\n"));
1082 grokdir(newfile->d_name, filelistp, recurse);
1083 }
1084 #else
1085 else {
1086 LOUD(fprintf(stderr, "grokdir: directory: recursing (-r/-R)\n"));
1087 grokdir(newfile->d_name, filelistp, recurse);
1088 }
1089 #endif
1090 } else { LOUD(fprintf(stderr, "grokdir: directory: not recursing\n")); }
1091 string_free(newfile->d_name);
1092 string_free(newfile);
1093 continue;
1094 } else {
1095 //add_single_file:
1096 /* Add regular files to list, including symlink targets if requested */
1097 #ifndef NO_SYMLINKS
1098 if (!ISFLAG(newfile->flags, FF_IS_SYMLINK) || (ISFLAG(newfile->flags, FF_IS_SYMLINK) && ISFLAG(flags, F_FOLLOWLINKS))) {
1099 #else
1100 if (S_ISREG(newfile->mode)) {
1101 #endif
1102 *filelistp = newfile;
1103 filecount++;
1104 progress++;
1105
1106 } else {
1107 LOUD(fprintf(stderr, "grokdir: not a regular file: %s\n", newfile->d_name);)
1108 string_free(newfile->d_name);
1109 string_free(newfile);
1110 if (single == 1) {
1111 single = 0;
1112 goto skip_single;
1113 }
1114 continue;
1115 }
1116 }
1117 /* Skip directory stuff if adding only a single file */
1118 if (single == 1) {
1119 single = 0;
1120 goto skip_single;
1121 }
1122 }
1123
1124 #ifdef UNICODE
1125 while (FindNextFileW(hFind, &ffd) != 0);
1126 FindClose(hFind);
1127 #else
1128 closedir(cd);
1129 #endif
1130
1131 skip_single:
1132 grokdir_level--;
1133 if (grokdir_level == 0 && !ISFLAG(flags, F_HIDEPROGRESS)) {
1134 fprintf(stderr, "\rScanning: %" PRIuMAX " files, %" PRIuMAX " items (in %u specified)",
1135 progress, item_progress, user_item_count);
1136 }
1137 return;
1138
1139 error_travdone:
1140 fprintf(stderr, "\ncould not stat dir "); fwprint(stderr, dir, 1);
1141 return;
1142 error_cd:
1143 fprintf(stderr, "\ncould not chdir to "); fwprint(stderr, dir, 1);
1144 return;
1145 error_overflow:
1146 fprintf(stderr, "\nerror: a path buffer overflowed\n");
1147 exit(EXIT_FAILURE);
1148 }
1149
1150
1151 /* Hash part or all of a file */
1152 static jdupes_hash_t *get_filehash(const file_t * const restrict checkfile,
1153 const size_t max_read)
1154 {
1155 off_t fsize;
1156 /* This is an array because we return a pointer to it */
1157 static jdupes_hash_t hash[1];
1158 static jdupes_hash_t *chunk = NULL;
1159 FILE *file;
1160 int check = 0;
1161 XXH64_state_t *xxhstate;
1162
1163 if (checkfile == NULL || checkfile->d_name == NULL) nullptr("get_filehash()");
1164 LOUD(fprintf(stderr, "get_filehash('%s', %" PRIdMAX ")\n", checkfile->d_name, (intmax_t)max_read);)
1165
1166 /* Allocate on first use */
1167 if (chunk == NULL) {
1168 chunk = (jdupes_hash_t *)string_malloc(auto_chunk_size);
1169 if (!chunk) oom("get_filehash() chunk");
1170 }
1171
1172 /* Get the file size. If we can't read it, bail out early */
1173 if (checkfile->size == -1) {
1174 LOUD(fprintf(stderr, "get_filehash: not hashing because stat() info is bad\n"));
1175 return NULL;
1176 }
1177 fsize = checkfile->size;
1178
1179 /* Do not read more than the requested number of bytes */
1180 if (max_read > 0 && fsize > (off_t)max_read)
1181 fsize = (off_t)max_read;
1182
1183 /* Initialize the hash and file read parameters (with filehash_partial skipped)
1184 *
1185 * If we already hashed the first chunk of this file, we don't want to
1186 * wastefully read and hash it again, so skip the first chunk and use
1187 * the computed hash for that chunk as our starting point.
1188 */
1189
1190 *hash = 0;
1191 if (ISFLAG(checkfile->flags, FF_HASH_PARTIAL)) {
1192 *hash = checkfile->filehash_partial;
1193 /* Don't bother going further if max_read is already fulfilled */
1194 if (max_read != 0 && max_read <= PARTIAL_HASH_SIZE) {
1195 LOUD(fprintf(stderr, "Partial hash size (%d) >= max_read (%" PRIuMAX "), not hashing anymore\n", PARTIAL_HASH_SIZE, (uintmax_t)max_read);)
1196 return hash;
1197 }
1198 }
1199 errno = 0;
1200 #ifdef UNICODE
1201 if (!M2W(checkfile->d_name, wstr)) file = NULL;
1202 else file = _wfopen(wstr, FILE_MODE_RO);
1203 #else
1204 file = fopen(checkfile->d_name, FILE_MODE_RO);
1205 #endif
1206 if (file == NULL) {
1207 fprintf(stderr, "\n%s error opening file ", strerror(errno)); fwprint(stderr, checkfile->d_name, 1);
1208 return NULL;
1209 }
1210 /* Actually seek past the first chunk if applicable
1211 * This is part of the filehash_partial skip optimization */
1212 if (ISFLAG(checkfile->flags, FF_HASH_PARTIAL)) {
1213 if (fseeko(file, PARTIAL_HASH_SIZE, SEEK_SET) == -1) {
1214 fclose(file);
1215 fprintf(stderr, "\nerror seeking in file "); fwprint(stderr, checkfile->d_name, 1);
1216 return NULL;
1217 }
1218 fsize -= PARTIAL_HASH_SIZE;
1219 }
1220
1221 xxhstate = XXH64_createState();
1222 if (xxhstate == NULL) nullptr("xxhstate");
1223 XXH64_reset(xxhstate, 0);
1224
1225 /* Read the file in CHUNK_SIZE chunks until we've read it all. */
1226 while (fsize > 0) {
1227 size_t bytes_to_read;
1228
1229 if (interrupt) return 0;
1230 bytes_to_read = (fsize >= (off_t)auto_chunk_size) ? auto_chunk_size : (size_t)fsize;
1231 if (fread((void *)chunk, bytes_to_read, 1, file) != 1) {
1232 fprintf(stderr, "\nerror reading from file "); fwprint(stderr, checkfile->d_name, 1);
1233 fclose(file);
1234 return NULL;
1235 }
1236
1237 XXH64_update(xxhstate, chunk, bytes_to_read);
1238
1239 if ((off_t)bytes_to_read > fsize) break;
1240 else fsize -= (off_t)bytes_to_read;
1241
1242 if (!ISFLAG(flags, F_HIDEPROGRESS)) {
1243 check++;
1244 if (check > CHECK_MINIMUM) {
1245 update_progress("hashing", (int)(((checkfile->size - fsize) * 100) / checkfile->size));
1246 check = 0;
1247 }
1248 }
1249 }
1250
1251 fclose(file);
1252
1253 *hash = XXH64_digest(xxhstate);
1254 XXH64_freeState(xxhstate);
1255
1256 LOUD(fprintf(stderr, "get_filehash: returning hash: 0x%016jx\n", (uintmax_t)*hash));
1257 return hash;
1258 }
1259
1260
1261 static inline void registerfile(filetree_t * restrict * const restrict nodeptr,
1262 const enum tree_direction d, file_t * const restrict file)
1263 {
1264 filetree_t * restrict branch;
1265
1266 if (nodeptr == NULL || file == NULL || (d != NONE && *nodeptr == NULL)) nullptr("registerfile()");
1267 LOUD(fprintf(stderr, "registerfile(direction %d)\n", d));
1268
1269 /* Allocate and initialize a new node for the file */
1270 branch = (filetree_t *)string_malloc(sizeof(filetree_t));
1271 if (branch == NULL) oom("registerfile() branch");
1272 branch->file = file;
1273 branch->left = NULL;
1274 branch->right = NULL;
1275
1276 /* Attach the new node to the requested branch */
1277 switch (d) {
1278 case LEFT:
1279 (*nodeptr)->left = branch;
1280 break;
1281 case RIGHT:
1282 (*nodeptr)->right = branch;
1283 break;
1284 case NONE:
1285 /* For the root of the tree only */
1286 *nodeptr = branch;
1287 break;
1288 default:
1289 /* This should never ever happen */
1290 fprintf(stderr, "\ninternal error: invalid direction for registerfile(), report this\n");
1291 string_malloc_destroy();
1292 exit(EXIT_FAILURE);
1293 break;
1294 }
1295
1296 return;
1297 }
1298
1299
/* Tree depth statistics hook: with TREE_DEPTH_STATS defined this raises
 * max_depth to tree_depth when tree_depth is larger, then resets tree_depth
 * to 0; without TREE_DEPTH_STATS the macro expands to nothing. */
#ifdef TREE_DEPTH_STATS
#define TREE_DEPTH_UPDATE_MAX() { if (max_depth < tree_depth) max_depth = tree_depth; tree_depth = 0; }
#else
#define TREE_DEPTH_UPDATE_MAX()
#endif
1305
1306
1307 /* Check two files for a match */
1308 static file_t **checkmatch(filetree_t * restrict tree, file_t * const restrict file)
1309 {
1310 int cmpresult = 0;
1311 int cantmatch = 0;
1312 const jdupes_hash_t * restrict filehash;
1313
1314 if (tree == NULL || file == NULL || tree->file == NULL || tree->file->d_name == NULL || file->d_name == NULL) nullptr("checkmatch()");
1315 LOUD(fprintf(stderr, "checkmatch ('%s', '%s')\n", tree->file->d_name, file->d_name));
1316
1317 /* If device and inode fields are equal one of the files is a
1318 * hard link to the other or the files have been listed twice
1319 * unintentionally. We don't want to flag these files as
1320 * duplicates unless the user specifies otherwise. */
1321
1322 /* Count the total number of comparisons requested */
1323 DBG(comparisons++;)
1324
1325 /* If considering hard linked files as duplicates, they are
1326 * automatically duplicates without being read further since
1327 * they point to the exact same inode. If we aren't considering
1328 * hard links as duplicates, we just return NULL. */
1329
1330 cmpresult = check_conditions(tree->file, file);
1331 switch (cmpresult) {
1332 case 2: return &tree->file; /* linked files + -H switch */
1333 case -2: return NULL; /* linked files, no -H switch */
1334 case -3: /* user order */
1335 case -4: /* one filesystem */
1336 case -5: /* permissions */
1337 cantmatch = 1;
1338 cmpresult = 0;
1339 break;
1340 default: break;
1341 }
1342
1343 /* Print pre-check (early) match candidates if requested */
1344 if (ISFLAG(p_flags, PF_EARLYMATCH)) printf("Early match check passed:\n %s\n %s\n\n", file->d_name, tree->file->d_name);
1345
1346 /* If preliminary matching succeeded, do main file data checks */
1347 if (cmpresult == 0) {
1348 LOUD(fprintf(stderr, "checkmatch: starting file data comparisons\n"));
1349 /* Attempt to exclude files quickly with partial file hashing */
1350 if (!ISFLAG(tree->file->flags, FF_HASH_PARTIAL)) {
1351 filehash = get_filehash(tree->file, PARTIAL_HASH_SIZE);
1352 if (filehash == NULL) return NULL;
1353
1354 tree->file->filehash_partial = *filehash;
1355 SETFLAG(tree->file->flags, FF_HASH_PARTIAL);
1356 }
1357
1358 if (!ISFLAG(file->flags, FF_HASH_PARTIAL)) {
1359 filehash = get_filehash(file, PARTIAL_HASH_SIZE);
1360 if (filehash == NULL) return NULL;
1361
1362 file->filehash_partial = *filehash;
1363 SETFLAG(file->flags, FF_HASH_PARTIAL);
1364 }
1365
1366 cmpresult = HASH_COMPARE(file->filehash_partial, tree->file->filehash_partial);
1367 LOUD(if (!cmpresult) fprintf(stderr, "checkmatch: partial hashes match\n"));
1368 LOUD(if (cmpresult) fprintf(stderr, "checkmatch: partial hashes do not match\n"));
1369 DBG(partial_hash++;)
1370
1371 /* Print partial hash matching pairs if requested */
1372 if (cmpresult == 0 && ISFLAG(p_flags, PF_PARTIAL))
1373 printf("Partial hashes match:\n %s\n %s\n\n", file->d_name, tree->file->d_name);
1374
1375 if (file->size <= PARTIAL_HASH_SIZE || ISFLAG(flags, F_PARTIALONLY)) {
1376 if (ISFLAG(flags, F_PARTIALONLY)) { LOUD(fprintf(stderr, "checkmatch: partial only mode: treating partial hash as full hash\n")); }
1377 else { LOUD(fprintf(stderr, "checkmatch: small file: copying partial hash to full hash\n")); }
1378 /* filehash_partial = filehash if file is small enough */
1379 if (!ISFLAG(file->flags, FF_HASH_FULL)) {
1380 file->filehash = file->filehash_partial;
1381 SETFLAG(file->flags, FF_HASH_FULL);
1382 DBG(small_file++;)
1383 }
1384 if (!ISFLAG(tree->file->flags, FF_HASH_FULL)) {
1385 tree->file->filehash = tree->file->filehash_partial;
1386 SETFLAG(tree->file->flags, FF_HASH_FULL);
1387 DBG(small_file++;)
1388 }
1389 } else if (cmpresult == 0) {
1390 // if (ISFLAG(flags, F_SKIPHASH)) {
1391 // LOUD(fprintf(stderr, "checkmatch: skipping full file hashes (F_SKIPMATCH)\n"));
1392 // } else {
1393 /* If partial match was correct, perform a full file hash match */
1394 if (!ISFLAG(tree->file->flags, FF_HASH_FULL)) {
1395 filehash = get_filehash(tree->file, 0);
1396 if (filehash == NULL) return NULL;
1397
1398 tree->file->filehash = *filehash;
1399 SETFLAG(tree->file->flags, FF_HASH_FULL);
1400 }
1401
1402 if (!ISFLAG(file->flags, FF_HASH_FULL)) {
1403 filehash = get_filehash(file, 0);
1404 if (filehash == NULL) return NULL;
1405
1406 file->filehash = *filehash;
1407 SETFLAG(file->flags, FF_HASH_FULL);
1408 }
1409
1410 /* Full file hash comparison */
1411 cmpresult = HASH_COMPARE(file->filehash, tree->file->filehash);
1412 LOUD(if (!cmpresult) fprintf(stderr, "checkmatch: full hashes match\n"));
1413 LOUD(if (cmpresult) fprintf(stderr, "checkmatch: full hashes do not match\n"));
1414 DBG(full_hash++);
1415 // }
1416 } else {
1417 DBG(partial_elim++);
1418 }
1419 } /* if (cmpresult == 0) */
1420
1421 if ((cantmatch != 0) && (cmpresult == 0)) {
1422 LOUD(fprintf(stderr, "checkmatch: rejecting because match not allowed (cantmatch = 1)\n"));
1423 cmpresult = -1;
1424 }
1425
1426 /* How the file tree works
1427 *
1428 * The tree is sorted by size as files arrive. If the files are the same
1429 * size, they are possible duplicates and are checked for duplication.
1430 * If they are not a match, the hashes are used to decide whether to
1431 * continue with the file to the left or the right in the file tree.
1432 * If the direction decision points to a leaf node, the duplicate scan
1433 * continues down that path; if it points to an empty node, the current
1434 * file is attached to the file tree at that point.
1435 *
1436 * This allows for quickly finding files of the same size by avoiding
1437 * tree branches with differing size groups.
1438 */
1439 if (cmpresult < 0) {
1440 if (tree->left != NULL) {
1441 LOUD(fprintf(stderr, "checkmatch: recursing tree: left\n"));
1442 DBG(left_branch++; tree_depth++;)
1443 return checkmatch(tree->left, file);
1444 } else {
1445 LOUD(fprintf(stderr, "checkmatch: registering file: left\n"));
1446 registerfile(&tree, LEFT, file);
1447 TREE_DEPTH_UPDATE_MAX();
1448 return NULL;
1449 }
1450 } else if (cmpresult > 0) {
1451 if (tree->right != NULL) {
1452 LOUD(fprintf(stderr, "checkmatch: recursing tree: right\n"));
1453 DBG(right_branch++; tree_depth++;)
1454 return checkmatch(tree->right, file);
1455 } else {
1456 LOUD(fprintf(stderr, "checkmatch: registering file: right\n"));
1457 registerfile(&tree, RIGHT, file);
1458 TREE_DEPTH_UPDATE_MAX();
1459 return NULL;
1460 }
1461 } else {
1462 /* All compares matched */
1463 DBG(partial_to_full++;)
1464 TREE_DEPTH_UPDATE_MAX();
1465 LOUD(fprintf(stderr, "checkmatch: files appear to match based on hashes\n"));
1466 if (ISFLAG(p_flags, PF_FULLHASH)) printf("Full hashes match:\n %s\n %s\n\n", file->d_name, tree->file->d_name);
1467 return &tree->file;
1468 }
1469 /* Fall through - should never be reached */
1470 return NULL;
1471 }
1472
1473
1474 /* Do a byte-by-byte comparison in case two different files produce the
1475 same signature. Unlikely, but better safe than sorry. */
1476 static inline int confirmmatch(FILE * const restrict file1, FILE * const restrict file2, const off_t size)
1477 {
1478 static char *c1 = NULL, *c2 = NULL;
1479 size_t r1, r2;
1480 off_t bytes = 0;
1481 int check = 0;
1482
1483 if (file1 == NULL || file2 == NULL) nullptr("confirmmatch()");
1484 LOUD(fprintf(stderr, "confirmmatch running\n"));
1485
1486 /* Allocate on first use; OOM if either is ever NULLed */
1487 if (!c1) {
1488 c1 = (char *)string_malloc(auto_chunk_size);
1489 c2 = (char *)string_malloc(auto_chunk_size);
1490 }
1491 if (!c1 || !c2) oom("confirmmatch() c1/c2");
1492
1493 fseek(file1, 0, SEEK_SET);
1494 fseek(file2, 0, SEEK_SET);
1495
1496 do {
1497 if (interrupt) return 0;
1498 r1 = fread(c1, sizeof(char), auto_chunk_size, file1);
1499 r2 = fread(c2, sizeof(char), auto_chunk_size, file2);
1500
1501 if (r1 != r2) return 0; /* file lengths are different */
1502 if (memcmp (c1, c2, r1)) return 0; /* file contents are different */
1503
1504 if (!ISFLAG(flags, F_HIDEPROGRESS)) {
1505 check++;
1506 bytes += (off_t)r1;
1507 if (check > CHECK_MINIMUM) {
1508 update_progress("confirm", (int)((bytes * 100) / size));
1509 check = 0;
1510 }
1511 }
1512 } while (r2);
1513
1514 return 1;
1515 }
1516
1517
1518 /* Count the following statistics:
1519 - Maximum number of files in a duplicate set (length of longest dupe chain)
1520 - Number of non-zero-length files that have duplicates (if n_files != NULL)
1521 - Total number of duplicate file sets (groups) */
1522 extern unsigned int get_max_dupes(const file_t *files, unsigned int * const restrict max,
1523 unsigned int * const restrict n_files) {
1524 unsigned int groups = 0;
1525
1526 if (files == NULL || max == NULL) nullptr("get_max_dupes()");
1527 LOUD(fprintf(stderr, "get_max_dupes(%p, %p, %p)\n", (const void *)files, (void *)max, (void *)n_files));
1528
1529 *max = 0;
1530 if (n_files) *n_files = 0;
1531
1532 while (files) {
1533 unsigned int n_dupes;
1534 if (ISFLAG(files->flags, FF_HAS_DUPES)) {
1535 groups++;
1536 if (n_files && files->size) (*n_files)++;
1537 n_dupes = 1;
1538 for (file_t *curdupe = files->duplicates; curdupe; curdupe = curdupe->duplicates) n_dupes++;
1539 if (n_dupes > *max) *max = n_dupes;
1540 }
1541 files = files->next;
1542 }
1543 return groups;
1544 }
1545
1546
1547 #ifndef NO_USER_ORDER
1548 static int sort_pairs_by_param_order(file_t *f1, file_t *f2)
1549 {
1550 if (!ISFLAG(flags, F_USEPARAMORDER)) return 0;
1551 if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_param_order()");
1552 if (f1->user_order < f2->user_order) return -sort_direction;
1553 if (f1->user_order > f2->user_order) return sort_direction;
1554 return 0;
1555 }
1556 #endif
1557
1558
1559 static int sort_pairs_by_mtime(file_t *f1, file_t *f2)
1560 {
1561 if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_mtime()");
1562
1563 #ifndef NO_USER_ORDER
1564 int po = sort_pairs_by_param_order(f1, f2);
1565 if (po != 0) return po;
1566 #endif /* NO_USER_ORDER */
1567
1568 if (f1->mtime < f2->mtime) return -sort_direction;
1569 else if (f1->mtime > f2->mtime) return sort_direction;
1570
1571 /* If the mtimes match, use the names to break the tie */
1572 return numeric_sort(f1->d_name, f2->d_name, sort_direction);
1573 }
1574
1575
1576 static int sort_pairs_by_filename(file_t *f1, file_t *f2)
1577 {
1578 if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_filename()");
1579
1580 #ifndef NO_USER_ORDER
1581 int po = sort_pairs_by_param_order(f1, f2);
1582 if (po != 0) return po;
1583 #endif /* NO_USER_ORDER */
1584
1585 return numeric_sort(f1->d_name, f2->d_name, sort_direction);
1586 }
1587
1588
1589 static void registerpair(file_t **matchlist, file_t *newmatch,
1590 int (*comparef)(file_t *f1, file_t *f2))
1591 {
1592 file_t *traverse;
1593 file_t *back;
1594
1595 /* NULL pointer sanity checks */
1596 if (matchlist == NULL || newmatch == NULL || comparef == NULL) nullptr("registerpair()");
1597 LOUD(fprintf(stderr, "registerpair: '%s', '%s'\n", (*matchlist)->d_name, newmatch->d_name);)
1598
1599 SETFLAG((*matchlist)->flags, FF_HAS_DUPES);
1600 back = NULL;
1601 traverse = *matchlist;
1602
1603 /* FIXME: This needs to be changed! As it currently stands, the compare
1604 * function only runs on a pair as it is registered and future pairs can
1605 * mess up the sort order. A separate sorting function should happen before
1606 * the dupe chain is acted upon rather than while pairs are registered. */
1607 while (traverse) {
1608 if (comparef(newmatch, traverse) <= 0) {
1609 newmatch->duplicates = traverse;
1610
1611 if (!back) {
1612 *matchlist = newmatch; /* update pointer to head of list */
1613 SETFLAG(newmatch->flags, FF_HAS_DUPES);
1614 CLEARFLAG(traverse->flags, FF_HAS_DUPES); /* flag is only for first file in dupe chain */
1615 } else back->duplicates = newmatch;
1616
1617 break;
1618 } else {
1619 if (traverse->duplicates == 0) {
1620 traverse->duplicates = newmatch;
1621 if (!back) SETFLAG(traverse->flags, FF_HAS_DUPES);
1622
1623 break;
1624 }
1625 }
1626
1627 back = traverse;
1628 traverse = traverse->duplicates;
1629 }
1630 return;
1631 }
1632
1633
1634 static inline void help_text(void)
1635 {
1636 printf("Usage: jdupes [options] FILES and/or DIRECTORIES...\n\n");
1637
1638 printf("Duplicate file sets will be printed by default unless a different action\n");
1639 printf("option is specified (delete, summarize, link, dedupe, etc.)\n");
1640 #ifdef LOUD
1641 printf(" -@ --loud \toutput annoying low-level debug info while running\n");
1642 #endif
1643 printf(" -0 --printnull \toutput nulls instead of CR/LF (like 'find -print0')\n");
1644 printf(" -1 --one-file-system \tdo not match files on different filesystems/devices\n");
1645 printf(" -A --nohidden \texclude hidden files from consideration\n");
1646 #ifdef ENABLE_DEDUPE
1647 printf(" -B --dedupe \tdo a copy-on-write (reflink/clone) deduplication\n");
1648 #endif
1649 printf(" -C --chunksize=# \toverride I/O chunk size (min %d, max %d)\n", MIN_CHUNK_SIZE, MAX_CHUNK_SIZE);
1650 printf(" -d --delete \tprompt user for files to preserve and delete all\n");
1651 printf(" \tothers; important: under particular circumstances,\n");
1652 printf(" \tdata may be lost when using this option together\n");
1653 printf(" \twith -s or --symlinks, or when specifying a\n");
1654 printf(" \tparticular directory more than once; refer to the\n");
1655 printf(" \tdocumentation for additional information\n");
1656 #ifdef DEBUG
1657 printf(" -D --debug \toutput debug statistics after completion\n");
1658 #endif
1659 printf(" -f --omitfirst \tomit the first file in each set of matches\n");
1660 printf(" -h --help \tdisplay this help message\n");
1661 #ifndef NO_HARDLINKS
1662 printf(" -H --hardlinks \ttreat any linked files as duplicate files. Normally\n");
1663 printf(" \tlinked files are treated as non-duplicates for safety\n");
1664 #endif
1665 printf(" -i --reverse \treverse (invert) the match sort order\n");
1666 #ifndef NO_USER_ORDER
1667 printf(" -I --isolate \tfiles in the same specified directory won't match\n");
1668 #endif
1669 printf(" -j --json \tproduce JSON (machine-readable) output\n");
1670 /* printf(" -K --skiphash \tskip full file hashing (may be faster; 100%% safe)\n"); */
1671 printf(" \tWARNING: in development, not fully working yet!\n");
1672 #ifndef NO_SYMLINKS
1673 printf(" -l --linksoft \tmake relative symlinks for duplicates w/o prompting\n");
1674 #endif
1675 #ifndef NO_HARDLINKS
1676 printf(" -L --linkhard \thard link all duplicate files without prompting\n");
1677 #ifdef ON_WINDOWS
1678 printf(" \tWindows allows a maximum of 1023 hard links per file;\n");
1679 printf(" \tlinking large match sets will result in multiple sets\n");
1680 printf(" \tof hard linked files due to this limit.\n");
1681 #endif /* ON_WINDOWS */
1682 #endif /* NO_HARDLINKS */
1683 printf(" -m --summarize \tsummarize dupe information\n");
1684 printf(" -M --printwithsummary\twill print matches and --summarize at the end\n");
1685 printf(" -N --noprompt \ttogether with --delete, preserve the first file in\n");
1686 printf(" \teach set of duplicates and delete the rest without\n");
1687 printf(" \tprompting the user\n");
1688 printf(" -o --order=BY \tselect sort order for output, linking and deleting; by\n");
1689 printf(" \tmtime (BY=time) or filename (BY=name, the default)\n");
1690 #ifndef NO_USER_ORDER
1691 printf(" -O --paramorder \tParameter order is more important than selected -o sort\n");
1692 #endif
1693 #ifndef NO_PERMS
1694 printf(" -p --permissions \tdon't consider files with different owner/group or\n");
1695 printf(" \tpermission bits as duplicates\n");
1696 #endif
1697 printf(" -P --print=type \tprint extra info (partial, early, fullhash)\n");
1698 printf(" -q --quiet \thide progress indicator\n");
1699 printf(" -Q --quick \tskip byte-for-byte confirmation for quick matching\n");
1700 printf(" \tWARNING: -Q can result in data loss! Be very careful!\n");
1701 printf(" -r --recurse \tfor every directory, process its subdirectories too\n");
1702 printf(" -R --recurse: \tfor each directory given after this option follow\n");
1703 printf(" \tsubdirectories encountered within (note the ':' at\n");
1704 printf(" \tthe end of the option, manpage for more details)\n");
1705 #ifndef NO_SYMLINKS
1706 printf(" -s --symlinks \tfollow symlinks\n");
1707 #endif
1708 printf(" -S --size \tshow size of duplicate files\n");
1709 printf(" -t --nochangecheck\tdisable security check for file changes (aka TOCTTOU)\n");
1710 printf(" -T --partial-only \tmatch based on partial hashes only. WARNING:\n");
1711 printf(" \tEXTREMELY DANGEROUS paired with destructive actions!\n");
1712 printf(" \t-T must be specified twice to work. Read the manual!\n");
1713 printf(" -u --printunique \tprint only a list of unique (non-matched) files\n");
1714 printf(" -U --notravcheck \tdisable double-traversal safety check (BE VERY CAREFUL)\n");
1715 printf(" \tThis fixes a Google Drive File Stream recursion issue\n");
1716 printf(" -v --version \tdisplay jdupes version and license information\n");
1717 printf(" -X --extfilter=x:y\tfilter files based on specified criteria\n");
1718 printf(" \tUse '-X help' for detailed extfilter help\n");
1719 printf(" -z --zeromatch \tconsider zero-length files to be duplicates\n");
1720 printf(" -Z --softabort \tIf the user aborts (i.e. CTRL-C) act on matches so far\n");
1721 #ifndef ON_WINDOWS
1722 printf(" \tYou can send SIGUSR1 to the program to toggle this\n");
1723 #endif
1724 #ifdef OMIT_GETOPT_LONG
1725 printf("Note: Long options are not supported in this build.\n\n");
1726 #endif
1727 }
1728
1729
/* Print the detailed -X/--extfilter help to stdout.
 * The text is stored as a table of fixed lines and written out verbatim,
 * in order. None of the lines contain format specifiers. */
static void help_text_extfilter(void)
{
  static const char * const lines[] = {
    "Detailed help for jdupes -X/--extfilter options\n",
    "General format: jdupes -X filter[:value][size_suffix]\n\n",

    /* FIXME: Remove after v1.19.0 */
    "****** WARNING: THE MEANINGS HAVE CHANGED IN v1.19.0 - READ CAREFULLY ******\n\n",

    "noext:ext1[,ext2,...] \tExclude files with certain extension(s)\n\n",
    "onlyext:ext1[,ext2,...] \tOnly include files with certain extension(s)\n\n",
    "size[+-=]:size[suffix] \tOnly Include files matching size criteria\n",
    " \tSize specs: + larger, - smaller, = equal to\n",
    " \tSpecs can be mixed, i.e. size+=:100k will\n",
    " \tonly include files 100KiB or more in size.\n\n",
    "nostr:text_string \tExclude all paths containing the string\n",
    "onlystr:text_string \tOnly allow paths containing the string\n",
    " \tHINT: you can use these for directories:\n",
    " \t-X nostr:/dir_x/ or -X onlystr:/dir_x/\n",
    "newer:datetime \tOnly include files newer than specified date\n",
    "older:datetime \tOnly include files older than specified date\n",
    " \tDate/time format: \"YYYY-MM-DD HH:MM:SS\"\n",
    " \tTime is optional (remember to escape spaces!)\n",

    "\nSome filters take no value or multiple values. Filters that can take\n",
    "a numeric option generally support the size multipliers K/M/G/T/P/E\n",
    "with or without an added iB or B. Multipliers are binary-style unless\n",
    "the -B suffix is used, which will use decimal multipliers. For example,\n",
    "16k or 16kib = 16384; 16kb = 16000. Multipliers are case-insensitive.\n\n",

    "Filters have cumulative effects: jdupes -X size+:99 -X size-:101 will\n",
    "cause only files of exactly 100 bytes in size to be included.\n\n",

    "Extension matching is case-insensitive.\n",
    "Path substring matching is case-sensitive.\n"
  };
  size_t idx;

  for (idx = 0; idx < sizeof(lines) / sizeof(lines[0]); idx++)
    fputs(lines[idx], stdout);
}
1766
1767
1768 #ifdef UNICODE
1769 int wmain(int argc, wchar_t **wargv)
1770 #else
1771 int main(int argc, char **argv)
1772 #endif
1773 {
1774 static file_t *files = NULL;
1775 static file_t *curfile;
1776 static char **oldargv;
1777 static int firstrecurse;
1778 static int opt;
1779 static int pm = 1;
1780 static int partialonly_spec = 0;
1781 static ordertype_t ordertype = ORDER_NAME;
1782 static long manual_chunk_size = 0;
1783 #ifdef __linux__
1784 static struct proc_cacheinfo pci;
1785 #endif
1786 #ifdef ENABLE_DEDUPE
1787 static struct utsname utsname;
1788 #endif
1789
1790 #ifndef OMIT_GETOPT_LONG
1791 static const struct option long_options[] =
1792 {
1793 { "loud", 0, 0, '@' },
1794 { "printnull", 0, 0, '0' },
1795 { "one-file-system", 0, 0, '1' },
1796 { "nohidden", 0, 0, 'A' },
1797 { "dedupe", 0, 0, 'B' },
1798 { "chunksize", 1, 0, 'C' },
1799 { "debug", 0, 0, 'D' },
1800 { "delete", 0, 0, 'd' },
1801 { "omitfirst", 0, 0, 'f' },
1802 { "hardlinks", 0, 0, 'H' },
1803 { "help", 0, 0, 'h' },
1804 { "isolate", 0, 0, 'I' },
1805 { "reverse", 0, 0, 'i' },
1806 { "json", 0, 0, 'j' },
1807 { "skiphash", 0, 0, 'K' },
1808 { "linkhard", 0, 0, 'L' },
1809 { "linksoft", 0, 0, 'l' },
1810 { "printwithsummary", 0, 0, 'M'},
1811 { "summarize", 0, 0, 'm'},
1812 { "noprompt", 0, 0, 'N' },
1813 { "noempty", 0, 0, 'n' },
1814 { "paramorder", 0, 0, 'O' },
1815 { "order", 1, 0, 'o' },
1816 { "print", 1, 0, 'P' },
1817 { "permissions", 0, 0, 'p' },
1818 { "quick", 0, 0, 'Q' },
1819 { "quiet", 0, 0, 'q' },
1820 { "recurse:", 0, 0, 'R' },
1821 { "recurse", 0, 0, 'r' },
1822 { "size", 0, 0, 'S' },
1823 { "symlinks", 0, 0, 's' },
1824 { "partial-only", 0, 0, 'T' },
1825 { "nochangecheck", 0, 0, 't' },
1826 { "notravcheck", 0, 0, 'U' },
1827 { "printunique", 0, 0, 'u' },
1828 { "version", 0, 0, 'v' },
1829 { "extfilter", 1, 0, 'X' },
1830 { "softabort", 0, 0, 'Z' },
1831 { "zeromatch", 0, 0, 'z' },
1832 { NULL, 0, 0, 0 }
1833 };
1834 #define GETOPT getopt_long
1835 #else
1836 #define GETOPT getopt
1837 #endif
1838
1839 #define GETOPT_STRING "@01ABC:DdfHhIijKLlMmNnOo:P:pQqRrSsTtUuVvX:Zz"
1840
1841 /* Windows buffers our stderr output; don't let it do that */
1842 #ifdef ON_WINDOWS
1843 if (setvbuf(stderr, NULL, _IONBF, 0) != 0)
1844 fprintf(stderr, "warning: setvbuf() failed\n");
1845 #endif
1846
1847 #ifdef UNICODE
1848 /* Create a UTF-8 **argv from the wide version */
1849 static char **argv;
1850 argv = (char **)string_malloc(sizeof(char *) * (size_t)argc);
1851 if (!argv) oom("main() unicode argv");
1852 widearg_to_argv(argc, wargv, argv);
1853 /* fix up __argv so getopt etc. don't crash */
1854 __argv = argv;
1855 /* Only use UTF-16 for terminal output, else use UTF-8 */
1856 if (!_isatty(_fileno(stdout))) out_mode = _O_BINARY;
1857 else out_mode = _O_U16TEXT;
1858 if (!_isatty(_fileno(stderr))) err_mode = _O_BINARY;
1859 else err_mode = _O_U16TEXT;
1860 #endif /* UNICODE */
1861
1862 #ifdef __linux__
1863 /* Auto-tune chunk size to be half of L1 data cache if possible */
1864 get_proc_cacheinfo(&pci);
1865 if (pci.l1 != 0) auto_chunk_size = (pci.l1 / 2);
1866 else if (pci.l1d != 0) auto_chunk_size = (pci.l1d / 2);
1867 /* Must be at least 4096 (4 KiB) and cannot exceed CHUNK_SIZE */
1868 if (auto_chunk_size < MIN_CHUNK_SIZE || auto_chunk_size > MAX_CHUNK_SIZE) auto_chunk_size = CHUNK_SIZE;
1869 /* Force to a multiple of 4096 if it isn't already */
1870 if ((auto_chunk_size & 0x00000fffUL) != 0)
1871 auto_chunk_size = (auto_chunk_size + 0x00000fffUL) & 0x000ff000;
1872 #endif /* __linux__ */
1873
1874 /* Is stderr a terminal? If not, we won't write progress to it */
1875 #ifdef ON_WINDOWS
1876 if (!_isatty(_fileno(stderr))) SETFLAG(flags, F_HIDEPROGRESS);
1877 #else
1878 if (!isatty(fileno(stderr))) SETFLAG(flags, F_HIDEPROGRESS);
1879 #endif
1880
1881 program_name = argv[0];
1882 oldargv = cloneargs(argc, argv);
1883 /* Clean up string_malloc on any exit */
1884 atexit(clean_exit);
1885
1886 while ((opt = GETOPT(argc, argv, GETOPT_STRING
1887 #ifndef OMIT_GETOPT_LONG
1888 , long_options, NULL
1889 #endif
1890 )) != EOF) {
1891 if ((uintptr_t)optarg == 0x20) goto error_optarg;
1892 switch (opt) {
1893 case '0':
1894 SETFLAG(a_flags, FA_PRINTNULL);
1895 LOUD(fprintf(stderr, "opt: print null instead of newline (--printnull)\n");)
1896 break;
1897 case '1':
1898 SETFLAG(flags, F_ONEFS);
1899 LOUD(fprintf(stderr, "opt: recursion across filesystems disabled (--onefs)\n");)
1900 break;
1901 case 'A':
1902 SETFLAG(flags, F_EXCLUDEHIDDEN);
1903 break;
1904 case 'C':
1905 manual_chunk_size = strtol(optarg, NULL, 10) & 0x0ffff000L; /* Align to 4K sizes */
1906 if (manual_chunk_size < MIN_CHUNK_SIZE || manual_chunk_size > MAX_CHUNK_SIZE) {
1907 fprintf(stderr, "warning: invalid manual chunk size (must be %d-%d); using defaults\n", MIN_CHUNK_SIZE, MAX_CHUNK_SIZE);
1908 LOUD(fprintf(stderr, "Manual chunk size (failed) was apparently '%s' => %ld\n", optarg, manual_chunk_size));
1909 manual_chunk_size = 0;
1910 } else auto_chunk_size = (size_t)manual_chunk_size;
1911 LOUD(fprintf(stderr, "Manual chunk size is %ld\n", manual_chunk_size));
1912 break;
1913 case 'd':
1914 SETFLAG(a_flags, FA_DELETEFILES);
1915 LOUD(fprintf(stderr, "opt: delete files after matching (--deletefiles)\n");)
1916 break;
1917 case 'D':
1918 #ifdef DEBUG
1919 SETFLAG(flags, F_DEBUG);
1920 #endif
1921 break;
1922 case 'f':
1923 SETFLAG(a_flags, FA_OMITFIRST);
1924 LOUD(fprintf(stderr, "opt: omit first match from each match set (--omitfirst)\n");)
1925 break;
1926 case 'h':
1927 help_text();
1928 string_malloc_destroy();
1929 exit(EXIT_FAILURE);
1930 #ifndef NO_HARDLINKS
1931 case 'H':
1932 SETFLAG(flags, F_CONSIDERHARDLINKS);
1933 LOUD(fprintf(stderr, "opt: hard links count as matches (--hardlinks)\n");)
1934 break;
1935 case 'L':
1936 SETFLAG(a_flags, FA_HARDLINKFILES);
1937 LOUD(fprintf(stderr, "opt: convert duplicates to hard links (--linkhard)\n");)
1938 break;
1939 #endif
1940 case 'i':
1941 SETFLAG(flags, F_REVERSESORT);
1942 LOUD(fprintf(stderr, "opt: sort order reversal enabled (--reverse)\n");)
1943 break;
1944 #ifndef NO_USER_ORDER
1945 case 'I':
1946 SETFLAG(flags, F_ISOLATE);
1947 LOUD(fprintf(stderr, "opt: intra-parameter match isolation enabled (--isolate)\n");)
1948 break;
1949 case 'O':
1950 SETFLAG(flags, F_USEPARAMORDER);
1951 LOUD(fprintf(stderr, "opt: parameter order takes precedence (--paramorder)\n");)
1952 break;
1953 #else
1954 case 'I':
1955 case 'O':
1956 fprintf(stderr, "warning: -I and -O are disabled and ignored in this build\n");
1957 break;
1958 #endif
1959 case 'j':
1960 SETFLAG(a_flags, FA_PRINTJSON);
1961 LOUD(fprintf(stderr, "opt: print output in JSON format (--printjson)\n");)
1962 break;
1963 case 'K':
1964 SETFLAG(flags, F_SKIPHASH);
1965 break;
1966 case 'm':
1967 SETFLAG(a_flags, FA_SUMMARIZEMATCHES);
1968 LOUD(fprintf(stderr, "opt: print a summary of match stats (--summarize)\n");)
1969 break;
1970 case 'M':
1971 SETFLAG(a_flags, FA_SUMMARIZEMATCHES);
1972 SETFLAG(a_flags, FA_PRINTMATCHES);
1973 LOUD(fprintf(stderr, "opt: print matches with a summary (--printwithsummary)\n");)
1974 break;
1975 case 'n':
1976 //fprintf(stderr, "note: -n/--noempty is the default behavior now and is deprecated.\n");
1977 break;
1978 case 'N':
1979 SETFLAG(flags, F_NOPROMPT);
1980 LOUD(fprintf(stderr, "opt: delete files without prompting (--noprompt)\n");)
1981 break;
1982 case 'p':
1983 SETFLAG(flags, F_PERMISSIONS);
1984 LOUD(fprintf(stderr, "opt: permissions must also match (--permissions)\n");)
1985 break;
1986 case 'P':
1987 LOUD(fprintf(stderr, "opt: print early: '%s' (--print)\n", optarg);)
1988 if (strcmp(optarg, "partial") == 0) SETFLAG(p_flags, PF_PARTIAL);
1989 else if (strcmp(optarg, "early") == 0) SETFLAG(p_flags, PF_EARLYMATCH);
1990 else if (strcmp(optarg, "fullhash") == 0) SETFLAG(p_flags, PF_FULLHASH);
1991 else {
1992 fprintf(stderr, "Option '%s' is not valid for -P\n", optarg);
1993 exit(EXIT_FAILURE);
1994 }
1995 break;
1996 case 'q':
1997 SETFLAG(flags, F_HIDEPROGRESS);
1998 break;
1999 case 'Q':
2000 SETFLAG(flags, F_QUICKCOMPARE);
2001 fprintf(stderr, "\nBIG FAT WARNING: -Q/--quick MAY BE DANGEROUS! Read the manual!\n\n");
2002 LOUD(fprintf(stderr, "opt: byte-for-byte safety check disabled (--quick)\n");)
2003 break;
2004 case 'r':
2005 SETFLAG(flags, F_RECURSE);
2006 LOUD(fprintf(stderr, "opt: global recursion enabled (--recurse)\n");)
2007 break;
2008 case 'R':
2009 SETFLAG(flags, F_RECURSEAFTER);
2010 LOUD(fprintf(stderr, "opt: partial recursion enabled (--recurseafter)\n");)
2011 break;
2012 case 't':
2013 SETFLAG(flags, F_NOCHANGECHECK);
2014 LOUD(fprintf(stderr, "opt: TOCTTOU safety check disabled (--nochangecheck)\n");)
2015 break;
2016 case 'T':
2017 if (partialonly_spec == 0)
2018 partialonly_spec = 1;
2019 else {
2020 partialonly_spec = 2;
2021 fprintf(stderr, "\nBIG FAT WARNING: -T/--partialonly is EXTREMELY DANGEROUS! Read the manual!\n\n");
2022 SETFLAG(flags, F_PARTIALONLY);
2023 }
2024 break;
2025 case 'u':
2026 SETFLAG(a_flags, FA_PRINTUNIQUE);
2027 LOUD(fprintf(stderr, "opt: print only non-matched (unique) files (--printunique)\n");)
2028 break;
2029 case 'U':
2030 SETFLAG(flags, F_NOTRAVCHECK);
2031 LOUD(fprintf(stderr, "opt: double-traversal safety check disabled (--notravcheck)\n");)
2032 break;
2033 #ifndef NO_SYMLINKS
2034 case 'l':
2035 SETFLAG(a_flags, FA_MAKESYMLINKS);
2036 LOUD(fprintf(stderr, "opt: convert duplicates to symbolic links (--linksoft)\n");)
2037 break;
2038 case 's':
2039 SETFLAG(flags, F_FOLLOWLINKS);
2040 LOUD(fprintf(stderr, "opt: follow symbolic links enabled (--symlinks)\n");)
2041 break;
2042 #endif
2043 case 'S':
2044 SETFLAG(a_flags, FA_SHOWSIZE);
2045 LOUD(fprintf(stderr, "opt: show size of files enabled (--size)\n");)
2046 break;
2047 case 'X':
2048 add_extfilter(optarg);
2049 break;
2050 case 'z':
2051 SETFLAG(flags, F_INCLUDEEMPTY);
2052 LOUD(fprintf(stderr, "opt: zero-length files count as matches (--zeromatch)\n");)
2053 break;
2054 case 'Z':
2055 SETFLAG(flags, F_SOFTABORT);
2056 LOUD(fprintf(stderr, "opt: soft-abort mode enabled (--softabort)\n");)
2057 break;
2058 case '@':
2059 #ifdef LOUD_DEBUG
2060 SETFLAG(flags, F_DEBUG | F_LOUD | F_HIDEPROGRESS);
2061 #endif
2062 LOUD(fprintf(stderr, "opt: loud debugging enabled, hope you can handle it (--loud)\n");)
2063 break;
2064 case 'v':
2065 case 'V':
2066 printf("jdupes %s (%s) ", VER, VERDATE);
2067
2068 /* Indicate bitness information */
2069 if (sizeof(uintptr_t) == 8) {
2070 if (sizeof(long) == 4) printf("64-bit i32\n");
2071 else if (sizeof(long) == 8) printf("64-bit\n");
2072 } else if (sizeof(uintptr_t) == 4) {
2073 if (sizeof(long) == 4) printf("32-bit\n");
2074 else if (sizeof(long) == 8) printf("32-bit i64\n");
2075 } else printf("%u-bit i%u\n", (unsigned int)(sizeof(uintptr_t) * 8),
2076 (unsigned int)(sizeof(long) * 8));
2077
2078 printf("Compile-time extensions:");
2079 if (*extensions != NULL) {
2080 int c = 0;
2081 while (extensions[c] != NULL) {
2082 printf(" %s", extensions[c]);
2083 c++;
2084 }
2085 } else printf(" none");
2086 printf("\nCopyright (C) 2015-2020 by Jody Bruchon and contributors\n");
2087 printf("Forked from fdupes 1.51, (C) 1999-2014 Adrian Lopez and contributors\n\n");
2088 printf("Permission is hereby granted, free of charge, to any person obtaining a copy of\n");
2089 printf("this software and associated documentation files (the \"Software\"), to deal in\n");
2090 printf("the Software without restriction, including without limitation the rights to\n");
2091 printf("use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies\n");
2092 printf("of the Software, and to permit persons to whom the Software is furnished to do\n");
2093 printf("so, subject to the following conditions:\n\n");
2094
2095 printf("The above copyright notice and this permission notice shall be included in all\n");
2096 printf("copies or substantial portions of the Software.\n\n");
2097 printf("THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n");
2098 printf("IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n");
2099 printf("FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n");
2100 printf("AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n");
2101 printf("LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n");
2102 printf("OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n");
2103 printf("SOFTWARE.\n");
2104 printf("\nIf you find this software useful, please consider financially supporting\n");
2105 printf("its continued development by donating to the author's SubscribeStar:\n");
2106 printf(" https://SubscribeStar.com/JodyBruchon\n");
2107 printf("\nNew releases, bug fixes, and more at the jdupes GitHub project page:\n");
2108 printf(" https://github.com/jbruchon/jdupes\n");
2109 exit(EXIT_SUCCESS);
2110 case 'o':
2111 if (!strncasecmp("name", optarg, 5)) {
2112 ordertype = ORDER_NAME;
2113 } else if (!strncasecmp("time", optarg, 5)) {
2114 ordertype = ORDER_TIME;
2115 } else {
2116 fprintf(stderr, "invalid value for --order: '%s'\n", optarg);
2117 exit(EXIT_FAILURE);
2118 }
2119 break;
2120 case 'B':
2121 #ifdef ENABLE_DEDUPE
2122 /* Refuse to dedupe on 2.x kernels; they could damage user data */
2123 if (uname(&utsname)) {
2124 fprintf(stderr, "Failed to get kernel version! Aborting.\n");
2125 exit(EXIT_FAILURE);
2126 }
2127 LOUD(fprintf(stderr, "dedupefiles: uname got release '%s'\n", utsname.release));
2128 if (*(utsname.release) == '2' && *(utsname.release + 1) == '.') {
2129 fprintf(stderr, "Refusing to dedupe on a 2.x kernel; data loss could occur. Aborting.\n");
2130 exit(EXIT_FAILURE);
2131 }
2132 SETFLAG(a_flags, FA_DEDUPEFILES);
2133 /* btrfs will do the byte-for-byte check itself */
2134 SETFLAG(flags, F_QUICKCOMPARE);
2135 /* It is completely useless to dedupe zero-length extents */
2136 CLEARFLAG(flags, F_INCLUDEEMPTY);
2137 #else
2138 fprintf(stderr, "This program was built without dedupe support\n");
2139 exit(EXIT_FAILURE);
2140 #endif
2141 LOUD(fprintf(stderr, "opt: CoW/block-level deduplication enabled (--dedupe)\n");)
2142 break;
2143
2144 default:
2145 if (opt != '?') fprintf(stderr, "Sorry, using '-%c' is not supported in this build.\n", opt);
2146 fprintf(stderr, "Try `jdupes --help' for more information.\n");
2147 string_malloc_destroy();
2148 exit(EXIT_FAILURE);
2149 }
2150 }
2151
2152 if (optind >= argc) {
2153 fprintf(stderr, "no files or directories specified (use -h option for help)\n");
2154 string_malloc_destroy();
2155 exit(EXIT_FAILURE);
2156 }
2157
2158 if (partialonly_spec == 1) {
2159 fprintf(stderr, "--partial-only specified only once (it's VERY DANGEROUS, read the manual!)\n");
2160 string_malloc_destroy();
2161 exit(EXIT_FAILURE);
2162 }
2163
2164 if (ISFLAG(flags, F_PARTIALONLY) && ISFLAG(flags, F_QUICKCOMPARE)) {
2165 fprintf(stderr, "--partial-only overrides --quick and is even more dangerous (read the manual!)\n");
2166 string_malloc_destroy();
2167 exit(EXIT_FAILURE);
2168 }
2169
2170 if (ISFLAG(flags, F_RECURSE) && ISFLAG(flags, F_RECURSEAFTER)) {
2171 fprintf(stderr, "options --recurse and --recurse: are not compatible\n");
2172 string_malloc_destroy();
2173 exit(EXIT_FAILURE);
2174 }
2175
2176 if (ISFLAG(a_flags, FA_SUMMARIZEMATCHES) && ISFLAG(a_flags, FA_DELETEFILES)) {
2177 fprintf(stderr, "options --summarize and --delete are not compatible\n");
2178 string_malloc_destroy();
2179 exit(EXIT_FAILURE);
2180 }
2181
2182 #ifdef ENABLE_DEDUPE
2183 if (ISFLAG(flags, F_CONSIDERHARDLINKS) && ISFLAG(a_flags, FA_DEDUPEFILES))
2184 fprintf(stderr, "warning: option --dedupe overrides the behavior of --hardlinks\n");
2185 #endif
2186
2187 /* If pm == 0, call printmatches() */
2188 pm = !!ISFLAG(a_flags, FA_SUMMARIZEMATCHES) +
2189 !!ISFLAG(a_flags, FA_DELETEFILES) +
2190 !!ISFLAG(a_flags, FA_HARDLINKFILES) +
2191 !!ISFLAG(a_flags, FA_MAKESYMLINKS) +
2192 !!ISFLAG(a_flags, FA_PRINTJSON) +
2193 !!ISFLAG(a_flags, FA_PRINTUNIQUE) +
2194 !!ISFLAG(a_flags, FA_DEDUPEFILES);
2195
2196 if (pm > 1) {
2197 fprintf(stderr, "Only one of --summarize, --printwithsummary, --delete, --linkhard,\n--linksoft, --json, or --dedupe may be used\n");
2198 string_malloc_destroy();
2199 exit(EXIT_FAILURE);
2200 }
2201 if (pm == 0) SETFLAG(a_flags, FA_PRINTMATCHES);
2202
2203 #ifndef ON_WINDOWS
2204 /* Catch SIGUSR1 and use it to enable -Z */
2205 signal(SIGUSR1, sigusr1);
2206 #endif
2207
2208 if (ISFLAG(flags, F_RECURSEAFTER)) {
2209 firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv);
2210
2211 if (firstrecurse == argc)
2212 firstrecurse = nonoptafter("-R", argc, oldargv, argv);
2213
2214 if (firstrecurse == argc) {
2215 fprintf(stderr, "-R option must be isolated from other options\n");
2216 string_malloc_destroy();
2217 exit(EXIT_FAILURE);
2218 }
2219
2220 /* F_RECURSE is not set for directories before --recurse: */
2221 for (int x = optind; x < firstrecurse; x++) {
2222 slash_convert(argv[x]);
2223 grokdir(argv[x], &files, 0);
2224 user_item_count++;
2225 }
2226
2227 /* Set F_RECURSE for directories after --recurse: */
2228 SETFLAG(flags, F_RECURSE);
2229
2230 for (int x = firstrecurse; x < argc; x++) {
2231 slash_convert(argv[x]);
2232 grokdir(argv[x], &files, 1);
2233 user_item_count++;
2234 }
2235 } else {
2236 for (int x = optind; x < argc; x++) {
2237 slash_convert(argv[x]);
2238 grokdir(argv[x], &files, ISFLAG(flags, F_RECURSE));
2239 user_item_count++;
2240 }
2241 }
2242
2243 /* We don't need the double traversal check tree anymore */
2244 travdone_free(travdone_head);
2245
2246 if (ISFLAG(flags, F_REVERSESORT)) sort_direction = -1;
2247 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\n");
2248 if (!files) {
2249 fwprint(stderr, "No duplicates found.", 1);
2250 string_malloc_destroy();
2251 exit(EXIT_SUCCESS);
2252 }
2253
2254 curfile = files;
2255 progress = 0;
2256
2257 /* Catch CTRL-C */
2258 signal(SIGINT, sighandler);
2259
2260 while (curfile) {
2261 static file_t **match = NULL;
2262 static FILE *file1;
2263 static FILE *file2;
2264
2265 if (interrupt) {
2266 fprintf(stderr, "\nStopping file scan due to user abort\n");
2267 if (!ISFLAG(flags, F_SOFTABORT)) exit(EXIT_FAILURE);
2268 interrupt = 0; /* reset interrupt for re-use */
2269 goto skip_file_scan;
2270 }
2271
2272 LOUD(fprintf(stderr, "\nMAIN: current file: %s\n", curfile->d_name));
2273
2274 if (!checktree) registerfile(&checktree, NONE, curfile);
2275 else match = checkmatch(checktree, curfile);
2276
2277 /* Byte-for-byte check that a matched pair are actually matched */
2278 if (match != NULL) {
2279 /* Quick or partial-only compare will never run confirmmatch()
2280 * Also skip match confirmation for hard-linked files
2281 * (This set of comparisons is ugly, but quite efficient) */
2282 if (ISFLAG(flags, F_QUICKCOMPARE) || ISFLAG(flags, F_PARTIALONLY) ||
2283 (ISFLAG(flags, F_CONSIDERHARDLINKS) &&
2284 (curfile->inode == (*match)->inode) &&
2285 (curfile->device == (*match)->device))
2286 ) {
2287 LOUD(fprintf(stderr, "MAIN: notice: hard linked, quick, or partial-only match (-H/-Q/-T)\n"));
2288 registerpair(match, curfile,
2289 (ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename);
2290 dupecount++;
2291 goto skip_full_check;
2292 }
2293
2294 #ifdef UNICODE
2295 if (!M2W(curfile->d_name, wstr)) file1 = NULL;
2296 else file1 = _wfopen(wstr, FILE_MODE_RO);
2297 #else
2298 file1 = fopen(curfile->d_name, FILE_MODE_RO);
2299 #endif
2300 if (!file1) {
2301 LOUD(fprintf(stderr, "MAIN: warning: file1 fopen() failed ('%s')\n", curfile->d_name));
2302 curfile = curfile->next;
2303 continue;
2304 }
2305
2306 #ifdef UNICODE
2307 if (!M2W((*match)->d_name, wstr)) file2 = NULL;
2308 else file2 = _wfopen(wstr, FILE_MODE_RO);
2309 #else
2310 file2 = fopen((*match)->d_name, FILE_MODE_RO);
2311 #endif
2312 if (!file2) {
2313 fclose(file1);
2314 LOUD(fprintf(stderr, "MAIN: warning: file2 fopen() failed ('%s')\n", (*match)->d_name));
2315 curfile = curfile->next;
2316 continue;
2317 }
2318
2319 if (confirmmatch(file1, file2, curfile->size)) {
2320 LOUD(fprintf(stderr, "MAIN: registering matched file pair\n"));
2321 registerpair(match, curfile,
2322 (ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename);
2323 dupecount++;
2324 } DBG(else hash_fail++;)
2325
2326 fclose(file1);
2327 fclose(file2);
2328 }
2329
2330 skip_full_check:
2331 curfile = curfile->next;
2332
2333 if (!ISFLAG(flags, F_HIDEPROGRESS)) update_progress(NULL, -1);
2334 progress++;
2335 }
2336
2337 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%60s\r", " ");
2338
2339 skip_file_scan:
2340 /* Stop catching CTRL+C */
2341 signal(SIGINT, SIG_DFL);
2342 if (ISFLAG(a_flags, FA_DELETEFILES)) {
2343 if (ISFLAG(flags, F_NOPROMPT)) deletefiles(files, 0, 0);
2344 else deletefiles(files, 1, stdin);
2345 }
2346 #ifndef NO_SYMLINKS
2347 if (ISFLAG(a_flags, FA_MAKESYMLINKS)) linkfiles(files, 0);
2348 #endif
2349 #ifndef NO_HARDLINKS
2350 if (ISFLAG(a_flags, FA_HARDLINKFILES)) linkfiles(files, 1);
2351 #endif /* NO_HARDLINKS */
2352 #ifdef ENABLE_DEDUPE
2353 if (ISFLAG(a_flags, FA_DEDUPEFILES)) dedupefiles(files);
2354 #endif /* ENABLE_DEDUPE */
2355 if (ISFLAG(a_flags, FA_PRINTMATCHES)) printmatches(files);
2356 if (ISFLAG(a_flags, FA_PRINTUNIQUE)) printunique(files);
2357 if (ISFLAG(a_flags, FA_PRINTJSON)) printjson(files, argc, argv);
2358 if (ISFLAG(a_flags, FA_SUMMARIZEMATCHES)) {
2359 if (ISFLAG(a_flags, FA_PRINTMATCHES)) printf("\n\n");
2360 summarizematches(files);
2361 }
2362
2363 string_malloc_destroy();
2364
2365 #ifdef DEBUG
2366 if (ISFLAG(flags, F_DEBUG)) {
2367 fprintf(stderr, "\n%d partial (+%d small) -> %d full hash -> %d full (%d partial elim) (%d hash%u fail)\n",
2368 partial_hash, small_file, full_hash, partial_to_full,
2369 partial_elim, hash_fail, (unsigned int)sizeof(jdupes_hash_t)*8);
2370 fprintf(stderr, "%" PRIuMAX " total files, %" PRIuMAX " comparisons, branch L %u, R %u, both %u, max tree depth %u\n",
2371 filecount, comparisons, left_branch, right_branch,
2372 left_branch + right_branch, max_depth);
2373 fprintf(stderr, "SMA: allocs %" PRIuMAX ", free %" PRIuMAX " (merge %" PRIuMAX ", repl %" PRIuMAX "), fail %" PRIuMAX ", reuse %" PRIuMAX ", scan %" PRIuMAX ", tails %" PRIuMAX "\n",
2374 sma_allocs, sma_free_good, sma_free_merged, sma_free_replaced,
2375 sma_free_ignored, sma_free_reclaimed,
2376 sma_free_scanned, sma_free_tails);
2377 if (manual_chunk_size > 0) fprintf(stderr, "I/O chunk size: %ld KiB (manually set)\n", manual_chunk_size >> 10);
2378 else {
2379 #ifdef __linux__
2380 fprintf(stderr, "I/O chunk size: %" PRIuMAX " KiB (%s)\n", (uintmax_t)(auto_chunk_size >> 10), (pci.l1 + pci.l1d) != 0 ? "dynamically sized" : "default size");
2381 #else
2382 fprintf(stderr, "I/O chunk size: %" PRIuMAX " KiB (default size)\n", (uintmax_t)(auto_chunk_size >> 10));
2383 #endif /* __linux__ */
2384 }
2385 #ifdef ON_WINDOWS
2386 #ifndef NO_HARDLINKS
2387 if (ISFLAG(a_flags, FA_HARDLINKFILES))
2388 fprintf(stderr, "Exclusions based on Windows hard link limit: %u\n", hll_exclude);
2389 #endif
2390 #endif
2391 }
2392 #endif /* DEBUG */
2393
2394 exit(EXIT_SUCCESS);
2395
2396 error_optarg:
2397 fprintf(stderr, "error: option '%c' requires an argument\n", opt);
2398 exit(EXIT_FAILURE);
2399 }
2400