1 use crossbeam_channel::Receiver;
2 use humansize::{file_size_opts as options, FileSize};
3 use std::collections::BTreeMap;
4 #[cfg(target_family = "unix")]
5 use std::collections::HashSet;
6 use std::fs::{File, Metadata, OpenOptions};
7 use std::io::prelude::*;
8 use std::io::{self, Error, ErrorKind};
9 #[cfg(target_family = "unix")]
10 use std::os::unix::fs::MetadataExt;
11 use std::path::{Path, PathBuf};
12 use std::time::{Duration, SystemTime, UNIX_EPOCH};
13 use std::{fs, mem, thread};
14 
15 use crate::common::Common;
16 use crate::common_directory::Directories;
17 use crate::common_extensions::Extensions;
18 use crate::common_items::ExcludedItems;
19 use crate::common_messages::Messages;
20 use crate::common_traits::*;
21 use directories_next::ProjectDirs;
22 use rayon::prelude::*;
23 use std::hash::Hasher;
24 use std::io::{BufReader, BufWriter};
25 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
26 use std::sync::Arc;
27 use std::thread::sleep;
28 
29 const HASH_MB_LIMIT_BYTES: u64 = 1024 * 1024; // 1MB
30 
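// On-disk cache of full file hashes; judging by the `ProjectDirs` import it is presumably
// kept in the application's cache directory so that results of earlier scans can be reused.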
31 const CACHE_FILE_NAME: &str = "cache_duplicates.txt";
32 
33 #[derive(Debug)]
34 pub struct ProgressData {
35     pub checking_method: CheckingMethod,
36     pub current_stage: u8,
37     pub max_stage: u8,
38     pub files_checked: usize,
39     pub files_to_check: usize,
40 }
41 
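/// How files are compared when looking for duplicates.
/// `Name` groups files by file name only, `Size` by file size, `Hash` by a hash of the
/// whole content, and `HashMb` by a hash of at most the first megabyte of each file
/// (see `HASH_MB_LIMIT_BYTES`).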
42 #[derive(PartialEq, Eq, Clone, Debug)]
43 pub enum CheckingMethod {
44     None,
45     Name,
46     Size,
47     Hash,
48     HashMb,
49 }
50 
51 impl MyHasher for blake3::Hasher {
52     fn update(&mut self, bytes: &[u8]) {
53         self.update(bytes);
54     }
55     fn finalize(&self) -> String {
56         self.finalize().to_hex().to_string()
57     }
58 }
59 
60 impl MyHasher for crc32fast::Hasher {
61     fn update(&mut self, bytes: &[u8]) {
62         self.write(bytes);
63     }
64     fn finalize(&self) -> String {
65         self.finish().to_string()
66     }
67 }
68 
69 impl MyHasher for xxhash_rust::xxh3::Xxh3 {
70     fn update(&mut self, bytes: &[u8]) {
71         self.write(bytes);
72     }
73     fn finalize(&self) -> String {
74         self.finish().to_string()
75     }
76 }
77 
78 #[derive(PartialEq, Eq, Clone, Debug, Copy)]
79 pub enum HashType {
80     Blake3,
81     Crc32,
82     Xxh3,
83 }
84 
85 impl HashType {
86     fn hasher(self: &HashType) -> Box<dyn MyHasher> {
87         match self {
88             HashType::Blake3 => Box::new(blake3::Hasher::new()),
89             HashType::Crc32 => Box::new(crc32fast::Hasher::new()),
90             HashType::Xxh3 => Box::new(xxhash_rust::xxh3::Xxh3::new()),
91         }
92     }
93 }
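// A minimal sketch (not part of the scanner itself) of how `hasher()` is used: every
// algorithm is driven through the same `MyHasher` trait object, so the hashing loops below
// do not care which backend was selected. The byte string here is made up for illustration.
//
//     let mut hasher: Box<dyn MyHasher> = HashType::Blake3.hasher();
//     hasher.update(b"example bytes");
//     let digest: String = hasher.finalize();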
94 
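/// What happens to each group of duplicates once scanning is done.
/// The `AllExcept*` variants delete every file in a group except the newest or oldest one,
/// the `One*` variants delete only a single file, and `HardLink` is meant to keep one copy
/// and replace the remaining files with hard links to it.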
95 #[derive(Eq, PartialEq, Clone, Debug)]
96 pub enum DeleteMethod {
97     None,
98     AllExceptNewest,
99     AllExceptOldest,
100     OneOldest,
101     OneNewest,
102     HardLink,
103 }
104 
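/// A single file found during scanning. `hash` is left empty by the name and size scans
/// and is only filled in when full content hashes are computed.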
105 #[derive(Clone, Debug, PartialEq, Default)]
106 pub struct FileEntry {
107     pub path: PathBuf,
108     pub size: u64,
109     pub modified_date: u64,
110     pub hash: String,
111 }
112 
113 /// Info struct with helpful information about results
114 #[derive(Default)]
115 pub struct Info {
116     pub number_of_groups_by_size: usize,
117     pub number_of_duplicated_files_by_size: usize,
118     pub number_of_groups_by_hash: usize,
119     pub number_of_duplicated_files_by_hash: usize,
120     pub number_of_groups_by_name: usize,
121     pub number_of_duplicated_files_by_name: usize,
122     pub lost_space_by_size: u64,
123     pub lost_space_by_hash: u64,
124     pub bytes_read_when_hashing: u64,
125     pub number_of_removed_files: usize,
126     pub number_of_failed_to_remove_files: usize,
127     pub gained_space: u64,
128 }
129 
130 impl Info {
131     pub fn new() -> Self {
132         Default::default()
133     }
134 }
135 
136 /// Struct with the information required to perform a scan
137 pub struct DuplicateFinder {
138     text_messages: Messages,
139     information: Info,
140     files_with_identical_names: BTreeMap<String, Vec<FileEntry>>,    // File Name, File Entries
141     files_with_identical_size: BTreeMap<u64, Vec<FileEntry>>,        // File Size, File Entries
142     files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>, // File Size, groups of File Entries with identical hashes
143     directories: Directories,
144     allowed_extensions: Extensions,
145     excluded_items: ExcludedItems,
146     recursive_search: bool,
147     minimal_file_size: u64,
148     maximal_file_size: u64,
149     check_method: CheckingMethod,
150     delete_method: DeleteMethod,
151     hash_type: HashType,
152     ignore_hard_links: bool,
153     dryrun: bool,
154     stopped_search: bool,
155     use_cache: bool,
156     minimal_cache_file_size: u64,
157 }
158 
159 impl DuplicateFinder {
160     pub fn new() -> Self {
161         Self {
162             text_messages: Messages::new(),
163             information: Info::new(),
164             files_with_identical_names: Default::default(),
165             files_with_identical_size: Default::default(),
166             files_with_identical_hashes: Default::default(),
167             recursive_search: true,
168             allowed_extensions: Extensions::new(),
169             check_method: CheckingMethod::None,
170             delete_method: DeleteMethod::None,
171             minimal_file_size: 8192,
172             maximal_file_size: u64::MAX,
173             directories: Directories::new(),
174             excluded_items: ExcludedItems::new(),
175             stopped_search: false,
176             ignore_hard_links: true,
177             hash_type: HashType::Blake3,
178             dryrun: false,
179             use_cache: true,
180             minimal_cache_file_size: 2 * 1024 * 1024, // By default cache only files >= 2 MiB
181         }
182     }
183 
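    // Typical call sequence, as a sketch (the directory below is only an example path):
    //
    //     let mut finder = DuplicateFinder::new();
    //     finder.set_included_directory(vec![PathBuf::from("/home/user/Pictures")]);
    //     finder.set_check_method(CheckingMethod::Hash);
    //     finder.find_duplicates(None, None);
    //     for (size, groups) in finder.get_files_sorted_by_hash() { /* ... */ }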
184     pub fn find_duplicates(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) {
185         self.directories.optimize_directories(self.recursive_search, &mut self.text_messages);
186 
187         match self.check_method {
188             CheckingMethod::Name => {
189                 if !self.check_files_name(stop_receiver, progress_sender) {
190                     self.stopped_search = true;
191                     return;
192                 }
193             }
194             CheckingMethod::Size => {
195                 if !self.check_files_size(stop_receiver, progress_sender) {
196                     self.stopped_search = true;
197                     return;
198                 }
199             }
200             CheckingMethod::HashMb | CheckingMethod::Hash => {
201                 if !self.check_files_size(stop_receiver, progress_sender) {
202                     self.stopped_search = true;
203                     return;
204                 }
205                 if !self.check_files_hash(stop_receiver, progress_sender) {
206                     self.stopped_search = true;
207                     return;
208                 }
209             }
210             CheckingMethod::None => {
211                 panic!();
212             }
213         }
214         self.delete_files();
215         self.debug_print();
216     }
217 
218     pub const fn get_check_method(&self) -> &CheckingMethod {
219         &self.check_method
220     }
221 
222     pub fn get_stopped_search(&self) -> bool {
223         self.stopped_search
224     }
225 
226     pub fn set_minimal_cache_file_size(&mut self, minimal_cache_file_size: u64) {
227         self.minimal_cache_file_size = minimal_cache_file_size;
228     }
229 
230     pub const fn get_files_sorted_by_names(&self) -> &BTreeMap<String, Vec<FileEntry>> {
231         &self.files_with_identical_names
232     }
233 
234     pub fn set_use_cache(&mut self, use_cache: bool) {
235         self.use_cache = use_cache;
236     }
237 
238     pub const fn get_files_sorted_by_size(&self) -> &BTreeMap<u64, Vec<FileEntry>> {
239         &self.files_with_identical_size
240     }
241 
242     pub const fn get_files_sorted_by_hash(&self) -> &BTreeMap<u64, Vec<Vec<FileEntry>>> {
243         &self.files_with_identical_hashes
244     }
245     pub fn set_maximal_file_size(&mut self, maximal_file_size: u64) {
246         self.maximal_file_size = match maximal_file_size {
247             0 => 1,
248             t => t,
249         };
250     }
251 
252     pub const fn get_text_messages(&self) -> &Messages {
253         &self.text_messages
254     }
255 
256     pub const fn get_information(&self) -> &Info {
257         &self.information
258     }
259 
260     pub fn set_hash_type(&mut self, hash_type: HashType) {
261         self.hash_type = hash_type;
262     }
263 
264     pub fn set_ignore_hard_links(&mut self, ignore_hard_links: bool) {
265         self.ignore_hard_links = ignore_hard_links;
266     }
267 
268     pub fn set_dryrun(&mut self, dryrun: bool) {
269         self.dryrun = dryrun;
270     }
271 
272     pub fn set_check_method(&mut self, check_method: CheckingMethod) {
273         self.check_method = check_method;
274     }
275 
276     pub fn set_delete_method(&mut self, delete_method: DeleteMethod) {
277         self.delete_method = delete_method;
278     }
279 
280     pub fn set_minimal_file_size(&mut self, minimal_file_size: u64) {
281         self.minimal_file_size = match minimal_file_size {
282             0 => 1,
283             t => t,
284         };
285     }
286 
287     pub fn set_recursive_search(&mut self, recursive_search: bool) {
288         self.recursive_search = recursive_search;
289     }
290 
291     pub fn set_included_directory(&mut self, included_directory: Vec<PathBuf>) -> bool {
292         self.directories.set_included_directory(included_directory, &mut self.text_messages)
293     }
294 
295     pub fn set_excluded_directory(&mut self, excluded_directory: Vec<PathBuf>) {
296         self.directories.set_excluded_directory(excluded_directory, &mut self.text_messages);
297     }
298     pub fn set_allowed_extensions(&mut self, allowed_extensions: String) {
299         self.allowed_extensions.set_allowed_extensions(allowed_extensions, &mut self.text_messages);
300     }
301 
302     pub fn set_excluded_items(&mut self, excluded_items: Vec<String>) {
303         self.excluded_items.set_excluded_items(excluded_items, &mut self.text_messages);
304     }
305 
306     fn check_files_name(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
307         let start_time: SystemTime = SystemTime::now();
308         let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // Small enough not to waste memory, big enough to store most paths without the vector needing to reallocate
309 
310         // Add root folders for finding
311         for id in &self.directories.included_directories {
312             folders_to_check.push(id.clone());
313         }
314 
315         //// PROGRESS THREAD START
316         const LOOP_DURATION: u32 = 200; //in ms
317         let progress_thread_run = Arc::new(AtomicBool::new(true));
318 
319         let atomic_file_counter = Arc::new(AtomicUsize::new(0));
320 
321         let progress_thread_handle;
322         if let Some(progress_sender) = progress_sender {
323             let progress_send = progress_sender.clone();
324             let progress_thread_run = progress_thread_run.clone();
325             let atomic_file_counter = atomic_file_counter.clone();
326             progress_thread_handle = thread::spawn(move || loop {
327                 progress_send
328                     .unbounded_send(ProgressData {
329                         checking_method: CheckingMethod::Name,
330                         current_stage: 0,
331                         max_stage: 0,
332                         files_checked: atomic_file_counter.load(Ordering::Relaxed) as usize,
333                         files_to_check: 0,
334                     })
335                     .unwrap();
336                 if !progress_thread_run.load(Ordering::Relaxed) {
337                     break;
338                 }
339                 sleep(Duration::from_millis(LOOP_DURATION as u64));
340             });
341         } else {
342             progress_thread_handle = thread::spawn(|| {});
343         }
344 
345         //// PROGRESS THREAD END
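        // The same pattern is reused by every scan stage: a helper thread wakes up every
        // LOOP_DURATION milliseconds, reads the shared atomic counter and pushes a
        // ProgressData snapshot to the GUI, until `progress_thread_run` is cleared and the
        // thread is joined.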
346 
347         while !folders_to_check.is_empty() {
348             if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
349                 // End the thread which sends info to the GUI
350                 progress_thread_run.store(false, Ordering::Relaxed);
351                 progress_thread_handle.join().unwrap();
352                 return false;
353             }
354 
355             let current_folder = folders_to_check.pop().unwrap();
356 
357             // Read the current dir; if permission is denied, just go to the next one
358             let read_dir = match fs::read_dir(&current_folder) {
359                 Ok(t) => t,
360                 Err(e) => {
361                     self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e));
362                     continue;
363                 } // Permissions denied
364             };
365 
366             // Check every sub folder/file/link etc.
367             'dir: for entry in read_dir {
368                 let entry_data = match entry {
369                     Ok(t) => t,
370                     Err(e) => {
371                         self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e));
372                         continue 'dir;
373                     } //Permissions denied
374                 };
375                 let metadata: Metadata = match entry_data.metadata() {
376                     Ok(t) => t,
377                     Err(e) => {
378                         self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e));
379                         continue 'dir;
380                     } //Permissions denied
381                 };
382                 if metadata.is_dir() {
383                     if !self.recursive_search {
384                         continue 'dir;
385                     }
386 
387                     let next_folder = current_folder.join(entry_data.file_name());
388                     if self.directories.is_excluded(&next_folder) {
389                         continue 'dir;
390                     }
391 
392                     if self.excluded_items.is_excluded(&next_folder) {
393                         continue 'dir;
394                     }
395 
396                     folders_to_check.push(next_folder);
397                 } else if metadata.is_file() {
398                     atomic_file_counter.fetch_add(1, Ordering::Relaxed);
399                     // let mut have_valid_extension: bool;
400                     let file_name_lowercase: String = match entry_data.file_name().into_string() {
401                         Ok(t) => t,
402                         Err(_inspected) => {
403                             println!("File {:?} does not have a valid UTF-8 name", entry_data);
404                             continue 'dir;
405                         }
406                     }
407                     .to_lowercase();
408 
409                     // Checking allowed extensions
410                     if !self.allowed_extensions.file_extensions.is_empty() {
411                         let allowed = self.allowed_extensions.file_extensions.iter().any(|e| file_name_lowercase.ends_with((".".to_string() + e.to_lowercase().as_str()).as_str()));
412                         if !allowed {
413                             // Not an allowed extension, ignore it.
414                             continue 'dir;
415                         }
416                     }
417                     // Checking files
418                     if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) {
419                         let current_file_name = current_folder.join(entry_data.file_name());
420                         if self.excluded_items.is_excluded(&current_file_name) {
421                             continue 'dir;
422                         }
423 
424                         // Creating new file entry
425                         let fe: FileEntry = FileEntry {
426                             path: current_file_name.clone(),
427                             size: metadata.len(),
428                             modified_date: match metadata.modified() {
429                                 Ok(t) => match t.duration_since(UNIX_EPOCH) {
430                                     Ok(d) => d.as_secs(),
431                                     Err(_inspected) => {
432                                         self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display()));
433                                         0
434                                     }
435                                 },
436                                 Err(e) => {
437                                     self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e));
438                                     0
439                                 } // Permissions Denied
440                             },
441                             hash: "".to_string(),
442                         };
443 
444                         // Adding files to BTreeMap
445                         self.files_with_identical_names.entry(entry_data.file_name().to_string_lossy().to_string()).or_insert_with(Vec::new);
446                         self.files_with_identical_names.get_mut(&entry_data.file_name().to_string_lossy().to_string()).unwrap().push(fe);
447                     }
448                 }
449             }
450         }
451 
452         // End the thread which sends info to the GUI
453         progress_thread_run.store(false, Ordering::Relaxed);
454         progress_thread_handle.join().unwrap();
455 
456         // Create a new BTreeMap without single-entry groups (those files have no duplicates)
457         let mut new_map: BTreeMap<String, Vec<FileEntry>> = Default::default();
458 
459         for (name, vector) in &self.files_with_identical_names {
460             if vector.len() > 1 {
461                 self.information.number_of_duplicated_files_by_name += vector.len() - 1;
462                 self.information.number_of_groups_by_name += 1;
463                 new_map.insert(name.clone(), vector.clone());
464             }
465         }
466         self.files_with_identical_names = new_map;
467 
468         Common::print_time(start_time, SystemTime::now(), "check_files_name".to_string());
469         true
470     }
471 
472     /// Reads each file's length and puts it into a separate bucket for every distinct size
473     /// Buckets that end up with only one file are removed, because a lone file cannot be a duplicate
474     fn check_files_size(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
475         let start_time: SystemTime = SystemTime::now();
476         let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // Small enough not to waste memory, big enough to store most paths without the vector needing to reallocate
477 
478         // Add root folders for finding
479         for id in &self.directories.included_directories {
480             folders_to_check.push(id.clone());
481         }
482 
483         //// PROGRESS THREAD START
484         const LOOP_DURATION: u32 = 200; //in ms
485         let progress_thread_run = Arc::new(AtomicBool::new(true));
486 
487         let atomic_file_counter = Arc::new(AtomicUsize::new(0));
488 
489         let progress_thread_handle;
490         if let Some(progress_sender) = progress_sender {
491             let progress_send = progress_sender.clone();
492             let progress_thread_run = progress_thread_run.clone();
493             let atomic_file_counter = atomic_file_counter.clone();
494             let checking_method = self.check_method.clone();
495             let max_stage = match self.check_method {
496                 CheckingMethod::Size => 0,
497                 CheckingMethod::HashMb | CheckingMethod::Hash => 2,
498                 _ => 255,
499             };
500             progress_thread_handle = thread::spawn(move || loop {
501                 progress_send
502                     .unbounded_send(ProgressData {
503                         checking_method: checking_method.clone(),
504                         current_stage: 0,
505                         max_stage,
506                         files_checked: atomic_file_counter.load(Ordering::Relaxed) as usize,
507                         files_to_check: 0,
508                     })
509                     .unwrap();
510                 if !progress_thread_run.load(Ordering::Relaxed) {
511                     break;
512                 }
513                 sleep(Duration::from_millis(LOOP_DURATION as u64));
514             });
515         } else {
516             progress_thread_handle = thread::spawn(|| {});
517         }
518 
519         //// PROGRESS THREAD END
520 
521         while !folders_to_check.is_empty() {
522             if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
523                 // End the thread which sends info to the GUI
524                 progress_thread_run.store(false, Ordering::Relaxed);
525                 progress_thread_handle.join().unwrap();
526                 return false;
527             }
528 
529             let current_folder = folders_to_check.pop().unwrap();
530 
531             // Read the current dir; if permission is denied, just go to the next one
532             let read_dir = match fs::read_dir(&current_folder) {
533                 Ok(t) => t,
534                 Err(e) => {
535                     self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e));
536                     continue;
537                 } // Permissions denied
538             };
539 
540             // Check every sub folder/file/link etc.
541             'dir: for entry in read_dir {
542                 let entry_data = match entry {
543                     Ok(t) => t,
544                     Err(e) => {
545                         self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e));
546                         continue 'dir;
547                     } //Permissions denied
548                 };
549                 let metadata: Metadata = match entry_data.metadata() {
550                     Ok(t) => t,
551                     Err(e) => {
552                         self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e));
553                         continue 'dir;
554                     } //Permissions denied
555                 };
556                 if metadata.is_dir() {
557                     if !self.recursive_search {
558                         continue 'dir;
559                     }
560 
561                     let next_folder = current_folder.join(entry_data.file_name());
562                     if self.directories.is_excluded(&next_folder) {
563                         continue 'dir;
564                     }
565 
566                     if self.excluded_items.is_excluded(&next_folder) {
567                         continue 'dir;
568                     }
569 
570                     folders_to_check.push(next_folder);
571                 } else if metadata.is_file() {
572                     atomic_file_counter.fetch_add(1, Ordering::Relaxed);
573                     // let mut have_valid_extension: bool;
574                     let file_name_lowercase: String = match entry_data.file_name().into_string() {
575                         Ok(t) => t,
576                         Err(_inspected) => {
577                             println!("File {:?} does not have a valid UTF-8 name", entry_data);
578                             continue 'dir;
579                         }
580                     }
581                     .to_lowercase();
582 
583                     // Checking allowed extensions
584                     if !self.allowed_extensions.file_extensions.is_empty() {
585                         let allowed = self.allowed_extensions.file_extensions.iter().any(|e| file_name_lowercase.ends_with((".".to_string() + e.to_lowercase().as_str()).as_str()));
586                         if !allowed {
587                             // Not an allowed extension, ignore it.
588 
589                             continue 'dir;
590                         }
591                     }
592                     // Checking files
593                     if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) {
594                         let current_file_name = current_folder.join(entry_data.file_name());
595                         if self.excluded_items.is_excluded(&current_file_name) {
596                             continue 'dir;
597                         }
598 
599                         // Creating new file entry
600                         let fe: FileEntry = FileEntry {
601                             path: current_file_name.clone(),
602                             size: metadata.len(),
603                             modified_date: match metadata.modified() {
604                                 Ok(t) => match t.duration_since(UNIX_EPOCH) {
605                                     Ok(d) => d.as_secs(),
606                                     Err(_inspected) => {
607                                         self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display()));
608                                         0
609                                     }
610                                 },
611                                 Err(e) => {
612                                     self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e));
613                                     0
614                                 } // Permissions Denied
615                             },
616                             hash: "".to_string(),
617                         };
618 
619                         // Adding files to BTreeMap
620                         self.files_with_identical_size.entry(metadata.len()).or_insert_with(Vec::new);
621                         self.files_with_identical_size.get_mut(&metadata.len()).unwrap().push(fe);
622                     }
623                 }
624             }
625         }
626         // End the thread which sends info to the GUI
627         progress_thread_run.store(false, Ordering::Relaxed);
628         progress_thread_handle.join().unwrap();
629 
630         // Create a new BTreeMap without single-entry groups (those files have no duplicates)
631         let mut new_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
632 
633         for (size, vec) in &self.files_with_identical_size {
634             if vec.len() <= 1 {
635                 continue;
636             }
637 
638             let vector;
639             if self.ignore_hard_links {
640                 vector = filter_hard_links(vec);
641             } else {
642                 vector = vec.clone();
643             }
644 
645             if vector.len() > 1 {
646                 self.information.number_of_duplicated_files_by_size += vector.len() - 1;
647                 self.information.number_of_groups_by_size += 1;
648                 self.information.lost_space_by_size += (vector.len() as u64 - 1) * size;
649                 new_map.insert(*size, vector);
650             }
651         }
652         self.files_with_identical_size = new_map;
653 
654         Common::print_time(start_time, SystemTime::now(), "check_files_size".to_string());
655         true
656     }
657 
658     /// The slowest checking type, which must be applied after checking for size
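    /// Works in two passes over the groups produced by `check_files_size`: first a cheap
    /// partial "prehash" of the start of every file splits each same-size group, then the
    /// surviving candidates are hashed fully (`Hash`) or, for `HashMb`, over roughly their
    /// first megabyte only. With `Hash` and `use_cache` enabled, hashes of unchanged files
    /// are reused from the cache file and new results are written back to it.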
659     fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
660         let check_type = Arc::new(self.hash_type);
661 
662         let start_time: SystemTime = SystemTime::now();
663         let check_was_breaked = AtomicBool::new(false); // Used for breaking from GUI and ending check thread
664         let mut pre_checked_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
665 
666         //// PROGRESS THREAD START
667         const LOOP_DURATION: u32 = 200; //in ms
668         let progress_thread_run = Arc::new(AtomicBool::new(true));
669 
670         let atomic_file_counter = Arc::new(AtomicUsize::new(0));
671 
672         let progress_thread_handle;
673         if let Some(progress_sender) = progress_sender {
674             let progress_send = progress_sender.clone();
675             let progress_thread_run = progress_thread_run.clone();
676             let atomic_file_counter = atomic_file_counter.clone();
677             let files_to_check = self.files_with_identical_size.iter().map(|e| e.1.len()).sum();
678             let checking_method = self.check_method.clone();
679             progress_thread_handle = thread::spawn(move || loop {
680                 progress_send
681                     .unbounded_send(ProgressData {
682                         checking_method: checking_method.clone(),
683                         current_stage: 1,
684                         max_stage: 2,
685                         files_checked: atomic_file_counter.load(Ordering::Relaxed) as usize,
686                         files_to_check,
687                     })
688                     .unwrap();
689                 if !progress_thread_run.load(Ordering::Relaxed) {
690                     break;
691                 }
692                 sleep(Duration::from_millis(LOOP_DURATION as u64));
693             });
694         } else {
695             progress_thread_handle = thread::spawn(|| {});
696         }
697 
698         //// PROGRESS THREAD END
699 
700         #[allow(clippy::type_complexity)]
701         let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>, u64)> = self
702             .files_with_identical_size
703             .par_iter()
704             .map(|(size, vec_file_entry)| {
705                 let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
706                 let mut errors: Vec<String> = Vec::new();
707                 let mut bytes_read: u64 = 0;
708                 let mut buffer = [0u8; 1024 * 2];
709 
710                 atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
711                 for file_entry in vec_file_entry {
712                     if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
713                         check_was_breaked.store(true, Ordering::Relaxed);
714                         return None;
715                     }
716                     match hash_calculation(&mut buffer, file_entry, &check_type, 0) {
717                         Ok((hash_string, bytes)) => {
718                             bytes_read += bytes;
719                             hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new);
720                             hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.clone());
721                         }
722                         Err(s) => errors.push(s),
723                     }
724                 }
725                 Some((*size, hashmap_with_hash, errors, bytes_read))
726             })
727             .while_some()
728             .collect();
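        // `while_some()` stops the parallel iterator at the first `None`, which is how a stop
        // request aborts the prehash early; `check_was_breaked` records that this happened so
        // the function can return `false` after the progress thread is shut down.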
729 
730         // End the thread which sends info to the GUI
731         progress_thread_run.store(false, Ordering::Relaxed);
732         progress_thread_handle.join().unwrap();
733 
734         // Check if the user aborted the search (only possible from the GUI)
735         if check_was_breaked.load(Ordering::Relaxed) {
736             return false;
737         }
738 
739         // Check results
740         for (size, hash_map, mut errors, bytes_read) in pre_hash_results {
741             self.information.bytes_read_when_hashing += bytes_read;
742             self.text_messages.warnings.append(&mut errors);
743             for (_hash, mut vec_file_entry) in hash_map {
744                 if vec_file_entry.len() > 1 {
745                     pre_checked_map.entry(size).or_insert_with(Vec::new);
746                     pre_checked_map.get_mut(&size).unwrap().append(&mut vec_file_entry);
747                 }
748             }
749         }
750 
751         Common::print_time(start_time, SystemTime::now(), "check_files_hash - prehash".to_string());
752         let start_time: SystemTime = SystemTime::now();
753 
754         /////////////////////////
755 
756         //// PROGRESS THREAD START
757         // const LOOP_DURATION: u32 = 200; //in ms
758         let progress_thread_run = Arc::new(AtomicBool::new(true));
759 
760         let atomic_file_counter = Arc::new(AtomicUsize::new(0));
761 
762         let progress_thread_handle;
763         if let Some(progress_sender) = progress_sender {
764             let progress_send = progress_sender.clone();
765             let progress_thread_run = progress_thread_run.clone();
766             let atomic_file_counter = atomic_file_counter.clone();
767             let files_to_check = pre_checked_map.iter().map(|e| e.1.len()).sum();
768             let checking_method = self.check_method.clone();
769             progress_thread_handle = thread::spawn(move || loop {
770                 progress_send
771                     .unbounded_send(ProgressData {
772                         checking_method: checking_method.clone(),
773                         current_stage: 2,
774                         max_stage: 2,
775                         files_checked: atomic_file_counter.load(Ordering::Relaxed) as usize,
776                         files_to_check,
777                     })
778                     .unwrap();
779                 if !progress_thread_run.load(Ordering::Relaxed) {
780                     break;
781                 }
782                 sleep(Duration::from_millis(LOOP_DURATION as u64));
783             });
784         } else {
785             progress_thread_handle = thread::spawn(|| {});
786         }
787 
788         //// PROGRESS THREAD END
789 
790         #[allow(clippy::type_complexity)]
791         let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>, u64)>;
792 
793         match self.check_method {
794             CheckingMethod::HashMb => {
795                 full_hash_results = pre_checked_map
796                     .par_iter()
797                     .map(|(size, vec_file_entry)| {
798                         let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
799                         let mut errors: Vec<String> = Vec::new();
800                         let mut bytes_read: u64 = 0;
801                         let mut buffer = [0u8; 1024 * 128];
802                         atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
803                         for file_entry in vec_file_entry {
804                             if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
805                                 check_was_breaked.store(true, Ordering::Relaxed);
806                                 return None;
807                             }
808 
809                             match hash_calculation(&mut buffer, file_entry, &check_type, HASH_MB_LIMIT_BYTES) {
810                                 Ok((hash_string, bytes)) => {
811                                     bytes_read += bytes;
812                                     hashmap_with_hash.entry(hash_string.to_string()).or_insert_with(Vec::new);
813                                     hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.to_owned());
814                                 }
815                                 Err(s) => errors.push(s),
816                             }
817                         }
818                         Some((*size, hashmap_with_hash, errors, bytes_read))
819                     })
820                     .while_some()
821                     .collect();
822             }
823             CheckingMethod::Hash => {
824                 let loaded_hash_map;
825 
826                 let mut records_already_cached: BTreeMap<u64, Vec<FileEntry>> = Default::default();
827                 let mut non_cached_files_to_check: BTreeMap<u64, Vec<FileEntry>> = Default::default();
828 
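                // When the cache is enabled, split the candidates into entries whose path and
                // modification date match a cached record (their stored hash can be reused) and
                // entries that still have to be hashed in this run.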
829                 if self.use_cache {
830                     loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, &self.hash_type) {
831                         Some(t) => t,
832                         None => Default::default(),
833                     };
834 
835                     for (size, vec_file_entry) in pre_checked_map {
836                         #[allow(clippy::collapsible_if)]
837                         if !loaded_hash_map.contains_key(&size) {
838                             // If the loaded cache doesn't contain any entry for this size
839                             non_cached_files_to_check.insert(size, vec_file_entry);
840                         } else {
841                             let loaded_vec_file_entry = loaded_hash_map.get(&size).unwrap();
842 
843                             for file_entry in vec_file_entry {
844                                 let mut found: bool = false;
845                                 for loaded_file_entry in loaded_vec_file_entry {
846                                     if file_entry.path == loaded_file_entry.path && file_entry.modified_date == loaded_file_entry.modified_date {
847                                         records_already_cached.entry(file_entry.size).or_insert_with(Vec::new);
848                                         records_already_cached.get_mut(&file_entry.size).unwrap().push(loaded_file_entry.clone());
849                                         found = true;
850                                         break;
851                                     }
852                                 }
853 
854                                 if !found {
855                                     non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new);
856                                     non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry);
857                                 }
858                             }
859                         }
860                     }
861                 } else {
862                     loaded_hash_map = Default::default();
863                     mem::swap(&mut pre_checked_map, &mut non_cached_files_to_check);
864                 }
865 
866                 full_hash_results = non_cached_files_to_check
867                     .par_iter()
868                     .map(|(size, vec_file_entry)| {
869                         let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
870                         let mut errors: Vec<String> = Vec::new();
871                         let mut bytes_read: u64 = 0;
872                         let mut buffer = [0u8; 1024 * 128];
873 
874                         atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
875                         for file_entry in vec_file_entry {
876                             if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
877                                 check_was_breaked.store(true, Ordering::Relaxed);
878                                 return None;
879                             }
880 
881                             match hash_calculation(&mut buffer, file_entry, &check_type, u64::MAX) {
882                                 Ok((hash_string, bytes)) => {
883                                     bytes_read += bytes;
884                                     let mut file_entry = file_entry.clone();
885                                     file_entry.hash = hash_string.clone();
886                                     hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new);
887                                     hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry);
888                                 }
889                                 Err(s) => errors.push(s),
890                             }
891                         }
892                         Some((*size, hashmap_with_hash, errors, bytes_read))
893                     })
894                     .while_some()
895                     .collect();
896 
897                 if self.use_cache {
898                     'main: for (size, vec_file_entry) in records_already_cached {
899                         // If this size already exists in full_hash_results, merge the cached entries into that group here (we cannot hold a mutable and an immutable reference to full_hash_results at once)
900                         for (full_size, full_hashmap, _errors, _bytes_read) in &mut full_hash_results {
901                             if size == *full_size {
902                                 for file_entry in vec_file_entry {
903                                     full_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
904                                     full_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
905                                 }
906                                 continue 'main;
907                             }
908                         }
909                         // The size doesn't exist yet, so add the cached entries as a new group
910                         let mut temp_hashmap: BTreeMap<String, Vec<FileEntry>> = Default::default();
911                         for file_entry in vec_file_entry {
912                             temp_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
913                             temp_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
914                         }
915                         full_hash_results.push((size, temp_hashmap, Vec::new(), 0));
916                     }
917 
918                     // Save everything to the cache file: entries previously loaded from it plus all results hashed in this run
919                     let mut all_results: BTreeMap<String, FileEntry> = Default::default();
920                     for (_size, vec_file_entry) in loaded_hash_map {
921                         for file_entry in vec_file_entry {
922                             all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
923                         }
924                     }
925                     for (_size, hashmap, _errors, _bytes_read) in &full_hash_results {
926                         for vec_file_entry in hashmap.values() {
927                             for file_entry in vec_file_entry {
928                                 all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone());
929                             }
930                         }
931                     }
932                     save_hashes_to_file(&all_results, &mut self.text_messages, &self.hash_type, self.minimal_cache_file_size);
933                 }
934             }
935             _ => panic!("What"),
936         }
937 
938         // End the thread which sends info to the GUI
939         progress_thread_run.store(false, Ordering::Relaxed);
940         progress_thread_handle.join().unwrap();
941 
942         // Check if the user aborted the search (only possible from the GUI)
943         if check_was_breaked.load(Ordering::Relaxed) {
944             return false;
945         }
946 
947         for (size, hash_map, mut errors, bytes_read) in full_hash_results {
948             self.information.bytes_read_when_hashing += bytes_read;
949             self.text_messages.warnings.append(&mut errors);
950             for (_hash, vec_file_entry) in hash_map {
951                 if vec_file_entry.len() > 1 {
952                     self.files_with_identical_hashes.entry(size).or_insert_with(Vec::new);
953                     self.files_with_identical_hashes.get_mut(&size).unwrap().push(vec_file_entry);
954                 }
955             }
956         }
957 
958         /////////////////////////
959 
960         for (size, vector_vectors) in &self.files_with_identical_hashes {
961             for vector in vector_vectors {
962                 self.information.number_of_duplicated_files_by_hash += vector.len() - 1;
963                 self.information.number_of_groups_by_hash += 1;
964                 self.information.lost_space_by_hash += (vector.len() as u64 - 1) * size;
965             }
966         }
967 
968         Common::print_time(start_time, SystemTime::now(), "check_files_hash - full hash".to_string());
969 
970         // Clean unused data
971         self.files_with_identical_size = Default::default();
972 
973         true
974     }
975 
976     /// Deletes files according to the delete method, using the BTreeMaps filled earlier
977     /// The actual deletion is delegated to a helper function to avoid duplicating code
978     fn delete_files(&mut self) {
979         let start_time: SystemTime = SystemTime::now();
980         if self.delete_method == DeleteMethod::None {
981             return;
982         }
983 
984         match self.check_method {
985             CheckingMethod::Name => {
986                 for vector in self.files_with_identical_names.values() {
987                     let tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun);
988                     self.information.gained_space += tuple.0;
989                     self.information.number_of_removed_files += tuple.1;
990                     self.information.number_of_failed_to_remove_files += tuple.2;
991                 }
992             }
993             CheckingMethod::Hash | CheckingMethod::HashMb => {
994                 for vector_vectors in self.files_with_identical_hashes.values() {
995                     for vector in vector_vectors.iter() {
996                         let tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun);
997                         self.information.gained_space += tuple.0;
998                         self.information.number_of_removed_files += tuple.1;
999                         self.information.number_of_failed_to_remove_files += tuple.2;
1000                     }
1001                 }
1002             }
1003             CheckingMethod::Size => {
1004                 for vector in self.files_with_identical_size.values() {
1005                     let tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun);
1006                     self.information.gained_space += tuple.0;
1007                     self.information.number_of_removed_files += tuple.1;
1008                     self.information.number_of_failed_to_remove_files += tuple.2;
1009                 }
1010             }
1011             CheckingMethod::None => {
1012                 // This branch should never be reached
1013                 panic!("Checking method should never be none.");
1014             }
1015         }
1016 
1017         Common::print_time(start_time, SystemTime::now(), "delete_files".to_string());
1018     }
1019 }
1020 impl Default for DuplicateFinder {
1021     fn default() -> Self {
1022         Self::new()
1023     }
1024 }
1025 
1026 impl DebugPrint for DuplicateFinder {
1027     #[allow(dead_code)]
1028     #[allow(unreachable_code)]
1029     /// Debug printing - only available in debug builds
1030     fn debug_print(&self) {
1031         #[cfg(not(debug_assertions))]
1032         {
1033             return;
1034         }
1035         println!("---------------DEBUG PRINT---------------");
1036         println!("### Information");
1037 
1038         println!("Errors size - {}", self.text_messages.errors.len());
1039         println!("Warnings size - {}", self.text_messages.warnings.len());
1040         println!("Messages size - {}", self.text_messages.messages.len());
1041         println!(
1042             "Number of duplicated files by size(in groups) - {} ({})",
1043             self.information.number_of_duplicated_files_by_size, self.information.number_of_groups_by_size
1044         );
1045         println!(
1046             "Number of duplicated files by hash(in groups) - {} ({})",
1047             self.information.number_of_duplicated_files_by_hash, self.information.number_of_groups_by_hash
1048         );
1049         println!(
1050             "Number of duplicated files by name(in groups) - {} ({})",
1051             self.information.number_of_duplicated_files_by_name, self.information.number_of_groups_by_name
1052         );
1053         println!("Lost space by size - {} ({} bytes)", self.information.lost_space_by_size.file_size(options::BINARY).unwrap(), self.information.lost_space_by_size);
1054         println!("Lost space by hash - {} ({} bytes)", self.information.lost_space_by_hash.file_size(options::BINARY).unwrap(), self.information.lost_space_by_hash);
1055         println!(
1056             "Gained space by removing duplicated entries - {} ({} bytes)",
1057             self.information.gained_space.file_size(options::BINARY).unwrap(),
1058             self.information.gained_space
1059         );
1060         println!(
1061             "Bytes read when hashing - {} ({} bytes)",
1062             self.information.bytes_read_when_hashing.file_size(options::BINARY).unwrap(),
1063             self.information.bytes_read_when_hashing
1064         );
1065         println!("Number of removed files - {}", self.information.number_of_removed_files);
1066         println!("Number of failed to remove files - {}", self.information.number_of_failed_to_remove_files);
1067 
1068         println!("### Other");
1069 
1070         println!("Files list size - {}", self.files_with_identical_size.len());
1071         println!("Hashed Files list size - {}", self.files_with_identical_hashes.len());
1072         println!("Allowed extensions - {:?}", self.allowed_extensions.file_extensions);
1073         println!("Excluded items - {:?}", self.excluded_items.items);
1074         println!("Included directories - {:?}", self.directories.included_directories);
1075         println!("Excluded directories - {:?}", self.directories.excluded_directories);
1076         println!("Recursive search - {}", self.recursive_search);
1077         println!("Minimum file size - {:?}", self.minimal_file_size);
1078         println!("Checking Method - {:?}", self.check_method);
1079         println!("Delete Method - {:?}", self.delete_method);
1080         println!("-----------------------------------------");
1081     }
1082 }
1083 impl SaveResults for DuplicateFinder {
1084     fn save_results_to_file(&mut self, file_name: &str) -> bool {
1085         let start_time: SystemTime = SystemTime::now();
1086         let file_name: String = match file_name {
1087             "" => "results.txt".to_string(),
1088             k => k.to_string(),
1089         };
1090 
1091         let file_handler = match File::create(&file_name) {
1092             Ok(t) => t,
1093             Err(e) => {
1094                 self.text_messages.errors.push(format!("Failed to create file {}, reason {}", file_name, e));
1095                 return false;
1096             }
1097         };
1098         let mut writer = BufWriter::new(file_handler);
1099 
1100         if let Err(e) = writeln!(
1101             writer,
1102             "Results of searching {:?} with excluded directories {:?} and excluded items {:?}",
1103             self.directories.included_directories, self.directories.excluded_directories, self.excluded_items.items
1104         ) {
1105             self.text_messages.errors.push(format!("Failed to save results to file {}, reason {}", file_name, e));
1106             return false;
1107         }
1108         match self.check_method {
1109             CheckingMethod::Name => {
1110                 if !self.files_with_identical_names.is_empty() {
1111                     writeln!(writer, "-------------------------------------------------Files with same names-------------------------------------------------").unwrap();
1112                     writeln!(
1113                         writer,
1114                         "Found {} files in {} groups with the same name (they may have different content)",
1115                         self.information.number_of_duplicated_files_by_name, self.information.number_of_groups_by_name,
1116                     )
1117                     .unwrap();
1118                     for (name, vector) in self.files_with_identical_names.iter().rev() {
1119                         writeln!(writer, "Name - {} - {} files ", name, vector.len()).unwrap();
1120                         for j in vector {
1121                             writeln!(writer, "{}", j.path.display()).unwrap();
1122                         }
1123                         writeln!(writer).unwrap();
1124                     }
1125                 } else {
1126                     write!(writer, "Did not find any files with the same name.").unwrap();
1127                 }
1128             }
1129             CheckingMethod::Size => {
1130                 if !self.files_with_identical_size.is_empty() {
1131                     writeln!(writer, "-------------------------------------------------Files with same size-------------------------------------------------").unwrap();
1132                     writeln!(
1133                         writer,
1134                         "Found {} duplicated files in {} groups which take {}.",
1135                         self.information.number_of_duplicated_files_by_size,
1136                         self.information.number_of_groups_by_size,
1137                         self.information.lost_space_by_size.file_size(options::BINARY).unwrap()
1138                     )
1139                     .unwrap();
1140                     for (size, vector) in self.files_with_identical_size.iter().rev() {
1141                         write!(writer, "\n---- Size {} ({}) - {} files \n", size.file_size(options::BINARY).unwrap(), size, vector.len()).unwrap();
1142                         for file_entry in vector {
1143                             writeln!(writer, "{}", file_entry.path.display()).unwrap();
1144                         }
1145                     }
1146                 } else {
1147                     write!(writer, "No duplicates found.").unwrap();
1148                 }
1149             }
1150             CheckingMethod::Hash | CheckingMethod::HashMb => {
1151                 if !self.files_with_identical_hashes.is_empty() {
1152                     writeln!(writer, "-------------------------------------------------Files with same hashes-------------------------------------------------").unwrap();
1153                     writeln!(
1154                         writer,
1155                         "Found {} duplicated files in {} groups which take {}.",
1156                         self.information.number_of_duplicated_files_by_hash,
1157                         self.information.number_of_groups_by_hash,
1158                         self.information.lost_space_by_hash.file_size(options::BINARY).unwrap()
1159                     )
1160                     .unwrap();
1161                     for (size, vectors_vector) in self.files_with_identical_hashes.iter().rev() {
1162                         for vector in vectors_vector {
1163                             writeln!(writer, "\n---- Size {} ({}) - {} files", size.file_size(options::BINARY).unwrap(), size, vector.len()).unwrap();
1164                             for file_entry in vector {
1165                                 writeln!(writer, "{}", file_entry.path.display()).unwrap();
1166                             }
1167                         }
1168                     }
1169                 } else {
1170                     write!(writer, "No duplicates found.").unwrap();
1171                 }
1172             }
1173             CheckingMethod::None => {
1174                 panic!("Checking method should never be None here");
1175             }
1176         }
1177         Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
1178         true
1179     }
1180 }
1181 impl PrintResults for DuplicateFinder {
1182     /// Prints information about duplicated entries
1183     /// Only needed for CLI
1184     fn print_results(&self) {
1185         let start_time: SystemTime = SystemTime::now();
1186         let mut number_of_files: u64 = 0;
1187         let mut number_of_groups: u64 = 0;
1188 
1189         match self.check_method {
1190             CheckingMethod::Name => {
1191                 for i in &self.files_with_identical_names {
1192                     number_of_files += i.1.len() as u64;
1193                     number_of_groups += 1;
1194                 }
1195                 println!("Found {} files in {} groups with the same name (may have different content)", number_of_files, number_of_groups);
1196                 for (name, vector) in &self.files_with_identical_names {
1197                     println!("Name - {} - {} files ", name, vector.len());
1198                     for j in vector {
1199                         println!("{}", j.path.display());
1200                     }
1201                     println!();
1202                 }
1203             }
1204             CheckingMethod::Hash | CheckingMethod::HashMb => {
1205                 for (_size, vector) in self.files_with_identical_hashes.iter() {
1206                     for j in vector {
1207                         number_of_files += j.len() as u64;
1208                         number_of_groups += 1;
1209                     }
1210                 }
1211                 println!(
1212                     "Found {} duplicated files in {} groups with the same content, which take {}:",
1213                     number_of_files,
1214                     number_of_groups,
1215                     self.information.lost_space_by_hash.file_size(options::BINARY).unwrap()
1216                 );
1217                 for (size, vector) in self.files_with_identical_hashes.iter().rev() {
1218                     for j in vector {
1219                         println!("Size - {} ({}) - {} files ", size.file_size(options::BINARY).unwrap(), size, j.len());
1220                         for k in j {
1221                             println!("{}", k.path.display());
1222                         }
1223                         println!("----");
1224                     }
1225                     println!();
1226                 }
1227             }
1228             CheckingMethod::Size => {
1229                 for i in &self.files_with_identical_size {
1230                     number_of_files += i.1.len() as u64;
1231                     number_of_groups += 1;
1232                 }
1233                 println!(
1234                     "Found {} files in {} groups with the same size (may have different content), which take {}:",
1235                     number_of_files,
1236                     number_of_groups,
1237                     self.information.lost_space_by_size.file_size(options::BINARY).unwrap()
1238                 );
1239                 for (size, vector) in &self.files_with_identical_size {
1240                     println!("Size - {} ({}) - {} files ", size.file_size(options::BINARY).unwrap(), size, vector.len());
1241                     for j in vector {
1242                         println!("{}", j.path.display());
1243                     }
1244                     println!();
1245                 }
1246             }
1247             CheckingMethod::None => {
1248                 panic!("Checking method should never be set to None");
1249             }
1250         }
1251         Common::print_time(start_time, SystemTime::now(), "print_entries".to_string());
1252     }
1253 }
1254 
1255 /// Removes duplicated files from the given slice (vector) using the provided delete method.
1256 /// Returns the gained space, the number of removed files and the number of files that failed to be removed; warnings are appended to `text_messages`.
1257 fn delete_files(vector: &[FileEntry], delete_method: &DeleteMethod, text_messages: &mut Messages, dryrun: bool) -> (u64, usize, usize) {
1258     assert!(vector.len() > 1, "Vector length must be bigger than 1 (this should be ensured in previous steps).");
1259     let mut gained_space: u64 = 0;
1260     let mut removed_files: usize = 0;
1261     let mut failed_to_remove_files: usize = 0;
1262     let mut values = vector.iter().enumerate();
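         // `q_index` is the index of the entry that is always preserved: the newest file for OneOldest/AllExceptNewest,
         // the oldest file for OneNewest/AllExceptOldest and HardLink (where it also serves as the link source).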
1263     let q_index = match delete_method {
1264         DeleteMethod::OneOldest | DeleteMethod::AllExceptNewest => values.max_by(|(_, l), (_, r)| l.modified_date.cmp(&r.modified_date)),
1265         DeleteMethod::OneNewest | DeleteMethod::AllExceptOldest | DeleteMethod::HardLink => values.min_by(|(_, l), (_, r)| l.modified_date.cmp(&r.modified_date)),
1266         DeleteMethod::None => values.next(),
1267     };
1268     let q_index = q_index.map(|t| t.0).unwrap_or(0);
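         // `n` caps how many entries may be processed below: exactly one for the One* methods, no limit for the rest.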
1269     let n = match delete_method {
1270         DeleteMethod::OneNewest | DeleteMethod::OneOldest => 1,
1271         DeleteMethod::AllExceptNewest | DeleteMethod::AllExceptOldest | DeleteMethod::None | DeleteMethod::HardLink => usize::MAX,
1272     };
1273     for (index, file) in vector.iter().enumerate() {
1274         if q_index == index {
1275             continue;
1276         } else if removed_files + failed_to_remove_files >= n {
1277             break;
1278         }
1279 
1280         let r = match delete_method {
1281             DeleteMethod::OneOldest | DeleteMethod::OneNewest | DeleteMethod::AllExceptOldest | DeleteMethod::AllExceptNewest => {
1282                 if dryrun {
1283                     Ok(Some(format!("Delete {}", file.path.display())))
1284                 } else {
1285                     fs::remove_file(&file.path).map(|_| None)
1286                 }
1287             }
1288             DeleteMethod::HardLink => {
1289                 let src = &vector[q_index].path;
1290                 if dryrun {
1291                     Ok(Some(format!("Replace file {} with hard link to {}", file.path.display(), src.display())))
1292                 } else {
1293                     make_hard_link(src, &file.path).map(|_| None)
1294                 }
1295             }
1296             DeleteMethod::None => Ok(None),
1297         };
1298 
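             // Ok(Some(message)) is produced by a dry run (only the message is recorded); Ok(None) means the operation
             // was performed (or skipped for DeleteMethod::None). Both count towards removed files and gained space.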
1299         match r {
1300             Err(e) => {
1301                 failed_to_remove_files += 1;
1302                 text_messages.warnings.push(format!("Failed to remove {} ({})", file.path.display(), e));
1303             }
1304             Ok(Some(msg)) => {
1305                 text_messages.messages.push(msg);
1306                 removed_files += 1;
1307                 gained_space += file.size;
1308             }
1309             Ok(None) => {
1310                 removed_files += 1;
1311                 gained_space += file.size;
1312             }
1313         }
1314     }
1315     (gained_space, removed_files, failed_to_remove_files)
1316 }
1317 
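     /// On Windows hard links are not detected, so every entry is returned unchanged.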
1318 #[cfg(target_family = "windows")]
1319 fn filter_hard_links(vec_file_entry: &[FileEntry]) -> Vec<FileEntry> {
1320     vec_file_entry.to_vec()
1321 }
1322 
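     /// On Unix, keeps only the first entry for each inode; later entries pointing at the same inode
     /// are already hard links of an earlier file, so deduplicating them would not free any space.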
1323 #[cfg(target_family = "unix")]
1324 fn filter_hard_links(vec_file_entry: &[FileEntry]) -> Vec<FileEntry> {
1325     let mut inodes: HashSet<u64> = HashSet::with_capacity(vec_file_entry.len());
1326     let mut identical: Vec<FileEntry> = Vec::with_capacity(vec_file_entry.len());
1327     for f in vec_file_entry {
1328         if let Ok(meta) = fs::metadata(&f.path) {
1329             if !inodes.insert(meta.ino()) {
1330                 continue;
1331             }
1332         }
1333         identical.push(f.clone());
1334     }
1335     identical
1336 }
1337 
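     /// Replaces `dst` with a hard link to `src`.
     /// The original `dst` is first moved to a temporary file in the same directory so it can be
     /// restored if creating the hard link fails.
     ///
     /// A minimal usage sketch (the paths are purely illustrative, so the example is not compiled):
     ///
     /// ```ignore
     /// use std::path::Path;
     /// // After this call "copy.txt" shares its data with "original.txt".
     /// make_hard_link(Path::new("original.txt"), Path::new("copy.txt")).unwrap();
     /// ```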
1338 pub fn make_hard_link(src: &Path, dst: &Path) -> io::Result<()> {
1339     let dst_dir = dst.parent().ok_or_else(|| Error::new(ErrorKind::Other, "No parent"))?;
1340     let temp = tempfile::Builder::new().tempfile_in(dst_dir)?;
1341     fs::rename(dst, temp.path())?;
1342     let result = fs::hard_link(src, dst);
1343     if result.is_err() {
1344         fs::rename(temp.path(), dst)?;
1345     }
1346     result
1347 }
1348 
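     /// Writes the computed hashes to the cache file in the system cache directory.
     /// Each entry is stored as a single `path//size//modified_date//hash` line; only files of at
     /// least `minimal_cache_file_size` bytes are cached, and `load_hashes_from_file` expects
     /// exactly this four-field format.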
1349 fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, type_of_hash: &HashType, minimal_cache_file_size: u64) {
1350     if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
1351         let cache_dir = PathBuf::from(proj_dirs.cache_dir());
1352         if cache_dir.exists() {
1353             if !cache_dir.is_dir() {
1354                 text_messages.messages.push(format!("Cache dir {} is a file!", cache_dir.display()));
1355                 return;
1356             }
1357         } else if let Err(e) = fs::create_dir_all(&cache_dir) {
1358             text_messages.messages.push(format!("Cannot create cache dir {}, reason {}", cache_dir.display(), e));
1359             return;
1360         }
1361         let cache_file = cache_dir.join(CACHE_FILE_NAME.replace(".", format!("_{:?}.", type_of_hash).as_str()));
1362         let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
1363             Ok(t) => t,
1364             Err(e) => {
1365                 text_messages.messages.push(format!("Cannot create or open cache file {}, reason {}", cache_file.display(), e));
1366                 return;
1367             }
1368         };
1369         let mut writer = BufWriter::new(file_handler);
1370 
1371         for file_entry in hashmap.values() {
1372             // Cache only files that are at least as large as the minimal cache file size
1373             if file_entry.size >= minimal_cache_file_size {
1374                 let string: String = format!("{}//{}//{}//{}", file_entry.path.display(), file_entry.size, file_entry.modified_date, file_entry.hash);
1375 
1376                 if let Err(e) = writeln!(writer, "{}", string) {
1377                     text_messages.messages.push(format!("Failed to save some data to cache file {}, reason {}", cache_file.display(), e));
1378                     return;
1379                 };
1380             }
1381         }
1382     }
1383 }
1384 
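     /// Common interface over the supported hashers (blake3, crc32fast, xxh3) so that
     /// `hash_calculation` can work with any of them through `HashType::hasher()`.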
1385 pub trait MyHasher {
1386     fn update(&mut self, bytes: &[u8]);
1387     fn finalize(&self) -> String;
1388 }
1389 
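     /// Hashes the file behind `file_entry` with the given hash type, reading it through `buffer`
     /// in chunks and stopping once at least `limit` bytes have been read (used for the partial
     /// HashMb mode). Returns the textual digest together with the number of bytes actually read.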
1390 fn hash_calculation(buffer: &mut [u8], file_entry: &FileEntry, hash_type: &HashType, limit: u64) -> Result<(String, u64), String> {
1391     let mut file_handler = match File::open(&file_entry.path) {
1392         Ok(t) => t,
1393         Err(e) => return Err(format!("Unable to check hash of file {}, reason {}", file_entry.path.display(), e)),
1394     };
1395     let hasher = &mut *hash_type.hasher();
1396     let mut current_file_read_bytes: u64 = 0;
1397     loop {
1398         let n = match file_handler.read(buffer) {
1399             Ok(0) => break,
1400             Ok(t) => t,
1401             Err(e) => return Err(format!("Error happened when checking hash of file {}, reason {}", file_entry.path.display(), e)),
1402         };
1403 
1404         current_file_read_bytes += n as u64;
1405         hasher.update(&buffer[..n]);
1406 
1407         if current_file_read_bytes >= limit {
1408             break;
1409         }
1410     }
1411     Ok((hasher.finalize(), current_file_read_bytes))
1412 }
1413 
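     /// Loads previously cached hashes from the cache file, skipping malformed lines and entries
     /// whose files no longer exist. Returns the cached entries grouped by file size, or `None`
     /// when the cache file or the system cache directory is not available.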
1414 fn load_hashes_from_file(text_messages: &mut Messages, type_of_hash: &HashType) -> Option<BTreeMap<u64, Vec<FileEntry>>> {
1415     if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
1416         let cache_dir = PathBuf::from(proj_dirs.cache_dir());
1417         let cache_file = cache_dir.join(CACHE_FILE_NAME.replace(".", format!("_{:?}.", type_of_hash).as_str()));
1418         let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
1419             Ok(t) => t,
1420             Err(_inspected) => {
1421                 // text_messages.messages.push(format!("Cannot find or open cache file {}", cache_file.display())); // This shouldn't be written to the output
1422                 return None;
1423             }
1424         };
1425 
1426         let reader = BufReader::new(file_handler);
1427 
1428         let mut hashmap_loaded_entries: BTreeMap<u64, Vec<FileEntry>> = Default::default();
1429 
1430         // Read the file line by line using the lines() iterator from std::io::BufRead.
1431         for (index, line) in reader.lines().enumerate() {
1432             let line = match line {
1433                 Ok(t) => t,
1434                 Err(e) => {
1435                     text_messages.warnings.push(format!("Failed to load line number {} from cache file {}, reason {}", index + 1, cache_file.display(), e));
1436                     return None;
1437                 }
1438             };
1439             let uuu = line.split("//").collect::<Vec<&str>>();
1440             if uuu.len() != 4 {
1441                 text_messages
1442                     .warnings
1443                     .push(format!("Found invalid data (wrong number of fields) in line {} - ({}) in cache file {}", index + 1, line, cache_file.display()));
1444                 continue;
1445             }
1446             // Don't load cached data if the file no longer exists
1447             if Path::new(uuu[0]).exists() {
1448                 let file_entry = FileEntry {
1449                     path: PathBuf::from(uuu[0]),
1450                     size: match uuu[1].parse::<u64>() {
1451                         Ok(t) => t,
1452                         Err(e) => {
1453                             text_messages
1454                                 .warnings
1455                                 .push(format!("Found invalid size value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
1456                             continue;
1457                         }
1458                     },
1459                     modified_date: match uuu[2].parse::<u64>() {
1460                         Ok(t) => t,
1461                         Err(e) => {
1462                             text_messages
1463                                 .warnings
1464                                 .push(format!("Found invalid modified date value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
1465                             continue;
1466                         }
1467                     },
1468                     hash: uuu[3].to_string(),
1469                 };
1470                 hashmap_loaded_entries.entry(file_entry.size).or_insert_with(Vec::new).push(file_entry);
1472             }
1473         }
1474 
1475         return Some(hashmap_loaded_entries);
1476     }
1477 
1478     text_messages.messages.push("Cannot find or open the system cache dir to load the cache file".to_string());
1479     None
1480 }
1481 
1482 #[cfg(test)]
1483 mod tests {
1484     use super::*;
1485     use std::fs::{read_dir, File};
1486     use std::io;
1487     #[cfg(target_family = "windows")]
1488     use std::os::windows::fs::MetadataExt;
1489     #[cfg(target_family = "unix")]
1490     use std::os::unix::fs::MetadataExt;
1491 
1492     #[cfg(target_family = "unix")]
1493     fn assert_inode(before: &Metadata, after: &Metadata) {
1494         assert_eq!(before.ino(), after.ino());
1495     }
1496     #[cfg(target_family = "windows")]
1497     fn assert_inode(_: &Metadata, _: &Metadata) {}
1498 
1499     #[test]
1500     fn test_make_hard_link() -> io::Result<()> {
1501         let dir = tempfile::Builder::new().tempdir()?;
1502         let (src, dst) = (dir.path().join("a"), dir.path().join("b"));
1503         File::create(&src)?;
1504         let metadata = fs::metadata(&src)?;
1505         File::create(&dst)?;
1506 
1507         make_hard_link(&src, &dst)?;
1508 
1509         assert_inode(&metadata, &fs::metadata(&dst)?);
1510         assert_eq!(metadata.permissions(), fs::metadata(&dst)?.permissions());
1511         assert_eq!(metadata.modified()?, fs::metadata(&dst)?.modified()?);
1512         assert_inode(&metadata, &fs::metadata(&src)?);
1513         assert_eq!(metadata.permissions(), fs::metadata(&src)?.permissions());
1514         assert_eq!(metadata.modified()?, fs::metadata(&src)?.modified()?);
1515 
1516         let mut actual = read_dir(&dir)?.map(|e| e.unwrap().path()).collect::<Vec<PathBuf>>();
1517         actual.sort();
1518         assert_eq!(vec![src, dst], actual);
1519         Ok(())
1520     }
1521 
1522     #[test]
1523     fn test_make_hard_link_fails() -> io::Result<()> {
1524         let dir = tempfile::Builder::new().tempdir()?;
1525         let (src, dst) = (dir.path().join("a"), dir.path().join("b"));
1526         File::create(&dst)?;
1527         let metadata = fs::metadata(&dst)?;
1528 
1529         assert!(make_hard_link(&src, &dst).is_err());
1530 
1531         assert_inode(&metadata, &fs::metadata(&dst)?);
1532         assert_eq!(metadata.permissions(), fs::metadata(&dst)?.permissions());
1533         assert_eq!(metadata.modified()?, fs::metadata(&dst)?.modified()?);
1534 
1535         assert_eq!(vec![dst], read_dir(&dir)?.map(|e| e.unwrap().path()).collect::<Vec<PathBuf>>());
1536         Ok(())
1537     }
1538 
1539     #[test]
1540     fn test_filter_hard_links_empty() {
1541         let expected: Vec<FileEntry> = Default::default();
1542         assert_eq!(expected, filter_hard_links(&[]));
1543     }
1544 
1545     #[cfg(target_family = "unix")]
1546     #[test]
1547     fn test_filter_hard_links() -> io::Result<()> {
1548         let dir = tempfile::Builder::new().tempdir()?;
1549         let (src, dst) = (dir.path().join("a"), dir.path().join("b"));
1550         File::create(&src)?;
1551         fs::hard_link(src.clone(), dst.clone())?;
1552         let e1 = FileEntry { path: src, ..Default::default() };
1553         let e2 = FileEntry { path: dst, ..Default::default() };
1554         let actual = filter_hard_links(&[e1.clone(), e2]);
1555         assert_eq!(vec![e1], actual);
1556         Ok(())
1557     }
1558 
1559     #[test]
1560     fn test_filter_hard_links_regular_files() -> io::Result<()> {
1561         let dir = tempfile::Builder::new().tempdir()?;
1562         let (src, dst) = (dir.path().join("a"), dir.path().join("b"));
1563         File::create(&src)?;
1564         File::create(&dst)?;
1565         let e1 = FileEntry { path: src, ..Default::default() };
1566         let e2 = FileEntry { path: dst, ..Default::default() };
1567         let actual = filter_hard_links(&[e1.clone(), e2.clone()]);
1568         assert_eq!(vec![e1, e2], actual);
1569         Ok(())
1570     }
1571 
1572     #[test]
1573     fn test_hash_calculation() -> io::Result<()> {
1574         let dir = tempfile::Builder::new().tempdir()?;
1575         let mut buf = [0u8; 1 << 10];
1576         let src = dir.path().join("a");
1577         let mut file = File::create(&src)?;
1578         file.write_all(b"aa")?;
1579         let e = FileEntry { path: src, ..Default::default() };
1580         let r = hash_calculation(&mut buf, &e, &HashType::Blake3, 0).unwrap();
1581         assert_eq!(2, r.1);
1582         assert!(!r.0.is_empty());
1583         Ok(())
1584     }
1585 
1586     #[test]
1587     fn test_hash_calculation_limit() -> io::Result<()> {
1588         let dir = tempfile::Builder::new().tempdir()?;
1589         let mut buf = [0u8; 1];
1590         let src = dir.path().join("a");
1591         let mut file = File::create(&src)?;
1592         file.write_all(b"aa")?;
1593         let e = FileEntry { path: src, ..Default::default() };
1594         let r1 = hash_calculation(&mut buf, &e, &HashType::Blake3, 1).unwrap();
1595         let r2 = hash_calculation(&mut buf, &e, &HashType::Blake3, 2).unwrap();
1596         let r3 = hash_calculation(&mut buf, &e, &HashType::Blake3, u64::MAX).unwrap();
1597         assert_ne!(r1, r2);
1598         assert_eq!(r2, r3);
1599         Ok(())
1600     }
1601 
1602     #[test]
1603     fn test_hash_calculation_invalid_file() -> io::Result<()> {
1604         let dir = tempfile::Builder::new().tempdir()?;
1605         let mut buf = [0u8; 1 << 10];
1606         let src = dir.path().join("a");
1607         let e = FileEntry { path: src, ..Default::default() };
1608         let r = hash_calculation(&mut buf, &e, &HashType::Blake3, 0).unwrap_err();
1609         assert!(!r.is_empty());
1610         Ok(())
1611     }
1612 }
1613