1 use crate::common::Common;
2 use crate::common_directory::Directories;
3 use crate::common_items::ExcludedItems;
4 use crate::common_messages::Messages;
5 use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
6 use bk_tree::BKTree;
7 use crossbeam_channel::Receiver;
8 use directories_next::ProjectDirs;
9 use humansize::{file_size_opts as options, FileSize};
10 use image::GenericImageView;
11 use img_hash::{FilterType, HashAlg, HasherConfig};
12 use rayon::prelude::*;
13 use std::collections::{BTreeMap, BTreeSet};
14 use std::fs::OpenOptions;
15 use std::fs::{File, Metadata};
16 use std::io::Write;
17 use std::io::*;
18 use std::path::{Path, PathBuf};
19 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
20 use std::sync::Arc;
21 use std::thread::sleep;
22 use std::time::{Duration, SystemTime, UNIX_EPOCH};
23 use std::{fs, mem, thread};
24 
// TODO check for better values
// Hamming-distance thresholds per hash size; one row per supported hash side
// (4, 8, 16 — see `set_hash_size`), one column per similarity preset from
// "VeryHigh" (index 0) to "Minimal" (index 5). The trailing comments give the
// hash side and the theoretical maximum distance (side * side bits... times 4
// here — TODO confirm why max is 16/256/65536 rather than bit count).
pub const SIMILAR_VALUES: [[u32; 6]; 3] = [
    [0, 1, 2, 3, 4, 5],     // 4 - Max 16
    [0, 2, 5, 7, 14, 20],   // 8 - Max 256
    [2, 5, 10, 20, 40, 80], // 16 - Max 65536
];
31 
/// Snapshot of scan progress periodically sent from the worker threads to the
/// GUI over an unbounded futures channel (see the progress threads spawned in
/// `check_for_similar_images` and `sort_images`).
#[derive(Debug)]
pub struct ProgressData {
    // 0 = collecting files, 1 = hashing images (max_stage is always 1 here).
    pub current_stage: u8,
    pub max_stage: u8,
    // Number of images processed so far in the current stage.
    pub images_checked: usize,
    // Total images in the current stage; 0 while the total is still unknown.
    pub images_to_check: usize,
}
39 
/// Similarity level of a file inside a group of matches.
/// `Similar(n)` holds the Hamming distance to the group's master hash
/// (0 means an identical perceptual hash); `None` means "not compared yet".
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
pub enum Similarity {
    None,
    Similar(u32),
}
45 
/// Per-image record carried through the whole pipeline: filled in while
/// walking directories, enriched with dimensions/hash while hashing, and
/// finally tagged with a `Similarity` when grouped.
#[derive(Clone, Debug)]
pub struct FileEntry {
    pub path: PathBuf,
    // File size in bytes (also used, with modified_date, for cache validation).
    pub size: u64,
    // Human-readable "WIDTHxHEIGHT"; empty until the image is decoded.
    pub dimensions: String,
    // Seconds since Unix epoch; 0 when the date is unreadable or pre-epoch.
    pub modified_date: u64,
    // Perceptual hash bytes; empty until computed.
    pub hash: Vec<u8>,
    pub similarity: Similarity,
}
55 
// Named similarity presets used by the CLI tool, where the user cannot easily
// pass a raw numeric distance (presumably mapped onto `SIMILAR_VALUES`
// columns elsewhere — confirm in the CLI crate).
#[derive(Clone, Debug)]
pub enum SimilarityPreset {
    VeryHigh,
    High,
    Medium,
    Small,
    VerySmall,
    Minimal,
    None,
}
67 
/// Distance metric to use with the BK-tree.
struct Hamming;

impl bk_tree::Metric<Vec<u8>> for Hamming {
    // Number of differing bits between two hashes. `distance_fast` errors only
    // when the slices differ in length; all hashes in one run are produced by
    // the same hasher config, so they are equal-length and the unwrap holds.
    fn distance(&self, a: &Vec<u8>, b: &Vec<u8>) -> u32 {
        hamming::distance_fast(a, b).unwrap() as u32
    }

    // No threshold-based early exit: always computes the full distance.
    fn threshold_distance(&self, a: &Vec<u8>, b: &Vec<u8>, _threshold: u32) -> Option<u32> {
        Some(self.distance(a, b))
    }
}
80 
/// Main scanner state: search configuration, intermediate data structures and
/// the final groups of similar images.
pub struct SimilarImages {
    information: Info,
    text_messages: Messages,
    directories: Directories,
    excluded_items: ExcludedItems,
    // BK-tree over perceptual hashes, used for nearest-neighbour lookup.
    bktree: BKTree<Vec<u8>, Hamming>,
    // Final result: each inner vector is one group of mutually similar images.
    similar_vectors: Vec<Vec<FileEntry>>,
    recursive_search: bool,
    minimal_file_size: u64,
    maximal_file_size: u64,
    image_hashes: BTreeMap<Vec<u8>, Vec<FileEntry>>, // Map from image hash to every file sharing that exact hash
    // Set when the user aborted the search via the stop receiver.
    stopped_search: bool,
    // Maximum accepted Hamming distance, as Similarity::Similar(n).
    similarity: Similarity,
    // Candidate files gathered in stage 1, keyed by full path string.
    images_to_check: BTreeMap<String, FileEntry>,
    // Hash side length: 4, 8 or 16 (see set_hash_size).
    hash_size: u8,
    hash_alg: HashAlg,
    image_filter: FilterType,
    use_cache: bool,
}
101 
/// Summary counters describing what happened to the found files
/// (filled in when files are removed through the tool).
#[derive(Default)]
pub struct Info {
    pub number_of_removed_files: usize,
    pub number_of_failed_to_remove_files: usize,
    pub gained_space: u64,
}
impl Info {
    /// Creates an `Info` with every counter zeroed.
    pub fn new() -> Self {
        Self::default()
    }
}
114 
115 /// Method implementation for EmptyFolder
116 impl SimilarImages {
    /// Creates a scanner with default settings: recursive search, 16 KB
    /// minimum file size, 8x8 Gradient hash, Lanczos3 resize filter,
    /// similarity threshold 1, and the on-disk hash cache enabled.
    pub fn new() -> Self {
        Self {
            information: Default::default(),
            text_messages: Messages::new(),
            directories: Directories::new(),
            excluded_items: Default::default(),
            bktree: BKTree::new(Hamming),
            similar_vectors: vec![],
            recursive_search: true,
            minimal_file_size: 1024 * 16, // 16 KB should be enough to exclude too small images from search
            maximal_file_size: u64::MAX,
            image_hashes: Default::default(),
            stopped_search: false,
            similarity: Similarity::Similar(1),
            images_to_check: Default::default(),
            hash_size: 8,
            hash_alg: HashAlg::Gradient,
            image_filter: FilterType::Lanczos3,
            use_cache: true,
        }
    }
139 
set_hash_size(&mut self, hash_size: u8)140     pub fn set_hash_size(&mut self, hash_size: u8) {
141         self.hash_size = match hash_size {
142             4 | 8 | 16 => hash_size,
143             e => {
144                 panic!("Invalid value of hash size {}", e);
145             }
146         }
147     }
148 
set_hash_alg(&mut self, hash_alg: HashAlg)149     pub fn set_hash_alg(&mut self, hash_alg: HashAlg) {
150         self.hash_alg = hash_alg;
151     }
152 
set_image_filter(&mut self, image_filter: FilterType)153     pub fn set_image_filter(&mut self, image_filter: FilterType) {
154         self.image_filter = image_filter;
155     }
156 
get_stopped_search(&self) -> bool157     pub fn get_stopped_search(&self) -> bool {
158         self.stopped_search
159     }
160 
get_text_messages(&self) -> &Messages161     pub const fn get_text_messages(&self) -> &Messages {
162         &self.text_messages
163     }
164 
get_similar_images(&self) -> &Vec<Vec<FileEntry>>165     pub const fn get_similar_images(&self) -> &Vec<Vec<FileEntry>> {
166         &self.similar_vectors
167     }
168 
get_information(&self) -> &Info169     pub const fn get_information(&self) -> &Info {
170         &self.information
171     }
172 
set_use_cache(&mut self, use_cache: bool)173     pub fn set_use_cache(&mut self, use_cache: bool) {
174         self.use_cache = use_cache;
175     }
176 
set_recursive_search(&mut self, recursive_search: bool)177     pub fn set_recursive_search(&mut self, recursive_search: bool) {
178         self.recursive_search = recursive_search;
179     }
180 
set_minimal_file_size(&mut self, minimal_file_size: u64)181     pub fn set_minimal_file_size(&mut self, minimal_file_size: u64) {
182         self.minimal_file_size = match minimal_file_size {
183             0 => 1,
184             t => t,
185         };
186     }
set_maximal_file_size(&mut self, maximal_file_size: u64)187     pub fn set_maximal_file_size(&mut self, maximal_file_size: u64) {
188         self.maximal_file_size = match maximal_file_size {
189             0 => 1,
190             t => t,
191         };
192     }
set_similarity(&mut self, similarity: Similarity)193     pub fn set_similarity(&mut self, similarity: Similarity) {
194         self.similarity = similarity;
195     }
196 
197     /// Public function used by CLI to search for empty folders
find_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>)198     pub fn find_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) {
199         self.directories.optimize_directories(true, &mut self.text_messages);
200         if !self.check_for_similar_images(stop_receiver, progress_sender) {
201             self.stopped_search = true;
202             return;
203         }
204         if !self.sort_images(stop_receiver, progress_sender) {
205             self.stopped_search = true;
206             return;
207         }
208         // if self.delete_folders {
209         //     self.delete_empty_folders();
210         // }
211         self.debug_print();
212     }
213 
214     // pub fn set_delete_folder(&mut self, delete_folder: bool) {
215     //     self.delete_folders = delete_folder;
216     // }
217 
    /// Stage 1: walks every included directory (iteratively, via an explicit
    /// stack) and collects candidate image files into `self.images_to_check`.
    ///
    /// A file qualifies when its lowercased name ends with an allowed image
    /// extension, its size lies in `minimal_file_size..=maximal_file_size`,
    /// and neither it nor its directory is excluded. Unreadable entries are
    /// recorded as warnings and skipped. Returns `false` only when a stop
    /// signal was received (the caller then marks the search as stopped).
    fn check_for_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
        let start_time: SystemTime = SystemTime::now();
        let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // Preallocated so typical scans rarely need to grow the vector

        // Add root folders for finding
        for id in &self.directories.included_directories {
            folders_to_check.push(id.clone());
        }

        //// PROGRESS THREAD START
        // Background thread that reports the file counter to the GUI every
        // LOOP_DURATION ms until told to stop via `progress_thread_run`.
        const LOOP_DURATION: u32 = 200; //in ms
        let progress_thread_run = Arc::new(AtomicBool::new(true));

        let atomic_file_counter = Arc::new(AtomicUsize::new(0));

        let progress_thread_handle;
        if let Some(progress_sender) = progress_sender {
            let progress_send = progress_sender.clone();
            let progress_thread_run = progress_thread_run.clone();
            let atomic_file_counter = atomic_file_counter.clone();
            progress_thread_handle = thread::spawn(move || loop {
                progress_send
                    .unbounded_send(ProgressData {
                        current_stage: 0,
                        max_stage: 1,
                        images_checked: atomic_file_counter.load(Ordering::Relaxed) as usize,
                        images_to_check: 0,
                    })
                    .unwrap();
                if !progress_thread_run.load(Ordering::Relaxed) {
                    break;
                }
                sleep(Duration::from_millis(LOOP_DURATION as u64));
            });
        } else {
            // No GUI listening: spawn a no-op thread so joins below stay uniform.
            progress_thread_handle = thread::spawn(|| {});
        }
        //// PROGRESS THREAD END

        // Depth-first traversal driven by the explicit stack above.
        while !folders_to_check.is_empty() {
            if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
                // End thread which send info to gui
                progress_thread_run.store(false, Ordering::Relaxed);
                progress_thread_handle.join().unwrap();
                return false;
            }
            let current_folder = folders_to_check.pop().unwrap();

            // Read current dir, if permission are denied just go to next
            let read_dir = match fs::read_dir(&current_folder) {
                Ok(t) => t,
                Err(e) => {
                    self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e));
                    continue;
                } // Permissions denied
            };

            // Check every sub folder/file/link etc.
            'dir: for entry in read_dir {
                let entry_data = match entry {
                    Ok(t) => t,
                    Err(e) => {
                        self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e));
                        continue;
                    } //Permissions denied
                };
                let metadata: Metadata = match entry_data.metadata() {
                    Ok(t) => t,
                    Err(e) => {
                        self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e));
                        continue;
                    } //Permissions denied
                };
                if metadata.is_dir() {
                    if !self.recursive_search {
                        continue;
                    }

                    let next_folder = current_folder.join(entry_data.file_name());
                    if self.directories.is_excluded(&next_folder) {
                        continue 'dir;
                    }

                    if self.excluded_items.is_excluded(&next_folder) {
                        continue 'dir;
                    }

                    folders_to_check.push(next_folder);
                } else if metadata.is_file() {
                    atomic_file_counter.fetch_add(1, Ordering::Relaxed);

                    let file_name_lowercase: String = match entry_data.file_name().into_string() {
                        Ok(t) => t,
                        Err(_inspected) => {
                            println!("File {:?} has not valid UTF-8 name", entry_data);
                            continue 'dir;
                        }
                    }
                    .to_lowercase();

                    // Checking allowed image extensions
                    let allowed_image_extensions = [".jpg", ".jpeg", ".png" /*, ".bmp"*/, ".tiff", ".tif", ".tga", ".ff" /*, ".gif"*/, ".jif", ".jfi" /*, ".webp"*/]; // webp cannot be seen in preview, gif needs to be enabled after releasing image crate 0.24.0, bmp needs to be fixed in image crate
                    if !allowed_image_extensions.iter().any(|e| file_name_lowercase.ends_with(e)) {
                        continue 'dir;
                    }

                    // Checking files
                    if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) {
                        let current_file_name = current_folder.join(entry_data.file_name());
                        if self.excluded_items.is_excluded(&current_file_name) {
                            continue 'dir;
                        }

                        // Hash and dimensions stay empty here; stage 2 fills them in.
                        let fe: FileEntry = FileEntry {
                            path: current_file_name.clone(),
                            size: metadata.len(),
                            dimensions: "".to_string(),
                            modified_date: match metadata.modified() {
                                Ok(t) => match t.duration_since(UNIX_EPOCH) {
                                    Ok(d) => d.as_secs(),
                                    Err(_inspected) => {
                                        self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display()));
                                        0
                                    }
                                },
                                Err(e) => {
                                    self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e));
                                    0
                                } // Permissions Denied
                            },

                            hash: Vec::new(),
                            similarity: Similarity::None,
                        };

                        self.images_to_check.insert(current_file_name.to_string_lossy().to_string(), fe);
                    }
                }
            }
        }
        // End thread which send info to gui
        progress_thread_run.store(false, Ordering::Relaxed);
        progress_thread_handle.join().unwrap();
        Common::print_time(start_time, SystemTime::now(), "check_for_similar_images".to_string());
        true
    }
366 
    // Cache algorithm:
    // - Load data from file
    // - Remove from data to search this already loaded entries (size and
    //   modification date of image must match)
    // - Check hash of files which doesn't have saved entry
    // - Join already read hashes with hashes which were read from file
    // - Join all hashes and save it to file

    /// Stage 2: hashes every collected image (in parallel, with optional disk
    /// cache), inserts all hashes into the BK-tree, then groups images whose
    /// hashes are within the configured Hamming distance. Fills
    /// `self.similar_vectors` and clears the intermediate state.
    /// Returns `false` when a stop signal was received.
    fn sort_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
        let hash_map_modification = SystemTime::now();

        let loaded_hash_map;

        let mut records_already_cached: BTreeMap<String, FileEntry> = Default::default();
        let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();

        // Split candidates into "cache hit" (reuse stored hash) and "must hash".
        if self.use_cache {
            loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, self.hash_size, self.hash_alg, self.image_filter) {
                Some(t) => t,
                None => Default::default(),
            };

            for (name, file_entry) in &self.images_to_check {
                #[allow(clippy::if_same_then_else)]
                if !loaded_hash_map.contains_key(name) {
                    // If loaded data doesn't contains current image info
                    non_cached_files_to_check.insert(name.clone(), file_entry.clone());
                } else if file_entry.size != loaded_hash_map.get(name).unwrap().size || file_entry.modified_date != loaded_hash_map.get(name).unwrap().modified_date {
                    // When size or modification date of image changed, then it is clear that is different image
                    non_cached_files_to_check.insert(name.clone(), file_entry.clone());
                } else {
                    // Checking may be omitted when already there is entry with same size and modification date
                    records_already_cached.insert(name.clone(), loaded_hash_map.get(name).unwrap().clone());
                }
            }
        } else {
            loaded_hash_map = Default::default();
            mem::swap(&mut self.images_to_check, &mut non_cached_files_to_check);
        }

        Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - reading data from cache and preparing them".to_string());
        let hash_map_modification = SystemTime::now();

        //// PROGRESS THREAD START
        // Same reporting pattern as stage 1, but now with a known total.
        const LOOP_DURATION: u32 = 200; //in ms
        let progress_thread_run = Arc::new(AtomicBool::new(true));

        let atomic_file_counter = Arc::new(AtomicUsize::new(0));

        let progress_thread_handle;
        if let Some(progress_sender) = progress_sender {
            let progress_send = progress_sender.clone();
            let progress_thread_run = progress_thread_run.clone();
            let atomic_file_counter = atomic_file_counter.clone();
            let images_to_check = non_cached_files_to_check.len();
            progress_thread_handle = thread::spawn(move || loop {
                progress_send
                    .unbounded_send(ProgressData {
                        current_stage: 1,
                        max_stage: 1,
                        images_checked: atomic_file_counter.load(Ordering::Relaxed) as usize,
                        images_to_check,
                    })
                    .unwrap();
                if !progress_thread_run.load(Ordering::Relaxed) {
                    break;
                }
                sleep(Duration::from_millis(LOOP_DURATION as u64));
            });
        } else {
            progress_thread_handle = thread::spawn(|| {});
        }
        //// PROGRESS THREAD END

        // Hash all non-cached images in parallel. The closure's return values:
        // None        -> stop requested, `while_some` aborts the whole pipeline;
        // Some(None)  -> this image failed to decode/hash, just skip it;
        // Some(Some)  -> successfully hashed entry.
        let mut vec_file_entry: Vec<(FileEntry, Vec<u8>)> = non_cached_files_to_check
            .par_iter()
            .map(|file_entry| {
                atomic_file_counter.fetch_add(1, Ordering::Relaxed);
                if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
                    // This will not break
                    return None;
                }
                let mut file_entry = file_entry.1.clone();

                let image = match image::open(file_entry.path.clone()) {
                    Ok(t) => t,
                    Err(_inspected) => return Some(None), // Something is wrong with image
                };
                let dimensions = image.dimensions();

                file_entry.dimensions = format!("{}x{}", dimensions.0, dimensions.1);

                let hasher_config = HasherConfig::new().hash_size(self.hash_size as u32, self.hash_size as u32).hash_alg(self.hash_alg).resize_filter(self.image_filter);
                let hasher = hasher_config.to_hasher();

                let hash = hasher.hash_image(&image);
                let buf: Vec<u8> = hash.as_bytes().to_vec();

                // Images with hashes full of 0 or 255 usually mean that the algorithm failed to decode them, e.g. because they contain a lot of alpha channel
                {
                    if buf.iter().all(|e| *e == 0) {
                        return Some(None);
                    }
                    if buf.iter().all(|e| *e == 255) {
                        return Some(None);
                    }
                }

                file_entry.hash = buf.clone();

                Some(Some((file_entry, buf)))
            })
            .while_some()
            .filter(|file_entry| file_entry.is_some())
            .map(|file_entry| file_entry.unwrap())
            .collect::<Vec<(FileEntry, Vec<u8>)>>();

        // End thread which send info to gui
        progress_thread_run.store(false, Ordering::Relaxed);
        progress_thread_handle.join().unwrap();

        Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - reading data from files in parallel".to_string());
        let hash_map_modification = SystemTime::now();

        // Just connect loaded results with already calculated hashes
        for (_name, file_entry) in records_already_cached {
            vec_file_entry.push((file_entry.clone(), file_entry.hash));
        }

        // Index every hash in the BK-tree and in the hash -> files map.
        for (file_entry, buf) in &vec_file_entry {
            self.bktree.add(buf.clone());
            self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new);
            self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone());
        }

        if self.use_cache {
            // Must save all results to file, old loaded from file with all currently counted results
            let mut all_results: BTreeMap<String, FileEntry> = loaded_hash_map;
            for (file_entry, _hash) in vec_file_entry {
                all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
            }
            save_hashes_to_file(&all_results, &mut self.text_messages, self.hash_size, self.hash_alg, self.image_filter);
        }

        Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - saving data to files".to_string());
        let hash_map_modification = SystemTime::now();

        // Intentionally panics on Similarity::None — callers must configure a
        // numeric threshold before starting the search.
        let similarity: u32 = match self.similarity {
            Similarity::Similar(k) => k,
            _ => panic!(),
        };

        // TODO
        // Maybe also add here progress report

        let mut collected_similar_images: BTreeMap<Vec<u8>, Vec<FileEntry>> = Default::default();

        let mut available_hashes = self.image_hashes.clone();
        let mut this_time_check_hashes;
        let mut master_of_group: BTreeSet<Vec<u8>> = Default::default(); // All "master" hashes that already anchor a group of similar images

        // TODO optimize this for big temp_max_similarity values
        // TODO maybe Simialar(u32) is enough instead SIMILAR_VALUES value?
        let temp_max_similarity = match self.hash_size {
            4 => SIMILAR_VALUES[0][5],
            8 => SIMILAR_VALUES[1][5],
            16 => SIMILAR_VALUES[2][5],
            _ => panic!(),
        };

        // Grouping pass: iterate distances from 0 upward so the closest matches
        // are claimed first; each claimed hash is removed from `available_hashes`
        // so it can belong to only one group.
        for current_similarity in 0..=temp_max_similarity {
            this_time_check_hashes = available_hashes.clone();

            if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
                return false;
            }

            for (hash, vec_file_entry) in this_time_check_hashes.iter() {
                let vector_with_found_similar_hashes = self
                    .bktree
                    .find(hash, similarity)
                    .filter(|r| (r.0 == current_similarity) && !master_of_group.contains(r.1) && available_hashes.contains_key(r.1))
                    .collect::<Vec<_>>();

                // Not found any hash with specific distance
                if vector_with_found_similar_hashes.is_empty() {
                    continue;
                }

                // This one picture doesn't have similar pictures except self in similarity 0
                if current_similarity == 0 && vector_with_found_similar_hashes.len() == 1 {
                    continue;
                }

                // If this hash is not yet a group master, register it as one and seed the group with its own files
                if !master_of_group.contains(hash) {
                    master_of_group.insert(hash.clone());
                    collected_similar_images.insert(hash.clone(), Vec::new());

                    let mut things: Vec<FileEntry> = vec_file_entry
                        .iter()
                        .map(|fe| FileEntry {
                            path: fe.path.clone(),
                            size: fe.size,
                            dimensions: fe.dimensions.clone(),
                            modified_date: fe.modified_date,
                            hash: fe.hash.clone(),
                            similarity: Similarity::Similar(0),
                        })
                        .collect();
                    collected_similar_images.get_mut(hash).unwrap().append(&mut things);
                }

                // Since we checked hash, we don't need to check it again
                if current_similarity != 0 {
                    vector_with_found_similar_hashes.iter().for_each(|e| {
                        let mut things: Vec<FileEntry> = available_hashes
                            .get_mut(e.1)
                            .unwrap()
                            .iter()
                            .map(|fe| FileEntry {
                                path: fe.path.clone(),
                                size: fe.size,
                                dimensions: fe.dimensions.clone(),
                                modified_date: fe.modified_date,
                                hash: Vec::new(),
                                similarity: Similarity::Similar(current_similarity),
                            })
                            .collect::<Vec<_>>();
                        collected_similar_images.get_mut(hash).unwrap().append(&mut things);
                        available_hashes.remove(e.1);
                    });
                }
            }
        }

        self.similar_vectors = collected_similar_images.values().cloned().collect();

        Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - selecting data from BtreeMap".to_string());

        // Clean unused data
        self.image_hashes = Default::default();
        self.images_to_check = Default::default();
        self.bktree = BKTree::new(Hamming);

        true
    }
612 
    /// Sets the directories to scan; `Directories` validates them (absolute,
    /// existing, etc.) and reports problems through `text_messages`.
    pub fn set_included_directory(&mut self, included_directory: Vec<PathBuf>) {
        self.directories.set_included_directory(included_directory, &mut self.text_messages);
    }
617 
set_excluded_directory(&mut self, excluded_directory: Vec<PathBuf>)618     pub fn set_excluded_directory(&mut self, excluded_directory: Vec<PathBuf>) {
619         self.directories.set_excluded_directory(excluded_directory, &mut self.text_messages);
620     }
621 
set_excluded_items(&mut self, excluded_items: Vec<String>)622     pub fn set_excluded_items(&mut self, excluded_items: Vec<String>) {
623         self.excluded_items.set_excluded_items(excluded_items, &mut self.text_messages);
624     }
625 }
impl Default for SimilarImages {
    /// Same as [`SimilarImages::new`].
    fn default() -> Self {
        Self::new()
    }
}
631 
impl DebugPrint for SimilarImages {
    /// Dumps internal state to stdout; in release builds the `cfg` block below
    /// makes this an immediate return (hence the `unreachable_code` allow).
    #[allow(dead_code)]
    #[allow(unreachable_code)]
    fn debug_print(&self) {
        #[cfg(not(debug_assertions))]
        {
            return;
        }

        println!("---------------DEBUG PRINT---------------");
        println!("Included directories - {:?}", self.directories.included_directories);
        println!("-----------------------------------------");
    }
}
impl SaveResults for SimilarImages {
    /// Writes the search configuration and all similar-image groups to a text
    /// file (defaulting to "results.txt" when `file_name` is empty).
    /// Returns `false` and records an error message when the file cannot be
    /// created or written.
    fn save_results_to_file(&mut self, file_name: &str) -> bool {
        let start_time: SystemTime = SystemTime::now();
        let file_name: String = match file_name {
            "" => "results.txt".to_string(),
            k => k.to_string(),
        };

        let file_handler = match File::create(&file_name) {
            Ok(t) => t,
            Err(e) => {
                self.text_messages.errors.push(format!("Failed to create file {}, reason {}", file_name, e));
                return false;
            }
        };
        let mut writer = BufWriter::new(file_handler);

        if let Err(e) = writeln!(
            writer,
            "Results of searching {:?} with excluded directories {:?} and excluded items {:?}",
            self.directories.included_directories, self.directories.excluded_directories, self.excluded_items.items
        ) {
            self.text_messages.errors.push(format!("Failed to save results to file {}, reason {}", file_name, e));
            return false;
        }

        if !self.similar_vectors.is_empty() {
            // NOTE(review): both headers print the number of GROUPS, not images,
            // and the per-group header repeats the same total — confirm intent.
            write!(writer, "{} images which have similar friends\n\n", self.similar_vectors.len()).unwrap();

            for struct_similar in self.similar_vectors.iter() {
                writeln!(writer, "Found {} images which have similar friends", self.similar_vectors.len()).unwrap();
                for file_entry in struct_similar {
                    writeln!(
                        writer,
                        "{} - {} - {} - {}",
                        file_entry.path.display(),
                        file_entry.dimensions,
                        file_entry.size.file_size(options::BINARY).unwrap(),
                        get_string_from_similarity(&file_entry.similarity, self.hash_size)
                    )
                    .unwrap();
                }
                writeln!(writer).unwrap();
            }
        } else {
            write!(writer, "Not found any similar images.").unwrap();
        }

        Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
        true
    }
}
698 impl PrintResults for SimilarImages {
699     /// Prints basic info about empty folders // TODO print better
print_results(&self)700     fn print_results(&self) {
701         if !self.similar_vectors.is_empty() {
702             println!("Found {} images which have similar friends", self.similar_vectors.len());
703 
704             for vec_file_entry in &self.similar_vectors {
705                 for file_entry in vec_file_entry {
706                     println!(
707                         "{} - {} - {} - {}",
708                         file_entry.path.display(),
709                         file_entry.dimensions,
710                         file_entry.size.file_size(options::BINARY).unwrap(),
711                         get_string_from_similarity(&file_entry.similarity, self.hash_size)
712                     );
713                 }
714                 println!();
715             }
716         }
717     }
718 }
719 
save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, hash_size: u8, hash_alg: HashAlg, image_filter: FilterType)720 fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, hash_size: u8, hash_alg: HashAlg, image_filter: FilterType) {
721     if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
722         // Lin: /home/username/.cache/czkawka
723         // Win: C:\Users\Username\AppData\Local\Qarmin\Czkawka\cache
724         // Mac: /Users/Username/Library/Caches/pl.Qarmin.Czkawka
725 
726         let cache_dir = PathBuf::from(proj_dirs.cache_dir());
727         if cache_dir.exists() {
728             if !cache_dir.is_dir() {
729                 text_messages.messages.push(format!("Config dir {} is a file!", cache_dir.display()));
730                 return;
731             }
732         } else if let Err(e) = fs::create_dir_all(&cache_dir) {
733             text_messages.messages.push(format!("Cannot create config dir {}, reason {}", cache_dir.display(), e));
734             return;
735         }
736         let cache_file = cache_dir.join(get_cache_file(&hash_size, &hash_alg, &image_filter));
737         let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
738             Ok(t) => t,
739             Err(e) => {
740                 text_messages.messages.push(format!("Cannot create or open cache file {}, reason {}", cache_file.display(), e));
741                 return;
742             }
743         };
744         let mut writer = BufWriter::new(file_handler);
745 
746         for file_entry in hashmap.values() {
747             let mut string: String = String::with_capacity(100);
748 
749             string += format!("{}//{}//{}//{}//", file_entry.path.display(), file_entry.size, file_entry.dimensions, file_entry.modified_date).as_str();
750 
751             for i in 0..file_entry.hash.len() - 1 {
752                 string.push_str(file_entry.hash[i].to_string().as_str());
753                 string.push_str("//");
754             }
755             string += file_entry.hash[file_entry.hash.len() - 1].to_string().as_str();
756 
757             if let Err(e) = writeln!(writer, "{}", string) {
758                 text_messages.messages.push(format!("Failed to save some data to cache file {}, reason {}", cache_file.display(), e));
759                 return;
760             };
761         }
762     }
763 }
load_hashes_from_file(text_messages: &mut Messages, hash_size: u8, hash_alg: HashAlg, image_filter: FilterType) -> Option<BTreeMap<String, FileEntry>>764 fn load_hashes_from_file(text_messages: &mut Messages, hash_size: u8, hash_alg: HashAlg, image_filter: FilterType) -> Option<BTreeMap<String, FileEntry>> {
765     if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
766         let cache_dir = PathBuf::from(proj_dirs.cache_dir());
767         let cache_file = cache_dir.join(get_cache_file(&hash_size, &hash_alg, &image_filter));
768         let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
769             Ok(t) => t,
770             Err(_inspected) => {
771                 // text_messages.messages.push(format!("Cannot find or open cache file {}", cache_file.display())); // This shouldn't be write to output
772                 return None;
773             }
774         };
775 
776         let reader = BufReader::new(file_handler);
777 
778         let mut hashmap_loaded_entries: BTreeMap<String, FileEntry> = Default::default();
779 
780         let number_of_results: usize = hash_size as usize * hash_size as usize / 8;
781 
782         // Read the file line by line using the lines() iterator from std::io::BufRead.
783         for (index, line) in reader.lines().enumerate() {
784             let line = match line {
785                 Ok(t) => t,
786                 Err(e) => {
787                     text_messages.warnings.push(format!("Failed to load line number {} from cache file {}, reason {}", index + 1, cache_file.display(), e));
788                     return None;
789                 }
790             };
791             let uuu = line.split("//").collect::<Vec<&str>>();
792             if uuu.len() != (number_of_results + 4) {
793                 text_messages.warnings.push(format!(
794                     "Found invalid data in line {} - ({}) in cache file {}, expected {} values, found {}",
795                     index + 1,
796                     line,
797                     cache_file.display(),
798                     uuu.len(),
799                     number_of_results + 4
800                 ));
801                 continue;
802             }
803             // Don't load cache data if destination file not exists
804             if Path::new(uuu[0]).exists() {
805                 let mut hash: Vec<u8> = Vec::new();
806                 for i in 0..number_of_results {
807                     hash.push(match uuu[4 + i as usize].parse::<u8>() {
808                         Ok(t) => t,
809                         Err(e) => {
810                             text_messages
811                                 .warnings
812                                 .push(format!("Found invalid hash value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
813                             continue;
814                         }
815                     });
816                 }
817 
818                 #[cfg(debug_assertions)]
819                 {
820                     let mut have_at_least: u8 = 0;
821                     for i in hash.iter() {
822                         if *i == 0 {
823                             have_at_least += 1;
824                         }
825                     }
826                     if have_at_least == hash.len() as u8 {
827                         println!("ERROR START - {}", line);
828                         println!("have_at_least == hash.len() as u8");
829                         println!("ERROR END hash.len() - {} == have_at_least - {}", hash.len(), have_at_least);
830                         continue; // Just skip this entry, it is very very unlikelly that something have this hash, but if it has, then just ignore it
831                     }
832                 }
833 
834                 hashmap_loaded_entries.insert(
835                     uuu[0].to_string(),
836                     FileEntry {
837                         path: PathBuf::from(uuu[0]),
838                         size: match uuu[1].parse::<u64>() {
839                             Ok(t) => t,
840                             Err(e) => {
841                                 text_messages
842                                     .warnings
843                                     .push(format!("Found invalid size value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
844                                 continue;
845                             }
846                         },
847                         dimensions: uuu[2].to_string(),
848                         modified_date: match uuu[3].parse::<u64>() {
849                             Ok(t) => t,
850                             Err(e) => {
851                                 text_messages
852                                     .warnings
853                                     .push(format!("Found invalid modified date value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
854                                 continue;
855                             }
856                         },
857                         hash,
858                         similarity: Similarity::None,
859                     },
860                 );
861             }
862         }
863 
864         return Some(hashmap_loaded_entries);
865     }
866 
867     text_messages.messages.push("Cannot find or open system config dir to save cache file".to_string());
868     None
869 }
870 
get_cache_file(hash_size: &u8, hash_alg: &HashAlg, image_filter: &FilterType) -> String871 fn get_cache_file(hash_size: &u8, hash_alg: &HashAlg, image_filter: &FilterType) -> String {
872     format!("cache_similar_images_{}_{}_{}.txt", hash_size, convert_algorithm_to_string(hash_alg), convert_filters_to_string(image_filter))
873 }
874 
get_string_from_similarity(similarity: &Similarity, hash_size: u8) -> String875 pub fn get_string_from_similarity(similarity: &Similarity, hash_size: u8) -> String {
876     let index_preset = match hash_size {
877         4 => 0,
878         8 => 1,
879         16 => 2,
880         _ => panic!(),
881     };
882 
883     match similarity {
884         Similarity::None => {
885             panic!()
886         }
887         Similarity::Similar(h) => {
888             #[cfg(debug_assertions)]
889             {
890                 if *h <= SIMILAR_VALUES[index_preset][0] {
891                     format!("Very High {}", *h)
892                 } else if *h <= SIMILAR_VALUES[index_preset][1] {
893                     format!("High {}", *h)
894                 } else if *h <= SIMILAR_VALUES[index_preset][2] {
895                     format!("Medium {}", *h)
896                 } else if *h <= SIMILAR_VALUES[index_preset][3] {
897                     format!("Small {}", *h)
898                 } else if *h <= SIMILAR_VALUES[index_preset][4] {
899                     format!("Very Small {}", *h)
900                 } else if *h <= SIMILAR_VALUES[index_preset][5] {
901                     format!("Minimal {}", *h)
902                 } else {
903                     panic!();
904                 }
905             }
906             #[cfg(not(debug_assertions))]
907             {
908                 if *h <= SIMILAR_VALUES[index_preset][0] {
909                     format!("Very High")
910                 } else if *h <= SIMILAR_VALUES[index_preset][1] {
911                     format!("High")
912                 } else if *h <= SIMILAR_VALUES[index_preset][2] {
913                     format!("Medium")
914                 } else if *h <= SIMILAR_VALUES[index_preset][3] {
915                     format!("Small")
916                 } else if *h <= SIMILAR_VALUES[index_preset][4] {
917                     format!("Very Small")
918                 } else if *h <= SIMILAR_VALUES[index_preset][5] {
919                     format!("Minimal")
920                 } else {
921                     panic!();
922                 }
923             }
924         }
925     }
926 }
927 
return_similarity_from_similarity_preset(similarity_preset: &SimilarityPreset, hash_size: u8) -> Similarity928 pub fn return_similarity_from_similarity_preset(similarity_preset: &SimilarityPreset, hash_size: u8) -> Similarity {
929     let index_preset = match hash_size {
930         4 => 0,
931         8 => 1,
932         16 => 2,
933         _ => panic!(),
934     };
935     match similarity_preset {
936         SimilarityPreset::VeryHigh => Similarity::Similar(SIMILAR_VALUES[index_preset][0]),
937         SimilarityPreset::High => Similarity::Similar(SIMILAR_VALUES[index_preset][1]),
938         SimilarityPreset::Medium => Similarity::Similar(SIMILAR_VALUES[index_preset][2]),
939         SimilarityPreset::Small => Similarity::Similar(SIMILAR_VALUES[index_preset][3]),
940         SimilarityPreset::VerySmall => Similarity::Similar(SIMILAR_VALUES[index_preset][4]),
941         SimilarityPreset::Minimal => Similarity::Similar(SIMILAR_VALUES[index_preset][5]),
942         SimilarityPreset::None => panic!(""),
943     }
944 }
945 
convert_filters_to_string(image_filter: &FilterType) -> String946 fn convert_filters_to_string(image_filter: &FilterType) -> String {
947     match image_filter {
948         FilterType::Lanczos3 => "Lanczos3",
949         FilterType::Nearest => "Nearest",
950         FilterType::Triangle => "Triangle",
951         FilterType::Gaussian => "Gaussian",
952         FilterType::CatmullRom => "CatmullRom",
953     }
954     .to_string()
955 }
956 
convert_algorithm_to_string(hash_alg: &HashAlg) -> String957 fn convert_algorithm_to_string(hash_alg: &HashAlg) -> String {
958     match hash_alg {
959         HashAlg::Mean => "Mean",
960         HashAlg::Gradient => "Gradient",
961         HashAlg::Blockhash => "Blockhash",
962         HashAlg::VertGradient => "VertGradient",
963         HashAlg::DoubleGradient => "DoubleGradient",
964         HashAlg::__Nonexhaustive => panic!(),
965     }
966     .to_string()
967 }
968 
test_image_conversion_speed()969 pub fn test_image_conversion_speed() {
970     let file_name: &str = "test.jpg";
971     let file_path = Path::new(file_name);
972     match image::open(file_path) {
973         Ok(img_open) => {
974             for alg in [HashAlg::Blockhash, HashAlg::Gradient, HashAlg::DoubleGradient, HashAlg::VertGradient, HashAlg::Mean] {
975                 for filter in [FilterType::Lanczos3, FilterType::CatmullRom, FilterType::Gaussian, FilterType::Nearest, FilterType::Triangle] {
976                     for size in [2, 4, 8, 16, 32, 64] {
977                         let hasher_config = HasherConfig::new().hash_alg(alg).resize_filter(filter).hash_size(size, size);
978 
979                         let start = SystemTime::now();
980 
981                         let hasher = hasher_config.to_hasher();
982                         let _hash = hasher.hash_image(&img_open);
983 
984                         let end = SystemTime::now();
985 
986                         println!("{:?} us {:?} {:?} {}x{}", end.duration_since(start).unwrap().as_micros(), alg, filter, size, size);
987                     }
988                 }
989             }
990         }
991         Err(e) => {
992             println!(
993                 "Failed to open test file {}, reason {}",
994                 match file_path.canonicalize() {
995                     Ok(t) => t.to_string_lossy().to_string(),
996                     Err(_inspected) => file_name.to_string(),
997                 },
998                 e
999             );
1000         }
1001     }
1002 }
1003