1 use crate::common::Common;
2 use crate::common_directory::Directories;
3 use crate::common_items::ExcludedItems;
4 use crate::common_messages::Messages;
5 use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
6 use bk_tree::BKTree;
7 use crossbeam_channel::Receiver;
8 use directories_next::ProjectDirs;
9 use humansize::{file_size_opts as options, FileSize};
10 use image::GenericImageView;
11 use img_hash::{FilterType, HashAlg, HasherConfig};
12 use rayon::prelude::*;
13 use std::collections::{BTreeMap, BTreeSet};
14 use std::fs::OpenOptions;
15 use std::fs::{File, Metadata};
16 use std::io::Write;
17 use std::io::*;
18 use std::path::{Path, PathBuf};
19 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
20 use std::sync::Arc;
21 use std::thread::sleep;
22 use std::time::{Duration, SystemTime, UNIX_EPOCH};
23 use std::{fs, mem, thread};
24
// TODO check for better values
// Similarity thresholds per hash size; rows are indexed by hash size
// (4 -> row 0, 8 -> row 1, 16 -> row 2), columns by preset
// (Very High, High, Medium, Small, Very Small, Minimal).
pub const SIMILAR_VALUES: [[u32; 6]; 3] = [
    [0, 1, 2, 3, 4, 5],     // 4 - Max 16
    [0, 2, 5, 7, 14, 20],   // 8 - Max 256
    [2, 5, 10, 20, 40, 80], // 16 - Max 65536
];
31
/// Snapshot of scan progress, periodically sent to the GUI over an unbounded channel.
#[derive(Debug)]
pub struct ProgressData {
    // 0 = collecting files, 1 = hashing images (see max_stage)
    pub current_stage: u8,
    pub max_stage: u8,
    pub images_checked: usize,
    // 0 while the total is still unknown (stage 0)
    pub images_to_check: usize,
}
39
/// Similarity classification of a file entry.
/// `Similar(n)` carries the Hamming distance to the group's master hash;
/// `None` means the entry has not been compared yet.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
pub enum Similarity {
    None,
    Similar(u32),
}
45
/// Per-image record: filesystem metadata plus the computed perceptual hash.
#[derive(Clone, Debug)]
pub struct FileEntry {
    pub path: PathBuf,
    pub size: u64,
    // "WxH" string; empty until the image is decoded in sort_images()
    pub dimensions: String,
    // Seconds since Unix epoch; 0 when the date could not be read
    pub modified_date: u64,
    // Perceptual hash bytes; empty until computed or loaded from cache
    pub hash: Vec<u8>,
    pub similarity: Similarity,
}
55
// This is used by the CLI tool when converting a user-chosen preset name
// into a concrete similarity threshold (see return_similarity_from_similarity_preset).
#[derive(Clone, Debug)]
pub enum SimilarityPreset {
    VeryHigh,
    High,
    Medium,
    Small,
    VerySmall,
    Minimal,
    None,
}
67
/// Distance metric to use with the BK-tree.
struct Hamming;

impl bk_tree::Metric<Vec<u8>> for Hamming {
    /// Number of differing bits between two perceptual hashes.
    fn distance(&self, a: &Vec<u8>, b: &Vec<u8>) -> u32 {
        // NOTE(review): distance_fast errors when slice lengths differ; all hashes
        // inserted into one tree come from the same hash-size configuration, so the
        // unwrap looks safe — confirm cache entries cannot smuggle in other lengths.
        hamming::distance_fast(a, b).unwrap() as u32
    }

    fn threshold_distance(&self, a: &Vec<u8>, b: &Vec<u8>, _threshold: u32) -> Option<u32> {
        // The threshold is ignored; always report the exact distance.
        Some(self.distance(a, b))
    }
}
80
/// Main finder state: configuration, intermediate scan data, and results.
pub struct SimilarImages {
    information: Info,
    text_messages: Messages,
    directories: Directories,
    excluded_items: ExcludedItems,
    // BK-tree over hash bytes, used for nearest-neighbour lookups
    bktree: BKTree<Vec<u8>, Hamming>,
    // Final result: each inner Vec is one group of mutually similar images
    similar_vectors: Vec<Vec<FileEntry>>,
    recursive_search: bool,
    minimal_file_size: u64,
    maximal_file_size: u64,
    image_hashes: BTreeMap<Vec<u8>, Vec<FileEntry>>, // Hashmap with image hashes and Vector with names of files
    stopped_search: bool,
    // Maximum accepted Hamming distance, as Similarity::Similar(n)
    similarity: Similarity,
    // Candidate files keyed by full path string, filled by check_for_similar_images()
    images_to_check: BTreeMap<String, FileEntry>,
    // One of 4/8/16 — validated in set_hash_size()
    hash_size: u8,
    hash_alg: HashAlg,
    image_filter: FilterType,
    use_cache: bool,
}
101
/// Info struct with helpful information about results
#[derive(Default)]
pub struct Info {
    pub number_of_removed_files: usize,
    pub number_of_failed_to_remove_files: usize,
    pub gained_space: u64,
}
impl Info {
    /// Creates a zeroed Info.
    pub fn new() -> Self {
        Default::default()
    }
}
114
115 /// Method implementation for EmptyFolder
116 impl SimilarImages {
    /// New function providing basics values
    pub fn new() -> Self {
        Self {
            information: Default::default(),
            text_messages: Messages::new(),
            directories: Directories::new(),
            excluded_items: Default::default(),
            bktree: BKTree::new(Hamming),
            similar_vectors: vec![],
            recursive_search: true,
            minimal_file_size: 1024 * 16, // 16 KB should be enough to exclude too small images from search
            maximal_file_size: u64::MAX,
            image_hashes: Default::default(),
            stopped_search: false,
            similarity: Similarity::Similar(1),
            images_to_check: Default::default(),
            hash_size: 8,
            hash_alg: HashAlg::Gradient,
            image_filter: FilterType::Lanczos3,
            use_cache: true,
        }
    }
139
set_hash_size(&mut self, hash_size: u8)140 pub fn set_hash_size(&mut self, hash_size: u8) {
141 self.hash_size = match hash_size {
142 4 | 8 | 16 => hash_size,
143 e => {
144 panic!("Invalid value of hash size {}", e);
145 }
146 }
147 }
148
    /// Sets the perceptual-hash algorithm used when hashing images.
    pub fn set_hash_alg(&mut self, hash_alg: HashAlg) {
        self.hash_alg = hash_alg;
    }
152
    /// Sets the resize filter used when downscaling images before hashing.
    pub fn set_image_filter(&mut self, image_filter: FilterType) {
        self.image_filter = image_filter;
    }
156
    /// Returns true when the last search was aborted via the stop channel.
    pub fn get_stopped_search(&self) -> bool {
        self.stopped_search
    }
160
    /// Returns accumulated warnings/errors/messages from the last run.
    pub const fn get_text_messages(&self) -> &Messages {
        &self.text_messages
    }
164
    /// Returns the groups of similar images found by the last search.
    pub const fn get_similar_images(&self) -> &Vec<Vec<FileEntry>> {
        &self.similar_vectors
    }
168
    /// Returns statistics about removed files and reclaimed space.
    pub const fn get_information(&self) -> &Info {
        &self.information
    }
172
    /// Enables or disables the on-disk hash cache.
    pub fn set_use_cache(&mut self, use_cache: bool) {
        self.use_cache = use_cache;
    }
176
    /// Enables or disables descending into subdirectories during the scan.
    pub fn set_recursive_search(&mut self, recursive_search: bool) {
        self.recursive_search = recursive_search;
    }
180
set_minimal_file_size(&mut self, minimal_file_size: u64)181 pub fn set_minimal_file_size(&mut self, minimal_file_size: u64) {
182 self.minimal_file_size = match minimal_file_size {
183 0 => 1,
184 t => t,
185 };
186 }
set_maximal_file_size(&mut self, maximal_file_size: u64)187 pub fn set_maximal_file_size(&mut self, maximal_file_size: u64) {
188 self.maximal_file_size = match maximal_file_size {
189 0 => 1,
190 t => t,
191 };
192 }
    /// Sets the maximum accepted Hamming distance between similar images.
    pub fn set_similarity(&mut self, similarity: Similarity) {
        self.similarity = similarity;
    }
196
    /// Public entry point used by the CLI/GUI to search for similar images.
    ///
    /// Runs the two stages (file collection, then hashing/grouping); either stage
    /// may be aborted through `stop_receiver`, in which case `stopped_search` is set.
    /// Progress updates, when requested, are delivered through `progress_sender`.
    pub fn find_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) {
        self.directories.optimize_directories(true, &mut self.text_messages);
        if !self.check_for_similar_images(stop_receiver, progress_sender) {
            self.stopped_search = true;
            return;
        }
        if !self.sort_images(stop_receiver, progress_sender) {
            self.stopped_search = true;
            return;
        }
        // if self.delete_folders {
        //     self.delete_empty_folders();
        // }
        self.debug_print();
    }
213
214 // pub fn set_delete_folder(&mut self, delete_folder: bool) {
215 // self.delete_folders = delete_folder;
216 // }
217
    /// Stage 0: collects candidate image files from the included directories.
    ///
    /// Walks the directory tree (recursively when `recursive_search` is set),
    /// filters entries by extension, size range and exclusion rules, and stores
    /// matching files in `self.images_to_check` keyed by full path.
    /// Returns `false` when the search was aborted via `stop_receiver`.
    fn check_for_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
        let start_time: SystemTime = SystemTime::now();
        let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector

        // Add root folders for finding
        for id in &self.directories.included_directories {
            folders_to_check.push(id.clone());
        }

        //// PROGRESS THREAD START
        const LOOP_DURATION: u32 = 200; //in ms
        let progress_thread_run = Arc::new(AtomicBool::new(true));

        let atomic_file_counter = Arc::new(AtomicUsize::new(0));

        let progress_thread_handle;
        if let Some(progress_sender) = progress_sender {
            // Report the running file count to the GUI every LOOP_DURATION ms.
            let progress_send = progress_sender.clone();
            let progress_thread_run = progress_thread_run.clone();
            let atomic_file_counter = atomic_file_counter.clone();
            progress_thread_handle = thread::spawn(move || loop {
                progress_send
                    .unbounded_send(ProgressData {
                        current_stage: 0,
                        max_stage: 1,
                        images_checked: atomic_file_counter.load(Ordering::Relaxed) as usize,
                        images_to_check: 0,
                    })
                    .unwrap();
                if !progress_thread_run.load(Ordering::Relaxed) {
                    break;
                }
                sleep(Duration::from_millis(LOOP_DURATION as u64));
            });
        } else {
            // No progress consumer — spawn a no-op thread so the join() calls below stay uniform.
            progress_thread_handle = thread::spawn(|| {});
        }
        //// PROGRESS THREAD END

        // Depth-first traversal: pop a folder, scan it, push its subfolders.
        while !folders_to_check.is_empty() {
            if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
                // End thread which send info to gui
                progress_thread_run.store(false, Ordering::Relaxed);
                progress_thread_handle.join().unwrap();
                return false;
            }
            let current_folder = folders_to_check.pop().unwrap();

            // Read current dir, if permission are denied just go to next
            let read_dir = match fs::read_dir(&current_folder) {
                Ok(t) => t,
                Err(e) => {
                    self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e));
                    continue;
                } // Permissions denied
            };

            // Check every sub folder/file/link etc.
            'dir: for entry in read_dir {
                let entry_data = match entry {
                    Ok(t) => t,
                    Err(e) => {
                        self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e));
                        continue;
                    } //Permissions denied
                };
                let metadata: Metadata = match entry_data.metadata() {
                    Ok(t) => t,
                    Err(e) => {
                        self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e));
                        continue;
                    } //Permissions denied
                };
                if metadata.is_dir() {
                    if !self.recursive_search {
                        continue;
                    }

                    let next_folder = current_folder.join(entry_data.file_name());
                    if self.directories.is_excluded(&next_folder) {
                        continue 'dir;
                    }

                    if self.excluded_items.is_excluded(&next_folder) {
                        continue 'dir;
                    }

                    folders_to_check.push(next_folder);
                } else if metadata.is_file() {
                    atomic_file_counter.fetch_add(1, Ordering::Relaxed);

                    let file_name_lowercase: String = match entry_data.file_name().into_string() {
                        Ok(t) => t,
                        Err(_inspected) => {
                            println!("File {:?} has not valid UTF-8 name", entry_data);
                            continue 'dir;
                        }
                    }
                    .to_lowercase();

                    // Checking allowed image extensions
                    let allowed_image_extensions = [".jpg", ".jpeg", ".png" /*, ".bmp"*/, ".tiff", ".tif", ".tga", ".ff" /*, ".gif"*/, ".jif", ".jfi" /*, ".webp"*/]; // webp cannot be seen in preview, gif needs to be enabled after releasing image crate 0.24.0, bmp needs to be fixed in image crate
                    if !allowed_image_extensions.iter().any(|e| file_name_lowercase.ends_with(e)) {
                        continue 'dir;
                    }

                    // Checking files
                    if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) {
                        let current_file_name = current_folder.join(entry_data.file_name());
                        if self.excluded_items.is_excluded(&current_file_name) {
                            continue 'dir;
                        }

                        // Hash and dimensions stay empty here; they are filled in later by sort_images().
                        let fe: FileEntry = FileEntry {
                            path: current_file_name.clone(),
                            size: metadata.len(),
                            dimensions: "".to_string(),
                            modified_date: match metadata.modified() {
                                Ok(t) => match t.duration_since(UNIX_EPOCH) {
                                    Ok(d) => d.as_secs(),
                                    Err(_inspected) => {
                                        self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display()));
                                        0
                                    }
                                },
                                Err(e) => {
                                    self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e));
                                    0
                                } // Permissions Denied
                            },

                            hash: Vec::new(),
                            similarity: Similarity::None,
                        };

                        self.images_to_check.insert(current_file_name.to_string_lossy().to_string(), fe);
                    }
                }
            }
        }
        // End thread which send info to gui
        progress_thread_run.store(false, Ordering::Relaxed);
        progress_thread_handle.join().unwrap();
        Common::print_time(start_time, SystemTime::now(), "check_for_similar_images".to_string());
        true
    }
366
367 // Cache algorithm:
368 // - Load data from file
369 // - Remove from data to search this already loaded entries(size of image must match)
370 // - Check hash of files which doesn't have saved entry
371 // - Join already read hashes with hashes which were read from file
372 // - Join all hashes and save it to file
373
    /// Stage 1: hashes the collected images and groups them by similarity.
    ///
    /// Merges previously cached hashes (when `use_cache` is on), computes missing
    /// hashes in parallel with rayon, rebuilds the BK-tree, saves the combined
    /// cache, then sweeps distances 0..=max preset to collect groups of similar
    /// images into `self.similar_vectors`.
    /// Returns `false` when aborted via `stop_receiver`.
    fn sort_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
        let hash_map_modification = SystemTime::now();

        let loaded_hash_map;

        let mut records_already_cached: BTreeMap<String, FileEntry> = Default::default();
        let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();

        if self.use_cache {
            loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, self.hash_size, self.hash_alg, self.image_filter) {
                Some(t) => t,
                None => Default::default(),
            };

            // Split candidates into cached (size and mtime unchanged) and to-be-hashed.
            for (name, file_entry) in &self.images_to_check {
                #[allow(clippy::if_same_then_else)]
                if !loaded_hash_map.contains_key(name) {
                    // If loaded data doesn't contains current image info
                    non_cached_files_to_check.insert(name.clone(), file_entry.clone());
                } else if file_entry.size != loaded_hash_map.get(name).unwrap().size || file_entry.modified_date != loaded_hash_map.get(name).unwrap().modified_date {
                    // When size or modification date of image changed, then it is clear that is different image
                    non_cached_files_to_check.insert(name.clone(), file_entry.clone());
                } else {
                    // Checking may be omitted when already there is entry with same size and modification date
                    records_already_cached.insert(name.clone(), loaded_hash_map.get(name).unwrap().clone());
                }
            }
        } else {
            loaded_hash_map = Default::default();
            mem::swap(&mut self.images_to_check, &mut non_cached_files_to_check);
        }

        Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - reading data from cache and preparing them".to_string());
        let hash_map_modification = SystemTime::now();

        //// PROGRESS THREAD START
        const LOOP_DURATION: u32 = 200; //in ms
        let progress_thread_run = Arc::new(AtomicBool::new(true));

        let atomic_file_counter = Arc::new(AtomicUsize::new(0));

        let progress_thread_handle;
        if let Some(progress_sender) = progress_sender {
            // Report hashing progress (stage 1) to the GUI every LOOP_DURATION ms.
            let progress_send = progress_sender.clone();
            let progress_thread_run = progress_thread_run.clone();
            let atomic_file_counter = atomic_file_counter.clone();
            let images_to_check = non_cached_files_to_check.len();
            progress_thread_handle = thread::spawn(move || loop {
                progress_send
                    .unbounded_send(ProgressData {
                        current_stage: 1,
                        max_stage: 1,
                        images_checked: atomic_file_counter.load(Ordering::Relaxed) as usize,
                        images_to_check,
                    })
                    .unwrap();
                if !progress_thread_run.load(Ordering::Relaxed) {
                    break;
                }
                sleep(Duration::from_millis(LOOP_DURATION as u64));
            });
        } else {
            progress_thread_handle = thread::spawn(|| {});
        }
        //// PROGRESS THREAD END

        // Decode and hash the non-cached images in parallel.
        // Some(None) = image skipped (decode failure or degenerate hash);
        // None = stop requested, which makes while_some() end the whole pipeline.
        let mut vec_file_entry: Vec<(FileEntry, Vec<u8>)> = non_cached_files_to_check
            .par_iter()
            .map(|file_entry| {
                atomic_file_counter.fetch_add(1, Ordering::Relaxed);
                if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
                    // This will not break
                    return None;
                }
                let mut file_entry = file_entry.1.clone();

                let image = match image::open(file_entry.path.clone()) {
                    Ok(t) => t,
                    Err(_inspected) => return Some(None), // Something is wrong with image
                };
                let dimensions = image.dimensions();

                file_entry.dimensions = format!("{}x{}", dimensions.0, dimensions.1);

                let hasher_config = HasherConfig::new().hash_size(self.hash_size as u32, self.hash_size as u32).hash_alg(self.hash_alg).resize_filter(self.image_filter);
                let hasher = hasher_config.to_hasher();

                let hash = hasher.hash_image(&image);
                let buf: Vec<u8> = hash.as_bytes().to_vec();

                // Images with hashes with full of 0 or 255 usually means that algorithm fails to decode them because e.g. contains a log of alpha channel
                {
                    if buf.iter().all(|e| *e == 0) {
                        return Some(None);
                    }
                    if buf.iter().all(|e| *e == 255) {
                        return Some(None);
                    }
                }

                file_entry.hash = buf.clone();

                Some(Some((file_entry, buf)))
            })
            .while_some()
            .filter(|file_entry| file_entry.is_some())
            .map(|file_entry| file_entry.unwrap())
            .collect::<Vec<(FileEntry, Vec<u8>)>>();

        // End thread which send info to gui
        progress_thread_run.store(false, Ordering::Relaxed);
        progress_thread_handle.join().unwrap();

        Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - reading data from files in parallel".to_string());
        let hash_map_modification = SystemTime::now();

        // Just connect loaded results with already calculated hashes
        for (_name, file_entry) in records_already_cached {
            vec_file_entry.push((file_entry.clone(), file_entry.hash));
        }

        // Populate the BK-tree and the hash -> files map used for grouping below.
        for (file_entry, buf) in &vec_file_entry {
            self.bktree.add(buf.clone());
            self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new);
            self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone());
        }

        if self.use_cache {
            // Must save all results to file, old loaded from file with all currently counted results
            let mut all_results: BTreeMap<String, FileEntry> = loaded_hash_map;
            for (file_entry, _hash) in vec_file_entry {
                all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
            }
            save_hashes_to_file(&all_results, &mut self.text_messages, self.hash_size, self.hash_alg, self.image_filter);
        }

        Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - saving data to files".to_string());
        let hash_map_modification = SystemTime::now();

        let similarity: u32 = match self.similarity {
            Similarity::Similar(k) => k,
            _ => panic!(),
        };

        // TODO
        // Maybe also add here progress report

        let mut collected_similar_images: BTreeMap<Vec<u8>, Vec<FileEntry>> = Default::default();

        let mut available_hashes = self.image_hashes.clone();
        let mut this_time_check_hashes;
        let mut master_of_group: BTreeSet<Vec<u8>> = Default::default(); // All master hashes, i.e. the group anchors used for comparisons

        // TODO optimize this for big temp_max_similarity values
        // TODO maybe Simialar(u32) is enough instead SIMILAR_VALUES value?
        let temp_max_similarity = match self.hash_size {
            4 => SIMILAR_VALUES[0][5],
            8 => SIMILAR_VALUES[1][5],
            16 => SIMILAR_VALUES[2][5],
            _ => panic!(),
        };

        // Sweep distances from 0 upwards so each image joins the closest possible group.
        for current_similarity in 0..=temp_max_similarity {
            this_time_check_hashes = available_hashes.clone();

            if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
                return false;
            }

            for (hash, vec_file_entry) in this_time_check_hashes.iter() {
                // Candidates at exactly this distance that are still unclaimed.
                let vector_with_found_similar_hashes = self
                    .bktree
                    .find(hash, similarity)
                    .filter(|r| (r.0 == current_similarity) && !master_of_group.contains(r.1) && available_hashes.contains_key(r.1))
                    .collect::<Vec<_>>();

                // Not found any hash with specific distance
                if vector_with_found_similar_hashes.is_empty() {
                    continue;
                }

                // This one picture doesn't have similar pictures except self in similarity 0
                if current_similarity == 0 && vector_with_found_similar_hashes.len() == 1 {
                    continue;
                }

                // If this hash is not yet a master, register it as the anchor of a new group
                if !master_of_group.contains(hash) {
                    master_of_group.insert(hash.clone());
                    collected_similar_images.insert(hash.clone(), Vec::new());

                    let mut things: Vec<FileEntry> = vec_file_entry
                        .iter()
                        .map(|fe| FileEntry {
                            path: fe.path.clone(),
                            size: fe.size,
                            dimensions: fe.dimensions.clone(),
                            modified_date: fe.modified_date,
                            hash: fe.hash.clone(),
                            similarity: Similarity::Similar(0),
                        })
                        .collect();
                    collected_similar_images.get_mut(hash).unwrap().append(&mut things);
                }

                // Since we checked hash, we don't need to check it again
                if current_similarity != 0 {
                    vector_with_found_similar_hashes.iter().for_each(|e| {
                        let mut things: Vec<FileEntry> = available_hashes
                            .get_mut(e.1)
                            .unwrap()
                            .iter()
                            .map(|fe| FileEntry {
                                path: fe.path.clone(),
                                size: fe.size,
                                dimensions: fe.dimensions.clone(),
                                modified_date: fe.modified_date,
                                hash: Vec::new(),
                                similarity: Similarity::Similar(current_similarity),
                            })
                            .collect::<Vec<_>>();
                        collected_similar_images.get_mut(hash).unwrap().append(&mut things);
                        available_hashes.remove(e.1);
                    });
                }
            }
        }

        self.similar_vectors = collected_similar_images.values().cloned().collect();

        Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - selecting data from BtreeMap".to_string());

        // Clean unused data
        self.image_hashes = Default::default();
        self.images_to_check = Default::default();
        self.bktree = BKTree::new(Hamming);

        true
    }
612
    /// Set included dir which needs to be relative, exists etc.
    pub fn set_included_directory(&mut self, included_directory: Vec<PathBuf>) {
        self.directories.set_included_directory(included_directory, &mut self.text_messages);
    }
617
    /// Sets directories that the scan must skip; validation messages land in text_messages.
    pub fn set_excluded_directory(&mut self, excluded_directory: Vec<PathBuf>) {
        self.directories.set_excluded_directory(excluded_directory, &mut self.text_messages);
    }
621
    /// Sets item patterns (e.g. wildcards) that exclude individual files from the scan.
    pub fn set_excluded_items(&mut self, excluded_items: Vec<String>) {
        self.excluded_items.set_excluded_items(excluded_items, &mut self.text_messages);
    }
625 }
impl Default for SimilarImages {
    /// Equivalent to SimilarImages::new().
    fn default() -> Self {
        Self::new()
    }
}
631
impl DebugPrint for SimilarImages {
    /// Prints internal state to stdout; compiled to a no-op in release builds.
    #[allow(dead_code)]
    #[allow(unreachable_code)]
    fn debug_print(&self) {
        // Early-out in release builds — debug printing is development-only.
        #[cfg(not(debug_assertions))]
        {
            return;
        }

        println!("---------------DEBUG PRINT---------------");
        println!("Included directories - {:?}", self.directories.included_directories);
        println!("-----------------------------------------");
    }
}
646 impl SaveResults for SimilarImages {
save_results_to_file(&mut self, file_name: &str) -> bool647 fn save_results_to_file(&mut self, file_name: &str) -> bool {
648 let start_time: SystemTime = SystemTime::now();
649 let file_name: String = match file_name {
650 "" => "results.txt".to_string(),
651 k => k.to_string(),
652 };
653
654 let file_handler = match File::create(&file_name) {
655 Ok(t) => t,
656 Err(e) => {
657 self.text_messages.errors.push(format!("Failed to create file {}, reason {}", file_name, e));
658 return false;
659 }
660 };
661 let mut writer = BufWriter::new(file_handler);
662
663 if let Err(e) = writeln!(
664 writer,
665 "Results of searching {:?} with excluded directories {:?} and excluded items {:?}",
666 self.directories.included_directories, self.directories.excluded_directories, self.excluded_items.items
667 ) {
668 self.text_messages.errors.push(format!("Failed to save results to file {}, reason {}", file_name, e));
669 return false;
670 }
671
672 if !self.similar_vectors.is_empty() {
673 write!(writer, "{} images which have similar friends\n\n", self.similar_vectors.len()).unwrap();
674
675 for struct_similar in self.similar_vectors.iter() {
676 writeln!(writer, "Found {} images which have similar friends", self.similar_vectors.len()).unwrap();
677 for file_entry in struct_similar {
678 writeln!(
679 writer,
680 "{} - {} - {} - {}",
681 file_entry.path.display(),
682 file_entry.dimensions,
683 file_entry.size.file_size(options::BINARY).unwrap(),
684 get_string_from_similarity(&file_entry.similarity, self.hash_size)
685 )
686 .unwrap();
687 }
688 writeln!(writer).unwrap();
689 }
690 } else {
691 write!(writer, "Not found any similar images.").unwrap();
692 }
693
694 Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
695 true
696 }
697 }
impl PrintResults for SimilarImages {
    /// Prints every group of similar images to stdout,
    /// one line per file, with a blank line between groups. // TODO print better
    fn print_results(&self) {
        if !self.similar_vectors.is_empty() {
            println!("Found {} images which have similar friends", self.similar_vectors.len());

            for vec_file_entry in &self.similar_vectors {
                for file_entry in vec_file_entry {
                    println!(
                        "{} - {} - {} - {}",
                        file_entry.path.display(),
                        file_entry.dimensions,
                        file_entry.size.file_size(options::BINARY).unwrap(),
                        get_string_from_similarity(&file_entry.similarity, self.hash_size)
                    );
                }
                println!();
            }
        }
    }
}
719
save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, hash_size: u8, hash_alg: HashAlg, image_filter: FilterType)720 fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, hash_size: u8, hash_alg: HashAlg, image_filter: FilterType) {
721 if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
722 // Lin: /home/username/.cache/czkawka
723 // Win: C:\Users\Username\AppData\Local\Qarmin\Czkawka\cache
724 // Mac: /Users/Username/Library/Caches/pl.Qarmin.Czkawka
725
726 let cache_dir = PathBuf::from(proj_dirs.cache_dir());
727 if cache_dir.exists() {
728 if !cache_dir.is_dir() {
729 text_messages.messages.push(format!("Config dir {} is a file!", cache_dir.display()));
730 return;
731 }
732 } else if let Err(e) = fs::create_dir_all(&cache_dir) {
733 text_messages.messages.push(format!("Cannot create config dir {}, reason {}", cache_dir.display(), e));
734 return;
735 }
736 let cache_file = cache_dir.join(get_cache_file(&hash_size, &hash_alg, &image_filter));
737 let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
738 Ok(t) => t,
739 Err(e) => {
740 text_messages.messages.push(format!("Cannot create or open cache file {}, reason {}", cache_file.display(), e));
741 return;
742 }
743 };
744 let mut writer = BufWriter::new(file_handler);
745
746 for file_entry in hashmap.values() {
747 let mut string: String = String::with_capacity(100);
748
749 string += format!("{}//{}//{}//{}//", file_entry.path.display(), file_entry.size, file_entry.dimensions, file_entry.modified_date).as_str();
750
751 for i in 0..file_entry.hash.len() - 1 {
752 string.push_str(file_entry.hash[i].to_string().as_str());
753 string.push_str("//");
754 }
755 string += file_entry.hash[file_entry.hash.len() - 1].to_string().as_str();
756
757 if let Err(e) = writeln!(writer, "{}", string) {
758 text_messages.messages.push(format!("Failed to save some data to cache file {}, reason {}", cache_file.display(), e));
759 return;
760 };
761 }
762 }
763 }
/// Loads previously computed hashes from the per-configuration cache file.
///
/// Each line has the format `path//size//dimensions//modified_date//h0//...//hN`
/// where N+1 = hash_size^2 / 8 bytes. Entries whose file no longer exists are
/// skipped. Returns `None` when the cache file or the system cache dir is
/// unavailable, or when a line cannot be read at all.
fn load_hashes_from_file(text_messages: &mut Messages, hash_size: u8, hash_alg: HashAlg, image_filter: FilterType) -> Option<BTreeMap<String, FileEntry>> {
    if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
        let cache_dir = PathBuf::from(proj_dirs.cache_dir());
        let cache_file = cache_dir.join(get_cache_file(&hash_size, &hash_alg, &image_filter));
        let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
            Ok(t) => t,
            Err(_inspected) => {
                // text_messages.messages.push(format!("Cannot find or open cache file {}", cache_file.display())); // This shouldn't be write to output
                return None;
            }
        };

        let reader = BufReader::new(file_handler);

        let mut hashmap_loaded_entries: BTreeMap<String, FileEntry> = Default::default();

        // Expected number of hash bytes per entry for this hash size.
        let number_of_results: usize = hash_size as usize * hash_size as usize / 8;

        // Read the file line by line using the lines() iterator from std::io::BufRead.
        for (index, line) in reader.lines().enumerate() {
            let line = match line {
                Ok(t) => t,
                Err(e) => {
                    text_messages.warnings.push(format!("Failed to load line number {} from cache file {}, reason {}", index + 1, cache_file.display(), e));
                    return None;
                }
            };
            let uuu = line.split("//").collect::<Vec<&str>>();
            if uuu.len() != (number_of_results + 4) {
                text_messages.warnings.push(format!(
                    "Found invalid data in line {} - ({}) in cache file {}, expected {} values, found {}",
                    index + 1,
                    line,
                    cache_file.display(),
                    uuu.len(),
                    number_of_results + 4
                ));
                continue;
            }
            // Don't load cache data if destination file not exists
            if Path::new(uuu[0]).exists() {
                let mut hash: Vec<u8> = Vec::new();
                for i in 0..number_of_results {
                    hash.push(match uuu[4 + i as usize].parse::<u8>() {
                        Ok(t) => t,
                        Err(e) => {
                            text_messages
                                .warnings
                                .push(format!("Found invalid hash value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
                            // NOTE(review): this `continue` targets the inner `for i` loop, so a
                            // single unparsable value silently yields a hash vector that is one
                            // byte short instead of skipping the whole line — confirm intent.
                            continue;
                        }
                    });
                }

                // Debug-only sanity check: an all-zero hash is almost certainly a
                // decode failure, so skip the entry rather than poison the tree.
                #[cfg(debug_assertions)]
                {
                    let mut have_at_least: u8 = 0;
                    for i in hash.iter() {
                        if *i == 0 {
                            have_at_least += 1;
                        }
                    }
                    if have_at_least == hash.len() as u8 {
                        println!("ERROR START - {}", line);
                        println!("have_at_least == hash.len() as u8");
                        println!("ERROR END hash.len() - {} == have_at_least - {}", hash.len(), have_at_least);
                        continue; // Just skip this entry, it is very very unlikelly that something have this hash, but if it has, then just ignore it
                    }
                }

                hashmap_loaded_entries.insert(
                    uuu[0].to_string(),
                    FileEntry {
                        path: PathBuf::from(uuu[0]),
                        size: match uuu[1].parse::<u64>() {
                            Ok(t) => t,
                            Err(e) => {
                                text_messages
                                    .warnings
                                    .push(format!("Found invalid size value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
                                continue;
                            }
                        },
                        dimensions: uuu[2].to_string(),
                        modified_date: match uuu[3].parse::<u64>() {
                            Ok(t) => t,
                            Err(e) => {
                                text_messages
                                    .warnings
                                    .push(format!("Found invalid modified date value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
                                continue;
                            }
                        },
                        hash,
                        similarity: Similarity::None,
                    },
                );
            }
        }

        return Some(hashmap_loaded_entries);
    }

    text_messages.messages.push("Cannot find or open system config dir to save cache file".to_string());
    None
}
870
/// Builds the cache file name; every (hash size, algorithm, filter) combination
/// gets its own file so caches with incompatible hash formats never mix.
fn get_cache_file(hash_size: &u8, hash_alg: &HashAlg, image_filter: &FilterType) -> String {
    format!("cache_similar_images_{}_{}_{}.txt", hash_size, convert_algorithm_to_string(hash_alg), convert_filters_to_string(image_filter))
}
874
get_string_from_similarity(similarity: &Similarity, hash_size: u8) -> String875 pub fn get_string_from_similarity(similarity: &Similarity, hash_size: u8) -> String {
876 let index_preset = match hash_size {
877 4 => 0,
878 8 => 1,
879 16 => 2,
880 _ => panic!(),
881 };
882
883 match similarity {
884 Similarity::None => {
885 panic!()
886 }
887 Similarity::Similar(h) => {
888 #[cfg(debug_assertions)]
889 {
890 if *h <= SIMILAR_VALUES[index_preset][0] {
891 format!("Very High {}", *h)
892 } else if *h <= SIMILAR_VALUES[index_preset][1] {
893 format!("High {}", *h)
894 } else if *h <= SIMILAR_VALUES[index_preset][2] {
895 format!("Medium {}", *h)
896 } else if *h <= SIMILAR_VALUES[index_preset][3] {
897 format!("Small {}", *h)
898 } else if *h <= SIMILAR_VALUES[index_preset][4] {
899 format!("Very Small {}", *h)
900 } else if *h <= SIMILAR_VALUES[index_preset][5] {
901 format!("Minimal {}", *h)
902 } else {
903 panic!();
904 }
905 }
906 #[cfg(not(debug_assertions))]
907 {
908 if *h <= SIMILAR_VALUES[index_preset][0] {
909 format!("Very High")
910 } else if *h <= SIMILAR_VALUES[index_preset][1] {
911 format!("High")
912 } else if *h <= SIMILAR_VALUES[index_preset][2] {
913 format!("Medium")
914 } else if *h <= SIMILAR_VALUES[index_preset][3] {
915 format!("Small")
916 } else if *h <= SIMILAR_VALUES[index_preset][4] {
917 format!("Very Small")
918 } else if *h <= SIMILAR_VALUES[index_preset][5] {
919 format!("Minimal")
920 } else {
921 panic!();
922 }
923 }
924 }
925 }
926 }
927
return_similarity_from_similarity_preset(similarity_preset: &SimilarityPreset, hash_size: u8) -> Similarity928 pub fn return_similarity_from_similarity_preset(similarity_preset: &SimilarityPreset, hash_size: u8) -> Similarity {
929 let index_preset = match hash_size {
930 4 => 0,
931 8 => 1,
932 16 => 2,
933 _ => panic!(),
934 };
935 match similarity_preset {
936 SimilarityPreset::VeryHigh => Similarity::Similar(SIMILAR_VALUES[index_preset][0]),
937 SimilarityPreset::High => Similarity::Similar(SIMILAR_VALUES[index_preset][1]),
938 SimilarityPreset::Medium => Similarity::Similar(SIMILAR_VALUES[index_preset][2]),
939 SimilarityPreset::Small => Similarity::Similar(SIMILAR_VALUES[index_preset][3]),
940 SimilarityPreset::VerySmall => Similarity::Similar(SIMILAR_VALUES[index_preset][4]),
941 SimilarityPreset::Minimal => Similarity::Similar(SIMILAR_VALUES[index_preset][5]),
942 SimilarityPreset::None => panic!(""),
943 }
944 }
945
convert_filters_to_string(image_filter: &FilterType) -> String946 fn convert_filters_to_string(image_filter: &FilterType) -> String {
947 match image_filter {
948 FilterType::Lanczos3 => "Lanczos3",
949 FilterType::Nearest => "Nearest",
950 FilterType::Triangle => "Triangle",
951 FilterType::Gaussian => "Gaussian",
952 FilterType::CatmullRom => "CatmullRom",
953 }
954 .to_string()
955 }
956
convert_algorithm_to_string(hash_alg: &HashAlg) -> String957 fn convert_algorithm_to_string(hash_alg: &HashAlg) -> String {
958 match hash_alg {
959 HashAlg::Mean => "Mean",
960 HashAlg::Gradient => "Gradient",
961 HashAlg::Blockhash => "Blockhash",
962 HashAlg::VertGradient => "VertGradient",
963 HashAlg::DoubleGradient => "DoubleGradient",
964 HashAlg::__Nonexhaustive => panic!(),
965 }
966 .to_string()
967 }
968
test_image_conversion_speed()969 pub fn test_image_conversion_speed() {
970 let file_name: &str = "test.jpg";
971 let file_path = Path::new(file_name);
972 match image::open(file_path) {
973 Ok(img_open) => {
974 for alg in [HashAlg::Blockhash, HashAlg::Gradient, HashAlg::DoubleGradient, HashAlg::VertGradient, HashAlg::Mean] {
975 for filter in [FilterType::Lanczos3, FilterType::CatmullRom, FilterType::Gaussian, FilterType::Nearest, FilterType::Triangle] {
976 for size in [2, 4, 8, 16, 32, 64] {
977 let hasher_config = HasherConfig::new().hash_alg(alg).resize_filter(filter).hash_size(size, size);
978
979 let start = SystemTime::now();
980
981 let hasher = hasher_config.to_hasher();
982 let _hash = hasher.hash_image(&img_open);
983
984 let end = SystemTime::now();
985
986 println!("{:?} us {:?} {:?} {}x{}", end.duration_since(start).unwrap().as_micros(), alg, filter, size, size);
987 }
988 }
989 }
990 }
991 Err(e) => {
992 println!(
993 "Failed to open test file {}, reason {}",
994 match file_path.canonicalize() {
995 Ok(t) => t.to_string_lossy().to_string(),
996 Err(_inspected) => file_name.to_string(),
997 },
998 e
999 );
1000 }
1001 }
1002 }
1003