use crate::common::Common;
use crate::common_directory::Directories;
use crate::common_extensions::Extensions;
use crate::common_items::ExcludedItems;
use crate::common_messages::Messages;
use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
use crossbeam_channel::Receiver;
use humansize::{file_size_opts as options, FileSize};
use std::collections::BTreeMap;
use std::ffi::OsStr;
use std::fs::{File, Metadata};
use std::io::{BufWriter, Write};
use std::path::PathBuf;
use std::sync::atomic::Ordering;
use std::sync::atomic::{AtomicBool, AtomicU64};
use std::sync::Arc;
use std::thread::sleep;
use std::time::Duration;
use std::time::{SystemTime, UNIX_EPOCH};
use std::{fs, thread};

#[derive(Debug)]
pub struct ProgressData {
    pub files_checked: usize,
}

#[derive(Clone)]
pub struct FileEntry {
    pub path: PathBuf,
    pub size: u64,
    pub modified_date: u64,
}

#[derive(Eq, PartialEq, Clone, Debug)]
pub enum DeleteMethod {
    None,
    Delete,
}
/// Info struct with helpful information about the results
#[derive(Default)]
pub struct Info {
    pub taken_space: u64,
    pub number_of_real_files: usize,
}

impl Info {
    pub fn new() -> Self {
        Default::default()
    }
}

/// Struct with the information required to do the work
pub struct BigFile {
    text_messages: Messages,
    information: Info,
    big_files: BTreeMap<u64, Vec<FileEntry>>,
    excluded_items: ExcludedItems,
    directories: Directories,
    allowed_extensions: Extensions,
    recursive_search: bool,
    number_of_files_to_check: usize,
    delete_method: DeleteMethod,
    stopped_search: bool,
}

impl BigFile {
    pub fn new() -> Self {
        Self {
            text_messages: Default::default(),
            information: Info::new(),
            big_files: Default::default(),
            excluded_items: ExcludedItems::new(),
            directories: Directories::new(),
            allowed_extensions: Extensions::new(),
            recursive_search: true,
            number_of_files_to_check: 50,
            delete_method: DeleteMethod::None,
            stopped_search: false,
        }
    }

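    /// Runs the whole search: optimizes the configured directories, scans for the
    /// biggest files (abortable through `stop_receiver`, reporting progress through
    /// `progress_sender`), applies the selected delete method and, on debug builds,
    /// prints debug information.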
    pub fn find_big_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) {
        self.optimize_directories();
        if !self.look_for_big_files(stop_receiver, progress_sender) {
            self.stopped_search = true;
            return;
        }
        self.delete_files();
        self.debug_print();
    }
    pub fn get_stopped_search(&self) -> bool {
        self.stopped_search
    }

    pub const fn get_big_files(&self) -> &BTreeMap<u64, Vec<FileEntry>> {
        &self.big_files
    }

    pub const fn get_text_messages(&self) -> &Messages {
        &self.text_messages
    }

    pub const fn get_information(&self) -> &Info {
        &self.information
    }

    pub fn set_delete_method(&mut self, delete_method: DeleteMethod) {
        self.delete_method = delete_method;
    }

    pub fn set_recursive_search(&mut self, recursive_search: bool) {
        self.recursive_search = recursive_search;
    }

    /// Sets the list of allowed extensions; only files with these extensions will be checked
    pub fn set_allowed_extensions(&mut self, allowed_extensions: String) {
        self.allowed_extensions.set_allowed_extensions(allowed_extensions, &mut self.text_messages);
    }

    fn look_for_big_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
        let start_time: SystemTime = SystemTime::now();
        let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // Small enough not to waste memory, big enough to hold most paths without resizing the vector

        // Add root folders to start the search from
        for id in &self.directories.included_directories {
            folders_to_check.push(id.clone());
        }

        //// PROGRESS THREAD START
        const LOOP_DURATION: u32 = 200; // in ms
        let progress_thread_run = Arc::new(AtomicBool::new(true));

        let atomic_file_counter = Arc::new(AtomicU64::new(0));
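        // The scan loop below bumps this counter for every file it visits; the
        // progress thread reads it periodically and forwards the value to the GUI.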

        let progress_thread_handle;
        if let Some(progress_sender) = progress_sender {
            let progress_send = progress_sender.clone();
            let progress_thread_run = progress_thread_run.clone();
            let atomic_file_counter = atomic_file_counter.clone();
            progress_thread_handle = thread::spawn(move || loop {
                progress_send
                    .unbounded_send(ProgressData {
                        files_checked: atomic_file_counter.load(Ordering::Relaxed) as usize,
                    })
                    .unwrap();
                if !progress_thread_run.load(Ordering::Relaxed) {
                    break;
                }
                sleep(Duration::from_millis(LOOP_DURATION as u64));
            });
        } else {
            progress_thread_handle = thread::spawn(|| {});
        }

        //// PROGRESS THREAD END

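        // Iterative traversal: `folders_to_check` works as a stack, so directories are
        // visited depth-first. Each entry is either pushed back as a folder to scan
        // later or, if it is a file, filtered and recorded below.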
        while !folders_to_check.is_empty() {
            if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
                // Be sure that every thread is closed
                progress_thread_run.store(false, Ordering::Relaxed);
                progress_thread_handle.join().unwrap();
                return false;
            }

            let current_folder = folders_to_check.pop().unwrap();
            let read_dir = match fs::read_dir(&current_folder) {
                Ok(t) => t,
                Err(e) => {
                    self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e));
                    continue;
                } // Permissions denied
            };
            'dir: for entry in read_dir {
                let entry_data = match entry {
                    Ok(t) => t,
                    Err(e) => {
                        self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e));
                        continue;
                    } // Permissions denied
                };
                let metadata: Metadata = match entry_data.metadata() {
                    Ok(t) => t,
                    Err(e) => {
                        self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e));
                        continue;
                    } // Permissions denied
                };
                if metadata.is_dir() {
                    if !self.recursive_search {
                        continue;
                    }

                    let next_folder = current_folder.join(entry_data.file_name());
                    if self.directories.is_excluded(&next_folder) || self.excluded_items.is_excluded(&next_folder) {
                        continue 'dir;
                    }

                    folders_to_check.push(next_folder);
                } else if metadata.is_file() {
                    atomic_file_counter.fetch_add(1, Ordering::Relaxed);
                    // Extracting file extension
                    let file_extension = entry_data.path().extension().and_then(OsStr::to_str).map(str::to_lowercase);

                    // Checking allowed extensions
                    if !self.allowed_extensions.file_extensions.is_empty() {
                        let allowed = self.allowed_extensions.file_extensions.iter().map(|e| e.to_lowercase()).any(|e| file_extension == Some(e));
                        if !allowed {
                            // Not an allowed extension, ignore it.
                            continue 'dir;
                        }
                    }

                    // Checking exclusion expressions
                    let current_file_name = current_folder.join(entry_data.file_name());
                    if self.excluded_items.is_excluded(&current_file_name) {
                        continue 'dir;
                    }

                    // Creating new file entry
                    let fe: FileEntry = FileEntry {
                        path: current_file_name.clone(),
                        size: metadata.len(),
                        modified_date: match metadata.modified() {
                            Ok(t) => match t.duration_since(UNIX_EPOCH) {
                                Ok(d) => d.as_secs(),
                                Err(_inspected) => {
                                    self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display()));
                                    0
                                }
                            },
                            Err(e) => {
                                self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e));
                                0
                            }
                        },
                    };

                    self.big_files.entry(metadata.len()).or_insert_with(Vec::new).push(fe);
                }
            }
        }

        // Stop the thread which sends progress info to the GUI
        progress_thread_run.store(false, Ordering::Relaxed);
        progress_thread_handle.join().unwrap();

        // Extract the n biggest files into a new BTreeMap
        let mut new_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();

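        // `big_files` is keyed by size, so iterating it in reverse visits the largest
        // sizes first; copying entries until the limit is hit keeps only the biggest files.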
        for (size, vector) in self.big_files.iter().rev() {
            if self.information.number_of_real_files < self.number_of_files_to_check {
                for file in vector {
                    if self.information.number_of_real_files < self.number_of_files_to_check {
                        new_map.entry(*size).or_insert_with(Vec::new).push(file.clone());
                        self.information.taken_space += size;
                        self.information.number_of_real_files += 1;
                    } else {
                        break;
                    }
                }
            } else {
                break;
            }
        }
        self.big_files = new_map;

        Common::print_time(start_time, SystemTime::now(), "look_for_big_files".to_string());
        true
    }

    pub fn set_number_of_files_to_check(&mut self, number_of_files_to_check: usize) {
        self.number_of_files_to_check = number_of_files_to_check;
    }

    /// Sets excluded items, which need to contain a `*` wildcard
    /// They are a lot slower than absolute paths, so they should not be used too heavily
    pub fn set_excluded_items(&mut self, excluded_items: Vec<String>) {
        self.excluded_items.set_excluded_items(excluded_items, &mut self.text_messages);
    }

    /// Remove unused entries when included or excluded directories overlap or are duplicated
    fn optimize_directories(&mut self) {
        self.directories.optimize_directories(self.recursive_search, &mut self.text_messages);
    }

    /// Sets included directories; at least one must be provided
    pub fn set_included_directory(&mut self, included_directory: Vec<PathBuf>) {
        self.directories.set_included_directory(included_directory, &mut self.text_messages);
    }

    /// Sets absolute paths to exclude
    pub fn set_excluded_directory(&mut self, excluded_directory: Vec<PathBuf>) {
        self.directories.set_excluded_directory(excluded_directory, &mut self.text_messages);
    }

    /// Deletes the found files, depending on the selected delete method
    fn delete_files(&mut self) {
        let start_time: SystemTime = SystemTime::now();

        match self.delete_method {
            DeleteMethod::Delete => {
                for vec_file_entry in self.big_files.values() {
                    for file_entry in vec_file_entry {
                        if fs::remove_file(file_entry.path.clone()).is_err() {
                            self.text_messages.warnings.push(file_entry.path.display().to_string());
                        }
                    }
                }
            }
            DeleteMethod::None => {
                // Just do nothing
            }
        }

        Common::print_time(start_time, SystemTime::now(), "delete_files".to_string());
    }
}

impl Default for BigFile {
    fn default() -> Self {
        Self::new()
    }
}

impl DebugPrint for BigFile {
    #[allow(dead_code)]
    #[allow(unreachable_code)]
    /// Debug printing - only available on debug builds
    fn debug_print(&self) {
        #[cfg(not(debug_assertions))]
        {
            return;
        }
        println!("---------------DEBUG PRINT---------------");
        println!("### Information");

        println!("Errors size - {}", self.text_messages.errors.len());
        println!("Warnings size - {}", self.text_messages.warnings.len());
        println!("Messages size - {}", self.text_messages.messages.len());

        println!("### Other");
        println!("Big files size {} in {} groups", self.information.number_of_real_files, self.big_files.len());
        println!("Allowed extensions - {:?}", self.allowed_extensions.file_extensions);
        println!("Excluded items - {:?}", self.excluded_items.items);
        println!("Included directories - {:?}", self.directories.included_directories);
        println!("Excluded directories - {:?}", self.directories.excluded_directories);
        println!("Recursive search - {}", self.recursive_search);
        println!("Number of files to check - {:?}", self.number_of_files_to_check);
        println!("-----------------------------------------");
    }
}

impl SaveResults for BigFile {
    /// Saves results to the provided file
    fn save_results_to_file(&mut self, file_name: &str) -> bool {
        let start_time: SystemTime = SystemTime::now();
        let file_name: String = match file_name {
            "" => "results.txt".to_string(),
            k => k.to_string(),
        };

        let file_handler = match File::create(&file_name) {
            Ok(t) => t,
            Err(e) => {
                self.text_messages.errors.push(format!("Failed to create file {}, reason {}", file_name, e));
                return false;
            }
        };
        let mut writer = BufWriter::new(file_handler);

        if let Err(e) = writeln!(
            writer,
            "Results of searching {:?} with excluded directories {:?} and excluded items {:?}",
            self.directories.included_directories, self.directories.excluded_directories, self.excluded_items.items
        ) {
            self.text_messages.errors.push(format!("Failed to save results to file {}, reason {}", file_name, e));
            return false;
        }

        if self.information.number_of_real_files != 0 {
            write!(writer, "The {} biggest files.\n\n", self.information.number_of_real_files).unwrap();

            for (size, files) in self.big_files.iter().rev() {
                for file_entry in files {
                    writeln!(writer, "{} ({}) - {}", size.file_size(options::BINARY).unwrap(), size, file_entry.path.display()).unwrap();
                }
            }
        } else {
            write!(writer, "No files were found.").unwrap();
        }
        Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
        true
    }
}

impl PrintResults for BigFile {
    fn print_results(&self) {
        let start_time: SystemTime = SystemTime::now();
        println!("Found {} files which take {}:", self.information.number_of_real_files, self.information.taken_space.file_size(options::BINARY).unwrap());
        for (size, vector) in self.big_files.iter().rev() {
            // TODO Align all to same width
            for entry in vector {
                println!("{} ({} bytes) - {}", size.file_size(options::BINARY).unwrap(), size, entry.path.display());
            }
        }
        Common::print_time(start_time, SystemTime::now(), "print_entries".to_string());
    }
}
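
// A minimal usage sketch, added for illustration: it only shows the typical call
// order (configure the finder, run the scan, then read the results). The temporary
// directory is just an example input and the exact counts depend on its contents.
#[cfg(test)]
mod usage_example {
    use super::*;

    #[test]
    fn returns_at_most_the_requested_number_of_files() {
        let mut finder = BigFile::new();
        finder.set_included_directory(vec![std::env::temp_dir()]);
        finder.set_number_of_files_to_check(5);
        // No stop channel and no progress reporting in this sketch.
        finder.find_big_files(None, None);
        assert!(finder.get_information().number_of_real_files <= 5);
    }
}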