1 use czkawka_core::duplicate::{CheckingMethod, DeleteMethod, HashType};
2 use czkawka_core::same_music::MusicSimilarity;
3 use czkawka_core::similar_images::SimilarityPreset;
4 use img_hash::{FilterType, HashAlg};
5 use std::path::PathBuf;
6 use structopt::StructOpt;
7 
8 #[derive(Debug, StructOpt)]
9 #[structopt(name = "czkawka", help_message = HELP_MESSAGE, template = HELP_TEMPLATE)]
10 pub enum Commands {
11     #[structopt(name = "dup", about = "Finds duplicate files", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n    czkawka dup -d /home/rafal -e /home/rafal/Obrazy  -m 25 -x 7z rar IMAGE -s hashmb -f results.txt -D aeo")]
12     Duplicates {
13         #[structopt(flatten)]
14         directories: Directories,
15         #[structopt(flatten)]
16         excluded_directories: ExcludedDirectories,
17         #[structopt(flatten)]
18         excluded_items: ExcludedItems,
19         #[structopt(short, long, parse(try_from_str = parse_minimal_file_size), default_value = "8192", help = "Minimum size in bytes", long_help = "Minimum size of checked files in bytes, assigning bigger value may speed up searching")]
20         minimal_file_size: u64,
21         #[structopt(short = "i", long, parse(try_from_str = parse_maximal_file_size), default_value = "18446744073709551615", help = "Maximum size in bytes", long_help = "Maximum size of checked files in bytes, assigning lower value may speed up searching")]
22         maximal_file_size: u64,
23         #[structopt(short = "c", long, parse(try_from_str = parse_minimal_file_size), default_value = "2097152", help = "Minimum cached file size in bytes", long_help = "Minimum size of cached files in bytes, assigning bigger value may speed up will cause that lower amount of files will be cached, but loading of cache will be faster")]
24         minimal_cached_file_size: u64,
25         #[structopt(flatten)]
26         allowed_extensions: AllowedExtensions,
27         #[structopt(short, long, default_value = "HASH", parse(try_from_str = parse_checking_method), help = "Search method (NAME, SIZE, HASH, HASHMB)", long_help = "Methods to search files.\nNAME - Fast but but rarely usable,\nSIZE - Fast but not accurate, checking by the file's size,\nHASHMB - More accurate but slower, checking by the hash of the file's first mebibyte\nHASH - The slowest method, checking by the hash of the entire file")]
28         search_method: CheckingMethod,
29         #[structopt(short = "D", long, default_value = "NONE", parse(try_from_str = parse_delete_method), help = "Delete method (AEN, AEO, ON, OO, HARD)", long_help = "Methods to delete the files.\nAEN - All files except the newest,\nAEO - All files except the oldest,\nON - Only 1 file, the newest,\nOO - Only 1 file, the oldest\nHARD - create hard link\nNONE - not delete files")]
30         delete_method: DeleteMethod,
31         #[structopt(short = "ht", long, default_value = "BLAKE3", parse(try_from_str = parse_hash_type), help="Hash type (BLAKE3, CRC32, XXH3)")]
32         hash_type: HashType,
33         #[structopt(flatten)]
34         file_to_save: FileToSave,
35         #[structopt(flatten)]
36         not_recursive: NotRecursive,
37         #[structopt(flatten)]
38         allow_hard_links: AllowHardLinks,
39         #[structopt(flatten)]
40         dryrun: DryRun,
41     },
42     #[structopt(name = "empty-folders", about = "Finds empty folders", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n    czkawka empty-folders -d /home/rafal/rr /home/gateway -f results.txt")]
43     EmptyFolders {
44         #[structopt(flatten)]
45         directories: Directories,
46         #[structopt(flatten)]
47         excluded_directories: ExcludedDirectories,
48         #[structopt(flatten)]
49         excluded_items: ExcludedItems,
50         #[structopt(short = "D", long, help = "Delete found folders")]
51         delete_folders: bool,
52         #[structopt(flatten)]
53         file_to_save: FileToSave,
54     },
55     #[structopt(name = "big", about = "Finds big files", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n    czkawka big -d /home/rafal/ /home/piszczal -e /home/rafal/Roman -n 25 -x VIDEO -f results.txt")]
56     BiggestFiles {
57         #[structopt(flatten)]
58         directories: Directories,
59         #[structopt(flatten)]
60         excluded_directories: ExcludedDirectories,
61         #[structopt(flatten)]
62         excluded_items: ExcludedItems,
63         #[structopt(flatten)]
64         allowed_extensions: AllowedExtensions,
65         #[structopt(short, long, default_value = "50", help = "Number of files to be shown")]
66         number_of_files: usize,
67         #[structopt(short = "D", long, help = "Delete found files")]
68         delete_files: bool,
69         #[structopt(flatten)]
70         file_to_save: FileToSave,
71         #[structopt(flatten)]
72         not_recursive: NotRecursive,
73     },
74     #[structopt(name = "empty-files", about = "Finds empty files", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n    czkawka empty-files -d /home/rafal /home/szczekacz -e /home/rafal/Pulpit -R -f results.txt")]
75     EmptyFiles {
76         #[structopt(flatten)]
77         directories: Directories,
78         #[structopt(flatten)]
79         excluded_directories: ExcludedDirectories,
80         #[structopt(flatten)]
81         excluded_items: ExcludedItems,
82         #[structopt(flatten)]
83         allowed_extensions: AllowedExtensions,
84         #[structopt(short = "D", long, help = "Delete found files")]
85         delete_files: bool,
86         #[structopt(flatten)]
87         file_to_save: FileToSave,
88         #[structopt(flatten)]
89         not_recursive: NotRecursive,
90     },
91     #[structopt(name = "temp", about = "Finds temporary files", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n    czkawka temp -d /home/rafal/ -E */.git */tmp* *Pulpit -f results.txt -D")]
92     Temporary {
93         #[structopt(flatten)]
94         directories: Directories,
95         #[structopt(flatten)]
96         excluded_directories: ExcludedDirectories,
97         #[structopt(flatten)]
98         excluded_items: ExcludedItems,
99         #[structopt(short = "D", long, help = "Delete found files")]
100         delete_files: bool,
101         #[structopt(flatten)]
102         file_to_save: FileToSave,
103         #[structopt(flatten)]
104         not_recursive: NotRecursive,
105     },
106     #[structopt(name = "image", about = "Finds similar images", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n    czkawka image -d /home/rafal/ -E */.git */tmp* *Pulpit -f results.txt")]
107     SimilarImages {
108         #[structopt(flatten)]
109         directories: Directories,
110         #[structopt(flatten)]
111         excluded_directories: ExcludedDirectories,
112         #[structopt(short, long, parse(try_from_str = parse_minimal_file_size), default_value = "16384", help = "Minimum size in bytes", long_help = "Minimum size of checked files in bytes, assigning bigger value may speed up searching")]
113         minimal_file_size: u64,
114         #[structopt(short = "i", long, parse(try_from_str = parse_maximal_file_size), default_value = "18446744073709551615", help = "Maximum size in bytes", long_help = "Maximum size of checked files in bytes, assigning lower value may speed up searching")]
115         maximal_file_size: u64,
116         #[structopt(short, long, default_value = "High", parse(try_from_str = parse_similar_images_similarity), help = "Similairty level (Minimal, VerySmall, Small, Medium, High, VeryHigh)", long_help = "Methods to choose similarity level of images which will be considered as duplicated.")]
117         similarity_preset: SimilarityPreset,
118         #[structopt(flatten)]
119         excluded_items: ExcludedItems,
120         #[structopt(flatten)]
121         file_to_save: FileToSave,
122         #[structopt(flatten)]
123         not_recursive: NotRecursive,
124         #[structopt(short = "g", long, default_value = "Gradient", parse(try_from_str = parse_similar_hash_algorithm), help="Hash algorithm (allowed: Mean, Gradient, Blockhash, VertGradient, DoubleGradient)")]
125         hash_alg: HashAlg,
126         #[structopt(short = "f", long, default_value = "Lanczos3", parse(try_from_str = parse_similar_image_filter), help="Hash algorithm (allowed: Lanczos3, Nearest, Triangle, Faussian, Catmullrom)")]
127         image_filter: FilterType,
128         #[structopt(short = "c", long, default_value = "8", parse(try_from_str = parse_image_hash_size), help="Hash size (allowed: 4, 8, 16)")]
129         hash_size: u8,
130     },
131     #[structopt(name = "zeroed", about = "Finds zeroed files", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n    czkawka zeroed -d /home/rafal -e /home/rafal/Pulpit -f results.txt")]
132     ZeroedFiles {
133         #[structopt(flatten)]
134         directories: Directories,
135         #[structopt(flatten)]
136         excluded_directories: ExcludedDirectories,
137         #[structopt(flatten)]
138         excluded_items: ExcludedItems,
139         #[structopt(flatten)]
140         allowed_extensions: AllowedExtensions,
141         #[structopt(short = "D", long, help = "Delete found files")]
142         delete_files: bool,
143         #[structopt(flatten)]
144         file_to_save: FileToSave,
145         #[structopt(flatten)]
146         not_recursive: NotRecursive,
147         #[structopt(short, long, parse(try_from_str = parse_minimal_file_size), default_value = "8192", help = "Minimum size in bytes", long_help = "Minimum size of checked files in bytes, assigning bigger value may speed up searching")]
148         minimal_file_size: u64,
149         #[structopt(short = "i", long, parse(try_from_str = parse_maximal_file_size), default_value = "18446744073709551615", help = "Maximum size in bytes", long_help = "Maximum size of checked files in bytes, assigning lower value may speed up searching")]
150         maximal_file_size: u64,
151     },
152     #[structopt(name = "music", about = "Finds same music by tags", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n    czkawka music -d /home/rafal -f results.txt")]
153     SameMusic {
154         #[structopt(flatten)]
155         directories: Directories,
156         #[structopt(flatten)]
157         excluded_directories: ExcludedDirectories,
158         #[structopt(flatten)]
159         excluded_items: ExcludedItems,
160         // #[structopt(short = "D", long, help = "Delete found files")]
161         // delete_files: bool, TODO
162         #[structopt(short = "z", long, default_value = "artist,title", parse(try_from_str = parse_music_duplicate_type), help = "Search method (title, artist, album_title, album_artist, year)", long_help = "Sets which rows must be equal to set this files as duplicates(may be mixed, but must be divided by commas).")]
163         music_similarity: MusicSimilarity,
164         #[structopt(flatten)]
165         file_to_save: FileToSave,
166         #[structopt(flatten)]
167         not_recursive: NotRecursive,
168         #[structopt(short, long, parse(try_from_str = parse_minimal_file_size), default_value = "8192", help = "Minimum size in bytes", long_help = "Minimum size of checked files in bytes, assigning bigger value may speed up searching")]
169         minimal_file_size: u64,
170         #[structopt(short = "i", long, parse(try_from_str = parse_maximal_file_size), default_value = "18446744073709551615", help = "Maximum size in bytes", long_help = "Maximum size of checked files in bytes, assigning lower value may speed up searching")]
171         maximal_file_size: u64,
172     },
173     #[structopt(name = "symlinks", about = "Finds invalid symlinks", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n    czkawka symlinks -d /home/kicikici/ /home/szczek -e /home/kicikici/jestempsem -x jpg -f results.txt")]
174     InvalidSymlinks {
175         #[structopt(flatten)]
176         directories: Directories,
177         #[structopt(flatten)]
178         excluded_directories: ExcludedDirectories,
179         #[structopt(flatten)]
180         excluded_items: ExcludedItems,
181         #[structopt(flatten)]
182         allowed_extensions: AllowedExtensions,
183         #[structopt(short = "D", long, help = "Delete found files")]
184         delete_files: bool,
185         #[structopt(flatten)]
186         file_to_save: FileToSave,
187         #[structopt(flatten)]
188         not_recursive: NotRecursive,
189     },
190     #[structopt(name = "broken", about = "Finds broken files", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n    czkawka broken -d /home/kicikici/ /home/szczek -e /home/kicikici/jestempsem -x jpg -f results.txt")]
191     BrokenFiles {
192         #[structopt(flatten)]
193         directories: Directories,
194         #[structopt(flatten)]
195         excluded_directories: ExcludedDirectories,
196         #[structopt(flatten)]
197         excluded_items: ExcludedItems,
198         #[structopt(flatten)]
199         allowed_extensions: AllowedExtensions,
200         #[structopt(short = "D", long, help = "Delete found files")]
201         delete_files: bool,
202         #[structopt(flatten)]
203         file_to_save: FileToSave,
204         #[structopt(flatten)]
205         not_recursive: NotRecursive,
206     },
207     #[structopt(name = "tester", about = "Contains various test", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n    czkawka tests -i")]
208     Tester {
209         #[structopt(short = "i", long = "test_image", help = "Test speed of hashing provided test.jpg image with different filters and methods.")]
210         test_image: bool,
211     },
212 }
213 
214 #[derive(Debug, StructOpt)]
215 pub struct Directories {
216     #[structopt(short, long, parse(from_os_str), required = true, help = "Directorie(s) to search", long_help = "List of directorie(s) which will be searched(absolute path)")]
217     pub directories: Vec<PathBuf>,
218 }
219 
220 #[derive(Debug, StructOpt)]
221 pub struct ExcludedDirectories {
222     #[structopt(short, long, parse(from_os_str), help = "Excluded directorie(s)", long_help = "List of directorie(s) which will be excluded from search(absolute path)")]
223     pub excluded_directories: Vec<PathBuf>,
224 }
225 
226 #[derive(Debug, StructOpt)]
227 pub struct ExcludedItems {
228     #[structopt(short = "E", long, help = "Excluded item(s)", long_help = "List of excluded item(s) which contains * wildcard(may be slow, so use -e where possible)")]
229     pub excluded_items: Vec<String>,
230 }
231 
// Optional list of file extensions to check (flag `-x`). The long help also advertises the
// IMAGE / TEXT / VIDEO / MUSIC shorthands; they are not expanded in this file - presumably the
// consumer of `allowed_extensions` does that (TODO confirm).
// Written as a plain comment so it is not picked up as help text by structopt.
#[derive(Debug, StructOpt)]
pub struct AllowedExtensions {
    #[structopt(
        short = "x",
        long,
        help = "Allowed file extension(s)",
        long_help = "List of checked files with provided extension(s). There are also helpful macros which allow to easy use a typical extensions like:\nIMAGE(\"jpg,kra,gif,png,bmp,tiff,hdr,svg\"),\nTEXT(\"txt,doc,docx,odt,rtf\"),\nVIDEO(\"mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp\") or\nMUSIC(\"mp3,flac,ogg,tta,wma,webm\")\n "
    )]
    pub allowed_extensions: Vec<String>,
}
242 
243 #[derive(Debug, StructOpt)]
244 pub struct NotRecursive {
245     #[structopt(short = "R", long, help = "Prevents from recursive check of folders")]
246     pub not_recursive: bool,
247 }
248 
249 #[derive(Debug, StructOpt)]
250 pub struct FileToSave {
251     #[structopt(short, long, value_name = "file-name", help = "Saves the results into the file")]
252     pub file_to_save: Option<PathBuf>,
253 }
254 
255 #[derive(Debug, StructOpt)]
256 pub struct AllowHardLinks {
257     #[structopt(short = "L", long, help = "Do not ignore hard links")]
258     pub allow_hard_links: bool,
259 }
260 
261 #[derive(Debug, StructOpt)]
262 pub struct DryRun {
263     #[structopt(long, help = "Do nothing and print the operation that would happen.")]
264     pub dryrun: bool,
265 }
266 
267 impl FileToSave {
file_name(&self) -> Option<&str>268     pub fn file_name(&self) -> Option<&str> {
269         if let Some(file_name) = &self.file_to_save {
270             return file_name.to_str();
271         }
272 
273         None
274     }
275 }
276 
parse_hash_type(src: &str) -> Result<HashType, &'static str>277 fn parse_hash_type(src: &str) -> Result<HashType, &'static str> {
278     match src.to_ascii_lowercase().as_str() {
279         "blake3" => Ok(HashType::Blake3),
280         "crc32" => Ok(HashType::Crc32),
281         "xxh3" => Ok(HashType::Xxh3),
282         _ => Err("Couldn't parse the hash type (allowed: BLAKE3, CRC32, XXH3)"),
283     }
284 }
285 
parse_checking_method(src: &str) -> Result<CheckingMethod, &'static str>286 fn parse_checking_method(src: &str) -> Result<CheckingMethod, &'static str> {
287     match src.to_ascii_lowercase().as_str() {
288         "name" => Ok(CheckingMethod::Name),
289         "size" => Ok(CheckingMethod::Size),
290         "hash" => Ok(CheckingMethod::Hash),
291         "hashmb" => Ok(CheckingMethod::HashMb),
292         _ => Err("Couldn't parse the search method (allowed: NAME, SIZE, HASH, HASHMB)"),
293     }
294 }
295 
parse_delete_method(src: &str) -> Result<DeleteMethod, &'static str>296 fn parse_delete_method(src: &str) -> Result<DeleteMethod, &'static str> {
297     match src.to_ascii_lowercase().as_str() {
298         "none" => Ok(DeleteMethod::None),
299         "aen" => Ok(DeleteMethod::AllExceptNewest),
300         "aeo" => Ok(DeleteMethod::AllExceptOldest),
301         "hard" => Ok(DeleteMethod::HardLink),
302         "on" => Ok(DeleteMethod::OneNewest),
303         "oo" => Ok(DeleteMethod::OneOldest),
304         _ => Err("Couldn't parse the delete method (allowed: AEN, AEO, ON, OO, HARD)"),
305     }
306 }
307 
parse_similar_images_similarity(src: &str) -> Result<SimilarityPreset, &'static str>308 fn parse_similar_images_similarity(src: &str) -> Result<SimilarityPreset, &'static str> {
309     match src.to_lowercase().replace('_', "").as_str() {
310         "minimal" => Ok(SimilarityPreset::Minimal),
311         "verysmall" => Ok(SimilarityPreset::VerySmall),
312         "small" => Ok(SimilarityPreset::Small),
313         "medium" => Ok(SimilarityPreset::Medium),
314         "high" => Ok(SimilarityPreset::High),
315         "veryhigh" => Ok(SimilarityPreset::VeryHigh),
316         _ => Err("Couldn't parse the image similarity preset (allowed: Minimal, VerySmall, Small, Medium, High, VeryHigh)"),
317     }
318 }
319 
/// Parses a minimum file size in bytes.
///
/// # Errors
/// Returns the integer parser's message when `src` is not a valid `u64`, and a dedicated
/// message when the value is `0`.
fn parse_minimal_file_size(src: &str) -> Result<u64, String> {
    let size = src.parse::<u64>().map_err(|err| err.to_string())?;
    if size == 0 {
        return Err(String::from("Minimum file size must be at least 1 byte"));
    }
    Ok(size)
}
332 
/// Parses a maximum file size in bytes; any valid `u64` (including `0`) is accepted.
///
/// # Errors
/// Returns the integer parser's message when `src` is not a valid `u64`.
fn parse_maximal_file_size(src: &str) -> Result<u64, String> {
    src.parse::<u64>().map_err(|err| err.to_string())
}
339 
parse_similar_image_filter(src: &str) -> Result<FilterType, String>340 fn parse_similar_image_filter(src: &str) -> Result<FilterType, String> {
341     let filter_type;
342     filter_type = match src.to_lowercase().as_str() {
343         "lanczos3" => FilterType::Lanczos3,
344         "nearest" => FilterType::Nearest,
345         "triangle" => FilterType::Triangle,
346         "faussian" => FilterType::Gaussian,
347         "catmullrom" => FilterType::CatmullRom,
348         _ => return Err("Couldn't parse the image resize filter (allowed: Lanczos3, Nearest, Triangle, Faussian, Catmullrom)".to_string()),
349     };
350     Ok(filter_type)
351 }
parse_similar_hash_algorithm(src: &str) -> Result<HashAlg, String>352 fn parse_similar_hash_algorithm(src: &str) -> Result<HashAlg, String> {
353     let algorithm;
354     algorithm = match src.to_lowercase().as_str() {
355         "mean" => HashAlg::Mean,
356         "gradient" => HashAlg::Gradient,
357         "blockhash" => HashAlg::Blockhash,
358         "vertgradient" => HashAlg::VertGradient,
359         "doublegradient" => HashAlg::DoubleGradient,
360         _ => return Err("Couldn't parse the hash algorithm (allowed: Mean, Gradient, Blockhash, VertGradient, DoubleGradient)".to_string()),
361     };
362     Ok(algorithm)
363 }
364 
/// Parses the image hash size argument; only the exact strings `4`, `8` and `16` are accepted.
///
/// # Errors
/// Returns a message listing the allowed sizes for any other input.
fn parse_image_hash_size(src: &str) -> Result<u8, String> {
    let normalized = src.to_lowercase();
    match normalized.as_str() {
        "4" => Ok(4),
        "8" => Ok(8),
        "16" => Ok(16),
        _ => Err(String::from("Couldn't parse the image hash size (allowed: 4, 8, 16)")),
    }
}
375 
parse_music_duplicate_type(src: &str) -> Result<MusicSimilarity, String>376 fn parse_music_duplicate_type(src: &str) -> Result<MusicSimilarity, String> {
377     if src.is_empty() {
378         return Ok(MusicSimilarity::NONE);
379     }
380 
381     let mut similarity: MusicSimilarity = MusicSimilarity::NONE;
382 
383     let parts: Vec<&str> = src.split(',').collect();
384 
385     if parts.iter().any(|e| e.to_lowercase().contains("title") && !e.to_lowercase().contains("album")) {
386         similarity |= MusicSimilarity::TITLE;
387     }
388     if parts.iter().any(|e| e.to_lowercase().contains("artist") && !e.to_lowercase().contains("album")) {
389         similarity |= MusicSimilarity::ARTIST;
390     }
391     if parts.iter().any(|e| e.to_lowercase().contains("title") && e.to_lowercase().contains("album")) {
392         similarity |= MusicSimilarity::ALBUM_TITLE;
393     }
394     if parts.iter().any(|e| e.to_lowercase().contains("artist") && e.to_lowercase().contains("album")) {
395         similarity |= MusicSimilarity::ALBUM_ARTIST;
396     }
397     if parts.iter().any(|e| e.to_lowercase().contains("year")) {
398         similarity |= MusicSimilarity::YEAR;
399     }
400 
401     if similarity == MusicSimilarity::NONE {
402         return Err("Couldn't parse the music search method (allowed: title,artist,album_title,album_artist,year)".to_string());
403     }
404 
405     Ok(similarity)
406 }
407 
/// Description of the help flag; passed as `help_message` to the structopt attributes of
/// `Commands` and of its sub-commands.
static HELP_MESSAGE: &str = "Prints help information (--help will give more information)";
409 
/// Top-level help layout, passed as `template` to the structopt attribute of `Commands`.
///
/// The `{bin}`, `{version}`, `{usage}`, `{flags}` and `{subcommands}` tokens are placeholders
/// that are expected to be substituted by the help renderer; everything else is printed
/// verbatim, including the example invocations at the bottom.
const HELP_TEMPLATE: &str = r#"
{bin} {version}

USAGE:
    {usage} [SCFLAGS] [SCOPTIONS]

FLAGS:
{flags}

SUBCOMMANDS:
{subcommands}

    try "{usage} -h" to get more info about a specific tool

EXAMPLES:
    {bin} dup -d /home/rafal -e /home/rafal/Obrazy  -m 25 -x 7z rar IMAGE -s hashmb -f results.txt -D aeo
    {bin} empty-folders -d /home/rafal/rr /home/gateway -f results.txt
    {bin} big -d /home/rafal/ /home/piszczal -e /home/rafal/Roman -n 25 -x VIDEO -f results.txt
    {bin} empty-files -d /home/rafal /home/szczekacz -e /home/rafal/Pulpit -R -f results.txt
    {bin} temp -d /home/rafal/ -E */.git */tmp* *Pulpit -f results.txt -D
    {bin} image -d /home/rafal -e /home/rafal/Pulpit -f results.txt
    {bin} zeroed -d /home/rafal -e /home/krzak -f results.txt
    {bin} music -d /home/rafal -e /home/rafal/Pulpit -z "artist,year, ARTISTALBUM, ALBUM___tiTlE"  -f results.txt
    {bin} symlinks -d /home/kicikici/ /home/szczek -e /home/kicikici/jestempsem -x jpg -f results.txt
    {bin} broken -d /home/mikrut/ -e /home/mikrut/trakt -f results.txt"#;
435