1 use crate::{crossdev, get_size_or_panic, InodeFilter, WalkOptions};
2 use anyhow::Result;
3 use filesize::PathExt;
4 use petgraph::{graph::NodeIndex, stable_graph::StableGraph, Directed, Direction};
5 use std::{
6     fs::Metadata,
7     io,
8     path::{Path, PathBuf},
9     time::{Duration, Instant},
10 };
11 
12 pub type TreeIndex = NodeIndex;
13 pub type Tree = StableGraph<EntryData, (), Directed>;
14 
15 #[derive(Eq, PartialEq, Debug, Default, Clone)]
16 pub struct EntryData {
17     pub name: PathBuf,
18     /// The entry's size in bytes. If it's a directory, the size is the aggregated file size of all children
19     pub size: u128,
20     /// If set, the item meta-data could not be obtained
21     pub metadata_io_error: bool,
22 }
23 
24 const REFRESH_RATE: Duration = Duration::from_millis(100);
25 
26 /// The result of the previous filesystem traversal
27 #[derive(Default, Debug)]
28 pub struct Traversal {
29     /// A tree representing the entire filestem traversal
30     pub tree: Tree,
31     /// The top-level node of the tree.
32     pub root_index: TreeIndex,
33     /// Amount of files or directories we have seen during the filesystem traversal
34     pub entries_traversed: u64,
35     /// Total amount of IO errors encountered when traversing the filesystem
36     pub io_errors: u64,
37     /// Total amount of bytes seen during the traversal
38     pub total_bytes: Option<u128>,
39 }
40 
41 impl Traversal {
from_walk( mut walk_options: WalkOptions, input: Vec<PathBuf>, mut update: impl FnMut(&mut Traversal) -> Result<bool>, ) -> Result<Option<Traversal>>42     pub fn from_walk(
43         mut walk_options: WalkOptions,
44         input: Vec<PathBuf>,
45         mut update: impl FnMut(&mut Traversal) -> Result<bool>,
46     ) -> Result<Option<Traversal>> {
47         fn set_size_or_panic(tree: &mut Tree, node_idx: TreeIndex, current_size_at_depth: u128) {
48             tree.node_weight_mut(node_idx)
49                 .expect("node for parent index we just retrieved")
50                 .size = current_size_at_depth;
51         }
52         fn parent_or_panic(tree: &mut Tree, parent_node_idx: TreeIndex) -> TreeIndex {
53             tree.neighbors_directed(parent_node_idx, Direction::Incoming)
54                 .next()
55                 .expect("every node in the iteration has a parent")
56         }
57         fn pop_or_panic(v: &mut Vec<u128>) -> u128 {
58             v.pop().expect("sizes per level to be in sync with graph")
59         }
60 
61         let mut t = {
62             let mut tree = Tree::new();
63             let root_index = tree.add_node(EntryData::default());
64             Traversal {
65                 tree,
66                 root_index,
67                 ..Default::default()
68             }
69         };
70 
71         let (mut previous_node_idx, mut parent_node_idx) = (t.root_index, t.root_index);
72         let mut sizes_per_depth_level = Vec::new();
73         let mut current_size_at_depth: u128 = 0;
74         let mut previous_depth = 0;
75         let mut inodes = InodeFilter::default();
76 
77         let mut last_checked = Instant::now();
78 
79         const INITIAL_CHECK_INTERVAL: usize = 500;
80         let mut check_instant_every = INITIAL_CHECK_INTERVAL;
81         if walk_options.threads == 0 {
82             // avoid using the global rayon pool, as it will keep a lot of threads alive after we are done.
83             // Also means that we will spin up a bunch of threads per root path, instead of reusing them.
84             walk_options.threads = num_cpus::get();
85         }
86 
87         #[cfg(not(windows))]
88         fn size_on_disk(_parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
89             name.size_on_disk_fast(meta)
90         }
91         #[cfg(windows)]
92         fn size_on_disk(parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
93             parent.join(name).size_on_disk_fast(meta)
94         }
95 
96         for path in input.into_iter() {
97             let mut last_seen_eid = 0;
98             let device_id = crossdev::init(path.as_ref())?;
99             for (eid, entry) in walk_options
100                 .iter_from_path(path.as_ref())
101                 .into_iter()
102                 .enumerate()
103             {
104                 t.entries_traversed += 1;
105                 let mut data = EntryData::default();
106                 match entry {
107                     Ok(entry) => {
108                         data.name = if entry.depth < 1 {
109                             path.clone()
110                         } else {
111                             entry.file_name.into()
112                         };
113                         let file_size = match &entry.client_state {
114                             Some(Ok(ref m))
115                                 if !m.is_dir()
116                                     && (walk_options.count_hard_links || inodes.add(m))
117                                     && (walk_options.cross_filesystems
118                                         || crossdev::is_same_device(device_id, m)) =>
119                             {
120                                 if walk_options.apparent_size {
121                                     m.len()
122                                 } else {
123                                     size_on_disk(&entry.parent_path, &data.name, m).unwrap_or_else(
124                                         |_| {
125                                             t.io_errors += 1;
126                                             data.metadata_io_error = true;
127                                             0
128                                         },
129                                     )
130                                 }
131                             }
132                             Some(Ok(_)) => 0,
133                             Some(Err(_)) => {
134                                 t.io_errors += 1;
135                                 data.metadata_io_error = true;
136                                 0
137                             }
138                             None => 0, // a directory
139                         } as u128;
140 
141                         match (entry.depth, previous_depth) {
142                             (n, p) if n > p => {
143                                 sizes_per_depth_level.push(current_size_at_depth);
144                                 current_size_at_depth = file_size;
145                                 parent_node_idx = previous_node_idx;
146                             }
147                             (n, p) if n < p => {
148                                 for _ in n..p {
149                                     set_size_or_panic(
150                                         &mut t.tree,
151                                         parent_node_idx,
152                                         current_size_at_depth,
153                                     );
154                                     current_size_at_depth +=
155                                         pop_or_panic(&mut sizes_per_depth_level);
156                                     parent_node_idx = parent_or_panic(&mut t.tree, parent_node_idx);
157                                 }
158                                 current_size_at_depth += file_size;
159                                 set_size_or_panic(
160                                     &mut t.tree,
161                                     parent_node_idx,
162                                     current_size_at_depth,
163                                 );
164                             }
165                             _ => {
166                                 current_size_at_depth += file_size;
167                             }
168                         };
169 
170                         data.size = file_size;
171                         let entry_index = t.tree.add_node(data);
172 
173                         t.tree.add_edge(parent_node_idx, entry_index, ());
174                         previous_node_idx = entry_index;
175                         previous_depth = entry.depth;
176                     }
177                     Err(_) => {
178                         if previous_depth == 0 {
179                             data.name = path.clone();
180                             let entry_index = t.tree.add_node(data);
181                             t.tree.add_edge(parent_node_idx, entry_index, ());
182                         }
183 
184                         t.io_errors += 1
185                     }
186                 }
187 
188                 if eid != 0
189                     && eid % check_instant_every == 0
190                     && last_checked.elapsed() >= REFRESH_RATE
191                 {
192                     let now = Instant::now();
193                     let elapsed = (now - last_checked).as_millis() as f64;
194                     check_instant_every = (INITIAL_CHECK_INTERVAL as f64
195                         * ((eid - last_seen_eid) as f64 / INITIAL_CHECK_INTERVAL as f64)
196                         * (REFRESH_RATE.as_millis() as f64 / elapsed))
197                         .max(1.0) as usize;
198                     last_seen_eid = eid;
199                     last_checked = now;
200 
201                     if update(&mut t)? {
202                         return Ok(None);
203                     }
204                 }
205             }
206         }
207 
208         sizes_per_depth_level.push(current_size_at_depth);
209         current_size_at_depth = 0;
210         for _ in 0..previous_depth {
211             current_size_at_depth += pop_or_panic(&mut sizes_per_depth_level);
212             set_size_or_panic(&mut t.tree, parent_node_idx, current_size_at_depth);
213             parent_node_idx = parent_or_panic(&mut t.tree, parent_node_idx);
214         }
215         let root_size = t.recompute_root_size();
216         set_size_or_panic(&mut t.tree, t.root_index, root_size);
217         t.total_bytes = Some(root_size);
218 
219         Ok(Some(t))
220     }
221 
recompute_root_size(&self) -> u128222     fn recompute_root_size(&self) -> u128 {
223         self.tree
224             .neighbors_directed(self.root_index, Direction::Outgoing)
225             .map(|idx| get_size_or_panic(&self.tree, idx))
226             .sum()
227     }
228 }
229