1 use crate::{crossdev, get_size_or_panic, InodeFilter, WalkOptions}; 2 use anyhow::Result; 3 use filesize::PathExt; 4 use petgraph::{graph::NodeIndex, stable_graph::StableGraph, Directed, Direction}; 5 use std::{ 6 fs::Metadata, 7 io, 8 path::{Path, PathBuf}, 9 time::{Duration, Instant}, 10 }; 11 12 pub type TreeIndex = NodeIndex; 13 pub type Tree = StableGraph<EntryData, (), Directed>; 14 15 #[derive(Eq, PartialEq, Debug, Default, Clone)] 16 pub struct EntryData { 17 pub name: PathBuf, 18 /// The entry's size in bytes. If it's a directory, the size is the aggregated file size of all children 19 pub size: u128, 20 /// If set, the item meta-data could not be obtained 21 pub metadata_io_error: bool, 22 } 23 24 const REFRESH_RATE: Duration = Duration::from_millis(100); 25 26 /// The result of the previous filesystem traversal 27 #[derive(Default, Debug)] 28 pub struct Traversal { 29 /// A tree representing the entire filestem traversal 30 pub tree: Tree, 31 /// The top-level node of the tree. 32 pub root_index: TreeIndex, 33 /// Amount of files or directories we have seen during the filesystem traversal 34 pub entries_traversed: u64, 35 /// Total amount of IO errors encountered when traversing the filesystem 36 pub io_errors: u64, 37 /// Total amount of bytes seen during the traversal 38 pub total_bytes: Option<u128>, 39 } 40 41 impl Traversal { from_walk( mut walk_options: WalkOptions, input: Vec<PathBuf>, mut update: impl FnMut(&mut Traversal) -> Result<bool>, ) -> Result<Option<Traversal>>42 pub fn from_walk( 43 mut walk_options: WalkOptions, 44 input: Vec<PathBuf>, 45 mut update: impl FnMut(&mut Traversal) -> Result<bool>, 46 ) -> Result<Option<Traversal>> { 47 fn set_size_or_panic(tree: &mut Tree, node_idx: TreeIndex, current_size_at_depth: u128) { 48 tree.node_weight_mut(node_idx) 49 .expect("node for parent index we just retrieved") 50 .size = current_size_at_depth; 51 } 52 fn parent_or_panic(tree: &mut Tree, parent_node_idx: TreeIndex) -> TreeIndex { 53 tree.neighbors_directed(parent_node_idx, Direction::Incoming) 54 .next() 55 .expect("every node in the iteration has a parent") 56 } 57 fn pop_or_panic(v: &mut Vec<u128>) -> u128 { 58 v.pop().expect("sizes per level to be in sync with graph") 59 } 60 61 let mut t = { 62 let mut tree = Tree::new(); 63 let root_index = tree.add_node(EntryData::default()); 64 Traversal { 65 tree, 66 root_index, 67 ..Default::default() 68 } 69 }; 70 71 let (mut previous_node_idx, mut parent_node_idx) = (t.root_index, t.root_index); 72 let mut sizes_per_depth_level = Vec::new(); 73 let mut current_size_at_depth: u128 = 0; 74 let mut previous_depth = 0; 75 let mut inodes = InodeFilter::default(); 76 77 let mut last_checked = Instant::now(); 78 79 const INITIAL_CHECK_INTERVAL: usize = 500; 80 let mut check_instant_every = INITIAL_CHECK_INTERVAL; 81 if walk_options.threads == 0 { 82 // avoid using the global rayon pool, as it will keep a lot of threads alive after we are done. 83 // Also means that we will spin up a bunch of threads per root path, instead of reusing them. 84 walk_options.threads = num_cpus::get(); 85 } 86 87 #[cfg(not(windows))] 88 fn size_on_disk(_parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> { 89 name.size_on_disk_fast(meta) 90 } 91 #[cfg(windows)] 92 fn size_on_disk(parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> { 93 parent.join(name).size_on_disk_fast(meta) 94 } 95 96 for path in input.into_iter() { 97 let mut last_seen_eid = 0; 98 let device_id = crossdev::init(path.as_ref())?; 99 for (eid, entry) in walk_options 100 .iter_from_path(path.as_ref()) 101 .into_iter() 102 .enumerate() 103 { 104 t.entries_traversed += 1; 105 let mut data = EntryData::default(); 106 match entry { 107 Ok(entry) => { 108 data.name = if entry.depth < 1 { 109 path.clone() 110 } else { 111 entry.file_name.into() 112 }; 113 let file_size = match &entry.client_state { 114 Some(Ok(ref m)) 115 if !m.is_dir() 116 && (walk_options.count_hard_links || inodes.add(m)) 117 && (walk_options.cross_filesystems 118 || crossdev::is_same_device(device_id, m)) => 119 { 120 if walk_options.apparent_size { 121 m.len() 122 } else { 123 size_on_disk(&entry.parent_path, &data.name, m).unwrap_or_else( 124 |_| { 125 t.io_errors += 1; 126 data.metadata_io_error = true; 127 0 128 }, 129 ) 130 } 131 } 132 Some(Ok(_)) => 0, 133 Some(Err(_)) => { 134 t.io_errors += 1; 135 data.metadata_io_error = true; 136 0 137 } 138 None => 0, // a directory 139 } as u128; 140 141 match (entry.depth, previous_depth) { 142 (n, p) if n > p => { 143 sizes_per_depth_level.push(current_size_at_depth); 144 current_size_at_depth = file_size; 145 parent_node_idx = previous_node_idx; 146 } 147 (n, p) if n < p => { 148 for _ in n..p { 149 set_size_or_panic( 150 &mut t.tree, 151 parent_node_idx, 152 current_size_at_depth, 153 ); 154 current_size_at_depth += 155 pop_or_panic(&mut sizes_per_depth_level); 156 parent_node_idx = parent_or_panic(&mut t.tree, parent_node_idx); 157 } 158 current_size_at_depth += file_size; 159 set_size_or_panic( 160 &mut t.tree, 161 parent_node_idx, 162 current_size_at_depth, 163 ); 164 } 165 _ => { 166 current_size_at_depth += file_size; 167 } 168 }; 169 170 data.size = file_size; 171 let entry_index = t.tree.add_node(data); 172 173 t.tree.add_edge(parent_node_idx, entry_index, ()); 174 previous_node_idx = entry_index; 175 previous_depth = entry.depth; 176 } 177 Err(_) => { 178 if previous_depth == 0 { 179 data.name = path.clone(); 180 let entry_index = t.tree.add_node(data); 181 t.tree.add_edge(parent_node_idx, entry_index, ()); 182 } 183 184 t.io_errors += 1 185 } 186 } 187 188 if eid != 0 189 && eid % check_instant_every == 0 190 && last_checked.elapsed() >= REFRESH_RATE 191 { 192 let now = Instant::now(); 193 let elapsed = (now - last_checked).as_millis() as f64; 194 check_instant_every = (INITIAL_CHECK_INTERVAL as f64 195 * ((eid - last_seen_eid) as f64 / INITIAL_CHECK_INTERVAL as f64) 196 * (REFRESH_RATE.as_millis() as f64 / elapsed)) 197 .max(1.0) as usize; 198 last_seen_eid = eid; 199 last_checked = now; 200 201 if update(&mut t)? { 202 return Ok(None); 203 } 204 } 205 } 206 } 207 208 sizes_per_depth_level.push(current_size_at_depth); 209 current_size_at_depth = 0; 210 for _ in 0..previous_depth { 211 current_size_at_depth += pop_or_panic(&mut sizes_per_depth_level); 212 set_size_or_panic(&mut t.tree, parent_node_idx, current_size_at_depth); 213 parent_node_idx = parent_or_panic(&mut t.tree, parent_node_idx); 214 } 215 let root_size = t.recompute_root_size(); 216 set_size_or_panic(&mut t.tree, t.root_index, root_size); 217 t.total_bytes = Some(root_size); 218 219 Ok(Some(t)) 220 } 221 recompute_root_size(&self) -> u128222 fn recompute_root_size(&self) -> u128 { 223 self.tree 224 .neighbors_directed(self.root_index, Direction::Outgoing) 225 .map(|idx| get_size_or_panic(&self.tree, idx)) 226 .sum() 227 } 228 } 229