1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 //! Memory profiling functions. 6 7 use ipc_channel::ipc::{self, IpcReceiver}; 8 use ipc_channel::router::ROUTER; 9 use profile_traits::mem::{ProfilerChan, ProfilerMsg, ReportKind, Reporter, ReporterRequest}; 10 use profile_traits::mem::ReportsChan; 11 use std::borrow::ToOwned; 12 use std::cmp::Ordering; 13 use std::collections::HashMap; 14 use std::thread; 15 use std::time::Instant; 16 use time::duration_from_seconds; 17 18 pub struct Profiler { 19 /// The port through which messages are received. 20 pub port: IpcReceiver<ProfilerMsg>, 21 22 /// Registered memory reporters. 23 reporters: HashMap<String, Reporter>, 24 25 /// Instant at which this profiler was created. 26 created: Instant, 27 } 28 29 const JEMALLOC_HEAP_ALLOCATED_STR: &'static str = "jemalloc-heap-allocated"; 30 const SYSTEM_HEAP_ALLOCATED_STR: &'static str = "system-heap-allocated"; 31 32 impl Profiler { create(period: Option<f64>) -> ProfilerChan33 pub fn create(period: Option<f64>) -> ProfilerChan { 34 let (chan, port) = ipc::channel().unwrap(); 35 36 // Create the timer thread if a period was provided. 37 if let Some(period) = period { 38 let chan = chan.clone(); 39 thread::Builder::new().name("Memory profiler timer".to_owned()).spawn(move || { 40 loop { 41 thread::sleep(duration_from_seconds(period)); 42 if chan.send(ProfilerMsg::Print).is_err() { 43 break; 44 } 45 } 46 }).expect("Thread spawning failed"); 47 } 48 49 // Always spawn the memory profiler. If there is no timer thread it won't receive regular 50 // `Print` events, but it will still receive the other events. 51 thread::Builder::new().name("Memory profiler".to_owned()).spawn(move || { 52 let mut mem_profiler = Profiler::new(port); 53 mem_profiler.start(); 54 }).expect("Thread spawning failed"); 55 56 let mem_profiler_chan = ProfilerChan(chan); 57 58 // Register the system memory reporter, which will run on its own thread. It never needs to 59 // be unregistered, because as long as the memory profiler is running the system memory 60 // reporter can make measurements. 61 let (system_reporter_sender, system_reporter_receiver) = ipc::channel().unwrap(); 62 ROUTER.add_route(system_reporter_receiver.to_opaque(), Box::new(|message| { 63 let request: ReporterRequest = message.to().unwrap(); 64 system_reporter::collect_reports(request) 65 })); 66 mem_profiler_chan.send(ProfilerMsg::RegisterReporter("system".to_owned(), 67 Reporter(system_reporter_sender))); 68 69 mem_profiler_chan 70 } 71 new(port: IpcReceiver<ProfilerMsg>) -> Profiler72 pub fn new(port: IpcReceiver<ProfilerMsg>) -> Profiler { 73 Profiler { 74 port: port, 75 reporters: HashMap::new(), 76 created: Instant::now(), 77 } 78 } 79 start(&mut self)80 pub fn start(&mut self) { 81 while let Ok(msg) = self.port.recv() { 82 if !self.handle_msg(msg) { 83 break 84 } 85 } 86 } 87 handle_msg(&mut self, msg: ProfilerMsg) -> bool88 fn handle_msg(&mut self, msg: ProfilerMsg) -> bool { 89 match msg { 90 ProfilerMsg::RegisterReporter(name, reporter) => { 91 // Panic if it has already been registered. 92 let name_clone = name.clone(); 93 match self.reporters.insert(name, reporter) { 94 None => true, 95 Some(_) => panic!(format!("RegisterReporter: '{}' name is already in use", 96 name_clone)), 97 } 98 }, 99 100 ProfilerMsg::UnregisterReporter(name) => { 101 // Panic if it hasn't previously been registered. 102 match self.reporters.remove(&name) { 103 Some(_) => true, 104 None => 105 panic!(format!("UnregisterReporter: '{}' name is unknown", &name)), 106 } 107 }, 108 109 ProfilerMsg::Print => { 110 self.handle_print_msg(); 111 true 112 }, 113 114 ProfilerMsg::Exit => false 115 } 116 } 117 handle_print_msg(&self)118 fn handle_print_msg(&self) { 119 let elapsed = self.created.elapsed(); 120 println!("Begin memory reports {}", elapsed.as_secs()); 121 println!("|"); 122 123 // Collect reports from memory reporters. 124 // 125 // This serializes the report-gathering. It might be worth creating a new scoped thread for 126 // each reporter once we have enough of them. 127 // 128 // If anything goes wrong with a reporter, we just skip it. 129 // 130 // We also track the total memory reported on the jemalloc heap and the system heap, and 131 // use that to compute the special "jemalloc-heap-unclassified" and 132 // "system-heap-unclassified" values. 133 134 let mut forest = ReportsForest::new(); 135 136 let mut jemalloc_heap_reported_size = 0; 137 let mut system_heap_reported_size = 0; 138 139 let mut jemalloc_heap_allocated_size: Option<usize> = None; 140 let mut system_heap_allocated_size: Option<usize> = None; 141 142 for reporter in self.reporters.values() { 143 let (chan, port) = ipc::channel().unwrap(); 144 reporter.collect_reports(ReportsChan(chan)); 145 if let Ok(mut reports) = port.recv() { 146 for report in &mut reports { 147 // Add "explicit" to the start of the path, when appropriate. 148 match report.kind { 149 ReportKind::ExplicitJemallocHeapSize | 150 ReportKind::ExplicitSystemHeapSize | 151 ReportKind::ExplicitNonHeapSize | 152 ReportKind::ExplicitUnknownLocationSize => 153 report.path.insert(0, String::from("explicit")), 154 ReportKind::NonExplicitSize => {}, 155 } 156 157 // Update the reported fractions of the heaps, when appropriate. 158 match report.kind { 159 ReportKind::ExplicitJemallocHeapSize => 160 jemalloc_heap_reported_size += report.size, 161 ReportKind::ExplicitSystemHeapSize => 162 system_heap_reported_size += report.size, 163 _ => {}, 164 } 165 166 // Record total size of the heaps, when we see them. 167 if report.path.len() == 1 { 168 if report.path[0] == JEMALLOC_HEAP_ALLOCATED_STR { 169 assert!(jemalloc_heap_allocated_size.is_none()); 170 jemalloc_heap_allocated_size = Some(report.size); 171 } else if report.path[0] == SYSTEM_HEAP_ALLOCATED_STR { 172 assert!(system_heap_allocated_size.is_none()); 173 system_heap_allocated_size = Some(report.size); 174 } 175 } 176 177 // Insert the report. 178 forest.insert(&report.path, report.size); 179 } 180 } 181 } 182 183 // Compute and insert the heap-unclassified values. 184 if let Some(jemalloc_heap_allocated_size) = jemalloc_heap_allocated_size { 185 forest.insert(&path!["explicit", "jemalloc-heap-unclassified"], 186 jemalloc_heap_allocated_size - jemalloc_heap_reported_size); 187 } 188 if let Some(system_heap_allocated_size) = system_heap_allocated_size { 189 forest.insert(&path!["explicit", "system-heap-unclassified"], 190 system_heap_allocated_size - system_heap_reported_size); 191 } 192 193 forest.print(); 194 195 println!("|"); 196 println!("End memory reports"); 197 println!(""); 198 } 199 } 200 201 /// A collection of one or more reports with the same initial path segment. A ReportsTree 202 /// containing a single node is described as "degenerate". 203 struct ReportsTree { 204 /// For leaf nodes, this is the sum of the sizes of all reports that mapped to this location. 205 /// For interior nodes, this is the sum of the sizes of all its child nodes. 206 size: usize, 207 208 /// For leaf nodes, this is the count of all reports that mapped to this location. 209 /// For interor nodes, this is always zero. 210 count: u32, 211 212 /// The segment from the report path that maps to this node. 213 path_seg: String, 214 215 /// Child nodes. 216 children: Vec<ReportsTree>, 217 } 218 219 impl ReportsTree { new(path_seg: String) -> ReportsTree220 fn new(path_seg: String) -> ReportsTree { 221 ReportsTree { 222 size: 0, 223 count: 0, 224 path_seg: path_seg, 225 children: vec![] 226 } 227 } 228 229 // Searches the tree's children for a path_seg match, and returns the index if there is a 230 // match. find_child(&self, path_seg: &str) -> Option<usize>231 fn find_child(&self, path_seg: &str) -> Option<usize> { 232 for (i, child) in self.children.iter().enumerate() { 233 if child.path_seg == *path_seg { 234 return Some(i); 235 } 236 } 237 None 238 } 239 240 // Insert the path and size into the tree, adding any nodes as necessary. insert(&mut self, path: &[String], size: usize)241 fn insert(&mut self, path: &[String], size: usize) { 242 let mut t: &mut ReportsTree = self; 243 for path_seg in path { 244 let i = match t.find_child(&path_seg) { 245 Some(i) => i, 246 None => { 247 let new_t = ReportsTree::new(path_seg.clone()); 248 t.children.push(new_t); 249 t.children.len() - 1 250 }, 251 }; 252 let tmp = t; // this temporary is needed to satisfy the borrow checker 253 t = &mut tmp.children[i]; 254 } 255 256 t.size += size; 257 t.count += 1; 258 } 259 260 // Fill in sizes for interior nodes and sort sub-trees accordingly. Should only be done once 261 // all the reports have been inserted. compute_interior_node_sizes_and_sort(&mut self) -> usize262 fn compute_interior_node_sizes_and_sort(&mut self) -> usize { 263 if !self.children.is_empty() { 264 // Interior node. Derive its size from its children. 265 if self.size != 0 { 266 // This will occur if e.g. we have paths ["a", "b"] and ["a", "b", "c"]. 267 panic!("one report's path is a sub-path of another report's path"); 268 } 269 for child in &mut self.children { 270 self.size += child.compute_interior_node_sizes_and_sort(); 271 } 272 // Now that child sizes have been computed, we can sort the children. 273 self.children.sort_by(|t1, t2| t2.size.cmp(&t1.size)); 274 } 275 self.size 276 } 277 print(&self, depth: i32)278 fn print(&self, depth: i32) { 279 if !self.children.is_empty() { 280 assert_eq!(self.count, 0); 281 } 282 283 let mut indent_str = String::new(); 284 for _ in 0..depth { 285 indent_str.push_str(" "); 286 } 287 288 let mebi = 1024f64 * 1024f64; 289 let count_str = if self.count > 1 { format!(" [{}]", self.count) } else { "".to_owned() }; 290 println!("|{}{:8.2} MiB -- {}{}", 291 indent_str, (self.size as f64) / mebi, self.path_seg, count_str); 292 293 for child in &self.children { 294 child.print(depth + 1); 295 } 296 } 297 } 298 299 /// A collection of ReportsTrees. It represents the data from multiple memory reports in a form 300 /// that's good to print. 301 struct ReportsForest { 302 trees: HashMap<String, ReportsTree>, 303 } 304 305 impl ReportsForest { new() -> ReportsForest306 fn new() -> ReportsForest { 307 ReportsForest { 308 trees: HashMap::new(), 309 } 310 } 311 312 // Insert the path and size into the forest, adding any trees and nodes as necessary. insert(&mut self, path: &[String], size: usize)313 fn insert(&mut self, path: &[String], size: usize) { 314 let (head, tail) = path.split_first().unwrap(); 315 // Get the right tree, creating it if necessary. 316 if !self.trees.contains_key(head) { 317 self.trees.insert(head.clone(), ReportsTree::new(head.clone())); 318 } 319 let t = self.trees.get_mut(head).unwrap(); 320 321 // Use tail because the 0th path segment was used to find the right tree in the forest. 322 t.insert(tail, size); 323 } 324 print(&mut self)325 fn print(&mut self) { 326 // Fill in sizes of interior nodes, and recursively sort the sub-trees. 327 for (_, tree) in &mut self.trees { 328 tree.compute_interior_node_sizes_and_sort(); 329 } 330 331 // Put the trees into a sorted vector. Primary sort: degenerate trees (those containing a 332 // single node) come after non-degenerate trees. Secondary sort: alphabetical order of the 333 // root node's path_seg. 334 let mut v = vec![]; 335 for (_, tree) in &self.trees { 336 v.push(tree); 337 } 338 v.sort_by(|a, b| { 339 if a.children.is_empty() && !b.children.is_empty() { 340 Ordering::Greater 341 } else if !a.children.is_empty() && b.children.is_empty() { 342 Ordering::Less 343 } else { 344 a.path_seg.cmp(&b.path_seg) 345 } 346 }); 347 348 // Print the forest. 349 for tree in &v { 350 tree.print(0); 351 // Print a blank line after non-degenerate trees. 352 if !tree.children.is_empty() { 353 println!("|"); 354 } 355 } 356 } 357 } 358 359 //--------------------------------------------------------------------------- 360 361 mod system_reporter { 362 #[cfg(all(feature = "unstable", not(target_os = "windows")))] 363 use libc::{c_void, size_t}; 364 #[cfg(target_os = "linux")] 365 use libc::c_int; 366 use profile_traits::mem::{Report, ReportKind, ReporterRequest}; 367 #[cfg(all(feature = "unstable", not(target_os = "windows")))] 368 use std::ffi::CString; 369 #[cfg(all(feature = "unstable", not(target_os = "windows")))] 370 use std::mem::size_of; 371 #[cfg(all(feature = "unstable", not(target_os = "windows")))] 372 use std::ptr::null_mut; 373 use super::{JEMALLOC_HEAP_ALLOCATED_STR, SYSTEM_HEAP_ALLOCATED_STR}; 374 #[cfg(target_os = "macos")] 375 use task_info::task_basic_info::{virtual_size, resident_size}; 376 377 /// Collects global measurements from the OS and heap allocators. collect_reports(request: ReporterRequest)378 pub fn collect_reports(request: ReporterRequest) { 379 let mut reports = vec![]; 380 { 381 let mut report = |path, size| { 382 if let Some(size) = size { 383 reports.push(Report { 384 path: path, 385 kind: ReportKind::NonExplicitSize, 386 size: size, 387 }); 388 } 389 }; 390 391 // Virtual and physical memory usage, as reported by the OS. 392 report(path!["vsize"], vsize()); 393 report(path!["resident"], resident()); 394 395 // Memory segments, as reported by the OS. 396 for seg in resident_segments() { 397 report(path!["resident-according-to-smaps", seg.0], Some(seg.1)); 398 } 399 400 // Total number of bytes allocated by the application on the system 401 // heap. 402 report(path![SYSTEM_HEAP_ALLOCATED_STR], system_heap_allocated()); 403 404 // The descriptions of the following jemalloc measurements are taken 405 // directly from the jemalloc documentation. 406 407 // "Total number of bytes allocated by the application." 408 report(path![JEMALLOC_HEAP_ALLOCATED_STR], jemalloc_stat("stats.allocated")); 409 410 // "Total number of bytes in active pages allocated by the application. 411 // This is a multiple of the page size, and greater than or equal to 412 // |stats.allocated|." 413 report(path!["jemalloc-heap-active"], jemalloc_stat("stats.active")); 414 415 // "Total number of bytes in chunks mapped on behalf of the application. 416 // This is a multiple of the chunk size, and is at least as large as 417 // |stats.active|. This does not include inactive chunks." 418 report(path!["jemalloc-heap-mapped"], jemalloc_stat("stats.mapped")); 419 } 420 421 request.reports_channel.send(reports); 422 } 423 424 #[cfg(target_os = "linux")] 425 extern { mallinfo() -> struct_mallinfo426 fn mallinfo() -> struct_mallinfo; 427 } 428 429 #[cfg(target_os = "linux")] 430 #[repr(C)] 431 pub struct struct_mallinfo { 432 arena: c_int, 433 ordblks: c_int, 434 smblks: c_int, 435 hblks: c_int, 436 hblkhd: c_int, 437 usmblks: c_int, 438 fsmblks: c_int, 439 uordblks: c_int, 440 fordblks: c_int, 441 keepcost: c_int, 442 } 443 444 #[cfg(target_os = "linux")] system_heap_allocated() -> Option<usize>445 fn system_heap_allocated() -> Option<usize> { 446 let info: struct_mallinfo = unsafe { mallinfo() }; 447 448 // The documentation in the glibc man page makes it sound like |uordblks| would suffice, 449 // but that only gets the small allocations that are put in the brk heap. We need |hblkhd| 450 // as well to get the larger allocations that are mmapped. 451 // 452 // These fields are unfortunately |int| and so can overflow (becoming negative) if memory 453 // usage gets high enough. So don't report anything in that case. In the non-overflow case 454 // we cast the two values to usize before adding them to make sure the sum also doesn't 455 // overflow. 456 if info.hblkhd < 0 || info.uordblks < 0 { 457 None 458 } else { 459 Some(info.hblkhd as usize + info.uordblks as usize) 460 } 461 } 462 463 #[cfg(not(target_os = "linux"))] system_heap_allocated() -> Option<usize>464 fn system_heap_allocated() -> Option<usize> { 465 None 466 } 467 468 #[cfg(all(feature = "unstable", not(target_os = "windows")))] 469 use jemalloc_sys::mallctl; 470 471 #[cfg(all(feature = "unstable", not(target_os = "windows")))] jemalloc_stat(value_name: &str) -> Option<usize>472 fn jemalloc_stat(value_name: &str) -> Option<usize> { 473 // Before we request the measurement of interest, we first send an "epoch" 474 // request. Without that jemalloc gives cached statistics(!) which can be 475 // highly inaccurate. 476 let epoch_name = "epoch"; 477 let epoch_c_name = CString::new(epoch_name).unwrap(); 478 let mut epoch: u64 = 0; 479 let epoch_ptr = &mut epoch as *mut _ as *mut c_void; 480 let mut epoch_len = size_of::<u64>() as size_t; 481 482 let value_c_name = CString::new(value_name).unwrap(); 483 let mut value: size_t = 0; 484 let value_ptr = &mut value as *mut _ as *mut c_void; 485 let mut value_len = size_of::<size_t>() as size_t; 486 487 // Using the same values for the `old` and `new` parameters is enough 488 // to get the statistics updated. 489 let rv = unsafe { 490 mallctl(epoch_c_name.as_ptr(), epoch_ptr, &mut epoch_len, epoch_ptr, 491 epoch_len) 492 }; 493 if rv != 0 { 494 return None; 495 } 496 497 let rv = unsafe { 498 mallctl(value_c_name.as_ptr(), value_ptr, &mut value_len, null_mut(), 0) 499 }; 500 if rv != 0 { 501 return None; 502 } 503 504 Some(value as usize) 505 } 506 507 #[cfg(any(target_os = "windows", not(feature = "unstable")))] jemalloc_stat(_value_name: &str) -> Option<usize>508 fn jemalloc_stat(_value_name: &str) -> Option<usize> { 509 None 510 } 511 512 #[cfg(target_os = "linux")] page_size() -> usize513 fn page_size() -> usize { 514 unsafe { 515 ::libc::sysconf(::libc::_SC_PAGESIZE) as usize 516 } 517 } 518 519 #[cfg(target_os = "linux")] proc_self_statm_field(field: usize) -> Option<usize>520 fn proc_self_statm_field(field: usize) -> Option<usize> { 521 use std::fs::File; 522 use std::io::Read; 523 524 let mut f = File::open("/proc/self/statm").ok()?; 525 let mut contents = String::new(); 526 f.read_to_string(&mut contents).ok()?; 527 let s = contents.split_whitespace().nth(field)?; 528 let npages = s.parse::<usize>().ok()?; 529 Some(npages * page_size()) 530 } 531 532 #[cfg(target_os = "linux")] vsize() -> Option<usize>533 fn vsize() -> Option<usize> { 534 proc_self_statm_field(0) 535 } 536 537 #[cfg(target_os = "linux")] resident() -> Option<usize>538 fn resident() -> Option<usize> { 539 proc_self_statm_field(1) 540 } 541 542 #[cfg(target_os = "macos")] vsize() -> Option<usize>543 fn vsize() -> Option<usize> { 544 virtual_size() 545 } 546 547 #[cfg(target_os = "macos")] resident() -> Option<usize>548 fn resident() -> Option<usize> { 549 resident_size() 550 } 551 552 #[cfg(not(any(target_os = "linux", target_os = "macos")))] vsize() -> Option<usize>553 fn vsize() -> Option<usize> { 554 None 555 } 556 557 #[cfg(not(any(target_os = "linux", target_os = "macos")))] resident() -> Option<usize>558 fn resident() -> Option<usize> { 559 None 560 } 561 562 #[cfg(target_os = "linux")] resident_segments() -> Vec<(String, usize)>563 fn resident_segments() -> Vec<(String, usize)> { 564 use regex::Regex; 565 use std::collections::HashMap; 566 use std::collections::hash_map::Entry; 567 use std::fs::File; 568 use std::io::{BufReader, BufRead}; 569 570 // The first line of an entry in /proc/<pid>/smaps looks just like an entry 571 // in /proc/<pid>/maps: 572 // 573 // address perms offset dev inode pathname 574 // 02366000-025d8000 rw-p 00000000 00:00 0 [heap] 575 // 576 // Each of the following lines contains a key and a value, separated 577 // by ": ", where the key does not contain either of those characters. 578 // For example: 579 // 580 // Rss: 132 kB 581 582 let f = match File::open("/proc/self/smaps") { 583 Ok(f) => BufReader::new(f), 584 Err(_) => return vec![], 585 }; 586 587 let seg_re = Regex::new( 588 r"^[:xdigit:]+-[:xdigit:]+ (....) [:xdigit:]+ [:xdigit:]+:[:xdigit:]+ \d+ +(.*)").unwrap(); 589 let rss_re = Regex::new(r"^Rss: +(\d+) kB").unwrap(); 590 591 // We record each segment's resident size. 592 let mut seg_map: HashMap<String, usize> = HashMap::new(); 593 594 #[derive(PartialEq)] 595 enum LookingFor { Segment, Rss } 596 let mut looking_for = LookingFor::Segment; 597 598 let mut curr_seg_name = String::new(); 599 600 // Parse the file. 601 for line in f.lines() { 602 let line = match line { 603 Ok(line) => line, 604 Err(_) => continue, 605 }; 606 if looking_for == LookingFor::Segment { 607 // Look for a segment info line. 608 let cap = match seg_re.captures(&line) { 609 Some(cap) => cap, 610 None => continue, 611 }; 612 let perms = cap.get(1).unwrap().as_str(); 613 let pathname = cap.get(2).unwrap().as_str(); 614 615 // Construct the segment name from its pathname and permissions. 616 curr_seg_name.clear(); 617 if pathname == "" || pathname.starts_with("[stack:") { 618 // Anonymous memory. Entries marked with "[stack:nnn]" 619 // look like thread stacks but they may include other 620 // anonymous mappings, so we can't trust them and just 621 // treat them as entirely anonymous. 622 curr_seg_name.push_str("anonymous"); 623 } else { 624 curr_seg_name.push_str(pathname); 625 } 626 curr_seg_name.push_str(" ("); 627 curr_seg_name.push_str(perms); 628 curr_seg_name.push_str(")"); 629 630 looking_for = LookingFor::Rss; 631 } else { 632 // Look for an "Rss:" line. 633 let cap = match rss_re.captures(&line) { 634 Some(cap) => cap, 635 None => continue, 636 }; 637 let rss = cap.get(1).unwrap().as_str().parse::<usize>().unwrap() * 1024; 638 639 if rss > 0 { 640 // Aggregate small segments into "other". 641 let seg_name = if rss < 512 * 1024 { 642 "other".to_owned() 643 } else { 644 curr_seg_name.clone() 645 }; 646 match seg_map.entry(seg_name) { 647 Entry::Vacant(entry) => { entry.insert(rss); }, 648 Entry::Occupied(mut entry) => *entry.get_mut() += rss, 649 } 650 } 651 652 looking_for = LookingFor::Segment; 653 } 654 } 655 656 // Note that the sum of all these segments' RSS values differs from the "resident" 657 // measurement obtained via /proc/<pid>/statm in resident(). It's unclear why this 658 // difference occurs; for some processes the measurements match, but for Servo they do not. 659 seg_map.into_iter().collect() 660 } 661 662 #[cfg(not(target_os = "linux"))] resident_segments() -> Vec<(String, usize)>663 fn resident_segments() -> Vec<(String, usize)> { 664 vec![] 665 } 666 } 667