1 //! Simple hierarchical profiler
2 use std::{
3     cell::RefCell,
4     collections::{BTreeMap, HashSet},
5     env, fmt,
6     io::{stderr, Write},
7     sync::{
8         atomic::{AtomicBool, Ordering},
9         RwLock,
10     },
11     time::{Duration, Instant},
12 };
13 
14 use once_cell::sync::Lazy;
15 
16 use crate::tree::{Idx, Tree};
17 
18 /// Filtering syntax
19 /// env RA_PROFILE=*             // dump everything
20 /// env RA_PROFILE=foo|bar|baz   // enabled only selected entries
21 /// env RA_PROFILE=*@3>10        // dump everything, up to depth 3, if it takes more than 10 ms
init()22 pub fn init() {
23     countme::enable(env::var("RA_COUNT").is_ok());
24     let spec = env::var("RA_PROFILE").unwrap_or_default();
25     init_from(&spec);
26 }
27 
init_from(spec: &str)28 pub fn init_from(spec: &str) {
29     let filter = if spec.is_empty() { Filter::disabled() } else { Filter::from_spec(spec) };
30     filter.install();
31 }
32 
33 type Label = &'static str;
34 
35 /// This function starts a profiling scope in the current execution stack with a given description.
36 /// It returns a `Profile` struct that measures elapsed time between this method invocation and `Profile` struct drop.
37 /// It supports nested profiling scopes in case when this function is invoked multiple times at the execution stack.
38 /// In this case the profiling information will be nested at the output.
39 /// Profiling information is being printed in the stderr.
40 ///
41 /// # Example
42 /// ```
43 /// profile::init_from("profile1|profile2@2");
44 /// profiling_function1();
45 ///
46 /// fn profiling_function1() {
47 ///     let _p = profile::span("profile1");
48 ///     profiling_function2();
49 /// }
50 ///
51 /// fn profiling_function2() {
52 ///     let _p = profile::span("profile2");
53 /// }
54 /// ```
55 /// This will print in the stderr the following:
56 /// ```text
57 ///  0ms - profile
58 ///      0ms - profile2
59 /// ```
60 #[inline]
span(label: Label) -> ProfileSpan61 pub fn span(label: Label) -> ProfileSpan {
62     debug_assert!(!label.is_empty());
63 
64     let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
65     if enabled && with_profile_stack(|stack| stack.push(label)) {
66         ProfileSpan(Some(ProfilerImpl { label, detail: None }))
67     } else {
68         ProfileSpan(None)
69     }
70 }
71 
72 #[inline]
heartbeat_span() -> HeartbeatSpan73 pub fn heartbeat_span() -> HeartbeatSpan {
74     let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
75     HeartbeatSpan::new(enabled)
76 }
77 
78 #[inline]
heartbeat()79 pub fn heartbeat() {
80     let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
81     if enabled {
82         with_profile_stack(|it| it.heartbeat(1));
83     }
84 }
85 
86 pub struct ProfileSpan(Option<ProfilerImpl>);
87 
88 struct ProfilerImpl {
89     label: Label,
90     detail: Option<String>,
91 }
92 
93 impl ProfileSpan {
detail(mut self, detail: impl FnOnce() -> String) -> ProfileSpan94     pub fn detail(mut self, detail: impl FnOnce() -> String) -> ProfileSpan {
95         if let Some(profiler) = &mut self.0 {
96             profiler.detail = Some(detail());
97         }
98         self
99     }
100 }
101 
102 impl Drop for ProfilerImpl {
103     #[inline]
drop(&mut self)104     fn drop(&mut self) {
105         with_profile_stack(|it| it.pop(self.label, self.detail.take()));
106     }
107 }
108 
109 pub struct HeartbeatSpan {
110     enabled: bool,
111 }
112 
113 impl HeartbeatSpan {
114     #[inline]
new(enabled: bool) -> Self115     pub fn new(enabled: bool) -> Self {
116         if enabled {
117             with_profile_stack(|it| it.heartbeats(true));
118         }
119         Self { enabled }
120     }
121 }
122 
123 impl Drop for HeartbeatSpan {
drop(&mut self)124     fn drop(&mut self) {
125         if self.enabled {
126             with_profile_stack(|it| it.heartbeats(false));
127         }
128     }
129 }
130 
131 static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false);
132 static FILTER: Lazy<RwLock<Filter>> = Lazy::new(Default::default);
133 
with_profile_stack<T>(f: impl FnOnce(&mut ProfileStack) -> T) -> T134 fn with_profile_stack<T>(f: impl FnOnce(&mut ProfileStack) -> T) -> T {
135     thread_local!(static STACK: RefCell<ProfileStack> = RefCell::new(ProfileStack::new()));
136     STACK.with(|it| f(&mut *it.borrow_mut()))
137 }
138 
139 #[derive(Default, Clone, Debug)]
140 struct Filter {
141     depth: usize,
142     allowed: HashSet<String>,
143     longer_than: Duration,
144     heartbeat_longer_than: Duration,
145     version: usize,
146 }
147 
148 impl Filter {
disabled() -> Filter149     fn disabled() -> Filter {
150         Filter::default()
151     }
152 
from_spec(mut spec: &str) -> Filter153     fn from_spec(mut spec: &str) -> Filter {
154         let longer_than = if let Some(idx) = spec.rfind('>') {
155             let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than");
156             spec = &spec[..idx];
157             Duration::from_millis(longer_than)
158         } else {
159             Duration::new(0, 0)
160         };
161         let heartbeat_longer_than = longer_than;
162 
163         let depth = if let Some(idx) = spec.rfind('@') {
164             let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth");
165             spec = &spec[..idx];
166             depth
167         } else {
168             999
169         };
170         let allowed =
171             if spec == "*" { HashSet::new() } else { spec.split('|').map(String::from).collect() };
172         Filter { depth, allowed, longer_than, heartbeat_longer_than, version: 0 }
173     }
174 
install(mut self)175     fn install(mut self) {
176         PROFILING_ENABLED.store(self.depth > 0, Ordering::SeqCst);
177         let mut old = FILTER.write().unwrap();
178         self.version = old.version + 1;
179         *old = self;
180     }
181 }
182 
183 struct ProfileStack {
184     frames: Vec<Frame>,
185     filter: Filter,
186     messages: Tree<Message>,
187     heartbeats: bool,
188 }
189 
190 struct Frame {
191     t: Instant,
192     heartbeats: u32,
193 }
194 
195 #[derive(Default)]
196 struct Message {
197     duration: Duration,
198     label: Label,
199     detail: Option<String>,
200 }
201 
202 impl ProfileStack {
new() -> ProfileStack203     fn new() -> ProfileStack {
204         ProfileStack {
205             frames: Vec::new(),
206             messages: Tree::default(),
207             filter: Default::default(),
208             heartbeats: false,
209         }
210     }
211 
push(&mut self, label: Label) -> bool212     fn push(&mut self, label: Label) -> bool {
213         if self.frames.is_empty() {
214             if let Ok(f) = FILTER.try_read() {
215                 if f.version > self.filter.version {
216                     self.filter = f.clone();
217                 }
218             };
219         }
220         if self.frames.len() > self.filter.depth {
221             return false;
222         }
223         let allowed = &self.filter.allowed;
224         if self.frames.is_empty() && !allowed.is_empty() && !allowed.contains(label) {
225             return false;
226         }
227 
228         self.frames.push(Frame { t: Instant::now(), heartbeats: 0 });
229         self.messages.start();
230         true
231     }
232 
pop(&mut self, label: Label, detail: Option<String>)233     fn pop(&mut self, label: Label, detail: Option<String>) {
234         let frame = self.frames.pop().unwrap();
235         let duration = frame.t.elapsed();
236 
237         if self.heartbeats {
238             self.heartbeat(frame.heartbeats);
239             let avg_span = duration / (frame.heartbeats + 1);
240             if avg_span > self.filter.heartbeat_longer_than {
241                 eprintln!("Too few heartbeats {} ({}/{:?})?", label, frame.heartbeats, duration);
242             }
243         }
244 
245         self.messages.finish(Message { duration, label, detail });
246         if self.frames.is_empty() {
247             let longer_than = self.filter.longer_than;
248             // Convert to millis for comparison to avoid problems with rounding
249             // (otherwise we could print `0ms` despite user's `>0` filter when
250             // `duration` is just a few nanos).
251             if duration.as_millis() > longer_than.as_millis() {
252                 if let Some(root) = self.messages.root() {
253                     print(&self.messages, root, 0, longer_than, &mut stderr().lock());
254                 }
255             }
256             self.messages.clear();
257         }
258     }
259 
heartbeats(&mut self, yes: bool)260     fn heartbeats(&mut self, yes: bool) {
261         self.heartbeats = yes;
262     }
heartbeat(&mut self, n: u32)263     fn heartbeat(&mut self, n: u32) {
264         if let Some(frame) = self.frames.last_mut() {
265             frame.heartbeats += n;
266         }
267     }
268 }
269 
print( tree: &Tree<Message>, curr: Idx<Message>, level: u32, longer_than: Duration, out: &mut impl Write, )270 fn print(
271     tree: &Tree<Message>,
272     curr: Idx<Message>,
273     level: u32,
274     longer_than: Duration,
275     out: &mut impl Write,
276 ) {
277     let current_indent = "    ".repeat(level as usize);
278     let detail = tree[curr].detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default();
279     writeln!(
280         out,
281         "{}{} - {}{}",
282         current_indent,
283         ms(tree[curr].duration),
284         tree[curr].label,
285         detail,
286     )
287     .expect("printing profiling info");
288 
289     let mut accounted_for = Duration::default();
290     let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
291     for child in tree.children(curr) {
292         accounted_for += tree[child].duration;
293 
294         if tree[child].duration.as_millis() > longer_than.as_millis() {
295             print(tree, child, level + 1, longer_than, out);
296         } else {
297             let (total_duration, cnt) =
298                 short_children.entry(tree[child].label).or_insert((Duration::default(), 0));
299             *total_duration += tree[child].duration;
300             *cnt += 1;
301         }
302     }
303 
304     for (child_msg, (duration, count)) in &short_children {
305         writeln!(out, "    {}{} - {} ({} calls)", current_indent, ms(*duration), child_msg, count)
306             .expect("printing profiling info");
307     }
308 
309     let unaccounted = tree[curr].duration - accounted_for;
310     if tree.children(curr).next().is_some() && unaccounted > longer_than {
311         writeln!(out, "    {}{} - ???", current_indent, ms(unaccounted))
312             .expect("printing profiling info");
313     }
314 }
315 
316 #[allow(non_camel_case_types)]
317 struct ms(Duration);
318 
319 impl fmt::Display for ms {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result320     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
321         match self.0.as_millis() {
322             0 => f.write_str("    0  "),
323             n => write!(f, "{:5}ms", n),
324         }
325     }
326 }
327