1 //! Simple hierarchical profiler
2 use std::{
3 cell::RefCell,
4 collections::{BTreeMap, HashSet},
5 env, fmt,
6 io::{stderr, Write},
7 sync::{
8 atomic::{AtomicBool, Ordering},
9 RwLock,
10 },
11 time::{Duration, Instant},
12 };
13
14 use once_cell::sync::Lazy;
15
16 use crate::tree::{Idx, Tree};
17
18 /// Filtering syntax
19 /// env RA_PROFILE=* // dump everything
20 /// env RA_PROFILE=foo|bar|baz // enabled only selected entries
21 /// env RA_PROFILE=*@3>10 // dump everything, up to depth 3, if it takes more than 10 ms
init()22 pub fn init() {
23 countme::enable(env::var("RA_COUNT").is_ok());
24 let spec = env::var("RA_PROFILE").unwrap_or_default();
25 init_from(&spec);
26 }
27
init_from(spec: &str)28 pub fn init_from(spec: &str) {
29 let filter = if spec.is_empty() { Filter::disabled() } else { Filter::from_spec(spec) };
30 filter.install();
31 }
32
33 type Label = &'static str;
34
35 /// This function starts a profiling scope in the current execution stack with a given description.
36 /// It returns a `Profile` struct that measures elapsed time between this method invocation and `Profile` struct drop.
37 /// It supports nested profiling scopes in case when this function is invoked multiple times at the execution stack.
38 /// In this case the profiling information will be nested at the output.
39 /// Profiling information is being printed in the stderr.
40 ///
41 /// # Example
42 /// ```
43 /// profile::init_from("profile1|profile2@2");
44 /// profiling_function1();
45 ///
46 /// fn profiling_function1() {
47 /// let _p = profile::span("profile1");
48 /// profiling_function2();
49 /// }
50 ///
51 /// fn profiling_function2() {
52 /// let _p = profile::span("profile2");
53 /// }
54 /// ```
55 /// This will print in the stderr the following:
56 /// ```text
57 /// 0ms - profile
58 /// 0ms - profile2
59 /// ```
60 #[inline]
span(label: Label) -> ProfileSpan61 pub fn span(label: Label) -> ProfileSpan {
62 debug_assert!(!label.is_empty());
63
64 let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
65 if enabled && with_profile_stack(|stack| stack.push(label)) {
66 ProfileSpan(Some(ProfilerImpl { label, detail: None }))
67 } else {
68 ProfileSpan(None)
69 }
70 }
71
72 #[inline]
heartbeat_span() -> HeartbeatSpan73 pub fn heartbeat_span() -> HeartbeatSpan {
74 let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
75 HeartbeatSpan::new(enabled)
76 }
77
78 #[inline]
heartbeat()79 pub fn heartbeat() {
80 let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
81 if enabled {
82 with_profile_stack(|it| it.heartbeat(1));
83 }
84 }
85
86 pub struct ProfileSpan(Option<ProfilerImpl>);
87
88 struct ProfilerImpl {
89 label: Label,
90 detail: Option<String>,
91 }
92
93 impl ProfileSpan {
detail(mut self, detail: impl FnOnce() -> String) -> ProfileSpan94 pub fn detail(mut self, detail: impl FnOnce() -> String) -> ProfileSpan {
95 if let Some(profiler) = &mut self.0 {
96 profiler.detail = Some(detail());
97 }
98 self
99 }
100 }
101
102 impl Drop for ProfilerImpl {
103 #[inline]
drop(&mut self)104 fn drop(&mut self) {
105 with_profile_stack(|it| it.pop(self.label, self.detail.take()));
106 }
107 }
108
109 pub struct HeartbeatSpan {
110 enabled: bool,
111 }
112
113 impl HeartbeatSpan {
114 #[inline]
new(enabled: bool) -> Self115 pub fn new(enabled: bool) -> Self {
116 if enabled {
117 with_profile_stack(|it| it.heartbeats(true));
118 }
119 Self { enabled }
120 }
121 }
122
123 impl Drop for HeartbeatSpan {
drop(&mut self)124 fn drop(&mut self) {
125 if self.enabled {
126 with_profile_stack(|it| it.heartbeats(false));
127 }
128 }
129 }
130
131 static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false);
132 static FILTER: Lazy<RwLock<Filter>> = Lazy::new(Default::default);
133
with_profile_stack<T>(f: impl FnOnce(&mut ProfileStack) -> T) -> T134 fn with_profile_stack<T>(f: impl FnOnce(&mut ProfileStack) -> T) -> T {
135 thread_local!(static STACK: RefCell<ProfileStack> = RefCell::new(ProfileStack::new()));
136 STACK.with(|it| f(&mut *it.borrow_mut()))
137 }
138
139 #[derive(Default, Clone, Debug)]
140 struct Filter {
141 depth: usize,
142 allowed: HashSet<String>,
143 longer_than: Duration,
144 heartbeat_longer_than: Duration,
145 version: usize,
146 }
147
148 impl Filter {
disabled() -> Filter149 fn disabled() -> Filter {
150 Filter::default()
151 }
152
from_spec(mut spec: &str) -> Filter153 fn from_spec(mut spec: &str) -> Filter {
154 let longer_than = if let Some(idx) = spec.rfind('>') {
155 let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than");
156 spec = &spec[..idx];
157 Duration::from_millis(longer_than)
158 } else {
159 Duration::new(0, 0)
160 };
161 let heartbeat_longer_than = longer_than;
162
163 let depth = if let Some(idx) = spec.rfind('@') {
164 let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth");
165 spec = &spec[..idx];
166 depth
167 } else {
168 999
169 };
170 let allowed =
171 if spec == "*" { HashSet::new() } else { spec.split('|').map(String::from).collect() };
172 Filter { depth, allowed, longer_than, heartbeat_longer_than, version: 0 }
173 }
174
install(mut self)175 fn install(mut self) {
176 PROFILING_ENABLED.store(self.depth > 0, Ordering::SeqCst);
177 let mut old = FILTER.write().unwrap();
178 self.version = old.version + 1;
179 *old = self;
180 }
181 }
182
183 struct ProfileStack {
184 frames: Vec<Frame>,
185 filter: Filter,
186 messages: Tree<Message>,
187 heartbeats: bool,
188 }
189
190 struct Frame {
191 t: Instant,
192 heartbeats: u32,
193 }
194
195 #[derive(Default)]
196 struct Message {
197 duration: Duration,
198 label: Label,
199 detail: Option<String>,
200 }
201
202 impl ProfileStack {
new() -> ProfileStack203 fn new() -> ProfileStack {
204 ProfileStack {
205 frames: Vec::new(),
206 messages: Tree::default(),
207 filter: Default::default(),
208 heartbeats: false,
209 }
210 }
211
push(&mut self, label: Label) -> bool212 fn push(&mut self, label: Label) -> bool {
213 if self.frames.is_empty() {
214 if let Ok(f) = FILTER.try_read() {
215 if f.version > self.filter.version {
216 self.filter = f.clone();
217 }
218 };
219 }
220 if self.frames.len() > self.filter.depth {
221 return false;
222 }
223 let allowed = &self.filter.allowed;
224 if self.frames.is_empty() && !allowed.is_empty() && !allowed.contains(label) {
225 return false;
226 }
227
228 self.frames.push(Frame { t: Instant::now(), heartbeats: 0 });
229 self.messages.start();
230 true
231 }
232
pop(&mut self, label: Label, detail: Option<String>)233 fn pop(&mut self, label: Label, detail: Option<String>) {
234 let frame = self.frames.pop().unwrap();
235 let duration = frame.t.elapsed();
236
237 if self.heartbeats {
238 self.heartbeat(frame.heartbeats);
239 let avg_span = duration / (frame.heartbeats + 1);
240 if avg_span > self.filter.heartbeat_longer_than {
241 eprintln!("Too few heartbeats {} ({}/{:?})?", label, frame.heartbeats, duration);
242 }
243 }
244
245 self.messages.finish(Message { duration, label, detail });
246 if self.frames.is_empty() {
247 let longer_than = self.filter.longer_than;
248 // Convert to millis for comparison to avoid problems with rounding
249 // (otherwise we could print `0ms` despite user's `>0` filter when
250 // `duration` is just a few nanos).
251 if duration.as_millis() > longer_than.as_millis() {
252 if let Some(root) = self.messages.root() {
253 print(&self.messages, root, 0, longer_than, &mut stderr().lock());
254 }
255 }
256 self.messages.clear();
257 }
258 }
259
heartbeats(&mut self, yes: bool)260 fn heartbeats(&mut self, yes: bool) {
261 self.heartbeats = yes;
262 }
heartbeat(&mut self, n: u32)263 fn heartbeat(&mut self, n: u32) {
264 if let Some(frame) = self.frames.last_mut() {
265 frame.heartbeats += n;
266 }
267 }
268 }
269
print( tree: &Tree<Message>, curr: Idx<Message>, level: u32, longer_than: Duration, out: &mut impl Write, )270 fn print(
271 tree: &Tree<Message>,
272 curr: Idx<Message>,
273 level: u32,
274 longer_than: Duration,
275 out: &mut impl Write,
276 ) {
277 let current_indent = " ".repeat(level as usize);
278 let detail = tree[curr].detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default();
279 writeln!(
280 out,
281 "{}{} - {}{}",
282 current_indent,
283 ms(tree[curr].duration),
284 tree[curr].label,
285 detail,
286 )
287 .expect("printing profiling info");
288
289 let mut accounted_for = Duration::default();
290 let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
291 for child in tree.children(curr) {
292 accounted_for += tree[child].duration;
293
294 if tree[child].duration.as_millis() > longer_than.as_millis() {
295 print(tree, child, level + 1, longer_than, out);
296 } else {
297 let (total_duration, cnt) =
298 short_children.entry(tree[child].label).or_insert((Duration::default(), 0));
299 *total_duration += tree[child].duration;
300 *cnt += 1;
301 }
302 }
303
304 for (child_msg, (duration, count)) in &short_children {
305 writeln!(out, " {}{} - {} ({} calls)", current_indent, ms(*duration), child_msg, count)
306 .expect("printing profiling info");
307 }
308
309 let unaccounted = tree[curr].duration - accounted_for;
310 if tree.children(curr).next().is_some() && unaccounted > longer_than {
311 writeln!(out, " {}{} - ???", current_indent, ms(unaccounted))
312 .expect("printing profiling info");
313 }
314 }
315
316 #[allow(non_camel_case_types)]
317 struct ms(Duration);
318
319 impl fmt::Display for ms {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result320 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
321 match self.0.as_millis() {
322 0 => f.write_str(" 0 "),
323 n => write!(f, "{:5}ms", n),
324 }
325 }
326 }
327