1 //! A performance monitoring API for Linux.
2 //!
3 //! This crate provides access to processor and kernel counters for things like
4 //! instruction completions, cache references and misses, branch predictions,
5 //! context switches, page faults, and so on.
6 //!
7 //! For example, to compare the number of clock cycles elapsed with the number
8 //! of instructions completed during one call to `println!`:
9 //!
10 //!     use perf_event::{Builder, Group};
11 //!     use perf_event::events::Hardware;
12 //!
13 //!     fn main() -> std::io::Result<()> {
14 //!         // A `Group` lets us enable and disable several counters atomically.
15 //!         let mut group = Group::new()?;
16 //!         let cycles = Builder::new().group(&mut group).kind(Hardware::CPU_CYCLES).build()?;
17 //!         let insns = Builder::new().group(&mut group).kind(Hardware::INSTRUCTIONS).build()?;
18 //!
19 //!         let vec = (0..=51).collect::<Vec<_>>();
20 //!
21 //!         group.enable()?;
22 //!         println!("{:?}", vec);
23 //!         group.disable()?;
24 //!
25 //!         let counts = group.read()?;
26 //!         println!("cycles / instructions: {} / {} ({:.2} cpi)",
27 //!                  counts[&cycles],
28 //!                  counts[&insns],
29 //!                  (counts[&cycles] as f64 / counts[&insns] as f64));
30 //!
31 //!         Ok(())
32 //!     }
33 //!
34 //! This crate is built on top of the Linux [`perf_event_open`][man] system
35 //! call; that documentation has the authoritative explanations of exactly what
36 //! all the counters mean.
37 //!
38 //! There are two main types for measurement:
39 //!
40 //! -   A [`Counter`] is an individual counter. Use [`Builder`] to
41 //!     construct one.
42 //!
43 //! -   A [`Group`] is a collection of counters that can be enabled and
44 //!     disabled atomically, so that they cover exactly the same period of
45 //!     execution, allowing meaningful comparisons of the individual values.
46 //!
47 //! If you're familiar with the kernel API already:
48 //!
49 //! -   A `Builder` holds the arguments to a `perf_event_open` call:
50 //!     a `struct perf_event_attr` and a few other fields.
51 //!
52 //! -   `Counter` and `Group` objects are just event file descriptors, together
53 //!     with their kernel id numbers, and some other details you need to
54 //!     actually use them. They're different types because they yield different
55 //!     types of results, and because you can't retrieve a `Group`'s counts
56 //!     without knowing how many members it has.
57 //!
58 //! ### Call for PRs
59 //!
60 //! Linux's `perf_event_open` API can report all sorts of things this crate
61 //! doesn't yet understand: stack traces, logs of executable and shared library
62 //! activity, tracepoints, kprobes, uprobes, and so on. And beyond the counters
63 //! in the kernel header files, there are others that can only be found at
64 //! runtime by consulting `sysfs`, specific to particular processors and
65 //! devices. For example, modern Intel processors have counters that measure
66 //! power consumption in Joules.
67 //!
68 //! If you find yourself in need of something this crate doesn't support, please
69 //! consider submitting a pull request.
70 //!
71 //! [man]: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
72 
73 #![deny(missing_docs)]
74 
75 use events::Event;
76 use libc::pid_t;
77 use perf_event_open_sys as sys;
78 use perf_event_open_sys::bindings::perf_event_attr;
79 use std::fs::File;
80 use std::io::{self, Read};
81 use std::os::raw::{c_int, c_uint, c_ulong};
82 use std::os::unix::io::{AsRawFd, FromRawFd};
83 
84 pub mod events;
85 
86 /// A counter for one kind of kernel or hardware event.
87 ///
88 /// A `Counter` represents a single performance monitoring counter. You select
89 /// what sort of event you'd like to count when the `Counter` is created, then
90 /// you can enable and disable the counter, call its [`read`] method to
91 /// retrieve the current count, and reset it to zero.
92 ///
93 /// A `Counter`'s value is always a `u64`.
94 ///
95 /// For example, this counts the number of instructions retired (completed)
96 /// during a call to `println!`.
97 ///
98 ///     use perf_event::Builder;
99 ///
100 ///     fn main() -> std::io::Result<()> {
101 ///         let mut counter = Builder::new().build()?;
102 ///
103 ///         let vec = (0..=51).collect::<Vec<_>>();
104 ///
105 ///         counter.enable()?;
106 ///         println!("{:?}", vec);
107 ///         counter.disable()?;
108 ///
109 ///         println!("{} instructions retired", counter.read()?);
110 ///
111 ///         Ok(())
112 ///     }
113 ///
114 /// It is often useful to count several different quantities over the same
115 /// period of time. For example, if you want to measure the average number of
116 /// clock cycles used per instruction, you must count both clock cycles and
117 /// instructions retired, for the same range of execution. The [`Group`] type
118 /// lets you enable, disable, read, and reset any number of counters
119 /// simultaneously.
120 ///
121 /// When a counter is dropped, its kernel resources are freed along with it.
122 ///
123 /// Internally, a `Counter` is just a wrapper around an event file descriptor.
124 ///
125 /// [`read`]: Counter::read
126 pub struct Counter {
127     /// The file descriptor for this counter, returned by `perf_event_open`.
128     ///
129     /// When a `Counter` is dropped, this `File` is dropped, and the kernel
130     /// removes the counter from any group it belongs to.
131     file: File,
132 
133     /// The unique id assigned to this counter by the kernel.
134     id: u64,
135 }
136 
137 /// A builder for [`Counter`]s.
138 ///
139 /// There are dozens of parameters that influence a `Counter`'s behavior.
140 /// `Builder` lets you construct a `Counter` by specifying only those parameters
141 /// for which you don't want the default value.
142 ///
143 /// A freshly built `Counter` is disabled. To begin counting events, you must
144 /// call [`enable`] on the `Counter` or the `Group` to which it belongs.
145 ///
146 /// Internally, a `Builder` is just a wrapper around the kernel's
147 /// `struct perf_event_attr` type. See the [perf_event_open(2)] man page for details.
148 ///
149 /// For example, if you want a `Counter` for instructions retired by the current
150 /// process, those are `Builder`'s defaults, so you need only write:
151 ///
152 ///     # use perf_event::Builder;
153 ///     # fn main() -> std::io::Result<()> {
154 ///     let mut insns = Builder::new().build()?;
155 ///     # Ok(()) }
156 ///
157 /// The [`kind`] method lets you specify what sort of event you want to
158 /// count. So if you'd rather count branch instructions:
159 ///
160 ///     # use perf_event::Builder;
161 ///     # use perf_event::events::Hardware;
162 ///     # fn main() -> std::io::Result<()> {
163 ///     let mut insns = Builder::new()
164 ///         .kind(Hardware::BRANCH_INSTRUCTIONS)
165 ///         .build()?;
166 ///     # Ok(()) }
167 ///
168 /// The [`group`] method lets you gather individual counters into a `Group`
169 /// that can be enabled or disabled atomically:
170 ///
171 ///     # use perf_event::{Builder, Group};
172 ///     # use perf_event::events::Hardware;
173 ///     # fn main() -> std::io::Result<()> {
174 ///     let mut group = Group::new()?;
175 ///     let cycles = Builder::new().group(&mut group).kind(Hardware::CPU_CYCLES).build()?;
176 ///     let insns = Builder::new().group(&mut group).kind(Hardware::INSTRUCTIONS).build()?;
177 ///     # Ok(()) }
178 ///
179 /// Other methods let you select:
180 ///
181 /// -   specific processes or cgroups to observe
182 /// -   specific CPU cores to observe
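///
/// For example, a sketch of watching another process on one CPU core. The
/// pid below is a placeholder, and observing other processes requires
/// `CAP_SYS_PTRACE`:
///
/// ```ignore
/// let pid = 12345;  // placeholder: the process you want to observe
/// let counter = Builder::new()
///     .observe_pid(pid)
///     .one_cpu(0)
///     .build()?;
/// ```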
183 ///
184 /// `Builder` supports only a fraction of the many knobs and dials Linux offers,
185 /// but hopefully it will acquire methods to support more of them as time goes
186 /// on.
187 ///
188 /// [`enable`]: Counter::enable
189 /// [`kind`]: Builder::kind
190 /// [`group`]: Builder::group
191 /// [perf_event_open(2)]: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
192 pub struct Builder<'a> {
193     attrs: perf_event_attr,
194     who: EventPid<'a>,
195     cpu: Option<usize>,
196     group: Option<&'a mut Group>,
197 }
198 
199 #[derive(Debug)]
200 enum EventPid<'a> {
201     /// Monitor the calling process.
202     ThisProcess,
203 
204     /// Monitor the given pid.
205     Other(pid_t),
206 
207     /// Monitor members of the given cgroup.
208     CGroup(&'a File),
209 }
210 
211 /// A group of counters that can be managed as a unit.
212 ///
213 /// A `Group` represents a group of [`Counter`]s that can be enabled,
214 /// disabled, reset, or read as a single atomic operation. This is necessary if
215 /// you want to compare counter values, produce ratios, and so on, since those
216 /// operations are only meaningful on counters that cover exactly the same
217 /// period of execution.
218 ///
219 /// A `Counter` is placed in a group when it is created, by calling the
220 /// `Builder`'s [`group`] method. A `Group`'s [`read`] method returns values
221 /// of all its member counters at once as a [`Counts`] value, which can be
222 /// indexed by `Counter` to retrieve a specific value.
223 ///
224 /// For example, the following program computes the average number of cycles
225 /// used per instruction retired for a call to `println!`:
226 ///
227 ///     # fn main() -> std::io::Result<()> {
228 ///     use perf_event::{Builder, Group};
229 ///     use perf_event::events::Hardware;
230 ///
231 ///     let mut group = Group::new()?;
232 ///     let cycles = Builder::new().group(&mut group).kind(Hardware::CPU_CYCLES).build()?;
233 ///     let insns = Builder::new().group(&mut group).kind(Hardware::INSTRUCTIONS).build()?;
234 ///
235 ///     let vec = (0..=51).collect::<Vec<_>>();
236 ///
237 ///     group.enable()?;
238 ///     println!("{:?}", vec);
239 ///     group.disable()?;
240 ///
241 ///     let counts = group.read()?;
242 ///     println!("cycles / instructions: {} / {} ({:.2} cpi)",
243 ///              counts[&cycles],
244 ///              counts[&insns],
245 ///              (counts[&cycles] as f64 / counts[&insns] as f64));
246 ///     # Ok(()) }
247 ///
248 /// The lifetimes of `Counter`s and `Group`s are independent: placing a
249 /// `Counter` in a `Group` does not take ownership of the `Counter`, nor must
250 /// the `Counter`s in a group outlive the `Group`. If a `Counter` is dropped, it
251 /// is simply removed from its `Group`, and omitted from future results. If a
252 /// `Group` is dropped, its individual counters continue to count.
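///
/// For example, a small sketch of dropping one member; the hidden setup
/// mirrors the program above:
///
///     # fn main() -> std::io::Result<()> {
///     # use perf_event::{Builder, Group};
///     # let mut group = Group::new()?;
///     # let cycles = Builder::new().group(&mut group).build()?;
///     # let insns = Builder::new().group(&mut group).build()?;
///     drop(insns);              // `insns` is removed from the group...
///     let counts = group.read()?;
///     // ...so this read reports only `cycles` (plus the group itself).
///     println!("{} cycles", counts[&cycles]);
///     # Ok(()) }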
253 ///
254 /// Enabling or disabling a `Group` affects each `Counter` that belongs to it.
255 /// Subsequent reads from the `Counter` will not reflect activity while the
256 /// `Group` was disabled, unless the `Counter` is re-enabled individually.
257 ///
258 /// A `Group` and its members must all observe the same tasks and cpus; mixing
259 /// these makes building the `Counter` return an error. Unfortunately, there is
260 /// no way at present to specify a `Group`'s task and cpu, so you can only use
261 /// `Group` on the calling task. If this is a problem, please comment on [#5].
262 ///
263 /// Internally, a `Group` is just a wrapper around an event file descriptor.
264 ///
265 /// ## Limits on group size
266 ///
267 /// Hardware counters are implemented using special-purpose registers on the
268 /// processor, of which there are only a fixed number. (For example, an Intel
269 /// high-end laptop processor from 2015 has four such registers per virtual
270 /// processor.) Without using groups, if you request more hardware counters than
271 /// the processor can actually support, a complete count isn't possible, but the
272 /// kernel will rotate the processor's real registers amongst the measurements
273 /// you've requested to at least produce a sample.
274 ///
275 /// But since the point of a counter group is that its members all cover exactly
276 /// the same period of time, this tactic can't be applied to support large
277 /// groups. If the kernel cannot schedule a group, its counters remain zero. I
278 /// think you can detect this situation by comparing the group's [`time_enabled`]
279 /// and [`time_running`] values. It might also be useful to set the `pinned` bit,
280 /// which puts the counter in an error state if it's not able to be put on the
281 /// CPU; see [#10].
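///
/// A sketch of that check, assuming the group has already been enabled and
/// disabled around the code being measured:
///
///     # fn main() -> std::io::Result<()> {
///     # use perf_event::Group;
///     # let mut group = Group::new()?;
///     # group.enable()?;
///     # group.disable()?;
///     let counts = group.read()?;
///     if counts.time_running() == 0 {
///         println!("the group was never scheduled onto the processor");
///     } else if counts.time_running() < counts.time_enabled() {
///         println!("the group was only scheduled part of the time");
///     }
///     # Ok(()) }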
282 ///
283 /// According to the `perf-list(1)` man page, you may be able to free up a
284 /// hardware counter by disabling the kernel's NMI watchdog, which reserves one
285 /// for detecting kernel hangs:
286 ///
287 /// ```ignore
288 /// $ echo 0 > /proc/sys/kernel/nmi_watchdog
289 /// ```
290 ///
291 /// You can reenable the watchdog when you're done like this:
292 ///
293 /// ```ignore
294 /// $ echo 1 > /proc/sys/kernel/nmi_watchdog
295 /// ```
296 ///
297 /// [`group`]: Builder::group
298 /// [`read`]: Group::read
299 /// [#5]: https://github.com/jimblandy/perf-event/issues/5
300 /// [#10]: https://github.com/jimblandy/perf-event/issues/10
301 /// [`time_enabled`]: Counts::time_enabled
302 /// [`time_running`]: Counts::time_running
303 pub struct Group {
304     /// The file descriptor for this counter, returned by `perf_event_open`.
305     /// This counter itself is for the dummy software event, so it's not
306     /// interesting.
307     file: File,
308 
309     /// The unique id assigned to this group by the kernel. We only use this for
310     /// assertions.
311     id: u64,
312 
313     /// An upper bound on the number of Counters in this group. This lets us
314     /// allocate buffers of sufficient size for PERF_FORMAT_GROUP reads.
315     ///
316     /// There's no way to ask the kernel how many members a group has. And if we
317     /// pass a group read a buffer that's too small, the kernel won't just
318     /// return a truncated result; it returns ENOSPC and leaves the buffer
319     /// untouched. So the buffer just has to be large enough.
320     ///
321     /// Since we're borrowed while building group members, adding members can
322     /// increment this counter. But it's harder to decrement it when a member
323     /// gets dropped: we don't require that a Group outlive its members, so they
324     /// can't necessarily update their `Group`'s count from a `Drop` impl. So we
325     /// just increment, giving us an overestimate, and then correct the count
326     /// when we actually do a read.
327     ///
328     /// This includes the dummy counter for the group itself.
329     max_members: usize
330 }
331 
332 /// A collection of counts from a [`Group`] of counters.
333 ///
334 /// This is the type returned by calling [`read`] on a [`Group`].
335 /// You can index it with a reference to a specific `Counter`:
336 ///
337 ///     # fn main() -> std::io::Result<()> {
338 ///     # use perf_event::{Builder, Group};
339 ///     # let mut group = Group::new()?;
340 ///     # let cycles = Builder::new().group(&mut group).build()?;
341 ///     # let insns = Builder::new().group(&mut group).build()?;
342 ///     let counts = group.read()?;
343 ///     println!("cycles / instructions: {} / {} ({:.2} cpi)",
344 ///              counts[&cycles],
345 ///              counts[&insns],
346 ///              (counts[&cycles] as f64 / counts[&insns] as f64));
347 ///     # Ok(()) }
348 ///
349 /// Or you can iterate over the results it contains:
350 ///
351 ///     # fn main() -> std::io::Result<()> {
352 ///     # use perf_event::Group;
353 ///     # let counts = Group::new()?.read()?;
354 ///     for (id, value) in &counts {
355 ///         println!("Counter id {} has value {}", id, value);
356 ///     }
357 ///     # Ok(()) }
358 ///
359 /// The `id` values produced by this iteration are internal identifiers assigned
360 /// by the kernel. You can use the [`Counter::id`] method to find a
361 /// specific counter's id.
362 ///
363 /// For some kinds of events, the kernel may use timesharing to give all
364 /// counters access to scarce hardware registers. You can see how long a group
365 /// was actually running versus the entire time it was enabled using the
366 /// `time_enabled` and `time_running` methods:
367 ///
368 ///     # fn main() -> std::io::Result<()> {
369 ///     # use perf_event::{Builder, Group};
370 ///     # let mut group = Group::new()?;
371 ///     # let insns = Builder::new().group(&mut group).build()?;
372 ///     # let counts = group.read()?;
373 ///     let scale = counts.time_enabled() as f64 /
374 ///                 counts.time_running() as f64;
375 ///     for (id, value) in &counts {
376 ///         print!("Counter id {} has value {}",
377 ///                id, (*value as f64 * scale) as u64);
378 ///         if scale > 1.0 {
379 ///             print!(" (estimated)");
380 ///         }
381 ///         println!();
382 ///     }
383 ///
384 ///     # Ok(()) }
385 ///
386 /// [`read`]: Group::read
387 pub struct Counts {
388     // Raw results from the `read`.
389     data: Vec<u64>
390 }
391 
392 /// The value of a counter, along with timesharing data.
393 ///
394 /// Some counters are implemented in hardware, and the processor can run
395 /// only a fixed number of them at a time. If more counters are requested
396 /// than the hardware can support, the kernel timeshares them on the
397 /// hardware.
398 ///
399 /// This struct holds the value of a counter, together with the time it was
400 /// enabled, and the proportion of that for which it was actually running.
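///
/// For example, a small sketch of prorating a count by hand, using made-up
/// numbers:
///
///     # use perf_event::CountAndTime;
///     let cat = CountAndTime { count: 1000, time_enabled: 200, time_running: 100 };
///     // The counter was on the hardware for only half the time it was
///     // enabled, so double the observed count to estimate the true figure.
///     let estimate = cat.count * cat.time_enabled / cat.time_running;
///     assert_eq!(estimate, 2000);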
401 #[repr(C)]
402 pub struct CountAndTime {
403     /// The counter value.
404     ///
405     /// The meaning of this field depends on how the counter was configured when
406     /// it was built; see [`Builder`].
407     pub count: u64,
408 
409     /// How long this counter was enabled by the program, in nanoseconds.
410     pub time_enabled: u64,
411 
412     /// How long the kernel actually ran this counter, in nanoseconds.
413     ///
414     /// If `time_enabled == time_running`, then the counter ran for the entire
415     /// period it was enabled, without interruption. Otherwise, the counter
416     /// shared the underlying hardware with others, and you should prorate its
417     /// value accordingly.
418     pub time_running: u64,
419 }
420 
421 impl<'a> EventPid<'a> {
422     // Return the `pid` arg and the `flags` bits representing `self`.
423     fn as_args(&self) -> (pid_t, u32) {
424         match self {
425             EventPid::ThisProcess => (0, 0),
426             EventPid::Other(pid) => (*pid, 0),
427             EventPid::CGroup(file) =>
428                 (file.as_raw_fd(), sys::bindings::PERF_FLAG_PID_CGROUP),
429         }
430     }
431 }
432 
433 impl<'a> Default for Builder<'a> {
434     fn default() -> Builder<'a> {
435 
436         let mut attrs = perf_event_attr::default();
437 
438         // Setting `size` accurately will not prevent the code from working
439         // on older kernels. The module comments for `perf_event_open_sys`
440         // explain why in far too much detail.
441         attrs.size = std::mem::size_of::<perf_event_attr>() as u32;
442 
443         attrs.set_disabled(1);
444         attrs.set_exclude_kernel(1);    // don't count time in kernel
445         attrs.set_exclude_hv(1);        // don't count time in hypervisor
446 
447         // Request data for `time_enabled` and `time_running`.
448         attrs.read_format |= sys::bindings::perf_event_read_format_PERF_FORMAT_TOTAL_TIME_ENABLED as u64 |
449                              sys::bindings::perf_event_read_format_PERF_FORMAT_TOTAL_TIME_RUNNING as u64;
450 
451         let kind = Event::Hardware(events::Hardware::INSTRUCTIONS);
452         attrs.type_ = kind.as_type();
453         attrs.config = kind.as_config();
454 
455         Builder {
456             attrs,
457             who: EventPid::ThisProcess,
458             cpu: None,
459             group: None,
460         }
461     }
462 }
463 
464 impl<'a> Builder<'a> {
465     /// Return a new `Builder`, with all parameters set to their defaults.
466     pub fn new() -> Builder<'a> {
467         Builder::default()
468     }
469 
470     /// Observe the calling process. (This is the default.)
471     pub fn observe_self(mut self) -> Builder<'a> {
472         self.who = EventPid::ThisProcess;
473         self
474     }
475 
476     /// Observe the process with the given process id. This requires
477     /// [`CAP_SYS_PTRACE`][man-capabilities] capabilities.
478     ///
479     /// [man-capabilities]: http://man7.org/linux/man-pages/man7/capabilities.7.html
480     pub fn observe_pid(mut self, pid: pid_t) -> Builder<'a> {
481         self.who = EventPid::Other(pid);
482         self
483     }
484 
485     /// Observe code running in the given [cgroup][man-cgroups] (container). The
486     /// `cgroup` argument should be a `File` referring to the cgroup's directory
487     /// in the cgroupfs filesystem.
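    ///
    /// A sketch, using a made-up cgroup path. Cgroup monitoring is done per
    /// CPU, so a specific CPU is selected as well:
    ///
    /// ```ignore
    /// let cgroup = std::fs::File::open("/sys/fs/cgroup/my-container")?;
    /// let counter = Builder::new()
    ///     .observe_cgroup(&cgroup)
    ///     .one_cpu(0)
    ///     .build()?;
    /// ```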
488     ///
489     /// [man-cgroups]: http://man7.org/linux/man-pages/man7/cgroups.7.html
490     pub fn observe_cgroup(mut self, cgroup: &'a File) -> Builder<'a> {
491         self.who = EventPid::CGroup(cgroup);
492         self
493     }
494 
495     /// Observe only code running on the given CPU core.
496     pub fn one_cpu(mut self, cpu: usize) -> Builder<'a> {
497         self.cpu = Some(cpu);
498         self
499     }
500 
501     /// Observe code running on any CPU core. (This is the default.)
502     pub fn any_cpu(mut self) -> Builder<'a> {
503         self.cpu = None;
504         self
505     }
506 
507     /// Set whether this counter is inherited by new threads.
508     ///
509     /// When this flag is set, this counter observes activity in new threads
510     /// created by any thread already being observed.
511     ///
512     /// By default, the flag is unset: counters are not inherited, and observe
513     /// only the threads specified when they are created.
514     ///
515     /// This flag cannot be set if the counter belongs to a `Group`. Doing so
516     /// will result in an error when the counter is built. This is a kernel
517     /// limitation.
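    ///
    /// A minimal sketch: count instructions in this thread and in a thread
    /// spawned while the counter is enabled:
    ///
    ///     # use perf_event::Builder;
    ///     # fn main() -> std::io::Result<()> {
    ///     let mut counter = Builder::new().inherit(true).build()?;
    ///     counter.enable()?;
    ///     std::thread::spawn(|| {
    ///         // Work done here is observed as well.
    ///         let _vec = (0..=51).collect::<Vec<u64>>();
    ///     }).join().unwrap();
    ///     counter.disable()?;
    ///     println!("{} instructions, including the spawned thread", counter.read()?);
    ///     # Ok(()) }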
518     pub fn inherit(mut self, inherit: bool) -> Builder<'a> {
519         let flag = if inherit { 1 } else { 0 };
520         self.attrs.set_inherit(flag);
521         self
522     }
523 
524     /// Count events of the given kind. This accepts an [`Event`] value,
525     /// or any type that can be converted to one, so you can pass [`Hardware`],
526     /// [`Software`] and [`Cache`] values directly.
527     ///
528     /// The default is to count retired instructions, or
529     /// `Hardware::INSTRUCTIONS` events.
530     ///
531     /// For example, to count level 1 data cache references and misses, pass the
532     /// appropriate `events::Cache` values:
533     ///
534     ///     # fn main() -> std::io::Result<()> {
535     ///     use perf_event::{Builder, Group};
536     ///     use perf_event::events::{Cache, CacheOp, CacheResult, WhichCache};
537     ///
538     ///     const ACCESS: Cache = Cache {
539     ///         which: WhichCache::L1D,
540     ///         operation: CacheOp::READ,
541     ///         result: CacheResult::ACCESS,
542     ///     };
543     ///     const MISS: Cache = Cache { result: CacheResult::MISS, ..ACCESS };
544     ///
545     ///     let mut group = Group::new()?;
546     ///     let access_counter = Builder::new().group(&mut group).kind(ACCESS).build()?;
547     ///     let miss_counter = Builder::new().group(&mut group).kind(MISS).build()?;
548     ///     # Ok(()) }
549     ///
550     /// [`Hardware`]: events::Hardware
551     /// [`Software`]: events::Software
552     /// [`Cache`]: events::Cache
553     pub fn kind<K: Into<Event>>(mut self, kind: K) -> Builder<'a> {
554         let kind = kind.into();
555         self.attrs.type_ = kind.as_type();
556         self.attrs.config = kind.as_config();
557         self
558     }
559 
560     /// Place the counter in the given [`Group`]. Groups allow a set of counters
561     /// to be enabled, disabled, or read as a single atomic operation, so that
562     /// the counts can be usefully compared.
563     ///
564     /// [`Group`]: struct.Group.html
565     pub fn group(mut self, group: &'a mut Group) -> Builder<'a> {
566         self.group = Some(group);
567 
568         // man page: "Members of a group are usually initialized with disabled
569         // set to zero."
570         self.attrs.set_disabled(0);
571 
572         self
573     }
574 
575     /// Construct a [`Counter`] according to the specifications made on this
576     /// `Builder`.
577     ///
578     /// A freshly built `Counter` is disabled. To begin counting events, you
579     /// must call [`enable`] on the `Counter` or the `Group` to which it belongs.
580     ///
581     /// If the `Builder` requests features that the running kernel does not
582     /// support, it returns `Err(e)` where `e.kind() == ErrorKind::Other` and
583     /// `e.raw_os_error() == Some(libc::E2BIG)`.
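    ///
    /// A sketch of checking for that case, using the `libc` crate for the
    /// `E2BIG` constant:
    ///
    /// ```ignore
    /// match Builder::new().build() {
    ///     Ok(counter) => { /* use `counter` */ }
    ///     Err(ref e) if e.raw_os_error() == Some(libc::E2BIG) => {
    ///         eprintln!("kernel too old for the requested features");
    ///     }
    ///     Err(e) => return Err(e),
    /// }
    /// ```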
584     ///
585     /// Unfortunately, problems in counter configuration are detected at this
586     /// point, by the kernel, not earlier when the offending request is made on
587     /// the `Builder`. The kernel's returned errors are not always helpful.
588     ///
589     /// [`Counter`]: struct.Counter.html
590     /// [`enable`]: struct.Counter.html#method.enable
591     pub fn build(mut self) -> std::io::Result<Counter> {
592         let cpu = match self.cpu {
593             Some(cpu) => cpu as c_int,
594             None => -1,
595         };
596         let (pid, flags) = self.who.as_args();
597         let group_fd = match self.group {
598             Some(ref mut g) => {
599                 g.max_members += 1;
600                 g.file.as_raw_fd() as c_int
601             }
602             None => -1,
603         };
604 
605         let file = unsafe {
606             File::from_raw_fd(check_raw_syscall(|| {
607                 sys::perf_event_open(&mut self.attrs, pid, cpu, group_fd, flags as c_ulong)
608             })?)
609         };
610 
611         // If we're going to be part of a Group, retrieve the ID the kernel
612         // assigned us, so we can find our results in a Counts structure. Even
613         // if we're not part of a group, we'll use it in `Debug` output.
614         let mut id = 0_u64;
615         check_errno_syscall(|| unsafe {
616             sys::ioctls::ID(file.as_raw_fd(), &mut id)
617         })?;
618 
619         Ok(Counter { file, id, })
620     }
621 }
622 
623 impl Counter {
624     /// Return this counter's kernel-assigned unique id.
625     ///
626     /// This can be useful when iterating over [`Counts`].
627     ///
628     /// [`Counts`]: struct.Counts.html
629     pub fn id(&self) -> u64 {
630         self.id
631     }
632 
633     /// Allow this `Counter` to begin counting its designated event.
634     ///
635     /// This does not affect whatever value the `Counter` had previously; new
636     /// events add to the current count. To clear a `Counter`, use the
637     /// [`reset`] method.
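    ///
    /// A small sketch of how counts accumulate until they are reset:
    ///
    ///     # use perf_event::Builder;
    ///     # fn main() -> std::io::Result<()> {
    ///     let mut counter = Builder::new().build()?;
    ///     counter.enable()?;
    ///     counter.disable()?;
    ///     counter.enable()?;    // counting resumes; earlier events are kept
    ///     counter.disable()?;
    ///     println!("total so far: {}", counter.read()?);
    ///     counter.reset()?;     // the count now starts over from zero
    ///     println!("after reset: {}", counter.read()?);
    ///     # Ok(()) }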
638     ///
639     /// Note that `Group` also has an [`enable`] method, which enables all
640     /// its member `Counter`s as a single atomic operation.
641     ///
642     /// [`reset`]: #method.reset
643     /// [`enable`]: struct.Group.html#method.enable
644     pub fn enable(&mut self) -> io::Result<()> {
645         check_errno_syscall(|| unsafe {
646             sys::ioctls::ENABLE(self.file.as_raw_fd(), 0)
647         }).map(|_| ())
648     }
649 
650     /// Make this `Counter` stop counting its designated event. Its count is
651     /// unaffected.
652     ///
653     /// Note that `Group` also has a [`disable`] method, which disables all
654     /// its member `Counter`s as a single atomic operation.
655     ///
656     /// [`disable`]: struct.Group.html#method.disable
657     pub fn disable(&mut self) -> io::Result<()> {
658         check_errno_syscall(|| unsafe {
659             sys::ioctls::DISABLE(self.file.as_raw_fd(), 0)
660         }).map(|_| ())
661     }
662 
663     /// Reset the value of this `Counter` to zero.
664     ///
665     /// Note that `Group` also has a [`reset`] method, which resets all
666     /// its member `Counter`s as a single atomic operation.
667     ///
668     /// [`reset`]: struct.Group.html#method.reset
669     pub fn reset(&mut self) -> io::Result<()> {
670         check_errno_syscall(|| unsafe {
671             sys::ioctls::RESET(self.file.as_raw_fd(), 0)
672         }).map(|_| ())
673     }
674 
675     /// Return this `Counter`'s current value as a `u64`.
676     ///
677     /// Consider using the [`read_count_and_time`] method instead of this one. Some
678     /// counters are implemented in hardware, and the processor can support only
679     /// a certain number running at a time. If more counters are requested than
680     /// the hardware can support, the kernel timeshares them on the hardware.
681     /// This method gives you no indication whether this has happened;
682     /// `read_count_and_time` does.
683     ///
684     /// Note that `Group` also has a [`read`] method, which reads all
685     /// its member `Counter`s' values at once.
686     ///
687     /// [`read`]: Group::read
688     /// [`read_count_and_time`]: Counter::read_count_and_time
689     pub fn read(&mut self) -> io::Result<u64> {
690         Ok(self.read_count_and_time()?.count)
691     }
692 
693     /// Return this `Counter`'s current value and timesharing data.
694     ///
695     /// Some counters are implemented in hardware, and the processor can run
696     /// only a fixed number of them at a time. If more counters are requested
697     /// than the hardware can support, the kernel timeshares them on the
698     /// hardware.
699     ///
700     /// This method returns a [`CountAndTime`] struct, whose `count` field holds
701     /// the counter's value, and whose `time_enabled` and `time_running` fields
702     /// indicate how long you had enabled the counter, and how long the counter
703     /// was actually scheduled on the processor. This lets you detect whether
704     /// the counter was timeshared, and adjust your use accordingly. Times
705     /// are reported in nanoseconds.
706     ///
707     ///     # use perf_event::Builder;
708     ///     # fn main() -> std::io::Result<()> {
709     ///     # let mut counter = Builder::new().build()?;
710     ///     let cat = counter.read_count_and_time()?;
711     ///     if cat.time_running == 0 {
712     ///         println!("No data collected.");
713     ///     } else if cat.time_running < cat.time_enabled {
714     ///         // Note: this way of scaling is accurate, but `u128` division
715     ///         // is usually implemented in software, which may be slow.
716     ///         println!("{} instructions (estimated)",
717     ///                  (cat.count as u128 *
718     ///                   cat.time_enabled as u128 / cat.time_running as u128) as u64);
719     ///     } else {
720     ///         println!("{} instructions", cat.count);
721     ///     }
722     ///     # Ok(()) }
723     ///
724     /// Note that `Group` also has a [`read`] method, which reads all
725     /// its member `Counter`s' values at once.
726     ///
727     /// [`read`]: Group::read
728     pub fn read_count_and_time(&mut self) -> io::Result<CountAndTime> {
729         let mut buf = [0_u64; 3];
730         self.file.read_exact(u64::slice_as_bytes_mut(&mut buf))?;
731 
732         let cat = CountAndTime {
733             count: buf[0],
734             time_enabled: buf[1],
735             time_running: buf[2],
736         };
737 
738         // Does the kernel ever return nonsense?
739         assert!(cat.time_running <= cat.time_enabled);
740 
741         Ok(cat)
742     }
743 }
744 
745 impl std::fmt::Debug for Counter {
746     fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
747         write!(fmt, "Counter {{ fd: {}, id: {} }}",
748                self.file.as_raw_fd(), self.id)
749     }
750 }
751 
752 impl Group {
753     /// Construct a new, empty `Group`.
754     #[allow(unused_parens)]
755     pub fn new() -> io::Result<Group> {
756         // Open a placeholder perf counter that we can add other events to.
757         let mut attrs = perf_event_attr::default();
758         attrs.size = std::mem::size_of::<perf_event_attr>() as u32;
759         attrs.type_ = sys::bindings::perf_type_id_PERF_TYPE_SOFTWARE;
760         attrs.config = sys::bindings::perf_sw_ids_PERF_COUNT_SW_DUMMY as u64;
761         attrs.set_disabled(1);
762         attrs.set_exclude_kernel(1);
763         attrs.set_exclude_hv(1);
764 
765         // Arrange to be able to identify the counters we read back.
766         attrs.read_format = (sys::bindings::perf_event_read_format_PERF_FORMAT_TOTAL_TIME_ENABLED |
767                              sys::bindings::perf_event_read_format_PERF_FORMAT_TOTAL_TIME_RUNNING |
768                              sys::bindings::perf_event_read_format_PERF_FORMAT_ID |
769                              sys::bindings::perf_event_read_format_PERF_FORMAT_GROUP) as u64;
770 
771         let file = unsafe {
772             File::from_raw_fd(check_raw_syscall(|| {
773                 sys::perf_event_open(&mut attrs, 0, -1, -1, 0)
774             })?)
775         };
776 
777         // Retrieve the ID the kernel assigned us.
778         let mut id = 0_u64;
779         check_errno_syscall(|| unsafe {
780             sys::ioctls::ID(file.as_raw_fd(), &mut id)
781         })?;
782 
783         Ok(Group { file, id, max_members: 1 })
784     }
785 
786     /// Allow all `Counter`s in this `Group` to begin counting their designated
787     /// events, as a single atomic operation.
788     ///
789     /// This does not affect whatever values the `Counter`s had previously; new
790     /// events add to the current counts. To clear the `Counter`s, use the
791     /// [`reset`] method.
792     ///
793     /// [`reset`]: #method.reset
794     pub fn enable(&mut self) -> io::Result<()> {
795         self.generic_ioctl(sys::ioctls::ENABLE)
796     }
797 
798     /// Make all `Counter`s in this `Group` stop counting their designated
799     /// events, as a single atomic operation. Their counts are unaffected.
800     pub fn disable(&mut self) -> io::Result<()> {
801         self.generic_ioctl(sys::ioctls::DISABLE)
802     }
803 
804     /// Reset all `Counter`s in this `Group` to zero, as a single atomic operation.
805     pub fn reset(&mut self) -> io::Result<()> {
806         self.generic_ioctl(sys::ioctls::RESET)
807     }
808 
809     /// Perform some group ioctl.
810     ///
811     /// `f` must be a syscall that sets `errno` and returns `-1` on failure.
812     fn generic_ioctl(&mut self, f: unsafe fn(c_int, c_uint) -> c_int) -> io::Result<()> {
813         check_errno_syscall(|| unsafe {
814             f(self.file.as_raw_fd(),
815               sys::bindings::perf_event_ioc_flags_PERF_IOC_FLAG_GROUP)
816         }).map(|_| ())
817     }
818 
819     /// Return the values of all the `Counter`s in this `Group` as a [`Counts`]
820     /// value.
821     ///
822     /// A `Counts` value is a map from specific `Counter`s to their values. You
823     /// can find a specific `Counter`'s value by indexing:
824     ///
825     /// ```ignore
826     /// let mut group = Group::new()?;
827     /// let counter1 = Builder::new().group(&mut group).kind(...).build()?;
828     /// let counter2 = Builder::new().group(&mut group).kind(...).build()?;
829     /// ...
830     /// let counts = group.read()?;
831     /// println!("Rhombus inclinations per taxi medallion: {} / {} ({:.0}%)",
832     ///          counts[&counter1],
833     ///          counts[&counter2],
834     ///          (counts[&counter1] as f64 / counts[&counter2] as f64) * 100.0);
835     /// ```
836     ///
837     /// [`Counts`]: struct.Counts.html
838     pub fn read(&mut self) -> io::Result<Counts> {
839         // Since we passed `PERF_FORMAT_{ID,GROUP,TOTAL_TIME_{ENABLED,RUNNING}}`,
840         // the data we'll read has the form:
841         //
842         //     struct read_format {
843         //         u64 nr;            /* The number of events */
844         //         u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
845         //         u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
846         //         struct {
847         //             u64 value;     /* The value of the event */
848         //             u64 id;        /* if PERF_FORMAT_ID */
849         //         } values[nr];
850         //     };
851         let mut data = vec![0_u64; 3 + 2 * self.max_members];
852         self.file.read(u64::slice_as_bytes_mut(&mut data))?;
853 
854         let counts = Counts { data };
855 
856         // CountsIter assumes that the group's dummy count appears first.
857         assert_eq!(counts.nth_ref(0).0, self.id);
858 
859         // Does the kernel ever return nonsense?
860         assert!(counts.time_running() <= counts.time_enabled());
861 
862         // Update `max_members` for the next read.
863         self.max_members = counts.len();
864 
865         Ok(counts)
866     }
867 }
868 
869 impl std::fmt::Debug for Group {
870     fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
871         write!(fmt, "Group {{ fd: {}, id: {} }}",
872                self.file.as_raw_fd(), self.id)
873     }
874 }
875 
876 impl Counts {
877     /// Return the number of counters this `Counts` holds results for.
878     pub fn len(&self) -> usize {
879         self.data[0] as usize
880     }
881 
882     /// Return the number of nanoseconds the `Group` was enabled that
883     /// contributed to this `Counts`' contents.
884     pub fn time_enabled(&self) -> u64 {
885         self.data[1]
886     }
887 
888     /// Return the number of nanoseconds the `Group` was actually collecting
889     /// counts that contributed to this `Counts`' contents.
890     pub fn time_running(&self) -> u64 {
891         self.data[2]
892     }
893 
894     /// Return a range of indexes covering the count and id of the `n`'th counter.
895     fn nth_index(n: usize) -> std::ops::Range<usize> {
896         let base = 3 + 2 * n;
897         base .. base + 2
898     }
899 
900     /// Return the id and count of the `n`'th counter. This returns a reference
901     /// to the count, for use by the `Index` implementation.
902     fn nth_ref(&self, n: usize) -> (u64, &u64) {
903         let id_val = &self.data[Counts::nth_index(n)];
904 
905         // (id, &value)
906         (id_val[1], &id_val[0])
907     }
908 }
909 
910 /// An iterator over the counter values in a [`Counts`], returned by
911 /// [`Group::read`].
912 ///
913 /// Each item is a pair `(id, &value)`, where `id` is the number assigned to the
914 /// counter by the kernel (see `Counter::id`), and `value` is that counter's
915 /// value.
916 ///
917 /// [`Counts`]: struct.Counts.html
918 /// [`Counter::id`]: struct.Counter.html#method.id
919 /// [`Group::read`]: struct.Group.html#method.read
920 pub struct CountsIter<'c> {
921     counts: &'c Counts,
922     next: usize
923 }
924 
925 impl<'c> Iterator for CountsIter<'c> {
926     type Item = (u64, &'c u64);
927     fn next(&mut self) -> Option<(u64, &'c u64)> {
928         if self.next >= self.counts.len() {
929             return None;
930         }
931         let result = self.counts.nth_ref(self.next);
932         self.next += 1;
933         return Some(result);
934     }
935 }
936 
937 impl<'c> IntoIterator for &'c Counts {
938     type Item = (u64, &'c u64);
939     type IntoIter = CountsIter<'c>;
940     fn into_iter(self) -> CountsIter<'c> {
941         CountsIter {
942             counts: self,
943             next: 1, // skip the `Group` itself, it's just a dummy.
944         }
945     }
946 }
947 
948 impl Counts {
949     /// Return the value recorded for `member` in `self`, or `None` if `member`
950     /// is not present.
951     ///
952     /// If you know that `member` is in the group, you can simply index:
953     ///
954     ///     # fn main() -> std::io::Result<()> {
955     ///     # use perf_event::{Builder, Group};
956     ///     # let mut group = Group::new()?;
957     ///     # let cycle_counter = Builder::new().group(&mut group).build()?;
958     ///     # let counts = group.read()?;
959     ///     let cycles = counts[&cycle_counter];
960     ///     # Ok(()) }
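    ///
    /// If the counter might have been dropped from the group, use `get` and
    /// handle the `None` case:
    ///
    ///     # fn main() -> std::io::Result<()> {
    ///     # use perf_event::{Builder, Group};
    ///     # let mut group = Group::new()?;
    ///     # let cycle_counter = Builder::new().group(&mut group).build()?;
    ///     # let counts = group.read()?;
    ///     match counts.get(&cycle_counter) {
    ///         Some(cycles) => println!("{} cycles", cycles),
    ///         None => println!("the cycle counter was not in this read"),
    ///     }
    ///     # Ok(()) }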
961     pub fn get(&self, member: &Counter) -> Option<&u64> {
962         self.into_iter()
963             .find(|&(id, _)| id == member.id)
964             .map(|(_, value)| value)
965     }
966 
967     /// Return an iterator over the counts in `self`.
968     ///
969     ///     # fn main() -> std::io::Result<()> {
970     ///     # use perf_event::Group;
971     ///     # let counts = Group::new()?.read()?;
972     ///     for (id, value) in &counts {
973     ///         println!("Counter id {} has value {}", id, value);
974     ///     }
975     ///     # Ok(()) }
976     ///
977     /// Each item is a pair `(id, &value)`, where `id` is the number assigned to
978     /// the counter by the kernel (see `Counter::id`), and `value` is that
979     /// counter's value.
980     pub fn iter(&self) -> CountsIter {
981         <&Counts as IntoIterator>::into_iter(self)
982     }
983 }
984 
985 impl std::ops::Index<&Counter> for Counts {
986     type Output = u64;
987     fn index(&self, index: &Counter) -> &u64 {
988         self.get(index).unwrap()
989     }
990 }
991 
992 impl std::fmt::Debug for Counts {
993     fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
994         fmt.debug_map().entries(self.into_iter()).finish()
995     }
996 }
997 
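// Safety: implementors must be types for which any byte pattern is a valid
// value, since `slice_as_bytes_mut` exposes the slice's memory as raw bytes
// for writing.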
998 unsafe trait SliceAsBytesMut: Sized {
999     fn slice_as_bytes_mut(slice: &mut [Self]) -> &mut [u8] {
1000         unsafe {
1001             std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut u8,
1002                                            std::mem::size_of_val(slice))
1003         }
1004     }
1005 }
1006 
1007 unsafe impl SliceAsBytesMut for u64 { }
1008 
1009 /// Produce an `io::Result` from a raw system call.
1010 ///
1011 /// A 'raw' system call is one that reports failure by returning the negated
1012 /// raw OS error value.
1013 fn check_raw_syscall<F>(f: F) -> io::Result<c_int>
1014 where F: FnOnce() -> c_int
1015 {
1016     let result = f();
1017     if result < 0 {
1018         Err(io::Error::from_raw_os_error(-result))
1019     } else {
1020         Ok(result)
1021     }
1022 }
1023 
1024 /// Produce an `io::Result` from an errno-style system call.
1025 ///
1026 /// An 'errno-style' system call is one that reports failure by returning -1 and
1027 /// setting the C `errno` value when an error occurs.
1028 fn check_errno_syscall<F, R>(f: F) -> io::Result<R>
1029 where F: FnOnce() -> R,
1030       R: PartialOrd + Default
1031 {
1032     let result = f();
1033     if result < R::default() {
1034         Err(io::Error::last_os_error())
1035     } else {
1036         Ok(result)
1037     }
1038 }
1039 
1040 #[test]
1041 fn simple_build() {
1042     Builder::new().build().expect("Couldn't build default Counter");
1043 }
1044