/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This variant of nsIPerfMeasurement uses the perf_event interface
 * added in Linux 2.6.31.  We key compilation of this file off the
 * existence of <linux/perf_event.h>.
 */

#include <errno.h>
#include <linux/perf_event.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

#include "perf/jsperf.h"

using namespace js;

// As of July 2010, this system call has not been added to the
// C library, so we have to provide our own wrapper function.
// If this code runs on a kernel that does not implement the
// system call (2.6.30 or older) nothing unpredictable will
// happen - it will just always fail and return -1.
static int
sys_perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu,
                    int group_fd, unsigned long flags)
{
    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

namespace {

using JS::PerfMeasurement;
typedef PerfMeasurement::EventMask EventMask;

// Additional state required by this implementation.
struct Impl
{
    // Each active counter corresponds to an open file descriptor.
    int f_cpu_cycles;
    int f_instructions;
    int f_cache_references;
    int f_cache_misses;
    int f_branch_instructions;
    int f_branch_misses;
    int f_bus_cycles;
    int f_page_faults;
    int f_major_page_faults;
    int f_context_switches;
    int f_cpu_migrations;

    // Counter group leader, for Start and Stop.
    int group_leader;

    // Whether counters are running.
    bool running;

    Impl();
    ~Impl();

    EventMask init(EventMask toMeasure);
    void start();
    void stop(PerfMeasurement* counters);
};

// Mapping from our event bitmask to codes passed into the kernel, and
// to fields in the PerfMeasurement and PerfMeasurement::impl structures.
static const struct
{
    EventMask bit;
    uint32_t type;
    uint32_t config;
    uint64_t PerfMeasurement::* counter;
    int Impl::* fd;
} kSlots[PerfMeasurement::NUM_MEASURABLE_EVENTS] = {
#define HW(mask, constant, fieldname)                                   \
    { PerfMeasurement::mask, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##constant, \
      &PerfMeasurement::fieldname, &Impl::f_##fieldname }
#define SW(mask, constant, fieldname)                                   \
    { PerfMeasurement::mask, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##constant, \
      &PerfMeasurement::fieldname, &Impl::f_##fieldname }

    HW(CPU_CYCLES,          CPU_CYCLES,          cpu_cycles),
    HW(INSTRUCTIONS,        INSTRUCTIONS,        instructions),
    HW(CACHE_REFERENCES,    CACHE_REFERENCES,    cache_references),
    HW(CACHE_MISSES,        CACHE_MISSES,        cache_misses),
    HW(BRANCH_INSTRUCTIONS, BRANCH_INSTRUCTIONS, branch_instructions),
    HW(BRANCH_MISSES,       BRANCH_MISSES,       branch_misses),
    HW(BUS_CYCLES,          BUS_CYCLES,          bus_cycles),
    SW(PAGE_FAULTS,         PAGE_FAULTS,         page_faults),
    SW(MAJOR_PAGE_FAULTS,   PAGE_FAULTS_MAJ,     major_page_faults),
    SW(CONTEXT_SWITCHES,    CONTEXT_SWITCHES,    context_switches),
    SW(CPU_MIGRATIONS,      CPU_MIGRATIONS,      cpu_migrations),

#undef HW
#undef SW
};
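
// For reference, each HW(...) entry above expands to one full slot; e.g.
// HW(CPU_CYCLES, CPU_CYCLES, cpu_cycles) expands to:
//
//   { PerfMeasurement::CPU_CYCLES, PERF_TYPE_HARDWARE,
//     PERF_COUNT_HW_CPU_CYCLES,
//     &PerfMeasurement::cpu_cycles, &Impl::f_cpu_cycles }
//
// and the SW(...) entries do the same with PERF_TYPE_SOFTWARE and a
// PERF_COUNT_SW_* event code.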

Impl::Impl()
  : f_cpu_cycles(-1),
    f_instructions(-1),
    f_cache_references(-1),
    f_cache_misses(-1),
    f_branch_instructions(-1),
    f_branch_misses(-1),
    f_bus_cycles(-1),
    f_page_faults(-1),
    f_major_page_faults(-1),
    f_context_switches(-1),
    f_cpu_migrations(-1),
    group_leader(-1),
    running(false)
{
}

Impl::~Impl()
{
    // Close all active counter descriptors.  Take care to do the group
    // leader last (this may not be necessary, but it's unclear what
    // happens if you close the group leader out from under a group).
    for (const auto& slot : kSlots) {
        int fd = this->*(slot.fd);
        if (fd != -1 && fd != group_leader)
            close(fd);
    }

    if (group_leader != -1)
        close(group_leader);
}

EventMask
Impl::init(EventMask toMeasure)
{
    MOZ_ASSERT(group_leader == -1);
    if (!toMeasure)
        return EventMask(0);

    EventMask measured = EventMask(0);
    struct perf_event_attr attr;
    for (const auto& slot : kSlots) {
        if (!(toMeasure & slot.bit))
            continue;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);

        // Set the type and config fields to indicate the counter we
        // want to enable.  We want read format 0, and we're not using
        // sampling, so leave those fields unset.
        attr.type = slot.type;
        attr.config = slot.config;

        // If this will be the group leader it should start off
        // disabled.  Otherwise it should start off enabled (but blocked
        // on the group leader).
        if (group_leader == -1)
            attr.disabled = 1;

        // The rest of the bit fields are really poorly documented.
        // For instance, I have *no idea* whether we should be setting
        // the inherit, inherit_stat, or task flags.  I'm pretty sure
        // we do want to set mmap and comm, and not any of the ones I
        // haven't mentioned.
        attr.mmap = 1;
        attr.comm = 1;

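        // The first counter opened here is passed group_fd == -1 and so
        // becomes the group leader; later counters pass the leader's fd
        // and join its group, so the single enable/disable ioctl on the
        // leader in Impl::start/stop effectively controls all of them.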
        int fd = sys_perf_event_open(&attr,
                                     0 /* trace self */,
                                     -1 /* on any cpu */,
                                     group_leader,
                                     0 /* no flags presently defined */);
        if (fd == -1)
            continue;

        measured = EventMask(measured | slot.bit);
        this->*(slot.fd) = fd;
        if (group_leader == -1)
            group_leader = fd;
    }
    return measured;
}

void
Impl::start()
{
    if (running || group_leader == -1)
        return;

    running = true;
    ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0);
}

void
Impl::stop(PerfMeasurement* counters)
{
    // This scratch buffer is to ensure that we have read all the
    // available data, even if that's more than we expect.
    unsigned char buf[1024];
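    // Because Impl::init requested read format 0 and no sampling, each
    // read() of a counter fd should return exactly one uint64_t holding
    // that counter's current value; the size check below guards against
    // anything else.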

    if (!running || group_leader == -1)
        return;

    ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0);
    running = false;

    // read out and reset all the counter values
    for (const auto& slot : kSlots) {
        int fd = this->*(slot.fd);
        if (fd == -1)
            continue;

        if (read(fd, buf, sizeof(buf)) == sizeof(uint64_t)) {
            uint64_t cur;
            memcpy(&cur, buf, sizeof(uint64_t));
            counters->*(slot.counter) += cur;
        }

        // Reset the counter regardless of whether the read did what
        // we expected.
        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
    }
}

} // namespace


namespace JS {

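// Counters that will actually be measured start at zero; all the others
// are pinned to uint64_t(-1) as a "not measured" sentinel, matching what
// PerfMeasurement::reset() does below.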
#define initCtr(flag) ((eventsMeasured & flag) ? 0 : -1)

PerfMeasurement::PerfMeasurement(PerfMeasurement::EventMask toMeasure)
  : impl(js_new<Impl>()),
    eventsMeasured(impl ? static_cast<Impl*>(impl)->init(toMeasure)
                   : EventMask(0)),
    cpu_cycles(initCtr(CPU_CYCLES)),
    instructions(initCtr(INSTRUCTIONS)),
    cache_references(initCtr(CACHE_REFERENCES)),
    cache_misses(initCtr(CACHE_MISSES)),
    branch_instructions(initCtr(BRANCH_INSTRUCTIONS)),
    branch_misses(initCtr(BRANCH_MISSES)),
    bus_cycles(initCtr(BUS_CYCLES)),
    page_faults(initCtr(PAGE_FAULTS)),
    major_page_faults(initCtr(MAJOR_PAGE_FAULTS)),
    context_switches(initCtr(CONTEXT_SWITCHES)),
    cpu_migrations(initCtr(CPU_MIGRATIONS))
{
}

#undef initCtr

PerfMeasurement::~PerfMeasurement()
{
    js_delete(static_cast<Impl*>(impl));
}

void
PerfMeasurement::start()
{
    if (impl)
        static_cast<Impl*>(impl)->start();
}

void
PerfMeasurement::stop()
{
    if (impl)
        static_cast<Impl*>(impl)->stop(this);
}

void
PerfMeasurement::reset()
{
    for (const auto& slot : kSlots) {
        if (eventsMeasured & slot.bit)
            this->*(slot.counter) = 0;
        else
            this->*(slot.counter) = -1;
    }
}

bool
PerfMeasurement::canMeasureSomething()
{
    // Find out if the kernel implements the performance measurement
    // API.  If it doesn't, syscall(__NR_perf_event_open, ...) is
    // guaranteed to return -1 and set errno to ENOSYS.
    //
    // We set up input parameters that should provoke an EINVAL error
    // from a kernel that does implement perf_event_open, but we can't
    // be sure it will (newer kernels might add more event types), so
    // we have to take care to close any valid fd it might return.

    struct perf_event_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.type = PERF_TYPE_MAX;

    int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
    if (fd >= 0) {
        close(fd);
        return true;
    } else {
        return errno != ENOSYS;
    }
}

} // namespace JS
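
/* A minimal usage sketch (illustrative only; the particular event
 * combination is an assumption, and canMeasureSomething() is assumed to
 * be callable without an instance, as its definition above suggests):
 *
 *   using JS::PerfMeasurement;
 *   if (PerfMeasurement::canMeasureSomething()) {
 *       PerfMeasurement pm(PerfMeasurement::EventMask(
 *           PerfMeasurement::CPU_CYCLES | PerfMeasurement::INSTRUCTIONS));
 *       pm.start();
 *       // ... run the code to be measured ...
 *       pm.stop();
 *       // pm.cpu_cycles and pm.instructions now hold accumulated counts;
 *       // events that could not be opened stay at uint64_t(-1).
 *   }
 */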