1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 /* This variant of nsIPerfMeasurement uses the perf_event interface
7  * added in Linux 2.6.31.  We key compilation of this file off the
8  * existence of <linux/perf_event.h>.
9  */
10 
11 #include <errno.h>
12 #include <linux/perf_event.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/syscall.h>
16 #include <unistd.h>
17 
18 #include "perf/jsperf.h"
19 
20 using namespace js;
21 
22 // As of July 2010, this system call has not been added to the
23 // C library, so we have to provide our own wrapper function.
24 // If this code runs on a kernel that does not implement the
25 // system call (2.6.30 or older) nothing unpredictable will
26 // happen - it will just always fail and return -1.
// Thin wrapper around the perf_event_open(2) system call: returns the
// new counter's file descriptor, or -1 with errno set on failure.  On
// kernels without the syscall it fails cleanly with ENOSYS.
static int sys_perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu,
                               int group_fd, unsigned long flags) {
  long rc = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
  return static_cast<int>(rc);
}
31 
32 namespace {
33 
34 using JS::PerfMeasurement;
35 typedef PerfMeasurement::EventMask EventMask;
36 
37 // Additional state required by this implementation.
// Additional state required by this implementation.  One Impl is owned
// (via the opaque `impl` pointer) by each JS::PerfMeasurement.
struct Impl {
  // Each active counter corresponds to an open perf_event file
  // descriptor; -1 means that event is not being counted.  Field names
  // must stay in sync with the f_##fieldname token pasting in kSlots.
  int f_cpu_cycles;
  int f_instructions;
  int f_cache_references;
  int f_cache_misses;
  int f_branch_instructions;
  int f_branch_misses;
  int f_bus_cycles;
  int f_page_faults;
  int f_major_page_faults;
  int f_context_switches;
  int f_cpu_migrations;

  // Counter group leader, for Start and Stop.  -1 until init() opens
  // the first counter; enabling/disabling it gates the whole group.
  int group_leader;

  // Whether counters are running.
  bool running;

  Impl();
  ~Impl();

  // Open a counter fd for each requested event; returns the subset of
  // `toMeasure` that was actually enabled.
  EventMask init(EventMask toMeasure);
  void start();
  void stop(PerfMeasurement* counters);
};
65 
66 // Mapping from our event bitmask to codes passed into the kernel, and
67 // to fields in the PerfMeasurement and PerfMeasurement::impl structures.
// Mapping from our event bitmask to codes passed into the kernel, and
// to fields in the PerfMeasurement and PerfMeasurement::impl structures.
// Each entry ties together: the EventMask bit, the perf_event_attr
// type/config pair for the kernel, the public accumulator field
// (pointer-to-member on PerfMeasurement), and the fd slot
// (pointer-to-member on Impl).
static const struct {
  EventMask bit;
  uint32_t type;
  uint32_t config;
  uint64_t PerfMeasurement::*counter;
  int Impl::*fd;
} kSlots[PerfMeasurement::NUM_MEASURABLE_EVENTS] = {
// HW = hardware counter (PERF_TYPE_HARDWARE), SW = software counter
// (PERF_TYPE_SOFTWARE).  `fieldname` must name both a PerfMeasurement
// counter field and (prefixed with f_) an Impl fd field.
#define HW(mask, constant, fieldname)                                    \
  {                                                                      \
    PerfMeasurement::mask, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##constant, \
        &PerfMeasurement::fieldname, &Impl::f_##fieldname                \
  }
#define SW(mask, constant, fieldname)                                    \
  {                                                                      \
    PerfMeasurement::mask, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##constant, \
        &PerfMeasurement::fieldname, &Impl::f_##fieldname                \
  }

    HW(CPU_CYCLES, CPU_CYCLES, cpu_cycles),
    HW(INSTRUCTIONS, INSTRUCTIONS, instructions),
    HW(CACHE_REFERENCES, CACHE_REFERENCES, cache_references),
    HW(CACHE_MISSES, CACHE_MISSES, cache_misses),
    HW(BRANCH_INSTRUCTIONS, BRANCH_INSTRUCTIONS, branch_instructions),
    HW(BRANCH_MISSES, BRANCH_MISSES, branch_misses),
    HW(BUS_CYCLES, BUS_CYCLES, bus_cycles),
    SW(PAGE_FAULTS, PAGE_FAULTS, page_faults),
    SW(MAJOR_PAGE_FAULTS, PAGE_FAULTS_MAJ, major_page_faults),
    SW(CONTEXT_SWITCHES, CONTEXT_SWITCHES, context_switches),
    SW(CPU_MIGRATIONS, CPU_MIGRATIONS, cpu_migrations),

#undef HW
#undef SW
};
101 
Impl()102 Impl::Impl()
103     : f_cpu_cycles(-1),
104       f_instructions(-1),
105       f_cache_references(-1),
106       f_cache_misses(-1),
107       f_branch_instructions(-1),
108       f_branch_misses(-1),
109       f_bus_cycles(-1),
110       f_page_faults(-1),
111       f_major_page_faults(-1),
112       f_context_switches(-1),
113       f_cpu_migrations(-1),
114       group_leader(-1),
115       running(false) {}
116 
~Impl()117 Impl::~Impl() {
118   // Close all active counter descriptors.  Take care to do the group
119   // leader last (this may not be necessary, but it's unclear what
120   // happens if you close the group leader out from under a group).
121   for (const auto& slot : kSlots) {
122     int fd = this->*(slot.fd);
123     if (fd != -1 && fd != group_leader) close(fd);
124   }
125 
126   if (group_leader != -1) close(group_leader);
127 }
128 
init(EventMask toMeasure)129 EventMask Impl::init(EventMask toMeasure) {
130   MOZ_ASSERT(group_leader == -1);
131   if (!toMeasure) return EventMask(0);
132 
133   EventMask measured = EventMask(0);
134   struct perf_event_attr attr;
135   for (const auto& slot : kSlots) {
136     if (!(toMeasure & slot.bit)) continue;
137 
138     memset(&attr, 0, sizeof(attr));
139     attr.size = sizeof(attr);
140 
141     // Set the type and config fields to indicate the counter we
142     // want to enable.  We want read format 0, and we're not using
143     // sampling, so leave those fields unset.
144     attr.type = slot.type;
145     attr.config = slot.config;
146 
147     // If this will be the group leader it should start off
148     // disabled.  Otherwise it should start off enabled (but blocked
149     // on the group leader).
150     if (group_leader == -1) attr.disabled = 1;
151 
152     // The rest of the bit fields are really poorly documented.
153     // For instance, I have *no idea* whether we should be setting
154     // the inherit, inherit_stat, or task flags.  I'm pretty sure
155     // we do want to set mmap and comm, and not any of the ones I
156     // haven't mentioned.
157     attr.mmap = 1;
158     attr.comm = 1;
159 
160     int fd =
161         sys_perf_event_open(&attr, 0 /* trace self */, -1 /* on any cpu */,
162                             group_leader, 0 /* no flags presently defined */);
163     if (fd == -1) continue;
164 
165     measured = EventMask(measured | slot.bit);
166     this->*(slot.fd) = fd;
167     if (group_leader == -1) group_leader = fd;
168   }
169   return measured;
170 }
171 
start()172 void Impl::start() {
173   if (running || group_leader == -1) return;
174 
175   running = true;
176   ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0);
177 }
178 
stop(PerfMeasurement * counters)179 void Impl::stop(PerfMeasurement* counters) {
180   // This scratch buffer is to ensure that we have read all the
181   // available data, even if that's more than we expect.
182   unsigned char buf[1024];
183 
184   if (!running || group_leader == -1) return;
185 
186   ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0);
187   running = false;
188 
189   // read out and reset all the counter values
190   for (const auto& slot : kSlots) {
191     int fd = this->*(slot.fd);
192     if (fd == -1) continue;
193 
194     if (read(fd, buf, sizeof(buf)) == sizeof(uint64_t)) {
195       uint64_t cur;
196       memcpy(&cur, buf, sizeof(uint64_t));
197       counters->*(slot.counter) += cur;
198     }
199 
200     // Reset the counter regardless of whether the read did what
201     // we expected.
202     ioctl(fd, PERF_EVENT_IOC_RESET, 0);
203   }
204 }
205 
206 }  // namespace
207 
208 namespace JS {
209 
210 #define initCtr(flag) ((eventsMeasured & flag) ? 0 : -1)
211 
PerfMeasurement(PerfMeasurement::EventMask toMeasure)212 PerfMeasurement::PerfMeasurement(PerfMeasurement::EventMask toMeasure)
213     : impl(js_new<Impl>()),
214       eventsMeasured(impl ? static_cast<Impl*>(impl)->init(toMeasure)
215                           : EventMask(0)),
216       cpu_cycles(initCtr(CPU_CYCLES)),
217       instructions(initCtr(INSTRUCTIONS)),
218       cache_references(initCtr(CACHE_REFERENCES)),
219       cache_misses(initCtr(CACHE_MISSES)),
220       branch_instructions(initCtr(BRANCH_INSTRUCTIONS)),
221       branch_misses(initCtr(BRANCH_MISSES)),
222       bus_cycles(initCtr(BUS_CYCLES)),
223       page_faults(initCtr(PAGE_FAULTS)),
224       major_page_faults(initCtr(MAJOR_PAGE_FAULTS)),
225       context_switches(initCtr(CONTEXT_SWITCHES)),
226       cpu_migrations(initCtr(CPU_MIGRATIONS)) {}
227 
228 #undef initCtr
229 
~PerfMeasurement()230 PerfMeasurement::~PerfMeasurement() { js_delete(static_cast<Impl*>(impl)); }
231 
start()232 void PerfMeasurement::start() {
233   if (impl) static_cast<Impl*>(impl)->start();
234 }
235 
stop()236 void PerfMeasurement::stop() {
237   if (impl) static_cast<Impl*>(impl)->stop(this);
238 }
239 
reset()240 void PerfMeasurement::reset() {
241   for (const auto& slot : kSlots) {
242     if (eventsMeasured & slot.bit)
243       this->*(slot.counter) = 0;
244     else
245       this->*(slot.counter) = -1;
246   }
247 }
248 
canMeasureSomething()249 bool PerfMeasurement::canMeasureSomething() {
250   // Find out if the kernel implements the performance measurement
251   // API.  If it doesn't, syscall(__NR_perf_event_open, ...) is
252   // guaranteed to return -1 and set errno to ENOSYS.
253   //
254   // We set up input parameters that should provoke an EINVAL error
255   // from a kernel that does implement perf_event_open, but we can't
256   // be sure it will (newer kernels might add more event types), so
257   // we have to take care to close any valid fd it might return.
258 
259   struct perf_event_attr attr;
260   memset(&attr, 0, sizeof(attr));
261   attr.size = sizeof(attr);
262   attr.type = PERF_TYPE_MAX;
263 
264   int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
265   if (fd >= 0) {
266     close(fd);
267     return true;
268   }
269   return errno != ENOSYS;
270 }
271 
272 }  // namespace JS
273