1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 Intel Corporation.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39
40 #include "qbenchmarkperfevents_p.h"
41 #include "qbenchmarkmetric.h"
42 #include "qbenchmark_p.h"
43
44 #ifdef QTESTLIB_USE_PERF_EVENTS
45
46 // include the qcore_unix_p.h without core-private
47 // we only use inline functions anyway
48 #include "../corelib/kernel/qcore_unix_p.h"
49
50 #include <sys/types.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <string.h>
54 #include <stdio.h>
55
56 #include <sys/syscall.h>
57 #include <sys/ioctl.h>
58
59 #include "3rdparty/linux_perf_event_p.h"
60
// For PERF_TYPE_HW_CACHE events the config value is a packed bit field:
//   bits  0 to  7: cache type      (PERF_COUNT_HW_CACHE_<cache>)
//   bits  8 to 15: cache operation (PERF_COUNT_HW_CACHE_OP_<op>)
//   bits 16 to 23: cache result    (PERF_COUNT_HW_CACHE_RESULT_<result>)
// QTEST_PERF_CACHE_CONFIG pastes the three suffixes onto those prefixes and
// packs the resulting constants into one config value.
#define QTEST_PERF_CACHE_CONFIG(cache, op, result) \
    (PERF_COUNT_HW_CACHE_ ## cache \
     | PERF_COUNT_HW_CACHE_OP_ ## op << 8 \
     | PERF_COUNT_HW_CACHE_RESULT_ ## result << 16)

// cache accesses
#define CACHE_L1D_READ          QTEST_PERF_CACHE_CONFIG(L1D, READ, ACCESS)
#define CACHE_L1D_WRITE         QTEST_PERF_CACHE_CONFIG(L1D, WRITE, ACCESS)
#define CACHE_L1D_PREFETCH      QTEST_PERF_CACHE_CONFIG(L1D, PREFETCH, ACCESS)
#define CACHE_L1I_READ          QTEST_PERF_CACHE_CONFIG(L1I, READ, ACCESS)
#define CACHE_L1I_PREFETCH      QTEST_PERF_CACHE_CONFIG(L1I, PREFETCH, ACCESS)
#define CACHE_LLC_READ          QTEST_PERF_CACHE_CONFIG(LL, READ, ACCESS)
#define CACHE_LLC_WRITE         QTEST_PERF_CACHE_CONFIG(LL, WRITE, ACCESS)
#define CACHE_LLC_PREFETCH      QTEST_PERF_CACHE_CONFIG(LL, PREFETCH, ACCESS)

// cache misses
#define CACHE_L1D_READ_MISS     QTEST_PERF_CACHE_CONFIG(L1D, READ, MISS)
#define CACHE_L1D_WRITE_MISS    QTEST_PERF_CACHE_CONFIG(L1D, WRITE, MISS)
#define CACHE_L1D_PREFETCH_MISS QTEST_PERF_CACHE_CONFIG(L1D, PREFETCH, MISS)
#define CACHE_L1I_READ_MISS     QTEST_PERF_CACHE_CONFIG(L1I, READ, MISS)
#define CACHE_L1I_PREFETCH_MISS QTEST_PERF_CACHE_CONFIG(L1I, PREFETCH, MISS)
#define CACHE_LLC_READ_MISS     QTEST_PERF_CACHE_CONFIG(LL, READ, MISS)
#define CACHE_LLC_WRITE_MISS    QTEST_PERF_CACHE_CONFIG(LL, WRITE, MISS)
#define CACHE_LLC_PREFETCH_MISS QTEST_PERF_CACHE_CONFIG(LL, PREFETCH, MISS)

// branch prediction unit
#define CACHE_BRANCH_READ       QTEST_PERF_CACHE_CONFIG(BPU, READ, ACCESS)
#define CACHE_BRANCH_READ_MISS  QTEST_PERF_CACHE_CONFIG(BPU, READ, MISS)
83
84 QT_BEGIN_NAMESPACE
85
86 static perf_event_attr attr;
87
initPerf()88 static void initPerf()
89 {
90 static bool done;
91 if (!done) {
92 memset(&attr, 0, sizeof attr);
93 attr.size = sizeof attr;
94 attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
95 attr.disabled = true; // we'll enable later
96 attr.inherit = true; // let children processes inherit the monitoring
97 attr.pinned = true; // keep it running in the hardware
98 attr.inherit_stat = true; // aggregate all the info from child processes
99 attr.task = true; // trace fork/exits
100
101 // set a default performance counter: CPU cycles
102 attr.type = PERF_TYPE_HARDWARE;
103 attr.config = PERF_COUNT_HW_CPU_CYCLES; // default
104
105 done = true;
106 }
107 }
108
// This class does not exist in the API, so its qdoc comment marker was removed.
110
111 /*
112 \class QBenchmarkPerfEvents
113 \brief The Linux perf events benchmark backend
114
115 This benchmark backend uses the Linux Performance Counters interface,
116 introduced with the Linux kernel v2.6.31. The interface is done by one
117 system call (perf_event_open) which takes an attribute structure and
118 returns a file descriptor.
119
120 More information:
121 \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt>
122 \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c>
123 (note: as of v3.3.1, the documentation is out-of-date with the kernel
124 interface, so reading the source code of existing tools is necessary)
125
    This benchlib backend monitors the current process as well as any child
    processes it launches. We do not try to benchmark in kernel or hypervisor
    mode, as that usually requires elevated privileges.
129 */
130
perf_event_open(perf_event_attr * attr,pid_t pid,int cpu,int group_fd,unsigned long flags)131 static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
132 {
133 #ifdef SYS_perf_event_open
134 return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
135 #else
136 Q_UNUSED(attr);
137 Q_UNUSED(pid);
138 Q_UNUSED(cpu);
139 Q_UNUSED(group_fd);
140 Q_UNUSED(flags);
141 errno = ENOSYS;
142 return -1;
143 #endif
144 }
145
isAvailable()146 bool QBenchmarkPerfEventsMeasurer::isAvailable()
147 {
148 // this generates an EFAULT because attr == NULL if perf_event_open is available
149 // if the kernel is too old, it generates ENOSYS
150 return perf_event_open(nullptr, 0, 0, 0, 0) == -1 && errno != ENOSYS;
151 }
152
153 /* Event list structure
154 The following table provides the list of supported events
155
156 Event type Event counter Unit Name and aliases
157 HARDWARE CPU_CYCLES CPUCycles cycles cpu-cycles
158 HARDWARE REF_CPU_CYCLES RefCPUCycles ref-cycles
159 HARDWARE INSTRUCTIONS Instructions instructions
160 HARDWARE CACHE_REFERENCES CacheReferences cache-references
161 HARDWARE CACHE_MISSES CacheMisses cache-misses
162 HARDWARE BRANCH_INSTRUCTIONS BranchInstructions branch-instructions branches
163 HARDWARE BRANCH_MISSES BranchMisses branch-misses
164 HARDWARE BUS_CYCLES BusCycles bus-cycles
165 HARDWARE STALLED_CYCLES_FRONTEND StalledCycles stalled-cycles-frontend idle-cycles-frontend
166 HARDWARE STALLED_CYCLES_BACKEND StalledCycles stalled-cycles-backend idle-cycles-backend
167 SOFTWARE CPU_CLOCK WalltimeMilliseconds cpu-clock
168 SOFTWARE TASK_CLOCK WalltimeMilliseconds task-clock
169 SOFTWARE PAGE_FAULTS PageFaults page-faults faults
170 SOFTWARE PAGE_FAULTS_MAJ MajorPageFaults major-faults
171 SOFTWARE PAGE_FAULTS_MIN MinorPageFaults minor-faults
172 SOFTWARE CONTEXT_SWITCHES ContextSwitches context-switches cs
173 SOFTWARE CPU_MIGRATIONS CPUMigrations cpu-migrations migrations
174 SOFTWARE ALIGNMENT_FAULTS AlignmentFaults alignment-faults
175 SOFTWARE EMULATION_FAULTS EmulationFaults emulation-faults
176 HW_CACHE L1D_READ CacheReads l1d-cache-reads l1d-cache-loads l1d-reads l1d-loads
177 HW_CACHE L1D_WRITE CacheWrites l1d-cache-writes l1d-cache-stores l1d-writes l1d-stores
178 HW_CACHE L1D_PREFETCH CachePrefetches l1d-cache-prefetches l1d-prefetches
179 HW_CACHE L1I_READ CacheReads l1i-cache-reads l1i-cache-loads l1i-reads l1i-loads
180 HW_CACHE L1I_PREFETCH CachePrefetches l1i-cache-prefetches l1i-prefetches
181 HW_CACHE LLC_READ CacheReads llc-cache-reads llc-cache-loads llc-loads llc-reads
182 HW_CACHE LLC_WRITE CacheWrites llc-cache-writes llc-cache-stores llc-writes llc-stores
183 HW_CACHE LLC_PREFETCH CachePrefetches llc-cache-prefetches llc-prefetches
184 HW_CACHE L1D_READ_MISS CacheReads l1d-cache-read-misses l1d-cache-load-misses l1d-read-misses l1d-load-misses
185 HW_CACHE L1D_WRITE_MISS CacheWrites l1d-cache-write-misses l1d-cache-store-misses l1d-write-misses l1d-store-misses
186 HW_CACHE L1D_PREFETCH_MISS CachePrefetches l1d-cache-prefetch-misses l1d-prefetch-misses
187 HW_CACHE L1I_READ_MISS CacheReads l1i-cache-read-misses l1i-cache-load-misses l1i-read-misses l1i-load-misses
188 HW_CACHE L1I_PREFETCH_MISS CachePrefetches l1i-cache-prefetch-misses l1i-prefetch-misses
189 HW_CACHE LLC_READ_MISS CacheReads llc-cache-read-misses llc-cache-load-misses llc-read-misses llc-load-misses
190 HW_CACHE LLC_WRITE_MISS CacheWrites llc-cache-write-misses llc-cache-store-misses llc-write-misses llc-store-misses
191 HW_CACHE LLC_PREFETCH_MISS CachePrefetches llc-cache-prefetch-misses llc-prefetch-misses
192 HW_CACHE BRANCH_READ BranchInstructions branch-reads branch-loads branch-predicts
193 HW_CACHE BRANCH_READ_MISS BranchMisses branch-mispredicts branch-read-misses branch-load-misses
194
195 Use the following Perl script to re-generate the list
196 === cut perl ===
197 #!/usr/bin/env perl
198 # Load all entries into %map
199 while (<STDIN>) {
200 m/^\s*(.*)\s*$/;
201 @_ = split /\s+/, $1;
202 $type = shift @_;
203 $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" :
204 $type eq "SOFTWARE" ? "PERF_COUNT_SW_" :
205 $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_;
206 $unit = shift @_;
207
208 for $string (@_) {
209 die "$string was already seen!" if defined($map{$string});
210 $map{$string} = [-1, $type, $id, $unit];
211 push @strings, $string;
212 }
213 }
214
215 # sort the map and print the string list
216 @strings = sort @strings;
217 print "static const char eventlist_strings[] = \n";
218 $counter = 0;
219 for $entry (@strings) {
220 print " \"$entry\\0\"\n";
221 $map{$entry}[0] = $counter;
222 $counter += 1 + length $entry;
223 }
224
225 # print the table
226 print " \"\\0\";\n\nstatic const Events eventlist[] = {\n";
227 for $entry (sort @strings) {
228 printf " { %3d, PERF_TYPE_%s, %s, QTest::%s },\n",
229 $map{$entry}[0],
230 $map{$entry}[1],
231 $map{$entry}[2],
232 $map{$entry}[3];
233 }
234 print " { 0, PERF_TYPE_MAX, 0, QTest::Events }\n};\n";
235 === cut perl ===
236 */
237
// One row of the counter table below. Rows are aggregate-initialised in
// this field order, so the order of the members must not change.
struct Events {
    // byte offset of the counter's name inside eventlist_strings
    unsigned offset;
    // perf event type (one of the PERF_TYPE_* values); stored into attr.type
    quint32 type;
    // event selector within that type; stored into attr.config
    quint64 event_id;
    // benchmark metric reported for this counter
    QTest::QBenchmarkMetric metric;
};
244
245 /* -- BEGIN GENERATED CODE -- */
// All counter names and aliases, sorted, each terminated by '\0' and packed
// into one array. Entries in eventlist refer to a name by its byte offset in
// this array. Generated by the Perl script in the comment above; regenerate
// rather than editing by hand, because the offsets depend on every length.
static const char eventlist_strings[] =
    "alignment-faults\0"
    "branch-instructions\0"
    "branch-load-misses\0"
    "branch-loads\0"
    "branch-mispredicts\0"
    "branch-misses\0"
    "branch-predicts\0"
    "branch-read-misses\0"
    "branch-reads\0"
    "branches\0"
    "bus-cycles\0"
    "cache-misses\0"
    "cache-references\0"
    "context-switches\0"
    "cpu-clock\0"
    "cpu-cycles\0"
    "cpu-migrations\0"
    "cs\0"
    "cycles\0"
    "emulation-faults\0"
    "faults\0"
    "idle-cycles-backend\0"
    "idle-cycles-frontend\0"
    "instructions\0"
    "l1d-cache-load-misses\0"
    "l1d-cache-loads\0"
    "l1d-cache-prefetch-misses\0"
    "l1d-cache-prefetches\0"
    "l1d-cache-read-misses\0"
    "l1d-cache-reads\0"
    "l1d-cache-store-misses\0"
    "l1d-cache-stores\0"
    "l1d-cache-write-misses\0"
    "l1d-cache-writes\0"
    "l1d-load-misses\0"
    "l1d-loads\0"
    "l1d-prefetch-misses\0"
    "l1d-prefetches\0"
    "l1d-read-misses\0"
    "l1d-reads\0"
    "l1d-store-misses\0"
    "l1d-stores\0"
    "l1d-write-misses\0"
    "l1d-writes\0"
    "l1i-cache-load-misses\0"
    "l1i-cache-loads\0"
    "l1i-cache-prefetch-misses\0"
    "l1i-cache-prefetches\0"
    "l1i-cache-read-misses\0"
    "l1i-cache-reads\0"
    "l1i-load-misses\0"
    "l1i-loads\0"
    "l1i-prefetch-misses\0"
    "l1i-prefetches\0"
    "l1i-read-misses\0"
    "l1i-reads\0"
    "llc-cache-load-misses\0"
    "llc-cache-loads\0"
    "llc-cache-prefetch-misses\0"
    "llc-cache-prefetches\0"
    "llc-cache-read-misses\0"
    "llc-cache-reads\0"
    "llc-cache-store-misses\0"
    "llc-cache-stores\0"
    "llc-cache-write-misses\0"
    "llc-cache-writes\0"
    "llc-load-misses\0"
    "llc-loads\0"
    "llc-prefetch-misses\0"
    "llc-prefetches\0"
    "llc-read-misses\0"
    "llc-reads\0"
    "llc-store-misses\0"
    "llc-stores\0"
    "llc-write-misses\0"
    "llc-writes\0"
    "major-faults\0"
    "migrations\0"
    "minor-faults\0"
    "page-faults\0"
    "ref-cycles\0"
    "stalled-cycles-backend\0"
    "stalled-cycles-frontend\0"
    "task-clock\0"
    "\0";
332
// Counter table: one row per name in eventlist_strings, in the same sorted
// order (setCounter() depends on that order to stop searching early).
// Aliases of one counter appear as separate rows with identical type and
// event_id. The final row, with type PERF_TYPE_MAX, is the end-of-table
// sentinel that every loop over this array tests for.
static const Events eventlist[] = {
    { 0, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, QTest::AlignmentFaults },
    { 17, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
    { 37, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
    { 56, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
    { 69, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
    { 88, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, QTest::BranchMisses },
    { 102, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
    { 118, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
    { 137, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
    { 150, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
    { 159, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, QTest::BusCycles },
    { 170, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, QTest::CacheMisses },
    { 183, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, QTest::CacheReferences },
    { 200, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
    { 217, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, QTest::WalltimeMilliseconds },
    { 227, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
    { 238, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
    { 253, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
    { 256, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
    { 263, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, QTest::EmulationFaults },
    { 280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
    { 287, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
    { 307, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
    { 328, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, QTest::Instructions },
    { 341, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
    { 363, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
    { 379, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches },
    { 405, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches },
    { 426, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
    { 448, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
    { 464, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
    { 487, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
    { 504, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
    { 527, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
    { 544, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
    { 560, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
    { 570, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches },
    { 590, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches },
    { 605, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
    { 621, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
    { 631, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
    { 648, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
    { 659, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
    { 676, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
    { 687, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
    { 709, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
    { 725, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches },
    { 751, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches },
    { 772, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
    { 794, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
    { 810, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
    { 826, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
    { 836, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches },
    { 856, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches },
    { 871, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
    { 887, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
    { 897, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
    { 919, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
    { 935, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches },
    { 961, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches },
    { 982, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
    { 1004, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
    { 1020, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
    { 1043, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
    { 1060, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
    { 1083, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
    { 1100, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
    { 1116, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
    { 1126, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches },
    { 1146, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches },
    { 1161, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
    { 1177, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
    { 1187, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
    { 1204, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
    { 1215, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
    { 1232, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
    { 1243, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, QTest::MajorPageFaults },
    { 1256, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
    { 1267, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, QTest::MinorPageFaults },
    { 1280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
    { 1292, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, QTest::RefCPUCycles },
    { 1303, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
    { 1326, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
    { 1350, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, QTest::WalltimeMilliseconds },
    { 0, PERF_TYPE_MAX, 0, QTest::Events }
};
420 /* -- END GENERATED CODE -- */
421
metricForEvent(quint32 type,quint64 event_id)422 QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricForEvent(quint32 type, quint64 event_id)
423 {
424 const Events *ptr = eventlist;
425 for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) {
426 if (ptr->type == type && ptr->event_id == event_id)
427 return ptr->metric;
428 }
429 return QTest::Events;
430 }
431
setCounter(const char * name)432 void QBenchmarkPerfEventsMeasurer::setCounter(const char *name)
433 {
434 initPerf();
435 const char *colon = strchr(name, ':');
436 int n = colon ? colon - name : strlen(name);
437 const Events *ptr = eventlist;
438 for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) {
439 int c = strncmp(name, eventlist_strings + ptr->offset, n);
440 if (c == 0)
441 break;
442 if (c < 0) {
443 fprintf(stderr, "ERROR: Performance counter type '%s' is unknown\n", name);
444 exit(1);
445 }
446 }
447
448 attr.type = ptr->type;
449 attr.config = ptr->event_id;
450
451 // now parse the attributes
452 if (!colon)
453 return;
454 while (*++colon) {
455 switch (*colon) {
456 case 'u':
457 attr.exclude_user = true;
458 break;
459 case 'k':
460 attr.exclude_kernel = true;
461 break;
462 case 'h':
463 attr.exclude_hv = true;
464 break;
465 case 'G':
466 attr.exclude_guest = true;
467 break;
468 case 'H':
469 attr.exclude_host = true;
470 break;
471 default:
472 fprintf(stderr, "ERROR: Unknown attribute '%c'\n", *colon);
473 exit(1);
474 }
475 }
476 }
477
listCounters()478 void QBenchmarkPerfEventsMeasurer::listCounters()
479 {
480 if (!isAvailable()) {
481 printf("Performance counters are not available on this system\n");
482 return;
483 }
484
485 printf("The following performance counters are available:\n");
486 const Events *ptr = eventlist;
487 for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) {
488 printf(" %-30s [%s]\n", eventlist_strings + ptr->offset,
489 ptr->type == PERF_TYPE_HARDWARE ? "hardware" :
490 ptr->type == PERF_TYPE_SOFTWARE ? "software" :
491 ptr->type == PERF_TYPE_HW_CACHE ? "cache" : "other");
492 }
493
494 printf("\nAttributes can be specified by adding a colon and the following:\n"
495 " u - exclude measuring in the userspace\n"
496 " k - exclude measuring in kernel mode\n"
497 " h - exclude measuring in the hypervisor\n"
498 " G - exclude measuring when running virtualized (guest VM)\n"
499 " H - exclude measuring when running non-virtualized (host system)\n"
500 "Attributes can be combined, for example: -perfcounter branch-mispredicts:kh\n");
501 }
502
// Defaulted constructor: nothing is opened here.
// NOTE(review): start() treats fd == -1 as "not opened yet", so fd is
// presumably initialised to -1 in the class declaration; confirm in the header.
QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() = default;
504
~QBenchmarkPerfEventsMeasurer()505 QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer()
506 {
507 qt_safe_close(fd);
508 }
509
// Intentionally does nothing: the perf event is opened on the first call
// to start().
void QBenchmarkPerfEventsMeasurer::init()
{
}
513
start()514 void QBenchmarkPerfEventsMeasurer::start()
515 {
516
517 initPerf();
518 if (fd == -1) {
519 // pid == 0 -> attach to the current process
520 // cpu == -1 -> monitor on all CPUs
521 // group_fd == -1 -> this is the group leader
522 // flags == 0 -> reserved, must be zero
523 fd = perf_event_open(&attr, 0, -1, -1, 0);
524 if (fd == -1) {
525 perror("QBenchmarkPerfEventsMeasurer::start: perf_event_open");
526 exit(1);
527 } else {
528 ::fcntl(fd, F_SETFD, FD_CLOEXEC);
529 }
530 }
531
532 // enable the counter
533 ::ioctl(fd, PERF_EVENT_IOC_RESET);
534 ::ioctl(fd, PERF_EVENT_IOC_ENABLE);
535 }
536
checkpoint()537 qint64 QBenchmarkPerfEventsMeasurer::checkpoint()
538 {
539 ::ioctl(fd, PERF_EVENT_IOC_DISABLE);
540 qint64 value = readValue();
541 ::ioctl(fd, PERF_EVENT_IOC_ENABLE);
542 return value;
543 }
544
stop()545 qint64 QBenchmarkPerfEventsMeasurer::stop()
546 {
547 // disable the counter
548 ::ioctl(fd, PERF_EVENT_IOC_DISABLE);
549 return readValue();
550 }
551
// Accepts every measurement: the measured value is ignored and the result
// is always true.
bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(qint64)
{
    return true;
}
556
// Ignores the suggested iteration count and always returns 1.
int QBenchmarkPerfEventsMeasurer::adjustIterationCount(int)
{
    return 1;
}
561
// Ignores the suggested median count and always returns 1.
int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int)
{
    return 1;
}
566
// Returns the benchmark metric for the counter currently selected in the
// shared attribute template (attr.type / attr.config), as looked up by
// metricForEvent().
QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricType()
{
    return metricForEvent(attr.type, attr.config);
}
571
rawReadValue(int fd)572 static quint64 rawReadValue(int fd)
573 {
574 /* from the kernel docs:
575 * struct read_format {
576 * { u64 value;
577 * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
578 * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
579 * { u64 id; } && PERF_FORMAT_ID
580 * } && !PERF_FORMAT_GROUP
581 */
582
583 struct read_format {
584 quint64 value;
585 quint64 time_enabled;
586 quint64 time_running;
587 } results;
588
589 size_t nread = 0;
590 while (nread < sizeof results) {
591 char *ptr = reinterpret_cast<char *>(&results);
592 qint64 r = qt_safe_read(fd, ptr + nread, sizeof results - nread);
593 if (r == -1) {
594 perror("QBenchmarkPerfEventsMeasurer::readValue: reading the results");
595 exit(1);
596 }
597 nread += quint64(r);
598 }
599
600 if (results.time_running == results.time_enabled)
601 return results.value;
602
603 // scale the results, though this shouldn't happen!
604 return results.value * (double(results.time_running) / double(results.time_enabled));
605 }
606
readValue()607 qint64 QBenchmarkPerfEventsMeasurer::readValue()
608 {
609 quint64 raw = rawReadValue(fd);
610 if (metricType() == QTest::WalltimeMilliseconds) {
611 // perf returns nanoseconds
612 return raw / 1000000;
613 }
614 return raw;
615 }
616
617 QT_END_NAMESPACE
618
619 #endif
620