1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 Intel Corporation.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #include "qbenchmarkperfevents_p.h"
41 #include "qbenchmarkmetric.h"
42 #include "qbenchmark_p.h"
43 
44 #ifdef QTESTLIB_USE_PERF_EVENTS
45 
46 // include the qcore_unix_p.h without core-private
47 // we only use inline functions anyway
48 #include "../corelib/kernel/qcore_unix_p.h"
49 
50 #include <sys/types.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <string.h>
54 #include <stdio.h>
55 
56 #include <sys/syscall.h>
57 #include <sys/ioctl.h>
58 
59 #include "3rdparty/linux_perf_event_p.h"
60 
// A PERF_TYPE_HW_CACHE config value packs three fields into one integer:
//   bits  0..7   which cache        (PERF_COUNT_HW_CACHE_<cache>)
//   bits  8..15  which operation    (PERF_COUNT_HW_CACHE_OP_<op>)
//   bits 16..23  which result       (PERF_COUNT_HW_CACHE_RESULT_<result>)
// The helper below builds that value from the three enumerator suffixes.
#define QTEST_PERF_CACHE_EVENT(cache, op, result) \
    (PERF_COUNT_HW_CACHE_ ## cache | PERF_COUNT_HW_CACHE_OP_ ## op << 8 | PERF_COUNT_HW_CACHE_RESULT_ ## result << 16)

// accesses
#define CACHE_L1D_READ              QTEST_PERF_CACHE_EVENT(L1D, READ, ACCESS)
#define CACHE_L1D_WRITE             QTEST_PERF_CACHE_EVENT(L1D, WRITE, ACCESS)
#define CACHE_L1D_PREFETCH          QTEST_PERF_CACHE_EVENT(L1D, PREFETCH, ACCESS)
#define CACHE_L1I_READ              QTEST_PERF_CACHE_EVENT(L1I, READ, ACCESS)
#define CACHE_L1I_PREFETCH          QTEST_PERF_CACHE_EVENT(L1I, PREFETCH, ACCESS)
#define CACHE_LLC_READ              QTEST_PERF_CACHE_EVENT(LL, READ, ACCESS)
#define CACHE_LLC_WRITE             QTEST_PERF_CACHE_EVENT(LL, WRITE, ACCESS)
#define CACHE_LLC_PREFETCH          QTEST_PERF_CACHE_EVENT(LL, PREFETCH, ACCESS)

// misses
#define CACHE_L1D_READ_MISS         QTEST_PERF_CACHE_EVENT(L1D, READ, MISS)
#define CACHE_L1D_WRITE_MISS        QTEST_PERF_CACHE_EVENT(L1D, WRITE, MISS)
#define CACHE_L1D_PREFETCH_MISS     QTEST_PERF_CACHE_EVENT(L1D, PREFETCH, MISS)
#define CACHE_L1I_READ_MISS         QTEST_PERF_CACHE_EVENT(L1I, READ, MISS)
#define CACHE_L1I_PREFETCH_MISS     QTEST_PERF_CACHE_EVENT(L1I, PREFETCH, MISS)
#define CACHE_LLC_READ_MISS         QTEST_PERF_CACHE_EVENT(LL, READ, MISS)
#define CACHE_LLC_WRITE_MISS        QTEST_PERF_CACHE_EVENT(LL, WRITE, MISS)
#define CACHE_LLC_PREFETCH_MISS     QTEST_PERF_CACHE_EVENT(LL, PREFETCH, MISS)

// branch prediction unit
#define CACHE_BRANCH_READ           QTEST_PERF_CACHE_EVENT(BPU, READ, ACCESS)
#define CACHE_BRANCH_READ_MISS      QTEST_PERF_CACHE_EVENT(BPU, READ, MISS)
83 
84 QT_BEGIN_NAMESPACE
85 
86 static perf_event_attr attr;
87 
initPerf()88 static void initPerf()
89 {
90     static bool done;
91     if (!done) {
92         memset(&attr, 0, sizeof attr);
93         attr.size = sizeof attr;
94         attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
95         attr.disabled = true; // we'll enable later
96         attr.inherit = true; // let children processes inherit the monitoring
97         attr.pinned = true; // keep it running in the hardware
98         attr.inherit_stat = true; // aggregate all the info from child processes
99         attr.task = true; // trace fork/exits
100 
101         // set a default performance counter: CPU cycles
102         attr.type = PERF_TYPE_HARDWARE;
103         attr.config = PERF_COUNT_HW_CPU_CYCLES; // default
104 
105         done = true;
106     }
107 }
108 
109 // This class does not exist in the API so its qdoc comment marker was removed.
110 
111 /*
112     \class QBenchmarkPerfEvents
113     \brief The Linux perf events benchmark backend
114 
115     This benchmark backend uses the Linux Performance Counters interface,
116     introduced with the Linux kernel v2.6.31. The interface is done by one
117     system call (perf_event_open) which takes an attribute structure and
118     returns a file descriptor.
119 
120     More information:
121      \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt>
122      \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c>
123     (note: as of v3.3.1, the documentation is out-of-date with the kernel
124     interface, so reading the source code of existing tools is necessary)
125 
126     This benchlib backend monitors the current process as well as any child
127     processes it launches. We do not try to benchmark in kernel or hypervisor
128     mode, as that usually requires elevated privileges.
129  */
130 
perf_event_open(perf_event_attr * attr,pid_t pid,int cpu,int group_fd,unsigned long flags)131 static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
132 {
133 #ifdef SYS_perf_event_open
134     return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
135 #else
136     Q_UNUSED(attr);
137     Q_UNUSED(pid);
138     Q_UNUSED(cpu);
139     Q_UNUSED(group_fd);
140     Q_UNUSED(flags);
141     errno = ENOSYS;
142     return -1;
143 #endif
144 }
145 
isAvailable()146 bool QBenchmarkPerfEventsMeasurer::isAvailable()
147 {
148     // this generates an EFAULT because attr == NULL if perf_event_open is available
149     // if the kernel is too old, it generates ENOSYS
150     return perf_event_open(nullptr, 0, 0, 0, 0) == -1 && errno != ENOSYS;
151 }
152 
153 /* Event list structure
154    The following table provides the list of supported events
155 
156    Event type   Event counter           Unit            Name and aliases
157    HARDWARE     CPU_CYCLES              CPUCycles       cycles  cpu-cycles
158    HARDWARE     REF_CPU_CYCLES          RefCPUCycles    ref-cycles
159    HARDWARE     INSTRUCTIONS            Instructions    instructions
160    HARDWARE     CACHE_REFERENCES        CacheReferences cache-references
161    HARDWARE     CACHE_MISSES            CacheMisses     cache-misses
162    HARDWARE     BRANCH_INSTRUCTIONS     BranchInstructions branch-instructions branches
163    HARDWARE     BRANCH_MISSES           BranchMisses    branch-misses
164    HARDWARE     BUS_CYCLES              BusCycles       bus-cycles
165    HARDWARE     STALLED_CYCLES_FRONTEND StalledCycles   stalled-cycles-frontend idle-cycles-frontend
166    HARDWARE     STALLED_CYCLES_BACKEND  StalledCycles   stalled-cycles-backend idle-cycles-backend
167    SOFTWARE     CPU_CLOCK               WalltimeMilliseconds cpu-clock
168    SOFTWARE     TASK_CLOCK              WalltimeMilliseconds task-clock
169    SOFTWARE     PAGE_FAULTS             PageFaults      page-faults faults
170    SOFTWARE     PAGE_FAULTS_MAJ         MajorPageFaults major-faults
171    SOFTWARE     PAGE_FAULTS_MIN         MinorPageFaults minor-faults
172    SOFTWARE     CONTEXT_SWITCHES        ContextSwitches context-switches cs
173    SOFTWARE     CPU_MIGRATIONS          CPUMigrations   cpu-migrations migrations
174    SOFTWARE     ALIGNMENT_FAULTS        AlignmentFaults alignment-faults
175    SOFTWARE     EMULATION_FAULTS        EmulationFaults emulation-faults
176    HW_CACHE     L1D_READ                CacheReads      l1d-cache-reads l1d-cache-loads l1d-reads l1d-loads
177    HW_CACHE     L1D_WRITE               CacheWrites     l1d-cache-writes l1d-cache-stores l1d-writes l1d-stores
178    HW_CACHE     L1D_PREFETCH            CachePrefetches l1d-cache-prefetches l1d-prefetches
179    HW_CACHE     L1I_READ                CacheReads      l1i-cache-reads l1i-cache-loads l1i-reads l1i-loads
180    HW_CACHE     L1I_PREFETCH            CachePrefetches l1i-cache-prefetches l1i-prefetches
181    HW_CACHE     LLC_READ                CacheReads      llc-cache-reads llc-cache-loads llc-loads llc-reads
182    HW_CACHE     LLC_WRITE               CacheWrites     llc-cache-writes llc-cache-stores llc-writes llc-stores
183    HW_CACHE     LLC_PREFETCH            CachePrefetches llc-cache-prefetches llc-prefetches
184    HW_CACHE     L1D_READ_MISS           CacheReads      l1d-cache-read-misses l1d-cache-load-misses l1d-read-misses l1d-load-misses
185    HW_CACHE     L1D_WRITE_MISS          CacheWrites     l1d-cache-write-misses l1d-cache-store-misses l1d-write-misses l1d-store-misses
186    HW_CACHE     L1D_PREFETCH_MISS       CachePrefetches l1d-cache-prefetch-misses l1d-prefetch-misses
187    HW_CACHE     L1I_READ_MISS           CacheReads      l1i-cache-read-misses l1i-cache-load-misses l1i-read-misses l1i-load-misses
188    HW_CACHE     L1I_PREFETCH_MISS       CachePrefetches l1i-cache-prefetch-misses l1i-prefetch-misses
189    HW_CACHE     LLC_READ_MISS           CacheReads      llc-cache-read-misses llc-cache-load-misses llc-read-misses llc-load-misses
190    HW_CACHE     LLC_WRITE_MISS          CacheWrites     llc-cache-write-misses llc-cache-store-misses llc-write-misses llc-store-misses
191    HW_CACHE     LLC_PREFETCH_MISS       CachePrefetches llc-cache-prefetch-misses llc-prefetch-misses
192    HW_CACHE     BRANCH_READ             BranchInstructions branch-reads branch-loads branch-predicts
193    HW_CACHE     BRANCH_READ_MISS        BranchMisses    branch-mispredicts branch-read-misses branch-load-misses
194 
195    Use the following Perl script to re-generate the list
196 === cut perl ===
197 #!/usr/bin/env perl
198 # Load all entries into %map
199 while (<STDIN>) {
200     m/^\s*(.*)\s*$/;
201     @_ = split /\s+/, $1;
202     $type = shift @_;
203     $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" :
204        $type eq "SOFTWARE" ? "PERF_COUNT_SW_" :
205        $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_;
206     $unit = shift @_;
207 
208     for $string (@_) {
209     die "$string was already seen!" if defined($map{$string});
210     $map{$string} = [-1, $type, $id, $unit];
211     push @strings, $string;
212     }
213 }
214 
215 # sort the map and print the string list
216 @strings = sort @strings;
217 print "static const char eventlist_strings[] = \n";
218 $counter = 0;
219 for $entry (@strings) {
220     print "    \"$entry\\0\"\n";
221     $map{$entry}[0] = $counter;
222     $counter += 1 + length $entry;
223 }
224 
225 # print the table
226 print "    \"\\0\";\n\nstatic const Events eventlist[] = {\n";
227 for $entry (sort @strings) {
228     printf "    { %3d, PERF_TYPE_%s, %s, QTest::%s },\n",
229         $map{$entry}[0],
230     $map{$entry}[1],
231         $map{$entry}[2],
232         $map{$entry}[3];
233 }
234 print "    {   0, PERF_TYPE_MAX, 0, QTest::Events }\n};\n";
235 === cut perl ===
236 */
237 
// One row of the counter lookup table (eventlist): maps a counter name to the
// perf event it selects and to the benchmark metric it is reported as.
238 struct Events {
239     unsigned offset; // byte offset of the counter's name inside eventlist_strings
240     quint32 type; // perf event type (PERF_TYPE_*); setCounter() copies it to attr.type
241     quint64 event_id; // event selector; setCounter() copies it to attr.config
242     QTest::QBenchmarkMetric metric; // metric returned by metricForEvent() for this event
243 };
244 
245 /* -- BEGIN GENERATED CODE -- */
// All counter names and aliases packed into one character array. Each name is
// NUL-terminated and the names appear in sorted order; Events::offset values
// in eventlist index into this array. Do not edit by hand: regenerate it with
// the Perl script in the comment above so the offsets stay in sync.
246 static const char eventlist_strings[] =
247     "alignment-faults\0"
248     "branch-instructions\0"
249     "branch-load-misses\0"
250     "branch-loads\0"
251     "branch-mispredicts\0"
252     "branch-misses\0"
253     "branch-predicts\0"
254     "branch-read-misses\0"
255     "branch-reads\0"
256     "branches\0"
257     "bus-cycles\0"
258     "cache-misses\0"
259     "cache-references\0"
260     "context-switches\0"
261     "cpu-clock\0"
262     "cpu-cycles\0"
263     "cpu-migrations\0"
264     "cs\0"
265     "cycles\0"
266     "emulation-faults\0"
267     "faults\0"
268     "idle-cycles-backend\0"
269     "idle-cycles-frontend\0"
270     "instructions\0"
271     "l1d-cache-load-misses\0"
272     "l1d-cache-loads\0"
273     "l1d-cache-prefetch-misses\0"
274     "l1d-cache-prefetches\0"
275     "l1d-cache-read-misses\0"
276     "l1d-cache-reads\0"
277     "l1d-cache-store-misses\0"
278     "l1d-cache-stores\0"
279     "l1d-cache-write-misses\0"
280     "l1d-cache-writes\0"
281     "l1d-load-misses\0"
282     "l1d-loads\0"
283     "l1d-prefetch-misses\0"
284     "l1d-prefetches\0"
285     "l1d-read-misses\0"
286     "l1d-reads\0"
287     "l1d-store-misses\0"
288     "l1d-stores\0"
289     "l1d-write-misses\0"
290     "l1d-writes\0"
291     "l1i-cache-load-misses\0"
292     "l1i-cache-loads\0"
293     "l1i-cache-prefetch-misses\0"
294     "l1i-cache-prefetches\0"
295     "l1i-cache-read-misses\0"
296     "l1i-cache-reads\0"
297     "l1i-load-misses\0"
298     "l1i-loads\0"
299     "l1i-prefetch-misses\0"
300     "l1i-prefetches\0"
301     "l1i-read-misses\0"
302     "l1i-reads\0"
303     "llc-cache-load-misses\0"
304     "llc-cache-loads\0"
305     "llc-cache-prefetch-misses\0"
306     "llc-cache-prefetches\0"
307     "llc-cache-read-misses\0"
308     "llc-cache-reads\0"
309     "llc-cache-store-misses\0"
310     "llc-cache-stores\0"
311     "llc-cache-write-misses\0"
312     "llc-cache-writes\0"
313     "llc-load-misses\0"
314     "llc-loads\0"
315     "llc-prefetch-misses\0"
316     "llc-prefetches\0"
317     "llc-read-misses\0"
318     "llc-reads\0"
319     "llc-store-misses\0"
320     "llc-stores\0"
321     "llc-write-misses\0"
322     "llc-writes\0"
323     "major-faults\0"
324     "migrations\0"
325     "minor-faults\0"
326     "page-faults\0"
327     "ref-cycles\0"
328     "stalled-cycles-backend\0"
329     "stalled-cycles-frontend\0"
330     "task-clock\0"
331     "\0";
332 
// Lookup table with one row per name in eventlist_strings, in the same
// (sorted) order; setCounter() relies on that order to stop searching early.
// Several names may map to the same event (aliases). The final row, whose
// type is PERF_TYPE_MAX, is the end-of-table sentinel tested by every loop
// over this array. Generated together with eventlist_strings.
333 static const Events eventlist[] = {
334     {   0, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, QTest::AlignmentFaults },
335     {  17, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
336     {  37, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
337     {  56, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
338     {  69, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
339     {  88, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, QTest::BranchMisses },
340     { 102, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
341     { 118, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
342     { 137, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
343     { 150, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
344     { 159, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, QTest::BusCycles },
345     { 170, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, QTest::CacheMisses },
346     { 183, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, QTest::CacheReferences },
347     { 200, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
348     { 217, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, QTest::WalltimeMilliseconds },
349     { 227, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
350     { 238, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
351     { 253, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
352     { 256, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
353     { 263, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, QTest::EmulationFaults },
354     { 280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
355     { 287, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
356     { 307, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
357     { 328, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, QTest::Instructions },
358     { 341, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
359     { 363, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
360     { 379, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches },
361     { 405, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches },
362     { 426, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
363     { 448, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
364     { 464, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
365     { 487, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
366     { 504, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
367     { 527, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
368     { 544, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
369     { 560, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
370     { 570, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches },
371     { 590, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches },
372     { 605, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
373     { 621, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
374     { 631, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
375     { 648, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
376     { 659, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
377     { 676, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
378     { 687, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
379     { 709, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
380     { 725, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches },
381     { 751, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches },
382     { 772, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
383     { 794, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
384     { 810, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
385     { 826, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
386     { 836, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches },
387     { 856, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches },
388     { 871, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
389     { 887, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
390     { 897, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
391     { 919, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
392     { 935, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches },
393     { 961, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches },
394     { 982, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
395     { 1004, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
396     { 1020, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
397     { 1043, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
398     { 1060, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
399     { 1083, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
400     { 1100, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
401     { 1116, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
402     { 1126, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches },
403     { 1146, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches },
404     { 1161, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
405     { 1177, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
406     { 1187, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
407     { 1204, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
408     { 1215, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
409     { 1232, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
410     { 1243, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, QTest::MajorPageFaults },
411     { 1256, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
412     { 1267, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, QTest::MinorPageFaults },
413     { 1280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
414     { 1292, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, QTest::RefCPUCycles },
415     { 1303, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
416     { 1326, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
417     { 1350, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, QTest::WalltimeMilliseconds },
418     {   0, PERF_TYPE_MAX, 0, QTest::Events }
419 };
420 /* -- END GENERATED CODE -- */
421 
metricForEvent(quint32 type,quint64 event_id)422 QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricForEvent(quint32 type, quint64 event_id)
423 {
424     const Events *ptr = eventlist;
425     for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) {
426         if (ptr->type == type && ptr->event_id == event_id)
427             return ptr->metric;
428     }
429     return QTest::Events;
430 }
431 
setCounter(const char * name)432 void QBenchmarkPerfEventsMeasurer::setCounter(const char *name)
433 {
434     initPerf();
435     const char *colon = strchr(name, ':');
436     int n = colon ? colon - name : strlen(name);
437     const Events *ptr = eventlist;
438     for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) {
439         int c = strncmp(name, eventlist_strings + ptr->offset, n);
440         if (c == 0)
441             break;
442         if (c < 0) {
443             fprintf(stderr, "ERROR: Performance counter type '%s' is unknown\n", name);
444             exit(1);
445         }
446     }
447 
448     attr.type = ptr->type;
449     attr.config = ptr->event_id;
450 
451     // now parse the attributes
452     if (!colon)
453         return;
454     while (*++colon) {
455         switch (*colon) {
456         case 'u':
457             attr.exclude_user = true;
458             break;
459         case 'k':
460             attr.exclude_kernel = true;
461             break;
462         case 'h':
463             attr.exclude_hv = true;
464             break;
465         case 'G':
466             attr.exclude_guest = true;
467             break;
468         case 'H':
469             attr.exclude_host = true;
470             break;
471         default:
472             fprintf(stderr, "ERROR: Unknown attribute '%c'\n", *colon);
473             exit(1);
474         }
475     }
476 }
477 
listCounters()478 void QBenchmarkPerfEventsMeasurer::listCounters()
479 {
480     if (!isAvailable()) {
481         printf("Performance counters are not available on this system\n");
482         return;
483     }
484 
485     printf("The following performance counters are available:\n");
486     const Events *ptr = eventlist;
487     for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) {
488         printf("  %-30s [%s]\n", eventlist_strings + ptr->offset,
489                ptr->type == PERF_TYPE_HARDWARE ? "hardware" :
490                ptr->type == PERF_TYPE_SOFTWARE ? "software" :
491                ptr->type == PERF_TYPE_HW_CACHE ? "cache" : "other");
492     }
493 
494     printf("\nAttributes can be specified by adding a colon and the following:\n"
495            "  u - exclude measuring in the userspace\n"
496            "  k - exclude measuring in kernel mode\n"
497            "  h - exclude measuring in the hypervisor\n"
498            "  G - exclude measuring when running virtualized (guest VM)\n"
499            "  H - exclude measuring when running non-virtualized (host system)\n"
500            "Attributes can be combined, for example: -perfcounter branch-mispredicts:kh\n");
501 }
502 
// Defaulted constructor: no perf event is opened here; start() opens it on
// first use. NOTE(review): presumably fd is initialized to -1 in the class
// declaration (start() tests for that value) - confirm in the header.
503 QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() = default;
504 
~QBenchmarkPerfEventsMeasurer()505 QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer()
506 {
507     qt_safe_close(fd);
508 }
509 
// No-op for this backend: the perf event is opened in start(), not here.
init()510 void QBenchmarkPerfEventsMeasurer::init()
511 {
512 }
513 
start()514 void QBenchmarkPerfEventsMeasurer::start()
515 {
516 
517     initPerf();
518     if (fd == -1) {
519         // pid == 0 -> attach to the current process
520         // cpu == -1 -> monitor on all CPUs
521         // group_fd == -1 -> this is the group leader
522         // flags == 0 -> reserved, must be zero
523         fd = perf_event_open(&attr, 0, -1, -1, 0);
524         if (fd == -1) {
525             perror("QBenchmarkPerfEventsMeasurer::start: perf_event_open");
526             exit(1);
527         } else {
528             ::fcntl(fd, F_SETFD, FD_CLOEXEC);
529         }
530     }
531 
532     // enable the counter
533     ::ioctl(fd, PERF_EVENT_IOC_RESET);
534     ::ioctl(fd, PERF_EVENT_IOC_ENABLE);
535 }
536 
checkpoint()537 qint64 QBenchmarkPerfEventsMeasurer::checkpoint()
538 {
539     ::ioctl(fd, PERF_EVENT_IOC_DISABLE);
540     qint64 value = readValue();
541     ::ioctl(fd, PERF_EVENT_IOC_ENABLE);
542     return value;
543 }
544 
stop()545 qint64 QBenchmarkPerfEventsMeasurer::stop()
546 {
547     // disable the counter
548     ::ioctl(fd, PERF_EVENT_IOC_DISABLE);
549     return readValue();
550 }
551 
// Accepts every measurement: the argument is ignored and the result is
// always true.
isMeasurementAccepted(qint64)552 bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(qint64)
553 {
554     return true;
555 }
556 
// Ignores its argument and always returns 1.
adjustIterationCount(int)557 int QBenchmarkPerfEventsMeasurer::adjustIterationCount(int)
558 {
559     return 1;
560 }
561 
// Ignores its argument and always returns 1.
adjustMedianCount(int)562 int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int)
563 {
564     return 1;
565 }
566 
// Returns the benchmark metric for the counter currently selected in the
// shared attribute block (attr.type / attr.config), via metricForEvent().
metricType()567 QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricType()
568 {
569     return metricForEvent(attr.type, attr.config);
570 }
571 
rawReadValue(int fd)572 static quint64 rawReadValue(int fd)
573 {
574     /* from the kernel docs:
575      * struct read_format {
576      *  { u64           value;
577      *    { u64         time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
578      *    { u64         time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
579      *    { u64         id;           } && PERF_FORMAT_ID
580      *  } && !PERF_FORMAT_GROUP
581      */
582 
583     struct read_format {
584         quint64 value;
585         quint64 time_enabled;
586         quint64 time_running;
587     } results;
588 
589     size_t nread = 0;
590     while (nread < sizeof results) {
591         char *ptr = reinterpret_cast<char *>(&results);
592         qint64 r = qt_safe_read(fd, ptr + nread, sizeof results - nread);
593         if (r == -1) {
594             perror("QBenchmarkPerfEventsMeasurer::readValue: reading the results");
595             exit(1);
596         }
597         nread += quint64(r);
598     }
599 
600     if (results.time_running == results.time_enabled)
601         return results.value;
602 
603     // scale the results, though this shouldn't happen!
604     return results.value * (double(results.time_running) / double(results.time_enabled));
605 }
606 
readValue()607 qint64 QBenchmarkPerfEventsMeasurer::readValue()
608 {
609     quint64 raw = rawReadValue(fd);
610     if (metricType() == QTest::WalltimeMilliseconds) {
611         // perf returns nanoseconds
612         return raw / 1000000;
613     }
614     return raw;
615 }
616 
617 QT_END_NAMESPACE
618 
619 #endif
620