1 #ifndef __EVENT_COUNTER_H
2 #define __EVENT_COUNTER_H
3 
4 #include <cassert>
5 #include <cctype>
6 #ifndef _MSC_VER
7 #include <dirent.h>
8 #endif
9 #include <unistd.h>
10 #include <cinttypes>
11 
12 #include <cstdio>
13 #include <cstdlib>
14 #include <cstring>
15 
16 #include <algorithm>
17 #include <chrono>
18 #include <cstring>
19 #include <fstream>
20 #include <iomanip>
21 #include <iostream>
22 #include <map>
23 #include <set>
24 #include <sstream>
25 #include <string>
26 #include <vector>
27 
28 #include "linux-perf-events.h"
29 #ifdef __linux__
30 #include <libgen.h>
31 #endif
32 
33 #include "simdjson.h"
34 
35 using std::string;
36 using std::vector;
37 using std::chrono::steady_clock;
38 using std::chrono::time_point;
39 using std::chrono::duration;
40 
// One measurement sample: the wall-clock time that elapsed plus a vector of
// raw event counter values. The counters are addressed by position using the
// event_counter_types enum below.
struct event_count {
  duration<double> elapsed;
  vector<unsigned long long> event_counts;

  // Default sample: zero elapsed time and five zeroed counters (one per
  // event_counter_types entry).
  event_count() : elapsed(0), event_counts{0,0,0,0,0} {}
  // Builds a sample from an elapsed time and a set of counter values. The
  // vector is taken by const reference so it is copied exactly once.
  event_count(const duration<double> _elapsed, const vector<unsigned long long>& _event_counts) : elapsed(_elapsed), event_counts(_event_counts) {}
  // Copy/move construction and assignment are intentionally left to the
  // compiler: both members already have the right value semantics.

  // The types of counters (so we can read the getter more easily)
  enum event_counter_types {
    CPU_CYCLES,
    INSTRUCTIONS,
    BRANCH_MISSES,
    CACHE_REFERENCES,
    CACHE_MISSES
  };

  // Elapsed time in seconds / nanoseconds.
  double elapsed_sec() const { return duration<double>(elapsed).count(); }
  double elapsed_ns() const { return duration<double, std::nano>(elapsed).count(); }
  // Individual counters as doubles. A counter that is not present in
  // event_counts (vector shorter than expected) reads as 0 instead of
  // indexing out of bounds.
  double cycles() const { return counter(CPU_CYCLES); }
  double instructions() const { return counter(INSTRUCTIONS); }
  double branch_misses() const { return counter(BRANCH_MISSES); }
  double cache_references() const { return counter(CACHE_REFERENCES); }
  double cache_misses() const { return counter(CACHE_MISSES); }

  // Returns the element-wise sum of two samples (elapsed times are added too).
  event_count operator+(const event_count& other) const {
    event_count sum(*this);
    sum += other;
    return sum;
  }

  // Adds `other` into this sample in place. Works for any number of counters:
  // if `other` carries more counters than this sample, the missing ones are
  // treated as 0 here; counters missing from `other` contribute nothing.
  event_count& operator+=(const event_count& other) {
    elapsed += other.elapsed;
    const std::size_t other_size = other.event_counts.size();
    if (event_counts.size() < other_size) {
      event_counts.resize(other_size, 0);
    }
    for (std::size_t i = 0; i < other_size; i++) {
      event_counts[i] += other.event_counts[i];
    }
    return *this;
  }

private:
  // Bounds-checked read of one counter, converted to double.
  double counter(std::size_t index) const {
    return index < event_counts.size() ? static_cast<double>(event_counts[index]) : 0.0;
  }
};
84 
85 struct event_aggregate {
86   int iterations = 0;
87   event_count total{};
88   event_count best{};
89   event_count worst{};
90 
event_aggregateevent_aggregate91   event_aggregate() {}
92 
93   void operator<<(const event_count& other) {
94     if (iterations == 0 || other.elapsed < best.elapsed) {
95       best = other;
96     }
97     if (iterations == 0 || other.elapsed > worst.elapsed) {
98       worst = other;
99     }
100     iterations++;
101     total += other;
102   }
103 
elapsed_secevent_aggregate104   double elapsed_sec() const { return total.elapsed_sec() / iterations; }
elapsed_nsevent_aggregate105   double elapsed_ns() const { return total.elapsed_ns() / iterations; }
cyclesevent_aggregate106   double cycles() const { return total.cycles() / iterations; }
instructionsevent_aggregate107   double instructions() const { return total.instructions() / iterations; }
branch_missesevent_aggregate108   double branch_misses() const { return total.branch_misses() / iterations; }
cache_referencesevent_aggregate109   double cache_references() const { return total.cache_references() / iterations; }
cache_missesevent_aggregate110   double cache_misses() const { return total.cache_misses() / iterations; }
111 };
112 
113 struct event_collector {
114   event_count count{};
115   time_point<steady_clock> start_clock{};
116 
117 #if defined(__linux__)
118   LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
linux_eventsevent_collector119   event_collector(bool quiet = false) : linux_events(vector<int>{
120     PERF_COUNT_HW_CPU_CYCLES,
121     PERF_COUNT_HW_INSTRUCTIONS,
122     PERF_COUNT_HW_BRANCH_MISSES,
123     PERF_COUNT_HW_CACHE_REFERENCES,
124     PERF_COUNT_HW_CACHE_MISSES
125   }, quiet) {}
has_eventsevent_collector126   bool has_events() {
127     return linux_events.is_working();
128   }
129 #else
130   event_collector(simdjson_unused bool _quiet = false) {}
has_eventsevent_collector131   bool has_events() {
132     return false;
133   }
134 #endif
135 
startevent_collector136   simdjson_really_inline void start() {
137 #if defined(__linux)
138     linux_events.start();
139 #endif
140     start_clock = steady_clock::now();
141   }
endevent_collector142   simdjson_really_inline event_count& end() {
143     time_point<steady_clock> end_clock = steady_clock::now();
144 #if defined(__linux)
145     linux_events.end(count.event_counts);
146 #endif
147     count.elapsed = end_clock - start_clock;
148     return count;
149   }
150 };
151 
152 #endif
153