1 #ifndef __EVENT_COUNTER_H 2 #define __EVENT_COUNTER_H 3 4 #include <cassert> 5 #include <cctype> 6 #ifndef _MSC_VER 7 #include <dirent.h> 8 #endif 9 #include <unistd.h> 10 #include <cinttypes> 11 12 #include <cstdio> 13 #include <cstdlib> 14 #include <cstring> 15 16 #include <algorithm> 17 #include <chrono> 18 #include <cstring> 19 #include <fstream> 20 #include <iomanip> 21 #include <iostream> 22 #include <map> 23 #include <set> 24 #include <sstream> 25 #include <string> 26 #include <vector> 27 28 #include "linux-perf-events.h" 29 #ifdef __linux__ 30 #include <libgen.h> 31 #endif 32 33 #include "simdjson.h" 34 35 using std::string; 36 using std::vector; 37 using std::chrono::steady_clock; 38 using std::chrono::time_point; 39 using std::chrono::duration; 40 41 struct event_count { 42 duration<double> elapsed; 43 vector<unsigned long long> event_counts; event_countevent_count44 event_count() : elapsed(0), event_counts{0,0,0,0,0} {} event_countevent_count45 event_count(const duration<double> _elapsed, const vector<unsigned long long> _event_counts) : elapsed(_elapsed), event_counts(_event_counts) {} event_countevent_count46 event_count(const event_count& other): elapsed(other.elapsed), event_counts(other.event_counts) { } 47 48 // The types of counters (so we can read the getter more easily) 49 enum event_counter_types { 50 CPU_CYCLES, 51 INSTRUCTIONS, 52 BRANCH_MISSES, 53 CACHE_REFERENCES, 54 CACHE_MISSES 55 }; 56 elapsed_secevent_count57 double elapsed_sec() const { return duration<double>(elapsed).count(); } elapsed_nsevent_count58 double elapsed_ns() const { return duration<double, std::nano>(elapsed).count(); } cyclesevent_count59 double cycles() const { return static_cast<double>(event_counts[CPU_CYCLES]); } instructionsevent_count60 double instructions() const { return static_cast<double>(event_counts[INSTRUCTIONS]); } branch_missesevent_count61 double branch_misses() const { return static_cast<double>(event_counts[BRANCH_MISSES]); } cache_referencesevent_count62 double cache_references() const { return static_cast<double>(event_counts[CACHE_REFERENCES]); } cache_missesevent_count63 double cache_misses() const { return static_cast<double>(event_counts[CACHE_MISSES]); } 64 65 event_count& operator=(const event_count& other) { 66 this->elapsed = other.elapsed; 67 this->event_counts = other.event_counts; 68 return *this; 69 } 70 event_count operator+(const event_count& other) const { 71 return event_count(elapsed+other.elapsed, { 72 event_counts[0]+other.event_counts[0], 73 event_counts[1]+other.event_counts[1], 74 event_counts[2]+other.event_counts[2], 75 event_counts[3]+other.event_counts[3], 76 event_counts[4]+other.event_counts[4], 77 }); 78 } 79 80 void operator+=(const event_count& other) { 81 *this = *this + other; 82 } 83 }; 84 85 struct event_aggregate { 86 int iterations = 0; 87 event_count total{}; 88 event_count best{}; 89 event_count worst{}; 90 event_aggregateevent_aggregate91 event_aggregate() {} 92 93 void operator<<(const event_count& other) { 94 if (iterations == 0 || other.elapsed < best.elapsed) { 95 best = other; 96 } 97 if (iterations == 0 || other.elapsed > worst.elapsed) { 98 worst = other; 99 } 100 iterations++; 101 total += other; 102 } 103 elapsed_secevent_aggregate104 double elapsed_sec() const { return total.elapsed_sec() / iterations; } elapsed_nsevent_aggregate105 double elapsed_ns() const { return total.elapsed_ns() / iterations; } cyclesevent_aggregate106 double cycles() const { return total.cycles() / iterations; } instructionsevent_aggregate107 double instructions() const { return total.instructions() / iterations; } branch_missesevent_aggregate108 double branch_misses() const { return total.branch_misses() / iterations; } cache_referencesevent_aggregate109 double cache_references() const { return total.cache_references() / iterations; } cache_missesevent_aggregate110 double cache_misses() const { return total.cache_misses() / iterations; } 111 }; 112 113 struct event_collector { 114 event_count count{}; 115 time_point<steady_clock> start_clock{}; 116 117 #if defined(__linux__) 118 LinuxEvents<PERF_TYPE_HARDWARE> linux_events; linux_eventsevent_collector119 event_collector(bool quiet = false) : linux_events(vector<int>{ 120 PERF_COUNT_HW_CPU_CYCLES, 121 PERF_COUNT_HW_INSTRUCTIONS, 122 PERF_COUNT_HW_BRANCH_MISSES, 123 PERF_COUNT_HW_CACHE_REFERENCES, 124 PERF_COUNT_HW_CACHE_MISSES 125 }, quiet) {} has_eventsevent_collector126 bool has_events() { 127 return linux_events.is_working(); 128 } 129 #else 130 event_collector(simdjson_unused bool _quiet = false) {} has_eventsevent_collector131 bool has_events() { 132 return false; 133 } 134 #endif 135 startevent_collector136 simdjson_really_inline void start() { 137 #if defined(__linux) 138 linux_events.start(); 139 #endif 140 start_clock = steady_clock::now(); 141 } endevent_collector142 simdjson_really_inline event_count& end() { 143 time_point<steady_clock> end_clock = steady_clock::now(); 144 #if defined(__linux) 145 linux_events.end(count.event_counts); 146 #endif 147 count.elapsed = end_clock - start_clock; 148 return count; 149 } 150 }; 151 152 #endif 153