1 #include "event_counter.h"
2
3 #include <cassert>
4 #include <cctype>
5 #ifndef _MSC_VER
6 #include <dirent.h>
7 #endif
8 #include <unistd.h>
9 #include <cinttypes>
10 #include <initializer_list>
11
12 #include <cstdio>
13 #include <cstdlib>
14 #include <cstring>
15
16 #include <algorithm>
17 #include <chrono>
18 #include <cstring>
19 #include <fstream>
20 #include <iomanip>
21 #include <iostream>
22 #include <map>
23 #include <set>
24 #include <sstream>
25 #include <string>
26 #include <vector>
27
28 #include "linux-perf-events.h"
29 #ifdef __linux__
30 #include <libgen.h>
31 #endif
32
33 #include "simdjson.h"
34
35 #include <functional>
36
37 #include "benchmarker.h"
38
39 using namespace simdjson;
40 using std::cerr;
41 using std::cout;
42 using std::endl;
43 using std::string;
44 using std::to_string;
45 using std::vector;
46 using std::ostream;
47 using std::ofstream;
48 using std::exception;
49
50 // Stash the exe_name in main() for functions to use
51 char* exe_name;
52
print_usage(ostream & out)53 void print_usage(ostream& out) {
54 out << "Usage: " << exe_name << " [-v] [-n #] [-s STAGE] [-a ARCH]" << endl;
55 out << endl;
56 out << "Runs the parser against jsonexamples/generated json files in a loop, measuring speed and other statistics." << endl;
57 out << endl;
58 out << "Options:" << endl;
59 out << endl;
60 out << "-n # - Number of iterations per file. Default: 400" << endl;
61 out << "-i # - Number of times to iterate a single file before moving to the next. Default: 20" << endl;
62 out << "-v - Verbose output." << endl;
63 out << "-s STAGE - Stop after the given stage." << endl;
64 out << " -s stage1 - Stop after find_structural_bits." << endl;
65 out << " -s all - Run all stages." << endl;
66 out << "-a ARCH - Use the parser with the designated architecture (HASWELL, WESTMERE," << endl;
67 out << " PPC64 or ARM64). By default, detects best supported architecture." << endl;
68 }
69
exit_usage(string message)70 void exit_usage(string message) {
71 cerr << message << endl;
72 cerr << endl;
73 print_usage(cerr);
74 exit(EXIT_FAILURE);
75 }
76
77 struct option_struct {
78 bool stage1_only = false;
79
80 int32_t iterations = 400;
81 int32_t iteration_step = 50;
82
83 bool verbose = false;
84
option_structoption_struct85 option_struct(int argc, char **argv) {
86 int c;
87
88 while ((c = getopt(argc, argv, "vtn:i:a:s:")) != -1) {
89 switch (c) {
90 case 'n':
91 iterations = atoi(optarg);
92 break;
93 case 'i':
94 iteration_step = atoi(optarg);
95 break;
96 case 'v':
97 verbose = true;
98 break;
99 case 'a': {
100 auto impl = simdjson::available_implementations[optarg];
101 if(impl && impl->supported_by_runtime_system()) {
102 simdjson::active_implementation = impl;
103 } else {
104 std::cerr << "implementation " << optarg << " not found or not supported " << std::endl;
105 }
106 }
107 break;
108 case 's':
109 if (!strcmp(optarg, "stage1")) {
110 stage1_only = true;
111 } else if (!strcmp(optarg, "all")) {
112 stage1_only = false;
113 } else {
114 exit_usage(string("Unsupported option value -s ") + optarg + ": expected -s stage1 or all");
115 }
116 break;
117 default:
118 exit_error(string("Unexpected argument ") + std::string(1,static_cast<char>(c)));
119 }
120 }
121 }
122
123 template<typename F>
each_stageoption_struct124 void each_stage(const F& f) const {
125 f(BenchmarkStage::STAGE1);
126 if (!this->stage1_only) {
127 f(BenchmarkStage::STAGE2);
128 f(BenchmarkStage::ALL);
129 }
130 }
131
132 };
133
134 struct feature_benchmarker {
135 benchmarker utf8;
136 benchmarker utf8_miss;
137 benchmarker escape;
138 benchmarker escape_miss;
139 benchmarker empty;
140 benchmarker empty_miss;
141 benchmarker struct7;
142 benchmarker struct7_miss;
143 benchmarker struct7_full;
144 benchmarker struct15;
145 benchmarker struct15_miss;
146 benchmarker struct23;
147 benchmarker struct23_miss;
148
feature_benchmarkerfeature_benchmarker149 feature_benchmarker(event_collector& collector) :
150 utf8 (SIMDJSON_BENCHMARK_DATA_DIR "generated/utf-8.json", collector),
151 utf8_miss (SIMDJSON_BENCHMARK_DATA_DIR "generated/utf-8-miss.json", collector),
152 escape (SIMDJSON_BENCHMARK_DATA_DIR "generated/escape.json", collector),
153 escape_miss (SIMDJSON_BENCHMARK_DATA_DIR "generated/escape-miss.json", collector),
154 empty (SIMDJSON_BENCHMARK_DATA_DIR "generated/0-structurals.json", collector),
155 empty_miss (SIMDJSON_BENCHMARK_DATA_DIR "generated/0-structurals-miss.json", collector),
156 struct7 (SIMDJSON_BENCHMARK_DATA_DIR "generated/7-structurals.json", collector),
157 struct7_miss (SIMDJSON_BENCHMARK_DATA_DIR "generated/7-structurals-miss.json", collector),
158 struct7_full (SIMDJSON_BENCHMARK_DATA_DIR "generated/7-structurals-full.json", collector),
159 struct15 (SIMDJSON_BENCHMARK_DATA_DIR "generated/15-structurals.json", collector),
160 struct15_miss(SIMDJSON_BENCHMARK_DATA_DIR "generated/15-structurals-miss.json", collector),
161 struct23 (SIMDJSON_BENCHMARK_DATA_DIR "generated/23-structurals.json", collector),
162 struct23_miss(SIMDJSON_BENCHMARK_DATA_DIR "generated/23-structurals-miss.json", collector)
163 {
164
165 }
166
run_iterationsfeature_benchmarker167 simdjson_really_inline void run_iterations(size_t iterations, bool stage1_only=false) {
168 struct7.run_iterations(iterations, stage1_only);
169 struct7_miss.run_iterations(iterations, stage1_only);
170 struct7_full.run_iterations(iterations, stage1_only);
171 utf8.run_iterations(iterations, stage1_only);
172 utf8_miss.run_iterations(iterations, stage1_only);
173 escape.run_iterations(iterations, stage1_only);
174 escape_miss.run_iterations(iterations, stage1_only);
175 empty.run_iterations(iterations, stage1_only);
176 empty_miss.run_iterations(iterations, stage1_only);
177 struct15.run_iterations(iterations, stage1_only);
178 struct15_miss.run_iterations(iterations, stage1_only);
179 struct23.run_iterations(iterations, stage1_only);
180 struct23_miss.run_iterations(iterations, stage1_only);
181 }
182
cost_per_blockfeature_benchmarker183 double cost_per_block(BenchmarkStage stage, const benchmarker& feature, size_t feature_blocks, const benchmarker& base) const {
184 return (feature[stage].best.elapsed_ns() - base[stage].best.elapsed_ns()) / double(feature_blocks);
185 }
186
187 // Whether we're recording cache miss and branch miss events
has_eventsfeature_benchmarker188 bool has_events() const {
189 return empty.collector.has_events();
190 }
191
192 // Base cost of any block (including empty ones)
base_costfeature_benchmarker193 double base_cost(BenchmarkStage stage) const {
194 return (empty[stage].best.elapsed_ns() / double(empty.stats->blocks));
195 }
196
197 // Extra cost of a 1-7 structural block over an empty block
struct1_7_costfeature_benchmarker198 double struct1_7_cost(BenchmarkStage stage) const {
199 return cost_per_block(stage, struct7, struct7.stats->blocks_with_1_structural, empty);
200 }
201 // Extra cost of an 1-7-structural miss
struct1_7_miss_costfeature_benchmarker202 double struct1_7_miss_cost(BenchmarkStage stage) const {
203 return cost_per_block(stage, struct7_miss, struct7_miss.stats->blocks_with_1_structural, struct7);
204 }
205 // Rate of 1-7-structural misses per 8-structural flip
struct1_7_miss_ratefeature_benchmarker206 double struct1_7_miss_rate(BenchmarkStage stage) const {
207 if (!has_events()) { return 1; }
208 return struct7_miss[stage].best.branch_misses() - struct7[stage].best.branch_misses() / double(struct7_miss.stats->blocks_with_1_structural_flipped);
209 }
210
211 // Extra cost of an 8-15 structural block over a 1-7 structural block
struct8_15_costfeature_benchmarker212 double struct8_15_cost(BenchmarkStage stage) const {
213 return cost_per_block(stage, struct15, struct15.stats->blocks_with_8_structurals, struct7);
214 }
215 // Extra cost of an 8-15-structural miss over a 1-7 miss
struct8_15_miss_costfeature_benchmarker216 double struct8_15_miss_cost(BenchmarkStage stage) const {
217 return cost_per_block(stage, struct15_miss, struct15_miss.stats->blocks_with_8_structurals_flipped, struct15);
218 }
219 // Rate of 8-15-structural misses per 8-structural flip
struct8_15_miss_ratefeature_benchmarker220 double struct8_15_miss_rate(BenchmarkStage stage) const {
221 if (!has_events()) { return 1; }
222 return double(struct15_miss[stage].best.branch_misses() - struct15[stage].best.branch_misses()) / double(struct15_miss.stats->blocks_with_8_structurals_flipped);
223 }
224
225 // Extra cost of a 16+-structural block over an 8-15 structural block (actual varies based on # of structurals!)
struct16_costfeature_benchmarker226 double struct16_cost(BenchmarkStage stage) const {
227 return cost_per_block(stage, struct23, struct23.stats->blocks_with_16_structurals, struct15);
228 }
229 // Extra cost of a 16-structural miss over an 8-15 miss
struct16_miss_costfeature_benchmarker230 double struct16_miss_cost(BenchmarkStage stage) const {
231 return cost_per_block(stage, struct23_miss, struct23_miss.stats->blocks_with_16_structurals_flipped, struct23);
232 }
233 // Rate of 16-structural misses per 16-structural flip
struct16_miss_ratefeature_benchmarker234 double struct16_miss_rate(BenchmarkStage stage) const {
235 if (!has_events()) { return 1; }
236 return double(struct23_miss[stage].best.branch_misses() - struct23[stage].best.branch_misses()) / double(struct23_miss.stats->blocks_with_16_structurals_flipped);
237 }
238
239 // Extra cost of having UTF-8 in a block
utf8_costfeature_benchmarker240 double utf8_cost(BenchmarkStage stage) const {
241 return cost_per_block(stage, utf8, utf8.stats->blocks_with_utf8, struct7_full);
242 }
243 // Extra cost of a UTF-8 miss
utf8_miss_costfeature_benchmarker244 double utf8_miss_cost(BenchmarkStage stage) const {
245 return cost_per_block(stage, utf8_miss, utf8_miss.stats->blocks_with_utf8_flipped, utf8);
246 }
247 // Rate of UTF-8 misses per UTF-8 flip
utf8_miss_ratefeature_benchmarker248 double utf8_miss_rate(BenchmarkStage stage) const {
249 if (!has_events()) { return 1; }
250 return double(utf8_miss[stage].best.branch_misses() - utf8[stage].best.branch_misses()) / double(utf8_miss.stats->blocks_with_utf8_flipped);
251 }
252
253 // Extra cost of having escapes in a block
escape_costfeature_benchmarker254 double escape_cost(BenchmarkStage stage) const {
255 return cost_per_block(stage, escape, escape.stats->blocks_with_escapes, struct7_full);
256 }
257 // Extra cost of an escape miss
escape_miss_costfeature_benchmarker258 double escape_miss_cost(BenchmarkStage stage) const {
259 return cost_per_block(stage, escape_miss, escape_miss.stats->blocks_with_escapes_flipped, escape);
260 }
261 // Rate of escape misses per escape flip
escape_miss_ratefeature_benchmarker262 double escape_miss_rate(BenchmarkStage stage) const {
263 if (!has_events()) { return 1; }
264 return double(escape_miss[stage].best.branch_misses() - escape[stage].best.branch_misses()) / double(escape_miss.stats->blocks_with_escapes_flipped);
265 }
266
calc_expected_feature_costfeature_benchmarker267 double calc_expected_feature_cost(BenchmarkStage stage, const benchmarker& file) const {
268 // Expected base ns/block (empty)
269 json_stats& stats = *file.stats;
270 double expected = base_cost(stage) * double(stats.blocks);
271 expected += struct1_7_cost(stage) * double(stats.blocks_with_1_structural);
272 expected += utf8_cost(stage) * double(stats.blocks_with_utf8);
273 expected += escape_cost(stage) * double(stats.blocks_with_escapes);
274 expected += struct8_15_cost(stage) * double(stats.blocks_with_8_structurals);
275 expected += struct16_cost(stage) * double(stats.blocks_with_16_structurals);
276 return expected / double(stats.blocks);
277 }
278
calc_expected_miss_costfeature_benchmarker279 double calc_expected_miss_cost(BenchmarkStage stage, const benchmarker& file) const {
280 // Expected base ns/block (empty)
281 json_stats& stats = *file.stats;
282 double expected = struct1_7_miss_cost(stage) * double(stats.blocks_with_1_structural_flipped) * struct1_7_miss_rate(stage);
283 expected += utf8_miss_cost(stage) * double(stats.blocks_with_utf8_flipped) * utf8_miss_rate(stage);
284 expected += escape_miss_cost(stage) * double(stats.blocks_with_escapes_flipped) * escape_miss_rate(stage);
285 expected += struct8_15_miss_cost(stage) * double(stats.blocks_with_8_structurals_flipped) * struct8_15_miss_rate(stage);
286 expected += struct16_miss_cost(stage) * double(stats.blocks_with_16_structurals_flipped) * struct16_miss_rate(stage);
287 return expected / double(stats.blocks);
288 }
289
calc_expected_missesfeature_benchmarker290 double calc_expected_misses(BenchmarkStage stage, const benchmarker& file) const {
291 json_stats& stats = *file.stats;
292 double expected = double(stats.blocks_with_1_structural_flipped) * struct1_7_miss_rate(stage);
293 expected += double(stats.blocks_with_utf8_flipped) * utf8_miss_rate(stage);
294 expected += double(stats.blocks_with_escapes_flipped) * escape_miss_rate(stage);
295 expected += double(stats.blocks_with_8_structurals_flipped) * struct8_15_miss_rate(stage);
296 expected += double(stats.blocks_with_16_structurals_flipped) * struct16_miss_rate(stage);
297 return expected;
298 }
299
calc_expectedfeature_benchmarker300 double calc_expected(BenchmarkStage stage, const benchmarker& file) const {
301 return calc_expected_feature_cost(stage, file) + calc_expected_miss_cost(stage, file);
302 }
303
printfeature_benchmarker304 void print(const option_struct& options) const {
305 printf("\n");
306 printf("Features in ns/block (64 bytes):\n");
307 printf("\n");
308 printf("| %-8s ", "Stage");
309 printf("| %8s ", "Base");
310 printf("| %8s ", "7 Struct");
311 printf("| %8s ", "UTF-8");
312 printf("| %8s ", "Escape");
313 printf("| %8s ", "15 Str.");
314 printf("| %8s ", "16+ Str.");
315 printf("| %15s ", "7 Struct Miss");
316 printf("| %15s ", "UTF-8 Miss");
317 printf("| %15s ", "Escape Miss");
318 printf("| %15s ", "15 Str. Miss");
319 printf("| %15s ", "16+ Str. Miss");
320 printf("|\n");
321
322 printf("|%.10s", "---------------------------------------");
323 printf("|%.10s", "---------------------------------------");
324 printf("|%.10s", "---------------------------------------");
325 printf("|%.10s", "---------------------------------------");
326 printf("|%.10s", "---------------------------------------");
327 printf("|%.10s", "---------------------------------------");
328 printf("|%.10s", "---------------------------------------");
329 printf("|%.17s", "---------------------------------------");
330 printf("|%.17s", "---------------------------------------");
331 printf("|%.17s", "---------------------------------------");
332 printf("|%.17s", "---------------------------------------");
333 printf("|%.17s", "---------------------------------------");
334 printf("|\n");
335
336 options.each_stage([&](auto stage) {
337 printf("| %-8s ", benchmark_stage_name(stage));
338 printf("| %8.3g ", base_cost(stage));
339 printf("| %8.3g ", struct1_7_cost(stage));
340 printf("| %8.3g ", utf8_cost(stage));
341 printf("| %8.3g ", escape_cost(stage));
342 printf("| %8.3g ", struct8_15_cost(stage));
343 printf("| %8.3g ", struct16_cost(stage));
344 if (has_events()) {
345 printf("| %8.3g (%3d%%) ", struct1_7_miss_cost(stage), int(struct1_7_miss_rate(stage)*100));
346 printf("| %8.3g (%3d%%) ", utf8_miss_cost(stage), int(utf8_miss_rate(stage)*100));
347 printf("| %8.3g (%3d%%) ", escape_miss_cost(stage), int(escape_miss_rate(stage)*100));
348 printf("| %8.3g (%3d%%) ", struct8_15_miss_cost(stage), int(struct8_15_miss_rate(stage)*100));
349 printf("| %8.3g (%3d%%) ", struct16_miss_cost(stage), int(struct16_miss_rate(stage)*100));
350 } else {
351 printf("| %8.3g ", struct1_7_miss_cost(stage));
352 printf("| %8.3g ", utf8_miss_cost(stage));
353 printf("| %8.3g ", escape_miss_cost(stage));
354 printf("| %8.3g ", struct8_15_miss_cost(stage));
355 printf("| %8.3g ", struct16_miss_cost(stage));
356 }
357 printf("|\n");
358 });
359 }
360 };
361
print_file_effectiveness(BenchmarkStage stage,const char * filename,const benchmarker & results,const feature_benchmarker & features)362 void print_file_effectiveness(BenchmarkStage stage, const char* filename, const benchmarker& results, const feature_benchmarker& features) {
363 double actual = results[stage].best.elapsed_ns() / double(results.stats->blocks);
364 double calc = features.calc_expected(stage, results);
365 double actual_misses = results[stage].best.branch_misses();
366 double calc_misses = features.calc_expected_misses(stage, results);
367 double calc_miss_cost = features.calc_expected_miss_cost(stage, results);
368 printf(" | %-8s ", benchmark_stage_name(stage));
369 printf("| %-15s ", filename);
370 printf("| %8.3g ", features.calc_expected_feature_cost(stage, results));
371 printf("| %8.3g ", calc_miss_cost);
372 printf("| %8.3g ", calc);
373 printf("| %8.3g ", actual);
374 printf("| %+8.3g ", actual - calc);
375 printf("| %13llu ", (long long unsigned)(calc_misses));
376 if (features.has_events()) {
377 printf("| %13llu ", (long long unsigned)(actual_misses));
378 printf("| %+13lld ", (long long int)(actual_misses - calc_misses));
379 double miss_adjustment = calc_miss_cost * (double(int64_t(actual_misses - calc_misses)) / calc_misses);
380 printf("| %8.3g ", calc_miss_cost + miss_adjustment);
381 printf("| %+8.3g ", actual - (calc + miss_adjustment));
382 }
383 printf("|\n");
384 }
385
main(int argc,char * argv[])386 int main(int argc, char *argv[]) {
387 // Read options
388 exe_name = argv[0];
389 option_struct options(argc, argv);
390 if (options.verbose) {
391 verbose_stream = &cout;
392 }
393
394 // Initialize the event collector. We put this early so if it prints an error message, it's the
395 // first thing printed.
396 event_collector collector;
397
398 // Set up benchmarkers by reading all files
399 feature_benchmarker features(collector);
400 benchmarker gsoc_2018(SIMDJSON_BENCHMARK_DATA_DIR "gsoc-2018.json", collector);
401 benchmarker twitter(SIMDJSON_BENCHMARK_DATA_DIR "twitter.json", collector);
402 benchmarker random(SIMDJSON_BENCHMARK_DATA_DIR "random.json", collector);
403
404 // Run the benchmarks
405 progress_bar progress(options.iterations, 100);
406 // Put the if (options.stage1_only) *outside* the loop so that run_iterations will be optimized
407 if (options.stage1_only) {
408 for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
409 if (!options.verbose) { progress.print(iteration); }
410 features.run_iterations(options.iteration_step, true);
411 gsoc_2018.run_iterations(options.iteration_step, true);
412 twitter.run_iterations(options.iteration_step, true);
413 random.run_iterations(options.iteration_step, true);
414 }
415 } else {
416 for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
417 if (!options.verbose) { progress.print(iteration); }
418 features.run_iterations(options.iteration_step, false);
419 gsoc_2018.run_iterations(options.iteration_step, false);
420 twitter.run_iterations(options.iteration_step, false);
421 random.run_iterations(options.iteration_step, false);
422 }
423 }
424 if (!options.verbose) { progress.erase(); }
425
426 features.print(options);
427
428 // Gauge effectiveness
429 if (options.verbose) {
430 printf("\n");
431 printf(" Effectiveness Check: Estimated vs. Actual ns/block for real files:\n");
432 printf("\n");
433 printf(" | %8s ", "Stage");
434 printf("| %-15s ", "File");
435 printf("| %11s ", "Est. (Base)");
436 printf("| %11s ", "Est. (Miss)");
437 printf("| %8s ", "Est.");
438 printf("| %8s ", "Actual");
439 printf("| %8s ", "Diff");
440 printf("| %13s ", "Est. Misses");
441 if (features.has_events()) {
442 printf("| %13s ", "Actual Misses");
443 printf("| %13s ", "Diff (Misses)");
444 printf("| %13s ", "Adjusted Miss");
445 printf("| %13s ", "Adjusted Diff");
446 }
447 printf("|\n");
448 printf(" |%.10s", "---------------------------------------");
449 printf("|%.17s", "---------------------------------------");
450 printf("|%.13s", "---------------------------------------");
451 printf("|%.13s", "---------------------------------------");
452 printf("|%.10s", "---------------------------------------");
453 printf("|%.10s", "---------------------------------------");
454 printf("|%.10s", "---------------------------------------");
455 printf("|%.15s", "---------------------------------------");
456 if (features.has_events()) {
457 printf("|%.15s", "---------------------------------------");
458 printf("|%.15s", "---------------------------------------");
459 printf("|%.15s", "---------------------------------------");
460 printf("|%.15s", "---------------------------------------");
461 }
462 printf("|\n");
463
464 options.each_stage([&](auto stage) {
465 print_file_effectiveness(stage, "gsoc-2018.json", gsoc_2018, features);
466 print_file_effectiveness(stage, "twitter.json", twitter, features);
467 print_file_effectiveness(stage, "random.json", random, features);
468 });
469 }
470
471 return EXIT_SUCCESS;
472 }
473