1 #include "event_counter.h"
2 
3 #include <cassert>
4 #include <cctype>
5 #ifndef _MSC_VER
6 #include <dirent.h>
7 #endif
8 #include <unistd.h>
9 #include <cinttypes>
10 #include <initializer_list>
11 
12 #include <cstdio>
13 #include <cstdlib>
14 #include <cstring>
15 
16 #include <algorithm>
17 #include <chrono>
18 #include <cstring>
19 #include <fstream>
20 #include <iomanip>
21 #include <iostream>
22 #include <map>
23 #include <set>
24 #include <sstream>
25 #include <string>
26 #include <vector>
27 
28 #include "linux-perf-events.h"
29 #ifdef __linux__
30 #include <libgen.h>
31 #endif
32 
33 #include "simdjson.h"
34 
35 #include <functional>
36 
37 #include "benchmarker.h"
38 
39 using namespace simdjson;
40 using std::cerr;
41 using std::cout;
42 using std::endl;
43 using std::string;
44 using std::to_string;
45 using std::vector;
46 using std::ostream;
47 using std::ofstream;
48 using std::exception;
49 
50 // Stash the exe_name in main() for functions to use
51 char* exe_name;
52 
print_usage(ostream & out)53 void print_usage(ostream& out) {
54   out << "Usage: " << exe_name << " [-v] [-n #] [-s STAGE] [-a ARCH]" << endl;
55   out << endl;
56   out << "Runs the parser against jsonexamples/generated json files in a loop, measuring speed and other statistics." << endl;
57   out << endl;
58   out << "Options:" << endl;
59   out << endl;
60   out << "-n #       - Number of iterations per file. Default: 400" << endl;
61   out << "-i #       - Number of times to iterate a single file before moving to the next. Default: 20" << endl;
62   out << "-v         - Verbose output." << endl;
63   out << "-s STAGE   - Stop after the given stage." << endl;
64   out << "             -s stage1 - Stop after find_structural_bits." << endl;
65   out << "             -s all    - Run all stages." << endl;
66   out << "-a ARCH    - Use the parser with the designated architecture (HASWELL, WESTMERE," << endl;
67   out << "             PPC64 or ARM64). By default, detects best supported architecture." << endl;
68 }
69 
exit_usage(string message)70 void exit_usage(string message) {
71   cerr << message << endl;
72   cerr << endl;
73   print_usage(cerr);
74   exit(EXIT_FAILURE);
75 }
76 
77 struct option_struct {
78   bool stage1_only = false;
79 
80   int32_t iterations = 400;
81   int32_t iteration_step = 50;
82 
83   bool verbose = false;
84 
option_structoption_struct85   option_struct(int argc, char **argv) {
86     int c;
87 
88     while ((c = getopt(argc, argv, "vtn:i:a:s:")) != -1) {
89       switch (c) {
90       case 'n':
91         iterations = atoi(optarg);
92         break;
93       case 'i':
94         iteration_step = atoi(optarg);
95         break;
96       case 'v':
97         verbose = true;
98         break;
99       case 'a': {
100           auto impl = simdjson::available_implementations[optarg];
101           if(impl && impl->supported_by_runtime_system()) {
102             simdjson::active_implementation = impl;
103           } else {
104             std::cerr << "implementation " << optarg << " not found or not supported " << std::endl;
105           }
106         }
107         break;
108       case 's':
109         if (!strcmp(optarg, "stage1")) {
110           stage1_only = true;
111         } else if (!strcmp(optarg, "all")) {
112           stage1_only = false;
113         } else {
114           exit_usage(string("Unsupported option value -s ") + optarg + ": expected -s stage1 or all");
115         }
116         break;
117       default:
118         exit_error(string("Unexpected argument ") + std::string(1,static_cast<char>(c)));
119       }
120     }
121   }
122 
123   template<typename F>
each_stageoption_struct124   void each_stage(const F& f) const {
125     f(BenchmarkStage::STAGE1);
126     if (!this->stage1_only) {
127       f(BenchmarkStage::STAGE2);
128       f(BenchmarkStage::ALL);
129     }
130   }
131 
132 };
133 
134 struct feature_benchmarker {
135   benchmarker utf8;
136   benchmarker utf8_miss;
137   benchmarker escape;
138   benchmarker escape_miss;
139   benchmarker empty;
140   benchmarker empty_miss;
141   benchmarker struct7;
142   benchmarker struct7_miss;
143   benchmarker struct7_full;
144   benchmarker struct15;
145   benchmarker struct15_miss;
146   benchmarker struct23;
147   benchmarker struct23_miss;
148 
feature_benchmarkerfeature_benchmarker149   feature_benchmarker(event_collector& collector) :
150     utf8               (SIMDJSON_BENCHMARK_DATA_DIR "generated/utf-8.json", collector),
151     utf8_miss          (SIMDJSON_BENCHMARK_DATA_DIR "generated/utf-8-miss.json", collector),
152     escape               (SIMDJSON_BENCHMARK_DATA_DIR "generated/escape.json", collector),
153     escape_miss          (SIMDJSON_BENCHMARK_DATA_DIR "generated/escape-miss.json", collector),
154     empty              (SIMDJSON_BENCHMARK_DATA_DIR "generated/0-structurals.json", collector),
155     empty_miss         (SIMDJSON_BENCHMARK_DATA_DIR "generated/0-structurals-miss.json", collector),
156     struct7           (SIMDJSON_BENCHMARK_DATA_DIR "generated/7-structurals.json", collector),
157     struct7_miss      (SIMDJSON_BENCHMARK_DATA_DIR "generated/7-structurals-miss.json", collector),
158     struct7_full       (SIMDJSON_BENCHMARK_DATA_DIR "generated/7-structurals-full.json", collector),
159     struct15     (SIMDJSON_BENCHMARK_DATA_DIR "generated/15-structurals.json", collector),
160     struct15_miss(SIMDJSON_BENCHMARK_DATA_DIR "generated/15-structurals-miss.json", collector),
161     struct23     (SIMDJSON_BENCHMARK_DATA_DIR "generated/23-structurals.json", collector),
162     struct23_miss(SIMDJSON_BENCHMARK_DATA_DIR "generated/23-structurals-miss.json", collector)
163   {
164 
165   }
166 
run_iterationsfeature_benchmarker167   simdjson_really_inline void run_iterations(size_t iterations, bool stage1_only=false) {
168     struct7.run_iterations(iterations, stage1_only);
169     struct7_miss.run_iterations(iterations, stage1_only);
170     struct7_full.run_iterations(iterations, stage1_only);
171     utf8.run_iterations(iterations, stage1_only);
172     utf8_miss.run_iterations(iterations, stage1_only);
173     escape.run_iterations(iterations, stage1_only);
174     escape_miss.run_iterations(iterations, stage1_only);
175     empty.run_iterations(iterations, stage1_only);
176     empty_miss.run_iterations(iterations, stage1_only);
177     struct15.run_iterations(iterations, stage1_only);
178     struct15_miss.run_iterations(iterations, stage1_only);
179     struct23.run_iterations(iterations, stage1_only);
180     struct23_miss.run_iterations(iterations, stage1_only);
181   }
182 
cost_per_blockfeature_benchmarker183   double cost_per_block(BenchmarkStage stage, const benchmarker& feature, size_t feature_blocks, const benchmarker& base) const {
184     return (feature[stage].best.elapsed_ns() - base[stage].best.elapsed_ns()) / double(feature_blocks);
185   }
186 
187   // Whether we're recording cache miss and branch miss events
has_eventsfeature_benchmarker188   bool has_events() const {
189     return empty.collector.has_events();
190   }
191 
192   // Base cost of any block (including empty ones)
base_costfeature_benchmarker193   double base_cost(BenchmarkStage stage) const {
194     return (empty[stage].best.elapsed_ns() / double(empty.stats->blocks));
195   }
196 
197   // Extra cost of a 1-7 structural block over an empty block
struct1_7_costfeature_benchmarker198   double struct1_7_cost(BenchmarkStage stage) const {
199     return cost_per_block(stage, struct7, struct7.stats->blocks_with_1_structural, empty);
200   }
201   // Extra cost of an 1-7-structural miss
struct1_7_miss_costfeature_benchmarker202   double struct1_7_miss_cost(BenchmarkStage stage) const {
203     return cost_per_block(stage, struct7_miss, struct7_miss.stats->blocks_with_1_structural, struct7);
204   }
205   // Rate of 1-7-structural misses per 8-structural flip
struct1_7_miss_ratefeature_benchmarker206   double struct1_7_miss_rate(BenchmarkStage stage) const {
207     if (!has_events()) { return 1; }
208     return struct7_miss[stage].best.branch_misses() - struct7[stage].best.branch_misses() / double(struct7_miss.stats->blocks_with_1_structural_flipped);
209   }
210 
211   // Extra cost of an 8-15 structural block over a 1-7 structural block
struct8_15_costfeature_benchmarker212   double struct8_15_cost(BenchmarkStage stage) const {
213     return cost_per_block(stage, struct15, struct15.stats->blocks_with_8_structurals, struct7);
214   }
215   // Extra cost of an 8-15-structural miss over a 1-7 miss
struct8_15_miss_costfeature_benchmarker216   double struct8_15_miss_cost(BenchmarkStage stage) const {
217     return cost_per_block(stage, struct15_miss, struct15_miss.stats->blocks_with_8_structurals_flipped, struct15);
218   }
219   // Rate of 8-15-structural misses per 8-structural flip
struct8_15_miss_ratefeature_benchmarker220   double struct8_15_miss_rate(BenchmarkStage stage) const {
221     if (!has_events()) { return 1; }
222     return double(struct15_miss[stage].best.branch_misses() - struct15[stage].best.branch_misses()) / double(struct15_miss.stats->blocks_with_8_structurals_flipped);
223   }
224 
225   // Extra cost of a 16+-structural block over an 8-15 structural block (actual varies based on # of structurals!)
struct16_costfeature_benchmarker226   double struct16_cost(BenchmarkStage stage) const {
227     return cost_per_block(stage, struct23, struct23.stats->blocks_with_16_structurals, struct15);
228   }
229   // Extra cost of a 16-structural miss over an 8-15 miss
struct16_miss_costfeature_benchmarker230   double struct16_miss_cost(BenchmarkStage stage) const {
231     return cost_per_block(stage, struct23_miss, struct23_miss.stats->blocks_with_16_structurals_flipped, struct23);
232   }
233   // Rate of 16-structural misses per 16-structural flip
struct16_miss_ratefeature_benchmarker234   double struct16_miss_rate(BenchmarkStage stage) const {
235     if (!has_events()) { return 1; }
236     return double(struct23_miss[stage].best.branch_misses() - struct23[stage].best.branch_misses()) / double(struct23_miss.stats->blocks_with_16_structurals_flipped);
237   }
238 
239   // Extra cost of having UTF-8 in a block
utf8_costfeature_benchmarker240   double utf8_cost(BenchmarkStage stage) const {
241     return cost_per_block(stage, utf8, utf8.stats->blocks_with_utf8, struct7_full);
242   }
243   // Extra cost of a UTF-8 miss
utf8_miss_costfeature_benchmarker244   double utf8_miss_cost(BenchmarkStage stage) const {
245     return cost_per_block(stage, utf8_miss, utf8_miss.stats->blocks_with_utf8_flipped, utf8);
246   }
247   // Rate of UTF-8 misses per UTF-8 flip
utf8_miss_ratefeature_benchmarker248   double utf8_miss_rate(BenchmarkStage stage) const {
249     if (!has_events()) { return 1; }
250     return double(utf8_miss[stage].best.branch_misses() - utf8[stage].best.branch_misses()) / double(utf8_miss.stats->blocks_with_utf8_flipped);
251   }
252 
253   // Extra cost of having escapes in a block
escape_costfeature_benchmarker254   double escape_cost(BenchmarkStage stage) const {
255     return cost_per_block(stage, escape, escape.stats->blocks_with_escapes, struct7_full);
256   }
257   // Extra cost of an escape miss
escape_miss_costfeature_benchmarker258   double escape_miss_cost(BenchmarkStage stage) const {
259     return cost_per_block(stage, escape_miss, escape_miss.stats->blocks_with_escapes_flipped, escape);
260   }
261   // Rate of escape misses per escape flip
escape_miss_ratefeature_benchmarker262   double escape_miss_rate(BenchmarkStage stage) const {
263     if (!has_events()) { return 1; }
264     return double(escape_miss[stage].best.branch_misses() - escape[stage].best.branch_misses()) / double(escape_miss.stats->blocks_with_escapes_flipped);
265   }
266 
calc_expected_feature_costfeature_benchmarker267   double calc_expected_feature_cost(BenchmarkStage stage, const benchmarker& file) const {
268     // Expected base ns/block (empty)
269     json_stats& stats = *file.stats;
270     double expected = base_cost(stage)       * double(stats.blocks);
271     expected +=       struct1_7_cost(stage)  * double(stats.blocks_with_1_structural);
272     expected +=       utf8_cost(stage)       * double(stats.blocks_with_utf8);
273     expected +=       escape_cost(stage)     * double(stats.blocks_with_escapes);
274     expected +=       struct8_15_cost(stage) * double(stats.blocks_with_8_structurals);
275     expected +=       struct16_cost(stage)   * double(stats.blocks_with_16_structurals);
276     return expected / double(stats.blocks);
277   }
278 
calc_expected_miss_costfeature_benchmarker279   double calc_expected_miss_cost(BenchmarkStage stage, const benchmarker& file) const {
280     // Expected base ns/block (empty)
281     json_stats& stats = *file.stats;
282     double expected = struct1_7_miss_cost(stage)  * double(stats.blocks_with_1_structural_flipped) * struct1_7_miss_rate(stage);
283     expected +=       utf8_miss_cost(stage)       * double(stats.blocks_with_utf8_flipped) * utf8_miss_rate(stage);
284     expected +=       escape_miss_cost(stage)     * double(stats.blocks_with_escapes_flipped) * escape_miss_rate(stage);
285     expected +=       struct8_15_miss_cost(stage) * double(stats.blocks_with_8_structurals_flipped) * struct8_15_miss_rate(stage);
286     expected +=       struct16_miss_cost(stage)   * double(stats.blocks_with_16_structurals_flipped) * struct16_miss_rate(stage);
287     return expected / double(stats.blocks);
288   }
289 
calc_expected_missesfeature_benchmarker290   double calc_expected_misses(BenchmarkStage stage, const benchmarker& file) const {
291     json_stats& stats = *file.stats;
292     double expected = double(stats.blocks_with_1_structural_flipped)   * struct1_7_miss_rate(stage);
293     expected +=       double(stats.blocks_with_utf8_flipped)           * utf8_miss_rate(stage);
294     expected +=       double(stats.blocks_with_escapes_flipped)        * escape_miss_rate(stage);
295     expected +=       double(stats.blocks_with_8_structurals_flipped)  * struct8_15_miss_rate(stage);
296     expected +=       double(stats.blocks_with_16_structurals_flipped) * struct16_miss_rate(stage);
297     return expected;
298   }
299 
calc_expectedfeature_benchmarker300   double calc_expected(BenchmarkStage stage, const benchmarker& file) const {
301     return calc_expected_feature_cost(stage, file) + calc_expected_miss_cost(stage, file);
302   }
303 
printfeature_benchmarker304   void print(const option_struct& options) const {
305     printf("\n");
306     printf("Features in ns/block (64 bytes):\n");
307     printf("\n");
308     printf("| %-8s ",   "Stage");
309     printf("| %8s ",  "Base");
310     printf("| %8s ",  "7 Struct");
311     printf("| %8s ",  "UTF-8");
312     printf("| %8s ",  "Escape");
313     printf("| %8s ",  "15 Str.");
314     printf("| %8s ",  "16+ Str.");
315     printf("| %15s ", "7 Struct Miss");
316     printf("| %15s ", "UTF-8 Miss");
317     printf("| %15s ", "Escape Miss");
318     printf("| %15s ", "15 Str. Miss");
319     printf("| %15s ", "16+ Str. Miss");
320     printf("|\n");
321 
322     printf("|%.10s",  "---------------------------------------");
323     printf("|%.10s",  "---------------------------------------");
324     printf("|%.10s",  "---------------------------------------");
325     printf("|%.10s",  "---------------------------------------");
326     printf("|%.10s",  "---------------------------------------");
327     printf("|%.10s",  "---------------------------------------");
328     printf("|%.10s",  "---------------------------------------");
329     printf("|%.17s", "---------------------------------------");
330     printf("|%.17s", "---------------------------------------");
331     printf("|%.17s", "---------------------------------------");
332     printf("|%.17s", "---------------------------------------");
333     printf("|%.17s", "---------------------------------------");
334     printf("|\n");
335 
336     options.each_stage([&](auto stage) {
337       printf("| %-8s ",         benchmark_stage_name(stage));
338       printf("| %8.3g ",        base_cost(stage));
339       printf("| %8.3g ",        struct1_7_cost(stage));
340       printf("| %8.3g ",        utf8_cost(stage));
341       printf("| %8.3g ",        escape_cost(stage));
342       printf("| %8.3g ",        struct8_15_cost(stage));
343       printf("| %8.3g ",        struct16_cost(stage));
344       if (has_events()) {
345         printf("| %8.3g (%3d%%) ", struct1_7_miss_cost(stage), int(struct1_7_miss_rate(stage)*100));
346         printf("| %8.3g (%3d%%) ", utf8_miss_cost(stage), int(utf8_miss_rate(stage)*100));
347         printf("| %8.3g (%3d%%) ", escape_miss_cost(stage), int(escape_miss_rate(stage)*100));
348         printf("| %8.3g (%3d%%) ", struct8_15_miss_cost(stage), int(struct8_15_miss_rate(stage)*100));
349         printf("| %8.3g (%3d%%) ", struct16_miss_cost(stage), int(struct16_miss_rate(stage)*100));
350       } else {
351         printf("|        %8.3g ", struct1_7_miss_cost(stage));
352         printf("|        %8.3g ", utf8_miss_cost(stage));
353         printf("|        %8.3g ", escape_miss_cost(stage));
354         printf("|        %8.3g ", struct8_15_miss_cost(stage));
355         printf("|        %8.3g ", struct16_miss_cost(stage));
356       }
357       printf("|\n");
358     });
359   }
360 };
361 
print_file_effectiveness(BenchmarkStage stage,const char * filename,const benchmarker & results,const feature_benchmarker & features)362 void print_file_effectiveness(BenchmarkStage stage, const char* filename, const benchmarker& results, const feature_benchmarker& features) {
363   double actual = results[stage].best.elapsed_ns() / double(results.stats->blocks);
364   double calc = features.calc_expected(stage, results);
365   double actual_misses = results[stage].best.branch_misses();
366   double calc_misses = features.calc_expected_misses(stage, results);
367   double calc_miss_cost = features.calc_expected_miss_cost(stage, results);
368   printf("        | %-8s ", benchmark_stage_name(stage));
369   printf("| %-15s ",   filename);
370   printf("|    %8.3g ", features.calc_expected_feature_cost(stage, results));
371   printf("|    %8.3g ", calc_miss_cost);
372   printf("| %8.3g ",  calc);
373   printf("| %8.3g ",  actual);
374   printf("| %+8.3g ", actual - calc);
375   printf("| %13llu ", (long long unsigned)(calc_misses));
376   if (features.has_events()) {
377     printf("| %13llu ", (long long unsigned)(actual_misses));
378     printf("| %+13lld ", (long long int)(actual_misses - calc_misses));
379     double miss_adjustment = calc_miss_cost * (double(int64_t(actual_misses - calc_misses)) / calc_misses);
380     printf("|      %8.3g ", calc_miss_cost + miss_adjustment);
381     printf("|      %+8.3g ", actual - (calc + miss_adjustment));
382   }
383   printf("|\n");
384 }
385 
main(int argc,char * argv[])386 int main(int argc, char *argv[]) {
387   // Read options
388   exe_name = argv[0];
389   option_struct options(argc, argv);
390   if (options.verbose) {
391     verbose_stream = &cout;
392   }
393 
394   // Initialize the event collector. We put this early so if it prints an error message, it's the
395   // first thing printed.
396   event_collector collector;
397 
398   // Set up benchmarkers by reading all files
399   feature_benchmarker features(collector);
400   benchmarker gsoc_2018(SIMDJSON_BENCHMARK_DATA_DIR "gsoc-2018.json", collector);
401   benchmarker twitter(SIMDJSON_BENCHMARK_DATA_DIR "twitter.json", collector);
402   benchmarker random(SIMDJSON_BENCHMARK_DATA_DIR "random.json", collector);
403 
404   // Run the benchmarks
405   progress_bar progress(options.iterations, 100);
406   // Put the if (options.stage1_only) *outside* the loop so that run_iterations will be optimized
407   if (options.stage1_only) {
408     for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
409       if (!options.verbose) { progress.print(iteration); }
410       features.run_iterations(options.iteration_step, true);
411       gsoc_2018.run_iterations(options.iteration_step, true);
412       twitter.run_iterations(options.iteration_step, true);
413       random.run_iterations(options.iteration_step, true);
414     }
415   } else {
416     for (int iteration = 0; iteration < options.iterations; iteration += options.iteration_step) {
417       if (!options.verbose) { progress.print(iteration); }
418       features.run_iterations(options.iteration_step, false);
419       gsoc_2018.run_iterations(options.iteration_step, false);
420       twitter.run_iterations(options.iteration_step, false);
421       random.run_iterations(options.iteration_step, false);
422     }
423   }
424   if (!options.verbose) { progress.erase(); }
425 
426   features.print(options);
427 
428   // Gauge effectiveness
429   if (options.verbose) {
430     printf("\n");
431     printf("        Effectiveness Check: Estimated vs. Actual ns/block for real files:\n");
432     printf("\n");
433     printf("        | %8s ", "Stage");
434     printf("| %-15s ", "File");
435     printf("| %11s ", "Est. (Base)");
436     printf("| %11s ", "Est. (Miss)");
437     printf("| %8s ",  "Est.");
438     printf("| %8s ",  "Actual");
439     printf("| %8s ",  "Diff");
440     printf("| %13s ", "Est. Misses");
441     if (features.has_events()) {
442       printf("| %13s ", "Actual Misses");
443       printf("| %13s ", "Diff (Misses)");
444       printf("| %13s ", "Adjusted Miss");
445       printf("| %13s ", "Adjusted Diff");
446     }
447     printf("|\n");
448     printf("        |%.10s",  "---------------------------------------");
449     printf("|%.17s",  "---------------------------------------");
450     printf("|%.13s",  "---------------------------------------");
451     printf("|%.13s",  "---------------------------------------");
452     printf("|%.10s",  "---------------------------------------");
453     printf("|%.10s",  "---------------------------------------");
454     printf("|%.10s",  "---------------------------------------");
455     printf("|%.15s",  "---------------------------------------");
456     if (features.has_events()) {
457       printf("|%.15s",  "---------------------------------------");
458       printf("|%.15s",  "---------------------------------------");
459       printf("|%.15s",  "---------------------------------------");
460       printf("|%.15s",  "---------------------------------------");
461     }
462     printf("|\n");
463 
464     options.each_stage([&](auto stage) {
465       print_file_effectiveness(stage, "gsoc-2018.json", gsoc_2018, features);
466       print_file_effectiveness(stage, "twitter.json", twitter, features);
467       print_file_effectiveness(stage, "random.json", random, features);
468     });
469   }
470 
471   return EXIT_SUCCESS;
472 }
473