1 #include <iostream>
2 #include <unistd.h>
3 #include "simdjson.h"
4 #ifdef __linux__
5 #include "linux-perf-events.h"
6 #endif
7 
/// Counts the bytes in [input, input + length) whose most significant bit is
/// set, i.e. bytes with a value of 0x80 or above.
///
/// @param input  start of the byte buffer to scan
/// @param length number of bytes to examine
/// @return how many of the examined bytes have their top bit set
size_t count_nonasciibytes(const uint8_t *input, size_t length) {
  size_t high_bit_bytes = 0;
  const uint8_t *const end = input + length;
  for (const uint8_t *cursor = input; cursor != end; ++cursor) {
    // A byte is outside the 7-bit range exactly when bit 7 is set.
    if ((*cursor & 0x80) != 0) {
      ++high_bit_bytes;
    }
  }
  return high_bit_bytes;
}
15 
/// Counts how many bytes in [input, input + length) are the backslash
/// character ('\\').
///
/// @param input  start of the byte buffer to scan
/// @param length number of bytes to examine
/// @return number of backslash bytes found
size_t count_backslash(const uint8_t *input, size_t length) {
  size_t backslashes = 0;
  const uint8_t *const end = input + length;
  for (const uint8_t *cursor = input; cursor != end; ++cursor) {
    if (*cursor == '\\') {
      ++backslashes;
    }
  }
  return backslashes;
}
23 
/// Aggregate statistics gathered for one JSON document.
///
/// Every member has a default initializer so that a plain `stat_t s;` starts
/// from all-zero counters and `valid == false` instead of indeterminate values.
struct stat_s {
  size_t integer_count = 0;            ///< scalar values classified as integers
  size_t float_count = 0;              ///< scalar values classified as floating-point
  size_t string_count = 0;             ///< string values plus object keys
  size_t backslash_count = 0;          ///< '\\' bytes in the raw input
  size_t non_ascii_byte_count = 0;     ///< input bytes with the top bit set
  size_t object_count = 0;             ///< JSON objects visited
  size_t array_count = 0;              ///< JSON arrays visited
  size_t null_count = 0;               ///< null values
  size_t true_count = 0;               ///< boolean true values
  size_t false_count = 0;              ///< boolean false values
  size_t byte_count = 0;               ///< size of the raw input in bytes
  size_t structural_indexes_count = 0; ///< structural index count reported by the parser
  bool valid = false;                  ///< true once the document parsed successfully
};

using stat_t = struct stat_s;
41 
42 
43 
simdjson_process_atom(stat_t & s,simdjson::dom::element element)44 simdjson_really_inline void simdjson_process_atom(stat_t &s,
45                                          simdjson::dom::element element) {
46   if (element.is<int64_t>()) {
47     s.integer_count++;
48   } else if(element.is<std::string_view>()) {
49     s.string_count++;
50   } else if(element.is<double>()) {
51     s.float_count++;
52   } else if (element.is<bool>()) {
53     simdjson::error_code err;
54     bool v;
55     err = element.get(v);
56     if (v) {
57       s.true_count++;
58     } else {
59       s.false_count++;
60     }
61   } else if (element.is_null()) {
62     s.null_count++;
63   }
64 }
65 
simdjson_recurse(stat_t & s,simdjson::dom::element element)66 void simdjson_recurse(stat_t &s, simdjson::dom::element element) {
67   simdjson::error_code error;
68   if (element.is<simdjson::dom::array>()) {
69     s.array_count++;
70     simdjson::dom::array array;
71     if ((error = element.get(array))) { std::cerr << error << std::endl; abort(); }
72     for (auto child : array) {
73       if (child.is<simdjson::dom::array>() || child.is<simdjson::dom::object>()) {
74         simdjson_recurse(s, child);
75       } else {
76         simdjson_process_atom(s, child);
77       }
78     }
79   } else if (element.is<simdjson::dom::object>()) {
80     s.object_count++;
81     simdjson::dom::object object;
82     if ((error = element.get(object))) { std::cerr << error << std::endl; abort(); }
83     for (auto field : object) {
84       s.string_count++; // for key
85       if (field.value.is<simdjson::dom::array>() || field.value.is<simdjson::dom::object>()) {
86         simdjson_recurse(s, field.value);
87       } else {
88         simdjson_process_atom(s, field.value);
89       }
90     }
91   } else {
92     simdjson_process_atom(s, element);
93   }
94 }
95 
simdjson_compute_stats(const simdjson::padded_string & p)96 stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
97   stat_t answer{};
98   simdjson::dom::parser parser;
99   simdjson::dom::element doc;
100   auto error = parser.parse(p).get(doc);
101   if (error) {
102     answer.valid = false;
103     return answer;
104   }
105   answer.valid = true;
106   answer.backslash_count =
107       count_backslash(reinterpret_cast<const uint8_t *>(p.data()), p.size());
108   answer.non_ascii_byte_count = count_nonasciibytes(
109       reinterpret_cast<const uint8_t *>(p.data()), p.size());
110   answer.byte_count = p.size();
111   answer.structural_indexes_count = parser.implementation->n_structural_indexes;
112   simdjson_recurse(answer, doc);
113   return answer;
114 }
115 
main(int argc,char * argv[])116 int main(int argc, char *argv[]) {
117 #ifndef _MSC_VER
118   int c;
119   while ((c = getopt(argc, argv, "")) != -1) {
120     switch (c) {
121 
122     default:
123       abort();
124     }
125   }
126 #else
127   int optind = 1;
128 #endif
129   if (optind >= argc) {
130     std::cerr << "Reads json, prints stats. " << std::endl;
131     std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
132 
133     exit(1);
134   }
135   const char *filename = argv[optind];
136   if (optind + 1 < argc) {
137     std::cerr << "warning: ignoring everything after " << argv[optind + 1]
138               << std::endl;
139   }
140   simdjson::padded_string p;
141   auto error = simdjson::padded_string::load(filename).get(p);
142   if (error) {
143     std::cerr << "Could not load the file " << filename << std::endl;
144     return EXIT_FAILURE;
145   }
146   stat_t s = simdjson_compute_stats(p);
147   if (!s.valid) {
148     std::cerr << "not a valid JSON" << std::endl;
149     return EXIT_FAILURE;
150   }
151 
152   printf("# integer_count float_count string_count backslash_count "
153          "non_ascii_byte_count object_count array_count null_count true_count "
154          "false_count byte_count structural_indexes_count ");
155 #ifdef __linux__
156   printf("  stage1_cycle_count stage1_instruction_count  stage2_cycle_count "
157          " stage2_instruction_count  stage3_cycle_count "
158          "stage3_instruction_count  ");
159 #else
160   printf("(you are not under linux, so perf counters are disaabled)");
161 #endif
162   printf("\n");
163   printf("%zu %zu %zu %zu %zu %zu %zu %zu %zu %zu %zu %zu ", s.integer_count,
164          s.float_count, s.string_count, s.backslash_count,
165          s.non_ascii_byte_count, s.object_count, s.array_count, s.null_count,
166          s.true_count, s.false_count, s.byte_count, s.structural_indexes_count);
167 #ifdef __linux__
168   simdjson::dom::parser parser;
169   simdjson::error_code alloc_error = parser.allocate(p.size());
170   if (alloc_error) {
171     std::cerr << alloc_error << std::endl;
172     return EXIT_FAILURE;
173   }
174   const uint32_t iterations = p.size() < 1 * 1000 * 1000 ? 1000 : 50;
175   std::vector<int> evts;
176   evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
177   evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
178   LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
179   unsigned long cy1 = 0, cy2 = 0;
180   unsigned long cl1 = 0, cl2 = 0;
181   std::vector<unsigned long long> results;
182   results.resize(evts.size());
183   for (uint32_t i = 0; i < iterations; i++) {
184     unified.start();
185     // The default template is simdjson::architecture::NATIVE.
186     bool isok = (parser.implementation->stage1((const uint8_t *)p.data(), p.size(), false) == simdjson::SUCCESS);
187     unified.end(results);
188 
189     cy1 += results[0];
190     cl1 += results[1];
191 
192     unified.start();
193     isok = isok && (parser.implementation->stage2(parser.doc) == simdjson::SUCCESS);
194     unified.end(results);
195 
196     cy2 += results[0];
197     cl2 += results[1];
198     if (!isok) {
199       std::cerr << "failure?" << std::endl;
200     }
201   }
202   printf("%f %f %f %f ", static_cast<double>(cy1) / static_cast<double>(iterations), static_cast<double>(cl1) / static_cast<double>(iterations),
203          static_cast<double>(cy2) / static_cast<double>(iterations), static_cast<double>(cl2) / static_cast<double>(iterations));
204 #endif // __linux__
205   printf("\n");
206   return EXIT_SUCCESS;
207 }
208