1 #include <iostream>
2 #include <unistd.h>
3 #include "simdjson.h"
4 #ifdef __linux__
5 #include "linux-perf-events.h"
6 #endif
7
/**
 * Counts the bytes in input[0, length) whose most significant bit is set,
 * i.e. the bytes that fall outside the 7-bit ASCII range.
 *
 * @param input  pointer to the first byte to inspect
 * @param length number of bytes to inspect
 * @return how many of those bytes have a value of 0x80 or above
 */
size_t count_nonasciibytes(const uint8_t *input, size_t length) {
  size_t non_ascii = 0;
  const uint8_t *const end = input + length;
  for (const uint8_t *cursor = input; cursor != end; ++cursor) {
    // A set high bit is exactly what the original ">> 7" extracted.
    if ((*cursor & 0x80) != 0) {
      ++non_ascii;
    }
  }
  return non_ascii;
}
15
/**
 * Counts how many bytes in input[0, length) are the backslash character.
 *
 * @param input  pointer to the first byte to inspect
 * @param length number of bytes to inspect
 * @return the number of bytes equal to '\\'
 */
size_t count_backslash(const uint8_t *input, size_t length) {
  size_t backslashes = 0;
  size_t pos = 0;
  while (pos < length) {
    if (input[pos] == '\\') {
      ++backslashes;
    }
    ++pos;
  }
  return backslashes;
}
23
/**
 * Statistics collected for one JSON document.
 *
 * Every member has a default initializer so that a stat_s is fully defined
 * however it is created (plain `stat_t s;` as well as `stat_t s{};`).
 */
struct stat_s {
  size_t integer_count{0};            // number of integer values
  size_t float_count{0};              // number of floating-point values
  size_t string_count{0};             // string values plus object keys
  size_t backslash_count{0};          // '\\' bytes in the raw input
  size_t non_ascii_byte_count{0};     // raw input bytes with the high bit set
  size_t object_count{0};             // number of JSON objects
  size_t array_count{0};              // number of JSON arrays
  size_t null_count{0};               // number of null values
  size_t true_count{0};               // number of true values
  size_t false_count{0};              // number of false values
  size_t byte_count{0};               // size of the raw input in bytes
  size_t structural_indexes_count{0}; // structural indexes reported by the parser
  bool valid{false};                  // true once the document parsed successfully
};

using stat_t = struct stat_s;
41
42
43
simdjson_process_atom(stat_t & s,simdjson::dom::element element)44 simdjson_really_inline void simdjson_process_atom(stat_t &s,
45 simdjson::dom::element element) {
46 if (element.is<int64_t>()) {
47 s.integer_count++;
48 } else if(element.is<std::string_view>()) {
49 s.string_count++;
50 } else if(element.is<double>()) {
51 s.float_count++;
52 } else if (element.is<bool>()) {
53 simdjson::error_code err;
54 bool v;
55 err = element.get(v);
56 if (v) {
57 s.true_count++;
58 } else {
59 s.false_count++;
60 }
61 } else if (element.is_null()) {
62 s.null_count++;
63 }
64 }
65
simdjson_recurse(stat_t & s,simdjson::dom::element element)66 void simdjson_recurse(stat_t &s, simdjson::dom::element element) {
67 simdjson::error_code error;
68 if (element.is<simdjson::dom::array>()) {
69 s.array_count++;
70 simdjson::dom::array array;
71 if ((error = element.get(array))) { std::cerr << error << std::endl; abort(); }
72 for (auto child : array) {
73 if (child.is<simdjson::dom::array>() || child.is<simdjson::dom::object>()) {
74 simdjson_recurse(s, child);
75 } else {
76 simdjson_process_atom(s, child);
77 }
78 }
79 } else if (element.is<simdjson::dom::object>()) {
80 s.object_count++;
81 simdjson::dom::object object;
82 if ((error = element.get(object))) { std::cerr << error << std::endl; abort(); }
83 for (auto field : object) {
84 s.string_count++; // for key
85 if (field.value.is<simdjson::dom::array>() || field.value.is<simdjson::dom::object>()) {
86 simdjson_recurse(s, field.value);
87 } else {
88 simdjson_process_atom(s, field.value);
89 }
90 }
91 } else {
92 simdjson_process_atom(s, element);
93 }
94 }
95
simdjson_compute_stats(const simdjson::padded_string & p)96 stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
97 stat_t answer{};
98 simdjson::dom::parser parser;
99 simdjson::dom::element doc;
100 auto error = parser.parse(p).get(doc);
101 if (error) {
102 answer.valid = false;
103 return answer;
104 }
105 answer.valid = true;
106 answer.backslash_count =
107 count_backslash(reinterpret_cast<const uint8_t *>(p.data()), p.size());
108 answer.non_ascii_byte_count = count_nonasciibytes(
109 reinterpret_cast<const uint8_t *>(p.data()), p.size());
110 answer.byte_count = p.size();
111 answer.structural_indexes_count = parser.implementation->n_structural_indexes;
112 simdjson_recurse(answer, doc);
113 return answer;
114 }
115
main(int argc,char * argv[])116 int main(int argc, char *argv[]) {
117 #ifndef _MSC_VER
118 int c;
119 while ((c = getopt(argc, argv, "")) != -1) {
120 switch (c) {
121
122 default:
123 abort();
124 }
125 }
126 #else
127 int optind = 1;
128 #endif
129 if (optind >= argc) {
130 std::cerr << "Reads json, prints stats. " << std::endl;
131 std::cerr << "Usage: " << argv[0] << " <jsonfile>" << std::endl;
132
133 exit(1);
134 }
135 const char *filename = argv[optind];
136 if (optind + 1 < argc) {
137 std::cerr << "warning: ignoring everything after " << argv[optind + 1]
138 << std::endl;
139 }
140 simdjson::padded_string p;
141 auto error = simdjson::padded_string::load(filename).get(p);
142 if (error) {
143 std::cerr << "Could not load the file " << filename << std::endl;
144 return EXIT_FAILURE;
145 }
146 stat_t s = simdjson_compute_stats(p);
147 if (!s.valid) {
148 std::cerr << "not a valid JSON" << std::endl;
149 return EXIT_FAILURE;
150 }
151
152 printf("# integer_count float_count string_count backslash_count "
153 "non_ascii_byte_count object_count array_count null_count true_count "
154 "false_count byte_count structural_indexes_count ");
155 #ifdef __linux__
156 printf(" stage1_cycle_count stage1_instruction_count stage2_cycle_count "
157 " stage2_instruction_count stage3_cycle_count "
158 "stage3_instruction_count ");
159 #else
160 printf("(you are not under linux, so perf counters are disaabled)");
161 #endif
162 printf("\n");
163 printf("%zu %zu %zu %zu %zu %zu %zu %zu %zu %zu %zu %zu ", s.integer_count,
164 s.float_count, s.string_count, s.backslash_count,
165 s.non_ascii_byte_count, s.object_count, s.array_count, s.null_count,
166 s.true_count, s.false_count, s.byte_count, s.structural_indexes_count);
167 #ifdef __linux__
168 simdjson::dom::parser parser;
169 simdjson::error_code alloc_error = parser.allocate(p.size());
170 if (alloc_error) {
171 std::cerr << alloc_error << std::endl;
172 return EXIT_FAILURE;
173 }
174 const uint32_t iterations = p.size() < 1 * 1000 * 1000 ? 1000 : 50;
175 std::vector<int> evts;
176 evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
177 evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
178 LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
179 unsigned long cy1 = 0, cy2 = 0;
180 unsigned long cl1 = 0, cl2 = 0;
181 std::vector<unsigned long long> results;
182 results.resize(evts.size());
183 for (uint32_t i = 0; i < iterations; i++) {
184 unified.start();
185 // The default template is simdjson::architecture::NATIVE.
186 bool isok = (parser.implementation->stage1((const uint8_t *)p.data(), p.size(), false) == simdjson::SUCCESS);
187 unified.end(results);
188
189 cy1 += results[0];
190 cl1 += results[1];
191
192 unified.start();
193 isok = isok && (parser.implementation->stage2(parser.doc) == simdjson::SUCCESS);
194 unified.end(results);
195
196 cy2 += results[0];
197 cl2 += results[1];
198 if (!isok) {
199 std::cerr << "failure?" << std::endl;
200 }
201 }
202 printf("%f %f %f %f ", static_cast<double>(cy1) / static_cast<double>(iterations), static_cast<double>(cl1) / static_cast<double>(iterations),
203 static_cast<double>(cy2) / static_cast<double>(iterations), static_cast<double>(cl2) / static_cast<double>(iterations));
204 #endif // __linux__
205 printf("\n");
206 return EXIT_SUCCESS;
207 }
208