1 #include <cstring>
2 #include <cinttypes>
3 #include <cmath>
4 #include <cstdio>
5 #include <cstdlib>
6
7 #ifndef JSON_TEST_NUMBERS
8 #define JSON_TEST_NUMBERS
9 #endif
10
11 #if (!(_MSC_VER) && !(__MINGW32__) && !(__MINGW64__))
12 #include <dirent.h>
13 #else
14 #include <dirent_portable.h>
15 #endif
16
17 void found_invalid_number(const uint8_t *buf);
18 void found_float(double result, const uint8_t *buf);
19 void found_integer(int64_t result, const uint8_t *buf);
20 void found_unsigned_integer(uint64_t result, const uint8_t *buf);
21
22 #include "simdjson.h"
23
24
25
26 /**
27 * Some systems have bad floating-point parsing. We want to exclude them.
28 */
29 #if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
// Ok. So under Visual Studio, Linux, Apple and FreeBSD systems, we have a good chance of having a decent
// enough strtod. It is not certain, but it is probably a good enough heuristic. We exclude systems like msys2
// or cygwin.
33 //
34 // Finally, we want to exclude legacy 32-bit systems.
35 #ifndef SIMDJSON_IS_32BITS
36 // So we only run some of the floating-point tests under 64-bit linux, apple, regular visual studio, freebsd.
37 #define TEST_FLOATS
38 // Apple and freebsd need a special header, typically.
39 #if defined __APPLE__ || defined(__FreeBSD__)
40 # include <xlocale.h>
41 #endif
42
43 #endif
44
45 #endif
46
47
48
// Bitmask of the PARSE_* flags below, accumulated by the found_* callbacks and
// reset at the start of validate().
int parse_error;
// Path of the file currently being parsed; only used to label diagnostics.
char *fullpath;
// Flags OR-ed into parse_error. Each flag must own a distinct non-zero bit:
// with a value of 0, `parse_error |= PARSE_WARNING` would record nothing.
// PARSE_ERROR keeps the value 1.
enum { PARSE_ERROR = 1, PARSE_WARNING = 2 };

// Per-file tallies, incremented by the found_* callbacks.
size_t float_count;
size_t int_count;
size_t invalid_count;

// strings that start with these should not be parsed as numbers
const char *really_bad[] = {"013}", "0x14", "0e]", "0e+]", "0e+-1]"};
59
/**
 * Returns true when the C string `str` begins with the C string `pre`.
 * An empty `pre` matches every string.
 */
bool starts_with(const char *pre, const char *str) {
  // Compare only the first strlen(pre) characters of both strings.
  return std::strncmp(pre, str, std::strlen(pre)) == 0;
}
64
is_in_bad_list(const char * buf)65 bool is_in_bad_list(const char *buf) {
66 if (buf[0] != '0')
67 return false;
68 for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
69 if (starts_with(really_bad[i], buf))
70 return true;
71 return false;
72 }
73
#ifndef TEST_FLOATS
// We do not recognize the system, so we do not verify our results.
void found_invalid_number(const uint8_t *) {}
#else
/**
 * Tallies a number reported as invalid and cross-checks it against the
 * C-locale strtod: if strtod manages to parse `buf` and the text is not one of
 * the known-bad prefixes, a warning is printed and PARSE_WARNING is recorded.
 *
 * (Declared ahead of the simdjson.h include; presumably called by the parser
 * because JSON_TEST_NUMBERS is defined.)
 */
void found_invalid_number(const uint8_t *buf) {
  invalid_count++;
  const char *text = reinterpret_cast<const char *>(buf);
  char *parse_end;
#ifdef _WIN32
  static _locale_t c_locale = _create_locale(LC_ALL, "C");
  const double reference = _strtod_l(text, &parse_end, c_locale);
#else
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  const double reference = strtod_l(text, &parse_end, c_locale);
#endif
  // strtod consumed nothing, so it agrees that this is not a number.
  if (parse_end == text) {
    return;
  }
  // Prefixes listed in really_bad are expected disagreements; stay quiet.
  if (is_in_bad_list(text)) {
    return;
  }
  printf("Warning: found_invalid_number %.32s whereas strtod parses it to "
         "%f, ",
         buf, reference);
  printf(" while parsing %s \n", fullpath);
  parse_error |= PARSE_WARNING;
}
#endif
99
found_integer(int64_t result,const uint8_t * buf)100 void found_integer(int64_t result, const uint8_t *buf) {
101 int_count++;
102 char *endptr;
103 long long expected = strtoll((const char *)buf, &endptr, 10);
104 if ((endptr == (const char *)buf) || (expected != result)) {
105 #if (!(__MINGW32__) && !(__MINGW64__))
106 fprintf(stderr, "Error: parsed %" PRId64 " out of %.32s, ", result, buf);
107 #else // mingw is busted since we include #include <inttypes.h> and it will still not provide PRId64
108 fprintf(stderr, "Error: parsed %lld out of %.32s, ", (long long)result, buf);
109 #endif
110 fprintf(stderr, " while parsing %s \n", fullpath);
111 parse_error |= PARSE_ERROR;
112 }
113 }
114
found_unsigned_integer(uint64_t result,const uint8_t * buf)115 void found_unsigned_integer(uint64_t result, const uint8_t *buf) {
116 int_count++;
117 char *endptr;
118 unsigned long long expected = strtoull((const char *)buf, &endptr, 10);
119 if ((endptr == (const char *)buf) || (expected != result)) {
120 #if (!(__MINGW32__) && !(__MINGW64__))
121 fprintf(stderr, "Error: parsed %" PRIu64 " out of %.32s, ", result, buf);
122 #else // mingw is busted since we include #include <inttypes.h>
123 fprintf(stderr, "Error: parsed %llu out of %.32s, ", (unsigned long long)result, buf);
124 #endif
125 fprintf(stderr, " while parsing %s \n", fullpath);
126 parse_error |= PARSE_ERROR;
127 }
128 }
129
#ifndef TEST_FLOATS
// We do not recognize the system, so we do not verify our results.
void found_float(double , const uint8_t *) {}
#else
/**
 * Tallies a parsed floating-point number and verifies it against the C-locale
 * strtod run on the same text.
 *
 * Three kinds of disagreement are reported on stderr and recorded as
 * PARSE_ERROR:
 *  - strtod cannot parse the text at all,
 *  - the two values fall in different fpclassify categories,
 *  - the two values differ (exact comparison is intended: the parsed value is
 *    expected to match strtod's result exactly).
 *
 * @param result value produced by the parser under test
 * @param buf    start of the number's text
 */
void found_float(double result, const uint8_t *buf) {
  float_count++;
  const char *text = reinterpret_cast<const char *>(buf);
  char *endptr;
#ifdef _WIN32
  static _locale_t c_locale = _create_locale(LC_ALL, "C");
  double expected = _strtod_l(text, &endptr, c_locale);
#else
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  double expected = strtod_l(text, &endptr, c_locale);
#endif
  if (endptr == text) {
    fprintf(stderr,
            "parsed %f from %.32s whereas strtod refuses to parse a float, ",
            result, buf);
    fprintf(stderr, " while parsing %s \n", fullpath);
    parse_error |= PARSE_ERROR;
    // No conversion happened, so `expected` is just strtod's 0.0 placeholder;
    // comparing against it would only produce a second, misleading report.
    return;
  }
  if (std::fpclassify(expected) != std::fpclassify(result)) {
    fprintf(stderr,
            "floats not in the same category expected: %f observed: %f \n",
            expected, result);
    fprintf(stderr, "%.32s\n", buf);
    // Name the offending file, as every other diagnostic here does.
    fprintf(stderr, " while parsing %s \n", fullpath);
    parse_error |= PARSE_ERROR;
    return;
  }
  if (expected != result) {
    fprintf(stderr, "parsed %.128e from \n", result);
    fprintf(stderr, "       %.32s whereas strtod gives\n", buf);
    fprintf(stderr, "       %.128e,", expected);
    fprintf(stderr, " while parsing %s \n", fullpath);
    parse_error |= PARSE_ERROR;
  }
}
#endif
168
169 #include "simdjson.h"
170 #include "simdjson.cpp"
171
/**
 * Tells whether `filename` ends with `extension`, where the file's extension
 * is taken to start at its last '.' (so `extension` must include the dot).
 * A name without any '.' never matches.
 */
static bool has_extension(const char *filename, const char *extension) {
  const char *last_dot = strrchr(filename, '.');
  if (last_dot == nullptr) {
    return false;
  }
  return strcmp(last_dot, extension) == 0;
}
179
validate(const char * dirname)180 bool validate(const char *dirname) {
181 parse_error = 0;
182 size_t total_count = 0;
183 const char *extension = ".json";
184 size_t dirlen = std::strlen(dirname);
185 struct dirent **entry_list;
186 int c = scandir(dirname, &entry_list, 0, alphasort);
187 if (c < 0) {
188 printf("error accessing %s \n", dirname);
189 return false;
190 }
191 if (c == 0) {
192 printf("nothing in dir %s \n", dirname);
193 return false;
194 }
195 bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
196 for (int i = 0; i < c; i++) {
197 const char *name = entry_list[i]->d_name;
198 if (has_extension(name, extension)) {
199 size_t filelen = std::strlen(name);
200 fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
201 strcpy(fullpath, dirname);
202 if (needsep) {
203 fullpath[dirlen] = '/';
204 strcpy(fullpath + dirlen + 1, name);
205 } else {
206 strcpy(fullpath + dirlen, name);
207 }
208 simdjson::padded_string p;
209 auto error = simdjson::padded_string::load(fullpath).get(p);
210 if (error) {
211 std::cerr << "Could not load the file " << fullpath << std::endl;
212 return EXIT_FAILURE;
213 }
214 // terrible hack but just to get it working
215 float_count = 0;
216 int_count = 0;
217 invalid_count = 0;
218 total_count += float_count + int_count + invalid_count;
219 simdjson::dom::parser parser;
220 auto err = parser.parse(p).error();
221 bool isok = (err == simdjson::error_code::SUCCESS);
222 if (int_count + float_count + invalid_count > 0) {
223 printf("File %40s %s --- integers: %10zu floats: %10zu invalid: %10zu "
224 "total numbers: %10zu \n",
225 name, isok ? " is valid " : " is not valid ", int_count,
226 float_count, invalid_count,
227 int_count + float_count + invalid_count);
228 }
229 free(fullpath);
230 }
231 }
232 if ((parse_error & PARSE_ERROR) != 0) {
233 printf("NUMBER PARSING FAILS?\n");
234 } else {
235 printf("All ok.\n");
236 }
237 for (int i = 0; i < c; ++i)
238 free(entry_list[i]);
239 free(entry_list);
240 return ((parse_error & PARSE_ERROR) == 0);
241 }
242
main(int argc,char * argv[])243 int main(int argc, char *argv[]) {
244 if (argc != 2) {
245 std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
246 << std::endl;
247 #if defined(SIMDJSON_TEST_DATA_DIR) && defined(SIMDJSON_BENCHMARK_DATA_DIR)
248 std::cout << "We are going to assume you mean to use the '"
249 << SIMDJSON_TEST_DATA_DIR << "' and '"
250 << SIMDJSON_BENCHMARK_DATA_DIR << "'directories." << std::endl;
251 return validate(SIMDJSON_TEST_DATA_DIR) &&
252 validate(SIMDJSON_BENCHMARK_DATA_DIR)
253 ? EXIT_SUCCESS
254 : EXIT_FAILURE;
255 #else
256 std::cout << "We are going to assume you mean to use the 'jsonchecker' and "
257 "'jsonexamples' directories."
258 << std::endl;
259 return validate("jsonchecker/") && validate("jsonexamples/") ? EXIT_SUCCESS
260 : EXIT_FAILURE;
261 #endif
262 }
263 return validate(argv[1]) ? EXIT_SUCCESS : EXIT_FAILURE;
264 }
265