1 #include <cstring>
2 #include <cinttypes>
3 #include <cmath>
4 #include <cstdio>
5 #include <cstdlib>
6 
7 #ifndef JSON_TEST_NUMBERS
8 #define JSON_TEST_NUMBERS
9 #endif
10 
11 #if (!(_MSC_VER) && !(__MINGW32__) && !(__MINGW64__))
12 #include <dirent.h>
13 #else
14 #include <dirent_portable.h>
15 #endif
16 
17 void found_invalid_number(const uint8_t *buf);
18 void found_float(double result, const uint8_t *buf);
19 void found_integer(int64_t result, const uint8_t *buf);
20 void found_unsigned_integer(uint64_t result, const uint8_t *buf);
21 
22 #include "simdjson.h"
23 
24 
25 
26 /**
27  * Some systems have bad floating-point parsing. We want to exclude them.
28  */
29 #if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
// Under Visual Studio, Linux, Apple and FreeBSD systems, we have a good chance of having a decent
// enough strtod. This is not guaranteed, but it is a good enough heuristic. We exclude systems
// like msys2 or cygwin.
33 //
34 // Finally, we want to exclude legacy 32-bit systems.
35 #ifndef SIMDJSON_IS_32BITS
36 // So we only run some of the floating-point tests under 64-bit linux, apple, regular visual studio, freebsd.
37 #define TEST_FLOATS
38 // Apple and freebsd need a special header, typically.
39 #if defined __APPLE__ || defined(__FreeBSD__)
40 #  include <xlocale.h>
41 #endif
42 
43 #endif
44 
45 #endif
46 
47 
48 
// Bit set of PARSE_* flags accumulated while validating a directory.
int parse_error = 0;
// Path of the file currently being parsed; used by the found_* hooks in
// their diagnostics.
char *fullpath = nullptr;
// Flags OR-ed into parse_error. Each flag needs its own non-zero bit:
// with a value of 0, `parse_error |= PARSE_WARNING` would record nothing.
enum { PARSE_WARNING = 1, PARSE_ERROR = 2 };

// Per-file tallies, incremented by the found_* hooks.
size_t float_count = 0;
size_t int_count = 0;
size_t invalid_count = 0;

// strings that start with these should not be parsed as numbers
const char *really_bad[] = {"013}", "0x14", "0e]", "0e+]", "0e+-1]"};
59 
// Returns true when the C string `str` begins with the C string `pre`.
// An empty `pre` matches every string.
bool starts_with(const char *pre, const char *str) {
  // Walk both strings in lockstep; running out of `pre` means a full match.
  // If `str` ends first, its terminator differs from the current `pre`
  // character, so the comparison fails without reading past the end.
  for (; *pre != '\0'; ++pre, ++str) {
    if (*pre != *str) {
      return false;
    }
  }
  return true;
}
64 
is_in_bad_list(const char * buf)65 bool is_in_bad_list(const char *buf) {
66   if (buf[0] != '0')
67     return false;
68   for (size_t i = 0; i < sizeof(really_bad) / sizeof(really_bad[0]); i++)
69     if (starts_with(really_bad[i], buf))
70       return true;
71   return false;
72 }
73 
#ifdef TEST_FLOATS
// Hook for text that was reported as an invalid number (presumably called by
// the number parser when JSON_TEST_NUMBERS is defined -- confirm against the
// library). Counts the occurrence and prints a warning when the C-locale
// strtod nevertheless accepts the text, unless it is a known-bad prefix.
void found_invalid_number(const uint8_t *buf) {
  ++invalid_count;
  const char *text = reinterpret_cast<const char *>(buf);
  char *stop;
#ifdef _WIN32
  static _locale_t c_locale = _create_locale(LC_ALL, "C");
  const double reference = _strtod_l(text, &stop, c_locale);
#else
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  const double reference = strtod_l(text, &stop, c_locale);
#endif
  // strtod consumed nothing: it agrees that this is not a number.
  if (stop == text) {
    return;
  }
  // Prefixes listed in really_bad are expected disagreements.
  if (is_in_bad_list(text)) {
    return;
  }
  printf("Warning: found_invalid_number %.32s whereas strtod parses it to "
         "%f, ",
         text, reference);
  printf(" while parsing %s \n", fullpath);
  parse_error |= PARSE_WARNING;
}
#else
// We do not recognize the system, so we do not verify our results.
void found_invalid_number(const uint8_t *) {}
#endif
99 
found_integer(int64_t result,const uint8_t * buf)100 void found_integer(int64_t result, const uint8_t *buf) {
101   int_count++;
102   char *endptr;
103   long long expected = strtoll((const char *)buf, &endptr, 10);
104   if ((endptr == (const char *)buf) || (expected != result)) {
105 #if (!(__MINGW32__) && !(__MINGW64__))
106     fprintf(stderr, "Error: parsed %" PRId64 " out of %.32s, ", result, buf);
107 #else // mingw is busted since we include #include <inttypes.h> and it will still  not provide PRId64
108     fprintf(stderr, "Error: parsed %lld out of %.32s, ", (long long)result, buf);
109 #endif
110     fprintf(stderr, " while parsing %s \n", fullpath);
111     parse_error |= PARSE_ERROR;
112   }
113 }
114 
found_unsigned_integer(uint64_t result,const uint8_t * buf)115 void found_unsigned_integer(uint64_t result, const uint8_t *buf) {
116   int_count++;
117   char *endptr;
118   unsigned long long expected = strtoull((const char *)buf, &endptr, 10);
119   if ((endptr == (const char *)buf) || (expected != result)) {
120 #if (!(__MINGW32__) && !(__MINGW64__))
121     fprintf(stderr, "Error: parsed %" PRIu64 " out of %.32s, ", result, buf);
122 #else // mingw is busted since we include #include <inttypes.h>
123     fprintf(stderr, "Error: parsed %llu out of %.32s, ", (unsigned long long)result, buf);
124 #endif
125     fprintf(stderr, " while parsing %s \n", fullpath);
126     parse_error |= PARSE_ERROR;
127   }
128 }
129 
#ifdef TEST_FLOATS
// Hook receiving a parsed floating-point `result` together with the text
// `buf` it was read from. Counts it and cross-checks it against the C-locale
// strtod. Three kinds of disagreement are reported on stderr and flagged as
// PARSE_ERROR: strtod consuming nothing, a different floating-point category,
// or a different value.
void found_float(double result, const uint8_t *buf) {
  const char *text = reinterpret_cast<const char *>(buf);
  char *stop;
  ++float_count;
#ifdef _WIN32
  static _locale_t c_locale = _create_locale(LC_ALL, "C");
  const double reference = _strtod_l(text, &stop, c_locale);
#else
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  const double reference = strtod_l(text, &stop, c_locale);
#endif
  if (stop == text) {
    fprintf(stderr,
            "parsed %f from %.32s whereas strtod refuses to parse a float, ",
            result, text);
    fprintf(stderr, " while parsing %s \n", fullpath);
    parse_error |= PARSE_ERROR;
  }
  // The value comparison only makes sense when both numbers are in the same
  // category (normal, subnormal, zero, infinite, NaN).
  if (std::fpclassify(reference) != std::fpclassify(result)) {
    fprintf(stderr,
            "floats not in the same category expected: %f observed: %f \n",
            reference, result);
    fprintf(stderr, "%.32s\n", text);
    parse_error |= PARSE_ERROR;
  } else if (reference != result) {
    fprintf(stderr, "parsed %.128e from \n", result);
    fprintf(stderr, "       %.32s whereas strtod gives\n", text);
    fprintf(stderr, "       %.128e,", reference);
    fprintf(stderr, " while parsing %s \n", fullpath);
    parse_error |= PARSE_ERROR;
  }
}
#else
// We do not recognize the system, so we do not verify our results.
void found_float(double , const uint8_t *) {}
#endif
168 
169 #include "simdjson.h"
170 #include "simdjson.cpp"
171 
/**
 * Tells whether `filename` ends with `extension`. The extension is taken to
 * start at the last '.' in `filename` and must include that dot
 * (e.g. ".json"). A name without any '.' never matches.
 */
static bool has_extension(const char *filename, const char *extension) {
  const char *last_dot = std::strrchr(filename, '.');
  if (last_dot == nullptr) {
    return false;
  }
  return std::strcmp(last_dot, extension) == 0;
}
179 
validate(const char * dirname)180 bool validate(const char *dirname) {
181   parse_error = 0;
182   size_t total_count = 0;
183   const char *extension = ".json";
184   size_t dirlen = std::strlen(dirname);
185   struct dirent **entry_list;
186   int c = scandir(dirname, &entry_list, 0, alphasort);
187   if (c < 0) {
188     printf("error accessing %s \n", dirname);
189     return false;
190   }
191   if (c == 0) {
192     printf("nothing in dir %s \n", dirname);
193     return false;
194   }
195   bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
196   for (int i = 0; i < c; i++) {
197     const char *name = entry_list[i]->d_name;
198     if (has_extension(name, extension)) {
199       size_t filelen = std::strlen(name);
200       fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
201       strcpy(fullpath, dirname);
202       if (needsep) {
203         fullpath[dirlen] = '/';
204         strcpy(fullpath + dirlen + 1, name);
205       } else {
206         strcpy(fullpath + dirlen, name);
207       }
208       simdjson::padded_string p;
209       auto error = simdjson::padded_string::load(fullpath).get(p);
210       if (error) {
211         std::cerr << "Could not load the file " << fullpath << std::endl;
212         return EXIT_FAILURE;
213       }
214       // terrible hack but just to get it working
215       float_count = 0;
216       int_count = 0;
217       invalid_count = 0;
218       total_count += float_count + int_count + invalid_count;
219       simdjson::dom::parser parser;
220       auto err = parser.parse(p).error();
221       bool isok = (err == simdjson::error_code::SUCCESS);
222       if (int_count + float_count + invalid_count > 0) {
223         printf("File %40s %s --- integers: %10zu floats: %10zu invalid: %10zu "
224                "total numbers: %10zu \n",
225                name, isok ? " is valid     " : " is not valid ", int_count,
226                float_count, invalid_count,
227                int_count + float_count + invalid_count);
228       }
229       free(fullpath);
230     }
231   }
232   if ((parse_error & PARSE_ERROR) != 0) {
233     printf("NUMBER PARSING FAILS?\n");
234   } else {
235     printf("All ok.\n");
236   }
237   for (int i = 0; i < c; ++i)
238     free(entry_list[i]);
239   free(entry_list);
240   return ((parse_error & PARSE_ERROR) == 0);
241 }
242 
main(int argc,char * argv[])243 int main(int argc, char *argv[]) {
244   if (argc != 2) {
245     std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
246               << std::endl;
247 #if defined(SIMDJSON_TEST_DATA_DIR) && defined(SIMDJSON_BENCHMARK_DATA_DIR)
248     std::cout << "We are going to assume you mean to use the '"
249               << SIMDJSON_TEST_DATA_DIR << "'  and  '"
250               << SIMDJSON_BENCHMARK_DATA_DIR << "'directories." << std::endl;
251     return validate(SIMDJSON_TEST_DATA_DIR) &&
252                    validate(SIMDJSON_BENCHMARK_DATA_DIR)
253                ? EXIT_SUCCESS
254                : EXIT_FAILURE;
255 #else
256     std::cout << "We are going to assume you mean to use the 'jsonchecker' and "
257                  "'jsonexamples' directories."
258               << std::endl;
259     return validate("jsonchecker/") && validate("jsonexamples/") ? EXIT_SUCCESS
260                                                                  : EXIT_FAILURE;
261 #endif
262   }
263   return validate(argv[1]) ? EXIT_SUCCESS : EXIT_FAILURE;
264 }
265