#include <cerrno>
#include <cinttypes>
#include <climits>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <random>
#include <unistd.h>
10 
11 #include "simdjson.h"
12 
13 
14 /**
15  * Some systems have bad floating-point parsing. We want to exclude them.
16  */
17 #if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__)
// Under regular Visual Studio, Linux, Apple and FreeBSD systems, we have a good chance of having a decent
// enough strtod. It is not certain, but it is probably a good enough heuristic. We exclude systems like msys2
// or cygwin.
21 //
22 // Finally, we want to exclude legacy 32-bit systems.
23 #ifndef SIMDJSON_IS_32BITS
24 // So we only run some of the floating-point tests under 64-bit linux, apple, regular visual studio, freebsd.
25 #define TEST_FLOATS
26 // Apple and freebsd need a special header, typically.
27 #if defined __APPLE__ || defined(__FreeBSD__)
28 #  include <xlocale.h>
29 #endif
30 
31 #endif
32 
33 #endif
34 
35 
/**
 * A seeded Mersenne Twister bundled with the integer distributions used to
 * assemble random number strings. Every next_*() helper draws from the one
 * shared generator, so the sequence of calls determines the output.
 */
struct RandomEngine {
  // A seed is mandatory: there is no default-constructed engine.
  RandomEngine() = delete;
  RandomEngine(uint32_t seed) : generator(seed) {}

  /** Fair coin: true when the 0/1 distribution yields 1. */
  bool next_bool() { return one_zero_generator(generator) != 0; }
  /** Digit value in [0, 9]. */
  int next_digit() { return digit_generator(generator); }
  /** Digit value in [1, 9]. */
  int next_nonzero_digit() { return nonzero_digit_generator(generator); }
  /** Number of mantissa digits, in [1, 40]. */
  int next_digit_count() { return digit_count_generator(generator); }
  /** Number of exponent digits, in [1, 3]. */
  int next_exp_count() { return exp_count_generator(generator); }

  std::uniform_int_distribution<int> one_zero_generator{0, 1};
  std::uniform_int_distribution<int> digit_generator{0, 9};
  std::uniform_int_distribution<int> nonzero_digit_generator{1, 9};

  std::uniform_int_distribution<int> digit_count_generator{1, 40};
  std::uniform_int_distribution<int> exp_count_generator{1, 3};

  // Public on purpose: callers may feed it to their own distributions.
  std::mt19937 generator;
};
53 
build_random_string(RandomEngine & rand,char * buffer)54 size_t build_random_string(RandomEngine &rand, char *buffer) {
55   size_t pos{0};
56   if (rand.next_bool()) {
57     buffer[pos++] = '-';
58   }
59   size_t number_of_digits = size_t(rand.next_digit_count());
60   std::uniform_int_distribution<int> decimal_generator(1,int(number_of_digits));
61   size_t location_of_decimal_separator = size_t(decimal_generator(rand.generator));
62   for (size_t i = 0; i < number_of_digits; i++) {
63     if (i == location_of_decimal_separator) {
64       buffer[pos++] = '.';
65     }
66     if (( i == 0) && (location_of_decimal_separator != 1)) {
67       buffer[pos++] = char(rand.next_nonzero_digit() + '0');
68     } else {
69       buffer[pos++] = char(rand.next_digit() + '0');
70     }
71   }
72   if (rand.next_bool()) {
73     if (rand.next_bool()) {
74       buffer[pos++] = 'e';
75     } else {
76       buffer[pos++] = 'E';
77     }
78     if (rand.next_bool()) {
79       buffer[pos++] = '-';
80     } else {
81       if (rand.next_bool()) {
82         buffer[pos++] = '+';
83       }
84     }
85     number_of_digits = rand.next_exp_count();
86     size_t i = 0;
87     if(number_of_digits > 0) {
88         buffer[pos++] = char(rand.next_nonzero_digit() + '0');
89         i++;
90     }
91     for (; i < number_of_digits; i++) {
92       buffer[pos++] = char(rand.next_digit() + '0');
93     }
94   }
95   buffer[pos] = '\0'; // null termination
96   return pos;
97 }
98 
99 
100 #ifndef TEST_FLOATS
// The platform is not recognized, so parsed values are accepted as-is
// instead of being compared against strtod.
bool check_float(double /*result*/, const char * /*buf*/) { return true; }
105 #else
/**
 * Check a parsed value against the C-locale strtod result for the same text.
 *
 * @param result value produced by the parser under test
 * @param buf    null-terminated text the value was parsed from
 * @return true when strtod accepts the text and yields exactly `result`;
 *         false (after printing a diagnostic to stderr) otherwise
 */
bool check_float(double result, const char *buf) {
  char *parse_end;
  // The "C" locale object is created once and reused across calls.
#ifdef _WIN32
  static _locale_t c_locale = _create_locale(LC_ALL, "C");
  const double reference = _strtod_l(buf, &parse_end, c_locale);
#else
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  const double reference = strtod_l(buf, &parse_end, c_locale);
#endif
  // strtod leaves the end pointer at the start when it consumed nothing.
  const bool strtod_rejected = (parse_end == buf);
  if (strtod_rejected) {
    fprintf(stderr,
            "parsed %f from %.32s whereas strtod refuses to parse a float, ",
            result, buf);
    return false;
  }
  if (reference == result) {
    return true;
  }
  // Hex-float output shows the exact bits that differ; restore the default
  // formatting afterwards.
  std::cerr << std::hexfloat << " parsed " << result << " from "
            << buf << " whereas strtod gives " << reference << std::endl;
  std::cerr << std::defaultfloat;
  return false;
}
129 #endif
130 
131 
132 /**
133  * We generate random strings and we try to parse them,
134  * and we verify that we get the same answer.
135  */
tester(int seed,size_t volume)136 bool tester(int seed, size_t volume) {
137   char buffer[1024]; // large buffer (can't overflow)
138   simdjson::dom::parser parser;
139   RandomEngine rand(seed);
140   double result;
141   for (size_t i = 0; i < volume; i++) {
142     if((i%100000) == 0) { std::cout << "."; std::cout.flush(); }
143     size_t length = build_random_string(rand, buffer);
144     auto error = parser.parse(buffer, length).get(result);
145     // When we parse a (finite) number, it better match strtod.
146     if ((!error) && (!check_float(result, buffer))) { return false; }
147   }
148   return true;
149 }
150 
main(int argc,char * argv[])151 int main(int argc, char *argv[]) {
152   // We test 1,000,000 random strings by default.
153   // You can specify more tests with the '-m' flag if you want.
154   size_t howmany = 1000000;
155 
156   int c;
157   while ((c = getopt(argc, argv, "a:m:h")) != -1) {
158     switch (c) {
159     case 'a': {
160       const simdjson::implementation *impl = simdjson::available_implementations[optarg];
161       if (!impl) {
162         fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
163         return EXIT_FAILURE;
164       }
165       if(!impl->supported_by_runtime_system()) {
166         fprintf(stderr, "The selected implementation does not match your current CPU: -a %s\n", optarg);
167         return EXIT_FAILURE;
168       }
169       simdjson::active_implementation = impl;
170       break;
171     }
172     case 'h': {
173       std::cout << "-a to select an architecture" << std::endl;
174       std::cout << "-m to select a number of tests" << std::endl;
175       return EXIT_SUCCESS;
176     }
177     case 'm': {
178       long long requested_howmany = atoll(optarg);
179       if(requested_howmany <= 0) {
180         fprintf(stderr, "Please provide a positive number of tests -m %s no larger than %lld \n", optarg, LLONG_MAX);
181         return EXIT_FAILURE;
182       }
183       howmany = size_t(requested_howmany);
184       break;
185     }
186     default:
187       fprintf(stderr, "Unexpected argument %c\n", c);
188       return EXIT_FAILURE;
189     }
190   }
191   if (tester(1234344, howmany)) {
192     std::cout << "All tests ok." << std::endl;
193     return EXIT_SUCCESS;
194   }
195   std::cout << "Failure." << std::endl;
196   return EXIT_FAILURE;
197 }