1 #pragma once
2 
3 #include "outputs.h"
4 
5 #if defined(__AVR__)
6 #include <Arduino.h>
7 #include "avr_type_helpers.h"
8 typedef String string_class;
9 // AVR targets do not have enough memory to track which denominatores
10 // have been tested. So this is a dummy placeholder, just to allow
11 // common function signatures.
12 template <typename IntT>
13 using set_t = std::numeric_limits<IntT>;
14 #else
15 #include <limits>
16 #include <random>
17 #include <string>
18 typedef std::string string_class;
19 #include <set>
20 template <typename IntT>
21 using set_t = std::set<IntT>;
22 #endif
23 
24 #if defined(_MSC_VER)
25 #pragma warning(disable : 4324)
26 #pragma warning(disable : 4310)
27 #pragma warning(disable : 4146)
28 #endif
29 
30 #include "libdivide.h"
31 #include "type_mappings.h"
32 
33 using namespace libdivide;
34 
35 #define UNUSED(x) (void)(x)
36 
37 #if defined(LIBDIVIDE_SSE2) && defined(LIBDIVIDE_AVX2) && defined(LIBDIVIDE_AVX512) && defined(LIBDIVIDE_NEON)
38 #define VECTOR_TESTS
39 #endif
40 
41 #ifdef _MSC_VER
42 #pragma warning(push)
43 #pragma warning(disable: 4127) // disable "conditional expression is constant""
44 #endif
45 
46 template <typename T>
47 class DivideTest {
48    private:
49     using UT = typename std::make_unsigned<T>::type;
50     using limits = std::numeric_limits<T>;
51     uint32_t seed = 0;
52     UT rand_n = 0;
53 
54     // This random function slowly increases the random number
55     // until there is an integer overflow, if this happens
56     // the random number is reset to 0 and we restart at the
57     // beginning. We do this to ensure that we get many test
58     // cases (random integers) of varying bit length.
get_random()59     T get_random() {
60         // https://en.wikipedia.org/wiki/Linear_congruential_generator
61         seed = seed * 1664525 + 1013904223;
62 
63         UT old = rand_n;
64         rand_n = rand_n * (seed % 2 + 1) + rand_n % 30000001 + 3;
65 
66         // Reset upon integer overflow
67         if (rand_n < old) {
68             rand_n = seed % 19;
69         }
70 
71         // The algorithm above generates mostly positive numbers.
72         // Hence convert 50% of all values to negative.
73         if (limits::is_signed) {
74             if (seed % 2) return -(T)rand_n;
75         }
76 
77         return (T)rand_n;
78     }
79 
random_denominator()80     T random_denominator() {
81       T denom = get_random();
82       while (denom == 0) {
83           denom = get_random();
84       }
85       return denom;
86     }
87 
testcase_name(int algo)88     string_class testcase_name(int algo) const {
89         string_class result = type_tag<T>::get_tag();
90         if (algo == BRANCHFREE) {
91             result += " (branchfree)";
92         }
93         return result;
94     }
95 
96     template <Branching ALGO>
test_one(T numer,T denom,const divider<T,ALGO> & the_divider)97     void test_one(T numer, T denom, const divider<T, ALGO> &the_divider) {
98         // Don't crash with INT_MIN / -1
99         // INT_MIN / -1 is undefined behavior in C/C++
100         if (limits::is_signed && numer == (limits::min)() && denom == T(-1)) {
101             return;
102         }
103 
104         T expect = numer / denom;
105         T result = numer / the_divider;
106 
107         if (result != expect) {
108             PRINT_ERROR(F("Failure for "));
109             PRINT_ERROR(testcase_name(ALGO));
110             PRINT_ERROR(F(": "));
111             PRINT_ERROR(numer);
112             PRINT_ERROR(F(" / "));
113             PRINT_ERROR(denom);
114             PRINT_ERROR(F(" = "));
115             PRINT_ERROR(expect);
116             PRINT_ERROR(F(", but got "));
117             PRINT_ERROR(result);
118             PRINT_ERROR(F("\n"));
119             exit(1);
120         }
121     }
122 
123     template <typename VecType, Branching ALGO>
test_vec(const T * numers,size_t count,T denom,const divider<T,ALGO> & div)124     void test_vec(const T *numers, size_t count, T denom, const divider<T, ALGO> &div) {
125         size_t size = sizeof(VecType) / sizeof(T);
126         size_t iters = (sizeof(T)*count)/sizeof(VecType);
127 
128         for (size_t j = 0; j < iters; j++, numers += size) {
129             VecType x = *((const VecType *)numers);
130             VecType resultVector = x / div;
131             T *results = (T *)&resultVector;
132 
133             for (size_t i = 0; i < size; i++) {
134                 T numer = numers[i];
135                 T result = results[i];
136                 T expect = numer / denom;
137 
138                 if (result != expect) {
139                     PRINT_ERROR(F("Vector failure for: "));
140                     PRINT_ERROR(testcase_name(ALGO));
141                     PRINT_ERROR(F(": "));
142                     PRINT_ERROR(numer);
143                     PRINT_ERROR(F(" / "));
144                     PRINT_ERROR(denom);
145                     PRINT_ERROR(F(" = "));
146                     PRINT_ERROR(expect );
147                     PRINT_ERROR(F(", but got "));
148                     PRINT_ERROR(result);
149                     PRINT_ERROR(F("\n"));
150                     exit(1);
151                 } else {
152     #if 0
153                         std::cout << "vec" << (CHAR_BIT * sizeof(VecType)) << " success for: " << numer << " / " << denom << " = " << result << std::endl;
154     #endif
155                 }
156             }
157         }
158     }
159 
160     // random_count * sizeof(T) must be >= size of largest
161     // vector type. So figure that out at compile time.
162     union vector_size_u {
163         T s1;
164 #ifdef LIBDIVIDE_SSE2
165         __m128i s2;
166 #endif
167 #ifdef LIBDIVIDE_AVX2
168         __m256i s3;
169 #endif
170 #ifdef LIBDIVIDE_AVX512
171         __m512i s4;
172 #endif
173 #ifdef LIBDIVIDE_NEON
174         typename NeonVecFor<T>::type s5;
175 #endif
176     };
177     static const size_t min_vector_count = sizeof(union vector_size_u)/sizeof(T);
178 
179     static constexpr T min = (std::numeric_limits<T>::min)();
180     static constexpr T max = (std::numeric_limits<T>::max)();
181     static constexpr T edgeCases[] = {
182         0,					(T)(1),				(T)(2),					(T)(3),				(T)(4),				(T)(5),
183         (T)(6),				(T)(7),				(T)(8),					(T)(9),				(T)(10),			(T)(11),
184         (T)(12),			(T)(13),			(T)(14),				(T)(15),			(T)(16),			(T)(17),
185         (T)(18),			(T)(19),			(T)(20),				(T)(21),			(T)(22),			(T)(23),
186         (T)(24),			(T)(25),			(T)(26),				(T)(27),			(T)(28),			(T)(29),
187         (T)(30),			(T)(31),			(T)(32),				(T)(33),			(T)(34),			(T)(35),
188         (T)(36),			(T)(37),			(T)(38),				(T)(39),			(T)(40),			(T)(41),
189         (T)(42),			(T)(43),			(T)(44),				(T)(45),			(T)(46),			(T)(47),
190         (T)(48),			(T)(49),			(T)(123),				(T)(1232),			(T)(36847),			(T)(506838),
191         (T)(3000003),		(T)(70000007),
192 
193         (T)(max),			(T)(max - 1),		(T)(max - 2),			(T)(max - 3),		(T)(max - 4),		(T)(max - 5),
194         (T)(max - 3213),	(T)(max - 2453242),	(T)(max - 432234231),
195 
196         (T)(min),			(T)(min + 1),		(T)(min + 2),			(T)(min + 3),		(T)(min + 4),		(T)(min + 5),
197         (T)(min + 3213),	(T)(min + 2453242),	(T)(min + 432234231),
198 
199         (T)(max / 2),		(T)(max / 2 + 1),	(T)(max / 2 - 1),		(T)(max / 3),		(T)(max / 3 + 1),	(T)(max / 3 - 1),
200         (T)(max / 4),		(T)(max / 4 + 1),	(T)(max / 4 - 1),
201 
202         (T)(min / 2),		(T)(min / 2 + 1),	(T)(min / 2 - 1),		(T)(min / 3),		(T)(min / 3 + 1),	(T)(min / 3 - 1),
203         (T)(min / 4),		(T)(min / 4 + 1),	(T)(min / 4 - 1)
204     };
205 
206     template <Branching ALGO>
test_edgecase_numerators(T denom,const divider<T,ALGO> & the_divider)207     void test_edgecase_numerators(T denom, const divider<T, ALGO> &the_divider) {
208         for (auto numerator : edgeCases) {
209             test_one((T)numerator, denom, the_divider);
210         }
211     }
212 
213     template <Branching ALGO>
test_small_numerators(T denom,const divider<T,ALGO> & the_divider)214     void test_small_numerators(T denom, const divider<T, ALGO> &the_divider) {
215         // test small numerators < 2^16
216         // balance signed & unsigned testing
217 #if defined(__AVR__)
218         int32_t small_stop = (limits::is_signed) ? (int32_t)1 << 7 : (uint32_t)1 << 8;
219 #else
220         int32_t small_stop = (limits::is_signed) ? (int32_t)1 << 14 : (uint32_t)1 << 16;
221 #endif
222         for (int32_t i = 0; i < small_stop; ++i) {
223             test_one((T)i, denom, the_divider);
224 
225             if (limits::is_signed) {
226                 test_one((T)-i, denom, the_divider);
227             }
228         }
229     }
230 
231     template <Branching ALGO>
test_pow2_numerators(T denom,const divider<T,ALGO> & the_divider)232     void test_pow2_numerators(T denom, const divider<T, ALGO> &the_divider) {
233        // test power of 2 numerators: 2^i-1, 2^i, 2^i+1
234         for (int i = 1; i < limits::digits; i++) {
235             for (int j = -1; j <= 1; j++) {
236                 T numerator = static_cast<T>((static_cast<T>(1) << i) + j);
237                 test_one(numerator, denom, the_divider);
238 
239                 if (limits::is_signed) {
240                     test_one(-numerator, denom, the_divider);
241                 }
242             }
243         }
244     }
245 
246     template <Branching ALGO>
test_allbits_numerators(T denom,const divider<T,ALGO> & the_divider)247     void test_allbits_numerators(T denom, const divider<T, ALGO> &the_divider) {
248         // test all bits set:
249         // 11111111, 11111110, 11111100, ...
250         for (UT bits = (std::numeric_limits<UT>::max)(); bits != 0; bits <<= 1) {
251             test_one((T)bits, denom, the_divider);
252         }
253     }
254 
255     template <Branching ALGO>
test_random_numerators(T denom,const divider<T,ALGO> & the_divider)256     void test_random_numerators(T denom, const divider<T, ALGO> &the_divider) {
257         for (size_t i = 0; i < 10000; ++i) {
258             for (size_t j = 0; j < min_vector_count; j++) {
259                 test_one(get_random(), denom, the_divider);
260             }
261         }
262     }
263 
264     template <Branching ALGO>
test_vectordivide_numerators(T denom,const divider<T,ALGO> & the_divider)265     void test_vectordivide_numerators(T denom, const divider<T, ALGO> &the_divider) {
266 #if defined(VECTOR_TESTS)
267         // Align memory to 64 byte boundary for AVX512
268         char mem[min_vector_count * sizeof(T) + 64];
269         size_t offset = 64 - (size_t)&mem % 64;
270         T *numers = (T *)&mem[offset];
271 
272         for (size_t i = 0; i < 10000; ++i) {
273             for (size_t j = 0; j < min_vector_count; j++) {
274                 numers[j] = get_random();
275             }
276 #ifdef LIBDIVIDE_SSE2
277             test_vec<__m128i>(numers, min_vector_count, denom, the_divider);
278 #endif
279 #ifdef LIBDIVIDE_AVX2
280             test_vec<__m256i>(numers, min_vector_count, denom, the_divider);
281 #endif
282 #ifdef LIBDIVIDE_AVX512
283             test_vec<__m512i>(numers, min_vector_count, denom, the_divider);
284 #endif
285 #ifdef LIBDIVIDE_NEON
286             test_vec<typename NeonVecFor<T>::type>(numers, min_vector_count, denom, the_divider);
287 #endif
288         }
289 #else
290         UNUSED(denom);
291         UNUSED(the_divider);
292 #endif
293     }
294 
295     template <Branching ALGO>
test_all_numerators(T denom,const divider<T,ALGO> & the_divider)296     void test_all_numerators(T denom, const divider<T, ALGO> &the_divider) {
297         for (T numerator=(min); numerator!=(max); ++numerator) {
298             test_one((T)numerator, denom, the_divider);
299         }
300     }
301 
302     template <Branching ALGO>
test_many(T denom)303     void test_many(T denom) {
304         // Don't try dividing by 1 with unsigned branchfree
305         if (ALGO == BRANCHFREE && !std::numeric_limits<T>::is_signed && denom == 1) {
306             return;
307         }
308 
309         const divider<T, ALGO> the_divider = divider<T, ALGO>(denom);
310         T recovered = the_divider.recover();
311         if (recovered != denom) {
312             PRINT_ERROR(F("Failed to recover divisor for "));
313             PRINT_ERROR(testcase_name(ALGO));
314             PRINT_ERROR(F(": "));
315             PRINT_ERROR(denom);
316             PRINT_ERROR(F(", but got "));
317             PRINT_ERROR(recovered);
318             PRINT_ERROR(F("\n"));
319             exit(1);
320         }
321 
322         test_edgecase_numerators(denom, the_divider);
323         test_small_numerators(denom, the_divider);
324         test_pow2_numerators(denom, the_divider);
325         test_allbits_numerators(denom, the_divider);
326 #if !defined(__AVR__)
327         test_random_numerators(denom, the_divider);
328         test_vectordivide_numerators(denom, the_divider);
329 #endif
330     }
331 
randomSeed()332     static uint32_t randomSeed() {
333 #if defined(__AVR__)
334         return (uint32_t)analogRead(A0);
335 #else
336         std::random_device randomDevice;
337         std::mt19937 randGen(randomDevice());
338         std::uniform_int_distribution<uint32_t> randDist(1, std::numeric_limits<uint32_t>::max());
339         return randDist(randGen);
340 #endif
341     }
342 
test_all_algorithms(T denom,set_t<T> & tested_denom)343     void test_all_algorithms(T denom, set_t<T> &tested_denom) {
344 #if !defined(__AVR__)
345         if(tested_denom.end()==tested_denom.find(denom)) {
346 #endif
347             PRINT_PROGRESS_MSG(F("Testing deom "));
348             PRINT_PROGRESS_MSG(denom);
349             PRINT_PROGRESS_MSG(F("\n"));
350             test_many<BRANCHFULL>(denom);
351             test_many<BRANCHFREE>(denom);
352 #if !defined(__AVR__)
353             tested_denom.insert(denom);
354         }
355 #else
356         UNUSED(tested_denom);
357 #endif
358     }
359 
test_both_signs(UT denom,set_t<T> & tested_denom)360     void test_both_signs(UT denom, set_t<T> &tested_denom) {
361         test_all_algorithms(denom, tested_denom);
362 
363         if (limits::is_signed) {
364             test_all_algorithms(-denom, tested_denom);
365         }
366     }
367 
368 public:
369 
DivideTest()370     DivideTest() {
371         seed = randomSeed();
372         rand_n = (UT)randomSeed();
373     }
374 
run()375     void run() {
376         set_t<T> tested_denom;
377 
378         // Test small values
379 #if defined(__AVR__)
380         UT primes[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173};
381         for (size_t index = 0; index < sizeof(primes)/sizeof(primes[0]); ++index) {
382             test_both_signs(primes[index], tested_denom);
383         }
384 #else
385         for (UT denom = 1; denom < 1024; ++denom) {
386             test_both_signs(denom, tested_denom);
387         }
388 #endif
389 
390         if (limits::is_signed) {
391             PRINT_PROGRESS_MSG(F("Testing minimum\n"));
392             test_all_algorithms((limits::min)(), tested_denom);
393         }
394 
395         PRINT_PROGRESS_MSG(F("Testing maximum\n"));
396         test_all_algorithms((limits::max)(), tested_denom);
397 
398         // test power of 2 denoms: 2^i-1, 2^i, 2^i+1
399         PRINT_PROGRESS_MSG(F("Testing powers of 2\n"));
400         for (int i = 1; i < limits::digits; i++) {
401             for (int j = -1; j <= 1; j++) {
402                 T denom = static_cast<UT>((static_cast<T>(1) << i) + j);
403 
404                 test_both_signs(denom, tested_denom);
405             }
406         }
407 
408         // test all bits set:
409         // 11111111, 11111110, 11111100, ...
410         PRINT_PROGRESS_MSG(F("Testing all bits set\n"));
411         // For signed types, this degenerates to negative powers of
412         // 2 (-1, -2, -4....): since we just tested those (above), skip.
413         if (!limits::is_signed) {
414             for (UT bits = (std::numeric_limits<UT>::max)(); bits != 0; bits <<= 1) {
415                 PRINT_PROGRESS_MSG((T)bits);
416                 PRINT_PROGRESS_MSG("\n");
417                 test_all_algorithms((T)bits, tested_denom);
418             }
419         }
420 
421         // Test random denominators
422 #if !defined(__AVR__)
423         PRINT_PROGRESS_MSG(F("Test random denominators\n"));
424         for (int i = 0; i < 10000; ++i) {
425             test_all_algorithms(random_denominator(), tested_denom);
426         }
427 #endif
428     }
429 };
430 
431 template<typename IntT>
432 constexpr IntT DivideTest<IntT>::edgeCases[];
433 
434 template <typename T>
run_test()435 void run_test() {
436     PRINT_INFO(F("Testing "));
437     PRINT_INFO(type_name<T>::get_name());
438     PRINT_INFO(F("\n"));
439     DivideTest<T> dt;
440     dt.run();
441 }
442 
443 #ifdef _MSC_VER
444 #pragma warning(pop)
445 #endif