1 #pragma once
2
3 #include "outputs.h"
4
5 #if defined(__AVR__)
6 #include <Arduino.h>
7 #include "avr_type_helpers.h"
8 typedef String string_class;
9 // AVR targets do not have enough memory to track which denominatores
10 // have been tested. So this is a dummy placeholder, just to allow
11 // common function signatures.
12 template <typename IntT>
13 using set_t = std::numeric_limits<IntT>;
14 #else
15 #include <limits>
16 #include <random>
17 #include <string>
18 typedef std::string string_class;
19 #include <set>
20 template <typename IntT>
21 using set_t = std::set<IntT>;
22 #endif
23
24 #if defined(_MSC_VER)
25 #pragma warning(disable : 4324)
26 #pragma warning(disable : 4310)
27 #pragma warning(disable : 4146)
28 #endif
29
30 #include "libdivide.h"
31 #include "type_mappings.h"
32
33 using namespace libdivide;
34
35 #define UNUSED(x) (void)(x)
36
37 #if defined(LIBDIVIDE_SSE2) && defined(LIBDIVIDE_AVX2) && defined(LIBDIVIDE_AVX512) && defined(LIBDIVIDE_NEON)
38 #define VECTOR_TESTS
39 #endif
40
41 #ifdef _MSC_VER
42 #pragma warning(push)
43 #pragma warning(disable: 4127) // disable "conditional expression is constant""
44 #endif
45
46 template <typename T>
47 class DivideTest {
48 private:
49 using UT = typename std::make_unsigned<T>::type;
50 using limits = std::numeric_limits<T>;
51 uint32_t seed = 0;
52 UT rand_n = 0;
53
54 // This random function slowly increases the random number
55 // until there is an integer overflow, if this happens
56 // the random number is reset to 0 and we restart at the
57 // beginning. We do this to ensure that we get many test
58 // cases (random integers) of varying bit length.
get_random()59 T get_random() {
60 // https://en.wikipedia.org/wiki/Linear_congruential_generator
61 seed = seed * 1664525 + 1013904223;
62
63 UT old = rand_n;
64 rand_n = rand_n * (seed % 2 + 1) + rand_n % 30000001 + 3;
65
66 // Reset upon integer overflow
67 if (rand_n < old) {
68 rand_n = seed % 19;
69 }
70
71 // The algorithm above generates mostly positive numbers.
72 // Hence convert 50% of all values to negative.
73 if (limits::is_signed) {
74 if (seed % 2) return -(T)rand_n;
75 }
76
77 return (T)rand_n;
78 }
79
random_denominator()80 T random_denominator() {
81 T denom = get_random();
82 while (denom == 0) {
83 denom = get_random();
84 }
85 return denom;
86 }
87
testcase_name(int algo)88 string_class testcase_name(int algo) const {
89 string_class result = type_tag<T>::get_tag();
90 if (algo == BRANCHFREE) {
91 result += " (branchfree)";
92 }
93 return result;
94 }
95
96 template <Branching ALGO>
test_one(T numer,T denom,const divider<T,ALGO> & the_divider)97 void test_one(T numer, T denom, const divider<T, ALGO> &the_divider) {
98 // Don't crash with INT_MIN / -1
99 // INT_MIN / -1 is undefined behavior in C/C++
100 if (limits::is_signed && numer == (limits::min)() && denom == T(-1)) {
101 return;
102 }
103
104 T expect = numer / denom;
105 T result = numer / the_divider;
106
107 if (result != expect) {
108 PRINT_ERROR(F("Failure for "));
109 PRINT_ERROR(testcase_name(ALGO));
110 PRINT_ERROR(F(": "));
111 PRINT_ERROR(numer);
112 PRINT_ERROR(F(" / "));
113 PRINT_ERROR(denom);
114 PRINT_ERROR(F(" = "));
115 PRINT_ERROR(expect);
116 PRINT_ERROR(F(", but got "));
117 PRINT_ERROR(result);
118 PRINT_ERROR(F("\n"));
119 exit(1);
120 }
121 }
122
123 template <typename VecType, Branching ALGO>
test_vec(const T * numers,size_t count,T denom,const divider<T,ALGO> & div)124 void test_vec(const T *numers, size_t count, T denom, const divider<T, ALGO> &div) {
125 size_t size = sizeof(VecType) / sizeof(T);
126 size_t iters = (sizeof(T)*count)/sizeof(VecType);
127
128 for (size_t j = 0; j < iters; j++, numers += size) {
129 VecType x = *((const VecType *)numers);
130 VecType resultVector = x / div;
131 T *results = (T *)&resultVector;
132
133 for (size_t i = 0; i < size; i++) {
134 T numer = numers[i];
135 T result = results[i];
136 T expect = numer / denom;
137
138 if (result != expect) {
139 PRINT_ERROR(F("Vector failure for: "));
140 PRINT_ERROR(testcase_name(ALGO));
141 PRINT_ERROR(F(": "));
142 PRINT_ERROR(numer);
143 PRINT_ERROR(F(" / "));
144 PRINT_ERROR(denom);
145 PRINT_ERROR(F(" = "));
146 PRINT_ERROR(expect );
147 PRINT_ERROR(F(", but got "));
148 PRINT_ERROR(result);
149 PRINT_ERROR(F("\n"));
150 exit(1);
151 } else {
152 #if 0
153 std::cout << "vec" << (CHAR_BIT * sizeof(VecType)) << " success for: " << numer << " / " << denom << " = " << result << std::endl;
154 #endif
155 }
156 }
157 }
158 }
159
160 // random_count * sizeof(T) must be >= size of largest
161 // vector type. So figure that out at compile time.
162 union vector_size_u {
163 T s1;
164 #ifdef LIBDIVIDE_SSE2
165 __m128i s2;
166 #endif
167 #ifdef LIBDIVIDE_AVX2
168 __m256i s3;
169 #endif
170 #ifdef LIBDIVIDE_AVX512
171 __m512i s4;
172 #endif
173 #ifdef LIBDIVIDE_NEON
174 typename NeonVecFor<T>::type s5;
175 #endif
176 };
177 static const size_t min_vector_count = sizeof(union vector_size_u)/sizeof(T);
178
179 static constexpr T min = (std::numeric_limits<T>::min)();
180 static constexpr T max = (std::numeric_limits<T>::max)();
181 static constexpr T edgeCases[] = {
182 0, (T)(1), (T)(2), (T)(3), (T)(4), (T)(5),
183 (T)(6), (T)(7), (T)(8), (T)(9), (T)(10), (T)(11),
184 (T)(12), (T)(13), (T)(14), (T)(15), (T)(16), (T)(17),
185 (T)(18), (T)(19), (T)(20), (T)(21), (T)(22), (T)(23),
186 (T)(24), (T)(25), (T)(26), (T)(27), (T)(28), (T)(29),
187 (T)(30), (T)(31), (T)(32), (T)(33), (T)(34), (T)(35),
188 (T)(36), (T)(37), (T)(38), (T)(39), (T)(40), (T)(41),
189 (T)(42), (T)(43), (T)(44), (T)(45), (T)(46), (T)(47),
190 (T)(48), (T)(49), (T)(123), (T)(1232), (T)(36847), (T)(506838),
191 (T)(3000003), (T)(70000007),
192
193 (T)(max), (T)(max - 1), (T)(max - 2), (T)(max - 3), (T)(max - 4), (T)(max - 5),
194 (T)(max - 3213), (T)(max - 2453242), (T)(max - 432234231),
195
196 (T)(min), (T)(min + 1), (T)(min + 2), (T)(min + 3), (T)(min + 4), (T)(min + 5),
197 (T)(min + 3213), (T)(min + 2453242), (T)(min + 432234231),
198
199 (T)(max / 2), (T)(max / 2 + 1), (T)(max / 2 - 1), (T)(max / 3), (T)(max / 3 + 1), (T)(max / 3 - 1),
200 (T)(max / 4), (T)(max / 4 + 1), (T)(max / 4 - 1),
201
202 (T)(min / 2), (T)(min / 2 + 1), (T)(min / 2 - 1), (T)(min / 3), (T)(min / 3 + 1), (T)(min / 3 - 1),
203 (T)(min / 4), (T)(min / 4 + 1), (T)(min / 4 - 1)
204 };
205
206 template <Branching ALGO>
test_edgecase_numerators(T denom,const divider<T,ALGO> & the_divider)207 void test_edgecase_numerators(T denom, const divider<T, ALGO> &the_divider) {
208 for (auto numerator : edgeCases) {
209 test_one((T)numerator, denom, the_divider);
210 }
211 }
212
213 template <Branching ALGO>
test_small_numerators(T denom,const divider<T,ALGO> & the_divider)214 void test_small_numerators(T denom, const divider<T, ALGO> &the_divider) {
215 // test small numerators < 2^16
216 // balance signed & unsigned testing
217 #if defined(__AVR__)
218 int32_t small_stop = (limits::is_signed) ? (int32_t)1 << 7 : (uint32_t)1 << 8;
219 #else
220 int32_t small_stop = (limits::is_signed) ? (int32_t)1 << 14 : (uint32_t)1 << 16;
221 #endif
222 for (int32_t i = 0; i < small_stop; ++i) {
223 test_one((T)i, denom, the_divider);
224
225 if (limits::is_signed) {
226 test_one((T)-i, denom, the_divider);
227 }
228 }
229 }
230
231 template <Branching ALGO>
test_pow2_numerators(T denom,const divider<T,ALGO> & the_divider)232 void test_pow2_numerators(T denom, const divider<T, ALGO> &the_divider) {
233 // test power of 2 numerators: 2^i-1, 2^i, 2^i+1
234 for (int i = 1; i < limits::digits; i++) {
235 for (int j = -1; j <= 1; j++) {
236 T numerator = static_cast<T>((static_cast<T>(1) << i) + j);
237 test_one(numerator, denom, the_divider);
238
239 if (limits::is_signed) {
240 test_one(-numerator, denom, the_divider);
241 }
242 }
243 }
244 }
245
246 template <Branching ALGO>
test_allbits_numerators(T denom,const divider<T,ALGO> & the_divider)247 void test_allbits_numerators(T denom, const divider<T, ALGO> &the_divider) {
248 // test all bits set:
249 // 11111111, 11111110, 11111100, ...
250 for (UT bits = (std::numeric_limits<UT>::max)(); bits != 0; bits <<= 1) {
251 test_one((T)bits, denom, the_divider);
252 }
253 }
254
255 template <Branching ALGO>
test_random_numerators(T denom,const divider<T,ALGO> & the_divider)256 void test_random_numerators(T denom, const divider<T, ALGO> &the_divider) {
257 for (size_t i = 0; i < 10000; ++i) {
258 for (size_t j = 0; j < min_vector_count; j++) {
259 test_one(get_random(), denom, the_divider);
260 }
261 }
262 }
263
264 template <Branching ALGO>
test_vectordivide_numerators(T denom,const divider<T,ALGO> & the_divider)265 void test_vectordivide_numerators(T denom, const divider<T, ALGO> &the_divider) {
266 #if defined(VECTOR_TESTS)
267 // Align memory to 64 byte boundary for AVX512
268 char mem[min_vector_count * sizeof(T) + 64];
269 size_t offset = 64 - (size_t)&mem % 64;
270 T *numers = (T *)&mem[offset];
271
272 for (size_t i = 0; i < 10000; ++i) {
273 for (size_t j = 0; j < min_vector_count; j++) {
274 numers[j] = get_random();
275 }
276 #ifdef LIBDIVIDE_SSE2
277 test_vec<__m128i>(numers, min_vector_count, denom, the_divider);
278 #endif
279 #ifdef LIBDIVIDE_AVX2
280 test_vec<__m256i>(numers, min_vector_count, denom, the_divider);
281 #endif
282 #ifdef LIBDIVIDE_AVX512
283 test_vec<__m512i>(numers, min_vector_count, denom, the_divider);
284 #endif
285 #ifdef LIBDIVIDE_NEON
286 test_vec<typename NeonVecFor<T>::type>(numers, min_vector_count, denom, the_divider);
287 #endif
288 }
289 #else
290 UNUSED(denom);
291 UNUSED(the_divider);
292 #endif
293 }
294
295 template <Branching ALGO>
test_all_numerators(T denom,const divider<T,ALGO> & the_divider)296 void test_all_numerators(T denom, const divider<T, ALGO> &the_divider) {
297 for (T numerator=(min); numerator!=(max); ++numerator) {
298 test_one((T)numerator, denom, the_divider);
299 }
300 }
301
302 template <Branching ALGO>
test_many(T denom)303 void test_many(T denom) {
304 // Don't try dividing by 1 with unsigned branchfree
305 if (ALGO == BRANCHFREE && !std::numeric_limits<T>::is_signed && denom == 1) {
306 return;
307 }
308
309 const divider<T, ALGO> the_divider = divider<T, ALGO>(denom);
310 T recovered = the_divider.recover();
311 if (recovered != denom) {
312 PRINT_ERROR(F("Failed to recover divisor for "));
313 PRINT_ERROR(testcase_name(ALGO));
314 PRINT_ERROR(F(": "));
315 PRINT_ERROR(denom);
316 PRINT_ERROR(F(", but got "));
317 PRINT_ERROR(recovered);
318 PRINT_ERROR(F("\n"));
319 exit(1);
320 }
321
322 test_edgecase_numerators(denom, the_divider);
323 test_small_numerators(denom, the_divider);
324 test_pow2_numerators(denom, the_divider);
325 test_allbits_numerators(denom, the_divider);
326 #if !defined(__AVR__)
327 test_random_numerators(denom, the_divider);
328 test_vectordivide_numerators(denom, the_divider);
329 #endif
330 }
331
randomSeed()332 static uint32_t randomSeed() {
333 #if defined(__AVR__)
334 return (uint32_t)analogRead(A0);
335 #else
336 std::random_device randomDevice;
337 std::mt19937 randGen(randomDevice());
338 std::uniform_int_distribution<uint32_t> randDist(1, std::numeric_limits<uint32_t>::max());
339 return randDist(randGen);
340 #endif
341 }
342
test_all_algorithms(T denom,set_t<T> & tested_denom)343 void test_all_algorithms(T denom, set_t<T> &tested_denom) {
344 #if !defined(__AVR__)
345 if(tested_denom.end()==tested_denom.find(denom)) {
346 #endif
347 PRINT_PROGRESS_MSG(F("Testing deom "));
348 PRINT_PROGRESS_MSG(denom);
349 PRINT_PROGRESS_MSG(F("\n"));
350 test_many<BRANCHFULL>(denom);
351 test_many<BRANCHFREE>(denom);
352 #if !defined(__AVR__)
353 tested_denom.insert(denom);
354 }
355 #else
356 UNUSED(tested_denom);
357 #endif
358 }
359
test_both_signs(UT denom,set_t<T> & tested_denom)360 void test_both_signs(UT denom, set_t<T> &tested_denom) {
361 test_all_algorithms(denom, tested_denom);
362
363 if (limits::is_signed) {
364 test_all_algorithms(-denom, tested_denom);
365 }
366 }
367
368 public:
369
DivideTest()370 DivideTest() {
371 seed = randomSeed();
372 rand_n = (UT)randomSeed();
373 }
374
run()375 void run() {
376 set_t<T> tested_denom;
377
378 // Test small values
379 #if defined(__AVR__)
380 UT primes[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173};
381 for (size_t index = 0; index < sizeof(primes)/sizeof(primes[0]); ++index) {
382 test_both_signs(primes[index], tested_denom);
383 }
384 #else
385 for (UT denom = 1; denom < 1024; ++denom) {
386 test_both_signs(denom, tested_denom);
387 }
388 #endif
389
390 if (limits::is_signed) {
391 PRINT_PROGRESS_MSG(F("Testing minimum\n"));
392 test_all_algorithms((limits::min)(), tested_denom);
393 }
394
395 PRINT_PROGRESS_MSG(F("Testing maximum\n"));
396 test_all_algorithms((limits::max)(), tested_denom);
397
398 // test power of 2 denoms: 2^i-1, 2^i, 2^i+1
399 PRINT_PROGRESS_MSG(F("Testing powers of 2\n"));
400 for (int i = 1; i < limits::digits; i++) {
401 for (int j = -1; j <= 1; j++) {
402 T denom = static_cast<UT>((static_cast<T>(1) << i) + j);
403
404 test_both_signs(denom, tested_denom);
405 }
406 }
407
408 // test all bits set:
409 // 11111111, 11111110, 11111100, ...
410 PRINT_PROGRESS_MSG(F("Testing all bits set\n"));
411 // For signed types, this degenerates to negative powers of
412 // 2 (-1, -2, -4....): since we just tested those (above), skip.
413 if (!limits::is_signed) {
414 for (UT bits = (std::numeric_limits<UT>::max)(); bits != 0; bits <<= 1) {
415 PRINT_PROGRESS_MSG((T)bits);
416 PRINT_PROGRESS_MSG("\n");
417 test_all_algorithms((T)bits, tested_denom);
418 }
419 }
420
421 // Test random denominators
422 #if !defined(__AVR__)
423 PRINT_PROGRESS_MSG(F("Test random denominators\n"));
424 for (int i = 0; i < 10000; ++i) {
425 test_all_algorithms(random_denominator(), tested_denom);
426 }
427 #endif
428 }
429 };
430
431 template<typename IntT>
432 constexpr IntT DivideTest<IntT>::edgeCases[];
433
434 template <typename T>
run_test()435 void run_test() {
436 PRINT_INFO(F("Testing "));
437 PRINT_INFO(type_name<T>::get_name());
438 PRINT_INFO(F("\n"));
439 DivideTest<T> dt;
440 dt.run();
441 }
442
443 #ifdef _MSC_VER
444 #pragma warning(pop)
445 #endif