1 /*
2 * Copyright © 2018, VideoLAN and dav1d authors
3 * Copyright © 2018, Two Orioles, LLC
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #include "tests/checkasm/checkasm.h"
28
29 #include <math.h>
30 #include <stdarg.h>
31 #include <stdio.h>
32 #include <string.h>
33
34 #include "src/cpu.h"
35
36 #ifdef _WIN32
37 #include <windows.h>
38 #define COLOR_RED FOREGROUND_RED
39 #define COLOR_GREEN FOREGROUND_GREEN
40 #define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
41
get_seed(void)42 static unsigned get_seed(void) {
43 return GetTickCount();
44 }
45 #else
46 #include <unistd.h>
47 #include <signal.h>
48 #include <time.h>
49 #ifdef __APPLE__
50 #include <mach/mach_time.h>
51 #endif
52 #define COLOR_RED 1
53 #define COLOR_GREEN 2
54 #define COLOR_YELLOW 3
55
/* Derive a seed for the rng from a system clock.
 *
 * Uses mach_absolute_time() on Apple platforms and the monotonic clock when
 * HAVE_CLOCK_GETTIME is defined. If neither is available the wall-clock time
 * from time() is used, so that the function always returns a defined value. */
static unsigned get_seed(void) {
#ifdef __APPLE__
    return (unsigned) mach_absolute_time();
#elif defined(HAVE_CLOCK_GETTIME)
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (unsigned) (1000000000ULL * ts.tv_sec + ts.tv_nsec);
#else
    /* No high-resolution clock configured: fall back to seconds resolution
     * rather than falling off the end of a non-void function. */
    return (unsigned) time(NULL);
#endif
}
65 #endif
66
67 /* List of tests to invoke */
68 static const struct {
69 const char *name;
70 void (*func)(void);
71 } tests[] = {
72 { "msac", checkasm_check_msac },
73 #if CONFIG_8BPC
74 { "cdef_8bpc", checkasm_check_cdef_8bpc },
75 { "filmgrain_8bpc", checkasm_check_filmgrain_8bpc },
76 { "ipred_8bpc", checkasm_check_ipred_8bpc },
77 { "itx_8bpc", checkasm_check_itx_8bpc },
78 { "loopfilter_8bpc", checkasm_check_loopfilter_8bpc },
79 { "looprestoration_8bpc", checkasm_check_looprestoration_8bpc },
80 { "mc_8bpc", checkasm_check_mc_8bpc },
81 #endif
82 #if CONFIG_16BPC
83 { "cdef_16bpc", checkasm_check_cdef_16bpc },
84 { "filmgrain_16bpc", checkasm_check_filmgrain_16bpc },
85 { "ipred_16bpc", checkasm_check_ipred_16bpc },
86 { "itx_16bpc", checkasm_check_itx_16bpc },
87 { "loopfilter_16bpc", checkasm_check_loopfilter_16bpc },
88 { "looprestoration_16bpc", checkasm_check_looprestoration_16bpc },
89 { "mc_16bpc", checkasm_check_mc_16bpc },
90 #endif
91 { 0 }
92 };
93
/* Table of cpu flags to test: display name, function name suffix and
 * flag bit. The all-zero entry at the end terminates the table. */
static const struct {
    const char *name;
    const char *suffix;
    unsigned flag;
} cpus[] = {
#if ARCH_X86
    { .name = "SSE2",   .suffix = "sse2",  .flag = DAV1D_X86_CPU_FLAG_SSE2 },
    { .name = "SSSE3",  .suffix = "ssse3", .flag = DAV1D_X86_CPU_FLAG_SSSE3 },
    { .name = "SSE4.1", .suffix = "sse4",  .flag = DAV1D_X86_CPU_FLAG_SSE41 },
    { .name = "AVX2",   .suffix = "avx2",  .flag = DAV1D_X86_CPU_FLAG_AVX2 },
    { .name = "AVX-512 (Ice Lake)", .suffix = "avx512icl",
      .flag = DAV1D_X86_CPU_FLAG_AVX512ICL },
#elif ARCH_AARCH64 || ARCH_ARM
    { .name = "NEON", .suffix = "neon", .flag = DAV1D_ARM_CPU_FLAG_NEON },
#elif ARCH_PPC64LE
    { .name = "VSX", .suffix = "vsx", .flag = DAV1D_PPC_CPU_FLAG_VSX },
#endif
    { 0 }
};
113
/* One registered implementation of a function. All implementations that
 * share a function name are chained together through `next`. */
typedef struct CheckasmFuncVersion {
    struct CheckasmFuncVersion *next; /* next implementation, NULL at the end */
    void *func;      /* function pointer passed to checkasm_check_func() */
    int ok;          /* 1 when registered, cleared by checkasm_fail_func() */
    unsigned cpu;    /* value of state.cpu_flag at registration time */
    int iterations;  /* accumulated by checkasm_update_bench() */
    uint64_t cycles; /* accumulated by checkasm_update_bench() */
} CheckasmFuncVersion;
122
/* Binary search tree node, keyed by function name (see get_func()) */
typedef struct CheckasmFunc {
    struct CheckasmFunc *child[2]; /* [0] = names ordered before, [1] = after */
    CheckasmFuncVersion versions;  /* head of the version list, stored inline */
    uint8_t color; /* 0 = red, 1 = black */
    char name[];   /* NUL-terminated name, allocated together with the node */
} CheckasmFunc;
130
/* Internal state */
static struct {
    CheckasmFunc *funcs;                   /* root of the function tree */
    CheckasmFunc *current_func;            /* node selected by the last checkasm_check_func() */
    CheckasmFuncVersion *current_func_ver; /* version registered by the last checkasm_check_func() */
    const char *current_test_name;         /* name of the tests[] entry being run */
    const char *bench_pattern;             /* --bench name prefix, NULL if not benchmarking */
    size_t bench_pattern_len;              /* length of the pattern given via --bench=<pattern> */
    int num_checked;                       /* incremented in checkasm_check_func() */
    int num_failed;                        /* incremented in checkasm_fail_func() */
    int nop_time;                          /* result of measure_nop_time() */
    unsigned cpu_flag;                     /* value of dav1d_get_cpu_flags() for the current pass */
    const char *cpu_flag_name;             /* heading still to be printed by print_cpu_name() */
    const char *test_name;                 /* --test=<name> filter, NULL to run every test */
    unsigned seed;                         /* seed handed to xor128_srand() */
    int bench_c;                           /* set by --bench-c */
    int verbose;                           /* set by --verbose / -v */
    int function_listing;                  /* set by --list-functions */
#if ARCH_X86_64
    void (*simd_warmup)(void);             /* warmup routine selected in main(), may be NULL */
#endif
} state;
153
/* float compare support code */
/* Gives access to the bit pattern of a float as a 32-bit unsigned integer */
typedef union {
    float f;    /* the value as a float */
    uint32_t i; /* the same storage read as an integer */
} intfloat;
159
/* The four 32-bit words of xor128 generator state */
static uint32_t xs_state[4];

/* Fill the generator state with four words derived from the seed:
 * the seed itself, its complement, and two mixes of their 16-bit halves */
static void xor128_srand(unsigned seed) {
    const uint32_t plain    = seed;
    const uint32_t inverted = ~seed;
    const uint32_t hi_mask  = 0xffff0000;
    const uint32_t lo_mask  = 0x0000ffff;

    xs_state[0] = plain;
    xs_state[1] = (plain    & hi_mask) | (inverted & lo_mask);
    xs_state[2] = (inverted & hi_mask) | (plain    & lo_mask);
    xs_state[3] = inverted;
}

// xor128 from Marsaglia, George (July 2003). "Xorshift RNGs".
// Journal of Statistical Software. 8 (14).
// doi:10.18637/jss.v008.i14.
//
// Advances the generator by one step and returns the top 31 bits of the new
// word, so the result is never negative.
int xor128_rand(void) {
    /* Scramble the oldest word */
    uint32_t mixed = xs_state[0];
    mixed ^= mixed << 11;
    mixed ^= mixed >> 8;

    /* Shift the remaining words down by one slot */
    const uint32_t newest = xs_state[3];
    for (int slot = 0; slot < 3; slot++)
        xs_state[slot] = xs_state[slot + 1];

    xs_state[3] = newest ^ (newest >> 19) ^ mixed;

    return (int) (xs_state[3] >> 1);
}
186
is_negative(const intfloat u)187 static int is_negative(const intfloat u) {
188 return u.i >> 31;
189 }
190
float_near_ulp(const float a,const float b,const unsigned max_ulp)191 int float_near_ulp(const float a, const float b, const unsigned max_ulp) {
192 intfloat x, y;
193
194 x.f = a;
195 y.f = b;
196
197 if (is_negative(x) != is_negative(y)) {
198 // handle -0.0 == +0.0
199 return a == b;
200 }
201
202 if (llabs((int64_t)x.i - y.i) <= max_ulp)
203 return 1;
204
205 return 0;
206 }
207
/* Return 1 if float_near_ulp() holds for each of the first len element
 * pairs of a and b, 0 as soon as one pair fails */
int float_near_ulp_array(const float *const a, const float *const b,
                         const unsigned max_ulp, const int len)
{
    int idx = 0;

    while (idx < len && float_near_ulp(a[idx], b[idx], max_ulp))
        idx++;

    /* Stopping before len means a pair was rejected */
    return idx >= len;
}
217
/* Return 1 if the absolute difference of a and b is strictly below eps */
int float_near_abs_eps(const float a, const float b, const float eps) {
    const float distance = fabsf(a - b);
    return distance < eps;
}
221
/* Return 1 if float_near_abs_eps() holds for each of the first len element
 * pairs of a and b, 0 as soon as one pair fails */
int float_near_abs_eps_array(const float *const a, const float *const b,
                             const float eps, const int len)
{
    int idx = 0;

    while (idx < len && float_near_abs_eps(a[idx], b[idx], eps))
        idx++;

    /* Stopping before len means a pair was rejected */
    return idx >= len;
}
231
/* Return 1 if a and b are accepted by either the ulp-based or the
 * absolute-difference comparison, else 0 */
int float_near_abs_eps_ulp(const float a, const float b, const float eps,
                           const unsigned max_ulp)
{
    /* The ulp comparison is tried first; eps is only the fallback */
    if (float_near_ulp(a, b, max_ulp))
        return 1;

    return float_near_abs_eps(a, b, eps) != 0;
}
237
/* Return 1 if float_near_abs_eps_ulp() holds for each of the first len
 * element pairs of a and b, 0 as soon as one pair fails */
int float_near_abs_eps_array_ulp(const float *const a, const float *const b,
                                 const float eps, const unsigned max_ulp,
                                 const int len)
{
    int idx = 0;

    while (idx < len && float_near_abs_eps_ulp(a[idx], b[idx], eps, max_ulp))
        idx++;

    /* Stopping before len means a pair was rejected */
    return idx >= len;
}
248
/* Write a printf-style message to stderr. The message is wrapped in color
 * control (console attributes on Windows, escape sequences elsewhere) when
 * the one-time probe below decides that stderr can display it. */
static void color_printf(const int color, const char *const fmt, ...) {
    /* -1 = not probed yet, 0 = plain output, 1 = colored output */
    static int8_t use_color = -1;
    va_list arg;

#ifdef _WIN32
    static HANDLE con;
    static WORD org_attributes;

    if (use_color < 0) {
        CONSOLE_SCREEN_BUFFER_INFO con_info;
        con = GetStdHandle(STD_ERROR_HANDLE);
        use_color = 0;
        if (con && con != INVALID_HANDLE_VALUE &&
            GetConsoleScreenBufferInfo(con, &con_info))
        {
            /* Remember the attributes so they can be restored afterwards */
            org_attributes = con_info.wAttributes;
            use_color = 1;
        }
    }
    if (use_color) {
        /* Replace only the low four attribute bits with the color */
        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) |
                                (color & 0x0f));
    }
#else
    if (use_color < 0) {
        const char *const term = getenv("TERM");
        if (!term || !strcmp(term, "dumb"))
            use_color = 0;
        else
            use_color = isatty(2) != 0;
    }
    if (use_color) {
        const int bold = (color & 0x08) >> 3;
        const int hue  = color & 0x07;
        fprintf(stderr, "\x1b[%d;3%dm", bold, hue);
    }
#endif

    va_start(arg, fmt);
    vfprintf(stderr, fmt, arg);
    va_end(arg);

    if (!use_color)
        return;

    /* Switch back to the attributes that were active before */
#ifdef _WIN32
    SetConsoleTextAttribute(con, org_attributes);
#else
    fprintf(stderr, "\x1b[0m");
#endif
}
293
294 /* Deallocate a tree */
destroy_func_tree(CheckasmFunc * const f)295 static void destroy_func_tree(CheckasmFunc *const f) {
296 if (f) {
297 CheckasmFuncVersion *v = f->versions.next;
298 while (v) {
299 CheckasmFuncVersion *next = v->next;
300 free(v);
301 v = next;
302 }
303
304 destroy_func_tree(f->child[0]);
305 destroy_func_tree(f->child[1]);
306 free(f);
307 }
308 }
309
310 /* Allocate a zero-initialized block, clean up and exit on failure */
checkasm_malloc(const size_t size)311 static void *checkasm_malloc(const size_t size) {
312 void *const ptr = calloc(1, size);
313 if (!ptr) {
314 fprintf(stderr, "checkasm: malloc failed\n");
315 destroy_func_tree(state.funcs);
316 exit(1);
317 }
318 return ptr;
319 }
320
321 /* Get the suffix of the specified cpu flag */
cpu_suffix(const unsigned cpu)322 static const char *cpu_suffix(const unsigned cpu) {
323 for (int i = (int)(sizeof(cpus) / sizeof(*cpus)) - 2; i >= 0; i--)
324 if (cpu & cpus[i].flag)
325 return cpus[i].suffix;
326
327 return "c";
328 }
329
330 #ifdef readtime
/* qsort() comparator for uint16_t values: returns the signed difference
 * of the two elements */
static int cmp_nop(const void *a, const void *b) {
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;
    return (int)lhs - (int)rhs;
}
334
335 /* Measure the overhead of the timing code (in decicycles) */
measure_nop_time(void)336 static int measure_nop_time(void) {
337 uint16_t nops[10000];
338 int nop_sum = 0;
339
340 for (int i = 0; i < 10000; i++) {
341 uint64_t t = readtime();
342 nops[i] = (uint16_t) (readtime() - t);
343 }
344
345 qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
346 for (int i = 2500; i < 7500; i++)
347 nop_sum += nops[i];
348
349 return nop_sum / 500;
350 }
351
352 /* Print benchmark results */
print_benchs(const CheckasmFunc * const f)353 static void print_benchs(const CheckasmFunc *const f) {
354 if (f) {
355 print_benchs(f->child[0]);
356
357 /* Only print functions with at least one assembly version */
358 if (state.bench_c || f->versions.cpu || f->versions.next) {
359 const CheckasmFuncVersion *v = &f->versions;
360 do {
361 if (v->iterations) {
362 const int decicycles = (int) (10*v->cycles/v->iterations -
363 state.nop_time) / 4;
364 printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu),
365 decicycles/10, decicycles%10);
366 }
367 } while ((v = v->next));
368 }
369
370 print_benchs(f->child[1]);
371 }
372 }
373 #endif
374
print_functions(const CheckasmFunc * const f)375 static void print_functions(const CheckasmFunc *const f) {
376 if (f) {
377 print_functions(f->child[0]);
378 printf("%s\n", f->name);
379 print_functions(f->child[1]);
380 }
381 }
382
#define is_digit(x) ((x) >= '0' && (x) <= '9')

/* Compare two names byte-wise, except that when the names diverge inside a
 * run of digits the name with the longer digit run sorts later, so "a9"
 * comes before "a10". Returns a negative, zero or positive value. */
static int cmp_func_names(const char *a, const char *b) {
    const unsigned char *const lhs = (const unsigned char *)a;
    const unsigned char *const rhs = (const unsigned char *)b;
    size_t pos = 0;

    /* Find the first position where the names differ (or the shared end) */
    while (lhs[pos] == rhs[pos] && lhs[pos])
        pos++;
    const int ascii_diff = lhs[pos] - rhs[pos];

    /* Step over the digits both names still have in common */
    while (is_digit(lhs[pos]) && is_digit(rhs[pos]))
        pos++;

    /* If we are inside a number, the name with digits left is the larger */
    if (pos > 0 && is_digit(lhs[pos - 1])) {
        const int digit_diff = is_digit(lhs[pos]) - is_digit(rhs[pos]);
        if (digit_diff)
            return digit_diff;
    }

    return ascii_diff;
}
402
403 /* Perform a tree rotation in the specified direction and return the new root */
rotate_tree(CheckasmFunc * const f,const int dir)404 static CheckasmFunc *rotate_tree(CheckasmFunc *const f, const int dir) {
405 CheckasmFunc *const r = f->child[dir^1];
406 f->child[dir^1] = r->child[dir];
407 r->child[dir] = f;
408 r->color = f->color;
409 f->color = 0;
410 return r;
411 }
412
413 #define is_red(f) ((f) && !(f)->color)
414
415 /* Balance a left-leaning red-black tree at the specified node */
balance_tree(CheckasmFunc ** const root)416 static void balance_tree(CheckasmFunc **const root) {
417 CheckasmFunc *const f = *root;
418
419 if (is_red(f->child[0]) && is_red(f->child[1])) {
420 f->color ^= 1;
421 f->child[0]->color = f->child[1]->color = 1;
422 }
423 else if (!is_red(f->child[0]) && is_red(f->child[1]))
424 *root = rotate_tree(f, 0); /* Rotate left */
425 else if (is_red(f->child[0]) && is_red(f->child[0]->child[0]))
426 *root = rotate_tree(f, 1); /* Rotate right */
427 }
428
429 /* Get a node with the specified name, creating it if it doesn't exist */
get_func(CheckasmFunc ** const root,const char * const name)430 static CheckasmFunc *get_func(CheckasmFunc **const root, const char *const name) {
431 CheckasmFunc *f = *root;
432
433 if (f) {
434 /* Search the tree for a matching node */
435 const int cmp = cmp_func_names(name, f->name);
436 if (cmp) {
437 f = get_func(&f->child[cmp > 0], name);
438
439 /* Rebalance the tree on the way up if a new node was inserted */
440 if (!f->versions.func)
441 balance_tree(root);
442 }
443 } else {
444 /* Allocate and insert a new node into the tree */
445 const size_t name_length = strlen(name) + 1;
446 f = *root = checkasm_malloc(offsetof(CheckasmFunc, name) + name_length);
447 memcpy(f->name, name, name_length);
448 }
449
450 return f;
451 }
452
/* Globally visible context buffer.
 * NOTE(review): presumably filled by checkasm_save_context() and consumed by
 * checkasm_load_context(), which are not defined in this file - confirm. */
checkasm_context checkasm_context_buf;
454
455 /* Crash handling: attempt to catch crashes and handle them
456 * gracefully instead of just aborting abruptly. */
457 #ifdef _WIN32
/* Exception handler installed through AddVectoredExceptionHandler().
 * Maps the exception code to a failure message, records the failure with
 * checkasm_fail_func() and then calls checkasm_load_context(). Exception
 * codes that are not listed are left to other handlers. */
static LONG NTAPI signal_handler(EXCEPTION_POINTERS *const e) {
    switch (e->ExceptionRecord->ExceptionCode) {
    case EXCEPTION_FLT_DIVIDE_BY_ZERO:
    case EXCEPTION_INT_DIVIDE_BY_ZERO:
        checkasm_fail_func("fatal arithmetic error");
        break;
    case EXCEPTION_ILLEGAL_INSTRUCTION:
    case EXCEPTION_PRIV_INSTRUCTION:
        checkasm_fail_func("illegal instruction");
        break;
    case EXCEPTION_ACCESS_VIOLATION:
    case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
    case EXCEPTION_DATATYPE_MISALIGNMENT:
    case EXCEPTION_IN_PAGE_ERROR:
    case EXCEPTION_STACK_OVERFLOW:
        checkasm_fail_func("segmentation fault");
        break;
    default:
        /* Not one of ours: let the search for a handler continue */
        return EXCEPTION_CONTINUE_SEARCH;
    }
    checkasm_load_context();
    return EXCEPTION_CONTINUE_EXECUTION; /* never reached, but shuts up gcc */
}
481 #else
/* Signal handler: uninstalls itself, records a failure whose message
 * depends on the signal number and then calls checkasm_load_context() */
static void signal_handler(const int s) {
    checkasm_set_signal_handler_state(0);

    switch (s) {
    case SIGFPE:
        checkasm_fail_func("fatal arithmetic error");
        break;
    case SIGILL:
        checkasm_fail_func("illegal instruction");
        break;
    default:
        /* Every other signal is reported as a segmentation fault */
        checkasm_fail_func("segmentation fault");
        break;
    }

    checkasm_load_context();
}
489 #endif
490
491 /* Perform tests and benchmarks for the specified
492 * cpu flag if supported by the host */
check_cpu_flag(const char * const name,unsigned flag)493 static void check_cpu_flag(const char *const name, unsigned flag) {
494 const unsigned old_cpu_flag = state.cpu_flag;
495
496 flag |= old_cpu_flag;
497 dav1d_set_cpu_flags_mask(flag);
498 state.cpu_flag = dav1d_get_cpu_flags();
499
500 if (!flag || state.cpu_flag != old_cpu_flag) {
501 state.cpu_flag_name = name;
502 for (int i = 0; tests[i].func; i++) {
503 if (state.test_name && strcmp(tests[i].name, state.test_name))
504 continue;
505 xor128_srand(state.seed);
506 state.current_test_name = tests[i].name;
507 tests[i].func();
508 }
509 }
510 }
511
512 /* Print the name of the current CPU flag, but only do it once */
print_cpu_name(void)513 static void print_cpu_name(void) {
514 if (state.cpu_flag_name) {
515 color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
516 state.cpu_flag_name = NULL;
517 }
518 }
519
main(int argc,char * argv[])520 int main(int argc, char *argv[]) {
521 state.seed = get_seed();
522
523 while (argc > 1) {
524 if (!strncmp(argv[1], "--help", 6)) {
525 fprintf(stdout,
526 "checkasm [options] <random seed>\n"
527 " <random seed> Numeric value to seed the rng\n"
528 "Options:\n"
529 " --test=<test_name> Test only <test_name>\n"
530 " --bench=<pattern> Test and benchmark the functions matching <pattern>\n"
531 " --list-functions List available functions\n"
532 " --list-tests List available tests\n"
533 " --bench-c Benchmark the C-only functions\n"
534 " --verbose -v Print failures verbosely\n");
535 return 0;
536 } else if (!strncmp(argv[1], "--bench-c", 9)) {
537 state.bench_c = 1;
538 } else if (!strncmp(argv[1], "--bench", 7)) {
539 #ifndef readtime
540 fprintf(stderr,
541 "checkasm: --bench is not supported on your system\n");
542 return 1;
543 #endif
544 if (argv[1][7] == '=') {
545 state.bench_pattern = argv[1] + 8;
546 state.bench_pattern_len = strlen(state.bench_pattern);
547 } else
548 state.bench_pattern = "";
549 } else if (!strncmp(argv[1], "--test=", 7)) {
550 state.test_name = argv[1] + 7;
551 } else if (!strcmp(argv[1], "--list-functions")) {
552 state.function_listing = 1;
553 } else if (!strcmp(argv[1], "--list-tests")) {
554 for (int i = 0; tests[i].name; i++)
555 printf("%s\n", tests[i].name);
556 return 0;
557 } else if (!strcmp(argv[1], "--verbose") || !strcmp(argv[1], "-v")) {
558 state.verbose = 1;
559 } else {
560 state.seed = (unsigned) strtoul(argv[1], NULL, 10);
561 }
562
563 argc--;
564 argv++;
565 }
566
567 dav1d_init_cpu();
568
569 #ifdef readtime
570 if (state.bench_pattern) {
571 static int testing = 0;
572 checkasm_save_context();
573 if (!testing) {
574 checkasm_set_signal_handler_state(1);
575 testing = 1;
576 readtime();
577 checkasm_set_signal_handler_state(0);
578 } else {
579 fprintf(stderr, "checkasm: unable to access cycle counter\n");
580 return 1;
581 }
582 }
583 #endif
584
585 int ret = 0;
586
587 if (!state.function_listing) {
588 fprintf(stderr, "checkasm: using random seed %u\n", state.seed);
589 #if ARCH_X86_64
590 void checkasm_warmup_avx2(void);
591 void checkasm_warmup_avx512(void);
592 const unsigned cpu_flags = dav1d_get_cpu_flags();
593 if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX512ICL)
594 state.simd_warmup = checkasm_warmup_avx512;
595 else if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX2)
596 state.simd_warmup = checkasm_warmup_avx2;
597 checkasm_simd_warmup();
598 #endif
599 }
600
601 check_cpu_flag(NULL, 0);
602
603 if (state.function_listing) {
604 print_functions(state.funcs);
605 } else {
606 for (int i = 0; cpus[i].flag; i++)
607 check_cpu_flag(cpus[i].name, cpus[i].flag);
608 if (!state.num_checked) {
609 fprintf(stderr, "checkasm: no tests to perform\n");
610 } else if (state.num_failed) {
611 fprintf(stderr, "checkasm: %d of %d tests have failed\n",
612 state.num_failed, state.num_checked);
613 ret = 1;
614 } else {
615 fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
616 #ifdef readtime
617 if (state.bench_pattern) {
618 state.nop_time = measure_nop_time();
619 printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
620 print_benchs(state.funcs);
621 }
622 #endif
623 }
624 }
625
626 destroy_func_tree(state.funcs);
627 return ret;
628 }
629
630 /* Decide whether or not the specified function needs to be tested and
631 * allocate/initialize data structures if needed. Returns a pointer to a
632 * reference function if the function should be tested, otherwise NULL */
checkasm_check_func(void * const func,const char * const name,...)633 void *checkasm_check_func(void *const func, const char *const name, ...) {
634 char name_buf[256];
635 va_list arg;
636
637 va_start(arg, name);
638 const int name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
639 va_end(arg);
640
641 if (!func || name_length <= 0 || (size_t)name_length >= sizeof(name_buf))
642 return NULL;
643
644 state.current_func = get_func(&state.funcs, name_buf);
645
646 if (state.function_listing) /* Save function names without running tests */
647 return NULL;
648
649 state.funcs->color = 1;
650 CheckasmFuncVersion *v = &state.current_func->versions;
651 void *ref = func;
652
653 if (v->func) {
654 CheckasmFuncVersion *prev;
655 do {
656 /* Only test functions that haven't already been tested */
657 if (v->func == func)
658 return NULL;
659
660 if (v->ok)
661 ref = v->func;
662
663 prev = v;
664 } while ((v = v->next));
665
666 v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
667 }
668
669 v->func = func;
670 v->ok = 1;
671 v->cpu = state.cpu_flag;
672 state.current_func_ver = v;
673 xor128_srand(state.seed);
674
675 if (state.cpu_flag || state.bench_c)
676 state.num_checked++;
677
678 return ref;
679 }
680
681 /* Decide whether or not the current function needs to be benchmarked */
checkasm_bench_func(void)682 int checkasm_bench_func(void) {
683 return !state.num_failed && state.bench_pattern &&
684 !strncmp(state.current_func->name, state.bench_pattern,
685 state.bench_pattern_len);
686 }
687
688 /* Indicate that the current test has failed, return whether verbose printing
689 * is requested. */
checkasm_fail_func(const char * const msg,...)690 int checkasm_fail_func(const char *const msg, ...) {
691 if (state.current_func_ver && state.current_func_ver->cpu &&
692 state.current_func_ver->ok)
693 {
694 va_list arg;
695
696 print_cpu_name();
697 fprintf(stderr, " %s_%s (", state.current_func->name,
698 cpu_suffix(state.current_func_ver->cpu));
699 va_start(arg, msg);
700 vfprintf(stderr, msg, arg);
701 va_end(arg);
702 fprintf(stderr, ")\n");
703
704 state.current_func_ver->ok = 0;
705 state.num_failed++;
706 }
707 return state.verbose;
708 }
709
710 /* Update benchmark results of the current function */
checkasm_update_bench(const int iterations,const uint64_t cycles)711 void checkasm_update_bench(const int iterations, const uint64_t cycles) {
712 state.current_func_ver->iterations += iterations;
713 state.current_func_ver->cycles += cycles;
714 }
715
716 /* Print the outcome of all tests performed since
717 * the last time this function was called */
checkasm_report(const char * const name,...)718 void checkasm_report(const char *const name, ...) {
719 static int prev_checked, prev_failed;
720 static size_t max_length;
721
722 if (state.num_checked > prev_checked) {
723 int pad_length = (int) max_length + 4;
724 va_list arg;
725
726 print_cpu_name();
727 pad_length -= fprintf(stderr, " - %s.", state.current_test_name);
728 va_start(arg, name);
729 pad_length -= vfprintf(stderr, name, arg);
730 va_end(arg);
731 fprintf(stderr, "%*c", imax(pad_length, 0) + 2, '[');
732
733 if (state.num_failed == prev_failed)
734 color_printf(COLOR_GREEN, "OK");
735 else
736 color_printf(COLOR_RED, "FAILED");
737 fprintf(stderr, "]\n");
738
739 prev_checked = state.num_checked;
740 prev_failed = state.num_failed;
741 } else if (!state.cpu_flag) {
742 /* Calculate the amount of padding required
743 * to make the output vertically aligned */
744 size_t length = strlen(state.current_test_name);
745 va_list arg;
746
747 va_start(arg, name);
748 length += vsnprintf(NULL, 0, name, arg);
749 va_end(arg);
750
751 if (length > max_length)
752 max_length = length;
753 }
754 }
755
checkasm_set_signal_handler_state(const int enabled)756 void checkasm_set_signal_handler_state(const int enabled) {
757 #ifdef _WIN32
758 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
759 if (enabled)
760 AddVectoredExceptionHandler(0, signal_handler);
761 else
762 RemoveVectoredExceptionHandler(signal_handler);
763 #endif
764 #else
765 void (*const handler)(int) = enabled ? signal_handler : SIG_DFL;
766 signal(SIGBUS, handler);
767 signal(SIGFPE, handler);
768 signal(SIGILL, handler);
769 signal(SIGSEGV, handler);
770 #endif
771 }
772
/* Report a buffer mismatch found at file:line.
 *
 * Returns 1 when the caller should stop right away (verbose output is not
 * requested). Returns 0 when the caller should carry on: either an error was
 * already reported through *err, or this call has just set *err and printed
 * the "name (WxH):" heading for the verbose dump. */
static int check_err(const char *const file, const int line,
                     const char *const name, const int w, const int h,
                     int *const err)
{
    /* The failure and the heading are only emitted once per check */
    if (*err)
        return 0;

    const int verbose = checkasm_fail_func("%s:%d", file, line);
    if (verbose) {
        *err = 1;
        fprintf(stderr, "%s (%dx%d):\n", name, w, h);
        return 0;
    }

    return 1;
}
785
/* Define checkasm_check_<type>(), which compares two 2D buffers.
 *
 * type: element type of the buffers
 * fmt:  printf format used to dump a single element
 *
 * The generated function takes strides in bytes (they are divided by the
 * element size before use) and works in these steps:
 *  1. Compare the w x h area row by row. On a mismatch the failure is
 *     reported through check_err(); if check_err() returns 1 the function
 *     returns 1 immediately. Otherwise each row is dumped as the elements
 *     of buf1, the elements of buf2 and a map with 'x' at differing
 *     positions and '.' elsewhere.
 *  2. Compare the `padding` rows above the area, the `padding` rows
 *     starting at the height rounded up to align_h, the `padding` elements
 *     left of each row, and the `padding` elements starting at the width
 *     rounded up to align_w. A difference is reported as an overwrite in
 *     the respective direction.
 *
 * Returns 1 if any difference was found, 0 otherwise.
 * NOTE(review): the rounding expressions assume that align_w and align_h
 * are powers of two - confirm against the callers. */
#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
int checkasm_check_##type(const char *const file, const int line, \
                          const type *buf1, ptrdiff_t stride1, \
                          const type *buf2, ptrdiff_t stride2, \
                          const int w, int h, const char *const name, \
                          const int align_w, const int align_h, \
                          const int padding) \
{ \
    int aligned_w = (w + align_w - 1) & ~(align_w - 1); \
    int aligned_h = (h + align_h - 1) & ~(align_h - 1); \
    int err = 0; \
    stride1 /= sizeof(*buf1); \
    stride2 /= sizeof(*buf2); \
    int y = 0; \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \
            break; \
    if (y != h) { \
        if (check_err(file, line, name, w, h, &err)) \
            return 1; \
        for (y = 0; y < h; y++) { \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, " " fmt, buf1[x]); \
            fprintf(stderr, " "); \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, " " fmt, buf2[x]); \
            fprintf(stderr, " "); \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
            buf1 += stride1; \
            buf2 += stride2; \
            fprintf(stderr, "\n"); \
        } \
        buf1 -= h*stride1; \
        buf2 -= h*stride2; \
    } \
    for (y = -padding; y < 0; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   (w + 2*padding)*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite above\n"); \
            break; \
        } \
    for (y = aligned_h; y < aligned_h + padding; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   (w + 2*padding)*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite below\n"); \
            break; \
        } \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   padding*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite left\n"); \
            break; \
        } \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1 + aligned_w], &buf2[y*stride2 + aligned_w], \
                   padding*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite right\n"); \
            break; \
        } \
    return err; \
}

/* Instantiate the comparison function for each supported element type */
DEF_CHECKASM_CHECK_FUNC(int8_t, "%4d")
DEF_CHECKASM_CHECK_FUNC(uint8_t, "%02x")
DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
DEF_CHECKASM_CHECK_FUNC(int16_t, "%6d")
DEF_CHECKASM_CHECK_FUNC(int32_t, "%9d")
862
863 #if ARCH_X86_64
checkasm_simd_warmup(void)864 void checkasm_simd_warmup(void)
865 {
866 if (state.simd_warmup)
867 state.simd_warmup();
868 }
869 #endif
870