1 /*
2  * Copyright © 2018, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 #include "tests/checkasm/checkasm.h"
28 
29 #include <math.h>
30 #include <stdarg.h>
31 #include <stdio.h>
32 #include <string.h>
33 
34 #include "src/cpu.h"
35 
36 #ifdef _WIN32
37 #include <windows.h>
38 #define COLOR_RED    FOREGROUND_RED
39 #define COLOR_GREEN  FOREGROUND_GREEN
40 #define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
41 
/* Derive the default RNG seed from the Windows tick counter. */
static unsigned get_seed(void) {
    const unsigned ticks = GetTickCount();
    return ticks;
}
45 #else
46 #include <unistd.h>
47 #include <signal.h>
48 #include <time.h>
49 #ifdef __APPLE__
50 #include <mach/mach_time.h>
51 #endif
52 #define COLOR_RED    1
53 #define COLOR_GREEN  2
54 #define COLOR_YELLOW 3
55 
/* Derive the default RNG seed from a system clock.
 *
 * Uses mach_absolute_time() on Apple platforms and the monotonic clock where
 * clock_gettime() is available. When neither is available the wall-clock time
 * in seconds is used, so that the function always returns a defined value. */
static unsigned get_seed(void) {
#ifdef __APPLE__
    return (unsigned) mach_absolute_time();
#elif defined(HAVE_CLOCK_GETTIME)
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    /* Fold seconds and nanoseconds together; truncation to unsigned is intended */
    return (unsigned) (1000000000ULL * ts.tv_sec + ts.tv_nsec);
#else
    /* Low-resolution fallback: previously this path returned nothing at all */
    return (unsigned) time(NULL);
#endif
}
65 #endif
66 
/* List of tests to invoke.
 *
 * Each entry pairs the name accepted by --test= (and printed by --list-tests)
 * with the function that runs the test. The 8 bpc and 16 bpc entries are only
 * present when the corresponding CONFIG_* option is enabled. The table is
 * terminated by a zeroed entry; loops over it stop at the first NULL member. */
static const struct {
    const char *name;     /* test name as used on the command line */
    void (*func)(void);   /* test entry point */
} tests[] = {
    { "msac", checkasm_check_msac },
#if CONFIG_8BPC
    { "cdef_8bpc", checkasm_check_cdef_8bpc },
    { "filmgrain_8bpc", checkasm_check_filmgrain_8bpc },
    { "ipred_8bpc", checkasm_check_ipred_8bpc },
    { "itx_8bpc", checkasm_check_itx_8bpc },
    { "loopfilter_8bpc", checkasm_check_loopfilter_8bpc },
    { "looprestoration_8bpc", checkasm_check_looprestoration_8bpc },
    { "mc_8bpc", checkasm_check_mc_8bpc },
#endif
#if CONFIG_16BPC
    { "cdef_16bpc", checkasm_check_cdef_16bpc },
    { "filmgrain_16bpc", checkasm_check_filmgrain_16bpc },
    { "ipred_16bpc", checkasm_check_ipred_16bpc },
    { "itx_16bpc", checkasm_check_itx_16bpc },
    { "loopfilter_16bpc", checkasm_check_loopfilter_16bpc },
    { "looprestoration_16bpc", checkasm_check_looprestoration_16bpc },
    { "mc_16bpc", checkasm_check_mc_16bpc },
#endif
    { 0 } /* terminator */
};
93 
/* List of cpu flags to check.
 *
 * main() runs one test pass per entry, in table order, and cpu_suffix() scans
 * the table from the last real entry backwards, so when several flags are set
 * the later entry determines the printed suffix. The table is terminated by a
 * zeroed entry (flag == 0). */
static const struct {
    const char *name;   /* human readable name printed as a section header */
    const char *suffix; /* suffix appended to function names in reports */
    unsigned flag;      /* cpu flag bit for this instruction set */
} cpus[] = {
#if ARCH_X86
    { "SSE2",               "sse2",      DAV1D_X86_CPU_FLAG_SSE2 },
    { "SSSE3",              "ssse3",     DAV1D_X86_CPU_FLAG_SSSE3 },
    { "SSE4.1",             "sse4",      DAV1D_X86_CPU_FLAG_SSE41 },
    { "AVX2",               "avx2",      DAV1D_X86_CPU_FLAG_AVX2 },
    { "AVX-512 (Ice Lake)", "avx512icl", DAV1D_X86_CPU_FLAG_AVX512ICL },
#elif ARCH_AARCH64 || ARCH_ARM
    { "NEON",               "neon",      DAV1D_ARM_CPU_FLAG_NEON },
#elif ARCH_PPC64LE
    { "VSX",                "vsx",       DAV1D_PPC_CPU_FLAG_VSX },
#endif
    { 0 } /* terminator */
};
113 
/* One registered implementation (C or a specific cpu flag set) of a function.
 * Versions of the same function form a singly linked list. */
typedef struct CheckasmFuncVersion {
    struct CheckasmFuncVersion *next; /* next version of the same function, or NULL */
    void *func;                       /* pointer to the implementation */
    int ok;                           /* 1 until a failure is reported for this version */
    unsigned cpu;                     /* cpu flags active when it was registered (0 for the C pass) */
    int iterations;                   /* benchmark iterations accumulated so far */
    uint64_t cycles;                  /* benchmark timer ticks accumulated so far */
} CheckasmFuncVersion;
122 
/* Binary search tree node (left-leaning red-black tree keyed by function
 * name, ordered by cmp_func_names()) */
typedef struct CheckasmFunc {
    struct CheckasmFunc *child[2]; /* [0] = names sorting before, [1] = names sorting after */
    CheckasmFuncVersion versions;  /* first version stored inline; further ones chained via next */
    uint8_t color; /* 0 = red, 1 = black */
    char name[];                   /* NUL-terminated function name, allocated with the node */
} CheckasmFunc;
130 
/* Internal state shared by the test driver and the checkasm_* callbacks */
static struct {
    CheckasmFunc *funcs;                   /* root of the function tree */
    CheckasmFunc *current_func;            /* function selected by the last checkasm_check_func() */
    CheckasmFuncVersion *current_func_ver; /* version currently being tested */
    const char *current_test_name;         /* name of the test currently running */
    const char *bench_pattern;             /* --bench prefix pattern; NULL when not benchmarking */
    size_t bench_pattern_len;              /* length of bench_pattern */
    int num_checked;                       /* number of function versions counted as tested */
    int num_failed;                        /* number of function versions that failed */
    int nop_time;                          /* timer overhead from measure_nop_time() */
    unsigned cpu_flag;                     /* cpu flags active for the current pass */
    const char *cpu_flag_name;             /* pass name still to be printed; NULL once printed */
    const char *test_name;                 /* --test= filter; NULL runs every test */
    unsigned seed;                         /* seed used to reset the RNG */
    int bench_c;                           /* --bench-c given */
    int verbose;                           /* --verbose / -v given */
    int function_listing;                  /* --list-functions given */
#if ARCH_X86_64
    void (*simd_warmup)(void);             /* warmup routine chosen in main(), or NULL */
#endif
} state;
153 
/* float compare support code */

/* Gives access to the raw 32-bit pattern of a float, used for sign tests
 * and ULP distance computations. */
typedef union {
    float f;    /* value as a float */
    uint32_t i; /* same bits viewed as an unsigned integer */
} intfloat;
159 
/* The four 32-bit words of xor128 generator state */
static uint32_t xs_state[4];

/* Seed the generator: every state word is derived from the seed by mixing
 * its upper and lower halves with those of its bitwise complement. */
static void xor128_srand(unsigned seed) {
    const unsigned inv = ~seed;
    const unsigned upper = 0xffff0000;
    const unsigned lower = 0x0000ffff;

    xs_state[0] = seed;
    xs_state[1] = (seed & upper) | (inv  & lower);
    xs_state[2] = (inv  & upper) | (seed & lower);
    xs_state[3] = inv;
}

// xor128 from Marsaglia, George (July 2003). "Xorshift RNGs".
//             Journal of Statistical Software. 8 (14).
//             doi:10.18637/jss.v008.i14.
//
// Returns the next pseudo-random value with the top bit dropped, so the
// result is always non-negative.
int xor128_rand(void) {
    const uint32_t oldest = xs_state[0];
    const uint32_t newest = xs_state[3];
    const uint32_t t = oldest ^ (oldest << 11);
    const uint32_t fresh = (newest ^ (newest >> 19)) ^ (t ^ (t >> 8));

    /* Slide the state window down by one word and append the new word */
    for (int i = 0; i < 3; i++)
        xs_state[i] = xs_state[i + 1];
    xs_state[3] = fresh;

    return (int) (fresh >> 1);
}
186 
is_negative(const intfloat u)187 static int is_negative(const intfloat u) {
188     return u.i >> 31;
189 }
190 
float_near_ulp(const float a,const float b,const unsigned max_ulp)191 int float_near_ulp(const float a, const float b, const unsigned max_ulp) {
192     intfloat x, y;
193 
194     x.f = a;
195     y.f = b;
196 
197     if (is_negative(x) != is_negative(y)) {
198         // handle -0.0 == +0.0
199         return a == b;
200     }
201 
202     if (llabs((int64_t)x.i - y.i) <= max_ulp)
203         return 1;
204 
205     return 0;
206 }
207 
/* Return 1 when every pair a[i], b[i] for i < len passes float_near_ulp()
 * with the given max_ulp, 0 as soon as one pair does not. */
int float_near_ulp_array(const float *const a, const float *const b,
                         const unsigned max_ulp, const int len)
{
    int i = 0;

    /* Advance past matching pairs; stops early at the first mismatch */
    while (i < len && float_near_ulp(a[i], b[i], max_ulp))
        i++;

    return i >= len;
}
217 
/* Return 1 when the absolute difference of a and b is strictly below eps */
int float_near_abs_eps(const float a, const float b, const float eps) {
    const float delta = fabsf(a - b);
    return delta < eps;
}
221 
/* Return 1 when every pair a[i], b[i] for i < len passes
 * float_near_abs_eps() with the given eps, 0 otherwise. */
int float_near_abs_eps_array(const float *const a, const float *const b,
                             const float eps, const int len)
{
    int i = 0;

    /* Advance past matching pairs; stops early at the first mismatch */
    while (i < len && float_near_abs_eps(a[i], b[i], eps))
        i++;

    return i >= len;
}
231 
/* Return 1 when a and b are close by either criterion: within max_ulp
 * units in the last place, or within an absolute difference of eps. */
int float_near_abs_eps_ulp(const float a, const float b, const float eps,
                           const unsigned max_ulp)
{
    if (float_near_ulp(a, b, max_ulp))
        return 1;

    return float_near_abs_eps(a, b, eps);
}
237 
/* Return 1 when every pair a[i], b[i] for i < len passes
 * float_near_abs_eps_ulp() with the given eps and max_ulp, 0 otherwise. */
int float_near_abs_eps_array_ulp(const float *const a, const float *const b,
                                 const float eps, const unsigned max_ulp,
                                 const int len)
{
    int i = 0;

    /* Advance past matching pairs; stops early at the first mismatch */
    while (i < len && float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp))
        i++;

    return i >= len;
}
248 
/* printf-style output to stderr, wrapped in the requested color when stderr
 * is a terminal that supports it. The capability check runs once and its
 * result is cached for all later calls. */
static void color_printf(const int color, const char *const fmt, ...) {
    /* -1 = not probed yet, 0 = plain output, 1 = colored output */
    static int8_t use_color = -1;

#ifdef _WIN32
    static HANDLE con;
    static WORD org_attributes;

    if (use_color < 0) {
        CONSOLE_SCREEN_BUFFER_INFO con_info;
        con = GetStdHandle(STD_ERROR_HANDLE);
        use_color = con && con != INVALID_HANDLE_VALUE &&
                    GetConsoleScreenBufferInfo(con, &con_info);
        /* Remember the original attributes so they can be restored below */
        if (use_color)
            org_attributes = con_info.wAttributes;
    }
    if (use_color) {
        /* Replace only the foreground color bits */
        const WORD attributes = (org_attributes & 0xfff0) | (color & 0x0f);
        SetConsoleTextAttribute(con, attributes);
    }
#else
    if (use_color < 0) {
        const char *const term = getenv("TERM");
        if (!term || !strcmp(term, "dumb"))
            use_color = 0;
        else
            use_color = isatty(2) != 0;
    }
    if (use_color) {
        /* Bit 3 selects the bold attribute, bits 0-2 the ANSI color */
        const int bold = (color & 0x08) >> 3;
        const int hue  = color & 0x07;
        fprintf(stderr, "\x1b[%d;3%dm", bold, hue);
    }
#endif

    va_list arg;
    va_start(arg, fmt);
    vfprintf(stderr, fmt, arg);
    va_end(arg);

    if (!use_color)
        return;

    /* Switch back to the default text attributes */
#ifdef _WIN32
    SetConsoleTextAttribute(con, org_attributes);
#else
    fputs("\x1b[0m", stderr);
#endif
}
293 
294 /* Deallocate a tree */
destroy_func_tree(CheckasmFunc * const f)295 static void destroy_func_tree(CheckasmFunc *const f) {
296     if (f) {
297         CheckasmFuncVersion *v = f->versions.next;
298         while (v) {
299             CheckasmFuncVersion *next = v->next;
300             free(v);
301             v = next;
302         }
303 
304         destroy_func_tree(f->child[0]);
305         destroy_func_tree(f->child[1]);
306         free(f);
307     }
308 }
309 
310 /* Allocate a zero-initialized block, clean up and exit on failure */
checkasm_malloc(const size_t size)311 static void *checkasm_malloc(const size_t size) {
312     void *const ptr = calloc(1, size);
313     if (!ptr) {
314         fprintf(stderr, "checkasm: malloc failed\n");
315         destroy_func_tree(state.funcs);
316         exit(1);
317     }
318     return ptr;
319 }
320 
321 /* Get the suffix of the specified cpu flag */
cpu_suffix(const unsigned cpu)322 static const char *cpu_suffix(const unsigned cpu) {
323     for (int i = (int)(sizeof(cpus) / sizeof(*cpus)) - 2; i >= 0; i--)
324         if (cpu & cpus[i].flag)
325             return cpus[i].suffix;
326 
327     return "c";
328 }
329 
330 #ifdef readtime
/* qsort() comparator ordering uint16_t samples in ascending order */
static int cmp_nop(const void *a, const void *b) {
    const uint16_t lhs = *(const uint16_t*)a;
    const uint16_t rhs = *(const uint16_t*)b;

    return (int) lhs - (int) rhs;
}
334 
335 /* Measure the overhead of the timing code (in decicycles) */
measure_nop_time(void)336 static int measure_nop_time(void) {
337     uint16_t nops[10000];
338     int nop_sum = 0;
339 
340     for (int i = 0; i < 10000; i++) {
341         uint64_t t = readtime();
342         nops[i] = (uint16_t) (readtime() - t);
343     }
344 
345     qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
346     for (int i = 2500; i < 7500; i++)
347         nop_sum += nops[i];
348 
349     return nop_sum / 500;
350 }
351 
352 /* Print benchmark results */
print_benchs(const CheckasmFunc * const f)353 static void print_benchs(const CheckasmFunc *const f) {
354     if (f) {
355         print_benchs(f->child[0]);
356 
357         /* Only print functions with at least one assembly version */
358         if (state.bench_c || f->versions.cpu || f->versions.next) {
359             const CheckasmFuncVersion *v = &f->versions;
360             do {
361                 if (v->iterations) {
362                     const int decicycles = (int) (10*v->cycles/v->iterations -
363                                                   state.nop_time) / 4;
364                     printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu),
365                            decicycles/10, decicycles%10);
366                 }
367             } while ((v = v->next));
368         }
369 
370         print_benchs(f->child[1]);
371     }
372 }
373 #endif
374 
print_functions(const CheckasmFunc * const f)375 static void print_functions(const CheckasmFunc *const f) {
376     if (f) {
377         print_functions(f->child[0]);
378         printf("%s\n", f->name);
379         print_functions(f->child[1]);
380     }
381 }
382 
#define is_digit(x) ((x) >= '0' && (x) <= '9')

/* Compare two function names in ASCII order, except that when the names
 * first differ within a run of digits the one with the longer run sorts
 * later (so "w4" sorts before "w16"). Returns a negative, zero or positive
 * value like strcmp(). */
static int cmp_func_names(const char *a, const char *b) {
    const char *const start = a;
    int ascii_diff;

    /* Advance to the first differing character or the end of the strings */
    for (;;) {
        ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b;
        if (ascii_diff || !*a)
            break;
        a++;
        b++;
    }

    /* Skip the digits both names still have in common positions */
    while (is_digit(*a) && is_digit(*b)) {
        a++;
        b++;
    }

    /* Inside a number, the name with digits left over is the larger one */
    if (a > start && is_digit(a[-1])) {
        const int digit_diff = is_digit(*a) - is_digit(*b);
        if (digit_diff)
            return digit_diff;
    }

    return ascii_diff;
}
402 
403 /* Perform a tree rotation in the specified direction and return the new root */
rotate_tree(CheckasmFunc * const f,const int dir)404 static CheckasmFunc *rotate_tree(CheckasmFunc *const f, const int dir) {
405     CheckasmFunc *const r = f->child[dir^1];
406     f->child[dir^1] = r->child[dir];
407     r->child[dir] = f;
408     r->color = f->color;
409     f->color = 0;
410     return r;
411 }
412 
413 #define is_red(f) ((f) && !(f)->color)
414 
415 /* Balance a left-leaning red-black tree at the specified node */
balance_tree(CheckasmFunc ** const root)416 static void balance_tree(CheckasmFunc **const root) {
417     CheckasmFunc *const f = *root;
418 
419     if (is_red(f->child[0]) && is_red(f->child[1])) {
420         f->color ^= 1;
421         f->child[0]->color = f->child[1]->color = 1;
422     }
423     else if (!is_red(f->child[0]) && is_red(f->child[1]))
424         *root = rotate_tree(f, 0); /* Rotate left */
425     else if (is_red(f->child[0]) && is_red(f->child[0]->child[0]))
426         *root = rotate_tree(f, 1); /* Rotate right */
427 }
428 
429 /* Get a node with the specified name, creating it if it doesn't exist */
get_func(CheckasmFunc ** const root,const char * const name)430 static CheckasmFunc *get_func(CheckasmFunc **const root, const char *const name) {
431     CheckasmFunc *f = *root;
432 
433     if (f) {
434         /* Search the tree for a matching node */
435         const int cmp = cmp_func_names(name, f->name);
436         if (cmp) {
437             f = get_func(&f->child[cmp > 0], name);
438 
439             /* Rebalance the tree on the way up if a new node was inserted */
440             if (!f->versions.func)
441                 balance_tree(root);
442         }
443     } else {
444         /* Allocate and insert a new node into the tree */
445         const size_t name_length = strlen(name) + 1;
446         f = *root = checkasm_malloc(offsetof(CheckasmFunc, name) + name_length);
447         memcpy(f->name, name, name_length);
448     }
449 
450     return f;
451 }
452 
/* Global context buffer of type checkasm_context.
 * NOTE(review): presumably the storage used by checkasm_save_context() and
 * checkasm_load_context() from checkasm.h - confirm against the header. */
checkasm_context checkasm_context_buf;
454 
455 /* Crash handling: attempt to catch crashes and handle them
456  * gracefully instead of just aborting abruptly. */
457 #ifdef _WIN32
signal_handler(EXCEPTION_POINTERS * const e)458 static LONG NTAPI signal_handler(EXCEPTION_POINTERS *const e) {
459     switch (e->ExceptionRecord->ExceptionCode) {
460     case EXCEPTION_FLT_DIVIDE_BY_ZERO:
461     case EXCEPTION_INT_DIVIDE_BY_ZERO:
462         checkasm_fail_func("fatal arithmetic error");
463         break;
464     case EXCEPTION_ILLEGAL_INSTRUCTION:
465     case EXCEPTION_PRIV_INSTRUCTION:
466         checkasm_fail_func("illegal instruction");
467         break;
468     case EXCEPTION_ACCESS_VIOLATION:
469     case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
470     case EXCEPTION_DATATYPE_MISALIGNMENT:
471     case EXCEPTION_IN_PAGE_ERROR:
472     case EXCEPTION_STACK_OVERFLOW:
473         checkasm_fail_func("segmentation fault");
474         break;
475     default:
476         return EXCEPTION_CONTINUE_SEARCH;
477     }
478     checkasm_load_context();
479     return EXCEPTION_CONTINUE_EXECUTION; /* never reached, but shuts up gcc */
480 }
481 #else
/* Signal handler: restore the default handlers, report the signal as a
 * failure of the current function and resume at the saved context. */
static void signal_handler(const int s) {
    const char *reason;

    checkasm_set_signal_handler_state(0);

    switch (s) {
    case SIGFPE:
        reason = "fatal arithmetic error";
        break;
    case SIGILL:
        reason = "illegal instruction";
        break;
    default:
        reason = "segmentation fault";
        break;
    }

    checkasm_fail_func(reason);
    checkasm_load_context();
}
489 #endif
490 
491 /* Perform tests and benchmarks for the specified
492  * cpu flag if supported by the host */
check_cpu_flag(const char * const name,unsigned flag)493 static void check_cpu_flag(const char *const name, unsigned flag) {
494     const unsigned old_cpu_flag = state.cpu_flag;
495 
496     flag |= old_cpu_flag;
497     dav1d_set_cpu_flags_mask(flag);
498     state.cpu_flag = dav1d_get_cpu_flags();
499 
500     if (!flag || state.cpu_flag != old_cpu_flag) {
501         state.cpu_flag_name = name;
502         for (int i = 0; tests[i].func; i++) {
503             if (state.test_name && strcmp(tests[i].name, state.test_name))
504                 continue;
505             xor128_srand(state.seed);
506             state.current_test_name = tests[i].name;
507             tests[i].func();
508         }
509     }
510 }
511 
512 /* Print the name of the current CPU flag, but only do it once */
print_cpu_name(void)513 static void print_cpu_name(void) {
514     if (state.cpu_flag_name) {
515         color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
516         state.cpu_flag_name = NULL;
517     }
518 }
519 
main(int argc,char * argv[])520 int main(int argc, char *argv[]) {
521     state.seed = get_seed();
522 
523     while (argc > 1) {
524         if (!strncmp(argv[1], "--help", 6)) {
525             fprintf(stdout,
526                     "checkasm [options] <random seed>\n"
527                     "    <random seed>       Numeric value to seed the rng\n"
528                     "Options:\n"
529                     "    --test=<test_name>  Test only <test_name>\n"
530                     "    --bench=<pattern>   Test and benchmark the functions matching <pattern>\n"
531                     "    --list-functions    List available functions\n"
532                     "    --list-tests        List available tests\n"
533                     "    --bench-c           Benchmark the C-only functions\n"
534                     "    --verbose -v        Print failures verbosely\n");
535             return 0;
536         } else if (!strncmp(argv[1], "--bench-c", 9)) {
537             state.bench_c = 1;
538         } else if (!strncmp(argv[1], "--bench", 7)) {
539 #ifndef readtime
540             fprintf(stderr,
541                     "checkasm: --bench is not supported on your system\n");
542             return 1;
543 #endif
544             if (argv[1][7] == '=') {
545                 state.bench_pattern = argv[1] + 8;
546                 state.bench_pattern_len = strlen(state.bench_pattern);
547             } else
548                 state.bench_pattern = "";
549         } else if (!strncmp(argv[1], "--test=", 7)) {
550             state.test_name = argv[1] + 7;
551         } else if (!strcmp(argv[1], "--list-functions")) {
552             state.function_listing = 1;
553         } else if (!strcmp(argv[1], "--list-tests")) {
554             for (int i = 0; tests[i].name; i++)
555                 printf("%s\n", tests[i].name);
556             return 0;
557         } else if (!strcmp(argv[1], "--verbose") || !strcmp(argv[1], "-v")) {
558             state.verbose = 1;
559         } else {
560             state.seed = (unsigned) strtoul(argv[1], NULL, 10);
561         }
562 
563         argc--;
564         argv++;
565     }
566 
567     dav1d_init_cpu();
568 
569 #ifdef readtime
570     if (state.bench_pattern) {
571         static int testing = 0;
572         checkasm_save_context();
573         if (!testing) {
574             checkasm_set_signal_handler_state(1);
575             testing = 1;
576             readtime();
577             checkasm_set_signal_handler_state(0);
578         } else {
579             fprintf(stderr, "checkasm: unable to access cycle counter\n");
580             return 1;
581         }
582     }
583 #endif
584 
585     int ret = 0;
586 
587     if (!state.function_listing) {
588         fprintf(stderr, "checkasm: using random seed %u\n", state.seed);
589 #if ARCH_X86_64
590         void checkasm_warmup_avx2(void);
591         void checkasm_warmup_avx512(void);
592         const unsigned cpu_flags = dav1d_get_cpu_flags();
593         if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX512ICL)
594             state.simd_warmup = checkasm_warmup_avx512;
595         else if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX2)
596             state.simd_warmup = checkasm_warmup_avx2;
597         checkasm_simd_warmup();
598 #endif
599     }
600 
601     check_cpu_flag(NULL, 0);
602 
603     if (state.function_listing) {
604         print_functions(state.funcs);
605     } else {
606         for (int i = 0; cpus[i].flag; i++)
607             check_cpu_flag(cpus[i].name, cpus[i].flag);
608         if (!state.num_checked) {
609             fprintf(stderr, "checkasm: no tests to perform\n");
610         } else if (state.num_failed) {
611             fprintf(stderr, "checkasm: %d of %d tests have failed\n",
612                     state.num_failed, state.num_checked);
613             ret = 1;
614         } else {
615             fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
616 #ifdef readtime
617             if (state.bench_pattern) {
618                 state.nop_time = measure_nop_time();
619                 printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
620                 print_benchs(state.funcs);
621             }
622 #endif
623         }
624     }
625 
626     destroy_func_tree(state.funcs);
627     return ret;
628 }
629 
630 /* Decide whether or not the specified function needs to be tested and
631  * allocate/initialize data structures if needed. Returns a pointer to a
632  * reference function if the function should be tested, otherwise NULL */
checkasm_check_func(void * const func,const char * const name,...)633 void *checkasm_check_func(void *const func, const char *const name, ...) {
634     char name_buf[256];
635     va_list arg;
636 
637     va_start(arg, name);
638     const int name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
639     va_end(arg);
640 
641     if (!func || name_length <= 0 || (size_t)name_length >= sizeof(name_buf))
642         return NULL;
643 
644     state.current_func = get_func(&state.funcs, name_buf);
645 
646     if (state.function_listing) /* Save function names without running tests */
647         return NULL;
648 
649     state.funcs->color = 1;
650     CheckasmFuncVersion *v = &state.current_func->versions;
651     void *ref = func;
652 
653     if (v->func) {
654         CheckasmFuncVersion *prev;
655         do {
656             /* Only test functions that haven't already been tested */
657             if (v->func == func)
658                 return NULL;
659 
660             if (v->ok)
661                 ref = v->func;
662 
663             prev = v;
664         } while ((v = v->next));
665 
666         v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
667     }
668 
669     v->func = func;
670     v->ok = 1;
671     v->cpu = state.cpu_flag;
672     state.current_func_ver = v;
673     xor128_srand(state.seed);
674 
675     if (state.cpu_flag || state.bench_c)
676         state.num_checked++;
677 
678     return ref;
679 }
680 
681 /* Decide whether or not the current function needs to be benchmarked */
checkasm_bench_func(void)682 int checkasm_bench_func(void) {
683     return !state.num_failed && state.bench_pattern &&
684            !strncmp(state.current_func->name, state.bench_pattern,
685                     state.bench_pattern_len);
686 }
687 
688 /* Indicate that the current test has failed, return whether verbose printing
689  * is requested. */
checkasm_fail_func(const char * const msg,...)690 int checkasm_fail_func(const char *const msg, ...) {
691     if (state.current_func_ver && state.current_func_ver->cpu &&
692         state.current_func_ver->ok)
693     {
694         va_list arg;
695 
696         print_cpu_name();
697         fprintf(stderr, "   %s_%s (", state.current_func->name,
698                 cpu_suffix(state.current_func_ver->cpu));
699         va_start(arg, msg);
700         vfprintf(stderr, msg, arg);
701         va_end(arg);
702         fprintf(stderr, ")\n");
703 
704         state.current_func_ver->ok = 0;
705         state.num_failed++;
706     }
707     return state.verbose;
708 }
709 
710 /* Update benchmark results of the current function */
checkasm_update_bench(const int iterations,const uint64_t cycles)711 void checkasm_update_bench(const int iterations, const uint64_t cycles) {
712     state.current_func_ver->iterations += iterations;
713     state.current_func_ver->cycles += cycles;
714 }
715 
716 /* Print the outcome of all tests performed since
717  * the last time this function was called */
checkasm_report(const char * const name,...)718 void checkasm_report(const char *const name, ...) {
719     static int prev_checked, prev_failed;
720     static size_t max_length;
721 
722     if (state.num_checked > prev_checked) {
723         int pad_length = (int) max_length + 4;
724         va_list arg;
725 
726         print_cpu_name();
727         pad_length -= fprintf(stderr, " - %s.", state.current_test_name);
728         va_start(arg, name);
729         pad_length -= vfprintf(stderr, name, arg);
730         va_end(arg);
731         fprintf(stderr, "%*c", imax(pad_length, 0) + 2, '[');
732 
733         if (state.num_failed == prev_failed)
734             color_printf(COLOR_GREEN, "OK");
735         else
736             color_printf(COLOR_RED, "FAILED");
737         fprintf(stderr, "]\n");
738 
739         prev_checked = state.num_checked;
740         prev_failed  = state.num_failed;
741     } else if (!state.cpu_flag) {
742         /* Calculate the amount of padding required
743          * to make the output vertically aligned */
744         size_t length = strlen(state.current_test_name);
745         va_list arg;
746 
747         va_start(arg, name);
748         length += vsnprintf(NULL, 0, name, arg);
749         va_end(arg);
750 
751         if (length > max_length)
752             max_length = length;
753     }
754 }
755 
checkasm_set_signal_handler_state(const int enabled)756 void checkasm_set_signal_handler_state(const int enabled) {
757 #ifdef _WIN32
758 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
759     if (enabled)
760         AddVectoredExceptionHandler(0, signal_handler);
761     else
762         RemoveVectoredExceptionHandler(signal_handler);
763 #endif
764 #else
765     void (*const handler)(int) = enabled ? signal_handler : SIG_DFL;
766     signal(SIGBUS,  handler);
767     signal(SIGFPE,  handler);
768     signal(SIGILL,  handler);
769     signal(SIGSEGV, handler);
770 #endif
771 }
772 
/* Record a buffer mismatch found by one of the checkasm_check_<type>()
 * functions.
 *
 * The first call (*err == 0) reports the failure with the file and line of
 * the check. Returns 1 when the caller should stop immediately because
 * verbose output was not requested. Otherwise prints the header for the
 * verbose dump, sets *err and returns 0; all later calls return 0. */
static int check_err(const char *const file, const int line,
                     const char *const name, const int w, const int h,
                     int *const err)
{
    if (!*err) {
        const int verbose = checkasm_fail_func("%s:%d", file, line);
        if (!verbose)
            return 1;

        *err = 1;
        fprintf(stderr, "%s (%dx%d):\n", name, w, h);
    }

    return 0;
}
785 
/* Define checkasm_check_<type>(), which compares two 2D buffers of `type`.
 *
 * buf1/buf2 point at the top-left element of the w x h area to compare and
 * stride1/stride2 are the row strides in bytes (they are divided by the
 * element size before use). The generated function checks, in order:
 *   - the w x h area itself, row by row;
 *   - `padding` rows above the area;
 *   - `padding` rows below the height rounded up to align_h;
 *   - `padding` elements to the left of each row;
 *   - `padding` elements to the right of the width rounded up to align_w.
 * The rounding uses a bit mask, so align_w and align_h are expected to be
 * powers of two.
 *
 * The first mismatch is reported through check_err(). Without verbose
 * output the function then returns 1 right away; with verbose output it
 * dumps both buffers using `fmt` for each element, followed by a map that
 * marks differing elements with 'x', names every overwritten border, and
 * returns 1 at the end. Returns 0 when no mismatch was found. */
#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
int checkasm_check_##type(const char *const file, const int line, \
                          const type *buf1, ptrdiff_t stride1, \
                          const type *buf2, ptrdiff_t stride2, \
                          const int w, int h, const char *const name, \
                          const int align_w, const int align_h, \
                          const int padding) \
{ \
    int aligned_w = (w + align_w - 1) & ~(align_w - 1); \
    int aligned_h = (h + align_h - 1) & ~(align_h - 1); \
    int err = 0; \
    stride1 /= sizeof(*buf1); \
    stride2 /= sizeof(*buf2); \
    int y = 0; \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \
            break; \
    if (y != h) { \
        if (check_err(file, line, name, w, h, &err)) \
            return 1; \
        for (y = 0; y < h; y++) { \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, " " fmt, buf1[x]); \
            fprintf(stderr, "    "); \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, " " fmt, buf2[x]); \
            fprintf(stderr, "    "); \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
            buf1 += stride1; \
            buf2 += stride2; \
            fprintf(stderr, "\n"); \
        } \
        buf1 -= h*stride1; \
        buf2 -= h*stride2; \
    } \
    for (y = -padding; y < 0; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   (w + 2*padding)*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite above\n"); \
            break; \
        } \
    for (y = aligned_h; y < aligned_h + padding; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   (w + 2*padding)*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite below\n"); \
            break; \
        } \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   padding*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite left\n"); \
            break; \
        } \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1 + aligned_w], &buf2[y*stride2 + aligned_w], \
                   padding*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite right\n"); \
            break; \
        } \
    return err; \
}

/* Instantiate one checker per element type, each with its dump format */
DEF_CHECKASM_CHECK_FUNC(int8_t,   "%4d")
DEF_CHECKASM_CHECK_FUNC(uint8_t,  "%02x")
DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
DEF_CHECKASM_CHECK_FUNC(int16_t,  "%6d")
DEF_CHECKASM_CHECK_FUNC(int32_t,  "%9d")
862 
863 #if ARCH_X86_64
checkasm_simd_warmup(void)864 void checkasm_simd_warmup(void)
865 {
866     if (state.simd_warmup)
867         state.simd_warmup();
868 }
869 #endif
870