1 /*****************************************************************************
2  * Copyright (C) 2013-2020 MulticoreWare, Inc
3  *
4  * Authors: Steve Borho <steve@borho.org>
5  *          Min Chen <chenm003@163.com>
6  *          Yimeng Su <yimeng.su@huawei.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
21  *
22  * This program is also available under a commercial proprietary license.
23  * For more information, contact us at license @ x265.com.
24  *****************************************************************************/
25 
26 #ifndef _TESTHARNESS_H_
27 #define _TESTHARNESS_H_ 1
28 
29 #include "common.h"
30 #include "primitives.h"
31 
32 #if _MSC_VER
33 #pragma warning(disable: 4324) // structure was padded due to __declspec(align())
34 #endif
35 
/* Numeric limits shared by the test harnesses. */
#define PIXEL_MIN 0
#define SHORT_MAX  32767
/* NOTE(review): SHORT_MIN is -32767, not -32768, so the range is symmetric
 * around zero — presumably intentional; confirm before changing. */
#define SHORT_MIN -32767
#define UNSIGNED_SHORT_MAX 65535

/* Every file that includes this header gets the whole X265_NS namespace
 * injected at file scope. */
using namespace X265_NS;

/* String tables defined in another translation unit: one entry per luma
 * partition size (NUM_PU_SIZES), and one table pointer per colour space
 * (X265_CSP_COUNT). Presumably printable partition names — confirm at the
 * definition site. */
extern const char* lumaPartStr[NUM_PU_SIZES];
extern const char* const* chromaPartStr[X265_CSP_COUNT];
45 
46 class TestHarness
47 {
48 public:
49 
TestHarness()50     TestHarness() {}
51 
~TestHarness()52     virtual ~TestHarness() {}
53 
54     virtual bool testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt) = 0;
55 
56     virtual void measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt) = 0;
57 
58     virtual const char *getName() const = 0;
59 
60 protected:
61 
62     /* Temporary variables for stack checks */
63     int      m_ok;
64 
65     uint64_t m_rand;
66 };
67 
68 #ifdef _MSC_VER
69 #include <intrin.h>
70 #elif HAVE_RDTSC
71 #include <intrin.h>
72 #elif (!defined(__APPLE__) && (defined (__GNUC__) && (defined(__x86_64__) || defined(__i386__))))
73 #include <x86intrin.h>
74 #elif ( !defined(__APPLE__) && defined (__GNUC__) && defined(__ARM_NEON__))
75 #include <arm_neon.h>
76 #elif defined(__GNUC__) && (!defined(__clang__) || __clang_major__ < 4)
77 /* fallback for older GCC/MinGW */
/* Stand-in for the __rdtsc() intrinsic on toolchains that do not provide one.
 * Returns a 32-bit timestamp sample; when none of the architecture macros
 * below is set, the result is simply 0. */
static inline uint32_t __rdtsc(void)
{
    uint32_t ticks = 0;

#if X265_ARCH_X86
    /* Only the value delivered in eax is kept; edx is listed as clobbered. */
    asm volatile("rdtsc" : "=a" (ticks) ::"edx");
#elif X265_ARCH_ARM && X265_ARCH_ARM64
    /* Read the cntvct_el0 system register. */
    asm volatile("mrs %0, cntvct_el0" : "=r"(ticks));
#elif X265_ARCH_ARM
    // TODO: verify the following inline asm for reading the CPU timestamp counter on ARM
    // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));

    // TODO: replace clock() with the appropriate ARM cpu instructions
    ticks = clock();
#endif

    return ticks;
}
97 #endif // ifdef _MSC_VER
98 
/* Number of timed iterations for the optimized primitive; the reference
 * primitive is timed for BENCH_RUNS / 4 iterations. */
#define BENCH_RUNS 2000

/* Adapted from checkasm.c, runs each optimized primitive four times, measures rdtsc
 * and discards invalid times. Repeats BENCH_RUNS times to get a good average.
 * Then measures the C reference with BENCH_RUNS / 4 runs and reports X factor and average cycles.
 *
 * RUNOPT / RUNREF are invoked as RUNOPT(__VA_ARGS__) / RUNREF(__VA_ARGS__).
 * Each is called once untimed before its measurement loop. A sample is kept
 * only when ti > 0 and it is no more than four times the running average
 * (t1 * runs <= cycles * 4).
 *
 * Each sample stays a 32-bit difference, but the running sums and the filter
 * comparison use 64-bit arithmetic: with 32-bit sums, slow primitives make
 * `cycles * 4` or `t1 * runs` wrap, which silently corrupts both the outlier
 * filter and the reported average.
 *
 * The macro expands to a braced block that prints one result line with
 * printf and calls x265_emms() before doing float math. */
#define REPORT_SPEEDUP(RUNOPT, RUNREF, ...) \
    { \
        uint64_t cycles = 0; int runs = 0; \
        RUNOPT(__VA_ARGS__); \
        for (int ti = 0; ti < BENCH_RUNS; ti++) { \
            uint32_t t0 = (uint32_t)__rdtsc(); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            uint32_t t1 = (uint32_t)__rdtsc() - t0; \
            if ((uint64_t)t1 * runs <= cycles * 4 && ti > 0) { cycles += t1; runs++; } \
        } \
        uint64_t refcycles = 0; int refruns = 0; \
        RUNREF(__VA_ARGS__); \
        for (int ti = 0; ti < BENCH_RUNS / 4; ti++) { \
            uint32_t t0 = (uint32_t)__rdtsc(); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            uint32_t t1 = (uint32_t)__rdtsc() - t0; \
            if ((uint64_t)t1 * refruns <= refcycles * 4 && ti > 0) { refcycles += t1; refruns++; } \
        } \
        x265_emms(); \
        float optperf = (10.0f * (float)cycles / runs) / 4; \
        float refperf = (10.0f * (float)refcycles / refruns) / 4; \
        printf("\t%3.2fx ", refperf / optperf); \
        printf("\t %-8.2lf \t %-8.2lf\n", optperf, refperf); \
    }
134 
/* Declarations of the assembly-side check helpers, plus the checked(),
 * checked_float() and reportfail() macros the harnesses use to call an
 * optimized primitive. Three variants are selected at preprocessing time:
 *   X86_64   - clobber the stack with junk, then call through checkasm_call
 *   ARCH_X86 - call through checkasm_call
 *   others   - call the primitive directly; reportfail() expands to nothing
 * The two x86 variants read and write the m_ok / m_rand members, so those
 * macros are only usable where such members are in scope. */
extern "C" {
#if X265_ARCH_X86
int PFX(stack_pagealign)(int (*func)(), int align);

/* detect when callee-saved regs aren't saved
 * needs an explicit asm check because it only sometimes crashes in normal use. */
intptr_t PFX(checkasm_call)(intptr_t (*func)(), int *ok, ...);
float PFX(checkasm_call_float)(float (*func)(), int *ok, ...);
#elif X265_ARCH_ARM == 0
/* NOTE(review): as written this (re)defines a function-like macro named PFX
 * whose single parameter is called stack_pagealign; it does not define a stub
 * for the prefixed stack_pagealign symbol. Left untouched — confirm the
 * intent before relying on it. */
#define PFX(stack_pagealign)(func, align) func()
#endif

#if X86_64

/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
 * This is done by clobbering the stack with junk around the stack pointer and calling the
 * assembly function through x265_checkasm_call with added dummy arguments which forces all
 * real arguments to be passed on the stack and not in registers. For 32-bit argument the
 * upper half of the 64-bit register location on the stack will now contain junk. Note that
 * this is dependent on compiler behavior and that interrupts etc. at the wrong time may
 * overwrite the junk written to the stack so there's no guarantee that it will always
 * detect all functions that assumes zero-extension.
 */
void PFX(checkasm_stack_clobber)(uint64_t clobber, ...);
#define checked(func, ...) ( \
        m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
        PFX(checkasm_stack_clobber)(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
        PFX(checkasm_call)((intptr_t(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))

#define checked_float(func, ...) ( \
        m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
        PFX(checkasm_stack_clobber)(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
        PFX(checkasm_call_float)((float(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
#define reportfail() if (!m_ok) { fflush(stdout); fprintf(stderr, "stack clobber check failed at %s:%d", __FILE__, __LINE__); abort(); }
#elif ARCH_X86
/* NOTE(review): this branch tests ARCH_X86 while the declarations above are
 * guarded by X265_ARCH_X86 — confirm which macro the build actually defines.
 *
 * Kept consistent with the other two variants: the macros are plain
 * expressions (no trailing ';'), m_ok is reset before every call, and
 * reportfail() is defined so harness code that invokes it still compiles. */
#define checked(func, ...) ( \
        m_ok = 1, \
        PFX(checkasm_call)((intptr_t(*)())func, &m_ok, __VA_ARGS__))
#define checked_float(func, ...) ( \
        m_ok = 1, \
        PFX(checkasm_call_float)((float(*)())func, &m_ok, __VA_ARGS__))
#define reportfail() if (!m_ok) { fflush(stdout); fprintf(stderr, "stack clobber check failed at %s:%d", __FILE__, __LINE__); abort(); }

#else // if X86_64
/* No assembly wrapper available: call the primitive directly. */
#define checked(func, ...) func(__VA_ARGS__)
#define checked_float(func, ...) func(__VA_ARGS__)
#define reportfail()
#endif // if X86_64
}
183 
184 #endif // ifndef _TESTHARNESS_H_
185