1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <cstdlib>
13 #include <string>
14 #include <tuple>
15 
16 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
17 
18 #include "config/aom_config.h"
19 #include "config/av1_rtcd.h"
20 
21 #include "aom_ports/aom_timer.h"
22 #include "av1/common/cdef_block.h"
23 #include "test/acm_random.h"
24 #include "test/register_state_check.h"
25 #include "test/util.h"
26 
27 using libaom_test::ACMRandom;
28 
29 namespace {
30 
31 typedef std::tuple<cdef_filter_block_func, cdef_filter_block_func, BLOCK_SIZE,
32                    int, int>
33     cdef_dir_param_t;
34 
35 class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> {
36  public:
~CDEFBlockTest()37   virtual ~CDEFBlockTest() {}
SetUp()38   virtual void SetUp() {
39     cdef = GET_PARAM(0);
40     ref_cdef = GET_PARAM(1);
41     bsize = GET_PARAM(2);
42     boundary = GET_PARAM(3);
43     depth = GET_PARAM(4);
44   }
45 
TearDown()46   virtual void TearDown() {}
47 
48  protected:
49   int bsize;
50   int boundary;
51   int depth;
52   cdef_filter_block_func cdef;
53   cdef_filter_block_func ref_cdef;
54 };
55 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockTest);
56 
57 typedef CDEFBlockTest CDEFSpeedTest;
58 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedTest);
59 
test_cdef(int bsize,int iterations,cdef_filter_block_func cdef,cdef_filter_block_func ref_cdef,int boundary,int depth)60 void test_cdef(int bsize, int iterations, cdef_filter_block_func cdef,
61                cdef_filter_block_func ref_cdef, int boundary, int depth) {
62   const int size = 8;
63   const int ysize = size + 2 * CDEF_VBORDER;
64   ACMRandom rnd(ACMRandom::DeterministicSeed());
65   DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
66   DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
67   DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
68   memset(ref_d, 0, sizeof(ref_d));
69   memset(d, 0, sizeof(d));
70 
71   int error = 0, pristrength = 0, secstrength, dir;
72   int pridamping, secdamping, bits, level, count,
73       errdepth = 0, errpristrength = 0, errsecstrength = 0, errboundary = 0,
74       errpridamping = 0, errsecdamping = 0;
75   unsigned int pos = 0;
76 
77   const unsigned int max_pos = size * size >> static_cast<int>(depth == 8);
78   for (pridamping = 3 + depth - 8; pridamping < 7 - 3 * !!boundary + depth - 8;
79        pridamping++) {
80     for (secdamping = 3 + depth - 8;
81          secdamping < 7 - 3 * !!boundary + depth - 8; secdamping++) {
82       for (count = 0; count < iterations; count++) {
83         for (level = 0; level < (1 << depth) && !error;
84              level += (2 + 6 * !!boundary) << (depth - 8)) {
85           for (bits = 1; bits <= depth && !error; bits += 1 + 3 * !!boundary) {
86             for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
87               s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
88                            (1 << depth) - 1);
89             if (boundary) {
90               if (boundary & 1) {  // Left
91                 for (int i = 0; i < ysize; i++)
92                   for (int j = 0; j < CDEF_HBORDER; j++)
93                     s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
94               }
95               if (boundary & 2) {  // Right
96                 for (int i = 0; i < ysize; i++)
97                   for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
98                     s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
99               }
100               if (boundary & 4) {  // Above
101                 for (int i = 0; i < CDEF_VBORDER; i++)
102                   for (int j = 0; j < CDEF_BSTRIDE; j++)
103                     s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
104               }
105               if (boundary & 8) {  // Below
106                 for (int i = CDEF_VBORDER + size; i < ysize; i++)
107                   for (int j = 0; j < CDEF_BSTRIDE; j++)
108                     s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
109               }
110             }
111             for (dir = 0; dir < 8; dir++) {
112               for (pristrength = 0; pristrength <= 19 << (depth - 8) && !error;
113                    pristrength += (1 + 4 * !!boundary) << (depth - 8)) {
114                 if (pristrength == 16) pristrength = 19;
115                 for (secstrength = 0; secstrength <= 4 << (depth - 8) && !error;
116                      secstrength += 1 << (depth - 8)) {
117                   if (secstrength == 3 << (depth - 8)) continue;
118                   ref_cdef(depth == 8 ? (uint8_t *)ref_d : 0, ref_d, size,
119                            s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
120                            pristrength, secstrength, dir, pridamping,
121                            secdamping, bsize, depth - 8);
122                   // If cdef and ref_cdef are the same, we're just testing
123                   // speed
124                   if (cdef != ref_cdef)
125                     API_REGISTER_STATE_CHECK(
126                         cdef(depth == 8 ? (uint8_t *)d : 0, d, size,
127                              s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
128                              pristrength, secstrength, dir, pridamping,
129                              secdamping, bsize, depth - 8));
130                   if (ref_cdef != cdef) {
131                     for (pos = 0; pos < max_pos && !error; pos++) {
132                       error = ref_d[pos] != d[pos];
133                       errdepth = depth;
134                       errpristrength = pristrength;
135                       errsecstrength = secstrength;
136                       errboundary = boundary;
137                       errpridamping = pridamping;
138                       errsecdamping = secdamping;
139                     }
140                   }
141                 }
142               }
143             }
144           }
145         }
146       }
147     }
148   }
149 
150   pos--;
151   EXPECT_EQ(0, error) << "Error: CDEFBlockTest, SIMD and C mismatch."
152                       << std::endl
153                       << "First error at " << pos % size << "," << pos / size
154                       << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
155                       << ") " << std::endl
156                       << "pristrength: " << errpristrength << std::endl
157                       << "pridamping: " << errpridamping << std::endl
158                       << "secstrength: " << errsecstrength << std::endl
159                       << "secdamping: " << errsecdamping << std::endl
160                       << "depth: " << errdepth << std::endl
161                       << "size: " << bsize << std::endl
162                       << "boundary: " << errboundary << std::endl
163                       << std::endl;
164 }
165 
test_cdef_speed(int bsize,int iterations,cdef_filter_block_func cdef,cdef_filter_block_func ref_cdef,int boundary,int depth)166 void test_cdef_speed(int bsize, int iterations, cdef_filter_block_func cdef,
167                      cdef_filter_block_func ref_cdef, int boundary, int depth) {
168   aom_usec_timer ref_timer;
169   aom_usec_timer timer;
170 
171   aom_usec_timer_start(&ref_timer);
172   test_cdef(bsize, iterations, ref_cdef, ref_cdef, boundary, depth);
173   aom_usec_timer_mark(&ref_timer);
174   int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
175 
176   aom_usec_timer_start(&timer);
177   test_cdef(bsize, iterations, cdef, cdef, boundary, depth);
178   aom_usec_timer_mark(&timer);
179   int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
180 
181   EXPECT_GT(ref_elapsed_time, elapsed_time)
182       << "Error: CDEFSpeedTest, SIMD slower than C." << std::endl
183       << "C time: " << ref_elapsed_time << " us" << std::endl
184       << "SIMD time: " << elapsed_time << " us" << std::endl;
185 }
186 
// Signature shared by the direction-search implementations under test: takes
// an image pointer with its stride, writes a variance through `var` and
// returns an int result.
using find_dir_t = int (*)(const uint16_t *img, int stride, int32_t *var,
                           int coeff_shift);
189 
190 typedef std::tuple<find_dir_t, find_dir_t> find_dir_param_t;
191 
192 class CDEFFindDirTest : public ::testing::TestWithParam<find_dir_param_t> {
193  public:
~CDEFFindDirTest()194   virtual ~CDEFFindDirTest() {}
SetUp()195   virtual void SetUp() {
196     finddir = GET_PARAM(0);
197     ref_finddir = GET_PARAM(1);
198   }
199 
TearDown()200   virtual void TearDown() {}
201 
202  protected:
203   find_dir_t finddir;
204   find_dir_t ref_finddir;
205 };
206 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirTest);
207 
208 typedef CDEFFindDirTest CDEFFindDirSpeedTest;
209 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirSpeedTest);
210 
test_finddir(int (* finddir)(const uint16_t * img,int stride,int32_t * var,int coeff_shift),int (* ref_finddir)(const uint16_t * img,int stride,int32_t * var,int coeff_shift))211 void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
212                                  int coeff_shift),
213                   int (*ref_finddir)(const uint16_t *img, int stride,
214                                      int32_t *var, int coeff_shift)) {
215   const int size = 8;
216   ACMRandom rnd(ACMRandom::DeterministicSeed());
217   DECLARE_ALIGNED(16, uint16_t, s[size * size]);
218 
219   int error = 0;
220   int depth, bits, level, count, errdepth = 0;
221   int ref_res = 0, res = 0;
222   int32_t ref_var = 0, var = 0;
223 
224   for (depth = 8; depth <= 12 && !error; depth += 2) {
225     for (count = 0; count < 512 && !error; count++) {
226       for (level = 0; level < (1 << depth) && !error;
227            level += 1 << (depth - 8)) {
228         for (bits = 1; bits <= depth && !error; bits++) {
229           for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
230             s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
231                          (1 << depth) - 1);
232           for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
233             ref_res = ref_finddir(s, size, &ref_var, depth - 8);
234           if (finddir != ref_finddir)
235             API_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
236           if (ref_finddir != finddir) {
237             if (res != ref_res || var != ref_var) error = 1;
238             errdepth = depth;
239           }
240         }
241       }
242     }
243   }
244 
245   EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch."
246                       << std::endl
247                       << "return: " << res << " : " << ref_res << std::endl
248                       << "var: " << var << " : " << ref_var << std::endl
249                       << "depth: " << errdepth << std::endl
250                       << std::endl;
251 }
252 
test_finddir_speed(int (* finddir)(const uint16_t * img,int stride,int32_t * var,int coeff_shift),int (* ref_finddir)(const uint16_t * img,int stride,int32_t * var,int coeff_shift))253 void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
254                                        int32_t *var, int coeff_shift),
255                         int (*ref_finddir)(const uint16_t *img, int stride,
256                                            int32_t *var, int coeff_shift)) {
257   aom_usec_timer ref_timer;
258   aom_usec_timer timer;
259 
260   aom_usec_timer_start(&ref_timer);
261   test_finddir(ref_finddir, ref_finddir);
262   aom_usec_timer_mark(&ref_timer);
263   int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
264 
265   aom_usec_timer_start(&timer);
266   test_finddir(finddir, finddir);
267   aom_usec_timer_mark(&timer);
268   int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
269 
270   EXPECT_GT(ref_elapsed_time, elapsed_time)
271       << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl
272       << "C time: " << ref_elapsed_time << " us" << std::endl
273       << "SIMD time: " << elapsed_time << " us" << std::endl;
274 }
275 
// Runs a single pass of the filter sweep, comparing the function under test
// with the reference for this suite's block size, boundary mask and depth.
TEST_P(CDEFBlockTest, TestSIMDNoMismatch) {
  const int kIterations = 1;
  test_cdef(bsize, kIterations, cdef, ref_cdef, boundary, depth);
}
279 
// Times four passes of the filter sweep for the reference and for the
// function under test. Disabled by default through the DISABLED_ prefix.
TEST_P(CDEFSpeedTest, DISABLED_TestSpeed) {
  const int kIterations = 4;
  test_cdef_speed(bsize, kIterations, cdef, ref_cdef, boundary, depth);
}
283 
// Compares the direction-search function under test with the reference.
TEST_P(CDEFFindDirTest, TestSIMDNoMismatch) {
  test_finddir(finddir, ref_finddir);
}
287 
// Times the direction-search function under test against the reference.
// Disabled by default through the DISABLED_ prefix.
TEST_P(CDEFFindDirSpeedTest, DISABLED_TestSpeed) {
  test_finddir_speed(finddir, ref_finddir);
}
291 
292 using std::make_tuple;
293 
294 // VS compiling for 32 bit targets does not support vector types in
295 // structs as arguments, which makes the v256 type of the intrinsics
296 // hard to support, so optimizations for this target are disabled.
297 #if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
#if HAVE_SSE2
// SSE2 against the C reference: four block sizes, every boundary bit mask
// (0..15) and bit depths 8, 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    SSE2, CDEFBlockTest,
    ::testing::Combine(::testing::Values(&cdef_filter_block_sse2),
                       ::testing::Values(&cdef_filter_block_c),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16),
                       ::testing::Values(8, 10, 12)));
INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirTest,
                         ::testing::Values(std::make_tuple(
                             &cdef_find_dir_sse2, &cdef_find_dir_c)));
#endif
#if HAVE_SSSE3
// SSSE3 against the C reference: four block sizes, every boundary bit mask
// (0..15) and bit depths 8, 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    SSSE3, CDEFBlockTest,
    ::testing::Combine(::testing::Values(&cdef_filter_block_ssse3),
                       ::testing::Values(&cdef_filter_block_c),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16),
                       ::testing::Values(8, 10, 12)));
INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirTest,
                         ::testing::Values(std::make_tuple(
                             &cdef_find_dir_ssse3, &cdef_find_dir_c)));
#endif
322 
#if HAVE_SSE4_1
// SSE4.1 against the C reference: four block sizes, every boundary bit mask
// (0..15) and bit depths 8, 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, CDEFBlockTest,
    ::testing::Combine(::testing::Values(&cdef_filter_block_sse4_1),
                       ::testing::Values(&cdef_filter_block_c),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16),
                       ::testing::Values(8, 10, 12)));
INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirTest,
                         ::testing::Values(std::make_tuple(
                             &cdef_find_dir_sse4_1, &cdef_find_dir_c)));
#endif
335 
#if HAVE_AVX2
// AVX2 against the C reference: four block sizes, every boundary bit mask
// (0..15) and bit depths 8, 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    AVX2, CDEFBlockTest,
    ::testing::Combine(::testing::Values(&cdef_filter_block_avx2),
                       ::testing::Values(&cdef_filter_block_c),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16),
                       ::testing::Values(8, 10, 12)));
INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirTest,
                         ::testing::Values(std::make_tuple(
                             &cdef_find_dir_avx2, &cdef_find_dir_c)));
#endif
348 
#if HAVE_NEON
// NEON against the C reference: four block sizes, every boundary bit mask
// (0..15) and bit depths 8, 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    NEON, CDEFBlockTest,
    ::testing::Combine(::testing::Values(&cdef_filter_block_neon),
                       ::testing::Values(&cdef_filter_block_c),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16),
                       ::testing::Values(8, 10, 12)));
INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirTest,
                         ::testing::Values(std::make_tuple(
                             &cdef_find_dir_neon, &cdef_find_dir_c)));
#endif
361 
362 // Test speed for all supported architectures
#if HAVE_SSE2
// Speed suites for SSE2 (their only test is DISABLED_ by default), using the
// same parameter grid as the mismatch suites.
INSTANTIATE_TEST_SUITE_P(
    SSE2, CDEFSpeedTest,
    ::testing::Combine(::testing::Values(&cdef_filter_block_sse2),
                       ::testing::Values(&cdef_filter_block_c),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16),
                       ::testing::Values(8, 10, 12)));
INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirSpeedTest,
                         ::testing::Values(std::make_tuple(
                             &cdef_find_dir_sse2, &cdef_find_dir_c)));
#endif
375 
#if HAVE_SSSE3
// Speed suites for SSSE3 (their only test is DISABLED_ by default), using the
// same parameter grid as the mismatch suites.
INSTANTIATE_TEST_SUITE_P(
    SSSE3, CDEFSpeedTest,
    ::testing::Combine(::testing::Values(&cdef_filter_block_ssse3),
                       ::testing::Values(&cdef_filter_block_c),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16),
                       ::testing::Values(8, 10, 12)));
INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirSpeedTest,
                         ::testing::Values(std::make_tuple(
                             &cdef_find_dir_ssse3, &cdef_find_dir_c)));
#endif
388 
#if HAVE_SSE4_1
// Speed suites for SSE4.1 (their only test is DISABLED_ by default), using
// the same parameter grid as the mismatch suites.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, CDEFSpeedTest,
    ::testing::Combine(::testing::Values(&cdef_filter_block_sse4_1),
                       ::testing::Values(&cdef_filter_block_c),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16),
                       ::testing::Values(8, 10, 12)));
INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirSpeedTest,
                         ::testing::Values(std::make_tuple(
                             &cdef_find_dir_sse4_1, &cdef_find_dir_c)));
#endif
401 
#if HAVE_AVX2
// Speed suites for AVX2 (their only test is DISABLED_ by default), using the
// same parameter grid as the mismatch suites.
INSTANTIATE_TEST_SUITE_P(
    AVX2, CDEFSpeedTest,
    ::testing::Combine(::testing::Values(&cdef_filter_block_avx2),
                       ::testing::Values(&cdef_filter_block_c),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16),
                       ::testing::Values(8, 10, 12)));
INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirSpeedTest,
                         ::testing::Values(std::make_tuple(
                             &cdef_find_dir_avx2, &cdef_find_dir_c)));
#endif
414 
#if HAVE_NEON
// Speed suites for NEON (their only test is DISABLED_ by default), using the
// same parameter grid as the mismatch suites.
INSTANTIATE_TEST_SUITE_P(
    NEON, CDEFSpeedTest,
    ::testing::Combine(::testing::Values(&cdef_filter_block_neon),
                       ::testing::Values(&cdef_filter_block_c),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16),
                       ::testing::Values(8, 10, 12)));
INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirSpeedTest,
                         ::testing::Values(std::make_tuple(
                             &cdef_find_dir_neon, &cdef_find_dir_c)));
#endif
427 
#endif  // defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
429 }  // namespace
430