1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <cmath>
13 #include <cstdlib>
14 #include <string>
15 
16 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
17 
18 #include "config/aom_config.h"
19 #include "config/aom_dsp_rtcd.h"
20 
21 #include "test/acm_random.h"
22 #include "test/clear_system_state.h"
23 #include "test/register_state_check.h"
24 #include "test/util.h"
25 #include "av1/common/av1_loopfilter.h"
26 #include "av1/common/entropy.h"
27 #include "aom/aom_integer.h"
28 
29 using libaom_test::ACMRandom;
30 
31 namespace {
// Horizontally and Vertically need 32x32: 8  Coeffs preceding filtered section
//                                         16 Coeffs within filtered section
//                                         8  Coeffs following filtered section
// Number of samples in every test buffer: one 32x32 block.
const int kNumCoeffs = 32 * 32;

// Randomized rounds executed by each OperationCheck / ValueCheck test.
const int number_of_iterations = 10 * 1000;

// Filter invocations performed by each (disabled) speed test.
const int kSpeedTestNum = 500 * 1000;
40 
// Parameter lists shared by the filter function-pointer typedefs and by the
// call_filter() / call_dualfilter() wrappers in this file.
// LOOP_PARAM:      pitch |p| followed by one blimit/limit/thresh set.
// DUAL_LOOP_PARAM: pitch |p| followed by two such sets (suffix 0 and 1).
#define LOOP_PARAM                                    \
  int p, const uint8_t *blimit, const uint8_t *limit, \
      const uint8_t *thresh
#define DUAL_LOOP_PARAM                                                      \
  int p,                                                                     \
      const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, \
      const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1
47 
// Function-pointer types for the filters under test. The plain variants
// operate on uint8_t samples; the "hbd" variants operate on uint16_t samples
// and take a trailing bit-depth argument. "dual" variants receive two
// blimit/limit/thresh sets (see DUAL_LOOP_PARAM).
typedef void (*loop_op_t)(uint8_t *s, LOOP_PARAM);
typedef void (*dual_loop_op_t)(uint8_t *s, DUAL_LOOP_PARAM);
typedef void (*hbdloop_op_t)(uint16_t *s, LOOP_PARAM, int bd);
typedef void (*hbddual_loop_op_t)(uint16_t *s, DUAL_LOOP_PARAM, int bd);

// Test parameter tuples. LoopTestParam::SetUp() reads element 0 as the
// function under test, element 1 as the reference function and element 2 as
// the bit depth.
typedef ::testing::tuple<hbdloop_op_t, hbdloop_op_t, int> hbdloop_param_t;
typedef ::testing::tuple<hbddual_loop_op_t, hbddual_loop_op_t, int>
    hbddual_loop_param_t;
typedef ::testing::tuple<loop_op_t, loop_op_t, int> loop_param_t;
typedef ::testing::tuple<dual_loop_op_t, dual_loop_op_t, int> dual_loop_param_t;
58 
59 template <typename Pixel_t, int PIXEL_WIDTH_t>
InitInput(Pixel_t * s,Pixel_t * ref_s,ACMRandom * rnd,const uint8_t limit,const int mask,const int32_t p,const int i)60 void InitInput(Pixel_t *s, Pixel_t *ref_s, ACMRandom *rnd, const uint8_t limit,
61                const int mask, const int32_t p, const int i) {
62   uint16_t tmp_s[kNumCoeffs];
63 
64   for (int j = 0; j < kNumCoeffs;) {
65     const uint8_t val = rnd->Rand8();
66     if (val & 0x80) {  // 50% chance to choose a new value.
67       tmp_s[j] = rnd->Rand16();
68       j++;
69     } else {  // 50% chance to repeat previous value in row X times.
70       int k = 0;
71       while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
72         if (j < 1) {
73           tmp_s[j] = rnd->Rand16();
74         } else if (val & 0x20) {  // Increment by a value within the limit.
75           tmp_s[j] = tmp_s[j - 1] + (limit - 1);
76         } else {  // Decrement by a value within the limit.
77           tmp_s[j] = tmp_s[j - 1] - (limit - 1);
78         }
79         j++;
80       }
81     }
82   }
83 
84   for (int j = 0; j < kNumCoeffs;) {
85     const uint8_t val = rnd->Rand8();
86     if (val & 0x80) {
87       j++;
88     } else {  // 50% chance to repeat previous value in column X times.
89       int k = 0;
90       while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
91         if (j < 1) {
92           tmp_s[j] = rnd->Rand16();
93         } else if (val & 0x20) {  // Increment by a value within the limit.
94           tmp_s[(j % 32) * 32 + j / 32] =
95               tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] + (limit - 1);
96         } else {  // Decrement by a value within the limit.
97           tmp_s[(j % 32) * 32 + j / 32] =
98               tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] - (limit - 1);
99         }
100         j++;
101       }
102     }
103   }
104 
105   for (int j = 0; j < kNumCoeffs; j++) {
106     if (i % 2) {
107       s[j] = tmp_s[j] & mask;
108     } else {
109       s[j] = tmp_s[p * (j % p) + j / p] & mask;
110     }
111     ref_s[j] = s[j];
112   }
113 }
114 
GetOuterThresh(ACMRandom * rnd)115 uint8_t GetOuterThresh(ACMRandom *rnd) {
116   return static_cast<uint8_t>(rnd->PseudoUniform(3 * MAX_LOOP_FILTER + 5));
117 }
118 
GetInnerThresh(ACMRandom * rnd)119 uint8_t GetInnerThresh(ACMRandom *rnd) {
120   return static_cast<uint8_t>(rnd->PseudoUniform(MAX_LOOP_FILTER + 1));
121 }
122 
GetHevThresh(ACMRandom * rnd)123 uint8_t GetHevThresh(ACMRandom *rnd) {
124   return static_cast<uint8_t>(rnd->PseudoUniform(MAX_LOOP_FILTER + 1) >> 4);
125 }
126 
127 template <typename func_type_t, typename params_t>
128 class LoopTestParam : public ::testing::TestWithParam<params_t> {
129  public:
~LoopTestParam()130   virtual ~LoopTestParam() {}
SetUp()131   virtual void SetUp() {
132     loopfilter_op_ = ::testing::get<0>(this->GetParam());
133     ref_loopfilter_op_ = ::testing::get<1>(this->GetParam());
134     bit_depth_ = ::testing::get<2>(this->GetParam());
135     mask_ = (1 << bit_depth_) - 1;
136   }
137 
TearDown()138   virtual void TearDown() { libaom_test::ClearSystemState(); }
139 
140  protected:
141   int bit_depth_;
142   int mask_;
143   func_type_t loopfilter_op_;
144   func_type_t ref_loopfilter_op_;
145 };
146 
call_filter(uint16_t * s,LOOP_PARAM,int bd,hbdloop_op_t op)147 void call_filter(uint16_t *s, LOOP_PARAM, int bd, hbdloop_op_t op) {
148   op(s, p, blimit, limit, thresh, bd);
149 }
call_filter(uint8_t * s,LOOP_PARAM,int bd,loop_op_t op)150 void call_filter(uint8_t *s, LOOP_PARAM, int bd, loop_op_t op) {
151   (void)bd;
152   op(s, p, blimit, limit, thresh);
153 }
call_dualfilter(uint16_t * s,DUAL_LOOP_PARAM,int bd,hbddual_loop_op_t op)154 void call_dualfilter(uint16_t *s, DUAL_LOOP_PARAM, int bd,
155                      hbddual_loop_op_t op) {
156   op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bd);
157 }
call_dualfilter(uint8_t * s,DUAL_LOOP_PARAM,int bd,dual_loop_op_t op)158 void call_dualfilter(uint8_t *s, DUAL_LOOP_PARAM, int bd, dual_loop_op_t op) {
159   (void)bd;
160   op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
161 };
162 
// Concrete fixtures. Loop8Test6Param_* cover the single filters (one
// blimit/limit/thresh set); Loop8Test9Param_* cover the dual filters (two
// sets). The _hbd fixtures use the uint16_t function types, the _lbd fixtures
// the uint8_t ones.
typedef LoopTestParam<hbdloop_op_t, hbdloop_param_t> Loop8Test6Param_hbd;
typedef LoopTestParam<loop_op_t, loop_param_t> Loop8Test6Param_lbd;
typedef LoopTestParam<hbddual_loop_op_t, hbddual_loop_param_t>
    Loop8Test9Param_hbd;
typedef LoopTestParam<dual_loop_op_t, dual_loop_param_t> Loop8Test9Param_lbd;
168 
169 #define OPCHECK(a, b)                                                          \
170   ACMRandom rnd(ACMRandom::DeterministicSeed());                               \
171   const int count_test_block = number_of_iterations;                           \
172   const int32_t p = kNumCoeffs / 32;                                           \
173   DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                        \
174   DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                    \
175   int err_count_total = 0;                                                     \
176   int first_failure = -1;                                                      \
177   for (int i = 0; i < count_test_block; ++i) {                                 \
178     int err_count = 0;                                                         \
179     uint8_t tmp = GetOuterThresh(&rnd);                                        \
180     DECLARE_ALIGNED(16, const uint8_t,                                         \
181                     blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
182                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
183     tmp = GetInnerThresh(&rnd);                                                \
184     DECLARE_ALIGNED(16, const uint8_t,                                         \
185                     limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,     \
186                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };   \
187     tmp = GetHevThresh(&rnd);                                                  \
188     DECLARE_ALIGNED(16, const uint8_t,                                         \
189                     thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
190                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
191     InitInput<a, b>(s, ref_s, &rnd, *limit, mask_, p, i);                      \
192     call_filter(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_,       \
193                 ref_loopfilter_op_);                                           \
194     ASM_REGISTER_STATE_CHECK(call_filter(s + 8 + p * 8, p, blimit, limit,      \
195                                          thresh, bit_depth_, loopfilter_op_)); \
196     for (int j = 0; j < kNumCoeffs; ++j) {                                     \
197       err_count += ref_s[j] != s[j];                                           \
198     }                                                                          \
199     if (err_count && !err_count_total) {                                       \
200       first_failure = i;                                                       \
201     }                                                                          \
202     err_count_total += err_count;                                              \
203   }                                                                            \
204   EXPECT_EQ(0, err_count_total)                                                \
205       << "Error: Loop8Test6Param, C output doesn't match SIMD "                \
206          "loopfilter output. "                                                 \
207       << "First failed at test case " << first_failure;
208 
TEST_P(Loop8Test6Param_hbd,OperationCheck)209 TEST_P(Loop8Test6Param_hbd, OperationCheck) { OPCHECK(uint16_t, 16); }
TEST_P(Loop8Test6Param_lbd,OperationCheck)210 TEST_P(Loop8Test6Param_lbd, OperationCheck) { OPCHECK(uint8_t, 8); }
211 
212 #define VALCHECK(a, b)                                                         \
213   ACMRandom rnd(ACMRandom::DeterministicSeed());                               \
214   const int count_test_block = number_of_iterations;                           \
215   DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                        \
216   DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                    \
217   int err_count_total = 0;                                                     \
218   int first_failure = -1;                                                      \
219   for (int i = 0; i < count_test_block; ++i) {                                 \
220     int err_count = 0;                                                         \
221     uint8_t tmp = GetOuterThresh(&rnd);                                        \
222     DECLARE_ALIGNED(16, const uint8_t,                                         \
223                     blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
224                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
225     tmp = GetInnerThresh(&rnd);                                                \
226     DECLARE_ALIGNED(16, const uint8_t,                                         \
227                     limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,     \
228                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };   \
229     tmp = GetHevThresh(&rnd);                                                  \
230     DECLARE_ALIGNED(16, const uint8_t,                                         \
231                     thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
232                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
233     int32_t p = kNumCoeffs / 32;                                               \
234     for (int j = 0; j < kNumCoeffs; ++j) {                                     \
235       s[j] = rnd.Rand16() & mask_;                                             \
236       ref_s[j] = s[j];                                                         \
237     }                                                                          \
238     call_filter(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_,       \
239                 ref_loopfilter_op_);                                           \
240     ASM_REGISTER_STATE_CHECK(call_filter(s + 8 + p * 8, p, blimit, limit,      \
241                                          thresh, bit_depth_, loopfilter_op_)); \
242     for (int j = 0; j < kNumCoeffs; ++j) {                                     \
243       err_count += ref_s[j] != s[j];                                           \
244     }                                                                          \
245     if (err_count && !err_count_total) {                                       \
246       first_failure = i;                                                       \
247     }                                                                          \
248     err_count_total += err_count;                                              \
249   }                                                                            \
250   EXPECT_EQ(0, err_count_total)                                                \
251       << "Error: Loop8Test6Param, C output doesn't match SIMD "                \
252          "loopfilter output. "                                                 \
253       << "First failed at test case " << first_failure;
254 
TEST_P(Loop8Test6Param_hbd,ValueCheck)255 TEST_P(Loop8Test6Param_hbd, ValueCheck) { VALCHECK(uint16_t, 16); }
TEST_P(Loop8Test6Param_lbd,ValueCheck)256 TEST_P(Loop8Test6Param_lbd, ValueCheck) { VALCHECK(uint8_t, 8); }
257 
258 #define SPEEDCHECK(a, b)                                                      \
259   ACMRandom rnd(ACMRandom::DeterministicSeed());                              \
260   const int count_test_block = kSpeedTestNum;                                 \
261   const int32_t bd = bit_depth_;                                              \
262   DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                       \
263   uint8_t tmp = GetOuterThresh(&rnd);                                         \
264   DECLARE_ALIGNED(16, const uint8_t,                                          \
265                   blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,     \
266                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };   \
267   tmp = GetInnerThresh(&rnd);                                                 \
268   DECLARE_ALIGNED(16, const uint8_t,                                          \
269                   limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,      \
270                                  tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };    \
271   tmp = GetHevThresh(&rnd);                                                   \
272   DECLARE_ALIGNED(16, const uint8_t,                                          \
273                   thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,     \
274                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };   \
275   int32_t p = kNumCoeffs / 32;                                                \
276   for (int j = 0; j < kNumCoeffs; ++j) {                                      \
277     s[j] = rnd.Rand16() & mask_;                                              \
278   }                                                                           \
279   for (int i = 0; i < count_test_block; ++i) {                                \
280     call_filter(s + 8 + p * 8, p, blimit, limit, thresh, bd, loopfilter_op_); \
281   }
282 
TEST_P(Loop8Test6Param_hbd,DISABLED_Speed)283 TEST_P(Loop8Test6Param_hbd, DISABLED_Speed) { SPEEDCHECK(uint16_t, 16); }
TEST_P(Loop8Test6Param_lbd,DISABLED_Speed)284 TEST_P(Loop8Test6Param_lbd, DISABLED_Speed) { SPEEDCHECK(uint8_t, 8); }
285 
286 #define OPCHECKd(a, b)                                                         \
287   ACMRandom rnd(ACMRandom::DeterministicSeed());                               \
288   const int count_test_block = number_of_iterations;                           \
289   DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                        \
290   DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                    \
291   int err_count_total = 0;                                                     \
292   int first_failure = -1;                                                      \
293   for (int i = 0; i < count_test_block; ++i) {                                 \
294     int err_count = 0;                                                         \
295     uint8_t tmp = GetOuterThresh(&rnd);                                        \
296     DECLARE_ALIGNED(16, const uint8_t,                                         \
297                     blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
298                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
299     tmp = GetInnerThresh(&rnd);                                                \
300     DECLARE_ALIGNED(16, const uint8_t,                                         \
301                     limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
302                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
303     tmp = GetHevThresh(&rnd);                                                  \
304     DECLARE_ALIGNED(16, const uint8_t,                                         \
305                     thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
306                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
307     tmp = GetOuterThresh(&rnd);                                                \
308     DECLARE_ALIGNED(16, const uint8_t,                                         \
309                     blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
310                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
311     tmp = GetInnerThresh(&rnd);                                                \
312     DECLARE_ALIGNED(16, const uint8_t,                                         \
313                     limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
314                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
315     tmp = GetHevThresh(&rnd);                                                  \
316     DECLARE_ALIGNED(16, const uint8_t,                                         \
317                     thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
318                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
319     int32_t p = kNumCoeffs / 32;                                               \
320     const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1;               \
321     InitInput<a, b>(s, ref_s, &rnd, limit, mask_, p, i);                       \
322     call_dualfilter(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,   \
323                     limit1, thresh1, bit_depth_, ref_loopfilter_op_);          \
324     ASM_REGISTER_STATE_CHECK(                                                  \
325         call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,   \
326                         limit1, thresh1, bit_depth_, loopfilter_op_));         \
327     for (int j = 0; j < kNumCoeffs; ++j) {                                     \
328       err_count += ref_s[j] != s[j];                                           \
329     }                                                                          \
330     if (err_count && !err_count_total) {                                       \
331       first_failure = i;                                                       \
332     }                                                                          \
333     err_count_total += err_count;                                              \
334   }                                                                            \
335   EXPECT_EQ(0, err_count_total)                                                \
336       << "Error: Loop8Test9Param, C output doesn't match SIMD "                \
337          "loopfilter output. "                                                 \
338       << "First failed at test case " << first_failure;
339 
TEST_P(Loop8Test9Param_hbd,OperationCheck)340 TEST_P(Loop8Test9Param_hbd, OperationCheck) { OPCHECKd(uint16_t, 16); }
TEST_P(Loop8Test9Param_lbd,OperationCheck)341 TEST_P(Loop8Test9Param_lbd, OperationCheck) { OPCHECKd(uint8_t, 8); }
342 
343 #define VALCHECKd(a, b)                                                        \
344   ACMRandom rnd(ACMRandom::DeterministicSeed());                               \
345   const int count_test_block = number_of_iterations;                           \
346   DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                        \
347   DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                    \
348   int err_count_total = 0;                                                     \
349   int first_failure = -1;                                                      \
350   for (int i = 0; i < count_test_block; ++i) {                                 \
351     int err_count = 0;                                                         \
352     uint8_t tmp = GetOuterThresh(&rnd);                                        \
353     DECLARE_ALIGNED(16, const uint8_t,                                         \
354                     blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
355                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
356     tmp = GetInnerThresh(&rnd);                                                \
357     DECLARE_ALIGNED(16, const uint8_t,                                         \
358                     limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
359                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
360     tmp = GetHevThresh(&rnd);                                                  \
361     DECLARE_ALIGNED(16, const uint8_t,                                         \
362                     thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
363                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
364     tmp = GetOuterThresh(&rnd);                                                \
365     DECLARE_ALIGNED(16, const uint8_t,                                         \
366                     blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
367                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
368     tmp = GetInnerThresh(&rnd);                                                \
369     DECLARE_ALIGNED(16, const uint8_t,                                         \
370                     limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
371                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
372     tmp = GetHevThresh(&rnd);                                                  \
373     DECLARE_ALIGNED(16, const uint8_t,                                         \
374                     thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
375                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
376     int32_t p = kNumCoeffs / 32;                                               \
377     for (int j = 0; j < kNumCoeffs; ++j) {                                     \
378       s[j] = rnd.Rand16() & mask_;                                             \
379       ref_s[j] = s[j];                                                         \
380     }                                                                          \
381     call_dualfilter(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,   \
382                     limit1, thresh1, bit_depth_, ref_loopfilter_op_);          \
383     ASM_REGISTER_STATE_CHECK(                                                  \
384         call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,   \
385                         limit1, thresh1, bit_depth_, loopfilter_op_));         \
386     for (int j = 0; j < kNumCoeffs; ++j) {                                     \
387       err_count += ref_s[j] != s[j];                                           \
388     }                                                                          \
389     if (err_count && !err_count_total) {                                       \
390       first_failure = i;                                                       \
391     }                                                                          \
392     err_count_total += err_count;                                              \
393   }                                                                            \
394   EXPECT_EQ(0, err_count_total)                                                \
395       << "Error: Loop8Test9Param, C output doesn't match SIMD "                \
396          "loopfilter output. "                                                 \
397       << "First failed at test case " << first_failure;
398 
TEST_P(Loop8Test9Param_hbd,ValueCheck)399 TEST_P(Loop8Test9Param_hbd, ValueCheck) { VALCHECKd(uint16_t, 16); }
TEST_P(Loop8Test9Param_lbd,ValueCheck)400 TEST_P(Loop8Test9Param_lbd, ValueCheck) { VALCHECKd(uint8_t, 8); }
401 
402 #define SPEEDCHECKd(a, b)                                                    \
403   ACMRandom rnd(ACMRandom::DeterministicSeed());                             \
404   const int count_test_block = kSpeedTestNum;                                \
405   DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                      \
406   uint8_t tmp = GetOuterThresh(&rnd);                                        \
407   DECLARE_ALIGNED(16, const uint8_t,                                         \
408                   blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
409                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
410   tmp = GetInnerThresh(&rnd);                                                \
411   DECLARE_ALIGNED(16, const uint8_t,                                         \
412                   limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
413                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
414   tmp = GetHevThresh(&rnd);                                                  \
415   DECLARE_ALIGNED(16, const uint8_t,                                         \
416                   thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
417                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
418   tmp = GetOuterThresh(&rnd);                                                \
419   DECLARE_ALIGNED(16, const uint8_t,                                         \
420                   blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
421                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
422   tmp = GetInnerThresh(&rnd);                                                \
423   DECLARE_ALIGNED(16, const uint8_t,                                         \
424                   limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
425                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
426   tmp = GetHevThresh(&rnd);                                                  \
427   DECLARE_ALIGNED(16, const uint8_t,                                         \
428                   thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
429                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
430   int32_t p = kNumCoeffs / 32;                                               \
431   for (int j = 0; j < kNumCoeffs; ++j) {                                     \
432     s[j] = rnd.Rand16() & mask_;                                             \
433   }                                                                          \
434   for (int i = 0; i < count_test_block; ++i) {                               \
435     call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,     \
436                     limit1, thresh1, bit_depth_, loopfilter_op_);            \
437   }
438 
TEST_P(Loop8Test9Param_hbd,DISABLED_Speed)439 TEST_P(Loop8Test9Param_hbd, DISABLED_Speed) { SPEEDCHECKd(uint16_t, 16); }
TEST_P(Loop8Test9Param_lbd,DISABLED_Speed)440 TEST_P(Loop8Test9Param_lbd, DISABLED_Speed) { SPEEDCHECKd(uint8_t, 8); }
441 
442 using ::testing::make_tuple;
443 
444 #if HAVE_SSE2
445 
// Parameters for Loop8Test6Param_hbd: each entry pairs an SSE2 high-bitdepth
// single filter with its C reference and a bit depth (8, 10 or 12). The
// position of an entry determines the index of the generated test, so the
// order is significant.
const hbdloop_param_t kHbdLoop8Test6[] = {
  // Bit depth 8.
  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
             8),
  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 8),
  make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
             8),
  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
             8),
  make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
             &aom_highbd_lpf_horizontal_14_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 8),

  make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
             8),
  // Bit depth 10.
  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
             10),
  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 10),
  make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
             10),
  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
             10),
  make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
             &aom_highbd_lpf_horizontal_14_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
             10),
  // Bit depth 12.
  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
             12),
  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 12),
  make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
             12),
  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
             12),
  make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
             &aom_highbd_lpf_horizontal_14_c, 12),
  make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
             12),
  make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 12),
  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 12)
};

// Instantiates every Loop8Test6Param_hbd test once per entry above, under the
// "SSE2" prefix.
INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param_hbd,
                        ::testing::ValuesIn(kHbdLoop8Test6));
491 
// Parameters for Loop8Test6Param_lbd: each entry pairs an SSE2 8-bit single
// filter with its C reference; the bit depth is always 8. The position of an
// entry determines the index of the generated test.
const loop_param_t kLoop8Test6[] = {
  make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8),
  make_tuple(&aom_lpf_horizontal_8_sse2, &aom_lpf_horizontal_8_c, 8),
  make_tuple(&aom_lpf_horizontal_6_sse2, &aom_lpf_horizontal_6_c, 8),
  make_tuple(&aom_lpf_vertical_6_sse2, &aom_lpf_vertical_6_c, 8),
  make_tuple(&aom_lpf_horizontal_14_sse2, &aom_lpf_horizontal_14_c, 8),
  make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8),
  make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8),
  make_tuple(&aom_lpf_vertical_14_sse2, &aom_lpf_vertical_14_c, 8),
};

// Instantiates every Loop8Test6Param_lbd test once per entry above, under the
// "SSE2" prefix.
INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param_lbd,
                        ::testing::ValuesIn(kLoop8Test6));
505 
// Parameter table for the aom_lpf_*_dual filters: each tuple pairs an SSE2
// function with the C function of the same base name, followed by the value 8.
// Entries alternate horizontal/vertical for sizes 4, 6, 8 and 14.
// NOTE(review): presumably (function under test, reference, bit depth) -
// confirm against the Loop8Test9Param_lbd fixture.
506 const dual_loop_param_t kLoop8Test9[] = {
507   make_tuple(&aom_lpf_horizontal_4_dual_sse2, &aom_lpf_horizontal_4_dual_c, 8),
508   make_tuple(&aom_lpf_vertical_4_dual_sse2, &aom_lpf_vertical_4_dual_c, 8),
509   make_tuple(&aom_lpf_horizontal_6_dual_sse2, &aom_lpf_horizontal_6_dual_c, 8),
510   make_tuple(&aom_lpf_vertical_6_dual_sse2, &aom_lpf_vertical_6_dual_c, 8),
511   make_tuple(&aom_lpf_horizontal_8_dual_sse2, &aom_lpf_horizontal_8_dual_c, 8),
512   make_tuple(&aom_lpf_vertical_8_dual_sse2, &aom_lpf_vertical_8_dual_c, 8),
513   make_tuple(&aom_lpf_horizontal_14_dual_sse2, &aom_lpf_horizontal_14_dual_c,
514              8),
515   make_tuple(&aom_lpf_vertical_14_dual_sse2, &aom_lpf_vertical_14_dual_c, 8)
516 };
517 
// Registers every Loop8Test9Param_lbd test once per kLoop8Test9 entry, under
// the "SSE2" instantiation prefix.
518 INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param_lbd,
519                         ::testing::ValuesIn(kLoop8Test9));
520 
521 #endif  // HAVE_SSE2
522 
523 #if HAVE_SSE2
// Parameter table for the aom_highbd_lpf_*_dual filters: each tuple pairs an
// SSE2 function with the C function of the same base name, followed by an
// integer. The same eight filters (horizontal 4/6/8/14, then vertical
// 4/6/8/14) are listed three times, with the integer set to 8, 10 and 12 in
// turn, giving 24 entries.
// NOTE(review): the integer is presumably the bit depth handed to the filter
// as its `bd` argument - confirm against the Loop8Test9Param_hbd fixture.
524 const hbddual_loop_param_t kHbdLoop8Test9[] = {
525   make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
526              &aom_highbd_lpf_horizontal_4_dual_c, 8),
527   make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
528              &aom_highbd_lpf_horizontal_6_dual_c, 8),
529   make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
530              &aom_highbd_lpf_horizontal_8_dual_c, 8),
531   make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
532              &aom_highbd_lpf_horizontal_14_dual_c, 8),
533   make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
534              &aom_highbd_lpf_vertical_4_dual_c, 8),
535   make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
536              &aom_highbd_lpf_vertical_6_dual_c, 8),
537   make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
538              &aom_highbd_lpf_vertical_8_dual_c, 8),
539   make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
540              &aom_highbd_lpf_vertical_14_dual_c, 8),
541   make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
542              &aom_highbd_lpf_horizontal_4_dual_c, 10),
543   make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
544              &aom_highbd_lpf_horizontal_6_dual_c, 10),
545   make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
546              &aom_highbd_lpf_horizontal_8_dual_c, 10),
547   make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
548              &aom_highbd_lpf_horizontal_14_dual_c, 10),
549   make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
550              &aom_highbd_lpf_vertical_4_dual_c, 10),
551   make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
552              &aom_highbd_lpf_vertical_6_dual_c, 10),
553   make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
554              &aom_highbd_lpf_vertical_8_dual_c, 10),
555   make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
556              &aom_highbd_lpf_vertical_14_dual_c, 10),
557   make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
558              &aom_highbd_lpf_horizontal_4_dual_c, 12),
559   make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
560              &aom_highbd_lpf_horizontal_6_dual_c, 12),
561   make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
562              &aom_highbd_lpf_horizontal_8_dual_c, 12),
563   make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
564              &aom_highbd_lpf_horizontal_14_dual_c, 12),
565   make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
566              &aom_highbd_lpf_vertical_4_dual_c, 12),
567   make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
568              &aom_highbd_lpf_vertical_6_dual_c, 12),
569   make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
570              &aom_highbd_lpf_vertical_8_dual_c, 12),
571   make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
572              &aom_highbd_lpf_vertical_14_dual_c, 12),
573 };
574 
// Registers every Loop8Test9Param_hbd test once per kHbdLoop8Test9 entry,
// under the "SSE2" instantiation prefix.
575 INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param_hbd,
576                         ::testing::ValuesIn(kHbdLoop8Test9));
577 
578 #endif  // HAVE_SSE2
579 
580 #if HAVE_NEON
// Parameter table for the single-edge aom_lpf_* filters on NEON: each tuple
// pairs a NEON function with the C function of the same base name, followed by
// the value 8. Vertical filters come first (14, 8, 6, 4), then horizontal in
// the same size order.
// NOTE(review): this reuses the identifier kLoop8Test6 that the HAVE_SSE2
// section also defines in this namespace, so the two sections cannot both be
// compiled in - presumably the build never enables both flags; confirm.
581 const loop_param_t kLoop8Test6[] = {
582   make_tuple(&aom_lpf_vertical_14_neon, &aom_lpf_vertical_14_c, 8),
583   make_tuple(&aom_lpf_vertical_8_neon, &aom_lpf_vertical_8_c, 8),
584   make_tuple(&aom_lpf_vertical_6_neon, &aom_lpf_vertical_6_c, 8),
585   make_tuple(&aom_lpf_vertical_4_neon, &aom_lpf_vertical_4_c, 8),
586   make_tuple(&aom_lpf_horizontal_14_neon, &aom_lpf_horizontal_14_c, 8),
587   make_tuple(&aom_lpf_horizontal_8_neon, &aom_lpf_horizontal_8_c, 8),
588   make_tuple(&aom_lpf_horizontal_6_neon, &aom_lpf_horizontal_6_c, 8),
589   make_tuple(&aom_lpf_horizontal_4_neon, &aom_lpf_horizontal_4_c, 8)
590 };
591 
// Registers every Loop8Test6Param_lbd test once per kLoop8Test6 entry, under
// the "NEON" instantiation prefix.
592 INSTANTIATE_TEST_CASE_P(NEON, Loop8Test6Param_lbd,
593                         ::testing::ValuesIn(kLoop8Test6));
594 #endif  // HAVE_NEON
595 
596 #if HAVE_AVX2
// Parameter table for the aom_highbd_lpf_*_dual filters on AVX2: each tuple
// pairs an AVX2 function with the C function of the same base name, followed
// by an integer. Only the size-4 and size-8 filters are listed (horizontal 4,
// horizontal 8, vertical 4, vertical 8), each with the integer set to 8, 10
// and 12, giving 12 entries.
// NOTE(review): the integer is presumably the bit depth - confirm against the
// Loop8Test9Param_hbd fixture.
// The table has its own name, distinct from kHbdLoop8Test9, so it does not
// clash with the table the HAVE_SSE2 section defines for the same fixture.
597 const hbddual_loop_param_t kHbdLoop8Test9Avx2[] = {
598   make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
599              &aom_highbd_lpf_horizontal_4_dual_c, 8),
600   make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
601              &aom_highbd_lpf_horizontal_4_dual_c, 10),
602   make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
603              &aom_highbd_lpf_horizontal_4_dual_c, 12),
604   make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
605              &aom_highbd_lpf_horizontal_8_dual_c, 8),
606   make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
607              &aom_highbd_lpf_horizontal_8_dual_c, 10),
608   make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
609              &aom_highbd_lpf_horizontal_8_dual_c, 12),
610   make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
611              &aom_highbd_lpf_vertical_4_dual_c, 8),
612   make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
613              &aom_highbd_lpf_vertical_4_dual_c, 10),
614   make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
615              &aom_highbd_lpf_vertical_4_dual_c, 12),
616   make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
617              &aom_highbd_lpf_vertical_8_dual_c, 8),
618   make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
619              &aom_highbd_lpf_vertical_8_dual_c, 10),
620   make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
621              &aom_highbd_lpf_vertical_8_dual_c, 12),
622 };
623 
// Registers every Loop8Test9Param_hbd test once per kHbdLoop8Test9Avx2 entry,
// under the "AVX2" instantiation prefix.
624 INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test9Param_hbd,
625                         ::testing::ValuesIn(kHbdLoop8Test9Avx2));
626 #endif  // HAVE_AVX2
627 }  // namespace
628