1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <cmath>
13 #include <cstdlib>
14 #include <string>
15
16 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
17
18 #include "./aom_config.h"
19 #include "./aom_dsp_rtcd.h"
20 #include "test/acm_random.h"
21 #include "test/clear_system_state.h"
22 #include "test/register_state_check.h"
23 #include "test/util.h"
24 #include "av1/common/av1_loopfilter.h"
25 #include "av1/common/entropy.h"
26 #include "aom/aom_integer.h"
27
28 using libaom_test::ACMRandom;
29
30 namespace {
// Both the horizontal and the vertical filters are exercised on a 32x32
// buffer:
//   8 coefficients preceding the filtered section,
//   16 coefficients within the filtered section,
//   8 coefficients following the filtered section.
const int kNumCoeffs = 32 * 32;

// Number of random blocks processed by each OperationCheck/ValueCheck test.
const int number_of_iterations = 10000;

// Number of filter invocations performed by each (disabled) Speed test.
const int kSpeedTestNum = 500000;
39
40 #if CONFIG_HIGHBITDEPTH
41 typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit,
42 const uint8_t *limit, const uint8_t *thresh, int bd);
43 typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0,
44 const uint8_t *limit0, const uint8_t *thresh0,
45 const uint8_t *blimit1, const uint8_t *limit1,
46 const uint8_t *thresh1, int bd);
47 #else
48 typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit,
49 const uint8_t *limit, const uint8_t *thresh);
50 typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
51 const uint8_t *limit0, const uint8_t *thresh0,
52 const uint8_t *blimit1, const uint8_t *limit1,
53 const uint8_t *thresh1);
54 #endif // CONFIG_HIGHBITDEPTH
55
56 typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
57 typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
58
59 class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
60 public:
~Loop8Test6Param()61 virtual ~Loop8Test6Param() {}
SetUp()62 virtual void SetUp() {
63 loopfilter_op_ = GET_PARAM(0);
64 ref_loopfilter_op_ = GET_PARAM(1);
65 bit_depth_ = GET_PARAM(2);
66 mask_ = (1 << bit_depth_) - 1;
67 }
68
TearDown()69 virtual void TearDown() { libaom_test::ClearSystemState(); }
70
71 protected:
72 int bit_depth_;
73 int mask_;
74 loop_op_t loopfilter_op_;
75 loop_op_t ref_loopfilter_op_;
76 };
77
78 class Loop8Test9Param : public ::testing::TestWithParam<dualloop8_param_t> {
79 public:
~Loop8Test9Param()80 virtual ~Loop8Test9Param() {}
SetUp()81 virtual void SetUp() {
82 loopfilter_op_ = GET_PARAM(0);
83 ref_loopfilter_op_ = GET_PARAM(1);
84 bit_depth_ = GET_PARAM(2);
85 mask_ = (1 << bit_depth_) - 1;
86 }
87
TearDown()88 virtual void TearDown() { libaom_test::ClearSystemState(); }
89
90 protected:
91 int bit_depth_;
92 int mask_;
93 dual_loop_op_t loopfilter_op_;
94 dual_loop_op_t ref_loopfilter_op_;
95 };
96
// Feeds the filter under test and the reference filter the same
// run-structured random block and requires the two output buffers to match
// sample for sample. Odd iterations use the generated block as is; even
// iterations use its transpose.
TEST_P(Loop8Test6Param, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int32_t p = kNumCoeffs / 32;  // Row stride of the 32x32 buffer.
#if CONFIG_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
#else
  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
#endif  // CONFIG_HIGHBITDEPTH
  int first_failure = -1;
  int err_count_total = 0;

  for (int i = 0; i < number_of_iterations; ++i) {
    // Draw blimit, limit and thresh (in that order) and replicate each value
    // across a 16-entry array.
    const uint8_t bl = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit[16]) = { bl, bl, bl, bl, bl, bl, bl, bl,
                                    bl, bl, bl, bl, bl, bl, bl, bl };
    const uint8_t lm = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit[16]) = { lm, lm, lm, lm, lm, lm, lm, lm,
                                   lm, lm, lm, lm, lm, lm, lm, lm };
    const uint8_t th = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh[16]) = { th, th, th, th, th, th, th, th,
                                    th, th, th, th, th, th, th, th };

    // Build the source pattern: either a fresh random sample or a ramp whose
    // neighbouring samples differ by (limit - 1).
    uint16_t pattern[kNumCoeffs];
    const int step = *limit - 1;
    int pos = 0;
    while (pos < kNumCoeffs) {
      const uint8_t ctrl = rnd.Rand8();
      if (ctrl & 0x80) {
        // Top bit set (50% chance): take a new random value.
        pattern[pos] = rnd.Rand16();
        ++pos;
        continue;
      }
      // Top bit clear: emit up to (ctrl & 0x1f) + 1 samples, each offset from
      // its predecessor. Bit 0x20 selects increment versus decrement.
      const int run_length = (ctrl & 0x1f) + 1;
      const int delta = (ctrl & 0x20) ? step : -step;
      for (int k = 0; k < run_length && pos < kNumCoeffs; ++k, ++pos) {
        if (pos < 1) {
          pattern[pos] = rnd.Rand16();  // No predecessor to offset from.
        } else {
          pattern[pos] = static_cast<uint16_t>(pattern[pos - 1] + delta);
        }
      }
    }

    // Copy the pattern (transposed on even iterations) into both buffers.
    const bool transpose = (i % 2) == 0;
    for (int j = 0; j < kNumCoeffs; ++j) {
      const int src = transpose ? p * (j % p) + j / p : j;
      s[j] = pattern[src] & mask_;
      ref_s[j] = s[j];
    }

#if CONFIG_HIGHBITDEPTH
    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
    ASM_REGISTER_STATE_CHECK(
        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
#else
    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh);
    ASM_REGISTER_STATE_CHECK(
        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
#endif  // CONFIG_HIGHBITDEPTH

    // Count mismatching samples and remember the first failing iteration.
    int err_count = 0;
    for (int j = 0; j < kNumCoeffs; ++j) {
      if (ref_s[j] != s[j]) ++err_count;
    }
    if (err_count && !err_count_total) first_failure = i;
    err_count_total += err_count;
  }

  EXPECT_EQ(0, err_count_total)
      << "Error: Loop8Test6Param, C output doesn't match SSE2 loopfilter "
         "output. "
      << "First failed at test case " << first_failure;
}
178
// Feeds the filter under test and the reference filter the same block of
// independent random samples and requires the two output buffers to match
// sample for sample.
TEST_P(Loop8Test6Param, ValueCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int32_t p = kNumCoeffs / 32;  // Row stride of the 32x32 buffer.
#if CONFIG_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
#else
  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
#endif  // CONFIG_HIGHBITDEPTH
  int first_failure = -1;
  int err_count_total = 0;

  // NOTE: av1_loopfilter.c:update_sharpness derives mblim from sharpness_lvl
  // and the loop filter level lvl as follows:
  //   block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
  //   ...
  //   memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
  //          SIMD_WIDTH);
  // mblim is therefore largest when sharpness_lvl is 0 and lvl takes its
  // greatest value, MAX_LOOP_FILTER. block_inside_limit then equals
  // MAX_LOOP_FILTER, giving
  //   mblim = 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER
  //         = 3 * MAX_LOOP_FILTER + 4.

  for (int i = 0; i < number_of_iterations; ++i) {
    // Draw blimit, limit and thresh (in that order) and replicate each value
    // across a 16-entry array.
    const uint8_t bl = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit[16]) = { bl, bl, bl, bl, bl, bl, bl, bl,
                                    bl, bl, bl, bl, bl, bl, bl, bl };
    const uint8_t lm = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit[16]) = { lm, lm, lm, lm, lm, lm, lm, lm,
                                   lm, lm, lm, lm, lm, lm, lm, lm };
    const uint8_t th = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh[16]) = { th, th, th, th, th, th, th, th,
                                    th, th, th, th, th, th, th, th };

    // Both buffers start out with identical masked random samples.
    for (int j = 0; j < kNumCoeffs; ++j) {
      ref_s[j] = s[j] = rnd.Rand16() & mask_;
    }

#if CONFIG_HIGHBITDEPTH
    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
    ASM_REGISTER_STATE_CHECK(
        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
#else
    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh);
    ASM_REGISTER_STATE_CHECK(
        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
#endif  // CONFIG_HIGHBITDEPTH

    // Count mismatching samples and remember the first failing iteration.
    int err_count = 0;
    for (int j = 0; j < kNumCoeffs; ++j) {
      if (ref_s[j] != s[j]) ++err_count;
    }
    if (err_count && !err_count_total) first_failure = i;
    err_count_total += err_count;
  }

  EXPECT_EQ(0, err_count_total)
      << "Error: Loop8Test6Param, C output doesn't match SSE2 loopfilter "
         "output. "
      << "First failed at test case " << first_failure;
}
246
// Calls the filter under test kSpeedTestNum times on one random block, using
// a single random choice of blimit/limit/thresh. No output is checked.
TEST_P(Loop8Test6Param, DISABLED_Speed) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
#else
  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
#endif  // CONFIG_HIGHBITDEPTH

  // Draw blimit, limit and thresh (in that order) and replicate each value
  // across a 16-entry array.
  const uint8_t bl = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
  DECLARE_ALIGNED(16, const uint8_t,
                  blimit[16]) = { bl, bl, bl, bl, bl, bl, bl, bl,
                                  bl, bl, bl, bl, bl, bl, bl, bl };
  const uint8_t lm = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
  DECLARE_ALIGNED(16, const uint8_t,
                  limit[16]) = { lm, lm, lm, lm, lm, lm, lm, lm,
                                 lm, lm, lm, lm, lm, lm, lm, lm };
  const uint8_t th = rnd.Rand8();
  DECLARE_ALIGNED(16, const uint8_t,
                  thresh[16]) = { th, th, th, th, th, th, th, th,
                                  th, th, th, th, th, th, th, th };

  const int32_t p = kNumCoeffs / 32;  // Row stride of the 32x32 buffer.
  for (int j = 0; j < kNumCoeffs; ++j) {
    s[j] = rnd.Rand16() & mask_;
  }

  // The buffer is filtered in place repeatedly; only the elapsed time matters.
  for (int run = 0; run < kSpeedTestNum; ++run) {
#if CONFIG_HIGHBITDEPTH
    loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd);
#else
    loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh);
#endif  // CONFIG_HIGHBITDEPTH
  }
}
283
// Feeds the dual filter under test and the reference dual filter the same
// run-structured random block and requires the two output buffers to match
// sample for sample. Odd iterations use the generated block as is; even
// iterations use its transpose.
TEST_P(Loop8Test9Param, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int32_t p = kNumCoeffs / 32;  // Row stride of the 32x32 buffer.
#if CONFIG_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
#else
  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
#endif  // CONFIG_HIGHBITDEPTH
  int first_failure = -1;
  int err_count_total = 0;

  for (int i = 0; i < number_of_iterations; ++i) {
    // Draw blimit, limit, thresh for the first edge and then for the second
    // edge, replicating each value across a 16-entry array.
    const uint8_t bl0 = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit0[16]) = { bl0, bl0, bl0, bl0, bl0, bl0, bl0, bl0,
                                     bl0, bl0, bl0, bl0, bl0, bl0, bl0, bl0 };
    const uint8_t lm0 = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit0[16]) = { lm0, lm0, lm0, lm0, lm0, lm0, lm0, lm0,
                                    lm0, lm0, lm0, lm0, lm0, lm0, lm0, lm0 };
    const uint8_t th0 = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh0[16]) = { th0, th0, th0, th0, th0, th0, th0, th0,
                                     th0, th0, th0, th0, th0, th0, th0, th0 };
    const uint8_t bl1 = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit1[16]) = { bl1, bl1, bl1, bl1, bl1, bl1, bl1, bl1,
                                     bl1, bl1, bl1, bl1, bl1, bl1, bl1, bl1 };
    const uint8_t lm1 = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit1[16]) = { lm1, lm1, lm1, lm1, lm1, lm1, lm1, lm1,
                                    lm1, lm1, lm1, lm1, lm1, lm1, lm1, lm1 };
    const uint8_t th1 = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh1[16]) = { th1, th1, th1, th1, th1, th1, th1, th1,
                                     th1, th1, th1, th1, th1, th1, th1, th1 };

    // Build the source pattern: either a fresh random sample or a ramp whose
    // neighbouring samples differ by (min(limit0, limit1) - 1).
    uint16_t pattern[kNumCoeffs];
    const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1;
    const int step = limit - 1;
    int pos = 0;
    while (pos < kNumCoeffs) {
      const uint8_t ctrl = rnd.Rand8();
      if (ctrl & 0x80) {
        // Top bit set (50% chance): take a new random value.
        pattern[pos] = rnd.Rand16();
        ++pos;
        continue;
      }
      // Top bit clear: emit up to (ctrl & 0x1f) + 1 samples, each offset from
      // its predecessor. Bit 0x20 selects increment versus decrement.
      const int run_length = (ctrl & 0x1f) + 1;
      const int delta = (ctrl & 0x20) ? step : -step;
      for (int k = 0; k < run_length && pos < kNumCoeffs; ++k, ++pos) {
        if (pos < 1) {
          pattern[pos] = rnd.Rand16();  // No predecessor to offset from.
        } else {
          pattern[pos] = static_cast<uint16_t>(pattern[pos - 1] + delta);
        }
      }
    }

    // Copy the pattern (transposed on even iterations) into both buffers.
    const bool transpose = (i % 2) == 0;
    for (int j = 0; j < kNumCoeffs; ++j) {
      const int src = transpose ? p * (j % p) + j / p : j;
      s[j] = pattern[src] & mask_;
      ref_s[j] = s[j];
    }

#if CONFIG_HIGHBITDEPTH
    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
                       limit1, thresh1, bd);
    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
                                            thresh0, blimit1, limit1, thresh1,
                                            bd));
#else
    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
                       limit1, thresh1);
    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
                                            thresh0, blimit1, limit1, thresh1));
#endif  // CONFIG_HIGHBITDEPTH

    // Count mismatching samples and remember the first failing iteration.
    int err_count = 0;
    for (int j = 0; j < kNumCoeffs; ++j) {
      if (ref_s[j] != s[j]) ++err_count;
    }
    if (err_count && !err_count_total) first_failure = i;
    err_count_total += err_count;
  }

  EXPECT_EQ(0, err_count_total)
      << "Error: Loop8Test9Param, C output doesn't match SSE2 loopfilter "
         "output. "
      << "First failed at test case " << first_failure;
}
379
// Feeds the dual filter under test and the reference dual filter the same
// block of independent random samples and requires the two output buffers to
// match sample for sample.
TEST_P(Loop8Test9Param, ValueCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = number_of_iterations;
  const int32_t p = kNumCoeffs / 32;  // TODO(pdlf) can we have non-square here?
#if CONFIG_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
#else
  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
#endif  // CONFIG_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
    // Draw blimit, limit, thresh for the first edge and then for the second
    // edge, replicating each value across a 16-entry array.
    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };

    // Both buffers start out with identical masked random samples.
    for (int j = 0; j < kNumCoeffs; ++j) {
      s[j] = rnd.Rand16() & mask_;
      ref_s[j] = s[j];
    }
#if CONFIG_HIGHBITDEPTH
    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
                       limit1, thresh1, bd);
    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
                                            thresh0, blimit1, limit1, thresh1,
                                            bd));
#else
    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
                       limit1, thresh1);
    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
                                            thresh0, blimit1, limit1, thresh1));
#endif  // CONFIG_HIGHBITDEPTH

    // Count mismatching samples and remember the first failing iteration.
    for (int j = 0; j < kNumCoeffs; ++j) {
      err_count += ref_s[j] != s[j];
    }
    if (err_count && !err_count_total) {
      first_failure = i;
    }
    err_count_total += err_count;
  }
  // The first literal ends in a space so that the concatenated message reads
  // "SSE2 loopfilter output" rather than "SSE2loopfilter output".
  EXPECT_EQ(0, err_count_total)
      << "Error: Loop8Test9Param, C output doesn't match SSE2 "
         "loopfilter output. "
      << "First failed at test case " << first_failure;
}
449
// Calls the dual filter under test kSpeedTestNum times on one random block,
// using a single random choice of both threshold sets. No output is checked.
TEST_P(Loop8Test9Param, DISABLED_Speed) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
#if CONFIG_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
#else
  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
#endif  // CONFIG_HIGHBITDEPTH

  // Draw blimit, limit, thresh for the first edge and then for the second
  // edge, replicating each value across a 16-entry array.
  const uint8_t bl0 = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
  DECLARE_ALIGNED(16, const uint8_t,
                  blimit0[16]) = { bl0, bl0, bl0, bl0, bl0, bl0, bl0, bl0,
                                   bl0, bl0, bl0, bl0, bl0, bl0, bl0, bl0 };
  const uint8_t lm0 = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
  DECLARE_ALIGNED(16, const uint8_t,
                  limit0[16]) = { lm0, lm0, lm0, lm0, lm0, lm0, lm0, lm0,
                                  lm0, lm0, lm0, lm0, lm0, lm0, lm0, lm0 };
  const uint8_t th0 = rnd.Rand8();
  DECLARE_ALIGNED(16, const uint8_t,
                  thresh0[16]) = { th0, th0, th0, th0, th0, th0, th0, th0,
                                   th0, th0, th0, th0, th0, th0, th0, th0 };
  const uint8_t bl1 = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
  DECLARE_ALIGNED(16, const uint8_t,
                  blimit1[16]) = { bl1, bl1, bl1, bl1, bl1, bl1, bl1, bl1,
                                   bl1, bl1, bl1, bl1, bl1, bl1, bl1, bl1 };
  const uint8_t lm1 = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
  DECLARE_ALIGNED(16, const uint8_t,
                  limit1[16]) = { lm1, lm1, lm1, lm1, lm1, lm1, lm1, lm1,
                                  lm1, lm1, lm1, lm1, lm1, lm1, lm1, lm1 };
  const uint8_t th1 = rnd.Rand8();
  DECLARE_ALIGNED(16, const uint8_t,
                  thresh1[16]) = { th1, th1, th1, th1, th1, th1, th1, th1,
                                   th1, th1, th1, th1, th1, th1, th1, th1 };

  const int32_t p = kNumCoeffs / 32;  // TODO(pdlf) can we have non-square here?
  for (int j = 0; j < kNumCoeffs; ++j) {
    s[j] = rnd.Rand16() & mask_;
  }

  // The buffer is filtered in place repeatedly; only the elapsed time matters.
  for (int run = 0; run < kSpeedTestNum; ++run) {
#if CONFIG_HIGHBITDEPTH
    loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1,
                   thresh1, bd);
#else
    loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1,
                   thresh1);
#endif  // CONFIG_HIGHBITDEPTH
  }
}
499
500 using std::tr1::make_tuple;
501
#if HAVE_SSE2
#if CONFIG_HIGHBITDEPTH

// SSE2 high bit-depth filters, each paired with its C counterpart. The order
// of the entries determines the generated test indices, so new entries should
// be appended rather than inserted.
const loop8_param_t kHbdLoop8Test6[] = {
  // Bit depth 8.
  make_tuple(&aom_highbd_lpf_horizontal_4_sse2,
             &aom_highbd_lpf_horizontal_4_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_4_sse2,
             &aom_highbd_lpf_vertical_4_c, 8),
  make_tuple(&aom_highbd_lpf_horizontal_8_sse2,
             &aom_highbd_lpf_horizontal_8_c, 8),
  make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
             &aom_highbd_lpf_horizontal_edge_8_c, 8),
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
             &aom_highbd_lpf_horizontal_edge_16_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_8_sse2,
             &aom_highbd_lpf_vertical_8_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_16_sse2,
             &aom_highbd_lpf_vertical_16_c, 8),
  // Bit depth 10.
  make_tuple(&aom_highbd_lpf_horizontal_4_sse2,
             &aom_highbd_lpf_horizontal_4_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_4_sse2,
             &aom_highbd_lpf_vertical_4_c, 10),
  make_tuple(&aom_highbd_lpf_horizontal_8_sse2,
             &aom_highbd_lpf_horizontal_8_c, 10),
  make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
             &aom_highbd_lpf_horizontal_edge_8_c, 10),
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
             &aom_highbd_lpf_horizontal_edge_16_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_8_sse2,
             &aom_highbd_lpf_vertical_8_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_16_sse2,
             &aom_highbd_lpf_vertical_16_c, 10),
  // Bit depth 12.
  make_tuple(&aom_highbd_lpf_horizontal_4_sse2,
             &aom_highbd_lpf_horizontal_4_c, 12),
  make_tuple(&aom_highbd_lpf_vertical_4_sse2,
             &aom_highbd_lpf_vertical_4_c, 12),
  make_tuple(&aom_highbd_lpf_horizontal_8_sse2,
             &aom_highbd_lpf_horizontal_8_c, 12),
  make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
             &aom_highbd_lpf_horizontal_edge_8_c, 12),
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
             &aom_highbd_lpf_horizontal_edge_16_c, 12),
  make_tuple(&aom_highbd_lpf_vertical_8_sse2,
             &aom_highbd_lpf_vertical_8_c, 12),
  make_tuple(&aom_highbd_lpf_vertical_16_sse2,
             &aom_highbd_lpf_vertical_16_c, 12),
  // vertical_16_dual at bit depths 8, 10 and 12.
  make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
             &aom_highbd_lpf_vertical_16_dual_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
             &aom_highbd_lpf_vertical_16_dual_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
             &aom_highbd_lpf_vertical_16_dual_c, 12)
};

INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param,
                        ::testing::ValuesIn(kHbdLoop8Test6));

#else  // !CONFIG_HIGHBITDEPTH

// SSE2 8-bit filters, each paired with its C counterpart.
const loop8_param_t kLoop8Test6[] = {
  make_tuple(&aom_lpf_horizontal_4_sse2,
             &aom_lpf_horizontal_4_c, 8),
  make_tuple(&aom_lpf_horizontal_8_sse2,
             &aom_lpf_horizontal_8_c, 8),
  make_tuple(&aom_lpf_horizontal_edge_8_sse2,
             &aom_lpf_horizontal_edge_8_c, 8),
  make_tuple(&aom_lpf_horizontal_edge_16_sse2,
             &aom_lpf_horizontal_edge_16_c, 8),
  make_tuple(&aom_lpf_vertical_4_sse2,
             &aom_lpf_vertical_4_c, 8),
  make_tuple(&aom_lpf_vertical_8_sse2,
             &aom_lpf_vertical_8_c, 8),
  make_tuple(&aom_lpf_vertical_16_sse2,
             &aom_lpf_vertical_16_c, 8),
#if !CONFIG_PARALLEL_DEBLOCKING
  // Only listed when parallel deblocking is disabled.
  make_tuple(&aom_lpf_vertical_16_dual_sse2,
             &aom_lpf_vertical_16_dual_c, 8)
#endif
};

INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param,
                        ::testing::ValuesIn(kLoop8Test6));

#endif  // CONFIG_HIGHBITDEPTH
#endif  // HAVE_SSE2
571
#if HAVE_AVX2
#if CONFIG_HIGHBITDEPTH

// AVX2 high bit-depth filters, each paired with its C counterpart and listed
// at bit depths 8, 10 and 12.
const loop8_param_t kHbdLoop8Test6Avx2[] = {
  // horizontal_edge_16.
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
             &aom_highbd_lpf_horizontal_edge_16_c,
             8),
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
             &aom_highbd_lpf_horizontal_edge_16_c,
             10),
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
             &aom_highbd_lpf_horizontal_edge_16_c,
             12),
  // vertical_16_dual.
  make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
             &aom_highbd_lpf_vertical_16_dual_c,
             8),
  make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
             &aom_highbd_lpf_vertical_16_dual_c,
             10),
  make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
             &aom_highbd_lpf_vertical_16_dual_c,
             12)
};

INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test6Param,
                        ::testing::ValuesIn(kHbdLoop8Test6Avx2));

#endif  // CONFIG_HIGHBITDEPTH
#endif  // HAVE_AVX2
595
#if HAVE_AVX2 && (!CONFIG_HIGHBITDEPTH) && (!CONFIG_PARALLEL_DEBLOCKING)
// AVX2 8-bit horizontal edge filters, each paired with its C counterpart.
const loop8_param_t kLoop8Test6Avx2[] = {
  make_tuple(&aom_lpf_horizontal_edge_8_avx2, &aom_lpf_horizontal_edge_8_c, 8),
  make_tuple(&aom_lpf_horizontal_edge_16_avx2, &aom_lpf_horizontal_edge_16_c,
             8)
};

INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test6Param,
                        ::testing::ValuesIn(kLoop8Test6Avx2));
#endif  // HAVE_AVX2 && (!CONFIG_HIGHBITDEPTH) && (!CONFIG_PARALLEL_DEBLOCKING)
604
#if HAVE_SSE2
#if CONFIG_HIGHBITDEPTH

// SSE2 high bit-depth dual filters, each paired with its C counterpart. The
// same four filters are listed once per bit depth.
const dualloop8_param_t kHbdLoop8Test9[] = {
  // Bit depth 8.
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
             &aom_highbd_lpf_horizontal_4_dual_c,
             8),
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
             &aom_highbd_lpf_horizontal_8_dual_c,
             8),
  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
             &aom_highbd_lpf_vertical_4_dual_c,
             8),
  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
             &aom_highbd_lpf_vertical_8_dual_c,
             8),
  // Bit depth 10.
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
             &aom_highbd_lpf_horizontal_4_dual_c,
             10),
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
             &aom_highbd_lpf_horizontal_8_dual_c,
             10),
  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
             &aom_highbd_lpf_vertical_4_dual_c,
             10),
  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
             &aom_highbd_lpf_vertical_8_dual_c,
             10),
  // Bit depth 12.
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
             &aom_highbd_lpf_horizontal_4_dual_c,
             12),
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
             &aom_highbd_lpf_horizontal_8_dual_c,
             12),
  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
             &aom_highbd_lpf_vertical_4_dual_c,
             12),
  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
             &aom_highbd_lpf_vertical_8_dual_c,
             12)
};

INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param,
                        ::testing::ValuesIn(kHbdLoop8Test9));

#else  // !CONFIG_HIGHBITDEPTH
#if !CONFIG_PARALLEL_DEBLOCKING

// SSE2 8-bit dual filters, each paired with its C counterpart. Only
// instantiated when parallel deblocking is disabled.
const dualloop8_param_t kLoop8Test9[] = {
  make_tuple(&aom_lpf_horizontal_4_dual_sse2,
             &aom_lpf_horizontal_4_dual_c, 8),
  make_tuple(&aom_lpf_horizontal_8_dual_sse2,
             &aom_lpf_horizontal_8_dual_c, 8),
  make_tuple(&aom_lpf_vertical_4_dual_sse2,
             &aom_lpf_vertical_4_dual_c, 8),
  make_tuple(&aom_lpf_vertical_8_dual_sse2,
             &aom_lpf_vertical_8_dual_c, 8)
};

INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param,
                        ::testing::ValuesIn(kLoop8Test9));

#endif  // !CONFIG_PARALLEL_DEBLOCKING
#endif  // CONFIG_HIGHBITDEPTH
#endif  // HAVE_SSE2
650
#if HAVE_AVX2
#if CONFIG_HIGHBITDEPTH

// AVX2 high bit-depth dual filters, each paired with its C counterpart and
// listed at bit depths 8, 10 and 12.
const dualloop8_param_t kHbdLoop8Test9Avx2[] = {
  // horizontal_4_dual.
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
             &aom_highbd_lpf_horizontal_4_dual_c,
             8),
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
             &aom_highbd_lpf_horizontal_4_dual_c,
             10),
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
             &aom_highbd_lpf_horizontal_4_dual_c,
             12),
  // horizontal_8_dual.
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
             &aom_highbd_lpf_horizontal_8_dual_c,
             8),
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
             &aom_highbd_lpf_horizontal_8_dual_c,
             10),
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
             &aom_highbd_lpf_horizontal_8_dual_c,
             12),
  // vertical_4_dual.
  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
             &aom_highbd_lpf_vertical_4_dual_c,
             8),
  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
             &aom_highbd_lpf_vertical_4_dual_c,
             10),
  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
             &aom_highbd_lpf_vertical_4_dual_c,
             12),
  // vertical_8_dual.
  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
             &aom_highbd_lpf_vertical_8_dual_c,
             8),
  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
             &aom_highbd_lpf_vertical_8_dual_c,
             10),
  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
             &aom_highbd_lpf_vertical_8_dual_c,
             12),
};

INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test9Param,
                        ::testing::ValuesIn(kHbdLoop8Test9Avx2));

#endif  // CONFIG_HIGHBITDEPTH
#endif  // HAVE_AVX2
684
#if HAVE_NEON && (!CONFIG_PARALLEL_DEBLOCKING)
#if CONFIG_HIGHBITDEPTH
// No neon high bitdepth functions.
#else
// NEON 8-bit filters, each paired with its C counterpart.
//
// The parameters live in tables rather than directly inside
// ::testing::Values(...) so that the HAVE_NEON_ASM conditional does not
// appear within the arguments of a macro invocation; that construct is not
// portable (it is rejected by MSVS).
const loop8_param_t kLoop8Test6Neon[] = {
#if HAVE_NEON_ASM
  make_tuple(&aom_lpf_horizontal_edge_8_neon, &aom_lpf_horizontal_edge_8_c, 8),
  make_tuple(&aom_lpf_horizontal_edge_16_neon, &aom_lpf_horizontal_edge_16_c,
             8),
  make_tuple(&aom_lpf_vertical_16_neon, &aom_lpf_vertical_16_c, 8),
  make_tuple(&aom_lpf_vertical_16_dual_neon, &aom_lpf_vertical_16_dual_c, 8),
#endif  // HAVE_NEON_ASM
  make_tuple(&aom_lpf_horizontal_8_neon, &aom_lpf_horizontal_8_c, 8),
  make_tuple(&aom_lpf_vertical_8_neon, &aom_lpf_vertical_8_c, 8),
  make_tuple(&aom_lpf_horizontal_4_neon, &aom_lpf_horizontal_4_c, 8),
  make_tuple(&aom_lpf_vertical_4_neon, &aom_lpf_vertical_4_c, 8)
};

INSTANTIATE_TEST_CASE_P(NEON, Loop8Test6Param,
                        ::testing::ValuesIn(kLoop8Test6Neon));

// NEON 8-bit dual filters, each paired with its C counterpart.
const dualloop8_param_t kLoop8Test9Neon[] = {
#if HAVE_NEON_ASM
  make_tuple(&aom_lpf_horizontal_8_dual_neon, &aom_lpf_horizontal_8_dual_c, 8),
  make_tuple(&aom_lpf_vertical_8_dual_neon, &aom_lpf_vertical_8_dual_c, 8),
#endif  // HAVE_NEON_ASM
  make_tuple(&aom_lpf_horizontal_4_dual_neon, &aom_lpf_horizontal_4_dual_c, 8),
  make_tuple(&aom_lpf_vertical_4_dual_neon, &aom_lpf_vertical_4_dual_c, 8)
};

INSTANTIATE_TEST_CASE_P(NEON, Loop8Test9Param,
                        ::testing::ValuesIn(kLoop8Test9Neon));
#endif  // CONFIG_HIGHBITDEPTH
#endif  // HAVE_NEON && (!CONFIG_PARALLEL_DEBLOCKING)
722
#if HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH && (!CONFIG_PARALLEL_DEBLOCKING)
// DSPR2 8-bit filters, each paired with its C counterpart.
//
// NOTE(review): the horizontal_edge_8/16 entries previously passed the same
// function as both the filter under test and the reference, so they could
// never fail. They now compare the _dspr2 implementation with the _c one like
// every other entry; confirm the _dspr2 symbols are declared in
// aom_dsp_rtcd.h for DSPR2 builds.
INSTANTIATE_TEST_CASE_P(
    DSPR2, Loop8Test6Param,
    ::testing::Values(
        make_tuple(&aom_lpf_horizontal_4_dspr2, &aom_lpf_horizontal_4_c, 8),
        make_tuple(&aom_lpf_horizontal_8_dspr2, &aom_lpf_horizontal_8_c, 8),
        make_tuple(&aom_lpf_horizontal_edge_8_dspr2,
                   &aom_lpf_horizontal_edge_8_c, 8),
        make_tuple(&aom_lpf_horizontal_edge_16_dspr2,
                   &aom_lpf_horizontal_edge_16_c, 8),
        make_tuple(&aom_lpf_vertical_4_dspr2, &aom_lpf_vertical_4_c, 8),
        make_tuple(&aom_lpf_vertical_8_dspr2, &aom_lpf_vertical_8_c, 8),
        make_tuple(&aom_lpf_vertical_16_dspr2, &aom_lpf_vertical_16_c, 8),
        make_tuple(&aom_lpf_vertical_16_dual_dspr2, &aom_lpf_vertical_16_dual_c,
                   8)));

// DSPR2 8-bit dual filters, each paired with its C counterpart.
INSTANTIATE_TEST_CASE_P(
    DSPR2, Loop8Test9Param,
    ::testing::Values(make_tuple(&aom_lpf_horizontal_4_dual_dspr2,
                                 &aom_lpf_horizontal_4_dual_c, 8),
                      make_tuple(&aom_lpf_horizontal_8_dual_dspr2,
                                 &aom_lpf_horizontal_8_dual_c, 8),
                      make_tuple(&aom_lpf_vertical_4_dual_dspr2,
                                 &aom_lpf_vertical_4_dual_c, 8),
                      make_tuple(&aom_lpf_vertical_8_dual_dspr2,
                                 &aom_lpf_vertical_8_dual_c, 8)));
#endif  // HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH && (!CONFIG_PARALLEL_DEBLOCKING)
748
#if HAVE_MSA && (!CONFIG_HIGHBITDEPTH) && (!CONFIG_PARALLEL_DEBLOCKING)
// MSA 8-bit filters, each paired with its C counterpart.
const loop8_param_t kLoop8Test6Msa[] = {
  make_tuple(&aom_lpf_horizontal_4_msa, &aom_lpf_horizontal_4_c, 8),
  make_tuple(&aom_lpf_horizontal_8_msa, &aom_lpf_horizontal_8_c, 8),
  make_tuple(&aom_lpf_horizontal_edge_8_msa, &aom_lpf_horizontal_edge_8_c, 8),
  make_tuple(&aom_lpf_horizontal_edge_16_msa, &aom_lpf_horizontal_edge_16_c,
             8),
  make_tuple(&aom_lpf_vertical_4_msa, &aom_lpf_vertical_4_c, 8),
  make_tuple(&aom_lpf_vertical_8_msa, &aom_lpf_vertical_8_c, 8),
  make_tuple(&aom_lpf_vertical_16_msa, &aom_lpf_vertical_16_c, 8)
};

INSTANTIATE_TEST_CASE_P(MSA, Loop8Test6Param,
                        ::testing::ValuesIn(kLoop8Test6Msa));

// MSA 8-bit dual filters, each paired with its C counterpart.
const dualloop8_param_t kLoop8Test9Msa[] = {
  make_tuple(&aom_lpf_horizontal_4_dual_msa, &aom_lpf_horizontal_4_dual_c, 8),
  make_tuple(&aom_lpf_horizontal_8_dual_msa, &aom_lpf_horizontal_8_dual_c, 8),
  make_tuple(&aom_lpf_vertical_4_dual_msa, &aom_lpf_vertical_4_dual_c, 8),
  make_tuple(&aom_lpf_vertical_8_dual_msa, &aom_lpf_vertical_8_dual_c, 8)
};

INSTANTIATE_TEST_CASE_P(MSA, Loop8Test9Param,
                        ::testing::ValuesIn(kLoop8Test9Msa));
#endif  // HAVE_MSA && (!CONFIG_HIGHBITDEPTH) && (!CONFIG_PARALLEL_DEBLOCKING)
774
775 } // namespace
776