1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11 
12 #include <cmath>
13 #include <cstdlib>
14 #include <string>
15 
16 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
17 
18 #include "./aom_config.h"
19 #include "./aom_dsp_rtcd.h"
20 #include "test/acm_random.h"
21 #include "test/clear_system_state.h"
22 #include "test/register_state_check.h"
23 #include "test/util.h"
24 #include "av1/common/av1_loopfilter.h"
25 #include "av1/common/entropy.h"
26 #include "aom/aom_integer.h"
27 
28 using libaom_test::ACMRandom;
29 
30 namespace {
// Horizontally and Vertically need 32x32: 8  Coeffs preceding filtered section
//                                         16 Coeffs within filtered section
//                                         8  Coeffs following filtered section
// Number of samples in each test buffer (a 32x32 block).
const int kNumCoeffs = 32 * 32;

// Number of random blocks exercised by each correctness test.
const int number_of_iterations = 10 * 1000;

// Number of filter invocations performed by each (disabled) speed test.
const int kSpeedTestNum = 500 * 1000;
39 
// Function-pointer types for the loop filters exercised by these tests.
//   loop_op_t      - filter taking one (blimit, limit, thresh) triple.
//   dual_loop_op_t - filter taking two such triples.
// High-bitdepth builds operate on 16-bit samples and take the bit depth as a
// trailing argument; otherwise samples are 8-bit.
#if CONFIG_HIGHBITDEPTH
typedef void (*loop_op_t)(uint16_t *pixels, int pitch,
                          const uint8_t *blimit_ptr, const uint8_t *limit_ptr,
                          const uint8_t *thresh_ptr, int bit_depth);
typedef void (*dual_loop_op_t)(uint16_t *pixels, int pitch,
                               const uint8_t *blimit0_ptr,
                               const uint8_t *limit0_ptr,
                               const uint8_t *thresh0_ptr,
                               const uint8_t *blimit1_ptr,
                               const uint8_t *limit1_ptr,
                               const uint8_t *thresh1_ptr, int bit_depth);
#else
typedef void (*loop_op_t)(uint8_t *pixels, int pitch,
                          const uint8_t *blimit_ptr, const uint8_t *limit_ptr,
                          const uint8_t *thresh_ptr);
typedef void (*dual_loop_op_t)(uint8_t *pixels, int pitch,
                               const uint8_t *blimit0_ptr,
                               const uint8_t *limit0_ptr,
                               const uint8_t *thresh0_ptr,
                               const uint8_t *blimit1_ptr,
                               const uint8_t *limit1_ptr,
                               const uint8_t *thresh1_ptr);
#endif  // CONFIG_HIGHBITDEPTH
55 
56 typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
57 typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
58 
59 class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
60  public:
~Loop8Test6Param()61   virtual ~Loop8Test6Param() {}
SetUp()62   virtual void SetUp() {
63     loopfilter_op_ = GET_PARAM(0);
64     ref_loopfilter_op_ = GET_PARAM(1);
65     bit_depth_ = GET_PARAM(2);
66     mask_ = (1 << bit_depth_) - 1;
67   }
68 
TearDown()69   virtual void TearDown() { libaom_test::ClearSystemState(); }
70 
71  protected:
72   int bit_depth_;
73   int mask_;
74   loop_op_t loopfilter_op_;
75   loop_op_t ref_loopfilter_op_;
76 };
77 
78 class Loop8Test9Param : public ::testing::TestWithParam<dualloop8_param_t> {
79  public:
~Loop8Test9Param()80   virtual ~Loop8Test9Param() {}
SetUp()81   virtual void SetUp() {
82     loopfilter_op_ = GET_PARAM(0);
83     ref_loopfilter_op_ = GET_PARAM(1);
84     bit_depth_ = GET_PARAM(2);
85     mask_ = (1 << bit_depth_) - 1;
86   }
87 
TearDown()88   virtual void TearDown() { libaom_test::ClearSystemState(); }
89 
90  protected:
91   int bit_depth_;
92   int mask_;
93   dual_loop_op_t loopfilter_op_;
94   dual_loop_op_t ref_loopfilter_op_;
95 };
96 
TEST_P(Loop8Test6Param,OperationCheck)97 TEST_P(Loop8Test6Param, OperationCheck) {
98   ACMRandom rnd(ACMRandom::DeterministicSeed());
99   const int count_test_block = number_of_iterations;
100 #if CONFIG_HIGHBITDEPTH
101   int32_t bd = bit_depth_;
102   DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
103   DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
104 #else
105   DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
106   DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
107 #endif  // CONFIG_HIGHBITDEPTH
108   int err_count_total = 0;
109   int first_failure = -1;
110   for (int i = 0; i < count_test_block; ++i) {
111     int err_count = 0;
112     uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
113     DECLARE_ALIGNED(16, const uint8_t,
114                     blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
115                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
116     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
117     DECLARE_ALIGNED(16, const uint8_t,
118                     limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
119                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
120     tmp = rnd.Rand8();
121     DECLARE_ALIGNED(16, const uint8_t,
122                     thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
123                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
124     int32_t p = kNumCoeffs / 32;
125 
126     uint16_t tmp_s[kNumCoeffs];
127     int j = 0;
128     while (j < kNumCoeffs) {
129       uint8_t val = rnd.Rand8();
130       if (val & 0x80) {  // 50% chance to choose a new value.
131         tmp_s[j] = rnd.Rand16();
132         j++;
133       } else {  // 50% chance to repeat previous value in row X times
134         int k = 0;
135         while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
136           if (j < 1) {
137             tmp_s[j] = rnd.Rand16();
138           } else if (val & 0x20) {  // Increment by an value within the limit
139             tmp_s[j] = (tmp_s[j - 1] + (*limit - 1));
140           } else {  // Decrement by an value within the limit
141             tmp_s[j] = (tmp_s[j - 1] - (*limit - 1));
142           }
143           j++;
144         }
145       }
146     }
147     for (j = 0; j < kNumCoeffs; j++) {
148       if (i % 2) {
149         s[j] = tmp_s[j] & mask_;
150       } else {
151         s[j] = tmp_s[p * (j % p) + j / p] & mask_;
152       }
153       ref_s[j] = s[j];
154     }
155 #if CONFIG_HIGHBITDEPTH
156     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
157     ASM_REGISTER_STATE_CHECK(
158         loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
159 #else
160     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh);
161     ASM_REGISTER_STATE_CHECK(
162         loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
163 #endif  // CONFIG_HIGHBITDEPTH
164 
165     for (j = 0; j < kNumCoeffs; ++j) {
166       err_count += ref_s[j] != s[j];
167     }
168     if (err_count && !err_count_total) {
169       first_failure = i;
170     }
171     err_count_total += err_count;
172   }
173   EXPECT_EQ(0, err_count_total)
174       << "Error: Loop8Test6Param, C output doesn't match SSE2 "
175          "loopfilter output. "
176       << "First failed at test case " << first_failure;
177 }
178 
TEST_P(Loop8Test6Param,ValueCheck)179 TEST_P(Loop8Test6Param, ValueCheck) {
180   ACMRandom rnd(ACMRandom::DeterministicSeed());
181   const int count_test_block = number_of_iterations;
182 #if CONFIG_HIGHBITDEPTH
183   const int32_t bd = bit_depth_;
184   DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
185   DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
186 #else
187   DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
188   DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
189 #endif  // CONFIG_HIGHBITDEPTH
190   int err_count_total = 0;
191   int first_failure = -1;
192 
193   // NOTE: The code in av1_loopfilter.c:update_sharpness computes mblim as a
194   // function of sharpness_lvl and the loopfilter lvl as:
195   // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
196   // ...
197   // memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
198   //        SIMD_WIDTH);
199   // This means that the largest value for mblim will occur when sharpness_lvl
200   // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER).
201   // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and
202   // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) =
203   // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4
204 
205   for (int i = 0; i < count_test_block; ++i) {
206     int err_count = 0;
207     uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
208     DECLARE_ALIGNED(16, const uint8_t,
209                     blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
210                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
211     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
212     DECLARE_ALIGNED(16, const uint8_t,
213                     limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
214                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
215     tmp = rnd.Rand8();
216     DECLARE_ALIGNED(16, const uint8_t,
217                     thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
218                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
219     int32_t p = kNumCoeffs / 32;
220     for (int j = 0; j < kNumCoeffs; ++j) {
221       s[j] = rnd.Rand16() & mask_;
222       ref_s[j] = s[j];
223     }
224 #if CONFIG_HIGHBITDEPTH
225     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
226     ASM_REGISTER_STATE_CHECK(
227         loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
228 #else
229     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh);
230     ASM_REGISTER_STATE_CHECK(
231         loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
232 #endif  // CONFIG_HIGHBITDEPTH
233     for (int j = 0; j < kNumCoeffs; ++j) {
234       err_count += ref_s[j] != s[j];
235     }
236     if (err_count && !err_count_total) {
237       first_failure = i;
238     }
239     err_count_total += err_count;
240   }
241   EXPECT_EQ(0, err_count_total)
242       << "Error: Loop8Test6Param, C output doesn't match SSE2 "
243          "loopfilter output. "
244       << "First failed at test case " << first_failure;
245 }
246 
// Timing loop (disabled by default): applies the filter under test
// kSpeedTestNum times to one random block with one set of thresholds. No
// output is checked.
TEST_P(Loop8Test6Param, DISABLED_Speed) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int num_runs = kSpeedTestNum;
  const int32_t p = kNumCoeffs / 32;
  const int edge_offset = 8 + p * 8;
#if CONFIG_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
#else
  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
#endif  // CONFIG_HIGHBITDEPTH

  // Draw blimit, limit and thresh (in that order); each value is replicated
  // across a 16-byte aligned array.
  uint8_t fill = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
  DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
    fill, fill, fill, fill, fill, fill, fill, fill,
    fill, fill, fill, fill, fill, fill, fill, fill
  };
  fill = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
  DECLARE_ALIGNED(16, const uint8_t, limit[16]) = {
    fill, fill, fill, fill, fill, fill, fill, fill,
    fill, fill, fill, fill, fill, fill, fill, fill
  };
  fill = rnd.Rand8();
  DECLARE_ALIGNED(16, const uint8_t, thresh[16]) = {
    fill, fill, fill, fill, fill, fill, fill, fill,
    fill, fill, fill, fill, fill, fill, fill, fill
  };

  for (int j = 0; j < kNumCoeffs; ++j) {
    s[j] = rnd.Rand16() & mask_;
  }

  for (int run = 0; run < num_runs; ++run) {
#if CONFIG_HIGHBITDEPTH
    loopfilter_op_(s + edge_offset, p, blimit, limit, thresh, bd);
#else
    loopfilter_op_(s + edge_offset, p, blimit, limit, thresh);
#endif  // CONFIG_HIGHBITDEPTH
  }
}
283 
TEST_P(Loop8Test9Param,OperationCheck)284 TEST_P(Loop8Test9Param, OperationCheck) {
285   ACMRandom rnd(ACMRandom::DeterministicSeed());
286   const int count_test_block = number_of_iterations;
287 #if CONFIG_HIGHBITDEPTH
288   const int32_t bd = bit_depth_;
289   DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
290   DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
291 #else
292   DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
293   DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
294 #endif  // CONFIG_HIGHBITDEPTH
295   int err_count_total = 0;
296   int first_failure = -1;
297   for (int i = 0; i < count_test_block; ++i) {
298     int err_count = 0;
299     uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
300     DECLARE_ALIGNED(16, const uint8_t,
301                     blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
302                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
303     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
304     DECLARE_ALIGNED(16, const uint8_t,
305                     limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
306                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
307     tmp = rnd.Rand8();
308     DECLARE_ALIGNED(16, const uint8_t,
309                     thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
310                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
311     tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
312     DECLARE_ALIGNED(16, const uint8_t,
313                     blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
314                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
315     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
316     DECLARE_ALIGNED(16, const uint8_t,
317                     limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
318                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
319     tmp = rnd.Rand8();
320     DECLARE_ALIGNED(16, const uint8_t,
321                     thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
322                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
323     int32_t p = kNumCoeffs / 32;
324     uint16_t tmp_s[kNumCoeffs];
325     int j = 0;
326     const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1;
327     while (j < kNumCoeffs) {
328       uint8_t val = rnd.Rand8();
329       if (val & 0x80) {  // 50% chance to choose a new value.
330         tmp_s[j] = rnd.Rand16();
331         j++;
332       } else {  // 50% chance to repeat previous value in row X times.
333         int k = 0;
334         while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
335           if (j < 1) {
336             tmp_s[j] = rnd.Rand16();
337           } else if (val & 0x20) {  // Increment by a value within the limit.
338             tmp_s[j] = (tmp_s[j - 1] + (limit - 1));
339           } else {  // Decrement by an value within the limit.
340             tmp_s[j] = (tmp_s[j - 1] - (limit - 1));
341           }
342           j++;
343         }
344       }
345     }
346     for (j = 0; j < kNumCoeffs; j++) {
347       if (i % 2) {
348         s[j] = tmp_s[j] & mask_;
349       } else {
350         s[j] = tmp_s[p * (j % p) + j / p] & mask_;
351       }
352       ref_s[j] = s[j];
353     }
354 #if CONFIG_HIGHBITDEPTH
355     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
356                        limit1, thresh1, bd);
357     ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
358                                             thresh0, blimit1, limit1, thresh1,
359                                             bd));
360 #else
361     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
362                        limit1, thresh1);
363     ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
364                                             thresh0, blimit1, limit1, thresh1));
365 #endif  // CONFIG_HIGHBITDEPTH
366     for (j = 0; j < kNumCoeffs; ++j) {
367       err_count += ref_s[j] != s[j];
368     }
369     if (err_count && !err_count_total) {
370       first_failure = i;
371     }
372     err_count_total += err_count;
373   }
374   EXPECT_EQ(0, err_count_total)
375       << "Error: Loop8Test9Param, C output doesn't match SSE2 "
376          "loopfilter output. "
377       << "First failed at test case " << first_failure;
378 }
379 
TEST_P(Loop8Test9Param,ValueCheck)380 TEST_P(Loop8Test9Param, ValueCheck) {
381   ACMRandom rnd(ACMRandom::DeterministicSeed());
382   const int count_test_block = number_of_iterations;
383 #if CONFIG_HIGHBITDEPTH
384   DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
385   DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
386 #else
387   DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
388   DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
389 #endif  // CONFIG_HIGHBITDEPTH
390   int err_count_total = 0;
391   int first_failure = -1;
392   for (int i = 0; i < count_test_block; ++i) {
393     int err_count = 0;
394     uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
395     DECLARE_ALIGNED(16, const uint8_t,
396                     blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
397                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
398     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
399     DECLARE_ALIGNED(16, const uint8_t,
400                     limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
401                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
402     tmp = rnd.Rand8();
403     DECLARE_ALIGNED(16, const uint8_t,
404                     thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
405                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
406     tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
407     DECLARE_ALIGNED(16, const uint8_t,
408                     blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
409                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
410     tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
411     DECLARE_ALIGNED(16, const uint8_t,
412                     limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
413                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
414     tmp = rnd.Rand8();
415     DECLARE_ALIGNED(16, const uint8_t,
416                     thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
417                                      tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
418     int32_t p = kNumCoeffs / 32;  // TODO(pdlf) can we have non-square here?
419     for (int j = 0; j < kNumCoeffs; ++j) {
420       s[j] = rnd.Rand16() & mask_;
421       ref_s[j] = s[j];
422     }
423 #if CONFIG_HIGHBITDEPTH
424     const int32_t bd = bit_depth_;
425     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
426                        limit1, thresh1, bd);
427     ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
428                                             thresh0, blimit1, limit1, thresh1,
429                                             bd));
430 #else
431     ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
432                        limit1, thresh1);
433     ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
434                                             thresh0, blimit1, limit1, thresh1));
435 #endif  // CONFIG_HIGHBITDEPTH
436     for (int j = 0; j < kNumCoeffs; ++j) {
437       err_count += ref_s[j] != s[j];
438     }
439     if (err_count && !err_count_total) {
440       first_failure = i;
441     }
442     err_count_total += err_count;
443   }
444   EXPECT_EQ(0, err_count_total)
445       << "Error: Loop8Test9Param, C output doesn't match SSE2"
446          "loopfilter output. "
447       << "First failed at test case " << first_failure;
448 }
449 
// Timing loop (disabled by default): applies the dual filter under test
// kSpeedTestNum times to one random block with one pair of parameter sets.
// No output is checked.
TEST_P(Loop8Test9Param, DISABLED_Speed) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int num_runs = kSpeedTestNum;
  const int32_t p = kNumCoeffs / 32;  // TODO(pdlf) can we have non-square here?
  const int edge_offset = 8 + p * 8;
#if CONFIG_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
#else
  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
#endif  // CONFIG_HIGHBITDEPTH

  // Draw blimit/limit/thresh for the first parameter set, then for the
  // second; each value is replicated across a 16-byte aligned array.
  uint8_t fill = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
  DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
    fill, fill, fill, fill, fill, fill, fill, fill,
    fill, fill, fill, fill, fill, fill, fill, fill
  };
  fill = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
  DECLARE_ALIGNED(16, const uint8_t, limit0[16]) = {
    fill, fill, fill, fill, fill, fill, fill, fill,
    fill, fill, fill, fill, fill, fill, fill, fill
  };
  fill = rnd.Rand8();
  DECLARE_ALIGNED(16, const uint8_t, thresh0[16]) = {
    fill, fill, fill, fill, fill, fill, fill, fill,
    fill, fill, fill, fill, fill, fill, fill, fill
  };
  fill = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
  DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
    fill, fill, fill, fill, fill, fill, fill, fill,
    fill, fill, fill, fill, fill, fill, fill, fill
  };
  fill = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
  DECLARE_ALIGNED(16, const uint8_t, limit1[16]) = {
    fill, fill, fill, fill, fill, fill, fill, fill,
    fill, fill, fill, fill, fill, fill, fill, fill
  };
  fill = rnd.Rand8();
  DECLARE_ALIGNED(16, const uint8_t, thresh1[16]) = {
    fill, fill, fill, fill, fill, fill, fill, fill,
    fill, fill, fill, fill, fill, fill, fill, fill
  };

  for (int j = 0; j < kNumCoeffs; ++j) {
    s[j] = rnd.Rand16() & mask_;
  }

  for (int run = 0; run < num_runs; ++run) {
#if CONFIG_HIGHBITDEPTH
    loopfilter_op_(s + edge_offset, p, blimit0, limit0, thresh0, blimit1,
                   limit1, thresh1, bd);
#else
    loopfilter_op_(s + edge_offset, p, blimit0, limit0, thresh0, blimit1,
                   limit1, thresh1);
#endif  // CONFIG_HIGHBITDEPTH
  }
}
499 
500 using std::tr1::make_tuple;
501 
// SSE2 filters of type loop_op_t, each paired with its C counterpart.
#if HAVE_SSE2
#if CONFIG_HIGHBITDEPTH

const loop8_param_t kHbdLoop8Test6[] = {
  // Bit depth 8.
  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
             8),
  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 8),
  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
             8),
  make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
             &aom_highbd_lpf_horizontal_edge_8_c, 8),
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
             &aom_highbd_lpf_horizontal_edge_16_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
             8),
  // Bit depth 10.
  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
             10),
  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 10),
  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
             10),
  make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
             &aom_highbd_lpf_horizontal_edge_8_c, 10),
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
             &aom_highbd_lpf_horizontal_edge_16_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
             10),
  // Bit depth 12.
  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
             12),
  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 12),
  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
             12),
  make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
             &aom_highbd_lpf_horizontal_edge_8_c, 12),
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
             &aom_highbd_lpf_horizontal_edge_16_c, 12),
  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 12),
  make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
             12),
  // vertical_16_dual at each bit depth.
  make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
             &aom_highbd_lpf_vertical_16_dual_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
             &aom_highbd_lpf_vertical_16_dual_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
             &aom_highbd_lpf_vertical_16_dual_c, 12)
};

INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param,
                        ::testing::ValuesIn(kHbdLoop8Test6));
#else   // !CONFIG_HIGHBITDEPTH
const loop8_param_t kLoop8Test6[] = {
  make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8),
  make_tuple(&aom_lpf_horizontal_8_sse2, &aom_lpf_horizontal_8_c, 8),
  make_tuple(&aom_lpf_horizontal_edge_8_sse2, &aom_lpf_horizontal_edge_8_c, 8),
  make_tuple(&aom_lpf_horizontal_edge_16_sse2, &aom_lpf_horizontal_edge_16_c,
             8),
  make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8),
  make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8),
  make_tuple(&aom_lpf_vertical_16_sse2, &aom_lpf_vertical_16_c, 8),
#if !CONFIG_PARALLEL_DEBLOCKING
  make_tuple(&aom_lpf_vertical_16_dual_sse2, &aom_lpf_vertical_16_dual_c, 8)
#endif  // !CONFIG_PARALLEL_DEBLOCKING
};

INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param,
                        ::testing::ValuesIn(kLoop8Test6));
#endif  // CONFIG_HIGHBITDEPTH
#endif  // HAVE_SSE2
571 
// AVX2 high-bitdepth filters of type loop_op_t, each paired with its C
// counterpart at bit depths 8, 10 and 12.
#if HAVE_AVX2
#if CONFIG_HIGHBITDEPTH

const loop8_param_t kHbdLoop8Test6Avx2[] = {
  // horizontal_edge_16
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
             &aom_highbd_lpf_horizontal_edge_16_c, 8),
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
             &aom_highbd_lpf_horizontal_edge_16_c, 10),
  make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
             &aom_highbd_lpf_horizontal_edge_16_c, 12),
  // vertical_16_dual
  make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
             &aom_highbd_lpf_vertical_16_dual_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
             &aom_highbd_lpf_vertical_16_dual_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
             &aom_highbd_lpf_vertical_16_dual_c, 12)
};

INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test6Param,
                        ::testing::ValuesIn(kHbdLoop8Test6Avx2));

#endif  // CONFIG_HIGHBITDEPTH
#endif  // HAVE_AVX2
595 
// AVX2 8-bit horizontal edge filters, each paired with its C counterpart.
// Only built without high bitdepth and without parallel deblocking.
#if HAVE_AVX2 && (!CONFIG_HIGHBITDEPTH) && (!CONFIG_PARALLEL_DEBLOCKING)
INSTANTIATE_TEST_CASE_P(
    AVX2, Loop8Test6Param,
    ::testing::Values(
        make_tuple(&aom_lpf_horizontal_edge_8_avx2,
                   &aom_lpf_horizontal_edge_8_c, 8),
        make_tuple(&aom_lpf_horizontal_edge_16_avx2,
                   &aom_lpf_horizontal_edge_16_c, 8)));
#endif  // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH && !CONFIG_PARALLEL_DEBLOCKING
604 
// SSE2 filters of type dual_loop_op_t, each paired with its C counterpart.
#if HAVE_SSE2
#if CONFIG_HIGHBITDEPTH
const dualloop8_param_t kHbdLoop8Test9[] = {
  // Bit depth 8.
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
             &aom_highbd_lpf_horizontal_4_dual_c, 8),
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
             &aom_highbd_lpf_horizontal_8_dual_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
             &aom_highbd_lpf_vertical_4_dual_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
             &aom_highbd_lpf_vertical_8_dual_c, 8),
  // Bit depth 10.
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
             &aom_highbd_lpf_horizontal_4_dual_c, 10),
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
             &aom_highbd_lpf_horizontal_8_dual_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
             &aom_highbd_lpf_vertical_4_dual_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
             &aom_highbd_lpf_vertical_8_dual_c, 10),
  // Bit depth 12.
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
             &aom_highbd_lpf_horizontal_4_dual_c, 12),
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
             &aom_highbd_lpf_horizontal_8_dual_c, 12),
  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
             &aom_highbd_lpf_vertical_4_dual_c, 12),
  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
             &aom_highbd_lpf_vertical_8_dual_c, 12)
};

INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param,
                        ::testing::ValuesIn(kHbdLoop8Test9));
#else   // !CONFIG_HIGHBITDEPTH
#if !CONFIG_PARALLEL_DEBLOCKING
const dualloop8_param_t kLoop8Test9[] = {
  make_tuple(&aom_lpf_horizontal_4_dual_sse2, &aom_lpf_horizontal_4_dual_c, 8),
  make_tuple(&aom_lpf_horizontal_8_dual_sse2, &aom_lpf_horizontal_8_dual_c, 8),
  make_tuple(&aom_lpf_vertical_4_dual_sse2, &aom_lpf_vertical_4_dual_c, 8),
  make_tuple(&aom_lpf_vertical_8_dual_sse2, &aom_lpf_vertical_8_dual_c, 8)
};

INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param,
                        ::testing::ValuesIn(kLoop8Test9));
#endif  // !CONFIG_PARALLEL_DEBLOCKING
#endif  // CONFIG_HIGHBITDEPTH
#endif  // HAVE_SSE2
650 
// AVX2 high-bitdepth filters of type dual_loop_op_t, each paired with its C
// counterpart at bit depths 8, 10 and 12.
#if HAVE_AVX2
#if CONFIG_HIGHBITDEPTH
const dualloop8_param_t kHbdLoop8Test9Avx2[] = {
  // horizontal_4_dual
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
             &aom_highbd_lpf_horizontal_4_dual_c, 8),
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
             &aom_highbd_lpf_horizontal_4_dual_c, 10),
  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
             &aom_highbd_lpf_horizontal_4_dual_c, 12),
  // horizontal_8_dual
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
             &aom_highbd_lpf_horizontal_8_dual_c, 8),
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
             &aom_highbd_lpf_horizontal_8_dual_c, 10),
  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
             &aom_highbd_lpf_horizontal_8_dual_c, 12),
  // vertical_4_dual
  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
             &aom_highbd_lpf_vertical_4_dual_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
             &aom_highbd_lpf_vertical_4_dual_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
             &aom_highbd_lpf_vertical_4_dual_c, 12),
  // vertical_8_dual
  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
             &aom_highbd_lpf_vertical_8_dual_c, 8),
  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
             &aom_highbd_lpf_vertical_8_dual_c, 10),
  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
             &aom_highbd_lpf_vertical_8_dual_c, 12),
};

INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test9Param,
                        ::testing::ValuesIn(kHbdLoop8Test9Avx2));
#endif  // CONFIG_HIGHBITDEPTH
#endif  // HAVE_AVX2
684 
685 #if HAVE_NEON && (!CONFIG_PARALLEL_DEBLOCKING)
686 #if CONFIG_HIGHBITDEPTH
687 // No neon high bitdepth functions.
688 #else
// Registers the Loop8Test6Param suite under the "NEON" prefix. Each tuple
// pairs an aom_lpf_*_neon function with its aom_lpf_*_c counterpart and the
// value 8 (presumably the bit depth -- confirm against the fixture).
// The horizontal_edge_8/16 and vertical_16(_dual) rows are only included when
// HAVE_NEON_ASM is set; the 4- and 8-wide rows are always included.
// NOTE(review): a preprocessor directive inside macro arguments is formally
// undefined behaviour in ISO C++ ([cpp.replace]); it works with the compilers
// this is built with. Moving the rows into a table passed through
// ::testing::ValuesIn, as the AVX2 instantiation in this file does, would
// avoid it.
689 INSTANTIATE_TEST_CASE_P(
690     NEON, Loop8Test6Param,
691     ::testing::Values(
692 #if HAVE_NEON_ASM
693         // Using #if inside the macro is unsupported on MSVS, but that is
694         // acceptable here: the tests are not currently built for MSVS with
695         // ARM and NEON.
696         make_tuple(&aom_lpf_horizontal_edge_8_neon,
697                    &aom_lpf_horizontal_edge_8_c, 8),
698         make_tuple(&aom_lpf_horizontal_edge_16_neon,
699                    &aom_lpf_horizontal_edge_16_c, 8),
700         make_tuple(&aom_lpf_vertical_16_neon, &aom_lpf_vertical_16_c, 8),
701         make_tuple(&aom_lpf_vertical_16_dual_neon, &aom_lpf_vertical_16_dual_c,
702                    8),
703 #endif  // HAVE_NEON_ASM
704         make_tuple(&aom_lpf_horizontal_8_neon, &aom_lpf_horizontal_8_c, 8),
705         make_tuple(&aom_lpf_vertical_8_neon, &aom_lpf_vertical_8_c, 8),
706         make_tuple(&aom_lpf_horizontal_4_neon, &aom_lpf_horizontal_4_c, 8),
707         make_tuple(&aom_lpf_vertical_4_neon, &aom_lpf_vertical_4_c, 8)));
// Registers the Loop8Test9Param suite under the "NEON" prefix. Each tuple
// pairs an aom_lpf_*_dual_neon function with its aom_lpf_*_dual_c counterpart
// and the value 8 (presumably the bit depth -- confirm against the fixture).
// The 8-wide dual rows are only included when HAVE_NEON_ASM is set; the 4-wide
// dual rows are always included.
// NOTE(review): as with the other NEON instantiation in this file, the #if
// sits inside the macro's argument list, which is formally undefined
// behaviour in ISO C++ ([cpp.replace]) and unsupported on MSVS.
708 INSTANTIATE_TEST_CASE_P(NEON, Loop8Test9Param,
709                         ::testing::Values(
710 #if HAVE_NEON_ASM
711                             make_tuple(&aom_lpf_horizontal_8_dual_neon,
712                                        &aom_lpf_horizontal_8_dual_c, 8),
713                             make_tuple(&aom_lpf_vertical_8_dual_neon,
714                                        &aom_lpf_vertical_8_dual_c, 8),
715 #endif  // HAVE_NEON_ASM
716                             make_tuple(&aom_lpf_horizontal_4_dual_neon,
717                                        &aom_lpf_horizontal_4_dual_c, 8),
718                             make_tuple(&aom_lpf_vertical_4_dual_neon,
719                                        &aom_lpf_vertical_4_dual_c, 8)));
720 #endif  // CONFIG_HIGHBITDEPTH
721 #endif  // HAVE_NEON && (!CONFIG_PARALLEL_DEBLOCKING)
722 
723 #if HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH && (!CONFIG_PARALLEL_DEBLOCKING)
// Registers the Loop8Test6Param suite under the "DSPR2" prefix. Each tuple
// pairs an aom_lpf_*_dspr2 function with its aom_lpf_*_c counterpart and the
// value 8 (presumably the bit depth -- confirm against the fixture).
// The horizontal_edge_8/16 rows name the _dspr2 and _c symbols explicitly,
// like every other row here and like the NEON and MSA edge rows in this file:
// passing the un-suffixed aom_lpf_horizontal_edge_* name in both slots would
// compare a function with itself, so the check could never fail.
724 INSTANTIATE_TEST_CASE_P(
725     DSPR2, Loop8Test6Param,
726     ::testing::Values(
727         make_tuple(&aom_lpf_horizontal_4_dspr2, &aom_lpf_horizontal_4_c, 8),
728         make_tuple(&aom_lpf_horizontal_8_dspr2, &aom_lpf_horizontal_8_c, 8),
729         make_tuple(&aom_lpf_horizontal_edge_8_dspr2,
                       &aom_lpf_horizontal_edge_8_c, 8),
730         make_tuple(&aom_lpf_horizontal_edge_16_dspr2,
                       &aom_lpf_horizontal_edge_16_c, 8),
731         make_tuple(&aom_lpf_vertical_4_dspr2, &aom_lpf_vertical_4_c, 8),
732         make_tuple(&aom_lpf_vertical_8_dspr2, &aom_lpf_vertical_8_c, 8),
733         make_tuple(&aom_lpf_vertical_16_dspr2, &aom_lpf_vertical_16_c, 8),
734         make_tuple(&aom_lpf_vertical_16_dual_dspr2, &aom_lpf_vertical_16_dual_c,
735                    8)));
736 
// Registers the Loop8Test9Param suite under the "DSPR2" prefix. Each tuple
// pairs an aom_lpf_*_dual_dspr2 function (horizontal/vertical, 4- and 8-wide)
// with its aom_lpf_*_dual_c counterpart and the value 8 (presumably the bit
// depth -- confirm against the fixture).
737 INSTANTIATE_TEST_CASE_P(
738     DSPR2, Loop8Test9Param,
739     ::testing::Values(make_tuple(&aom_lpf_horizontal_4_dual_dspr2,
740                                  &aom_lpf_horizontal_4_dual_c, 8),
741                       make_tuple(&aom_lpf_horizontal_8_dual_dspr2,
742                                  &aom_lpf_horizontal_8_dual_c, 8),
743                       make_tuple(&aom_lpf_vertical_4_dual_dspr2,
744                                  &aom_lpf_vertical_4_dual_c, 8),
745                       make_tuple(&aom_lpf_vertical_8_dual_dspr2,
746                                  &aom_lpf_vertical_8_dual_c, 8)));
747 #endif  // HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH && (!CONFIG_PARALLEL_DEBLOCKING)
748 
749 #if HAVE_MSA && (!CONFIG_HIGHBITDEPTH) && (!CONFIG_PARALLEL_DEBLOCKING)
// Registers the Loop8Test6Param suite under the "MSA" prefix. Each tuple
// pairs an aom_lpf_*_msa function with its aom_lpf_*_c counterpart and the
// value 8 (presumably the bit depth -- confirm against the fixture).
// NOTE(review): unlike the NEON and DSPR2 lists in this file, there is no
// vertical_16_dual row here -- confirm whether an MSA version exists and is
// simply untested.
750 INSTANTIATE_TEST_CASE_P(
751     MSA, Loop8Test6Param,
752     ::testing::Values(
753         make_tuple(&aom_lpf_horizontal_4_msa, &aom_lpf_horizontal_4_c, 8),
754         make_tuple(&aom_lpf_horizontal_8_msa, &aom_lpf_horizontal_8_c, 8),
755         make_tuple(&aom_lpf_horizontal_edge_8_msa, &aom_lpf_horizontal_edge_8_c,
756                    8),
757         make_tuple(&aom_lpf_horizontal_edge_16_msa,
758                    &aom_lpf_horizontal_edge_16_c, 8),
759         make_tuple(&aom_lpf_vertical_4_msa, &aom_lpf_vertical_4_c, 8),
760         make_tuple(&aom_lpf_vertical_8_msa, &aom_lpf_vertical_8_c, 8),
761         make_tuple(&aom_lpf_vertical_16_msa, &aom_lpf_vertical_16_c, 8)));
762 
// Registers the Loop8Test9Param suite under the "MSA" prefix. Each tuple
// pairs an aom_lpf_*_dual_msa function (horizontal/vertical, 4- and 8-wide)
// with its aom_lpf_*_dual_c counterpart and the value 8 (presumably the bit
// depth -- confirm against the fixture).
763 INSTANTIATE_TEST_CASE_P(
764     MSA, Loop8Test9Param,
765     ::testing::Values(make_tuple(&aom_lpf_horizontal_4_dual_msa,
766                                  &aom_lpf_horizontal_4_dual_c, 8),
767                       make_tuple(&aom_lpf_horizontal_8_dual_msa,
768                                  &aom_lpf_horizontal_8_dual_c, 8),
769                       make_tuple(&aom_lpf_vertical_4_dual_msa,
770                                  &aom_lpf_vertical_4_dual_c, 8),
771                       make_tuple(&aom_lpf_vertical_8_dual_msa,
772                                  &aom_lpf_vertical_8_dual_c, 8)));
773 #endif  // HAVE_MSA && (!CONFIG_HIGHBITDEPTH) && (!CONFIG_PARALLEL_DEBLOCKING)
774 
775 }  // namespace
776