1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
14 
15 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16 #include "test/acm_random.h"
17 #include "test/clear_system_state.h"
18 #include "test/register_state_check.h"
19 #include "test/util.h"
20 
21 #include "config/aom_config.h"
22 #include "config/aom_dsp_rtcd.h"
23 
24 #include "aom/aom_integer.h"
25 
26 using libaom_test::ACMRandom;
27 
28 namespace {
29 const int number_of_iterations = 200;
30 
31 typedef unsigned int (*MaskedSADFunc)(const uint8_t *src, int src_stride,
32                                       const uint8_t *ref, int ref_stride,
33                                       const uint8_t *second_pred,
34                                       const uint8_t *msk, int msk_stride,
35                                       int invert_mask);
36 typedef ::testing::tuple<MaskedSADFunc, MaskedSADFunc> MaskedSADParam;
37 
38 class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> {
39  public:
~MaskedSADTest()40   virtual ~MaskedSADTest() {}
SetUp()41   virtual void SetUp() {
42     maskedSAD_op_ = GET_PARAM(0);
43     ref_maskedSAD_op_ = GET_PARAM(1);
44   }
45 
TearDown()46   virtual void TearDown() { libaom_test::ClearSystemState(); }
47   void runMaskedSADTest(int run_times);
48 
49  protected:
50   MaskedSADFunc maskedSAD_op_;
51   MaskedSADFunc ref_maskedSAD_op_;
52 };
runMaskedSADTest(int run_times)53 void MaskedSADTest::runMaskedSADTest(int run_times) {
54   unsigned int ref_ret = 0, ret = 1;
55   ACMRandom rnd(ACMRandom::DeterministicSeed());
56   DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
57   DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
58   DECLARE_ALIGNED(16, uint8_t, second_pred_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
59   DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
60   int err_count = 0;
61   int first_failure = -1;
62   int src_stride = MAX_SB_SIZE;
63   int ref_stride = MAX_SB_SIZE;
64   int msk_stride = MAX_SB_SIZE;
65   const int iters = run_times == 1 ? number_of_iterations : 1;
66   for (int i = 0; i < iters; ++i) {
67     for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
68       src_ptr[j] = rnd.Rand8();
69       ref_ptr[j] = rnd.Rand8();
70       second_pred_ptr[j] = rnd.Rand8();
71       msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
72       assert(msk_ptr[j] <= 64);
73     }
74 
75     for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
76       aom_usec_timer timer;
77       aom_usec_timer_start(&timer);
78       for (int repeat = 0; repeat < run_times; ++repeat) {
79         ref_ret = ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
80                                     second_pred_ptr, msk_ptr, msk_stride,
81                                     invert_mask);
82       }
83       aom_usec_timer_mark(&timer);
84       const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
85       aom_usec_timer_start(&timer);
86       if (run_times == 1) {
87         ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src_ptr, src_stride,
88                                                      ref_ptr, ref_stride,
89                                                      second_pred_ptr, msk_ptr,
90                                                      msk_stride, invert_mask));
91       } else {
92         for (int repeat = 0; repeat < run_times; ++repeat) {
93           ret =
94               maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
95                             second_pred_ptr, msk_ptr, msk_stride, invert_mask);
96         }
97       }
98       aom_usec_timer_mark(&timer);
99       const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
100       if (run_times > 10) {
101         printf("%7.2f/%7.2fns", time1, time2);
102         printf("(%3.2f)\n", time1 / time2);
103       }
104       if (ret != ref_ret) {
105         err_count++;
106         if (first_failure == -1) first_failure = i;
107       }
108     }
109   }
110   EXPECT_EQ(0, err_count) << "Error: Masked SAD Test,  output doesn't match. "
111                           << "First failed at test case " << first_failure;
112 }
113 
// Correctness mode: each random data set is evaluated once.
TEST_P(MaskedSADTest, OperationCheck) {
  const int kRunTimes = 1;
  runMaskedSADTest(kRunTimes);
}
115 
// Timing mode. The DISABLED_ prefix keeps it out of normal runs; enable it
// with --gtest_also_run_disabled_tests.
TEST_P(MaskedSADTest, DISABLED_Speed) {
  const int kRunTimes = 2000000;
  runMaskedSADTest(kRunTimes);
}
117 
118 typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *src, int src_stride,
119                                             const uint8_t *ref, int ref_stride,
120                                             const uint8_t *second_pred,
121                                             const uint8_t *msk, int msk_stride,
122                                             int invert_mask);
123 typedef ::testing::tuple<HighbdMaskedSADFunc, HighbdMaskedSADFunc>
124     HighbdMaskedSADParam;
125 
126 class HighbdMaskedSADTest
127     : public ::testing::TestWithParam<HighbdMaskedSADParam> {
128  public:
~HighbdMaskedSADTest()129   virtual ~HighbdMaskedSADTest() {}
SetUp()130   virtual void SetUp() {
131     maskedSAD_op_ = GET_PARAM(0);
132     ref_maskedSAD_op_ = GET_PARAM(1);
133   }
134 
TearDown()135   virtual void TearDown() { libaom_test::ClearSystemState(); }
136   void runHighbdMaskedSADTest(int run_times);
137 
138  protected:
139   HighbdMaskedSADFunc maskedSAD_op_;
140   HighbdMaskedSADFunc ref_maskedSAD_op_;
141 };
runHighbdMaskedSADTest(int run_times)142 void HighbdMaskedSADTest::runHighbdMaskedSADTest(int run_times) {
143   unsigned int ref_ret = 0, ret = 1;
144   ACMRandom rnd(ACMRandom::DeterministicSeed());
145   DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
146   DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
147   DECLARE_ALIGNED(16, uint16_t, second_pred_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
148   DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
149   uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
150   uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
151   uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
152   int err_count = 0;
153   int first_failure = -1;
154   int src_stride = MAX_SB_SIZE;
155   int ref_stride = MAX_SB_SIZE;
156   int msk_stride = MAX_SB_SIZE;
157   const int iters = run_times == 1 ? number_of_iterations : 1;
158   for (int i = 0; i < iters; ++i) {
159     for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
160       src_ptr[j] = rnd.Rand16() & 0xfff;
161       ref_ptr[j] = rnd.Rand16() & 0xfff;
162       second_pred_ptr[j] = rnd.Rand16() & 0xfff;
163       msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
164     }
165 
166     for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
167       aom_usec_timer timer;
168       aom_usec_timer_start(&timer);
169       for (int repeat = 0; repeat < run_times; ++repeat) {
170         ref_ret = ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
171                                     second_pred8_ptr, msk_ptr, msk_stride,
172                                     invert_mask);
173       }
174       aom_usec_timer_mark(&timer);
175       const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
176       aom_usec_timer_start(&timer);
177       if (run_times == 1) {
178         ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride,
179                                                      ref8_ptr, ref_stride,
180                                                      second_pred8_ptr, msk_ptr,
181                                                      msk_stride, invert_mask));
182       } else {
183         for (int repeat = 0; repeat < run_times; ++repeat) {
184           ret =
185               maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
186                             second_pred8_ptr, msk_ptr, msk_stride, invert_mask);
187         }
188       }
189       aom_usec_timer_mark(&timer);
190       const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
191       if (run_times > 10) {
192         printf("%7.2f/%7.2fns", time1, time2);
193         printf("(%3.2f)\n", time1 / time2);
194       }
195       if (ret != ref_ret) {
196         err_count++;
197         if (first_failure == -1) first_failure = i;
198       }
199     }
200   }
201   EXPECT_EQ(0, err_count)
202       << "Error: High BD Masked SAD Test, output doesn't match. "
203       << "First failed at test case " << first_failure;
204 }
205 
// Correctness mode: each random data set is evaluated once.
TEST_P(HighbdMaskedSADTest, OperationCheck) {
  const int kRunTimes = 1;
  runHighbdMaskedSADTest(kRunTimes);
}
207 
// Timing mode. The DISABLED_ prefix keeps it out of normal runs; enable it
// with --gtest_also_run_disabled_tests.
TEST_P(HighbdMaskedSADTest, DISABLED_Speed) {
  const int kRunTimes = 1000000;
  runHighbdMaskedSADTest(kRunTimes);
}
209 
210 using ::testing::make_tuple;
211 
212 #if HAVE_SSSE3
213 const MaskedSADParam msad_test[] = {
214   make_tuple(&aom_masked_sad4x4_ssse3, &aom_masked_sad4x4_c),
215   make_tuple(&aom_masked_sad4x8_ssse3, &aom_masked_sad4x8_c),
216   make_tuple(&aom_masked_sad8x4_ssse3, &aom_masked_sad8x4_c),
217   make_tuple(&aom_masked_sad8x8_ssse3, &aom_masked_sad8x8_c),
218   make_tuple(&aom_masked_sad8x16_ssse3, &aom_masked_sad8x16_c),
219   make_tuple(&aom_masked_sad16x8_ssse3, &aom_masked_sad16x8_c),
220   make_tuple(&aom_masked_sad16x16_ssse3, &aom_masked_sad16x16_c),
221   make_tuple(&aom_masked_sad16x32_ssse3, &aom_masked_sad16x32_c),
222   make_tuple(&aom_masked_sad32x16_ssse3, &aom_masked_sad32x16_c),
223   make_tuple(&aom_masked_sad32x32_ssse3, &aom_masked_sad32x32_c),
224   make_tuple(&aom_masked_sad32x64_ssse3, &aom_masked_sad32x64_c),
225   make_tuple(&aom_masked_sad64x32_ssse3, &aom_masked_sad64x32_c),
226   make_tuple(&aom_masked_sad64x64_ssse3, &aom_masked_sad64x64_c),
227   make_tuple(&aom_masked_sad64x128_ssse3, &aom_masked_sad64x128_c),
228   make_tuple(&aom_masked_sad128x64_ssse3, &aom_masked_sad128x64_c),
229   make_tuple(&aom_masked_sad128x128_ssse3, &aom_masked_sad128x128_c),
230   make_tuple(&aom_masked_sad4x16_ssse3, &aom_masked_sad4x16_c),
231   make_tuple(&aom_masked_sad16x4_ssse3, &aom_masked_sad16x4_c),
232   make_tuple(&aom_masked_sad8x32_ssse3, &aom_masked_sad8x32_c),
233   make_tuple(&aom_masked_sad32x8_ssse3, &aom_masked_sad32x8_c),
234   make_tuple(&aom_masked_sad16x64_ssse3, &aom_masked_sad16x64_c),
235   make_tuple(&aom_masked_sad64x16_ssse3, &aom_masked_sad64x16_c),
236 };
237 
238 INSTANTIATE_TEST_CASE_P(SSSE3, MaskedSADTest, ::testing::ValuesIn(msad_test));
239 
240 const HighbdMaskedSADParam hbd_msad_test[] = {
241   make_tuple(&aom_highbd_masked_sad4x4_ssse3, &aom_highbd_masked_sad4x4_c),
242   make_tuple(&aom_highbd_masked_sad4x8_ssse3, &aom_highbd_masked_sad4x8_c),
243   make_tuple(&aom_highbd_masked_sad8x4_ssse3, &aom_highbd_masked_sad8x4_c),
244   make_tuple(&aom_highbd_masked_sad8x8_ssse3, &aom_highbd_masked_sad8x8_c),
245   make_tuple(&aom_highbd_masked_sad8x16_ssse3, &aom_highbd_masked_sad8x16_c),
246   make_tuple(&aom_highbd_masked_sad16x8_ssse3, &aom_highbd_masked_sad16x8_c),
247   make_tuple(&aom_highbd_masked_sad16x16_ssse3, &aom_highbd_masked_sad16x16_c),
248   make_tuple(&aom_highbd_masked_sad16x32_ssse3, &aom_highbd_masked_sad16x32_c),
249   make_tuple(&aom_highbd_masked_sad32x16_ssse3, &aom_highbd_masked_sad32x16_c),
250   make_tuple(&aom_highbd_masked_sad32x32_ssse3, &aom_highbd_masked_sad32x32_c),
251   make_tuple(&aom_highbd_masked_sad32x64_ssse3, &aom_highbd_masked_sad32x64_c),
252   make_tuple(&aom_highbd_masked_sad64x32_ssse3, &aom_highbd_masked_sad64x32_c),
253   make_tuple(&aom_highbd_masked_sad64x64_ssse3, &aom_highbd_masked_sad64x64_c),
254   make_tuple(&aom_highbd_masked_sad64x128_ssse3,
255              &aom_highbd_masked_sad64x128_c),
256   make_tuple(&aom_highbd_masked_sad128x64_ssse3,
257              &aom_highbd_masked_sad128x64_c),
258   make_tuple(&aom_highbd_masked_sad128x128_ssse3,
259              &aom_highbd_masked_sad128x128_c),
260   make_tuple(&aom_highbd_masked_sad4x16_ssse3, &aom_highbd_masked_sad4x16_c),
261   make_tuple(&aom_highbd_masked_sad16x4_ssse3, &aom_highbd_masked_sad16x4_c),
262   make_tuple(&aom_highbd_masked_sad8x32_ssse3, &aom_highbd_masked_sad8x32_c),
263   make_tuple(&aom_highbd_masked_sad32x8_ssse3, &aom_highbd_masked_sad32x8_c),
264   make_tuple(&aom_highbd_masked_sad16x64_ssse3, &aom_highbd_masked_sad16x64_c),
265   make_tuple(&aom_highbd_masked_sad64x16_ssse3, &aom_highbd_masked_sad64x16_c),
266 };
267 
268 INSTANTIATE_TEST_CASE_P(SSSE3, HighbdMaskedSADTest,
269                         ::testing::ValuesIn(hbd_msad_test));
270 #endif  // HAVE_SSSE3
271 
272 #if HAVE_AVX2
273 const MaskedSADParam msad_avx2_test[] = {
274   make_tuple(&aom_masked_sad4x4_avx2, &aom_masked_sad4x4_ssse3),
275   make_tuple(&aom_masked_sad4x8_avx2, &aom_masked_sad4x8_ssse3),
276   make_tuple(&aom_masked_sad8x4_avx2, &aom_masked_sad8x4_ssse3),
277   make_tuple(&aom_masked_sad8x8_avx2, &aom_masked_sad8x8_ssse3),
278   make_tuple(&aom_masked_sad8x16_avx2, &aom_masked_sad8x16_ssse3),
279   make_tuple(&aom_masked_sad16x8_avx2, &aom_masked_sad16x8_ssse3),
280   make_tuple(&aom_masked_sad16x16_avx2, &aom_masked_sad16x16_ssse3),
281   make_tuple(&aom_masked_sad16x32_avx2, &aom_masked_sad16x32_ssse3),
282   make_tuple(&aom_masked_sad32x16_avx2, &aom_masked_sad32x16_ssse3),
283   make_tuple(&aom_masked_sad32x32_avx2, &aom_masked_sad32x32_ssse3),
284   make_tuple(&aom_masked_sad32x64_avx2, &aom_masked_sad32x64_ssse3),
285   make_tuple(&aom_masked_sad64x32_avx2, &aom_masked_sad64x32_ssse3),
286   make_tuple(&aom_masked_sad64x64_avx2, &aom_masked_sad64x64_ssse3),
287   make_tuple(&aom_masked_sad64x128_avx2, &aom_masked_sad64x128_ssse3),
288   make_tuple(&aom_masked_sad128x64_avx2, &aom_masked_sad128x64_ssse3),
289   make_tuple(&aom_masked_sad128x128_avx2, &aom_masked_sad128x128_ssse3),
290   make_tuple(&aom_masked_sad4x16_avx2, &aom_masked_sad4x16_ssse3),
291   make_tuple(&aom_masked_sad16x4_avx2, &aom_masked_sad16x4_ssse3),
292   make_tuple(&aom_masked_sad8x32_avx2, &aom_masked_sad8x32_ssse3),
293   make_tuple(&aom_masked_sad32x8_avx2, &aom_masked_sad32x8_ssse3),
294   make_tuple(&aom_masked_sad16x64_avx2, &aom_masked_sad16x64_ssse3),
295   make_tuple(&aom_masked_sad64x16_avx2, &aom_masked_sad64x16_ssse3)
296 };
297 
298 INSTANTIATE_TEST_CASE_P(AVX2, MaskedSADTest,
299                         ::testing::ValuesIn(msad_avx2_test));
300 
301 const HighbdMaskedSADParam hbd_msad_avx2_test[] = {
302   make_tuple(&aom_highbd_masked_sad4x4_avx2, &aom_highbd_masked_sad4x4_ssse3),
303   make_tuple(&aom_highbd_masked_sad4x8_avx2, &aom_highbd_masked_sad4x8_ssse3),
304   make_tuple(&aom_highbd_masked_sad8x4_avx2, &aom_highbd_masked_sad8x4_ssse3),
305   make_tuple(&aom_highbd_masked_sad8x8_avx2, &aom_highbd_masked_sad8x8_ssse3),
306   make_tuple(&aom_highbd_masked_sad8x16_avx2, &aom_highbd_masked_sad8x16_ssse3),
307   make_tuple(&aom_highbd_masked_sad16x8_avx2, &aom_highbd_masked_sad16x8_ssse3),
308   make_tuple(&aom_highbd_masked_sad16x16_avx2,
309              &aom_highbd_masked_sad16x16_ssse3),
310   make_tuple(&aom_highbd_masked_sad16x32_avx2,
311              &aom_highbd_masked_sad16x32_ssse3),
312   make_tuple(&aom_highbd_masked_sad32x16_avx2,
313              &aom_highbd_masked_sad32x16_ssse3),
314   make_tuple(&aom_highbd_masked_sad32x32_avx2,
315              &aom_highbd_masked_sad32x32_ssse3),
316   make_tuple(&aom_highbd_masked_sad32x64_avx2,
317              &aom_highbd_masked_sad32x64_ssse3),
318   make_tuple(&aom_highbd_masked_sad64x32_avx2,
319              &aom_highbd_masked_sad64x32_ssse3),
320   make_tuple(&aom_highbd_masked_sad64x64_avx2,
321              &aom_highbd_masked_sad64x64_ssse3),
322   make_tuple(&aom_highbd_masked_sad64x128_avx2,
323              &aom_highbd_masked_sad64x128_ssse3),
324   make_tuple(&aom_highbd_masked_sad128x64_avx2,
325              &aom_highbd_masked_sad128x64_ssse3),
326   make_tuple(&aom_highbd_masked_sad128x128_avx2,
327              &aom_highbd_masked_sad128x128_ssse3),
328   make_tuple(&aom_highbd_masked_sad4x16_avx2, &aom_highbd_masked_sad4x16_ssse3),
329   make_tuple(&aom_highbd_masked_sad16x4_avx2, &aom_highbd_masked_sad16x4_ssse3),
330   make_tuple(&aom_highbd_masked_sad8x32_avx2, &aom_highbd_masked_sad8x32_ssse3),
331   make_tuple(&aom_highbd_masked_sad32x8_avx2, &aom_highbd_masked_sad32x8_ssse3),
332   make_tuple(&aom_highbd_masked_sad16x64_avx2,
333              &aom_highbd_masked_sad16x64_ssse3),
334   make_tuple(&aom_highbd_masked_sad64x16_avx2,
335              &aom_highbd_masked_sad64x16_ssse3)
336 };
337 
338 INSTANTIATE_TEST_CASE_P(AVX2, HighbdMaskedSADTest,
339                         ::testing::ValuesIn(hbd_msad_avx2_test));
340 #endif  // HAVE_AVX2
341 
342 }  // namespace
343