/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11 
12 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
13 #include "test/acm_random.h"
14 
15 #include "test/function_equivalence_test.h"
16 #include "test/register_state_check.h"
17 
18 #include "config/aom_config.h"
19 #include "config/aom_dsp_rtcd.h"
20 
21 #include "aom/aom_integer.h"
22 
// Element count of a MAX_SB_SIZE x MAX_SB_SIZE buffer; sizes the pre/wsrc/mask
// scratch arrays declared by every test in this file.
#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
24 
25 using libaom_test::ACMRandom;
26 using libaom_test::FunctionEquivalenceTest;
27 
28 namespace {
29 
30 static const int kIterations = 1000;
31 static const int kMaskMax = 64;
32 
33 typedef unsigned int (*ObmcVarF)(const uint8_t *pre, int pre_stride,
34                                  const int32_t *wsrc, const int32_t *mask,
35                                  unsigned int *sse);
36 typedef libaom_test::FuncParam<ObmcVarF> TestFuncs;
37 
////////////////////////////////////////////////////////////////////////////////
// 8 bit
////////////////////////////////////////////////////////////////////////////////
41 
42 class ObmcVarianceTest : public FunctionEquivalenceTest<ObmcVarF> {};
43 
// Runs kIterations trials on freshly randomized pre/wsrc/mask buffers and a
// random pre_stride, requiring the optimized kernel to reproduce both the
// return value and the sse output of the reference kernel.
TEST_P(ObmcVarianceTest, RandomValues) {
  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);

  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
    const int pre_stride = this->rng_(MAX_SB_SIZE + 1);

    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
      pre[i] = this->rng_.Rand8();
      wsrc[i] = this->rng_.Rand8() * this->rng_(kMaskMax * kMaskMax + 1);
      mask[i] = this->rng_(kMaskMax * kMaskMax + 1);
    }

    // Start from a defined value so the comparison below never reads an
    // indeterminate value should a kernel fail to store its sse output.
    unsigned int ref_sse = 0;
    unsigned int tst_sse = 0;
    const unsigned int ref_res =
        params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse);
    unsigned int tst_res;
    ASM_REGISTER_STATE_CHECK(
        tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse));

    ASSERT_EQ(ref_res, tst_res)
        << "iter=" << iter << " pre_stride=" << pre_stride;
    ASSERT_EQ(ref_sse, tst_sse)
        << "iter=" << iter << " pre_stride=" << pre_stride;
  }
}
69 
// Feeds both kernels the largest inputs the random test can generate (every
// pre sample at UINT8_MAX, every mask at kMaskMax^2, every wsrc at their
// product) for each pre_stride in [0, MAX_SB_SIZE), and requires matching
// return values and sse outputs.
TEST_P(ObmcVarianceTest, ExtremeValues) {
  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);

  // The inputs are independent of the stride and the kernels only receive
  // them through const pointers, so a single fill serves every iteration.
  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    pre[i] = UINT8_MAX;
    wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax;
    mask[i] = kMaskMax * kMaskMax;
  }

  for (int pre_stride = 0; pre_stride < MAX_SB_SIZE && !HasFatalFailure();
       ++pre_stride) {
    unsigned int ref_sse = 0;
    unsigned int tst_sse = 0;
    const unsigned int ref_res =
        params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse);
    unsigned int tst_res;
    ASM_REGISTER_STATE_CHECK(
        tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse));

    ASSERT_EQ(ref_res, tst_res) << "pre_stride=" << pre_stride;
    ASSERT_EQ(ref_sse, tst_sse) << "pre_stride=" << pre_stride;
  }
}
95 
// Manual benchmark (disabled by default): times num_loops calls of the
// reference kernel and of the optimized kernel on one random input and prints
// both durations in microseconds together with the resulting speed-up.
TEST_P(ObmcVarianceTest, DISABLED_Speed) {
  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);

  const int pre_stride = this->rng_(MAX_SB_SIZE + 1);

  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    pre[i] = this->rng_.Rand8();
    wsrc[i] = this->rng_.Rand8() * this->rng_(kMaskMax * kMaskMax + 1);
    mask[i] = this->rng_(kMaskMax * kMaskMax + 1);
  }

  const int num_loops = 1000000;
  unsigned int ref_sse = 0;
  unsigned int tst_sse = 0;
  aom_usec_timer ref_timer, test_timer;

  aom_usec_timer_start(&ref_timer);
  for (int i = 0; i < num_loops; ++i) {
    params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse);
  }
  aom_usec_timer_mark(&ref_timer);
  const int elapsed_time_c =
      static_cast<int>(aom_usec_timer_elapsed(&ref_timer));

  aom_usec_timer_start(&test_timer);
  for (int i = 0; i < num_loops; ++i) {
    params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse);
  }
  aom_usec_timer_mark(&test_timer);
  const int elapsed_time_simd =
      static_cast<int>(aom_usec_timer_elapsed(&test_timer));

  // A measured time of 0 us would make the ratio a division by zero; report a
  // gain of 0 in that case. The ratio is computed in floating point so that a
  // fractional speed-up (e.g. 1.8x) is not truncated.
  const double gain =
      elapsed_time_simd > 0
          ? static_cast<double>(elapsed_time_c) / elapsed_time_simd
          : 0.0;

  printf("c_time=%d \t simd_time=%d \t gain=%.2f \n", elapsed_time_c,
         elapsed_time_simd, gain);
}
132 
#if HAVE_SSE4_1
// Each entry pairs the C reference kernel for one block size with its SSE4.1
// counterpart; every ObmcVarianceTest case runs once per entry.
const ObmcVarianceTest::ParamType sse4_functions[] = {
  TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_sse4_1),
  TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_sse4_1),
  TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_sse4_1),
  TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_sse4_1),
  TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_sse4_1),
  TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_sse4_1),
  TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_sse4_1),
  TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_sse4_1),
  TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_sse4_1),
  TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_sse4_1),
  TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_sse4_1),
  TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_sse4_1),
  TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_sse4_1),
  TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_sse4_1),
  TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_sse4_1),
  TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_sse4_1)
};

INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcVarianceTest,
                        ::testing::ValuesIn(sse4_functions));
#endif  // HAVE_SSE4_1
156 
#if HAVE_AVX2
// Each entry pairs the C reference kernel for one block size with the kernel
// to be checked in the AVX2 instantiation of ObmcVarianceTest.
const ObmcVarianceTest::ParamType avx2_functions[] = {
  TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_avx2),
  TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_avx2),
  TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_avx2),
  TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_avx2),
  TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_avx2),
  TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_avx2),
  TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_avx2),
  TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_avx2),
  TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_avx2),
  TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_avx2),
  TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_avx2),
  TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_avx2),
  TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_avx2),
  TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_avx2),
  // NOTE(review): the two 4-wide sizes are paired with the SSE4.1 kernels,
  // not with *_avx2 symbols. Presumably no AVX2 version exists for them in
  // this revision; confirm against the rtcd definitions. As written these two
  // rows also require the SSE4.1 symbols to be built whenever HAVE_AVX2 is.
  TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_sse4_1),
  TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_sse4_1)
};

INSTANTIATE_TEST_CASE_P(AVX2, ObmcVarianceTest,
                        ::testing::ValuesIn(avx2_functions));
#endif  // HAVE_AVX2
180 
////////////////////////////////////////////////////////////////////////////////
// High bit-depth
////////////////////////////////////////////////////////////////////////////////
184 
185 class ObmcVarianceHBDTest : public FunctionEquivalenceTest<ObmcVarF> {};
186 
// High bit-depth counterpart of the 8-bit random test: samples are 16-bit
// values generated with an upper bound of 1 << params_.bit_depth, and the
// buffer is handed to the kernels through CONVERT_TO_BYTEPTR.
TEST_P(ObmcVarianceHBDTest, RandomValues) {
  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);

  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
    const int pre_stride = this->rng_(MAX_SB_SIZE + 1);

    for (int i = 0; i < MAX_SB_SQUARE; ++i) {
      pre[i] = this->rng_(1 << params_.bit_depth);
      wsrc[i] = this->rng_(1 << params_.bit_depth) *
                this->rng_(kMaskMax * kMaskMax + 1);
      mask[i] = this->rng_(kMaskMax * kMaskMax + 1);
    }

    // Start from a defined value so the comparison below never reads an
    // indeterminate value should a kernel fail to store its sse output.
    unsigned int ref_sse = 0;
    unsigned int tst_sse = 0;
    const unsigned int ref_res = params_.ref_func(
        CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse);
    unsigned int tst_res;
    ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre),
                                                        pre_stride, wsrc, mask,
                                                        &tst_sse));

    ASSERT_EQ(ref_res, tst_res)
        << "iter=" << iter << " pre_stride=" << pre_stride;
    ASSERT_EQ(ref_sse, tst_sse)
        << "iter=" << iter << " pre_stride=" << pre_stride;
  }
}
214 
// High bit-depth counterpart of the 8-bit extreme-value test: every sample is
// set to (1 << params_.bit_depth) - 1, every mask to kMaskMax^2 and every wsrc
// to their product, for each pre_stride in [0, MAX_SB_SIZE).
TEST_P(ObmcVarianceHBDTest, ExtremeValues) {
  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);

  // The inputs are independent of the stride and the kernels only receive
  // them through const pointers, so a single fill serves every iteration.
  const int max_sample = (1 << params_.bit_depth) - 1;
  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    pre[i] = max_sample;
    wsrc[i] = max_sample * kMaskMax * kMaskMax;
    mask[i] = kMaskMax * kMaskMax;
  }

  for (int pre_stride = 0; pre_stride < MAX_SB_SIZE && !HasFatalFailure();
       ++pre_stride) {
    unsigned int ref_sse = 0;
    unsigned int tst_sse = 0;
    const unsigned int ref_res = params_.ref_func(
        CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse);
    unsigned int tst_res;
    ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre),
                                                        pre_stride, wsrc, mask,
                                                        &tst_sse));

    ASSERT_EQ(ref_res, tst_res) << "pre_stride=" << pre_stride;
    ASSERT_EQ(ref_sse, tst_sse) << "pre_stride=" << pre_stride;
  }
}
241 
#if HAVE_SSE4_1
// Each entry pairs a high bit-depth C reference kernel with its SSE4.1
// counterpart. The third argument is the bit depth (8, 10 or 12) for that
// kernel family; presumably it is what the tests read as params_.bit_depth.
// Declared const to match the 8-bit parameter tables.
const ObmcVarianceHBDTest::ParamType sse4_functions_hbd[] = {
  TestFuncs(aom_highbd_obmc_variance128x128_c,
            aom_highbd_obmc_variance128x128_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance128x64_c,
            aom_highbd_obmc_variance128x64_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance64x128_c,
            aom_highbd_obmc_variance64x128_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance64x64_c,
            aom_highbd_obmc_variance64x64_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance64x32_c,
            aom_highbd_obmc_variance64x32_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance32x64_c,
            aom_highbd_obmc_variance32x64_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance32x32_c,
            aom_highbd_obmc_variance32x32_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance32x16_c,
            aom_highbd_obmc_variance32x16_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance16x32_c,
            aom_highbd_obmc_variance16x32_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance16x16_c,
            aom_highbd_obmc_variance16x16_sse4_1, 8),
  TestFuncs(aom_highbd_obmc_variance16x8_c, aom_highbd_obmc_variance16x8_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance8x16_c, aom_highbd_obmc_variance8x16_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance8x8_c, aom_highbd_obmc_variance8x8_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance8x4_c, aom_highbd_obmc_variance8x4_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance4x8_c, aom_highbd_obmc_variance4x8_sse4_1,
            8),
  TestFuncs(aom_highbd_obmc_variance4x4_c, aom_highbd_obmc_variance4x4_sse4_1,
            8),
  TestFuncs(aom_highbd_10_obmc_variance128x128_c,
            aom_highbd_10_obmc_variance128x128_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance128x64_c,
            aom_highbd_10_obmc_variance128x64_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance64x128_c,
            aom_highbd_10_obmc_variance64x128_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance64x64_c,
            aom_highbd_10_obmc_variance64x64_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance64x32_c,
            aom_highbd_10_obmc_variance64x32_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance32x64_c,
            aom_highbd_10_obmc_variance32x64_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance32x32_c,
            aom_highbd_10_obmc_variance32x32_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance32x16_c,
            aom_highbd_10_obmc_variance32x16_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance16x32_c,
            aom_highbd_10_obmc_variance16x32_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance16x16_c,
            aom_highbd_10_obmc_variance16x16_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance16x8_c,
            aom_highbd_10_obmc_variance16x8_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance8x16_c,
            aom_highbd_10_obmc_variance8x16_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance8x8_c,
            aom_highbd_10_obmc_variance8x8_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance8x4_c,
            aom_highbd_10_obmc_variance8x4_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance4x8_c,
            aom_highbd_10_obmc_variance4x8_sse4_1, 10),
  TestFuncs(aom_highbd_10_obmc_variance4x4_c,
            aom_highbd_10_obmc_variance4x4_sse4_1, 10),
  TestFuncs(aom_highbd_12_obmc_variance128x128_c,
            aom_highbd_12_obmc_variance128x128_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance128x64_c,
            aom_highbd_12_obmc_variance128x64_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance64x128_c,
            aom_highbd_12_obmc_variance64x128_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance64x64_c,
            aom_highbd_12_obmc_variance64x64_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance64x32_c,
            aom_highbd_12_obmc_variance64x32_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance32x64_c,
            aom_highbd_12_obmc_variance32x64_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance32x32_c,
            aom_highbd_12_obmc_variance32x32_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance32x16_c,
            aom_highbd_12_obmc_variance32x16_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance16x32_c,
            aom_highbd_12_obmc_variance16x32_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance16x16_c,
            aom_highbd_12_obmc_variance16x16_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance16x8_c,
            aom_highbd_12_obmc_variance16x8_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance8x16_c,
            aom_highbd_12_obmc_variance8x16_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance8x8_c,
            aom_highbd_12_obmc_variance8x8_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance8x4_c,
            aom_highbd_12_obmc_variance8x4_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance4x8_c,
            aom_highbd_12_obmc_variance4x8_sse4_1, 12),
  TestFuncs(aom_highbd_12_obmc_variance4x4_c,
            aom_highbd_12_obmc_variance4x4_sse4_1, 12)
};

INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcVarianceHBDTest,
                        ::testing::ValuesIn(sse4_functions_hbd));
#endif  // HAVE_SSE4_1
345 }  // namespace
346