1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <string>
13 
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 
19 #include "test/acm_random.h"
20 #include "test/clear_system_state.h"
21 #include "test/register_state_check.h"
22 #include "test/util.h"
23 #include "av1/common/blockd.h"
24 #include "av1/common/common.h"
25 #include "av1/common/pred_common.h"
26 #include "aom_mem/aom_mem.h"
27 
28 namespace {
29 
30 using libaom_test::ACMRandom;
31 
// Number of edge patterns each test runs through: the first is saturated, the
// remaining ones are random (see AV1IntraPredTest).
const int count_test_block = 100000;

// Signature of a high bit-depth predictor; HighbdIntraPredTest passes the
// configured bit depth as the trailing `bps` argument.
using HighbdIntraPred = void (*)(uint16_t *dst, ptrdiff_t stride,
                                 const uint16_t *above, const uint16_t *left,
                                 int bps);

// Signature of a low bit-depth (8-bit sample) predictor.
using IntraPred = void (*)(uint8_t *dst, ptrdiff_t stride,
                           const uint8_t *above, const uint8_t *left);
39 
40 }  // namespace
41 
42 // NOTE: Under gcc version 7.3.0 (Debian 7.3.0-5), if this template is in the
43 // anonymous namespace, then we get a strange compiler warning in
44 // the begin() and end() methods of the ParamGenerator template class in
45 // gtest/internal/gtest-param-util.h:
46 //   warning: ‘<anonymous>’ is used uninitialized in this function
47 // As a workaround, put this template outside the anonymous namespace.
48 // See bug aomedia:2003.
template <typename FuncType>
struct IntraPredFunc {
  // One parameter set for the parameterized intra-prediction tests.
  //
  //   pred               function under test
  //   ref                reference function whose output `pred` must match
  //   block_width_value  width of the predicted block, in samples
  //   block_height_value height of the predicted block, in samples
  //   bit_depth_value    sample bit depth; the fixture derives its sample
  //                      mask from it
  //
  // Every argument is defaulted so the struct stays default-constructible,
  // which the fixture relies on for its `params_` member.
  IntraPredFunc(FuncType pred = nullptr, FuncType ref = nullptr,
                int block_width_value = 0, int block_height_value = 0,
                int bit_depth_value = 0)
      : pred_fn(pred), ref_fn(ref), block_width(block_width_value),
        block_height(block_height_value), bit_depth(bit_depth_value) {}

  FuncType pred_fn;  // Function under test.
  FuncType ref_fn;   // Reference function.
  int block_width;
  int block_height;
  int bit_depth;
};
63 
64 namespace {
65 
66 template <typename FuncType, typename Pixel>
67 class AV1IntraPredTest
68     : public ::testing::TestWithParam<IntraPredFunc<FuncType> > {
69  public:
RunTest(Pixel * left_col,Pixel * above_data,Pixel * dst,Pixel * ref_dst)70   void RunTest(Pixel *left_col, Pixel *above_data, Pixel *dst, Pixel *ref_dst) {
71     ACMRandom rnd(ACMRandom::DeterministicSeed());
72     const int block_width = params_.block_width;
73     const int block_height = params_.block_height;
74     above_row_ = above_data + 16;
75     left_col_ = left_col;
76     dst_ = dst;
77     ref_dst_ = ref_dst;
78     int error_count = 0;
79     for (int i = 0; i < count_test_block; ++i) {
80       // Fill edges with random data, try first with saturated values.
81       for (int x = -1; x <= block_width * 2; x++) {
82         if (i == 0) {
83           above_row_[x] = mask_;
84         } else {
85           above_row_[x] = rnd.Rand16() & mask_;
86         }
87       }
88       for (int y = 0; y < block_height; y++) {
89         if (i == 0) {
90           left_col_[y] = mask_;
91         } else {
92           left_col_[y] = rnd.Rand16() & mask_;
93         }
94       }
95       Predict();
96       CheckPrediction(i, &error_count);
97     }
98     ASSERT_EQ(0, error_count);
99   }
RunSpeedTest(Pixel * left_col,Pixel * above_data,Pixel * dst,Pixel * ref_dst)100   void RunSpeedTest(Pixel *left_col, Pixel *above_data, Pixel *dst,
101                     Pixel *ref_dst) {
102     ACMRandom rnd(ACMRandom::DeterministicSeed());
103     const int block_width = params_.block_width;
104     const int block_height = params_.block_height;
105     above_row_ = above_data + 16;
106     left_col_ = left_col;
107     dst_ = dst;
108     ref_dst_ = ref_dst;
109     int error_count = 0;
110     const int numIter = 100;
111 
112     int c_sum_time = 0;
113     int simd_sum_time = 0;
114     for (int i = 0; i < count_test_block; ++i) {
115       // Fill edges with random data, try first with saturated values.
116       for (int x = -1; x <= block_width * 2; x++) {
117         if (i == 0) {
118           above_row_[x] = mask_;
119         } else {
120           above_row_[x] = rnd.Rand16() & mask_;
121         }
122       }
123       for (int y = 0; y < block_height; y++) {
124         if (i == 0) {
125           left_col_[y] = mask_;
126         } else {
127           left_col_[y] = rnd.Rand16() & mask_;
128         }
129       }
130 
131       aom_usec_timer c_timer_;
132       aom_usec_timer_start(&c_timer_);
133 
134       PredictRefSpeedTest(numIter);
135 
136       aom_usec_timer_mark(&c_timer_);
137 
138       aom_usec_timer simd_timer_;
139       aom_usec_timer_start(&simd_timer_);
140 
141       PredictFncSpeedTest(numIter);
142 
143       aom_usec_timer_mark(&simd_timer_);
144 
145       c_sum_time += static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
146       simd_sum_time += static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
147 
148       CheckPrediction(i, &error_count);
149     }
150 
151     printf(
152         "blockWxH = %d x %d c_time = %d \t simd_time = %d \t Gain = %4.2f \n",
153         block_width, block_height, c_sum_time, simd_sum_time,
154         (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
155     ASSERT_EQ(0, error_count);
156   }
157 
158  protected:
SetUp()159   virtual void SetUp() {
160     params_ = this->GetParam();
161     stride_ = params_.block_width * 3;
162     mask_ = (1 << params_.bit_depth) - 1;
163   }
164 
165   virtual void Predict() = 0;
166 
167   virtual void PredictRefSpeedTest(int num) = 0;
168   virtual void PredictFncSpeedTest(int num) = 0;
169 
CheckPrediction(int test_case_number,int * error_count) const170   void CheckPrediction(int test_case_number, int *error_count) const {
171     // For each pixel ensure that the calculated value is the same as reference.
172     const int block_width = params_.block_width;
173     const int block_height = params_.block_height;
174     for (int y = 0; y < block_height; y++) {
175       for (int x = 0; x < block_width; x++) {
176         *error_count += ref_dst_[x + y * stride_] != dst_[x + y * stride_];
177         if (*error_count == 1) {
178           ASSERT_EQ(ref_dst_[x + y * stride_], dst_[x + y * stride_])
179               << " Failed on Test Case Number " << test_case_number
180               << " location: x = " << x << " y = " << y;
181         }
182       }
183     }
184   }
185 
186   Pixel *above_row_;
187   Pixel *left_col_;
188   Pixel *dst_;
189   Pixel *ref_dst_;
190   ptrdiff_t stride_;
191   int mask_;
192 
193   IntraPredFunc<FuncType> params_;
194 };
195 
196 #if CONFIG_AV1_HIGHBITDEPTH
197 class HighbdIntraPredTest : public AV1IntraPredTest<HighbdIntraPred, uint16_t> {
198  protected:
Predict()199   void Predict() {
200     const int bit_depth = params_.bit_depth;
201     params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
202     ASM_REGISTER_STATE_CHECK(
203         params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth));
204   }
PredictRefSpeedTest(int num)205   void PredictRefSpeedTest(int num) {
206     const int bit_depth = params_.bit_depth;
207     for (int i = 0; i < num; i++) {
208       params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
209     }
210   }
PredictFncSpeedTest(int num)211   void PredictFncSpeedTest(int num) {
212     const int bit_depth = params_.bit_depth;
213     for (int i = 0; i < num; i++) {
214       params_.pred_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
215     }
216   }
217 };
218 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HighbdIntraPredTest);
219 
220 #endif
221 
222 class LowbdIntraPredTest : public AV1IntraPredTest<IntraPred, uint8_t> {
223  protected:
Predict()224   void Predict() {
225     params_.ref_fn(ref_dst_, stride_, above_row_, left_col_);
226     ASM_REGISTER_STATE_CHECK(
227         params_.pred_fn(dst_, stride_, above_row_, left_col_));
228   }
PredictRefSpeedTest(int num)229   void PredictRefSpeedTest(int num) {
230     for (int i = 0; i < num; i++) {
231       params_.ref_fn(ref_dst_, stride_, above_row_, left_col_);
232     }
233   }
PredictFncSpeedTest(int num)234   void PredictFncSpeedTest(int num) {
235     for (int i = 0; i < num; i++) {
236       params_.pred_fn(dst_, stride_, above_row_, left_col_);
237     }
238   }
239 };
240 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(LowbdIntraPredTest);
241 
242 #if CONFIG_AV1_HIGHBITDEPTH
// Suppress an uninitialized warning. Once there are implementations to test,
// this can be restored.
TEST_P(HighbdIntraPredTest, Bitexact) {
  // All buffers are sized for the largest block dimension (64): two block
  // dimensions for the left column, three for the above data, and a
  // three-block-wide stride times 64 rows for each output.
  const int max_block_dim = 64;
  DECLARE_ALIGNED(16, uint16_t, left_col[2 * max_block_dim]);
  DECLARE_ALIGNED(16, uint16_t, above_data[2 * max_block_dim + max_block_dim]);
  DECLARE_ALIGNED(16, uint16_t, dst[3 * max_block_dim * max_block_dim]);
  DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * max_block_dim * max_block_dim]);
  // Only the edge buffers are cleared before the run.
  av1_zero(left_col);
  av1_zero(above_data);
  RunTest(left_col, above_data, dst, ref_dst);
}
255 #endif
256 
TEST_P(LowbdIntraPredTest, Bitexact) {
  // All buffers are sized for the largest block dimension (64): two block
  // dimensions for the left column, three for the above data, and a
  // three-block-wide stride times 64 rows for each output.
  const int max_block_dim = 64;
  DECLARE_ALIGNED(16, uint8_t, left_col[2 * max_block_dim]);
  DECLARE_ALIGNED(16, uint8_t, above_data[2 * max_block_dim + max_block_dim]);
  DECLARE_ALIGNED(16, uint8_t, dst[3 * max_block_dim * max_block_dim]);
  DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * max_block_dim * max_block_dim]);
  // Only the edge buffers are cleared before the run.
  av1_zero(left_col);
  av1_zero(above_data);
  RunTest(left_col, above_data, dst, ref_dst);
}
TEST_P(LowbdIntraPredTest, DISABLED_Speed) {
  // Same buffer layout as the Bitexact test, sized for the largest block
  // dimension (64); this test runs the timing comparison instead.
  const int max_block_dim = 64;
  DECLARE_ALIGNED(16, uint8_t, left_col[2 * max_block_dim]);
  DECLARE_ALIGNED(16, uint8_t, above_data[2 * max_block_dim + max_block_dim]);
  DECLARE_ALIGNED(16, uint8_t, dst[3 * max_block_dim * max_block_dim]);
  DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * max_block_dim * max_block_dim]);
  // Only the edge buffers are cleared before the run.
  av1_zero(left_col);
  av1_zero(above_data);
  RunSpeedTest(left_col, above_data, dst, ref_dst);
}
277 
278 #if CONFIG_AV1_HIGHBITDEPTH
279 // -----------------------------------------------------------------------------
280 // High Bit Depth Tests
// Builds one IntraPredFunc<HighbdIntraPred> test parameter. The function under
// test is aom_highbd_<kind>_predictor_<w>x<h>_<isa>, the reference is the
// function of the same name with the _c suffix, and the block is w x h at bit
// depth bd.
#define highbd_entry(kind, w, h, isa, bd)               \
  IntraPredFunc<HighbdIntraPred>(                       \
      &aom_highbd_##kind##_predictor_##w##x##h##_##isa, \
      &aom_highbd_##kind##_predictor_##w##x##h##_c, w, h, bd)

// Compiled out: list of block sizes for one predictor kind, mirroring
// lowbd_intrapred.
#if 0
#define highbd_intrapred(type, opt, bd) \
  highbd_entry(type, 4, 4, opt, bd),    \
      highbd_entry(type, 4, 8, opt, bd),    \
      highbd_entry(type, 8, 4, opt, bd),    \
      highbd_entry(type, 8, 8, opt, bd),    \
      highbd_entry(type, 8, 16, opt, bd),   \
      highbd_entry(type, 16, 8, opt, bd),   \
      highbd_entry(type, 16, 16, opt, bd),  \
      highbd_entry(type, 16, 32, opt, bd),  \
      highbd_entry(type, 32, 16, opt, bd),  \
      highbd_entry(type, 32, 32, opt, bd)
#endif
296 #endif  // CONFIG_AV1_HIGHBITDEPTH
297 // ---------------------------------------------------------------------------
298 // Low Bit Depth Tests
299 
// Builds one IntraPredFunc<IntraPred> test parameter. The function under test
// is aom_<kind>_predictor_<w>x<h>_<isa>, the reference is the function of the
// same name with the _c suffix, and the block is w x h at a fixed bit depth
// of 8.
#define lowbd_entry(kind, w, h, isa)                                       \
  IntraPredFunc<IntraPred>(&aom_##kind##_predictor_##w##x##h##_##isa,      \
                           &aom_##kind##_predictor_##w##x##h##_c, w, h, 8)
304 
// Expands to ten comma-separated lowbd_entry parameters for one predictor
// kind and instruction set, one per listed block size from 4x4 up to 32x32.
#define lowbd_intrapred(kind, isa)    \
  lowbd_entry(kind, 4, 4, isa),       \
      lowbd_entry(kind, 4, 8, isa),   \
      lowbd_entry(kind, 8, 4, isa),   \
      lowbd_entry(kind, 8, 8, isa),   \
      lowbd_entry(kind, 8, 16, isa),  \
      lowbd_entry(kind, 16, 8, isa),  \
      lowbd_entry(kind, 16, 16, isa), \
      lowbd_entry(kind, 16, 32, isa), \
      lowbd_entry(kind, 32, 16, isa), \
      lowbd_entry(kind, 32, 32, isa)
311 
312 #if HAVE_SSE2
313 const IntraPredFunc<IntraPred> LowbdIntraPredTestVector[] = {
314   lowbd_intrapred(dc, sse2),      lowbd_intrapred(dc_top, sse2),
315   lowbd_intrapred(dc_left, sse2), lowbd_intrapred(dc_128, sse2),
316   lowbd_intrapred(v, sse2),       lowbd_intrapred(h, sse2),
317 };
318 
319 INSTANTIATE_TEST_SUITE_P(SSE2, LowbdIntraPredTest,
320                          ::testing::ValuesIn(LowbdIntraPredTestVector));
321 
322 #endif  // HAVE_SSE2
323 
324 #if HAVE_NEON
325 const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorNeon[] = {
326   lowbd_entry(smooth, 4, 4, neon),   lowbd_entry(smooth, 4, 8, neon),
327   lowbd_entry(smooth, 4, 16, neon),  lowbd_entry(smooth, 8, 4, neon),
328   lowbd_entry(smooth, 8, 8, neon),   lowbd_entry(smooth, 8, 16, neon),
329   lowbd_entry(smooth, 8, 32, neon),  lowbd_entry(smooth, 16, 4, neon),
330   lowbd_entry(smooth, 16, 8, neon),  lowbd_entry(smooth, 16, 16, neon),
331   lowbd_entry(smooth, 16, 32, neon), lowbd_entry(smooth, 16, 64, neon),
332   lowbd_entry(smooth, 32, 8, neon),  lowbd_entry(smooth, 32, 16, neon),
333   lowbd_entry(smooth, 32, 32, neon), lowbd_entry(smooth, 32, 64, neon),
334   lowbd_entry(smooth, 64, 16, neon), lowbd_entry(smooth, 64, 32, neon),
335   lowbd_entry(smooth, 64, 64, neon)
336 };
337 INSTANTIATE_TEST_SUITE_P(NEON, LowbdIntraPredTest,
338                          ::testing::ValuesIn(LowbdIntraPredTestVectorNeon));
339 #endif  // HAVE_NEON
340 
341 #if HAVE_SSSE3
342 const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorSsse3[] = {
343   lowbd_intrapred(paeth, ssse3),
344   lowbd_intrapred(smooth, ssse3),
345 };
346 
347 INSTANTIATE_TEST_SUITE_P(SSSE3, LowbdIntraPredTest,
348                          ::testing::ValuesIn(LowbdIntraPredTestVectorSsse3));
349 
350 #endif  // HAVE_SSSE3
351 
352 #if HAVE_AVX2
353 const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
354   lowbd_entry(dc, 32, 32, avx2),      lowbd_entry(dc_top, 32, 32, avx2),
355   lowbd_entry(dc_left, 32, 32, avx2), lowbd_entry(dc_128, 32, 32, avx2),
356   lowbd_entry(v, 32, 32, avx2),       lowbd_entry(h, 32, 32, avx2),
357   lowbd_entry(dc, 32, 16, avx2),      lowbd_entry(dc_top, 32, 16, avx2),
358   lowbd_entry(dc_left, 32, 16, avx2), lowbd_entry(dc_128, 32, 16, avx2),
359   lowbd_entry(v, 32, 16, avx2),       lowbd_entry(paeth, 16, 8, avx2),
360   lowbd_entry(paeth, 16, 16, avx2),   lowbd_entry(paeth, 16, 32, avx2),
361   lowbd_entry(paeth, 32, 16, avx2),   lowbd_entry(paeth, 32, 32, avx2),
362 };
363 
364 INSTANTIATE_TEST_SUITE_P(AVX2, LowbdIntraPredTest,
365                          ::testing::ValuesIn(LowbdIntraPredTestVectorAvx2));
366 
367 #endif  // HAVE_AVX2
368 
369 #if CONFIG_AV1_HIGHBITDEPTH
370 #if HAVE_NEON
371 const IntraPredFunc<HighbdIntraPred> HighbdIntraPredTestVectorNeon[] = {
372   highbd_entry(dc, 4, 4, neon, 8),   highbd_entry(dc, 8, 8, neon, 8),
373   highbd_entry(dc, 16, 16, neon, 8), highbd_entry(dc, 32, 32, neon, 8),
374   highbd_entry(dc, 64, 64, neon, 8),
375 };
376 
377 INSTANTIATE_TEST_SUITE_P(NEON, HighbdIntraPredTest,
378                          ::testing::ValuesIn(HighbdIntraPredTestVectorNeon));
379 
380 #endif  // HAVE_NEON
381 #endif  // CONFIG_AV1_HIGHBITDEPTH
382 }  // namespace
383