1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <string>
13
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
15
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18
19 #include "test/acm_random.h"
20 #include "test/clear_system_state.h"
21 #include "test/register_state_check.h"
22 #include "test/util.h"
23 #include "av1/common/blockd.h"
24 #include "av1/common/common.h"
25 #include "av1/common/pred_common.h"
26 #include "aom_mem/aom_mem.h"
27
28 namespace {
29
30 using libaom_test::ACMRandom;
31
// Number of edge patterns each predictor is run against in one test. The
// first pattern is fully saturated; the remaining ones are random.
const int count_test_block = 100000;

// Pointer to a high-bitdepth predictor. The fixtures in this file pass the
// bit depth of the test case as the trailing argument.
using HighbdIntraPred = void (*)(uint16_t *dst, ptrdiff_t stride,
                                 const uint16_t *above, const uint16_t *left,
                                 int bps);

// Pointer to an 8-bit predictor writing a block at dst from the above row and
// left column.
using IntraPred = void (*)(uint8_t *dst, ptrdiff_t stride,
                           const uint8_t *above, const uint8_t *left);
39
40 } // namespace
41
42 // NOTE: Under gcc version 7.3.0 (Debian 7.3.0-5), if this template is in the
43 // anonymous namespace, then we get a strange compiler warning in
44 // the begin() and end() methods of the ParamGenerator template class in
45 // gtest/internal/gtest-param-util.h:
46 // warning: ‘<anonymous>’ is used uninitialized in this function
47 // As a workaround, put this template outside the anonymous namespace.
48 // See bug aomedia:2003.
// One parameterized test case: the predictor under test, the reference it is
// compared against, the block dimensions and the bit depth. All arguments are
// optional; a default-constructed instance holds null functions and zeros.
template <typename FuncType>
struct IntraPredFunc {
  IntraPredFunc(FuncType tested = NULL, FuncType reference = NULL,
                int width = 0, int height = 0, int depth = 0)
      : pred_fn(tested),
        ref_fn(reference),
        block_width(width),
        block_height(height),
        bit_depth(depth) {}

  FuncType pred_fn;  // Implementation being verified.
  FuncType ref_fn;   // Implementation whose output is taken as correct.
  int block_width;   // Block width in pixels.
  int block_height;  // Block height in pixels.
  int bit_depth;     // Bits per sample; the fixture derives its mask from it.
};
63
64 namespace {
65
66 template <typename FuncType, typename Pixel>
67 class AV1IntraPredTest
68 : public ::testing::TestWithParam<IntraPredFunc<FuncType> > {
69 public:
RunTest(Pixel * left_col,Pixel * above_data,Pixel * dst,Pixel * ref_dst)70 void RunTest(Pixel *left_col, Pixel *above_data, Pixel *dst, Pixel *ref_dst) {
71 ACMRandom rnd(ACMRandom::DeterministicSeed());
72 const int block_width = params_.block_width;
73 const int block_height = params_.block_height;
74 above_row_ = above_data + 16;
75 left_col_ = left_col;
76 dst_ = dst;
77 ref_dst_ = ref_dst;
78 int error_count = 0;
79 for (int i = 0; i < count_test_block; ++i) {
80 // Fill edges with random data, try first with saturated values.
81 for (int x = -1; x <= block_width * 2; x++) {
82 if (i == 0) {
83 above_row_[x] = mask_;
84 } else {
85 above_row_[x] = rnd.Rand16() & mask_;
86 }
87 }
88 for (int y = 0; y < block_height; y++) {
89 if (i == 0) {
90 left_col_[y] = mask_;
91 } else {
92 left_col_[y] = rnd.Rand16() & mask_;
93 }
94 }
95 Predict();
96 CheckPrediction(i, &error_count);
97 }
98 ASSERT_EQ(0, error_count);
99 }
RunSpeedTest(Pixel * left_col,Pixel * above_data,Pixel * dst,Pixel * ref_dst)100 void RunSpeedTest(Pixel *left_col, Pixel *above_data, Pixel *dst,
101 Pixel *ref_dst) {
102 ACMRandom rnd(ACMRandom::DeterministicSeed());
103 const int block_width = params_.block_width;
104 const int block_height = params_.block_height;
105 above_row_ = above_data + 16;
106 left_col_ = left_col;
107 dst_ = dst;
108 ref_dst_ = ref_dst;
109 int error_count = 0;
110 const int numIter = 100;
111
112 int c_sum_time = 0;
113 int simd_sum_time = 0;
114 for (int i = 0; i < count_test_block; ++i) {
115 // Fill edges with random data, try first with saturated values.
116 for (int x = -1; x <= block_width * 2; x++) {
117 if (i == 0) {
118 above_row_[x] = mask_;
119 } else {
120 above_row_[x] = rnd.Rand16() & mask_;
121 }
122 }
123 for (int y = 0; y < block_height; y++) {
124 if (i == 0) {
125 left_col_[y] = mask_;
126 } else {
127 left_col_[y] = rnd.Rand16() & mask_;
128 }
129 }
130
131 aom_usec_timer c_timer_;
132 aom_usec_timer_start(&c_timer_);
133
134 PredictRefSpeedTest(numIter);
135
136 aom_usec_timer_mark(&c_timer_);
137
138 aom_usec_timer simd_timer_;
139 aom_usec_timer_start(&simd_timer_);
140
141 PredictFncSpeedTest(numIter);
142
143 aom_usec_timer_mark(&simd_timer_);
144
145 c_sum_time += static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
146 simd_sum_time += static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
147
148 CheckPrediction(i, &error_count);
149 }
150
151 printf(
152 "blockWxH = %d x %d c_time = %d \t simd_time = %d \t Gain = %4.2f \n",
153 block_width, block_height, c_sum_time, simd_sum_time,
154 (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
155 ASSERT_EQ(0, error_count);
156 }
157
158 protected:
SetUp()159 virtual void SetUp() {
160 params_ = this->GetParam();
161 stride_ = params_.block_width * 3;
162 mask_ = (1 << params_.bit_depth) - 1;
163 }
164
165 virtual void Predict() = 0;
166
167 virtual void PredictRefSpeedTest(int num) = 0;
168 virtual void PredictFncSpeedTest(int num) = 0;
169
CheckPrediction(int test_case_number,int * error_count) const170 void CheckPrediction(int test_case_number, int *error_count) const {
171 // For each pixel ensure that the calculated value is the same as reference.
172 const int block_width = params_.block_width;
173 const int block_height = params_.block_height;
174 for (int y = 0; y < block_height; y++) {
175 for (int x = 0; x < block_width; x++) {
176 *error_count += ref_dst_[x + y * stride_] != dst_[x + y * stride_];
177 if (*error_count == 1) {
178 ASSERT_EQ(ref_dst_[x + y * stride_], dst_[x + y * stride_])
179 << " Failed on Test Case Number " << test_case_number
180 << " location: x = " << x << " y = " << y;
181 }
182 }
183 }
184 }
185
186 Pixel *above_row_;
187 Pixel *left_col_;
188 Pixel *dst_;
189 Pixel *ref_dst_;
190 ptrdiff_t stride_;
191 int mask_;
192
193 IntraPredFunc<FuncType> params_;
194 };
195
196 #if CONFIG_AV1_HIGHBITDEPTH
197 class HighbdIntraPredTest : public AV1IntraPredTest<HighbdIntraPred, uint16_t> {
198 protected:
Predict()199 void Predict() {
200 const int bit_depth = params_.bit_depth;
201 params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
202 ASM_REGISTER_STATE_CHECK(
203 params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth));
204 }
PredictRefSpeedTest(int num)205 void PredictRefSpeedTest(int num) {
206 const int bit_depth = params_.bit_depth;
207 for (int i = 0; i < num; i++) {
208 params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
209 }
210 }
PredictFncSpeedTest(int num)211 void PredictFncSpeedTest(int num) {
212 const int bit_depth = params_.bit_depth;
213 for (int i = 0; i < num; i++) {
214 params_.pred_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
215 }
216 }
217 };
218 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HighbdIntraPredTest);
219
220 #endif
221
222 class LowbdIntraPredTest : public AV1IntraPredTest<IntraPred, uint8_t> {
223 protected:
Predict()224 void Predict() {
225 params_.ref_fn(ref_dst_, stride_, above_row_, left_col_);
226 ASM_REGISTER_STATE_CHECK(
227 params_.pred_fn(dst_, stride_, above_row_, left_col_));
228 }
PredictRefSpeedTest(int num)229 void PredictRefSpeedTest(int num) {
230 for (int i = 0; i < num; i++) {
231 params_.ref_fn(ref_dst_, stride_, above_row_, left_col_);
232 }
233 }
PredictFncSpeedTest(int num)234 void PredictFncSpeedTest(int num) {
235 for (int i = 0; i < num; i++) {
236 params_.pred_fn(dst_, stride_, above_row_, left_col_);
237 }
238 }
239 };
240 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(LowbdIntraPredTest);
241
242 #if CONFIG_AV1_HIGHBITDEPTH
// Suppress an uninitialized warning. Once there are implementations to test
// then this can be restored.
TEST_P(HighbdIntraPredTest, Bitexact) {
  // All buffers are dimensioned for the largest block size, 64.
  const int kMaxBlockDim = 64;
  DECLARE_ALIGNED(16, uint16_t, left_col[2 * kMaxBlockDim]);
  DECLARE_ALIGNED(16, uint16_t, above_data[2 * kMaxBlockDim + kMaxBlockDim]);
  // Output rows are spaced three block widths apart by the fixture.
  DECLARE_ALIGNED(16, uint16_t, dst[3 * kMaxBlockDim * kMaxBlockDim]);
  DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * kMaxBlockDim * kMaxBlockDim]);
  // Only the edge arrays are cleared; the output arrays are left as declared.
  av1_zero(above_data);
  av1_zero(left_col);
  RunTest(left_col, above_data, dst, ref_dst);
}
255 #endif
256
TEST_P(LowbdIntraPredTest, Bitexact) {
  // All buffers are dimensioned for the largest block size, 64.
  const int kMaxBlockDim = 64;
  DECLARE_ALIGNED(16, uint8_t, left_col[2 * kMaxBlockDim]);
  DECLARE_ALIGNED(16, uint8_t, above_data[2 * kMaxBlockDim + kMaxBlockDim]);
  // Output rows are spaced three block widths apart by the fixture.
  DECLARE_ALIGNED(16, uint8_t, dst[3 * kMaxBlockDim * kMaxBlockDim]);
  DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * kMaxBlockDim * kMaxBlockDim]);
  // Only the edge arrays are cleared; the output arrays are left as declared.
  av1_zero(above_data);
  av1_zero(left_col);
  RunTest(left_col, above_data, dst, ref_dst);
}
// Timing comparison of the tested predictor against its reference. The
// DISABLED_ prefix keeps it out of default runs.
TEST_P(LowbdIntraPredTest, DISABLED_Speed) {
  // All buffers are dimensioned for the largest block size, 64.
  const int kMaxBlockDim = 64;
  DECLARE_ALIGNED(16, uint8_t, left_col[2 * kMaxBlockDim]);
  DECLARE_ALIGNED(16, uint8_t, above_data[2 * kMaxBlockDim + kMaxBlockDim]);
  // Output rows are spaced three block widths apart by the fixture.
  DECLARE_ALIGNED(16, uint8_t, dst[3 * kMaxBlockDim * kMaxBlockDim]);
  DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * kMaxBlockDim * kMaxBlockDim]);
  // Only the edge arrays are cleared; the output arrays are left as declared.
  av1_zero(above_data);
  av1_zero(left_col);
  RunSpeedTest(left_col, above_data, dst, ref_dst);
}
277
278 #if CONFIG_AV1_HIGHBITDEPTH
279 // -----------------------------------------------------------------------------
280 // High Bit Depth Tests
// Builds one IntraPredFunc<HighbdIntraPred> test parameter. The function
// aom_highbd_<type>_predictor_<width>x<height>_<opt> is the predictor under
// test, the same name with the _c suffix is the reference, and bd is stored
// as the bit depth of the case.
#define highbd_entry(type, width, height, opt, bd)                          \
  IntraPredFunc<HighbdIntraPred>(                                           \
      &aom_highbd_##type##_predictor_##width##x##height##_##opt,            \
      &aom_highbd_##type##_predictor_##width##x##height##_c, width, height, \
      bd)

// Compiled out by the #if 0 guard. When enabled it expands to a
// comma-separated list of highbd_entry() items for ten block sizes from 4x4
// up to 32x32.
#if 0
#define highbd_intrapred(type, opt, bd)                                       \
  highbd_entry(type, 4, 4, opt, bd), highbd_entry(type, 4, 8, opt, bd),       \
      highbd_entry(type, 8, 4, opt, bd), highbd_entry(type, 8, 8, opt, bd),   \
      highbd_entry(type, 8, 16, opt, bd), highbd_entry(type, 16, 8, opt, bd), \
      highbd_entry(type, 16, 16, opt, bd),                                    \
      highbd_entry(type, 16, 32, opt, bd),                                    \
      highbd_entry(type, 32, 16, opt, bd), highbd_entry(type, 32, 32, opt, bd)
#endif
296 #endif // CONFIG_AV1_HIGHBITDEPTH
297 // ---------------------------------------------------------------------------
298 // Low Bit Depth Tests
299
// Builds one IntraPredFunc<IntraPred> test parameter. The function
// aom_<type>_predictor_<width>x<height>_<opt> is the predictor under test and
// the same name with the _c suffix is the reference. The bit depth is always
// 8.
#define lowbd_entry(type, width, height, opt)                                  \
  IntraPredFunc<IntraPred>(&aom_##type##_predictor_##width##x##height##_##opt, \
                           &aom_##type##_predictor_##width##x##height##_c,     \
                           width, height, 8)

// Expands to a comma-separated list of lowbd_entry() items for one predictor
// type and one optimization suffix, covering ten block sizes from 4x4 up to
// 32x32.
#define lowbd_intrapred(type, opt)                                    \
  lowbd_entry(type, 4, 4, opt), lowbd_entry(type, 4, 8, opt),         \
      lowbd_entry(type, 8, 4, opt), lowbd_entry(type, 8, 8, opt),     \
      lowbd_entry(type, 8, 16, opt), lowbd_entry(type, 16, 8, opt),   \
      lowbd_entry(type, 16, 16, opt), lowbd_entry(type, 16, 32, opt), \
      lowbd_entry(type, 32, 16, opt), lowbd_entry(type, 32, 32, opt)
311
312 #if HAVE_SSE2
// SSE2 cases: the dc, dc_top, dc_left, dc_128, v and h predictors, each over
// every block size listed in lowbd_intrapred().
const IntraPredFunc<IntraPred> LowbdIntraPredTestVector[] = {
  lowbd_intrapred(dc, sse2),      lowbd_intrapred(dc_top, sse2),
  lowbd_intrapred(dc_left, sse2), lowbd_intrapred(dc_128, sse2),
  lowbd_intrapred(v, sse2),       lowbd_intrapred(h, sse2),
};

// Runs every LowbdIntraPredTest test once per entry above, under the SSE2
// instantiation name.
INSTANTIATE_TEST_SUITE_P(SSE2, LowbdIntraPredTest,
                         ::testing::ValuesIn(LowbdIntraPredTestVector));
321
322 #endif // HAVE_SSE2
323
324 #if HAVE_NEON
// NEON cases: the smooth predictor only, listed explicitly for nineteen block
// sizes from 4x4 up to 64x64 (more sizes than lowbd_intrapred() provides).
const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorNeon[] = {
  lowbd_entry(smooth, 4, 4, neon),   lowbd_entry(smooth, 4, 8, neon),
  lowbd_entry(smooth, 4, 16, neon),  lowbd_entry(smooth, 8, 4, neon),
  lowbd_entry(smooth, 8, 8, neon),   lowbd_entry(smooth, 8, 16, neon),
  lowbd_entry(smooth, 8, 32, neon),  lowbd_entry(smooth, 16, 4, neon),
  lowbd_entry(smooth, 16, 8, neon),  lowbd_entry(smooth, 16, 16, neon),
  lowbd_entry(smooth, 16, 32, neon), lowbd_entry(smooth, 16, 64, neon),
  lowbd_entry(smooth, 32, 8, neon),  lowbd_entry(smooth, 32, 16, neon),
  lowbd_entry(smooth, 32, 32, neon), lowbd_entry(smooth, 32, 64, neon),
  lowbd_entry(smooth, 64, 16, neon), lowbd_entry(smooth, 64, 32, neon),
  lowbd_entry(smooth, 64, 64, neon)
};
// Runs every LowbdIntraPredTest test once per entry above, under the NEON
// instantiation name.
INSTANTIATE_TEST_SUITE_P(NEON, LowbdIntraPredTest,
                         ::testing::ValuesIn(LowbdIntraPredTestVectorNeon));
339 #endif // HAVE_NEON
340
341 #if HAVE_SSSE3
// SSSE3 cases: the paeth and smooth predictors, each over every block size
// listed in lowbd_intrapred().
const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorSsse3[] = {
  lowbd_intrapred(paeth, ssse3),
  lowbd_intrapred(smooth, ssse3),
};

// Runs every LowbdIntraPredTest test once per entry above, under the SSSE3
// instantiation name.
INSTANTIATE_TEST_SUITE_P(SSSE3, LowbdIntraPredTest,
                         ::testing::ValuesIn(LowbdIntraPredTestVectorSsse3));
349
350 #endif // HAVE_SSSE3
351
352 #if HAVE_AVX2
// AVX2 cases, listed individually:
//  - 32x32: dc, dc_top, dc_left, dc_128, v, h
//  - 32x16: dc, dc_top, dc_left, dc_128, v
//  - paeth: 16x8, 16x16, 16x32, 32x16, 32x32
const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
  lowbd_entry(dc, 32, 32, avx2),      lowbd_entry(dc_top, 32, 32, avx2),
  lowbd_entry(dc_left, 32, 32, avx2), lowbd_entry(dc_128, 32, 32, avx2),
  lowbd_entry(v, 32, 32, avx2),       lowbd_entry(h, 32, 32, avx2),
  lowbd_entry(dc, 32, 16, avx2),      lowbd_entry(dc_top, 32, 16, avx2),
  lowbd_entry(dc_left, 32, 16, avx2), lowbd_entry(dc_128, 32, 16, avx2),
  lowbd_entry(v, 32, 16, avx2),       lowbd_entry(paeth, 16, 8, avx2),
  lowbd_entry(paeth, 16, 16, avx2),   lowbd_entry(paeth, 16, 32, avx2),
  lowbd_entry(paeth, 32, 16, avx2),   lowbd_entry(paeth, 32, 32, avx2),
};

// Runs every LowbdIntraPredTest test once per entry above, under the AVX2
// instantiation name.
INSTANTIATE_TEST_SUITE_P(AVX2, LowbdIntraPredTest,
                         ::testing::ValuesIn(LowbdIntraPredTestVectorAvx2));
366
367 #endif // HAVE_AVX2
368
369 #if CONFIG_AV1_HIGHBITDEPTH
370 #if HAVE_NEON
// High-bitdepth NEON cases: the dc predictor for the square sizes 4x4 through
// 64x64, each with a bit depth of 8.
const IntraPredFunc<HighbdIntraPred> HighbdIntraPredTestVectorNeon[] = {
  highbd_entry(dc, 4, 4, neon, 8),   highbd_entry(dc, 8, 8, neon, 8),
  highbd_entry(dc, 16, 16, neon, 8), highbd_entry(dc, 32, 32, neon, 8),
  highbd_entry(dc, 64, 64, neon, 8),
};

// Runs every HighbdIntraPredTest test once per entry above, under the NEON
// instantiation name.
INSTANTIATE_TEST_SUITE_P(NEON, HighbdIntraPredTest,
                         ::testing::ValuesIn(HighbdIntraPredTestVectorNeon));
379
380 #endif // HAVE_NEON
381 #endif // CONFIG_AV1_HIGHBITDEPTH
382 } // namespace
383