1 /*
2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <cstdlib>
13 #include <new>
14 #include <tuple>
15
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18
19 #include "aom/aom_codec.h"
20 #include "aom/aom_integer.h"
21 #include "aom_dsp/variance.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 #include "av1/common/reconinter.h"
26 #include "av1/encoder/reconinter_enc.h"
27 #include "test/acm_random.h"
28 #include "test/register_state_check.h"
29 #include "test/util.h"
30 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
31
32 namespace AV1CompMaskVariance {
// Signature shared by aom_comp_mask_pred_c and the SIMD variants that the
// tests in this file compare against it.
using comp_mask_pred_func = void (*)(uint8_t *comp_pred, const uint8_t *pred,
                                     int width, int height, const uint8_t *ref,
                                     int ref_stride, const uint8_t *mask,
                                     int mask_stride, int invert_mask);
37
38 #if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AVX2
39 const BLOCK_SIZE kValidBlockSize[] = {
40 BLOCK_8X8, BLOCK_8X16, BLOCK_8X32, BLOCK_16X8, BLOCK_16X16,
41 BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64,
42 BLOCK_64X32, BLOCK_64X64, BLOCK_64X128, BLOCK_128X64, BLOCK_128X128,
43 BLOCK_16X64, BLOCK_64X16
44 };
45 #endif
46 typedef std::tuple<comp_mask_pred_func, BLOCK_SIZE> CompMaskPredParam;
47
48 class AV1CompMaskVarianceTest
49 : public ::testing::TestWithParam<CompMaskPredParam> {
50 public:
51 ~AV1CompMaskVarianceTest();
52 void SetUp();
53
54 void TearDown();
55
56 protected:
57 void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
58 void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
CheckResult(int width,int height)59 bool CheckResult(int width, int height) {
60 for (int y = 0; y < height; ++y) {
61 for (int x = 0; x < width; ++x) {
62 const int idx = y * width + x;
63 if (comp_pred1_[idx] != comp_pred2_[idx]) {
64 printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
65 printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
66 return false;
67 }
68 }
69 }
70 return true;
71 }
72
73 libaom_test::ACMRandom rnd_;
74 uint8_t *comp_pred1_;
75 uint8_t *comp_pred2_;
76 uint8_t *pred_;
77 uint8_t *ref_buffer_;
78 uint8_t *ref_;
79 };
80 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompMaskVarianceTest);
81
~AV1CompMaskVarianceTest()82 AV1CompMaskVarianceTest::~AV1CompMaskVarianceTest() { ; }
83
SetUp()84 void AV1CompMaskVarianceTest::SetUp() {
85 rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
86 av1_init_wedge_masks();
87 comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
88 comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
89 pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
90 ref_buffer_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE + (8 * MAX_SB_SIZE));
91 ref_ = ref_buffer_ + (8 * MAX_SB_SIZE);
92 for (int i = 0; i < MAX_SB_SQUARE; ++i) {
93 pred_[i] = rnd_.Rand8();
94 }
95 for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
96 ref_buffer_[i] = rnd_.Rand8();
97 }
98 }
99
TearDown()100 void AV1CompMaskVarianceTest::TearDown() {
101 aom_free(comp_pred1_);
102 aom_free(comp_pred2_);
103 aom_free(pred_);
104 aom_free(ref_buffer_);
105 }
106
RunCheckOutput(comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)107 void AV1CompMaskVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
108 BLOCK_SIZE bsize, int inv) {
109 const int w = block_size_wide[bsize];
110 const int h = block_size_high[bsize];
111 const int wedge_types = get_wedge_types_lookup(bsize);
112 for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
113 const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
114
115 aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w,
116 inv);
117 test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv);
118
119 ASSERT_EQ(CheckResult(w, h), true)
120 << " wedge " << wedge_index << " inv " << inv;
121 }
122 }
123
RunSpeedTest(comp_mask_pred_func test_impl,BLOCK_SIZE bsize)124 void AV1CompMaskVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
125 BLOCK_SIZE bsize) {
126 const int w = block_size_wide[bsize];
127 const int h = block_size_high[bsize];
128 const int wedge_types = get_wedge_types_lookup(bsize);
129 int wedge_index = wedge_types / 2;
130 const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
131 const int num_loops = 1000000000 / (w + h);
132
133 comp_mask_pred_func funcs[2] = { aom_comp_mask_pred_c, test_impl };
134 double elapsed_time[2] = { 0 };
135 for (int i = 0; i < 2; ++i) {
136 aom_usec_timer timer;
137 aom_usec_timer_start(&timer);
138 comp_mask_pred_func func = funcs[i];
139 for (int j = 0; j < num_loops; ++j) {
140 func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 0);
141 }
142 aom_usec_timer_mark(&timer);
143 double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
144 elapsed_time[i] = 1000.0 * time / num_loops;
145 }
146 printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
147 elapsed_time[1]);
148 printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
149 }
150
TEST_P(AV1CompMaskVarianceTest, CheckOutput) {
  const comp_mask_pred_func test_impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  // Cover both values of the invert_mask argument.
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(test_impl, bsize, inv);
  }
}
156
// Benchmark only; the DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1CompMaskVarianceTest, DISABLED_Speed) {
  const comp_mask_pred_func test_impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunSpeedTest(test_impl, bsize);
}
160
#if HAVE_SSSE3
// SSSE3 implementation crossed with every block size in kValidBlockSize.
INSTANTIATE_TEST_SUITE_P(
    SSSE3, AV1CompMaskVarianceTest,
    ::testing::Combine(::testing::Values(aom_comp_mask_pred_ssse3),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_SSSE3

#if HAVE_AVX2
// AVX2 implementation crossed with every block size in kValidBlockSize.
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1CompMaskVarianceTest,
    ::testing::Combine(::testing::Values(aom_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_AVX2
174
175 #ifndef aom_comp_mask_pred
176 // can't run this test if aom_comp_mask_pred is defined to aom_comp_mask_pred_c
177 class AV1CompMaskUpVarianceTest : public AV1CompMaskVarianceTest {
178 public:
179 ~AV1CompMaskUpVarianceTest();
180
181 protected:
182 void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
183 void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
184 int havSub);
185 };
186
~AV1CompMaskUpVarianceTest()187 AV1CompMaskUpVarianceTest::~AV1CompMaskUpVarianceTest() { ; }
188
RunCheckOutput(comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)189 void AV1CompMaskUpVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
190 BLOCK_SIZE bsize, int inv) {
191 const int w = block_size_wide[bsize];
192 const int h = block_size_high[bsize];
193 const int wedge_types = get_wedge_types_lookup(bsize);
194 int subpel_search;
195 for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
196 ++subpel_search) {
197 // loop through subx and suby
198 for (int sub = 0; sub < 8 * 8; ++sub) {
199 int subx = sub & 0x7;
200 int suby = (sub >> 3);
201 for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
202 const uint8_t *mask =
203 av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
204
205 // ref
206 aom_comp_mask_upsampled_pred_c(
207 NULL, NULL, 0, 0, NULL, comp_pred1_, pred_, w, h, subx, suby, ref_,
208 MAX_SB_SIZE, mask, w, inv, subpel_search);
209
210 aom_comp_mask_pred = test_impl; // test
211 aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred2_, pred_,
212 w, h, subx, suby, ref_, MAX_SB_SIZE, mask,
213 w, inv, subpel_search);
214 ASSERT_EQ(CheckResult(w, h), true)
215 << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
216 << "," << suby << ")";
217 }
218 }
219 }
220 }
221
RunSpeedTest(comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int havSub)222 void AV1CompMaskUpVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
223 BLOCK_SIZE bsize, int havSub) {
224 const int w = block_size_wide[bsize];
225 const int h = block_size_high[bsize];
226 const int subx = havSub ? 3 : 0;
227 const int suby = havSub ? 4 : 0;
228 const int wedge_types = get_wedge_types_lookup(bsize);
229 int wedge_index = wedge_types / 2;
230 const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
231
232 const int num_loops = 1000000000 / (w + h);
233 comp_mask_pred_func funcs[2] = { &aom_comp_mask_pred_c, test_impl };
234 double elapsed_time[2] = { 0 };
235 int subpel_search = USE_8_TAPS; // set to USE_4_TAPS to test 4-tap filter.
236 for (int i = 0; i < 2; ++i) {
237 aom_usec_timer timer;
238 aom_usec_timer_start(&timer);
239 aom_comp_mask_pred = funcs[i];
240 for (int j = 0; j < num_loops; ++j) {
241 aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred1_, pred_,
242 w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w,
243 0, subpel_search);
244 }
245 aom_usec_timer_mark(&timer);
246 double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
247 elapsed_time[i] = 1000.0 * time / num_loops;
248 }
249 printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
250 elapsed_time[1]);
251 printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
252 }
253
TEST_P(AV1CompMaskUpVarianceTest, CheckOutput) {
  const comp_mask_pred_func test_impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  // Cover both values of the invert_mask argument.
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(test_impl, bsize, inv);
  }
}
259
// Benchmark only; always measured with a non-zero sub-pixel offset.
TEST_P(AV1CompMaskUpVarianceTest, DISABLED_Speed) {
  const comp_mask_pred_func test_impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  const int have_subpel = 1;
  RunSpeedTest(test_impl, bsize, have_subpel);
}
263
#if HAVE_SSSE3
// SSSE3 implementation crossed with every block size in kValidBlockSize.
INSTANTIATE_TEST_SUITE_P(
    SSSE3, AV1CompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(aom_comp_mask_pred_ssse3),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_SSSE3

#if HAVE_AVX2
// AVX2 implementation crossed with every block size in kValidBlockSize.
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1CompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(aom_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_AVX2
277
278 #endif // ifndef aom_comp_mask_pred
279
280 #if CONFIG_AV1_HIGHBITDEPTH
// Signature shared by aom_highbd_comp_mask_pred_c and its SIMD variants. The
// tests pass the sample buffers through CONVERT_TO_BYTEPTR, hence the uint8_t
// pointer types.
using highbd_comp_mask_pred_func = void (*)(uint8_t *comp_pred8,
                                            const uint8_t *pred8, int width,
                                            int height, const uint8_t *ref8,
                                            int ref_stride,
                                            const uint8_t *mask,
                                            int mask_stride, int invert_mask);
286
287 typedef std::tuple<highbd_comp_mask_pred_func, BLOCK_SIZE, int>
288 HighbdCompMaskPredParam;
289
290 class AV1HighbdCompMaskVarianceTest
291 : public ::testing::TestWithParam<HighbdCompMaskPredParam> {
292 public:
293 ~AV1HighbdCompMaskVarianceTest();
294 void SetUp();
295
296 void TearDown();
297
298 protected:
299 void RunCheckOutput(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
300 int inv);
301 void RunSpeedTest(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
CheckResult(int width,int height)302 bool CheckResult(int width, int height) {
303 for (int y = 0; y < height; ++y) {
304 for (int x = 0; x < width; ++x) {
305 const int idx = y * width + x;
306 if (comp_pred1_[idx] != comp_pred2_[idx]) {
307 printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
308 printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
309 return false;
310 }
311 }
312 }
313 return true;
314 }
315
316 libaom_test::ACMRandom rnd_;
317 uint16_t *comp_pred1_;
318 uint16_t *comp_pred2_;
319 uint16_t *pred_;
320 uint16_t *ref_buffer_;
321 uint16_t *ref_;
322 };
323 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompMaskVarianceTest);
324
~AV1HighbdCompMaskVarianceTest()325 AV1HighbdCompMaskVarianceTest::~AV1HighbdCompMaskVarianceTest() { ; }
326
SetUp()327 void AV1HighbdCompMaskVarianceTest::SetUp() {
328 rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
329 av1_init_wedge_masks();
330
331 comp_pred1_ =
332 (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_));
333 comp_pred2_ =
334 (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_));
335 pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_));
336 ref_buffer_ = (uint16_t *)aom_memalign(
337 16, (MAX_SB_SQUARE + (8 * MAX_SB_SIZE)) * sizeof(*ref_buffer_));
338 ref_ = ref_buffer_ + (8 * MAX_SB_SIZE);
339 }
340
TearDown()341 void AV1HighbdCompMaskVarianceTest::TearDown() {
342 aom_free(comp_pred1_);
343 aom_free(comp_pred2_);
344 aom_free(pred_);
345 aom_free(ref_buffer_);
346 }
347
RunCheckOutput(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)348 void AV1HighbdCompMaskVarianceTest::RunCheckOutput(
349 highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
350 int bd_ = GET_PARAM(2);
351 const int w = block_size_wide[bsize];
352 const int h = block_size_high[bsize];
353 const int wedge_types = get_wedge_types_lookup(bsize);
354
355 for (int i = 0; i < MAX_SB_SQUARE; ++i) {
356 pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
357 }
358 for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
359 ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
360 }
361
362 for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
363 const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
364
365 aom_highbd_comp_mask_pred_c(
366 CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
367 CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
368
369 test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
370 CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
371
372 ASSERT_EQ(CheckResult(w, h), true)
373 << " wedge " << wedge_index << " inv " << inv;
374 }
375 }
376
RunSpeedTest(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize)377 void AV1HighbdCompMaskVarianceTest::RunSpeedTest(
378 highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize) {
379 int bd_ = GET_PARAM(2);
380
381 const int w = block_size_wide[bsize];
382 const int h = block_size_high[bsize];
383 const int wedge_types = get_wedge_types_lookup(bsize);
384 int wedge_index = wedge_types / 2;
385
386 for (int i = 0; i < MAX_SB_SQUARE; ++i) {
387 pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
388 }
389 for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
390 ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
391 }
392
393 const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
394 const int num_loops = 1000000000 / (w + h);
395
396 highbd_comp_mask_pred_func funcs[2] = { aom_highbd_comp_mask_pred_c,
397 test_impl };
398 double elapsed_time[2] = { 0 };
399 for (int i = 0; i < 2; ++i) {
400 aom_usec_timer timer;
401 aom_usec_timer_start(&timer);
402 highbd_comp_mask_pred_func func = funcs[i];
403 for (int j = 0; j < num_loops; ++j) {
404 func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
405 CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0);
406 }
407 aom_usec_timer_mark(&timer);
408 double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
409 elapsed_time[i] = 1000.0 * time / num_loops;
410 }
411 printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
412 elapsed_time[1]);
413 printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
414 }
415
TEST_P(AV1HighbdCompMaskVarianceTest, CheckOutput) {
  const highbd_comp_mask_pred_func test_impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  // Cover both values of the invert_mask argument.
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(test_impl, bsize, inv);
  }
}
421
// Benchmark only; the DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1HighbdCompMaskVarianceTest, DISABLED_Speed) {
  const highbd_comp_mask_pred_func test_impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunSpeedTest(test_impl, bsize);
}
425
#if HAVE_AVX2
// AVX2 implementation x every block size x bit depths 8, 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1HighbdCompMaskVarianceTest,
    ::testing::Combine(::testing::Values(aom_highbd_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_AVX2

#if HAVE_SSE2
// SSE2 implementation x every block size x bit depths 8, 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1HighbdCompMaskVarianceTest,
    ::testing::Combine(::testing::Values(aom_highbd_comp_mask_pred_sse2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_SSE2
441
442 #ifndef aom_highbd_comp_mask_pred
443 // can't run this test if aom_highbd_comp_mask_pred is defined to
444 // aom_highbd_comp_mask_pred_c
445 class AV1HighbdCompMaskUpVarianceTest : public AV1HighbdCompMaskVarianceTest {
446 public:
447 ~AV1HighbdCompMaskUpVarianceTest();
448
449 protected:
450 void RunCheckOutput(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
451 int inv);
452 void RunSpeedTest(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
453 int havSub);
454 };
455
~AV1HighbdCompMaskUpVarianceTest()456 AV1HighbdCompMaskUpVarianceTest::~AV1HighbdCompMaskUpVarianceTest() { ; }
457
RunCheckOutput(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)458 void AV1HighbdCompMaskUpVarianceTest::RunCheckOutput(
459 highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
460 (void)test_impl;
461 int bd_ = GET_PARAM(2);
462 const int w = block_size_wide[bsize];
463 const int h = block_size_high[bsize];
464 const int wedge_types = get_wedge_types_lookup(bsize);
465
466 for (int i = 0; i < MAX_SB_SQUARE; ++i) {
467 pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
468 }
469 for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
470 ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
471 }
472
473 int subpel_search;
474 for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
475 // loop through subx and suby
476 for (int sub = 0; sub < 8 * 8; ++sub) {
477 int subx = sub & 0x7;
478 int suby = (sub >> 3);
479 for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
480 const uint8_t *mask =
481 av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
482
483 // ref
484 aom_highbd_upsampled_pred_c(
485 NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred1_), w, h, subx,
486 suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search);
487
488 aom_highbd_comp_mask_pred_c(
489 CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
490 CONVERT_TO_BYTEPTR(comp_pred1_), w, mask, w, inv);
491
492 // test
493 aom_highbd_upsampled_pred(
494 NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred2_), w, h, subx,
495 suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search);
496
497 aom_highbd_comp_mask_pred(
498 CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
499 CONVERT_TO_BYTEPTR(comp_pred2_), w, mask, w, inv);
500
501 ASSERT_EQ(CheckResult(w, h), true)
502 << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
503 << "," << suby << ")";
504 }
505 }
506 }
507 }
508
RunSpeedTest(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int havSub)509 void AV1HighbdCompMaskUpVarianceTest::RunSpeedTest(
510 highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int havSub) {
511 int bd_ = GET_PARAM(2);
512 const int w = block_size_wide[bsize];
513 const int h = block_size_high[bsize];
514 const int subx = havSub ? 3 : 0;
515 const int suby = havSub ? 4 : 0;
516 const int wedge_types = get_wedge_types_lookup(bsize);
517 int wedge_index = wedge_types / 2;
518 const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
519
520 for (int i = 0; i < MAX_SB_SQUARE; ++i) {
521 pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
522 }
523 for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
524 ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
525 }
526
527 const int num_loops = 1000000000 / (w + h);
528 highbd_comp_mask_pred_func funcs[2] = { &aom_highbd_comp_mask_pred_c,
529 test_impl };
530 double elapsed_time[2] = { 0 };
531 for (int i = 0; i < 2; ++i) {
532 aom_usec_timer timer;
533 aom_usec_timer_start(&timer);
534 aom_highbd_comp_mask_pred = funcs[i];
535 int subpel_search = 2; // set to 1 to test 4-tap filter.
536 for (int j = 0; j < num_loops; ++j) {
537 aom_highbd_comp_mask_upsampled_pred(
538 NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred1_),
539 CONVERT_TO_BYTEPTR(pred_), w, h, subx, suby, CONVERT_TO_BYTEPTR(ref_),
540 MAX_SB_SIZE, mask, w, 0, bd_, subpel_search);
541 }
542 aom_usec_timer_mark(&timer);
543 double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
544 elapsed_time[i] = 1000.0 * time / num_loops;
545 }
546 printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
547 elapsed_time[1]);
548 printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
549 }
550
TEST_P(AV1HighbdCompMaskUpVarianceTest, CheckOutput) {
  const highbd_comp_mask_pred_func test_impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  // Cover both values of the invert_mask argument.
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(test_impl, bsize, inv);
  }
}
556
// Benchmark only; always measured with a non-zero sub-pixel offset.
TEST_P(AV1HighbdCompMaskUpVarianceTest, DISABLED_Speed) {
  const highbd_comp_mask_pred_func test_impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  const int have_subpel = 1;
  RunSpeedTest(test_impl, bsize, have_subpel);
}
560
#if HAVE_AVX2
// AVX2 implementation x every block size x bit depths 8, 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1HighbdCompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(aom_highbd_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_AVX2

#if HAVE_SSE2
// SSE2 implementation x every block size x bit depths 8, 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1HighbdCompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(aom_highbd_comp_mask_pred_sse2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_SSE2
576
577 #endif // ifndef aom_highbd_comp_mask_pred
578 #endif // CONFIG_AV1_HIGHBITDEPTH
579 } // namespace AV1CompMaskVariance
580