1 /*
2  * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <cstdlib>
13 #include <new>
14 #include <tuple>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 
19 #include "aom/aom_codec.h"
20 #include "aom/aom_integer.h"
21 #include "aom_dsp/variance.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 #include "av1/common/reconinter.h"
26 #include "av1/encoder/reconinter_enc.h"
27 #include "test/acm_random.h"
28 #include "test/register_state_check.h"
29 #include "test/util.h"
30 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
31 
32 namespace AV1CompMaskVariance {
33 typedef void (*comp_mask_pred_func)(uint8_t *comp_pred, const uint8_t *pred,
34                                     int width, int height, const uint8_t *ref,
35                                     int ref_stride, const uint8_t *mask,
36                                     int mask_stride, int invert_mask);
37 
38 #if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AVX2
39 const BLOCK_SIZE kValidBlockSize[] = {
40   BLOCK_8X8,   BLOCK_8X16,  BLOCK_8X32,   BLOCK_16X8,   BLOCK_16X16,
41   BLOCK_16X32, BLOCK_32X8,  BLOCK_32X16,  BLOCK_32X32,  BLOCK_32X64,
42   BLOCK_64X32, BLOCK_64X64, BLOCK_64X128, BLOCK_128X64, BLOCK_128X128,
43   BLOCK_16X64, BLOCK_64X16
44 };
45 #endif
46 typedef std::tuple<comp_mask_pred_func, BLOCK_SIZE> CompMaskPredParam;
47 
48 class AV1CompMaskVarianceTest
49     : public ::testing::TestWithParam<CompMaskPredParam> {
50  public:
51   ~AV1CompMaskVarianceTest();
52   void SetUp();
53 
54   void TearDown();
55 
56  protected:
57   void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
58   void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
CheckResult(int width,int height)59   bool CheckResult(int width, int height) {
60     for (int y = 0; y < height; ++y) {
61       for (int x = 0; x < width; ++x) {
62         const int idx = y * width + x;
63         if (comp_pred1_[idx] != comp_pred2_[idx]) {
64           printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
65           printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
66           return false;
67         }
68       }
69     }
70     return true;
71   }
72 
73   libaom_test::ACMRandom rnd_;
74   uint8_t *comp_pred1_;
75   uint8_t *comp_pred2_;
76   uint8_t *pred_;
77   uint8_t *ref_buffer_;
78   uint8_t *ref_;
79 };
80 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompMaskVarianceTest);
81 
~AV1CompMaskVarianceTest()82 AV1CompMaskVarianceTest::~AV1CompMaskVarianceTest() { ; }
83 
SetUp()84 void AV1CompMaskVarianceTest::SetUp() {
85   rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
86   av1_init_wedge_masks();
87   comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
88   comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
89   pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
90   ref_buffer_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE + (8 * MAX_SB_SIZE));
91   ref_ = ref_buffer_ + (8 * MAX_SB_SIZE);
92   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
93     pred_[i] = rnd_.Rand8();
94   }
95   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
96     ref_buffer_[i] = rnd_.Rand8();
97   }
98 }
99 
TearDown()100 void AV1CompMaskVarianceTest::TearDown() {
101   aom_free(comp_pred1_);
102   aom_free(comp_pred2_);
103   aom_free(pred_);
104   aom_free(ref_buffer_);
105 }
106 
RunCheckOutput(comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)107 void AV1CompMaskVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
108                                              BLOCK_SIZE bsize, int inv) {
109   const int w = block_size_wide[bsize];
110   const int h = block_size_high[bsize];
111   const int wedge_types = get_wedge_types_lookup(bsize);
112   for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
113     const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
114 
115     aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w,
116                          inv);
117     test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv);
118 
119     ASSERT_EQ(CheckResult(w, h), true)
120         << " wedge " << wedge_index << " inv " << inv;
121   }
122 }
123 
RunSpeedTest(comp_mask_pred_func test_impl,BLOCK_SIZE bsize)124 void AV1CompMaskVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
125                                            BLOCK_SIZE bsize) {
126   const int w = block_size_wide[bsize];
127   const int h = block_size_high[bsize];
128   const int wedge_types = get_wedge_types_lookup(bsize);
129   int wedge_index = wedge_types / 2;
130   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
131   const int num_loops = 1000000000 / (w + h);
132 
133   comp_mask_pred_func funcs[2] = { aom_comp_mask_pred_c, test_impl };
134   double elapsed_time[2] = { 0 };
135   for (int i = 0; i < 2; ++i) {
136     aom_usec_timer timer;
137     aom_usec_timer_start(&timer);
138     comp_mask_pred_func func = funcs[i];
139     for (int j = 0; j < num_loops; ++j) {
140       func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 0);
141     }
142     aom_usec_timer_mark(&timer);
143     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
144     elapsed_time[i] = 1000.0 * time / num_loops;
145   }
146   printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
147          elapsed_time[1]);
148   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
149 }
150 
TEST_P(AV1CompMaskVarianceTest, CheckOutput) {
  const comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  // Check with the mask applied as is (0) and inverted (1).
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(impl, bsize, inv);
  }
}
156 
// Benchmark; the DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1CompMaskVarianceTest, DISABLED_Speed) {
  const comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunSpeedTest(impl, bsize);
}
160 
// Each available SIMD implementation is paired with every entry of
// kValidBlockSize.
#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(
    SSSE3, AV1CompMaskVarianceTest,
    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_SSSE3

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1CompMaskVarianceTest,
    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_AVX2
174 
175 #ifndef aom_comp_mask_pred
176 // can't run this test if aom_comp_mask_pred is defined to aom_comp_mask_pred_c
177 class AV1CompMaskUpVarianceTest : public AV1CompMaskVarianceTest {
178  public:
179   ~AV1CompMaskUpVarianceTest();
180 
181  protected:
182   void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
183   void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
184                     int havSub);
185 };
186 
~AV1CompMaskUpVarianceTest()187 AV1CompMaskUpVarianceTest::~AV1CompMaskUpVarianceTest() { ; }
188 
RunCheckOutput(comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)189 void AV1CompMaskUpVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
190                                                BLOCK_SIZE bsize, int inv) {
191   const int w = block_size_wide[bsize];
192   const int h = block_size_high[bsize];
193   const int wedge_types = get_wedge_types_lookup(bsize);
194   int subpel_search;
195   for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
196        ++subpel_search) {
197     // loop through subx and suby
198     for (int sub = 0; sub < 8 * 8; ++sub) {
199       int subx = sub & 0x7;
200       int suby = (sub >> 3);
201       for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
202         const uint8_t *mask =
203             av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
204 
205         // ref
206         aom_comp_mask_upsampled_pred_c(
207             NULL, NULL, 0, 0, NULL, comp_pred1_, pred_, w, h, subx, suby, ref_,
208             MAX_SB_SIZE, mask, w, inv, subpel_search);
209 
210         aom_comp_mask_pred = test_impl;  // test
211         aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred2_, pred_,
212                                      w, h, subx, suby, ref_, MAX_SB_SIZE, mask,
213                                      w, inv, subpel_search);
214         ASSERT_EQ(CheckResult(w, h), true)
215             << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
216             << "," << suby << ")";
217       }
218     }
219   }
220 }
221 
RunSpeedTest(comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int havSub)222 void AV1CompMaskUpVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
223                                              BLOCK_SIZE bsize, int havSub) {
224   const int w = block_size_wide[bsize];
225   const int h = block_size_high[bsize];
226   const int subx = havSub ? 3 : 0;
227   const int suby = havSub ? 4 : 0;
228   const int wedge_types = get_wedge_types_lookup(bsize);
229   int wedge_index = wedge_types / 2;
230   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
231 
232   const int num_loops = 1000000000 / (w + h);
233   comp_mask_pred_func funcs[2] = { &aom_comp_mask_pred_c, test_impl };
234   double elapsed_time[2] = { 0 };
235   int subpel_search = USE_8_TAPS;  // set to USE_4_TAPS to test 4-tap filter.
236   for (int i = 0; i < 2; ++i) {
237     aom_usec_timer timer;
238     aom_usec_timer_start(&timer);
239     aom_comp_mask_pred = funcs[i];
240     for (int j = 0; j < num_loops; ++j) {
241       aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred1_, pred_,
242                                    w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w,
243                                    0, subpel_search);
244     }
245     aom_usec_timer_mark(&timer);
246     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
247     elapsed_time[i] = 1000.0 * time / num_loops;
248   }
249   printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
250          elapsed_time[1]);
251   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
252 }
253 
TEST_P(AV1CompMaskUpVarianceTest, CheckOutput) {
  const comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  // Check with the mask applied as is (0) and inverted (1).
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(impl, bsize, inv);
  }
}
259 
// Benchmark; always run with a non-zero sub-pixel offset (havSub = 1).
TEST_P(AV1CompMaskUpVarianceTest, DISABLED_Speed) {
  const comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunSpeedTest(impl, bsize, 1);
}
263 
// Each available SIMD implementation is paired with every entry of
// kValidBlockSize.
#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(
    SSSE3, AV1CompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_SSSE3

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1CompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_AVX2
277 
278 #endif  // ifndef aom_comp_mask_pred
279 
280 #if CONFIG_AV1_HIGHBITDEPTH
281 typedef void (*highbd_comp_mask_pred_func)(uint8_t *comp_pred8,
282                                            const uint8_t *pred8, int width,
283                                            int height, const uint8_t *ref8,
284                                            int ref_stride, const uint8_t *mask,
285                                            int mask_stride, int invert_mask);
286 
287 typedef std::tuple<highbd_comp_mask_pred_func, BLOCK_SIZE, int>
288     HighbdCompMaskPredParam;
289 
290 class AV1HighbdCompMaskVarianceTest
291     : public ::testing::TestWithParam<HighbdCompMaskPredParam> {
292  public:
293   ~AV1HighbdCompMaskVarianceTest();
294   void SetUp();
295 
296   void TearDown();
297 
298  protected:
299   void RunCheckOutput(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
300                       int inv);
301   void RunSpeedTest(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
CheckResult(int width,int height)302   bool CheckResult(int width, int height) {
303     for (int y = 0; y < height; ++y) {
304       for (int x = 0; x < width; ++x) {
305         const int idx = y * width + x;
306         if (comp_pred1_[idx] != comp_pred2_[idx]) {
307           printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
308           printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
309           return false;
310         }
311       }
312     }
313     return true;
314   }
315 
316   libaom_test::ACMRandom rnd_;
317   uint16_t *comp_pred1_;
318   uint16_t *comp_pred2_;
319   uint16_t *pred_;
320   uint16_t *ref_buffer_;
321   uint16_t *ref_;
322 };
323 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompMaskVarianceTest);
324 
~AV1HighbdCompMaskVarianceTest()325 AV1HighbdCompMaskVarianceTest::~AV1HighbdCompMaskVarianceTest() { ; }
326 
SetUp()327 void AV1HighbdCompMaskVarianceTest::SetUp() {
328   rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
329   av1_init_wedge_masks();
330 
331   comp_pred1_ =
332       (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_));
333   comp_pred2_ =
334       (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_));
335   pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_));
336   ref_buffer_ = (uint16_t *)aom_memalign(
337       16, (MAX_SB_SQUARE + (8 * MAX_SB_SIZE)) * sizeof(*ref_buffer_));
338   ref_ = ref_buffer_ + (8 * MAX_SB_SIZE);
339 }
340 
TearDown()341 void AV1HighbdCompMaskVarianceTest::TearDown() {
342   aom_free(comp_pred1_);
343   aom_free(comp_pred2_);
344   aom_free(pred_);
345   aom_free(ref_buffer_);
346 }
347 
RunCheckOutput(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)348 void AV1HighbdCompMaskVarianceTest::RunCheckOutput(
349     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
350   int bd_ = GET_PARAM(2);
351   const int w = block_size_wide[bsize];
352   const int h = block_size_high[bsize];
353   const int wedge_types = get_wedge_types_lookup(bsize);
354 
355   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
356     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
357   }
358   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
359     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
360   }
361 
362   for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
363     const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
364 
365     aom_highbd_comp_mask_pred_c(
366         CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
367         CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
368 
369     test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
370               CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
371 
372     ASSERT_EQ(CheckResult(w, h), true)
373         << " wedge " << wedge_index << " inv " << inv;
374   }
375 }
376 
RunSpeedTest(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize)377 void AV1HighbdCompMaskVarianceTest::RunSpeedTest(
378     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize) {
379   int bd_ = GET_PARAM(2);
380 
381   const int w = block_size_wide[bsize];
382   const int h = block_size_high[bsize];
383   const int wedge_types = get_wedge_types_lookup(bsize);
384   int wedge_index = wedge_types / 2;
385 
386   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
387     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
388   }
389   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
390     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
391   }
392 
393   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
394   const int num_loops = 1000000000 / (w + h);
395 
396   highbd_comp_mask_pred_func funcs[2] = { aom_highbd_comp_mask_pred_c,
397                                           test_impl };
398   double elapsed_time[2] = { 0 };
399   for (int i = 0; i < 2; ++i) {
400     aom_usec_timer timer;
401     aom_usec_timer_start(&timer);
402     highbd_comp_mask_pred_func func = funcs[i];
403     for (int j = 0; j < num_loops; ++j) {
404       func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
405            CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0);
406     }
407     aom_usec_timer_mark(&timer);
408     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
409     elapsed_time[i] = 1000.0 * time / num_loops;
410   }
411   printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
412          elapsed_time[1]);
413   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
414 }
415 
TEST_P(AV1HighbdCompMaskVarianceTest, CheckOutput) {
  const highbd_comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  // Check with the mask applied as is (0) and inverted (1).
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(impl, bsize, inv);
  }
}
421 
// Benchmark; the DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1HighbdCompMaskVarianceTest, DISABLED_Speed) {
  const highbd_comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunSpeedTest(impl, bsize);
}
425 
// Each available SIMD implementation is paired with every entry of
// kValidBlockSize and with the bit depths 8, 10 and 12 (Range(8, 13, 2)).
#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1HighbdCompMaskVarianceTest,
    ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_AVX2

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1HighbdCompMaskVarianceTest,
    ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_sse2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_SSE2
441 
442 #ifndef aom_highbd_comp_mask_pred
443 // can't run this test if aom_highbd_comp_mask_pred is defined to
444 // aom_highbd_comp_mask_pred_c
445 class AV1HighbdCompMaskUpVarianceTest : public AV1HighbdCompMaskVarianceTest {
446  public:
447   ~AV1HighbdCompMaskUpVarianceTest();
448 
449  protected:
450   void RunCheckOutput(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
451                       int inv);
452   void RunSpeedTest(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
453                     int havSub);
454 };
455 
~AV1HighbdCompMaskUpVarianceTest()456 AV1HighbdCompMaskUpVarianceTest::~AV1HighbdCompMaskUpVarianceTest() { ; }
457 
RunCheckOutput(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)458 void AV1HighbdCompMaskUpVarianceTest::RunCheckOutput(
459     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
460   (void)test_impl;
461   int bd_ = GET_PARAM(2);
462   const int w = block_size_wide[bsize];
463   const int h = block_size_high[bsize];
464   const int wedge_types = get_wedge_types_lookup(bsize);
465 
466   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
467     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
468   }
469   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
470     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
471   }
472 
473   int subpel_search;
474   for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
475     // loop through subx and suby
476     for (int sub = 0; sub < 8 * 8; ++sub) {
477       int subx = sub & 0x7;
478       int suby = (sub >> 3);
479       for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
480         const uint8_t *mask =
481             av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
482 
483         // ref
484         aom_highbd_upsampled_pred_c(
485             NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred1_), w, h, subx,
486             suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search);
487 
488         aom_highbd_comp_mask_pred_c(
489             CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
490             CONVERT_TO_BYTEPTR(comp_pred1_), w, mask, w, inv);
491 
492         // test
493         aom_highbd_upsampled_pred(
494             NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred2_), w, h, subx,
495             suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search);
496 
497         aom_highbd_comp_mask_pred(
498             CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
499             CONVERT_TO_BYTEPTR(comp_pred2_), w, mask, w, inv);
500 
501         ASSERT_EQ(CheckResult(w, h), true)
502             << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
503             << "," << suby << ")";
504       }
505     }
506   }
507 }
508 
RunSpeedTest(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int havSub)509 void AV1HighbdCompMaskUpVarianceTest::RunSpeedTest(
510     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int havSub) {
511   int bd_ = GET_PARAM(2);
512   const int w = block_size_wide[bsize];
513   const int h = block_size_high[bsize];
514   const int subx = havSub ? 3 : 0;
515   const int suby = havSub ? 4 : 0;
516   const int wedge_types = get_wedge_types_lookup(bsize);
517   int wedge_index = wedge_types / 2;
518   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
519 
520   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
521     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
522   }
523   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
524     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
525   }
526 
527   const int num_loops = 1000000000 / (w + h);
528   highbd_comp_mask_pred_func funcs[2] = { &aom_highbd_comp_mask_pred_c,
529                                           test_impl };
530   double elapsed_time[2] = { 0 };
531   for (int i = 0; i < 2; ++i) {
532     aom_usec_timer timer;
533     aom_usec_timer_start(&timer);
534     aom_highbd_comp_mask_pred = funcs[i];
535     int subpel_search = 2;  // set to 1 to test 4-tap filter.
536     for (int j = 0; j < num_loops; ++j) {
537       aom_highbd_comp_mask_upsampled_pred(
538           NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred1_),
539           CONVERT_TO_BYTEPTR(pred_), w, h, subx, suby, CONVERT_TO_BYTEPTR(ref_),
540           MAX_SB_SIZE, mask, w, 0, bd_, subpel_search);
541     }
542     aom_usec_timer_mark(&timer);
543     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
544     elapsed_time[i] = 1000.0 * time / num_loops;
545   }
546   printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
547          elapsed_time[1]);
548   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
549 }
550 
TEST_P(AV1HighbdCompMaskUpVarianceTest, CheckOutput) {
  const highbd_comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  // Check with the mask applied as is (0) and inverted (1).
  for (int inv = 0; inv <= 1; ++inv) {
    RunCheckOutput(impl, bsize, inv);
  }
}
556 
// Benchmark; always run with a non-zero sub-pixel offset (havSub = 1).
TEST_P(AV1HighbdCompMaskUpVarianceTest, DISABLED_Speed) {
  const highbd_comp_mask_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunSpeedTest(impl, bsize, 1);
}
560 
// Each available SIMD implementation is paired with every entry of
// kValidBlockSize and with the bit depths 8, 10 and 12 (Range(8, 13, 2)).
#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1HighbdCompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_avx2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_AVX2

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1HighbdCompMaskUpVarianceTest,
    ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_sse2),
                       ::testing::ValuesIn(kValidBlockSize),
                       ::testing::Range(8, 13, 2)));
#endif  // HAVE_SSE2
576 
577 #endif  // ifndef aom_highbd_comp_mask_pred
578 #endif  // CONFIG_AV1_HIGHBITDEPTH
579 }  // namespace AV1CompMaskVariance
580