1 /*
2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <tuple>
13
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
15
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19
20 #include "aom/aom_codec.h"
21 #include "aom_ports/aom_timer.h"
22 #include "av1/encoder/encoder.h"
23 #include "av1/common/scan.h"
24 #include "test/acm_random.h"
25 #include "test/clear_system_state.h"
26 #include "test/register_state_check.h"
27 #include "test/util.h"
28
29 namespace {
30 using libaom_test::ACMRandom;
31
32 #define QUAN_PARAM_LIST \
33 const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, \
34 const int16_t *round_ptr, const int16_t *quant_ptr, \
35 const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, \
36 tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, \
37 const int16_t *scan, const int16_t *iscan
38
39 typedef void (*QuantizeFunc)(QUAN_PARAM_LIST);
40 typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST, int log_scale);
41
42 #define HBD_QUAN_FUNC \
43 fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
44 qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, log_scale)
45
46 #define LBD_QUAN_FUNC \
47 fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
48 qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan)
49
50 template <QuantizeFuncHbd fn>
highbd_quan16x16_wrapper(QUAN_PARAM_LIST)51 void highbd_quan16x16_wrapper(QUAN_PARAM_LIST) {
52 const int log_scale = 0;
53 HBD_QUAN_FUNC;
54 }
55
56 template <QuantizeFuncHbd fn>
highbd_quan32x32_wrapper(QUAN_PARAM_LIST)57 void highbd_quan32x32_wrapper(QUAN_PARAM_LIST) {
58 const int log_scale = 1;
59 HBD_QUAN_FUNC;
60 }
61
62 template <QuantizeFuncHbd fn>
highbd_quan64x64_wrapper(QUAN_PARAM_LIST)63 void highbd_quan64x64_wrapper(QUAN_PARAM_LIST) {
64 const int log_scale = 2;
65 HBD_QUAN_FUNC;
66 }
67
68 enum QuantType { TYPE_B, TYPE_DC, TYPE_FP };
69
70 using std::tuple;
71 typedef tuple<QuantizeFunc, QuantizeFunc, TX_SIZE, QuantType, aom_bit_depth_t>
72 QuantizeParam;
73
74 typedef struct {
75 QUANTS quant;
76 Dequants dequant;
77 } QuanTable;
78
79 const int kTestNum = 1000;
80
81 template <typename CoeffType>
82 class QuantizeTestBase : public ::testing::TestWithParam<QuantizeParam> {
83 protected:
QuantizeTestBase()84 QuantizeTestBase()
85 : quant_ref_(GET_PARAM(0)), quant_(GET_PARAM(1)), tx_size_(GET_PARAM(2)),
86 type_(GET_PARAM(3)), bd_(GET_PARAM(4)) {}
87
~QuantizeTestBase()88 virtual ~QuantizeTestBase() {}
89
SetUp()90 virtual void SetUp() {
91 qtab_ = reinterpret_cast<QuanTable *>(aom_memalign(32, sizeof(*qtab_)));
92 const int n_coeffs = coeff_num();
93 coeff_ = reinterpret_cast<CoeffType *>(
94 aom_memalign(32, 6 * n_coeffs * sizeof(CoeffType)));
95 InitQuantizer();
96 }
97
TearDown()98 virtual void TearDown() {
99 aom_free(qtab_);
100 qtab_ = NULL;
101 aom_free(coeff_);
102 coeff_ = NULL;
103 libaom_test::ClearSystemState();
104 }
105
InitQuantizer()106 void InitQuantizer() {
107 av1_build_quantizer(bd_, 0, 0, 0, 0, 0, &qtab_->quant, &qtab_->dequant);
108 }
109
110 virtual void RunQuantizeFunc(
111 const CoeffType *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
112 const int16_t *round_ptr, const int16_t *quant_ptr,
113 const int16_t *quant_shift_ptr, CoeffType *qcoeff_ptr,
114 CoeffType *qcoeff_ref_ptr, CoeffType *dqcoeff_ptr,
115 CoeffType *dqcoeff_ref_ptr, const int16_t *dequant_ptr,
116 uint16_t *eob_ref_ptr, uint16_t *eob_ptr, const int16_t *scan,
117 const int16_t *iscan) = 0;
118
QuantizeRun(bool is_loop,int q=0,int test_num=1)119 void QuantizeRun(bool is_loop, int q = 0, int test_num = 1) {
120 CoeffType *coeff_ptr = coeff_;
121 const intptr_t n_coeffs = coeff_num();
122
123 CoeffType *qcoeff_ref = coeff_ptr + n_coeffs;
124 CoeffType *dqcoeff_ref = qcoeff_ref + n_coeffs;
125
126 CoeffType *qcoeff = dqcoeff_ref + n_coeffs;
127 CoeffType *dqcoeff = qcoeff + n_coeffs;
128 uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs);
129
130 // Testing uses 2-D DCT scan order table
131 const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);
132
133 // Testing uses luminance quantization table
134 const int16_t *zbin = qtab_->quant.y_zbin[q];
135
136 const int16_t *round = 0;
137 const int16_t *quant = 0;
138 if (type_ == TYPE_B) {
139 round = qtab_->quant.y_round[q];
140 quant = qtab_->quant.y_quant[q];
141 } else if (type_ == TYPE_FP) {
142 round = qtab_->quant.y_round_fp[q];
143 quant = qtab_->quant.y_quant_fp[q];
144 }
145
146 const int16_t *quant_shift = qtab_->quant.y_quant_shift[q];
147 const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
148
149 for (int i = 0; i < test_num; ++i) {
150 if (is_loop) FillCoeffRandom();
151
152 memset(qcoeff_ref, 0, 5 * n_coeffs * sizeof(*qcoeff_ref));
153
154 RunQuantizeFunc(coeff_ptr, n_coeffs, zbin, round, quant, quant_shift,
155 qcoeff, qcoeff_ref, dqcoeff, dqcoeff_ref, dequant,
156 &eob[0], &eob[1], sc->scan, sc->iscan);
157
158 quant_ref_(coeff_ptr, n_coeffs, zbin, round, quant, quant_shift,
159 qcoeff_ref, dqcoeff_ref, dequant, &eob[0], sc->scan,
160 sc->iscan);
161
162 ASM_REGISTER_STATE_CHECK(quant_(coeff_ptr, n_coeffs, zbin, round, quant,
163 quant_shift, qcoeff, dqcoeff, dequant,
164 &eob[1], sc->scan, sc->iscan));
165
166 for (int j = 0; j < n_coeffs; ++j) {
167 ASSERT_EQ(qcoeff_ref[j], qcoeff[j])
168 << "Q mismatch on test: " << i << " at position: " << j
169 << " Q: " << q << " coeff: " << coeff_ptr[j];
170 }
171
172 for (int j = 0; j < n_coeffs; ++j) {
173 ASSERT_EQ(dqcoeff_ref[j], dqcoeff[j])
174 << "Dq mismatch on test: " << i << " at position: " << j
175 << " Q: " << q << " coeff: " << coeff_ptr[j];
176 }
177
178 ASSERT_EQ(eob[0], eob[1])
179 << "eobs mismatch on test: " << i << " Q: " << q;
180 }
181 }
182
CompareResults(const CoeffType * buf_ref,const CoeffType * buf,int size,const char * text,int q,int number)183 void CompareResults(const CoeffType *buf_ref, const CoeffType *buf, int size,
184 const char *text, int q, int number) {
185 int i;
186 for (i = 0; i < size; ++i) {
187 ASSERT_EQ(buf_ref[i], buf[i]) << text << " mismatch on test: " << number
188 << " at position: " << i << " Q: " << q;
189 }
190 }
191
coeff_num() const192 int coeff_num() const { return av1_get_max_eob(tx_size_); }
193
FillCoeff(CoeffType c)194 void FillCoeff(CoeffType c) {
195 const int n_coeffs = coeff_num();
196 for (int i = 0; i < n_coeffs; ++i) {
197 coeff_[i] = c;
198 }
199 }
200
FillCoeffRandom()201 void FillCoeffRandom() {
202 const int n_coeffs = coeff_num();
203 FillCoeffZero();
204 int num = rnd_.Rand16() % n_coeffs;
205 for (int i = 0; i < num; ++i) {
206 coeff_[i] = GetRandomCoeff();
207 }
208 }
209
FillCoeffRandomRows(int num)210 void FillCoeffRandomRows(int num) {
211 FillCoeffZero();
212 for (int i = 0; i < num; ++i) {
213 coeff_[i] = GetRandomCoeff();
214 }
215 }
216
FillCoeffZero()217 void FillCoeffZero() { FillCoeff(0); }
218
FillCoeffConstant()219 void FillCoeffConstant() {
220 CoeffType c = GetRandomCoeff();
221 FillCoeff(c);
222 }
223
FillDcOnly()224 void FillDcOnly() {
225 FillCoeffZero();
226 coeff_[0] = GetRandomCoeff();
227 }
228
FillDcLargeNegative()229 void FillDcLargeNegative() {
230 FillCoeffZero();
231 // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues
232 // like BUG=883 where the constant being compared was incorrectly
233 // initialized.
234 coeff_[0] = -8191;
235 }
236
GetRandomCoeff()237 CoeffType GetRandomCoeff() {
238 CoeffType coeff;
239 if (bd_ == AOM_BITS_8) {
240 coeff =
241 clamp(static_cast<int16_t>(rnd_.Rand16()), INT16_MIN + 1, INT16_MAX);
242 } else {
243 CoeffType min = -(1 << (7 + bd_));
244 CoeffType max = -min - 1;
245 coeff = clamp(static_cast<CoeffType>(rnd_.Rand31()), min, max);
246 }
247 return coeff;
248 }
249
250 ACMRandom rnd_;
251 QuanTable *qtab_;
252 CoeffType *coeff_;
253 QuantizeFunc quant_ref_;
254 QuantizeFunc quant_;
255 TX_SIZE tx_size_;
256 QuantType type_;
257 aom_bit_depth_t bd_;
258 };
259
260 class FullPrecisionQuantizeTest : public QuantizeTestBase<tran_low_t> {
RunQuantizeFunc(const tran_low_t * coeff_ptr,intptr_t n_coeffs,const int16_t * zbin_ptr,const int16_t * round_ptr,const int16_t * quant_ptr,const int16_t * quant_shift_ptr,tran_low_t * qcoeff_ptr,tran_low_t * qcoeff_ref_ptr,tran_low_t * dqcoeff_ptr,tran_low_t * dqcoeff_ref_ptr,const int16_t * dequant_ptr,uint16_t * eob_ref_ptr,uint16_t * eob_ptr,const int16_t * scan,const int16_t * iscan)261 void RunQuantizeFunc(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
262 const int16_t *zbin_ptr, const int16_t *round_ptr,
263 const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
264 tran_low_t *qcoeff_ptr, tran_low_t *qcoeff_ref_ptr,
265 tran_low_t *dqcoeff_ptr, tran_low_t *dqcoeff_ref_ptr,
266 const int16_t *dequant_ptr, uint16_t *eob_ref_ptr,
267 uint16_t *eob_ptr, const int16_t *scan,
268 const int16_t *iscan) override {
269 quant_ref_(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
270 quant_shift_ptr, qcoeff_ref_ptr, dqcoeff_ref_ptr, dequant_ptr,
271 eob_ref_ptr, scan, iscan);
272
273 ASM_REGISTER_STATE_CHECK(quant_(
274 coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
275 qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan));
276 }
277 };
278
// An all-zero block, quantized once at q index 0.
TEST_P(FullPrecisionQuantizeTest, ZeroInput) {
  FillCoeffZero();
  QuantizeRun(/*is_loop=*/false, /*q=*/0, /*test_num=*/1);
}
283
// A block whose only non-zero coefficient is a large negative first (DC)
// value, quantized once at q index 0 (QuantizeRun's defaults).
TEST_P(FullPrecisionQuantizeTest, LargeNegativeInput) {
  FillDcLargeNegative();
  QuantizeRun(/*is_loop=*/false);
}
288
// A block whose only non-zero coefficient is a random first (DC) value,
// quantized once at q index 0 (QuantizeRun's defaults).
TEST_P(FullPrecisionQuantizeTest, DcOnlyInput) {
  FillDcOnly();
  QuantizeRun(/*is_loop=*/false);
}
293
// kTestNum freshly randomized blocks, all quantized at q index 0.
TEST_P(FullPrecisionQuantizeTest, RandomInput) {
  const int kQIndex = 0;
  QuantizeRun(/*is_loop=*/true, kQIndex, kTestNum);
}
297
// kTestNum randomized blocks for every q index in [0, QINDEX_RANGE).
TEST_P(FullPrecisionQuantizeTest, MultipleQ) {
  for (int q = 0; q < QINDEX_RANGE; ++q) {
    QuantizeRun(true, q, kTestNum);
    // A fatal assertion inside QuantizeRun() only returns from that helper.
    // Stop here instead of repeating the failure for every remaining q.
    if (HasFatalFailure()) return;
  }
}
303
304 // Force the coeff to be half the value of the dequant. This exposes a
305 // mismatch found in av1_quantize_fp_sse2().
TEST_P(FullPrecisionQuantizeTest,CoeffHalfDequant)306 TEST_P(FullPrecisionQuantizeTest, CoeffHalfDequant) {
307 FillCoeff(16);
308 QuantizeRun(false, 25, 1);
309 }
310
// Benchmark (disabled by default): times the reference quantizer against the
// quantizer under test while the number of randomized leading coefficients
// grows one row at a time, and prints both timings plus their ratio.
TEST_P(FullPrecisionQuantizeTest, DISABLED_Speed) {
  tran_low_t *coeff_ptr = coeff_;
  const intptr_t n_coeffs = coeff_num();

  tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs;
  tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs;

  // Both implementations write to the same output region; results are not
  // compared here, only timed.
  tran_low_t *qcoeff = dqcoeff_ref + n_coeffs;
  tran_low_t *dqcoeff = qcoeff + n_coeffs;
  uint16_t *eob = reinterpret_cast<uint16_t *>(dqcoeff + n_coeffs);

  // Testing uses 2-D DCT scan order table
  const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);

  // Testing uses luminance quantization table
  const int q = 22;
  const int16_t *zbin = qtab_->quant.y_zbin[q];
  const int16_t *round_fp = qtab_->quant.y_round_fp[q];
  const int16_t *quant_fp = qtab_->quant.y_quant_fp[q];
  const int16_t *quant_shift = qtab_->quant.y_quant_shift[q];
  const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
  const int kNumTests = 5000000;
  aom_usec_timer timer, simd_timer;
  // Rows and columns are capped at 32 before being used to size the
  // randomized prefix.
  const int rows = AOMMIN(32, tx_size_high[tx_size_]);
  const int cols = AOMMIN(32, tx_size_wide[tx_size_]);
  for (int cnt = 0; cnt <= rows; cnt++) {
    FillCoeffRandomRows(cnt * cols);

    aom_usec_timer_start(&timer);
    for (int n = 0; n < kNumTests; ++n) {
      quant_ref_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift,
                 qcoeff, dqcoeff, dequant, eob, sc->scan, sc->iscan);
    }
    aom_usec_timer_mark(&timer);

    aom_usec_timer_start(&simd_timer);
    for (int n = 0; n < kNumTests; ++n) {
      quant_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift, qcoeff,
             dqcoeff, dequant, eob, sc->scan, sc->iscan);
    }
    aom_usec_timer_mark(&simd_timer);

    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
    const int simd_elapsed_time =
        static_cast<int>(aom_usec_timer_elapsed(&simd_timer));
    // Report the ratio as a real number: integer division would truncate
    // (a 1.9x speed-up would print as 1) and would divide by zero if the
    // optimized run measured 0 microseconds.
    const double gain =
        simd_elapsed_time > 0
            ? static_cast<double>(elapsed_time) / simd_elapsed_time
            : 0.0;
    printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time,
           simd_elapsed_time, gain);
  }
}
362
363 using std::make_tuple;
364
#if HAVE_AVX2
// AVX2 implementations checked against their C references. Each entry is
// (reference, function under test, transform size, table type, bit depth).
const QuantizeParam kQParamArrayAvx2[] = {
  // Low-bitdepth "fp" quantizers.
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,
             static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,
             static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,
             static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,
             static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2,
             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2,
             static_cast<TX_SIZE>(TX_16X64), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2,
             static_cast<TX_SIZE>(TX_64X16), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_avx2,
             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8),
#if CONFIG_AV1_HIGHBITDEPTH
  // High-bitdepth "fp" quantizers, adapted to QuantizeFunc through the
  // fixed-log_scale wrappers, at 8, 10 and 12 bits.
  make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
             &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,
             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
  make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
             &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,
             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_10),
  make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
             &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,
             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_12),
  make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
             &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,
             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),
  make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
             &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,
             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_10),
  make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
             &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,
             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_12),
  make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
             &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,
             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8),
  make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
             &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,
             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_10),
  make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
             &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,
             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_12),
  // High-bitdepth "b" quantizers, plain and adaptive.
  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),
  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),
  make_tuple(&aom_highbd_quantize_b_adaptive_c,
             &aom_highbd_quantize_b_adaptive_avx2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_highbd_quantize_b_adaptive_c,
             &aom_highbd_quantize_b_adaptive_avx2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),
  make_tuple(&aom_highbd_quantize_b_adaptive_c,
             &aom_highbd_quantize_b_adaptive_avx2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),
  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
             &aom_highbd_quantize_b_32x32_adaptive_avx2,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
             &aom_highbd_quantize_b_32x32_adaptive_avx2,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10),
  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
             &aom_highbd_quantize_b_32x32_adaptive_avx2,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),
#endif  // CONFIG_AV1_HIGHBITDEPTH
  // Low-bitdepth adaptive "b" quantizer.
  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2,
             static_cast<TX_SIZE>(TX_8X8), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2,
             static_cast<TX_SIZE>(TX_4X4), TYPE_B, AOM_BITS_8)
};

// Runs every FullPrecisionQuantizeTest case once per table entry.
INSTANTIATE_TEST_SUITE_P(AVX2, FullPrecisionQuantizeTest,
                         ::testing::ValuesIn(kQParamArrayAvx2));
#endif  // HAVE_AVX2
449
#if HAVE_SSE2
// SSE2 implementations checked against their C references. Each entry is
// (reference, function under test, transform size, table type, bit depth).
const QuantizeParam kQParamArraySSE2[] = {
  // Low-bitdepth "fp" quantizer.
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,
             static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,
             static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,
             static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,
             static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8),
  // Low-bitdepth "b" quantizer.
  make_tuple(&aom_quantize_b_c, &aom_quantize_b_sse2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
#if CONFIG_AV1_HIGHBITDEPTH
  // High-bitdepth "b" quantizers, plain and adaptive, at 8, 10 and 12 bits.
  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),
  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),
  make_tuple(&aom_highbd_quantize_b_adaptive_c,
             &aom_highbd_quantize_b_adaptive_sse2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_highbd_quantize_b_adaptive_c,
             &aom_highbd_quantize_b_adaptive_sse2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),
  make_tuple(&aom_highbd_quantize_b_adaptive_c,
             &aom_highbd_quantize_b_adaptive_sse2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),
  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10),
  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),
  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
             &aom_highbd_quantize_b_32x32_adaptive_sse2,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
             &aom_highbd_quantize_b_32x32_adaptive_sse2,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10),
  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,
             &aom_highbd_quantize_b_32x32_adaptive_sse2,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),
  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,
             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,
             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_10),
  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,
             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),
  make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c,
             &aom_highbd_quantize_b_64x64_adaptive_sse2,
             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c,
             &aom_highbd_quantize_b_64x64_adaptive_sse2,
             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_10),
  make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c,
             &aom_highbd_quantize_b_64x64_adaptive_sse2,
             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),
#endif  // CONFIG_AV1_HIGHBITDEPTH
  // Low-bitdepth adaptive "b" quantizers.
  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2,
             static_cast<TX_SIZE>(TX_8X8), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2,
             static_cast<TX_SIZE>(TX_4X4), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_32x32_adaptive_c,
             &aom_quantize_b_32x32_adaptive_sse2,
             static_cast<TX_SIZE>(TX_32X16), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_32x32_adaptive_c,
             &aom_quantize_b_32x32_adaptive_sse2,
             static_cast<TX_SIZE>(TX_16X32), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_32x32_adaptive_c,
             &aom_quantize_b_32x32_adaptive_sse2,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_64x64_adaptive_c,
             &aom_quantize_b_64x64_adaptive_sse2,
             static_cast<TX_SIZE>(TX_32X64), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_64x64_adaptive_c,
             &aom_quantize_b_64x64_adaptive_sse2,
             static_cast<TX_SIZE>(TX_64X32), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_64x64_adaptive_c,
             &aom_quantize_b_64x64_adaptive_sse2,
             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8)
};

// Runs every FullPrecisionQuantizeTest case once per table entry.
INSTANTIATE_TEST_SUITE_P(SSE2, FullPrecisionQuantizeTest,
                         ::testing::ValuesIn(kQParamArraySSE2));
#endif  // HAVE_SSE2
540
#if HAVE_NEON
// NEON implementations checked against their C references. Each entry is
// (reference, function under test, transform size, table type, bit depth).
const QuantizeParam kQParamArrayNEON[] = {
  // "fp" quantizers.
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
             static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
             static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
             static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,
             static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_neon,
             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_neon,
             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8),
  // "b" quantizers.
  make_tuple(&aom_quantize_b_c, &aom_quantize_b_neon,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_neon,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_neon,
             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8)
};

// Runs every FullPrecisionQuantizeTest case once per table entry.
INSTANTIATE_TEST_SUITE_P(NEON, FullPrecisionQuantizeTest,
                         ::testing::ValuesIn(kQParamArrayNEON));
#endif  // HAVE_NEON
568
#if HAVE_SSSE3 && ARCH_X86_64
// SSSE3 "b" quantizers checked against their C references; only instantiated
// when ARCH_X86_64 is set as well. Each entry is (reference, function under
// test, transform size, table type, bit depth).
const QuantizeParam kQParamArraySSSE3[] = {
  make_tuple(&aom_quantize_b_c, &aom_quantize_b_ssse3,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_ssse3,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_ssse3,
             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8)
};

INSTANTIATE_TEST_SUITE_P(SSSE3, FullPrecisionQuantizeTest,
                         ::testing::ValuesIn(kQParamArraySSSE3));

#endif  // HAVE_SSSE3 && ARCH_X86_64
581
#if HAVE_AVX
// AVX "b" quantizers checked against their C references. Each entry is
// (reference, function under test, transform size, table type, bit depth).
const QuantizeParam kQParamArrayAVX[] = {
  make_tuple(&aom_quantize_b_c, &aom_quantize_b_avx,
             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),
  make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_avx,
             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8)
};

INSTANTIATE_TEST_SUITE_P(AVX, FullPrecisionQuantizeTest,
                         ::testing::ValuesIn(kQParamArrayAVX));

#endif  // HAVE_AVX
592 } // namespace
593