1 /*
2  *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tuple>
15 
16 #include "third_party/googletest/src/include/gtest/gtest.h"
17 
18 #include "./vp9_rtcd.h"
19 #include "./vpx_dsp_rtcd.h"
20 #include "test/acm_random.h"
21 #include "test/buffer.h"
22 #include "test/clear_system_state.h"
23 #include "test/register_state_check.h"
24 #include "test/util.h"
25 #include "vp9/common/vp9_entropy.h"
26 #include "vpx/vpx_codec.h"
27 #include "vpx/vpx_integer.h"
28 #include "vpx_ports/mem.h"
29 
30 using libvpx_test::ACMRandom;
31 using libvpx_test::Buffer;
32 using std::make_tuple;
33 using std::tuple;
34 
35 namespace {
36 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
37 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
38 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
39                         int tx_type);
40 typedef void (*FhtFuncRef)(const Buffer<int16_t> &in, Buffer<tran_low_t> *out,
41                            int size, int tx_type);
42 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
43                         int tx_type);
44 typedef void (*IhtWithBdFunc)(const tran_low_t *in, uint8_t *out, int stride,
45                               int tx_type, int bd);
46 
47 template <FdctFunc fn>
fdct_wrapper(const int16_t * in,tran_low_t * out,int stride,int tx_type)48 void fdct_wrapper(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
49   (void)tx_type;
50   fn(in, out, stride);
51 }
52 
53 template <IdctFunc fn>
idct_wrapper(const tran_low_t * in,uint8_t * out,int stride,int tx_type,int bd)54 void idct_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type,
55                   int bd) {
56   (void)tx_type;
57   (void)bd;
58   fn(in, out, stride);
59 }
60 
61 template <IhtFunc fn>
iht_wrapper(const tran_low_t * in,uint8_t * out,int stride,int tx_type,int bd)62 void iht_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type,
63                  int bd) {
64   (void)bd;
65   fn(in, out, stride, tx_type);
66 }
67 
68 #if CONFIG_VP9_HIGHBITDEPTH
69 typedef void (*HighbdIdctFunc)(const tran_low_t *in, uint16_t *out, int stride,
70                                int bd);
71 
72 typedef void (*HighbdIhtFunc)(const tran_low_t *in, uint16_t *out, int stride,
73                               int tx_type, int bd);
74 
75 template <HighbdIdctFunc fn>
highbd_idct_wrapper(const tran_low_t * in,uint8_t * out,int stride,int tx_type,int bd)76 void highbd_idct_wrapper(const tran_low_t *in, uint8_t *out, int stride,
77                          int tx_type, int bd) {
78   (void)tx_type;
79   fn(in, CAST_TO_SHORTPTR(out), stride, bd);
80 }
81 
82 template <HighbdIhtFunc fn>
highbd_iht_wrapper(const tran_low_t * in,uint8_t * out,int stride,int tx_type,int bd)83 void highbd_iht_wrapper(const tran_low_t *in, uint8_t *out, int stride,
84                         int tx_type, int bd) {
85   fn(in, CAST_TO_SHORTPTR(out), stride, tx_type, bd);
86 }
87 #endif  // CONFIG_VP9_HIGHBITDEPTH
88 
89 struct FuncInfo {
90   FhtFunc ft_func;
91   IhtWithBdFunc it_func;
92   int size;
93   int pixel_size;
94 };
95 
96 /* forward transform, inverse transform, size, transform type, bit depth */
97 typedef tuple<int, const FuncInfo *, int, vpx_bit_depth_t> DctParam;
98 
fdct_ref(const Buffer<int16_t> & in,Buffer<tran_low_t> * out,int size,int)99 void fdct_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
100               int /*tx_type*/) {
101   const int16_t *i = in.TopLeftPixel();
102   const int i_stride = in.stride();
103   tran_low_t *o = out->TopLeftPixel();
104   if (size == 4) {
105     vpx_fdct4x4_c(i, o, i_stride);
106   } else if (size == 8) {
107     vpx_fdct8x8_c(i, o, i_stride);
108   } else if (size == 16) {
109     vpx_fdct16x16_c(i, o, i_stride);
110   } else if (size == 32) {
111     vpx_fdct32x32_c(i, o, i_stride);
112   }
113 }
114 
fht_ref(const Buffer<int16_t> & in,Buffer<tran_low_t> * out,int size,int tx_type)115 void fht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
116              int tx_type) {
117   const int16_t *i = in.TopLeftPixel();
118   const int i_stride = in.stride();
119   tran_low_t *o = out->TopLeftPixel();
120   if (size == 4) {
121     vp9_fht4x4_c(i, o, i_stride, tx_type);
122   } else if (size == 8) {
123     vp9_fht8x8_c(i, o, i_stride, tx_type);
124   } else if (size == 16) {
125     vp9_fht16x16_c(i, o, i_stride, tx_type);
126   }
127 }
128 
fwht_ref(const Buffer<int16_t> & in,Buffer<tran_low_t> * out,int size,int)129 void fwht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
130               int /*tx_type*/) {
131   ASSERT_EQ(size, 4);
132   vp9_fwht4x4_c(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
133 }
134 
135 class TransTestBase : public ::testing::TestWithParam<DctParam> {
136  public:
SetUp()137   virtual void SetUp() {
138     rnd_.Reset(ACMRandom::DeterministicSeed());
139     const int idx = GET_PARAM(0);
140     const FuncInfo *func_info = &(GET_PARAM(1)[idx]);
141     tx_type_ = GET_PARAM(2);
142     bit_depth_ = GET_PARAM(3);
143     fwd_txfm_ = func_info->ft_func;
144     inv_txfm_ = func_info->it_func;
145     size_ = func_info->size;
146     pixel_size_ = func_info->pixel_size;
147     max_pixel_value_ = (1 << bit_depth_) - 1;
148 
149     // Randomize stride_ to a value less than or equal to 1024
150     stride_ = rnd_(1024) + 1;
151     if (stride_ < size_) {
152       stride_ = size_;
153     }
154     // Align stride_ to 16 if it's bigger than 16.
155     if (stride_ > 16) {
156       stride_ &= ~15;
157     }
158 
159     block_size_ = size_ * stride_;
160 
161     src_ = reinterpret_cast<uint8_t *>(
162         vpx_memalign(16, pixel_size_ * block_size_));
163     ASSERT_TRUE(src_ != NULL);
164     dst_ = reinterpret_cast<uint8_t *>(
165         vpx_memalign(16, pixel_size_ * block_size_));
166     ASSERT_TRUE(dst_ != NULL);
167   }
168 
TearDown()169   virtual void TearDown() {
170     vpx_free(src_);
171     src_ = NULL;
172     vpx_free(dst_);
173     dst_ = NULL;
174     libvpx_test::ClearSystemState();
175   }
176 
InitMem()177   void InitMem() {
178     if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
179     if (pixel_size_ == 1) {
180       for (int j = 0; j < block_size_; ++j) {
181         src_[j] = rnd_.Rand16() & max_pixel_value_;
182       }
183       for (int j = 0; j < block_size_; ++j) {
184         dst_[j] = rnd_.Rand16() & max_pixel_value_;
185       }
186     } else {
187       ASSERT_EQ(pixel_size_, 2);
188       uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
189       uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
190       for (int j = 0; j < block_size_; ++j) {
191         src[j] = rnd_.Rand16() & max_pixel_value_;
192       }
193       for (int j = 0; j < block_size_; ++j) {
194         dst[j] = rnd_.Rand16() & max_pixel_value_;
195       }
196     }
197   }
198 
RunFwdTxfm(const Buffer<int16_t> & in,Buffer<tran_low_t> * out)199   void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
200     fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride(), tx_type_);
201   }
202 
RunInvTxfm(const Buffer<tran_low_t> & in,uint8_t * out)203   void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
204     inv_txfm_(in.TopLeftPixel(), out, stride_, tx_type_, bit_depth_);
205   }
206 
207  protected:
RunAccuracyCheck(int limit)208   void RunAccuracyCheck(int limit) {
209     if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
210     ACMRandom rnd(ACMRandom::DeterministicSeed());
211     Buffer<int16_t> test_input_block =
212         Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
213     ASSERT_TRUE(test_input_block.Init());
214     ASSERT_TRUE(test_input_block.TopLeftPixel() != NULL);
215     Buffer<tran_low_t> test_temp_block =
216         Buffer<tran_low_t>(size_, size_, 0, 16);
217     ASSERT_TRUE(test_temp_block.Init());
218     uint32_t max_error = 0;
219     int64_t total_error = 0;
220     const int count_test_block = 10000;
221     for (int i = 0; i < count_test_block; ++i) {
222       InitMem();
223       for (int h = 0; h < size_; ++h) {
224         for (int w = 0; w < size_; ++w) {
225           if (pixel_size_ == 1) {
226             test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
227                 src_[h * stride_ + w] - dst_[h * stride_ + w];
228           } else {
229             ASSERT_EQ(pixel_size_, 2);
230             const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
231             const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
232             test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
233                 src[h * stride_ + w] - dst[h * stride_ + w];
234           }
235         }
236       }
237 
238       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block, &test_temp_block));
239       ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst_));
240 
241       for (int h = 0; h < size_; ++h) {
242         for (int w = 0; w < size_; ++w) {
243           int diff;
244           if (pixel_size_ == 1) {
245             diff = dst_[h * stride_ + w] - src_[h * stride_ + w];
246           } else {
247             ASSERT_EQ(pixel_size_, 2);
248             const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
249             const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
250             diff = dst[h * stride_ + w] - src[h * stride_ + w];
251           }
252           const uint32_t error = diff * diff;
253           if (max_error < error) max_error = error;
254           total_error += error;
255         }
256       }
257     }
258 
259     EXPECT_GE(static_cast<uint32_t>(limit), max_error)
260         << "Error: " << size_ << "x" << size_
261         << " transform/inverse transform has an individual round trip error > "
262         << limit;
263 
264     EXPECT_GE(count_test_block * limit, total_error)
265         << "Error: " << size_ << "x" << size_
266         << " transform/inverse transform has average round trip error > "
267         << limit << " per block";
268   }
269 
RunCoeffCheck()270   void RunCoeffCheck() {
271     if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
272     ACMRandom rnd(ACMRandom::DeterministicSeed());
273     const int count_test_block = 5000;
274     Buffer<int16_t> input_block =
275         Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
276     ASSERT_TRUE(input_block.Init());
277     Buffer<tran_low_t> output_ref_block = Buffer<tran_low_t>(size_, size_, 0);
278     ASSERT_TRUE(output_ref_block.Init());
279     Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
280     ASSERT_TRUE(output_block.Init());
281 
282     for (int i = 0; i < count_test_block; ++i) {
283       // Initialize a test block with input range [-max_pixel_value_,
284       // max_pixel_value_].
285       input_block.Set(&rnd, -max_pixel_value_, max_pixel_value_);
286 
287       fwd_txfm_ref(input_block, &output_ref_block, size_, tx_type_);
288       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, &output_block));
289 
290       // The minimum quant value is 4.
291       EXPECT_TRUE(output_block.CheckValues(output_ref_block));
292       if (::testing::Test::HasFailure()) {
293         printf("Size: %d Transform type: %d\n", size_, tx_type_);
294         output_block.PrintDifference(output_ref_block);
295         return;
296       }
297     }
298   }
299 
RunMemCheck()300   void RunMemCheck() {
301     if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
302     ACMRandom rnd(ACMRandom::DeterministicSeed());
303     const int count_test_block = 5000;
304     Buffer<int16_t> input_extreme_block =
305         Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
306     ASSERT_TRUE(input_extreme_block.Init());
307     Buffer<tran_low_t> output_ref_block = Buffer<tran_low_t>(size_, size_, 0);
308     ASSERT_TRUE(output_ref_block.Init());
309     Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
310     ASSERT_TRUE(output_block.Init());
311 
312     for (int i = 0; i < count_test_block; ++i) {
313       // Initialize a test block with -max_pixel_value_ or max_pixel_value_.
314       if (i == 0) {
315         input_extreme_block.Set(max_pixel_value_);
316       } else if (i == 1) {
317         input_extreme_block.Set(-max_pixel_value_);
318       } else {
319         ASSERT_TRUE(input_extreme_block.TopLeftPixel() != NULL);
320         for (int h = 0; h < size_; ++h) {
321           for (int w = 0; w < size_; ++w) {
322             input_extreme_block
323                 .TopLeftPixel()[h * input_extreme_block.stride() + w] =
324                 rnd.Rand8() % 2 ? max_pixel_value_ : -max_pixel_value_;
325           }
326         }
327       }
328 
329       fwd_txfm_ref(input_extreme_block, &output_ref_block, size_, tx_type_);
330       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block, &output_block));
331 
332       // The minimum quant value is 4.
333       EXPECT_TRUE(output_block.CheckValues(output_ref_block));
334       ASSERT_TRUE(output_block.TopLeftPixel() != NULL);
335       for (int h = 0; h < size_; ++h) {
336         for (int w = 0; w < size_; ++w) {
337           EXPECT_GE(
338               4 * DCT_MAX_VALUE << (bit_depth_ - 8),
339               abs(output_block.TopLeftPixel()[h * output_block.stride() + w]))
340               << "Error: " << size_ << "x" << size_
341               << " transform has coefficient larger than 4*DCT_MAX_VALUE"
342               << " at " << w << "," << h;
343           if (::testing::Test::HasFailure()) {
344             printf("Size: %d Transform type: %d\n", size_, tx_type_);
345             output_block.DumpBuffer();
346             return;
347           }
348         }
349       }
350     }
351   }
352 
RunInvAccuracyCheck(int limit)353   void RunInvAccuracyCheck(int limit) {
354     if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
355     ACMRandom rnd(ACMRandom::DeterministicSeed());
356     const int count_test_block = 1000;
357     Buffer<int16_t> in = Buffer<int16_t>(size_, size_, 4);
358     ASSERT_TRUE(in.Init());
359     Buffer<tran_low_t> coeff = Buffer<tran_low_t>(size_, size_, 0, 16);
360     ASSERT_TRUE(coeff.Init());
361     Buffer<uint8_t> dst = Buffer<uint8_t>(size_, size_, 0, 16);
362     ASSERT_TRUE(dst.Init());
363     Buffer<uint8_t> src = Buffer<uint8_t>(size_, size_, 0);
364     ASSERT_TRUE(src.Init());
365     Buffer<uint16_t> dst16 = Buffer<uint16_t>(size_, size_, 0, 16);
366     ASSERT_TRUE(dst16.Init());
367     Buffer<uint16_t> src16 = Buffer<uint16_t>(size_, size_, 0);
368     ASSERT_TRUE(src16.Init());
369 
370     for (int i = 0; i < count_test_block; ++i) {
371       InitMem();
372       ASSERT_TRUE(in.TopLeftPixel() != NULL);
373       // Initialize a test block with input range [-max_pixel_value_,
374       // max_pixel_value_].
375       for (int h = 0; h < size_; ++h) {
376         for (int w = 0; w < size_; ++w) {
377           if (pixel_size_ == 1) {
378             in.TopLeftPixel()[h * in.stride() + w] =
379                 src_[h * stride_ + w] - dst_[h * stride_ + w];
380           } else {
381             ASSERT_EQ(pixel_size_, 2);
382             const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
383             const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
384             in.TopLeftPixel()[h * in.stride() + w] =
385                 src[h * stride_ + w] - dst[h * stride_ + w];
386           }
387         }
388       }
389 
390       fwd_txfm_ref(in, &coeff, size_, tx_type_);
391 
392       ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst_));
393 
394       for (int h = 0; h < size_; ++h) {
395         for (int w = 0; w < size_; ++w) {
396           int diff;
397           if (pixel_size_ == 1) {
398             diff = dst_[h * stride_ + w] - src_[h * stride_ + w];
399           } else {
400             ASSERT_EQ(pixel_size_, 2);
401             const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
402             const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
403             diff = dst[h * stride_ + w] - src[h * stride_ + w];
404           }
405           const uint32_t error = diff * diff;
406           EXPECT_GE(static_cast<uint32_t>(limit), error)
407               << "Error: " << size_ << "x" << size_
408               << " inverse transform has error " << error << " at " << w << ","
409               << h;
410           if (::testing::Test::HasFailure()) {
411             printf("Size: %d Transform type: %d\n", size_, tx_type_);
412             return;
413           }
414         }
415       }
416     }
417   }
418 
419   FhtFunc fwd_txfm_;
420   FhtFuncRef fwd_txfm_ref;
421   IhtWithBdFunc inv_txfm_;
422   ACMRandom rnd_;
423   uint8_t *src_;
424   uint8_t *dst_;
425   vpx_bit_depth_t bit_depth_;
426   int tx_type_;
427   int max_pixel_value_;
428   int size_;
429   int stride_;
430   int pixel_size_;
431   int block_size_;
432 };
433 
434 /* -------------------------------------------------------------------------- */
435 
436 class TransDCT : public TransTestBase {
437  public:
TransDCT()438   TransDCT() { fwd_txfm_ref = fdct_ref; }
439 };
440 
// Round trip accuracy. The 16-bit pixel path at bit depths above 10 gets a
// looser limit for 16x16 (2) and 32x32 (7); everything else must stay
// within 1.
TEST_P(TransDCT, AccuracyCheck) {
  const bool relaxed = bit_depth_ > 10 && pixel_size_ == 2;
  int limit = 1;
  if (relaxed && size_ == 16) {
    limit = 2;
  } else if (relaxed && size_ == 32) {
    limit = 7;
  }
  RunAccuracyCheck(limit);
}

TEST_P(TransDCT, CoeffCheck) {
  RunCoeffCheck();
}

TEST_P(TransDCT, MemCheck) {
  RunMemCheck();
}

TEST_P(TransDCT, InvAccuracyCheck) {
  RunInvAccuracyCheck(1);
}
456 
457 static const FuncInfo dct_c_func_info[] = {
458 #if CONFIG_VP9_HIGHBITDEPTH
459   { &fdct_wrapper<vpx_highbd_fdct4x4_c>,
460     &highbd_idct_wrapper<vpx_highbd_idct4x4_16_add_c>, 4, 2 },
461   { &fdct_wrapper<vpx_highbd_fdct8x8_c>,
462     &highbd_idct_wrapper<vpx_highbd_idct8x8_64_add_c>, 8, 2 },
463   { &fdct_wrapper<vpx_highbd_fdct16x16_c>,
464     &highbd_idct_wrapper<vpx_highbd_idct16x16_256_add_c>, 16, 2 },
465   { &fdct_wrapper<vpx_highbd_fdct32x32_c>,
466     &highbd_idct_wrapper<vpx_highbd_idct32x32_1024_add_c>, 32, 2 },
467 #endif
468   { &fdct_wrapper<vpx_fdct4x4_c>, &idct_wrapper<vpx_idct4x4_16_add_c>, 4, 1 },
469   { &fdct_wrapper<vpx_fdct8x8_c>, &idct_wrapper<vpx_idct8x8_64_add_c>, 8, 1 },
470   { &fdct_wrapper<vpx_fdct16x16_c>, &idct_wrapper<vpx_idct16x16_256_add_c>, 16,
471     1 },
472   { &fdct_wrapper<vpx_fdct32x32_c>, &idct_wrapper<vpx_idct32x32_1024_add_c>, 32,
473     1 }
474 };
475 
476 INSTANTIATE_TEST_CASE_P(
477     C, TransDCT,
478     ::testing::Combine(
479         ::testing::Range(0, static_cast<int>(sizeof(dct_c_func_info) /
480                                              sizeof(dct_c_func_info[0]))),
481         ::testing::Values(dct_c_func_info), ::testing::Values(0),
482         ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
483 
484 #if !CONFIG_EMULATE_HARDWARE
485 
#if HAVE_SSE2
// SSE2 DCT pairs. Each entry is { forward, inverse, block size, bytes per
// pixel }.
static const FuncInfo dct_sse2_func_info[] = {
#if CONFIG_VP9_HIGHBITDEPTH
  { &fdct_wrapper<vpx_highbd_fdct4x4_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct4x4_16_add_sse2>, 4, 2 },
  { &fdct_wrapper<vpx_highbd_fdct8x8_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct8x8_64_add_sse2>, 8, 2 },
  { &fdct_wrapper<vpx_highbd_fdct16x16_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct16x16_256_add_sse2>, 16, 2 },
  { &fdct_wrapper<vpx_highbd_fdct32x32_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct32x32_1024_add_sse2>, 32, 2 },
#endif  // CONFIG_VP9_HIGHBITDEPTH
  { &fdct_wrapper<vpx_fdct4x4_sse2>, &idct_wrapper<vpx_idct4x4_16_add_sse2>, 4,
    1 },
  { &fdct_wrapper<vpx_fdct8x8_sse2>, &idct_wrapper<vpx_idct8x8_64_add_sse2>, 8,
    1 },
  { &fdct_wrapper<vpx_fdct16x16_sse2>,
    &idct_wrapper<vpx_idct16x16_256_add_sse2>, 16, 1 },
  { &fdct_wrapper<vpx_fdct32x32_sse2>,
    &idct_wrapper<vpx_idct32x32_1024_add_sse2>, 32, 1 }
};

// Number of entries in dct_sse2_func_info.
static const int kDctSse2FuncCount = static_cast<int>(
    sizeof(dct_sse2_func_info) / sizeof(dct_sse2_func_info[0]));

// Every table entry, transform type 0, at each supported bit depth.
INSTANTIATE_TEST_CASE_P(
    SSE2, TransDCT,
    ::testing::Combine(::testing::Range(0, kDctSse2FuncCount),
                       ::testing::Values(dct_sse2_func_info),
                       ::testing::Values(0),
                       ::testing::Values(VPX_BITS_8, VPX_BITS_10,
                                         VPX_BITS_12)));
#endif  // HAVE_SSE2
516 
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
// vpx_fdct8x8_ssse3 is only available in 64 bit builds. It is paired with the
// SSE2 8x8 inverse transform.
static const FuncInfo dct_ssse3_func_info = {
  &fdct_wrapper<vpx_fdct8x8_ssse3>,        // forward
  &idct_wrapper<vpx_idct8x8_64_add_sse2>,  // inverse
  8,                                       // block size
  1                                        // bytes per pixel
};

// TODO(johannkoenig): high bit depth fdct8x8.
INSTANTIATE_TEST_CASE_P(
    SSSE3, TransDCT,
    ::testing::Values(make_tuple(0, &dct_ssse3_func_info, 0, VPX_BITS_8)));
#endif  // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
528 
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
// AVX2 32x32 forward DCT, paired with the SSE2 32x32 inverse transform.
static const FuncInfo dct_avx2_func_info = {
  &fdct_wrapper<vpx_fdct32x32_avx2>,           // forward
  &idct_wrapper<vpx_idct32x32_1024_add_sse2>,  // inverse
  32,                                          // block size
  1                                            // bytes per pixel
};

// TODO(johannkoenig): high bit depth fdct32x32.
INSTANTIATE_TEST_CASE_P(
    AVX2, TransDCT,
    ::testing::Values(make_tuple(0, &dct_avx2_func_info, 0, VPX_BITS_8)));
#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
540 
#if HAVE_NEON
// NEON DCT pairs. Each entry is { forward, inverse, block size, bytes per
// pixel }.
static const FuncInfo dct_neon_func_info[] = {
  { &fdct_wrapper<vpx_fdct4x4_neon>, &idct_wrapper<vpx_idct4x4_16_add_neon>, 4,
    1 },
  { &fdct_wrapper<vpx_fdct8x8_neon>, &idct_wrapper<vpx_idct8x8_64_add_neon>, 8,
    1 },
  { &fdct_wrapper<vpx_fdct16x16_neon>,
    &idct_wrapper<vpx_idct16x16_256_add_neon>, 16, 1 },
  { &fdct_wrapper<vpx_fdct32x32_neon>,
    &idct_wrapper<vpx_idct32x32_1024_add_neon>, 32, 1 }
};

// The index range is derived from the table itself so that adding or removing
// an entry can never leave the range out of step with the array.
INSTANTIATE_TEST_CASE_P(
    NEON, TransDCT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(dct_neon_func_info) /
                                             sizeof(dct_neon_func_info[0]))),
        ::testing::Values(dct_neon_func_info), ::testing::Values(0),
        ::testing::Values(VPX_BITS_8)));
#endif  // HAVE_NEON
559 
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH
// MSA DCT pairs. Each entry is { forward, inverse, block size, bytes per
// pixel }.
static const FuncInfo dct_msa_func_info[] = {
  { &fdct_wrapper<vpx_fdct4x4_msa>, &idct_wrapper<vpx_idct4x4_16_add_msa>, 4,
    1 },
  { &fdct_wrapper<vpx_fdct8x8_msa>, &idct_wrapper<vpx_idct8x8_64_add_msa>, 8,
    1 },
  { &fdct_wrapper<vpx_fdct16x16_msa>, &idct_wrapper<vpx_idct16x16_256_add_msa>,
    16, 1 },
  { &fdct_wrapper<vpx_fdct32x32_msa>, &idct_wrapper<vpx_idct32x32_1024_add_msa>,
    32, 1 }
};

// The index range is derived from the table itself so that adding or removing
// an entry can never leave the range out of step with the array.
INSTANTIATE_TEST_CASE_P(
    MSA, TransDCT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(dct_msa_func_info) /
                                             sizeof(dct_msa_func_info[0]))),
        ::testing::Values(dct_msa_func_info), ::testing::Values(0),
        ::testing::Values(VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH
578 
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
// VSX 4x4 inverse DCT, driven by the C 4x4 forward DCT.
static const FuncInfo dct_vsx_func_info = {
  &fdct_wrapper<vpx_fdct4x4_c>,           // forward
  &idct_wrapper<vpx_idct4x4_16_add_vsx>,  // inverse
  4,                                      // block size
  1                                       // bytes per pixel
};

INSTANTIATE_TEST_CASE_P(
    VSX, TransDCT,
    ::testing::Values(make_tuple(0, &dct_vsx_func_info, 0, VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
588 
589 #endif  // !CONFIG_EMULATE_HARDWARE
590 
591 /* -------------------------------------------------------------------------- */
592 
593 class TransHT : public TransTestBase {
594  public:
TransHT()595   TransHT() { fwd_txfm_ref = fht_ref; }
596 };
597 
// Round trip accuracy. 16x16 with 16-bit pixels at bit depths above 10 is
// allowed a limit of 2; everything else must stay within 1.
TEST_P(TransHT, AccuracyCheck) {
  int limit = 1;
  if (size_ == 16 && bit_depth_ > 10 && pixel_size_ == 2) {
    limit = 2;
  }
  RunAccuracyCheck(limit);
}

TEST_P(TransHT, CoeffCheck) {
  RunCoeffCheck();
}

TEST_P(TransHT, MemCheck) {
  RunMemCheck();
}

TEST_P(TransHT, InvAccuracyCheck) {
  RunInvAccuracyCheck(1);
}
607 
608 static const FuncInfo ht_c_func_info[] = {
609 #if CONFIG_VP9_HIGHBITDEPTH
610   { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_c>, 4,
611     2 },
612   { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_c>, 8,
613     2 },
614   { &vp9_highbd_fht16x16_c, &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_c>,
615     16, 2 },
616 #endif
617   { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_c>, 4, 1 },
618   { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_c>, 8, 1 },
619   { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_c>, 16, 1 }
620 };
621 
622 INSTANTIATE_TEST_CASE_P(
623     C, TransHT,
624     ::testing::Combine(
625         ::testing::Range(0, static_cast<int>(sizeof(ht_c_func_info) /
626                                              sizeof(ht_c_func_info[0]))),
627         ::testing::Values(ht_c_func_info), ::testing::Range(0, 4),
628         ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
629 
630 #if !CONFIG_EMULATE_HARDWARE
631 
#if HAVE_NEON

// NEON inverse hybrid transforms, driven by the C forward transforms. Each
// entry is { forward, inverse, block size, bytes per pixel }.
static const FuncInfo ht_neon_func_info[] = {
#if CONFIG_VP9_HIGHBITDEPTH
  { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_neon>, 4,
    2 },
  { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_neon>, 8,
    2 },
  { &vp9_highbd_fht16x16_c,
    &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_neon>, 16, 2 },
#endif  // CONFIG_VP9_HIGHBITDEPTH
  { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_neon>, 4, 1 },
  { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_neon>, 8, 1 },
  { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_neon>, 16, 1 }
};

// Number of entries in ht_neon_func_info.
static const int kHtNeonFuncCount =
    static_cast<int>(sizeof(ht_neon_func_info) / sizeof(ht_neon_func_info[0]));

// Every table entry, transform types 0 through 3, at each supported bit
// depth.
INSTANTIATE_TEST_CASE_P(
    NEON, TransHT,
    ::testing::Combine(::testing::Range(0, kHtNeonFuncCount),
                       ::testing::Values(ht_neon_func_info),
                       ::testing::Range(0, 4),
                       ::testing::Values(VPX_BITS_8, VPX_BITS_10,
                                         VPX_BITS_12)));
#endif  // HAVE_NEON
656 
#if HAVE_SSE2

// SSE2 hybrid transform pairs. Each entry is { forward, inverse, block size,
// bytes per pixel }.
static const FuncInfo ht_sse2_func_info[] = {
  { &vp9_fht4x4_sse2, &iht_wrapper<vp9_iht4x4_16_add_sse2>, 4, 1 },
  { &vp9_fht8x8_sse2, &iht_wrapper<vp9_iht8x8_64_add_sse2>, 8, 1 },
  { &vp9_fht16x16_sse2, &iht_wrapper<vp9_iht16x16_256_add_sse2>, 16, 1 }
};

// The index range is derived from the table itself so that adding or removing
// an entry can never leave the range out of step with the array.
INSTANTIATE_TEST_CASE_P(
    SSE2, TransHT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(ht_sse2_func_info) /
                                             sizeof(ht_sse2_func_info[0]))),
        ::testing::Values(ht_sse2_func_info), ::testing::Range(0, 4),
        ::testing::Values(VPX_BITS_8)));
#endif  // HAVE_SSE2
671 
#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
// SSE4.1 high bitdepth inverse hybrid transforms, driven by the C high
// bitdepth forward transforms. Each entry is { forward, inverse, block size,
// bytes per pixel }.
static const FuncInfo ht_sse4_1_func_info[] = {
  { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_sse4_1>,
    4, 2 },
  { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_sse4_1>,
    8, 2 },
  { &vp9_highbd_fht16x16_c,
    &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_sse4_1>, 16, 2 }
};

// The index range is derived from the table itself so that adding or removing
// an entry can never leave the range out of step with the array.
INSTANTIATE_TEST_CASE_P(
    SSE4_1, TransHT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(ht_sse4_1_func_info) /
                                             sizeof(ht_sse4_1_func_info[0]))),
        ::testing::Values(ht_sse4_1_func_info), ::testing::Range(0, 4),
        ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
#endif  // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
690 
#if HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
// VSX inverse hybrid transforms, driven by the C forward transforms. Each
// entry is { forward, inverse, block size, bytes per pixel }.
static const FuncInfo ht_vsx_func_info[] = {
  { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_vsx>, 4, 1 },
  { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_vsx>, 8, 1 },
  { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_vsx>, 16, 1 }
};

// The index range is derived from the table itself so that adding or removing
// an entry can never leave the range out of step with the array.
INSTANTIATE_TEST_CASE_P(
    VSX, TransHT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(ht_vsx_func_info) /
                                             sizeof(ht_vsx_func_info[0]))),
        ::testing::Values(ht_vsx_func_info), ::testing::Range(0, 4),
        ::testing::Values(VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
704 #endif  // !CONFIG_EMULATE_HARDWARE
705 
706 /* -------------------------------------------------------------------------- */
707 
708 class TransWHT : public TransTestBase {
709  public:
TransWHT()710   TransWHT() { fwd_txfm_ref = fwht_ref; }
711 };
712 
// The round trip is required to be exact: the error limit is 0.
TEST_P(TransWHT, AccuracyCheck) {
  RunAccuracyCheck(0);
}

TEST_P(TransWHT, CoeffCheck) {
  RunCoeffCheck();
}

TEST_P(TransWHT, MemCheck) {
  RunMemCheck();
}

// The inverse on its own is also required to be exact: the error limit is 0.
TEST_P(TransWHT, InvAccuracyCheck) {
  RunInvAccuracyCheck(0);
}
720 
721 static const FuncInfo wht_c_func_info[] = {
722 #if CONFIG_VP9_HIGHBITDEPTH
723   { &fdct_wrapper<vp9_highbd_fwht4x4_c>,
724     &highbd_idct_wrapper<vpx_highbd_iwht4x4_16_add_c>, 4, 2 },
725 #endif
726   { &fdct_wrapper<vp9_fwht4x4_c>, &idct_wrapper<vpx_iwht4x4_16_add_c>, 4, 1 }
727 };
728 
729 INSTANTIATE_TEST_CASE_P(
730     C, TransWHT,
731     ::testing::Combine(
732         ::testing::Range(0, static_cast<int>(sizeof(wht_c_func_info) /
733                                              sizeof(wht_c_func_info[0]))),
734         ::testing::Values(wht_c_func_info), ::testing::Values(0),
735         ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
736 
#if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
// SSE2 4x4 Walsh-Hadamard forward/inverse pair.
static const FuncInfo wht_sse2_func_info = {
  &fdct_wrapper<vp9_fwht4x4_sse2>,          // forward
  &idct_wrapper<vpx_iwht4x4_16_add_sse2>,   // inverse
  4,                                        // block size
  1                                         // bytes per pixel
};

INSTANTIATE_TEST_CASE_P(
    SSE2, TransWHT,
    ::testing::Values(make_tuple(0, &wht_sse2_func_info, 0, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
746 
#if HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
// VSX 4x4 inverse Walsh-Hadamard transform, driven by the C forward
// transform.
static const FuncInfo wht_vsx_func_info = {
  &fdct_wrapper<vp9_fwht4x4_c>,            // forward
  &idct_wrapper<vpx_iwht4x4_16_add_vsx>,   // inverse
  4,                                       // block size
  1                                        // bytes per pixel
};

INSTANTIATE_TEST_CASE_P(
    VSX, TransWHT,
    ::testing::Values(make_tuple(0, &wht_vsx_func_info, 0, VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
756 }  // namespace
757