1 /*
2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tuple>
15 
16 #include "third_party/googletest/src/include/gtest/gtest.h"
17 
18 #include "./vp9_rtcd.h"
19 #include "./vpx_dsp_rtcd.h"
20 #include "test/acm_random.h"
21 #include "test/clear_system_state.h"
22 #include "test/register_state_check.h"
23 #include "test/util.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_scan.h"
26 #include "vpx/vpx_codec.h"
27 #include "vpx/vpx_integer.h"
28 #include "vpx_ports/mem.h"
29 
30 using libvpx_test::ACMRandom;
31 
32 namespace {
33 
// Number of samples (and of transform coefficients) in one 8x8 block; used to
// size the buffers in the accuracy and reference-comparison checks.
const int kNumCoeffs = 64;
// Pi, used by the floating point reference DCT below.
const double kPi = 3.141592653589793238462643383279502884;

// Base limits for RunSignBiasCheck(): the largest tolerated difference between
// the number of negative and positive outputs per coefficient position. The
// first is used for the full-range input pass, the second for the reduced
// range pass; both are scaled by the bit depth at the point of use.
const int kSignBiasMaxDiff255 = 1500;
const int kSignBiasMaxDiff15 = 10000;
39 
40 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
41 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
42 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
43                         int tx_type);
44 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
45                         int tx_type);
46 
47 typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
48 typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
49 typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
50 
reference_8x8_dct_1d(const double in[8],double out[8])51 void reference_8x8_dct_1d(const double in[8], double out[8]) {
52   const double kInvSqrt2 = 0.707106781186547524400844362104;
53   for (int k = 0; k < 8; k++) {
54     out[k] = 0.0;
55     for (int n = 0; n < 8; n++) {
56       out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);
57     }
58     if (k == 0) out[k] = out[k] * kInvSqrt2;
59   }
60 }
61 
// Floating point reference for the 2-D 8x8 DCT of |input| (row-major, 8
// samples per row). Applies reference_8x8_dct_1d() to every column and then
// to every row of the intermediate result; the row pass output is doubled
// before being stored in |output|.
void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
                          double output[kNumCoeffs]) {
  double line_in[8];
  double line_out[8];

  // Pass 1: transform each column of the input.
  for (int col = 0; col < 8; ++col) {
    for (int row = 0; row < 8; ++row) line_in[row] = input[row * 8 + col];
    reference_8x8_dct_1d(line_in, line_out);
    for (int row = 0; row < 8; ++row) output[row * 8 + col] = line_out[row];
  }

  // Pass 2: transform each row of the column-transformed data in place.
  for (int row = 0; row < 8; ++row) {
    double *const row_data = output + row * 8;
    for (int col = 0; col < 8; ++col) line_in[col] = row_data[col];
    reference_8x8_dct_1d(line_in, line_out);
    // Final scaling: every coefficient is multiplied by 2.
    for (int col = 0; col < 8; ++col) row_data[col] = line_out[col] * 2;
  }
}
80 
// Adapts vpx_fdct8x8_c to the FhtFunc signature so it can be stored in
// FwdTrans8x8TestBase::fwd_txfm_ref. The transform type argument is ignored.
void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
                 int /*tx_type*/) {
  vpx_fdct8x8_c(in, out, stride);
}
85 
// Reference forward hybrid transform: forwards all arguments, including
// |tx_type|, to vp9_fht8x8_c.
void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fht8x8_c(in, out, stride, tx_type);
}
89 
#if CONFIG_VP9_HIGHBITDEPTH
// The high bit depth inverse transforms take an extra bit depth argument.
// The wrappers below bind that argument to 10 or 12 and pass |out| through
// CAST_TO_SHORTPTR so the functions fit the IdctFunc / IhtFunc signatures
// used by the test parameters.

// vpx_highbd_idct8x8_64_add_c with bit depth 10.
void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}

// vpx_highbd_idct8x8_64_add_c with bit depth 12.
void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}

// vp9_highbd_iht8x8_64_add_c with bit depth 10.
void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
}

// vp9_highbd_iht8x8_64_add_c with bit depth 12.
void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
}

#if HAVE_SSE2

// vpx_highbd_idct8x8_12_add_c with bit depth 10.
void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}

// vpx_highbd_idct8x8_12_add_c with bit depth 12.
void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}

// vpx_highbd_idct8x8_12_add_sse2 with bit depth 10.
void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}

// vpx_highbd_idct8x8_12_add_sse2 with bit depth 12.
void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}

// vpx_highbd_idct8x8_64_add_sse2 with bit depth 10.
void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}

// vpx_highbd_idct8x8_64_add_sse2 with bit depth 12.
void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}
#endif  // HAVE_SSE2
#endif  // CONFIG_VP9_HIGHBITDEPTH
134 
135 class FwdTrans8x8TestBase {
136  public:
~FwdTrans8x8TestBase()137   virtual ~FwdTrans8x8TestBase() {}
138 
139  protected:
140   virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
141   virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
142 
RunSignBiasCheck()143   void RunSignBiasCheck() {
144     ACMRandom rnd(ACMRandom::DeterministicSeed());
145     DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
146     DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
147     int count_sign_block[64][2];
148     const int count_test_block = 100000;
149 
150     memset(count_sign_block, 0, sizeof(count_sign_block));
151 
152     for (int i = 0; i < count_test_block; ++i) {
153       // Initialize a test block with input range [-255, 255].
154       for (int j = 0; j < 64; ++j) {
155         test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
156                               ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
157       }
158       ASM_REGISTER_STATE_CHECK(
159           RunFwdTxfm(test_input_block, test_output_block, pitch_));
160 
161       for (int j = 0; j < 64; ++j) {
162         if (test_output_block[j] < 0) {
163           ++count_sign_block[j][0];
164         } else if (test_output_block[j] > 0) {
165           ++count_sign_block[j][1];
166         }
167       }
168     }
169 
170     for (int j = 0; j < 64; ++j) {
171       const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
172       const int max_diff = kSignBiasMaxDiff255;
173       EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
174           << "Error: 8x8 FDCT/FHT has a sign bias > "
175           << 1. * max_diff / count_test_block * 100 << "%"
176           << " for input range [-255, 255] at index " << j
177           << " count0: " << count_sign_block[j][0]
178           << " count1: " << count_sign_block[j][1] << " diff: " << diff;
179     }
180 
181     memset(count_sign_block, 0, sizeof(count_sign_block));
182 
183     for (int i = 0; i < count_test_block; ++i) {
184       // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
185       for (int j = 0; j < 64; ++j) {
186         test_input_block[j] =
187             ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4);
188       }
189       ASM_REGISTER_STATE_CHECK(
190           RunFwdTxfm(test_input_block, test_output_block, pitch_));
191 
192       for (int j = 0; j < 64; ++j) {
193         if (test_output_block[j] < 0) {
194           ++count_sign_block[j][0];
195         } else if (test_output_block[j] > 0) {
196           ++count_sign_block[j][1];
197         }
198       }
199     }
200 
201     for (int j = 0; j < 64; ++j) {
202       const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
203       const int max_diff = kSignBiasMaxDiff15;
204       EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
205           << "Error: 8x8 FDCT/FHT has a sign bias > "
206           << 1. * max_diff / count_test_block * 100 << "%"
207           << " for input range [-15, 15] at index " << j
208           << " count0: " << count_sign_block[j][0]
209           << " count1: " << count_sign_block[j][1] << " diff: " << diff;
210     }
211   }
212 
RunRoundTripErrorCheck()213   void RunRoundTripErrorCheck() {
214     ACMRandom rnd(ACMRandom::DeterministicSeed());
215     int max_error = 0;
216     int total_error = 0;
217     const int count_test_block = 100000;
218     DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
219     DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
220     DECLARE_ALIGNED(16, uint8_t, dst[64]);
221     DECLARE_ALIGNED(16, uint8_t, src[64]);
222 #if CONFIG_VP9_HIGHBITDEPTH
223     DECLARE_ALIGNED(16, uint16_t, dst16[64]);
224     DECLARE_ALIGNED(16, uint16_t, src16[64]);
225 #endif
226 
227     for (int i = 0; i < count_test_block; ++i) {
228       // Initialize a test block with input range [-mask_, mask_].
229       for (int j = 0; j < 64; ++j) {
230         if (bit_depth_ == VPX_BITS_8) {
231           src[j] = rnd.Rand8();
232           dst[j] = rnd.Rand8();
233           test_input_block[j] = src[j] - dst[j];
234 #if CONFIG_VP9_HIGHBITDEPTH
235         } else {
236           src16[j] = rnd.Rand16() & mask_;
237           dst16[j] = rnd.Rand16() & mask_;
238           test_input_block[j] = src16[j] - dst16[j];
239 #endif
240         }
241       }
242 
243       ASM_REGISTER_STATE_CHECK(
244           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
245       for (int j = 0; j < 64; ++j) {
246         if (test_temp_block[j] > 0) {
247           test_temp_block[j] += 2;
248           test_temp_block[j] /= 4;
249           test_temp_block[j] *= 4;
250         } else {
251           test_temp_block[j] -= 2;
252           test_temp_block[j] /= 4;
253           test_temp_block[j] *= 4;
254         }
255       }
256       if (bit_depth_ == VPX_BITS_8) {
257         ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
258 #if CONFIG_VP9_HIGHBITDEPTH
259       } else {
260         ASM_REGISTER_STATE_CHECK(
261             RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
262 #endif
263       }
264 
265       for (int j = 0; j < 64; ++j) {
266 #if CONFIG_VP9_HIGHBITDEPTH
267         const int diff =
268             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
269 #else
270         const int diff = dst[j] - src[j];
271 #endif
272         const int error = diff * diff;
273         if (max_error < error) max_error = error;
274         total_error += error;
275       }
276     }
277 
278     EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
279         << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
280         << " roundtrip error > 1";
281 
282     EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
283         << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
284         << "error > 1/5 per block";
285   }
286 
RunExtremalCheck()287   void RunExtremalCheck() {
288     ACMRandom rnd(ACMRandom::DeterministicSeed());
289     int max_error = 0;
290     int total_error = 0;
291     int total_coeff_error = 0;
292     const int count_test_block = 100000;
293     DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
294     DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
295     DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
296     DECLARE_ALIGNED(16, uint8_t, dst[64]);
297     DECLARE_ALIGNED(16, uint8_t, src[64]);
298 #if CONFIG_VP9_HIGHBITDEPTH
299     DECLARE_ALIGNED(16, uint16_t, dst16[64]);
300     DECLARE_ALIGNED(16, uint16_t, src16[64]);
301 #endif
302 
303     for (int i = 0; i < count_test_block; ++i) {
304       // Initialize a test block with input range [-mask_, mask_].
305       for (int j = 0; j < 64; ++j) {
306         if (bit_depth_ == VPX_BITS_8) {
307           if (i == 0) {
308             src[j] = 255;
309             dst[j] = 0;
310           } else if (i == 1) {
311             src[j] = 0;
312             dst[j] = 255;
313           } else {
314             src[j] = rnd.Rand8() % 2 ? 255 : 0;
315             dst[j] = rnd.Rand8() % 2 ? 255 : 0;
316           }
317           test_input_block[j] = src[j] - dst[j];
318 #if CONFIG_VP9_HIGHBITDEPTH
319         } else {
320           if (i == 0) {
321             src16[j] = mask_;
322             dst16[j] = 0;
323           } else if (i == 1) {
324             src16[j] = 0;
325             dst16[j] = mask_;
326           } else {
327             src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
328             dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
329           }
330           test_input_block[j] = src16[j] - dst16[j];
331 #endif
332         }
333       }
334 
335       ASM_REGISTER_STATE_CHECK(
336           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
337       ASM_REGISTER_STATE_CHECK(
338           fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
339       if (bit_depth_ == VPX_BITS_8) {
340         ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
341 #if CONFIG_VP9_HIGHBITDEPTH
342       } else {
343         ASM_REGISTER_STATE_CHECK(
344             RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
345 #endif
346       }
347 
348       for (int j = 0; j < 64; ++j) {
349 #if CONFIG_VP9_HIGHBITDEPTH
350         const int diff =
351             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
352 #else
353         const int diff = dst[j] - src[j];
354 #endif
355         const int error = diff * diff;
356         if (max_error < error) max_error = error;
357         total_error += error;
358 
359         const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
360         total_coeff_error += abs(coeff_diff);
361       }
362 
363       EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
364           << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
365           << "an individual roundtrip error > 1";
366 
367       EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
368           << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
369           << " roundtrip error > 1/5 per block";
370 
371       EXPECT_EQ(0, total_coeff_error)
372           << "Error: Extremal 8x8 FDCT/FHT has"
373           << "overflow issues in the intermediate steps > 1";
374     }
375   }
376 
RunInvAccuracyCheck()377   void RunInvAccuracyCheck() {
378     ACMRandom rnd(ACMRandom::DeterministicSeed());
379     const int count_test_block = 1000;
380     DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
381     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
382     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
383     DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
384 #if CONFIG_VP9_HIGHBITDEPTH
385     DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
386     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
387 #endif
388 
389     for (int i = 0; i < count_test_block; ++i) {
390       double out_r[kNumCoeffs];
391 
392       // Initialize a test block with input range [-255, 255].
393       for (int j = 0; j < kNumCoeffs; ++j) {
394         if (bit_depth_ == VPX_BITS_8) {
395           src[j] = rnd.Rand8() % 2 ? 255 : 0;
396           dst[j] = src[j] > 0 ? 0 : 255;
397           in[j] = src[j] - dst[j];
398 #if CONFIG_VP9_HIGHBITDEPTH
399         } else {
400           src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
401           dst16[j] = src16[j] > 0 ? 0 : mask_;
402           in[j] = src16[j] - dst16[j];
403 #endif
404         }
405       }
406 
407       reference_8x8_dct_2d(in, out_r);
408       for (int j = 0; j < kNumCoeffs; ++j) {
409         coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
410       }
411 
412       if (bit_depth_ == VPX_BITS_8) {
413         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
414 #if CONFIG_VP9_HIGHBITDEPTH
415       } else {
416         ASM_REGISTER_STATE_CHECK(
417             RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
418 #endif
419       }
420 
421       for (int j = 0; j < kNumCoeffs; ++j) {
422 #if CONFIG_VP9_HIGHBITDEPTH
423         const int diff =
424             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
425 #else
426         const int diff = dst[j] - src[j];
427 #endif
428         const uint32_t error = diff * diff;
429         EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
430             << "Error: 8x8 IDCT has error " << error << " at index " << j;
431       }
432     }
433   }
434 
RunFwdAccuracyCheck()435   void RunFwdAccuracyCheck() {
436     ACMRandom rnd(ACMRandom::DeterministicSeed());
437     const int count_test_block = 1000;
438     DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
439     DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
440     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
441 
442     for (int i = 0; i < count_test_block; ++i) {
443       double out_r[kNumCoeffs];
444 
445       // Initialize a test block with input range [-mask_, mask_].
446       for (int j = 0; j < kNumCoeffs; ++j) {
447         in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
448       }
449 
450       RunFwdTxfm(in, coeff, pitch_);
451       reference_8x8_dct_2d(in, out_r);
452       for (int j = 0; j < kNumCoeffs; ++j) {
453         coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
454       }
455 
456       for (int j = 0; j < kNumCoeffs; ++j) {
457         const int32_t diff = coeff[j] - coeff_r[j];
458         const uint32_t error = diff * diff;
459         EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
460             << "Error: 8x8 DCT has error " << error << " at index " << j;
461       }
462     }
463   }
464 
CompareInvReference(IdctFunc ref_txfm,int thresh)465   void CompareInvReference(IdctFunc ref_txfm, int thresh) {
466     ACMRandom rnd(ACMRandom::DeterministicSeed());
467     const int count_test_block = 10000;
468     const int eob = 12;
469     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
470     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
471     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
472 #if CONFIG_VP9_HIGHBITDEPTH
473     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
474     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
475 #endif
476     const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;
477 
478     for (int i = 0; i < count_test_block; ++i) {
479       for (int j = 0; j < kNumCoeffs; ++j) {
480         if (j < eob) {
481           // Random values less than the threshold, either positive or negative
482           coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
483         } else {
484           coeff[scan[j]] = 0;
485         }
486         if (bit_depth_ == VPX_BITS_8) {
487           dst[j] = 0;
488           ref[j] = 0;
489 #if CONFIG_VP9_HIGHBITDEPTH
490         } else {
491           dst16[j] = 0;
492           ref16[j] = 0;
493 #endif
494         }
495       }
496       if (bit_depth_ == VPX_BITS_8) {
497         ref_txfm(coeff, ref, pitch_);
498         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
499 #if CONFIG_VP9_HIGHBITDEPTH
500       } else {
501         ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
502         ASM_REGISTER_STATE_CHECK(
503             RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
504 #endif
505       }
506 
507       for (int j = 0; j < kNumCoeffs; ++j) {
508 #if CONFIG_VP9_HIGHBITDEPTH
509         const int diff =
510             bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
511 #else
512         const int diff = dst[j] - ref[j];
513 #endif
514         const uint32_t error = diff * diff;
515         EXPECT_EQ(0u, error)
516             << "Error: 8x8 IDCT has error " << error << " at index " << j;
517       }
518     }
519   }
520   int pitch_;
521   int tx_type_;
522   FhtFunc fwd_txfm_ref;
523   vpx_bit_depth_t bit_depth_;
524   int mask_;
525 };
526 
527 class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
528                        public ::testing::TestWithParam<Dct8x8Param> {
529  public:
~FwdTrans8x8DCT()530   virtual ~FwdTrans8x8DCT() {}
531 
SetUp()532   virtual void SetUp() {
533     fwd_txfm_ = GET_PARAM(0);
534     inv_txfm_ = GET_PARAM(1);
535     tx_type_ = GET_PARAM(2);
536     pitch_ = 8;
537     fwd_txfm_ref = fdct8x8_ref;
538     bit_depth_ = GET_PARAM(3);
539     mask_ = (1 << bit_depth_) - 1;
540   }
541 
TearDown()542   virtual void TearDown() { libvpx_test::ClearSystemState(); }
543 
544  protected:
RunFwdTxfm(int16_t * in,tran_low_t * out,int stride)545   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
546     fwd_txfm_(in, out, stride);
547   }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)548   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
549     inv_txfm_(out, dst, stride);
550   }
551 
552   FdctFunc fwd_txfm_;
553   IdctFunc inv_txfm_;
554 };
555 
// Each test runs the corresponding check of FwdTrans8x8TestBase once per
// instantiated (forward, inverse, tx_type, bit depth) tuple.
TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); }

TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); }

// The two accuracy checks compare against the floating point reference DCT
// and are only registered for the DCT fixture.
TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); }

TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
565 
566 class FwdTrans8x8HT : public FwdTrans8x8TestBase,
567                       public ::testing::TestWithParam<Ht8x8Param> {
568  public:
~FwdTrans8x8HT()569   virtual ~FwdTrans8x8HT() {}
570 
SetUp()571   virtual void SetUp() {
572     fwd_txfm_ = GET_PARAM(0);
573     inv_txfm_ = GET_PARAM(1);
574     tx_type_ = GET_PARAM(2);
575     pitch_ = 8;
576     fwd_txfm_ref = fht8x8_ref;
577     bit_depth_ = GET_PARAM(3);
578     mask_ = (1 << bit_depth_) - 1;
579   }
580 
TearDown()581   virtual void TearDown() { libvpx_test::ClearSystemState(); }
582 
583  protected:
RunFwdTxfm(int16_t * in,tran_low_t * out,int stride)584   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
585     fwd_txfm_(in, out, stride, tx_type_);
586   }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)587   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
588     inv_txfm_(out, dst, stride, tx_type_);
589   }
590 
591   FhtFunc fwd_txfm_;
592   IhtFunc inv_txfm_;
593 };
594 
// Each test runs the corresponding check of FwdTrans8x8TestBase once per
// instantiated (forward, inverse, tx_type, bit depth) tuple.
TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); }

TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); }
600 
601 class InvTrans8x8DCT : public FwdTrans8x8TestBase,
602                        public ::testing::TestWithParam<Idct8x8Param> {
603  public:
~InvTrans8x8DCT()604   virtual ~InvTrans8x8DCT() {}
605 
SetUp()606   virtual void SetUp() {
607     ref_txfm_ = GET_PARAM(0);
608     inv_txfm_ = GET_PARAM(1);
609     thresh_ = GET_PARAM(2);
610     pitch_ = 8;
611     bit_depth_ = GET_PARAM(3);
612     mask_ = (1 << bit_depth_) - 1;
613   }
614 
TearDown()615   virtual void TearDown() { libvpx_test::ClearSystemState(); }
616 
617  protected:
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)618   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
619     inv_txfm_(out, dst, stride);
620   }
RunFwdTxfm(int16_t *,tran_low_t *,int)621   void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {}
622 
623   IdctFunc ref_txfm_;
624   IdctFunc inv_txfm_;
625   int thresh_;
626 };
627 
// Requires the inverse transform under test to match the reference inverse
// bit-exactly on sparse coefficient blocks whose non-zero values are drawn
// below thresh_.
TEST_P(InvTrans8x8DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}
631 
632 using std::make_tuple;
633 
// C implementations of the DCT pair. Tuple layout: (forward DCT, inverse DCT,
// tx_type, bit depth). High bit depth builds additionally test the highbd
// forward DCT against the 10 and 12 bit inverse wrappers.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
#else
INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                     &vpx_idct8x8_64_add_c, 0,
                                                     VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
647 
// C implementations of the hybrid transform pair. Tuple layout: (forward
// transform, inverse transform, tx_type, bit depth). Every tx_type from 0 to
// 3 is covered for each bit depth that is instantiated.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
673 
// NEON: 8-bit DCT pair in all builds; the hybrid transform entries are only
// instantiated without high bit depth and pair the C forward transform with
// the NEON inverse transform.
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
                                                     &vpx_idct8x8_64_add_neon,
                                                     0, VPX_BITS_8)));

#if !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
#endif  // !CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
690 
// SSE2 without high bit depth: SSE2 forward and inverse transforms are tested
// together, for the DCT and for every hybrid tx_type from 0 to 3.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_sse2,
                                                     &vpx_idct8x8_64_add_sse2,
                                                     0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
704 
// SSE2 with high bit depth. The 8-bit entries pair the SSE2 forward transform
// with the C inverse transform; the 10 and 12 bit entries exercise the highbd
// SSE2 inverse wrappers with both the C and the SSE2 forward DCT.
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// NOTE(review): the high bit depth tuples below pass 12 as tx_type while every
// other FwdTrans8x8DCT instantiation passes 0. FwdTrans8x8DCT only hands
// tx_type to fdct8x8_ref, which ignores it, so the value has no effect here -
// confirm whether 0 was intended.
INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0,
                                 VPX_BITS_8),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2,
                                 12, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_12_sse2,
                                 12, VPX_BITS_12),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));

INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));

// Tuple layout: (reference inverse, inverse under test, threshold, bit depth).
// Optimizations take effect at a threshold of 6201, so we use a value close to
// that to test both branches.
INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans8x8DCT,
    ::testing::Values(
        make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225,
                   VPX_BITS_10),
        make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
        make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225,
                   VPX_BITS_12),
        make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
739 
// SSSE3 (x86-64 only, no high bit depth): the SSSE3 forward DCT is paired with
// the SSE2 inverse DCT.
#if HAVE_SSSE3 && VPX_ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
                                                     &vpx_idct8x8_64_add_sse2,
                                                     0, VPX_BITS_8)));
#endif  // HAVE_SSSE3 && VPX_ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH &&
        // !CONFIG_EMULATE_HARDWARE
747 
// MSA without high bit depth: MSA forward and inverse transforms are tested
// together, for the DCT and for every hybrid tx_type from 0 to 3.
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(MSA, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_msa,
                                                     &vpx_idct8x8_64_add_msa, 0,
                                                     VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    MSA, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
761 
// VSX without high bit depth: only the inverse DCT has a VSX entry here, so
// it is paired with the C forward DCT.
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(VSX, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                     &vpx_idct8x8_64_add_vsx, 0,
                                                     VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
768 }  // namespace
769