1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <memory>
#include <tuple>
16
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
19
20 #include "config/av1_rtcd.h"
21 #include "config/aom_dsp_rtcd.h"
22 #include "test/acm_random.h"
23 #include "test/register_state_check.h"
24 #include "test/transform_test_base.h"
25 #include "test/util.h"
26 #include "av1/common/entropy.h"
27 #include "aom/aom_codec.h"
28 #include "aom/aom_integer.h"
29 #include "aom_ports/mem.h"
30
31 using libaom_test::ACMRandom;
32
33 namespace {
34 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
35 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
36
37 using libaom_test::FhtFunc;
38
39 typedef std::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t, int, FdctFunc>
40 Dct4x4Param;
41
fwht4x4_ref(const int16_t * in,tran_low_t * out,int stride,TxfmParam *)42 void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
43 TxfmParam * /*txfm_param*/) {
44 av1_fwht4x4_c(in, out, stride);
45 }
46
iwht4x4_10_c(const tran_low_t * in,uint8_t * out,int stride)47 void iwht4x4_10_c(const tran_low_t *in, uint8_t *out, int stride) {
48 av1_highbd_iwht4x4_16_add_c(in, out, stride, 10);
49 }
50
iwht4x4_12_c(const tran_low_t * in,uint8_t * out,int stride)51 void iwht4x4_12_c(const tran_low_t *in, uint8_t *out, int stride) {
52 av1_highbd_iwht4x4_16_add_c(in, out, stride, 12);
53 }
54
55 #if HAVE_SSE4_1
56
iwht4x4_10_sse4_1(const tran_low_t * in,uint8_t * out,int stride)57 void iwht4x4_10_sse4_1(const tran_low_t *in, uint8_t *out, int stride) {
58 av1_highbd_iwht4x4_16_add_sse4_1(in, out, stride, 10);
59 }
60
iwht4x4_12_sse4_1(const tran_low_t * in,uint8_t * out,int stride)61 void iwht4x4_12_sse4_1(const tran_low_t *in, uint8_t *out, int stride) {
62 av1_highbd_iwht4x4_16_add_sse4_1(in, out, stride, 12);
63 }
64
65 #endif
66
67 class Trans4x4WHT : public libaom_test::TransformTestBase<tran_low_t>,
68 public ::testing::TestWithParam<Dct4x4Param> {
69 public:
~Trans4x4WHT()70 virtual ~Trans4x4WHT() {}
71
SetUp()72 virtual void SetUp() {
73 fwd_txfm_ = GET_PARAM(0);
74 inv_txfm_ = GET_PARAM(1);
75 pitch_ = 4;
76 height_ = 4;
77 fwd_txfm_ref = fwht4x4_ref;
78 bit_depth_ = GET_PARAM(3);
79 mask_ = (1 << bit_depth_) - 1;
80 num_coeffs_ = GET_PARAM(4);
81 fwd_txfm_c_ = GET_PARAM(5);
82 }
TearDown()83 virtual void TearDown() {}
84
85 protected:
RunFwdTxfm(const int16_t * in,tran_low_t * out,int stride)86 void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
87 fwd_txfm_(in, out, stride);
88 }
RunInvTxfm(const tran_low_t * out,uint8_t * dst,int stride)89 void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
90 inv_txfm_(out, dst, stride);
91 }
RunSpeedTest()92 void RunSpeedTest() {
93 if (!fwd_txfm_c_) {
94 GTEST_SKIP();
95 } else {
96 ACMRandom rnd(ACMRandom::DeterministicSeed());
97 const int count_test_block = 10;
98 const int numIter = 5000;
99
100 int c_sum_time = 0;
101 int simd_sum_time = 0;
102
103 int stride = 96;
104
105 int16_t *input_block = reinterpret_cast<int16_t *>(
106 aom_memalign(16, sizeof(int16_t) * stride * height_));
107 tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>(
108 aom_memalign(16, sizeof(output_ref_block[0]) * num_coeffs_));
109 tran_low_t *output_block = reinterpret_cast<tran_low_t *>(
110 aom_memalign(16, sizeof(output_block[0]) * num_coeffs_));
111
112 for (int i = 0; i < count_test_block; ++i) {
113 int j, k;
114 for (j = 0; j < height_; ++j) {
115 for (k = 0; k < pitch_; ++k) {
116 int in_idx = j * stride + k;
117 int out_idx = j * pitch_ + k;
118 input_block[in_idx] =
119 (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
120 if (bit_depth_ == AOM_BITS_8) {
121 output_block[out_idx] = output_ref_block[out_idx] = rnd.Rand8();
122 } else {
123 output_block[out_idx] = output_ref_block[out_idx] =
124 rnd.Rand16() & mask_;
125 }
126 }
127 }
128
129 aom_usec_timer c_timer_;
130 aom_usec_timer_start(&c_timer_);
131 for (int i = 0; i < numIter; i++) {
132 API_REGISTER_STATE_CHECK(
133 fwd_txfm_c_(input_block, output_ref_block, stride));
134 }
135 aom_usec_timer_mark(&c_timer_);
136
137 aom_usec_timer simd_timer_;
138 aom_usec_timer_start(&simd_timer_);
139
140 for (int i = 0; i < numIter; i++) {
141 API_REGISTER_STATE_CHECK(
142 fwd_txfm_(input_block, output_block, stride));
143 }
144 aom_usec_timer_mark(&simd_timer_);
145
146 c_sum_time += static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
147 simd_sum_time += static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
148
149 // The minimum quant value is 4.
150 for (j = 0; j < height_; ++j) {
151 for (k = 0; k < pitch_; ++k) {
152 int out_idx = j * pitch_ + k;
153 ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx])
154 << "Error: not bit-exact result at index: " << out_idx
155 << " at test block: " << i;
156 }
157 }
158 }
159
160 printf(
161 "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
162 simd_sum_time,
163 (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
164
165 aom_free(input_block);
166 aom_free(output_ref_block);
167 aom_free(output_block);
168 }
169 }
170
171 FdctFunc fwd_txfm_;
172 IdctFunc inv_txfm_;
173
174 FdctFunc fwd_txfm_c_; // C version of forward transform for speed test.
175 };
176
// Calls RunAccuracyCheck(), which is not defined in this file (presumably
// inherited from libaom_test::TransformTestBase), with arguments 0 and 1e-5.
TEST_P(Trans4x4WHT, AccuracyCheck) {
  RunAccuracyCheck(0, 1e-5);
}
178
// Calls RunCoeffCheck(), which is not defined in this file (presumably
// inherited from libaom_test::TransformTestBase).
TEST_P(Trans4x4WHT, CoeffCheck) {
  RunCoeffCheck();
}
180
// Calls RunMemCheck(), which is not defined in this file (presumably
// inherited from libaom_test::TransformTestBase).
TEST_P(Trans4x4WHT, MemCheck) {
  RunMemCheck();
}
182
// Calls RunInvAccuracyCheck(), which is not defined in this file (presumably
// inherited from libaom_test::TransformTestBase), with argument 0.
TEST_P(Trans4x4WHT, InvAccuracyCheck) {
  RunInvAccuracyCheck(0);
}
184
// Runs the fixture's speed comparison. The DISABLED_ prefix keeps GoogleTest
// from running it unless disabled tests are explicitly requested.
TEST_P(Trans4x4WHT, DISABLED_Speed) {
  RunSpeedTest();
}
186
187 using std::make_tuple;
188
189 INSTANTIATE_TEST_SUITE_P(
190 C, Trans4x4WHT,
191 ::testing::Values(make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_10_c, DCT_DCT,
192 AOM_BITS_10, 16, static_cast<FdctFunc>(NULL)),
193 make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_12_c, DCT_DCT,
194 AOM_BITS_12, 16,
195 static_cast<FdctFunc>(NULL))));
196
197 #if HAVE_SSE4_1
198
199 INSTANTIATE_TEST_SUITE_P(
200 SSE4_1, Trans4x4WHT,
201 ::testing::Values(make_tuple(&av1_highbd_fwht4x4_sse4_1, &iwht4x4_10_sse4_1,
202 DCT_DCT, AOM_BITS_10, 16,
203 static_cast<FdctFunc>(NULL)),
204 make_tuple(&av1_highbd_fwht4x4_sse4_1, &iwht4x4_12_sse4_1,
205 DCT_DCT, AOM_BITS_12, 16,
206 static_cast<FdctFunc>(NULL))));
207
208 #endif // HAVE_SSE4_1
209
210 #if HAVE_NEON
211
212 INSTANTIATE_TEST_SUITE_P(
213 NEON, Trans4x4WHT,
214 ::testing::Values(make_tuple(&av1_highbd_fwht4x4_neon, &iwht4x4_10_c,
215 DCT_DCT, AOM_BITS_10, 16,
216 &av1_highbd_fwht4x4_c),
217 make_tuple(&av1_highbd_fwht4x4_neon, &iwht4x4_12_c,
218 DCT_DCT, AOM_BITS_12, 16,
219 &av1_highbd_fwht4x4_c)));
220
221 #endif // HAVE_NEON
222
223 } // namespace
224