1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <tuple>
16 
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
19 
20 #include "config/av1_rtcd.h"
21 #include "config/aom_dsp_rtcd.h"
22 #include "test/acm_random.h"
23 #include "test/register_state_check.h"
24 #include "test/transform_test_base.h"
25 #include "test/util.h"
26 #include "av1/common/entropy.h"
27 #include "aom/aom_codec.h"
28 #include "aom/aom_integer.h"
29 #include "aom_ports/mem.h"
30 
31 using libaom_test::ACMRandom;
32 
33 namespace {
34 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
35 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
36 
37 using libaom_test::FhtFunc;
38 
39 typedef std::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t, int, FdctFunc>
40     Dct4x4Param;
41 
fwht4x4_ref(const int16_t * in,tran_low_t * out,int stride,TxfmParam *)42 void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
43                  TxfmParam * /*txfm_param*/) {
44   av1_fwht4x4_c(in, out, stride);
45 }
46 
iwht4x4_10_c(const tran_low_t * in,uint8_t * out,int stride)47 void iwht4x4_10_c(const tran_low_t *in, uint8_t *out, int stride) {
48   av1_highbd_iwht4x4_16_add_c(in, out, stride, 10);
49 }
50 
iwht4x4_12_c(const tran_low_t * in,uint8_t * out,int stride)51 void iwht4x4_12_c(const tran_low_t *in, uint8_t *out, int stride) {
52   av1_highbd_iwht4x4_16_add_c(in, out, stride, 12);
53 }
54 
55 #if HAVE_SSE4_1
56 
iwht4x4_10_sse4_1(const tran_low_t * in,uint8_t * out,int stride)57 void iwht4x4_10_sse4_1(const tran_low_t *in, uint8_t *out, int stride) {
58   av1_highbd_iwht4x4_16_add_sse4_1(in, out, stride, 10);
59 }
60 
iwht4x4_12_sse4_1(const tran_low_t * in,uint8_t * out,int stride)61 void iwht4x4_12_sse4_1(const tran_low_t *in, uint8_t *out, int stride) {
62   av1_highbd_iwht4x4_16_add_sse4_1(in, out, stride, 12);
63 }
64 
65 #endif
66 
67 class Trans4x4WHT : public libaom_test::TransformTestBase<tran_low_t>,
68                     public ::testing::TestWithParam<Dct4x4Param> {
69  public:
~Trans4x4WHT()70   virtual ~Trans4x4WHT() {}
71 
SetUp()72   virtual void SetUp() {
73     fwd_txfm_ = GET_PARAM(0);
74     inv_txfm_ = GET_PARAM(1);
75     pitch_ = 4;
76     height_ = 4;
77     fwd_txfm_ref = fwht4x4_ref;
78     bit_depth_ = GET_PARAM(3);
79     mask_ = (1 << bit_depth_) - 1;
80     num_coeffs_ = GET_PARAM(4);
81     fwd_txfm_c_ = GET_PARAM(5);
82   }
TearDown()83   virtual void TearDown() {}
84 
85  protected:
RunFwdTxfm(const int16_t * in,tran_low_t * out,int stride)86   void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
87     fwd_txfm_(in, out, stride);
88   }
RunInvTxfm(const tran_low_t * out,uint8_t * dst,int stride)89   void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
90     inv_txfm_(out, dst, stride);
91   }
RunSpeedTest()92   void RunSpeedTest() {
93     if (!fwd_txfm_c_) {
94       GTEST_SKIP();
95     } else {
96       ACMRandom rnd(ACMRandom::DeterministicSeed());
97       const int count_test_block = 10;
98       const int numIter = 5000;
99 
100       int c_sum_time = 0;
101       int simd_sum_time = 0;
102 
103       int stride = 96;
104 
105       int16_t *input_block = reinterpret_cast<int16_t *>(
106           aom_memalign(16, sizeof(int16_t) * stride * height_));
107       tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>(
108           aom_memalign(16, sizeof(output_ref_block[0]) * num_coeffs_));
109       tran_low_t *output_block = reinterpret_cast<tran_low_t *>(
110           aom_memalign(16, sizeof(output_block[0]) * num_coeffs_));
111 
112       for (int i = 0; i < count_test_block; ++i) {
113         int j, k;
114         for (j = 0; j < height_; ++j) {
115           for (k = 0; k < pitch_; ++k) {
116             int in_idx = j * stride + k;
117             int out_idx = j * pitch_ + k;
118             input_block[in_idx] =
119                 (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
120             if (bit_depth_ == AOM_BITS_8) {
121               output_block[out_idx] = output_ref_block[out_idx] = rnd.Rand8();
122             } else {
123               output_block[out_idx] = output_ref_block[out_idx] =
124                   rnd.Rand16() & mask_;
125             }
126           }
127         }
128 
129         aom_usec_timer c_timer_;
130         aom_usec_timer_start(&c_timer_);
131         for (int i = 0; i < numIter; i++) {
132           API_REGISTER_STATE_CHECK(
133               fwd_txfm_c_(input_block, output_ref_block, stride));
134         }
135         aom_usec_timer_mark(&c_timer_);
136 
137         aom_usec_timer simd_timer_;
138         aom_usec_timer_start(&simd_timer_);
139 
140         for (int i = 0; i < numIter; i++) {
141           API_REGISTER_STATE_CHECK(
142               fwd_txfm_(input_block, output_block, stride));
143         }
144         aom_usec_timer_mark(&simd_timer_);
145 
146         c_sum_time += static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
147         simd_sum_time += static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
148 
149         // The minimum quant value is 4.
150         for (j = 0; j < height_; ++j) {
151           for (k = 0; k < pitch_; ++k) {
152             int out_idx = j * pitch_ + k;
153             ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx])
154                 << "Error: not bit-exact result at index: " << out_idx
155                 << " at test block: " << i;
156           }
157         }
158       }
159 
160       printf(
161           "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
162           simd_sum_time,
163           (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
164 
165       aom_free(input_block);
166       aom_free(output_ref_block);
167       aom_free(output_block);
168     }
169   }
170 
171   FdctFunc fwd_txfm_;
172   IdctFunc inv_txfm_;
173 
174   FdctFunc fwd_txfm_c_;  // C version of forward transform for speed test.
175 };
176 
// Round-trip accuracy check.
// NOTE(review): RunAccuracyCheck is not defined in this file (presumably in
// TransformTestBase); the arguments look like error limits - confirm there.
TEST_P(Trans4x4WHT, AccuracyCheck) {
  RunAccuracyCheck(0, 0.00001);
}
178 
// Coefficient check.
// NOTE(review): RunCoeffCheck is not defined in this file; it presumably
// compares the forward transform with fwd_txfm_ref - confirm in the base
// class.
TEST_P(Trans4x4WHT, CoeffCheck) {
  RunCoeffCheck();
}
180 
// Memory check.
// NOTE(review): RunMemCheck is not defined in this file; see the base class
// for what it verifies.
TEST_P(Trans4x4WHT, MemCheck) {
  RunMemCheck();
}
182 
// Inverse-transform accuracy check.
// NOTE(review): RunInvAccuracyCheck is not defined in this file; the argument
// looks like an error limit - confirm in the base class.
TEST_P(Trans4x4WHT, InvAccuracyCheck) {
  RunInvAccuracyCheck(0);
}
184 
// Speed comparison against the C forward transform. The DISABLED_ prefix
// keeps it out of default runs; pass --gtest_also_run_disabled_tests to
// include it.
TEST_P(Trans4x4WHT, DISABLED_Speed) {
  RunSpeedTest();
}
186 
187 using std::make_tuple;
188 
189 INSTANTIATE_TEST_SUITE_P(
190     C, Trans4x4WHT,
191     ::testing::Values(make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_10_c, DCT_DCT,
192                                  AOM_BITS_10, 16, static_cast<FdctFunc>(NULL)),
193                       make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_12_c, DCT_DCT,
194                                  AOM_BITS_12, 16,
195                                  static_cast<FdctFunc>(NULL))));
196 
197 #if HAVE_SSE4_1
198 
199 INSTANTIATE_TEST_SUITE_P(
200     SSE4_1, Trans4x4WHT,
201     ::testing::Values(make_tuple(&av1_highbd_fwht4x4_sse4_1, &iwht4x4_10_sse4_1,
202                                  DCT_DCT, AOM_BITS_10, 16,
203                                  static_cast<FdctFunc>(NULL)),
204                       make_tuple(&av1_highbd_fwht4x4_sse4_1, &iwht4x4_12_sse4_1,
205                                  DCT_DCT, AOM_BITS_12, 16,
206                                  static_cast<FdctFunc>(NULL))));
207 
208 #endif  // HAVE_SSE4_1
209 
210 #if HAVE_NEON
211 
212 INSTANTIATE_TEST_SUITE_P(
213     NEON, Trans4x4WHT,
214     ::testing::Values(make_tuple(&av1_highbd_fwht4x4_neon, &iwht4x4_10_c,
215                                  DCT_DCT, AOM_BITS_10, 16,
216                                  &av1_highbd_fwht4x4_c),
217                       make_tuple(&av1_highbd_fwht4x4_neon, &iwht4x4_12_c,
218                                  DCT_DCT, AOM_BITS_12, 16,
219                                  &av1_highbd_fwht4x4_c)));
220 
221 #endif  // HAVE_NEON
222 
223 }  // namespace
224