1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
#include "test/hiprec_convolve_test_util.h"

#include <memory>

#include "av1/common/restoration.h"
15
16 using ::testing::make_tuple;
17 using ::testing::tuple;
18
19 namespace libaom_test {
20
21 // Generate a random pair of filter kernels, using the ranges
22 // of possible values from the loop-restoration experiment
generate_kernels(ACMRandom * rnd,InterpKernel hkernel,InterpKernel vkernel)23 static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
24 InterpKernel vkernel) {
25 hkernel[0] = hkernel[6] =
26 WIENER_FILT_TAP0_MINV +
27 rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
28 hkernel[1] = hkernel[5] =
29 WIENER_FILT_TAP1_MINV +
30 rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
31 hkernel[2] = hkernel[4] =
32 WIENER_FILT_TAP2_MINV +
33 rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
34 hkernel[3] = -(hkernel[0] + hkernel[1] + hkernel[2]);
35 hkernel[7] = 0;
36
37 vkernel[0] = vkernel[6] =
38 WIENER_FILT_TAP0_MINV +
39 rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
40 vkernel[1] = vkernel[5] =
41 WIENER_FILT_TAP1_MINV +
42 rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
43 vkernel[2] = vkernel[4] =
44 WIENER_FILT_TAP2_MINV +
45 rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
46 vkernel[3] = -(vkernel[0] + vkernel[1] + vkernel[2]);
47 vkernel[7] = 0;
48 }
49
50 namespace AV1HiprecConvolve {
51
BuildParams(hiprec_convolve_func filter)52 ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
53 hiprec_convolve_func filter) {
54 const HiprecConvolveParam params[] = {
55 make_tuple(8, 8, 50000, filter), make_tuple(8, 4, 50000, filter),
56 make_tuple(64, 24, 1000, filter), make_tuple(64, 64, 1000, filter),
57 make_tuple(64, 56, 1000, filter), make_tuple(32, 8, 10000, filter),
58 make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
59 make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
60 make_tuple(64, 34, 1000, filter), make_tuple(8, 17, 10000, filter),
61 make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
62 };
63 return ::testing::ValuesIn(params);
64 }
65
~AV1HiprecConvolveTest()66 AV1HiprecConvolveTest::~AV1HiprecConvolveTest() {}
SetUp()67 void AV1HiprecConvolveTest::SetUp() {
68 rnd_.Reset(ACMRandom::DeterministicSeed());
69 }
70
TearDown()71 void AV1HiprecConvolveTest::TearDown() { libaom_test::ClearSystemState(); }
72
RunCheckOutput(hiprec_convolve_func test_impl)73 void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
74 const int w = 128, h = 128;
75 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
76 const int num_iters = GET_PARAM(2);
77 int i, j;
78 const ConvolveParams conv_params = get_conv_params_wiener(8);
79
80 uint8_t *input_ = new uint8_t[h * w];
81 uint8_t *input = input_;
82
83 // The AVX2 convolve functions always write rows with widths that are
84 // multiples of 16. So to avoid a buffer overflow, we may need to pad
85 // rows to a multiple of 16.
86 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
87 uint8_t *output = new uint8_t[output_n];
88 uint8_t *output2 = new uint8_t[output_n];
89
90 // Generate random filter kernels
91 DECLARE_ALIGNED(16, InterpKernel, hkernel);
92 DECLARE_ALIGNED(16, InterpKernel, vkernel);
93
94 generate_kernels(&rnd_, hkernel, vkernel);
95
96 for (i = 0; i < h; ++i)
97 for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
98
99 for (i = 0; i < num_iters; ++i) {
100 // Choose random locations within the source block
101 int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
102 int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
103 av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w, output,
104 out_w, hkernel, 16, vkernel, 16, out_w, out_h,
105 &conv_params);
106 test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16,
107 vkernel, 16, out_w, out_h, &conv_params);
108
109 for (j = 0; j < out_w * out_h; ++j)
110 ASSERT_EQ(output[j], output2[j])
111 << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
112 << (j / out_w) << ") on iteration " << i;
113 }
114 delete[] input_;
115 delete[] output;
116 delete[] output2;
117 }
118
RunSpeedTest(hiprec_convolve_func test_impl)119 void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
120 const int w = 128, h = 128;
121 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
122 const int num_iters = GET_PARAM(2) / 500;
123 int i, j, k;
124 const ConvolveParams conv_params = get_conv_params_wiener(8);
125
126 uint8_t *input_ = new uint8_t[h * w];
127 uint8_t *input = input_;
128
129 // The AVX2 convolve functions always write rows with widths that are
130 // multiples of 16. So to avoid a buffer overflow, we may need to pad
131 // rows to a multiple of 16.
132 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
133 uint8_t *output = new uint8_t[output_n];
134 uint8_t *output2 = new uint8_t[output_n];
135
136 // Generate random filter kernels
137 DECLARE_ALIGNED(16, InterpKernel, hkernel);
138 DECLARE_ALIGNED(16, InterpKernel, vkernel);
139
140 generate_kernels(&rnd_, hkernel, vkernel);
141
142 for (i = 0; i < h; ++i)
143 for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
144
145 aom_usec_timer ref_timer;
146 aom_usec_timer_start(&ref_timer);
147 for (i = 0; i < num_iters; ++i) {
148 for (j = 3; j < h - out_h - 4; j++) {
149 for (k = 3; k < w - out_w - 4; k++) {
150 av1_wiener_convolve_add_src_c(input + j * w + k, w, output, out_w,
151 hkernel, 16, vkernel, 16, out_w, out_h,
152 &conv_params);
153 }
154 }
155 }
156 aom_usec_timer_mark(&ref_timer);
157 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
158
159 aom_usec_timer tst_timer;
160 aom_usec_timer_start(&tst_timer);
161 for (i = 0; i < num_iters; ++i) {
162 for (j = 3; j < h - out_h - 4; j++) {
163 for (k = 3; k < w - out_w - 4; k++) {
164 test_impl(input + j * w + k, w, output2, out_w, hkernel, 16, vkernel,
165 16, out_w, out_h, &conv_params);
166 }
167 }
168 }
169 aom_usec_timer_mark(&tst_timer);
170 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
171
172 std::cout << "[ ] C time = " << ref_time / 1000
173 << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
174
175 EXPECT_GT(ref_time, tst_time)
176 << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
177 << "C time: " << ref_time << " us\n"
178 << "SIMD time: " << tst_time << " us\n";
179
180 delete[] input_;
181 delete[] output;
182 delete[] output2;
183 }
184 } // namespace AV1HiprecConvolve
185
186 namespace AV1HighbdHiprecConvolve {
187
BuildParams(highbd_hiprec_convolve_func filter)188 ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
189 highbd_hiprec_convolve_func filter) {
190 const HighbdHiprecConvolveParam params[] = {
191 make_tuple(8, 8, 50000, 8, filter), make_tuple(64, 64, 1000, 8, filter),
192 make_tuple(32, 8, 10000, 8, filter), make_tuple(8, 8, 50000, 10, filter),
193 make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter),
194 make_tuple(8, 8, 50000, 12, filter), make_tuple(64, 64, 1000, 12, filter),
195 make_tuple(32, 8, 10000, 12, filter),
196 };
197 return ::testing::ValuesIn(params);
198 }
199
~AV1HighbdHiprecConvolveTest()200 AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() {}
SetUp()201 void AV1HighbdHiprecConvolveTest::SetUp() {
202 rnd_.Reset(ACMRandom::DeterministicSeed());
203 }
204
TearDown()205 void AV1HighbdHiprecConvolveTest::TearDown() {
206 libaom_test::ClearSystemState();
207 }
208
RunCheckOutput(highbd_hiprec_convolve_func test_impl)209 void AV1HighbdHiprecConvolveTest::RunCheckOutput(
210 highbd_hiprec_convolve_func test_impl) {
211 const int w = 128, h = 128;
212 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
213 const int num_iters = GET_PARAM(2);
214 const int bd = GET_PARAM(3);
215 int i, j;
216 const ConvolveParams conv_params = get_conv_params_wiener(bd);
217
218 uint16_t *input = new uint16_t[h * w];
219
220 // The AVX2 convolve functions always write rows with widths that are
221 // multiples of 16. So to avoid a buffer overflow, we may need to pad
222 // rows to a multiple of 16.
223 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
224 uint16_t *output = new uint16_t[output_n];
225 uint16_t *output2 = new uint16_t[output_n];
226
227 // Generate random filter kernels
228 DECLARE_ALIGNED(16, InterpKernel, hkernel);
229 DECLARE_ALIGNED(16, InterpKernel, vkernel);
230
231 generate_kernels(&rnd_, hkernel, vkernel);
232
233 for (i = 0; i < h; ++i)
234 for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
235
236 uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
237 uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
238 uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
239
240 for (i = 0; i < num_iters; ++i) {
241 // Choose random locations within the source block
242 int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
243 int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
244 av1_highbd_wiener_convolve_add_src_c(
245 input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, 16,
246 vkernel, 16, out_w, out_h, &conv_params, bd);
247 test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
248 hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
249
250 for (j = 0; j < out_w * out_h; ++j)
251 ASSERT_EQ(output[j], output2[j])
252 << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
253 << (j / out_w) << ") on iteration " << i;
254 }
255 delete[] input;
256 delete[] output;
257 delete[] output2;
258 }
259
RunSpeedTest(highbd_hiprec_convolve_func test_impl)260 void AV1HighbdHiprecConvolveTest::RunSpeedTest(
261 highbd_hiprec_convolve_func test_impl) {
262 const int w = 128, h = 128;
263 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
264 const int num_iters = GET_PARAM(2) / 500;
265 const int bd = GET_PARAM(3);
266 int i, j, k;
267 const ConvolveParams conv_params = get_conv_params_wiener(bd);
268
269 uint16_t *input = new uint16_t[h * w];
270
271 // The AVX2 convolve functions always write rows with widths that are
272 // multiples of 16. So to avoid a buffer overflow, we may need to pad
273 // rows to a multiple of 16.
274 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
275 uint16_t *output = new uint16_t[output_n];
276 uint16_t *output2 = new uint16_t[output_n];
277
278 // Generate random filter kernels
279 DECLARE_ALIGNED(16, InterpKernel, hkernel);
280 DECLARE_ALIGNED(16, InterpKernel, vkernel);
281
282 generate_kernels(&rnd_, hkernel, vkernel);
283
284 for (i = 0; i < h; ++i)
285 for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
286
287 uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
288 uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
289 uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
290
291 aom_usec_timer ref_timer;
292 aom_usec_timer_start(&ref_timer);
293 for (i = 0; i < num_iters; ++i) {
294 for (j = 3; j < h - out_h - 4; j++) {
295 for (k = 3; k < w - out_w - 4; k++) {
296 av1_highbd_wiener_convolve_add_src_c(
297 input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
298 16, out_w, out_h, &conv_params, bd);
299 }
300 }
301 }
302 aom_usec_timer_mark(&ref_timer);
303 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
304
305 aom_usec_timer tst_timer;
306 aom_usec_timer_start(&tst_timer);
307 for (i = 0; i < num_iters; ++i) {
308 for (j = 3; j < h - out_h - 4; j++) {
309 for (k = 3; k < w - out_w - 4; k++) {
310 test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
311 vkernel, 16, out_w, out_h, &conv_params, bd);
312 }
313 }
314 }
315 aom_usec_timer_mark(&tst_timer);
316 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
317
318 std::cout << "[ ] C time = " << ref_time / 1000
319 << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
320
321 EXPECT_GT(ref_time, tst_time)
322 << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
323 << "C time: " << ref_time << " us\n"
324 << "SIMD time: " << tst_time << " us\n";
325
326 delete[] input;
327 delete[] output;
328 delete[] output2;
329 }
330 } // namespace AV1HighbdHiprecConvolve
331 } // namespace libaom_test
332