1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
#include "test/hiprec_convolve_test_util.h"

#include <vector>

#include "av1/common/restoration.h"
15 
16 using ::testing::make_tuple;
17 using ::testing::tuple;
18 
19 namespace libaom_test {
20 
21 // Generate a random pair of filter kernels, using the ranges
22 // of possible values from the loop-restoration experiment
generate_kernels(ACMRandom * rnd,InterpKernel hkernel,InterpKernel vkernel)23 static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
24                              InterpKernel vkernel) {
25   hkernel[0] = hkernel[6] =
26       WIENER_FILT_TAP0_MINV +
27       rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
28   hkernel[1] = hkernel[5] =
29       WIENER_FILT_TAP1_MINV +
30       rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
31   hkernel[2] = hkernel[4] =
32       WIENER_FILT_TAP2_MINV +
33       rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
34   hkernel[3] = -(hkernel[0] + hkernel[1] + hkernel[2]);
35   hkernel[7] = 0;
36 
37   vkernel[0] = vkernel[6] =
38       WIENER_FILT_TAP0_MINV +
39       rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
40   vkernel[1] = vkernel[5] =
41       WIENER_FILT_TAP1_MINV +
42       rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
43   vkernel[2] = vkernel[4] =
44       WIENER_FILT_TAP2_MINV +
45       rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
46   vkernel[3] = -(vkernel[0] + vkernel[1] + vkernel[2]);
47   vkernel[7] = 0;
48 }
49 
50 namespace AV1HiprecConvolve {
51 
BuildParams(hiprec_convolve_func filter)52 ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
53     hiprec_convolve_func filter) {
54   const HiprecConvolveParam params[] = {
55     make_tuple(8, 8, 50000, filter),   make_tuple(8, 4, 50000, filter),
56     make_tuple(64, 24, 1000, filter),  make_tuple(64, 64, 1000, filter),
57     make_tuple(64, 56, 1000, filter),  make_tuple(32, 8, 10000, filter),
58     make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
59     make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
60     make_tuple(64, 34, 1000, filter),  make_tuple(8, 17, 10000, filter),
61     make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
62   };
63   return ::testing::ValuesIn(params);
64 }
65 
~AV1HiprecConvolveTest()66 AV1HiprecConvolveTest::~AV1HiprecConvolveTest() {}
SetUp()67 void AV1HiprecConvolveTest::SetUp() {
68   rnd_.Reset(ACMRandom::DeterministicSeed());
69 }
70 
TearDown()71 void AV1HiprecConvolveTest::TearDown() { libaom_test::ClearSystemState(); }
72 
RunCheckOutput(hiprec_convolve_func test_impl)73 void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
74   const int w = 128, h = 128;
75   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
76   const int num_iters = GET_PARAM(2);
77   int i, j;
78   const ConvolveParams conv_params = get_conv_params_wiener(8);
79 
80   uint8_t *input_ = new uint8_t[h * w];
81   uint8_t *input = input_;
82 
83   // The AVX2 convolve functions always write rows with widths that are
84   // multiples of 16. So to avoid a buffer overflow, we may need to pad
85   // rows to a multiple of 16.
86   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
87   uint8_t *output = new uint8_t[output_n];
88   uint8_t *output2 = new uint8_t[output_n];
89 
90   // Generate random filter kernels
91   DECLARE_ALIGNED(16, InterpKernel, hkernel);
92   DECLARE_ALIGNED(16, InterpKernel, vkernel);
93 
94   generate_kernels(&rnd_, hkernel, vkernel);
95 
96   for (i = 0; i < h; ++i)
97     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
98 
99   for (i = 0; i < num_iters; ++i) {
100     // Choose random locations within the source block
101     int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
102     int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
103     av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w, output,
104                                   out_w, hkernel, 16, vkernel, 16, out_w, out_h,
105                                   &conv_params);
106     test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16,
107               vkernel, 16, out_w, out_h, &conv_params);
108 
109     for (j = 0; j < out_w * out_h; ++j)
110       ASSERT_EQ(output[j], output2[j])
111           << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
112           << (j / out_w) << ") on iteration " << i;
113   }
114   delete[] input_;
115   delete[] output;
116   delete[] output2;
117 }
118 
RunSpeedTest(hiprec_convolve_func test_impl)119 void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
120   const int w = 128, h = 128;
121   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
122   const int num_iters = GET_PARAM(2) / 500;
123   int i, j, k;
124   const ConvolveParams conv_params = get_conv_params_wiener(8);
125 
126   uint8_t *input_ = new uint8_t[h * w];
127   uint8_t *input = input_;
128 
129   // The AVX2 convolve functions always write rows with widths that are
130   // multiples of 16. So to avoid a buffer overflow, we may need to pad
131   // rows to a multiple of 16.
132   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
133   uint8_t *output = new uint8_t[output_n];
134   uint8_t *output2 = new uint8_t[output_n];
135 
136   // Generate random filter kernels
137   DECLARE_ALIGNED(16, InterpKernel, hkernel);
138   DECLARE_ALIGNED(16, InterpKernel, vkernel);
139 
140   generate_kernels(&rnd_, hkernel, vkernel);
141 
142   for (i = 0; i < h; ++i)
143     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
144 
145   aom_usec_timer ref_timer;
146   aom_usec_timer_start(&ref_timer);
147   for (i = 0; i < num_iters; ++i) {
148     for (j = 3; j < h - out_h - 4; j++) {
149       for (k = 3; k < w - out_w - 4; k++) {
150         av1_wiener_convolve_add_src_c(input + j * w + k, w, output, out_w,
151                                       hkernel, 16, vkernel, 16, out_w, out_h,
152                                       &conv_params);
153       }
154     }
155   }
156   aom_usec_timer_mark(&ref_timer);
157   const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
158 
159   aom_usec_timer tst_timer;
160   aom_usec_timer_start(&tst_timer);
161   for (i = 0; i < num_iters; ++i) {
162     for (j = 3; j < h - out_h - 4; j++) {
163       for (k = 3; k < w - out_w - 4; k++) {
164         test_impl(input + j * w + k, w, output2, out_w, hkernel, 16, vkernel,
165                   16, out_w, out_h, &conv_params);
166       }
167     }
168   }
169   aom_usec_timer_mark(&tst_timer);
170   const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
171 
172   std::cout << "[          ] C time = " << ref_time / 1000
173             << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
174 
175   EXPECT_GT(ref_time, tst_time)
176       << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
177       << "C time: " << ref_time << " us\n"
178       << "SIMD time: " << tst_time << " us\n";
179 
180   delete[] input_;
181   delete[] output;
182   delete[] output2;
183 }
184 }  // namespace AV1HiprecConvolve
185 
186 namespace AV1HighbdHiprecConvolve {
187 
BuildParams(highbd_hiprec_convolve_func filter)188 ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
189     highbd_hiprec_convolve_func filter) {
190   const HighbdHiprecConvolveParam params[] = {
191     make_tuple(8, 8, 50000, 8, filter),   make_tuple(64, 64, 1000, 8, filter),
192     make_tuple(32, 8, 10000, 8, filter),  make_tuple(8, 8, 50000, 10, filter),
193     make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter),
194     make_tuple(8, 8, 50000, 12, filter),  make_tuple(64, 64, 1000, 12, filter),
195     make_tuple(32, 8, 10000, 12, filter),
196   };
197   return ::testing::ValuesIn(params);
198 }
199 
~AV1HighbdHiprecConvolveTest()200 AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() {}
SetUp()201 void AV1HighbdHiprecConvolveTest::SetUp() {
202   rnd_.Reset(ACMRandom::DeterministicSeed());
203 }
204 
TearDown()205 void AV1HighbdHiprecConvolveTest::TearDown() {
206   libaom_test::ClearSystemState();
207 }
208 
RunCheckOutput(highbd_hiprec_convolve_func test_impl)209 void AV1HighbdHiprecConvolveTest::RunCheckOutput(
210     highbd_hiprec_convolve_func test_impl) {
211   const int w = 128, h = 128;
212   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
213   const int num_iters = GET_PARAM(2);
214   const int bd = GET_PARAM(3);
215   int i, j;
216   const ConvolveParams conv_params = get_conv_params_wiener(bd);
217 
218   uint16_t *input = new uint16_t[h * w];
219 
220   // The AVX2 convolve functions always write rows with widths that are
221   // multiples of 16. So to avoid a buffer overflow, we may need to pad
222   // rows to a multiple of 16.
223   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
224   uint16_t *output = new uint16_t[output_n];
225   uint16_t *output2 = new uint16_t[output_n];
226 
227   // Generate random filter kernels
228   DECLARE_ALIGNED(16, InterpKernel, hkernel);
229   DECLARE_ALIGNED(16, InterpKernel, vkernel);
230 
231   generate_kernels(&rnd_, hkernel, vkernel);
232 
233   for (i = 0; i < h; ++i)
234     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
235 
236   uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
237   uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
238   uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
239 
240   for (i = 0; i < num_iters; ++i) {
241     // Choose random locations within the source block
242     int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
243     int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
244     av1_highbd_wiener_convolve_add_src_c(
245         input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, 16,
246         vkernel, 16, out_w, out_h, &conv_params, bd);
247     test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
248               hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
249 
250     for (j = 0; j < out_w * out_h; ++j)
251       ASSERT_EQ(output[j], output2[j])
252           << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
253           << (j / out_w) << ") on iteration " << i;
254   }
255   delete[] input;
256   delete[] output;
257   delete[] output2;
258 }
259 
RunSpeedTest(highbd_hiprec_convolve_func test_impl)260 void AV1HighbdHiprecConvolveTest::RunSpeedTest(
261     highbd_hiprec_convolve_func test_impl) {
262   const int w = 128, h = 128;
263   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
264   const int num_iters = GET_PARAM(2) / 500;
265   const int bd = GET_PARAM(3);
266   int i, j, k;
267   const ConvolveParams conv_params = get_conv_params_wiener(bd);
268 
269   uint16_t *input = new uint16_t[h * w];
270 
271   // The AVX2 convolve functions always write rows with widths that are
272   // multiples of 16. So to avoid a buffer overflow, we may need to pad
273   // rows to a multiple of 16.
274   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
275   uint16_t *output = new uint16_t[output_n];
276   uint16_t *output2 = new uint16_t[output_n];
277 
278   // Generate random filter kernels
279   DECLARE_ALIGNED(16, InterpKernel, hkernel);
280   DECLARE_ALIGNED(16, InterpKernel, vkernel);
281 
282   generate_kernels(&rnd_, hkernel, vkernel);
283 
284   for (i = 0; i < h; ++i)
285     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
286 
287   uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
288   uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
289   uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
290 
291   aom_usec_timer ref_timer;
292   aom_usec_timer_start(&ref_timer);
293   for (i = 0; i < num_iters; ++i) {
294     for (j = 3; j < h - out_h - 4; j++) {
295       for (k = 3; k < w - out_w - 4; k++) {
296         av1_highbd_wiener_convolve_add_src_c(
297             input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
298             16, out_w, out_h, &conv_params, bd);
299       }
300     }
301   }
302   aom_usec_timer_mark(&ref_timer);
303   const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
304 
305   aom_usec_timer tst_timer;
306   aom_usec_timer_start(&tst_timer);
307   for (i = 0; i < num_iters; ++i) {
308     for (j = 3; j < h - out_h - 4; j++) {
309       for (k = 3; k < w - out_w - 4; k++) {
310         test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
311                   vkernel, 16, out_w, out_h, &conv_params, bd);
312       }
313     }
314   }
315   aom_usec_timer_mark(&tst_timer);
316   const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
317 
318   std::cout << "[          ] C time = " << ref_time / 1000
319             << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
320 
321   EXPECT_GT(ref_time, tst_time)
322       << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
323       << "C time: " << ref_time << " us\n"
324       << "SIMD time: " << tst_time << " us\n";
325 
326   delete[] input;
327   delete[] output;
328   delete[] output2;
329 }
330 }  // namespace AV1HighbdHiprecConvolve
331 }  // namespace libaom_test
332