/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11 
#include <cstddef>
#include <ctime>
#include <iostream>

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "config/av1_rtcd.h"

#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"

#include "aom_ports/aom_timer.h"
#include "av1/common/mv.h"
#include "av1/common/restoration.h"
26 
27 namespace {
28 
29 using ::testing::make_tuple;
30 using ::testing::tuple;
31 using libaom_test::ACMRandom;
32 
// Signature shared by the self-guided restoration filter implementations
// exercised in this file. The tests below pass either plain 8-bit buffers
// (bit_depth 8, highbd 0) or CONVERT_TO_BYTEPTR()-wrapped 16-bit buffers
// (highbd 1) through the two uint8_t pointer arguments.
typedef void (*SgrFunc)(const uint8_t *src8, int src_width, int src_height,
                        int src_stride, int eps, const int *xqd,
                        uint8_t *out8, int out_stride, int32_t *tmpbuf,
                        int bit_depth, int highbd);
36 
37 // Test parameter list:
38 //  <tst_fun_>
39 typedef tuple<SgrFunc> FilterTestParam;
40 
41 class AV1SelfguidedFilterTest
42     : public ::testing::TestWithParam<FilterTestParam> {
43  public:
~AV1SelfguidedFilterTest()44   virtual ~AV1SelfguidedFilterTest() {}
SetUp()45   virtual void SetUp() {}
46 
TearDown()47   virtual void TearDown() { libaom_test::ClearSystemState(); }
48 
49  protected:
RunSpeedTest()50   void RunSpeedTest() {
51     tst_fun_ = GET_PARAM(0);
52     const int pu_width = RESTORATION_PROC_UNIT_SIZE;
53     const int pu_height = RESTORATION_PROC_UNIT_SIZE;
54     const int width = 256, height = 256, stride = 288, out_stride = 288;
55     const int NUM_ITERS = 2000;
56     int i, j, k;
57 
58     uint8_t *input_ =
59         (uint8_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint8_t));
60     uint8_t *output_ = (uint8_t *)aom_memalign(
61         32, out_stride * (height + 32) * sizeof(uint8_t));
62     int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
63     uint8_t *input = input_ + stride * 16 + 16;
64     uint8_t *output = output_ + out_stride * 16 + 16;
65 
66     ACMRandom rnd(ACMRandom::DeterministicSeed());
67 
68     for (i = -16; i < height + 16; ++i)
69       for (j = -16; j < width + 16; ++j)
70         input[i * stride + j] = rnd.Rand16() & 0xFF;
71 
72     int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
73                                                         SGRPROJ_PRJ_MIN0),
74                    SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
75                                                         SGRPROJ_PRJ_MIN1) };
76     // Fix a parameter set, since the speed depends slightly on r.
77     // Change this to test different combinations of values of r.
78     int eps = 15;
79 
80     av1_loop_restoration_precal();
81 
82     aom_usec_timer ref_timer;
83     aom_usec_timer_start(&ref_timer);
84     for (i = 0; i < NUM_ITERS; ++i) {
85       for (k = 0; k < height; k += pu_height)
86         for (j = 0; j < width; j += pu_width) {
87           int w = AOMMIN(pu_width, width - j);
88           int h = AOMMIN(pu_height, height - k);
89           uint8_t *input_p = input + k * stride + j;
90           uint8_t *output_p = output + k * out_stride + j;
91           apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
92                                          output_p, out_stride, tmpbuf, 8, 0);
93         }
94     }
95     aom_usec_timer_mark(&ref_timer);
96     const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
97 
98     aom_usec_timer tst_timer;
99     aom_usec_timer_start(&tst_timer);
100     for (i = 0; i < NUM_ITERS; ++i) {
101       for (k = 0; k < height; k += pu_height)
102         for (j = 0; j < width; j += pu_width) {
103           int w = AOMMIN(pu_width, width - j);
104           int h = AOMMIN(pu_height, height - k);
105           uint8_t *input_p = input + k * stride + j;
106           uint8_t *output_p = output + k * out_stride + j;
107           tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride,
108                    tmpbuf, 8, 0);
109         }
110     }
111     aom_usec_timer_mark(&tst_timer);
112     const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
113 
114     std::cout << "[          ] C time = " << ref_time / 1000
115               << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
116 
117     EXPECT_GT(ref_time, tst_time)
118         << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n"
119         << "C time: " << ref_time << " us\n"
120         << "SIMD time: " << tst_time << " us\n";
121 
122     aom_free(input_);
123     aom_free(output_);
124     aom_free(tmpbuf);
125   }
126 
RunCorrectnessTest()127   void RunCorrectnessTest() {
128     tst_fun_ = GET_PARAM(0);
129     const int pu_width = RESTORATION_PROC_UNIT_SIZE;
130     const int pu_height = RESTORATION_PROC_UNIT_SIZE;
131     // Set the maximum width/height to test here. We actually test a small
132     // range of sizes *up to* this size, so that we can check, eg.,
133     // the behaviour on tiles which are not a multiple of 4 wide.
134     const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
135     const int NUM_ITERS = 81;
136     int i, j, k;
137 
138     uint8_t *input_ =
139         (uint8_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint8_t));
140     uint8_t *output_ = (uint8_t *)aom_memalign(
141         32, out_stride * (max_h + 32) * sizeof(uint8_t));
142     uint8_t *output2_ = (uint8_t *)aom_memalign(
143         32, out_stride * (max_h + 32) * sizeof(uint8_t));
144     int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
145 
146     uint8_t *input = input_ + stride * 16 + 16;
147     uint8_t *output = output_ + out_stride * 16 + 16;
148     uint8_t *output2 = output2_ + out_stride * 16 + 16;
149 
150     ACMRandom rnd(ACMRandom::DeterministicSeed());
151 
152     av1_loop_restoration_precal();
153 
154     for (i = 0; i < NUM_ITERS; ++i) {
155       for (j = -16; j < max_h + 16; ++j)
156         for (k = -16; k < max_w + 16; ++k)
157           input[j * stride + k] = rnd.Rand16() & 0xFF;
158 
159       int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
160                                                           SGRPROJ_PRJ_MIN0),
161                      SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
162                                                           SGRPROJ_PRJ_MIN1) };
163       int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
164 
165       // Test various tile sizes around 256x256
166       int test_w = max_w - (i / 9);
167       int test_h = max_h - (i % 9);
168 
169       for (k = 0; k < test_h; k += pu_height)
170         for (j = 0; j < test_w; j += pu_width) {
171           int w = AOMMIN(pu_width, test_w - j);
172           int h = AOMMIN(pu_height, test_h - k);
173           uint8_t *input_p = input + k * stride + j;
174           uint8_t *output_p = output + k * out_stride + j;
175           uint8_t *output2_p = output2 + k * out_stride + j;
176           tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride,
177                    tmpbuf, 8, 0);
178           apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
179                                          output2_p, out_stride, tmpbuf, 8, 0);
180         }
181 
182       for (j = 0; j < test_h; ++j)
183         for (k = 0; k < test_w; ++k) {
184           ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
185         }
186     }
187 
188     aom_free(input_);
189     aom_free(output_);
190     aom_free(output2_);
191     aom_free(tmpbuf);
192   }
193 
194  private:
195   SgrFunc tst_fun_;
196 };
197 
// Benchmark against the C reference. The DISABLED_ prefix keeps it out of
// normal test runs.
TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) {
  RunSpeedTest();
}
// Output of the implementation under test must match the C reference exactly.
TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) {
  RunCorrectnessTest();
}
200 
#if HAVE_SSE4_1
// 8-bit tests for the SSE4.1 implementation.
INSTANTIATE_TEST_CASE_P(
    SSE4_1, AV1SelfguidedFilterTest,
    ::testing::Values(&apply_selfguided_restoration_sse4_1));
#endif  // HAVE_SSE4_1
205 
#if HAVE_AVX2
// 8-bit tests for the AVX2 implementation.
INSTANTIATE_TEST_CASE_P(
    AVX2, AV1SelfguidedFilterTest,
    ::testing::Values(&apply_selfguided_restoration_avx2));
#endif  // HAVE_AVX2
210 
#if HAVE_NEON
// 8-bit tests for the NEON implementation.
INSTANTIATE_TEST_CASE_P(
    NEON, AV1SelfguidedFilterTest,
    ::testing::Values(&apply_selfguided_restoration_neon));
#endif  // HAVE_NEON
215 
216 // Test parameter list:
217 //  <tst_fun_, bit_depth>
218 typedef tuple<SgrFunc, int> HighbdFilterTestParam;
219 
220 class AV1HighbdSelfguidedFilterTest
221     : public ::testing::TestWithParam<HighbdFilterTestParam> {
222  public:
~AV1HighbdSelfguidedFilterTest()223   virtual ~AV1HighbdSelfguidedFilterTest() {}
SetUp()224   virtual void SetUp() {}
225 
TearDown()226   virtual void TearDown() { libaom_test::ClearSystemState(); }
227 
228  protected:
RunSpeedTest()229   void RunSpeedTest() {
230     tst_fun_ = GET_PARAM(0);
231     const int pu_width = RESTORATION_PROC_UNIT_SIZE;
232     const int pu_height = RESTORATION_PROC_UNIT_SIZE;
233     const int width = 256, height = 256, stride = 288, out_stride = 288;
234     const int NUM_ITERS = 2000;
235     int i, j, k;
236     int bit_depth = GET_PARAM(1);
237     int mask = (1 << bit_depth) - 1;
238 
239     uint16_t *input_ =
240         (uint16_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint16_t));
241     uint16_t *output_ = (uint16_t *)aom_memalign(
242         32, out_stride * (height + 32) * sizeof(uint16_t));
243     int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
244     uint16_t *input = input_ + stride * 16 + 16;
245     uint16_t *output = output_ + out_stride * 16 + 16;
246 
247     ACMRandom rnd(ACMRandom::DeterministicSeed());
248 
249     for (i = -16; i < height + 16; ++i)
250       for (j = -16; j < width + 16; ++j)
251         input[i * stride + j] = rnd.Rand16() & mask;
252 
253     int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
254                                                         SGRPROJ_PRJ_MIN0),
255                    SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
256                                                         SGRPROJ_PRJ_MIN1) };
257     // Fix a parameter set, since the speed depends slightly on r.
258     // Change this to test different combinations of values of r.
259     int eps = 15;
260 
261     av1_loop_restoration_precal();
262 
263     aom_usec_timer ref_timer;
264     aom_usec_timer_start(&ref_timer);
265     for (i = 0; i < NUM_ITERS; ++i) {
266       for (k = 0; k < height; k += pu_height)
267         for (j = 0; j < width; j += pu_width) {
268           int w = AOMMIN(pu_width, width - j);
269           int h = AOMMIN(pu_height, height - k);
270           uint16_t *input_p = input + k * stride + j;
271           uint16_t *output_p = output + k * out_stride + j;
272           apply_selfguided_restoration_c(
273               CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
274               CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1);
275         }
276     }
277     aom_usec_timer_mark(&ref_timer);
278     const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
279 
280     aom_usec_timer tst_timer;
281     aom_usec_timer_start(&tst_timer);
282     for (i = 0; i < NUM_ITERS; ++i) {
283       for (k = 0; k < height; k += pu_height)
284         for (j = 0; j < width; j += pu_width) {
285           int w = AOMMIN(pu_width, width - j);
286           int h = AOMMIN(pu_height, height - k);
287           uint16_t *input_p = input + k * stride + j;
288           uint16_t *output_p = output + k * out_stride + j;
289           tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
290                    CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
291                    1);
292         }
293     }
294     aom_usec_timer_mark(&tst_timer);
295     const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
296 
297     std::cout << "[          ] C time = " << ref_time / 1000
298               << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
299 
300     EXPECT_GT(ref_time, tst_time)
301         << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than "
302            "C.\n"
303         << "C time: " << ref_time << " us\n"
304         << "SIMD time: " << tst_time << " us\n";
305 
306     aom_free(input_);
307     aom_free(output_);
308     aom_free(tmpbuf);
309   }
310 
RunCorrectnessTest()311   void RunCorrectnessTest() {
312     tst_fun_ = GET_PARAM(0);
313     const int pu_width = RESTORATION_PROC_UNIT_SIZE;
314     const int pu_height = RESTORATION_PROC_UNIT_SIZE;
315     // Set the maximum width/height to test here. We actually test a small
316     // range of sizes *up to* this size, so that we can check, eg.,
317     // the behaviour on tiles which are not a multiple of 4 wide.
318     const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
319     const int NUM_ITERS = 81;
320     int i, j, k;
321     int bit_depth = GET_PARAM(1);
322     int mask = (1 << bit_depth) - 1;
323 
324     uint16_t *input_ =
325         (uint16_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint16_t));
326     uint16_t *output_ = (uint16_t *)aom_memalign(
327         32, out_stride * (max_h + 32) * sizeof(uint16_t));
328     uint16_t *output2_ = (uint16_t *)aom_memalign(
329         32, out_stride * (max_h + 32) * sizeof(uint16_t));
330     int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
331 
332     uint16_t *input = input_ + stride * 16 + 16;
333     uint16_t *output = output_ + out_stride * 16 + 16;
334     uint16_t *output2 = output2_ + out_stride * 16 + 16;
335 
336     ACMRandom rnd(ACMRandom::DeterministicSeed());
337 
338     av1_loop_restoration_precal();
339 
340     for (i = 0; i < NUM_ITERS; ++i) {
341       for (j = -16; j < max_h + 16; ++j)
342         for (k = -16; k < max_w + 16; ++k)
343           input[j * stride + k] = rnd.Rand16() & mask;
344 
345       int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
346                                                           SGRPROJ_PRJ_MIN0),
347                      SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
348                                                           SGRPROJ_PRJ_MIN1) };
349       int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
350 
351       // Test various tile sizes around 256x256
352       int test_w = max_w - (i / 9);
353       int test_h = max_h - (i % 9);
354 
355       for (k = 0; k < test_h; k += pu_height)
356         for (j = 0; j < test_w; j += pu_width) {
357           int w = AOMMIN(pu_width, test_w - j);
358           int h = AOMMIN(pu_height, test_h - k);
359           uint16_t *input_p = input + k * stride + j;
360           uint16_t *output_p = output + k * out_stride + j;
361           uint16_t *output2_p = output2 + k * out_stride + j;
362           tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
363                    CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
364                    1);
365           apply_selfguided_restoration_c(
366               CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
367               CONVERT_TO_BYTEPTR(output2_p), out_stride, tmpbuf, bit_depth, 1);
368         }
369 
370       for (j = 0; j < test_h; ++j)
371         for (k = 0; k < test_w; ++k)
372           ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
373     }
374 
375     aom_free(input_);
376     aom_free(output_);
377     aom_free(output2_);
378     aom_free(tmpbuf);
379   }
380 
381  private:
382   SgrFunc tst_fun_;
383 };
384 
// Benchmark against the C reference. The DISABLED_ prefix keeps it out of
// normal test runs.
TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) {
  RunSpeedTest();
}
// Output of the implementation under test must match the C reference exactly.
TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) {
  RunCorrectnessTest();
}
387 
#if HAVE_SSE4_1
// Bit depths at which the SSE4.1 implementation is tested.
const int highbd_params_sse4_1[] = { 8, 10, 12 };
INSTANTIATE_TEST_CASE_P(
    SSE4_1, AV1HighbdSelfguidedFilterTest,
    ::testing::Combine(::testing::Values(&apply_selfguided_restoration_sse4_1),
                       ::testing::ValuesIn(highbd_params_sse4_1)));
#endif  // HAVE_SSE4_1
395 
#if HAVE_AVX2
// Bit depths at which the AVX2 implementation is tested.
const int highbd_params_avx2[] = { 8, 10, 12 };
INSTANTIATE_TEST_CASE_P(
    AVX2, AV1HighbdSelfguidedFilterTest,
    ::testing::Combine(::testing::Values(&apply_selfguided_restoration_avx2),
                       ::testing::ValuesIn(highbd_params_avx2)));
#endif  // HAVE_AVX2
#if HAVE_NEON
// Bit depths at which the NEON implementation is tested.
const int highbd_params_neon[] = { 8, 10, 12 };
INSTANTIATE_TEST_CASE_P(
    NEON, AV1HighbdSelfguidedFilterTest,
    ::testing::Combine(::testing::Values(&apply_selfguided_restoration_neon),
                       ::testing::ValuesIn(highbd_params_neon)));
#endif  // HAVE_NEON
410 }  // namespace
411