1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <string.h>
13
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
15
16 #include "./aom_config.h"
17 #include "./aom_dsp_rtcd.h"
18 #include "test/acm_random.h"
19 #include "test/clear_system_state.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
22 #include "aom_dsp/aom_dsp_common.h"
23 #include "aom_dsp/aom_filter.h"
24 #include "aom_mem/aom_mem.h"
25 #include "aom_ports/mem.h"
26 #include "aom_ports/aom_timer.h"
27 #include "av1/common/filter.h"
28
29 namespace {
30
// Largest block edge exercised by these tests (one superblock).
static const unsigned int kMaxDimension = MAX_SB_SIZE;

// Common signature for every convolve variant under test: filter a w x h
// block from |src| into |dst|. The filter-stride arguments are the step, in
// kernel-table entries, between successive rows/columns (the tests below pass
// 16, i.e. one kernel per subpel phase); the copy/avg variants ignore the
// filter arguments entirely.
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);
38
// Table of all convolve entry points provided by one implementation
// (C reference, SSE2, NEON, ...) plus the bit depth it operates at.
struct ConvolveFunctions {
  ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8,
                    ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg,
                    ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8,
                    ConvolveFunc sh8_avg, ConvolveFunc sv8,
                    ConvolveFunc sv8_avg, ConvolveFunc shv8,
                    ConvolveFunc shv8_avg, int bd)
      : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
        v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8),
        sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg),
        use_highbd_(bd) {}

  ConvolveFunc copy_;    // plain block copy (filters unused)
  ConvolveFunc avg_;     // copy averaged with the existing destination
  ConvolveFunc h8_;      // horizontal 8-tap
  ConvolveFunc v8_;      // vertical 8-tap
  ConvolveFunc hv8_;     // 2-D (horizontal then vertical) 8-tap
  ConvolveFunc h8_avg_;  // horizontal 8-tap, averaging into dst
  ConvolveFunc v8_avg_;  // vertical 8-tap, averaging into dst
  ConvolveFunc hv8_avg_; // 2-D 8-tap, averaging into dst
  ConvolveFunc sh8_;     // scaled horiz
  ConvolveFunc sv8_;     // scaled vert
  ConvolveFunc shv8_;    // scaled horiz/vert
  ConvolveFunc sh8_avg_;   // scaled avg horiz
  ConvolveFunc sv8_avg_;   // scaled avg vert
  ConvolveFunc shv8_avg_;  // scaled avg horiz/vert
  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
};
67
// Test parameter tuple: <block width, block height, function table>.
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;

// Expands to one ConvolveParam per supported block size up to 64x64.
#define ALL_SIZES_64(convolve_fn)                                         \
  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
      make_tuple(64, 64, &convolve_fn)

// With ext-partition the 128-pixel superblock sizes are covered as well.
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
#define ALL_SIZES(convolve_fn)                                          \
  make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
      make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
#else
#define ALL_SIZES ALL_SIZES_64
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION

// Reference 8-tap subpixel filter, slightly modified to fit into this test.
// AV1_FILTER_WEIGHT is the sum of a unit-gain kernel's taps; AV1_FILTER_SHIFT
// is the matching normalization shift (128 == 1 << 7).
#define AV1_FILTER_WEIGHT 128
#define AV1_FILTER_SHIFT 7
clip_pixel(int x)90 uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }
91
filter_block2d_8_c(const uint8_t * src_ptr,unsigned int src_stride,const int16_t * HFilter,const int16_t * VFilter,uint8_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height)92 void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride,
93 const int16_t *HFilter, const int16_t *VFilter,
94 uint8_t *dst_ptr, unsigned int dst_stride,
95 unsigned int output_width, unsigned int output_height) {
96 // Between passes, we use an intermediate buffer whose height is extended to
97 // have enough horizontally filtered values as input for the vertical pass.
98 // This buffer is allocated to be big enough for the largest block type we
99 // support.
100 const int kInterp_Extend = 4;
101 const unsigned int intermediate_height =
102 (kInterp_Extend - 1) + output_height + kInterp_Extend;
103 unsigned int i, j;
104
105 assert(intermediate_height > 7);
106
107 // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
108 // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
109 // + kInterp_Extend
110 // = 3 + 16 + 4
111 // = 23
112 // and filter_max_width = 16
113 //
114 uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
115 const int intermediate_next_stride =
116 1 - static_cast<int>(intermediate_height * output_width);
117
118 // Horizontal pass (src -> transposed intermediate).
119 uint8_t *output_ptr = intermediate_buffer;
120 const int src_next_row_stride = src_stride - output_width;
121 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
122 for (i = 0; i < intermediate_height; ++i) {
123 for (j = 0; j < output_width; ++j) {
124 // Apply filter...
125 const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
126 (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
127 (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
128 (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
129 (AV1_FILTER_WEIGHT >> 1); // Rounding
130
131 // Normalize back to 0-255...
132 *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT);
133 ++src_ptr;
134 output_ptr += intermediate_height;
135 }
136 src_ptr += src_next_row_stride;
137 output_ptr += intermediate_next_stride;
138 }
139
140 // Vertical pass (transposed intermediate -> dst).
141 src_ptr = intermediate_buffer;
142 const int dst_next_row_stride = dst_stride - output_width;
143 for (i = 0; i < output_height; ++i) {
144 for (j = 0; j < output_width; ++j) {
145 // Apply filter...
146 const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
147 (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
148 (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
149 (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
150 (AV1_FILTER_WEIGHT >> 1); // Rounding
151
152 // Normalize back to 0-255...
153 *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT);
154 src_ptr += intermediate_height;
155 }
156 src_ptr += intermediate_next_stride;
157 dst_ptr += dst_next_row_stride;
158 }
159 }
160
block2d_average_c(uint8_t * src,unsigned int src_stride,uint8_t * output_ptr,unsigned int output_stride,unsigned int output_width,unsigned int output_height)161 void block2d_average_c(uint8_t *src, unsigned int src_stride,
162 uint8_t *output_ptr, unsigned int output_stride,
163 unsigned int output_width, unsigned int output_height) {
164 unsigned int i, j;
165 for (i = 0; i < output_height; ++i) {
166 for (j = 0; j < output_width; ++j) {
167 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
168 }
169 output_ptr += output_stride;
170 }
171 }
172
filter_average_block2d_8_c(const uint8_t * src_ptr,const unsigned int src_stride,const int16_t * HFilter,const int16_t * VFilter,uint8_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height)173 void filter_average_block2d_8_c(const uint8_t *src_ptr,
174 const unsigned int src_stride,
175 const int16_t *HFilter, const int16_t *VFilter,
176 uint8_t *dst_ptr, unsigned int dst_stride,
177 unsigned int output_width,
178 unsigned int output_height) {
179 uint8_t tmp[kMaxDimension * kMaxDimension];
180
181 assert(output_width <= kMaxDimension);
182 assert(output_height <= kMaxDimension);
183 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
184 output_width, output_height);
185 block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width,
186 output_height);
187 }
188
189 #if CONFIG_HIGHBITDEPTH
highbd_filter_block2d_8_c(const uint16_t * src_ptr,const unsigned int src_stride,const int16_t * HFilter,const int16_t * VFilter,uint16_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height,int bd)190 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
191 const unsigned int src_stride,
192 const int16_t *HFilter, const int16_t *VFilter,
193 uint16_t *dst_ptr, unsigned int dst_stride,
194 unsigned int output_width,
195 unsigned int output_height, int bd) {
196 // Between passes, we use an intermediate buffer whose height is extended to
197 // have enough horizontally filtered values as input for the vertical pass.
198 // This buffer is allocated to be big enough for the largest block type we
199 // support.
200 const int kInterp_Extend = 4;
201 const unsigned int intermediate_height =
202 (kInterp_Extend - 1) + output_height + kInterp_Extend;
203
204 /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
205 * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
206 * + kInterp_Extend
207 * = 3 + 16 + 4
208 * = 23
209 * and filter_max_width = 16
210 */
211 uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 };
212 const int intermediate_next_stride =
213 1 - static_cast<int>(intermediate_height * output_width);
214
215 // Horizontal pass (src -> transposed intermediate).
216 {
217 uint16_t *output_ptr = intermediate_buffer;
218 const int src_next_row_stride = src_stride - output_width;
219 unsigned int i, j;
220 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
221 for (i = 0; i < intermediate_height; ++i) {
222 for (j = 0; j < output_width; ++j) {
223 // Apply filter...
224 const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
225 (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
226 (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
227 (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
228 (AV1_FILTER_WEIGHT >> 1); // Rounding
229
230 // Normalize back to 0-255...
231 *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
232 ++src_ptr;
233 output_ptr += intermediate_height;
234 }
235 src_ptr += src_next_row_stride;
236 output_ptr += intermediate_next_stride;
237 }
238 }
239
240 // Vertical pass (transposed intermediate -> dst).
241 {
242 const uint16_t *interm_ptr = intermediate_buffer;
243 const int dst_next_row_stride = dst_stride - output_width;
244 unsigned int i, j;
245 for (i = 0; i < output_height; ++i) {
246 for (j = 0; j < output_width; ++j) {
247 // Apply filter...
248 const int temp =
249 (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
250 (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
251 (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
252 (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
253 (AV1_FILTER_WEIGHT >> 1); // Rounding
254
255 // Normalize back to 0-255...
256 *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
257 interm_ptr += intermediate_height;
258 }
259 interm_ptr += intermediate_next_stride;
260 dst_ptr += dst_next_row_stride;
261 }
262 }
263 }
264
highbd_block2d_average_c(uint16_t * src,unsigned int src_stride,uint16_t * output_ptr,unsigned int output_stride,unsigned int output_width,unsigned int output_height)265 void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
266 uint16_t *output_ptr, unsigned int output_stride,
267 unsigned int output_width,
268 unsigned int output_height) {
269 unsigned int i, j;
270 for (i = 0; i < output_height; ++i) {
271 for (j = 0; j < output_width; ++j) {
272 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
273 }
274 output_ptr += output_stride;
275 }
276 }
277
highbd_filter_average_block2d_8_c(const uint16_t * src_ptr,unsigned int src_stride,const int16_t * HFilter,const int16_t * VFilter,uint16_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height,int bd)278 void highbd_filter_average_block2d_8_c(
279 const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
280 const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride,
281 unsigned int output_width, unsigned int output_height, int bd) {
282 uint16_t tmp[kMaxDimension * kMaxDimension];
283
284 assert(output_width <= kMaxDimension);
285 assert(output_height <= kMaxDimension);
286 highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp,
287 kMaxDimension, output_width, output_height, bd);
288 highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
289 output_width, output_height);
290 }
291 #endif // CONFIG_HIGHBITDEPTH
292
// Fixture for the convolve tests. Each instance is parameterized by
// (width, height, function table). A kOuterBlockSize x kOuterBlockSize
// buffer is shared per test case, with the block under test centered inside
// a guard border so out-of-bounds writes can be detected.
class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 public:
  // Allocates the shared buffers once per test case. input_ is deliberately
  // offset by one byte to exercise unaligned reads; the output buffers stay
  // kDataAlignment-byte aligned.
  static void SetUpTestCase() {
    // Force input_ to be unaligned, output to be 16 byte aligned.
    input_ = reinterpret_cast<uint8_t *>(
                 aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
             1;
    output_ = reinterpret_cast<uint8_t *>(
        aom_memalign(kDataAlignment, kOutputBufferSize));
    output_ref_ = reinterpret_cast<uint8_t *>(
        aom_memalign(kDataAlignment, kOutputBufferSize));
#if CONFIG_HIGHBITDEPTH
    input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
                   kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
               1;
    output16_ = reinterpret_cast<uint16_t *>(
        aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
    output16_ref_ = reinterpret_cast<uint16_t *>(
        aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
#endif
  }

  virtual void TearDown() { libaom_test::ClearSystemState(); }

  // Frees the shared buffers. The "- 1" undoes the "+ 1" misalignment
  // applied at allocation time so aom_free gets the original pointer.
  static void TearDownTestCase() {
    aom_free(input_ - 1);
    input_ = NULL;
    aom_free(output_);
    output_ = NULL;
    aom_free(output_ref_);
    output_ref_ = NULL;
#if CONFIG_HIGHBITDEPTH
    aom_free(input16_ - 1);
    input16_ = NULL;
    aom_free(output16_);
    output16_ = NULL;
    aom_free(output16_ref_);
    output16_ref_ = NULL;
#endif
  }

 protected:
  static const int kDataAlignment = 16;
  // Outer buffer edge: leaves ample context and guard area around the block.
  static const int kOuterBlockSize = 4 * kMaxDimension;
  static const int kInputStride = kOuterBlockSize;
  static const int kOutputStride = kOuterBlockSize;
  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;

  int Width() const { return GET_PARAM(0); }
  int Height() const { return GET_PARAM(1); }
  // Left border of the centered block, rounded up so the block itself starts
  // on a kDataAlignment boundary.
  int BorderLeft() const {
    const int center = (kOuterBlockSize - Width()) / 2;
    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
  }
  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }

  // True when flat index |i| into the outer buffer falls outside the centered
  // Width() x Height() block under test (i.e. in the guard border).
  bool IsIndexInBorder(int i) {
    return (i < BorderTop() * kOuterBlockSize ||
            i >= (BorderTop() + Height()) * kOuterBlockSize ||
            i % kOuterBlockSize < BorderLeft() ||
            i % kOuterBlockSize >= (BorderLeft() + Width()));
  }

  virtual void SetUp() {
    UUT_ = GET_PARAM(2);
#if CONFIG_HIGHBITDEPTH
    // mask_ is the maximum pixel value for the configured bit depth.
    if (UUT_->use_highbd_ != 0)
      mask_ = (1 << UUT_->use_highbd_) - 1;
    else
      mask_ = 255;
#endif
    /* Set up guard blocks for an inner block centered in the outer block */
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i)) {
        output_[i] = 255;
#if CONFIG_HIGHBITDEPTH
        output16_[i] = mask_;
#endif
      } else {
        output_[i] = 0;
#if CONFIG_HIGHBITDEPTH
        output16_[i] = 0;
#endif
      }
    }

    // Fill the input with alternating saturated and extreme random values to
    // stress the filters' clamping behavior.
    ::libaom_test::ACMRandom prng;
    for (int i = 0; i < kInputBufferSize; ++i) {
      if (i & 1) {
        input_[i] = 255;
#if CONFIG_HIGHBITDEPTH
        input16_[i] = mask_;
#endif
      } else {
        input_[i] = prng.Rand8Extremes();
#if CONFIG_HIGHBITDEPTH
        input16_[i] = prng.Rand16() & mask_;
#endif
      }
    }
  }

  // Fills the whole input buffer (both widths) with |value|.
  void SetConstantInput(int value) {
    memset(input_, value, kInputBufferSize);
#if CONFIG_HIGHBITDEPTH
    aom_memset16(input16_, value, kInputBufferSize);
#endif
  }

  // Snapshots the current output so the avg tests can compute expectations.
  void CopyOutputToRef() {
    memcpy(output_ref_, output_, kOutputBufferSize);
#if CONFIG_HIGHBITDEPTH
    // Copy 16-bit pixels values. The effective number of bytes is double.
    memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
#endif
  }

  // Verifies the guard border was not written by the function under test.
  // NOTE(review): only the 8-bit border (output_) is checked; the 16-bit
  // border in output16_ is not verified -- confirm whether that is intended.
  void CheckGuardBlocks() {
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i)) {
        EXPECT_EQ(255, output_[i]);
      }
    }
  }

  // Pointer to the top-left pixel of the centered input block. On the
  // high-bitdepth path this is the CONVERT_TO_BYTEPTR view of input16_.
  uint8_t *input() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return input_ + offset;
    } else {
      return CONVERT_TO_BYTEPTR(input16_) + offset;
    }
#else
    return input_ + offset;
#endif
  }

  // Pointer to the top-left pixel of the centered output block.
  uint8_t *output() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return output_ + offset;
    } else {
      return CONVERT_TO_BYTEPTR(output16_) + offset;
    }
#else
    return output_ + offset;
#endif
  }

  // Pointer to the top-left pixel of the centered reference-output block.
  uint8_t *output_ref() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return output_ref_ + offset;
    } else {
      return CONVERT_TO_BYTEPTR(output_ref_) + offset;
    }
#else
    return output_ref_ + offset;
#endif
  }

  // Reads pixel |index| from |list|, going through the 16-bit view when the
  // configuration under test is high bitdepth.
  uint16_t lookup(uint8_t *list, int index) const {
#if CONFIG_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return list[index];
    } else {
      return CONVERT_TO_SHORTPTR(list)[index];
    }
#else
    return list[index];
#endif
  }

  // Writes pixel |val| at |index|, using the representation matching the
  // configured bit depth.
  void assign_val(uint8_t *list, int index, uint16_t val) const {
#if CONFIG_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      list[index] = (uint8_t)val;
    } else {
      CONVERT_TO_SHORTPTR(list)[index] = val;
    }
#else
    list[index] = (uint8_t)val;
#endif
  }

  // Dispatches to the 8-bit or high-bitdepth reference filter-and-average
  // implementation depending on the configuration under test.
  void wrapper_filter_average_block2d_8_c(
      const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
      const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
      unsigned int output_width, unsigned int output_height) {
#if CONFIG_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
                                 dst_stride, output_width, output_height);
    } else {
      highbd_filter_average_block2d_8_c(
          CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
          CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
          UUT_->use_highbd_);
    }
#else
    filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
                               dst_stride, output_width, output_height);
#endif
  }

  // Dispatches to the 8-bit or high-bitdepth reference filter implementation.
  void wrapper_filter_block2d_8_c(
      const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
      const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
      unsigned int output_width, unsigned int output_height) {
#if CONFIG_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
                         dst_stride, output_width, output_height);
    } else {
      highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
                                HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
                                dst_stride, output_width, output_height,
                                UUT_->use_highbd_);
    }
#else
    filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
                       dst_stride, output_width, output_height);
#endif
  }

  const ConvolveFunctions *UUT_;  // function table under test
  static uint8_t *input_;
  static uint8_t *output_;
  static uint8_t *output_ref_;
#if CONFIG_HIGHBITDEPTH
  static uint16_t *input16_;
  static uint16_t *output16_;
  static uint16_t *output16_ref_;
  int mask_;  // maximum pixel value for the current bit depth
#endif
};
533
// Definitions of the fixture's shared buffers; allocated in SetUpTestCase()
// and released in TearDownTestCase().
uint8_t *ConvolveTest::input_ = NULL;
uint8_t *ConvolveTest::output_ = NULL;
uint8_t *ConvolveTest::output_ref_ = NULL;
#if CONFIG_HIGHBITDEPTH
uint16_t *ConvolveTest::input16_ = NULL;
uint16_t *ConvolveTest::output16_ = NULL;
uint16_t *ConvolveTest::output16_ref_ = NULL;
#endif
542
TEST_P(ConvolveTest,GuardBlocks)543 TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
544
TEST_P(ConvolveTest,Copy)545 TEST_P(ConvolveTest, Copy) {
546 uint8_t *const in = input();
547 uint8_t *const out = output();
548
549 ASM_REGISTER_STATE_CHECK(UUT_->copy_(in, kInputStride, out, kOutputStride,
550 NULL, 0, NULL, 0, Width(), Height()));
551
552 CheckGuardBlocks();
553
554 for (int y = 0; y < Height(); ++y)
555 for (int x = 0; x < Width(); ++x)
556 ASSERT_EQ(lookup(out, y * kOutputStride + x),
557 lookup(in, y * kInputStride + x))
558 << "(" << x << "," << y << ")";
559 }
560
TEST_P(ConvolveTest,Avg)561 TEST_P(ConvolveTest, Avg) {
562 uint8_t *const in = input();
563 uint8_t *const out = output();
564 uint8_t *const out_ref = output_ref();
565 CopyOutputToRef();
566
567 ASM_REGISTER_STATE_CHECK(UUT_->avg_(in, kInputStride, out, kOutputStride,
568 NULL, 0, NULL, 0, Width(), Height()));
569
570 CheckGuardBlocks();
571
572 for (int y = 0; y < Height(); ++y)
573 for (int x = 0; x < Width(); ++x)
574 ASSERT_EQ(lookup(out, y * kOutputStride + x),
575 ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
576 lookup(out_ref, y * kOutputStride + x),
577 1))
578 << "(" << x << "," << y << ")";
579 }
580
TEST_P(ConvolveTest,CopyHoriz)581 TEST_P(ConvolveTest, CopyHoriz) {
582 uint8_t *const in = input();
583 uint8_t *const out = output();
584 DECLARE_ALIGNED(256, const int16_t,
585 filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
586
587 ASM_REGISTER_STATE_CHECK(UUT_->sh8_(in, kInputStride, out, kOutputStride,
588 filter8, 16, filter8, 16, Width(),
589 Height()));
590
591 CheckGuardBlocks();
592
593 for (int y = 0; y < Height(); ++y)
594 for (int x = 0; x < Width(); ++x)
595 ASSERT_EQ(lookup(out, y * kOutputStride + x),
596 lookup(in, y * kInputStride + x))
597 << "(" << x << "," << y << ")";
598 }
599
TEST_P(ConvolveTest,CopyVert)600 TEST_P(ConvolveTest, CopyVert) {
601 uint8_t *const in = input();
602 uint8_t *const out = output();
603 DECLARE_ALIGNED(256, const int16_t,
604 filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
605
606 ASM_REGISTER_STATE_CHECK(UUT_->sv8_(in, kInputStride, out, kOutputStride,
607 filter8, 16, filter8, 16, Width(),
608 Height()));
609
610 CheckGuardBlocks();
611
612 for (int y = 0; y < Height(); ++y)
613 for (int x = 0; x < Width(); ++x)
614 ASSERT_EQ(lookup(out, y * kOutputStride + x),
615 lookup(in, y * kInputStride + x))
616 << "(" << x << "," << y << ")";
617 }
618
TEST_P(ConvolveTest,Copy2D)619 TEST_P(ConvolveTest, Copy2D) {
620 uint8_t *const in = input();
621 uint8_t *const out = output();
622 DECLARE_ALIGNED(256, const int16_t,
623 filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
624
625 ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
626 filter8, 16, filter8, 16, Width(),
627 Height()));
628
629 CheckGuardBlocks();
630
631 for (int y = 0; y < Height(); ++y)
632 for (int x = 0; x < Width(); ++x)
633 ASSERT_EQ(lookup(out, y * kOutputStride + x),
634 lookup(in, y * kInputStride + x))
635 << "(" << x << "," << y << ")";
636 }
637
// Number of switchable interpolation filter banks, and the number of subpel
// phases (kernels) per bank.
const int kNumFilterBanks = SWITCHABLE_FILTERS;
const int kNumFilters = 16;
640
TEST(ConvolveTest,FiltersWontSaturateWhenAddedPairwise)641 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
642 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
643 const InterpFilter filter = (InterpFilter)filter_bank;
644 const InterpKernel *filters =
645 (const InterpKernel *)av1_get_interp_filter_kernel(filter);
646 #if CONFIG_DUAL_FILTER
647 const InterpFilterParams filter_params =
648 av1_get_interp_filter_params(filter);
649 if (filter_params.taps != SUBPEL_TAPS) continue;
650 #endif
651 for (int i = 0; i < kNumFilters; i++) {
652 const int p0 = filters[i][0] + filters[i][1];
653 const int p1 = filters[i][2] + filters[i][3];
654 const int p2 = filters[i][4] + filters[i][5];
655 const int p3 = filters[i][6] + filters[i][7];
656 EXPECT_LE(p0, 128);
657 EXPECT_LE(p1, 128);
658 EXPECT_LE(p2, 128);
659 EXPECT_LE(p3, 128);
660 EXPECT_LE(p0 + p3, 128);
661 EXPECT_LE(p0 + p3 + p1, 128);
662 EXPECT_LE(p0 + p3 + p1 + p2, 128);
663 EXPECT_EQ(p0 + p1 + p2 + p3, 128);
664 }
665 }
666 }
667
// All-zero kernel passed on the axis a function should ignore; if the
// implementation reads it anyway, its output will not match the reference.
const int16_t kInvalidFilter[8] = { 0 };
669
TEST_P(ConvolveTest,MatchesReferenceSubpixelFilter)670 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
671 uint8_t *const in = input();
672 uint8_t *const out = output();
673 #if CONFIG_HIGHBITDEPTH
674 uint8_t ref8[kOutputStride * kMaxDimension];
675 uint16_t ref16[kOutputStride * kMaxDimension];
676 uint8_t *ref;
677 if (UUT_->use_highbd_ == 0) {
678 ref = ref8;
679 } else {
680 ref = CONVERT_TO_BYTEPTR(ref16);
681 }
682 #else
683 uint8_t ref[kOutputStride * kMaxDimension];
684 #endif
685
686 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
687 const InterpFilter filter = (InterpFilter)filter_bank;
688 const InterpKernel *filters =
689 (const InterpKernel *)av1_get_interp_filter_kernel(filter);
690 #if CONFIG_DUAL_FILTER
691 const InterpFilterParams filter_params =
692 av1_get_interp_filter_params(filter);
693 if (filter_params.taps != SUBPEL_TAPS) continue;
694 #endif
695
696 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
697 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
698 wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
699 filters[filter_y], ref, kOutputStride,
700 Width(), Height());
701
702 if (filter_x && filter_y)
703 ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
704 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
705 filters[filter_y], 16, Width(), Height()));
706 else if (filter_y)
707 ASM_REGISTER_STATE_CHECK(
708 UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
709 16, filters[filter_y], 16, Width(), Height()));
710 else if (filter_x)
711 ASM_REGISTER_STATE_CHECK(
712 UUT_->h8_(in, kInputStride, out, kOutputStride, filters[filter_x],
713 16, kInvalidFilter, 16, Width(), Height()));
714 else
715 ASM_REGISTER_STATE_CHECK(
716 UUT_->copy_(in, kInputStride, out, kOutputStride, kInvalidFilter,
717 0, kInvalidFilter, 0, Width(), Height()));
718
719 CheckGuardBlocks();
720
721 for (int y = 0; y < Height(); ++y)
722 for (int x = 0; x < Width(); ++x)
723 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
724 lookup(out, y * kOutputStride + x))
725 << "mismatch at (" << x << "," << y << "), "
726 << "filters (" << filter_bank << "," << filter_x << ","
727 << filter_y << ")";
728 }
729 }
730 }
731 }
732
TEST_P(ConvolveTest,MatchesReferenceAveragingSubpixelFilter)733 TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
734 uint8_t *const in = input();
735 uint8_t *const out = output();
736 #if CONFIG_HIGHBITDEPTH
737 uint8_t ref8[kOutputStride * kMaxDimension];
738 uint16_t ref16[kOutputStride * kMaxDimension];
739 uint8_t *ref;
740 if (UUT_->use_highbd_ == 0) {
741 ref = ref8;
742 } else {
743 ref = CONVERT_TO_BYTEPTR(ref16);
744 }
745 #else
746 uint8_t ref[kOutputStride * kMaxDimension];
747 #endif
748
749 // Populate ref and out with some random data
750 ::libaom_test::ACMRandom prng;
751 for (int y = 0; y < Height(); ++y) {
752 for (int x = 0; x < Width(); ++x) {
753 uint16_t r;
754 #if CONFIG_HIGHBITDEPTH
755 if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
756 r = prng.Rand8Extremes();
757 } else {
758 r = prng.Rand16() & mask_;
759 }
760 #else
761 r = prng.Rand8Extremes();
762 #endif
763
764 assign_val(out, y * kOutputStride + x, r);
765 assign_val(ref, y * kOutputStride + x, r);
766 }
767 }
768
769 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
770 const InterpFilter filter = (InterpFilter)filter_bank;
771 const InterpKernel *filters =
772 (const InterpKernel *)av1_get_interp_filter_kernel(filter);
773 #if CONFIG_DUAL_FILTER
774 const InterpFilterParams filter_params =
775 av1_get_interp_filter_params(filter);
776 if (filter_params.taps != SUBPEL_TAPS) continue;
777 #endif
778
779 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
780 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
781 wrapper_filter_average_block2d_8_c(in, kInputStride, filters[filter_x],
782 filters[filter_y], ref,
783 kOutputStride, Width(), Height());
784
785 if (filter_x && filter_y)
786 ASM_REGISTER_STATE_CHECK(UUT_->hv8_avg_(
787 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
788 filters[filter_y], 16, Width(), Height()));
789 else if (filter_y)
790 ASM_REGISTER_STATE_CHECK(UUT_->v8_avg_(
791 in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
792 filters[filter_y], 16, Width(), Height()));
793 else if (filter_x)
794 ASM_REGISTER_STATE_CHECK(UUT_->h8_avg_(
795 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
796 kInvalidFilter, 16, Width(), Height()));
797 else
798 ASM_REGISTER_STATE_CHECK(
799 UUT_->avg_(in, kInputStride, out, kOutputStride, kInvalidFilter,
800 0, kInvalidFilter, 0, Width(), Height()));
801
802 CheckGuardBlocks();
803
804 for (int y = 0; y < Height(); ++y)
805 for (int x = 0; x < Width(); ++x)
806 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
807 lookup(out, y * kOutputStride + x))
808 << "mismatch at (" << x << "," << y << "), "
809 << "filters (" << filter_bank << "," << filter_x << ","
810 << filter_y << ")";
811 }
812 }
813 }
814 }
815
TEST_P(ConvolveTest,FilterExtremes)816 TEST_P(ConvolveTest, FilterExtremes) {
817 uint8_t *const in = input();
818 uint8_t *const out = output();
819 #if CONFIG_HIGHBITDEPTH
820 uint8_t ref8[kOutputStride * kMaxDimension];
821 uint16_t ref16[kOutputStride * kMaxDimension];
822 uint8_t *ref;
823 if (UUT_->use_highbd_ == 0) {
824 ref = ref8;
825 } else {
826 ref = CONVERT_TO_BYTEPTR(ref16);
827 }
828 #else
829 uint8_t ref[kOutputStride * kMaxDimension];
830 #endif
831
832 // Populate ref and out with some random data
833 ::libaom_test::ACMRandom prng;
834 for (int y = 0; y < Height(); ++y) {
835 for (int x = 0; x < Width(); ++x) {
836 uint16_t r;
837 #if CONFIG_HIGHBITDEPTH
838 if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
839 r = prng.Rand8Extremes();
840 } else {
841 r = prng.Rand16() & mask_;
842 }
843 #else
844 r = prng.Rand8Extremes();
845 #endif
846 assign_val(out, y * kOutputStride + x, r);
847 assign_val(ref, y * kOutputStride + x, r);
848 }
849 }
850
851 for (int axis = 0; axis < 2; axis++) {
852 int seed_val = 0;
853 while (seed_val < 256) {
854 for (int y = 0; y < 8; ++y) {
855 for (int x = 0; x < 8; ++x) {
856 #if CONFIG_HIGHBITDEPTH
857 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
858 ((seed_val >> (axis ? y : x)) & 1) * mask_);
859 #else
860 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
861 ((seed_val >> (axis ? y : x)) & 1) * 255);
862 #endif
863 if (axis) seed_val++;
864 }
865 if (axis)
866 seed_val -= 8;
867 else
868 seed_val++;
869 }
870 if (axis) seed_val += 8;
871
872 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
873 const InterpFilter filter = (InterpFilter)filter_bank;
874 const InterpKernel *filters =
875 (const InterpKernel *)av1_get_interp_filter_kernel(filter);
876 #if CONFIG_DUAL_FILTER
877 const InterpFilterParams filter_params =
878 av1_get_interp_filter_params(filter);
879 if (filter_params.taps != SUBPEL_TAPS) continue;
880 #endif
881 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
882 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
883 wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
884 filters[filter_y], ref, kOutputStride,
885 Width(), Height());
886 if (filter_x && filter_y)
887 ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
888 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
889 filters[filter_y], 16, Width(), Height()));
890 else if (filter_y)
891 ASM_REGISTER_STATE_CHECK(UUT_->v8_(
892 in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
893 filters[filter_y], 16, Width(), Height()));
894 else if (filter_x)
895 ASM_REGISTER_STATE_CHECK(UUT_->h8_(
896 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
897 kInvalidFilter, 16, Width(), Height()));
898 else
899 ASM_REGISTER_STATE_CHECK(UUT_->copy_(
900 in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
901 kInvalidFilter, 0, Width(), Height()));
902
903 for (int y = 0; y < Height(); ++y)
904 for (int x = 0; x < Width(); ++x)
905 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
906 lookup(out, y * kOutputStride + x))
907 << "mismatch at (" << x << "," << y << "), "
908 << "filters (" << filter_bank << "," << filter_x << ","
909 << filter_y << ")";
910 }
911 }
912 }
913 }
914 }
915 }
916
917 /* This test exercises that enough rows and columns are filtered with every
918 possible initial fractional positions and scaling steps. */
TEST_P(ConvolveTest,CheckScalingFiltering)919 TEST_P(ConvolveTest, CheckScalingFiltering) {
920 uint8_t *const in = input();
921 uint8_t *const out = output();
922 const InterpKernel *const eighttap =
923 (const InterpKernel *)av1_get_interp_filter_kernel(EIGHTTAP_REGULAR);
924
925 SetConstantInput(127);
926
927 for (int frac = 0; frac < 16; ++frac) {
928 for (int step = 1; step <= 32; ++step) {
929 /* Test the horizontal and vertical filters in combination. */
930 ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
931 eighttap[frac], step, eighttap[frac],
932 step, Width(), Height()));
933
934 CheckGuardBlocks();
935
936 for (int y = 0; y < Height(); ++y) {
937 for (int x = 0; x < Width(); ++x) {
938 ASSERT_EQ(lookup(in, y * kInputStride + x),
939 lookup(out, y * kOutputStride + x))
940 << "x == " << x << ", y == " << y << ", frac == " << frac
941 << ", step == " << step;
942 }
943 }
944 }
945 }
946 }
947
// Benchmarks the copy function; disabled by default, run with
// --gtest_also_run_disabled_tests.
TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const int kNumTests = 5000000;
  const int w = Width();
  const int h = Height();
  aom_usec_timer timer;

  aom_usec_timer_start(&timer);
  for (int i = 0; i < kNumTests; ++i) {
    UUT_->copy_(src, kInputStride, dst, kOutputStride, NULL, 0, NULL, 0, w, h);
  }
  aom_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
  printf("convolve_copy_%dx%d_%d: %d us\n", w, h,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}
967
// Benchmarks the averaging copy function; disabled by default, run with
// --gtest_also_run_disabled_tests.
TEST_P(ConvolveTest, DISABLED_Avg_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const int kNumTests = 5000000;
  const int w = Width();
  const int h = Height();
  aom_usec_timer timer;

  aom_usec_timer_start(&timer);
  for (int i = 0; i < kNumTests; ++i) {
    UUT_->avg_(src, kInputStride, dst, kOutputStride, NULL, 0, NULL, 0, w, h);
  }
  aom_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
  printf("convolve_avg_%dx%d_%d: %d us\n", w, h,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}
987
// Benchmarks the subpel filter functions over all filter banks and phases;
// disabled by default, run with --gtest_also_run_disabled_tests.
TEST_P(ConvolveTest, DISABLED_Speed) {
  uint8_t *const in = input();
  uint8_t *const out = output();
#if CONFIG_HIGHBITDEPTH
  // Reference buffer: plain bytes for 8-bit runs, CONVERT_TO_BYTEPTR-wrapped
  // uint16_t storage when the unit under test is high bitdepth.
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t *ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CONVERT_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  // Populate ref and out with some random data
  ::libaom_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
#if CONFIG_HIGHBITDEPTH
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
#else
      r = prng.Rand8Extremes();
#endif

      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  // Warm-up call so the first timed iteration is not penalized. Renamed from
  // 'filter'/'filters' to avoid shadowing by the loop-local variables below.
  const InterpFilter warmup_filter = (InterpFilter)1;
  const InterpKernel *warmup_filters =
      (const InterpKernel *)av1_get_interp_filter_kernel(warmup_filter);
  wrapper_filter_average_block2d_8_c(in, kInputStride, warmup_filters[1],
                                     warmup_filters[1], out, kOutputStride,
                                     Width(), Height());

  aom_usec_timer timer;
  int tests_num = 1000;

  aom_usec_timer_start(&timer);
  while (tests_num > 0) {
    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
      const InterpFilter filter = (InterpFilter)filter_bank;
      const InterpKernel *filters =
          (const InterpKernel *)av1_get_interp_filter_kernel(filter);
#if CONFIG_DUAL_FILTER
      // Only 8-tap kernels are exercised by this benchmark.
      const InterpFilterParams filter_params =
          av1_get_interp_filter_params(filter);
      if (filter_params.taps != SUBPEL_TAPS) continue;
#endif

      for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
        for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
          // Time exactly ONE convolve call per (filter_x, filter_y) pair.
          // Bug fix: the second branch was a bare 'if', so a fractional-x &
          // fractional-y pair ran both hv8_ and v8_, double-counting work in
          // the measurement (FilterExtremes uses the 'else if' chain).
          if (filter_x && filter_y)
            ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                filters[filter_y], 16, Width(), Height()));
          else if (filter_y)
            ASM_REGISTER_STATE_CHECK(
                UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
                          16, filters[filter_y], 16, Width(), Height()));
          else if (filter_x)
            ASM_REGISTER_STATE_CHECK(UUT_->h8_(
                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                kInvalidFilter, 16, Width(), Height()));
        }
      }
    }
    tests_num--;
  }
  aom_usec_timer_mark(&timer);

  const int elapsed_time =
      static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
  printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(),
         UUT_->use_highbd_, elapsed_time);
}
1071
using std::tr1::make_tuple;

#if CONFIG_HIGHBITDEPTH
// WRAP(func, bd) defines wrap_<func>_<bd>(): a shim with the 8-bit
// ConvolveFunc signature that forwards to the high-bitdepth
// aom_highbd_<func>() with the bit depth baked in, so high-bitdepth kernels
// can populate the ConvolveFunctions tables below.
#define WRAP(func, bd) \
  void wrap_##func##_##bd( \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
      ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, \
      const int16_t *filter_y, int filter_y_stride, int w, int h) { \
    aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x, \
                      filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
  }
#if HAVE_SSE2 && ARCH_X86_64
// SSE2 wrappers for bit depths 8, 10 and 12.
WRAP(convolve_copy_sse2, 8)
WRAP(convolve_avg_sse2, 8)
WRAP(convolve_copy_sse2, 10)
WRAP(convolve_avg_sse2, 10)
WRAP(convolve_copy_sse2, 12)
WRAP(convolve_avg_sse2, 12)
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_avg_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_avg_vert_sse2, 8)
WRAP(convolve8_sse2, 8)
WRAP(convolve8_avg_sse2, 8)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_avg_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_avg_vert_sse2, 10)
WRAP(convolve8_sse2, 10)
WRAP(convolve8_avg_sse2, 10)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_avg_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
WRAP(convolve8_avg_vert_sse2, 12)
WRAP(convolve8_sse2, 12)
WRAP(convolve8_avg_sse2, 12)
#endif  // HAVE_SSE2 && ARCH_X86_64

// C reference wrappers for bit depths 8, 10 and 12 (always available).
WRAP(convolve_copy_c, 8)
WRAP(convolve_avg_c, 8)
WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_avg_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_avg_vert_c, 8)
WRAP(convolve8_c, 8)
WRAP(convolve8_avg_c, 8)
WRAP(convolve_copy_c, 10)
WRAP(convolve_avg_c, 10)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_avg_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
WRAP(convolve8_avg_vert_c, 10)
WRAP(convolve8_c, 10)
WRAP(convolve8_avg_c, 10)
WRAP(convolve_copy_c, 12)
WRAP(convolve_avg_c, 12)
WRAP(convolve8_horiz_c, 12)
WRAP(convolve8_avg_horiz_c, 12)
WRAP(convolve8_vert_c, 12)
WRAP(convolve8_avg_vert_c, 12)
WRAP(convolve8_c, 12)
WRAP(convolve8_avg_c, 12)

#if HAVE_AVX2
// AVX2 wrappers for bit depths 8, 10 and 12.
WRAP(convolve_copy_avx2, 8)
WRAP(convolve_avg_avx2, 8)
WRAP(convolve8_horiz_avx2, 8)
WRAP(convolve8_avg_horiz_avx2, 8)
WRAP(convolve8_vert_avx2, 8)
WRAP(convolve8_avg_vert_avx2, 8)
WRAP(convolve8_avx2, 8)
WRAP(convolve8_avg_avx2, 8)

WRAP(convolve_copy_avx2, 10)
WRAP(convolve_avg_avx2, 10)
WRAP(convolve8_avx2, 10)
WRAP(convolve8_horiz_avx2, 10)
WRAP(convolve8_vert_avx2, 10)
WRAP(convolve8_avg_avx2, 10)
WRAP(convolve8_avg_horiz_avx2, 10)
WRAP(convolve8_avg_vert_avx2, 10)

WRAP(convolve_copy_avx2, 12)
WRAP(convolve_avg_avx2, 12)
WRAP(convolve8_avx2, 12)
WRAP(convolve8_horiz_avx2, 12)
WRAP(convolve8_vert_avx2, 12)
WRAP(convolve8_avg_avx2, 12)
WRAP(convolve8_avg_horiz_avx2, 12)
WRAP(convolve8_avg_vert_avx2, 12)
#endif  // HAVE_AVX2

#undef WRAP
1165
// C reference implementation tables, one per bit depth (8/10/12).
// NOTE(review): the scaled slots (sh8/sv8/shv8 + avg) are filled with the
// unscaled wrappers — presumably no high-bitdepth scaled C variants exist
// here; confirm against aom_dsp_rtcd.
const ConvolveFunctions convolve8_c(
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8,
    wrap_convolve8_avg_c_8, 8);
const ConvolveFunctions convolve10_c(
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10,
    wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10,
    wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
const ConvolveFunctions convolve12_c(
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12,
    wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12,
    wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
const ConvolveParam kArrayConvolve_c[] = {
  ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c)
};

#else
// Low-bitdepth-only build: use the plain C functions directly; scaled slots
// use the aom_scaled_*_c functions. Trailing 0 selects 8-bit mode.
const ConvolveFunctions convolve8_c(
    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_c,
    aom_convolve8_avg_horiz_c, aom_convolve8_vert_c, aom_convolve8_avg_vert_c,
    aom_convolve8_c, aom_convolve8_avg_c, aom_scaled_horiz_c,
    aom_scaled_avg_horiz_c, aom_scaled_vert_c, aom_scaled_avg_vert_c,
    aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
#endif
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
1201
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_HIGHBITDEPTH
// SSE2 function tables built from the WRAP()-generated shims, one per bit
// depth. The scaled slots reuse the unscaled SSE2 wrappers.
const ConvolveFunctions convolve8_sse2(
    wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
const ConvolveFunctions convolve10_sse2(
    wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
const ConvolveFunctions convolve12_sse2(
    wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2),
                                              ALL_SIZES(convolve10_sse2),
                                              ALL_SIZES(convolve12_sse2) };
#else
// Low-bitdepth SSE2 table; the scaled slots fall back to the C functions.
const ConvolveFunctions convolve8_sse2(
    aom_convolve_copy_sse2, aom_convolve_avg_sse2, aom_convolve8_horiz_sse2,
    aom_convolve8_avg_horiz_sse2, aom_convolve8_vert_sse2,
    aom_convolve8_avg_vert_sse2, aom_convolve8_sse2, aom_convolve8_avg_sse2,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
#endif  // CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_sse2));
#endif
1244
#if HAVE_SSSE3
// SSSE3 table (low bitdepth only). copy/avg stay C; scaled 2D has an SSSE3
// implementation, the remaining scaled slots fall back to C.
const ConvolveFunctions convolve8_ssse3(
    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_ssse3,
    aom_convolve8_avg_horiz_ssse3, aom_convolve8_vert_ssse3,
    aom_convolve8_avg_vert_ssse3, aom_convolve8_ssse3, aom_convolve8_avg_ssse3,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_ssse3, aom_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_ssse3));
#endif
1257
#if HAVE_AVX2
#if CONFIG_HIGHBITDEPTH
// AVX2 high-bitdepth tables; the scaled slots fall back to the C wrappers.
const ConvolveFunctions convolve8_avx2(
    wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8,
    wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8,
    wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8,
    wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
const ConvolveFunctions convolve10_avx2(
    wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10,
    wrap_convolve8_horiz_avx2_10, wrap_convolve8_avg_horiz_avx2_10,
    wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10,
    wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
const ConvolveFunctions convolve12_avx2(
    wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12,
    wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12,
    wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12,
    wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES_64(convolve8_avx2),
                                               ALL_SIZES_64(convolve10_avx2),
                                               ALL_SIZES_64(convolve12_avx2) };
#else
// Low-bitdepth AVX2 table. NOTE(review): the avg slots use SSSE3 functions —
// presumably no AVX2 avg variants exist; confirm against aom_dsp_rtcd.
const ConvolveFunctions convolve8_avx2(
    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_avx2,
    aom_convolve8_avg_horiz_ssse3, aom_convolve8_vert_avx2,
    aom_convolve8_avg_vert_ssse3, aom_convolve8_avx2, aom_convolve8_avg_ssse3,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES_64(convolve8_avx2) };
#endif  // CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_avx2));
#endif  // HAVE_AVX2
1299
// TODO(any): Make NEON versions support 128x128 128x64 64x128 block sizes
#if HAVE_NEON && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
#if HAVE_NEON_ASM
// NOTE(review): both HAVE_NEON_ASM branches below are currently identical;
// the split presumably anticipates divergent asm vs. intrinsics bindings —
// confirm before consolidating.
const ConvolveFunctions convolve8_neon(
    aom_convolve_copy_neon, aom_convolve_avg_neon, aom_convolve8_horiz_neon,
    aom_convolve8_avg_horiz_neon, aom_convolve8_vert_neon,
    aom_convolve8_avg_vert_neon, aom_convolve8_neon, aom_convolve8_avg_neon,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
#else   // HAVE_NEON
const ConvolveFunctions convolve8_neon(
    aom_convolve_copy_neon, aom_convolve_avg_neon, aom_convolve8_horiz_neon,
    aom_convolve8_avg_horiz_neon, aom_convolve8_vert_neon,
    aom_convolve8_avg_vert_neon, aom_convolve8_neon, aom_convolve8_avg_neon,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
#endif  // HAVE_NEON_ASM

const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES_64(convolve8_neon) };
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_neon));
#endif  // HAVE_NEON && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
1322
// TODO(any): Make DSPR2 versions support 128x128 128x64 64x128 block sizes
#if HAVE_DSPR2 && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
// MIPS DSPR2 table; scaled slots fall back to the C functions.
const ConvolveFunctions convolve8_dspr2(
    aom_convolve_copy_dspr2, aom_convolve_avg_dspr2, aom_convolve8_horiz_dspr2,
    aom_convolve8_avg_horiz_dspr2, aom_convolve8_vert_dspr2,
    aom_convolve8_avg_vert_dspr2, aom_convolve8_dspr2, aom_convolve8_avg_dspr2,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES_64(convolve8_dspr2) };
INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_dspr2));
#endif  // HAVE_DSPR2 && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
1336
// TODO(any): Make MSA versions support 128x128 128x64 64x128 block sizes
#if HAVE_MSA && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
// MIPS MSA table; scaled slots fall back to the C functions.
const ConvolveFunctions convolve8_msa(
    aom_convolve_copy_msa, aom_convolve_avg_msa, aom_convolve8_horiz_msa,
    aom_convolve8_avg_horiz_msa, aom_convolve8_vert_msa,
    aom_convolve8_avg_vert_msa, aom_convolve8_msa, aom_convolve8_avg_msa,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES_64(convolve8_msa) };
INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_msa));
#endif  // HAVE_MSA && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
}  // namespace
1351