1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11 
12 #include <string.h>
13 
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
15 
16 #include "./aom_config.h"
17 #include "./aom_dsp_rtcd.h"
18 #include "test/acm_random.h"
19 #include "test/clear_system_state.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
22 #include "aom_dsp/aom_dsp_common.h"
23 #include "aom_dsp/aom_filter.h"
24 #include "aom_mem/aom_mem.h"
25 #include "aom_ports/mem.h"
26 #include "aom_ports/aom_timer.h"
27 #include "av1/common/filter.h"
28 
29 namespace {
30 
// Upper bound on the block width and height handled by the reference filters
// in this file; it also sizes their scratch buffers and the test buffers.
static const unsigned int kMaxDimension = MAX_SB_SIZE;

// Common signature shared by every convolve routine under test: source and
// destination pointers with their strides, the horizontal and vertical filter
// kernels with their stride arguments, and the block width and height.
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);
38 
39 struct ConvolveFunctions {
ConvolveFunctions__anon4dddae770111::ConvolveFunctions40   ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8,
41                     ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg,
42                     ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8,
43                     ConvolveFunc sh8_avg, ConvolveFunc sv8,
44                     ConvolveFunc sv8_avg, ConvolveFunc shv8,
45                     ConvolveFunc shv8_avg, int bd)
46       : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
47         v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8),
48         sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg),
49         use_highbd_(bd) {}
50 
51   ConvolveFunc copy_;
52   ConvolveFunc avg_;
53   ConvolveFunc h8_;
54   ConvolveFunc v8_;
55   ConvolveFunc hv8_;
56   ConvolveFunc h8_avg_;
57   ConvolveFunc v8_avg_;
58   ConvolveFunc hv8_avg_;
59   ConvolveFunc sh8_;       // scaled horiz
60   ConvolveFunc sv8_;       // scaled vert
61   ConvolveFunc shv8_;      // scaled horiz/vert
62   ConvolveFunc sh8_avg_;   // scaled avg horiz
63   ConvolveFunc sv8_avg_;   // scaled avg vert
64   ConvolveFunc shv8_avg_;  // scaled avg horiz/vert
65   int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
66 };
67 
// Test parameter: block width, block height, and the function table to test.
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;

// Expands to one (width, height, &convolve_fn) tuple for each block size from
// 4x4 up to 64x64.
#define ALL_SIZES_64(convolve_fn)                                         \
  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
      make_tuple(64, 64, &convolve_fn)

// ALL_SIZES adds the 128-pixel block sizes in front of ALL_SIZES_64 when
// CONFIG_AV1 && CONFIG_EXT_PARTITION is set; otherwise it is ALL_SIZES_64.
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
#define ALL_SIZES(convolve_fn)                                          \
  make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
      make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
#else
#define ALL_SIZES ALL_SIZES_64
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
86 
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
// The reference filters add AV1_FILTER_WEIGHT >> 1 as the rounding term and
// then shift the sum right by AV1_FILTER_SHIFT (128 == 1 << 7).
#define AV1_FILTER_WEIGHT 128
#define AV1_FILTER_SHIFT 7
// Saturates x to the 8-bit pixel range [0, 255].
uint8_t clip_pixel(int x) {
  if (x < 0) return 0;
  if (x > 255) return 255;
  return static_cast<uint8_t>(x);
}
91 
filter_block2d_8_c(const uint8_t * src_ptr,unsigned int src_stride,const int16_t * HFilter,const int16_t * VFilter,uint8_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height)92 void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride,
93                         const int16_t *HFilter, const int16_t *VFilter,
94                         uint8_t *dst_ptr, unsigned int dst_stride,
95                         unsigned int output_width, unsigned int output_height) {
96   // Between passes, we use an intermediate buffer whose height is extended to
97   // have enough horizontally filtered values as input for the vertical pass.
98   // This buffer is allocated to be big enough for the largest block type we
99   // support.
100   const int kInterp_Extend = 4;
101   const unsigned int intermediate_height =
102       (kInterp_Extend - 1) + output_height + kInterp_Extend;
103   unsigned int i, j;
104 
105   assert(intermediate_height > 7);
106 
107   // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
108   // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
109   //                                 + kInterp_Extend
110   //                               = 3 + 16 + 4
111   //                               = 23
112   // and filter_max_width          = 16
113   //
114   uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
115   const int intermediate_next_stride =
116       1 - static_cast<int>(intermediate_height * output_width);
117 
118   // Horizontal pass (src -> transposed intermediate).
119   uint8_t *output_ptr = intermediate_buffer;
120   const int src_next_row_stride = src_stride - output_width;
121   src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
122   for (i = 0; i < intermediate_height; ++i) {
123     for (j = 0; j < output_width; ++j) {
124       // Apply filter...
125       const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
126                        (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
127                        (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
128                        (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
129                        (AV1_FILTER_WEIGHT >> 1);  // Rounding
130 
131       // Normalize back to 0-255...
132       *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT);
133       ++src_ptr;
134       output_ptr += intermediate_height;
135     }
136     src_ptr += src_next_row_stride;
137     output_ptr += intermediate_next_stride;
138   }
139 
140   // Vertical pass (transposed intermediate -> dst).
141   src_ptr = intermediate_buffer;
142   const int dst_next_row_stride = dst_stride - output_width;
143   for (i = 0; i < output_height; ++i) {
144     for (j = 0; j < output_width; ++j) {
145       // Apply filter...
146       const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
147                        (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
148                        (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
149                        (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
150                        (AV1_FILTER_WEIGHT >> 1);  // Rounding
151 
152       // Normalize back to 0-255...
153       *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT);
154       src_ptr += intermediate_height;
155     }
156     src_ptr += intermediate_next_stride;
157     dst_ptr += dst_next_row_stride;
158   }
159 }
160 
// Replaces every pixel of the output_width x output_height block at
// output_ptr with the rounded-up mean of itself and the co-located pixel in
// src: out = (out + src + 1) >> 1.
void block2d_average_c(uint8_t *src, unsigned int src_stride,
                       uint8_t *output_ptr, unsigned int output_stride,
                       unsigned int output_width, unsigned int output_height) {
  uint8_t *dst_row = output_ptr;
  for (unsigned int row = 0; row < output_height; ++row) {
    const uint8_t *const src_row = src + row * src_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      const int sum = dst_row[col] + src_row[col] + 1;
      dst_row[col] = sum >> 1;
    }
    dst_row += output_stride;
  }
}
172 
filter_average_block2d_8_c(const uint8_t * src_ptr,const unsigned int src_stride,const int16_t * HFilter,const int16_t * VFilter,uint8_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height)173 void filter_average_block2d_8_c(const uint8_t *src_ptr,
174                                 const unsigned int src_stride,
175                                 const int16_t *HFilter, const int16_t *VFilter,
176                                 uint8_t *dst_ptr, unsigned int dst_stride,
177                                 unsigned int output_width,
178                                 unsigned int output_height) {
179   uint8_t tmp[kMaxDimension * kMaxDimension];
180 
181   assert(output_width <= kMaxDimension);
182   assert(output_height <= kMaxDimension);
183   filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
184                      output_width, output_height);
185   block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width,
186                     output_height);
187 }
188 
189 #if CONFIG_HIGHBITDEPTH
highbd_filter_block2d_8_c(const uint16_t * src_ptr,const unsigned int src_stride,const int16_t * HFilter,const int16_t * VFilter,uint16_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height,int bd)190 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
191                                const unsigned int src_stride,
192                                const int16_t *HFilter, const int16_t *VFilter,
193                                uint16_t *dst_ptr, unsigned int dst_stride,
194                                unsigned int output_width,
195                                unsigned int output_height, int bd) {
196   // Between passes, we use an intermediate buffer whose height is extended to
197   // have enough horizontally filtered values as input for the vertical pass.
198   // This buffer is allocated to be big enough for the largest block type we
199   // support.
200   const int kInterp_Extend = 4;
201   const unsigned int intermediate_height =
202       (kInterp_Extend - 1) + output_height + kInterp_Extend;
203 
204   /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
205    * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
206    *                                 + kInterp_Extend
207    *                               = 3 + 16 + 4
208    *                               = 23
209    * and filter_max_width = 16
210    */
211   uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 };
212   const int intermediate_next_stride =
213       1 - static_cast<int>(intermediate_height * output_width);
214 
215   // Horizontal pass (src -> transposed intermediate).
216   {
217     uint16_t *output_ptr = intermediate_buffer;
218     const int src_next_row_stride = src_stride - output_width;
219     unsigned int i, j;
220     src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
221     for (i = 0; i < intermediate_height; ++i) {
222       for (j = 0; j < output_width; ++j) {
223         // Apply filter...
224         const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
225                          (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
226                          (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
227                          (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
228                          (AV1_FILTER_WEIGHT >> 1);  // Rounding
229 
230         // Normalize back to 0-255...
231         *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
232         ++src_ptr;
233         output_ptr += intermediate_height;
234       }
235       src_ptr += src_next_row_stride;
236       output_ptr += intermediate_next_stride;
237     }
238   }
239 
240   // Vertical pass (transposed intermediate -> dst).
241   {
242     const uint16_t *interm_ptr = intermediate_buffer;
243     const int dst_next_row_stride = dst_stride - output_width;
244     unsigned int i, j;
245     for (i = 0; i < output_height; ++i) {
246       for (j = 0; j < output_width; ++j) {
247         // Apply filter...
248         const int temp =
249             (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
250             (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
251             (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
252             (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
253             (AV1_FILTER_WEIGHT >> 1);  // Rounding
254 
255         // Normalize back to 0-255...
256         *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
257         interm_ptr += intermediate_height;
258       }
259       interm_ptr += intermediate_next_stride;
260       dst_ptr += dst_next_row_stride;
261     }
262   }
263 }
264 
// 16-bit counterpart of the block average: every pixel of the
// output_width x output_height block at output_ptr becomes the rounded-up
// mean of itself and the co-located pixel in src: out = (out + src + 1) >> 1.
void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
                              uint16_t *output_ptr, unsigned int output_stride,
                              unsigned int output_width,
                              unsigned int output_height) {
  uint16_t *dst_row = output_ptr;
  for (unsigned int row = 0; row < output_height; ++row) {
    const uint16_t *const src_row = src + row * src_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      const int sum = dst_row[col] + src_row[col] + 1;
      dst_row[col] = sum >> 1;
    }
    dst_row += output_stride;
  }
}
277 
highbd_filter_average_block2d_8_c(const uint16_t * src_ptr,unsigned int src_stride,const int16_t * HFilter,const int16_t * VFilter,uint16_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height,int bd)278 void highbd_filter_average_block2d_8_c(
279     const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
280     const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride,
281     unsigned int output_width, unsigned int output_height, int bd) {
282   uint16_t tmp[kMaxDimension * kMaxDimension];
283 
284   assert(output_width <= kMaxDimension);
285   assert(output_height <= kMaxDimension);
286   highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp,
287                             kMaxDimension, output_width, output_height, bd);
288   highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
289                            output_width, output_height);
290 }
291 #endif  // CONFIG_HIGHBITDEPTH
292 
293 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
294  public:
SetUpTestCase()295   static void SetUpTestCase() {
296     // Force input_ to be unaligned, output to be 16 byte aligned.
297     input_ = reinterpret_cast<uint8_t *>(
298                  aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
299              1;
300     output_ = reinterpret_cast<uint8_t *>(
301         aom_memalign(kDataAlignment, kOutputBufferSize));
302     output_ref_ = reinterpret_cast<uint8_t *>(
303         aom_memalign(kDataAlignment, kOutputBufferSize));
304 #if CONFIG_HIGHBITDEPTH
305     input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
306                    kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
307                1;
308     output16_ = reinterpret_cast<uint16_t *>(
309         aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
310     output16_ref_ = reinterpret_cast<uint16_t *>(
311         aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
312 #endif
313   }
314 
TearDown()315   virtual void TearDown() { libaom_test::ClearSystemState(); }
316 
TearDownTestCase()317   static void TearDownTestCase() {
318     aom_free(input_ - 1);
319     input_ = NULL;
320     aom_free(output_);
321     output_ = NULL;
322     aom_free(output_ref_);
323     output_ref_ = NULL;
324 #if CONFIG_HIGHBITDEPTH
325     aom_free(input16_ - 1);
326     input16_ = NULL;
327     aom_free(output16_);
328     output16_ = NULL;
329     aom_free(output16_ref_);
330     output16_ref_ = NULL;
331 #endif
332   }
333 
334  protected:
335   static const int kDataAlignment = 16;
336   static const int kOuterBlockSize = 4 * kMaxDimension;
337   static const int kInputStride = kOuterBlockSize;
338   static const int kOutputStride = kOuterBlockSize;
339   static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
340   static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
341 
Width() const342   int Width() const { return GET_PARAM(0); }
Height() const343   int Height() const { return GET_PARAM(1); }
BorderLeft() const344   int BorderLeft() const {
345     const int center = (kOuterBlockSize - Width()) / 2;
346     return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
347   }
BorderTop() const348   int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
349 
IsIndexInBorder(int i)350   bool IsIndexInBorder(int i) {
351     return (i < BorderTop() * kOuterBlockSize ||
352             i >= (BorderTop() + Height()) * kOuterBlockSize ||
353             i % kOuterBlockSize < BorderLeft() ||
354             i % kOuterBlockSize >= (BorderLeft() + Width()));
355   }
356 
SetUp()357   virtual void SetUp() {
358     UUT_ = GET_PARAM(2);
359 #if CONFIG_HIGHBITDEPTH
360     if (UUT_->use_highbd_ != 0)
361       mask_ = (1 << UUT_->use_highbd_) - 1;
362     else
363       mask_ = 255;
364 #endif
365     /* Set up guard blocks for an inner block centered in the outer block */
366     for (int i = 0; i < kOutputBufferSize; ++i) {
367       if (IsIndexInBorder(i)) {
368         output_[i] = 255;
369 #if CONFIG_HIGHBITDEPTH
370         output16_[i] = mask_;
371 #endif
372       } else {
373         output_[i] = 0;
374 #if CONFIG_HIGHBITDEPTH
375         output16_[i] = 0;
376 #endif
377       }
378     }
379 
380     ::libaom_test::ACMRandom prng;
381     for (int i = 0; i < kInputBufferSize; ++i) {
382       if (i & 1) {
383         input_[i] = 255;
384 #if CONFIG_HIGHBITDEPTH
385         input16_[i] = mask_;
386 #endif
387       } else {
388         input_[i] = prng.Rand8Extremes();
389 #if CONFIG_HIGHBITDEPTH
390         input16_[i] = prng.Rand16() & mask_;
391 #endif
392       }
393     }
394   }
395 
SetConstantInput(int value)396   void SetConstantInput(int value) {
397     memset(input_, value, kInputBufferSize);
398 #if CONFIG_HIGHBITDEPTH
399     aom_memset16(input16_, value, kInputBufferSize);
400 #endif
401   }
402 
CopyOutputToRef()403   void CopyOutputToRef() {
404     memcpy(output_ref_, output_, kOutputBufferSize);
405 #if CONFIG_HIGHBITDEPTH
406     // Copy 16-bit pixels values. The effective number of bytes is double.
407     memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
408 #endif
409   }
410 
CheckGuardBlocks()411   void CheckGuardBlocks() {
412     for (int i = 0; i < kOutputBufferSize; ++i) {
413       if (IsIndexInBorder(i)) {
414         EXPECT_EQ(255, output_[i]);
415       }
416     }
417   }
418 
input() const419   uint8_t *input() const {
420     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
421 #if CONFIG_HIGHBITDEPTH
422     if (UUT_->use_highbd_ == 0) {
423       return input_ + offset;
424     } else {
425       return CONVERT_TO_BYTEPTR(input16_) + offset;
426     }
427 #else
428     return input_ + offset;
429 #endif
430   }
431 
output() const432   uint8_t *output() const {
433     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
434 #if CONFIG_HIGHBITDEPTH
435     if (UUT_->use_highbd_ == 0) {
436       return output_ + offset;
437     } else {
438       return CONVERT_TO_BYTEPTR(output16_) + offset;
439     }
440 #else
441     return output_ + offset;
442 #endif
443   }
444 
output_ref() const445   uint8_t *output_ref() const {
446     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
447 #if CONFIG_HIGHBITDEPTH
448     if (UUT_->use_highbd_ == 0) {
449       return output_ref_ + offset;
450     } else {
451       return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
452     }
453 #else
454     return output_ref_ + offset;
455 #endif
456   }
457 
lookup(uint8_t * list,int index) const458   uint16_t lookup(uint8_t *list, int index) const {
459 #if CONFIG_HIGHBITDEPTH
460     if (UUT_->use_highbd_ == 0) {
461       return list[index];
462     } else {
463       return CONVERT_TO_SHORTPTR(list)[index];
464     }
465 #else
466     return list[index];
467 #endif
468   }
469 
assign_val(uint8_t * list,int index,uint16_t val) const470   void assign_val(uint8_t *list, int index, uint16_t val) const {
471 #if CONFIG_HIGHBITDEPTH
472     if (UUT_->use_highbd_ == 0) {
473       list[index] = (uint8_t)val;
474     } else {
475       CONVERT_TO_SHORTPTR(list)[index] = val;
476     }
477 #else
478     list[index] = (uint8_t)val;
479 #endif
480   }
481 
wrapper_filter_average_block2d_8_c(const uint8_t * src_ptr,unsigned int src_stride,const int16_t * HFilter,const int16_t * VFilter,uint8_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height)482   void wrapper_filter_average_block2d_8_c(
483       const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
484       const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
485       unsigned int output_width, unsigned int output_height) {
486 #if CONFIG_HIGHBITDEPTH
487     if (UUT_->use_highbd_ == 0) {
488       filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
489                                  dst_stride, output_width, output_height);
490     } else {
491       highbd_filter_average_block2d_8_c(
492           CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
493           CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
494           UUT_->use_highbd_);
495     }
496 #else
497     filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
498                                dst_stride, output_width, output_height);
499 #endif
500   }
501 
wrapper_filter_block2d_8_c(const uint8_t * src_ptr,unsigned int src_stride,const int16_t * HFilter,const int16_t * VFilter,uint8_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height)502   void wrapper_filter_block2d_8_c(
503       const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
504       const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
505       unsigned int output_width, unsigned int output_height) {
506 #if CONFIG_HIGHBITDEPTH
507     if (UUT_->use_highbd_ == 0) {
508       filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
509                          dst_stride, output_width, output_height);
510     } else {
511       highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
512                                 HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
513                                 dst_stride, output_width, output_height,
514                                 UUT_->use_highbd_);
515     }
516 #else
517     filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
518                        dst_stride, output_width, output_height);
519 #endif
520   }
521 
522   const ConvolveFunctions *UUT_;
523   static uint8_t *input_;
524   static uint8_t *output_;
525   static uint8_t *output_ref_;
526 #if CONFIG_HIGHBITDEPTH
527   static uint16_t *input16_;
528   static uint16_t *output16_;
529   static uint16_t *output16_ref_;
530   int mask_;
531 #endif
532 };
533 
// Out-of-class definitions of the fixture's shared buffers. They start out
// NULL, are allocated in SetUpTestCase() and reset in TearDownTestCase().
uint8_t *ConvolveTest::input_ = NULL;
uint8_t *ConvolveTest::output_ = NULL;
uint8_t *ConvolveTest::output_ref_ = NULL;
#if CONFIG_HIGHBITDEPTH
uint16_t *ConvolveTest::input16_ = NULL;
uint16_t *ConvolveTest::output16_ = NULL;
uint16_t *ConvolveTest::output16_ref_ = NULL;
#endif
542 
// Sanity check: the guard area must be intact right after SetUp(), before any
// function under test has run.
TEST_P(ConvolveTest, GuardBlocks) {
  CheckGuardBlocks();
}
544 
// copy_ must reproduce the input block exactly and must not touch the guard
// area around the output block.
TEST_P(ConvolveTest, Copy) {
  uint8_t *const src = input();
  uint8_t *const dst = output();
  const int width = Width();
  const int height = Height();

  ASM_REGISTER_STATE_CHECK(UUT_->copy_(src, kInputStride, dst, kOutputStride,
                                       NULL, 0, NULL, 0, width, height));

  CheckGuardBlocks();

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      ASSERT_EQ(lookup(dst, row * kOutputStride + col),
                lookup(src, row * kInputStride + col))
          << "(" << col << "," << row << ")";
    }
  }
}
560 
// avg_ must leave, in every output pixel, the rounded mean of the input pixel
// and the value the output held before the call.
TEST_P(ConvolveTest, Avg) {
  uint8_t *const src = input();
  uint8_t *const dst = output();
  uint8_t *const dst_before = output_ref();
  const int width = Width();
  const int height = Height();

  // Remember the output contents so the expected average can be recomputed.
  CopyOutputToRef();

  ASM_REGISTER_STATE_CHECK(UUT_->avg_(src, kInputStride, dst, kOutputStride,
                                      NULL, 0, NULL, 0, width, height));

  CheckGuardBlocks();

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      ASSERT_EQ(lookup(dst, row * kOutputStride + col),
                ROUND_POWER_OF_TWO(
                    lookup(src, row * kInputStride + col) +
                        lookup(dst_before, row * kOutputStride + col),
                    1))
          << "(" << col << "," << row << ")";
    }
  }
}
580 
// With a kernel whose only non-zero tap is 128 at index 3, the scaled
// horizontal filter (sh8_) is expected to behave as a plain copy.
TEST_P(ConvolveTest, CopyHoriz) {
  uint8_t *const src = input();
  uint8_t *const dst = output();
  const int width = Width();
  const int height = Height();
  DECLARE_ALIGNED(256, const int16_t,
                  pass_through[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

  ASM_REGISTER_STATE_CHECK(UUT_->sh8_(src, kInputStride, dst, kOutputStride,
                                      pass_through, 16, pass_through, 16,
                                      width, height));

  CheckGuardBlocks();

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      ASSERT_EQ(lookup(dst, row * kOutputStride + col),
                lookup(src, row * kInputStride + col))
          << "(" << col << "," << row << ")";
    }
  }
}
599 
// With a kernel whose only non-zero tap is 128 at index 3, the scaled
// vertical filter (sv8_) is expected to behave as a plain copy.
TEST_P(ConvolveTest, CopyVert) {
  uint8_t *const src = input();
  uint8_t *const dst = output();
  const int width = Width();
  const int height = Height();
  DECLARE_ALIGNED(256, const int16_t,
                  pass_through[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

  ASM_REGISTER_STATE_CHECK(UUT_->sv8_(src, kInputStride, dst, kOutputStride,
                                      pass_through, 16, pass_through, 16,
                                      width, height));

  CheckGuardBlocks();

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      ASSERT_EQ(lookup(dst, row * kOutputStride + col),
                lookup(src, row * kInputStride + col))
          << "(" << col << "," << row << ")";
    }
  }
}
618 
// With a kernel whose only non-zero tap is 128 at index 3 in both directions,
// the scaled 2-D filter (shv8_) is expected to behave as a plain copy.
TEST_P(ConvolveTest, Copy2D) {
  uint8_t *const src = input();
  uint8_t *const dst = output();
  const int width = Width();
  const int height = Height();
  DECLARE_ALIGNED(256, const int16_t,
                  pass_through[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

  ASM_REGISTER_STATE_CHECK(UUT_->shv8_(src, kInputStride, dst, kOutputStride,
                                       pass_through, 16, pass_through, 16,
                                       width, height));

  CheckGuardBlocks();

  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      ASSERT_EQ(lookup(dst, row * kOutputStride + col),
                lookup(src, row * kInputStride + col))
          << "(" << col << "," << row << ")";
    }
  }
}
637 
// Number of interpolation filter types iterated over by the tests below.
const int kNumFilterBanks = SWITCHABLE_FILTERS;
// Number of kernels the tests read from each filter bank.
const int kNumFilters = 16;
640 
// For every 8-tap kernel, sums adjacent tap pairs and checks that each pair,
// and the partial sums accumulated in the order (p0, p3, p1, p2), never exceed
// 128, and that all taps together add up to exactly 128.
TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
  for (int bank = 0; bank < kNumFilterBanks; ++bank) {
    const InterpFilter filter = (InterpFilter)bank;
    const InterpKernel *filters =
        (const InterpKernel *)av1_get_interp_filter_kernel(filter);
#if CONFIG_DUAL_FILTER
    // Only kernels with SUBPEL_TAPS taps are covered by this check.
    const InterpFilterParams filter_params =
        av1_get_interp_filter_params(filter);
    if (filter_params.taps != SUBPEL_TAPS) continue;
#endif
    for (int k = 0; k < kNumFilters; k++) {
      const int16_t *const taps = filters[k];
      const int pair01 = taps[0] + taps[1];
      const int pair23 = taps[2] + taps[3];
      const int pair45 = taps[4] + taps[5];
      const int pair67 = taps[6] + taps[7];

      // Each pair on its own.
      EXPECT_LE(pair01, 128);
      EXPECT_LE(pair23, 128);
      EXPECT_LE(pair45, 128);
      EXPECT_LE(pair67, 128);

      // Running sums in the order outer pairs first, then inner pairs.
      EXPECT_LE(pair01 + pair67, 128);
      EXPECT_LE(pair01 + pair67 + pair23, 128);
      EXPECT_LE(pair01 + pair67 + pair23 + pair45, 128);

      // The full kernel carries unit gain.
      EXPECT_EQ(pair01 + pair23 + pair45 + pair67, 128);
    }
  }
}
667 
// All-zero kernel passed in place of the filter for the direction a function
// under test is not given a real kernel for (e.g. filter_x for v8_) --
// presumably so that a function wrongly applying it produces a mismatch.
const int16_t kInvalidFilter[8] = { 0 };
669 
TEST_P(ConvolveTest,MatchesReferenceSubpixelFilter)670 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
671   uint8_t *const in = input();
672   uint8_t *const out = output();
673 #if CONFIG_HIGHBITDEPTH
674   uint8_t ref8[kOutputStride * kMaxDimension];
675   uint16_t ref16[kOutputStride * kMaxDimension];
676   uint8_t *ref;
677   if (UUT_->use_highbd_ == 0) {
678     ref = ref8;
679   } else {
680     ref = CONVERT_TO_BYTEPTR(ref16);
681   }
682 #else
683   uint8_t ref[kOutputStride * kMaxDimension];
684 #endif
685 
686   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
687     const InterpFilter filter = (InterpFilter)filter_bank;
688     const InterpKernel *filters =
689         (const InterpKernel *)av1_get_interp_filter_kernel(filter);
690 #if CONFIG_DUAL_FILTER
691     const InterpFilterParams filter_params =
692         av1_get_interp_filter_params(filter);
693     if (filter_params.taps != SUBPEL_TAPS) continue;
694 #endif
695 
696     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
697       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
698         wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
699                                    filters[filter_y], ref, kOutputStride,
700                                    Width(), Height());
701 
702         if (filter_x && filter_y)
703           ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
704               in, kInputStride, out, kOutputStride, filters[filter_x], 16,
705               filters[filter_y], 16, Width(), Height()));
706         else if (filter_y)
707           ASM_REGISTER_STATE_CHECK(
708               UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
709                         16, filters[filter_y], 16, Width(), Height()));
710         else if (filter_x)
711           ASM_REGISTER_STATE_CHECK(
712               UUT_->h8_(in, kInputStride, out, kOutputStride, filters[filter_x],
713                         16, kInvalidFilter, 16, Width(), Height()));
714         else
715           ASM_REGISTER_STATE_CHECK(
716               UUT_->copy_(in, kInputStride, out, kOutputStride, kInvalidFilter,
717                           0, kInvalidFilter, 0, Width(), Height()));
718 
719         CheckGuardBlocks();
720 
721         for (int y = 0; y < Height(); ++y)
722           for (int x = 0; x < Width(); ++x)
723             ASSERT_EQ(lookup(ref, y * kOutputStride + x),
724                       lookup(out, y * kOutputStride + x))
725                 << "mismatch at (" << x << "," << y << "), "
726                 << "filters (" << filter_bank << "," << filter_x << ","
727                 << filter_y << ")";
728       }
729     }
730   }
731 }
732 
TEST_P(ConvolveTest,MatchesReferenceAveragingSubpixelFilter)733 TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
734   uint8_t *const in = input();
735   uint8_t *const out = output();
736 #if CONFIG_HIGHBITDEPTH
737   uint8_t ref8[kOutputStride * kMaxDimension];
738   uint16_t ref16[kOutputStride * kMaxDimension];
739   uint8_t *ref;
740   if (UUT_->use_highbd_ == 0) {
741     ref = ref8;
742   } else {
743     ref = CONVERT_TO_BYTEPTR(ref16);
744   }
745 #else
746   uint8_t ref[kOutputStride * kMaxDimension];
747 #endif
748 
749   // Populate ref and out with some random data
750   ::libaom_test::ACMRandom prng;
751   for (int y = 0; y < Height(); ++y) {
752     for (int x = 0; x < Width(); ++x) {
753       uint16_t r;
754 #if CONFIG_HIGHBITDEPTH
755       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
756         r = prng.Rand8Extremes();
757       } else {
758         r = prng.Rand16() & mask_;
759       }
760 #else
761       r = prng.Rand8Extremes();
762 #endif
763 
764       assign_val(out, y * kOutputStride + x, r);
765       assign_val(ref, y * kOutputStride + x, r);
766     }
767   }
768 
769   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
770     const InterpFilter filter = (InterpFilter)filter_bank;
771     const InterpKernel *filters =
772         (const InterpKernel *)av1_get_interp_filter_kernel(filter);
773 #if CONFIG_DUAL_FILTER
774     const InterpFilterParams filter_params =
775         av1_get_interp_filter_params(filter);
776     if (filter_params.taps != SUBPEL_TAPS) continue;
777 #endif
778 
779     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
780       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
781         wrapper_filter_average_block2d_8_c(in, kInputStride, filters[filter_x],
782                                            filters[filter_y], ref,
783                                            kOutputStride, Width(), Height());
784 
785         if (filter_x && filter_y)
786           ASM_REGISTER_STATE_CHECK(UUT_->hv8_avg_(
787               in, kInputStride, out, kOutputStride, filters[filter_x], 16,
788               filters[filter_y], 16, Width(), Height()));
789         else if (filter_y)
790           ASM_REGISTER_STATE_CHECK(UUT_->v8_avg_(
791               in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
792               filters[filter_y], 16, Width(), Height()));
793         else if (filter_x)
794           ASM_REGISTER_STATE_CHECK(UUT_->h8_avg_(
795               in, kInputStride, out, kOutputStride, filters[filter_x], 16,
796               kInvalidFilter, 16, Width(), Height()));
797         else
798           ASM_REGISTER_STATE_CHECK(
799               UUT_->avg_(in, kInputStride, out, kOutputStride, kInvalidFilter,
800                          0, kInvalidFilter, 0, Width(), Height()));
801 
802         CheckGuardBlocks();
803 
804         for (int y = 0; y < Height(); ++y)
805           for (int x = 0; x < Width(); ++x)
806             ASSERT_EQ(lookup(ref, y * kOutputStride + x),
807                       lookup(out, y * kOutputStride + x))
808                 << "mismatch at (" << x << "," << y << "), "
809                 << "filters (" << filter_bank << "," << filter_x << ","
810                 << filter_y << ")";
811       }
812     }
813   }
814 }
815 
TEST_P(ConvolveTest,FilterExtremes)816 TEST_P(ConvolveTest, FilterExtremes) {
817   uint8_t *const in = input();
818   uint8_t *const out = output();
819 #if CONFIG_HIGHBITDEPTH
820   uint8_t ref8[kOutputStride * kMaxDimension];
821   uint16_t ref16[kOutputStride * kMaxDimension];
822   uint8_t *ref;
823   if (UUT_->use_highbd_ == 0) {
824     ref = ref8;
825   } else {
826     ref = CONVERT_TO_BYTEPTR(ref16);
827   }
828 #else
829   uint8_t ref[kOutputStride * kMaxDimension];
830 #endif
831 
832   // Populate ref and out with some random data
833   ::libaom_test::ACMRandom prng;
834   for (int y = 0; y < Height(); ++y) {
835     for (int x = 0; x < Width(); ++x) {
836       uint16_t r;
837 #if CONFIG_HIGHBITDEPTH
838       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
839         r = prng.Rand8Extremes();
840       } else {
841         r = prng.Rand16() & mask_;
842       }
843 #else
844       r = prng.Rand8Extremes();
845 #endif
846       assign_val(out, y * kOutputStride + x, r);
847       assign_val(ref, y * kOutputStride + x, r);
848     }
849   }
850 
851   for (int axis = 0; axis < 2; axis++) {
852     int seed_val = 0;
853     while (seed_val < 256) {
854       for (int y = 0; y < 8; ++y) {
855         for (int x = 0; x < 8; ++x) {
856 #if CONFIG_HIGHBITDEPTH
857           assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
858                      ((seed_val >> (axis ? y : x)) & 1) * mask_);
859 #else
860           assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
861                      ((seed_val >> (axis ? y : x)) & 1) * 255);
862 #endif
863           if (axis) seed_val++;
864         }
865         if (axis)
866           seed_val -= 8;
867         else
868           seed_val++;
869       }
870       if (axis) seed_val += 8;
871 
872       for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
873         const InterpFilter filter = (InterpFilter)filter_bank;
874         const InterpKernel *filters =
875             (const InterpKernel *)av1_get_interp_filter_kernel(filter);
876 #if CONFIG_DUAL_FILTER
877         const InterpFilterParams filter_params =
878             av1_get_interp_filter_params(filter);
879         if (filter_params.taps != SUBPEL_TAPS) continue;
880 #endif
881         for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
882           for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
883             wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
884                                        filters[filter_y], ref, kOutputStride,
885                                        Width(), Height());
886             if (filter_x && filter_y)
887               ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
888                   in, kInputStride, out, kOutputStride, filters[filter_x], 16,
889                   filters[filter_y], 16, Width(), Height()));
890             else if (filter_y)
891               ASM_REGISTER_STATE_CHECK(UUT_->v8_(
892                   in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
893                   filters[filter_y], 16, Width(), Height()));
894             else if (filter_x)
895               ASM_REGISTER_STATE_CHECK(UUT_->h8_(
896                   in, kInputStride, out, kOutputStride, filters[filter_x], 16,
897                   kInvalidFilter, 16, Width(), Height()));
898             else
899               ASM_REGISTER_STATE_CHECK(UUT_->copy_(
900                   in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
901                   kInvalidFilter, 0, Width(), Height()));
902 
903             for (int y = 0; y < Height(); ++y)
904               for (int x = 0; x < Width(); ++x)
905                 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
906                           lookup(out, y * kOutputStride + x))
907                     << "mismatch at (" << x << "," << y << "), "
908                     << "filters (" << filter_bank << "," << filter_x << ","
909                     << filter_y << ")";
910           }
911         }
912       }
913     }
914   }
915 }
916 
/* This test verifies that enough rows and columns are filtered for every
   possible initial fractional position and scaling step. */
// Runs shv8_ on a constant-valued input for every kernel index 0..15 and
// every step 1..32, and expects each output sample to equal the input sample
// at the same coordinates.
TEST_P(ConvolveTest, CheckScalingFiltering) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  const int width = Width();
  const int height = Height();
  const InterpKernel *const regular_kernels =
      (const InterpKernel *)av1_get_interp_filter_kernel(EIGHTTAP_REGULAR);

  SetConstantInput(127);

  for (int frac = 0; frac < 16; ++frac) {
    const int16_t *const kernel = regular_kernels[frac];
    for (int step = 1; step <= 32; ++step) {
      // The same kernel and step are applied horizontally and vertically.
      ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
                                           kernel, step, kernel, step, width,
                                           height));

      CheckGuardBlocks();

      for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
          ASSERT_EQ(lookup(in, y * kInputStride + x),
                    lookup(out, y * kOutputStride + x))
              << "x == " << x << ", y == " << y << ", frac == " << frac
              << ", step == " << step;
        }
      }
    }
  }
}
947 
// Benchmark (disabled by default): calls copy_ 5,000,000 times on one block
// and prints the elapsed time in microseconds.
TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const int kIterations = 5000000;
  const int block_w = Width();
  const int block_h = Height();
  // use_highbd_ == 0 is reported as 8.
  const int reported_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;

  aom_usec_timer stopwatch;
  aom_usec_timer_start(&stopwatch);
  for (int remaining = kIterations; remaining > 0; --remaining) {
    UUT_->copy_(src, kInputStride, dst, kOutputStride, NULL, 0, NULL, 0,
                block_w, block_h);
  }
  aom_usec_timer_mark(&stopwatch);

  const int elapsed_us = static_cast<int>(aom_usec_timer_elapsed(&stopwatch));
  printf("convolve_copy_%dx%d_%d: %d us\n", block_w, block_h, reported_depth,
         elapsed_us);
}
967 
// Benchmark (disabled by default): calls avg_ 5,000,000 times on one block
// and prints the elapsed time in microseconds.
TEST_P(ConvolveTest, DISABLED_Avg_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const int kIterations = 5000000;
  const int block_w = Width();
  const int block_h = Height();
  // use_highbd_ == 0 is reported as 8.
  const int reported_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;

  aom_usec_timer stopwatch;
  aom_usec_timer_start(&stopwatch);
  for (int remaining = kIterations; remaining > 0; --remaining) {
    UUT_->avg_(src, kInputStride, dst, kOutputStride, NULL, 0, NULL, 0,
               block_w, block_h);
  }
  aom_usec_timer_mark(&stopwatch);

  const int elapsed_us = static_cast<int>(aom_usec_timer_elapsed(&stopwatch));
  printf("convolve_avg_%dx%d_%d: %d us\n", block_w, block_h, reported_depth,
         elapsed_us);
}
987 
TEST_P(ConvolveTest,DISABLED_Speed)988 TEST_P(ConvolveTest, DISABLED_Speed) {
989   uint8_t *const in = input();
990   uint8_t *const out = output();
991 #if CONFIG_HIGHBITDEPTH
992   uint8_t ref8[kOutputStride * kMaxDimension];
993   uint16_t ref16[kOutputStride * kMaxDimension];
994   uint8_t *ref;
995   if (UUT_->use_highbd_ == 0) {
996     ref = ref8;
997   } else {
998     ref = CONVERT_TO_BYTEPTR(ref16);
999   }
1000 #else
1001   uint8_t ref[kOutputStride * kMaxDimension];
1002 #endif
1003 
1004   // Populate ref and out with some random data
1005   ::libaom_test::ACMRandom prng;
1006   for (int y = 0; y < Height(); ++y) {
1007     for (int x = 0; x < Width(); ++x) {
1008       uint16_t r;
1009 #if CONFIG_HIGHBITDEPTH
1010       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
1011         r = prng.Rand8Extremes();
1012       } else {
1013         r = prng.Rand16() & mask_;
1014       }
1015 #else
1016       r = prng.Rand8Extremes();
1017 #endif
1018 
1019       assign_val(out, y * kOutputStride + x, r);
1020       assign_val(ref, y * kOutputStride + x, r);
1021     }
1022   }
1023 
1024   const InterpFilter filter = (InterpFilter)1;
1025   const InterpKernel *filters =
1026       (const InterpKernel *)av1_get_interp_filter_kernel(filter);
1027   wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1],
1028                                      out, kOutputStride, Width(), Height());
1029 
1030   aom_usec_timer timer;
1031   int tests_num = 1000;
1032 
1033   aom_usec_timer_start(&timer);
1034   while (tests_num > 0) {
1035     for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
1036       const InterpFilter filter = (InterpFilter)filter_bank;
1037       const InterpKernel *filters =
1038           (const InterpKernel *)av1_get_interp_filter_kernel(filter);
1039 #if CONFIG_DUAL_FILTER
1040       const InterpFilterParams filter_params =
1041           av1_get_interp_filter_params(filter);
1042       if (filter_params.taps != SUBPEL_TAPS) continue;
1043 #endif
1044 
1045       for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
1046         for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
1047           if (filter_x && filter_y)
1048             ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
1049                 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
1050                 filters[filter_y], 16, Width(), Height()));
1051           if (filter_y)
1052             ASM_REGISTER_STATE_CHECK(
1053                 UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
1054                           16, filters[filter_y], 16, Width(), Height()));
1055           else if (filter_x)
1056             ASM_REGISTER_STATE_CHECK(UUT_->h8_(
1057                 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
1058                 kInvalidFilter, 16, Width(), Height()));
1059         }
1060       }
1061     }
1062     tests_num--;
1063   }
1064   aom_usec_timer_mark(&timer);
1065 
1066   const int elapsed_time =
1067       static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
1068   printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(),
1069          UUT_->use_highbd_, elapsed_time);
1070 }
1071 
// Lets make_tuple be written unqualified below; presumably it is used by the
// ALL_SIZES* macros defined earlier in this file - TODO confirm.
using std::tr1::make_tuple;
1073 
1074 #if CONFIG_HIGHBITDEPTH
// WRAP(func, bd) defines wrap_<func>_<bd>(), a function with the ConvolveFunc
// parameter list that forwards all ten arguments to aom_highbd_<func>() and
// appends the literal bd as an eleventh, trailing argument.
#define WRAP(func, bd)                                                       \
  void wrap_##func##_##bd(                                                   \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
      ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,    \
      const int16_t *filter_y, int filter_y_stride, int w, int h) {          \
    aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x,            \
                      filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
  }
#if HAVE_SSE2 && ARCH_X86_64
// SSE2 wrappers, one group per bd value. Each group lists copy and avg,
// then the horizontal, vertical and 2D filters, each followed by its
// averaging counterpart.

// bd = 8
WRAP(convolve_copy_sse2, 8)
WRAP(convolve_avg_sse2, 8)
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_avg_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_avg_vert_sse2, 8)
WRAP(convolve8_sse2, 8)
WRAP(convolve8_avg_sse2, 8)

// bd = 10
WRAP(convolve_copy_sse2, 10)
WRAP(convolve_avg_sse2, 10)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_avg_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_avg_vert_sse2, 10)
WRAP(convolve8_sse2, 10)
WRAP(convolve8_avg_sse2, 10)

// bd = 12
WRAP(convolve_copy_sse2, 12)
WRAP(convolve_avg_sse2, 12)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_avg_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
WRAP(convolve8_avg_vert_sse2, 12)
WRAP(convolve8_sse2, 12)
WRAP(convolve8_avg_sse2, 12)
#endif  // HAVE_SSE2 && ARCH_X86_64
1109 
// C wrappers, one group of eight per bd value: copy and avg, then the
// horizontal, vertical and 2D filters, each followed by its averaging
// counterpart.
// bd = 8
WRAP(convolve_copy_c, 8)
WRAP(convolve_avg_c, 8)
WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_avg_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_avg_vert_c, 8)
WRAP(convolve8_c, 8)
WRAP(convolve8_avg_c, 8)
// bd = 10
WRAP(convolve_copy_c, 10)
WRAP(convolve_avg_c, 10)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_avg_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
WRAP(convolve8_avg_vert_c, 10)
WRAP(convolve8_c, 10)
WRAP(convolve8_avg_c, 10)
// bd = 12
WRAP(convolve_copy_c, 12)
WRAP(convolve_avg_c, 12)
WRAP(convolve8_horiz_c, 12)
WRAP(convolve8_avg_horiz_c, 12)
WRAP(convolve8_vert_c, 12)
WRAP(convolve8_avg_vert_c, 12)
WRAP(convolve8_c, 12)
WRAP(convolve8_avg_c, 12)
1134 
#if HAVE_AVX2
// AVX2 wrappers, one group per bd value. Each group lists copy and avg,
// then the horizontal, vertical and 2D filters, each followed by its
// averaging counterpart.

// bd = 8
WRAP(convolve_copy_avx2, 8)
WRAP(convolve_avg_avx2, 8)
WRAP(convolve8_horiz_avx2, 8)
WRAP(convolve8_avg_horiz_avx2, 8)
WRAP(convolve8_vert_avx2, 8)
WRAP(convolve8_avg_vert_avx2, 8)
WRAP(convolve8_avx2, 8)
WRAP(convolve8_avg_avx2, 8)

// bd = 10
WRAP(convolve_copy_avx2, 10)
WRAP(convolve_avg_avx2, 10)
WRAP(convolve8_horiz_avx2, 10)
WRAP(convolve8_avg_horiz_avx2, 10)
WRAP(convolve8_vert_avx2, 10)
WRAP(convolve8_avg_vert_avx2, 10)
WRAP(convolve8_avx2, 10)
WRAP(convolve8_avg_avx2, 10)

// bd = 12
WRAP(convolve_copy_avx2, 12)
WRAP(convolve_avg_avx2, 12)
WRAP(convolve8_horiz_avx2, 12)
WRAP(convolve8_avg_horiz_avx2, 12)
WRAP(convolve8_vert_avx2, 12)
WRAP(convolve8_avg_vert_avx2, 12)
WRAP(convolve8_avx2, 12)
WRAP(convolve8_avg_avx2, 12)
#endif  // HAVE_AVX2
1163 
1164 #undef WRAP
1165 
// C function tables for bd = 8, 10 and 12 (CONFIG_HIGHBITDEPTH build).
// ConvolveFunctions takes, in order: copy, avg, h8, h8_avg, v8, v8_avg, hv8,
// hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg, bd. Here the six
// s*-prefixed slots are filled with the same wrappers as the h8/v8/hv8 slots.
const ConvolveFunctions convolve8_c(
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8,
    wrap_convolve8_avg_c_8, 8);
const ConvolveFunctions convolve10_c(
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10,
    wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10,
    wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
const ConvolveFunctions convolve12_c(
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12,
    wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12,
    wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
// Test parameters for all three tables. ALL_SIZES is defined earlier in the
// file; presumably it expands to one entry per block size - TODO confirm.
const ConvolveParam kArrayConvolve_c[] = {
  ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c)
};
1190 
1191 #else
// C function table for the build without CONFIG_HIGHBITDEPTH. Arguments are,
// in order: copy, avg, h8, h8_avg, v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg,
// sv8, sv8_avg, shv8, shv8_avg, bd. The s*-prefixed slots take the
// aom_scaled_*_c functions and bd is 0.
const ConvolveFunctions convolve8_c(
    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_c,
    aom_convolve8_avg_horiz_c, aom_convolve8_vert_c, aom_convolve8_avg_vert_c,
    aom_convolve8_c, aom_convolve8_avg_c, aom_scaled_horiz_c,
    aom_scaled_avg_horiz_c, aom_scaled_vert_c, aom_scaled_avg_vert_c,
    aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
// ALL_SIZES is defined earlier in the file; presumably it expands to one
// test parameter per block size - TODO confirm.
const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
1199 #endif
// Instantiates every ConvolveTest case once per entry of kArrayConvolve_c,
// under the "C" prefix.
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
1201 
// SSE2 function tables. ConvolveFunctions takes, in order: copy, avg, h8,
// h8_avg, v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8,
// shv8_avg, bd.
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_HIGHBITDEPTH
// One table per bd value (8, 10, 12); the s*-prefixed slots are filled with
// the same SSE2 wrappers as the h8/v8/hv8 slots.
const ConvolveFunctions convolve8_sse2(
    wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
const ConvolveFunctions convolve10_sse2(
    wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
const ConvolveFunctions convolve12_sse2(
    wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2),
                                              ALL_SIZES(convolve10_sse2),
                                              ALL_SIZES(convolve12_sse2) };
#else
// Without CONFIG_HIGHBITDEPTH the s*-prefixed slots take the aom_scaled_*_c
// functions and bd is 0.
const ConvolveFunctions convolve8_sse2(
    aom_convolve_copy_sse2, aom_convolve_avg_sse2, aom_convolve8_horiz_sse2,
    aom_convolve8_avg_horiz_sse2, aom_convolve8_vert_sse2,
    aom_convolve8_avg_vert_sse2, aom_convolve8_sse2, aom_convolve8_avg_sse2,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
#endif  // CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_sse2));
#endif  // HAVE_SSE2 && ARCH_X86_64
1244 
// SSSE3 function table (bd = 0). Arguments are, in order: copy, avg, h8,
// h8_avg, v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8,
// shv8_avg, bd. The copy and avg slots use the C functions; of the
// s*-prefixed slots only shv8 has an SSSE3 function (aom_scaled_2d_ssse3),
// the rest use aom_scaled_*_c.
#if HAVE_SSSE3
const ConvolveFunctions convolve8_ssse3(
    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_ssse3,
    aom_convolve8_avg_horiz_ssse3, aom_convolve8_vert_ssse3,
    aom_convolve8_avg_vert_ssse3, aom_convolve8_ssse3, aom_convolve8_avg_ssse3,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_ssse3, aom_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_ssse3));
#endif  // HAVE_SSSE3
1257 
// AVX2 function tables. ConvolveFunctions takes, in order: copy, avg, h8,
// h8_avg, v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8,
// shv8_avg, bd. Both variants are parameterized with ALL_SIZES_64 rather than
// ALL_SIZES.
#if HAVE_AVX2
#if CONFIG_HIGHBITDEPTH
// One table per bd value (8, 10, 12); the first eight slots use the AVX2
// wrappers and the six s*-prefixed slots use the C wrappers.
const ConvolveFunctions convolve8_avx2(
    wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8,
    wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8,
    wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8,
    wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
const ConvolveFunctions convolve10_avx2(
    wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10,
    wrap_convolve8_horiz_avx2_10, wrap_convolve8_avg_horiz_avx2_10,
    wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10,
    wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
const ConvolveFunctions convolve12_avx2(
    wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12,
    wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12,
    wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12,
    wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES_64(convolve8_avx2),
                                               ALL_SIZES_64(convolve10_avx2),
                                               ALL_SIZES_64(convolve12_avx2) };
#else
// Without CONFIG_HIGHBITDEPTH only h8, v8 and hv8 use AVX2 functions; the
// averaging slots use SSSE3 functions, copy/avg use C, the s*-prefixed slots
// use aom_scaled_*_c, and bd is 0.
const ConvolveFunctions convolve8_avx2(
    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_avx2,
    aom_convolve8_avg_horiz_ssse3, aom_convolve8_vert_avx2,
    aom_convolve8_avg_vert_ssse3, aom_convolve8_avx2, aom_convolve8_avg_ssse3,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES_64(convolve8_avx2) };
#endif  // CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_avx2));
#endif  // HAVE_AVX2
1299 
1300 // TODO(any): Make NEON versions support 128x128 128x64 64x128 block sizes
#if HAVE_NEON && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
// NEON function table (bd = 0). Arguments are, in order: copy, avg, h8,
// h8_avg, v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8,
// shv8_avg, bd. The first eight slots use NEON functions and the s*-prefixed
// slots use aom_scaled_*_c. The table does not depend on HAVE_NEON_ASM, so a
// single definition is used.
const ConvolveFunctions convolve8_neon(
    aom_convolve_copy_neon, aom_convolve_avg_neon, aom_convolve8_horiz_neon,
    aom_convolve8_avg_horiz_neon, aom_convolve8_vert_neon,
    aom_convolve8_avg_vert_neon, aom_convolve8_neon, aom_convolve8_avg_neon,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES_64(convolve8_neon) };
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_neon));
#endif  // HAVE_NEON && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
1322 
1323 // TODO(any): Make DSPR2 versions support 128x128 128x64 64x128 block sizes
#if HAVE_DSPR2 && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
// DSPR2 function table (bd = 0). Arguments are, in order: copy, avg, h8,
// h8_avg, v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8,
// shv8_avg, bd. The first eight slots use DSPR2 functions and the s*-prefixed
// slots use aom_scaled_*_c.
const ConvolveFunctions convolve8_dspr2(
    aom_convolve_copy_dspr2, aom_convolve_avg_dspr2, aom_convolve8_horiz_dspr2,
    aom_convolve8_avg_horiz_dspr2, aom_convolve8_vert_dspr2,
    aom_convolve8_avg_vert_dspr2, aom_convolve8_dspr2, aom_convolve8_avg_dspr2,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES_64(convolve8_dspr2) };
INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_dspr2));
#endif  // HAVE_DSPR2 && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
1336 
1337 // TODO(any): Make MSA versions support 128x128 128x64 64x128 block sizes
#if HAVE_MSA && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
// MSA function table (bd = 0). Arguments are, in order: copy, avg, h8,
// h8_avg, v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8,
// shv8_avg, bd. The first eight slots use MSA functions and the s*-prefixed
// slots use aom_scaled_*_c.
const ConvolveFunctions convolve8_msa(
    aom_convolve_copy_msa, aom_convolve_avg_msa, aom_convolve8_horiz_msa,
    aom_convolve8_avg_horiz_msa, aom_convolve8_vert_msa,
    aom_convolve8_avg_vert_msa, aom_convolve8_msa, aom_convolve8_avg_msa,
    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES_64(convolve8_msa) };
INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_msa));
#endif  // HAVE_MSA && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
1350 }  // namespace
1351