1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <algorithm>
13 #include <vector>
14
15 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16
17 #include "./av1_rtcd.h"
18 #include "./aom_dsp_rtcd.h"
19 #include "aom_dsp/aom_dsp_common.h"
20 #include "aom_ports/mem.h"
21 #include "av1/common/filter.h"
22 #include "av1/common/convolve.h"
23 #include "test/acm_random.h"
24 #include "test/util.h"
25
26 using libaom_test::ACMRandom;
27
28 namespace {
29 using std::tr1::tuple;
filter_block1d_horiz_c(const uint8_t * src_ptr,int src_stride,const int16_t * filter,int tap,uint8_t * dst_ptr,int dst_stride,int w,int h)30 static void filter_block1d_horiz_c(const uint8_t *src_ptr, int src_stride,
31 const int16_t *filter, int tap,
32 uint8_t *dst_ptr, int dst_stride, int w,
33 int h) {
34 src_ptr -= tap / 2 - 1;
35 for (int r = 0; r < h; ++r) {
36 for (int c = 0; c < w; ++c) {
37 int sum = 0;
38 for (int i = 0; i < tap; ++i) {
39 sum += src_ptr[c + i] * filter[i];
40 }
41 dst_ptr[c] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
42 }
43 src_ptr += src_stride;
44 dst_ptr += dst_stride;
45 }
46 }
47
filter_block1d_vert_c(const uint8_t * src_ptr,int src_stride,const int16_t * filter,int tap,uint8_t * dst_ptr,int dst_stride,int w,int h)48 static void filter_block1d_vert_c(const uint8_t *src_ptr, int src_stride,
49 const int16_t *filter, int tap,
50 uint8_t *dst_ptr, int dst_stride, int w,
51 int h) {
52 src_ptr -= (tap / 2 - 1) * src_stride;
53 for (int r = 0; r < h; ++r) {
54 for (int c = 0; c < w; ++c) {
55 int sum = 0;
56 for (int i = 0; i < tap; ++i) {
57 sum += src_ptr[c + i * src_stride] * filter[i];
58 }
59 dst_ptr[c] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
60 }
61 src_ptr += src_stride;
62 dst_ptr += dst_stride;
63 }
64 }
65
// Compares two w x h pixel blocks that may have different strides.
// Returns 1 when every pixel is equal (including when the block is empty),
// and 0 as soon as a mismatch is found.
static int match(const uint8_t *out, int out_stride, const uint8_t *ref_out,
                 int ref_out_stride, int w, int h) {
  const uint8_t *out_row = out;
  const uint8_t *ref_row = ref_out;
  for (int row = 0; row < h; ++row) {
    int col = 0;
    while (col < w && out_row[col] == ref_row[col]) ++col;
    if (col < w) return 0;  // Stopped early: pixel `col` differs.
    out_row += out_stride;
    ref_row += ref_out_stride;
  }
  return 1;
}
75
76 typedef void (*ConvolveFunc)(const uint8_t *src, int src_stride, uint8_t *dst,
77 int dst_stride, int w, int h,
78 const InterpFilterParams filter_params,
79 const int subpel_q4, int step_q4,
80 ConvolveParams *conv_params);
81
82 struct ConvolveFunctions {
ConvolveFunctions__anond82d75de0111::ConvolveFunctions83 ConvolveFunctions(ConvolveFunc hf, ConvolveFunc vf) : hf_(hf), vf_(vf) {}
84 ConvolveFunc hf_;
85 ConvolveFunc vf_;
86 };
87
88 typedef tuple<ConvolveFunctions *, InterpFilter /*filter_x*/,
89 InterpFilter /*filter_y*/>
90 ConvolveParam;
91
92 class Av1ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
93 public:
SetUp()94 virtual void SetUp() {
95 rnd_(ACMRandom::DeterministicSeed());
96 cfs_ = GET_PARAM(0);
97 interp_filter_ls_[0] = GET_PARAM(2);
98 interp_filter_ls_[2] = interp_filter_ls_[0];
99 interp_filter_ls_[1] = GET_PARAM(1);
100 interp_filter_ls_[3] = interp_filter_ls_[1];
101 }
TearDown()102 virtual void TearDown() {
103 while (buf_ls_.size() > 0) {
104 uint8_t *buf = buf_ls_.back();
105 aom_free(buf);
106 buf_ls_.pop_back();
107 }
108 }
add_input(int w,int h,int * stride)109 virtual uint8_t *add_input(int w, int h, int *stride) {
110 uint8_t *buf =
111 reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, kBufferSize));
112 buf_ls_.push_back(buf);
113 *stride = w + MAX_FILTER_TAP - 1;
114 int offset = MAX_FILTER_TAP / 2 - 1;
115 for (int r = 0; r < h + MAX_FILTER_TAP - 1; ++r) {
116 for (int c = 0; c < w + MAX_FILTER_TAP - 1; ++c) {
117 buf[r * (*stride) + c] = rnd_.Rand8();
118 }
119 }
120 return buf + offset * (*stride) + offset;
121 }
add_output(int w,int,int * stride)122 virtual uint8_t *add_output(int w, int /*h*/, int *stride) {
123 uint8_t *buf =
124 reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, kBufferSize));
125 buf_ls_.push_back(buf);
126 *stride = w;
127 return buf;
128 }
random_init_buf(uint8_t * buf,int w,int h,int stride)129 virtual void random_init_buf(uint8_t *buf, int w, int h, int stride) {
130 for (int r = 0; r < h; ++r) {
131 for (int c = 0; c < w; ++c) {
132 buf[r * stride + c] = rnd_.Rand8();
133 }
134 }
135 }
136
137 protected:
138 static const int kDataAlignment = 16;
139 static const int kOuterBlockSize = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
140 static const int kBufferSize = kOuterBlockSize * kOuterBlockSize;
141 std::vector<uint8_t *> buf_ls_;
142 InterpFilter interp_filter_ls_[4];
143 ConvolveFunctions *cfs_;
144 ACMRandom rnd_;
145 };
146
147 int bsize_ls[] = { 1, 2, 4, 8, 16, 32, 64, 3, 7, 15, 31, 63 };
148 int bsize_num = NELEMENTS(bsize_ls);
149
// Checks the vertical convolve function under test against
// filter_block1d_vert_c for every w x h combination from bsize_ls and every
// subpel phase: first with do_average = 0, then with do_average = 1 on top of
// a randomly initialized destination.
TEST_P(Av1ConvolveTest, av1_convolve_vert) {
  const int y_step_q4 = 16;
  ConvolveParams conv_params = get_conv_params(0, 0, 0);

  // The filter is fixed by the test parameter, so look it up once instead of
  // once per (w, h, subpel) combination.
  const InterpFilter filter_y = interp_filter_ls_[0];
  const InterpFilterParams param_vert = av1_get_interp_filter_params(filter_y);

  int in_stride, out_stride, ref_out_stride, avg_out_stride, ref_avg_out_stride;
  uint8_t *in = add_input(MAX_SB_SIZE, MAX_SB_SIZE, &in_stride);
  uint8_t *out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &out_stride);
  uint8_t *ref_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_out_stride);
  uint8_t *avg_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &avg_out_stride);
  uint8_t *ref_avg_out =
      add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_avg_out_stride);

  for (int hb_idx = 0; hb_idx < bsize_num; ++hb_idx) {
    for (int vb_idx = 0; vb_idx < bsize_num; ++vb_idx) {
      const int w = bsize_ls[hb_idx];
      const int h = bsize_ls[vb_idx];
      for (int subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; ++subpel_y_q4) {
        const int16_t *filter_vert =
            av1_get_interp_filter_subpel_kernel(param_vert, subpel_y_q4);

        filter_block1d_vert_c(in, in_stride, filter_vert, param_vert.taps,
                              ref_out, ref_out_stride, w, h);

        // Non-averaging path: output must equal the reference filter.
        conv_params.ref = 0;
        conv_params.do_average = 0;
        cfs_->vf_(in, in_stride, out, out_stride, w, h, param_vert, subpel_y_q4,
                  y_step_q4, &conv_params);
        EXPECT_EQ(match(out, out_stride, ref_out, ref_out_stride, w, h), 1)
            << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_y "
            << filter_y << " subpel_y_q4 " << subpel_y_q4;

        // Averaging path: expected value is the rounded mean of the previous
        // destination contents and the non-averaged output computed above.
        random_init_buf(avg_out, w, h, avg_out_stride);
        for (int r = 0; r < h; ++r) {
          for (int c = 0; c < w; ++c) {
            ref_avg_out[r * ref_avg_out_stride + c] = ROUND_POWER_OF_TWO(
                avg_out[r * avg_out_stride + c] + out[r * out_stride + c], 1);
          }
        }
        conv_params.ref = 1;
        conv_params.do_average = 1;
        cfs_->vf_(in, in_stride, avg_out, avg_out_stride, w, h, param_vert,
                  subpel_y_q4, y_step_q4, &conv_params);
        EXPECT_EQ(match(avg_out, avg_out_stride, ref_avg_out,
                        ref_avg_out_stride, w, h),
                  1)
            << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_y "
            << filter_y << " subpel_y_q4 " << subpel_y_q4;
      }
    }
  }
}
202
// Checks the horizontal convolve function under test against
// filter_block1d_horiz_c for every w x h combination from bsize_ls and every
// subpel phase: first with do_average = 0, then with do_average = 1 on top of
// a randomly initialized destination.
TEST_P(Av1ConvolveTest, av1_convolve_horiz) {
  const int x_step_q4 = 16;
  ConvolveParams conv_params = get_conv_params(0, 0, 0);

  // The filter is fixed by the test parameter, so look it up once instead of
  // once per (w, h, subpel) combination.
  const InterpFilter filter_x = interp_filter_ls_[1];
  const InterpFilterParams param_horiz = av1_get_interp_filter_params(filter_x);

  int in_stride, out_stride, ref_out_stride, avg_out_stride, ref_avg_out_stride;
  uint8_t *in = add_input(MAX_SB_SIZE, MAX_SB_SIZE, &in_stride);
  uint8_t *out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &out_stride);
  uint8_t *ref_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_out_stride);
  uint8_t *avg_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &avg_out_stride);
  uint8_t *ref_avg_out =
      add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_avg_out_stride);

  for (int hb_idx = 0; hb_idx < bsize_num; ++hb_idx) {
    for (int vb_idx = 0; vb_idx < bsize_num; ++vb_idx) {
      const int w = bsize_ls[hb_idx];
      const int h = bsize_ls[vb_idx];
      for (int subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; ++subpel_x_q4) {
        const int16_t *filter_horiz =
            av1_get_interp_filter_subpel_kernel(param_horiz, subpel_x_q4);

        filter_block1d_horiz_c(in, in_stride, filter_horiz, param_horiz.taps,
                               ref_out, ref_out_stride, w, h);

        // Non-averaging path: output must equal the reference filter.
        conv_params.ref = 0;
        conv_params.do_average = 0;
        cfs_->hf_(in, in_stride, out, out_stride, w, h, param_horiz,
                  subpel_x_q4, x_step_q4, &conv_params);
        EXPECT_EQ(match(out, out_stride, ref_out, ref_out_stride, w, h), 1)
            << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_x "
            << filter_x << " subpel_x_q4 " << subpel_x_q4;

        // Averaging path: expected value is the rounded mean of the previous
        // destination contents and the non-averaged output computed above.
        random_init_buf(avg_out, w, h, avg_out_stride);
        for (int r = 0; r < h; ++r) {
          for (int c = 0; c < w; ++c) {
            ref_avg_out[r * ref_avg_out_stride + c] = ROUND_POWER_OF_TWO(
                avg_out[r * avg_out_stride + c] + out[r * out_stride + c], 1);
          }
        }
        conv_params.ref = 1;
        conv_params.do_average = 1;
        cfs_->hf_(in, in_stride, avg_out, avg_out_stride, w, h, param_horiz,
                  subpel_x_q4, x_step_q4, &conv_params);
        // Same separators as the message above so that the field names and
        // values do not run together in the failure output.
        EXPECT_EQ(match(avg_out, avg_out_stride, ref_avg_out,
                        ref_avg_out_stride, w, h),
                  1)
            << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_x "
            << filter_x << " subpel_x_q4 " << subpel_x_q4;
      }
    }
  }
}
255
256 ConvolveFunctions convolve_functions_c(av1_convolve_horiz_c,
257 av1_convolve_vert_c);
258
259 InterpFilter filter_ls[] = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH,
260 MULTITAP_SHARP };
261
262 INSTANTIATE_TEST_CASE_P(
263 C, Av1ConvolveTest,
264 ::testing::Combine(::testing::Values(&convolve_functions_c),
265 ::testing::ValuesIn(filter_ls),
266 ::testing::ValuesIn(filter_ls)));
267
268 #if CONFIG_HIGHBITDEPTH
269 #ifndef __clang_analyzer__
// Convolves a single 10-bit output pixel with av1_highbd_convolve for every
// (subpel_x, subpel_y) phase and compares it with a reference computed here:
// a horizontal pass per source row (rounded and clipped), followed by a
// vertical pass over those row results (rounded and clipped).
TEST(AV1ConvolveTest, av1_highbd_convolve) {
  // Capacity, in taps, of the fixed-size scratch arrays below.
  const int kMaxTaps = 12;

  ACMRandom rnd(ACMRandom::DeterministicSeed());
  InterpFilters interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(EIGHTTAP_REGULAR);
  const int filter_size = filter_params.taps;
  // Guard the stack arrays: src and temp are indexed up to filter_size.
  ASSERT_LE(filter_size, kMaxTaps);
  const int filter_center = filter_size / 2 - 1;
  uint16_t src[kMaxTaps * kMaxTaps];
  const int src_stride = filter_size;
  uint16_t dst[1] = { 0 };
  const int dst_stride = 1;
  const int x_step_q4 = 16;
  const int y_step_q4 = 16;
  const int avg = 0;
  const int bd = 10;
  const int w = 1;
  const int h = 1;

  // Only the filter_size x filter_size window around the pixel is used.
  for (int i = 0; i < filter_size * filter_size; i++) {
    src[i] = rnd.Rand16() % (1 << bd);
  }

  for (int subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
    for (int subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
      av1_highbd_convolve(
          CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
          src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, interp_filters,
          subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);

      const int16_t *x_filter =
          av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
      const int16_t *y_filter =
          av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);

      int temp[kMaxTaps];
      int dst_ref = 0;
      for (int r = 0; r < filter_size; r++) {
        // Horizontal pass for source row r.
        temp[r] = 0;
        for (int c = 0; c < filter_size; c++) {
          temp[r] += x_filter[c] * src[r * filter_size + c];
        }
        temp[r] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
        // Vertical pass accumulates the filtered rows.
        dst_ref += temp[r] * y_filter[r];
      }
      dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
      EXPECT_EQ(dst[0], dst_ref);
    }
  }
}
323 #endif
324
// Checks the averaging mode of av1_highbd_convolve on a single 10-bit pixel:
// convolving src0 with avg = 0 and then src1 with avg = 1 into the same
// destination must equal the rounded mean of the two separate avg = 0
// results.
TEST(AV1ConvolveTest, av1_highbd_convolve_avg) {
  // Capacity, in taps, of the fixed-size source arrays below.
  const int kMaxTaps = 12;

  ACMRandom rnd(ACMRandom::DeterministicSeed());
  InterpFilters interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
  InterpFilterParams filter_params =
      av1_get_interp_filter_params(EIGHTTAP_REGULAR);
  const int filter_size = filter_params.taps;
  // Guard the stack arrays: src0/src1 are indexed up to filter_size^2.
  ASSERT_LE(filter_size, kMaxTaps);
  const int filter_center = filter_size / 2 - 1;
  uint16_t src0[kMaxTaps * kMaxTaps];
  uint16_t src1[kMaxTaps * kMaxTaps];
  const int src_stride = filter_size;
  // Position of the filtered pixel inside the filter_size-wide source window.
  const int offset = filter_size * filter_center + filter_center;
  uint16_t dst0[1] = { 0 };
  uint16_t dst1[1] = { 0 };
  uint16_t dst[1] = { 0 };
  const int dst_stride = 1;
  const int x_step_q4 = 16;
  const int y_step_q4 = 16;
  const int bd = 10;
  const int w = 1;
  const int h = 1;

  for (int i = 0; i < filter_size * filter_size; i++) {
    src0[i] = rnd.Rand16() % (1 << bd);
    src1[i] = rnd.Rand16() % (1 << bd);
  }

  for (int subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
    for (int subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
      // Separate, non-averaged results for each source.
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
                          interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, /*avg=*/0, bd);
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
                          interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, /*avg=*/0, bd);

      // Write src0's result, then average src1's result into it.
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
                          interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, /*avg=*/0, bd);
      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
                          CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
                          interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
                          y_step_q4, /*avg=*/1, bd);

      EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
    }
  }
}
385 #endif // CONFIG_HIGHBITDEPTH
386
387 #define CONVOLVE_SPEED_TEST 0
388 #if CONVOLVE_SPEED_TEST
// Defines a speed test that calls the high-bitdepth convolve `func` 100000
// times on block_size x block_size blocks, walking a frame_size x frame_size
// destination (source has a 7-pixel border for the filter taps).
// Uses the same filter API names as the rest of this file
// (EIGHTTAP_REGULAR, .taps, av1_get_interp_filter_subpel_kernel), and the
// row advance is driven by row_offset so the walk actually moves across
// each row before stepping down.
#define highbd_convolve_speed(func, block_size, frame_size)                  \
  TEST(AV1ConvolveTest, func##_speed_##block_size##_##frame_size) {          \
    ACMRandom rnd(ACMRandom::DeterministicSeed());                           \
    InterpFilter interp_filter = EIGHTTAP_REGULAR;                           \
    InterpFilterParams filter_params =                                       \
        av1_get_interp_filter_params(interp_filter);                         \
    int filter_size = filter_params.taps;                                    \
    int filter_center = filter_size / 2 - 1;                                 \
    DECLARE_ALIGNED(16, uint16_t,                                            \
                    src[(frame_size + 7) * (frame_size + 7)]) = { 0 };       \
    int src_stride = frame_size + 7;                                         \
    DECLARE_ALIGNED(16, uint16_t, dst[frame_size * frame_size]) = { 0 };     \
    int dst_stride = frame_size;                                             \
    int x_step_q4 = 16;                                                      \
    int y_step_q4 = 16;                                                      \
    int subpel_x_q4 = 8;                                                     \
    int subpel_y_q4 = 6;                                                     \
    int bd = 10;                                                             \
                                                                             \
    int w = block_size;                                                      \
    int h = block_size;                                                      \
                                                                             \
    const int16_t *filter_x =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);     \
    const int16_t *filter_y =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);     \
                                                                             \
    for (int i = 0; i < src_stride * src_stride; i++) {                      \
      src[i] = rnd.Rand16() % (1 << bd);                                     \
    }                                                                        \
                                                                             \
    int offset = filter_center * src_stride + filter_center;                 \
    int row_offset = 0;                                                      \
    int col_offset = 0;                                                      \
    for (int i = 0; i < 100000; i++) {                                       \
      int src_total_offset = offset + col_offset * src_stride + row_offset;  \
      int dst_total_offset = col_offset * dst_stride + row_offset;           \
      func(CONVERT_TO_BYTEPTR(src + src_total_offset), src_stride,           \
           CONVERT_TO_BYTEPTR(dst + dst_total_offset), dst_stride, filter_x, \
           x_step_q4, filter_y, y_step_q4, w, h, bd);                        \
      if (row_offset + w + w < frame_size) {                                 \
        row_offset += w;                                                     \
      } else {                                                               \
        row_offset = 0;                                                      \
        col_offset += h;                                                     \
      }                                                                      \
      if (col_offset + h >= frame_size) {                                    \
        col_offset = 0;                                                      \
      }                                                                      \
    }                                                                        \
  }
440
// Defines a speed test that calls the low-bitdepth convolve `func` 100000
// times on block_size x block_size blocks, walking a frame_size x frame_size
// destination (source has a 7-pixel border for the filter taps).
// Uses the same filter API names as the rest of this file
// (EIGHTTAP_REGULAR, .taps, av1_get_interp_filter_subpel_kernel). The block
// position computed from row_offset/col_offset is passed to `func`, matching
// highbd_convolve_speed, so both variants touch memory in the same pattern.
#define lowbd_convolve_speed(func, block_size, frame_size)                   \
  TEST(AV1ConvolveTest, func##_speed_l_##block_size##_##frame_size) {        \
    ACMRandom rnd(ACMRandom::DeterministicSeed());                           \
    InterpFilter interp_filter = EIGHTTAP_REGULAR;                           \
    InterpFilterParams filter_params =                                       \
        av1_get_interp_filter_params(interp_filter);                         \
    int filter_size = filter_params.taps;                                    \
    int filter_center = filter_size / 2 - 1;                                 \
    DECLARE_ALIGNED(16, uint8_t, src[(frame_size + 7) * (frame_size + 7)]);  \
    int src_stride = frame_size + 7;                                         \
    DECLARE_ALIGNED(16, uint8_t, dst[frame_size * frame_size]);              \
    int dst_stride = frame_size;                                             \
    int x_step_q4 = 16;                                                      \
    int y_step_q4 = 16;                                                      \
    int subpel_x_q4 = 8;                                                     \
    int subpel_y_q4 = 6;                                                     \
    int bd = 8;                                                              \
                                                                             \
    int w = block_size;                                                      \
    int h = block_size;                                                      \
                                                                             \
    const int16_t *filter_x =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);     \
    const int16_t *filter_y =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);     \
                                                                             \
    for (int i = 0; i < src_stride * src_stride; i++) {                      \
      src[i] = rnd.Rand16() % (1 << bd);                                     \
    }                                                                        \
                                                                             \
    int offset = filter_center * src_stride + filter_center;                 \
    int row_offset = 0;                                                      \
    int col_offset = 0;                                                      \
    for (int i = 0; i < 100000; i++) {                                       \
      int src_total_offset = offset + col_offset * src_stride + row_offset;  \
      int dst_total_offset = col_offset * dst_stride + row_offset;           \
      func(src + src_total_offset, src_stride, dst + dst_total_offset,       \
           dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h);      \
      if (row_offset + w + w < frame_size) {                                 \
        row_offset += w;                                                     \
      } else {                                                               \
        row_offset = 0;                                                      \
        col_offset += h;                                                     \
      }                                                                      \
      if (col_offset + h >= frame_size) {                                    \
        col_offset = 0;                                                      \
      }                                                                      \
    }                                                                        \
  }
488
// In this experiment, aom_highbd_convolve8_sse2 and aom_convolve8_sse2 run
// at similar speeds when the frame size is 64x64. When the frame size grows
// to 1024x1024, however, aom_highbd_convolve8_sse2 is around 50% slower than
// aom_convolve8_sse2. We think the bottleneck is memory I/O.
494 highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 64);
495 highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 64);
496 highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 64);
497 highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 64);
498
499 lowbd_convolve_speed(aom_convolve8_sse2, 8, 64);
500 lowbd_convolve_speed(aom_convolve8_sse2, 16, 64);
501 lowbd_convolve_speed(aom_convolve8_sse2, 32, 64);
502 lowbd_convolve_speed(aom_convolve8_sse2, 64, 64);
503
504 highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 1024);
505 highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 1024);
506 highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 1024);
507 highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 1024);
508
509 lowbd_convolve_speed(aom_convolve8_sse2, 8, 1024);
510 lowbd_convolve_speed(aom_convolve8_sse2, 16, 1024);
511 lowbd_convolve_speed(aom_convolve8_sse2, 32, 1024);
512 lowbd_convolve_speed(aom_convolve8_sse2, 64, 1024);
513 #endif // CONVOLVE_SPEED_TEST
514 } // namespace
515