1 /*
2 * Copyright © 2018, VideoLAN and dav1d authors
3 * Copyright © 2018, Two Orioles, LLC
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "tests/checkasm/checkasm.h"
29
30 #include "src/levels.h"
31 #include "src/mc.h"
32
/* Printable name of each 2D filter, indexed by the `filter` loop variable
 * used in the check_*() functions below. */
static const char *const filter_names[] = {
    "8tap_regular",         /* 0 */
    "8tap_regular_smooth",  /* 1 */
    "8tap_regular_sharp",   /* 2 */
    "8tap_sharp_regular",   /* 3 */
    "8tap_sharp_smooth",    /* 4 */
    "8tap_sharp",           /* 5 */
    "8tap_smooth_regular",  /* 6 */
    "8tap_smooth",          /* 7 */
    "8tap_smooth_sharp",    /* 8 */
    "bilinear",             /* 9 */
};

/* Name of the subpel case, indexed by the `mxy` bitmask:
 * bit 0 set = nonzero mx, bit 1 set = nonzero my. */
static const char *const mxy_names[] = {
    "0",
    "h",
    "v",
    "hv",
};

/* Name suffix for the scaled tests, indexed by `p`:
 * 0 = random dy, 1 = dy fixed to 1 << 10, 2 = dy fixed to 2 << 10. */
static const char *const scaled_paths[] = {
    "",
    "_dy1",
    "_dy2",
};
42
/* Returns the block height to test after `h`.
 *
 * Between 4 and 32 the sequence also visits the 1.5x heights, giving
 * 4, 6, 8, 12, 16, 24, 32; every other value is simply doubled. */
static int mc_h_next(const int h) {
    /* 4, 8, 16 -> 6, 12, 24 */
    if (h == 4 || h == 8 || h == 16)
        return h + (h >> 1);

    /* 6, 12, 24 -> 8, 16, 32 */
    if (h == 6 || h == 12 || h == 24)
        return (h / 3) * 4;

    return 2 * h;
}
57
/* Tests every c->mc[] entry for each width (2..128), subpel case (none, h, v,
 * hv) and a range of heights, comparing the call_ref and call_new outputs. */
static void check_mc(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 135 * 135,);
    ALIGN_STK_64(pixel, c_dst, 128 * 128,);
    ALIGN_STK_64(pixel, a_dst, 128 * 128,);
    const ptrdiff_t src_stride = 135 * sizeof(pixel);
    /* Start 3 rows and 3 columns into the buffer; presumably this leaves
     * room for the filter taps that reach before the block. */
    const pixel *src = &src_buf[3 * 135 + 3];

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, int w, int h, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        /* Only these two filters are benchmarked. */
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 2; w <= 128; w *= 2) {
            const ptrdiff_t dst_stride = w * sizeof(pixel);
            const int h_first = w > 32 ? w / 4 : 2;
            const int h_last = imax(imin(w * 4, 128), 32);

            for (int mxy = 0; mxy < 4; mxy++) {
                if (!check_func(c->mc[filter], "mc_%s_w%d_%s_%dbpc",
                                filter_names[filter], w, mxy_names[mxy],
                                BITDEPTH))
                {
                    continue;
                }

                for (int h = h_first; h <= h_last; h = mc_h_next(h)) {
                    /* A fractional offset in 1..15 on each axis selected by
                     * mxy (bit 0: x, bit 1: y), zero otherwise. */
                    int mx = 0, my = 0;
                    if (mxy & 1)
                        mx = 1 + rnd() % 15;
                    if (mxy & 2)
                        my = 1 + rnd() % 15;
#if BITDEPTH == 16
                    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                    const int bitdepth_max = 0xff;
#endif

                    for (int n = 0; n < 135 * 135; n++)
                        src_buf[n] = rnd() & bitdepth_max;

                    call_ref(c_dst, dst_stride, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    call_new(a_dst, dst_stride, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    checkasm_check_pixel(c_dst, dst_stride,
                                         a_dst, dst_stride,
                                         w, h, "dst");

                    if (do_bench) {
                        bench_new(a_dst, dst_stride, src, src_stride, w, h,
                                  mx, my HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mc");
}
109
110 /* Generate worst case input in the topleft corner, randomize the rest */
generate_mct_input(pixel * const buf,const int bitdepth_max)111 static void generate_mct_input(pixel *const buf, const int bitdepth_max) {
112 static const int8_t pattern[8] = { -1, 0, -1, 0, 0, -1, 0, -1 };
113 const int sign = -(rnd() & 1);
114
115 for (int y = 0; y < 135; y++)
116 for (int x = 0; x < 135; x++)
117 buf[135*y+x] = ((x | y) < 8 ? (pattern[x] ^ pattern[y] ^ sign)
118 : rnd()) & bitdepth_max;
119 }
120
/* Tests every c->mct[] entry (int16_t intermediate output, rows packed with
 * a stride of w elements) for each width 4..128, subpel case and height. */
static void check_mct(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 135 * 135,);
    ALIGN_STK_64(int16_t, c_tmp, 128 * 128,);
    ALIGN_STK_64(int16_t, a_tmp, 128 * 128,);
    const ptrdiff_t src_stride = 135 * sizeof(pixel);
    const pixel *src = &src_buf[3 * 135 + 3];

    declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
                 int w, int h, int mx, int my HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        /* Only these two filters are benchmarked. */
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 4; w <= 128; w *= 2) {
            const int h_first = imax(w / 4, 4);
            const int h_last = imin(w * 4, 128);

            for (int mxy = 0; mxy < 4; mxy++) {
                if (!check_func(c->mct[filter], "mct_%s_w%d_%s_%dbpc",
                                filter_names[filter], w, mxy_names[mxy],
                                BITDEPTH))
                {
                    continue;
                }

                for (int h = h_first; h <= h_last; h *= 2) {
                    /* A fractional offset in 1..15 on each axis selected by
                     * mxy (bit 0: x, bit 1: y), zero otherwise. */
                    int mx = 0, my = 0;
                    if (mxy & 1)
                        mx = 1 + rnd() % 15;
                    if (mxy & 2)
                        my = 1 + rnd() % 15;
#if BITDEPTH == 16
                    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                    const int bitdepth_max = 0xff;
#endif
                    generate_mct_input(src_buf, bitdepth_max);

                    call_ref(c_tmp, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    call_new(a_tmp, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    checkasm_check(int16_t, c_tmp, w * sizeof(*c_tmp),
                                            a_tmp, w * sizeof(*a_tmp),
                                            w, h, "tmp");

                    if (do_bench) {
                        bench_new(a_tmp, src, src_stride, w, h,
                                  mx, my HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mct");
}
164
/* Tests every c->mc_scaled[] entry. Each width is checked three times: with a
 * random vertical step, and with dy fixed to 1 << 10 and 2 << 10 (the "_dy1"
 * and "_dy2" name suffixes). bitdepth_max is chosen once for the whole run. */
static void check_mc_scaled(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 263 * 263,);
    ALIGN_STK_64(pixel, c_dst, 128 * 128,);
    ALIGN_STK_64(pixel, a_dst, 128 * 128,);
    const ptrdiff_t src_stride = 263 * sizeof(pixel);
    const pixel *src = &src_buf[3 * 263 + 3];
#if BITDEPTH == 16
    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
    const int bitdepth_max = 0xff;
#endif

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, int w, int h,
                 int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        /* Only these two filters are benchmarked. */
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 2; w <= 128; w *= 2) {
            const ptrdiff_t dst_stride = w * sizeof(pixel);
            const int h_first = w > 32 ? w / 4 : 2;
            const int h_last = imax(imin(w * 4, 128), 32);

            for (int p = 0; p < 3; p++) {
                if (!check_func(c->mc_scaled[filter],
                                "mc_scaled_%s_w%d%s_%dbpc",
                                filter_names[filter], w, scaled_paths[p],
                                BITDEPTH))
                {
                    continue;
                }

                for (int h = h_first; h <= h_last; h = mc_h_next(h)) {
                    const int mx = rnd() % 1024;
                    const int my = rnd() % 1024;
                    const int dx = 1 + rnd() % 2048;
                    /* ystep=1.0 and ystep=2.0 paths when p is 1 or 2 */
                    const int dy = p == 0 ? 1 + rnd() % 2048 : p * 1024;

                    for (int n = 0; n < 263 * 263; n++)
                        src_buf[n] = rnd() & bitdepth_max;

                    call_ref(c_dst, dst_stride, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    call_new(a_dst, dst_stride, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    checkasm_check_pixel(c_dst, dst_stride,
                                         a_dst, dst_stride, w, h, "dst");

                    if (do_bench) {
                        bench_new(a_dst, dst_stride, src, src_stride,
                                  w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mc_scaled");
}
218
/* Tests every c->mct_scaled[] entry (int16_t intermediate output). As in
 * check_mc_scaled(), each width is run with a random dy and with dy fixed to
 * 1 << 10 and 2 << 10; bitdepth_max is chosen once for the whole run. */
static void check_mct_scaled(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 263 * 263,);
    ALIGN_STK_64(int16_t, c_tmp, 128 * 128,);
    ALIGN_STK_64(int16_t, a_tmp, 128 * 128,);
    const ptrdiff_t src_stride = 263 * sizeof(pixel);
    const pixel *src = &src_buf[3 * 263 + 3];
#if BITDEPTH == 16
    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
    const int bitdepth_max = 0xff;
#endif

    declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
                 int w, int h, int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        /* Only these two filters are benchmarked. */
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 4; w <= 128; w *= 2) {
            const int h_first = imax(w / 4, 4);
            const int h_last = imin(w * 4, 128);

            for (int p = 0; p < 3; p++) {
                if (!check_func(c->mct_scaled[filter],
                                "mct_scaled_%s_w%d%s_%dbpc",
                                filter_names[filter], w, scaled_paths[p],
                                BITDEPTH))
                {
                    continue;
                }

                for (int h = h_first; h <= h_last; h = mc_h_next(h)) {
                    const int mx = rnd() % 1024;
                    const int my = rnd() % 1024;
                    const int dx = 1 + rnd() % 2048;
                    /* ystep=1.0 and ystep=2.0 paths when p is 1 or 2 */
                    const int dy = p == 0 ? 1 + rnd() % 2048 : p * 1024;

                    for (int n = 0; n < 263 * 263; n++)
                        src_buf[n] = rnd() & bitdepth_max;

                    call_ref(c_tmp, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    call_new(a_tmp, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    checkasm_check(int16_t, c_tmp, w * sizeof(*c_tmp),
                                            a_tmp, w * sizeof(*a_tmp),
                                            w, h, "tmp");

                    if (do_bench) {
                        bench_new(a_tmp, src, src_stride,
                                  w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mct_scaled");
}
270
/* Produces the two 128x128 int16_t intermediates used by the compound tests.
 *
 * For each of tmp[0] and tmp[1], `buf` (a 135x135 pixel scratch area) is
 * refilled via generate_mct_input() and then run through the 8tap-sharp mct
 * function at mx = my = 8. */
static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf,
                     int16_t (*const tmp)[128 * 128], const int bitdepth_max)
{
    const ptrdiff_t stride = 135 * sizeof(pixel);
    const pixel *const src = buf + 3 * 135 + 3;

    for (int idx = 0; idx < 2; idx++) {
        generate_mct_input(buf, bitdepth_max);
        c->mct[FILTER_2D_8TAP_SHARP](tmp[idx], src, stride, 128, 128,
                                     8, 8 HIGHBD_TAIL_SUFFIX);
    }
}
281
/* Tests c->avg for widths 4..128 over a range of heights. */
static void check_avg(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst is 135x135 because init_tmp() also uses it as the scratch pixel
     * buffer before call_ref overwrites it with output. */
    ALIGN_STK_64(pixel, c_dst, 135 * 135,);
    ALIGN_STK_64(pixel, a_dst, 128 * 128,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w *= 2) {
        if (!check_func(c->avg, "avg_w%d_%dbpc", w, BITDEPTH))
            continue;

        const ptrdiff_t dst_stride = w * sizeof(pixel);
        const int h_first = imax(w / 4, 4);
        const int h_last = imin(w * 4, 128);

        for (int h = h_first; h <= h_last; h *= 2) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif

            init_tmp(c, c_dst, tmp, bitdepth_max);
            call_ref(c_dst, dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
            call_new(a_dst, dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
        }
    }
    report("avg");
}
312
/* Tests c->w_avg for widths 4..128 over a range of heights, with a random
 * weight in 1..15 per case. */
static void check_w_avg(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst is 135x135 because init_tmp() also uses it as the scratch pixel
     * buffer before call_ref overwrites it with output. */
    ALIGN_STK_64(pixel, c_dst, 135 * 135,);
    ALIGN_STK_64(pixel, a_dst, 128 * 128,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, int weight HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w *= 2) {
        if (!check_func(c->w_avg, "w_avg_w%d_%dbpc", w, BITDEPTH))
            continue;

        const ptrdiff_t dst_stride = w * sizeof(pixel);
        const int h_first = imax(w / 4, 4);
        const int h_last = imin(w * 4, 128);

        for (int h = h_first; h <= h_last; h *= 2) {
            const int weight = 1 + rnd() % 15;
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif
            init_tmp(c, c_dst, tmp, bitdepth_max);

            call_ref(c_dst, dst_stride, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
            call_new(a_dst, dst_stride, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
        }
    }
    report("w_avg");
}
344
/* Tests c->mask for widths 4..128 over a range of heights. The mask holds
 * random values in 0..64 and is generated once, up front. */
static void check_mask(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst is 135x135 because init_tmp() also uses it as the scratch pixel
     * buffer before call_ref overwrites it with output. */
    ALIGN_STK_64(pixel, c_dst, 135 * 135,);
    ALIGN_STK_64(pixel, a_dst, 128 * 128,);
    ALIGN_STK_64(uint8_t, mask, 128 * 128,);

    for (int n = 0; n < 128 * 128; n++)
        mask[n] = rnd() % 65;

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, const uint8_t *mask
                 HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w *= 2) {
        if (!check_func(c->mask, "mask_w%d_%dbpc", w, BITDEPTH))
            continue;

        const ptrdiff_t dst_stride = w * sizeof(pixel);
        const int h_first = imax(w / 4, 4);
        const int h_last = imin(w * 4, 128);

        for (int h = h_first; h <= h_last; h *= 2) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif
            init_tmp(c, c_dst, tmp, bitdepth_max);
            call_ref(c_dst, dst_stride, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
            call_new(a_dst, dst_stride, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
        }
    }
    report("mask");
}
379
/* Tests the three c->w_mask[] variants (444, 422, 420). Both the blended
 * pixels and the mask written by the function are compared; the compared
 * mask is shrunk by the horizontal/vertical shift of the variant. */
static void check_w_mask(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst is 135x135 because init_tmp() also uses it as the scratch pixel
     * buffer before call_ref overwrites it with output. */
    ALIGN_STK_64(pixel, c_dst, 135 * 135,);
    ALIGN_STK_64(pixel, a_dst, 128 * 128,);
    ALIGN_STK_64(uint8_t, c_mask, 128 * 128,);
    ALIGN_STK_64(uint8_t, a_mask, 128 * 128,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, uint8_t *mask, int sign
                 HIGHBD_DECL_SUFFIX);

    /* Per variant: number used in the test name, then the right shifts
     * applied to the mask width and height. */
    static const struct {
        uint16_t layout;
        uint8_t hor, ver;
    } subsampling[3] = {
        { 444, 0, 0 },
        { 422, 1, 0 },
        { 420, 1, 1 },
    };

    for (int i = 0; i < 3; i++) {
        for (int w = 4; w <= 128; w *= 2) {
            if (!check_func(c->w_mask[i], "w_mask_%d_w%d_%dbpc",
                            subsampling[i].layout, w, BITDEPTH))
            {
                continue;
            }

            const ptrdiff_t dst_stride = w * sizeof(pixel);
            const int mask_w = w >> subsampling[i].hor;
            const int h_first = imax(w / 4, 4);
            const int h_last = imin(w * 4, 128);

            for (int h = h_first; h <= h_last; h *= 2) {
                const int sign = rnd() & 1;
#if BITDEPTH == 16
                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                const int bitdepth_max = 0xff;
#endif
                const int mask_h = h >> subsampling[i].ver;

                init_tmp(c, c_dst, tmp, bitdepth_max);

                call_ref(c_dst, dst_stride, tmp[0], tmp[1], w, h,
                         c_mask, sign HIGHBD_TAIL_SUFFIX);
                call_new(a_dst, dst_stride, tmp[0], tmp[1], w, h,
                         a_mask, sign HIGHBD_TAIL_SUFFIX);
                checkasm_check_pixel(c_dst, dst_stride,
                                     a_dst, dst_stride,
                                     w, h, "dst");
                checkasm_check(uint8_t, c_mask, mask_w,
                                        a_mask, mask_w,
                                        mask_w, mask_h,
                                        "mask");

                bench_new(a_dst, dst_stride, tmp[0], tmp[1], w, h,
                          a_mask, sign HIGHBD_TAIL_SUFFIX);
            }
        }
    }
    report("w_mask");
}
429
/* Tests c->blend for widths 4..32. The function modifies dst in place, so
 * both destination buffers start from the same random contents. */
static void check_blend(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, tmp, 32 * 32,);
    ALIGN_STK_64(pixel, c_dst, 32 * 32,);
    ALIGN_STK_64(pixel, a_dst, 32 * 32,);
    ALIGN_STK_64(uint8_t, mask, 32 * 32,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h, const uint8_t *mask);

    for (int w = 4; w <= 32; w *= 2) {
        const ptrdiff_t dst_stride = w * sizeof(pixel);
        if (!check_func(c->blend, "blend_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_first = imax(w / 2, 4);
        const int h_last = imin(w * 2, 32);

        for (int h = h_first; h <= h_last; h *= 2) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif
            /* Second input and mask (values 0..64), drawn pairwise. */
            for (int n = 0; n < 32 * 32; n++) {
                tmp[n] = rnd() & bitdepth_max;
                mask[n] = rnd() % 65;
            }
            /* Identical starting contents for both destinations. */
            for (int n = 0; n < w * h; n++) {
                const pixel px = rnd() & bitdepth_max;
                a_dst[n] = px;
                c_dst[n] = px;
            }

            call_ref(c_dst, dst_stride, tmp, w, h, mask);
            call_new(a_dst, dst_stride, tmp, w, h, mask);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp, w, h, mask);
        }
    }
    report("blend");
}
465
/* Tests c->blend_v for widths 2..32 and heights 2..128 (2..64 when w is 2).
 * Both destination buffers start from the same random contents. */
static void check_blend_v(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, tmp, 32 * 128,);
    ALIGN_STK_64(pixel, c_dst, 32 * 128,);
    ALIGN_STK_64(pixel, a_dst, 32 * 128,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h);

    for (int w = 2; w <= 32; w *= 2) {
        const ptrdiff_t dst_stride = w * sizeof(pixel);
        if (!check_func(c->blend_v, "blend_v_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_last = w == 2 ? 64 : 128;

        for (int h = 2; h <= h_last; h *= 2) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif

            /* Identical starting contents for both destinations. */
            for (int n = 0; n < w * h; n++) {
                const pixel px = rnd() & bitdepth_max;
                a_dst[n] = px;
                c_dst[n] = px;
            }
            for (int n = 0; n < 32 * 128; n++)
                tmp[n] = rnd() & bitdepth_max;

            call_ref(c_dst, dst_stride, tmp, w, h);
            call_new(a_dst, dst_stride, tmp, w, h);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp, w, h);
        }
    }
    report("blend_v");
}
499
/* Tests c->blend_h for widths 2..128 and heights 2..32 (4..32 when w is 128).
 * Both destination buffers start from the same random contents. */
static void check_blend_h(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, tmp, 128 * 32,);
    ALIGN_STK_64(pixel, c_dst, 128 * 32,);
    ALIGN_STK_64(pixel, a_dst, 128 * 32,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h);

    for (int w = 2; w <= 128; w *= 2) {
        const ptrdiff_t dst_stride = w * sizeof(pixel);
        if (!check_func(c->blend_h, "blend_h_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_first = w == 128 ? 4 : 2;

        for (int h = h_first; h <= 32; h *= 2) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif
            /* Identical starting contents for both destinations. */
            for (int n = 0; n < w * h; n++) {
                const pixel px = rnd() & bitdepth_max;
                a_dst[n] = px;
                c_dst[n] = px;
            }
            for (int n = 0; n < 128 * 32; n++)
                tmp[n] = rnd() & bitdepth_max;

            call_ref(c_dst, dst_stride, tmp, w, h);
            call_new(a_dst, dst_stride, tmp, w, h);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp, w, h);
        }
    }
    report("blend_h");
}
532
/* Tests c->warp8x8 once, with random mx, my and abcd[] values (each drawn
 * from -0xa00 .. 0x15ff) and random source pixels. */
static void check_warp8x8(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 15 * 15,);
    ALIGN_STK_64(pixel, c_dst, 8 * 8,);
    ALIGN_STK_64(pixel, a_dst, 8 * 8,);
    const ptrdiff_t src_stride = 15 * sizeof(pixel);
    const ptrdiff_t dst_stride = 8 * sizeof(pixel);
    const pixel *src = &src_buf[3 * 15 + 3];
    int16_t abcd[4];

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->warp8x8, "warp_8x8_%dbpc", BITDEPTH)) {
        const int mx = -0xa00 + (rnd() & 0x1fff);
        const int my = -0xa00 + (rnd() & 0x1fff);
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif

        for (int16_t *coef = abcd; coef < abcd + 4; coef++)
            *coef = -0xa00 + (rnd() & 0x1fff);

        for (int n = 0; n < 15 * 15; n++)
            src_buf[n] = rnd() & bitdepth_max;

        call_ref(c_dst, dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
        call_new(a_dst, dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
        checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                             8, 8, "dst");

        bench_new(a_dst, dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
    }
    report("warp8x8");
}
570
/* Tests c->warp8x8t once. Same inputs as check_warp8x8(), but the output is
 * an 8x8 int16_t block and the stride argument passed is 8. */
static void check_warp8x8t(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 15 * 15,);
    ALIGN_STK_64(int16_t, c_tmp, 8 * 8,);
    ALIGN_STK_64(int16_t, a_tmp, 8 * 8,);
    const ptrdiff_t src_stride = 15 * sizeof(pixel);
    const pixel *src = &src_buf[3 * 15 + 3];
    int16_t abcd[4];

    declare_func(void, int16_t *tmp, ptrdiff_t tmp_stride, const pixel *src,
                 ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->warp8x8t, "warp_8x8t_%dbpc", BITDEPTH)) {
        const int mx = -0xa00 + (rnd() & 0x1fff);
        const int my = -0xa00 + (rnd() & 0x1fff);
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif

        for (int16_t *coef = abcd; coef < abcd + 4; coef++)
            *coef = -0xa00 + (rnd() & 0x1fff);

        for (int n = 0; n < 15 * 15; n++)
            src_buf[n] = rnd() & bitdepth_max;

        call_ref(c_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
        call_new(a_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
        checkasm_check(int16_t, c_tmp, 8 * sizeof(*c_tmp),
                                a_tmp, 8 * sizeof(*a_tmp),
                                8, 8, "tmp");

        bench_new(a_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
    }
    report("warp8x8t");
}
608
/* Bitmask naming the sides of a block for which the edge flag is set;
 * random_offset_for_edge() places the block according to these bits. */
enum EdgeFlags {
    HAVE_TOP    = 1 << 0,
    HAVE_BOTTOM = 1 << 1,
    HAVE_LEFT   = 1 << 2,
    HAVE_RIGHT  = 1 << 3,
};
615
random_offset_for_edge(int * const x,int * const y,const int bw,const int bh,int * const iw,int * const ih,const enum EdgeFlags edge)616 static void random_offset_for_edge(int *const x, int *const y,
617 const int bw, const int bh,
618 int *const iw, int *const ih,
619 const enum EdgeFlags edge)
620 {
621 #define set_off(edge1, edge2, pos, dim) \
622 *i##dim = edge & (HAVE_##edge1 | HAVE_##edge2) ? 160 : 1 + (rnd() % (b##dim - 2)); \
623 switch (edge & (HAVE_##edge1 | HAVE_##edge2)) { \
624 case HAVE_##edge1 | HAVE_##edge2: \
625 assert(b##dim <= *i##dim); \
626 *pos = rnd() % (*i##dim - b##dim + 1); \
627 break; \
628 case HAVE_##edge1: \
629 *pos = (*i##dim - b##dim) + 1 + (rnd() % (b##dim - 1)); \
630 break; \
631 case HAVE_##edge2: \
632 *pos = -(1 + (rnd() % (b##dim - 1))); \
633 break; \
634 case 0: \
635 assert(b##dim - 1 > *i##dim); \
636 *pos = -(1 + (rnd() % (b##dim - *i##dim - 1))); \
637 break; \
638 }
639 set_off(LEFT, RIGHT, x, w);
640 set_off(TOP, BOTTOM, y, h);
641 }
642
/* Tests c->emu_edge for block sizes around each w x h (plus 0..7 extra
 * pixels per axis) and for every edge-flag combination except 0xf. */
static void check_emuedge(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, c_dst, 135 * 192,);
    ALIGN_STK_64(pixel, a_dst, 135 * 192,);
    ALIGN_STK_64(pixel, src, 160 * 160,);
    const ptrdiff_t dst_stride = 192 * sizeof(pixel);
    const ptrdiff_t src_stride = 160 * sizeof(pixel);

    for (int n = 0; n < 160 * 160; n++)
        src[n] = rnd() & ((1U << BITDEPTH) - 1);

    declare_func(void, intptr_t bw, intptr_t bh, intptr_t iw, intptr_t ih,
                 intptr_t x, intptr_t y,
                 pixel *dst, ptrdiff_t dst_stride,
                 const pixel *src, ptrdiff_t src_stride);

    int x, y, iw, ih;
    for (int w = 4; w <= 128; w *= 2) {
        if (!check_func(c->emu_edge, "emu_edge_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_first = imax(w / 4, 4);
        const int h_last = imin(w * 4, 128);

        for (int h = h_first; h <= h_last; h *= 2) {
            // we skip 0xf, since it implies that we don't need emu_edge
            for (int flags = 0; flags < 0xf; flags++) {
                const int bw = w + (rnd() & 7);
                const int bh = h + (rnd() & 7);
                random_offset_for_edge(&x, &y, bw, bh, &iw, &ih,
                                       (enum EdgeFlags) flags);
                call_ref(bw, bh, iw, ih, x, y,
                         c_dst, dst_stride, src, src_stride);
                call_new(bw, bh, iw, ih, x, y,
                         a_dst, dst_stride, src, src_stride);
                checkasm_check_pixel(c_dst, dst_stride,
                                     a_dst, dst_stride,
                                     bw, bh, "dst");
            }
        }

        /* Benchmark the four single-flag cases with a (w+7) x (w+7) block. */
        for (int flags = 1; flags < 0xf; flags *= 2) {
            const int bsz = w + 7;
            random_offset_for_edge(&x, &y, bsz, bsz, &iw, &ih,
                                   (enum EdgeFlags) flags);
            bench_new(bsz, bsz, iw, ih, x, y,
                      a_dst, dst_stride, src, src_stride);
        }
    }
    report("emu_edge");
}
682
/* Computes the initial horizontal sub-pixel position for upscaling a row
 * from in_w to out_w pixels.
 *
 * `step` is the per-output-pixel source step in 14-bit fixed point.
 * Returns the position masked to its low 14 bits (0 .. 0x3fff). */
static int get_upscale_x0(const int in_w, const int out_w, const int step) {
    /* Accumulated rounding error of `step` over the whole output row. */
    const int err = out_w * step - (in_w << 14);
    /* err may be negative. Use truncating division rather than `>> 1`, whose
     * result for negative values is implementation-defined and rounds toward
     * negative infinity on common targets. */
    const int x0 = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w + 128 - (err / 2);
    return x0 & 0x3fff;
}
688
/* Tests c->resize once, horizontally upscaling 64 rows from a random source
 * width (16..512) by a random factor of w_den / 8 with w_den in 9..16. */
static void check_resize(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, c_dst, 1024 * 64,);
    ALIGN_STK_64(pixel, a_dst, 1024 * 64,);
    ALIGN_STK_64(pixel, src, 512 * 64,);

    const int height = 64;
    const int max_src_width = 512;
    const ptrdiff_t dst_stride = 1024 * sizeof(pixel);
    const ptrdiff_t src_stride = 512 * sizeof(pixel);

    /* Parameter names follow the order the calls below actually use:
     * destination width, then height, then source width. */
    declare_func(void, pixel *dst, ptrdiff_t dst_stride,
                 const pixel *src, ptrdiff_t src_stride,
                 int dst_w, int h, int src_w, int dx, int mx0
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->resize, "resize_%dbpc", BITDEPTH)) {
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif

        for (int i = 0; i < max_src_width * height; i++)
            src[i] = rnd() & bitdepth_max;

        const int w_den = 9 + (rnd() & 7);
        const int src_w = 16 + (rnd() % (max_src_width - 16 + 1));
        /* At most 512 * 16 / 8 = 1024, the width of the dst buffers. */
        const int dst_w = w_den * src_w >> 3;
        /* Source step per output pixel, 14-bit fixed point, rounded. */
#define scale_fac(ref_sz, this_sz) \
    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
        const int dx = scale_fac(src_w, dst_w);
#undef scale_fac
        const int mx0 = get_upscale_x0(src_w, dst_w, dx);

        call_ref(c_dst, dst_stride, src, src_stride,
                 dst_w, height, src_w, dx, mx0 HIGHBD_TAIL_SUFFIX);
        call_new(a_dst, dst_stride, src, src_stride,
                 dst_w, height, src_w, dx, mx0 HIGHBD_TAIL_SUFFIX);
        checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                             dst_w, height, "dst");

        /* The benchmark uses a fixed 512-pixel output width. */
        bench_new(a_dst, dst_stride, src, src_stride,
                  512, height, 512 * 8 / w_den, dx, mx0 HIGHBD_TAIL_SUFFIX);
    }

    report("resize");
}
736
bitfn(checkasm_check_mc)737 void bitfn(checkasm_check_mc)(void) {
738 Dav1dMCDSPContext c;
739 bitfn(dav1d_mc_dsp_init)(&c);
740
741 check_mc(&c);
742 check_mct(&c);
743 check_mc_scaled(&c);
744 check_mct_scaled(&c);
745 check_avg(&c);
746 check_w_avg(&c);
747 check_mask(&c);
748 check_w_mask(&c);
749 check_blend(&c);
750 check_blend_v(&c);
751 check_blend_h(&c);
752 check_warp8x8(&c);
753 check_warp8x8t(&c);
754 check_emuedge(&c);
755 check_resize(&c);
756 }
757