1 /*
2  * Copyright © 2018, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "tests/checkasm/checkasm.h"
29 
30 #include "src/levels.h"
31 #include "src/mc.h"
32 
/* Names of the 2D filter combinations, indexed by the `filter` loop counter
 * of the check_mc*() tests below. Only used to build reported test names.
 * NOTE(review): the order presumably mirrors the 2D filter enum from
 * src/levels.h - confirm when adding entries. */
static const char *const filter_names[] = {
    "8tap_regular",
    "8tap_regular_smooth",
    "8tap_regular_sharp",
    "8tap_sharp_regular",
    "8tap_sharp_smooth",
    "8tap_sharp",
    "8tap_smooth_regular",
    "8tap_smooth",
    "8tap_smooth_sharp",
    "bilinear"
};

/* Subpel case names, indexed by `mxy`: bit 0 set means a horizontal offset is
 * used, bit 1 set means a vertical offset is used. */
static const char *const mxy_names[] = {
    "0",
    "h",
    "v",
    "hv"
};

/* Test-name suffixes for the scaled tests, indexed by `p`: random vertical
 * step, vertical step fixed to 1 << 10, vertical step fixed to 2 << 10. */
static const char *const scaled_paths[] = {
    "",
    "_dy1",
    "_dy2"
};
42 
/*
 * Returns the block height to test after `h`.
 *
 * Starting from 2 this yields 2, 4, 6, 8, 12, 16, 24, 32, 64, 128, ...:
 * 4, 8 and 16 step to 1.5x their value, 6, 12 and 24 step to the next power
 * of two, and every other height simply doubles.
 */
static int mc_h_next(const int h) {
    if (h == 4 || h == 8 || h == 16)
        return (h * 3) >> 1;

    /* h & (h - 1) clears the lowest set bit (6 -> 4, 12 -> 8, 24 -> 16). */
    if (h == 6 || h == 12 || h == 24)
        return (h & (h - 1)) * 2;

    return h * 2;
}
57 
/*
 * Tests every c->mc[] function against its reference.
 *
 * For each 2D filter, each power-of-two width in 2..128 and each subpel case
 * (none/h/v/hv), walks the heights produced by mc_h_next(), runs both
 * implementations on the same random source and subpel offsets and compares
 * the destination blocks. Only the regular 8-tap and the bilinear filter are
 * benchmarked.
 */
static void check_mc(Dav1dMCDSPContext *const c) {
    enum { SRC_DIM = 135, DST_DIM = 128 };

    ALIGN_STK_64(pixel, src_buf, SRC_DIM * SRC_DIM,);
    ALIGN_STK_64(pixel, c_dst,   DST_DIM * DST_DIM,);
    ALIGN_STK_64(pixel, a_dst,   DST_DIM * DST_DIM,);
    /* Start 3 rows and 3 columns into the buffer; presumably this leaves room
     * for filter taps that read before the block origin - TODO confirm. */
    const pixel *const src = src_buf + SRC_DIM * 3 + 3;
    const ptrdiff_t src_stride = SRC_DIM * sizeof(pixel);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, int w, int h, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 2; w <= 128; w <<= 1) {
            const ptrdiff_t dst_stride = w * sizeof(pixel);

            for (int mxy = 0; mxy < 4; mxy++) {
                if (!check_func(c->mc[filter], "mc_%s_w%d_%s_%dbpc",
                                filter_names[filter], w, mxy_names[mxy],
                                BITDEPTH))
                {
                    continue;
                }

                const int h_first = w > 32 ? w / 4 : 2;
                const int h_last  = imax(imin(w * 4, 128), 32);

                for (int h = h_first; h <= h_last; h = mc_h_next(h)) {
                    /* Bit 0 of mxy requests a horizontal, bit 1 a vertical
                     * subpel offset in 1..15; otherwise the offset is 0. */
                    const int mx = (mxy & 1) ? rnd() % 15 + 1 : 0;
                    const int my = (mxy & 2) ? rnd() % 15 + 1 : 0;
                    /* NOTE(review): keep the name bitdepth_max - it is
                     * presumably referenced by HIGHBD_TAIL_SUFFIX. */
#if BITDEPTH == 16
                    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                    const int bitdepth_max = 0xff;
#endif

                    for (int n = 0; n < SRC_DIM * SRC_DIM; n++)
                        src_buf[n] = rnd() & bitdepth_max;

                    call_ref(c_dst, dst_stride, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    call_new(a_dst, dst_stride, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    checkasm_check_pixel(c_dst, dst_stride,
                                         a_dst, dst_stride,
                                         w, h, "dst");

                    if (do_bench) {
                        bench_new(a_dst, dst_stride, src, src_stride, w, h,
                                  mx, my HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mc");
}
109 
110 /* Generate worst case input in the topleft corner, randomize the rest */
generate_mct_input(pixel * const buf,const int bitdepth_max)111 static void generate_mct_input(pixel *const buf, const int bitdepth_max) {
112     static const int8_t pattern[8] = { -1,  0, -1,  0,  0, -1,  0, -1 };
113     const int sign = -(rnd() & 1);
114 
115     for (int y = 0; y < 135; y++)
116         for (int x = 0; x < 135; x++)
117             buf[135*y+x] = ((x | y) < 8 ? (pattern[x] ^ pattern[y] ^ sign)
118                                         : rnd()) & bitdepth_max;
119 }
120 
/*
 * Tests every c->mct[] function against its reference.
 *
 * For each 2D filter, each power-of-two width in 4..128 and each subpel case,
 * iterates power-of-two heights from max(w / 4, 4) to min(w * 4, 128), feeds
 * both implementations the output of generate_mct_input() and compares the
 * int16_t intermediate blocks. Only the regular 8-tap and the bilinear filter
 * are benchmarked.
 */
static void check_mct(Dav1dMCDSPContext *const c) {
    enum { SRC_DIM = 135, TMP_DIM = 128 };

    ALIGN_STK_64(pixel, src_buf, SRC_DIM * SRC_DIM,);
    ALIGN_STK_64(int16_t, c_tmp, TMP_DIM * TMP_DIM,);
    ALIGN_STK_64(int16_t, a_tmp, TMP_DIM * TMP_DIM,);
    const pixel *const src = src_buf + SRC_DIM * 3 + 3;
    const ptrdiff_t src_stride = SRC_DIM * sizeof(pixel);

    declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
                 int w, int h, int mx, int my HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 4; w <= 128; w <<= 1) {
            /* The output block is stored packed, w samples per row. */
            const ptrdiff_t tmp_stride = w * sizeof(*c_tmp);

            for (int mxy = 0; mxy < 4; mxy++) {
                if (!check_func(c->mct[filter], "mct_%s_w%d_%s_%dbpc",
                                filter_names[filter], w, mxy_names[mxy],
                                BITDEPTH))
                {
                    continue;
                }

                const int h_first = imax(w / 4, 4);
                const int h_last  = imin(w * 4, 128);

                for (int h = h_first; h <= h_last; h <<= 1) {
                    const int mx = (mxy & 1) ? rnd() % 15 + 1 : 0;
                    const int my = (mxy & 2) ? rnd() % 15 + 1 : 0;
                    /* NOTE(review): keep the name bitdepth_max - it is
                     * presumably referenced by HIGHBD_TAIL_SUFFIX. */
#if BITDEPTH == 16
                    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                    const int bitdepth_max = 0xff;
#endif
                    generate_mct_input(src_buf, bitdepth_max);

                    call_ref(c_tmp, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    call_new(a_tmp, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    checkasm_check(int16_t, c_tmp, tmp_stride,
                                            a_tmp, tmp_stride,
                                            w, h, "tmp");

                    if (do_bench) {
                        bench_new(a_tmp, src, src_stride, w, h,
                                  mx, my HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mct");
}
164 
/*
 * Tests every c->mc_scaled[] function against its reference.
 *
 * For each 2D filter and power-of-two width in 2..128, three variants are
 * registered (see scaled_paths): a random vertical step, and vertical steps
 * fixed to 1 << 10 and 2 << 10. Heights follow mc_h_next(). Start positions
 * and the horizontal step are random for every block size. Only the regular
 * 8-tap and the bilinear filter are benchmarked.
 */
static void check_mc_scaled(Dav1dMCDSPContext *const c) {
    enum { SRC_DIM = 263, DST_DIM = 128 };

    ALIGN_STK_64(pixel, src_buf, SRC_DIM * SRC_DIM,);
    ALIGN_STK_64(pixel, c_dst,   DST_DIM * DST_DIM,);
    ALIGN_STK_64(pixel, a_dst,   DST_DIM * DST_DIM,);
    const pixel *const src = src_buf + SRC_DIM * 3 + 3;
    const ptrdiff_t src_stride = SRC_DIM * sizeof(pixel);
    /* Chosen once for the whole test. NOTE(review): keep the name
     * bitdepth_max - it is presumably referenced by HIGHBD_TAIL_SUFFIX. */
#if BITDEPTH == 16
    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
    const int bitdepth_max = 0xff;
#endif

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, int w, int h,
                 int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 2; w <= 128; w <<= 1) {
            const ptrdiff_t dst_stride = w * sizeof(pixel);

            for (int p = 0; p < 3; ++p) {
                if (!check_func(c->mc_scaled[filter],
                                "mc_scaled_%s_w%d%s_%dbpc",
                                filter_names[filter], w, scaled_paths[p],
                                BITDEPTH))
                {
                    continue;
                }

                const int h_first = w > 32 ? w / 4 : 2;
                const int h_last  = imax(imin(w * 4, 128), 32);

                for (int h = h_first; h <= h_last; h = mc_h_next(h)) {
                    const int mx = rnd() % 1024;
                    const int my = rnd() % 1024;
                    const int dx = rnd() % 2048 + 1;
                    /* p == 1 and p == 2 pin the ystep=1.0 and ystep=2.0
                     * paths; p == 0 uses a random vertical step. */
                    const int dy = p ? p << 10 : rnd() % 2048 + 1;

                    for (int n = 0; n < SRC_DIM * SRC_DIM; n++)
                        src_buf[n] = rnd() & bitdepth_max;

                    call_ref(c_dst, dst_stride, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    call_new(a_dst, dst_stride, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    checkasm_check_pixel(c_dst, dst_stride,
                                         a_dst, dst_stride, w, h, "dst");

                    if (do_bench) {
                        bench_new(a_dst, dst_stride, src, src_stride,
                                  w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mc_scaled");
}
218 
/*
 * Tests every c->mct_scaled[] function against its reference.
 *
 * Same structure as the mc_scaled test, but the functions write a packed
 * int16_t intermediate block; widths run over powers of two in 4..128 and
 * heights follow mc_h_next() from max(w / 4, 4) up to min(w * 4, 128).
 */
static void check_mct_scaled(Dav1dMCDSPContext *const c) {
    enum { SRC_DIM = 263, TMP_DIM = 128 };

    ALIGN_STK_64(pixel, src_buf, SRC_DIM * SRC_DIM,);
    ALIGN_STK_64(int16_t, c_tmp, TMP_DIM * TMP_DIM,);
    ALIGN_STK_64(int16_t, a_tmp, TMP_DIM * TMP_DIM,);
    const pixel *const src = src_buf + SRC_DIM * 3 + 3;
    const ptrdiff_t src_stride = SRC_DIM * sizeof(pixel);
    /* Chosen once for the whole test. NOTE(review): keep the name
     * bitdepth_max - it is presumably referenced by HIGHBD_TAIL_SUFFIX. */
#if BITDEPTH == 16
    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
    const int bitdepth_max = 0xff;
#endif

    declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
                 int w, int h, int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 4; w <= 128; w <<= 1) {
            const ptrdiff_t tmp_stride = w * sizeof(*c_tmp);

            for (int p = 0; p < 3; ++p) {
                if (!check_func(c->mct_scaled[filter],
                                "mct_scaled_%s_w%d%s_%dbpc",
                                filter_names[filter], w, scaled_paths[p],
                                BITDEPTH))
                {
                    continue;
                }

                const int h_first = imax(w / 4, 4);
                const int h_last  = imin(w * 4, 128);

                for (int h = h_first; h <= h_last; h = mc_h_next(h)) {
                    const int mx = rnd() % 1024;
                    const int my = rnd() % 1024;
                    const int dx = rnd() % 2048 + 1;
                    /* p == 1 and p == 2 pin the ystep=1.0 and ystep=2.0
                     * paths; p == 0 uses a random vertical step. */
                    const int dy = p ? p << 10 : rnd() % 2048 + 1;

                    for (int n = 0; n < SRC_DIM * SRC_DIM; n++)
                        src_buf[n] = rnd() & bitdepth_max;

                    call_ref(c_tmp, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    call_new(a_tmp, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    checkasm_check(int16_t, c_tmp, tmp_stride,
                                            a_tmp, tmp_stride,
                                            w, h, "tmp");

                    if (do_bench) {
                        bench_new(a_tmp, src, src_stride,
                                  w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mct_scaled");
}
270 
/*
 * Produces the two 128x128 int16_t intermediate blocks used as inputs by the
 * averaging/masking tests.
 *
 * c:            DSP context whose mct[FILTER_2D_8TAP_SHARP] entry is called
 * buf:          scratch buffer of 135x135 pixels; overwritten by
 *               generate_mct_input() before each of the two calls
 * tmp:          two output blocks of 128 * 128 int16_t each
 * bitdepth_max: sample mask handed to generate_mct_input(); NOTE(review): the
 *               name is presumably also referenced by HIGHBD_TAIL_SUFFIX
 */
static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf,
                     int16_t (*const tmp)[128 * 128], const int bitdepth_max)
{
    const pixel *const src = buf + 135 * 3 + 3;
    const ptrdiff_t src_stride = 135 * sizeof(pixel);

    for (int16_t (*out)[128 * 128] = tmp; out != tmp + 2; out++) {
        /* Fresh input for each block so the two blocks differ. */
        generate_mct_input(buf, bitdepth_max);
        c->mct[FILTER_2D_8TAP_SHARP](*out, src, src_stride, 128, 128,
                                      8, 8 HIGHBD_TAIL_SUFFIX);
    }
}
281 
/*
 * Tests c->avg against its reference for power-of-two widths in 4..128 and
 * power-of-two heights from max(w / 4, 4) to min(w * 4, 128), using
 * intermediates produced by init_tmp(). Every size is benchmarked.
 */
static void check_avg(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst is 135x135 rather than 128x128 because init_tmp() also uses it as
     * the scratch source buffer filled by generate_mct_input(). */
    ALIGN_STK_64(pixel, c_dst, 135 * 135,);
    ALIGN_STK_64(pixel, a_dst, 128 * 128,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w <<= 1) {
        if (!check_func(c->avg, "avg_w%d_%dbpc", w, BITDEPTH))
            continue;

        const ptrdiff_t dst_stride = w * sizeof(pixel);
        const int h_first = imax(w / 4, 4);
        const int h_last  = imin(w * 4, 128);

        for (int h = h_first; h <= h_last; h <<= 1) {
            /* NOTE(review): keep the name bitdepth_max - it is presumably
             * referenced by HIGHBD_TAIL_SUFFIX. */
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif

            init_tmp(c, c_dst, tmp, bitdepth_max);

            call_ref(c_dst, dst_stride, tmp[0], tmp[1],
                     w, h HIGHBD_TAIL_SUFFIX);
            call_new(a_dst, dst_stride, tmp[0], tmp[1],
                     w, h HIGHBD_TAIL_SUFFIX);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp[0], tmp[1],
                      w, h HIGHBD_TAIL_SUFFIX);
        }
    }
    report("avg");
}
312 
/*
 * Tests c->w_avg against its reference for power-of-two widths in 4..128 and
 * power-of-two heights from max(w / 4, 4) to min(w * 4, 128), with a random
 * weight in 1..15 per block size. Every size is benchmarked.
 */
static void check_w_avg(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst is 135x135 rather than 128x128 because init_tmp() also uses it as
     * the scratch source buffer filled by generate_mct_input(). */
    ALIGN_STK_64(pixel, c_dst, 135 * 135,);
    ALIGN_STK_64(pixel, a_dst, 128 * 128,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, int weight HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w <<= 1) {
        if (!check_func(c->w_avg, "w_avg_w%d_%dbpc", w, BITDEPTH))
            continue;

        const ptrdiff_t dst_stride = w * sizeof(pixel);
        const int h_first = imax(w / 4, 4);
        const int h_last  = imin(w * 4, 128);

        for (int h = h_first; h <= h_last; h <<= 1) {
            const int weight = rnd() % 15 + 1;
            /* NOTE(review): keep the name bitdepth_max - it is presumably
             * referenced by HIGHBD_TAIL_SUFFIX. */
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif

            init_tmp(c, c_dst, tmp, bitdepth_max);

            call_ref(c_dst, dst_stride, tmp[0], tmp[1],
                     w, h, weight HIGHBD_TAIL_SUFFIX);
            call_new(a_dst, dst_stride, tmp[0], tmp[1],
                     w, h, weight HIGHBD_TAIL_SUFFIX);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp[0], tmp[1],
                      w, h, weight HIGHBD_TAIL_SUFFIX);
        }
    }
    report("w_avg");
}
344 
/*
 * Tests c->mask against its reference for power-of-two widths in 4..128 and
 * power-of-two heights from max(w / 4, 4) to min(w * 4, 128). The mask is
 * filled once, up front, with random values in 0..64 and reused for every
 * block size. Every size is benchmarked.
 */
static void check_mask(Dav1dMCDSPContext *const c) {
    enum { BLK_DIM = 128 };

    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst is 135x135 rather than 128x128 because init_tmp() also uses it as
     * the scratch source buffer filled by generate_mct_input(). */
    ALIGN_STK_64(pixel,   c_dst, 135 * 135,);
    ALIGN_STK_64(pixel,   a_dst, BLK_DIM * BLK_DIM,);
    ALIGN_STK_64(uint8_t, mask,  BLK_DIM * BLK_DIM,);

    for (int n = 0; n < BLK_DIM * BLK_DIM; n++)
        mask[n] = rnd() % 65;

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, const uint8_t *mask
                 HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w <<= 1) {
        if (!check_func(c->mask, "mask_w%d_%dbpc", w, BITDEPTH))
            continue;

        const ptrdiff_t dst_stride = w * sizeof(pixel);
        const int h_first = imax(w / 4, 4);
        const int h_last  = imin(w * 4, 128);

        for (int h = h_first; h <= h_last; h <<= 1) {
            /* NOTE(review): keep the name bitdepth_max - it is presumably
             * referenced by HIGHBD_TAIL_SUFFIX. */
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif

            init_tmp(c, c_dst, tmp, bitdepth_max);

            call_ref(c_dst, dst_stride, tmp[0], tmp[1],
                     w, h, mask HIGHBD_TAIL_SUFFIX);
            call_new(a_dst, dst_stride, tmp[0], tmp[1],
                     w, h, mask HIGHBD_TAIL_SUFFIX);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp[0], tmp[1],
                      w, h, mask HIGHBD_TAIL_SUFFIX);
        }
    }
    report("mask");
}
379 
/*
 * Tests the three c->w_mask[] functions (reported as 444, 422 and 420)
 * against their references.
 *
 * Both the blended destination block and the mask written by the function
 * are compared. The mask is compared at w >> hor by h >> ver, where hor/ver
 * are the per-variant shifts from the table below. Every size is benchmarked.
 */
static void check_w_mask(Dav1dMCDSPContext *const c) {
    /* One entry per w_mask[] index. */
    static const struct {
        uint16_t name; /* number printed in the test name */
        uint8_t  hor;  /* right shift applied to w for the mask width */
        uint8_t  ver;  /* right shift applied to h for the mask height */
    } layouts[3] = {
        { 444, 0, 0 },
        { 422, 1, 0 },
        { 420, 1, 1 },
    };

    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst is 135x135 rather than 128x128 because init_tmp() also uses it as
     * the scratch source buffer filled by generate_mct_input(). */
    ALIGN_STK_64(pixel,   c_dst,  135 * 135,);
    ALIGN_STK_64(pixel,   a_dst,  128 * 128,);
    ALIGN_STK_64(uint8_t, c_mask, 128 * 128,);
    ALIGN_STK_64(uint8_t, a_mask, 128 * 128,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, uint8_t *mask, int sign
                 HIGHBD_DECL_SUFFIX);

    for (int i = 0; i < 3; i++) {
        for (int w = 4; w <= 128; w <<= 1) {
            if (!check_func(c->w_mask[i], "w_mask_%d_w%d_%dbpc",
                            layouts[i].name, w, BITDEPTH))
            {
                continue;
            }

            const ptrdiff_t dst_stride = w * sizeof(pixel);
            const int mask_w  = w >> layouts[i].hor;
            const int h_first = imax(w / 4, 4);
            const int h_last  = imin(w * 4, 128);

            for (int h = h_first; h <= h_last; h <<= 1) {
                const int sign = rnd() & 1;
                /* NOTE(review): keep the name bitdepth_max - it is
                 * presumably referenced by HIGHBD_TAIL_SUFFIX. */
#if BITDEPTH == 16
                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                const int bitdepth_max = 0xff;
#endif
                const int mask_h = h >> layouts[i].ver;

                init_tmp(c, c_dst, tmp, bitdepth_max);

                call_ref(c_dst, dst_stride, tmp[0], tmp[1], w, h,
                         c_mask, sign HIGHBD_TAIL_SUFFIX);
                call_new(a_dst, dst_stride, tmp[0], tmp[1], w, h,
                         a_mask, sign HIGHBD_TAIL_SUFFIX);
                checkasm_check_pixel(c_dst, dst_stride,
                                     a_dst, dst_stride,
                                     w, h, "dst");
                /* The mask is packed: its stride equals its width. */
                checkasm_check(uint8_t, c_mask, mask_w,
                                        a_mask, mask_w,
                                        mask_w, mask_h,
                                        "mask");

                bench_new(a_dst, dst_stride, tmp[0], tmp[1], w, h,
                          a_mask, sign HIGHBD_TAIL_SUFFIX);
            }
        }
    }
    report("w_mask");
}
429 
/*
 * Tests c->blend against its reference for power-of-two widths in 4..32 and
 * power-of-two heights from max(w / 2, 4) to min(w * 2, 32).
 *
 * Both destination buffers start out with identical random contents, since
 * the function is handed the same buffer as input and output. The mask
 * values are random in 0..64. Every size is benchmarked.
 */
static void check_blend(Dav1dMCDSPContext *const c) {
    enum { BLK_DIM = 32 };

    ALIGN_STK_64(pixel, tmp, BLK_DIM * BLK_DIM,);
    ALIGN_STK_64(pixel, c_dst, BLK_DIM * BLK_DIM,);
    ALIGN_STK_64(pixel, a_dst, BLK_DIM * BLK_DIM,);
    ALIGN_STK_64(uint8_t, mask, BLK_DIM * BLK_DIM,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h, const uint8_t *mask);

    for (int w = 4; w <= 32; w <<= 1) {
        if (!check_func(c->blend, "blend_w%d_%dbpc", w, BITDEPTH))
            continue;

        const ptrdiff_t dst_stride = w * sizeof(pixel);
        const int h_first = imax(w / 2, 4);
        const int h_last  = imin(w * 2, 32);

        for (int h = h_first; h <= h_last; h <<= 1) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif
            /* tmp and mask samples are drawn alternately. */
            for (int n = 0; n < BLK_DIM * BLK_DIM; n++) {
                tmp[n]  = rnd() & bitdepth_max;
                mask[n] = rnd() % 65;
            }
            for (int n = 0; n < w * h; n++) {
                const pixel px = rnd() & bitdepth_max;
                a_dst[n] = px;
                c_dst[n] = px;
            }

            call_ref(c_dst, dst_stride, tmp, w, h, mask);
            call_new(a_dst, dst_stride, tmp, w, h, mask);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp, w, h, mask);
        }
    }
    report("blend");
}
465 
/*
 * Tests c->blend_v against its reference for power-of-two widths in 2..32.
 * Heights are powers of two from 2 up to 128, except for w == 2 where they
 * stop at 64. Both destination buffers start out with identical random
 * contents. Every size is benchmarked.
 */
static void check_blend_v(Dav1dMCDSPContext *const c) {
    enum { MAX_W = 32, MAX_H = 128 };

    ALIGN_STK_64(pixel, tmp,   MAX_W * MAX_H,);
    ALIGN_STK_64(pixel, c_dst, MAX_W * MAX_H,);
    ALIGN_STK_64(pixel, a_dst, MAX_W * MAX_H,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h);

    for (int w = 2; w <= 32; w <<= 1) {
        if (!check_func(c->blend_v, "blend_v_w%d_%dbpc", w, BITDEPTH))
            continue;

        const ptrdiff_t dst_stride = w * sizeof(pixel);
        const int h_last = w == 2 ? 64 : 128;

        for (int h = 2; h <= h_last; h <<= 1) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif

            for (int n = 0; n < w * h; n++) {
                const pixel px = rnd() & bitdepth_max;
                a_dst[n] = px;
                c_dst[n] = px;
            }
            for (int n = 0; n < MAX_W * MAX_H; n++)
                tmp[n] = rnd() & bitdepth_max;

            call_ref(c_dst, dst_stride, tmp, w, h);
            call_new(a_dst, dst_stride, tmp, w, h);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp, w, h);
        }
    }
    report("blend_v");
}
499 
/*
 * Tests c->blend_h against its reference for power-of-two widths in 2..128.
 * Heights are powers of two from 2 up to 32, except for w == 128 where they
 * start at 4. Both destination buffers start out with identical random
 * contents. Every size is benchmarked.
 */
static void check_blend_h(Dav1dMCDSPContext *const c) {
    enum { MAX_W = 128, MAX_H = 32 };

    ALIGN_STK_64(pixel, tmp,   MAX_W * MAX_H,);
    ALIGN_STK_64(pixel, c_dst, MAX_W * MAX_H,);
    ALIGN_STK_64(pixel, a_dst, MAX_W * MAX_H,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h);

    for (int w = 2; w <= 128; w <<= 1) {
        if (!check_func(c->blend_h, "blend_h_w%d_%dbpc", w, BITDEPTH))
            continue;

        const ptrdiff_t dst_stride = w * sizeof(pixel);
        const int h_first = w == 128 ? 4 : 2;

        for (int h = h_first; h <= 32; h <<= 1) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif
            for (int n = 0; n < w * h; n++) {
                const pixel px = rnd() & bitdepth_max;
                a_dst[n] = px;
                c_dst[n] = px;
            }
            for (int n = 0; n < MAX_W * MAX_H; n++)
                tmp[n] = rnd() & bitdepth_max;

            call_ref(c_dst, dst_stride, tmp, w, h);
            call_new(a_dst, dst_stride, tmp, w, h);
            checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                                 w, h, "dst");

            bench_new(a_dst, dst_stride, tmp, w, h);
        }
    }
    report("blend_h");
}
532 
/*
 * Tests c->warp8x8 against its reference on one random case: a 15x15 random
 * source, with mx, my and the four abcd parameters each drawn from
 * [-0xa00, 0x1fff - 0xa00]. The 8x8 pixel outputs are compared and the
 * function is benchmarked.
 */
static void check_warp8x8(Dav1dMCDSPContext *const c) {
    enum { SRC_DIM = 15, BLK_DIM = 8 };

    ALIGN_STK_64(pixel, src_buf, SRC_DIM * SRC_DIM,);
    ALIGN_STK_64(pixel, c_dst,   BLK_DIM * BLK_DIM,);
    ALIGN_STK_64(pixel, a_dst,   BLK_DIM * BLK_DIM,);
    int16_t abcd[4];
    const pixel *const src = src_buf + SRC_DIM * 3 + 3;
    const ptrdiff_t dst_stride = BLK_DIM * sizeof(pixel);
    const ptrdiff_t src_stride = SRC_DIM * sizeof(pixel);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->warp8x8, "warp_8x8_%dbpc", BITDEPTH)) {
        const int mx = (rnd() & 0x1fff) - 0xa00;
        const int my = (rnd() & 0x1fff) - 0xa00;
        /* NOTE(review): keep the name bitdepth_max - it is presumably
         * referenced by HIGHBD_TAIL_SUFFIX. */
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif

        for (int16_t *coef = abcd; coef != abcd + 4; coef++)
            *coef = (rnd() & 0x1fff) - 0xa00;

        for (int n = 0; n < SRC_DIM * SRC_DIM; n++)
            src_buf[n] = rnd() & bitdepth_max;

        call_ref(c_dst, dst_stride, src, src_stride,
                 abcd, mx, my HIGHBD_TAIL_SUFFIX);
        call_new(a_dst, dst_stride, src, src_stride,
                 abcd, mx, my HIGHBD_TAIL_SUFFIX);
        checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                             BLK_DIM, BLK_DIM, "dst");

        bench_new(a_dst, dst_stride, src, src_stride,
                  abcd, mx, my HIGHBD_TAIL_SUFFIX);
    }
    report("warp8x8");
}
570 
/*
 * Tests c->warp8x8t against its reference on one random case. Inputs are
 * generated as in the warp8x8 test, but the output is an 8x8 block of
 * int16_t. The function receives the output stride as 8, while the
 * comparison uses a stride of 8 * sizeof(int16_t) bytes.
 */
static void check_warp8x8t(Dav1dMCDSPContext *const c) {
    enum { SRC_DIM = 15, BLK_DIM = 8 };

    ALIGN_STK_64(pixel, src_buf, SRC_DIM * SRC_DIM,);
    ALIGN_STK_64(int16_t, c_tmp, BLK_DIM * BLK_DIM,);
    ALIGN_STK_64(int16_t, a_tmp, BLK_DIM * BLK_DIM,);
    int16_t abcd[4];
    const pixel *const src = src_buf + SRC_DIM * 3 + 3;
    const ptrdiff_t src_stride = SRC_DIM * sizeof(pixel);

    declare_func(void, int16_t *tmp, ptrdiff_t tmp_stride, const pixel *src,
                 ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->warp8x8t, "warp_8x8t_%dbpc", BITDEPTH)) {
        const int mx = (rnd() & 0x1fff) - 0xa00;
        const int my = (rnd() & 0x1fff) - 0xa00;
        /* NOTE(review): keep the name bitdepth_max - it is presumably
         * referenced by HIGHBD_TAIL_SUFFIX. */
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif
        const ptrdiff_t cmp_stride = BLK_DIM * sizeof(*c_tmp);

        for (int16_t *coef = abcd; coef != abcd + 4; coef++)
            *coef = (rnd() & 0x1fff) - 0xa00;

        for (int n = 0; n < SRC_DIM * SRC_DIM; n++)
            src_buf[n] = rnd() & bitdepth_max;

        call_ref(c_tmp, BLK_DIM, src, src_stride,
                 abcd, mx, my HIGHBD_TAIL_SUFFIX);
        call_new(a_tmp, BLK_DIM, src, src_stride,
                 abcd, mx, my HIGHBD_TAIL_SUFFIX);
        checkasm_check(int16_t, c_tmp, cmp_stride,
                                a_tmp, cmp_stride,
                                BLK_DIM, BLK_DIM, "tmp");

        bench_new(a_tmp, BLK_DIM, src, src_stride,
                  abcd, mx, my HIGHBD_TAIL_SUFFIX);
    }
    report("warp8x8t");
}
608 
/* Bit flags naming the sides of a block; combined into a 4-bit mask by the
 * emu_edge test, where the full mask 0xf is skipped. */
enum EdgeFlags {
    HAVE_TOP    = 1 << 0,
    HAVE_BOTTOM = 1 << 1,
    HAVE_LEFT   = 1 << 2,
    HAVE_RIGHT  = 1 << 3,
};
615 
random_offset_for_edge(int * const x,int * const y,const int bw,const int bh,int * const iw,int * const ih,const enum EdgeFlags edge)616 static void random_offset_for_edge(int *const x, int *const y,
617                                    const int bw, const int bh,
618                                    int *const iw, int *const ih,
619                                    const enum EdgeFlags edge)
620 {
621 #define set_off(edge1, edge2, pos, dim) \
622     *i##dim = edge & (HAVE_##edge1 | HAVE_##edge2) ? 160 : 1 + (rnd() % (b##dim - 2)); \
623     switch (edge & (HAVE_##edge1 | HAVE_##edge2)) { \
624     case HAVE_##edge1 | HAVE_##edge2: \
625         assert(b##dim <= *i##dim); \
626         *pos = rnd() % (*i##dim - b##dim + 1); \
627         break; \
628     case HAVE_##edge1: \
629         *pos = (*i##dim - b##dim) + 1 + (rnd() % (b##dim - 1)); \
630         break; \
631     case HAVE_##edge2: \
632         *pos = -(1 + (rnd() % (b##dim - 1))); \
633         break; \
634     case 0: \
635         assert(b##dim - 1 > *i##dim); \
636         *pos = -(1 + (rnd() % (b##dim - *i##dim - 1))); \
637         break; \
638     }
639     set_off(LEFT, RIGHT, x, w);
640     set_off(TOP, BOTTOM, y, h);
641 }
642 
/*
 * Tests c->emu_edge against its reference.
 *
 * For every power-of-two width in 4..128 and power-of-two height from
 * max(w / 4, 4) to min(w * 4, 128), and for every edge mask 0..0xe, a block
 * of (w + 0..7) x (h + 0..7) is placed at a random position produced by
 * random_offset_for_edge() and the resulting destination blocks are
 * compared. Benchmarks run once per width for each single-edge mask
 * (1, 2, 4, 8) with a (w + 7) x (w + 7) block.
 */
static void check_emuedge(Dav1dMCDSPContext *const c) {
    enum { SRC_DIM = 160, DST_W = 192, DST_H = 135 };

    ALIGN_STK_64(pixel, c_dst, DST_H * DST_W,);
    ALIGN_STK_64(pixel, a_dst, DST_H * DST_W,);
    ALIGN_STK_64(pixel, src,   SRC_DIM * SRC_DIM,);
    const ptrdiff_t dst_stride = DST_W * sizeof(pixel);
    const ptrdiff_t src_stride = SRC_DIM * sizeof(pixel);

    for (int n = 0; n < SRC_DIM * SRC_DIM; n++)
        src[n] = rnd() & ((1U << BITDEPTH) - 1);

    declare_func(void, intptr_t bw, intptr_t bh, intptr_t iw, intptr_t ih,
                 intptr_t x, intptr_t y,
                 pixel *dst, ptrdiff_t dst_stride,
                 const pixel *src, ptrdiff_t src_stride);

    int x, y, iw, ih;
    for (int w = 4; w <= 128; w <<= 1) {
        if (!check_func(c->emu_edge, "emu_edge_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_first = imax(w / 4, 4);
        const int h_last  = imin(w * 4, 128);

        for (int h = h_first; h <= h_last; h <<= 1) {
            // we skip 0xf, since it implies that we don't need emu_edge
            for (enum EdgeFlags edge = 0; edge < 0xf; edge++) {
                const int bw = w + (rnd() & 7);
                const int bh = h + (rnd() & 7);

                random_offset_for_edge(&x, &y, bw, bh, &iw, &ih, edge);
                call_ref(bw, bh, iw, ih, x, y,
                         c_dst, dst_stride, src, src_stride);
                call_new(bw, bh, iw, ih, x, y,
                         a_dst, dst_stride, src, src_stride);
                checkasm_check_pixel(c_dst, dst_stride,
                                     a_dst, dst_stride,
                                     bw, bh, "dst");
            }
        }

        /* Benchmark with exactly one available side at a time. */
        const int bench_dim = w + 7;
        for (enum EdgeFlags edge = 1; edge < 0xf; edge <<= 1) {
            random_offset_for_edge(&x, &y, bench_dim, bench_dim,
                                   &iw, &ih, edge);
            bench_new(bench_dim, bench_dim, iw, ih, x, y,
                      a_dst, dst_stride, src, src_stride);
        }
    }
    report("emu_edge");
}
682 
/*
 * Computes the initial horizontal position handed to the resize function as
 * mx0 (see check_resize), reduced to its low 14 bits.
 *
 * in_w:  source width
 * out_w: destination width (used as divisor, so must be non-zero)
 * step:  per-output-pixel step, as computed by the caller from the two widths
 */
static int get_upscale_x0(const int in_w, const int out_w, const int step) {
    /* Accumulated difference between out_w steps and in_w << 14. */
    const int err      = out_w * step - (in_w << 14);
    const int centring = -((out_w - in_w) << 13);
    const int rounding = out_w >> 1;
    const int x0       = (centring + rounding) / out_w + 128 - (err >> 1);

    return x0 & 0x3fff;
}
688 
/*
 * Tests c->resize against its reference on one random upscaling case.
 *
 * A 64-row source of random width 16..512 is upscaled by a factor of
 * w_den / 8 with w_den in 9..16, so the destination width never exceeds the
 * 1024-sample rows of the destination buffers. The step dx and the start
 * position mx0 are derived from the two widths. The benchmark uses a fixed
 * destination width of 512 with a matching source width, and reuses the dx
 * and mx0 computed for the random case.
 */
static void check_resize(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, c_dst, 1024 * 64,);
    ALIGN_STK_64(pixel, a_dst, 1024 * 64,);
    ALIGN_STK_64(pixel, src,   512 * 64,);

    const int height = 64;
    const int max_src_width = 512;
    const ptrdiff_t dst_stride = 1024 * sizeof(pixel);
    const ptrdiff_t src_stride = 512 * sizeof(pixel);

    /* Parameter names follow the argument order used by every call below:
     * destination width, height, then source width. */
    declare_func(void, pixel *dst, ptrdiff_t dst_stride,
                 const pixel *src, ptrdiff_t src_stride,
                 int dst_w, int h, int src_w, int dx, int mx0
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->resize, "resize_%dbpc", BITDEPTH)) {
        /* NOTE(review): keep the name bitdepth_max - it is presumably
         * referenced by HIGHBD_TAIL_SUFFIX. */
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif

        for (int i = 0; i < max_src_width * height; i++)
            src[i] = rnd() & bitdepth_max;

        const int w_den = 9 + (rnd() & 7);
        const int src_w = 16 + (rnd() % (max_src_width - 16 + 1));
        const int dst_w = w_den * src_w >> 3;
        /* Rounded ratio ref_sz / this_sz with 14 fractional bits. */
#define scale_fac(ref_sz, this_sz) \
    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
        const int dx = scale_fac(src_w, dst_w);
#undef scale_fac
        const int mx0 = get_upscale_x0(src_w, dst_w, dx);

        call_ref(c_dst, dst_stride, src, src_stride,
                 dst_w, height, src_w, dx, mx0 HIGHBD_TAIL_SUFFIX);
        call_new(a_dst, dst_stride, src, src_stride,
                 dst_w, height, src_w, dx, mx0 HIGHBD_TAIL_SUFFIX);
        checkasm_check_pixel(c_dst, dst_stride, a_dst, dst_stride,
                             dst_w, height, "dst");

        bench_new(a_dst, dst_stride, src, src_stride,
                  512, height, 512 * 8 / w_den, dx, mx0 HIGHBD_TAIL_SUFFIX);
    }

    report("resize");
}
736 
bitfn(checkasm_check_mc)737 void bitfn(checkasm_check_mc)(void) {
738     Dav1dMCDSPContext c;
739     bitfn(dav1d_mc_dsp_init)(&c);
740 
741     check_mc(&c);
742     check_mct(&c);
743     check_mc_scaled(&c);
744     check_mct_scaled(&c);
745     check_avg(&c);
746     check_w_avg(&c);
747     check_mask(&c);
748     check_w_mask(&c);
749     check_blend(&c);
750     check_blend_v(&c);
751     check_blend_h(&c);
752     check_warp8x8(&c);
753     check_warp8x8t(&c);
754     check_emuedge(&c);
755     check_resize(&c);
756 }
757