1 /*
2  * Copyright © 2018, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "config.h"
29 
30 #include <stdint.h>
31 #include <string.h>
32 
33 #include "common/intops.h"
34 
35 #include "src/wedge.h"
36 
// Orientation of the straight edge that splits a wedge mask in two.
// The oblique variants are named after the approximate angle (in
// degrees) of the boundary line.
enum WedgeDirectionType {
    WEDGE_HORIZONTAL = 0,
    WEDGE_VERTICAL = 1,
    WEDGE_OBLIQUE27 = 2,
    WEDGE_OBLIQUE63 = 3,
    WEDGE_OBLIQUE117 = 4,
    WEDGE_OBLIQUE153 = 5,
    N_WEDGE_DIRECTIONS
};
46 
// One codebook entry: which master template to crop from, and where the
// wedge edge sits within the block (offsets in 1/8th-of-block units, as
// used by the ">> 3" scaling in fill2d_16x2).
typedef struct {
    uint8_t /* enum WedgeDirectionType */ direction; // edge orientation
    uint8_t x_offset; // horizontal edge position, 1/8th block units
    uint8_t y_offset; // vertical edge position, 1/8th block units
} wedge_code_type;
52 
// 16-entry wedge codebook used for blocks taller than wide (h > w).
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
    { WEDGE_OBLIQUE27,  4, 4 }, { WEDGE_OBLIQUE63,  4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL,   4, 4 },
    { WEDGE_OBLIQUE27,  4, 2 }, { WEDGE_OBLIQUE27,  4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63,  2, 4 }, { WEDGE_OBLIQUE63,  6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
63 
// 16-entry wedge codebook used for blocks wider than tall (h < w).
static const wedge_code_type wedge_codebook_16_hltw[16] = {
    { WEDGE_OBLIQUE27,  4, 4 }, { WEDGE_OBLIQUE63,  4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_VERTICAL,   2, 4 }, { WEDGE_VERTICAL,   4, 4 },
    { WEDGE_VERTICAL,   6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
    { WEDGE_OBLIQUE27,  4, 2 }, { WEDGE_OBLIQUE27,  4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63,  2, 4 }, { WEDGE_OBLIQUE63,  6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
74 
// 16-entry wedge codebook used for square blocks (h == w).
static const wedge_code_type wedge_codebook_16_heqw[16] = {
    { WEDGE_OBLIQUE27,  4, 4 }, { WEDGE_OBLIQUE63,  4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
    { WEDGE_VERTICAL,   2, 4 }, { WEDGE_VERTICAL,   6, 4 },
    { WEDGE_OBLIQUE27,  4, 2 }, { WEDGE_OBLIQUE27,  4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63,  2, 4 }, { WEDGE_OBLIQUE63,  6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
85 
// Backing storage for the wedge masks, one buffer per (subsampling
// layout, block size) pair. Each buffer holds 2 variants (mask and its
// complement) x 16 wedge indices, each of w*h bytes; the 422/420
// buffers use the chroma-subsampled dimensions.
static uint8_t ALIGN(wedge_masks_444_32x32[2 * 16 * 32 * 32], 64);
static uint8_t ALIGN(wedge_masks_444_32x16[2 * 16 * 32 * 16], 64);
static uint8_t ALIGN(wedge_masks_444_32x8 [2 * 16 * 32 *  8], 64);
static uint8_t ALIGN(wedge_masks_444_16x32[2 * 16 * 16 * 32], 64);
static uint8_t ALIGN(wedge_masks_444_16x16[2 * 16 * 16 * 16], 64);
static uint8_t ALIGN(wedge_masks_444_16x8 [2 * 16 * 16 *  8], 64);
static uint8_t ALIGN(wedge_masks_444_8x32 [2 * 16 *  8 * 32], 64);
static uint8_t ALIGN(wedge_masks_444_8x16 [2 * 16 *  8 * 16], 64);
static uint8_t ALIGN(wedge_masks_444_8x8  [2 * 16 *  8 *  8], 64);

static uint8_t ALIGN(wedge_masks_422_16x32[2 * 16 * 16 * 32], 64);
static uint8_t ALIGN(wedge_masks_422_16x16[2 * 16 * 16 * 16], 64);
static uint8_t ALIGN(wedge_masks_422_16x8 [2 * 16 * 16 *  8], 64);
static uint8_t ALIGN(wedge_masks_422_8x32 [2 * 16 *  8 * 32], 64);
static uint8_t ALIGN(wedge_masks_422_8x16 [2 * 16 *  8 * 16], 64);
static uint8_t ALIGN(wedge_masks_422_8x8  [2 * 16 *  8 *  8], 64);
static uint8_t ALIGN(wedge_masks_422_4x32 [2 * 16 *  4 * 32], 64);
static uint8_t ALIGN(wedge_masks_422_4x16 [2 * 16 *  4 * 16], 64);
static uint8_t ALIGN(wedge_masks_422_4x8  [2 * 16 *  4 *  8], 32);

static uint8_t ALIGN(wedge_masks_420_16x16[2 * 16 * 16 * 16], 64);
static uint8_t ALIGN(wedge_masks_420_16x8 [2 * 16 * 16 *  8], 64);
static uint8_t ALIGN(wedge_masks_420_16x4 [2 * 16 * 16 *  4], 64);
static uint8_t ALIGN(wedge_masks_420_8x16 [2 * 16 *  8 * 16], 64);
static uint8_t ALIGN(wedge_masks_420_8x8  [2 * 16 *  8 *  8], 64);
static uint8_t ALIGN(wedge_masks_420_8x4  [2 * 16 *  8 *  4], 64);
static uint8_t ALIGN(wedge_masks_420_4x16 [2 * 16 *  4 * 16], 64);
static uint8_t ALIGN(wedge_masks_420_4x8  [2 * 16 *  4 *  8], 32);
static uint8_t ALIGN(wedge_masks_420_4x4  [2 * 16 *  4 *  4], 16);

// Externally visible pointer table into the buffers above, filled in by
// fill2d_16x2(): [block size][0=444 / 1=422 / 2=420][sign][wedge index].
const uint8_t *dav1d_wedge_masks[N_BS_SIZES][3][2][16];
117 
// Write one 64-pixel row of a master wedge template: zeros to the left
// of the edge, the 8-entry transition ramp from src centered around
// column ctr, and the value 64 to the right. Ramp samples falling
// outside the 64-pixel row are clipped away.
static void insert_border(uint8_t *const dst, const uint8_t *const src,
                          const int ctr)
{
    const int dst_start = ctr > 4 ? ctr - 4 : 0;      // first ramp column
    const int src_skip  = ctr < 4 ? 4 - ctr : 0;      // clipped ramp samples
    const int ramp_len  = 64 - ctr < 8 ? 64 - ctr : 8;

    if (dst_start > 0)
        memset(dst, 0, dst_start);
    memcpy(dst + dst_start, src + src_skip, ramp_len);
    if (ctr + 4 < 64)
        memset(dst + ctr + 4, 64, 60 - ctr);
}
126 
// Write the 64x64 transpose of src into dst (dst[col][row] = src[row][col]).
static void transpose(uint8_t *const dst, const uint8_t *const src) {
    for (int row = 0; row < 64; row++)
        for (int col = 0; col < 64; col++)
            dst[col * 64 + row] = src[row * 64 + col];
}
132 
// Mirror each 64-pixel row of src horizontally into dst.
static void hflip(uint8_t *const dst, const uint8_t *const src) {
    for (int row = 0; row < 64; row++) {
        const uint8_t *const s = &src[row * 64];
        uint8_t *const d = &dst[row * 64];
        for (int col = 0; col < 64; col++)
            d[col] = s[63 - col];
    }
}
138 
// Build the complementary mask: dst = 64 - src, elementwise, over a
// w x h plane. Rows are tightly packed (stride == w), so a single flat
// pass over the w*h bytes is equivalent to the nested-row form.
static void invert(uint8_t *const dst, const uint8_t *const src,
                   const int w, const int h)
{
    const int n = w * h;
    for (int i = 0; i < n; i++)
        dst[i] = 64 - src[i];
}
146 
// Copy a w x h window out of a 64-pixel-stride source plane into a
// tightly packed (stride == w) destination, starting at (x_off, y_off).
static void copy2d(uint8_t *dst, const uint8_t *src,
                   const int w, const int h, const int x_off, const int y_off)
{
    const uint8_t *row = &src[y_off * 64 + x_off];
    for (int i = 0; i < h; i++, row += 64, dst += w)
        memcpy(dst, row, w);
}
157 
init_chroma(uint8_t * chroma,const uint8_t * luma,const int sign,const int w,const int h,const int ss_ver)158 static COLD void init_chroma(uint8_t *chroma, const uint8_t *luma,
159                              const int sign, const int w, const int h,
160                              const int ss_ver)
161 {
162     for (int y = 0; y < h; y += 1 + ss_ver) {
163         for (int x = 0; x < w; x += 2) {
164             int sum = luma[x] + luma[x + 1] + 1;
165             if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1;
166             chroma[x >> 1] = (sum - sign) >> (1 + ss_ver);
167         }
168         luma += w << ss_ver;
169         chroma += w >> 1;
170     }
171 }
172 
// Build all masks for one block size: crop the 16 luma wedge masks
// (each w*h bytes, stride w) plus their 16 complements out of the
// 64x64 master templates into dst/masks_444, derive the 422 and 420
// chroma versions, and publish pointers to everything in
// dav1d_wedge_masks[bs]. Bit n of signs selects which of the two
// stored variants (mask or complement) acts as "sign 0" for index n.
static COLD void fill2d_16x2(uint8_t *dst, const int w, const int h,
                             const enum BlockSize bs,
                             const uint8_t (*const master)[64 * 64],
                             const wedge_code_type *const cb,
                             uint8_t *masks_444, uint8_t *masks_422,
                             uint8_t *masks_420, const unsigned signs)
{
    uint8_t *ptr = dst;
    // first bank of 16: crop a w*h window out of the selected master,
    // positioned by the codebook's x/y offsets (1/8th-block units)
    for (int n = 0; n < 16; n++) {
        copy2d(ptr, master[cb[n].direction], w, h,
               32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3));
        ptr += w * h;
    }
    // second bank of 16: the complements (64 - mask) of the first bank
    for (int n = 0, off = 0; n < 16; n++, off += w * h)
        invert(ptr + off, dst + off, w, h);

    const int n_stride_444 = (w * h);
    const int n_stride_422 = n_stride_444 >> 1; // 422: half the luma area
    const int n_stride_420 = n_stride_444 >> 2; // 420: quarter the luma area
    const int sign_stride_444 = 16 * n_stride_444; // distance between banks
    const int sign_stride_422 = 16 * n_stride_422;
    const int sign_stride_420 = 16 * n_stride_420;
    // assign pointers in externally visible array
    for (int n = 0; n < 16; n++) {
        const int sign = (signs >> n) & 1;
        dav1d_wedge_masks[bs][0][0][n] = &masks_444[ sign * sign_stride_444];
        // not using !sign is intentional here, since 444 does not require
        // any rounding since no chroma subsampling is applied.
        dav1d_wedge_masks[bs][0][1][n] = &masks_444[ sign * sign_stride_444];
        dav1d_wedge_masks[bs][1][0][n] = &masks_422[ sign * sign_stride_422];
        dav1d_wedge_masks[bs][1][1][n] = &masks_422[!sign * sign_stride_422];
        dav1d_wedge_masks[bs][2][0][n] = &masks_420[ sign * sign_stride_420];
        dav1d_wedge_masks[bs][2][1][n] = &masks_420[!sign * sign_stride_420];
        masks_444 += n_stride_444;
        masks_422 += n_stride_422;
        masks_420 += n_stride_420;

        // since the pointers come from inside, we know that
        // violation of the const is OK here. Any other approach
        // means we would have to duplicate the sign correction
        // logic in two places, which isn't very nice, or mark
        // the table faced externally as non-const, which also sucks
        init_chroma((uint8_t *)dav1d_wedge_masks[bs][1][0][n],
                    dav1d_wedge_masks[bs][0][0][n], 0, w, h, 0);
        init_chroma((uint8_t *)dav1d_wedge_masks[bs][1][1][n],
                    dav1d_wedge_masks[bs][0][0][n], 1, w, h, 0);
        init_chroma((uint8_t *)dav1d_wedge_masks[bs][2][0][n],
                    dav1d_wedge_masks[bs][0][0][n], 0, w, h, 1);
        init_chroma((uint8_t *)dav1d_wedge_masks[bs][2][1][n],
                    dav1d_wedge_masks[bs][0][0][n], 1, w, h, 1);
    }
}
225 
// One-time initialization of all wedge masks (fills dav1d_wedge_masks).
COLD void dav1d_init_wedge_masks(void) {
    // This function is guaranteed to be called only once

    enum WedgeMasterLineType {
        WEDGE_MASTER_LINE_ODD,
        WEDGE_MASTER_LINE_EVEN,
        WEDGE_MASTER_LINE_VERT,
        N_WEDGE_MASTER_LINES,
    };
    // 8-sample 0..64 transition ramps used to draw the wedge boundary;
    // ODD/EVEN are the half-pel-shifted variants for the oblique edge.
    static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = {
        [WEDGE_MASTER_LINE_ODD]  = {  1,  2,  6, 18, 37, 53, 60, 63 },
        [WEDGE_MASTER_LINE_EVEN] = {  1,  4, 11, 27, 46, 58, 62, 63 },
        [WEDGE_MASTER_LINE_VERT] = {  0,  2,  7, 21, 43, 57, 62, 64 },
    };
    uint8_t master[6][64 * 64];

    // create master templates
    // vertical edge: the same centered ramp on every row
    for (int y = 0, off = 0; y < 64; y++, off += 64)
        insert_border(&master[WEDGE_VERTICAL][off],
                      wedge_master_border[WEDGE_MASTER_LINE_VERT], 32);
    // oblique-63 edge: the ramp center shifts left by one pixel every
    // two rows (ctr decreases once per row pair)
    for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--)
    {
        insert_border(&master[WEDGE_OBLIQUE63][off],
                      wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr);
        insert_border(&master[WEDGE_OBLIQUE63][off + 64],
                      wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1);
    }

    // derive the other four directions by transposition and mirroring
    transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]);
    transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]);
    hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]);
    hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]);

// Instantiate fill2d_16x2 for one luma block size, wiring up the
// matching 444/422/420 storage buffers and codebook.
#define fill(w, h, sz_422, sz_420, hvsw, signs) \
    fill2d_16x2((uint8_t *) wedge_masks_444_##w##x##h,  w, h, BS_##w##x##h, \
                master, wedge_codebook_16_##hvsw, wedge_masks_444_##w##x##h, \
                wedge_masks_422_##sz_422, wedge_masks_420_##sz_420, signs)

    fill(32, 32, 16x32, 16x16, heqw, 0x7bfb);
    fill(32, 16, 16x16, 16x8,  hltw, 0x7beb);
    fill(32,  8, 16x8,  16x4,  hltw, 0x6beb);
    fill(16, 32,  8x32,  8x16, hgtw, 0x7beb);
    fill(16, 16,  8x16,  8x8,  heqw, 0x7bfb);
    fill(16,  8,  8x8,   8x4,  hltw, 0x7beb);
    fill( 8, 32,  4x32,  4x16, hgtw, 0x7aeb);
    fill( 8, 16,  4x16,  4x8,  hgtw, 0x7beb);
    fill( 8,  8,  4x8,   4x4,  heqw, 0x7bfb);
#undef fill
}
275 
// Inter-intra mask storage. The DC mask is a single shared buffer (it
// is filled with a constant, see dav1d_init_interintra_masks), while
// the mode-dependent masks get one buffer per (mode, block size).
#define N_II_PRED_MODES (N_INTER_INTRA_PRED_MODES - 1)
static uint8_t ALIGN(ii_dc_mask[32 * 32], 64);
static uint8_t ALIGN(ii_nondc_mask_32x32[N_II_PRED_MODES][32 * 32], 64);
static uint8_t ALIGN(ii_nondc_mask_16x32[N_II_PRED_MODES][16 * 32], 64);
static uint8_t ALIGN(ii_nondc_mask_16x16[N_II_PRED_MODES][16 * 16], 64);
static uint8_t ALIGN(ii_nondc_mask_8x32 [N_II_PRED_MODES][ 8 * 32], 64);
static uint8_t ALIGN(ii_nondc_mask_8x16 [N_II_PRED_MODES][ 8 * 16], 64);
static uint8_t ALIGN(ii_nondc_mask_8x8  [N_II_PRED_MODES][ 8 *  8], 64);
static uint8_t ALIGN(ii_nondc_mask_4x16 [N_II_PRED_MODES][ 4 * 16], 64);
static uint8_t ALIGN(ii_nondc_mask_4x8  [N_II_PRED_MODES][ 4 *  8], 32);
static uint8_t ALIGN(ii_nondc_mask_4x4  [N_II_PRED_MODES][ 4 *  4], 16);
#undef N_II_PRED_MODES
288 
// Externally visible inter-intra mask table:
// [block size][0=444 / 1=422 / 2=420][prediction mode].
// The flat DC mask is shared by every size and layout; the other modes
// point into the per-size buffers above.
#define set1(sz) \
    [II_DC_PRED] = ii_dc_mask, \
    [II_VERT_PRED] = ii_nondc_mask_##sz[II_VERT_PRED - 1], \
    [II_HOR_PRED] = ii_nondc_mask_##sz[II_HOR_PRED - 1], \
    [II_SMOOTH_PRED] = ii_nondc_mask_##sz[II_SMOOTH_PRED - 1]
#define set(sz_444, sz_422, sz_420) \
    { { set1(sz_444) }, { set1(sz_422) }, { set1(sz_420) } }
const uint8_t *dav1d_ii_masks[N_BS_SIZES][3][N_INTER_INTRA_PRED_MODES] = {
    [BS_8x8]   = set( 8x8,   4x8,   4x4),
    [BS_8x16]  = set( 8x16,  4x16,  4x8),
    [BS_16x8]  = set(16x16,  8x8,   8x8),
    [BS_16x16] = set(16x16,  8x16,  8x8),
    [BS_16x32] = set(16x32,  8x32,  8x16),
    [BS_32x16] = set(32x32, 16x16, 16x16),
    [BS_32x32] = set(32x32, 16x32, 16x16),
};
#undef set
#undef set1
307 
build_nondc_ii_masks(uint8_t * const mask_v,uint8_t * const mask_h,uint8_t * const mask_sm,const int w,const int h,const int step)308 static COLD void build_nondc_ii_masks(uint8_t *const mask_v,
309                                       uint8_t *const mask_h,
310                                       uint8_t *const mask_sm,
311                                       const int w, const int h, const int step)
312 {
313     static const uint8_t ii_weights_1d[] = {
314         60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10,  8,  7,
315          6,  6,  5,  4,  4,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1,  1,
316     };
317 
318     for (int y = 0, off = 0; y < h; y++, off += w) {
319         memset(&mask_v[off], ii_weights_1d[y * step], w);
320         for (int x = 0; x < w; x++) {
321             mask_sm[off + x] = ii_weights_1d[imin(x, y) * step];
322             mask_h[off + x] = ii_weights_1d[x * step];
323         }
324     }
325 }
326 
// One-time initialization of the inter-intra blend masks backing
// dav1d_ii_masks.
COLD void dav1d_init_interintra_masks(void) {
    // This function is guaranteed to be called only once

    // DC prediction blends with a flat weight of 32 everywhere
    memset(ii_dc_mask, 32, 32 * 32);
// Expand one per-size mask array into the three (vert, hor, smooth)
// pointer arguments of build_nondc_ii_masks.
#define set(a) a[II_VERT_PRED - 1], a[II_HOR_PRED - 1], a[II_SMOOTH_PRED - 1]
    // step scales the 32-entry weight ramp to the block dimension
    build_nondc_ii_masks(set(ii_nondc_mask_32x32), 32, 32, 1);
    build_nondc_ii_masks(set(ii_nondc_mask_16x32), 16, 32, 1);
    build_nondc_ii_masks(set(ii_nondc_mask_16x16), 16, 16, 2);
    build_nondc_ii_masks(set(ii_nondc_mask_8x32),   8, 32, 1);
    build_nondc_ii_masks(set(ii_nondc_mask_8x16),   8, 16, 2);
    build_nondc_ii_masks(set(ii_nondc_mask_8x8),    8,  8, 4);
    build_nondc_ii_masks(set(ii_nondc_mask_4x16),   4, 16, 2);
    build_nondc_ii_masks(set(ii_nondc_mask_4x8),    4,  8, 4);
    build_nondc_ii_masks(set(ii_nondc_mask_4x4),    4,  4, 8);
#undef set
}
343